Add deleted archive titles search
authorStanislav Malyshev <smalyshev@gmail.com>
Sat, 2 Apr 2016 05:57:20 +0000 (08:57 +0300)
committerStanislav Malyshev <smalyshev@gmail.com>
Wed, 5 Apr 2017 19:02:35 +0000 (12:02 -0700)
Allows the search engine to suggest deleted titles for undelete search.
Note that the titles are still verified against the archive table,
to guard against the search engine being out-of-date.

Bug: T109561
Change-Id: Id6099fe9fbf18481068a6f0a329bbde0d218135f

docs/hooks.txt
includes/page/PageArchive.php
includes/search/SearchEngine.php
includes/specials/SpecialUndelete.php

index a38f9bb..bee4477 100644 (file)
@@ -736,7 +736,10 @@ $current: the reverted revision
 $create: Whether or not the restoration caused the page to be created (i.e. it
   didn't exist before).
 $comment: The comment associated with the undeletion.
-$oldPageId: ID of page previously deleted (from archive table)
+$oldPageId: ID of page previously deleted (from archive table). This ID will be used
+  for the restored page.
+$restoredPages: Set of page IDs that have revisions restored for this undelete,
+  with keys being page IDs and values being 'true'.
 
 'ArticleUndeleteLogEntry': When a log entry is generated but not yet saved.
 $pageArchive: the PageArchive object
index c2bfb07..188088f 100644 (file)
@@ -67,6 +67,56 @@ class PageArchive {
                return self::listPages( $dbr, '' );
        }
 
+       /**
+        * List deleted pages recorded in the archive matching the
+        * given term, using search engine archive.
+        * Returns result wrapper with (ar_namespace, ar_title, count) fields.
+        *
+        * @param string $term Search term
+        * @return ResultWrapper
+        */
+       public static function listPagesBySearch( $term ) {
+               $title = Title::newFromText( $term );
+               if ( $title ) {
+                       $ns = $title->getNamespace();
+                       $termMain = $title->getText();
+                       $termDb = $title->getDBkey();
+               } else {
+                       // Probably won't work too well
+                       // @todo handle bare namespace names cleanly?
+                       $ns = 0;
+                       $termMain = $termDb = $term;
+               }
+
+               // Try search engine first
+               $engine = MediaWikiServices::getInstance()->newSearchEngine();
+               $engine->setLimitOffset( 100 );
+               $engine->setNamespaces( [ $ns ] );
+               $results = $engine->searchArchiveTitle( $termMain );
+               if ( !$results->isOK() ) {
+                       $results = [];
+               } else {
+                       $results = $results->getValue();
+               }
+
+               if ( !$results ) {
+                       // Fall back to regular prefix search
+                       return self::listPagesByPrefix( $term );
+               }
+
+               $dbr = wfGetDB( DB_REPLICA );
+               $condTitles = array_unique( array_map( function ( Title $t ) {
+                       return $t->getDBkey();
+               }, $results ) );
+               $conds = [
+                       'ar_namespace' => $ns,
+                       $dbr->makeList( [ 'ar_title' => $condTitles ], LIST_OR ) . " OR ar_title " .
+                       $dbr->buildLike( $termDb, $dbr->anyString() )
+               ];
+
+               return self::listPages( $dbr, $conds );
+       }
+
        /**
         * List deleted pages recorded in the archive table matching the
         * given title prefix.
@@ -133,6 +183,7 @@ class PageArchive {
                $fields = [
                        'ar_minor_edit', 'ar_timestamp', 'ar_user', 'ar_user_text',
                        'ar_comment', 'ar_len', 'ar_deleted', 'ar_rev_id', 'ar_sha1',
+                       'ar_page_id'
                ];
 
                if ( $this->config->get( 'ContentHandlerUseDB' ) ) {
@@ -620,7 +671,7 @@ class PageArchive {
                $restored = 0; // number of revisions restored
                /** @var Revision $revision */
                $revision = null;
-
+               $restoredPages = [];
                // If there are no restorable revisions, we can skip most of the steps.
                if ( $latestRestorableRow === null ) {
                        $failedRevisionCount = $rev_count;
@@ -677,6 +728,7 @@ class PageArchive {
 
                                Hooks::run( 'ArticleRevisionUndeleted',
                                        [ &$this->title, $revision, $row->ar_page_id ] );
+                               $restoredPages[$row->ar_page_id] = true;
                        }
 
                        // Now that it's safely stored, take it out of the archive
@@ -717,7 +769,8 @@ class PageArchive {
                                );
                        }
 
-                       Hooks::run( 'ArticleUndelete', [ &$this->title, $created, $comment, $oldPageId ] );
+                       Hooks::run( 'ArticleUndelete',
+                               [ &$this->title, $created, $comment, $oldPageId, $restoredPages ] );
                        if ( $this->title->getNamespace() == NS_FILE ) {
                                DeferredUpdates::addUpdate( new HTMLCacheUpdate( $this->title, 'imagelinks' ) );
                        }
index 9817b6c..6bb4e5a 100644 (file)
@@ -72,6 +72,21 @@ abstract class SearchEngine {
                return null;
        }
 
+       /**
+        * Perform a title search in the article archive.
+        * NOTE: these results should still be filtered by
+        * matching against PageArchive, permission checks, etc.
+        * The results returned by this method are only suggestions and
+        * may not end up being shown to the user.
+        *
+        * @param string $term Raw search term
+        * @return Status<Title[]>
+        * @since 1.29
+        */
+       function searchArchiveTitle( $term ) {
+               return Status::newGood( [] );
+       }
+
        /**
         * Perform a title-only search query and return a result set.
         * If title searches are not supported or disabled, return null.
index dc5f877..eb4f0cc 100644 (file)
@@ -21,6 +21,7 @@
  * @ingroup SpecialPage
  */
 
+use MediaWiki\MediaWikiServices;
 use Wikimedia\Rdbms\ResultWrapper;
 
 /**
@@ -45,6 +46,10 @@ class SpecialUndelete extends SpecialPage {
 
        /** @var Title */
        private $mTargetObj;
+       /**
+        * @var string Search prefix
+        */
+       private $mSearchPrefix;
 
        function __construct() {
                parent::__construct( 'Undelete', 'deletedhistory' );
@@ -235,6 +240,7 @@ class SpecialUndelete extends SpecialPage {
                        Xml::openElement( 'form', [ 'method' => 'get', 'action' => wfScript() ] ) .
                                Xml::fieldset( $this->msg( 'undelete-search-box' )->text() ) .
                                Html::hidden( 'title', $this->getPageTitle()->getPrefixedDBkey() ) .
+                               Html::hidden( 'fuzzy', $this->getRequest()->getVal( 'fuzzy' ) ) .
                                Html::rawElement(
                                        'label',
                                        [ 'for' => 'prefix' ],
@@ -245,15 +251,25 @@ class SpecialUndelete extends SpecialPage {
                                        20,
                                        $this->mSearchPrefix,
                                        [ 'id' => 'prefix', 'autofocus' => '' ]
-                               ) . ' ' .
-                               Xml::submitButton( $this->msg( 'undelete-search-submit' )->text() ) .
+                               ) .
+                               ' ' .
+                               Xml::submitButton(
+                                       $this->msg( 'undelete-search-submit' )->text(),
+                                       [ 'id' => 'searchUndelete' ]
+                               ) .
                                Xml::closeElement( 'fieldset' ) .
                                Xml::closeElement( 'form' )
                );
 
                # List undeletable articles
                if ( $this->mSearchPrefix ) {
-                       $result = PageArchive::listPagesByPrefix( $this->mSearchPrefix );
+                       // For now, we enable search engine match only when specifically asked to
+                       // by using fuzzy=1 parameter.
+                       if ( $this->getRequest()->getVal( "fuzzy", false ) ) {
+                               $result = PageArchive::listPagesBySearch( $this->mSearchPrefix );
+                       } else {
+                               $result = PageArchive::listPagesByPrefix( $this->mSearchPrefix );
+                       }
                        $this->showList( $result );
                }
        }
@@ -277,7 +293,7 @@ class SpecialUndelete extends SpecialPage {
 
                $linkRenderer = $this->getLinkRenderer();
                $undelete = $this->getPageTitle();
-               $out->addHTML( "<ul>\n" );
+               $out->addHTML( "<ul id='undeleteResultsList'>\n" );
                foreach ( $result as $row ) {
                        $title = Title::makeTitleSafe( $row->ar_namespace, $row->ar_title );
                        if ( $title !== null ) {
@@ -300,7 +316,7 @@ class SpecialUndelete extends SpecialPage {
                                );
                        }
                        $revs = $this->msg( 'undeleterevisions' )->numParams( $row->count )->parse();
-                       $out->addHTML( "<li>{$item} ({$revs})</li>\n" );
+                       $out->addHTML( "<li class='undeleteResult'>{$item} ({$revs})</li>\n" );
                }
                $result->free();
                $out->addHTML( "</ul>\n" );