Infrastructure for augmenting search results
authorStanislav Malyshev <smalyshev@gmail.com>
Tue, 26 Apr 2016 22:19:58 +0000 (15:19 -0700)
committerErik Bernhardson <ebernhardson@wikimedia.org>
Thu, 15 Sep 2016 22:44:03 +0000 (15:44 -0700)
Bug: T117493
Change-Id: Ia5413a7846cc961026a2dc3542b619493bc76a23

13 files changed:
autoload.php
docs/hooks.txt
includes/search/AugmentPageProps.php [new file with mode: 0644]
includes/search/PerRowAugmentor.php [new file with mode: 0644]
includes/search/ResultAugmentor.php [new file with mode: 0644]
includes/search/ResultSetAugmentor.php [new file with mode: 0644]
includes/search/SearchEngine.php
includes/search/SearchNearMatchResultSet.php
includes/search/SearchResult.php
includes/search/SearchResultSet.php
includes/search/SqlSearchResultSet.php
includes/specials/SpecialSearch.php
tests/phpunit/includes/search/SearchEngineTest.php

index 96c8190..9fd83eb 100644 (file)
@@ -153,6 +153,7 @@ $wgAutoloadLocalClasses = [
        'AtomFeed' => __DIR__ . '/includes/Feed.php',
        'AtomicSectionUpdate' => __DIR__ . '/includes/deferred/AtomicSectionUpdate.php',
        'AttachLatest' => __DIR__ . '/maintenance/attachLatest.php',
+       'AugmentPageProps' => __DIR__ . '/includes/search/AugmentPageProps.php',
        'AuthManagerSpecialPage' => __DIR__ . '/includes/specialpage/AuthManagerSpecialPage.php',
        'AuthPlugin' => __DIR__ . '/includes/AuthPlugin.php',
        'AuthPluginUser' => __DIR__ . '/includes/AuthPlugin.php',
@@ -1043,6 +1044,7 @@ $wgAutoloadLocalClasses = [
        'PatrolLog' => __DIR__ . '/includes/logging/PatrolLog.php',
        'PatrolLogFormatter' => __DIR__ . '/includes/logging/PatrolLogFormatter.php',
        'Pbkdf2Password' => __DIR__ . '/includes/password/Pbkdf2Password.php',
+       'PerRowAugmentor' => __DIR__ . '/includes/search/PerRowAugmentor.php',
        'PermissionsError' => __DIR__ . '/includes/exception/PermissionsError.php',
        'PhpHttpRequest' => __DIR__ . '/includes/HttpFunctions.php',
        'PhpXmlBugTester' => __DIR__ . '/includes/installer/PhpBugTests.php',
@@ -1182,6 +1184,8 @@ $wgAutoloadLocalClasses = [
        'ResourceLoaderUserTokensModule' => __DIR__ . '/includes/resourceloader/ResourceLoaderUserTokensModule.php',
        'ResourceLoaderWikiModule' => __DIR__ . '/includes/resourceloader/ResourceLoaderWikiModule.php',
        'RestbaseVirtualRESTService' => __DIR__ . '/includes/libs/virtualrest/RestbaseVirtualRESTService.php',
+       'ResultAugmentor' => __DIR__ . '/includes/search/ResultAugmentor.php',
+       'ResultSetAugmentor' => __DIR__ . '/includes/search/ResultSetAugmentor.php',
        'ResultWrapper' => __DIR__ . '/includes/libs/rdbms/database/resultwrapper/ResultWrapper.php',
        'RevDelArchiveItem' => __DIR__ . '/includes/revisiondelete/RevDelArchiveItem.php',
        'RevDelArchiveList' => __DIR__ . '/includes/revisiondelete/RevDelArchiveList.php',
index a7fb873..ae0770b 100644 (file)
@@ -2699,6 +2699,13 @@ $page: WikiPage that is being indexed
 $output: ParserOutput that is produced from the page
 $engine: SearchEngine for which the indexing is intended
 
+'SearchResultsAugment': Allows extension to add its code to the list of search
+result augmentors.
+&$setAugmentors: List of whole-set augmentor objects, must implement ResultSetAugmentor
+&$rowAugmentors: List of per-row augmentor objects, must implement ResultAugmentor.
+Note that lists should be in the format name => object and the names in both lists should
+be distinct.
+
 'SecondaryDataUpdates': Allows modification of the list of DataUpdates to
 perform when page content is modified. Currently called by
 AbstractContent::getSecondaryDataUpdates.
diff --git a/includes/search/AugmentPageProps.php b/includes/search/AugmentPageProps.php
new file mode 100644 (file)
index 0000000..29bd463
--- /dev/null
@@ -0,0 +1,20 @@
+<?php
+
+/**
+ * Augment search result set with values of certain page props.
+ */
+class AugmentPageProps implements ResultSetAugmentor {
+       /**
+        * @var array List of properties.
+        */
+       private $propnames;
+
+       public function __construct( $propnames ) {
+               $this->propnames = $propnames;
+       }
+
+       public function augmentAll( SearchResultSet $resultSet ) {
+               $titles = $resultSet->extractTitles();
+               return PageProps::getInstance()->getProperties( $titles, $this->propnames );
+       }
+}
diff --git a/includes/search/PerRowAugmentor.php b/includes/search/PerRowAugmentor.php
new file mode 100644 (file)
index 0000000..8eb8b17
--- /dev/null
@@ -0,0 +1,38 @@
+<?php
+
+/**
+ * Perform augmentation of each row and return composite result,
+ * indexed by ID.
+ */
+class PerRowAugmentor implements ResultSetAugmentor {
+
+       /**
+        * @var ResultAugmentor
+        */
+       private $rowAugmentor;
+
+       /**
+        * PerRowAugmentor constructor.
+        * @param ResultAugmentor $augmentor Per-result augmentor to use.
+        */
+       public function __construct( ResultAugmentor $augmentor ) {
+               $this->rowAugmentor = $augmentor;
+       }
+
+       /**
+        * Produce data to augment search result set.
+        * @param SearchResultSet $resultSet
+        * @return array Data for all results
+        */
+       public function augmentAll( SearchResultSet $resultSet ) {
+               $data = [];
+               foreach ( $resultSet->extractResults() as $result ) {
+                       $id = $result->getTitle()->getArticleID();
+                       if ( !$id ) {
+                               continue;
+                       }
+                       $data[$id] = $this->rowAugmentor->augment( $result );
+               }
+               return $data;
+       }
+}
diff --git a/includes/search/ResultAugmentor.php b/includes/search/ResultAugmentor.php
new file mode 100644 (file)
index 0000000..350b780
--- /dev/null
@@ -0,0 +1,14 @@
+<?php
+
+/**
+ * Augment search results.
+ *
+ */
+interface ResultAugmentor {
+       /**
+        * Produce data to augment search result set.
+        * @param SearchResult $result
+        * @return mixed Data for this result
+        */
+       public function augment( SearchResult $result );
+}
diff --git a/includes/search/ResultSetAugmentor.php b/includes/search/ResultSetAugmentor.php
new file mode 100644 (file)
index 0000000..94710a8
--- /dev/null
@@ -0,0 +1,14 @@
+<?php
+
+/**
+ * Augment search results.
+ *
+ */
+interface ResultSetAugmentor {
+       /**
+        * Produce data to augment search result set.
+        * @param SearchResultSet $resultSet
+        * @return array Data for all results
+        */
+       public function augmentAll( SearchResultSet $resultSet );
+}
index c2ccca0..1eba141 100644 (file)
@@ -695,6 +695,37 @@ abstract class SearchEngine {
                Hooks::run( 'SearchIndexFields', [ &$fields, $this ] );
                return $fields;
        }
+
+       /**
+        * Augment search results with extra data.
+        *
+        * @param SearchResultSet $resultSet
+        */
+       public function augmentSearchResults( SearchResultSet $resultSet ) {
+               $setAugmentors = [];
+               $rowAugmentors = [];
+               Hooks::run( "SearchResultsAugment", [ &$setAugmentors, &$rowAugmentors ] );
+
+               if ( !$setAugmentors && !$rowAugmentors ) {
+                       // We're done here
+                       return;
+               }
+
+               // Convert row augmentors to set augmentor
+               foreach ( $rowAugmentors as $name => $row ) {
+                       if ( isset( $setAugmentors[$name] ) ) {
+                               throw new InvalidArgumentException( "Both row and set augmentors are defined for $name" );
+                       }
+                       $setAugmentors[$name] = new PerRowAugmentor( $row );
+               }
+
+               foreach ( $setAugmentors as $name => $augmentor ) {
+                       $data = $augmentor->augmentAll( $resultSet );
+                       if ( $data ) {
+                               $resultSet->setAugmentedData( $name, $data );
+                       }
+               }
+       }
 }
 
 /**
index 6d66707..3141797 100644 (file)
@@ -21,7 +21,7 @@ class SearchNearMatchResultSet extends SearchResultSet {
                        return false;
                }
                $this->fetched = true;
-               return SearchResult::newFromTitle( $this->result );
+               return SearchResult::newFromTitle( $this->result, $this );
        }
 
        public function rewind() {
index 21effbb..50db84b 100644 (file)
@@ -56,15 +56,25 @@ class SearchResult {
         */
        protected $searchEngine;
 
+       /**
+        * A set of extension data.
+        * @var array[]
+        */
+       protected $extensionData;
+
        /**
         * Return a new SearchResult and initializes it with a title.
         *
-        * @param Title $title
+        * @param Title           $title
+        * @param SearchResultSet $parentSet
         * @return SearchResult
         */
-       public static function newFromTitle( $title ) {
+       public static function newFromTitle( $title, SearchResultSet $parentSet = null ) {
                $result = new static();
                $result->initFromTitle( $title );
+               if ( $parentSet ) {
+                       $parentSet->augmentResult( $result );
+               }
                return $result;
        }
 
@@ -250,4 +260,24 @@ class SearchResult {
        function isFileMatch() {
                return false;
        }
+
+       /**
+        * Get the extension data as:
+        * augmentor name => data
+        * @return array[]
+        */
+       public function getExtensionData() {
+               return $this->extensionData;
+       }
+
+       /**
+        * Set extension data for this result.
+        * The data is:
+        * augmentor name => data
+        * @param array[] $extensionData
+        */
+       public function setExtensionData( array $extensionData ) {
+               $this->extensionData = $extensionData;
+       }
+
 }
index 69795e7..978db27 100644 (file)
@@ -42,6 +42,29 @@ class SearchResultSet {
 
        protected $containedSyntax = false;
 
+       /**
+        * Cache of titles.
+        * Lists titles of the result set, in the same order as results.
+        * @var Title[]
+        */
+       private $titles;
+
+       /**
+        * Cache of results - serialization of the result iterator
+        * as an array.
+        * @var SearchResult[]
+        */
+       private $results;
+
+       /**
+        * Set of result's extra data, indexed per result id
+        * and then per data item name.
+        * The structure is:
+        * PAGE_ID => [ augmentor name => data, ... ]
+        * @var array[]
+        */
+       protected $extraData = [];
+
        public function __construct( $containedSyntax = false ) {
                $this->containedSyntax = $containedSyntax;
        }
@@ -147,15 +170,15 @@ class SearchResultSet {
        /**
         * Fetches next search result, or false.
         * STUB
-        *
-        * @return SearchResult
+        * FIXME: refactor as iterator, so we could use nicer interfaces.
+        * @return SearchResult|false
         */
        function next() {
                return false;
        }
 
        /**
-        * Rewind result set back to begining
+        * Rewind result set back to beginning
         */
        function rewind() {
        }
@@ -176,4 +199,69 @@ class SearchResultSet {
        public function searchContainedSyntax() {
                return $this->containedSyntax;
        }
+
+       /**
+        * Extract all the results in the result set as array.
+        * @return SearchResult[]
+        */
+       public function extractResults() {
+               if ( is_null( $this->results ) ) {
+                       $this->results = [];
+                       if ( $this->numRows() == 0 ) {
+                               // Don't bother if we've got empty result
+                               return $this->results;
+                       }
+                       $this->rewind();
+                       while ( ( $result = $this->next() ) != false ) {
+                               $this->results[] = $result;
+                       }
+                       $this->rewind();
+               }
+               return $this->results;
+       }
+
+       /**
+        * Extract all the titles in the result set.
+        * @return Title[]
+        */
+       public function extractTitles() {
+               if ( is_null( $this->titles ) ) {
+                       if ( $this->numRows() == 0 ) {
+                               // Don't bother if we've got empty result
+                               $this->titles = [];
+                       } else {
+                               $this->titles = array_map(
+                                       function ( SearchResult $result ) {
+                                               return $result->getTitle();
+                                       },
+                                       $this->extractResults() );
+                       }
+               }
+               return $this->titles;
+       }
+
+       /**
+        * Sets augmented data for result set.
+        * @param string $name Extra data item name
+        * @param array[] $data Extra data as PAGEID => data
+        */
+       public function setAugmentedData( $name, $data ) {
+               foreach ( $data as $id => $resultData ) {
+                       $this->extraData[$id][$name] = $resultData;
+               }
+       }
+
+       /**
+        * Returns extra data for specific result and store it in SearchResult object.
+        * @param SearchResult $result
+        * @return array|null List of data as name => value or null if none present.
+        */
+       public function augmentResult( SearchResult $result ) {
+               $id = $result->getTitle()->getArticleID();
+               if ( !$id || !isset( $this->extraData[$id] ) ) {
+                       return null;
+               }
+               $result->setExtensionData( $this->extraData[$id] );
+               return $this->extraData[$id];
+       }
 }
index 6b60899..c3985d1 100644 (file)
@@ -37,7 +37,7 @@ class SqlSearchResultSet extends SearchResultSet {
                }
 
                return SearchResult::newFromTitle(
-                       Title::makeTitle( $row->page_namespace, $row->page_title )
+                       Title::makeTitle( $row->page_namespace, $row->page_title ), $this
                );
        }
 
index 26b86f9..6daf19f 100644 (file)
@@ -403,6 +403,7 @@ class SpecialSearch extends SpecialPage {
 
                        // show results
                        if ( $numTextMatches > 0 ) {
+                               $search->augmentSearchResults( $textMatches );
                                $out->addHTML( $this->showMatches( $textMatches ) );
                        }
 
@@ -716,7 +717,7 @@ class SpecialSearch extends SpecialPage {
         *
         * @return string
         */
-       protected function showMatches( &$matches, $interwiki = null ) {
+       protected function showMatches( $matches, $interwiki = null ) {
                global $wgContLang;
 
                $terms = $wgContLang->convertForSearchResult( $matches->termMatches() );
@@ -725,7 +726,7 @@ class SpecialSearch extends SpecialPage {
                $pos = $this->offset;
 
                if ( $result && $interwiki ) {
-                       $out .= $this->interwikiHeader( $interwiki, $result );
+                       $out .= $this->interwikiHeader( $interwiki, $matches );
                }
 
                $out .= "<ul class='mw-search-results'>\n";
@@ -750,7 +751,7 @@ class SpecialSearch extends SpecialPage {
         *
         * @return string
         */
-       protected function showHit( $result, $terms, $position ) {
+       protected function showHit( SearchResult $result, $terms, $position ) {
 
                if ( $result->isBrokenTitle() ) {
                        return '';
index 0b34b6f..902fc9e 100644 (file)
@@ -207,4 +207,50 @@ class SearchEngineTest extends MediaWikiLangTestCase {
                $this->assertArrayHasKey( 'testData', $mapping );
                $this->assertEquals( 'test', $mapping['testData'] );
        }
+
+       public function hookSearchIndexFields( $mockFieldBuilder, &$fields, SearchEngine $engine ) {
+               $fields['testField'] = $mockFieldBuilder( "testField", SearchIndexField::INDEX_TYPE_TEXT );
+               return true;
+       }
+
+       public function testAugmentorSearch() {
+               $this->search->setNamespaces( [ 0, 1, 4 ] );
+               $resultSet = $this->search->searchText( 'smithee' );
+               // Not using mock since PHPUnit mocks do not work properly with references in params
+               $this->mergeMwGlobalArrayValue( 'wgHooks',
+                       [ 'SearchResultsAugment' => [ [ $this, 'addAugmentors' ] ] ] );
+               $this->search->augmentSearchResults( $resultSet );
+               for ( $result = $resultSet->next(); $result; $result = $resultSet->next() ) {
+                       $id = $result->getTitle()->getArticleID();
+                       $augmentData = "Result:$id:" . $result->getTitle()->getText();
+                       $augmentData2 = "Result2:$id:" . $result->getTitle()->getText();
+                       $this->assertEquals( [ 'testSet' => $augmentData, 'testRow' => $augmentData2 ],
+                               $result->getExtensionData() );
+               }
+       }
+
+       public function addAugmentors( &$setAugmentors, &$rowAugmentors ) {
+               $setAugmentor = $this->getMock( 'ResultSetAugmentor' );
+               $setAugmentor->expects( $this->once() )
+                       ->method( 'augmentAll' )
+                       ->willReturnCallback( function ( SearchResultSet $resultSet ) {
+                               $data = [];
+                               for ( $result = $resultSet->next(); $result; $result = $resultSet->next() ) {
+                                       $id = $result->getTitle()->getArticleID();
+                                       $data[$id] = "Result:$id:" . $result->getTitle()->getText();
+                               }
+                               $resultSet->rewind();
+                               return $data;
+                       } );
+               $setAugmentors['testSet'] = $setAugmentor;
+
+               $rowAugmentor = $this->getMock( 'ResultAugmentor' );
+               $rowAugmentor->expects( $this->exactly( 2 ) )
+                       ->method( 'augment' )
+                       ->willReturnCallback( function ( SearchResult $result ) {
+                               $id = $result->getTitle()->getArticleID();
+                               return "Result2:$id:" . $result->getTitle()->getText();
+                       } );
+               $rowAugmentors['testRow'] = $rowAugmentor;
+       }
 }