Deprecate SearchResult::termMatches()
authorDavid Causse <dcausse@wikimedia.org>
Wed, 12 Jun 2019 09:02:10 +0000 (11:02 +0200)
committerErik Bernhardson <ebernhardson@wikimedia.org>
Thu, 27 Jun 2019 15:44:06 +0000 (08:44 -0700)
And start indicating that hooks relying on this data might become
unreliable as this data is only populated by SearchDatabase search
engines.

This information was only populated by SearchDatabase implementations
and due to bad initial design of SearchResult[Set] (now fixed) it forced
users of these classes to carry this information for the sole purpose of
highlighting.
Because SearchEngine can now own their SearchResult[Set] implementations
nothing that is engine specific should be exposed outside of these
specific implementations.
If there are some logic that still requires access to such list of terms
they should be made engine specific by guarding their code against
instanceof SqlSearchResult.

Change-Id: I38b82c5e4c35309ee447edc3ded60ca6a18b247a
Depends-On: I53fe37c65c7940f696c1e184125e01e592a976e4

16 files changed:
RELEASE-NOTES-1.34
autoload.php
docs/hooks.txt
includes/api/ApiQuerySearch.php
includes/search/SearchDatabase.php
includes/search/SearchEngine.php
includes/search/SearchResult.php
includes/search/SearchResultSet.php
includes/search/SqlSearchResult.php [new file with mode: 0644]
includes/search/SqlSearchResultSet.php
includes/widget/search/BasicSearchResultSetWidget.php
includes/widget/search/FullSearchResultWidget.php
includes/widget/search/InterwikiSearchResultWidget.php
includes/widget/search/SearchResultWidget.php
includes/widget/search/SimpleSearchResultWidget.php
tests/phpunit/includes/search/SearchEngineTest.php

index 549f7e8..bba7df1 100644 (file)
@@ -308,6 +308,18 @@ because of Phabricator reports.
   instead.
 * Sanitizer::attributeWhitelist() and Sanitizer::setupAttributeWhitelist()
   have been deprecated; they will be made private in the future.
+* SearchResult::termMatches() method is deprecated. It was unreliable because
+  only populated by few search engine implementations. Use
+  SqlSearchResult::getTermMatches() if really needed.
+* SearchResult::getTextSnippet( $terms ) the $terms param is being deprecated
+  and should no longer be passed. Search engine implemenations should be
+  responsible for carrying relevant information needed for highlighting with
+  their own SearchResultSet/SearchResult sub-classes.
+* SearchEngine::$searchTerms protected field is deprecated. Moved to
+  SearchDatabase.
+* The use of the $terms param in the ShowSearchHit and ShowSearchHitTitle
+  hooks is highly discouraged as it's only populated by SearchDatabase search
+  engines.
 
 === Other changes in 1.34 ===
 * …
index 0b93f49..b01827a 100644 (file)
@@ -1438,6 +1438,7 @@ $wgAutoloadLocalClasses = [
        'SpecialWatchlist' => __DIR__ . '/includes/specials/SpecialWatchlist.php',
        'SpecialWhatLinksHere' => __DIR__ . '/includes/specials/SpecialWhatLinksHere.php',
        'SqlBagOStuff' => __DIR__ . '/includes/objectcache/SqlBagOStuff.php',
+       'SqlSearchResult' => __DIR__ . '/includes/search/SqlSearchResult.php',
        'SqlSearchResultSet' => __DIR__ . '/includes/search/SqlSearchResultSet.php',
        'Sqlite' => __DIR__ . '/maintenance/sqlite.inc',
        'SqliteInstaller' => __DIR__ . '/includes/installer/SqliteInstaller.php',
index 99a3d1a..f0b720b 100644 (file)
@@ -2986,7 +2986,7 @@ $article: The article object corresponding to the page
 'ShowSearchHit': Customize display of search hit.
 $searchPage: The SpecialSearch instance.
 $result: The SearchResult to show
-$terms: Search terms, for highlighting
+$terms: Search terms, for highlighting (unreliable as search engine dependent).
 &$link: HTML of link to the matching page. May be modified.
 &$redirect: HTML of redirect info. May be modified.
 &$section: HTML of matching section. May be modified.
index 98c6551..23f702c 100644 (file)
@@ -20,8 +20,6 @@
  * @file
  */
 
-use MediaWiki\MediaWikiServices;
-
 /**
  * Query module to perform full text search within wiki titles and content
  *
@@ -145,9 +143,6 @@ class ApiQuerySearch extends ApiQueryGeneratorBase {
                        }
                }
 
-               // Add the search results to the result
-               $terms = MediaWikiServices::getInstance()->getContentLanguage()->
-                       convertForSearchResult( $matches->termMatches() );
                $titles = [];
                $count = 0;
 
@@ -163,7 +158,7 @@ class ApiQuerySearch extends ApiQueryGeneratorBase {
                        }
 
                        if ( $resultPageSet === null ) {
-                               $vals = $this->getSearchResultData( $result, $prop, $terms );
+                               $vals = $this->getSearchResultData( $result, $prop );
                                if ( $vals ) {
                                        // Add item to results and see whether it fits
                                        $fit = $apiResult->addValue( [ 'query', $this->getModuleName() ], null, $vals );
@@ -184,13 +179,13 @@ class ApiQuerySearch extends ApiQueryGeneratorBase {
                // Interwiki results inside main result set
                $canAddInterwiki = (bool)$params['enablerewrites'] && ( $resultPageSet === null );
                if ( $canAddInterwiki ) {
-                       $this->addInterwikiResults( $matches, $apiResult, $prop, $terms, 'additional',
+                       $this->addInterwikiResults( $matches, $apiResult, $prop, 'additional',
                                SearchResultSet::INLINE_RESULTS );
                }
 
                // Interwiki results outside main result set
                if ( $interwiki && $resultPageSet === null ) {
-                       $this->addInterwikiResults( $matches, $apiResult, $prop, $terms, 'interwiki',
+                       $this->addInterwikiResults( $matches, $apiResult, $prop, 'interwiki',
                                SearchResultSet::SECONDARY_RESULTS );
                }
 
@@ -217,10 +212,9 @@ class ApiQuerySearch extends ApiQueryGeneratorBase {
         * Assemble search result data.
         * @param SearchResult $result Search result
         * @param array        $prop Props to extract (as keys)
-        * @param array        $terms Terms list
         * @return array|null Result data or null if result is broken in some way.
         */
-       private function getSearchResultData( SearchResult $result, $prop, $terms ) {
+       private function getSearchResultData( SearchResult $result, $prop ) {
                // Silently skip broken and missing titles
                if ( $result->isBrokenTitle() || $result->isMissingRevision() ) {
                        return null;
@@ -239,7 +233,7 @@ class ApiQuerySearch extends ApiQueryGeneratorBase {
                        $vals['wordcount'] = $result->getWordCount();
                }
                if ( isset( $prop['snippet'] ) ) {
-                       $vals['snippet'] = $result->getTextSnippet( $terms );
+                       $vals['snippet'] = $result->getTextSnippet();
                }
                if ( isset( $prop['timestamp'] ) ) {
                        $vals['timestamp'] = wfTimestamp( TS_ISO_8601, $result->getTimestamp() );
@@ -287,14 +281,13 @@ class ApiQuerySearch extends ApiQueryGeneratorBase {
         * @param SearchResultSet $matches
         * @param ApiResult       $apiResult
         * @param array           $prop Props to extract (as keys)
-        * @param array           $terms Terms list
         * @param string          $section Section name where results would go
         * @param int             $type Interwiki result type
         * @return int|null Number of total hits in the data or null if none was produced
         */
        private function addInterwikiResults(
                SearchResultSet $matches, ApiResult $apiResult, $prop,
-               $terms, $section, $type
+               $section, $type
        ) {
                $totalhits = null;
                if ( $matches->hasInterwikiResults( $type ) ) {
@@ -304,7 +297,7 @@ class ApiQuerySearch extends ApiQueryGeneratorBase {
 
                                foreach ( $interwikiMatches as $result ) {
                                        $title = $result->getTitle();
-                                       $vals = $this->getSearchResultData( $result, $prop, $terms );
+                                       $vals = $this->getSearchResultData( $result, $prop );
 
                                        $vals['namespace'] = $result->getInterwikiNamespaceText();
                                        $vals['title'] = $title->getText();
index 6da8f98..8ea356f 100644 (file)
@@ -35,6 +35,11 @@ abstract class SearchDatabase extends SearchEngine {
        /** @var IDatabase (backwards compatibility) */
        protected $db;
 
+       /**
+        * @var string[] search terms
+        */
+       protected $searchTerms = [];
+
        /**
         * @param ILoadBalancer $lb The load balancer for the DB cluster to search on
         */
index fa6e7fd..2fb4585 100644 (file)
@@ -46,7 +46,10 @@ abstract class SearchEngine {
        /** @var int */
        protected $offset = 0;
 
-       /** @var string[] */
+       /**
+        * @var string[]
+        * @deprecated since 1.34
+        */
        protected $searchTerms = [];
 
        /** @var bool */
index a27d719..7703e38 100644 (file)
@@ -147,26 +147,11 @@ class SearchResult {
        }
 
        /**
-        * @param string[] $terms Terms to highlight
+        * @param string[] $terms Terms to highlight (this parameter is deprecated and ignored)
         * @return string Highlighted text snippet, null (and not '') if not supported
         */
-       function getTextSnippet( $terms ) {
-               global $wgAdvancedSearchHighlighting;
-               $this->initText();
-
-               // TODO: make highliter take a content object. Make ContentHandler a factory for SearchHighliter.
-               list( $contextlines, $contextchars ) = $this->searchEngine->userHighlightPrefs();
-
-               $h = new SearchHighlighter();
-               if ( count( $terms ) > 0 ) {
-                       if ( $wgAdvancedSearchHighlighting ) {
-                               return $h->highlightText( $this->mText, $terms, $contextlines, $contextchars );
-                       } else {
-                               return $h->highlightSimple( $this->mText, $terms, $contextlines, $contextchars );
-                       }
-               } else {
-                       return $h->highlightNone( $this->mText, $contextlines, $contextchars );
-               }
+       function getTextSnippet( $terms = [] ) {
+               return '';
        }
 
        /**
index 92e2a17..5ee96cb 100644 (file)
@@ -96,6 +96,7 @@ class SearchResultSet implements Countable, IteratorAggregate {
         * STUB
         *
         * @return string[]
+        * @deprecated since 1.34 (use SqlSearchResult)
         */
        function termMatches() {
                return [];
diff --git a/includes/search/SqlSearchResult.php b/includes/search/SqlSearchResult.php
new file mode 100644 (file)
index 0000000..25e87e7
--- /dev/null
@@ -0,0 +1,69 @@
+<?php
+
+/**
+ * Search engine result issued from SearchData search engines.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ * http://www.gnu.org/copyleft/gpl.html
+ *
+ * @file
+ * @ingroup Search
+ */
+
+class SqlSearchResult extends SearchResult {
+       /** @var string[] */
+       private $terms;
+
+       /**
+        * SqlSearchResult constructor.
+        * @param Title $title
+        * @param string[] $terms list of parsed terms
+        */
+       public function __construct( Title $title, array $terms ) {
+               $this->initFromTitle( $title );
+               $this->terms = $terms;
+       }
+
+       /**
+        * return string[]
+        */
+       public function getTermMatches(): array {
+               return $this->terms;
+       }
+
+       /**
+        * @param array $terms Terms to highlight (this parameter is deprecated)
+        * @return string Highlighted text snippet, null (and not '') if not supported
+        */
+       function getTextSnippet( $terms = [] ) {
+               global $wgAdvancedSearchHighlighting;
+               $this->initText();
+
+               // TODO: make highliter take a content object. Make ContentHandler a factory for SearchHighliter.
+               list( $contextlines, $contextchars ) = $this->searchEngine->userHighlightPrefs();
+
+               $h = new SearchHighlighter();
+               if ( count( $this->terms ) > 0 ) {
+                       if ( $wgAdvancedSearchHighlighting ) {
+                               return $h->highlightText( $this->mText, $this->terms, $contextlines, $contextchars );
+                       } else {
+                               return $h->highlightSimple( $this->mText, $this->terms, $contextlines, $contextchars );
+                       }
+               } else {
+                       return $h->highlightNone( $this->mText, $contextlines, $contextchars );
+               }
+       }
+
+}
index f4e4a23..87068ca 100644 (file)
@@ -16,12 +16,22 @@ class SqlSearchResultSet extends SearchResultSet {
        /** @var int|null Total number of hits for $terms */
        protected $totalHits;
 
-       function __construct( IResultWrapper $resultSet, $terms, $total = null ) {
+       /**
+        * @param IResultWrapper $resultSet
+        * @param string[] $terms
+        * @param int|null $total
+        */
+       function __construct( IResultWrapper $resultSet, array $terms, $total = null ) {
+               parent::__construct();
                $this->resultSet = $resultSet;
                $this->terms = $terms;
                $this->totalHits = $total;
        }
 
+       /**
+        * @return string[]
+        * @deprecated since 1.34
+        */
        function termMatches() {
                return $this->terms;
        }
@@ -42,10 +52,15 @@ class SqlSearchResultSet extends SearchResultSet {
                if ( $this->results === null ) {
                        $this->results = [];
                        $this->resultSet->rewind();
+                       $terms = \MediaWiki\MediaWikiServices::getInstance()->getContentLanguage()
+                               ->convertForSearchResult( $this->terms );
                        while ( ( $row = $this->resultSet->fetchObject() ) !== false ) {
-                               $this->results[] = SearchResult::newFromTitle(
-                                       Title::makeTitle( $row->page_namespace, $row->page_title ), $this
+                               $result = new SqlSearchResult(
+                                       Title::makeTitle( $row->page_namespace, $row->page_title ),
+                                       $terms
                                );
+                               $this->augmentResult( $result );
+                               $this->results[] = $result;
                        }
                }
                return $this->results;
index 1a885b0..0e10287 100644 (file)
@@ -117,12 +117,9 @@ class BasicSearchResultSetWidget {
         * @return string HTML
         */
        protected function renderResultSet( SearchResultSet $resultSet, $offset ) {
-               $terms = MediaWikiServices::getInstance()->getContentLanguage()->
-                       convertForSearchResult( $resultSet->termMatches() );
-
                $hits = [];
                foreach ( $resultSet as $result ) {
-                       $hits[] = $this->resultWidget->render( $result, $terms, $offset++ );
+                       $hits[] = $this->resultWidget->render( $result, $offset++ );
                }
 
                return "<ul class='mw-search-results'>" . implode( '', $hits ) . "</ul>";
index f648535..7212dc0 100644 (file)
@@ -31,11 +31,10 @@ class FullSearchResultWidget implements SearchResultWidget {
 
        /**
         * @param SearchResult $result The result to render
-        * @param string[] $terms Terms to be highlighted (@see SearchResult::getTextSnippet)
         * @param int $position The result position, including offset
         * @return string HTML
         */
-       public function render( SearchResult $result, $terms, $position ) {
+       public function render( SearchResult $result, $position ) {
                // If the page doesn't *exist*... our search index is out of date.
                // The least confusing at this point is to drop the result.
                // You may get less results, but... on well. :P
@@ -43,7 +42,7 @@ class FullSearchResultWidget implements SearchResultWidget {
                        return '';
                }
 
-               $link = $this->generateMainLinkHtml( $result, $terms, $position );
+               $link = $this->generateMainLinkHtml( $result, $position );
                // If page content is not readable, just return ths title.
                // This is not quite safe, but better than showing excerpts from
                // non-readable pages. Note that hiding the entry entirely would
@@ -63,7 +62,7 @@ class FullSearchResultWidget implements SearchResultWidget {
                        $this->specialPage->getUser()
                );
                list( $file, $desc, $thumb ) = $this->generateFileHtml( $result );
-               $snippet = $result->getTextSnippet( $terms );
+               $snippet = $result->getTextSnippet();
                if ( $snippet ) {
                        $extract = "<div class='searchresult'>$snippet</div>";
                } else {
@@ -80,6 +79,9 @@ class FullSearchResultWidget implements SearchResultWidget {
                        $html = null;
                        $score = '';
                        $related = '';
+                       // TODO: remove this instanceof and always pass [], let implementors do the cast if
+                       // they want to be SearchDatabase specific
+                       $terms = $result instanceof \SqlSearchResult ? $result->getTermMatches() : [];
                        if ( !Hooks::run( 'ShowSearchHit', [
                                $this->specialPage, $result, $terms,
                                &$link, &$redirect, &$section, &$extract,
@@ -121,11 +123,10 @@ class FullSearchResultWidget implements SearchResultWidget {
         * title with highlighted words).
         *
         * @param SearchResult $result
-        * @param string[] $terms
         * @param int $position
         * @return string HTML
         */
-       protected function generateMainLinkHtml( SearchResult $result, $terms, $position ) {
+       protected function generateMainLinkHtml( SearchResult $result, $position ) {
                $snippet = $result->getTitleSnippet();
                if ( $snippet === '' ) {
                        $snippet = null;
@@ -139,7 +140,9 @@ class FullSearchResultWidget implements SearchResultWidget {
 
                $attributes = [ 'data-serp-pos' => $position ];
                Hooks::run( 'ShowSearchHitTitle',
-                       [ &$title, &$snippet, $result, $terms, $this->specialPage, &$query, &$attributes ] );
+                       [ &$title, &$snippet, $result,
+                         $result instanceof \SqlSearchResult ? $result->getTermMatches() : [],
+                         $this->specialPage, &$query, &$attributes ] );
 
                $link = $this->linkRenderer->makeLink(
                        $title,
index 745bc12..3f758db 100644 (file)
@@ -24,14 +24,13 @@ class InterwikiSearchResultWidget implements SearchResultWidget {
 
        /**
         * @param SearchResult $result The result to render
-        * @param string[] $terms Terms to be highlighted (@see SearchResult::getTextSnippet)
         * @param int $position The result position, including offset
         * @return string HTML
         */
-       public function render( SearchResult $result, $terms, $position ) {
+       public function render( SearchResult $result, $position ) {
                $title = $result->getTitle();
                $titleSnippet = $result->getTitleSnippet();
-               $snippet = $result->getTextSnippet( $terms );
+               $snippet = $result->getTextSnippet();
 
                if ( $titleSnippet ) {
                        $titleSnippet = new HtmlArmor( $titleSnippet );
index 4f0a271..e001395 100644 (file)
@@ -10,9 +10,8 @@ use SearchResult;
 interface SearchResultWidget {
        /**
         * @param SearchResult $result The result to render
-        * @param string[] $terms Terms to be highlighted (@see SearchResult::getTextSnippet)
         * @param int $position The zero indexed result position, including offset
         * @return string HTML
         */
-       public function render( SearchResult $result, $terms, $position );
+       public function render( SearchResult $result, $position );
 }
index 86a04b1..fe8b4d5 100644 (file)
@@ -26,11 +26,10 @@ class SimpleSearchResultWidget implements SearchResultWidget {
 
        /**
         * @param SearchResult $result The result to render
-        * @param string[] $terms Terms to be highlighted (@see SearchResult::getTextSnippet)
         * @param int $position The result position, including offset
         * @return string HTML
         */
-       public function render( SearchResult $result, $terms, $position ) {
+       public function render( SearchResult $result, $position ) {
                $title = $result->getTitle();
                $titleSnippet = $result->getTitleSnippet();
                if ( $titleSnippet ) {
index 1a0393e..2772b0d 100644 (file)
@@ -190,7 +190,7 @@ class SearchEngineTest extends MediaWikiLangTestCase {
                $match = $res->getIterator()->current();
                $snippet = "A <span class='searchmatch'>" . $phrase . "</span>";
                $this->assertStringStartsWith( $snippet,
-                       $match->getTextSnippet( $res->termMatches() ),
+                       $match->getTextSnippet(),
                        "Highlight a phrase search" );
        }