Expose SearchEngine specific profiles
author dcausse <dcausse@wikimedia.org>
Thu, 31 Mar 2016 09:13:21 +0000 (11:13 +0200)
committer dcausse <dcausse@wikimedia.org>
Mon, 30 May 2016 18:43:53 +0000 (20:43 +0200)
This patch introduces a way for SearchEngine implementations to expose
specific search profiles useful to fine-tune the various behaviors related to
search.

A SearchEngine can expose a list of profiles by overriding
SearchEngine::getProfiles( $profileType ), where profileType stands for the
type of profile being customized. Two types are added in this patch:
- completion: exposed by ApiQueryPrefixSearch and ApiOpenSearch to control
  the behavior of the algorithm behind "search as you type" suggestions.
- fulltext query independent profiles: exposed by ApiQuerySearch to customize
  query independent ranking profiles (e.g. boost by templates/incoming
  links/popularity/...)

This patch allows API consumers that might have been confused by fuzzy
suggestions to switch to stricter profiles, and officializes the behavior
behind the hidden param cirrusUseCompletionSuggester. It also makes it possible
to control fulltext ranking behaviors like cirrusBoostLinks=(yes|no).

The list of profiles can be discovered by using ApiSandbox/ApiHelp and is totally
controlled by search engine implementations.

Bug: T132477
Change-Id: I66be724d8975976c98c91badbf421f237e014f89

autoload.php
includes/api/ApiOpenSearch.php
includes/api/ApiQueryPrefixSearch.php
includes/api/ApiQuerySearch.php
includes/api/SearchApi.php [new file with mode: 0644]
includes/api/i18n/en.json
includes/api/i18n/qqq.json
includes/search/SearchEngine.php

index f635bc1..982a162 100644 (file)
@@ -1199,6 +1199,7 @@ $wgAutoloadLocalClasses = [
        'SavepointPostgres' => __DIR__ . '/includes/db/DatabasePostgres.php',
        'ScopedCallback' => __DIR__ . '/includes/libs/ScopedCallback.php',
        'ScopedLock' => __DIR__ . '/includes/filebackend/lockmanager/ScopedLock.php',
+       'SearchApi' => __DIR__ . '/includes/api/SearchApi.php',
        'SearchDatabase' => __DIR__ . '/includes/search/SearchDatabase.php',
        'SearchDump' => __DIR__ . '/maintenance/dumpIterator.php',
        'SearchEngine' => __DIR__ . '/includes/search/SearchEngine.php',
index 058e0a3..066aaa3 100644 (file)
@@ -30,10 +30,14 @@ use MediaWiki\MediaWikiServices;
  * @ingroup API
  */
 class ApiOpenSearch extends ApiBase {
+       use SearchApi;
 
        private $format = null;
        private $fm = null;
 
+       /** @var array list of api allowed params */
+       private $allowedParams = null;
+
        /**
         * Get the output format
         *
@@ -80,24 +84,13 @@ class ApiOpenSearch extends ApiBase {
        public function execute() {
                $params = $this->extractRequestParams();
                $search = $params['search'];
-               $limit = $params['limit'];
-               $namespaces = $params['namespace'];
                $suggest = $params['suggest'];
-
-               if ( $params['redirects'] === null ) {
-                       // Backwards compatibility, don't resolve for JSON.
-                       $resolveRedir = $this->getFormat() !== 'json';
-               } else {
-                       $resolveRedir = $params['redirects'] === 'resolve';
-               }
-
                $results = [];
-
                if ( !$suggest || $this->getConfig()->get( 'EnableOpenSearchSuggest' ) ) {
                        // Open search results may be stored for a very long time
                        $this->getMain()->setCacheMaxAge( $this->getConfig()->get( 'SearchSuggestCacheExpiry' ) );
                        $this->getMain()->setCacheMode( 'public' );
-                       $this->search( $search, $limit, $namespaces, $resolveRedir, $results );
+                       $results = $this->search( $search, $params );
 
                        // Allow hooks to populate extracts and images
                        Hooks::run( 'ApiOpenSearchSuggest', [ &$results ] );
@@ -117,21 +110,17 @@ class ApiOpenSearch extends ApiBase {
 
        /**
         * Perform the search
-        *
-        * @param string $search Text to search
-        * @param int $limit Maximum items to return
-        * @param array $namespaces Namespaces to search
-        * @param bool $resolveRedir Whether to resolve redirects
-        * @param array &$results Put results here. Keys have to be integers.
+        * @param string $search the search query
+        * @param array $params api request params
+        * @return array search results. Keys are integers.
         */
-       protected function search( $search, $limit, $namespaces, $resolveRedir, &$results ) {
-               $searchEngine = MediaWikiServices::getInstance()->newSearchEngine();
-               $searchEngine->setLimitOffset( $limit );
-               $searchEngine->setNamespaces( $namespaces );
+       private function search( $search, array $params ) {
+               $searchEngine = $this->buildSearchEngine( $params );
                $titles = $searchEngine->extractTitles( $searchEngine->completionSearchWithVariants( $search ) );
+               $results = [];
 
                if ( !$titles ) {
-                       return;
+                       return $results;
                }
 
                // Special pages need unique integer ids in the return list, so we just
@@ -139,6 +128,13 @@ class ApiOpenSearch extends ApiBase {
                // always positive articleIds that non-special pages get.
                $nextSpecialPageId = -1;
 
+               if ( $params['redirects'] === null ) {
+                       // Backwards compatibility, don't resolve for JSON.
+                       $resolveRedir = $this->getFormat() !== 'json';
+               } else {
+                       $resolveRedir = $params['redirects'] === 'resolve';
+               }
+
                if ( $resolveRedir ) {
                        // Query for redirects
                        $redirects = [];
@@ -206,6 +202,8 @@ class ApiOpenSearch extends ApiBase {
                                ];
                        }
                }
+
+               return $results;
        }
 
        /**
@@ -271,7 +269,10 @@ class ApiOpenSearch extends ApiBase {
        }
 
        public function getAllowedParams() {
-               return [
+               if ( $this->allowedParams !== null ) {
+                       return $this->allowedParams;
+               }
+               $this->allowedParams = [
                        'search' => null,
                        'limit' => [
                                ApiBase::PARAM_DFLT => $this->getConfig()->get( 'OpenSearchDefaultLimit' ),
@@ -295,6 +296,20 @@ class ApiOpenSearch extends ApiBase {
                        ],
                        'warningsaserror' => false,
                ];
+
+               $profileParam = $this->buildProfileApiParam( SearchEngine::COMPLETION_PROFILE_TYPE,
+                       'apihelp-query+prefixsearch-param-profile' );
+               if ( $profileParam ) {
+                       $this->allowedParams['profile'] = $profileParam;
+               }
+               return $this->allowedParams;
+       }
+
+       public function getSearchProfileParams() {
+               if ( isset( $this->getAllowedParams()['profile'] ) ) {
+                       return [ SearchEngine::COMPLETION_PROFILE_TYPE => 'profile' ]; // profile type => request param name
+               }
+               return []; // no 'profile' param is exposed by getAllowedParams()
        }
 
        protected function getExamplesMessages() {
index 5c50273..46538e0 100644 (file)
@@ -25,6 +25,11 @@ use MediaWiki\MediaWikiServices;
  * @ingroup API
  */
 class ApiQueryPrefixSearch extends ApiQueryGeneratorBase {
+       use SearchApi;
+
+       /** @var array list of api allowed params */
+       private $allowedParams;
+
        public function __construct( $query, $moduleName ) {
                parent::__construct( $query, $moduleName, 'ps' );
        }
@@ -44,12 +49,9 @@ class ApiQueryPrefixSearch extends ApiQueryGeneratorBase {
                $params = $this->extractRequestParams();
                $search = $params['search'];
                $limit = $params['limit'];
-               $namespaces = $params['namespace'];
                $offset = $params['offset'];
 
-               $searchEngine = MediaWikiServices::getInstance()->newSearchEngine();
-               $searchEngine->setLimitOffset( $limit + 1, $offset );
-               $searchEngine->setNamespaces( $namespaces );
+               $searchEngine = $this->buildSearchEngine( $params );
                $titles = $searchEngine->extractTitles( $searchEngine->completionSearchWithVariants( $search ) );
 
                if ( $resultPageSet ) {
@@ -60,7 +62,7 @@ class ApiQueryPrefixSearch extends ApiQueryGeneratorBase {
                                return $current;
                        } );
                        if ( count( $titles ) > $limit ) {
-                               $this->setContinueEnumParameter( 'offset', $offset + $params['limit'] );
+                               $this->setContinueEnumParameter( 'offset', $offset + $limit );
                                array_pop( $titles );
                        }
                        $resultPageSet->populateFromTitles( $titles );
@@ -72,7 +74,7 @@ class ApiQueryPrefixSearch extends ApiQueryGeneratorBase {
                        $count = 0;
                        foreach ( $titles as $title ) {
                                if ( ++$count > $limit ) {
-                                       $this->setContinueEnumParameter( 'offset', $offset + $params['limit'] );
+                                       $this->setContinueEnumParameter( 'offset', $offset + $limit );
                                        break;
                                }
                                $vals = [
@@ -101,29 +103,45 @@ class ApiQueryPrefixSearch extends ApiQueryGeneratorBase {
        }
 
        public function getAllowedParams() {
-                       return [
-                               'search' => [
-                                       ApiBase::PARAM_TYPE => 'string',
-                                       ApiBase::PARAM_REQUIRED => true,
-                               ],
-                               'namespace' => [
-                                       ApiBase::PARAM_DFLT => NS_MAIN,
-                                       ApiBase::PARAM_TYPE => 'namespace',
-                                       ApiBase::PARAM_ISMULTI => true,
-                               ],
-                               'limit' => [
-                                       ApiBase::PARAM_DFLT => 10,
-                                       ApiBase::PARAM_TYPE => 'limit',
-                                       ApiBase::PARAM_MIN => 1,
-                                       // Non-standard value for compatibility with action=opensearch
-                                       ApiBase::PARAM_MAX => 100,
-                                       ApiBase::PARAM_MAX2 => 200,
-                               ],
-                               'offset' => [
-                                       ApiBase::PARAM_DFLT => 0,
-                                       ApiBase::PARAM_TYPE => 'integer',
-                               ],
-                       ];
+               if ( $this->allowedParams !== null ) {
+                       return $this->allowedParams;
+               }
+               $this->allowedParams = [
+                       'search' => [
+                               ApiBase::PARAM_TYPE => 'string',
+                               ApiBase::PARAM_REQUIRED => true,
+                       ],
+                       'namespace' => [
+                               ApiBase::PARAM_DFLT => NS_MAIN,
+                               ApiBase::PARAM_TYPE => 'namespace',
+                               ApiBase::PARAM_ISMULTI => true,
+                       ],
+                       'limit' => [
+                               ApiBase::PARAM_DFLT => 10,
+                               ApiBase::PARAM_TYPE => 'limit',
+                               ApiBase::PARAM_MIN => 1,
+                               // Non-standard value for compatibility with action=opensearch
+                               ApiBase::PARAM_MAX => 100,
+                               ApiBase::PARAM_MAX2 => 200,
+                       ],
+                       'offset' => [
+                               ApiBase::PARAM_DFLT => 0,
+                               ApiBase::PARAM_TYPE => 'integer',
+                       ],
+               ];
+               $profileParam = $this->buildProfileApiParam( SearchEngine::COMPLETION_PROFILE_TYPE,
+                       'apihelp-query+prefixsearch-param-profile' );
+               if ( $profileParam ) {
+                       $this->allowedParams['profile'] = $profileParam;
+               }
+               return $this->allowedParams;
+       }
+
+       public function getSearchProfileParams() {
+               if ( isset( $this->getAllowedParams()['profile'] ) ) {
+                       return [ SearchEngine::COMPLETION_PROFILE_TYPE => 'profile' ]; // profile type => request param name
+               }
+               return []; // no 'profile' param is exposed by getAllowedParams()
        }
 
        protected function getExamplesMessages() {
index f57d3a3..80798a1 100644 (file)
@@ -32,6 +32,10 @@ use MediaWiki\MediaWikiServices;
  * @ingroup API
  */
 class ApiQuerySearch extends ApiQueryGeneratorBase {
+       use SearchApi;
+
+       /** @var array list of api allowed params */
+       private $allowedParams;
 
        /**
         * When $wgSearchType is null, $wgSearchAlternatives[0] is null. Null isn't
@@ -61,8 +65,11 @@ class ApiQuerySearch extends ApiQueryGeneratorBase {
                global $wgContLang;
                $params = $this->extractRequestParams();
 
+               if ( isset( $params['backend'] ) && $params['backend'] == self::BACKEND_NULL_PARAM ) {
+                       unset( $params['backend'] );
+               }
+
                // Extract parameters
-               $limit = $params['limit'];
                $query = $params['search'];
                $what = $params['what'];
                $interwiki = $params['interwiki'];
@@ -80,11 +87,7 @@ class ApiQuerySearch extends ApiQueryGeneratorBase {
                }
 
                // Create search engine instance and set options
-               $type = isset( $params['backend'] ) && $params['backend'] != self::BACKEND_NULL_PARAM ?
-                       $params['backend'] : null;
-               $search = MediaWikiServices::getInstance()->getSearchEngineFactory()->create( $type );
-               $search->setLimitOffset( $limit + 1, $params['offset'] );
-               $search->setNamespaces( $params['namespace'] );
+               $search = $this->buildSearchEngine( $params );
                $search->setFeatureData( 'rewrite', (bool)$params['enablerewrites'] );
 
                $query = $search->transformSearchTerm( $query );
@@ -152,6 +155,7 @@ class ApiQuerySearch extends ApiQueryGeneratorBase {
                $titles = [];
                $count = 0;
                $result = $matches->next();
+               $limit = $params['limit'];
 
                while ( $result ) {
                        if ( ++$count > $limit ) {
@@ -301,7 +305,11 @@ class ApiQuerySearch extends ApiQueryGeneratorBase {
        }
 
        public function getAllowedParams() {
-               $params = [
+               if ( $this->allowedParams !== null ) {
+                       return $this->allowedParams;
+               }
+
+               $this->allowedParams = [
                        'search' => [
                                ApiBase::PARAM_TYPE => 'string',
                                ApiBase::PARAM_REQUIRED => true
@@ -368,13 +376,31 @@ class ApiQuerySearch extends ApiQueryGeneratorBase {
                        if ( $alternatives[0] === null ) {
                                $alternatives[0] = self::BACKEND_NULL_PARAM;
                        }
-                       $params['backend'] = [
+                       $this->allowedParams['backend'] = [
                                ApiBase::PARAM_DFLT => $searchConfig->getSearchType(),
                                ApiBase::PARAM_TYPE => $alternatives,
                        ];
+                       // @todo: support profile selection when multiple
+                       // backends are available. The solution could be to
+                       // merge all possible profiles and let ApiBase
+                       // subclasses do the check. Making ApiHelp and ApiSandbox
+                       // comprehensive might be more difficult.
+               } else {
+                       $profileParam = $this->buildProfileApiParam( SearchEngine::FT_QUERY_INDEP_PROFILE_TYPE,
+                               'apihelp-query+search-param-qiprofile' );
+                       if ( $profileParam ) {
+                               $this->allowedParams['qiprofile'] = $profileParam;
+                       }
                }
 
-               return $params;
+               return $this->allowedParams;
+       }
+
+       public function getSearchProfileParams() {
+               if ( isset( $this->getAllowedParams()['qiprofile'] ) ) {
+                       return [ SearchEngine::FT_QUERY_INDEP_PROFILE_TYPE => 'qiprofile' ]; // profile type => request param name
+               }
+               return []; // no 'qiprofile' param is exposed by getAllowedParams()
        }
 
        protected function getExamplesMessages() {
diff --git a/includes/api/SearchApi.php b/includes/api/SearchApi.php
new file mode 100644 (file)
index 0000000..26d7a0e
--- /dev/null
@@ -0,0 +1,116 @@
+<?php
+use MediaWiki\MediaWikiServices;
+
+/**
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ * http://www.gnu.org/copyleft/gpl.html
+ *
+ * @file
+ * @since 1.28
+ */
+
+/**
+ * Trait for API components that use a SearchEngine.
+ * @ingroup API
+ */
+trait SearchApi {
+       /**
+        * Build the api param definition listing the profiles of the given type.
+        *
+        * @param string $profileType type of profile to customize
+        * @param string $helpMsg i18n message used as the param help message
+        * @param string|null $backendType SearchEngine backend type or null for default engine
+        * @return array|null the api param definition or null if the search engine
+        * returns no profiles for $profileType.
+        */
+       public function buildProfileApiParam( $profileType, $helpMsg, $backendType = null ) {
+               $searchEngine = null;
+               if ( $backendType !== null ) {
+                       $searchEngine = MediaWikiServices::getInstance()
+                               ->getSearchEngineFactory()->create( $backendType );
+               } else {
+                       $searchEngine = MediaWikiServices::getInstance()->newSearchEngine();
+               }
+
+               $profiles = $searchEngine->getProfiles( $profileType );
+               if ( $profiles ) {
+                       $types = [];
+                       $helpMessages = [];
+                       $defaultProfile = null;
+                       foreach ( $profiles as $profile ) {
+                               $types[] = $profile['name']; // profile names become the allowed param values
+                               if ( isset ( $profile['desc-message'] ) ) {
+                                       $helpMessages[$profile['name']] = $profile['desc-message'];
+                               }
+                               if ( !empty( $profile['default'] ) ) { // the last profile flagged as default wins
+                                       $defaultProfile = $profile['name'];
+                               }
+                       }
+                       return [
+                               ApiBase::PARAM_TYPE => $types,
+                               ApiBase::PARAM_HELP_MSG => $helpMsg,
+                               ApiBase::PARAM_HELP_MSG_PER_VALUE => $helpMessages,
+                               ApiBase::PARAM_DFLT => $defaultProfile, // stays null when no profile is flagged as default
+                       ];
+               }
+               return null;
+       }
+
+       /**
+        * Build the search engine to use.
+        * If $params is provided then the following searchEngine options
+        * will be set:
+        *  - limit: mandatory
+        *  - offset: optional, if set limit will be incremented by
+        *    one ( to support the continue parameter )
+        *  - namespace: mandatory
+        *  - search engine profiles defined by SearchApi::getSearchProfileParams()
+        * @param string[]|null $params API request params (must be sanitized by
+        * ApiBase::extractRequestParams() before)
+        * @return SearchEngine the search engine
+        */
+       public function buildSearchEngine( array $params = null ) {
+               if ( $params != null ) { // loose check: null and [] both fall back to the default engine
+                       $type = isset( $params['backend'] ) ? $params['backend'] : null;
+                       $searchEngine = MediaWikiServices::getInstance()->getSearchEngineFactory()->create( $type );
+                       $limit = $params['limit'];
+                       $searchEngine->setNamespaces( $params['namespace'] ); // restrict the search to the requested namespaces
+                       $offset = null;
+                       if ( isset( $params['offset'] ) ) {
+                               // If the API supports offset then it probably
+                               // wants to fetch limit+1 so it can check if
+                               // more results are available to properly set
+                               // the continue param
+                               $offset = $params['offset'];
+                               $limit += 1;
+                       }
+                       $searchEngine->setLimitOffset( $limit, $offset );
+                       foreach ( $this->getSearchProfileParams() as $profileType => $param ) {
+                               if ( isset( $params[$param] ) ) {
+                                       $searchEngine->setFeatureData( $profileType, $params[$param] );
+                               }
+                       }
+               } else {
+                       $searchEngine = MediaWikiServices::getInstance()->newSearchEngine();
+               }
+               return $searchEngine;
+       }
+
+       /**
+        * @return string[] the list of supported search profile types. Key is
+        * the profile type and its associated value is the request param.
+        */
+       abstract public function getSearchProfileParams();
+}
index 4e9309e..29bfce9 100644 (file)
        "apihelp-query+prefixsearch-param-limit": "Maximum number of results to return.",
        "apihelp-query+prefixsearch-param-offset": "Number of results to skip.",
        "apihelp-query+prefixsearch-example-simple": "Search for page titles beginning with <kbd>meaning</kbd>.",
+       "apihelp-query+prefixsearch-param-profile": "Search profile to use.",
 
        "apihelp-query+protectedtitles-description": "List all titles protected from creation.",
        "apihelp-query+protectedtitles-param-namespace": "Only list titles in these namespaces.",
        "apihelp-query+search-param-what": "Which type of search to perform.",
        "apihelp-query+search-param-info": "Which metadata to return.",
        "apihelp-query+search-param-prop": "Which properties to return:",
+       "apihelp-query+search-param-qiprofile": "Query independent profile to use (affects ranking algorithm).",
        "apihelp-query+search-paramvalue-prop-size": "Adds the size of the page in bytes.",
        "apihelp-query+search-paramvalue-prop-wordcount": "Adds the word count of the page.",
        "apihelp-query+search-paramvalue-prop-timestamp": "Adds the timestamp of when the page was last edited.",
index 6137457..a7284ff 100644 (file)
        "apihelp-query+prefixsearch-param-limit": "{{doc-apihelp-param|query+prefixsearch|limit}}",
        "apihelp-query+prefixsearch-param-offset": "{{doc-apihelp-param|query+prefixsearch|offset}}",
        "apihelp-query+prefixsearch-example-simple": "{{doc-apihelp-example|query+prefixsearch}}",
+       "apihelp-query+prefixsearch-param-profile": "{{doc-apihelp-param|query+prefixsearch|profile|paramvalues=1}}",
        "apihelp-query+protectedtitles-description": "{{doc-apihelp-description|query+protectedtitles}}",
        "apihelp-query+protectedtitles-param-namespace": "{{doc-apihelp-param|query+protectedtitles|namespace}}",
        "apihelp-query+protectedtitles-param-level": "{{doc-apihelp-param|query+protectedtitles|level}}",
        "apihelp-query+search-param-what": "{{doc-apihelp-param|query+search|what}}",
        "apihelp-query+search-param-info": "{{doc-apihelp-param|query+search|info}}",
        "apihelp-query+search-param-prop": "{{doc-apihelp-param|query+search|prop|paramvalues=1}}",
+       "apihelp-query+search-param-qiprofile": "{{doc-apihelp-param|query+search|qiprofile|paramvalues=1}}",
        "apihelp-query+search-paramvalue-prop-size": "{{doc-apihelp-paramvalue|query+search|prop|size}}",
        "apihelp-query+search-paramvalue-prop-wordcount": "{{doc-apihelp-paramvalue|query+search|prop|wordcount}}",
        "apihelp-query+search-paramvalue-prop-timestamp": "{{doc-apihelp-paramvalue|query+search|prop|timestamp}}",
index dcef95c..0171ed9 100644 (file)
@@ -54,6 +54,12 @@ abstract class SearchEngine {
        /** @var array Feature values */
        protected $features = [];
 
+       /** @const string profile type for completionSearch */
+       const COMPLETION_PROFILE_TYPE = 'completionSearchProfile';
+
+       /** @const string profile type for query independent ranking features */
+       const FT_QUERY_INDEP_PROFILE_TYPE = 'fulltextQueryIndepProfile';
+
        /**
         * Perform a full text search query and return a result set.
         * If full text searches are not supported or disabled, return null.
@@ -631,6 +637,24 @@ abstract class SearchEngine {
                return MediaWikiServices::getInstance()->getSearchEngineConfig()->getSearchTypes();
        }
 
+       /**
+        * Get a list of supported profiles.
+        * Some search engine implementations may expose specific profiles to fine-tune
+        * their behaviors.
+        * The profile can be passed as a feature data with setFeatureData( $profileType, $profileName )
+        * Each profile in the returned list is an array with the following keys:
+        * - name: the profile name to use with setFeatureData
+        * - desc-message: the i18n description
+        * - default: set to true if this profile is the default
+        *
+        * @since 1.28
+        * @param string $profileType the type of profiles
+        * @return array|null the list of profiles or null if none available
+        */
+       public function getProfiles( $profileType ) {
+               return null; // base implementation exposes no profiles
+       }
+
 }
 
 /**