Merge "PrefixSearch: Enforce including the exact match as first result"
[lhc/web/wiklou.git] / includes / PrefixSearch.php
1 <?php
2 /**
3 * Prefix search of page names.
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License along
16 * with this program; if not, write to the Free Software Foundation, Inc.,
17 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
18 * http://www.gnu.org/copyleft/gpl.html
19 *
20 * @file
21 */
22
/**
 * Handles searching prefixes of titles and finding any page
 * names that match. Used largely by the OpenSearch implementation.
 *
 * Concrete subclasses decide, through titles() and strings(), whether
 * results are handed back as Title objects or as prefixed title strings.
 *
 * @ingroup Search
 */
abstract class PrefixSearch {
	/**
	 * Do a prefix search of titles and return a list of matching page names.
	 * @deprecated Since 1.23, use TitlePrefixSearch or StringPrefixSearch classes
	 *
	 * @param string $search
	 * @param int $limit
	 * @param array $namespaces Used if query is not explicitly prefixed
	 * @return array Array of strings
	 */
	public static function titleSearch( $search, $limit, $namespaces = array() ) {
		$prefixSearch = new StringPrefixSearch;
		return $prefixSearch->search( $search, $limit, $namespaces );
	}

	/**
	 * Do a prefix search of titles and return a list of matching page names.
	 *
	 * @param string $search
	 * @param int $limit
	 * @param array $namespaces Used if query is not explicitly prefixed
	 * @return array Array of strings or Title objects
	 */
	public function search( $search, $limit, $namespaces = array() ) {
		$search = trim( $search );
		if ( $search === '' ) {
			return array(); // Return empty result
		}
		$namespaces = $this->validateNamespaces( $namespaces );

		// Does the whole query parse as a local (non-interwiki) title?
		$title = Title::newFromText( $search );
		if ( $title && !$title->isExternal() ) {
			$ns = array( $title->getNamespace() );
			if ( $ns[0] == NS_MAIN ) {
				$ns = $namespaces; // no explicit prefix, use default namespaces
			}
			return $this->searchBackend( $ns, $title->getText(), $limit );
		}

		// Is this a namespace prefix? Appending a dummy page name makes a
		// bare "Namespace:" query parseable so its namespace can be read back.
		$title = Title::newFromText( $search . 'Dummy' );
		if ( $title && $title->getText() === 'Dummy'
			&& $title->getNamespace() != NS_MAIN
			&& !$title->isExternal()
		) {
			$namespaces = array( $title->getNamespace() );
			$search = '';
		}

		return $this->searchBackend( $namespaces, $search, $limit );
	}

	/**
	 * Do a prefix search for all possible variants of the prefix
	 * @param string $search
	 * @param int $limit
	 * @param array $namespaces
	 *
	 * @return array
	 */
	public function searchWithVariants( $search, $limit, array $namespaces ) {
		wfProfileIn( __METHOD__ );
		$searches = $this->search( $search, $limit, $namespaces );

		// if the content language has variants, try to retrieve fallback results
		$fallbackLimit = $limit - count( $searches );
		if ( $fallbackLimit > 0 ) {
			global $wgContLang;

			$fallbackSearches = $wgContLang->autoConvertToAllVariants( $search );
			$fallbackSearches = array_diff( array_unique( $fallbackSearches ), array( $search ) );

			foreach ( $fallbackSearches as $fbs ) {
				$fallbackSearchResult = $this->search( $fbs, $fallbackLimit, $namespaces );
				$searches = array_merge( $searches, $fallbackSearchResult );
				$fallbackLimit -= count( $fallbackSearchResult );

				// "<=" rather than "==": a backend that hands back more rows than
				// requested must not push the remaining limit below zero and keep
				// the loop querying further variants.
				if ( $fallbackLimit <= 0 ) {
					break;
				}
			}
		}
		wfProfileOut( __METHOD__ );
		return $searches;
	}

	/**
	 * When implemented in a descendant class, receives an array of Title objects and returns
	 * either an unmodified array or an array of strings corresponding to titles passed to it.
	 *
	 * @param array $titles
	 * @return array
	 */
	abstract protected function titles( array $titles );

	/**
	 * When implemented in a descendant class, receives an array of titles as strings and returns
	 * either an unmodified array or an array of Title objects corresponding to strings received.
	 *
	 * @param array $strings
	 *
	 * @return array
	 */
	abstract protected function strings( array $strings );

	/**
	 * Do a prefix search of titles and return a list of matching page names.
	 *
	 * The 'PrefixSearchBackend' hook is run first. When it returns true the
	 * built-in database search is used; otherwise the results the hook
	 * handler placed in its by-reference argument are used, after making sure
	 * an existing exact title match sits at the front of the list.
	 *
	 * @param array $namespaces
	 * @param string $search
	 * @param int $limit
	 * @return array Array of strings or Title objects, as produced by the
	 *  subclass's titles() / strings() implementation
	 */
	protected function searchBackend( $namespaces, $search, $limit ) {
		if ( count( $namespaces ) == 1 ) {
			$ns = $namespaces[0];
			if ( $ns == NS_MEDIA ) {
				$namespaces = array( NS_FILE );
			} elseif ( $ns == NS_SPECIAL ) {
				return $this->titles( $this->specialSearch( $search, $limit ) );
			}
		}
		$srchres = array();
		if ( wfRunHooks( 'PrefixSearchBackend', array( $namespaces, $search, $limit, &$srchres ) ) ) {
			return $this->titles( $this->defaultSearchBackend( $namespaces, $search, $limit ) );
		}

		// Default search backend does proper prefix searching, but custom backends
		// may sort based on other algorithms that may cause the exact title match
		// to not be in the results or be lower down the list.

		// Pick namespace (based on PrefixSearch::defaultSearchBackend)
		$ns = in_array( NS_MAIN, $namespaces ) ? NS_MAIN : $namespaces[0];
		$t = Title::newFromText( $search, $ns );

		// The search text does not always parse as a title (search() passes an
		// empty string for namespace-only queries), so only look for an exact
		// match when a Title object was actually created.
		if ( $t ) {
			$string = $t->getPrefixedText();

			// Strict comparison: loose comparison treats numeric-looking titles
			// such as "007" and "7" as the same string.
			$key = array_search( $string, $srchres, true );
			if ( $key !== false ) {
				// Move it to the front
				$cut = array_splice( $srchres, $key, 1 );
				array_unshift( $srchres, $cut[0] );
			} elseif ( $t->exists() ) {
				// Add it in front
				array_unshift( $srchres, $string );

				// Keep the result count within the requested limit
				if ( count( $srchres ) > $limit ) {
					array_pop( $srchres );
				}
			}
		}

		return $this->strings( $srchres );
	}

	/**
	 * Prefix search special-case for Special: namespace.
	 *
	 * A search containing "/" is treated as "<special page>/<subpage prefix>"
	 * and delegated to that special page's prefixSearchSubpages(). Anything
	 * else is matched, case-folded, against special page names and aliases.
	 *
	 * @param string $search Term
	 * @param int $limit Max number of items to return
	 * @return array Array of Title objects
	 */
	protected function specialSearch( $search, $limit ) {
		global $wgContLang;

		$searchParts = explode( '/', $search, 2 );
		$searchKey = $searchParts[0];
		$subpageSearch = isset( $searchParts[1] ) ? $searchParts[1] : null;

		// Handle subpage search separately.
		if ( $subpageSearch !== null ) {
			// Try matching the full search string as a page name
			$specialTitle = Title::makeTitleSafe( NS_SPECIAL, $searchKey );
			if ( !$specialTitle ) {
				return array();
			}
			$special = SpecialPageFactory::getPage( $specialTitle->getText() );
			if ( !$special ) {
				return array();
			}
			$subpages = $special->prefixSearchSubpages( $subpageSearch, $limit );
			return array_map( function ( $sub ) use ( $specialTitle ) {
				return $specialTitle->getSubpage( $sub );
			}, $subpages );
		}

		# normalize searchKey, so aliases with spaces can be found - bug 25675
		$searchKey = str_replace( ' ', '_', $searchKey );
		$searchKey = $wgContLang->caseFold( $searchKey );

		// Fetched once; it is needed both for the canonical names and for
		// filtering the alias list below.
		$pageNames = SpecialPageFactory::getNames();

		// Unlike SpecialPage itself, we want the canonical forms of both
		// canonical and alias title forms...
		$keys = array();
		foreach ( $pageNames as $page ) {
			$keys[$wgContLang->caseFold( $page )] = $page;
		}

		foreach ( $wgContLang->getSpecialPageAliases() as $page => $aliases ) {
			if ( !in_array( $page, $pageNames ) ) {# bug 20885
				continue;
			}

			foreach ( $aliases as $alias ) {
				$keys[$wgContLang->caseFold( $alias )] = $alias;
			}
		}
		ksort( $keys );

		$srchres = array();
		foreach ( $keys as $pageKey => $page ) {
			if ( $searchKey === '' || strpos( $pageKey, $searchKey ) === 0 ) {
				// bug 27671: Don't use SpecialPage::getTitleFor() here because it
				// localizes its input leading to searches for e.g. Special:All
				// returning Spezial:MediaWiki-Systemnachrichten and returning
				// Spezial:Alle_Seiten twice when $wgLanguageCode == 'de'
				$matchTitle = Title::makeTitleSafe( NS_SPECIAL, $page );

				// makeTitleSafe() is checked for failure in the subpage branch
				// above; apply the same check here so that no non-Title value
				// ends up in the result list.
				if ( $matchTitle ) {
					$srchres[] = $matchTitle;
				}
			}

			if ( count( $srchres ) >= $limit ) {
				break;
			}
		}

		return $srchres;
	}

	/**
	 * Unless overridden by PrefixSearchBackend hook...
	 * This is case-sensitive (First character may
	 * be automatically capitalized by Title::secureAndSpit()
	 * later on depending on $wgCapitalLinks)
	 *
	 * Only a single namespace is queried: NS_MAIN when it is among the
	 * requested namespaces, otherwise the first one given.
	 *
	 * @param array $namespaces Namespaces to search in
	 * @param string $search Term
	 * @param int $limit Max number of items to return
	 * @return array Array of Title objects
	 */
	protected function defaultSearchBackend( $namespaces, $search, $limit ) {
		$ns = array_shift( $namespaces ); // support only one namespace
		if ( in_array( NS_MAIN, $namespaces ) ) {
			$ns = NS_MAIN; // if searching on many always default to main
		}

		// Fall back to an empty prefix when the search text does not parse as a title
		$t = Title::newFromText( $search, $ns );
		$prefix = $t ? $t->getDBkey() : '';

		$dbr = wfGetDB( DB_SLAVE );
		$res = $dbr->select( 'page',
			array( 'page_id', 'page_namespace', 'page_title' ),
			array(
				'page_namespace' => $ns,
				'page_title ' . $dbr->buildLike( $prefix, $dbr->anyString() )
			),
			__METHOD__,
			array( 'LIMIT' => $limit, 'ORDER BY' => 'page_title' )
		);

		$srchres = array();
		foreach ( $res as $row ) {
			$srchres[] = Title::newFromRow( $row );
		}
		return $srchres;
	}

	/**
	 * Validate an array of numerical namespace indexes
	 *
	 * Entries that are not numeric, or that are not keys of the content
	 * language's namespace list, are discarded.
	 *
	 * @param array $namespaces
	 * @return array (default: contains only NS_MAIN)
	 */
	protected function validateNamespaces( $namespaces ) {
		global $wgContLang;

		// We will look at each given namespace against wgContLang namespaces
		$validNamespaces = $wgContLang->getNamespaces();
		if ( is_array( $namespaces ) && count( $namespaces ) > 0 ) {
			$valid = array();
			foreach ( $namespaces as $ns ) {
				if ( is_numeric( $ns ) && array_key_exists( $ns, $validNamespaces ) ) {
					$valid[] = $ns;
				}
			}
			if ( count( $valid ) > 0 ) {
				return $valid;
			}
		}

		return array( NS_MAIN );
	}
}
316
/**
 * Performs prefix search, returning Title objects
 * @ingroup Search
 */
class TitlePrefixSearch extends PrefixSearch {

	/**
	 * Results are wanted as Title objects, so they are passed through as-is.
	 *
	 * @param array $titles Array of Title objects
	 * @return array The same array, unmodified
	 */
	protected function titles( array $titles ) {
		return $titles;
	}

	/**
	 * Convert title strings into Title objects and run them through a
	 * LinkBatch in one go.
	 *
	 * @param array $strings Array of title strings
	 * @return array Array of Title objects; strings that could not be
	 *  turned into a title are left out
	 */
	protected function strings( array $strings ) {
		// Title::newFromText() does not yield an object for every input, so
		// drop the failures instead of handing non-Title entries to callers,
		// and reindex so the result stays a plain list.
		$titles = array_values( array_filter( array_map( 'Title::newFromText', $strings ) ) );

		$lb = new LinkBatch( $titles );
		$lb->setCaller( __METHOD__ );
		$lb->execute();

		return $titles;
	}
}
335
/**
 * Performs prefix search, returning strings
 * @ingroup Search
 */
class StringPrefixSearch extends PrefixSearch {

	/**
	 * Turn each Title object into its prefixed text form.
	 *
	 * @param array $titles Array of Title objects
	 * @return array Array of strings
	 */
	protected function titles( array $titles ) {
		$toPrefixedText = function ( Title $title ) {
			return $title->getPrefixedText();
		};

		return array_map( $toPrefixedText, $titles );
	}

	/**
	 * Results are wanted as strings, so they are passed through as-is.
	 *
	 * @param array $strings Array of title strings
	 * @return array The same array, unmodified
	 */
	protected function strings( array $strings ) {
		return $strings;
	}
}