955313b3eb8365f0e6350c4478189d57ba63be5b
[lhc/web/wiklou.git] / includes / PrefixSearch.php
1 <?php
2 /**
3 * Prefix search of page names.
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License along
16 * with this program; if not, write to the Free Software Foundation, Inc.,
17 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
18 * http://www.gnu.org/copyleft/gpl.html
19 *
20 * @file
21 */
22
23 /**
24 * Handles searching prefixes of titles and finding any page
25 * names that match. Used largely by the OpenSearch implementation.
26 *
27 * @ingroup Search
28 */
29 abstract class PrefixSearch {
/**
 * Static convenience wrapper around StringPrefixSearch::search().
 *
 * @deprecated Since 1.23, use TitlePrefixSearch or StringPrefixSearch classes
 *
 * @param string $search Text to use as the title prefix
 * @param int $limit Maximum number of results
 * @param array $namespaces Used if query is not explicitly prefixed
 * @return array Array of strings
 */
public static function titleSearch( $search, $limit, $namespaces = array() ) {
	$searcher = new StringPrefixSearch();
	$matches = $searcher->search( $search, $limit, $namespaces );
	return $matches;
}
43
/**
 * Run a prefix search and return the matching pages.
 *
 * The query is trimmed first; an empty query yields an empty result. The
 * namespace to search is taken from an explicit prefix in the query when
 * there is one, otherwise from $namespaces (after validation).
 *
 * @param string $search
 * @param int $limit
 * @param array $namespaces Used if query is not explicitly prefixed
 * @return array Array of strings or Title objects
 */
public function search( $search, $limit, $namespaces = array() ) {
	$search = trim( $search );
	if ( $search === '' ) {
		// Nothing to search for
		return array();
	}
	$namespaces = $this->validateNamespaces( $namespaces );

	// First attempt: the query parses as a local (non-interwiki) title
	$parsed = Title::newFromText( $search );
	if ( $parsed && !$parsed->isExternal() ) {
		$parsedNs = $parsed->getNamespace();
		$search = $parsed->getText();
		if ( $parsedNs != NS_MAIN ) {
			// Explicit namespace prefix: search that namespace only
			return $this->searchBackend( array( $parsedNs ), $search, $limit );
		}
		// No explicit prefix: use the default namespaces, which hook
		// handlers may still adjust together with the search text
		$targetNs = $namespaces;
		wfRunHooks( 'PrefixSearchExtractNamespace', array( &$targetNs, &$search ) );
		return $this->searchBackend( $targetNs, $search, $limit );
	}

	// Second attempt: the query may be a bare namespace prefix. Append a
	// dummy page name and see whether it lands in a non-main namespace.
	$probe = Title::newFromText( $search . 'Dummy' );
	$isBareNamespace = $probe
		&& $probe->getText() == 'Dummy'
		&& $probe->getNamespace() != NS_MAIN
		&& !$probe->isExternal();

	if ( !$isBareNamespace ) {
		wfRunHooks( 'PrefixSearchExtractNamespace', array( &$namespaces, &$search ) );
		return $this->searchBackend( $namespaces, $search, $limit );
	}

	// Only a namespace was given: search it with an empty page prefix
	return $this->searchBackend( array( $probe->getNamespace() ), '', $limit );
}
85
/**
 * Do a prefix search for all possible variants of the prefix.
 *
 * The prefix is searched as given first. If that leaves room under $limit,
 * each distinct variant returned by the content language's
 * autoConvertToAllVariants() is searched in turn, and its results are
 * appended until the limit is used up.
 *
 * @param string $search
 * @param int $limit
 * @param array $namespaces
 *
 * @return array
 */
public function searchWithVariants( $search, $limit, array $namespaces ) {
	wfProfileIn( __METHOD__ );
	$searches = $this->search( $search, $limit, $namespaces );

	// if the content language has variants, try to retrieve fallback results
	$fallbackLimit = $limit - count( $searches );
	if ( $fallbackLimit > 0 ) {
		global $wgContLang;

		$fallbackSearches = $wgContLang->autoConvertToAllVariants( $search );
		// Drop duplicates and the prefix that has already been searched
		$fallbackSearches = array_diff( array_unique( $fallbackSearches ), array( $search ) );

		foreach ( $fallbackSearches as $fbs ) {
			$fallbackSearchResult = $this->search( $fbs, $fallbackLimit, $namespaces );
			$searches = array_merge( $searches, $fallbackSearchResult );
			$fallbackLimit -= count( $fallbackSearchResult );

			// Use <= rather than ==: a backend that returns more rows than
			// requested would push the remaining limit below zero, and the
			// loop must still stop instead of issuing further searches
			// with a negative limit.
			if ( $fallbackLimit <= 0 ) {
				break;
			}
		}
	}
	wfProfileOut( __METHOD__ );
	return $searches;
}
119
/**
 * Convert a list of Title objects into the result type of the concrete searcher.
 *
 * An implementation either hands the Title objects back unchanged or
 * returns the corresponding titles as strings.
 *
 * @param array $titles Title objects
 * @return array Title objects or strings, depending on the implementation
 */
abstract protected function titles( array $titles );
128
/**
 * Convert a list of title strings into the result type of the concrete searcher.
 *
 * An implementation either hands the strings back unchanged or returns
 * the Title objects corresponding to them.
 *
 * @param array $strings Titles as strings
 *
 * @return array Strings or Title objects, depending on the implementation
 */
abstract protected function strings( array $strings );
138
/**
 * Dispatch a prefix search to the right backend and convert the result.
 *
 * A lone NS_MEDIA is searched as NS_FILE, and a lone NS_SPECIAL is answered
 * by specialSearch(). Everything else goes through the 'PrefixSearchBackend'
 * hook: when the hook run returns a true value the default database backend
 * is used, otherwise the results supplied by the hook are post-processed
 * and returned.
 *
 * @param array $namespaces
 * @param string $search
 * @param int $limit
 * @return array Whatever titles() or strings() of the concrete class returns
 */
protected function searchBackend( $namespaces, $search, $limit ) {
	if ( count( $namespaces ) == 1 ) {
		switch ( $namespaces[0] ) {
			case NS_MEDIA:
				$namespaces = array( NS_FILE );
				break;
			case NS_SPECIAL:
				return $this->titles( $this->specialSearch( $search, $limit ) );
		}
	}

	$hookResults = array();
	$hookArgs = array( $namespaces, $search, $limit, &$hookResults );
	$useDefaultBackend = wfRunHooks( 'PrefixSearchBackend', $hookArgs );

	if ( !$useDefaultBackend ) {
		$munged = $this->handleResultFromHook( $hookResults, $namespaces, $search, $limit );
		return $this->strings( $munged );
	}

	$found = $this->defaultSearchBackend( $namespaces, $search, $limit );
	return $this->titles( $found );
}
161
/**
 * Default search backend does proper prefix searching, but custom backends
 * may sort based on other algorithms that may cause the exact title match
 * to not be in the results or be lower down the list. This makes sure an
 * existing exact match (or a result equivalent to it through a redirect)
 * ends up first in the list.
 *
 * @param array $srchres Results from the hook, as title strings
 * @param array $namespaces Namespaces that were searched
 * @param string $search Search term
 * @param int $limit Max number of items to return
 * @return array Munged results from the hook
 */
private function handleResultFromHook( $srchres, $namespaces, $search, $limit ) {
	// Pick namespace (based on PrefixSearch::defaultSearchBackend)
	$ns = in_array( NS_MAIN, $namespaces ) ? NS_MAIN : $namespaces[0];
	$t = Title::newFromText( $search, $ns );
	if ( !$t || !$t->exists() ) {
		// No exact match so just return the search results
		return $srchres;
	}
	$string = $t->getPrefixedText();
	// Strict comparison: with loose comparison numeric-looking titles such
	// as "1e3" and "1000" are considered equal and the wrong entry would be
	// treated as the exact match.
	$key = array_search( $string, $srchres, true );
	if ( $key !== false ) {
		// Exact match was in the results so just move it to the front
		return $this->pullFront( $key, $srchres );
	}
	// Exact match not in the search results so check for some redirect handling cases
	if ( $t->isRedirect() ) {
		$target = $this->getRedirectTarget( $t );
		// Without a resolvable target there is nothing to compare against;
		// fall through and treat the exact match as unique.
		if ( $target !== null ) {
			$key = array_search( $target, $srchres, true );
			if ( $key !== false ) {
				// Exact match is a redirect to one of the returned matches so pull the
				// returned match to the front. This might look odd but the alternative
				// is to put the redirect in front and drop the match. The name of the
				// found match is often more descriptive/better formed than the name of
				// the redirect AND by definition they share a prefix. Hopefully this
				// choice is less confusing and more helpful. But it might not be. But
				// it is the choice we're going with for now.
				return $this->pullFront( $key, $srchres );
			}
			$redirectTargetsToRedirect = $this->redirectTargetsToRedirect( $srchres );
			if ( isset( $redirectTargetsToRedirect[$target] ) ) {
				// The exact match and something in the results list are both redirects
				// to the same thing! In this case we'll pull the returned match to the
				// top following the same logic above. Again, it might not be a perfect
				// choice but it'll do.
				return $this->pullFront( $redirectTargetsToRedirect[$target], $srchres );
			}
		}
	} else {
		$redirectTargetsToRedirect = $this->redirectTargetsToRedirect( $srchres );
		if ( isset( $redirectTargetsToRedirect[$string] ) ) {
			// The exact match is the target of a redirect already in the results list so remove
			// the redirect from the results list and push the exact match to the front
			array_splice( $srchres, $redirectTargetsToRedirect[$string], 1 );
			array_unshift( $srchres, $string );
			return $srchres;
		}
	}

	// Exact match is totally unique from the other results so just add it to the front
	array_unshift( $srchres, $string );
	// And roll one off the end if the results are too long
	if ( count( $srchres ) > $limit ) {
		array_pop( $srchres );
	}
	return $srchres;
}
224
/**
 * Map redirect targets to the position of the redirect pointing at them.
 *
 * Entries that do not parse as a title, are not redirects, or whose target
 * cannot be determined are left out. If several entries redirect to the
 * same target, the last one wins.
 *
 * @param array $titles Titles as strings
 * @return array Redirect target prefixed text (string) => key (int) of the
 *  entry in $titles that is a redirect to it
 */
private function redirectTargetsToRedirect( $titles ) {
	$map = array();
	foreach ( $titles as $index => $text ) {
		$candidate = Title::newFromText( $text );
		if ( $candidate && $candidate->isRedirect() ) {
			$targetText = $this->getRedirectTarget( $candidate );
			if ( $targetText ) {
				$map[$targetText] = $index;
			}
		}
	}
	return $map;
}
245
/**
 * Move one element of a list to the first position.
 *
 * @param int $key Offset of the element to pull to the front
 * @param array $array List to reorder (the caller's copy is not modified)
 * @return array $array with the item at $key pulled to the front
 */
private function pullFront( $key, $array ) {
	// array_splice() removes the element in place and hands it back in a one-item array
	$removed = array_splice( $array, $key, 1 );
	return array_merge( array( $removed[0] ), $array );
}
255
/**
 * Get the prefixed text of the page a redirect points to.
 *
 * @param Title $title Title of the (presumed) redirect page
 * @return string|null Prefixed text of the redirect target, or null when the
 *  page does not exist or no redirect target can be determined for it
 */
private function getRedirectTarget( $title ) {
	$page = WikiPage::factory( $title );
	if ( !$page->exists() ) {
		return null;
	}
	// WikiPage::getRedirectTarget() returns null when it cannot resolve a
	// target; guard against calling a method on null.
	$target = $page->getRedirectTarget();
	if ( !$target ) {
		return null;
	}
	return $target->getPrefixedText();
}
263
/**
 * Prefix search special-case for Special: namespace.
 *
 * A term containing '/' is treated as "<special page>/<subpage prefix>" and
 * delegated to that special page's prefixSearchSubpages(). Any other term is
 * matched, case-folded, against the canonical names and content-language
 * aliases of the known special pages.
 *
 * @param string $search Term
 * @param int $limit Max number of items to return
 * @return array Array of Title objects
 */
protected function specialSearch( $search, $limit ) {
	global $wgContLang;

	$searchParts = explode( '/', $search, 2 );
	$searchKey = $searchParts[0];
	$subpageSearch = isset( $searchParts[1] ) ? $searchParts[1] : null;

	// Handle subpage search separately.
	if ( $subpageSearch !== null ) {
		// Try matching the full search string as a page name
		$specialTitle = Title::makeTitleSafe( NS_SPECIAL, $searchKey );
		if ( !$specialTitle ) {
			return array();
		}
		$special = SpecialPageFactory::getPage( $specialTitle->getText() );
		if ( !$special ) {
			return array();
		}
		$subpages = $special->prefixSearchSubpages( $subpageSearch, $limit );
		$subpageTitles = array_map( function ( $sub ) use ( $specialTitle ) {
			return $specialTitle->getSubpage( $sub );
		}, $subpages );
		// getSubpage() yields null for a subpage name that does not form a
		// valid title; drop those so callers only ever receive Title objects.
		return array_values( array_filter( $subpageTitles ) );
	}

	# normalize searchKey, so aliases with spaces can be found - bug 25675
	$searchKey = str_replace( ' ', '_', $searchKey );
	$searchKey = $wgContLang->caseFold( $searchKey );

	// Unlike SpecialPage itself, we want the canonical forms of both
	// canonical and alias title forms...
	// Fetch the name list once instead of on every loop iteration.
	$names = SpecialPageFactory::getNames();
	$keys = array();
	foreach ( $names as $page ) {
		$keys[$wgContLang->caseFold( $page )] = $page;
	}

	foreach ( $wgContLang->getSpecialPageAliases() as $page => $aliases ) {
		if ( !in_array( $page, $names ) ) {# bug 20885
			continue;
		}

		foreach ( $aliases as $alias ) {
			$keys[$wgContLang->caseFold( $alias )] = $alias;
		}
	}
	ksort( $keys );

	$srchres = array();
	foreach ( $keys as $pageKey => $page ) {
		if ( $searchKey === '' || strpos( $pageKey, $searchKey ) === 0 ) {
			// bug 27671: Don't use SpecialPage::getTitleFor() here because it
			// localizes its input leading to searches for e.g. Special:All
			// returning Spezial:MediaWiki-Systemnachrichten and returning
			// Spezial:Alle_Seiten twice when $wgLanguageCode == 'de'
			$title = Title::makeTitleSafe( NS_SPECIAL, $page );
			// makeTitleSafe() returns null for an invalid name; skip it
			// rather than putting a null into the result list.
			if ( $title ) {
				$srchres[] = $title;
			}
		}

		if ( count( $srchres ) >= $limit ) {
			break;
		}
	}

	return $srchres;
}
335
/**
 * Built-in backend, used unless the PrefixSearchBackend hook takes over.
 *
 * Runs a LIKE prefix query against the page table in a single namespace,
 * ordered by page_title. The original note on this method says the match is
 * case-sensitive, and that the first character may be automatically
 * capitalized by Title::secureAndSpit() later on depending on $wgCapitalLinks.
 *
 * @param array $namespaces Namespaces to search in
 * @param string $search Term
 * @param int $limit Max number of items to return
 * @return array Array of Title objects
 */
protected function defaultSearchBackend( $namespaces, $search, $limit ) {
	// Only one namespace is supported: the first one given, unless the
	// main namespace appears among the remaining ones.
	$firstNs = array_shift( $namespaces );
	$ns = in_array( NS_MAIN, $namespaces ) ? NS_MAIN : $firstNs;

	// An unparsable search term falls back to an empty prefix
	$prefixTitle = Title::newFromText( $search, $ns );
	$dbKeyPrefix = '';
	if ( $prefixTitle ) {
		$dbKeyPrefix = $prefixTitle->getDBkey();
	}

	$dbr = wfGetDB( DB_SLAVE );
	$fields = array( 'page_id', 'page_namespace', 'page_title' );
	$conds = array(
		'page_namespace' => $ns,
		'page_title ' . $dbr->buildLike( $dbKeyPrefix, $dbr->anyString() )
	);
	$options = array( 'LIMIT' => $limit, 'ORDER BY' => 'page_title' );
	$rows = $dbr->select( 'page', $fields, $conds, __METHOD__, $options );

	$found = array();
	foreach ( $rows as $row ) {
		$found[] = Title::newFromRow( $row );
	}
	return $found;
}
371
/**
 * Reduce a list of namespace indexes to those the content language knows.
 *
 * An entry is kept when it is numeric and is a key of the array returned by
 * $wgContLang->getNamespaces().
 *
 * @param array $namespaces
 * @return array The accepted entries, or array( NS_MAIN ) when the input is
 *  not an array or none of its entries is accepted
 */
protected function validateNamespaces( $namespaces ) {
	global $wgContLang;

	$known = $wgContLang->getNamespaces();
	$accepted = array();

	if ( is_array( $namespaces ) ) {
		foreach ( $namespaces as $candidate ) {
			if ( is_numeric( $candidate ) && array_key_exists( $candidate, $known ) ) {
				$accepted[] = $candidate;
			}
		}
	}

	return count( $accepted ) > 0 ? $accepted : array( NS_MAIN );
}
397 }
398
/**
 * Performs prefix search, returning Title objects
 * @ingroup Search
 */
class TitlePrefixSearch extends PrefixSearch {

	/**
	 * Results are already Title objects; return them unchanged.
	 *
	 * @param array $titles Title objects
	 * @return array The same Title objects
	 */
	protected function titles( array $titles ) {
		return $titles;
	}

	/**
	 * Turn title strings into Title objects and run them through a
	 * LinkBatch in one go.
	 *
	 * @param array $strings Titles as strings
	 * @return array Title objects; strings that do not form a valid title are dropped
	 */
	protected function strings( array $strings ) {
		// Title::newFromText() returns null for text that is not a valid
		// title. Filter those out so callers only receive Title objects.
		$titles = array_values( array_filter( array_map( 'Title::newFromText', $strings ) ) );
		$lb = new LinkBatch( $titles );
		$lb->setCaller( __METHOD__ );
		$lb->execute();
		return $titles;
	}
}
417
/**
 * Prefix search variant whose results are page names as strings.
 * @ingroup Search
 */
class StringPrefixSearch extends PrefixSearch {

	/**
	 * Replace every Title object with its prefixed text.
	 *
	 * @param array $titles Title objects
	 * @return array Prefixed title texts, under the same keys as the input
	 */
	protected function titles( array $titles ) {
		$toPrefixedText = function ( Title $title ) {
			return $title->getPrefixedText();
		};
		return array_map( $toPrefixedText, $titles );
	}

	/**
	 * Results are already strings; return them unchanged.
	 *
	 * @param array $strings Titles as strings
	 * @return array The same strings
	 */
	protected function strings( array $strings ) {
		return $strings;
	}
}