Add hook to extract namespace in prefix search
[lhc/web/wiklou.git] / includes / PrefixSearch.php
1 <?php
2 /**
3 * Prefix search of page names.
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License along
16 * with this program; if not, write to the Free Software Foundation, Inc.,
17 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
18 * http://www.gnu.org/copyleft/gpl.html
19 *
20 * @file
21 */
22
23 /**
24 * Handles searching prefixes of titles and finding any page
25 * names that match. Used largely by the OpenSearch implementation.
26 *
27 * @ingroup Search
28 */
29 abstract class PrefixSearch {
/**
 * Static convenience wrapper: run a prefix search through a
 * StringPrefixSearch instance and hand back its result unchanged.
 *
 * @deprecated Since 1.23, use TitlePrefixSearch or StringPrefixSearch classes
 *
 * @param string $search Text to match page names against
 * @param int $limit Maximum number of results requested
 * @param array $namespaces Used if query is not explicitly prefixed
 * @return array Array of strings
 */
public static function titleSearch( $search, $limit, $namespaces = array() ) {
	$stringSearcher = new StringPrefixSearch();
	$matches = $stringSearcher->search( $search, $limit, $namespaces );

	return $matches;
}
43
/**
 * Do a prefix search of titles and return a list of matching page names.
 *
 * The query is trimmed; a blank query yields an empty array. Otherwise the
 * namespace list is validated and the query is resolved in this order:
 *  - it parses as a local (non-interwiki) title: an explicit namespace
 *    prefix selects that single namespace, while a main-namespace title
 *    falls back to the validated $namespaces and is passed through the
 *    PrefixSearchExtractNamespace hook;
 *  - it is a bare namespace prefix (e.g. the text only becomes a title once
 *    a dummy page name is appended): that namespace is searched with an
 *    empty search string;
 *  - anything else: the PrefixSearchExtractNamespace hook runs on the
 *    validated $namespaces and the trimmed query.
 *
 * @param string $search
 * @param int $limit
 * @param array $namespaces Used if query is not explicitly prefixed
 * @return array Array of strings or Title objects
 */
public function search( $search, $limit, $namespaces = array() ) {
	$search = trim( $search );
	if ( $search === '' ) {
		// Nothing to look for
		return array();
	}
	$namespaces = $this->validateNamespaces( $namespaces );

	// Does the query parse as a title on this wiki (i.e. not an interwiki link)?
	$parsed = Title::newFromText( $search );
	if ( $parsed && !$parsed->isExternal() ) {
		$explicitNs = $parsed->getNamespace();
		$search = $parsed->getText();

		if ( $explicitNs != NS_MAIN ) {
			// The query carried its own namespace prefix; honour it
			return $this->searchBackend( array( $explicitNs ), $search, $limit );
		}

		// No explicit prefix: use the default namespaces, which the hook
		// receives by reference together with the search text
		$targetNamespaces = $namespaces;
		wfRunHooks( 'PrefixSearchExtractNamespace', array( &$targetNamespaces, &$search ) );

		return $this->searchBackend( $targetNamespaces, $search, $limit );
	}

	// Is the query nothing but a namespace prefix?
	$probe = Title::newFromText( $search . 'Dummy' );
	$isBareNamespacePrefix = $probe
		&& $probe->getText() == 'Dummy'
		&& $probe->getNamespace() != NS_MAIN
		&& !$probe->isExternal();

	if ( !$isBareNamespacePrefix ) {
		wfRunHooks( 'PrefixSearchExtractNamespace', array( &$namespaces, &$search ) );

		return $this->searchBackend( $namespaces, $search, $limit );
	}

	// List pages of that namespace without any title prefix
	return $this->searchBackend( array( $probe->getNamespace() ), '', $limit );
}
85
/**
 * Do a prefix search for all possible variants of the prefix.
 *
 * The original query is searched first. If it returns fewer than $limit
 * results, each distinct string returned by the content language's
 * autoConvertToAllVariants() (other than the query itself) is searched in
 * turn with the remaining budget, and the results are appended.
 *
 * @param string $search
 * @param int $limit
 * @param array $namespaces
 *
 * @return array Results of search() for the query followed by those of its variants
 */
public function searchWithVariants( $search, $limit, array $namespaces ) {
	wfProfileIn( __METHOD__ );
	$searches = $this->search( $search, $limit, $namespaces );

	// if the content language has variants, try to retrieve fallback results
	$fallbackLimit = $limit - count( $searches );
	if ( $fallbackLimit > 0 ) {
		global $wgContLang;

		$fallbackSearches = $wgContLang->autoConvertToAllVariants( $search );
		$fallbackSearches = array_diff( array_unique( $fallbackSearches ), array( $search ) );

		foreach ( $fallbackSearches as $fbs ) {
			$fallbackSearchResult = $this->search( $fbs, $fallbackLimit, $namespaces );
			$searches = array_merge( $searches, $fallbackSearchResult );
			$fallbackLimit -= count( $fallbackSearchResult );

			// Stop once the budget is used up. search() does not truncate
			// results supplied by hooks or special pages, so a call may
			// return more than requested and push the budget below zero;
			// testing for exactly 0 would then keep searching with a
			// negative limit.
			if ( $fallbackLimit <= 0 ) {
				break;
			}
		}
	}
	wfProfileOut( __METHOD__ );
	return $searches;
}
119
/**
 * Convert a list of Title objects into the result type of this searcher.
 *
 * searchBackend() passes the results of specialSearch() and
 * defaultSearchBackend() through this method before returning them.
 * When implemented in a descendant class, it returns either the array
 * unmodified or an array of strings corresponding to the titles passed to it.
 *
 * @param array $titles Array of Title objects
 * @return array Array of Title objects or array of strings
 */
abstract protected function titles( array $titles );
128
/**
 * Convert a list of titles given as strings into the result type of this searcher.
 *
 * searchBackend() passes the results collected from the PrefixSearchBackend
 * hook through this method before returning them. When implemented in a
 * descendant class, it returns either the array unmodified or an array of
 * Title objects corresponding to the strings received.
 *
 * @param array $strings Array of page names as strings
 *
 * @return array Array of strings or array of Title objects
 */
abstract protected function strings( array $strings );
138
/**
 * Run the actual prefix search for an already resolved namespace list.
 *
 * Special cases when exactly one namespace is given:
 *  - NS_MEDIA is searched as NS_FILE;
 *  - NS_SPECIAL is answered by specialSearch().
 *
 * Otherwise the PrefixSearchBackend hook is run. If it returns true, the
 * built-in defaultSearchBackend() is used. If it returns false, the strings
 * the hook placed in its by-reference result array are used, after making
 * sure an existing exact title match sits at the front of the list.
 *
 * @param array $namespaces
 * @param string $search
 * @param int $limit
 * @return array Array of strings or Title objects, as produced by titles()/strings()
 */
protected function searchBackend( $namespaces, $search, $limit ) {
	if ( count( $namespaces ) == 1 ) {
		$ns = $namespaces[0];
		if ( $ns == NS_MEDIA ) {
			$namespaces = array( NS_FILE );
		} elseif ( $ns == NS_SPECIAL ) {
			return $this->titles( $this->specialSearch( $search, $limit ) );
		}
	}

	$srchres = array();
	if ( wfRunHooks( 'PrefixSearchBackend', array( $namespaces, $search, $limit, &$srchres ) ) ) {
		return $this->titles( $this->defaultSearchBackend( $namespaces, $search, $limit ) );
	}

	// Default search backend does proper prefix searching, but custom backends
	// may sort based on other algorithms that may cause the exact title match
	// to not be in the results or be lower down the list.

	// Pick namespace (based on PrefixSearch::defaultSearchBackend)
	$ns = in_array( NS_MAIN, $namespaces ) ? NS_MAIN : $namespaces[0];
	$t = Title::newFromText( $search, $ns );
	if ( $t ) {
		// If text is a valid title and is in the search results
		$string = $t->getPrefixedText();
		// Strict comparison: with loose comparison PHP compares numeric-looking
		// strings as numbers, so a title such as "0" would be "found" at the
		// position of "00" and the wrong entry would be promoted.
		$key = array_search( $string, $srchres, true );
		if ( $key !== false ) {
			// Move it to the front
			$cut = array_splice( $srchres, $key, 1 );
			array_unshift( $srchres, $cut[0] );
		} elseif ( $t->exists() ) {
			// Add it in front
			array_unshift( $srchres, $string );

			// Keep the list within the limit after the insertion
			if ( count( $srchres ) > $limit ) {
				array_pop( $srchres );
			}
		}
	}

	return $this->strings( $srchres );
}
187
/**
 * Prefix search special-case for Special: namespace.
 *
 * A search containing "/" is treated as a subpage search: the part before
 * the slash must name a special page, which is then asked for matching
 * subpages via prefixSearchSubpages(). Any other search is matched, after
 * case folding, against the names returned by SpecialPageFactory::getNames()
 * and against the content language's aliases of those pages.
 *
 * @param string $search Term
 * @param int $limit Max number of items to return
 * @return array Array of Title objects
 */
protected function specialSearch( $search, $limit ) {
	global $wgContLang;

	$searchParts = explode( '/', $search, 2 );
	$searchKey = $searchParts[0];
	$subpageSearch = isset( $searchParts[1] ) ? $searchParts[1] : null;

	// Handle subpage search separately.
	if ( $subpageSearch !== null ) {
		// Try matching the part before the slash as a page name
		$specialTitle = Title::makeTitleSafe( NS_SPECIAL, $searchKey );
		if ( !$specialTitle ) {
			return array();
		}
		$special = SpecialPageFactory::getPage( $specialTitle->getText() );
		if ( $special ) {
			$subpages = $special->prefixSearchSubpages( $subpageSearch, $limit );
			return array_map( function ( $sub ) use ( $specialTitle ) {
				return $specialTitle->getSubpage( $sub );
			}, $subpages );
		} else {
			return array();
		}
	}

	# normalize searchKey, so aliases with spaces can be found - bug 25675
	$searchKey = str_replace( ' ', '_', $searchKey );
	$searchKey = $wgContLang->caseFold( $searchKey );

	// Fetch the list of page names once; it is needed for the canonical
	// names and again for every alias entry below.
	$pageNames = SpecialPageFactory::getNames();

	// Unlike SpecialPage itself, we want the canonical forms of both
	// canonical and alias title forms...
	$keys = array();
	foreach ( $pageNames as $page ) {
		$keys[$wgContLang->caseFold( $page )] = $page;
	}

	foreach ( $wgContLang->getSpecialPageAliases() as $page => $aliases ) {
		if ( !in_array( $page, $pageNames ) ) {# bug 20885
			continue;
		}

		foreach ( $aliases as $alias ) {
			$keys[$wgContLang->caseFold( $alias )] = $alias;
		}
	}
	ksort( $keys );

	$srchres = array();
	foreach ( $keys as $pageKey => $page ) {
		if ( $searchKey === '' || strpos( $pageKey, $searchKey ) === 0 ) {
			// bug 27671: Don't use SpecialPage::getTitleFor() here because it
			// localizes its input leading to searches for e.g. Special:All
			// returning Spezial:MediaWiki-Systemnachrichten and returning
			// Spezial:Alle_Seiten twice when $wgLanguageCode == 'de'
			$title = Title::makeTitleSafe( NS_SPECIAL, $page );
			// makeTitleSafe() can fail (the subpage branch above checks for
			// this too); never hand a non-Title entry to titles()
			if ( $title ) {
				$srchres[] = $title;
			}
		}

		if ( count( $srchres ) >= $limit ) {
			break;
		}
	}

	return $srchres;
}
259
/**
 * Built-in backend, used unless the PrefixSearchBackend hook takes over.
 *
 * Queries the page table for titles in a single namespace whose title
 * starts with the DB key form of the search term, ordered by title.
 * The match is case-sensitive (the first character may be automatically
 * capitalized by Title::secureAndSpit() later on, depending on
 * $wgCapitalLinks).
 *
 * @param array $namespaces Namespaces to search in
 * @param string $search Term
 * @param int $limit Max number of items to return
 * @return array Array of Title objects
 */
protected function defaultSearchBackend( $namespaces, $search, $limit ) {
	// Only one namespace is supported: take the first one given...
	$targetNs = array_shift( $namespaces );
	// ...but when several were given and main is among the rest, prefer main
	if ( in_array( NS_MAIN, $namespaces ) ) {
		$targetNs = NS_MAIN;
	}

	// An unparsable term degrades to an empty prefix
	$parsed = Title::newFromText( $search, $targetNs );
	$dbKeyPrefix = $parsed ? $parsed->getDBkey() : '';

	$db = wfGetDB( DB_SLAVE );
	$fields = array( 'page_id', 'page_namespace', 'page_title' );
	$conds = array(
		'page_namespace' => $targetNs,
		'page_title ' . $db->buildLike( $dbKeyPrefix, $db->anyString() )
	);
	$options = array( 'LIMIT' => $limit, 'ORDER BY' => 'page_title' );
	$rows = $db->select( 'page', $fields, $conds, __METHOD__, $options );

	$found = array();
	foreach ( $rows as $row ) {
		$found[] = Title::newFromRow( $row );
	}

	return $found;
}
295
/**
 * Validate an array of numerical namespace indexes.
 *
 * Keeps, in their original order, the entries that are numeric and are a
 * key of the content language's namespace list. If the input is not an
 * array or nothing survives the check, the main namespace is used.
 *
 * @param array $namespaces
 * @return array (default: contains only NS_MAIN)
 */
protected function validateNamespaces( $namespaces ) {
	global $wgContLang;

	// Namespaces known to the content language, keyed by index
	$known = $wgContLang->getNamespaces();

	$accepted = array();
	if ( is_array( $namespaces ) ) {
		foreach ( $namespaces as $candidate ) {
			if ( !is_numeric( $candidate ) || !array_key_exists( $candidate, $known ) ) {
				continue;
			}
			$accepted[] = $candidate;
		}
	}

	return count( $accepted ) > 0 ? $accepted : array( NS_MAIN );
}
321 }
322
/**
 * Performs prefix search, returning Title objects
 * @ingroup Search
 */
class TitlePrefixSearch extends PrefixSearch {

	/**
	 * Results are wanted as Title objects, so they are passed through as-is.
	 *
	 * @param array $titles Array of Title objects
	 * @return array The same array
	 */
	protected function titles( array $titles ) {
		return $titles;
	}

	/**
	 * Turn page names into Title objects and run them through a LinkBatch.
	 *
	 * @param array $strings Array of page names
	 * @return array Array of Title objects, reindexed from 0
	 */
	protected function strings( array $strings ) {
		// Title::newFromText() yields a falsy value for text that is not a
		// valid title; drop those entries so that callers only ever receive
		// Title objects and the batch is not fed invalid items.
		$titles = array_values( array_filter( array_map( 'Title::newFromText', $strings ) ) );
		$lb = new LinkBatch( $titles );
		$lb->setCaller( __METHOD__ );
		$lb->execute();
		return $titles;
	}
}
341
/**
 * Prefix search variant whose results are page names as plain strings
 * @ingroup Search
 */
class StringPrefixSearch extends PrefixSearch {

	/**
	 * Replace every Title object by its prefixed text.
	 *
	 * @param array $titles Array of Title objects
	 * @return array Array of strings, keys preserved
	 */
	protected function titles( array $titles ) {
		$toPrefixedText = function ( Title $title ) {
			return $title->getPrefixedText();
		};

		return array_map( $toPrefixedText, $titles );
	}

	/**
	 * Results are wanted as strings, so they are passed through as-is.
	 *
	 * @param array $strings Array of page names
	 * @return array The same array
	 */
	protected function strings( array $strings ) {
		$unchanged = $strings;

		return $unchanged;
	}
}