Merge "Add support for blacklisting common passwords"
[lhc/web/wiklou.git] / includes / PrefixSearch.php
1 <?php
2 /**
3 * Prefix search of page names.
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License along
16 * with this program; if not, write to the Free Software Foundation, Inc.,
17 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
18 * http://www.gnu.org/copyleft/gpl.html
19 *
20 * @file
21 */
22
/**
 * Handles searching prefixes of titles and finding any page
 * names that match. Used largely by the OpenSearch implementation.
 *
 * Concrete subclasses choose the result type by implementing titles() and
 * strings(): every result list leaves this class through one of the two.
 *
 * @ingroup Search
 */
abstract class PrefixSearch {
	/**
	 * Do a prefix search of titles and return a list of matching page names.
	 * @deprecated Since 1.23, use TitlePrefixSearch or StringPrefixSearch classes
	 *
	 * @param string $search
	 * @param int $limit
	 * @param array $namespaces Used if query is not explicitly prefixed
	 * @param int $offset How many results to offset from the beginning
	 * @return array Array of strings
	 */
	public static function titleSearch( $search, $limit, $namespaces = array(), $offset = 0 ) {
		$prefixSearch = new StringPrefixSearch;
		return $prefixSearch->search( $search, $limit, $namespaces, $offset );
	}

	/**
	 * Do a prefix search of titles and return a list of matching page names.
	 *
	 * The namespace to search is taken from the search text when it carries an
	 * explicit namespace prefix; otherwise $namespaces is used, after giving
	 * the 'PrefixSearchExtractNamespace' hook a chance to adjust both the
	 * namespace list and the search text.
	 *
	 * @param string $search
	 * @param int $limit
	 * @param array $namespaces Used if query is not explicitly prefixed
	 * @param int $offset How many results to offset from the beginning
	 * @return array Array of strings or Title objects
	 */
	public function search( $search, $limit, $namespaces = array(), $offset = 0 ) {
		$search = trim( $search );
		if ( $search === '' ) {
			return array(); // Return empty result
		}
		$namespaces = $this->validateNamespaces( $namespaces );

		// Case 1: the input parses as a local (non-interwiki) title.
		$title = Title::newFromText( $search );
		if ( $title && !$title->isExternal() ) {
			$ns = array( $title->getNamespace() );
			$search = $title->getText();
			if ( $ns[0] == NS_MAIN ) {
				$ns = $namespaces; // no explicit prefix, use default namespaces
				Hooks::run( 'PrefixSearchExtractNamespace', array( &$ns, &$search ) );
			}
			return $this->searchBackend( $ns, $search, $limit, $offset );
		}

		// Case 2: the input is a bare namespace prefix such as "Talk:". Appending
		// a dummy page name makes it parseable so the namespace can be read off.
		$title = Title::newFromText( $search . 'Dummy' );
		if ( $title && $title->getText() == 'Dummy'
			&& $title->getNamespace() != NS_MAIN
			&& !$title->isExternal()
		) {
			$namespaces = array( $title->getNamespace() );
			$search = '';
		} else {
			Hooks::run( 'PrefixSearchExtractNamespace', array( &$namespaces, &$search ) );
		}

		return $this->searchBackend( $namespaces, $search, $limit, $offset );
	}

	/**
	 * Do a prefix search for all possible variants of the prefix
	 *
	 * The primary search runs first; only when it leaves room below $limit are
	 * the language variants of $search tried, in order, until the limit is
	 * filled. Note that $offset is applied to the primary search only.
	 *
	 * @param string $search
	 * @param int $limit
	 * @param array $namespaces
	 * @param int $offset How many results to offset from the beginning
	 *
	 * @return array
	 */
	public function searchWithVariants( $search, $limit, array $namespaces, $offset = 0 ) {
		$searches = $this->search( $search, $limit, $namespaces, $offset );

		// if the content language has variants, try to retrieve fallback results
		$fallbackLimit = $limit - count( $searches );
		if ( $fallbackLimit > 0 ) {
			global $wgContLang;

			$fallbackSearches = $wgContLang->autoConvertToAllVariants( $search );
			$fallbackSearches = array_diff( array_unique( $fallbackSearches ), array( $search ) );

			foreach ( $fallbackSearches as $fbs ) {
				$fallbackSearchResult = $this->search( $fbs, $fallbackLimit, $namespaces );
				$searches = array_merge( $searches, $fallbackSearchResult );
				$fallbackLimit -= count( $fallbackSearchResult );

				// Use "<=" rather than "==": a backend that returns more rows than
				// requested would otherwise push the remainder below zero and the
				// remaining variants would be searched with a negative limit.
				if ( $fallbackLimit <= 0 ) {
					break;
				}
			}
		}
		return $searches;
	}

	/**
	 * When implemented in a descendant class, receives an array of Title objects and returns
	 * either an unmodified array or an array of strings corresponding to titles passed to it.
	 *
	 * @param array $titles
	 * @return array
	 */
	abstract protected function titles( array $titles );

	/**
	 * When implemented in a descendant class, receives an array of titles as strings and returns
	 * either an unmodified array or an array of Title objects corresponding to strings received.
	 *
	 * @param array $strings
	 *
	 * @return array
	 */
	abstract protected function strings( array $strings );

	/**
	 * Do a prefix search of titles and return a list of matching page names.
	 *
	 * Dispatches to the Special: namespace search, to a 'PrefixSearchBackend'
	 * hook handler, or to the default database-backed search.
	 *
	 * @param array $namespaces
	 * @param string $search
	 * @param int $limit
	 * @param int $offset How many results to offset from the beginning
	 * @return array Whatever titles() / strings() of the concrete class return
	 */
	protected function searchBackend( $namespaces, $search, $limit, $offset ) {
		if ( count( $namespaces ) == 1 ) {
			// reset() instead of [0]: a hook may have handed back an array that
			// is not zero-indexed.
			$ns = reset( $namespaces );
			if ( $ns == NS_MEDIA ) {
				$namespaces = array( NS_FILE );
			} elseif ( $ns == NS_SPECIAL ) {
				return $this->titles( $this->specialSearch( $search, $limit, $offset ) );
			}
		}
		$srchres = array();
		// When the hook run returns true, fall back to the default backend;
		// otherwise $srchres is taken as the hook-provided result list.
		if ( Hooks::run(
			'PrefixSearchBackend',
			array( $namespaces, $search, $limit, &$srchres, $offset )
		) ) {
			return $this->titles( $this->defaultSearchBackend( $namespaces, $search, $limit, $offset ) );
		}
		return $this->strings( $this->handleResultFromHook( $srchres, $namespaces, $search, $limit ) );
	}

	/**
	 * Pass hook-provided results through SearchExactMatchRescorer.
	 *
	 * @param array $srchres Results filled in by the 'PrefixSearchBackend' hook
	 * @param array $namespaces
	 * @param string $search
	 * @param int $limit
	 * @return array Result of SearchExactMatchRescorer::rescore()
	 */
	private function handleResultFromHook( $srchres, $namespaces, $search, $limit ) {
		$rescorer = new SearchExactMatchRescorer();
		return $rescorer->rescore( $search, $namespaces, $srchres, $limit );
	}

	/**
	 * Prefix search special-case for Special: namespace.
	 *
	 * Text after the first "/" is treated as a subpage prefix and delegated to
	 * the special page's own prefixSearchSubpages(). Otherwise the case-folded
	 * search text is matched against case-folded special page names and aliases.
	 *
	 * @param string $search Term
	 * @param int $limit Max number of items to return
	 * @param int $offset Number of items to offset
	 * @return array
	 */
	protected function specialSearch( $search, $limit, $offset ) {
		global $wgContLang;

		$searchParts = explode( '/', $search, 2 );
		$searchKey = $searchParts[0];
		$subpageSearch = isset( $searchParts[1] ) ? $searchParts[1] : null;

		// Handle subpage search separately.
		if ( $subpageSearch !== null ) {
			// Try matching the full search string as a page name
			$specialTitle = Title::makeTitleSafe( NS_SPECIAL, $searchKey );
			if ( !$specialTitle ) {
				return array();
			}
			$special = SpecialPageFactory::getPage( $specialTitle->getText() );
			if ( !$special ) {
				return array();
			}
			$subpages = $special->prefixSearchSubpages( $subpageSearch, $limit, $offset );
			return array_map( function ( $sub ) use ( $specialTitle ) {
				return $specialTitle->getSubpage( $sub );
			}, $subpages );
		}

		# normalize searchKey, so aliases with spaces can be found - bug 25675
		$searchKey = str_replace( ' ', '_', $searchKey );
		$searchKey = $wgContLang->caseFold( $searchKey );

		// Fetched once: the list is needed both for the canonical names and for
		// filtering the alias table below.
		$pageNames = SpecialPageFactory::getNames();

		// Unlike SpecialPage itself, we want the canonical forms of both
		// canonical and alias title forms...
		$keys = array();
		foreach ( $pageNames as $page ) {
			$keys[$wgContLang->caseFold( $page )] = $page;
		}

		foreach ( $wgContLang->getSpecialPageAliases() as $page => $aliases ) {
			// Skip aliases of pages that are not registered - bug 20885
			if ( !in_array( $page, $pageNames ) ) {
				continue;
			}

			foreach ( $aliases as $alias ) {
				$keys[$wgContLang->caseFold( $alias )] = $alias;
			}
		}
		ksort( $keys );

		$srchres = array();
		$skipped = 0;
		foreach ( $keys as $pageKey => $page ) {
			if ( $searchKey === '' || strpos( $pageKey, $searchKey ) === 0 ) {
				// bug 27671: Don't use SpecialPage::getTitleFor() here because it
				// localizes its input leading to searches for e.g. Special:All
				// returning Spezial:MediaWiki-Systemnachrichten and returning
				// Spezial:Alle_Seiten twice when $wgLanguageCode == 'de'
				if ( $offset > 0 && $skipped < $offset ) {
					$skipped++;
					continue;
				}
				$srchres[] = Title::makeTitleSafe( NS_SPECIAL, $page );
			}

			if ( count( $srchres ) >= $limit ) {
				break;
			}
		}

		return $srchres;
	}

	/**
	 * Unless overridden by PrefixSearchBackend hook...
	 * This is case-sensitive (First character may
	 * be automatically capitalized by Title::secureAndSpit()
	 * later on depending on $wgCapitalLinks)
	 *
	 * Only a single namespace is queried: NS_MAIN if it appears anywhere after
	 * the first entry of $namespaces, otherwise the first entry.
	 *
	 * @param array $namespaces Namespaces to search in
	 * @param string $search Term
	 * @param int $limit Max number of items to return
	 * @param int $offset Number of items to skip
	 * @return array Array of Title objects
	 */
	protected function defaultSearchBackend( $namespaces, $search, $limit, $offset ) {
		$ns = array_shift( $namespaces ); // support only one namespace
		if ( $ns === null || in_array( NS_MAIN, $namespaces ) ) {
			// Searching on many namespaces always defaults to main; so does an
			// empty list, which would otherwise put null into the query.
			$ns = NS_MAIN;
		}

		// Build the title inside $ns explicitly. Title::newFromText() treats $ns
		// only as a default, so namespace-looking text inside $search (e.g.
		// "Help:Foo" left over from "Talk:Help:Foo") would be parsed into another
		// namespace and yield a prefix that does not match the one being queried.
		$t = Title::makeTitleSafe( $ns, $search );

		// An unparseable (including empty) search term matches every title in $ns.
		$prefix = $t ? $t->getDBkey() : '';
		$dbr = wfGetDB( DB_SLAVE );
		$res = $dbr->select( 'page',
			array( 'page_id', 'page_namespace', 'page_title' ),
			array(
				'page_namespace' => $ns,
				'page_title ' . $dbr->buildLike( $prefix, $dbr->anyString() )
			),
			__METHOD__,
			array(
				'LIMIT' => $limit,
				'ORDER BY' => 'page_title',
				'OFFSET' => $offset
			)
		);
		$srchres = array();
		foreach ( $res as $row ) {
			$srchres[] = Title::newFromRow( $row );
		}
		return $srchres;
	}

	/**
	 * Validate an array of numerical namespace indexes
	 *
	 * Entries that are not numeric, or that are not keys of the content
	 * language's namespace list, are dropped.
	 *
	 * @param array $namespaces
	 * @return array (default: contains only NS_MAIN)
	 */
	protected function validateNamespaces( $namespaces ) {
		global $wgContLang;

		// We will look at each given namespace against wgContLang namespaces
		$validNamespaces = $wgContLang->getNamespaces();
		if ( is_array( $namespaces ) && count( $namespaces ) > 0 ) {
			$valid = array();
			foreach ( $namespaces as $ns ) {
				if ( is_numeric( $ns ) && array_key_exists( $ns, $validNamespaces ) ) {
					$valid[] = $ns;
				}
			}
			if ( count( $valid ) > 0 ) {
				return $valid;
			}
		}

		return array( NS_MAIN );
	}
}
318
/**
 * Prefix search variant whose results are Title objects.
 * @ingroup Search
 */
class TitlePrefixSearch extends PrefixSearch {

	/**
	 * Title objects are already the wanted result type, so hand them back as-is.
	 *
	 * @param array $titles
	 * @return array The array that was passed in
	 */
	protected function titles( array $titles ) {
		return $titles;
	}

	/**
	 * Convert page-name strings to Title objects and run a LinkBatch over them.
	 *
	 * @param array $strings
	 * @return array One Title::newFromText() result per input entry, keys preserved
	 */
	protected function strings( array $strings ) {
		$converted = array();
		foreach ( $strings as $key => $text ) {
			$converted[$key] = Title::newFromText( $text );
		}

		// NOTE(review): presumably this preloads link data for the titles;
		// confirm against LinkBatch::execute().
		$batch = new LinkBatch( $converted );
		$batch->setCaller( __METHOD__ );
		$batch->execute();

		return $converted;
	}
}
337
/**
 * Prefix search variant whose results are plain strings.
 * @ingroup Search
 */
class StringPrefixSearch extends PrefixSearch {

	/**
	 * Convert Title objects to their prefixed text form.
	 *
	 * @param array $titles Array of Title objects
	 * @return array Array of strings, keys preserved
	 */
	protected function titles( array $titles ) {
		$toPrefixedText = function ( Title $title ) {
			return $title->getPrefixedText();
		};

		return array_map( $toPrefixedText, $titles );
	}

	/**
	 * Strings are already the wanted result type, so hand them back as-is.
	 *
	 * @param array $strings
	 * @return array The array that was passed in
	 */
	protected function strings( array $strings ) {
		return $strings;
	}
}