Merge "Upstream EasyDeflate library from VisualEditor"
[lhc/web/wiklou.git] / includes / PrefixSearch.php
1 <?php
2 /**
3 * Prefix search of page names.
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License along
16 * with this program; if not, write to the Free Software Foundation, Inc.,
17 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
18 * http://www.gnu.org/copyleft/gpl.html
19 *
20 * @file
21 */
22
23 /**
24 * Handles searching prefixes of titles and finding any page
25 * names that match. Used largely by the OpenSearch implementation.
26 * @deprecated Since 1.27, Use SearchEngine::defaultPrefixSearch or SearchEngine::completionSearch
27 *
28 * @ingroup Search
29 */
30 abstract class PrefixSearch {
/**
 * Static convenience entry point: prefix-search page names and get strings back.
 *
 * Creates a StringPrefixSearch and forwards every argument to its search() method.
 *
 * @deprecated Since 1.23, use TitlePrefixSearch or StringPrefixSearch classes
 *
 * @param string $search Prefix typed by the user
 * @param int $limit Maximum number of results
 * @param array $namespaces Used if query is not explicitly prefixed
 * @param int $offset How many results to offset from the beginning
 * @return array Array of strings
 */
public static function titleSearch( $search, $limit, $namespaces = [], $offset = 0 ) {
	return ( new StringPrefixSearch )->search( $search, $limit, $namespaces, $offset );
}
45
/**
 * Run a prefix search over page names.
 *
 * The term is trimmed first and an empty term short-circuits to an empty
 * array. When SearchEngine::parseNamespacePrefixes() returns something other
 * than false, its result replaces both the term and the namespace list before
 * the query is handed to searchBackend().
 *
 * @param string $search
 * @param int $limit
 * @param array $namespaces Used if query is not explicitly prefixed
 * @param int $offset How many results to offset from the beginning
 * @return array Array of strings or Title objects
 */
public function search( $search, $limit, $namespaces = [], $offset = 0 ) {
	$term = trim( $search );
	if ( $term == '' ) {
		// Nothing to look up
		return [];
	}

	$parsed = SearchEngine::parseNamespacePrefixes( $term, false, true );
	if ( $parsed !== false ) {
		// An explicit prefix in the query overrides the caller's namespaces
		list( $term, $namespaces ) = $parsed;
	}

	return $this->searchBackend( $namespaces, $term, $limit, $offset );
}
68
/**
 * Do a prefix search for all possible variants of the prefix.
 *
 * The term is searched as given first. If that returns fewer than $limit
 * results, every other variant produced by
 * $wgContLang->autoConvertToAllVariants() is searched in turn, each with the
 * number of slots still free, and the results are appended in order.
 * $offset is applied to the primary search only; the fallback searches
 * always start from the beginning.
 *
 * @param string $search
 * @param int $limit
 * @param array $namespaces
 * @param int $offset How many results to offset from the beginning
 *
 * @return array
 */
public function searchWithVariants( $search, $limit, array $namespaces, $offset = 0 ) {
	global $wgContLang;

	$searches = $this->search( $search, $limit, $namespaces, $offset );

	// Slots still free after the primary search
	$fallbackLimit = $limit - count( $searches );
	if ( $fallbackLimit <= 0 ) {
		return $searches;
	}

	// if the content language has variants, try to retrieve fallback results
	$fallbackSearches = $wgContLang->autoConvertToAllVariants( $search );
	$fallbackSearches = array_diff( array_unique( $fallbackSearches ), [ $search ] );

	foreach ( $fallbackSearches as $fbs ) {
		$fallbackSearchResult = $this->search( $fbs, $fallbackLimit, $namespaces );
		$searches = array_merge( $searches, $fallbackSearchResult );
		$fallbackLimit -= count( $fallbackSearchResult );

		// "<=" rather than "==": if a backend ever returns more rows than
		// requested, the budget must not go negative and keep the loop
		// running with a negative limit.
		if ( $fallbackLimit <= 0 ) {
			break;
		}
	}

	return $searches;
}
101
/**
 * Output adapter for results that arrive as Title objects.
 *
 * searchBackend() passes the results of specialSearch() and
 * defaultSearchBackend() through this method. A descendant class either
 * returns the array untouched or converts each Title into the string form
 * it wants to expose.
 *
 * @param array $titles Title objects
 * @return array Title objects or strings, depending on the descendant
 */
abstract protected function titles( array $titles );
110
/**
 * Output adapter for results that arrive as title strings.
 *
 * searchBackend() passes results supplied by the PrefixSearchBackend hook
 * through this method. A descendant class either returns the array untouched
 * or converts each string into a Title object.
 *
 * @param array $strings Titles as strings
 *
 * @return array Strings or Title objects, depending on the descendant
 */
abstract protected function strings( array $strings );
120
/**
 * Do a prefix search of titles and return a list of matching page names.
 *
 * Dispatch order:
 *  - a single NS_MEDIA namespace is rewritten to NS_FILE;
 *  - a single NS_SPECIAL namespace is answered by specialSearch();
 *  - otherwise the PrefixSearchBackend hook runs. If Hooks::run() returns
 *    true the built-in defaultSearchBackend() is used; if it returns false
 *    the results the hook stored in $srchres are used instead.
 *
 * @param array $namespaces
 * @param string $search
 * @param int $limit
 * @param int $offset How many results to offset from the beginning
 * @return array Strings or Title objects, as produced by titles()/strings()
 */
protected function searchBackend( $namespaces, $search, $limit, $offset ) {
	// is_array() guard: defaultSearchBackend() documents null as an accepted
	// value, and count( null ) is an error on current PHP versions.
	if ( is_array( $namespaces ) && count( $namespaces ) == 1 ) {
		// reset() rather than [0]: the only element is not guaranteed to sit
		// at key 0 if the caller filtered a larger list.
		$ns = reset( $namespaces );
		if ( $ns == NS_MEDIA ) {
			$namespaces = [ NS_FILE ];
		} elseif ( $ns == NS_SPECIAL ) {
			return $this->titles( $this->specialSearch( $search, $limit, $offset ) );
		}
	}

	$srchres = [];
	$useDefaultBackend = Hooks::run(
		'PrefixSearchBackend',
		[ $namespaces, $search, $limit, &$srchres, $offset ]
	);
	if ( $useDefaultBackend ) {
		return $this->titles(
			$this->defaultSearchBackend( $namespaces, $search, $limit, $offset )
		);
	}

	// The hook provided the results (as strings) in $srchres
	return $this->strings(
		$this->handleResultFromHook( $srchres, $namespaces, $search, $limit, $offset )
	);
}
148
/**
 * Post-process results supplied by the PrefixSearchBackend hook.
 *
 * On the first page (offset strictly equal to 0) the results are passed
 * through SearchExactMatchRescorer::rescore(); for any other offset they are
 * returned unchanged.
 *
 * @param array $srchres Results collected from the hook
 * @param array $namespaces
 * @param string $search
 * @param int $limit
 * @param int $offset
 * @return array
 */
private function handleResultFromHook( $srchres, $namespaces, $search, $limit, $offset ) {
	if ( $offset !== 0 ) {
		// Skip the exact db match on later pages. This is still far from
		// perfect, but at least we avoid returning the same title again and
		// again when the user is scrolling with a query that matches a title
		// in the db.
		return $srchres;
	}

	$exactMatchRescorer = new SearchExactMatchRescorer();
	return $exactMatchRescorer->rescore( $search, $namespaces, $srchres, $limit );
}
160
/**
 * Prefix search special-case for Special: namespace.
 *
 * Two modes:
 *  - "Page/sub": everything before the first "/" must resolve to a special
 *    page; the remainder is handed to that page's prefixSearchSubpages() and
 *    each returned subpage is turned into a subpage Title.
 *  - "Page": the case-folded term is matched as a prefix against the
 *    case-folded canonical names and localised aliases of all special pages.
 *    Canonical names have rank 0 and an alias has its index in the alias list
 *    as rank; results are ordered by rank, then by case-folded key.
 *
 * @param string $search Term
 * @param int $limit Max number of items to return
 * @param int $offset Number of items to offset
 * @return array
 */
protected function specialSearch( $search, $limit, $offset ) {
	global $wgContLang;

	$searchParts = explode( '/', $search, 2 );
	$searchKey = $searchParts[0];
	$subpageSearch = $searchParts[1] ?? null;

	// Handle subpage search separately.
	if ( $subpageSearch !== null ) {
		// Try matching the full search string as a page name
		$specialTitle = Title::makeTitleSafe( NS_SPECIAL, $searchKey );
		if ( !$specialTitle ) {
			return [];
		}
		$special = SpecialPageFactory::getPage( $specialTitle->getText() );
		if ( !$special ) {
			return [];
		}
		$subpages = $special->prefixSearchSubpages( $subpageSearch, $limit, $offset );
		return array_map( function ( $sub ) use ( $specialTitle ) {
			return $specialTitle->getSubpage( $sub );
		}, $subpages );
	}

	# normalize searchKey, so aliases with spaces can be found - T27675
	$searchKey = str_replace( ' ', '_', $searchKey );
	$searchKey = $wgContLang->caseFold( $searchKey );

	// Fetched once: the list is needed both to seed the key map and to
	// filter the alias list below.
	$names = SpecialPageFactory::getNames();

	// Unlike SpecialPage itself, we want the canonical forms of both
	// canonical and alias title forms...
	$keys = [];
	foreach ( $names as $page ) {
		$keys[$wgContLang->caseFold( $page )] = [ 'page' => $page, 'rank' => 0 ];
	}

	foreach ( $wgContLang->getSpecialPageAliases() as $page => $aliases ) {
		if ( !in_array( $page, $names ) ) {# T22885
			continue;
		}

		foreach ( $aliases as $key => $alias ) {
			$keys[$wgContLang->caseFold( $alias )] = [ 'page' => $alias, 'rank' => $key ];
		}
	}
	ksort( $keys );

	$matches = [];
	foreach ( $keys as $pageKey => $page ) {
		// Array keys that look like integers come back as ints, hence the cast
		if ( $searchKey !== '' && strpos( (string)$pageKey, $searchKey ) !== 0 ) {
			continue;
		}

		// T29671: Don't use SpecialPage::getTitleFor() here because it
		// localizes its input leading to searches for e.g. Special:All
		// returning Spezial:MediaWiki-Systemnachrichten and returning
		// Spezial:Alle_Seiten twice when $wgLanguageCode == 'de'
		$title = Title::makeTitleSafe( NS_SPECIAL, $page['page'] );
		if ( !$title ) {
			// makeTitleSafe() signals an invalid name with a falsy value;
			// keep that out of the results so consumers that expect Title
			// objects only do not choke on it.
			continue;
		}
		$matches[$page['rank']][] = $title;

		if ( isset( $matches[0] ) && count( $matches[0] ) >= $limit + $offset ) {
			// We have enough items in primary rank, no use to continue
			break;
		}
	}

	// Ensure keys are in order
	ksort( $matches );
	// Flatten the array
	$matches = array_reduce( $matches, 'array_merge', [] );

	return array_slice( $matches, $offset, $limit );
}
240
/**
 * Unless overridden by PrefixSearchBackend hook...
 * This is case-sensitive (First character may
 * be automatically capitalized by Title::secureAndSpit()
 * later on depending on $wgCapitalLinks)
 *
 * Builds one "namespace IN (...) AND page_title LIKE 'prefix%'" condition per
 * distinct normalised prefix, ORs them together and queries the page table
 * on a replica connection. If NS_SPECIAL is among the namespaces, the query
 * is answered by specialSearch() alone.
 *
 * @param array|null $namespaces Namespaces to search in
 * @param string $search Term
 * @param int $limit Max number of items to return
 * @param int $offset Number of items to skip
 * @return Title[] Array of Title objects
 */
public function defaultSearchBackend( $namespaces, $search, $limit, $offset ) {
	// Backwards compatibility with old code. Default to NS_MAIN if no
	// namespaces provided (null or empty).
	if ( !$namespaces ) {
		$namespaces = [ NS_MAIN ];
	}

	// Construct suitable prefix for each namespace. They differ in cases where
	// some namespaces always capitalize and some don't.
	$prefixes = [];
	foreach ( $namespaces as $namespace ) {
		// For now, if special is included, ignore the other namespaces
		if ( $namespace == NS_SPECIAL ) {
			return $this->specialSearch( $search, $limit, $offset );
		}

		$title = Title::makeTitleSafe( $namespace, $search );
		// Why does the prefix default to empty?
		// NOTE(review): an empty prefix yields a bare any-string pattern,
		// i.e. no title restriction for that namespace - confirm this is
		// intended.
		$prefix = $title ? $title->getDBkey() : '';
		$prefixes[$prefix][] = $namespace;
	}

	$dbr = wfGetDB( DB_REPLICA );
	// Often there is only one prefix that applies to all requested namespaces,
	// but sometimes there are two if some namespaces do not always capitalize.
	$conds = [];
	foreach ( $prefixes as $prefix => $prefixNamespaces ) {
		$condition = [
			'page_namespace' => $prefixNamespaces,
			// PHP turns integer-like array keys into ints; cast back so the
			// LIKE builder always receives the string that was stored.
			'page_title' . $dbr->buildLike( (string)$prefix, $dbr->anyString() ),
		];
		$conds[] = $dbr->makeList( $condition, LIST_AND );
	}

	$table = 'page';
	$fields = [ 'page_id', 'page_namespace', 'page_title' ];
	$conds = $dbr->makeList( $conds, LIST_OR );
	$options = [
		'LIMIT' => $limit,
		'ORDER BY' => [ 'page_title', 'page_namespace' ],
		'OFFSET' => $offset
	];

	$res = $dbr->select( $table, $fields, $conds, __METHOD__, $options );

	return iterator_to_array( TitleArray::newFromResult( $res ) );
}
302
/**
 * Filter a list of namespace indexes down to the ones the content language knows.
 *
 * An entry is kept when it is numeric and is a key of
 * $wgContLang->getNamespaces(). If the input is not an array, or nothing
 * survives the filter, the result falls back to [ NS_MAIN ].
 *
 * @param array $namespaces
 * @return array (default: contains only NS_MAIN)
 */
protected function validateNamespaces( $namespaces ) {
	global $wgContLang;

	// Namespaces known to the content language, keyed by index
	$known = $wgContLang->getNamespaces();

	$accepted = [];
	if ( is_array( $namespaces ) ) {
		foreach ( $namespaces as $candidate ) {
			if ( is_numeric( $candidate ) && array_key_exists( $candidate, $known ) ) {
				$accepted[] = $candidate;
			}
		}
	}

	return count( $accepted ) > 0 ? $accepted : [ NS_MAIN ];
}
328 }
329
/**
 * Performs prefix search, returning Title objects
 * @deprecated Since 1.27, Use SearchEngine::defaultPrefixSearch or SearchEngine::completionSearch
 * @ingroup Search
 */
class TitlePrefixSearch extends PrefixSearch {

	/**
	 * Results are already Title objects; hand them back untouched.
	 *
	 * @param array $titles
	 * @return array
	 */
	protected function titles( array $titles ) {
		return $titles;
	}

	/**
	 * Convert title strings to Title objects via Title::newFromText() and
	 * run a LinkBatch over the resulting list before returning it.
	 *
	 * @param array $strings
	 * @return array
	 */
	protected function strings( array $strings ) {
		$titleObjects = array_map( [ 'Title', 'newFromText' ], $strings );

		$batch = new LinkBatch( $titleObjects );
		$batch->setCaller( __METHOD__ );
		$batch->execute();

		return $titleObjects;
	}
}
349
/**
 * Performs prefix search, returning strings
 * @deprecated Since 1.27, Use SearchEngine::prefixSearchSubpages or SearchEngine::completionSearch
 * @ingroup Search
 */
class StringPrefixSearch extends PrefixSearch {

	/**
	 * Convert each Title to its prefixed text form.
	 *
	 * @param array $titles Title objects
	 * @return array Array of strings
	 */
	protected function titles( array $titles ) {
		$toPrefixedText = function ( Title $title ) {
			return $title->getPrefixedText();
		};

		return array_map( $toPrefixedText, $titles );
	}

	/**
	 * Results are already strings; hand them back untouched.
	 *
	 * @param array $strings
	 * @return array
	 */
	protected function strings( array $strings ) {
		return $strings;
	}
}
366 }