Fix for bug 13004, in which the Postgres full-text search has too many results,
[lhc/web/wiklou.git] / includes / SearchEngine.php
1 <?php
/**
 * Base search engine: holds the paging/namespace settings shared by search
 * backends and provides title "near match" lookup plus the backend factory.
 * The search and index-update methods here are no-op defaults; presumably
 * backend-specific classes (see create()) override them.
 * @addtogroup Search
 */
class SearchEngine {
	/** Maximum number of results to return. */
	var $limit = 10;
	/** Number of results to skip before the first returned one. */
	var $offset = 0;
	var $searchTerms = array();
	/** Namespace index numbers the search should include. */
	var $namespaces = array( NS_MAIN );
	var $showRedirects = false;

	/**
	 * Perform a full text search query and return a result set.
	 * If text searches are not supported or disabled, return null.
	 * This base implementation always returns null.
	 *
	 * @param string $term - Raw search term
	 * @return SearchResultSet
	 * @access public
	 * @abstract
	 */
	function searchText( $term ) {
		return null;
	}

	/**
	 * Perform a title-only search query and return a result set.
	 * If title searches are not supported or disabled, return null.
	 * This base implementation always returns null.
	 *
	 * @param string $term - Raw search term
	 * @return SearchResultSet
	 * @access public
	 * @abstract
	 */
	function searchTitle( $term ) {
		return null;
	}

	/**
	 * Build a title from $text and return it only if it could be parsed
	 * and the page exists.
	 *
	 * Title::newFromText() may return null (getNearMatch() already checks
	 * for that on the raw term), so the result must be tested before
	 * exists() is called on it.
	 *
	 * @param string $text
	 * @return Title, or null if the title is invalid or does not exist
	 */
	private static function existingTitle( $text ) {
		$title = Title::newFromText( $text );
		if ( !is_null( $title ) && $title->exists() ) {
			return $title;
		}
		return null;
	}

	/**
	 * If an exact title match can be found, or a very slightly close match,
	 * return the title. If no match, returns NULL.
	 *
	 * @param string $searchterm
	 * @return Title
	 */
	public static function getNearMatch( $searchterm ) {
		global $wgContLang, $wgCapitalLinks;

		$allSearchTerms = array( $searchterm );

		if ( $wgContLang->hasVariants() ) {
			$allSearchTerms = array_merge( $allSearchTerms, $wgContLang->convertLinkToAllVariants( $searchterm ) );
		}

		foreach ( $allSearchTerms as $term ) {

			# Exact match? No need to look further.
			$title = Title::newFromText( $term );
			if ( is_null( $title ) ) {
				return null;
			}

			if ( $title->getNamespace() == NS_SPECIAL || $title->exists() ) {
				return $title;
			}

			# Now try, in this order and stopping at the first existing page:
			#  - all lower case (i.e. first letter capitalized)
			#  - capitalized string
			#  - all upper case
			#  - Word-Caps-Breaking-At-Word-Breaks, for hyphenated names etc
			# Each variant is only computed if the previous ones failed.
			foreach ( array( 'lc', 'ucwords', 'uc', 'ucwordbreaks' ) as $caseMethod ) {
				$title = self::existingTitle( $wgContLang->$caseMethod( $term ) );
				if ( !is_null( $title ) ) {
					return $title;
				}
			}

			if ( !$wgCapitalLinks ) {
				// Catch differs-by-first-letter-case-only
				foreach ( array( 'ucfirst', 'lcfirst' ) as $caseMethod ) {
					$title = self::existingTitle( $wgContLang->$caseMethod( $term ) );
					if ( !is_null( $title ) ) {
						return $title;
					}
				}
			}

			// Give hooks a chance at better match variants.
			// A false return from the hook chain means "use what I put in $title".
			$title = null;
			if ( !wfRunHooks( 'SearchGetNearMatch', array( $term, &$title ) ) ) {
				return $title;
			}
		}

		$title = Title::newFromText( $searchterm );
		if ( is_null( $title ) ) {
			// Nothing below can work without a parsed title.
			return null;
		}

		# Entering an IP address goes to the contributions page
		if ( ( $title->getNamespace() == NS_USER && User::isIP( $title->getText() ) )
			|| User::isIP( trim( $searchterm ) ) ) {
			return SpecialPage::getTitleFor( 'Contributions', $title->getDBkey() );
		}

		# Entering a user goes to the user page whether it's there or not
		if ( $title->getNamespace() == NS_USER ) {
			return $title;
		}

		# Go to images that exist even if there's no local page.
		# There may have been a funny upload, or it may be on a shared
		# file repository such as Wikimedia Commons.
		if ( $title->getNamespace() == NS_IMAGE ) {
			$image = wfFindFile( $title );
			if ( $image ) {
				return $title;
			}
		}

		# MediaWiki namespace? Page may be "implied" if not customized.
		# Just return it, with caps forced as the message system likes it.
		if ( $title->getNamespace() == NS_MEDIAWIKI ) {
			return Title::makeTitle( NS_MEDIAWIKI, $wgContLang->ucfirst( $title->getText() ) );
		}

		# Quoted term? Try without the quotes...
		$matches = array();
		if ( preg_match( '/^"([^"]+)"$/', $searchterm, $matches ) ) {
			return self::getNearMatch( $matches[1] );
		}

		return null;
	}

	/**
	 * Return the characters considered legal in a search string, as the
	 * inside of a regular expression character class (see filter()).
	 *
	 * @return string
	 */
	public static function legalSearchChars() {
		return "A-Za-z_'0-9\\x80-\\xFF\\-";
	}

	/**
	 * Set the maximum number of results to return
	 * and how many to skip before returning the first.
	 * Both values are coerced to integers.
	 *
	 * @param int $limit
	 * @param int $offset
	 * @access public
	 */
	function setLimitOffset( $limit, $offset = 0 ) {
		$this->limit = intval( $limit );
		$this->offset = intval( $offset );
	}

	/**
	 * Set which namespaces the search should include.
	 * Give an array of namespace index numbers.
	 *
	 * @param array $namespaces
	 * @access public
	 */
	function setNamespaces( $namespaces ) {
		$this->namespaces = $namespaces;
	}

	/**
	 * Make a list of searchable namespaces and their canonical names.
	 * Only namespaces with an index of NS_MAIN or above are included.
	 *
	 * @return array Namespace index => name
	 */
	public static function searchableNamespaces() {
		global $wgContLang;
		$arr = array();
		foreach ( $wgContLang->getNamespaces() as $ns => $name ) {
			if ( $ns >= NS_MAIN ) {
				$arr[$ns] = $name;
			}
		}
		return $arr;
	}

	/**
	 * Return a 'cleaned up' search string: every character outside
	 * legalSearchChars() is replaced by a space and the result is trimmed.
	 *
	 * @param string $text
	 * @return string
	 * @access public
	 */
	function filter( $text ) {
		// Called through $this so a subclass's legalSearchChars() is honoured.
		$lc = $this->legalSearchChars();
		return trim( preg_replace( "/[^{$lc}]/", " ", $text ) );
	}

	/**
	 * Load up the appropriate search engine class for the currently
	 * active database backend, and return a configured instance.
	 * An explicit $wgSearchType takes precedence over the database type.
	 *
	 * @return SearchEngine
	 */
	public static function create() {
		global $wgDBtype, $wgSearchType;
		if ( $wgSearchType ) {
			$class = $wgSearchType;
		} elseif ( $wgDBtype == 'mysql' ) {
			$class = 'SearchMySQL4';
		} elseif ( $wgDBtype == 'postgres' ) {
			$class = 'SearchPostgres';
		} elseif ( $wgDBtype == 'oracle' ) {
			$class = 'SearchOracle';
		} else {
			$class = 'SearchEngineDummy';
		}
		$search = new $class( wfGetDB( DB_SLAVE ) );
		// NOTE(review): this replaces the class defaults (limit 10, offset 0)
		// with 0/0; presumably callers set their own limit afterwards - confirm.
		$search->setLimitOffset( 0, 0 );
		return $search;
	}

	/**
	 * Create or update the search index record for the given page.
	 * Title and text should be pre-processed.
	 * No-op in this base class.
	 *
	 * @param int $id
	 * @param string $title
	 * @param string $text
	 * @abstract
	 */
	function update( $id, $title, $text ) {
		// no-op
	}

	/**
	 * Update a search index record's title only.
	 * Title should be pre-processed.
	 * No-op in this base class.
	 *
	 * @param int $id
	 * @param string $title
	 * @abstract
	 */
	function updateTitle( $id, $title ) {
		// no-op
	}
}
254
255
/**
 * Default, empty result set: every accessor reports "no results".
 * @addtogroup Search
 */
class SearchResultSet {
	/**
	 * Regular expression fragments that match the search terms, as this
	 * engine parsed them, inside a text extract.
	 *
	 * @return array Always empty here
	 * @access public
	 * @abstract
	 */
	public function termMatches() {
		return array();
	}

	/**
	 * Number of rows in this set.
	 *
	 * @return int Always 0 here
	 */
	public function numRows() {
		return 0;
	}

	/**
	 * Whether this result set includes any results.
	 *
	 * @return bool Always false here
	 * @abstract
	 */
	public function hasResults() {
		return false;
	}

	/**
	 * Total hit count for the query across the entire article database,
	 * for search modes that provide one. The count may include pages in
	 * namespaces that the current settings would not match.
	 *
	 * @return int, or null when no total hit count is supported
	 * @access public
	 */
	public function getTotalHits() {
		return null;
	}

	/**
	 * Whether a suggested alternate term is available on this set.
	 * Some search modes offer one when there are no exact hits.
	 *
	 * @return bool Always false here
	 * @access public
	 */
	public function hasSuggestion() {
		return false;
	}

	/**
	 * The suggested alternate term, for search modes that offer one when
	 * there are no exact hits. Check hasSuggestion() first.
	 *
	 * @return string Always empty here
	 * @access public
	 */
	public function getSuggestion() {
		return '';
	}

	/**
	 * Fetch the next search result.
	 *
	 * @return SearchResult, or false when there is none
	 * @access public
	 * @abstract
	 */
	public function next() {
		return false;
	}

	/**
	 * Free the result set, if applicable. Nothing to release here.
	 *
	 * @access public
	 */
	public function free() {
		// ...
	}
}
340
341
/**
 * Marker class with no members of its own.
 * NOTE(review): presumably handed back in place of a result set when an
 * engine gives up on a query with too many matches - confirm against callers.
 * @addtogroup Search
 */
class SearchResultTooMany {
	## Some search engines may bail out if too many matches are found
}
348
349
/**
 * A single search hit, wrapping the Title built from a result row.
 * @addtogroup Search
 */
class SearchResult {
	/** Title built from the row's page_namespace and page_title. */
	var $mTitle;

	/**
	 * Build the result from a row object.
	 *
	 * A method named after the class is no longer treated as a constructor
	 * on PHP 8, so the real constructor lives here.
	 *
	 * @param object $row Row exposing page_namespace and page_title
	 */
	function __construct( $row ) {
		$this->mTitle = Title::makeTitle( $row->page_namespace, $row->page_title );
	}

	/**
	 * Old-style constructor name, kept as a plain method so existing code
	 * that calls $this->SearchResult( $row ) (e.g. from a subclass
	 * constructor) keeps working.
	 *
	 * @param object $row Row exposing page_namespace and page_title
	 * @deprecated use the constructor
	 */
	function SearchResult( $row ) {
		self::__construct( $row );
	}

	/**
	 * @return Title
	 * @access public
	 */
	function getTitle() {
		return $this->mTitle;
	}

	/**
	 * @return double or null if not supported
	 */
	function getScore() {
		return null;
	}
}
374
/**
 * Do-nothing search engine, selected by SearchEngine::create() when no
 * backend matches the configured database type. Searches return null and
 * index updates are ignored.
 * @addtogroup Search
 */
class SearchEngineDummy {
	/**
	 * @param string $term
	 * @return null Always; no search is performed
	 */
	function search( $term ) {
		return null;
	}

	/**
	 * Accepted for interface parity with SearchEngine; the values are ignored.
	 *
	 * @param int $l Limit
	 * @param int $o Offset
	 */
	function setLimitOffset( $l, $o = 0 ) {
		// no-op
	}

	/**
	 * Same character set as the real engines, so callers that build a
	 * regular expression from it get a usable character class, not null.
	 *
	 * @return string
	 */
	public static function legalSearchChars() {
		return SearchEngine::legalSearchChars();
	}

	/** Index updates are ignored; any arguments passed are discarded. */
	function update() {
		// no-op
	}

	/** Title-only index updates are ignored; any arguments are discarded. */
	function updateTitle() {
		// no-op
	}

	/**
	 * Namespace selection is ignored.
	 *
	 * @param array $namespaces
	 */
	function setNamespaces( $namespaces = array() ) {
		// no-op
	}

	/**
	 * @param string $term
	 * @return null Always; title search is not supported
	 */
	function searchTitle( $term = null ) {
		return null;
	}

	/**
	 * @param string $term
	 * @return null Always; text search is not supported
	 */
	function searchText( $term = null ) {
		return null;
	}
}
389