#3344: Redirected terms cannot be included in categories
[lhc/web/wiklou.git] / includes / SearchEngine.php
1 <?php
2 /**
3 * Contains the base classes for the search engine and its result sets
4 * @package MediaWiki
5 * @subpackage Search
6 */
7
/**
 * Base search engine. The search and index-update methods here are
 * no-op defaults that backend-specific subclasses are expected to
 * override; the remaining methods are shared helpers.
 *
 * @package MediaWiki
 */
class SearchEngine {
	/** Maximum number of results to return; see setLimitOffset() */
	var $limit = 10;
	/** Number of results to skip before the first one returned */
	var $offset = 0;
	/** Not used by this base class; presumably filled in by backends */
	var $searchTerms = array();
	/** Namespace index numbers to search; see setNamespaces() */
	var $namespaces = array( 0 );
	/** Not used by this base class; presumably read by backends */
	var $showRedirects = false;

	/**
	 * Perform a full text search query and return a result set.
	 * If text searches are not supported or disabled, return null.
	 * This base implementation always returns null.
	 *
	 * @param string $term - Raw search term
	 * @return SearchResultSet
	 * @access public
	 * @abstract
	 */
	function searchText( $term ) {
		return null;
	}

	/**
	 * Perform a title-only search query and return a result set.
	 * If title searches are not supported or disabled, return null.
	 * This base implementation always returns null.
	 *
	 * @param string $term - Raw search term
	 * @return SearchResultSet
	 * @access public
	 * @abstract
	 */
	function searchTitle( $term ) {
		return null;
	}

	/**
	 * If an exact title match can be found, or a very slightly close match,
	 * return the title. If no match, returns NULL.
	 *
	 * Lookup order: the term as given, all lower case, each word
	 * capitalized, all upper case, then (only when $wgCapitalLinks is
	 * off) first letter upper-cased and first letter lower-cased.
	 * After that, IP addresses map to Special:Contributions, titles in
	 * the user namespace are returned even if the page does not exist,
	 * and a fully double-quoted term is retried without its quotes.
	 *
	 * @static
	 * @param string $term
	 * @return Title
	 * @access private
	 */
	function getNearMatch( $term ) {
		global $wgCapitalLinks, $wgContLang;

		# Exact match? No need to look further.
		$title = Title::newFromText( $term );
		if ( is_null( $title ) ) {
			return NULL;
		}

		if ( $title->getNamespace() == NS_SPECIAL || $title->exists() ) {
			return $title;
		}

		# Case variants of the term, in the order they are to be tried:
		# all lower case, capitalized words, all upper case.
		$variants = array(
			strtolower( $term ),
			ucwords( strtolower( $term ) ),
			strtoupper( $term )
		);
		if ( !$wgCapitalLinks ) {
			// Catch differs-by-first-letter-case-only
			$variants[] = $wgContLang->ucfirst( $term );
			$variants[] = $wgContLang->lcfirst( $term );
		}

		foreach ( $variants as $variant ) {
			$candidate = Title::newFromText( $variant );
			# newFromText() can return null (see the check above), so
			# guard every candidate before calling exists() on it.
			if ( !is_null( $candidate ) && $candidate->exists() ) {
				return $candidate;
			}
		}

		# From here on $title is still the title parsed from the
		# unmodified term; it is known to be non-null.

		# Entering an IP address goes to the contributions page
		$isUserPageOfIP = $title->getNamespace() == NS_USER
			&& User::isIP( $title->getText() );
		if ( $isUserPageOfIP || User::isIP( trim( $term ) ) ) {
			return Title::makeTitle( NS_SPECIAL, "Contributions/" . $title->getDbkey() );
		}

		# Entering a user goes to the user page whether it's there or not
		if ( $title->getNamespace() == NS_USER ) {
			return $title;
		}

		# Quoted term? Try without the quotes...
		if ( preg_match( '/^"([^"]+)"$/', $term, $matches ) ) {
			return SearchEngine::getNearMatch( $matches[1] );
		}

		return NULL;
	}

	/**
	 * Return the set of characters considered legal in a search string,
	 * written as the inside of a regular expression character class
	 * (it is interpolated between "[^" and "]" by filter()).
	 *
	 * @return string
	 */
	function legalSearchChars() {
		return "A-Za-z_'0-9\\x80-\\xFF\\-";
	}

	/**
	 * Set the maximum number of results to return
	 * and how many to skip before returning the first.
	 * Both values are coerced to integers.
	 *
	 * @param int $limit
	 * @param int $offset
	 * @access public
	 */
	function setLimitOffset( $limit, $offset = 0 ) {
		$this->limit = intval( $limit );
		$this->offset = intval( $offset );
	}

	/**
	 * Set which namespaces the search should include.
	 * Give an array of namespace index numbers.
	 *
	 * @param array $namespaces
	 * @access public
	 */
	function setNamespaces( $namespaces ) {
		$this->namespaces = $namespaces;
	}

	/**
	 * Make a list of searchable namespaces and their canonical names.
	 * Only namespaces whose index is NS_MAIN or greater are included.
	 *
	 * @return array namespace index => name
	 * @access public
	 */
	function searchableNamespaces() {
		global $wgContLang;
		$arr = array();
		foreach ( $wgContLang->getNamespaces() as $ns => $name ) {
			if ( $ns >= NS_MAIN ) {
				$arr[$ns] = $name;
			}
		}
		return $arr;
	}

	/**
	 * Return a 'cleaned up' search string: every character outside
	 * legalSearchChars() is replaced by a space, then the result is
	 * trimmed.
	 *
	 * @param string $text
	 * @return string
	 * @access public
	 */
	function filter( $text ) {
		$lc = $this->legalSearchChars();
		return trim( preg_replace( "/[^{$lc}]/", " ", $text ) );
	}

	/**
	 * Load up the appropriate search engine class for the currently
	 * active database backend, and return a configured instance.
	 *
	 * $wgSearchType, when set, names the class to use and takes
	 * precedence; no file is loaded for it here, so that class must
	 * already be available. Otherwise the class is chosen from
	 * $wgDBtype, falling back to SearchEngineDummy.
	 *
	 * @return SearchEngine
	 * @access private
	 */
	function create() {
		global $wgDBtype, $wgSearchType;
		if ( $wgSearchType ) {
			$class = $wgSearchType;
		} elseif ( $wgDBtype == 'mysql' ) {
			$class = 'SearchMySQL4';
			require_once( 'SearchMySQL4.php' );
		} elseif ( $wgDBtype == 'PostgreSQL' ) {
			$class = 'SearchTsearch2';
			require_once( 'SearchTsearch2.php' );
		} else {
			$class = 'SearchEngineDummy';
		}
		$search = new $class( wfGetDB( DB_SLAVE ) );
		# Limit and offset both start at 0 here; presumably callers
		# set real values via setLimitOffset() before searching.
		$search->setLimitOffset( 0, 0 );
		return $search;
	}

	/**
	 * Create or update the search index record for the given page.
	 * Title and text should be pre-processed.
	 * This base implementation does nothing.
	 *
	 * @param int $id
	 * @param string $title
	 * @param string $text
	 * @abstract
	 */
	function update( $id, $title, $text ) {
		// no-op
	}

	/**
	 * Update a search index record's title only.
	 * Title should be pre-processed.
	 * This base implementation does nothing.
	 *
	 * @param int $id
	 * @param string $title
	 * @abstract
	 */
	function updateTitle( $id, $title ) {
		// no-op
	}
}
223
/**
 * Base result set. Every method here returns an "empty" default
 * (no rows, no hits, no suggestion).
 *
 * @package MediaWiki
 */
class SearchResultSet {
	/**
	 * Regular expression fragments that match the search terms, as
	 * this engine parsed them, inside a text extract.
	 * The base implementation has none.
	 *
	 * @return array
	 * @access public
	 * @abstract
	 */
	function termMatches() {
		return array();
	}

	/**
	 * Number of rows in this result set; always 0 here.
	 *
	 * @return int
	 */
	function numRows() {
		return 0;
	}

	/**
	 * Whether this result set contains any results; always false here.
	 *
	 * @return bool
	 * @abstract
	 */
	function hasResults() {
		return false;
	}

	/**
	 * Total hit count for the query across the entire article
	 * database, for search modes that provide one. The count may
	 * include pages in namespaces that the given settings would not
	 * match.
	 *
	 * Null means no total hit count is supported, which is what this
	 * base implementation returns.
	 *
	 * @return int
	 * @access public
	 */
	function getTotalHits() {
		return null;
	}

	/**
	 * Whether a suggested alternate term is available on this set.
	 * Some search modes offer one when there are no exact hits;
	 * this base implementation never does.
	 *
	 * @return bool
	 * @access public
	 */
	function hasSuggestion() {
		return false;
	}

	/**
	 * The suggested alternate term, for search modes that offer one
	 * when there are no exact hits. Call hasSuggestion() first.
	 * This base implementation returns an empty string.
	 *
	 * @return string
	 * @access public
	 */
	function getSuggestion() {
		return '';
	}

	/**
	 * Fetch the next search result, or false when there is none.
	 * This base implementation always returns false.
	 *
	 * @return SearchResult
	 * @access public
	 * @abstract
	 */
	function next() {
		return false;
	}
}
298
/**
 * A single search hit, wrapping the Title of the matched page.
 *
 * @package MediaWiki
 */
class SearchResult {
	/**
	 * Title of the matched page, set by the constructor.
	 * Declared explicitly so it is not created as a dynamic property.
	 */
	var $mTitle;

	/**
	 * Build the result from a row object.
	 *
	 * @param object $row must expose page_namespace and page_title
	 */
	function SearchResult( $row ) {
		$this->mTitle = Title::makeTitle( $row->page_namespace, $row->page_title );
	}

	/**
	 * @return Title
	 * @access public
	 */
	function getTitle() {
		return $this->mTitle;
	}

	/**
	 * Relevance score of this hit. The base implementation has none.
	 *
	 * @return double or null if not supported
	 */
	function getScore() {
		return null;
	}
}
320
/**
 * Do-nothing search backend. SearchEngine::create() instantiates it
 * when no search type is configured and the database type has no
 * dedicated backend. It does not extend SearchEngine; it only
 * provides stand-ins for the methods listed below, all of which
 * return null.
 *
 * @package MediaWiki
 */
class SearchEngineDummy {
	/**
	 * @param string $term
	 * @return null
	 */
	function search( $term ) {
		return null;
	}
	/** Accepts and ignores the limit and offset. */
	function setLimitOffset($l, $o) {}
	/** Returns null rather than a character-class string. */
	function legalSearchChars() {}
	/** No-op index update; any arguments passed are ignored. */
	function update() {}
	/**
	 * No-op title-only index update; any arguments passed are ignored.
	 * Present so that code written against SearchEngine::updateTitle()
	 * does not hit an undefined method when handed this backend.
	 */
	function updateTitle() {}
	/** No-op; any arguments passed are ignored. */
	function setnamespaces() {}
	/** No-op title search; returns null. */
	function searchtitle() {}
	/** No-op text search; returns null. */
	function searchtext() {}
}
335