query parser rewrite to get boolean operators to work
[lhc/web/wiklou.git] / includes / search / SearchOracle.php
1 <?php
2 # Copyright (C) 2004 Brion Vibber <brion@pobox.com>
3 # http://www.mediawiki.org/
4 #
5 # This program is free software; you can redistribute it and/or modify
6 # it under the terms of the GNU General Public License as published by
7 # the Free Software Foundation; either version 2 of the License, or
8 # (at your option) any later version.
9 #
10 # This program is distributed in the hope that it will be useful,
11 # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 # GNU General Public License for more details.
14 #
15 # You should have received a copy of the GNU General Public License along
16 # with this program; if not, write to the Free Software Foundation, Inc.,
17 # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
18 # http://www.gnu.org/copyleft/gpl.html
19
20 /**
21 * @file
22 * @ingroup Search
23 */
24
/**
 * Search engine hook base class for Oracle (ConText).
 *
 * Builds CONTAINS() queries against the searchindex table and provides
 * update()/updateTitle() to write rows of that table.
 *
 * @ingroup Search
 */
class SearchOracle extends SearchEngine {
	/**
	 * @param $db Database object used to build and run the search queries
	 */
	function __construct($db) {
		$this->db = $db;
	}

	/**
	 * Perform a full text search query and return a result set.
	 *
	 * An empty term short-circuits to an empty result set without
	 * touching the database.
	 *
	 * @param $term String: raw search term
	 * @return OracleSearchResultSet
	 */
	function searchText( $term ) {
		if ($term == '') {
			return new OracleSearchResultSet(false, '');
		}

		$sql = $this->getQuery($this->filter($term), true);
		$resultSet = $this->db->resultObject($this->db->query($sql));
		return new OracleSearchResultSet($resultSet, $this->searchTerms);
	}

	/**
	 * Perform a title-only search query and return a result set.
	 *
	 * An empty term short-circuits to an empty result set without
	 * touching the database.
	 *
	 * @param $term String: raw search term
	 * @return OracleSearchResultSet
	 */
	function searchTitle($term) {
		if ($term == '') {
			return new OracleSearchResultSet(false, '');
		}

		$sql = $this->getQuery($this->filter($term), false);
		$resultSet = $this->db->resultObject($this->db->query($sql));
		// Return the Oracle result set, same as searchText(); the MySQL
		// result set class does not belong to this backend.
		return new OracleSearchResultSet($resultSet, $this->searchTerms);
	}


	/**
	 * Return a partial WHERE clause to exclude redirects, if so set
	 * @return String
	 */
	function queryRedirect() {
		if ($this->showRedirects) {
			return '';
		} else {
			return 'AND page_is_redirect=0';
		}
	}

	/**
	 * Return a partial WHERE clause to limit the search to the given namespaces.
	 *
	 * A null namespace list adds no restriction; an empty list restricts
	 * the search to namespace 0.
	 *
	 * @return String
	 */
	function queryNamespaces() {
		if( is_null($this->namespaces) ) {
			return '';
		}
		if ( !count( $this->namespaces ) ) {
			$namespaces = '0';
		} else {
			$namespaces = $this->db->makeList( $this->namespaces );
		}
		return 'AND page_namespace IN (' . $namespaces . ')';
	}

	/**
	 * Apply the configured limit and offset to the given query.
	 *
	 * @param $sql String: query to be limited
	 * @return String
	 */
	function queryLimit($sql) {
		return $this->db->limitResult($sql, $this->limit, $this->offset);
	}

	/**
	 * Return the ORDER BY clause for the search query.
	 *
	 * Results are ordered by score(1); the 1 is the same label that
	 * parseQuery() passes as the last argument of CONTAINS().
	 * Neither parameter is used here.
	 *
	 * @param $filteredTerm String
	 * @param $fulltext Boolean
	 * @return String
	 */
	function queryRanking($filteredTerm, $fulltext) {
		return ' ORDER BY score(1)';
	}

	/**
	 * Construct the full SQL query to do the search.
	 * The guts should be constructed in queryMain()
	 *
	 * @param $filteredTerm String
	 * @param $fulltext Boolean
	 * @return String
	 */
	function getQuery( $filteredTerm, $fulltext ) {
		return $this->queryLimit($this->queryMain($filteredTerm, $fulltext) . ' ' .
			$this->queryRedirect() . ' ' .
			$this->queryNamespaces() . ' ' .
			$this->queryRanking( $filteredTerm, $fulltext ) . ' ');
	}


	/**
	 * Picks which field to index on, depending on what type of query.
	 * @param $fulltext Boolean
	 * @return String
	 */
	function getIndexField($fulltext) {
		return $fulltext ? 'si_text' : 'si_title';
	}

	/**
	 * Get the base part of the search query.
	 *
	 * Joins the page table with the searchindex table and appends the
	 * CONTAINS() condition produced by parseQuery().
	 *
	 * @param $filteredTerm String
	 * @param $fulltext Boolean
	 * @return String
	 */
	function queryMain( $filteredTerm, $fulltext ) {
		$match = $this->parseQuery($filteredTerm, $fulltext);
		$page = $this->db->tableName('page');
		$searchindex = $this->db->tableName('searchindex');
		return 'SELECT page_id, page_namespace, page_title ' .
			"FROM $page,$searchindex " .
			'WHERE page_id=si_page AND ' . $match;
	}

	/**
	 * Parse a user input search string, and return an SQL fragment to be used
	 * as part of a WHERE clause.
	 *
	 * As a side effect, $this->searchTerms is reset and filled with one
	 * regular expression fragment per parsed term.
	 *
	 * @param $filteredText String
	 * @param $fulltext Boolean
	 * @return String
	 */
	function parseQuery($filteredText, $fulltext) {
		global $wgContLang;
		$lc = SearchEngine::legalSearchChars();
		$this->searchTerms = array();

		# FIXME: This doesn't handle parenthetical expressions.
		$m = array();
		$searchon = '';
		if (preg_match_all('/([-+<>~]?)(([' . $lc . ']+)(\*?)|"[^"]*")/',
				$filteredText, $m, PREG_SET_ORDER)) {
			foreach($m as $terms) {
				// A leading '-' negates the term; everything else is ANDed.
				$operator = ($terms[1] == '-' ? ' ~' : ' & ');

				// Search terms in all variant forms, only
				// apply on wiki with LanguageConverter.
				// Computed afresh for every term so that no value leaks
				// over from a previous iteration.
				$temp_terms = $wgContLang->autoConvertToAllVariants( $terms[2] );
				if( is_array( $temp_terms )) {
					$temp_terms = array_unique( array_values( $temp_terms ));
					foreach( $temp_terms as $t ) {
						$searchon .= $operator . $this->escapeTerm( $t );
					}
				} else {
					$searchon .= $operator . $this->escapeTerm( $terms[2] );
				}

				// Build the regexp fragment recorded for this term.
				if (!empty($terms[3])) {
					// Bare word, optionally followed by a '*' wildcard.
					$regexp = preg_quote( $terms[3], '/' );
					if ($terms[4]) {
						$regexp .= "[0-9A-Za-z_]+";
					}
				} else {
					// Quoted phrase: drop the quotes.
					$regexp = preg_quote(str_replace('"', '', $terms[2]), '/');
				}
				$this->searchTerms[] = $regexp;
			}
		}


		// Strip the leading ' & ' left over from the first term, then quote.
		$searchon = $this->db->addQuotes(ltrim($searchon, ' &'));
		$field = $this->getIndexField($fulltext);
		return " CONTAINS($field, $searchon, 1) > 0 ";
	}

	/**
	 * Prepare a single term for use inside the CONTAINS() query string.
	 *
	 * The term is passed through $wgContLang->stripForSearch(), a term that
	 * is entirely wrapped in double quotes is wrapped in parentheses
	 * instead, and the characters '-', '&' and '|' are backslash-escaped.
	 *
	 * @param $t String
	 * @return String
	 */
	private function escapeTerm($t) {
		global $wgContLang;
		$t = $wgContLang->stripForSearch($t);
		$t = preg_replace('/^"(.*)"$/', '($1)', $t);
		$t = preg_replace('/([-&|])/', '\\\\$1', $t);
		return $t;
	}

	/**
	 * Create or update the search index record for the given page.
	 * Title and text should be pre-processed.
	 *
	 * After the row is replaced, both text indexes are synchronised.
	 *
	 * @param $id Integer
	 * @param $title String
	 * @param $text String
	 */
	function update($id, $title, $text) {
		$dbw = wfGetDB(DB_MASTER);
		$dbw->replace('searchindex',
			array('si_page'),
			array(
				'si_page' => $id,
				'si_title' => $title,
				'si_text' => $text
			), 'SearchOracle::update' );
		$dbw->query("CALL ctx_ddl.sync_index('si_text_idx')");
		$dbw->query("CALL ctx_ddl.sync_index('si_title_idx')");
	}

	/**
	 * Update a search index record's title only.
	 * Title should be pre-processed.
	 *
	 * @param $id Integer
	 * @param $title String
	 */
	function updateTitle($id, $title) {
		$dbw = wfGetDB(DB_MASTER);

		$dbw->update('searchindex',
			array('si_title' => $title),
			array('si_page' => $id),
			'SearchOracle::updateTitle',
			array());
	}


	/**
	 * Return the parent's legal search characters with the double quote
	 * added in front.
	 *
	 * @return String
	 */
	public static function legalSearchChars() {
		return "\"" . parent::legalSearchChars();
	}
}
241
/**
 * Result set wrapper handed back by SearchOracle.
 *
 * Holds either a database result object or false (meaning "no results").
 *
 * @ingroup Search
 */
class OracleSearchResultSet extends SearchResultSet {

	/**
	 * @param $resultSet Database result object, or false for an empty set
	 * @param $terms Search terms associated with this result set
	 */
	function __construct($resultSet, $terms) {
		$this->mTerms = $terms;
		$this->mResultSet = $resultSet;
	}

	/**
	 * @return The terms given to the constructor
	 */
	function termMatches() {
		return $this->mTerms;
	}

	/**
	 * Number of rows in the wrapped result; 0 when there is no result.
	 */
	function numRows() {
		return $this->mResultSet === false
			? 0
			: $this->mResultSet->numRows();
	}

	/**
	 * Fetch the next row wrapped in a SearchResult.
	 *
	 * @return SearchResult, or false when there is no result or no more rows
	 */
	function next() {
		if ($this->mResultSet !== false) {
			$nextRow = $this->mResultSet->fetchObject();
			if ($nextRow !== false) {
				return new SearchResult($nextRow);
			}
		}
		return false;
	}
}