Disable warning about direct text table access for now
[lhc/web/wiklou.git] / includes / MagicWord.php
1 <?php
2 /**
3 * See docs/magicword.txt.
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License along
16 * with this program; if not, write to the Free Software Foundation, Inc.,
17 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
18 * http://www.gnu.org/copyleft/gpl.html
19 *
20 * @file
21 * @ingroup Parser
22 */
23
24 use MediaWiki\MediaWikiServices;
25
26 /**
27 * This class encapsulates "magic words" such as "#redirect", __NOTOC__, etc.
28 *
29 * @par Usage:
30 * @code
31 * if ( $magicWordFactory->get( 'redirect' )->match( $text ) ) {
32 * // some code
33 * }
34 * @endcode
35 *
36 * Please avoid reading the data out of one of these objects and then writing
37 * special case code. If possible, add another match()-like function here.
38 *
39 * To add magic words in an extension, use $magicWords in a file listed in
40 * $wgExtensionMessagesFiles[].
41 *
42 * @par Example:
43 * @code
44 * $magicWords = [];
45 *
46 * $magicWords['en'] = [
47 * 'magicwordkey' => [ 0, 'case_insensitive_magic_word' ],
48 * 'magicwordkey2' => [ 1, 'CASE_sensitive_magic_word2' ],
49 * ];
50 * @endcode
51 *
52 * For magic words which are also Parser variables, add a MagicWordwgVariableIDs
53 * hook. Use string keys.
54 *
55 * @ingroup Parser
56 */
class MagicWord {

	/** @var string|null Internal name (ID) of the magic word */
	public $mId;

	/** @var string[] Every spelling (synonym) of the magic word */
	public $mSynonyms;

	/** @var bool Whether matching is case sensitive */
	public $mCaseSensitive;

	/** @var string Delimited regex matching any synonym anywhere; '' until initRegex() has run */
	private $mRegex = '';

	/** @var string Delimited regex matching any synonym at the start of a string */
	private $mRegexStart = '';

	/** @var string Delimited regex matching any synonym against a whole string */
	private $mRegexStartToEnd = '';

	/** @var string Alternation of the escaped synonyms, without delimiters or modifiers */
	private $mBaseRegex = '';

	/** @var string Same as $mRegex, with the "$1" placeholder turned into a capture group */
	private $mVariableRegex = '';

	/** @var string Same as $mRegexStartToEnd, with "$1" turned into a capture group */
	private $mVariableStartToEndRegex = '';

	/** @var bool Whether the last replace()/substituteCallback() call changed its input */
	private $mModified = false;

	/** @var bool Scratch flag set by pregRemoveAndRecord() while removing matches */
	private $mFound = false;

	/**
	 * Create a new MagicWord object
	 *
	 * Use factory instead: MagicWordFactory::get
	 *
	 * @param string|null $id The internal name of the magic word
	 * @param string[]|string $syn Synonyms for the magic word; a single string is
	 *   wrapped into a one-element array
	 * @param bool $cs If magic word is case sensitive
	 */
	public function __construct( $id = null, $syn = [], $cs = false ) {
		$this->mId = $id;
		$this->mSynonyms = (array)$syn;
		$this->mCaseSensitive = $cs;
	}

	/**
	 * Factory: creates an object representing an ID
	 *
	 * Thin wrapper that forwards to the MagicWordFactory service.
	 *
	 * @param string $id The internal name of the magic word
	 *
	 * @return MagicWord
	 * @deprecated since 1.32, use MagicWordFactory::get
	 */
	public static function get( $id ) {
		return MediaWikiServices::getInstance()->getMagicWordFactory()->get( $id );
	}

	/**
	 * Get an array of parser variable IDs
	 *
	 * Thin wrapper that forwards to the MagicWordFactory service.
	 *
	 * @return string[]
	 * @deprecated since 1.32, use MagicWordFactory::getVariableIDs
	 */
	public static function getVariableIDs() {
		return MediaWikiServices::getInstance()->getMagicWordFactory()->getVariableIDs();
	}

	/**
	 * Get an array of parser substitution modifier IDs
	 *
	 * Thin wrapper that forwards to the MagicWordFactory service.
	 *
	 * @return string[]
	 * @deprecated since 1.32, use MagicWordFactory::getSubstIDs
	 */
	public static function getSubstIDs() {
		return MediaWikiServices::getInstance()->getMagicWordFactory()->getSubstIDs();
	}

	/**
	 * Allow external reads of TTL array
	 *
	 * Thin wrapper that forwards to the MagicWordFactory service.
	 *
	 * @param string $id
	 * @return int
	 * @deprecated since 1.32, use MagicWordFactory::getCacheTTL
	 */
	public static function getCacheTTL( $id ) {
		return MediaWikiServices::getInstance()->getMagicWordFactory()->getCacheTTL( $id );
	}

	/**
	 * Get a MagicWordArray of double-underscore entities
	 *
	 * Thin wrapper that forwards to the MagicWordFactory service.
	 *
	 * @return MagicWordArray
	 * @deprecated since 1.32, use MagicWordFactory::getDoubleUnderscoreArray
	 */
	public static function getDoubleUnderscoreArray() {
		return MediaWikiServices::getInstance()->getMagicWordFactory()->getDoubleUnderscoreArray();
	}

	/**
	 * Initialises this object with an ID
	 *
	 * Stores the ID and hands this object to $wgContLang->getMagic(), which is
	 * expected to fill in the synonyms (NOTE(review): getMagic() is not visible
	 * here; confirm what else it sets).
	 *
	 * @param string $id
	 * @throws MWException If no synonyms are set after the lookup
	 */
	public function load( $id ) {
		global $wgContLang;
		$this->mId = $id;
		$wgContLang->getMagic( $this );
		if ( !$this->mSynonyms ) {
			// Leave a placeholder synonym behind before reporting the failure, so
			// the object does not keep an empty synonym list.
			$this->mSynonyms = [ 'brionmademeputthishere' ];
			throw new MWException( "Error: invalid magic word '$id'" );
		}
	}

	/**
	 * Preliminary initialisation: builds every cached regex from the synonyms
	 * @private
	 */
	public function initRegex() {
		// Sort the synonyms by length, descending, so that the longest synonym
		// matches in precedence to the shortest
		$synonyms = $this->mSynonyms;
		usort( $synonyms, [ $this, 'compareStringLength' ] );

		$escSyn = [];
		foreach ( $synonyms as $synonym ) {
			// Escape regex metacharacters and the "/" delimiter
			$escSyn[] = preg_quote( $synonym, '/' );
		}
		$this->mBaseRegex = implode( '|', $escSyn );

		$case = $this->mCaseSensitive ? '' : 'iu';
		$this->mRegex = "/{$this->mBaseRegex}/{$case}";
		$this->mRegexStart = "/^(?:{$this->mBaseRegex})/{$case}";
		$this->mRegexStartToEnd = "/^(?:{$this->mBaseRegex})$/{$case}";

		// preg_quote() turned each "$1" placeholder into "\$1"; swap it for a
		// lazy capture group to obtain the "variable" flavours of the regexes.
		$this->mVariableRegex = str_replace( "\\$1", "(.*?)", $this->mRegex );
		$this->mVariableStartToEndRegex = str_replace( "\\$1", "(.*?)", $this->mRegexStartToEnd );
	}

	/**
	 * A comparison function that returns -1, 0 or 1 depending on whether the
	 * first string is longer, the same length or shorter than the second
	 * string. Lengths are byte lengths (strlen).
	 *
	 * @param string $s1
	 * @param string $s2
	 *
	 * @return int
	 */
	public function compareStringLength( $s1, $s2 ) {
		// Operands are swapped on purpose: this sorts in descending order
		return strlen( $s2 ) <=> strlen( $s1 );
	}

	/**
	 * Gets a regex representing matching the word
	 *
	 * @return string
	 */
	public function getRegex() {
		if ( $this->mRegex === '' ) {
			$this->initRegex();
		}
		return $this->mRegex;
	}

	/**
	 * Gets the regexp case modifier to use, i.e. "iu" or nothing, to be used if
	 * one is using MagicWord::getBaseRegex(), otherwise it'll be included in
	 * the complete expression
	 *
	 * @return string
	 */
	public function getRegexCase() {
		if ( $this->mRegex === '' ) {
			$this->initRegex();
		}

		return $this->mCaseSensitive ? '' : 'iu';
	}

	/**
	 * Gets a regex matching the word, if it is at the string start
	 *
	 * @return string
	 */
	public function getRegexStart() {
		if ( $this->mRegex === '' ) {
			$this->initRegex();
		}
		return $this->mRegexStart;
	}

	/**
	 * Gets a regex matching the word from start to end of a string
	 *
	 * @return string
	 * @since 1.23
	 */
	public function getRegexStartToEnd() {
		if ( $this->mRegexStartToEnd === '' ) {
			$this->initRegex();
		}
		return $this->mRegexStartToEnd;
	}

	/**
	 * Regex without the delimiters and modifiers
	 *
	 * @return string
	 */
	public function getBaseRegex() {
		if ( $this->mRegex === '' ) {
			$this->initRegex();
		}
		return $this->mBaseRegex;
	}

	/**
	 * Returns true if the text contains the word
	 *
	 * @param string $text
	 *
	 * @return bool
	 */
	public function match( $text ) {
		return (bool)preg_match( $this->getRegex(), $text );
	}

	/**
	 * Returns true if the text starts with the word
	 *
	 * @param string $text
	 *
	 * @return bool
	 */
	public function matchStart( $text ) {
		return (bool)preg_match( $this->getRegexStart(), $text );
	}

	/**
	 * Returns true if the whole text matched the word
	 *
	 * @param string $text
	 *
	 * @return bool
	 * @since 1.23
	 */
	public function matchStartToEnd( $text ) {
		return (bool)preg_match( $this->getRegexStartToEnd(), $text );
	}

	/**
	 * Returns NULL if there's no match, the value of $1 otherwise
	 * The return code is the matched string, if there's no variable
	 * part in the regex and the matched variable part ($1) if there
	 * is one.
	 *
	 * @param string $text
	 *
	 * @return string|null
	 */
	public function matchVariableStartToEnd( $text ) {
		$matches = [];
		$matchcount = preg_match( $this->getVariableStartToEndRegex(), $text, $matches );
		if ( !$matchcount ) {
			// No match (0) or a PCRE error (false)
			return null;
		}

		# Multiple matched parts (variable match); some will be empty because of
		# synonyms that did not take part in the match. The variable is the
		# second non-empty one, so drop the blank elements and reindex.
		# Only empty strings are dropped: a bare array_filter() would also throw
		# away a captured "0" and make "0px" come back as "0px" instead of "0".
		# See also T8526
		$matches = array_values( array_filter( $matches, static function ( $part ) {
			return $part !== '';
		} ) );

		// [1] is the variable part when there is one, [0] the whole match
		// otherwise; '' covers a successful match of the empty string.
		return $matches[1] ?? $matches[0] ?? '';
	}

	/**
	 * Returns true if the text matches the word, and alters the
	 * input string, removing all instances of the word
	 *
	 * @param string &$text
	 *
	 * @return bool
	 */
	public function matchAndRemove( &$text ) {
		$this->mFound = false;
		$text = preg_replace_callback(
			$this->getRegex(),
			[ $this, 'pregRemoveAndRecord' ],
			$text
		);

		return $this->mFound;
	}

	/**
	 * Returns true if the text starts with the word, and alters the
	 * input string, removing that leading occurrence
	 *
	 * @param string &$text
	 * @return bool
	 */
	public function matchStartAndRemove( &$text ) {
		$this->mFound = false;
		$text = preg_replace_callback(
			$this->getRegexStart(),
			[ $this, 'pregRemoveAndRecord' ],
			$text
		);

		return $this->mFound;
	}

	/**
	 * Used in matchAndRemove() and matchStartAndRemove(): records that a match
	 * was seen and replaces it with nothing
	 *
	 * @return string
	 */
	public function pregRemoveAndRecord() {
		$this->mFound = true;
		return '';
	}

	/**
	 * Replaces the word with something else
	 *
	 * The replacement is passed through StringUtils::escapeRegexReplacement()
	 * before being handed to preg_replace().
	 *
	 * @param string $replacement
	 * @param string $subject
	 * @param int $limit Maximum number of replacements, -1 for no limit
	 *
	 * @return string
	 */
	public function replace( $replacement, $subject, $limit = -1 ) {
		$res = preg_replace(
			$this->getRegex(),
			StringUtils::escapeRegexReplacement( $replacement ),
			$subject,
			$limit
		);
		$this->mModified = $res !== $subject;
		return $res;
	}

	/**
	 * Variable handling: {{SUBST:xxx}} style words
	 * Calls back a function to determine what to replace xxx with
	 * Input word must contain $1
	 *
	 * @param string $text
	 * @param callable $callback Receives the preg match array, returns the replacement
	 *
	 * @return string
	 */
	public function substituteCallback( $text, $callback ) {
		$res = preg_replace_callback( $this->getVariableRegex(), $callback, $text );
		$this->mModified = $res !== $text;
		return $res;
	}

	/**
	 * Matches the word, where $1 is a wildcard
	 *
	 * @return string
	 */
	public function getVariableRegex() {
		if ( $this->mVariableRegex === '' ) {
			$this->initRegex();
		}
		return $this->mVariableRegex;
	}

	/**
	 * Matches the entire string, where $1 is a wildcard
	 *
	 * @return string
	 */
	public function getVariableStartToEndRegex() {
		if ( $this->mVariableStartToEndRegex === '' ) {
			$this->initRegex();
		}
		return $this->mVariableStartToEndRegex;
	}

	/**
	 * Accesses the synonym list directly
	 *
	 * @param int $i Index into the synonym list; not bounds-checked
	 *
	 * @return string
	 */
	public function getSynonym( $i ) {
		return $this->mSynonyms[$i];
	}

	/**
	 * @return string[]
	 */
	public function getSynonyms() {
		return $this->mSynonyms;
	}

	/**
	 * Returns true if the last call to replace() or substituteCallback()
	 * returned a modified text, otherwise false.
	 *
	 * @return bool
	 */
	public function getWasModified() {
		return $this->mModified;
	}

	/**
	 * Adds all the synonyms of this MagicWord to an array, to allow quick
	 * lookup in a list of magic words
	 *
	 * Each synonym is passed through $wgContLang->lc() and used as a key.
	 *
	 * @param string[] &$array
	 * @param string $value
	 */
	public function addToArray( &$array, $value ) {
		global $wgContLang;
		foreach ( $this->mSynonyms as $syn ) {
			$array[$wgContLang->lc( $syn )] = $value;
		}
	}

	/**
	 * @return bool
	 */
	public function isCaseSensitive() {
		return $this->mCaseSensitive;
	}

	/**
	 * @return string
	 */
	public function getId() {
		return $this->mId;
	}
}