Remove HWLDFWordAccumulator, deprecated in 1.28
[lhc/web/wiklou.git] / includes / MagicWord.php
1 <?php
2 /**
3 * See docs/magicword.txt.
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License along
16 * with this program; if not, write to the Free Software Foundation, Inc.,
17 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
18 * http://www.gnu.org/copyleft/gpl.html
19 *
20 * @file
21 * @ingroup Parser
22 */
23
24 use MediaWiki\MediaWikiServices;
25
26 /**
27 * This class encapsulates "magic words" such as "#redirect", __NOTOC__, etc.
28 *
29 * @par Usage:
30 * @code
31 * if ( $magicWordFactory->get( 'redirect' )->match( $text ) ) {
32 * // some code
33 * }
34 * @endcode
35 *
36 * Please avoid reading the data out of one of these objects and then writing
37 * special case code. If possible, add another match()-like function here.
38 *
39 * To add magic words in an extension, use $magicWords in a file listed in
40 * $wgExtensionMessagesFiles[].
41 *
42 * @par Example:
43 * @code
44 * $magicWords = [];
45 *
46 * $magicWords['en'] = [
47 * 'magicwordkey' => [ 0, 'case_insensitive_magic_word' ],
48 * 'magicwordkey2' => [ 1, 'CASE_sensitive_magic_word2' ],
49 * ];
50 * @endcode
51 *
52 * For magic words which are also Parser variables, add a MagicWordwgVariableIDs
53 * hook. Use string keys.
54 *
55 * @ingroup Parser
56 */
class MagicWord {
	/** @var string|null Internal name of the magic word */
	public $mId;

	/** @var string[] Synonyms for the magic word */
	public $mSynonyms;

	/** @var bool Whether the synonyms are matched case-sensitively */
	public $mCaseSensitive;

	/** @var string Delimited regex matching the word anywhere; '' until initRegex() has run */
	private $mRegex = '';

	/** @var string Delimited regex matching the word at the start of a string */
	private $mRegexStart = '';

	/** @var string Delimited regex matching the word as the whole string */
	private $mRegexStartToEnd = '';

	/** @var string Alternation of the escaped synonyms, without delimiters or modifiers */
	private $mBaseRegex = '';

	/** @var string Like $mRegex, with every "$1" placeholder turned into a capture group */
	private $mVariableRegex = '';

	/** @var string Like $mRegexStartToEnd, with every "$1" placeholder turned into a capture group */
	private $mVariableStartToEndRegex = '';

	/** @var bool Whether the last replace()/substituteCallback() call changed its input */
	private $mModified = false;

	/** @var bool Set by pregRemoveAndRecord() whenever a removal callback fires */
	private $mFound = false;

	/** @var Language */
	private $contLang;

	/**
	 * Create a new MagicWord object
	 *
	 * Use factory instead: MagicWordFactory::get
	 *
	 * @param string|null $id The internal name of the magic word
	 * @param string[]|string $syn synonyms for the magic word
	 * @param bool $cs If magic word is case sensitive
	 * @param Language|null $contLang Content language; when omitted, the content
	 *  language is fetched from MediaWikiServices
	 */
	public function __construct( $id = null, $syn = [], $cs = false, Language $contLang = null ) {
		$this->mId = $id;
		$this->mSynonyms = (array)$syn;
		$this->mCaseSensitive = $cs;
		$this->contLang = $contLang ?: MediaWikiServices::getInstance()->getContentLanguage();
	}

	/**
	 * Factory: creates an object representing an ID
	 *
	 * @param string $id The internal name of the magic word
	 *
	 * @return MagicWord
	 * @deprecated since 1.32, use MagicWordFactory::get
	 */
	public static function get( $id ) {
		wfDeprecated( __METHOD__, '1.32' );
		return MediaWikiServices::getInstance()->getMagicWordFactory()->get( $id );
	}

	/**
	 * Get an array of parser variable IDs
	 *
	 * @return string[]
	 * @deprecated since 1.32, use MagicWordFactory::getVariableIDs
	 */
	public static function getVariableIDs() {
		wfDeprecated( __METHOD__, '1.32' );
		return MediaWikiServices::getInstance()->getMagicWordFactory()->getVariableIDs();
	}

	/**
	 * Get an array of parser substitution modifier IDs
	 *
	 * @return string[]
	 * @deprecated since 1.32, use MagicWordFactory::getSubstIDs
	 */
	public static function getSubstIDs() {
		wfDeprecated( __METHOD__, '1.32' );
		return MediaWikiServices::getInstance()->getMagicWordFactory()->getSubstIDs();
	}

	/**
	 * Allow external reads of TTL array
	 *
	 * @param string $id
	 * @return int
	 * @deprecated since 1.32, use MagicWordFactory::getCacheTTL
	 */
	public static function getCacheTTL( $id ) {
		wfDeprecated( __METHOD__, '1.32' );
		return MediaWikiServices::getInstance()->getMagicWordFactory()->getCacheTTL( $id );
	}

	/**
	 * Get a MagicWordArray of double-underscore entities
	 *
	 * @return MagicWordArray
	 * @deprecated since 1.32, use MagicWordFactory::getDoubleUnderscoreArray
	 */
	public static function getDoubleUnderscoreArray() {
		wfDeprecated( __METHOD__, '1.32' );
		return MediaWikiServices::getInstance()->getMagicWordFactory()->getDoubleUnderscoreArray();
	}

	/**
	 * Initialises this object with an ID
	 *
	 * The content language is asked to fill in this object via getMagic(). If no
	 * synonyms are present afterwards, a placeholder synonym is stored and an
	 * exception is thrown.
	 *
	 * @param string $id
	 * @throws MWException If the magic word has no synonyms after loading
	 */
	public function load( $id ) {
		$this->mId = $id;
		$this->contLang->getMagic( $this );
		if ( !$this->mSynonyms ) {
			$this->mSynonyms = [ 'brionmademeputthishere' ];
			throw new MWException( "Error: invalid magic word '$id'" );
		}
	}

	/**
	 * Preliminary initialisation: builds every cached regex from the synonyms
	 *
	 * @internal Called lazily by the get*Regex() accessors
	 */
	public function initRegex() {
		// Sort the synonyms by length, descending, so that the longest synonym
		// matches in precedence to the shortest
		$synonyms = $this->mSynonyms;
		usort( $synonyms, [ $this, 'compareStringLength' ] );

		$escSyn = [];
		foreach ( $synonyms as $synonym ) {
			// In case a magic word contains /, like that's going to happen;)
			$escSyn[] = preg_quote( $synonym, '/' );
		}
		$this->mBaseRegex = implode( '|', $escSyn );

		$case = $this->mCaseSensitive ? '' : 'iu';
		$this->mRegex = "/{$this->mBaseRegex}/{$case}";
		$this->mRegexStart = "/^(?:{$this->mBaseRegex})/{$case}";
		$this->mRegexStartToEnd = "/^(?:{$this->mBaseRegex})$/{$case}";

		// preg_quote() has turned each "$1" placeholder into "\$1"; swap those for
		// a lazy capture group to obtain the "variable" flavours of the patterns.
		$this->mVariableRegex = str_replace( "\\$1", "(.*?)", $this->mRegex );
		$this->mVariableStartToEndRegex = str_replace( "\\$1", "(.*?)", $this->mRegexStartToEnd );
	}

	/**
	 * A comparison function that returns -1, 0 or 1 depending on whether the
	 * first string is longer, the same length or shorter than the second
	 * string.
	 *
	 * @param string $s1
	 * @param string $s2
	 *
	 * @return int
	 */
	public function compareStringLength( $s1, $s2 ) {
		$l1 = strlen( $s1 );
		$l2 = strlen( $s2 );
		return $l2 <=> $l1; // descending
	}

	/**
	 * Gets a regex representing matching the word
	 *
	 * @return string
	 */
	public function getRegex() {
		if ( $this->mRegex === '' ) {
			$this->initRegex();
		}
		return $this->mRegex;
	}

	/**
	 * Gets the regexp case modifier to use, i.e. "iu" or nothing, to be used if
	 * one is using MagicWord::getBaseRegex(), otherwise it'll be included in
	 * the complete expression
	 *
	 * @return string
	 */
	public function getRegexCase() {
		if ( $this->mRegex === '' ) {
			$this->initRegex();
		}

		return $this->mCaseSensitive ? '' : 'iu';
	}

	/**
	 * Gets a regex matching the word, if it is at the string start
	 *
	 * @return string
	 */
	public function getRegexStart() {
		if ( $this->mRegex === '' ) {
			$this->initRegex();
		}
		return $this->mRegexStart;
	}

	/**
	 * Gets a regex matching the word from start to end of a string
	 *
	 * @return string
	 * @since 1.23
	 */
	public function getRegexStartToEnd() {
		if ( $this->mRegexStartToEnd === '' ) {
			$this->initRegex();
		}
		return $this->mRegexStartToEnd;
	}

	/**
	 * regex without the slashes and what not
	 *
	 * @return string
	 */
	public function getBaseRegex() {
		if ( $this->mRegex === '' ) {
			$this->initRegex();
		}
		return $this->mBaseRegex;
	}

	/**
	 * Returns true if the text contains the word
	 *
	 * @param string $text
	 *
	 * @return bool
	 */
	public function match( $text ) {
		return (bool)preg_match( $this->getRegex(), $text );
	}

	/**
	 * Returns true if the text starts with the word
	 *
	 * @param string $text
	 *
	 * @return bool
	 */
	public function matchStart( $text ) {
		return (bool)preg_match( $this->getRegexStart(), $text );
	}

	/**
	 * Returns true if the text matched the word
	 *
	 * @param string $text
	 *
	 * @return bool
	 * @since 1.23
	 */
	public function matchStartToEnd( $text ) {
		return (bool)preg_match( $this->getRegexStartToEnd(), $text );
	}

	/**
	 * Returns NULL if there's no match, the value of $1 otherwise
	 * The return code is the matched string, if there's no variable
	 * part in the regex (or the variable part matched nothing) and the
	 * matched variable part ($1) if there is one.
	 *
	 * @param string $text
	 *
	 * @return string|null
	 */
	public function matchVariableStartToEnd( $text ) {
		$matches = [];
		if ( !preg_match( $this->getVariableStartToEndRegex(), $text, $matches ) ) {
			return null;
		}

		// Every synonym containing $1 contributes its own capture group, and the
		// groups belonging to synonyms that did not match are empty. The variable
		// is the first non-empty group. Compare against '' explicitly: a bare
		// array_filter() would also throw away a captured "0".
		// See also T8526
		$groupCount = count( $matches );
		for ( $i = 1; $i < $groupCount; $i++ ) {
			if ( $matches[$i] !== '' ) {
				return $matches[$i];
			}
		}

		// No variable part captured anything: fall back to the whole match
		return $matches[0];
	}

	/**
	 * Returns true if the text matches the word, and alters the
	 * input string, removing all instances of the word
	 *
	 * If the regex engine reports an error, $text is left untouched and
	 * false is returned.
	 *
	 * @param string &$text
	 *
	 * @return bool
	 */
	public function matchAndRemove( &$text ) {
		return $this->removeMatches( $this->getRegex(), $text );
	}

	/**
	 * Returns true if the text starts with the word, and alters the
	 * input string, removing the word from its start
	 *
	 * If the regex engine reports an error, $text is left untouched and
	 * false is returned.
	 *
	 * @param string &$text
	 * @return bool
	 */
	public function matchStartAndRemove( &$text ) {
		return $this->removeMatches( $this->getRegexStart(), $text );
	}

	/**
	 * Shared implementation of matchAndRemove() and matchStartAndRemove()
	 *
	 * @param string $regex Delimited regex whose matches are removed
	 * @param string &$text Text to strip; only overwritten on success
	 * @return bool Whether at least one match was removed
	 */
	private function removeMatches( $regex, &$text ) {
		$this->mFound = false;
		$stripped = preg_replace_callback( $regex, [ $this, 'pregRemoveAndRecord' ], $text );

		if ( $stripped === null ) {
			// preg_replace_callback() signals failure with null (for example
			// invalid UTF-8 under the /u modifier). Do not clobber the caller's text.
			$this->mFound = false;
			return false;
		}

		$text = $stripped;
		return $this->mFound;
	}

	/**
	 * Used in matchAndRemove()
	 *
	 * @return string
	 */
	public function pregRemoveAndRecord() {
		$this->mFound = true;
		return '';
	}

	/**
	 * Replaces the word with something else
	 *
	 * If the regex engine reports an error, $subject is returned unchanged
	 * and getWasModified() reports false.
	 *
	 * @param string $replacement
	 * @param string $subject
	 * @param int $limit
	 *
	 * @return string
	 */
	public function replace( $replacement, $subject, $limit = -1 ) {
		$res = preg_replace(
			$this->getRegex(),
			StringUtils::escapeRegexReplacement( $replacement ),
			$subject,
			$limit
		);
		if ( $res === null ) {
			// PCRE failure: nothing was replaced
			$this->mModified = false;
			return $subject;
		}
		$this->mModified = $res !== $subject;
		return $res;
	}

	/**
	 * Variable handling: {{SUBST:xxx}} style words
	 * Calls back a function to determine what to replace xxx with
	 * Input word must contain $1
	 *
	 * If the regex engine reports an error, $text is returned unchanged
	 * and getWasModified() reports false.
	 *
	 * @param string $text
	 * @param callable $callback
	 *
	 * @return string
	 */
	public function substituteCallback( $text, $callback ) {
		$res = preg_replace_callback( $this->getVariableRegex(), $callback, $text );
		if ( $res === null ) {
			// PCRE failure: nothing was substituted
			$this->mModified = false;
			return $text;
		}
		$this->mModified = $res !== $text;
		return $res;
	}

	/**
	 * Matches the word, where $1 is a wildcard
	 *
	 * @return string
	 */
	public function getVariableRegex() {
		if ( $this->mVariableRegex === '' ) {
			$this->initRegex();
		}
		return $this->mVariableRegex;
	}

	/**
	 * Matches the entire string, where $1 is a wildcard
	 *
	 * @return string
	 */
	public function getVariableStartToEndRegex() {
		if ( $this->mVariableStartToEndRegex === '' ) {
			$this->initRegex();
		}
		return $this->mVariableStartToEndRegex;
	}

	/**
	 * Accesses the synonym list directly
	 *
	 * @param int $i
	 *
	 * @return string
	 */
	public function getSynonym( $i ) {
		return $this->mSynonyms[$i];
	}

	/**
	 * @return string[]
	 */
	public function getSynonyms() {
		return $this->mSynonyms;
	}

	/**
	 * Returns true if the last call to replace() or substituteCallback()
	 * returned a modified text, otherwise false.
	 *
	 * @return bool
	 */
	public function getWasModified() {
		return $this->mModified;
	}

	/**
	 * Adds all the synonyms of this MagicWord to an array, to allow quick
	 * lookup in a list of magic words
	 *
	 * Each synonym is lower-cased with the content language and used as key.
	 *
	 * @param string[] &$array
	 * @param string $value
	 */
	public function addToArray( &$array, $value ) {
		foreach ( $this->mSynonyms as $syn ) {
			$array[$this->contLang->lc( $syn )] = $value;
		}
	}

	/**
	 * @return bool
	 */
	public function isCaseSensitive() {
		return $this->mCaseSensitive;
	}

	/**
	 * @return string
	 */
	public function getId() {
		return $this->mId;
	}
}