Merge "parser: Validate $length in padleft/padright parser functions"
[lhc/web/wiklou.git] / includes / MagicWord.php
1 <?php
2 /**
3 * See docs/magicword.txt.
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License along
16 * with this program; if not, write to the Free Software Foundation, Inc.,
17 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
18 * http://www.gnu.org/copyleft/gpl.html
19 *
20 * @file
21 * @ingroup Parser
22 */
23
24 /**
25 * This class encapsulates "magic words" such as "#redirect", __NOTOC__, etc.
26 *
27 * @par Usage:
28 * @code
29 * if (MagicWord::get( 'redirect' )->match( $text ) ) {
30 * // some code
31 * }
32 * @endcode
33 *
34 * Possible future improvements:
35 * * Simultaneous searching for a number of magic words
36 * * MagicWord::$mObjects in shared memory
37 *
38 * Please avoid reading the data out of one of these objects and then writing
39 * special case code. If possible, add another match()-like function here.
40 *
41 * To add magic words in an extension, use $magicWords in a file listed in
42 * $wgExtensionMessagesFiles[].
43 *
44 * @par Example:
45 * @code
46 * $magicWords = [];
47 *
48 * $magicWords['en'] = [
49 * 'magicwordkey' => [ 0, 'case_insensitive_magic_word' ],
50 * 'magicwordkey2' => [ 1, 'CASE_sensitive_magic_word2' ],
51 * ];
52 * @endcode
53 *
54 * For magic words which are also Parser variables, add a MagicWordwgVariableIDs
55 * hook. Use string keys.
56 *
57 * @ingroup Parser
58 */
class MagicWord {
	/** @var string Internal name (ID) of the magic word */
	public $mId;

	/** @var string[] Every textual form that is recognised for this word */
	public $mSynonyms;

	/** @var bool Whether matching is case sensitive */
	public $mCaseSensitive;

	/** @var string Delimited regex matching the word anywhere; '' until built */
	private $mRegex = '';

	/** @var string Delimited regex matching the word at the start of a string */
	private $mRegexStart = '';

	/** @var string Delimited regex matching a string consisting only of the word */
	private $mRegexStartToEnd = '';

	/** @var string Alternation of the quoted synonyms, without delimiters or modifiers */
	private $mBaseRegex = '';

	/** @var string Like $mRegex, with the quoted "$1" placeholder turned into a capture group */
	private $mVariableRegex = '';

	/** @var string Like $mRegexStartToEnd, with the quoted "$1" placeholder turned into a capture group */
	private $mVariableStartToEndRegex = '';

	/** @var bool Result of the last replace() / substituteCallback() call */
	private $mModified = false;

	/** @var bool Scratch flag set by pregRemoveAndRecord() */
	private $mFound = false;

	/** @var bool Whether the MagicWordwgVariableIDs hook has already been run */
	public static $mVariableIDsInitialised = false;

	/** @var string[] */
	public static $mVariableIDs = [
		'!',
		'currentmonth',
		'currentmonth1',
		'currentmonthname',
		'currentmonthnamegen',
		'currentmonthabbrev',
		'currentday',
		'currentday2',
		'currentdayname',
		'currentyear',
		'currenttime',
		'currenthour',
		'localmonth',
		'localmonth1',
		'localmonthname',
		'localmonthnamegen',
		'localmonthabbrev',
		'localday',
		'localday2',
		'localdayname',
		'localyear',
		'localtime',
		'localhour',
		'numberofarticles',
		'numberoffiles',
		'numberofedits',
		'articlepath',
		'pageid',
		'sitename',
		'server',
		'servername',
		'scriptpath',
		'stylepath',
		'pagename',
		'pagenamee',
		'fullpagename',
		'fullpagenamee',
		'namespace',
		'namespacee',
		'namespacenumber',
		'currentweek',
		'currentdow',
		'localweek',
		'localdow',
		'revisionid',
		'revisionday',
		'revisionday2',
		'revisionmonth',
		'revisionmonth1',
		'revisionyear',
		'revisiontimestamp',
		'revisionuser',
		'revisionsize',
		'subpagename',
		'subpagenamee',
		'talkspace',
		'talkspacee',
		'subjectspace',
		'subjectspacee',
		'talkpagename',
		'talkpagenamee',
		'subjectpagename',
		'subjectpagenamee',
		'numberofusers',
		'numberofactiveusers',
		'numberofpages',
		'currentversion',
		'rootpagename',
		'rootpagenamee',
		'basepagename',
		'basepagenamee',
		'currenttimestamp',
		'localtimestamp',
		'directionmark',
		'contentlanguage',
		'pagelanguage',
		'numberofadmins',
		'cascadingsources',
	];

	/** Array of caching hints for ParserCache
	 * @var array [ string => int ]
	 */
	public static $mCacheTTLs = [
		'currentmonth' => 86400,
		'currentmonth1' => 86400,
		'currentmonthname' => 86400,
		'currentmonthnamegen' => 86400,
		'currentmonthabbrev' => 86400,
		'currentday' => 3600,
		'currentday2' => 3600,
		'currentdayname' => 3600,
		'currentyear' => 86400,
		'currenttime' => 3600,
		'currenthour' => 3600,
		'localmonth' => 86400,
		'localmonth1' => 86400,
		'localmonthname' => 86400,
		'localmonthnamegen' => 86400,
		'localmonthabbrev' => 86400,
		'localday' => 3600,
		'localday2' => 3600,
		'localdayname' => 3600,
		'localyear' => 86400,
		'localtime' => 3600,
		'localhour' => 3600,
		'numberofarticles' => 3600,
		'numberoffiles' => 3600,
		'numberofedits' => 3600,
		'currentweek' => 3600,
		'currentdow' => 3600,
		'localweek' => 3600,
		'localdow' => 3600,
		'numberofusers' => 3600,
		'numberofactiveusers' => 3600,
		'numberofpages' => 3600,
		'currentversion' => 86400,
		'currenttimestamp' => 3600,
		'localtimestamp' => 3600,
		'pagesinnamespace' => 3600,
		'numberofadmins' => 3600,
		'numberingroup' => 3600,
	];

	/** @var string[] */
	public static $mDoubleUnderscoreIDs = [
		'notoc',
		'nogallery',
		'forcetoc',
		'toc',
		'noeditsection',
		'newsectionlink',
		'nonewsectionlink',
		'hiddencat',
		'index',
		'noindex',
		'staticredirect',
		'notitleconvert',
		'nocontentconvert',
	];

	/** @var string[] */
	public static $mSubstIDs = [
		'subst',
		'safesubst',
	];

	/** @var array [ string => MagicWord ] Objects already built by get(), keyed by ID */
	public static $mObjects = [];

	/** @var MagicWordArray|null Built on first use by getDoubleUnderscoreArray() */
	public static $mDoubleUnderscoreArray = null;

	/**
	 * Create a new MagicWord object
	 *
	 * Use factory instead: MagicWord::get
	 *
	 * @param string|null $id The internal name of the magic word
	 * @param string[]|string $syn Synonyms for the magic word; a single string
	 *  is wrapped into a one-element array
	 * @param bool $cs If magic word is case sensitive
	 */
	public function __construct( $id = null, $syn = [], $cs = false ) {
		$this->mId = $id;
		$this->mSynonyms = (array)$syn;
		$this->mCaseSensitive = $cs;
	}

	/**
	 * Factory: returns the object representing an ID, creating and caching it
	 * in self::$mObjects on first request.
	 *
	 * @param string $id The internal name of the magic word
	 *
	 * @return MagicWord
	 * @throws MWException Propagated from load() when the ID has no synonyms
	 */
	public static function &get( $id ) {
		if ( !isset( self::$mObjects[$id] ) ) {
			$mw = new MagicWord();
			$mw->load( $id );
			// Only cache after load() succeeded, so a failed ID is retried next time.
			self::$mObjects[$id] = $mw;
		}
		return self::$mObjects[$id];
	}

	/**
	 * Get an array of parser variable IDs
	 *
	 * The MagicWordwgVariableIDs hook is run once, on the first call, and may
	 * modify the list in place.
	 *
	 * @return string[]
	 */
	public static function getVariableIDs() {
		if ( !self::$mVariableIDsInitialised ) {
			# Get variable IDs
			Hooks::run( 'MagicWordwgVariableIDs', [ &self::$mVariableIDs ] );
			self::$mVariableIDsInitialised = true;
		}
		return self::$mVariableIDs;
	}

	/**
	 * Get an array of parser substitution modifier IDs
	 * @return string[]
	 */
	public static function getSubstIDs() {
		return self::$mSubstIDs;
	}

	/**
	 * Allow external reads of TTL array
	 *
	 * @param string $id
	 * @return int The entry of self::$mCacheTTLs for $id, or -1 if there is none
	 */
	public static function getCacheTTL( $id ) {
		return array_key_exists( $id, self::$mCacheTTLs )
			? self::$mCacheTTLs[$id]
			: -1;
	}

	/**
	 * Get a MagicWordArray of double-underscore entities
	 *
	 * Built on first call, after giving the GetDoubleUnderscoreIDs hook a
	 * chance to modify the ID list in place.
	 *
	 * @return MagicWordArray
	 */
	public static function getDoubleUnderscoreArray() {
		if ( self::$mDoubleUnderscoreArray === null ) {
			Hooks::run( 'GetDoubleUnderscoreIDs', [ &self::$mDoubleUnderscoreIDs ] );
			self::$mDoubleUnderscoreArray = new MagicWordArray( self::$mDoubleUnderscoreIDs );
		}
		return self::$mDoubleUnderscoreArray;
	}

	/**
	 * Clear the self::$mObjects variable
	 * For use in parser tests
	 */
	public static function clearCache() {
		self::$mObjects = [];
	}

	/**
	 * Initialises this object with an ID
	 *
	 * Hands $this to the content language's getMagic(); the synonym list is
	 * checked afterwards.
	 *
	 * @param string $id
	 * @throws MWException If no synonyms are set after the getMagic() call
	 */
	public function load( $id ) {
		global $wgContLang;
		$this->mId = $id;
		$wgContLang->getMagic( $this );
		if ( !$this->mSynonyms ) {
			// Leave a placeholder synonym behind before reporting the failure.
			$this->mSynonyms = [ 'brionmademeputthishere' ];
			throw new MWException( "Error: invalid magic word '$id'" );
		}
	}

	/**
	 * Preliminary initialisation: (re)builds every cached regex from the
	 * current synonym list and case-sensitivity flag.
	 * @private
	 */
	public function initRegex() {
		// Sort the synonyms by length, descending, so that the longest synonym
		// matches in precedence to the shortest
		$synonyms = $this->mSynonyms;
		usort( $synonyms, [ $this, 'compareStringLength' ] );

		$escSyn = [];
		foreach ( $synonyms as $synonym ) {
			// In case a magic word contains /, like that's going to happen;)
			$escSyn[] = preg_quote( $synonym, '/' );
		}
		$this->mBaseRegex = implode( '|', $escSyn );

		$case = $this->getRegexCase();
		$this->mRegex = "/{$this->mBaseRegex}/{$case}";
		$this->mRegexStart = "/^(?:{$this->mBaseRegex})/{$case}";
		$this->mRegexStartToEnd = "/^(?:{$this->mBaseRegex})$/{$case}";

		// preg_quote() turned a literal "$1" in a synonym into "\$1"; that
		// placeholder becomes a lazy capture group in the "variable" regexes.
		$placeholder = '\\$1';
		$this->mVariableRegex = str_replace( $placeholder, '(.*?)', $this->mRegex );
		$this->mVariableStartToEndRegex = str_replace(
			$placeholder, '(.*?)', $this->mRegexStartToEnd
		);
	}

	/**
	 * Build the cached regexes if that has not happened yet.
	 *
	 * initRegex() always leaves $mRegex non-empty (it contains at least the
	 * delimiters), so an empty $mRegex reliably means "not built".
	 */
	private function ensureRegex() {
		if ( $this->mRegex === '' ) {
			$this->initRegex();
		}
	}

	/**
	 * A comparison function that returns -1, 0 or 1 depending on whether the
	 * first string is longer, the same length or shorter than the second
	 * string. Lengths are measured in bytes (strlen).
	 *
	 * @param string $s1
	 * @param string $s2
	 *
	 * @return int
	 */
	public function compareStringLength( $s1, $s2 ) {
		// Operands swapped on purpose: sorts descending by length.
		return strlen( $s2 ) <=> strlen( $s1 );
	}

	/**
	 * Gets a regex representing matching the word
	 *
	 * @return string
	 */
	public function getRegex() {
		$this->ensureRegex();
		return $this->mRegex;
	}

	/**
	 * Gets the regexp case modifier to use, i.e. "iu" or nothing, to be used if
	 * one is using MagicWord::getBaseRegex(), otherwise it'll be included in
	 * the complete expression
	 *
	 * @return string
	 */
	public function getRegexCase() {
		// Depends only on the case-sensitivity flag; no regex needs to be built.
		return $this->mCaseSensitive ? '' : 'iu';
	}

	/**
	 * Gets a regex matching the word, if it is at the string start
	 *
	 * @return string
	 */
	public function getRegexStart() {
		$this->ensureRegex();
		return $this->mRegexStart;
	}

	/**
	 * Gets a regex matching the word from start to end of a string
	 *
	 * @return string
	 * @since 1.23
	 */
	public function getRegexStartToEnd() {
		$this->ensureRegex();
		return $this->mRegexStartToEnd;
	}

	/**
	 * regex without the slashes and what not
	 *
	 * @return string
	 */
	public function getBaseRegex() {
		$this->ensureRegex();
		return $this->mBaseRegex;
	}

	/**
	 * Returns true if the text contains the word
	 *
	 * @param string $text
	 *
	 * @return bool
	 */
	public function match( $text ) {
		return (bool)preg_match( $this->getRegex(), $text );
	}

	/**
	 * Returns true if the text starts with the word
	 *
	 * @param string $text
	 *
	 * @return bool
	 */
	public function matchStart( $text ) {
		return (bool)preg_match( $this->getRegexStart(), $text );
	}

	/**
	 * Returns true if the text matched the word
	 *
	 * @param string $text
	 *
	 * @return bool
	 * @since 1.23
	 */
	public function matchStartToEnd( $text ) {
		return (bool)preg_match( $this->getRegexStartToEnd(), $text );
	}

	/**
	 * Returns NULL if there's no match, the value of $1 otherwise
	 * The return code is the matched string, if there's no variable
	 * part in the regex (or the variable part is empty) and the matched
	 * variable part ($1) if there is one.
	 *
	 * @param string $text
	 *
	 * @return string|null
	 */
	public function matchVariableStartToEnd( $text ) {
		$matches = [];
		// preg_match() yields 0 for "no match" and false on error; both mean null here.
		if ( !preg_match( $this->getVariableStartToEndRegex(), $text, $matches ) ) {
			return null;
		}

		# multiple matched parts (variable match); some will be empty because of
		# synonyms. The variable will be the second non-empty one so remove any
		# blank elements and re-sort the indices.
		# See also T8526
		#
		# Only the empty string counts as blank: a bare array_filter() would
		# also throw away a captured "0".
		$matches = array_values( array_filter(
			$matches,
			function ( $part ) {
				return $part !== '';
			}
		) );

		return count( $matches ) === 1 ? $matches[0] : $matches[1];
	}

	/**
	 * Returns true if the text matches the word, and alters the
	 * input string, removing all instances of the word
	 *
	 * If the regex engine reports an error (for instance invalid UTF-8 in
	 * $text while the "u" modifier is active), $text is left untouched and
	 * false is returned.
	 *
	 * @param string &$text
	 *
	 * @return bool
	 */
	public function matchAndRemove( &$text ) {
		return $this->removeMatches( $this->getRegex(), $text );
	}

	/**
	 * Returns true if the text starts with the word, and alters the input
	 * string, removing the word from its start
	 *
	 * If the regex engine reports an error, $text is left untouched and false
	 * is returned.
	 *
	 * @param string &$text
	 * @return bool
	 */
	public function matchStartAndRemove( &$text ) {
		return $this->removeMatches( $this->getRegexStart(), $text );
	}

	/**
	 * Strip every match of $regex from $text and report whether any was found.
	 *
	 * @param string $regex Delimited regex
	 * @param string &$text Modified in place on success only
	 * @return bool
	 */
	private function removeMatches( $regex, &$text ) {
		$this->mFound = false;
		$stripped = preg_replace_callback( $regex, [ $this, 'pregRemoveAndRecord' ], $text );

		if ( $stripped === null ) {
			// PCRE error: do not overwrite the caller's text with null.
			$this->mFound = false;
			return false;
		}

		$text = $stripped;
		return $this->mFound;
	}

	/**
	 * Used in matchAndRemove(): records that a match happened and replaces it
	 * with nothing.
	 *
	 * @return string
	 */
	public function pregRemoveAndRecord() {
		$this->mFound = true;
		return '';
	}

	/**
	 * Replaces the word with something else
	 *
	 * If the regex engine reports an error, $subject is returned unchanged and
	 * getWasModified() reports false.
	 *
	 * @param string $replacement Literal text; it is escaped so that "$n" and
	 *  "\n" sequences are not interpreted as back-references
	 * @param string $subject
	 * @param int $limit
	 *
	 * @return string
	 */
	public function replace( $replacement, $subject, $limit = -1 ) {
		$res = preg_replace(
			$this->getRegex(),
			StringUtils::escapeRegexReplacement( $replacement ),
			$subject,
			$limit
		);
		if ( $res === null ) {
			// PCRE error: hand back the input rather than null.
			$this->mModified = false;
			return $subject;
		}
		$this->mModified = $res !== $subject;
		return $res;
	}

	/**
	 * Variable handling: {{SUBST:xxx}} style words
	 * Calls back a function to determine what to replace xxx with
	 * Input word must contain $1
	 *
	 * If the regex engine reports an error, $text is returned unchanged and
	 * getWasModified() reports false.
	 *
	 * @param string $text
	 * @param callable $callback
	 *
	 * @return string
	 */
	public function substituteCallback( $text, $callback ) {
		$res = preg_replace_callback( $this->getVariableRegex(), $callback, $text );
		if ( $res === null ) {
			// PCRE error: hand back the input rather than null.
			$this->mModified = false;
			return $text;
		}
		$this->mModified = $res !== $text;
		return $res;
	}

	/**
	 * Matches the word, where $1 is a wildcard
	 *
	 * @return string
	 */
	public function getVariableRegex() {
		$this->ensureRegex();
		return $this->mVariableRegex;
	}

	/**
	 * Matches the entire string, where $1 is a wildcard
	 *
	 * @return string
	 */
	public function getVariableStartToEndRegex() {
		$this->ensureRegex();
		return $this->mVariableStartToEndRegex;
	}

	/**
	 * Accesses the synonym list directly
	 *
	 * @param int $i Index into the synonym list; not range-checked
	 *
	 * @return string
	 */
	public function getSynonym( $i ) {
		return $this->mSynonyms[$i];
	}

	/**
	 * @return string[]
	 */
	public function getSynonyms() {
		return $this->mSynonyms;
	}

	/**
	 * Returns true if the last call to replace() or substituteCallback()
	 * returned a modified text, otherwise false.
	 *
	 * @return bool
	 */
	public function getWasModified() {
		return $this->mModified;
	}

	/**
	 * Adds all the synonyms of this MagicWord to an array, to allow quick
	 * lookup in a list of magic words
	 *
	 * Each synonym, lower-cased by the content language, becomes a key that
	 * maps to $value.
	 *
	 * @param string[] &$array
	 * @param string $value
	 */
	public function addToArray( &$array, $value ) {
		global $wgContLang;
		foreach ( $this->mSynonyms as $syn ) {
			$array[$wgContLang->lc( $syn )] = $value;
		}
	}

	/**
	 * @return bool
	 */
	public function isCaseSensitive() {
		return $this->mCaseSensitive;
	}

	/**
	 * @return string
	 */
	public function getId() {
		return $this->mId;
	}
}