Update ImportableUploadRevisionImporter for interwiki usernames
[lhc/web/wiklou.git] / includes / MagicWord.php
1 <?php
2 /**
3 * See docs/magicword.txt.
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License along
16 * with this program; if not, write to the Free Software Foundation, Inc.,
17 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
18 * http://www.gnu.org/copyleft/gpl.html
19 *
20 * @file
21 * @ingroup Parser
22 */
23
24 /**
25 * This class encapsulates "magic words" such as "#redirect", __NOTOC__, etc.
26 *
27 * @par Usage:
28 * @code
29 * if (MagicWord::get( 'redirect' )->match( $text ) ) {
30 * // some code
31 * }
32 * @endcode
33 *
34 * Possible future improvements:
35 * * Simultaneous searching for a number of magic words
36 * * MagicWord::$mObjects in shared memory
37 *
38 * Please avoid reading the data out of one of these objects and then writing
39 * special case code. If possible, add another match()-like function here.
40 *
41 * To add magic words in an extension, use $magicWords in a file listed in
42 * $wgExtensionMessagesFiles[].
43 *
44 * @par Example:
45 * @code
46 * $magicWords = [];
47 *
48 * $magicWords['en'] = [
49 * 'magicwordkey' => [ 0, 'case_insensitive_magic_word' ],
50 * 'magicwordkey2' => [ 1, 'CASE_sensitive_magic_word2' ],
51 * ];
52 * @endcode
53 *
54 * For magic words which are also Parser variables, add a MagicWordwgVariableIDs
55 * hook. Use string keys.
56 *
57 * @ingroup Parser
58 */
class MagicWord {
	/** @var string Internal name (ID) of this magic word */
	public $mId;

	/** @var string[] All synonyms (localised spellings) of this magic word */
	public $mSynonyms;

	/** @var bool Whether matching is case sensitive */
	public $mCaseSensitive;

	/** @var string Lazily built by initRegex(): matches the word anywhere */
	private $mRegex = '';

	/** @var string Lazily built by initRegex(): matches the word at the string start */
	private $mRegexStart = '';

	/** @var string Lazily built by initRegex(): matches the word as the whole string */
	private $mRegexStartToEnd = '';

	/** @var string Lazily built by initRegex(): the bare alternation, no delimiters */
	private $mBaseRegex = '';

	/** @var string Lazily built by initRegex(): like $mRegex, with $1 as a wildcard */
	private $mVariableRegex = '';

	/** @var string Lazily built by initRegex(): like $mRegexStartToEnd, with $1 as a wildcard */
	private $mVariableStartToEndRegex = '';

	/** @var bool Result of the last replace() / substituteCallback() call */
	private $mModified = false;

	/** @var bool Scratch flag set by pregRemoveAndRecord() during the *AndRemove() calls */
	private $mFound = false;

	/** @var bool Whether the MagicWordwgVariableIDs hook has already been run */
	public static $mVariableIDsInitialised = false;

	/** @var string[] IDs of the magic words that are parser variables */
	public static $mVariableIDs = [
		'!',
		'currentmonth',
		'currentmonth1',
		'currentmonthname',
		'currentmonthnamegen',
		'currentmonthabbrev',
		'currentday',
		'currentday2',
		'currentdayname',
		'currentyear',
		'currenttime',
		'currenthour',
		'localmonth',
		'localmonth1',
		'localmonthname',
		'localmonthnamegen',
		'localmonthabbrev',
		'localday',
		'localday2',
		'localdayname',
		'localyear',
		'localtime',
		'localhour',
		'numberofarticles',
		'numberoffiles',
		'numberofedits',
		'articlepath',
		'pageid',
		'sitename',
		'server',
		'servername',
		'scriptpath',
		'stylepath',
		'pagename',
		'pagenamee',
		'fullpagename',
		'fullpagenamee',
		'namespace',
		'namespacee',
		'namespacenumber',
		'currentweek',
		'currentdow',
		'localweek',
		'localdow',
		'revisionid',
		'revisionday',
		'revisionday2',
		'revisionmonth',
		'revisionmonth1',
		'revisionyear',
		'revisiontimestamp',
		'revisionuser',
		'revisionsize',
		'subpagename',
		'subpagenamee',
		'talkspace',
		'talkspacee',
		'subjectspace',
		'subjectspacee',
		'talkpagename',
		'talkpagenamee',
		'subjectpagename',
		'subjectpagenamee',
		'numberofusers',
		'numberofactiveusers',
		'numberofpages',
		'currentversion',
		'rootpagename',
		'rootpagenamee',
		'basepagename',
		'basepagenamee',
		'currenttimestamp',
		'localtimestamp',
		'directionmark',
		'contentlanguage',
		'pagelanguage',
		'numberofadmins',
		'cascadingsources',
	];

	/**
	 * Array of caching hints for ParserCache, keyed by magic word ID.
	 * Values are presumably in seconds (TODO confirm against ParserCache).
	 *
	 * @var array [ string => int ]
	 */
	public static $mCacheTTLs = [
		'currentmonth' => 86400,
		'currentmonth1' => 86400,
		'currentmonthname' => 86400,
		'currentmonthnamegen' => 86400,
		'currentmonthabbrev' => 86400,
		'currentday' => 3600,
		'currentday2' => 3600,
		'currentdayname' => 3600,
		'currentyear' => 86400,
		'currenttime' => 3600,
		'currenthour' => 3600,
		'localmonth' => 86400,
		'localmonth1' => 86400,
		'localmonthname' => 86400,
		'localmonthnamegen' => 86400,
		'localmonthabbrev' => 86400,
		'localday' => 3600,
		'localday2' => 3600,
		'localdayname' => 3600,
		'localyear' => 86400,
		'localtime' => 3600,
		'localhour' => 3600,
		'numberofarticles' => 3600,
		'numberoffiles' => 3600,
		'numberofedits' => 3600,
		'currentweek' => 3600,
		'currentdow' => 3600,
		'localweek' => 3600,
		'localdow' => 3600,
		'numberofusers' => 3600,
		'numberofactiveusers' => 3600,
		'numberofpages' => 3600,
		'currentversion' => 86400,
		'currenttimestamp' => 3600,
		'localtimestamp' => 3600,
		'pagesinnamespace' => 3600,
		'numberofadmins' => 3600,
		'numberingroup' => 3600,
	];

	/** @var string[] IDs of the double-underscore (behaviour switch) magic words */
	public static $mDoubleUnderscoreIDs = [
		'notoc',
		'nogallery',
		'forcetoc',
		'toc',
		'noeditsection',
		'newsectionlink',
		'nonewsectionlink',
		'hiddencat',
		'index',
		'noindex',
		'staticredirect',
		'notitleconvert',
		'nocontentconvert',
	];

	/** @var string[] IDs of the substitution modifier magic words */
	public static $mSubstIDs = [
		'subst',
		'safesubst',
	];

	/** @var array [ string => MagicWord ] Per-request cache used by get() */
	public static $mObjects = [];

	/** @var MagicWordArray|null Built on first call to getDoubleUnderscoreArray() */
	public static $mDoubleUnderscoreArray = null;

	/**
	 * Create a new MagicWord object
	 *
	 * Use factory instead: MagicWord::get
	 *
	 * @param string|null $id The internal name of the magic word
	 * @param string[]|string $syn synonyms for the magic word; a single
	 *   string is wrapped into a one-element array
	 * @param bool $cs If magic word is case sensitive
	 */
	public function __construct( $id = null, $syn = [], $cs = false ) {
		$this->mId = $id;
		$this->mSynonyms = (array)$syn;
		$this->mCaseSensitive = $cs;
	}

	/**
	 * Factory: returns the (cached) object representing an ID, creating and
	 * loading it on first use.
	 *
	 * @param string $id The internal name of the magic word
	 *
	 * @return MagicWord
	 * @throws MWException If load() rejects the ID; nothing is cached then
	 */
	public static function &get( $id ) {
		if ( !isset( self::$mObjects[$id] ) ) {
			$word = new MagicWord();
			$word->load( $id );
			// Only cache after a successful load()
			self::$mObjects[$id] = $word;
		}

		return self::$mObjects[$id];
	}

	/**
	 * Get an array of parser variable IDs
	 *
	 * The MagicWordwgVariableIDs hook is run once, on the first call, and may
	 * extend self::$mVariableIDs in place.
	 *
	 * @return string[]
	 */
	public static function getVariableIDs() {
		if ( !self::$mVariableIDsInitialised ) {
			Hooks::run( 'MagicWordwgVariableIDs', [ &self::$mVariableIDs ] );
			self::$mVariableIDsInitialised = true;
		}

		return self::$mVariableIDs;
	}

	/**
	 * Get an array of parser substitution modifier IDs
	 *
	 * @return string[]
	 */
	public static function getSubstIDs() {
		return self::$mSubstIDs;
	}

	/**
	 * Allow external reads of TTL array
	 *
	 * @param string $id
	 * @return int The entry of self::$mCacheTTLs for $id, or -1 if there is none
	 */
	public static function getCacheTTL( $id ) {
		return array_key_exists( $id, self::$mCacheTTLs )
			? self::$mCacheTTLs[$id]
			: -1;
	}

	/**
	 * Get a MagicWordArray of double-underscore entities
	 *
	 * Built once; the GetDoubleUnderscoreIDs hook may extend
	 * self::$mDoubleUnderscoreIDs before the array is created.
	 *
	 * @return MagicWordArray
	 */
	public static function getDoubleUnderscoreArray() {
		if ( self::$mDoubleUnderscoreArray === null ) {
			Hooks::run( 'GetDoubleUnderscoreIDs', [ &self::$mDoubleUnderscoreIDs ] );
			self::$mDoubleUnderscoreArray = new MagicWordArray( self::$mDoubleUnderscoreIDs );
		}

		return self::$mDoubleUnderscoreArray;
	}

	/**
	 * Clear the self::$mObjects variable
	 * For use in parser tests
	 */
	public static function clearCache() {
		self::$mObjects = [];
	}

	/**
	 * Initialises this object with an ID
	 *
	 * The object is handed to $wgContLang->getMagic(), which is expected to
	 * fill in the synonyms (and presumably the case-sensitivity flag).
	 *
	 * @param string $id
	 * @throws MWException If no synonyms were set for $id
	 */
	public function load( $id ) {
		global $wgContLang;

		$this->mId = $id;
		$wgContLang->getMagic( $this );

		if ( !$this->mSynonyms ) {
			// Leave a placeholder synonym behind before reporting the error
			$this->mSynonyms = [ 'brionmademeputthishere' ];
			throw new MWException( "Error: invalid magic word '$id'" );
		}
	}

	/**
	 * Preliminary initialisation: builds all regex variants from the synonyms
	 * @private
	 */
	public function initRegex() {
		// Sort the synonyms by length, descending, so that the longest synonym
		// matches in precedence to the shortest
		$synonyms = $this->mSynonyms;
		usort( $synonyms, [ $this, 'compareStringLength' ] );

		// Escape for use inside a '/'-delimited pattern
		// (in case a magic word contains /, like that's going to happen;)
		$escaped = array_map( function ( $synonym ) {
			return preg_quote( $synonym, '/' );
		}, $synonyms );
		$this->mBaseRegex = implode( '|', $escaped );

		$case = $this->mCaseSensitive ? '' : 'iu';
		$this->mRegex = "/{$this->mBaseRegex}/{$case}";
		$this->mRegexStart = "/^(?:{$this->mBaseRegex})/{$case}";
		$this->mRegexStartToEnd = "/^(?:{$this->mBaseRegex})$/{$case}";

		// preg_quote() turned the "$1" placeholder into "\$1"; swap that for a
		// non-greedy capture group to obtain the "variable" patterns.
		$placeholder = '\\$1';
		$wildcard = '(.*?)';
		$this->mVariableRegex = str_replace( $placeholder, $wildcard, $this->mRegex );
		$this->mVariableStartToEndRegex = str_replace(
			$placeholder,
			$wildcard,
			$this->mRegexStartToEnd
		);
	}

	/**
	 * A comparison function that returns -1, 0 or 1 depending on whether the
	 * first string is longer, the same length or shorter than the second
	 * string.
	 *
	 * @param string $s1
	 * @param string $s2
	 *
	 * @return int
	 */
	public function compareStringLength( $s1, $s2 ) {
		$l1 = strlen( $s1 );
		$l2 = strlen( $s2 );

		if ( $l1 === $l2 ) {
			return 0;
		}

		// Longer strings sort first
		return $l1 > $l2 ? -1 : 1;
	}

	/**
	 * Gets a regex representing matching the word
	 *
	 * @return string
	 */
	public function getRegex() {
		if ( $this->mRegex === '' ) {
			$this->initRegex();
		}

		return $this->mRegex;
	}

	/**
	 * Gets the regexp modifiers to use, i.e. 'iu' (case insensitive) or
	 * nothing (case sensitive), to be used if one is using
	 * MagicWord::getBaseRegex(), otherwise it'll be included in the complete
	 * expression
	 *
	 * @return string
	 */
	public function getRegexCase() {
		if ( $this->mRegex === '' ) {
			$this->initRegex();
		}

		return $this->mCaseSensitive ? '' : 'iu';
	}

	/**
	 * Gets a regex matching the word, if it is at the string start
	 *
	 * @return string
	 */
	public function getRegexStart() {
		if ( $this->mRegex === '' ) {
			$this->initRegex();
		}

		return $this->mRegexStart;
	}

	/**
	 * Gets a regex matching the word from start to end of a string
	 *
	 * @return string
	 * @since 1.23
	 */
	public function getRegexStartToEnd() {
		if ( $this->mRegexStartToEnd === '' ) {
			$this->initRegex();
		}

		return $this->mRegexStartToEnd;
	}

	/**
	 * regex without the slashes and what not
	 *
	 * @return string
	 */
	public function getBaseRegex() {
		if ( $this->mRegex === '' ) {
			$this->initRegex();
		}

		return $this->mBaseRegex;
	}

	/**
	 * Returns true if the text contains the word
	 *
	 * @param string $text
	 *
	 * @return bool
	 */
	public function match( $text ) {
		return (bool)preg_match( $this->getRegex(), $text );
	}

	/**
	 * Returns true if the text starts with the word
	 *
	 * @param string $text
	 *
	 * @return bool
	 */
	public function matchStart( $text ) {
		return (bool)preg_match( $this->getRegexStart(), $text );
	}

	/**
	 * Returns true if the text matched the word
	 *
	 * @param string $text
	 *
	 * @return bool
	 * @since 1.23
	 */
	public function matchStartToEnd( $text ) {
		return (bool)preg_match( $this->getRegexStartToEnd(), $text );
	}

	/**
	 * Returns NULL if there's no match, the value of $1 otherwise
	 * The return code is the matched string, if there's no variable
	 * part in the regex and the matched variable part ($1) if there
	 * is one.
	 *
	 * @param string $text
	 *
	 * @return string|null
	 */
	public function matchVariableStartToEnd( $text ) {
		$matches = [];
		if ( !preg_match( $this->getVariableStartToEndRegex(), $text, $matches ) ) {
			return null;
		}

		# multiple matched parts (variable match); some will be empty because of
		# synonyms. The variable will be the second non-empty one so remove any
		# blank elements and re-sort the indices.
		# See also T8526
		#
		# Only empty strings are dropped: a bare array_filter() would also
		# discard a captured value of '0' and make e.g. "0px" return the whole
		# match instead of "0".
		$matches = array_values( array_filter( $matches, function ( $part ) {
			return $part !== '';
		} ) );

		return count( $matches ) === 1 ? $matches[0] : $matches[1];
	}

	/**
	 * Returns true if the text matches the word, and alters the
	 * input string, removing all instances of the word
	 *
	 * @param string &$text
	 *
	 * @return bool
	 */
	public function matchAndRemove( &$text ) {
		return $this->removeAndRecord( $this->getRegex(), $text );
	}

	/**
	 * Returns true if the text starts with the word, and alters the
	 * input string, removing the word from its start
	 *
	 * @param string &$text
	 * @return bool
	 */
	public function matchStartAndRemove( &$text ) {
		return $this->removeAndRecord( $this->getRegexStart(), $text );
	}

	/**
	 * Shared implementation of matchAndRemove() and matchStartAndRemove():
	 * strips every match of $regex from $text.
	 *
	 * @param string $regex Complete regex to remove matches of
	 * @param string &$text Text to alter in place
	 * @return bool Whether at least one match was removed
	 */
	private function removeAndRecord( $regex, &$text ) {
		$this->mFound = false;
		// The callback flips mFound for every match and replaces it with ''
		$text = preg_replace_callback( $regex, [ $this, 'pregRemoveAndRecord' ], $text );

		return $this->mFound;
	}

	/**
	 * Used in matchAndRemove()
	 *
	 * Records that a match happened and replaces it with nothing.
	 *
	 * @return string
	 */
	public function pregRemoveAndRecord() {
		$this->mFound = true;
		return '';
	}

	/**
	 * Replaces the word with something else
	 *
	 * The replacement is inserted literally: it is escaped with
	 * StringUtils::escapeRegexReplacement() before being passed to
	 * preg_replace().
	 *
	 * @param string $replacement
	 * @param string $subject
	 * @param int $limit Maximum number of replacements, -1 for no limit
	 *
	 * @return string
	 */
	public function replace( $replacement, $subject, $limit = -1 ) {
		$result = preg_replace(
			$this->getRegex(),
			StringUtils::escapeRegexReplacement( $replacement ),
			$subject,
			$limit
		);
		$this->mModified = $result !== $subject;

		return $result;
	}

	/**
	 * Variable handling: {{SUBST:xxx}} style words
	 * Calls back a function to determine what to replace xxx with
	 * Input word must contain $1
	 *
	 * @param string $text
	 * @param callable $callback Receives the preg match array
	 *
	 * @return string
	 */
	public function substituteCallback( $text, $callback ) {
		$result = preg_replace_callback( $this->getVariableRegex(), $callback, $text );
		$this->mModified = $result !== $text;

		return $result;
	}

	/**
	 * Matches the word, where $1 is a wildcard
	 *
	 * @return string
	 */
	public function getVariableRegex() {
		if ( $this->mVariableRegex === '' ) {
			$this->initRegex();
		}

		return $this->mVariableRegex;
	}

	/**
	 * Matches the entire string, where $1 is a wildcard
	 *
	 * @return string
	 */
	public function getVariableStartToEndRegex() {
		if ( $this->mVariableStartToEndRegex === '' ) {
			$this->initRegex();
		}

		return $this->mVariableStartToEndRegex;
	}

	/**
	 * Accesses the synonym list directly
	 *
	 * @param int $i Index into the synonym list; not range-checked
	 *
	 * @return string
	 */
	public function getSynonym( $i ) {
		return $this->mSynonyms[$i];
	}

	/**
	 * @return string[] All synonyms of this magic word
	 */
	public function getSynonyms() {
		return $this->mSynonyms;
	}

	/**
	 * Returns true if the last call to replace() or substituteCallback()
	 * returned a modified text, otherwise false.
	 *
	 * @return bool
	 */
	public function getWasModified() {
		return $this->mModified;
	}

	/**
	 * Adds all the synonyms of this MagicWord to an array, to allow quick
	 * lookup in a list of magic words
	 *
	 * Each synonym is lower-cased with $wgContLang->lc() and used as a key.
	 *
	 * @param string[] &$array
	 * @param string $value Value stored under every synonym key
	 */
	public function addToArray( &$array, $value ) {
		global $wgContLang;

		foreach ( $this->mSynonyms as $synonym ) {
			$array[$wgContLang->lc( $synonym )] = $value;
		}
	}

	/**
	 * @return bool Whether this magic word is matched case sensitively
	 */
	public function isCaseSensitive() {
		return $this->mCaseSensitive;
	}

	/**
	 * @return string The internal name of this magic word
	 */
	public function getId() {
		return $this->mId;
	}
}