Fix fixme on r107328, attempting to use $this in a static method
[lhc/web/wiklou.git] / includes / MagicWord.php
1 <?php
2 /**
3 * File for magic words
4 *
5 * See docs/magicword.txt
6 *
7 * @file
8 * @ingroup Parser
9 */
10
/**
 * This class encapsulates "magic words" such as #redirect, __NOTOC__, etc.
 * Usage:
 *     if ( MagicWord::get( 'redirect' )->match( $text ) )
 *
 * Possible future improvements:
 *   * Simultaneous searching for a number of magic words
 *   * MagicWord::$mObjects in shared memory
 *
 * Please avoid reading the data out of one of these objects and then writing
 * special case code. If possible, add another match()-like function here.
 *
 * To add magic words in an extension, use the LanguageGetMagic hook. For
 * magic words which are also Parser variables, add a MagicWordwgVariableIDs
 * hook. Use string keys.
 *
 * @ingroup Parser
 */
class MagicWord {
	/*
	 * Instance state. Originally marked @private; the properties stay public
	 * because load() hands $this to $wgContLang->getMagic(), which is expected
	 * to fill in the synonyms and the case-sensitivity flag.
	 */
	public $mId, $mSynonyms, $mCaseSensitive;
	/** Lazily built regexes; an empty string means "not built yet" (see initRegex()) */
	public $mRegex = '';
	public $mRegexStart = '';
	public $mBaseRegex = '';
	public $mVariableRegex = '';
	public $mVariableStartToEndRegex = '';
	/** Set by replace() and substituteCallback() */
	public $mModified = false;
	/** Set by matchAndRemove() and matchStartAndRemove() */
	public $mFound = false;

	/** Whether the MagicWordwgVariableIDs hook has already been run */
	static public $mVariableIDsInitialised = false;
	/** IDs of the magic words that are parser variables */
	static public $mVariableIDs = array(
		'currentmonth',
		'currentmonth1',
		'currentmonthname',
		'currentmonthnamegen',
		'currentmonthabbrev',
		'currentday',
		'currentday2',
		'currentdayname',
		'currentyear',
		'currenttime',
		'currenthour',
		'localmonth',
		'localmonth1',
		'localmonthname',
		'localmonthnamegen',
		'localmonthabbrev',
		'localday',
		'localday2',
		'localdayname',
		'localyear',
		'localtime',
		'localhour',
		'numberofarticles',
		'numberoffiles',
		'numberofedits',
		'articlepath',
		'pageid',
		'sitename',
		'server',
		'servername',
		'scriptpath',
		'stylepath',
		'pagename',
		'pagenamee',
		'fullpagename',
		'fullpagenamee',
		'namespace',
		'namespacee',
		'currentweek',
		'currentdow',
		'localweek',
		'localdow',
		'revisionid',
		'revisionday',
		'revisionday2',
		'revisionmonth',
		'revisionmonth1',
		'revisionyear',
		'revisiontimestamp',
		'revisionuser',
		'subpagename',
		'subpagenamee',
		'talkspace',
		'talkspacee',
		'subjectspace',
		'subjectspacee',
		'talkpagename',
		'talkpagenamee',
		'subjectpagename',
		'subjectpagenamee',
		'numberofusers',
		'numberofactiveusers',
		'numberofpages',
		'currentversion',
		'basepagename',
		'basepagenamee',
		'currenttimestamp',
		'localtimestamp',
		'directionmark',
		'contentlanguage',
		'numberofadmins',
		'numberofviews',
	);

	/* Array of caching hints for ParserCache (magic word ID => TTL) */
	static public $mCacheTTLs = array(
		'currentmonth' => 86400,
		'currentmonth1' => 86400,
		'currentmonthname' => 86400,
		'currentmonthnamegen' => 86400,
		'currentmonthabbrev' => 86400,
		'currentday' => 3600,
		'currentday2' => 3600,
		'currentdayname' => 3600,
		'currentyear' => 86400,
		'currenttime' => 3600,
		'currenthour' => 3600,
		'localmonth' => 86400,
		'localmonth1' => 86400,
		'localmonthname' => 86400,
		'localmonthnamegen' => 86400,
		'localmonthabbrev' => 86400,
		'localday' => 3600,
		'localday2' => 3600,
		'localdayname' => 3600,
		'localyear' => 86400,
		'localtime' => 3600,
		'localhour' => 3600,
		'numberofarticles' => 3600,
		'numberoffiles' => 3600,
		'numberofedits' => 3600,
		'currentweek' => 3600,
		'currentdow' => 3600,
		'localweek' => 3600,
		'localdow' => 3600,
		'numberofusers' => 3600,
		'numberofactiveusers' => 3600,
		'numberofpages' => 3600,
		'currentversion' => 86400,
		'currenttimestamp' => 3600,
		'localtimestamp' => 3600,
		'pagesinnamespace' => 3600,
		'numberofadmins' => 3600,
		'numberofviews' => 3600,
		'numberingroup' => 3600,
	);

	/** IDs of the double-underscore (behaviour switch) magic words */
	static public $mDoubleUnderscoreIDs = array(
		'notoc',
		'nogallery',
		'forcetoc',
		'toc',
		'noeditsection',
		'newsectionlink',
		'nonewsectionlink',
		'hiddencat',
		'index',
		'noindex',
		'staticredirect',
		'notitleconvert',
		'nocontentconvert',
	);

	/** IDs of the substitution modifiers */
	static public $mSubstIDs = array(
		'subst',
		'safesubst',
	);

	/** Cache of MagicWord objects created by get(), keyed by ID */
	static public $mObjects = array();
	/** Lazily created MagicWordArray, see getDoubleUnderscoreArray() */
	static public $mDoubleUnderscoreArray = null;

	/**
	 * @param $id mixed Magic word ID
	 * @param $syn array|string Synonym(s); a scalar is cast to a one-element array
	 * @param $cs bool Whether matching is case-sensitive
	 */
	public function __construct( $id = 0, $syn = array(), $cs = false ) {
		$this->mId = $id;
		$this->mSynonyms = (array)$syn;
		$this->mCaseSensitive = $cs;
	}

	/**
	 * Factory: creates an object representing an ID.
	 * Objects are cached in self::$mObjects, so repeated calls with the same
	 * ID return the same instance.
	 *
	 * @param $id
	 *
	 * @return MagicWord
	 */
	public static function &get( $id ) {
		if ( !isset( self::$mObjects[$id] ) ) {
			$mw = new MagicWord();
			$mw->load( $id );
			self::$mObjects[$id] = $mw;
		}
		return self::$mObjects[$id];
	}

	/**
	 * Get an array of parser variable IDs.
	 * On the first call the MagicWordMagicWords and MagicWordwgVariableIDs
	 * hooks are run so that extensions can contribute their own IDs.
	 *
	 * @return array
	 */
	public static function getVariableIDs() {
		if ( !self::$mVariableIDsInitialised ) {
			# Deprecated constant definition hook, available for extensions that need it
			$magicWords = array();
			wfRunHooks( 'MagicWordMagicWords', array( &$magicWords ) );
			foreach ( $magicWords as $word ) {
				define( $word, $word );
			}

			# Get variable IDs
			wfRunHooks( 'MagicWordwgVariableIDs', array( &self::$mVariableIDs ) );
			self::$mVariableIDsInitialised = true;
		}
		return self::$mVariableIDs;
	}

	/**
	 * Get an array of parser substitution modifier IDs
	 *
	 * @return array
	 */
	public static function getSubstIDs() {
		return self::$mSubstIDs;
	}

	/**
	 * Allow external reads of the TTL array
	 *
	 * @param $id string Magic word ID
	 * @return int The caching hint registered for $id, or -1 if there is none
	 */
	public static function getCacheTTL( $id ) {
		if ( array_key_exists( $id, self::$mCacheTTLs ) ) {
			return self::$mCacheTTLs[$id];
		}
		return -1;
	}

	/**
	 * Get a MagicWordArray of double-underscore entities
	 *
	 * @return MagicWordArray
	 */
	public static function getDoubleUnderscoreArray() {
		if ( is_null( self::$mDoubleUnderscoreArray ) ) {
			self::$mDoubleUnderscoreArray = new MagicWordArray( self::$mDoubleUnderscoreIDs );
		}
		return self::$mDoubleUnderscoreArray;
	}

	/**
	 * Clear the self::$mObjects variable
	 * For use in parser tests
	 */
	public static function clearCache() {
		self::$mObjects = array();
	}

	/**
	 * Initialises this object with an ID
	 *
	 * @param $id
	 */
	public function load( $id ) {
		global $wgContLang;
		wfProfileIn( __METHOD__ );
		$this->mId = $id;
		$wgContLang->getMagic( $this );
		if ( !$this->mSynonyms ) {
			# Unknown magic word: fall back to a placeholder synonym and log
			# the problem instead of throwing.
			$this->mSynonyms = array( 'dkjsagfjsgashfajsh' );
			#throw new MWException( "Error: invalid magic word '$id'" );
			wfDebugLog( 'exception', "Error: invalid magic word '$id'\n" );
		}
		wfProfileOut( __METHOD__ );
	}

	/**
	 * Preliminary initialisation: builds all the regex members from the
	 * synonym list.
	 * @private
	 */
	public function initRegex() {
		// Sort the synonyms by length, descending, so that the longest synonym
		// matches in precedence to the shortest
		$synonyms = $this->mSynonyms;
		usort( $synonyms, array( $this, 'compareStringLength' ) );

		$escSyn = array();
		foreach ( $synonyms as $synonym ) {
			// In case a magic word contains /, like that's going to happen;)
			$escSyn[] = preg_quote( $synonym, '/' );
		}
		$this->mBaseRegex = implode( '|', $escSyn );

		$case = $this->mCaseSensitive ? '' : 'iu';
		$this->mRegex = "/{$this->mBaseRegex}/{$case}";
		$this->mRegexStart = "/^(?:{$this->mBaseRegex})/{$case}";
		// preg_quote() turned "$1" into "\$1"; swap that for a lazy capture group
		$this->mVariableRegex = str_replace( "\\$1", "(.*?)", $this->mRegex );
		$this->mVariableStartToEndRegex = str_replace( "\\$1", "(.*?)",
			"/^(?:{$this->mBaseRegex})$/{$case}" );
	}

	/**
	 * A comparison function that returns -1, 0 or 1 depending on whether the
	 * first string is longer, the same length or shorter than the second
	 * string.
	 *
	 * @param $s1 string
	 * @param $s2 string
	 *
	 * @return int
	 */
	public function compareStringLength( $s1, $s2 ) {
		$l1 = strlen( $s1 );
		$l2 = strlen( $s2 );
		if ( $l1 < $l2 ) {
			return 1;
		}
		if ( $l1 > $l2 ) {
			return -1;
		}
		return 0;
	}

	/**
	 * Gets a regex representing matching the word
	 *
	 * @return string
	 */
	public function getRegex() {
		if ( $this->mRegex === '' ) {
			$this->initRegex();
		}
		return $this->mRegex;
	}

	/**
	 * Gets the regexp case modifier to use, i.e. i or nothing, to be used if
	 * one is using MagicWord::getBaseRegex(), otherwise it'll be included in
	 * the complete expression
	 *
	 * @return string
	 */
	public function getRegexCase() {
		if ( $this->mRegex === '' ) {
			$this->initRegex();
		}
		return $this->mCaseSensitive ? '' : 'iu';
	}

	/**
	 * Gets a regex matching the word, if it is at the string start
	 *
	 * @return string
	 */
	public function getRegexStart() {
		if ( $this->mRegex === '' ) {
			$this->initRegex();
		}
		return $this->mRegexStart;
	}

	/**
	 * regex without the slashes and what not
	 *
	 * @return string
	 */
	public function getBaseRegex() {
		if ( $this->mRegex === '' ) {
			$this->initRegex();
		}
		return $this->mBaseRegex;
	}

	/**
	 * Returns true if the text contains the word
	 *
	 * @param $text string
	 *
	 * @return bool
	 */
	public function match( $text ) {
		return (bool)preg_match( $this->getRegex(), $text );
	}

	/**
	 * Returns true if the text starts with the word
	 *
	 * @param $text string
	 *
	 * @return bool
	 */
	public function matchStart( $text ) {
		return (bool)preg_match( $this->getRegexStart(), $text );
	}

	/**
	 * Returns NULL if there's no match, the value of $1 otherwise
	 * The return code is the matched string, if there's no variable
	 * part in the regex and the matched variable part ($1) if there
	 * is one.
	 *
	 * @param $text string
	 *
	 * @return string|null
	 */
	public function matchVariableStartToEnd( $text ) {
		$matches = array();
		if ( !preg_match( $this->getVariableStartToEndRegex(), $text, $matches ) ) {
			return null;
		}

		# multiple matched parts (variable match); some will be empty because of
		# synonyms. The variable will be the second non-empty one so collect the
		# non-blank elements in order.
		# See also bug 6526
		#
		# Only the empty string counts as blank here: a bare array_filter()
		# would also throw away a captured "0" and make the whole match be
		# returned instead of the parameter.
		$nonEmpty = array();
		foreach ( $matches as $match ) {
			if ( $match !== '' ) {
				$nonEmpty[] = $match;
			}
		}

		if ( count( $nonEmpty ) > 1 ) {
			return $nonEmpty[1];
		}
		# No variable part (or an entirely empty match): return the whole match
		return $matches[0];
	}

	/**
	 * Returns true if the text matches the word, and alters the
	 * input string, removing all instances of the word
	 *
	 * @param $text string
	 *
	 * @return bool
	 */
	public function matchAndRemove( &$text ) {
		$this->mFound = false;
		// A plain $this is enough for a callback; &$this is not a real
		// reference any more since PHP 7.1.
		$text = preg_replace_callback( $this->getRegex(), array( $this, 'pregRemoveAndRecord' ), $text );
		return $this->mFound;
	}

	/**
	 * Returns true if the text starts with the word, and alters the
	 * input string, removing the word from its start
	 *
	 * @param $text string
	 * @return bool
	 */
	public function matchStartAndRemove( &$text ) {
		$this->mFound = false;
		$text = preg_replace_callback( $this->getRegexStart(), array( $this, 'pregRemoveAndRecord' ), $text );
		return $this->mFound;
	}

	/**
	 * Used in matchAndRemove() and matchStartAndRemove(): records that a
	 * match happened and replaces it with nothing.
	 *
	 * @return string
	 */
	public function pregRemoveAndRecord() {
		$this->mFound = true;
		return '';
	}

	/**
	 * Replaces the word with something else
	 *
	 * @param $replacement
	 * @param $subject
	 * @param $limit int
	 *
	 * @return string
	 */
	public function replace( $replacement, $subject, $limit = -1 ) {
		$res = preg_replace(
			$this->getRegex(),
			StringUtils::escapeRegexReplacement( $replacement ),
			$subject,
			$limit
		);
		$this->mModified = ( $res !== $subject );
		return $res;
	}

	/**
	 * Variable handling: {{SUBST:xxx}} style words
	 * Calls back a function to determine what to replace xxx with
	 * Input word must contain $1
	 *
	 * @param $text string
	 * @param $callback
	 *
	 * @return string
	 */
	public function substituteCallback( $text, $callback ) {
		$res = preg_replace_callback( $this->getVariableRegex(), $callback, $text );
		$this->mModified = ( $res !== $text );
		return $res;
	}

	/**
	 * Matches the word, where $1 is a wildcard
	 *
	 * @return string
	 */
	public function getVariableRegex() {
		if ( $this->mVariableRegex === '' ) {
			$this->initRegex();
		}
		return $this->mVariableRegex;
	}

	/**
	 * Matches the entire string, where $1 is a wildcard
	 *
	 * @return string
	 */
	public function getVariableStartToEndRegex() {
		if ( $this->mVariableStartToEndRegex === '' ) {
			$this->initRegex();
		}
		return $this->mVariableStartToEndRegex;
	}

	/**
	 * Accesses the synonym list directly
	 *
	 * @param $i int
	 *
	 * @return string
	 */
	public function getSynonym( $i ) {
		return $this->mSynonyms[$i];
	}

	/**
	 * @return array
	 */
	public function getSynonyms() {
		return $this->mSynonyms;
	}

	/**
	 * Returns true if the last call to replace() or substituteCallback()
	 * returned a modified text, otherwise false.
	 *
	 * @return bool
	 */
	public function getWasModified() {
		return $this->mModified;
	}

	/**
	 * $magicarr is an associative array of (magic word ID => replacement)
	 * This method uses the php feature to do several replacements at the same time,
	 * thereby gaining some efficiency. The result is placed in the out variable
	 * $result. The return value is true if something was replaced.
	 *
	 * Note: unlike replace(), the replacements are handed to preg_replace()
	 * unescaped, so "$n" and "\n" sequences in them act as back-references.
	 *
	 * @todo Should this be static? It doesn't seem to be used at all
	 *
	 * @param $magicarr
	 * @param $subject
	 * @param $result
	 *
	 * @return bool
	 */
	public function replaceMultiple( $magicarr, $subject, &$result ) {
		$search = array();
		$replace = array();
		foreach ( $magicarr as $id => $replacement ) {
			$mw = MagicWord::get( $id );
			$search[] = $mw->getRegex();
			$replace[] = $replacement;
		}

		$result = preg_replace( $search, $replace, $subject );
		return $result !== $subject;
	}

	/**
	 * Adds all the synonyms of this MagicWord to an array, to allow quick
	 * lookup in a list of magic words
	 *
	 * @param $array
	 * @param $value
	 */
	public function addToArray( &$array, $value ) {
		global $wgContLang;
		foreach ( $this->mSynonyms as $syn ) {
			$array[$wgContLang->lc( $syn )] = $value;
		}
	}

	/**
	 * @return bool
	 */
	public function isCaseSensitive() {
		return $this->mCaseSensitive;
	}

	/**
	 * @return mixed The ID this object was constructed or loaded with
	 */
	public function getId() {
		return $this->mId;
	}
}
611
/**
 * Class for handling an array of magic words
 * @ingroup Parser
 */
class MagicWordArray {
	/** List of magic word IDs in this array */
	var $names = array();
	/** Lazily built lookup table, see getHash() */
	var $hash;
	/** Lazily built regexes, see getBaseRegex() and getRegex() */
	var $baseRegex, $regex;
	var $matches;

	/**
	 * @param $names array List of magic word IDs
	 */
	public function __construct( $names = array() ) {
		$this->names = $names;
	}

	/**
	 * Add a magic word by name
	 *
	 * @param $name string
	 */
	public function add( $name ) {
		$this->names[] = $name;
		// Invalidate everything derived from the name list
		$this->hash = $this->baseRegex = $this->regex = null;
	}

	/**
	 * Add a number of magic words by name
	 *
	 * @param $names array
	 */
	public function addArray( $names ) {
		$this->names = array_merge( $this->names, array_values( $names ) );
		// Invalidate everything derived from the name list
		$this->hash = $this->baseRegex = $this->regex = null;
	}

	/**
	 * Get a 2-d hashtable for this array.
	 * Index 0 maps lower-cased synonyms of case-insensitive words to their
	 * ID, index 1 maps the synonyms of case-sensitive words, as written, to
	 * their ID.
	 *
	 * @return array
	 */
	public function getHash() {
		if ( is_null( $this->hash ) ) {
			global $wgContLang;
			$this->hash = array( 0 => array(), 1 => array() );
			foreach ( $this->names as $name ) {
				$magic = MagicWord::get( $name );
				$case = intval( $magic->isCaseSensitive() );
				foreach ( $magic->getSynonyms() as $syn ) {
					if ( !$case ) {
						$syn = $wgContLang->lc( $syn );
					}
					$this->hash[$case][$syn] = $name;
				}
			}
		}
		return $this->hash;
	}

	/**
	 * Get the base regex.
	 * Every synonym becomes a named group "<synonym index>_<magic word ID>";
	 * parseMatch() relies on that naming scheme.
	 *
	 * @return array Index 0: alternation for case-insensitive words,
	 *  index 1: alternation for case-sensitive words ('' if there are none)
	 */
	public function getBaseRegex() {
		if ( is_null( $this->baseRegex ) ) {
			$this->baseRegex = array( 0 => '', 1 => '' );
			foreach ( $this->names as $name ) {
				$magic = MagicWord::get( $name );
				$case = intval( $magic->isCaseSensitive() );
				foreach ( $magic->getSynonyms() as $i => $syn ) {
					$group = "(?P<{$i}_{$name}>" . preg_quote( $syn, '/' ) . ')';
					if ( $this->baseRegex[$case] === '' ) {
						$this->baseRegex[$case] = $group;
					} else {
						$this->baseRegex[$case] .= '|' . $group;
					}
				}
			}
		}
		return $this->baseRegex;
	}

	/**
	 * Get an unanchored regex that does not match parameters
	 *
	 * @return array Index 0: case-insensitive regex, index 1: case-sensitive
	 *  regex; an entry is '' when there are no words of that kind
	 */
	public function getRegex() {
		if ( is_null( $this->regex ) ) {
			$base = $this->getBaseRegex();
			$this->regex = array( '', '' );
			if ( $base[0] !== '' ) {
				$this->regex[0] = "/{$base[0]}/iuS";
			}
			if ( $base[1] !== '' ) {
				$this->regex[1] = "/{$base[1]}/S";
			}
		}
		return $this->regex;
	}

	/**
	 * Get a regex for matching variables with parameters
	 *
	 * @return array Same layout as getRegex(), with every quoted "$1"
	 *  replaced by a lazy capture group
	 */
	public function getVariableRegex() {
		return str_replace( "\\$1", "(.*?)", $this->getRegex() );
	}

	/**
	 * Get a regex anchored to the start of the string that does not match parameters
	 *
	 * @return array
	 */
	public function getRegexStart() {
		$base = $this->getBaseRegex();
		$newRegex = array( '', '' );
		if ( $base[0] !== '' ) {
			$newRegex[0] = "/^(?:{$base[0]})/iuS";
		}
		if ( $base[1] !== '' ) {
			$newRegex[1] = "/^(?:{$base[1]})/S";
		}
		return $newRegex;
	}

	/**
	 * Get an anchored regex for matching variables with parameters
	 *
	 * @return array
	 */
	public function getVariableStartToEndRegex() {
		$base = $this->getBaseRegex();
		$newRegex = array( '', '' );
		if ( $base[0] !== '' ) {
			$newRegex[0] = str_replace( "\\$1", "(.*?)", "/^(?:{$base[0]})$/iuS" );
		}
		if ( $base[1] !== '' ) {
			$newRegex[1] = str_replace( "\\$1", "(.*?)", "/^(?:{$base[1]})$/S" );
		}
		return $newRegex;
	}

	/**
	 * Parse a match array from preg_match
	 * Returns array(magic word ID, parameter value)
	 * If there is no parameter value, that element will be false.
	 *
	 * @param $m array
	 *
	 * @return array
	 * @throws MWException if no usable named group is found
	 */
	public function parseMatch( $m ) {
		// Walk the match by position instead of with each()/next(): each()
		// was removed in PHP 8.0, and this way the array's internal pointer
		// is not relied upon.
		$keys = array_keys( $m );
		$values = array_values( $m );
		$count = count( $values );

		for ( $pos = 0; $pos < $count; $pos++ ) {
			$key = $keys[$pos];
			if ( $key === 0 || $values[$pos] === '' ) {
				continue;
			}
			$parts = explode( '_', $key, 2 );
			if ( count( $parts ) != 2 ) {
				// This shouldn't happen
				// continue;
				throw new MWException( __METHOD__ . ': bad parameter name' );
			}
			$magicName = $parts[1];
			// A named group is followed by its numeric duplicate; the
			// entry after that one is taken as the parameter value.
			$paramValue = ( $pos + 2 < $count ) ? $values[$pos + 2] : false;
			return array( $magicName, $paramValue );
		}
		// This shouldn't happen either
		throw new MWException( __METHOD__ . ': parameter not found' );
	}

	/**
	 * Match some text, with parameter capture
	 * Returns an array with the magic word name in the first element and the
	 * parameter in the second element.
	 * Both elements are false if there was no match.
	 *
	 * @param $text string
	 *
	 * @return array
	 */
	public function matchVariableStartToEnd( $text ) {
		foreach ( $this->getVariableStartToEndRegex() as $regex ) {
			if ( $regex === '' ) {
				continue;
			}
			$m = array();
			if ( preg_match( $regex, $text, $m ) ) {
				return $this->parseMatch( $m );
			}
		}
		return array( false, false );
	}

	/**
	 * Match some text, without parameter capture
	 * Returns the magic word name, or false if there was no capture
	 *
	 * @param $text string
	 *
	 * @return string|false
	 */
	public function matchStartToEnd( $text ) {
		$hash = $this->getHash();
		// Case-sensitive words are looked up as written ...
		if ( isset( $hash[1][$text] ) ) {
			return $hash[1][$text];
		}
		// ... case-insensitive ones by their lower-cased form
		global $wgContLang;
		$lc = $wgContLang->lc( $text );
		if ( isset( $hash[0][$lc] ) ) {
			return $hash[0][$lc];
		}
		return false;
	}

	/**
	 * Returns an associative array, ID => param value, for all items that match
	 * Removes the matched items from the input string (passed by reference)
	 *
	 * @param $text string
	 *
	 * @return array
	 */
	public function matchAndRemove( &$text ) {
		$found = array();
		foreach ( $this->getRegex() as $regex ) {
			if ( $regex === '' ) {
				continue;
			}
			$matches = array();
			preg_match_all( $regex, $text, $matches, PREG_SET_ORDER );
			foreach ( $matches as $m ) {
				list( $name, $param ) = $this->parseMatch( $m );
				$found[$name] = $param;
			}
			$text = preg_replace( $regex, '', $text );
		}
		return $found;
	}

	/**
	 * Return the ID of the magic word at the start of $text, and remove
	 * the prefix from $text.
	 * Return false if no match found and $text is not modified.
	 * Does not match parameters.
	 *
	 * @param $text string
	 *
	 * @return string|false The magic word ID as produced by parseMatch()
	 */
	public function matchStartAndRemove( &$text ) {
		foreach ( $this->getRegexStart() as $regex ) {
			if ( $regex === '' ) {
				continue;
			}
			$m = array();
			if ( !preg_match( $regex, $text, $m ) ) {
				continue;
			}
			$parsed = $this->parseMatch( $m );
			$prefixLength = strlen( $m[0] );
			// Set $text to '' explicitly when the match is the whole string
			// rather than calling substr() at the very end of it.
			$text = ( $prefixLength >= strlen( $text ) ) ? '' : substr( $text, $prefixLength );
			return $parsed[0];
		}
		return false;
	}
}