Merge "Use local context to get messages"
[lhc/web/wiklou.git] / includes / MagicWord.php
1 <?php
2 /**
3 * File for magic words
4 *
5 * See docs/magicword.txt
6 *
7 * @file
8 * @ingroup Parser
9 */
10
11 /**
12 * This class encapsulates "magic words" such as #redirect, __NOTOC__, etc.
13 *
14 * @par Usage:
15 * @code
16 * if (MagicWord::get( 'redirect' )->match( $text ) ) {
17 * // some code
18 * }
19 * @endcode
20 *
21 * Possible future improvements:
22 * * Simultaneous searching for a number of magic words
23 * * MagicWord::$mObjects in shared memory
24 *
25 * Please avoid reading the data out of one of these objects and then writing
26 * special case code. If possible, add another match()-like function here.
27 *
28 * To add magic words in an extension, use $magicWords in a file listed in
29 * $wgExtensionMessagesFiles[].
30 *
31 * @par Example:
32 * @code
33 * $magicWords = array();
34 *
35 * $magicWords['en'] = array(
36 * 'magicwordkey' => array( 0, 'case_insensitive_magic_word' ),
37 * 'magicwordkey2' => array( 1, 'CASE_sensitive_magic_word2' ),
38 * );
39 * @endcode
40 *
41 * For magic words which are also Parser variables, add a MagicWordwgVariableIDs
42 * hook. Use string keys.
43 *
44 * @ingroup Parser
45 */
class MagicWord {
	/**#@+
	 * @private
	 */
	public $mId, $mSynonyms, $mCaseSensitive;
	public $mRegex = '';
	public $mRegexStart = '';
	public $mBaseRegex = '';
	public $mVariableRegex = '';
	public $mVariableStartToEndRegex = '';
	public $mModified = false;
	public $mFound = false;

	static public $mVariableIDsInitialised = false;
	static public $mVariableIDs = array(
		'currentmonth',
		'currentmonth1',
		'currentmonthname',
		'currentmonthnamegen',
		'currentmonthabbrev',
		'currentday',
		'currentday2',
		'currentdayname',
		'currentyear',
		'currenttime',
		'currenthour',
		'localmonth',
		'localmonth1',
		'localmonthname',
		'localmonthnamegen',
		'localmonthabbrev',
		'localday',
		'localday2',
		'localdayname',
		'localyear',
		'localtime',
		'localhour',
		'numberofarticles',
		'numberoffiles',
		'numberofedits',
		'articlepath',
		'sitename',
		'server',
		'servername',
		'scriptpath',
		'stylepath',
		'pagename',
		'pagenamee',
		'fullpagename',
		'fullpagenamee',
		'namespace',
		'namespacee',
		'namespacenumber',
		'currentweek',
		'currentdow',
		'localweek',
		'localdow',
		'revisionid',
		'revisionday',
		'revisionday2',
		'revisionmonth',
		'revisionmonth1',
		'revisionyear',
		'revisiontimestamp',
		'revisionuser',
		'subpagename',
		'subpagenamee',
		'talkspace',
		'talkspacee',
		'subjectspace',
		'subjectspacee',
		'talkpagename',
		'talkpagenamee',
		'subjectpagename',
		'subjectpagenamee',
		'numberofusers',
		'numberofactiveusers',
		'numberofpages',
		'currentversion',
		'basepagename',
		'basepagenamee',
		'currenttimestamp',
		'localtimestamp',
		'directionmark',
		'contentlanguage',
		'numberofadmins',
		'numberofviews',
	);

	/* Array of caching hints for ParserCache (magic word ID => TTL value) */
	static public $mCacheTTLs = array (
		'currentmonth' => 86400,
		'currentmonth1' => 86400,
		'currentmonthname' => 86400,
		'currentmonthnamegen' => 86400,
		'currentmonthabbrev' => 86400,
		'currentday' => 3600,
		'currentday2' => 3600,
		'currentdayname' => 3600,
		'currentyear' => 86400,
		'currenttime' => 3600,
		'currenthour' => 3600,
		'localmonth' => 86400,
		'localmonth1' => 86400,
		'localmonthname' => 86400,
		'localmonthnamegen' => 86400,
		'localmonthabbrev' => 86400,
		'localday' => 3600,
		'localday2' => 3600,
		'localdayname' => 3600,
		'localyear' => 86400,
		'localtime' => 3600,
		'localhour' => 3600,
		'numberofarticles' => 3600,
		'numberoffiles' => 3600,
		'numberofedits' => 3600,
		'currentweek' => 3600,
		'currentdow' => 3600,
		'localweek' => 3600,
		'localdow' => 3600,
		'numberofusers' => 3600,
		'numberofactiveusers' => 3600,
		'numberofpages' => 3600,
		'currentversion' => 86400,
		'currenttimestamp' => 3600,
		'localtimestamp' => 3600,
		'pagesinnamespace' => 3600,
		'numberofadmins' => 3600,
		'numberofviews' => 3600,
		'numberingroup' => 3600,
	);

	static public $mDoubleUnderscoreIDs = array(
		'notoc',
		'nogallery',
		'forcetoc',
		'toc',
		'noeditsection',
		'newsectionlink',
		'nonewsectionlink',
		'hiddencat',
		'index',
		'noindex',
		'staticredirect',
		'notitleconvert',
		'nocontentconvert',
	);

	static public $mSubstIDs = array(
		'subst',
		'safesubst',
	);

	static public $mObjects = array();
	static public $mDoubleUnderscoreArray = null;

	/**#@-*/

	/**
	 * @param $id string|int Magic word ID (0 when not yet loaded)
	 * @param $syn array|string Synonym(s) of the word; a scalar is wrapped into an array
	 * @param $cs bool Whether matching is case-sensitive
	 */
	public function __construct( $id = 0, $syn = array(), $cs = false ) {
		$this->mId = $id;
		$this->mSynonyms = (array)$syn;
		$this->mCaseSensitive = $cs;
	}

	/**
	 * Factory: creates an object representing an ID.
	 * Objects are created once per ID and then served from self::$mObjects.
	 *
	 * @param $id
	 *
	 * @return MagicWord
	 */
	public static function &get( $id ) {
		if ( !isset( self::$mObjects[$id] ) ) {
			$mw = new MagicWord();
			$mw->load( $id );
			self::$mObjects[$id] = $mw;
		}
		return self::$mObjects[$id];
	}

	/**
	 * Get an array of parser variable IDs.
	 * The 'MagicWordwgVariableIDs' hook is run once, on first call, and may
	 * modify the list in place.
	 *
	 * @return array
	 */
	public static function getVariableIDs() {
		if ( !self::$mVariableIDsInitialised ) {
			# Get variable IDs
			wfRunHooks( 'MagicWordwgVariableIDs', array( &self::$mVariableIDs ) );
			self::$mVariableIDsInitialised = true;
		}
		return self::$mVariableIDs;
	}

	/**
	 * Get an array of parser substitution modifier IDs
	 * @return array
	 */
	public static function getSubstIDs() {
		return self::$mSubstIDs;
	}

	/**
	 * Allow external reads of TTL array
	 *
	 * @param $id string Magic word ID
	 * @return int The value registered in self::$mCacheTTLs, or -1 if the ID has no entry
	 */
	public static function getCacheTTL( $id ) {
		if ( array_key_exists( $id, self::$mCacheTTLs ) ) {
			return self::$mCacheTTLs[$id];
		}
		return -1;
	}

	/**
	 * Get a MagicWordArray of double-underscore entities
	 *
	 * @return MagicWordArray
	 */
	public static function getDoubleUnderscoreArray() {
		if ( is_null( self::$mDoubleUnderscoreArray ) ) {
			self::$mDoubleUnderscoreArray = new MagicWordArray( self::$mDoubleUnderscoreIDs );
		}
		return self::$mDoubleUnderscoreArray;
	}

	/**
	 * Clear the self::$mObjects variable
	 * For use in parser tests
	 */
	public static function clearCache() {
		self::$mObjects = array();
	}

	/**
	 * Initialises this object with an ID.
	 * The content language object is asked to fill in this word via
	 * getMagic(); if no synonyms are set afterwards, a placeholder synonym is
	 * installed and the problem is logged instead of throwing.
	 *
	 * @param $id
	 */
	public function load( $id ) {
		global $wgContLang;
		wfProfileIn( __METHOD__ );
		$this->mId = $id;
		$wgContLang->getMagic( $this );
		if ( !$this->mSynonyms ) {
			$this->mSynonyms = array( 'dkjsagfjsgashfajsh' );
			#throw new MWException( "Error: invalid magic word '$id'" );
			wfDebugLog( 'exception', "Error: invalid magic word '$id'\n" );
		}
		wfProfileOut( __METHOD__ );
	}

	/**
	 * Preliminary initialisation: builds all cached regexes from the synonyms.
	 * @private
	 */
	public function initRegex() {
		// Sort the synonyms by length, descending, so that the longest synonym
		// matches in precedence to the shortest
		$synonyms = $this->mSynonyms;
		usort( $synonyms, array( $this, 'compareStringLength' ) );

		$escSyn = array();
		foreach ( $synonyms as $synonym ) {
			// In case a magic word contains /, like that's going to happen;)
			$escSyn[] = preg_quote( $synonym, '/' );
		}
		$this->mBaseRegex = implode( '|', $escSyn );

		$case = $this->mCaseSensitive ? '' : 'iu';
		$this->mRegex = "/{$this->mBaseRegex}/{$case}";
		$this->mRegexStart = "/^(?:{$this->mBaseRegex})/{$case}";
		// preg_quote() turned a literal "$1" in a synonym into "\$1"; swap it
		// for a lazy capture group to obtain the "variable" forms.
		$this->mVariableRegex = str_replace( "\\$1", "(.*?)", $this->mRegex );
		$this->mVariableStartToEndRegex = str_replace( "\\$1", "(.*?)",
			"/^(?:{$this->mBaseRegex})$/{$case}" );
	}

	/**
	 * A comparison function that returns -1, 0 or 1 depending on whether the
	 * first string is longer, the same length or shorter than the second
	 * string.
	 *
	 * @param $s1 string
	 * @param $s2 string
	 *
	 * @return int
	 */
	public function compareStringLength( $s1, $s2 ) {
		$l1 = strlen( $s1 );
		$l2 = strlen( $s2 );
		if ( $l1 < $l2 ) {
			return 1;
		} elseif ( $l1 > $l2 ) {
			return -1;
		}
		return 0;
	}

	/**
	 * Gets a regex representing matching the word
	 *
	 * @return string
	 */
	public function getRegex() {
		if ( $this->mRegex === '' ) {
			$this->initRegex();
		}
		return $this->mRegex;
	}

	/**
	 * Gets the regexp case modifier to use, i.e. iu or nothing, to be used if
	 * one is using MagicWord::getBaseRegex(), otherwise it'll be included in
	 * the complete expression
	 *
	 * @return string
	 */
	public function getRegexCase() {
		if ( $this->mRegex === '' ) {
			$this->initRegex();
		}
		return $this->mCaseSensitive ? '' : 'iu';
	}

	/**
	 * Gets a regex matching the word, if it is at the string start
	 *
	 * @return string
	 */
	public function getRegexStart() {
		if ( $this->mRegex === '' ) {
			$this->initRegex();
		}
		return $this->mRegexStart;
	}

	/**
	 * regex without the slashes and what not
	 *
	 * @return string
	 */
	public function getBaseRegex() {
		if ( $this->mRegex === '' ) {
			$this->initRegex();
		}
		return $this->mBaseRegex;
	}

	/**
	 * Returns true if the text contains the word
	 *
	 * @param $text string
	 *
	 * @return bool
	 */
	public function match( $text ) {
		return (bool)preg_match( $this->getRegex(), $text );
	}

	/**
	 * Returns true if the text starts with the word
	 *
	 * @param $text string
	 *
	 * @return bool
	 */
	public function matchStart( $text ) {
		return (bool)preg_match( $this->getRegexStart(), $text );
	}

	/**
	 * Returns NULL if there's no match, the value of $1 otherwise
	 * The return code is the matched string, if there's no variable
	 * part in the regex and the matched variable part ($1) if there
	 * is one.
	 *
	 * @param $text string
	 *
	 * @return string|null
	 */
	public function matchVariableStartToEnd( $text ) {
		$matches = array();
		$matchcount = preg_match( $this->getVariableStartToEndRegex(), $text, $matches );
		if ( $matchcount == 0 ) {
			return null;
		}

		# multiple matched parts (variable match); some will be empty because of
		# synonyms. The variable will be the second non-empty one so drop the
		# blank elements and re-index.
		# See also bug 6526
		#
		# Only exact empty strings are dropped: a plain array_filter() would
		# also discard "0" and thereby lose a parameter value of "0".
		$parts = array();
		foreach ( $matches as $piece ) {
			if ( $piece !== '' ) {
				$parts[] = $piece;
			}
		}

		if ( count( $parts ) > 1 ) {
			return $parts[1];
		}
		// No non-empty variable part: hand back the whole matched text.
		return $matches[0];
	}


	/**
	 * Returns true if the text matches the word, and alters the
	 * input string, removing all instances of the word
	 *
	 * @param $text string
	 *
	 * @return bool
	 */
	public function matchAndRemove( &$text ) {
		$this->mFound = false;
		// Objects are passed by handle; taking a reference to $this is
		// unnecessary and not supported by PHP 7.1+.
		$text = preg_replace_callback( $this->getRegex(), array( $this, 'pregRemoveAndRecord' ), $text );
		return $this->mFound;
	}

	/**
	 * Returns true if the text starts with the word, and alters the
	 * input string, removing that leading occurrence
	 *
	 * @param $text
	 * @return bool
	 */
	public function matchStartAndRemove( &$text ) {
		$this->mFound = false;
		$text = preg_replace_callback( $this->getRegexStart(), array( $this, 'pregRemoveAndRecord' ), $text );
		return $this->mFound;
	}

	/**
	 * Used in matchAndRemove() and matchStartAndRemove(): records that a
	 * match happened and replaces it with nothing.
	 *
	 * @return string
	 */
	public function pregRemoveAndRecord() {
		$this->mFound = true;
		return '';
	}

	/**
	 * Replaces the word with something else
	 *
	 * @param $replacement
	 * @param $subject
	 * @param $limit int
	 *
	 * @return string
	 */
	public function replace( $replacement, $subject, $limit = -1 ) {
		$res = preg_replace( $this->getRegex(), StringUtils::escapeRegexReplacement( $replacement ), $subject, $limit );
		$this->mModified = ( $res !== $subject );
		return $res;
	}

	/**
	 * Variable handling: {{SUBST:xxx}} style words
	 * Calls back a function to determine what to replace xxx with
	 * Input word must contain $1
	 *
	 * @param $text string
	 * @param $callback
	 *
	 * @return string
	 */
	public function substituteCallback( $text, $callback ) {
		$res = preg_replace_callback( $this->getVariableRegex(), $callback, $text );
		$this->mModified = ( $res !== $text );
		return $res;
	}

	/**
	 * Matches the word, where $1 is a wildcard
	 *
	 * @return string
	 */
	public function getVariableRegex() {
		if ( $this->mVariableRegex === '' ) {
			$this->initRegex();
		}
		return $this->mVariableRegex;
	}

	/**
	 * Matches the entire string, where $1 is a wildcard
	 *
	 * @return string
	 */
	public function getVariableStartToEndRegex() {
		if ( $this->mVariableStartToEndRegex === '' ) {
			$this->initRegex();
		}
		return $this->mVariableStartToEndRegex;
	}

	/**
	 * Accesses the synonym list directly
	 *
	 * @param $i int
	 *
	 * @return string
	 */
	public function getSynonym( $i ) {
		return $this->mSynonyms[$i];
	}

	/**
	 * @return array
	 */
	public function getSynonyms() {
		return $this->mSynonyms;
	}

	/**
	 * Returns true if the last call to replace() or substituteCallback()
	 * returned a modified text, otherwise false.
	 *
	 * @return bool
	 */
	public function getWasModified() {
		return $this->mModified;
	}

	/**
	 * $magicarr is an associative array of (magic word ID => replacement)
	 * This method uses the php feature to do several replacements at the same time,
	 * thereby gaining some efficiency. The result is placed in the out variable
	 * $result. The return value is true if something was replaced.
	 * Unlike replace(), the replacements are handed to preg_replace() as-is,
	 * without escaping.
	 * @todo Should this be static? It doesn't seem to be used at all
	 *
	 * @param $magicarr
	 * @param $subject
	 * @param $result
	 *
	 * @return bool
	 */
	public function replaceMultiple( $magicarr, $subject, &$result ) {
		$search = array();
		$replace = array();
		foreach ( $magicarr as $id => $replacement ) {
			$mw = MagicWord::get( $id );
			$search[] = $mw->getRegex();
			$replace[] = $replacement;
		}

		$result = preg_replace( $search, $replace, $subject );
		return $result !== $subject;
	}

	/**
	 * Adds all the synonyms of this MagicWord to an array, to allow quick
	 * lookup in a list of magic words. Keys are the synonyms lower-cased by
	 * the content language.
	 *
	 * @param $array
	 * @param $value
	 */
	public function addToArray( &$array, $value ) {
		global $wgContLang;
		foreach ( $this->mSynonyms as $syn ) {
			$array[$wgContLang->lc( $syn )] = $value;
		}
	}

	/**
	 * @return bool
	 */
	public function isCaseSensitive() {
		return $this->mCaseSensitive;
	}

	/**
	 * @return string|int The magic word ID (0 if none was ever set)
	 */
	public function getId() {
		return $this->mId;
	}
}
621
622 /**
623 * Class for handling an array of magic words
624 * @ingroup Parser
625 */
class MagicWordArray {
	public $names = array();
	public $hash;
	public $baseRegex, $regex;
	public $matches;

	/**
	 * @param $names array List of magic word IDs
	 */
	public function __construct( $names = array() ) {
		$this->names = $names;
	}

	/**
	 * Add a magic word by name
	 * Drops the cached hash and regexes so they get rebuilt on next use.
	 *
	 * @param $name string
	 */
	public function add( $name ) {
		$this->names[] = $name;
		$this->hash = $this->baseRegex = $this->regex = null;
	}

	/**
	 * Add a number of magic words by name
	 * Drops the cached hash and regexes so they get rebuilt on next use.
	 *
	 * @param $names array
	 */
	public function addArray( $names ) {
		$this->names = array_merge( $this->names, array_values( $names ) );
		$this->hash = $this->baseRegex = $this->regex = null;
	}

	/**
	 * Get a 2-d hashtable for this array
	 *
	 * Index 0 holds the case-insensitive words (synonyms lower-cased by the
	 * content language), index 1 the case-sensitive ones; each maps
	 * synonym => magic word ID.
	 *
	 * @return array
	 */
	public function getHash() {
		if ( is_null( $this->hash ) ) {
			global $wgContLang;
			$this->hash = array( 0 => array(), 1 => array() );
			foreach ( $this->names as $name ) {
				$magic = MagicWord::get( $name );
				$case = intval( $magic->isCaseSensitive() );
				foreach ( $magic->getSynonyms() as $syn ) {
					if ( !$case ) {
						$syn = $wgContLang->lc( $syn );
					}
					$this->hash[$case][$syn] = $name;
				}
			}
		}
		return $this->hash;
	}

	/**
	 * Get the base regex
	 *
	 * Returns two alternations (index 0: case-insensitive words, index 1:
	 * case-sensitive words). Every synonym becomes a named group
	 * "<synonym index>_<magic word ID>", which parseMatch() decodes.
	 *
	 * @return array
	 */
	public function getBaseRegex() {
		if ( is_null( $this->baseRegex ) ) {
			$this->baseRegex = array( 0 => '', 1 => '' );
			foreach ( $this->names as $name ) {
				$magic = MagicWord::get( $name );
				$case = intval( $magic->isCaseSensitive() );
				foreach ( $magic->getSynonyms() as $i => $syn ) {
					$group = "(?P<{$i}_{$name}>" . preg_quote( $syn, '/' ) . ')';
					if ( $this->baseRegex[$case] === '' ) {
						$this->baseRegex[$case] = $group;
					} else {
						$this->baseRegex[$case] .= '|' . $group;
					}
				}
			}
		}
		return $this->baseRegex;
	}

	/**
	 * Get an unanchored regex that does not match parameters
	 *
	 * @return array Two regexes (case-insensitive, case-sensitive); an entry
	 *  is '' when there are no words of that kind
	 */
	public function getRegex() {
		if ( is_null( $this->regex ) ) {
			$base = $this->getBaseRegex();
			$this->regex = array( '', '' );
			if ( $base[0] !== '' ) {
				$this->regex[0] = "/{$base[0]}/iuS";
			}
			if ( $base[1] !== '' ) {
				$this->regex[1] = "/{$base[1]}/S";
			}
		}
		return $this->regex;
	}

	/**
	 * Get a regex for matching variables with parameters
	 *
	 * @return array Same shape as getRegex(), with the quoted "$1" replaced
	 *  by a lazy capture group
	 */
	public function getVariableRegex() {
		return str_replace( "\\$1", "(.*?)", $this->getRegex() );
	}

	/**
	 * Get a regex anchored to the start of the string that does not match parameters
	 *
	 * @return array
	 */
	public function getRegexStart() {
		$base = $this->getBaseRegex();
		$newRegex = array( '', '' );
		if ( $base[0] !== '' ) {
			$newRegex[0] = "/^(?:{$base[0]})/iuS";
		}
		if ( $base[1] !== '' ) {
			$newRegex[1] = "/^(?:{$base[1]})/S";
		}
		return $newRegex;
	}

	/**
	 * Get an anchored regex for matching variables with parameters
	 *
	 * @return array
	 */
	public function getVariableStartToEndRegex() {
		$base = $this->getBaseRegex();
		$newRegex = array( '', '' );
		if ( $base[0] !== '' ) {
			$newRegex[0] = str_replace( "\\$1", "(.*?)", "/^(?:{$base[0]})$/iuS" );
		}
		if ( $base[1] !== '' ) {
			$newRegex[1] = str_replace( "\\$1", "(.*?)", "/^(?:{$base[1]})$/S" );
		}
		return $newRegex;
	}

	/**
	 * Parse a match array from preg_match
	 * Returns array(magic word ID, parameter value)
	 * If there is no parameter value, that element will be false.
	 *
	 * @param $m array
	 *
	 * @return array
	 * @throws MWException If a group name is malformed or no group matched
	 */
	public function parseMatch( $m ) {
		// Walk the array with the internal pointer (rather than foreach) so
		// that the element following the matched group can be fetched with
		// next(). each() is not used: it was removed in PHP 8.0.
		reset( $m );
		while ( ( $key = key( $m ) ) !== null ) {
			$value = current( $m );
			// Named groups appear twice (by name, then by number); after this
			// step the pointer sits on the numeric duplicate.
			next( $m );
			if ( $key === 0 || $value === '' ) {
				continue;
			}
			$parts = explode( '_', $key, 2 );
			if ( count( $parts ) != 2 ) {
				// This shouldn't happen
				// continue;
				throw new MWException( __METHOD__ . ': bad parameter name' );
			}
			list( /* $synIndex */, $magicName ) = $parts;
			// Step past the numeric duplicate: the next element, if any, is
			// the captured parameter; next() yields false when there is none.
			$paramValue = next( $m );
			return array( $magicName, $paramValue );
		}
		// This shouldn't happen either
		throw new MWException( __METHOD__ . ': parameter not found' );
	}

	/**
	 * Match some text, with parameter capture
	 * Returns an array with the magic word name in the first element and the
	 * parameter in the second element.
	 * Both elements are false if there was no match.
	 *
	 * @param $text string
	 *
	 * @return array
	 */
	public function matchVariableStartToEnd( $text ) {
		$regexes = $this->getVariableStartToEndRegex();
		foreach ( $regexes as $regex ) {
			if ( $regex === '' ) {
				continue;
			}
			$m = array();
			if ( preg_match( $regex, $text, $m ) ) {
				return $this->parseMatch( $m );
			}
		}
		return array( false, false );
	}

	/**
	 * Match some text, without parameter capture
	 * Returns the magic word name, or false if there was no capture
	 *
	 * The case-sensitive table is consulted first, then the case-insensitive
	 * one using the lower-cased text.
	 *
	 * @param $text string
	 *
	 * @return string|bool False on failure
	 */
	public function matchStartToEnd( $text ) {
		$hash = $this->getHash();
		if ( isset( $hash[1][$text] ) ) {
			return $hash[1][$text];
		}
		global $wgContLang;
		$lc = $wgContLang->lc( $text );
		if ( isset( $hash[0][$lc] ) ) {
			return $hash[0][$lc];
		}
		return false;
	}

	/**
	 * Returns an associative array, ID => param value, for all items that match
	 * Removes the matched items from the input string (passed by reference)
	 *
	 * @param $text string
	 *
	 * @return array
	 */
	public function matchAndRemove( &$text ) {
		$found = array();
		$regexes = $this->getRegex();
		foreach ( $regexes as $regex ) {
			if ( $regex === '' ) {
				continue;
			}
			$matches = array();
			preg_match_all( $regex, $text, $matches, PREG_SET_ORDER );
			foreach ( $matches as $m ) {
				list( $name, $param ) = $this->parseMatch( $m );
				$found[$name] = $param;
			}
			$text = preg_replace( $regex, '', $text );
		}
		return $found;
	}

	/**
	 * Return the ID of the magic word at the start of $text, and remove
	 * the prefix from $text.
	 * Return false if no match found and $text is not modified.
	 * Does not match parameters.
	 *
	 * @param $text string
	 *
	 * @return string|bool The magic word ID, or false on failure
	 */
	public function matchStartAndRemove( &$text ) {
		$regexes = $this->getRegexStart();
		foreach ( $regexes as $regex ) {
			if ( $regex === '' ) {
				continue;
			}
			$m = array();
			if ( preg_match( $regex, $text, $m ) ) {
				list( $id, ) = $this->parseMatch( $m );
				$prefixLength = strlen( $m[0] );
				// Guard the "whole text consumed" case explicitly: older PHP
				// versions make substr() return false there instead of ''.
				if ( $prefixLength >= strlen( $text ) ) {
					$text = '';
				} else {
					$text = substr( $text, $prefixLength );
				}
				return $id;
			}
		}
		return false;
	}
}