Introduce a new setting that allows extension authors to whitelist deprecated function...
[lhc/web/wiklou.git] / includes / MagicWord.php
1 <?php
2 /**
3 * File for magic words
4 *
5 * See docs/magicword.txt
6 *
7 * @file
8 * @ingroup Parser
9 */
10
/**
 * This class encapsulates "magic words" such as #redirect, __NOTOC__, etc.
 * Usage:
 *     if ( MagicWord::get( 'redirect' )->match( $text ) )
 *
 * Possible future improvements:
 *   * Simultaneous searching for a number of magic words
 *   * MagicWord::$mObjects in shared memory
 *
 * Please avoid reading the data out of one of these objects and then writing
 * special case code. If possible, add another match()-like function here.
 *
 * To add magic words in an extension, use the LanguageGetMagic hook. For
 * magic words which are also Parser variables, add a MagicWordwgVariableIDs
 * hook. Use string keys.
 *
 * @ingroup Parser
 */
class MagicWord {
	/**#@+
	 * @private
	 */
	// Identifier, list of synonyms and case-sensitivity flag of this word
	var $mId, $mSynonyms, $mCaseSensitive;
	// The regexes below start out empty and are built on demand by initRegex()
	var $mRegex = '';
	var $mRegexStart = '';
	var $mBaseRegex = '';
	var $mVariableRegex = '';
	var $mVariableStartToEndRegex = '';
	// Set by replace() and substituteCallback(); read through getWasModified()
	var $mModified = false;
	// Set by pregRemoveAndRecord() during matchAndRemove()/matchStartAndRemove()
	var $mFound = false;

	static public $mVariableIDsInitialised = false;
	static public $mVariableIDs = array(
		'currentmonth',
		'currentmonth1',
		'currentmonthname',
		'currentmonthnamegen',
		'currentmonthabbrev',
		'currentday',
		'currentday2',
		'currentdayname',
		'currentyear',
		'currenttime',
		'currenthour',
		'localmonth',
		'localmonth1',
		'localmonthname',
		'localmonthnamegen',
		'localmonthabbrev',
		'localday',
		'localday2',
		'localdayname',
		'localyear',
		'localtime',
		'localhour',
		'numberofarticles',
		'numberoffiles',
		'numberofedits',
		'articlepath',
		'sitename',
		'server',
		'servername',
		'scriptpath',
		'stylepath',
		'pagename',
		'pagenamee',
		'fullpagename',
		'fullpagenamee',
		'namespace',
		'namespacee',
		'currentweek',
		'currentdow',
		'localweek',
		'localdow',
		'revisionid',
		'revisionday',
		'revisionday2',
		'revisionmonth',
		'revisionmonth1',
		'revisionyear',
		'revisiontimestamp',
		'revisionuser',
		'subpagename',
		'subpagenamee',
		'talkspace',
		'talkspacee',
		'subjectspace',
		'subjectspacee',
		'talkpagename',
		'talkpagenamee',
		'subjectpagename',
		'subjectpagenamee',
		'numberofusers',
		'numberofactiveusers',
		'numberofpages',
		'currentversion',
		'basepagename',
		'basepagenamee',
		'currenttimestamp',
		'localtimestamp',
		'directionmark',
		'contentlanguage',
		'numberofadmins',
		'numberofviews',
	);

	/* Array of caching hints for ParserCache */
	static public $mCacheTTLs = array (
		'currentmonth' => 86400,
		'currentmonth1' => 86400,
		'currentmonthname' => 86400,
		'currentmonthnamegen' => 86400,
		'currentmonthabbrev' => 86400,
		'currentday' => 3600,
		'currentday2' => 3600,
		'currentdayname' => 3600,
		'currentyear' => 86400,
		'currenttime' => 3600,
		'currenthour' => 3600,
		'localmonth' => 86400,
		'localmonth1' => 86400,
		'localmonthname' => 86400,
		'localmonthnamegen' => 86400,
		'localmonthabbrev' => 86400,
		'localday' => 3600,
		'localday2' => 3600,
		'localdayname' => 3600,
		'localyear' => 86400,
		'localtime' => 3600,
		'localhour' => 3600,
		'numberofarticles' => 3600,
		'numberoffiles' => 3600,
		'numberofedits' => 3600,
		'currentweek' => 3600,
		'currentdow' => 3600,
		'localweek' => 3600,
		'localdow' => 3600,
		'numberofusers' => 3600,
		'numberofactiveusers' => 3600,
		'numberofpages' => 3600,
		'currentversion' => 86400,
		'currenttimestamp' => 3600,
		'localtimestamp' => 3600,
		'pagesinnamespace' => 3600,
		'numberofadmins' => 3600,
		'numberofviews' => 3600,
		'numberingroup' => 3600,
	);

	static public $mDoubleUnderscoreIDs = array(
		'notoc',
		'nogallery',
		'forcetoc',
		'toc',
		'noeditsection',
		'newsectionlink',
		'nonewsectionlink',
		'hiddencat',
		'index',
		'noindex',
		'staticredirect',
		'notitleconvert',
		'nocontentconvert',
	);

	static public $mSubstIDs = array(
		'subst',
		'safesubst',
	);

	// Cache of MagicWord objects created by get(), keyed by ID
	static public $mObjects = array();
	// Lazily created by getDoubleUnderscoreArray()
	static public $mDoubleUnderscoreArray = null;

	/**#@-*/

	/**
	 * @param $id mixed Magic word identifier
	 * @param $syn array|string Synonym or list of synonyms (cast to array)
	 * @param $cs bool Whether matching is case sensitive
	 */
	public function __construct( $id = 0, $syn = array(), $cs = false ) {
		$this->mId = $id;
		$this->mSynonyms = (array)$syn;
		$this->mCaseSensitive = $cs;
	}

	/**
	 * Factory: creates an object representing an ID.
	 * Objects are cached in self::$mObjects, so each ID is loaded only once
	 * until clearCache() is called.
	 *
	 * @param $id
	 *
	 * @return MagicWord
	 */
	public static function &get( $id ) {
		if ( !isset( self::$mObjects[$id] ) ) {
			$mw = new MagicWord();
			$mw->load( $id );
			self::$mObjects[$id] = $mw;
		}
		return self::$mObjects[$id];
	}

	/**
	 * Get an array of parser variable IDs.
	 * On the first call the MagicWordMagicWords and MagicWordwgVariableIDs
	 * hooks are run; later calls return the stored list.
	 *
	 * @return array
	 */
	public static function getVariableIDs() {
		if ( !self::$mVariableIDsInitialised ) {
			# Deprecated constant definition hook, available for extensions that need it
			$magicWords = array();
			wfRunHooks( 'MagicWordMagicWords', array( &$magicWords ) );
			foreach ( $magicWords as $word ) {
				define( $word, $word );
			}

			# Get variable IDs
			wfRunHooks( 'MagicWordwgVariableIDs', array( &self::$mVariableIDs ) );
			self::$mVariableIDsInitialised = true;
		}
		return self::$mVariableIDs;
	}

	/**
	 * Get an array of parser substitution modifier IDs
	 *
	 * @return array
	 */
	public static function getSubstIDs() {
		return self::$mSubstIDs;
	}

	/**
	 * Allow external reads of TTL array
	 *
	 * @param $id string Magic word ID
	 *
	 * @return int The caching hint from self::$mCacheTTLs, or -1 if the ID
	 *   has no entry there
	 */
	public static function getCacheTTL( $id ) {
		if ( array_key_exists( $id, self::$mCacheTTLs ) ) {
			return self::$mCacheTTLs[$id];
		}
		return -1;
	}

	/**
	 * Get a MagicWordArray of double-underscore entities
	 *
	 * @return MagicWordArray
	 */
	public static function getDoubleUnderscoreArray() {
		if ( is_null( self::$mDoubleUnderscoreArray ) ) {
			self::$mDoubleUnderscoreArray = new MagicWordArray( self::$mDoubleUnderscoreIDs );
		}
		return self::$mDoubleUnderscoreArray;
	}

	/**
	 * Clear the self::$mObjects variable
	 * For use in parser tests
	 */
	public static function clearCache() {
		self::$mObjects = array();
	}

	/**
	 * Initialises this object with an ID.
	 * The content language object is asked to fill in this magic word; if no
	 * synonyms are set afterwards, a placeholder synonym is installed and
	 * the problem is logged instead of throwing.
	 *
	 * @param $id
	 */
	public function load( $id ) {
		global $wgContLang;
		wfProfileIn( __METHOD__ );
		$this->mId = $id;
		$wgContLang->getMagic( $this );
		if ( !$this->mSynonyms ) {
			$this->mSynonyms = array( 'dkjsagfjsgashfajsh' );
			# An MWException used to be considered here; the error is only logged.
			wfDebugLog( 'exception', "Error: invalid magic word '$id'\n" );
		}
		wfProfileOut( __METHOD__ );
	}

	/**
	 * Preliminary initialisation: builds all regex members from the synonyms
	 * @private
	 */
	public function initRegex() {
		// Sort the synonyms by length, descending, so that the longest synonym
		// matches in precedence to the shortest
		$synonyms = $this->mSynonyms;
		usort( $synonyms, array( $this, 'compareStringLength' ) );

		$escSyn = array();
		foreach ( $synonyms as $synonym ) {
			// In case a magic word contains /, like that's going to happen;)
			$escSyn[] = preg_quote( $synonym, '/' );
		}
		$this->mBaseRegex = implode( '|', $escSyn );

		$case = $this->mCaseSensitive ? '' : 'iu';
		$this->mRegex = "/{$this->mBaseRegex}/{$case}";
		$this->mRegexStart = "/^(?:{$this->mBaseRegex})/{$case}";
		// preg_quote() turned the "$1" placeholder into "\$1"; swap it for a
		// non-greedy capture group
		$this->mVariableRegex = str_replace( "\\$1", "(.*?)", $this->mRegex );
		$this->mVariableStartToEndRegex = str_replace( "\\$1", "(.*?)",
			"/^(?:{$this->mBaseRegex})$/{$case}" );
	}

	/**
	 * A comparison function that returns -1, 0 or 1 depending on whether the
	 * first string is longer, the same length or shorter than the second
	 * string.
	 *
	 * @param $s1 string
	 * @param $s2 string
	 *
	 * @return int
	 */
	public function compareStringLength( $s1, $s2 ) {
		$l1 = strlen( $s1 );
		$l2 = strlen( $s2 );
		if ( $l1 < $l2 ) {
			return 1;
		} elseif ( $l1 > $l2 ) {
			return -1;
		}
		return 0;
	}

	/**
	 * Gets a regex representing matching the word
	 *
	 * @return string
	 */
	public function getRegex() {
		if ( $this->mRegex == '' ) {
			$this->initRegex();
		}
		return $this->mRegex;
	}

	/**
	 * Gets the regexp case modifier to use, i.e. i or nothing, to be used if
	 * one is using MagicWord::getBaseRegex(), otherwise it'll be included in
	 * the complete expression
	 *
	 * @return string
	 */
	public function getRegexCase() {
		if ( $this->mRegex === '' ) {
			$this->initRegex();
		}

		return $this->mCaseSensitive ? '' : 'iu';
	}

	/**
	 * Gets a regex matching the word, if it is at the string start
	 *
	 * @return string
	 */
	public function getRegexStart() {
		if ( $this->mRegex == '' ) {
			$this->initRegex();
		}
		return $this->mRegexStart;
	}

	/**
	 * regex without the slashes and what not
	 *
	 * @return string
	 */
	public function getBaseRegex() {
		if ( $this->mRegex == '' ) {
			$this->initRegex();
		}
		return $this->mBaseRegex;
	}

	/**
	 * Returns true if the text contains the word
	 *
	 * @param $text string
	 *
	 * @return bool
	 */
	public function match( $text ) {
		return (bool)preg_match( $this->getRegex(), $text );
	}

	/**
	 * Returns true if the text starts with the word
	 *
	 * @param $text string
	 *
	 * @return bool
	 */
	public function matchStart( $text ) {
		return (bool)preg_match( $this->getRegexStart(), $text );
	}

	/**
	 * Returns NULL if there's no match, the value of $1 otherwise
	 * The return code is the matched string, if there's no variable
	 * part in the regex and the matched variable part ($1) if there
	 * is one.
	 *
	 * @param $text string
	 *
	 * @return string|null
	 */
	public function matchVariableStartToEnd( $text ) {
		$matches = array();
		$matchcount = preg_match( $this->getVariableStartToEndRegex(), $text, $matches );
		if ( $matchcount == 0 ) {
			return null;
		}

		# multiple matched parts (variable match); some will be empty because of
		# synonyms. The variable will be the second non-empty one so remove any
		# blank elements and re-sort the indices.
		# See also bug 6526
		#
		# Filter with strlen rather than the default truthiness test, so that
		# a captured '0' is kept and only empty strings are discarded.
		$matches = array_values( array_filter( $matches, 'strlen' ) );

		if ( isset( $matches[1] ) ) {
			return $matches[1];
		}
		# No variable part. If the whole match was empty nothing is left at
		# all, which is reported as null.
		return isset( $matches[0] ) ? $matches[0] : null;
	}

	/**
	 * Returns true if the text matches the word, and alters the
	 * input string, removing all instances of the word
	 *
	 * @param $text string
	 *
	 * @return bool
	 */
	public function matchAndRemove( &$text ) {
		$this->mFound = false;
		$text = preg_replace_callback( $this->getRegex(), array( $this, 'pregRemoveAndRecord' ), $text );
		return $this->mFound;
	}

	/**
	 * Returns true if the text starts with the word, and alters the input
	 * string, removing that leading occurrence
	 *
	 * @param $text string
	 *
	 * @return bool
	 */
	public function matchStartAndRemove( &$text ) {
		$this->mFound = false;
		$text = preg_replace_callback( $this->getRegexStart(), array( $this, 'pregRemoveAndRecord' ), $text );
		return $this->mFound;
	}

	/**
	 * Used in matchAndRemove(): records that a match happened and replaces
	 * it with the empty string
	 *
	 * @return string
	 */
	public function pregRemoveAndRecord() {
		$this->mFound = true;
		return '';
	}

	/**
	 * Replaces the word with something else
	 *
	 * @param $replacement
	 * @param $subject
	 * @param $limit int
	 *
	 * @return string
	 */
	public function replace( $replacement, $subject, $limit = -1 ) {
		$res = preg_replace(
			$this->getRegex(),
			StringUtils::escapeRegexReplacement( $replacement ),
			$subject,
			$limit
		);
		$this->mModified = ( $res !== $subject );
		return $res;
	}

	/**
	 * Variable handling: {{SUBST:xxx}} style words
	 * Calls back a function to determine what to replace xxx with
	 * Input word must contain $1
	 *
	 * @param $text string
	 * @param $callback
	 *
	 * @return string
	 */
	public function substituteCallback( $text, $callback ) {
		$res = preg_replace_callback( $this->getVariableRegex(), $callback, $text );
		$this->mModified = ( $res !== $text );
		return $res;
	}

	/**
	 * Matches the word, where $1 is a wildcard
	 *
	 * @return string
	 */
	public function getVariableRegex() {
		if ( $this->mVariableRegex == '' ) {
			$this->initRegex();
		}
		return $this->mVariableRegex;
	}

	/**
	 * Matches the entire string, where $1 is a wildcard
	 *
	 * @return string
	 */
	public function getVariableStartToEndRegex() {
		if ( $this->mVariableStartToEndRegex == '' ) {
			$this->initRegex();
		}
		return $this->mVariableStartToEndRegex;
	}

	/**
	 * Accesses the synonym list directly
	 *
	 * @param $i int
	 *
	 * @return string
	 */
	public function getSynonym( $i ) {
		return $this->mSynonyms[$i];
	}

	/**
	 * @return array
	 */
	public function getSynonyms() {
		return $this->mSynonyms;
	}

	/**
	 * Returns true if the last call to replace() or substituteCallback()
	 * returned a modified text, otherwise false.
	 *
	 * @return bool
	 */
	public function getWasModified() {
		return $this->mModified;
	}

	/**
	 * $magicarr is an associative array of (magic word ID => replacement)
	 * This method uses the php feature to do several replacements at the same time,
	 * thereby gaining some efficiency. The result is placed in the out variable
	 * $result. The return value is true if something was replaced.
	 * Note that, unlike replace(), the replacements are handed to
	 * preg_replace() without escaping.
	 * @todo Should this be static? It doesn't seem to be used at all
	 *
	 * @param $magicarr
	 * @param $subject
	 * @param $result
	 *
	 * @return bool
	 */
	public function replaceMultiple( $magicarr, $subject, &$result ) {
		$search = array();
		$replace = array();
		foreach ( $magicarr as $id => $replacement ) {
			$mw = MagicWord::get( $id );
			$search[] = $mw->getRegex();
			$replace[] = $replacement;
		}

		$result = preg_replace( $search, $replace, $subject );
		return $result !== $subject;
	}

	/**
	 * Adds all the synonyms of this MagicWord to an array, to allow quick
	 * lookup in a list of magic words. Keys are the synonyms lower-cased by
	 * the content language.
	 *
	 * @param $array
	 * @param $value
	 */
	public function addToArray( &$array, $value ) {
		global $wgContLang;
		foreach ( $this->mSynonyms as $syn ) {
			$array[$wgContLang->lc( $syn )] = $value;
		}
	}

	/**
	 * @return bool
	 */
	public function isCaseSensitive() {
		return $this->mCaseSensitive;
	}

	/**
	 * @return mixed The identifier this object was constructed or loaded with
	 */
	public function getId() {
		return $this->mId;
	}
}
610
/**
 * Class for handling an array of magic words
 * @ingroup Parser
 */
class MagicWordArray {
	// Magic word names (IDs) held by this array
	var $names = array();
	// Lazily built by getHash(); reset to null whenever names are added
	var $hash;
	// Lazily built by getBaseRegex() / getRegex(); reset like $hash
	var $baseRegex, $regex;
	var $matches;

	/**
	 * @param $names array List of magic word names
	 */
	public function __construct( $names = array() ) {
		$this->names = $names;
	}

	/**
	 * Add a magic word by name
	 *
	 * @param $name string
	 */
	public function add( $name ) {
		$this->names[] = $name;
		// Invalidate everything derived from the name list
		$this->hash = $this->baseRegex = $this->regex = null;
	}

	/**
	 * Add a number of magic words by name
	 *
	 * @param $names array
	 */
	public function addArray( $names ) {
		$this->names = array_merge( $this->names, array_values( $names ) );
		// Invalidate everything derived from the name list
		$this->hash = $this->baseRegex = $this->regex = null;
	}

	/**
	 * Get a 2-d hashtable for this array.
	 * Index 0 maps lower-cased synonyms of case-insensitive words to their
	 * name; index 1 maps synonyms of case-sensitive words as written.
	 *
	 * @return array
	 */
	public function getHash() {
		if ( is_null( $this->hash ) ) {
			global $wgContLang;
			$this->hash = array( 0 => array(), 1 => array() );
			foreach ( $this->names as $name ) {
				$magic = MagicWord::get( $name );
				$case = intval( $magic->isCaseSensitive() );
				foreach ( $magic->getSynonyms() as $syn ) {
					if ( !$case ) {
						$syn = $wgContLang->lc( $syn );
					}
					$this->hash[$case][$syn] = $name;
				}
			}
		}
		return $this->hash;
	}

	/**
	 * Get the base regex.
	 * Returns a two-element array: index 0 holds the alternation for
	 * case-insensitive words, index 1 the one for case-sensitive words.
	 * Every synonym gets a named group "<prefix>_<name>", from which
	 * parseMatch() recovers the magic word name.
	 *
	 * @return array
	 */
	public function getBaseRegex() {
		if ( is_null( $this->baseRegex ) ) {
			$this->baseRegex = array( 0 => '', 1 => '' );
			foreach ( $this->names as $name ) {
				$magic = MagicWord::get( $name );
				$case = intval( $magic->isCaseSensitive() );
				foreach ( $magic->getSynonyms() as $i => $syn ) {
					// Newer PCRE versions reject group names that start with
					// a digit, so spell the synonym index with letters.
					$prefix = strtr( (string)$i, '0123456789', 'abcdefghij' );
					$group = "(?P<{$prefix}_{$name}>" . preg_quote( $syn, '/' ) . ')';
					if ( $this->baseRegex[$case] === '' ) {
						$this->baseRegex[$case] = $group;
					} else {
						$this->baseRegex[$case] .= '|' . $group;
					}
				}
			}
		}
		return $this->baseRegex;
	}

	/**
	 * Get an unanchored regex that does not match parameters
	 *
	 * @return array Two regexes (case-insensitive, case-sensitive); an entry
	 *   is '' when there are no words of that kind
	 */
	public function getRegex() {
		if ( is_null( $this->regex ) ) {
			$base = $this->getBaseRegex();
			$this->regex = array( '', '' );
			if ( $base[0] !== '' ) {
				$this->regex[0] = "/{$base[0]}/iuS";
			}
			if ( $base[1] !== '' ) {
				$this->regex[1] = "/{$base[1]}/S";
			}
		}
		return $this->regex;
	}

	/**
	 * Get a regex for matching variables with parameters
	 *
	 * @return array Same shape as getRegex(), with the "$1" placeholder
	 *   turned into a capture group
	 */
	public function getVariableRegex() {
		return str_replace( "\\$1", "(.*?)", $this->getRegex() );
	}

	/**
	 * Get a regex anchored to the start of the string that does not match parameters
	 *
	 * @return array
	 */
	public function getRegexStart() {
		$base = $this->getBaseRegex();
		$newRegex = array( '', '' );
		if ( $base[0] !== '' ) {
			$newRegex[0] = "/^(?:{$base[0]})/iuS";
		}
		if ( $base[1] !== '' ) {
			$newRegex[1] = "/^(?:{$base[1]})/S";
		}
		return $newRegex;
	}

	/**
	 * Get an anchored regex for matching variables with parameters
	 *
	 * @return array
	 */
	public function getVariableStartToEndRegex() {
		$base = $this->getBaseRegex();
		$newRegex = array( '', '' );
		if ( $base[0] !== '' ) {
			$newRegex[0] = str_replace( "\\$1", "(.*?)", "/^(?:{$base[0]})$/iuS" );
		}
		if ( $base[1] !== '' ) {
			$newRegex[1] = str_replace( "\\$1", "(.*?)", "/^(?:{$base[1]})$/S" );
		}
		return $newRegex;
	}

	/**
	 * Parse a match array from preg_match
	 * Returns array(magic word ID, parameter value)
	 * If there is no parameter value, that element will be false.
	 *
	 * @param $m array
	 *
	 * @return array
	 * @throws MWException If no usable named group is found in $m
	 */
	public function parseMatch( $m ) {
		reset( $m );
		// Walk the array with key()/current()/next(); each() no longer
		// exists in PHP 8. The pointer is advanced before the checks, exactly
		// as each() did.
		while ( ( $key = key( $m ) ) !== null ) {
			$value = current( $m );
			next( $m );
			if ( $key === 0 || $value === '' ) {
				continue;
			}
			$parts = explode( '_', $key, 2 );
			if ( count( $parts ) != 2 ) {
				// This shouldn't happen
				throw new MWException( __METHOD__ . ': bad parameter name' );
			}
			list( /* $synIndex */, $magicName ) = $parts;
			// The pointer now sits on the numeric twin of the named group;
			// the element after it is the parameter capture, if there is one.
			$paramValue = next( $m );
			return array( $magicName, $paramValue );
		}
		// This shouldn't happen either
		throw new MWException( __METHOD__.': parameter not found' );
	}

	/**
	 * Match some text, with parameter capture
	 * Returns an array with the magic word name in the first element and the
	 * parameter in the second element.
	 * Both elements are false if there was no match.
	 *
	 * @param $text string
	 *
	 * @return array
	 */
	public function matchVariableStartToEnd( $text ) {
		foreach ( $this->getVariableStartToEndRegex() as $regex ) {
			if ( $regex === '' ) {
				continue;
			}
			$m = false;
			if ( preg_match( $regex, $text, $m ) ) {
				return $this->parseMatch( $m );
			}
		}
		return array( false, false );
	}

	/**
	 * Match some text, without parameter capture
	 * Returns the magic word name, or false if there was no capture
	 *
	 * @param $text string
	 *
	 * @return string|false
	 */
	public function matchStartToEnd( $text ) {
		$hash = $this->getHash();
		// Case-sensitive words are looked up with the text as given
		if ( isset( $hash[1][$text] ) ) {
			return $hash[1][$text];
		}
		global $wgContLang;
		$lc = $wgContLang->lc( $text );
		if ( isset( $hash[0][$lc] ) ) {
			return $hash[0][$lc];
		}
		return false;
	}

	/**
	 * Returns an associative array, ID => param value, for all items that match
	 * Removes the matched items from the input string (passed by reference)
	 *
	 * @param $text string
	 *
	 * @return array
	 */
	public function matchAndRemove( &$text ) {
		$found = array();
		foreach ( $this->getRegex() as $regex ) {
			if ( $regex === '' ) {
				continue;
			}
			preg_match_all( $regex, $text, $matches, PREG_SET_ORDER );
			foreach ( $matches as $m ) {
				list( $name, $param ) = $this->parseMatch( $m );
				$found[$name] = $param;
			}
			$text = preg_replace( $regex, '', $text );
		}
		return $found;
	}

	/**
	 * Return the ID of the magic word at the start of $text, and remove
	 * the prefix from $text.
	 * Return false if no match found and $text is not modified.
	 * Does not match parameters.
	 *
	 * @param $text string
	 *
	 * @return string|false
	 */
	public function matchStartAndRemove( &$text ) {
		foreach ( $this->getRegexStart() as $regex ) {
			if ( $regex === '' ) {
				continue;
			}
			if ( preg_match( $regex, $text, $m ) ) {
				list( $id, ) = $this->parseMatch( $m );
				// Guard the whole-string case explicitly: older PHP versions
				// return false from substr() when nothing is left
				if ( strlen( $m[0] ) >= strlen( $text ) ) {
					$text = '';
				} else {
					$text = substr( $text, strlen( $m[0] ) );
				}
				return $id;
			}
		}
		return false;
	}
}