* Added a proper Pager::doBatchLookups() function
[lhc/web/wiklou.git] / includes / MagicWord.php
1 <?php
2 /**
3 * File for magic words
4 *
5 * See docs/magicword.txt
6 *
7 * @file
8 * @ingroup Parser
9 */
10
/**
 * This class encapsulates "magic words" such as #redirect, __NOTOC__, etc.
 * Usage:
 *     if ( MagicWord::get( 'redirect' )->match( $text ) )
 *
 * Possible future improvements:
 *   * Simultaneous searching for a number of magic words
 *   * MagicWord::$mObjects in shared memory
 *
 * Please avoid reading the data out of one of these objects and then writing
 * special case code. If possible, add another match()-like function here.
 *
 * To add magic words in an extension, use the LanguageGetMagic hook. For
 * magic words which are also Parser variables, add a MagicWordwgVariableIDs
 * hook. Use string keys.
 *
 * @ingroup Parser
 */
class MagicWord {
	/**#@+
	 * @private
	 */
	public $mId, $mSynonyms, $mCaseSensitive;
	public $mRegex = '';
	public $mRegexStart = '';
	public $mBaseRegex = '';
	public $mVariableRegex = '';
	public $mVariableStartToEndRegex = '';
	public $mModified = false;
	public $mFound = false;

	public static $mVariableIDsInitialised = false;
	public static $mVariableIDs = array(
		'currentmonth',
		'currentmonth1',
		'currentmonthname',
		'currentmonthnamegen',
		'currentmonthabbrev',
		'currentday',
		'currentday2',
		'currentdayname',
		'currentyear',
		'currenttime',
		'currenthour',
		'localmonth',
		'localmonth1',
		'localmonthname',
		'localmonthnamegen',
		'localmonthabbrev',
		'localday',
		'localday2',
		'localdayname',
		'localyear',
		'localtime',
		'localhour',
		'numberofarticles',
		'numberoffiles',
		'numberofedits',
		'articlepath',
		'sitename',
		'server',
		'servername',
		'scriptpath',
		'stylepath',
		'pagename',
		'pagenamee',
		'fullpagename',
		'fullpagenamee',
		'namespace',
		'namespacee',
		'currentweek',
		'currentdow',
		'localweek',
		'localdow',
		'revisionid',
		'revisionday',
		'revisionday2',
		'revisionmonth',
		'revisionmonth1',
		'revisionyear',
		'revisiontimestamp',
		'revisionuser',
		'subpagename',
		'subpagenamee',
		'talkspace',
		'talkspacee',
		'subjectspace',
		'subjectspacee',
		'talkpagename',
		'talkpagenamee',
		'subjectpagename',
		'subjectpagenamee',
		'numberofusers',
		'numberofactiveusers',
		'numberofpages',
		'currentversion',
		'basepagename',
		'basepagenamee',
		'currenttimestamp',
		'localtimestamp',
		'directionmark',
		'contentlanguage',
		'numberofadmins',
		'numberofviews',
	);

	/* Array of caching hints for ParserCache (magic word ID => TTL value) */
	public static $mCacheTTLs = array(
		'currentmonth' => 86400,
		'currentmonth1' => 86400,
		'currentmonthname' => 86400,
		'currentmonthnamegen' => 86400,
		'currentmonthabbrev' => 86400,
		'currentday' => 3600,
		'currentday2' => 3600,
		'currentdayname' => 3600,
		'currentyear' => 86400,
		'currenttime' => 3600,
		'currenthour' => 3600,
		'localmonth' => 86400,
		'localmonth1' => 86400,
		'localmonthname' => 86400,
		'localmonthnamegen' => 86400,
		'localmonthabbrev' => 86400,
		'localday' => 3600,
		'localday2' => 3600,
		'localdayname' => 3600,
		'localyear' => 86400,
		'localtime' => 3600,
		'localhour' => 3600,
		'numberofarticles' => 3600,
		'numberoffiles' => 3600,
		'numberofedits' => 3600,
		'currentweek' => 3600,
		'currentdow' => 3600,
		'localweek' => 3600,
		'localdow' => 3600,
		'numberofusers' => 3600,
		'numberofactiveusers' => 3600,
		'numberofpages' => 3600,
		'currentversion' => 86400,
		'currenttimestamp' => 3600,
		'localtimestamp' => 3600,
		'pagesinnamespace' => 3600,
		'numberofadmins' => 3600,
		'numberofviews' => 3600,
		'numberingroup' => 3600,
	);

	public static $mDoubleUnderscoreIDs = array(
		'notoc',
		'nogallery',
		'forcetoc',
		'toc',
		'noeditsection',
		'newsectionlink',
		'nonewsectionlink',
		'hiddencat',
		'index',
		'noindex',
		'staticredirect',
		'notitleconvert',
		'nocontentconvert',
	);

	public static $mSubstIDs = array(
		'subst',
		'safesubst',
	);

	public static $mObjects = array();
	public static $mDoubleUnderscoreArray = null;

	/**#@-*/

	/**
	 * @param $id mixed Magic word ID
	 * @param $syn array|string Synonym(s); a scalar is cast to a one-element array
	 * @param $cs bool Whether matching is case-sensitive
	 */
	public function __construct( $id = 0, $syn = array(), $cs = false ) {
		$this->mId = $id;
		$this->mSynonyms = (array)$syn;
		$this->mCaseSensitive = $cs;
	}

	/**
	 * Factory: creates an object representing an ID.
	 * Objects are cached in self::$mObjects, so each ID is loaded at most
	 * once until clearCache() is called.
	 *
	 * @param $id
	 *
	 * @return MagicWord
	 */
	public static function &get( $id ) {
		if ( !isset( self::$mObjects[$id] ) ) {
			$mw = new MagicWord();
			$mw->load( $id );
			self::$mObjects[$id] = $mw;
		}
		return self::$mObjects[$id];
	}

	/**
	 * Get an array of parser variable IDs.
	 * On the first call the 'MagicWordMagicWords' and 'MagicWordwgVariableIDs'
	 * hooks are run; later calls return the already-extended list.
	 *
	 * @return array
	 */
	public static function getVariableIDs() {
		if ( !self::$mVariableIDsInitialised ) {
			# Deprecated constant definition hook, available for extensions that need it
			$magicWords = array();
			wfRunHooks( 'MagicWordMagicWords', array( &$magicWords ) );
			foreach ( $magicWords as $word ) {
				define( $word, $word );
			}

			# Get variable IDs
			wfRunHooks( 'MagicWordwgVariableIDs', array( &self::$mVariableIDs ) );
			self::$mVariableIDsInitialised = true;
		}
		return self::$mVariableIDs;
	}

	/**
	 * Get an array of parser substitution modifier IDs
	 *
	 * @return array
	 */
	public static function getSubstIDs() {
		return self::$mSubstIDs;
	}

	/**
	 * Allow external reads of TTL array
	 *
	 * @param $id string Magic word ID
	 *
	 * @return int The entry of self::$mCacheTTLs for $id, or -1 if there is none
	 */
	public static function getCacheTTL( $id ) {
		if ( array_key_exists( $id, self::$mCacheTTLs ) ) {
			return self::$mCacheTTLs[$id];
		}
		return -1;
	}

	/**
	 * Get a MagicWordArray of double-underscore entities.
	 * The array is built once from self::$mDoubleUnderscoreIDs and reused.
	 *
	 * @return MagicWordArray
	 */
	public static function getDoubleUnderscoreArray() {
		if ( is_null( self::$mDoubleUnderscoreArray ) ) {
			self::$mDoubleUnderscoreArray = new MagicWordArray( self::$mDoubleUnderscoreIDs );
		}
		return self::$mDoubleUnderscoreArray;
	}

	/**
	 * Clear the self::$mObjects variable
	 * For use in parser tests
	 */
	public static function clearCache() {
		self::$mObjects = array();
	}

	/**
	 * Initialises this object with an ID.
	 * The object is handed to $wgContLang->getMagic(), which is presumably
	 * what fills in the synonyms and case flag (defined outside this file).
	 *
	 * @param $id
	 */
	public function load( $id ) {
		global $wgContLang;
		wfProfileIn( __METHOD__ );
		$this->mId = $id;
		$wgContLang->getMagic( $this );
		if ( !$this->mSynonyms ) {
			// Unknown ID: install a placeholder synonym and log the problem.
			// Throwing an MWException here was deliberately left disabled.
			$this->mSynonyms = array( 'dkjsagfjsgashfajsh' );
			wfDebugLog( 'exception', "Error: invalid magic word '$id'\n" );
		}
		wfProfileOut( __METHOD__ );
	}

	/**
	 * Preliminary initialisation: builds all cached regex strings from the
	 * synonym list.
	 * @private
	 */
	public function initRegex() {
		// Sort the synonyms by length, descending, so that the longest synonym
		// matches in precedence to the shortest
		$synonyms = $this->mSynonyms;
		usort( $synonyms, array( $this, 'compareStringLength' ) );

		$escSyn = array();
		foreach ( $synonyms as $synonym ) {
			// In case a magic word contains /, like that's going to happen;)
			$escSyn[] = preg_quote( $synonym, '/' );
		}
		$this->mBaseRegex = implode( '|', $escSyn );

		$case = $this->mCaseSensitive ? '' : 'iu';
		$this->mRegex = "/{$this->mBaseRegex}/{$case}";
		$this->mRegexStart = "/^(?:{$this->mBaseRegex})/{$case}";
		// preg_quote() turned "$1" into "\$1"; swap that for a lazy capture group
		$this->mVariableRegex = str_replace( "\\$1", "(.*?)", $this->mRegex );
		$this->mVariableStartToEndRegex = str_replace( "\\$1", "(.*?)",
			"/^(?:{$this->mBaseRegex})$/{$case}" );
	}

	/**
	 * A comparison function that returns -1, 0 or 1 depending on whether the
	 * first string is longer, the same length or shorter than the second
	 * string. Lengths are compared in bytes (strlen).
	 *
	 * @param $s1 string
	 * @param $s2 string
	 *
	 * @return int
	 */
	public function compareStringLength( $s1, $s2 ) {
		$l1 = strlen( $s1 );
		$l2 = strlen( $s2 );
		if ( $l1 < $l2 ) {
			return 1;
		} elseif ( $l1 > $l2 ) {
			return -1;
		}
		return 0;
	}

	/**
	 * Gets a regex representing matching the word
	 *
	 * @return string
	 */
	public function getRegex() {
		if ( $this->mRegex == '' ) {
			$this->initRegex();
		}
		return $this->mRegex;
	}

	/**
	 * Gets the regexp modifiers to use, i.e. 'iu' for case-insensitive words
	 * or nothing for case-sensitive ones, to be used if one is using
	 * MagicWord::getBaseRegex(), otherwise it'll be included in the complete
	 * expression
	 *
	 * @return string
	 */
	public function getRegexCase() {
		if ( $this->mRegex === '' ) {
			$this->initRegex();
		}
		return $this->mCaseSensitive ? '' : 'iu';
	}

	/**
	 * Gets a regex matching the word, if it is at the string start
	 *
	 * @return string
	 */
	public function getRegexStart() {
		if ( $this->mRegex == '' ) {
			$this->initRegex();
		}
		return $this->mRegexStart;
	}

	/**
	 * regex without the slashes and what not
	 *
	 * @return string
	 */
	public function getBaseRegex() {
		if ( $this->mRegex == '' ) {
			$this->initRegex();
		}
		return $this->mBaseRegex;
	}

	/**
	 * Returns true if the text contains the word
	 *
	 * @param $text string
	 *
	 * @return bool
	 */
	public function match( $text ) {
		return (bool)preg_match( $this->getRegex(), $text );
	}

	/**
	 * Returns true if the text starts with the word
	 *
	 * @param $text string
	 *
	 * @return bool
	 */
	public function matchStart( $text ) {
		return (bool)preg_match( $this->getRegexStart(), $text );
	}

	/**
	 * Returns NULL if there's no match, the value of $1 otherwise
	 * The return code is the matched string, if there's no variable
	 * part in the regex and the matched variable part ($1) if there
	 * is one.
	 *
	 * @param $text string
	 *
	 * @return string|null
	 */
	public function matchVariableStartToEnd( $text ) {
		$matches = array();
		$matchcount = preg_match( $this->getVariableStartToEndRegex(), $text, $matches );
		if ( !$matchcount ) {
			return null;
		}

		# multiple matched parts (variable match); some will be empty because of
		# synonyms. The variable will be the second non-empty one so remove any
		# blank elements and re-sort the indices.
		# See also bug 6526
		#
		# Only genuinely empty strings are dropped: a bare array_filter() would
		# also discard a captured "0", making e.g. "0px" return the whole match
		# instead of the variable part.
		$nonEmpty = array();
		foreach ( $matches as $piece ) {
			if ( $piece !== '' ) {
				$nonEmpty[] = $piece;
			}
		}

		if ( count( $nonEmpty ) == 0 ) {
			// The pattern matched an empty string; nothing usable to return.
			return null;
		}
		return count( $nonEmpty ) == 1 ? $nonEmpty[0] : $nonEmpty[1];
	}

	/**
	 * Returns true if the text matches the word, and alters the
	 * input string, removing all instances of the word
	 *
	 * @param $text string
	 *
	 * @return bool
	 */
	public function matchAndRemove( &$text ) {
		$this->mFound = false;
		$text = preg_replace_callback( $this->getRegex(), array( $this, 'pregRemoveAndRecord' ), $text );
		return $this->mFound;
	}

	/**
	 * Returns true if the text starts with the word, and alters the input
	 * string, removing that leading occurrence
	 *
	 * @param $text string
	 *
	 * @return bool
	 */
	public function matchStartAndRemove( &$text ) {
		$this->mFound = false;
		$text = preg_replace_callback( $this->getRegexStart(), array( $this, 'pregRemoveAndRecord' ), $text );
		return $this->mFound;
	}

	/**
	 * Used in matchAndRemove() and matchStartAndRemove(): records that a
	 * match happened and replaces it with nothing.
	 *
	 * @return string
	 */
	public function pregRemoveAndRecord() {
		$this->mFound = true;
		return '';
	}

	/**
	 * Replaces the word with something else
	 *
	 * @param $replacement
	 * @param $subject
	 * @param $limit int
	 *
	 * @return string
	 */
	public function replace( $replacement, $subject, $limit = -1 ) {
		$res = preg_replace(
			$this->getRegex(),
			StringUtils::escapeRegexReplacement( $replacement ),
			$subject,
			$limit
		);
		$this->mModified = ( $res !== $subject );
		return $res;
	}

	/**
	 * Variable handling: {{SUBST:xxx}} style words
	 * Calls back a function to determine what to replace xxx with
	 * Input word must contain $1
	 *
	 * @param $text string
	 * @param $callback
	 *
	 * @return string
	 */
	public function substituteCallback( $text, $callback ) {
		$res = preg_replace_callback( $this->getVariableRegex(), $callback, $text );
		$this->mModified = ( $res !== $text );
		return $res;
	}

	/**
	 * Matches the word, where $1 is a wildcard
	 *
	 * @return string
	 */
	public function getVariableRegex() {
		if ( $this->mVariableRegex == '' ) {
			$this->initRegex();
		}
		return $this->mVariableRegex;
	}

	/**
	 * Matches the entire string, where $1 is a wildcard
	 *
	 * @return string
	 */
	public function getVariableStartToEndRegex() {
		if ( $this->mVariableStartToEndRegex == '' ) {
			$this->initRegex();
		}
		return $this->mVariableStartToEndRegex;
	}

	/**
	 * Accesses the synonym list directly
	 *
	 * @param $i int
	 *
	 * @return string
	 */
	public function getSynonym( $i ) {
		return $this->mSynonyms[$i];
	}

	/**
	 * @return array
	 */
	public function getSynonyms() {
		return $this->mSynonyms;
	}

	/**
	 * Returns true if the last call to replace() or substituteCallback()
	 * returned a modified text, otherwise false.
	 *
	 * @return bool
	 */
	public function getWasModified() {
		return $this->mModified;
	}

	/**
	 * $magicarr is an associative array of (magic word ID => replacement)
	 * This method uses the php feature to do several replacements at the same time,
	 * thereby gaining some efficiency. The result is placed in the out variable
	 * $result. The return value is true if something was replaced.
	 * Note that, unlike replace(), the replacement strings are passed to
	 * preg_replace() without escaping.
	 * @todo Should this be static? It doesn't seem to be used at all
	 *
	 * @param $magicarr
	 * @param $subject
	 * @param $result
	 *
	 * @return bool
	 */
	public function replaceMultiple( $magicarr, $subject, &$result ) {
		$search = array();
		$replace = array();
		foreach ( $magicarr as $id => $replacement ) {
			$mw = MagicWord::get( $id );
			$search[] = $mw->getRegex();
			$replace[] = $replacement;
		}

		$result = preg_replace( $search, $replace, $subject );
		return $result !== $subject;
	}

	/**
	 * Adds all the synonyms of this MagicWord to an array, to allow quick
	 * lookup in a list of magic words. Keys are the synonyms lower-cased
	 * with $wgContLang->lc().
	 *
	 * @param $array
	 * @param $value
	 */
	public function addToArray( &$array, $value ) {
		global $wgContLang;
		foreach ( $this->mSynonyms as $syn ) {
			$array[$wgContLang->lc( $syn )] = $value;
		}
	}

	/**
	 * @return bool
	 */
	public function isCaseSensitive() {
		return $this->mCaseSensitive;
	}

	/**
	 * @return mixed The ID this object was constructed or loaded with
	 */
	public function getId() {
		return $this->mId;
	}
}
608
/**
 * Class for handling an array of magic words
 * @ingroup Parser
 */
class MagicWordArray {
	public $names = array();
	public $hash;
	public $baseRegex, $regex;
	public $matches;

	/**
	 * @param $names array List of magic word IDs
	 */
	public function __construct( $names = array() ) {
		$this->names = $names;
	}

	/**
	 * Add a magic word by name.
	 * Resets the cached hash and regexes so they are rebuilt on next use.
	 *
	 * @param $name string
	 */
	public function add( $name ) {
		$this->names[] = $name;
		$this->hash = $this->baseRegex = $this->regex = null;
	}

	/**
	 * Add a number of magic words by name.
	 * Resets the cached hash and regexes so they are rebuilt on next use.
	 *
	 * @param $names array
	 */
	public function addArray( $names ) {
		$this->names = array_merge( $this->names, array_values( $names ) );
		$this->hash = $this->baseRegex = $this->regex = null;
	}

	/**
	 * Get a 2-d hashtable for this array.
	 * Index 0 holds case-insensitive words (synonyms lower-cased with
	 * $wgContLang->lc()), index 1 holds case-sensitive ones; each maps
	 * synonym => magic word ID.
	 *
	 * @return array
	 */
	public function getHash() {
		if ( is_null( $this->hash ) ) {
			global $wgContLang;
			$this->hash = array( 0 => array(), 1 => array() );
			foreach ( $this->names as $name ) {
				$magic = MagicWord::get( $name );
				$case = intval( $magic->isCaseSensitive() );
				foreach ( $magic->getSynonyms() as $syn ) {
					if ( !$case ) {
						$syn = $wgContLang->lc( $syn );
					}
					$this->hash[$case][$syn] = $name;
				}
			}
		}
		return $this->hash;
	}

	/**
	 * Get the base regex.
	 * Every synonym becomes a named group "<synonym index>_<magic word ID>";
	 * parseMatch() relies on that naming to recover the ID.
	 *
	 * @return array Two alternation strings: index 0 case-insensitive,
	 *   index 1 case-sensitive ('' when there are no such words)
	 */
	public function getBaseRegex() {
		if ( is_null( $this->baseRegex ) ) {
			$this->baseRegex = array( 0 => '', 1 => '' );
			foreach ( $this->names as $name ) {
				$magic = MagicWord::get( $name );
				$case = intval( $magic->isCaseSensitive() );
				foreach ( $magic->getSynonyms() as $i => $syn ) {
					$group = "(?P<{$i}_{$name}>" . preg_quote( $syn, '/' ) . ')';
					if ( $this->baseRegex[$case] === '' ) {
						$this->baseRegex[$case] = $group;
					} else {
						$this->baseRegex[$case] .= '|' . $group;
					}
				}
			}
		}
		return $this->baseRegex;
	}

	/**
	 * Get an unanchored regex that does not match parameters
	 *
	 * @return array Two regexes: index 0 case-insensitive, index 1
	 *   case-sensitive ('' when there are no such words)
	 */
	public function getRegex() {
		if ( is_null( $this->regex ) ) {
			$base = $this->getBaseRegex();
			$this->regex = array( '', '' );
			if ( $base[0] !== '' ) {
				$this->regex[0] = "/{$base[0]}/iuS";
			}
			if ( $base[1] !== '' ) {
				$this->regex[1] = "/{$base[1]}/S";
			}
		}
		return $this->regex;
	}

	/**
	 * Get a regex for matching variables with parameters
	 *
	 * @return array Same layout as getRegex(), with the quoted "$1"
	 *   placeholder replaced by a lazy capture group
	 */
	public function getVariableRegex() {
		return str_replace( "\\$1", "(.*?)", $this->getRegex() );
	}

	/**
	 * Get a regex anchored to the start of the string that does not match parameters
	 *
	 * @return array Two regexes: index 0 case-insensitive, index 1
	 *   case-sensitive ('' when there are no such words)
	 */
	public function getRegexStart() {
		$base = $this->getBaseRegex();
		$newRegex = array( '', '' );
		if ( $base[0] !== '' ) {
			$newRegex[0] = "/^(?:{$base[0]})/iuS";
		}
		if ( $base[1] !== '' ) {
			$newRegex[1] = "/^(?:{$base[1]})/S";
		}
		return $newRegex;
	}

	/**
	 * Get an anchored regex for matching variables with parameters
	 *
	 * @return array Two regexes: index 0 case-insensitive, index 1
	 *   case-sensitive ('' when there are no such words)
	 */
	public function getVariableStartToEndRegex() {
		$base = $this->getBaseRegex();
		$newRegex = array( '', '' );
		if ( $base[0] !== '' ) {
			$newRegex[0] = str_replace( "\\$1", "(.*?)", "/^(?:{$base[0]})$/iuS" );
		}
		if ( $base[1] !== '' ) {
			$newRegex[1] = str_replace( "\\$1", "(.*?)", "/^(?:{$base[1]})$/S" );
		}
		return $newRegex;
	}

	/**
	 * Parse a match array from preg_match
	 * Returns array(magic word ID, parameter value)
	 * If there is no parameter value, that element will be false.
	 *
	 * @param $m array
	 *
	 * @return array
	 * @throws MWException If no usable named group is found
	 */
	public function parseMatch( $m ) {
		reset( $m );
		// Walk the array with the internal pointer (each() no longer exists in
		// PHP 8). The pointer is advanced *before* the checks, exactly as
		// each() did, so the later next() lands on the right element.
		while ( ( $key = key( $m ) ) !== null ) {
			$value = current( $m );
			next( $m );
			if ( $key === 0 || $value === '' ) {
				continue;
			}
			$parts = explode( '_', $key, 2 );
			if ( count( $parts ) != 2 ) {
				// This shouldn't happen
				throw new MWException( __METHOD__ . ': bad parameter name' );
			}
			// $parts[0] is the synonym index, which is not needed here
			$magicName = $parts[1];
			// The pointer now sits on the numeric duplicate of the named
			// group; the element after it is the parameter capture, if any.
			$paramValue = next( $m );
			return array( $magicName, $paramValue );
		}
		// This shouldn't happen either
		throw new MWException( __METHOD__.': parameter not found' );
	}

	/**
	 * Match some text, with parameter capture
	 * Returns an array with the magic word name in the first element and the
	 * parameter in the second element.
	 * Both elements are false if there was no match.
	 *
	 * @param $text string
	 *
	 * @return array
	 */
	public function matchVariableStartToEnd( $text ) {
		foreach ( $this->getVariableStartToEndRegex() as $regex ) {
			if ( $regex === '' ) {
				continue;
			}
			$m = array();
			if ( preg_match( $regex, $text, $m ) ) {
				return $this->parseMatch( $m );
			}
		}
		return array( false, false );
	}

	/**
	 * Match some text, without parameter capture
	 * Returns the magic word name, or false if there was no capture.
	 * The case-sensitive table is consulted first, then the case-insensitive
	 * one using the lower-cased text.
	 *
	 * @param $text string
	 *
	 * @return string|false
	 */
	public function matchStartToEnd( $text ) {
		$hash = $this->getHash();
		if ( isset( $hash[1][$text] ) ) {
			return $hash[1][$text];
		}
		global $wgContLang;
		$lc = $wgContLang->lc( $text );
		if ( isset( $hash[0][$lc] ) ) {
			return $hash[0][$lc];
		}
		return false;
	}

	/**
	 * Returns an associative array, ID => param value, for all items that match
	 * Removes the matched items from the input string (passed by reference)
	 *
	 * @param $text string
	 *
	 * @return array
	 */
	public function matchAndRemove( &$text ) {
		$found = array();
		foreach ( $this->getRegex() as $regex ) {
			if ( $regex === '' ) {
				continue;
			}
			preg_match_all( $regex, $text, $matches, PREG_SET_ORDER );
			foreach ( $matches as $m ) {
				list( $name, $param ) = $this->parseMatch( $m );
				$found[$name] = $param;
			}
			$text = preg_replace( $regex, '', $text );
		}
		return $found;
	}

	/**
	 * Return the ID of the magic word at the start of $text, and remove
	 * the prefix from $text.
	 * Return false if no match found and $text is not modified.
	 * Does not match parameters.
	 *
	 * @param $text string
	 *
	 * @return string|false
	 */
	public function matchStartAndRemove( &$text ) {
		foreach ( $this->getRegexStart() as $regex ) {
			if ( $regex === '' ) {
				continue;
			}
			if ( preg_match( $regex, $text, $m ) ) {
				list( $id, ) = $this->parseMatch( $m );
				$prefixLength = strlen( $m[0] );
				// Guard the whole-string case explicitly: substr() at the end
				// of a string returned false rather than '' before PHP 8.
				if ( $prefixLength >= strlen( $text ) ) {
					$text = '';
				} else {
					$text = substr( $text, $prefixLength );
				}
				return $id;
			}
		}
		return false;
	}
}