(bug 34289) user.options CSS loaded twice. Fixed by splitting off the CSS part of...
[lhc/web/wiklou.git] / includes / MagicWord.php
1 <?php
2 /**
3 * File for magic words
4 *
5 * See docs/magicword.txt
6 *
7 * @file
8 * @ingroup Parser
9 */
10
/**
 * This class encapsulates "magic words" such as #redirect, __NOTOC__, etc.
 *
 * @par Usage:
 * @code
 *     if ( MagicWord::get( 'redirect' )->match( $text ) ) {
 *       // some code
 *     }
 * @endcode
 *
 * Possible future improvements:
 *   * Simultaneous searching for a number of magic words
 *   * MagicWord::$mObjects in shared memory
 *
 * Please avoid reading the data out of one of these objects and then writing
 * special case code. If possible, add another match()-like function here.
 *
 * To add magic words in an extension, use $magicWords in a file listed in
 * $wgExtensionMessagesFiles[].
 *
 * @par Example:
 * @code
 * $magicWords = array();
 *
 * $magicWords['en'] = array(
 *     'magicwordkey' => array( 0, 'case_insensitive_magic_word' ),
 *     'magicwordkey2' => array( 1, 'CASE_sensitive_magic_word2' ),
 * );
 * @endcode
 *
 * For magic words which are also Parser variables, add a MagicWordwgVariableIDs
 * hook. Use string keys.
 *
 * @ingroup Parser
 */
class MagicWord {
	/**#@+
	 * @private
	 */
	public $mId, $mSynonyms, $mCaseSensitive;
	// The regex members below are built lazily by initRegex(); '' means "not built yet".
	public $mRegex = '';
	public $mRegexStart = '';
	public $mBaseRegex = '';
	public $mVariableRegex = '';
	public $mVariableStartToEndRegex = '';
	// Set by replace() and substituteCallback(); read through getWasModified().
	public $mModified = false;
	// Scratch flag set by pregRemoveAndRecord() during the *AndRemove() methods.
	public $mFound = false;

	// True once the MagicWordwgVariableIDs hook has been run on $mVariableIDs.
	static public $mVariableIDsInitialised = false;
	static public $mVariableIDs = array(
		'currentmonth',
		'currentmonth1',
		'currentmonthname',
		'currentmonthnamegen',
		'currentmonthabbrev',
		'currentday',
		'currentday2',
		'currentdayname',
		'currentyear',
		'currenttime',
		'currenthour',
		'localmonth',
		'localmonth1',
		'localmonthname',
		'localmonthnamegen',
		'localmonthabbrev',
		'localday',
		'localday2',
		'localdayname',
		'localyear',
		'localtime',
		'localhour',
		'numberofarticles',
		'numberoffiles',
		'numberofedits',
		'articlepath',
		'sitename',
		'server',
		'servername',
		'scriptpath',
		'stylepath',
		'pagename',
		'pagenamee',
		'fullpagename',
		'fullpagenamee',
		'namespace',
		'namespacee',
		'currentweek',
		'currentdow',
		'localweek',
		'localdow',
		'revisionid',
		'revisionday',
		'revisionday2',
		'revisionmonth',
		'revisionmonth1',
		'revisionyear',
		'revisiontimestamp',
		'revisionuser',
		'subpagename',
		'subpagenamee',
		'talkspace',
		'talkspacee',
		'subjectspace',
		'subjectspacee',
		'talkpagename',
		'talkpagenamee',
		'subjectpagename',
		'subjectpagenamee',
		'numberofusers',
		'numberofactiveusers',
		'numberofpages',
		'currentversion',
		'basepagename',
		'basepagenamee',
		'currenttimestamp',
		'localtimestamp',
		'directionmark',
		'contentlanguage',
		'numberofadmins',
		'numberofviews',
	);

	/* Array of caching hints for ParserCache */
	static public $mCacheTTLs = array (
		'currentmonth' => 86400,
		'currentmonth1' => 86400,
		'currentmonthname' => 86400,
		'currentmonthnamegen' => 86400,
		'currentmonthabbrev' => 86400,
		'currentday' => 3600,
		'currentday2' => 3600,
		'currentdayname' => 3600,
		'currentyear' => 86400,
		'currenttime' => 3600,
		'currenthour' => 3600,
		'localmonth' => 86400,
		'localmonth1' => 86400,
		'localmonthname' => 86400,
		'localmonthnamegen' => 86400,
		'localmonthabbrev' => 86400,
		'localday' => 3600,
		'localday2' => 3600,
		'localdayname' => 3600,
		'localyear' => 86400,
		'localtime' => 3600,
		'localhour' => 3600,
		'numberofarticles' => 3600,
		'numberoffiles' => 3600,
		'numberofedits' => 3600,
		'currentweek' => 3600,
		'currentdow' => 3600,
		'localweek' => 3600,
		'localdow' => 3600,
		'numberofusers' => 3600,
		'numberofactiveusers' => 3600,
		'numberofpages' => 3600,
		'currentversion' => 86400,
		'currenttimestamp' => 3600,
		'localtimestamp' => 3600,
		'pagesinnamespace' => 3600,
		'numberofadmins' => 3600,
		'numberofviews' => 3600,
		'numberingroup' => 3600,
	);

	static public $mDoubleUnderscoreIDs = array(
		'notoc',
		'nogallery',
		'forcetoc',
		'toc',
		'noeditsection',
		'newsectionlink',
		'nonewsectionlink',
		'hiddencat',
		'index',
		'noindex',
		'staticredirect',
		'notitleconvert',
		'nocontentconvert',
	);

	static public $mSubstIDs = array(
		'subst',
		'safesubst',
	);

	// Per-request cache of MagicWord instances, keyed by magic word ID (see get()).
	static public $mObjects = array();
	// Lazily created by getDoubleUnderscoreArray().
	static public $mDoubleUnderscoreArray = null;

	/**#@-*/

	/**
	 * @param $id Magic word ID
	 * @param $syn array|string Synonym(s); a scalar is cast to a one-element array
	 * @param $cs bool Whether matching is case sensitive
	 */
	public function __construct( $id = 0, $syn = array(), $cs = false ) {
		$this->mId = $id;
		$this->mSynonyms = (array)$syn;
		$this->mCaseSensitive = $cs;
	}

	/**
	 * Factory: creates an object representing an ID
	 *
	 * Instances are cached in self::$mObjects, so repeated calls with the
	 * same ID return the same object.
	 *
	 * @param $id
	 *
	 * @return MagicWord
	 */
	public static function &get( $id ) {
		if ( !isset( self::$mObjects[$id] ) ) {
			$mw = new MagicWord();
			$mw->load( $id );
			self::$mObjects[$id] = $mw;
		}
		return self::$mObjects[$id];
	}

	/**
	 * Get an array of parser variable IDs
	 *
	 * The MagicWordwgVariableIDs hook is run once, on first call, and may
	 * modify the list in place.
	 *
	 * @return array
	 */
	public static function getVariableIDs() {
		if ( !self::$mVariableIDsInitialised ) {
			# Get variable IDs
			wfRunHooks( 'MagicWordwgVariableIDs', array( &self::$mVariableIDs ) );
			self::$mVariableIDsInitialised = true;
		}
		return self::$mVariableIDs;
	}

	/**
	 * Get an array of parser substitution modifier IDs
	 * @return array
	 */
	public static function getSubstIDs() {
		return self::$mSubstIDs;
	}

	/**
	 * Allow external reads of TTL array
	 *
	 * @param $id string Magic word ID (a key of self::$mCacheTTLs)
	 * @return int The TTL registered for $id, or -1 if there is none
	 */
	public static function getCacheTTL( $id ) {
		if ( array_key_exists( $id, self::$mCacheTTLs ) ) {
			return self::$mCacheTTLs[$id];
		}
		return -1;
	}

	/**
	 * Get a MagicWordArray of double-underscore entities
	 *
	 * @return MagicWordArray
	 */
	public static function getDoubleUnderscoreArray() {
		if ( self::$mDoubleUnderscoreArray === null ) {
			self::$mDoubleUnderscoreArray = new MagicWordArray( self::$mDoubleUnderscoreIDs );
		}
		return self::$mDoubleUnderscoreArray;
	}

	/**
	 * Clear the self::$mObjects variable
	 * For use in parser tests
	 */
	public static function clearCache() {
		self::$mObjects = array();
	}

	/**
	 * Initialises this object with an ID
	 *
	 * Asks the content language ($wgContLang) to fill in this object via
	 * getMagic(). If no synonyms are set afterwards, the ID is treated as
	 * invalid: the problem is logged and a placeholder synonym is installed
	 * instead of throwing.
	 *
	 * @param $id
	 */
	public function load( $id ) {
		global $wgContLang;
		wfProfileIn( __METHOD__ );
		$this->mId = $id;
		$wgContLang->getMagic( $this );
		if ( !$this->mSynonyms ) {
			// Placeholder synonym; presumably chosen so that it never matches real text.
			// An MWException used to be a candidate here, but the error is only logged.
			$this->mSynonyms = array( 'dkjsagfjsgashfajsh' );
			wfDebugLog( 'exception', "Error: invalid magic word '$id'\n" );
		}
		wfProfileOut( __METHOD__ );
	}

	/**
	 * Preliminary initialisation
	 *
	 * Builds all regex members from the synonym list. A literal "$1" in a
	 * synonym becomes a "(.*?)" capture in the two "variable" regexes.
	 *
	 * @private
	 */
	public function initRegex() {
		// Sort the synonyms by length, descending, so that the longest synonym
		// matches in precedence to the shortest
		$synonyms = $this->mSynonyms;
		usort( $synonyms, array( $this, 'compareStringLength' ) );

		$escSyn = array();
		foreach ( $synonyms as $synonym ) {
			// In case a magic word contains /, like that's going to happen;)
			$escSyn[] = preg_quote( $synonym, '/' );
		}
		$this->mBaseRegex = implode( '|', $escSyn );

		$case = $this->mCaseSensitive ? '' : 'iu';
		$this->mRegex = "/{$this->mBaseRegex}/{$case}";
		$this->mRegexStart = "/^(?:{$this->mBaseRegex})/{$case}";
		// preg_quote() turned "$1" into "\$1"; swap that for a lazy capture group.
		$this->mVariableRegex = str_replace( "\\$1", "(.*?)", $this->mRegex );
		$this->mVariableStartToEndRegex = str_replace( "\\$1", "(.*?)",
			"/^(?:{$this->mBaseRegex})$/{$case}" );
	}

	/**
	 * A comparison function that returns -1, 0 or 1 depending on whether the
	 * first string is longer, the same length or shorter than the second
	 * string.
	 *
	 * @param $s1 string
	 * @param $s2 string
	 *
	 * @return int
	 */
	public function compareStringLength( $s1, $s2 ) {
		$l1 = strlen( $s1 );
		$l2 = strlen( $s2 );
		if ( $l1 < $l2 ) {
			return 1;
		}
		if ( $l1 > $l2 ) {
			return -1;
		}
		return 0;
	}

	/**
	 * Gets a regex representing matching the word
	 *
	 * @return string
	 */
	public function getRegex() {
		if ( $this->mRegex === '' ) {
			$this->initRegex();
		}
		return $this->mRegex;
	}

	/**
	 * Gets the regexp case modifier to use, i.e. i or nothing, to be used if
	 * one is using MagicWord::getBaseRegex(), otherwise it'll be included in
	 * the complete expression
	 *
	 * @return string
	 */
	public function getRegexCase() {
		if ( $this->mRegex === '' ) {
			$this->initRegex();
		}
		return $this->mCaseSensitive ? '' : 'iu';
	}

	/**
	 * Gets a regex matching the word, if it is at the string start
	 *
	 * @return string
	 */
	public function getRegexStart() {
		if ( $this->mRegex === '' ) {
			$this->initRegex();
		}
		return $this->mRegexStart;
	}

	/**
	 * regex without the slashes and what not
	 *
	 * @return string
	 */
	public function getBaseRegex() {
		if ( $this->mRegex === '' ) {
			$this->initRegex();
		}
		return $this->mBaseRegex;
	}

	/**
	 * Returns true if the text contains the word
	 *
	 * @param $text string
	 *
	 * @return bool
	 */
	public function match( $text ) {
		return (bool)preg_match( $this->getRegex(), $text );
	}

	/**
	 * Returns true if the text starts with the word
	 *
	 * @param $text string
	 *
	 * @return bool
	 */
	public function matchStart( $text ) {
		return (bool)preg_match( $this->getRegexStart(), $text );
	}

	/**
	 * Returns NULL if there's no match, the value of $1 otherwise
	 * The return code is the matched string, if there's no variable
	 * part in the regex and the matched variable part ($1) if there
	 * is one.
	 *
	 * @param $text string
	 *
	 * @return string|null
	 */
	public function matchVariableStartToEnd( $text ) {
		$matches = array();
		if ( !preg_match( $this->getVariableStartToEndRegex(), $text, $matches ) ) {
			return null;
		}

		# multiple matched parts (variable match); some will be empty because of
		# synonyms. The variable will be the second non-empty one so remove any
		# blank elements and re-sort the indices.
		# See also bug 6526
		#
		# Only empty strings are dropped. A bare array_filter() would also drop
		# the string '0', turning a captured value of "0" into "no capture".
		$nonEmpty = array();
		foreach ( $matches as $part ) {
			if ( $part !== '' ) {
				$nonEmpty[] = $part;
			}
		}

		if ( isset( $nonEmpty[1] ) ) {
			return $nonEmpty[1];
		}
		// No variable part captured: fall back to the full match (which may
		// itself be the empty string).
		return $matches[0];
	}


	/**
	 * Returns true if the text matches the word, and alters the
	 * input string, removing all instances of the word
	 *
	 * @param $text string
	 *
	 * @return bool
	 */
	public function matchAndRemove( &$text ) {
		$this->mFound = false;
		$text = preg_replace_callback( $this->getRegex(), array( $this, 'pregRemoveAndRecord' ), $text );
		return $this->mFound;
	}

	/**
	 * Like matchAndRemove(), but only matches the word at the start of the text
	 *
	 * @param $text
	 * @return bool
	 */
	public function matchStartAndRemove( &$text ) {
		$this->mFound = false;
		$text = preg_replace_callback( $this->getRegexStart(), array( $this, 'pregRemoveAndRecord' ), $text );
		return $this->mFound;
	}

	/**
	 * Used in matchAndRemove()
	 *
	 * Records that a match happened and replaces it with nothing.
	 *
	 * @return string
	 */
	public function pregRemoveAndRecord() {
		$this->mFound = true;
		return '';
	}

	/**
	 * Replaces the word with something else
	 *
	 * @param $replacement
	 * @param $subject
	 * @param $limit int
	 *
	 * @return string
	 */
	public function replace( $replacement, $subject, $limit = -1 ) {
		$res = preg_replace( $this->getRegex(), StringUtils::escapeRegexReplacement( $replacement ), $subject, $limit );
		$this->mModified = ( $res !== $subject );
		return $res;
	}

	/**
	 * Variable handling: {{SUBST:xxx}} style words
	 * Calls back a function to determine what to replace xxx with
	 * Input word must contain $1
	 *
	 * @param $text string
	 * @param $callback
	 *
	 * @return string
	 */
	public function substituteCallback( $text, $callback ) {
		$res = preg_replace_callback( $this->getVariableRegex(), $callback, $text );
		$this->mModified = ( $res !== $text );
		return $res;
	}

	/**
	 * Matches the word, where $1 is a wildcard
	 *
	 * @return string
	 */
	public function getVariableRegex() {
		if ( $this->mVariableRegex === '' ) {
			$this->initRegex();
		}
		return $this->mVariableRegex;
	}

	/**
	 * Matches the entire string, where $1 is a wildcard
	 *
	 * @return string
	 */
	public function getVariableStartToEndRegex() {
		if ( $this->mVariableStartToEndRegex === '' ) {
			$this->initRegex();
		}
		return $this->mVariableStartToEndRegex;
	}

	/**
	 * Accesses the synonym list directly
	 *
	 * @param $i int
	 *
	 * @return string
	 */
	public function getSynonym( $i ) {
		return $this->mSynonyms[$i];
	}

	/**
	 * @return array
	 */
	public function getSynonyms() {
		return $this->mSynonyms;
	}

	/**
	 * Returns true if the last call to replace() or substituteCallback()
	 * returned a modified text, otherwise false.
	 *
	 * @return bool
	 */
	public function getWasModified() {
		return $this->mModified;
	}

	/**
	 * $magicarr is an associative array of (magic word ID => replacement)
	 * This method uses the php feature to do several replacements at the same time,
	 * thereby gaining some efficiency. The result is placed in the out variable
	 * $result. The return value is true if something was replaced.
	 *
	 * Unlike replace(), the replacement strings are handed to preg_replace()
	 * as they are, without StringUtils::escapeRegexReplacement().
	 *
	 * @todo Should this be static? It doesn't seem to be used at all
	 *
	 * @param $magicarr
	 * @param $subject
	 * @param $result
	 *
	 * @return bool
	 */
	public function replaceMultiple( $magicarr, $subject, &$result ) {
		$search = array();
		$replace = array();
		foreach ( $magicarr as $id => $replacement ) {
			$mw = MagicWord::get( $id );
			$search[] = $mw->getRegex();
			$replace[] = $replacement;
		}

		$result = preg_replace( $search, $replace, $subject );
		return ( $result !== $subject );
	}

	/**
	 * Adds all the synonyms of this MagicWord to an array, to allow quick
	 * lookup in a list of magic words
	 *
	 * Each synonym is lower-cased with the content language before being
	 * used as a key.
	 *
	 * @param $array
	 * @param $value
	 */
	public function addToArray( &$array, $value ) {
		global $wgContLang;
		foreach ( $this->mSynonyms as $syn ) {
			$array[$wgContLang->lc( $syn )] = $value;
		}
	}

	/**
	 * @return bool
	 */
	public function isCaseSensitive() {
		return $this->mCaseSensitive;
	}

	/**
	 * @return string|int The ID this object was constructed or loaded with
	 */
	public function getId() {
		return $this->mId;
	}
}
620
/**
 * Class for handling an array of magic words
 *
 * Holds a list of magic word IDs and lazily derives a lookup hash and
 * combined regexes from them. Every derived structure is a two-element
 * array indexed by case sensitivity: 0 = case-insensitive words,
 * 1 = case-sensitive words.
 *
 * @ingroup Parser
 */
class MagicWordArray {
	public $names = array();
	// Derived caches; null means "not built yet" (reset by add()/addArray()).
	public $hash;
	public $baseRegex, $regex;
	public $matches;

	/**
	 * @param $names array Magic word IDs
	 */
	public function __construct( $names = array() ) {
		$this->names = $names;
	}

	/**
	 * Add a magic word by name
	 *
	 * @param $name string
	 */
	public function add( $name ) {
		$this->names[] = $name;
		$this->hash = $this->baseRegex = $this->regex = null;
	}

	/**
	 * Add a number of magic words by name
	 *
	 * @param $names array
	 */
	public function addArray( $names ) {
		$this->names = array_merge( $this->names, array_values( $names ) );
		$this->hash = $this->baseRegex = $this->regex = null;
	}

	/**
	 * Get a 2-d hashtable for this array
	 *
	 * Shape: array( 0 => array( lower-cased synonym => ID ),
	 *               1 => array( synonym => ID ) )
	 *
	 * @return array
	 */
	public function getHash() {
		if ( $this->hash === null ) {
			global $wgContLang;
			$this->hash = array( 0 => array(), 1 => array() );
			foreach ( $this->names as $name ) {
				$magic = MagicWord::get( $name );
				$case = intval( $magic->isCaseSensitive() );
				foreach ( $magic->getSynonyms() as $syn ) {
					if ( !$case ) {
						$syn = $wgContLang->lc( $syn );
					}
					$this->hash[$case][$syn] = $name;
				}
			}
		}
		return $this->hash;
	}

	/**
	 * Get the base regex
	 *
	 * Each synonym becomes one named group "<index>_<ID>", joined with "|".
	 * parseMatch() recovers the ID from the group name.
	 *
	 * @return array
	 */
	public function getBaseRegex() {
		if ( $this->baseRegex === null ) {
			$this->baseRegex = array( 0 => '', 1 => '' );
			foreach ( $this->names as $name ) {
				$magic = MagicWord::get( $name );
				$case = intval( $magic->isCaseSensitive() );
				foreach ( $magic->getSynonyms() as $i => $syn ) {
					// PCRE 8.34+ rejects group names that start with a digit,
					// so spell the synonym index with letters (0 => a, 1 => b, ...).
					$prefix = strtr( (string)$i, '0123456789', 'abcdefghij' );
					$group = "(?P<{$prefix}_{$name}>" . preg_quote( $syn, '/' ) . ')';
					if ( $this->baseRegex[$case] === '' ) {
						$this->baseRegex[$case] = $group;
					} else {
						$this->baseRegex[$case] .= '|' . $group;
					}
				}
			}
		}
		return $this->baseRegex;
	}

	/**
	 * Get an unanchored regex that does not match parameters
	 *
	 * @return array An entry is '' when there are no words of that case class
	 */
	public function getRegex() {
		if ( $this->regex === null ) {
			$base = $this->getBaseRegex();
			$this->regex = array( '', '' );
			if ( $base[0] !== '' ) {
				$this->regex[0] = "/{$base[0]}/iuS";
			}
			if ( $base[1] !== '' ) {
				$this->regex[1] = "/{$base[1]}/S";
			}
		}
		return $this->regex;
	}

	/**
	 * Get a regex for matching variables with parameters
	 *
	 * @return array The getRegex() array with each "\$1" replaced by "(.*?)"
	 */
	public function getVariableRegex() {
		return str_replace( "\\$1", "(.*?)", $this->getRegex() );
	}

	/**
	 * Get a regex anchored to the start of the string that does not match parameters
	 *
	 * @return array
	 */
	public function getRegexStart() {
		$base = $this->getBaseRegex();
		$newRegex = array( '', '' );
		if ( $base[0] !== '' ) {
			$newRegex[0] = "/^(?:{$base[0]})/iuS";
		}
		if ( $base[1] !== '' ) {
			$newRegex[1] = "/^(?:{$base[1]})/S";
		}
		return $newRegex;
	}

	/**
	 * Get an anchored regex for matching variables with parameters
	 *
	 * @return array
	 */
	public function getVariableStartToEndRegex() {
		$base = $this->getBaseRegex();
		$newRegex = array( '', '' );
		if ( $base[0] !== '' ) {
			$newRegex[0] = str_replace( "\\$1", "(.*?)", "/^(?:{$base[0]})$/iuS" );
		}
		if ( $base[1] !== '' ) {
			$newRegex[1] = str_replace( "\\$1", "(.*?)", "/^(?:{$base[1]})$/S" );
		}
		return $newRegex;
	}

	/**
	 * Parse a match array from preg_match
	 * Returns array(magic word ID, parameter value)
	 * If there is no parameter value, that element will be false.
	 *
	 * @param $m array
	 *
	 * @throws MWException If no non-empty named group is found, or a
	 *   non-empty group key has no "_" separator
	 * @return array
	 */
	public function parseMatch( $m ) {
		reset( $m );
		// key()/current()/next() walks the array the way each() did; each()
		// itself no longer exists as of PHP 8.0.
		while ( ( $key = key( $m ) ) !== null ) {
			$value = current( $m );
			next( $m );
			if ( $key === 0 || $value === '' ) {
				continue;
			}
			$parts = explode( '_', $key, 2 );
			if ( count( $parts ) != 2 ) {
				// This shouldn't happen
				throw new MWException( __METHOD__ . ': bad parameter name' );
			}
			list( /* $synIndex */, $magicName ) = $parts;
			// The pointer now sits on the numeric duplicate of the named
			// group; the element after it, if any, is the parameter capture.
			$paramValue = next( $m );
			return array( $magicName, $paramValue );
		}
		// This shouldn't happen either
		throw new MWException( __METHOD__.': parameter not found' );
	}

	/**
	 * Match some text, with parameter capture
	 * Returns an array with the magic word name in the first element and the
	 * parameter in the second element.
	 * Both elements are false if there was no match.
	 *
	 * @param $text string
	 *
	 * @return array
	 */
	public function matchVariableStartToEnd( $text ) {
		foreach ( $this->getVariableStartToEndRegex() as $regex ) {
			if ( $regex === '' ) {
				continue;
			}
			$m = array();
			if ( preg_match( $regex, $text, $m ) ) {
				return $this->parseMatch( $m );
			}
		}
		return array( false, false );
	}

	/**
	 * Match some text, without parameter capture
	 * Returns the magic word name, or false if there was no capture
	 *
	 * The case-sensitive table is consulted first, then the lower-cased
	 * text is looked up in the case-insensitive table.
	 *
	 * @param $text string
	 *
	 * @return string|false
	 */
	public function matchStartToEnd( $text ) {
		$hash = $this->getHash();
		if ( isset( $hash[1][$text] ) ) {
			return $hash[1][$text];
		}
		global $wgContLang;
		$lc = $wgContLang->lc( $text );
		if ( isset( $hash[0][$lc] ) ) {
			return $hash[0][$lc];
		}
		return false;
	}

	/**
	 * Returns an associative array, ID => param value, for all items that match
	 * Removes the matched items from the input string (passed by reference)
	 *
	 * @param $text string
	 *
	 * @return array
	 */
	public function matchAndRemove( &$text ) {
		$found = array();
		foreach ( $this->getRegex() as $regex ) {
			if ( $regex === '' ) {
				continue;
			}
			$matches = array();
			preg_match_all( $regex, $text, $matches, PREG_SET_ORDER );
			foreach ( $matches as $m ) {
				list( $name, $param ) = $this->parseMatch( $m );
				$found[$name] = $param;
			}
			$text = preg_replace( $regex, '', $text );
		}
		return $found;
	}

	/**
	 * Return the ID of the magic word at the start of $text, and remove
	 * the prefix from $text.
	 * Return false if no match found and $text is not modified.
	 * Does not match parameters.
	 *
	 * @param $text string
	 *
	 * @return string|false
	 */
	public function matchStartAndRemove( &$text ) {
		foreach ( $this->getRegexStart() as $regex ) {
			if ( $regex === '' ) {
				continue;
			}
			$m = array();
			if ( preg_match( $regex, $text, $m ) ) {
				list( $id, ) = $this->parseMatch( $m );
				$matchedLength = strlen( $m[0] );
				// Guard the "whole text consumed" case explicitly so the result
				// is always a string, whatever substr() does at end-of-string.
				if ( $matchedLength >= strlen( $text ) ) {
					$text = '';
				} else {
					$text = substr( $text, $matchedLength );
				}
				return $id;
			}
		}
		return false;
	}
}