(bug 9947) Add PROTECTIONLEVEL parser function to return the protection level for...
[lhc/web/wiklou.git] / includes / MagicWord.php
1 <?php
2 /**
3 * File for magic words
4 * See docs/magicword.txt
5 *
6 * @file
7 * @ingroup Parser
8 */
9
10 /**
11 * This class encapsulates "magic words" such as #redirect, __NOTOC__, etc.
12 * Usage:
13 * if (MagicWord::get( 'redirect' )->match( $text ) )
14 *
15 * Possible future improvements:
16 * * Simultaneous searching for a number of magic words
17 * * MagicWord::$mObjects in shared memory
18 *
19 * Please avoid reading the data out of one of these objects and then writing
20 * special case code. If possible, add another match()-like function here.
21 *
22 * To add magic words in an extension, use the LanguageGetMagic hook. For
23 * magic words which are also Parser variables, add a MagicWordwgVariableIDs
24 * hook. Use string keys.
25 *
26 * @ingroup Parser
27 */
class MagicWord {
	/**#@+
	 * @private
	 */
	var $mId, $mSynonyms, $mCaseSensitive, $mRegex;
	var $mRegexStart, $mBaseRegex, $mVariableRegex;
	# Declared explicitly: it is written by the constructor and initRegex()
	var $mVariableStartToEndRegex;
	var $mModified, $mFound;

	static public $mVariableIDsInitialised = false;
	static public $mVariableIDs = array(
		'currentmonth',
		'currentmonthname',
		'currentmonthnamegen',
		'currentmonthabbrev',
		'currentday',
		'currentday2',
		'currentdayname',
		'currentyear',
		'currenttime',
		'currenthour',
		'localmonth',
		'localmonthname',
		'localmonthnamegen',
		'localmonthabbrev',
		'localday',
		'localday2',
		'localdayname',
		'localyear',
		'localtime',
		'localhour',
		'numberofarticles',
		'numberoffiles',
		'numberofedits',
		'sitename',
		'server',
		'servername',
		'scriptpath',
		'pagename',
		'pagenamee',
		'fullpagename',
		'fullpagenamee',
		'namespace',
		'namespacee',
		'currentweek',
		'currentdow',
		'localweek',
		'localdow',
		'revisionid',
		'revisionday',
		'revisionday2',
		'revisionmonth',
		'revisionyear',
		'revisiontimestamp',
		'subpagename',
		'subpagenamee',
		'displaytitle',
		'talkspace',
		'talkspacee',
		'subjectspace',
		'subjectspacee',
		'talkpagename',
		'talkpagenamee',
		'subjectpagename',
		'subjectpagenamee',
		'numberofusers',
		'newsectionlink',
		'numberofpages',
		'currentversion',
		'basepagename',
		'basepagenamee',
		'urlencode',
		'currenttimestamp',
		'localtimestamp',
		'directionmark',
		'language',
		'contentlanguage',
		'pagesinnamespace',
		'numberofadmins',
		'numberofviews',
		'defaultsort',
		'pagesincategory',
		'index',
		'noindex',
		'numberingroup',
		'protectionlevel',
	);

	/* Array of caching hints for ParserCache (magic word ID => TTL value) */
	static public $mCacheTTLs = array (
		'currentmonth' => 86400,
		'currentmonthname' => 86400,
		'currentmonthnamegen' => 86400,
		'currentmonthabbrev' => 86400,
		'currentday' => 3600,
		'currentday2' => 3600,
		'currentdayname' => 3600,
		'currentyear' => 86400,
		'currenttime' => 3600,
		'currenthour' => 3600,
		'localmonth' => 86400,
		'localmonthname' => 86400,
		'localmonthnamegen' => 86400,
		'localmonthabbrev' => 86400,
		'localday' => 3600,
		'localday2' => 3600,
		'localdayname' => 3600,
		'localyear' => 86400,
		'localtime' => 3600,
		'localhour' => 3600,
		'numberofarticles' => 3600,
		'numberoffiles' => 3600,
		'numberofedits' => 3600,
		'currentweek' => 3600,
		'currentdow' => 3600,
		'localweek' => 3600,
		'localdow' => 3600,
		'numberofusers' => 3600,
		'numberofpages' => 3600,
		'currentversion' => 86400,
		'currenttimestamp' => 3600,
		'localtimestamp' => 3600,
		'pagesinnamespace' => 3600,
		'numberofadmins' => 3600,
		'numberofviews' => 3600,
		'numberingroup' => 3600,
	);

	/* IDs handed to MagicWordArray by getDoubleUnderscoreArray() */
	static public $mDoubleUnderscoreIDs = array(
		'notoc',
		'nogallery',
		'forcetoc',
		'toc',
		'noeditsection',
		'newsectionlink',
		'hiddencat',
		'index',
		'noindex',
		'staticredirect',
	);


	/* Cache of MagicWord objects created by get(), keyed by ID */
	static public $mObjects = array();
	/* Lazily created MagicWordArray, see getDoubleUnderscoreArray() */
	static public $mDoubleUnderscoreArray = null;

	/**#@-*/

	/**
	 * @param $id  Magic word ID
	 * @param $syn Synonym or array of synonyms; a scalar is cast to an array
	 * @param $cs  Whether matching is case sensitive
	 */
	function __construct($id = 0, $syn = '', $cs = false) {
		$this->mId = $id;
		$this->mSynonyms = (array)$syn;
		$this->mCaseSensitive = $cs;
		# All regexes start out empty and are built on demand by initRegex()
		$this->mRegex = '';
		$this->mRegexStart = '';
		$this->mBaseRegex = '';
		$this->mVariableRegex = '';
		$this->mVariableStartToEndRegex = '';
		$this->mModified = false;
	}

	/**
	 * Factory: creates an object representing an ID
	 * Objects are created and loaded once per ID and then served from
	 * self::$mObjects.
	 * @static
	 */
	static function &get( $id ) {
		wfProfileIn( __METHOD__ );
		if (!array_key_exists( $id, self::$mObjects ) ) {
			$mw = new MagicWord();
			$mw->load( $id );
			self::$mObjects[$id] = $mw;
		}
		wfProfileOut( __METHOD__ );
		return self::$mObjects[$id];
	}

	/**
	 * Get an array of parser variable IDs
	 * The hooks are only run on the first call.
	 */
	static function getVariableIDs() {
		if ( !self::$mVariableIDsInitialised ) {
			# Deprecated constant definition hook, available for extensions that need it
			$magicWords = array();
			wfRunHooks( 'MagicWordMagicWords', array( &$magicWords ) );
			foreach ( $magicWords as $word ) {
				define( $word, $word );
			}

			# Get variable IDs
			wfRunHooks( 'MagicWordwgVariableIDs', array( &self::$mVariableIDs ) );
			self::$mVariableIDsInitialised = true;
		}
		return self::$mVariableIDs;
	}

	/**
	 * Allow external reads of TTL array
	 * @return The entry of self::$mCacheTTLs for $id, or -1 if there is none
	 */
	static function getCacheTTL($id) {
		if (array_key_exists($id,self::$mCacheTTLs)) {
			return self::$mCacheTTLs[$id];
		}
		return -1;
	}

	/** Get a MagicWordArray of double-underscore entities */
	static function getDoubleUnderscoreArray() {
		if ( is_null( self::$mDoubleUnderscoreArray ) ) {
			self::$mDoubleUnderscoreArray = new MagicWordArray( self::$mDoubleUnderscoreIDs );
		}
		return self::$mDoubleUnderscoreArray;
	}

	/**
	 * Initialises this object with an ID
	 * The object is passed to $wgContLang->getMagic(), which is expected to
	 * fill in the synonyms. If none are set afterwards, a placeholder
	 * synonym is installed and the problem is logged instead of thrown.
	 */
	function load( $id ) {
		global $wgContLang;
		$this->mId = $id;
		$wgContLang->getMagic( $this );
		if ( !$this->mSynonyms ) {
			$this->mSynonyms = array( 'dkjsagfjsgashfajsh' );
			#throw new MWException( "Error: invalid magic word '$id'" );
			wfDebugLog( 'exception', "Error: invalid magic word '$id'\n" );
		}
	}

	/**
	 * Preliminary initialisation
	 * Builds every regex member from the synonym list.
	 * @private
	 */
	function initRegex() {
		#$variableClass = Title::legalChars();
		# This was used for matching "$1" variables, but different uses of the feature will have
		# different restrictions, which should be checked *after* the MagicWord has been matched,
		# not here. - IMSoP

		$escSyn = array();
		foreach ( $this->mSynonyms as $synonym ) {
			# Quote with '/' as delimiter, in case a magic word contains one
			$escSyn[] = preg_quote( $synonym, '/' );
		}
		$this->mBaseRegex = implode( '|', $escSyn );

		$case = $this->mCaseSensitive ? '' : 'iu';
		$this->mRegex = "/{$this->mBaseRegex}/{$case}";
		$this->mRegexStart = "/^(?:{$this->mBaseRegex})/{$case}";
		# preg_quote() turned "$1" into "\$1"; swap that for a capturing wildcard
		$this->mVariableRegex = str_replace( "\\$1", "(.*?)", $this->mRegex );
		$this->mVariableStartToEndRegex = str_replace( "\\$1", "(.*?)",
			"/^(?:{$this->mBaseRegex})$/{$case}" );
	}

	/**
	 * Gets a regex representing matching the word
	 */
	function getRegex() {
		if ($this->mRegex == '' ) {
			$this->initRegex();
		}
		return $this->mRegex;
	}

	/**
	 * Gets the regexp case modifier to use, i.e. "iu" or nothing, to be used
	 * if one is using MagicWord::getBaseRegex(), otherwise it'll be included
	 * in the complete expression
	 */
	function getRegexCase() {
		if ( $this->mRegex === '' ) {
			$this->initRegex();
		}

		return $this->mCaseSensitive ? '' : 'iu';
	}

	/**
	 * Gets a regex matching the word, if it is at the string start
	 */
	function getRegexStart() {
		if ($this->mRegex == '' ) {
			$this->initRegex();
		}
		return $this->mRegexStart;
	}

	/**
	 * regex without the slashes and what not
	 */
	function getBaseRegex() {
		if ($this->mRegex == '') {
			$this->initRegex();
		}
		return $this->mBaseRegex;
	}

	/**
	 * Tests whether the text contains the word
	 * @return The preg_match() result: 1 on a match, 0 otherwise
	 *         (false if the regex fails)
	 */
	function match( $text ) {
		return preg_match( $this->getRegex(), $text );
	}

	/**
	 * Tests whether the text starts with the word
	 * @return The preg_match() result: 1 on a match, 0 otherwise
	 *         (false if the regex fails)
	 */
	function matchStart( $text ) {
		return preg_match( $this->getRegexStart(), $text );
	}

	/**
	 * Returns NULL if there's no match, the value of $1 otherwise
	 * The return code is the matched string, if there's no variable
	 * part in the regex and the matched variable part ($1) if there
	 * is one.
	 */
	function matchVariableStartToEnd( $text ) {
		$matches = array();
		$matchcount = preg_match( $this->getVariableStartToEndRegex(), $text, $matches );
		if ( $matchcount == 0 ) {
			return NULL;
		}

		# multiple matched parts (variable match); some will be empty because of
		# synonyms. The variable will be the second non-empty one so remove any
		# blank elements and re-sort the indices.
		# See also bug 6526
		#
		# Only empty strings are dropped. A plain array_filter() would also
		# drop "0", so a variable part of "0" would be lost.
		$parts = array();
		foreach ( $matches as $part ) {
			if ( $part !== '' ) {
				$parts[] = $part;
			}
		}

		if ( isset( $parts[1] ) ) {
			return $parts[1];
		}
		# Nothing but the whole match is left; if even that was empty there is
		# nothing to return
		return isset( $parts[0] ) ? $parts[0] : NULL;
	}


	/**
	 * Returns true if the text matches the word, and alters the
	 * input string, removing all instances of the word
	 */
	function matchAndRemove( &$text ) {
		$this->mFound = false;
		$text = preg_replace_callback( $this->getRegex(), array( $this, 'pregRemoveAndRecord' ), $text );
		return $this->mFound;
	}

	/**
	 * Returns true if the text starts with the word, and alters the
	 * input string, removing the word from its start
	 */
	function matchStartAndRemove( &$text ) {
		$this->mFound = false;
		$text = preg_replace_callback( $this->getRegexStart(), array( $this, 'pregRemoveAndRecord' ), $text );
		return $this->mFound;
	}

	/**
	 * Used in matchAndRemove() and matchStartAndRemove()
	 * Records that a match happened and replaces it with nothing.
	 * @private
	 **/
	function pregRemoveAndRecord( ) {
		$this->mFound = true;
		return '';
	}

	/**
	 * Replaces the word with something else
	 * The replacement is passed through StringUtils::escapeRegexReplacement()
	 * before being handed to preg_replace(). Updates the flag read by
	 * getWasModified().
	 */
	function replace( $replacement, $subject, $limit=-1 ) {
		$res = preg_replace( $this->getRegex(), StringUtils::escapeRegexReplacement( $replacement ), $subject, $limit );
		$this->mModified = ( $res !== $subject );
		return $res;
	}

	/**
	 * Variable handling: {{SUBST:xxx}} style words
	 * Calls back a function to determine what to replace xxx with
	 * Input word must contain $1
	 * Updates the flag read by getWasModified().
	 */
	function substituteCallback( $text, $callback ) {
		$res = preg_replace_callback( $this->getVariableRegex(), $callback, $text );
		$this->mModified = ( $res !== $text );
		return $res;
	}

	/**
	 * Matches the word, where $1 is a wildcard
	 */
	function getVariableRegex() {
		if ( $this->mVariableRegex == '' ) {
			$this->initRegex();
		}
		return $this->mVariableRegex;
	}

	/**
	 * Matches the entire string, where $1 is a wildcard
	 */
	function getVariableStartToEndRegex() {
		if ( $this->mVariableStartToEndRegex == '' ) {
			$this->initRegex();
		}
		return $this->mVariableStartToEndRegex;
	}

	/**
	 * Accesses the synonym list directly
	 * @param $i Index into the synonym list
	 */
	function getSynonym( $i ) {
		return $this->mSynonyms[$i];
	}

	/**
	 * Returns the whole synonym list
	 */
	function getSynonyms() {
		return $this->mSynonyms;
	}

	/**
	 * Returns true if the last call to replace() or substituteCallback()
	 * returned a modified text, otherwise false.
	 */
	function getWasModified(){
		return $this->mModified;
	}

	/**
	 * $magicarr is an associative array of (magic word ID => replacement)
	 * This method uses the php feature to do several replacements at the same time,
	 * thereby gaining some efficiency. The result is placed in the out variable
	 * $result. The return value is true if something was replaced.
	 * Unlike replace(), the replacements are handed to preg_replace() as they are.
	 * @static
	 **/
	static function replaceMultiple( $magicarr, $subject, &$result ){
		$search = array();
		$replace = array();
		foreach( $magicarr as $id => $replacement ){
			$mw = self::get( $id );
			$search[] = $mw->getRegex();
			$replace[] = $replacement;
		}

		$result = preg_replace( $search, $replace, $subject );
		return $result !== $subject;
	}

	/**
	 * Adds all the synonyms of this MagicWord to an array, to allow quick
	 * lookup in a list of magic words
	 * Each synonym, lower-cased through $wgContLang->lc(), becomes a key
	 * mapped to $value.
	 */
	function addToArray( &$array, $value ) {
		global $wgContLang;
		foreach ( $this->mSynonyms as $syn ) {
			$array[$wgContLang->lc($syn)] = $value;
		}
	}

	/**
	 * Returns the case sensitivity flag of this word
	 */
	function isCaseSensitive() {
		return $this->mCaseSensitive;
	}

	/**
	 * Returns the ID of this word
	 */
	function getId() {
		return $this->mId;
	}
}
478
479 /**
480 * Class for handling an array of magic words
481 * @ingroup Parser
482 */
class MagicWordArray {
	/* List of magic word IDs held by this array */
	var $names = array();
	/* Lazily built caches, reset to null whenever a name is added */
	var $hash;
	var $baseRegex, $regex;
	var $matches;

	/**
	 * @param $names Array of magic word IDs
	 */
	function __construct( $names = array() ) {
		$this->names = $names;
	}

	/**
	 * Add a magic word by name
	 */
	public function add( $name ) {
		$this->names[] = $name;
		$this->hash = $this->baseRegex = $this->regex = null;
	}

	/**
	 * Add a number of magic words by name
	 * Only the values of $names are used; its keys are discarded.
	 */
	public function addArray( $names ) {
		$this->names = array_merge( $this->names, array_values( $names ) );
		$this->hash = $this->baseRegex = $this->regex = null;
	}

	/**
	 * Get a 2-d hashtable for this array
	 * Index 0 holds the case-insensitive words, keyed by the synonym
	 * lower-cased through $wgContLang->lc(); index 1 holds the case-sensitive
	 * words, keyed by the synonym as it is. The values are magic word IDs.
	 */
	function getHash() {
		if ( is_null( $this->hash ) ) {
			global $wgContLang;
			$this->hash = array( 0 => array(), 1 => array() );
			foreach ( $this->names as $name ) {
				$magic = MagicWord::get( $name );
				$case = intval( $magic->isCaseSensitive() );
				foreach ( $magic->getSynonyms() as $syn ) {
					if ( !$case ) {
						$syn = $wgContLang->lc( $syn );
					}
					$this->hash[$case][$syn] = $name;
				}
			}
		}
		return $this->hash;
	}

	/**
	 * Get the base regex
	 * Returns a two-element array: index 0 is the alternation of all
	 * case-insensitive synonyms, index 1 that of the case-sensitive ones.
	 * Every synonym sits in its own named group "<index>_<magic word ID>",
	 * which is what parseMatch() decodes.
	 */
	function getBaseRegex() {
		if ( is_null( $this->baseRegex ) ) {
			$this->baseRegex = array( 0 => '', 1 => '' );
			foreach ( $this->names as $name ) {
				$magic = MagicWord::get( $name );
				$case = intval( $magic->isCaseSensitive() );
				foreach ( $magic->getSynonyms() as $i => $syn ) {
					// A PCRE group name must start with a non-digit, so the
					// synonym index is spelled with letters (0 => a, 1 => b, ...)
					$it = strtr( (string)$i, '0123456789', 'abcdefghij' );
					$group = "(?P<{$it}_{$name}>" . preg_quote( $syn, '/' ) . ')';
					if ( $this->baseRegex[$case] === '' ) {
						$this->baseRegex[$case] = $group;
					} else {
						$this->baseRegex[$case] .= '|' . $group;
					}
				}
			}
		}
		return $this->baseRegex;
	}

	/**
	 * Get an unanchored regex
	 * Returns a two-element array laid out like getBaseRegex(); an element is
	 * the empty string when there are no words of that kind.
	 */
	function getRegex() {
		if ( is_null( $this->regex ) ) {
			$base = $this->getBaseRegex();
			$this->regex = array( '', '' );
			if ( $base[0] !== '' ) {
				$this->regex[0] = "/{$base[0]}/iuS";
			}
			if ( $base[1] !== '' ) {
				$this->regex[1] = "/{$base[1]}/S";
			}
		}
		return $this->regex;
	}

	/**
	 * Get a regex for matching variables
	 * Same as getRegex(), with the quoted "$1" replaced by a capturing wildcard
	 */
	function getVariableRegex() {
		return str_replace( "\\$1", "(.*?)", $this->getRegex() );
	}

	/**
	 * Get an anchored regex for matching variables
	 * Returns a two-element array laid out like getRegex().
	 */
	function getVariableStartToEndRegex() {
		$base = $this->getBaseRegex();
		$newRegex = array( '', '' );
		if ( $base[0] !== '' ) {
			$newRegex[0] = str_replace( "\\$1", "(.*?)", "/^(?:{$base[0]})$/iuS" );
		}
		if ( $base[1] !== '' ) {
			$newRegex[1] = str_replace( "\\$1", "(.*?)", "/^(?:{$base[1]})$/S" );
		}
		return $newRegex;
	}

	/**
	 * Parse a match array from preg_match
	 * Returns array(magic word ID, parameter value)
	 * If there is no parameter value, that element will be false.
	 *
	 * @param $m Match array as filled in by preg_match() for one of the
	 *           regexes of this class
	 * @throws MWException if no usable named group is found
	 */
	function parseMatch( $m ) {
		# Walk the array by position, so that the element following a key
		# can be looked at
		$keys = array_keys( $m );
		$values = array_values( $m );
		$total = count( $keys );
		for ( $pos = 0; $pos < $total; $pos++ ) {
			$key = $keys[$pos];
			if ( $key === 0 || $values[$pos] === '' ) {
				# The whole match, or a group that did not take part in it
				continue;
			}
			$parts = explode( '_', $key, 2 );
			if ( count( $parts ) != 2 ) {
				// This shouldn't happen
				throw new MWException( __METHOD__ . ': bad parameter name' );
			}
			# $parts[0] is the synonym index, which is not needed
			$magicName = $parts[1];
			# A named group is followed by its numbered duplicate; the
			# parameter, if there is one, is the element after that
			$paramPos = $pos + 2;
			$paramValue = array_key_exists( $paramPos, $values ) ? $values[$paramPos] : false;
			return array( $magicName, $paramValue );
		}
		// This shouldn't happen either
		throw new MWException( __METHOD__.': parameter not found' );
	}

	/**
	 * Match some text, with parameter capture
	 * Returns an array with the magic word name in the first element and the
	 * parameter in the second element.
	 * Both elements are false if there was no match.
	 */
	public function matchVariableStartToEnd( $text ) {
		$regexes = $this->getVariableStartToEndRegex();
		foreach ( $regexes as $regex ) {
			if ( $regex === '' ) {
				continue;
			}
			$m = array();
			if ( preg_match( $regex, $text, $m ) ) {
				return $this->parseMatch( $m );
			}
		}
		return array( false, false );
	}

	/**
	 * Match some text, without parameter capture
	 * Returns the magic word name, or false if there was no capture
	 * The case-sensitive table is tried first, with the text as it is; then
	 * the case-insensitive one, with the text lower-cased.
	 */
	public function matchStartToEnd( $text ) {
		global $wgContLang;
		$hash = $this->getHash();
		if ( isset( $hash[1][$text] ) ) {
			return $hash[1][$text];
		}
		$lc = $wgContLang->lc( $text );
		if ( isset( $hash[0][$lc] ) ) {
			return $hash[0][$lc];
		}
		return false;
	}

	/**
	 * Returns an associative array, ID => param value, for all items that match
	 * Removes the matched items from the input string (passed by reference)
	 */
	public function matchAndRemove( &$text ) {
		$found = array();
		$regexes = $this->getRegex();
		foreach ( $regexes as $regex ) {
			if ( $regex === '' ) {
				continue;
			}
			$matches = array();
			preg_match_all( $regex, $text, $matches, PREG_SET_ORDER );
			foreach ( $matches as $m ) {
				list( $name, $param ) = $this->parseMatch( $m );
				$found[$name] = $param;
			}
			$text = preg_replace( $regex, '', $text );
		}
		return $found;
	}
}