Add "check" parameter to action=email
[lhc/web/wiklou.git] / includes / MagicWord.php
1 <?php
2 /**
3 * File for magic words
4 * See docs/magicword.txt
5 *
6 * @file
7 * @ingroup Parser
8 */
9
10 /**
11 * This class encapsulates "magic words" such as #redirect, __NOTOC__, etc.
12 * Usage:
13 * if (MagicWord::get( 'redirect' )->match( $text ) )
14 *
15 * Possible future improvements:
16 * * Simultaneous searching for a number of magic words
17 * * MagicWord::$mObjects in shared memory
18 *
19 * Please avoid reading the data out of one of these objects and then writing
20 * special case code. If possible, add another match()-like function here.
21 *
22 * To add magic words in an extension, use the LanguageGetMagic hook. For
23 * magic words which are also Parser variables, add a MagicWordwgVariableIDs
24 * hook. Use string keys.
25 *
26 * @ingroup Parser
27 */
class MagicWord {
	/**#@+
	 * @private
	 *
	 * NOTE(review): these are documented as private but declared with "var"
	 * (i.e. public); the declaration is kept as-is because code outside this
	 * class may write them directly -- confirm against Language::getMagic().
	 */
	var $mId, $mSynonyms, $mCaseSensitive, $mRegex;
	var $mRegexStart, $mBaseRegex, $mVariableRegex, $mVariableStartToEndRegex;
	var $mModified, $mFound;

	/** True once the MagicWordwgVariableIDs hook has been run by getVariableIDs() */
	static public $mVariableIDsInitialised = false;

	/** IDs of the magic words which are parser variables */
	static public $mVariableIDs = array(
		'currentmonth',
		'currentmonthname',
		'currentmonthnamegen',
		'currentmonthabbrev',
		'currentday',
		'currentday2',
		'currentdayname',
		'currentyear',
		'currenttime',
		'currenthour',
		'localmonth',
		'localmonthname',
		'localmonthnamegen',
		'localmonthabbrev',
		'localday',
		'localday2',
		'localdayname',
		'localyear',
		'localtime',
		'localhour',
		'numberofarticles',
		'numberoffiles',
		'numberofedits',
		'sitename',
		'server',
		'servername',
		'scriptpath',
		'pagename',
		'pagenamee',
		'fullpagename',
		'fullpagenamee',
		'namespace',
		'namespacee',
		'currentweek',
		'currentdow',
		'localweek',
		'localdow',
		'revisionid',
		'revisionday',
		'revisionday2',
		'revisionmonth',
		'revisionyear',
		'revisiontimestamp',
		'subpagename',
		'subpagenamee',
		'displaytitle',
		'talkspace',
		'talkspacee',
		'subjectspace',
		'subjectspacee',
		'talkpagename',
		'talkpagenamee',
		'subjectpagename',
		'subjectpagenamee',
		'numberofusers',
		'newsectionlink',
		'numberofpages',
		'currentversion',
		'basepagename',
		'basepagenamee',
		'urlencode',
		'currenttimestamp',
		'localtimestamp',
		'directionmark',
		'language',
		'contentlanguage',
		'pagesinnamespace',
		'numberofadmins',
		'numberofviews',
		'defaultsort',
		'pagesincategory',
		'index',
		'noindex',
		'numberingroup',
	);

	/* Array of caching hints for ParserCache */
	static public $mCacheTTLs = array (
		'currentmonth' => 86400,
		'currentmonthname' => 86400,
		'currentmonthnamegen' => 86400,
		'currentmonthabbrev' => 86400,
		'currentday' => 3600,
		'currentday2' => 3600,
		'currentdayname' => 3600,
		'currentyear' => 86400,
		'currenttime' => 3600,
		'currenthour' => 3600,
		'localmonth' => 86400,
		'localmonthname' => 86400,
		'localmonthnamegen' => 86400,
		'localmonthabbrev' => 86400,
		'localday' => 3600,
		'localday2' => 3600,
		'localdayname' => 3600,
		'localyear' => 86400,
		'localtime' => 3600,
		'localhour' => 3600,
		'numberofarticles' => 3600,
		'numberoffiles' => 3600,
		'numberofedits' => 3600,
		'currentweek' => 3600,
		'currentdow' => 3600,
		'localweek' => 3600,
		'localdow' => 3600,
		'numberofusers' => 3600,
		'numberofpages' => 3600,
		'currentversion' => 86400,
		'currenttimestamp' => 3600,
		'localtimestamp' => 3600,
		'pagesinnamespace' => 3600,
		'numberofadmins' => 3600,
		'numberofviews' => 3600,
		'numberingroup' => 3600,
	);

	/** IDs of the double-underscore magic words (see getDoubleUnderscoreArray()) */
	static public $mDoubleUnderscoreIDs = array(
		'notoc',
		'nogallery',
		'forcetoc',
		'toc',
		'noeditsection',
		'newsectionlink',
		'hiddencat',
		'index',
		'noindex',
		'staticredirect',
	);


	/** Cache of MagicWord objects built by get(), keyed by magic word ID */
	static public $mObjects = array();
	/** Lazily created MagicWordArray, see getDoubleUnderscoreArray() */
	static public $mDoubleUnderscoreArray = null;

	/**#@-*/

	/**
	 * @param $id mixed Magic word ID
	 * @param $syn mixed Synonym or array of synonyms (always stored as an array)
	 * @param $cs bool Whether matching is case sensitive
	 */
	function __construct($id = 0, $syn = '', $cs = false) {
		$this->mId = $id;
		$this->mSynonyms = (array)$syn;
		$this->mCaseSensitive = $cs;
		// The regexes are built lazily by initRegex(); '' means "not built yet"
		$this->mRegex = '';
		$this->mRegexStart = '';
		$this->mVariableRegex = '';
		$this->mVariableStartToEndRegex = '';
		$this->mModified = false;
	}

	/**
	 * Factory: creates an object representing an ID.
	 * Objects are cached in self::$mObjects, so each ID is loaded only once.
	 * @static
	 * @param $id string Magic word ID
	 * @return MagicWord
	 */
	static function &get( $id ) {
		wfProfileIn( __METHOD__ );
		if (!array_key_exists( $id, self::$mObjects ) ) {
			$mw = new MagicWord();
			$mw->load( $id );
			self::$mObjects[$id] = $mw;
		}
		wfProfileOut( __METHOD__ );
		return self::$mObjects[$id];
	}

	/**
	 * Get an array of parser variable IDs.
	 * On the first call the MagicWordMagicWords and MagicWordwgVariableIDs
	 * hooks are run; later calls return the stored list directly.
	 * @return array
	 */
	static function getVariableIDs() {
		if ( !self::$mVariableIDsInitialised ) {
			# Deprecated constant definition hook, available for extensions that need it
			$magicWords = array();
			wfRunHooks( 'MagicWordMagicWords', array( &$magicWords ) );
			foreach ( $magicWords as $word ) {
				define( $word, $word );
			}

			# Get variable IDs
			wfRunHooks( 'MagicWordwgVariableIDs', array( &self::$mVariableIDs ) );
			self::$mVariableIDsInitialised = true;
		}
		return self::$mVariableIDs;
	}

	/**
	 * Allow external reads of TTL array
	 * @param $id string Magic word ID
	 * @return int The value from self::$mCacheTTLs, or -1 if the ID has no entry
	 */
	static function getCacheTTL($id) {
		if (array_key_exists($id,self::$mCacheTTLs)) {
			return self::$mCacheTTLs[$id];
		} else {
			return -1;
		}
	}

	/**
	 * Get a MagicWordArray of double-underscore entities.
	 * The array is created on first use from self::$mDoubleUnderscoreIDs.
	 * @return MagicWordArray
	 */
	static function getDoubleUnderscoreArray() {
		if ( is_null( self::$mDoubleUnderscoreArray ) ) {
			self::$mDoubleUnderscoreArray = new MagicWordArray( self::$mDoubleUnderscoreIDs );
		}
		return self::$mDoubleUnderscoreArray;
	}

	/**
	 * Initialises this object with an ID.
	 * NOTE(review): $wgContLang->getMagic() is presumably what fills in the
	 * synonyms and case sensitivity for this ID -- confirm in the Language class.
	 * If no synonyms are set afterwards, a placeholder synonym is used and the
	 * problem is written to the 'exception' debug log instead of throwing.
	 * @param $id string Magic word ID
	 */
	function load( $id ) {
		global $wgContLang;
		$this->mId = $id;
		$wgContLang->getMagic( $this );
		if ( !$this->mSynonyms ) {
			$this->mSynonyms = array( 'dkjsagfjsgashfajsh' );
			#throw new MWException( "Error: invalid magic word '$id'" );
			wfDebugLog( 'exception', "Error: invalid magic word '$id'\n" );
		}
	}

	/**
	 * Preliminary initialisation: builds all the regex members from the synonyms
	 * @private
	 */
	function initRegex() {
		#$variableClass = Title::legalChars();
		# This was used for matching "$1" variables, but different uses of the feature will have
		# different restrictions, which should be checked *after* the MagicWord has been matched,
		# not here. - IMSoP

		// Sort a copy of the synonyms by length, longest first. Regex alternation
		// takes the first branch that matches, so a short synonym that is a prefix
		// of a longer one would otherwise win and leave the tail of the longer one
		// behind. $this->mSynonyms itself keeps its order for getSynonym().
		$synonyms = $this->mSynonyms;
		usort( $synonyms, array( $this, 'compareStringLength' ) );

		$escSyn = array();
		foreach ( $synonyms as $synonym ) {
			// Quote the delimiter too, in case a magic word contains "/"
			$escSyn[] = preg_quote( $synonym, '/' );
		}
		$this->mBaseRegex = implode( '|', $escSyn );

		$case = $this->mCaseSensitive ? '' : 'iu';
		$this->mRegex = "/{$this->mBaseRegex}/{$case}";
		$this->mRegexStart = "/^(?:{$this->mBaseRegex})/{$case}";
		// preg_quote() turned "$1" into "\$1"; swap that for a non-greedy capture
		$this->mVariableRegex = str_replace( "\\$1", "(.*?)", $this->mRegex );
		$this->mVariableStartToEndRegex = str_replace( "\\$1", "(.*?)",
			"/^(?:{$this->mBaseRegex})$/{$case}" );
	}

	/**
	 * usort() callback ordering strings by byte length, longest first.
	 * Used by initRegex().
	 * @private
	 * @param $s1 string
	 * @param $s2 string
	 * @return int -1, 0 or 1
	 */
	function compareStringLength( $s1, $s2 ) {
		$l1 = strlen( $s1 );
		$l2 = strlen( $s2 );
		if ( $l1 < $l2 ) {
			return 1;
		} elseif ( $l1 > $l2 ) {
			return -1;
		}
		return 0;
	}

	/**
	 * Gets a regex representing matching the word
	 * @return string
	 */
	function getRegex() {
		if ($this->mRegex == '' ) {
			$this->initRegex();
		}
		return $this->mRegex;
	}

	/**
	 * Gets the regexp case modifier to use, i.e. "iu" or nothing, to be used if
	 * one is using MagicWord::getBaseRegex(), otherwise it'll be included in
	 * the complete expression
	 * @return string
	 */
	function getRegexCase() {
		if ( $this->mRegex === '' ) {
			$this->initRegex();
		}

		return $this->mCaseSensitive ? '' : 'iu';
	}

	/**
	 * Gets a regex matching the word, if it is at the string start
	 * @return string
	 */
	function getRegexStart() {
		if ($this->mRegex == '' ) {
			$this->initRegex();
		}
		return $this->mRegexStart;
	}

	/**
	 * regex without the slashes and what not
	 * @return string
	 */
	function getBaseRegex() {
		if ($this->mRegex == '') {
			$this->initRegex();
		}
		return $this->mBaseRegex;
	}

	/**
	 * Tests whether the text contains the word
	 * @param $text string
	 * @return int|false The preg_match() result: 1 if the word was found, 0 if
	 *   not, false if the match failed with an error
	 */
	function match( $text ) {
		return preg_match( $this->getRegex(), $text );
	}

	/**
	 * Tests whether the text starts with the word
	 * @param $text string
	 * @return int|false The preg_match() result: 1 if the text starts with the
	 *   word, 0 if not, false if the match failed with an error
	 */
	function matchStart( $text ) {
		return preg_match( $this->getRegexStart(), $text );
	}

	/**
	 * Returns NULL if there's no match, the value of $1 otherwise
	 * The return code is the matched string, if there's no variable
	 * part in the regex and the matched variable part ($1) if there
	 * is one.
	 * @param $text string
	 * @return string|null
	 */
	function matchVariableStartToEnd( $text ) {
		$matches = array();
		if ( !preg_match( $this->getVariableStartToEndRegex(), $text, $matches ) ) {
			return null;
		}

		# multiple matched parts (variable match); some will be empty because of
		# synonyms. The variable will be the second non-empty one so collect the
		# non-empty elements with fresh indices.
		# See also bug 6526
		#
		# Only the empty string counts as "empty" here: array_filter() without a
		# callback would also throw away a captured "0".
		$nonEmpty = array();
		foreach ( $matches as $part ) {
			if ( $part !== '' ) {
				$nonEmpty[] = $part;
			}
		}

		if ( count( $nonEmpty ) >= 2 ) {
			return $nonEmpty[1];
		}
		// With a single element this is the whole matched string. With none
		// (everything matched the empty string) null is returned, as before.
		return isset( $nonEmpty[0] ) ? $nonEmpty[0] : null;
	}


	/**
	 * Returns true if the text matches the word, and alters the
	 * input string, removing all instances of the word
	 * @param $text string Passed by reference and modified
	 * @return bool
	 */
	function matchAndRemove( &$text ) {
		$this->mFound = false;
		// Plain $this: objects are handles, so no reference is needed for the callback
		$text = preg_replace_callback( $this->getRegex(), array( $this, 'pregRemoveAndRecord' ), $text );
		return $this->mFound;
	}

	/**
	 * Like matchAndRemove(), but only removes the word at the start of the text
	 * @param $text string Passed by reference and modified
	 * @return bool
	 */
	function matchStartAndRemove( &$text ) {
		$this->mFound = false;
		$text = preg_replace_callback( $this->getRegexStart(), array( $this, 'pregRemoveAndRecord' ), $text );
		return $this->mFound;
	}

	/**
	 * Used in matchAndRemove(): records that a match happened and replaces
	 * it with the empty string
	 * @private
	 * @return string
	 **/
	function pregRemoveAndRecord( ) {
		$this->mFound = true;
		return '';
	}

	/**
	 * Replaces the word with something else.
	 * The replacement is passed through StringUtils::escapeRegexReplacement()
	 * before being handed to preg_replace(). Whether anything changed can be
	 * read back with getWasModified().
	 * @param $replacement string
	 * @param $subject string
	 * @param $limit int Maximum number of replacements, -1 for no limit
	 * @return string
	 */
	function replace( $replacement, $subject, $limit=-1 ) {
		$res = preg_replace( $this->getRegex(), StringUtils::escapeRegexReplacement( $replacement ), $subject, $limit );
		$this->mModified = !($res === $subject);
		return $res;
	}

	/**
	 * Variable handling: {{SUBST:xxx}} style words
	 * Calls back a function to determine what to replace xxx with
	 * Input word must contain $1
	 * @param $text string
	 * @param $callback callback Passed to preg_replace_callback()
	 * @return string
	 */
	function substituteCallback( $text, $callback ) {
		$res = preg_replace_callback( $this->getVariableRegex(), $callback, $text );
		$this->mModified = !($res === $text);
		return $res;
	}

	/**
	 * Matches the word, where $1 is a wildcard
	 * @return string
	 */
	function getVariableRegex() {
		if ( $this->mVariableRegex == '' ) {
			$this->initRegex();
		}
		return $this->mVariableRegex;
	}

	/**
	 * Matches the entire string, where $1 is a wildcard
	 * @return string
	 */
	function getVariableStartToEndRegex() {
		if ( $this->mVariableStartToEndRegex == '' ) {
			$this->initRegex();
		}
		return $this->mVariableStartToEndRegex;
	}

	/**
	 * Accesses the synonym list directly
	 * @param $i int Index into the synonym list
	 * @return string
	 */
	function getSynonym( $i ) {
		return $this->mSynonyms[$i];
	}

	/**
	 * @return array The full synonym list
	 */
	function getSynonyms() {
		return $this->mSynonyms;
	}

	/**
	 * Returns true if the last call to replace() or substituteCallback()
	 * returned a modified text, otherwise false.
	 * @return bool
	 */
	function getWasModified(){
		return $this->mModified;
	}

	/**
	 * $magicarr is an associative array of (magic word ID => replacement)
	 * This method uses the php feature to do several replacements at the same time,
	 * thereby gaining some efficiency. The result is placed in the out variable
	 * $result. The return value is true if something was replaced.
	 *
	 * Unlike replace(), the replacements are handed to preg_replace() as they are.
	 *
	 * Declared static to match its documentation; calling it on an instance
	 * keeps working.
	 * @static
	 * @param $magicarr array
	 * @param $subject string
	 * @param $result string Out parameter
	 * @return bool
	 **/
	static function replaceMultiple( $magicarr, $subject, &$result ){
		$search = array();
		$replace = array();
		foreach( $magicarr as $id => $replacement ){
			$mw = MagicWord::get( $id );
			$search[] = $mw->getRegex();
			$replace[] = $replacement;
		}

		$result = preg_replace( $search, $replace, $subject );
		return !($result === $subject);
	}

	/**
	 * Adds all the synonyms of this MagicWord to an array, to allow quick
	 * lookup in a list of magic words. Each synonym, lower-cased with
	 * $wgContLang->lc(), becomes a key mapping to $value.
	 * @param $array array Passed by reference and modified
	 * @param $value mixed
	 */
	function addToArray( &$array, $value ) {
		global $wgContLang;
		foreach ( $this->mSynonyms as $syn ) {
			$array[$wgContLang->lc($syn)] = $value;
		}
	}

	/**
	 * @return bool Whether this word is matched case-sensitively
	 */
	function isCaseSensitive() {
		return $this->mCaseSensitive;
	}

	/**
	 * @return mixed The magic word ID
	 */
	function getId() {
		return $this->mId;
	}
}
477
478 /**
479 * Class for handling an array of magic words
480 * @ingroup Parser
481 */
class MagicWordArray {
	/** Magic word IDs contained in this array */
	var $names = array();
	/** Cached result of getHash(), null when it needs rebuilding */
	var $hash;
	/** Cached results of getBaseRegex() and getRegex(), null when they need rebuilding */
	var $baseRegex, $regex;
	var $matches;

	/**
	 * @param $names array Magic word IDs
	 */
	function __construct( $names = array() ) {
		$this->names = $names;
	}

	/**
	 * Add a magic word by name
	 * @param $name string Magic word ID
	 */
	public function add( $name ) {
		$this->names[] = $name;
		// Throw away everything derived from the old name list
		$this->hash = $this->baseRegex = $this->regex = null;
	}

	/**
	 * Add a number of magic words by name
	 * @param $names array Magic word IDs
	 */
	public function addArray( $names ) {
		$this->names = array_merge( $this->names, array_values( $names ) );
		$this->hash = $this->baseRegex = $this->regex = null;
	}

	/**
	 * Get a 2-d hashtable for this array.
	 * Element 0 maps the lower-cased synonyms of case-insensitive words to the
	 * magic word ID, element 1 maps the synonyms of case-sensitive words as
	 * they are.
	 * @return array
	 */
	function getHash() {
		if ( is_null( $this->hash ) ) {
			global $wgContLang;
			$this->hash = array( 0 => array(), 1 => array() );
			foreach ( $this->names as $name ) {
				$magic = MagicWord::get( $name );
				$case = intval( $magic->isCaseSensitive() );
				foreach ( $magic->getSynonyms() as $syn ) {
					if ( !$case ) {
						$syn = $wgContLang->lc( $syn );
					}
					$this->hash[$case][$syn] = $name;
				}
			}
		}
		return $this->hash;
	}

	/**
	 * Get the base regex.
	 * Returns two alternations of named groups, one group per synonym: element 0
	 * for the case-insensitive words, element 1 for the case-sensitive ones.
	 * Each group is named "<synonym index>_<magic word ID>", which is how
	 * parseMatch() recovers the ID from a match.
	 * @return array
	 */
	function getBaseRegex() {
		if ( is_null( $this->baseRegex ) ) {
			$this->baseRegex = array( 0 => '', 1 => '' );
			foreach ( $this->names as $name ) {
				$magic = MagicWord::get( $name );
				$case = intval( $magic->isCaseSensitive() );
				foreach ( $magic->getSynonyms() as $i => $syn ) {
					// Group names must start with a non-digit in PCRE 8.34+, so the
					// synonym index is spelled with letters (0 => a, 1 => b, ...).
					// parseMatch() ignores this prefix.
					$it = strtr( (string)$i, '0123456789', 'abcdefghij' );
					$group = "(?P<{$it}_{$name}>" . preg_quote( $syn, '/' ) . ')';
					if ( $this->baseRegex[$case] === '' ) {
						$this->baseRegex[$case] = $group;
					} else {
						$this->baseRegex[$case] .= '|' . $group;
					}
				}
			}
		}
		return $this->baseRegex;
	}

	/**
	 * Get an unanchored regex.
	 * Element 0 is the case-insensitive regex, element 1 the case-sensitive
	 * one; an element is '' if there are no words of that kind.
	 * @return array
	 */
	function getRegex() {
		if ( is_null( $this->regex ) ) {
			$base = $this->getBaseRegex();
			$this->regex = array( '', '' );
			if ( $base[0] !== '' ) {
				$this->regex[0] = "/{$base[0]}/iuS";
			}
			if ( $base[1] !== '' ) {
				$this->regex[1] = "/{$base[1]}/S";
			}
		}
		return $this->regex;
	}

	/**
	 * Get a regex for matching variables
	 * @return array Same layout as getRegex(), with each quoted "$1" replaced
	 *   by a non-greedy capture group
	 */
	function getVariableRegex() {
		return str_replace( "\\$1", "(.*?)", $this->getRegex() );
	}

	/**
	 * Get an anchored regex for matching variables
	 * @return array Same layout as getRegex()
	 */
	function getVariableStartToEndRegex() {
		$base = $this->getBaseRegex();
		$newRegex = array( '', '' );
		if ( $base[0] !== '' ) {
			$newRegex[0] = str_replace( "\\$1", "(.*?)", "/^(?:{$base[0]})$/iuS" );
		}
		if ( $base[1] !== '' ) {
			$newRegex[1] = str_replace( "\\$1", "(.*?)", "/^(?:{$base[1]})$/S" );
		}
		return $newRegex;
	}

	/**
	 * Parse a match array from preg_match
	 * Returns array(magic word ID, parameter value)
	 * If there is no parameter value, that element will be false.
	 *
	 * The magic word ID is taken from the name of the first group with a
	 * non-empty value. The parameter is the element two positions after that
	 * name: the numeric copy of the named group sits in between.
	 *
	 * @param $m array Match array as filled in by preg_match()
	 * @return array
	 * @throws MWException if a group key has no "_" in it, or no group matched
	 */
	function parseMatch( $m ) {
		// Walk the array by position instead of with each()/next(), which is
		// deprecated since PHP 7.2 and gone in PHP 8.
		$keys = array_keys( $m );
		$values = array_values( $m );
		$count = count( $keys );

		for ( $pos = 0; $pos < $count; $pos++ ) {
			$key = $keys[$pos];
			$value = $values[$pos];
			if ( $key === 0 || $value === '' ) {
				continue;
			}
			$parts = explode( '_', (string)$key, 2 );
			if ( count( $parts ) != 2 ) {
				// This shouldn't happen
				throw new MWException( __METHOD__ . ': bad parameter name' );
			}
			// $parts[0] is the synonym index, which is not needed
			$magicName = $parts[1];
			$paramValue = ( $pos + 2 < $count ) ? $values[$pos + 2] : false;
			return array( $magicName, $paramValue );
		}
		// This shouldn't happen either
		throw new MWException( __METHOD__.': parameter not found' );
	}

	/**
	 * Match some text, with parameter capture
	 * Returns an array with the magic word name in the first element and the
	 * parameter in the second element.
	 * Both elements are false if there was no match.
	 * @param $text string
	 * @return array
	 */
	public function matchVariableStartToEnd( $text ) {
		$regexes = $this->getVariableStartToEndRegex();
		foreach ( $regexes as $regex ) {
			if ( $regex === '' ) {
				continue;
			}
			$m = array();
			if ( preg_match( $regex, $text, $m ) ) {
				return $this->parseMatch( $m );
			}
		}
		return array( false, false );
	}

	/**
	 * Match some text, without parameter capture
	 * Returns the magic word name, or false if there was no capture
	 *
	 * The case-sensitive words are tried first with the text as given, then
	 * the case-insensitive ones with the lower-cased text.
	 * @param $text string
	 * @return string|false
	 */
	public function matchStartToEnd( $text ) {
		$hash = $this->getHash();
		if ( isset( $hash[1][$text] ) ) {
			return $hash[1][$text];
		}
		global $wgContLang;
		$lc = $wgContLang->lc( $text );
		if ( isset( $hash[0][$lc] ) ) {
			return $hash[0][$lc];
		}
		return false;
	}

	/**
	 * Returns an associative array, ID => param value, for all items that match
	 * Removes the matched items from the input string (passed by reference)
	 * @param $text string Passed by reference and modified
	 * @return array
	 */
	public function matchAndRemove( &$text ) {
		$found = array();
		$regexes = $this->getRegex();
		foreach ( $regexes as $regex ) {
			if ( $regex === '' ) {
				continue;
			}
			preg_match_all( $regex, $text, $matches, PREG_SET_ORDER );
			foreach ( $matches as $m ) {
				list( $name, $param ) = $this->parseMatch( $m );
				$found[$name] = $param;
			}
			$text = preg_replace( $regex, '', $text );
		}
		return $found;
	}
}