* close connection in class destructor (unlike MySql, Oracle does not commit on close)
[lhc/web/wiklou.git] / includes / MagicWord.php
1 <?php
2 /**
3 * File for magic words
4 *
5 * See docs/magicword.txt
6 *
7 * @file
8 * @ingroup Parser
9 */
10
11 /**
12 * This class encapsulates "magic words" such as #redirect, __NOTOC__, etc.
13 * Usage:
14 * if (MagicWord::get( 'redirect' )->match( $text ) )
15 *
16 * Possible future improvements:
17 * * Simultaneous searching for a number of magic words
18 * * MagicWord::$mObjects in shared memory
19 *
20 * Please avoid reading the data out of one of these objects and then writing
21 * special case code. If possible, add another match()-like function here.
22 *
23 * To add magic words in an extension, use the LanguageGetMagic hook. For
24 * magic words which are also Parser variables, add a MagicWordwgVariableIDs
25 * hook. Use string keys.
26 *
27 * @ingroup Parser
28 */
class MagicWord {
	/**#@+
	 * Per-instance state. Treat these as private: they are publicly visible
	 * only because they were declared without an explicit visibility.
	 * @private
	 */
	public $mId, $mSynonyms, $mCaseSensitive;
	// The regex members below are built lazily by initRegex(); an empty
	// string means "not built yet".
	public $mRegex = '';
	public $mRegexStart = '';
	public $mBaseRegex = '';
	public $mVariableRegex = '';
	public $mVariableStartToEndRegex = '';
	// Set by replace() and substituteCallback(); read via getWasModified().
	public $mModified = false;
	// Set by pregRemoveAndRecord() while matchAndRemove()/matchStartAndRemove() run.
	public $mFound = false;

	/** True once getVariableIDs() has run its hooks */
	static public $mVariableIDsInitialised = false;

	/** IDs returned by getVariableIDs(); the MagicWordwgVariableIDs hook receives this array by reference */
	static public $mVariableIDs = array(
		'currentmonth',
		'currentmonth1',
		'currentmonthname',
		'currentmonthnamegen',
		'currentmonthabbrev',
		'currentday',
		'currentday2',
		'currentdayname',
		'currentyear',
		'currenttime',
		'currenthour',
		'localmonth',
		'localmonth1',
		'localmonthname',
		'localmonthnamegen',
		'localmonthabbrev',
		'localday',
		'localday2',
		'localdayname',
		'localyear',
		'localtime',
		'localhour',
		'numberofarticles',
		'numberoffiles',
		'numberofedits',
		'articlepath',
		'sitename',
		'server',
		'servername',
		'scriptpath',
		'stylepath',
		'pagename',
		'pagenamee',
		'fullpagename',
		'fullpagenamee',
		'namespace',
		'namespacee',
		'currentweek',
		'currentdow',
		'localweek',
		'localdow',
		'revisionid',
		'revisionday',
		'revisionday2',
		'revisionmonth',
		'revisionmonth1',
		'revisionyear',
		'revisiontimestamp',
		'revisionuser',
		'subpagename',
		'subpagenamee',
		'talkspace',
		'talkspacee',
		'subjectspace',
		'subjectspacee',
		'talkpagename',
		'talkpagenamee',
		'subjectpagename',
		'subjectpagenamee',
		'numberofusers',
		'numberofactiveusers',
		'numberofpages',
		'currentversion',
		'basepagename',
		'basepagenamee',
		'currenttimestamp',
		'localtimestamp',
		'directionmark',
		'contentlanguage',
		'numberofadmins',
		'numberofviews',
	);

	/* Array of caching hints for ParserCache (magic word ID => TTL value) */
	static public $mCacheTTLs = array (
		'currentmonth' => 86400,
		'currentmonth1' => 86400,
		'currentmonthname' => 86400,
		'currentmonthnamegen' => 86400,
		'currentmonthabbrev' => 86400,
		'currentday' => 3600,
		'currentday2' => 3600,
		'currentdayname' => 3600,
		'currentyear' => 86400,
		'currenttime' => 3600,
		'currenthour' => 3600,
		'localmonth' => 86400,
		'localmonth1' => 86400,
		'localmonthname' => 86400,
		'localmonthnamegen' => 86400,
		'localmonthabbrev' => 86400,
		'localday' => 3600,
		'localday2' => 3600,
		'localdayname' => 3600,
		'localyear' => 86400,
		'localtime' => 3600,
		'localhour' => 3600,
		'numberofarticles' => 3600,
		'numberoffiles' => 3600,
		'numberofedits' => 3600,
		'currentweek' => 3600,
		'currentdow' => 3600,
		'localweek' => 3600,
		'localdow' => 3600,
		'numberofusers' => 3600,
		'numberofactiveusers' => 3600,
		'numberofpages' => 3600,
		'currentversion' => 86400,
		'currenttimestamp' => 3600,
		'localtimestamp' => 3600,
		'pagesinnamespace' => 3600,
		'numberofadmins' => 3600,
		'numberofviews' => 3600,
		'numberingroup' => 3600,
	);

	/** IDs used to build the MagicWordArray returned by getDoubleUnderscoreArray() */
	static public $mDoubleUnderscoreIDs = array(
		'notoc',
		'nogallery',
		'forcetoc',
		'toc',
		'noeditsection',
		'newsectionlink',
		'nonewsectionlink',
		'hiddencat',
		'index',
		'noindex',
		'staticredirect',
		'notitleconvert',
		'nocontentconvert',
	);

	/** IDs returned by getSubstIDs() */
	static public $mSubstIDs = array(
		'subst',
		'safesubst',
	);

	/** Cache of MagicWord objects created by get(), keyed by ID */
	static public $mObjects = array();

	/** Lazily created by getDoubleUnderscoreArray() */
	static public $mDoubleUnderscoreArray = null;

	/**#@-*/

	/**
	 * @param $id Magic word ID
	 * @param $syn Synonym or list of synonyms (cast to array)
	 * @param $cs Whether matching is case sensitive
	 */
	function __construct($id = 0, $syn = array(), $cs = false) {
		$this->mId = $id;
		$this->mSynonyms = (array)$syn;
		$this->mCaseSensitive = $cs;
	}

	/**
	 * Factory: creates an object representing an ID.
	 * Objects are cached in self::$mObjects, so each ID is loaded only once
	 * until clearCache() is called.
	 * @return MagicWord
	 */
	static function &get( $id ) {
		wfProfileIn( __METHOD__ );
		if ( !isset( self::$mObjects[$id] ) ) {
			$mw = new MagicWord();
			$mw->load( $id );
			self::$mObjects[$id] = $mw;
		}
		wfProfileOut( __METHOD__ );
		return self::$mObjects[$id];
	}

	/**
	 * Get an array of parser variable IDs.
	 * The hooks are run only on the first call; later calls return the
	 * already-extended static list.
	 */
	static function getVariableIDs() {
		if ( !self::$mVariableIDsInitialised ) {
			# Deprecated constant definition hook, available for extensions that need it
			$magicWords = array();
			wfRunHooks( 'MagicWordMagicWords', array( &$magicWords ) );
			foreach ( $magicWords as $word ) {
				define( $word, $word );
			}

			# Get variable IDs
			wfRunHooks( 'MagicWordwgVariableIDs', array( &self::$mVariableIDs ) );
			self::$mVariableIDsInitialised = true;
		}
		return self::$mVariableIDs;
	}

	/**
	 * Get an array of parser substitution modifier IDs
	 */
	static function getSubstIDs() {
		return self::$mSubstIDs;
	}

	/**
	 * Allow external reads of TTL array
	 * @return The entry of self::$mCacheTTLs for $id, or -1 if there is none
	 */
	static function getCacheTTL($id) {
		if ( array_key_exists( $id, self::$mCacheTTLs ) ) {
			return self::$mCacheTTLs[$id];
		}
		return -1;
	}

	/** Get a MagicWordArray of double-underscore entities */
	static function getDoubleUnderscoreArray() {
		if ( is_null( self::$mDoubleUnderscoreArray ) ) {
			self::$mDoubleUnderscoreArray = new MagicWordArray( self::$mDoubleUnderscoreIDs );
		}
		return self::$mDoubleUnderscoreArray;
	}

	/**
	 * Clear the self::$mObjects variable
	 * For use in parser tests
	 */
	public static function clearCache() {
		self::$mObjects = array();
	}

	/**
	 * Initialises this object with an ID.
	 * The object is handed to $wgContLang->getMagic(), which presumably fills
	 * in the synonyms and case-sensitivity (not visible here). If no synonyms
	 * are set afterwards, the word is treated as invalid: the problem is
	 * logged and a placeholder synonym is installed.
	 */
	function load( $id ) {
		global $wgContLang;
		$this->mId = $id;
		$wgContLang->getMagic( $this );
		if ( !$this->mSynonyms ) {
			$this->mSynonyms = array( 'dkjsagfjsgashfajsh' );
			#throw new MWException( "Error: invalid magic word '$id'" );
			wfDebugLog( 'exception', "Error: invalid magic word '$id'\n" );
		}
	}

	/**
	 * Preliminary initialisation: builds all regex members from the synonyms
	 * @private
	 */
	function initRegex() {
		// Sort the synonyms by length, descending, so that the longest synonym
		// matches in precedence to the shortest
		$synonyms = $this->mSynonyms;
		usort( $synonyms, array( $this, 'compareStringLength' ) );

		$escSyn = array();
		foreach ( $synonyms as $synonym ) {
			// In case a magic word contains /, like that's going to happen;)
			$escSyn[] = preg_quote( $synonym, '/' );
		}
		$this->mBaseRegex = implode( '|', $escSyn );

		$case = $this->mCaseSensitive ? '' : 'iu';
		$this->mRegex = "/{$this->mBaseRegex}/{$case}";
		$this->mRegexStart = "/^(?:{$this->mBaseRegex})/{$case}";
		// preg_quote() turned the "$1" placeholder into "\$1"; swap it for a
		// lazy capture group so the parameter can be extracted.
		$this->mVariableRegex = str_replace( "\\$1", "(.*?)", $this->mRegex );
		$this->mVariableStartToEndRegex = str_replace( "\\$1", "(.*?)",
			"/^(?:{$this->mBaseRegex})$/{$case}" );
	}

	/**
	 * A comparison function that returns -1, 0 or 1 depending on whether the
	 * first string is longer, the same length or shorter than the second
	 * string.
	 */
	function compareStringLength( $s1, $s2 ) {
		$l1 = strlen( $s1 );
		$l2 = strlen( $s2 );
		if ( $l1 < $l2 ) {
			return 1;
		} elseif ( $l1 > $l2 ) {
			return -1;
		} else {
			return 0;
		}
	}

	/**
	 * Gets a regex representing matching the word
	 */
	function getRegex() {
		if ( $this->mRegex == '' ) {
			$this->initRegex();
		}
		return $this->mRegex;
	}

	/**
	 * Gets the regexp modifiers to use, i.e. 'iu' for case-insensitive words
	 * or an empty string otherwise. To be used if one is using
	 * MagicWord::getBaseRegex(), otherwise it'll be included in the complete
	 * expression
	 */
	function getRegexCase() {
		if ( $this->mRegex === '' ) {
			$this->initRegex();
		}

		return $this->mCaseSensitive ? '' : 'iu';
	}

	/**
	 * Gets a regex matching the word, if it is at the string start
	 */
	function getRegexStart() {
		if ( $this->mRegex == '' ) {
			$this->initRegex();
		}
		return $this->mRegexStart;
	}

	/**
	 * regex without the slashes and what not
	 */
	function getBaseRegex() {
		if ( $this->mRegex == '' ) {
			$this->initRegex();
		}
		return $this->mBaseRegex;
	}

	/**
	 * Returns true if the text contains the word
	 * @return bool
	 */
	function match( $text ) {
		return (bool)preg_match( $this->getRegex(), $text );
	}

	/**
	 * Returns true if the text starts with the word
	 * @return bool
	 */
	function matchStart( $text ) {
		return (bool)preg_match( $this->getRegexStart(), $text );
	}

	/**
	 * Returns NULL if there's no match, the value of $1 otherwise
	 * The return code is the matched string, if there's no variable
	 * part in the regex and the matched variable part ($1) if there
	 * is one.
	 */
	function matchVariableStartToEnd( $text ) {
		$matches = array();
		$matchcount = preg_match( $this->getVariableStartToEndRegex(), $text, $matches );
		if ( $matchcount == 0 ) {
			return null;
		}

		# Multiple matched parts (variable match); some will be empty because
		# of synonyms, so the variable is the first non-empty capture group.
		# Only empty strings are skipped: a bare array_filter() would also
		# drop a legitimate parameter value of "0".
		# See also bug 6526
		foreach ( $matches as $index => $piece ) {
			if ( $index !== 0 && $piece !== '' ) {
				return $piece;
			}
		}

		# No variable part: return the whole matched string
		return $matches[0];
	}


	/**
	 * Returns true if the text matches the word, and alters the
	 * input string, removing all instances of the word
	 */
	function matchAndRemove( &$text ) {
		$this->mFound = false;
		$text = preg_replace_callback( $this->getRegex(), array( $this, 'pregRemoveAndRecord' ), $text );
		return $this->mFound;
	}

	/**
	 * Returns true if the text starts with the word, and alters the
	 * input string, removing that leading occurrence
	 */
	function matchStartAndRemove( &$text ) {
		$this->mFound = false;
		$text = preg_replace_callback( $this->getRegexStart(), array( $this, 'pregRemoveAndRecord' ), $text );
		return $this->mFound;
	}

	/**
	 * Callback used in matchAndRemove() and matchStartAndRemove(): records
	 * that a match happened and replaces it with nothing
	 * @private
	 **/
	function pregRemoveAndRecord( ) {
		$this->mFound = true;
		return '';
	}

	/**
	 * Replaces the word with something else.
	 * The replacement is passed through StringUtils::escapeRegexReplacement()
	 * before being handed to preg_replace().
	 * @param $replacement Replacement text
	 * @param $subject Text to search in
	 * @param $limit Maximum number of replacements, -1 for no limit
	 * @return The result of preg_replace()
	 */
	function replace( $replacement, $subject, $limit=-1 ) {
		$res = preg_replace( $this->getRegex(), StringUtils::escapeRegexReplacement( $replacement ), $subject, $limit );
		$this->mModified = ( $res !== $subject );
		return $res;
	}

	/**
	 * Variable handling: {{SUBST:xxx}} style words
	 * Calls back a function to determine what to replace xxx with
	 * Input word must contain $1
	 */
	function substituteCallback( $text, $callback ) {
		$res = preg_replace_callback( $this->getVariableRegex(), $callback, $text );
		$this->mModified = ( $res !== $text );
		return $res;
	}

	/**
	 * Matches the word, where $1 is a wildcard
	 */
	function getVariableRegex() {
		if ( $this->mVariableRegex == '' ) {
			$this->initRegex();
		}
		return $this->mVariableRegex;
	}

	/**
	 * Matches the entire string, where $1 is a wildcard
	 */
	function getVariableStartToEndRegex() {
		if ( $this->mVariableStartToEndRegex == '' ) {
			$this->initRegex();
		}
		return $this->mVariableStartToEndRegex;
	}

	/**
	 * Accesses the synonym list directly
	 */
	function getSynonym( $i ) {
		return $this->mSynonyms[$i];
	}

	/**
	 * Returns the full synonym list
	 */
	function getSynonyms() {
		return $this->mSynonyms;
	}

	/**
	 * Returns true if the last call to replace() or substituteCallback()
	 * returned a modified text, otherwise false.
	 */
	function getWasModified(){
		return $this->mModified;
	}

	/**
	 * $magicarr is an associative array of (magic word ID => replacement)
	 * This method uses the php feature to do several replacements at the same time,
	 * thereby gaining some efficiency. The result is placed in the out variable
	 * $result. The return value is true if something was replaced.
	 *
	 * Declared static because it uses no instance state; calling it on an
	 * instance still works.
	 *
	 * NOTE(review): unlike replace(), the replacements are not escaped here,
	 * so "$1" or "\1" in a replacement is interpreted by preg_replace() --
	 * confirm whether that is intended.
	 * @static
	 **/
	static function replaceMultiple( $magicarr, $subject, &$result ){
		$search = array();
		$replace = array();
		foreach( $magicarr as $id => $replacement ){
			$mw = MagicWord::get( $id );
			$search[] = $mw->getRegex();
			$replace[] = $replacement;
		}

		$result = preg_replace( $search, $replace, $subject );
		return ( $result !== $subject );
	}

	/**
	 * Adds all the synonyms of this MagicWord to an array, to allow quick
	 * lookup in a list of magic words. Keys are the synonyms lower-cased
	 * with $wgContLang->lc().
	 */
	function addToArray( &$array, $value ) {
		global $wgContLang;
		foreach ( $this->mSynonyms as $syn ) {
			$array[$wgContLang->lc($syn)] = $value;
		}
	}

	function isCaseSensitive() {
		return $this->mCaseSensitive;
	}

	function getId() {
		return $this->mId;
	}
}
518
519 /**
520 * Class for handling an array of magic words
521 * @ingroup Parser
522 */
class MagicWordArray {
	/** List of magic word IDs contained in this array */
	public $names = array();
	/** Cache for getHash(); null until built, reset by add()/addArray() */
	public $hash;
	/** Caches for getBaseRegex() / getRegex(); null until built, reset by add()/addArray() */
	public $baseRegex, $regex;
	public $matches;

	/**
	 * @param $names Array of magic word IDs
	 */
	function __construct( $names = array() ) {
		$this->names = $names;
	}

	/**
	 * Add a magic word by name
	 */
	public function add( $name ) {
		$this->names[] = $name;
		// Invalidate everything derived from the name list
		$this->hash = $this->baseRegex = $this->regex = null;
	}

	/**
	 * Add a number of magic words by name
	 */
	public function addArray( $names ) {
		$this->names = array_merge( $this->names, array_values( $names ) );
		// Invalidate everything derived from the name list
		$this->hash = $this->baseRegex = $this->regex = null;
	}

	/**
	 * Get a 2-d hashtable for this array.
	 * Index 0 maps lower-cased synonyms of case-insensitive words to their
	 * magic word ID; index 1 maps synonyms of case-sensitive words verbatim.
	 */
	function getHash() {
		if ( is_null( $this->hash ) ) {
			global $wgContLang;
			$this->hash = array( 0 => array(), 1 => array() );
			foreach ( $this->names as $name ) {
				$magic = MagicWord::get( $name );
				$case = intval( $magic->isCaseSensitive() );
				foreach ( $magic->getSynonyms() as $syn ) {
					if ( !$case ) {
						$syn = $wgContLang->lc( $syn );
					}
					$this->hash[$case][$syn] = $name;
				}
			}
		}
		return $this->hash;
	}

	/**
	 * Get the base regex.
	 * Returns a two-element array of alternations (index 0: case-insensitive
	 * words, index 1: case-sensitive words). Every synonym is wrapped in a
	 * named group "<synonym index>_<magic word ID>", which parseMatch() uses
	 * to identify the word that matched.
	 */
	function getBaseRegex() {
		if ( is_null( $this->baseRegex ) ) {
			$this->baseRegex = array( 0 => '', 1 => '' );
			foreach ( $this->names as $name ) {
				$magic = MagicWord::get( $name );
				$case = intval( $magic->isCaseSensitive() );
				foreach ( $magic->getSynonyms() as $i => $syn ) {
					// Group names must start with a non-digit in PCRE 8.34+,
					// so spell the synonym index with letters instead.
					$it = strtr( $i, '0123456789', 'abcdefghij' );
					$group = "(?P<{$it}_{$name}>" . preg_quote( $syn, '/' ) . ')';
					if ( $this->baseRegex[$case] === '' ) {
						$this->baseRegex[$case] = $group;
					} else {
						$this->baseRegex[$case] .= '|' . $group;
					}
				}
			}
		}
		return $this->baseRegex;
	}

	/**
	 * Get an unanchored regex that does not match parameters.
	 * Returns a two-element array; an element is '' when there are no words
	 * of that case-sensitivity.
	 */
	function getRegex() {
		if ( is_null( $this->regex ) ) {
			$base = $this->getBaseRegex();
			$this->regex = array( '', '' );
			if ( $base[0] !== '' ) {
				$this->regex[0] = "/{$base[0]}/iuS";
			}
			if ( $base[1] !== '' ) {
				$this->regex[1] = "/{$base[1]}/S";
			}
		}
		return $this->regex;
	}

	/**
	 * Get a regex for matching variables with parameters
	 */
	function getVariableRegex() {
		return str_replace( "\\$1", "(.*?)", $this->getRegex() );
	}

	/**
	 * Get a regex anchored to the start of the string that does not match parameters
	 */
	function getRegexStart() {
		$base = $this->getBaseRegex();
		$newRegex = array( '', '' );
		if ( $base[0] !== '' ) {
			$newRegex[0] = "/^(?:{$base[0]})/iuS";
		}
		if ( $base[1] !== '' ) {
			$newRegex[1] = "/^(?:{$base[1]})/S";
		}
		return $newRegex;
	}

	/**
	 * Get an anchored regex for matching variables with parameters
	 */
	function getVariableStartToEndRegex() {
		$base = $this->getBaseRegex();
		$newRegex = array( '', '' );
		if ( $base[0] !== '' ) {
			$newRegex[0] = str_replace( "\\$1", "(.*?)", "/^(?:{$base[0]})$/iuS" );
		}
		if ( $base[1] !== '' ) {
			$newRegex[1] = str_replace( "\\$1", "(.*?)", "/^(?:{$base[1]})$/S" );
		}
		return $newRegex;
	}

	/**
	 * Parse a match array from preg_match
	 * Returns array(magic word ID, parameter value)
	 * If there is no parameter value, that element will be false.
	 *
	 * @throws MWException if no usable named group is found in $m
	 */
	function parseMatch( $m ) {
		$values = array_values( $m );
		$total = count( $values );
		$pos = 0;
		foreach ( $m as $key => $value ) {
			// Skip the whole-pattern match and groups that did not take part
			if ( $key !== 0 && $value !== '' ) {
				$parts = explode( '_', (string)$key, 2 );
				if ( count( $parts ) != 2 ) {
					// This shouldn't happen
					throw new MWException( __METHOD__ . ': bad parameter name' );
				}
				$magicName = $parts[1];
				// preg_match stores a named group twice: under its name and
				// under its numeric index. The entry after those two is the
				// first capture nested in the group, i.e. the parameter.
				$paramPos = $pos + 2;
				$paramValue = ( $paramPos < $total ) ? $values[$paramPos] : false;
				return array( $magicName, $paramValue );
			}
			$pos++;
		}
		// This shouldn't happen either
		throw new MWException( __METHOD__.': parameter not found' );
	}

	/**
	 * Match some text, with parameter capture
	 * Returns an array with the magic word name in the first element and the
	 * parameter in the second element.
	 * Both elements are false if there was no match.
	 */
	public function matchVariableStartToEnd( $text ) {
		$regexes = $this->getVariableStartToEndRegex();
		foreach ( $regexes as $regex ) {
			if ( $regex !== '' ) {
				$m = false;
				if ( preg_match( $regex, $text, $m ) ) {
					return $this->parseMatch( $m );
				}
			}
		}
		return array( false, false );
	}

	/**
	 * Match some text, without parameter capture
	 * Returns the magic word name, or false if there was no capture
	 */
	public function matchStartToEnd( $text ) {
		$hash = $this->getHash();
		// Case-sensitive words are looked up verbatim first
		if ( isset( $hash[1][$text] ) ) {
			return $hash[1][$text];
		}
		global $wgContLang;
		$lc = $wgContLang->lc( $text );
		if ( isset( $hash[0][$lc] ) ) {
			return $hash[0][$lc];
		}
		return false;
	}

	/**
	 * Returns an associative array, ID => param value, for all items that match
	 * Removes the matched items from the input string (passed by reference)
	 */
	public function matchAndRemove( &$text ) {
		$found = array();
		$regexes = $this->getRegex();
		foreach ( $regexes as $regex ) {
			if ( $regex === '' ) {
				continue;
			}
			preg_match_all( $regex, $text, $matches, PREG_SET_ORDER );
			foreach ( $matches as $m ) {
				list( $name, $param ) = $this->parseMatch( $m );
				$found[$name] = $param;
			}
			$text = preg_replace( $regex, '', $text );
		}
		return $found;
	}

	/**
	 * Return the ID of the magic word at the start of $text, and remove
	 * the prefix from $text.
	 * Return false if no match found and $text is not modified.
	 * Does not match parameters.
	 */
	public function matchStartAndRemove( &$text ) {
		$regexes = $this->getRegexStart();
		foreach ( $regexes as $regex ) {
			if ( $regex === '' ) {
				continue;
			}
			if ( preg_match( $regex, $text, $m ) ) {
				list( $id, ) = $this->parseMatch( $m );
				// Guard the substr() call: when the match covers the whole
				// text the remainder is simply the empty string.
				if ( strlen( $m[0] ) >= strlen( $text ) ) {
					$text = '';
				} else {
					$text = substr( $text, strlen( $m[0] ) );
				}
				return $id;
			}
		}
		return false;
	}
}