Quote $default in PostgresUpdater::setDefault
[lhc/web/wiklou.git] / includes / MagicWord.php
1 <?php
2 /**
3 * See docs/magicword.txt.
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License along
16 * with this program; if not, write to the Free Software Foundation, Inc.,
17 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
18 * http://www.gnu.org/copyleft/gpl.html
19 *
20 * @file
21 * @ingroup Parser
22 */
23
24 /**
25 * This class encapsulates "magic words" such as "#redirect", __NOTOC__, etc.
26 *
27 * @par Usage:
28 * @code
29 * if (MagicWord::get( 'redirect' )->match( $text ) ) {
30 * // some code
31 * }
32 * @endcode
33 *
34 * Possible future improvements:
35 * * Simultaneous searching for a number of magic words
36 * * MagicWord::$mObjects in shared memory
37 *
38 * Please avoid reading the data out of one of these objects and then writing
39 * special case code. If possible, add another match()-like function here.
40 *
41 * To add magic words in an extension, use $magicWords in a file listed in
42 * $wgExtensionMessagesFiles[].
43 *
44 * @par Example:
45 * @code
46 * $magicWords = [];
47 *
48 * $magicWords['en'] = [
49 * 'magicwordkey' => [ 0, 'case_insensitive_magic_word' ],
50 * 'magicwordkey2' => [ 1, 'CASE_sensitive_magic_word2' ],
51 * ];
52 * @endcode
53 *
54 * For magic words which are also Parser variables, add a MagicWordwgVariableIDs
55 * hook. Use string keys.
56 *
57 * @ingroup Parser
58 */
class MagicWord {
	/**
	 * Magic word ID, e.g. 'redirect'. Holds the constructor default 0 until
	 * the constructor or load() assigns a real ID.
	 *
	 * @var string|int
	 */
	public $mId;

	/** @var string[] Synonyms of this magic word; '$1' marks a variable part */
	public $mSynonyms;

	/** @var bool Whether matching is case sensitive */
	public $mCaseSensitive;

	/** @var string Lazily built by initRegex(): matches the word anywhere */
	private $mRegex = '';

	/** @var string Lazily built by initRegex(): matches the word at the start */
	private $mRegexStart = '';

	/** @var string Lazily built by initRegex(): matches the word as the whole string */
	private $mRegexStartToEnd = '';

	/** @var string Lazily built by initRegex(): alternation of quoted synonyms, no delimiters */
	private $mBaseRegex = '';

	/** @var string Like $mRegex, with the quoted '$1' replaced by a capture group */
	private $mVariableRegex = '';

	/** @var string Like $mRegexStartToEnd, with the quoted '$1' replaced by a capture group */
	private $mVariableStartToEndRegex = '';

	/** @var bool Whether the last replace()/substituteCallback() changed its input */
	private $mModified = false;

	/** @var bool Set by pregRemoveAndRecord() when a match was removed */
	private $mFound = false;

	/** @var bool Whether the MagicWordwgVariableIDs hook has already been run */
	public static $mVariableIDsInitialised = false;

	/** @var string[] IDs of magic words that are parser variables */
	public static $mVariableIDs = [
		'!',
		'currentmonth',
		'currentmonth1',
		'currentmonthname',
		'currentmonthnamegen',
		'currentmonthabbrev',
		'currentday',
		'currentday2',
		'currentdayname',
		'currentyear',
		'currenttime',
		'currenthour',
		'localmonth',
		'localmonth1',
		'localmonthname',
		'localmonthnamegen',
		'localmonthabbrev',
		'localday',
		'localday2',
		'localdayname',
		'localyear',
		'localtime',
		'localhour',
		'numberofarticles',
		'numberoffiles',
		'numberofedits',
		'articlepath',
		'pageid',
		'sitename',
		'server',
		'servername',
		'scriptpath',
		'stylepath',
		'pagename',
		'pagenamee',
		'fullpagename',
		'fullpagenamee',
		'namespace',
		'namespacee',
		'namespacenumber',
		'currentweek',
		'currentdow',
		'localweek',
		'localdow',
		'revisionid',
		'revisionday',
		'revisionday2',
		'revisionmonth',
		'revisionmonth1',
		'revisionyear',
		'revisiontimestamp',
		'revisionuser',
		'revisionsize',
		'subpagename',
		'subpagenamee',
		'talkspace',
		'talkspacee',
		'subjectspace',
		'subjectspacee',
		'talkpagename',
		'talkpagenamee',
		'subjectpagename',
		'subjectpagenamee',
		'numberofusers',
		'numberofactiveusers',
		'numberofpages',
		'currentversion',
		'rootpagename',
		'rootpagenamee',
		'basepagename',
		'basepagenamee',
		'currenttimestamp',
		'localtimestamp',
		'directionmark',
		'contentlanguage',
		'pagelanguage',
		'numberofadmins',
		'cascadingsources',
	];

	/**
	 * Array of caching hints for ParserCache, keyed by magic word ID.
	 * NOTE(review): the values (3600, 86400) look like seconds - confirm.
	 *
	 * @var int[]
	 */
	public static $mCacheTTLs = [
		'currentmonth' => 86400,
		'currentmonth1' => 86400,
		'currentmonthname' => 86400,
		'currentmonthnamegen' => 86400,
		'currentmonthabbrev' => 86400,
		'currentday' => 3600,
		'currentday2' => 3600,
		'currentdayname' => 3600,
		'currentyear' => 86400,
		'currenttime' => 3600,
		'currenthour' => 3600,
		'localmonth' => 86400,
		'localmonth1' => 86400,
		'localmonthname' => 86400,
		'localmonthnamegen' => 86400,
		'localmonthabbrev' => 86400,
		'localday' => 3600,
		'localday2' => 3600,
		'localdayname' => 3600,
		'localyear' => 86400,
		'localtime' => 3600,
		'localhour' => 3600,
		'numberofarticles' => 3600,
		'numberoffiles' => 3600,
		'numberofedits' => 3600,
		'currentweek' => 3600,
		'currentdow' => 3600,
		'localweek' => 3600,
		'localdow' => 3600,
		'numberofusers' => 3600,
		'numberofactiveusers' => 3600,
		'numberofpages' => 3600,
		'currentversion' => 86400,
		'currenttimestamp' => 3600,
		'localtimestamp' => 3600,
		'pagesinnamespace' => 3600,
		'numberofadmins' => 3600,
		'numberingroup' => 3600,
	];

	/** @var string[] IDs of the double-underscore magic words (__NOTOC__ etc.) */
	public static $mDoubleUnderscoreIDs = [
		'notoc',
		'nogallery',
		'forcetoc',
		'toc',
		'noeditsection',
		'newsectionlink',
		'nonewsectionlink',
		'hiddencat',
		'index',
		'noindex',
		'staticredirect',
		'notitleconvert',
		'nocontentconvert',
	];

	/** @var string[] IDs of the substitution modifiers */
	public static $mSubstIDs = [
		'subst',
		'safesubst',
	];

	/** @var MagicWord[] Cache of loaded objects, keyed by magic word ID */
	public static $mObjects = [];

	/** @var MagicWordArray|null Lazily created by getDoubleUnderscoreArray() */
	public static $mDoubleUnderscoreArray = null;

	/**
	 * @param string|int $id Magic word ID
	 * @param string[]|string $syn Synonym or list of synonyms (cast to array)
	 * @param bool $cs Whether the word is case sensitive
	 */
	public function __construct( $id = 0, $syn = [], $cs = false ) {
		$this->mId = $id;
		$this->mSynonyms = (array)$syn;
		$this->mCaseSensitive = $cs;
	}

	/**
	 * Factory: returns the cached object for an ID, creating and loading it
	 * on first use.
	 *
	 * @param string $id Magic word ID, e.g. 'redirect'
	 *
	 * @return MagicWord
	 * @throws MWException Propagated from load() when the ID has no synonyms
	 */
	public static function &get( $id ) {
		if ( !isset( self::$mObjects[$id] ) ) {
			$mw = new MagicWord();
			$mw->load( $id );
			// Only cached after a successful load(); a throwing load() leaves
			// the cache untouched.
			self::$mObjects[$id] = $mw;
		}
		return self::$mObjects[$id];
	}

	/**
	 * Get an array of parser variable IDs. The MagicWordwgVariableIDs hook is
	 * run once, on the first call, and may modify the list.
	 *
	 * @return string[]
	 */
	public static function getVariableIDs() {
		if ( !self::$mVariableIDsInitialised ) {
			# Get variable IDs
			Hooks::run( 'MagicWordwgVariableIDs', [ &self::$mVariableIDs ] );
			self::$mVariableIDsInitialised = true;
		}
		return self::$mVariableIDs;
	}

	/**
	 * Get an array of parser substitution modifier IDs
	 *
	 * @return string[]
	 */
	public static function getSubstIDs() {
		return self::$mSubstIDs;
	}

	/**
	 * Allow external reads of TTL array
	 *
	 * @param string $id Magic word ID
	 * @return int The entry of self::$mCacheTTLs for $id, or -1 if there is none
	 */
	public static function getCacheTTL( $id ) {
		if ( array_key_exists( $id, self::$mCacheTTLs ) ) {
			return self::$mCacheTTLs[$id];
		}
		return -1;
	}

	/**
	 * Get a MagicWordArray of double-underscore entities. On the first call
	 * the GetDoubleUnderscoreIDs hook is run and the array is created.
	 *
	 * @return MagicWordArray
	 */
	public static function getDoubleUnderscoreArray() {
		if ( self::$mDoubleUnderscoreArray === null ) {
			Hooks::run( 'GetDoubleUnderscoreIDs', [ &self::$mDoubleUnderscoreIDs ] );
			self::$mDoubleUnderscoreArray = new MagicWordArray( self::$mDoubleUnderscoreIDs );
		}
		return self::$mDoubleUnderscoreArray;
	}

	/**
	 * Clear the self::$mObjects variable
	 * For use in parser tests
	 */
	public static function clearCache() {
		self::$mObjects = [];
	}

	/**
	 * Initialises this object with an ID: stores the ID and asks the content
	 * language to fill in the magic word data via getMagic().
	 *
	 * @param string $id Magic word ID
	 * @throws MWException If no synonyms are set after getMagic()
	 */
	public function load( $id ) {
		global $wgContLang;
		$this->mId = $id;
		$wgContLang->getMagic( $this );
		if ( !$this->mSynonyms ) {
			// A placeholder synonym is assigned before throwing; kept as in
			// the original code.
			$this->mSynonyms = [ 'brionmademeputthishere' ];
			throw new MWException( "Error: invalid magic word '$id'" );
		}
	}

	/**
	 * Preliminary initialisation: builds all cached regexes from the synonyms.
	 * @private
	 */
	public function initRegex() {
		// Sort the synonyms by length, descending, so that the longest synonym
		// matches in precedence to the shortest
		$synonyms = $this->mSynonyms;
		usort( $synonyms, [ $this, 'compareStringLength' ] );

		$escSyn = [];
		foreach ( $synonyms as $synonym ) {
			// In case a magic word contains /, like that's going to happen;)
			$escSyn[] = preg_quote( $synonym, '/' );
		}
		$this->mBaseRegex = implode( '|', $escSyn );

		$case = $this->mCaseSensitive ? '' : 'iu';
		$this->mRegex = "/{$this->mBaseRegex}/{$case}";
		$this->mRegexStart = "/^(?:{$this->mBaseRegex})/{$case}";
		$this->mRegexStartToEnd = "/^(?:{$this->mBaseRegex})$/{$case}";
		// preg_quote() turned '$1' into '\$1'; swap that for a lazy capture group.
		$this->mVariableRegex = str_replace( "\\$1", "(.*?)", $this->mRegex );
		$this->mVariableStartToEndRegex = str_replace( "\\$1", "(.*?)", $this->mRegexStartToEnd );
	}

	/**
	 * A comparison function that returns -1, 0 or 1 depending on whether the
	 * first string is longer, the same length or shorter than the second
	 * string. Lengths are byte lengths (strlen).
	 *
	 * @param string $s1
	 * @param string $s2
	 *
	 * @return int
	 */
	public function compareStringLength( $s1, $s2 ) {
		$l1 = strlen( $s1 );
		$l2 = strlen( $s2 );
		if ( $l1 < $l2 ) {
			return 1;
		} elseif ( $l1 > $l2 ) {
			return -1;
		}
		return 0;
	}

	/**
	 * Gets a regex representing matching the word
	 *
	 * @return string
	 */
	public function getRegex() {
		if ( $this->mRegex === '' ) {
			$this->initRegex();
		}
		return $this->mRegex;
	}

	/**
	 * Gets the regexp modifiers to use, i.e. 'iu' for case-insensitive words
	 * or nothing for case-sensitive ones, to be used if one is using
	 * MagicWord::getBaseRegex(), otherwise it'll be included in the complete
	 * expression
	 *
	 * @return string
	 */
	public function getRegexCase() {
		if ( $this->mRegex === '' ) {
			$this->initRegex();
		}

		return $this->mCaseSensitive ? '' : 'iu';
	}

	/**
	 * Gets a regex matching the word, if it is at the string start
	 *
	 * @return string
	 */
	public function getRegexStart() {
		if ( $this->mRegex === '' ) {
			$this->initRegex();
		}
		return $this->mRegexStart;
	}

	/**
	 * Gets a regex matching the word from start to end of a string
	 *
	 * @return string
	 * @since 1.23
	 */
	public function getRegexStartToEnd() {
		if ( $this->mRegexStartToEnd === '' ) {
			$this->initRegex();
		}
		return $this->mRegexStartToEnd;
	}

	/**
	 * regex without the slashes and what not
	 *
	 * @return string
	 */
	public function getBaseRegex() {
		if ( $this->mRegex === '' ) {
			$this->initRegex();
		}
		return $this->mBaseRegex;
	}

	/**
	 * Returns true if the text contains the word
	 *
	 * @param string $text
	 *
	 * @return bool
	 */
	public function match( $text ) {
		return (bool)preg_match( $this->getRegex(), $text );
	}

	/**
	 * Returns true if the text starts with the word
	 *
	 * @param string $text
	 *
	 * @return bool
	 */
	public function matchStart( $text ) {
		return (bool)preg_match( $this->getRegexStart(), $text );
	}

	/**
	 * Returns true if the text matched the word
	 *
	 * @param string $text
	 *
	 * @return bool
	 * @since 1.23
	 */
	public function matchStartToEnd( $text ) {
		return (bool)preg_match( $this->getRegexStartToEnd(), $text );
	}

	/**
	 * Returns NULL if there's no match, the value of $1 otherwise
	 * The return code is the matched string, if there's no variable
	 * part in the regex and the matched variable part ($1) if there
	 * is one.
	 *
	 * If the variable part matched the empty string, the whole matched
	 * string is returned. If the whole match is itself empty, null is
	 * returned.
	 *
	 * @param string $text
	 *
	 * @return string|null
	 */
	public function matchVariableStartToEnd( $text ) {
		$matches = [];
		$matchcount = preg_match( $this->getVariableStartToEndRegex(), $text, $matches );
		if ( $matchcount == 0 ) {
			// Also reached when preg_match() fails and returns false.
			return null;
		}

		# multiple matched parts (variable match); some will be empty because of
		# synonyms. The variable will be the second non-empty one so remove any
		# blank elements and re-sort the indices.
		# See also T8526
		#
		# Only empty strings are dropped. A bare array_filter() would also drop
		# a captured '0', so that e.g. '0px' would yield '0px' instead of '0'.
		$nonEmpty = array_values( array_filter( $matches, function ( $part ) {
			return $part !== '';
		} ) );

		if ( count( $nonEmpty ) === 1 ) {
			return $nonEmpty[0];
		}
		// isset() guards the case where every part was empty.
		return isset( $nonEmpty[1] ) ? $nonEmpty[1] : null;
	}

	/**
	 * Returns true if the text matches the word, and alters the
	 * input string, removing all instances of the word
	 *
	 * @param string &$text
	 *
	 * @return bool
	 */
	public function matchAndRemove( &$text ) {
		$this->mFound = false;
		$text = preg_replace_callback(
			$this->getRegex(),
			[ $this, 'pregRemoveAndRecord' ],
			$text
		);

		return $this->mFound;
	}

	/**
	 * Returns true if the text starts with the word, and alters the
	 * input string, removing the word from its start
	 *
	 * @param string &$text
	 * @return bool
	 */
	public function matchStartAndRemove( &$text ) {
		$this->mFound = false;
		$text = preg_replace_callback(
			$this->getRegexStart(),
			[ $this, 'pregRemoveAndRecord' ],
			$text
		);

		return $this->mFound;
	}

	/**
	 * Used in matchAndRemove() and matchStartAndRemove(): records that a
	 * match occurred and replaces it with nothing.
	 *
	 * @return string Always the empty string
	 */
	public function pregRemoveAndRecord() {
		$this->mFound = true;
		return '';
	}

	/**
	 * Replaces the word with something else
	 *
	 * @param string $replacement Literal replacement text (escaped before use)
	 * @param string $subject
	 * @param int $limit Maximum number of replacements, -1 for no limit
	 *
	 * @return string
	 */
	public function replace( $replacement, $subject, $limit = -1 ) {
		$res = preg_replace(
			$this->getRegex(),
			StringUtils::escapeRegexReplacement( $replacement ),
			$subject,
			$limit
		);
		$this->mModified = $res !== $subject;
		return $res;
	}

	/**
	 * Variable handling: {{SUBST:xxx}} style words
	 * Calls back a function to determine what to replace xxx with
	 * Input word must contain $1
	 *
	 * @param string $text
	 * @param callable $callback
	 *
	 * @return string
	 */
	public function substituteCallback( $text, $callback ) {
		$res = preg_replace_callback( $this->getVariableRegex(), $callback, $text );
		$this->mModified = $res !== $text;
		return $res;
	}

	/**
	 * Matches the word, where $1 is a wildcard
	 *
	 * @return string
	 */
	public function getVariableRegex() {
		if ( $this->mVariableRegex === '' ) {
			$this->initRegex();
		}
		return $this->mVariableRegex;
	}

	/**
	 * Matches the entire string, where $1 is a wildcard
	 *
	 * @return string
	 */
	public function getVariableStartToEndRegex() {
		if ( $this->mVariableStartToEndRegex === '' ) {
			$this->initRegex();
		}
		return $this->mVariableStartToEndRegex;
	}

	/**
	 * Accesses the synonym list directly
	 *
	 * @param int $i Index into the synonym list (not bounds-checked)
	 *
	 * @return string
	 */
	public function getSynonym( $i ) {
		return $this->mSynonyms[$i];
	}

	/**
	 * @return string[]
	 */
	public function getSynonyms() {
		return $this->mSynonyms;
	}

	/**
	 * Returns true if the last call to replace() or substituteCallback()
	 * returned a modified text, otherwise false.
	 *
	 * @return bool
	 */
	public function getWasModified() {
		return $this->mModified;
	}

	/**
	 * Adds all the synonyms of this MagicWord to an array, to allow quick
	 * lookup in a list of magic words. Keys are the synonyms lower-cased by
	 * the content language.
	 *
	 * @param array &$array
	 * @param string $value
	 */
	public function addToArray( &$array, $value ) {
		global $wgContLang;
		foreach ( $this->mSynonyms as $syn ) {
			$array[$wgContLang->lc( $syn )] = $value;
		}
	}

	/**
	 * @return bool
	 */
	public function isCaseSensitive() {
		return $this->mCaseSensitive;
	}

	/**
	 * @return string|int The magic word ID
	 */
	public function getId() {
		return $this->mId;
	}
}