Merge "Revert "Use display name in category page subheadings if provided""
[lhc/web/wiklou.git] / includes / MagicWord.php
1 <?php
2 /**
3 * See docs/magicword.txt.
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License along
16 * with this program; if not, write to the Free Software Foundation, Inc.,
17 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
18 * http://www.gnu.org/copyleft/gpl.html
19 *
20 * @file
21 * @ingroup Parser
22 */
23
24 /**
25 * This class encapsulates "magic words" such as "#redirect", __NOTOC__, etc.
26 *
27 * @par Usage:
28 * @code
29 * if (MagicWord::get( 'redirect' )->match( $text ) ) {
30 * // some code
31 * }
32 * @endcode
33 *
34 * Possible future improvements:
35 * * Simultaneous searching for a number of magic words
36 * * MagicWord::$mObjects in shared memory
37 *
38 * Please avoid reading the data out of one of these objects and then writing
39 * special case code. If possible, add another match()-like function here.
40 *
41 * To add magic words in an extension, use $magicWords in a file listed in
42 * $wgExtensionMessagesFiles[].
43 *
44 * @par Example:
45 * @code
46 * $magicWords = [];
47 *
48 * $magicWords['en'] = [
49 * 'magicwordkey' => [ 0, 'case_insensitive_magic_word' ],
50 * 'magicwordkey2' => [ 1, 'CASE_sensitive_magic_word2' ],
51 * ];
52 * @endcode
53 *
54 * For magic words which are also Parser variables, add a MagicWordwgVariableIDs
55 * hook. Use string keys.
56 *
57 * @ingroup Parser
58 */
class MagicWord {
	/** @var string|int Magic word ID, e.g. 'redirect'; 0 until one is assigned */
	public $mId;

	/** @var string[] Every spelling under which this magic word is recognised */
	public $mSynonyms;

	/** @var bool Whether the synonyms must match with exact case */
	public $mCaseSensitive;

	/** @var string Delimited regex matching the word anywhere; built lazily by initRegex() */
	private $mRegex = '';

	/** @var string Delimited regex matching the word at the start of a string */
	private $mRegexStart = '';

	/** @var string Delimited regex matching the word against a whole string */
	private $mRegexStartToEnd = '';

	/** @var string Alternation of the escaped synonyms, without delimiters or modifiers */
	private $mBaseRegex = '';

	/** @var string Like $mRegex, with each "$1" placeholder turned into a capture group */
	private $mVariableRegex = '';

	/** @var string Like $mRegexStartToEnd, with each "$1" placeholder turned into a capture group */
	private $mVariableStartToEndRegex = '';

	/** @var bool Whether the last replace()/substituteCallback() call changed its input */
	private $mModified = false;

	/** @var bool Scratch flag set by pregRemoveAndRecord() while a *AndRemove() call runs */
	private $mFound = false;

	/** @var bool Whether the MagicWordwgVariableIDs hook has already been run */
	public static $mVariableIDsInitialised = false;

	/** @var string[] IDs of the magic words that are parser variables */
	public static $mVariableIDs = [
		'!',
		'currentmonth',
		'currentmonth1',
		'currentmonthname',
		'currentmonthnamegen',
		'currentmonthabbrev',
		'currentday',
		'currentday2',
		'currentdayname',
		'currentyear',
		'currenttime',
		'currenthour',
		'localmonth',
		'localmonth1',
		'localmonthname',
		'localmonthnamegen',
		'localmonthabbrev',
		'localday',
		'localday2',
		'localdayname',
		'localyear',
		'localtime',
		'localhour',
		'numberofarticles',
		'numberoffiles',
		'numberofedits',
		'articlepath',
		'pageid',
		'sitename',
		'server',
		'servername',
		'scriptpath',
		'stylepath',
		'pagename',
		'pagenamee',
		'fullpagename',
		'fullpagenamee',
		'namespace',
		'namespacee',
		'namespacenumber',
		'currentweek',
		'currentdow',
		'localweek',
		'localdow',
		'revisionid',
		'revisionday',
		'revisionday2',
		'revisionmonth',
		'revisionmonth1',
		'revisionyear',
		'revisiontimestamp',
		'revisionuser',
		'revisionsize',
		'subpagename',
		'subpagenamee',
		'talkspace',
		'talkspacee',
		'subjectspace',
		'subjectspacee',
		'talkpagename',
		'talkpagenamee',
		'subjectpagename',
		'subjectpagenamee',
		'numberofusers',
		'numberofactiveusers',
		'numberofpages',
		'currentversion',
		'rootpagename',
		'rootpagenamee',
		'basepagename',
		'basepagenamee',
		'currenttimestamp',
		'localtimestamp',
		'directionmark',
		'contentlanguage',
		'numberofadmins',
		'cascadingsources',
	];

	/**
	 * Array of caching hints for ParserCache, keyed by magic word ID.
	 * NOTE(review): the values look like lifetimes in seconds (3600, 86400) - confirm.
	 *
	 * @var int[]
	 */
	public static $mCacheTTLs = [
		'currentmonth' => 86400,
		'currentmonth1' => 86400,
		'currentmonthname' => 86400,
		'currentmonthnamegen' => 86400,
		'currentmonthabbrev' => 86400,
		'currentday' => 3600,
		'currentday2' => 3600,
		'currentdayname' => 3600,
		'currentyear' => 86400,
		'currenttime' => 3600,
		'currenthour' => 3600,
		'localmonth' => 86400,
		'localmonth1' => 86400,
		'localmonthname' => 86400,
		'localmonthnamegen' => 86400,
		'localmonthabbrev' => 86400,
		'localday' => 3600,
		'localday2' => 3600,
		'localdayname' => 3600,
		'localyear' => 86400,
		'localtime' => 3600,
		'localhour' => 3600,
		'numberofarticles' => 3600,
		'numberoffiles' => 3600,
		'numberofedits' => 3600,
		'currentweek' => 3600,
		'currentdow' => 3600,
		'localweek' => 3600,
		'localdow' => 3600,
		'numberofusers' => 3600,
		'numberofactiveusers' => 3600,
		'numberofpages' => 3600,
		'currentversion' => 86400,
		'currenttimestamp' => 3600,
		'localtimestamp' => 3600,
		'pagesinnamespace' => 3600,
		'numberofadmins' => 3600,
		'numberingroup' => 3600,
	];

	/** @var string[] IDs of the double-underscore magic words (e.g. __NOTOC__) */
	public static $mDoubleUnderscoreIDs = [
		'notoc',
		'nogallery',
		'forcetoc',
		'toc',
		'noeditsection',
		'newsectionlink',
		'nonewsectionlink',
		'hiddencat',
		'index',
		'noindex',
		'staticredirect',
		'notitleconvert',
		'nocontentconvert',
	];

	/** @var string[] IDs of the substitution modifiers */
	public static $mSubstIDs = [
		'subst',
		'safesubst',
	];

	/** @var MagicWord[] Objects already created by get(), keyed by magic word ID */
	public static $mObjects = [];

	/** @var MagicWordArray|null Built on first use by getDoubleUnderscoreArray() */
	public static $mDoubleUnderscoreArray = null;

	/**
	 * @param string|int $id Magic word ID
	 * @param string[]|string $syn Synonym or list of synonyms; cast to an array
	 * @param bool $cs Whether matching is case-sensitive
	 */
	public function __construct( $id = 0, $syn = [], $cs = false ) {
		$this->mId = $id;
		$this->mSynonyms = (array)$syn;
		$this->mCaseSensitive = $cs;
	}

	/**
	 * Factory: creates an object representing an ID
	 *
	 * The object is created and loaded on the first request for an ID and
	 * served from self::$mObjects afterwards.
	 *
	 * @param string $id Magic word ID, e.g. 'redirect'
	 *
	 * @return MagicWord
	 */
	public static function &get( $id ) {
		if ( !isset( self::$mObjects[$id] ) ) {
			$mw = new MagicWord();
			$mw->load( $id );
			self::$mObjects[$id] = $mw;
		}
		return self::$mObjects[$id];
	}

	/**
	 * Get an array of parser variable IDs
	 *
	 * The MagicWordwgVariableIDs hook is run once, on the first call, and
	 * receives the list by reference.
	 *
	 * @return string[]
	 */
	public static function getVariableIDs() {
		if ( !self::$mVariableIDsInitialised ) {
			# Get variable IDs
			Hooks::run( 'MagicWordwgVariableIDs', [ &self::$mVariableIDs ] );
			self::$mVariableIDsInitialised = true;
		}
		return self::$mVariableIDs;
	}

	/**
	 * Get an array of parser substitution modifier IDs
	 * @return string[]
	 */
	public static function getSubstIDs() {
		return self::$mSubstIDs;
	}

	/**
	 * Allow external reads of TTL array
	 *
	 * @param string $id Magic word ID
	 * @return int The hint stored in self::$mCacheTTLs, or -1 if the ID has none
	 */
	public static function getCacheTTL( $id ) {
		if ( array_key_exists( $id, self::$mCacheTTLs ) ) {
			return self::$mCacheTTLs[$id];
		}
		return -1;
	}

	/**
	 * Get a MagicWordArray of double-underscore entities
	 *
	 * The GetDoubleUnderscoreIDs hook is run once, right before the array is
	 * first built, and receives the ID list by reference.
	 *
	 * @return MagicWordArray
	 */
	public static function getDoubleUnderscoreArray() {
		if ( self::$mDoubleUnderscoreArray === null ) {
			Hooks::run( 'GetDoubleUnderscoreIDs', [ &self::$mDoubleUnderscoreIDs ] );
			self::$mDoubleUnderscoreArray = new MagicWordArray( self::$mDoubleUnderscoreIDs );
		}
		return self::$mDoubleUnderscoreArray;
	}

	/**
	 * Clear the self::$mObjects variable
	 * For use in parser tests
	 */
	public static function clearCache() {
		self::$mObjects = [];
	}

	/**
	 * Initialises this object with an ID
	 *
	 * Hands the object to $wgContLang->getMagic(), which is expected to fill
	 * in the synonyms; an ID that ends up without synonyms is rejected.
	 *
	 * @param string $id Magic word ID
	 * @throws MWException If no synonyms are known for $id
	 */
	public function load( $id ) {
		global $wgContLang;
		$this->mId = $id;
		$wgContLang->getMagic( $this );
		if ( !$this->mSynonyms ) {
			// Leave a placeholder synonym behind so the object is never left
			// with an empty synonym list, even though the load failed.
			$this->mSynonyms = [ 'brionmademeputthishere' ];
			throw new MWException( "Error: invalid magic word '$id'" );
		}
	}

	/**
	 * Preliminary initialisation: builds every regex variant from the synonyms
	 * @private
	 */
	public function initRegex() {
		// Sort the synonyms by length, descending, so that the longest synonym
		// matches in precedence to the shortest
		$synonyms = $this->mSynonyms;
		usort( $synonyms, [ $this, 'compareStringLength' ] );

		$escSyn = [];
		foreach ( $synonyms as $synonym ) {
			// In case a magic word contains /, like that's going to happen;)
			$escSyn[] = preg_quote( $synonym, '/' );
		}
		$this->mBaseRegex = implode( '|', $escSyn );

		$case = $this->mCaseSensitive ? '' : 'iu';
		$this->mRegex = "/{$this->mBaseRegex}/{$case}";
		$this->mRegexStart = "/^(?:{$this->mBaseRegex})/{$case}";
		$this->mRegexStartToEnd = "/^(?:{$this->mBaseRegex})$/{$case}";
		// preg_quote() above turned every "$1" placeholder into "\$1"; swap
		// that escaped form for a lazy capture group.
		$this->mVariableRegex = str_replace( "\\$1", "(.*?)", $this->mRegex );
		$this->mVariableStartToEndRegex = str_replace( "\\$1", "(.*?)",
			"/^(?:{$this->mBaseRegex})$/{$case}" );
	}

	/**
	 * A comparison function that returns -1, 0 or 1 depending on whether the
	 * first string is longer, the same length or shorter than the second
	 * string.
	 *
	 * Lengths are compared with strlen(), i.e. in bytes.
	 *
	 * @param string $s1
	 * @param string $s2
	 *
	 * @return int
	 */
	public function compareStringLength( $s1, $s2 ) {
		$l1 = strlen( $s1 );
		$l2 = strlen( $s2 );
		if ( $l1 < $l2 ) {
			return 1;
		} elseif ( $l1 > $l2 ) {
			return -1;
		}
		return 0;
	}

	/**
	 * Gets a regex representing matching the word
	 *
	 * @return string
	 */
	public function getRegex() {
		if ( $this->mRegex === '' ) {
			$this->initRegex();
		}
		return $this->mRegex;
	}

	/**
	 * Gets the regexp case modifier to use, i.e. iu or nothing, to be used if
	 * one is using MagicWord::getBaseRegex(), otherwise it'll be included in
	 * the complete expression
	 *
	 * @return string
	 */
	public function getRegexCase() {
		if ( $this->mRegex === '' ) {
			$this->initRegex();
		}

		return $this->mCaseSensitive ? '' : 'iu';
	}

	/**
	 * Gets a regex matching the word, if it is at the string start
	 *
	 * @return string
	 */
	public function getRegexStart() {
		if ( $this->mRegex === '' ) {
			$this->initRegex();
		}
		return $this->mRegexStart;
	}

	/**
	 * Gets a regex matching the word from start to end of a string
	 *
	 * @return string
	 * @since 1.23
	 */
	public function getRegexStartToEnd() {
		if ( $this->mRegexStartToEnd === '' ) {
			$this->initRegex();
		}
		return $this->mRegexStartToEnd;
	}

	/**
	 * regex without the slashes and what not
	 *
	 * @return string
	 */
	public function getBaseRegex() {
		if ( $this->mRegex === '' ) {
			$this->initRegex();
		}
		return $this->mBaseRegex;
	}

	/**
	 * Returns true if the text contains the word
	 *
	 * @param string $text
	 *
	 * @return bool
	 */
	public function match( $text ) {
		return (bool)preg_match( $this->getRegex(), $text );
	}

	/**
	 * Returns true if the text starts with the word
	 *
	 * @param string $text
	 *
	 * @return bool
	 */
	public function matchStart( $text ) {
		return (bool)preg_match( $this->getRegexStart(), $text );
	}

	/**
	 * Returns true if the text matched the word
	 *
	 * @param string $text
	 *
	 * @return bool
	 * @since 1.23
	 */
	public function matchStartToEnd( $text ) {
		return (bool)preg_match( $this->getRegexStartToEnd(), $text );
	}

	/**
	 * Returns NULL if there's no match, the value of $1 otherwise
	 * The return code is the matched string, if there's no variable
	 * part in the regex and the matched variable part ($1) if there
	 * is one.
	 *
	 * @param string $text
	 *
	 * @return string|null
	 */
	public function matchVariableStartToEnd( $text ) {
		$matches = [];
		if ( !preg_match( $this->getVariableStartToEndRegex(), $text, $matches ) ) {
			return null;
		}

		# multiple matched parts (variable match); some will be empty because of
		# synonyms. The variable will be the second non-empty one so remove any
		# blank elements and re-sort the indices.
		# See also bug 6526
		#
		# Only empty strings are dropped: a bare array_filter() would also
		# discard a captured "0", which is a perfectly valid variable value.
		$matches = array_values( array_filter( $matches, static function ( $part ) {
			return $part !== '';
		} ) );

		if ( !$matches ) {
			// The match consumed no text at all, so there is nothing to report.
			return null;
		}

		return count( $matches ) === 1 ? $matches[0] : $matches[1];
	}

	/**
	 * Returns true if the text matches the word, and alters the
	 * input string, removing all instances of the word
	 *
	 * @param string &$text
	 *
	 * @return bool
	 */
	public function matchAndRemove( &$text ) {
		$this->mFound = false;
		// Plain $this is enough for the callback: objects are passed by
		// handle, and taking a reference to $this breaks on PHP 7.1+.
		$text = preg_replace_callback(
			$this->getRegex(),
			[ $this, 'pregRemoveAndRecord' ],
			$text
		);

		return $this->mFound;
	}

	/**
	 * Returns true if the text starts with the word, and alters the
	 * input string, removing the word from its start
	 *
	 * @param string &$text
	 * @return bool
	 */
	public function matchStartAndRemove( &$text ) {
		$this->mFound = false;
		// See matchAndRemove() for why this is not [ &$this, ... ].
		$text = preg_replace_callback(
			$this->getRegexStart(),
			[ $this, 'pregRemoveAndRecord' ],
			$text
		);

		return $this->mFound;
	}

	/**
	 * Used in matchAndRemove() and matchStartAndRemove(): records that a match
	 * happened and replaces it with nothing
	 *
	 * @return string
	 */
	public function pregRemoveAndRecord() {
		$this->mFound = true;
		return '';
	}

	/**
	 * Replaces the word with something else
	 *
	 * The replacement is escaped first, so it is inserted literally.
	 *
	 * @param string $replacement
	 * @param string $subject
	 * @param int $limit Maximum number of replacements, -1 for no limit
	 *
	 * @return string
	 */
	public function replace( $replacement, $subject, $limit = -1 ) {
		$res = preg_replace(
			$this->getRegex(),
			StringUtils::escapeRegexReplacement( $replacement ),
			$subject,
			$limit
		);
		$this->mModified = $res !== $subject;
		return $res;
	}

	/**
	 * Variable handling: {{SUBST:xxx}} style words
	 * Calls back a function to determine what to replace xxx with
	 * Input word must contain $1
	 *
	 * @param string $text
	 * @param callable $callback
	 *
	 * @return string
	 */
	public function substituteCallback( $text, $callback ) {
		$res = preg_replace_callback( $this->getVariableRegex(), $callback, $text );
		$this->mModified = $res !== $text;
		return $res;
	}

	/**
	 * Matches the word, where $1 is a wildcard
	 *
	 * @return string
	 */
	public function getVariableRegex() {
		if ( $this->mVariableRegex === '' ) {
			$this->initRegex();
		}
		return $this->mVariableRegex;
	}

	/**
	 * Matches the entire string, where $1 is a wildcard
	 *
	 * @return string
	 */
	public function getVariableStartToEndRegex() {
		if ( $this->mVariableStartToEndRegex === '' ) {
			$this->initRegex();
		}
		return $this->mVariableStartToEndRegex;
	}

	/**
	 * Accesses the synonym list directly
	 *
	 * @param int $i Index into the synonym list
	 *
	 * @return string
	 */
	public function getSynonym( $i ) {
		return $this->mSynonyms[$i];
	}

	/**
	 * @return string[]
	 */
	public function getSynonyms() {
		return $this->mSynonyms;
	}

	/**
	 * Returns true if the last call to replace() or substituteCallback()
	 * returned a modified text, otherwise false.
	 *
	 * @return bool
	 */
	public function getWasModified() {
		return $this->mModified;
	}

	/**
	 * $magicarr is an associative array of (magic word ID => replacement)
	 * This method uses the php feature to do several replacements at the same time,
	 * thereby gaining some efficiency. The result is placed in the out variable
	 * $result. The return value is true if something was replaced.
	 *
	 * Unlike replace(), the replacements are handed to preg_replace() as they
	 * are, without escaping.
	 *
	 * @deprecated since 1.25, unused
	 *
	 * @param array $magicarr
	 * @param string $subject
	 * @param string &$result
	 *
	 * @return bool
	 */
	public function replaceMultiple( $magicarr, $subject, &$result ) {
		wfDeprecated( __METHOD__, '1.25' );
		$search = [];
		$replace = [];
		foreach ( $magicarr as $id => $replacement ) {
			$mw = MagicWord::get( $id );
			$search[] = $mw->getRegex();
			$replace[] = $replacement;
		}

		$result = preg_replace( $search, $replace, $subject );
		return $result !== $subject;
	}

	/**
	 * Adds all the synonyms of this MagicWord to an array, to allow quick
	 * lookup in a list of magic words
	 *
	 * Each synonym is lower-cased with $wgContLang->lc() before being used
	 * as a key.
	 *
	 * @param array &$array
	 * @param string $value
	 */
	public function addToArray( &$array, $value ) {
		global $wgContLang;
		foreach ( $this->mSynonyms as $syn ) {
			$array[$wgContLang->lc( $syn )] = $value;
		}
	}

	/**
	 * @return bool
	 */
	public function isCaseSensitive() {
		return $this->mCaseSensitive;
	}

	/**
	 * @return string|int Magic word ID
	 */
	public function getId() {
		return $this->mId;
	}
}