Merge "hooks.txt: Convert docs to modern extension registration style"
[lhc/web/wiklou.git] / includes / parser / Parser.php
1 <?php
2 /**
3 * PHP parser that converts wiki markup to HTML.
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License along
16 * with this program; if not, write to the Free Software Foundation, Inc.,
17 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
18 * http://www.gnu.org/copyleft/gpl.html
19 *
20 * @file
21 * @ingroup Parser
22 */
23 use MediaWiki\Config\ServiceOptions;
24 use MediaWiki\Linker\LinkRenderer;
25 use MediaWiki\Linker\LinkRendererFactory;
26 use MediaWiki\Linker\LinkTarget;
27 use MediaWiki\MediaWikiServices;
28 use MediaWiki\Special\SpecialPageFactory;
29 use Psr\Log\NullLogger;
30 use Wikimedia\ScopedCallback;
31 use Psr\Log\LoggerInterface;
32
33 /**
34 * @defgroup Parser Parser
35 */
36
37 /**
38 * PHP Parser - Processes wiki markup (which uses a more user-friendly
39 * syntax, such as "[[link]]" for making links), and provides a one-way
40 * transformation of that wiki markup into (X)HTML output / markup
41 * (which in turn the browser understands, and can display).
42 *
43 * There are seven main entry points into the Parser class:
44 *
45 * - Parser::parse()
46 * produces HTML output
47 * - Parser::preSaveTransform()
48 * produces altered wiki markup
49 * - Parser::preprocess()
50 * removes HTML comments and expands templates
51 * - Parser::cleanSig() and Parser::cleanSigInSig()
52 * cleans a signature before saving it to preferences
53 * - Parser::getSection()
54 * return the content of a section from an article for section editing
55 * - Parser::replaceSection()
56 * replaces a section by number inside an article
57 * - Parser::getPreloadText()
58 * removes <noinclude> sections and <includeonly> tags
59 *
60 * @warning $wgUser or $wgTitle or $wgRequest or $wgLang. Keep them away!
61 *
62 * @par Settings:
63 * $wgNamespacesWithSubpages
64 *
65 * @par Settings only within ParserOptions:
66 * $wgAllowExternalImages
67 * $wgAllowSpecialInclusion
68 * $wgInterwikiMagic
69 * $wgMaxArticleSize
70 *
71 * @ingroup Parser
72 */
73 class Parser {
74 /**
75 * Update this version number when the ParserOutput format
76 * changes in an incompatible way, so the parser cache
77 * can automatically discard old data.
78 */
79 const VERSION = '1.6.4';
80
81 /**
82 * Update this version number when the output of serialiseHalfParsedText()
83 * changes in an incompatible way
84 */
85 const HALF_PARSED_VERSION = 2;
86
87 # Flags for Parser::setFunctionHook
88 const SFH_NO_HASH = 1;
89 const SFH_OBJECT_ARGS = 2;
90
91 # Constants needed for external link processing
92 # Everything except bracket, space, or control characters
93 # \p{Zs} is unicode 'separator, space' category. It covers the space 0x20
94 # as well as U+3000 (IDEOGRAPHIC SPACE), for T21052
95 # \x{FFFD} is the Unicode replacement character, which Preprocessor_DOM
96 # uses to replace invalid HTML characters.
97 const EXT_LINK_URL_CLASS = '[^][<>"\\x00-\\x20\\x7F\p{Zs}\x{FFFD}]';
98 # Simplified expression to match an IPv4 or IPv6 address, or
99 # at least one character of a host name (embeds EXT_LINK_URL_CLASS)
100 const EXT_LINK_ADDR = '(?:[0-9.]+|\\[(?i:[0-9a-f:.]+)\\]|[^][<>"\\x00-\\x20\\x7F\p{Zs}\x{FFFD}])';
101 # RegExp to make image URLs (embeds IPv6 part of EXT_LINK_ADDR)
102 // phpcs:ignore Generic.Files.LineLength
103 const EXT_IMAGE_REGEX = '/^(http:\/\/|https:\/\/)((?:\\[(?i:[0-9a-f:.]+)\\])?[^][<>"\\x00-\\x20\\x7F\p{Zs}\x{FFFD}]+)
104 \\/([A-Za-z0-9_.,~%\\-+&;#*?!=()@\\x80-\\xFF]+)\\.((?i)gif|png|jpg|jpeg)$/Sxu';
105
106 # Regular expression for a non-newline space
107 const SPACE_NOT_NL = '(?:\t|&nbsp;|&\#0*160;|&\#[Xx]0*[Aa]0;|\p{Zs})';
108
109 # Flags for preprocessToDom
110 const PTD_FOR_INCLUSION = 1;
111
112 # Allowed values for $this->mOutputType
113 # Parameter to startExternalParse().
114 const OT_HTML = 1; # like parse()
115 const OT_WIKI = 2; # like preSaveTransform()
116 const OT_PREPROCESS = 3; # like preprocess()
117 const OT_MSG = 3;
118 const OT_PLAIN = 4; # like extractSections() - portions of the original are returned unchanged.
119
120 /**
121 * @var string Prefix and suffix for temporary replacement strings
122 * for the multipass parser.
123 *
124 * \x7f should never appear in input as it's disallowed in XML.
125 * Using it at the front also gives us a little extra robustness
126 * since it shouldn't match when butted up against identifier-like
127 * string constructs.
128 *
129 * Must not consist of all title characters, or else it will change
130 * the behavior of <nowiki> in a link.
131 *
132 * Must have a character that needs escaping in attributes, otherwise
133 * someone could put a strip marker in an attribute, to get around
134 * escaping quote marks, and break out of the attribute. Thus we add
135 * `'".
136 */
137 const MARKER_SUFFIX = "-QINU`\"'\x7f";
138 const MARKER_PREFIX = "\x7f'\"`UNIQ-";
139
140 # Markers used for wrapping the table of contents
141 const TOC_START = '<mw:toc>';
142 const TOC_END = '</mw:toc>';
143
144 /** @var int Assume that no output will later be saved this many seconds after parsing */
145 const MAX_TTS = 900;
146
147 # Persistent:
148 public $mTagHooks = [];
149 public $mTransparentTagHooks = [];
150 public $mFunctionHooks = [];
151 public $mFunctionSynonyms = [ 0 => [], 1 => [] ];
152 public $mFunctionTagHooks = [];
153 public $mStripList = [];
154 public $mDefaultStripList = [];
155 public $mVarCache = [];
156 public $mImageParams = [];
157 public $mImageParamsMagicArray = [];
158 public $mMarkerIndex = 0;
159 /**
160 * @var bool Whether firstCallInit still needs to be called
161 */
162 public $mFirstCall = true;
163
164 # Initialised by initialiseVariables()
165
166 /**
167 * @var MagicWordArray
168 */
169 public $mVariables;
170
171 /**
172 * @var MagicWordArray
173 */
174 public $mSubstWords;
175
176 /**
177 * @deprecated since 1.34, there should be no need to use this
178 * @var array
179 */
180 public $mConf;
181
182 # Initialised in constructor
183 public $mExtLinkBracketedRegex, $mUrlProtocols;
184
185 # Initialized in getPreprocessor()
186 /** @var Preprocessor */
187 public $mPreprocessor;
188
189 # Cleared with clearState():
190 /**
191 * @var ParserOutput
192 */
193 public $mOutput;
194 public $mAutonumber;
195
196 /**
197 * @var StripState
198 */
199 public $mStripState;
200
201 public $mIncludeCount;
202 /**
203 * @var LinkHolderArray
204 */
205 public $mLinkHolders;
206
207 public $mLinkID;
208 public $mIncludeSizes, $mPPNodeCount, $mGeneratedPPNodeCount, $mHighestExpansionDepth;
209 public $mDefaultSort;
210 public $mTplRedirCache, $mHeadings, $mDoubleUnderscores;
211 public $mExpensiveFunctionCount; # number of expensive parser function calls
212 public $mShowToc, $mForceTocPosition;
213 /** @var array */
214 public $mTplDomCache;
215
216 /**
217 * @var User
218 */
219 public $mUser; # User object; only used when doing pre-save transform
220
221 # Temporary
222 # These are variables reset at least once per parse regardless of $clearState
223
224 /**
225 * @var ParserOptions
226 */
227 public $mOptions;
228
229 /**
230 * @var Title
231 */
232 public $mTitle; # Title context, used for self-link rendering and similar things
233 public $mOutputType; # Output type, one of the OT_xxx constants
234 public $ot; # Shortcut alias, see setOutputType()
235 public $mRevisionObject; # The revision object of the specified revision ID
236 public $mRevisionId; # ID to display in {{REVISIONID}} tags
237 public $mRevisionTimestamp; # The timestamp of the specified revision ID
238 public $mRevisionUser; # User to display in {{REVISIONUSER}} tag
239 public $mRevisionSize; # Size to display in {{REVISIONSIZE}} variable
240 public $mRevIdForTs; # The revision ID which was used to fetch the timestamp
241 public $mInputSize = false; # For {{PAGESIZE}} on current page.
242
243 /**
244 * @var array Array with the language name of each language link (i.e. the
245 * interwiki prefix) in the key, value arbitrary. Used to avoid sending
246 * duplicate language links to the ParserOutput.
247 */
248 public $mLangLinkLanguages;
249
250 /**
251 * @var MapCacheLRU|null
252 * @since 1.24
253 *
254 * A cache of the current revisions of titles. Keys are $title->getPrefixedDbKey()
255 */
256 public $currentRevisionCache;
257
258 /**
259 * @var bool|string Recursive call protection.
260 * This variable should be treated as if it were private.
261 */
262 public $mInParse = false;
263
264 /** @var SectionProfiler */
265 protected $mProfiler;
266
267 /**
268 * @var LinkRenderer
269 */
270 protected $mLinkRenderer;
271
272 /** @var MagicWordFactory */
273 private $magicWordFactory;
274
275 /** @var Language */
276 private $contLang;
277
278 /** @var ParserFactory */
279 private $factory;
280
281 /** @var SpecialPageFactory */
282 private $specialPageFactory;
283
284 /**
285 * This is called $svcOptions instead of $options like elsewhere to avoid confusion with
286 * $mOptions, which is public and widely used, and also with the local variable $options used
287 * for ParserOptions throughout this file.
288 *
289 * @var ServiceOptions
290 */
291 private $svcOptions;
292
293 /** @var LinkRendererFactory */
294 private $linkRendererFactory;
295
296 /** @var NamespaceInfo */
297 private $nsInfo;
298
299 /** @var LoggerInterface */
300 private $logger;
301
302 /**
303 * TODO Make this a const when HHVM support is dropped (T192166)
304 *
305 * @var array
306 * @since 1.33
307 */
308 public static $constructorOptions = [
309 // See $wgParserConf documentation
310 'class',
311 'preprocessorClass',
312 // See documentation for the corresponding config options
313 'ArticlePath',
314 'EnableScaryTranscluding',
315 'ExtraInterlanguageLinkPrefixes',
316 'FragmentMode',
317 'LanguageCode',
318 'MaxSigChars',
319 'MaxTocLevel',
320 'MiserMode',
321 'ScriptPath',
322 'Server',
323 'ServerName',
324 'ShowHostnames',
325 'Sitename',
326 'StylePath',
327 'TranscludeCacheExpiry',
328 ];
329
/**
 * Constructing parsers directly is deprecated! Use a ParserFactory.
 *
 * Two calling conventions are accepted. With the current one, the first
 * argument is a ServiceOptions object that must provide every key listed in
 * self::$constructorOptions. With the pre-1.34 one, the first argument is
 * empty or a ParserConf array, and the seventh, eighth and ninth positional
 * arguments are read as the Config, the LinkRendererFactory and the
 * NamespaceInfo respectively.
 *
 * Every collaborator left as null is fetched from MediaWikiServices, except
 * the logger, which falls back to a NullLogger.
 *
 * @param ServiceOptions|null $svcOptions
 * @param MagicWordFactory|null $magicWordFactory
 * @param Language|null $contLang Content language
 * @param ParserFactory|null $factory
 * @param string|null $urlProtocols As returned from wfUrlProtocols()
 * @param SpecialPageFactory|null $spFactory
 * @param LinkRendererFactory|null $linkRendererFactory
 * @param NamespaceInfo|null $nsInfo
 * @param LoggerInterface|null $logger
 */
public function __construct(
	$svcOptions = null,
	MagicWordFactory $magicWordFactory = null,
	Language $contLang = null,
	ParserFactory $factory = null,
	$urlProtocols = null,
	SpecialPageFactory $spFactory = null,
	$linkRendererFactory = null,
	$nsInfo = null,
	$logger = null
) {
	if ( $svcOptions && !is_array( $svcOptions ) ) {
		// New calling convention
		$svcOptions->assertRequiredOptions( self::$constructorOptions );
		// $this->mConf is public, so these two options are mirrored there for
		// compatibility until it is removed
		$this->mConf = [
			'class' => $svcOptions->get( 'class' ),
			'preprocessorClass' => $svcOptions->get( 'preprocessorClass' ),
		];
		$this->svcOptions = $svcOptions;
	} else {
		// Pre-1.34 calling convention: the first parameter is just ParserConf, the
		// seventh is Config, and the eighth is LinkRendererFactory.
		$legacyConf = (array)$svcOptions;
		if ( empty( $legacyConf['class'] ) ) {
			$legacyConf['class'] = self::class;
		}
		if ( empty( $legacyConf['preprocessorClass'] ) ) {
			$legacyConf['preprocessorClass'] = self::getDefaultPreprocessorClass();
		}
		$this->mConf = $legacyConf;

		// The positional arguments are read in ascending order, each one before
		// the parameter variable occupying a later slot is reassigned.
		$argCount = func_num_args();
		$config = $argCount > 6
			? func_get_arg( 6 )
			: MediaWikiServices::getInstance()->getMainConfig();
		$this->svcOptions = new ServiceOptions(
			self::$constructorOptions,
			$this->mConf,
			$config
		);
		$linkRendererFactory = $argCount > 7 ? func_get_arg( 7 ) : null;
		$nsInfo = $argCount > 8 ? func_get_arg( 8 ) : null;
	}

	$this->mUrlProtocols = $urlProtocols ?? wfUrlProtocols();
	$this->mExtLinkBracketedRegex = '/\[(((?i)' . $this->mUrlProtocols . ')' .
		self::EXT_LINK_ADDR .
		self::EXT_LINK_URL_CLASS . '*)\p{Zs}*([^\]\\x00-\\x08\\x0a-\\x1F\\x{FFFD}]*?)\]/Su';

	$this->magicWordFactory = $magicWordFactory
		?? MediaWikiServices::getInstance()->getMagicWordFactory();
	$this->contLang = $contLang
		?? MediaWikiServices::getInstance()->getContentLanguage();
	$this->factory = $factory
		?? MediaWikiServices::getInstance()->getParserFactory();
	$this->specialPageFactory = $spFactory
		?? MediaWikiServices::getInstance()->getSpecialPageFactory();
	$this->linkRendererFactory = $linkRendererFactory
		?? MediaWikiServices::getInstance()->getLinkRendererFactory();
	$this->nsInfo = $nsInfo
		?? MediaWikiServices::getInstance()->getNamespaceInfo();
	$this->logger = $logger ? $logger : new NullLogger();
}
400
/**
 * Drop every property on destruction, to reduce memory usage and lessen
 * the impact of circular references.
 */
public function __destruct() {
	if ( isset( $this->mLinkHolders ) ) {
		unset( $this->mLinkHolders );
	}
	foreach ( $this as $propertyName => $unusedValue ) {
		unset( $this->$propertyName );
	}
}
412
/**
 * Reset per-parse state on the clone and allow extensions to clean up
 * when the parser is cloned (via the ParserCloned hook).
 */
public function __clone() {
	$this->mInParse = false;

	// T58226: Creating a reference "to" an object field turns the field
	// itself into a reference (until the other reference goes out of
	// scope), and cloning copies reference fields as references. Both are
	// defined PHP5 behaviors, inconvenient here because old hooks from the
	// PHP4 days pass fields by reference.
	foreach ( [ 'mStripState', 'mVarCache' ] as $fieldName ) {
		// Take a non-reference copy of the field, then rebind the field so
		// that it references the new copy.
		$detachedCopy = $this->$fieldName;
		$this->$fieldName =& $detachedCopy;
		unset( $detachedCopy );
	}

	Hooks::run( 'ParserCloned', [ $this ] );
}
435
/**
 * Name of the preprocessor class to use when none is otherwise specified.
 *
 * @since 1.34
 * @deprecated since 1.34, removing configurability of preprocessor
 * @return string
 */
public static function getDefaultPreprocessorClass() {
	$defaultClass = Preprocessor_Hash::class;
	return $defaultClass;
}
446
/**
 * Do various kinds of initialisation on the first call of the parser.
 * Later calls return without doing anything.
 */
public function firstCallInit() {
	if ( $this->mFirstCall ) {
		$this->mFirstCall = false;

		CoreParserFunctions::register( $this );
		CoreTagHooks::register( $this );
		$this->initialiseVariables();

		// Avoid PHP 7.1 warning from passing $this by reference
		$self = $this;
		Hooks::run( 'ParserFirstCallInit', [ &$self ] );
	}
}
464
/**
 * Clear Parser state: run first-call initialisation if still pending,
 * create a fresh output object, reset the per-parse counters and caches,
 * then fire the ParserClearState hook.
 *
 * @private
 */
public function clearState() {
	$this->firstCallInit();
	$this->resetOutput();
	$this->mAutonumber = 0;
	$this->mIncludeCount = [];
	$this->mLinkHolders = new LinkHolderArray( $this );
	$this->mLinkID = 0;

	$this->mRevisionSize = null;
	$this->mRevisionUser = null;
	$this->mRevisionId = null;
	$this->mRevisionTimestamp = null;
	$this->mRevisionObject = null;

	$this->mVarCache = [];
	$this->mUser = null;
	$this->mLangLinkLanguages = [];
	$this->currentRevisionCache = null;

	$this->mStripState = new StripState( $this );

	# Clear these on every parse, T6549
	$this->mTplDomCache = [];
	$this->mTplRedirCache = [];

	$this->mShowToc = true;
	$this->mForceTocPosition = false;
	$this->mIncludeSizes = [ 'post-expand' => 0, 'arg' => 0 ];
	$this->mPPNodeCount = 0;
	$this->mGeneratedPPNodeCount = 0;
	$this->mHighestExpansionDepth = 0;
	$this->mDefaultSort = false;
	$this->mHeadings = [];
	$this->mDoubleUnderscores = [];
	$this->mExpensiveFunctionCount = 0;

	# Fix cloning: discard a preprocessor that is bound to another parser
	if ( isset( $this->mPreprocessor ) && $this->mPreprocessor->parser !== $this ) {
		$this->mPreprocessor = null;
	}

	$this->mProfiler = new SectionProfiler();

	// Avoid PHP 7.1 warning from passing $this by reference
	$self = $this;
	Hooks::run( 'ParserClearState', [ &$self ] );
}
514
/**
 * Reset the ParserOutput: replace it with a new instance and register that
 * instance's recordOption method as a watcher on the current ParserOptions.
 */
public function resetOutput() {
	$freshOutput = new ParserOutput;
	$this->mOutput = $freshOutput;
	$this->mOptions->registerWatcher( [ $freshOutput, 'recordOption' ] );
}
522
/**
 * Convert wikitext to HTML
 * Do not call this function recursively.
 *
 * The revision-related fields that were current before the call are put
 * back before returning.
 *
 * @param string $text Text we want to parse
 * @param-taint $text escapes_htmlnoent
 * @param Title $title
 * @param ParserOptions $options
 * @param bool $linestart
 * @param bool $clearState
 * @param int|null $revid Number to pass in {{REVISIONID}}
 * @return ParserOutput A ParserOutput
 * @return-taint escaped
 */
public function parse(
	$text, Title $title, ParserOptions $options,
	$linestart = true, $clearState = true, $revid = null
) {
	if ( $clearState ) {
		// Strip markers are built with U+007F DELETE, so that character
		// must not occur in the input text.
		$text = strtr( $text, "\x7f", "?" );
		// Held in a local so the lock lasts until this method returns
		$lockGuard = $this->lock();
	}
	// Strip U+0000 NULL (T159174)
	$text = str_replace( "\000", '', $text );

	$this->startParse( $title, $options, self::OT_HTML, $clearState );

	$this->currentRevisionCache = null;
	$this->mInputSize = strlen( $text );
	if ( $this->mOptions->getEnableLimitReport() ) {
		$this->mOutput->resetParseStartTime();
	}

	// Remember the current revision state; it is restored at the end
	$savedRevisionState = [
		$this->mRevisionId,
		$this->mRevisionObject,
		$this->mRevisionTimestamp,
		$this->mRevisionUser,
		$this->mRevisionSize,
	];
	if ( $revid !== null ) {
		$this->mRevisionId = $revid;
		$this->mRevisionObject = null;
		$this->mRevisionTimestamp = null;
		$this->mRevisionUser = null;
		$this->mRevisionSize = null;
	}

	// Avoid PHP 7.1 warning from passing $this by reference
	$self = $this;
	Hooks::run( 'ParserBeforeStrip', [ &$self, &$text, &$this->mStripState ] );
	# No more strip!
	Hooks::run( 'ParserAfterStrip', [ &$self, &$text, &$this->mStripState ] );
	$text = $this->internalParse( $text );
	Hooks::run( 'ParserAfterParse', [ &$self, &$text, &$this->mStripState ] );

	$text = $this->internalParseHalfParsed( $text, true, $linestart );

	/**
	 * A converted title will be provided in the output object if title and
	 * content conversion are enabled, the article text does not contain
	 * a conversion-suppressing double-underscore tag, and no
	 * {{DISPLAYTITLE:...}} is present. DISPLAYTITLE takes precedence over
	 * automatic link conversion.
	 */
	$titleConversionSuppressed = $options->getDisableTitleConversion()
		|| isset( $this->mDoubleUnderscores['nocontentconvert'] )
		|| isset( $this->mDoubleUnderscores['notitleconvert'] )
		|| $this->mOutput->getDisplayTitle() !== false;
	if ( !$titleConversionSuppressed ) {
		$convertedTitle = $this->getTargetLanguage()->getConvRuleTitle();
		if ( !$convertedTitle ) {
			$convertedTitle = $this->getTargetLanguage()->convertTitle( $title );
		}
		$this->mOutput->setTitleText( $convertedTitle );
	}

	# Compute runtime adaptive expiry if set
	$this->mOutput->finalizeAdaptiveCacheExpiry();

	# Warn if too many heavyweight parser functions were used
	if ( $this->mExpensiveFunctionCount > $this->mOptions->getExpensiveParserFunctionLimit() ) {
		$this->limitationWarn(
			'expensive-parserfunction',
			$this->mExpensiveFunctionCount,
			$this->mOptions->getExpensiveParserFunctionLimit()
		);
	}

	# Information on limits, for the benefit of users who try to skirt them
	if ( $this->mOptions->getEnableLimitReport() ) {
		$text .= $this->makeLimitReport();
	}

	# Wrap non-interface parser output in a <div> so it can be targeted
	# with CSS (T37247)
	$wrapperClass = $this->mOptions->getWrapOutputClass();
	if ( $wrapperClass !== false && !$this->mOptions->getInterfaceMessage() ) {
		$this->mOutput->addWrapperDivClass( $wrapperClass );
	}

	$this->mOutput->setText( $text );

	list(
		$this->mRevisionId,
		$this->mRevisionObject,
		$this->mRevisionTimestamp,
		$this->mRevisionUser,
		$this->mRevisionSize
	) = $savedRevisionState;
	$this->mInputSize = false;
	$this->currentRevisionCache = null;

	return $this->mOutput;
}
637
/**
 * Set the limit report data in the current ParserOutput, and return the
 * limit report as HTML comments: first the limit report proper, then the
 * transclusion expansion time report.
 *
 * @return string
 */
protected function makeLimitReport() {
	$includeSizeLimit = $this->mOptions->getMaxIncludeSize();

	// CPU time is only recorded when it is available
	$cpuSeconds = $this->mOutput->getTimeSinceStart( 'cpu' );
	if ( $cpuSeconds !== null ) {
		$this->mOutput->setLimitReportData(
			'limitreport-cputime',
			sprintf( "%.3f", $cpuSeconds )
		);
	}

	$wallSeconds = $this->mOutput->getTimeSinceStart( 'wall' );
	$this->mOutput->setLimitReportData(
		'limitreport-walltime',
		sprintf( "%.3f", $wallSeconds )
	);

	// Each of the following entries is a [ used, limit ] pair
	$this->mOutput->setLimitReportData(
		'limitreport-ppvisitednodes',
		[ $this->mPPNodeCount, $this->mOptions->getMaxPPNodeCount() ]
	);
	$this->mOutput->setLimitReportData(
		'limitreport-ppgeneratednodes',
		[ $this->mGeneratedPPNodeCount, $this->mOptions->getMaxGeneratedPPNodeCount() ]
	);
	$this->mOutput->setLimitReportData(
		'limitreport-postexpandincludesize',
		[ $this->mIncludeSizes['post-expand'], $includeSizeLimit ]
	);
	$this->mOutput->setLimitReportData(
		'limitreport-templateargumentsize',
		[ $this->mIncludeSizes['arg'], $includeSizeLimit ]
	);
	$this->mOutput->setLimitReportData(
		'limitreport-expansiondepth',
		[ $this->mHighestExpansionDepth, $this->mOptions->getMaxPPExpandDepth() ]
	);
	$this->mOutput->setLimitReportData(
		'limitreport-expensivefunctioncount',
		[ $this->mExpensiveFunctionCount, $this->mOptions->getExpensiveParserFunctionLimit() ]
	);

	foreach ( $this->mStripState->getLimitReport() as $stripEntry ) {
		list( $stripKey, $stripValue ) = $stripEntry;
		$this->mOutput->setLimitReportData( $stripKey, $stripValue );
	}

	Hooks::run( 'ParserLimitReportPrepare', [ $this, $this->mOutput ] );

	$limitReport = "NewPP limit report\n";
	if ( $this->svcOptions->get( 'ShowHostnames' ) ) {
		$limitReport .= 'Parsed by ' . wfHostname() . "\n";
	}
	$limitReport .= 'Cached time: ' . $this->mOutput->getCacheTime() . "\n";
	$limitReport .= 'Cache expiry: ' . $this->mOutput->getCacheExpiry() . "\n";
	$dynamicFlag = $this->mOutput->hasDynamicContent() ? 'true' : 'false';
	$limitReport .= 'Dynamic content: ' . $dynamicFlag . "\n";
	$limitReport .= 'Complications: [' . implode( ', ', $this->mOutput->getAllFlags() ) . "]\n";

	foreach ( $this->mOutput->getLimitReportData() as $key => $value ) {
		// A hook may take over the formatting of this entry
		$useDefaultFormat = Hooks::run(
			'ParserLimitReportFormat',
			[ $key, &$value, &$limitReport, false, false ]
		);
		if ( !$useDefaultFormat ) {
			continue;
		}

		$keyMsg = wfMessage( $key )->inLanguage( 'en' )->useDatabase( false );
		$valueMsg = wfMessage( [ "$key-value-text", "$key-value" ] )
			->inLanguage( 'en' )->useDatabase( false );
		if ( !$valueMsg->exists() ) {
			$valueMsg = new RawMessage( '$1' );
		}
		if ( $keyMsg->isDisabled() || $valueMsg->isDisabled() ) {
			continue;
		}
		$valueMsg->params( $value );
		$limitReport .= $keyMsg->text() . ': ' . $valueMsg->text() . "\n";
	}
	// Since we're not really outputting HTML, decode the entities and
	// then re-encode the things that need hiding inside HTML comments.
	$limitReport = htmlspecialchars_decode( $limitReport );

	// Sanitize for comment. Note '‐' in the replacement is U+2010,
	// which looks much like the problematic '-'.
	$limitReport = str_replace( [ '-', '&' ], [ '‐', '&amp;' ], $limitReport );
	$text = "\n<!-- \n" . $limitReport . "-->\n";

	// Add on template profiling data in human/machine readable way
	$statsByFunction = $this->mProfiler->getFunctionStats();
	uasort( $statsByFunction, function ( $left, $right ) {
		// descending order of 'real'
		return $right['real'] <=> $left['real'];
	} );
	$profileReport = [];
	foreach ( array_slice( $statsByFunction, 0, 10 ) as $stat ) {
		$profileReport[] = sprintf(
			"%6.2f%% %8.3f %6d %s",
			$stat['%real'],
			$stat['real'],
			$stat['calls'],
			htmlspecialchars( $stat['name'] )
		);
	}
	$text .= "<!--\nTransclusion expansion time report (%,ms,calls,template)\n";
	$text .= implode( "\n", $profileReport ) . "\n-->\n";

	$this->mOutput->setLimitReportData( 'limitreport-timingprofile', $profileReport );

	// Add other cache related metadata
	if ( $this->svcOptions->get( 'ShowHostnames' ) ) {
		$this->mOutput->setLimitReportData( 'cachereport-origin', wfHostname() );
	}
	$this->mOutput->setLimitReportData(
		'cachereport-timestamp',
		$this->mOutput->getCacheTime()
	);
	$this->mOutput->setLimitReportData(
		'cachereport-ttl',
		$this->mOutput->getCacheExpiry()
	);
	$this->mOutput->setLimitReportData(
		'cachereport-transientcontent',
		$this->mOutput->hasDynamicContent()
	);

	// Log pages whose generated node count exceeds a tenth of the limit
	if ( $this->mGeneratedPPNodeCount > $this->mOptions->getMaxGeneratedPPNodeCount() / 10 ) {
		wfDebugLog(
			'generated-pp-node-count',
			$this->mGeneratedPPNodeCount . ' ' . $this->mTitle->getPrefixedDBkey()
		);
	}
	return $text;
}
753
/**
 * Half-parse wikitext to half-parsed HTML. This recursive parser entry point
 * can be called from an extension tag hook.
 *
 * The output of this function IS NOT SAFE PARSED HTML; it is "half-parsed"
 * instead, which means that lists and links have not been fully parsed yet,
 * and strip markers are still present.
 *
 * Use recursiveTagParseFully() to fully parse wikitext to output-safe HTML.
 *
 * This is the function to use from a parser tag hook that wants to parse
 * wikitext before or after applying additional transformations and that
 * intends to *return the result as hook output*, so that it goes through
 * the rest of the parsing process automatically.
 *
 * If $frame is not provided, then template variables (e.g., {{{1}}}) within
 * $text are not expanded
 *
 * @param string $text Text extension wants to have parsed
 * @param-taint $text escapes_htmlnoent
 * @param bool|PPFrame $frame The frame to use for expanding any template variables
 * @return string UNSAFE half-parsed HTML
 * @return-taint escaped
 */
public function recursiveTagParse( $text, $frame = false ) {
	// Avoid PHP 7.1 warning from passing $this by reference
	$self = $this;
	Hooks::run( 'ParserBeforeStrip', [ &$self, &$text, &$this->mStripState ] );
	Hooks::run( 'ParserAfterStrip', [ &$self, &$text, &$this->mStripState ] );
	return $this->internalParse( $text, false, $frame );
}
786
/**
 * Fully parse wikitext to fully parsed HTML. This recursive parser entry
 * point can be called from an extension tag hook.
 *
 * The output of this function is fully-parsed HTML that is safe for output.
 * A parser tag hook might want to use recursiveTagParse() instead.
 *
 * If $frame is not provided, then template variables (e.g., {{{1}}}) within
 * $text are not expanded
 *
 * @since 1.25
 *
 * @param string $text Text extension wants to have parsed
 * @param-taint $text escapes_htmlnoent
 * @param bool|PPFrame $frame The frame to use for expanding any template variables
 * @return string Fully parsed HTML
 * @return-taint escaped
 */
public function recursiveTagParseFully( $text, $frame = false ) {
	$halfParsed = $this->recursiveTagParse( $text, $frame );
	return $this->internalParseHalfParsed( $halfParsed, false );
}
811
/**
 * Expand templates and variables in the text, producing valid, static wikitext.
 * Also removes comments.
 * Do not call this function recursively.
 *
 * @param string $text
 * @param Title|null $title
 * @param ParserOptions $options
 * @param int|null $revid When not null, stored as the current revision ID
 * @param bool|PPFrame $frame
 * @return mixed|string
 */
public function preprocess( $text, Title $title = null,
	ParserOptions $options, $revid = null, $frame = false
) {
	// Held in a local so the lock lasts until this method returns
	$lockGuard = $this->lock();
	$this->startParse( $title, $options, self::OT_PREPROCESS, true );
	if ( $revid !== null ) {
		$this->mRevisionId = $revid;
	}
	// Avoid PHP 7.1 warning from passing $this by reference
	$self = $this;
	Hooks::run( 'ParserBeforeStrip', [ &$self, &$text, &$this->mStripState ] );
	Hooks::run( 'ParserAfterStrip', [ &$self, &$text, &$this->mStripState ] );
	$expanded = $this->replaceVariables( $text, $frame );
	return $this->mStripState->unstripBoth( $expanded );
}
839
/**
 * Recursive parser entry point that can be called from an extension tag
 * hook. Replaces variables in the text, then unstrips the result.
 *
 * @param string $text Text to be expanded
 * @param bool|PPFrame $frame The frame to use for expanding any template variables
 * @return string
 * @since 1.19
 */
public function recursivePreprocess( $text, $frame = false ) {
	$expanded = $this->replaceVariables( $text, $frame );
	return $this->mStripState->unstripBoth( $expanded );
}
854
/**
 * Process the wikitext for the "?preload=" feature. (T7210)
 *
 * "<noinclude>", "<includeonly>" etc. are parsed as for template
 * transclusion, comments, templates, arguments, tags hooks and parser
 * functions are untouched.
 *
 * @param string $text
 * @param Title $title
 * @param ParserOptions $options
 * @param array $params Parameters substituted into $text as a RawMessage
 * @return string
 */
public function getPreloadText( $text, Title $title, ParserOptions $options, $params = [] ) {
	$text = ( new RawMessage( $text ) )->params( $params )->plain();

	# Parser (re)initialisation
	// Held in a local so the lock lasts until this method returns
	$lockGuard = $this->lock();
	$this->startParse( $title, $options, self::OT_PLAIN, true );

	$expandFlags = PPFrame::NO_ARGS | PPFrame::NO_TEMPLATES;
	$dom = $this->preprocessToDom( $text, self::PTD_FOR_INCLUSION );
	$rootFrame = $this->getPreprocessor()->newFrame();
	$expanded = $rootFrame->expand( $dom, $expandFlags );
	return $this->mStripState->unstripBoth( $expanded );
}
882
/**
 * Set the current user.
 * Should only be used when doing pre-save transform.
 *
 * @param User|null $user User object or null (to reset)
 */
public function setUser( $user ) {
	$newUser = $user;
	$this->mUser = $newUser;
}
892
/**
 * Set the context title
 *
 * A falsy value is replaced by the title 'NO TITLE'. A title with a
 * fragment is stored with the fragment removed.
 *
 * @param Title $t
 */
public function setTitle( $t ) {
	if ( !$t ) {
		$t = Title::newFromText( 'NO TITLE' );
	}

	# Strip the fragment to avoid various odd effects
	$this->mTitle = $t->hasFragment() ? $t->createFragmentTarget( '' ) : $t;
}
910
/**
 * Accessor for the Title object
 *
 * @return Title|null The context title currently stored on the parser
 */
public function getTitle() {
	$currentTitle = $this->mTitle;
	return $currentTitle;
}
919
/**
 * Accessor/mutator for the Title object, implemented with wfSetVar()
 *
 * @param Title|null $x Title object or null to just get the current one
 * @return Title
 */
public function Title( $x = null ) {
	$result = wfSetVar( $this->mTitle, $x );
	return $result;
}
929
/**
 * Set the output type and refresh the $this->ot shortcut flags.
 *
 * @param int $ot New value
 */
public function setOutputType( $ot ) {
	$this->mOutputType = $ot;

	# Shortcut alias: one boolean per known output type
	$typesByKey = [
		'html' => self::OT_HTML,
		'wiki' => self::OT_WIKI,
		'pre' => self::OT_PREPROCESS,
		'plain' => self::OT_PLAIN,
	];
	$flags = [];
	foreach ( $typesByKey as $key => $type ) {
		$flags[$key] = $ot == $type;
	}
	$this->ot = $flags;
}
945
/**
 * Combined accessor/mutator for the output type.
 *
 * Delegates to wfSetVar() on the stored type. Note that, unlike
 * setOutputType(), this does not touch the $this->ot shortcut flags.
 *
 * @param int|null $x New value or null to just get the current one
 * @return int
 */
public function OutputType( $x = null ) {
	return wfSetVar( $this->mOutputType, $x );
}
955
/**
 * Get the ParserOutput object this parser is currently filling.
 *
 * @return ParserOutput
 */
public function getOutput() {
	return $this->mOutput;
}
964
/**
 * Get the ParserOptions object currently in use.
 *
 * @return ParserOptions
 */
public function getOptions() {
	return $this->mOptions;
}
973
/**
 * Combined accessor/mutator for the ParserOptions object.
 *
 * Delegates to wfSetVar() on the stored options.
 *
 * @param ParserOptions|null $x New value or null to just get the current one
 * @return ParserOptions Current ParserOptions object
 */
public function Options( $x = null ) {
	return wfSetVar( $this->mOptions, $x );
}
983
/**
 * Hand out the current link ID and advance the counter by one.
 *
 * @return int The ID as it was before the increment
 */
public function nextLinkID() {
	$current = $this->mLinkID;
	$this->mLinkID++;
	return $current;
}
990
/**
 * Set the value that the next call to nextLinkID() will return.
 *
 * @param int $id
 */
public function setLinkID( $id ) {
	$this->mLinkID = $id;
}
997
/**
 * Get a language object for use in parser functions such as {{FORMATNUM:}}.
 *
 * This is simply the target language, see getTargetLanguage().
 *
 * @return Language
 */
public function getFunctionLang() {
	return $this->getTargetLanguage();
}
1005
/**
 * Get the target language for the content being parsed. This is usually the
 * language that the content is in.
 *
 * Lookup order: the target language set on the ParserOptions, then (for
 * interface messages) the user language object, then the page language of
 * the context title.
 *
 * @since 1.19
 *
 * @throws MWException If none of the options apply and no title is set
 * @return Language
 */
public function getTargetLanguage() {
	$explicit = $this->mOptions->getTargetLanguage();
	if ( $explicit !== null ) {
		return $explicit;
	}

	if ( $this->mOptions->getInterfaceMessage() ) {
		return $this->mOptions->getUserLangObj();
	}

	if ( $this->mTitle === null ) {
		throw new MWException( __METHOD__ . ': $this->mTitle is null' );
	}

	return $this->mTitle->getPageLanguage();
}
1028
/**
 * Get the language object for language conversion.
 *
 * Plain alias of getTargetLanguage(), so it returns or throws exactly as
 * that method does.
 *
 * @deprecated since 1.32, just use getTargetLanguage()
 * @return Language|null
 */
public function getConverterLanguage() {
	return $this->getTargetLanguage();
}
1037
/**
 * Get the user to parse for: the one set through setUser() if there is
 * one, otherwise the user from the ParserOptions object.
 *
 * @return User
 */
public function getUser() {
	return $this->mUser === null
		? $this->mOptions->getUser()
		: $this->mUser;
}
1050
/**
 * Get the preprocessor object, creating it on first use.
 *
 * The class to instantiate comes from the 'preprocessorClass' service
 * option; the instance is then kept for later calls.
 *
 * @return Preprocessor
 */
public function getPreprocessor() {
	if ( isset( $this->mPreprocessor ) ) {
		return $this->mPreprocessor;
	}

	$class = $this->svcOptions->get( 'preprocessorClass' );
	$this->mPreprocessor = new $class( $this );

	return $this->mPreprocessor;
}
1063
/**
 * Get a LinkRenderer instance to make links with.
 *
 * The renderer is created on first use with the stub threshold taken from
 * the options current at that moment, and then reused as-is.
 *
 * @since 1.28
 * @return LinkRenderer
 */
public function getLinkRenderer() {
	if ( $this->mLinkRenderer ) {
		return $this->mLinkRenderer;
	}

	// XXX We make the LinkRenderer with current options and then cache it forever
	$renderer = $this->linkRendererFactory->create();
	$this->mLinkRenderer = $renderer;
	$renderer->setStubThreshold( $this->getOptions()->getStubThreshold() );

	return $this->mLinkRenderer;
}
1081
/**
 * Get the MagicWordFactory held by this Parser.
 *
 * @since 1.32
 * @return MagicWordFactory
 */
public function getMagicWordFactory() {
	return $this->magicWordFactory;
}
1091
/**
 * Get the content language held by this Parser.
 *
 * @since 1.32
 * @return Language
 */
public function getContentLanguage() {
	return $this->contLang;
}
1101
/**
 * Replaces all occurrences of HTML-style comments and the given tags
 * in the text with a unique marker and returns the resulting text. The
 * output parameter $matches will be an associative array filled with data
 * in the form:
 *
 * @code
 *   'UNIQ-xxxxx' => [
 *     'element',
 *     'tag content',
 *     [ 'param' => 'x' ],
 *     '<element param="x">tag content</element>' ]
 * @endcode
 *
 * Element names are matched case-insensitively and are treated as literal
 * text, not as regular expression fragments. For a self-closing tag the
 * 'tag content' entry is null. A tag or comment with no closing counterpart
 * runs to the end of the text.
 *
 * @param array $elements List of element names. Comments are always extracted,
 *   even when this list is empty.
 * @param string $text Source text string.
 * @param array &$matches Out parameter, Array: extracted tags
 * @return string Stripped text
 */
public static function extractTagsAndParams( $elements, $text, &$matches ) {
	# Marker counter shared by every call in this request
	static $n = 1;
	$stripped = '';
	$matches = [];

	if ( $elements ) {
		# Tag names are data, not regex fragments: escape them so that a
		# name containing a metacharacter cannot alter or break the pattern.
		$taglist = implode( '|', array_map( function ( $name ) {
			return preg_quote( $name, '/' );
		}, $elements ) );
	} else {
		# An empty alternation would match "<>" as a nameless tag. (?!) never
		# matches, so only the comment branch of the pattern stays live while
		# the capture group layout is unchanged.
		$taglist = '(?!)';
	}
	$start = "/<($taglist)(\\s+[^>]*?|\\s*?)(\/?>)|<(!--)/i";

	while ( $text != '' ) {
		$p = preg_split( $start, $text, 2, PREG_SPLIT_DELIM_CAPTURE );
		$stripped .= $p[0];
		if ( count( $p ) < 5 ) {
			# Nothing (more) to extract
			break;
		}
		if ( count( $p ) > 5 ) {
			# comment: the fourth capture group took part in the match
			$element = $p[4];
			$attributes = '';
			$close = '';
			$inside = $p[5];
		} else {
			# tag
			list( , $element, $attributes, $close, $inside ) = $p;
		}

		$marker = self::MARKER_PREFIX . "-$element-" . sprintf( '%08X', $n++ ) . self::MARKER_SUFFIX;
		$stripped .= $marker;

		if ( $close === '/>' ) {
			# Empty element tag, <tag />
			$content = null;
			$text = $inside;
			$tail = null;
		} else {
			if ( $element === '!--' ) {
				$end = '/(-->)/';
			} else {
				# $element is text taken from the input; quote it as well
				$end = "/(<\\/" . preg_quote( $element, '/' ) . "\\s*>)/i";
			}
			$q = preg_split( $end, $inside, 2, PREG_SPLIT_DELIM_CAPTURE );
			$content = $q[0];
			if ( count( $q ) < 3 ) {
				# No end tag -- let it run out to the end of the text.
				$tail = '';
				$text = '';
			} else {
				list( , $tail, $text ) = $q;
			}
		}

		$matches[$marker] = [ $element,
			$content,
			Sanitizer::decodeTagAttributes( $attributes ),
			"<$element$attributes$close$content$tail" ];
	}
	return $stripped;
}
1178
/**
 * Get the list of strippable XML-like elements held by this parser.
 *
 * @return array
 */
public function getStripList() {
	return $this->mStripList;
}
1187
/**
 * Get the StripState currently in use.
 *
 * @return StripState
 */
public function getStripState() {
	return $this->mStripState;
}
1196
/**
 * Add an item to the strip state.
 *
 * Returns the unique marker which must be inserted into the stripped text;
 * the marker will be replaced with the original text in unstrip().
 *
 * @param string $text
 *
 * @return string The marker registered for $text
 */
public function insertStripItem( $text ) {
	# Take the current index for this marker and advance the counter
	$index = $this->mMarkerIndex++;
	$marker = self::MARKER_PREFIX . "-item-{$index}-" . self::MARKER_SUFFIX;

	$this->mStripState->addGeneral( $marker, $text );

	return $marker;
}
1212
/**
 * Parse the wiki syntax used to render tables.
 *
 * The text is processed line by line. State is kept in parallel stacks
 * with one entry per currently open table, so nested tables work.
 *
 * @private
 * @param string $text
 * @return string
 */
public function doTableStuff( $text ) {
	$out = '';
	$cellOpen = []; # Is a cell-level tag currently open?
	$lastTags = []; # Last cell-level tag activated (td, th or caption)
	$rowOpen = []; # Is currently a tr tag open?
	$rowAttribs = []; # Attributes waiting for the next <tr>
	$sawRow = []; # Did this table open a <tr> element?
	$indent = 0; # indent level of the table

	# Which tag each line-start character opens
	$tagByChar = [ '|' => 'td', '!' => 'th', '+' => 'caption' ];

	foreach ( StringUtils::explode( "\n", $text ) as $outLine ) {
		$line = trim( $outLine );

		if ( $line === '' ) {
			# Empty line: copy it through and go to the next one
			$out .= $outLine . "\n";
			continue;
		}

		$firstChar = $line[0];
		$firstTwo = substr( $line, 0, 2 );
		$m = [];

		if ( preg_match( '/^(:*)\s*\{\|(.*)$/', $line, $m ) ) {
			# Start of a new table; leading colons give the indent level
			$indent = strlen( $m[1] );

			$attributes = Sanitizer::fixTagAttributes(
				$this->mStripState->unstripBoth( $m[2] ),
				'table'
			);

			$outLine = str_repeat( '<dl><dd>', $indent ) . "<table{$attributes}>";
			$cellOpen[] = false;
			$lastTags[] = '';
			$rowOpen[] = false;
			$rowAttribs[] = '';
			$sawRow[] = false;
		} elseif ( count( $cellOpen ) == 0 ) {
			# Not inside a table: don't do any of the following
			$out .= $outLine . "\n";
			continue;
		} elseif ( $firstTwo === '|}' ) {
			# End of the innermost table. The closing tags are prepended
			# one by one, so the last one added ends up first in the output.
			$line = '</table>' . substr( $line, 2 );
			$lastTag = array_pop( $lastTags );

			if ( !array_pop( $sawRow ) ) {
				$line = "<tr><td></td></tr>{$line}";
			}

			if ( array_pop( $rowOpen ) ) {
				$line = "</tr>{$line}";
			}

			if ( array_pop( $cellOpen ) ) {
				$line = "</{$lastTag}>{$line}";
			}
			array_pop( $rowAttribs );

			$outLine = $indent > 0
				? rtrim( $line ) . str_repeat( '</dd></dl>', $indent )
				: $line;
		} elseif ( $firstTwo === '|-' ) {
			# Row separator; what is left after the dashes is only attributes
			$afterDashes = preg_replace( '#^\|-+#', '', $line );
			$attributes = Sanitizer::fixTagAttributes(
				$this->mStripState->unstripBoth( $afterDashes ),
				'tr'
			);
			array_pop( $rowAttribs );
			$rowAttribs[] = $attributes;

			$line = '';
			$lastTag = array_pop( $lastTags );
			array_pop( $sawRow );
			$sawRow[] = true;

			if ( array_pop( $rowOpen ) ) {
				$line = '</tr>';
			}

			if ( array_pop( $cellOpen ) ) {
				$line = "</{$lastTag}>{$line}";
			}

			$outLine = $line;
			$rowOpen[] = false;
			$cellOpen[] = false;
			$lastTags[] = '';
		} elseif ( $firstChar === '|'
			|| $firstChar === '!'
			|| $firstTwo === '|+'
		) {
			# This might be cell elements, td, th or captions
			if ( $firstTwo === '|+' ) {
				$firstChar = '+';
				$line = substr( $line, 2 );
			} else {
				$line = substr( $line, 1 );
			}

			// Implies both are valid for table headings.
			if ( $firstChar === '!' ) {
				$line = StringUtils::replaceMarkup( '!!', '||', $line );
			}

			# Split up multiple cells on the same line.
			# FIXME : This can result in improper nesting of tags processed
			# by earlier parser steps.
			$cells = explode( '||', $line );

			$outLine = '';

			# Loop through each table cell
			foreach ( $cells as $cell ) {
				$previous = '';
				if ( $firstChar !== '+' ) {
					# Anything but a caption lives in a row; open one if needed
					$pendingAttribs = array_pop( $rowAttribs );
					if ( !array_pop( $rowOpen ) ) {
						$previous = "<tr{$pendingAttribs}>\n";
					}
					$rowOpen[] = true;
					$rowAttribs[] = '';
					array_pop( $sawRow );
					$sawRow[] = true;
				}

				$lastTag = array_pop( $lastTags );

				# Close the cell that is still open, if any
				if ( array_pop( $cellOpen ) ) {
					$previous = "</{$lastTag}>\n{$previous}";
				}

				$lastTag = isset( $tagByChar[$firstChar] ) ? $tagByChar[$firstChar] : '';
				$lastTags[] = $lastTag;

				# A cell could contain both parameters and data
				$cellData = explode( '|', $cell, 2 );

				# T2553: Note that a '|' inside an invalid link should not
				# be mistaken as delimiting cell parameters
				# Bug T153140: Neither should language converter markup.
				if ( preg_match( '/\[\[|-\{/', $cellData[0] ) === 1 ) {
					$cell = "{$previous}<{$lastTag}>" . trim( $cell );
				} elseif ( count( $cellData ) == 1 ) {
					// Whitespace in cells is trimmed
					$cell = "{$previous}<{$lastTag}>" . trim( $cellData[0] );
				} else {
					$attributes = Sanitizer::fixTagAttributes(
						$this->mStripState->unstripBoth( $cellData[0] ),
						$lastTag
					);
					// Whitespace in cells is trimmed
					$cell = "{$previous}<{$lastTag}{$attributes}>" . trim( $cellData[1] );
				}

				$outLine .= $cell;
				$cellOpen[] = true;
			}
		}
		$out .= $outLine . "\n";
	}

	# Closing open td, tr && table
	# NOTE(review): an open cell is always closed with </td> here, even if it
	# was opened as th or caption -- confirm that later cleanup is relied on.
	while ( count( $cellOpen ) > 0 ) {
		if ( array_pop( $cellOpen ) ) {
			$out .= "</td>\n";
		}
		if ( array_pop( $rowOpen ) ) {
			$out .= "</tr>\n";
		}
		if ( !array_pop( $sawRow ) ) {
			$out .= "<tr><td></td></tr>\n";
		}

		$out .= "</table>\n";
	}

	# Remove trailing line-ending (b/c)
	if ( substr( $out, -1 ) === "\n" ) {
		$out = substr( $out, 0, -1 );
	}

	# special case: don't return empty table
	if ( $out === "<table>\n<tr><td></td></tr>\n</table>" ) {
		$out = '';
	}

	return $out;
}
1416
/**
 * Helper function for parse() that transforms wiki markup into half-parsed
 * HTML. Only called for $mOutputType == self::OT_HTML.
 *
 * The order of the individual passes below matters; see the inline notes.
 *
 * @private
 *
 * @param string $text The text to parse
 * @param-taint $text escapes_html
 * @param bool $isMain Whether this is being called from the main parse() function
 * @param PPFrame|bool $frame A pre-processor frame
 *
 * @return string
 */
public function internalParse( $text, $isMain = true, $frame = false ) {
	$origText = $text;

	// Avoid PHP 7.1 warning from passing $this by reference
	$parser = $this;

	# Hook to suspend the parser in this state
	$proceed = Hooks::run( 'ParserBeforeInternalParse', [ &$parser, &$text, &$this->mStripState ] );
	if ( !$proceed ) {
		return $text;
	}

	if ( $frame ) {
		# A frame was provided, so use it for replacing any variables.
		# The frame depth tells how include/noinclude tags should be handled:
		# depth=0 means this is the top-level document; otherwise it's an
		# included document.
		$flag = $frame->depth ? self::PTD_FOR_INCLUSION : 0;
		$text = $frame->expand( $this->preprocessToDom( $text, $flag ) );
	} else {
		# No frame provided: use old-style replaceVariables
		$text = $this->replaceVariables( $text );
	}

	Hooks::run( 'InternalParseBeforeSanitize', [ &$parser, &$text, &$this->mStripState ] );
	$text = Sanitizer::removeHTMLtags(
		$text,
		[ $this, 'attributeStripCallback' ],
		false,
		array_keys( $this->mTransparentTagHooks ),
		[],
		[ $this, 'addTrackingCategory' ]
	);
	Hooks::run( 'InternalParseBeforeLinks', [ &$parser, &$text, &$this->mStripState ] );

	# Tables need to come after variable replacement for things to work
	# properly; putting them before other transformations should keep
	# exciting things like link expansions from showing up in surprising
	# places.
	$text = $this->doTableStuff( $text );

	# Four or more dashes at the start of a line become a horizontal rule
	$text = preg_replace( '/(^|\n)-----*/', '\\1<hr />', $text );

	$text = $this->doDoubleUnderscore( $text );

	$text = $this->doHeadings( $text );
	$text = $this->replaceInternalLinks( $text );
	$text = $this->doAllQuotes( $text );
	$text = $this->replaceExternalLinks( $text );

	# replaceInternalLinks may sometimes leave behind
	# absolute URLs, which have to be masked to hide them from replaceExternalLinks
	$text = str_replace( self::MARKER_PREFIX . 'NOPARSE', '', $text );

	$text = $this->doMagicLinks( $text );

	return $this->formatHeadings( $text, $origText, $isMain );
}
1492
/**
 * Helper function for parse() that transforms half-parsed HTML into fully
 * parsed HTML.
 *
 * The ParserAfterUnstrip, ParserBeforeTidy and ParserAfterTidy hooks are
 * only run when $isMain is true.
 *
 * @param string $text
 * @param bool $isMain
 * @param bool $linestart Passed on to doBlockLevels()
 * @return string
 */
private function internalParseHalfParsed( $text, $isMain = true, $linestart = true ) {
	$text = $this->mStripState->unstripGeneral( $text );

	// Avoid PHP 7.1 warning from passing $this by reference
	$parser = $this;

	if ( $isMain ) {
		Hooks::run( 'ParserAfterUnstrip', [ &$parser, &$text ] );
	}

	# Clean up special characters, only run once, next-to-last before doBlockLevels
	$text = Sanitizer::armorFrenchSpaces( $text );

	$text = $this->doBlockLevels( $text, $linestart );

	$this->replaceLinkHolders( $text );

	/**
	 * The input doesn't get language converted if
	 * a) It's disabled
	 * b) Content isn't converted
	 * c) It's a conversion table
	 * d) it is an interface message (which is in the user language)
	 */
	if ( !$this->mOptions->getDisableContentConversion()
		&& !isset( $this->mDoubleUnderscores['nocontentconvert'] )
		&& !$this->mOptions->getInterfaceMessage()
	) {
		# The position of the convert() call should not be changed. it
		# assumes that the links are all replaced and the only thing left
		# is the <nowiki> mark.
		$text = $this->getTargetLanguage()->convert( $text );
	}

	$text = $this->mStripState->unstripNoWiki( $text );

	if ( $isMain ) {
		Hooks::run( 'ParserBeforeTidy', [ &$parser, &$text ] );
	}

	$text = $this->replaceTransparentTags( $text );
	$text = $this->mStripState->unstripGeneral( $text );

	$text = Sanitizer::normalizeCharReferences( $text );

	if ( MWTidy::isEnabled() ) {
		if ( $this->mOptions->getTidy() ) {
			$text = MWTidy::tidy( $text );
		}
	} else {
		# attempt to sanitize at least some nesting problems
		# (T4702 and quite a few others)
		# This code path is buggy and deprecated!
		wfDeprecated( 'disabling tidy', '1.33' );

		# Pattern => replacement, applied in this order
		$fixups = [
			# ''Something [http://www.cool.com cool''] -->
			# <i>Something</i><a href="http://www.cool.com"..><i>cool></i></a>
			'/(<([bi])>)(<([bi])>)?([^<]*)(<\/?a[^<]*>)([^<]*)(<\/\\4>)?(<\/\\2>)/' =>
				'\\1\\3\\5\\8\\9\\6\\1\\3\\7\\8\\9',
			# fix up an anchor inside another anchor, only
			# at least for a single single nested link (T5695)
			'/(<a[^>]+>)([^<]*)(<a[^>]+>[^<]*)<\/a>(.*)<\/a>/' =>
				'\\1\\2</a>\\3</a>\\1\\4</a>',
			# fix div inside inline elements- doBlockLevels won't wrap a line which
			# contains a div, so fix it up here; replace
			# div with escaped text
			'/(<([aib]) [^>]+>)([^<]*)(<div([^>]*)>)(.*)(<\/div>)([^<]*)(<\/\\2>)/' =>
				'\\1\\3&lt;div\\5&gt;\\6&lt;/div&gt;\\8\\9',
			# remove empty italic or bold tag pairs, some
			# introduced by rules above
			'/<([bi])><\/\\1>/' => '',
		];

		$patterns = array_keys( $fixups );
		$replacements = array_values( $fixups );
		$text = preg_replace( $patterns, $replacements, $text );
	}

	if ( $isMain ) {
		Hooks::run( 'ParserAfterTidy', [ &$parser, &$text ] );
	}

	return $text;
}
1587
/**
 * Replace special strings like "ISBN xxx" and "RFC xxx" with
 * magic external links.
 *
 * The text is scanned with one extended-mode pattern; every match is
 * handed to magicLinkCallback(), which decides what to emit based on the
 * capture group that matched.
 *
 * DML
 * @private
 *
 * @param string $text
 *
 * @return string
 */
public function doMagicLinks( $text ) {
	$prots = wfUrlProtocolsWithoutProtRel();
	$urlChar = self::EXT_LINK_URL_CLASS;
	$addr = self::EXT_LINK_ADDR;
	$space = self::SPACE_NOT_NL; # non-newline space
	$spdash = "(?:-|$space)"; # a dash or a non-newline space
	$spaces = "$space++"; # possessive match of 1 or more spaces

	# The first part is single-quoted so that \t, \r and \n reach PCRE
	# unchanged; the second part is double-quoted for interpolation.
	$pattern = '!(?: # Start cases
		(<a[ \t\r\n>].*?</a>) | # m[1]: Skip link text
		(<.*?>) | # m[2]: Skip stuff inside HTML elements' . "
		(\b # m[3]: Free external links
			(?i:$prots)
			($addr$urlChar*) # m[4]: Post-protocol path
		) |
		\b(?:RFC|PMID) $spaces # m[5]: RFC or PMID, capture number
			([0-9]+)\b |
		\bISBN $spaces ( # m[6]: ISBN, capture number
			(?: 97[89] $spdash? )? # optional 13-digit ISBN prefix
			(?: [0-9] $spdash? ){9} # 9 digits with opt. delimiters
			[0-9Xx] # check digit
		)\b
	)!xu";

	return preg_replace_callback( $pattern, [ $this, 'magicLinkCallback' ], $text );
}
1624
/**
 * Callback for doMagicLinks(): turn one match into its replacement.
 *
 * Group numbers refer to the pattern built in doMagicLinks(). Matches of
 * existing anchors or other HTML elements, and magic links whose type is
 * switched off in the options, are returned unchanged.
 *
 * @throws MWException If the RFC/PMID group matched but the text starts
 *   with neither keyword
 * @param array $m
 * @return string HTML
 */
public function magicLinkCallback( $m ) {
	$whole = $m[0];

	# m[1] is an anchor, m[2] any other HTML element: skip both
	if ( ( isset( $m[1] ) && $m[1] !== '' ) || ( isset( $m[2] ) && $m[2] !== '' ) ) {
		return $whole;
	}

	if ( isset( $m[3] ) && $m[3] !== '' ) {
		# Free external link
		return $this->makeFreeExternalLink( $whole, strlen( $m[4] ) );
	}

	if ( isset( $m[5] ) && $m[5] !== '' ) {
		# RFC or PMID
		if ( substr( $whole, 0, 3 ) === 'RFC' ) {
			if ( !$this->mOptions->getMagicRFCLinks() ) {
				return $whole;
			}
			list( $keyword, $urlmsg, $cssClass, $trackingCat ) =
				[ 'RFC', 'rfcurl', 'mw-magiclink-rfc', 'magiclink-tracking-rfc' ];
		} elseif ( substr( $whole, 0, 4 ) === 'PMID' ) {
			if ( !$this->mOptions->getMagicPMIDLinks() ) {
				return $whole;
			}
			list( $keyword, $urlmsg, $cssClass, $trackingCat ) =
				[ 'PMID', 'pubmedurl', 'mw-magiclink-pmid', 'magiclink-tracking-pmid' ];
		} else {
			throw new MWException( __METHOD__ . ': unrecognised match type "' .
				substr( $whole, 0, 20 ) . '"' );
		}

		$id = $m[5];
		$url = wfMessage( $urlmsg, $id )->inContentLanguage()->text();
		$this->addTrackingCategory( $trackingCat );
		return Linker::makeExternalLink( $url, "{$keyword} {$id}", true, $cssClass, [], $this->mTitle );
	}

	if ( isset( $m[6] ) && $m[6] !== ''
		&& $this->mOptions->getMagicISBNLinks()
	) {
		# ISBN: show it with plain spaces, look it up without separators
		$space = self::SPACE_NOT_NL; # non-newline space
		$isbn = preg_replace( "/$space/", ' ', $m[6] );
		$num = strtr( $isbn, [
			'-' => '',
			' ' => '',
			'x' => 'X',
		] );
		$this->addTrackingCategory( 'magiclink-tracking-isbn' );
		return $this->getLinkRenderer()->makeKnownLink(
			SpecialPage::getTitleFor( 'Booksources', $num ),
			"ISBN $isbn",
			[
				'class' => 'internal mw-magiclink-isbn',
				'title' => false // suppress title attribute
			]
		);
	}

	return $whole;
}
1692
/**
 * Make a free external link, given a user-supplied URL.
 *
 * Anything from the first escaped "<", ">" or non-breaking space onwards,
 * and any trailing punctuation, is split off the URL and appended after
 * the link as plain text.
 *
 * @param string $url
 * @param int $numPostProto
 *   The number of characters after the protocol.
 * @return string HTML
 * @private
 */
public function makeFreeExternalLink( $url, $numPostProto ) {
	$trail = '';

	# The characters '<' and '>' (which were escaped by
	# removeHTMLtags()) should not be included in
	# URLs, per RFC 2396.
	# Make &nbsp; terminate a URL as well (bug T84937)
	$m2 = [];
	$hasTerminator = preg_match(
		'/&(lt|gt|nbsp|#x0*(3[CcEe]|[Aa]0)|#0*(60|62|160));/',
		$url,
		$m2,
		PREG_OFFSET_CAPTURE
	);
	if ( $hasTerminator ) {
		$cut = $m2[0][1];
		$trail = substr( $url, $cut ) . $trail;
		$url = substr( $url, 0, $cut );
	}

	# Move trailing punctuation to $trail.
	# If there is no left bracket, then consider right brackets fair game too
	$sep = ',;\.:!?' . ( strpos( $url, '(' ) === false ? ')' : '' );

	$urlRev = strrev( $url );
	$numSepChars = strspn( $urlRev, $sep );
	# Don't break a trailing HTML entity by moving the ; into $trail
	# This is in hot code, so use substr_compare to avoid having to
	# create a new string object for the comparison.
	# More optimization: instead of running preg_match with a $
	# anchor, which can be slow, do the match on the reversed
	# string starting at the desired offset.
	# un-reversed regexp is: /&([a-z]+|#x[\da-f]+|#\d+)$/i
	if ( $numSepChars
		&& substr_compare( $url, ";", -$numSepChars, 1 ) === 0
		&& preg_match( '/\G([a-z]+|[\da-f]+x#|\d+#)&/i', $urlRev, $m2, 0, $numSepChars )
	) {
		$numSepChars--;
	}
	if ( $numSepChars ) {
		$trail = substr( $url, -$numSepChars ) . $trail;
		$url = substr( $url, 0, -$numSepChars );
	}

	# Verify that we still have a real URL after trail removal, and
	# not just lone protocol
	if ( strlen( $trail ) >= $numPostProto ) {
		return $url . $trail;
	}

	$url = Sanitizer::cleanUrl( $url );

	# Is this an external image?
	$html = $this->maybeMakeExternalImage( $url );
	if ( $html === false ) {
		# Not an image, make a link
		$label = $this->getTargetLanguage()->getConverter()->markNoConversion( $url );
		$attribs = $this->getExternalLinkAttribs( $url );
		$html = Linker::makeExternalLink( $url, $label, true, 'free', $attribs, $this->mTitle );
		# Register it in the output object...
		$this->mOutput->addExternalLink( $url );
	}

	return $html . $trail;
}
1767
/**
 * Parse headers and return html.
 *
 * Lines of the form "== text ==" become <hN> elements. Levels are handled
 * from 6 down to 1, so the longest run of equals signs wins.
 *
 * @private
 *
 * @param string $text
 *
 * @return string
 */
public function doHeadings( $text ) {
	foreach ( range( 6, 1 ) as $level ) {
		$h = str_repeat( '=', $level );
		// Trim non-newline whitespace from headings
		// Using \s* will break for: "==\n===\n" and parse as <h2>=</h2>
		$pattern = "/^(?:$h)[ \\t]*(.+?)[ \\t]*(?:$h)\\s*$/m";
		$text = preg_replace( $pattern, "<h{$level}>\\1</h{$level}>", $text );
	}
	return $text;
}
1786
/**
 * Replace single quotes with HTML markup.
 *
 * Quote markup never spans lines, so every line is run through doQuotes()
 * on its own and the results are joined with newlines again.
 *
 * @private
 *
 * @param string $text
 *
 * @return string The altered text
 */
public function doAllQuotes( $text ) {
	$joined = '';
	foreach ( StringUtils::explode( "\n", $text ) as $line ) {
		$joined .= $this->doQuotes( $line ) . "\n";
	}

	# Drop the newline added after the last line
	return substr( $joined, 0, -1 );
}
1804
/**
 * Helper function for doAllQuotes(): convert the apostrophe markup of a
 * single line into <i> and <b> tags.
 *
 * The line is split on runs of two or more apostrophes. Runs of 2, 3 and 5
 * are italic, bold and bold+italic markup; runs of 4, or of more than 5,
 * give their surplus leading apostrophes back to the preceding text. Tags
 * still open at the end of the line are closed.
 *
 * @param string $text One line of text
 *
 * @return string
 */
public function doQuotes( $text ) {
	# Even indexes hold plain text, odd indexes hold apostrophe runs
	$arr = preg_split( "/(''+)/", $text, -1, PREG_SPLIT_DELIM_CAPTURE );
	$countarr = count( $arr );
	if ( $countarr == 1 ) {
		# No apostrophe markup at all
		return $text;
	}

	// First, do some preliminary work. This may shift some apostrophes from
	// being mark-up to being text. It also counts the number of occurrences
	// of bold and italics mark-ups.
	$numbold = 0;
	$numitalics = 0;
	for ( $i = 1; $i < $countarr; $i += 2 ) {
		$thislen = strlen( $arr[$i] );
		// If there are ever four apostrophes, assume the first is supposed to
		// be text, and the remaining three constitute mark-up for bold text.
		// (T15227: ''''foo'''' turns into ' ''' foo ' ''')
		if ( $thislen == 4 ) {
			$arr[$i - 1] .= "'";
			$arr[$i] = "'''";
			$thislen = 3;
		} elseif ( $thislen > 5 ) {
			// If there are more than 5 apostrophes in a row, assume they're all
			// text except for the last 5.
			// (T15227: ''''''foo'''''' turns into ' ''''' foo ' ''''')
			$arr[$i - 1] .= str_repeat( "'", $thislen - 5 );
			$arr[$i] = "'''''";
			$thislen = 5;
		}
		// Count the number of occurrences of bold and italics mark-ups.
		if ( $thislen == 2 ) {
			$numitalics++;
		} elseif ( $thislen == 3 ) {
			$numbold++;
		} elseif ( $thislen == 5 ) {
			$numitalics++;
			$numbold++;
		}
	}

	// If there is an odd number of both bold and italics, it is likely
	// that one of the bold ones was meant to be an apostrophe followed
	// by italics. Which one we cannot know for certain, but it is more
	// likely to be one that has a single-letter word before it.
	if ( ( $numbold % 2 == 1 ) && ( $numitalics % 2 == 1 ) ) {
		$firstsingleletterword = -1;
		$firstmultiletterword = -1;
		$firstspace = -1;
		for ( $i = 1; $i < $countarr; $i += 2 ) {
			if ( strlen( $arr[$i] ) == 3 ) {
				# Last and second-to-last character of the preceding text
				$x1 = substr( $arr[$i - 1], -1 );
				$x2 = substr( $arr[$i - 1], -2, 1 );
				if ( $x1 === ' ' ) {
					if ( $firstspace == -1 ) {
						$firstspace = $i;
					}
				} elseif ( $x2 === ' ' ) {
					$firstsingleletterword = $i;
					// if $firstsingleletterword is set, we don't
					// look at the other options, so we can bail early.
					break;
				} elseif ( $firstmultiletterword == -1 ) {
					$firstmultiletterword = $i;
				}
			}
		}

		// If there is a single-letter word, use it!
		if ( $firstsingleletterword > -1 ) {
			$arr[$firstsingleletterword] = "''";
			$arr[$firstsingleletterword - 1] .= "'";
		} elseif ( $firstmultiletterword > -1 ) {
			// If not, but there's a multi-letter word, use that one.
			$arr[$firstmultiletterword] = "''";
			$arr[$firstmultiletterword - 1] .= "'";
		} elseif ( $firstspace > -1 ) {
			// ... otherwise use the first one that has neither.
			// (notice that it is possible for all three to be -1 if, for example,
			// there is only one pentuple-apostrophe in the line)
			$arr[$firstspace] = "''";
			$arr[$firstspace - 1] .= "'";
		}
	}

	// Now let's actually convert our apostrophic mush to HTML!
	// $state names the open tags in opening order ('', 'i', 'b', 'ib', 'bi').
	// 'both' means a ''''' was seen and it is not yet known which tag has to
	// be opened first; the text seen meanwhile is collected in $buffer.
	$output = '';
	$buffer = '';
	$state = '';
	foreach ( $arr as $i => $r ) {
		if ( ( $i % 2 ) == 0 ) {
			# Plain text
			if ( $state === 'both' ) {
				$buffer .= $r;
			} else {
				$output .= $r;
			}
		} else {
			# Apostrophe run
			$thislen = strlen( $r );
			if ( $thislen == 2 ) {
				if ( $state === 'i' ) {
					$output .= '</i>';
					$state = '';
				} elseif ( $state === 'bi' ) {
					$output .= '</i>';
					$state = 'b';
				} elseif ( $state === 'ib' ) {
					$output .= '</b></i><b>';
					$state = 'b';
				} elseif ( $state === 'both' ) {
					$output .= '<b><i>' . $buffer . '</i>';
					$state = 'b';
				} else { // $state can be 'b' or ''
					$output .= '<i>';
					$state .= 'i';
				}
			} elseif ( $thislen == 3 ) {
				if ( $state === 'b' ) {
					$output .= '</b>';
					$state = '';
				} elseif ( $state === 'bi' ) {
					$output .= '</i></b><i>';
					$state = 'i';
				} elseif ( $state === 'ib' ) {
					$output .= '</b>';
					$state = 'i';
				} elseif ( $state === 'both' ) {
					$output .= '<i><b>' . $buffer . '</b>';
					$state = 'i';
				} else { // $state can be 'i' or ''
					$output .= '<b>';
					$state .= 'b';
				}
			} elseif ( $thislen == 5 ) {
				if ( $state === 'b' ) {
					$output .= '</b><i>';
					$state = 'i';
				} elseif ( $state === 'i' ) {
					$output .= '</i><b>';
					$state = 'b';
				} elseif ( $state === 'bi' ) {
					$output .= '</i></b>';
					$state = '';
				} elseif ( $state === 'ib' ) {
					$output .= '</b></i>';
					$state = '';
				} elseif ( $state === 'both' ) {
					$output .= '<i><b>' . $buffer . '</b></i>';
					$state = '';
				} else { // ($state == '')
					$buffer = '';
					$state = 'both';
				}
			}
		}
	}
	// Now close all remaining tags. Notice that the order is important.
	if ( $state === 'b' || $state === 'ib' ) {
		$output .= '</b>';
	}
	if ( $state === 'i' || $state === 'bi' || $state === 'ib' ) {
		$output .= '</i>';
	}
	if ( $state === 'bi' ) {
		$output .= '</b>';
	}
	// There might be lonely ''''', so make sure we have a buffer.
	// Compare against '' explicitly: the buffered text "0" is falsy in PHP
	// and would otherwise be silently dropped from the output.
	if ( $state === 'both' && $buffer !== '' ) {
		$output .= '<b><i>' . $buffer . '</i></b>';
	}
	return $output;
}
1984
/**
 * Replace bracketed external links (REL) with rendered HTML links.
 *
 * The text is split on the bracketed-link regex. After the leading plain
 * text, the split result holds groups of four entries per link: the URL,
 * the protocol (unused here), the link text, and the text that follows
 * the link up to the next match.
 *
 * Note: this is all very hackish and the order of execution matters a lot.
 * Make sure to run tests/parser/parserTests.php if you change this code.
 *
 * @private
 *
 * @param string $text
 *
 * @throws MWException If preg_split() fails on the bracketed-link regex
 * @return string
 */
public function replaceExternalLinks( $text ) {
	$pieces = preg_split( $this->mExtLinkBracketedRegex, $text, -1, PREG_SPLIT_DELIM_CAPTURE );
	if ( $pieces === false ) {
		throw new MWException( "PCRE needs to be compiled with "
			. "--enable-unicode-properties in order for MediaWiki to function" );
	}

	# Everything before the first link is copied through untouched
	$result = array_shift( $pieces );

	$total = count( $pieces );
	for ( $pos = 0; $pos < $total; $pos += 4 ) {
		$url = $pieces[$pos];
		# $pieces[$pos + 1] is the protocol, which is not needed here
		$label = $pieces[$pos + 2];
		$trail = $pieces[$pos + 3];

		# The characters '<' and '>' (which were escaped by
		# removeHTMLtags()) should not be included in
		# URLs, per RFC 2396. Cut the URL at the first one and
		# push the remainder into the link text.
		$entity = [];
		if ( preg_match( '/&(lt|gt);/', $url, $entity, PREG_OFFSET_CAPTURE ) ) {
			$cut = $entity[0][1];
			$label = substr( $url, $cut ) . ' ' . $label;
			$url = substr( $url, 0, $cut );
		}

		# If the link text is an image URL, replace it with an <img> tag.
		# This happened by accident in the original parser, but some people used it extensively
		$img = $this->maybeMakeExternalImage( $label );
		if ( $img !== false ) {
			$label = $img;
		}

		$dtrail = '';
		# Link type, used for CSS
		$linktype = 'text';

		if ( $label == '' ) {
			# No link text, e.g. [http://domain.tld/some.link]: use an autonumber
			$langObj = $this->getTargetLanguage();
			$label = '[' . $langObj->formatNum( ++$this->mAutonumber ) . ']';
			$linktype = 'autonumber';
		} else {
			# Have link text, e.g. [http://domain.tld/some.link text]s
			# Split off any link trail
			list( $dtrail, $trail ) = Linker::splitTrail( $trail );
		}

		// Excluding protocol-relative URLs may avoid many false positives.
		if ( preg_match( '/^(?:' . wfUrlProtocolsWithoutProtRel() . ')/', $label ) ) {
			$label = $this->getTargetLanguage()->getConverter()->markNoConversion( $label );
		}

		$url = Sanitizer::cleanUrl( $url );

		# Use the encoded URL
		# This means that users can paste URLs directly into the text
		# Funny characters like ö aren't valid in URLs anyway
		# This was changed in August 2004
		$anchor = Linker::makeExternalLink( $url, $label, false, $linktype,
			$this->getExternalLinkAttribs( $url ), $this->mTitle );
		$result .= $anchor . $dtrail . $trail;

		# Register link in the output object.
		$this->mOutput->addExternalLink( $url );
	}

	return $result;
}
2066
/**
 * Get the rel attribute for a particular external link.
 *
 * Returns 'nofollow' when $wgNoFollowLinks is enabled, unless the given
 * title's namespace is listed in $wgNoFollowNsExceptions or the URL matches
 * $wgNoFollowDomainExceptions.
 *
 * @since 1.21
 * @param string|bool $url Optional URL, to extract the domain from for rel =>
 *  nofollow if appropriate
 * @param LinkTarget|null $title Optional LinkTarget, for wgNoFollowNsExceptions lookups
 * @return string|null Rel attribute for $url
 */
public static function getExternalLinkRel( $url = false, $title = null ) {
	global $wgNoFollowLinks, $wgNoFollowNsExceptions, $wgNoFollowDomainExceptions;

	if ( !$wgNoFollowLinks ) {
		return null;
	}

	// Only consult the namespace exceptions when a title was actually given.
	// Passing a `false` placeholder to in_array() would loosely match
	// namespace 0 (false == 0) and wrongly exempt title-less links.
	if ( $title && in_array( $title->getNamespace(), $wgNoFollowNsExceptions ) ) {
		return null;
	}

	if ( wfMatchesDomainList( $url, $wgNoFollowDomainExceptions ) ) {
		return null;
	}

	return 'nofollow';
}
2086
/**
 * Get an associative array of additional HTML attributes appropriate for a
 * particular external link. This currently may include rel => nofollow
 * (depending on configuration, namespace, and the URL's domain) and/or a
 * target attribute (depending on configuration).
 *
 * The 'rel' key is always present in the result; its value is null when
 * no rel value applies.
 *
 * @param string $url URL to extract the domain from for rel =>
 *  nofollow if appropriate
 * @return array Associative array of HTML attributes
 */
public function getExternalLinkAttribs( $url ) {
	$attribs = [];
	$rel = self::getExternalLinkRel( $url, $this->mTitle );

	$target = $this->mOptions->getExternalLinkTarget();
	if ( $target ) {
		$attribs['target'] = $target;
		if ( !in_array( $target, [ '_self', '_parent', '_top' ] ) ) {
			// T133507. New windows can navigate parent cross-origin.
			// Including noreferrer due to lacking browser
			// support of noopener. Eventually noreferrer should be removed.
			// getExternalLinkRel() returns null when nothing applies, so
			// check for null as well as '' to avoid a stray leading space.
			if ( $rel !== null && $rel !== '' ) {
				$rel .= ' ';
			}
			$rel .= 'noreferrer noopener';
		}
	}
	$attribs['rel'] = $rel;
	return $attribs;
}
2117
/**
 * Replace unusual escape codes in a URL with their equivalent characters
 *
 * This generally follows the syntax defined in RFC 3986, with special
 * consideration for HTTP query strings. The URL is processed in three
 * parts (fragment, query, then scheme/host/path), each with its own set
 * of characters that must stay percent-encoded. A bracketed IPv6 host is
 * detected up front and restored at the end.
 *
 * @param string $url
 * @return string
 */
public static function normalizeLinkUrl( $url ) {
	# Test for RFC 3986 IPv6 syntax
	$scheme = '[a-z][a-z0-9+.-]*:';
	$userinfo = '(?:[a-z0-9\-._~!$&\'()*+,;=:]|%[0-9a-f]{2})*';
	$ipv6Host = '\\[((?:[0-9a-f:]|%3[0-A]|%[46][1-6])+)\\]';
	$isIPv6 = false;
	if ( preg_match( "<^(?:{$scheme})?//(?:{$userinfo}@)?{$ipv6Host}(?:[:/?#].*|)$>i", $url, $m ) ) {
		$decodedHost = rawurldecode( $m[1] );
		if ( IP::isValid( $decodedHost ) ) {
			$isIPv6 = $decodedHost;
		}
	}

	# Make sure unsafe characters are encoded
	$url = preg_replace_callback(
		'/[\x00-\x20"<>\[\\\\\]^`{|}\x7F-\xFF]/',
		function ( $hit ) {
			return rawurlencode( $hit[0] );
		},
		$url
	);

	# Trailing components are peeled off from the end of the URL:
	# first the fragment, then the query ('query' minus &=+;).
	# Each entry maps the delimiter to the characters left escaped.
	$trailingParts = [
		'#' => '"#%<>[\]^`{|}',
		'?' => '"#%<>[\]^`{|}&=+;',
	];

	$ret = '';
	$end = strlen( $url );
	foreach ( $trailingParts as $delimiter => $unsafe ) {
		$start = strpos( $url, $delimiter );
		if ( $start !== false && $start < $end ) {
			$part = substr( $url, $start, $end - $start );
			$ret = self::normalizeUrlComponent( $part, $unsafe ) . $ret;
			$end = $start;
		}
	}

	# Scheme and path part - 'pchar'
	# (we assume no userinfo or encoded colons in the host)
	$head = substr( $url, 0, $end );
	$ret = self::normalizeUrlComponent( $head, '"#%<>[\]^`{|}/?' ) . $ret;

	# Fix IPv6 syntax: the brackets were percent-encoded above, put them back
	if ( $isIPv6 !== false ) {
		$ipv6Host = "%5B({$isIPv6})%5D";
		$ret = preg_replace(
			"<^((?:{$scheme})?//(?:{$userinfo}@)?){$ipv6Host}(?=[:/?#]|$)>i",
			"$1[$2]",
			$ret
		);
	}

	return $ret;
}
2184
/**
 * Normalize the percent-escapes inside one component of a URL.
 *
 * An escape is decoded when it stands for a character with a byte value
 * between 33 and 126 that is not listed in $unsafe. Every other escape
 * is kept, with its hex digits uppercased.
 *
 * @param string $component Part of a URL
 * @param string $unsafe Characters that must remain percent-encoded
 * @return string
 */
private static function normalizeUrlComponent( $component, $unsafe ) {
	return preg_replace_callback(
		'/%[0-9A-Fa-f]{2}/',
		function ( $escape ) use ( $unsafe ) {
			$decoded = urldecode( $escape[0] );
			$code = ord( $decoded );
			$printable = $code > 32 && $code < 127;
			if ( !$printable || strpos( $unsafe, $decoded ) !== false ) {
				# Leave it escaped, but use uppercase for a-f
				return strtoupper( $escape[0] );
			}
			# Safe to unescape
			return $decoded;
		},
		$component
	);
}
2199
/**
 * Make an image if it's allowed, either through the global
 * option, through the exception, or through the on-wiki whitelist
 *
 * @param string $url
 *
 * @return string|bool Image HTML from Linker::makeExternalImage(), or false
 *  if the URL was not turned into an image
 */
private function maybeMakeExternalImage( $url ) {
	$imagesfrom = $this->mOptions->getAllowExternalImagesFrom();

	# $imagesfrom may be a single prefix or an array of prefixes;
	# the URL qualifies when it starts with any of them
	$imagematch = false;
	if ( !empty( $imagesfrom ) ) {
		$prefixes = is_array( $imagesfrom ) ? $imagesfrom : [ $imagesfrom ];
		foreach ( $prefixes as $allowedPrefix ) {
			if ( strpos( $url, $allowedPrefix ) === 0 ) {
				$imagematch = true;
				break;
			}
		}
	}

	$html = false;
	if ( ( $this->mOptions->getAllowExternalImages() || $imagematch )
		&& preg_match( self::EXT_IMAGE_REGEX, $url )
	) {
		# Image found
		$html = Linker::makeExternalImage( $url );
	}

	# Fall back to the on-wiki whitelist, one regex fragment per line
	if ( !$html && $this->mOptions->getEnableImageWhitelist()
		&& preg_match( self::EXT_IMAGE_REGEX, $url )
	) {
		$message = wfMessage( 'external_image_whitelist' )->inContentLanguage()->text();
		foreach ( explode( "\n", $message ) as $entry ) {
			# Ignore blank entries and comments
			if ( $entry === '' || strpos( $entry, '#' ) === 0 ) {
				continue;
			}
			# Escape the delimiter in the fragment and match case-insensitively
			$pattern = '/' . str_replace( '/', '\\/', $entry ) . '/i';
			if ( preg_match( $pattern, $url ) ) {
				# Image matches a whitelist entry
				$html = Linker::makeExternalImage( $url );
				break;
			}
		}
	}

	return $html;
}
2257
/**
 * Process [[ ]] wikilinks
 *
 * Runs replaceInternalLinks2() on the text and merges the link holders
 * it returns into this parser's own holder array.
 *
 * @param string $s
 *
 * @return string Processed text
 *
 * @private
 */
public function replaceInternalLinks( $s ) {
	# $s is passed by reference and rewritten by replaceInternalLinks2()
	$newHolders = $this->replaceInternalLinks2( $s );
	$this->mLinkHolders->merge( $newHolders );
	return $s;
}
2271
/**
 * Process [[ ]] wikilinks (RIL)
 *
 * The text is split on "[[" and every piece is examined as a candidate
 * link. Pieces that do not form a valid link are appended back verbatim.
 * Language links and categories are registered on the parser output,
 * files and media links are rendered in place, and ordinary page links
 * become either known links or link holders.
 *
 * @param string &$s Text to process; overwritten with the processed text
 * @throws MWException If no title is set on the parser
 * @return LinkHolderArray
 *
 * @private
 */
public function replaceInternalLinks2( &$s ) {
	static $tc = false, $e1, $e1_img;
	if ( !$tc ) {
		# The % is needed to support urlencoded titles as well
		$tc = Title::legalChars() . '#%';
		# Match a link having the form [[namespace:link|alternate]]trail
		$e1 = "/^([{$tc}]+)(?:\\|(.+?))?]](.*)\$/sD";
		# Match cases where there is no "]]", which might still be images
		$e1_img = "/^([{$tc}]+)\\|(.*)\$/sD";
	}

	$holders = new LinkHolderArray( $this );

	# Split the entire text string on occurrences of [[
	$chunks = StringUtils::explode( '[[', ' ' . $s );
	# The first element is all text up to the first [[
	$s = $chunks->current();
	$chunks->next();
	# next() returns void, so fetch the element separately
	$line = $chunks->current();
	# Remove the space we added before splitting
	$s = substr( $s, 1 );

	$useLinkPrefixExtension = $this->getTargetLanguage()->linkPrefixExtension();
	$e2 = null;
	if ( $useLinkPrefixExtension ) {
		# Match the end of a line for a word that's not followed by whitespace,
		# e.g. in the case of 'The Arab al[[Razi]]', 'al' will be matched
		$charset = $this->contLang->linkPrefixCharset();
		$e2 = "/^((?>.*[^$charset]|))(.+)$/sDu";
	}

	if ( $this->mTitle === null ) {
		throw new MWException( __METHOD__ . ": \$this->mTitle is null\n" );
	}
	$nottalk = !$this->mTitle->isTalkPage();

	if ( $useLinkPrefixExtension ) {
		$m = [];
		$first_prefix = preg_match( $e2, $s, $m ) ? $m[2] : false;
	} else {
		$prefix = '';
	}

	$useSubpages = $this->areSubpagesAllowed();

	# Loop for each link
	for ( ; $line !== false && $line !== null; $chunks->next(), $line = $chunks->current() ) {
		# Check for excessive memory usage
		if ( $holders->isBig() ) {
			# Too big: do the existence check, replace the link holders
			# and clear the array
			$holders->replace( $s );
			$holders->clear();
		}

		if ( $useLinkPrefixExtension ) {
			if ( preg_match( $e2, $s, $m ) ) {
				list( , $s, $prefix ) = $m;
			} else {
				$prefix = '';
			}
			# The prefix of the first link was captured before the loop
			if ( $first_prefix ) {
				$prefix = $first_prefix;
				$first_prefix = false;
			}
		}

		$might_be_img = false;

		if ( preg_match( $e1, $line, $m ) ) {
			# Page with normal text or alt
			$text = $m[2];
			# If we get a ] at the beginning of $m[3] that means we have a link
			# that's something like:
			# [[Image:Foo.jpg|[http://example.com desc]]] <- three ] in a row
			# confuse the $e1 regex, which is where the real problem lies.
			# See T1500.
			# Still some problems for cases where the ] is meant to be outside
			# punctuation, and no image is in sight. See T4095.
			if ( $text !== ''
				&& substr( $m[3], 0, 1 ) === ']'
				&& strpos( $text, '[' ) !== false
			) {
				# So that replaceExternalLinks( $text ) works later
				$text .= ']';
				$m[3] = substr( $m[3], 1 );
			}
			$trail = $m[3];
		} elseif ( preg_match( $e1_img, $line, $m ) ) {
			# Invalid, but might be an image with a link in its caption
			$might_be_img = true;
			$text = $m[2];
			$trail = "";
		} else {
			# Invalid form; output directly
			$s .= $prefix . '[[' . $line;
			continue;
		}

		# Fix up urlencoded title texts (applies to both matched forms)
		if ( strpos( $m[1], '%' ) !== false ) {
			# Should anchors '#' also be rejected?
			$m[1] = str_replace( [ '<', '>' ], [ '&lt;', '&gt;' ], rawurldecode( $m[1] ) );
		}

		$origLink = ltrim( $m[1], ' ' );

		# Don't allow internal links to pages containing
		# PROTO: where PROTO is a valid URL protocol; these
		# should be external links.
		if ( preg_match( '/^(?i:' . $this->mUrlProtocols . ')/', $origLink ) ) {
			$s .= $prefix . '[[' . $line;
			continue;
		}

		# Make subpage if necessary
		$link = $useSubpages
			? $this->maybeDoSubpageLink( $origLink, $text )
			: $origLink;

		// \x7f isn't a default legal title char, so most likely strip
		// markers will force us into the "invalid form" path above. But,
		// just in case, let's assert that xmlish tags aren't valid in
		// the title position.
		$unstrip = $this->mStripState->killMarkers( $link );
		$noMarkers = ( $unstrip === $link );

		$nt = $noMarkers ? Title::newFromText( $link ) : null;
		if ( $nt === null ) {
			$s .= $prefix . '[[' . $line;
			continue;
		}

		$ns = $nt->getNamespace();
		$iw = $nt->getInterwiki();

		# A leading ':' forces a plain link
		$noforce = ( substr( $origLink, 0, 1 ) !== ':' );

		if ( $might_be_img ) {
			# This is actually an invalid link...
			if ( $ns != NS_FILE || !$noforce ) {
				# ...and it's not an image, so output it raw
				$s .= "{$prefix}[[$link|$text";
				# note: no $trail, because without an end, there *is* no trail
				continue;
			}

			# ...but it might be an image
			$found = false;
			while ( true ) {
				# Look at the next 'line' to see if we can close it there
				$chunks->next();
				$next_line = $chunks->current();
				if ( $next_line === false || $next_line === null ) {
					break;
				}
				$closers = explode( ']]', $next_line, 3 );
				$closerCount = count( $closers );
				if ( $closerCount == 3 ) {
					# The first ]] closes the inner link, the second the image
					$found = true;
					$text .= "[[{$closers[0]}]]{$closers[1]}";
					$trail = $closers[2];
					break;
				} elseif ( $closerCount == 2 ) {
					# If there's exactly one ]] that's fine, we'll keep looking
					$text .= "[[{$closers[0]}]]{$closers[1]}";
				} else {
					# If $next_line is invalid too, we need look no further
					$text .= '[[' . $next_line;
					break;
				}
			}
			if ( !$found ) {
				# We couldn't find the end of this imageLink, so output it raw,
				# but don't ignore what might be perfectly normal links in the
				# text we've examined
				$holders->merge( $this->replaceInternalLinks2( $text ) );
				$s .= "{$prefix}[[$link|$text";
				# note: no $trail, because without an end, there *is* no trail
				continue;
			}
		}

		$wasblank = ( $text == '' );
		if ( $wasblank ) {
			# Use the link target as text, stripping off a leading ':' if forced
			$text = $noforce ? $link : substr( $link, 1 );
		} else {
			# T6598 madness. Handle the quotes only if they come from the alternate part
			# [[Lista d''e paise d''o munno]] -> <a href="...">Lista d''e paise d''o munno</a>
			# [[Criticism of Harry Potter|Criticism of ''Harry Potter'']]
			#    -> <a href="Criticism of Harry Potter">Criticism of <i>Harry Potter</i></a>
			$text = $this->doQuotes( $text );
		}

		# Link not escaped by : , create the various objects
		if ( $noforce && !$nt->wasLocalInterwiki() ) {
			# Interwikis
			if (
				$iw && $this->mOptions->getInterwikiMagic() && $nottalk && (
					Language::fetchLanguageName( $iw, null, 'mw' ) ||
					in_array( $iw, $this->svcOptions->get( 'ExtraInterlanguageLinkPrefixes' ) )
				)
			) {
				# T26502: filter duplicates
				if ( !isset( $this->mLangLinkLanguages[$iw] ) ) {
					$this->mLangLinkLanguages[$iw] = true;
					$this->mOutput->addLanguageLink( $nt->getFullText() );
				}

				/**
				 * Strip the whitespace interwiki links produce, see T10897
				 */
				$s = rtrim( $s . $prefix ) . $trail; # T175416
				continue;
			}

			if ( $ns == NS_FILE ) {
				if ( !wfIsBadImage( $nt->getDBkey(), $this->mTitle ) ) {
					if ( $wasblank ) {
						# If no parameters were passed, $text
						# becomes something like "File:Foo.png",
						# which we don't want to pass on to the
						# image generator
						$text = '';
					} else {
						# Recursively parse links inside the image caption.
						# Actually, this will parse them in any other parameters, too,
						# but it might be hard to fix that, and it doesn't matter ATM
						$text = $this->replaceExternalLinks( $text );
						$holders->merge( $this->replaceInternalLinks2( $text ) );
					}
					# Cloak any absolute URLs inside the image markup, so
					# replaceExternalLinks() won't touch them
					$s .= $prefix . $this->armorLinks(
						$this->makeImage( $nt, $text, $holders ) ) . $trail;
					continue;
				}
			} elseif ( $ns == NS_CATEGORY ) {
				/**
				 * Strip the whitespace Category links produce, see T2087
				 */
				$s = rtrim( $s . $prefix ) . $trail; # T2087, T87753

				$sortkey = $wasblank ? $this->getDefaultSort() : $text;
				$sortkey = Sanitizer::decodeCharReferences( $sortkey );
				$sortkey = str_replace( "\n", '', $sortkey );
				$sortkey = $this->getTargetLanguage()->convertCategoryKey( $sortkey );
				$this->mOutput->addCategory( $nt->getDBkey(), $sortkey );

				continue;
			}
		}

		# Self-link checking. For some languages, variants of the title are checked in
		# LinkHolderArray::doVariants() to allow batching the existence checks necessary
		# for linking to a different variant.
		if ( $ns != NS_SPECIAL && $nt->equals( $this->mTitle ) && !$nt->hasFragment() ) {
			$s .= $prefix . Linker::makeSelfLinkObj( $nt, $text, '', $trail );
			continue;
		}

		# NS_MEDIA is a pseudo-namespace for linking directly to a file
		# @todo FIXME: Should do batch file existence checks, see comment below
		if ( $ns == NS_MEDIA ) {
			# Give extensions a chance to select the file revision for us
			$options = [];
			$descQuery = false;
			Hooks::run( 'BeforeParserFetchFileAndTitle',
				[ $this, $nt, &$options, &$descQuery ] );
			# Fetch and register the file (file title may be different via hooks)
			list( $file, $nt ) = $this->fetchFileAndTitle( $nt, $options );
			# Cloak with NOPARSE to avoid replacement in replaceExternalLinks
			$s .= $prefix . $this->armorLinks(
				Linker::makeMediaLinkFile( $nt, $file, $text ) ) . $trail;
			continue;
		}

		# Some titles, such as valid special pages or files in foreign repos, should
		# be shown as bluelinks even though they're not included in the page table
		# @todo FIXME: isAlwaysKnown() can be expensive for file links; we should really do
		# batch file existence checks for NS_FILE and NS_MEDIA
		if ( $iw == '' && $nt->isAlwaysKnown() ) {
			$this->mOutput->addLink( $nt );
			$s .= $this->makeKnownLinkHolder( $nt, $text, $trail, $prefix );
		} else {
			# Links will be added to the output link list after checking
			$s .= $holders->makeHolder( $nt, $text, [], $trail, $prefix );
		}
	}
	return $holders;
}
2578
/**
 * Render a forced-blue link inline; protect against double expansion of
 * URLs if we're in a mode that prepends full URL prefixes to internal links.
 *
 * The trail has to be split here: the part that belongs inside the link
 * goes into the link text, and the rest is appended after the armored
 * link so that URLs in the following text are left alone.
 *
 * @param Title $nt
 * @param string $text Link text; the escaped prefixed title is used when empty
 * @param string $trail
 * @param string $prefix
 * @return string HTML-wikitext mix
 */
protected function makeKnownLinkHolder( $nt, $text = '', $trail = '', $prefix = '' ) {
	list( $inside, $trail ) = Linker::splitTrail( $trail );

	$label = ( $text == '' ) ? htmlspecialchars( $nt->getPrefixedText() ) : $text;
	$html = new HtmlArmor( $prefix . $label . $inside );
	$link = $this->getLinkRenderer()->makeKnownLink( $nt, $html );

	return $this->armorLinks( $link ) . $trail;
}
2605
/**
 * Insert a NOPARSE hacky thing into any inline links in a chunk that's
 * going to go through further parsing steps before inline URL expansion.
 *
 * Every URL protocol found at a word boundary is prefixed with the parser
 * marker prefix followed by "NOPARSE".
 *
 * Not needed quite as much as it used to be since free links are a bit
 * more sensible these days. But bracketed links are still an issue.
 *
 * @param string $text More-or-less HTML
 * @return string Less-or-more HTML with NOPARSE bits
 */
public function armorLinks( $text ) {
	$protocolPattern = '/\b((?i)' . $this->mUrlProtocols . ')/';
	$armored = self::MARKER_PREFIX . "NOPARSE$1";
	return preg_replace( $protocolPattern, $armored, $text );
}
2620
/**
 * Return true if subpage links should be expanded on this page.
 * @return bool
 */
public function areSubpagesAllowed() {
	# Some namespaces don't allow subpages, so ask about the current title's one
	$namespace = $this->mTitle->getNamespace();
	return $this->nsInfo->hasSubpages( $namespace );
}
2629
/**
 * Handle link to subpage if necessary
 *
 * Delegates to Linker::normalizeSubpageLink() with the current title as
 * the context page.
 *
 * @param string $target The source of the link
 * @param string &$text The link text, modified as necessary
 * @return string The full name of the link
 * @private
 */
public function maybeDoSubpageLink( $target, &$text ) {
	$fullName = Linker::normalizeSubpageLink( $this->mTitle, $target, $text );
	return $fullName;
}
2641
/**
 * Make lists from lines starting with ':', '*', '#', etc. (DBL)
 *
 * Thin wrapper around BlockLevelPass::doBlockLevels().
 *
 * @param string $text
 * @param bool $linestart Whether or not this is at the start of a line.
 * @private
 * @return string The lists rendered as HTML
 */
public function doBlockLevels( $text, $linestart ) {
	$rendered = BlockLevelPass::doBlockLevels( $text, $linestart );
	return $rendered;
}
2653
2654 /**
2655 * Return value of a magic variable (like PAGENAME)
2656 *
2657 * @private
2658 *
2659 * @param string $index Magic variable identifier as mapped in MagicWordFactory::$mVariableIDs
2660 * @param bool|PPFrame $frame
2661 *
2662 * @throws MWException
2663 * @return string
2664 */
2665 public function getVariableValue( $index, $frame = false ) {
2666 if ( is_null( $this->mTitle ) ) {
2667 // If no title set, bad things are going to happen
2668 // later. Title should always be set since this
2669 // should only be called in the middle of a parse
2670 // operation (but the unit-tests do funky stuff)
2671 throw new MWException( __METHOD__ . ' Should only be '
2672 . ' called while parsing (no title set)' );
2673 }
2674
2675 // Avoid PHP 7.1 warning from passing $this by reference
2676 $parser = $this;
2677
2678 /**
2679 * Some of these require message or data lookups and can be
2680 * expensive to check many times.
2681 */
2682 if (
2683 Hooks::run( 'ParserGetVariableValueVarCache', [ &$parser, &$this->mVarCache ] ) &&
2684 isset( $this->mVarCache[$index] )
2685 ) {
2686 return $this->mVarCache[$index];
2687 }
2688
2689 $ts = wfTimestamp( TS_UNIX, $this->mOptions->getTimestamp() );
2690 Hooks::run( 'ParserGetVariableValueTs', [ &$parser, &$ts ] );
2691
2692 $pageLang = $this->getFunctionLang();
2693
2694 switch ( $index ) {
2695 case '!':
2696 $value = '|';
2697 break;
2698 case 'currentmonth':
2699 $value = $pageLang->formatNum( MWTimestamp::getInstance( $ts )->format( 'm' ), true );
2700 break;
2701 case 'currentmonth1':
2702 $value = $pageLang->formatNum( MWTimestamp::getInstance( $ts )->format( 'n' ), true );
2703 break;
2704 case 'currentmonthname':
2705 $value = $pageLang->getMonthName( MWTimestamp::getInstance( $ts )->format( 'n' ) );
2706 break;
2707 case 'currentmonthnamegen':
2708 $value = $pageLang->getMonthNameGen( MWTimestamp::getInstance( $ts )->format( 'n' ) );
2709 break;
2710 case 'currentmonthabbrev':
2711 $value = $pageLang->getMonthAbbreviation( MWTimestamp::getInstance( $ts )->format( 'n' ) );
2712 break;
2713 case 'currentday':
2714 $value = $pageLang->formatNum( MWTimestamp::getInstance( $ts )->format( 'j' ), true );
2715 break;
2716 case 'currentday2':
2717 $value = $pageLang->formatNum( MWTimestamp::getInstance( $ts )->format( 'd' ), true );
2718 break;
2719 case 'localmonth':
2720 $value = $pageLang->formatNum( MWTimestamp::getLocalInstance( $ts )->format( 'm' ), true );
2721 break;
2722 case 'localmonth1':
2723 $value = $pageLang->formatNum( MWTimestamp::getLocalInstance( $ts )->format( 'n' ), true );
2724 break;
2725 case 'localmonthname':
2726 $value = $pageLang->getMonthName( MWTimestamp::getLocalInstance( $ts )->format( 'n' ) );
2727 break;
2728 case 'localmonthnamegen':
2729 $value = $pageLang->getMonthNameGen( MWTimestamp::getLocalInstance( $ts )->format( 'n' ) );
2730 break;
2731 case 'localmonthabbrev':
2732 $value = $pageLang->getMonthAbbreviation( MWTimestamp::getLocalInstance( $ts )->format( 'n' ) );
2733 break;
2734 case 'localday':
2735 $value = $pageLang->formatNum( MWTimestamp::getLocalInstance( $ts )->format( 'j' ), true );
2736 break;
2737 case 'localday2':
2738 $value = $pageLang->formatNum( MWTimestamp::getLocalInstance( $ts )->format( 'd' ), true );
2739 break;
2740 case 'pagename':
2741 $value = wfEscapeWikiText( $this->mTitle->getText() );
2742 break;
2743 case 'pagenamee':
2744 $value = wfEscapeWikiText( $this->mTitle->getPartialURL() );
2745 break;
2746 case 'fullpagename':
2747 $value = wfEscapeWikiText( $this->mTitle->getPrefixedText() );
2748 break;
2749 case 'fullpagenamee':
2750 $value = wfEscapeWikiText( $this->mTitle->getPrefixedURL() );
2751 break;
2752 case 'subpagename':
2753 $value = wfEscapeWikiText( $this->mTitle->getSubpageText() );
2754 break;
2755 case 'subpagenamee':
2756 $value = wfEscapeWikiText( $this->mTitle->getSubpageUrlForm() );
2757 break;
2758 case 'rootpagename':
2759 $value = wfEscapeWikiText( $this->mTitle->getRootText() );
2760 break;
2761 case 'rootpagenamee':
2762 $value = wfEscapeWikiText( wfUrlencode( str_replace(
2763 ' ',
2764 '_',
2765 $this->mTitle->getRootText()
2766 ) ) );
2767 break;
2768 case 'basepagename':
2769 $value = wfEscapeWikiText( $this->mTitle->getBaseText() );
2770 break;
2771 case 'basepagenamee':
2772 $value = wfEscapeWikiText( wfUrlencode( str_replace(
2773 ' ',
2774 '_',
2775 $this->mTitle->getBaseText()
2776 ) ) );
2777 break;
2778 case 'talkpagename':
2779 if ( $this->mTitle->canHaveTalkPage() ) {
2780 $talkPage = $this->mTitle->getTalkPage();
2781 $value = wfEscapeWikiText( $talkPage->getPrefixedText() );
2782 } else {
2783 $value = '';
2784 }
2785 break;
2786 case 'talkpagenamee':
2787 if ( $this->mTitle->canHaveTalkPage() ) {
2788 $talkPage = $this->mTitle->getTalkPage();
2789 $value = wfEscapeWikiText( $talkPage->getPrefixedURL() );
2790 } else {
2791 $value = '';
2792 }
2793 break;
2794 case 'subjectpagename':
2795 $subjPage = $this->mTitle->getSubjectPage();
2796 $value = wfEscapeWikiText( $subjPage->getPrefixedText() );
2797 break;
2798 case 'subjectpagenamee':
2799 $subjPage = $this->mTitle->getSubjectPage();
2800 $value = wfEscapeWikiText( $subjPage->getPrefixedURL() );
2801 break;
2802 case 'pageid': // requested in T25427
2803 # Inform the edit saving system that getting the canonical output
2804 # after page insertion requires a parse that used that exact page ID
2805 $this->setOutputFlag( 'vary-page-id', '{{PAGEID}} used' );
2806 $value = $this->mTitle->getArticleID();
2807 if ( !$value ) {
2808 $value = $this->mOptions->getSpeculativePageId();
2809 if ( $value ) {
2810 $this->mOutput->setSpeculativePageIdUsed( $value );
2811 }
2812 }
2813 break;
2814 case 'revisionid':
2815 if (
2816 $this->svcOptions->get( 'MiserMode' ) &&
2817 !$this->mOptions->getInterfaceMessage() &&
2818 // @TODO: disallow this word on all namespaces
2819 $this->nsInfo->isContent( $this->mTitle->getNamespace() )
2820 ) {
2821 // Use a stub result instead of the actual revision ID in order to avoid
2822 // double parses on page save but still allow preview detection (T137900)
2823 if ( $this->getRevisionId() || $this->mOptions->getSpeculativeRevId() ) {
2824 $value = '-';
2825 } else {
2826 $this->setOutputFlag( 'vary-revision-exists', '{{REVISIONID}} used' );
2827 $value = '';
2828 }
2829 } else {
2830 # Inform the edit saving system that getting the canonical output after
2831 # revision insertion requires a parse that used that exact revision ID
2832 $this->setOutputFlag( 'vary-revision-id', '{{REVISIONID}} used' );
2833 $value = $this->getRevisionId();
2834 if ( $value === 0 ) {
2835 $rev = $this->getRevisionObject();
2836 $value = $rev ? $rev->getId() : $value;
2837 }
2838 if ( !$value ) {
2839 $value = $this->mOptions->getSpeculativeRevId();
2840 if ( $value ) {
2841 $this->mOutput->setSpeculativeRevIdUsed( $value );
2842 }
2843 }
2844 }
2845 break;
2846 case 'revisionday':
2847 $value = (int)$this->getRevisionTimestampSubstring( 6, 2, self::MAX_TTS, $index );
2848 break;
2849 case 'revisionday2':
2850 $value = $this->getRevisionTimestampSubstring( 6, 2, self::MAX_TTS, $index );
2851 break;
2852 case 'revisionmonth':
2853 $value = $this->getRevisionTimestampSubstring( 4, 2, self::MAX_TTS, $index );
2854 break;
2855 case 'revisionmonth1':
2856 $value = (int)$this->getRevisionTimestampSubstring( 4, 2, self::MAX_TTS, $index );
2857 break;
2858 case 'revisionyear':
2859 $value = $this->getRevisionTimestampSubstring( 0, 4, self::MAX_TTS, $index );
2860 break;
2861 case 'revisiontimestamp':
2862 $value = $this->getRevisionTimestampSubstring( 0, 14, self::MAX_TTS, $index );
2863 break;
2864 case 'revisionuser':
2865 # Inform the edit saving system that getting the canonical output after
2866 # revision insertion requires a parse that used the actual user ID
2867 $this->setOutputFlag( 'vary-user', '{{REVISIONUSER}} used' );
2868 $value = $this->getRevisionUser();
2869 break;
2870 case 'revisionsize':
2871 $value = $this->getRevisionSize();
2872 break;
2873 case 'namespace':
2874 $value = str_replace( '_', ' ',
2875 $this->contLang->getNsText( $this->mTitle->getNamespace() ) );
2876 break;
2877 case 'namespacee':
2878 $value = wfUrlencode( $this->contLang->getNsText( $this->mTitle->getNamespace() ) );
2879 break;
2880 case 'namespacenumber':
2881 $value = $this->mTitle->getNamespace();
2882 break;
2883 case 'talkspace':
2884 $value = $this->mTitle->canHaveTalkPage()
2885 ? str_replace( '_', ' ', $this->mTitle->getTalkNsText() )
2886 : '';
2887 break;
2888 case 'talkspacee':
2889 $value = $this->mTitle->canHaveTalkPage() ? wfUrlencode( $this->mTitle->getTalkNsText() ) : '';
2890 break;
2891 case 'subjectspace':
2892 $value = str_replace( '_', ' ', $this->mTitle->getSubjectNsText() );
2893 break;
2894 case 'subjectspacee':
2895 $value = ( wfUrlencode( $this->mTitle->getSubjectNsText() ) );
2896 break;
2897 case 'currentdayname':
2898 $value = $pageLang->getWeekdayName( (int)MWTimestamp::getInstance( $ts )->format( 'w' ) + 1 );
2899 break;
2900 case 'currentyear':
2901 $value = $pageLang->formatNum( MWTimestamp::getInstance( $ts )->format( 'Y' ), true );
2902 break;
2903 case 'currenttime':
2904 $value = $pageLang->time( wfTimestamp( TS_MW, $ts ), false, false );
2905 break;
2906 case 'currenthour':
2907 $value = $pageLang->formatNum( MWTimestamp::getInstance( $ts )->format( 'H' ), true );
2908 break;
2909 case 'currentweek':
2910 # @bug T6594 PHP5 has it zero padded, PHP4 does not, cast to
2911 # int to remove the padding
2912 $value = $pageLang->formatNum( (int)MWTimestamp::getInstance( $ts )->format( 'W' ) );
2913 break;
2914 case 'currentdow':
2915 $value = $pageLang->formatNum( MWTimestamp::getInstance( $ts )->format( 'w' ) );
2916 break;
2917 case 'localdayname':
2918 $value = $pageLang->getWeekdayName(
2919 (int)MWTimestamp::getLocalInstance( $ts )->format( 'w' ) + 1
2920 );
2921 break;
2922 case 'localyear':
2923 $value = $pageLang->formatNum( MWTimestamp::getLocalInstance( $ts )->format( 'Y' ), true );
2924 break;
2925 case 'localtime':
2926 $value = $pageLang->time(
2927 MWTimestamp::getLocalInstance( $ts )->format( 'YmdHis' ),
2928 false,
2929 false
2930 );
2931 break;
2932 case 'localhour':
2933 $value = $pageLang->formatNum( MWTimestamp::getLocalInstance( $ts )->format( 'H' ), true );
2934 break;
2935 case 'localweek':
2936 # @bug T6594 PHP5 has it zero padded, PHP4 does not, cast to
2937 # int to remove the padding
2938 $value = $pageLang->formatNum( (int)MWTimestamp::getLocalInstance( $ts )->format( 'W' ) );
2939 break;
2940 case 'localdow':
2941 $value = $pageLang->formatNum( MWTimestamp::getLocalInstance( $ts )->format( 'w' ) );
2942 break;
2943 case 'numberofarticles':
2944 $value = $pageLang->formatNum( SiteStats::articles() );
2945 break;
2946 case 'numberoffiles':
2947 $value = $pageLang->formatNum( SiteStats::images() );
2948 break;
2949 case 'numberofusers':
2950 $value = $pageLang->formatNum( SiteStats::users() );
2951 break;
2952 case 'numberofactiveusers':
2953 $value = $pageLang->formatNum( SiteStats::activeUsers() );
2954 break;
2955 case 'numberofpages':
2956 $value = $pageLang->formatNum( SiteStats::pages() );
2957 break;
2958 case 'numberofadmins':
2959 $value = $pageLang->formatNum( SiteStats::numberingroup( 'sysop' ) );
2960 break;
2961 case 'numberofedits':
2962 $value = $pageLang->formatNum( SiteStats::edits() );
2963 break;
2964 case 'currenttimestamp':
2965 $value = wfTimestamp( TS_MW, $ts );
2966 break;
2967 case 'localtimestamp':
2968 $value = MWTimestamp::getLocalInstance( $ts )->format( 'YmdHis' );
2969 break;
2970 case 'currentversion':
2971 $value = SpecialVersion::getVersion();
2972 break;
2973 case 'articlepath':
2974 return $this->svcOptions->get( 'ArticlePath' );
2975 case 'sitename':
2976 return $this->svcOptions->get( 'Sitename' );
2977 case 'server':
2978 return $this->svcOptions->get( 'Server' );
2979 case 'servername':
2980 return $this->svcOptions->get( 'ServerName' );
2981 case 'scriptpath':
2982 return $this->svcOptions->get( 'ScriptPath' );
2983 case 'stylepath':
2984 return $this->svcOptions->get( 'StylePath' );
2985 case 'directionmark':
2986 return $pageLang->getDirMark();
2987 case 'contentlanguage':
2988 return $this->svcOptions->get( 'LanguageCode' );
2989 case 'pagelanguage':
2990 $value = $pageLang->getCode();
2991 break;
2992 case 'cascadingsources':
2993 $value = CoreParserFunctions::cascadingsources( $this );
2994 break;
2995 default:
2996 $ret = null;
2997 Hooks::run(
2998 'ParserGetVariableValueSwitch',
2999 [ &$parser, &$this->mVarCache, &$index, &$ret, &$frame ]
3000 );
3001
3002 return $ret;
3003 }
3004
3005 if ( $index ) {
3006 $this->mVarCache[$index] = $value;
3007 }
3008
3009 return $value;
3010 }
3011
/**
 * Extract a slice of the (timezone-adjusted) revision timestamp.
 *
 * When the parse has no revision object yet, the same slice is also taken
 * from the timestamp $mtts seconds from now; if the two slices differ, the
 * 'vary-revision-timestamp' output flag is set.
 *
 * @param int $start Offset of the slice within the timestamp
 * @param int $len Length of the slice
 * @param int $mtts Max time-till-save; sets vary-revision-timestamp if result changes by then
 * @param string $variable Parser variable name (used in the flag's reason text)
 * @return string
 */
private function getRevisionTimestampSubstring( $start, $len, $mtts, $variable ) {
	# Slice of the timezone-adjusted timestamp to be used for this revision
	$current = substr( $this->getRevisionTimestamp(), $start, $len );

	# Nothing can vary once a revision is already associated with the parse
	if ( $this->getRevisionObject() ) {
		return $current;
	}

	# Same slice of the timezone-adjusted timestamp $mtts seconds in the future.
	# This future is relative to the current wall-clock time, not to the time
	# given by the parser options, so the rendered value can be compared with
	# the one derived from the parser options.
	$futureTimestamp = wfTimestamp( TS_MW, time() + $mtts );
	$future = substr( $this->contLang->userAdjust( $futureTimestamp, '' ), $start, $len );

	if ( $current !== $future ) {
		# Tell the edit saving system that the canonical output after revision
		# insertion needs a parse that used the actual revision timestamp
		$this->setOutputFlag( 'vary-revision-timestamp', "$variable used" );
	}

	return $current;
}
3043
/**
 * Set up the magic word arrays for variables (like CURRENTMONTHNAME)
 * and for substitution modifiers, using the magic word factory.
 *
 * @private
 */
public function initialiseVariables() {
	$factory = $this->magicWordFactory;

	$variableWordIds = $factory->getVariableIDs();
	$substWordIds = $factory->getSubstIDs();

	$this->mVariables = $factory->newArray( $variableWordIds );
	$this->mSubstWords = $factory->newArray( $substWordIds );
}
3056
/**
 * Run the preprocessor over some wikitext and return the document tree.
 * This is the ghost of replace_variables().
 *
 * Contract for the generated tree:
 *  - It must depend only on the input text and the flags.
 *  - It must be the same in OT_HTML and OT_WIKI mode, to avoid a regression of T6899.
 *  - Any flag added to $flags, or any other parameter liable to change the tree
 *    for a given text, must be passed through the section identifier in the
 *    section edit link and thus back to extractSections().
 *
 * The output is currently only cached in process memory, but a persistent cache
 * taking further advantage of these strict dependency requirements may be
 * implemented at a later date.
 *
 * @param string $text The text to parse
 * @param int $flags Bitwise combination of:
 *   - self::PTD_FOR_INCLUSION: Handle "<noinclude>" and "<includeonly>" as if the
 *     text is being included. Default is to assume a direct page view.
 *
 * @return PPNode
 */
public function preprocessToDom( $text, $flags = 0 ) {
	return $this->getPreprocessor()->preprocessToObj( $text, $flags );
}
3083
/**
 * Split a string into its leading whitespace, its trimmed contents and its
 * trailing whitespace.
 *
 * @param string $s
 *
 * @return array Three-element list: [ leading whitespace, contents, trailing whitespace ]
 */
public static function splitWhitespace( $s ) {
	$withoutLeading = ltrim( $s );
	$leadingLength = strlen( $s ) - strlen( $withoutLeading );

	$contents = rtrim( $withoutLeading );
	$trailingLength = strlen( $withoutLeading ) - strlen( $contents );

	return [
		substr( $s, 0, $leadingLength ),
		$contents,
		# substr() with offset -0 would return the whole string, so special-case it
		$trailingLength > 0 ? substr( $withoutLeading, -$trailingLength ) : '',
	];
}
3103
/**
 * Expand magic variables, templates and template arguments in some wikitext.
 * Templates are substituted recursively, taking care to avoid infinite loops.
 *
 * What gets substituted depends on the value of $mOutputType:
 *  self::OT_WIKI: only {{subst:}} templates
 *  self::OT_PREPROCESS: templates but not extension tags
 *  self::OT_HTML: all templates and extension tags
 *
 * Empty text, and text longer than the maximum include size from the parser
 * options, is returned untouched.
 *
 * @param string $text The text to transform
 * @param false|PPFrame|array $frame Object describing the arguments passed to the
 *   template. Arguments may also be provided as an associative array, as
 *   was the usual case before MW1.12. Providing arguments this way may be
 *   useful for extensions wishing to perform variable replacement
 *   explicitly.
 * @param bool $argsOnly Only do argument (triple-brace) expansion, not
 *   double-brace expansion.
 * @return string
 */
public function replaceVariables( $text, $frame = false, $argsOnly = false ) {
	# Is there any text? Also, prevent too big inclusions!
	$length = strlen( $text );
	if ( $length === 0 || $length > $this->mOptions->getMaxIncludeSize() ) {
		return $text;
	}

	# Normalise $frame to a PPFrame
	if ( $frame === false ) {
		$frame = $this->getPreprocessor()->newFrame();
	} elseif ( !( $frame instanceof PPFrame ) ) {
		$this->logger->debug(
			__METHOD__ . " called using plain parameters instead of a PPFrame instance. Creating custom frame."
		);
		$frame = $this->getPreprocessor()->newCustomFrame( $frame );
	}

	$tree = $this->preprocessToDom( $text );
	$expandFlags = 0;
	if ( $argsOnly ) {
		$expandFlags = PPFrame::NO_TEMPLATES;
	}

	return $frame->expand( $tree, $expandFlags );
}
3147
/**
 * Clean up argument array - refactored in 1.9 so parserfunctions can use it, too.
 *
 * Arguments without an "=" are numbered sequentially starting at 1. Arguments
 * containing an "=" are split at the first "=" into a name and a value, both
 * trimmed, and stored under that name; they do not consume a positional index.
 *
 * @param array $args List of raw argument strings
 *
 * @return array Map of argument name (or 1-based position) to argument value
 */
public static function createAssocArgs( $args ) {
	$assocArgs = [];
	$index = 1;
	foreach ( $args as $arg ) {
		$eqpos = strpos( $arg, '=' );
		if ( $eqpos === false ) {
			# Positional argument
			$assocArgs[$index++] = $arg;
			continue;
		}

		# Named argument. trim() always returns a string, so neither the name
		# nor the value can ever be false and no further checks are needed.
		$name = trim( substr( $arg, 0, $eqpos ) );
		$assocArgs[$name] = trim( substr( $arg, $eqpos + 1 ) );
	}

	return $assocArgs;
}
3176
/**
 * Warn the user that a parser limitation has been reached.
 * Documented to warn the user at most once per limitation type.
 *
 * Adds the "<type>-warning" message as an output warning and the
 * "<type>-category" tracking category. The results are shown during preview
 * and run through the Parser (See EditPage.php).
 *
 * @param string $limitationType Should be one of:
 *   'expensive-parserfunction' (corresponding messages:
 *       'expensive-parserfunction-warning',
 *       'expensive-parserfunction-category')
 *   'post-expand-template-argument' (corresponding messages:
 *       'post-expand-template-argument-warning',
 *       'post-expand-template-argument-category')
 *   'post-expand-template-inclusion' (corresponding messages:
 *       'post-expand-template-inclusion-warning',
 *       'post-expand-template-inclusion-category')
 *   'node-count-exceeded' (corresponding messages:
 *       'node-count-exceeded-warning',
 *       'node-count-exceeded-category')
 *   'expansion-depth-exceeded' (corresponding messages:
 *       'expansion-depth-exceeded-warning',
 *       'expansion-depth-exceeded-category')
 * @param string|int|null $current Current value
 * @param string|int|null $max Maximum allowed, when an explicit limit has been
 *   exceeded, provide the values (optional)
 */
public function limitationWarn( $limitationType, $current = '', $max = '' ) {
	# Passing $current and $max does no harm when the message does not use them.
	# ->inLanguage( $this->mOptions->getUserLangObj() ) is deliberately not used:
	# the warning is shown only during preview, and that would split the parser
	# cache unnecessarily.
	$message = wfMessage( "$limitationType-warning" )->numParams( $current, $max );

	$this->mOutput->addWarning( $message->text() );
	$this->addTrackingCategory( "$limitationType-category" );
}
3212
/**
 * Return the text of a template, after recursively
 * replacing any variables or templates within the template.
 *
 * The double-brace construct is resolved in this order: subst/safesubst
 * handling, magic variables, MSG/MSGNW/RAW prefixes, parser functions, and
 * finally template (or special page / interwiki) transclusion. If nothing
 * matches, the original wikitext is recovered and returned.
 *
 * @param array $piece The parts of the template
 *   $piece['title']: the title, i.e. the part before the |
 *   $piece['parts']: the parameter array
 *   $piece['lineStart']: whether the brace was at the start of a line
 * @param PPFrame $frame The current frame, contains template arguments
 * @throws Exception
 * @return array Either [ 'object' => $text ] when the result still needs
 *   expansion in the current frame (or nothing was found), or
 *   [ 'text' => $text ] otherwise
 */
public function braceSubstitution( $piece, $frame ) {
	// Flags

	// $text has been filled
	$found = false;
	// wiki markup in $text should be escaped
	$nowiki = false;
	// $text is HTML, armour it against wikitext transformation
	$isHTML = false;
	// Force interwiki transclusion to be done in raw mode not rendered
	$forceRawInterwiki = false;
	// $text is a DOM node needing expansion in a child frame
	$isChildObj = false;
	// $text is a DOM node needing expansion in the current frame
	$isLocalObj = false;

	# Title object, where $text came from
	$title = false;

	# $part1 is the bit before the first |, and must contain only title characters.
	# Various prefixes will be stripped from it later.
	$titleWithSpaces = $frame->expand( $piece['title'] );
	$part1 = trim( $titleWithSpaces );
	$titleText = false;

	# Original title text preserved for various purposes
	$originalTitle = $part1;

	# $args is a list of argument nodes, starting from index 0, not including $part1
	# @todo FIXME: If piece['parts'] is null then the call to getLength()
	# below won't work b/c this $args isn't an object
	$args = ( $piece['parts'] == null ) ? [] : $piece['parts'];

	$profileSection = null; // profile templates

	# SUBST
	if ( !$found ) {
		$substMatch = $this->mSubstWords->matchStartAndRemove( $part1 );

		# Possibilities for substMatch: "subst", "safesubst" or FALSE
		# Decide whether to expand template or keep wikitext as-is.
		if ( $this->ot['wiki'] ) {
			if ( $substMatch === false ) {
				$literal = true; # literal when in PST with no prefix
			} else {
				$literal = false; # expand when in PST with subst: or safesubst:
			}
		} else {
			if ( $substMatch == 'subst' ) {
				$literal = true; # literal when not in PST with plain subst:
			} else {
				$literal = false; # expand when not in PST with safesubst: or no prefix
			}
		}
		if ( $literal ) {
			$text = $frame->virtualBracketedImplode( '{{', '|', '}}', $titleWithSpaces, $args );
			$isLocalObj = true;
			$found = true;
		}
	}

	# Variables
	if ( !$found && $args->getLength() == 0 ) {
		$id = $this->mVariables->matchStartToEnd( $part1 );
		if ( $id !== false ) {
			$text = $this->getVariableValue( $id, $frame );
			if ( $this->magicWordFactory->getCacheTTL( $id ) > -1 ) {
				$this->mOutput->updateCacheExpiry(
					$this->magicWordFactory->getCacheTTL( $id ) );
			}
			$found = true;
		}
	}

	# MSG, MSGNW and RAW
	if ( !$found ) {
		# Check for MSGNW:
		$mwMsgnw = $this->magicWordFactory->get( 'msgnw' );
		if ( $mwMsgnw->matchStartAndRemove( $part1 ) ) {
			$nowiki = true;
		} else {
			# Remove obsolete MSG:
			$mwMsg = $this->magicWordFactory->get( 'msg' );
			$mwMsg->matchStartAndRemove( $part1 );
		}

		# Check for RAW:
		$mwRaw = $this->magicWordFactory->get( 'raw' );
		if ( $mwRaw->matchStartAndRemove( $part1 ) ) {
			$forceRawInterwiki = true;
		}
	}

	# Parser functions
	if ( !$found ) {
		$colonPos = strpos( $part1, ':' );
		if ( $colonPos !== false ) {
			$func = substr( $part1, 0, $colonPos );
			$funcArgs = [ trim( substr( $part1, $colonPos + 1 ) ) ];
			$argsLength = $args->getLength();
			for ( $i = 0; $i < $argsLength; $i++ ) {
				$funcArgs[] = $args->item( $i );
			}

			$result = $this->callParserFunction( $frame, $func, $funcArgs );

			// Extract any forwarded flags
			if ( isset( $result['title'] ) ) {
				$title = $result['title'];
			}
			if ( isset( $result['found'] ) ) {
				$found = $result['found'];
			}
			if ( array_key_exists( 'text', $result ) ) {
				// a string or null
				$text = $result['text'];
			}
			if ( isset( $result['nowiki'] ) ) {
				$nowiki = $result['nowiki'];
			}
			if ( isset( $result['isHTML'] ) ) {
				$isHTML = $result['isHTML'];
			}
			if ( isset( $result['forceRawInterwiki'] ) ) {
				$forceRawInterwiki = $result['forceRawInterwiki'];
			}
			if ( isset( $result['isChildObj'] ) ) {
				$isChildObj = $result['isChildObj'];
			}
			if ( isset( $result['isLocalObj'] ) ) {
				$isLocalObj = $result['isLocalObj'];
			}
		}
	}

	# Finish mangling title and then check for loops.
	# Set $title to a Title object and $titleText to the PDBK
	if ( !$found ) {
		$ns = NS_TEMPLATE;
		# Split the title into page and subpage
		$subpage = '';
		$relative = $this->maybeDoSubpageLink( $part1, $subpage );
		if ( $part1 !== $relative ) {
			$part1 = $relative;
			$ns = $this->mTitle->getNamespace();
		}
		$title = Title::newFromText( $part1, $ns );
		if ( $title ) {
			$titleText = $title->getPrefixedText();
			# Check for language variants if the template is not found
			if ( $this->getTargetLanguage()->hasVariants() && $title->getArticleID() == 0 ) {
				$this->getTargetLanguage()->findVariantLink( $part1, $title, true );
			}
			# Do recursion depth check
			$limit = $this->mOptions->getMaxTemplateDepth();
			if ( $frame->depth >= $limit ) {
				$found = true;
				$text = '<span class="error">'
					. wfMessage( 'parser-template-recursion-depth-warning' )
						->numParams( $limit )->inContentLanguage()->text()
					. '</span>';
			}
		}
	}

	# Load from database
	if ( !$found && $title ) {
		$profileSection = $this->mProfiler->scopedProfileIn( $title->getPrefixedDBkey() );
		if ( !$title->isExternal() ) {
			if ( $title->isSpecialPage()
				&& $this->mOptions->getAllowSpecialInclusion()
				&& $this->ot['html']
			) {
				$specialPage = $this->specialPageFactory->getPage( $title->getDBkey() );
				// Pass the template arguments as URL parameters.
				// "uselang" will have no effect since the Language object
				// is forced to the one defined in ParserOptions.
				$pageArgs = [];
				$argsLength = $args->getLength();
				for ( $i = 0; $i < $argsLength; $i++ ) {
					$bits = $args->item( $i )->splitArg();
					if ( strval( $bits['index'] ) === '' ) {
						$name = trim( $frame->expand( $bits['name'], PPFrame::STRIP_COMMENTS ) );
						$value = trim( $frame->expand( $bits['value'] ) );
						$pageArgs[$name] = $value;
					}
				}

				// Create a new context to execute the special page
				$context = new RequestContext;
				$context->setTitle( $title );
				$context->setRequest( new FauxRequest( $pageArgs ) );
				if ( $specialPage && $specialPage->maxIncludeCacheTime() === 0 ) {
					$context->setUser( $this->getUser() );
				} else {
					// If this page is cached, then we better not be per user.
					$context->setUser( User::newFromName( '127.0.0.1', false ) );
				}
				$context->setLanguage( $this->mOptions->getUserLangObj() );
				$ret = $this->specialPageFactory->capturePath( $title, $context, $this->getLinkRenderer() );
				if ( $ret ) {
					$text = $context->getOutput()->getHTML();
					$this->mOutput->addOutputPageMetadata( $context->getOutput() );
					$found = true;
					$isHTML = true;
					if ( $specialPage && $specialPage->maxIncludeCacheTime() !== false ) {
						$this->mOutput->updateRuntimeAdaptiveExpiry(
							$specialPage->maxIncludeCacheTime()
						);
					}
				}
			} elseif ( $this->nsInfo->isNonincludable( $title->getNamespace() ) ) {
				$found = false; # access denied
				$this->logger->debug(
					__METHOD__ .
					": template inclusion denied for " . $title->getPrefixedDBkey()
				);
			} else {
				list( $text, $title ) = $this->getTemplateDom( $title );
				if ( $text !== false ) {
					$found = true;
					$isChildObj = true;
				}
			}

			# If the title is valid but undisplayable, make a link to it
			if ( !$found && ( $this->ot['html'] || $this->ot['pre'] ) ) {
				$text = "[[:$titleText]]";
				$found = true;
			}
		} elseif ( $title->isTrans() ) {
			# Interwiki transclusion
			if ( $this->ot['html'] && !$forceRawInterwiki ) {
				$text = $this->interwikiTransclude( $title, 'render' );
				$isHTML = true;
			} else {
				$text = $this->interwikiTransclude( $title, 'raw' );
				# Preprocess it like a template
				$text = $this->preprocessToDom( $text, self::PTD_FOR_INCLUSION );
				$isChildObj = true;
			}
			$found = true;
		}

		# Do infinite loop check
		# This has to be done after redirect resolution to avoid infinite loops via redirects
		if ( !$frame->loopCheck( $title ) ) {
			$found = true;
			$text = '<span class="error">'
				. wfMessage( 'parser-template-loop-warning', $titleText )->inContentLanguage()->text()
				. '</span>';
			$this->addTrackingCategory( 'template-loop-category' );
			$this->mOutput->addWarning( wfMessage( 'template-loop-warning',
				wfEscapeWikiText( $titleText ) )->text() );
			$this->logger->debug( __METHOD__ . ": template loop broken at '$titleText'" );
		}
	}

	# If we haven't found text to substitute by now, we're done
	# Recover the source wikitext and return it
	if ( !$found ) {
		$text = $frame->virtualBracketedImplode( '{{', '|', '}}', $titleWithSpaces, $args );
		if ( $profileSection ) {
			$this->mProfiler->scopedProfileOut( $profileSection );
		}
		return [ 'object' => $text ];
	}

	# Expand DOM-style return values in a child frame
	if ( $isChildObj ) {
		# Clean up argument array
		$newFrame = $frame->newChild( $args, $title );

		if ( $nowiki ) {
			$text = $newFrame->expand( $text, PPFrame::RECOVER_ORIG );
		} elseif ( $titleText !== false && $newFrame->isEmpty() ) {
			# Expansion is eligible for the empty-frame cache
			$text = $newFrame->cachedExpand( $titleText, $text );
		} else {
			# Uncached expansion
			$text = $newFrame->expand( $text );
		}
	}
	if ( $isLocalObj && $nowiki ) {
		$text = $frame->expand( $text, PPFrame::RECOVER_ORIG );
		$isLocalObj = false;
	}

	if ( $profileSection ) {
		$this->mProfiler->scopedProfileOut( $profileSection );
	}

	# Replace raw HTML by a placeholder
	if ( $isHTML ) {
		$text = $this->insertStripItem( $text );
	} elseif ( $nowiki && ( $this->ot['html'] || $this->ot['pre'] ) ) {
		# Escape nowiki-style return values
		$text = wfEscapeWikiText( $text );
	} elseif ( is_string( $text )
		&& !$piece['lineStart']
		&& preg_match( '/^(?:{\\||:|;|#|\*)/', $text )
	) {
		# T2529: if the template begins with a table or block-level
		# element, it should be treated as beginning a new line.
		# This behavior is somewhat controversial.
		$text = "\n" . $text;
	}

	if ( is_string( $text ) && !$this->incrementIncludeSize( 'post-expand', strlen( $text ) ) ) {
		# Error, oversize inclusion
		if ( $titleText !== false ) {
			# Make a working, properly escaped link if possible (T25588)
			$text = "[[:$titleText]]";
		} else {
			# This will probably not be a working link, but at least it may
			# provide some hint of where the problem is.
			# preg_replace() returns the new string rather than modifying its
			# subject, so the result must be assigned back; otherwise a leading
			# colon would be doubled in the link below.
			$originalTitle = preg_replace( '/^:/', '', $originalTitle );
			$text = "[[:$originalTitle]]";
		}
		$text .= $this->insertStripItem( '<!-- WARNING: template omitted, '
			. 'post-expand include size too large -->' );
		$this->limitationWarn( 'post-expand-template-inclusion' );
	}

	if ( $isLocalObj ) {
		$ret = [ 'object' => $text ];
	} else {
		$ret = [ 'text' => $text ];
	}

	return $ret;
}
3557
/**
 * Call a parser function and return an array with text and flags.
 *
 * The returned array will always contain a boolean 'found', indicating
 * whether the parser function was found or not. It may also contain the
 * following:
 *  text: string|object, resulting wikitext or PP DOM object
 *  isHTML: bool, $text is HTML, armour it against wikitext transformation
 *  isChildObj: bool, $text is a DOM node needing expansion in a child frame
 *  isLocalObj: bool, $text is a DOM node needing expansion in the current frame
 *  nowiki: bool, wiki markup in $text should be escaped
 *
 * The function name is first looked up among the case-sensitive synonyms,
 * then (lower-cased in the content language) among the case-insensitive ones.
 *
 * @since 1.21
 * @param PPFrame $frame The current frame, contains template arguments
 * @param string $function Function name
 * @param array $args Arguments to the function
 * @throws MWException
 * @return array
 */
public function callParserFunction( $frame, $function, array $args = [] ) {
	# Case sensitive functions
	if ( isset( $this->mFunctionSynonyms[1][$function] ) ) {
		$function = $this->mFunctionSynonyms[1][$function];
	} else {
		# Case insensitive functions
		$function = $this->contLang->lc( $function );
		if ( isset( $this->mFunctionSynonyms[0][$function] ) ) {
			$function = $this->mFunctionSynonyms[0][$function];
		} else {
			return [ 'found' => false ];
		}
	}

	list( $callback, $flags ) = $this->mFunctionHooks[$function];

	// Avoid PHP 7.1 warning from passing $this by reference
	$parser = $this;

	$allArgs = [ &$parser ];
	if ( $flags & self::SFH_OBJECT_ARGS ) {
		# Convert arguments to PPNodes and collect for appending to $allArgs
		$funcArgs = [];
		foreach ( $args as $k => $v ) {
			if ( $v instanceof PPNode || $k === 0 ) {
				$funcArgs[] = $v;
			} else {
				# Go through the accessor, like the rest of this class does,
				# instead of reading the property directly.
				$funcArgs[] = $this->getPreprocessor()->newPartNodeArray( [ $k => $v ] )->item( 0 );
			}
		}

		# Add a frame parameter, and pass the arguments as an array
		$allArgs[] = $frame;
		$allArgs[] = $funcArgs;
	} else {
		# Convert arguments to plain text and append to $allArgs
		foreach ( $args as $k => $v ) {
			if ( $v instanceof PPNode ) {
				$allArgs[] = trim( $frame->expand( $v ) );
			} elseif ( is_int( $k ) && $k >= 0 ) {
				$allArgs[] = trim( $v );
			} else {
				$allArgs[] = trim( "$k=$v" );
			}
		}
	}

	$result = $callback( ...$allArgs );

	# The interface for function hooks allows them to return a wikitext
	# string or an array containing the string and any flags. This mungs
	# things around to match what this method should return.
	if ( !is_array( $result ) ) {
		$result = [
			'found' => true,
			'text' => $result,
		];
	} else {
		if ( isset( $result[0] ) && !isset( $result['text'] ) ) {
			$result['text'] = $result[0];
		}
		unset( $result[0] );
		# Array union: only adds 'found' when the hook did not set it itself
		$result += [
			'found' => true,
		];
	}

	$noparse = true;
	$preprocessFlags = 0;
	if ( isset( $result['noparse'] ) ) {
		$noparse = $result['noparse'];
	}
	if ( isset( $result['preprocessFlags'] ) ) {
		$preprocessFlags = $result['preprocessFlags'];
	}

	if ( !$noparse ) {
		# The hook asked for its output to be parsed: turn the text into a
		# DOM tree that the caller expands in a child frame
		$result['text'] = $this->preprocessToDom( $result['text'], $preprocessFlags );
		$result['isChildObj'] = true;
	}

	return $result;
}
3660
/**
 * Get the semi-parsed DOM representation of a template with a given title,
 * together with its redirect destination title. Results are cached on this
 * parser instance; a missing template is cached as false.
 *
 * @param Title $title
 *
 * @return array Two-element list: [ DOM tree or false, final Title ]
 */
public function getTemplateDom( $title ) {
	$requestedTitle = $title;
	$domKey = $title->getPrefixedDBkey();

	# Follow a redirect remembered from an earlier fetch
	if ( isset( $this->mTplRedirCache[$domKey] ) ) {
		list( $targetNs, $targetDbKey ) = $this->mTplRedirCache[$domKey];
		$title = Title::makeTitle( $targetNs, $targetDbKey );
		$domKey = $title->getPrefixedDBkey();
	}

	# Cache hit (this also covers a cached "missing" result of false)
	if ( isset( $this->mTplDomCache[$domKey] ) ) {
		return [ $this->mTplDomCache[$domKey], $title ];
	}

	# Cache miss, go to the database
	list( $wikitext, $title ) = $this->fetchTemplateAndTitle( $title );

	if ( $wikitext === false ) {
		$this->mTplDomCache[$domKey] = false;
		return [ false, $title ];
	}

	$tree = $this->preprocessToDom( $wikitext, self::PTD_FOR_INCLUSION );
	$this->mTplDomCache[$domKey] = $tree;

	# Remember where the requested title ended up, if it differs
	if ( !$title->equals( $requestedTitle ) ) {
		$this->mTplRedirCache[$requestedTitle->getPrefixedDBkey()] = [
			$title->getNamespace(),
			$title->getDBkey(),
		];
	}

	return [ $tree, $title ];
}
3700
/**
 * Fetch the current revision of a given title. Note that the revision
 * (and even the title) may not exist in the database, so everything
 * contributing to the output of the parser should use this method
 * where possible, rather than getting the revisions themselves. This
 * method also caches its results (keyed by prefixed DB key), so using
 * it benefits performance.
 *
 * @since 1.24
 * @param Title $title
 * @return Revision
 */
public function fetchCurrentRevisionOfTitle( $title ) {
	# Create the LRU cache on first use
	if ( !$this->currentRevisionCache ) {
		$this->currentRevisionCache = new MapCacheLRU( 100 );
	}

	$key = $title->getPrefixedDBkey();
	if ( !$this->currentRevisionCache->has( $key ) ) {
		// Defaults to Parser::statelessFetchRevision()
		$fetchCallback = $this->mOptions->getCurrentRevisionCallback();
		$this->currentRevisionCache->set( $key, call_user_func( $fetchCallback, $title, $this ) );
	}

	return $this->currentRevisionCache->get( $key );
}
3725
/**
 * Check whether fetchCurrentRevisionOfTitle() already holds a cached
 * result for the given title.
 *
 * @param Title $title
 * @return bool
 * @since 1.34
 */
public function isCurrentRevisionOfTitleCached( $title ) {
	return (
		$this->currentRevisionCache &&
		# Must use the same key as fetchCurrentRevisionOfTitle(), which stores
		# entries under the prefixed DB key (underscores), not the prefixed
		# text (spaces); otherwise titles containing spaces never match.
		$this->currentRevisionCache->has( $title->getPrefixedDBkey() )
	);
}
3737
/**
 * Wrapper around Revision::newKnownCurrent (using a replica DB connection)
 * that allows additional parameters to be passed without forwarding them.
 *
 * @since 1.24
 * @param Title $title
 * @param Parser|bool $parser Accepted but not used here
 * @return Revision|bool False if missing
 */
public static function statelessFetchRevision( Title $title, $parser = false ) {
	return Revision::newKnownCurrent( wfGetDB( DB_REPLICA ), $title );
}
3752
/**
 * Fetch the unparsed text of a template through the template callback of the
 * parser options, and register every dependency it reports on the output.
 *
 * @param Title $title
 * @return array Two-element list: [ string|false $text, Title $finalTitle ]
 */
public function fetchTemplateAndTitle( $title ) {
	// Defaults to Parser::statelessFetchTemplate()
	$fetched = call_user_func( $this->mOptions->getTemplateCallback(), $title, $this );

	$revision = $fetched['revision'] ?? null;
	$wikitext = $fetched['text'];
	if ( is_string( $wikitext ) ) {
		// We use U+007F DELETE to distinguish strip markers from regular text
		$wikitext = strtr( $wikitext, "\x7f", "?" );
	}
	$resolvedTitle = $fetched['finalTitle'] ?? $title;

	$dependencies = $fetched['deps'] ?? [];
	foreach ( $dependencies as $dependency ) {
		$depTitle = $dependency['title'];
		$this->mOutput->addTemplate( $depTitle, $dependency['page_id'], $dependency['rev_id'] );

		if ( $depTitle->equals( $this->getTitle() ) && $revision instanceof Revision ) {
			// Self-transclusion; final result may change based on the new page version
			$this->setOutputFlag( 'vary-revision-sha1', 'Self transclusion' );
			$this->getOutput()->setRevisionUsedSha1Base36( $revision->getSha1() );
		}
	}

	return [ $wikitext, $resolvedTitle ];
}
3780
/**
 * Fetch the unparsed text of a template and register a reference to it.
 * Convenience form of fetchTemplateAndTitle() that drops the final title.
 *
 * @param Title $title
 * @return string|bool
 */
public function fetchTemplate( $title ) {
	list( $text ) = $this->fetchTemplateAndTitle( $title );

	return $text;
}
3789
/**
 * Static function to get a template
 * Can be overridden via ParserOptions::setTemplateCallback().
 *
 * Looks up the given title and, if its content is a redirect, the redirect
 * target (one hop at most), collecting every page touched on the way.
 *
 * @param Title $title
 * @param bool|Parser $parser Parser used to look up the current revision;
 *   when false, Revision::newFromTitle() is used instead
 *
 * @return array With the keys:
 *   - 'revision': the last revision object loaded, or null
 *   - 'text': the wikitext, or false if none could be obtained
 *   - 'finalTitle': the last title whose content was obtained
 *   - 'deps': list of [ 'title', 'page_id', 'rev_id' ] arrays
 */
public static function statelessFetchTemplate( $title, $parser = false ) {
	$wikitext = false;
	$skip = false;
	$resolvedTitle = $title;
	$dependencies = [];
	$revision = null;

	# At most two passes: the requested title, then the target of one redirect
	for ( $pass = 0; $pass < 2 && is_object( $title ); $pass++ ) {
		# Extensions may veto the fetch ($skip) or select a revision ($revIdOverride)
		$revIdOverride = false; # false means "use the current revision"
		Hooks::run( 'BeforeParserFetchTemplateAndtitle',
			[ $parser, $title, &$skip, &$revIdOverride ] );

		if ( $skip ) {
			$wikitext = false;
			$dependencies[] = [
				'title' => $title,
				'page_id' => $title->getArticleID(),
				'rev_id' => null
			];
			break;
		}

		# Load the revision
		if ( $revIdOverride ) {
			$revision = Revision::newFromId( $revIdOverride );
		} elseif ( $parser ) {
			$revision = $parser->fetchCurrentRevisionOfTitle( $title );
		} else {
			$revision = Revision::newFromTitle( $title );
		}
		$revisionId = $revision ? $revision->getId() : 0;

		# No current revision means there is no page
		if ( $revIdOverride === false && !$revision ) {
			MediaWikiServices::getInstance()->getLinkCache()->addBadLinkObj( $title );
		}

		$dependencies[] = [
			'title' => $title,
			'page_id' => $title->getArticleID(),
			'rev_id' => $revisionId
		];
		if ( $revision && !$title->equals( $revision->getTitle() ) ) {
			# The revision belongs to a different title; register that one as well
			$dependencies[] = [
				'title' => $revision->getTitle(),
				'page_id' => $revision->getPage(),
				'rev_id' => $revisionId
			];
		}

		if ( $revision ) {
			$content = $revision->getContent();
			$wikitext = $content ? $content->getWikitextForTransclusion() : null;

			Hooks::run( 'ParserFetchTemplate',
				[ $parser, $title, $revision, &$wikitext, &$dependencies ] );

			if ( $wikitext === false || $wikitext === null ) {
				$wikitext = false;
				break;
			}
		} elseif ( $title->getNamespace() == NS_MEDIAWIKI ) {
			# No revision, but the title may still name an interface message
			$contLang = MediaWikiServices::getInstance()->getContentLanguage();
			$messageKey = $contLang->lcfirst( $title->getText() );
			$message = wfMessage( $messageKey )->inContentLanguage();
			if ( !$message->exists() ) {
				$wikitext = false;
				break;
			}
			$content = $message->content();
			$wikitext = $message->plain();
		} else {
			break;
		}

		if ( !$content ) {
			break;
		}

		# Redirect? If so the next pass fetches the target
		$resolvedTitle = $title;
		$title = $content->getRedirectTarget();
	}

	return [
		'revision' => $revision,
		'text' => $wikitext,
		'finalTitle' => $resolvedTitle,
		'deps' => $dependencies
	];
}
3887
/**
 * Fetch a file and its title and register a reference to it.
 * If 'broken' is a key in $options then the file will appear as a broken thumbnail.
 *
 * The dependency is recorded on the parser output under the requested name
 * and, if the file found carries a different title, under that title too.
 *
 * @param Title $title
 * @param array $options Array of options to RepoGroup::findFile
 * @return array ( File or false, Title of file )
 */
public function fetchFileAndTitle( $title, $options = [] ) {
	$file = $this->fetchFileNoRegister( $title, $options );

	if ( $file ) {
		$timestamp = $file->getTimestamp();
		$hash = $file->getSha1();
	} else {
		$timestamp = false;
		$hash = false;
	}

	# Register the file as a dependency...
	$this->mOutput->addImage( $title->getDBkey(), $timestamp, $hash );

	if ( $file && !$title->equals( $file->getTitle() ) ) {
		# ...and again under the title of the file that was actually found
		$title = $file->getTitle();
		$this->mOutput->addImage( $title->getDBkey(), $timestamp, $hash );
	}

	return [ $file, $title ];
}
3909
/**
 * Helper function for fetchFileAndTitle.
 *
 * Also useful if you need to fetch a file but not use it yet,
 * for example to get the file's handler.
 *
 * Lookup strategy, in order:
 *  - 'broken' set in $options: no lookup at all, false is returned
 *  - 'sha1' set in $options: lookup by (sha1,timestamp)
 *  - otherwise: lookup by (name,timestamp)
 *
 * @param Title $title
 * @param array $options Array of options to RepoGroup::findFile
 * @return File|bool
 */
protected function fetchFileNoRegister( $title, $options = [] ) {
	if ( isset( $options['broken'] ) ) {
		// broken thumbnail forced by hook
		return false;
	}

	// Both lookups go through the RepoGroup service rather than mixing it
	// with the legacy RepoGroup::singleton() accessor.
	$repoGroup = MediaWikiServices::getInstance()->getRepoGroup();

	if ( isset( $options['sha1'] ) ) {
		// get by (sha1,timestamp)
		return $repoGroup->findFileFromKey( $options['sha1'], $options );
	}

	// get by (name,timestamp)
	return $repoGroup->findFile( $title, $options );
}
3930
/**
 * Transclude an interwiki link.
 *
 * The remote page is requested over HTTP and the response is kept in the
 * main WAN object cache. Returns either the fetched text or a localized
 * error message (feature disabled, URL too long, or fetch failed).
 *
 * @param Title $title
 * @param string $action Usually one of (raw, render)
 *
 * @return string
 */
public function interwikiTransclude( $title, $action ) {
	if ( !$this->svcOptions->get( 'EnableScaryTranscluding' ) ) {
		return wfMessage( 'scarytranscludedisabled' )->inContentLanguage()->text();
	}

	$url = $title->getFullURL( [ 'action' => $action ] );
	if ( strlen( $url ) > 1024 ) {
		return wfMessage( 'scarytranscludetoolong' )->inContentLanguage()->text();
	}

	$wikiId = $title->getTransWikiID(); // remote wiki ID or false
	$hasWikiId = ( $wikiId !== false );

	// __METHOD__ would name the closure if evaluated inside it, so capture it here
	$fname = __METHOD__;
	$cache = MediaWikiServices::getInstance()->getMainWANObjectCache();

	$cacheKey = $cache->makeGlobalKey(
		'interwiki-transclude',
		$hasWikiId ? $wikiId : 'external',
		sha1( $url )
	);

	$fetchRemote = function ( $oldValue, &$ttl ) use ( $url, $fname, $cache ) {
		$request = MWHttpRequest::factory( $url, [], $fname );

		$status = $request->execute(); // Status object
		$succeeded = $status->isOK();
		if ( !$succeeded ) {
			$ttl = $cache::TTL_UNCACHEABLE;
		} elseif ( $request->getResponseHeader( 'X-Database-Lagged' ) !== null ) {
			$ttl = min( $cache::TTL_LAGGED, $ttl );
		}

		return [
			'text' => $succeeded ? $request->getContent() : null,
			'code' => $request->getStatus()
		];
	};

	$cacheOptions = [
		'checkKeys' => $hasWikiId
			? [ $cache->makeGlobalKey( 'interwiki-page', $wikiId, $title->getDBkey() ) ]
			: [],
		'pcGroup' => 'interwiki-transclude:5',
		'pcTTL' => $cache::TTL_PROC_LONG
	];

	$data = $cache->getWithSetCallback(
		$cacheKey,
		$this->svcOptions->get( 'TranscludeCacheExpiry' ),
		$fetchRemote,
		$cacheOptions
	);

	if ( is_string( $data['text'] ) ) {
		return $data['text'];
	}

	if ( $data['code'] != 200 ) {
		// Though we failed to fetch the content, this status is useless.
		return wfMessage( 'scarytranscludefailed-httpstatus' )
			->params( $url, $data['code'] )->inContentLanguage()->text();
	}

	return wfMessage( 'scarytranscludefailed', $url )->inContentLanguage()->text();
}
3997
/**
 * Triple brace replacement -- used for template arguments
 * @private
 *
 * @param array $piece Must provide the 'title' and 'parts' elements
 * @param PPFrame $frame
 *
 * @return array Either [ 'object' => ... ] (supplied default or the literal
 *   {{{...}}} construct, still to be expanded) or [ 'text' => ... ]
 */
public function argSubstitution( $piece, $frame ) {
	$parts = $piece['parts'];
	$nameWithSpaces = $frame->expand( $piece['title'] );
	$value = $frame->getArgument( trim( $nameWithSpaces ) );

	$node = false;
	$sizeWarning = false;

	$useDefault = $value === false
		&& $parts->getLength() > 0
		&& (
			$this->ot['html']
			|| $this->ot['pre']
			|| ( $this->ot['wiki'] && $frame->isTemplate() )
		);
	if ( $useDefault ) {
		# No match in frame, use the supplied default
		$node = $parts->item( 0 )->getChildren();
	}

	if ( !$this->incrementIncludeSize( 'arg', strlen( $value ) ) ) {
		$sizeWarning = '<!-- WARNING: argument omitted, expansion size too large -->';
		$this->limitationWarn( 'post-expand-template-argument' );
	}

	if ( $value === false && $node === false ) {
		# No match anywhere: rebuild the literal triple-brace construct
		$node = $frame->virtualBracketedImplode( '{{{', '|', '}}}', $nameWithSpaces, $parts );
	}

	if ( $sizeWarning !== false ) {
		$value .= $sizeWarning;
	}

	return $node !== false
		? [ 'object' => $node ]
		: [ 'text' => $value ];
}
4043
/**
 * Return the text to be used for a given extension tag.
 * This is the ghost of strip().
 *
 * When producing HTML (or for function tag hooks in html/pre mode) the
 * registered hook is invoked; otherwise the tag is rebuilt as text. Unless
 * the marker type is 'none', the result is stored in the strip state and
 * the strip marker is returned in its place.
 *
 * @param array $params Associative array of parameters:
 *     name       PPNode for the tag name
 *     attr       PPNode for unparsed text where tag attributes are thought to be
 *     attributes Optional associative array of parsed attributes
 *     inner      Contents of extension element
 *     noClose    Original text did not have a close tag
 * @param PPFrame $frame
 *
 * @throws MWException If a hook requests a marker type other than
 *   'none', 'nowiki' or 'general'
 * @return string
 */
public function extensionSubstitution( $params, $frame ) {
	static $errorPrefix = '<span class="error">';
	static $errorPrefixLen = 20;

	$name = $frame->expand( $params['name'] );
	if ( substr( $name, 0, $errorPrefixLen ) === $errorPrefix ) {
		// Probably expansion depth or node count exceeded. Just punt the
		// error up.
		return $name;
	}

	$attrText = isset( $params['attr'] ) ? $frame->expand( $params['attr'] ) : null;
	if ( substr( $attrText, 0, $errorPrefixLen ) === $errorPrefix ) {
		// Same situation as for the name
		return $attrText;
	}

	// We can't safely check if the expansion for $content resulted in an
	// error, because the content could happen to be the error string
	// (T149622).
	$content = isset( $params['inner'] ) ? $frame->expand( $params['inner'] ) : null;

	$serial = sprintf( '%08X', $this->mMarkerIndex++ );
	$marker = self::MARKER_PREFIX . "-$name-" . $serial . self::MARKER_SUFFIX;

	$isFunctionTag = isset( $this->mFunctionTagHooks[strtolower( $name )] ) &&
		( $this->ot['html'] || $this->ot['pre'] );
	$markerType = $isFunctionTag ? 'none' : 'general';

	if ( $this->ot['html'] || $isFunctionTag ) {
		// Run the registered hook for this tag
		$name = strtolower( $name );
		$attributes = Sanitizer::decodeTagAttributes( $attrText );
		if ( isset( $params['attributes'] ) ) {
			$attributes += $params['attributes'];
		}

		if ( isset( $this->mTagHooks[$name] ) ) {
			$output = call_user_func_array(
				$this->mTagHooks[$name],
				[ $content, $attributes, $this, $frame ]
			);
		} elseif ( isset( $this->mFunctionTagHooks[$name] ) ) {
			$callback = $this->mFunctionTagHooks[$name][0];

			// Avoid PHP 7.1 warning from passing $this by reference
			$parser = $this;
			$output = call_user_func_array( $callback, [ &$parser, $frame, $content, $attributes ] );
		} else {
			$output = '<span class="error">Invalid tag extension name: ' .
				htmlspecialchars( $name ) . '</span>';
		}

		if ( is_array( $output ) ) {
			// The hook returned flags alongside the text; element 0 is the text
			$flags = $output;
			$output = $flags[0];
			if ( isset( $flags['markerType'] ) ) {
				$markerType = $flags['markerType'];
			}
		}
	} else {
		// Not rendering: rebuild the tag as text
		if ( $attrText === null ) {
			$attrText = '';
		}
		if ( isset( $params['attributes'] ) ) {
			foreach ( $params['attributes'] as $attrName => $attrValue ) {
				$attrText .= ' ' . htmlspecialchars( $attrName ) . '="' .
					htmlspecialchars( $attrValue ) . '"';
			}
		}
		if ( $content === null ) {
			$output = "<$name$attrText/>";
		} else {
			$close = $params['close'] === null ? '' : $frame->expand( $params['close'] );
			if ( substr( $close, 0, $errorPrefixLen ) === $errorPrefix ) {
				// Same situation as for the name
				return $close;
			}
			$output = "<$name$attrText>$content$close";
		}
	}

	if ( $markerType === 'none' ) {
		return $output;
	}

	if ( $markerType === 'nowiki' ) {
		$this->mStripState->addNoWiki( $marker, $output );
	} elseif ( $markerType === 'general' ) {
		$this->mStripState->addGeneral( $marker, $output );
	} else {
		throw new MWException( __METHOD__ . ': invalid marker type' );
	}

	return $marker;
}
4153
/**
 * Increment an include size counter
 *
 * The counter is left untouched when the addition would exceed the
 * maximum include size from the parser options.
 *
 * @param string $type The type of expansion
 * @param int $size The size of the text
 * @return bool False if this inclusion would take it over the maximum, true otherwise
 */
public function incrementIncludeSize( $type, $size ) {
	$projected = $this->mIncludeSizes[$type] + $size;
	if ( $projected > $this->mOptions->getMaxIncludeSize() ) {
		return false;
	}

	$this->mIncludeSizes[$type] = $projected;
	return true;
}
4169
/**
 * Increment the expensive function count
 *
 * The counter is incremented on every call, whether or not the limit
 * from the parser options has already been passed.
 *
 * @return bool False if the limit has been exceeded
 */
public function incrementExpensiveFunctionCount() {
	$count = ++$this->mExpensiveFunctionCount;

	return $count <= $this->mOptions->getExpensiveParserFunctionLimit();
}
4179
/**
 * Strip double-underscore items like __NOGALLERY__ and __NOTOC__
 * Fills $this->mDoubleUnderscores, returns the modified text
 *
 * The first __TOC__ is replaced with a placeholder comment and any further
 * ones are dropped. Each item found is also stored as a property on the
 * parser output.
 *
 * @param string $text
 *
 * @return string
 */
public function doDoubleUnderscore( $text ) {
	# The position of __TOC__ needs to be recorded
	$tocWord = $this->magicWordFactory->get( 'toc' );
	if ( $tocWord->match( $text ) ) {
		$this->mShowToc = true;
		$this->mForceTocPosition = true;

		# Put a placeholder where the first one was; the TOC is filled in at the end
		$text = $tocWord->replace( '<!--MWTOC\'"-->', $text, 1 );

		# Drop all remaining occurrences
		$text = $tocWord->replace( '', $text );
	}

	# Now match and remove the rest of them
	$underscoreWords = $this->magicWordFactory->getDoubleUnderscoreArray();
	$this->mDoubleUnderscores = $underscoreWords->matchAndRemove( $text );
	$found = $this->mDoubleUnderscores;

	if ( isset( $found['nogallery'] ) ) {
		$this->mOutput->mNoGallery = true;
	}

	# An explicit __TOC__ takes precedence over __NOTOC__
	if ( isset( $found['notoc'] ) && !$this->mForceTocPosition ) {
		$this->mShowToc = false;
	}

	if ( isset( $found['hiddencat'] ) && $this->mTitle->getNamespace() == NS_CATEGORY ) {
		$this->addTrackingCategory( 'hidden-category-category' );
	}

	# (T10068) Allow control over whether robots index a page.
	# __INDEX__ always overrides __NOINDEX__, see T16899
	if ( isset( $found['noindex'] ) && $this->mTitle->canUseNoindex() ) {
		$this->mOutput->setIndexPolicy( 'noindex' );
		$this->addTrackingCategory( 'noindex-category' );
	}
	if ( isset( $found['index'] ) && $this->mTitle->canUseNoindex() ) {
		$this->mOutput->setIndexPolicy( 'index' );
		$this->addTrackingCategory( 'index-category' );
	}

	# Cache all double underscores in the database
	foreach ( array_keys( $found ) as $word ) {
		$this->mOutput->setProperty( $word, '' );
	}

	return $text;
}
4235
/**
 * Add a tracking category to the parser output, using the title
 * currently being parsed.
 *
 * @see ParserOutput::addTrackingCategory()
 * @param string $msg Message key
 * @return bool Whether the addition was successful
 */
public function addTrackingCategory( $msg ) {
	$wasAdded = $this->mOutput->addTrackingCategory( $msg, $this->mTitle );

	return $wasAdded;
}
4244
/**
 * This function accomplishes several tasks:
 * 1) Auto-number headings if that option is enabled
 * 2) Add an [edit] link to sections for users who have enabled the option and can edit the page
 * 3) Add a Table of contents on the top for users who have enabled the option
 * 4) Auto-anchor headings
 *
 * It loops through all headlines, collects the necessary data, then splits up the
 * string and re-inserts the newly formatted headlines.
 *
 * Side effects visible in this method: the output type is temporarily switched
 * to OT_WIKI while the headlines are processed, the TOC HTML is stored on the
 * parser output when a TOC is produced, and the section list is stored on the
 * parser output when $isMain is true.
 *
 * @param string $text HTML in which the <h1>..<h6> elements are to be formatted
 * @param string $origText Original, untouched wikitext
 * @param bool $isMain If false, the section list is not stored on the output
 *   and the TOC is not automatically prepended to the first heading
 * @return string
 * @private
 */
public function formatHeadings( $text, $origText, $isMain = true ) {
	# Inhibit editsection links if requested in the page.
	# Actual presence will depend on post-cache transforms.
	$maybeShowEditLink = !isset( $this->mDoubleUnderscores['noeditsection'] );

	# Get all headlines for numbering them and adding funky stuff like [edit]
	# links - this is for later, but we need the number of headlines right now
	# NOTE: white space in headings have been trimmed in doHeadings. They shouldn't
	# be trimmed here since whitespace in HTML headings is significant.
	$matches = [];
	$numMatches = preg_match_all(
		'/<H(?P<level>[1-6])(?P<attrib>.*?>)(?P<header>[\s\S]*?)<\/H[1-6] *>/i',
		$text,
		$matches
	);

	# if there are fewer than 4 headlines in the article, do not show TOC
	# unless it's been explicitly enabled.
	$enoughToc = $this->mShowToc &&
		( ( $numMatches >= 4 ) || $this->mForceTocPosition );

	# Allow user to stipulate that a page should have a "new section"
	# link added via __NEWSECTIONLINK__
	if ( isset( $this->mDoubleUnderscores['newsectionlink'] ) ) {
		$this->mOutput->setNewSection( true );
	}

	# Allow user to remove the "new section"
	# link via __NONEWSECTIONLINK__
	if ( isset( $this->mDoubleUnderscores['nonewsectionlink'] ) ) {
		$this->mOutput->hideNewSection( true );
	}

	# if the string __FORCETOC__ (not case-sensitive) occurs in the HTML,
	# override above conditions and always show TOC above first header
	if ( isset( $this->mDoubleUnderscores['forcetoc'] ) ) {
		$this->mShowToc = true;
		$enoughToc = true;
	}

	# headline counter
	$headlineCount = 0;
	$numVisible = 0;

	# Ugh .. the TOC should have neat indentation levels which can be
	# passed to the skin functions. These are determined here
	$toc = '';
	$head = [];
	$sublevelCount = [];
	$levelCount = [];
	$level = 0;
	$prevlevel = 0;
	$toclevel = 0;
	$prevtoclevel = 0;
	$markerRegex = self::MARKER_PREFIX . "-h-(\d+)-" . self::MARKER_SUFFIX;
	$baseTitleText = $this->mTitle->getPrefixedDBkey();
	$oldType = $this->mOutputType;
	$this->setOutputType( self::OT_WIKI );
	$frame = $this->getPreprocessor()->newFrame();
	$root = $this->preprocessToDom( $origText );
	$node = $root->getFirstChild();
	$byteOffset = 0;
	$tocraw = [];
	# Lower-cased ids already handed out, used to keep anchors unique
	$refers = [];

	$headlines = $numMatches !== false ? $matches['header'] : [];

	$maxTocLevel = $this->svcOptions->get( 'MaxTocLevel' );
	foreach ( $headlines as $headline ) {
		$isTemplate = false;
		$titleText = false;
		$sectionIndex = false;
		$numbering = '';
		$markerMatches = [];
		if ( preg_match( "/^$markerRegex/", $headline, $markerMatches ) ) {
			# The heading marker tells us which title and section the heading came from
			$serial = $markerMatches[1];
			list( $titleText, $sectionIndex ) = $this->mHeadings[$serial];
			$isTemplate = ( $titleText != $baseTitleText );
			$headline = preg_replace( "/^$markerRegex\\s*/", "", $headline );
		}

		if ( $toclevel ) {
			$prevlevel = $level;
		}
		$level = $matches['level'][$headlineCount];

		if ( $level > $prevlevel ) {
			# Increase TOC level
			$toclevel++;
			$sublevelCount[$toclevel] = 0;
			if ( $toclevel < $maxTocLevel ) {
				$prevtoclevel = $toclevel;
				$toc .= Linker::tocIndent();
				$numVisible++;
			}
		} elseif ( $level < $prevlevel && $toclevel > 1 ) {
			# Decrease TOC level, find level to jump to

			for ( $i = $toclevel; $i > 0; $i-- ) {
				if ( $levelCount[$i] == $level ) {
					# Found last matching level
					$toclevel = $i;
					break;
				} elseif ( $levelCount[$i] < $level ) {
					# Found first matching level below current level
					$toclevel = $i + 1;
					break;
				}
			}
			if ( $i == 0 ) {
				$toclevel = 1;
			}
			if ( $toclevel < $maxTocLevel ) {
				if ( $prevtoclevel < $maxTocLevel ) {
					# Unindent only if the previous toc level was shown :p
					$toc .= Linker::tocUnindent( $prevtoclevel - $toclevel );
					$prevtoclevel = $toclevel;
				} else {
					$toc .= Linker::tocLineEnd();
				}
			}
		} else {
			# No change in level, end TOC line
			if ( $toclevel < $maxTocLevel ) {
				$toc .= Linker::tocLineEnd();
			}
		}

		$levelCount[$toclevel] = $level;

		# count number of headlines for each level
		$sublevelCount[$toclevel]++;
		$dot = 0;
		for ( $i = 1; $i <= $toclevel; $i++ ) {
			if ( !empty( $sublevelCount[$i] ) ) {
				if ( $dot ) {
					$numbering .= '.';
				}
				$numbering .= $this->getTargetLanguage()->formatNum( $sublevelCount[$i] );
				$dot = 1;
			}
		}

		# The safe header is a version of the header text safe to use for links

		# Remove link placeholders by the link text.
		#     <!--LINK number-->
		# turns into
		#     link text with suffix
		# Do this before unstrip since link text can contain strip markers
		$safeHeadline = $this->replaceLinkHoldersText( $headline );

		# Avoid insertion of weird stuff like <math> by expanding the relevant sections
		$safeHeadline = $this->mStripState->unstripBoth( $safeHeadline );

		# Remove any <style> or <script> tags (T198618)
		$safeHeadline = preg_replace(
			'#<(style|script)(?: [^>]*[^>/])?>.*?</\1>#is',
			'',
			$safeHeadline
		);

		# Strip out HTML (first regex removes any tag not allowed)
		# Allowed tags are:
		# * <sup> and <sub> (T10393)
		# * <i> (T28375)
		# * <b> (r105284)
		# * <bdi> (T74884)
		# * <span dir="rtl"> and <span dir="ltr"> (T37167)
		# * <s> and <strike> (T35715)
		# We strip any parameter from accepted tags (second regex), except dir="rtl|ltr" from <span>,
		# to allow setting directionality in toc items.
		$tocline = preg_replace(
			[
				'#<(?!/?(span|sup|sub|bdi|i|b|s|strike)(?: [^>]*)?>).*?>#',
				'#<(/?(?:span(?: dir="(?:rtl|ltr)")?|sup|sub|bdi|i|b|s|strike))(?: .*?)?>#'
			],
			[ '', '<$1>' ],
			$safeHeadline
		);

		# Strip '<span></span>', which is the result from the above if
		# <span id="foo"></span> is used to produce an additional anchor
		# for a section.
		$tocline = str_replace( '<span></span>', '', $tocline );

		$tocline = trim( $tocline );

		# For the anchor, strip out HTML-y stuff period
		$safeHeadline = preg_replace( '/<.*?>/', '', $safeHeadline );
		$safeHeadline = Sanitizer::normalizeSectionNameWhitespace( $safeHeadline );

		# Save headline for section edit hint before it's escaped
		$headlineHint = $safeHeadline;

		# Decode HTML entities
		$safeHeadline = Sanitizer::decodeCharReferences( $safeHeadline );

		$safeHeadline = self::normalizeSectionName( $safeHeadline );

		$fallbackHeadline = Sanitizer::escapeIdForAttribute( $safeHeadline, Sanitizer::ID_FALLBACK );
		$linkAnchor = Sanitizer::escapeIdForLink( $safeHeadline );
		$safeHeadline = Sanitizer::escapeIdForAttribute( $safeHeadline, Sanitizer::ID_PRIMARY );
		if ( $fallbackHeadline === $safeHeadline ) {
			# No reason to have both (in fact, we can't)
			$fallbackHeadline = false;
		}

		# HTML IDs must be case-insensitively unique for IE compatibility (T12721).
		# @todo FIXME: We may be changing them depending on the current locale.
		$arrayKey = strtolower( $safeHeadline );

		# Create the anchor for linking from the TOC to the section
		$anchor = $safeHeadline;
		$fallbackAnchor = $fallbackHeadline;
		if ( isset( $refers[$arrayKey] ) ) {
			// phpcs:ignore Generic.Formatting.DisallowMultipleStatements
			for ( $i = 2; isset( $refers["{$arrayKey}_$i"] ); ++$i );
			$anchor .= "_$i";
			$linkAnchor .= "_$i";
			$refers["{$arrayKey}_$i"] = true;
		} else {
			$refers[$arrayKey] = true;
		}
		# Only track the fallback id when there is one. Using boolean false as
		# an array key would be cast to integer 0 and make a later heading
		# whose id is "0" look like a duplicate.
		if ( $fallbackHeadline !== false ) {
			$fallbackArrayKey = strtolower( $fallbackHeadline );
			if ( isset( $refers[$fallbackArrayKey] ) ) {
				// phpcs:ignore Generic.Formatting.DisallowMultipleStatements
				for ( $i = 2; isset( $refers["{$fallbackArrayKey}_$i"] ); ++$i );
				$fallbackAnchor .= "_$i";
				$refers["{$fallbackArrayKey}_$i"] = true;
			} else {
				$refers[$fallbackArrayKey] = true;
			}
		}

		# Don't number the heading if it is the only one (looks silly)
		if ( count( $headlines ) > 1 && $this->mOptions->getNumberHeadings() ) {
			# the two are different if the line contains a link
			$headline = Html::element(
				'span',
				[ 'class' => 'mw-headline-number' ],
				$numbering
			) . ' ' . $headline;
		}

		if ( $enoughToc && ( !isset( $maxTocLevel ) || $toclevel < $maxTocLevel ) ) {
			$toc .= Linker::tocLine( $linkAnchor, $tocline,
				$numbering, $toclevel, ( $isTemplate ? false : $sectionIndex ) );
		}

		# Add the section to the section tree
		# Find the DOM node for this header
		$noOffset = ( $isTemplate || $sectionIndex === false );
		while ( $node && !$noOffset ) {
			if ( $node->getName() === 'h' ) {
				$bits = $node->splitHeading();
				if ( $bits['i'] == $sectionIndex ) {
					break;
				}
			}
			# NOTE: despite its name, $byteOffset is accumulated with mb_strlen()
			$byteOffset += mb_strlen( $this->mStripState->unstripBoth(
				$frame->expand( $node, PPFrame::RECOVER_ORIG ) ) );
			$node = $node->getNextSibling();
		}
		$tocraw[] = [
			'toclevel' => $toclevel,
			'level' => $level,
			'line' => $tocline,
			'number' => $numbering,
			'index' => ( $isTemplate ? 'T-' : '' ) . $sectionIndex,
			'fromtitle' => $titleText,
			'byteoffset' => ( $noOffset ? null : $byteOffset ),
			'anchor' => $anchor,
		];

		# give headline the correct <h#> tag
		if ( $maybeShowEditLink && $sectionIndex !== false ) {
			// Output edit section links as markers with styles that can be customized by skins
			if ( $isTemplate ) {
				# Put a T flag in the section identifier, to indicate to extractSections()
				# that sections inside <includeonly> should be counted.
				$editsectionPage = $titleText;
				$editsectionSection = "T-$sectionIndex";
				$editsectionContent = null;
			} else {
				$editsectionPage = $this->mTitle->getPrefixedText();
				$editsectionSection = $sectionIndex;
				$editsectionContent = $headlineHint;
			}
			// We use a bit of pseudo-xml for editsection markers. The
			// language converter is run later on. Using a UNIQ style marker
			// leads to the converter screwing up the tokens when it
			// converts stuff. And trying to insert strip tags fails too. At
			// this point all real inputted tags have already been escaped,
			// so we don't have to worry about a user trying to input one of
			// these markers directly. We use a page and section attribute
			// to stop the language converter from converting these
			// important bits of data, but put the headline hint inside a
			// content block because the language converter is supposed to
			// be able to convert that piece of data.
			// Gets replaced with html in ParserOutput::getText
			$editlink = '<mw:editsection page="' . htmlspecialchars( $editsectionPage );
			$editlink .= '" section="' . htmlspecialchars( $editsectionSection ) . '"';
			if ( $editsectionContent !== null ) {
				$editlink .= '>' . $editsectionContent . '</mw:editsection>';
			} else {
				$editlink .= '/>';
			}
		} else {
			$editlink = '';
		}
		$head[$headlineCount] = Linker::makeHeadline( $level,
			$matches['attrib'][$headlineCount], $anchor, $headline,
			$editlink, $fallbackAnchor );

		$headlineCount++;
	}

	$this->setOutputType( $oldType );

	# Never ever show TOC if no headers
	if ( $numVisible < 1 ) {
		$enoughToc = false;
	}

	if ( $enoughToc ) {
		if ( $prevtoclevel > 0 && $prevtoclevel < $maxTocLevel ) {
			$toc .= Linker::tocUnindent( $prevtoclevel - 1 );
		}
		$toc = Linker::tocList( $toc, $this->mOptions->getUserLangObj() );
		$this->mOutput->setTOCHTML( $toc );
		$toc = self::TOC_START . $toc . self::TOC_END;
	}

	if ( $isMain ) {
		$this->mOutput->setSections( $tocraw );
	}

	# split up and insert constructed headlines
	$blocks = preg_split( '/<H[1-6].*?>[\s\S]*?<\/H[1-6]>/i', $text );
	$i = 0;

	// build an array of document sections
	$sections = [];
	foreach ( $blocks as $block ) {
		// $head is zero-based, sections aren't.
		if ( empty( $head[$i - 1] ) ) {
			$sections[$i] = $block;
		} else {
			$sections[$i] = $head[$i - 1] . $block;
		}

		/**
		 * Send a hook, one per section.
		 * The idea here is to be able to make section-level DIVs, but to do so in a
		 * lower-impact, more correct way than r50769
		 *
		 * $this : caller
		 * $section : the section number
		 * &$sectionContent : ref to the content of the section
		 * $maybeShowEditLinks : boolean describing whether this section has an edit link
		 */
		Hooks::run( 'ParserSectionCreate', [ $this, $i, &$sections[$i], $maybeShowEditLink ] );

		$i++;
	}

	if ( $enoughToc && $isMain && !$this->mForceTocPosition ) {
		// append the TOC at the beginning
		// Top anchor now in skin
		$sections[0] .= $toc . "\n";
	}

	$full = implode( '', $sections );

	if ( $this->mForceTocPosition ) {
		# Swap the __TOC__ placeholder for the TOC (empty string if none was built)
		return str_replace( '<!--MWTOC\'"-->', $toc, $full );
	}

	return $full;
}
4649
/**
 * Transform wiki markup when saving a page by doing "\r\n" -> "\n"
 * conversion, substituting signatures, {{subst:}} templates, etc.
 *
 * The signature/subst pass only runs when the parser options enable the
 * pre-save transform. NUL stripping, line-ending normalization and
 * unstripping are always applied.
 *
 * @param string $text The text to transform
 * @param Title $title The Title object for the current article
 * @param User $user The User object describing the current user
 * @param ParserOptions $options Parsing options
 * @param bool $clearState Whether to clear the parser state first
 * @return string The altered wiki markup
 */
public function preSaveTransform( $text, Title $title, User $user,
	ParserOptions $options, $clearState = true
) {
	if ( $clearState ) {
		// Kept in a local that is otherwise unused; presumably the lock is
		// tied to this value's lifetime -- see lock() to confirm
		$lockScope = $this->lock();
	}
	$this->startParse( $title, $options, self::OT_WIKI, $clearState );
	$this->setUser( $user );

	// Strip U+0000 NULL (T159174), then normalize line endings. The latter is
	// kept for backwards-compatibility with other code that just calls PST,
	// but should already be handled in TextContent subclasses.
	$text = TextContent::normalizeLineEndings( str_replace( "\000", '', $text ) );

	$transformed = $options->getPreSaveTransform()
		? $this->pstPass2( $text, $user )
		: $text;
	$transformed = $this->mStripState->unstripBoth( $transformed );

	$this->setUser( null ); # Reset

	return $transformed;
}
4687
/**
 * Pre-save transform helper function
 *
 * Performs, in order: {{subst:}} expansion, signature replacement
 * (~~~, ~~~~, ~~~~~) and expansion of the "pipe trick" link shortcuts.
 *
 * @param string $text
 * @param User $user
 *
 * @return string
 */
private function pstPass2( $text, $user ) {
	# Note: This is the timestamp saved as hardcoded wikitext to the database, we use
	# $this->contLang here in order to give everyone the same signature and use the default one
	# rather than the one selected in each user's preferences. (see also T14815)
	$ts = $this->mOptions->getTimestamp();
	$timestamp = MWTimestamp::getLocalInstance( $ts );
	$ts = $timestamp->format( 'YmdHis' );
	$tzMsg = $timestamp->getTimezoneMessage()->inContentLanguage()->text();

	$d = $this->contLang->timeanddate( $ts, false, false ) . " ($tzMsg)";

	# Variable replacement
	# Because mOutputType is OT_WIKI, this will only process {{subst:xxx}} type tags
	$text = $this->replaceVariables( $text );

	# This works almost by chance, as the replaceVariables are done before the getUserSig(),
	# which may corrupt this parser instance via its wfMessage()->text() call.

	# Signatures
	if ( strpos( $text, '~~~' ) !== false ) {
		$sigText = $this->getUserSig( $user );
		$text = strtr( $text, [
			'~~~~~' => $d,
			'~~~~' => "$sigText $d",
			'~~~' => $sigText
		] );
		# The main two signature forms used above are time-sensitive
		$this->setOutputFlag( 'user-signature', 'User signature detected' );
	}

	# Context links ("pipe tricks"): [[|name]] and [[name (context)|]]
	$tc = '[' . Title::legalChars() . ']';
	$nc = '[ _0-9A-Za-z\x80-\xff-]'; # Namespaces can use non-ascii!

	# Fullwidth ("double-width") punctuation, written as code point escapes so
	# that the characters cannot be silently folded to their ASCII look-alikes.
	# With ASCII parentheses $p4 would be a plain capture group that matches
	# every [[Foo|]] and turns it into [[Foo|F]].
	$fwOpenParen = "\u{FF08}";
	$fwCloseParen = "\u{FF09}";
	$fwComma = "\u{FF0C}";

	// [[ns:page (context)|]]
	$p1 = "/\[\[(:?$nc+:|:|)($tc+?)( ?\\($tc+\\))\\|]]/";
	// [[ns:page(context)|]] with U+FF08/U+FF09 fullwidth brackets (added in r40257)
	$p4 = "/\[\[(:?$nc+:|:|)($tc+?)( ?{$fwOpenParen}$tc+{$fwCloseParen})\\|]]/";
	// [[ns:page (context), context|]] (using either an ASCII or a U+FF0C fullwidth comma)
	$p3 = "/\[\[(:?$nc+:|:|)($tc+?)( ?\\($tc+\\)|)((?:, |{$fwComma})$tc+|)\\|]]/";
	// [[|page]] (reverse pipe trick: add context from page title)
	$p2 = "/\[\[\\|($tc+)]]/";

	# try $p1 first, to turn "[[A, B (C)|]]" into "[[A, B (C)|A, B]]"
	$text = preg_replace( $p1, '[[\\1\\2\\3|\\2]]', $text );
	$text = preg_replace( $p4, '[[\\1\\2\\3|\\2]]', $text );
	$text = preg_replace( $p3, '[[\\1\\2\\3\\4|\\2]]', $text );

	$t = $this->mTitle->getText();
	$m = [];
	if ( preg_match( "/^($nc+:|)$tc+?( \\($tc+\\))$/", $t, $m ) ) {
		# Current title has a "(context)" suffix: reuse its prefix and suffix
		$text = preg_replace( $p2, "[[$m[1]\\1$m[2]|\\1]]", $text );
	} elseif ( preg_match( "/^($nc+:|)$tc+?(, $tc+|)$/", $t, $m ) && "$m[1]$m[2]" != '' ) {
		# Current title has a prefix and/or a ", context" suffix
		$text = preg_replace( $p2, "[[$m[1]\\1$m[2]|\\1]]", $text );
	} else {
		# if there's no context, don't bother duplicating the title
		$text = preg_replace( $p2, '[[\\1]]', $text );
	}

	return $text;
}
4757
/**
 * Build the signature wikitext for a user, validated and ready to insert.
 *
 * The nickname and the "fancy signature" flag are read from the user's
 * preferences unless the caller supplies them, which saves a lookup when
 * they have already been fetched.
 *
 * Do not reuse this parser instance after calling getUserSig(),
 * as it may have changed.
 *
 * @param User &$user
 * @param string|bool $nickname Nickname to use, or false to use the user's stored nickname
 * @param bool|null $fancySig Whether the nickname is the complete signature,
 *  or null to use the user's stored preference
 * @return string
 */
public function getUserSig( &$user, $nickname = false, $fancySig = null ) {
	$username = $user->getName();

	# Anything the caller did not pass is taken from the user's preferences.
	if ( $nickname === false ) {
		$nickname = $user->getOption( 'nickname' );
	}
	if ( $fancySig === null ) {
		$fancySig = $user->getBoolOption( 'fancysig' );
	}

	# An unset or empty nickname means "sign with the plain user name"
	if ( $nickname == null ) {
		$nickname = $username;
	}

	$maxSigChars = $this->svcOptions->get( 'MaxSigChars' );
	if ( mb_strlen( $nickname ) > $maxSigChars ) {
		$nickname = $username;
		$this->logger->debug( __METHOD__ . ": $username has overlong signature." );
	} elseif ( $fancySig !== false ) {
		# A fancy signature may carry markup, so it has to validate first
		if ( $this->validateSig( $nickname ) !== false ) {
			# Valid: tidy it up (if needed) and use it verbatim
			return $this->cleanSig( $nickname, true );
		}

		# Invalid: fall back to the default signature
		$nickname = $username;
		$this->logger->debug( __METHOD__ . ": $username has bad XML tags in signature." );
	}

	# Never let a nickname smuggle a signature into the signature
	$nickname = self::cleanSigInSig( $nickname );

	# Default form: a link to the user page, built from the signature message
	$userText = wfEscapeWikiText( $username );
	$nickText = wfEscapeWikiText( $nickname );
	if ( $user->isAnon() ) {
		$msgName = 'signature-anon';
	} else {
		$msgName = 'signature';
	}

	$message = wfMessage( $msgName, $userText, $nickText )->inContentLanguage();
	return $message->title( $this->getTitle() )->text();
}
4812
/**
 * Check that a user's signature is a well-formed XML fragment.
 *
 * @param string $text Signature text to check
 * @return string|bool The text, unchanged, when it is well formed; false otherwise.
 */
public function validateSig( $text ) {
	if ( Xml::isWellFormedXmlFragment( $text ) ) {
		return $text;
	}

	return false;
}
4822
/**
 * Clean up signature text.
 *
 * 1) Strip 3, 4 or 5 tildes out of signatures @see cleanSigInSig
 * 2) Substitute all transclusions
 *
 * When the "clean signatures" parser option is off, the text is returned untouched.
 *
 * @param string $text
 * @param bool $parsing True when called in the middle of a parse; false when
 *  cleaning on its own (preferences save), in which case a fresh parse state
 *  is set up here using $wgTitle and default ParserOptions
 * @return string Signature text
 */
public function cleanSig( $text, $parsing = false ) {
	if ( !$parsing ) {
		global $wgTitle;
		# Keep the value returned by lock() in a local for the rest of the call.
		# NOTE(review): presumably a scope guard released on destruction - confirm.
		$lockScope = $this->lock();
		$this->startParse( $wgTitle, new ParserOptions, self::OT_PREPROCESS, true );
	}

	# Option to disable this feature
	if ( !$this->mOptions->getCleanSignatures() ) {
		return $text;
	}

	# @todo FIXME: Regex doesn't respect extension tags or nowiki
	#  => Move this logic to braceSubstitution()
	$substWord = $this->magicWordFactory->get( 'subst' );
	# Matches every "{{" that is not already followed by a subst keyword ...
	$notAlreadySubst = '(?!(?:' . $substWord->getBaseRegex() . '))';
	$substRegex = '/\{\{' . $notAlreadySubst . '/x' . $substWord->getRegexCase();
	# ... and turns it into "{{" plus the first subst synonym
	$substText = '{{' . $substWord->getSynonym( 0 );

	$text = preg_replace( $substRegex, $substText, $text );
	$text = self::cleanSigInSig( $text );

	# Expand the now-substituted transclusions
	$dom = $this->preprocessToDom( $text );
	$text = $this->getPreprocessor()->newFrame()->expand( $dom );

	if ( $parsing ) {
		return $text;
	}

	return $this->mStripState->unstripBoth( $text );
}
4863
/**
 * Remove signature markers (runs of 3, 4 or 5 tildes) from signature text.
 *
 * Longer runs are consumed five tildes at a time from the left, so any
 * remainder shorter than three tildes is left in place.
 *
 * @param string $text
 * @return string Signature text with /~{3,5}/ removed
 */
public static function cleanSigInSig( $text ) {
	return preg_replace( '/~{3,5}/', '', $text );
}
4874
/**
 * Prepare the parser state that parse() would normally set up, so that
 * external code can call individual class members with confidence.
 *
 * @param Title|null $title
 * @param ParserOptions $options
 * @param int $outputType
 * @param bool $clearState
 * @param int|null $revId Revision ID to record; left untouched when null
 */
public function startExternalParse( Title $title = null, ParserOptions $options,
	$outputType, $clearState = true, $revId = null
) {
	$this->startParse( $title, $options, $outputType, $clearState );

	if ( $revId === null ) {
		return;
	}
	$this->mRevisionId = $revId;
}
4893
/**
 * Set the title, options and output type for a parse, optionally
 * resetting the parser state afterwards.
 *
 * @param Title|null $title
 * @param ParserOptions $options
 * @param int $outputType
 * @param bool $clearState Whether to call clearState() once everything is set
 */
private function startParse( Title $title = null, ParserOptions $options,
	$outputType, $clearState = true
) {
	$this->setTitle( $title );
	$this->mOptions = $options;
	$this->setOutputType( $outputType );

	if ( !$clearState ) {
		return;
	}
	$this->clearState();
}
4910
/**
 * Wrapper for preprocess()
 *
 * Re-entrant calls (made while an outer transformMsg() is still running)
 * return $text unchanged, which guards against infinite recursion.
 *
 * @param string $text The text to preprocess
 * @param ParserOptions $options
 * @param Title|null $title Title object or null to use $wgTitle
 * @return string
 */
public function transformMsg( $text, $options, $title = null ) {
	static $executing = false;

	# Guard against infinite recursion
	if ( $executing ) {
		return $text;
	}
	$executing = true;

	try {
		if ( !$title ) {
			global $wgTitle;
			$title = $wgTitle;
		}

		return $this->preprocess( $text, $title, $options );
	} finally {
		# Release the guard even when preprocess() throws. Otherwise the static
		# flag would stay set and every later call in this request would
		# silently return its input untransformed.
		$executing = false;
	}
}
4938
/**
 * Register an HTML-style tag hook, e.g. "<yourtag>special text</yourtag>".
 * The callback should have the following form:
 *    function myParserHook( $text, $params, $parser, $frame ) { ... }
 *
 * Transform and return $text. Use $parser for any required context, e.g. use
 * $parser->getTitle() and $parser->getOptions() not $wgTitle or $wgOut->mParserOptions
 *
 * Hooks may return extended information by returning an array, of which the
 * first numbered element (index 0) must be the return string, and all other
 * entries are extracted into local variables within an internal function
 * in the Parser class.
 *
 * This interface (introduced r61913) appears to be undocumented, but
 * 'markerType' is used by some core tag hooks to override which strip
 * array their results are placed in. **Use great caution if attempting
 * this interface, as it is not documented and injudicious use could smash
 * private variables.**
 *
 * The tag name is lower-cased before it is registered, and it is added to
 * the strip list if it is not there yet.
 *
 * @param string $tag The tag to use, e.g. 'hook' for "<hook>"
 * @param callable $callback The callback function (and object) to use for the tag
 * @throws MWException If the tag name contains '<', '>', CR or LF
 * @return callable|null The old value of the mTagHooks array associated with the hook
 */
public function setHook( $tag, callable $callback ) {
	$tag = strtolower( $tag );

	$badChar = [];
	if ( preg_match( '/[<>\r\n]/', $tag, $badChar ) ) {
		throw new MWException( "Invalid character {$badChar[0]} in setHook('$tag', ...) call" );
	}

	$previous = $this->mTagHooks[$tag] ?? null;
	$this->mTagHooks[$tag] = $callback;

	$alreadyStripped = in_array( $tag, $this->mStripList );
	if ( !$alreadyStripped ) {
		$this->mStripList[] = $tag;
	}

	return $previous;
}
4976
/**
 * As setHook(), but letting the contents be parsed.
 *
 * Transparent tag hooks are like regular XML-style tag hooks, except they
 * operate late in the transformation sequence, on HTML instead of wikitext.
 *
 * This is probably obsoleted by things dealing with parser frames?
 * The only extension currently using it is geoserver.
 *
 * @since 1.10
 * @todo better document or deprecate this
 *
 * @param string $tag The tag to use, e.g. 'hook' for "<hook>"
 * @param callable $callback The callback function (and object) to use for the tag
 * @throws MWException If the tag name contains '<', '>', CR or LF
 * @return callable|null The old value of the mTransparentTagHooks array
 *  associated with the hook
 */
public function setTransparentTagHook( $tag, callable $callback ) {
	$tag = strtolower( $tag );
	if ( preg_match( '/[<>\r\n]/', $tag, $m ) ) {
		# Name the method that was actually called, so the message can be
		# matched to the offending call site.
		throw new MWException(
			"Invalid character {$m[0]} in setTransparentTagHook('$tag', ...) call"
		);
	}
	$oldVal = $this->mTransparentTagHooks[$tag] ?? null;
	$this->mTransparentTagHooks[$tag] = $callback;

	return $oldVal;
}
5004
/**
 * Remove all tag hooks and function tag hooks, and restore the
 * strip list to its default contents.
 */
public function clearTagHooks() {
	# Restore the strip list first; the three assignments are independent.
	$this->mStripList = $this->mDefaultStripList;
	$this->mFunctionTagHooks = [];
	$this->mTagHooks = [];
}
5013
/**
 * Register a parser function, e.g. {{sum:1|2|3}}
 * The callback function should have the form:
 *    function myParserFunction( &$parser, $arg1, $arg2, $arg3 ) { ... }
 *
 * Or with Parser::SFH_OBJECT_ARGS:
 *    function myParserFunction( $parser, $frame, $args ) { ... }
 *
 * The callback may either return the text result of the function, or an array with the text
 * in element 0, and a number of flags in the other elements. The names of the flags are
 * specified in the keys. Valid flags are:
 *   found                     The text returned is valid, stop processing the template. This
 *                             is on by default.
 *   nowiki                    Wiki markup in the return value should be escaped
 *   isHTML                    The returned text is HTML, armour it against wikitext transformation
 *
 * @param string $id The magic word ID
 * @param callable $callback The callback function (and object) to use
 * @param int $flags A combination of the following flags:
 *     Parser::SFH_NO_HASH      No leading hash, i.e. {{plural:...}} instead of {{#if:...}}
 *
 *     Parser::SFH_OBJECT_ARGS  Pass the template arguments as PPNode objects instead of text.
 *     This allows for conditional expansion of the parse tree, allowing you to eliminate dead
 *     branches and thus speed up parsing. It is also possible to analyse the parse tree of
 *     the arguments, and to control the way they are expanded.
 *
 *     The $frame parameter is a PPFrame. This can be used to produce expanded text from the
 *     arguments, for instance:
 *         $text = isset( $args[0] ) ? $frame->expand( $args[0] ) : '';
 *
 *     For technical reasons, $args[0] is pre-expanded and will be a string. This may change in
 *     future versions. Please call $frame->expand() on it anyway so that your code keeps
 *     working if/when this is changed.
 *
 *     If you want whitespace to be trimmed from $args, you need to do it yourself, post-
 *     expansion.
 *
 *     Please read the documentation in includes/parser/Preprocessor.php for more information
 *     about the methods available in PPFrame and PPNode.
 *
 * @throws MWException If no magic word object is available for $id
 * @return string|callable|null The old callback function for this name, or null if none
 */
public function setFunctionHook( $id, callable $callback, $flags = 0 ) {
	# Remember the callback being replaced, if there is one
	$previous = null;
	if ( isset( $this->mFunctionHooks[$id] ) ) {
		$previous = $this->mFunctionHooks[$id][0];
	}
	$this->mFunctionHooks[$id] = [ $callback, $flags ];

	# Add to function cache
	$magicWord = $this->magicWordFactory->get( $id );
	if ( !$magicWord ) {
		throw new MWException( __METHOD__ . '() expecting a magic word identifier.' );
	}

	$synonyms = $magicWord->getSynonyms();
	# 0 or 1; used as the first-level key of mFunctionSynonyms
	$sensitive = intval( $magicWord->isCaseSensitive() );
	$wantsHash = !( $flags & self::SFH_NO_HASH );

	foreach ( $synonyms as $synonym ) {
		# Case-insensitive words are stored lower-cased
		if ( !$sensitive ) {
			$synonym = $this->contLang->lc( $synonym );
		}
		# Add leading hash
		if ( $wantsHash ) {
			$synonym = '#' . $synonym;
		}
		# Remove trailing colon
		if ( substr( $synonym, -1, 1 ) === ':' ) {
			$synonym = substr( $synonym, 0, -1 );
		}
		$this->mFunctionSynonyms[$sensitive][$synonym] = $id;
	}

	return $previous;
}
5087
/**
 * List the identifiers of all registered function hooks.
 *
 * firstCallInit() is run first, before the list is read.
 *
 * @return array Keys of the function hook registry
 */
public function getFunctionHooks() {
	$this->firstCallInit();

	$hookIds = array_keys( $this->mFunctionHooks );
	return $hookIds;
}
5097
/**
 * Register a tag function, e.g. "<test>some stuff</test>".
 * Unlike tag hooks, tag functions are parsed at preprocessor level.
 * Unlike parser functions, their content is not preprocessed.
 *
 * The tag name is lower-cased before it is registered, and it is added to
 * the strip list if it is not there yet.
 *
 * @param string $tag
 * @param callable $callback
 * @param int $flags
 * @throws MWException If the tag name contains '<', '>', CR or LF
 * @return array|null The previous [ callback, flags ] entry for this tag, or null if none
 */
public function setFunctionTagHook( $tag, callable $callback, $flags ) {
	$tag = strtolower( $tag );

	$badChar = [];
	if ( preg_match( '/[<>\r\n]/', $tag, $badChar ) ) {
		throw new MWException(
			"Invalid character {$badChar[0]} in setFunctionTagHook('$tag', ...) call"
		);
	}

	$previous = $this->mFunctionTagHooks[$tag] ?? null;
	$this->mFunctionTagHooks[$tag] = [ $callback, $flags ];

	$alreadyStripped = in_array( $tag, $this->mStripList );
	if ( !$alreadyStripped ) {
		$this->mStripList[] = $tag;
	}

	return $previous;
}
5122
/**
 * Replace "<!--LINK-->" link placeholders with actual links, in the buffer.
 * Placeholders created in Linker::link()
 *
 * @param string &$text Buffer, passed on by reference to the link holder array
 * @param int $options Not used by this method
 */
public function replaceLinkHolders( &$text, $options = 0 ) {
	$holders = $this->mLinkHolders;
	$holders->replace( $text );
}
5133
/**
 * Replace "<!--LINK-->" link placeholders with the plain text of the links
 * (not HTML-formatted).
 *
 * @param string $text
 * @return string
 */
public function replaceLinkHoldersText( $text ) {
	$plainText = $this->mLinkHolders->replaceText( $text );
	return $plainText;
}
5144
/**
 * Renders an image gallery from a text with one line per image.
 * text labels may be given by using |-style alternative text. E.g.
 *   Image:one.jpg|The number "1"
 *   Image:tree.jpg|A tree
 * given as text will return the HTML of a gallery with two images,
 * labeled 'The number "1"' and
 * 'A tree'.
 *
 * Recognised keys in $params: 'mode', 'showfilename', 'caption', 'perrow',
 * 'widths' and 'heights'. The whole array is also handed to the gallery as
 * tag attributes (after validation) and as additional options.
 *
 * @param string $text One gallery entry per line
 * @param array $params Attributes of the gallery tag
 * @return string HTML
 */
public function renderImageGallery( $text, $params ) {
	$mode = false;
	if ( isset( $params['mode'] ) ) {
		$mode = $params['mode'];
	}

	try {
		$ig = ImageGalleryBase::factory( $mode );
	} catch ( Exception $e ) {
		// If invalid type set, fallback to default.
		$ig = ImageGalleryBase::factory( false );
	}

	$ig->setContextTitle( $this->mTitle );
	$ig->setShowBytes( false );
	$ig->setShowDimensions( false );
	$ig->setShowFilename( false );
	$ig->setParser( $this );
	$ig->setHideBadImages();
	$ig->setAttributes( Sanitizer::validateTagAttributes( $params, 'ul' ) );

	if ( isset( $params['showfilename'] ) ) {
		$ig->setShowFilename( true );
	} else {
		$ig->setShowFilename( false );
	}
	if ( isset( $params['caption'] ) ) {
		// NOTE: We aren't passing a frame here or below.  Frame info
		// is currently opaque to Parsoid, which acts on OT_PREPROCESS.
		// See T107332#4030581
		$caption = $this->recursiveTagParse( $params['caption'] );
		$ig->setCaptionHtml( $caption );
	}
	if ( isset( $params['perrow'] ) ) {
		$ig->setPerRow( $params['perrow'] );
	}
	if ( isset( $params['widths'] ) ) {
		$ig->setWidths( $params['widths'] );
	}
	if ( isset( $params['heights'] ) ) {
		$ig->setHeights( $params['heights'] );
	}
	$ig->setAdditionalOptions( $params );

	// Avoid PHP 7.1 warning from passing $this by reference
	$parser = $this;
	Hooks::run( 'BeforeParserrenderImageGallery', [ &$parser, &$ig ] );

	$lines = StringUtils::explode( "\n", $text );
	foreach ( $lines as $line ) {
		# match lines like these:
		# Image:someimage.jpg|This is some image
		$matches = [];
		preg_match( "/^([^|]+)(\\|(.*))?$/", $line, $matches );
		# Skip empty lines
		if ( count( $matches ) == 0 ) {
			continue;
		}

		if ( strpos( $matches[0], '%' ) !== false ) {
			$matches[1] = rawurldecode( $matches[1] );
		}
		$title = Title::newFromText( $matches[1], NS_FILE );
		if ( is_null( $title ) ) {
			# Bogus title. Ignore these so we don't bomb out later.
			continue;
		}

		# We need to get what handler the file uses, to figure out parameters.
		# Note, a hook can overide the file name, and chose an entirely different
		# file (which potentially could be of a different type and have different handler).
		$options = [];
		$descQuery = false;
		Hooks::run( 'BeforeParserFetchFileAndTitle',
			[ $this, $title, &$options, &$descQuery ] );
		# Don't register it now, as TraditionalImageGallery does that later.
		$file = $this->fetchFileNoRegister( $title, $options );
		$handler = $file ? $file->getHandler() : false;

		$paramMap = [
			'img_alt' => 'gallery-internal-alt',
			'img_link' => 'gallery-internal-link',
		];
		if ( $handler ) {
			$paramMap += $handler->getParamMap();
			// We don't want people to specify per-image widths.
			// Additionally the width parameter would need special casing anyhow.
			unset( $paramMap['img_width'] );
		}

		$mwArray = $this->magicWordFactory->newArray( array_keys( $paramMap ) );

		$label = '';
		$alt = '';
		$link = '';
		$handlerOptions = [];
		if ( isset( $matches[3] ) ) {
			// look for an |alt= definition while trying not to break existing
			// captions with multiple pipes (|) in it, until a more sensible grammar
			// is defined for images in galleries

			// FIXME: Doing recursiveTagParse at this stage, and the trim before
			// splitting on '|' is a bit odd, and different from makeImage.
			$matches[3] = $this->recursiveTagParse( trim( $matches[3] ) );
			// Protect LanguageConverter markup
			$parameterMatches = StringUtils::delimiterExplode(
				'-{', '}-', '|', $matches[3], true /* nested */
			);

			foreach ( $parameterMatches as $parameterMatch ) {
				list( $magicName, $match ) = $mwArray->matchVariableStartToEnd( $parameterMatch );
				if ( $magicName ) {
					$paramName = $paramMap[$magicName];

					switch ( $paramName ) {
						case 'gallery-internal-alt':
							$alt = $this->stripAltText( $match, false );
							break;
						case 'gallery-internal-link':
							$linkValue = $this->stripAltText( $match, false );
							// The pipe and braces must be escaped: an unescaped "|" is
							// an alternation, which made this match any value that merely
							// starts with "-{R" or ends with "}-" and then chopped
							// characters off unrelated link targets.
							if ( preg_match( '/^-\{R\|(.*)\}-$/', $linkValue ) ) {
								// Result of LanguageConverter::markNoConversion
								// invoked on an external link.
								// Drop the leading "-{R|" (4 bytes) and trailing "}-" (2 bytes).
								$linkValue = substr( $linkValue, 4, -2 );
							}
							list( $type, $target ) = $this->parseLinkParameter( $linkValue );
							if ( $type === 'link-url' ) {
								$link = $target;
								$this->mOutput->addExternalLink( $target );
							} elseif ( $type === 'link-title' ) {
								$link = $target->getLinkURL();
								$this->mOutput->addLink( $target );
							}
							break;
						default:
							// Must be a handler specific parameter.
							// (Only reachable when $handler is set, because only then
							// does $paramMap contain anything besides alt and link.)
							if ( $handler->validateParam( $paramName, $match ) ) {
								$handlerOptions[$paramName] = $match;
							} else {
								// Guess not, consider it as caption.
								$this->logger->debug(
									"$parameterMatch failed parameter validation" );
								$label = $parameterMatch;
							}
					}

				} else {
					// Last pipe wins.
					$label = $parameterMatch;
				}
			}
		}

		$ig->add( $title, $label, $alt, $link, $handlerOptions );
	}
	$html = $ig->toHTML();
	Hooks::run( 'AfterParserFetchFileAndTitle', [ $this, $ig, &$html ] );
	return $html;
}
5317
/**
 * Get the image parameter map and the matching magic word array for a
 * media handler. Results are cached per handler class on this parser.
 *
 * @param MediaHandler $handler Media handler, or a falsy value when there is none
 * @return array Two elements: the map of magic word ID => [ type, name ],
 *  and the magic word array built from that map's keys
 */
public function getImageParams( $handler ) {
	# Built-in parameter names, grouped by the kind of setting they control
	static $internalParamNames = [
		'horizAlign' => [ 'left', 'right', 'center', 'none' ],
		'vertAlign' => [ 'baseline', 'sub', 'super', 'top', 'text-top', 'middle',
			'bottom', 'text-bottom' ],
		'frame' => [ 'thumbnail', 'manualthumb', 'framed', 'frameless',
			'upright', 'border', 'link', 'alt', 'class' ],
	];
	# Map derived from the list above; built once, on first use
	static $internalParamMap;

	# Handler-less lookups share the '' cache slot
	$handlerClass = $handler ? get_class( $handler ) : '';

	if ( isset( $this->mImageParams[$handlerClass] ) ) {
		return [ $this->mImageParams[$handlerClass], $this->mImageParamsMagicArray[$handlerClass] ];
	}

	if ( !$internalParamMap ) {
		$internalParamMap = [];
		foreach ( $internalParamNames as $type => $names ) {
			foreach ( $names as $name ) {
				// For grep: img_left, img_right, img_center, img_none,
				// img_baseline, img_sub, img_super, img_top, img_text_top, img_middle,
				// img_bottom, img_text_bottom,
				// img_thumbnail, img_manualthumb, img_framed, img_frameless, img_upright,
				// img_border, img_link, img_alt, img_class
				$magicName = str_replace( '-', '_', "img_$name" );
				$internalParamMap[$magicName] = [ $type, $name ];
			}
		}
	}

	# Start from the built-in map and layer the handler's own parameters on top
	$paramMap = $internalParamMap;
	if ( $handler ) {
		foreach ( $handler->getParamMap() as $magic => $paramName ) {
			$paramMap[$magic] = [ 'handler', $paramName ];
		}
	}

	$this->mImageParams[$handlerClass] = $paramMap;
	$this->mImageParamsMagicArray[$handlerClass] =
		$this->magicWordFactory->newArray( array_keys( $paramMap ) );

	return [ $this->mImageParams[$handlerClass], $this->mImageParamsMagicArray[$handlerClass] ];
}
5367
/**
 * Parse image options text and use it to make an image
 *
 * The options text is split on '|' (leaving LanguageConverter markup intact).
 * Each part is matched against the image parameter magic words; a part that
 * does not validate as a parameter becomes the caption, and the last such
 * part wins. The collected parameters are then handed to
 * Linker::makeImageLink().
 *
 * @param Title $title
 * @param string $options Raw option text, e.g. "thumb|left|Some caption"
 * @param LinkHolderArray|bool $holders Link holders used when stripping alt text;
 *  when falsy, stripAltText() uses this parser's own link holders instead
 * @return string HTML
 */
public function makeImage( $title, $options, $holders = false ) {
	# Check if the options text is of the form "options|alt text"
	# Options are:
	#  * thumbnail  make a thumbnail with enlarge-icon and caption, alignment depends on lang
	#  * left       no resizing, just left align. label is used for alt= only
	#  * right      same, but right aligned
	#  * none       same, but not aligned
	#  * ___px      scale to ___ pixels width, no aligning. e.g. use in taxobox
	#  * center     center the image
	#  * frame      Keep original image size, no magnify-button.
	#  * framed     Same as "frame"
	#  * frameless  like 'thumb' but without a frame. Keeps user preferences for width
	#  * upright    reduce width for upright images, rounded to full __0 px
	#  * border     draw a 1px border around the image
	#  * alt        Text for HTML alt attribute (defaults to empty)
	#  * class      Set a class for img node
	#  * link       Set the target of the image link. Can be external, interwiki, or local
	# vertical-align values (no % or length right now):
	#  * baseline
	#  * sub
	#  * super
	#  * top
	#  * text-top
	#  * middle
	#  * bottom
	#  * text-bottom

	# Protect LanguageConverter markup when splitting into parts
	$parts = StringUtils::delimiterExplode(
		'-{', '}-', '|', $options, true /* allow nesting */
	);

	# Give extensions a chance to select the file revision for us
	# ($options is reused from here on as the fetch options array, not the raw text)
	$options = [];
	$descQuery = false;
	Hooks::run( 'BeforeParserFetchFileAndTitle',
		[ $this, $title, &$options, &$descQuery ] );
	# Fetch and register the file (file title may be different via hooks)
	list( $file, $title ) = $this->fetchFileAndTitle( $title, $options );

	# Get parameter map
	$handler = $file ? $file->getHandler() : false;

	list( $paramMap, $mwArray ) = $this->getImageParams( $handler );

	if ( !$file ) {
		$this->addTrackingCategory( 'broken-file-category' );
	}

	# Process the input parameters
	$caption = '';
	$params = [ 'frame' => [], 'handler' => [],
		'horizAlign' => [], 'vertAlign' => [] ];
	# Set once the first frameless/framed/thumbnail option has been seen
	$seenformat = false;
	foreach ( $parts as $part ) {
		$part = trim( $part );
		list( $magicName, $value ) = $mwArray->matchVariableStartToEnd( $part );
		$validated = false;
		if ( isset( $paramMap[$magicName] ) ) {
			list( $type, $paramName ) = $paramMap[$magicName];

			# Special case; width and height come in one variable together
			if ( $type === 'handler' && $paramName === 'width' ) {
				$parsedWidthParam = self::parseWidthParam( $value );
				if ( isset( $parsedWidthParam['width'] ) ) {
					$width = $parsedWidthParam['width'];
					if ( $handler->validateParam( 'width', $width ) ) {
						$params[$type]['width'] = $width;
						$validated = true;
					}
				}
				if ( isset( $parsedWidthParam['height'] ) ) {
					$height = $parsedWidthParam['height'];
					if ( $handler->validateParam( 'height', $height ) ) {
						$params[$type]['height'] = $height;
						$validated = true;
					}
				}
				# else no validation -- T15436
			} else {
				if ( $type === 'handler' ) {
					# Validate handler parameter
					$validated = $handler->validateParam( $paramName, $value );
				} else {
					# Validate internal parameters
					switch ( $paramName ) {
						case 'manualthumb':
						case 'alt':
						case 'class':
							# @todo FIXME: Possibly check validity here for
							# manualthumb? downstream behavior seems odd with
							# missing manual thumbs.
							$validated = true;
							$value = $this->stripAltText( $value, $holders );
							break;
						case 'link':
							# parseLinkParameter() returns [ type, target ]; the type
							# replaces $paramName, so the value is stored below under
							# 'no-link', 'link-url' or 'link-title'. A null type means
							# the link target was not valid.
							list( $paramName, $value ) =
								$this->parseLinkParameter(
									$this->stripAltText( $value, $holders )
								);
							if ( $paramName ) {
								$validated = true;
								if ( $paramName === 'no-link' ) {
									$value = true;
								}
								if ( ( $paramName === 'link-url' ) && $this->mOptions->getExternalLinkTarget() ) {
									$params[$type]['link-target'] = $this->mOptions->getExternalLinkTarget();
								}
							}
							break;
						case 'frameless':
						case 'framed':
						case 'thumbnail':
							// use first appearing option, discard others.
							$validated = !$seenformat;
							$seenformat = true;
							break;
						default:
							# Most other things appear to be empty or numeric...
							$validated = ( $value === false || is_numeric( trim( $value ) ) );
					}
				}

				if ( $validated ) {
					$params[$type][$paramName] = $value;
				}
			}
		}
		# Anything that is not a valid parameter is treated as the caption;
		# a later unvalidated part overwrites an earlier one.
		if ( !$validated ) {
			$caption = $part;
		}
	}

	# Process alignment parameters
	# key() reads the key at the array's internal pointer; the arrays are only
	# appended to above, so presumably this is the first alignment option given.
	if ( $params['horizAlign'] ) {
		$params['frame']['align'] = key( $params['horizAlign'] );
	}
	if ( $params['vertAlign'] ) {
		$params['frame']['valign'] = key( $params['vertAlign'] );
	}

	$params['frame']['caption'] = $caption;

	# Will the image be presented in a frame, with the caption below?
	$imageIsFramed = isset( $params['frame']['frame'] )
		|| isset( $params['frame']['framed'] )
		|| isset( $params['frame']['thumbnail'] )
		|| isset( $params['frame']['manualthumb'] );

	# In the old days, [[Image:Foo|text...]] would set alt text.  Later it
	# came to also set the caption, ordinary text after the image -- which
	# makes no sense, because that just repeats the text multiple times in
	# screen readers.  It *also* came to set the title attribute.
	# Now that we have an alt attribute, we should not set the alt text to
	# equal the caption: that's worse than useless, it just repeats the
	# text.  This is the framed/thumbnail case.  If there's no caption, we
	# use the unnamed parameter for alt text as well, just for the time be-
	# ing, if the unnamed param is set and the alt param is not.
	# For the future, we need to figure out if we want to tweak this more,
	# e.g., introducing a title= parameter for the title; ignoring the un-
	# named parameter entirely for images without a caption; adding an ex-
	# plicit caption= parameter and preserving the old magic unnamed para-
	# meter for BC; ...
	if ( $imageIsFramed ) { # Framed image
		if ( $caption === '' && !isset( $params['frame']['alt'] ) ) {
			# No caption or alt text, add the filename as the alt text so
			# that screen readers at least get some description of the image
			$params['frame']['alt'] = $title->getText();
		}
		# Do not set $params['frame']['title'] because tooltips don't make sense
		# for framed images
	} else { # Inline image
		if ( !isset( $params['frame']['alt'] ) ) {
			# No alt text, use the "caption" for the alt text
			if ( $caption !== '' ) {
				$params['frame']['alt'] = $this->stripAltText( $caption, $holders );
			} else {
				# No caption, fall back to using the filename for the
				# alt text
				$params['frame']['alt'] = $title->getText();
			}
		}
		# Use the "caption" for the tooltip text
		$params['frame']['title'] = $this->stripAltText( $caption, $holders );
	}
	$params['handler']['targetlang'] = $this->getTargetLanguage()->getCode();

	Hooks::run( 'ParserMakeImageParams', [ $title, $file, &$params, $this ] );

	# Linker does the rest
	# 'time' may have been set in $options by the BeforeParserFetchFileAndTitle hook
	$time = $options['time'] ?? false;
	$ret = Linker::makeImageLink( $this, $title, $file, $params['frame'], $params['handler'],
		$time, $descQuery, $this->mOptions->getThumbSize() );

	# Give the handler a chance to modify the parser object
	if ( $handler ) {
		$handler->parserTransformHook( $this, $file );
	}

	return $ret;
}
5577
/**
 * Parse the value of 'link' parameter in image syntax (`[[File:Foo.jpg|link=<value>]]`).
 *
 * Adds an entry to appropriate link tables.
 *
 * @since 1.32
 * @param string $value
 * @return array of `[ type, target ]`, where:
 *   - `type` is one of:
 *     - `null`: Given value is not a valid link target, use default
 *     - `'no-link'`: Given value is empty, do not generate a link
 *     - `'link-url'`: Given value is a valid external link
 *     - `'link-title'`: Given value is a valid internal link
 *   - `target` is:
 *     - When `type` is `null` or `'no-link'`: `false`
 *     - When `type` is `'link-url'`: URL string corresponding to given value
 *     - When `type` is `'link-title'`: Title object corresponding to given value
 */
public function parseLinkParameter( $value ) {
	# Empty value: explicitly no link
	if ( $value === '' ) {
		return [ 'no-link', false ];
	}

	$prots = $this->mUrlProtocols;

	# Anything starting with a known protocol is either a valid URL or invalid;
	# it is never retried as a page title.
	if ( preg_match( "/^((?i)$prots)/", $value ) ) {
		$addr = self::EXT_LINK_ADDR;
		$chars = self::EXT_LINK_URL_CLASS;
		if ( !preg_match( "/^((?i)$prots)$addr$chars*$/u", $value ) ) {
			return [ null, false ];
		}

		$this->mOutput->addExternalLink( $value );
		return [ 'link-url', $value ];
	}

	# Otherwise it has to be an internal link
	$linkTitle = Title::newFromText( $value );
	if ( !$linkTitle ) {
		return [ null, false ];
	}

	$this->mOutput->addLink( $linkTitle );
	return [ 'link-title', $linkTitle ];
}
5620
5621 /**
5622 * @param string $caption
5623 * @param LinkHolderArray|bool $holders
5624 * @return mixed|string
5625 */
protected function stripAltText( $caption, $holders ) {
	# Strip bad stuff out of the title (tooltip). replaceLinkHoldersText()
	# can't be used unconditionally, because when this function is called
	# from replaceInternalLinks2(), mLinkHolders won't be up-to-date; prefer
	# the holder array handed in by the caller when there is one.
	$tooltip = $holders
		? $holders->replaceText( $caption )
		: $this->replaceLinkHoldersText( $caption );

	# Thumbnail attributes must not contain placeholders that would later
	# be expanded to HTML, so expand them now; the tags are removed below.
	$tooltip = $this->mStripState->unstripBoth( $tooltip );

	# Compatibility hack! In HTML certain entity references not terminated
	# by a semicolon are decoded (but not if we're in an attribute; that's
	# how link URLs get away without properly escaping & in queries).
	# But wikitext has always required semicolon-termination of entities,
	# so encode & where needed to avoid decode of semicolon-less entities.
	# See T209236 and
	# https://www.w3.org/TR/html5/syntax.html#named-character-references
	# T210437 discusses moving this workaround to Sanitizer::stripAllTags.
	$tooltip = preg_replace( "/
		&	# 1. entity prefix
		(?=	# 2. followed by:
		(?:	# a. one of the legacy semicolon-less named entities
			A(?:Elig|MP|acute|circ|grave|ring|tilde|uml)|
			C(?:OPY|cedil)|E(?:TH|acute|circ|grave|uml)|
			GT|I(?:acute|circ|grave|uml)|LT|Ntilde|
			O(?:acute|circ|grave|slash|tilde|uml)|QUOT|REG|THORN|
			U(?:acute|circ|grave|uml)|Yacute|
			a(?:acute|c(?:irc|ute)|elig|grave|mp|ring|tilde|uml)|brvbar|
			c(?:cedil|edil|urren)|cent(?!erdot;)|copy(?!sr;)|deg|
			divide(?!ontimes;)|e(?:acute|circ|grave|th|uml)|
			frac(?:1(?:2|4)|34)|
			gt(?!c(?:c|ir)|dot|lPar|quest|r(?:a(?:pprox|rr)|dot|eq(?:less|qless)|less|sim);)|
			i(?:acute|circ|excl|grave|quest|uml)|laquo|
			lt(?!c(?:c|ir)|dot|hree|imes|larr|quest|r(?:Par|i(?:e|f|));)|
			m(?:acr|i(?:cro|ddot))|n(?:bsp|tilde)|
			not(?!in(?:E|dot|v(?:a|b|c)|)|ni(?:v(?:a|b|c)|);)|
			o(?:acute|circ|grave|rd(?:f|m)|slash|tilde|uml)|
			p(?:lusmn|ound)|para(?!llel;)|quot|r(?:aquo|eg)|
			s(?:ect|hy|up(?:1|2|3)|zlig)|thorn|times(?!b(?:ar|)|d;)|
			u(?:acute|circ|grave|ml|uml)|y(?:acute|en|uml)
		)
		(?:[^;]|$))	# b. and not followed by a semicolon
		# S = study, for efficiency
		/Sx", '&amp;', $tooltip );

	# Finally drop any remaining markup
	return Sanitizer::stripAllTags( $tooltip );
}
5678
5679 /**
5680 * Set a flag in the output object indicating that the content is dynamic and
5681 * shouldn't be cached.
5682 * @deprecated since 1.28; use getOutput()->updateCacheExpiry()
5683 */
public function disableCache() {
	$this->logger->debug( "Parser output marked as uncacheable." );

	# There is only an output object to mark while a parse is in progress
	$output = $this->mOutput;
	if ( !$output ) {
		throw new MWException( __METHOD__ .
			" can only be called when actually parsing something" );
	}

	# An expiry of 0 is the new-style way to say "do not cache"
	$output->updateCacheExpiry( 0 );
}
5692
5693 /**
5694 * Callback from the Sanitizer for expanding items found in HTML attribute
5695 * values, so they can be safely tested and escaped.
5696 *
5697 * @param string &$text
5698 * @param bool|PPFrame $frame
5699 * @return string
5700 */
public function attributeStripCallback( &$text, $frame = false ) {
	# $text is a reference: the caller's variable receives each intermediate
	# result as well as the final value that is returned.
	$text = $this->replaceVariables( $text, $frame );
	return $text = $this->mStripState->unstripBoth( $text );
}
5706
5707 /**
5708 * Accessor
5709 *
5710 * @return array
5711 */
public function getTags() {
	$this->firstCallInit();

	# Collect the names registered in each of the three hook tables
	$transparentTags = array_keys( $this->mTransparentTagHooks );
	$regularTags = array_keys( $this->mTagHooks );
	$functionTags = array_keys( $this->mFunctionTagHooks );

	return array_merge( $transparentTags, $regularTags, $functionTags );
}
5720
5721 /**
5722 * @since 1.32
5723 * @return array
5724 */
public function getFunctionSynonyms() {
	# Run first-call initialisation before reading the synonym table
	$this->firstCallInit();
	$synonyms = $this->mFunctionSynonyms;
	return $synonyms;
}
5729
5730 /**
5731 * @since 1.32
5732 * @return string
5733 */
public function getUrlProtocols() {
	# Plain accessor for the protocol pattern this parser embeds in its URL regexes
	$protocols = $this->mUrlProtocols;
	return $protocols;
}
5737
5738 /**
5739 * Replace transparent tags in $text with the values given by the callbacks.
5740 *
5741 * Transparent tag hooks are like regular XML-style tag hooks, except they
5742 * operate late in the transformation sequence, on HTML instead of wikitext.
5743 *
5744 * @param string $text
5745 *
5746 * @return string
5747 */
public function replaceTransparentTags( $text ) {
	# Pull every registered transparent tag out of the text; $matches is
	# filled with one entry per marker left behind in $text.
	$matches = [];
	$tagNames = array_keys( $this->mTransparentTagHooks );
	$text = self::extractTagsAndParams( $tagNames, $text, $matches );

	$replacements = [];
	foreach ( $matches as $marker => $data ) {
		list( $element, $content, $params, $tag ) = $data;
		$hook = $this->mTransparentTagHooks[strtolower( $element )] ?? null;

		# Without a usable hook, the original tag text is put back untouched
		$replacements[$marker] = $hook !== null
			? call_user_func_array( $hook, [ $content, $params, $this ] )
			: $tag;
	}

	return strtr( $text, $replacements );
}
5769
5770 /**
5771 * Break wikitext input into sections, and either pull or replace
5772 * some particular section's text.
5773 *
5774 * External callers should use the getSection and replaceSection methods.
5775 *
5776 * @param string $text Page wikitext
5777 * @param string|int $sectionId A section identifier string of the form:
5778 * "<flag1> - <flag2> - ... - <section number>"
5779 *
5780 * Currently the only recognised flag is "T", which means the target section number
5781 * was derived during a template inclusion parse, in other words this is a template
5782 * section edit link. If no flags are given, it was an ordinary section edit link.
5783 * This flag is required to avoid a section numbering mismatch when a section is
5784 * enclosed by "<includeonly>" (T8563).
5785 *
5786 * The section number 0 pulls the text before the first heading; other numbers will
5787 * pull the given section along with its lower-level subsections. If the section is
5788 * not found, $mode=get will return $newText, and $mode=replace will return $text.
5789 *
5790 * Section 0 is always considered to exist, even if it only contains the empty
5791 * string. If $text is the empty string and section 0 is replaced, $newText is
5792 * returned.
5793 *
5794 * @param string $mode One of "get" or "replace"
5795 * @param string $newText Replacement text for section data.
5796 * @return string For "get", the extracted section text.
5797 * for "replace", the whole page with the section replaced.
5798 */
private function extractSections( $text, $sectionId, $mode, $newText = '' ) {
	global $wgTitle; # not generally used but removes an ugly failure mode

	# The lock is released when this variable goes out of scope
	$lockScope = $this->lock();
	$this->startParse( $wgTitle, new ParserOptions, self::OT_PLAIN, true );
	$outText = '';
	$frame = $this->getPreprocessor()->newFrame();
	$wantGet = ( $mode === 'get' );
	$wantReplace = ( $mode === 'replace' );

	# Process section extraction flags: the last dash-separated part is the
	# section number, anything before it is a flag ("T" is the only one known)
	$sectionParts = explode( '-', $sectionId );
	$sectionIndex = array_pop( $sectionParts );
	$flags = in_array( 'T', $sectionParts, true ) ? self::PTD_FOR_INCLUSION : 0;

	# Check for empty input
	if ( strval( $text ) === '' ) {
		# Only sections 0 and T-0 exist in an empty document
		if ( $sectionIndex == 0 ) {
			return $wantGet ? '' : $newText;
		}
		return $wantGet ? $newText : $text;
	}

	# Preprocess the text
	$root = $this->preprocessToDom( $text, $flags );

	# <h> nodes indicate section breaks
	# They can only occur at the top level, so we can find them by iterating the root's children
	$node = $root->getFirstChild();

	# Find the target section
	if ( $sectionIndex == 0 ) {
		# Section zero doesn't nest, level=big
		$targetLevel = 1000;
	} else {
		for ( ; $node; $node = $node->getNextSibling() ) {
			if ( $node->getName() === 'h' ) {
				$bits = $node->splitHeading();
				if ( $bits['i'] == $sectionIndex ) {
					$targetLevel = $bits['level'];
					break;
				}
			}
			# In replace mode everything before the target is copied through
			if ( $wantReplace ) {
				$outText .= $frame->expand( $node, PPFrame::RECOVER_ORIG );
			}
		}
	}

	if ( !$node ) {
		# Not found
		return $wantGet ? $newText : $text;
	}

	# Find the end of the section, including nested sections.
	# $node is known to be set here, so the body runs at least once.
	for ( ; $node; $node = $node->getNextSibling() ) {
		if ( $node->getName() === 'h' ) {
			$bits = $node->splitHeading();
			# A different heading at the same or a shallower level ends the section
			if ( $bits['i'] != $sectionIndex && $bits['level'] <= $targetLevel ) {
				break;
			}
		}
		if ( $wantGet ) {
			$outText .= $frame->expand( $node, PPFrame::RECOVER_ORIG );
		}
	}

	# Write out the remainder (in replace mode only)
	if ( $wantReplace ) {
		# Output the replacement text
		# Add two newlines on -- trailing whitespace in $newText is conventionally
		# stripped by the editor, so we need both newlines to restore the paragraph gap
		# Only add trailing whitespace if there is newText
		if ( $newText != "" ) {
			$outText .= $newText . "\n\n";
		}

		for ( ; $node; $node = $node->getNextSibling() ) {
			$outText .= $frame->expand( $node, PPFrame::RECOVER_ORIG );
		}
	}

	if ( is_string( $outText ) ) {
		# Re-insert stripped tags
		$outText = rtrim( $this->mStripState->unstripBoth( $outText ) );
	}

	return $outText;
}
5909
5910 /**
5911 * This function returns the text of a section, specified by a number ($section).
5912 * A section is text under a heading like == Heading == or \<h1\>Heading\</h1\>, or
5913 * the first section before any such heading (section 0).
5914 *
5915 * If a section contains subsections, these are also returned.
5916 *
5917 * @param string $text Text to look in
5918 * @param string|int $sectionId Section identifier as a number or string
5919 * (e.g. 0, 1 or 'T-1').
5920 * @param string $defaultText Default to return if section is not found
5921 *
5922 * @return string Text of the requested section
5923 */
public function getSection( $text, $sectionId, $defaultText = '' ) {
	# In "get" mode the last argument is what comes back when the section is missing
	$mode = 'get';
	return $this->extractSections( $text, $sectionId, $mode, $defaultText );
}
5927
5928 /**
5929 * This function returns $oldtext after the content of the section
5930 * specified by $section has been replaced with $text. If the target
5931 * section does not exist, $oldtext is returned unchanged.
5932 *
5933 * @param string $oldText Former text of the article
5934 * @param string|int $sectionId Section identifier as a number or string
5935 * (e.g. 0, 1 or 'T-1').
5936 * @param string $newText Replacing text
5937 *
5938 * @return string Modified text
5939 */
public function replaceSection( $oldText, $sectionId, $newText ) {
	# In "replace" mode the last argument is the new content of the section
	$mode = 'replace';
	return $this->extractSections( $oldText, $sectionId, $mode, $newText );
}
5943
5944 /**
5945 * Get the ID of the revision we are parsing
5946 *
5947 * The return value will be either:
5948 * - a) Positive, indicating a specific revision ID (current or old)
5949 * - b) Zero, meaning the revision ID is specified by getCurrentRevisionCallback()
5950 * - c) Null, meaning the parse is for preview mode and there is no revision
5951 *
5952 * @return int|null
5953 */
public function getRevisionId() {
	# Plain accessor; see the method documentation for the meaning of 0 and null
	$revisionId = $this->mRevisionId;
	return $revisionId;
}
5957
5958 /**
5959 * Get the revision object for $this->mRevisionId
5960 *
5961 * @return Revision|null Either a Revision object or null
5962 * @since 1.23 (public since 1.23)
5963 */
public function getRevisionObject() {
	# Serve the cached object when one has already been resolved
	if ( $this->mRevisionObject ) {
		return $this->mRevisionObject;
	}

	// NOTE: try to get the RevisionObject even if mRevisionId is null.
	// This is useful when parsing a revision that has not yet been saved.
	// However, if we get back a saved revision even though we are in
	// preview mode, we'll have to ignore it, see below.
	// NOTE: This callback may be used to inject an OLD revision that was
	// already loaded, so "current" is a bit of a misnomer. We can't just
	// skip it if mRevisionId is set.
	$callback = $this->mOptions->getCurrentRevisionCallback();
	$rev = call_user_func( $callback, $this->getTitle(), $this );

	$inPreview = ( $this->mRevisionId === null );
	if ( $inPreview && $rev && $rev->getId() ) {
		// Preview mode, yet the callback returned an existing revision.
		// Ignore it and return null, it's probably the page's current
		// revision, which is not what we want here. The callback still has
		// to be called so that an unsaved revision can be injected, e.g.
		// for self-transclusion previews.
		return null;
	}

	// If the parse is for a new revision, then the callback should have
	// already been set to force the object and should match mRevisionId.
	// If not, try to fetch by mRevisionId for sanity.
	if ( $this->mRevisionId && $rev && $rev->getId() != $this->mRevisionId ) {
		$rev = Revision::newFromId( $this->mRevisionId );
	}

	$this->mRevisionObject = $rev;
	return $rev;
}
6002
6003 /**
6004 * Get the timestamp associated with the current revision, adjusted for
6005 * the default server-local timestamp
6006 * @return string TS_MW timestamp
6007 */
public function getRevisionTimestamp() {
	if ( $this->mRevisionTimestamp === null ) {
		# Use specified revision timestamp, falling back to the current timestamp
		$revObject = $this->getRevisionObject();
		$timestamp = $revObject ? $revObject->getTimestamp() : $this->mOptions->getTimestamp();
		$this->mOutput->setRevisionTimestampUsed( $timestamp ); // unadjusted time zone

		# The cryptic '' timezone parameter tells to use the site-default
		# timezone offset instead of the user settings.
		# Since this value will be saved into the parser cache, served
		# to other users, and potentially even used inside links and such,
		# it needs to be consistent for all visitors.
		$this->mRevisionTimestamp = $this->contLang->userAdjust( $timestamp, '' );
	}

	return $this->mRevisionTimestamp;
}
6027
6028 /**
6029 * Get the name of the user that edited the last revision
6030 *
6031 * @return string User name
6032 */
public function getRevisionUser() {
	# Already resolved on an earlier call
	if ( $this->mRevisionUser !== null ) {
		return $this->mRevisionUser;
	}

	$revObject = $this->getRevisionObject();
	if ( $revObject ) {
		$this->mRevisionUser = $revObject->getUserText();
	} elseif ( $this->ot['wiki'] || $this->mOptions->getIsPreview() ) {
		# if this template is subst: the revision id will be blank,
		# so just use the current user's name
		$this->mRevisionUser = $this->getUser()->getName();
	}

	# Still null when neither branch above applied
	return $this->mRevisionUser;
}
6047
6048 /**
6049 * Get the size of the revision
6050 *
6051 * @return int|null Revision size
6052 */
public function getRevisionSize() {
	if ( $this->mRevisionSize === null ) {
		$revObject = $this->getRevisionObject();

		# if this variable is subst: the revision id will be blank,
		# so just use the parser input size, because the substitution
		# itself will change the size.
		$this->mRevisionSize = $revObject
			? $revObject->getSize()
			: $this->mInputSize;
	}

	return $this->mRevisionSize;
}
6068
6069 /**
6070 * Mutator for $mDefaultSort
6071 *
6072 * @param string $sort New value
6073 */
public function setDefaultSort( $sort ) {
	# Remember the key on the parser itself ...
	$this->mDefaultSort = $sort;
	# ... and record it as the 'defaultsort' property of the output
	$this->mOutput->setProperty( 'defaultsort', $sort );
}
6078
6079 /**
6080 * Accessor for $mDefaultSort
6081 * Will use the empty string if none is set.
6082 *
6083 * This value is treated as a prefix, so the
6084 * empty string is equivalent to sorting by
6085 * page name.
6086 *
6087 * @return string
6088 */
public function getDefaultSort() {
	# false means "never set"; report that as the empty prefix
	return $this->mDefaultSort !== false ? $this->mDefaultSort : '';
}
6096
6097 /**
6098 * Accessor for $mDefaultSort
6099 * Unlike getDefaultSort(), will return false if none is set
6100 *
6101 * @return string|bool
6102 */
public function getCustomDefaultSort() {
	# Raw value, with no fallback applied
	$customSort = $this->mDefaultSort;
	return $customSort;
}
6106
private static function getSectionNameFromStrippedText( $text ) {
	# Pipeline: normalise whitespace, decode character references, then
	# apply the same normalisation that link fragments get.
	return self::normalizeSectionName(
		Sanitizer::decodeCharReferences(
			Sanitizer::normalizeSectionNameWhitespace( $text )
		)
	);
}
6113
private static function makeAnchor( $sectionName ) {
	# Escape the name for use in a link and prefix it with '#'
	$escaped = Sanitizer::escapeIdForLink( $sectionName );
	return "#$escaped";
}
6117
private function makeLegacyAnchor( $sectionName ) {
	$fragmentMode = $this->svcOptions->get( 'FragmentMode' );

	# Legacy escaping is only used when it is configured as the second
	# (fallback) fragment mode.
	$useLegacy = isset( $fragmentMode[1] ) && $fragmentMode[1] === 'legacy';

	// ForAttribute() and ForLink() are the same for legacy encoding
	$id = $useLegacy
		? Sanitizer::escapeIdForAttribute( $sectionName, Sanitizer::ID_FALLBACK )
		: Sanitizer::escapeIdForLink( $sectionName );

	return '#' . $id;
}
6129
6130 /**
6131 * Try to guess the section anchor name based on a wikitext fragment
6132 * presumably extracted from a heading, for example "Header" from
6133 * "== Header ==".
6134 *
6135 * @param string $text
6136 * @return string Anchor (starting with '#')
6137 */
public function guessSectionNameFromWikiText( $text ) {
	# Strip out wikitext links (they break the anchor), normalise what is
	# left into a section name, then turn that into an anchor.
	$stripped = $this->stripSectionName( $text );
	return self::makeAnchor( self::getSectionNameFromStrippedText( $stripped ) );
}
6144
6145 /**
6146 * Same as guessSectionNameFromWikiText(), but produces legacy anchors
6147 * instead, if possible. For use in redirects, since various versions
6148 * of Microsoft browsers interpret Location: headers as something other
6149 * than UTF-8, resulting in breakage.
6150 *
6151 * @param string $text The section name
6152 * @return string Anchor (starting with '#')
6153 */
public function guessLegacySectionNameFromWikiText( $text ) {
	# Strip out wikitext links (they break the anchor), normalise what is
	# left into a section name, then build the legacy-style anchor.
	$stripped = $this->stripSectionName( $text );
	return $this->makeLegacyAnchor( self::getSectionNameFromStrippedText( $stripped ) );
}
6160
6161 /**
6162 * Like guessSectionNameFromWikiText(), but takes already-stripped text as input.
6163 * @param string $text Section name (plain text)
6164 * @return string Anchor (starting with '#')
6165 */
public static function guessSectionNameFromStrippedText( $text ) {
	# The input is already free of wikitext, so only normalise and anchor it
	return self::makeAnchor( self::getSectionNameFromStrippedText( $text ) );
}
6170
6171 /**
6172 * Apply the same normalization as code making links to this section would
6173 *
6174 * @param string $text
6175 * @return string
6176 */
private static function normalizeSectionName( $text ) {
	# T90902: ensure the same normalization is applied for IDs as to links
	$titleParser = MediaWikiServices::getInstance()->getTitleParser();

	try {
		# Parse the text as a bare fragment ("#...") and keep the fragment part
		$parts = $titleParser->splitTitleString( "#$text" );
		return $parts['fragment'];
	} catch ( MalformedTitleException $ex ) {
		# Not parseable as a title fragment: hand the text back unchanged
		return $text;
	}
}
6188
6189 /**
6190 * Strips a text string of wikitext for use in a section anchor
6191 *
6192 * Accepts a text string and then removes all wikitext from the
6193 * string and leaves only the resultant text (i.e. the result of
6194 * [[User:WikiSysop|Sysop]] would be "Sysop" and the result of
6195 * [[User:WikiSysop]] would be "User:WikiSysop") - this is intended
6196 * to create valid section anchors by mimicking the output of the
6197 * parser when headings are parsed.
6198 *
6199 * @param string $text Text string to be stripped of wikitext
6200 * for use in a Section anchor
6201 * @return string Filtered text string
6202 */
public function stripSectionName( $text ) {
	# Each rule is [ pattern, replacement ]; they are applied one after the other.
	$rules = [
		# Strip internal link markup: piped links keep their label ...
		[ '/\[\[:?([^[|]+)\|([^[]+)\]\]/', '$2' ],
		# ... plain links keep their target
		[ '/\[\[:?([^[]+)\|?\]\]/', '$1' ],
		# Strip external link markup
		# @todo FIXME: Not tolerant to blank link text
		# I.E. [https://www.mediawiki.org] will render as [1] or something depending
		# on how many empty links there are on the page - need to figure that out.
		[ '/\[(?i:' . $this->mUrlProtocols . ')([^ ]+?) ([^[]+)\]/', '$2' ],
	];
	foreach ( $rules as $rule ) {
		list( $pattern, $replacement ) = $rule;
		$text = preg_replace( $pattern, $replacement, $text );
	}

	# Parse wikitext quotes (italics & bold)
	$text = $this->doQuotes( $text );

	# Strip HTML tags
	return StringUtils::delimiterReplace( '<', '>', '', $text );
}
6221
6222 /**
6223 * strip/replaceVariables/unstrip for preprocessor regression testing
6224 *
6225 * @param string $text
6226 * @param Title $title
6227 * @param ParserOptions $options
6228 * @param int $outputType
6229 *
6230 * @return string
6231 */
public function testSrvus( $text, Title $title, ParserOptions $options,
	$outputType = self::OT_HTML
) {
	# Hold the parser lock until this variable goes out of scope
	$lockScope = $this->lock();
	$this->startParse( $title, $options, $outputType, true );

	# replaceVariables -> unstrip -> remove disallowed HTML
	$expanded = $this->replaceVariables( $text );
	$unstripped = $this->mStripState->unstripBoth( $expanded );
	return Sanitizer::removeHTMLtags( $unstripped );
}
6243
6244 /**
6245 * @param string $text
6246 * @param Title $title
6247 * @param ParserOptions $options
6248 * @return string
6249 */
public function testPst( $text, Title $title, ParserOptions $options ) {
	# Run the pre-save transform on behalf of the user carried by the options
	$user = $options->getUser();
	return $this->preSaveTransform( $text, $title, $user, $options );
}
6253
6254 /**
6255 * @param string $text
6256 * @param Title $title
6257 * @param ParserOptions $options
6258 * @return string
6259 */
public function testPreprocess( $text, Title $title, ParserOptions $options ) {
	# Same pipeline as testSrvus(), but with the preprocess output type
	$outputType = self::OT_PREPROCESS;
	return $this->testSrvus( $text, $title, $options, $outputType );
}
6263
6264 /**
6265 * Call a callback function on all regions of the given text that are not
6266 * inside strip markers, and replace those regions with the return value
6267 * of the callback. For example, with input:
6268 *
6269 * aaa<MARKER>bbb
6270 *
6271 * This will call the callback function twice, with 'aaa' and 'bbb'. Those
6272 * two strings will be replaced with the value returned by the callback in
6273 * each case.
6274 *
6275 * @param string $s
6276 * @param callable $callback
6277 *
6278 * @return string
6279 */
public function markerSkipCallback( $s, $callback ) {
	$out = '';
	$length = strlen( $s );
	$suffixLength = strlen( self::MARKER_SUFFIX );

	$pos = 0;
	while ( $pos < $length ) {
		$start = strpos( $s, self::MARKER_PREFIX, $pos );
		if ( $start === false ) {
			# No further markers: the rest of the string is ordinary text
			$out .= call_user_func( $callback, substr( $s, $pos ) );
			break;
		}

		# Ordinary text in front of the marker goes through the callback
		$out .= call_user_func( $callback, substr( $s, $pos, $start - $pos ) );

		$end = strpos( $s, self::MARKER_SUFFIX, $start );
		if ( $end === false ) {
			# Unterminated marker: copy the remainder through untouched
			$out .= substr( $s, $start );
			break;
		}

		# Copy the complete marker verbatim and continue right after it
		$end += $suffixLength;
		$out .= substr( $s, $start, $end - $start );
		$pos = $end;
	}

	return $out;
}
6303
6304 /**
6305 * Remove any strip markers found in the given text.
6306 *
6307 * @param string $text
6308 * @return string
6309 */
public function killMarkers( $text ) {
	# Delegates to the strip state of the current parse
	$stripState = $this->mStripState;
	return $stripState->killMarkers( $text );
}
6313
6314 /**
6315 * Save the parser state required to convert the given half-parsed text to
6316 * HTML. "Half-parsed" in this context means the output of
6317 * recursiveTagParse() or internalParse(). This output has strip markers
6318 * from replaceVariables (extensionSubstitution() etc.), and link
6319 * placeholders from replaceLinkHolders().
6320 *
6321 * Returns an array which can be serialized and stored persistently. This
6322 * array can later be loaded into another parser instance with
6323 * unserializeHalfParsedText(). The text can then be safely incorporated into
6324 * the return value of a parser hook.
6325 *
6326 * @deprecated since 1.31
6327 * @param string $text
6328 *
6329 * @return array
6330 */
public function serializeHalfParsedText( $text ) {
	wfDeprecated( __METHOD__, '1.31' );

	# Bundle the text with the version stamp and with the parts of the strip
	# state and link holder array obtained for this text.
	return [
		'text' => $text,
		'version' => self::HALF_PARSED_VERSION,
		'stripState' => $this->mStripState->getSubState( $text ),
		'linkHolders' => $this->mLinkHolders->getSubArray( $text )
	];
}
6341
6342 /**
6343 * Load the parser state given in the $data array, which is assumed to
6344 * have been generated by serializeHalfParsedText(). The text contents is
6345 * extracted from the array, and its markers are transformed into markers
6346 * appropriate for the current Parser instance. This transformed text is
6347 * returned, and can be safely included in the return value of a parser
6348 * hook.
6349 *
6350 * If the $data array has been stored persistently, the caller should first
6351 * check whether it is still valid, by calling isValidHalfParsedText().
6352 *
6353 * @deprecated since 1.31
6354 * @param array $data Serialized data
6355 * @throws MWException
6356 * @return string
6357 */
public function unserializeHalfParsedText( $data ) {
	wfDeprecated( __METHOD__, '1.31' );

	# Refuse data without a version stamp or with a different version
	$versionMatches = isset( $data['version'] )
		&& $data['version'] == self::HALF_PARSED_VERSION;
	if ( !$versionMatches ) {
		throw new MWException( __METHOD__ . ': invalid version' );
	}

	# First, extract the strip state.
	$segments = [ $data['text'] ];
	$segments = $this->mStripState->merge( $data['stripState'], $segments );

	# Now renumber links
	$segments = $this->mLinkHolders->mergeForeign( $data['linkHolders'], $segments );

	# Should be good to go.
	return $segments[0];
}
6374
6375 /**
6376 * Returns true if the given array, presumed to be generated by
6377 * serializeHalfParsedText(), is compatible with the current version of the
6378 * parser.
6379 *
6380 * @deprecated since 1.31
6381 * @param array $data
6382 *
6383 * @return bool
6384 */
public function isValidHalfParsedText( $data ) {
	wfDeprecated( __METHOD__, '1.31' );

	# Data without a version stamp can never be valid
	if ( !isset( $data['version'] ) ) {
		return false;
	}
	return $data['version'] == self::HALF_PARSED_VERSION;
}
6389
6390 /**
6391 * Parse a width parameter of an image link, such as 300px or 200x300px
6392 *
6393 * @param string $value
6394 * @param bool $parseHeight
6395 *
6396 * @return array
6397 * @since 1.20
6398 */
public static function parseWidthParam( $value, $parseHeight = true ) {
	# Nothing to parse
	if ( $value === '' ) {
		return [];
	}

	# (T15500) In both cases (width/height and width only),
	# permit trailing "px" for backward compatibility.
	$m = [];
	if ( $parseHeight && preg_match( '/^([0-9]*)x([0-9]*)\s*(?:px)?\s*$/', $value, $m ) ) {
		# "<width>x<height>" form; a missing number comes out as 0
		return [
			'width' => intval( $m[1] ),
			'height' => intval( $m[2] ),
		];
	}

	if ( preg_match( '/^[0-9]*\s*(?:px)?\s*$/', $value ) ) {
		# Width-only form
		return [ 'width' => intval( $value ) ];
	}

	# Not a recognised size specification
	return [];
}
6418
6419 /**
6420 * Lock the current instance of the parser.
6421 *
6422 * This is meant to stop someone from calling the parser
6423 * recursively and messing up all the strip state.
6424 *
6425 * @throws MWException If parser is in a parse
6426 * @return ScopedCallback The lock will be released once the return value goes out of scope.
6427 */
protected function lock() {
	# mInParse holds the lock owner's backtrace while a parse is running
	if ( $this->mInParse ) {
		throw new MWException( "Parser state cleared while parsing. "
			. "Did you call Parser::parse recursively? Lock is held by: " . $this->mInParse );
	}

	// Save the backtrace when locking, so that if some code tries locking again,
	// we can print the lock owner's backtrace for easier debugging
	$this->mInParse = ( new Exception )->getTraceAsString();

	# The callback passed to ScopedCallback clears the lock again
	return new ScopedCallback( function () {
		$this->mInParse = false;
	} );
}
6445
6446 /**
6447 * Strip outer <p></p> tag from the HTML source of a single paragraph.
6448 *
6449 * Returns original HTML if the <p/> tag has any attributes, if there's no wrapping <p/> tag,
6450 * or if there is more than one <p/> tag in the input HTML.
6451 *
6452 * @param string $html
6453 * @return string
6454 * @since 1.24
6455 */
public static function stripOuterParagraph( $html ) {
	$m = [];

	# The input must be wrapped in one attribute-less <p>...</p>
	# (ungreedy match, so $m[1] is the shortest possible content)
	if ( !preg_match( '/^<p>(.*)\n?<\/p>\n?$/sU', $html, $m ) ) {
		return $html;
	}

	# A closing tag inside the content means there is more than one paragraph
	if ( strpos( $m[1], '</p>' ) !== false ) {
		return $html;
	}

	return $m[1];
}
6464
6465 /**
6466 * Return this parser if it is not doing anything, otherwise
6467 * get a fresh parser. You can use this method by doing
6468 * $newParser = $oldParser->getFreshParser(), or more simply
6469 * $oldParser->getFreshParser()->parse( ... );
6470 * if you're unsure if $oldParser is safe to use.
6471 *
6472 * @since 1.24
6473 * @return Parser A parser object that is not parsing anything
6474 */
public function getFreshParser() {
	# While this instance is in the middle of a parse it must not be reused,
	# so ask the factory for another parser instead.
	return $this->mInParse ? $this->factory->create() : $this;
}
6482
6483 /**
6484 * Sets up the PHP implementation of OOUI for use in this request
6485 * and instructs OutputPage to enable OOUI for itself.
6486 *
6487 * @since 1.26
6488 */
public function enableOOUI() {
	# Prepare PHP-side OOUI for this request ...
	OutputPage::setupOOUI();
	# ... and flag the parser output as needing OOUI
	$this->mOutput->setEnableOOUI( true );
}
6493
6494 /**
6495 * @param string $flag
6496 * @param string $reason
6497 */
protected function setOutputFlag( $flag, $reason ) {
	# Raise the flag on the output object first ...
	$this->mOutput->setFlag( $flag );

	# ... then leave a debug trace naming the page and the reason
	$name = $this->mTitle->getPrefixedText();
	$this->logger->debug( __METHOD__ . ": set $flag flag on '$name'; $reason" );
}
6503 }