includes/parser/Parser.php

   1 <?php
   2 /**
   3  * PHP parser that converts wiki markup to HTML.
   4  *
   5  * This program is free software; you can redistribute it and/or modify
   6  * it under the terms of the GNU General Public License as published by
   7  * the Free Software Foundation; either version 2 of the License, or
   8  * (at your option) any later version.
   9  *
  10  * This program is distributed in the hope that it will be useful,
  11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  13  * GNU General Public License for more details.
  14  *
  15  * You should have received a copy of the GNU General Public License along
  16  * with this program; if not, write to the Free Software Foundation, Inc.,
  17  * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
  18  * http://www.gnu.org/copyleft/gpl.html
  19  *
  20  * @file
  21  * @ingroup Parser
  22  */
  23 use MediaWiki\BadFileLookup;
  24 use MediaWiki\Config\ServiceOptions;
  25 use MediaWiki\Linker\LinkRenderer;
  26 use MediaWiki\Linker\LinkRendererFactory;
  27 use MediaWiki\Linker\LinkTarget;
  28 use MediaWiki\MediaWikiServices;
  29 use MediaWiki\Special\SpecialPageFactory;
  30 use Psr\Log\NullLogger;
  31 use Wikimedia\ScopedCallback;
  32 use Psr\Log\LoggerInterface;
  33
  34 /**
  35  * @defgroup Parser Parser
  36  */
  37
  38 /**
  39  * PHP Parser - Processes wiki markup (which uses a more user-friendly
  40  * syntax, such as "[[link]]" for making links), and provides a one-way
  41  * transformation of that wiki markup into (X)HTML output / markup
  42  * (which in turn the browser understands, and can display).
  43  *
  44  * There are seven main entry points into the Parser class:
  45  *
  46  * - Parser::parse()
  47  *     produces HTML output
  48  * - Parser::preSaveTransform()
  49  *     produces altered wiki markup
  50  * - Parser::preprocess()
  51  *     removes HTML comments and expands templates
  52  * - Parser::cleanSig() and Parser::cleanSigInSig()
  53  *     cleans a signature before saving it to preferences
  54  * - Parser::getSection()
  55  *     return the content of a section from an article for section editing
  56  * - Parser::replaceSection()
  57  *     replaces a section by number inside an article
  58  * - Parser::getPreloadText()
  59  *     removes <noinclude> sections and <includeonly> tags
  60  *
  61  * @warning $wgUser or $wgTitle or $wgRequest or $wgLang. Keep them away!
  62  *
  63  * @par Settings:
  64  * $wgNamespacesWithSubpages
  65  *
  66  * @par Settings only within ParserOptions:
  67  * $wgAllowExternalImages
  68  * $wgAllowSpecialInclusion
  69  * $wgInterwikiMagic
  70  * $wgMaxArticleSize
  71  *
  72  * @ingroup Parser
  73  */
  74 class Parser {
  75         /**
  76          * Update this version number when the ParserOutput format
  77          * changes in an incompatible way, so the parser cache
  78          * can automatically discard old data.
  79          */
  80         const VERSION = '1.6.4';
  81
  82         /**
  83          * Update this version number when the output of serialiseHalfParsedText()
  84          * changes in an incompatible way
  85          */
  86         const HALF_PARSED_VERSION = 2;
  87
  88         # Flags for Parser::setFunctionHook
  89         const SFH_NO_HASH = 1;
  90         const SFH_OBJECT_ARGS = 2;
  91
  92         # Constants needed for external link processing
  93         # Everything except bracket, space, or control characters
  94         # \p{Zs} is unicode 'separator, space' category. It covers the space 0x20
  95         # as well as U+3000 (IDEOGRAPHIC SPACE), see T21052
  96         # \x{FFFD} is the Unicode replacement character, which Preprocessor_DOM
  97         # uses to replace invalid HTML characters.
  98         const EXT_LINK_URL_CLASS = '[^][<>"\\x00-\\x20\\x7F\p{Zs}\x{FFFD}]';
  99         # Simplified expression to match an IPv4 or IPv6 address, or
 100         # at least one character of a host name (embeds EXT_LINK_URL_CLASS)
 101         const EXT_LINK_ADDR = '(?:[0-9.]+|\\[(?i:[0-9a-f:.]+)\\]|[^][<>"\\x00-\\x20\\x7F\p{Zs}\x{FFFD}])';
 102         # RegExp to make image URLs (embeds IPv6 part of EXT_LINK_ADDR)
 103         // phpcs:ignore Generic.Files.LineLength
 104         const EXT_IMAGE_REGEX = '/^(http:\/\/|https:\/\/)((?:\\[(?i:[0-9a-f:.]+)\\])?[^][<>"\\x00-\\x20\\x7F\p{Zs}\x{FFFD}]+)
 105                 \\/([A-Za-z0-9_.,~%\\-+&;#*?!=()@\\x80-\\xFF]+)\\.((?i)gif|png|jpg|jpeg)$/Sxu';
 106
 107         # Regular expression for a non-newline space
 108         const SPACE_NOT_NL = '(?:\t|&nbsp;|&\#0*160;|&\#[Xx]0*[Aa]0;|\p{Zs})';
 109
 110         # Flags for preprocessToDom
 111         const PTD_FOR_INCLUSION = 1;
 112
 113         # Allowed values for $this->mOutputType
 114         # Parameter to startExternalParse().
 115         const OT_HTML = 1; # like parse()
 116         const OT_WIKI = 2; # like preSaveTransform()
 117         const OT_PREPROCESS = 3; # like preprocess()
 118         const OT_MSG = 3;
 119         const OT_PLAIN = 4; # like extractSections() - portions of the original are returned unchanged.
 120
 121         /**
 122          * @var string Prefix and suffix for temporary replacement strings
 123          * for the multipass parser.
 124          *
 125          * \x7f should never appear in input as it's disallowed in XML.
 126          * Using it at the front also gives us a little extra robustness
 127          * since it shouldn't match when butted up against identifier-like
 128          * string constructs.
 129          *
 130          * Must not consist of all title characters, or else it will change
 131          * the behavior of <nowiki> in a link.
 132          *
 133          * Must have a character that needs escaping in attributes, otherwise
 134          * someone could put a strip marker in an attribute, to get around
 135          * escaping quote marks, and break out of the attribute. Thus we add
 136          * `'".
 137          */
 138         const MARKER_SUFFIX = "-QINU`\"'\x7f";
 139         const MARKER_PREFIX = "\x7f'\"`UNIQ-";
 140
 141         # Markers used for wrapping the table of contents
 142         const TOC_START = '<mw:toc>';
 143         const TOC_END = '</mw:toc>';
 144
 145         /** @var int Assume that no output will later be saved this many seconds after parsing */
 146         const MAX_TTS = 900;
 147
 148         # Persistent:
 149         public $mTagHooks = [];
 150         public $mTransparentTagHooks = [];
 151         public $mFunctionHooks = [];
 152         public $mFunctionSynonyms = [ 0 => [], 1 => [] ];
 153         public $mFunctionTagHooks = [];
 154         public $mStripList = [];
 155         public $mDefaultStripList = [];
 156         public $mVarCache = [];
 157         public $mImageParams = [];
 158         public $mImageParamsMagicArray = [];
 159         public $mMarkerIndex = 0;
 160         /**
 161          * @var bool Whether firstCallInit still needs to be called
 162          */
 163         public $mFirstCall = true;
 164
 165         # Initialised by initialiseVariables()
 166
 167         /**
 168          * @var MagicWordArray
 169          */
 170         public $mVariables;
 171
 172         /**
 173          * @var MagicWordArray
 174          */
 175         public $mSubstWords;
 176
 177         /**
 178          * @deprecated since 1.34, there should be no need to use this
 179          * @var array
 180          */
 181         public $mConf;
 182
 183         # Initialised in constructor
 184         public $mExtLinkBracketedRegex, $mUrlProtocols;
 185
 186         # Initialized in getPreprocessor()
 187         /** @var Preprocessor */
 188         public $mPreprocessor;
 189
 190         # Cleared with clearState():
 191         /**
 192          * @var ParserOutput
 193          */
 194         public $mOutput;
 195         public $mAutonumber;
 196
 197         /**
 198          * @var StripState
 199          */
 200         public $mStripState;
 201
 202         public $mIncludeCount;
 203         /**
 204          * @var LinkHolderArray
 205          */
 206         public $mLinkHolders;
 207
 208         public $mLinkID;
 209         public $mIncludeSizes, $mPPNodeCount, $mGeneratedPPNodeCount, $mHighestExpansionDepth;
 210         public $mDefaultSort;
 211         public $mTplRedirCache, $mHeadings, $mDoubleUnderscores;
 212         public $mExpensiveFunctionCount; # number of expensive parser function calls
 213         public $mShowToc, $mForceTocPosition;
 214         /** @var array */
 215         public $mTplDomCache;
 216
 217         /**
 218          * @var User
 219          */
 220         public $mUser; # User object; only used when doing pre-save transform
 221
 222         # Temporary
 223         # These are variables reset at least once per parse regardless of $clearState
 224
 225         /**
 226          * @var ParserOptions
 227          */
 228         public $mOptions;
 229
 230         /**
 231          * Since 1.34, leaving `mTitle` uninitialized or setting `mTitle` to
 232          * `null` is deprecated.
 233          *
 234          * @internal
 235          * @var Title|null
 236          */
 237         public $mTitle;        # Title context, used for self-link rendering and similar things
 238         public $mOutputType;   # Output type, one of the OT_xxx constants
 239         public $ot;            # Shortcut alias, see setOutputType()
 240         public $mRevisionObject; # The revision object of the specified revision ID
 241         public $mRevisionId;   # ID to display in {{REVISIONID}} tags
 242         public $mRevisionTimestamp; # The timestamp of the specified revision ID
 243         public $mRevisionUser; # User to display in {{REVISIONUSER}} tag
 244         public $mRevisionSize; # Size to display in {{REVISIONSIZE}} variable
 245         public $mRevIdForTs;   # The revision ID which was used to fetch the timestamp
 246         public $mInputSize = false; # For {{PAGESIZE}} on current page.
 247
 248         /**
 249          * @var array Array with the language name of each language link (i.e. the
 250          * interwiki prefix) in the key, value arbitrary. Used to avoid sending
 251          * duplicate language links to the ParserOutput.
 252          */
 253         public $mLangLinkLanguages;
 254
 255         /**
 256          * @var MapCacheLRU|null
 257          * @since 1.24
 258          *
 259          * A cache of the current revisions of titles. Keys are $title->getPrefixedDbKey()
 260          */
 261         public $currentRevisionCache;
 262
 263         /**
 264          * @var bool|string Recursive call protection.
 265          * This variable should be treated as if it were private.
 266          */
 267         public $mInParse = false;
 268
 269         /** @var SectionProfiler */
 270         protected $mProfiler;
 271
 272         /**
 273          * @var LinkRenderer
 274          */
 275         protected $mLinkRenderer;
 276
 277         /** @var MagicWordFactory */
 278         private $magicWordFactory;
 279
 280         /** @var Language */
 281         private $contLang;
 282
 283         /** @var ParserFactory */
 284         private $factory;
 285
 286         /** @var SpecialPageFactory */
 287         private $specialPageFactory;
 288
 289         /**
 290          * This is called $svcOptions instead of $options like elsewhere to avoid confusion with
 291          * $mOptions, which is public and widely used, and also with the local variable $options used
 292          * for ParserOptions throughout this file.
 293          *
 294          * @var ServiceOptions
 295          */
 296         private $svcOptions;
 297
 298         /** @var LinkRendererFactory */
 299         private $linkRendererFactory;
 300
 301         /** @var NamespaceInfo */
 302         private $nsInfo;
 303
 304         /** @var LoggerInterface */
 305         private $logger;
 306
 307         /** @var BadFileLookup */
 308         private $badFileLookup;
 309
 310         /**
 311          * TODO Make this a const when HHVM support is dropped (T192166)
 312          *
 313          * @var array
 314          * @since 1.33
 315          */
 316         public static $constructorOptions = [
 317                 // See $wgParserConf documentation
 318                 'class',
 319                 'preprocessorClass',
 320                 // See documentation for the corresponding config options
 321                 'ArticlePath',
 322                 'EnableScaryTranscluding',
 323                 'ExtraInterlanguageLinkPrefixes',
 324                 'FragmentMode',
 325                 'LanguageCode',
 326                 'MaxSigChars',
 327                 'MaxTocLevel',
 328                 'MiserMode',
 329                 'ScriptPath',
 330                 'Server',
 331                 'ServerName',
 332                 'ShowHostnames',
 333                 'Sitename',
 334                 'StylePath',
 335                 'TranscludeCacheExpiry',
 336         ];
 337
 338         /**
 339          * Constructing parsers directly is deprecated! Use a ParserFactory.
 340          *
 341          * @param ServiceOptions|null $svcOptions
 342          * @param MagicWordFactory|null $magicWordFactory
 343          * @param Language|null $contLang Content language
 344          * @param ParserFactory|null $factory
 345          * @param string|null $urlProtocols As returned from wfUrlProtocols()
 346          * @param SpecialPageFactory|null $spFactory
 347          * @param LinkRendererFactory|null $linkRendererFactory
 348          * @param NamespaceInfo|null $nsInfo
 349          * @param LoggerInterface|null $logger
 350          * @param BadFileLookup|null $badFileLookup
 351          */
 352         public function __construct(
 353                 $svcOptions = null,
 354                 MagicWordFactory $magicWordFactory = null,
 355                 Language $contLang = null,
 356                 ParserFactory $factory = null,
 357                 $urlProtocols = null,
 358                 SpecialPageFactory $spFactory = null,
 359                 $linkRendererFactory = null,
 360                 $nsInfo = null,
 361                 $logger = null,
 362                 BadFileLookup $badFileLookup = null
 363         ) {
 364                 if ( !$svcOptions || is_array( $svcOptions ) ) {
 365                         // Pre-1.34 calling convention is the first parameter is just ParserConf, the seventh is
 366                         // Config, and the eighth is LinkRendererFactory.
 367                         $this->mConf = (array)$svcOptions;
 368                         if ( empty( $this->mConf['class'] ) ) {
 369                                 $this->mConf['class'] = self::class;
 370                         }
 371                         if ( empty( $this->mConf['preprocessorClass'] ) ) {
 372                                 $this->mConf['preprocessorClass'] = self::getDefaultPreprocessorClass();
 373                         }
 374                         $this->svcOptions = new ServiceOptions( self::$constructorOptions,
 375                                 $this->mConf, func_num_args() > 6
 376                                         ? func_get_arg( 6 ) : MediaWikiServices::getInstance()->getMainConfig()
 377                         );
 378                         $linkRendererFactory = func_num_args() > 7 ? func_get_arg( 7 ) : null;
 379                         $nsInfo = func_num_args() > 8 ? func_get_arg( 8 ) : null;
 380                 } else {
 381                         // New calling convention
 382                         $svcOptions->assertRequiredOptions( self::$constructorOptions );
 383                         // $this->mConf is public, so we'll keep those two options there as well for
 384                         // compatibility until it's removed
 385                         $this->mConf = [
 386                                 'class' => $svcOptions->get( 'class' ),
 387                                 'preprocessorClass' => $svcOptions->get( 'preprocessorClass' ),
 388                         ];
 389                         $this->svcOptions = $svcOptions;
 390                 }
 391
 392                 $this->mUrlProtocols = $urlProtocols ?? wfUrlProtocols();
 393                 $this->mExtLinkBracketedRegex = '/\[(((?i)' . $this->mUrlProtocols . ')' .
 394                         self::EXT_LINK_ADDR .
 395                         self::EXT_LINK_URL_CLASS . '*)\p{Zs}*([^\]\\x00-\\x08\\x0a-\\x1F\\x{FFFD}]*?)\]/Su';
 396
 397                 $this->magicWordFactory = $magicWordFactory ??
 398                         MediaWikiServices::getInstance()->getMagicWordFactory();
 399
 400                 $this->contLang = $contLang ?? MediaWikiServices::getInstance()->getContentLanguage();
 401
 402                 $this->factory = $factory ?? MediaWikiServices::getInstance()->getParserFactory();
 403                 $this->specialPageFactory = $spFactory ??
 404                         MediaWikiServices::getInstance()->getSpecialPageFactory();
 405                 $this->linkRendererFactory = $linkRendererFactory ??
 406                         MediaWikiServices::getInstance()->getLinkRendererFactory();
 407                 $this->nsInfo = $nsInfo ?? MediaWikiServices::getInstance()->getNamespaceInfo();
 408                 $this->logger = $logger ?: new NullLogger();
 409                 $this->badFileLookup = $badFileLookup ??
 410                         MediaWikiServices::getInstance()->getBadFileLookup();
 411         }
 412
 413         /**
 414          * Reduce memory usage to reduce the impact of circular references
 415          */
 416         public function __destruct() {
 417                 if ( isset( $this->mLinkHolders ) ) {
 418                         // @phan-suppress-next-line PhanTypeObjectUnsetDeclaredProperty
 419                         unset( $this->mLinkHolders );
 420                 }
 421                 // @phan-suppress-next-line PhanTypeSuspiciousNonTraversableForeach
 422                 foreach ( $this as $name => $value ) {
 423                         unset( $this->$name );
 424                 }
 425         }
 426
 427         /**
 428          * Allow extensions to clean up when the parser is cloned
 429          */
 430         public function __clone() {
 431                 $this->mInParse = false;
 432
 433                 // T58226: When you create a reference "to" an object field, that
 434                 // makes the object field itself be a reference too (until the other
 435                 // reference goes out of scope). When cloning, any field that's a
 436                 // reference is copied as a reference in the new object. Both of these
 437                 // are defined PHP5 behaviors, as inconvenient as it is for us when old
 438                 // hooks from PHP4 days are passing fields by reference.
 439                 foreach ( [ 'mStripState', 'mVarCache' ] as $k ) {
 440                         // Make a non-reference copy of the field, then rebind the field to
 441                         // reference the new copy.
 442                         $tmp = $this->$k;
 443                         $this->$k =& $tmp;
 444                         unset( $tmp );
 445                 }
 446
 447                 Hooks::run( 'ParserCloned', [ $this ] );
 448         }
 449
 450         /**
 451          * Which class should we use for the preprocessor if not otherwise specified?
 452          *
 453          * @since 1.34
 454          * @deprecated since 1.34, removing configurability of preprocessor
 455          * @return string
 456          */
 457         public static function getDefaultPreprocessorClass() {
 458                 return Preprocessor_Hash::class;
 459         }
 460
 461         /**
 462          * Do various kinds of initialisation on the first call of the parser
 463          */
 464         public function firstCallInit() {
 465                 if ( !$this->mFirstCall ) {
 466                         return;
 467                 }
 468                 $this->mFirstCall = false;
 469
 470                 CoreParserFunctions::register( $this );
 471                 CoreTagHooks::register( $this );
 472                 $this->initialiseVariables();
 473
 474                 // Avoid PHP 7.1 warning from passing $this by reference
 475                 $parser = $this;
 476                 Hooks::run( 'ParserFirstCallInit', [ &$parser ] );
 477         }
 478
 479         /**
 480          * Clear Parser state
 481          *
 482          * @private
 483          */
 484         public function clearState() {
 485                 $this->firstCallInit();
 486                 $this->resetOutput();
 487                 $this->mAutonumber = 0;
 488                 $this->mIncludeCount = [];
 489                 $this->mLinkHolders = new LinkHolderArray( $this );
 490                 $this->mLinkID = 0;
 491                 $this->mRevisionObject = $this->mRevisionTimestamp =
 492                         $this->mRevisionId = $this->mRevisionUser = $this->mRevisionSize = null;
 493                 $this->mVarCache = [];
 494                 $this->mUser = null;
 495                 $this->mLangLinkLanguages = [];
 496                 $this->currentRevisionCache = null;
 497
 498                 $this->mStripState = new StripState( $this );
 499
 500                 # Clear these on every parse, T6549
 501                 $this->mTplRedirCache = $this->mTplDomCache = [];
 502
 503                 $this->mShowToc = true;
 504                 $this->mForceTocPosition = false;
 505                 $this->mIncludeSizes = [
 506                         'post-expand' => 0,
 507                         'arg' => 0,
 508                 ];
 509                 $this->mPPNodeCount = 0;
 510                 $this->mGeneratedPPNodeCount = 0;
 511                 $this->mHighestExpansionDepth = 0;
 512                 $this->mDefaultSort = false;
 513                 $this->mHeadings = [];
 514                 $this->mDoubleUnderscores = [];
 515                 $this->mExpensiveFunctionCount = 0;
 516
 517                 # Fix cloning
 518                 if ( isset( $this->mPreprocessor ) && $this->mPreprocessor->parser !== $this ) {
 519                         $this->mPreprocessor = null;
 520                 }
 521
 522                 $this->mProfiler = new SectionProfiler();
 523
 524                 // Avoid PHP 7.1 warning from passing $this by reference
 525                 $parser = $this;
 526                 Hooks::run( 'ParserClearState', [ &$parser ] );
 527         }
 528
 529         /**
 530          * Reset the ParserOutput
 531          */
 532         public function resetOutput() {
 533                 $this->mOutput = new ParserOutput;
 534                 $this->mOptions->registerWatcher( [ $this->mOutput, 'recordOption' ] );
 535         }
 536
 537         /**
 538          * Convert wikitext to HTML
 539          * Do not call this function recursively.
 540          *
 541          * @param string $text Text we want to parse
 542          * @param-taint $text escapes_htmlnoent
 543          * @param Title $title
 544          * @param ParserOptions $options
 545          * @param bool $linestart
 546          * @param bool $clearState
 547          * @param int|null $revid ID of the revision being rendered. This is used to render
 548          *  REVISION* magic words. 0 means that any current revision will be used. Null means
 549          *  that {{REVISIONID}}/{{REVISIONUSER}} will be empty and {{REVISIONTIMESTAMP}} will
 550          *  use the current timestamp.
 551          * @return ParserOutput A ParserOutput
 552          * @return-taint escaped
 553          */
 554         public function parse(
 555                 $text, Title $title, ParserOptions $options,
 556                 $linestart = true, $clearState = true, $revid = null
 557         ) {
 558                 if ( $clearState ) {
 559                         // We use U+007F DELETE to construct strip markers, so we have to make
 560                         // sure that this character does not occur in the input text.
 561                         $text = strtr( $text, "\x7f", "?" );
 562                         $magicScopeVariable = $this->lock();
 563                 }
 564                 // Strip U+0000 NULL (T159174)
 565                 $text = str_replace( "\000", '', $text );
 566
 567                 $this->startParse( $title, $options, self::OT_HTML, $clearState );
 568
 569                 $this->currentRevisionCache = null;
 570                 $this->mInputSize = strlen( $text );
 571                 if ( $this->mOptions->getEnableLimitReport() ) {
 572                         $this->mOutput->resetParseStartTime();
 573                 }
 574
 575                 $oldRevisionId = $this->mRevisionId;
 576                 $oldRevisionObject = $this->mRevisionObject;
 577                 $oldRevisionTimestamp = $this->mRevisionTimestamp;
 578                 $oldRevisionUser = $this->mRevisionUser;
 579                 $oldRevisionSize = $this->mRevisionSize;
 580                 if ( $revid !== null ) {
 581                         $this->mRevisionId = $revid;
 582                         $this->mRevisionObject = null;
 583                         $this->mRevisionTimestamp = null;
 584                         $this->mRevisionUser = null;
 585                         $this->mRevisionSize = null;
 586                 }
 587
 588                 // Avoid PHP 7.1 warning from passing $this by reference
 589                 $parser = $this;
 590                 Hooks::run( 'ParserBeforeStrip', [ &$parser, &$text, &$this->mStripState ] );
 591                 # No more strip!
 592                 Hooks::run( 'ParserAfterStrip', [ &$parser, &$text, &$this->mStripState ] );
 593                 $text = $this->internalParse( $text );
 594                 Hooks::run( 'ParserAfterParse', [ &$parser, &$text, &$this->mStripState ] );
 595
 596                 $text = $this->internalParseHalfParsed( $text, true, $linestart );
 597
 598                 /**
 599                  * A converted title will be provided in the output object if title and
 600                  * content conversion are enabled, the article text does not contain
 601                  * a conversion-suppressing double-underscore tag, and no
 602                  * {{DISPLAYTITLE:...}} is present. DISPLAYTITLE takes precedence over
 603                  * automatic link conversion.
 604                  */
 605                 if ( !( $options->getDisableTitleConversion()
 606                         || isset( $this->mDoubleUnderscores['nocontentconvert'] )
 607                         || isset( $this->mDoubleUnderscores['notitleconvert'] )
 608                         || $this->mOutput->getDisplayTitle() !== false )
 609                 ) {
 610                         $convruletitle = $this->getTargetLanguage()->getConvRuleTitle();
 611                         if ( $convruletitle ) {
 612                                 $this->mOutput->setTitleText( $convruletitle );
 613                         } else {
 614                                 $titleText = $this->getTargetLanguage()->convertTitle( $title );
 615                                 $this->mOutput->setTitleText( $titleText );
 616                         }
 617                 }
 618
 619                 # Compute runtime adaptive expiry if set
 620                 $this->mOutput->finalizeAdaptiveCacheExpiry();
 621
 622                 # Warn if too many heavyweight parser functions were used
 623                 if ( $this->mExpensiveFunctionCount > $this->mOptions->getExpensiveParserFunctionLimit() ) {
 624                         $this->limitationWarn( 'expensive-parserfunction',
 625                                 $this->mExpensiveFunctionCount,
 626                                 $this->mOptions->getExpensiveParserFunctionLimit()
 627                         );
 628                 }
 629
 630                 # Information on limits, for the benefit of users who try to skirt them
 631                 if ( $this->mOptions->getEnableLimitReport() ) {
 632                         $text .= $this->makeLimitReport();
 633                 }
 634
 635                 # Wrap non-interface parser output in a <div> so it can be targeted
 636                 # with CSS (T37247)
 637                 $class = $this->mOptions->getWrapOutputClass();
 638                 if ( $class !== false && !$this->mOptions->getInterfaceMessage() ) {
 639                         $this->mOutput->addWrapperDivClass( $class );
 640                 }
 641
 642                 $this->mOutput->setText( $text );
 643
 644                 $this->mRevisionId = $oldRevisionId;
 645                 $this->mRevisionObject = $oldRevisionObject;
 646                 $this->mRevisionTimestamp = $oldRevisionTimestamp;
 647                 $this->mRevisionUser = $oldRevisionUser;
 648                 $this->mRevisionSize = $oldRevisionSize;
 649                 $this->mInputSize = false;
 650                 $this->currentRevisionCache = null;
 651
 652                 return $this->mOutput;
 653         }
 654
 655         /**
 656          * Set the limit report data in the current ParserOutput, and return the
 657          * limit report HTML comment.
 658          *
 659          * @return string
 660          */
 661         protected function makeLimitReport() {
 662                 $maxIncludeSize = $this->mOptions->getMaxIncludeSize();
 663
 664                 $cpuTime = $this->mOutput->getTimeSinceStart( 'cpu' );
 665                 if ( $cpuTime !== null ) {
 666                         $this->mOutput->setLimitReportData( 'limitreport-cputime',
 667                                 sprintf( "%.3f", $cpuTime )
 668                         );
 669                 }
 670
 671                 $wallTime = $this->mOutput->getTimeSinceStart( 'wall' );
 672                 $this->mOutput->setLimitReportData( 'limitreport-walltime',
 673                         sprintf( "%.3f", $wallTime )
 674                 );
 675
 676                 $this->mOutput->setLimitReportData( 'limitreport-ppvisitednodes',
 677                         [ $this->mPPNodeCount, $this->mOptions->getMaxPPNodeCount() ]
 678                 );
 679                 $this->mOutput->setLimitReportData( 'limitreport-ppgeneratednodes',
 680                         [ $this->mGeneratedPPNodeCount, $this->mOptions->getMaxGeneratedPPNodeCount() ]
 681                 );
 682                 $this->mOutput->setLimitReportData( 'limitreport-postexpandincludesize',
 683                         [ $this->mIncludeSizes['post-expand'], $maxIncludeSize ]
 684                 );
 685                 $this->mOutput->setLimitReportData( 'limitreport-templateargumentsize',
 686                         [ $this->mIncludeSizes['arg'], $maxIncludeSize ]
 687                 );
 688                 $this->mOutput->setLimitReportData( 'limitreport-expansiondepth',
 689                         [ $this->mHighestExpansionDepth, $this->mOptions->getMaxPPExpandDepth() ]
 690                 );
 691                 $this->mOutput->setLimitReportData( 'limitreport-expensivefunctioncount',
 692                         [ $this->mExpensiveFunctionCount, $this->mOptions->getExpensiveParserFunctionLimit() ]
 693                 );
 694
 695                 foreach ( $this->mStripState->getLimitReport() as list( $key, $value ) ) {
 696                         $this->mOutput->setLimitReportData( $key, $value );
 697                 }
 698
 699                 Hooks::run( 'ParserLimitReportPrepare', [ $this, $this->mOutput ] );
 700
 701                 $limitReport = "NewPP limit report\n";
 702                 if ( $this->svcOptions->get( 'ShowHostnames' ) ) {
 703                         $limitReport .= 'Parsed by ' . wfHostname() . "\n";
 704                 }
 705                 $limitReport .= 'Cached time: ' . $this->mOutput->getCacheTime() . "\n";
 706                 $limitReport .= 'Cache expiry: ' . $this->mOutput->getCacheExpiry() . "\n";
 707                 $limitReport .= 'Dynamic content: ' .
 708                         ( $this->mOutput->hasDynamicContent() ? 'true' : 'false' ) .
 709                         "\n";
 710                 $limitReport .= 'Complications: [' . implode( ', ', $this->mOutput->getAllFlags() ) . "]\n";
 711
 712                 foreach ( $this->mOutput->getLimitReportData() as $key => $value ) {
 713                         if ( Hooks::run( 'ParserLimitReportFormat',
 714                                 [ $key, &$value, &$limitReport, false, false ]
 715                         ) ) {
 716                                 $keyMsg = wfMessage( $key )->inLanguage( 'en' )->useDatabase( false );
 717                                 $valueMsg = wfMessage( [ "$key-value-text", "$key-value" ] )
 718                                         ->inLanguage( 'en' )->useDatabase( false );
 719                                 if ( !$valueMsg->exists() ) {
 720                                         $valueMsg = new RawMessage( '$1' );
 721                                 }
 722                                 if ( !$keyMsg->isDisabled() && !$valueMsg->isDisabled() ) {
 723                                         $valueMsg->params( $value );
 724                                         $limitReport .= "{$keyMsg->text()}: {$valueMsg->text()}\n";
 725                                 }
 726                         }
 727                 }
 728                 // Since we're not really outputting HTML, decode the entities and
 729                 // then re-encode the things that need hiding inside HTML comments.
 730                 $limitReport = htmlspecialchars_decode( $limitReport );
 731
 732                 // Sanitize for comment. Note '‐' in the replacement is U+2010,
 733                 // which looks much like the problematic '-'.
 734                 $limitReport = str_replace( [ '-', '&' ], [ '‐', '&amp;' ], $limitReport );
 735                 $text = "\n<!-- \n$limitReport-->\n";
 736
 737                 // Add on template profiling data in human/machine readable way
 738                 $dataByFunc = $this->mProfiler->getFunctionStats();
 739                 uasort( $dataByFunc, function ( $a, $b ) {
 740                         return $b['real'] <=> $a['real']; // descending order
 741                 } );
 742                 $profileReport = [];
 743                 foreach ( array_slice( $dataByFunc, 0, 10 ) as $item ) {
 744                         $profileReport[] = sprintf( "%6.2f%% %8.3f %6d %s",
 745                                 $item['%real'], $item['real'], $item['calls'],
 746                                 htmlspecialchars( $item['name'] ) );
 747                 }
 748                 $text .= "<!--\nTransclusion expansion time report (%,ms,calls,template)\n";
 749                 $text .= implode( "\n", $profileReport ) . "\n-->\n";
 750
 751                 $this->mOutput->setLimitReportData( 'limitreport-timingprofile', $profileReport );
 752
 753                 // Add other cache related metadata
 754                 if ( $this->svcOptions->get( 'ShowHostnames' ) ) {
 755                         $this->mOutput->setLimitReportData( 'cachereport-origin', wfHostname() );
 756                 }
 757                 $this->mOutput->setLimitReportData( 'cachereport-timestamp',
 758                         $this->mOutput->getCacheTime() );
 759                 $this->mOutput->setLimitReportData( 'cachereport-ttl',
 760                         $this->mOutput->getCacheExpiry() );
 761                 $this->mOutput->setLimitReportData( 'cachereport-transientcontent',
 762                         $this->mOutput->hasDynamicContent() );
 763
 764                 if ( $this->mGeneratedPPNodeCount > $this->mOptions->getMaxGeneratedPPNodeCount() / 10 ) {
 765                         wfDebugLog( 'generated-pp-node-count', $this->mGeneratedPPNodeCount . ' ' .
 766                                 $this->mTitle->getPrefixedDBkey() );
 767                 }
 768                 return $text;
 769         }
 770
 771         /**
 772          * Half-parse wikitext to half-parsed HTML. This recursive parser entry point
 773          * can be called from an extension tag hook.
 774          *
 775          * The output of this function IS NOT SAFE PARSED HTML; it is "half-parsed"
 776          * instead, which means that lists and links have not been fully parsed yet,
 777          * and strip markers are still present.
 778          *
 779          * Use recursiveTagParseFully() to fully parse wikitext to output-safe HTML.
 780          *
 781          * Use this function if you're a parser tag hook and you want to parse
 782          * wikitext before or after applying additional transformations, and you
 783          * intend to *return the result as hook output*, which will cause it to go
 784          * through the rest of parsing process automatically.
 785          *
 786          * If $frame is not provided, then template variables (e.g., {{{1}}}) within
 787          * $text are not expanded
 788          *
 789          * @param string $text Text extension wants to have parsed
 790          * @param-taint $text escapes_htmlnoent
 791          * @param bool|PPFrame $frame The frame to use for expanding any template variables
 792          * @return string UNSAFE half-parsed HTML
 793          * @return-taint escaped
 794          */
 795         public function recursiveTagParse( $text, $frame = false ) {
 796                 // Avoid PHP 7.1 warning from passing $this by reference
 797                 $parser = $this;
 798                 Hooks::run( 'ParserBeforeStrip', [ &$parser, &$text, &$this->mStripState ] );
 799                 Hooks::run( 'ParserAfterStrip', [ &$parser, &$text, &$this->mStripState ] );
 800                 $text = $this->internalParse( $text, false, $frame );
 801                 return $text;
 802         }
 803
 804         /**
 805          * Fully parse wikitext to fully parsed HTML. This recursive parser entry
 806          * point can be called from an extension tag hook.
 807          *
 808          * The output of this function is fully-parsed HTML that is safe for output.
 809          * If you're a parser tag hook, you might want to use recursiveTagParse()
 810          * instead.
 811          *
 812          * If $frame is not provided, then template variables (e.g., {{{1}}}) within
 813          * $text are not expanded
 814          *
 815          * @since 1.25
 816          *
 817          * @param string $text Text extension wants to have parsed
 818          * @param-taint $text escapes_htmlnoent
 819          * @param bool|PPFrame $frame The frame to use for expanding any template variables
 820          * @return string Fully parsed HTML
 821          * @return-taint escaped
 822          */
 823         public function recursiveTagParseFully( $text, $frame = false ) {
 824                 $text = $this->recursiveTagParse( $text, $frame );
 825                 $text = $this->internalParseHalfParsed( $text, false );
 826                 return $text;
 827         }
 828
 829         /**
 830          * Expand templates and variables in the text, producing valid, static wikitext.
 831          * Also removes comments.
 832          * Do not call this function recursively.
 833          * @param string $text
 834          * @param Title|null $title
 835          * @param ParserOptions $options
 836          * @param int|null $revid
 837          * @param bool|PPFrame $frame
 838          * @return mixed|string
 839          */
 840         public function preprocess( $text, Title $title = null,
 841                 ParserOptions $options, $revid = null, $frame = false
 842         ) {
 843                 $magicScopeVariable = $this->lock();
 844                 $this->startParse( $title, $options, self::OT_PREPROCESS, true );
 845                 if ( $revid !== null ) {
 846                         $this->mRevisionId = $revid;
 847                 }
 848                 // Avoid PHP 7.1 warning from passing $this by reference
 849                 $parser = $this;
 850                 Hooks::run( 'ParserBeforeStrip', [ &$parser, &$text, &$this->mStripState ] );
 851                 Hooks::run( 'ParserAfterStrip', [ &$parser, &$text, &$this->mStripState ] );
 852                 $text = $this->replaceVariables( $text, $frame );
 853                 $text = $this->mStripState->unstripBoth( $text );
 854                 return $text;
 855         }
 856
 857         /**
 858          * Recursive parser entry point that can be called from an extension tag
 859          * hook.
 860          *
 861          * @param string $text Text to be expanded
 862          * @param bool|PPFrame $frame The frame to use for expanding any template variables
 863          * @return string
 864          * @since 1.19
 865          */
 866         public function recursivePreprocess( $text, $frame = false ) {
 867                 $text = $this->replaceVariables( $text, $frame );
 868                 $text = $this->mStripState->unstripBoth( $text );
 869                 return $text;
 870         }
 871
 872         /**
 873          * Process the wikitext for the "?preload=" feature. (T7210)
 874          *
 875          * "<noinclude>", "<includeonly>" etc. are parsed as for template
 876          * transclusion, comments, templates, arguments, tags hooks and parser
 877          * functions are untouched.
 878          *
 879          * @param string $text
 880          * @param Title $title
 881          * @param ParserOptions $options
 882          * @param array $params
 883          * @return string
 884          */
 885         public function getPreloadText( $text, Title $title, ParserOptions $options, $params = [] ) {
 886                 $msg = new RawMessage( $text );
 887                 $text = $msg->params( $params )->plain();
 888
 889                 # Parser (re)initialisation
 890                 $magicScopeVariable = $this->lock();
 891                 $this->startParse( $title, $options, self::OT_PLAIN, true );
 892
 893                 $flags = PPFrame::NO_ARGS | PPFrame::NO_TEMPLATES;
 894                 $dom = $this->preprocessToDom( $text, self::PTD_FOR_INCLUSION );
 895                 $text = $this->getPreprocessor()->newFrame()->expand( $dom, $flags );
 896                 $text = $this->mStripState->unstripBoth( $text );
 897                 return $text;
 898         }
 899
 900         /**
 901          * Set the current user.
 902          * Should only be used when doing pre-save transform.
 903          *
 904          * @param User|null $user User object or null (to reset)
 905          */
 906         public function setUser( $user ) {
 907                 $this->mUser = $user;
 908         }
 909
 910         /**
 911          * Set the context title
 912          *
 913          * @param Title|null $t
 914          */
 915         public function setTitle( Title $t = null ) {
 916                 if ( !$t ) {
 917                         $t = Title::makeTitle( NS_SPECIAL, 'Badtitle/Parser' );
 918                 }
 919
 920                 if ( $t->hasFragment() ) {
 921                         # Strip the fragment to avoid various odd effects
 922                         $this->mTitle = $t->createFragmentTarget( '' );
 923                 } else {
 924                         $this->mTitle = $t;
 925                 }
 926         }
 927
 928         /**
 929          * Accessor for the Title object
 930          *
 931          * Since 1.34, leaving `mTitle` uninitialized as `null` is deprecated.
 932          *
 933          * @return Title|null
 934          */
 935         public function getTitle() : ?Title {
 936                 if ( $this->mTitle === null ) {
 937                         wfDeprecated( 'Parser title should never be null', '1.34' );
 938                 }
 939                 return $this->mTitle;
 940         }
 941
 942         /**
 943          * Accessor/mutator for the Title object
 944          *
 945          * @param Title|null $x Title object or null to just get the current one
 946          * @return Title|null
 947          */
 948         public function Title( Title $x = null ) : ?Title {
 949                 return wfSetVar( $this->mTitle, $x );
 950         }
 951
 952         /**
 953          * Set the output type
 954          *
 955          * @param int $ot New value
 956          */
 957         public function setOutputType( $ot ) {
 958                 $this->mOutputType = $ot;
 959                 # Shortcut alias
 960                 $this->ot = [
 961                         'html' => $ot == self::OT_HTML,
 962                         'wiki' => $ot == self::OT_WIKI,
 963                         'pre' => $ot == self::OT_PREPROCESS,
 964                         'plain' => $ot == self::OT_PLAIN,
 965                 ];
 966         }
 967
 968         /**
 969          * Accessor/mutator for the output type
 970          *
 971          * @param int|null $x New value or null to just get the current one
 972          * @return int
 973          */
 974         public function OutputType( $x = null ) {
 975                 return wfSetVar( $this->mOutputType, $x );
 976         }
 977
 978         /**
 979          * Get the ParserOutput object
 980          *
 981          * @return ParserOutput
 982          */
 983         public function getOutput() {
 984                 return $this->mOutput;
 985         }
 986
 987         /**
 988          * Get the ParserOptions object
 989          *
 990          * @return ParserOptions
 991          */
 992         public function getOptions() {
 993                 return $this->mOptions;
 994         }
 995
 996         /**
 997          * Accessor/mutator for the ParserOptions object
 998          *
 999          * @param ParserOptions|null $x New value or null to just get the current one
1000          * @return ParserOptions Current ParserOptions object
1001          */
1002         public function Options( $x = null ) {
1003                 return wfSetVar( $this->mOptions, $x );
1004         }
1005
1006         /**
1007          * @return int
1008          */
1009         public function nextLinkID() {
1010                 return $this->mLinkID++;
1011         }
1012
1013         /**
1014          * @param int $id
1015          */
1016         public function setLinkID( $id ) {
1017                 $this->mLinkID = $id;
1018         }
1019
1020         /**
1021          * Get a language object for use in parser functions such as {{FORMATNUM:}}
1022          * @return Language
1023          */
1024         public function getFunctionLang() {
1025                 return $this->getTargetLanguage();
1026         }
1027
1028         /**
1029          * Get the target language for the content being parsed. This is usually the
1030          * language that the content is in.
1031          *
1032          * @since 1.19
1033          *
1034          * @throws MWException
1035          * @return Language
1036          */
1037         public function getTargetLanguage() {
1038                 $target = $this->mOptions->getTargetLanguage();
1039
1040                 if ( $target !== null ) {
1041                         return $target;
1042                 } elseif ( $this->mOptions->getInterfaceMessage() ) {
1043                         return $this->mOptions->getUserLangObj();
1044                 } elseif ( is_null( $this->mTitle ) ) {
1045                         throw new MWException( __METHOD__ . ': $this->mTitle is null' );
1046                 }
1047
1048                 return $this->mTitle->getPageLanguage();
1049         }
1050
1051         /**
1052          * Get the language object for language conversion
1053          * @deprecated since 1.32, just use getTargetLanguage()
1054          * @return Language|null
1055          */
1056         public function getConverterLanguage() {
1057                 return $this->getTargetLanguage();
1058         }
1059
1060         /**
1061          * Get a User object either from $this->mUser, if set, or from the
1062          * ParserOptions object otherwise
1063          *
1064          * @return User
1065          */
1066         public function getUser() {
1067                 if ( !is_null( $this->mUser ) ) {
1068                         return $this->mUser;
1069                 }
1070                 return $this->mOptions->getUser();
1071         }
1072
1073         /**
1074          * Get a preprocessor object
1075          *
1076          * @return Preprocessor
1077          */
1078         public function getPreprocessor() {
1079                 if ( !isset( $this->mPreprocessor ) ) {
1080                         $class = $this->svcOptions->get( 'preprocessorClass' );
1081                         $this->mPreprocessor = new $class( $this );
1082                 }
1083                 return $this->mPreprocessor;
1084         }
1085
1086         /**
1087          * Get a LinkRenderer instance to make links with
1088          *
1089          * @since 1.28
1090          * @return LinkRenderer
1091          */
1092         public function getLinkRenderer() {
1093                 // XXX We make the LinkRenderer with current options and then cache it forever
1094                 if ( !$this->mLinkRenderer ) {
1095                         $this->mLinkRenderer = $this->linkRendererFactory->create();
1096                         $this->mLinkRenderer->setStubThreshold(
1097                                 $this->getOptions()->getStubThreshold()
1098                         );
1099                 }
1100
1101                 return $this->mLinkRenderer;
1102         }
1103
1104         /**
1105          * Get the MagicWordFactory that this Parser is using
1106          *
1107          * @since 1.32
1108          * @return MagicWordFactory
1109          */
1110         public function getMagicWordFactory() {
1111                 return $this->magicWordFactory;
1112         }
1113
1114         /**
1115          * Get the content language that this Parser is using
1116          *
1117          * @since 1.32
1118          * @return Language
1119          */
1120         public function getContentLanguage() {
1121                 return $this->contLang;
1122         }
1123
1124         /**
1125          * Replaces all occurrences of HTML-style comments and the given tags
1126          * in the text with a random marker and returns the next text. The output
1127          * parameter $matches will be an associative array filled with data in
1128          * the form:
1129          *
1130          * @code
1131          *   'UNIQ-xxxxx' => [
1132          *     'element',
1133          *     'tag content',
1134          *     [ 'param' => 'x' ],
1135          *     '<element param="x">tag content</element>' ]
1136          * @endcode
1137          *
1138          * @param array $elements List of element names. Comments are always extracted.
1139          * @param string $text Source text string.
1140          * @param array &$matches Out parameter, Array: extracted tags
1141          * @return string Stripped text
1142          */
1143         public static function extractTagsAndParams( $elements, $text, &$matches ) {
1144                 static $n = 1;
1145                 $stripped = '';
1146                 $matches = [];
1147
1148                 $taglist = implode( '|', $elements );
1149                 $start = "/<($taglist)(\\s+[^>]*?|\\s*?)(\/?>)|<(!--)/i";
1150
1151                 while ( $text != '' ) {
1152                         $p = preg_split( $start, $text, 2, PREG_SPLIT_DELIM_CAPTURE );
1153                         $stripped .= $p[0];
1154                         if ( count( $p ) < 5 ) {
1155                                 break;
1156                         }
1157                         if ( count( $p ) > 5 ) {
1158                                 # comment
1159                                 $element = $p[4];
1160                                 $attributes = '';
1161                                 $close = '';
1162                                 $inside = $p[5];
1163                         } else {
1164                                 # tag
1165                                 list( , $element, $attributes, $close, $inside ) = $p;
1166                         }
1167
1168                         $marker = self::MARKER_PREFIX . "-$element-" . sprintf( '%08X', $n++ ) . self::MARKER_SUFFIX;
1169                         $stripped .= $marker;
1170
1171                         if ( $close === '/>' ) {
1172                                 # Empty element tag, <tag />
1173                                 $content = null;
1174                                 $text = $inside;
1175                                 $tail = null;
1176                         } else {
1177                                 if ( $element === '!--' ) {
1178                                         $end = '/(-->)/';
1179                                 } else {
1180                                         $end = "/(<\\/$element\\s*>)/i";
1181                                 }
1182                                 $q = preg_split( $end, $inside, 2, PREG_SPLIT_DELIM_CAPTURE );
1183                                 $content = $q[0];
1184                                 if ( count( $q ) < 3 ) {
1185                                         # No end tag -- let it run out to the end of the text.
1186                                         $tail = '';
1187                                         $text = '';
1188                                 } else {
1189                                         list( , $tail, $text ) = $q;
1190                                 }
1191                         }
1192
1193                         $matches[$marker] = [ $element,
1194                                 $content,
1195                                 Sanitizer::decodeTagAttributes( $attributes ),
1196                                 "<$element$attributes$close$content$tail" ];
1197                 }
1198                 return $stripped;
1199         }
1200
1201         /**
1202          * Get a list of strippable XML-like elements
1203          *
1204          * @return array
1205          */
1206         public function getStripList() {
1207                 return $this->mStripList;
1208         }
1209
1210         /**
1211          * Get the StripState
1212          *
1213          * @return StripState
1214          */
1215         public function getStripState() {
1216                 return $this->mStripState;
1217         }
1218
1219         /**
1220          * Add an item to the strip state
1221          * Returns the unique tag which must be inserted into the stripped text
1222          * The tag will be replaced with the original text in unstrip()
1223          *
1224          * @param string $text
1225          *
1226          * @return string
1227          */
1228         public function insertStripItem( $text ) {
1229                 $marker = self::MARKER_PREFIX . "-item-{$this->mMarkerIndex}-" . self::MARKER_SUFFIX;
1230                 $this->mMarkerIndex++;
1231                 $this->mStripState->addGeneral( $marker, $text );
1232                 return $marker;
1233         }
1234
1235         /**
1236          * parse the wiki syntax used to render tables
1237          *
1238          * @private
1239          * @param string $text
1240          * @return string
1241          */
1242         public function doTableStuff( $text ) {
1243                 $lines = StringUtils::explode( "\n", $text );
1244                 $out = '';
1245                 $td_history = []; # Is currently a td tag open?
1246                 $last_tag_history = []; # Save history of last lag activated (td, th or caption)
1247                 $tr_history = []; # Is currently a tr tag open?
1248                 $tr_attributes = []; # history of tr attributes
1249                 $has_opened_tr = []; # Did this table open a <tr> element?
1250                 $indent_level = 0; # indent level of the table
1251
1252                 foreach ( $lines as $outLine ) {
1253                         $line = trim( $outLine );
1254
1255                         if ( $line === '' ) { # empty line, go to next line
1256                                 $out .= $outLine . "\n";
1257                                 continue;
1258                         }
1259
1260                         $first_character = $line[0];
1261                         $first_two = substr( $line, 0, 2 );
1262                         $matches = [];
1263
1264                         if ( preg_match( '/^(:*)\s*\{\|(.*)$/', $line, $matches ) ) {
1265                                 # First check if we are starting a new table
1266                                 $indent_level = strlen( $matches[1] );
1267
1268                                 $attributes = $this->mStripState->unstripBoth( $matches[2] );
1269                                 $attributes = Sanitizer::fixTagAttributes( $attributes, 'table' );
1270
1271                                 $outLine = str_repeat( '<dl><dd>', $indent_level ) . "<table{$attributes}>";
1272                                 array_push( $td_history, false );
1273                                 array_push( $last_tag_history, '' );
1274                                 array_push( $tr_history, false );
1275                                 array_push( $tr_attributes, '' );
1276                                 array_push( $has_opened_tr, false );
1277                         } elseif ( count( $td_history ) == 0 ) {
1278                                 # Don't do any of the following
1279                                 $out .= $outLine . "\n";
1280                                 continue;
1281                         } elseif ( $first_two === '|}' ) {
1282                                 # We are ending a table
1283                                 $line = '</table>' . substr( $line, 2 );
1284                                 $last_tag = array_pop( $last_tag_history );
1285
1286                                 if ( !array_pop( $has_opened_tr ) ) {
1287                                         $line = "<tr><td></td></tr>{$line}";
1288                                 }
1289
1290                                 if ( array_pop( $tr_history ) ) {
1291                                         $line = "</tr>{$line}";
1292                                 }
1293
1294                                 if ( array_pop( $td_history ) ) {
1295                                         $line = "</{$last_tag}>{$line}";
1296                                 }
1297                                 array_pop( $tr_attributes );
1298                                 if ( $indent_level > 0 ) {
1299                                         $outLine = rtrim( $line ) . str_repeat( '</dd></dl>', $indent_level );
1300                                 } else {
1301                                         $outLine = $line;
1302                                 }
1303                         } elseif ( $first_two === '|-' ) {
1304                                 # Now we have a table row
1305                                 $line = preg_replace( '#^\|-+#', '', $line );
1306
1307                                 # Whats after the tag is now only attributes
1308                                 $attributes = $this->mStripState->unstripBoth( $line );
1309                                 $attributes = Sanitizer::fixTagAttributes( $attributes, 'tr' );
1310                                 array_pop( $tr_attributes );
1311                                 array_push( $tr_attributes, $attributes );
1312
1313                                 $line = '';
1314                                 $last_tag = array_pop( $last_tag_history );
1315                                 array_pop( $has_opened_tr );
1316                                 array_push( $has_opened_tr, true );
1317
1318                                 if ( array_pop( $tr_history ) ) {
1319                                         $line = '</tr>';
1320                                 }
1321
1322                                 if ( array_pop( $td_history ) ) {
1323                                         $line = "</{$last_tag}>{$line}";
1324                                 }
1325
1326                                 $outLine = $line;
1327                                 array_push( $tr_history, false );
1328                                 array_push( $td_history, false );
1329                                 array_push( $last_tag_history, '' );
1330                         } elseif ( $first_character === '|'
1331                                 || $first_character === '!'
1332                                 || $first_two === '|+'
1333                         ) {
1334                                 # This might be cell elements, td, th or captions
1335                                 if ( $first_two === '|+' ) {
1336                                         $first_character = '+';
1337                                         $line = substr( $line, 2 );
1338                                 } else {
1339                                         $line = substr( $line, 1 );
1340                                 }
1341
1342                                 // Implies both are valid for table headings.
1343                                 if ( $first_character === '!' ) {
1344                                         $line = StringUtils::replaceMarkup( '!!', '||', $line );
1345                                 }
1346
1347                                 # Split up multiple cells on the same line.
1348                                 # FIXME : This can result in improper nesting of tags processed
1349                                 # by earlier parser steps.
1350                                 $cells = explode( '||', $line );
1351
1352                                 $outLine = '';
1353
1354                                 # Loop through each table cell
1355                                 foreach ( $cells as $cell ) {
1356                                         $previous = '';
1357                                         if ( $first_character !== '+' ) {
1358                                                 $tr_after = array_pop( $tr_attributes );
1359                                                 if ( !array_pop( $tr_history ) ) {
1360                                                         $previous = "<tr{$tr_after}>\n";
1361                                                 }
1362                                                 array_push( $tr_history, true );
1363                                                 array_push( $tr_attributes, '' );
1364                                                 array_pop( $has_opened_tr );
1365                                                 array_push( $has_opened_tr, true );
1366                                         }
1367
1368                                         $last_tag = array_pop( $last_tag_history );
1369
1370                                         if ( array_pop( $td_history ) ) {
1371                                                 $previous = "</{$last_tag}>\n{$previous}";
1372                                         }
1373
1374                                         if ( $first_character === '|' ) {
1375                                                 $last_tag = 'td';
1376                                         } elseif ( $first_character === '!' ) {
1377                                                 $last_tag = 'th';
1378                                         } elseif ( $first_character === '+' ) {
1379                                                 $last_tag = 'caption';
1380                                         } else {
1381                                                 $last_tag = '';
1382                                         }
1383
1384                                         array_push( $last_tag_history, $last_tag );
1385
1386                                         # A cell could contain both parameters and data
1387                                         $cell_data = explode( '|', $cell, 2 );
1388
1389                                         # T2553: Note that a '|' inside an invalid link should not
1390                                         # be mistaken as delimiting cell parameters
1391                                         # Bug T153140: Neither should language converter markup.
1392                                         if ( preg_match( '/\[\[|-\{/', $cell_data[0] ) === 1 ) {
1393                                                 $cell = "{$previous}<{$last_tag}>" . trim( $cell );
1394                                         } elseif ( count( $cell_data ) == 1 ) {
1395                                                 // Whitespace in cells is trimmed
1396                                                 $cell = "{$previous}<{$last_tag}>" . trim( $cell_data[0] );
1397                                         } else {
1398                                                 $attributes = $this->mStripState->unstripBoth( $cell_data[0] );
1399                                                 $attributes = Sanitizer::fixTagAttributes( $attributes, $last_tag );
1400                                                 // Whitespace in cells is trimmed
1401                                                 $cell = "{$previous}<{$last_tag}{$attributes}>" . trim( $cell_data[1] );
1402                                         }
1403
1404                                         $outLine .= $cell;
1405                                         array_push( $td_history, true );
1406                                 }
1407                         }
1408                         $out .= $outLine . "\n";
1409                 }
1410
1411                 # Closing open td, tr && table
1412                 while ( count( $td_history ) > 0 ) {
1413                         if ( array_pop( $td_history ) ) {
1414                                 $out .= "</td>\n";
1415                         }
1416                         if ( array_pop( $tr_history ) ) {
1417                                 $out .= "</tr>\n";
1418                         }
1419                         if ( !array_pop( $has_opened_tr ) ) {
1420                                 $out .= "<tr><td></td></tr>\n";
1421                         }
1422
1423                         $out .= "</table>\n";
1424                 }
1425
1426                 # Remove trailing line-ending (b/c)
1427                 if ( substr( $out, -1 ) === "\n" ) {
1428                         $out = substr( $out, 0, -1 );
1429                 }
1430
1431                 # special case: don't return empty table
1432                 if ( $out === "<table>\n<tr><td></td></tr>\n</table>" ) {
1433                         $out = '';
1434                 }
1435
1436                 return $out;
1437         }
1438
1439         /**
1440          * Helper function for parse() that transforms wiki markup into half-parsed
1441          * HTML. Only called for $mOutputType == self::OT_HTML.
1442          *
1443          * @private
1444          *
1445          * @param string $text The text to parse
1446          * @param-taint $text escapes_html
1447          * @param bool $isMain Whether this is being called from the main parse() function
1448          * @param PPFrame|bool $frame A pre-processor frame
1449          *
1450          * @return string
1451          */
1452         public function internalParse( $text, $isMain = true, $frame = false ) {
1453                 $origText = $text;
1454
1455                 // Avoid PHP 7.1 warning from passing $this by reference
1456                 $parser = $this;
1457
1458                 # Hook to suspend the parser in this state
1459                 if ( !Hooks::run( 'ParserBeforeInternalParse', [ &$parser, &$text, &$this->mStripState ] ) ) {
1460                         return $text;
1461                 }
1462
1463                 # if $frame is provided, then use $frame for replacing any variables
1464                 if ( $frame ) {
1465                         # use frame depth to infer how include/noinclude tags should be handled
1466                         # depth=0 means this is the top-level document; otherwise it's an included document
1467                         if ( !$frame->depth ) {
1468                                 $flag = 0;
1469                         } else {
1470                                 $flag = self::PTD_FOR_INCLUSION;
1471                         }
1472                         $dom = $this->preprocessToDom( $text, $flag );
1473                         $text = $frame->expand( $dom );
1474                 } else {
1475                         # if $frame is not provided, then use old-style replaceVariables
1476                         $text = $this->replaceVariables( $text );
1477                 }
1478
1479                 Hooks::run( 'InternalParseBeforeSanitize', [ &$parser, &$text, &$this->mStripState ] );
1480                 $text = Sanitizer::removeHTMLtags(
1481                         $text,
1482                         [ $this, 'attributeStripCallback' ],
1483                         false,
1484                         array_keys( $this->mTransparentTagHooks ),
1485                         [],
1486                         [ $this, 'addTrackingCategory' ]
1487                 );
1488                 Hooks::run( 'InternalParseBeforeLinks', [ &$parser, &$text, &$this->mStripState ] );
1489
1490                 # Tables need to come after variable replacement for things to work
1491                 # properly; putting them before other transformations should keep
1492                 # exciting things like link expansions from showing up in surprising
1493                 # places.
1494                 $text = $this->doTableStuff( $text );
1495
1496                 $text = preg_replace( '/(^|\n)-----*/', '\\1<hr />', $text );
1497
1498                 $text = $this->doDoubleUnderscore( $text );
1499
1500                 $text = $this->doHeadings( $text );
1501                 $text = $this->replaceInternalLinks( $text );
1502                 $text = $this->doAllQuotes( $text );
1503                 $text = $this->replaceExternalLinks( $text );
1504
1505                 # replaceInternalLinks may sometimes leave behind
1506                 # absolute URLs, which have to be masked to hide them from replaceExternalLinks
1507                 $text = str_replace( self::MARKER_PREFIX . 'NOPARSE', '', $text );
1508
1509                 $text = $this->doMagicLinks( $text );
1510                 $text = $this->formatHeadings( $text, $origText, $isMain );
1511
1512                 return $text;
1513         }
1514
1515         /**
1516          * Helper function for parse() that transforms half-parsed HTML into fully
1517          * parsed HTML.
1518          *
1519          * @param string $text
1520          * @param bool $isMain
1521          * @param bool $linestart
1522          * @return string
1523          */
1524         private function internalParseHalfParsed( $text, $isMain = true, $linestart = true ) {
1525                 $text = $this->mStripState->unstripGeneral( $text );
1526
1527                 // Avoid PHP 7.1 warning from passing $this by reference
1528                 $parser = $this;
1529
1530                 if ( $isMain ) {
1531                         Hooks::run( 'ParserAfterUnstrip', [ &$parser, &$text ] );
1532                 }
1533
1534                 # Clean up special characters, only run once, next-to-last before doBlockLevels
1535                 $text = Sanitizer::armorFrenchSpaces( $text );
1536
1537                 $text = $this->doBlockLevels( $text, $linestart );
1538
1539                 $this->replaceLinkHolders( $text );
1540
1541                 /**
1542                  * The input doesn't get language converted if
1543                  * a) It's disabled
1544                  * b) Content isn't converted
1545                  * c) It's a conversion table
1546                  * d) it is an interface message (which is in the user language)
1547                  */
1548                 if ( !( $this->mOptions->getDisableContentConversion()
1549                         || isset( $this->mDoubleUnderscores['nocontentconvert'] ) )
1550                         && !$this->mOptions->getInterfaceMessage()
1551                 ) {
1552                         # The position of the convert() call should not be changed. it
1553                         # assumes that the links are all replaced and the only thing left
1554                         # is the <nowiki> mark.
1555                         $text = $this->getTargetLanguage()->convert( $text );
1556                 }
1557
1558                 $text = $this->mStripState->unstripNoWiki( $text );
1559
1560                 if ( $isMain ) {
1561                         Hooks::run( 'ParserBeforeTidy', [ &$parser, &$text ] );
1562                 }
1563
1564                 $text = $this->replaceTransparentTags( $text );
1565                 $text = $this->mStripState->unstripGeneral( $text );
1566
1567                 $text = Sanitizer::normalizeCharReferences( $text );
1568
1569                 if ( MWTidy::isEnabled() ) {
1570                         if ( $this->mOptions->getTidy() ) {
1571                                 $text = MWTidy::tidy( $text );
1572                         }
1573                 } else {
1574                         # attempt to sanitize at least some nesting problems
1575                         # (T4702 and quite a few others)
1576                         # This code path is buggy and deprecated!
1577                         wfDeprecated( 'disabling tidy', '1.33' );
1578                         $tidyregs = [
1579                                 # ''Something [http://www.cool.com cool''] -->
1580                                 # <i>Something</i><a href="http://www.cool.com"..><i>cool></i></a>
1581                                 '/(<([bi])>)(<([bi])>)?([^<]*)(<\/?a[^<]*>)([^<]*)(<\/\\4>)?(<\/\\2>)/' =>
1582                                 '\\1\\3\\5\\8\\9\\6\\1\\3\\7\\8\\9',
1583                                 # fix up an anchor inside another anchor, only
1584                                 # at least for a single single nested link (T5695)
1585                                 '/(<a[^>]+>)([^<]*)(<a[^>]+>[^<]*)<\/a>(.*)<\/a>/' =>
1586                                 '\\1\\2</a>\\3</a>\\1\\4</a>',
1587                                 # fix div inside inline elements- doBlockLevels won't wrap a line which
1588                                 # contains a div, so fix it up here; replace
1589                                 # div with escaped text
1590                                 '/(<([aib]) [^>]+>)([^<]*)(<div([^>]*)>)(.*)(<\/div>)([^<]*)(<\/\\2>)/' =>
1591                                 '\\1\\3&lt;div\\5&gt;\\6&lt;/div&gt;\\8\\9',
1592                                 # remove empty italic or bold tag pairs, some
1593                                 # introduced by rules above
1594                                 '/<([bi])><\/\\1>/' => '',
1595                         ];
1596
1597                         $text = preg_replace(
1598                                 array_keys( $tidyregs ),
1599                                 array_values( $tidyregs ),
1600                                 $text );
1601                 }
1602
1603                 if ( $isMain ) {
1604                         Hooks::run( 'ParserAfterTidy', [ &$parser, &$text ] );
1605                 }
1606
1607                 return $text;
1608         }
1609
1610         /**
1611          * Replace special strings like "ISBN xxx" and "RFC xxx" with
1612          * magic external links.
1613          *
1614          * DML
1615          * @private
1616          *
1617          * @param string $text
1618          *
1619          * @return string
1620          */
1621         public function doMagicLinks( $text ) {
1622                 $prots = wfUrlProtocolsWithoutProtRel();
1623                 $urlChar = self::EXT_LINK_URL_CLASS;
1624                 $addr = self::EXT_LINK_ADDR;
1625                 $space = self::SPACE_NOT_NL; #  non-newline space
1626                 $spdash = "(?:-|$space)"; # a dash or a non-newline space
1627                 $spaces = "$space++"; # possessive match of 1 or more spaces
1628                 $text = preg_replace_callback(
1629                         '!(?:                        # Start cases
1630                                 (<a[ \t\r\n>].*?</a>) |    # m[1]: Skip link text
1631                                 (<.*?>) |                  # m[2]: Skip stuff inside HTML elements' . "
1632                                 (\b                        # m[3]: Free external links
1633                                         (?i:$prots)
1634                                         ($addr$urlChar*)         # m[4]: Post-protocol path
1635                                 ) |
1636                                 \b(?:RFC|PMID) $spaces     # m[5]: RFC or PMID, capture number
1637                                         ([0-9]+)\b |
1638                                 \bISBN $spaces (           # m[6]: ISBN, capture number
1639                                         (?: 97[89] $spdash? )?   #  optional 13-digit ISBN prefix
1640                                         (?: [0-9]  $spdash? ){9} #  9 digits with opt. delimiters
1641                                         [0-9Xx]                  #  check digit
1642                                 )\b
1643                         )!xu", [ $this, 'magicLinkCallback' ], $text );
1644                 return $text;
1645         }
1646
1647         /**
1648          * @throws MWException
1649          * @param array $m
1650          * @return string HTML
1651          */
1652         public function magicLinkCallback( $m ) {
1653                 if ( isset( $m[1] ) && $m[1] !== '' ) {
1654                         # Skip anchor
1655                         return $m[0];
1656                 } elseif ( isset( $m[2] ) && $m[2] !== '' ) {
1657                         # Skip HTML element
1658                         return $m[0];
1659                 } elseif ( isset( $m[3] ) && $m[3] !== '' ) {
1660                         # Free external link
1661                         return $this->makeFreeExternalLink( $m[0], strlen( $m[4] ) );
1662                 } elseif ( isset( $m[5] ) && $m[5] !== '' ) {
1663                         # RFC or PMID
1664                         if ( substr( $m[0], 0, 3 ) === 'RFC' ) {
1665                                 if ( !$this->mOptions->getMagicRFCLinks() ) {
1666                                         return $m[0];
1667                                 }
1668                                 $keyword = 'RFC';
1669                                 $urlmsg = 'rfcurl';
1670                                 $cssClass = 'mw-magiclink-rfc';
1671                                 $trackingCat = 'magiclink-tracking-rfc';
1672                                 $id = $m[5];
1673                         } elseif ( substr( $m[0], 0, 4 ) === 'PMID' ) {
1674                                 if ( !$this->mOptions->getMagicPMIDLinks() ) {
1675                                         return $m[0];
1676                                 }
1677                                 $keyword = 'PMID';
1678                                 $urlmsg = 'pubmedurl';
1679                                 $cssClass = 'mw-magiclink-pmid';
1680                                 $trackingCat = 'magiclink-tracking-pmid';
1681                                 $id = $m[5];
1682                         } else {
1683                                 throw new MWException( __METHOD__ . ': unrecognised match type "' .
1684                                         substr( $m[0], 0, 20 ) . '"' );
1685                         }
1686                         $url = wfMessage( $urlmsg, $id )->inContentLanguage()->text();
1687                         $this->addTrackingCategory( $trackingCat );
1688                         return Linker::makeExternalLink( $url, "{$keyword} {$id}", true, $cssClass, [], $this->mTitle );
1689                 } elseif ( isset( $m[6] ) && $m[6] !== ''
1690                         && $this->mOptions->getMagicISBNLinks()
1691                 ) {
1692                         # ISBN
1693                         $isbn = $m[6];
1694                         $space = self::SPACE_NOT_NL; #  non-newline space
1695                         $isbn = preg_replace( "/$space/", ' ', $isbn );
1696                         $num = strtr( $isbn, [
1697                                 '-' => '',
1698                                 ' ' => '',
1699                                 'x' => 'X',
1700                         ] );
1701                         $this->addTrackingCategory( 'magiclink-tracking-isbn' );
1702                         return $this->getLinkRenderer()->makeKnownLink(
1703                                 SpecialPage::getTitleFor( 'Booksources', $num ),
1704                                 "ISBN $isbn",
1705                                 [
1706                                         'class' => 'internal mw-magiclink-isbn',
1707                                         'title' => false // suppress title attribute
1708                                 ]
1709                         );
1710                 } else {
1711                         return $m[0];
1712                 }
1713         }
1714
1715         /**
1716          * Make a free external link, given a user-supplied URL
1717          *
1718          * @param string $url
1719          * @param int $numPostProto
1720          *   The number of characters after the protocol.
1721          * @return string HTML
1722          * @private
1723          */
1724         public function makeFreeExternalLink( $url, $numPostProto ) {
1725                 $trail = '';
1726
1727                 # The characters '<' and '>' (which were escaped by
1728                 # removeHTMLtags()) should not be included in
1729                 # URLs, per RFC 2396.
1730                 # Make &nbsp; terminate a URL as well (bug T84937)
1731                 $m2 = [];
1732                 if ( preg_match(
1733                         '/&(lt|gt|nbsp|#x0*(3[CcEe]|[Aa]0)|#0*(60|62|160));/',
1734                         $url,
1735                         $m2,
1736                         PREG_OFFSET_CAPTURE
1737                 ) ) {
1738                         $trail = substr( $url, $m2[0][1] ) . $trail;
1739                         $url = substr( $url, 0, $m2[0][1] );
1740                 }
1741
1742                 # Move trailing punctuation to $trail
1743                 $sep = ',;\.:!?';
1744                 # If there is no left bracket, then consider right brackets fair game too
1745                 if ( strpos( $url, '(' ) === false ) {
1746                         $sep .= ')';
1747                 }
1748
1749                 $urlRev = strrev( $url );
1750                 $numSepChars = strspn( $urlRev, $sep );
1751                 # Don't break a trailing HTML entity by moving the ; into $trail
1752                 # This is in hot code, so use substr_compare to avoid having to
1753                 # create a new string object for the comparison
1754                 if ( $numSepChars && substr_compare( $url, ";", -$numSepChars, 1 ) === 0 ) {
1755                         # more optimization: instead of running preg_match with a $
1756                         # anchor, which can be slow, do the match on the reversed
1757                         # string starting at the desired offset.
1758                         # un-reversed regexp is: /&([a-z]+|#x[\da-f]+|#\d+)$/i
1759                         if ( preg_match( '/\G([a-z]+|[\da-f]+x#|\d+#)&/i', $urlRev, $m2, 0, $numSepChars ) ) {
1760                                 $numSepChars--;
1761                         }
1762                 }
1763                 if ( $numSepChars ) {
1764                         $trail = substr( $url, -$numSepChars ) . $trail;
1765                         $url = substr( $url, 0, -$numSepChars );
1766                 }
1767
1768                 # Verify that we still have a real URL after trail removal, and
1769                 # not just lone protocol
1770                 if ( strlen( $trail ) >= $numPostProto ) {
1771                         return $url . $trail;
1772                 }
1773
1774                 $url = Sanitizer::cleanUrl( $url );
1775
1776                 # Is this an external image?
1777                 $text = $this->maybeMakeExternalImage( $url );
1778                 if ( $text === false ) {
1779                         # Not an image, make a link
1780                         $text = Linker::makeExternalLink( $url,
1781                                 $this->getTargetLanguage()->getConverter()->markNoConversion( $url ),
1782                                 true, 'free',
1783                                 $this->getExternalLinkAttribs( $url ), $this->mTitle );
1784                         # Register it in the output object...
1785                         $this->mOutput->addExternalLink( $url );
1786                 }
1787                 return $text . $trail;
1788         }
1789
1790         /**
1791          * Parse headers and return html
1792          *
1793          * @private
1794          *
1795          * @param string $text
1796          *
1797          * @return string
1798          */
1799         public function doHeadings( $text ) {
1800                 for ( $i = 6; $i >= 1; --$i ) {
1801                         $h = str_repeat( '=', $i );
1802                         // Trim non-newline whitespace from headings
1803                         // Using \s* will break for: "==\n===\n" and parse as <h2>=</h2>
1804                         $text = preg_replace( "/^(?:$h)[ \\t]*(.+?)[ \\t]*(?:$h)\\s*$/m", "<h$i>\\1</h$i>", $text );
1805                 }
1806                 return $text;
1807         }
1808
1809         /**
1810          * Replace single quotes with HTML markup
1811          * @private
1812          *
1813          * @param string $text
1814          *
1815          * @return string The altered text
1816          */
1817         public function doAllQuotes( $text ) {
1818                 $outtext = '';
1819                 $lines = StringUtils::explode( "\n", $text );
1820                 foreach ( $lines as $line ) {
1821                         $outtext .= $this->doQuotes( $line ) . "\n";
1822                 }
1823                 $outtext = substr( $outtext, 0, -1 );
1824                 return $outtext;
1825         }
1826
1827         /**
1828          * Helper function for doAllQuotes()
1829          *
1830          * @param string $text
1831          *
1832          * @return string
1833          */
1834         public function doQuotes( $text ) {
1835                 $arr = preg_split( "/(''+)/", $text, -1, PREG_SPLIT_DELIM_CAPTURE );
1836                 $countarr = count( $arr );
1837                 if ( $countarr == 1 ) {
1838                         return $text;
1839                 }
1840
1841                 // First, do some preliminary work. This may shift some apostrophes from
1842                 // being mark-up to being text. It also counts the number of occurrences
1843                 // of bold and italics mark-ups.
1844                 $numbold = 0;
1845                 $numitalics = 0;
1846                 for ( $i = 1; $i < $countarr; $i += 2 ) {
1847                         $thislen = strlen( $arr[$i] );
1848                         // If there are ever four apostrophes, assume the first is supposed to
1849                         // be text, and the remaining three constitute mark-up for bold text.
1850                         // (T15227: ''''foo'''' turns into ' ''' foo ' ''')
1851                         if ( $thislen == 4 ) {
1852                                 $arr[$i - 1] .= "'";
1853                                 $arr[$i] = "'''";
1854                                 $thislen = 3;
1855                         } elseif ( $thislen > 5 ) {
1856                                 // If there are more than 5 apostrophes in a row, assume they're all
1857                                 // text except for the last 5.
1858                                 // (T15227: ''''''foo'''''' turns into ' ''''' foo ' ''''')
1859                                 $arr[$i - 1] .= str_repeat( "'", $thislen - 5 );
1860                                 $arr[$i] = "'''''";
1861                                 $thislen = 5;
1862                         }
1863                         // Count the number of occurrences of bold and italics mark-ups.
1864                         if ( $thislen == 2 ) {
1865                                 $numitalics++;
1866                         } elseif ( $thislen == 3 ) {
1867                                 $numbold++;
1868                         } elseif ( $thislen == 5 ) {
1869                                 $numitalics++;
1870                                 $numbold++;
1871                         }
1872                 }
1873
1874                 // If there is an odd number of both bold and italics, it is likely
1875                 // that one of the bold ones was meant to be an apostrophe followed
1876                 // by italics. Which one we cannot know for certain, but it is more
1877                 // likely to be one that has a single-letter word before it.
1878                 if ( ( $numbold % 2 == 1 ) && ( $numitalics % 2 == 1 ) ) {
1879                         $firstsingleletterword = -1;
1880                         $firstmultiletterword = -1;
1881                         $firstspace = -1;
1882                         for ( $i = 1; $i < $countarr; $i += 2 ) {
1883                                 if ( strlen( $arr[$i] ) == 3 ) {
1884                                         $x1 = substr( $arr[$i - 1], -1 );
1885                                         $x2 = substr( $arr[$i - 1], -2, 1 );
1886                                         if ( $x1 === ' ' ) {
1887                                                 if ( $firstspace == -1 ) {
1888                                                         $firstspace = $i;
1889                                                 }
1890                                         } elseif ( $x2 === ' ' ) {
1891                                                 $firstsingleletterword = $i;
1892                                                 // if $firstsingleletterword is set, we don't
1893                                                 // look at the other options, so we can bail early.
1894                                                 break;
1895                                         } elseif ( $firstmultiletterword == -1 ) {
1896                                                 $firstmultiletterword = $i;
1897                                         }
1898                                 }
1899                         }
1900
1901                         // If there is a single-letter word, use it!
1902                         if ( $firstsingleletterword > -1 ) {
1903                                 $arr[$firstsingleletterword] = "''";
1904                                 $arr[$firstsingleletterword - 1] .= "'";
1905                         } elseif ( $firstmultiletterword > -1 ) {
1906                                 // If not, but there's a multi-letter word, use that one.
1907                                 $arr[$firstmultiletterword] = "''";
1908                                 $arr[$firstmultiletterword - 1] .= "'";
1909                         } elseif ( $firstspace > -1 ) {
1910                                 // ... otherwise use the first one that has neither.
1911                                 // (notice that it is possible for all three to be -1 if, for example,
1912                                 // there is only one pentuple-apostrophe in the line)
1913                                 $arr[$firstspace] = "''";
1914                                 $arr[$firstspace - 1] .= "'";
1915                         }
1916                 }
1917
1918                 // Now let's actually convert our apostrophic mush to HTML!
1919                 $output = '';
1920                 $buffer = '';
1921                 $state = '';
1922                 $i = 0;
1923                 foreach ( $arr as $r ) {
1924                         if ( ( $i % 2 ) == 0 ) {
1925                                 if ( $state === 'both' ) {
1926                                         $buffer .= $r;
1927                                 } else {
1928                                         $output .= $r;
1929                                 }
1930                         } else {
1931                                 $thislen = strlen( $r );
1932                                 if ( $thislen == 2 ) {
1933                                         if ( $state === 'i' ) {
1934                                                 $output .= '</i>';
1935                                                 $state = '';
1936                                         } elseif ( $state === 'bi' ) {
1937                                                 $output .= '</i>';
1938                                                 $state = 'b';
1939                                         } elseif ( $state === 'ib' ) {
1940                                                 $output .= '</b></i><b>';
1941                                                 $state = 'b';
1942                                         } elseif ( $state === 'both' ) {
1943                                                 $output .= '<b><i>' . $buffer . '</i>';
1944                                                 $state = 'b';
1945                                         } else { // $state can be 'b' or ''
1946                                                 $output .= '<i>';
1947                                                 $state .= 'i';
1948                                         }
1949                                 } elseif ( $thislen == 3 ) {
1950                                         if ( $state === 'b' ) {
1951                                                 $output .= '</b>';
1952                                                 $state = '';
1953                                         } elseif ( $state === 'bi' ) {
1954                                                 $output .= '</i></b><i>';
1955                                                 $state = 'i';
1956                                         } elseif ( $state === 'ib' ) {
1957                                                 $output .= '</b>';
1958                                                 $state = 'i';
1959                                         } elseif ( $state === 'both' ) {
1960                                                 $output .= '<i><b>' . $buffer . '</b>';
1961                                                 $state = 'i';
1962                                         } else { // $state can be 'i' or ''
1963                                                 $output .= '<b>';
1964                                                 $state .= 'b';
1965                                         }
1966                                 } elseif ( $thislen == 5 ) {
1967                                         if ( $state === 'b' ) {
1968                                                 $output .= '</b><i>';
1969                                                 $state = 'i';
1970                                         } elseif ( $state === 'i' ) {
1971                                                 $output .= '</i><b>';
1972                                                 $state = 'b';
1973                                         } elseif ( $state === 'bi' ) {
1974                                                 $output .= '</i></b>';
1975                                                 $state = '';
1976                                         } elseif ( $state === 'ib' ) {
1977                                                 $output .= '</b></i>';
1978                                                 $state = '';
1979                                         } elseif ( $state === 'both' ) {
1980                                                 $output .= '<i><b>' . $buffer . '</b></i>';
1981                                                 $state = '';
1982                                         } else { // ($state == '')
1983                                                 $buffer = '';
1984                                                 $state = 'both';
1985                                         }
1986                                 }
1987                         }
1988                         $i++;
1989                 }
1990                 // Now close all remaining tags.  Notice that the order is important.
1991                 if ( $state === 'b' || $state === 'ib' ) {
1992                         $output .= '</b>';
1993                 }
1994                 if ( $state === 'i' || $state === 'bi' || $state === 'ib' ) {
1995                         $output .= '</i>';
1996                 }
1997                 if ( $state === 'bi' ) {
1998                         $output .= '</b>';
1999                 }
2000                 // There might be lonely ''''', so make sure we have a buffer
2001                 if ( $state === 'both' && $buffer ) {
2002                         $output .= '<b><i>' . $buffer . '</i></b>';
2003                 }
2004                 return $output;
2005         }
2006
2007         /**
2008          * Replace external links (REL)
2009          *
2010          * Note: this is all very hackish and the order of execution matters a lot.
2011          * Make sure to run tests/parser/parserTests.php if you change this code.
2012          *
2013          * @private
2014          *
2015          * @param string $text
2016          *
2017          * @throws MWException
2018          * @return string
2019          */
2020         public function replaceExternalLinks( $text ) {
2021                 $bits = preg_split( $this->mExtLinkBracketedRegex, $text, -1, PREG_SPLIT_DELIM_CAPTURE );
2022                 // @phan-suppress-next-line PhanTypeComparisonFromArray See phan issue #3161
2023                 if ( $bits === false ) {
2024                         throw new MWException( "PCRE needs to be compiled with "
2025                                 . "--enable-unicode-properties in order for MediaWiki to function" );
2026                 }
2027                 $s = array_shift( $bits );
2028
2029                 $i = 0;
2030                 while ( $i < count( $bits ) ) {
2031                         $url = $bits[$i++];
2032                         $i++; // protocol
2033                         $text = $bits[$i++];
2034                         $trail = $bits[$i++];
2035
2036                         # The characters '<' and '>' (which were escaped by
2037                         # removeHTMLtags()) should not be included in
2038                         # URLs, per RFC 2396.
2039                         $m2 = [];
2040                         if ( preg_match( '/&(lt|gt);/', $url, $m2, PREG_OFFSET_CAPTURE ) ) {
2041                                 $text = substr( $url, $m2[0][1] ) . ' ' . $text;
2042                                 $url = substr( $url, 0, $m2[0][1] );
2043                         }
2044
2045                         # If the link text is an image URL, replace it with an <img> tag
2046                         # This happened by accident in the original parser, but some people used it extensively
2047                         $img = $this->maybeMakeExternalImage( $text );
2048                         if ( $img !== false ) {
2049                                 $text = $img;
2050                         }
2051
2052                         $dtrail = '';
2053
2054                         # Set linktype for CSS
2055                         $linktype = 'text';
2056
2057                         # No link text, e.g. [http://domain.tld/some.link]
2058                         if ( $text == '' ) {
2059                                 # Autonumber
2060                                 $langObj = $this->getTargetLanguage();
2061                                 $text = '[' . $langObj->formatNum( ++$this->mAutonumber ) . ']';
2062                                 $linktype = 'autonumber';
2063                         } else {
2064                                 # Have link text, e.g. [http://domain.tld/some.link text]s
2065                                 # Check for trail
2066                                 list( $dtrail, $trail ) = Linker::splitTrail( $trail );
2067                         }
2068
2069                         // Excluding protocol-relative URLs may avoid many false positives.
2070                         if ( preg_match( '/^(?:' . wfUrlProtocolsWithoutProtRel() . ')/', $text ) ) {
2071                                 $text = $this->getTargetLanguage()->getConverter()->markNoConversion( $text );
2072                         }
2073
2074                         $url = Sanitizer::cleanUrl( $url );
2075
2076                         # Use the encoded URL
2077                         # This means that users can paste URLs directly into the text
2078                         # Funny characters like ö aren't valid in URLs anyway
2079                         # This was changed in August 2004
2080                         $s .= Linker::makeExternalLink( $url, $text, false, $linktype,
2081                                 $this->getExternalLinkAttribs( $url ), $this->mTitle ) . $dtrail . $trail;
2082
2083                         # Register link in the output object.
2084                         $this->mOutput->addExternalLink( $url );
2085                 }
2086
2087                 return $s;
2088         }
2089
2090         /**
2091          * Get the rel attribute for a particular external link.
2092          *
2093          * @since 1.21
2094          * @param string|bool $url Optional URL, to extract the domain from for rel =>
2095          *   nofollow if appropriate
2096          * @param LinkTarget|null $title Optional LinkTarget, for wgNoFollowNsExceptions lookups
2097          * @return string|null Rel attribute for $url
2098          */
2099         public static function getExternalLinkRel( $url = false, $title = null ) {
2100                 global $wgNoFollowLinks, $wgNoFollowNsExceptions, $wgNoFollowDomainExceptions;
2101                 $ns = $title ? $title->getNamespace() : false;
2102                 if ( $wgNoFollowLinks && !in_array( $ns, $wgNoFollowNsExceptions )
2103                         && !wfMatchesDomainList( $url, $wgNoFollowDomainExceptions )
2104                 ) {
2105                         return 'nofollow';
2106                 }
2107                 return null;
2108         }
2109
2110         /**
2111          * Get an associative array of additional HTML attributes appropriate for a
2112          * particular external link.  This currently may include rel => nofollow
2113          * (depending on configuration, namespace, and the URL's domain) and/or a
2114          * target attribute (depending on configuration).
2115          *
2116          * @param string $url URL to extract the domain from for rel =>
2117          *   nofollow if appropriate
2118          * @return array Associative array of HTML attributes
2119          */
2120         public function getExternalLinkAttribs( $url ) {
2121                 $attribs = [];
2122                 $rel = self::getExternalLinkRel( $url, $this->mTitle );
2123
2124                 $target = $this->mOptions->getExternalLinkTarget();
2125                 if ( $target ) {
2126                         $attribs['target'] = $target;
2127                         if ( !in_array( $target, [ '_self', '_parent', '_top' ] ) ) {
2128                                 // T133507. New windows can navigate parent cross-origin.
2129                                 // Including noreferrer due to lacking browser
2130                                 // support of noopener. Eventually noreferrer should be removed.
2131                                 if ( $rel !== '' ) {
2132                                         $rel .= ' ';
2133                                 }
2134                                 $rel .= 'noreferrer noopener';
2135                         }
2136                 }
2137                 $attribs['rel'] = $rel;
2138                 return $attribs;
2139         }
2140
2141         /**
2142          * Replace unusual escape codes in a URL with their equivalent characters
2143          *
2144          * This generally follows the syntax defined in RFC 3986, with special
2145          * consideration for HTTP query strings.
2146          *
2147          * @param string $url
2148          * @return string
2149          */
2150         public static function normalizeLinkUrl( $url ) {
2151                 # Test for RFC 3986 IPv6 syntax
2152                 $scheme = '[a-z][a-z0-9+.-]*:';
2153                 $userinfo = '(?:[a-z0-9\-._~!$&\'()*+,;=:]|%[0-9a-f]{2})*';
2154                 $ipv6Host = '\\[((?:[0-9a-f:]|%3[0-A]|%[46][1-6])+)\\]';
2155                 if ( preg_match( "<^(?:{$scheme})?//(?:{$userinfo}@)?{$ipv6Host}(?:[:/?#].*|)$>i", $url, $m ) &&
2156                         IP::isValid( rawurldecode( $m[1] ) )
2157                 ) {
2158                         $isIPv6 = rawurldecode( $m[1] );
2159                 } else {
2160                         $isIPv6 = false;
2161                 }
2162
2163                 # Make sure unsafe characters are encoded
2164                 $url = preg_replace_callback( '/[\x00-\x20"<>\[\\\\\]^`{|}\x7F-\xFF]/',
2165                         function ( $m ) {
2166                                 return rawurlencode( $m[0] );
2167                         },
2168                         $url
2169                 );
2170
2171                 $ret = '';
2172                 $end = strlen( $url );
2173
2174                 # Fragment part - 'fragment'
2175                 $start = strpos( $url, '#' );
2176                 if ( $start !== false && $start < $end ) {
2177                         $ret = self::normalizeUrlComponent(
2178                                 substr( $url, $start, $end - $start ), '"#%<>[\]^`{|}' ) . $ret;
2179                         $end = $start;
2180                 }
2181
2182                 # Query part - 'query' minus &=+;
2183                 $start = strpos( $url, '?' );
2184                 if ( $start !== false && $start < $end ) {
2185                         $ret = self::normalizeUrlComponent(
2186                                 substr( $url, $start, $end - $start ), '"#%<>[\]^`{|}&=+;' ) . $ret;
2187                         $end = $start;
2188                 }
2189
2190                 # Scheme and path part - 'pchar'
2191                 # (we assume no userinfo or encoded colons in the host)
2192                 $ret = self::normalizeUrlComponent(
2193                         substr( $url, 0, $end ), '"#%<>[\]^`{|}/?' ) . $ret;
2194
2195                 # Fix IPv6 syntax
2196                 if ( $isIPv6 !== false ) {
2197                         $ipv6Host = "%5B({$isIPv6})%5D";
2198                         $ret = preg_replace(
2199                                 "<^((?:{$scheme})?//(?:{$userinfo}@)?){$ipv6Host}(?=[:/?#]|$)>i",
2200                                 "$1[$2]",
2201                                 $ret
2202                         );
2203                 }
2204
2205                 return $ret;
2206         }
2207
2208         private static function normalizeUrlComponent( $component, $unsafe ) {
2209                 $callback = function ( $matches ) use ( $unsafe ) {
2210                         $char = urldecode( $matches[0] );
2211                         $ord = ord( $char );
2212                         if ( $ord > 32 && $ord < 127 && strpos( $unsafe, $char ) === false ) {
2213                                 # Unescape it
2214                                 return $char;
2215                         } else {
2216                                 # Leave it escaped, but use uppercase for a-f
2217                                 return strtoupper( $matches[0] );
2218                         }
2219                 };
2220                 return preg_replace_callback( '/%[0-9A-Fa-f]{2}/', $callback, $component );
2221         }
2222
        /**
         * Make an image from a URL if that is allowed, either through the global
         * option, through the exception, or through the on-wiki whitelist.
         *
         * @param string $url
         *
         * @return string|bool Result of Linker::makeExternalImage(), or false if
         *   the URL is not turned into an image
         */
2231         private function maybeMakeExternalImage( $url ) {
2232                 $imagesfrom = $this->mOptions->getAllowExternalImagesFrom();
2233                 $imagesexception = !empty( $imagesfrom );
2234                 $text = false;
2235                 # $imagesfrom could be either a single string or an array of strings, parse out the latter
2236                 if ( $imagesexception && is_array( $imagesfrom ) ) {
2237                         $imagematch = false;
2238                         foreach ( $imagesfrom as $match ) {
2239                                 if ( strpos( $url, $match ) === 0 ) {
2240                                         $imagematch = true;
2241                                         break;
2242                                 }
2243                         }
2244                 } elseif ( $imagesexception ) {
2245                         $imagematch = ( strpos( $url, $imagesfrom ) === 0 );
2246                 } else {
2247                         $imagematch = false;
2248                 }
2249
2250                 if ( $this->mOptions->getAllowExternalImages()
2251                         || ( $imagesexception && $imagematch )
2252                 ) {
2253                         if ( preg_match( self::EXT_IMAGE_REGEX, $url ) ) {
2254                                 # Image found
2255                                 $text = Linker::makeExternalImage( $url );
2256                         }
2257                 }
2258                 if ( !$text && $this->mOptions->getEnableImageWhitelist()
2259                         && preg_match( self::EXT_IMAGE_REGEX, $url )
2260                 ) {
2261                         $whitelist = explode(
2262                                 "\n",
2263                                 wfMessage( 'external_image_whitelist' )->inContentLanguage()->text()
2264                         );
2265
2266                         foreach ( $whitelist as $entry ) {
2267                                 # Sanitize the regex fragment, make it case-insensitive, ignore blank entries/comments
2268                                 if ( strpos( $entry, '#' ) === 0 || $entry === '' ) {
2269                                         continue;
2270                                 }
2271                                 if ( preg_match( '/' . str_replace( '/', '\\/', $entry ) . '/i', $url ) ) {
2272                                         # Image matches a whitelist entry
2273                                         $text = Linker::makeExternalImage( $url );
2274                                         break;
2275                                 }
2276                         }
2277                 }
2278                 return $text;
2279         }
2280
2281         /**
2282          * Process [[ ]] wikilinks
2283          *
2284          * @param string $s
2285          *
2286          * @return string Processed text
2287          *
2288          * @private
2289          */
2290         public function replaceInternalLinks( $s ) {
2291                 $this->mLinkHolders->merge( $this->replaceInternalLinks2( $s ) );
2292                 return $s;
2293         }
2294
2295         /**
2296          * Process [[ ]] wikilinks (RIL)
2297          * @param string &$s
2298          * @throws MWException
2299          * @return LinkHolderArray
2300          *
2301          * @private
2302          */
2303         public function replaceInternalLinks2( &$s ) {
2304                 static $tc = false, $e1, $e1_img;
2305                 # the % is needed to support urlencoded titles as well
2306                 if ( !$tc ) {
2307                         $tc = Title::legalChars() . '#%';
2308                         # Match a link having the form [[namespace:link|alternate]]trail
2309                         $e1 = "/^([{$tc}]+)(?:\\|(.+?))?]](.*)\$/sD";
2310                         # Match cases where there is no "]]", which might still be images
2311                         $e1_img = "/^([{$tc}]+)\\|(.*)\$/sD";
2312                 }
2313
2314                 $holders = new LinkHolderArray( $this );
2315
2316                 # split the entire text string on occurrences of [[
2317                 $a = StringUtils::explode( '[[', ' ' . $s );
2318                 # get the first element (all text up to first [[), and remove the space we added
2319                 $s = $a->current();
2320                 $a->next();
2321                 $line = $a->current(); # Workaround for broken ArrayIterator::next() that returns "void"
2322                 $s = substr( $s, 1 );
2323
2324                 if ( is_null( $this->mTitle ) ) {
2325                         throw new MWException( __METHOD__ . ": \$this->mTitle is null\n" );
2326                 }
2327                 $nottalk = !$this->mTitle->isTalkPage();
2328
2329                 $useLinkPrefixExtension = $this->getTargetLanguage()->linkPrefixExtension();
2330                 $e2 = null;
2331                 if ( $useLinkPrefixExtension ) {
2332                         # Match the end of a line for a word that's not followed by whitespace,
2333                         # e.g. in the case of 'The Arab al[[Razi]]', 'al' will be matched
2334                         $charset = $this->contLang->linkPrefixCharset();
2335                         $e2 = "/^((?>.*[^$charset]|))(.+)$/sDu";
2336                         $m = [];
2337                         if ( preg_match( $e2, $s, $m ) ) {
2338                                 $first_prefix = $m[2];
2339                         } else {
2340                                 $first_prefix = false;
2341                         }
2342                 } else {
2343                         $prefix = '';
2344                 }
2345
2346                 $useSubpages = $this->areSubpagesAllowed();
2347
2348                 # Loop for each link
2349                 for ( ; $line !== false && $line !== null; $a->next(), $line = $a->current() ) {
2350                         # Check for excessive memory usage
2351                         if ( $holders->isBig() ) {
2352                                 # Too big
2353                                 # Do the existence check, replace the link holders and clear the array
2354                                 $holders->replace( $s );
2355                                 $holders->clear();
2356                         }
2357
2358                         if ( $useLinkPrefixExtension ) {
2359                                 if ( preg_match( $e2, $s, $m ) ) {
2360                                         list( , $s, $prefix ) = $m;
2361                                 } else {
2362                                         $prefix = '';
2363                                 }
2364                                 # first link
2365                                 if ( $first_prefix ) {
2366                                         $prefix = $first_prefix;
2367                                         $first_prefix = false;
2368                                 }
2369                         }
2370
2371                         $might_be_img = false;
2372
2373                         if ( preg_match( $e1, $line, $m ) ) { # page with normal text or alt
2374                                 $text = $m[2];
2375                                 # If we get a ] at the beginning of $m[3] that means we have a link that's something like:
2376                                 # [[Image:Foo.jpg|[http://example.com desc]]] <- having three ] in a row fucks up,
2377                                 # the real problem is with the $e1 regex
2378                                 # See T1500.
2379                                 # Still some problems for cases where the ] is meant to be outside punctuation,
2380                                 # and no image is in sight. See T4095.
2381                                 if ( $text !== ''
2382                                         && substr( $m[3], 0, 1 ) === ']'
2383                                         && strpos( $text, '[' ) !== false
2384                                 ) {
2385                                         $text .= ']'; # so that replaceExternalLinks($text) works later
2386                                         $m[3] = substr( $m[3], 1 );
2387                                 }
2388                                 # fix up urlencoded title texts
2389                                 if ( strpos( $m[1], '%' ) !== false ) {
2390                                         # Should anchors '#' also be rejected?
2391                                         $m[1] = str_replace( [ '<', '>' ], [ '&lt;', '&gt;' ], rawurldecode( $m[1] ) );
2392                                 }
2393                                 $trail = $m[3];
2394                         } elseif ( preg_match( $e1_img, $line, $m ) ) {
2395                                 # Invalid, but might be an image with a link in its caption
2396                                 $might_be_img = true;
2397                                 $text = $m[2];
2398                                 if ( strpos( $m[1], '%' ) !== false ) {
2399                                         $m[1] = str_replace( [ '<', '>' ], [ '&lt;', '&gt;' ], rawurldecode( $m[1] ) );
2400                                 }
2401                                 $trail = "";
2402                         } else { # Invalid form; output directly
2403                                 $s .= $prefix . '[[' . $line;
2404                                 continue;
2405                         }
2406
2407                         $origLink = ltrim( $m[1], ' ' );
2408
2409                         # Don't allow internal links to pages containing
2410                         # PROTO: where PROTO is a valid URL protocol; these
2411                         # should be external links.
2412                         if ( preg_match( '/^(?i:' . $this->mUrlProtocols . ')/', $origLink ) ) {
2413                                 $s .= $prefix . '[[' . $line;
2414                                 continue;
2415                         }
2416
2417                         # Make subpage if necessary
2418                         if ( $useSubpages ) {
2419                                 $link = $this->maybeDoSubpageLink( $origLink, $text );
2420                         } else {
2421                                 $link = $origLink;
2422                         }
2423
2424                         // \x7f isn't a default legal title char, so most likely strip
2425                         // markers will force us into the "invalid form" path above.  But,
2426                         // just in case, let's assert that xmlish tags aren't valid in
2427                         // the title position.
2428                         $unstrip = $this->mStripState->killMarkers( $link );
2429                         $noMarkers = ( $unstrip === $link );
2430
2431                         $nt = $noMarkers ? Title::newFromText( $link ) : null;
2432                         if ( $nt === null ) {
2433                                 $s .= $prefix . '[[' . $line;
2434                                 continue;
2435                         }
2436
2437                         $ns = $nt->getNamespace();
2438                         $iw = $nt->getInterwiki();
2439
2440                         $noforce = ( substr( $origLink, 0, 1 ) !== ':' );
2441
2442                         if ( $might_be_img ) { # if this is actually an invalid link
2443                                 if ( $ns == NS_FILE && $noforce ) { # but might be an image
2444                                         $found = false;
2445                                         while ( true ) {
2446                                                 # look at the next 'line' to see if we can close it there
2447                                                 $a->next();
2448                                                 $next_line = $a->current();
2449                                                 if ( $next_line === false || $next_line === null ) {
2450                                                         break;
2451                                                 }
2452                                                 $m = explode( ']]', $next_line, 3 );
2453                                                 if ( count( $m ) == 3 ) {
2454                                                         # the first ]] closes the inner link, the second the image
2455                                                         $found = true;
2456                                                         $text .= "[[{$m[0]}]]{$m[1]}";
2457                                                         $trail = $m[2];
2458                                                         break;
2459                                                 } elseif ( count( $m ) == 2 ) {
2460                                                         # if there's exactly one ]] that's fine, we'll keep looking
2461                                                         $text .= "[[{$m[0]}]]{$m[1]}";
2462                                                 } else {
2463                                                         # if $next_line is invalid too, we need look no further
2464                                                         $text .= '[[' . $next_line;
2465                                                         break;
2466                                                 }
2467                                         }
2468                                         if ( !$found ) {
2469                                                 # we couldn't find the end of this imageLink, so output it raw
2470                                                 # but don't ignore what might be perfectly normal links in the text we've examined
2471                                                 $holders->merge( $this->replaceInternalLinks2( $text ) );
2472                                                 $s .= "{$prefix}[[$link|$text";
2473                                                 # note: no $trail, because without an end, there *is* no trail
2474                                                 continue;
2475                                         }
2476                                 } else { # it's not an image, so output it raw
2477                                         $s .= "{$prefix}[[$link|$text";
2478                                         # note: no $trail, because without an end, there *is* no trail
2479                                         continue;
2480                                 }
2481                         }
2482
2483                         $wasblank = ( $text == '' );
2484                         if ( $wasblank ) {
2485                                 $text = $link;
2486                                 if ( !$noforce ) {
2487                                         # Strip off leading ':'
2488                                         $text = substr( $text, 1 );
2489                                 }
2490                         } else {
2491                                 # T6598 madness. Handle the quotes only if they come from the alternate part
2492                                 # [[Lista d''e paise d''o munno]] -> <a href="...">Lista d''e paise d''o munno</a>
2493                                 # [[Criticism of Harry Potter|Criticism of ''Harry Potter'']]
2494                                 #    -> <a href="Criticism of Harry Potter">Criticism of <i>Harry Potter</i></a>
2495                                 $text = $this->doQuotes( $text );
2496                         }
2497
2498                         # Link not escaped by : , create the various objects
2499                         if ( $noforce && !$nt->wasLocalInterwiki() ) {
2500                                 # Interwikis
2501                                 if (
2502                                         $iw && $this->mOptions->getInterwikiMagic() && $nottalk && (
2503                                                 Language::fetchLanguageName( $iw, null, 'mw' ) ||
2504                                                 in_array( $iw, $this->svcOptions->get( 'ExtraInterlanguageLinkPrefixes' ) )
2505                                         )
2506                                 ) {
2507                                         # T26502: filter duplicates
2508                                         if ( !isset( $this->mLangLinkLanguages[$iw] ) ) {
2509                                                 $this->mLangLinkLanguages[$iw] = true;
2510                                                 $this->mOutput->addLanguageLink( $nt->getFullText() );
2511                                         }
2512
2513                                         /**
2514                                          * Strip the whitespace interwiki links produce, see T10897
2515                                          */
2516                                         $s = rtrim( $s . $prefix ) . $trail; # T175416
2517                                         continue;
2518                                 }
2519
2520                                 if ( $ns == NS_FILE ) {
2521                                         if ( !$this->badFileLookup->isBadFile( $nt->getDBkey(), $this->mTitle ) ) {
2522                                                 if ( $wasblank ) {
2523                                                         # if no parameters were passed, $text
2524                                                         # becomes something like "File:Foo.png",
2525                                                         # which we don't want to pass on to the
2526                                                         # image generator
2527                                                         $text = '';
2528                                                 } else {
2529                                                         # recursively parse links inside the image caption
2530                                                         # actually, this will parse them in any other parameters, too,
2531                                                         # but it might be hard to fix that, and it doesn't matter ATM
2532                                                         $text = $this->replaceExternalLinks( $text );
2533                                                         $holders->merge( $this->replaceInternalLinks2( $text ) );
2534                                                 }
2535                                                 # cloak any absolute URLs inside the image markup, so replaceExternalLinks() won't touch them
2536                                                 $s .= $prefix . $this->armorLinks(
2537                                                         $this->makeImage( $nt, $text, $holders ) ) . $trail;
2538                                                 continue;
2539                                         }
2540                                 } elseif ( $ns == NS_CATEGORY ) {
2541                                         /**
2542                                          * Strip the whitespace Category links produce, see T2087
2543                                          */
2544                                         $s = rtrim( $s . $prefix ) . $trail; # T2087, T87753
2545
2546                                         if ( $wasblank ) {
2547                                                 $sortkey = $this->getDefaultSort();
2548                                         } else {
2549                                                 $sortkey = $text;
2550                                         }
2551                                         $sortkey = Sanitizer::decodeCharReferences( $sortkey );
2552                                         $sortkey = str_replace( "\n", '', $sortkey );
2553                                         $sortkey = $this->getTargetLanguage()->convertCategoryKey( $sortkey );
2554                                         $this->mOutput->addCategory( $nt->getDBkey(), $sortkey );
2555
2556                                         continue;
2557                                 }
2558                         }
2559
2560                         # Self-link checking. For some languages, variants of the title are checked in
2561                         # LinkHolderArray::doVariants() to allow batching the existence checks necessary
2562                         # for linking to a different variant.
2563                         if ( $ns != NS_SPECIAL && $nt->equals( $this->mTitle ) && !$nt->hasFragment() ) {
2564                                 $s .= $prefix . Linker::makeSelfLinkObj( $nt, $text, '', $trail );
2565                                 continue;
2566                         }
2567
2568                         # NS_MEDIA is a pseudo-namespace for linking directly to a file
2569                         # @todo FIXME: Should do batch file existence checks, see comment below
2570                         if ( $ns == NS_MEDIA ) {
2571                                 # Give extensions a chance to select the file revision for us
2572                                 $options = [];
2573                                 $descQuery = false;
2574                                 Hooks::run( 'BeforeParserFetchFileAndTitle',
2575                                         [ $this, $nt, &$options, &$descQuery ] );
2576                                 # Fetch and register the file (file title may be different via hooks)
2577                                 list( $file, $nt ) = $this->fetchFileAndTitle( $nt, $options );
2578                                 # Cloak with NOPARSE to avoid replacement in replaceExternalLinks
2579                                 $s .= $prefix . $this->armorLinks(
2580                                         Linker::makeMediaLinkFile( $nt, $file, $text ) ) . $trail;
2581                                 continue;
2582                         }
2583
2584                         # Some titles, such as valid special pages or files in foreign repos, should
2585                         # be shown as bluelinks even though they're not included in the page table
2586                         # @todo FIXME: isAlwaysKnown() can be expensive for file links; we should really do
2587                         # batch file existence checks for NS_FILE and NS_MEDIA
2588                         if ( $iw == '' && $nt->isAlwaysKnown() ) {
2589                                 $this->mOutput->addLink( $nt );
2590                                 $s .= $this->makeKnownLinkHolder( $nt, $text, $trail, $prefix );
2591                         } else {
2592                                 # Links will be added to the output link list after checking
2593                                 $s .= $holders->makeHolder( $nt, $text, [], $trail, $prefix );
2594                         }
2595                 }
2596                 return $holders;
2597         }
2598
2599         /**
2600          * Render a forced-blue link inline; protect against double expansion of
2601          * URLs if we're in a mode that prepends full URL prefixes to internal links.
2602          * Since this little disaster has to split off the trail text to avoid
2603          * breaking URLs in the following text without breaking trails on the
2604          * wiki links, it's been made into a horrible function.
2605          *
2606          * @param Title $nt
2607          * @param string $text
2608          * @param string $trail
2609          * @param string $prefix
2610          * @return string HTML-wikitext mix oh yuck
2611          */
2612         protected function makeKnownLinkHolder( $nt, $text = '', $trail = '', $prefix = '' ) {
2613                 list( $inside, $trail ) = Linker::splitTrail( $trail );
2614
2615                 if ( $text == '' ) {
2616                         $text = htmlspecialchars( $nt->getPrefixedText() );
2617                 }
2618
2619                 $link = $this->getLinkRenderer()->makeKnownLink(
2620                         $nt, new HtmlArmor( "$prefix$text$inside" )
2621                 );
2622
2623                 return $this->armorLinks( $link ) . $trail;
2624         }
2625
2626         /**
2627          * Insert a NOPARSE hacky thing into any inline links in a chunk that's
2628          * going to go through further parsing steps before inline URL expansion.
2629          *
2630          * Not needed quite as much as it used to be since free links are a bit
2631          * more sensible these days. But bracketed links are still an issue.
2632          *
2633          * @param string $text More-or-less HTML
2634          * @return string Less-or-more HTML with NOPARSE bits
2635          */
2636         public function armorLinks( $text ) {
2637                 return preg_replace( '/\b((?i)' . $this->mUrlProtocols . ')/',
2638                         self::MARKER_PREFIX . "NOPARSE$1", $text );
2639         }
2640
2641         /**
2642          * Return true if subpage links should be expanded on this page.
2643          * @return bool
2644          */
2645         public function areSubpagesAllowed() {
2646                 # Some namespaces don't allow subpages
2647                 return $this->nsInfo->hasSubpages( $this->mTitle->getNamespace() );
2648         }
2649
2650         /**
2651          * Handle link to subpage if necessary
2652          *
2653          * @param string $target The source of the link
2654          * @param string &$text The link text, modified as necessary
2655          * @return string The full name of the link
2656          * @private
2657          */
2658         public function maybeDoSubpageLink( $target, &$text ) {
2659                 return Linker::normalizeSubpageLink( $this->mTitle, $target, $text );
2660         }
2661
2662         /**
2663          * Make lists from lines starting with ':', '*', '#', etc. (DBL)
2664          *
2665          * @param string $text
2666          * @param bool $linestart Whether or not this is at the start of a line.
2667          * @private
2668          * @return string The lists rendered as HTML
2669          */
2670         public function doBlockLevels( $text, $linestart ) {
2671                 return BlockLevelPass::doBlockLevels( $text, $linestart );
2672         }
2673
2674         /**
2675          * Return value of a magic variable (like PAGENAME)
2676          *
2677          * @private
2678          *
2679          * @param string $index Magic variable identifier as mapped in MagicWordFactory::$mVariableIDs
2680          * @param bool|PPFrame $frame
2681          *
2682          * @throws MWException
2683          * @return string
2684          */
2685         public function getVariableValue( $index, $frame = false ) {
2686                 if ( is_null( $this->mTitle ) ) {
2687                         // If no title set, bad things are going to happen
2688                         // later. Title should always be set since this
2689                         // should only be called in the middle of a parse
2690                         // operation (but the unit-tests do funky stuff)
2691                         throw new MWException( __METHOD__ . ' Should only be '
2692                                 . ' called while parsing (no title set)' );
2693                 }
2694
2695                 // Avoid PHP 7.1 warning from passing $this by reference
2696                 $parser = $this;
2697
2698                 /**
2699                  * Some of these require message or data lookups and can be
2700                  * expensive to check many times.
2701                  */
2702                 if (
2703                         Hooks::run( 'ParserGetVariableValueVarCache', [ &$parser, &$this->mVarCache ] ) &&
2704                         isset( $this->mVarCache[$index] )
2705                 ) {
2706                         return $this->mVarCache[$index];
2707                 }
2708
2709                 $ts = wfTimestamp( TS_UNIX, $this->mOptions->getTimestamp() );
2710                 Hooks::run( 'ParserGetVariableValueTs', [ &$parser, &$ts ] );
2711
2712                 $pageLang = $this->getFunctionLang();
2713
2714                 switch ( $index ) {
2715                         case '!':
2716                                 $value = '|';
2717                                 break;
2718                         case 'currentmonth':
2719                                 $value = $pageLang->formatNum( MWTimestamp::getInstance( $ts )->format( 'm' ), true );
2720                                 break;
2721                         case 'currentmonth1':
2722                                 $value = $pageLang->formatNum( MWTimestamp::getInstance( $ts )->format( 'n' ), true );
2723                                 break;
2724                         case 'currentmonthname':
2725                                 $value = $pageLang->getMonthName( MWTimestamp::getInstance( $ts )->format( 'n' ) );
2726                                 break;
2727                         case 'currentmonthnamegen':
2728                                 $value = $pageLang->getMonthNameGen( MWTimestamp::getInstance( $ts )->format( 'n' ) );
2729                                 break;
2730                         case 'currentmonthabbrev':
2731                                 $value = $pageLang->getMonthAbbreviation( MWTimestamp::getInstance( $ts )->format( 'n' ) );
2732                                 break;
2733                         case 'currentday':
2734                                 $value = $pageLang->formatNum( MWTimestamp::getInstance( $ts )->format( 'j' ), true );
2735                                 break;
2736                         case 'currentday2':
2737                                 $value = $pageLang->formatNum( MWTimestamp::getInstance( $ts )->format( 'd' ), true );
2738                                 break;
2739                         case 'localmonth':
2740                                 $value = $pageLang->formatNum( MWTimestamp::getLocalInstance( $ts )->format( 'm' ), true );
2741                                 break;
2742                         case 'localmonth1':
2743                                 $value = $pageLang->formatNum( MWTimestamp::getLocalInstance( $ts )->format( 'n' ), true );
2744                                 break;
2745                         case 'localmonthname':
2746                                 $value = $pageLang->getMonthName( MWTimestamp::getLocalInstance( $ts )->format( 'n' ) );
2747                                 break;
2748                         case 'localmonthnamegen':
2749                                 $value = $pageLang->getMonthNameGen( MWTimestamp::getLocalInstance( $ts )->format( 'n' ) );
2750                                 break;
2751                         case 'localmonthabbrev':
2752                                 $value = $pageLang->getMonthAbbreviation( MWTimestamp::getLocalInstance( $ts )->format( 'n' ) );
2753                                 break;
2754                         case 'localday':
2755                                 $value = $pageLang->formatNum( MWTimestamp::getLocalInstance( $ts )->format( 'j' ), true );
2756                                 break;
2757                         case 'localday2':
2758                                 $value = $pageLang->formatNum( MWTimestamp::getLocalInstance( $ts )->format( 'd' ), true );
2759                                 break;
2760                         case 'pagename':
2761                                 $value = wfEscapeWikiText( $this->mTitle->getText() );
2762                                 break;
2763                         case 'pagenamee':
2764                                 $value = wfEscapeWikiText( $this->mTitle->getPartialURL() );
2765                                 break;
2766                         case 'fullpagename':
2767                                 $value = wfEscapeWikiText( $this->mTitle->getPrefixedText() );
2768                                 break;
2769                         case 'fullpagenamee':
2770                                 $value = wfEscapeWikiText( $this->mTitle->getPrefixedURL() );
2771                                 break;
2772                         case 'subpagename':
2773                                 $value = wfEscapeWikiText( $this->mTitle->getSubpageText() );
2774                                 break;
2775                         case 'subpagenamee':
2776                                 $value = wfEscapeWikiText( $this->mTitle->getSubpageUrlForm() );
2777                                 break;
2778                         case 'rootpagename':
2779                                 $value = wfEscapeWikiText( $this->mTitle->getRootText() );
2780                                 break;
2781                         case 'rootpagenamee':
2782                                 $value = wfEscapeWikiText( wfUrlencode( str_replace(
2783                                         ' ',
2784                                         '_',
2785                                         $this->mTitle->getRootText()
2786                                 ) ) );
2787                                 break;
2788                         case 'basepagename':
2789                                 $value = wfEscapeWikiText( $this->mTitle->getBaseText() );
2790                                 break;
2791                         case 'basepagenamee':
2792                                 $value = wfEscapeWikiText( wfUrlencode( str_replace(
2793                                         ' ',
2794                                         '_',
2795                                         $this->mTitle->getBaseText()
2796                                 ) ) );
2797                                 break;
2798                         case 'talkpagename':
2799                                 if ( $this->mTitle->canHaveTalkPage() ) {
2800                                         $talkPage = $this->mTitle->getTalkPage();
2801                                         $value = wfEscapeWikiText( $talkPage->getPrefixedText() );
2802                                 } else {
2803                                         $value = '';
2804                                 }
2805                                 break;
2806                         case 'talkpagenamee':
2807                                 if ( $this->mTitle->canHaveTalkPage() ) {
2808                                         $talkPage = $this->mTitle->getTalkPage();
2809                                         $value = wfEscapeWikiText( $talkPage->getPrefixedURL() );
2810                                 } else {
2811                                         $value = '';
2812                                 }
2813                                 break;
2814                         case 'subjectpagename':
2815                                 $subjPage = $this->mTitle->getSubjectPage();
2816                                 $value = wfEscapeWikiText( $subjPage->getPrefixedText() );
2817                                 break;
2818                         case 'subjectpagenamee':
2819                                 $subjPage = $this->mTitle->getSubjectPage();
2820                                 $value = wfEscapeWikiText( $subjPage->getPrefixedURL() );
2821                                 break;
2822                         case 'pageid': // requested in T25427
2823                                 # Inform the edit saving system that getting the canonical output
2824                                 # after page insertion requires a parse that used that exact page ID
2825                                 $this->setOutputFlag( 'vary-page-id', '{{PAGEID}} used' );
2826                                 $value = $this->mTitle->getArticleID();
2827                                 if ( !$value ) {
2828                                         $value = $this->mOptions->getSpeculativePageId();
2829                                         if ( $value ) {
2830                                                 $this->mOutput->setSpeculativePageIdUsed( $value );
2831                                         }
2832                                 }
2833                                 break;
2834                         case 'revisionid':
2835                                 if (
2836                                         $this->svcOptions->get( 'MiserMode' ) &&
2837                                         !$this->mOptions->getInterfaceMessage() &&
2838                                         // @TODO: disallow this word on all namespaces
2839                                         $this->nsInfo->isContent( $this->mTitle->getNamespace() )
2840                                 ) {
2841                                         // Use a stub result instead of the actual revision ID in order to avoid
2842                                         // double parses on page save but still allow preview detection (T137900)
2843                                         if ( $this->getRevisionId() || $this->mOptions->getSpeculativeRevId() ) {
2844                                                 $value = '-';
2845                                         } else {
2846                                                 $this->setOutputFlag( 'vary-revision-exists', '{{REVISIONID}} used' );
2847                                                 $value = '';
2848                                         }
2849                                 } else {
2850                                         # Inform the edit saving system that getting the canonical output after
2851                                         # revision insertion requires a parse that used that exact revision ID
2852                                         $this->setOutputFlag( 'vary-revision-id', '{{REVISIONID}} used' );
2853                                         $value = $this->getRevisionId();
2854                                         if ( $value === 0 ) {
2855                                                 $rev = $this->getRevisionObject();
2856                                                 $value = $rev ? $rev->getId() : $value;
2857                                         }
2858                                         if ( !$value ) {
2859                                                 $value = $this->mOptions->getSpeculativeRevId();
2860                                                 if ( $value ) {
2861                                                         $this->mOutput->setSpeculativeRevIdUsed( $value );
2862                                                 }
2863                                         }
2864                                 }
2865                                 break;
2866                         case 'revisionday':
2867                                 $value = (int)$this->getRevisionTimestampSubstring( 6, 2, self::MAX_TTS, $index );
2868                                 break;
2869                         case 'revisionday2':
2870                                 $value = $this->getRevisionTimestampSubstring( 6, 2, self::MAX_TTS, $index );
2871                                 break;
2872                         case 'revisionmonth':
2873                                 $value = $this->getRevisionTimestampSubstring( 4, 2, self::MAX_TTS, $index );
2874                                 break;
2875                         case 'revisionmonth1':
2876                                 $value = (int)$this->getRevisionTimestampSubstring( 4, 2, self::MAX_TTS, $index );
2877                                 break;
2878                         case 'revisionyear':
2879                                 $value = $this->getRevisionTimestampSubstring( 0, 4, self::MAX_TTS, $index );
2880                                 break;
2881                         case 'revisiontimestamp':
2882                                 $value = $this->getRevisionTimestampSubstring( 0, 14, self::MAX_TTS, $index );
2883                                 break;
2884                         case 'revisionuser':
2885                                 # Inform the edit saving system that getting the canonical output after
2886                                 # revision insertion requires a parse that used the actual user ID
2887                                 $this->setOutputFlag( 'vary-user', '{{REVISIONUSER}} used' );
2888                                 $value = $this->getRevisionUser();
2889                                 break;
2890                         case 'revisionsize':
2891                                 $value = $this->getRevisionSize();
2892                                 break;
2893                         case 'namespace':
2894                                 $value = str_replace( '_', ' ',
2895                                         $this->contLang->getNsText( $this->mTitle->getNamespace() ) );
2896                                 break;
2897                         case 'namespacee':
2898                                 $value = wfUrlencode( $this->contLang->getNsText( $this->mTitle->getNamespace() ) );
2899                                 break;
2900                         case 'namespacenumber':
2901                                 $value = $this->mTitle->getNamespace();
2902                                 break;
2903                         case 'talkspace':
2904                                 $value = $this->mTitle->canHaveTalkPage()
2905                                         ? str_replace( '_', ' ', $this->mTitle->getTalkNsText() )
2906                                         : '';
2907                                 break;
2908                         case 'talkspacee':
2909                                 $value = $this->mTitle->canHaveTalkPage() ? wfUrlencode( $this->mTitle->getTalkNsText() ) : '';
2910                                 break;
2911                         case 'subjectspace':
2912                                 $value = str_replace( '_', ' ', $this->mTitle->getSubjectNsText() );
2913                                 break;
2914                         case 'subjectspacee':
2915                                 $value = ( wfUrlencode( $this->mTitle->getSubjectNsText() ) );
2916                                 break;
2917                         case 'currentdayname':
2918                                 $value = $pageLang->getWeekdayName( (int)MWTimestamp::getInstance( $ts )->format( 'w' ) + 1 );
2919                                 break;
2920                         case 'currentyear':
2921                                 $value = $pageLang->formatNum( MWTimestamp::getInstance( $ts )->format( 'Y' ), true );
2922                                 break;
2923                         case 'currenttime':
2924                                 $value = $pageLang->time( wfTimestamp( TS_MW, $ts ), false, false );
2925                                 break;
2926                         case 'currenthour':
2927                                 $value = $pageLang->formatNum( MWTimestamp::getInstance( $ts )->format( 'H' ), true );
2928                                 break;
2929                         case 'currentweek':
2930                                 # @bug T6594 PHP5 has it zero padded, PHP4 does not, cast to
2931                                 # int to remove the padding
2932                                 $value = $pageLang->formatNum( (int)MWTimestamp::getInstance( $ts )->format( 'W' ) );
2933                                 break;
2934                         case 'currentdow':
2935                                 $value = $pageLang->formatNum( MWTimestamp::getInstance( $ts )->format( 'w' ) );
2936                                 break;
2937                         case 'localdayname':
2938                                 $value = $pageLang->getWeekdayName(
2939                                         (int)MWTimestamp::getLocalInstance( $ts )->format( 'w' ) + 1
2940                                 );
2941                                 break;
2942                         case 'localyear':
2943                                 $value = $pageLang->formatNum( MWTimestamp::getLocalInstance( $ts )->format( 'Y' ), true );
2944                                 break;
2945                         case 'localtime':
2946                                 $value = $pageLang->time(
2947                                         MWTimestamp::getLocalInstance( $ts )->format( 'YmdHis' ),
2948                                         false,
2949                                         false
2950                                 );
2951                                 break;
2952                         case 'localhour':
2953                                 $value = $pageLang->formatNum( MWTimestamp::getLocalInstance( $ts )->format( 'H' ), true );
2954                                 break;
2955                         case 'localweek':
2956                                 # @bug T6594 PHP5 has it zero padded, PHP4 does not, cast to
2957                                 # int to remove the padding
2958                                 $value = $pageLang->formatNum( (int)MWTimestamp::getLocalInstance( $ts )->format( 'W' ) );
2959                                 break;
2960                         case 'localdow':
2961                                 $value = $pageLang->formatNum( MWTimestamp::getLocalInstance( $ts )->format( 'w' ) );
2962                                 break;
2963                         case 'numberofarticles':
2964                                 $value = $pageLang->formatNum( SiteStats::articles() );
2965                                 break;
2966                         case 'numberoffiles':
2967                                 $value = $pageLang->formatNum( SiteStats::images() );
2968                                 break;
2969                         case 'numberofusers':
2970                                 $value = $pageLang->formatNum( SiteStats::users() );
2971                                 break;
2972                         case 'numberofactiveusers':
2973                                 $value = $pageLang->formatNum( SiteStats::activeUsers() );
2974                                 break;
2975                         case 'numberofpages':
2976                                 $value = $pageLang->formatNum( SiteStats::pages() );
2977                                 break;
2978                         case 'numberofadmins':
2979                                 $value = $pageLang->formatNum( SiteStats::numberingroup( 'sysop' ) );
2980                                 break;
2981                         case 'numberofedits':
2982                                 $value = $pageLang->formatNum( SiteStats::edits() );
2983                                 break;
2984                         case 'currenttimestamp':
2985                                 $value = wfTimestamp( TS_MW, $ts );
2986                                 break;
2987                         case 'localtimestamp':
2988                                 $value = MWTimestamp::getLocalInstance( $ts )->format( 'YmdHis' );
2989                                 break;
2990                         case 'currentversion':
2991                                 $value = SpecialVersion::getVersion();
2992                                 break;
2993                         case 'articlepath':
2994                                 return $this->svcOptions->get( 'ArticlePath' );
2995                         case 'sitename':
2996                                 return $this->svcOptions->get( 'Sitename' );
2997                         case 'server':
2998                                 return $this->svcOptions->get( 'Server' );
2999                         case 'servername':
3000                                 return $this->svcOptions->get( 'ServerName' );
3001                         case 'scriptpath':
3002                                 return $this->svcOptions->get( 'ScriptPath' );
3003                         case 'stylepath':
3004                                 return $this->svcOptions->get( 'StylePath' );
3005                         case 'directionmark':
3006                                 return $pageLang->getDirMark();
3007                         case 'contentlanguage':
3008                                 return $this->svcOptions->get( 'LanguageCode' );
3009                         case 'pagelanguage':
3010                                 $value = $pageLang->getCode();
3011                                 break;
3012                         case 'cascadingsources':
3013                                 $value = CoreParserFunctions::cascadingsources( $this );
3014                                 break;
3015                         default:
3016                                 $ret = null;
3017                                 Hooks::run(
3018                                         'ParserGetVariableValueSwitch',
3019                                         [ &$parser, &$this->mVarCache, &$index, &$ret, &$frame ]
3020                                 );
3021
3022                                 return $ret;
3023                 }
3024
3025                 if ( $index ) {
3026                         $this->mVarCache[$index] = $value;
3027                 }
3028
3029                 return $value;
3030         }
3031
3032         /**
3033          * @param int $start
3034          * @param int $len
3035          * @param int $mtts Max time-till-save; sets vary-revision-timestamp if result changes by then
3036          * @param string $variable Parser variable name
3037          * @return string
3038          */
3039         private function getRevisionTimestampSubstring( $start, $len, $mtts, $variable ) {
3040                 # Get the timezone-adjusted timestamp to be used for this revision
3041                 $resNow = substr( $this->getRevisionTimestamp(), $start, $len );
3042                 # Possibly set vary-revision if there is not yet an associated revision
3043                 if ( !$this->getRevisionObject() ) {
3044                         # Get the timezone-adjusted timestamp $mtts seconds in the future.
3045                         # This future is relative to the current time and not that of the
3046                         # parser options. The rendered timestamp can be compared to that
3047                         # of the timestamp specified by the parser options.
3048                         $resThen = substr(
3049                                 $this->contLang->userAdjust( wfTimestamp( TS_MW, time() + $mtts ), '' ),
3050                                 $start,
3051                                 $len
3052                         );
3053
3054                         if ( $resNow !== $resThen ) {
3055                                 # Inform the edit saving system that getting the canonical output after
3056                                 # revision insertion requires a parse that used an actual revision timestamp
3057                                 $this->setOutputFlag( 'vary-revision-timestamp', "$variable used" );
3058                         }
3059                 }
3060
3061                 return $resNow;
3062         }
3063
3064         /**
3065          * initialise the magic variables (like CURRENTMONTHNAME) and substitution modifiers
3066          *
3067          * @private
3068          */
3069         public function initialiseVariables() {
3070                 $variableIDs = $this->magicWordFactory->getVariableIDs();
3071                 $substIDs = $this->magicWordFactory->getSubstIDs();
3072
3073                 $this->mVariables = $this->magicWordFactory->newArray( $variableIDs );
3074                 $this->mSubstWords = $this->magicWordFactory->newArray( $substIDs );
3075         }
3076
3077         /**
3078          * Preprocess some wikitext and return the document tree.
3079          * This is the ghost of replace_variables().
3080          *
3081          * @param string $text The text to parse
3082          * @param int $flags Bitwise combination of:
3083          *   - self::PTD_FOR_INCLUSION: Handle "<noinclude>" and "<includeonly>" as if the text is being
3084          *     included. Default is to assume a direct page view.
3085          *
3086          * The generated DOM tree must depend only on the input text and the flags.
3087          * The DOM tree must be the same in OT_HTML and OT_WIKI mode, to avoid a regression of T6899.
3088          *
3089          * Any flag added to the $flags parameter here, or any other parameter liable to cause a
3090          * change in the DOM tree for a given text, must be passed through the section identifier
3091          * in the section edit link and thus back to extractSections().
3092          *
3093          * The output of this function is currently only cached in process memory, but a persistent
3094          * cache may be implemented at a later date which takes further advantage of these strict
3095          * dependency requirements.
3096          *
3097          * @return PPNode
3098          */
3099         public function preprocessToDom( $text, $flags = 0 ) {
3100                 $dom = $this->getPreprocessor()->preprocessToObj( $text, $flags );
3101                 return $dom;
3102         }
3103
3104         /**
3105          * Return a three-element array: leading whitespace, string contents, trailing whitespace
3106          *
3107          * @param string $s
3108          *
3109          * @return array
3110          */
3111         public static function splitWhitespace( $s ) {
3112                 $ltrimmed = ltrim( $s );
3113                 $w1 = substr( $s, 0, strlen( $s ) - strlen( $ltrimmed ) );
3114                 $trimmed = rtrim( $ltrimmed );
3115                 $diff = strlen( $ltrimmed ) - strlen( $trimmed );
3116                 if ( $diff > 0 ) {
3117                         $w2 = substr( $ltrimmed, -$diff );
3118                 } else {
3119                         $w2 = '';
3120                 }
3121                 return [ $w1, $trimmed, $w2 ];
3122         }
3123
3124         /**
3125          * Replace magic variables, templates, and template arguments
3126          * with the appropriate text. Templates are substituted recursively,
3127          * taking care to avoid infinite loops.
3128          *
3129          * Note that the substitution depends on value of $mOutputType:
3130          *  self::OT_WIKI: only {{subst:}} templates
3131          *  self::OT_PREPROCESS: templates but not extension tags
3132          *  self::OT_HTML: all templates and extension tags
3133          *
3134          * @param string $text The text to transform
3135          * @param false|PPFrame|array $frame Object describing the arguments passed to the
3136          *   template. Arguments may also be provided as an associative array, as
3137          *   was the usual case before MW1.12. Providing arguments this way may be
3138          *   useful for extensions wishing to perform variable replacement
3139          *   explicitly.
3140          * @param bool $argsOnly Only do argument (triple-brace) expansion, not
3141          *   double-brace expansion.
3142          * @return string
3143          */
3144         public function replaceVariables( $text, $frame = false, $argsOnly = false ) {
3145                 # Is there any text? Also, Prevent too big inclusions!
3146                 $textSize = strlen( $text );
3147                 if ( $textSize < 1 || $textSize > $this->mOptions->getMaxIncludeSize() ) {
3148                         return $text;
3149                 }
3150
3151                 if ( $frame === false ) {
3152                         $frame = $this->getPreprocessor()->newFrame();
3153                 } elseif ( !( $frame instanceof PPFrame ) ) {
3154                         $this->logger->debug(
3155                                 __METHOD__ . " called using plain parameters instead of " .
3156                                 "a PPFrame instance. Creating custom frame."
3157                         );
3158                         $frame = $this->getPreprocessor()->newCustomFrame( $frame );
3159                 }
3160
3161                 $dom = $this->preprocessToDom( $text );
3162                 $flags = $argsOnly ? PPFrame::NO_TEMPLATES : 0;
3163                 $text = $frame->expand( $dom, $flags );
3164
3165                 return $text;
3166         }
3167
3168         /**
3169          * Clean up argument array - refactored in 1.9 so parserfunctions can use it, too.
3170          *
3171          * @param array $args
3172          *
3173          * @return array
3174          */
3175         public static function createAssocArgs( $args ) {
3176                 $assocArgs = [];
3177                 $index = 1;
3178                 foreach ( $args as $arg ) {
3179                         $eqpos = strpos( $arg, '=' );
3180                         if ( $eqpos === false ) {
3181                                 $assocArgs[$index++] = $arg;
3182                         } else {
3183                                 $name = trim( substr( $arg, 0, $eqpos ) );
3184                                 $value = trim( substr( $arg, $eqpos + 1 ) );
3185                                 if ( $value === false ) {
3186                                         $value = '';
3187                                 }
3188                                 if ( $name !== false ) {
3189                                         $assocArgs[$name] = $value;
3190                                 }
3191                         }
3192                 }
3193
3194                 return $assocArgs;
3195         }
3196
3197         /**
3198          * Warn the user when a parser limitation is reached
3199          * Will warn at most once the user per limitation type
3200          *
3201          * The results are shown during preview and run through the Parser (See EditPage.php)
3202          *
3203          * @param string $limitationType Should be one of:
3204          *   'expensive-parserfunction' (corresponding messages:
3205          *       'expensive-parserfunction-warning',
3206          *       'expensive-parserfunction-category')
3207          *   'post-expand-template-argument' (corresponding messages:
3208          *       'post-expand-template-argument-warning',
3209          *       'post-expand-template-argument-category')
3210          *   'post-expand-template-inclusion' (corresponding messages:
3211          *       'post-expand-template-inclusion-warning',
3212          *       'post-expand-template-inclusion-category')
3213          *   'node-count-exceeded' (corresponding messages:
3214          *       'node-count-exceeded-warning',
3215          *       'node-count-exceeded-category')
3216          *   'expansion-depth-exceeded' (corresponding messages:
3217          *       'expansion-depth-exceeded-warning',
3218          *       'expansion-depth-exceeded-category')
3219          * @param string|int|null $current Current value
3220          * @param string|int|null $max Maximum allowed, when an explicit limit has been
3221          *       exceeded, provide the values (optional)
3222          */
3223         public function limitationWarn( $limitationType, $current = '', $max = '' ) {
3224                 # does no harm if $current and $max are present but are unnecessary for the message
3225                 # Not doing ->inLanguage( $this->mOptions->getUserLangObj() ), since this is shown
3226                 # only during preview, and that would split the parser cache unnecessarily.
3227                 $warning = wfMessage( "$limitationType-warning" )->numParams( $current, $max )
3228                         ->text();
3229                 $this->mOutput->addWarning( $warning );
3230                 $this->addTrackingCategory( "$limitationType-category" );
3231         }
3232
3233         /**
3234          * Return the text of a template, after recursively
3235          * replacing any variables or templates within the template.
3236          *
3237          * @param array $piece The parts of the template
3238          *   $piece['title']: the title, i.e. the part before the |
3239          *   $piece['parts']: the parameter array
3240          *   $piece['lineStart']: whether the brace was at the start of a line
3241          * @param PPFrame $frame The current frame, contains template arguments
3242          * @throws Exception
3243          * @return string|array The text of the template
3244          */
3245         public function braceSubstitution( $piece, $frame ) {
3246                 // Flags
3247
3248                 // $text has been filled
3249                 $found = false;
3250                 // wiki markup in $text should be escaped
3251                 $nowiki = false;
3252                 // $text is HTML, armour it against wikitext transformation
3253                 $isHTML = false;
3254                 // Force interwiki transclusion to be done in raw mode not rendered
3255                 $forceRawInterwiki = false;
3256                 // $text is a DOM node needing expansion in a child frame
3257                 $isChildObj = false;
3258                 // $text is a DOM node needing expansion in the current frame
3259                 $isLocalObj = false;
3260
3261                 # Title object, where $text came from
3262                 $title = false;
3263
3264                 # $part1 is the bit before the first |, and must contain only title characters.
3265                 # Various prefixes will be stripped from it later.
3266                 $titleWithSpaces = $frame->expand( $piece['title'] );
3267                 $part1 = trim( $titleWithSpaces );
3268                 $titleText = false;
3269
3270                 # Original title text preserved for various purposes
3271                 $originalTitle = $part1;
3272
3273                 # $args is a list of argument nodes, starting from index 0, not including $part1
3274                 # @todo FIXME: If piece['parts'] is null then the call to getLength()
3275                 # below won't work b/c this $args isn't an object
3276                 $args = ( $piece['parts'] == null ) ? [] : $piece['parts'];
3277
3278                 $profileSection = null; // profile templates
3279
3280                 # SUBST
3281                 if ( !$found ) {
3282                         $substMatch = $this->mSubstWords->matchStartAndRemove( $part1 );
3283
3284                         # Possibilities for substMatch: "subst", "safesubst" or FALSE
3285                         # Decide whether to expand template or keep wikitext as-is.
3286                         if ( $this->ot['wiki'] ) {
3287                                 if ( $substMatch === false ) {
3288                                         $literal = true;  # literal when in PST with no prefix
3289                                 } else {
3290                                         $literal = false; # expand when in PST with subst: or safesubst:
3291                                 }
3292                         } else {
3293                                 if ( $substMatch == 'subst' ) {
3294                                         $literal = true;  # literal when not in PST with plain subst:
3295                                 } else {
3296                                         $literal = false; # expand when not in PST with safesubst: or no prefix
3297                                 }
3298                         }
3299                         if ( $literal ) {
3300                                 $text = $frame->virtualBracketedImplode( '{{', '|', '}}', $titleWithSpaces, $args );
3301                                 $isLocalObj = true;
3302                                 $found = true;
3303                         }
3304                 }
3305
3306                 # Variables
3307                 if ( !$found && $args->getLength() == 0 ) {
3308                         $id = $this->mVariables->matchStartToEnd( $part1 );
3309                         if ( $id !== false ) {
3310                                 $text = $this->getVariableValue( $id, $frame );
3311                                 if ( $this->magicWordFactory->getCacheTTL( $id ) > -1 ) {
3312                                         $this->mOutput->updateCacheExpiry(
3313                                                 $this->magicWordFactory->getCacheTTL( $id ) );
3314                                 }
3315                                 $found = true;
3316                         }
3317                 }
3318
3319                 # MSG, MSGNW and RAW
3320                 if ( !$found ) {
3321                         # Check for MSGNW:
3322                         $mwMsgnw = $this->magicWordFactory->get( 'msgnw' );
3323                         if ( $mwMsgnw->matchStartAndRemove( $part1 ) ) {
3324                                 $nowiki = true;
3325                         } else {
3326                                 # Remove obsolete MSG:
3327                                 $mwMsg = $this->magicWordFactory->get( 'msg' );
3328                                 $mwMsg->matchStartAndRemove( $part1 );
3329                         }
3330
3331                         # Check for RAW:
3332                         $mwRaw = $this->magicWordFactory->get( 'raw' );
3333                         if ( $mwRaw->matchStartAndRemove( $part1 ) ) {
3334                                 $forceRawInterwiki = true;
3335                         }
3336                 }
3337
3338                 # Parser functions
3339                 if ( !$found ) {
3340                         $colonPos = strpos( $part1, ':' );
3341                         if ( $colonPos !== false ) {
3342                                 $func = substr( $part1, 0, $colonPos );
3343                                 $funcArgs = [ trim( substr( $part1, $colonPos + 1 ) ) ];
3344                                 $argsLength = $args->getLength();
3345                                 for ( $i = 0; $i < $argsLength; $i++ ) {
3346                                         $funcArgs[] = $args->item( $i );
3347                                 }
3348
3349                                 $result = $this->callParserFunction( $frame, $func, $funcArgs );
3350
3351                                 // Extract any forwarded flags
3352                                 if ( isset( $result['title'] ) ) {
3353                                         $title = $result['title'];
3354                                 }
3355                                 if ( isset( $result['found'] ) ) {
3356                                         $found = $result['found'];
3357                                 }
3358                                 if ( array_key_exists( 'text', $result ) ) {
3359                                         // a string or null
3360                                         $text = $result['text'];
3361                                 }
3362                                 if ( isset( $result['nowiki'] ) ) {
3363                                         $nowiki = $result['nowiki'];
3364                                 }
3365                                 if ( isset( $result['isHTML'] ) ) {
3366                                         $isHTML = $result['isHTML'];
3367                                 }
3368                                 if ( isset( $result['forceRawInterwiki'] ) ) {
3369                                         $forceRawInterwiki = $result['forceRawInterwiki'];
3370                                 }
3371                                 if ( isset( $result['isChildObj'] ) ) {
3372                                         $isChildObj = $result['isChildObj'];
3373                                 }
3374                                 if ( isset( $result['isLocalObj'] ) ) {
3375                                         $isLocalObj = $result['isLocalObj'];
3376                                 }
3377                         }
3378                 }
3379
3380                 # Finish mangling title and then check for loops.
3381                 # Set $title to a Title object and $titleText to the PDBK
3382                 if ( !$found ) {
3383                         $ns = NS_TEMPLATE;
3384                         # Split the title into page and subpage
3385                         $subpage = '';
3386                         $relative = $this->maybeDoSubpageLink( $part1, $subpage );
3387                         if ( $part1 !== $relative ) {
3388                                 $part1 = $relative;
3389                                 $ns = $this->mTitle->getNamespace();
3390                         }
3391                         $title = Title::newFromText( $part1, $ns );
3392                         if ( $title ) {
3393                                 $titleText = $title->getPrefixedText();
3394                                 # Check for language variants if the template is not found
3395                                 if ( $this->getTargetLanguage()->hasVariants() && $title->getArticleID() == 0 ) {
3396                                         $this->getTargetLanguage()->findVariantLink( $part1, $title, true );
3397                                 }
3398                                 # Do recursion depth check
3399                                 $limit = $this->mOptions->getMaxTemplateDepth();
3400                                 if ( $frame->depth >= $limit ) {
3401                                         $found = true;
3402                                         $text = '<span class="error">'
3403                                                 . wfMessage( 'parser-template-recursion-depth-warning' )
3404                                                         ->numParams( $limit )->inContentLanguage()->text()
3405                                                 . '</span>';
3406                                 }
3407                         }
3408                 }
3409
3410                 # Load from database
3411                 if ( !$found && $title ) {
3412                         $profileSection = $this->mProfiler->scopedProfileIn( $title->getPrefixedDBkey() );
3413                         if ( !$title->isExternal() ) {
3414                                 if ( $title->isSpecialPage()
3415                                         && $this->mOptions->getAllowSpecialInclusion()
3416                                         && $this->ot['html']
3417                                 ) {
3418                                         $specialPage = $this->specialPageFactory->getPage( $title->getDBkey() );
3419                                         // Pass the template arguments as URL parameters.
3420                                         // "uselang" will have no effect since the Language object
3421                                         // is forced to the one defined in ParserOptions.
3422                                         $pageArgs = [];
3423                                         $argsLength = $args->getLength();
3424                                         for ( $i = 0; $i < $argsLength; $i++ ) {
3425                                                 $bits = $args->item( $i )->splitArg();
3426                                                 if ( strval( $bits['index'] ) === '' ) {
3427                                                         $name = trim( $frame->expand( $bits['name'], PPFrame::STRIP_COMMENTS ) );
3428                                                         $value = trim( $frame->expand( $bits['value'] ) );
3429                                                         $pageArgs[$name] = $value;
3430                                                 }
3431                                         }
3432
3433                                         // Create a new context to execute the special page
3434                                         $context = new RequestContext;
3435                                         $context->setTitle( $title );
3436                                         $context->setRequest( new FauxRequest( $pageArgs ) );
3437                                         if ( $specialPage && $specialPage->maxIncludeCacheTime() === 0 ) {
3438                                                 $context->setUser( $this->getUser() );
3439                                         } else {
3440                                                 // If this page is cached, then we better not be per user.
3441                                                 $context->setUser( User::newFromName( '127.0.0.1', false ) );
3442                                         }
3443                                         $context->setLanguage( $this->mOptions->getUserLangObj() );
3444                                         $ret = $this->specialPageFactory->capturePath( $title, $context, $this->getLinkRenderer() );
3445                                         if ( $ret ) {
3446                                                 $text = $context->getOutput()->getHTML();
3447                                                 $this->mOutput->addOutputPageMetadata( $context->getOutput() );
3448                                                 $found = true;
3449                                                 $isHTML = true;
3450                                                 if ( $specialPage && $specialPage->maxIncludeCacheTime() !== false ) {
3451                                                         $this->mOutput->updateRuntimeAdaptiveExpiry(
3452                                                                 $specialPage->maxIncludeCacheTime()
3453                                                         );
3454                                                 }
3455                                         }
3456                                 } elseif ( $this->nsInfo->isNonincludable( $title->getNamespace() ) ) {
3457                                         $found = false; # access denied
3458                                         $this->logger->debug(
3459                                                 __METHOD__ .
3460                                                 ": template inclusion denied for " . $title->getPrefixedDBkey()
3461                                         );
3462                                 } else {
3463                                         list( $text, $title ) = $this->getTemplateDom( $title );
3464                                         if ( $text !== false ) {
3465                                                 $found = true;
3466                                                 $isChildObj = true;
3467                                         }
3468                                 }
3469
3470                                 # If the title is valid but undisplayable, make a link to it
3471                                 if ( !$found && ( $this->ot['html'] || $this->ot['pre'] ) ) {
3472                                         $text = "[[:$titleText]]";
3473                                         $found = true;
3474                                 }
3475                         } elseif ( $title->isTrans() ) {
3476                                 # Interwiki transclusion
3477                                 if ( $this->ot['html'] && !$forceRawInterwiki ) {
3478                                         $text = $this->interwikiTransclude( $title, 'render' );
3479                                         $isHTML = true;
3480                                 } else {
3481                                         $text = $this->interwikiTransclude( $title, 'raw' );
3482                                         # Preprocess it like a template
3483                                         $text = $this->preprocessToDom( $text, self::PTD_FOR_INCLUSION );
3484                                         $isChildObj = true;
3485                                 }
3486                                 $found = true;
3487                         }
3488
3489                         # Do infinite loop check
3490                         # This has to be done after redirect resolution to avoid infinite loops via redirects
3491                         if ( !$frame->loopCheck( $title ) ) {
3492                                 $found = true;
3493                                 $text = '<span class="error">'
3494                                         . wfMessage( 'parser-template-loop-warning', $titleText )->inContentLanguage()->text()
3495                                         . '</span>';
3496                                 $this->addTrackingCategory( 'template-loop-category' );
3497                                 $this->mOutput->addWarning( wfMessage( 'template-loop-warning',
3498                                         wfEscapeWikiText( $titleText ) )->text() );
3499                                 $this->logger->debug( __METHOD__ . ": template loop broken at '$titleText'" );
3500                         }
3501                 }
3502
3503                 # If we haven't found text to substitute by now, we're done
3504                 # Recover the source wikitext and return it
3505                 if ( !$found ) {
3506                         $text = $frame->virtualBracketedImplode( '{{', '|', '}}', $titleWithSpaces, $args );
3507                         if ( $profileSection ) {
3508                                 $this->mProfiler->scopedProfileOut( $profileSection );
3509                         }
3510                         return [ 'object' => $text ];
3511                 }
3512
3513                 # Expand DOM-style return values in a child frame
3514                 if ( $isChildObj ) {
3515                         # Clean up argument array
3516                         $newFrame = $frame->newChild( $args, $title );
3517
3518                         if ( $nowiki ) {
3519                                 $text = $newFrame->expand( $text, PPFrame::RECOVER_ORIG );
3520                         } elseif ( $titleText !== false && $newFrame->isEmpty() ) {
3521                                 # Expansion is eligible for the empty-frame cache
3522                                 $text = $newFrame->cachedExpand( $titleText, $text );
3523                         } else {
3524                                 # Uncached expansion
3525                                 $text = $newFrame->expand( $text );
3526                         }
3527                 }
3528                 if ( $isLocalObj && $nowiki ) {
3529                         $text = $frame->expand( $text, PPFrame::RECOVER_ORIG );
3530                         $isLocalObj = false;
3531                 }
3532
3533                 if ( $profileSection ) {
3534                         $this->mProfiler->scopedProfileOut( $profileSection );
3535                 }
3536
3537                 # Replace raw HTML by a placeholder
3538                 if ( $isHTML ) {
3539                         $text = $this->insertStripItem( $text );
3540                 } elseif ( $nowiki && ( $this->ot['html'] || $this->ot['pre'] ) ) {
3541                         # Escape nowiki-style return values
3542                         $text = wfEscapeWikiText( $text );
3543                 } elseif ( is_string( $text )
3544                         && !$piece['lineStart']
3545                         && preg_match( '/^(?:{\\||:|;|#|\*)/', $text )
3546                 ) {
3547                         # T2529: if the template begins with a table or block-level
3548                         # element, it should be treated as beginning a new line.
3549                         # This behavior is somewhat controversial.
3550                         $text = "\n" . $text;
3551                 }
3552
3553                 if ( is_string( $text ) && !$this->incrementIncludeSize( 'post-expand', strlen( $text ) ) ) {
3554                         # Error, oversize inclusion
3555                         if ( $titleText !== false ) {
3556                                 # Make a working, properly escaped link if possible (T25588)
3557                                 $text = "[[:$titleText]]";
3558                         } else {
3559                                 # This will probably not be a working link, but at least it may
3560                                 # provide some hint of where the problem is
3561                                 preg_replace( '/^:/', '', $originalTitle );
3562                                 $text = "[[:$originalTitle]]";
3563                         }
3564                         $text .= $this->insertStripItem( '<!-- WARNING: template omitted, '
3565                                 . 'post-expand include size too large -->' );
3566                         $this->limitationWarn( 'post-expand-template-inclusion' );
3567                 }
3568
3569                 if ( $isLocalObj ) {
3570                         $ret = [ 'object' => $text ];
3571                 } else {
3572                         $ret = [ 'text' => $text ];
3573                 }
3574
3575                 return $ret;
3576         }
3577
3578         /**
3579          * Call a parser function and return an array with text and flags.
3580          *
3581          * The returned array will always contain a boolean 'found', indicating
3582          * whether the parser function was found or not. It may also contain the
3583          * following:
3584          *  text: string|object, resulting wikitext or PP DOM object
3585          *  isHTML: bool, $text is HTML, armour it against wikitext transformation
3586          *  isChildObj: bool, $text is a DOM node needing expansion in a child frame
3587          *  isLocalObj: bool, $text is a DOM node needing expansion in the current frame
3588          *  nowiki: bool, wiki markup in $text should be escaped
3589          *
3590          * @since 1.21
3591          * @param PPFrame $frame The current frame, contains template arguments
3592          * @param string $function Function name
3593          * @param array $args Arguments to the function
3594          * @throws MWException
3595          * @return array
3596          */
3597         public function callParserFunction( $frame, $function, array $args = [] ) {
3598                 # Case sensitive functions
3599                 if ( isset( $this->mFunctionSynonyms[1][$function] ) ) {
3600                         $function = $this->mFunctionSynonyms[1][$function];
3601                 } else {
3602                         # Case insensitive functions
3603                         $function = $this->contLang->lc( $function );
3604                         if ( isset( $this->mFunctionSynonyms[0][$function] ) ) {
3605                                 $function = $this->mFunctionSynonyms[0][$function];
3606                         } else {
3607                                 return [ 'found' => false ];
3608                         }
3609                 }
3610
3611                 list( $callback, $flags ) = $this->mFunctionHooks[$function];
3612
3613                 // Avoid PHP 7.1 warning from passing $this by reference
3614                 $parser = $this;
3615
3616                 $allArgs = [ &$parser ];
3617                 if ( $flags & self::SFH_OBJECT_ARGS ) {
3618                         # Convert arguments to PPNodes and collect for appending to $allArgs
3619                         $funcArgs = [];
3620                         foreach ( $args as $k => $v ) {
3621                                 if ( $v instanceof PPNode || $k === 0 ) {
3622                                         $funcArgs[] = $v;
3623                                 } else {
3624                                         $funcArgs[] = $this->mPreprocessor->newPartNodeArray( [ $k => $v ] )->item( 0 );
3625                                 }
3626                         }
3627
3628                         # Add a frame parameter, and pass the arguments as an array
3629                         $allArgs[] = $frame;
3630                         $allArgs[] = $funcArgs;
3631                 } else {
3632                         # Convert arguments to plain text and append to $allArgs
3633                         foreach ( $args as $k => $v ) {
3634                                 if ( $v instanceof PPNode ) {
3635                                         $allArgs[] = trim( $frame->expand( $v ) );
3636                                 } elseif ( is_int( $k ) && $k >= 0 ) {
3637                                         $allArgs[] = trim( $v );
3638                                 } else {
3639                                         $allArgs[] = trim( "$k=$v" );
3640                                 }
3641                         }
3642                 }
3643
3644                 $result = $callback( ...$allArgs );
3645
3646                 # The interface for function hooks allows them to return a wikitext
3647                 # string or an array containing the string and any flags. This mungs
3648                 # things around to match what this method should return.
3649                 if ( !is_array( $result ) ) {
3650                         $result = [
3651                                 'found' => true,
3652                                 'text' => $result,
3653                         ];
3654                 } else {
3655                         if ( isset( $result[0] ) && !isset( $result['text'] ) ) {
3656                                 $result['text'] = $result[0];
3657                         }
3658                         unset( $result[0] );
3659                         $result += [
3660                                 'found' => true,
3661                         ];
3662                 }
3663
3664                 $noparse = true;
3665                 $preprocessFlags = 0;
3666                 if ( isset( $result['noparse'] ) ) {
3667                         $noparse = $result['noparse'];
3668                 }
3669                 if ( isset( $result['preprocessFlags'] ) ) {
3670                         $preprocessFlags = $result['preprocessFlags'];
3671                 }
3672
3673                 if ( !$noparse ) {
3674                         $result['text'] = $this->preprocessToDom( $result['text'], $preprocessFlags );
3675                         $result['isChildObj'] = true;
3676                 }
3677
3678                 return $result;
3679         }
3680
3681         /**
3682          * Get the semi-parsed DOM representation of a template with a given title,
3683          * and its redirect destination title. Cached.
3684          *
3685          * @param Title $title
3686          *
3687          * @return array
3688          */
3689         public function getTemplateDom( $title ) {
3690                 $cacheTitle = $title;
3691                 $titleText = $title->getPrefixedDBkey();
3692
3693                 if ( isset( $this->mTplRedirCache[$titleText] ) ) {
3694                         list( $ns, $dbk ) = $this->mTplRedirCache[$titleText];
3695                         $title = Title::makeTitle( $ns, $dbk );
3696                         $titleText = $title->getPrefixedDBkey();
3697                 }
3698                 if ( isset( $this->mTplDomCache[$titleText] ) ) {
3699                         return [ $this->mTplDomCache[$titleText], $title ];
3700                 }
3701
3702                 # Cache miss, go to the database
3703                 list( $text, $title ) = $this->fetchTemplateAndTitle( $title );
3704
3705                 if ( $text === false ) {
3706                         $this->mTplDomCache[$titleText] = false;
3707                         return [ false, $title ];
3708                 }
3709
3710                 $dom = $this->preprocessToDom( $text, self::PTD_FOR_INCLUSION );
3711                 $this->mTplDomCache[$titleText] = $dom;
3712
3713                 if ( !$title->equals( $cacheTitle ) ) {
3714                         $this->mTplRedirCache[$cacheTitle->getPrefixedDBkey()] =
3715                                 [ $title->getNamespace(), $title->getDBkey() ];
3716                 }
3717
3718                 return [ $dom, $title ];
3719         }
3720
3721         /**
3722          * Fetch the current revision of a given title. Note that the revision
3723          * (and even the title) may not exist in the database, so everything
3724          * contributing to the output of the parser should use this method
3725          * where possible, rather than getting the revisions themselves. This
3726          * method also caches its results, so using it benefits performance.
3727          *
3728          * @since 1.24
3729          * @param Title $title
3730          * @return Revision
3731          */
3732         public function fetchCurrentRevisionOfTitle( $title ) {
3733                 $cacheKey = $title->getPrefixedDBkey();
3734                 if ( !$this->currentRevisionCache ) {
3735                         $this->currentRevisionCache = new MapCacheLRU( 100 );
3736                 }
3737                 if ( !$this->currentRevisionCache->has( $cacheKey ) ) {
3738                         $this->currentRevisionCache->set( $cacheKey,
3739                                 // Defaults to Parser::statelessFetchRevision()
3740                                 call_user_func( $this->mOptions->getCurrentRevisionCallback(), $title, $this )
3741                         );
3742                 }
3743                 return $this->currentRevisionCache->get( $cacheKey );
3744         }
3745
3746         /**
3747          * @param Title $title
3748          * @return bool
3749          * @since 1.34
3750          */
3751         public function isCurrentRevisionOfTitleCached( $title ) {
3752                 return (
3753                         $this->currentRevisionCache &&
3754                         $this->currentRevisionCache->has( $title->getPrefixedText() )
3755                 );
3756         }
3757
3758         /**
3759          * Wrapper around Revision::newFromTitle to allow passing additional parameters
3760          * without passing them on to it.
3761          *
3762          * @since 1.24
3763          * @param Title $title
3764          * @param Parser|bool $parser
3765          * @return Revision|bool False if missing
3766          */
3767         public static function statelessFetchRevision( Title $title, $parser = false ) {
3768                 $rev = Revision::newKnownCurrent( wfGetDB( DB_REPLICA ), $title );
3769
3770                 return $rev;
3771         }
3772
3773         /**
3774          * Fetch the unparsed text of a template and register a reference to it.
3775          * @param Title $title
3776          * @return array ( string or false, Title )
3777          */
3778         public function fetchTemplateAndTitle( $title ) {
3779                 // Defaults to Parser::statelessFetchTemplate()
3780                 $templateCb = $this->mOptions->getTemplateCallback();
3781                 $stuff = call_user_func( $templateCb, $title, $this );
3782                 $rev = $stuff['revision'] ?? null;
3783                 $text = $stuff['text'];
3784                 if ( is_string( $stuff['text'] ) ) {
3785                         // We use U+007F DELETE to distinguish strip markers from regular text
3786                         $text = strtr( $text, "\x7f", "?" );
3787                 }
3788                 $finalTitle = $stuff['finalTitle'] ?? $title;
3789                 foreach ( ( $stuff['deps'] ?? [] ) as $dep ) {
3790                         $this->mOutput->addTemplate( $dep['title'], $dep['page_id'], $dep['rev_id'] );
3791                         if ( $dep['title']->equals( $this->getTitle() ) && $rev instanceof Revision ) {
3792                                 // Self-transclusion; final result may change based on the new page version
3793                                 $this->setOutputFlag( 'vary-revision-sha1', 'Self transclusion' );
3794                                 $this->getOutput()->setRevisionUsedSha1Base36( $rev->getSha1() );
3795                         }
3796                 }
3797
3798                 return [ $text, $finalTitle ];
3799         }
3800
3801         /**
3802          * Fetch the unparsed text of a template and register a reference to it.
3803          * @param Title $title
3804          * @return string|bool
3805          */
3806         public function fetchTemplate( $title ) {
3807                 return $this->fetchTemplateAndTitle( $title )[0];
3808         }
3809
        /**
         * Static function to get a template
         * Can be overridden via ParserOptions::setTemplateCallback().
         *
         * Follows at most one redirect. The returned array has these keys:
         *  revision: the last revision object looked up, or null if none was
         *  text: the wikitext to transclude, or false if none could be obtained
         *  finalTitle: the title the returned text belongs to
         *  deps: list of [ 'title' => ..., 'page_id' => ..., 'rev_id' => ... ]
         *    entries, one per title consulted along the way
         *
         * @param Title $title
         * @param bool|Parser $parser Parser whose revision cache should be used,
         *  or false to load revisions directly
         *
         * @return array
         */
        public static function statelessFetchTemplate( $title, $parser = false ) {
                $text = $skip = false;
                $finalTitle = $title;
                $deps = [];
                $rev = null;

                # Loop to fetch the article, with up to 1 redirect
                # ($title becomes the redirect target at the end of each pass; the
                # loop also stops as soon as that is no longer an object)
                for ( $i = 0; $i < 2 && is_object( $title ); $i++ ) {
                        # Give extensions a chance to select the revision instead
                        $id = false; # Assume current
                        Hooks::run( 'BeforeParserFetchTemplateAndtitle',
                                [ $parser, $title, &$skip, &$id ] );

                        if ( $skip ) {
                                # A hook vetoed this template: record the dependency
                                # without a revision and give up
                                $text = false;
                                $deps[] = [
                                        'title' => $title,
                                        'page_id' => $title->getArticleID(),
                                        'rev_id' => null
                                ];
                                break;
                        }
                        # Get the revision
                        if ( $id ) {
                                # Revision chosen by a hook
                                $rev = Revision::newFromId( $id );
                        } elseif ( $parser ) {
                                # Current revision via the parser's cached lookup
                                $rev = $parser->fetchCurrentRevisionOfTitle( $title );
                        } else {
                                $rev = Revision::newFromTitle( $title );
                        }
                        $rev_id = $rev ? $rev->getId() : 0;
                        # If there is no current revision, there is no page
                        if ( $id === false && !$rev ) {
                                $linkCache = MediaWikiServices::getInstance()->getLinkCache();
                                $linkCache->addBadLinkObj( $title );
                        }

                        $deps[] = [
                                'title' => $title,
                                'page_id' => $title->getArticleID(),
                                'rev_id' => $rev_id
                        ];
                        if ( $rev && !$title->equals( $rev->getTitle() ) ) {
                                # We fetched a rev from a different title; register it too...
                                $deps[] = [
                                        'title' => $rev->getTitle(),
                                        'page_id' => $rev->getPage(),
                                        'rev_id' => $rev_id
                                ];
                        }

                        if ( $rev ) {
                                $content = $rev->getContent();
                                $text = $content ? $content->getWikitextForTransclusion() : null;

                                # Hooks may replace the text or add dependencies
                                Hooks::run( 'ParserFetchTemplate',
                                        [ $parser, $title, $rev, &$text, &$deps ] );

                                if ( $text === false || $text === null ) {
                                        # Normalise "no text" to false for the caller
                                        $text = false;
                                        break;
                                }
                        } elseif ( $title->getNamespace() == NS_MEDIAWIKI ) {
                                # No revision, but the title is in the MediaWiki namespace:
                                # fall back to the interface message named after it
                                $message = wfMessage( MediaWikiServices::getInstance()->getContentLanguage()->
                                        lcfirst( $title->getText() ) )->inContentLanguage();
                                if ( !$message->exists() ) {
                                        $text = false;
                                        break;
                                }
                                $content = $message->content();
                                $text = $message->plain();
                        } else {
                                # Nothing to load; $text and $finalTitle keep whatever
                                # the previous pass (if any) left in them
                                break;
                        }
                        if ( !$content ) {
                                break;
                        }
                        # Remember the title this text came from, then continue with
                        # its redirect target, if it has one
                        $finalTitle = $title;
                        $title = $content->getRedirectTarget();
                }
                return [
                        'revision' => $rev,
                        'text' => $text,
                        'finalTitle' => $finalTitle,
                        'deps' => $deps
                ];
        }
3907
3908         /**
3909          * Fetch a file and its title and register a reference to it.
3910          * If 'broken' is a key in $options then the file will appear as a broken thumbnail.
3911          * @param Title $title
3912          * @param array $options Array of options to RepoGroup::findFile
3913          * @return array ( File or false, Title of file )
3914          */
3915         public function fetchFileAndTitle( $title, $options = [] ) {
3916                 $file = $this->fetchFileNoRegister( $title, $options );
3917
3918                 $time = $file ? $file->getTimestamp() : false;
3919                 $sha1 = $file ? $file->getSha1() : false;
3920                 # Register the file as a dependency...
3921                 $this->mOutput->addImage( $title->getDBkey(), $time, $sha1 );
3922                 if ( $file && !$title->equals( $file->getTitle() ) ) {
3923                         # Update fetched file title
3924                         $title = $file->getTitle();
3925                         $this->mOutput->addImage( $title->getDBkey(), $time, $sha1 );
3926                 }
3927                 return [ $file, $title ];
3928         }
3929
3930         /**
3931          * Helper function for fetchFileAndTitle.
3932          *
3933          * Also useful if you need to fetch a file but not use it yet,
3934          * for example to get the file's handler.
3935          *
3936          * @param Title $title
3937          * @param array $options Array of options to RepoGroup::findFile
3938          * @return File|bool
3939          */
3940         protected function fetchFileNoRegister( $title, $options = [] ) {
3941                 if ( isset( $options['broken'] ) ) {
3942                         $file = false; // broken thumbnail forced by hook
3943                 } elseif ( isset( $options['sha1'] ) ) { // get by (sha1,timestamp)
3944                         $file = RepoGroup::singleton()->findFileFromKey( $options['sha1'], $options );
3945                 } else { // get by (name,timestamp)
3946                         $file = MediaWikiServices::getInstance()->getRepoGroup()->findFile( $title, $options );
3947                 }
3948                 return $file;
3949         }
3950
3951         /**
3952          * Transclude an interwiki link.
3953          *
3954          * @param Title $title
3955          * @param string $action Usually one of (raw, render)
3956          *
3957          * @return string
3958          */
3959         public function interwikiTransclude( $title, $action ) {
3960                 if ( !$this->svcOptions->get( 'EnableScaryTranscluding' ) ) {
3961                         return wfMessage( 'scarytranscludedisabled' )->inContentLanguage()->text();
3962                 }
3963
3964                 $url = $title->getFullURL( [ 'action' => $action ] );
3965                 if ( strlen( $url ) > 1024 ) {
3966                         return wfMessage( 'scarytranscludetoolong' )->inContentLanguage()->text();
3967                 }
3968
3969                 $wikiId = $title->getTransWikiID(); // remote wiki ID or false
3970
3971                 $fname = __METHOD__;
3972                 $cache = MediaWikiServices::getInstance()->getMainWANObjectCache();
3973
3974                 $data = $cache->getWithSetCallback(
3975                         $cache->makeGlobalKey(
3976                                 'interwiki-transclude',
3977                                 ( $wikiId !== false ) ? $wikiId : 'external',
3978                                 sha1( $url )
3979                         ),
3980                         $this->svcOptions->get( 'TranscludeCacheExpiry' ),
3981                         function ( $oldValue, &$ttl ) use ( $url, $fname, $cache ) {
3982                                 $req = MWHttpRequest::factory( $url, [], $fname );
3983
3984                                 $status = $req->execute(); // Status object
3985                                 if ( !$status->isOK() ) {
3986                                         $ttl = $cache::TTL_UNCACHEABLE;
3987                                 } elseif ( $req->getResponseHeader( 'X-Database-Lagged' ) !== null ) {
3988                                         $ttl = min( $cache::TTL_LAGGED, $ttl );
3989                                 }
3990
3991                                 return [
3992                                         'text' => $status->isOK() ? $req->getContent() : null,
3993                                         'code' => $req->getStatus()
3994                                 ];
3995                         },
3996                         [
3997                                 'checkKeys' => ( $wikiId !== false )
3998                                         ? [ $cache->makeGlobalKey( 'interwiki-page', $wikiId, $title->getDBkey() ) ]
3999                                         : [],
4000                                 'pcGroup' => 'interwiki-transclude:5',
4001                                 'pcTTL' => $cache::TTL_PROC_LONG
4002                         ]
4003                 );
4004
4005                 if ( is_string( $data['text'] ) ) {
4006                         $text = $data['text'];
4007                 } elseif ( $data['code'] != 200 ) {
4008                         // Though we failed to fetch the content, this status is useless.
4009                         $text = wfMessage( 'scarytranscludefailed-httpstatus' )
4010                                 ->params( $url, $data['code'] )->inContentLanguage()->text();
4011                 } else {
4012                         $text = wfMessage( 'scarytranscludefailed', $url )->inContentLanguage()->text();
4013                 }
4014
4015                 return $text;
4016         }
4017
4018         /**
4019          * Triple brace replacement -- used for template arguments
4020          * @private
4021          *
4022          * @param array $piece
4023          * @param PPFrame $frame
4024          *
4025          * @return array
4026          */
4027         public function argSubstitution( $piece, $frame ) {
4028                 $error = false;
4029                 $parts = $piece['parts'];
4030                 $nameWithSpaces = $frame->expand( $piece['title'] );
4031                 $argName = trim( $nameWithSpaces );
4032                 $object = false;
4033                 $text = $frame->getArgument( $argName );
4034                 if ( $text === false && $parts->getLength() > 0
4035                         && ( $this->ot['html']
4036                                 || $this->ot['pre']
4037                                 || ( $this->ot['wiki'] && $frame->isTemplate() )
4038                         )
4039                 ) {
4040                         # No match in frame, use the supplied default
4041                         $object = $parts->item( 0 )->getChildren();
4042                 }
4043                 if ( !$this->incrementIncludeSize( 'arg', strlen( $text ) ) ) {
4044                         $error = '<!-- WARNING: argument omitted, expansion size too large -->';
4045                         $this->limitationWarn( 'post-expand-template-argument' );
4046                 }
4047
4048                 if ( $text === false && $object === false ) {
4049                         # No match anywhere
4050                         $object = $frame->virtualBracketedImplode( '{{{', '|', '}}}', $nameWithSpaces, $parts );
4051                 }
4052                 if ( $error !== false ) {
4053                         $text .= $error;
4054                 }
4055                 if ( $object !== false ) {
4056                         $ret = [ 'object' => $object ];
4057                 } else {
4058                         $ret = [ 'text' => $text ];
4059                 }
4060
4061                 return $ret;
4062         }
4063
4064         /**
4065          * Return the text to be used for a given extension tag.
4066          * This is the ghost of strip().
4067          *
4068          * @param array $params Associative array of parameters:
4069          *     name       PPNode for the tag name
4070          *     attr       PPNode for unparsed text where tag attributes are thought to be
4071          *     attributes Optional associative array of parsed attributes
4072          *     inner      Contents of extension element
4073          *     noClose    Original text did not have a close tag
4074          * @param PPFrame $frame
4075          *
4076          * @throws MWException
4077          * @return string
4078          */
4079         public function extensionSubstitution( $params, $frame ) {
4080                 static $errorStr = '<span class="error">';
4081                 static $errorLen = 20;
4082
4083                 $name = $frame->expand( $params['name'] );
4084                 if ( substr( $name, 0, $errorLen ) === $errorStr ) {
4085                         // Probably expansion depth or node count exceeded. Just punt the
4086                         // error up.
4087                         return $name;
4088                 }
4089
4090                 $attrText = !isset( $params['attr'] ) ? null : $frame->expand( $params['attr'] );
4091                 if ( substr( $attrText, 0, $errorLen ) === $errorStr ) {
4092                         // See above
4093                         return $attrText;
4094                 }
4095
4096                 // We can't safely check if the expansion for $content resulted in an
4097                 // error, because the content could happen to be the error string
4098                 // (T149622).
4099                 $content = !isset( $params['inner'] ) ? null : $frame->expand( $params['inner'] );
4100
4101                 $marker = self::MARKER_PREFIX . "-$name-"
4102                         . sprintf( '%08X', $this->mMarkerIndex++ ) . self::MARKER_SUFFIX;
4103
4104                 $isFunctionTag = isset( $this->mFunctionTagHooks[strtolower( $name )] ) &&
4105                         ( $this->ot['html'] || $this->ot['pre'] );
4106                 if ( $isFunctionTag ) {
4107                         $markerType = 'none';
4108                 } else {
4109                         $markerType = 'general';
4110                 }
4111                 if ( $this->ot['html'] || $isFunctionTag ) {
4112                         $name = strtolower( $name );
4113                         $attributes = Sanitizer::decodeTagAttributes( $attrText );
4114                         if ( isset( $params['attributes'] ) ) {
4115                                 $attributes += $params['attributes'];
4116                         }
4117
4118                         if ( isset( $this->mTagHooks[$name] ) ) {
4119                                 $output = call_user_func_array( $this->mTagHooks[$name],
4120                                         [ $content, $attributes, $this, $frame ] );
4121                         } elseif ( isset( $this->mFunctionTagHooks[$name] ) ) {
4122                                 list( $callback, ) = $this->mFunctionTagHooks[$name];
4123
4124                                 // Avoid PHP 7.1 warning from passing $this by reference
4125                                 $parser = $this;
4126                                 $output = call_user_func_array( $callback, [ &$parser, $frame, $content, $attributes ] );
4127                         } else {
4128                                 $output = '<span class="error">Invalid tag extension name: ' .
4129                                         htmlspecialchars( $name ) . '</span>';
4130                         }
4131
4132                         if ( is_array( $output ) ) {
4133                                 // Extract flags
4134                                 $flags = $output;
4135                                 $output = $flags[0];
4136                                 if ( isset( $flags['markerType'] ) ) {
4137                                         $markerType = $flags['markerType'];
4138                                 }
4139                         }
4140                 } else {
4141                         if ( is_null( $attrText ) ) {
4142                                 $attrText = '';
4143                         }
4144                         if ( isset( $params['attributes'] ) ) {
4145                                 foreach ( $params['attributes'] as $attrName => $attrValue ) {
4146                                         $attrText .= ' ' . htmlspecialchars( $attrName ) . '="' .
4147                                                 htmlspecialchars( $attrValue ) . '"';
4148                                 }
4149                         }
4150                         if ( $content === null ) {
4151                                 $output = "<$name$attrText/>";
4152                         } else {
4153                                 $close = is_null( $params['close'] ) ? '' : $frame->expand( $params['close'] );
4154                                 if ( substr( $close, 0, $errorLen ) === $errorStr ) {
4155                                         // See above
4156                                         return $close;
4157                                 }
4158                                 $output = "<$name$attrText>$content$close";
4159                         }
4160                 }
4161
4162                 if ( $markerType === 'none' ) {
4163                         return $output;
4164                 } elseif ( $markerType === 'nowiki' ) {
4165                         $this->mStripState->addNoWiki( $marker, $output );
4166                 } elseif ( $markerType === 'general' ) {
4167                         $this->mStripState->addGeneral( $marker, $output );
4168                 } else {
4169                         throw new MWException( __METHOD__ . ': invalid marker type' );
4170                 }
4171                 return $marker;
4172         }
4173
4174         /**
4175          * Increment an include size counter
4176          *
4177          * @param string $type The type of expansion
4178          * @param int $size The size of the text
4179          * @return bool False if this inclusion would take it over the maximum, true otherwise
4180          */
4181         public function incrementIncludeSize( $type, $size ) {
4182                 if ( $this->mIncludeSizes[$type] + $size > $this->mOptions->getMaxIncludeSize() ) {
4183                         return false;
4184                 } else {
4185                         $this->mIncludeSizes[$type] += $size;
4186                         return true;
4187                 }
4188         }
4189
4190         /**
4191          * Increment the expensive function count
4192          *
4193          * @return bool False if the limit has been exceeded
4194          */
4195         public function incrementExpensiveFunctionCount() {
4196                 $this->mExpensiveFunctionCount++;
4197                 return $this->mExpensiveFunctionCount <= $this->mOptions->getExpensiveParserFunctionLimit();
4198         }
4199
/**
 * Extract double-underscore switches such as __NOGALLERY__ and __NOTOC__
 * from the text.
 *
 * The switches found are stored in $this->mDoubleUnderscores and each one is
 * also registered as a property on the parser output.
 *
 * @param string $text
 *
 * @return string The text with the switches removed; the first __TOC__ is
 *   replaced by a placeholder comment
 */
public function doDoubleUnderscore( $text ) {
        # __TOC__ is handled on its own because its position has to be remembered
        $tocWord = $this->magicWordFactory->get( 'toc' );
        if ( $tocWord->match( $text ) ) {
                $this->mShowToc = true;
                $this->mForceTocPosition = true;

                # The first occurrence becomes a placeholder that is filled in with
                # the TOC at the end; any further occurrence is dropped.
                $text = $tocWord->replace( '<!--MWTOC\'"-->', $text, 1 );
                $text = $tocWord->replace( '', $text );
        }

        # Match and strip all remaining switches in one pass
        $switchWords = $this->magicWordFactory->getDoubleUnderscoreArray();
        $this->mDoubleUnderscores = $switchWords->matchAndRemove( $text );

        if ( isset( $this->mDoubleUnderscores['nogallery'] ) ) {
                $this->mOutput->mNoGallery = true;
        }

        # An explicitly positioned __TOC__ takes precedence over __NOTOC__
        if ( !$this->mForceTocPosition && isset( $this->mDoubleUnderscores['notoc'] ) ) {
                $this->mShowToc = false;
        }

        if ( isset( $this->mDoubleUnderscores['hiddencat'] )
                && $this->mTitle->getNamespace() == NS_CATEGORY
        ) {
                $this->addTrackingCategory( 'hidden-category-category' );
        }

        # (T10068) Allow control over whether robots index a page.
        # 'noindex' is processed before 'index' so that __INDEX__ always
        # overrides __NOINDEX__, see T16899
        $indexPolicies = [
                'noindex' => 'noindex-category',
                'index' => 'index-category',
        ];
        foreach ( $indexPolicies as $policy => $trackingCategory ) {
                if ( isset( $this->mDoubleUnderscores[$policy] ) && $this->mTitle->canUseNoindex() ) {
                        $this->mOutput->setIndexPolicy( $policy );
                        $this->addTrackingCategory( $trackingCategory );
                }
        }

        # Cache all double underscores in the database
        foreach ( array_keys( $this->mDoubleUnderscores ) as $switchId ) {
                $this->mOutput->setProperty( $switchId, '' );
        }

        return $text;
}
4255
/**
 * Add a tracking category to the parser output, passing along the title
 * currently being parsed.
 *
 * @see ParserOutput::addTrackingCategory()
 * @param string $msg Message key
 * @return bool Whether the addition was successful
 */
public function addTrackingCategory( $msg ) {
        $output = $this->mOutput;
        return $output->addTrackingCategory( $msg, $this->mTitle );
}
4264
/**
 * This function accomplishes several tasks:
 * 1) Auto-number headings if that option is enabled
 * 2) Add an [edit] link to sections for users who have enabled the option and can edit the page
 * 3) Add a Table of contents on the top for users who have enabled the option
 * 4) Auto-anchor headings
 *
 * It loops through all headlines, collects the necessary data, then splits up the
 * string and re-inserts the newly formatted headlines.
 *
 * Besides returning the new text, this stores the TOC HTML on the parser output
 * (when a TOC is shown), applies the new-section-link switches, and fires the
 * 'ParserSectionCreate' hook once per section.
 *
 * @param string $text
 * @param string $origText Original, untouched wikitext
 * @param bool $isMain When true, the section list is stored on the parser output and
 *   a TOC without an explicit position is inserted before the first heading
 * @return string
 * @private
 */
public function formatHeadings( $text, $origText, $isMain = true ) {
        # Inhibit editsection links if requested in the page.
        # When allowed, actual presence will depend on post-cache transforms.
        $maybeShowEditLink = !isset( $this->mDoubleUnderscores['noeditsection'] );

        # Get all headlines for numbering them and adding funky stuff like [edit]
        # links - this is for later, but we need the number of headlines right now
        # NOTE: white space in headings have been trimmed in doHeadings. They shouldn't
        # be trimmed here since whitespace in HTML headings is significant.
        $matches = [];
        $numMatches = preg_match_all(
                '/<H(?P<level>[1-6])(?P<attrib>.*?>)(?P<header>[\s\S]*?)<\/H[1-6] *>/i',
                $text,
                $matches
        );

        # if there are fewer than 4 headlines in the article, do not show TOC
        # unless it's been explicitly enabled.
        $enoughToc = $this->mShowToc &&
                ( ( $numMatches >= 4 ) || $this->mForceTocPosition );

        # Allow user to stipulate that a page should have a "new section"
        # link added via __NEWSECTIONLINK__
        if ( isset( $this->mDoubleUnderscores['newsectionlink'] ) ) {
                $this->mOutput->setNewSection( true );
        }

        # Allow user to remove the "new section"
        # link via __NONEWSECTIONLINK__
        if ( isset( $this->mDoubleUnderscores['nonewsectionlink'] ) ) {
                $this->mOutput->hideNewSection( true );
        }

        # if the string __FORCETOC__ (not case-sensitive) occurs in the HTML,
        # override above conditions and always show TOC above first header
        if ( isset( $this->mDoubleUnderscores['forcetoc'] ) ) {
                $this->mShowToc = true;
                $enoughToc = true;
        }

        # headline counter
        $headlineCount = 0;
        $numVisible = 0;

        # Ugh .. the TOC should have neat indentation levels which can be
        # passed to the skin functions. These are determined here
        $toc = '';
        $head = [];
        $sublevelCount = [];
        $levelCount = [];
        $level = 0;
        $prevlevel = 0;
        $toclevel = 0;
        $prevtoclevel = 0;
        $markerRegex = self::MARKER_PREFIX . "-h-(\d+)-" . self::MARKER_SUFFIX;
        $baseTitleText = $this->mTitle->getPrefixedDBkey();
        $oldType = $this->mOutputType;
        $this->setOutputType( self::OT_WIKI );
        $frame = $this->getPreprocessor()->newFrame();
        $root = $this->preprocessToDom( $origText );
        $node = $root->getFirstChild();
        $byteOffset = 0;
        $tocraw = [];
        # Lower-cased ids already handed out, used to keep anchors unique
        $refers = [];

        $headlines = $numMatches !== false ? $matches[3] : [];

        $maxTocLevel = $this->svcOptions->get( 'MaxTocLevel' );
        foreach ( $headlines as $headline ) {
                $isTemplate = false;
                $titleText = false;
                $sectionIndex = false;
                $numbering = '';
                $markerMatches = [];
                if ( preg_match( "/^$markerRegex/", $headline, $markerMatches ) ) {
                        $serial = $markerMatches[1];
                        list( $titleText, $sectionIndex ) = $this->mHeadings[$serial];
                        $isTemplate = ( $titleText != $baseTitleText );
                        $headline = preg_replace( "/^$markerRegex\\s*/", "", $headline );
                }

                if ( $toclevel ) {
                        $prevlevel = $level;
                }
                $level = $matches[1][$headlineCount];

                if ( $level > $prevlevel ) {
                        # Increase TOC level
                        $toclevel++;
                        $sublevelCount[$toclevel] = 0;
                        if ( $toclevel < $maxTocLevel ) {
                                $prevtoclevel = $toclevel;
                                $toc .= Linker::tocIndent();
                                $numVisible++;
                        }
                } elseif ( $level < $prevlevel && $toclevel > 1 ) {
                        # Decrease TOC level, find level to jump to

                        for ( $i = $toclevel; $i > 0; $i-- ) {
                                if ( $levelCount[$i] == $level ) {
                                        # Found last matching level
                                        $toclevel = $i;
                                        break;
                                } elseif ( $levelCount[$i] < $level ) {
                                        # Found first matching level below current level
                                        $toclevel = $i + 1;
                                        break;
                                }
                        }
                        if ( $i == 0 ) {
                                $toclevel = 1;
                        }
                        if ( $toclevel < $maxTocLevel ) {
                                if ( $prevtoclevel < $maxTocLevel ) {
                                        # Unindent only if the previous toc level was shown :p
                                        $toc .= Linker::tocUnindent( $prevtoclevel - $toclevel );
                                        $prevtoclevel = $toclevel;
                                } else {
                                        $toc .= Linker::tocLineEnd();
                                }
                        }
                } else {
                        # No change in level, end TOC line
                        if ( $toclevel < $maxTocLevel ) {
                                $toc .= Linker::tocLineEnd();
                        }
                }

                $levelCount[$toclevel] = $level;

                # count number of headlines for each level
                $sublevelCount[$toclevel]++;
                $dot = 0;
                for ( $i = 1; $i <= $toclevel; $i++ ) {
                        if ( !empty( $sublevelCount[$i] ) ) {
                                if ( $dot ) {
                                        $numbering .= '.';
                                }
                                $numbering .= $this->getTargetLanguage()->formatNum( $sublevelCount[$i] );
                                $dot = 1;
                        }
                }

                # The safe header is a version of the header text safe to use for links

                # Remove link placeholders by the link text.
                #     <!--LINK number-->
                # turns into
                #     link text with suffix
                # Do this before unstrip since link text can contain strip markers
                $safeHeadline = $this->replaceLinkHoldersText( $headline );

                # Avoid insertion of weird stuff like <math> by expanding the relevant sections
                $safeHeadline = $this->mStripState->unstripBoth( $safeHeadline );

                # Remove any <style> or <script> tags (T198618)
                $safeHeadline = preg_replace(
                        '#<(style|script)(?: [^>]*[^>/])?>.*?</\1>#is',
                        '',
                        $safeHeadline
                );

                # Strip out HTML (first regex removes any tag not allowed)
                # Allowed tags are:
                # * <sup> and <sub> (T10393)
                # * <i> (T28375)
                # * <b> (r105284)
                # * <bdi> (T74884)
                # * <span dir="rtl"> and <span dir="ltr"> (T37167)
                # * <s> and <strike> (T35715)
                # We strip any parameter from accepted tags (second regex), except dir="rtl|ltr" from <span>,
                # to allow setting directionality in toc items.
                $tocline = preg_replace(
                        [
                                '#<(?!/?(span|sup|sub|bdi|i|b|s|strike)(?: [^>]*)?>).*?>#',
                                '#<(/?(?:span(?: dir="(?:rtl|ltr)")?|sup|sub|bdi|i|b|s|strike))(?: .*?)?>#'
                        ],
                        [ '', '<$1>' ],
                        $safeHeadline
                );

                # Strip '<span></span>', which is the result from the above if
                # <span id="foo"></span> is used to produce an additional anchor
                # for a section.
                $tocline = str_replace( '<span></span>', '', $tocline );

                $tocline = trim( $tocline );

                # For the anchor, strip out HTML-y stuff period
                $safeHeadline = preg_replace( '/<.*?>/', '', $safeHeadline );
                $safeHeadline = Sanitizer::normalizeSectionNameWhitespace( $safeHeadline );

                # Save headline for section edit hint before it's escaped
                $headlineHint = $safeHeadline;

                # Decode HTML entities
                $safeHeadline = Sanitizer::decodeCharReferences( $safeHeadline );

                $safeHeadline = self::normalizeSectionName( $safeHeadline );

                $fallbackHeadline = Sanitizer::escapeIdForAttribute( $safeHeadline, Sanitizer::ID_FALLBACK );
                $linkAnchor = Sanitizer::escapeIdForLink( $safeHeadline );
                $safeHeadline = Sanitizer::escapeIdForAttribute( $safeHeadline, Sanitizer::ID_PRIMARY );
                if ( $fallbackHeadline === $safeHeadline ) {
                        # No reason to have both (in fact, we can't)
                        $fallbackHeadline = false;
                }

                # HTML IDs must be case-insensitively unique for IE compatibility (T12721).
                # @todo FIXME: We may be changing them depending on the current locale.
                $arrayKey = strtolower( $safeHeadline );
                if ( $fallbackHeadline === false ) {
                        $fallbackArrayKey = false;
                } else {
                        $fallbackArrayKey = strtolower( $fallbackHeadline );
                }

                # Create the anchor for linking from the TOC to the section
                $anchor = $safeHeadline;
                $fallbackAnchor = $fallbackHeadline;
                if ( isset( $refers[$arrayKey] ) ) {
                        // phpcs:ignore Generic.Formatting.DisallowMultipleStatements
                        for ( $i = 2; isset( $refers["{$arrayKey}_$i"] ); ++$i );
                        $anchor .= "_$i";
                        $linkAnchor .= "_$i";
                        $refers["{$arrayKey}_$i"] = true;
                } else {
                        $refers[$arrayKey] = true;
                }
                # Only register a fallback id when there is one. Using `false` as an
                # array key is cast to 0, which would make a later heading whose id is
                # "0" look like a duplicate and get a spurious "_2" suffix.
                if ( $fallbackHeadline !== false ) {
                        if ( isset( $refers[$fallbackArrayKey] ) ) {
                                // phpcs:ignore Generic.Formatting.DisallowMultipleStatements
                                for ( $i = 2; isset( $refers["{$fallbackArrayKey}_$i"] ); ++$i );
                                $fallbackAnchor .= "_$i";
                                $refers["{$fallbackArrayKey}_$i"] = true;
                        } else {
                                $refers[$fallbackArrayKey] = true;
                        }
                }

                # Don't number the heading if it is the only one (looks silly)
                if ( count( $matches[3] ) > 1 && $this->mOptions->getNumberHeadings() ) {
                        # the two are different if the line contains a link
                        $headline = Html::element(
                                'span',
                                [ 'class' => 'mw-headline-number' ],
                                $numbering
                        ) . ' ' . $headline;
                }

                if ( $enoughToc && ( !isset( $maxTocLevel ) || $toclevel < $maxTocLevel ) ) {
                        $toc .= Linker::tocLine( $linkAnchor, $tocline,
                                $numbering, $toclevel, ( $isTemplate ? false : $sectionIndex ) );
                }

                # Add the section to the section tree
                # Find the DOM node for this header
                $noOffset = ( $isTemplate || $sectionIndex === false );
                while ( $node && !$noOffset ) {
                        if ( $node->getName() === 'h' ) {
                                $bits = $node->splitHeading();
                                if ( $bits['i'] == $sectionIndex ) {
                                        break;
                                }
                        }
                        # NOTE(review): despite the name, this accumulates mb_strlen()
                        # (characters, not bytes); left unchanged because consumers of
                        # 'byteoffset' may rely on the current values.
                        $byteOffset += mb_strlen( $this->mStripState->unstripBoth(
                                $frame->expand( $node, PPFrame::RECOVER_ORIG ) ) );
                        $node = $node->getNextSibling();
                }
                $tocraw[] = [
                        'toclevel' => $toclevel,
                        'level' => $level,
                        'line' => $tocline,
                        'number' => $numbering,
                        'index' => ( $isTemplate ? 'T-' : '' ) . $sectionIndex,
                        'fromtitle' => $titleText,
                        'byteoffset' => ( $noOffset ? null : $byteOffset ),
                        'anchor' => $anchor,
                ];

                # give headline the correct <h#> tag
                if ( $maybeShowEditLink && $sectionIndex !== false ) {
                        // Output edit section links as markers with styles that can be customized by skins
                        if ( $isTemplate ) {
                                # Put a T flag in the section identifier, to indicate to extractSections()
                                # that sections inside <includeonly> should be counted.
                                $editsectionPage = $titleText;
                                $editsectionSection = "T-$sectionIndex";
                                $editsectionContent = null;
                        } else {
                                $editsectionPage = $this->mTitle->getPrefixedText();
                                $editsectionSection = $sectionIndex;
                                $editsectionContent = $headlineHint;
                        }
                        // We use a bit of pseudo-xml for editsection markers. The
                        // language converter is run later on. Using a UNIQ style marker
                        // leads to the converter screwing up the tokens when it
                        // converts stuff. And trying to insert strip tags fails too. At
                        // this point all real inputted tags have already been escaped,
                        // so we don't have to worry about a user trying to input one of
                        // these markers directly. We use a page and section attribute
                        // to stop the language converter from converting these
                        // important bits of data, but put the headline hint inside a
                        // content block because the language converter is supposed to
                        // be able to convert that piece of data.
                        // Gets replaced with html in ParserOutput::getText
                        $editlink = '<mw:editsection page="' . htmlspecialchars( $editsectionPage );
                        $editlink .= '" section="' . htmlspecialchars( $editsectionSection ) . '"';
                        if ( $editsectionContent !== null ) {
                                $editlink .= '>' . $editsectionContent . '</mw:editsection>';
                        } else {
                                $editlink .= '/>';
                        }
                } else {
                        $editlink = '';
                }
                $head[$headlineCount] = Linker::makeHeadline( $level,
                        $matches['attrib'][$headlineCount], $anchor, $headline,
                        $editlink, $fallbackAnchor );

                $headlineCount++;
        }

        $this->setOutputType( $oldType );

        # Never ever show TOC if no headers
        if ( $numVisible < 1 ) {
                $enoughToc = false;
        }

        if ( $enoughToc ) {
                if ( $prevtoclevel > 0 && $prevtoclevel < $maxTocLevel ) {
                        $toc .= Linker::tocUnindent( $prevtoclevel - 1 );
                }
                $toc = Linker::tocList( $toc, $this->mOptions->getUserLangObj() );
                $this->mOutput->setTOCHTML( $toc );
                $toc = self::TOC_START . $toc . self::TOC_END;
        }

        if ( $isMain ) {
                $this->mOutput->setSections( $tocraw );
        }

        # split up and insert constructed headlines
        # This pattern must accept exactly what the preg_match_all() pattern above
        # accepts (including optional spaces before the closing '>'), otherwise the
        # entries of $head no longer line up with the blocks between the headings.
        $blocks = preg_split( '/<H[1-6].*?>[\s\S]*?<\/H[1-6] *>/i', $text );
        $i = 0;

        // build an array of document sections
        $sections = [];
        foreach ( $blocks as $block ) {
                // $head is zero-based, sections aren't.
                if ( empty( $head[$i - 1] ) ) {
                        $sections[$i] = $block;
                } else {
                        $sections[$i] = $head[$i - 1] . $block;
                }

                /**
                 * Send a hook, one per section.
                 * The idea here is to be able to make section-level DIVs, but to do so in a
                 * lower-impact, more correct way than r50769
                 *
                 * $this : caller
                 * $section : the section number
                 * &$sectionContent : ref to the content of the section
                 * $maybeShowEditLinks : boolean describing whether this section has an edit link
                 */
                Hooks::run( 'ParserSectionCreate', [ $this, $i, &$sections[$i], $maybeShowEditLink ] );

                $i++;
        }

        if ( $enoughToc && $isMain && !$this->mForceTocPosition ) {
                // append the TOC at the beginning
                // Top anchor now in skin
                $sections[0] .= $toc . "\n";
        }

        $full = implode( '', $sections );

        if ( $this->mForceTocPosition ) {
                # Fill in the placeholder left by doDoubleUnderscore() for __TOC__
                return str_replace( '<!--MWTOC\'"-->', $toc, $full );
        } else {
                return $full;
        }
}
4669
/**
 * Run the pre-save transform on wikitext that is about to be stored:
 * NUL characters are removed, line endings ("\r\n" -> "\n") are normalized
 * and, when the options request it, signatures, {{subst:}} templates, etc.
 * are substituted.
 *
 * @param string $text The text to transform
 * @param Title $title The Title object for the current article
 * @param User $user The User object describing the current user
 * @param ParserOptions $options Parsing options
 * @param bool $clearState Whether to clear the parser state first
 * @return string The altered wiki markup
 */
public function preSaveTransform( $text, Title $title, User $user,
        ParserOptions $options, $clearState = true
) {
        // The return value of lock() is only held, never read.
        // NOTE(review): presumably a scoped lock released when this variable
        // goes out of scope - confirm against lock().
        $magicScopeVariable = $clearState ? $this->lock() : null;

        $this->startParse( $title, $options, self::OT_WIKI, $clearState );
        $this->setUser( $user );

        // Strip U+0000 NULL (T159174)
        $withoutNul = str_replace( "\000", '', $text );

        // Line endings are still normalized here for backwards-compatibility
        // with other code that just calls PST, but this should already
        // be handled in TextContent subclasses
        $wikitext = TextContent::normalizeLineEndings( $withoutNul );

        if ( $options->getPreSaveTransform() ) {
                $wikitext = $this->pstPass2( $wikitext, $user );
        }
        $wikitext = $this->mStripState->unstripBoth( $wikitext );

        # Reset
        $this->setUser( null );

        return $wikitext;
}
4707
/**
 * Pre-save transform helper function
 *
 * Performs, in this order: variable replacement (replaceVariables()),
 * signature substitution (~~~, ~~~~ and ~~~~~) and expansion of the
 * "pipe trick" link shortcuts.
 *
 * @param string $text
 * @param User $user
 *
 * @return string
 */
private function pstPass2( $text, $user ) {
        # Note: This is the timestamp saved as hardcoded wikitext to the database, we use
        # $this->contLang here in order to give everyone the same signature and use the default one
        # rather than the one selected in each user's preferences.  (see also T14815)
        $ts = $this->mOptions->getTimestamp();
        $timestamp = MWTimestamp::getLocalInstance( $ts );
        $ts = $timestamp->format( 'YmdHis' );
        $tzMsg = $timestamp->getTimezoneMessage()->inContentLanguage()->text();

        $d = $this->contLang->timeanddate( $ts, false, false ) . " ($tzMsg)";

        # Variable replacement
        # Because mOutputType is OT_WIKI, this will only process {{subst:xxx}} type tags
        $text = $this->replaceVariables( $text );

        # This works almost by chance, as the replaceVariables are done before the getUserSig(),
        # which may corrupt this parser instance via its wfMessage()->text() call.

        # Signatures
        if ( strpos( $text, '~~~' ) !== false ) {
                $sigText = $this->getUserSig( $user );
                $text = strtr( $text, [
                        '~~~~~' => $d,
                        '~~~~' => "$sigText $d",
                        '~~~' => $sigText
                ] );
                # The main two signature forms used above are time-sensitive
                $this->setOutputFlag( 'user-signature', 'User signature detected' );
        }

        # Context links ("pipe tricks"): [[|name]] and [[name (context)|]]
        $tc = '[' . Title::legalChars() . ']';
        $nc = '[ _0-9A-Za-z\x80-\xff-]'; # Namespaces can use non-ascii!

        // [[ns:page (context)|]]
        $p1 = "/\[\[(:?$nc+:|:|)($tc+?)( ?\\($tc+\\))\\|]]/";
        // [[ns:page（context）|]] (double-width brackets, added in r40257)
        $p4 = "/\[\[(:?$nc+:|:|)($tc+?)( ?（$tc+）)\\|]]/";
        // [[ns:page (context), context|]] (using either single or double-width comma)
        $p3 = "/\[\[(:?$nc+:|:|)($tc+?)( ?\\($tc+\\)|)((?:, |，)$tc+|)\\|]]/";
        // [[|page]] (reverse pipe trick: add context from page title)
        $p2 = "/\[\[\\|($tc+)]]/";

        # try $p1 first, to turn "[[A, B (C)|]]" into "[[A, B (C)|A, B]]"
        $text = preg_replace( $p1, '[[\\1\\2\\3|\\2]]', $text );
        $text = preg_replace( $p4, '[[\\1\\2\\3|\\2]]', $text );
        $text = preg_replace( $p3, '[[\\1\\2\\3\\4|\\2]]', $text );

        # Reverse pipe trick: borrow the namespace-like prefix and the trailing
        # " (context)" or ", context" from the title of the page being saved.
        $t = $this->mTitle->getText();
        $m = [];
        if ( preg_match( "/^($nc+:|)$tc+?( \\($tc+\\))$/", $t, $m )
                || ( preg_match( "/^($nc+:|)$tc+?(, $tc+|)$/", $t, $m ) && "$m[1]$m[2]" != '' )
        ) {
                $prefix = $m[1];
                $context = $m[2];
                # Build the link in a callback instead of a replacement string:
                # the pieces taken from the title may legally contain "$" or "\"
                # followed by a digit, which preg_replace() would interpret as
                # backreferences and corrupt the generated link.
                $text = preg_replace_callback(
                        $p2,
                        static function ( array $link ) use ( $prefix, $context ) {
                                return "[[{$prefix}{$link[1]}{$context}|{$link[1]}]]";
                        },
                        $text
                );
        } else {
                # if there's no context, don't bother duplicating the title
                $text = preg_replace( $p2, '[[\\1]]', $text );
        }

        return $text;
}
4777
/**
 * Produce the signature wikitext for a user, validated and ready to insert.
 *
 * Callers that already know the nickname or the fancySig option can pass
 * them in to save a database query.
 * Do not reuse this parser instance after calling getUserSig(),
 * as it may have changed.
 *
 * @param User &$user
 * @param string|bool $nickname Nickname to use or false to use user's default nickname
 * @param bool|null $fancySig whether the nickname is the complete signature
 *    or null to use default value
 * @return string
 */
public function getUserSig( &$user, $nickname = false, $fancySig = null ) {
        $username = $user->getName();

        # Anything the caller did not supply is read from the user object
        if ( $nickname === false ) {
                $nickname = $user->getOption( 'nickname' );
        }
        if ( $fancySig === null ) {
                $fancySig = $user->getBoolOption( 'fancysig' );
        }

        # No nickname set: sign with the plain user name
        if ( $nickname == null ) {
                $nickname = $username;
        }

        $isOverlong = mb_strlen( $nickname ) > $this->svcOptions->get( 'MaxSigChars' );
        if ( $isOverlong ) {
                $nickname = $username;
                $this->logger->debug( __METHOD__ . ": $username has overlong signature." );
        } elseif ( $fancySig !== false ) {
                # The signature might contain markup, so it has to validate
                if ( $this->validateSig( $nickname ) !== false ) {
                        # Valid: clean it up (if needed) and use it as the whole signature
                        return $this->cleanSig( $nickname, true );
                }

                # Invalid: fall back to the default
                $nickname = $username;
                $this->logger->debug( __METHOD__ . ": $username has bad XML tags in signature." );
        }

        # Make sure the nickname doesn't get a sig in a sig
        $nickname = self::cleanSigInSig( $nickname );

        # Still here, so build a link to the user page from the signature message
        $userText = wfEscapeWikiText( $username );
        $nickText = wfEscapeWikiText( $nickname );
        $msgName = $user->isAnon() ? 'signature-anon' : 'signature';

        $message = wfMessage( $msgName, $userText, $nickText );
        return $message->inContentLanguage()->title( $this->getTitle() )->text();
}
4832
/**
 * Check that a user's signature is a well-formed XML fragment.
 *
 * @param string $text Signature text to check
 * @return string|bool The text, unchanged, when it is well formed; false otherwise.
 */
public function validateSig( $text ) {
        if ( Xml::isWellFormedXmlFragment( $text ) ) {
                return $text;
        }

        return false;
}
4842
/**
 * Clean up signature text.
 *
 * 1) Rewrite every "{{" that is not already followed by the 'subst' magic
 *    word so that it is, i.e. substitute all transclusions
 * 2) Strip 3, 4 or 5 tildes out of the signature, @see cleanSigInSig
 * 3) Expand the result through the preprocessor
 *
 * When called outside of a parse ($parsing false) the parser is locked and
 * set up for $wgTitle with fresh ParserOptions first, and strip markers are
 * unstripped from the result. The text is returned untouched when the
 * parser options have signature cleaning disabled.
 *
 * @param string $text
 * @param bool $parsing Whether we're cleaning (preferences save) or parsing
 * @return string Signature text
 */
public function cleanSig( $text, $parsing = false ) {
        $standalone = !$parsing;

        if ( $standalone ) {
                global $wgTitle;
                # Kept in a local so it stays alive until this method returns
                # (presumably the lock is released when it goes out of scope)
                $lockGuard = $this->lock();
                $this->startParse( $wgTitle, new ParserOptions, self::OT_PREPROCESS, true );
        }

        # Option to disable this feature
        if ( !$this->mOptions->getCleanSignatures() ) {
                return $text;
        }

        # @todo FIXME: Regex doesn't respect extension tags or nowiki
        #  => Move this logic to braceSubstitution()
        $subst = $this->magicWordFactory->get( 'subst' );
        $bracePattern = '/\{\{(?!(?:' . $subst->getBaseRegex() . '))/x' . $subst->getRegexCase();
        $braceReplacement = '{{' . $subst->getSynonym( 0 );

        $text = preg_replace( $bracePattern, $braceReplacement, $text );
        $text = self::cleanSigInSig( $text );

        $tree = $this->preprocessToDom( $text );
        $rootFrame = $this->getPreprocessor()->newFrame();
        $text = $rootFrame->expand( $tree );

        if ( $standalone ) {
                $text = $this->mStripState->unstripBoth( $text );
        }

        return $text;
}
4883
/**
 * Remove every run of 3, 4 or 5 tildes from signature text.
 *
 * Runs are matched greedily from the left, so a longer run is consumed
 * five tildes at a time and may leave one or two tildes behind.
 *
 * @param string $text
 * @return string Signature text with /~{3,5}/ removed
 */
public static function cleanSigInSig( $text ) {
        return preg_replace( '/~{3,5}/', '', $text );
}
4894
/**
 * Prepare the parser for use by an external caller.
 *
 * Sets up the state that parse() would normally set up (via startParse())
 * so that individual class members can be called with confidence, and
 * additionally records the revision ID when one is given.
 *
 * @param Title|null $title
 * @param ParserOptions $options
 * @param int $outputType
 * @param bool $clearState
 * @param int|null $revId Revision ID to store; left untouched when null
 */
public function startExternalParse( Title $title = null, ParserOptions $options,
        $outputType, $clearState = true, $revId = null
) {
        $this->startParse( $title, $options, $outputType, $clearState );

        if ( $revId === null ) {
                return;
        }

        $this->mRevisionId = $revId;
}
4913
/**
 * Store the title, options and output type for a parse run, optionally
 * clearing the parser state afterwards.
 *
 * @param Title|null $title
 * @param ParserOptions $options
 * @param int $outputType
 * @param bool $clearState Whether to call clearState() once everything is set
 */
private function startParse( Title $title = null, ParserOptions $options,
        $outputType, $clearState = true
) {
        $this->setTitle( $title );
        $this->mOptions = $options;
        $this->setOutputType( $outputType );

        if ( !$clearState ) {
                return;
        }

        $this->clearState();
}
4930
/**
 * Wrapper for preprocess()
 *
 * A static flag guards against infinite recursion: while one call is in
 * progress, any nested call returns its text unchanged. The flag is reset
 * in a finally block so that an exception thrown by preprocess() cannot
 * leave it set and silently disable message transformation afterwards.
 *
 * @param string $text The text to preprocess
 * @param ParserOptions $options
 * @param Title|null $title Title object or null to use $wgTitle
 * @return string
 */
public function transformMsg( $text, $options, $title = null ) {
        static $executing = false;

        # Guard against infinite recursion
        if ( $executing ) {
                return $text;
        }

        if ( !$title ) {
                global $wgTitle;
                $title = $wgTitle;
        }

        $executing = true;
        try {
                return $this->preprocess( $text, $title, $options );
        } finally {
                # Always release the guard, even when preprocess() throws
                $executing = false;
        }
}
4958
/**
 * Create an HTML-style tag, e.g. "<yourtag>special text</yourtag>"
 * The callback should have the following form:
 *    function myParserHook( $text, $params, $parser, $frame ) { ... }
 *
 * Transform and return $text. Use $parser for any required context, e.g. use
 * $parser->getTitle() and $parser->getOptions() not $wgTitle or $wgOut->mParserOptions
 *
 * Hooks may return extended information by returning an array, of which the
 * first numbered element (index 0) must be the return string, and all other
 * entries are extracted into local variables within an internal function
 * in the Parser class.
 *
 * This interface (introduced r61913) appears to be undocumented, but
 * 'markerType' is used by some core tag hooks to override which strip
 * array their results are placed in. **Use great caution if attempting
 * this interface, as it is not documented and injudicious use could smash
 * private variables.**
 *
 * The tag name is lower-cased before it is registered, and is appended to
 * the strip list if it is not already there.
 *
 * @param string $tag The tag to use, e.g. 'hook' for "<hook>"
 * @param callable $callback The callback function (and object) to use for the tag
 * @throws MWException If the tag name contains '<', '>', CR or LF
 * @return callable|null The old value of the mTagHooks array associated with the hook
 */
public function setHook( $tag, callable $callback ) {
        $tag = strtolower( $tag );
        if ( preg_match( '/[<>\r\n]/', $tag, $m ) ) {
                throw new MWException( "Invalid character {$m[0]} in setHook('$tag', ...) call" );
        }
        $oldVal = $this->mTagHooks[$tag] ?? null;
        $this->mTagHooks[$tag] = $callback;
        # Strict comparison: numeric-looking tag names such as '1e1' and '10'
        # compare equal under loose comparison and must not shadow each other.
        if ( !in_array( $tag, $this->mStripList, true ) ) {
                $this->mStripList[] = $tag;
        }

        return $oldVal;
}
4996
/**
 * As setHook(), but letting the contents be parsed.
 *
 * Transparent tag hooks are like regular XML-style tag hooks, except they
 * operate late in the transformation sequence, on HTML instead of wikitext.
 *
 * This is probably obsoleted by things dealing with parser frames?
 * The only extension currently using it is geoserver.
 *
 * The tag name is lower-cased before it is registered. Unlike setHook(),
 * the tag is not added to the strip list.
 *
 * @since 1.10
 * @todo better document or deprecate this
 *
 * @param string $tag The tag to use, e.g. 'hook' for "<hook>"
 * @param callable $callback The callback function (and object) to use for the tag
 * @throws MWException If the tag name contains '<', '>', CR or LF
 * @return callable|null The old value of the mTransparentTagHooks array
 *    associated with the hook
 */
public function setTransparentTagHook( $tag, callable $callback ) {
        $tag = strtolower( $tag );
        if ( preg_match( '/[<>\r\n]/', $tag, $m ) ) {
                throw new MWException(
                        "Invalid character {$m[0]} in setTransparentTagHook('$tag', ...) call"
                );
        }
        $oldVal = $this->mTransparentTagHooks[$tag] ?? null;
        $this->mTransparentTagHooks[$tag] = $callback;

        return $oldVal;
}
5024
/**
 * Remove all tag hooks and function tag hooks, and reset the strip list
 * to its default contents.
 */
public function clearTagHooks() {
        $this->mStripList = $this->mDefaultStripList;
        $this->mFunctionTagHooks = [];
        $this->mTagHooks = [];
}
5033
/**
 * Register a parser function, e.g. {{sum:1|2|3}}
 *
 * Callback signature by default:
 *    function myParserFunction( &$parser, $arg1, $arg2, $arg3 ) { ... }
 *
 * Callback signature with Parser::SFH_OBJECT_ARGS:
 *    function myParserFunction( $parser, $frame, $args ) { ... }
 *
 * The callback returns either the text result, or an array holding the text
 * in element 0 plus any of these flags as string keys:
 *   found                     The text returned is valid, stop processing the template. This
 *                             is on by default.
 *   nowiki                    Wiki markup in the return value should be escaped
 *   isHTML                    The returned text is HTML, armour it against wikitext transformation
 *
 * Every synonym of the magic word is entered into the function synonym
 * table: lower-cased in the content language unless the magic word is case
 * sensitive, prefixed with '#' unless SFH_NO_HASH is set, and with one
 * trailing colon removed.
 *
 * @param string $id The magic word ID
 * @param callable $callback The callback function (and object) to use
 * @param int $flags A combination of the following flags:
 *     Parser::SFH_NO_HASH      No leading hash, i.e. {{plural:...}} instead of {{#if:...}}
 *
 *     Parser::SFH_OBJECT_ARGS  Pass the template arguments as PPNode objects instead of text.
 *     This allows for conditional expansion of the parse tree, allowing you to eliminate dead
 *     branches and thus speed up parsing. It is also possible to analyse the parse tree of
 *     the arguments, and to control the way they are expanded.
 *
 *     The $frame parameter is a PPFrame. This can be used to produce expanded text from the
 *     arguments, for instance:
 *         $text = isset( $args[0] ) ? $frame->expand( $args[0] ) : '';
 *
 *     For technical reasons, $args[0] is pre-expanded and will be a string. This may change in
 *     future versions. Please call $frame->expand() on it anyway so that your code keeps
 *     working if/when this is changed.
 *
 *     If you want whitespace to be trimmed from $args, you need to do it yourself, post-
 *     expansion.
 *
 *     Please read the documentation in includes/parser/Preprocessor.php for more information
 *     about the methods available in PPFrame and PPNode.
 *
 * @throws MWException If no magic word is found for $id
 * @return callable|null The callback previously registered for this ID, or null if none
 */
public function setFunctionHook( $id, callable $callback, $flags = 0 ) {
        $previous = null;
        if ( isset( $this->mFunctionHooks[$id] ) ) {
                $previous = $this->mFunctionHooks[$id][0];
        }
        $this->mFunctionHooks[$id] = [ $callback, $flags ];

        # Add to function cache
        $magicWord = $this->magicWordFactory->get( $id );
        if ( !$magicWord ) {
                throw new MWException( __METHOD__ . '() expecting a magic word identifier.' );
        }

        $names = $magicWord->getSynonyms();
        # 0 or 1; used as the first-level key of the synonym table
        $caseKey = intval( $magicWord->isCaseSensitive() );
        $wantsHash = !( $flags & self::SFH_NO_HASH );

        foreach ( $names as $name ) {
                # Case-insensitive words are stored lower-cased
                if ( !$caseKey ) {
                        $name = $this->contLang->lc( $name );
                }
                if ( $wantsHash ) {
                        $name = '#' . $name;
                }
                # Remove trailing colon
                if ( substr( $name, -1, 1 ) === ':' ) {
                        $name = substr( $name, 0, -1 );
                }
                $this->mFunctionSynonyms[$caseKey][$name] = $id;
        }

        return $previous;
}
5107
/**
 * List the identifiers of all registered function hooks.
 *
 * firstCallInit() is run first, before the list is read.
 *
 * @return array Keys of the function hook table
 */
public function getFunctionHooks() {
        $this->firstCallInit();

        $hookIds = array_keys( $this->mFunctionHooks );

        return $hookIds;
}
5117
/**
 * Create a tag function, e.g. "<test>some stuff</test>".
 * Unlike tag hooks, tag functions are parsed at preprocessor level.
 * Unlike parser functions, their content is not preprocessed.
 *
 * The tag name is lower-cased before it is registered, and is appended to
 * the strip list if it is not already there.
 *
 * @param string $tag
 * @param callable $callback
 * @param int $flags Stored alongside the callback
 * @throws MWException If the tag name contains '<', '>', CR or LF
 * @return array|null The [ callback, flags ] pair previously registered for
 *    this tag, or null if there was none
 */
public function setFunctionTagHook( $tag, callable $callback, $flags ) {
        $tag = strtolower( $tag );
        if ( preg_match( '/[<>\r\n]/', $tag, $m ) ) {
                throw new MWException( "Invalid character {$m[0]} in setFunctionTagHook('$tag', ...) call" );
        }
        $old = $this->mFunctionTagHooks[$tag] ?? null;
        $this->mFunctionTagHooks[$tag] = [ $callback, $flags ];

        # Strict comparison: numeric-looking tag names such as '1e1' and '10'
        # compare equal under loose comparison and must not shadow each other.
        if ( !in_array( $tag, $this->mStripList, true ) ) {
                $this->mStripList[] = $tag;
        }

        return $old;
}
5142
/**
 * Swap the "<!--LINK-->" link placeholders in the buffer for actual links.
 * Placeholders created in Linker::link()
 *
 * The work is delegated to the link holder array, which modifies the
 * buffer in place.
 *
 * @param string &$text Buffer to operate on
 * @param int $options Not used by this method
 */
public function replaceLinkHolders( &$text, $options = 0 ) {
        $holders = $this->mLinkHolders;
        $holders->replace( $text );
}
5153
/**
 * Swap "<!--LINK-->" link placeholders for the plain text of the links
 * (not HTML-formatted).
 *
 * @param string $text
 * @return string Result of the link holder array's replaceText()
 */
public function replaceLinkHoldersText( $text ) {
        $plain = $this->mLinkHolders->replaceText( $text );

        return $plain;
}
5164
/**
 * Renders an image gallery from a text with one line per image.
 * text labels may be given by using |-style alternative text. E.g.
 *   Image:one.jpg|The number "1"
 *   Image:tree.jpg|A tree
 * given as text will return the HTML of a gallery with two images,
 * labeled 'The number "1"' and
 * 'A tree'.
 *
 * Each line is split into a file name and optional pipe-separated
 * parameters. Parameters matching the alt/link magic words, or a magic word
 * from the file handler's parameter map, are treated as options; any other
 * parameter becomes the caption, the last one winning. Lines that are empty
 * or do not yield a valid title are skipped.
 *
 * @param string $text Gallery body, one image per line
 * @param array $params Tag attributes. 'mode', 'showfilename', 'caption',
 *    'perrow', 'widths' and 'heights' are read here; the whole array is also
 *    passed on as HTML attributes (after validation) and as additional
 *    gallery options.
 * @return string HTML
 */
public function renderImageGallery( $text, $params ) {
        $mode = $params['mode'] ?? false;

        try {
                $ig = ImageGalleryBase::factory( $mode );
        } catch ( Exception $e ) {
                // If invalid type set, fallback to default.
                $ig = ImageGalleryBase::factory( false );
        }

        $ig->setContextTitle( $this->mTitle );
        $ig->setShowBytes( false );
        $ig->setShowDimensions( false );
        $ig->setParser( $this );
        $ig->setHideBadImages();
        $ig->setAttributes( Sanitizer::validateTagAttributes( $params, 'ul' ) );
        // File names are shown only when the attribute is present
        $ig->setShowFilename( isset( $params['showfilename'] ) );

        if ( isset( $params['caption'] ) ) {
                // NOTE: We aren't passing a frame here or below.  Frame info
                // is currently opaque to Parsoid, which acts on OT_PREPROCESS.
                // See T107332#4030581
                $caption = $this->recursiveTagParse( $params['caption'] );
                $ig->setCaptionHtml( $caption );
        }
        if ( isset( $params['perrow'] ) ) {
                $ig->setPerRow( $params['perrow'] );
        }
        if ( isset( $params['widths'] ) ) {
                $ig->setWidths( $params['widths'] );
        }
        if ( isset( $params['heights'] ) ) {
                $ig->setHeights( $params['heights'] );
        }
        $ig->setAdditionalOptions( $params );

        // Avoid PHP 7.1 warning from passing $this by reference
        $parser = $this;
        Hooks::run( 'BeforeParserrenderImageGallery', [ &$parser, &$ig ] );

        $lines = StringUtils::explode( "\n", $text );
        foreach ( $lines as $line ) {
                # match lines like these:
                # Image:someimage.jpg|This is some image
                $matches = [];
                preg_match( "/^([^|]+)(\\|(.*))?$/", $line, $matches );
                # Skip empty lines
                if ( count( $matches ) === 0 ) {
                        continue;
                }

                if ( strpos( $matches[0], '%' ) !== false ) {
                        $matches[1] = rawurldecode( $matches[1] );
                }
                $title = Title::newFromText( $matches[1], NS_FILE );
                if ( $title === null ) {
                        # Bogus title. Ignore these so we don't bomb out later.
                        continue;
                }

                # We need to get what handler the file uses, to figure out parameters.
                # Note, a hook can override the file name, and choose an entirely different
                # file (which potentially could be of a different type and have different handler).
                $options = [];
                $descQuery = false;
                Hooks::run( 'BeforeParserFetchFileAndTitle',
                        [ $this, $title, &$options, &$descQuery ] );
                # Don't register it now, as TraditionalImageGallery does that later.
                $file = $this->fetchFileNoRegister( $title, $options );
                $handler = $file ? $file->getHandler() : false;

                $paramMap = [
                        'img_alt' => 'gallery-internal-alt',
                        'img_link' => 'gallery-internal-link',
                ];
                if ( $handler ) {
                        $paramMap += $handler->getParamMap();
                        // We don't want people to specify per-image widths.
                        // Additionally the width parameter would need special casing anyhow.
                        unset( $paramMap['img_width'] );
                }

                $mwArray = $this->magicWordFactory->newArray( array_keys( $paramMap ) );

                $label = '';
                $alt = '';
                $link = '';
                $handlerOptions = [];
                if ( isset( $matches[3] ) ) {
                        // look for an |alt= definition while trying not to break existing
                        // captions with multiple pipes (|) in it, until a more sensible grammar
                        // is defined for images in galleries

                        // FIXME: Doing recursiveTagParse at this stage, and the trim before
                        // splitting on '|' is a bit odd, and different from makeImage.
                        $matches[3] = $this->recursiveTagParse( trim( $matches[3] ) );
                        // Protect LanguageConverter markup
                        $parameterMatches = StringUtils::delimiterExplode(
                                '-{', '}-', '|', $matches[3], true /* nested */
                        );

                        foreach ( $parameterMatches as $parameterMatch ) {
                                list( $magicName, $match ) = $mwArray->matchVariableStartToEnd( $parameterMatch );
                                if ( $magicName ) {
                                        $paramName = $paramMap[$magicName];

                                        switch ( $paramName ) {
                                                case 'gallery-internal-alt':
                                                        $alt = $this->stripAltText( $match, false );
                                                        break;
                                                case 'gallery-internal-link':
                                                        $linkValue = $this->stripAltText( $match, false );
                                                        // The pipe must be escaped: unescaped it is an
                                                        // alternation, which would also match values that
                                                        // merely start with "-{R" or end with "}-".
                                                        if ( preg_match( '/^-{R\|(.*)}-$/', $linkValue ) ) {
                                                                // Result of LanguageConverter::markNoConversion
                                                                // invoked on an external link.
                                                                // Drop the leading "-{R|" and trailing "}-".
                                                                $linkValue = substr( $linkValue, 4, -2 );
                                                        }
                                                        list( $type, $target ) = $this->parseLinkParameter( $linkValue );
                                                        if ( $type === 'link-url' ) {
                                                                $link = $target;
                                                                $this->mOutput->addExternalLink( $target );
                                                        } elseif ( $type === 'link-title' ) {
                                                                $link = $target->getLinkURL();
                                                                $this->mOutput->addLink( $target );
                                                        }
                                                        break;
                                                default:
                                                        // Must be a handler specific parameter. Such names
                                                        // only enter $paramMap when $handler is set.
                                                        if ( $handler->validateParam( $paramName, $match ) ) {
                                                                $handlerOptions[$paramName] = $match;
                                                        } else {
                                                                // Guess not, consider it as caption.
                                                                $this->logger->debug(
                                                                        "$parameterMatch failed parameter validation" );
                                                                $label = $parameterMatch;
                                                        }
                                        }

                                } else {
                                        // Last pipe wins.
                                        $label = $parameterMatch;
                                }
                        }
                }

                $ig->add( $title, $label, $alt, $link, $handlerOptions );
        }
        $html = $ig->toHTML();
        Hooks::run( 'AfterParserFetchFileAndTitle', [ $this, $ig, &$html ] );
        return $html;
}
5337
/**
 * Get the image parameter map and the matching magic word array for a
 * media handler.
 *
 * The map is keyed by magic word ID (e.g. 'img_left') and each value is a
 * [ type, name ] pair, where type is 'horizAlign', 'vertAlign', 'frame' or
 * 'handler'. Handler-specific entries override built-in ones with the same
 * key. Results are cached per handler class; a falsy handler uses the
 * empty string as its cache key.
 *
 * @param MediaHandler $handler Handler to take extra parameters from; a
 *    falsy value yields the built-in parameters only
 * @return array Two elements: the parameter map and the magic word array
 */
public function getImageParams( $handler ) {
        $cacheKey = $handler ? get_class( $handler ) : '';

        if ( isset( $this->mImageParams[$cacheKey] ) ) {
                # Already built for this handler class
                return [ $this->mImageParams[$cacheKey], $this->mImageParamsMagicArray[$cacheKey] ];
        }

        # Initialise static lists
        static $builtinNames = [
                'horizAlign' => [ 'left', 'right', 'center', 'none' ],
                'vertAlign' => [ 'baseline', 'sub', 'super', 'top', 'text-top', 'middle',
                        'bottom', 'text-bottom' ],
                'frame' => [ 'thumbnail', 'manualthumb', 'framed', 'frameless',
                        'upright', 'border', 'link', 'alt', 'class' ],
        ];
        static $builtinMap;
        if ( !$builtinMap ) {
                $builtinMap = [];
                foreach ( $builtinNames as $group => $members ) {
                        foreach ( $members as $member ) {
                                // For grep: img_left, img_right, img_center, img_none,
                                // img_baseline, img_sub, img_super, img_top, img_text_top, img_middle,
                                // img_bottom, img_text_bottom,
                                // img_thumbnail, img_manualthumb, img_framed, img_frameless, img_upright,
                                // img_border, img_link, img_alt, img_class
                                $magicId = str_replace( '-', '_', "img_$member" );
                                $builtinMap[$magicId] = [ $group, $member ];
                        }
                }
        }

        # Add handler params on top of the built-in ones
        $combined = $builtinMap;
        if ( $handler ) {
                foreach ( $handler->getParamMap() as $magicId => $handlerParam ) {
                        $combined[$magicId] = [ 'handler', $handlerParam ];
                }
        }

        $this->mImageParams[$cacheKey] = $combined;
        $this->mImageParamsMagicArray[$cacheKey] =
                $this->magicWordFactory->newArray( array_keys( $combined ) );

        return [ $this->mImageParams[$cacheKey], $this->mImageParamsMagicArray[$cacheKey] ];
}
5387
/**
 * Turn the option text of an image link into the image's HTML.
 *
 * The option text has the form "option|option|...|caption". It is split on
 * "|" (LanguageConverter "-{ ... }-" markup is kept in one piece) and every
 * piece is matched against the image magic words known for the file's media
 * handler. A piece that is not a known option, or whose value does not
 * validate, is taken as the caption; if several pieces qualify, the last
 * one wins.
 *
 * Options are:
 *  * thumbnail  make a thumbnail with enlarge-icon and caption, alignment depends on lang
 *  * left       no resizing, just left align. label is used for alt= only
 *  * right      same, but right aligned
 *  * none       same, but not aligned
 *  * ___px      scale to ___ pixels width, no aligning. e.g. use in taxobox
 *  * center     center the image
 *  * frame      Keep original image size, no magnify-button.
 *  * framed     Same as "frame"
 *  * frameless  like 'thumb' but without a frame. Keeps user preferences for width
 *  * upright    reduce width for upright images, rounded to full __0 px
 *  * border     draw a 1px border around the image
 *  * alt        Text for HTML alt attribute (defaults to empty)
 *  * class      Set a class for img node
 *  * link       Set the target of the image link. Can be external, interwiki, or local
 * vertical-align values (no % or length right now):
 *  * baseline, sub, super, top, text-top, middle, bottom, text-bottom
 *
 * @param Title $title
 * @param string $options
 * @param LinkHolderArray|bool $holders
 * @return string HTML
 */
public function makeImage( $title, $options, $holders = false ) {
    # Split into pieces, protecting LanguageConverter markup while doing so
    $pieces = StringUtils::delimiterExplode(
        '-{', '}-', '|', $options, true /* allow nesting */
    );

    # Give extensions a chance to select the file revision for us
    $fetchOptions = [];
    $descQuery = false;
    Hooks::run(
        'BeforeParserFetchFileAndTitle',
        [ $this, $title, &$fetchOptions, &$descQuery ]
    );
    # Fetch and register the file (file title may be different via hooks)
    list( $file, $title ) = $this->fetchFileAndTitle( $title, $fetchOptions );

    # The parameter map depends on the file's media handler, if there is one
    $handler = false;
    if ( $file ) {
        $handler = $file->getHandler();
    }
    list( $paramMap, $magicWords ) = $this->getImageParams( $handler );

    if ( !$file ) {
        $this->addTrackingCategory( 'broken-file-category' );
    }

    # Process the input parameters
    $captionText = '';
    $imageParams = [
        'frame' => [],
        'handler' => [],
        'horizAlign' => [],
        'vertAlign' => [],
    ];
    $formatSeen = false;
    foreach ( $pieces as $piece ) {
        $piece = trim( $piece );
        list( $magicName, $value ) = $magicWords->matchVariableStartToEnd( $piece );

        if ( !isset( $paramMap[$magicName] ) ) {
            # Not a known option at all, so it is (for now) the caption
            $captionText = $piece;
            continue;
        }

        list( $type, $paramName ) = $paramMap[$magicName];
        $accepted = false;
        # Special case; width and height come in one variable together
        $isDimensions = ( $type === 'handler' && $paramName === 'width' );

        if ( $isDimensions ) {
            $dimensions = self::parseWidthParam( $value );
            foreach ( [ 'width', 'height' ] as $dimension ) {
                if ( !isset( $dimensions[$dimension] ) ) {
                    continue;
                }
                $size = $dimensions[$dimension];
                if ( $handler->validateParam( $dimension, $size ) ) {
                    $imageParams[$type][$dimension] = $size;
                    $accepted = true;
                }
            }
            # else no validation -- T15436
        } elseif ( $type === 'handler' ) {
            # Validate handler parameter
            $accepted = $handler->validateParam( $paramName, $value );
        } else {
            # Validate internal parameters
            switch ( $paramName ) {
                case 'manualthumb':
                case 'alt':
                case 'class':
                    # @todo FIXME: Possibly check validity here for
                    # manualthumb? downstream behavior seems odd with
                    # missing manual thumbs.
                    $accepted = true;
                    $value = $this->stripAltText( $value, $holders );
                    break;
                case 'link':
                    # From here on $paramName is the link type reported by
                    # parseLinkParameter() and $value the link target
                    list( $paramName, $value ) = $this->parseLinkParameter(
                        $this->stripAltText( $value, $holders )
                    );
                    if ( $paramName ) {
                        $accepted = true;
                        if ( $paramName === 'no-link' ) {
                            $value = true;
                        } elseif ( $paramName === 'link-url' ) {
                            $externalTarget = $this->mOptions->getExternalLinkTarget();
                            if ( $externalTarget ) {
                                $imageParams[$type]['link-target'] = $externalTarget;
                            }
                        }
                    }
                    break;
                case 'frameless':
                case 'framed':
                case 'thumbnail':
                    // use first appearing option, discard others.
                    $accepted = !$formatSeen;
                    $formatSeen = true;
                    break;
                default:
                    # Most other things appear to be empty or numeric...
                    $accepted = ( $value === false || is_numeric( trim( $value ) ) );
            }
        }

        # Width/height were already stored individually above
        if ( $accepted && !$isDimensions ) {
            $imageParams[$type][$paramName] = $value;
        }
        if ( !$accepted ) {
            $captionText = $piece;
        }
    }

    # Process alignment parameters
    if ( $imageParams['horizAlign'] ) {
        $imageParams['frame']['align'] = key( $imageParams['horizAlign'] );
    }
    if ( $imageParams['vertAlign'] ) {
        $imageParams['frame']['valign'] = key( $imageParams['vertAlign'] );
    }

    $imageParams['frame']['caption'] = $captionText;

    # Will the image be presented in a frame, with the caption below?
    $imageIsFramed = isset( $imageParams['frame']['frame'] )
        || isset( $imageParams['frame']['framed'] )
        || isset( $imageParams['frame']['thumbnail'] )
        || isset( $imageParams['frame']['manualthumb'] );

    # History of the unnamed parameter: [[Image:Foo|text...]] first set the
    # alt text, later also the caption shown after the image, and also the
    # title attribute. With a dedicated alt parameter available, the alt text
    # should not repeat the caption (screen readers would read it twice);
    # that is the framed/thumbnail case. Without a caption the unnamed
    # parameter is still used as alt text when no alt parameter is given.
    # Possible future tweaks: a title= parameter, ignoring the unnamed
    # parameter for caption-less images, an explicit caption= parameter
    # with the unnamed one kept for BC; ...
    $altMissing = !isset( $imageParams['frame']['alt'] );
    if ( $imageIsFramed ) { # Framed image
        if ( $altMissing && $captionText === '' ) {
            # No caption or alt text, add the filename as the alt text so
            # that screen readers at least get some description of the image
            $imageParams['frame']['alt'] = $title->getText();
        }
        # Do not set the 'title' frame param because tooltips don't make
        # sense for framed images
    } else { # Inline image
        if ( $altMissing ) {
            # No alt text: use the "caption", or failing that the filename
            $imageParams['frame']['alt'] = ( $captionText !== '' )
                ? $this->stripAltText( $captionText, $holders )
                : $title->getText();
        }
        # Use the "caption" for the tooltip text
        $imageParams['frame']['title'] = $this->stripAltText( $captionText, $holders );
    }
    $imageParams['handler']['targetlang'] = $this->getTargetLanguage()->getCode();

    Hooks::run( 'ParserMakeImageParams', [ $title, $file, &$imageParams, $this ] );

    # Linker does the rest
    $time = $fetchOptions['time'] ?? false;
    $html = Linker::makeImageLink(
        $this,
        $title,
        $file,
        $imageParams['frame'],
        $imageParams['handler'],
        $time,
        $descQuery,
        $this->mOptions->getThumbSize()
    );

    # Give the handler a chance to modify the parser object
    if ( $handler ) {
        $handler->parserTransformHook( $this, $file );
    }

    return $html;
}
5597
/**
 * Parse the value of 'link' parameter in image syntax (`[[File:Foo.jpg|link=<value>]]`).
 *
 * Adds an entry to appropriate link tables: accepted external URLs are
 * registered with addExternalLink() and accepted titles with addLink() on
 * the current output object.
 *
 * @since 1.32
 * @param string $value
 * @return array of `[ type, target ]`, where:
 *   - `type` is one of:
 *     - `null`: Given value is not a valid link target, use default
 *     - `'no-link'`: Given value is empty, do not generate a link
 *     - `'link-url'`: Given value is a valid external link
 *     - `'link-title'`: Given value is a valid internal link
 *   - `target` is:
 *     - When `type` is `null` or `'no-link'`: `false`
 *     - When `type` is `'link-url'`: URL string corresponding to given value
 *     - When `type` is `'link-title'`: Title object corresponding to given value
 */
public function parseLinkParameter( $value ) {
    if ( $value === '' ) {
        return [ 'no-link', false ];
    }

    $prots = $this->mUrlProtocols;
    if ( preg_match( "/^((?i)$prots)/", $value ) ) {
        # Starts like a URL, so it is never tried as a page title: either
        # the whole value is a well-formed URL or it is rejected.
        $chars = self::EXT_LINK_URL_CLASS;
        $addr = self::EXT_LINK_ADDR;
        # The D modifier anchors "$" at the very end of the subject; without
        # it a value ending in a newline would still be accepted and the
        # newline would become part of the registered URL.
        if ( preg_match( "/^((?i)$prots)$addr$chars*$/uD", $value ) ) {
            $this->mOutput->addExternalLink( $value );
            return [ 'link-url', $value ];
        }
        return [ null, false ];
    }

    $linkTitle = Title::newFromText( $value );
    if ( $linkTitle ) {
        $this->mOutput->addLink( $linkTitle );
        return [ 'link-title', $linkTitle ];
    }

    return [ null, false ];
}
5640
/**
 * Reduce caption wikitext/HTML to plain text usable in an attribute
 * (alt text, tooltip).
 *
 * Link holders and strip markers are expanded first, "&" is encoded in
 * front of legacy semicolon-less named entities, and finally the result is
 * passed through Sanitizer::stripAllTags().
 *
 * @param string $caption
 * @param LinkHolderArray|bool $holders
 * @return mixed|string
 */
protected function stripAltText( $caption, $holders ) {
    # Strip bad stuff out of the title (tooltip).  We can't just use
    # replaceLinkHoldersText() here, because if this function is called
    # from replaceInternalLinks2(), mLinkHolders won't be up-to-date.
    $plain = $holders
        ? $holders->replaceText( $caption )
        : $this->replaceLinkHoldersText( $caption );

    # make sure there are no placeholders in thumbnail attributes
    # that are later expanded to html- so expand them now and
    # remove the tags
    $plain = $this->mStripState->unstripBoth( $plain );

    # Compatibility hack!  In HTML certain entity references not terminated
    # by a semicolon are decoded (but not if we're in an attribute; that's
    # how link URLs get away without properly escaping & in queries).
    # But wikitext has always required semicolon-termination of entities,
    # so encode & where needed to avoid decode of semicolon-less entities.
    # See T209236 and
    # https://www.w3.org/TR/html5/syntax.html#named-character-references
    # T210437 discusses moving this workaround to Sanitizer::stripAllTags.
    $legacyEntityAmpersand = "/
        &                       # 1. entity prefix
        (?=                     # 2. followed by:
        (?:                     #  a. one of the legacy semicolon-less named entities
            A(?:Elig|MP|acute|circ|grave|ring|tilde|uml)|
            C(?:OPY|cedil)|E(?:TH|acute|circ|grave|uml)|
            GT|I(?:acute|circ|grave|uml)|LT|Ntilde|
            O(?:acute|circ|grave|slash|tilde|uml)|QUOT|REG|THORN|
            U(?:acute|circ|grave|uml)|Yacute|
            a(?:acute|c(?:irc|ute)|elig|grave|mp|ring|tilde|uml)|brvbar|
            c(?:cedil|edil|urren)|cent(?!erdot;)|copy(?!sr;)|deg|
            divide(?!ontimes;)|e(?:acute|circ|grave|th|uml)|
            frac(?:1(?:2|4)|34)|
            gt(?!c(?:c|ir)|dot|lPar|quest|r(?:a(?:pprox|rr)|dot|eq(?:less|qless)|less|sim);)|
            i(?:acute|circ|excl|grave|quest|uml)|laquo|
            lt(?!c(?:c|ir)|dot|hree|imes|larr|quest|r(?:Par|i(?:e|f|));)|
            m(?:acr|i(?:cro|ddot))|n(?:bsp|tilde)|
            not(?!in(?:E|dot|v(?:a|b|c)|)|ni(?:v(?:a|b|c)|);)|
            o(?:acute|circ|grave|rd(?:f|m)|slash|tilde|uml)|
            p(?:lusmn|ound)|para(?!llel;)|quot|r(?:aquo|eg)|
            s(?:ect|hy|up(?:1|2|3)|zlig)|thorn|times(?!b(?:ar|)|d;)|
            u(?:acute|circ|grave|ml|uml)|y(?:acute|en|uml)
        )
        (?:[^;]|$))     #  b. and not followed by a semicolon
        # S = study, for efficiency
        /Sx";
    $plain = preg_replace( $legacyEntityAmpersand, '&amp;', $plain );

    return Sanitizer::stripAllTags( $plain );
}
5698
/**
 * Set a flag in the output object indicating that the content is dynamic and
 * shouldn't be cached. This is done by setting the cache expiry of the
 * current output object to 0.
 *
 * @deprecated since 1.28; use getOutput()->updateCacheExpiry()
 * @throws MWException If there is no output object, i.e. the parser is not
 *   currently parsing anything
 */
public function disableCache() {
    # Check the precondition first, so that the debug log never claims the
    # output was marked uncacheable when in fact nothing happened.
    if ( !$this->mOutput ) {
        throw new MWException( __METHOD__ .
            " can only be called when actually parsing something" );
    }
    $this->mOutput->updateCacheExpiry( 0 ); // new style, for consistency
    $this->logger->debug( "Parser output marked as uncacheable." );
}
5712
/**
 * Sanitizer callback that expands an HTML attribute value so it can be
 * safely tested and escaped: variables are replaced first, then both kinds
 * of strip markers are unstripped.
 *
 * The expanded text is written back through the reference and also returned.
 *
 * @param string &$text
 * @param bool|PPFrame $frame
 * @return string
 */
public function attributeStripCallback( &$text, $frame = false ) {
    $text = $this->replaceVariables( $text, $frame );

    return $text = $this->mStripState->unstripBoth( $text );
}
5726
/**
 * List the names of all registered tag hooks: transparent tag hooks first,
 * then ordinary tag hooks, then function tag hooks.
 *
 * @return array
 */
public function getTags() {
    $this->firstCallInit();

    $transparentTags = array_keys( $this->mTransparentTagHooks );
    $ordinaryTags = array_keys( $this->mTagHooks );
    $functionTags = array_keys( $this->mFunctionTagHooks );

    return array_merge( $transparentTags, $ordinaryTags, $functionTags );
}
5740
/**
 * Return the function synonym table (mFunctionSynonyms), making sure
 * firstCallInit() has run beforehand.
 *
 * @since 1.32
 * @return array
 */
public function getFunctionSynonyms() {
    $this->firstCallInit();

    return $this->mFunctionSynonyms;
}
5749
/**
 * Return the URL protocols string held by this parser (mUrlProtocols); it is
 * the same value that gets interpolated into the URL-matching regular
 * expressions of this class.
 *
 * @since 1.32
 * @return string
 */
public function getUrlProtocols() {
    return $this->mUrlProtocols;
}
5757
/**
 * Replace transparent tags in $text with the values given by the callbacks.
 *
 * Transparent tag hooks are like regular XML-style tag hooks, except they
 * operate late in the transformation sequence, on HTML instead of wikitext.
 *
 * Each extracted tag is replaced by the return value of its registered
 * callback, which is called with (content, params, parser). A tag without a
 * registered callback is put back unchanged.
 *
 * @param string $text
 *
 * @return string
 */
public function replaceTransparentTags( $text ) {
    $extracted = [];
    $tagNames = array_keys( $this->mTransparentTagHooks );
    $text = self::extractTagsAndParams( $tagNames, $text, $extracted );

    $substitutions = [];
    foreach ( $extracted as $marker => $tagData ) {
        list( $element, $content, $attribs, $originalTag ) = $tagData;
        $hookName = strtolower( $element );

        $substitutions[$marker] = isset( $this->mTransparentTagHooks[$hookName] )
            ? call_user_func_array(
                $this->mTransparentTagHooks[$hookName],
                [ $content, $attribs, $this ]
            )
            : $originalTag;
    }

    return strtr( $text, $substitutions );
}
5789
/**
 * Break wikitext input into sections, and either pull or replace
 * some particular section's text.
 *
 * External callers should use the getSection and replaceSection methods.
 *
 * @param string $text Page wikitext
 * @param string|int $sectionId A section identifier string of the form:
 *   "<flag1> - <flag2> - ... - <section number>"
 *
 * Currently the only recognised flag is "T", which means the target section number
 * was derived during a template inclusion parse, in other words this is a template
 * section edit link. If no flags are given, it was an ordinary section edit link.
 * This flag is required to avoid a section numbering mismatch when a section is
 * enclosed by "<includeonly>" (T8563).
 *
 * The section number 0 pulls the text before the first heading; other numbers will
 * pull the given section along with its lower-level subsections. If the section is
 * not found, $mode=get will return $newText, and $mode=replace will return $text.
 *
 * Section 0 is always considered to exist, even if it only contains the empty
 * string. If $text is the empty string and section 0 is replaced, $newText is
 * returned.
 *
 * @param string $mode One of "get" or "replace"
 * @param string $newText Replacement text for section data.
 * @return string For "get", the extracted section text.
 *   for "replace", the whole page with the section replaced.
 */
private function extractSections( $text, $sectionId, $mode, $newText = '' ) {
    global $wgTitle; # not generally used but removes an ugly failure mode

    # Keep the value returned by lock() in a local for the whole call
    # (presumably a scoped lock that is released when it goes out of scope)
    $scopedLock = $this->lock();
    $this->startParse( $wgTitle, new ParserOptions, self::OT_PLAIN, true );
    $frame = $this->getPreprocessor()->newFrame();
    $result = '';

    # Split the identifier into flags and the trailing section number
    $idParts = explode( '-', $sectionId );
    $sectionIndex = array_pop( $idParts );
    $flags = in_array( 'T', $idParts, true ) ? self::PTD_FOR_INCLUSION : 0;

    # Check for empty input
    if ( strval( $text ) === '' ) {
        # Only sections 0 and T-0 exist in an empty document
        $exists = ( $sectionIndex == 0 );
        if ( $mode === 'get' ) {
            return $exists ? '' : $newText;
        }

        return $exists ? $newText : $text;
    }

    # Preprocess the text
    $root = $this->preprocessToDom( $text, $flags );

    # <h> nodes indicate section breaks
    # They can only occur at the top level, so we can find them by iterating the root's children
    $node = $root->getFirstChild();

    # Find the target section
    if ( $sectionIndex == 0 ) {
        # Section zero doesn't nest, level=big
        $targetLevel = 1000;
    } else {
        for ( ; $node; $node = $node->getNextSibling() ) {
            if ( $node->getName() === 'h' ) {
                $heading = $node->splitHeading();
                if ( $heading['i'] == $sectionIndex ) {
                    $targetLevel = $heading['level'];
                    break;
                }
            }
            # In replace mode everything before the target is kept as-is
            if ( $mode === 'replace' ) {
                $result .= $frame->expand( $node, PPFrame::RECOVER_ORIG );
            }
        }
    }

    if ( !$node ) {
        # Not found
        return ( $mode === 'get' ) ? $newText : $text;
    }

    # Find the end of the section, including nested sections: stop at the
    # first other heading whose level is not deeper than the target's
    for ( ; $node; $node = $node->getNextSibling() ) {
        if ( $node->getName() === 'h' ) {
            $heading = $node->splitHeading();
            if ( $heading['i'] != $sectionIndex && $heading['level'] <= $targetLevel ) {
                break;
            }
        }
        if ( $mode === 'get' ) {
            $result .= $frame->expand( $node, PPFrame::RECOVER_ORIG );
        }
    }

    # Write out the remainder (in replace mode only)
    if ( $mode === 'replace' ) {
        # Output the replacement text
        # Add two newlines on -- trailing whitespace in $newText is conventionally
        # stripped by the editor, so we need both newlines to restore the paragraph gap
        # Only add trailing whitespace if there is newText
        if ( $newText != "" ) {
            $result .= $newText . "\n\n";
        }

        for ( ; $node; $node = $node->getNextSibling() ) {
            $result .= $frame->expand( $node, PPFrame::RECOVER_ORIG );
        }
    }

    # Re-insert stripped tags ($result is always a string at this point)
    return rtrim( $this->mStripState->unstripBoth( $result ) );
}
5929
/**
 * Return the text of one section of a page.
 *
 * A section is text under a heading like == Heading == or \<h1\>Heading\</h1\>, or
 * the first section before any such heading (section 0). Subsections of the
 * requested section are included in the result.
 *
 * @param string $text Text to look in
 * @param string|int $sectionId Section identifier as a number or string
 * (e.g. 0, 1 or 'T-1').
 * @param string $defaultText Default to return if section is not found
 *
 * @return string Text of the requested section
 */
public function getSection( $text, $sectionId, $defaultText = '' ) {
    $sectionText = $this->extractSections( $text, $sectionId, 'get', $defaultText );

    return $sectionText;
}
5947
/**
 * Return $oldText with the content of the section identified by $sectionId
 * replaced by $newText. If the target section does not exist, $oldText is
 * returned unchanged.
 *
 * @param string $oldText Former text of the article
 * @param string|int $sectionId Section identifier as a number or string
 * (e.g. 0, 1 or 'T-1').
 * @param string $newText Replacing text
 *
 * @return string Modified text
 */
public function replaceSection( $oldText, $sectionId, $newText ) {
    $updatedText = $this->extractSections( $oldText, $sectionId, 'replace', $newText );

    return $updatedText;
}
5963
5964         /**
5965          * Get the ID of the revision we are parsing
5966          *
5967          * The return value will be either:
5968          *   - a) Positive, indicating a specific revision ID (current or old)
5969          *   - b) Zero, meaning the revision ID is specified by getCurrentRevisionCallback()
5970          *   - c) Null, meaning the parse is for preview mode and there is no revision
5971          *
5972          * @return int|null
5973          */
5974         public function getRevisionId() {
5975                 return $this->mRevisionId;
5976         }
5977
5978         /**
5979          * Get the revision object for $this->mRevisionId
5980          *
5981          * @return Revision|null Either a Revision object or null
5982          * @since 1.23 (public since 1.23)
5983          */
5984         public function getRevisionObject() {
5985                 if ( $this->mRevisionObject ) {
5986                         return $this->mRevisionObject;
5987                 }
5988
5989                 // NOTE: try to get the RevisionObject even if mRevisionId is null.
5990                 // This is useful when parsing a revision that has not yet been saved.
5991                 // However, if we get back a saved revision even though we are in
5992                 // preview mode, we'll have to ignore it, see below.
5993                 // NOTE: This callback may be used to inject an OLD revision that was
5994                 // already loaded, so "current" is a bit of a misnomer. We can't just
5995                 // skip it if mRevisionId is set.
5996                 $rev = call_user_func(
5997                         $this->mOptions->getCurrentRevisionCallback(),
5998                         $this->getTitle(),
5999                         $this
6000                 );
6001
6002                 if ( $this->mRevisionId === null && $rev && $rev->getId() ) {
6003                         // We are in preview mode (mRevisionId is null), and the current revision callback
6004                         // returned an existing revision. Ignore it and return null, it's probably the page's
6005                         // current revision, which is not what we want here. Note that we do want to call the
6006                         // callback to allow the unsaved revision to be injected here, e.g. for
6007                         // self-transclusion previews.
6008                         return null;
6009                 }
6010
6011                 // If the parse is for a new revision, then the callback should have
6012                 // already been set to force the object and should match mRevisionId.
6013                 // If not, try to fetch by mRevisionId for sanity.
6014                 if ( $this->mRevisionId && $rev && $rev->getId() != $this->mRevisionId ) {
6015                         $rev = Revision::newFromId( $this->mRevisionId );
6016                 }
6017
6018                 $this->mRevisionObject = $rev;
6019
6020                 return $this->mRevisionObject;
6021         }
6022
6023         /**
6024          * Get the timestamp associated with the current revision, adjusted for
6025          * the default server-local timestamp
6026          * @return string TS_MW timestamp
6027          */
6028         public function getRevisionTimestamp() {
6029                 if ( $this->mRevisionTimestamp !== null ) {
6030                         return $this->mRevisionTimestamp;
6031                 }
6032
6033                 # Use specified revision timestamp, falling back to the current timestamp
6034                 $revObject = $this->getRevisionObject();
6035                 $timestamp = $revObject ? $revObject->getTimestamp() : $this->mOptions->getTimestamp();
6036                 $this->mOutput->setRevisionTimestampUsed( $timestamp ); // unadjusted time zone
6037
6038                 # The cryptic '' timezone parameter tells to use the site-default
6039                 # timezone offset instead of the user settings.
6040                 # Since this value will be saved into the parser cache, served
6041                 # to other users, and potentially even used inside links and such,
6042                 # it needs to be consistent for all visitors.
6043                 $this->mRevisionTimestamp = $this->contLang->userAdjust( $timestamp, '' );
6044
6045                 return $this->mRevisionTimestamp;
6046         }
6047
6048         /**
6049          * Get the name of the user that edited the last revision
6050          *
6051          * @return string User name
6052          */
6053         public function getRevisionUser() {
6054                 if ( is_null( $this->mRevisionUser ) ) {
6055                         $revObject = $this->getRevisionObject();
6056
6057                         # if this template is subst: the revision id will be blank,
6058                         # so just use the current user's name
6059                         if ( $revObject ) {
6060                                 $this->mRevisionUser = $revObject->getUserText();
6061                         } elseif ( $this->ot['wiki'] || $this->mOptions->getIsPreview() ) {
6062                                 $this->mRevisionUser = $this->getUser()->getName();
6063                         }
6064                 }
6065                 return $this->mRevisionUser;
6066         }
6067
6068         /**
6069          * Get the size of the revision
6070          *
6071          * @return int|null Revision size
6072          */
6073         public function getRevisionSize() {
6074                 if ( is_null( $this->mRevisionSize ) ) {
6075                         $revObject = $this->getRevisionObject();
6076
6077                         # if this variable is subst: the revision id will be blank,
6078                         # so just use the parser input size, because the own substituation
6079                         # will change the size.
6080                         if ( $revObject ) {
6081                                 $this->mRevisionSize = $revObject->getSize();
6082                         } else {
6083                                 $this->mRevisionSize = $this->mInputSize;
6084                         }
6085                 }
6086                 return $this->mRevisionSize;
6087         }
6088
6089         /**
6090          * Mutator for $mDefaultSort
6091          *
6092          * @param string $sort New value
6093          */
6094         public function setDefaultSort( $sort ) {
6095                 $this->mDefaultSort = $sort;
6096                 $this->mOutput->setProperty( 'defaultsort', $sort );
6097         }
6098
6099         /**
6100          * Accessor for $mDefaultSort
6101          * Will use the empty string if none is set.
6102          *
6103          * This value is treated as a prefix, so the
6104          * empty string is equivalent to sorting by
6105          * page name.
6106          *
6107          * @return string
6108          */
6109         public function getDefaultSort() {
6110                 if ( $this->mDefaultSort !== false ) {
6111                         return $this->mDefaultSort;
6112                 } else {
6113                         return '';
6114                 }
6115         }
6116
6117         /**
6118          * Accessor for $mDefaultSort
6119          * Unlike getDefaultSort(), will return false if none is set
6120          *
6121          * @return string|bool
6122          */
6123         public function getCustomDefaultSort() {
6124                 return $this->mDefaultSort;
6125         }
6126
6127         private static function getSectionNameFromStrippedText( $text ) {
6128                 $text = Sanitizer::normalizeSectionNameWhitespace( $text );
6129                 $text = Sanitizer::decodeCharReferences( $text );
6130                 $text = self::normalizeSectionName( $text );
6131                 return $text;
6132         }
6133
6134         private static function makeAnchor( $sectionName ) {
6135                 return '#' . Sanitizer::escapeIdForLink( $sectionName );
6136         }
6137
6138         private function makeLegacyAnchor( $sectionName ) {
6139                 $fragmentMode = $this->svcOptions->get( 'FragmentMode' );
6140                 if ( isset( $fragmentMode[1] ) && $fragmentMode[1] === 'legacy' ) {
6141                         // ForAttribute() and ForLink() are the same for legacy encoding
6142                         $id = Sanitizer::escapeIdForAttribute( $sectionName, Sanitizer::ID_FALLBACK );
6143                 } else {
6144                         $id = Sanitizer::escapeIdForLink( $sectionName );
6145                 }
6146
6147                 return "#$id";
6148         }
6149
6150         /**
6151          * Try to guess the section anchor name based on a wikitext fragment
6152          * presumably extracted from a heading, for example "Header" from
6153          * "== Header ==".
6154          *
6155          * @param string $text
6156          * @return string Anchor (starting with '#')
6157          */
6158         public function guessSectionNameFromWikiText( $text ) {
6159                 # Strip out wikitext links(they break the anchor)
6160                 $text = $this->stripSectionName( $text );
6161                 $sectionName = self::getSectionNameFromStrippedText( $text );
6162                 return self::makeAnchor( $sectionName );
6163         }
6164
6165         /**
6166          * Same as guessSectionNameFromWikiText(), but produces legacy anchors
6167          * instead, if possible. For use in redirects, since various versions
6168          * of Microsoft browsers interpret Location: headers as something other
6169          * than UTF-8, resulting in breakage.
6170          *
6171          * @param string $text The section name
6172          * @return string Anchor (starting with '#')
6173          */
6174         public function guessLegacySectionNameFromWikiText( $text ) {
6175                 # Strip out wikitext links(they break the anchor)
6176                 $text = $this->stripSectionName( $text );
6177                 $sectionName = self::getSectionNameFromStrippedText( $text );
6178                 return $this->makeLegacyAnchor( $sectionName );
6179         }
6180
6181         /**
6182          * Like guessSectionNameFromWikiText(), but takes already-stripped text as input.
6183          * @param string $text Section name (plain text)
6184          * @return string Anchor (starting with '#')
6185          */
6186         public static function guessSectionNameFromStrippedText( $text ) {
6187                 $sectionName = self::getSectionNameFromStrippedText( $text );
6188                 return self::makeAnchor( $sectionName );
6189         }
6190
6191         /**
6192          * Apply the same normalization as code making links to this section would
6193          *
6194          * @param string $text
6195          * @return string
6196          */
6197         private static function normalizeSectionName( $text ) {
6198                 # T90902: ensure the same normalization is applied for IDs as to links
6199                 /** @var MediaWikiTitleCodec $titleParser */
6200                 $titleParser = MediaWikiServices::getInstance()->getTitleParser();
6201                 '@phan-var MediaWikiTitleCodec $titleParser';
6202                 try {
6203
6204                         $parts = $titleParser->splitTitleString( "#$text" );
6205                 } catch ( MalformedTitleException $ex ) {
6206                         return $text;
6207                 }
6208                 return $parts['fragment'];
6209         }
6210
6211         /**
6212          * Strips a text string of wikitext for use in a section anchor
6213          *
6214          * Accepts a text string and then removes all wikitext from the
6215          * string and leaves only the resultant text (i.e. the result of
6216          * [[User:WikiSysop|Sysop]] would be "Sysop" and the result of
6217          * [[User:WikiSysop]] would be "User:WikiSysop") - this is intended
6218          * to create valid section anchors by mimicing the output of the
6219          * parser when headings are parsed.
6220          *
6221          * @param string $text Text string to be stripped of wikitext
6222          * for use in a Section anchor
6223          * @return string Filtered text string
6224          */
6225         public function stripSectionName( $text ) {
6226                 # Strip internal link markup
6227                 $text = preg_replace( '/\[\[:?([^[|]+)\|([^[]+)\]\]/', '$2', $text );
6228                 $text = preg_replace( '/\[\[:?([^[]+)\|?\]\]/', '$1', $text );
6229
6230                 # Strip external link markup
6231                 # @todo FIXME: Not tolerant to blank link text
6232                 # I.E. [https://www.mediawiki.org] will render as [1] or something depending
6233                 # on how many empty links there are on the page - need to figure that out.
6234                 $text = preg_replace( '/\[(?i:' . $this->mUrlProtocols . ')([^ ]+?) ([^[]+)\]/', '$2', $text );
6235
6236                 # Parse wikitext quotes (italics & bold)
6237                 $text = $this->doQuotes( $text );
6238
6239                 # Strip HTML tags
6240                 $text = StringUtils::delimiterReplace( '<', '>', '', $text );
6241                 return $text;
6242         }
6243
6244         /**
6245          * strip/replaceVariables/unstrip for preprocessor regression testing
6246          *
6247          * @param string $text
6248          * @param Title $title
6249          * @param ParserOptions $options
6250          * @param int $outputType
6251          *
6252          * @return string
6253          */
6254         public function testSrvus( $text, Title $title, ParserOptions $options,
6255                 $outputType = self::OT_HTML
6256         ) {
6257                 $magicScopeVariable = $this->lock();
6258                 $this->startParse( $title, $options, $outputType, true );
6259
6260                 $text = $this->replaceVariables( $text );
6261                 $text = $this->mStripState->unstripBoth( $text );
6262                 $text = Sanitizer::removeHTMLtags( $text );
6263                 return $text;
6264         }
6265
6266         /**
6267          * @param string $text
6268          * @param Title $title
6269          * @param ParserOptions $options
6270          * @return string
6271          */
6272         public function testPst( $text, Title $title, ParserOptions $options ) {
6273                 return $this->preSaveTransform( $text, $title, $options->getUser(), $options );
6274         }
6275
6276         /**
6277          * @param string $text
6278          * @param Title $title
6279          * @param ParserOptions $options
6280          * @return string
6281          */
6282         public function testPreprocess( $text, Title $title, ParserOptions $options ) {
6283                 return $this->testSrvus( $text, $title, $options, self::OT_PREPROCESS );
6284         }
6285
6286         /**
6287          * Call a callback function on all regions of the given text that are not
6288          * inside strip markers, and replace those regions with the return value
6289          * of the callback. For example, with input:
6290          *
6291          *  aaa<MARKER>bbb
6292          *
6293          * This will call the callback function twice, with 'aaa' and 'bbb'. Those
6294          * two strings will be replaced with the value returned by the callback in
6295          * each case.
6296          *
6297          * @param string $s
6298          * @param callable $callback
6299          *
6300          * @return string
6301          */
6302         public function markerSkipCallback( $s, $callback ) {
6303                 $i = 0;
6304                 $out = '';
6305                 while ( $i < strlen( $s ) ) {
6306                         $markerStart = strpos( $s, self::MARKER_PREFIX, $i );
6307                         if ( $markerStart === false ) {
6308                                 $out .= call_user_func( $callback, substr( $s, $i ) );
6309                                 break;
6310                         } else {
6311                                 $out .= call_user_func( $callback, substr( $s, $i, $markerStart - $i ) );
6312                                 $markerEnd = strpos( $s, self::MARKER_SUFFIX, $markerStart );
6313                                 if ( $markerEnd === false ) {
6314                                         $out .= substr( $s, $markerStart );
6315                                         break;
6316                                 } else {
6317                                         $markerEnd += strlen( self::MARKER_SUFFIX );
6318                                         $out .= substr( $s, $markerStart, $markerEnd - $markerStart );
6319                                         $i = $markerEnd;
6320                                 }
6321                         }
6322                 }
6323                 return $out;
6324         }
6325
6326         /**
6327          * Remove any strip markers found in the given text.
6328          *
6329          * @param string $text
6330          * @return string
6331          */
6332         public function killMarkers( $text ) {
6333                 return $this->mStripState->killMarkers( $text );
6334         }
6335
6336         /**
6337          * Save the parser state required to convert the given half-parsed text to
6338          * HTML. "Half-parsed" in this context means the output of
6339          * recursiveTagParse() or internalParse(). This output has strip markers
6340          * from replaceVariables (extensionSubstitution() etc.), and link
6341          * placeholders from replaceLinkHolders().
6342          *
6343          * Returns an array which can be serialized and stored persistently. This
6344          * array can later be loaded into another parser instance with
6345          * unserializeHalfParsedText(). The text can then be safely incorporated into
6346          * the return value of a parser hook.
6347          *
6348          * @deprecated since 1.31
6349          * @param string $text
6350          *
6351          * @return array
6352          */
6353         public function serializeHalfParsedText( $text ) {
6354                 wfDeprecated( __METHOD__, '1.31' );
6355                 $data = [
6356                         'text' => $text,
6357                         'version' => self::HALF_PARSED_VERSION,
6358                         'stripState' => $this->mStripState->getSubState( $text ),
6359                         'linkHolders' => $this->mLinkHolders->getSubArray( $text )
6360                 ];
6361                 return $data;
6362         }
6363
6364         /**
6365          * Load the parser state given in the $data array, which is assumed to
6366          * have been generated by serializeHalfParsedText(). The text contents is
6367          * extracted from the array, and its markers are transformed into markers
6368          * appropriate for the current Parser instance. This transformed text is
6369          * returned, and can be safely included in the return value of a parser
6370          * hook.
6371          *
6372          * If the $data array has been stored persistently, the caller should first
6373          * check whether it is still valid, by calling isValidHalfParsedText().
6374          *
6375          * @deprecated since 1.31
6376          * @param array $data Serialized data
6377          * @throws MWException
6378          * @return string
6379          */
6380         public function unserializeHalfParsedText( $data ) {
6381                 wfDeprecated( __METHOD__, '1.31' );
6382                 if ( !isset( $data['version'] ) || $data['version'] != self::HALF_PARSED_VERSION ) {
6383                         throw new MWException( __METHOD__ . ': invalid version' );
6384                 }
6385
6386                 # First, extract the strip state.
6387                 $texts = [ $data['text'] ];
6388                 $texts = $this->mStripState->merge( $data['stripState'], $texts );
6389
6390                 # Now renumber links
6391                 $texts = $this->mLinkHolders->mergeForeign( $data['linkHolders'], $texts );
6392
6393                 # Should be good to go.
6394                 return $texts[0];
6395         }
6396
6397         /**
6398          * Returns true if the given array, presumed to be generated by
6399          * serializeHalfParsedText(), is compatible with the current version of the
6400          * parser.
6401          *
6402          * @deprecated since 1.31
6403          * @param array $data
6404          *
6405          * @return bool
6406          */
6407         public function isValidHalfParsedText( $data ) {
6408                 wfDeprecated( __METHOD__, '1.31' );
6409                 return isset( $data['version'] ) && $data['version'] == self::HALF_PARSED_VERSION;
6410         }
6411
6412         /**
6413          * Parsed a width param of imagelink like 300px or 200x300px
6414          *
6415          * @param string $value
6416          * @param bool $parseHeight
6417          *
6418          * @return array
6419          * @since 1.20
6420          */
6421         public static function parseWidthParam( $value, $parseHeight = true ) {
6422                 $parsedWidthParam = [];
6423                 if ( $value === '' ) {
6424                         return $parsedWidthParam;
6425                 }
6426                 $m = [];
6427                 # (T15500) In both cases (width/height and width only),
6428                 # permit trailing "px" for backward compatibility.
6429                 if ( $parseHeight && preg_match( '/^([0-9]*)x([0-9]*)\s*(?:px)?\s*$/', $value, $m ) ) {
6430                         $width = intval( $m[1] );
6431                         $height = intval( $m[2] );
6432                         $parsedWidthParam['width'] = $width;
6433                         $parsedWidthParam['height'] = $height;
6434                 } elseif ( preg_match( '/^[0-9]*\s*(?:px)?\s*$/', $value ) ) {
6435                         $width = intval( $value );
6436                         $parsedWidthParam['width'] = $width;
6437                 }
6438                 return $parsedWidthParam;
6439         }
6440
6441         /**
6442          * Lock the current instance of the parser.
6443          *
6444          * This is meant to stop someone from calling the parser
6445          * recursively and messing up all the strip state.
6446          *
6447          * @throws MWException If parser is in a parse
6448          * @return ScopedCallback The lock will be released once the return value goes out of scope.
6449          */
6450         protected function lock() {
6451                 if ( $this->mInParse ) {
6452                         throw new MWException( "Parser state cleared while parsing. "
6453                                 . "Did you call Parser::parse recursively? Lock is held by: " . $this->mInParse );
6454                 }
6455
6456                 // Save the backtrace when locking, so that if some code tries locking again,
6457                 // we can print the lock owner's backtrace for easier debugging
6458                 $e = new Exception;
6459                 $this->mInParse = $e->getTraceAsString();
6460
6461                 $recursiveCheck = new ScopedCallback( function () {
6462                         $this->mInParse = false;
6463                 } );
6464
6465                 return $recursiveCheck;
6466         }
6467
6468         /**
6469          * Strip outer <p></p> tag from the HTML source of a single paragraph.
6470          *
6471          * Returns original HTML if the <p/> tag has any attributes, if there's no wrapping <p/> tag,
6472          * or if there is more than one <p/> tag in the input HTML.
6473          *
6474          * @param string $html
6475          * @return string
6476          * @since 1.24
6477          */
6478         public static function stripOuterParagraph( $html ) {
6479                 $m = [];
6480                 if ( preg_match( '/^<p>(.*)\n?<\/p>\n?$/sU', $html, $m ) && strpos( $m[1], '</p>' ) === false ) {
6481                         $html = $m[1];
6482                 }
6483
6484                 return $html;
6485         }
6486
6487         /**
6488          * Return this parser if it is not doing anything, otherwise
6489          * get a fresh parser. You can use this method by doing
6490          * $newParser = $oldParser->getFreshParser(), or more simply
6491          * $oldParser->getFreshParser()->parse( ... );
6492          * if you're unsure if $oldParser is safe to use.
6493          *
6494          * @since 1.24
6495          * @return Parser A parser object that is not parsing anything
6496          */
6497         public function getFreshParser() {
6498                 if ( $this->mInParse ) {
6499                         return $this->factory->create();
6500                 } else {
6501                         return $this;
6502                 }
6503         }
6504
6505         /**
6506          * Set's up the PHP implementation of OOUI for use in this request
6507          * and instructs OutputPage to enable OOUI for itself.
6508          *
6509          * @since 1.26
6510          */
6511         public function enableOOUI() {
6512                 OutputPage::setupOOUI();
6513                 $this->mOutput->setEnableOOUI( true );
6514         }
6515
6516         /**
6517          * @param string $flag
6518          * @param string $reason
6519          */
6520         protected function setOutputFlag( $flag, $reason ) {
6521                 $this->mOutput->setFlag( $flag );
6522                 $name = $this->mTitle->getPrefixedText();
6523                 $this->logger->debug( __METHOD__ . ": set $flag flag on '$name'; $reason" );
6524         }
6525 }