includes/parser/Parser.php

   1 <?php
   2 /**
   3  * PHP parser that converts wiki markup to HTML.
   4  *
   5  * This program is free software; you can redistribute it and/or modify
   6  * it under the terms of the GNU General Public License as published by
   7  * the Free Software Foundation; either version 2 of the License, or
   8  * (at your option) any later version.
   9  *
  10  * This program is distributed in the hope that it will be useful,
  11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  13  * GNU General Public License for more details.
  14  *
  15  * You should have received a copy of the GNU General Public License along
  16  * with this program; if not, write to the Free Software Foundation, Inc.,
  17  * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
  18  * http://www.gnu.org/copyleft/gpl.html
  19  *
  20  * @file
  21  * @ingroup Parser
  22  */
  23 use MediaWiki\Config\ServiceOptions;
  24 use MediaWiki\Linker\LinkRenderer;
  25 use MediaWiki\Linker\LinkRendererFactory;
  26 use MediaWiki\Linker\LinkTarget;
  27 use MediaWiki\MediaWikiServices;
  28 use MediaWiki\Special\SpecialPageFactory;
  29 use Psr\Log\NullLogger;
  30 use Wikimedia\ScopedCallback;
  31 use Psr\Log\LoggerInterface;
  32
  33 /**
  34  * @defgroup Parser Parser
  35  */
  36
  37 /**
  38  * PHP Parser - Processes wiki markup (which uses a more user-friendly
  39  * syntax, such as "[[link]]" for making links), and provides a one-way
   40  * transformation of that wiki markup into (X)HTML output / markup
  41  * (which in turn the browser understands, and can display).
  42  *
  43  * There are seven main entry points into the Parser class:
  44  *
  45  * - Parser::parse()
  46  *     produces HTML output
  47  * - Parser::preSaveTransform()
  48  *     produces altered wiki markup
  49  * - Parser::preprocess()
  50  *     removes HTML comments and expands templates
  51  * - Parser::cleanSig() and Parser::cleanSigInSig()
  52  *     cleans a signature before saving it to preferences
  53  * - Parser::getSection()
  54  *     return the content of a section from an article for section editing
  55  * - Parser::replaceSection()
  56  *     replaces a section by number inside an article
  57  * - Parser::getPreloadText()
  58  *     removes <noinclude> sections and <includeonly> tags
  59  *
  60  * @warning $wgUser or $wgTitle or $wgRequest or $wgLang. Keep them away!
  61  *
  62  * @par Settings:
  63  * $wgNamespacesWithSubpages
  64  *
  65  * @par Settings only within ParserOptions:
  66  * $wgAllowExternalImages
  67  * $wgAllowSpecialInclusion
  68  * $wgInterwikiMagic
  69  * $wgMaxArticleSize
  70  *
  71  * @ingroup Parser
  72  */
  73 class Parser {
  74         /**
  75          * Update this version number when the ParserOutput format
  76          * changes in an incompatible way, so the parser cache
  77          * can automatically discard old data.
  78          */
  79         const VERSION = '1.6.4';
  80
  81         /**
  82          * Update this version number when the output of serialiseHalfParsedText()
  83          * changes in an incompatible way
  84          */
  85         const HALF_PARSED_VERSION = 2;
  86
  87         # Flags for Parser::setFunctionHook
  88         const SFH_NO_HASH = 1;
  89         const SFH_OBJECT_ARGS = 2;
  90
  91         # Constants needed for external link processing
  92         # Everything except bracket, space, or control characters
  93         # \p{Zs} is unicode 'separator, space' category. It covers the space 0x20
   94         # as well as U+3000 IDEOGRAPHIC SPACE (T21052)
  95         # \x{FFFD} is the Unicode replacement character, which Preprocessor_DOM
  96         # uses to replace invalid HTML characters.
  97         const EXT_LINK_URL_CLASS = '[^][<>"\\x00-\\x20\\x7F\p{Zs}\x{FFFD}]';
  98         # Simplified expression to match an IPv4 or IPv6 address, or
  99         # at least one character of a host name (embeds EXT_LINK_URL_CLASS)
 100         const EXT_LINK_ADDR = '(?:[0-9.]+|\\[(?i:[0-9a-f:.]+)\\]|[^][<>"\\x00-\\x20\\x7F\p{Zs}\x{FFFD}])';
 101         # RegExp to make image URLs (embeds IPv6 part of EXT_LINK_ADDR)
 102         // phpcs:ignore Generic.Files.LineLength
 103         const EXT_IMAGE_REGEX = '/^(http:\/\/|https:\/\/)((?:\\[(?i:[0-9a-f:.]+)\\])?[^][<>"\\x00-\\x20\\x7F\p{Zs}\x{FFFD}]+)
 104                 \\/([A-Za-z0-9_.,~%\\-+&;#*?!=()@\\x80-\\xFF]+)\\.((?i)gif|png|jpg|jpeg)$/Sxu';
 105
 106         # Regular expression for a non-newline space
 107         const SPACE_NOT_NL = '(?:\t|&nbsp;|&\#0*160;|&\#[Xx]0*[Aa]0;|\p{Zs})';
 108
 109         # Flags for preprocessToDom
 110         const PTD_FOR_INCLUSION = 1;
 111
 112         # Allowed values for $this->mOutputType
 113         # Parameter to startExternalParse().
 114         const OT_HTML = 1; # like parse()
 115         const OT_WIKI = 2; # like preSaveTransform()
 116         const OT_PREPROCESS = 3; # like preprocess()
 117         const OT_MSG = 3;
 118         const OT_PLAIN = 4; # like extractSections() - portions of the original are returned unchanged.
 119
 120         /**
 121          * @var string Prefix and suffix for temporary replacement strings
 122          * for the multipass parser.
 123          *
 124          * \x7f should never appear in input as it's disallowed in XML.
 125          * Using it at the front also gives us a little extra robustness
 126          * since it shouldn't match when butted up against identifier-like
 127          * string constructs.
 128          *
 129          * Must not consist of all title characters, or else it will change
 130          * the behavior of <nowiki> in a link.
 131          *
 132          * Must have a character that needs escaping in attributes, otherwise
 133          * someone could put a strip marker in an attribute, to get around
 134          * escaping quote marks, and break out of the attribute. Thus we add
 135          * `'".
 136          */
 137         const MARKER_SUFFIX = "-QINU`\"'\x7f";
 138         const MARKER_PREFIX = "\x7f'\"`UNIQ-";
 139
 140         # Markers used for wrapping the table of contents
 141         const TOC_START = '<mw:toc>';
 142         const TOC_END = '</mw:toc>';
 143
 144         /** @var int Assume that no output will later be saved this many seconds after parsing */
 145         const MAX_TTS = 900;
 146
 147         # Persistent:
 148         public $mTagHooks = [];
 149         public $mTransparentTagHooks = [];
 150         public $mFunctionHooks = [];
 151         public $mFunctionSynonyms = [ 0 => [], 1 => [] ];
 152         public $mFunctionTagHooks = [];
 153         public $mStripList = [];
 154         public $mDefaultStripList = [];
 155         public $mVarCache = [];
 156         public $mImageParams = [];
 157         public $mImageParamsMagicArray = [];
 158         public $mMarkerIndex = 0;
 159         /**
 160          * @var bool Whether firstCallInit still needs to be called
 161          */
 162         public $mFirstCall = true;
 163
 164         # Initialised by initialiseVariables()
 165
 166         /**
 167          * @var MagicWordArray
 168          */
 169         public $mVariables;
 170
 171         /**
 172          * @var MagicWordArray
 173          */
 174         public $mSubstWords;
 175
 176         /**
 177          * @deprecated since 1.34, there should be no need to use this
 178          * @var array
 179          */
 180         public $mConf;
 181
 182         # Initialised in constructor
 183         public $mExtLinkBracketedRegex, $mUrlProtocols;
 184
 185         # Initialized in getPreprocessor()
 186         /** @var Preprocessor */
 187         public $mPreprocessor;
 188
 189         # Cleared with clearState():
 190         /**
 191          * @var ParserOutput
 192          */
 193         public $mOutput;
 194         public $mAutonumber;
 195
 196         /**
 197          * @var StripState
 198          */
 199         public $mStripState;
 200
 201         public $mIncludeCount;
 202         /**
 203          * @var LinkHolderArray
 204          */
 205         public $mLinkHolders;
 206
 207         public $mLinkID;
 208         public $mIncludeSizes, $mPPNodeCount, $mGeneratedPPNodeCount, $mHighestExpansionDepth;
 209         public $mDefaultSort;
 210         public $mTplRedirCache, $mHeadings, $mDoubleUnderscores;
 211         public $mExpensiveFunctionCount; # number of expensive parser function calls
 212         public $mShowToc, $mForceTocPosition;
 213         /** @var array */
 214         public $mTplDomCache;
 215
 216         /**
 217          * @var User
 218          */
 219         public $mUser; # User object; only used when doing pre-save transform
 220
 221         # Temporary
 222         # These are variables reset at least once per parse regardless of $clearState
 223
 224         /**
 225          * @var ParserOptions
 226          */
 227         public $mOptions;
 228
 229         /**
 230          * @var Title
 231          */
 232         public $mTitle;        # Title context, used for self-link rendering and similar things
 233         public $mOutputType;   # Output type, one of the OT_xxx constants
 234         public $ot;            # Shortcut alias, see setOutputType()
 235         public $mRevisionObject; # The revision object of the specified revision ID
 236         public $mRevisionId;   # ID to display in {{REVISIONID}} tags
 237         public $mRevisionTimestamp; # The timestamp of the specified revision ID
 238         public $mRevisionUser; # User to display in {{REVISIONUSER}} tag
 239         public $mRevisionSize; # Size to display in {{REVISIONSIZE}} variable
 240         public $mRevIdForTs;   # The revision ID which was used to fetch the timestamp
 241         public $mInputSize = false; # For {{PAGESIZE}} on current page.
 242
 243         /**
 244          * @var array Array with the language name of each language link (i.e. the
 245          * interwiki prefix) in the key, value arbitrary. Used to avoid sending
 246          * duplicate language links to the ParserOutput.
 247          */
 248         public $mLangLinkLanguages;
 249
 250         /**
 251          * @var MapCacheLRU|null
 252          * @since 1.24
 253          *
 254          * A cache of the current revisions of titles. Keys are $title->getPrefixedDbKey()
 255          */
 256         public $currentRevisionCache;
 257
 258         /**
 259          * @var bool|string Recursive call protection.
 260          * This variable should be treated as if it were private.
 261          */
 262         public $mInParse = false;
 263
 264         /** @var SectionProfiler */
 265         protected $mProfiler;
 266
 267         /**
 268          * @var LinkRenderer
 269          */
 270         protected $mLinkRenderer;
 271
 272         /** @var MagicWordFactory */
 273         private $magicWordFactory;
 274
 275         /** @var Language */
 276         private $contLang;
 277
 278         /** @var ParserFactory */
 279         private $factory;
 280
 281         /** @var SpecialPageFactory */
 282         private $specialPageFactory;
 283
 284         /**
 285          * This is called $svcOptions instead of $options like elsewhere to avoid confusion with
 286          * $mOptions, which is public and widely used, and also with the local variable $options used
 287          * for ParserOptions throughout this file.
 288          *
 289          * @var ServiceOptions
 290          */
 291         private $svcOptions;
 292
 293         /** @var LinkRendererFactory */
 294         private $linkRendererFactory;
 295
 296         /** @var NamespaceInfo */
 297         private $nsInfo;
 298
 299         /** @var LoggerInterface */
 300         private $logger;
 301
 302         /**
 303          * TODO Make this a const when HHVM support is dropped (T192166)
 304          *
 305          * @var array
 306          * @since 1.33
 307          */
 308         public static $constructorOptions = [
 309                 // See $wgParserConf documentation
 310                 'class',
 311                 'preprocessorClass',
 312                 // See documentation for the corresponding config options
 313                 'ArticlePath',
 314                 'EnableScaryTranscluding',
 315                 'ExtraInterlanguageLinkPrefixes',
 316                 'FragmentMode',
 317                 'LanguageCode',
 318                 'MaxSigChars',
 319                 'MaxTocLevel',
 320                 'MiserMode',
 321                 'ScriptPath',
 322                 'Server',
 323                 'ServerName',
 324                 'ShowHostnames',
 325                 'Sitename',
 326                 'StylePath',
 327                 'TranscludeCacheExpiry',
 328         ];
 329
 330         /**
 331          * Constructing parsers directly is deprecated! Use a ParserFactory.
 332          *
 333          * @param ServiceOptions|null $svcOptions
 334          * @param MagicWordFactory|null $magicWordFactory
 335          * @param Language|null $contLang Content language
 336          * @param ParserFactory|null $factory
 337          * @param string|null $urlProtocols As returned from wfUrlProtocols()
 338          * @param SpecialPageFactory|null $spFactory
 339          * @param LinkRendererFactory|null $linkRendererFactory
 340          * @param NamespaceInfo|null $nsInfo
 341          * @param LoggerInterface|null $logger
 342          */
 343         public function __construct(
 344                 $svcOptions = null,
 345                 MagicWordFactory $magicWordFactory = null,
 346                 Language $contLang = null,
 347                 ParserFactory $factory = null,
 348                 $urlProtocols = null,
 349                 SpecialPageFactory $spFactory = null,
 350                 $linkRendererFactory = null,
 351                 $nsInfo = null,
 352                 $logger = null
 353         ) {
 354                 if ( !$svcOptions || is_array( $svcOptions ) ) {
 355                         // Pre-1.34 calling convention is the first parameter is just ParserConf, the seventh is
 356                         // Config, and the eighth is LinkRendererFactory.
 357                         $this->mConf = (array)$svcOptions;
 358                         if ( empty( $this->mConf['class'] ) ) {
 359                                 $this->mConf['class'] = self::class;
 360                         }
 361                         if ( empty( $this->mConf['preprocessorClass'] ) ) {
 362                                 $this->mConf['preprocessorClass'] = self::getDefaultPreprocessorClass();
 363                         }
 364                         $this->svcOptions = new ServiceOptions( self::$constructorOptions,
 365                                 $this->mConf, func_num_args() > 6
 366                                         ? func_get_arg( 6 ) : MediaWikiServices::getInstance()->getMainConfig()
 367                         );
 368                         $linkRendererFactory = func_num_args() > 7 ? func_get_arg( 7 ) : null;
 369                         $nsInfo = func_num_args() > 8 ? func_get_arg( 8 ) : null;
 370                 } else {
 371                         // New calling convention
 372                         $svcOptions->assertRequiredOptions( self::$constructorOptions );
 373                         // $this->mConf is public, so we'll keep those two options there as well for
 374                         // compatibility until it's removed
 375                         $this->mConf = [
 376                                 'class' => $svcOptions->get( 'class' ),
 377                                 'preprocessorClass' => $svcOptions->get( 'preprocessorClass' ),
 378                         ];
 379                         $this->svcOptions = $svcOptions;
 380                 }
 381
 382                 $this->mUrlProtocols = $urlProtocols ?? wfUrlProtocols();
 383                 $this->mExtLinkBracketedRegex = '/\[(((?i)' . $this->mUrlProtocols . ')' .
 384                         self::EXT_LINK_ADDR .
 385                         self::EXT_LINK_URL_CLASS . '*)\p{Zs}*([^\]\\x00-\\x08\\x0a-\\x1F\\x{FFFD}]*?)\]/Su';
 386
 387                 $this->magicWordFactory = $magicWordFactory ??
 388                         MediaWikiServices::getInstance()->getMagicWordFactory();
 389
 390                 $this->contLang = $contLang ?? MediaWikiServices::getInstance()->getContentLanguage();
 391
 392                 $this->factory = $factory ?? MediaWikiServices::getInstance()->getParserFactory();
 393                 $this->specialPageFactory = $spFactory ??
 394                         MediaWikiServices::getInstance()->getSpecialPageFactory();
 395                 $this->linkRendererFactory = $linkRendererFactory ??
 396                         MediaWikiServices::getInstance()->getLinkRendererFactory();
 397                 $this->nsInfo = $nsInfo ?? MediaWikiServices::getInstance()->getNamespaceInfo();
 398                 $this->logger = $logger ?: new NullLogger();
 399         }
 400
 401         /**
 402          * Reduce memory usage to reduce the impact of circular references
 403          */
 404         public function __destruct() {
 405                 if ( isset( $this->mLinkHolders ) ) {
 406                         unset( $this->mLinkHolders );
 407                 }
 408                 foreach ( $this as $name => $value ) {
 409                         unset( $this->$name );
 410                 }
 411         }
 412
 413         /**
 414          * Allow extensions to clean up when the parser is cloned
 415          */
 416         public function __clone() {
 417                 $this->mInParse = false;
 418
 419                 // T58226: When you create a reference "to" an object field, that
 420                 // makes the object field itself be a reference too (until the other
 421                 // reference goes out of scope). When cloning, any field that's a
 422                 // reference is copied as a reference in the new object. Both of these
 423                 // are defined PHP5 behaviors, as inconvenient as it is for us when old
 424                 // hooks from PHP4 days are passing fields by reference.
 425                 foreach ( [ 'mStripState', 'mVarCache' ] as $k ) {
 426                         // Make a non-reference copy of the field, then rebind the field to
 427                         // reference the new copy.
 428                         $tmp = $this->$k;
 429                         $this->$k =& $tmp;
 430                         unset( $tmp );
 431                 }
 432
 433                 Hooks::run( 'ParserCloned', [ $this ] );
 434         }
 435
 436         /**
 437          * Which class should we use for the preprocessor if not otherwise specified?
 438          *
 439          * @since 1.34
 440          * @deprecated since 1.34, removing configurability of preprocessor
 441          * @return string
 442          */
 443         public static function getDefaultPreprocessorClass() {
 444                 return Preprocessor_Hash::class;
 445         }
 446
 447         /**
 448          * Do various kinds of initialisation on the first call of the parser
 449          */
 450         public function firstCallInit() {
 451                 if ( !$this->mFirstCall ) {
 452                         return;
 453                 }
 454                 $this->mFirstCall = false;
 455
 456                 CoreParserFunctions::register( $this );
 457                 CoreTagHooks::register( $this );
 458                 $this->initialiseVariables();
 459
 460                 // Avoid PHP 7.1 warning from passing $this by reference
 461                 $parser = $this;
 462                 Hooks::run( 'ParserFirstCallInit', [ &$parser ] );
 463         }
 464
 465         /**
 466          * Clear Parser state
 467          *
 468          * @private
 469          */
 470         public function clearState() {
 471                 $this->firstCallInit();
 472                 $this->resetOutput();
 473                 $this->mAutonumber = 0;
 474                 $this->mIncludeCount = [];
 475                 $this->mLinkHolders = new LinkHolderArray( $this );
 476                 $this->mLinkID = 0;
 477                 $this->mRevisionObject = $this->mRevisionTimestamp =
 478                         $this->mRevisionId = $this->mRevisionUser = $this->mRevisionSize = null;
 479                 $this->mVarCache = [];
 480                 $this->mUser = null;
 481                 $this->mLangLinkLanguages = [];
 482                 $this->currentRevisionCache = null;
 483
 484                 $this->mStripState = new StripState( $this );
 485
 486                 # Clear these on every parse, T6549
 487                 $this->mTplRedirCache = $this->mTplDomCache = [];
 488
 489                 $this->mShowToc = true;
 490                 $this->mForceTocPosition = false;
 491                 $this->mIncludeSizes = [
 492                         'post-expand' => 0,
 493                         'arg' => 0,
 494                 ];
 495                 $this->mPPNodeCount = 0;
 496                 $this->mGeneratedPPNodeCount = 0;
 497                 $this->mHighestExpansionDepth = 0;
 498                 $this->mDefaultSort = false;
 499                 $this->mHeadings = [];
 500                 $this->mDoubleUnderscores = [];
 501                 $this->mExpensiveFunctionCount = 0;
 502
 503                 # Fix cloning
 504                 if ( isset( $this->mPreprocessor ) && $this->mPreprocessor->parser !== $this ) {
 505                         $this->mPreprocessor = null;
 506                 }
 507
 508                 $this->mProfiler = new SectionProfiler();
 509
 510                 // Avoid PHP 7.1 warning from passing $this by reference
 511                 $parser = $this;
 512                 Hooks::run( 'ParserClearState', [ &$parser ] );
 513         }
 514
 515         /**
 516          * Reset the ParserOutput
 517          */
 518         public function resetOutput() {
 519                 $this->mOutput = new ParserOutput;
 520                 $this->mOptions->registerWatcher( [ $this->mOutput, 'recordOption' ] );
 521         }
 522
 523         /**
 524          * Convert wikitext to HTML
 525          * Do not call this function recursively.
 526          *
 527          * @param string $text Text we want to parse
 528          * @param-taint $text escapes_htmlnoent
 529          * @param Title $title
 530          * @param ParserOptions $options
 531          * @param bool $linestart
 532          * @param bool $clearState
 533          * @param int|null $revid Number to pass in {{REVISIONID}}
 534          * @return ParserOutput A ParserOutput
 535          * @return-taint escaped
 536          */
 537         public function parse(
 538                 $text, Title $title, ParserOptions $options,
 539                 $linestart = true, $clearState = true, $revid = null
 540         ) {
 541                 if ( $clearState ) {
 542                         // We use U+007F DELETE to construct strip markers, so we have to make
 543                         // sure that this character does not occur in the input text.
 544                         $text = strtr( $text, "\x7f", "?" );
 545                         $magicScopeVariable = $this->lock();
 546                 }
 547                 // Strip U+0000 NULL (T159174)
 548                 $text = str_replace( "\000", '', $text );
 549
 550                 $this->startParse( $title, $options, self::OT_HTML, $clearState );
 551
 552                 $this->currentRevisionCache = null;
 553                 $this->mInputSize = strlen( $text );
 554                 if ( $this->mOptions->getEnableLimitReport() ) {
 555                         $this->mOutput->resetParseStartTime();
 556                 }
 557
 558                 $oldRevisionId = $this->mRevisionId;
 559                 $oldRevisionObject = $this->mRevisionObject;
 560                 $oldRevisionTimestamp = $this->mRevisionTimestamp;
 561                 $oldRevisionUser = $this->mRevisionUser;
 562                 $oldRevisionSize = $this->mRevisionSize;
 563                 if ( $revid !== null ) {
 564                         $this->mRevisionId = $revid;
 565                         $this->mRevisionObject = null;
 566                         $this->mRevisionTimestamp = null;
 567                         $this->mRevisionUser = null;
 568                         $this->mRevisionSize = null;
 569                 }
 570
 571                 // Avoid PHP 7.1 warning from passing $this by reference
 572                 $parser = $this;
 573                 Hooks::run( 'ParserBeforeStrip', [ &$parser, &$text, &$this->mStripState ] );
 574                 # No more strip!
 575                 Hooks::run( 'ParserAfterStrip', [ &$parser, &$text, &$this->mStripState ] );
 576                 $text = $this->internalParse( $text );
 577                 Hooks::run( 'ParserAfterParse', [ &$parser, &$text, &$this->mStripState ] );
 578
 579                 $text = $this->internalParseHalfParsed( $text, true, $linestart );
 580
 581                 /**
 582                  * A converted title will be provided in the output object if title and
 583                  * content conversion are enabled, the article text does not contain
 584                  * a conversion-suppressing double-underscore tag, and no
 585                  * {{DISPLAYTITLE:...}} is present. DISPLAYTITLE takes precedence over
 586                  * automatic link conversion.
 587                  */
 588                 if ( !( $options->getDisableTitleConversion()
 589                         || isset( $this->mDoubleUnderscores['nocontentconvert'] )
 590                         || isset( $this->mDoubleUnderscores['notitleconvert'] )
 591                         || $this->mOutput->getDisplayTitle() !== false )
 592                 ) {
 593                         $convruletitle = $this->getTargetLanguage()->getConvRuleTitle();
 594                         if ( $convruletitle ) {
 595                                 $this->mOutput->setTitleText( $convruletitle );
 596                         } else {
 597                                 $titleText = $this->getTargetLanguage()->convertTitle( $title );
 598                                 $this->mOutput->setTitleText( $titleText );
 599                         }
 600                 }
 601
 602                 # Compute runtime adaptive expiry if set
 603                 $this->mOutput->finalizeAdaptiveCacheExpiry();
 604
 605                 # Warn if too many heavyweight parser functions were used
 606                 if ( $this->mExpensiveFunctionCount > $this->mOptions->getExpensiveParserFunctionLimit() ) {
 607                         $this->limitationWarn( 'expensive-parserfunction',
 608                                 $this->mExpensiveFunctionCount,
 609                                 $this->mOptions->getExpensiveParserFunctionLimit()
 610                         );
 611                 }
 612
 613                 # Information on limits, for the benefit of users who try to skirt them
 614                 if ( $this->mOptions->getEnableLimitReport() ) {
 615                         $text .= $this->makeLimitReport();
 616                 }
 617
 618                 # Wrap non-interface parser output in a <div> so it can be targeted
 619                 # with CSS (T37247)
 620                 $class = $this->mOptions->getWrapOutputClass();
 621                 if ( $class !== false && !$this->mOptions->getInterfaceMessage() ) {
 622                         $this->mOutput->addWrapperDivClass( $class );
 623                 }
 624
 625                 $this->mOutput->setText( $text );
 626
 627                 $this->mRevisionId = $oldRevisionId;
 628                 $this->mRevisionObject = $oldRevisionObject;
 629                 $this->mRevisionTimestamp = $oldRevisionTimestamp;
 630                 $this->mRevisionUser = $oldRevisionUser;
 631                 $this->mRevisionSize = $oldRevisionSize;
 632                 $this->mInputSize = false;
 633                 $this->currentRevisionCache = null;
 634
 635                 return $this->mOutput;
 636         }
 637
 638         /**
 639          * Set the limit report data in the current ParserOutput, and return the
 640          * limit report HTML comment.
 641          *
 642          * @return string
 643          */
 644         protected function makeLimitReport() {
 645                 $maxIncludeSize = $this->mOptions->getMaxIncludeSize();
 646
 647                 $cpuTime = $this->mOutput->getTimeSinceStart( 'cpu' );
 648                 if ( $cpuTime !== null ) {
 649                         $this->mOutput->setLimitReportData( 'limitreport-cputime',
 650                                 sprintf( "%.3f", $cpuTime )
 651                         );
 652                 }
 653
 654                 $wallTime = $this->mOutput->getTimeSinceStart( 'wall' );
 655                 $this->mOutput->setLimitReportData( 'limitreport-walltime',
 656                         sprintf( "%.3f", $wallTime )
 657                 );
 658
 659                 $this->mOutput->setLimitReportData( 'limitreport-ppvisitednodes',
 660                         [ $this->mPPNodeCount, $this->mOptions->getMaxPPNodeCount() ]
 661                 );
 662                 $this->mOutput->setLimitReportData( 'limitreport-ppgeneratednodes',
 663                         [ $this->mGeneratedPPNodeCount, $this->mOptions->getMaxGeneratedPPNodeCount() ]
 664                 );
 665                 $this->mOutput->setLimitReportData( 'limitreport-postexpandincludesize',
 666                         [ $this->mIncludeSizes['post-expand'], $maxIncludeSize ]
 667                 );
 668                 $this->mOutput->setLimitReportData( 'limitreport-templateargumentsize',
 669                         [ $this->mIncludeSizes['arg'], $maxIncludeSize ]
 670                 );
 671                 $this->mOutput->setLimitReportData( 'limitreport-expansiondepth',
 672                         [ $this->mHighestExpansionDepth, $this->mOptions->getMaxPPExpandDepth() ]
 673                 );
 674                 $this->mOutput->setLimitReportData( 'limitreport-expensivefunctioncount',
 675                         [ $this->mExpensiveFunctionCount, $this->mOptions->getExpensiveParserFunctionLimit() ]
 676                 );
 677
 678                 foreach ( $this->mStripState->getLimitReport() as list( $key, $value ) ) {
 679                         $this->mOutput->setLimitReportData( $key, $value );
 680                 }
 681
 682                 Hooks::run( 'ParserLimitReportPrepare', [ $this, $this->mOutput ] );
 683
 684                 $limitReport = "NewPP limit report\n";
 685                 if ( $this->svcOptions->get( 'ShowHostnames' ) ) {
 686                         $limitReport .= 'Parsed by ' . wfHostname() . "\n";
 687                 }
 688                 $limitReport .= 'Cached time: ' . $this->mOutput->getCacheTime() . "\n";
 689                 $limitReport .= 'Cache expiry: ' . $this->mOutput->getCacheExpiry() . "\n";
 690                 $limitReport .= 'Dynamic content: ' .
 691                         ( $this->mOutput->hasDynamicContent() ? 'true' : 'false' ) .
 692                         "\n";
 693                 $limitReport .= 'Complications: [' . implode( ', ', $this->mOutput->getAllFlags() ) . "]\n";
 694
 695                 foreach ( $this->mOutput->getLimitReportData() as $key => $value ) {
 696                         if ( Hooks::run( 'ParserLimitReportFormat',
 697                                 [ $key, &$value, &$limitReport, false, false ]
 698                         ) ) {
 699                                 $keyMsg = wfMessage( $key )->inLanguage( 'en' )->useDatabase( false );
 700                                 $valueMsg = wfMessage( [ "$key-value-text", "$key-value" ] )
 701                                         ->inLanguage( 'en' )->useDatabase( false );
 702                                 if ( !$valueMsg->exists() ) {
 703                                         $valueMsg = new RawMessage( '$1' );
 704                                 }
 705                                 if ( !$keyMsg->isDisabled() && !$valueMsg->isDisabled() ) {
 706                                         $valueMsg->params( $value );
 707                                         $limitReport .= "{$keyMsg->text()}: {$valueMsg->text()}\n";
 708                                 }
 709                         }
 710                 }
 711                 // Since we're not really outputting HTML, decode the entities and
 712                 // then re-encode the things that need hiding inside HTML comments.
 713                 $limitReport = htmlspecialchars_decode( $limitReport );
 714
 715                 // Sanitize for comment. Note '‐' in the replacement is U+2010,
 716                 // which looks much like the problematic '-'.
 717                 $limitReport = str_replace( [ '-', '&' ], [ '‐', '&amp;' ], $limitReport );
 718                 $text = "\n<!-- \n$limitReport-->\n";
 719
 720                 // Add on template profiling data in human/machine readable way
 721                 $dataByFunc = $this->mProfiler->getFunctionStats();
 722                 uasort( $dataByFunc, function ( $a, $b ) {
 723                         return $b['real'] <=> $a['real']; // descending order
 724                 } );
 725                 $profileReport = [];
 726                 foreach ( array_slice( $dataByFunc, 0, 10 ) as $item ) {
 727                         $profileReport[] = sprintf( "%6.2f%% %8.3f %6d %s",
 728                                 $item['%real'], $item['real'], $item['calls'],
 729                                 htmlspecialchars( $item['name'] ) );
 730                 }
 731                 $text .= "<!--\nTransclusion expansion time report (%,ms,calls,template)\n";
 732                 $text .= implode( "\n", $profileReport ) . "\n-->\n";
 733
 734                 $this->mOutput->setLimitReportData( 'limitreport-timingprofile', $profileReport );
 735
 736                 // Add other cache related metadata
 737                 if ( $this->svcOptions->get( 'ShowHostnames' ) ) {
 738                         $this->mOutput->setLimitReportData( 'cachereport-origin', wfHostname() );
 739                 }
 740                 $this->mOutput->setLimitReportData( 'cachereport-timestamp',
 741                         $this->mOutput->getCacheTime() );
 742                 $this->mOutput->setLimitReportData( 'cachereport-ttl',
 743                         $this->mOutput->getCacheExpiry() );
 744                 $this->mOutput->setLimitReportData( 'cachereport-transientcontent',
 745                         $this->mOutput->hasDynamicContent() );
 746
 747                 if ( $this->mGeneratedPPNodeCount > $this->mOptions->getMaxGeneratedPPNodeCount() / 10 ) {
 748                         wfDebugLog( 'generated-pp-node-count', $this->mGeneratedPPNodeCount . ' ' .
 749                                 $this->mTitle->getPrefixedDBkey() );
 750                 }
 751                 return $text;
 752         }
 753
 754         /**
 755          * Recursive parser entry point for extension tag hooks: turns wikitext
 756          * into "half-parsed" HTML.
 757          *
 758          * WARNING: the return value IS NOT SAFE PARSED HTML. Lists and links have
 759          * not been fully parsed yet, and strip markers are still present.
 760          * Use recursiveTagParseFully() when output-safe HTML is needed.
 761          *
 762          * This is the method for a parser tag hook that wants to parse wikitext
 763          * before or after applying its own transformations and then intends to
 764          * *return the result as hook output*, which sends it through the rest
 765          * of the parsing process automatically.
 766          *
 767          * Runs the ParserBeforeStrip and ParserAfterStrip hooks before parsing.
 768          *
 769          * If $frame is not provided, template variables (e.g., {{{1}}}) within
 770          * $text are not expanded.
 771          *
 772          * @param string $text Text extension wants to have parsed
 773          * @param-taint $text escapes_htmlnoent
 774          * @param bool|PPFrame $frame The frame to use for expanding any template variables
 775          * @return string UNSAFE half-parsed HTML
 776          * @return-taint escaped
 777          */
 778         public function recursiveTagParse( $text, $frame = false ) {
 779                 // The hooks take the parser by reference; give them a local handle
 780                 // because passing $this itself by reference warns on PHP 7.1
 781                 $self = $this;
 782                 Hooks::run( 'ParserBeforeStrip', [ &$self, &$text, &$this->mStripState ] );
 783                 Hooks::run( 'ParserAfterStrip', [ &$self, &$text, &$this->mStripState ] );
 784                 return $this->internalParse( $text, false, $frame );
 785         }
 786
 787         /**
 788          * Recursive parser entry point for extension tag hooks: turns wikitext
 789          * into fully parsed HTML.
 790          *
 791          * The result is fully-parsed HTML that is safe for output: the half-parsed
 792          * text from recursiveTagParse() is passed through internalParseHalfParsed().
 793          *
 794          * If you're a parser tag hook, you might want recursiveTagParse() instead.
 795          *
 796          * If $frame is not provided, template variables (e.g., {{{1}}}) within
 797          * $text are not expanded.
 798          *
 799          * @since 1.25
 800          *
 801          * @param string $text Text extension wants to have parsed
 802          * @param-taint $text escapes_htmlnoent
 803          * @param bool|PPFrame $frame The frame to use for expanding any template variables
 804          * @return string Fully parsed HTML
 805          * @return-taint escaped
 806          */
 807         public function recursiveTagParseFully( $text, $frame = false ) {
 808                 $halfParsed = $this->recursiveTagParse( $text, $frame );
 809                 return $this->internalParseHalfParsed( $halfParsed, false );
 810         }
 811
 812         /**
 813          * Expand templates and variables in $text, producing valid, static wikitext
 814          * with comments removed. Do not call this function recursively.
 815          * @param string $text
 816          * @param Title|null $title Must be passed explicitly ($options is required)
 817          * @param ParserOptions $options
 818          * @param int|null $revid Stored as the revision ID when not null
 819          * @param bool|PPFrame $frame
 820          * @return mixed|string
 821          */
 822         public function preprocess( $text, Title $title = null,
 823                 ParserOptions $options, $revid = null, $frame = false
 824         ) {
 825                 // NOTE(review): lock() is not visible here; its result is presumably
 826                 // a scope guard, so keep it in a local until the method returns.
 827                 $lockGuard = $this->lock();
 828                 $this->startParse( $title, $options, self::OT_PREPROCESS, true );
 829                 if ( $revid !== null ) {
 830                         $this->mRevisionId = $revid;
 831                 }
 832                 // Avoid PHP 7.1 warning from passing $this by reference
 833                 $self = $this;
 834                 Hooks::run( 'ParserBeforeStrip', [ &$self, &$text, &$this->mStripState ] );
 835                 Hooks::run( 'ParserAfterStrip', [ &$self, &$text, &$this->mStripState ] );
 836                 $expanded = $this->replaceVariables( $text, $frame );
 837                 return $this->mStripState->unstripBoth( $expanded );
 838         }
 839
 840         /**
 841          * Recursive parser entry point that can be called from an extension tag
 842          * hook: expands variables in $text via replaceVariables() and unstrips
 843          * the result. Unlike preprocess(), no lock is taken and no parse started.
 844          *
 845          * @since 1.19
 846          * @param string $text Text to be expanded
 847          * @param bool|PPFrame $frame The frame to use for expanding any template variables
 848          * @return string
 849          */
 850         public function recursivePreprocess( $text, $frame = false ) {
 851                 $expanded = $this->replaceVariables( $text, $frame );
 852                 return $this->mStripState->unstripBoth( $expanded );
 853         }
 854
 855         /**
 856          * Process the wikitext for the "?preload=" feature. (T7210)
 857          *
 858          * $params are first applied to $text through a RawMessage. After that,
 859          * "<noinclude>", "<includeonly>" etc. are parsed as for template
 860          * transclusion, while comments, templates, arguments, tag hooks and
 861          * parser functions are left untouched.
 862          *
 863          * @param string $text
 864          * @param Title $title
 865          * @param ParserOptions $options
 866          * @param array $params Parameters handed to RawMessage::params()
 867          * @return string
 868          */
 869         public function getPreloadText( $text, Title $title, ParserOptions $options, $params = [] ) {
 870                 $text = ( new RawMessage( $text ) )->params( $params )->plain();
 871
 872                 # Parser (re)initialisation
 873                 $lockGuard = $this->lock();
 874                 $this->startParse( $title, $options, self::OT_PLAIN, true );
 875
 876                 $expandFlags = PPFrame::NO_ARGS | PPFrame::NO_TEMPLATES;
 877                 $dom = $this->preprocessToDom( $text, self::PTD_FOR_INCLUSION );
 878                 $frame = $this->getPreprocessor()->newFrame();
 879                 $expanded = $frame->expand( $dom, $expandFlags );
 880                 return $this->mStripState->unstripBoth( $expanded );
 881         }
 882
 883         /**
 884          * Set the current user.
 885          * Should only be used when doing pre-save transform. While no user is
 886          * set here (null), getUser() returns the user of the ParserOptions object.
 887          * @param User|null $user User object or null (to reset)
 888          */
 889         public function setUser( $user ) {
 890                 $this->mUser = $user;
 891         }
 892
 893         /**
 894          * Set the context title.
 895          *
 896          * A falsy $t is replaced by the Title for the text 'NO TITLE'. When the
 897          * title carries a fragment, what gets stored is the fragment-less target
 898          * obtained from createFragmentTarget( '' ).
 899          *
 900          * @param Title|null $t
 901          */
 902         public function setTitle( $t ) {
 903                 if ( !$t ) {
 904                         $t = Title::newFromText( 'NO TITLE' );
 905                 }
 906
 907                 # Strip the fragment to avoid various odd effects
 908                 $this->mTitle = $t->hasFragment() ? $t->createFragmentTarget( '' ) : $t;
 909         }
 910
 911         /**
 912          * Return the context title stored on this parser ($this->mTitle).
 913          * @return Title|null
 914          */
 915         public function getTitle() {
 916                 $title = $this->mTitle;
 917                 return $title;
 918         }
 919
 920         /**
 921          * Combined accessor/mutator for the Title object, backed by wfSetVar().
 922          * @param Title|null $x Title object or null to just get the current one
 923          * @return Title
 924          */
 925         public function Title( $x = null ) {
 926                 $result = wfSetVar( $this->mTitle, $x );
 927                 return $result;
 928         }
 929
 930         /**
 931          * Set the output type and rebuild the $this->ot flag array from it.
 932          *
 933          * @param int $ot New value, compared against the self::OT_* constants
 934          */
 935         public function setOutputType( $ot ) {
 936                 $this->mOutputType = $ot;
 937                 # Shortcut alias: one boolean per known output type
 938                 $flags = [];
 939                 $flags['html'] = $ot == self::OT_HTML;
 940                 $flags['wiki'] = $ot == self::OT_WIKI;
 941                 $flags['pre'] = $ot == self::OT_PREPROCESS;
 942                 $flags['plain'] = $ot == self::OT_PLAIN;
 943                 $this->ot = $flags;
 944         }
 945
 946         /**
 947          * Accessor/mutator for the output type; it does not rebuild $this->ot.
 948          * @param int|null $x New value or null to just get the current one
 949          * @return int
 950          */
 951         public function OutputType( $x = null ) {
 952                 $result = wfSetVar( $this->mOutputType, $x );
 953                 return $result;
 954         }
 955
 956         /**
 957          * Return the ParserOutput object held by this parser ($this->mOutput).
 958          * @return ParserOutput
 959          */
 960         public function getOutput() {
 961                 $output = $this->mOutput;
 962                 return $output;
 963         }
 964
 965         /**
 966          * Return the ParserOptions object held by this parser ($this->mOptions).
 967          * @return ParserOptions
 968          */
 969         public function getOptions() {
 970                 $options = $this->mOptions;
 971                 return $options;
 972         }
 973
 974         /**
 975          * Combined accessor/mutator for the ParserOptions object (via wfSetVar()).
 976          * @param ParserOptions|null $x New value or null to just get the current one
 977          * @return ParserOptions Current ParserOptions object
 978          */
 979         public function Options( $x = null ) {
 980                 $result = wfSetVar( $this->mOptions, $x );
 981                 return $result;
 982         }
 983
 984         /** @return int The link ID as it was before this call incremented it */
 985         public function nextLinkID() {
 986                 $id = $this->mLinkID;
 987                 $this->mLinkID++;
 988                 return $id;
 989         }
 990
 991         /**
 992          * @param int $id Value from which nextLinkID() continues counting
 993          */
 994         public function setLinkID( $id ) {
 995                 $this->mLinkID = $id;
 996         }
 997
 998         /**
 999          * @return Language Target language, for functions such as {{FORMATNUM:}}
1000          */
1001         public function getFunctionLang() {
1002                 $lang = $this->getTargetLanguage();
1003                 return $lang;
1004         }
1005
1006         /**
1007          * Get the target language for the content being parsed, which is usually
1008          * the language the content is in. Resolution order: the target language
1009          * set on the ParserOptions, then the user language when the options
1010          * flag an interface message, then the page language of the title.
1011          * @since 1.19
1012          * @throws MWException When the title is needed but $this->mTitle is null
1013          * @return Language
1014          */
1015         public function getTargetLanguage() {
1016                 $explicit = $this->mOptions->getTargetLanguage();
1017                 if ( $explicit !== null ) {
1018                         return $explicit;
1019                 }
1020                 if ( $this->mOptions->getInterfaceMessage() ) {
1021                         return $this->mOptions->getUserLangObj();
1022                 }
1023                 if ( $this->mTitle === null ) {
1024                         throw new MWException( __METHOD__ . ': $this->mTitle is null' );
1025                 }
1026                 return $this->mTitle->getPageLanguage();
1027         }
1028
1029         /**
1030          * @deprecated since 1.32, just use getTargetLanguage()
1031          * @return Language|null Language object for language conversion
1032          */
1033         public function getConverterLanguage() {
1034                 $lang = $this->getTargetLanguage();
1035                 return $lang;
1036         }
1037
1038         /**
1039          * Get the User set on this parser ($this->mUser) or, when that is null,
1040          * the user of the ParserOptions object.
1041          * @return User
1042          */
1043         public function getUser() {
1044                 $user = $this->mUser;
1045                 if ( $user === null ) {
1046                         $user = $this->mOptions->getUser();
1047                 }
1048                 return $user;
1049         }
1050
1051         /**
1052          * Get the preprocessor, creating it from 'preprocessorClass' on first use.
1053          * @return Preprocessor
1054          */
1055         public function getPreprocessor() {
1056                 if ( isset( $this->mPreprocessor ) ) {
1057                         return $this->mPreprocessor;
1058                 }
1059                 $preprocessorClass = $this->svcOptions->get( 'preprocessorClass' );
1060                 $this->mPreprocessor = new $preprocessorClass( $this );
1061                 return $this->mPreprocessor;
1062         }
1063
1064         /**
1065          * Get the LinkRenderer used to make links. It is created on the first
1066          * call, with the stub threshold of the options current at that time.
1067          * @since 1.28
1068          * @return LinkRenderer
1069          */
1070         public function getLinkRenderer() {
1071                 if ( $this->mLinkRenderer ) {
1072                         return $this->mLinkRenderer;
1073                 }
1074                 // XXX We make the LinkRenderer with current options and then cache it forever
1075                 $this->mLinkRenderer = $this->linkRendererFactory->create();
1076                 $stubThreshold = $this->getOptions()->getStubThreshold();
1077                 $this->mLinkRenderer->setStubThreshold( $stubThreshold );
1078
1079                 return $this->mLinkRenderer;
1080         }
1081
1082         /**
1083          * Return the MagicWordFactory this Parser is using.
1084          * @since 1.32
1085          * @return MagicWordFactory
1086          */
1087         public function getMagicWordFactory() {
1088                 $factory = $this->magicWordFactory;
1089                 return $factory;
1090         }
1091
1092         /**
1093          * Return the content language this Parser is using.
1094          * @since 1.32
1095          * @return Language
1096          */
1097         public function getContentLanguage() {
1098                 $language = $this->contLang;
1099                 return $language;
1100         }
1101
1102         /**
1103          * Replaces all occurrences of HTML-style comments and the given tags in
1104          * the text with a unique marker and returns the new text. Names are
1105          * matched literally, ignoring case. The output parameter $matches will
1106          * be an associative array filled with data in the form:
1107          *
1108          * @code
1109          *   'UNIQ-xxxxx' => [
1110          *     'element',
1111          *     'tag content',
1112          *     [ 'param' => 'x' ],
1113          *     '<element param="x">tag content</element>' ]
1114          * @endcode
1115          *
1116          * @param array $elements List of element names. Comments are always extracted.
1117          * @param string $text Source text string.
1118          * @param array &$matches Out parameter, Array: extracted tags
1119          * @return string Stripped text
1120          */
1121         public static function extractTagsAndParams( $elements, $text, &$matches ) {
1122                 static $n = 1;
1123                 $stripped = '';
1124                 $matches = [];
1125                 // Quote the names (they are literal text). With no names, (?!) keeps
1126                 // the empty alternation from matching "<>" and "</>" as elements.
1127                 $quoted = array_map( function ( $name ) {
1128                         return preg_quote( $name, '/' );
1129                 }, $elements );
1130                 $taglist = $quoted ? implode( '|', $quoted ) : '(?!)';
1131                 $start = "/<($taglist)(\\s+[^>]*?|\\s*?)(\/?>)|<(!--)/i";
1132
1133                 while ( $text != '' ) {
1134                         $p = preg_split( $start, $text, 2, PREG_SPLIT_DELIM_CAPTURE );
1135                         $stripped .= $p[0];
1136                         if ( count( $p ) < 5 ) {
1137                                 break;
1138                         }
1139                         if ( count( $p ) > 5 ) {
1140                                 # comment
1141                                 $element = $p[4];
1142                                 $attributes = '';
1143                                 $close = '';
1144                                 $inside = $p[5];
1145                         } else {
1146                                 # tag
1147                                 list( , $element, $attributes, $close, $inside ) = $p;
1148                         }
1149
1150                         $marker = self::MARKER_PREFIX . "-$element-" . sprintf( '%08X', $n++ ) . self::MARKER_SUFFIX;
1151                         $stripped .= $marker;
1152
1153                         if ( $close === '/>' ) {
1154                                 # Empty element tag, <tag />
1155                                 $content = null;
1156                                 $text = $inside;
1157                                 $tail = null;
1158                         } else {
1159                                 $end = $element === '!--' ? '/(-->)/'
1160                                         : '/(<\/' . preg_quote( $element, '/' ) . '\s*>)/i';
1161                                 $q = preg_split( $end, $inside, 2, PREG_SPLIT_DELIM_CAPTURE );
1162                                 $content = $q[0];
1163                                 if ( count( $q ) < 3 ) {
1164                                         # No end tag -- let it run out to the end of the text.
1165                                         $tail = '';
1166                                         $text = '';
1167                                 } else {
1168                                         list( , $tail, $text ) = $q;
1169                                 }
1170                         }
1171                         $matches[$marker] = [ $element,
1172                                 $content,
1173                                 Sanitizer::decodeTagAttributes( $attributes ),
1174                                 "<$element$attributes$close$content$tail" ];
1175                 }
1176                 return $stripped;
1177         }
1178
1179         /**
1180          * Return the list of strippable XML-like elements ($this->mStripList).
1181          * @return array
1182          */
1183         public function getStripList() {
1184                 $stripList = $this->mStripList;
1185                 return $stripList;
1186         }
1187
1188         /**
1189          * Return the StripState held by this parser ($this->mStripState).
1190          * @return StripState
1191          */
1192         public function getStripState() {
1193                 $stripState = $this->mStripState;
1194                 return $stripState;
1195         }
1196
1197         /**
1198          * Add an item to the strip state.
1199          *
1200          * The returned marker must be inserted into the stripped text; it will
1201          * be replaced with the original text in unstrip(). Each call uses the
1202          * next value of $this->mMarkerIndex for its marker.
1203          * @param string $text
1204          * @return string The unique marker
1205          */
1206         public function insertStripItem( $text ) {
1207                 $index = $this->mMarkerIndex++;
1208                 $marker = self::MARKER_PREFIX . "-item-{$index}-" . self::MARKER_SUFFIX;
1209                 $this->mStripState->addGeneral( $marker, $text );
1210                 return $marker;
1211         }
1212
1213         /**
1214          * parse the wiki syntax used to render tables
1215          *
1216          * @private
1217          * @param string $text
1218          * @return string
1219          */
1220         public function doTableStuff( $text ) {
1221                 $lines = StringUtils::explode( "\n", $text );
1222                 $out = '';
1223                 $td_history = []; # Is currently a td tag open?
1224                 $last_tag_history = []; # Save history of last lag activated (td, th or caption)
1225                 $tr_history = []; # Is currently a tr tag open?
1226                 $tr_attributes = []; # history of tr attributes
1227                 $has_opened_tr = []; # Did this table open a <tr> element?
1228                 $indent_level = 0; # indent level of the table
1229
1230                 foreach ( $lines as $outLine ) {
1231                         $line = trim( $outLine );
1232
1233                         if ( $line === '' ) { # empty line, go to next line
1234                                 $out .= $outLine . "\n";
1235                                 continue;
1236                         }
1237
1238                         $first_character = $line[0];
1239                         $first_two = substr( $line, 0, 2 );
1240                         $matches = [];
1241
1242                         if ( preg_match( '/^(:*)\s*\{\|(.*)$/', $line, $matches ) ) {
1243                                 # First check if we are starting a new table
1244                                 $indent_level = strlen( $matches[1] );
1245
1246                                 $attributes = $this->mStripState->unstripBoth( $matches[2] );
1247                                 $attributes = Sanitizer::fixTagAttributes( $attributes, 'table' );
1248
1249                                 $outLine = str_repeat( '<dl><dd>', $indent_level ) . "<table{$attributes}>";
1250                                 array_push( $td_history, false );
1251                                 array_push( $last_tag_history, '' );
1252                                 array_push( $tr_history, false );
1253                                 array_push( $tr_attributes, '' );
1254                                 array_push( $has_opened_tr, false );
1255                         } elseif ( count( $td_history ) == 0 ) {
1256                                 # Don't do any of the following
1257                                 $out .= $outLine . "\n";
1258                                 continue;
1259                         } elseif ( $first_two === '|}' ) {
1260                                 # We are ending a table
1261                                 $line = '</table>' . substr( $line, 2 );
1262                                 $last_tag = array_pop( $last_tag_history );
1263
1264                                 if ( !array_pop( $has_opened_tr ) ) {
1265                                         $line = "<tr><td></td></tr>{$line}";
1266                                 }
1267
1268                                 if ( array_pop( $tr_history ) ) {
1269                                         $line = "</tr>{$line}";
1270                                 }
1271
1272                                 if ( array_pop( $td_history ) ) {
1273                                         $line = "</{$last_tag}>{$line}";
1274                                 }
1275                                 array_pop( $tr_attributes );
1276                                 if ( $indent_level > 0 ) {
1277                                         $outLine = rtrim( $line ) . str_repeat( '</dd></dl>', $indent_level );
1278                                 } else {
1279                                         $outLine = $line;
1280                                 }
1281                         } elseif ( $first_two === '|-' ) {
1282                                 # Now we have a table row
1283                                 $line = preg_replace( '#^\|-+#', '', $line );
1284
1285                                 # Whats after the tag is now only attributes
1286                                 $attributes = $this->mStripState->unstripBoth( $line );
1287                                 $attributes = Sanitizer::fixTagAttributes( $attributes, 'tr' );
1288                                 array_pop( $tr_attributes );
1289                                 array_push( $tr_attributes, $attributes );
1290
1291                                 $line = '';
1292                                 $last_tag = array_pop( $last_tag_history );
1293                                 array_pop( $has_opened_tr );
1294                                 array_push( $has_opened_tr, true );
1295
1296                                 if ( array_pop( $tr_history ) ) {
1297                                         $line = '</tr>';
1298                                 }
1299
1300                                 if ( array_pop( $td_history ) ) {
1301                                         $line = "</{$last_tag}>{$line}";
1302                                 }
1303
1304                                 $outLine = $line;
1305                                 array_push( $tr_history, false );
1306                                 array_push( $td_history, false );
1307                                 array_push( $last_tag_history, '' );
1308                         } elseif ( $first_character === '|'
1309                                 || $first_character === '!'
1310                                 || $first_two === '|+'
1311                         ) {
1312                                 # This might be cell elements, td, th or captions
1313                                 if ( $first_two === '|+' ) {
1314                                         $first_character = '+';
1315                                         $line = substr( $line, 2 );
1316                                 } else {
1317                                         $line = substr( $line, 1 );
1318                                 }
1319
1320                                 // Implies both are valid for table headings.
1321                                 if ( $first_character === '!' ) {
1322                                         $line = StringUtils::replaceMarkup( '!!', '||', $line );
1323                                 }
1324
1325                                 # Split up multiple cells on the same line.
1326                                 # FIXME : This can result in improper nesting of tags processed
1327                                 # by earlier parser steps.
1328                                 $cells = explode( '||', $line );
1329
1330                                 $outLine = '';
1331
1332                                 # Loop through each table cell
1333                                 foreach ( $cells as $cell ) {
1334                                         $previous = '';
1335                                         if ( $first_character !== '+' ) {
1336                                                 $tr_after = array_pop( $tr_attributes );
1337                                                 if ( !array_pop( $tr_history ) ) {
1338                                                         $previous = "<tr{$tr_after}>\n";
1339                                                 }
1340                                                 array_push( $tr_history, true );
1341                                                 array_push( $tr_attributes, '' );
1342                                                 array_pop( $has_opened_tr );
1343                                                 array_push( $has_opened_tr, true );
1344                                         }
1345
1346                                         $last_tag = array_pop( $last_tag_history );
1347
1348                                         if ( array_pop( $td_history ) ) {
1349                                                 $previous = "</{$last_tag}>\n{$previous}";
1350                                         }
1351
1352                                         if ( $first_character === '|' ) {
1353                                                 $last_tag = 'td';
1354                                         } elseif ( $first_character === '!' ) {
1355                                                 $last_tag = 'th';
1356                                         } elseif ( $first_character === '+' ) {
1357                                                 $last_tag = 'caption';
1358                                         } else {
1359                                                 $last_tag = '';
1360                                         }
1361
1362                                         array_push( $last_tag_history, $last_tag );
1363
1364                                         # A cell could contain both parameters and data
1365                                         $cell_data = explode( '|', $cell, 2 );
1366
1367                                         # T2553: Note that a '|' inside an invalid link should not
1368                                         # be mistaken as delimiting cell parameters
1369                                         # Bug T153140: Neither should language converter markup.
1370                                         if ( preg_match( '/\[\[|-\{/', $cell_data[0] ) === 1 ) {
1371                                                 $cell = "{$previous}<{$last_tag}>" . trim( $cell );
1372                                         } elseif ( count( $cell_data ) == 1 ) {
1373                                                 // Whitespace in cells is trimmed
1374                                                 $cell = "{$previous}<{$last_tag}>" . trim( $cell_data[0] );
1375                                         } else {
1376                                                 $attributes = $this->mStripState->unstripBoth( $cell_data[0] );
1377                                                 $attributes = Sanitizer::fixTagAttributes( $attributes, $last_tag );
1378                                                 // Whitespace in cells is trimmed
1379                                                 $cell = "{$previous}<{$last_tag}{$attributes}>" . trim( $cell_data[1] );
1380                                         }
1381
1382                                         $outLine .= $cell;
1383                                         array_push( $td_history, true );
1384                                 }
1385                         }
1386                         $out .= $outLine . "\n";
1387                 }
1388
1389                 # Closing open td, tr && table
1390                 while ( count( $td_history ) > 0 ) {
1391                         if ( array_pop( $td_history ) ) {
1392                                 $out .= "</td>\n";
1393                         }
1394                         if ( array_pop( $tr_history ) ) {
1395                                 $out .= "</tr>\n";
1396                         }
1397                         if ( !array_pop( $has_opened_tr ) ) {
1398                                 $out .= "<tr><td></td></tr>\n";
1399                         }
1400
1401                         $out .= "</table>\n";
1402                 }
1403
1404                 # Remove trailing line-ending (b/c)
1405                 if ( substr( $out, -1 ) === "\n" ) {
1406                         $out = substr( $out, 0, -1 );
1407                 }
1408
1409                 # special case: don't return empty table
1410                 if ( $out === "<table>\n<tr><td></td></tr>\n</table>" ) {
1411                         $out = '';
1412                 }
1413
1414                 return $out;
1415         }
1416
/**
 * First stage of parse(): transforms wiki markup into half-parsed HTML.
 * Only called for $mOutputType == self::OT_HTML.
 *
 * Variables/templates are expanded first, the text is then passed through
 * Sanitizer::removeHTMLtags(), and afterwards tables, horizontal rules,
 * double-underscore words, headings, internal links, quotes, external
 * links and magic links are processed, in that order.
 *
 * If the ParserBeforeInternalParse hook returns a false-ish value, the text
 * is returned as the hook handlers left it and nothing else is done.
 *
 * @private
 *
 * @param string $text The text to parse
 * @param-taint $text escapes_html
 * @param bool $isMain Whether this is being called from the main parse() function
 * @param PPFrame|bool $frame A pre-processor frame
 *
 * @return string
 */
public function internalParse( $text, $isMain = true, $frame = false ) {
	// formatHeadings() is given the untouched input at the very end
	$unparsedText = $text;

	// Hook arguments are passed by reference; hand out a copy of $this
	// to avoid the PHP 7.1 warning
	$parser = $this;

	# Hook handlers may suspend the parser in this state
	$keepGoing = Hooks::run(
		'ParserBeforeInternalParse',
		[ &$parser, &$text, &$this->mStripState ]
	);
	if ( !$keepGoing ) {
		return $text;
	}

	if ( !$frame ) {
		# No frame was provided: use old-style replaceVariables
		$text = $this->replaceVariables( $text );
	} else {
		# The frame depth tells how include/noinclude tags should be handled:
		# depth 0 is the top-level document, anything else is an included one
		$ptdFlags = $frame->depth ? self::PTD_FOR_INCLUSION : 0;
		$text = $frame->expand( $this->preprocessToDom( $text, $ptdFlags ) );
	}

	Hooks::run( 'InternalParseBeforeSanitize', [ &$parser, &$text, &$this->mStripState ] );
	$transparentTagNames = array_keys( $this->mTransparentTagHooks );
	$text = Sanitizer::removeHTMLtags(
		$text,
		[ $this, 'attributeStripCallback' ],
		false,
		$transparentTagNames,
		[],
		[ $this, 'addTrackingCategory' ]
	);
	Hooks::run( 'InternalParseBeforeLinks', [ &$parser, &$text, &$this->mStripState ] );

	# Tables must be handled after variable replacement; doing them before
	# the other transformations keeps things like link expansions from
	# showing up in surprising places.
	$text = $this->doTableStuff( $text );

	# Four or more dashes at the start of a line become a horizontal rule
	$text = preg_replace( '/(^|\n)-----*/', '\\1<hr />', $text );

	$text = $this->doDoubleUnderscore( $text );
	$text = $this->doHeadings( $text );
	$text = $this->replaceInternalLinks( $text );
	$text = $this->doAllQuotes( $text );
	$text = $this->replaceExternalLinks( $text );

	# replaceInternalLinks may leave absolute URLs behind, masked with a
	# NOPARSE marker to hide them from replaceExternalLinks; drop the marker
	$text = str_replace( self::MARKER_PREFIX . 'NOPARSE', '', $text );

	$text = $this->doMagicLinks( $text );

	return $this->formatHeadings( $text, $unparsedText, $isMain );
}
1492
/**
 * Second stage of parse(): transforms half-parsed HTML into fully parsed HTML.
 *
 * General strip markers are unstripped, block levels are built, link holders
 * are replaced, language conversion is applied (unless it is switched off),
 * nowiki markers are unstripped, transparent tags are replaced and character
 * references are normalized. Finally the HTML is cleaned up, either by MWTidy
 * or, when tidy is disabled, by a deprecated set of regex fix-ups.
 *
 * The ParserAfterUnstrip, ParserBeforeTidy and ParserAfterTidy hooks are only
 * run when $isMain is true.
 *
 * @param string $text
 * @param bool $isMain
 * @param bool $linestart Passed through to doBlockLevels()
 * @return string
 */
private function internalParseHalfParsed( $text, $isMain = true, $linestart = true ) {
	// Hook arguments are passed by reference; hand out a copy of $this
	// to avoid the PHP 7.1 warning
	$parser = $this;

	$text = $this->mStripState->unstripGeneral( $text );
	if ( $isMain ) {
		Hooks::run( 'ParserAfterUnstrip', [ &$parser, &$text ] );
	}

	# Clean up special characters; run only once, right before doBlockLevels
	$text = Sanitizer::armorFrenchSpaces( $text );
	$text = $this->doBlockLevels( $text, $linestart );

	$this->replaceLinkHolders( $text );

	/**
	 * No language conversion takes place if
	 * a) it is disabled,
	 * b) the content is marked as not to be converted,
	 * c) it is a conversion table, or
	 * d) it is an interface message (which is in the user language)
	 */
	$skipConversion = $this->mOptions->getDisableContentConversion()
		|| isset( $this->mDoubleUnderscores['nocontentconvert'] )
		|| $this->mOptions->getInterfaceMessage();
	if ( !$skipConversion ) {
		# Do not move this convert() call: it assumes that all links have
		# been replaced and that only the <nowiki> marks are left.
		$text = $this->getTargetLanguage()->convert( $text );
	}

	$text = $this->mStripState->unstripNoWiki( $text );

	if ( $isMain ) {
		Hooks::run( 'ParserBeforeTidy', [ &$parser, &$text ] );
	}

	$text = $this->replaceTransparentTags( $text );
	$text = $this->mStripState->unstripGeneral( $text );
	$text = Sanitizer::normalizeCharReferences( $text );

	if ( !MWTidy::isEnabled() ) {
		# Try to repair at least some nesting problems (T4702 and quite a
		# few others). This code path is buggy and deprecated!
		wfDeprecated( 'disabling tidy', '1.33' );
		$nestingFixes = [
			# ''Something [http://www.cool.com cool''] -->
			# <i>Something</i><a href="http://www.cool.com"..><i>cool></i></a>
			'/(<([bi])>)(<([bi])>)?([^<]*)(<\/?a[^<]*>)([^<]*)(<\/\\4>)?(<\/\\2>)/' =>
			'\\1\\3\\5\\8\\9\\6\\1\\3\\7\\8\\9',
			# an anchor inside another anchor; only handles a single
			# nested link (T5695)
			'/(<a[^>]+>)([^<]*)(<a[^>]+>[^<]*)<\/a>(.*)<\/a>/' =>
			'\\1\\2</a>\\3</a>\\1\\4</a>',
			# a div inside an inline element: doBlockLevels won't wrap a line
			# that contains a div, so replace the div with escaped text here
			'/(<([aib]) [^>]+>)([^<]*)(<div([^>]*)>)(.*)(<\/div>)([^<]*)(<\/\\2>)/' =>
			'\\1\\3&lt;div\\5&gt;\\6&lt;/div&gt;\\8\\9',
			# drop empty italic or bold tag pairs, some of which are
			# introduced by the rules above
			'/<([bi])><\/\\1>/' => '',
		];

		$text = preg_replace(
			array_keys( $nestingFixes ),
			array_values( $nestingFixes ),
			$text
		);
	} elseif ( $this->mOptions->getTidy() ) {
		$text = MWTidy::tidy( $text );
	}

	if ( $isMain ) {
		Hooks::run( 'ParserAfterTidy', [ &$parser, &$text ] );
	}

	return $text;
}
1587
/**
 * Turn free external URLs and magic strings such as "RFC xxx", "PMID xxx"
 * and "ISBN xxx" into links.
 *
 * Existing anchors and the insides of HTML elements are matched as well, so
 * that they get skipped; every match is handed to magicLinkCallback().
 *
 * DML
 * @private
 *
 * @param string $text
 *
 * @return string
 */
public function doMagicLinks( $text ) {
	# Building blocks that are interpolated into the pattern below
	$prots = wfUrlProtocolsWithoutProtRel();
	$urlChar = self::EXT_LINK_URL_CLASS;
	$addr = self::EXT_LINK_ADDR;
	# non-newline space
	$space = self::SPACE_NOT_NL;
	# a dash or a non-newline space
	$spdash = "(?:-|$space)";
	# possessive match of 1 or more spaces
	$spaces = "$space++";

	$magicRegex =
		'!(?:                        # Start cases
			(<a[ \t\r\n>].*?</a>) |    # m[1]: Skip link text
			(<.*?>) |                  # m[2]: Skip stuff inside HTML elements' . "
			(\b                        # m[3]: Free external links
				(?i:$prots)
				($addr$urlChar*)         # m[4]: Post-protocol path
			) |
			\b(?:RFC|PMID) $spaces     # m[5]: RFC or PMID, capture number
				([0-9]+)\b |
			\bISBN $spaces (           # m[6]: ISBN, capture number
				(?: 97[89] $spdash? )?   #  optional 13-digit ISBN prefix
				(?: [0-9]  $spdash? ){9} #  9 digits with opt. delimiters
				[0-9Xx]                  #  check digit
			)\b
		)!xu";

	return preg_replace_callback( $magicRegex, [ $this, 'magicLinkCallback' ], $text );
}
1624
/**
 * preg_replace_callback() handler used by doMagicLinks().
 *
 * Anchors and other HTML elements (groups 1 and 2) are returned unchanged.
 * Free external links (group 3) are passed to makeFreeExternalLink().
 * RFC/PMID numbers (group 5) and ISBNs (group 6) are turned into links when
 * the matching parser option is enabled; otherwise the matched text is
 * returned unchanged.
 *
 * @throws MWException
 * @param array $m
 * @return string HTML
 */
public function magicLinkCallback( $m ) {
	$matched = $m[0];

	// Tells whether capture group $n is present with a non-empty value
	$captured = static function ( $n ) use ( $m ) {
		return isset( $m[$n] ) && $m[$n] !== '';
	};

	if ( $captured( 1 ) || $captured( 2 ) ) {
		# Anchor or other HTML element: skip
		return $matched;
	}

	if ( $captured( 3 ) ) {
		# Free external link
		return $this->makeFreeExternalLink( $matched, strlen( $m[4] ) );
	}

	if ( $captured( 5 ) ) {
		# RFC or PMID
		$id = $m[5];
		if ( substr( $matched, 0, 3 ) === 'RFC' ) {
			$enabled = $this->mOptions->getMagicRFCLinks();
			$keyword = 'RFC';
			$urlmsg = 'rfcurl';
			$cssClass = 'mw-magiclink-rfc';
			$trackingCat = 'magiclink-tracking-rfc';
		} elseif ( substr( $matched, 0, 4 ) === 'PMID' ) {
			$enabled = $this->mOptions->getMagicPMIDLinks();
			$keyword = 'PMID';
			$urlmsg = 'pubmedurl';
			$cssClass = 'mw-magiclink-pmid';
			$trackingCat = 'magiclink-tracking-pmid';
		} else {
			throw new MWException( __METHOD__ . ': unrecognised match type "' .
				substr( $matched, 0, 20 ) . '"' );
		}
		if ( !$enabled ) {
			return $matched;
		}

		$url = wfMessage( $urlmsg, $id )->inContentLanguage()->text();
		$this->addTrackingCategory( $trackingCat );
		return Linker::makeExternalLink( $url, "{$keyword} {$id}", true, $cssClass, [], $this->mTitle );
	}

	if ( $captured( 6 ) && $this->mOptions->getMagicISBNLinks() ) {
		# ISBN
		$space = self::SPACE_NOT_NL; #  non-newline space
		# Display form: every kind of non-newline space becomes a plain space
		$isbn = preg_replace( "/$space/", ' ', $m[6] );
		# Lookup form: no delimiters, upper-case check digit
		$num = strtr( $isbn, [
			'-' => '',
			' ' => '',
			'x' => 'X',
		] );
		$this->addTrackingCategory( 'magiclink-tracking-isbn' );
		return $this->getLinkRenderer()->makeKnownLink(
			SpecialPage::getTitleFor( 'Booksources', $num ),
			"ISBN $isbn",
			[
				'class' => 'internal mw-magiclink-isbn',
				'title' => false // suppress title attribute
			]
		);
	}

	# Nothing to do (this includes an ISBN match with ISBN links disabled)
	return $matched;
}
1692
/**
 * Make a free external link, given a user-supplied URL.
 *
 * Anything from the first escaped '<', '>' or non-breaking space onwards,
 * as well as trailing punctuation, is split off the URL and appended as
 * plain text after the link. If no real URL is left after that, the input
 * is returned without being linked.
 *
 * @param string $url
 * @param int $numPostProto
 *   The number of characters after the protocol.
 * @return string HTML
 * @private
 */
public function makeFreeExternalLink( $url, $numPostProto ) {
	$trail = '';

	# '<' and '>' (which were escaped by removeHTMLtags()) should not be
	# part of URLs, per RFC 2396. &nbsp; terminates a URL too (bug T84937)
	$entityRegex = '/&(lt|gt|nbsp|#x0*(3[CcEe]|[Aa]0)|#0*(60|62|160));/';
	$m2 = [];
	if ( preg_match( $entityRegex, $url, $m2, PREG_OFFSET_CAPTURE ) ) {
		$cutAt = $m2[0][1];
		$trail = substr( $url, $cutAt ) . $trail;
		$url = substr( $url, 0, $cutAt );
	}

	# Punctuation that gets moved to $trail when it ends the URL. Right
	# brackets are fair game too if the URL has no left bracket.
	$sep = ',;\.:!?';
	if ( strpos( $url, '(' ) === false ) {
		$sep .= ')';
	}

	$urlRev = strrev( $url );
	$numSepChars = strspn( $urlRev, $sep );
	# Don't break a trailing HTML entity by moving its ';' into $trail.
	# This is hot code: substr_compare avoids creating a new string for the
	# comparison, and matching on the reversed string from the wanted offset
	# avoids a slow $-anchored preg_match.
	# The un-reversed regexp is: /&([a-z]+|#x[\da-f]+|#\d+)$/i
	if ( $numSepChars
		&& substr_compare( $url, ";", -$numSepChars, 1 ) === 0
		&& preg_match( '/\G([a-z]+|[\da-f]+x#|\d+#)&/i', $urlRev, $m2, 0, $numSepChars )
	) {
		$numSepChars--;
	}
	if ( $numSepChars ) {
		$trail = substr( $url, -$numSepChars ) . $trail;
		$url = substr( $url, 0, -$numSepChars );
	}

	# After trail removal there must still be a real URL left, not just a
	# lone protocol
	if ( strlen( $trail ) >= $numPostProto ) {
		return $url . $trail;
	}

	$url = Sanitizer::cleanUrl( $url );

	# An external image is rendered as such ...
	$html = $this->maybeMakeExternalImage( $url );
	if ( $html === false ) {
		# ... anything else becomes a link
		$linkText = $this->getTargetLanguage()->getConverter()->markNoConversion( $url );
		$html = Linker::makeExternalLink(
			$url,
			$linkText,
			true,
			'free',
			$this->getExternalLinkAttribs( $url ),
			$this->mTitle
		);
		# Register it in the output object...
		$this->mOutput->addExternalLink( $url );
	}

	return $html . $trail;
}
1767
/**
 * Convert heading lines ("= x =" up to "====== x ======") into the
 * matching <h1> ... <h6> elements.
 *
 * @private
 *
 * @param string $text
 *
 * @return string
 */
public function doHeadings( $text ) {
	// Levels are handled from 6 down to 1
	foreach ( [ 6, 5, 4, 3, 2, 1 ] as $i ) {
		$h = str_repeat( '=', $i );
		// Only non-newline whitespace is trimmed from the heading text:
		// using \s* would break for "==\n===\n", parsing it as <h2>=</h2>
		$pattern = "/^(?:$h)[ \\t]*(.+?)[ \\t]*(?:$h)\\s*$/m";
		$text = preg_replace( $pattern, "<h$i>\\1</h$i>", $text );
	}
	return $text;
}
1786
/**
 * Replace apostrophe markup with HTML, one line at a time.
 * @private
 *
 * @param string $text
 *
 * @return string The altered text
 */
public function doAllQuotes( $text ) {
	// Quote markup never spans lines, so every line goes through doQuotes()
	// on its own and the results are joined with newlines again.
	// NOTE(review): assumes StringUtils::explode() yields at least one
	// piece, as PHP's explode() does.
	$converted = [];
	foreach ( StringUtils::explode( "\n", $text ) as $line ) {
		$converted[] = $this->doQuotes( $line );
	}
	return implode( "\n", $converted );
}
1804
/**
 * Helper function for doAllQuotes(): converts the apostrophe markup of one
 * piece of text ('' italics, ''' bold, ''''' bold italics) into <i> and <b>
 * tags.
 *
 * A run of four apostrophes is read as one literal apostrophe followed by
 * bold markup; a run of more than five as literal apostrophes followed by
 * bold-italics markup. If the text contains an odd number of both bold and
 * italics marks, one bold mark is reinterpreted as a literal apostrophe
 * followed by italics markup. Tags that are still open at the end of the
 * text are closed.
 *
 * @param string $text Normally a single line, as passed by doAllQuotes()
 *
 * @return string
 */
public function doQuotes( $text ) {
	// Even indexes of $arr hold plain text, odd indexes hold apostrophe runs
	$arr = preg_split( "/(''+)/", $text, -1, PREG_SPLIT_DELIM_CAPTURE );
	$countarr = count( $arr );
	if ( $countarr == 1 ) {
		// No quote markup at all
		return $text;
	}

	// First, do some preliminary work. This may shift some apostrophes from
	// being mark-up to being text. It also counts the number of occurrences
	// of bold and italics mark-ups.
	$numbold = 0;
	$numitalics = 0;
	for ( $i = 1; $i < $countarr; $i += 2 ) {
		$thislen = strlen( $arr[$i] );
		// If there are ever four apostrophes, assume the first is supposed to
		// be text, and the remaining three constitute mark-up for bold text.
		// (T15227: ''''foo'''' turns into ' ''' foo ' ''')
		if ( $thislen == 4 ) {
			$arr[$i - 1] .= "'";
			$arr[$i] = "'''";
			$thislen = 3;
		} elseif ( $thislen > 5 ) {
			// If there are more than 5 apostrophes in a row, assume they're all
			// text except for the last 5.
			// (T15227: ''''''foo'''''' turns into ' ''''' foo ' ''''')
			$arr[$i - 1] .= str_repeat( "'", $thislen - 5 );
			$arr[$i] = "'''''";
			$thislen = 5;
		}
		// Count the number of occurrences of bold and italics mark-ups.
		if ( $thislen == 2 ) {
			$numitalics++;
		} elseif ( $thislen == 3 ) {
			$numbold++;
		} elseif ( $thislen == 5 ) {
			$numitalics++;
			$numbold++;
		}
	}

	// If there is an odd number of both bold and italics, it is likely
	// that one of the bold ones was meant to be an apostrophe followed
	// by italics. Which one we cannot know for certain, but it is more
	// likely to be one that has a single-letter word before it.
	if ( ( $numbold % 2 == 1 ) && ( $numitalics % 2 == 1 ) ) {
		$firstsingleletterword = -1;
		$firstmultiletterword = -1;
		$firstspace = -1;
		for ( $i = 1; $i < $countarr; $i += 2 ) {
			if ( strlen( $arr[$i] ) == 3 ) {
				// Last and second-to-last character of the preceding text
				$x1 = substr( $arr[$i - 1], -1 );
				$x2 = substr( $arr[$i - 1], -2, 1 );
				if ( $x1 === ' ' ) {
					if ( $firstspace == -1 ) {
						$firstspace = $i;
					}
				} elseif ( $x2 === ' ' ) {
					$firstsingleletterword = $i;
					// if $firstsingleletterword is set, we don't
					// look at the other options, so we can bail early.
					break;
				} elseif ( $firstmultiletterword == -1 ) {
					$firstmultiletterword = $i;
				}
			}
		}

		// If there is a single-letter word, use it!
		if ( $firstsingleletterword > -1 ) {
			$arr[$firstsingleletterword] = "''";
			$arr[$firstsingleletterword - 1] .= "'";
		} elseif ( $firstmultiletterword > -1 ) {
			// If not, but there's a multi-letter word, use that one.
			$arr[$firstmultiletterword] = "''";
			$arr[$firstmultiletterword - 1] .= "'";
		} elseif ( $firstspace > -1 ) {
			// ... otherwise use the first one that has neither.
			// (notice that it is possible for all three to be -1 if, for example,
			// there is only one quintuple-apostrophe in the line)
			$arr[$firstspace] = "''";
			$arr[$firstspace - 1] .= "'";
		}
	}

	// Now let's actually convert our apostrophic mush to HTML!
	// $state records which tags are open and in which order ('', 'i', 'b',
	// 'ib', 'bi'), or is 'both' after a ''''' whose nesting order is not
	// known yet. While in 'both', text is collected in $buffer instead of
	// being written to $output.
	$output = '';
	$buffer = '';
	$state = '';
	$i = 0;
	foreach ( $arr as $r ) {
		if ( ( $i % 2 ) == 0 ) {
			// Plain text
			if ( $state === 'both' ) {
				$buffer .= $r;
			} else {
				$output .= $r;
			}
		} else {
			// Apostrophe run
			$thislen = strlen( $r );
			if ( $thislen == 2 ) {
				if ( $state === 'i' ) {
					$output .= '</i>';
					$state = '';
				} elseif ( $state === 'bi' ) {
					$output .= '</i>';
					$state = 'b';
				} elseif ( $state === 'ib' ) {
					$output .= '</b></i><b>';
					$state = 'b';
				} elseif ( $state === 'both' ) {
					$output .= '<b><i>' . $buffer . '</i>';
					$state = 'b';
				} else { // $state can be 'b' or ''
					$output .= '<i>';
					$state .= 'i';
				}
			} elseif ( $thislen == 3 ) {
				if ( $state === 'b' ) {
					$output .= '</b>';
					$state = '';
				} elseif ( $state === 'bi' ) {
					$output .= '</i></b><i>';
					$state = 'i';
				} elseif ( $state === 'ib' ) {
					$output .= '</b>';
					$state = 'i';
				} elseif ( $state === 'both' ) {
					$output .= '<i><b>' . $buffer . '</b>';
					$state = 'i';
				} else { // $state can be 'i' or ''
					$output .= '<b>';
					$state .= 'b';
				}
			} elseif ( $thislen == 5 ) {
				if ( $state === 'b' ) {
					$output .= '</b><i>';
					$state = 'i';
				} elseif ( $state === 'i' ) {
					$output .= '</i><b>';
					$state = 'b';
				} elseif ( $state === 'bi' ) {
					$output .= '</i></b>';
					$state = '';
				} elseif ( $state === 'ib' ) {
					$output .= '</b></i>';
					$state = '';
				} elseif ( $state === 'both' ) {
					$output .= '<i><b>' . $buffer . '</b></i>';
					$state = '';
				} else { // ($state == '')
					$buffer = '';
					$state = 'both';
				}
			}
		}
		$i++;
	}
	// Now close all remaining tags.  Notice that the order is important.
	if ( $state === 'b' || $state === 'ib' ) {
		$output .= '</b>';
	}
	if ( $state === 'i' || $state === 'bi' || $state === 'ib' ) {
		$output .= '</i>';
	}
	if ( $state === 'bi' ) {
		$output .= '</b>';
	}
	// There might be a lonely ''''' whose text is still sitting in the buffer.
	// Compare against '' explicitly: a buffer of "0" is falsy in PHP, but it
	// is real text that must not be dropped.
	if ( $state === 'both' && $buffer !== '' ) {
		$output .= '<b><i>' . $buffer . '</i></b>';
	}
	return $output;
}
1984
1985         /**
1986          * Replace external links (REL)
1987          *
1988          * Note: this is all very hackish and the order of execution matters a lot.
1989          * Make sure to run tests/parser/parserTests.php if you change this code.
1990          *
1991          * @private
1992          *
1993          * @param string $text
1994          *
1995          * @throws MWException
1996          * @return string
1997          */
1998         public function replaceExternalLinks( $text ) {
1999                 $pieces = preg_split( $this->mExtLinkBracketedRegex, $text, -1, PREG_SPLIT_DELIM_CAPTURE );
2000                 if ( $pieces === false ) {
2001                         throw new MWException( "PCRE needs to be compiled with "
2002                                 . "--enable-unicode-properties in order for MediaWiki to function" );
2003                 }
2004
2005                 # Whatever precedes the first bracketed link passes through untouched
2006                 $html = array_shift( $pieces );
2007
2008                 # Every link takes up four consecutive slots: the URL, its protocol
2009                 # (not needed here), the link text, and the text following the link
2010                 $total = count( $pieces );
2011                 for ( $pos = 0; $pos < $total; $pos += 4 ) {
2012                         $url = $pieces[$pos];
2013                         $label = $pieces[$pos + 2];
2014                         $trail = $pieces[$pos + 3];
2015
2016                         # '<' and '>' were escaped by removeHTMLtags() and, per RFC 2396,
2017                         # should not be included in URLs: cut the URL at the first such
2018                         # entity and move the remainder to the front of the link text
2019                         $entity = [];
2020                         if ( preg_match( '/&(lt|gt);/', $url, $entity, PREG_OFFSET_CAPTURE ) ) {
2021                                 $cut = $entity[0][1];
2022                                 $label = substr( $url, $cut ) . ' ' . $label;
2023                                 $url = substr( $url, 0, $cut );
2024                         }
2025
2026                         # Link text that is itself an image URL is swapped for an <img> tag;
2027                         # an accident of the original parser that people came to rely on
2028                         $image = $this->maybeMakeExternalImage( $label );
2029                         if ( $image !== false ) {
2030                                 $label = $image;
2031                         }
2032
2033                         if ( $label == '' ) {
2034                                 # No link text, e.g. [http://domain.tld/some.link]: autonumber
2035                                 $number = $this->getTargetLanguage()->formatNum( ++$this->mAutonumber );
2036                                 $label = '[' . $number . ']';
2037                                 $linktype = 'autonumber';
2038                                 $dtrail = '';
2039                         } else {
2040                                 # Have link text, e.g. [http://domain.tld/some.link text]s:
2041                                 # check for trail
2042                                 list( $dtrail, $trail ) = Linker::splitTrail( $trail );
2043                                 $linktype = 'text';
2044                         }
2045
2046                         // Excluding protocol-relative URLs may avoid many false positives.
2047                         if ( preg_match( '/^(?:' . wfUrlProtocolsWithoutProtRel() . ')/', $label ) ) {
2048                                 $label = $this->getTargetLanguage()->getConverter()->markNoConversion( $label );
2049                         }
2050
2051                         $url = Sanitizer::cleanUrl( $url );
2052
2053                         # Link to the encoded URL, so users can paste URLs directly into
2054                         # the text (characters such as ö aren't valid in URLs anyway)
2055                         $attribs = $this->getExternalLinkAttribs( $url );
2056                         $link = Linker::makeExternalLink( $url, $label, false, $linktype,
2057                                 $attribs, $this->mTitle );
2058                         $html .= $link . $dtrail . $trail;
2059
2060                         # Register link in the output object.
2061                         $this->mOutput->addExternalLink( $url );
2062                 }
2063
2064                 return $html;
2065         }
2066
2067         /**
2068          * Get the rel attribute for a particular external link.
2069          *
2070          * @since 1.21
2071          * @param string|bool $url Optional URL, to extract the domain from for rel =>
2072          *   nofollow if appropriate
2073          * @param LinkTarget|null $title Optional LinkTarget, for wgNoFollowNsExceptions lookups
2074          * @return string|null Rel attribute for $url
2075          */
2076         public static function getExternalLinkRel( $url = false, $title = null ) {
2077                 global $wgNoFollowLinks, $wgNoFollowNsExceptions, $wgNoFollowDomainExceptions;
2078                 # Namespace exceptions only apply when a title was supplied. Passing a
2079                 # placeholder false to the loose in_array() would match namespace 0
2080                 # (false == 0) and drop nofollow for callers that gave no title.
2081                 $nsExempt = $title && in_array( $title->getNamespace(), $wgNoFollowNsExceptions );
2082                 $exempt = !$wgNoFollowLinks || $nsExempt
2083                         || wfMatchesDomainList( $url, $wgNoFollowDomainExceptions );
2084                 return $exempt ? null : 'nofollow';
2085         }
2086
2087         /**
2088          * Get an associative array of additional HTML attributes appropriate for a
2089          * particular external link.  This currently may include rel => nofollow
2090          * (depending on configuration, namespace, and the URL's domain) and/or a
2091          * target attribute (depending on configuration).
2092          *
2093          * @param string $url URL to extract the domain from for rel =>
2094          *   nofollow if appropriate
2095          * @return array Associative array of HTML attributes
2096          */
2097         public function getExternalLinkAttribs( $url ) {
2098                 $attribs = [];
2099                 $rel = self::getExternalLinkRel( $url, $this->mTitle );
2100
2101                 $target = $this->mOptions->getExternalLinkTarget();
2102                 if ( $target ) {
2103                         $attribs['target'] = $target;
2104                         if ( !in_array( $target, [ '_self', '_parent', '_top' ] ) ) {
2105                                 // T133507. New windows can navigate parent cross-origin.
2106                                 // Including noreferrer due to lacking browser
2107                                 // support of noopener. Eventually noreferrer should be removed.
2108                                 # getExternalLinkRel() returns null when nofollow does not apply, so
2109                                 # only put a separating space after a value that is actually there.
2110                                 $existing = ( $rel === null || $rel === '' ) ? '' : $rel . ' ';
2111                                 $rel = $existing . 'noreferrer noopener';
2112                         }
2113                 }
2114                 $attribs['rel'] = $rel;
2115                 return $attribs;
2116         }
2117
2118         /**
2119          * Replace unusual escape codes in a URL with their equivalent characters
2120          *
2121          * This generally follows the syntax defined in RFC 3986, with special
2122          * consideration for HTTP query strings.
2123          *
2124          * @param string $url
2125          * @return string
2126          */
2127         public static function normalizeLinkUrl( $url ) {
2128                 # Test for RFC 3986 IPv6 syntax: a bracketed host that decodes to a valid IP
2129                 $schemeRe = '[a-z][a-z0-9+.-]*:';
2130                 $userinfoRe = '(?:[a-z0-9\-._~!$&\'()*+,;=:]|%[0-9a-f]{2})*';
2131                 $bracketHostRe = '\\[((?:[0-9a-f:]|%3[0-A]|%[46][1-6])+)\\]';
2132                 $hostPattern = '<^(?:' . $schemeRe . ')?//(?:' . $userinfoRe . '@)?'
2133                         . $bracketHostRe . '(?:[:/?#].*|)$>i';
2134                 $ipv6Address = false;
2135                 if ( preg_match( $hostPattern, $url, $hostMatch ) ) {
2136                         $candidate = rawurldecode( $hostMatch[1] );
2137                         if ( IP::isValid( $candidate ) ) {
2138                                 $ipv6Address = $candidate;
2139                         }
2140                 }
2141
2142                 # Make sure unsafe characters are encoded
2143                 $encodeUnsafe = function ( $found ) {
2144                         return rawurlencode( $found[0] );
2145                 };
2146                 $url = preg_replace_callback(
2147                         '/[\x00-\x20"<>\[\\\\\]^`{|}\x7F-\xFF]/', $encodeUnsafe, $url );
2148
2149                 # Peel the fragment, then the query, off the end of the URL. Each part
2150                 # has its own list of characters whose escapes must be left encoded.
2151                 $tailParts = [
2152                         # Fragment part - 'fragment'
2153                         '#' => '"#%<>[\]^`{|}',
2154                         # Query part - 'query' minus &=+;
2155                         '?' => '"#%<>[\]^`{|}&=+;',
2156                 ];
2157                 $normalized = '';
2158                 $end = strlen( $url );
2159                 foreach ( $tailParts as $delimiter => $keepEncoded ) {
2160                         $start = strpos( $url, $delimiter );
2161                         if ( $start === false || $start >= $end ) {
2162                                 continue;
2163                         }
2164                         $part = substr( $url, $start, $end - $start );
2165                         $normalized = self::normalizeUrlComponent( $part, $keepEncoded ) . $normalized;
2166                         $end = $start;
2167                 }
2168
2169                 # Scheme and path part - 'pchar'
2170                 # (we assume no userinfo or encoded colons in the host)
2171                 $head = substr( $url, 0, $end );
2172                 $normalized = self::normalizeUrlComponent( $head, '"#%<>[\]^`{|}/?' ) . $normalized;
2173
2174                 # Fix IPv6 syntax
2175                 if ( $ipv6Address !== false ) {
2176                         # The brackets were percent-encoded above; put the literal ones back
2177                         $encodedHost = '<^((?:' . $schemeRe . ')?//(?:' . $userinfoRe . '@)?)'
2178                                 . '%5B(' . $ipv6Address . ')%5D(?=[:/?#]|$)>i';
2179                         $normalized = preg_replace( $encodedHost, '$1[$2]', $normalized );
2180                 }
2181
2182                 return $normalized;
2183         }
2184
2185         private static function normalizeUrlComponent( $component, $unsafe ) {
2186                 # Handles a single %XX escape: decode it, or keep it and tidy its case
2187                 $fixEscape = function ( $hit ) use ( $unsafe ) {
2188                         $decoded = urldecode( $hit[0] );
2189                         $code = ord( $decoded );
2190                         if ( $code <= 32 || $code >= 127 || strpos( $unsafe, $decoded ) !== false ) {
2191                                 # Leave it escaped, but use uppercase for a-f
2192                                 return strtoupper( $hit[0] );
2193                         }
2194                         # Printable ASCII that is not listed as unsafe: unescape it
2195                         return $decoded;
2196                 };
2197                 return preg_replace_callback( '/%[0-9A-Fa-f]{2}/', $fixEscape, $component );
2198         }
2199
2200         /**
2201          * make an image if it's allowed, either through the global
2202          * option, through the exception, or through the on-wiki whitelist
2203          *
2204          * @param string $url
2205          *
2206          * @return string|bool Result of Linker::makeExternalImage(), or false if no image was made
2207          */
2208         private function maybeMakeExternalImage( $url ) {
2209                 $allowedFrom = $this->mOptions->getAllowExternalImagesFrom();
2210
2211                 # The setting holds either a single URL prefix or an array of them;
2212                 # $url matches when it begins with (one of) the configured prefix(es)
2213                 $prefixMatched = false;
2214                 if ( !empty( $allowedFrom ) ) {
2215                         if ( is_array( $allowedFrom ) ) {
2216                                 foreach ( $allowedFrom as $candidate ) {
2217                                         if ( strpos( $url, $candidate ) === 0 ) {
2218                                                 $prefixMatched = true;
2219                                                 break;
2220                                         }
2221                                 }
2222                         } else {
2223                                 $prefixMatched = ( strpos( $url, $allowedFrom ) === 0 );
2224                         }
2225                 }
2226
2227                 $html = false;
2228                 if ( ( $this->mOptions->getAllowExternalImages() || $prefixMatched )
2229                         && preg_match( self::EXT_IMAGE_REGEX, $url )
2230                 ) {
2231                         # Image found
2232                         $html = Linker::makeExternalImage( $url );
2233                 }
2234                 # Fall back to the on-wiki whitelist when no image was made above
2235                 if ( $html || !$this->mOptions->getEnableImageWhitelist()
2236                         || !preg_match( self::EXT_IMAGE_REGEX, $url )
2237                 ) {
2238                         return $html;
2239                 }
2240
2241                 $message = wfMessage( 'external_image_whitelist' )->inContentLanguage()->text();
2242                 $entries = explode( "\n", $message );
2243                 foreach ( $entries as $fragment ) {
2244                         # Ignore blank entries/comments
2245                         if ( $fragment === '' || strpos( $fragment, '#' ) === 0 ) {
2246                                 continue;
2247                         }
2248                         # Escape the regex delimiter in the fragment, match case-insensitively
2249                         if ( preg_match( '/' . str_replace( '/', '\\/', $fragment ) . '/i', $url ) ) {
2250                                 # Image matches a whitelist entry
2251                                 $html = Linker::makeExternalImage( $url );
2252                                 break;
2253                         }
2254                 }
2255                 return $html;
2256         }
2257
2258         /**
2259          * Process [[ ]] wikilinks
2260          *
2261          * @param string $s
2262          *
2263          * @return string Processed text
2264          *
2265          * @private
2266          */
2267         public function replaceInternalLinks( $s ) {
                     # replaceInternalLinks2() takes $s by reference and rewrites it in place;
                     # the link holder array it returns is merged into this parser's own
                     # holder collection.
2268                 $this->mLinkHolders->merge( $this->replaceInternalLinks2( $s ) );
                     # Hand back the text as rewritten by the call above
2269                 return $s;
2270         }
2271
2272         /**
2273          * Process [[ ]] wikilinks (RIL)
2274          * @param string &$s
2275          * @throws MWException
2276          * @return LinkHolderArray
2277          *
2278          * @private
2279          */
2280         public function replaceInternalLinks2( &$s ) {
2281                 static $tc = false, $e1, $e1_img;
2282                 # the % is needed to support urlencoded titles as well
2283                 if ( !$tc ) {
2284                         $tc = Title::legalChars() . '#%';
2285                         # Match a link having the form [[namespace:link|alternate]]trail
2286                         $e1 = "/^([{$tc}]+)(?:\\|(.+?))?]](.*)\$/sD";
2287                         # Match cases where there is no "]]", which might still be images
2288                         $e1_img = "/^([{$tc}]+)\\|(.*)\$/sD";
2289                 }
2290
2291                 $holders = new LinkHolderArray( $this );
2292
2293                 # split the entire text string on occurrences of [[
2294                 $a = StringUtils::explode( '[[', ' ' . $s );
2295                 # get the first element (all text up to first [[), and remove the space we added
2296                 $s = $a->current();
2297                 $a->next();
2298                 $line = $a->current(); # Workaround for broken ArrayIterator::next() that returns "void"
2299                 $s = substr( $s, 1 );
2300
2301                 $useLinkPrefixExtension = $this->getTargetLanguage()->linkPrefixExtension();
2302                 $e2 = null;
2303                 if ( $useLinkPrefixExtension ) {
2304                         # Match the end of a line for a word that's not followed by whitespace,
2305                         # e.g. in the case of 'The Arab al[[Razi]]', 'al' will be matched
2306                         $charset = $this->contLang->linkPrefixCharset();
2307                         $e2 = "/^((?>.*[^$charset]|))(.+)$/sDu";
2308                 }
2309
2310                 if ( is_null( $this->mTitle ) ) {
2311                         throw new MWException( __METHOD__ . ": \$this->mTitle is null\n" );
2312                 }
2313                 $nottalk = !$this->mTitle->isTalkPage();
2314
2315                 if ( $useLinkPrefixExtension ) {
2316                         $m = [];
2317                         if ( preg_match( $e2, $s, $m ) ) {
2318                                 $first_prefix = $m[2];
2319                         } else {
2320                                 $first_prefix = false;
2321                         }
2322                 } else {
2323                         $prefix = '';
2324                 }
2325
2326                 $useSubpages = $this->areSubpagesAllowed();
2327
2328                 # Loop for each link
2329                 for ( ; $line !== false && $line !== null; $a->next(), $line = $a->current() ) {
2330                         # Check for excessive memory usage
2331                         if ( $holders->isBig() ) {
2332                                 # Too big
2333                                 # Do the existence check, replace the link holders and clear the array
2334                                 $holders->replace( $s );
2335                                 $holders->clear();
2336                         }
2337
2338                         if ( $useLinkPrefixExtension ) {
2339                                 if ( preg_match( $e2, $s, $m ) ) {
2340                                         list( , $s, $prefix ) = $m;
2341                                 } else {
2342                                         $prefix = '';
2343                                 }
2344                                 # first link
2345                                 if ( $first_prefix ) {
2346                                         $prefix = $first_prefix;
2347                                         $first_prefix = false;
2348                                 }
2349                         }
2350
2351                         $might_be_img = false;
2352
2353                         if ( preg_match( $e1, $line, $m ) ) { # page with normal text or alt
2354                                 $text = $m[2];
2355                                 # If we get a ] at the beginning of $m[3] that means we have a link that's something like:
2356                                 # [[Image:Foo.jpg|[http://example.com desc]]] <- having three ] in a row fucks up,
2357                                 # the real problem is with the $e1 regex
2358                                 # See T1500.
2359                                 # Still some problems for cases where the ] is meant to be outside punctuation,
2360                                 # and no image is in sight. See T4095.
2361                                 if ( $text !== ''
2362                                         && substr( $m[3], 0, 1 ) === ']'
2363                                         && strpos( $text, '[' ) !== false
2364                                 ) {
2365                                         $text .= ']'; # so that replaceExternalLinks($text) works later
2366                                         $m[3] = substr( $m[3], 1 );
2367                                 }
2368                                 # fix up urlencoded title texts
2369                                 if ( strpos( $m[1], '%' ) !== false ) {
2370                                         # Should anchors '#' also be rejected?
2371                                         $m[1] = str_replace( [ '<', '>' ], [ '&lt;', '&gt;' ], rawurldecode( $m[1] ) );
2372                                 }
2373                                 $trail = $m[3];
2374                         } elseif ( preg_match( $e1_img, $line, $m ) ) {
2375                                 # Invalid, but might be an image with a link in its caption
2376                                 $might_be_img = true;
2377                                 $text = $m[2];
2378                                 if ( strpos( $m[1], '%' ) !== false ) {
2379                                         $m[1] = str_replace( [ '<', '>' ], [ '&lt;', '&gt;' ], rawurldecode( $m[1] ) );
2380                                 }
2381                                 $trail = "";
2382                         } else { # Invalid form; output directly
2383                                 $s .= $prefix . '[[' . $line;
2384                                 continue;
2385                         }
2386
2387                         $origLink = ltrim( $m[1], ' ' );
2388
2389                         # Don't allow internal links to pages containing
2390                         # PROTO: where PROTO is a valid URL protocol; these
2391                         # should be external links.
2392                         if ( preg_match( '/^(?i:' . $this->mUrlProtocols . ')/', $origLink ) ) {
2393                                 $s .= $prefix . '[[' . $line;
2394                                 continue;
2395                         }
2396
2397                         # Make subpage if necessary
2398                         if ( $useSubpages ) {
2399                                 $link = $this->maybeDoSubpageLink( $origLink, $text );
2400                         } else {
2401                                 $link = $origLink;
2402                         }
2403
2404                         // \x7f isn't a default legal title char, so most likely strip
2405                         // markers will force us into the "invalid form" path above.  But,
2406                         // just in case, let's assert that xmlish tags aren't valid in
2407                         // the title position.
2408                         $unstrip = $this->mStripState->killMarkers( $link );
2409                         $noMarkers = ( $unstrip === $link );
2410
2411                         $nt = $noMarkers ? Title::newFromText( $link ) : null;
2412                         if ( $nt === null ) {
2413                                 $s .= $prefix . '[[' . $line;
2414                                 continue;
2415                         }
2416
2417                         $ns = $nt->getNamespace();
2418                         $iw = $nt->getInterwiki();
2419
2420                         $noforce = ( substr( $origLink, 0, 1 ) !== ':' );
2421
2422                         if ( $might_be_img ) { # if this is actually an invalid link
2423                                 if ( $ns == NS_FILE && $noforce ) { # but might be an image
2424                                         $found = false;
2425                                         while ( true ) {
2426                                                 # look at the next 'line' to see if we can close it there
2427                                                 $a->next();
2428                                                 $next_line = $a->current();
2429                                                 if ( $next_line === false || $next_line === null ) {
2430                                                         break;
2431                                                 }
2432                                                 $m = explode( ']]', $next_line, 3 );
2433                                                 if ( count( $m ) == 3 ) {
2434                                                         # the first ]] closes the inner link, the second the image
2435                                                         $found = true;
2436                                                         $text .= "[[{$m[0]}]]{$m[1]}";
2437                                                         $trail = $m[2];
2438                                                         break;
2439                                                 } elseif ( count( $m ) == 2 ) {
2440                                                         # if there's exactly one ]] that's fine, we'll keep looking
2441                                                         $text .= "[[{$m[0]}]]{$m[1]}";
2442                                                 } else {
2443                                                         # if $next_line is invalid too, we need look no further
2444                                                         $text .= '[[' . $next_line;
2445                                                         break;
2446                                                 }
2447                                         }
2448                                         if ( !$found ) {
2449                                                 # we couldn't find the end of this imageLink, so output it raw
2450                                                 # but don't ignore what might be perfectly normal links in the text we've examined
2451                                                 $holders->merge( $this->replaceInternalLinks2( $text ) );
2452                                                 $s .= "{$prefix}[[$link|$text";
2453                                                 # note: no $trail, because without an end, there *is* no trail
2454                                                 continue;
2455                                         }
2456                                 } else { # it's not an image, so output it raw
2457                                         $s .= "{$prefix}[[$link|$text";
2458                                         # note: no $trail, because without an end, there *is* no trail
2459                                         continue;
2460                                 }
2461                         }
2462
2463                         $wasblank = ( $text == '' );
2464                         if ( $wasblank ) {
2465                                 $text = $link;
2466                                 if ( !$noforce ) {
2467                                         # Strip off leading ':'
2468                                         $text = substr( $text, 1 );
2469                                 }
2470                         } else {
2471                                 # T6598 madness. Handle the quotes only if they come from the alternate part
2472                                 # [[Lista d''e paise d''o munno]] -> <a href="...">Lista d''e paise d''o munno</a>
2473                                 # [[Criticism of Harry Potter|Criticism of ''Harry Potter'']]
2474                                 #    -> <a href="Criticism of Harry Potter">Criticism of <i>Harry Potter</i></a>
2475                                 $text = $this->doQuotes( $text );
2476                         }
2477
2478                         # Link not escaped by : , create the various objects
2479                         if ( $noforce && !$nt->wasLocalInterwiki() ) {
2480                                 # Interwikis
2481                                 if (
2482                                         $iw && $this->mOptions->getInterwikiMagic() && $nottalk && (
2483                                                 Language::fetchLanguageName( $iw, null, 'mw' ) ||
2484                                                 in_array( $iw, $this->svcOptions->get( 'ExtraInterlanguageLinkPrefixes' ) )
2485                                         )
2486                                 ) {
2487                                         # T26502: filter duplicates
2488                                         if ( !isset( $this->mLangLinkLanguages[$iw] ) ) {
2489                                                 $this->mLangLinkLanguages[$iw] = true;
2490                                                 $this->mOutput->addLanguageLink( $nt->getFullText() );
2491                                         }
2492
2493                                         /**
2494                                          * Strip the whitespace interwiki links produce, see T10897
2495                                          */
2496                                         $s = rtrim( $s . $prefix ) . $trail; # T175416
2497                                         continue;
2498                                 }
2499
2500                                 if ( $ns == NS_FILE ) {
2501                                         if ( !wfIsBadImage( $nt->getDBkey(), $this->mTitle ) ) {
2502                                                 if ( $wasblank ) {
2503                                                         # if no parameters were passed, $text
2504                                                         # becomes something like "File:Foo.png",
2505                                                         # which we don't want to pass on to the
2506                                                         # image generator
2507                                                         $text = '';
2508                                                 } else {
2509                                                         # recursively parse links inside the image caption
2510                                                         # actually, this will parse them in any other parameters, too,
2511                                                         # but it might be hard to fix that, and it doesn't matter ATM
2512                                                         $text = $this->replaceExternalLinks( $text );
2513                                                         $holders->merge( $this->replaceInternalLinks2( $text ) );
2514                                                 }
2515                                                 # cloak any absolute URLs inside the image markup, so replaceExternalLinks() won't touch them
2516                                                 $s .= $prefix . $this->armorLinks(
2517                                                         $this->makeImage( $nt, $text, $holders ) ) . $trail;
2518                                                 continue;
2519                                         }
2520                                 } elseif ( $ns == NS_CATEGORY ) {
2521                                         /**
2522                                          * Strip the whitespace Category links produce, see T2087
2523                                          */
2524                                         $s = rtrim( $s . $prefix ) . $trail; # T2087, T87753
2525
2526                                         if ( $wasblank ) {
2527                                                 $sortkey = $this->getDefaultSort();
2528                                         } else {
2529                                                 $sortkey = $text;
2530                                         }
2531                                         $sortkey = Sanitizer::decodeCharReferences( $sortkey );
2532                                         $sortkey = str_replace( "\n", '', $sortkey );
2533                                         $sortkey = $this->getTargetLanguage()->convertCategoryKey( $sortkey );
2534                                         $this->mOutput->addCategory( $nt->getDBkey(), $sortkey );
2535
2536                                         continue;
2537                                 }
2538                         }
2539
2540                         # Self-link checking. For some languages, variants of the title are checked in
2541                         # LinkHolderArray::doVariants() to allow batching the existence checks necessary
2542                         # for linking to a different variant.
2543                         if ( $ns != NS_SPECIAL && $nt->equals( $this->mTitle ) && !$nt->hasFragment() ) {
2544                                 $s .= $prefix . Linker::makeSelfLinkObj( $nt, $text, '', $trail );
2545                                 continue;
2546                         }
2547
2548                         # NS_MEDIA is a pseudo-namespace for linking directly to a file
2549                         # @todo FIXME: Should do batch file existence checks, see comment below
2550                         if ( $ns == NS_MEDIA ) {
2551                                 # Give extensions a chance to select the file revision for us
2552                                 $options = [];
2553                                 $descQuery = false;
2554                                 Hooks::run( 'BeforeParserFetchFileAndTitle',
2555                                         [ $this, $nt, &$options, &$descQuery ] );
2556                                 # Fetch and register the file (file title may be different via hooks)
2557                                 list( $file, $nt ) = $this->fetchFileAndTitle( $nt, $options );
2558                                 # Cloak with NOPARSE to avoid replacement in replaceExternalLinks
2559                                 $s .= $prefix . $this->armorLinks(
2560                                         Linker::makeMediaLinkFile( $nt, $file, $text ) ) . $trail;
2561                                 continue;
2562                         }
2563
2564                         # Some titles, such as valid special pages or files in foreign repos, should
2565                         # be shown as bluelinks even though they're not included in the page table
2566                         # @todo FIXME: isAlwaysKnown() can be expensive for file links; we should really do
2567                         # batch file existence checks for NS_FILE and NS_MEDIA
2568                         if ( $iw == '' && $nt->isAlwaysKnown() ) {
2569                                 $this->mOutput->addLink( $nt );
2570                                 $s .= $this->makeKnownLinkHolder( $nt, $text, $trail, $prefix );
2571                         } else {
2572                                 # Links will be added to the output link list after checking
2573                                 $s .= $holders->makeHolder( $nt, $text, [], $trail, $prefix );
2574                         }
2575                 }
2576                 return $holders;
2577         }
2578
2579         /**
2580          * Render a forced-blue link inline; protect against double expansion of
2581          * URLs if we're in a mode that prepends full URL prefixes to internal links.
2582          * Since this little disaster has to split off the trail text to avoid
2583          * breaking URLs in the following text without breaking trails on the
2584          * wiki links, it's been made into a horrible function.
2585          *
2586          * @param Title $nt
2587          * @param string $text
2588          * @param string $trail
2589          * @param string $prefix
2590          * @return string HTML-wikitext mix oh yuck
2591          */
2592         protected function makeKnownLinkHolder( $nt, $text = '', $trail = '', $prefix = '' ) {
2593                 list( $inside, $trail ) = Linker::splitTrail( $trail );
2594
2595                 if ( $text == '' ) {
2596                         $text = htmlspecialchars( $nt->getPrefixedText() );
2597                 }
2598
2599                 $link = $this->getLinkRenderer()->makeKnownLink(
2600                         $nt, new HtmlArmor( "$prefix$text$inside" )
2601                 );
2602
2603                 return $this->armorLinks( $link ) . $trail;
2604         }
2605
2606         /**
2607          * Insert a NOPARSE hacky thing into any inline links in a chunk that's
2608          * going to go through further parsing steps before inline URL expansion.
2609          *
2610          * Not needed quite as much as it used to be since free links are a bit
2611          * more sensible these days. But bracketed links are still an issue.
2612          *
2613          * @param string $text More-or-less HTML
2614          * @return string Less-or-more HTML with NOPARSE bits
2615          */
2616         public function armorLinks( $text ) {
2617                 return preg_replace( '/\b((?i)' . $this->mUrlProtocols . ')/',
2618                         self::MARKER_PREFIX . "NOPARSE$1", $text );
2619         }
2620
2621         /**
2622          * Return true if subpage links should be expanded on this page.
2623          * @return bool
2624          */
2625         public function areSubpagesAllowed() {
2626                 # Some namespaces don't allow subpages
2627                 return $this->nsInfo->hasSubpages( $this->mTitle->getNamespace() );
2628         }
2629
2630         /**
2631          * Handle link to subpage if necessary
2632          *
2633          * @param string $target The source of the link
2634          * @param string &$text The link text, modified as necessary
2635          * @return string The full name of the link
2636          * @private
2637          */
2638         public function maybeDoSubpageLink( $target, &$text ) {
2639                 return Linker::normalizeSubpageLink( $this->mTitle, $target, $text );
2640         }
2641
2642         /**
2643          * Make lists from lines starting with ':', '*', '#', etc. (DBL)
2644          *
2645          * @param string $text
2646          * @param bool $linestart Whether or not this is at the start of a line.
2647          * @private
2648          * @return string The lists rendered as HTML
2649          */
2650         public function doBlockLevels( $text, $linestart ) {
2651                 return BlockLevelPass::doBlockLevels( $text, $linestart );
2652         }
2653
2654         /**
2655          * Return value of a magic variable (like PAGENAME)
2656          *
2657          * @private
2658          *
2659          * @param string $index Magic variable identifier as mapped in MagicWordFactory::$mVariableIDs
2660          * @param bool|PPFrame $frame
2661          *
2662          * @throws MWException
2663          * @return string
2664          */
2665         public function getVariableValue( $index, $frame = false ) {
2666                 if ( is_null( $this->mTitle ) ) {
2667                         // If no title set, bad things are going to happen
2668                         // later. Title should always be set since this
2669                         // should only be called in the middle of a parse
2670                         // operation (but the unit-tests do funky stuff)
2671                         throw new MWException( __METHOD__ . ' Should only be '
2672                                 . ' called while parsing (no title set)' );
2673                 }
2674
2675                 // Avoid PHP 7.1 warning from passing $this by reference
2676                 $parser = $this;
2677
2678                 /**
2679                  * Some of these require message or data lookups and can be
2680                  * expensive to check many times.
2681                  */
2682                 if (
2683                         Hooks::run( 'ParserGetVariableValueVarCache', [ &$parser, &$this->mVarCache ] ) &&
2684                         isset( $this->mVarCache[$index] )
2685                 ) {
2686                         return $this->mVarCache[$index];
2687                 }
2688
2689                 $ts = wfTimestamp( TS_UNIX, $this->mOptions->getTimestamp() );
2690                 Hooks::run( 'ParserGetVariableValueTs', [ &$parser, &$ts ] );
2691
2692                 $pageLang = $this->getFunctionLang();
2693
2694                 switch ( $index ) {
2695                         case '!':
2696                                 $value = '|';
2697                                 break;
2698                         case 'currentmonth':
2699                                 $value = $pageLang->formatNum( MWTimestamp::getInstance( $ts )->format( 'm' ), true );
2700                                 break;
2701                         case 'currentmonth1':
2702                                 $value = $pageLang->formatNum( MWTimestamp::getInstance( $ts )->format( 'n' ), true );
2703                                 break;
2704                         case 'currentmonthname':
2705                                 $value = $pageLang->getMonthName( MWTimestamp::getInstance( $ts )->format( 'n' ) );
2706                                 break;
2707                         case 'currentmonthnamegen':
2708                                 $value = $pageLang->getMonthNameGen( MWTimestamp::getInstance( $ts )->format( 'n' ) );
2709                                 break;
2710                         case 'currentmonthabbrev':
2711                                 $value = $pageLang->getMonthAbbreviation( MWTimestamp::getInstance( $ts )->format( 'n' ) );
2712                                 break;
2713                         case 'currentday':
2714                                 $value = $pageLang->formatNum( MWTimestamp::getInstance( $ts )->format( 'j' ), true );
2715                                 break;
2716                         case 'currentday2':
2717                                 $value = $pageLang->formatNum( MWTimestamp::getInstance( $ts )->format( 'd' ), true );
2718                                 break;
2719                         case 'localmonth':
2720                                 $value = $pageLang->formatNum( MWTimestamp::getLocalInstance( $ts )->format( 'm' ), true );
2721                                 break;
2722                         case 'localmonth1':
2723                                 $value = $pageLang->formatNum( MWTimestamp::getLocalInstance( $ts )->format( 'n' ), true );
2724                                 break;
2725                         case 'localmonthname':
2726                                 $value = $pageLang->getMonthName( MWTimestamp::getLocalInstance( $ts )->format( 'n' ) );
2727                                 break;
2728                         case 'localmonthnamegen':
2729                                 $value = $pageLang->getMonthNameGen( MWTimestamp::getLocalInstance( $ts )->format( 'n' ) );
2730                                 break;
2731                         case 'localmonthabbrev':
2732                                 $value = $pageLang->getMonthAbbreviation( MWTimestamp::getLocalInstance( $ts )->format( 'n' ) );
2733                                 break;
2734                         case 'localday':
2735                                 $value = $pageLang->formatNum( MWTimestamp::getLocalInstance( $ts )->format( 'j' ), true );
2736                                 break;
2737                         case 'localday2':
2738                                 $value = $pageLang->formatNum( MWTimestamp::getLocalInstance( $ts )->format( 'd' ), true );
2739                                 break;
2740                         case 'pagename':
2741                                 $value = wfEscapeWikiText( $this->mTitle->getText() );
2742                                 break;
2743                         case 'pagenamee':
2744                                 $value = wfEscapeWikiText( $this->mTitle->getPartialURL() );
2745                                 break;
2746                         case 'fullpagename':
2747                                 $value = wfEscapeWikiText( $this->mTitle->getPrefixedText() );
2748                                 break;
2749                         case 'fullpagenamee':
2750                                 $value = wfEscapeWikiText( $this->mTitle->getPrefixedURL() );
2751                                 break;
2752                         case 'subpagename':
2753                                 $value = wfEscapeWikiText( $this->mTitle->getSubpageText() );
2754                                 break;
2755                         case 'subpagenamee':
2756                                 $value = wfEscapeWikiText( $this->mTitle->getSubpageUrlForm() );
2757                                 break;
2758                         case 'rootpagename':
2759                                 $value = wfEscapeWikiText( $this->mTitle->getRootText() );
2760                                 break;
2761                         case 'rootpagenamee':
2762                                 $value = wfEscapeWikiText( wfUrlencode( str_replace(
2763                                         ' ',
2764                                         '_',
2765                                         $this->mTitle->getRootText()
2766                                 ) ) );
2767                                 break;
2768                         case 'basepagename':
2769                                 $value = wfEscapeWikiText( $this->mTitle->getBaseText() );
2770                                 break;
2771                         case 'basepagenamee':
2772                                 $value = wfEscapeWikiText( wfUrlencode( str_replace(
2773                                         ' ',
2774                                         '_',
2775                                         $this->mTitle->getBaseText()
2776                                 ) ) );
2777                                 break;
2778                         case 'talkpagename':
2779                                 if ( $this->mTitle->canHaveTalkPage() ) {
2780                                         $talkPage = $this->mTitle->getTalkPage();
2781                                         $value = wfEscapeWikiText( $talkPage->getPrefixedText() );
2782                                 } else {
2783                                         $value = '';
2784                                 }
2785                                 break;
2786                         case 'talkpagenamee':
2787                                 if ( $this->mTitle->canHaveTalkPage() ) {
2788                                         $talkPage = $this->mTitle->getTalkPage();
2789                                         $value = wfEscapeWikiText( $talkPage->getPrefixedURL() );
2790                                 } else {
2791                                         $value = '';
2792                                 }
2793                                 break;
2794                         case 'subjectpagename':
2795                                 $subjPage = $this->mTitle->getSubjectPage();
2796                                 $value = wfEscapeWikiText( $subjPage->getPrefixedText() );
2797                                 break;
2798                         case 'subjectpagenamee':
2799                                 $subjPage = $this->mTitle->getSubjectPage();
2800                                 $value = wfEscapeWikiText( $subjPage->getPrefixedURL() );
2801                                 break;
2802                         case 'pageid': // requested in T25427
2803                                 # Inform the edit saving system that getting the canonical output
2804                                 # after page insertion requires a parse that used that exact page ID
2805                                 $this->setOutputFlag( 'vary-page-id', '{{PAGEID}} used' );
2806                                 $value = $this->mTitle->getArticleID();
2807                                 if ( !$value ) {
2808                                         $value = $this->mOptions->getSpeculativePageId();
2809                                         if ( $value ) {
2810                                                 $this->mOutput->setSpeculativePageIdUsed( $value );
2811                                         }
2812                                 }
2813                                 break;
2814                         case 'revisionid':
2815                                 if (
2816                                         $this->svcOptions->get( 'MiserMode' ) &&
2817                                         !$this->mOptions->getInterfaceMessage() &&
2818                                         // @TODO: disallow this word on all namespaces
2819                                         $this->nsInfo->isContent( $this->mTitle->getNamespace() )
2820                                 ) {
2821                                         // Use a stub result instead of the actual revision ID in order to avoid
2822                                         // double parses on page save but still allow preview detection (T137900)
2823                                         if ( $this->getRevisionId() || $this->mOptions->getSpeculativeRevId() ) {
2824                                                 $value = '-';
2825                                         } else {
2826                                                 $this->setOutputFlag( 'vary-revision-exists', '{{REVISIONID}} used' );
2827                                                 $value = '';
2828                                         }
2829                                 } else {
2830                                         # Inform the edit saving system that getting the canonical output after
2831                                         # revision insertion requires a parse that used that exact revision ID
2832                                         $this->setOutputFlag( 'vary-revision-id', '{{REVISIONID}} used' );
2833                                         $value = $this->getRevisionId();
2834                                         if ( $value === 0 ) {
2835                                                 $rev = $this->getRevisionObject();
2836                                                 $value = $rev ? $rev->getId() : $value;
2837                                         }
2838                                         if ( !$value ) {
2839                                                 $value = $this->mOptions->getSpeculativeRevId();
2840                                                 if ( $value ) {
2841                                                         $this->mOutput->setSpeculativeRevIdUsed( $value );
2842                                                 }
2843                                         }
2844                                 }
2845                                 break;
2846                         case 'revisionday':
2847                                 $value = (int)$this->getRevisionTimestampSubstring( 6, 2, self::MAX_TTS, $index );
2848                                 break;
2849                         case 'revisionday2':
2850                                 $value = $this->getRevisionTimestampSubstring( 6, 2, self::MAX_TTS, $index );
2851                                 break;
2852                         case 'revisionmonth':
2853                                 $value = $this->getRevisionTimestampSubstring( 4, 2, self::MAX_TTS, $index );
2854                                 break;
2855                         case 'revisionmonth1':
2856                                 $value = (int)$this->getRevisionTimestampSubstring( 4, 2, self::MAX_TTS, $index );
2857                                 break;
2858                         case 'revisionyear':
2859                                 $value = $this->getRevisionTimestampSubstring( 0, 4, self::MAX_TTS, $index );
2860                                 break;
2861                         case 'revisiontimestamp':
2862                                 $value = $this->getRevisionTimestampSubstring( 0, 14, self::MAX_TTS, $index );
2863                                 break;
2864                         case 'revisionuser':
2865                                 # Inform the edit saving system that getting the canonical output after
2866                                 # revision insertion requires a parse that used the actual user ID
2867                                 $this->setOutputFlag( 'vary-user', '{{REVISIONUSER}} used' );
2868                                 $value = $this->getRevisionUser();
2869                                 break;
2870                         case 'revisionsize':
2871                                 $value = $this->getRevisionSize();
2872                                 break;
2873                         case 'namespace':
2874                                 $value = str_replace( '_', ' ',
2875                                         $this->contLang->getNsText( $this->mTitle->getNamespace() ) );
2876                                 break;
2877                         case 'namespacee':
2878                                 $value = wfUrlencode( $this->contLang->getNsText( $this->mTitle->getNamespace() ) );
2879                                 break;
2880                         case 'namespacenumber':
2881                                 $value = $this->mTitle->getNamespace();
2882                                 break;
2883                         case 'talkspace':
2884                                 $value = $this->mTitle->canHaveTalkPage()
2885                                         ? str_replace( '_', ' ', $this->mTitle->getTalkNsText() )
2886                                         : '';
2887                                 break;
2888                         case 'talkspacee':
2889                                 $value = $this->mTitle->canHaveTalkPage() ? wfUrlencode( $this->mTitle->getTalkNsText() ) : '';
2890                                 break;
2891                         case 'subjectspace':
2892                                 $value = str_replace( '_', ' ', $this->mTitle->getSubjectNsText() );
2893                                 break;
2894                         case 'subjectspacee':
2895                                 $value = ( wfUrlencode( $this->mTitle->getSubjectNsText() ) );
2896                                 break;
2897                         case 'currentdayname':
2898                                 $value = $pageLang->getWeekdayName( (int)MWTimestamp::getInstance( $ts )->format( 'w' ) + 1 );
2899                                 break;
2900                         case 'currentyear':
2901                                 $value = $pageLang->formatNum( MWTimestamp::getInstance( $ts )->format( 'Y' ), true );
2902                                 break;
2903                         case 'currenttime':
2904                                 $value = $pageLang->time( wfTimestamp( TS_MW, $ts ), false, false );
2905                                 break;
2906                         case 'currenthour':
2907                                 $value = $pageLang->formatNum( MWTimestamp::getInstance( $ts )->format( 'H' ), true );
2908                                 break;
2909                         case 'currentweek':
2910                                 # @bug T6594 PHP5 has it zero padded, PHP4 does not, cast to
2911                                 # int to remove the padding
2912                                 $value = $pageLang->formatNum( (int)MWTimestamp::getInstance( $ts )->format( 'W' ) );
2913                                 break;
2914                         case 'currentdow':
2915                                 $value = $pageLang->formatNum( MWTimestamp::getInstance( $ts )->format( 'w' ) );
2916                                 break;
2917                         case 'localdayname':
2918                                 $value = $pageLang->getWeekdayName(
2919                                         (int)MWTimestamp::getLocalInstance( $ts )->format( 'w' ) + 1
2920                                 );
2921                                 break;
2922                         case 'localyear':
2923                                 $value = $pageLang->formatNum( MWTimestamp::getLocalInstance( $ts )->format( 'Y' ), true );
2924                                 break;
2925                         case 'localtime':
2926                                 $value = $pageLang->time(
2927                                         MWTimestamp::getLocalInstance( $ts )->format( 'YmdHis' ),
2928                                         false,
2929                                         false
2930                                 );
2931                                 break;
2932                         case 'localhour':
2933                                 $value = $pageLang->formatNum( MWTimestamp::getLocalInstance( $ts )->format( 'H' ), true );
2934                                 break;
2935                         case 'localweek':
2936                                 # @bug T6594 PHP5 has it zero padded, PHP4 does not, cast to
2937                                 # int to remove the padding
2938                                 $value = $pageLang->formatNum( (int)MWTimestamp::getLocalInstance( $ts )->format( 'W' ) );
2939                                 break;
2940                         case 'localdow':
2941                                 $value = $pageLang->formatNum( MWTimestamp::getLocalInstance( $ts )->format( 'w' ) );
2942                                 break;
2943                         case 'numberofarticles':
2944                                 $value = $pageLang->formatNum( SiteStats::articles() );
2945                                 break;
2946                         case 'numberoffiles':
2947                                 $value = $pageLang->formatNum( SiteStats::images() );
2948                                 break;
2949                         case 'numberofusers':
2950                                 $value = $pageLang->formatNum( SiteStats::users() );
2951                                 break;
2952                         case 'numberofactiveusers':
2953                                 $value = $pageLang->formatNum( SiteStats::activeUsers() );
2954                                 break;
2955                         case 'numberofpages':
2956                                 $value = $pageLang->formatNum( SiteStats::pages() );
2957                                 break;
2958                         case 'numberofadmins':
2959                                 $value = $pageLang->formatNum( SiteStats::numberingroup( 'sysop' ) );
2960                                 break;
2961                         case 'numberofedits':
2962                                 $value = $pageLang->formatNum( SiteStats::edits() );
2963                                 break;
2964                         case 'currenttimestamp':
2965                                 $value = wfTimestamp( TS_MW, $ts );
2966                                 break;
2967                         case 'localtimestamp':
2968                                 $value = MWTimestamp::getLocalInstance( $ts )->format( 'YmdHis' );
2969                                 break;
2970                         case 'currentversion':
2971                                 $value = SpecialVersion::getVersion();
2972                                 break;
2973                         case 'articlepath':
2974                                 return $this->svcOptions->get( 'ArticlePath' );
2975                         case 'sitename':
2976                                 return $this->svcOptions->get( 'Sitename' );
2977                         case 'server':
2978                                 return $this->svcOptions->get( 'Server' );
2979                         case 'servername':
2980                                 return $this->svcOptions->get( 'ServerName' );
2981                         case 'scriptpath':
2982                                 return $this->svcOptions->get( 'ScriptPath' );
2983                         case 'stylepath':
2984                                 return $this->svcOptions->get( 'StylePath' );
2985                         case 'directionmark':
2986                                 return $pageLang->getDirMark();
2987                         case 'contentlanguage':
2988                                 return $this->svcOptions->get( 'LanguageCode' );
2989                         case 'pagelanguage':
2990                                 $value = $pageLang->getCode();
2991                                 break;
2992                         case 'cascadingsources':
2993                                 $value = CoreParserFunctions::cascadingsources( $this );
2994                                 break;
2995                         default:
2996                                 $ret = null;
2997                                 Hooks::run(
2998                                         'ParserGetVariableValueSwitch',
2999                                         [ &$parser, &$this->mVarCache, &$index, &$ret, &$frame ]
3000                                 );
3001
3002                                 return $ret;
3003                 }
3004
3005                 if ( $index ) {
3006                         $this->mVarCache[$index] = $value;
3007                 }
3008
3009                 return $value;
3010         }
3011
3012         /**
3013          * @param int $start
3014          * @param int $len
3015          * @param int $mtts Max time-till-save; sets vary-revision-timestamp if result changes by then
3016          * @param string $variable Parser variable name
3017          * @return string
3018          */
3019         private function getRevisionTimestampSubstring( $start, $len, $mtts, $variable ) {
3020                 # Get the timezone-adjusted timestamp to be used for this revision
3021                 $resNow = substr( $this->getRevisionTimestamp(), $start, $len );
3022                 # Possibly set vary-revision if there is not yet an associated revision
3023                 if ( !$this->getRevisionObject() ) {
3024                         # Get the timezone-adjusted timestamp $mtts seconds in the future.
3025                         # This future is relative to the current time and not that of the
3026                         # parser options. The rendered timestamp can be compared to that
3027                         # of the timestamp specified by the parser options.
3028                         $resThen = substr(
3029                                 $this->contLang->userAdjust( wfTimestamp( TS_MW, time() + $mtts ), '' ),
3030                                 $start,
3031                                 $len
3032                         );
3033
3034                         if ( $resNow !== $resThen ) {
3035                                 # Inform the edit saving system that getting the canonical output after
3036                                 # revision insertion requires a parse that used an actual revision timestamp
3037                                 $this->setOutputFlag( 'vary-revision-timestamp', "$variable used" );
3038                         }
3039                 }
3040
3041                 return $resNow;
3042         }
3043
3044         /**
3045          * initialise the magic variables (like CURRENTMONTHNAME) and substitution modifiers
3046          *
3047          * @private
3048          */
3049         public function initialiseVariables() {
3050                 $variableIDs = $this->magicWordFactory->getVariableIDs();
3051                 $substIDs = $this->magicWordFactory->getSubstIDs();
3052
3053                 $this->mVariables = $this->magicWordFactory->newArray( $variableIDs );
3054                 $this->mSubstWords = $this->magicWordFactory->newArray( $substIDs );
3055         }
3056
3057         /**
3058          * Preprocess some wikitext and return the document tree.
3059          * This is the ghost of replace_variables().
3060          *
3061          * @param string $text The text to parse
3062          * @param int $flags Bitwise combination of:
3063          *   - self::PTD_FOR_INCLUSION: Handle "<noinclude>" and "<includeonly>" as if the text is being
3064          *     included. Default is to assume a direct page view.
3065          *
3066          * The generated DOM tree must depend only on the input text and the flags.
3067          * The DOM tree must be the same in OT_HTML and OT_WIKI mode, to avoid a regression of T6899.
3068          *
3069          * Any flag added to the $flags parameter here, or any other parameter liable to cause a
3070          * change in the DOM tree for a given text, must be passed through the section identifier
3071          * in the section edit link and thus back to extractSections().
3072          *
3073          * The output of this function is currently only cached in process memory, but a persistent
3074          * cache may be implemented at a later date which takes further advantage of these strict
3075          * dependency requirements.
3076          *
3077          * @return PPNode
3078          */
3079         public function preprocessToDom( $text, $flags = 0 ) {
3080                 $dom = $this->getPreprocessor()->preprocessToObj( $text, $flags );
3081                 return $dom;
3082         }
3083
3084         /**
3085          * Return a three-element array: leading whitespace, string contents, trailing whitespace
3086          *
3087          * @param string $s
3088          *
3089          * @return array
3090          */
3091         public static function splitWhitespace( $s ) {
3092                 $ltrimmed = ltrim( $s );
3093                 $w1 = substr( $s, 0, strlen( $s ) - strlen( $ltrimmed ) );
3094                 $trimmed = rtrim( $ltrimmed );
3095                 $diff = strlen( $ltrimmed ) - strlen( $trimmed );
3096                 if ( $diff > 0 ) {
3097                         $w2 = substr( $ltrimmed, -$diff );
3098                 } else {
3099                         $w2 = '';
3100                 }
3101                 return [ $w1, $trimmed, $w2 ];
3102         }
3103
3104         /**
3105          * Replace magic variables, templates, and template arguments
3106          * with the appropriate text. Templates are substituted recursively,
3107          * taking care to avoid infinite loops.
3108          *
3109          * Note that the substitution depends on value of $mOutputType:
3110          *  self::OT_WIKI: only {{subst:}} templates
3111          *  self::OT_PREPROCESS: templates but not extension tags
3112          *  self::OT_HTML: all templates and extension tags
3113          *
3114          * @param string $text The text to transform
3115          * @param false|PPFrame|array $frame Object describing the arguments passed to the
3116          *   template. Arguments may also be provided as an associative array, as
3117          *   was the usual case before MW1.12. Providing arguments this way may be
3118          *   useful for extensions wishing to perform variable replacement
3119          *   explicitly.
3120          * @param bool $argsOnly Only do argument (triple-brace) expansion, not
3121          *   double-brace expansion.
3122          * @return string
3123          */
3124         public function replaceVariables( $text, $frame = false, $argsOnly = false ) {
3125                 # Is there any text? Also, Prevent too big inclusions!
3126                 $textSize = strlen( $text );
3127                 if ( $textSize < 1 || $textSize > $this->mOptions->getMaxIncludeSize() ) {
3128                         return $text;
3129                 }
3130
3131                 if ( $frame === false ) {
3132                         $frame = $this->getPreprocessor()->newFrame();
3133                 } elseif ( !( $frame instanceof PPFrame ) ) {
3134                         $this->logger->debug(
3135                                 __METHOD__ . " called using plain parameters instead of " .
3136                                 "a PPFrame instance. Creating custom frame."
3137                         );
3138                         $frame = $this->getPreprocessor()->newCustomFrame( $frame );
3139                 }
3140
3141                 $dom = $this->preprocessToDom( $text );
3142                 $flags = $argsOnly ? PPFrame::NO_TEMPLATES : 0;
3143                 $text = $frame->expand( $dom, $flags );
3144
3145                 return $text;
3146         }
3147
3148         /**
3149          * Clean up argument array - refactored in 1.9 so parserfunctions can use it, too.
3150          *
3151          * @param array $args
3152          *
3153          * @return array
3154          */
3155         public static function createAssocArgs( $args ) {
3156                 $assocArgs = [];
3157                 $index = 1;
3158                 foreach ( $args as $arg ) {
3159                         $eqpos = strpos( $arg, '=' );
3160                         if ( $eqpos === false ) {
3161                                 $assocArgs[$index++] = $arg;
3162                         } else {
3163                                 $name = trim( substr( $arg, 0, $eqpos ) );
3164                                 $value = trim( substr( $arg, $eqpos + 1 ) );
3165                                 if ( $value === false ) {
3166                                         $value = '';
3167                                 }
3168                                 if ( $name !== false ) {
3169                                         $assocArgs[$name] = $value;
3170                                 }
3171                         }
3172                 }
3173
3174                 return $assocArgs;
3175         }
3176
3177         /**
3178          * Warn the user when a parser limitation is reached
3179          * Will warn at most once the user per limitation type
3180          *
3181          * The results are shown during preview and run through the Parser (See EditPage.php)
3182          *
3183          * @param string $limitationType Should be one of:
3184          *   'expensive-parserfunction' (corresponding messages:
3185          *       'expensive-parserfunction-warning',
3186          *       'expensive-parserfunction-category')
3187          *   'post-expand-template-argument' (corresponding messages:
3188          *       'post-expand-template-argument-warning',
3189          *       'post-expand-template-argument-category')
3190          *   'post-expand-template-inclusion' (corresponding messages:
3191          *       'post-expand-template-inclusion-warning',
3192          *       'post-expand-template-inclusion-category')
3193          *   'node-count-exceeded' (corresponding messages:
3194          *       'node-count-exceeded-warning',
3195          *       'node-count-exceeded-category')
3196          *   'expansion-depth-exceeded' (corresponding messages:
3197          *       'expansion-depth-exceeded-warning',
3198          *       'expansion-depth-exceeded-category')
3199          * @param string|int|null $current Current value
3200          * @param string|int|null $max Maximum allowed, when an explicit limit has been
3201          *       exceeded, provide the values (optional)
3202          */
3203         public function limitationWarn( $limitationType, $current = '', $max = '' ) {
3204                 # does no harm if $current and $max are present but are unnecessary for the message
3205                 # Not doing ->inLanguage( $this->mOptions->getUserLangObj() ), since this is shown
3206                 # only during preview, and that would split the parser cache unnecessarily.
3207                 $warning = wfMessage( "$limitationType-warning" )->numParams( $current, $max )
3208                         ->text();
3209                 $this->mOutput->addWarning( $warning );
3210                 $this->addTrackingCategory( "$limitationType-category" );
3211         }
3212
3213         /**
3214          * Return the text of a template, after recursively
3215          * replacing any variables or templates within the template.
3216          *
3217          * @param array $piece The parts of the template
3218          *   $piece['title']: the title, i.e. the part before the |
3219          *   $piece['parts']: the parameter array
3220          *   $piece['lineStart']: whether the brace was at the start of a line
3221          * @param PPFrame $frame The current frame, contains template arguments
3222          * @throws Exception
3223          * @return string|array The text of the template
3224          */
3225         public function braceSubstitution( $piece, $frame ) {
3226                 // Flags
3227
3228                 // $text has been filled
3229                 $found = false;
3230                 // wiki markup in $text should be escaped
3231                 $nowiki = false;
3232                 // $text is HTML, armour it against wikitext transformation
3233                 $isHTML = false;
3234                 // Force interwiki transclusion to be done in raw mode not rendered
3235                 $forceRawInterwiki = false;
3236                 // $text is a DOM node needing expansion in a child frame
3237                 $isChildObj = false;
3238                 // $text is a DOM node needing expansion in the current frame
3239                 $isLocalObj = false;
3240
3241                 # Title object, where $text came from
3242                 $title = false;
3243
3244                 # $part1 is the bit before the first |, and must contain only title characters.
3245                 # Various prefixes will be stripped from it later.
3246                 $titleWithSpaces = $frame->expand( $piece['title'] );
3247                 $part1 = trim( $titleWithSpaces );
3248                 $titleText = false;
3249
3250                 # Original title text preserved for various purposes
3251                 $originalTitle = $part1;
3252
3253                 # $args is a list of argument nodes, starting from index 0, not including $part1
3254                 # @todo FIXME: If piece['parts'] is null then the call to getLength()
3255                 # below won't work b/c this $args isn't an object
3256                 $args = ( $piece['parts'] == null ) ? [] : $piece['parts'];
3257
3258                 $profileSection = null; // profile templates
3259
3260                 # SUBST
3261                 if ( !$found ) {
3262                         $substMatch = $this->mSubstWords->matchStartAndRemove( $part1 );
3263
3264                         # Possibilities for substMatch: "subst", "safesubst" or FALSE
3265                         # Decide whether to expand template or keep wikitext as-is.
3266                         if ( $this->ot['wiki'] ) {
3267                                 if ( $substMatch === false ) {
3268                                         $literal = true;  # literal when in PST with no prefix
3269                                 } else {
3270                                         $literal = false; # expand when in PST with subst: or safesubst:
3271                                 }
3272                         } else {
3273                                 if ( $substMatch == 'subst' ) {
3274                                         $literal = true;  # literal when not in PST with plain subst:
3275                                 } else {
3276                                         $literal = false; # expand when not in PST with safesubst: or no prefix
3277                                 }
3278                         }
3279                         if ( $literal ) {
3280                                 $text = $frame->virtualBracketedImplode( '{{', '|', '}}', $titleWithSpaces, $args );
3281                                 $isLocalObj = true;
3282                                 $found = true;
3283                         }
3284                 }
3285
3286                 # Variables
3287                 if ( !$found && $args->getLength() == 0 ) {
3288                         $id = $this->mVariables->matchStartToEnd( $part1 );
3289                         if ( $id !== false ) {
3290                                 $text = $this->getVariableValue( $id, $frame );
3291                                 if ( $this->magicWordFactory->getCacheTTL( $id ) > -1 ) {
3292                                         $this->mOutput->updateCacheExpiry(
3293                                                 $this->magicWordFactory->getCacheTTL( $id ) );
3294                                 }
3295                                 $found = true;
3296                         }
3297                 }
3298
3299                 # MSG, MSGNW and RAW
3300                 if ( !$found ) {
3301                         # Check for MSGNW:
3302                         $mwMsgnw = $this->magicWordFactory->get( 'msgnw' );
3303                         if ( $mwMsgnw->matchStartAndRemove( $part1 ) ) {
3304                                 $nowiki = true;
3305                         } else {
3306                                 # Remove obsolete MSG:
3307                                 $mwMsg = $this->magicWordFactory->get( 'msg' );
3308                                 $mwMsg->matchStartAndRemove( $part1 );
3309                         }
3310
3311                         # Check for RAW:
3312                         $mwRaw = $this->magicWordFactory->get( 'raw' );
3313                         if ( $mwRaw->matchStartAndRemove( $part1 ) ) {
3314                                 $forceRawInterwiki = true;
3315                         }
3316                 }
3317
3318                 # Parser functions
3319                 if ( !$found ) {
3320                         $colonPos = strpos( $part1, ':' );
3321                         if ( $colonPos !== false ) {
3322                                 $func = substr( $part1, 0, $colonPos );
3323                                 $funcArgs = [ trim( substr( $part1, $colonPos + 1 ) ) ];
3324                                 $argsLength = $args->getLength();
3325                                 for ( $i = 0; $i < $argsLength; $i++ ) {
3326                                         $funcArgs[] = $args->item( $i );
3327                                 }
3328
3329                                 $result = $this->callParserFunction( $frame, $func, $funcArgs );
3330
3331                                 // Extract any forwarded flags
3332                                 if ( isset( $result['title'] ) ) {
3333                                         $title = $result['title'];
3334                                 }
3335                                 if ( isset( $result['found'] ) ) {
3336                                         $found = $result['found'];
3337                                 }
3338                                 if ( array_key_exists( 'text', $result ) ) {
3339                                         // a string or null
3340                                         $text = $result['text'];
3341                                 }
3342                                 if ( isset( $result['nowiki'] ) ) {
3343                                         $nowiki = $result['nowiki'];
3344                                 }
3345                                 if ( isset( $result['isHTML'] ) ) {
3346                                         $isHTML = $result['isHTML'];
3347                                 }
3348                                 if ( isset( $result['forceRawInterwiki'] ) ) {
3349                                         $forceRawInterwiki = $result['forceRawInterwiki'];
3350                                 }
3351                                 if ( isset( $result['isChildObj'] ) ) {
3352                                         $isChildObj = $result['isChildObj'];
3353                                 }
3354                                 if ( isset( $result['isLocalObj'] ) ) {
3355                                         $isLocalObj = $result['isLocalObj'];
3356                                 }
3357                         }
3358                 }
3359
3360                 # Finish mangling title and then check for loops.
3361                 # Set $title to a Title object and $titleText to the PDBK
3362                 if ( !$found ) {
3363                         $ns = NS_TEMPLATE;
3364                         # Split the title into page and subpage
3365                         $subpage = '';
3366                         $relative = $this->maybeDoSubpageLink( $part1, $subpage );
3367                         if ( $part1 !== $relative ) {
3368                                 $part1 = $relative;
3369                                 $ns = $this->mTitle->getNamespace();
3370                         }
3371                         $title = Title::newFromText( $part1, $ns );
3372                         if ( $title ) {
3373                                 $titleText = $title->getPrefixedText();
3374                                 # Check for language variants if the template is not found
3375                                 if ( $this->getTargetLanguage()->hasVariants() && $title->getArticleID() == 0 ) {
3376                                         $this->getTargetLanguage()->findVariantLink( $part1, $title, true );
3377                                 }
3378                                 # Do recursion depth check
3379                                 $limit = $this->mOptions->getMaxTemplateDepth();
3380                                 if ( $frame->depth >= $limit ) {
3381                                         $found = true;
3382                                         $text = '<span class="error">'
3383                                                 . wfMessage( 'parser-template-recursion-depth-warning' )
3384                                                         ->numParams( $limit )->inContentLanguage()->text()
3385                                                 . '</span>';
3386                                 }
3387                         }
3388                 }
3389
3390                 # Load from database
3391                 if ( !$found && $title ) {
3392                         $profileSection = $this->mProfiler->scopedProfileIn( $title->getPrefixedDBkey() );
3393                         if ( !$title->isExternal() ) {
3394                                 if ( $title->isSpecialPage()
3395                                         && $this->mOptions->getAllowSpecialInclusion()
3396                                         && $this->ot['html']
3397                                 ) {
3398                                         $specialPage = $this->specialPageFactory->getPage( $title->getDBkey() );
3399                                         // Pass the template arguments as URL parameters.
3400                                         // "uselang" will have no effect since the Language object
3401                                         // is forced to the one defined in ParserOptions.
3402                                         $pageArgs = [];
3403                                         $argsLength = $args->getLength();
3404                                         for ( $i = 0; $i < $argsLength; $i++ ) {
3405                                                 $bits = $args->item( $i )->splitArg();
3406                                                 if ( strval( $bits['index'] ) === '' ) {
3407                                                         $name = trim( $frame->expand( $bits['name'], PPFrame::STRIP_COMMENTS ) );
3408                                                         $value = trim( $frame->expand( $bits['value'] ) );
3409                                                         $pageArgs[$name] = $value;
3410                                                 }
3411                                         }
3412
3413                                         // Create a new context to execute the special page
3414                                         $context = new RequestContext;
3415                                         $context->setTitle( $title );
3416                                         $context->setRequest( new FauxRequest( $pageArgs ) );
3417                                         if ( $specialPage && $specialPage->maxIncludeCacheTime() === 0 ) {
3418                                                 $context->setUser( $this->getUser() );
3419                                         } else {
3420                                                 // If this page is cached, then we better not be per user.
3421                                                 $context->setUser( User::newFromName( '127.0.0.1', false ) );
3422                                         }
3423                                         $context->setLanguage( $this->mOptions->getUserLangObj() );
3424                                         $ret = $this->specialPageFactory->capturePath( $title, $context, $this->getLinkRenderer() );
3425                                         if ( $ret ) {
3426                                                 $text = $context->getOutput()->getHTML();
3427                                                 $this->mOutput->addOutputPageMetadata( $context->getOutput() );
3428                                                 $found = true;
3429                                                 $isHTML = true;
3430                                                 if ( $specialPage && $specialPage->maxIncludeCacheTime() !== false ) {
3431                                                         $this->mOutput->updateRuntimeAdaptiveExpiry(
3432                                                                 $specialPage->maxIncludeCacheTime()
3433                                                         );
3434                                                 }
3435                                         }
3436                                 } elseif ( $this->nsInfo->isNonincludable( $title->getNamespace() ) ) {
3437                                         $found = false; # access denied
3438                                         $this->logger->debug(
3439                                                 __METHOD__ .
3440                                                 ": template inclusion denied for " . $title->getPrefixedDBkey()
3441                                         );
3442                                 } else {
3443                                         list( $text, $title ) = $this->getTemplateDom( $title );
3444                                         if ( $text !== false ) {
3445                                                 $found = true;
3446                                                 $isChildObj = true;
3447                                         }
3448                                 }
3449
3450                                 # If the title is valid but undisplayable, make a link to it
3451                                 if ( !$found && ( $this->ot['html'] || $this->ot['pre'] ) ) {
3452                                         $text = "[[:$titleText]]";
3453                                         $found = true;
3454                                 }
3455                         } elseif ( $title->isTrans() ) {
3456                                 # Interwiki transclusion
3457                                 if ( $this->ot['html'] && !$forceRawInterwiki ) {
3458                                         $text = $this->interwikiTransclude( $title, 'render' );
3459                                         $isHTML = true;
3460                                 } else {
3461                                         $text = $this->interwikiTransclude( $title, 'raw' );
3462                                         # Preprocess it like a template
3463                                         $text = $this->preprocessToDom( $text, self::PTD_FOR_INCLUSION );
3464                                         $isChildObj = true;
3465                                 }
3466                                 $found = true;
3467                         }
3468
3469                         # Do infinite loop check
3470                         # This has to be done after redirect resolution to avoid infinite loops via redirects
3471                         if ( !$frame->loopCheck( $title ) ) {
3472                                 $found = true;
3473                                 $text = '<span class="error">'
3474                                         . wfMessage( 'parser-template-loop-warning', $titleText )->inContentLanguage()->text()
3475                                         . '</span>';
3476                                 $this->addTrackingCategory( 'template-loop-category' );
3477                                 $this->mOutput->addWarning( wfMessage( 'template-loop-warning',
3478                                         wfEscapeWikiText( $titleText ) )->text() );
3479                                 $this->logger->debug( __METHOD__ . ": template loop broken at '$titleText'" );
3480                         }
3481                 }
3482
3483                 # If we haven't found text to substitute by now, we're done
3484                 # Recover the source wikitext and return it
3485                 if ( !$found ) {
3486                         $text = $frame->virtualBracketedImplode( '{{', '|', '}}', $titleWithSpaces, $args );
3487                         if ( $profileSection ) {
3488                                 $this->mProfiler->scopedProfileOut( $profileSection );
3489                         }
3490                         return [ 'object' => $text ];
3491                 }
3492
3493                 # Expand DOM-style return values in a child frame
3494                 if ( $isChildObj ) {
3495                         # Clean up argument array
3496                         $newFrame = $frame->newChild( $args, $title );
3497
3498                         if ( $nowiki ) {
3499                                 $text = $newFrame->expand( $text, PPFrame::RECOVER_ORIG );
3500                         } elseif ( $titleText !== false && $newFrame->isEmpty() ) {
3501                                 # Expansion is eligible for the empty-frame cache
3502                                 $text = $newFrame->cachedExpand( $titleText, $text );
3503                         } else {
3504                                 # Uncached expansion
3505                                 $text = $newFrame->expand( $text );
3506                         }
3507                 }
3508                 if ( $isLocalObj && $nowiki ) {
3509                         $text = $frame->expand( $text, PPFrame::RECOVER_ORIG );
3510                         $isLocalObj = false;
3511                 }
3512
3513                 if ( $profileSection ) {
3514                         $this->mProfiler->scopedProfileOut( $profileSection );
3515                 }
3516
3517                 # Replace raw HTML by a placeholder
3518                 if ( $isHTML ) {
3519                         $text = $this->insertStripItem( $text );
3520                 } elseif ( $nowiki && ( $this->ot['html'] || $this->ot['pre'] ) ) {
3521                         # Escape nowiki-style return values
3522                         $text = wfEscapeWikiText( $text );
3523                 } elseif ( is_string( $text )
3524                         && !$piece['lineStart']
3525                         && preg_match( '/^(?:{\\||:|;|#|\*)/', $text )
3526                 ) {
3527                         # T2529: if the template begins with a table or block-level
3528                         # element, it should be treated as beginning a new line.
3529                         # This behavior is somewhat controversial.
3530                         $text = "\n" . $text;
3531                 }
3532
3533                 if ( is_string( $text ) && !$this->incrementIncludeSize( 'post-expand', strlen( $text ) ) ) {
3534                         # Error, oversize inclusion
3535                         if ( $titleText !== false ) {
3536                                 # Make a working, properly escaped link if possible (T25588)
3537                                 $text = "[[:$titleText]]";
3538                         } else {
3539                                 # This will probably not be a working link, but at least it may
3540                                 # provide some hint of where the problem is
3541                                 preg_replace( '/^:/', '', $originalTitle );
3542                                 $text = "[[:$originalTitle]]";
3543                         }
3544                         $text .= $this->insertStripItem( '<!-- WARNING: template omitted, '
3545                                 . 'post-expand include size too large -->' );
3546                         $this->limitationWarn( 'post-expand-template-inclusion' );
3547                 }
3548
3549                 if ( $isLocalObj ) {
3550                         $ret = [ 'object' => $text ];
3551                 } else {
3552                         $ret = [ 'text' => $text ];
3553                 }
3554
3555                 return $ret;
3556         }
3557
3558         /**
3559          * Call a parser function and return an array with text and flags.
3560          *
3561          * The returned array will always contain a boolean 'found', indicating
3562          * whether the parser function was found or not. It may also contain the
3563          * following:
3564          *  text: string|object, resulting wikitext or PP DOM object
3565          *  isHTML: bool, $text is HTML, armour it against wikitext transformation
3566          *  isChildObj: bool, $text is a DOM node needing expansion in a child frame
3567          *  isLocalObj: bool, $text is a DOM node needing expansion in the current frame
3568          *  nowiki: bool, wiki markup in $text should be escaped
3569          *
3570          * @since 1.21
3571          * @param PPFrame $frame The current frame, contains template arguments
3572          * @param string $function Function name
3573          * @param array $args Arguments to the function
3574          * @throws MWException
3575          * @return array
3576          */
3577         public function callParserFunction( $frame, $function, array $args = [] ) {
3578                 # Case sensitive functions
3579                 if ( isset( $this->mFunctionSynonyms[1][$function] ) ) {
3580                         $function = $this->mFunctionSynonyms[1][$function];
3581                 } else {
3582                         # Case insensitive functions
3583                         $function = $this->contLang->lc( $function );
3584                         if ( isset( $this->mFunctionSynonyms[0][$function] ) ) {
3585                                 $function = $this->mFunctionSynonyms[0][$function];
3586                         } else {
3587                                 return [ 'found' => false ];
3588                         }
3589                 }
3590
3591                 list( $callback, $flags ) = $this->mFunctionHooks[$function];
3592
3593                 // Avoid PHP 7.1 warning from passing $this by reference
3594                 $parser = $this;
3595
3596                 $allArgs = [ &$parser ];
3597                 if ( $flags & self::SFH_OBJECT_ARGS ) {
3598                         # Convert arguments to PPNodes and collect for appending to $allArgs
3599                         $funcArgs = [];
3600                         foreach ( $args as $k => $v ) {
3601                                 if ( $v instanceof PPNode || $k === 0 ) {
3602                                         $funcArgs[] = $v;
3603                                 } else {
3604                                         $funcArgs[] = $this->mPreprocessor->newPartNodeArray( [ $k => $v ] )->item( 0 );
3605                                 }
3606                         }
3607
3608                         # Add a frame parameter, and pass the arguments as an array
3609                         $allArgs[] = $frame;
3610                         $allArgs[] = $funcArgs;
3611                 } else {
3612                         # Convert arguments to plain text and append to $allArgs
3613                         foreach ( $args as $k => $v ) {
3614                                 if ( $v instanceof PPNode ) {
3615                                         $allArgs[] = trim( $frame->expand( $v ) );
3616                                 } elseif ( is_int( $k ) && $k >= 0 ) {
3617                                         $allArgs[] = trim( $v );
3618                                 } else {
3619                                         $allArgs[] = trim( "$k=$v" );
3620                                 }
3621                         }
3622                 }
3623
3624                 $result = $callback( ...$allArgs );
3625
3626                 # The interface for function hooks allows them to return a wikitext
3627                 # string or an array containing the string and any flags. This mungs
3628                 # things around to match what this method should return.
3629                 if ( !is_array( $result ) ) {
3630                         $result = [
3631                                 'found' => true,
3632                                 'text' => $result,
3633                         ];
3634                 } else {
3635                         if ( isset( $result[0] ) && !isset( $result['text'] ) ) {
3636                                 $result['text'] = $result[0];
3637                         }
3638                         unset( $result[0] );
3639                         $result += [
3640                                 'found' => true,
3641                         ];
3642                 }
3643
3644                 $noparse = true;
3645                 $preprocessFlags = 0;
3646                 if ( isset( $result['noparse'] ) ) {
3647                         $noparse = $result['noparse'];
3648                 }
3649                 if ( isset( $result['preprocessFlags'] ) ) {
3650                         $preprocessFlags = $result['preprocessFlags'];
3651                 }
3652
3653                 if ( !$noparse ) {
3654                         $result['text'] = $this->preprocessToDom( $result['text'], $preprocessFlags );
3655                         $result['isChildObj'] = true;
3656                 }
3657
3658                 return $result;
3659         }
3660
3661         /**
3662          * Get the semi-parsed DOM representation of a template with a given title,
3663          * and its redirect destination title. Cached.
3664          *
3665          * @param Title $title
3666          *
3667          * @return array
3668          */
3669         public function getTemplateDom( $title ) {
3670                 $cacheTitle = $title;
3671                 $titleText = $title->getPrefixedDBkey();
3672
3673                 if ( isset( $this->mTplRedirCache[$titleText] ) ) {
3674                         list( $ns, $dbk ) = $this->mTplRedirCache[$titleText];
3675                         $title = Title::makeTitle( $ns, $dbk );
3676                         $titleText = $title->getPrefixedDBkey();
3677                 }
3678                 if ( isset( $this->mTplDomCache[$titleText] ) ) {
3679                         return [ $this->mTplDomCache[$titleText], $title ];
3680                 }
3681
3682                 # Cache miss, go to the database
3683                 list( $text, $title ) = $this->fetchTemplateAndTitle( $title );
3684
3685                 if ( $text === false ) {
3686                         $this->mTplDomCache[$titleText] = false;
3687                         return [ false, $title ];
3688                 }
3689
3690                 $dom = $this->preprocessToDom( $text, self::PTD_FOR_INCLUSION );
3691                 $this->mTplDomCache[$titleText] = $dom;
3692
3693                 if ( !$title->equals( $cacheTitle ) ) {
3694                         $this->mTplRedirCache[$cacheTitle->getPrefixedDBkey()] =
3695                                 [ $title->getNamespace(), $title->getDBkey() ];
3696                 }
3697
3698                 return [ $dom, $title ];
3699         }
3700
3701         /**
3702          * Fetch the current revision of a given title. Note that the revision
3703          * (and even the title) may not exist in the database, so everything
3704          * contributing to the output of the parser should use this method
3705          * where possible, rather than getting the revisions themselves. This
3706          * method also caches its results, so using it benefits performance.
3707          *
3708          * @since 1.24
3709          * @param Title $title
         * @return Revision|bool False if missing (as returned by the default callback)
3711          */
3712         public function fetchCurrentRevisionOfTitle( $title ) {
3713                 $cacheKey = $title->getPrefixedDBkey();
3714                 if ( !$this->currentRevisionCache ) {
3715                         $this->currentRevisionCache = new MapCacheLRU( 100 );
3716                 }
3717                 if ( !$this->currentRevisionCache->has( $cacheKey ) ) {
3718                         $this->currentRevisionCache->set( $cacheKey,
3719                                 // Defaults to Parser::statelessFetchRevision()
3720                                 call_user_func( $this->mOptions->getCurrentRevisionCallback(), $title, $this )
3721                         );
3722                 }
3723                 return $this->currentRevisionCache->get( $cacheKey );
3724         }
3725
3726         /**
3727          * @param Title $title
3728          * @return bool
3729          * @since 1.34
3730          */
3731         public function isCurrentRevisionOfTitleCached( $title ) {
3732                 return (
3733                         $this->currentRevisionCache &&
3734                         $this->currentRevisionCache->has( $title->getPrefixedText() )
3735                 );
3736         }
3737
3738         /**
         * Wrapper around Revision::newKnownCurrent to allow passing additional parameters
         * without passing them on to it.
3741          *
3742          * @since 1.24
3743          * @param Title $title
3744          * @param Parser|bool $parser
3745          * @return Revision|bool False if missing
3746          */
3747         public static function statelessFetchRevision( Title $title, $parser = false ) {
3748                 $rev = Revision::newKnownCurrent( wfGetDB( DB_REPLICA ), $title );
3749
3750                 return $rev;
3751         }
3752
3753         /**
3754          * Fetch the unparsed text of a template and register a reference to it.
3755          * @param Title $title
3756          * @return array ( string or false, Title )
3757          */
3758         public function fetchTemplateAndTitle( $title ) {
3759                 // Defaults to Parser::statelessFetchTemplate()
3760                 $templateCb = $this->mOptions->getTemplateCallback();
3761                 $stuff = call_user_func( $templateCb, $title, $this );
3762                 $rev = $stuff['revision'] ?? null;
3763                 $text = $stuff['text'];
3764                 if ( is_string( $stuff['text'] ) ) {
3765                         // We use U+007F DELETE to distinguish strip markers from regular text
3766                         $text = strtr( $text, "\x7f", "?" );
3767                 }
3768                 $finalTitle = $stuff['finalTitle'] ?? $title;
3769                 foreach ( ( $stuff['deps'] ?? [] ) as $dep ) {
3770                         $this->mOutput->addTemplate( $dep['title'], $dep['page_id'], $dep['rev_id'] );
3771                         if ( $dep['title']->equals( $this->getTitle() ) && $rev instanceof Revision ) {
3772                                 // Self-transclusion; final result may change based on the new page version
3773                                 $this->setOutputFlag( 'vary-revision-sha1', 'Self transclusion' );
3774                                 $this->getOutput()->setRevisionUsedSha1Base36( $rev->getSha1() );
3775                         }
3776                 }
3777
3778                 return [ $text, $finalTitle ];
3779         }
3780
3781         /**
3782          * Fetch the unparsed text of a template and register a reference to it.
3783          * @param Title $title
3784          * @return string|bool
3785          */
3786         public function fetchTemplate( $title ) {
3787                 return $this->fetchTemplateAndTitle( $title )[0];
3788         }
3789
3790         /**
3791          * Static function to get a template
3792          * Can be overridden via ParserOptions::setTemplateCallback().
3793          *
3794          * @param Title $title
3795          * @param bool|Parser $parser
3796          *
3797          * @return array
3798          */
        public static function statelessFetchTemplate( $title, $parser = false ) {
                // Returns an array with the keys 'revision', 'text', 'finalTitle' and 'deps'.
                // 'text' stays false when nothing could be fetched; 'deps' lists every
                // title/page_id/rev_id triple that was looked at along the way.
                $text = $skip = false;
                $finalTitle = $title;
                $deps = [];
                $rev = null;

                # Loop to fetch the article, with up to 1 redirect
                # ($title is replaced by the redirect target at the end of each pass, so
                # the loop also ends as soon as that target is not an object)
                for ( $i = 0; $i < 2 && is_object( $title ); $i++ ) {
                        # Give extensions a chance to select the revision instead
                        $id = false; # Assume current
                        # $skip and $id are passed by reference so handlers can set them
                        Hooks::run( 'BeforeParserFetchTemplateAndtitle',
                                [ $parser, $title, &$skip, &$id ] );

                        if ( $skip ) {
                                # A handler asked to skip this template: report no text, but
                                # still record the title as a dependency (with no revision ID)
                                $text = false;
                                $deps[] = [
                                        'title' => $title,
                                        'page_id' => $title->getArticleID(),
                                        'rev_id' => null
                                ];
                                break;
                        }
                        # Get the revision
                        if ( $id ) {
                                # A specific revision was selected by a hook handler
                                $rev = Revision::newFromId( $id );
                        } elseif ( $parser ) {
                                # Go through the parser so its current-revision callback is used
                                $rev = $parser->fetchCurrentRevisionOfTitle( $title );
                        } else {
                                $rev = Revision::newFromTitle( $title );
                        }
                        $rev_id = $rev ? $rev->getId() : 0;
                        # If there is no current revision, there is no page
                        if ( $id === false && !$rev ) {
                                $linkCache = MediaWikiServices::getInstance()->getLinkCache();
                                $linkCache->addBadLinkObj( $title );
                        }

                        # Record the dependency even when no revision was found (rev_id 0)
                        $deps[] = [
                                'title' => $title,
                                'page_id' => $title->getArticleID(),
                                'rev_id' => $rev_id
                        ];
                        if ( $rev && !$title->equals( $rev->getTitle() ) ) {
                                # We fetched a rev from a different title; register it too...
                                $deps[] = [
                                        'title' => $rev->getTitle(),
                                        'page_id' => $rev->getPage(),
                                        'rev_id' => $rev_id
                                ];
                        }

                        if ( $rev ) {
                                $content = $rev->getContent();
                                $text = $content ? $content->getWikitextForTransclusion() : null;

                                # $text and $deps are passed by reference, so handlers may
                                # replace the text or add dependencies
                                Hooks::run( 'ParserFetchTemplate',
                                        [ $parser, $title, $rev, &$text, &$deps ] );

                                # Normalise "no text" (false or null) to false and stop
                                if ( $text === false || $text === null ) {
                                        $text = false;
                                        break;
                                }
                        } elseif ( $title->getNamespace() == NS_MEDIAWIKI ) {
                                # No revision, but the title is in the MediaWiki namespace:
                                # fall back to the interface message of that name, if it exists
                                $message = wfMessage( MediaWikiServices::getInstance()->getContentLanguage()->
                                        lcfirst( $title->getText() ) )->inContentLanguage();
                                if ( !$message->exists() ) {
                                        $text = false;
                                        break;
                                }
                                $content = $message->content();
                                $text = $message->plain();
                        } else {
                                # No revision and no message fallback: $text keeps its current value
                                break;
                        }
                        if ( !$content ) {
                                break;
                        }
                        # Redirect?
                        # Remember the title just processed, then continue with the redirect
                        # target (the loop condition ends the loop if it is not an object)
                        $finalTitle = $title;
                        $title = $content->getRedirectTarget();
                }
                return [
                        'revision' => $rev,
                        'text' => $text,
                        'finalTitle' => $finalTitle,
                        'deps' => $deps
                ];
        }
3887
3888         /**
3889          * Fetch a file and its title and register a reference to it.
3890          * If 'broken' is a key in $options then the file will appear as a broken thumbnail.
3891          * @param Title $title
3892          * @param array $options Array of options to RepoGroup::findFile
3893          * @return array ( File or false, Title of file )
3894          */
3895         public function fetchFileAndTitle( $title, $options = [] ) {
3896                 $file = $this->fetchFileNoRegister( $title, $options );
3897
3898                 $time = $file ? $file->getTimestamp() : false;
3899                 $sha1 = $file ? $file->getSha1() : false;
3900                 # Register the file as a dependency...
3901                 $this->mOutput->addImage( $title->getDBkey(), $time, $sha1 );
3902                 if ( $file && !$title->equals( $file->getTitle() ) ) {
3903                         # Update fetched file title
3904                         $title = $file->getTitle();
3905                         $this->mOutput->addImage( $title->getDBkey(), $time, $sha1 );
3906                 }
3907                 return [ $file, $title ];
3908         }
3909
3910         /**
3911          * Helper function for fetchFileAndTitle.
3912          *
3913          * Also useful if you need to fetch a file but not use it yet,
3914          * for example to get the file's handler.
3915          *
3916          * @param Title $title
3917          * @param array $options Array of options to RepoGroup::findFile
3918          * @return File|bool
3919          */
3920         protected function fetchFileNoRegister( $title, $options = [] ) {
3921                 if ( isset( $options['broken'] ) ) {
3922                         $file = false; // broken thumbnail forced by hook
3923                 } elseif ( isset( $options['sha1'] ) ) { // get by (sha1,timestamp)
3924                         $file = RepoGroup::singleton()->findFileFromKey( $options['sha1'], $options );
3925                 } else { // get by (name,timestamp)
3926                         $file = MediaWikiServices::getInstance()->getRepoGroup()->findFile( $title, $options );
3927                 }
3928                 return $file;
3929         }
3930
3931         /**
3932          * Transclude an interwiki link.
3933          *
3934          * @param Title $title
3935          * @param string $action Usually one of (raw, render)
3936          *
3937          * @return string
3938          */
3939         public function interwikiTransclude( $title, $action ) {
3940                 if ( !$this->svcOptions->get( 'EnableScaryTranscluding' ) ) {
3941                         return wfMessage( 'scarytranscludedisabled' )->inContentLanguage()->text();
3942                 }
3943
3944                 $url = $title->getFullURL( [ 'action' => $action ] );
3945                 if ( strlen( $url ) > 1024 ) {
3946                         return wfMessage( 'scarytranscludetoolong' )->inContentLanguage()->text();
3947                 }
3948
3949                 $wikiId = $title->getTransWikiID(); // remote wiki ID or false
3950
3951                 $fname = __METHOD__;
3952                 $cache = MediaWikiServices::getInstance()->getMainWANObjectCache();
3953
3954                 $data = $cache->getWithSetCallback(
3955                         $cache->makeGlobalKey(
3956                                 'interwiki-transclude',
3957                                 ( $wikiId !== false ) ? $wikiId : 'external',
3958                                 sha1( $url )
3959                         ),
3960                         $this->svcOptions->get( 'TranscludeCacheExpiry' ),
3961                         function ( $oldValue, &$ttl ) use ( $url, $fname, $cache ) {
3962                                 $req = MWHttpRequest::factory( $url, [], $fname );
3963
3964                                 $status = $req->execute(); // Status object
3965                                 if ( !$status->isOK() ) {
3966                                         $ttl = $cache::TTL_UNCACHEABLE;
3967                                 } elseif ( $req->getResponseHeader( 'X-Database-Lagged' ) !== null ) {
3968                                         $ttl = min( $cache::TTL_LAGGED, $ttl );
3969                                 }
3970
3971                                 return [
3972                                         'text' => $status->isOK() ? $req->getContent() : null,
3973                                         'code' => $req->getStatus()
3974                                 ];
3975                         },
3976                         [
3977                                 'checkKeys' => ( $wikiId !== false )
3978                                         ? [ $cache->makeGlobalKey( 'interwiki-page', $wikiId, $title->getDBkey() ) ]
3979                                         : [],
3980                                 'pcGroup' => 'interwiki-transclude:5',
3981                                 'pcTTL' => $cache::TTL_PROC_LONG
3982                         ]
3983                 );
3984
3985                 if ( is_string( $data['text'] ) ) {
3986                         $text = $data['text'];
3987                 } elseif ( $data['code'] != 200 ) {
3988                         // Though we failed to fetch the content, this status is useless.
3989                         $text = wfMessage( 'scarytranscludefailed-httpstatus' )
3990                                 ->params( $url, $data['code'] )->inContentLanguage()->text();
3991                 } else {
3992                         $text = wfMessage( 'scarytranscludefailed', $url )->inContentLanguage()->text();
3993                 }
3994
3995                 return $text;
3996         }
3997
3998         /**
3999          * Triple brace replacement -- used for template arguments
4000          * @private
4001          *
4002          * @param array $piece
4003          * @param PPFrame $frame
4004          *
4005          * @return array
4006          */
4007         public function argSubstitution( $piece, $frame ) {
4008                 $error = false;
4009                 $parts = $piece['parts'];
4010                 $nameWithSpaces = $frame->expand( $piece['title'] );
4011                 $argName = trim( $nameWithSpaces );
4012                 $object = false;
4013                 $text = $frame->getArgument( $argName );
4014                 if ( $text === false && $parts->getLength() > 0
4015                         && ( $this->ot['html']
4016                                 || $this->ot['pre']
4017                                 || ( $this->ot['wiki'] && $frame->isTemplate() )
4018                         )
4019                 ) {
4020                         # No match in frame, use the supplied default
4021                         $object = $parts->item( 0 )->getChildren();
4022                 }
4023                 if ( !$this->incrementIncludeSize( 'arg', strlen( $text ) ) ) {
4024                         $error = '<!-- WARNING: argument omitted, expansion size too large -->';
4025                         $this->limitationWarn( 'post-expand-template-argument' );
4026                 }
4027
4028                 if ( $text === false && $object === false ) {
4029                         # No match anywhere
4030                         $object = $frame->virtualBracketedImplode( '{{{', '|', '}}}', $nameWithSpaces, $parts );
4031                 }
4032                 if ( $error !== false ) {
4033                         $text .= $error;
4034                 }
4035                 if ( $object !== false ) {
4036                         $ret = [ 'object' => $object ];
4037                 } else {
4038                         $ret = [ 'text' => $text ];
4039                 }
4040
4041                 return $ret;
4042         }
4043
4044         /**
4045          * Return the text to be used for a given extension tag.
4046          * This is the ghost of strip().
4047          *
4048          * @param array $params Associative array of parameters:
4049          *     name       PPNode for the tag name
4050          *     attr       PPNode for unparsed text where tag attributes are thought to be
4051          *     attributes Optional associative array of parsed attributes
4052          *     inner      Contents of extension element
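         *     noClose    Original text did not have a close tag
         *     close      PPNode for the closing tag text; expanded and appended after the
         *                contents when the tag is passed through unprocessed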
4054          * @param PPFrame $frame
4055          *
4056          * @throws MWException
4057          * @return string
4058          */
4059         public function extensionSubstitution( $params, $frame ) {
4060                 static $errorStr = '<span class="error">';
4061                 static $errorLen = 20;
4062
4063                 $name = $frame->expand( $params['name'] );
4064                 if ( substr( $name, 0, $errorLen ) === $errorStr ) {
4065                         // Probably expansion depth or node count exceeded. Just punt the
4066                         // error up.
4067                         return $name;
4068                 }
4069
4070                 $attrText = !isset( $params['attr'] ) ? null : $frame->expand( $params['attr'] );
4071                 if ( substr( $attrText, 0, $errorLen ) === $errorStr ) {
4072                         // See above
4073                         return $attrText;
4074                 }
4075
4076                 // We can't safely check if the expansion for $content resulted in an
4077                 // error, because the content could happen to be the error string
4078                 // (T149622).
4079                 $content = !isset( $params['inner'] ) ? null : $frame->expand( $params['inner'] );
4080
4081                 $marker = self::MARKER_PREFIX . "-$name-"
4082                         . sprintf( '%08X', $this->mMarkerIndex++ ) . self::MARKER_SUFFIX;
4083
4084                 $isFunctionTag = isset( $this->mFunctionTagHooks[strtolower( $name )] ) &&
4085                         ( $this->ot['html'] || $this->ot['pre'] );
4086                 if ( $isFunctionTag ) {
4087                         $markerType = 'none';
4088                 } else {
4089                         $markerType = 'general';
4090                 }
4091                 if ( $this->ot['html'] || $isFunctionTag ) {
4092                         $name = strtolower( $name );
4093                         $attributes = Sanitizer::decodeTagAttributes( $attrText );
4094                         if ( isset( $params['attributes'] ) ) {
4095                                 $attributes += $params['attributes'];
4096                         }
4097
4098                         if ( isset( $this->mTagHooks[$name] ) ) {
4099                                 $output = call_user_func_array( $this->mTagHooks[$name],
4100                                         [ $content, $attributes, $this, $frame ] );
4101                         } elseif ( isset( $this->mFunctionTagHooks[$name] ) ) {
4102                                 list( $callback, ) = $this->mFunctionTagHooks[$name];
4103
4104                                 // Avoid PHP 7.1 warning from passing $this by reference
4105                                 $parser = $this;
4106                                 $output = call_user_func_array( $callback, [ &$parser, $frame, $content, $attributes ] );
4107                         } else {
4108                                 $output = '<span class="error">Invalid tag extension name: ' .
4109                                         htmlspecialchars( $name ) . '</span>';
4110                         }
4111
4112                         if ( is_array( $output ) ) {
4113                                 // Extract flags
4114                                 $flags = $output;
4115                                 $output = $flags[0];
4116                                 if ( isset( $flags['markerType'] ) ) {
4117                                         $markerType = $flags['markerType'];
4118                                 }
4119                         }
4120                 } else {
4121                         if ( is_null( $attrText ) ) {
4122                                 $attrText = '';
4123                         }
4124                         if ( isset( $params['attributes'] ) ) {
4125                                 foreach ( $params['attributes'] as $attrName => $attrValue ) {
4126                                         $attrText .= ' ' . htmlspecialchars( $attrName ) . '="' .
4127                                                 htmlspecialchars( $attrValue ) . '"';
4128                                 }
4129                         }
4130                         if ( $content === null ) {
4131                                 $output = "<$name$attrText/>";
4132                         } else {
4133                                 $close = is_null( $params['close'] ) ? '' : $frame->expand( $params['close'] );
4134                                 if ( substr( $close, 0, $errorLen ) === $errorStr ) {
4135                                         // See above
4136                                         return $close;
4137                                 }
4138                                 $output = "<$name$attrText>$content$close";
4139                         }
4140                 }
4141
4142                 if ( $markerType === 'none' ) {
4143                         return $output;
4144                 } elseif ( $markerType === 'nowiki' ) {
4145                         $this->mStripState->addNoWiki( $marker, $output );
4146                 } elseif ( $markerType === 'general' ) {
4147                         $this->mStripState->addGeneral( $marker, $output );
4148                 } else {
4149                         throw new MWException( __METHOD__ . ': invalid marker type' );
4150                 }
4151                 return $marker;
4152         }
4153
4154         /**
4155          * Increment an include size counter
4156          *
4157          * @param string $type The type of expansion
4158          * @param int $size The size of the text
4159          * @return bool False if this inclusion would take it over the maximum, true otherwise
4160          */
4161         public function incrementIncludeSize( $type, $size ) {
4162                 if ( $this->mIncludeSizes[$type] + $size > $this->mOptions->getMaxIncludeSize() ) {
4163                         return false;
4164                 } else {
4165                         $this->mIncludeSizes[$type] += $size;
4166                         return true;
4167                 }
4168         }
4169
4170         /**
4171          * Increment the expensive function count
4172          *
4173          * @return bool False if the limit has been exceeded
4174          */
4175         public function incrementExpensiveFunctionCount() {
4176                 $this->mExpensiveFunctionCount++;
4177                 return $this->mExpensiveFunctionCount <= $this->mOptions->getExpensiveParserFunctionLimit();
4178         }
4179
4180         /**
4181          * Strip double-underscore items like __NOGALLERY__ and __NOTOC__
4182          * Fills $this->mDoubleUnderscores, returns the modified text
4183          *
4184          * @param string $text
4185          *
4186          * @return string
4187          */
4188         public function doDoubleUnderscore( $text ) {
4189                 # The position of __TOC__ needs to be recorded
4190                 $mw = $this->magicWordFactory->get( 'toc' );
4191                 if ( $mw->match( $text ) ) {
4192                         $this->mShowToc = true;
4193                         $this->mForceTocPosition = true;
4194
4195                         # Set a placeholder. At the end we'll fill it in with the TOC.
4196                         $text = $mw->replace( '<!--MWTOC\'"-->', $text, 1 );
4197
4198                         # Only keep the first one.
4199                         $text = $mw->replace( '', $text );
4200                 }
4201
4202                 # Now match and remove the rest of them
4203                 $mwa = $this->magicWordFactory->getDoubleUnderscoreArray();
4204                 $this->mDoubleUnderscores = $mwa->matchAndRemove( $text );
4205
4206                 if ( isset( $this->mDoubleUnderscores['nogallery'] ) ) {
4207                         $this->mOutput->mNoGallery = true;
4208                 }
4209                 if ( isset( $this->mDoubleUnderscores['notoc'] ) && !$this->mForceTocPosition ) {
4210                         $this->mShowToc = false;
4211                 }
4212                 if ( isset( $this->mDoubleUnderscores['hiddencat'] )
4213                         && $this->mTitle->getNamespace() == NS_CATEGORY
4214                 ) {
4215                         $this->addTrackingCategory( 'hidden-category-category' );
4216                 }
4217                 # (T10068) Allow control over whether robots index a page.
4218                 # __INDEX__ always overrides __NOINDEX__, see T16899
4219                 if ( isset( $this->mDoubleUnderscores['noindex'] ) && $this->mTitle->canUseNoindex() ) {
4220                         $this->mOutput->setIndexPolicy( 'noindex' );
4221                         $this->addTrackingCategory( 'noindex-category' );
4222                 }
4223                 if ( isset( $this->mDoubleUnderscores['index'] ) && $this->mTitle->canUseNoindex() ) {
4224                         $this->mOutput->setIndexPolicy( 'index' );
4225                         $this->addTrackingCategory( 'index-category' );
4226                 }
4227
4228                 # Cache all double underscores in the database
4229                 foreach ( $this->mDoubleUnderscores as $key => $val ) {
4230                         $this->mOutput->setProperty( $key, '' );
4231                 }
4232
4233                 return $text;
4234         }
4235
/**
 * Add a tracking category to the output currently being built.
 *
 * Delegates to the current ParserOutput, passing along the title that
 * is being parsed.
 *
 * @see ParserOutput::addTrackingCategory()
 * @param string $msg Message key
 * @return bool Whether the addition was successful
 */
public function addTrackingCategory( $msg ) {
        $output = $this->mOutput;
        $title = $this->mTitle;

        return $output->addTrackingCategory( $msg, $title );
}
4244
/**
 * This function accomplishes several tasks:
 * 1) Auto-number headings if that option is enabled
 * 2) Add an [edit] link to sections for users who have enabled the option and can edit the page
 * 3) Add a Table of contents on the top for users who have enabled the option
 * 4) Auto-anchor headings
 *
 * It loops through all headlines, collects the necessary data, then splits up the
 * string and re-inserts the newly formatted headlines.
 *
 * Besides returning the rewritten text, this method updates the current
 * ParserOutput (new-section flags, TOC HTML and, when $isMain is true, the
 * section list), may change $this->mShowToc, temporarily switches the output
 * type to OT_WIKI while the headings are processed, and runs the
 * 'ParserSectionCreate' hook once per section.
 *
 * @param string $text HTML in which the <h1>..<h6> elements are located and replaced
 * @param string $origText Original, untouched wikitext; preprocessed here to
 *   compute the offset of each section
 * @param bool $isMain If true, the collected section data is stored in the
 *   ParserOutput and the TOC is prepended to the first section (unless the
 *   TOC position is forced)
 * @return string Text with the formatted headlines (and possibly the TOC) inserted
 * @private
 */
public function formatHeadings( $text, $origText, $isMain = true ) {
        # Inhibit editsection links if requested in the page.
        # Actual presence will depend on post-cache transforms.
        $maybeShowEditLink = !isset( $this->mDoubleUnderscores['noeditsection'] );

        # Get all headlines for numbering them and adding funky stuff like [edit]
        # links - this is for later, but we need the number of headlines right now
        # NOTE: white space in headings have been trimmed in doHeadings. They shouldn't
        # be trimmed here since whitespace in HTML headings is significant.
        $matches = [];
        $numMatches = preg_match_all(
                '/<H(?P<level>[1-6])(?P<attrib>.*?>)(?P<header>[\s\S]*?)<\/H[1-6] *>/i',
                $text,
                $matches
        );

        # if there are fewer than 4 headlines in the article, do not show TOC
        # unless it's been explicitly enabled.
        $enoughToc = $this->mShowToc &&
                ( ( $numMatches >= 4 ) || $this->mForceTocPosition );

        # Allow user to stipulate that a page should have a "new section"
        # link added via __NEWSECTIONLINK__
        if ( isset( $this->mDoubleUnderscores['newsectionlink'] ) ) {
                $this->mOutput->setNewSection( true );
        }

        # Allow user to remove the "new section"
        # link via __NONEWSECTIONLINK__
        if ( isset( $this->mDoubleUnderscores['nonewsectionlink'] ) ) {
                $this->mOutput->hideNewSection( true );
        }

        # if the string __FORCETOC__ (not case-sensitive) occurs in the HTML,
        # override above conditions and always show TOC above first header
        if ( isset( $this->mDoubleUnderscores['forcetoc'] ) ) {
                $this->mShowToc = true;
                $enoughToc = true;
        }

        # headline counter
        $headlineCount = 0;
        $numVisible = 0;

        # Ugh .. the TOC should have neat indentation levels which can be
        # passed to the skin functions. These are determined here
        $toc = '';
        $full = '';
        $head = [];
        $sublevelCount = [];
        $levelCount = [];
        $level = 0;
        $prevlevel = 0;
        $toclevel = 0;
        $prevtoclevel = 0;
        $markerRegex = self::MARKER_PREFIX . "-h-(\d+)-" . self::MARKER_SUFFIX;
        $baseTitleText = $this->mTitle->getPrefixedDBkey();
        $oldType = $this->mOutputType;
        $this->setOutputType( self::OT_WIKI );
        $frame = $this->getPreprocessor()->newFrame();
        $root = $this->preprocessToDom( $origText );
        $node = $root->getFirstChild();
        $byteOffset = 0;
        $tocraw = [];
        # Anchors already handed out, keyed by their lower-cased form
        $refers = [];

        $headlines = $numMatches !== false ? $matches[3] : [];

        $maxTocLevel = $this->svcOptions->get( 'MaxTocLevel' );
        foreach ( $headlines as $headline ) {
                $isTemplate = false;
                $titleText = false;
                $sectionIndex = false;
                $numbering = '';
                $markerMatches = [];
                if ( preg_match( "/^$markerRegex/", $headline, $markerMatches ) ) {
                        $serial = $markerMatches[1];
                        list( $titleText, $sectionIndex ) = $this->mHeadings[$serial];
                        $isTemplate = ( $titleText != $baseTitleText );
                        $headline = preg_replace( "/^$markerRegex\\s*/", "", $headline );
                }

                if ( $toclevel ) {
                        $prevlevel = $level;
                }
                $level = $matches[1][$headlineCount];

                if ( $level > $prevlevel ) {
                        # Increase TOC level
                        $toclevel++;
                        $sublevelCount[$toclevel] = 0;
                        if ( $toclevel < $maxTocLevel ) {
                                $prevtoclevel = $toclevel;
                                $toc .= Linker::tocIndent();
                                $numVisible++;
                        }
                } elseif ( $level < $prevlevel && $toclevel > 1 ) {
                        # Decrease TOC level, find level to jump to

                        for ( $i = $toclevel; $i > 0; $i-- ) {
                                if ( $levelCount[$i] == $level ) {
                                        # Found last matching level
                                        $toclevel = $i;
                                        break;
                                } elseif ( $levelCount[$i] < $level ) {
                                        # Found first matching level below current level
                                        $toclevel = $i + 1;
                                        break;
                                }
                        }
                        if ( $i == 0 ) {
                                $toclevel = 1;
                        }
                        if ( $toclevel < $maxTocLevel ) {
                                if ( $prevtoclevel < $maxTocLevel ) {
                                        # Unindent only if the previous toc level was shown :p
                                        $toc .= Linker::tocUnindent( $prevtoclevel - $toclevel );
                                        $prevtoclevel = $toclevel;
                                } else {
                                        $toc .= Linker::tocLineEnd();
                                }
                        }
                } else {
                        # No change in level, end TOC line
                        if ( $toclevel < $maxTocLevel ) {
                                $toc .= Linker::tocLineEnd();
                        }
                }

                $levelCount[$toclevel] = $level;

                # count number of headlines for each level
                $sublevelCount[$toclevel]++;
                $dot = 0;
                for ( $i = 1; $i <= $toclevel; $i++ ) {
                        if ( !empty( $sublevelCount[$i] ) ) {
                                if ( $dot ) {
                                        $numbering .= '.';
                                }
                                $numbering .= $this->getTargetLanguage()->formatNum( $sublevelCount[$i] );
                                $dot = 1;
                        }
                }

                # The safe header is a version of the header text safe to use for links

                # Remove link placeholders by the link text.
                #     <!--LINK number-->
                # turns into
                #     link text with suffix
                # Do this before unstrip since link text can contain strip markers
                $safeHeadline = $this->replaceLinkHoldersText( $headline );

                # Avoid insertion of weird stuff like <math> by expanding the relevant sections
                $safeHeadline = $this->mStripState->unstripBoth( $safeHeadline );

                # Remove any <style> or <script> tags (T198618)
                $safeHeadline = preg_replace(
                        '#<(style|script)(?: [^>]*[^>/])?>.*?</\1>#is',
                        '',
                        $safeHeadline
                );

                # Strip out HTML (first regex removes any tag not allowed)
                # Allowed tags are:
                # * <sup> and <sub> (T10393)
                # * <i> (T28375)
                # * <b> (r105284)
                # * <bdi> (T74884)
                # * <span dir="rtl"> and <span dir="ltr"> (T37167)
                # * <s> and <strike> (T35715)
                # We strip any parameter from accepted tags (second regex), except dir="rtl|ltr" from <span>,
                # to allow setting directionality in toc items.
                $tocline = preg_replace(
                        [
                                '#<(?!/?(span|sup|sub|bdi|i|b|s|strike)(?: [^>]*)?>).*?>#',
                                '#<(/?(?:span(?: dir="(?:rtl|ltr)")?|sup|sub|bdi|i|b|s|strike))(?: .*?)?>#'
                        ],
                        [ '', '<$1>' ],
                        $safeHeadline
                );

                # Strip '<span></span>', which is the result from the above if
                # <span id="foo"></span> is used to produce an additional anchor
                # for a section.
                $tocline = str_replace( '<span></span>', '', $tocline );

                $tocline = trim( $tocline );

                # For the anchor, strip out HTML-y stuff period
                $safeHeadline = preg_replace( '/<.*?>/', '', $safeHeadline );
                $safeHeadline = Sanitizer::normalizeSectionNameWhitespace( $safeHeadline );

                # Save headline for section edit hint before it's escaped
                $headlineHint = $safeHeadline;

                # Decode HTML entities
                $safeHeadline = Sanitizer::decodeCharReferences( $safeHeadline );

                $safeHeadline = self::normalizeSectionName( $safeHeadline );

                $fallbackHeadline = Sanitizer::escapeIdForAttribute( $safeHeadline, Sanitizer::ID_FALLBACK );
                $linkAnchor = Sanitizer::escapeIdForLink( $safeHeadline );
                $safeHeadline = Sanitizer::escapeIdForAttribute( $safeHeadline, Sanitizer::ID_PRIMARY );
                if ( $fallbackHeadline === $safeHeadline ) {
                        # No reason to have both (in fact, we can't)
                        $fallbackHeadline = false;
                }

                # HTML IDs must be case-insensitively unique for IE compatibility (T12721).
                # @todo FIXME: We may be changing them depending on the current locale.
                $arrayKey = strtolower( $safeHeadline );
                if ( $fallbackHeadline === false ) {
                        $fallbackArrayKey = false;
                } else {
                        $fallbackArrayKey = strtolower( $fallbackHeadline );
                }

                # Create the anchor for linking from the TOC to the section
                $anchor = $safeHeadline;
                $fallbackAnchor = $fallbackHeadline;
                if ( isset( $refers[$arrayKey] ) ) {
                        // phpcs:ignore Generic.Formatting.DisallowMultipleStatements
                        for ( $i = 2; isset( $refers["{$arrayKey}_$i"] ); ++$i );
                        $anchor .= "_$i";
                        $linkAnchor .= "_$i";
                        $refers["{$arrayKey}_$i"] = true;
                } else {
                        $refers[$arrayKey] = true;
                }
                # Only track the fallback anchor when there is one. Recording a
                # missing fallback would use `false` as the array key, which PHP
                # casts to 0 and which would then collide with a heading whose
                # anchor is "0".
                if ( $fallbackHeadline !== false ) {
                        if ( isset( $refers[$fallbackArrayKey] ) ) {
                                // phpcs:ignore Generic.Formatting.DisallowMultipleStatements
                                for ( $i = 2; isset( $refers["{$fallbackArrayKey}_$i"] ); ++$i );
                                $fallbackAnchor .= "_$i";
                                $refers["{$fallbackArrayKey}_$i"] = true;
                        } else {
                                $refers[$fallbackArrayKey] = true;
                        }
                }

                # Don't number the heading if it is the only one (looks silly)
                if ( count( $matches[3] ) > 1 && $this->mOptions->getNumberHeadings() ) {
                        # the two are different if the line contains a link
                        $headline = Html::element(
                                'span',
                                [ 'class' => 'mw-headline-number' ],
                                $numbering
                        ) . ' ' . $headline;
                }

                if ( $enoughToc && ( !isset( $maxTocLevel ) || $toclevel < $maxTocLevel ) ) {
                        $toc .= Linker::tocLine( $linkAnchor, $tocline,
                                $numbering, $toclevel, ( $isTemplate ? false : $sectionIndex ) );
                }

                # Add the section to the section tree
                # Find the DOM node for this header
                $noOffset = ( $isTemplate || $sectionIndex === false );
                while ( $node && !$noOffset ) {
                        if ( $node->getName() === 'h' ) {
                                $bits = $node->splitHeading();
                                if ( $bits['i'] == $sectionIndex ) {
                                        break;
                                }
                        }
                        # NOTE: despite the variable name, the offset is accumulated
                        # with mb_strlen(); this is kept as is for compatibility.
                        $byteOffset += mb_strlen( $this->mStripState->unstripBoth(
                                $frame->expand( $node, PPFrame::RECOVER_ORIG ) ) );
                        $node = $node->getNextSibling();
                }
                $tocraw[] = [
                        'toclevel' => $toclevel,
                        'level' => $level,
                        'line' => $tocline,
                        'number' => $numbering,
                        'index' => ( $isTemplate ? 'T-' : '' ) . $sectionIndex,
                        'fromtitle' => $titleText,
                        'byteoffset' => ( $noOffset ? null : $byteOffset ),
                        'anchor' => $anchor,
                ];

                # give headline the correct <h#> tag
                if ( $maybeShowEditLink && $sectionIndex !== false ) {
                        // Output edit section links as markers with styles that can be customized by skins
                        if ( $isTemplate ) {
                                # Put a T flag in the section identifier, to indicate to extractSections()
                                # that sections inside <includeonly> should be counted.
                                $editsectionPage = $titleText;
                                $editsectionSection = "T-$sectionIndex";
                                $editsectionContent = null;
                        } else {
                                $editsectionPage = $this->mTitle->getPrefixedText();
                                $editsectionSection = $sectionIndex;
                                $editsectionContent = $headlineHint;
                        }
                        // We use a bit of pseudo-xml for editsection markers. The
                        // language converter is run later on. Using a UNIQ style marker
                        // leads to the converter screwing up the tokens when it
                        // converts stuff. And trying to insert strip tags fails too. At
                        // this point all real inputted tags have already been escaped,
                        // so we don't have to worry about a user trying to input one of
                        // these markers directly. We use a page and section attribute
                        // to stop the language converter from converting these
                        // important bits of data, but put the headline hint inside a
                        // content block because the language converter is supposed to
                        // be able to convert that piece of data.
                        // Gets replaced with html in ParserOutput::getText
                        $editlink = '<mw:editsection page="' . htmlspecialchars( $editsectionPage );
                        $editlink .= '" section="' . htmlspecialchars( $editsectionSection ) . '"';
                        if ( $editsectionContent !== null ) {
                                $editlink .= '>' . $editsectionContent . '</mw:editsection>';
                        } else {
                                $editlink .= '/>';
                        }
                } else {
                        $editlink = '';
                }
                $head[$headlineCount] = Linker::makeHeadline( $level,
                        $matches['attrib'][$headlineCount], $anchor, $headline,
                        $editlink, $fallbackAnchor );

                $headlineCount++;
        }

        $this->setOutputType( $oldType );

        # Never ever show TOC if no headers
        if ( $numVisible < 1 ) {
                $enoughToc = false;
        }

        if ( $enoughToc ) {
                if ( $prevtoclevel > 0 && $prevtoclevel < $maxTocLevel ) {
                        $toc .= Linker::tocUnindent( $prevtoclevel - 1 );
                }
                $toc = Linker::tocList( $toc, $this->mOptions->getUserLangObj() );
                $this->mOutput->setTOCHTML( $toc );
                $toc = self::TOC_START . $toc . self::TOC_END;
        }

        if ( $isMain ) {
                $this->mOutput->setSections( $tocraw );
        }

        # split up and insert constructed headlines
        # The closing tag is matched exactly like in the preg_match_all() call
        # above (optional spaces before '>'), so that $blocks and $head always
        # stay aligned.
        $blocks = preg_split( '/<H[1-6].*?>[\s\S]*?<\/H[1-6] *>/i', $text );
        $i = 0;

        // build an array of document sections
        $sections = [];
        foreach ( $blocks as $block ) {
                // $head is zero-based, sections aren't.
                if ( empty( $head[$i - 1] ) ) {
                        $sections[$i] = $block;
                } else {
                        $sections[$i] = $head[$i - 1] . $block;
                }

                /**
                 * Send a hook, one per section.
                 * The idea here is to be able to make section-level DIVs, but to do so in a
                 * lower-impact, more correct way than r50769
                 *
                 * $this : caller
                 * $section : the section number
                 * &$sectionContent : ref to the content of the section
                 * $maybeShowEditLinks : boolean describing whether this section has an edit link
                 */
                Hooks::run( 'ParserSectionCreate', [ $this, $i, &$sections[$i], $maybeShowEditLink ] );

                $i++;
        }

        if ( $enoughToc && $isMain && !$this->mForceTocPosition ) {
                // append the TOC at the beginning
                // Top anchor now in skin
                $sections[0] .= $toc . "\n";
        }

        $full .= implode( '', $sections );

        if ( $this->mForceTocPosition ) {
                # Put the TOC where the placeholder comment was left in the text
                return str_replace( '<!--MWTOC\'"-->', $toc, $full );
        } else {
                return $full;
        }
}
4649
/**
 * Transform wiki markup when saving a page.
 *
 * The text has U+0000 characters removed and its line endings normalized
 * ("\r\n" -> "\n"); if the options enable the pre-save transform,
 * signatures, {{subst:}} templates, etc. are substituted as well.
 *
 * @param string $text The text to transform
 * @param Title $title The Title object for the current article
 * @param User $user The User object describing the current user
 * @param ParserOptions $options Parsing options
 * @param bool $clearState Whether to clear the parser state first
 * @return string The altered wiki markup
 */
public function preSaveTransform( $text, Title $title, User $user,
        ParserOptions $options, $clearState = true
) {
        // The lock is only taken when the state gets cleared; the returned
        // value is simply held until this method returns.
        $parserLock = $clearState ? $this->lock() : null;

        $this->startParse( $title, $options, self::OT_WIKI, $clearState );
        $this->setUser( $user );

        // Strip U+0000 NULL (T159174), then normalize line endings. The
        // latter should already be handled in TextContent subclasses, but is
        // kept for backwards-compatibility with other code that just calls PST.
        $cleaned = TextContent::normalizeLineEndings( str_replace( "\000", '', $text ) );

        $transformed = $options->getPreSaveTransform()
                ? $this->pstPass2( $cleaned, $user )
                : $cleaned;
        $result = $this->mStripState->unstripBoth( $transformed );

        // Reset the user that was set for this run
        $this->setUser( null );

        return $result;
}
4687
/**
 * Pre-save transform helper function
 *
 * Performs, in this order: {{subst:}}-style variable replacement,
 * signature expansion (~~~, ~~~~, ~~~~~) and the "pipe trick" link
 * expansions ([[name (context)|]] and [[|name]]).
 *
 * @param string $text Wikitext to transform
 * @param User $user User whose signature is substituted
 *
 * @return string The transformed wikitext
 */
private function pstPass2( $text, $user ) {
        # Note: This is the timestamp saved as hardcoded wikitext to the database, we use
        # $this->contLang here in order to give everyone the same signature and use the default one
        # rather than the one selected in each user's preferences.  (see also T14815)
        $ts = $this->mOptions->getTimestamp();
        $timestamp = MWTimestamp::getLocalInstance( $ts );
        $ts = $timestamp->format( 'YmdHis' );
        $tzMsg = $timestamp->getTimezoneMessage()->inContentLanguage()->text();

        $d = $this->contLang->timeanddate( $ts, false, false ) . " ($tzMsg)";

        # Variable replacement
        # Because mOutputType is OT_WIKI, this will only process {{subst:xxx}} type tags
        $text = $this->replaceVariables( $text );

        # This works almost by chance, as the replaceVariables are done before the getUserSig(),
        # which may corrupt this parser instance via its wfMessage()->text() call.

        # Signatures
        if ( strpos( $text, '~~~' ) !== false ) {
                $sigText = $this->getUserSig( $user );
                # strtr() tries the longest key first, so five tildes are never
                # mistaken for a shorter form.
                $text = strtr( $text, [
                        '~~~~~' => $d,
                        '~~~~' => "$sigText $d",
                        '~~~' => $sigText
                ] );
                # The main two signature forms used above are time-sensitive
                $this->setOutputFlag( 'user-signature', 'User signature detected' );
        }

        # Context links ("pipe tricks"): [[|name]] and [[name (context)|]]
        $tc = '[' . Title::legalChars() . ']';
        $nc = '[ _0-9A-Za-z\x80-\xff-]'; # Namespaces can use non-ascii!

        // [[ns:page (context)|]]
        $p1 = "/\[\[(:?$nc+:|:|)($tc+?)( ?\\($tc+\\))\\|]]/";
        // [[ns:page（context）|]] (double-width brackets, added in r40257)
        $p4 = "/\[\[(:?$nc+:|:|)($tc+?)( ?（$tc+）)\\|]]/";
        // [[ns:page (context), context|]] (using either single or double-width comma)
        $p3 = "/\[\[(:?$nc+:|:|)($tc+?)( ?\\($tc+\\)|)((?:, |，)$tc+|)\\|]]/";
        // [[|page]] (reverse pipe trick: add context from page title)
        $p2 = "/\[\[\\|($tc+)]]/";

        # try $p1 first, to turn "[[A, B (C)|]]" into "[[A, B (C)|A, B]]"
        $text = preg_replace( $p1, '[[\\1\\2\\3|\\2]]', $text );
        $text = preg_replace( $p4, '[[\\1\\2\\3|\\2]]', $text );
        $text = preg_replace( $p3, '[[\\1\\2\\3\\4|\\2]]', $text );

        # Reverse pipe trick: take the context either from a trailing
        # " (context)" or from a trailing ", context" of the current title.
        $t = $this->mTitle->getText();
        $m = [];
        if ( preg_match( "/^($nc+:|)$tc+?( \\($tc+\\))$/", $t, $m )
                || ( preg_match( "/^($nc+:|)$tc+?(, $tc+|)$/", $t, $m ) && "$m[1]$m[2]" != '' )
        ) {
                $prefix = $m[1];
                $context = $m[2];
                # Use a callback instead of a replacement string: the title parts
                # may contain "$" or "\", which preg_replace() would otherwise
                # interpret as backreferences (e.g. a title ending in " ($100)").
                $text = preg_replace_callback(
                        $p2,
                        static function ( array $link ) use ( $prefix, $context ) {
                                return "[[{$prefix}{$link[1]}{$context}|{$link[1]}]]";
                        },
                        $text
                );
        } else {
                # if there's no context, don't bother duplicating the title
                $text = preg_replace( $p2, '[[\\1]]', $text );
        }

        return $text;
}
4757
/**
 * Build the signature wikitext for a user.
 *
 * A "fancy" signature that passes validation is cleaned up and returned
 * as is; in every other case a link to the user page is generated from
 * the 'signature' / 'signature-anon' message.
 * If you have pre-fetched the nickname or the fancySig option, you can
 * specify them here instead of having them read from the user object.
 * Do not reuse this parser instance after calling getUserSig(),
 * as it may have changed.
 *
 * @param User &$user
 * @param string|bool $nickname Nickname to use or false to use user's default nickname
 * @param bool|null $fancySig whether the nickname is the complete signature
 *    or null to use default value
 * @return string
 */
public function getUserSig( &$user, $nickname = false, $fancySig = null ) {
        $username = $user->getName();

        # Fill in whatever the caller did not provide from the user object
        if ( $nickname === false ) {
                $nickname = $user->getOption( 'nickname' );
        }
        if ( $fancySig === null ) {
                $fancySig = $user->getBoolOption( 'fancysig' );
        }

        # Without a nickname, sign with the plain user name
        if ( $nickname == null ) {
                $nickname = $username;
        }

        $sigTooLong = mb_strlen( $nickname ) > $this->svcOptions->get( 'MaxSigChars' );
        if ( $sigTooLong ) {
                $nickname = $username;
                $this->logger->debug( __METHOD__ . ": $username has overlong signature." );
        } elseif ( $fancySig !== false ) {
                # Sig. might contain markup; validate this
                if ( $this->validateSig( $nickname ) !== false ) {
                        # Validated; clean up (if needed) and return it
                        return $this->cleanSig( $nickname, true );
                }

                # Failed to validate; fall back to the default
                $nickname = $username;
                $this->logger->debug( __METHOD__ . ": $username has bad XML tags in signature." );
        }

        # Make sure nickname doesnt get a sig in a sig
        $nickname = self::cleanSigInSig( $nickname );

        # If we're still here, make it a link to the user page
        $userText = wfEscapeWikiText( $username );
        $nickText = wfEscapeWikiText( $nickname );
        if ( $user->isAnon() ) {
                $msgName = 'signature-anon';
        } else {
                $msgName = 'signature';
        }

        $sigMessage = wfMessage( $msgName, $userText, $nickText )->inContentLanguage();

        return $sigMessage->title( $this->getTitle() )->text();
}
4812
4813         /**
4814          * Check that the user's signature contains no bad XML
4815          *
4816          * @param string $text
4817          * @return string|bool An expanded string, or false if invalid.
4818          */
4819         public function validateSig( $text ) {
4820                 return Xml::isWellFormedXmlFragment( $text ) ? $text : false;
4821         }
4822
4823         /**
4824          * Clean up signature text
4825          *
4826          * 1) Strip 3, 4 or 5 tildes out of signatures @see cleanSigInSig
4827          * 2) Substitute all transclusions
4828          *
4829          * @param string $text
4830          * @param bool $parsing Whether we're cleaning (preferences save) or parsing
4831          * @return string Signature text
4832          */
4833         public function cleanSig( $text, $parsing = false ) {
4834                 if ( !$parsing ) {
4835                         global $wgTitle;
4836                         $magicScopeVariable = $this->lock();
4837                         $this->startParse( $wgTitle, new ParserOptions, self::OT_PREPROCESS, true );
4838                 }
4839
4840                 # Option to disable this feature
4841                 if ( !$this->mOptions->getCleanSignatures() ) {
4842                         return $text;
4843                 }
4844
4845                 # @todo FIXME: Regex doesn't respect extension tags or nowiki
4846                 #  => Move this logic to braceSubstitution()
4847                 $substWord = $this->magicWordFactory->get( 'subst' );
4848                 $substRegex = '/\{\{(?!(?:' . $substWord->getBaseRegex() . '))/x' . $substWord->getRegexCase();
4849                 $substText = '{{' . $substWord->getSynonym( 0 );
4850
4851                 $text = preg_replace( $substRegex, $substText, $text );
4852                 $text = self::cleanSigInSig( $text );
4853                 $dom = $this->preprocessToDom( $text );
4854                 $frame = $this->getPreprocessor()->newFrame();
4855                 $text = $frame->expand( $dom );
4856
4857                 if ( !$parsing ) {
4858                         $text = $this->mStripState->unstripBoth( $text );
4859                 }
4860
4861                 return $text;
4862         }
4863
4864         /**
4865          * Strip 3, 4 or 5 tildes out of signatures.
4866          *
4867          * @param string $text
4868          * @return string Signature text with /~{3,5}/ removed
4869          */
4870         public static function cleanSigInSig( $text ) {
4871                 $text = preg_replace( '/~{3,5}/', '', $text );
4872                 return $text;
4873         }
4874
4875         /**
4876          * Set up some variables which are usually set up in parse()
4877          * so that an external function can call some class members with confidence
4878          *
4879          * @param Title|null $title
4880          * @param ParserOptions $options
4881          * @param int $outputType
4882          * @param bool $clearState
4883          * @param int|null $revId
4884          */
4885         public function startExternalParse( Title $title = null, ParserOptions $options,
4886                 $outputType, $clearState = true, $revId = null
4887         ) {
4888                 $this->startParse( $title, $options, $outputType, $clearState );
4889                 if ( $revId !== null ) {
4890                         $this->mRevisionId = $revId;
4891                 }
4892         }
4893
4894         /**
4895          * @param Title|null $title
4896          * @param ParserOptions $options
4897          * @param int $outputType
4898          * @param bool $clearState
4899          */
4900         private function startParse( Title $title = null, ParserOptions $options,
4901                 $outputType, $clearState = true
4902         ) {
4903                 $this->setTitle( $title );
4904                 $this->mOptions = $options;
4905                 $this->setOutputType( $outputType );
4906                 if ( $clearState ) {
4907                         $this->clearState();
4908                 }
4909         }
4910
4911         /**
4912          * Wrapper for preprocess()
4913          *
4914          * @param string $text The text to preprocess
4915          * @param ParserOptions $options
4916          * @param Title|null $title Title object or null to use $wgTitle
4917          * @return string
4918          */
4919         public function transformMsg( $text, $options, $title = null ) {
4920                 static $executing = false;
4921
4922                 # Guard against infinite recursion
4923                 if ( $executing ) {
4924                         return $text;
4925                 }
4926                 $executing = true;
4927
4928                 if ( !$title ) {
4929                         global $wgTitle;
4930                         $title = $wgTitle;
4931                 }
4932
4933                 $text = $this->preprocess( $text, $title, $options );
4934
4935                 $executing = false;
4936                 return $text;
4937         }
4938
4939         /**
4940          * Create an HTML-style tag, e.g. "<yourtag>special text</yourtag>"
4941          * The callback should have the following form:
4942          *    function myParserHook( $text, $params, $parser, $frame ) { ... }
4943          *
4944          * Transform and return $text. Use $parser for any required context, e.g. use
4945          * $parser->getTitle() and $parser->getOptions() not $wgTitle or $wgOut->mParserOptions
4946          *
4947          * Hooks may return extended information by returning an array, of which the
4948          * first numbered element (index 0) must be the return string, and all other
4949          * entries are extracted into local variables within an internal function
4950          * in the Parser class.
4951          *
4952          * This interface (introduced r61913) appears to be undocumented, but
4953          * 'markerType' is used by some core tag hooks to override which strip
4954          * array their results are placed in. **Use great caution if attempting
4955          * this interface, as it is not documented and injudicious use could smash
4956          * private variables.**
4957          *
4958          * @param string $tag The tag to use, e.g. 'hook' for "<hook>"
4959          * @param callable $callback The callback function (and object) to use for the tag
4960          * @throws MWException
4961          * @return callable|null The old value of the mTagHooks array associated with the hook
4962          */
4963         public function setHook( $tag, callable $callback ) {
4964                 $tag = strtolower( $tag );
4965                 if ( preg_match( '/[<>\r\n]/', $tag, $m ) ) {
4966                         throw new MWException( "Invalid character {$m[0]} in setHook('$tag', ...) call" );
4967                 }
4968                 $oldVal = $this->mTagHooks[$tag] ?? null;
4969                 $this->mTagHooks[$tag] = $callback;
4970                 if ( !in_array( $tag, $this->mStripList ) ) {
4971                         $this->mStripList[] = $tag;
4972                 }
4973
4974                 return $oldVal;
4975         }
4976
4977         /**
4978          * As setHook(), but letting the contents be parsed.
4979          *
4980          * Transparent tag hooks are like regular XML-style tag hooks, except they
4981          * operate late in the transformation sequence, on HTML instead of wikitext.
4982          *
4983          * This is probably obsoleted by things dealing with parser frames?
4984          * The only extension currently using it is geoserver.
4985          *
4986          * @since 1.10
4987          * @todo better document or deprecate this
4988          *
4989          * @param string $tag The tag to use, e.g. 'hook' for "<hook>"
4990          * @param callable $callback The callback function (and object) to use for the tag
4991          * @throws MWException
         * @return callable|null The old value of the mTransparentTagHooks array associated with the hook
4993          */
4994         public function setTransparentTagHook( $tag, callable $callback ) {
4995                 $tag = strtolower( $tag );
4996                 if ( preg_match( '/[<>\r\n]/', $tag, $m ) ) {
4997                         throw new MWException( "Invalid character {$m[0]} in setTransparentHook('$tag', ...) call" );
4998                 }
4999                 $oldVal = $this->mTransparentTagHooks[$tag] ?? null;
5000                 $this->mTransparentTagHooks[$tag] = $callback;
5001
5002                 return $oldVal;
5003         }
5004
5005         /**
5006          * Remove all tag hooks
5007          */
5008         public function clearTagHooks() {
5009                 $this->mTagHooks = [];
5010                 $this->mFunctionTagHooks = [];
5011                 $this->mStripList = $this->mDefaultStripList;
5012         }
5013
5014         /**
5015          * Create a function, e.g. {{sum:1|2|3}}
5016          * The callback function should have the form:
5017          *    function myParserFunction( &$parser, $arg1, $arg2, $arg3 ) { ... }
5018          *
5019          * Or with Parser::SFH_OBJECT_ARGS:
5020          *    function myParserFunction( $parser, $frame, $args ) { ... }
5021          *
5022          * The callback may either return the text result of the function, or an array with the text
5023          * in element 0, and a number of flags in the other elements. The names of the flags are
5024          * specified in the keys. Valid flags are:
5025          *   found                     The text returned is valid, stop processing the template. This
5026          *                             is on by default.
5027          *   nowiki                    Wiki markup in the return value should be escaped
5028          *   isHTML                    The returned text is HTML, armour it against wikitext transformation
5029          *
5030          * @param string $id The magic word ID
5031          * @param callable $callback The callback function (and object) to use
5032          * @param int $flags A combination of the following flags:
5033          *     Parser::SFH_NO_HASH      No leading hash, i.e. {{plural:...}} instead of {{#if:...}}
5034          *
5035          *     Parser::SFH_OBJECT_ARGS  Pass the template arguments as PPNode objects instead of text.
5036          *     This allows for conditional expansion of the parse tree, allowing you to eliminate dead
5037          *     branches and thus speed up parsing. It is also possible to analyse the parse tree of
5038          *     the arguments, and to control the way they are expanded.
5039          *
5040          *     The $frame parameter is a PPFrame. This can be used to produce expanded text from the
5041          *     arguments, for instance:
5042          *         $text = isset( $args[0] ) ? $frame->expand( $args[0] ) : '';
5043          *
5044          *     For technical reasons, $args[0] is pre-expanded and will be a string. This may change in
5045          *     future versions. Please call $frame->expand() on it anyway so that your code keeps
5046          *     working if/when this is changed.
5047          *
5048          *     If you want whitespace to be trimmed from $args, you need to do it yourself, post-
5049          *     expansion.
5050          *
5051          *     Please read the documentation in includes/parser/Preprocessor.php for more information
5052          *     about the methods available in PPFrame and PPNode.
5053          *
5054          * @throws MWException
         * @return callable|null The old callback function for this name, or null if none was set
5056          */
5057         public function setFunctionHook( $id, callable $callback, $flags = 0 ) {
5058                 $oldVal = isset( $this->mFunctionHooks[$id] ) ? $this->mFunctionHooks[$id][0] : null;
5059                 $this->mFunctionHooks[$id] = [ $callback, $flags ];
5060
5061                 # Add to function cache
5062                 $mw = $this->magicWordFactory->get( $id );
5063                 if ( !$mw ) {
5064                         throw new MWException( __METHOD__ . '() expecting a magic word identifier.' );
5065                 }
5066
5067                 $synonyms = $mw->getSynonyms();
5068                 $sensitive = intval( $mw->isCaseSensitive() );
5069
5070                 foreach ( $synonyms as $syn ) {
5071                         # Case
5072                         if ( !$sensitive ) {
5073                                 $syn = $this->contLang->lc( $syn );
5074                         }
5075                         # Add leading hash
5076                         if ( !( $flags & self::SFH_NO_HASH ) ) {
5077                                 $syn = '#' . $syn;
5078                         }
5079                         # Remove trailing colon
5080                         if ( substr( $syn, -1, 1 ) === ':' ) {
5081                                 $syn = substr( $syn, 0, -1 );
5082                         }
5083                         $this->mFunctionSynonyms[$sensitive][$syn] = $id;
5084                 }
5085                 return $oldVal;
5086         }
5087
5088         /**
5089          * Get all registered function hook identifiers
5090          *
5091          * @return array
5092          */
5093         public function getFunctionHooks() {
5094                 $this->firstCallInit();
5095                 return array_keys( $this->mFunctionHooks );
5096         }
5097
5098         /**
5099          * Create a tag function, e.g. "<test>some stuff</test>".
5100          * Unlike tag hooks, tag functions are parsed at preprocessor level.
5101          * Unlike parser functions, their content is not preprocessed.
5102          * @param string $tag
5103          * @param callable $callback
5104          * @param int $flags
5105          * @throws MWException
         * @return array|null The previous [ callback, flags ] entry for this tag, or null if none was set
5107          */
5108         public function setFunctionTagHook( $tag, callable $callback, $flags ) {
5109                 $tag = strtolower( $tag );
5110                 if ( preg_match( '/[<>\r\n]/', $tag, $m ) ) {
5111                         throw new MWException( "Invalid character {$m[0]} in setFunctionTagHook('$tag', ...) call" );
5112                 }
5113                 $old = $this->mFunctionTagHooks[$tag] ?? null;
5114                 $this->mFunctionTagHooks[$tag] = [ $callback, $flags ];
5115
5116                 if ( !in_array( $tag, $this->mStripList ) ) {
5117                         $this->mStripList[] = $tag;
5118                 }
5119
5120                 return $old;
5121         }
5122
5123         /**
5124          * Replace "<!--LINK-->" link placeholders with actual links, in the buffer
5125          * Placeholders created in Linker::link()
5126          *
5127          * @param string &$text
         * @param int $options Unused
5129          */
5130         public function replaceLinkHolders( &$text, $options = 0 ) {
5131                 $this->mLinkHolders->replace( $text );
5132         }
5133
5134         /**
5135          * Replace "<!--LINK-->" link placeholders with plain text of links
5136          * (not HTML-formatted).
5137          *
5138          * @param string $text
5139          * @return string
5140          */
5141         public function replaceLinkHoldersText( $text ) {
5142                 return $this->mLinkHolders->replaceText( $text );
5143         }
5144
5145         /**
5146          * Renders an image gallery from a text with one line per image.
5147          * text labels may be given by using |-style alternative text. E.g.
5148          *   Image:one.jpg|The number "1"
5149          *   Image:tree.jpg|A tree
5150          * given as text will return the HTML of a gallery with two images,
5151          * labeled 'The number "1"' and
5152          * 'A tree'.
5153          *
5154          * @param string $text
5155          * @param array $params
5156          * @return string HTML
5157          */
5158         public function renderImageGallery( $text, $params ) {
5159                 $mode = false;
5160                 if ( isset( $params['mode'] ) ) {
5161                         $mode = $params['mode'];
5162                 }
5163
5164                 try {
5165                         $ig = ImageGalleryBase::factory( $mode );
5166                 } catch ( Exception $e ) {
5167                         // If invalid type set, fallback to default.
5168                         $ig = ImageGalleryBase::factory( false );
5169                 }
5170
5171                 $ig->setContextTitle( $this->mTitle );
5172                 $ig->setShowBytes( false );
5173                 $ig->setShowDimensions( false );
5174                 $ig->setShowFilename( false );
5175                 $ig->setParser( $this );
5176                 $ig->setHideBadImages();
5177                 $ig->setAttributes( Sanitizer::validateTagAttributes( $params, 'ul' ) );
5178
5179                 if ( isset( $params['showfilename'] ) ) {
5180                         $ig->setShowFilename( true );
5181                 } else {
5182                         $ig->setShowFilename( false );
5183                 }
5184                 if ( isset( $params['caption'] ) ) {
5185                         // NOTE: We aren't passing a frame here or below.  Frame info
5186                         // is currently opaque to Parsoid, which acts on OT_PREPROCESS.
5187                         // See T107332#4030581
5188                         $caption = $this->recursiveTagParse( $params['caption'] );
5189                         $ig->setCaptionHtml( $caption );
5190                 }
5191                 if ( isset( $params['perrow'] ) ) {
5192                         $ig->setPerRow( $params['perrow'] );
5193                 }
5194                 if ( isset( $params['widths'] ) ) {
5195                         $ig->setWidths( $params['widths'] );
5196                 }
5197                 if ( isset( $params['heights'] ) ) {
5198                         $ig->setHeights( $params['heights'] );
5199                 }
5200                 $ig->setAdditionalOptions( $params );
5201
5202                 // Avoid PHP 7.1 warning from passing $this by reference
5203                 $parser = $this;
5204                 Hooks::run( 'BeforeParserrenderImageGallery', [ &$parser, &$ig ] );
5205
5206                 $lines = StringUtils::explode( "\n", $text );
5207                 foreach ( $lines as $line ) {
5208                         # match lines like these:
5209                         # Image:someimage.jpg|This is some image
5210                         $matches = [];
5211                         preg_match( "/^([^|]+)(\\|(.*))?$/", $line, $matches );
5212                         # Skip empty lines
5213                         if ( count( $matches ) == 0 ) {
5214                                 continue;
5215                         }
5216
5217                         if ( strpos( $matches[0], '%' ) !== false ) {
5218                                 $matches[1] = rawurldecode( $matches[1] );
5219                         }
5220                         $title = Title::newFromText( $matches[1], NS_FILE );
5221                         if ( is_null( $title ) ) {
5222                                 # Bogus title. Ignore these so we don't bomb out later.
5223                                 continue;
5224                         }
5225
5226                         # We need to get what handler the file uses, to figure out parameters.
5227                         # Note, a hook can overide the file name, and chose an entirely different
5228                         # file (which potentially could be of a different type and have different handler).
5229                         $options = [];
5230                         $descQuery = false;
5231                         Hooks::run( 'BeforeParserFetchFileAndTitle',
5232                                 [ $this, $title, &$options, &$descQuery ] );
5233                         # Don't register it now, as TraditionalImageGallery does that later.
5234                         $file = $this->fetchFileNoRegister( $title, $options );
5235                         $handler = $file ? $file->getHandler() : false;
5236
5237                         $paramMap = [
5238                                 'img_alt' => 'gallery-internal-alt',
5239                                 'img_link' => 'gallery-internal-link',
5240                         ];
5241                         if ( $handler ) {
5242                                 $paramMap += $handler->getParamMap();
5243                                 // We don't want people to specify per-image widths.
5244                                 // Additionally the width parameter would need special casing anyhow.
5245                                 unset( $paramMap['img_width'] );
5246                         }
5247
5248                         $mwArray = $this->magicWordFactory->newArray( array_keys( $paramMap ) );
5249
5250                         $label = '';
5251                         $alt = '';
5252                         $link = '';
5253                         $handlerOptions = [];
5254                         if ( isset( $matches[3] ) ) {
5255                                 // look for an |alt= definition while trying not to break existing
5256                                 // captions with multiple pipes (|) in it, until a more sensible grammar
5257                                 // is defined for images in galleries
5258
5259                                 // FIXME: Doing recursiveTagParse at this stage, and the trim before
5260                                 // splitting on '|' is a bit odd, and different from makeImage.
5261                                 $matches[3] = $this->recursiveTagParse( trim( $matches[3] ) );
5262                                 // Protect LanguageConverter markup
5263                                 $parameterMatches = StringUtils::delimiterExplode(
5264                                         '-{', '}-', '|', $matches[3], true /* nested */
5265                                 );
5266
5267                                 foreach ( $parameterMatches as $parameterMatch ) {
5268                                         list( $magicName, $match ) = $mwArray->matchVariableStartToEnd( $parameterMatch );
5269                                         if ( $magicName ) {
5270                                                 $paramName = $paramMap[$magicName];
5271
5272                                                 switch ( $paramName ) {
5273                                                         case 'gallery-internal-alt':
5274                                                                 $alt = $this->stripAltText( $match, false );
5275                                                                 break;
5276                                                         case 'gallery-internal-link':
5277                                                                 $linkValue = $this->stripAltText( $match, false );
5278                                                                 if ( preg_match( '/^-{R|(.*)}-$/', $linkValue ) ) {
5279                                                                         // Result of LanguageConverter::markNoConversion
5280                                                                         // invoked on an external link.
5281                                                                         $linkValue = substr( $linkValue, 4, -2 );
5282                                                                 }
5283                                                                 list( $type, $target ) = $this->parseLinkParameter( $linkValue );
5284                                                                 if ( $type === 'link-url' ) {
5285                                                                         $link = $target;
5286                                                                         $this->mOutput->addExternalLink( $target );
5287                                                                 } elseif ( $type === 'link-title' ) {
5288                                                                         $link = $target->getLinkURL();
5289                                                                         $this->mOutput->addLink( $target );
5290                                                                 }
5291                                                                 break;
5292                                                         default:
5293                                                                 // Must be a handler specific parameter.
5294                                                                 if ( $handler->validateParam( $paramName, $match ) ) {
5295                                                                         $handlerOptions[$paramName] = $match;
5296                                                                 } else {
5297                                                                         // Guess not, consider it as caption.
5298                                                                         $this->logger->debug(
5299                                                                                 "$parameterMatch failed parameter validation" );
5300                                                                         $label = $parameterMatch;
5301                                                                 }
5302                                                 }
5303
5304                                         } else {
5305                                                 // Last pipe wins.
5306                                                 $label = $parameterMatch;
5307                                         }
5308                                 }
5309                         }
5310
5311                         $ig->add( $title, $label, $alt, $link, $handlerOptions );
5312                 }
5313                 $html = $ig->toHTML();
5314                 Hooks::run( 'AfterParserFetchFileAndTitle', [ $this, $ig, &$html ] );
5315                 return $html;
5316         }
5317
5318         /**
         * @param MediaHandler|false $handler
5320          * @return array
5321          */
5322         public function getImageParams( $handler ) {
5323                 if ( $handler ) {
5324                         $handlerClass = get_class( $handler );
5325                 } else {
5326                         $handlerClass = '';
5327                 }
5328                 if ( !isset( $this->mImageParams[$handlerClass] ) ) {
5329                         # Initialise static lists
5330                         static $internalParamNames = [
5331                                 'horizAlign' => [ 'left', 'right', 'center', 'none' ],
5332                                 'vertAlign' => [ 'baseline', 'sub', 'super', 'top', 'text-top', 'middle',
5333                                         'bottom', 'text-bottom' ],
5334                                 'frame' => [ 'thumbnail', 'manualthumb', 'framed', 'frameless',
5335                                         'upright', 'border', 'link', 'alt', 'class' ],
5336                         ];
5337                         static $internalParamMap;
5338                         if ( !$internalParamMap ) {
5339                                 $internalParamMap = [];
5340                                 foreach ( $internalParamNames as $type => $names ) {
5341                                         foreach ( $names as $name ) {
5342                                                 // For grep: img_left, img_right, img_center, img_none,
5343                                                 // img_baseline, img_sub, img_super, img_top, img_text_top, img_middle,
5344                                                 // img_bottom, img_text_bottom,
5345                                                 // img_thumbnail, img_manualthumb, img_framed, img_frameless, img_upright,
5346                                                 // img_border, img_link, img_alt, img_class
5347                                                 $magicName = str_replace( '-', '_', "img_$name" );
5348                                                 $internalParamMap[$magicName] = [ $type, $name ];
5349                                         }
5350                                 }
5351                         }
5352
5353                         # Add handler params
5354                         $paramMap = $internalParamMap;
5355                         if ( $handler ) {
5356                                 $handlerParamMap = $handler->getParamMap();
5357                                 foreach ( $handlerParamMap as $magic => $paramName ) {
5358                                         $paramMap[$magic] = [ 'handler', $paramName ];
5359                                 }
5360                         }
5361                         $this->mImageParams[$handlerClass] = $paramMap;
5362                         $this->mImageParamsMagicArray[$handlerClass] =
5363                                 $this->magicWordFactory->newArray( array_keys( $paramMap ) );
5364                 }
5365                 return [ $this->mImageParams[$handlerClass], $this->mImageParamsMagicArray[$handlerClass] ];
5366         }
5367
5368         /**
5369          * Parse image options text and use it to make an image
5370          *
5371          * @param Title $title
5372          * @param string $options
5373          * @param LinkHolderArray|bool $holders
5374          * @return string HTML
5375          */
5376         public function makeImage( $title, $options, $holders = false ) {
5377                 # Check if the options text is of the form "options|alt text"
5378                 # Options are:
5379                 #  * thumbnail  make a thumbnail with enlarge-icon and caption, alignment depends on lang
5380                 #  * left       no resizing, just left align. label is used for alt= only
5381                 #  * right      same, but right aligned
5382                 #  * none       same, but not aligned
5383                 #  * ___px      scale to ___ pixels width, no aligning. e.g. use in taxobox
5384                 #  * center     center the image
5385                 #  * frame      Keep original image size, no magnify-button.
5386                 #  * framed     Same as "frame"
5387                 #  * frameless  like 'thumb' but without a frame. Keeps user preferences for width
5388                 #  * upright    reduce width for upright images, rounded to full __0 px
5389                 #  * border     draw a 1px border around the image
5390                 #  * alt        Text for HTML alt attribute (defaults to empty)
5391                 #  * class      Set a class for img node
5392                 #  * link       Set the target of the image link. Can be external, interwiki, or local
5393                 # vertical-align values (no % or length right now):
5394                 #  * baseline
5395                 #  * sub
5396                 #  * super
5397                 #  * top
5398                 #  * text-top
5399                 #  * middle
5400                 #  * bottom
5401                 #  * text-bottom
5402
5403                 # Protect LanguageConverter markup when splitting into parts
5404                 $parts = StringUtils::delimiterExplode(
5405                         '-{', '}-', '|', $options, true /* allow nesting */
5406                 );
5407
5408                 # Give extensions a chance to select the file revision for us
5409                 $options = [];
5410                 $descQuery = false;
5411                 Hooks::run( 'BeforeParserFetchFileAndTitle',
5412                         [ $this, $title, &$options, &$descQuery ] );
5413                 # Fetch and register the file (file title may be different via hooks)
5414                 list( $file, $title ) = $this->fetchFileAndTitle( $title, $options );
5415
5416                 # Get parameter map
5417                 $handler = $file ? $file->getHandler() : false;
5418
5419                 list( $paramMap, $mwArray ) = $this->getImageParams( $handler );
5420
5421                 if ( !$file ) {
5422                         $this->addTrackingCategory( 'broken-file-category' );
5423                 }
5424
5425                 # Process the input parameters
5426                 $caption = '';
5427                 $params = [ 'frame' => [], 'handler' => [],
5428                         'horizAlign' => [], 'vertAlign' => [] ];
5429                 $seenformat = false;
5430                 foreach ( $parts as $part ) {
5431                         $part = trim( $part );
5432                         list( $magicName, $value ) = $mwArray->matchVariableStartToEnd( $part );
5433                         $validated = false;
5434                         if ( isset( $paramMap[$magicName] ) ) {
5435                                 list( $type, $paramName ) = $paramMap[$magicName];
5436
5437                                 # Special case; width and height come in one variable together
5438                                 if ( $type === 'handler' && $paramName === 'width' ) {
5439                                         $parsedWidthParam = self::parseWidthParam( $value );
5440                                         if ( isset( $parsedWidthParam['width'] ) ) {
5441                                                 $width = $parsedWidthParam['width'];
5442                                                 if ( $handler->validateParam( 'width', $width ) ) {
5443                                                         $params[$type]['width'] = $width;
5444                                                         $validated = true;
5445                                                 }
5446                                         }
5447                                         if ( isset( $parsedWidthParam['height'] ) ) {
5448                                                 $height = $parsedWidthParam['height'];
5449                                                 if ( $handler->validateParam( 'height', $height ) ) {
5450                                                         $params[$type]['height'] = $height;
5451                                                         $validated = true;
5452                                                 }
5453                                         }
5454                                         # else no validation -- T15436
5455                                 } else {
5456                                         if ( $type === 'handler' ) {
5457                                                 # Validate handler parameter
5458                                                 $validated = $handler->validateParam( $paramName, $value );
5459                                         } else {
5460                                                 # Validate internal parameters
5461                                                 switch ( $paramName ) {
5462                                                         case 'manualthumb':
5463                                                         case 'alt':
5464                                                         case 'class':
5465                                                                 # @todo FIXME: Possibly check validity here for
5466                                                                 # manualthumb? downstream behavior seems odd with
5467                                                                 # missing manual thumbs.
5468                                                                 $validated = true;
5469                                                                 $value = $this->stripAltText( $value, $holders );
5470                                                                 break;
5471                                                         case 'link':
5472                                                                 list( $paramName, $value ) =
5473                                                                         $this->parseLinkParameter(
5474                                                                                 $this->stripAltText( $value, $holders )
5475                                                                         );
5476                                                                 if ( $paramName ) {
5477                                                                         $validated = true;
5478                                                                         if ( $paramName === 'no-link' ) {
5479                                                                                 $value = true;
5480                                                                         }
5481                                                                         if ( ( $paramName === 'link-url' ) && $this->mOptions->getExternalLinkTarget() ) {
5482                                                                                 $params[$type]['link-target'] = $this->mOptions->getExternalLinkTarget();
5483                                                                         }
5484                                                                 }
5485                                                                 break;
5486                                                         case 'frameless':
5487                                                         case 'framed':
5488                                                         case 'thumbnail':
5489                                                                 // use first appearing option, discard others.
5490                                                                 $validated = !$seenformat;
5491                                                                 $seenformat = true;
5492                                                                 break;
5493                                                         default:
5494                                                                 # Most other things appear to be empty or numeric...
5495                                                                 $validated = ( $value === false || is_numeric( trim( $value ) ) );
5496                                                 }
5497                                         }
5498
5499                                         if ( $validated ) {
5500                                                 $params[$type][$paramName] = $value;
5501                                         }
5502                                 }
5503                         }
5504                         if ( !$validated ) {
5505                                 $caption = $part;
5506                         }
5507                 }
5508
5509                 # Process alignment parameters
5510                 if ( $params['horizAlign'] ) {
5511                         $params['frame']['align'] = key( $params['horizAlign'] );
5512                 }
5513                 if ( $params['vertAlign'] ) {
5514                         $params['frame']['valign'] = key( $params['vertAlign'] );
5515                 }
5516
5517                 $params['frame']['caption'] = $caption;
5518
5519                 # Will the image be presented in a frame, with the caption below?
5520                 $imageIsFramed = isset( $params['frame']['frame'] )
5521                         || isset( $params['frame']['framed'] )
5522                         || isset( $params['frame']['thumbnail'] )
5523                         || isset( $params['frame']['manualthumb'] );
5524
5525                 # In the old days, [[Image:Foo|text...]] would set alt text.  Later it
5526                 # came to also set the caption, ordinary text after the image -- which
5527                 # makes no sense, because that just repeats the text multiple times in
5528                 # screen readers.  It *also* came to set the title attribute.
5529                 # Now that we have an alt attribute, we should not set the alt text to
5530                 # equal the caption: that's worse than useless, it just repeats the
5531                 # text.  This is the framed/thumbnail case.  If there's no caption, we
5532                 # use the unnamed parameter for alt text as well, just for the time be-
5533                 # ing, if the unnamed param is set and the alt param is not.
5534                 # For the future, we need to figure out if we want to tweak this more,
5535                 # e.g., introducing a title= parameter for the title; ignoring the un-
5536                 # named parameter entirely for images without a caption; adding an ex-
5537                 # plicit caption= parameter and preserving the old magic unnamed para-
5538                 # meter for BC; ...
5539                 if ( $imageIsFramed ) { # Framed image
5540                         if ( $caption === '' && !isset( $params['frame']['alt'] ) ) {
5541                                 # No caption or alt text, add the filename as the alt text so
5542                                 # that screen readers at least get some description of the image
5543                                 $params['frame']['alt'] = $title->getText();
5544                         }
5545                         # Do not set $params['frame']['title'] because tooltips don't make sense
5546                         # for framed images
5547                 } else { # Inline image
5548                         if ( !isset( $params['frame']['alt'] ) ) {
5549                                 # No alt text, use the "caption" for the alt text
5550                                 if ( $caption !== '' ) {
5551                                         $params['frame']['alt'] = $this->stripAltText( $caption, $holders );
5552                                 } else {
5553                                         # No caption, fall back to using the filename for the
5554                                         # alt text
5555                                         $params['frame']['alt'] = $title->getText();
5556                                 }
5557                         }
5558                         # Use the "caption" for the tooltip text
5559                         $params['frame']['title'] = $this->stripAltText( $caption, $holders );
5560                 }
5561                 $params['handler']['targetlang'] = $this->getTargetLanguage()->getCode();
5562
5563                 Hooks::run( 'ParserMakeImageParams', [ $title, $file, &$params, $this ] );
5564
5565                 # Linker does the rest
5566                 $time = $options['time'] ?? false;
5567                 $ret = Linker::makeImageLink( $this, $title, $file, $params['frame'], $params['handler'],
5568                         $time, $descQuery, $this->mOptions->getThumbSize() );
5569
5570                 # Give the handler a chance to modify the parser object
5571                 if ( $handler ) {
5572                         $handler->parserTransformHook( $this, $file );
5573                 }
5574
5575                 return $ret;
5576         }
5577
5578         /**
5579          * Parse the value of 'link' parameter in image syntax (`[[File:Foo.jpg|link=<value>]]`).
5580          *
5581          * Adds an entry to appropriate link tables.
5582          *
5583          * @since 1.32
5584          * @param string $value
5585          * @return array of `[ type, target ]`, where:
5586          *   - `type` is one of:
5587          *     - `null`: Given value is not a valid link target, use default
5588          *     - `'no-link'`: Given value is empty, do not generate a link
5589          *     - `'link-url'`: Given value is a valid external link
5590          *     - `'link-title'`: Given value is a valid internal link
5591          *   - `target` is:
5592          *     - When `type` is `null` or `'no-link'`: `false`
5593          *     - When `type` is `'link-url'`: URL string corresponding to given value
5594          *     - When `type` is `'link-title'`: Title object corresponding to given value
5595          */
5596         public function parseLinkParameter( $value ) {
5597                 $chars = self::EXT_LINK_URL_CLASS;
5598                 $addr = self::EXT_LINK_ADDR;
5599                 $prots = $this->mUrlProtocols;
5600                 $type = null;
5601                 $target = false;
5602                 if ( $value === '' ) {
5603                         $type = 'no-link';
5604                 } elseif ( preg_match( "/^((?i)$prots)/", $value ) ) {
5605                         if ( preg_match( "/^((?i)$prots)$addr$chars*$/u", $value, $m ) ) {
5606                                 $this->mOutput->addExternalLink( $value );
5607                                 $type = 'link-url';
5608                                 $target = $value;
5609                         }
5610                 } else {
5611                         $linkTitle = Title::newFromText( $value );
5612                         if ( $linkTitle ) {
5613                                 $this->mOutput->addLink( $linkTitle );
5614                                 $type = 'link-title';
5615                                 $target = $linkTitle;
5616                         }
5617                 }
5618                 return [ $type, $target ];
5619         }
5620
5621         /**
5622          * @param string $caption
5623          * @param LinkHolderArray|bool $holders
5624          * @return mixed|string
5625          */
5626         protected function stripAltText( $caption, $holders ) {
5627                 # Strip bad stuff out of the title (tooltip).  We can't just use
5628                 # replaceLinkHoldersText() here, because if this function is called
5629                 # from replaceInternalLinks2(), mLinkHolders won't be up-to-date.
5630                 if ( $holders ) {
5631                         $tooltip = $holders->replaceText( $caption );
5632                 } else {
5633                         $tooltip = $this->replaceLinkHoldersText( $caption );
5634                 }
5635
5636                 # make sure there are no placeholders in thumbnail attributes
5637                 # that are later expanded to html- so expand them now and
5638                 # remove the tags
5639                 $tooltip = $this->mStripState->unstripBoth( $tooltip );
5640                 # Compatibility hack!  In HTML certain entity references not terminated
5641                 # by a semicolon are decoded (but not if we're in an attribute; that's
5642                 # how link URLs get away without properly escaping & in queries).
5643                 # But wikitext has always required semicolon-termination of entities,
5644                 # so encode & where needed to avoid decode of semicolon-less entities.
5645                 # See T209236 and
5646                 # https://www.w3.org/TR/html5/syntax.html#named-character-references
5647                 # T210437 discusses moving this workaround to Sanitizer::stripAllTags.
5648                 $tooltip = preg_replace( "/
5649                         &                       # 1. entity prefix
5650                         (?=                     # 2. followed by:
5651                         (?:                     #  a. one of the legacy semicolon-less named entities
5652                                 A(?:Elig|MP|acute|circ|grave|ring|tilde|uml)|
5653                                 C(?:OPY|cedil)|E(?:TH|acute|circ|grave|uml)|
5654                                 GT|I(?:acute|circ|grave|uml)|LT|Ntilde|
5655                                 O(?:acute|circ|grave|slash|tilde|uml)|QUOT|REG|THORN|
5656                                 U(?:acute|circ|grave|uml)|Yacute|
5657                                 a(?:acute|c(?:irc|ute)|elig|grave|mp|ring|tilde|uml)|brvbar|
5658                                 c(?:cedil|edil|urren)|cent(?!erdot;)|copy(?!sr;)|deg|
5659                                 divide(?!ontimes;)|e(?:acute|circ|grave|th|uml)|
5660                                 frac(?:1(?:2|4)|34)|
5661                                 gt(?!c(?:c|ir)|dot|lPar|quest|r(?:a(?:pprox|rr)|dot|eq(?:less|qless)|less|sim);)|
5662                                 i(?:acute|circ|excl|grave|quest|uml)|laquo|
5663                                 lt(?!c(?:c|ir)|dot|hree|imes|larr|quest|r(?:Par|i(?:e|f|));)|
5664                                 m(?:acr|i(?:cro|ddot))|n(?:bsp|tilde)|
5665                                 not(?!in(?:E|dot|v(?:a|b|c)|)|ni(?:v(?:a|b|c)|);)|
5666                                 o(?:acute|circ|grave|rd(?:f|m)|slash|tilde|uml)|
5667                                 p(?:lusmn|ound)|para(?!llel;)|quot|r(?:aquo|eg)|
5668                                 s(?:ect|hy|up(?:1|2|3)|zlig)|thorn|times(?!b(?:ar|)|d;)|
5669                                 u(?:acute|circ|grave|ml|uml)|y(?:acute|en|uml)
5670                         )
5671                         (?:[^;]|$))     #  b. and not followed by a semicolon
5672                         # S = study, for efficiency
5673                         /Sx", '&amp;', $tooltip );
5674                 $tooltip = Sanitizer::stripAllTags( $tooltip );
5675
5676                 return $tooltip;
5677         }
5678
5679         /**
5680          * Set a flag in the output object indicating that the content is dynamic and
5681          * shouldn't be cached.
5682          * @deprecated since 1.28; use getOutput()->updateCacheExpiry()
5683          */
5684         public function disableCache() {
5685                 $this->logger->debug( "Parser output marked as uncacheable." );
5686                 if ( !$this->mOutput ) {
5687                         throw new MWException( __METHOD__ .
5688                                 " can only be called when actually parsing something" );
5689                 }
5690                 $this->mOutput->updateCacheExpiry( 0 ); // new style, for consistency
5691         }
5692
5693         /**
5694          * Callback from the Sanitizer for expanding items found in HTML attribute
5695          * values, so they can be safely tested and escaped.
5696          *
5697          * @param string &$text
5698          * @param bool|PPFrame $frame
5699          * @return string
5700          */
5701         public function attributeStripCallback( &$text, $frame = false ) {
5702                 $text = $this->replaceVariables( $text, $frame );
5703                 $text = $this->mStripState->unstripBoth( $text );
5704                 return $text;
5705         }
5706
5707         /**
5708          * Accessor
5709          *
5710          * @return array
5711          */
5712         public function getTags() {
5713                 $this->firstCallInit();
5714                 return array_merge(
5715                         array_keys( $this->mTransparentTagHooks ),
5716                         array_keys( $this->mTagHooks ),
5717                         array_keys( $this->mFunctionTagHooks )
5718                 );
5719         }
5720
5721         /**
5722          * @since 1.32
5723          * @return array
5724          */
5725         public function getFunctionSynonyms() {
5726                 $this->firstCallInit();
5727                 return $this->mFunctionSynonyms;
5728         }
5729
5730         /**
5731          * @since 1.32
5732          * @return string
5733          */
5734         public function getUrlProtocols() {
5735                 return $this->mUrlProtocols;
5736         }
5737
5738         /**
5739          * Replace transparent tags in $text with the values given by the callbacks.
5740          *
5741          * Transparent tag hooks are like regular XML-style tag hooks, except they
5742          * operate late in the transformation sequence, on HTML instead of wikitext.
5743          *
5744          * @param string $text
5745          *
5746          * @return string
5747          */
5748         public function replaceTransparentTags( $text ) {
5749                 $matches = [];
5750                 $elements = array_keys( $this->mTransparentTagHooks );
5751                 $text = self::extractTagsAndParams( $elements, $text, $matches );
5752                 $replacements = [];
5753
5754                 foreach ( $matches as $marker => $data ) {
5755                         list( $element, $content, $params, $tag ) = $data;
5756                         $tagName = strtolower( $element );
5757                         if ( isset( $this->mTransparentTagHooks[$tagName] ) ) {
5758                                 $output = call_user_func_array(
5759                                         $this->mTransparentTagHooks[$tagName],
5760                                         [ $content, $params, $this ]
5761                                 );
5762                         } else {
5763                                 $output = $tag;
5764                         }
5765                         $replacements[$marker] = $output;
5766                 }
5767                 return strtr( $text, $replacements );
5768         }
5769
5770         /**
5771          * Break wikitext input into sections, and either pull or replace
5772          * some particular section's text.
5773          *
5774          * External callers should use the getSection and replaceSection methods.
5775          *
5776          * @param string $text Page wikitext
5777          * @param string|int $sectionId A section identifier string of the form:
5778          *   "<flag1> - <flag2> - ... - <section number>"
5779          *
5780          * Currently the only recognised flag is "T", which means the target section number
5781          * was derived during a template inclusion parse, in other words this is a template
5782          * section edit link. If no flags are given, it was an ordinary section edit link.
5783          * This flag is required to avoid a section numbering mismatch when a section is
5784          * enclosed by "<includeonly>" (T8563).
5785          *
5786          * The section number 0 pulls the text before the first heading; other numbers will
5787          * pull the given section along with its lower-level subsections. If the section is
5788          * not found, $mode=get will return $newtext, and $mode=replace will return $text.
5789          *
5790          * Section 0 is always considered to exist, even if it only contains the empty
5791          * string. If $text is the empty string and section 0 is replaced, $newText is
5792          * returned.
5793          *
5794          * @param string $mode One of "get" or "replace"
5795          * @param string $newText Replacement text for section data.
5796          * @return string For "get", the extracted section text.
5797          *   for "replace", the whole page with the section replaced.
5798          */
5799         private function extractSections( $text, $sectionId, $mode, $newText = '' ) {
5800                 global $wgTitle; # not generally used but removes an ugly failure mode
5801
5802                 $magicScopeVariable = $this->lock();
5803                 $this->startParse( $wgTitle, new ParserOptions, self::OT_PLAIN, true );
5804                 $outText = '';
5805                 $frame = $this->getPreprocessor()->newFrame();
5806
5807                 # Process section extraction flags
5808                 $flags = 0;
5809                 $sectionParts = explode( '-', $sectionId );
5810                 $sectionIndex = array_pop( $sectionParts );
5811                 foreach ( $sectionParts as $part ) {
5812                         if ( $part === 'T' ) {
5813                                 $flags |= self::PTD_FOR_INCLUSION;
5814                         }
5815                 }
5816
5817                 # Check for empty input
5818                 if ( strval( $text ) === '' ) {
5819                         # Only sections 0 and T-0 exist in an empty document
5820                         if ( $sectionIndex == 0 ) {
5821                                 if ( $mode === 'get' ) {
5822                                         return '';
5823                                 }
5824
5825                                 return $newText;
5826                         } else {
5827                                 if ( $mode === 'get' ) {
5828                                         return $newText;
5829                                 }
5830
5831                                 return $text;
5832                         }
5833                 }
5834
5835                 # Preprocess the text
5836                 $root = $this->preprocessToDom( $text, $flags );
5837
5838                 # <h> nodes indicate section breaks
5839                 # They can only occur at the top level, so we can find them by iterating the root's children
5840                 $node = $root->getFirstChild();
5841
5842                 # Find the target section
5843                 if ( $sectionIndex == 0 ) {
5844                         # Section zero doesn't nest, level=big
5845                         $targetLevel = 1000;
5846                 } else {
5847                         while ( $node ) {
5848                                 if ( $node->getName() === 'h' ) {
5849                                         $bits = $node->splitHeading();
5850                                         if ( $bits['i'] == $sectionIndex ) {
5851                                                 $targetLevel = $bits['level'];
5852                                                 break;
5853                                         }
5854                                 }
5855                                 if ( $mode === 'replace' ) {
5856                                         $outText .= $frame->expand( $node, PPFrame::RECOVER_ORIG );
5857                                 }
5858                                 $node = $node->getNextSibling();
5859                         }
5860                 }
5861
5862                 if ( !$node ) {
5863                         # Not found
5864                         if ( $mode === 'get' ) {
5865                                 return $newText;
5866                         } else {
5867                                 return $text;
5868                         }
5869                 }
5870
5871                 # Find the end of the section, including nested sections
5872                 do {
5873                         if ( $node->getName() === 'h' ) {
5874                                 $bits = $node->splitHeading();
5875                                 $curLevel = $bits['level'];
5876                                 if ( $bits['i'] != $sectionIndex && $curLevel <= $targetLevel ) {
5877                                         break;
5878                                 }
5879                         }
5880                         if ( $mode === 'get' ) {
5881                                 $outText .= $frame->expand( $node, PPFrame::RECOVER_ORIG );
5882                         }
5883                         $node = $node->getNextSibling();
5884                 } while ( $node );
5885
5886                 # Write out the remainder (in replace mode only)
5887                 if ( $mode === 'replace' ) {
5888                         # Output the replacement text
5889                         # Add two newlines on -- trailing whitespace in $newText is conventionally
5890                         # stripped by the editor, so we need both newlines to restore the paragraph gap
5891                         # Only add trailing whitespace if there is newText
5892                         if ( $newText != "" ) {
5893                                 $outText .= $newText . "\n\n";
5894                         }
5895
5896                         while ( $node ) {
5897                                 $outText .= $frame->expand( $node, PPFrame::RECOVER_ORIG );
5898                                 $node = $node->getNextSibling();
5899                         }
5900                 }
5901
5902                 if ( is_string( $outText ) ) {
5903                         # Re-insert stripped tags
5904                         $outText = rtrim( $this->mStripState->unstripBoth( $outText ) );
5905                 }
5906
5907                 return $outText;
5908         }
5909
5910         /**
5911          * This function returns the text of a section, specified by a number ($section).
5912          * A section is text under a heading like == Heading == or \<h1\>Heading\</h1\>, or
5913          * the first section before any such heading (section 0).
5914          *
5915          * If a section contains subsections, these are also returned.
5916          *
5917          * @param string $text Text to look in
5918          * @param string|int $sectionId Section identifier as a number or string
5919          * (e.g. 0, 1 or 'T-1').
5920          * @param string $defaultText Default to return if section is not found
5921          *
5922          * @return string Text of the requested section
5923          */
5924         public function getSection( $text, $sectionId, $defaultText = '' ) {
5925                 return $this->extractSections( $text, $sectionId, 'get', $defaultText );
5926         }
5927
5928         /**
5929          * This function returns $oldtext after the content of the section
5930          * specified by $section has been replaced with $text. If the target
5931          * section does not exist, $oldtext is returned unchanged.
5932          *
5933          * @param string $oldText Former text of the article
5934          * @param string|int $sectionId Section identifier as a number or string
5935          * (e.g. 0, 1 or 'T-1').
5936          * @param string $newText Replacing text
5937          *
5938          * @return string Modified text
5939          */
5940         public function replaceSection( $oldText, $sectionId, $newText ) {
5941                 return $this->extractSections( $oldText, $sectionId, 'replace', $newText );
5942         }
5943
5944         /**
5945          * Get the ID of the revision we are parsing
5946          *
5947          * The return value will be either:
5948          *   - a) Positive, indicating a specific revision ID (current or old)
5949          *   - b) Zero, meaning the revision ID is specified by getCurrentRevisionCallback()
5950          *   - c) Null, meaning the parse is for preview mode and there is no revision
5951          *
5952          * @return int|null
5953          */
5954         public function getRevisionId() {
5955                 return $this->mRevisionId;
5956         }
5957
5958         /**
5959          * Get the revision object for $this->mRevisionId
5960          *
5961          * @return Revision|null Either a Revision object or null
5962          * @since 1.23 (public since 1.23)
5963          */
5964         public function getRevisionObject() {
5965                 if ( $this->mRevisionObject ) {
5966                         return $this->mRevisionObject;
5967                 }
5968
5969                 // NOTE: try to get the RevisionObject even if mRevisionId is null.
5970                 // This is useful when parsing a revision that has not yet been saved.
5971                 // However, if we get back a saved revision even though we are in
5972                 // preview mode, we'll have to ignore it, see below.
5973                 // NOTE: This callback may be used to inject an OLD revision that was
5974                 // already loaded, so "current" is a bit of a misnomer. We can't just
5975                 // skip it if mRevisionId is set.
5976                 $rev = call_user_func(
5977                         $this->mOptions->getCurrentRevisionCallback(),
5978                         $this->getTitle(),
5979                         $this
5980                 );
5981
5982                 if ( $this->mRevisionId === null && $rev && $rev->getId() ) {
5983                         // We are in preview mode (mRevisionId is null), and the current revision callback
5984                         // returned an existing revision. Ignore it and return null, it's probably the page's
5985                         // current revision, which is not what we want here. Note that we do want to call the
5986                         // callback to allow the unsaved revision to be injected here, e.g. for
5987                         // self-transclusion previews.
5988                         return null;
5989                 }
5990
5991                 // If the parse is for a new revision, then the callback should have
5992                 // already been set to force the object and should match mRevisionId.
5993                 // If not, try to fetch by mRevisionId for sanity.
5994                 if ( $this->mRevisionId && $rev && $rev->getId() != $this->mRevisionId ) {
5995                         $rev = Revision::newFromId( $this->mRevisionId );
5996                 }
5997
5998                 $this->mRevisionObject = $rev;
5999
6000                 return $this->mRevisionObject;
6001         }
6002
6003         /**
6004          * Get the timestamp associated with the current revision, adjusted for
6005          * the default server-local timestamp
6006          * @return string TS_MW timestamp
6007          */
6008         public function getRevisionTimestamp() {
6009                 if ( $this->mRevisionTimestamp !== null ) {
6010                         return $this->mRevisionTimestamp;
6011                 }
6012
6013                 # Use specified revision timestamp, falling back to the current timestamp
6014                 $revObject = $this->getRevisionObject();
6015                 $timestamp = $revObject ? $revObject->getTimestamp() : $this->mOptions->getTimestamp();
6016                 $this->mOutput->setRevisionTimestampUsed( $timestamp ); // unadjusted time zone
6017
6018                 # The cryptic '' timezone parameter tells to use the site-default
6019                 # timezone offset instead of the user settings.
6020                 # Since this value will be saved into the parser cache, served
6021                 # to other users, and potentially even used inside links and such,
6022                 # it needs to be consistent for all visitors.
6023                 $this->mRevisionTimestamp = $this->contLang->userAdjust( $timestamp, '' );
6024
6025                 return $this->mRevisionTimestamp;
6026         }
6027
6028         /**
6029          * Get the name of the user that edited the last revision
6030          *
6031          * @return string User name
6032          */
6033         public function getRevisionUser() {
6034                 if ( is_null( $this->mRevisionUser ) ) {
6035                         $revObject = $this->getRevisionObject();
6036
6037                         # if this template is subst: the revision id will be blank,
6038                         # so just use the current user's name
6039                         if ( $revObject ) {
6040                                 $this->mRevisionUser = $revObject->getUserText();
6041                         } elseif ( $this->ot['wiki'] || $this->mOptions->getIsPreview() ) {
6042                                 $this->mRevisionUser = $this->getUser()->getName();
6043                         }
6044                 }
6045                 return $this->mRevisionUser;
6046         }
6047
6048         /**
6049          * Get the size of the revision
6050          *
6051          * @return int|null Revision size
6052          */
6053         public function getRevisionSize() {
6054                 if ( is_null( $this->mRevisionSize ) ) {
6055                         $revObject = $this->getRevisionObject();
6056
6057                         # if this variable is subst: the revision id will be blank,
6058                         # so just use the parser input size, because the own substituation
6059                         # will change the size.
6060                         if ( $revObject ) {
6061                                 $this->mRevisionSize = $revObject->getSize();
6062                         } else {
6063                                 $this->mRevisionSize = $this->mInputSize;
6064                         }
6065                 }
6066                 return $this->mRevisionSize;
6067         }
6068
6069         /**
6070          * Mutator for $mDefaultSort
6071          *
6072          * @param string $sort New value
6073          */
6074         public function setDefaultSort( $sort ) {
6075                 $this->mDefaultSort = $sort;
6076                 $this->mOutput->setProperty( 'defaultsort', $sort );
6077         }
6078
6079         /**
6080          * Accessor for $mDefaultSort
6081          * Will use the empty string if none is set.
6082          *
6083          * This value is treated as a prefix, so the
6084          * empty string is equivalent to sorting by
6085          * page name.
6086          *
6087          * @return string
6088          */
6089         public function getDefaultSort() {
6090                 if ( $this->mDefaultSort !== false ) {
6091                         return $this->mDefaultSort;
6092                 } else {
6093                         return '';
6094                 }
6095         }
6096
6097         /**
6098          * Accessor for $mDefaultSort
6099          * Unlike getDefaultSort(), will return false if none is set
6100          *
6101          * @return string|bool
6102          */
6103         public function getCustomDefaultSort() {
6104                 return $this->mDefaultSort;
6105         }
6106
6107         private static function getSectionNameFromStrippedText( $text ) {
6108                 $text = Sanitizer::normalizeSectionNameWhitespace( $text );
6109                 $text = Sanitizer::decodeCharReferences( $text );
6110                 $text = self::normalizeSectionName( $text );
6111                 return $text;
6112         }
6113
6114         private static function makeAnchor( $sectionName ) {
6115                 return '#' . Sanitizer::escapeIdForLink( $sectionName );
6116         }
6117
6118         private function makeLegacyAnchor( $sectionName ) {
6119                 $fragmentMode = $this->svcOptions->get( 'FragmentMode' );
6120                 if ( isset( $fragmentMode[1] ) && $fragmentMode[1] === 'legacy' ) {
6121                         // ForAttribute() and ForLink() are the same for legacy encoding
6122                         $id = Sanitizer::escapeIdForAttribute( $sectionName, Sanitizer::ID_FALLBACK );
6123                 } else {
6124                         $id = Sanitizer::escapeIdForLink( $sectionName );
6125                 }
6126
6127                 return "#$id";
6128         }
6129
6130         /**
6131          * Try to guess the section anchor name based on a wikitext fragment
6132          * presumably extracted from a heading, for example "Header" from
6133          * "== Header ==".
6134          *
6135          * @param string $text
6136          * @return string Anchor (starting with '#')
6137          */
6138         public function guessSectionNameFromWikiText( $text ) {
6139                 # Strip out wikitext links(they break the anchor)
6140                 $text = $this->stripSectionName( $text );
6141                 $sectionName = self::getSectionNameFromStrippedText( $text );
6142                 return self::makeAnchor( $sectionName );
6143         }
6144
6145         /**
6146          * Same as guessSectionNameFromWikiText(), but produces legacy anchors
6147          * instead, if possible. For use in redirects, since various versions
6148          * of Microsoft browsers interpret Location: headers as something other
6149          * than UTF-8, resulting in breakage.
6150          *
6151          * @param string $text The section name
6152          * @return string Anchor (starting with '#')
6153          */
6154         public function guessLegacySectionNameFromWikiText( $text ) {
6155                 # Strip out wikitext links(they break the anchor)
6156                 $text = $this->stripSectionName( $text );
6157                 $sectionName = self::getSectionNameFromStrippedText( $text );
6158                 return $this->makeLegacyAnchor( $sectionName );
6159         }
6160
6161         /**
6162          * Like guessSectionNameFromWikiText(), but takes already-stripped text as input.
6163          * @param string $text Section name (plain text)
6164          * @return string Anchor (starting with '#')
6165          */
6166         public static function guessSectionNameFromStrippedText( $text ) {
6167                 $sectionName = self::getSectionNameFromStrippedText( $text );
6168                 return self::makeAnchor( $sectionName );
6169         }
6170
6171         /**
6172          * Apply the same normalization as code making links to this section would
6173          *
6174          * @param string $text
6175          * @return string
6176          */
6177         private static function normalizeSectionName( $text ) {
6178                 # T90902: ensure the same normalization is applied for IDs as to links
6179                 $titleParser = MediaWikiServices::getInstance()->getTitleParser();
6180                 try {
6181
6182                         $parts = $titleParser->splitTitleString( "#$text" );
6183                 } catch ( MalformedTitleException $ex ) {
6184                         return $text;
6185                 }
6186                 return $parts['fragment'];
6187         }
6188
6189         /**
6190          * Strips a text string of wikitext for use in a section anchor
6191          *
6192          * Accepts a text string and then removes all wikitext from the
6193          * string and leaves only the resultant text (i.e. the result of
6194          * [[User:WikiSysop|Sysop]] would be "Sysop" and the result of
6195          * [[User:WikiSysop]] would be "User:WikiSysop") - this is intended
6196          * to create valid section anchors by mimicing the output of the
6197          * parser when headings are parsed.
6198          *
6199          * @param string $text Text string to be stripped of wikitext
6200          * for use in a Section anchor
6201          * @return string Filtered text string
6202          */
6203         public function stripSectionName( $text ) {
6204                 # Strip internal link markup
6205                 $text = preg_replace( '/\[\[:?([^[|]+)\|([^[]+)\]\]/', '$2', $text );
6206                 $text = preg_replace( '/\[\[:?([^[]+)\|?\]\]/', '$1', $text );
6207
6208                 # Strip external link markup
6209                 # @todo FIXME: Not tolerant to blank link text
6210                 # I.E. [https://www.mediawiki.org] will render as [1] or something depending
6211                 # on how many empty links there are on the page - need to figure that out.
6212                 $text = preg_replace( '/\[(?i:' . $this->mUrlProtocols . ')([^ ]+?) ([^[]+)\]/', '$2', $text );
6213
6214                 # Parse wikitext quotes (italics & bold)
6215                 $text = $this->doQuotes( $text );
6216
6217                 # Strip HTML tags
6218                 $text = StringUtils::delimiterReplace( '<', '>', '', $text );
6219                 return $text;
6220         }
6221
6222         /**
6223          * strip/replaceVariables/unstrip for preprocessor regression testing
6224          *
6225          * @param string $text
6226          * @param Title $title
6227          * @param ParserOptions $options
6228          * @param int $outputType
6229          *
6230          * @return string
6231          */
6232         public function testSrvus( $text, Title $title, ParserOptions $options,
6233                 $outputType = self::OT_HTML
6234         ) {
6235                 $magicScopeVariable = $this->lock();
6236                 $this->startParse( $title, $options, $outputType, true );
6237
6238                 $text = $this->replaceVariables( $text );
6239                 $text = $this->mStripState->unstripBoth( $text );
6240                 $text = Sanitizer::removeHTMLtags( $text );
6241                 return $text;
6242         }
6243
6244         /**
6245          * @param string $text
6246          * @param Title $title
6247          * @param ParserOptions $options
6248          * @return string
6249          */
6250         public function testPst( $text, Title $title, ParserOptions $options ) {
6251                 return $this->preSaveTransform( $text, $title, $options->getUser(), $options );
6252         }
6253
6254         /**
6255          * @param string $text
6256          * @param Title $title
6257          * @param ParserOptions $options
6258          * @return string
6259          */
6260         public function testPreprocess( $text, Title $title, ParserOptions $options ) {
6261                 return $this->testSrvus( $text, $title, $options, self::OT_PREPROCESS );
6262         }
6263
6264         /**
6265          * Call a callback function on all regions of the given text that are not
6266          * inside strip markers, and replace those regions with the return value
6267          * of the callback. For example, with input:
6268          *
6269          *  aaa<MARKER>bbb
6270          *
6271          * This will call the callback function twice, with 'aaa' and 'bbb'. Those
6272          * two strings will be replaced with the value returned by the callback in
6273          * each case.
6274          *
6275          * @param string $s
6276          * @param callable $callback
6277          *
6278          * @return string
6279          */
6280         public function markerSkipCallback( $s, $callback ) {
6281                 $i = 0;
6282                 $out = '';
6283                 while ( $i < strlen( $s ) ) {
6284                         $markerStart = strpos( $s, self::MARKER_PREFIX, $i );
6285                         if ( $markerStart === false ) {
6286                                 $out .= call_user_func( $callback, substr( $s, $i ) );
6287                                 break;
6288                         } else {
6289                                 $out .= call_user_func( $callback, substr( $s, $i, $markerStart - $i ) );
6290                                 $markerEnd = strpos( $s, self::MARKER_SUFFIX, $markerStart );
6291                                 if ( $markerEnd === false ) {
6292                                         $out .= substr( $s, $markerStart );
6293                                         break;
6294                                 } else {
6295                                         $markerEnd += strlen( self::MARKER_SUFFIX );
6296                                         $out .= substr( $s, $markerStart, $markerEnd - $markerStart );
6297                                         $i = $markerEnd;
6298                                 }
6299                         }
6300                 }
6301                 return $out;
6302         }
6303
6304         /**
6305          * Remove any strip markers found in the given text.
6306          *
6307          * @param string $text
6308          * @return string
6309          */
6310         public function killMarkers( $text ) {
6311                 return $this->mStripState->killMarkers( $text );
6312         }
6313
6314         /**
6315          * Save the parser state required to convert the given half-parsed text to
6316          * HTML. "Half-parsed" in this context means the output of
6317          * recursiveTagParse() or internalParse(). This output has strip markers
6318          * from replaceVariables (extensionSubstitution() etc.), and link
6319          * placeholders from replaceLinkHolders().
6320          *
6321          * Returns an array which can be serialized and stored persistently. This
6322          * array can later be loaded into another parser instance with
6323          * unserializeHalfParsedText(). The text can then be safely incorporated into
6324          * the return value of a parser hook.
6325          *
6326          * @deprecated since 1.31
6327          * @param string $text
6328          *
6329          * @return array
6330          */
6331         public function serializeHalfParsedText( $text ) {
6332                 wfDeprecated( __METHOD__, '1.31' );
6333                 $data = [
6334                         'text' => $text,
6335                         'version' => self::HALF_PARSED_VERSION,
6336                         'stripState' => $this->mStripState->getSubState( $text ),
6337                         'linkHolders' => $this->mLinkHolders->getSubArray( $text )
6338                 ];
6339                 return $data;
6340         }
6341
6342         /**
6343          * Load the parser state given in the $data array, which is assumed to
6344          * have been generated by serializeHalfParsedText(). The text contents is
6345          * extracted from the array, and its markers are transformed into markers
6346          * appropriate for the current Parser instance. This transformed text is
6347          * returned, and can be safely included in the return value of a parser
6348          * hook.
6349          *
6350          * If the $data array has been stored persistently, the caller should first
6351          * check whether it is still valid, by calling isValidHalfParsedText().
6352          *
6353          * @deprecated since 1.31
6354          * @param array $data Serialized data
6355          * @throws MWException
6356          * @return string
6357          */
6358         public function unserializeHalfParsedText( $data ) {
6359                 wfDeprecated( __METHOD__, '1.31' );
6360                 if ( !isset( $data['version'] ) || $data['version'] != self::HALF_PARSED_VERSION ) {
6361                         throw new MWException( __METHOD__ . ': invalid version' );
6362                 }
6363
6364                 # First, extract the strip state.
6365                 $texts = [ $data['text'] ];
6366                 $texts = $this->mStripState->merge( $data['stripState'], $texts );
6367
6368                 # Now renumber links
6369                 $texts = $this->mLinkHolders->mergeForeign( $data['linkHolders'], $texts );
6370
6371                 # Should be good to go.
6372                 return $texts[0];
6373         }
6374
6375         /**
6376          * Returns true if the given array, presumed to be generated by
6377          * serializeHalfParsedText(), is compatible with the current version of the
6378          * parser.
6379          *
6380          * @deprecated since 1.31
6381          * @param array $data
6382          *
6383          * @return bool
6384          */
6385         public function isValidHalfParsedText( $data ) {
6386                 wfDeprecated( __METHOD__, '1.31' );
6387                 return isset( $data['version'] ) && $data['version'] == self::HALF_PARSED_VERSION;
6388         }
6389
6390         /**
6391          * Parsed a width param of imagelink like 300px or 200x300px
6392          *
6393          * @param string $value
6394          * @param bool $parseHeight
6395          *
6396          * @return array
6397          * @since 1.20
6398          */
6399         public static function parseWidthParam( $value, $parseHeight = true ) {
6400                 $parsedWidthParam = [];
6401                 if ( $value === '' ) {
6402                         return $parsedWidthParam;
6403                 }
6404                 $m = [];
6405                 # (T15500) In both cases (width/height and width only),
6406                 # permit trailing "px" for backward compatibility.
6407                 if ( $parseHeight && preg_match( '/^([0-9]*)x([0-9]*)\s*(?:px)?\s*$/', $value, $m ) ) {
6408                         $width = intval( $m[1] );
6409                         $height = intval( $m[2] );
6410                         $parsedWidthParam['width'] = $width;
6411                         $parsedWidthParam['height'] = $height;
6412                 } elseif ( preg_match( '/^[0-9]*\s*(?:px)?\s*$/', $value ) ) {
6413                         $width = intval( $value );
6414                         $parsedWidthParam['width'] = $width;
6415                 }
6416                 return $parsedWidthParam;
6417         }
6418
6419         /**
6420          * Lock the current instance of the parser.
6421          *
6422          * This is meant to stop someone from calling the parser
6423          * recursively and messing up all the strip state.
6424          *
6425          * @throws MWException If parser is in a parse
6426          * @return ScopedCallback The lock will be released once the return value goes out of scope.
6427          */
6428         protected function lock() {
6429                 if ( $this->mInParse ) {
6430                         throw new MWException( "Parser state cleared while parsing. "
6431                                 . "Did you call Parser::parse recursively? Lock is held by: " . $this->mInParse );
6432                 }
6433
6434                 // Save the backtrace when locking, so that if some code tries locking again,
6435                 // we can print the lock owner's backtrace for easier debugging
6436                 $e = new Exception;
6437                 $this->mInParse = $e->getTraceAsString();
6438
6439                 $recursiveCheck = new ScopedCallback( function () {
6440                         $this->mInParse = false;
6441                 } );
6442
6443                 return $recursiveCheck;
6444         }
6445
6446         /**
6447          * Strip outer <p></p> tag from the HTML source of a single paragraph.
6448          *
6449          * Returns original HTML if the <p/> tag has any attributes, if there's no wrapping <p/> tag,
6450          * or if there is more than one <p/> tag in the input HTML.
6451          *
6452          * @param string $html
6453          * @return string
6454          * @since 1.24
6455          */
6456         public static function stripOuterParagraph( $html ) {
6457                 $m = [];
6458                 if ( preg_match( '/^<p>(.*)\n?<\/p>\n?$/sU', $html, $m ) && strpos( $m[1], '</p>' ) === false ) {
6459                         $html = $m[1];
6460                 }
6461
6462                 return $html;
6463         }
6464
6465         /**
6466          * Return this parser if it is not doing anything, otherwise
6467          * get a fresh parser. You can use this method by doing
6468          * $newParser = $oldParser->getFreshParser(), or more simply
6469          * $oldParser->getFreshParser()->parse( ... );
6470          * if you're unsure if $oldParser is safe to use.
6471          *
6472          * @since 1.24
6473          * @return Parser A parser object that is not parsing anything
6474          */
6475         public function getFreshParser() {
6476                 if ( $this->mInParse ) {
6477                         return $this->factory->create();
6478                 } else {
6479                         return $this;
6480                 }
6481         }
6482
6483         /**
6484          * Set's up the PHP implementation of OOUI for use in this request
6485          * and instructs OutputPage to enable OOUI for itself.
6486          *
6487          * @since 1.26
6488          */
6489         public function enableOOUI() {
6490                 OutputPage::setupOOUI();
6491                 $this->mOutput->setEnableOOUI( true );
6492         }
6493
6494         /**
6495          * @param string $flag
6496          * @param string $reason
6497          */
6498         protected function setOutputFlag( $flag, $reason ) {
6499                 $this->mOutput->setFlag( $flag );
6500                 $name = $this->mTitle->getPrefixedText();
6501                 $this->logger->debug( __METHOD__ . ": set $flag flag on '$name'; $reason" );
6502         }
6503 }