includes/parser/Parser.php

   1 <?php
   2 /**
   3  * PHP parser that converts wiki markup to HTML.
   4  *
   5  * This program is free software; you can redistribute it and/or modify
   6  * it under the terms of the GNU General Public License as published by
   7  * the Free Software Foundation; either version 2 of the License, or
   8  * (at your option) any later version.
   9  *
  10  * This program is distributed in the hope that it will be useful,
  11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  13  * GNU General Public License for more details.
  14  *
  15  * You should have received a copy of the GNU General Public License along
  16  * with this program; if not, write to the Free Software Foundation, Inc.,
  17  * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
  18  * http://www.gnu.org/copyleft/gpl.html
  19  *
  20  * @file
  21  * @ingroup Parser
  22  */
  23 use MediaWiki\Config\ServiceOptions;
  24 use MediaWiki\Linker\LinkRenderer;
  25 use MediaWiki\Linker\LinkRendererFactory;
  26 use MediaWiki\Linker\LinkTarget;
  27 use MediaWiki\MediaWikiServices;
  28 use MediaWiki\Special\SpecialPageFactory;
  29 use Psr\Log\NullLogger;
  30 use Wikimedia\ScopedCallback;
  31 use Psr\Log\LoggerInterface;
  32
  33 /**
  34  * @defgroup Parser Parser
  35  */
  36
  37 /**
  38  * PHP Parser - Processes wiki markup (which uses a more user-friendly
  39  * syntax, such as "[[link]]" for making links), and provides a one-way
   40  * transformation of that wiki markup into (X)HTML output / markup
  41  * (which in turn the browser understands, and can display).
  42  *
  43  * There are seven main entry points into the Parser class:
  44  *
  45  * - Parser::parse()
  46  *     produces HTML output
  47  * - Parser::preSaveTransform()
  48  *     produces altered wiki markup
  49  * - Parser::preprocess()
  50  *     removes HTML comments and expands templates
  51  * - Parser::cleanSig() and Parser::cleanSigInSig()
  52  *     cleans a signature before saving it to preferences
  53  * - Parser::getSection()
  54  *     return the content of a section from an article for section editing
  55  * - Parser::replaceSection()
  56  *     replaces a section by number inside an article
  57  * - Parser::getPreloadText()
  58  *     removes <noinclude> sections and <includeonly> tags
  59  *
  60  * @warning $wgUser or $wgTitle or $wgRequest or $wgLang. Keep them away!
  61  *
  62  * @par Settings:
  63  * $wgNamespacesWithSubpages
  64  *
  65  * @par Settings only within ParserOptions:
  66  * $wgAllowExternalImages
  67  * $wgAllowSpecialInclusion
  68  * $wgInterwikiMagic
  69  * $wgMaxArticleSize
  70  *
  71  * @ingroup Parser
  72  */
  73 class Parser {
  74         /**
  75          * Update this version number when the ParserOutput format
  76          * changes in an incompatible way, so the parser cache
  77          * can automatically discard old data.
  78          */
  79         const VERSION = '1.6.4';
  80
  81         /**
  82          * Update this version number when the output of serialiseHalfParsedText()
  83          * changes in an incompatible way
  84          */
  85         const HALF_PARSED_VERSION = 2;
  86
  87         # Flags for Parser::setFunctionHook
  88         const SFH_NO_HASH = 1;
  89         const SFH_OBJECT_ARGS = 2;
  90
  91         # Constants needed for external link processing
  92         # Everything except bracket, space, or control characters
  93         # \p{Zs} is unicode 'separator, space' category. It covers the space 0x20
   94         # as well as U+3000 (IDEOGRAPHIC SPACE), for T21052
  95         # \x{FFFD} is the Unicode replacement character, which Preprocessor_DOM
  96         # uses to replace invalid HTML characters.
  97         const EXT_LINK_URL_CLASS = '[^][<>"\\x00-\\x20\\x7F\p{Zs}\x{FFFD}]';
  98         # Simplified expression to match an IPv4 or IPv6 address, or
  99         # at least one character of a host name (embeds EXT_LINK_URL_CLASS)
 100         const EXT_LINK_ADDR = '(?:[0-9.]+|\\[(?i:[0-9a-f:.]+)\\]|[^][<>"\\x00-\\x20\\x7F\p{Zs}\x{FFFD}])';
 101         # RegExp to make image URLs (embeds IPv6 part of EXT_LINK_ADDR)
 102         // phpcs:ignore Generic.Files.LineLength
 103         const EXT_IMAGE_REGEX = '/^(http:\/\/|https:\/\/)((?:\\[(?i:[0-9a-f:.]+)\\])?[^][<>"\\x00-\\x20\\x7F\p{Zs}\x{FFFD}]+)
 104                 \\/([A-Za-z0-9_.,~%\\-+&;#*?!=()@\\x80-\\xFF]+)\\.((?i)gif|png|jpg|jpeg)$/Sxu';
 105
 106         # Regular expression for a non-newline space
 107         const SPACE_NOT_NL = '(?:\t|&nbsp;|&\#0*160;|&\#[Xx]0*[Aa]0;|\p{Zs})';
 108
 109         # Flags for preprocessToDom
 110         const PTD_FOR_INCLUSION = 1;
 111
 112         # Allowed values for $this->mOutputType
 113         # Parameter to startExternalParse().
 114         const OT_HTML = 1; # like parse()
 115         const OT_WIKI = 2; # like preSaveTransform()
 116         const OT_PREPROCESS = 3; # like preprocess()
 117         const OT_MSG = 3;
 118         const OT_PLAIN = 4; # like extractSections() - portions of the original are returned unchanged.
 119
 120         /**
 121          * @var string Prefix and suffix for temporary replacement strings
 122          * for the multipass parser.
 123          *
 124          * \x7f should never appear in input as it's disallowed in XML.
 125          * Using it at the front also gives us a little extra robustness
 126          * since it shouldn't match when butted up against identifier-like
 127          * string constructs.
 128          *
 129          * Must not consist of all title characters, or else it will change
 130          * the behavior of <nowiki> in a link.
 131          *
 132          * Must have a character that needs escaping in attributes, otherwise
 133          * someone could put a strip marker in an attribute, to get around
 134          * escaping quote marks, and break out of the attribute. Thus we add
 135          * `'".
 136          */
 137         const MARKER_SUFFIX = "-QINU`\"'\x7f";
 138         const MARKER_PREFIX = "\x7f'\"`UNIQ-";
 139
 140         # Markers used for wrapping the table of contents
 141         const TOC_START = '<mw:toc>';
 142         const TOC_END = '</mw:toc>';
 143
 144         /** @var int Assume that no output will later be saved this many seconds after parsing */
 145         const MAX_TTS = 900;
 146
 147         # Persistent:
 148         public $mTagHooks = [];
 149         public $mTransparentTagHooks = [];
 150         public $mFunctionHooks = [];
 151         public $mFunctionSynonyms = [ 0 => [], 1 => [] ];
 152         public $mFunctionTagHooks = [];
 153         public $mStripList = [];
 154         public $mDefaultStripList = [];
 155         public $mVarCache = [];
 156         public $mImageParams = [];
 157         public $mImageParamsMagicArray = [];
 158         public $mMarkerIndex = 0;
 159         /**
 160          * @var bool Whether firstCallInit still needs to be called
 161          */
 162         public $mFirstCall = true;
 163
 164         # Initialised by initialiseVariables()
 165
 166         /**
 167          * @var MagicWordArray
 168          */
 169         public $mVariables;
 170
 171         /**
 172          * @var MagicWordArray
 173          */
 174         public $mSubstWords;
 175
 176         /**
 177          * @deprecated since 1.34, there should be no need to use this
 178          * @var array
 179          */
 180         public $mConf;
 181
 182         # Initialised in constructor
 183         public $mExtLinkBracketedRegex, $mUrlProtocols;
 184
 185         # Initialized in getPreprocessor()
 186         /** @var Preprocessor */
 187         public $mPreprocessor;
 188
 189         # Cleared with clearState():
 190         /**
 191          * @var ParserOutput
 192          */
 193         public $mOutput;
 194         public $mAutonumber;
 195
 196         /**
 197          * @var StripState
 198          */
 199         public $mStripState;
 200
 201         public $mIncludeCount;
 202         /**
 203          * @var LinkHolderArray
 204          */
 205         public $mLinkHolders;
 206
 207         public $mLinkID;
 208         public $mIncludeSizes, $mPPNodeCount, $mGeneratedPPNodeCount, $mHighestExpansionDepth;
 209         public $mDefaultSort;
 210         public $mTplRedirCache, $mHeadings, $mDoubleUnderscores;
 211         public $mExpensiveFunctionCount; # number of expensive parser function calls
 212         public $mShowToc, $mForceTocPosition;
 213         /** @var array */
 214         public $mTplDomCache;
 215
 216         /**
 217          * @var User
 218          */
 219         public $mUser; # User object; only used when doing pre-save transform
 220
 221         # Temporary
 222         # These are variables reset at least once per parse regardless of $clearState
 223
 224         /**
 225          * @var ParserOptions
 226          */
 227         public $mOptions;
 228
 229         /**
 230          * @var Title
 231          */
 232         public $mTitle;        # Title context, used for self-link rendering and similar things
 233         public $mOutputType;   # Output type, one of the OT_xxx constants
 234         public $ot;            # Shortcut alias, see setOutputType()
 235         public $mRevisionObject; # The revision object of the specified revision ID
 236         public $mRevisionId;   # ID to display in {{REVISIONID}} tags
 237         public $mRevisionTimestamp; # The timestamp of the specified revision ID
 238         public $mRevisionUser; # User to display in {{REVISIONUSER}} tag
 239         public $mRevisionSize; # Size to display in {{REVISIONSIZE}} variable
 240         public $mRevIdForTs;   # The revision ID which was used to fetch the timestamp
 241         public $mInputSize = false; # For {{PAGESIZE}} on current page.
 242
 243         /**
 244          * @var array Array with the language name of each language link (i.e. the
 245          * interwiki prefix) in the key, value arbitrary. Used to avoid sending
 246          * duplicate language links to the ParserOutput.
 247          */
 248         public $mLangLinkLanguages;
 249
 250         /**
 251          * @var MapCacheLRU|null
 252          * @since 1.24
 253          *
 254          * A cache of the current revisions of titles. Keys are $title->getPrefixedDbKey()
 255          */
 256         public $currentRevisionCache;
 257
 258         /**
 259          * @var bool|string Recursive call protection.
 260          * This variable should be treated as if it were private.
 261          */
 262         public $mInParse = false;
 263
 264         /** @var SectionProfiler */
 265         protected $mProfiler;
 266
 267         /**
 268          * @var LinkRenderer
 269          */
 270         protected $mLinkRenderer;
 271
 272         /** @var MagicWordFactory */
 273         private $magicWordFactory;
 274
 275         /** @var Language */
 276         private $contLang;
 277
 278         /** @var ParserFactory */
 279         private $factory;
 280
 281         /** @var SpecialPageFactory */
 282         private $specialPageFactory;
 283
 284         /**
 285          * This is called $svcOptions instead of $options like elsewhere to avoid confusion with
 286          * $mOptions, which is public and widely used, and also with the local variable $options used
 287          * for ParserOptions throughout this file.
 288          *
 289          * @var ServiceOptions
 290          */
 291         private $svcOptions;
 292
 293         /** @var LinkRendererFactory */
 294         private $linkRendererFactory;
 295
 296         /** @var NamespaceInfo */
 297         private $nsInfo;
 298
 299         /** @var LoggerInterface */
 300         private $logger;
 301
 302         /**
 303          * TODO Make this a const when HHVM support is dropped (T192166)
 304          *
 305          * @var array
 306          * @since 1.33
 307          */
 308         public static $constructorOptions = [
 309                 // See $wgParserConf documentation
 310                 'class',
 311                 'preprocessorClass',
 312                 // See documentation for the corresponding config options
 313                 'ArticlePath',
 314                 'EnableScaryTranscluding',
 315                 'ExtraInterlanguageLinkPrefixes',
 316                 'FragmentMode',
 317                 'LanguageCode',
 318                 'MaxSigChars',
 319                 'MaxTocLevel',
 320                 'MiserMode',
 321                 'ScriptPath',
 322                 'Server',
 323                 'ServerName',
 324                 'ShowHostnames',
 325                 'Sitename',
 326                 'StylePath',
 327                 'TranscludeCacheExpiry',
 328         ];
 329
 330         /**
 331          * Constructing parsers directly is deprecated! Use a ParserFactory.
 332          *
 333          * @param ServiceOptions|null $svcOptions
 334          * @param MagicWordFactory|null $magicWordFactory
 335          * @param Language|null $contLang Content language
 336          * @param ParserFactory|null $factory
 337          * @param string|null $urlProtocols As returned from wfUrlProtocols()
 338          * @param SpecialPageFactory|null $spFactory
 339          * @param LinkRendererFactory|null $linkRendererFactory
 340          * @param NamespaceInfo|null $nsInfo
 341          * @param LoggerInterface|null $logger
 342          */
 343         public function __construct(
 344                 $svcOptions = null,
 345                 MagicWordFactory $magicWordFactory = null,
 346                 Language $contLang = null,
 347                 ParserFactory $factory = null,
 348                 $urlProtocols = null,
 349                 SpecialPageFactory $spFactory = null,
 350                 $linkRendererFactory = null,
 351                 $nsInfo = null,
 352                 $logger = null
 353         ) {
 354                 if ( !$svcOptions || is_array( $svcOptions ) ) {
 355                         // Pre-1.34 calling convention is the first parameter is just ParserConf, the seventh is
 356                         // Config, and the eighth is LinkRendererFactory.
 357                         $this->mConf = (array)$svcOptions;
 358                         if ( empty( $this->mConf['class'] ) ) {
 359                                 $this->mConf['class'] = self::class;
 360                         }
 361                         if ( empty( $this->mConf['preprocessorClass'] ) ) {
 362                                 $this->mConf['preprocessorClass'] = self::getDefaultPreprocessorClass();
 363                         }
 364                         $this->svcOptions = new ServiceOptions( self::$constructorOptions,
 365                                 $this->mConf, func_num_args() > 6
 366                                         ? func_get_arg( 6 ) : MediaWikiServices::getInstance()->getMainConfig()
 367                         );
 368                         $linkRendererFactory = func_num_args() > 7 ? func_get_arg( 7 ) : null;
 369                         $nsInfo = func_num_args() > 8 ? func_get_arg( 8 ) : null;
 370                 } else {
 371                         // New calling convention
 372                         $svcOptions->assertRequiredOptions( self::$constructorOptions );
 373                         // $this->mConf is public, so we'll keep those two options there as well for
 374                         // compatibility until it's removed
 375                         $this->mConf = [
 376                                 'class' => $svcOptions->get( 'class' ),
 377                                 'preprocessorClass' => $svcOptions->get( 'preprocessorClass' ),
 378                         ];
 379                         $this->svcOptions = $svcOptions;
 380                 }
 381
 382                 $this->mUrlProtocols = $urlProtocols ?? wfUrlProtocols();
 383                 $this->mExtLinkBracketedRegex = '/\[(((?i)' . $this->mUrlProtocols . ')' .
 384                         self::EXT_LINK_ADDR .
 385                         self::EXT_LINK_URL_CLASS . '*)\p{Zs}*([^\]\\x00-\\x08\\x0a-\\x1F\\x{FFFD}]*?)\]/Su';
 386
 387                 $this->magicWordFactory = $magicWordFactory ??
 388                         MediaWikiServices::getInstance()->getMagicWordFactory();
 389
 390                 $this->contLang = $contLang ?? MediaWikiServices::getInstance()->getContentLanguage();
 391
 392                 $this->factory = $factory ?? MediaWikiServices::getInstance()->getParserFactory();
 393                 $this->specialPageFactory = $spFactory ??
 394                         MediaWikiServices::getInstance()->getSpecialPageFactory();
 395                 $this->linkRendererFactory = $linkRendererFactory ??
 396                         MediaWikiServices::getInstance()->getLinkRendererFactory();
 397                 $this->nsInfo = $nsInfo ?? MediaWikiServices::getInstance()->getNamespaceInfo();
 398                 $this->logger = $logger ?: new NullLogger();
 399         }
 400
 401         /**
 402          * Reduce memory usage to reduce the impact of circular references
 403          */
 404         public function __destruct() {
 405                 if ( isset( $this->mLinkHolders ) ) {
 406                         unset( $this->mLinkHolders );
 407                 }
 408                 foreach ( $this as $name => $value ) {
 409                         unset( $this->$name );
 410                 }
 411         }
 412
 413         /**
 414          * Allow extensions to clean up when the parser is cloned
 415          */
 416         public function __clone() {
 417                 $this->mInParse = false;
 418
 419                 // T58226: When you create a reference "to" an object field, that
 420                 // makes the object field itself be a reference too (until the other
 421                 // reference goes out of scope). When cloning, any field that's a
 422                 // reference is copied as a reference in the new object. Both of these
 423                 // are defined PHP5 behaviors, as inconvenient as it is for us when old
 424                 // hooks from PHP4 days are passing fields by reference.
 425                 foreach ( [ 'mStripState', 'mVarCache' ] as $k ) {
 426                         // Make a non-reference copy of the field, then rebind the field to
 427                         // reference the new copy.
 428                         $tmp = $this->$k;
 429                         $this->$k =& $tmp;
 430                         unset( $tmp );
 431                 }
 432
 433                 Hooks::run( 'ParserCloned', [ $this ] );
 434         }
 435
 436         /**
 437          * Which class should we use for the preprocessor if not otherwise specified?
 438          *
 439          * @since 1.34
 440          * @deprecated since 1.34, removing configurability of preprocessor
 441          * @return string
 442          */
 443         public static function getDefaultPreprocessorClass() {
 444                 return Preprocessor_Hash::class;
 445         }
 446
 447         /**
 448          * Do various kinds of initialisation on the first call of the parser
 449          */
 450         public function firstCallInit() {
 451                 if ( !$this->mFirstCall ) {
 452                         return;
 453                 }
 454                 $this->mFirstCall = false;
 455
 456                 CoreParserFunctions::register( $this );
 457                 CoreTagHooks::register( $this );
 458                 $this->initialiseVariables();
 459
 460                 // Avoid PHP 7.1 warning from passing $this by reference
 461                 $parser = $this;
 462                 Hooks::run( 'ParserFirstCallInit', [ &$parser ] );
 463         }
 464
 465         /**
 466          * Clear Parser state
 467          *
 468          * @private
 469          */
 470         public function clearState() {
 471                 $this->firstCallInit();
 472                 $this->resetOutput();
 473                 $this->mAutonumber = 0;
 474                 $this->mIncludeCount = [];
 475                 $this->mLinkHolders = new LinkHolderArray( $this );
 476                 $this->mLinkID = 0;
 477                 $this->mRevisionObject = $this->mRevisionTimestamp =
 478                         $this->mRevisionId = $this->mRevisionUser = $this->mRevisionSize = null;
 479                 $this->mVarCache = [];
 480                 $this->mUser = null;
 481                 $this->mLangLinkLanguages = [];
 482                 $this->currentRevisionCache = null;
 483
 484                 $this->mStripState = new StripState( $this );
 485
 486                 # Clear these on every parse, T6549
 487                 $this->mTplRedirCache = $this->mTplDomCache = [];
 488
 489                 $this->mShowToc = true;
 490                 $this->mForceTocPosition = false;
 491                 $this->mIncludeSizes = [
 492                         'post-expand' => 0,
 493                         'arg' => 0,
 494                 ];
 495                 $this->mPPNodeCount = 0;
 496                 $this->mGeneratedPPNodeCount = 0;
 497                 $this->mHighestExpansionDepth = 0;
 498                 $this->mDefaultSort = false;
 499                 $this->mHeadings = [];
 500                 $this->mDoubleUnderscores = [];
 501                 $this->mExpensiveFunctionCount = 0;
 502
 503                 # Fix cloning
 504                 if ( isset( $this->mPreprocessor ) && $this->mPreprocessor->parser !== $this ) {
 505                         $this->mPreprocessor = null;
 506                 }
 507
 508                 $this->mProfiler = new SectionProfiler();
 509
 510                 // Avoid PHP 7.1 warning from passing $this by reference
 511                 $parser = $this;
 512                 Hooks::run( 'ParserClearState', [ &$parser ] );
 513         }
 514
 515         /**
 516          * Reset the ParserOutput
 517          */
 518         public function resetOutput() {
 519                 $this->mOutput = new ParserOutput;
 520                 $this->mOptions->registerWatcher( [ $this->mOutput, 'recordOption' ] );
 521         }
 522
 523         /**
 524          * Convert wikitext to HTML
 525          * Do not call this function recursively.
 526          *
 527          * @param string $text Text we want to parse
 528          * @param-taint $text escapes_htmlnoent
 529          * @param Title $title
 530          * @param ParserOptions $options
 531          * @param bool $linestart
 532          * @param bool $clearState
 533          * @param int|null $revid ID of the revision being rendered. This is used to render
 534          *  REVISION* magic words. 0 means that any current revision will be used. Null means
 535          *  that {{REVISIONID}}/{{REVISIONUSER}} will be empty and {{REVISIONTIMESTAMP}} will
 536          *  use the current timestamp.
 537          * @return ParserOutput A ParserOutput
 538          * @return-taint escaped
 539          */
 540         public function parse(
 541                 $text, Title $title, ParserOptions $options,
 542                 $linestart = true, $clearState = true, $revid = null
 543         ) {
 544                 if ( $clearState ) {
 545                         // We use U+007F DELETE to construct strip markers, so we have to make
 546                         // sure that this character does not occur in the input text.
 547                         $text = strtr( $text, "\x7f", "?" );
 548                         $magicScopeVariable = $this->lock();
 549                 }
 550                 // Strip U+0000 NULL (T159174)
 551                 $text = str_replace( "\000", '', $text );
 552
 553                 $this->startParse( $title, $options, self::OT_HTML, $clearState );
 554
 555                 $this->currentRevisionCache = null;
 556                 $this->mInputSize = strlen( $text );
 557                 if ( $this->mOptions->getEnableLimitReport() ) {
 558                         $this->mOutput->resetParseStartTime();
 559                 }
 560
 561                 $oldRevisionId = $this->mRevisionId;
 562                 $oldRevisionObject = $this->mRevisionObject;
 563                 $oldRevisionTimestamp = $this->mRevisionTimestamp;
 564                 $oldRevisionUser = $this->mRevisionUser;
 565                 $oldRevisionSize = $this->mRevisionSize;
 566                 if ( $revid !== null ) {
 567                         $this->mRevisionId = $revid;
 568                         $this->mRevisionObject = null;
 569                         $this->mRevisionTimestamp = null;
 570                         $this->mRevisionUser = null;
 571                         $this->mRevisionSize = null;
 572                 }
 573
 574                 // Avoid PHP 7.1 warning from passing $this by reference
 575                 $parser = $this;
 576                 Hooks::run( 'ParserBeforeStrip', [ &$parser, &$text, &$this->mStripState ] );
 577                 # No more strip!
 578                 Hooks::run( 'ParserAfterStrip', [ &$parser, &$text, &$this->mStripState ] );
 579                 $text = $this->internalParse( $text );
 580                 Hooks::run( 'ParserAfterParse', [ &$parser, &$text, &$this->mStripState ] );
 581
 582                 $text = $this->internalParseHalfParsed( $text, true, $linestart );
 583
 584                 /**
 585                  * A converted title will be provided in the output object if title and
 586                  * content conversion are enabled, the article text does not contain
 587                  * a conversion-suppressing double-underscore tag, and no
 588                  * {{DISPLAYTITLE:...}} is present. DISPLAYTITLE takes precedence over
 589                  * automatic link conversion.
 590                  */
 591                 if ( !( $options->getDisableTitleConversion()
 592                         || isset( $this->mDoubleUnderscores['nocontentconvert'] )
 593                         || isset( $this->mDoubleUnderscores['notitleconvert'] )
 594                         || $this->mOutput->getDisplayTitle() !== false )
 595                 ) {
 596                         $convruletitle = $this->getTargetLanguage()->getConvRuleTitle();
 597                         if ( $convruletitle ) {
 598                                 $this->mOutput->setTitleText( $convruletitle );
 599                         } else {
 600                                 $titleText = $this->getTargetLanguage()->convertTitle( $title );
 601                                 $this->mOutput->setTitleText( $titleText );
 602                         }
 603                 }
 604
 605                 # Compute runtime adaptive expiry if set
 606                 $this->mOutput->finalizeAdaptiveCacheExpiry();
 607
 608                 # Warn if too many heavyweight parser functions were used
 609                 if ( $this->mExpensiveFunctionCount > $this->mOptions->getExpensiveParserFunctionLimit() ) {
 610                         $this->limitationWarn( 'expensive-parserfunction',
 611                                 $this->mExpensiveFunctionCount,
 612                                 $this->mOptions->getExpensiveParserFunctionLimit()
 613                         );
 614                 }
 615
 616                 # Information on limits, for the benefit of users who try to skirt them
 617                 if ( $this->mOptions->getEnableLimitReport() ) {
 618                         $text .= $this->makeLimitReport();
 619                 }
 620
 621                 # Wrap non-interface parser output in a <div> so it can be targeted
 622                 # with CSS (T37247)
 623                 $class = $this->mOptions->getWrapOutputClass();
 624                 if ( $class !== false && !$this->mOptions->getInterfaceMessage() ) {
 625                         $this->mOutput->addWrapperDivClass( $class );
 626                 }
 627
 628                 $this->mOutput->setText( $text );
 629
 630                 $this->mRevisionId = $oldRevisionId;
 631                 $this->mRevisionObject = $oldRevisionObject;
 632                 $this->mRevisionTimestamp = $oldRevisionTimestamp;
 633                 $this->mRevisionUser = $oldRevisionUser;
 634                 $this->mRevisionSize = $oldRevisionSize;
 635                 $this->mInputSize = false;
 636                 $this->currentRevisionCache = null;
 637
 638                 return $this->mOutput;
 639         }
 640
 641         /**
 642          * Set the limit report data in the current ParserOutput, and return the
 643          * limit report HTML comment.
 644          *
 645          * @return string
 646          */
 647         protected function makeLimitReport() {
 648                 $maxIncludeSize = $this->mOptions->getMaxIncludeSize();
 649
 650                 $cpuTime = $this->mOutput->getTimeSinceStart( 'cpu' );
 651                 if ( $cpuTime !== null ) {
 652                         $this->mOutput->setLimitReportData( 'limitreport-cputime',
 653                                 sprintf( "%.3f", $cpuTime )
 654                         );
 655                 }
 656
 657                 $wallTime = $this->mOutput->getTimeSinceStart( 'wall' );
 658                 $this->mOutput->setLimitReportData( 'limitreport-walltime',
 659                         sprintf( "%.3f", $wallTime )
 660                 );
 661
 662                 $this->mOutput->setLimitReportData( 'limitreport-ppvisitednodes',
 663                         [ $this->mPPNodeCount, $this->mOptions->getMaxPPNodeCount() ]
 664                 );
 665                 $this->mOutput->setLimitReportData( 'limitreport-ppgeneratednodes',
 666                         [ $this->mGeneratedPPNodeCount, $this->mOptions->getMaxGeneratedPPNodeCount() ]
 667                 );
 668                 $this->mOutput->setLimitReportData( 'limitreport-postexpandincludesize',
 669                         [ $this->mIncludeSizes['post-expand'], $maxIncludeSize ]
 670                 );
 671                 $this->mOutput->setLimitReportData( 'limitreport-templateargumentsize',
 672                         [ $this->mIncludeSizes['arg'], $maxIncludeSize ]
 673                 );
 674                 $this->mOutput->setLimitReportData( 'limitreport-expansiondepth',
 675                         [ $this->mHighestExpansionDepth, $this->mOptions->getMaxPPExpandDepth() ]
 676                 );
 677                 $this->mOutput->setLimitReportData( 'limitreport-expensivefunctioncount',
 678                         [ $this->mExpensiveFunctionCount, $this->mOptions->getExpensiveParserFunctionLimit() ]
 679                 );
 680
 681                 foreach ( $this->mStripState->getLimitReport() as list( $key, $value ) ) {
 682                         $this->mOutput->setLimitReportData( $key, $value );
 683                 }
 684
 685                 Hooks::run( 'ParserLimitReportPrepare', [ $this, $this->mOutput ] );
 686
 687                 $limitReport = "NewPP limit report\n";
 688                 if ( $this->svcOptions->get( 'ShowHostnames' ) ) {
 689                         $limitReport .= 'Parsed by ' . wfHostname() . "\n";
 690                 }
 691                 $limitReport .= 'Cached time: ' . $this->mOutput->getCacheTime() . "\n";
 692                 $limitReport .= 'Cache expiry: ' . $this->mOutput->getCacheExpiry() . "\n";
 693                 $limitReport .= 'Dynamic content: ' .
 694                         ( $this->mOutput->hasDynamicContent() ? 'true' : 'false' ) .
 695                         "\n";
 696                 $limitReport .= 'Complications: [' . implode( ', ', $this->mOutput->getAllFlags() ) . "]\n";
 697
 698                 foreach ( $this->mOutput->getLimitReportData() as $key => $value ) {
 699                         if ( Hooks::run( 'ParserLimitReportFormat',
 700                                 [ $key, &$value, &$limitReport, false, false ]
 701                         ) ) {
 702                                 $keyMsg = wfMessage( $key )->inLanguage( 'en' )->useDatabase( false );
 703                                 $valueMsg = wfMessage( [ "$key-value-text", "$key-value" ] )
 704                                         ->inLanguage( 'en' )->useDatabase( false );
 705                                 if ( !$valueMsg->exists() ) {
 706                                         $valueMsg = new RawMessage( '$1' );
 707                                 }
 708                                 if ( !$keyMsg->isDisabled() && !$valueMsg->isDisabled() ) {
 709                                         $valueMsg->params( $value );
 710                                         $limitReport .= "{$keyMsg->text()}: {$valueMsg->text()}\n";
 711                                 }
 712                         }
 713                 }
 714                 // Since we're not really outputting HTML, decode the entities and
 715                 // then re-encode the things that need hiding inside HTML comments.
 716                 $limitReport = htmlspecialchars_decode( $limitReport );
 717
 718                 // Sanitize for comment. Note '‐' in the replacement is U+2010,
 719                 // which looks much like the problematic '-'.
 720                 $limitReport = str_replace( [ '-', '&' ], [ '‐', '&amp;' ], $limitReport );
 721                 $text = "\n<!-- \n$limitReport-->\n";
 722
 723                 // Add on template profiling data in human/machine readable way
 724                 $dataByFunc = $this->mProfiler->getFunctionStats();
 725                 uasort( $dataByFunc, function ( $a, $b ) {
 726                         return $b['real'] <=> $a['real']; // descending order
 727                 } );
 728                 $profileReport = [];
 729                 foreach ( array_slice( $dataByFunc, 0, 10 ) as $item ) {
 730                         $profileReport[] = sprintf( "%6.2f%% %8.3f %6d %s",
 731                                 $item['%real'], $item['real'], $item['calls'],
 732                                 htmlspecialchars( $item['name'] ) );
 733                 }
 734                 $text .= "<!--\nTransclusion expansion time report (%,ms,calls,template)\n";
 735                 $text .= implode( "\n", $profileReport ) . "\n-->\n";
 736
 737                 $this->mOutput->setLimitReportData( 'limitreport-timingprofile', $profileReport );
 738
 739                 // Add other cache related metadata
 740                 if ( $this->svcOptions->get( 'ShowHostnames' ) ) {
 741                         $this->mOutput->setLimitReportData( 'cachereport-origin', wfHostname() );
 742                 }
 743                 $this->mOutput->setLimitReportData( 'cachereport-timestamp',
 744                         $this->mOutput->getCacheTime() );
 745                 $this->mOutput->setLimitReportData( 'cachereport-ttl',
 746                         $this->mOutput->getCacheExpiry() );
 747                 $this->mOutput->setLimitReportData( 'cachereport-transientcontent',
 748                         $this->mOutput->hasDynamicContent() );
 749
 750                 if ( $this->mGeneratedPPNodeCount > $this->mOptions->getMaxGeneratedPPNodeCount() / 10 ) {
 751                         wfDebugLog( 'generated-pp-node-count', $this->mGeneratedPPNodeCount . ' ' .
 752                                 $this->mTitle->getPrefixedDBkey() );
 753                 }
 754                 return $text;
 755         }
 756
 757         /**
 758          * Half-parse wikitext to half-parsed HTML. This recursive parser entry point
 759          * can be called from an extension tag hook.
 760          *
 761          * The output of this function IS NOT SAFE PARSED HTML; it is "half-parsed"
 762          * instead, which means that lists and links have not been fully parsed yet,
 763          * and strip markers are still present.
 764          *
 765          * Use recursiveTagParseFully() to fully parse wikitext to output-safe HTML.
 766          *
 767          * Use this function if you're a parser tag hook and you want to parse
 768          * wikitext before or after applying additional transformations, and you
 769          * intend to *return the result as hook output*, which will cause it to go
 770          * through the rest of parsing process automatically.
 771          *
 772          * If $frame is not provided, then template variables (e.g., {{{1}}}) within
 773          * $text are not expanded
 774          *
 775          * @param string $text Text extension wants to have parsed
 776          * @param-taint $text escapes_htmlnoent
 777          * @param bool|PPFrame $frame The frame to use for expanding any template variables
 778          * @return string UNSAFE half-parsed HTML
 779          * @return-taint escaped
 780          */
 781         public function recursiveTagParse( $text, $frame = false ) {
 782                 // Avoid PHP 7.1 warning from passing $this by reference
 783                 $parser = $this;
 784                 Hooks::run( 'ParserBeforeStrip', [ &$parser, &$text, &$this->mStripState ] );
 785                 Hooks::run( 'ParserAfterStrip', [ &$parser, &$text, &$this->mStripState ] );
 786                 $text = $this->internalParse( $text, false, $frame );
 787                 return $text;
 788         }
 789
 790         /**
 791          * Fully parse wikitext to fully parsed HTML. This recursive parser entry
 792          * point can be called from an extension tag hook.
 793          *
 794          * The output of this function is fully-parsed HTML that is safe for output.
 795          * If you're a parser tag hook, you might want to use recursiveTagParse()
 796          * instead.
 797          *
 798          * If $frame is not provided, then template variables (e.g., {{{1}}}) within
 799          * $text are not expanded
 800          *
 801          * @since 1.25
 802          *
 803          * @param string $text Text extension wants to have parsed
 804          * @param-taint $text escapes_htmlnoent
 805          * @param bool|PPFrame $frame The frame to use for expanding any template variables
 806          * @return string Fully parsed HTML
 807          * @return-taint escaped
 808          */
 809         public function recursiveTagParseFully( $text, $frame = false ) {
 810                 $text = $this->recursiveTagParse( $text, $frame );
 811                 $text = $this->internalParseHalfParsed( $text, false );
 812                 return $text;
 813         }
 814
 815         /**
 816          * Expand templates and variables in the text, producing valid, static wikitext.
 817          * Also removes comments.
 818          * Do not call this function recursively.
 819          * @param string $text
 820          * @param Title|null $title
 821          * @param ParserOptions $options
 822          * @param int|null $revid
 823          * @param bool|PPFrame $frame
 824          * @return mixed|string
 825          */
 826         public function preprocess( $text, Title $title = null,
 827                 ParserOptions $options, $revid = null, $frame = false
 828         ) {
 829                 $magicScopeVariable = $this->lock();
 830                 $this->startParse( $title, $options, self::OT_PREPROCESS, true );
 831                 if ( $revid !== null ) {
 832                         $this->mRevisionId = $revid;
 833                 }
 834                 // Avoid PHP 7.1 warning from passing $this by reference
 835                 $parser = $this;
 836                 Hooks::run( 'ParserBeforeStrip', [ &$parser, &$text, &$this->mStripState ] );
 837                 Hooks::run( 'ParserAfterStrip', [ &$parser, &$text, &$this->mStripState ] );
 838                 $text = $this->replaceVariables( $text, $frame );
 839                 $text = $this->mStripState->unstripBoth( $text );
 840                 return $text;
 841         }
 842
 843         /**
 844          * Recursive parser entry point that can be called from an extension tag
 845          * hook.
 846          *
 847          * @param string $text Text to be expanded
 848          * @param bool|PPFrame $frame The frame to use for expanding any template variables
 849          * @return string
 850          * @since 1.19
 851          */
 852         public function recursivePreprocess( $text, $frame = false ) {
 853                 $text = $this->replaceVariables( $text, $frame );
 854                 $text = $this->mStripState->unstripBoth( $text );
 855                 return $text;
 856         }
 857
 858         /**
 859          * Process the wikitext for the "?preload=" feature. (T7210)
 860          *
 861          * "<noinclude>", "<includeonly>" etc. are parsed as for template
 862          * transclusion, comments, templates, arguments, tags hooks and parser
 863          * functions are untouched.
 864          *
 865          * @param string $text
 866          * @param Title $title
 867          * @param ParserOptions $options
 868          * @param array $params
 869          * @return string
 870          */
 871         public function getPreloadText( $text, Title $title, ParserOptions $options, $params = [] ) {
 872                 $msg = new RawMessage( $text );
 873                 $text = $msg->params( $params )->plain();
 874
 875                 # Parser (re)initialisation
 876                 $magicScopeVariable = $this->lock();
 877                 $this->startParse( $title, $options, self::OT_PLAIN, true );
 878
 879                 $flags = PPFrame::NO_ARGS | PPFrame::NO_TEMPLATES;
 880                 $dom = $this->preprocessToDom( $text, self::PTD_FOR_INCLUSION );
 881                 $text = $this->getPreprocessor()->newFrame()->expand( $dom, $flags );
 882                 $text = $this->mStripState->unstripBoth( $text );
 883                 return $text;
 884         }
 885
 886         /**
 887          * Set the current user.
 888          * Should only be used when doing pre-save transform.
 889          *
 890          * @param User|null $user User object or null (to reset)
 891          */
 892         public function setUser( $user ) {
                     // Stores $user in mUser as given; getUser() prefers this value whenever it is not null.
 893                 $this->mUser = $user;
 894         }
 895
 896         /**
 897          * Set the context title
 898          *
 899          * @param Title|null $t Title to use; a falsy value is replaced with a 'NO TITLE' placeholder
 900          */
 901         public function setTitle( $t ) {
 902                 if ( !$t ) {
 903                         $t = Title::newFromText( 'NO TITLE' );
 904                 }
 905
 906                 if ( $t->hasFragment() ) {
 907                         # Strip the fragment to avoid various odd effects
 908                         $this->mTitle = $t->createFragmentTarget( '' );
 909                 } else {
 910                         $this->mTitle = $t;
 911                 }
 912         }
 913
 914         /**
 915          * Accessor for the Title object
 916          *
 917          * @return Title|null
 918          */
 919         public function getTitle() {
                     // Plain read of mTitle, the property that setTitle() assigns.
 920                 return $this->mTitle;
 921         }
 922
 923         /**
 924          * Accessor/mutator for the Title object
 925          *
 926          * @param Title|null $x Title object or null to just get the current one
 927          * @return Title
 928          */
 929         public function Title( $x = null ) {
                     // Delegates to wfSetVar() with the mTitle property and $x, and returns
                     // whatever wfSetVar() returns.
                     // NOTE(review): unlike setTitle(), this method itself neither strips a
                     // fragment nor substitutes a placeholder title.
 930                 return wfSetVar( $this->mTitle, $x );
 931         }
 932
 933         /**
 934          * Set the output type
 935          *
 936          * @param int $ot New value
 937          */
 938         public function setOutputType( $ot ) {
 939                 $this->mOutputType = $ot;
 940                 # Shortcut alias
 941                 $this->ot = [
 942                         'html' => $ot == self::OT_HTML,
 943                         'wiki' => $ot == self::OT_WIKI,
 944                         'pre' => $ot == self::OT_PREPROCESS,
 945                         'plain' => $ot == self::OT_PLAIN,
 946                 ];
 947         }
 948
 949         /**
 950          * Accessor/mutator for the output type
 951          *
 952          * @param int|null $x New value or null to just get the current one
 953          * @return int
 954          */
 955         public function OutputType( $x = null ) {
                     // Delegates to wfSetVar() with the mOutputType property and $x, and
                     // returns whatever wfSetVar() returns.
                     // NOTE(review): this method does not rebuild the $this->ot shortcut array
                     // that setOutputType() fills in — confirm callers do not rely on it.
 956                 return wfSetVar( $this->mOutputType, $x );
 957         }
 958
 959         /**
 960          * Get the ParserOutput object
 961          *
 962          * @return ParserOutput
 963          */
 964         public function getOutput() {
                     // Plain read of the mOutput property.
 965                 return $this->mOutput;
 966         }
 967
 968         /**
 969          * Get the ParserOptions object
 970          *
 971          * @return ParserOptions
 972          */
 973         public function getOptions() {
                     // Plain read of the mOptions property.
 974                 return $this->mOptions;
 975         }
 976
 977         /**
 978          * Accessor/mutator for the ParserOptions object
 979          *
 980          * @param ParserOptions|null $x New value or null to just get the current one
 981          * @return ParserOptions Current ParserOptions object
 982          */
 983         public function Options( $x = null ) {
                     // Delegates to wfSetVar() with the mOptions property and $x, and returns
                     // whatever wfSetVar() returns.
 984                 return wfSetVar( $this->mOptions, $x );
 985         }
 986
 987         /**
 988          * @return int
 989          */
 990         public function nextLinkID() {
                     // Post-increment: the caller receives the current value of mLinkID and
                     // the counter is advanced afterwards.
 991                 return $this->mLinkID++;
 992         }
 993
 994         /**
 995          * @param int $id
 996          */
 997         public function setLinkID( $id ) {
                     // Overwrites the counter that nextLinkID() returns and advances.
 998                 $this->mLinkID = $id;
 999         }
1000
1001         /**
1002          * Get a language object for use in parser functions such as {{FORMATNUM:}}
1003          * @return Language
1004          */
1005         public function getFunctionLang() {
                     // Alias: defers entirely to getTargetLanguage(), including any exception it throws.
1006                 return $this->getTargetLanguage();
1007         }
1008
1009         /**
1010          * Get the target language for the content being parsed. This is usually the
1011          * language that the content is in.
1012          *
1013          * @since 1.19
1014          *
1015          * @throws MWException
1016          * @return Language
1017          */
1018         public function getTargetLanguage() {
1019                 $target = $this->mOptions->getTargetLanguage();
1020
1021                 if ( $target !== null ) {
1022                         return $target;
1023                 } elseif ( $this->mOptions->getInterfaceMessage() ) {
1024                         return $this->mOptions->getUserLangObj();
1025                 } elseif ( is_null( $this->mTitle ) ) {
1026                         throw new MWException( __METHOD__ . ': $this->mTitle is null' );
1027                 }
1028
1029                 return $this->mTitle->getPageLanguage();
1030         }
1031
1032         /**
1033          * Get the language object for language conversion
1034          * @deprecated since 1.32, just use getTargetLanguage()
1035          * @return Language|null
1036          */
1037         public function getConverterLanguage() {
                     // Deprecated alias: forwards to getTargetLanguage() with no logic of its own.
1038                 return $this->getTargetLanguage();
1039         }
1040
1041         /**
1042          * Get a User object either from $this->mUser, if set, or from the
1043          * ParserOptions object otherwise
1044          *
1045          * @return User
1046          */
1047         public function getUser() {
1048                 if ( !is_null( $this->mUser ) ) {
1049                         return $this->mUser;
1050                 }
1051                 return $this->mOptions->getUser();
1052         }
1053
1054         /**
1055          * Get a preprocessor object
1056          *
1057          * @return Preprocessor
1058          */
1059         public function getPreprocessor() {
1060                 if ( !isset( $this->mPreprocessor ) ) {
1061                         $class = $this->svcOptions->get( 'preprocessorClass' );
1062                         $this->mPreprocessor = new $class( $this );
1063                 }
1064                 return $this->mPreprocessor;
1065         }
1066
1067         /**
1068          * Get a LinkRenderer instance to make links with
1069          *
1070          * @since 1.28
1071          * @return LinkRenderer
1072          */
1073         public function getLinkRenderer() {
1074                 // XXX We make the LinkRenderer with current options and then cache it forever
1075                 if ( !$this->mLinkRenderer ) {
1076                         $this->mLinkRenderer = $this->linkRendererFactory->create();
1077                         $this->mLinkRenderer->setStubThreshold(
1078                                 $this->getOptions()->getStubThreshold()
1079                         );
1080                 }
1081
1082                 return $this->mLinkRenderer;
1083         }
1084
1085         /**
1086          * Get the MagicWordFactory that this Parser is using
1087          *
1088          * @since 1.32
1089          * @return MagicWordFactory
1090          */
1091         public function getMagicWordFactory() {
                     // Plain read of the magicWordFactory property.
1092                 return $this->magicWordFactory;
1093         }
1094
1095         /**
1096          * Get the content language that this Parser is using
1097          *
1098          * @since 1.32
1099          * @return Language
1100          */
1101         public function getContentLanguage() {
                     // Plain read of the contLang property.
1102                 return $this->contLang;
1103         }
1104
1105         /**
1106          * Replaces all occurrences of HTML-style comments and the given tags
1107          * in the text with a unique marker and returns the new text. The output
1108          * parameter $matches will be an associative array filled with data in
1109          * the form:
1110          *
1111          * @code
1112          *   'UNIQ-xxxxx' => [
1113          *     'element',
1114          *     'tag content',
1115          *     [ 'param' => 'x' ],
1116          *     '<element param="x">tag content</element>' ]
1117          * @endcode
1118          *
1119          * @param array $elements List of element names. Comments are always extracted.
1120          * @param string $text Source text string.
1121          * @param array &$matches Out parameter, Array: extracted tags
1122          * @return string Stripped text
1123          */
1124         public static function extractTagsAndParams( $elements, $text, &$matches ) {
1125                 static $n = 1;
1126                 $stripped = '';
1127                 $matches = [];
1128
1129                 $taglist = implode( '|', $elements );
1130                 $start = "/<($taglist)(\\s+[^>]*?|\\s*?)(\/?>)|<(!--)/i";
1131
1132                 while ( $text != '' ) {
1133                         $p = preg_split( $start, $text, 2, PREG_SPLIT_DELIM_CAPTURE );
1134                         $stripped .= $p[0];
1135                         if ( count( $p ) < 5 ) {
1136                                 break;
1137                         }
1138                         if ( count( $p ) > 5 ) {
1139                                 # comment
1140                                 $element = $p[4];
1141                                 $attributes = '';
1142                                 $close = '';
1143                                 $inside = $p[5];
1144                         } else {
1145                                 # tag
1146                                 list( , $element, $attributes, $close, $inside ) = $p;
1147                         }
1148
1149                         $marker = self::MARKER_PREFIX . "-$element-" . sprintf( '%08X', $n++ ) . self::MARKER_SUFFIX;
1150                         $stripped .= $marker;
1151
1152                         if ( $close === '/>' ) {
1153                                 # Empty element tag, <tag />
1154                                 $content = null;
1155                                 $text = $inside;
1156                                 $tail = null;
1157                         } else {
1158                                 if ( $element === '!--' ) {
1159                                         $end = '/(-->)/';
1160                                 } else {
1161                                         $end = "/(<\\/$element\\s*>)/i";
1162                                 }
1163                                 $q = preg_split( $end, $inside, 2, PREG_SPLIT_DELIM_CAPTURE );
1164                                 $content = $q[0];
1165                                 if ( count( $q ) < 3 ) {
1166                                         # No end tag -- let it run out to the end of the text.
1167                                         $tail = '';
1168                                         $text = '';
1169                                 } else {
1170                                         list( , $tail, $text ) = $q;
1171                                 }
1172                         }
1173
1174                         $matches[$marker] = [ $element,
1175                                 $content,
1176                                 Sanitizer::decodeTagAttributes( $attributes ),
1177                                 "<$element$attributes$close$content$tail" ];
1178                 }
1179                 return $stripped;
1180         }
1181
1182         /**
1183          * Get a list of strippable XML-like elements
1184          *
1185          * @return array
1186          */
1187         public function getStripList() {
                     // Plain read of the mStripList property.
1188                 return $this->mStripList;
1189         }
1190
1191         /**
1192          * Get the StripState
1193          *
1194          * @return StripState
1195          */
1196         public function getStripState() {
                     // Plain read of mStripState, the object insertStripItem() adds markers to.
1197                 return $this->mStripState;
1198         }
1199
1200         /**
1201          * Add an item to the strip state
1202          * Returns the unique tag which must be inserted into the stripped text
1203          * The tag will be replaced with the original text in unstrip()
1204          *
1205          * @param string $text
1206          *
1207          * @return string
1208          */
1209         public function insertStripItem( $text ) {
1210                 $marker = self::MARKER_PREFIX . "-item-{$this->mMarkerIndex}-" . self::MARKER_SUFFIX;
1211                 $this->mMarkerIndex++;
1212                 $this->mStripState->addGeneral( $marker, $text );
1213                 return $marker;
1214         }
1215
1216         /**
1217          * parse the wiki syntax used to render tables
1218          *
1219          * @private
1220          * @param string $text
1221          * @return string
1222          */
1223         public function doTableStuff( $text ) {
1224                 $lines = StringUtils::explode( "\n", $text );
1225                 $out = '';
1226                 $td_history = []; # Is currently a td tag open?
1227                 $last_tag_history = []; # Save history of last lag activated (td, th or caption)
1228                 $tr_history = []; # Is currently a tr tag open?
1229                 $tr_attributes = []; # history of tr attributes
1230                 $has_opened_tr = []; # Did this table open a <tr> element?
1231                 $indent_level = 0; # indent level of the table
1232
1233                 foreach ( $lines as $outLine ) {
1234                         $line = trim( $outLine );
1235
1236                         if ( $line === '' ) { # empty line, go to next line
1237                                 $out .= $outLine . "\n";
1238                                 continue;
1239                         }
1240
1241                         $first_character = $line[0];
1242                         $first_two = substr( $line, 0, 2 );
1243                         $matches = [];
1244
1245                         if ( preg_match( '/^(:*)\s*\{\|(.*)$/', $line, $matches ) ) {
1246                                 # First check if we are starting a new table
1247                                 $indent_level = strlen( $matches[1] );
1248
1249                                 $attributes = $this->mStripState->unstripBoth( $matches[2] );
1250                                 $attributes = Sanitizer::fixTagAttributes( $attributes, 'table' );
1251
1252                                 $outLine = str_repeat( '<dl><dd>', $indent_level ) . "<table{$attributes}>";
1253                                 array_push( $td_history, false );
1254                                 array_push( $last_tag_history, '' );
1255                                 array_push( $tr_history, false );
1256                                 array_push( $tr_attributes, '' );
1257                                 array_push( $has_opened_tr, false );
1258                         } elseif ( count( $td_history ) == 0 ) {
1259                                 # Don't do any of the following
1260                                 $out .= $outLine . "\n";
1261                                 continue;
1262                         } elseif ( $first_two === '|}' ) {
1263                                 # We are ending a table
1264                                 $line = '</table>' . substr( $line, 2 );
1265                                 $last_tag = array_pop( $last_tag_history );
1266
1267                                 if ( !array_pop( $has_opened_tr ) ) {
1268                                         $line = "<tr><td></td></tr>{$line}";
1269                                 }
1270
1271                                 if ( array_pop( $tr_history ) ) {
1272                                         $line = "</tr>{$line}";
1273                                 }
1274
1275                                 if ( array_pop( $td_history ) ) {
1276                                         $line = "</{$last_tag}>{$line}";
1277                                 }
1278                                 array_pop( $tr_attributes );
1279                                 if ( $indent_level > 0 ) {
1280                                         $outLine = rtrim( $line ) . str_repeat( '</dd></dl>', $indent_level );
1281                                 } else {
1282                                         $outLine = $line;
1283                                 }
1284                         } elseif ( $first_two === '|-' ) {
1285                                 # Now we have a table row
1286                                 $line = preg_replace( '#^\|-+#', '', $line );
1287
1288                                 # Whats after the tag is now only attributes
1289                                 $attributes = $this->mStripState->unstripBoth( $line );
1290                                 $attributes = Sanitizer::fixTagAttributes( $attributes, 'tr' );
1291                                 array_pop( $tr_attributes );
1292                                 array_push( $tr_attributes, $attributes );
1293
1294                                 $line = '';
1295                                 $last_tag = array_pop( $last_tag_history );
1296                                 array_pop( $has_opened_tr );
1297                                 array_push( $has_opened_tr, true );
1298
1299                                 if ( array_pop( $tr_history ) ) {
1300                                         $line = '</tr>';
1301                                 }
1302
1303                                 if ( array_pop( $td_history ) ) {
1304                                         $line = "</{$last_tag}>{$line}";
1305                                 }
1306
1307                                 $outLine = $line;
1308                                 array_push( $tr_history, false );
1309                                 array_push( $td_history, false );
1310                                 array_push( $last_tag_history, '' );
1311                         } elseif ( $first_character === '|'
1312                                 || $first_character === '!'
1313                                 || $first_two === '|+'
1314                         ) {
1315                                 # This might be cell elements, td, th or captions
1316                                 if ( $first_two === '|+' ) {
1317                                         $first_character = '+';
1318                                         $line = substr( $line, 2 );
1319                                 } else {
1320                                         $line = substr( $line, 1 );
1321                                 }
1322
1323                                 // Implies both are valid for table headings.
1324                                 if ( $first_character === '!' ) {
1325                                         $line = StringUtils::replaceMarkup( '!!', '||', $line );
1326                                 }
1327
1328                                 # Split up multiple cells on the same line.
1329                                 # FIXME : This can result in improper nesting of tags processed
1330                                 # by earlier parser steps.
1331                                 $cells = explode( '||', $line );
1332
1333                                 $outLine = '';
1334
1335                                 # Loop through each table cell
1336                                 foreach ( $cells as $cell ) {
1337                                         $previous = '';
1338                                         if ( $first_character !== '+' ) {
1339                                                 $tr_after = array_pop( $tr_attributes );
1340                                                 if ( !array_pop( $tr_history ) ) {
1341                                                         $previous = "<tr{$tr_after}>\n";
1342                                                 }
1343                                                 array_push( $tr_history, true );
1344                                                 array_push( $tr_attributes, '' );
1345                                                 array_pop( $has_opened_tr );
1346                                                 array_push( $has_opened_tr, true );
1347                                         }
1348
1349                                         $last_tag = array_pop( $last_tag_history );
1350
1351                                         if ( array_pop( $td_history ) ) {
1352                                                 $previous = "</{$last_tag}>\n{$previous}";
1353                                         }
1354
1355                                         if ( $first_character === '|' ) {
1356                                                 $last_tag = 'td';
1357                                         } elseif ( $first_character === '!' ) {
1358                                                 $last_tag = 'th';
1359                                         } elseif ( $first_character === '+' ) {
1360                                                 $last_tag = 'caption';
1361                                         } else {
1362                                                 $last_tag = '';
1363                                         }
1364
1365                                         array_push( $last_tag_history, $last_tag );
1366
1367                                         # A cell could contain both parameters and data
1368                                         $cell_data = explode( '|', $cell, 2 );
1369
1370                                         # T2553: Note that a '|' inside an invalid link should not
1371                                         # be mistaken as delimiting cell parameters
1372                                         # Bug T153140: Neither should language converter markup.
1373                                         if ( preg_match( '/\[\[|-\{/', $cell_data[0] ) === 1 ) {
1374                                                 $cell = "{$previous}<{$last_tag}>" . trim( $cell );
1375                                         } elseif ( count( $cell_data ) == 1 ) {
1376                                                 // Whitespace in cells is trimmed
1377                                                 $cell = "{$previous}<{$last_tag}>" . trim( $cell_data[0] );
1378                                         } else {
1379                                                 $attributes = $this->mStripState->unstripBoth( $cell_data[0] );
1380                                                 $attributes = Sanitizer::fixTagAttributes( $attributes, $last_tag );
1381                                                 // Whitespace in cells is trimmed
1382                                                 $cell = "{$previous}<{$last_tag}{$attributes}>" . trim( $cell_data[1] );
1383                                         }
1384
1385                                         $outLine .= $cell;
1386                                         array_push( $td_history, true );
1387                                 }
1388                         }
1389                         $out .= $outLine . "\n";
1390                 }
1391
1392                 # Closing open td, tr && table
1393                 while ( count( $td_history ) > 0 ) {
1394                         if ( array_pop( $td_history ) ) {
1395                                 $out .= "</td>\n";
1396                         }
1397                         if ( array_pop( $tr_history ) ) {
1398                                 $out .= "</tr>\n";
1399                         }
1400                         if ( !array_pop( $has_opened_tr ) ) {
1401                                 $out .= "<tr><td></td></tr>\n";
1402                         }
1403
1404                         $out .= "</table>\n";
1405                 }
1406
1407                 # Remove trailing line-ending (b/c)
1408                 if ( substr( $out, -1 ) === "\n" ) {
1409                         $out = substr( $out, 0, -1 );
1410                 }
1411
1412                 # special case: don't return empty table
1413                 if ( $out === "<table>\n<tr><td></td></tr>\n</table>" ) {
1414                         $out = '';
1415                 }
1416
1417                 return $out;
1418         }
1419
1420         /**
1421          * Helper function for parse() that transforms wiki markup into half-parsed
1422          * HTML. Only called for $mOutputType == self::OT_HTML.
1423          *
1424          * @private
1425          *
1426          * @param string $text The text to parse
1427          * @param-taint $text escapes_html
1428          * @param bool $isMain Whether this is being called from the main parse() function
1429          * @param PPFrame|bool $frame A pre-processor frame
1430          *
1431          * @return string
1432          */
1433         public function internalParse( $text, $isMain = true, $frame = false ) {
1434                 $origText = $text;
1435
1436                 // Avoid PHP 7.1 warning from passing $this by reference
1437                 $parser = $this;
1438
1439                 # Hook to suspend the parser in this state
1440                 if ( !Hooks::run( 'ParserBeforeInternalParse', [ &$parser, &$text, &$this->mStripState ] ) ) {
1441                         return $text;
1442                 }
1443
1444                 # if $frame is provided, then use $frame for replacing any variables
1445                 if ( $frame ) {
1446                         # use frame depth to infer how include/noinclude tags should be handled
1447                         # depth=0 means this is the top-level document; otherwise it's an included document
1448                         if ( !$frame->depth ) {
1449                                 $flag = 0;
1450                         } else {
1451                                 $flag = self::PTD_FOR_INCLUSION;
1452                         }
1453                         $dom = $this->preprocessToDom( $text, $flag );
1454                         $text = $frame->expand( $dom );
1455                 } else {
1456                         # if $frame is not provided, then use old-style replaceVariables
1457                         $text = $this->replaceVariables( $text );
1458                 }
1459
1460                 Hooks::run( 'InternalParseBeforeSanitize', [ &$parser, &$text, &$this->mStripState ] );
1461                 $text = Sanitizer::removeHTMLtags(
1462                         $text,
1463                         [ $this, 'attributeStripCallback' ],
1464                         false,
1465                         array_keys( $this->mTransparentTagHooks ),
1466                         [],
1467                         [ $this, 'addTrackingCategory' ]
1468                 );
1469                 Hooks::run( 'InternalParseBeforeLinks', [ &$parser, &$text, &$this->mStripState ] );
1470
1471                 # Tables need to come after variable replacement for things to work
1472                 # properly; putting them before other transformations should keep
1473                 # exciting things like link expansions from showing up in surprising
1474                 # places.
1475                 $text = $this->doTableStuff( $text );
1476
1477                 $text = preg_replace( '/(^|\n)-----*/', '\\1<hr />', $text );
1478
1479                 $text = $this->doDoubleUnderscore( $text );
1480
1481                 $text = $this->doHeadings( $text );
1482                 $text = $this->replaceInternalLinks( $text );
1483                 $text = $this->doAllQuotes( $text );
1484                 $text = $this->replaceExternalLinks( $text );
1485
1486                 # replaceInternalLinks may sometimes leave behind
1487                 # absolute URLs, which have to be masked to hide them from replaceExternalLinks
1488                 $text = str_replace( self::MARKER_PREFIX . 'NOPARSE', '', $text );
1489
1490                 $text = $this->doMagicLinks( $text );
1491                 $text = $this->formatHeadings( $text, $origText, $isMain );
1492
1493                 return $text;
1494         }
1495
1496         /**
1497          * Helper function for parse() that transforms half-parsed HTML into fully
1498          * parsed HTML.
1499          *
1500          * @param string $text
1501          * @param bool $isMain
1502          * @param bool $linestart
1503          * @return string
1504          */
1505         private function internalParseHalfParsed( $text, $isMain = true, $linestart = true ) {
1506                 $text = $this->mStripState->unstripGeneral( $text );
1507
1508                 // Avoid PHP 7.1 warning from passing $this by reference
1509                 $parser = $this;
1510
1511                 if ( $isMain ) {
1512                         Hooks::run( 'ParserAfterUnstrip', [ &$parser, &$text ] );
1513                 }
1514
1515                 # Clean up special characters, only run once, next-to-last before doBlockLevels
1516                 $text = Sanitizer::armorFrenchSpaces( $text );
1517
1518                 $text = $this->doBlockLevels( $text, $linestart );
1519
1520                 $this->replaceLinkHolders( $text );
1521
1522                 /**
1523                  * The input doesn't get language converted if
1524                  * a) It's disabled
1525                  * b) Content isn't converted
1526                  * c) It's a conversion table
1527                  * d) it is an interface message (which is in the user language)
1528                  */
1529                 if ( !( $this->mOptions->getDisableContentConversion()
1530                         || isset( $this->mDoubleUnderscores['nocontentconvert'] ) )
1531                         && !$this->mOptions->getInterfaceMessage()
1532                 ) {
1533                         # The position of the convert() call should not be changed. it
1534                         # assumes that the links are all replaced and the only thing left
1535                         # is the <nowiki> mark.
1536                         $text = $this->getTargetLanguage()->convert( $text );
1537                 }
1538
1539                 $text = $this->mStripState->unstripNoWiki( $text );
1540
1541                 if ( $isMain ) {
1542                         Hooks::run( 'ParserBeforeTidy', [ &$parser, &$text ] );
1543                 }
1544
1545                 $text = $this->replaceTransparentTags( $text );
1546                 $text = $this->mStripState->unstripGeneral( $text );
1547
1548                 $text = Sanitizer::normalizeCharReferences( $text );
1549
1550                 if ( MWTidy::isEnabled() ) {
1551                         if ( $this->mOptions->getTidy() ) {
1552                                 $text = MWTidy::tidy( $text );
1553                         }
1554                 } else {
1555                         # attempt to sanitize at least some nesting problems
1556                         # (T4702 and quite a few others)
1557                         # This code path is buggy and deprecated!
1558                         wfDeprecated( 'disabling tidy', '1.33' );
1559                         $tidyregs = [
1560                                 # ''Something [http://www.cool.com cool''] -->
1561                                 # <i>Something</i><a href="http://www.cool.com"..><i>cool></i></a>
1562                                 '/(<([bi])>)(<([bi])>)?([^<]*)(<\/?a[^<]*>)([^<]*)(<\/\\4>)?(<\/\\2>)/' =>
1563                                 '\\1\\3\\5\\8\\9\\6\\1\\3\\7\\8\\9',
1564                                 # fix up an anchor inside another anchor, only
1565                                 # at least for a single single nested link (T5695)
1566                                 '/(<a[^>]+>)([^<]*)(<a[^>]+>[^<]*)<\/a>(.*)<\/a>/' =>
1567                                 '\\1\\2</a>\\3</a>\\1\\4</a>',
1568                                 # fix div inside inline elements- doBlockLevels won't wrap a line which
1569                                 # contains a div, so fix it up here; replace
1570                                 # div with escaped text
1571                                 '/(<([aib]) [^>]+>)([^<]*)(<div([^>]*)>)(.*)(<\/div>)([^<]*)(<\/\\2>)/' =>
1572                                 '\\1\\3&lt;div\\5&gt;\\6&lt;/div&gt;\\8\\9',
1573                                 # remove empty italic or bold tag pairs, some
1574                                 # introduced by rules above
1575                                 '/<([bi])><\/\\1>/' => '',
1576                         ];
1577
1578                         $text = preg_replace(
1579                                 array_keys( $tidyregs ),
1580                                 array_values( $tidyregs ),
1581                                 $text );
1582                 }
1583
1584                 if ( $isMain ) {
1585                         Hooks::run( 'ParserAfterTidy', [ &$parser, &$text ] );
1586                 }
1587
1588                 return $text;
1589         }
1590
1591         /**
1592          * Replace special strings like "ISBN xxx" and "RFC xxx" with
1593          * magic external links.
1594          *
1595          * DML
1596          * @private
1597          *
1598          * @param string $text
1599          *
1600          * @return string
1601          */
1602         public function doMagicLinks( $text ) {
1603                 $prots = wfUrlProtocolsWithoutProtRel();
1604                 $urlChar = self::EXT_LINK_URL_CLASS;
1605                 $addr = self::EXT_LINK_ADDR;
1606                 $space = self::SPACE_NOT_NL; #  non-newline space
1607                 $spdash = "(?:-|$space)"; # a dash or a non-newline space
1608                 $spaces = "$space++"; # possessive match of 1 or more spaces
1609                 $text = preg_replace_callback(
1610                         '!(?:                        # Start cases
1611                                 (<a[ \t\r\n>].*?</a>) |    # m[1]: Skip link text
1612                                 (<.*?>) |                  # m[2]: Skip stuff inside HTML elements' . "
1613                                 (\b                        # m[3]: Free external links
1614                                         (?i:$prots)
1615                                         ($addr$urlChar*)         # m[4]: Post-protocol path
1616                                 ) |
1617                                 \b(?:RFC|PMID) $spaces     # m[5]: RFC or PMID, capture number
1618                                         ([0-9]+)\b |
1619                                 \bISBN $spaces (           # m[6]: ISBN, capture number
1620                                         (?: 97[89] $spdash? )?   #  optional 13-digit ISBN prefix
1621                                         (?: [0-9]  $spdash? ){9} #  9 digits with opt. delimiters
1622                                         [0-9Xx]                  #  check digit
1623                                 )\b
1624                         )!xu", [ $this, 'magicLinkCallback' ], $text );
1625                 return $text;
1626         }
1627
1628         /**
1629          * @throws MWException
1630          * @param array $m
1631          * @return string HTML
1632          */
1633         public function magicLinkCallback( $m ) {
1634                 if ( isset( $m[1] ) && $m[1] !== '' ) {
1635                         # Skip anchor
1636                         return $m[0];
1637                 } elseif ( isset( $m[2] ) && $m[2] !== '' ) {
1638                         # Skip HTML element
1639                         return $m[0];
1640                 } elseif ( isset( $m[3] ) && $m[3] !== '' ) {
1641                         # Free external link
1642                         return $this->makeFreeExternalLink( $m[0], strlen( $m[4] ) );
1643                 } elseif ( isset( $m[5] ) && $m[5] !== '' ) {
1644                         # RFC or PMID
1645                         if ( substr( $m[0], 0, 3 ) === 'RFC' ) {
1646                                 if ( !$this->mOptions->getMagicRFCLinks() ) {
1647                                         return $m[0];
1648                                 }
1649                                 $keyword = 'RFC';
1650                                 $urlmsg = 'rfcurl';
1651                                 $cssClass = 'mw-magiclink-rfc';
1652                                 $trackingCat = 'magiclink-tracking-rfc';
1653                                 $id = $m[5];
1654                         } elseif ( substr( $m[0], 0, 4 ) === 'PMID' ) {
1655                                 if ( !$this->mOptions->getMagicPMIDLinks() ) {
1656                                         return $m[0];
1657                                 }
1658                                 $keyword = 'PMID';
1659                                 $urlmsg = 'pubmedurl';
1660                                 $cssClass = 'mw-magiclink-pmid';
1661                                 $trackingCat = 'magiclink-tracking-pmid';
1662                                 $id = $m[5];
1663                         } else {
1664                                 throw new MWException( __METHOD__ . ': unrecognised match type "' .
1665                                         substr( $m[0], 0, 20 ) . '"' );
1666                         }
1667                         $url = wfMessage( $urlmsg, $id )->inContentLanguage()->text();
1668                         $this->addTrackingCategory( $trackingCat );
1669                         return Linker::makeExternalLink( $url, "{$keyword} {$id}", true, $cssClass, [], $this->mTitle );
1670                 } elseif ( isset( $m[6] ) && $m[6] !== ''
1671                         && $this->mOptions->getMagicISBNLinks()
1672                 ) {
1673                         # ISBN
1674                         $isbn = $m[6];
1675                         $space = self::SPACE_NOT_NL; #  non-newline space
1676                         $isbn = preg_replace( "/$space/", ' ', $isbn );
1677                         $num = strtr( $isbn, [
1678                                 '-' => '',
1679                                 ' ' => '',
1680                                 'x' => 'X',
1681                         ] );
1682                         $this->addTrackingCategory( 'magiclink-tracking-isbn' );
1683                         return $this->getLinkRenderer()->makeKnownLink(
1684                                 SpecialPage::getTitleFor( 'Booksources', $num ),
1685                                 "ISBN $isbn",
1686                                 [
1687                                         'class' => 'internal mw-magiclink-isbn',
1688                                         'title' => false // suppress title attribute
1689                                 ]
1690                         );
1691                 } else {
1692                         return $m[0];
1693                 }
1694         }
1695
1696         /**
1697          * Make a free external link, given a user-supplied URL
1698          *
1699          * @param string $url
1700          * @param int $numPostProto
1701          *   The number of characters after the protocol.
1702          * @return string HTML
1703          * @private
1704          */
1705         public function makeFreeExternalLink( $url, $numPostProto ) {
1706                 $trail = '';
1707
1708                 # The characters '<' and '>' (which were escaped by
1709                 # removeHTMLtags()) should not be included in
1710                 # URLs, per RFC 2396.
1711                 # Make &nbsp; terminate a URL as well (bug T84937)
1712                 $m2 = [];
1713                 if ( preg_match(
1714                         '/&(lt|gt|nbsp|#x0*(3[CcEe]|[Aa]0)|#0*(60|62|160));/',
1715                         $url,
1716                         $m2,
1717                         PREG_OFFSET_CAPTURE
1718                 ) ) {
1719                         $trail = substr( $url, $m2[0][1] ) . $trail;
1720                         $url = substr( $url, 0, $m2[0][1] );
1721                 }
1722
1723                 # Move trailing punctuation to $trail
1724                 $sep = ',;\.:!?';
1725                 # If there is no left bracket, then consider right brackets fair game too
1726                 if ( strpos( $url, '(' ) === false ) {
1727                         $sep .= ')';
1728                 }
1729
1730                 $urlRev = strrev( $url );
1731                 $numSepChars = strspn( $urlRev, $sep );
1732                 # Don't break a trailing HTML entity by moving the ; into $trail
1733                 # This is in hot code, so use substr_compare to avoid having to
1734                 # create a new string object for the comparison
1735                 if ( $numSepChars && substr_compare( $url, ";", -$numSepChars, 1 ) === 0 ) {
1736                         # more optimization: instead of running preg_match with a $
1737                         # anchor, which can be slow, do the match on the reversed
1738                         # string starting at the desired offset.
1739                         # un-reversed regexp is: /&([a-z]+|#x[\da-f]+|#\d+)$/i
1740                         if ( preg_match( '/\G([a-z]+|[\da-f]+x#|\d+#)&/i', $urlRev, $m2, 0, $numSepChars ) ) {
1741                                 $numSepChars--;
1742                         }
1743                 }
1744                 if ( $numSepChars ) {
1745                         $trail = substr( $url, -$numSepChars ) . $trail;
1746                         $url = substr( $url, 0, -$numSepChars );
1747                 }
1748
1749                 # Verify that we still have a real URL after trail removal, and
1750                 # not just lone protocol
1751                 if ( strlen( $trail ) >= $numPostProto ) {
1752                         return $url . $trail;
1753                 }
1754
1755                 $url = Sanitizer::cleanUrl( $url );
1756
1757                 # Is this an external image?
1758                 $text = $this->maybeMakeExternalImage( $url );
1759                 if ( $text === false ) {
1760                         # Not an image, make a link
1761                         $text = Linker::makeExternalLink( $url,
1762                                 $this->getTargetLanguage()->getConverter()->markNoConversion( $url ),
1763                                 true, 'free',
1764                                 $this->getExternalLinkAttribs( $url ), $this->mTitle );
1765                         # Register it in the output object...
1766                         $this->mOutput->addExternalLink( $url );
1767                 }
1768                 return $text . $trail;
1769         }
1770
1771         /**
1772          * Parse headers and return html
1773          *
1774          * @private
1775          *
1776          * @param string $text
1777          *
1778          * @return string
1779          */
1780         public function doHeadings( $text ) {
1781                 for ( $i = 6; $i >= 1; --$i ) {
1782                         $h = str_repeat( '=', $i );
1783                         // Trim non-newline whitespace from headings
1784                         // Using \s* will break for: "==\n===\n" and parse as <h2>=</h2>
1785                         $text = preg_replace( "/^(?:$h)[ \\t]*(.+?)[ \\t]*(?:$h)\\s*$/m", "<h$i>\\1</h$i>", $text );
1786                 }
1787                 return $text;
1788         }
1789
1790         /**
1791          * Replace single quotes with HTML markup
1792          * @private
1793          *
1794          * @param string $text
1795          *
1796          * @return string The altered text
1797          */
1798         public function doAllQuotes( $text ) {
1799                 $outtext = '';
1800                 $lines = StringUtils::explode( "\n", $text );
1801                 foreach ( $lines as $line ) {
1802                         $outtext .= $this->doQuotes( $line ) . "\n";
1803                 }
1804                 $outtext = substr( $outtext, 0, -1 );
1805                 return $outtext;
1806         }
1807
1808         /**
1809          * Helper function for doAllQuotes()
1810          *
1811          * @param string $text
1812          *
1813          * @return string
1814          */
1815         public function doQuotes( $text ) {
1816                 $arr = preg_split( "/(''+)/", $text, -1, PREG_SPLIT_DELIM_CAPTURE );
1817                 $countarr = count( $arr );
1818                 if ( $countarr == 1 ) {
1819                         return $text;
1820                 }
1821
1822                 // First, do some preliminary work. This may shift some apostrophes from
1823                 // being mark-up to being text. It also counts the number of occurrences
1824                 // of bold and italics mark-ups.
1825                 $numbold = 0;
1826                 $numitalics = 0;
1827                 for ( $i = 1; $i < $countarr; $i += 2 ) {
1828                         $thislen = strlen( $arr[$i] );
1829                         // If there are ever four apostrophes, assume the first is supposed to
1830                         // be text, and the remaining three constitute mark-up for bold text.
1831                         // (T15227: ''''foo'''' turns into ' ''' foo ' ''')
1832                         if ( $thislen == 4 ) {
1833                                 $arr[$i - 1] .= "'";
1834                                 $arr[$i] = "'''";
1835                                 $thislen = 3;
1836                         } elseif ( $thislen > 5 ) {
1837                                 // If there are more than 5 apostrophes in a row, assume they're all
1838                                 // text except for the last 5.
1839                                 // (T15227: ''''''foo'''''' turns into ' ''''' foo ' ''''')
1840                                 $arr[$i - 1] .= str_repeat( "'", $thislen - 5 );
1841                                 $arr[$i] = "'''''";
1842                                 $thislen = 5;
1843                         }
1844                         // Count the number of occurrences of bold and italics mark-ups.
1845                         if ( $thislen == 2 ) {
1846                                 $numitalics++;
1847                         } elseif ( $thislen == 3 ) {
1848                                 $numbold++;
1849                         } elseif ( $thislen == 5 ) {
1850                                 $numitalics++;
1851                                 $numbold++;
1852                         }
1853                 }
1854
1855                 // If there is an odd number of both bold and italics, it is likely
1856                 // that one of the bold ones was meant to be an apostrophe followed
1857                 // by italics. Which one we cannot know for certain, but it is more
1858                 // likely to be one that has a single-letter word before it.
1859                 if ( ( $numbold % 2 == 1 ) && ( $numitalics % 2 == 1 ) ) {
1860                         $firstsingleletterword = -1;
1861                         $firstmultiletterword = -1;
1862                         $firstspace = -1;
1863                         for ( $i = 1; $i < $countarr; $i += 2 ) {
1864                                 if ( strlen( $arr[$i] ) == 3 ) {
1865                                         $x1 = substr( $arr[$i - 1], -1 );
1866                                         $x2 = substr( $arr[$i - 1], -2, 1 );
1867                                         if ( $x1 === ' ' ) {
1868                                                 if ( $firstspace == -1 ) {
1869                                                         $firstspace = $i;
1870                                                 }
1871                                         } elseif ( $x2 === ' ' ) {
1872                                                 $firstsingleletterword = $i;
1873                                                 // if $firstsingleletterword is set, we don't
1874                                                 // look at the other options, so we can bail early.
1875                                                 break;
1876                                         } elseif ( $firstmultiletterword == -1 ) {
1877                                                 $firstmultiletterword = $i;
1878                                         }
1879                                 }
1880                         }
1881
1882                         // If there is a single-letter word, use it!
1883                         if ( $firstsingleletterword > -1 ) {
1884                                 $arr[$firstsingleletterword] = "''";
1885                                 $arr[$firstsingleletterword - 1] .= "'";
1886                         } elseif ( $firstmultiletterword > -1 ) {
1887                                 // If not, but there's a multi-letter word, use that one.
1888                                 $arr[$firstmultiletterword] = "''";
1889                                 $arr[$firstmultiletterword - 1] .= "'";
1890                         } elseif ( $firstspace > -1 ) {
1891                                 // ... otherwise use the first one that has neither.
1892                                 // (notice that it is possible for all three to be -1 if, for example,
1893                                 // there is only one pentuple-apostrophe in the line)
1894                                 $arr[$firstspace] = "''";
1895                                 $arr[$firstspace - 1] .= "'";
1896                         }
1897                 }
1898
1899                 // Now let's actually convert our apostrophic mush to HTML!
1900                 $output = '';
1901                 $buffer = '';
1902                 $state = '';
1903                 $i = 0;
1904                 foreach ( $arr as $r ) {
1905                         if ( ( $i % 2 ) == 0 ) {
1906                                 if ( $state === 'both' ) {
1907                                         $buffer .= $r;
1908                                 } else {
1909                                         $output .= $r;
1910                                 }
1911                         } else {
1912                                 $thislen = strlen( $r );
1913                                 if ( $thislen == 2 ) {
1914                                         if ( $state === 'i' ) {
1915                                                 $output .= '</i>';
1916                                                 $state = '';
1917                                         } elseif ( $state === 'bi' ) {
1918                                                 $output .= '</i>';
1919                                                 $state = 'b';
1920                                         } elseif ( $state === 'ib' ) {
1921                                                 $output .= '</b></i><b>';
1922                                                 $state = 'b';
1923                                         } elseif ( $state === 'both' ) {
1924                                                 $output .= '<b><i>' . $buffer . '</i>';
1925                                                 $state = 'b';
1926                                         } else { // $state can be 'b' or ''
1927                                                 $output .= '<i>';
1928                                                 $state .= 'i';
1929                                         }
1930                                 } elseif ( $thislen == 3 ) {
1931                                         if ( $state === 'b' ) {
1932                                                 $output .= '</b>';
1933                                                 $state = '';
1934                                         } elseif ( $state === 'bi' ) {
1935                                                 $output .= '</i></b><i>';
1936                                                 $state = 'i';
1937                                         } elseif ( $state === 'ib' ) {
1938                                                 $output .= '</b>';
1939                                                 $state = 'i';
1940                                         } elseif ( $state === 'both' ) {
1941                                                 $output .= '<i><b>' . $buffer . '</b>';
1942                                                 $state = 'i';
1943                                         } else { // $state can be 'i' or ''
1944                                                 $output .= '<b>';
1945                                                 $state .= 'b';
1946                                         }
1947                                 } elseif ( $thislen == 5 ) {
1948                                         if ( $state === 'b' ) {
1949                                                 $output .= '</b><i>';
1950                                                 $state = 'i';
1951                                         } elseif ( $state === 'i' ) {
1952                                                 $output .= '</i><b>';
1953                                                 $state = 'b';
1954                                         } elseif ( $state === 'bi' ) {
1955                                                 $output .= '</i></b>';
1956                                                 $state = '';
1957                                         } elseif ( $state === 'ib' ) {
1958                                                 $output .= '</b></i>';
1959                                                 $state = '';
1960                                         } elseif ( $state === 'both' ) {
1961                                                 $output .= '<i><b>' . $buffer . '</b></i>';
1962                                                 $state = '';
1963                                         } else { // ($state == '')
1964                                                 $buffer = '';
1965                                                 $state = 'both';
1966                                         }
1967                                 }
1968                         }
1969                         $i++;
1970                 }
1971                 // Now close all remaining tags.  Notice that the order is important.
1972                 if ( $state === 'b' || $state === 'ib' ) {
1973                         $output .= '</b>';
1974                 }
1975                 if ( $state === 'i' || $state === 'bi' || $state === 'ib' ) {
1976                         $output .= '</i>';
1977                 }
1978                 if ( $state === 'bi' ) {
1979                         $output .= '</b>';
1980                 }
1981                 // There might be lonely ''''', so make sure we have a buffer
1982                 if ( $state === 'both' && $buffer ) {
1983                         $output .= '<b><i>' . $buffer . '</i></b>';
1984                 }
1985                 return $output;
1986         }
1987
1988         /**
1989          * Replace external links (REL)
1990          *
1991          * Note: this is all very hackish and the order of execution matters a lot.
1992          * Make sure to run tests/parser/parserTests.php if you change this code.
1993          *
1994          * @private
1995          *
1996          * @param string $text
1997          *
1998          * @throws MWException
1999          * @return string
2000          */
2001         public function replaceExternalLinks( $text ) {
2002                 $bits = preg_split( $this->mExtLinkBracketedRegex, $text, -1, PREG_SPLIT_DELIM_CAPTURE );
2003                 if ( $bits === false ) {
2004                         throw new MWException( "PCRE needs to be compiled with "
2005                                 . "--enable-unicode-properties in order for MediaWiki to function" );
2006                 }
2007                 $s = array_shift( $bits );
2008
2009                 $i = 0;
2010                 while ( $i < count( $bits ) ) {
2011                         $url = $bits[$i++];
2012                         $i++; // protocol
2013                         $text = $bits[$i++];
2014                         $trail = $bits[$i++];
2015
2016                         # The characters '<' and '>' (which were escaped by
2017                         # removeHTMLtags()) should not be included in
2018                         # URLs, per RFC 2396.
2019                         $m2 = [];
2020                         if ( preg_match( '/&(lt|gt);/', $url, $m2, PREG_OFFSET_CAPTURE ) ) {
2021                                 $text = substr( $url, $m2[0][1] ) . ' ' . $text;
2022                                 $url = substr( $url, 0, $m2[0][1] );
2023                         }
2024
2025                         # If the link text is an image URL, replace it with an <img> tag
2026                         # This happened by accident in the original parser, but some people used it extensively
2027                         $img = $this->maybeMakeExternalImage( $text );
2028                         if ( $img !== false ) {
2029                                 $text = $img;
2030                         }
2031
2032                         $dtrail = '';
2033
2034                         # Set linktype for CSS
2035                         $linktype = 'text';
2036
2037                         # No link text, e.g. [http://domain.tld/some.link]
2038                         if ( $text == '' ) {
2039                                 # Autonumber
2040                                 $langObj = $this->getTargetLanguage();
2041                                 $text = '[' . $langObj->formatNum( ++$this->mAutonumber ) . ']';
2042                                 $linktype = 'autonumber';
2043                         } else {
2044                                 # Have link text, e.g. [http://domain.tld/some.link text]s
2045                                 # Check for trail
2046                                 list( $dtrail, $trail ) = Linker::splitTrail( $trail );
2047                         }
2048
2049                         // Excluding protocol-relative URLs may avoid many false positives.
2050                         if ( preg_match( '/^(?:' . wfUrlProtocolsWithoutProtRel() . ')/', $text ) ) {
2051                                 $text = $this->getTargetLanguage()->getConverter()->markNoConversion( $text );
2052                         }
2053
2054                         $url = Sanitizer::cleanUrl( $url );
2055
2056                         # Use the encoded URL
2057                         # This means that users can paste URLs directly into the text
2058                         # Funny characters like ö aren't valid in URLs anyway
2059                         # This was changed in August 2004
2060                         $s .= Linker::makeExternalLink( $url, $text, false, $linktype,
2061                                 $this->getExternalLinkAttribs( $url ), $this->mTitle ) . $dtrail . $trail;
2062
2063                         # Register link in the output object.
2064                         $this->mOutput->addExternalLink( $url );
2065                 }
2066
2067                 return $s;
2068         }
2069
2070         /**
2071          * Get the rel attribute for a particular external link.
2072          *
2073          * @since 1.21
2074          * @param string|bool $url Optional URL, to extract the domain from for rel =>
2075          *   nofollow if appropriate
2076          * @param LinkTarget|null $title Optional LinkTarget, for wgNoFollowNsExceptions lookups
2077          * @return string|null Rel attribute for $url
2078          */
2079         public static function getExternalLinkRel( $url = false, $title = null ) {
2080                 global $wgNoFollowLinks, $wgNoFollowNsExceptions, $wgNoFollowDomainExceptions;
2081                 $ns = $title ? $title->getNamespace() : false;
2082                 if ( $wgNoFollowLinks && !in_array( $ns, $wgNoFollowNsExceptions )
2083                         && !wfMatchesDomainList( $url, $wgNoFollowDomainExceptions )
2084                 ) {
2085                         return 'nofollow';
2086                 }
2087                 return null;
2088         }
2089
2090         /**
2091          * Get an associative array of additional HTML attributes appropriate for a
2092          * particular external link.  This currently may include rel => nofollow
2093          * (depending on configuration, namespace, and the URL's domain) and/or a
2094          * target attribute (depending on configuration).
2095          *
2096          * @param string $url URL to extract the domain from for rel =>
2097          *   nofollow if appropriate
2098          * @return array Associative array of HTML attributes
2099          */
2100         public function getExternalLinkAttribs( $url ) {
2101                 $attribs = [];
2102                 $rel = self::getExternalLinkRel( $url, $this->mTitle );
2103
2104                 $target = $this->mOptions->getExternalLinkTarget();
2105                 if ( $target ) {
2106                         $attribs['target'] = $target;
2107                         if ( !in_array( $target, [ '_self', '_parent', '_top' ] ) ) {
2108                                 // T133507. New windows can navigate parent cross-origin.
2109                                 // Including noreferrer due to lacking browser
2110                                 // support of noopener. Eventually noreferrer should be removed.
2111                                 if ( $rel !== '' ) {
2112                                         $rel .= ' ';
2113                                 }
2114                                 $rel .= 'noreferrer noopener';
2115                         }
2116                 }
2117                 $attribs['rel'] = $rel;
2118                 return $attribs;
2119         }
2120
2121         /**
2122          * Replace unusual escape codes in a URL with their equivalent characters
2123          *
2124          * This generally follows the syntax defined in RFC 3986, with special
2125          * consideration for HTTP query strings.
2126          *
2127          * @param string $url
2128          * @return string
2129          */
2130         public static function normalizeLinkUrl( $url ) {
2131                 # Test for RFC 3986 IPv6 syntax
2132                 $scheme = '[a-z][a-z0-9+.-]*:';
2133                 $userinfo = '(?:[a-z0-9\-._~!$&\'()*+,;=:]|%[0-9a-f]{2})*';
2134                 $ipv6Host = '\\[((?:[0-9a-f:]|%3[0-A]|%[46][1-6])+)\\]';
2135                 if ( preg_match( "<^(?:{$scheme})?//(?:{$userinfo}@)?{$ipv6Host}(?:[:/?#].*|)$>i", $url, $m ) &&
2136                         IP::isValid( rawurldecode( $m[1] ) )
2137                 ) {
2138                         $isIPv6 = rawurldecode( $m[1] );
2139                 } else {
2140                         $isIPv6 = false;
2141                 }
2142
2143                 # Make sure unsafe characters are encoded
2144                 $url = preg_replace_callback( '/[\x00-\x20"<>\[\\\\\]^`{|}\x7F-\xFF]/',
2145                         function ( $m ) {
2146                                 return rawurlencode( $m[0] );
2147                         },
2148                         $url
2149                 );
2150
2151                 $ret = '';
2152                 $end = strlen( $url );
2153
2154                 # Fragment part - 'fragment'
2155                 $start = strpos( $url, '#' );
2156                 if ( $start !== false && $start < $end ) {
2157                         $ret = self::normalizeUrlComponent(
2158                                 substr( $url, $start, $end - $start ), '"#%<>[\]^`{|}' ) . $ret;
2159                         $end = $start;
2160                 }
2161
2162                 # Query part - 'query' minus &=+;
2163                 $start = strpos( $url, '?' );
2164                 if ( $start !== false && $start < $end ) {
2165                         $ret = self::normalizeUrlComponent(
2166                                 substr( $url, $start, $end - $start ), '"#%<>[\]^`{|}&=+;' ) . $ret;
2167                         $end = $start;
2168                 }
2169
2170                 # Scheme and path part - 'pchar'
2171                 # (we assume no userinfo or encoded colons in the host)
2172                 $ret = self::normalizeUrlComponent(
2173                         substr( $url, 0, $end ), '"#%<>[\]^`{|}/?' ) . $ret;
2174
2175                 # Fix IPv6 syntax
2176                 if ( $isIPv6 !== false ) {
2177                         $ipv6Host = "%5B({$isIPv6})%5D";
2178                         $ret = preg_replace(
2179                                 "<^((?:{$scheme})?//(?:{$userinfo}@)?){$ipv6Host}(?=[:/?#]|$)>i",
2180                                 "$1[$2]",
2181                                 $ret
2182                         );
2183                 }
2184
2185                 return $ret;
2186         }
2187
2188         private static function normalizeUrlComponent( $component, $unsafe ) {
2189                 $callback = function ( $matches ) use ( $unsafe ) {
2190                         $char = urldecode( $matches[0] );
2191                         $ord = ord( $char );
2192                         if ( $ord > 32 && $ord < 127 && strpos( $unsafe, $char ) === false ) {
2193                                 # Unescape it
2194                                 return $char;
2195                         } else {
2196                                 # Leave it escaped, but use uppercase for a-f
2197                                 return strtoupper( $matches[0] );
2198                         }
2199                 };
2200                 return preg_replace_callback( '/%[0-9A-Fa-f]{2}/', $callback, $component );
2201         }
2202
2203         /**
2204          * make an image if it's allowed, either through the global
2205          * option, through the exception, or through the on-wiki whitelist
2206          *
2207          * @param string $url
2208          *
2209          * @return string
2210          */
2211         private function maybeMakeExternalImage( $url ) {
2212                 $imagesfrom = $this->mOptions->getAllowExternalImagesFrom();
2213                 $imagesexception = !empty( $imagesfrom );
2214                 $text = false;
2215                 # $imagesfrom could be either a single string or an array of strings, parse out the latter
2216                 if ( $imagesexception && is_array( $imagesfrom ) ) {
2217                         $imagematch = false;
2218                         foreach ( $imagesfrom as $match ) {
2219                                 if ( strpos( $url, $match ) === 0 ) {
2220                                         $imagematch = true;
2221                                         break;
2222                                 }
2223                         }
2224                 } elseif ( $imagesexception ) {
2225                         $imagematch = ( strpos( $url, $imagesfrom ) === 0 );
2226                 } else {
2227                         $imagematch = false;
2228                 }
2229
2230                 if ( $this->mOptions->getAllowExternalImages()
2231                         || ( $imagesexception && $imagematch )
2232                 ) {
2233                         if ( preg_match( self::EXT_IMAGE_REGEX, $url ) ) {
2234                                 # Image found
2235                                 $text = Linker::makeExternalImage( $url );
2236                         }
2237                 }
2238                 if ( !$text && $this->mOptions->getEnableImageWhitelist()
2239                         && preg_match( self::EXT_IMAGE_REGEX, $url )
2240                 ) {
2241                         $whitelist = explode(
2242                                 "\n",
2243                                 wfMessage( 'external_image_whitelist' )->inContentLanguage()->text()
2244                         );
2245
2246                         foreach ( $whitelist as $entry ) {
2247                                 # Sanitize the regex fragment, make it case-insensitive, ignore blank entries/comments
2248                                 if ( strpos( $entry, '#' ) === 0 || $entry === '' ) {
2249                                         continue;
2250                                 }
2251                                 if ( preg_match( '/' . str_replace( '/', '\\/', $entry ) . '/i', $url ) ) {
2252                                         # Image matches a whitelist entry
2253                                         $text = Linker::makeExternalImage( $url );
2254                                         break;
2255                                 }
2256                         }
2257                 }
2258                 return $text;
2259         }
2260
2261         /**
2262          * Process [[ ]] wikilinks
2263          *
2264          * @param string $s
2265          *
2266          * @return string Processed text
2267          *
2268          * @private
2269          */
2270         public function replaceInternalLinks( $s ) {
2271                 $this->mLinkHolders->merge( $this->replaceInternalLinks2( $s ) );
2272                 return $s;
2273         }
2274
2275         /**
2276          * Process [[ ]] wikilinks (RIL)
2277          * @param string &$s
2278          * @throws MWException
2279          * @return LinkHolderArray
2280          *
2281          * @private
2282          */
2283         public function replaceInternalLinks2( &$s ) {
2284                 static $tc = false, $e1, $e1_img;
2285                 # the % is needed to support urlencoded titles as well
2286                 if ( !$tc ) {
2287                         $tc = Title::legalChars() . '#%';
2288                         # Match a link having the form [[namespace:link|alternate]]trail
2289                         $e1 = "/^([{$tc}]+)(?:\\|(.+?))?]](.*)\$/sD";
2290                         # Match cases where there is no "]]", which might still be images
2291                         $e1_img = "/^([{$tc}]+)\\|(.*)\$/sD";
2292                 }
2293
2294                 $holders = new LinkHolderArray( $this );
2295
2296                 # split the entire text string on occurrences of [[
2297                 $a = StringUtils::explode( '[[', ' ' . $s );
2298                 # get the first element (all text up to first [[), and remove the space we added
2299                 $s = $a->current();
2300                 $a->next();
2301                 $line = $a->current(); # Workaround for broken ArrayIterator::next() that returns "void"
2302                 $s = substr( $s, 1 );
2303
2304                 $useLinkPrefixExtension = $this->getTargetLanguage()->linkPrefixExtension();
2305                 $e2 = null;
2306                 if ( $useLinkPrefixExtension ) {
2307                         # Match the end of a line for a word that's not followed by whitespace,
2308                         # e.g. in the case of 'The Arab al[[Razi]]', 'al' will be matched
2309                         $charset = $this->contLang->linkPrefixCharset();
2310                         $e2 = "/^((?>.*[^$charset]|))(.+)$/sDu";
2311                 }
2312
2313                 if ( is_null( $this->mTitle ) ) {
2314                         throw new MWException( __METHOD__ . ": \$this->mTitle is null\n" );
2315                 }
2316                 $nottalk = !$this->mTitle->isTalkPage();
2317
2318                 if ( $useLinkPrefixExtension ) {
2319                         $m = [];
2320                         if ( preg_match( $e2, $s, $m ) ) {
2321                                 $first_prefix = $m[2];
2322                         } else {
2323                                 $first_prefix = false;
2324                         }
2325                 } else {
2326                         $prefix = '';
2327                 }
2328
2329                 $useSubpages = $this->areSubpagesAllowed();
2330
2331                 # Loop for each link
2332                 for ( ; $line !== false && $line !== null; $a->next(), $line = $a->current() ) {
2333                         # Check for excessive memory usage
2334                         if ( $holders->isBig() ) {
2335                                 # Too big
2336                                 # Do the existence check, replace the link holders and clear the array
2337                                 $holders->replace( $s );
2338                                 $holders->clear();
2339                         }
2340
2341                         if ( $useLinkPrefixExtension ) {
2342                                 if ( preg_match( $e2, $s, $m ) ) {
2343                                         list( , $s, $prefix ) = $m;
2344                                 } else {
2345                                         $prefix = '';
2346                                 }
2347                                 # first link
2348                                 if ( $first_prefix ) {
2349                                         $prefix = $first_prefix;
2350                                         $first_prefix = false;
2351                                 }
2352                         }
2353
2354                         $might_be_img = false;
2355
2356                         if ( preg_match( $e1, $line, $m ) ) { # page with normal text or alt
2357                                 $text = $m[2];
2358                                 # If we get a ] at the beginning of $m[3] that means we have a link that's something like:
2359                                 # [[Image:Foo.jpg|[http://example.com desc]]] <- having three ] in a row fucks up,
2360                                 # the real problem is with the $e1 regex
2361                                 # See T1500.
2362                                 # Still some problems for cases where the ] is meant to be outside punctuation,
2363                                 # and no image is in sight. See T4095.
2364                                 if ( $text !== ''
2365                                         && substr( $m[3], 0, 1 ) === ']'
2366                                         && strpos( $text, '[' ) !== false
2367                                 ) {
2368                                         $text .= ']'; # so that replaceExternalLinks($text) works later
2369                                         $m[3] = substr( $m[3], 1 );
2370                                 }
2371                                 # fix up urlencoded title texts
2372                                 if ( strpos( $m[1], '%' ) !== false ) {
2373                                         # Should anchors '#' also be rejected?
2374                                         $m[1] = str_replace( [ '<', '>' ], [ '&lt;', '&gt;' ], rawurldecode( $m[1] ) );
2375                                 }
2376                                 $trail = $m[3];
2377                         } elseif ( preg_match( $e1_img, $line, $m ) ) {
2378                                 # Invalid, but might be an image with a link in its caption
2379                                 $might_be_img = true;
2380                                 $text = $m[2];
2381                                 if ( strpos( $m[1], '%' ) !== false ) {
2382                                         $m[1] = str_replace( [ '<', '>' ], [ '&lt;', '&gt;' ], rawurldecode( $m[1] ) );
2383                                 }
2384                                 $trail = "";
2385                         } else { # Invalid form; output directly
2386                                 $s .= $prefix . '[[' . $line;
2387                                 continue;
2388                         }
2389
2390                         $origLink = ltrim( $m[1], ' ' );
2391
2392                         # Don't allow internal links to pages containing
2393                         # PROTO: where PROTO is a valid URL protocol; these
2394                         # should be external links.
2395                         if ( preg_match( '/^(?i:' . $this->mUrlProtocols . ')/', $origLink ) ) {
2396                                 $s .= $prefix . '[[' . $line;
2397                                 continue;
2398                         }
2399
2400                         # Make subpage if necessary
2401                         if ( $useSubpages ) {
2402                                 $link = $this->maybeDoSubpageLink( $origLink, $text );
2403                         } else {
2404                                 $link = $origLink;
2405                         }
2406
2407                         // \x7f isn't a default legal title char, so most likely strip
2408                         // markers will force us into the "invalid form" path above.  But,
2409                         // just in case, let's assert that xmlish tags aren't valid in
2410                         // the title position.
2411                         $unstrip = $this->mStripState->killMarkers( $link );
2412                         $noMarkers = ( $unstrip === $link );
2413
2414                         $nt = $noMarkers ? Title::newFromText( $link ) : null;
2415                         if ( $nt === null ) {
2416                                 $s .= $prefix . '[[' . $line;
2417                                 continue;
2418                         }
2419
2420                         $ns = $nt->getNamespace();
2421                         $iw = $nt->getInterwiki();
2422
2423                         $noforce = ( substr( $origLink, 0, 1 ) !== ':' );
2424
2425                         if ( $might_be_img ) { # if this is actually an invalid link
2426                                 if ( $ns == NS_FILE && $noforce ) { # but might be an image
2427                                         $found = false;
2428                                         while ( true ) {
2429                                                 # look at the next 'line' to see if we can close it there
2430                                                 $a->next();
2431                                                 $next_line = $a->current();
2432                                                 if ( $next_line === false || $next_line === null ) {
2433                                                         break;
2434                                                 }
2435                                                 $m = explode( ']]', $next_line, 3 );
2436                                                 if ( count( $m ) == 3 ) {
2437                                                         # the first ]] closes the inner link, the second the image
2438                                                         $found = true;
2439                                                         $text .= "[[{$m[0]}]]{$m[1]}";
2440                                                         $trail = $m[2];
2441                                                         break;
2442                                                 } elseif ( count( $m ) == 2 ) {
2443                                                         # if there's exactly one ]] that's fine, we'll keep looking
2444                                                         $text .= "[[{$m[0]}]]{$m[1]}";
2445                                                 } else {
2446                                                         # if $next_line is invalid too, we need look no further
2447                                                         $text .= '[[' . $next_line;
2448                                                         break;
2449                                                 }
2450                                         }
2451                                         if ( !$found ) {
2452                                                 # we couldn't find the end of this imageLink, so output it raw
2453                                                 # but don't ignore what might be perfectly normal links in the text we've examined
2454                                                 $holders->merge( $this->replaceInternalLinks2( $text ) );
2455                                                 $s .= "{$prefix}[[$link|$text";
2456                                                 # note: no $trail, because without an end, there *is* no trail
2457                                                 continue;
2458                                         }
2459                                 } else { # it's not an image, so output it raw
2460                                         $s .= "{$prefix}[[$link|$text";
2461                                         # note: no $trail, because without an end, there *is* no trail
2462                                         continue;
2463                                 }
2464                         }
2465
2466                         $wasblank = ( $text == '' );
2467                         if ( $wasblank ) {
2468                                 $text = $link;
2469                                 if ( !$noforce ) {
2470                                         # Strip off leading ':'
2471                                         $text = substr( $text, 1 );
2472                                 }
2473                         } else {
2474                                 # T6598 madness. Handle the quotes only if they come from the alternate part
2475                                 # [[Lista d''e paise d''o munno]] -> <a href="...">Lista d''e paise d''o munno</a>
2476                                 # [[Criticism of Harry Potter|Criticism of ''Harry Potter'']]
2477                                 #    -> <a href="Criticism of Harry Potter">Criticism of <i>Harry Potter</i></a>
2478                                 $text = $this->doQuotes( $text );
2479                         }
2480
2481                         # Link not escaped by : , create the various objects
2482                         if ( $noforce && !$nt->wasLocalInterwiki() ) {
2483                                 # Interwikis
2484                                 if (
2485                                         $iw && $this->mOptions->getInterwikiMagic() && $nottalk && (
2486                                                 Language::fetchLanguageName( $iw, null, 'mw' ) ||
2487                                                 in_array( $iw, $this->svcOptions->get( 'ExtraInterlanguageLinkPrefixes' ) )
2488                                         )
2489                                 ) {
2490                                         # T26502: filter duplicates
2491                                         if ( !isset( $this->mLangLinkLanguages[$iw] ) ) {
2492                                                 $this->mLangLinkLanguages[$iw] = true;
2493                                                 $this->mOutput->addLanguageLink( $nt->getFullText() );
2494                                         }
2495
2496                                         /**
2497                                          * Strip the whitespace interwiki links produce, see T10897
2498                                          */
2499                                         $s = rtrim( $s . $prefix ) . $trail; # T175416
2500                                         continue;
2501                                 }
2502
2503                                 if ( $ns == NS_FILE ) {
2504                                         if ( !wfIsBadImage( $nt->getDBkey(), $this->mTitle ) ) {
2505                                                 if ( $wasblank ) {
2506                                                         # if no parameters were passed, $text
2507                                                         # becomes something like "File:Foo.png",
2508                                                         # which we don't want to pass on to the
2509                                                         # image generator
2510                                                         $text = '';
2511                                                 } else {
2512                                                         # recursively parse links inside the image caption
2513                                                         # actually, this will parse them in any other parameters, too,
2514                                                         # but it might be hard to fix that, and it doesn't matter ATM
2515                                                         $text = $this->replaceExternalLinks( $text );
2516                                                         $holders->merge( $this->replaceInternalLinks2( $text ) );
2517                                                 }
2518                                                 # cloak any absolute URLs inside the image markup, so replaceExternalLinks() won't touch them
2519                                                 $s .= $prefix . $this->armorLinks(
2520                                                         $this->makeImage( $nt, $text, $holders ) ) . $trail;
2521                                                 continue;
2522                                         }
2523                                 } elseif ( $ns == NS_CATEGORY ) {
2524                                         /**
2525                                          * Strip the whitespace Category links produce, see T2087
2526                                          */
2527                                         $s = rtrim( $s . $prefix ) . $trail; # T2087, T87753
2528
2529                                         if ( $wasblank ) {
2530                                                 $sortkey = $this->getDefaultSort();
2531                                         } else {
2532                                                 $sortkey = $text;
2533                                         }
2534                                         $sortkey = Sanitizer::decodeCharReferences( $sortkey );
2535                                         $sortkey = str_replace( "\n", '', $sortkey );
2536                                         $sortkey = $this->getTargetLanguage()->convertCategoryKey( $sortkey );
2537                                         $this->mOutput->addCategory( $nt->getDBkey(), $sortkey );
2538
2539                                         continue;
2540                                 }
2541                         }
2542
2543                         # Self-link checking. For some languages, variants of the title are checked in
2544                         # LinkHolderArray::doVariants() to allow batching the existence checks necessary
2545                         # for linking to a different variant.
2546                         if ( $ns != NS_SPECIAL && $nt->equals( $this->mTitle ) && !$nt->hasFragment() ) {
2547                                 $s .= $prefix . Linker::makeSelfLinkObj( $nt, $text, '', $trail );
2548                                 continue;
2549                         }
2550
2551                         # NS_MEDIA is a pseudo-namespace for linking directly to a file
2552                         # @todo FIXME: Should do batch file existence checks, see comment below
2553                         if ( $ns == NS_MEDIA ) {
2554                                 # Give extensions a chance to select the file revision for us
2555                                 $options = [];
2556                                 $descQuery = false;
2557                                 Hooks::run( 'BeforeParserFetchFileAndTitle',
2558                                         [ $this, $nt, &$options, &$descQuery ] );
2559                                 # Fetch and register the file (file title may be different via hooks)
2560                                 list( $file, $nt ) = $this->fetchFileAndTitle( $nt, $options );
2561                                 # Cloak with NOPARSE to avoid replacement in replaceExternalLinks
2562                                 $s .= $prefix . $this->armorLinks(
2563                                         Linker::makeMediaLinkFile( $nt, $file, $text ) ) . $trail;
2564                                 continue;
2565                         }
2566
2567                         # Some titles, such as valid special pages or files in foreign repos, should
2568                         # be shown as bluelinks even though they're not included in the page table
2569                         # @todo FIXME: isAlwaysKnown() can be expensive for file links; we should really do
2570                         # batch file existence checks for NS_FILE and NS_MEDIA
2571                         if ( $iw == '' && $nt->isAlwaysKnown() ) {
2572                                 $this->mOutput->addLink( $nt );
2573                                 $s .= $this->makeKnownLinkHolder( $nt, $text, $trail, $prefix );
2574                         } else {
2575                                 # Links will be added to the output link list after checking
2576                                 $s .= $holders->makeHolder( $nt, $text, [], $trail, $prefix );
2577                         }
2578                 }
2579                 return $holders;
2580         }
2581
2582         /**
2583          * Render a forced-blue link inline; protect against double expansion of
2584          * URLs if we're in a mode that prepends full URL prefixes to internal links.
2585          * Since this little disaster has to split off the trail text to avoid
2586          * breaking URLs in the following text without breaking trails on the
2587          * wiki links, it's been made into a horrible function.
2588          *
2589          * @param Title $nt
2590          * @param string $text
2591          * @param string $trail
2592          * @param string $prefix
2593          * @return string HTML-wikitext mix oh yuck
2594          */
2595         protected function makeKnownLinkHolder( $nt, $text = '', $trail = '', $prefix = '' ) {
2596                 list( $inside, $trail ) = Linker::splitTrail( $trail );
2597
2598                 if ( $text == '' ) {
2599                         $text = htmlspecialchars( $nt->getPrefixedText() );
2600                 }
2601
2602                 $link = $this->getLinkRenderer()->makeKnownLink(
2603                         $nt, new HtmlArmor( "$prefix$text$inside" )
2604                 );
2605
2606                 return $this->armorLinks( $link ) . $trail;
2607         }
2608
2609         /**
2610          * Insert a NOPARSE hacky thing into any inline links in a chunk that's
2611          * going to go through further parsing steps before inline URL expansion.
2612          *
2613          * Not needed quite as much as it used to be since free links are a bit
2614          * more sensible these days. But bracketed links are still an issue.
2615          *
2616          * @param string $text More-or-less HTML
2617          * @return string Less-or-more HTML with NOPARSE bits
2618          */
2619         public function armorLinks( $text ) {
2620                 return preg_replace( '/\b((?i)' . $this->mUrlProtocols . ')/',
2621                         self::MARKER_PREFIX . "NOPARSE$1", $text );
2622         }
2623
2624         /**
2625          * Return true if subpage links should be expanded on this page.
2626          * @return bool
2627          */
2628         public function areSubpagesAllowed() {
2629                 # Some namespaces don't allow subpages
2630                 return $this->nsInfo->hasSubpages( $this->mTitle->getNamespace() );
2631         }
2632
2633         /**
2634          * Handle link to subpage if necessary
2635          *
2636          * @param string $target The source of the link
2637          * @param string &$text The link text, modified as necessary
2638          * @return string The full name of the link
2639          * @private
2640          */
2641         public function maybeDoSubpageLink( $target, &$text ) {
2642                 return Linker::normalizeSubpageLink( $this->mTitle, $target, $text );
2643         }
2644
2645         /**
2646          * Make lists from lines starting with ':', '*', '#', etc. (DBL)
2647          *
2648          * @param string $text
2649          * @param bool $linestart Whether or not this is at the start of a line.
2650          * @private
2651          * @return string The lists rendered as HTML
2652          */
2653         public function doBlockLevels( $text, $linestart ) {
2654                 return BlockLevelPass::doBlockLevels( $text, $linestart );
2655         }
2656
        /**
         * Return value of a magic variable (like PAGENAME)
         *
         * Flow:
         *  - The 'ParserGetVariableValueVarCache' hook runs on every call; a value
         *    already stored in $this->mVarCache is returned only when that hook
         *    returns a truthy result.
         *  - The timestamp from the parser options is converted to TS_UNIX and may
         *    be altered by the 'ParserGetVariableValueTs' hook.
         *  - The switch computes the value. Most cases fall through to the bottom,
         *    where the value is stored in $this->mVarCache. The configuration-backed
         *    cases (articlepath, sitename, server, servername, scriptpath, stylepath,
         *    contentlanguage), 'directionmark' and the default case return directly
         *    and are therefore not cached here.
         *  - Identifiers not handled by the switch are handed to the
         *    'ParserGetVariableValueSwitch' hook.
         *
         * Several cases also set output flags ('vary-page-id', 'vary-revision-id',
         * 'vary-revision-exists', 'vary-user') or record speculative IDs on the
         * parser output as a side effect.
         *
         * @private
         *
         * @param string $index Magic variable identifier as mapped in MagicWordFactory::$mVariableIDs
         * @param bool|PPFrame $frame Only forwarded to the 'ParserGetVariableValueSwitch' hook
         *
         * @throws MWException If no title is set on the parser
         * @return string|int|null Usually a string; some cases yield an int (e.g. the
         *   (int) casts for 'revisionday' and 'revisionmonth1'), and the default case
         *   returns null unless the hook fills in a value
         */
        public function getVariableValue( $index, $frame = false ) {
                if ( is_null( $this->mTitle ) ) {
                        // If no title set, bad things are going to happen
                        // later. Title should always be set since this
                        // should only be called in the middle of a parse
                        // operation (but the unit-tests do funky stuff)
                        throw new MWException( __METHOD__ . ' Should only be '
                                . ' called while parsing (no title set)' );
                }

                // Avoid PHP 7.1 warning from passing $this by reference
                $parser = $this;

                /**
                 * Some of these require message or data lookups and can be
                 * expensive to check many times.
                 */
                if (
                        Hooks::run( 'ParserGetVariableValueVarCache', [ &$parser, &$this->mVarCache ] ) &&
                        isset( $this->mVarCache[$index] )
                ) {
                        return $this->mVarCache[$index];
                }

                // Timestamp from the parser options as a Unix timestamp; the hook receives it by reference
                $ts = wfTimestamp( TS_UNIX, $this->mOptions->getTimestamp() );
                Hooks::run( 'ParserGetVariableValueTs', [ &$parser, &$ts ] );

                $pageLang = $this->getFunctionLang();

                switch ( $index ) {
                        case '!':
                                $value = '|';
                                break;
                        // Month/day components. 'current*' cases use MWTimestamp::getInstance(),
                        // 'local*' cases use MWTimestamp::getLocalInstance()
                        // NOTE(review): presumably UTC vs. the wiki's local timezone - confirm in MWTimestamp
                        case 'currentmonth':
                                $value = $pageLang->formatNum( MWTimestamp::getInstance( $ts )->format( 'm' ), true );
                                break;
                        case 'currentmonth1':
                                $value = $pageLang->formatNum( MWTimestamp::getInstance( $ts )->format( 'n' ), true );
                                break;
                        case 'currentmonthname':
                                $value = $pageLang->getMonthName( MWTimestamp::getInstance( $ts )->format( 'n' ) );
                                break;
                        case 'currentmonthnamegen':
                                $value = $pageLang->getMonthNameGen( MWTimestamp::getInstance( $ts )->format( 'n' ) );
                                break;
                        case 'currentmonthabbrev':
                                $value = $pageLang->getMonthAbbreviation( MWTimestamp::getInstance( $ts )->format( 'n' ) );
                                break;
                        case 'currentday':
                                $value = $pageLang->formatNum( MWTimestamp::getInstance( $ts )->format( 'j' ), true );
                                break;
                        case 'currentday2':
                                $value = $pageLang->formatNum( MWTimestamp::getInstance( $ts )->format( 'd' ), true );
                                break;
                        case 'localmonth':
                                $value = $pageLang->formatNum( MWTimestamp::getLocalInstance( $ts )->format( 'm' ), true );
                                break;
                        case 'localmonth1':
                                $value = $pageLang->formatNum( MWTimestamp::getLocalInstance( $ts )->format( 'n' ), true );
                                break;
                        case 'localmonthname':
                                $value = $pageLang->getMonthName( MWTimestamp::getLocalInstance( $ts )->format( 'n' ) );
                                break;
                        case 'localmonthnamegen':
                                $value = $pageLang->getMonthNameGen( MWTimestamp::getLocalInstance( $ts )->format( 'n' ) );
                                break;
                        case 'localmonthabbrev':
                                $value = $pageLang->getMonthAbbreviation( MWTimestamp::getLocalInstance( $ts )->format( 'n' ) );
                                break;
                        case 'localday':
                                $value = $pageLang->formatNum( MWTimestamp::getLocalInstance( $ts )->format( 'j' ), true );
                                break;
                        case 'localday2':
                                $value = $pageLang->formatNum( MWTimestamp::getLocalInstance( $ts )->format( 'd' ), true );
                                break;
                        // Page name variants, all passed through wfEscapeWikiText(). The names
                        // ending in an extra 'e' use the URL-form accessors or wfUrlencode().
                        case 'pagename':
                                $value = wfEscapeWikiText( $this->mTitle->getText() );
                                break;
                        case 'pagenamee':
                                $value = wfEscapeWikiText( $this->mTitle->getPartialURL() );
                                break;
                        case 'fullpagename':
                                $value = wfEscapeWikiText( $this->mTitle->getPrefixedText() );
                                break;
                        case 'fullpagenamee':
                                $value = wfEscapeWikiText( $this->mTitle->getPrefixedURL() );
                                break;
                        case 'subpagename':
                                $value = wfEscapeWikiText( $this->mTitle->getSubpageText() );
                                break;
                        case 'subpagenamee':
                                $value = wfEscapeWikiText( $this->mTitle->getSubpageUrlForm() );
                                break;
                        case 'rootpagename':
                                $value = wfEscapeWikiText( $this->mTitle->getRootText() );
                                break;
                        case 'rootpagenamee':
                                // Spaces become underscores before URL-encoding
                                $value = wfEscapeWikiText( wfUrlencode( str_replace(
                                        ' ',
                                        '_',
                                        $this->mTitle->getRootText()
                                ) ) );
                                break;
                        case 'basepagename':
                                $value = wfEscapeWikiText( $this->mTitle->getBaseText() );
                                break;
                        case 'basepagenamee':
                                // Spaces become underscores before URL-encoding
                                $value = wfEscapeWikiText( wfUrlencode( str_replace(
                                        ' ',
                                        '_',
                                        $this->mTitle->getBaseText()
                                ) ) );
                                break;
                        // Talk page names are empty when the title cannot have a talk page
                        case 'talkpagename':
                                if ( $this->mTitle->canHaveTalkPage() ) {
                                        $talkPage = $this->mTitle->getTalkPage();
                                        $value = wfEscapeWikiText( $talkPage->getPrefixedText() );
                                } else {
                                        $value = '';
                                }
                                break;
                        case 'talkpagenamee':
                                if ( $this->mTitle->canHaveTalkPage() ) {
                                        $talkPage = $this->mTitle->getTalkPage();
                                        $value = wfEscapeWikiText( $talkPage->getPrefixedURL() );
                                } else {
                                        $value = '';
                                }
                                break;
                        case 'subjectpagename':
                                $subjPage = $this->mTitle->getSubjectPage();
                                $value = wfEscapeWikiText( $subjPage->getPrefixedText() );
                                break;
                        case 'subjectpagenamee':
                                $subjPage = $this->mTitle->getSubjectPage();
                                $value = wfEscapeWikiText( $subjPage->getPrefixedURL() );
                                break;
                        case 'pageid': // requested in T25427
                                # Inform the edit saving system that getting the canonical output
                                # after page insertion requires a parse that used that exact page ID
                                $this->setOutputFlag( 'vary-page-id', '{{PAGEID}} used' );
                                $value = $this->mTitle->getArticleID();
                                if ( !$value ) {
                                        // No article ID: fall back to the speculative page ID from
                                        // the parser options and record its use on the output
                                        $value = $this->mOptions->getSpeculativePageId();
                                        if ( $value ) {
                                                $this->mOutput->setSpeculativePageIdUsed( $value );
                                        }
                                }
                                break;
                        case 'revisionid':
                                if (
                                        $this->svcOptions->get( 'MiserMode' ) &&
                                        !$this->mOptions->getInterfaceMessage() &&
                                        // @TODO: disallow this word on all namespaces
                                        $this->nsInfo->isContent( $this->mTitle->getNamespace() )
                                ) {
                                        // Use a stub result instead of the actual revision ID in order to avoid
                                        // double parses on page save but still allow preview detection (T137900)
                                        if ( $this->getRevisionId() || $this->mOptions->getSpeculativeRevId() ) {
                                                $value = '-';
                                        } else {
                                                $this->setOutputFlag( 'vary-revision-exists', '{{REVISIONID}} used' );
                                                $value = '';
                                        }
                                } else {
                                        # Inform the edit saving system that getting the canonical output after
                                        # revision insertion requires a parse that used that exact revision ID
                                        $this->setOutputFlag( 'vary-revision-id', '{{REVISIONID}} used' );
                                        $value = $this->getRevisionId();
                                        if ( $value === 0 ) {
                                                // An ID of exactly 0: try the revision object's own ID instead
                                                $rev = $this->getRevisionObject();
                                                $value = $rev ? $rev->getId() : $value;
                                        }
                                        if ( !$value ) {
                                                // Still nothing: use the speculative revision ID, if any,
                                                // and record its use on the output
                                                $value = $this->mOptions->getSpeculativeRevId();
                                                if ( $value ) {
                                                        $this->mOutput->setSpeculativeRevIdUsed( $value );
                                                }
                                        }
                                }
                                break;
                        // Revision timestamp pieces: (offset, length) slices of the timestamp string.
                        // The (int) casts drop the leading zero for the unpadded variants.
                        case 'revisionday':
                                $value = (int)$this->getRevisionTimestampSubstring( 6, 2, self::MAX_TTS, $index );
                                break;
                        case 'revisionday2':
                                $value = $this->getRevisionTimestampSubstring( 6, 2, self::MAX_TTS, $index );
                                break;
                        case 'revisionmonth':
                                $value = $this->getRevisionTimestampSubstring( 4, 2, self::MAX_TTS, $index );
                                break;
                        case 'revisionmonth1':
                                $value = (int)$this->getRevisionTimestampSubstring( 4, 2, self::MAX_TTS, $index );
                                break;
                        case 'revisionyear':
                                $value = $this->getRevisionTimestampSubstring( 0, 4, self::MAX_TTS, $index );
                                break;
                        case 'revisiontimestamp':
                                $value = $this->getRevisionTimestampSubstring( 0, 14, self::MAX_TTS, $index );
                                break;
                        case 'revisionuser':
                                # Inform the edit saving system that getting the canonical output after
                                # revision insertion requires a parse that used the actual user ID
                                $this->setOutputFlag( 'vary-user', '{{REVISIONUSER}} used' );
                                $value = $this->getRevisionUser();
                                break;
                        case 'revisionsize':
                                $value = $this->getRevisionSize();
                                break;
                        // Namespace names: text forms replace underscores with spaces,
                        // the 'e' forms are URL-encoded instead
                        case 'namespace':
                                $value = str_replace( '_', ' ',
                                        $this->contLang->getNsText( $this->mTitle->getNamespace() ) );
                                break;
                        case 'namespacee':
                                $value = wfUrlencode( $this->contLang->getNsText( $this->mTitle->getNamespace() ) );
                                break;
                        case 'namespacenumber':
                                $value = $this->mTitle->getNamespace();
                                break;
                        case 'talkspace':
                                $value = $this->mTitle->canHaveTalkPage()
                                        ? str_replace( '_', ' ', $this->mTitle->getTalkNsText() )
                                        : '';
                                break;
                        case 'talkspacee':
                                $value = $this->mTitle->canHaveTalkPage() ? wfUrlencode( $this->mTitle->getTalkNsText() ) : '';
                                break;
                        case 'subjectspace':
                                $value = str_replace( '_', ' ', $this->mTitle->getSubjectNsText() );
                                break;
                        case 'subjectspacee':
                                $value = ( wfUrlencode( $this->mTitle->getSubjectNsText() ) );
                                break;
                        // Remaining date/time variables
                        case 'currentdayname':
                                // format( 'w' ) is 0-based, hence the + 1
                                // NOTE(review): presumably getWeekdayName() expects a 1-based index - confirm
                                $value = $pageLang->getWeekdayName( (int)MWTimestamp::getInstance( $ts )->format( 'w' ) + 1 );
                                break;
                        case 'currentyear':
                                $value = $pageLang->formatNum( MWTimestamp::getInstance( $ts )->format( 'Y' ), true );
                                break;
                        case 'currenttime':
                                $value = $pageLang->time( wfTimestamp( TS_MW, $ts ), false, false );
                                break;
                        case 'currenthour':
                                $value = $pageLang->formatNum( MWTimestamp::getInstance( $ts )->format( 'H' ), true );
                                break;
                        case 'currentweek':
                                # @bug T6594 PHP5 has it zero padded, PHP4 does not, cast to
                                # int to remove the padding
                                $value = $pageLang->formatNum( (int)MWTimestamp::getInstance( $ts )->format( 'W' ) );
                                break;
                        case 'currentdow':
                                $value = $pageLang->formatNum( MWTimestamp::getInstance( $ts )->format( 'w' ) );
                                break;
                        case 'localdayname':
                                // Same 0-based to + 1 adjustment as 'currentdayname'
                                $value = $pageLang->getWeekdayName(
                                        (int)MWTimestamp::getLocalInstance( $ts )->format( 'w' ) + 1
                                );
                                break;
                        case 'localyear':
                                $value = $pageLang->formatNum( MWTimestamp::getLocalInstance( $ts )->format( 'Y' ), true );
                                break;
                        case 'localtime':
                                $value = $pageLang->time(
                                        MWTimestamp::getLocalInstance( $ts )->format( 'YmdHis' ),
                                        false,
                                        false
                                );
                                break;
                        case 'localhour':
                                $value = $pageLang->formatNum( MWTimestamp::getLocalInstance( $ts )->format( 'H' ), true );
                                break;
                        case 'localweek':
                                # @bug T6594 PHP5 has it zero padded, PHP4 does not, cast to
                                # int to remove the padding
                                $value = $pageLang->formatNum( (int)MWTimestamp::getLocalInstance( $ts )->format( 'W' ) );
                                break;
                        case 'localdow':
                                $value = $pageLang->formatNum( MWTimestamp::getLocalInstance( $ts )->format( 'w' ) );
                                break;
                        // Site statistics, formatted as numbers for the function language
                        case 'numberofarticles':
                                $value = $pageLang->formatNum( SiteStats::articles() );
                                break;
                        case 'numberoffiles':
                                $value = $pageLang->formatNum( SiteStats::images() );
                                break;
                        case 'numberofusers':
                                $value = $pageLang->formatNum( SiteStats::users() );
                                break;
                        case 'numberofactiveusers':
                                $value = $pageLang->formatNum( SiteStats::activeUsers() );
                                break;
                        case 'numberofpages':
                                $value = $pageLang->formatNum( SiteStats::pages() );
                                break;
                        case 'numberofadmins':
                                $value = $pageLang->formatNum( SiteStats::numberingroup( 'sysop' ) );
                                break;
                        case 'numberofedits':
                                $value = $pageLang->formatNum( SiteStats::edits() );
                                break;
                        case 'currenttimestamp':
                                $value = wfTimestamp( TS_MW, $ts );
                                break;
                        case 'localtimestamp':
                                $value = MWTimestamp::getLocalInstance( $ts )->format( 'YmdHis' );
                                break;
                        case 'currentversion':
                                $value = SpecialVersion::getVersion();
                                break;
                        // The following cases return immediately, so their values are
                        // never written to $this->mVarCache by this method
                        case 'articlepath':
                                return $this->svcOptions->get( 'ArticlePath' );
                        case 'sitename':
                                return $this->svcOptions->get( 'Sitename' );
                        case 'server':
                                return $this->svcOptions->get( 'Server' );
                        case 'servername':
                                return $this->svcOptions->get( 'ServerName' );
                        case 'scriptpath':
                                return $this->svcOptions->get( 'ScriptPath' );
                        case 'stylepath':
                                return $this->svcOptions->get( 'StylePath' );
                        case 'directionmark':
                                return $pageLang->getDirMark();
                        case 'contentlanguage':
                                return $this->svcOptions->get( 'LanguageCode' );
                        case 'pagelanguage':
                                $value = $pageLang->getCode();
                                break;
                        case 'cascadingsources':
                                $value = CoreParserFunctions::cascadingsources( $this );
                                break;
                        default:
                                // Unknown identifier: let hook handlers supply the value through $ret.
                                // The variable cache is passed by reference, so handlers can fill it
                                // themselves; this method does not cache $ret.
                                $ret = null;
                                Hooks::run(
                                        'ParserGetVariableValueSwitch',
                                        [ &$parser, &$this->mVarCache, &$index, &$ret, &$frame ]
                                );

                                return $ret;
                }

                // Remember the computed value for later calls; falsy identifiers are not cached
                if ( $index ) {
                        $this->mVarCache[$index] = $value;
                }

                return $value;
        }
3014
3015         /**
3016          * @param int $start
3017          * @param int $len
3018          * @param int $mtts Max time-till-save; sets vary-revision-timestamp if result changes by then
3019          * @param string $variable Parser variable name
3020          * @return string
3021          */
3022         private function getRevisionTimestampSubstring( $start, $len, $mtts, $variable ) {
3023                 # Get the timezone-adjusted timestamp to be used for this revision
3024                 $resNow = substr( $this->getRevisionTimestamp(), $start, $len );
3025                 # Possibly set vary-revision if there is not yet an associated revision
3026                 if ( !$this->getRevisionObject() ) {
3027                         # Get the timezone-adjusted timestamp $mtts seconds in the future.
3028                         # This future is relative to the current time and not that of the
3029                         # parser options. The rendered timestamp can be compared to that
3030                         # of the timestamp specified by the parser options.
3031                         $resThen = substr(
3032                                 $this->contLang->userAdjust( wfTimestamp( TS_MW, time() + $mtts ), '' ),
3033                                 $start,
3034                                 $len
3035                         );
3036
3037                         if ( $resNow !== $resThen ) {
3038                                 # Inform the edit saving system that getting the canonical output after
3039                                 # revision insertion requires a parse that used an actual revision timestamp
3040                                 $this->setOutputFlag( 'vary-revision-timestamp', "$variable used" );
3041                         }
3042                 }
3043
3044                 return $resNow;
3045         }
3046
/**
 * Set up the magic word arrays for the magic variables (like CURRENTMONTHNAME)
 * and for the substitution modifiers.
 *
 * The results are stored in $this->mVariables and $this->mSubstWords.
 *
 * @private
 */
public function initialiseVariables() {
        $factory = $this->magicWordFactory;

        $variableWordIds = $factory->getVariableIDs();
        $substWordIds = $factory->getSubstIDs();

        $this->mVariables = $factory->newArray( $variableWordIds );
        $this->mSubstWords = $factory->newArray( $substWordIds );
}
3059
/**
 * Run the preprocessor over some wikitext and return the resulting document tree.
 * This is the ghost of replace_variables().
 *
 * The generated DOM tree must depend only on the input text and the flags, and
 * it must be the same in OT_HTML and OT_WIKI mode, to avoid a regression of T6899.
 *
 * Any flag added to $flags, or any other parameter liable to change the DOM
 * tree for a given text, must be passed through the section identifier in the
 * section edit link and thus back to extractSections().
 *
 * The output is currently only cached in process memory, but a persistent
 * cache may be implemented later which takes further advantage of these strict
 * dependency requirements.
 *
 * @param string $text The text to parse
 * @param int $flags Bitwise combination of:
 *   - self::PTD_FOR_INCLUSION: Handle "<noinclude>" and "<includeonly>" as if the
 *     text is being included. Default is to assume a direct page view.
 *
 * @return PPNode
 */
public function preprocessToDom( $text, $flags = 0 ) {
        return $this->getPreprocessor()->preprocessToObj( $text, $flags );
}
3086
/**
 * Split a string into its leading whitespace, its trimmed contents and its
 * trailing whitespace, using the default character sets of ltrim()/rtrim().
 *
 * @param string $s
 *
 * @return array Three elements: [ leading whitespace, contents, trailing whitespace ]
 */
public static function splitWhitespace( $s ) {
        $withoutLeading = ltrim( $s );
        $leadingLength = strlen( $s ) - strlen( $withoutLeading );

        $contents = rtrim( $withoutLeading );
        $trailingLength = strlen( $withoutLeading ) - strlen( $contents );

        $leading = substr( $s, 0, $leadingLength );
        # A negative offset of zero would return the whole string, so guard it
        $trailing = $trailingLength > 0
                ? substr( $withoutLeading, -$trailingLength )
                : '';

        return [ $leading, $contents, $trailing ];
}
3106
/**
 * Expand magic variables, templates and template arguments in a piece of
 * wikitext. Templates are substituted recursively, taking care to avoid
 * infinite loops.
 *
 * What gets substituted depends on the value of $mOutputType:
 *  self::OT_WIKI: only {{subst:}} templates
 *  self::OT_PREPROCESS: templates but not extension tags
 *  self::OT_HTML: all templates and extension tags
 *
 * Empty text, and text longer than the max include size of the parser
 * options, is returned unchanged.
 *
 * @param string $text The text to transform
 * @param false|PPFrame|array $frame Object describing the arguments passed to the
 *   template. Arguments may also be provided as an associative array, as
 *   was the usual case before MW1.12. Providing arguments this way may be
 *   useful for extensions wishing to perform variable replacement
 *   explicitly.
 * @param bool $argsOnly Only do argument (triple-brace) expansion, not
 *   double-brace expansion.
 * @return string
 */
public function replaceVariables( $text, $frame = false, $argsOnly = false ) {
        # Nothing to do for empty text
        $length = strlen( $text );
        if ( $length < 1 ) {
                return $text;
        }
        # Prevent too big inclusions
        if ( $length > $this->mOptions->getMaxIncludeSize() ) {
                return $text;
        }

        if ( $frame === false ) {
                $frame = $this->getPreprocessor()->newFrame();
        } elseif ( !( $frame instanceof PPFrame ) ) {
                # Legacy calling convention: arguments given as a plain array
                $this->logger->debug(
                        __METHOD__ . " called using plain parameters instead of " .
                        "a PPFrame instance. Creating custom frame."
                );
                $frame = $this->getPreprocessor()->newCustomFrame( $frame );
        }

        $tree = $this->preprocessToDom( $text );
        $expandFlags = 0;
        if ( $argsOnly ) {
                $expandFlags = PPFrame::NO_TEMPLATES;
        }

        return $frame->expand( $tree, $expandFlags );
}
3150
/**
 * Convert a list of raw argument strings into an associative argument array.
 * Refactored in 1.9 so parserfunctions can use it, too.
 *
 * Arguments without an "=" are stored untrimmed under sequential integer
 * indexes starting at 1. Arguments of the form "name=value" are split at the
 * first "=" and stored under the trimmed name with the trimmed value; a later
 * argument with the same name overwrites an earlier one.
 *
 * @param array $args List of argument strings
 *
 * @return array
 */
public static function createAssocArgs( $args ) {
        $assocArgs = [];
        $index = 1;
        foreach ( $args as $arg ) {
                $eqpos = strpos( $arg, '=' );
                if ( $eqpos === false ) {
                        # Positional argument: keep as-is under the next index
                        $assocArgs[$index++] = $arg;
                        continue;
                }

                # Named argument. trim() always returns a string (an empty one for
                # an empty or missing part), so no separate false-checks are needed.
                $name = trim( substr( $arg, 0, $eqpos ) );
                $value = trim( substr( $arg, $eqpos + 1 ) );
                $assocArgs[$name] = $value;
        }

        return $assocArgs;
}
3179
/**
 * Warn the user when a parser limitation is reached, and add the page to the
 * tracking category for that limitation.
 * Will warn the user at most once per limitation type.
 *
 * The results are shown during preview and run through the Parser (See EditPage.php)
 *
 * @param string $limitationType Should be one of:
 *   'expensive-parserfunction' (corresponding messages:
 *       'expensive-parserfunction-warning',
 *       'expensive-parserfunction-category')
 *   'post-expand-template-argument' (corresponding messages:
 *       'post-expand-template-argument-warning',
 *       'post-expand-template-argument-category')
 *   'post-expand-template-inclusion' (corresponding messages:
 *       'post-expand-template-inclusion-warning',
 *       'post-expand-template-inclusion-category')
 *   'node-count-exceeded' (corresponding messages:
 *       'node-count-exceeded-warning',
 *       'node-count-exceeded-category')
 *   'expansion-depth-exceeded' (corresponding messages:
 *       'expansion-depth-exceeded-warning',
 *       'expansion-depth-exceeded-category')
 * @param string|int|null $current Current value
 * @param string|int|null $max Maximum allowed, when an explicit limit has been
 *       exceeded, provide the values (optional)
 */
public function limitationWarn( $limitationType, $current = '', $max = '' ) {
        # Passing $current and $max does no harm when the message does not use them.
        # Not doing ->inLanguage( $this->mOptions->getUserLangObj() ), since this is shown
        # only during preview, and that would split the parser cache unnecessarily.
        $message = wfMessage( "$limitationType-warning" )->numParams( $current, $max );
        $this->mOutput->addWarning( $message->text() );

        $this->addTrackingCategory( "$limitationType-category" );
}
3215
/**
 * Return the text of a template, after recursively
 * replacing any variables or templates within the template.
 *
 * The double-brace construct is tried, in order, as: a subst:/safesubst:
 * that must be kept literally, a magic variable, a parser function, and
 * finally a page to transclude (special page, local template or interwiki
 * transclusion). If none of these produce text, the original wikitext is
 * recovered and returned.
 *
 * @param array $piece The parts of the template
 *   $piece['title']: the title, i.e. the part before the |
 *   $piece['parts']: the parameter array
 *   $piece['lineStart']: whether the brace was at the start of a line
 * @param PPFrame $frame The current frame, contains template arguments
 * @throws Exception
 * @return array Either [ 'object' => $text ] when the result still needs
 *   expansion in the current frame (or no substitution was found), or
 *   [ 'text' => $text ] with the text of the template
 */
public function braceSubstitution( $piece, $frame ) {
        // Flags

        // $text has been filled
        $found = false;
        // wiki markup in $text should be escaped
        $nowiki = false;
        // $text is HTML, armour it against wikitext transformation
        $isHTML = false;
        // Force interwiki transclusion to be done in raw mode not rendered
        $forceRawInterwiki = false;
        // $text is a DOM node needing expansion in a child frame
        $isChildObj = false;
        // $text is a DOM node needing expansion in the current frame
        $isLocalObj = false;

        # Title object, where $text came from
        $title = false;

        # $part1 is the bit before the first |, and must contain only title characters.
        # Various prefixes will be stripped from it later.
        $titleWithSpaces = $frame->expand( $piece['title'] );
        $part1 = trim( $titleWithSpaces );
        $titleText = false;

        # Original title text preserved for various purposes
        $originalTitle = $part1;

        # $args is a list of argument nodes, starting from index 0, not including $part1
        # @todo FIXME: If piece['parts'] is null then the call to getLength()
        # below won't work b/c this $args isn't an object
        $args = ( $piece['parts'] == null ) ? [] : $piece['parts'];

        $profileSection = null; // profile templates

        # SUBST
        if ( !$found ) {
                $substMatch = $this->mSubstWords->matchStartAndRemove( $part1 );

                # Possibilities for substMatch: "subst", "safesubst" or FALSE
                # Decide whether to expand template or keep wikitext as-is.
                if ( $this->ot['wiki'] ) {
                        if ( $substMatch === false ) {
                                $literal = true;  # literal when in PST with no prefix
                        } else {
                                $literal = false; # expand when in PST with subst: or safesubst:
                        }
                } else {
                        if ( $substMatch == 'subst' ) {
                                $literal = true;  # literal when not in PST with plain subst:
                        } else {
                                $literal = false; # expand when not in PST with safesubst: or no prefix
                        }
                }
                if ( $literal ) {
                        $text = $frame->virtualBracketedImplode( '{{', '|', '}}', $titleWithSpaces, $args );
                        $isLocalObj = true;
                        $found = true;
                }
        }

        # Variables
        if ( !$found && $args->getLength() == 0 ) {
                $id = $this->mVariables->matchStartToEnd( $part1 );
                if ( $id !== false ) {
                        $text = $this->getVariableValue( $id, $frame );
                        # A cache TTL above -1 for this magic word is applied to the output
                        if ( $this->magicWordFactory->getCacheTTL( $id ) > -1 ) {
                                $this->mOutput->updateCacheExpiry(
                                        $this->magicWordFactory->getCacheTTL( $id ) );
                        }
                        $found = true;
                }
        }

        # MSG, MSGNW and RAW
        if ( !$found ) {
                # Check for MSGNW:
                $mwMsgnw = $this->magicWordFactory->get( 'msgnw' );
                if ( $mwMsgnw->matchStartAndRemove( $part1 ) ) {
                        $nowiki = true;
                } else {
                        # Remove obsolete MSG:
                        $mwMsg = $this->magicWordFactory->get( 'msg' );
                        $mwMsg->matchStartAndRemove( $part1 );
                }

                # Check for RAW:
                $mwRaw = $this->magicWordFactory->get( 'raw' );
                if ( $mwRaw->matchStartAndRemove( $part1 ) ) {
                        $forceRawInterwiki = true;
                }
        }

        # Parser functions
        if ( !$found ) {
                $colonPos = strpos( $part1, ':' );
                if ( $colonPos !== false ) {
                        # Function name is the part before the colon; the rest is its first argument
                        $func = substr( $part1, 0, $colonPos );
                        $funcArgs = [ trim( substr( $part1, $colonPos + 1 ) ) ];
                        $argsLength = $args->getLength();
                        for ( $i = 0; $i < $argsLength; $i++ ) {
                                $funcArgs[] = $args->item( $i );
                        }

                        $result = $this->callParserFunction( $frame, $func, $funcArgs );

                        // Extract any forwarded flags
                        if ( isset( $result['title'] ) ) {
                                $title = $result['title'];
                        }
                        if ( isset( $result['found'] ) ) {
                                $found = $result['found'];
                        }
                        if ( array_key_exists( 'text', $result ) ) {
                                // a string or null
                                $text = $result['text'];
                        }
                        if ( isset( $result['nowiki'] ) ) {
                                $nowiki = $result['nowiki'];
                        }
                        if ( isset( $result['isHTML'] ) ) {
                                $isHTML = $result['isHTML'];
                        }
                        if ( isset( $result['forceRawInterwiki'] ) ) {
                                $forceRawInterwiki = $result['forceRawInterwiki'];
                        }
                        if ( isset( $result['isChildObj'] ) ) {
                                $isChildObj = $result['isChildObj'];
                        }
                        if ( isset( $result['isLocalObj'] ) ) {
                                $isLocalObj = $result['isLocalObj'];
                        }
                }
        }

        # Finish mangling title and then check for loops.
        # Set $title to a Title object and $titleText to the PDBK
        if ( !$found ) {
                $ns = NS_TEMPLATE;
                # Split the title into page and subpage
                $subpage = '';
                $relative = $this->maybeDoSubpageLink( $part1, $subpage );
                if ( $part1 !== $relative ) {
                        $part1 = $relative;
                        $ns = $this->mTitle->getNamespace();
                }
                $title = Title::newFromText( $part1, $ns );
                if ( $title ) {
                        $titleText = $title->getPrefixedText();
                        # Check for language variants if the template is not found
                        if ( $this->getTargetLanguage()->hasVariants() && $title->getArticleID() == 0 ) {
                                $this->getTargetLanguage()->findVariantLink( $part1, $title, true );
                        }
                        # Do recursion depth check
                        $limit = $this->mOptions->getMaxTemplateDepth();
                        if ( $frame->depth >= $limit ) {
                                $found = true;
                                $text = '<span class="error">'
                                        . wfMessage( 'parser-template-recursion-depth-warning' )
                                                ->numParams( $limit )->inContentLanguage()->text()
                                        . '</span>';
                        }
                }
        }

        # Load from database
        if ( !$found && $title ) {
                $profileSection = $this->mProfiler->scopedProfileIn( $title->getPrefixedDBkey() );
                if ( !$title->isExternal() ) {
                        if ( $title->isSpecialPage()
                                && $this->mOptions->getAllowSpecialInclusion()
                                && $this->ot['html']
                        ) {
                                $specialPage = $this->specialPageFactory->getPage( $title->getDBkey() );
                                // Pass the template arguments as URL parameters.
                                // "uselang" will have no effect since the Language object
                                // is forced to the one defined in ParserOptions.
                                $pageArgs = [];
                                $argsLength = $args->getLength();
                                for ( $i = 0; $i < $argsLength; $i++ ) {
                                        $bits = $args->item( $i )->splitArg();
                                        // Only arguments with an empty index (i.e. named ones) are passed on
                                        if ( strval( $bits['index'] ) === '' ) {
                                                $name = trim( $frame->expand( $bits['name'], PPFrame::STRIP_COMMENTS ) );
                                                $value = trim( $frame->expand( $bits['value'] ) );
                                                $pageArgs[$name] = $value;
                                        }
                                }

                                // Create a new context to execute the special page
                                $context = new RequestContext;
                                $context->setTitle( $title );
                                $context->setRequest( new FauxRequest( $pageArgs ) );
                                if ( $specialPage && $specialPage->maxIncludeCacheTime() === 0 ) {
                                        $context->setUser( $this->getUser() );
                                } else {
                                        // If this page is cached, then we better not be per user.
                                        $context->setUser( User::newFromName( '127.0.0.1', false ) );
                                }
                                $context->setLanguage( $this->mOptions->getUserLangObj() );
                                $ret = $this->specialPageFactory->capturePath( $title, $context, $this->getLinkRenderer() );
                                if ( $ret ) {
                                        $text = $context->getOutput()->getHTML();
                                        $this->mOutput->addOutputPageMetadata( $context->getOutput() );
                                        $found = true;
                                        $isHTML = true;
                                        if ( $specialPage && $specialPage->maxIncludeCacheTime() !== false ) {
                                                $this->mOutput->updateRuntimeAdaptiveExpiry(
                                                        $specialPage->maxIncludeCacheTime()
                                                );
                                        }
                                }
                        } elseif ( $this->nsInfo->isNonincludable( $title->getNamespace() ) ) {
                                $found = false; # access denied
                                $this->logger->debug(
                                        __METHOD__ .
                                        ": template inclusion denied for " . $title->getPrefixedDBkey()
                                );
                        } else {
                                # Note that this may replace $title with the one returned alongside the DOM
                                list( $text, $title ) = $this->getTemplateDom( $title );
                                if ( $text !== false ) {
                                        $found = true;
                                        $isChildObj = true;
                                }
                        }

                        # If the title is valid but undisplayable, make a link to it
                        if ( !$found && ( $this->ot['html'] || $this->ot['pre'] ) ) {
                                $text = "[[:$titleText]]";
                                $found = true;
                        }
                } elseif ( $title->isTrans() ) {
                        # Interwiki transclusion
                        if ( $this->ot['html'] && !$forceRawInterwiki ) {
                                $text = $this->interwikiTransclude( $title, 'render' );
                                $isHTML = true;
                        } else {
                                $text = $this->interwikiTransclude( $title, 'raw' );
                                # Preprocess it like a template
                                $text = $this->preprocessToDom( $text, self::PTD_FOR_INCLUSION );
                                $isChildObj = true;
                        }
                        $found = true;
                }

                # Do infinite loop check
                # This has to be done after redirect resolution to avoid infinite loops via redirects
                if ( !$frame->loopCheck( $title ) ) {
                        $found = true;
                        $text = '<span class="error">'
                                . wfMessage( 'parser-template-loop-warning', $titleText )->inContentLanguage()->text()
                                . '</span>';
                        $this->addTrackingCategory( 'template-loop-category' );
                        $this->mOutput->addWarning( wfMessage( 'template-loop-warning',
                                wfEscapeWikiText( $titleText ) )->text() );
                        $this->logger->debug( __METHOD__ . ": template loop broken at '$titleText'" );
                }
        }

        # If we haven't found text to substitute by now, we're done
        # Recover the source wikitext and return it
        if ( !$found ) {
                $text = $frame->virtualBracketedImplode( '{{', '|', '}}', $titleWithSpaces, $args );
                if ( $profileSection ) {
                        $this->mProfiler->scopedProfileOut( $profileSection );
                }
                return [ 'object' => $text ];
        }

        # Expand DOM-style return values in a child frame
        if ( $isChildObj ) {
                # Clean up argument array
                $newFrame = $frame->newChild( $args, $title );

                if ( $nowiki ) {
                        $text = $newFrame->expand( $text, PPFrame::RECOVER_ORIG );
                } elseif ( $titleText !== false && $newFrame->isEmpty() ) {
                        # Expansion is eligible for the empty-frame cache
                        $text = $newFrame->cachedExpand( $titleText, $text );
                } else {
                        # Uncached expansion
                        $text = $newFrame->expand( $text );
                }
        }
        if ( $isLocalObj && $nowiki ) {
                $text = $frame->expand( $text, PPFrame::RECOVER_ORIG );
                $isLocalObj = false;
        }

        if ( $profileSection ) {
                $this->mProfiler->scopedProfileOut( $profileSection );
        }

        # Replace raw HTML by a placeholder
        if ( $isHTML ) {
                $text = $this->insertStripItem( $text );
        } elseif ( $nowiki && ( $this->ot['html'] || $this->ot['pre'] ) ) {
                # Escape nowiki-style return values
                $text = wfEscapeWikiText( $text );
        } elseif ( is_string( $text )
                && !$piece['lineStart']
                && preg_match( '/^(?:{\\||:|;|#|\*)/', $text )
        ) {
                # T2529: if the template begins with a table or block-level
                # element, it should be treated as beginning a new line.
                # This behavior is somewhat controversial.
                $text = "\n" . $text;
        }

        if ( is_string( $text ) && !$this->incrementIncludeSize( 'post-expand', strlen( $text ) ) ) {
                # Error, oversize inclusion
                if ( $titleText !== false ) {
                        # Make a working, properly escaped link if possible (T25588)
                        $text = "[[:$titleText]]";
                } else {
                        # This will probably not be a working link, but at least it may
                        # provide some hint of where the problem is.
                        # Strip a leading colon so the link below does not start with "[[::".
                        # preg_replace() returns the result, so it must be assigned back.
                        $originalTitle = preg_replace( '/^:/', '', $originalTitle );
                        $text = "[[:$originalTitle]]";
                }
                $text .= $this->insertStripItem( '<!-- WARNING: template omitted, '
                        . 'post-expand include size too large -->' );
                $this->limitationWarn( 'post-expand-template-inclusion' );
        }

        if ( $isLocalObj ) {
                $ret = [ 'object' => $text ];
        } else {
                $ret = [ 'text' => $text ];
        }

        return $ret;
}
3560
3561         /**
3562          * Call a parser function and return an array with text and flags.
3563          *
3564          * The returned array will always contain a boolean 'found', indicating
3565          * whether the parser function was found or not. It may also contain the
3566          * following:
3567          *  text: string|object, resulting wikitext or PP DOM object
3568          *  isHTML: bool, $text is HTML, armour it against wikitext transformation
3569          *  isChildObj: bool, $text is a DOM node needing expansion in a child frame
3570          *  isLocalObj: bool, $text is a DOM node needing expansion in the current frame
3571          *  nowiki: bool, wiki markup in $text should be escaped
3572          *
3573          * @since 1.21
3574          * @param PPFrame $frame The current frame, contains template arguments
3575          * @param string $function Function name
3576          * @param array $args Arguments to the function
3577          * @throws MWException
3578          * @return array
3579          */
3580         public function callParserFunction( $frame, $function, array $args = [] ) {
3581                 # Case sensitive functions
3582                 if ( isset( $this->mFunctionSynonyms[1][$function] ) ) {
3583                         $function = $this->mFunctionSynonyms[1][$function];
3584                 } else {
3585                         # Case insensitive functions
3586                         $function = $this->contLang->lc( $function );
3587                         if ( isset( $this->mFunctionSynonyms[0][$function] ) ) {
3588                                 $function = $this->mFunctionSynonyms[0][$function];
3589                         } else {
3590                                 return [ 'found' => false ];
3591                         }
3592                 }
3593
3594                 list( $callback, $flags ) = $this->mFunctionHooks[$function];
3595
3596                 // Avoid PHP 7.1 warning from passing $this by reference
3597                 $parser = $this;
3598
3599                 $allArgs = [ &$parser ];
3600                 if ( $flags & self::SFH_OBJECT_ARGS ) {
3601                         # Convert arguments to PPNodes and collect for appending to $allArgs
3602                         $funcArgs = [];
3603                         foreach ( $args as $k => $v ) {
3604                                 if ( $v instanceof PPNode || $k === 0 ) {
3605                                         $funcArgs[] = $v;
3606                                 } else {
3607                                         $funcArgs[] = $this->mPreprocessor->newPartNodeArray( [ $k => $v ] )->item( 0 );
3608                                 }
3609                         }
3610
3611                         # Add a frame parameter, and pass the arguments as an array
3612                         $allArgs[] = $frame;
3613                         $allArgs[] = $funcArgs;
3614                 } else {
3615                         # Convert arguments to plain text and append to $allArgs
3616                         foreach ( $args as $k => $v ) {
3617                                 if ( $v instanceof PPNode ) {
3618                                         $allArgs[] = trim( $frame->expand( $v ) );
3619                                 } elseif ( is_int( $k ) && $k >= 0 ) {
3620                                         $allArgs[] = trim( $v );
3621                                 } else {
3622                                         $allArgs[] = trim( "$k=$v" );
3623                                 }
3624                         }
3625                 }
3626
3627                 $result = $callback( ...$allArgs );
3628
3629                 # The interface for function hooks allows them to return a wikitext
3630                 # string or an array containing the string and any flags. This mungs
3631                 # things around to match what this method should return.
3632                 if ( !is_array( $result ) ) {
3633                         $result = [
3634                                 'found' => true,
3635                                 'text' => $result,
3636                         ];
3637                 } else {
3638                         if ( isset( $result[0] ) && !isset( $result['text'] ) ) {
3639                                 $result['text'] = $result[0];
3640                         }
3641                         unset( $result[0] );
3642                         $result += [
3643                                 'found' => true,
3644                         ];
3645                 }
3646
3647                 $noparse = true;
3648                 $preprocessFlags = 0;
3649                 if ( isset( $result['noparse'] ) ) {
3650                         $noparse = $result['noparse'];
3651                 }
3652                 if ( isset( $result['preprocessFlags'] ) ) {
3653                         $preprocessFlags = $result['preprocessFlags'];
3654                 }
3655
3656                 if ( !$noparse ) {
3657                         $result['text'] = $this->preprocessToDom( $result['text'], $preprocessFlags );
3658                         $result['isChildObj'] = true;
3659                 }
3660
3661                 return $result;
3662         }
3663
3664         /**
3665          * Get the semi-parsed DOM representation of a template with a given title,
3666          * and its redirect destination title. Cached.
3667          *
3668          * @param Title $title
3669          *
3670          * @return array
3671          */
3672         public function getTemplateDom( $title ) {
3673                 $cacheTitle = $title;
3674                 $titleText = $title->getPrefixedDBkey();
3675
3676                 if ( isset( $this->mTplRedirCache[$titleText] ) ) {
3677                         list( $ns, $dbk ) = $this->mTplRedirCache[$titleText];
3678                         $title = Title::makeTitle( $ns, $dbk );
3679                         $titleText = $title->getPrefixedDBkey();
3680                 }
3681                 if ( isset( $this->mTplDomCache[$titleText] ) ) {
3682                         return [ $this->mTplDomCache[$titleText], $title ];
3683                 }
3684
3685                 # Cache miss, go to the database
3686                 list( $text, $title ) = $this->fetchTemplateAndTitle( $title );
3687
3688                 if ( $text === false ) {
3689                         $this->mTplDomCache[$titleText] = false;
3690                         return [ false, $title ];
3691                 }
3692
3693                 $dom = $this->preprocessToDom( $text, self::PTD_FOR_INCLUSION );
3694                 $this->mTplDomCache[$titleText] = $dom;
3695
3696                 if ( !$title->equals( $cacheTitle ) ) {
3697                         $this->mTplRedirCache[$cacheTitle->getPrefixedDBkey()] =
3698                                 [ $title->getNamespace(), $title->getDBkey() ];
3699                 }
3700
3701                 return [ $dom, $title ];
3702         }
3703
3704         /**
3705          * Fetch the current revision of a given title. Note that the revision
3706          * (and even the title) may not exist in the database, so everything
3707          * contributing to the output of the parser should use this method
3708          * where possible, rather than getting the revisions themselves. This
3709          * method also caches its results, so using it benefits performance.
3710          *
3711          * @since 1.24
3712          * @param Title $title
         * @return Revision|bool False if missing (see Parser::statelessFetchRevision())
3714          */
3715         public function fetchCurrentRevisionOfTitle( $title ) {
3716                 $cacheKey = $title->getPrefixedDBkey();
3717                 if ( !$this->currentRevisionCache ) {
3718                         $this->currentRevisionCache = new MapCacheLRU( 100 );
3719                 }
3720                 if ( !$this->currentRevisionCache->has( $cacheKey ) ) {
3721                         $this->currentRevisionCache->set( $cacheKey,
3722                                 // Defaults to Parser::statelessFetchRevision()
3723                                 call_user_func( $this->mOptions->getCurrentRevisionCallback(), $title, $this )
3724                         );
3725                 }
3726                 return $this->currentRevisionCache->get( $cacheKey );
3727         }
3728
3729         /**
3730          * @param Title $title
3731          * @return bool
3732          * @since 1.34
3733          */
3734         public function isCurrentRevisionOfTitleCached( $title ) {
3735                 return (
3736                         $this->currentRevisionCache &&
3737                         $this->currentRevisionCache->has( $title->getPrefixedText() )
3738                 );
3739         }
3740
3741         /**
3742          * Wrapper around Revision::newFromTitle to allow passing additional parameters
3743          * without passing them on to it.
3744          *
3745          * @since 1.24
3746          * @param Title $title
3747          * @param Parser|bool $parser
3748          * @return Revision|bool False if missing
3749          */
3750         public static function statelessFetchRevision( Title $title, $parser = false ) {
3751                 $rev = Revision::newKnownCurrent( wfGetDB( DB_REPLICA ), $title );
3752
3753                 return $rev;
3754         }
3755
3756         /**
3757          * Fetch the unparsed text of a template and register a reference to it.
3758          * @param Title $title
3759          * @return array ( string or false, Title )
3760          */
3761         public function fetchTemplateAndTitle( $title ) {
3762                 // Defaults to Parser::statelessFetchTemplate()
3763                 $templateCb = $this->mOptions->getTemplateCallback();
3764                 $stuff = call_user_func( $templateCb, $title, $this );
3765                 $rev = $stuff['revision'] ?? null;
3766                 $text = $stuff['text'];
3767                 if ( is_string( $stuff['text'] ) ) {
3768                         // We use U+007F DELETE to distinguish strip markers from regular text
3769                         $text = strtr( $text, "\x7f", "?" );
3770                 }
3771                 $finalTitle = $stuff['finalTitle'] ?? $title;
3772                 foreach ( ( $stuff['deps'] ?? [] ) as $dep ) {
3773                         $this->mOutput->addTemplate( $dep['title'], $dep['page_id'], $dep['rev_id'] );
3774                         if ( $dep['title']->equals( $this->getTitle() ) && $rev instanceof Revision ) {
3775                                 // Self-transclusion; final result may change based on the new page version
3776                                 $this->setOutputFlag( 'vary-revision-sha1', 'Self transclusion' );
3777                                 $this->getOutput()->setRevisionUsedSha1Base36( $rev->getSha1() );
3778                         }
3779                 }
3780
3781                 return [ $text, $finalTitle ];
3782         }
3783
3784         /**
3785          * Fetch the unparsed text of a template and register a reference to it.
3786          * @param Title $title
3787          * @return string|bool
3788          */
3789         public function fetchTemplate( $title ) {
3790                 return $this->fetchTemplateAndTitle( $title )[0];
3791         }
3792
3793         /**
3794          * Static function to get a template
3795          * Can be overridden via ParserOptions::setTemplateCallback().
3796          *
3797          * @param Title $title
3798          * @param bool|Parser $parser
3799          *
3800          * @return array
3801          */
3802         public static function statelessFetchTemplate( $title, $parser = false ) {
3803                 $text = $skip = false;
3804                 $finalTitle = $title;
3805                 $deps = [];
3806                 $rev = null;
3807
3808                 # Loop to fetch the article, with up to 1 redirect
3809                 for ( $i = 0; $i < 2 && is_object( $title ); $i++ ) {
3810                         # Give extensions a chance to select the revision instead
3811                         $id = false; # Assume current
3812                         Hooks::run( 'BeforeParserFetchTemplateAndtitle',
3813                                 [ $parser, $title, &$skip, &$id ] );
3814
3815                         if ( $skip ) {
3816                                 $text = false;
3817                                 $deps[] = [
3818                                         'title' => $title,
3819                                         'page_id' => $title->getArticleID(),
3820                                         'rev_id' => null
3821                                 ];
3822                                 break;
3823                         }
3824                         # Get the revision
3825                         if ( $id ) {
3826                                 $rev = Revision::newFromId( $id );
3827                         } elseif ( $parser ) {
3828                                 $rev = $parser->fetchCurrentRevisionOfTitle( $title );
3829                         } else {
3830                                 $rev = Revision::newFromTitle( $title );
3831                         }
3832                         $rev_id = $rev ? $rev->getId() : 0;
3833                         # If there is no current revision, there is no page
3834                         if ( $id === false && !$rev ) {
3835                                 $linkCache = MediaWikiServices::getInstance()->getLinkCache();
3836                                 $linkCache->addBadLinkObj( $title );
3837                         }
3838
3839                         $deps[] = [
3840                                 'title' => $title,
3841                                 'page_id' => $title->getArticleID(),
3842                                 'rev_id' => $rev_id
3843                         ];
3844                         if ( $rev && !$title->equals( $rev->getTitle() ) ) {
3845                                 # We fetched a rev from a different title; register it too...
3846                                 $deps[] = [
3847                                         'title' => $rev->getTitle(),
3848                                         'page_id' => $rev->getPage(),
3849                                         'rev_id' => $rev_id
3850                                 ];
3851                         }
3852
3853                         if ( $rev ) {
3854                                 $content = $rev->getContent();
3855                                 $text = $content ? $content->getWikitextForTransclusion() : null;
3856
3857                                 Hooks::run( 'ParserFetchTemplate',
3858                                         [ $parser, $title, $rev, &$text, &$deps ] );
3859
3860                                 if ( $text === false || $text === null ) {
3861                                         $text = false;
3862                                         break;
3863                                 }
3864                         } elseif ( $title->getNamespace() == NS_MEDIAWIKI ) {
3865                                 $message = wfMessage( MediaWikiServices::getInstance()->getContentLanguage()->
3866                                         lcfirst( $title->getText() ) )->inContentLanguage();
3867                                 if ( !$message->exists() ) {
3868                                         $text = false;
3869                                         break;
3870                                 }
3871                                 $content = $message->content();
3872                                 $text = $message->plain();
3873                         } else {
3874                                 break;
3875                         }
3876                         if ( !$content ) {
3877                                 break;
3878                         }
3879                         # Redirect?
3880                         $finalTitle = $title;
3881                         $title = $content->getRedirectTarget();
3882                 }
3883                 return [
3884                         'revision' => $rev,
3885                         'text' => $text,
3886                         'finalTitle' => $finalTitle,
3887                         'deps' => $deps
3888                 ];
3889         }
3890
3891         /**
3892          * Fetch a file and its title and register a reference to it.
3893          * If 'broken' is a key in $options then the file will appear as a broken thumbnail.
3894          * @param Title $title
3895          * @param array $options Array of options to RepoGroup::findFile
3896          * @return array ( File or false, Title of file )
3897          */
3898         public function fetchFileAndTitle( $title, $options = [] ) {
3899                 $file = $this->fetchFileNoRegister( $title, $options );
3900
3901                 $time = $file ? $file->getTimestamp() : false;
3902                 $sha1 = $file ? $file->getSha1() : false;
3903                 # Register the file as a dependency...
3904                 $this->mOutput->addImage( $title->getDBkey(), $time, $sha1 );
3905                 if ( $file && !$title->equals( $file->getTitle() ) ) {
3906                         # Update fetched file title
3907                         $title = $file->getTitle();
3908                         $this->mOutput->addImage( $title->getDBkey(), $time, $sha1 );
3909                 }
3910                 return [ $file, $title ];
3911         }
3912
3913         /**
3914          * Helper function for fetchFileAndTitle.
3915          *
3916          * Also useful if you need to fetch a file but not use it yet,
3917          * for example to get the file's handler.
3918          *
3919          * @param Title $title
3920          * @param array $options Array of options to RepoGroup::findFile
3921          * @return File|bool
3922          */
3923         protected function fetchFileNoRegister( $title, $options = [] ) {
3924                 if ( isset( $options['broken'] ) ) {
3925                         $file = false; // broken thumbnail forced by hook
3926                 } elseif ( isset( $options['sha1'] ) ) { // get by (sha1,timestamp)
3927                         $file = RepoGroup::singleton()->findFileFromKey( $options['sha1'], $options );
3928                 } else { // get by (name,timestamp)
3929                         $file = MediaWikiServices::getInstance()->getRepoGroup()->findFile( $title, $options );
3930                 }
3931                 return $file;
3932         }
3933
3934         /**
3935          * Transclude an interwiki link.
3936          *
3937          * @param Title $title
3938          * @param string $action Usually one of (raw, render)
3939          *
3940          * @return string
3941          */
3942         public function interwikiTransclude( $title, $action ) {
3943                 if ( !$this->svcOptions->get( 'EnableScaryTranscluding' ) ) {
3944                         return wfMessage( 'scarytranscludedisabled' )->inContentLanguage()->text();
3945                 }
3946
3947                 $url = $title->getFullURL( [ 'action' => $action ] );
3948                 if ( strlen( $url ) > 1024 ) {
3949                         return wfMessage( 'scarytranscludetoolong' )->inContentLanguage()->text();
3950                 }
3951
3952                 $wikiId = $title->getTransWikiID(); // remote wiki ID or false
3953
3954                 $fname = __METHOD__;
3955                 $cache = MediaWikiServices::getInstance()->getMainWANObjectCache();
3956
3957                 $data = $cache->getWithSetCallback(
3958                         $cache->makeGlobalKey(
3959                                 'interwiki-transclude',
3960                                 ( $wikiId !== false ) ? $wikiId : 'external',
3961                                 sha1( $url )
3962                         ),
3963                         $this->svcOptions->get( 'TranscludeCacheExpiry' ),
3964                         function ( $oldValue, &$ttl ) use ( $url, $fname, $cache ) {
3965                                 $req = MWHttpRequest::factory( $url, [], $fname );
3966
3967                                 $status = $req->execute(); // Status object
3968                                 if ( !$status->isOK() ) {
3969                                         $ttl = $cache::TTL_UNCACHEABLE;
3970                                 } elseif ( $req->getResponseHeader( 'X-Database-Lagged' ) !== null ) {
3971                                         $ttl = min( $cache::TTL_LAGGED, $ttl );
3972                                 }
3973
3974                                 return [
3975                                         'text' => $status->isOK() ? $req->getContent() : null,
3976                                         'code' => $req->getStatus()
3977                                 ];
3978                         },
3979                         [
3980                                 'checkKeys' => ( $wikiId !== false )
3981                                         ? [ $cache->makeGlobalKey( 'interwiki-page', $wikiId, $title->getDBkey() ) ]
3982                                         : [],
3983                                 'pcGroup' => 'interwiki-transclude:5',
3984                                 'pcTTL' => $cache::TTL_PROC_LONG
3985                         ]
3986                 );
3987
3988                 if ( is_string( $data['text'] ) ) {
3989                         $text = $data['text'];
3990                 } elseif ( $data['code'] != 200 ) {
3991                         // Though we failed to fetch the content, this status is useless.
3992                         $text = wfMessage( 'scarytranscludefailed-httpstatus' )
3993                                 ->params( $url, $data['code'] )->inContentLanguage()->text();
3994                 } else {
3995                         $text = wfMessage( 'scarytranscludefailed', $url )->inContentLanguage()->text();
3996                 }
3997
3998                 return $text;
3999         }
4000
4001         /**
4002          * Triple brace replacement -- used for template arguments
4003          * @private
4004          *
4005          * @param array $piece
4006          * @param PPFrame $frame
4007          *
4008          * @return array
4009          */
4010         public function argSubstitution( $piece, $frame ) {
4011                 $error = false;
4012                 $parts = $piece['parts'];
4013                 $nameWithSpaces = $frame->expand( $piece['title'] );
4014                 $argName = trim( $nameWithSpaces );
4015                 $object = false;
4016                 $text = $frame->getArgument( $argName );
4017                 if ( $text === false && $parts->getLength() > 0
4018                         && ( $this->ot['html']
4019                                 || $this->ot['pre']
4020                                 || ( $this->ot['wiki'] && $frame->isTemplate() )
4021                         )
4022                 ) {
4023                         # No match in frame, use the supplied default
4024                         $object = $parts->item( 0 )->getChildren();
4025                 }
4026                 if ( !$this->incrementIncludeSize( 'arg', strlen( $text ) ) ) {
4027                         $error = '<!-- WARNING: argument omitted, expansion size too large -->';
4028                         $this->limitationWarn( 'post-expand-template-argument' );
4029                 }
4030
4031                 if ( $text === false && $object === false ) {
4032                         # No match anywhere
4033                         $object = $frame->virtualBracketedImplode( '{{{', '|', '}}}', $nameWithSpaces, $parts );
4034                 }
4035                 if ( $error !== false ) {
4036                         $text .= $error;
4037                 }
4038                 if ( $object !== false ) {
4039                         $ret = [ 'object' => $object ];
4040                 } else {
4041                         $ret = [ 'text' => $text ];
4042                 }
4043
4044                 return $ret;
4045         }
4046
4047         /**
4048          * Return the text to be used for a given extension tag.
4049          * This is the ghost of strip().
4050          *
4051          * @param array $params Associative array of parameters:
4052          *     name       PPNode for the tag name
4053          *     attr       PPNode for unparsed text where tag attributes are thought to be
4054          *     attributes Optional associative array of parsed attributes
4055          *     inner      Contents of extension element
4056          *     noClose    Original text did not have a close tag
4057          * @param PPFrame $frame
4058          *
4059          * @throws MWException
4060          * @return string
4061          */
4062         public function extensionSubstitution( $params, $frame ) {
4063                 static $errorStr = '<span class="error">';
4064                 static $errorLen = 20;
4065
4066                 $name = $frame->expand( $params['name'] );
4067                 if ( substr( $name, 0, $errorLen ) === $errorStr ) {
4068                         // Probably expansion depth or node count exceeded. Just punt the
4069                         // error up.
4070                         return $name;
4071                 }
4072
4073                 $attrText = !isset( $params['attr'] ) ? null : $frame->expand( $params['attr'] );
4074                 if ( substr( $attrText, 0, $errorLen ) === $errorStr ) {
4075                         // See above
4076                         return $attrText;
4077                 }
4078
4079                 // We can't safely check if the expansion for $content resulted in an
4080                 // error, because the content could happen to be the error string
4081                 // (T149622).
4082                 $content = !isset( $params['inner'] ) ? null : $frame->expand( $params['inner'] );
4083
4084                 $marker = self::MARKER_PREFIX . "-$name-"
4085                         . sprintf( '%08X', $this->mMarkerIndex++ ) . self::MARKER_SUFFIX;
4086
4087                 $isFunctionTag = isset( $this->mFunctionTagHooks[strtolower( $name )] ) &&
4088                         ( $this->ot['html'] || $this->ot['pre'] );
4089                 if ( $isFunctionTag ) {
4090                         $markerType = 'none';
4091                 } else {
4092                         $markerType = 'general';
4093                 }
4094                 if ( $this->ot['html'] || $isFunctionTag ) {
4095                         $name = strtolower( $name );
4096                         $attributes = Sanitizer::decodeTagAttributes( $attrText );
4097                         if ( isset( $params['attributes'] ) ) {
4098                                 $attributes += $params['attributes'];
4099                         }
4100
4101                         if ( isset( $this->mTagHooks[$name] ) ) {
4102                                 $output = call_user_func_array( $this->mTagHooks[$name],
4103                                         [ $content, $attributes, $this, $frame ] );
4104                         } elseif ( isset( $this->mFunctionTagHooks[$name] ) ) {
4105                                 list( $callback, ) = $this->mFunctionTagHooks[$name];
4106
4107                                 // Avoid PHP 7.1 warning from passing $this by reference
4108                                 $parser = $this;
4109                                 $output = call_user_func_array( $callback, [ &$parser, $frame, $content, $attributes ] );
4110                         } else {
4111                                 $output = '<span class="error">Invalid tag extension name: ' .
4112                                         htmlspecialchars( $name ) . '</span>';
4113                         }
4114
4115                         if ( is_array( $output ) ) {
4116                                 // Extract flags
4117                                 $flags = $output;
4118                                 $output = $flags[0];
4119                                 if ( isset( $flags['markerType'] ) ) {
4120                                         $markerType = $flags['markerType'];
4121                                 }
4122                         }
4123                 } else {
4124                         if ( is_null( $attrText ) ) {
4125                                 $attrText = '';
4126                         }
4127                         if ( isset( $params['attributes'] ) ) {
4128                                 foreach ( $params['attributes'] as $attrName => $attrValue ) {
4129                                         $attrText .= ' ' . htmlspecialchars( $attrName ) . '="' .
4130                                                 htmlspecialchars( $attrValue ) . '"';
4131                                 }
4132                         }
4133                         if ( $content === null ) {
4134                                 $output = "<$name$attrText/>";
4135                         } else {
4136                                 $close = is_null( $params['close'] ) ? '' : $frame->expand( $params['close'] );
4137                                 if ( substr( $close, 0, $errorLen ) === $errorStr ) {
4138                                         // See above
4139                                         return $close;
4140                                 }
4141                                 $output = "<$name$attrText>$content$close";
4142                         }
4143                 }
4144
4145                 if ( $markerType === 'none' ) {
4146                         return $output;
4147                 } elseif ( $markerType === 'nowiki' ) {
4148                         $this->mStripState->addNoWiki( $marker, $output );
4149                 } elseif ( $markerType === 'general' ) {
4150                         $this->mStripState->addGeneral( $marker, $output );
4151                 } else {
4152                         throw new MWException( __METHOD__ . ': invalid marker type' );
4153                 }
4154                 return $marker;
4155         }
4156
4157         /**
4158          * Increment an include size counter
4159          *
4160          * @param string $type The type of expansion
4161          * @param int $size The size of the text
4162          * @return bool False if this inclusion would take it over the maximum, true otherwise
4163          */
4164         public function incrementIncludeSize( $type, $size ) {
4165                 if ( $this->mIncludeSizes[$type] + $size > $this->mOptions->getMaxIncludeSize() ) {
4166                         return false;
4167                 } else {
4168                         $this->mIncludeSizes[$type] += $size;
4169                         return true;
4170                 }
4171         }
4172
4173         /**
4174          * Increment the expensive function count
4175          *
4176          * @return bool False if the limit has been exceeded
4177          */
4178         public function incrementExpensiveFunctionCount() {
4179                 $this->mExpensiveFunctionCount++;
4180                 return $this->mExpensiveFunctionCount <= $this->mOptions->getExpensiveParserFunctionLimit();
4181         }
4182
4183         /**
4184          * Strip double-underscore items like __NOGALLERY__ and __NOTOC__
4185          * Fills $this->mDoubleUnderscores, returns the modified text
4186          *
4187          * @param string $text
4188          *
4189          * @return string
4190          */
4191         public function doDoubleUnderscore( $text ) {
4192                 # The position of __TOC__ needs to be recorded
4193                 $mw = $this->magicWordFactory->get( 'toc' );
4194                 if ( $mw->match( $text ) ) {
4195                         $this->mShowToc = true;
4196                         $this->mForceTocPosition = true;
4197
4198                         # Set a placeholder. At the end we'll fill it in with the TOC.
4199                         $text = $mw->replace( '<!--MWTOC\'"-->', $text, 1 );
4200
4201                         # Only keep the first one.
4202                         $text = $mw->replace( '', $text );
4203                 }
4204
4205                 # Now match and remove the rest of them
4206                 $mwa = $this->magicWordFactory->getDoubleUnderscoreArray();
4207                 $this->mDoubleUnderscores = $mwa->matchAndRemove( $text );
4208
4209                 if ( isset( $this->mDoubleUnderscores['nogallery'] ) ) {
4210                         $this->mOutput->mNoGallery = true;
4211                 }
4212                 if ( isset( $this->mDoubleUnderscores['notoc'] ) && !$this->mForceTocPosition ) {
4213                         $this->mShowToc = false;
4214                 }
4215                 if ( isset( $this->mDoubleUnderscores['hiddencat'] )
4216                         && $this->mTitle->getNamespace() == NS_CATEGORY
4217                 ) {
4218                         $this->addTrackingCategory( 'hidden-category-category' );
4219                 }
4220                 # (T10068) Allow control over whether robots index a page.
4221                 # __INDEX__ always overrides __NOINDEX__, see T16899
4222                 if ( isset( $this->mDoubleUnderscores['noindex'] ) && $this->mTitle->canUseNoindex() ) {
4223                         $this->mOutput->setIndexPolicy( 'noindex' );
4224                         $this->addTrackingCategory( 'noindex-category' );
4225                 }
4226                 if ( isset( $this->mDoubleUnderscores['index'] ) && $this->mTitle->canUseNoindex() ) {
4227                         $this->mOutput->setIndexPolicy( 'index' );
4228                         $this->addTrackingCategory( 'index-category' );
4229                 }
4230
4231                 # Cache all double underscores in the database
4232                 foreach ( $this->mDoubleUnderscores as $key => $val ) {
4233                         $this->mOutput->setProperty( $key, '' );
4234                 }
4235
4236                 return $text;
4237         }
4238
4239         /**
4240          * @see ParserOutput::addTrackingCategory()
4241          * @param string $msg Message key
4242          * @return bool Whether the addition was successful
4243          */
4244         public function addTrackingCategory( $msg ) {
4245                 return $this->mOutput->addTrackingCategory( $msg, $this->mTitle );
4246         }
4247
4248         /**
4249          * This function accomplishes several tasks:
4250          * 1) Auto-number headings if that option is enabled
4251          * 2) Add an [edit] link to sections for users who have enabled the option and can edit the page
4252          * 3) Add a Table of contents on the top for users who have enabled the option
4253          * 4) Auto-anchor headings
4254          *
4255          * It loops through all headlines, collects the necessary data, then splits up the
4256          * string and re-inserts the newly formatted headlines.
4257          *
4258          * @param string $text
4259          * @param string $origText Original, untouched wikitext
4260          * @param bool $isMain
4261          * @return mixed|string
4262          * @private
4263          */
4264         public function formatHeadings( $text, $origText, $isMain = true ) {
4265                 # Inhibit editsection links if requested in the page
4266                 if ( isset( $this->mDoubleUnderscores['noeditsection'] ) ) {
4267                         $maybeShowEditLink = false;
4268                 } else {
4269                         $maybeShowEditLink = true; /* Actual presence will depend on post-cache transforms */
4270                 }
4271
4272                 # Get all headlines for numbering them and adding funky stuff like [edit]
4273                 # links - this is for later, but we need the number of headlines right now
4274                 # NOTE: white space in headings have been trimmed in doHeadings. They shouldn't
4275                 # be trimmed here since whitespace in HTML headings is significant.
4276                 $matches = [];
4277                 $numMatches = preg_match_all(
4278                         '/<H(?P<level>[1-6])(?P<attrib>.*?>)(?P<header>[\s\S]*?)<\/H[1-6] *>/i',
4279                         $text,
4280                         $matches
4281                 );
4282
4283                 # if there are fewer than 4 headlines in the article, do not show TOC
4284                 # unless it's been explicitly enabled.
4285                 $enoughToc = $this->mShowToc &&
4286                         ( ( $numMatches >= 4 ) || $this->mForceTocPosition );
4287
4288                 # Allow user to stipulate that a page should have a "new section"
4289                 # link added via __NEWSECTIONLINK__
4290                 if ( isset( $this->mDoubleUnderscores['newsectionlink'] ) ) {
4291                         $this->mOutput->setNewSection( true );
4292                 }
4293
4294                 # Allow user to remove the "new section"
4295                 # link via __NONEWSECTIONLINK__
4296                 if ( isset( $this->mDoubleUnderscores['nonewsectionlink'] ) ) {
4297                         $this->mOutput->hideNewSection( true );
4298                 }
4299
4300                 # if the string __FORCETOC__ (not case-sensitive) occurs in the HTML,
4301                 # override above conditions and always show TOC above first header
4302                 if ( isset( $this->mDoubleUnderscores['forcetoc'] ) ) {
4303                         $this->mShowToc = true;
4304                         $enoughToc = true;
4305                 }
4306
4307                 # headline counter
4308                 $headlineCount = 0;
4309                 $numVisible = 0;
4310
4311                 # Ugh .. the TOC should have neat indentation levels which can be
4312                 # passed to the skin functions. These are determined here
4313                 $toc = '';
4314                 $full = '';
4315                 $head = [];
4316                 $sublevelCount = [];
4317                 $levelCount = [];
4318                 $level = 0;
4319                 $prevlevel = 0;
4320                 $toclevel = 0;
4321                 $prevtoclevel = 0;
4322                 $markerRegex = self::MARKER_PREFIX . "-h-(\d+)-" . self::MARKER_SUFFIX;
4323                 $baseTitleText = $this->mTitle->getPrefixedDBkey();
4324                 $oldType = $this->mOutputType;
4325                 $this->setOutputType( self::OT_WIKI );
4326                 $frame = $this->getPreprocessor()->newFrame();
4327                 $root = $this->preprocessToDom( $origText );
4328                 $node = $root->getFirstChild();
4329                 $byteOffset = 0;
4330                 $tocraw = [];
4331                 $refers = [];
4332
4333                 $headlines = $numMatches !== false ? $matches[3] : [];
4334
4335                 $maxTocLevel = $this->svcOptions->get( 'MaxTocLevel' );
4336                 foreach ( $headlines as $headline ) {
4337                         $isTemplate = false;
4338                         $titleText = false;
4339                         $sectionIndex = false;
4340                         $numbering = '';
4341                         $markerMatches = [];
4342                         if ( preg_match( "/^$markerRegex/", $headline, $markerMatches ) ) {
4343                                 $serial = $markerMatches[1];
4344                                 list( $titleText, $sectionIndex ) = $this->mHeadings[$serial];
4345                                 $isTemplate = ( $titleText != $baseTitleText );
4346                                 $headline = preg_replace( "/^$markerRegex\\s*/", "", $headline );
4347                         }
4348
4349                         if ( $toclevel ) {
4350                                 $prevlevel = $level;
4351                         }
4352                         $level = $matches[1][$headlineCount];
4353
4354                         if ( $level > $prevlevel ) {
4355                                 # Increase TOC level
4356                                 $toclevel++;
4357                                 $sublevelCount[$toclevel] = 0;
4358                                 if ( $toclevel < $maxTocLevel ) {
4359                                         $prevtoclevel = $toclevel;
4360                                         $toc .= Linker::tocIndent();
4361                                         $numVisible++;
4362                                 }
4363                         } elseif ( $level < $prevlevel && $toclevel > 1 ) {
4364                                 # Decrease TOC level, find level to jump to
4365
4366                                 for ( $i = $toclevel; $i > 0; $i-- ) {
4367                                         if ( $levelCount[$i] == $level ) {
4368                                                 # Found last matching level
4369                                                 $toclevel = $i;
4370                                                 break;
4371                                         } elseif ( $levelCount[$i] < $level ) {
4372                                                 # Found first matching level below current level
4373                                                 $toclevel = $i + 1;
4374                                                 break;
4375                                         }
4376                                 }
4377                                 if ( $i == 0 ) {
4378                                         $toclevel = 1;
4379                                 }
4380                                 if ( $toclevel < $maxTocLevel ) {
4381                                         if ( $prevtoclevel < $maxTocLevel ) {
4382                                                 # Unindent only if the previous toc level was shown :p
4383                                                 $toc .= Linker::tocUnindent( $prevtoclevel - $toclevel );
4384                                                 $prevtoclevel = $toclevel;
4385                                         } else {
4386                                                 $toc .= Linker::tocLineEnd();
4387                                         }
4388                                 }
4389                         } else {
4390                                 # No change in level, end TOC line
4391                                 if ( $toclevel < $maxTocLevel ) {
4392                                         $toc .= Linker::tocLineEnd();
4393                                 }
4394                         }
4395
4396                         $levelCount[$toclevel] = $level;
4397
4398                         # count number of headlines for each level
4399                         $sublevelCount[$toclevel]++;
4400                         $dot = 0;
4401                         for ( $i = 1; $i <= $toclevel; $i++ ) {
4402                                 if ( !empty( $sublevelCount[$i] ) ) {
4403                                         if ( $dot ) {
4404                                                 $numbering .= '.';
4405                                         }
4406                                         $numbering .= $this->getTargetLanguage()->formatNum( $sublevelCount[$i] );
4407                                         $dot = 1;
4408                                 }
4409                         }
4410
4411                         # The safe header is a version of the header text safe to use for links
4412
4413                         # Remove link placeholders by the link text.
4414                         #     <!--LINK number-->
4415                         # turns into
4416                         #     link text with suffix
4417                         # Do this before unstrip since link text can contain strip markers
4418                         $safeHeadline = $this->replaceLinkHoldersText( $headline );
4419
4420                         # Avoid insertion of weird stuff like <math> by expanding the relevant sections
4421                         $safeHeadline = $this->mStripState->unstripBoth( $safeHeadline );
4422
4423                         # Remove any <style> or <script> tags (T198618)
4424                         $safeHeadline = preg_replace(
4425                                 '#<(style|script)(?: [^>]*[^>/])?>.*?</\1>#is',
4426                                 '',
4427                                 $safeHeadline
4428                         );
4429
4430                         # Strip out HTML (first regex removes any tag not allowed)
4431                         # Allowed tags are:
4432                         # * <sup> and <sub> (T10393)
4433                         # * <i> (T28375)
4434                         # * <b> (r105284)
4435                         # * <bdi> (T74884)
4436                         # * <span dir="rtl"> and <span dir="ltr"> (T37167)
4437                         # * <s> and <strike> (T35715)
4438                         # We strip any parameter from accepted tags (second regex), except dir="rtl|ltr" from <span>,
4439                         # to allow setting directionality in toc items.
4440                         $tocline = preg_replace(
4441                                 [
4442                                         '#<(?!/?(span|sup|sub|bdi|i|b|s|strike)(?: [^>]*)?>).*?>#',
4443                                         '#<(/?(?:span(?: dir="(?:rtl|ltr)")?|sup|sub|bdi|i|b|s|strike))(?: .*?)?>#'
4444                                 ],
4445                                 [ '', '<$1>' ],
4446                                 $safeHeadline
4447                         );
4448
4449                         # Strip '<span></span>', which is the result from the above if
4450                         # <span id="foo"></span> is used to produce an additional anchor
4451                         # for a section.
4452                         $tocline = str_replace( '<span></span>', '', $tocline );
4453
4454                         $tocline = trim( $tocline );
4455
4456                         # For the anchor, strip out HTML-y stuff period
4457                         $safeHeadline = preg_replace( '/<.*?>/', '', $safeHeadline );
4458                         $safeHeadline = Sanitizer::normalizeSectionNameWhitespace( $safeHeadline );
4459
4460                         # Save headline for section edit hint before it's escaped
4461                         $headlineHint = $safeHeadline;
4462
4463                         # Decode HTML entities
4464                         $safeHeadline = Sanitizer::decodeCharReferences( $safeHeadline );
4465
4466                         $safeHeadline = self::normalizeSectionName( $safeHeadline );
4467
4468                         $fallbackHeadline = Sanitizer::escapeIdForAttribute( $safeHeadline, Sanitizer::ID_FALLBACK );
4469                         $linkAnchor = Sanitizer::escapeIdForLink( $safeHeadline );
4470                         $safeHeadline = Sanitizer::escapeIdForAttribute( $safeHeadline, Sanitizer::ID_PRIMARY );
4471                         if ( $fallbackHeadline === $safeHeadline ) {
4472                                 # No reason to have both (in fact, we can't)
4473                                 $fallbackHeadline = false;
4474                         }
4475
4476                         # HTML IDs must be case-insensitively unique for IE compatibility (T12721).
4477                         # @todo FIXME: We may be changing them depending on the current locale.
4478                         $arrayKey = strtolower( $safeHeadline );
4479                         if ( $fallbackHeadline === false ) {
4480                                 $fallbackArrayKey = false;
4481                         } else {
4482                                 $fallbackArrayKey = strtolower( $fallbackHeadline );
4483                         }
4484
4485                         # Create the anchor for linking from the TOC to the section
4486                         $anchor = $safeHeadline;
4487                         $fallbackAnchor = $fallbackHeadline;
4488                         if ( isset( $refers[$arrayKey] ) ) {
4489                                 // phpcs:ignore Generic.Formatting.DisallowMultipleStatements
4490                                 for ( $i = 2; isset( $refers["${arrayKey}_$i"] ); ++$i );
4491                                 $anchor .= "_$i";
4492                                 $linkAnchor .= "_$i";
4493                                 $refers["${arrayKey}_$i"] = true;
4494                         } else {
4495                                 $refers[$arrayKey] = true;
4496                         }
4497                         if ( $fallbackHeadline !== false && isset( $refers[$fallbackArrayKey] ) ) {
4498                                 // phpcs:ignore Generic.Formatting.DisallowMultipleStatements
4499                                 for ( $i = 2; isset( $refers["${fallbackArrayKey}_$i"] ); ++$i );
4500                                 $fallbackAnchor .= "_$i";
4501                                 $refers["${fallbackArrayKey}_$i"] = true;
4502                         } else {
4503                                 $refers[$fallbackArrayKey] = true;
4504                         }
4505
4506                         # Don't number the heading if it is the only one (looks silly)
4507                         if ( count( $matches[3] ) > 1 && $this->mOptions->getNumberHeadings() ) {
4508                                 # the two are different if the line contains a link
4509                                 $headline = Html::element(
4510                                         'span',
4511                                         [ 'class' => 'mw-headline-number' ],
4512                                         $numbering
4513                                 ) . ' ' . $headline;
4514                         }
4515
4516                         if ( $enoughToc && ( !isset( $maxTocLevel ) || $toclevel < $maxTocLevel ) ) {
4517                                 $toc .= Linker::tocLine( $linkAnchor, $tocline,
4518                                         $numbering, $toclevel, ( $isTemplate ? false : $sectionIndex ) );
4519                         }
4520
4521                         # Add the section to the section tree
4522                         # Find the DOM node for this header
4523                         $noOffset = ( $isTemplate || $sectionIndex === false );
4524                         while ( $node && !$noOffset ) {
4525                                 if ( $node->getName() === 'h' ) {
4526                                         $bits = $node->splitHeading();
4527                                         if ( $bits['i'] == $sectionIndex ) {
4528                                                 break;
4529                                         }
4530                                 }
4531                                 $byteOffset += mb_strlen( $this->mStripState->unstripBoth(
4532                                         $frame->expand( $node, PPFrame::RECOVER_ORIG ) ) );
4533                                 $node = $node->getNextSibling();
4534                         }
4535                         $tocraw[] = [
4536                                 'toclevel' => $toclevel,
4537                                 'level' => $level,
4538                                 'line' => $tocline,
4539                                 'number' => $numbering,
4540                                 'index' => ( $isTemplate ? 'T-' : '' ) . $sectionIndex,
4541                                 'fromtitle' => $titleText,
4542                                 'byteoffset' => ( $noOffset ? null : $byteOffset ),
4543                                 'anchor' => $anchor,
4544                         ];
4545
4546                         # give headline the correct <h#> tag
4547                         if ( $maybeShowEditLink && $sectionIndex !== false ) {
4548                                 // Output edit section links as markers with styles that can be customized by skins
4549                                 if ( $isTemplate ) {
4550                                         # Put a T flag in the section identifier, to indicate to extractSections()
4551                                         # that sections inside <includeonly> should be counted.
4552                                         $editsectionPage = $titleText;
4553                                         $editsectionSection = "T-$sectionIndex";
4554                                         $editsectionContent = null;
4555                                 } else {
4556                                         $editsectionPage = $this->mTitle->getPrefixedText();
4557                                         $editsectionSection = $sectionIndex;
4558                                         $editsectionContent = $headlineHint;
4559                                 }
4560                                 // We use a bit of pesudo-xml for editsection markers. The
4561                                 // language converter is run later on. Using a UNIQ style marker
4562                                 // leads to the converter screwing up the tokens when it
4563                                 // converts stuff. And trying to insert strip tags fails too. At
4564                                 // this point all real inputted tags have already been escaped,
4565                                 // so we don't have to worry about a user trying to input one of
4566                                 // these markers directly. We use a page and section attribute
4567                                 // to stop the language converter from converting these
4568                                 // important bits of data, but put the headline hint inside a
4569                                 // content block because the language converter is supposed to
4570                                 // be able to convert that piece of data.
4571                                 // Gets replaced with html in ParserOutput::getText
4572                                 $editlink = '<mw:editsection page="' . htmlspecialchars( $editsectionPage );
4573                                 $editlink .= '" section="' . htmlspecialchars( $editsectionSection ) . '"';
4574                                 if ( $editsectionContent !== null ) {
4575                                         $editlink .= '>' . $editsectionContent . '</mw:editsection>';
4576                                 } else {
4577                                         $editlink .= '/>';
4578                                 }
4579                         } else {
4580                                 $editlink = '';
4581                         }
4582                         $head[$headlineCount] = Linker::makeHeadline( $level,
4583                                 $matches['attrib'][$headlineCount], $anchor, $headline,
4584                                 $editlink, $fallbackAnchor );
4585
4586                         $headlineCount++;
4587                 }
4588
4589                 $this->setOutputType( $oldType );
4590
4591                 # Never ever show TOC if no headers
4592                 if ( $numVisible < 1 ) {
4593                         $enoughToc = false;
4594                 }
4595
4596                 if ( $enoughToc ) {
4597                         if ( $prevtoclevel > 0 && $prevtoclevel < $maxTocLevel ) {
4598                                 $toc .= Linker::tocUnindent( $prevtoclevel - 1 );
4599                         }
4600                         $toc = Linker::tocList( $toc, $this->mOptions->getUserLangObj() );
4601                         $this->mOutput->setTOCHTML( $toc );
4602                         $toc = self::TOC_START . $toc . self::TOC_END;
4603                 }
4604
4605                 if ( $isMain ) {
4606                         $this->mOutput->setSections( $tocraw );
4607                 }
4608
4609                 # split up and insert constructed headlines
4610                 $blocks = preg_split( '/<H[1-6].*?>[\s\S]*?<\/H[1-6]>/i', $text );
4611                 $i = 0;
4612
4613                 // build an array of document sections
4614                 $sections = [];
4615                 foreach ( $blocks as $block ) {
4616                         // $head is zero-based, sections aren't.
4617                         if ( empty( $head[$i - 1] ) ) {
4618                                 $sections[$i] = $block;
4619                         } else {
4620                                 $sections[$i] = $head[$i - 1] . $block;
4621                         }
4622
4623                         /**
4624                          * Send a hook, one per section.
4625                          * The idea here is to be able to make section-level DIVs, but to do so in a
4626                          * lower-impact, more correct way than r50769
4627                          *
4628                          * $this : caller
4629                          * $section : the section number
4630                          * &$sectionContent : ref to the content of the section
4631                          * $maybeShowEditLinks : boolean describing whether this section has an edit link
4632                          */
4633                         Hooks::run( 'ParserSectionCreate', [ $this, $i, &$sections[$i], $maybeShowEditLink ] );
4634
4635                         $i++;
4636                 }
4637
4638                 if ( $enoughToc && $isMain && !$this->mForceTocPosition ) {
4639                         // append the TOC at the beginning
4640                         // Top anchor now in skin
4641                         $sections[0] .= $toc . "\n";
4642                 }
4643
4644                 $full .= implode( '', $sections );
4645
4646                 if ( $this->mForceTocPosition ) {
4647                         return str_replace( '<!--MWTOC\'"-->', $toc, $full );
4648                 } else {
4649                         return $full;
4650                 }
4651         }
4652
4653         /**
4654          * Transform wiki markup when saving a page by doing "\r\n" -> "\n"
4655          * conversion, substituting signatures, {{subst:}} templates, etc.
4656          *
4657          * @param string $text The text to transform
4658          * @param Title $title The Title object for the current article
4659          * @param User $user The User object describing the current user
4660          * @param ParserOptions $options Parsing options
4661          * @param bool $clearState Whether to clear the parser state first
4662          * @return string The altered wiki markup
4663          */
4664         public function preSaveTransform( $text, Title $title, User $user,
4665                 ParserOptions $options, $clearState = true
4666         ) {
4667                 if ( $clearState ) {
4668                         $magicScopeVariable = $this->lock();
4669                 }
4670                 $this->startParse( $title, $options, self::OT_WIKI, $clearState );
4671                 $this->setUser( $user );
4672
4673                 // Strip U+0000 NULL (T159174)
4674                 $text = str_replace( "\000", '', $text );
4675
4676                 // We still normalize line endings for backwards-compatibility
4677                 // with other code that just calls PST, but this should already
4678                 // be handled in TextContent subclasses
4679                 $text = TextContent::normalizeLineEndings( $text );
4680
4681                 if ( $options->getPreSaveTransform() ) {
4682                         $text = $this->pstPass2( $text, $user );
4683                 }
4684                 $text = $this->mStripState->unstripBoth( $text );
4685
4686                 $this->setUser( null ); # Reset
4687
4688                 return $text;
4689         }
4690
4691         /**
4692          * Pre-save transform helper function
4693          *
4694          * @param string $text
4695          * @param User $user
4696          *
4697          * @return string
4698          */
4699         private function pstPass2( $text, $user ) {
4700                 # Note: This is the timestamp saved as hardcoded wikitext to the database, we use
4701                 # $this->contLang here in order to give everyone the same signature and use the default one
4702                 # rather than the one selected in each user's preferences.  (see also T14815)
4703                 $ts = $this->mOptions->getTimestamp();
4704                 $timestamp = MWTimestamp::getLocalInstance( $ts );
4705                 $ts = $timestamp->format( 'YmdHis' );
4706                 $tzMsg = $timestamp->getTimezoneMessage()->inContentLanguage()->text();
4707
4708                 $d = $this->contLang->timeanddate( $ts, false, false ) . " ($tzMsg)";
4709
4710                 # Variable replacement
4711                 # Because mOutputType is OT_WIKI, this will only process {{subst:xxx}} type tags
4712                 $text = $this->replaceVariables( $text );
4713
4714                 # This works almost by chance, as the replaceVariables are done before the getUserSig(),
4715                 # which may corrupt this parser instance via its wfMessage()->text() call-
4716
4717                 # Signatures
4718                 if ( strpos( $text, '~~~' ) !== false ) {
4719                         $sigText = $this->getUserSig( $user );
4720                         $text = strtr( $text, [
4721                                 '~~~~~' => $d,
4722                                 '~~~~' => "$sigText $d",
4723                                 '~~~' => $sigText
4724                         ] );
4725                         # The main two signature forms used above are time-sensitive
4726                         $this->setOutputFlag( 'user-signature', 'User signature detected' );
4727                 }
4728
4729                 # Context links ("pipe tricks"): [[|name]] and [[name (context)|]]
4730                 $tc = '[' . Title::legalChars() . ']';
4731                 $nc = '[ _0-9A-Za-z\x80-\xff-]'; # Namespaces can use non-ascii!
4732
4733                 // [[ns:page (context)|]]
4734                 $p1 = "/\[\[(:?$nc+:|:|)($tc+?)( ?\\($tc+\\))\\|]]/";
4735                 // [[ns:page（context）|]] (double-width brackets, added in r40257)
4736                 $p4 = "/\[\[(:?$nc+:|:|)($tc+?)( ?（$tc+）)\\|]]/";
4737                 // [[ns:page (context), context|]] (using either single or double-width comma)
4738                 $p3 = "/\[\[(:?$nc+:|:|)($tc+?)( ?\\($tc+\\)|)((?:, |，)$tc+|)\\|]]/";
4739                 // [[|page]] (reverse pipe trick: add context from page title)
4740                 $p2 = "/\[\[\\|($tc+)]]/";
4741
4742                 # try $p1 first, to turn "[[A, B (C)|]]" into "[[A, B (C)|A, B]]"
4743                 $text = preg_replace( $p1, '[[\\1\\2\\3|\\2]]', $text );
4744                 $text = preg_replace( $p4, '[[\\1\\2\\3|\\2]]', $text );
4745                 $text = preg_replace( $p3, '[[\\1\\2\\3\\4|\\2]]', $text );
4746
4747                 $t = $this->mTitle->getText();
4748                 $m = [];
4749                 if ( preg_match( "/^($nc+:|)$tc+?( \\($tc+\\))$/", $t, $m ) ) {
4750                         $text = preg_replace( $p2, "[[$m[1]\\1$m[2]|\\1]]", $text );
4751                 } elseif ( preg_match( "/^($nc+:|)$tc+?(, $tc+|)$/", $t, $m ) && "$m[1]$m[2]" != '' ) {
4752                         $text = preg_replace( $p2, "[[$m[1]\\1$m[2]|\\1]]", $text );
4753                 } else {
4754                         # if there's no context, don't bother duplicating the title
4755                         $text = preg_replace( $p2, '[[\\1]]', $text );
4756                 }
4757
4758                 return $text;
4759         }
4760
4761         /**
4762          * Fetch the user's signature text, if any, and normalize to
4763          * validated, ready-to-insert wikitext.
4764          * If you have pre-fetched the nickname or the fancySig option, you can
4765          * specify them here to save a database query.
4766          * Do not reuse this parser instance after calling getUserSig(),
4767          * as it may have changed.
4768          *
4769          * @param User &$user
4770          * @param string|bool $nickname Nickname to use or false to use user's default nickname
4771          * @param bool|null $fancySig whether the nicknname is the complete signature
4772          *    or null to use default value
4773          * @return string
4774          */
4775         public function getUserSig( &$user, $nickname = false, $fancySig = null ) {
4776                 $username = $user->getName();
4777
4778                 # Take whatever the caller left unspecified from the user's preferences.
4779                 if ( $nickname === false ) {
4780                         $nickname = $user->getOption( 'nickname' );
4781                 }
4782                 if ( $fancySig === null ) {
4783                         $fancySig = $user->getBoolOption( 'fancysig' );
4784                 }
4785
4786                 # Loose comparison on purpose: null and '' both mean "use the user name".
4787                 if ( $nickname == null ) {
4788                         $nickname = $username;
4789                 }
4790
4791                 $tooLong = mb_strlen( $nickname ) > $this->svcOptions->get( 'MaxSigChars' );
4792                 if ( $tooLong ) {
4793                         $nickname = $username;
4794                         $this->logger->debug( __METHOD__ . ": $username has overlong signature." );
4795                 } elseif ( $fancySig !== false ) {
4796                         # The signature may contain markup, so it has to validate before use.
4797                         if ( $this->validateSig( $nickname ) !== false ) {
4798                                 return $this->cleanSig( $nickname, true );
4799                         }
4800                         # Failed to validate; fall back to the default.
4801                         $nickname = $username;
4802                         $this->logger->debug( __METHOD__ . ": $username has bad XML tags in signature." );
4803                 }
4804
4805                 # Make sure the nickname does not smuggle a signature into the signature.
4806                 $nickname = self::cleanSigInSig( $nickname );
4807
4808                 # Still here: build the default signature, a link to the user page.
4809                 $msgName = $user->isAnon() ? 'signature-anon' : 'signature';
4810                 $sigMsg = wfMessage( $msgName, wfEscapeWikiText( $username ),
4811                         wfEscapeWikiText( $nickname ) );
4812
4813                 return $sigMsg->inContentLanguage()->title( $this->getTitle() )->text();
4814         }
4815
4816         /**
4817          * Check whether signature text is a well-formed XML fragment.
4818          * @param string $text Signature text to check
4819          * @return string|bool The unchanged text when well-formed, false otherwise
4820          */
4821         public function validateSig( $text ) {
4822                 $wellFormed = Xml::isWellFormedXmlFragment( $text );
4823                 return $wellFormed ? $text : false;
4824         }
4825
4826         /**
4827          * Clean up signature text
4828          *
4829          * 1) Substitute all transclusions
4830          * 2) Strip 3, 4 or 5 tildes out of signatures @see cleanSigInSig
4831          *
4832          * @param string $text
4833          * @param bool $parsing False when cleaning standalone (preferences save), true mid-parse
4834          * @return string Signature text
4835          */
4836         public function cleanSig( $text, $parsing = false ) {
4837                 $standalone = !$parsing;
4838                 if ( $standalone ) {
4839                         # Not inside a parse: lock the parser and start a preprocess-only run.
4840                         global $wgTitle;
4841                         $magicScopeVariable = $this->lock();
4842                         $this->startParse( $wgTitle, new ParserOptions, self::OT_PREPROCESS, true );
4843                 }
4844
4845                 # Signature cleaning can be switched off through the parser options.
4846                 if ( !$this->mOptions->getCleanSignatures() ) {
4847                         return $text;
4848                 }
4849
4850                 # @todo FIXME: Regex doesn't respect extension tags or nowiki
4851                 #  => Move this logic to braceSubstitution()
4852                 $subst = $this->magicWordFactory->get( 'subst' );
4853                 $pattern = '/\{\{(?!(?:' . $subst->getBaseRegex() . '))/x' . $subst->getRegexCase();
4854                 $text = preg_replace( $pattern, '{{' . $subst->getSynonym( 0 ), $text );
4855
4856                 # Drop nested signature tildes, then expand what is left.
4857                 $text = self::cleanSigInSig( $text );
4858                 $dom = $this->preprocessToDom( $text );
4859                 $text = $this->getPreprocessor()->newFrame()->expand( $dom );
4860
4861                 if ( $standalone ) {
4862                         $text = $this->mStripState->unstripBoth( $text );
4863                 }
4864                 return $text;
4865         }
4866
4867         /**
4868          * Remove every run of three to five consecutive tildes from signature text.
4869          *
4870          * @param string $text
4871          * @return string Signature text with /~{3,5}/ removed
4872          */
4873         public static function cleanSigInSig( $text ) {
4874                 # Matching is greedy, so e.g. seven tildes in a row leave two behind.
4875                 return preg_replace( '/~{3,5}/', '', $text );
4876         }
4877
4878         /**
4879          * Set up the state parse() would normally prepare, for callers outside a full parse
4880          *
4881          * @param Title|null $title
4882          * @param ParserOptions $options
4883          * @param int $outputType
4884          * @param bool $clearState
4885          * @param int|null $revId Revision ID to record; left untouched when null
4886          */
4887         public function startExternalParse( Title $title = null, ParserOptions $options,
4888                 $outputType, $clearState = true, $revId = null
4889         ) {
4890                 $this->startParse( $title, $options, $outputType, $clearState );
4891                 if ( $revId === null ) {
4892                         return;
4893                 }
4894                 $this->mRevisionId = $revId;
4895         }
4896
4897         /**
4898          * @param Title|null $title Passed to setTitle()
4899          * @param ParserOptions $options Stored as the parser's current options
4900          * @param int $outputType Passed to setOutputType()
4901          * @param bool $clearState Whether to finish by calling clearState()
4902          */
4903         private function startParse( Title $title = null, ParserOptions $options,
4904                 $outputType, $clearState = true
4905         ) {
4906                 $this->setTitle( $title );
4907                 $this->mOptions = $options;
4908                 $this->setOutputType( $outputType );
4909                 if ( $clearState ) {
4910                         $this->clearState();
4911                 }
4912         }
4913
4914         /**
4915          * Wrapper for preprocess() that refuses to re-enter itself.
4916          *
4917          * @param string $text The text to preprocess
4918          * @param ParserOptions $options
4919          * @param Title|null $title Title object or null to use $wgTitle
4920          * @return string Preprocessed text, or $text unchanged when called recursively
4921          */
4922         public function transformMsg( $text, $options, $title = null ) {
4923                 static $executing = false;
4924
4925                 # Guard against infinite recursion
4926                 if ( $executing ) {
4927                         return $text;
4928                 }
4929                 if ( !$title ) {
4930                         global $wgTitle;
4931                         $title = $wgTitle;
4932                 }
4933                 $executing = true;
4934                 try {
4935                         return $this->preprocess( $text, $title, $options );
4936                 } finally {
4937                         # Release the guard even if preprocess() throws, or later calls would no-op
4938                         $executing = false;
4939                 }
4940         }
4941
4942         /**
4943          * Create an HTML-style tag, e.g. "<yourtag>special text</yourtag>"
4944          * The callback should have the following form:
4945          *    function myParserHook( $text, $params, $parser, $frame ) { ... }
4946          *
4947          * Transform and return $text. Use $parser for any required context, e.g. use
4948          * $parser->getTitle() and $parser->getOptions() not $wgTitle or $wgOut->mParserOptions
4949          *
4950          * Hooks may return extended information by returning an array, of which the
4951          * first numbered element (index 0) must be the return string, and all other
4952          * entries are extracted into local variables within an internal function
4953          * in the Parser class.
4954          *
4955          * This interface (introduced r61913) appears to be undocumented, but
4956          * 'markerType' is used by some core tag hooks to override which strip
4957          * array their results are placed in. **Use great caution if attempting
4958          * this interface, as it is not documented and injudicious use could smash
4959          * private variables.**
4960          *
4961          * @param string $tag The tag to use, e.g. 'hook' for "<hook>"; lowercased before use
4962          * @param callable $callback The callback function (and object) to use for the tag
4963          * @throws MWException If $tag contains '<', '>', a carriage return or a newline
4964          * @return callable|null The hook previously registered for the tag, or null if none
4965          */
4966         public function setHook( $tag, callable $callback ) {
4967                 $tag = strtolower( $tag );
4968                 if ( preg_match( '/[<>\r\n]/', $tag, $m ) ) {
4969                         throw new MWException( "Invalid character {$m[0]} in setHook('$tag', ...) call" );
4970                 }
4971                 $oldVal = $this->mTagHooks[$tag] ?? null;
4972                 $this->mTagHooks[$tag] = $callback;
4973                 # Strict comparison: numeric-looking names such as '1e1' and '10' are distinct tags
4974                 if ( !in_array( $tag, $this->mStripList, true ) ) {
4975                         $this->mStripList[] = $tag;
4976                 }
4977                 return $oldVal;
4978         }
4979
4980         /**
4981          * As setHook(), but letting the contents be parsed.
4982          *
4983          * Transparent tag hooks are like regular XML-style tag hooks, except they
4984          * operate late in the transformation sequence, on HTML instead of wikitext.
4985          *
4986          * This is probably obsoleted by things dealing with parser frames?
4987          * The only extension currently using it is geoserver.
4988          *
4989          * @since 1.10
4990          * @todo better document or deprecate this
4991          *
4992          * @param string $tag The tag to use, e.g. 'hook' for "<hook>"; lowercased before use
4993          * @param callable $callback The callback function (and object) to use for the tag
4994          * @throws MWException If $tag contains '<', '>', a carriage return or a newline
4995          * @return callable|null The previous mTransparentTagHooks entry for the tag, or null
4996          */
4997         public function setTransparentTagHook( $tag, callable $callback ) {
4998                 $tag = strtolower( $tag );
4999                 if ( preg_match( '/[<>\r\n]/', $tag, $m ) ) {
5000                         throw new MWException(
5001                                 "Invalid character {$m[0]} in setTransparentTagHook('$tag', ...) call" );
5002                 }
5003                 $oldVal = $this->mTransparentTagHooks[$tag] ?? null;
5004                 $this->mTransparentTagHooks[$tag] = $callback;
5005                 return $oldVal;
5006         }
5007
5008         /**
5009          * Forget all tag hooks and function tag hooks, and restore the default strip list
5010          */
5011         public function clearTagHooks() {
5012                 $this->mStripList = $this->mDefaultStripList;
5013                 $this->mFunctionTagHooks = [];
5014                 $this->mTagHooks = [];
5015         }
5016
5017         /**
5018          * Create a function, e.g. {{sum:1|2|3}}
5019          * The callback function should have the form:
5020          *    function myParserFunction( &$parser, $arg1, $arg2, $arg3 ) { ... }
5021          *
5022          * Or with Parser::SFH_OBJECT_ARGS:
5023          *    function myParserFunction( $parser, $frame, $args ) { ... }
5024          *
5025          * The callback may either return the text result of the function, or an array with the text
5026          * in element 0, and a number of flags in the other elements. The names of the flags are
5027          * specified in the keys. Valid flags are:
5028          *   found                     The text returned is valid, stop processing the template. This
5029          *                             is on by default.
5030          *   nowiki                    Wiki markup in the return value should be escaped
5031          *   isHTML                    The returned text is HTML, armour it against wikitext transformation
5032          *
5033          * @param string $id The magic word ID
5034          * @param callable $callback The callback function (and object) to use
5035          * @param int $flags A combination of the following flags:
5036          *     Parser::SFH_NO_HASH      No leading hash, i.e. {{plural:...}} instead of {{#if:...}}
5037          *
5038          *     Parser::SFH_OBJECT_ARGS  Pass the template arguments as PPNode objects instead of text.
5039          *     This allows for conditional expansion of the parse tree, allowing you to eliminate dead
5040          *     branches and thus speed up parsing. It is also possible to analyse the parse tree of
5041          *     the arguments, and to control the way they are expanded.
5042          *
5043          *     The $frame parameter is a PPFrame. This can be used to produce expanded text from the
5044          *     arguments, for instance:
5045          *         $text = isset( $args[0] ) ? $frame->expand( $args[0] ) : '';
5046          *
5047          *     For technical reasons, $args[0] is pre-expanded and will be a string. This may change in
5048          *     future versions. Please call $frame->expand() on it anyway so that your code keeps
5049          *     working if/when this is changed.
5050          *
5051          *     If you want whitespace to be trimmed from $args, you need to do it yourself, post-
5052          *     expansion.
5053          *
5054          *     Please read the documentation in includes/parser/Preprocessor.php for more information
5055          *     about the methods available in PPFrame and PPNode.
5056          *
5057          * @throws MWException If no magic word object is obtained for $id
5058          * @return callable|null The callback previously registered under this ID, or null if none
5059          */
5060         public function setFunctionHook( $id, callable $callback, $flags = 0 ) {
5061                 # Resolve the magic word first so that a failure leaves the hook tables untouched
5062                 $mw = $this->magicWordFactory->get( $id );
5063                 if ( !$mw ) {
5064                         throw new MWException( __METHOD__ . '() expecting a magic word identifier.' );
5065                 }
5066
5067                 $oldVal = isset( $this->mFunctionHooks[$id] ) ? $this->mFunctionHooks[$id][0] : null;
5068                 $this->mFunctionHooks[$id] = [ $callback, $flags ];
5069
5070                 # Add to function cache
5071                 $synonyms = $mw->getSynonyms();
5072                 $sensitive = intval( $mw->isCaseSensitive() );
5073                 foreach ( $synonyms as $syn ) {
5074                         # Case
5075                         if ( !$sensitive ) {
5076                                 $syn = $this->contLang->lc( $syn );
5077                         }
5078                         # Add leading hash
5079                         if ( !( $flags & self::SFH_NO_HASH ) ) {
5080                                 $syn = '#' . $syn;
5081                         }
5082                         # Remove trailing colon
5083                         if ( substr( $syn, -1, 1 ) === ':' ) {
5084                                 $syn = substr( $syn, 0, -1 );
5085                         }
5086                         $this->mFunctionSynonyms[$sensitive][$syn] = $id;
5087                 }
5088                 return $oldVal;
5089         }
5090
5091         /**
5092          * List the IDs of all registered function hooks, after first-call initialisation
5093          * @return array Keys of the function hook table
5094          */
5095         public function getFunctionHooks() {
5096                 $this->firstCallInit();
5097                 $hookIds = array_keys( $this->mFunctionHooks );
5098                 return $hookIds;
5099         }
5100
5101         /**
5102          * Create a tag function, e.g. "<test>some stuff</test>".
5103          * Unlike tag hooks, tag functions are parsed at preprocessor level.
5104          * Unlike parser functions, their content is not preprocessed.
5105          * @param string $tag Tag name; lowercased before use
5106          * @param callable $callback
5107          * @param int $flags Stored alongside the callback
5108          * @throws MWException If $tag contains '<', '>', a carriage return or a newline
5109          * @return array|null The previous [ callback, flags ] entry for the tag, or null if none
5110          */
5111         public function setFunctionTagHook( $tag, callable $callback, $flags ) {
5112                 $tag = strtolower( $tag );
5113                 if ( preg_match( '/[<>\r\n]/', $tag, $m ) ) {
5114                         throw new MWException( "Invalid character {$m[0]} in setFunctionTagHook('$tag', ...) call" );
5115                 }
5116                 $old = $this->mFunctionTagHooks[$tag] ?? null;
5117                 $this->mFunctionTagHooks[$tag] = [ $callback, $flags ];
5118
5119                 # Strict comparison: numeric-looking names such as '1e1' and '10' are distinct tags
5120                 if ( !in_array( $tag, $this->mStripList, true ) ) {
5121                         $this->mStripList[] = $tag;
5122                 }
5123                 return $old;
5124         }
5125
5126         /**
5127          * Replace "<!--LINK-->" link placeholders in the buffer with actual links.
5128          * Placeholders created in Linker::link()
5129          * @param string &$text Buffer, passed by reference to the link holder array
5130          * @param int $options Not used by this method
5131          */
5132         public function replaceLinkHolders( &$text, $options = 0 ) {
5133                 $linkHolders = $this->mLinkHolders;
5134                 $linkHolders->replace( $text );
5135         }
5136
5137         /**
5138          * Replace "<!--LINK-->" link placeholders with the plain text of the links
5139          * (not HTML-formatted).
5140          * @param string $text
5141          * @return string
5142          */
5143         public function replaceLinkHoldersText( $text ) {
5144                 $plain = $this->mLinkHolders->replaceText( $text );
5145                 return $plain;
5146         }
5147
5148         /**
5149          * Renders an image gallery from a text with one line per image.
5150          * Text labels may be given by using |-style alternative text. E.g.
5151          *   Image:one.jpg|The number "1"
5152          *   Image:tree.jpg|A tree
5153          * given as text will return the HTML of a gallery with two images,
5154          * labeled 'The number "1"' and
5155          * 'A tree'.
5156          *
5157          * @param string $text Gallery contents, one image per line
5158          * @param array $params Gallery attributes (mode, caption, perrow, widths, heights, ...)
5159          * @return string HTML
5160          */
5161         public function renderImageGallery( $text, $params ) {
5162                 $mode = false;
5163                 if ( isset( $params['mode'] ) ) {
5164                         $mode = $params['mode'];
5165                 }
5166
5167                 try {
5168                         $ig = ImageGalleryBase::factory( $mode );
5169                 } catch ( Exception $e ) {
5170                         // If invalid type set, fallback to default.
5171                         $ig = ImageGalleryBase::factory( false );
5172                 }
5173
5174                 $ig->setContextTitle( $this->mTitle );
5175                 $ig->setShowBytes( false );
5176                 $ig->setShowDimensions( false );
5177                 $ig->setShowFilename( false );
5178                 $ig->setParser( $this );
5179                 $ig->setHideBadImages();
5180                 $ig->setAttributes( Sanitizer::validateTagAttributes( $params, 'ul' ) );
5181
5182                 if ( isset( $params['showfilename'] ) ) {
5183                         $ig->setShowFilename( true );
5184                 } else {
5185                         $ig->setShowFilename( false );
5186                 }
5187                 if ( isset( $params['caption'] ) ) {
5188                         // NOTE: We aren't passing a frame here or below.  Frame info
5189                         // is currently opaque to Parsoid, which acts on OT_PREPROCESS.
5190                         // See T107332#4030581
5191                         $caption = $this->recursiveTagParse( $params['caption'] );
5192                         $ig->setCaptionHtml( $caption );
5193                 }
5194                 if ( isset( $params['perrow'] ) ) {
5195                         $ig->setPerRow( $params['perrow'] );
5196                 }
5197                 if ( isset( $params['widths'] ) ) {
5198                         $ig->setWidths( $params['widths'] );
5199                 }
5200                 if ( isset( $params['heights'] ) ) {
5201                         $ig->setHeights( $params['heights'] );
5202                 }
5203                 $ig->setAdditionalOptions( $params );
5204
5205                 // Avoid PHP 7.1 warning from passing $this by reference
5206                 $parser = $this;
5207                 Hooks::run( 'BeforeParserrenderImageGallery', [ &$parser, &$ig ] );
5208
5209                 $lines = StringUtils::explode( "\n", $text );
5210                 foreach ( $lines as $line ) {
5211                         # match lines like these:
5212                         # Image:someimage.jpg|This is some image
5213                         $matches = [];
5214                         preg_match( "/^([^|]+)(\\|(.*))?$/", $line, $matches );
5215                         # Skip empty lines
5216                         if ( count( $matches ) == 0 ) {
5217                                 continue;
5218                         }
5219
5220                         if ( strpos( $matches[0], '%' ) !== false ) {
5221                                 $matches[1] = rawurldecode( $matches[1] );
5222                         }
5223                         $title = Title::newFromText( $matches[1], NS_FILE );
5224                         if ( is_null( $title ) ) {
5225                                 # Bogus title. Ignore these so we don't bomb out later.
5226                                 continue;
5227                         }
5228
5229                         # We need to get what handler the file uses, to figure out parameters.
5230                         # Note, a hook can override the file name, and choose an entirely different
5231                         # file (which potentially could be of a different type and have different handler).
5232                         $options = [];
5233                         $descQuery = false;
5234                         Hooks::run( 'BeforeParserFetchFileAndTitle',
5235                                 [ $this, $title, &$options, &$descQuery ] );
5236                         # Don't register it now, as TraditionalImageGallery does that later.
5237                         $file = $this->fetchFileNoRegister( $title, $options );
5238                         $handler = $file ? $file->getHandler() : false;
5239
5240                         $paramMap = [
5241                                 'img_alt' => 'gallery-internal-alt',
5242                                 'img_link' => 'gallery-internal-link',
5243                         ];
5244                         if ( $handler ) {
5245                                 $paramMap += $handler->getParamMap();
5246                                 // We don't want people to specify per-image widths.
5247                                 // Additionally the width parameter would need special casing anyhow.
5248                                 unset( $paramMap['img_width'] );
5249                         }
5250
5251                         $mwArray = $this->magicWordFactory->newArray( array_keys( $paramMap ) );
5252
5253                         $label = '';
5254                         $alt = '';
5255                         $link = '';
5256                         $handlerOptions = [];
5257                         if ( isset( $matches[3] ) ) {
5258                                 // look for an |alt= definition while trying not to break existing
5259                                 // captions with multiple pipes (|) in it, until a more sensible grammar
5260                                 // is defined for images in galleries
5261
5262                                 // FIXME: Doing recursiveTagParse at this stage, and the trim before
5263                                 // splitting on '|' is a bit odd, and different from makeImage.
5264                                 $matches[3] = $this->recursiveTagParse( trim( $matches[3] ) );
5265                                 // Protect LanguageConverter markup
5266                                 $parameterMatches = StringUtils::delimiterExplode(
5267                                         '-{', '}-', '|', $matches[3], true /* nested */
5268                                 );
5269
5270                                 foreach ( $parameterMatches as $parameterMatch ) {
5271                                         list( $magicName, $match ) = $mwArray->matchVariableStartToEnd( $parameterMatch );
5272                                         if ( $magicName ) {
5273                                                 $paramName = $paramMap[$magicName];
5274
5275                                                 switch ( $paramName ) {
5276                                                         case 'gallery-internal-alt':
5277                                                                 $alt = $this->stripAltText( $match, false );
5278                                                                 break;
5279                                                         case 'gallery-internal-link':
5280                                                                 $linkValue = $this->stripAltText( $match, false );
5281                                                                 if ( preg_match( '/^-\{R\|(.*)\}-$/', $linkValue ) ) {
5282                                                                         // Result of LanguageConverter::markNoConversion invoked
5283                                                                         // on an external link: strip the "-{R|" ... "}-" wrapper.
5284                                                                         $linkValue = substr( $linkValue, 4, -2 );
5285                                                                 }
5286                                                                 list( $type, $target ) = $this->parseLinkParameter( $linkValue );
5287                                                                 if ( $type === 'link-url' ) {
5288                                                                         $link = $target;
5289                                                                         $this->mOutput->addExternalLink( $target );
5290                                                                 } elseif ( $type === 'link-title' ) {
5291                                                                         $link = $target->getLinkURL();
5292                                                                         $this->mOutput->addLink( $target );
5293                                                                 }
5294                                                                 break;
5295                                                         default:
5296                                                                 // Must be a handler specific parameter.
5297                                                                 if ( $handler->validateParam( $paramName, $match ) ) {
5298                                                                         $handlerOptions[$paramName] = $match;
5299                                                                 } else {
5300                                                                         // Guess not, consider it as caption.
5301                                                                         $this->logger->debug(
5302                                                                                 "$parameterMatch failed parameter validation" );
5303                                                                         $label = $parameterMatch;
5304                                                                 }
5305                                                 }
5306
5307                                         } else {
5308                                                 // Last pipe wins.
5309                                                 $label = $parameterMatch;
5310                                         }
5311                                 }
5312                         }
5313
5314                         $ig->add( $title, $label, $alt, $link, $handlerOptions );
5315                 }
5316                 $html = $ig->toHTML();
5317                 Hooks::run( 'AfterParserFetchFileAndTitle', [ $this, $ig, &$html ] );
5318                 return $html;
5319         }
5320
5321         /**
5322          * Get the image parameter map and magic word array for a media handler.
5323          *
5324          * @param MediaHandler|bool $handler Handler object, or a falsy value when there is none
5325          * @return array [ parameter map, magic word array ], cached per handler class
5326          */
5327         public function getImageParams( $handler ) {
5328                 $handlerClass = $handler ? get_class( $handler ) : '';
5329                 if ( isset( $this->mImageParams[$handlerClass] ) ) {
5330                         return [ $this->mImageParams[$handlerClass],
5331                                 $this->mImageParamsMagicArray[$handlerClass] ];
5332                 }
5333                 # Built-in parameter names, grouped by parameter type
5334                 static $internalParamNames = [
5335                         'horizAlign' => [ 'left', 'right', 'center', 'none' ],
5336                         'vertAlign' => [ 'baseline', 'sub', 'super', 'top', 'text-top', 'middle',
5337                                 'bottom', 'text-bottom' ],
5338                         'frame' => [ 'thumbnail', 'manualthumb', 'framed', 'frameless',
5339                                 'upright', 'border', 'link', 'alt', 'class' ],
5340                 ];
5341                 static $internalParamMap;
5342                 if ( !$internalParamMap ) {
5343                         $internalParamMap = [];
5344                         foreach ( $internalParamNames as $type => $names ) {
5345                                 foreach ( $names as $name ) {
5346                                         // For grep: img_left, img_right, img_center, img_none,
5347                                         // img_baseline, img_sub, img_super, img_top, img_text_top, img_middle,
5348                                         // img_bottom, img_text_bottom,
5349                                         // img_thumbnail, img_manualthumb, img_framed, img_frameless, img_upright,
5350                                         // img_border, img_link, img_alt, img_class
5351                                         $magicName = str_replace( '-', '_', "img_$name" );
5352                                         $internalParamMap[$magicName] = [ $type, $name ];
5353                                 }
5354                         }
5355                 }
5356
5357                 # Handler parameters are added on top of the built-in ones
5358                 $paramMap = $internalParamMap;
5359                 if ( $handler ) {
5360                         foreach ( $handler->getParamMap() as $magic => $paramName ) {
5361                                 $paramMap[$magic] = [ 'handler', $paramName ];
5362                         }
5363                 }
5364                 $this->mImageParams[$handlerClass] = $paramMap;
5365                 $this->mImageParamsMagicArray[$handlerClass] =
5366                         $this->magicWordFactory->newArray( array_keys( $paramMap ) );
5367
5368                 return [ $this->mImageParams[$handlerClass], $this->mImageParamsMagicArray[$handlerClass] ];
5369         }
5370
5371         /**
5372          * Parse image options text and use it to make an image
5373          *
5374          * @param Title $title
5375          * @param string $options
5376          * @param LinkHolderArray|bool $holders
5377          * @return string HTML
5378          */
5379         public function makeImage( $title, $options, $holders = false ) {
5380                 # Check if the options text is of the form "options|alt text"
5381                 # Options are:
5382                 #  * thumbnail  make a thumbnail with enlarge-icon and caption, alignment depends on lang
5383                 #  * left       no resizing, just left align. label is used for alt= only
5384                 #  * right      same, but right aligned
5385                 #  * none       same, but not aligned
5386                 #  * ___px      scale to ___ pixels width, no aligning. e.g. use in taxobox
5387                 #  * center     center the image
5388                 #  * frame      Keep original image size, no magnify-button.
5389                 #  * framed     Same as "frame"
5390                 #  * frameless  like 'thumb' but without a frame. Keeps user preferences for width
5391                 #  * upright    reduce width for upright images, rounded to full __0 px
5392                 #  * border     draw a 1px border around the image
5393                 #  * alt        Text for HTML alt attribute (defaults to empty)
5394                 #  * class      Set a class for img node
5395                 #  * link       Set the target of the image link. Can be external, interwiki, or local
5396                 # vertical-align values (no % or length right now):
5397                 #  * baseline
5398                 #  * sub
5399                 #  * super
5400                 #  * top
5401                 #  * text-top
5402                 #  * middle
5403                 #  * bottom
5404                 #  * text-bottom
5405
5406                 # Protect LanguageConverter markup when splitting into parts
5407                 $parts = StringUtils::delimiterExplode(
5408                         '-{', '}-', '|', $options, true /* allow nesting */
5409                 );
5410
5411                 # Give extensions a chance to select the file revision for us
5412                 $options = [];
5413                 $descQuery = false;
5414                 Hooks::run( 'BeforeParserFetchFileAndTitle',
5415                         [ $this, $title, &$options, &$descQuery ] );
5416                 # Fetch and register the file (file title may be different via hooks)
5417                 list( $file, $title ) = $this->fetchFileAndTitle( $title, $options );
5418
5419                 # Get parameter map
5420                 $handler = $file ? $file->getHandler() : false;
5421
5422                 list( $paramMap, $mwArray ) = $this->getImageParams( $handler );
5423
5424                 if ( !$file ) {
5425                         $this->addTrackingCategory( 'broken-file-category' );
5426                 }
5427
5428                 # Process the input parameters
5429                 $caption = '';
5430                 $params = [ 'frame' => [], 'handler' => [],
5431                         'horizAlign' => [], 'vertAlign' => [] ];
5432                 $seenformat = false;
5433                 foreach ( $parts as $part ) {
5434                         $part = trim( $part );
5435                         list( $magicName, $value ) = $mwArray->matchVariableStartToEnd( $part );
5436                         $validated = false;
5437                         if ( isset( $paramMap[$magicName] ) ) {
5438                                 list( $type, $paramName ) = $paramMap[$magicName];
5439
5440                                 # Special case; width and height come in one variable together
5441                                 if ( $type === 'handler' && $paramName === 'width' ) {
5442                                         $parsedWidthParam = self::parseWidthParam( $value );
5443                                         if ( isset( $parsedWidthParam['width'] ) ) {
5444                                                 $width = $parsedWidthParam['width'];
5445                                                 if ( $handler->validateParam( 'width', $width ) ) {
5446                                                         $params[$type]['width'] = $width;
5447                                                         $validated = true;
5448                                                 }
5449                                         }
5450                                         if ( isset( $parsedWidthParam['height'] ) ) {
5451                                                 $height = $parsedWidthParam['height'];
5452                                                 if ( $handler->validateParam( 'height', $height ) ) {
5453                                                         $params[$type]['height'] = $height;
5454                                                         $validated = true;
5455                                                 }
5456                                         }
5457                                         # else no validation -- T15436
5458                                 } else {
5459                                         if ( $type === 'handler' ) {
5460                                                 # Validate handler parameter
5461                                                 $validated = $handler->validateParam( $paramName, $value );
5462                                         } else {
5463                                                 # Validate internal parameters
5464                                                 switch ( $paramName ) {
5465                                                         case 'manualthumb':
5466                                                         case 'alt':
5467                                                         case 'class':
5468                                                                 # @todo FIXME: Possibly check validity here for
5469                                                                 # manualthumb? downstream behavior seems odd with
5470                                                                 # missing manual thumbs.
5471                                                                 $validated = true;
5472                                                                 $value = $this->stripAltText( $value, $holders );
5473                                                                 break;
5474                                                         case 'link':
5475                                                                 list( $paramName, $value ) =
5476                                                                         $this->parseLinkParameter(
5477                                                                                 $this->stripAltText( $value, $holders )
5478                                                                         );
5479                                                                 if ( $paramName ) {
5480                                                                         $validated = true;
5481                                                                         if ( $paramName === 'no-link' ) {
5482                                                                                 $value = true;
5483                                                                         }
5484                                                                         if ( ( $paramName === 'link-url' ) && $this->mOptions->getExternalLinkTarget() ) {
5485                                                                                 $params[$type]['link-target'] = $this->mOptions->getExternalLinkTarget();
5486                                                                         }
5487                                                                 }
5488                                                                 break;
5489                                                         case 'frameless':
5490                                                         case 'framed':
5491                                                         case 'thumbnail':
5492                                                                 // use first appearing option, discard others.
5493                                                                 $validated = !$seenformat;
5494                                                                 $seenformat = true;
5495                                                                 break;
5496                                                         default:
5497                                                                 # Most other things appear to be empty or numeric...
5498                                                                 $validated = ( $value === false || is_numeric( trim( $value ) ) );
5499                                                 }
5500                                         }
5501
5502                                         if ( $validated ) {
5503                                                 $params[$type][$paramName] = $value;
5504                                         }
5505                                 }
5506                         }
5507                         if ( !$validated ) {
5508                                 $caption = $part;
5509                         }
5510                 }
5511
5512                 # Process alignment parameters
5513                 if ( $params['horizAlign'] ) {
5514                         $params['frame']['align'] = key( $params['horizAlign'] );
5515                 }
5516                 if ( $params['vertAlign'] ) {
5517                         $params['frame']['valign'] = key( $params['vertAlign'] );
5518                 }
5519
5520                 $params['frame']['caption'] = $caption;
5521
5522                 # Will the image be presented in a frame, with the caption below?
5523                 $imageIsFramed = isset( $params['frame']['frame'] )
5524                         || isset( $params['frame']['framed'] )
5525                         || isset( $params['frame']['thumbnail'] )
5526                         || isset( $params['frame']['manualthumb'] );
5527
5528                 # In the old days, [[Image:Foo|text...]] would set alt text.  Later it
5529                 # came to also set the caption, ordinary text after the image -- which
5530                 # makes no sense, because that just repeats the text multiple times in
5531                 # screen readers.  It *also* came to set the title attribute.
5532                 # Now that we have an alt attribute, we should not set the alt text to
5533                 # equal the caption: that's worse than useless, it just repeats the
5534                 # text.  This is the framed/thumbnail case.  If there's no caption, we
5535                 # use the unnamed parameter for alt text as well, just for the time be-
5536                 # ing, if the unnamed param is set and the alt param is not.
5537                 # For the future, we need to figure out if we want to tweak this more,
5538                 # e.g., introducing a title= parameter for the title; ignoring the un-
5539                 # named parameter entirely for images without a caption; adding an ex-
5540                 # plicit caption= parameter and preserving the old magic unnamed para-
5541                 # meter for BC; ...
5542                 if ( $imageIsFramed ) { # Framed image
5543                         if ( $caption === '' && !isset( $params['frame']['alt'] ) ) {
5544                                 # No caption or alt text, add the filename as the alt text so
5545                                 # that screen readers at least get some description of the image
5546                                 $params['frame']['alt'] = $title->getText();
5547                         }
5548                         # Do not set $params['frame']['title'] because tooltips don't make sense
5549                         # for framed images
5550                 } else { # Inline image
5551                         if ( !isset( $params['frame']['alt'] ) ) {
5552                                 # No alt text, use the "caption" for the alt text
5553                                 if ( $caption !== '' ) {
5554                                         $params['frame']['alt'] = $this->stripAltText( $caption, $holders );
5555                                 } else {
5556                                         # No caption, fall back to using the filename for the
5557                                         # alt text
5558                                         $params['frame']['alt'] = $title->getText();
5559                                 }
5560                         }
5561                         # Use the "caption" for the tooltip text
5562                         $params['frame']['title'] = $this->stripAltText( $caption, $holders );
5563                 }
5564                 $params['handler']['targetlang'] = $this->getTargetLanguage()->getCode();
5565
5566                 Hooks::run( 'ParserMakeImageParams', [ $title, $file, &$params, $this ] );
5567
5568                 # Linker does the rest
5569                 $time = $options['time'] ?? false;
5570                 $ret = Linker::makeImageLink( $this, $title, $file, $params['frame'], $params['handler'],
5571                         $time, $descQuery, $this->mOptions->getThumbSize() );
5572
5573                 # Give the handler a chance to modify the parser object
5574                 if ( $handler ) {
5575                         $handler->parserTransformHook( $this, $file );
5576                 }
5577
5578                 return $ret;
5579         }
5580
5581         /**
5582          * Parse the value of 'link' parameter in image syntax (`[[File:Foo.jpg|link=<value>]]`).
5583          *
5584          * Adds an entry to appropriate link tables.
5585          *
5586          * @since 1.32
5587          * @param string $value
5588          * @return array of `[ type, target ]`, where:
5589          *   - `type` is one of:
5590          *     - `null`: Given value is not a valid link target, use default
5591          *     - `'no-link'`: Given value is empty, do not generate a link
5592          *     - `'link-url'`: Given value is a valid external link
5593          *     - `'link-title'`: Given value is a valid internal link
5594          *   - `target` is:
5595          *     - When `type` is `null` or `'no-link'`: `false`
5596          *     - When `type` is `'link-url'`: URL string corresponding to given value
5597          *     - When `type` is `'link-title'`: Title object corresponding to given value
5598          */
5599         public function parseLinkParameter( $value ) {
5600                 $chars = self::EXT_LINK_URL_CLASS;
5601                 $addr = self::EXT_LINK_ADDR;
5602                 $prots = $this->mUrlProtocols;
5603                 $type = null;
5604                 $target = false;
5605                 if ( $value === '' ) {
5606                         $type = 'no-link';
5607                 } elseif ( preg_match( "/^((?i)$prots)/", $value ) ) {
5608                         if ( preg_match( "/^((?i)$prots)$addr$chars*$/u", $value, $m ) ) {
5609                                 $this->mOutput->addExternalLink( $value );
5610                                 $type = 'link-url';
5611                                 $target = $value;
5612                         }
5613                 } else {
5614                         $linkTitle = Title::newFromText( $value );
5615                         if ( $linkTitle ) {
5616                                 $this->mOutput->addLink( $linkTitle );
5617                                 $type = 'link-title';
5618                                 $target = $linkTitle;
5619                         }
5620                 }
5621                 return [ $type, $target ];
5622         }
5623
5624         /**
5625          * @param string $caption
5626          * @param LinkHolderArray|bool $holders
5627          * @return mixed|string
5628          */
5629         protected function stripAltText( $caption, $holders ) {
5630                 # Strip bad stuff out of the title (tooltip).  We can't just use
5631                 # replaceLinkHoldersText() here, because if this function is called
5632                 # from replaceInternalLinks2(), mLinkHolders won't be up-to-date.
5633                 if ( $holders ) {
5634                         $tooltip = $holders->replaceText( $caption );
5635                 } else {
5636                         $tooltip = $this->replaceLinkHoldersText( $caption );
5637                 }
5638
5639                 # make sure there are no placeholders in thumbnail attributes
5640                 # that are later expanded to html- so expand them now and
5641                 # remove the tags
5642                 $tooltip = $this->mStripState->unstripBoth( $tooltip );
5643                 # Compatibility hack!  In HTML certain entity references not terminated
5644                 # by a semicolon are decoded (but not if we're in an attribute; that's
5645                 # how link URLs get away without properly escaping & in queries).
5646                 # But wikitext has always required semicolon-termination of entities,
5647                 # so encode & where needed to avoid decode of semicolon-less entities.
5648                 # See T209236 and
5649                 # https://www.w3.org/TR/html5/syntax.html#named-character-references
5650                 # T210437 discusses moving this workaround to Sanitizer::stripAllTags.
5651                 $tooltip = preg_replace( "/
5652                         &                       # 1. entity prefix
5653                         (?=                     # 2. followed by:
5654                         (?:                     #  a. one of the legacy semicolon-less named entities
5655                                 A(?:Elig|MP|acute|circ|grave|ring|tilde|uml)|
5656                                 C(?:OPY|cedil)|E(?:TH|acute|circ|grave|uml)|
5657                                 GT|I(?:acute|circ|grave|uml)|LT|Ntilde|
5658                                 O(?:acute|circ|grave|slash|tilde|uml)|QUOT|REG|THORN|
5659                                 U(?:acute|circ|grave|uml)|Yacute|
5660                                 a(?:acute|c(?:irc|ute)|elig|grave|mp|ring|tilde|uml)|brvbar|
5661                                 c(?:cedil|edil|urren)|cent(?!erdot;)|copy(?!sr;)|deg|
5662                                 divide(?!ontimes;)|e(?:acute|circ|grave|th|uml)|
5663                                 frac(?:1(?:2|4)|34)|
5664                                 gt(?!c(?:c|ir)|dot|lPar|quest|r(?:a(?:pprox|rr)|dot|eq(?:less|qless)|less|sim);)|
5665                                 i(?:acute|circ|excl|grave|quest|uml)|laquo|
5666                                 lt(?!c(?:c|ir)|dot|hree|imes|larr|quest|r(?:Par|i(?:e|f|));)|
5667                                 m(?:acr|i(?:cro|ddot))|n(?:bsp|tilde)|
5668                                 not(?!in(?:E|dot|v(?:a|b|c)|)|ni(?:v(?:a|b|c)|);)|
5669                                 o(?:acute|circ|grave|rd(?:f|m)|slash|tilde|uml)|
5670                                 p(?:lusmn|ound)|para(?!llel;)|quot|r(?:aquo|eg)|
5671                                 s(?:ect|hy|up(?:1|2|3)|zlig)|thorn|times(?!b(?:ar|)|d;)|
5672                                 u(?:acute|circ|grave|ml|uml)|y(?:acute|en|uml)
5673                         )
5674                         (?:[^;]|$))     #  b. and not followed by a semicolon
5675                         # S = study, for efficiency
5676                         /Sx", '&amp;', $tooltip );
5677                 $tooltip = Sanitizer::stripAllTags( $tooltip );
5678
5679                 return $tooltip;
5680         }
5681
5682         /**
5683          * Set a flag in the output object indicating that the content is dynamic and
5684          * shouldn't be cached.
5685          * @deprecated since 1.28; use getOutput()->updateCacheExpiry()
5686          */
5687         public function disableCache() {
5688                 $this->logger->debug( "Parser output marked as uncacheable." );
5689                 if ( !$this->mOutput ) {
5690                         throw new MWException( __METHOD__ .
5691                                 " can only be called when actually parsing something" );
5692                 }
5693                 $this->mOutput->updateCacheExpiry( 0 ); // new style, for consistency
5694         }
5695
5696         /**
5697          * Callback from the Sanitizer for expanding items found in HTML attribute
5698          * values, so they can be safely tested and escaped.
5699          *
5700          * @param string &$text
5701          * @param bool|PPFrame $frame
5702          * @return string
5703          */
5704         public function attributeStripCallback( &$text, $frame = false ) {
5705                 $text = $this->replaceVariables( $text, $frame );
5706                 $text = $this->mStripState->unstripBoth( $text );
5707                 return $text;
5708         }
5709
5710         /**
5711          * Accessor
5712          *
5713          * @return array
5714          */
5715         public function getTags() {
5716                 $this->firstCallInit();
5717                 return array_merge(
5718                         array_keys( $this->mTransparentTagHooks ),
5719                         array_keys( $this->mTagHooks ),
5720                         array_keys( $this->mFunctionTagHooks )
5721                 );
5722         }
5723
5724         /**
5725          * @since 1.32
5726          * @return array
5727          */
5728         public function getFunctionSynonyms() {
5729                 $this->firstCallInit();
5730                 return $this->mFunctionSynonyms;
5731         }
5732
5733         /**
5734          * @since 1.32
5735          * @return string
5736          */
5737         public function getUrlProtocols() {
5738                 return $this->mUrlProtocols;
5739         }
5740
5741         /**
5742          * Replace transparent tags in $text with the values given by the callbacks.
5743          *
5744          * Transparent tag hooks are like regular XML-style tag hooks, except they
5745          * operate late in the transformation sequence, on HTML instead of wikitext.
5746          *
5747          * @param string $text
5748          *
5749          * @return string
5750          */
5751         public function replaceTransparentTags( $text ) {
5752                 $matches = [];
5753                 $elements = array_keys( $this->mTransparentTagHooks );
5754                 $text = self::extractTagsAndParams( $elements, $text, $matches );
5755                 $replacements = [];
5756
5757                 foreach ( $matches as $marker => $data ) {
5758                         list( $element, $content, $params, $tag ) = $data;
5759                         $tagName = strtolower( $element );
5760                         if ( isset( $this->mTransparentTagHooks[$tagName] ) ) {
5761                                 $output = call_user_func_array(
5762                                         $this->mTransparentTagHooks[$tagName],
5763                                         [ $content, $params, $this ]
5764                                 );
5765                         } else {
5766                                 $output = $tag;
5767                         }
5768                         $replacements[$marker] = $output;
5769                 }
5770                 return strtr( $text, $replacements );
5771         }
5772
5773         /**
5774          * Break wikitext input into sections, and either pull or replace
5775          * some particular section's text.
5776          *
5777          * External callers should use the getSection and replaceSection methods.
5778          *
5779          * @param string $text Page wikitext
5780          * @param string|int $sectionId A section identifier string of the form:
5781          *   "<flag1> - <flag2> - ... - <section number>"
5782          *
5783          * Currently the only recognised flag is "T", which means the target section number
5784          * was derived during a template inclusion parse, in other words this is a template
5785          * section edit link. If no flags are given, it was an ordinary section edit link.
5786          * This flag is required to avoid a section numbering mismatch when a section is
5787          * enclosed by "<includeonly>" (T8563).
5788          *
5789          * The section number 0 pulls the text before the first heading; other numbers will
5790          * pull the given section along with its lower-level subsections. If the section is
5791          * not found, $mode=get will return $newtext, and $mode=replace will return $text.
5792          *
5793          * Section 0 is always considered to exist, even if it only contains the empty
5794          * string. If $text is the empty string and section 0 is replaced, $newText is
5795          * returned.
5796          *
5797          * @param string $mode One of "get" or "replace"
5798          * @param string $newText Replacement text for section data.
5799          * @return string For "get", the extracted section text.
5800          *   for "replace", the whole page with the section replaced.
5801          */
5802         private function extractSections( $text, $sectionId, $mode, $newText = '' ) {
5803                 global $wgTitle; # not generally used but removes an ugly failure mode
5804
5805                 $magicScopeVariable = $this->lock();
5806                 $this->startParse( $wgTitle, new ParserOptions, self::OT_PLAIN, true );
5807                 $outText = '';
5808                 $frame = $this->getPreprocessor()->newFrame();
5809
5810                 # Process section extraction flags
5811                 $flags = 0;
5812                 $sectionParts = explode( '-', $sectionId );
5813                 $sectionIndex = array_pop( $sectionParts );
5814                 foreach ( $sectionParts as $part ) {
5815                         if ( $part === 'T' ) {
5816                                 $flags |= self::PTD_FOR_INCLUSION;
5817                         }
5818                 }
5819
5820                 # Check for empty input
5821                 if ( strval( $text ) === '' ) {
5822                         # Only sections 0 and T-0 exist in an empty document
5823                         if ( $sectionIndex == 0 ) {
5824                                 if ( $mode === 'get' ) {
5825                                         return '';
5826                                 }
5827
5828                                 return $newText;
5829                         } else {
5830                                 if ( $mode === 'get' ) {
5831                                         return $newText;
5832                                 }
5833
5834                                 return $text;
5835                         }
5836                 }
5837
5838                 # Preprocess the text
5839                 $root = $this->preprocessToDom( $text, $flags );
5840
5841                 # <h> nodes indicate section breaks
5842                 # They can only occur at the top level, so we can find them by iterating the root's children
5843                 $node = $root->getFirstChild();
5844
5845                 # Find the target section
5846                 if ( $sectionIndex == 0 ) {
5847                         # Section zero doesn't nest, level=big
5848                         $targetLevel = 1000;
5849                 } else {
5850                         while ( $node ) {
5851                                 if ( $node->getName() === 'h' ) {
5852                                         $bits = $node->splitHeading();
5853                                         if ( $bits['i'] == $sectionIndex ) {
5854                                                 $targetLevel = $bits['level'];
5855                                                 break;
5856                                         }
5857                                 }
5858                                 if ( $mode === 'replace' ) {
5859                                         $outText .= $frame->expand( $node, PPFrame::RECOVER_ORIG );
5860                                 }
5861                                 $node = $node->getNextSibling();
5862                         }
5863                 }
5864
5865                 if ( !$node ) {
5866                         # Not found
5867                         if ( $mode === 'get' ) {
5868                                 return $newText;
5869                         } else {
5870                                 return $text;
5871                         }
5872                 }
5873
5874                 # Find the end of the section, including nested sections
5875                 do {
5876                         if ( $node->getName() === 'h' ) {
5877                                 $bits = $node->splitHeading();
5878                                 $curLevel = $bits['level'];
5879                                 if ( $bits['i'] != $sectionIndex && $curLevel <= $targetLevel ) {
5880                                         break;
5881                                 }
5882                         }
5883                         if ( $mode === 'get' ) {
5884                                 $outText .= $frame->expand( $node, PPFrame::RECOVER_ORIG );
5885                         }
5886                         $node = $node->getNextSibling();
5887                 } while ( $node );
5888
5889                 # Write out the remainder (in replace mode only)
5890                 if ( $mode === 'replace' ) {
5891                         # Output the replacement text
5892                         # Add two newlines on -- trailing whitespace in $newText is conventionally
5893                         # stripped by the editor, so we need both newlines to restore the paragraph gap
5894                         # Only add trailing whitespace if there is newText
5895                         if ( $newText != "" ) {
5896                                 $outText .= $newText . "\n\n";
5897                         }
5898
5899                         while ( $node ) {
5900                                 $outText .= $frame->expand( $node, PPFrame::RECOVER_ORIG );
5901                                 $node = $node->getNextSibling();
5902                         }
5903                 }
5904
5905                 if ( is_string( $outText ) ) {
5906                         # Re-insert stripped tags
5907                         $outText = rtrim( $this->mStripState->unstripBoth( $outText ) );
5908                 }
5909
5910                 return $outText;
5911         }
5912
5913         /**
5914          * This function returns the text of a section, specified by a number ($section).
5915          * A section is text under a heading like == Heading == or \<h1\>Heading\</h1\>, or
5916          * the first section before any such heading (section 0).
5917          *
5918          * If a section contains subsections, these are also returned.
5919          *
5920          * @param string $text Text to look in
5921          * @param string|int $sectionId Section identifier as a number or string
5922          * (e.g. 0, 1 or 'T-1').
5923          * @param string $defaultText Default to return if section is not found
5924          *
5925          * @return string Text of the requested section
5926          */
5927         public function getSection( $text, $sectionId, $defaultText = '' ) {
5928                 return $this->extractSections( $text, $sectionId, 'get', $defaultText );
5929         }
5930
5931         /**
5932          * This function returns $oldtext after the content of the section
5933          * specified by $section has been replaced with $text. If the target
5934          * section does not exist, $oldtext is returned unchanged.
5935          *
5936          * @param string $oldText Former text of the article
5937          * @param string|int $sectionId Section identifier as a number or string
5938          * (e.g. 0, 1 or 'T-1').
5939          * @param string $newText Replacing text
5940          *
5941          * @return string Modified text
5942          */
5943         public function replaceSection( $oldText, $sectionId, $newText ) {
5944                 return $this->extractSections( $oldText, $sectionId, 'replace', $newText );
5945         }
5946
5947         /**
5948          * Get the ID of the revision we are parsing
5949          *
5950          * The return value will be either:
5951          *   - a) Positive, indicating a specific revision ID (current or old)
5952          *   - b) Zero, meaning the revision ID is specified by getCurrentRevisionCallback()
5953          *   - c) Null, meaning the parse is for preview mode and there is no revision
5954          *
5955          * @return int|null
5956          */
5957         public function getRevisionId() {
5958                 return $this->mRevisionId;
5959         }
5960
5961         /**
5962          * Get the revision object for $this->mRevisionId
5963          *
5964          * @return Revision|null Either a Revision object or null
5965          * @since 1.23 (public since 1.23)
5966          */
5967         public function getRevisionObject() {
5968                 if ( $this->mRevisionObject ) {
5969                         return $this->mRevisionObject;
5970                 }
5971
5972                 // NOTE: try to get the RevisionObject even if mRevisionId is null.
5973                 // This is useful when parsing a revision that has not yet been saved.
5974                 // However, if we get back a saved revision even though we are in
5975                 // preview mode, we'll have to ignore it, see below.
5976                 // NOTE: This callback may be used to inject an OLD revision that was
5977                 // already loaded, so "current" is a bit of a misnomer. We can't just
5978                 // skip it if mRevisionId is set.
5979                 $rev = call_user_func(
5980                         $this->mOptions->getCurrentRevisionCallback(),
5981                         $this->getTitle(),
5982                         $this
5983                 );
5984
5985                 if ( $this->mRevisionId === null && $rev && $rev->getId() ) {
5986                         // We are in preview mode (mRevisionId is null), and the current revision callback
5987                         // returned an existing revision. Ignore it and return null, it's probably the page's
5988                         // current revision, which is not what we want here. Note that we do want to call the
5989                         // callback to allow the unsaved revision to be injected here, e.g. for
5990                         // self-transclusion previews.
5991                         return null;
5992                 }
5993
5994                 // If the parse is for a new revision, then the callback should have
5995                 // already been set to force the object and should match mRevisionId.
5996                 // If not, try to fetch by mRevisionId for sanity.
5997                 if ( $this->mRevisionId && $rev && $rev->getId() != $this->mRevisionId ) {
5998                         $rev = Revision::newFromId( $this->mRevisionId );
5999                 }
6000
6001                 $this->mRevisionObject = $rev;
6002
6003                 return $this->mRevisionObject;
6004         }
6005
6006         /**
6007          * Get the timestamp associated with the current revision, adjusted for
6008          * the default server-local timestamp
6009          * @return string TS_MW timestamp
6010          */
6011         public function getRevisionTimestamp() {
6012                 if ( $this->mRevisionTimestamp !== null ) {
6013                         return $this->mRevisionTimestamp;
6014                 }
6015
6016                 # Use specified revision timestamp, falling back to the current timestamp
6017                 $revObject = $this->getRevisionObject();
6018                 $timestamp = $revObject ? $revObject->getTimestamp() : $this->mOptions->getTimestamp();
6019                 $this->mOutput->setRevisionTimestampUsed( $timestamp ); // unadjusted time zone
6020
6021                 # The cryptic '' timezone parameter tells to use the site-default
6022                 # timezone offset instead of the user settings.
6023                 # Since this value will be saved into the parser cache, served
6024                 # to other users, and potentially even used inside links and such,
6025                 # it needs to be consistent for all visitors.
6026                 $this->mRevisionTimestamp = $this->contLang->userAdjust( $timestamp, '' );
6027
6028                 return $this->mRevisionTimestamp;
6029         }
6030
6031         /**
6032          * Get the name of the user that edited the last revision
6033          *
6034          * @return string User name
6035          */
6036         public function getRevisionUser() {
6037                 if ( is_null( $this->mRevisionUser ) ) {
6038                         $revObject = $this->getRevisionObject();
6039
6040                         # if this template is subst: the revision id will be blank,
6041                         # so just use the current user's name
6042                         if ( $revObject ) {
6043                                 $this->mRevisionUser = $revObject->getUserText();
6044                         } elseif ( $this->ot['wiki'] || $this->mOptions->getIsPreview() ) {
6045                                 $this->mRevisionUser = $this->getUser()->getName();
6046                         }
6047                 }
6048                 return $this->mRevisionUser;
6049         }
6050
6051         /**
6052          * Get the size of the revision
6053          *
6054          * @return int|null Revision size
6055          */
6056         public function getRevisionSize() {
6057                 if ( is_null( $this->mRevisionSize ) ) {
6058                         $revObject = $this->getRevisionObject();
6059
6060                         # if this variable is subst: the revision id will be blank,
6061                         # so just use the parser input size, because the own substituation
6062                         # will change the size.
6063                         if ( $revObject ) {
6064                                 $this->mRevisionSize = $revObject->getSize();
6065                         } else {
6066                                 $this->mRevisionSize = $this->mInputSize;
6067                         }
6068                 }
6069                 return $this->mRevisionSize;
6070         }
6071
6072         /**
6073          * Mutator for $mDefaultSort
6074          *
6075          * @param string $sort New value
6076          */
6077         public function setDefaultSort( $sort ) {
6078                 $this->mDefaultSort = $sort;
6079                 $this->mOutput->setProperty( 'defaultsort', $sort );
6080         }
6081
6082         /**
6083          * Accessor for $mDefaultSort
6084          * Will use the empty string if none is set.
6085          *
6086          * This value is treated as a prefix, so the
6087          * empty string is equivalent to sorting by
6088          * page name.
6089          *
6090          * @return string
6091          */
6092         public function getDefaultSort() {
6093                 if ( $this->mDefaultSort !== false ) {
6094                         return $this->mDefaultSort;
6095                 } else {
6096                         return '';
6097                 }
6098         }
6099
6100         /**
6101          * Accessor for $mDefaultSort
6102          * Unlike getDefaultSort(), will return false if none is set
6103          *
6104          * @return string|bool
6105          */
6106         public function getCustomDefaultSort() {
6107                 return $this->mDefaultSort;
6108         }
6109
6110         private static function getSectionNameFromStrippedText( $text ) {
6111                 $text = Sanitizer::normalizeSectionNameWhitespace( $text );
6112                 $text = Sanitizer::decodeCharReferences( $text );
6113                 $text = self::normalizeSectionName( $text );
6114                 return $text;
6115         }
6116
6117         private static function makeAnchor( $sectionName ) {
6118                 return '#' . Sanitizer::escapeIdForLink( $sectionName );
6119         }
6120
6121         private function makeLegacyAnchor( $sectionName ) {
6122                 $fragmentMode = $this->svcOptions->get( 'FragmentMode' );
6123                 if ( isset( $fragmentMode[1] ) && $fragmentMode[1] === 'legacy' ) {
6124                         // ForAttribute() and ForLink() are the same for legacy encoding
6125                         $id = Sanitizer::escapeIdForAttribute( $sectionName, Sanitizer::ID_FALLBACK );
6126                 } else {
6127                         $id = Sanitizer::escapeIdForLink( $sectionName );
6128                 }
6129
6130                 return "#$id";
6131         }
6132
6133         /**
6134          * Try to guess the section anchor name based on a wikitext fragment
6135          * presumably extracted from a heading, for example "Header" from
6136          * "== Header ==".
6137          *
6138          * @param string $text
6139          * @return string Anchor (starting with '#')
6140          */
6141         public function guessSectionNameFromWikiText( $text ) {
6142                 # Strip out wikitext links(they break the anchor)
6143                 $text = $this->stripSectionName( $text );
6144                 $sectionName = self::getSectionNameFromStrippedText( $text );
6145                 return self::makeAnchor( $sectionName );
6146         }
6147
6148         /**
6149          * Same as guessSectionNameFromWikiText(), but produces legacy anchors
6150          * instead, if possible. For use in redirects, since various versions
6151          * of Microsoft browsers interpret Location: headers as something other
6152          * than UTF-8, resulting in breakage.
6153          *
6154          * @param string $text The section name
6155          * @return string Anchor (starting with '#')
6156          */
6157         public function guessLegacySectionNameFromWikiText( $text ) {
6158                 # Strip out wikitext links(they break the anchor)
6159                 $text = $this->stripSectionName( $text );
6160                 $sectionName = self::getSectionNameFromStrippedText( $text );
6161                 return $this->makeLegacyAnchor( $sectionName );
6162         }
6163
6164         /**
6165          * Like guessSectionNameFromWikiText(), but takes already-stripped text as input.
6166          * @param string $text Section name (plain text)
6167          * @return string Anchor (starting with '#')
6168          */
6169         public static function guessSectionNameFromStrippedText( $text ) {
6170                 $sectionName = self::getSectionNameFromStrippedText( $text );
6171                 return self::makeAnchor( $sectionName );
6172         }
6173
6174         /**
6175          * Apply the same normalization as code making links to this section would
6176          *
6177          * @param string $text
6178          * @return string
6179          */
6180         private static function normalizeSectionName( $text ) {
6181                 # T90902: ensure the same normalization is applied for IDs as to links
6182                 $titleParser = MediaWikiServices::getInstance()->getTitleParser();
6183                 try {
6184
6185                         $parts = $titleParser->splitTitleString( "#$text" );
6186                 } catch ( MalformedTitleException $ex ) {
6187                         return $text;
6188                 }
6189                 return $parts['fragment'];
6190         }
6191
6192         /**
6193          * Strips a text string of wikitext for use in a section anchor
6194          *
6195          * Accepts a text string and then removes all wikitext from the
6196          * string and leaves only the resultant text (i.e. the result of
6197          * [[User:WikiSysop|Sysop]] would be "Sysop" and the result of
6198          * [[User:WikiSysop]] would be "User:WikiSysop") - this is intended
6199          * to create valid section anchors by mimicking the output of the
6200          * parser when headings are parsed.
6201          *
6202          * @param string $text Text string to be stripped of wikitext
6203          * for use in a Section anchor
6204          * @return string Filtered text string
6205          */
6206         public function stripSectionName( $text ) {
6207                 # Strip internal link markup
6208                 $text = preg_replace( '/\[\[:?([^[|]+)\|([^[]+)\]\]/', '$2', $text );
6209                 $text = preg_replace( '/\[\[:?([^[]+)\|?\]\]/', '$1', $text );
6210
6211                 # Strip external link markup
6212                 # @todo FIXME: Not tolerant to blank link text
6213                 # I.E. [https://www.mediawiki.org] will render as [1] or something depending
6214                 # on how many empty links there are on the page - need to figure that out.
6215                 $text = preg_replace( '/\[(?i:' . $this->mUrlProtocols . ')([^ ]+?) ([^[]+)\]/', '$2', $text );
6216
6217                 # Parse wikitext quotes (italics & bold)
6218                 $text = $this->doQuotes( $text );
6219
6220                 # Strip HTML tags
6221                 $text = StringUtils::delimiterReplace( '<', '>', '', $text );
6222                 return $text;
6223         }
6224
6225         /**
6226          * strip/replaceVariables/unstrip for preprocessor regression testing
6227          *
6228          * @param string $text
6229          * @param Title $title
6230          * @param ParserOptions $options
6231          * @param int $outputType
6232          *
6233          * @return string
6234          */
6235         public function testSrvus( $text, Title $title, ParserOptions $options,
6236                 $outputType = self::OT_HTML
6237         ) {
6238                 $magicScopeVariable = $this->lock();
6239                 $this->startParse( $title, $options, $outputType, true );
6240
6241                 $text = $this->replaceVariables( $text );
6242                 $text = $this->mStripState->unstripBoth( $text );
6243                 $text = Sanitizer::removeHTMLtags( $text );
6244                 return $text;
6245         }
6246
6247         /**
6248          * @param string $text
6249          * @param Title $title
6250          * @param ParserOptions $options
6251          * @return string
6252          */
6253         public function testPst( $text, Title $title, ParserOptions $options ) {
6254                 return $this->preSaveTransform( $text, $title, $options->getUser(), $options );
6255         }
6256
6257         /**
6258          * @param string $text
6259          * @param Title $title
6260          * @param ParserOptions $options
6261          * @return string
6262          */
6263         public function testPreprocess( $text, Title $title, ParserOptions $options ) {
6264                 return $this->testSrvus( $text, $title, $options, self::OT_PREPROCESS );
6265         }
6266
6267         /**
6268          * Call a callback function on all regions of the given text that are not
6269          * inside strip markers, and replace those regions with the return value
6270          * of the callback. For example, with input:
6271          *
6272          *  aaa<MARKER>bbb
6273          *
6274          * This will call the callback function twice, with 'aaa' and 'bbb'. Those
6275          * two strings will be replaced with the value returned by the callback in
6276          * each case.
6277          *
6278          * @param string $s
6279          * @param callable $callback
6280          *
6281          * @return string
6282          */
6283         public function markerSkipCallback( $s, $callback ) {
6284                 $i = 0;
6285                 $out = '';
6286                 while ( $i < strlen( $s ) ) {
6287                         $markerStart = strpos( $s, self::MARKER_PREFIX, $i );
6288                         if ( $markerStart === false ) {
6289                                 $out .= call_user_func( $callback, substr( $s, $i ) );
6290                                 break;
6291                         } else {
6292                                 $out .= call_user_func( $callback, substr( $s, $i, $markerStart - $i ) );
6293                                 $markerEnd = strpos( $s, self::MARKER_SUFFIX, $markerStart );
6294                                 if ( $markerEnd === false ) {
6295                                         $out .= substr( $s, $markerStart );
6296                                         break;
6297                                 } else {
6298                                         $markerEnd += strlen( self::MARKER_SUFFIX );
6299                                         $out .= substr( $s, $markerStart, $markerEnd - $markerStart );
6300                                         $i = $markerEnd;
6301                                 }
6302                         }
6303                 }
6304                 return $out;
6305         }
6306
6307         /**
6308          * Remove any strip markers found in the given text.
6309          *
6310          * @param string $text
6311          * @return string
6312          */
6313         public function killMarkers( $text ) {
6314                 return $this->mStripState->killMarkers( $text );
6315         }
6316
6317         /**
6318          * Save the parser state required to convert the given half-parsed text to
6319          * HTML. "Half-parsed" in this context means the output of
6320          * recursiveTagParse() or internalParse(). This output has strip markers
6321          * from replaceVariables (extensionSubstitution() etc.), and link
6322          * placeholders from replaceLinkHolders().
6323          *
6324          * Returns an array which can be serialized and stored persistently. This
6325          * array can later be loaded into another parser instance with
6326          * unserializeHalfParsedText(). The text can then be safely incorporated into
6327          * the return value of a parser hook.
6328          *
6329          * @deprecated since 1.31
6330          * @param string $text
6331          *
6332          * @return array
6333          */
6334         public function serializeHalfParsedText( $text ) {
6335                 wfDeprecated( __METHOD__, '1.31' );
6336                 $data = [
6337                         'text' => $text,
6338                         'version' => self::HALF_PARSED_VERSION,
6339                         'stripState' => $this->mStripState->getSubState( $text ),
6340                         'linkHolders' => $this->mLinkHolders->getSubArray( $text )
6341                 ];
6342                 return $data;
6343         }
6344
6345         /**
6346          * Load the parser state given in the $data array, which is assumed to
6347          * have been generated by serializeHalfParsedText(). The text contents is
6348          * extracted from the array, and its markers are transformed into markers
6349          * appropriate for the current Parser instance. This transformed text is
6350          * returned, and can be safely included in the return value of a parser
6351          * hook.
6352          *
6353          * If the $data array has been stored persistently, the caller should first
6354          * check whether it is still valid, by calling isValidHalfParsedText().
6355          *
6356          * @deprecated since 1.31
6357          * @param array $data Serialized data
6358          * @throws MWException
6359          * @return string
6360          */
6361         public function unserializeHalfParsedText( $data ) {
6362                 wfDeprecated( __METHOD__, '1.31' );
6363                 if ( !isset( $data['version'] ) || $data['version'] != self::HALF_PARSED_VERSION ) {
6364                         throw new MWException( __METHOD__ . ': invalid version' );
6365                 }
6366
6367                 # First, extract the strip state.
6368                 $texts = [ $data['text'] ];
6369                 $texts = $this->mStripState->merge( $data['stripState'], $texts );
6370
6371                 # Now renumber links
6372                 $texts = $this->mLinkHolders->mergeForeign( $data['linkHolders'], $texts );
6373
6374                 # Should be good to go.
6375                 return $texts[0];
6376         }
6377
6378         /**
6379          * Returns true if the given array, presumed to be generated by
6380          * serializeHalfParsedText(), is compatible with the current version of the
6381          * parser.
6382          *
6383          * @deprecated since 1.31
6384          * @param array $data
6385          *
6386          * @return bool
6387          */
6388         public function isValidHalfParsedText( $data ) {
6389                 wfDeprecated( __METHOD__, '1.31' );
6390                 return isset( $data['version'] ) && $data['version'] == self::HALF_PARSED_VERSION;
6391         }
6392
6393         /**
6394          * Parse the width parameter of an image link, such as 300px or 200x300px
6395          *
6396          * @param string $value
6397          * @param bool $parseHeight
6398          *
6399          * @return array
6400          * @since 1.20
6401          */
6402         public static function parseWidthParam( $value, $parseHeight = true ) {
6403                 $parsedWidthParam = [];
6404                 if ( $value === '' ) {
6405                         return $parsedWidthParam;
6406                 }
6407                 $m = [];
6408                 # (T15500) In both cases (width/height and width only),
6409                 # permit trailing "px" for backward compatibility.
6410                 if ( $parseHeight && preg_match( '/^([0-9]*)x([0-9]*)\s*(?:px)?\s*$/', $value, $m ) ) {
6411                         $width = intval( $m[1] );
6412                         $height = intval( $m[2] );
6413                         $parsedWidthParam['width'] = $width;
6414                         $parsedWidthParam['height'] = $height;
6415                 } elseif ( preg_match( '/^[0-9]*\s*(?:px)?\s*$/', $value ) ) {
6416                         $width = intval( $value );
6417                         $parsedWidthParam['width'] = $width;
6418                 }
6419                 return $parsedWidthParam;
6420         }
6421
6422         /**
6423          * Lock the current instance of the parser.
6424          *
6425          * This is meant to stop someone from calling the parser
6426          * recursively and messing up all the strip state.
6427          *
6428          * @throws MWException If parser is in a parse
6429          * @return ScopedCallback The lock will be released once the return value goes out of scope.
6430          */
6431         protected function lock() {
6432                 if ( $this->mInParse ) {
6433                         throw new MWException( "Parser state cleared while parsing. "
6434                                 . "Did you call Parser::parse recursively? Lock is held by: " . $this->mInParse );
6435                 }
6436
6437                 // Save the backtrace when locking, so that if some code tries locking again,
6438                 // we can print the lock owner's backtrace for easier debugging
6439                 $e = new Exception;
6440                 $this->mInParse = $e->getTraceAsString();
6441
6442                 $recursiveCheck = new ScopedCallback( function () {
6443                         $this->mInParse = false;
6444                 } );
6445
6446                 return $recursiveCheck;
6447         }
6448
6449         /**
6450          * Strip outer <p></p> tag from the HTML source of a single paragraph.
6451          *
6452          * Returns original HTML if the <p/> tag has any attributes, if there's no wrapping <p/> tag,
6453          * or if there is more than one <p/> tag in the input HTML.
6454          *
6455          * @param string $html
6456          * @return string
6457          * @since 1.24
6458          */
6459         public static function stripOuterParagraph( $html ) {
6460                 $m = [];
6461                 if ( preg_match( '/^<p>(.*)\n?<\/p>\n?$/sU', $html, $m ) && strpos( $m[1], '</p>' ) === false ) {
6462                         $html = $m[1];
6463                 }
6464
6465                 return $html;
6466         }
6467
6468         /**
6469          * Return this parser if it is not doing anything, otherwise
6470          * get a fresh parser. You can use this method by doing
6471          * $newParser = $oldParser->getFreshParser(), or more simply
6472          * $oldParser->getFreshParser()->parse( ... );
6473          * if you're unsure if $oldParser is safe to use.
6474          *
6475          * @since 1.24
6476          * @return Parser A parser object that is not parsing anything
6477          */
6478         public function getFreshParser() {
6479                 if ( $this->mInParse ) {
6480                         return $this->factory->create();
6481                 } else {
6482                         return $this;
6483                 }
6484         }
6485
6486         /**
6487          * Sets up the PHP implementation of OOUI for use in this request
6488          * and instructs OutputPage to enable OOUI for itself.
6489          *
6490          * @since 1.26
6491          */
6492         public function enableOOUI() {
6493                 OutputPage::setupOOUI();
6494                 $this->mOutput->setEnableOOUI( true );
6495         }
6496
6497         /**
6498          * @param string $flag
6499          * @param string $reason
6500          */
6501         protected function setOutputFlag( $flag, $reason ) {
6502                 $this->mOutput->setFlag( $flag );
6503                 $name = $this->mTitle->getPrefixedText();
6504                 $this->logger->debug( __METHOD__ . ": set $flag flag on '$name'; $reason" );
6505         }
6506 }