3aa2c69ffa7ec36149f044d296766ad24c5a6762
[lhc/web/wiklou.git] / includes / parser / Parser.php
1 <?php
2 /**
3 * PHP parser that converts wiki markup to HTML.
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License along
16 * with this program; if not, write to the Free Software Foundation, Inc.,
17 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
18 * http://www.gnu.org/copyleft/gpl.html
19 *
20 * @file
21 * @ingroup Parser
22 */
23 use MediaWiki\BadFileLookup;
24 use MediaWiki\Config\ServiceOptions;
25 use MediaWiki\Linker\LinkRenderer;
26 use MediaWiki\Linker\LinkRendererFactory;
27 use MediaWiki\Linker\LinkTarget;
28 use MediaWiki\MediaWikiServices;
29 use MediaWiki\Special\SpecialPageFactory;
30 use Psr\Log\NullLogger;
31 use Wikimedia\ScopedCallback;
32 use Psr\Log\LoggerInterface;
33
34 /**
35 * @defgroup Parser Parser
36 */
37
38 /**
39 * PHP Parser - Processes wiki markup (which uses a more user-friendly
40 * syntax, such as "[[link]]" for making links), and provides a one-way
41 * transformation of that wiki markup into (X)HTML output / markup
42 * (which in turn the browser understands, and can display).
43 *
44 * There are seven main entry points into the Parser class:
45 *
46 * - Parser::parse()
47 * produces HTML output
48 * - Parser::preSaveTransform()
49 * produces altered wiki markup
50 * - Parser::preprocess()
51 * removes HTML comments and expands templates
52 * - Parser::cleanSig() and Parser::cleanSigInSig()
53 * cleans a signature before saving it to preferences
54 * - Parser::getSection()
55 * return the content of a section from an article for section editing
56 * - Parser::replaceSection()
57 * replaces a section by number inside an article
58 * - Parser::getPreloadText()
59 * removes <noinclude> sections and <includeonly> tags
60 *
61 * @warning $wgUser or $wgTitle or $wgRequest or $wgLang. Keep them away!
62 *
63 * @par Settings:
64 * $wgNamespacesWithSubpages
65 *
66 * @par Settings only within ParserOptions:
67 * $wgAllowExternalImages
68 * $wgAllowSpecialInclusion
69 * $wgInterwikiMagic
70 * $wgMaxArticleSize
71 *
72 * @ingroup Parser
73 */
74 class Parser {
75 /**
76 * Update this version number when the ParserOutput format
77 * changes in an incompatible way, so the parser cache
78 * can automatically discard old data.
79 */
80 const VERSION = '1.6.4';
81
82 /**
83 * Update this version number when the output of serialiseHalfParsedText()
84 * changes in an incompatible way
85 */
86 const HALF_PARSED_VERSION = 2;
87
88 # Flags for Parser::setFunctionHook
89 const SFH_NO_HASH = 1;
90 const SFH_OBJECT_ARGS = 2;
91
92 # Constants needed for external link processing
93 # Everything except bracket, space, or control characters
94 # \p{Zs} is the Unicode 'separator, space' category. It covers the space 0x20
95 # as well as U+3000 IDEOGRAPHIC SPACE (see T21052)
96 # \x{FFFD} is the Unicode replacement character, which Preprocessor_DOM
97 # uses to replace invalid HTML characters.
98 const EXT_LINK_URL_CLASS = '[^][<>"\\x00-\\x20\\x7F\p{Zs}\x{FFFD}]';
99 # Simplified expression to match an IPv4 or IPv6 address, or
100 # at least one character of a host name (embeds EXT_LINK_URL_CLASS)
101 const EXT_LINK_ADDR = '(?:[0-9.]+|\\[(?i:[0-9a-f:.]+)\\]|[^][<>"\\x00-\\x20\\x7F\p{Zs}\x{FFFD}])';
102 # RegExp to make image URLs (embeds IPv6 part of EXT_LINK_ADDR)
103 // phpcs:ignore Generic.Files.LineLength
104 const EXT_IMAGE_REGEX = '/^(http:\/\/|https:\/\/)((?:\\[(?i:[0-9a-f:.]+)\\])?[^][<>"\\x00-\\x20\\x7F\p{Zs}\x{FFFD}]+)
105 \\/([A-Za-z0-9_.,~%\\-+&;#*?!=()@\\x80-\\xFF]+)\\.((?i)gif|png|jpg|jpeg)$/Sxu';
106
107 # Regular expression for a non-newline space
108 const SPACE_NOT_NL = '(?:\t|&nbsp;|&\#0*160;|&\#[Xx]0*[Aa]0;|\p{Zs})';
109
110 # Flags for preprocessToDom
111 const PTD_FOR_INCLUSION = 1;
112
113 # Allowed values for $this->mOutputType
114 # Parameter to startExternalParse().
115 const OT_HTML = 1; # like parse()
116 const OT_WIKI = 2; # like preSaveTransform()
117 const OT_PREPROCESS = 3; # like preprocess()
118 const OT_MSG = 3;
119 const OT_PLAIN = 4; # like extractSections() - portions of the original are returned unchanged.
120
121 /**
122 * @var string Prefix and suffix for temporary replacement strings
123 * for the multipass parser.
124 *
125 * \x7f should never appear in input as it's disallowed in XML.
126 * Using it at the front also gives us a little extra robustness
127 * since it shouldn't match when butted up against identifier-like
128 * string constructs.
129 *
130 * Must not consist of all title characters, or else it will change
131 * the behavior of <nowiki> in a link.
132 *
133 * Must have a character that needs escaping in attributes, otherwise
134 * someone could put a strip marker in an attribute, to get around
135 * escaping quote marks, and break out of the attribute. Thus we add
136 * `'".
137 */
138 const MARKER_SUFFIX = "-QINU`\"'\x7f";
139 const MARKER_PREFIX = "\x7f'\"`UNIQ-";
140
141 # Markers used for wrapping the table of contents
142 const TOC_START = '<mw:toc>';
143 const TOC_END = '</mw:toc>';
144
145 /** @var int Assume that no output will later be saved this many seconds after parsing */
146 const MAX_TTS = 900;
147
148 # Persistent:
149 public $mTagHooks = [];
150 public $mTransparentTagHooks = [];
151 public $mFunctionHooks = [];
152 public $mFunctionSynonyms = [ 0 => [], 1 => [] ];
153 public $mFunctionTagHooks = [];
154 public $mStripList = [];
155 public $mDefaultStripList = [];
156 public $mVarCache = [];
157 public $mImageParams = [];
158 public $mImageParamsMagicArray = [];
159 public $mMarkerIndex = 0;
160 /**
161 * @var bool Whether firstCallInit still needs to be called
162 */
163 public $mFirstCall = true;
164
165 # Initialised by initialiseVariables()
166
167 /**
168 * @var MagicWordArray
169 */
170 public $mVariables;
171
172 /**
173 * @var MagicWordArray
174 */
175 public $mSubstWords;
176
177 /**
178 * @deprecated since 1.34, there should be no need to use this
179 * @var array
180 */
181 public $mConf;
182
183 # Initialised in constructor
184 public $mExtLinkBracketedRegex, $mUrlProtocols;
185
186 # Initialized in getPreprocessor()
187 /** @var Preprocessor */
188 public $mPreprocessor;
189
190 # Cleared with clearState():
191 /**
192 * @var ParserOutput
193 */
194 public $mOutput;
195 public $mAutonumber;
196
197 /**
198 * @var StripState
199 */
200 public $mStripState;
201
202 public $mIncludeCount;
203 /**
204 * @var LinkHolderArray
205 */
206 public $mLinkHolders;
207
208 public $mLinkID;
209 public $mIncludeSizes, $mPPNodeCount, $mGeneratedPPNodeCount, $mHighestExpansionDepth;
210 public $mDefaultSort;
211 public $mTplRedirCache, $mHeadings, $mDoubleUnderscores;
212 public $mExpensiveFunctionCount; # number of expensive parser function calls
213 public $mShowToc, $mForceTocPosition;
214 /** @var array */
215 public $mTplDomCache;
216
217 /**
218 * @var User
219 */
220 public $mUser; # User object; only used when doing pre-save transform
221
222 # Temporary
223 # These are variables reset at least once per parse regardless of $clearState
224
225 /**
226 * @var ParserOptions
227 */
228 public $mOptions;
229
230 /**
231 * Since 1.34, leaving `mTitle` uninitialized or setting `mTitle` to
232 * `null` is deprecated.
233 *
234 * @internal
235 * @var Title|null
236 */
237 public $mTitle; # Title context, used for self-link rendering and similar things
238 public $mOutputType; # Output type, one of the OT_xxx constants
239 public $ot; # Shortcut alias, see setOutputType()
240 public $mRevisionObject; # The revision object of the specified revision ID
241 public $mRevisionId; # ID to display in {{REVISIONID}} tags
242 public $mRevisionTimestamp; # The timestamp of the specified revision ID
243 public $mRevisionUser; # User to display in {{REVISIONUSER}} tag
244 public $mRevisionSize; # Size to display in {{REVISIONSIZE}} variable
245 public $mRevIdForTs; # The revision ID which was used to fetch the timestamp
246 public $mInputSize = false; # For {{PAGESIZE}} on current page.
247
248 /**
249 * @var array Array with the language name of each language link (i.e. the
250 * interwiki prefix) in the key, value arbitrary. Used to avoid sending
251 * duplicate language links to the ParserOutput.
252 */
253 public $mLangLinkLanguages;
254
255 /**
256 * @var MapCacheLRU|null
257 * @since 1.24
258 *
259 * A cache of the current revisions of titles. Keys are $title->getPrefixedDbKey()
260 */
261 public $currentRevisionCache;
262
263 /**
264 * @var bool|string Recursive call protection.
265 * This variable should be treated as if it were private.
266 */
267 public $mInParse = false;
268
269 /** @var SectionProfiler */
270 protected $mProfiler;
271
272 /**
273 * @var LinkRenderer
274 */
275 protected $mLinkRenderer;
276
277 /** @var MagicWordFactory */
278 private $magicWordFactory;
279
280 /** @var Language */
281 private $contLang;
282
283 /** @var ParserFactory */
284 private $factory;
285
286 /** @var SpecialPageFactory */
287 private $specialPageFactory;
288
289 /**
290 * This is called $svcOptions instead of $options like elsewhere to avoid confusion with
291 * $mOptions, which is public and widely used, and also with the local variable $options used
292 * for ParserOptions throughout this file.
293 *
294 * @var ServiceOptions
295 */
296 private $svcOptions;
297
298 /** @var LinkRendererFactory */
299 private $linkRendererFactory;
300
301 /** @var NamespaceInfo */
302 private $nsInfo;
303
304 /** @var LoggerInterface */
305 private $logger;
306
307 /** @var BadFileLookup */
308 private $badFileLookup;
309
310 /**
311 * TODO Make this a const when HHVM support is dropped (T192166)
312 *
313 * @var array
314 * @since 1.33
315 */
316 public static $constructorOptions = [
317 // See $wgParserConf documentation
318 'class',
319 'preprocessorClass',
320 // See documentation for the corresponding config options
321 'ArticlePath',
322 'EnableScaryTranscluding',
323 'ExtraInterlanguageLinkPrefixes',
324 'FragmentMode',
325 'LanguageCode',
326 'MaxSigChars',
327 'MaxTocLevel',
328 'MiserMode',
329 'ScriptPath',
330 'Server',
331 'ServerName',
332 'ShowHostnames',
333 'Sitename',
334 'StylePath',
335 'TranscludeCacheExpiry',
336 ];
337
/**
 * Constructing parsers directly is deprecated! Use a ParserFactory.
 *
 * Two calling conventions are accepted:
 *  - Current: $svcOptions is a ServiceOptions object holding every key listed
 *    in self::$constructorOptions, and the remaining parameters are as documented.
 *  - Pre-1.34: the first parameter is the parser configuration array (or
 *    null), the seventh positional argument is a Config, the eighth is a
 *    LinkRendererFactory and the ninth is a NamespaceInfo.
 *
 * Any service that is not supplied is fetched from MediaWikiServices.
 *
 * @param ServiceOptions|array|null $svcOptions
 * @param MagicWordFactory|null $magicWordFactory
 * @param Language|null $contLang Content language
 * @param ParserFactory|null $factory
 * @param string|null $urlProtocols As returned from wfUrlProtocols()
 * @param SpecialPageFactory|null $spFactory
 * @param LinkRendererFactory|null $linkRendererFactory
 * @param NamespaceInfo|null $nsInfo
 * @param LoggerInterface|null $logger
 * @param BadFileLookup|null $badFileLookup
 */
public function __construct(
	$svcOptions = null,
	MagicWordFactory $magicWordFactory = null,
	Language $contLang = null,
	ParserFactory $factory = null,
	$urlProtocols = null,
	SpecialPageFactory $spFactory = null,
	$linkRendererFactory = null,
	$nsInfo = null,
	$logger = null,
	BadFileLookup $badFileLookup = null
) {
	if ( !$svcOptions || is_array( $svcOptions ) ) {
		// Pre-1.34 calling convention is the first parameter is just ParserConf, the seventh is
		// Config, and the eighth is LinkRendererFactory.
		$this->mConf = (array)$svcOptions;
		if ( empty( $this->mConf['class'] ) ) {
			$this->mConf['class'] = self::class;
		}
		if ( empty( $this->mConf['preprocessorClass'] ) ) {
			$this->mConf['preprocessorClass'] = self::getDefaultPreprocessorClass();
		}
		$this->svcOptions = new ServiceOptions( self::$constructorOptions,
			$this->mConf, func_num_args() > 6
				? func_get_arg( 6 ) : MediaWikiServices::getInstance()->getMainConfig()
		);
		// The positional arguments are shifted by one relative to the declared
		// parameters, so they are re-read by index.
		$linkRendererFactory = func_num_args() > 7 ? func_get_arg( 7 ) : null;
		$nsInfo = func_num_args() > 8 ? func_get_arg( 8 ) : null;
	} else {
		// New calling convention
		$svcOptions->assertRequiredOptions( self::$constructorOptions );
		// $this->mConf is public, so we'll keep those two options there as well for
		// compatibility until it's removed
		$this->mConf = [
			'class' => $svcOptions->get( 'class' ),
			'preprocessorClass' => $svcOptions->get( 'preprocessorClass' ),
		];
		$this->svcOptions = $svcOptions;
	}

	$this->mUrlProtocols = $urlProtocols ?? wfUrlProtocols();
	$this->mExtLinkBracketedRegex = '/\[(((?i)' . $this->mUrlProtocols . ')' .
		self::EXT_LINK_ADDR .
		self::EXT_LINK_URL_CLASS . '*)\p{Zs}*([^\]\\x00-\\x08\\x0a-\\x1F\\x{FFFD}]*?)\]/Su';

	$this->magicWordFactory = $magicWordFactory ??
		MediaWikiServices::getInstance()->getMagicWordFactory();

	$this->contLang = $contLang ?? MediaWikiServices::getInstance()->getContentLanguage();

	$this->factory = $factory ?? MediaWikiServices::getInstance()->getParserFactory();
	$this->specialPageFactory = $spFactory ??
		MediaWikiServices::getInstance()->getSpecialPageFactory();
	$this->linkRendererFactory = $linkRendererFactory ??
		MediaWikiServices::getInstance()->getLinkRendererFactory();
	$this->nsInfo = $nsInfo ?? MediaWikiServices::getInstance()->getNamespaceInfo();
	// With the pre-1.34 convention the ninth positional argument (a
	// NamespaceInfo) occupies the $logger slot; only accept a real logger here
	// so that object is never stored as the logger.
	$this->logger = $logger instanceof LoggerInterface ? $logger : new NullLogger();
	$this->badFileLookup = $badFileLookup ??
		MediaWikiServices::getInstance()->getBadFileLookup();
}
412
/**
 * Drop every property when the parser is destroyed, which limits the memory
 * held alive by circular references.
 */
public function __destruct() {
	// The link holder array is released explicitly before the generic sweep.
	if ( isset( $this->mLinkHolders ) ) {
		// @phan-suppress-next-line PhanTypeObjectUnsetDeclaredProperty
		unset( $this->mLinkHolders );
	}
	// @phan-suppress-next-line PhanTypeSuspiciousNonTraversableForeach
	foreach ( $this as $property => $unused ) {
		unset( $this->$property );
	}
}
426
/**
 * Prepare a freshly cloned parser: clear the recursion guard, detach the
 * fields that may be PHP references, and notify extensions through the
 * 'ParserCloned' hook so they can clean up.
 */
public function __clone() {
	$this->mInParse = false;

	// T58226: taking a reference "to" an object field turns the field itself
	// into a reference for as long as the other reference is alive, and
	// cloning copies reference fields as references. Old hooks dating from
	// PHP4 days pass these fields by reference, so the clone would otherwise
	// keep sharing them with the original.
	foreach ( [ 'mStripState', 'mVarCache' ] as $field ) {
		// Take a plain (non-reference) copy of the value, then rebind the
		// field so that it refers to that copy instead.
		$detached = $this->$field;
		$this->$field =& $detached;
		unset( $detached );
	}

	Hooks::run( 'ParserCloned', [ $this ] );
}
449
/**
 * Name of the preprocessor class used when the configuration does not
 * specify one.
 *
 * @since 1.34
 * @deprecated since 1.34, removing configurability of preprocessor
 * @return string Fully qualified class name
 */
public static function getDefaultPreprocessorClass() {
	$defaultClass = Preprocessor_Hash::class;
	return $defaultClass;
}
460
/**
 * One-time initialisation: register the core parser functions and tag hooks,
 * set up the variables, and run the 'ParserFirstCallInit' hook. Calls after
 * the first one do nothing.
 */
public function firstCallInit() {
	if ( $this->mFirstCall ) {
		// The flag is cleared up front, so any nested call made while
		// registering is a no-op.
		$this->mFirstCall = false;

		CoreParserFunctions::register( $this );
		CoreTagHooks::register( $this );
		$this->initialiseVariables();

		// Avoid PHP 7.1 warning from passing $this by reference
		$self = $this;
		Hooks::run( 'ParserFirstCallInit', [ &$self ] );
	}
}
478
/**
 * Reset all per-parse state: a new ParserOutput, link holders and strip
 * state, zeroed counters, emptied caches, and a new profiler. Finishes by
 * running the 'ParserClearState' hook.
 *
 * @private
 */
public function clearState() {
	$this->firstCallInit();
	$this->resetOutput();

	$this->mAutonumber = 0;
	$this->mIncludeCount = [];
	$this->mLinkHolders = new LinkHolderArray( $this );
	$this->mLinkID = 0;

	// Forget everything known about the revision being rendered
	$this->mRevisionSize = null;
	$this->mRevisionUser = null;
	$this->mRevisionId = null;
	$this->mRevisionTimestamp = null;
	$this->mRevisionObject = null;

	$this->mVarCache = [];
	$this->mUser = null;
	$this->mLangLinkLanguages = [];
	$this->currentRevisionCache = null;

	$this->mStripState = new StripState( $this );

	# Clear these on every parse, T6549
	$this->mTplDomCache = [];
	$this->mTplRedirCache = [];

	$this->mShowToc = true;
	$this->mForceTocPosition = false;
	$this->mIncludeSizes = [
		'post-expand' => 0,
		'arg' => 0,
	];
	$this->mPPNodeCount = 0;
	$this->mGeneratedPPNodeCount = 0;
	$this->mHighestExpansionDepth = 0;
	$this->mDefaultSort = false;
	$this->mHeadings = [];
	$this->mDoubleUnderscores = [];
	$this->mExpensiveFunctionCount = 0;

	# Fix cloning: discard a preprocessor that is bound to a different parser
	if ( isset( $this->mPreprocessor ) && $this->mPreprocessor->parser !== $this ) {
		$this->mPreprocessor = null;
	}

	$this->mProfiler = new SectionProfiler();

	// Avoid PHP 7.1 warning from passing $this by reference
	$self = $this;
	Hooks::run( 'ParserClearState', [ &$self ] );
}
528
/**
 * Replace the current ParserOutput with a new, empty one and register its
 * recordOption() method as the watcher on the current ParserOptions.
 */
public function resetOutput() {
	$freshOutput = new ParserOutput();
	$this->mOutput = $freshOutput;
	$this->mOptions->registerWatcher( [ $freshOutput, 'recordOption' ] );
}
536
/**
 * Convert wikitext to HTML and return the populated ParserOutput.
 * Do not call this function recursively.
 *
 * The revision-related fields and the input size are only overridden for the
 * duration of the call; their previous values are put back before returning.
 *
 * @param string $text Text we want to parse
 * @param-taint $text escapes_htmlnoent
 * @param Title $title
 * @param ParserOptions $options
 * @param bool $linestart
 * @param bool $clearState
 * @param int|null $revid ID of the revision being rendered. This is used to render
 *  REVISION* magic words. 0 means that any current revision will be used. Null means
 *  that {{REVISIONID}}/{{REVISIONUSER}} will be empty and {{REVISIONTIMESTAMP}} will
 *  use the current timestamp.
 * @return ParserOutput A ParserOutput
 * @return-taint escaped
 */
public function parse(
	$text, Title $title, ParserOptions $options,
	$linestart = true, $clearState = true, $revid = null
) {
	if ( $clearState ) {
		// Strip markers are built around U+007F DELETE, so that character
		// must not be present in the input text.
		$text = strtr( $text, "\x7f", "?" );
		// NOTE(review): the return value is kept in a local and never read;
		// presumably it releases the lock when it goes out of scope — confirm
		// against lock().
		$scopedLock = $this->lock();
	}
	// Strip U+0000 NULL (T159174)
	$text = str_replace( "\000", '', $text );

	$this->startParse( $title, $options, self::OT_HTML, $clearState );

	$this->currentRevisionCache = null;
	$this->mInputSize = strlen( $text );
	if ( $this->mOptions->getEnableLimitReport() ) {
		$this->mOutput->resetParseStartTime();
	}

	// Snapshot the revision context so it can be restored on the way out
	$savedRevision = [
		$this->mRevisionId,
		$this->mRevisionObject,
		$this->mRevisionTimestamp,
		$this->mRevisionUser,
		$this->mRevisionSize,
	];
	if ( $revid !== null ) {
		$this->mRevisionId = $revid;
		$this->mRevisionObject = null;
		$this->mRevisionTimestamp = null;
		$this->mRevisionUser = null;
		$this->mRevisionSize = null;
	}

	// Avoid PHP 7.1 warning from passing $this by reference
	$self = $this;
	Hooks::run( 'ParserBeforeStrip', [ &$self, &$text, &$this->mStripState ] );
	# No more strip!
	Hooks::run( 'ParserAfterStrip', [ &$self, &$text, &$this->mStripState ] );
	$text = $this->internalParse( $text );
	Hooks::run( 'ParserAfterParse', [ &$self, &$text, &$this->mStripState ] );

	$text = $this->internalParseHalfParsed( $text, true, $linestart );

	/**
	 * A converted title is stored in the output object only when title
	 * conversion is not disabled in the options, the text carries neither
	 * conversion-suppressing double-underscore tag, and no
	 * {{DISPLAYTITLE:...}} is present. DISPLAYTITLE takes precedence over
	 * automatic link conversion.
	 */
	if ( !$options->getDisableTitleConversion()
		&& !isset( $this->mDoubleUnderscores['nocontentconvert'] )
		&& !isset( $this->mDoubleUnderscores['notitleconvert'] )
		&& $this->mOutput->getDisplayTitle() === false
	) {
		// An explicit conversion rule title wins; otherwise convert the
		// title itself.
		$titleText = $this->getTargetLanguage()->getConvRuleTitle()
			?: $this->getTargetLanguage()->convertTitle( $title );
		$this->mOutput->setTitleText( $titleText );
	}

	# Compute runtime adaptive expiry if set
	$this->mOutput->finalizeAdaptiveCacheExpiry();

	# Warn if too many heavyweight parser functions were used
	if ( $this->mExpensiveFunctionCount > $this->mOptions->getExpensiveParserFunctionLimit() ) {
		$this->limitationWarn(
			'expensive-parserfunction',
			$this->mExpensiveFunctionCount,
			$this->mOptions->getExpensiveParserFunctionLimit()
		);
	}

	# Information on limits, for the benefit of users who try to skirt them
	if ( $this->mOptions->getEnableLimitReport() ) {
		$text .= $this->makeLimitReport();
	}

	# Wrap non-interface parser output in a <div> so it can be targeted
	# with CSS (T37247)
	$wrapperClass = $this->mOptions->getWrapOutputClass();
	if ( $wrapperClass !== false && !$this->mOptions->getInterfaceMessage() ) {
		$this->mOutput->addWrapperDivClass( $wrapperClass );
	}

	$this->mOutput->setText( $text );

	// Put the caller's revision context back
	[
		$this->mRevisionId,
		$this->mRevisionObject,
		$this->mRevisionTimestamp,
		$this->mRevisionUser,
		$this->mRevisionSize,
	] = $savedRevision;
	$this->mInputSize = false;
	$this->currentRevisionCache = null;

	return $this->mOutput;
}
654
/**
 * Record the limit report data on the current ParserOutput and build the
 * matching HTML comments: the "NewPP limit report" and the transclusion
 * expansion time report.
 *
 * @return string The two HTML comments, ready to append to the output text
 */
protected function makeLimitReport() {
	$maxIncludeSize = $this->mOptions->getMaxIncludeSize();

	// CPU time is only recorded when a value is available
	$cpuSeconds = $this->mOutput->getTimeSinceStart( 'cpu' );
	if ( $cpuSeconds !== null ) {
		$this->mOutput->setLimitReportData(
			'limitreport-cputime',
			sprintf( "%.3f", $cpuSeconds )
		);
	}

	$wallSeconds = $this->mOutput->getTimeSinceStart( 'wall' );
	$this->mOutput->setLimitReportData(
		'limitreport-walltime',
		sprintf( "%.3f", $wallSeconds )
	);

	// Each of the following entries is a [ used, allowed ] pair
	$this->mOutput->setLimitReportData(
		'limitreport-ppvisitednodes',
		[ $this->mPPNodeCount, $this->mOptions->getMaxPPNodeCount() ]
	);
	$this->mOutput->setLimitReportData(
		'limitreport-ppgeneratednodes',
		[ $this->mGeneratedPPNodeCount, $this->mOptions->getMaxGeneratedPPNodeCount() ]
	);
	$this->mOutput->setLimitReportData(
		'limitreport-postexpandincludesize',
		[ $this->mIncludeSizes['post-expand'], $maxIncludeSize ]
	);
	$this->mOutput->setLimitReportData(
		'limitreport-templateargumentsize',
		[ $this->mIncludeSizes['arg'], $maxIncludeSize ]
	);
	$this->mOutput->setLimitReportData(
		'limitreport-expansiondepth',
		[ $this->mHighestExpansionDepth, $this->mOptions->getMaxPPExpandDepth() ]
	);
	$this->mOutput->setLimitReportData(
		'limitreport-expensivefunctioncount',
		[ $this->mExpensiveFunctionCount, $this->mOptions->getExpensiveParserFunctionLimit() ]
	);

	foreach ( $this->mStripState->getLimitReport() as [ $stripKey, $stripValue ] ) {
		$this->mOutput->setLimitReportData( $stripKey, $stripValue );
	}

	Hooks::run( 'ParserLimitReportPrepare', [ $this, $this->mOutput ] );

	// Fixed header lines of the textual report
	$limitReport = "NewPP limit report\n";
	if ( $this->svcOptions->get( 'ShowHostnames' ) ) {
		$limitReport .= 'Parsed by ' . wfHostname() . "\n";
	}
	$limitReport .= 'Cached time: ' . $this->mOutput->getCacheTime() . "\n";
	$limitReport .= 'Cache expiry: ' . $this->mOutput->getCacheExpiry() . "\n";
	$dynamicLabel = $this->mOutput->hasDynamicContent() ? 'true' : 'false';
	$limitReport .= 'Dynamic content: ' . $dynamicLabel . "\n";
	$limitReport .= 'Complications: [' . implode( ', ', $this->mOutput->getAllFlags() ) . "]\n";

	// One line per recorded datum, unless a hook handler takes over formatting
	foreach ( $this->mOutput->getLimitReportData() as $reportKey => $reportValue ) {
		$useDefaultFormat = Hooks::run(
			'ParserLimitReportFormat',
			[ $reportKey, &$reportValue, &$limitReport, false, false ]
		);
		if ( !$useDefaultFormat ) {
			continue;
		}

		$keyMsg = wfMessage( $reportKey )->inLanguage( 'en' )->useDatabase( false );
		$valueMsg = wfMessage( [ $reportKey . '-value-text', $reportKey . '-value' ] )
			->inLanguage( 'en' )->useDatabase( false );
		if ( !$valueMsg->exists() ) {
			// No dedicated value message: print the raw value
			$valueMsg = new RawMessage( '$1' );
		}
		if ( $keyMsg->isDisabled() || $valueMsg->isDisabled() ) {
			continue;
		}
		$valueMsg->params( $reportValue );
		$limitReport .= $keyMsg->text() . ': ' . $valueMsg->text() . "\n";
	}
	// Since we're not really outputting HTML, decode the entities and
	// then re-encode the things that need hiding inside HTML comments.
	$limitReport = htmlspecialchars_decode( $limitReport );

	// Sanitize for comment. Note '‐' in the replacement is U+2010,
	// which looks much like the problematic '-'.
	$limitReport = str_replace( [ '-', '&' ], [ '‐', '&amp;' ], $limitReport );
	$text = "\n<!-- \n" . $limitReport . "-->\n";

	// Add on template profiling data in human/machine readable way
	$statsByFunction = $this->mProfiler->getFunctionStats();
	uasort( $statsByFunction, function ( $left, $right ) {
		// Largest 'real' time first
		return $right['real'] <=> $left['real'];
	} );
	$profileReport = [];
	// Only the first ten entries after sorting are reported
	foreach ( array_slice( $statsByFunction, 0, 10 ) as $stat ) {
		$profileReport[] = sprintf(
			"%6.2f%% %8.3f %6d %s",
			$stat['%real'],
			$stat['real'],
			$stat['calls'],
			htmlspecialchars( $stat['name'] )
		);
	}
	$text .= "<!--\nTransclusion expansion time report (%,ms,calls,template)\n";
	$text .= implode( "\n", $profileReport ) . "\n-->\n";

	$this->mOutput->setLimitReportData( 'limitreport-timingprofile', $profileReport );

	// Add other cache related metadata
	if ( $this->svcOptions->get( 'ShowHostnames' ) ) {
		$this->mOutput->setLimitReportData( 'cachereport-origin', wfHostname() );
	}
	$this->mOutput->setLimitReportData(
		'cachereport-timestamp',
		$this->mOutput->getCacheTime()
	);
	$this->mOutput->setLimitReportData(
		'cachereport-ttl',
		$this->mOutput->getCacheExpiry()
	);
	$this->mOutput->setLimitReportData(
		'cachereport-transientcontent',
		$this->mOutput->hasDynamicContent()
	);

	// Log pages whose generated node count exceeds a tenth of the limit
	if ( $this->mGeneratedPPNodeCount > $this->mOptions->getMaxGeneratedPPNodeCount() / 10 ) {
		wfDebugLog(
			'generated-pp-node-count',
			$this->mGeneratedPPNodeCount . ' ' . $this->mTitle->getPrefixedDBkey()
		);
	}
	return $text;
}
770
/**
 * Half-parse wikitext to half-parsed HTML. This recursive parser entry point
 * can be called from an extension tag hook.
 *
 * The result IS NOT SAFE PARSED HTML. It is "half-parsed": lists and links
 * have not been fully parsed yet, and strip markers are still present.
 * Use recursiveTagParseFully() to get output-safe HTML instead.
 *
 * This is the right function for a parser tag hook that wants to parse
 * wikitext before or after applying its own transformations and then
 * *return the result as hook output*, so that it goes through the rest of
 * the parsing process automatically.
 *
 * If $frame is not provided, then template variables (e.g., {{{1}}}) within
 * $text are not expanded
 *
 * @param string $text Text extension wants to have parsed
 * @param-taint $text escapes_htmlnoent
 * @param bool|PPFrame $frame The frame to use for expanding any template variables
 * @return string UNSAFE half-parsed HTML
 * @return-taint escaped
 */
public function recursiveTagParse( $text, $frame = false ) {
	// Avoid PHP 7.1 warning from passing $this by reference
	$self = $this;
	Hooks::run( 'ParserBeforeStrip', [ &$self, &$text, &$this->mStripState ] );
	Hooks::run( 'ParserAfterStrip', [ &$self, &$text, &$this->mStripState ] );
	return $this->internalParse( $text, false, $frame );
}
803
/**
 * Fully parse wikitext to fully parsed HTML. This recursive parser entry
 * point can be called from an extension tag hook.
 *
 * Runs recursiveTagParse() and then finishes the half-parsed result, so the
 * returned HTML is safe for output. A parser tag hook might want to use
 * recursiveTagParse() instead.
 *
 * If $frame is not provided, then template variables (e.g., {{{1}}}) within
 * $text are not expanded
 *
 * @since 1.25
 *
 * @param string $text Text extension wants to have parsed
 * @param-taint $text escapes_htmlnoent
 * @param bool|PPFrame $frame The frame to use for expanding any template variables
 * @return string Fully parsed HTML
 * @return-taint escaped
 */
public function recursiveTagParseFully( $text, $frame = false ) {
	$halfParsed = $this->recursiveTagParse( $text, $frame );
	return $this->internalParseHalfParsed( $halfParsed, false );
}
828
/**
 * Expand templates and variables in the text, producing valid, static wikitext.
 * Also removes comments.
 * Do not call this function recursively.
 *
 * @param string $text
 * @param Title|null $title
 * @param ParserOptions $options
 * @param int|null $revid Revision ID to use for this run; left untouched when null
 * @param bool|PPFrame $frame
 * @return mixed|string
 */
public function preprocess( $text, Title $title = null,
	ParserOptions $options, $revid = null, $frame = false
) {
	// NOTE(review): kept in a local and never read; presumably releases the
	// lock when it goes out of scope — confirm against lock().
	$scopedLock = $this->lock();
	$this->startParse( $title, $options, self::OT_PREPROCESS, true );
	if ( $revid !== null ) {
		$this->mRevisionId = $revid;
	}
	// Avoid PHP 7.1 warning from passing $this by reference
	$self = $this;
	Hooks::run( 'ParserBeforeStrip', [ &$self, &$text, &$this->mStripState ] );
	Hooks::run( 'ParserAfterStrip', [ &$self, &$text, &$this->mStripState ] );
	$expanded = $this->replaceVariables( $text, $frame );
	return $this->mStripState->unstripBoth( $expanded );
}
856
/**
 * Recursive parser entry point that can be called from an extension tag
 * hook. Expands variables in the text and then unstrips the result.
 *
 * @param string $text Text to be expanded
 * @param bool|PPFrame $frame The frame to use for expanding any template variables
 * @return string
 * @since 1.19
 */
public function recursivePreprocess( $text, $frame = false ) {
	$expanded = $this->replaceVariables( $text, $frame );
	return $this->mStripState->unstripBoth( $expanded );
}
871
/**
 * Process the wikitext for the "?preload=" feature. (T7210)
 *
 * "<noinclude>", "<includeonly>" etc. are parsed as for template
 * transclusion, comments, templates, arguments, tags hooks and parser
 * functions are untouched.
 *
 * @param string $text
 * @param Title $title
 * @param ParserOptions $options
 * @param array $params Parameters substituted into $text before it is processed
 * @return string
 */
public function getPreloadText( $text, Title $title, ParserOptions $options, $params = [] ) {
	// Substitute the parameters first, treating the text as a raw message
	$text = ( new RawMessage( $text ) )->params( $params )->plain();

	# Parser (re)initialisation
	// NOTE(review): kept in a local and never read; presumably releases the
	// lock when it goes out of scope — confirm against lock().
	$scopedLock = $this->lock();
	$this->startParse( $title, $options, self::OT_PLAIN, true );

	// Expand for inclusion, with argument and template expansion switched off
	$expandFlags = PPFrame::NO_ARGS | PPFrame::NO_TEMPLATES;
	$dom = $this->preprocessToDom( $text, self::PTD_FOR_INCLUSION );
	$expanded = $this->getPreprocessor()->newFrame()->expand( $dom, $expandFlags );
	return $this->mStripState->unstripBoth( $expanded );
}
899
/**
 * Store the user for this parser.
 * Should only be used when doing pre-save transform.
 *
 * @param User|null $user User object, or null to reset
 */
public function setUser( $user ) {
	// Stored as-is; no validation is done here
	$this->mUser = $user;
}
909
/**
 * Set the context title.
 *
 * When no title is given, the placeholder Special:Badtitle/Parser is used.
 * A title carrying a fragment is stored without that fragment.
 *
 * @param Title|null $t
 */
public function setTitle( Title $t = null ) {
	$title = $t ?: Title::makeTitle( NS_SPECIAL, 'Badtitle/Parser' );

	# Strip the fragment to avoid various odd effects
	$this->mTitle = $title->hasFragment()
		? $title->createFragmentTarget( '' )
		: $title;
}
927
/**
 * Get the Title object currently set on the parser.
 *
 * Since 1.34, leaving `mTitle` uninitialized as `null` is deprecated; a
 * deprecation notice is raised here when that is the case, and null is
 * still returned.
 *
 * @return Title|null
 */
public function getTitle() : ?Title {
	$title = $this->mTitle;
	if ( $title === null ) {
		wfDeprecated( 'Parser title should never be null', '1.34' );
	}

	return $title;
}
941
/**
 * Combined accessor/mutator for the Title object.
 *
 * Delegates to wfSetVar() on the stored title.
 *
 * @param Title|null $x Title object or null to just get the current one
 * @return Title|null
 */
public function Title( Title $x = null ) : ?Title {
	return wfSetVar( $this->mTitle, $x );
}
951
/**
 * Set the output type, and refresh the `$this->ot` shortcut flags that
 * record which of the OT_* modes is active.
 *
 * @param int $ot New value (one of the self::OT_* constants)
 */
public function setOutputType( $ot ) {
	$this->mOutputType = $ot;

	# Shortcut alias: one boolean per known output type
	$modes = [
		'html' => self::OT_HTML,
		'wiki' => self::OT_WIKI,
		'pre' => self::OT_PREPROCESS,
		'plain' => self::OT_PLAIN,
	];
	$flags = [];
	foreach ( $modes as $key => $mode ) {
		$flags[$key] = ( $ot == $mode );
	}
	$this->ot = $flags;
}
967
/**
 * Combined accessor/mutator for the output type.
 *
 * Delegates to wfSetVar() on the stored output type. Unlike
 * setOutputType(), this does not touch the `$this->ot` shortcut flags.
 *
 * @param int|null $x New value or null to just get the current one
 * @return int
 */
public function OutputType( $x = null ) {
	return wfSetVar( $this->mOutputType, $x );
}
977
/**
 * Get the ParserOutput object held by this parser.
 *
 * @return ParserOutput
 */
public function getOutput() {
	return $this->mOutput;
}
986
/**
 * Get the ParserOptions object held by this parser.
 *
 * @return ParserOptions
 */
public function getOptions() {
	return $this->mOptions;
}
995
/**
 * Combined accessor/mutator for the ParserOptions object.
 *
 * Delegates to wfSetVar() on the stored options.
 *
 * @param ParserOptions|null $x New value or null to just get the current one
 * @return ParserOptions Current ParserOptions object
 */
public function Options( $x = null ) {
	return wfSetVar( $this->mOptions, $x );
}
1005
/**
 * Hand out the current link ID and advance the counter by one.
 *
 * @return int The link ID as it was before the increment
 */
public function nextLinkID() {
	$current = $this->mLinkID;
	$this->mLinkID++;

	return $current;
}
1012
/**
 * Set the value that the next call to nextLinkID() will return.
 *
 * @param int $id
 */
public function setLinkID( $id ) {
	$this->mLinkID = $id;
}
1019
/**
 * Get a language object for use in parser functions such as {{FORMATNUM:}}.
 *
 * This is an alias for getTargetLanguage().
 *
 * @return Language
 */
public function getFunctionLang() {
	return $this->getTargetLanguage();
}
1027
/**
 * Get the target language for the content being parsed. This is usually the
 * language that the content is in.
 *
 * Resolution order:
 *  1. the target language set on the ParserOptions, if any;
 *  2. the user language, when parsing an interface message;
 *  3. the page language of the current title.
 *
 * @since 1.19
 *
 * @throws MWException If step 3 is reached while no title is set
 * @return Language
 */
public function getTargetLanguage() {
	$options = $this->mOptions;

	$explicit = $options->getTargetLanguage();
	if ( $explicit !== null ) {
		return $explicit;
	}

	if ( $options->getInterfaceMessage() ) {
		return $options->getUserLangObj();
	}

	if ( $this->mTitle === null ) {
		throw new MWException( __METHOD__ . ': $this->mTitle is null' );
	}

	return $this->mTitle->getPageLanguage();
}
1050
/**
 * Get the language object for language conversion.
 *
 * Simply forwards to getTargetLanguage().
 *
 * @deprecated since 1.32, just use getTargetLanguage()
 * @return Language
 */
public function getConverterLanguage() {
	return $this->getTargetLanguage();
}
1059
/**
 * Get the User object to parse for: the one set through setUser() when
 * there is one, otherwise the user of the ParserOptions object.
 *
 * @return User
 */
public function getUser() {
	if ( $this->mUser === null ) {
		return $this->mOptions->getUser();
	}

	return $this->mUser;
}
1072
/**
 * Get the preprocessor object, creating it on first use.
 *
 * The class to instantiate is read from the 'preprocessorClass' service
 * option; the instance is then reused for later calls.
 *
 * @return Preprocessor
 */
public function getPreprocessor() {
	if ( isset( $this->mPreprocessor ) ) {
		return $this->mPreprocessor;
	}

	$class = $this->svcOptions->get( 'preprocessorClass' );
	$this->mPreprocessor = new $class( $this );

	return $this->mPreprocessor;
}
1085
/**
 * Get a LinkRenderer instance to make links with.
 *
 * The renderer is created on first use, with the stub threshold of the
 * parser options in effect at that moment.
 *
 * @since 1.28
 * @return LinkRenderer
 */
public function getLinkRenderer() {
	if ( $this->mLinkRenderer ) {
		return $this->mLinkRenderer;
	}

	// XXX We make the LinkRenderer with current options and then cache it forever
	$renderer = $this->linkRendererFactory->create();
	$this->mLinkRenderer = $renderer;
	$renderer->setStubThreshold( $this->getOptions()->getStubThreshold() );

	return $this->mLinkRenderer;
}
1103
/**
 * Get the MagicWordFactory that this Parser is using.
 *
 * @since 1.32
 * @return MagicWordFactory
 */
public function getMagicWordFactory() {
	return $this->magicWordFactory;
}
1113
/**
 * Get the content language that this Parser is using.
 *
 * @since 1.32
 * @return Language
 */
public function getContentLanguage() {
	return $this->contLang;
}
1123
/**
 * Replaces all occurrences of HTML-style comments and the given tags
 * in the text with a unique marker and returns the new text. The output
 * parameter $matches will be an associative array filled with data in
 * the form:
 *
 * @code
 *   'UNIQ-xxxxx' => [
 *     'element',
 *     'tag content',
 *     [ 'param' => 'x' ],
 *     '<element param="x">tag content</element>' ]
 * @endcode
 *
 * An element without a closing tag runs to the end of the text. For a
 * self-closing element (`<tag />`) the content entry is null.
 *
 * @param array $elements List of element names. Comments are always extracted.
 * @param string $text Source text string.
 * @param array &$matches Out parameter, Array: extracted tags
 * @return string Stripped text
 */
public static function extractTagsAndParams( $elements, $text, &$matches ) {
	// Marker counter shared by all calls during this request
	static $n = 1;
	$stripped = '';
	$matches = [];

	$taglist = implode( '|', $elements );
	$start = "/<($taglist)(\\s+[^>]*?|\\s*?)(\/?>)|<(!--)/i";

	while ( $text != '' ) {
		$p = preg_split( $start, $text, 2, PREG_SPLIT_DELIM_CAPTURE );
		$stripped .= $p[0];
		if ( count( $p ) < 5 ) {
			// No further opening tag or comment in the remaining text
			break;
		}
		if ( count( $p ) > 5 ) {
			# comment: the fourth capture group took part in the match,
			# so the first three delimiters are empty placeholders
			$element = $p[4];
			$attributes = '';
			$close = '';
			$inside = $p[5];
		} else {
			# tag
			list( , $element, $attributes, $close, $inside ) = $p;
		}

		$marker = self::MARKER_PREFIX . "-$element-" . sprintf( '%08X', $n++ ) . self::MARKER_SUFFIX;
		$stripped .= $marker;

		if ( $close === '/>' ) {
			# Empty element tag, <tag />
			$content = null;
			$text = $inside;
			$tail = null;
		} else {
			if ( $element === '!--' ) {
				$end = '/(-->)/';
			} else {
				# $element is literal text taken from the input, so quote it:
				# any regex metacharacter in the tag name must match itself
				# when looking for the closing tag.
				$end = '/(<\\/' . preg_quote( $element, '/' ) . '\\s*>)/i';
			}
			$q = preg_split( $end, $inside, 2, PREG_SPLIT_DELIM_CAPTURE );
			$content = $q[0];
			if ( count( $q ) < 3 ) {
				# No end tag -- let it run out to the end of the text.
				$tail = '';
				$text = '';
			} else {
				list( , $tail, $text ) = $q;
			}
		}

		$matches[$marker] = [ $element,
			$content,
			Sanitizer::decodeTagAttributes( $attributes ),
			"<$element$attributes$close$content$tail" ];
	}
	return $stripped;
}
1200
/**
 * Get the list of strippable XML-like elements known to this parser.
 *
 * @return array
 */
public function getStripList() {
	return $this->mStripList;
}
1209
/**
 * Get the StripState object of this parser.
 *
 * @return StripState
 */
public function getStripState() {
	return $this->mStripState;
}
1218
/**
 * Add an item to the strip state.
 *
 * Returns the unique marker which must be inserted into the stripped text;
 * the marker will be replaced with the original text in unstrip().
 *
 * @param string $text Text to store as a "general" strip item
 *
 * @return string The marker standing in for $text
 */
public function insertStripItem( $text ) {
	$index = $this->mMarkerIndex;
	$this->mMarkerIndex++;

	$marker = self::MARKER_PREFIX . "-item-{$index}-" . self::MARKER_SUFFIX;
	$this->mStripState->addGeneral( $marker, $text );

	return $marker;
}
1234
/**
 * Parse the wiki syntax used to render tables.
 *
 * The text is processed line by line. Tables may nest, so the state of
 * each open table is kept on a set of parallel stacks (one entry per open
 * table): whether a cell is open, which tag that cell uses, whether a row
 * is open, the attributes of the pending row, and whether the table has
 * produced any row at all. Tables still open at the end of the text are
 * closed automatically.
 *
 * @private
 * @param string $text
 * @return string
 */
public function doTableStuff( $text ) {
	$lines = StringUtils::explode( "\n", $text );
	$out = '';
	$td_history = []; # Is currently a td tag open?
	$last_tag_history = []; # Save history of last tag activated (td, th or caption)
	$tr_history = []; # Is currently a tr tag open?
	$tr_attributes = []; # history of tr attributes
	$has_opened_tr = []; # Did this table open a <tr> element?
	$indent_level = 0; # indent level of the table

	foreach ( $lines as $outLine ) {
		$line = trim( $outLine );

		if ( $line === '' ) { # empty line, go to next line
			$out .= $outLine . "\n";
			continue;
		}

		$first_character = $line[0];
		$first_two = substr( $line, 0, 2 );
		$matches = [];

		if ( preg_match( '/^(:*)\s*\{\|(.*)$/', $line, $matches ) ) {
			# First check if we are starting a new table
			$indent_level = strlen( $matches[1] );

			$attributes = $this->mStripState->unstripBoth( $matches[2] );
			$attributes = Sanitizer::fixTagAttributes( $attributes, 'table' );

			$outLine = str_repeat( '<dl><dd>', $indent_level ) . "<table{$attributes}>";
			array_push( $td_history, false );
			array_push( $last_tag_history, '' );
			array_push( $tr_history, false );
			array_push( $tr_attributes, '' );
			array_push( $has_opened_tr, false );
		} elseif ( count( $td_history ) == 0 ) {
			# Not inside any table: don't do any of the following
			$out .= $outLine . "\n";
			continue;
		} elseif ( $first_two === '|}' ) {
			# We are ending a table
			$line = '</table>' . substr( $line, 2 );
			$last_tag = array_pop( $last_tag_history );

			# A table must contain at least one row
			if ( !array_pop( $has_opened_tr ) ) {
				$line = "<tr><td></td></tr>{$line}";
			}

			if ( array_pop( $tr_history ) ) {
				$line = "</tr>{$line}";
			}

			if ( array_pop( $td_history ) ) {
				$line = "</{$last_tag}>{$line}";
			}
			array_pop( $tr_attributes );
			if ( $indent_level > 0 ) {
				$outLine = rtrim( $line ) . str_repeat( '</dd></dl>', $indent_level );
			} else {
				$outLine = $line;
			}
		} elseif ( $first_two === '|-' ) {
			# Now we have a table row
			$line = preg_replace( '#^\|-+#', '', $line );

			# Whats after the tag is now only attributes
			$attributes = $this->mStripState->unstripBoth( $line );
			$attributes = Sanitizer::fixTagAttributes( $attributes, 'tr' );
			# Remember them; the <tr> itself is only emitted with the first cell
			array_pop( $tr_attributes );
			array_push( $tr_attributes, $attributes );

			$line = '';
			$last_tag = array_pop( $last_tag_history );
			array_pop( $has_opened_tr );
			array_push( $has_opened_tr, true );

			if ( array_pop( $tr_history ) ) {
				$line = '</tr>';
			}

			if ( array_pop( $td_history ) ) {
				$line = "</{$last_tag}>{$line}";
			}

			$outLine = $line;
			array_push( $tr_history, false );
			array_push( $td_history, false );
			array_push( $last_tag_history, '' );
		} elseif ( $first_character === '|'
			|| $first_character === '!'
			|| $first_two === '|+'
		) {
			# This might be cell elements, td, th or captions
			if ( $first_two === '|+' ) {
				$first_character = '+';
				$line = substr( $line, 2 );
			} else {
				$line = substr( $line, 1 );
			}

			// Implies both are valid for table headings.
			if ( $first_character === '!' ) {
				$line = StringUtils::replaceMarkup( '!!', '||', $line );
			}

			# Split up multiple cells on the same line.
			# FIXME : This can result in improper nesting of tags processed
			# by earlier parser steps.
			$cells = explode( '||', $line );

			$outLine = '';

			# Loop through each table cell
			foreach ( $cells as $cell ) {
				$previous = '';
				if ( $first_character !== '+' ) {
					# Cells (unlike captions) live in a row: open one if needed
					$tr_after = array_pop( $tr_attributes );
					if ( !array_pop( $tr_history ) ) {
						$previous = "<tr{$tr_after}>\n";
					}
					array_push( $tr_history, true );
					array_push( $tr_attributes, '' );
					array_pop( $has_opened_tr );
					array_push( $has_opened_tr, true );
				}

				$last_tag = array_pop( $last_tag_history );

				# Close the previous cell before opening the new one
				if ( array_pop( $td_history ) ) {
					$previous = "</{$last_tag}>\n{$previous}";
				}

				if ( $first_character === '|' ) {
					$last_tag = 'td';
				} elseif ( $first_character === '!' ) {
					$last_tag = 'th';
				} elseif ( $first_character === '+' ) {
					$last_tag = 'caption';
				} else {
					$last_tag = '';
				}

				array_push( $last_tag_history, $last_tag );

				# A cell could contain both parameters and data
				$cell_data = explode( '|', $cell, 2 );

				# T2553: Note that a '|' inside an invalid link should not
				# be mistaken as delimiting cell parameters
				# Bug T153140: Neither should language converter markup.
				if ( preg_match( '/\[\[|-\{/', $cell_data[0] ) === 1 ) {
					$cell = "{$previous}<{$last_tag}>" . trim( $cell );
				} elseif ( count( $cell_data ) == 1 ) {
					// Whitespace in cells is trimmed
					$cell = "{$previous}<{$last_tag}>" . trim( $cell_data[0] );
				} else {
					$attributes = $this->mStripState->unstripBoth( $cell_data[0] );
					$attributes = Sanitizer::fixTagAttributes( $attributes, $last_tag );
					// Whitespace in cells is trimmed
					$cell = "{$previous}<{$last_tag}{$attributes}>" . trim( $cell_data[1] );
				}

				$outLine .= $cell;
				array_push( $td_history, true );
			}
		}
		$out .= $outLine . "\n";
	}

	# Closing open td, tr && table
	while ( count( $td_history ) > 0 ) {
		# The open cell may be a <th> or <caption>, not only a <td>, so
		# close it with the tag that was actually recorded for this table.
		$last_tag = array_pop( $last_tag_history );
		if ( array_pop( $td_history ) ) {
			$out .= "</{$last_tag}>\n";
		}
		if ( array_pop( $tr_history ) ) {
			$out .= "</tr>\n";
		}
		if ( !array_pop( $has_opened_tr ) ) {
			$out .= "<tr><td></td></tr>\n";
		}

		$out .= "</table>\n";
	}

	# Remove trailing line-ending (b/c)
	if ( substr( $out, -1 ) === "\n" ) {
		$out = substr( $out, 0, -1 );
	}

	# special case: don't return empty table
	if ( $out === "<table>\n<tr><td></td></tr>\n</table>" ) {
		$out = '';
	}

	return $out;
}
1438
/**
 * Helper function for parse() that transforms wiki markup into half-parsed
 * HTML. Only called for $mOutputType == self::OT_HTML.
 *
 * The steps run in a fixed order: variable/template expansion, HTML
 * sanitizing, tables, horizontal rules, double-underscore words, headings,
 * internal links, quotes, external links, magic links, and finally
 * heading formatting. Hooks are run at the points marked below.
 *
 * @private
 *
 * @param string $text The text to parse
 * @param-taint $text escapes_html
 * @param bool $isMain Whether this is being called from the main parse() function
 * @param PPFrame|bool $frame A pre-processor frame
 *
 * @return string
 */
public function internalParse( $text, $isMain = true, $frame = false ) {
	// Kept untouched for formatHeadings() at the very end
	$origText = $text;

	// Avoid PHP 7.1 warning from passing $this by reference
	$parser = $this;

	# Hook to suspend the parser in this state
	$proceed = Hooks::run( 'ParserBeforeInternalParse', [ &$parser, &$text, &$this->mStripState ] );
	if ( !$proceed ) {
		return $text;
	}

	if ( $frame ) {
		# A frame was provided: use it for replacing any variables.
		# The frame depth tells how include/noinclude tags should be handled:
		# depth=0 means this is the top-level document; otherwise it's an
		# included document.
		$flag = $frame->depth ? self::PTD_FOR_INCLUSION : 0;
		$text = $frame->expand( $this->preprocessToDom( $text, $flag ) );
	} else {
		# No frame was provided: use old-style replaceVariables
		$text = $this->replaceVariables( $text );
	}

	Hooks::run( 'InternalParseBeforeSanitize', [ &$parser, &$text, &$this->mStripState ] );
	$transparentTags = array_keys( $this->mTransparentTagHooks );
	$text = Sanitizer::removeHTMLtags(
		$text,
		[ $this, 'attributeStripCallback' ],
		false,
		$transparentTags,
		[],
		[ $this, 'addTrackingCategory' ]
	);
	Hooks::run( 'InternalParseBeforeLinks', [ &$parser, &$text, &$this->mStripState ] );

	# Tables need to come after variable replacement for things to work
	# properly; putting them before other transformations should keep
	# exciting things like link expansions from showing up in surprising
	# places.
	$text = $this->doTableStuff( $text );

	# Four or more dashes at the start of a line become a horizontal rule
	$text = preg_replace( '/(^|\n)-----*/', '\\1<hr />', $text );

	$text = $this->doDoubleUnderscore( $text );

	$text = $this->doHeadings( $text );
	$text = $this->replaceInternalLinks( $text );
	$text = $this->doAllQuotes( $text );
	$text = $this->replaceExternalLinks( $text );

	# replaceInternalLinks may sometimes leave behind
	# absolute URLs, which have to be masked to hide them from replaceExternalLinks
	$text = str_replace( self::MARKER_PREFIX . 'NOPARSE', '', $text );

	$text = $this->doMagicLinks( $text );

	return $this->formatHeadings( $text, $origText, $isMain );
}
1514
/**
 * Helper function for parse() that transforms half-parsed HTML into fully
 * parsed HTML.
 *
 * Runs, in order: unstripping of general items, French-space armoring,
 * block-level formatting, link holder replacement, language conversion
 * (unless suppressed), nowiki unstripping, transparent tags, character
 * reference normalization and tidying. The ParserAfterUnstrip,
 * ParserBeforeTidy and ParserAfterTidy hooks only run when $isMain is true.
 *
 * @param string $text
 * @param bool $isMain
 * @param bool $linestart
 * @return string
 */
private function internalParseHalfParsed( $text, $isMain = true, $linestart = true ) {
	$text = $this->mStripState->unstripGeneral( $text );

	// Avoid PHP 7.1 warning from passing $this by reference
	$parser = $this;

	if ( $isMain ) {
		Hooks::run( 'ParserAfterUnstrip', [ &$parser, &$text ] );
	}

	# Clean up special characters, only run once, next-to-last before doBlockLevels
	$text = Sanitizer::armorFrenchSpaces( $text );

	$text = $this->doBlockLevels( $text, $linestart );

	$this->replaceLinkHolders( $text );

	/**
	 * The input doesn't get language converted if
	 * a) It's disabled
	 * b) Content isn't converted
	 * c) It's a conversion table
	 * d) it is an interface message (which is in the user language)
	 */
	$skipConversion = $this->mOptions->getDisableContentConversion()
		|| isset( $this->mDoubleUnderscores['nocontentconvert'] )
		|| $this->mOptions->getInterfaceMessage();
	if ( !$skipConversion ) {
		# The position of the convert() call should not be changed. it
		# assumes that the links are all replaced and the only thing left
		# is the <nowiki> mark.
		$text = $this->getTargetLanguage()->convert( $text );
	}

	$text = $this->mStripState->unstripNoWiki( $text );

	if ( $isMain ) {
		Hooks::run( 'ParserBeforeTidy', [ &$parser, &$text ] );
	}

	$text = $this->replaceTransparentTags( $text );
	$text = $this->mStripState->unstripGeneral( $text );

	$text = Sanitizer::normalizeCharReferences( $text );

	if ( MWTidy::isEnabled() ) {
		if ( $this->mOptions->getTidy() ) {
			$text = MWTidy::tidy( $text );
		}
	} else {
		# attempt to sanitize at least some nesting problems
		# (T4702 and quite a few others)
		# This code path is buggy and deprecated!
		wfDeprecated( 'disabling tidy', '1.33' );

		$patterns = [];
		$replacements = [];

		# ''Something [http://www.cool.com cool''] -->
		# <i>Something</i><a href="http://www.cool.com"..><i>cool</i></a>
		$patterns[] = '/(<([bi])>)(<([bi])>)?([^<]*)(<\/?a[^<]*>)([^<]*)(<\/\\4>)?(<\/\\2>)/';
		$replacements[] = '\\1\\3\\5\\8\\9\\6\\1\\3\\7\\8\\9';

		# fix up an anchor inside another anchor, only
		# at least for a single single nested link (T5695)
		$patterns[] = '/(<a[^>]+>)([^<]*)(<a[^>]+>[^<]*)<\/a>(.*)<\/a>/';
		$replacements[] = '\\1\\2</a>\\3</a>\\1\\4</a>';

		# fix div inside inline elements- doBlockLevels won't wrap a line which
		# contains a div, so fix it up here; replace
		# div with escaped text
		$patterns[] = '/(<([aib]) [^>]+>)([^<]*)(<div([^>]*)>)(.*)(<\/div>)([^<]*)(<\/\\2>)/';
		$replacements[] = '\\1\\3&lt;div\\5&gt;\\6&lt;/div&gt;\\8\\9';

		# remove empty italic or bold tag pairs, some
		# introduced by rules above
		$patterns[] = '/<([bi])><\/\\1>/';
		$replacements[] = '';

		$text = preg_replace( $patterns, $replacements, $text );
	}

	if ( $isMain ) {
		Hooks::run( 'ParserAfterTidy', [ &$parser, &$text ] );
	}

	return $text;
}
1609
/**
 * Replace special strings like "ISBN xxx" and "RFC xxx" with
 * magic external links, and turn free-standing URLs into links.
 *
 * Existing <a>...</a> elements and the inside of HTML tags are matched
 * too, so that they are skipped rather than altered; each match is handed
 * to magicLinkCallback().
 *
 * DML
 * @private
 *
 * @param string $text
 *
 * @return string
 */
public function doMagicLinks( $text ) {
	$protocols = wfUrlProtocolsWithoutProtRel();
	$urlChar = self::EXT_LINK_URL_CLASS;
	$addr = self::EXT_LINK_ADDR;
	$sp = self::SPACE_NOT_NL; # non-newline space
	$spOrDash = "(?:-|$sp)"; # a dash or a non-newline space
	$spRun = "$sp++"; # possessive match of 1 or more spaces

	// Extended (x) syntax: whitespace and #-comments inside the pattern are ignored
	$pattern = '!(?:                            # Start cases
			(<a[ \t\r\n>].*?</a>) |      # m[1]: Skip link text
			(<.*?>) |                    # m[2]: Skip stuff inside HTML elements' . "
			(\b                          # m[3]: Free external links
				(?i:$protocols)
				($addr$urlChar*)         # m[4]: Post-protocol path
			) |
			\b(?:RFC|PMID) $spRun        # m[5]: RFC or PMID, capture number
				([0-9]+)\b |
			\bISBN $spRun (              # m[6]: ISBN, capture number
				(?: 97[89] $spOrDash? )?   #  optional 13-digit ISBN prefix
				(?: [0-9]  $spOrDash? ){9} #  9 digits with opt. delimiters
				[0-9Xx]                  #  check digit
			)\b
		)!xu";

	return preg_replace_callback( $pattern, [ $this, 'magicLinkCallback' ], $text );
}
1646
/**
 * Callback for the pattern built in doMagicLinks(): decide, from which
 * capture group matched, what the matched text is replaced with.
 *
 *  - m[1] / m[2] (existing anchor / HTML tag): returned unchanged
 *  - m[3] (free URL): turned into a free external link
 *  - m[5] (RFC or PMID number): external link, if enabled in the options
 *  - m[6] (ISBN): link to Special:Booksources, if enabled in the options
 *
 * @throws MWException If an m[5] match starts with neither "RFC" nor "PMID"
 * @param array $m Match array from preg_replace_callback()
 * @return string HTML
 */
public function magicLinkCallback( $m ) {
	// True when capture group $group took part in the match and is non-empty
	$matched = static function ( $group ) use ( $m ) {
		return isset( $m[$group] ) && $m[$group] !== '';
	};

	if ( $matched( 1 ) ) {
		# Skip anchor
		return $m[0];
	}

	if ( $matched( 2 ) ) {
		# Skip HTML element
		return $m[0];
	}

	if ( $matched( 3 ) ) {
		# Free external link
		return $this->makeFreeExternalLink( $m[0], strlen( $m[4] ) );
	}

	if ( $matched( 5 ) ) {
		# RFC or PMID
		if ( strncmp( $m[0], 'RFC', 3 ) === 0 ) {
			if ( !$this->mOptions->getMagicRFCLinks() ) {
				return $m[0];
			}
			list( $keyword, $urlmsg, $cssClass, $trackingCat ) =
				[ 'RFC', 'rfcurl', 'mw-magiclink-rfc', 'magiclink-tracking-rfc' ];
		} elseif ( strncmp( $m[0], 'PMID', 4 ) === 0 ) {
			if ( !$this->mOptions->getMagicPMIDLinks() ) {
				return $m[0];
			}
			list( $keyword, $urlmsg, $cssClass, $trackingCat ) =
				[ 'PMID', 'pubmedurl', 'mw-magiclink-pmid', 'magiclink-tracking-pmid' ];
		} else {
			throw new MWException( __METHOD__ . ': unrecognised match type "' .
				substr( $m[0], 0, 20 ) . '"' );
		}

		$id = $m[5];
		$url = wfMessage( $urlmsg, $id )->inContentLanguage()->text();
		$this->addTrackingCategory( $trackingCat );

		return Linker::makeExternalLink( $url, "{$keyword} {$id}", true, $cssClass, [], $this->mTitle );
	}

	if ( $matched( 6 ) && $this->mOptions->getMagicISBNLinks() ) {
		# ISBN
		$nonNlSpace = self::SPACE_NOT_NL; # non-newline space
		// Show every kind of separator space as a plain space ...
		$isbn = preg_replace( "/$nonNlSpace/", ' ', $m[6] );
		// ... and build the bare number for the Booksources subpage
		$num = strtr( $isbn, [
			'-' => '',
			' ' => '',
			'x' => 'X',
		] );
		$this->addTrackingCategory( 'magiclink-tracking-isbn' );

		return $this->getLinkRenderer()->makeKnownLink(
			SpecialPage::getTitleFor( 'Booksources', $num ),
			"ISBN $isbn",
			[
				'class' => 'internal mw-magiclink-isbn',
				'title' => false // suppress title attribute
			]
		);
	}

	return $m[0];
}
1714
/**
 * Make a free external link, given a user-supplied URL.
 *
 * Text that should not belong to the URL (an escaped "<", ">" or
 * non-breaking space and everything after it, as well as trailing
 * punctuation) is split off and appended after the link. If nothing but
 * the protocol would remain, the text is returned unlinked.
 *
 * @param string $url
 * @param int $numPostProto
 *   The number of characters after the protocol.
 * @return string HTML
 * @private
 */
public function makeFreeExternalLink( $url, $numPostProto ) {
	$trail = '';

	# The characters '<' and '>' (which were escaped by
	# removeHTMLtags()) should not be included in
	# URLs, per RFC 2396.
	# Make &nbsp; terminate a URL as well (bug T84937)
	$entity = [];
	$hasTerminator = preg_match(
		'/&(lt|gt|nbsp|#x0*(3[CcEe]|[Aa]0)|#0*(60|62|160));/',
		$url,
		$entity,
		PREG_OFFSET_CAPTURE
	);
	if ( $hasTerminator ) {
		$cutAt = $entity[0][1];
		$trail = substr( $url, $cutAt ) . $trail;
		$url = substr( $url, 0, $cutAt );
	}

	# Move trailing punctuation to $trail
	$sep = ',;\.:!?';
	# If there is no left bracket, then consider right brackets fair game too
	if ( strpos( $url, '(' ) === false ) {
		$sep .= ')';
	}

	$reversedUrl = strrev( $url );
	$trailLen = strspn( $reversedUrl, $sep );
	# Don't break a trailing HTML entity by moving the ; into $trail
	# This is in hot code, so use substr_compare to avoid having to
	# create a new string object for the comparison.
	# More optimization: instead of running preg_match with a $
	# anchor, which can be slow, do the match on the reversed
	# string starting at the desired offset.
	# un-reversed regexp is: /&([a-z]+|#x[\da-f]+|#\d+)$/i
	if ( $trailLen
		&& substr_compare( $url, ";", -$trailLen, 1 ) === 0
		&& preg_match( '/\G([a-z]+|[\da-f]+x#|\d+#)&/i', $reversedUrl, $entity, 0, $trailLen )
	) {
		$trailLen--;
	}
	if ( $trailLen ) {
		$trail = substr( $url, -$trailLen ) . $trail;
		$url = substr( $url, 0, -$trailLen );
	}

	# Verify that we still have a real URL after trail removal, and
	# not just lone protocol
	if ( strlen( $trail ) >= $numPostProto ) {
		return $url . $trail;
	}

	$url = Sanitizer::cleanUrl( $url );

	# Is this an external image?
	$html = $this->maybeMakeExternalImage( $url );
	if ( $html === false ) {
		# Not an image, make a link
		$label = $this->getTargetLanguage()->getConverter()->markNoConversion( $url );
		$html = Linker::makeExternalLink(
			$url,
			$label,
			true,
			'free',
			$this->getExternalLinkAttribs( $url ),
			$this->mTitle
		);
		# Register it in the output object...
		$this->mOutput->addExternalLink( $url );
	}

	return $html . $trail;
}
1789
/**
 * Parse headings ("== Title ==") and return html.
 *
 * Lines wrapped in N equals signs on both sides become <hN> elements, for
 * N from 6 down to 1, so that the longest runs of "=" are handled first.
 *
 * @private
 *
 * @param string $text
 *
 * @return string
 */
public function doHeadings( $text ) {
	foreach ( range( 6, 1 ) as $level ) {
		$marks = str_repeat( '=', $level );
		// Trim non-newline whitespace from headings
		// Using \s* will break for: "==\n===\n" and parse as <h2>=</h2>
		$pattern = "/^(?:$marks)[ \\t]*(.+?)[ \\t]*(?:$marks)\\s*$/m";
		$text = preg_replace( $pattern, "<h{$level}>\\1</h{$level}>", $text );
	}

	return $text;
}
1808
/**
 * Replace runs of single quotes with HTML bold/italic markup.
 *
 * The text is split on newlines and every line is handed to doQuotes()
 * on its own.
 *
 * @private
 *
 * @param string $text
 *
 * @return string The altered text
 */
public function doAllQuotes( $text ) {
	$converted = '';
	foreach ( StringUtils::explode( "\n", $text ) as $line ) {
		$converted .= $this->doQuotes( $line ) . "\n";
	}

	// Drop the newline appended after the last line
	return substr( $converted, 0, -1 );
}
1826
/**
 * Helper function for doAllQuotes(): convert the apostrophe markup of a
 * single line ('' for italics, ''' for bold, ''''' for both) to <i>/<b>
 * HTML.
 *
 * Works in three passes: normalize runs of 4 or more than 5 apostrophes,
 * resolve the ambiguity when both the bold and the italic count are odd,
 * and finally emit the tags with a small state machine. Tags left open at
 * the end of the line are closed.
 *
 * @param string $text One line of text
 *
 * @return string
 */
public function doQuotes( $text ) {
	// Even indexes hold plain text, odd indexes hold apostrophe runs
	$arr = preg_split( "/(''+)/", $text, -1, PREG_SPLIT_DELIM_CAPTURE );
	$countarr = count( $arr );
	if ( $countarr == 1 ) {
		return $text;
	}

	// First, do some preliminary work. This may shift some apostrophes from
	// being mark-up to being text. It also counts the number of occurrences
	// of bold and italics mark-ups.
	$numbold = 0;
	$numitalics = 0;
	for ( $i = 1; $i < $countarr; $i += 2 ) {
		$thislen = strlen( $arr[$i] );
		// If there are ever four apostrophes, assume the first is supposed to
		// be text, and the remaining three constitute mark-up for bold text.
		// (T15227: ''''foo'''' turns into ' ''' foo ' ''')
		if ( $thislen == 4 ) {
			$arr[$i - 1] .= "'";
			$arr[$i] = "'''";
			$thislen = 3;
		} elseif ( $thislen > 5 ) {
			// If there are more than 5 apostrophes in a row, assume they're all
			// text except for the last 5.
			// (T15227: ''''''foo'''''' turns into ' ''''' foo ' ''''')
			$arr[$i - 1] .= str_repeat( "'", $thislen - 5 );
			$arr[$i] = "'''''";
			$thislen = 5;
		}
		// Count the number of occurrences of bold and italics mark-ups.
		if ( $thislen == 2 ) {
			$numitalics++;
		} elseif ( $thislen == 3 ) {
			$numbold++;
		} elseif ( $thislen == 5 ) {
			$numitalics++;
			$numbold++;
		}
	}

	// If there is an odd number of both bold and italics, it is likely
	// that one of the bold ones was meant to be an apostrophe followed
	// by italics. Which one we cannot know for certain, but it is more
	// likely to be one that has a single-letter word before it.
	if ( ( $numbold % 2 == 1 ) && ( $numitalics % 2 == 1 ) ) {
		$firstsingleletterword = -1;
		$firstmultiletterword = -1;
		$firstspace = -1;
		for ( $i = 1; $i < $countarr; $i += 2 ) {
			if ( strlen( $arr[$i] ) == 3 ) {
				// Last and second-to-last character of the preceding text
				$x1 = substr( $arr[$i - 1], -1 );
				$x2 = substr( $arr[$i - 1], -2, 1 );
				if ( $x1 === ' ' ) {
					if ( $firstspace == -1 ) {
						$firstspace = $i;
					}
				} elseif ( $x2 === ' ' ) {
					$firstsingleletterword = $i;
					// if $firstsingleletterword is set, we don't
					// look at the other options, so we can bail early.
					break;
				} elseif ( $firstmultiletterword == -1 ) {
					$firstmultiletterword = $i;
				}
			}
		}

		// If there is a single-letter word, use it!
		if ( $firstsingleletterword > -1 ) {
			$arr[$firstsingleletterword] = "''";
			$arr[$firstsingleletterword - 1] .= "'";
		} elseif ( $firstmultiletterword > -1 ) {
			// If not, but there's a multi-letter word, use that one.
			$arr[$firstmultiletterword] = "''";
			$arr[$firstmultiletterword - 1] .= "'";
		} elseif ( $firstspace > -1 ) {
			// ... otherwise use the first one that has neither.
			// (notice that it is possible for all three to be -1 if, for example,
			// there is only one pentuple-apostrophe in the line)
			$arr[$firstspace] = "''";
			$arr[$firstspace - 1] .= "'";
		}
	}

	// Now let's actually convert our apostrophic mush to HTML!
	// $state names the open tags in opening order ('', 'i', 'b', 'bi', 'ib');
	// 'both' means a ''''' was seen and the tag order is not yet known, so
	// the following text is held back in $buffer.
	$output = '';
	$buffer = '';
	$state = '';
	$i = 0;
	foreach ( $arr as $r ) {
		if ( ( $i % 2 ) == 0 ) {
			if ( $state === 'both' ) {
				$buffer .= $r;
			} else {
				$output .= $r;
			}
		} else {
			$thislen = strlen( $r );
			if ( $thislen == 2 ) {
				if ( $state === 'i' ) {
					$output .= '</i>';
					$state = '';
				} elseif ( $state === 'bi' ) {
					$output .= '</i>';
					$state = 'b';
				} elseif ( $state === 'ib' ) {
					$output .= '</b></i><b>';
					$state = 'b';
				} elseif ( $state === 'both' ) {
					$output .= '<b><i>' . $buffer . '</i>';
					$state = 'b';
				} else { // $state can be 'b' or ''
					$output .= '<i>';
					$state .= 'i';
				}
			} elseif ( $thislen == 3 ) {
				if ( $state === 'b' ) {
					$output .= '</b>';
					$state = '';
				} elseif ( $state === 'bi' ) {
					$output .= '</i></b><i>';
					$state = 'i';
				} elseif ( $state === 'ib' ) {
					$output .= '</b>';
					$state = 'i';
				} elseif ( $state === 'both' ) {
					$output .= '<i><b>' . $buffer . '</b>';
					$state = 'i';
				} else { // $state can be 'i' or ''
					$output .= '<b>';
					$state .= 'b';
				}
			} elseif ( $thislen == 5 ) {
				if ( $state === 'b' ) {
					$output .= '</b><i>';
					$state = 'i';
				} elseif ( $state === 'i' ) {
					$output .= '</i><b>';
					$state = 'b';
				} elseif ( $state === 'bi' ) {
					$output .= '</i></b>';
					$state = '';
				} elseif ( $state === 'ib' ) {
					$output .= '</b></i>';
					$state = '';
				} elseif ( $state === 'both' ) {
					$output .= '<i><b>' . $buffer . '</b></i>';
					$state = '';
				} else { // ($state == '')
					$buffer = '';
					$state = 'both';
				}
			}
		}
		$i++;
	}
	// Now close all remaining tags.  Notice that the order is important.
	if ( $state === 'b' || $state === 'ib' ) {
		$output .= '</b>';
	}
	if ( $state === 'i' || $state === 'bi' || $state === 'ib' ) {
		$output .= '</i>';
	}
	if ( $state === 'bi' ) {
		$output .= '</b>';
	}
	// There might be lonely ''''', so make sure we have a buffer.
	// Compare against '' explicitly: a buffer of "0" is falsy in PHP and
	// would otherwise be dropped from the output.
	if ( $state === 'both' && $buffer !== '' ) {
		$output .= '<b><i>' . $buffer . '</i></b>';
	}
	return $output;
}
2006
/**
 * Replace bracketed external links (REL), e.g. [http://domain.tld/some.link text]
 *
 * Note: this is all very hackish and the order of execution matters a lot.
 * Make sure to run tests/parser/parserTests.php if you change this code.
 *
 * @private
 *
 * @param string $text Text to scan for bracketed external links
 *
 * @throws MWException If splitting the text with the bracketed-link regex fails
 * @return string The text with every matched link replaced by the output of
 *  Linker::makeExternalLink()
 */
public function replaceExternalLinks( $text ) {
	$pieces = preg_split( $this->mExtLinkBracketedRegex, $text, -1, PREG_SPLIT_DELIM_CAPTURE );
	// @phan-suppress-next-line PhanTypeComparisonFromArray See phan issue #3161
	if ( $pieces === false ) {
		throw new MWException( "PCRE needs to be compiled with "
			. "--enable-unicode-properties in order for MediaWiki to function" );
	}

	# Whatever precedes the first link is copied through untouched
	$result = array_shift( $pieces );

	# The remaining pieces are consumed four at a time:
	# URL, protocol (not needed here), link text, text following the link
	for ( $pos = 0; $pos < count( $pieces ); $pos += 4 ) {
		$url = $pieces[$pos];
		$label = $pieces[$pos + 2];
		$trail = $pieces[$pos + 3];

		# The characters '<' and '>' (which were escaped by
		# removeHTMLtags()) should not be included in
		# URLs, per RFC 2396: cut the URL at the first one and
		# move the remainder into the link text.
		$entity = [];
		if ( preg_match( '/&(lt|gt);/', $url, $entity, PREG_OFFSET_CAPTURE ) ) {
			$cut = $entity[0][1];
			$label = substr( $url, $cut ) . ' ' . $label;
			$url = substr( $url, 0, $cut );
		}

		# If the link text is an image URL, replace it with an <img> tag
		# This happened by accident in the original parser, but some people used it extensively
		$image = $this->maybeMakeExternalImage( $label );
		if ( $image !== false ) {
			$label = $image;
		}

		if ( $label == '' ) {
			# No link text, e.g. [http://domain.tld/some.link]: autonumber it
			$label = '[' . $this->getTargetLanguage()->formatNum( ++$this->mAutonumber ) . ']';
			$cssType = 'autonumber';
			$insideTrail = '';
		} else {
			# Have link text, e.g. [http://domain.tld/some.link text]s
			# Check for trail
			$cssType = 'text';
			list( $insideTrail, $trail ) = Linker::splitTrail( $trail );
		}

		// Excluding protocol-relative URLs may avoid many false positives.
		if ( preg_match( '/^(?:' . wfUrlProtocolsWithoutProtRel() . ')/', $label ) ) {
			$label = $this->getTargetLanguage()->getConverter()->markNoConversion( $label );
		}

		# Use the encoded URL
		# This means that users can paste URLs directly into the text
		# Funny characters like ö aren't valid in URLs anyway
		# This was changed in August 2004
		$url = Sanitizer::cleanUrl( $url );

		$attribs = $this->getExternalLinkAttribs( $url );
		$anchor = Linker::makeExternalLink( $url, $label, false, $cssType, $attribs, $this->mTitle );
		$result .= $anchor . $insideTrail . $trail;

		# Register link in the output object.
		$this->mOutput->addExternalLink( $url );
	}

	return $result;
}
2089
/**
 * Get the rel attribute for a particular external link.
 *
 * Returns 'nofollow' when $wgNoFollowLinks is enabled and neither the
 * namespace of $title (if one is given) nor the domain of $url is listed as
 * an exception; returns null otherwise.
 *
 * @since 1.21
 * @param string|bool $url Optional URL, to extract the domain from for rel =>
 *  nofollow if appropriate
 * @param LinkTarget|null $title Optional LinkTarget, for wgNoFollowNsExceptions lookups
 * @return string|null Rel attribute for $url
 */
public static function getExternalLinkRel( $url = false, $title = null ) {
	global $wgNoFollowLinks, $wgNoFollowNsExceptions, $wgNoFollowDomainExceptions;

	if ( !$wgNoFollowLinks ) {
		return null;
	}

	// Only consult the namespace exceptions when a title was actually given.
	// Passing a placeholder `false` to the loose in_array() would compare
	// equal to namespace 0 and wrongly exempt title-less calls.
	if ( $title && in_array( $title->getNamespace(), $wgNoFollowNsExceptions ) ) {
		return null;
	}

	if ( wfMatchesDomainList( $url, $wgNoFollowDomainExceptions ) ) {
		return null;
	}

	return 'nofollow';
}
2109
/**
 * Get an associative array of additional HTML attributes appropriate for a
 * particular external link. This currently may include rel => nofollow
 * (depending on configuration, namespace, and the URL's domain) and/or a
 * target attribute (depending on configuration).
 *
 * @param string $url URL to extract the domain from for rel =>
 *  nofollow if appropriate
 * @return array Associative array of HTML attributes. The 'rel' key is always
 *  set; its value is null when no rel value applies.
 */
public function getExternalLinkAttribs( $url ) {
	$attribs = [];
	// May be null when no nofollow applies
	$rel = self::getExternalLinkRel( $url, $this->mTitle );

	$target = $this->mOptions->getExternalLinkTarget();
	if ( $target ) {
		$attribs['target'] = $target;
		if ( !in_array( $target, [ '_self', '_parent', '_top' ] ) ) {
			// T133507. New windows can navigate parent cross-origin.
			// Including noreferrer due to lacking browser
			// support of noopener. Eventually noreferrer should be removed.
			// Only insert the separating space when there is an existing
			// value, so that a null rel does not yield a leading space.
			if ( $rel === null || $rel === '' ) {
				$rel = 'noreferrer noopener';
			} else {
				$rel .= ' noreferrer noopener';
			}
		}
	}
	$attribs['rel'] = $rel;
	return $attribs;
}
2140
/**
 * Replace unusual escape codes in a URL with their equivalent characters
 *
 * This generally follows the syntax defined in RFC 3986, with special
 * consideration for HTTP query strings. The fragment, the query and the
 * remainder (scheme and path) are normalized separately, each with its own
 * set of characters that must stay percent-encoded.
 *
 * @param string $url
 * @return string
 */
public static function normalizeLinkUrl( $url ) {
	# Test for RFC 3986 IPv6 syntax
	$scheme = '[a-z][a-z0-9+.-]*:';
	$userinfo = '(?:[a-z0-9\-._~!$&\'()*+,;=:]|%[0-9a-f]{2})*';
	$ipv6Host = '\\[((?:[0-9a-f:]|%3[0-A]|%[46][1-6])+)\\]';

	# Holds the decoded IPv6 address if the host is one, false otherwise
	$isIPv6 = false;
	if ( preg_match( "<^(?:{$scheme})?//(?:{$userinfo}@)?{$ipv6Host}(?:[:/?#].*|)$>i", $url, $m ) ) {
		$decodedHost = rawurldecode( $m[1] );
		if ( IP::isValid( $decodedHost ) ) {
			$isIPv6 = $decodedHost;
		}
	}

	# Make sure unsafe characters are encoded
	$url = preg_replace_callback(
		'/[\x00-\x20"<>\[\\\\\]^`{|}\x7F-\xFF]/',
		function ( $hit ) {
			return rawurlencode( $hit[0] );
		},
		$url
	);

	# Peel the components off from the right; $end marks where the
	# not-yet-processed part of the URL stops.
	$end = strlen( $url );
	$fragment = '';
	$query = '';

	# Fragment part - 'fragment'
	$hashPos = strpos( $url, '#' );
	if ( $hashPos !== false && $hashPos < $end ) {
		$fragment = self::normalizeUrlComponent(
			substr( $url, $hashPos, $end - $hashPos ), '"#%<>[\]^`{|}' );
		$end = $hashPos;
	}

	# Query part - 'query' minus &=+;
	$queryPos = strpos( $url, '?' );
	if ( $queryPos !== false && $queryPos < $end ) {
		$query = self::normalizeUrlComponent(
			substr( $url, $queryPos, $end - $queryPos ), '"#%<>[\]^`{|}&=+;' );
		$end = $queryPos;
	}

	# Scheme and path part - 'pchar'
	# (we assume no userinfo or encoded colons in the host)
	$head = self::normalizeUrlComponent(
		substr( $url, 0, $end ), '"#%<>[\]^`{|}/?' );

	$ret = $head . $query . $fragment;

	# Fix IPv6 syntax: the brackets around the host were percent-encoded above
	if ( $isIPv6 !== false ) {
		$encodedHost = "%5B({$isIPv6})%5D";
		$ret = preg_replace(
			"<^((?:{$scheme})?//(?:{$userinfo}@)?){$encodedHost}(?=[:/?#]|$)>i",
			"$1[$2]",
			$ret
		);
	}

	return $ret;
}
2207
/**
 * Normalize the percent-escapes of one URL component.
 *
 * Every %XX sequence that decodes to a printable ASCII character (codes 33
 * to 126) not listed in $unsafe is replaced by that character. All other
 * sequences stay escaped, with their hex digits uppercased.
 *
 * @param string $component URL component to normalize
 * @param string $unsafe Characters that must remain percent-encoded
 * @return string
 */
private static function normalizeUrlComponent( $component, $unsafe ) {
	return preg_replace_callback(
		'/%[0-9A-Fa-f]{2}/',
		function ( $escape ) use ( $unsafe ) {
			$decoded = urldecode( $escape[0] );
			$code = ord( $decoded );
			$printable = $code > 32 && $code < 127;
			if ( !$printable || strpos( $unsafe, $decoded ) !== false ) {
				# Leave it escaped, but use uppercase for a-f
				return strtoupper( $escape[0] );
			}
			# Unescape it
			return $decoded;
		},
		$component
	);
}
2222
/**
 * Make an image out of a URL if that is allowed, either through the global
 * option, through the URL-prefix exception, or through the on-wiki whitelist
 *
 * @param string $url
 *
 * @return string|bool Result of Linker::makeExternalImage(), or false if the
 *  URL is not turned into an image
 */
private function maybeMakeExternalImage( $url ) {
	$allowedPrefixes = $this->mOptions->getAllowExternalImagesFrom();

	# The setting could be either a single string or an array of strings;
	# either way the URL has to start with (one of) the configured prefix(es)
	$prefixMatched = false;
	if ( !empty( $allowedPrefixes ) ) {
		if ( is_array( $allowedPrefixes ) ) {
			foreach ( $allowedPrefixes as $candidate ) {
				if ( strpos( $url, $candidate ) === 0 ) {
					$prefixMatched = true;
					break;
				}
			}
		} else {
			$prefixMatched = ( strpos( $url, $allowedPrefixes ) === 0 );
		}
	}

	$image = false;
	if ( ( $this->mOptions->getAllowExternalImages() || $prefixMatched )
		&& preg_match( self::EXT_IMAGE_REGEX, $url )
	) {
		# Image found
		$image = Linker::makeExternalImage( $url );
	}
	if ( $image ) {
		return $image;
	}

	# Fall back to the on-wiki whitelist, if it is enabled
	if ( !$this->mOptions->getEnableImageWhitelist()
		|| !preg_match( self::EXT_IMAGE_REGEX, $url )
	) {
		return $image;
	}

	$entries = explode(
		"\n",
		wfMessage( 'external_image_whitelist' )->inContentLanguage()->text()
	);
	foreach ( $entries as $entry ) {
		# Ignore blank entries/comments
		if ( $entry === '' || strpos( $entry, '#' ) === 0 ) {
			continue;
		}
		# Sanitize the regex fragment, make it case-insensitive
		$pattern = '/' . str_replace( '/', '\\/', $entry ) . '/i';
		if ( preg_match( $pattern, $url ) ) {
			# Image matches a whitelist entry
			return Linker::makeExternalImage( $url );
		}
	}

	return $image;
}
2280
/**
 * Process [[ ]] wikilinks
 *
 * Runs replaceInternalLinks2() on the text and merges the link holders it
 * returns into $this->mLinkHolders.
 *
 * @param string $s
 *
 * @return string Processed text
 *
 * @private
 */
public function replaceInternalLinks( $s ) {
	$linkHolders = $this->mLinkHolders;
	# $s is rewritten in place by replaceInternalLinks2()
	$linkHolders->merge( $this->replaceInternalLinks2( $s ) );
	return $s;
}
2294
2295 /**
2296 * Process [[ ]] wikilinks (RIL)
 *
 * The text is split on "[[" and each candidate link is handled in turn while
 * $s is rebuilt piece by piece. Candidates that do not form a valid link are
 * written back verbatim. Interlanguage and category links are recorded on the
 * parser output and leave no markup behind; file links are rendered through
 * makeImage(); self links, Media: links and always-known titles are rendered
 * directly; every other link becomes a placeholder registered in the
 * returned LinkHolderArray.
 *
2297 * @param string &$s Text to process; overwritten with the processed text
2298 * @throws MWException If $this->mTitle is null
2299 * @return LinkHolderArray Holders for the links replaced by placeholders
2300 *
2301 * @private
2302 */
2303 public function replaceInternalLinks2( &$s ) {
# The link regexes are built on first use and kept in static variables
2304 static $tc = false, $e1, $e1_img;
2305 # the % is needed to support urlencoded titles as well
2306 if ( !$tc ) {
2307 $tc = Title::legalChars() . '#%';
2308 # Match a link having the form [[namespace:link|alternate]]trail
2309 $e1 = "/^([{$tc}]+)(?:\\|(.+?))?]](.*)\$/sD";
2310 # Match cases where there is no "]]", which might still be images
2311 $e1_img = "/^([{$tc}]+)\\|(.*)\$/sD";
2312 }
2313
2314 $holders = new LinkHolderArray( $this );
2315
2316 # split the entire text string on occurrences of [[
2317 $a = StringUtils::explode( '[[', ' ' . $s );
2318 # get the first element (all text up to first [[), and remove the space we added
2319 $s = $a->current();
2320 $a->next();
2321 $line = $a->current(); # Workaround for broken ArrayIterator::next() that returns "void"
2322 $s = substr( $s, 1 );
2323
2324 if ( is_null( $this->mTitle ) ) {
2325 throw new MWException( __METHOD__ . ": \$this->mTitle is null\n" );
2326 }
2327 $nottalk = !$this->mTitle->isTalkPage();
2328
2329 $useLinkPrefixExtension = $this->getTargetLanguage()->linkPrefixExtension();
2330 $e2 = null;
2331 if ( $useLinkPrefixExtension ) {
2332 # Match the end of a line for a word that's not followed by whitespace,
2333 # e.g. in the case of 'The Arab al[[Razi]]', 'al' will be matched
2334 $charset = $this->contLang->linkPrefixCharset();
2335 $e2 = "/^((?>.*[^$charset]|))(.+)$/sDu";
2336 $m = [];
2337 if ( preg_match( $e2, $s, $m ) ) {
2338 $first_prefix = $m[2];
2339 } else {
2340 $first_prefix = false;
2341 }
2342 } else {
2343 $prefix = '';
2344 }
2345
2346 $useSubpages = $this->areSubpagesAllowed();
2347
2348 # Loop for each link
2349 for ( ; $line !== false && $line !== null; $a->next(), $line = $a->current() ) {
2350 # Check for excessive memory usage
2351 if ( $holders->isBig() ) {
2352 # Too big
2353 # Do the existence check, replace the link holders and clear the array
2354 $holders->replace( $s );
2355 $holders->clear();
2356 }
2357
2358 if ( $useLinkPrefixExtension ) {
# Split the prefix off the end of the text accumulated so far
2359 if ( preg_match( $e2, $s, $m ) ) {
2360 list( , $s, $prefix ) = $m;
2361 } else {
2362 $prefix = '';
2363 }
2364 # first link
2365 if ( $first_prefix ) {
2366 $prefix = $first_prefix;
2367 $first_prefix = false;
2368 }
2369 }
2370
2371 $might_be_img = false;
2372
2373 if ( preg_match( $e1, $line, $m ) ) { # page with normal text or alt
2374 $text = $m[2];
2375 # If we get a ] at the beginning of $m[3] that means we have a link that's something like:
2376 # [[Image:Foo.jpg|[http://example.com desc]]] <- having three ] in a row breaks the match;
2377 # the real problem is with the $e1 regex
2378 # See T1500.
2379 # Still some problems for cases where the ] is meant to be outside punctuation,
2380 # and no image is in sight. See T4095.
2381 if ( $text !== ''
2382 && substr( $m[3], 0, 1 ) === ']'
2383 && strpos( $text, '[' ) !== false
2384 ) {
2385 $text .= ']'; # so that replaceExternalLinks($text) works later
2386 $m[3] = substr( $m[3], 1 );
2387 }
2388 # fix up urlencoded title texts
2389 if ( strpos( $m[1], '%' ) !== false ) {
2390 # Should anchors '#' also be rejected?
2391 $m[1] = str_replace( [ '<', '>' ], [ '&lt;', '&gt;' ], rawurldecode( $m[1] ) );
2392 }
2393 $trail = $m[3];
2394 } elseif ( preg_match( $e1_img, $line, $m ) ) {
2395 # Invalid, but might be an image with a link in its caption
2396 $might_be_img = true;
2397 $text = $m[2];
2398 if ( strpos( $m[1], '%' ) !== false ) {
2399 $m[1] = str_replace( [ '<', '>' ], [ '&lt;', '&gt;' ], rawurldecode( $m[1] ) );
2400 }
2401 $trail = "";
2402 } else { # Invalid form; output directly
2403 $s .= $prefix . '[[' . $line;
2404 continue;
2405 }
2406
# Leading spaces are dropped; a leading ':' is kept and checked below ($noforce)
2407 $origLink = ltrim( $m[1], ' ' );
2408
2409 # Don't allow internal links to pages containing
2410 # PROTO: where PROTO is a valid URL protocol; these
2411 # should be external links.
2412 if ( preg_match( '/^(?i:' . $this->mUrlProtocols . ')/', $origLink ) ) {
2413 $s .= $prefix . '[[' . $line;
2414 continue;
2415 }
2416
2417 # Make subpage if necessary
2418 if ( $useSubpages ) {
2419 $link = $this->maybeDoSubpageLink( $origLink, $text );
2420 } else {
2421 $link = $origLink;
2422 }
2423
2424 // \x7f isn't a default legal title char, so most likely strip
2425 // markers will force us into the "invalid form" path above. But,
2426 // just in case, let's assert that xmlish tags aren't valid in
2427 // the title position.
2428 $unstrip = $this->mStripState->killMarkers( $link );
2429 $noMarkers = ( $unstrip === $link );
2430
# A target that contains strip markers or is not a valid title is output as plain text
2431 $nt = $noMarkers ? Title::newFromText( $link ) : null;
2432 if ( $nt === null ) {
2433 $s .= $prefix . '[[' . $line;
2434 continue;
2435 }
2436
2437 $ns = $nt->getNamespace();
2438 $iw = $nt->getInterwiki();
2439
# $noforce is false when the link target starts with ':'
2440 $noforce = ( substr( $origLink, 0, 1 ) !== ':' );
2441
2442 if ( $might_be_img ) { # if this is actually an invalid link
2443 if ( $ns == NS_FILE && $noforce ) { # but might be an image
2444 $found = false;
2445 while ( true ) {
2446 # look at the next 'line' to see if we can close it there
2447 $a->next();
2448 $next_line = $a->current();
2449 if ( $next_line === false || $next_line === null ) {
2450 break;
2451 }
2452 $m = explode( ']]', $next_line, 3 );
2453 if ( count( $m ) == 3 ) {
2454 # the first ]] closes the inner link, the second the image
2455 $found = true;
2456 $text .= "[[{$m[0]}]]{$m[1]}";
2457 $trail = $m[2];
2458 break;
2459 } elseif ( count( $m ) == 2 ) {
2460 # if there's exactly one ]] that's fine, we'll keep looking
2461 $text .= "[[{$m[0]}]]{$m[1]}";
2462 } else {
2463 # if $next_line is invalid too, we need look no further
2464 $text .= '[[' . $next_line;
2465 break;
2466 }
2467 }
2468 if ( !$found ) {
2469 # we couldn't find the end of this imageLink, so output it raw
2470 # but don't ignore what might be perfectly normal links in the text we've examined
2471 $holders->merge( $this->replaceInternalLinks2( $text ) );
2472 $s .= "{$prefix}[[$link|$text";
2473 # note: no $trail, because without an end, there *is* no trail
2474 continue;
2475 }
2476 } else { # it's not an image, so output it raw
2477 $s .= "{$prefix}[[$link|$text";
2478 # note: no $trail, because without an end, there *is* no trail
2479 continue;
2480 }
2481 }
2482
2483 $wasblank = ( $text == '' );
2484 if ( $wasblank ) {
2485 $text = $link;
2486 if ( !$noforce ) {
2487 # Strip off leading ':'
2488 $text = substr( $text, 1 );
2489 }
2490 } else {
2491 # T6598 madness. Handle the quotes only if they come from the alternate part
2492 # [[Lista d''e paise d''o munno]] -> <a href="...">Lista d''e paise d''o munno</a>
2493 # [[Criticism of Harry Potter|Criticism of ''Harry Potter'']]
2494 # -> <a href="Criticism of Harry Potter">Criticism of <i>Harry Potter</i></a>
2495 $text = $this->doQuotes( $text );
2496 }
2497
2498 # Link not escaped by : , create the various objects
2499 if ( $noforce && !$nt->wasLocalInterwiki() ) {
2500 # Interwikis
2501 if (
2502 $iw && $this->mOptions->getInterwikiMagic() && $nottalk && (
2503 Language::fetchLanguageName( $iw, null, 'mw' ) ||
2504 in_array( $iw, $this->svcOptions->get( 'ExtraInterlanguageLinkPrefixes' ) )
2505 )
2506 ) {
2507 # T26502: filter duplicates
2508 if ( !isset( $this->mLangLinkLanguages[$iw] ) ) {
2509 $this->mLangLinkLanguages[$iw] = true;
2510 $this->mOutput->addLanguageLink( $nt->getFullText() );
2511 }
2512
2513 /**
2514 * Strip the whitespace interwiki links produce, see T10897
2515 */
2516 $s = rtrim( $s . $prefix ) . $trail; # T175416
2517 continue;
2518 }
2519
2520 if ( $ns == NS_FILE ) {
# Files for which isBadFile() returns true are not rendered here; they fall
# through to the ordinary link handling below
2521 if ( !$this->badFileLookup->isBadFile( $nt->getDBkey(), $this->mTitle ) ) {
2522 if ( $wasblank ) {
2523 # if no parameters were passed, $text
2524 # becomes something like "File:Foo.png",
2525 # which we don't want to pass on to the
2526 # image generator
2527 $text = '';
2528 } else {
2529 # recursively parse links inside the image caption
2530 # actually, this will parse them in any other parameters, too,
2531 # but it might be hard to fix that, and it doesn't matter ATM
2532 $text = $this->replaceExternalLinks( $text );
2533 $holders->merge( $this->replaceInternalLinks2( $text ) );
2534 }
2535 # cloak any absolute URLs inside the image markup, so replaceExternalLinks() won't touch them
2536 $s .= $prefix . $this->armorLinks(
2537 $this->makeImage( $nt, $text, $holders ) ) . $trail;
2538 continue;
2539 }
2540 } elseif ( $ns == NS_CATEGORY ) {
2541 /**
2542 * Strip the whitespace Category links produce, see T2087
2543 */
2544 $s = rtrim( $s . $prefix ) . $trail; # T2087, T87753
2545
# The sort key is the link text, or the default sort key if no text was given
2546 if ( $wasblank ) {
2547 $sortkey = $this->getDefaultSort();
2548 } else {
2549 $sortkey = $text;
2550 }
2551 $sortkey = Sanitizer::decodeCharReferences( $sortkey );
2552 $sortkey = str_replace( "\n", '', $sortkey );
2553 $sortkey = $this->getTargetLanguage()->convertCategoryKey( $sortkey );
2554 $this->mOutput->addCategory( $nt->getDBkey(), $sortkey );
2555
2556 continue;
2557 }
2558 }
2559
2560 # Self-link checking. For some languages, variants of the title are checked in
2561 # LinkHolderArray::doVariants() to allow batching the existence checks necessary
2562 # for linking to a different variant.
2563 if ( $ns != NS_SPECIAL && $nt->equals( $this->mTitle ) && !$nt->hasFragment() ) {
2564 $s .= $prefix . Linker::makeSelfLinkObj( $nt, $text, '', $trail );
2565 continue;
2566 }
2567
2568 # NS_MEDIA is a pseudo-namespace for linking directly to a file
2569 # @todo FIXME: Should do batch file existence checks, see comment below
2570 if ( $ns == NS_MEDIA ) {
2571 # Give extensions a chance to select the file revision for us
2572 $options = [];
2573 $descQuery = false;
2574 Hooks::run( 'BeforeParserFetchFileAndTitle',
2575 [ $this, $nt, &$options, &$descQuery ] );
2576 # Fetch and register the file (file title may be different via hooks)
2577 list( $file, $nt ) = $this->fetchFileAndTitle( $nt, $options );
2578 # Cloak with NOPARSE to avoid replacement in replaceExternalLinks
2579 $s .= $prefix . $this->armorLinks(
2580 Linker::makeMediaLinkFile( $nt, $file, $text ) ) . $trail;
2581 continue;
2582 }
2583
2584 # Some titles, such as valid special pages or files in foreign repos, should
2585 # be shown as bluelinks even though they're not included in the page table
2586 # @todo FIXME: isAlwaysKnown() can be expensive for file links; we should really do
2587 # batch file existence checks for NS_FILE and NS_MEDIA
2588 if ( $iw == '' && $nt->isAlwaysKnown() ) {
2589 $this->mOutput->addLink( $nt );
2590 $s .= $this->makeKnownLinkHolder( $nt, $text, $trail, $prefix );
2591 } else {
2592 # Links will be added to the output link list after checking
2593 $s .= $holders->makeHolder( $nt, $text, [], $trail, $prefix );
2594 }
2595 }
2596 return $holders;
2597 }
2598
/**
 * Render a forced-blue link inline; protect against double expansion of
 * URLs if we're in a mode that prepends full URL prefixes to internal links.
 * The trail text has to be split off so that URLs in the following text are
 * not broken, while trails on the wiki link itself keep working.
 *
 * @param Title $nt Link target
 * @param string $text Link text; if empty, the escaped prefixed title is used
 * @param string $trail Text following the link
 * @param string $prefix Text placed in front of the link text, inside the link
 * @return string HTML-wikitext mix oh yuck
 */
protected function makeKnownLinkHolder( $nt, $text = '', $trail = '', $prefix = '' ) {
	# $inside joins the link text, the rest stays behind the link
	list( $inside, $trail ) = Linker::splitTrail( $trail );

	if ( $text == '' ) {
		$text = htmlspecialchars( $nt->getPrefixedText() );
	}

	$renderer = $this->getLinkRenderer();
	$label = new HtmlArmor( "$prefix$text$inside" );
	$html = $renderer->makeKnownLink( $nt, $label );

	return $this->armorLinks( $html ) . $trail;
}
2625
/**
 * Insert a NOPARSE hacky thing into any inline links in a chunk that's
 * going to go through further parsing steps before inline URL expansion.
 *
 * Each URL protocol found at a word boundary (matched case-insensitively) is
 * prefixed with the parser marker prefix followed by "NOPARSE".
 *
 * Not needed quite as much as it used to be since free links are a bit
 * more sensible these days. But bracketed links are still an issue.
 *
 * @param string $text More-or-less HTML
 * @return string Less-or-more HTML with NOPARSE bits
 */
public function armorLinks( $text ) {
	$protocolPattern = '/\b((?i)' . $this->mUrlProtocols . ')/';
	$armored = self::MARKER_PREFIX . "NOPARSE$1";

	return preg_replace( $protocolPattern, $armored, $text );
}
2640
/**
 * Return true if subpage links should be expanded on this page.
 *
 * @return bool Whether the namespace of the current title has subpages
 */
public function areSubpagesAllowed() {
	# Some namespaces don't allow subpages
	$namespaceInfo = $this->nsInfo;
	$currentNamespace = $this->mTitle->getNamespace();

	return $namespaceInfo->hasSubpages( $currentNamespace );
}
2649
/**
 * Handle link to subpage if necessary
 *
 * Delegates to Linker::normalizeSubpageLink() with the current title as
 * context.
 *
 * @param string $target The source of the link
 * @param string &$text The link text, modified as necessary
 * @return string The full name of the link
 * @private
 */
public function maybeDoSubpageLink( $target, &$text ) {
	$contextTitle = $this->mTitle;
	$fullName = Linker::normalizeSubpageLink( $contextTitle, $target, $text );

	return $fullName;
}
2661
/**
 * Make lists from lines starting with ':', '*', '#', etc. (DBL)
 *
 * Thin wrapper around BlockLevelPass::doBlockLevels().
 *
 * @param string $text
 * @param bool $linestart Whether or not this is at the start of a line.
 * @private
 * @return string The lists rendered as HTML
 */
public function doBlockLevels( $text, $linestart ) {
	$rendered = BlockLevelPass::doBlockLevels( $text, $linestart );

	return $rendered;
}
2673
2674 /**
2675 * Return value of a magic variable (like PAGENAME)
2676 *
2677 * @private
2678 *
2679 * @param string $index Magic variable identifier as mapped in MagicWordFactory::$mVariableIDs
2680 * @param bool|PPFrame $frame
2681 *
2682 * @throws MWException
2683 * @return string
2684 */
2685 public function getVariableValue( $index, $frame = false ) {
2686 if ( is_null( $this->mTitle ) ) {
2687 // If no title set, bad things are going to happen
2688 // later. Title should always be set since this
2689 // should only be called in the middle of a parse
2690 // operation (but the unit-tests do funky stuff)
2691 throw new MWException( __METHOD__ . ' Should only be '
2692 . ' called while parsing (no title set)' );
2693 }
2694
2695 // Avoid PHP 7.1 warning from passing $this by reference
2696 $parser = $this;
2697
2698 /**
2699 * Some of these require message or data lookups and can be
2700 * expensive to check many times.
2701 */
2702 if (
2703 Hooks::run( 'ParserGetVariableValueVarCache', [ &$parser, &$this->mVarCache ] ) &&
2704 isset( $this->mVarCache[$index] )
2705 ) {
2706 return $this->mVarCache[$index];
2707 }
2708
2709 $ts = wfTimestamp( TS_UNIX, $this->mOptions->getTimestamp() );
2710 Hooks::run( 'ParserGetVariableValueTs', [ &$parser, &$ts ] );
2711
2712 $pageLang = $this->getFunctionLang();
2713
2714 switch ( $index ) {
2715 case '!':
2716 $value = '|';
2717 break;
2718 case 'currentmonth':
2719 $value = $pageLang->formatNum( MWTimestamp::getInstance( $ts )->format( 'm' ), true );
2720 break;
2721 case 'currentmonth1':
2722 $value = $pageLang->formatNum( MWTimestamp::getInstance( $ts )->format( 'n' ), true );
2723 break;
2724 case 'currentmonthname':
2725 $value = $pageLang->getMonthName( MWTimestamp::getInstance( $ts )->format( 'n' ) );
2726 break;
2727 case 'currentmonthnamegen':
2728 $value = $pageLang->getMonthNameGen( MWTimestamp::getInstance( $ts )->format( 'n' ) );
2729 break;
2730 case 'currentmonthabbrev':
2731 $value = $pageLang->getMonthAbbreviation( MWTimestamp::getInstance( $ts )->format( 'n' ) );
2732 break;
2733 case 'currentday':
2734 $value = $pageLang->formatNum( MWTimestamp::getInstance( $ts )->format( 'j' ), true );
2735 break;
2736 case 'currentday2':
2737 $value = $pageLang->formatNum( MWTimestamp::getInstance( $ts )->format( 'd' ), true );
2738 break;
2739 case 'localmonth':
2740 $value = $pageLang->formatNum( MWTimestamp::getLocalInstance( $ts )->format( 'm' ), true );
2741 break;
2742 case 'localmonth1':
2743 $value = $pageLang->formatNum( MWTimestamp::getLocalInstance( $ts )->format( 'n' ), true );
2744 break;
2745 case 'localmonthname':
2746 $value = $pageLang->getMonthName( MWTimestamp::getLocalInstance( $ts )->format( 'n' ) );
2747 break;
2748 case 'localmonthnamegen':
2749 $value = $pageLang->getMonthNameGen( MWTimestamp::getLocalInstance( $ts )->format( 'n' ) );
2750 break;
2751 case 'localmonthabbrev':
2752 $value = $pageLang->getMonthAbbreviation( MWTimestamp::getLocalInstance( $ts )->format( 'n' ) );
2753 break;
2754 case 'localday':
2755 $value = $pageLang->formatNum( MWTimestamp::getLocalInstance( $ts )->format( 'j' ), true );
2756 break;
2757 case 'localday2':
2758 $value = $pageLang->formatNum( MWTimestamp::getLocalInstance( $ts )->format( 'd' ), true );
2759 break;
2760 case 'pagename':
2761 $value = wfEscapeWikiText( $this->mTitle->getText() );
2762 break;
2763 case 'pagenamee':
2764 $value = wfEscapeWikiText( $this->mTitle->getPartialURL() );
2765 break;
2766 case 'fullpagename':
2767 $value = wfEscapeWikiText( $this->mTitle->getPrefixedText() );
2768 break;
2769 case 'fullpagenamee':
2770 $value = wfEscapeWikiText( $this->mTitle->getPrefixedURL() );
2771 break;
2772 case 'subpagename':
2773 $value = wfEscapeWikiText( $this->mTitle->getSubpageText() );
2774 break;
2775 case 'subpagenamee':
2776 $value = wfEscapeWikiText( $this->mTitle->getSubpageUrlForm() );
2777 break;
2778 case 'rootpagename':
2779 $value = wfEscapeWikiText( $this->mTitle->getRootText() );
2780 break;
2781 case 'rootpagenamee':
2782 $value = wfEscapeWikiText( wfUrlencode( str_replace(
2783 ' ',
2784 '_',
2785 $this->mTitle->getRootText()
2786 ) ) );
2787 break;
2788 case 'basepagename':
2789 $value = wfEscapeWikiText( $this->mTitle->getBaseText() );
2790 break;
2791 case 'basepagenamee':
2792 $value = wfEscapeWikiText( wfUrlencode( str_replace(
2793 ' ',
2794 '_',
2795 $this->mTitle->getBaseText()
2796 ) ) );
2797 break;
2798 case 'talkpagename':
2799 if ( $this->mTitle->canHaveTalkPage() ) {
2800 $talkPage = $this->mTitle->getTalkPage();
2801 $value = wfEscapeWikiText( $talkPage->getPrefixedText() );
2802 } else {
2803 $value = '';
2804 }
2805 break;
2806 case 'talkpagenamee':
2807 if ( $this->mTitle->canHaveTalkPage() ) {
2808 $talkPage = $this->mTitle->getTalkPage();
2809 $value = wfEscapeWikiText( $talkPage->getPrefixedURL() );
2810 } else {
2811 $value = '';
2812 }
2813 break;
2814 case 'subjectpagename':
2815 $subjPage = $this->mTitle->getSubjectPage();
2816 $value = wfEscapeWikiText( $subjPage->getPrefixedText() );
2817 break;
2818 case 'subjectpagenamee':
2819 $subjPage = $this->mTitle->getSubjectPage();
2820 $value = wfEscapeWikiText( $subjPage->getPrefixedURL() );
2821 break;
2822 case 'pageid': // requested in T25427
2823 # Inform the edit saving system that getting the canonical output
2824 # after page insertion requires a parse that used that exact page ID
2825 $this->setOutputFlag( 'vary-page-id', '{{PAGEID}} used' );
2826 $value = $this->mTitle->getArticleID();
2827 if ( !$value ) {
2828 $value = $this->mOptions->getSpeculativePageId();
2829 if ( $value ) {
2830 $this->mOutput->setSpeculativePageIdUsed( $value );
2831 }
2832 }
2833 break;
2834 case 'revisionid':
2835 if (
2836 $this->svcOptions->get( 'MiserMode' ) &&
2837 !$this->mOptions->getInterfaceMessage() &&
2838 // @TODO: disallow this word on all namespaces
2839 $this->nsInfo->isContent( $this->mTitle->getNamespace() )
2840 ) {
2841 // Use a stub result instead of the actual revision ID in order to avoid
2842 // double parses on page save but still allow preview detection (T137900)
2843 if ( $this->getRevisionId() || $this->mOptions->getSpeculativeRevId() ) {
2844 $value = '-';
2845 } else {
2846 $this->setOutputFlag( 'vary-revision-exists', '{{REVISIONID}} used' );
2847 $value = '';
2848 }
2849 } else {
2850 # Inform the edit saving system that getting the canonical output after
2851 # revision insertion requires a parse that used that exact revision ID
2852 $this->setOutputFlag( 'vary-revision-id', '{{REVISIONID}} used' );
2853 $value = $this->getRevisionId();
2854 if ( $value === 0 ) {
2855 $rev = $this->getRevisionObject();
2856 $value = $rev ? $rev->getId() : $value;
2857 }
2858 if ( !$value ) {
2859 $value = $this->mOptions->getSpeculativeRevId();
2860 if ( $value ) {
2861 $this->mOutput->setSpeculativeRevIdUsed( $value );
2862 }
2863 }
2864 }
2865 break;
2866 case 'revisionday':
2867 $value = (int)$this->getRevisionTimestampSubstring( 6, 2, self::MAX_TTS, $index );
2868 break;
2869 case 'revisionday2':
2870 $value = $this->getRevisionTimestampSubstring( 6, 2, self::MAX_TTS, $index );
2871 break;
2872 case 'revisionmonth':
2873 $value = $this->getRevisionTimestampSubstring( 4, 2, self::MAX_TTS, $index );
2874 break;
2875 case 'revisionmonth1':
2876 $value = (int)$this->getRevisionTimestampSubstring( 4, 2, self::MAX_TTS, $index );
2877 break;
2878 case 'revisionyear':
2879 $value = $this->getRevisionTimestampSubstring( 0, 4, self::MAX_TTS, $index );
2880 break;
2881 case 'revisiontimestamp':
2882 $value = $this->getRevisionTimestampSubstring( 0, 14, self::MAX_TTS, $index );
2883 break;
2884 case 'revisionuser':
2885 # Inform the edit saving system that getting the canonical output after
2886 # revision insertion requires a parse that used the actual user ID
2887 $this->setOutputFlag( 'vary-user', '{{REVISIONUSER}} used' );
2888 $value = $this->getRevisionUser();
2889 break;
2890 case 'revisionsize':
2891 $value = $this->getRevisionSize();
2892 break;
2893 case 'namespace':
2894 $value = str_replace( '_', ' ',
2895 $this->contLang->getNsText( $this->mTitle->getNamespace() ) );
2896 break;
2897 case 'namespacee':
2898 $value = wfUrlencode( $this->contLang->getNsText( $this->mTitle->getNamespace() ) );
2899 break;
2900 case 'namespacenumber':
2901 $value = $this->mTitle->getNamespace();
2902 break;
2903 case 'talkspace':
2904 $value = $this->mTitle->canHaveTalkPage()
2905 ? str_replace( '_', ' ', $this->mTitle->getTalkNsText() )
2906 : '';
2907 break;
2908 case 'talkspacee':
2909 $value = $this->mTitle->canHaveTalkPage() ? wfUrlencode( $this->mTitle->getTalkNsText() ) : '';
2910 break;
2911 case 'subjectspace':
2912 $value = str_replace( '_', ' ', $this->mTitle->getSubjectNsText() );
2913 break;
2914 case 'subjectspacee':
2915 $value = ( wfUrlencode( $this->mTitle->getSubjectNsText() ) );
2916 break;
2917 case 'currentdayname':
2918 $value = $pageLang->getWeekdayName( (int)MWTimestamp::getInstance( $ts )->format( 'w' ) + 1 );
2919 break;
2920 case 'currentyear':
2921 $value = $pageLang->formatNum( MWTimestamp::getInstance( $ts )->format( 'Y' ), true );
2922 break;
2923 case 'currenttime':
2924 $value = $pageLang->time( wfTimestamp( TS_MW, $ts ), false, false );
2925 break;
2926 case 'currenthour':
2927 $value = $pageLang->formatNum( MWTimestamp::getInstance( $ts )->format( 'H' ), true );
2928 break;
2929 case 'currentweek':
2930 # @bug T6594 PHP5 has it zero padded, PHP4 does not, cast to
2931 # int to remove the padding
2932 $value = $pageLang->formatNum( (int)MWTimestamp::getInstance( $ts )->format( 'W' ) );
2933 break;
2934 case 'currentdow':
2935 $value = $pageLang->formatNum( MWTimestamp::getInstance( $ts )->format( 'w' ) );
2936 break;
2937 case 'localdayname':
2938 $value = $pageLang->getWeekdayName(
2939 (int)MWTimestamp::getLocalInstance( $ts )->format( 'w' ) + 1
2940 );
2941 break;
2942 case 'localyear':
2943 $value = $pageLang->formatNum( MWTimestamp::getLocalInstance( $ts )->format( 'Y' ), true );
2944 break;
2945 case 'localtime':
2946 $value = $pageLang->time(
2947 MWTimestamp::getLocalInstance( $ts )->format( 'YmdHis' ),
2948 false,
2949 false
2950 );
2951 break;
2952 case 'localhour':
2953 $value = $pageLang->formatNum( MWTimestamp::getLocalInstance( $ts )->format( 'H' ), true );
2954 break;
2955 case 'localweek':
2956 # @bug T6594 PHP5 has it zero padded, PHP4 does not, cast to
2957 # int to remove the padding
2958 $value = $pageLang->formatNum( (int)MWTimestamp::getLocalInstance( $ts )->format( 'W' ) );
2959 break;
2960 case 'localdow':
2961 $value = $pageLang->formatNum( MWTimestamp::getLocalInstance( $ts )->format( 'w' ) );
2962 break;
2963 case 'numberofarticles':
2964 $value = $pageLang->formatNum( SiteStats::articles() );
2965 break;
2966 case 'numberoffiles':
2967 $value = $pageLang->formatNum( SiteStats::images() );
2968 break;
2969 case 'numberofusers':
2970 $value = $pageLang->formatNum( SiteStats::users() );
2971 break;
2972 case 'numberofactiveusers':
2973 $value = $pageLang->formatNum( SiteStats::activeUsers() );
2974 break;
2975 case 'numberofpages':
2976 $value = $pageLang->formatNum( SiteStats::pages() );
2977 break;
2978 case 'numberofadmins':
2979 $value = $pageLang->formatNum( SiteStats::numberingroup( 'sysop' ) );
2980 break;
2981 case 'numberofedits':
2982 $value = $pageLang->formatNum( SiteStats::edits() );
2983 break;
2984 case 'currenttimestamp':
2985 $value = wfTimestamp( TS_MW, $ts );
2986 break;
2987 case 'localtimestamp':
2988 $value = MWTimestamp::getLocalInstance( $ts )->format( 'YmdHis' );
2989 break;
2990 case 'currentversion':
2991 $value = SpecialVersion::getVersion();
2992 break;
2993 case 'articlepath':
2994 return $this->svcOptions->get( 'ArticlePath' );
2995 case 'sitename':
2996 return $this->svcOptions->get( 'Sitename' );
2997 case 'server':
2998 return $this->svcOptions->get( 'Server' );
2999 case 'servername':
3000 return $this->svcOptions->get( 'ServerName' );
3001 case 'scriptpath':
3002 return $this->svcOptions->get( 'ScriptPath' );
3003 case 'stylepath':
3004 return $this->svcOptions->get( 'StylePath' );
3005 case 'directionmark':
3006 return $pageLang->getDirMark();
3007 case 'contentlanguage':
3008 return $this->svcOptions->get( 'LanguageCode' );
3009 case 'pagelanguage':
3010 $value = $pageLang->getCode();
3011 break;
3012 case 'cascadingsources':
3013 $value = CoreParserFunctions::cascadingsources( $this );
3014 break;
3015 default:
3016 $ret = null;
3017 Hooks::run(
3018 'ParserGetVariableValueSwitch',
3019 [ &$parser, &$this->mVarCache, &$index, &$ret, &$frame ]
3020 );
3021
3022 return $ret;
3023 }
3024
3025 if ( $index ) {
3026 $this->mVarCache[$index] = $value;
3027 }
3028
3029 return $value;
3030 }
3031
/**
 * Extract a slice of the revision timestamp and, when no revision object is
 * available yet, flag the output if that slice could differ by save time.
 *
 * @param int $start Offset of the first character of the slice
 * @param int $len Length of the slice
 * @param int $mtts Max time-till-save; sets vary-revision-timestamp if result changes by then
 * @param string $variable Parser variable name
 * @return string The requested slice of the revision timestamp
 */
private function getRevisionTimestampSubstring( $start, $len, $mtts, $variable ) {
	# Slice of the (timezone-adjusted) timestamp used for this revision
	$current = substr( $this->getRevisionTimestamp(), $start, $len );

	if ( $this->getRevisionObject() ) {
		# A revision is already associated, so no vary flag is needed
		return $current;
	}

	# Take the same slice of the adjusted timestamp $mtts seconds from now.
	# "Now" is the wall clock here, not the timestamp from the parser options,
	# so the two slices can be compared against each other.
	$futureTimestamp = wfTimestamp( TS_MW, time() + $mtts );
	$projected = substr( $this->contLang->userAdjust( $futureTimestamp, '' ), $start, $len );

	if ( $current !== $projected ) {
		# Inform the edit saving system that getting the canonical output after
		# revision insertion requires a parse that used an actual revision timestamp
		$this->setOutputFlag( 'vary-revision-timestamp', "$variable used" );
	}

	return $current;
}
3063
/**
 * Build the magic word arrays for variables (like CURRENTMONTHNAME) and
 * for substitution modifiers, and store them on this parser.
 *
 * @private
 */
public function initialiseVariables() {
	$factory = $this->magicWordFactory;

	# Fetch both ID lists first, then wrap each one in a magic word array
	$idsForVariables = $factory->getVariableIDs();
	$idsForSubst = $factory->getSubstIDs();

	$this->mVariables = $factory->newArray( $idsForVariables );
	$this->mSubstWords = $factory->newArray( $idsForSubst );
}
3076
/**
 * Run the preprocessor over some wikitext and return the resulting document tree.
 * This is the ghost of replace_variables().
 *
 * @param string $text The text to parse
 * @param int $flags Bitwise combination of:
 *   - self::PTD_FOR_INCLUSION: Handle "<noinclude>" and "<includeonly>" as if the text is being
 *     included. Default is to assume a direct page view.
 *
 * The generated DOM tree must depend only on the input text and the flags.
 * The DOM tree must be the same in OT_HTML and OT_WIKI mode, to avoid a regression of T6899.
 *
 * Any flag added to the $flags parameter here, or any other parameter liable to cause a
 * change in the DOM tree for a given text, must be passed through the section identifier
 * in the section edit link and thus back to extractSections().
 *
 * The output of this function is currently only cached in process memory, but a persistent
 * cache may be implemented at a later date which takes further advantage of these strict
 * dependency requirements.
 *
 * @return PPNode
 */
public function preprocessToDom( $text, $flags = 0 ) {
	# Straight delegation: the preprocessor owns all of the tree building
	return $this->getPreprocessor()->preprocessToObj( $text, $flags );
}
3103
/**
 * Split a string into its leading whitespace, its trimmed contents and its
 * trailing whitespace, returned as a three-element array in that order.
 *
 * A string made up only of whitespace is reported entirely as leading
 * whitespace, with empty contents and empty trailing whitespace.
 *
 * @param string $s
 *
 * @return array [ leading whitespace, contents, trailing whitespace ]
 */
public static function splitWhitespace( $s ) {
	# Strip the front first; whatever was removed is the leading whitespace
	$withoutLead = ltrim( $s );
	$leadLength = strlen( $s ) - strlen( $withoutLead );

	# Then strip the back of what is left
	$contents = rtrim( $withoutLead );
	$tailLength = strlen( $withoutLead ) - strlen( $contents );

	return [
		substr( $s, 0, $leadLength ),
		$contents,
		# A negative offset of zero would return the whole string, hence the guard
		$tailLength > 0 ? substr( $withoutLead, -$tailLength ) : '',
	];
}
3123
/**
 * Expand magic variables, templates and template arguments in a piece of
 * wikitext. Templates are substituted recursively, taking care to avoid
 * infinite loops.
 *
 * Note that the substitution depends on value of $mOutputType:
 *  self::OT_WIKI: only {{subst:}} templates
 *  self::OT_PREPROCESS: templates but not extension tags
 *  self::OT_HTML: all templates and extension tags
 *
 * Empty text, and text longer than the max include size from the parser
 * options, is returned unchanged.
 *
 * @param string $text The text to transform
 * @param false|PPFrame|array $frame Object describing the arguments passed to the
 *   template. Arguments may also be provided as an associative array, as
 *   was the usual case before MW1.12. Providing arguments this way may be
 *   useful for extensions wishing to perform variable replacement
 *   explicitly.
 * @param bool $argsOnly Only do argument (triple-brace) expansion, not
 *   double-brace expansion.
 * @return string
 */
public function replaceVariables( $text, $frame = false, $argsOnly = false ) {
	$byteLength = strlen( $text );

	# Nothing to do for empty text
	if ( $byteLength < 1 ) {
		return $text;
	}
	# Prevent too big inclusions
	if ( $byteLength > $this->mOptions->getMaxIncludeSize() ) {
		return $text;
	}

	# Normalise $frame into a PPFrame instance
	if ( !( $frame instanceof PPFrame ) ) {
		if ( $frame === false ) {
			$frame = $this->getPreprocessor()->newFrame();
		} else {
			$this->logger->debug(
				__METHOD__ . " called using plain parameters instead of " .
				"a PPFrame instance. Creating custom frame."
			);
			$frame = $this->getPreprocessor()->newCustomFrame( $frame );
		}
	}

	return $frame->expand(
		$this->preprocessToDom( $text ),
		$argsOnly ? PPFrame::NO_TEMPLATES : 0
	);
}
3167
/**
 * Clean up argument array - refactored in 1.9 so parserfunctions can use it, too.
 *
 * Each argument is split on its first "=" sign. An argument without "="
 * is positional and is stored under the next integer key, counting from 1.
 * An argument with "=" is stored under the whitespace-trimmed text before
 * the sign, with the whitespace-trimmed text after it as the value; a later
 * entry with the same key overwrites an earlier one.
 *
 * @param array $args List of raw argument strings
 *
 * @return array Map of argument name (or position) to value
 */
public static function createAssocArgs( $args ) {
	$assocArgs = [];
	$index = 1;
	foreach ( $args as $arg ) {
		$eqpos = strpos( $arg, '=' );
		if ( $eqpos === false ) {
			# Positional argument: stored as-is, without trimming
			$assocArgs[$index++] = $arg;
			continue;
		}

		# Named argument. trim() always returns a string, so no false-checks
		# are needed on either side; an empty name or value stays ''.
		$name = trim( substr( $arg, 0, $eqpos ) );
		$assocArgs[$name] = trim( substr( $arg, $eqpos + 1 ) );
	}

	return $assocArgs;
}
3196
/**
 * Warn the user that a parser limitation has been reached.
 * The user is meant to be warned at most once per limitation type.
 *
 * The results are shown during preview and run through the Parser (See EditPage.php)
 *
 * @param string $limitationType Should be one of:
 *   'expensive-parserfunction' (corresponding messages:
 *       'expensive-parserfunction-warning',
 *       'expensive-parserfunction-category')
 *   'post-expand-template-argument' (corresponding messages:
 *       'post-expand-template-argument-warning',
 *       'post-expand-template-argument-category')
 *   'post-expand-template-inclusion' (corresponding messages:
 *       'post-expand-template-inclusion-warning',
 *       'post-expand-template-inclusion-category')
 *   'node-count-exceeded' (corresponding messages:
 *       'node-count-exceeded-warning',
 *       'node-count-exceeded-category')
 *   'expansion-depth-exceeded' (corresponding messages:
 *       'expansion-depth-exceeded-warning',
 *       'expansion-depth-exceeded-category')
 * @param string|int|null $current Current value
 * @param string|int|null $max Maximum allowed, when an explicit limit has been
 *   exceeded, provide the values (optional)
 */
public function limitationWarn( $limitationType, $current = '', $max = '' ) {
	# Passing $current and $max does no harm when the message does not use them.
	# Not doing ->inLanguage( $this->mOptions->getUserLangObj() ), since this is shown
	# only during preview, and that would split the parser cache unnecessarily.
	$message = wfMessage( $limitationType . '-warning' )->numParams( $current, $max );

	$this->mOutput->addWarning( $message->text() );
	$this->addTrackingCategory( $limitationType . '-category' );
}
3232
3233 /**
3234 * Return the text of a template, after recursively
3235 * replacing any variables or templates within the template.
3236 *
3237 * @param array $piece The parts of the template
3238 * $piece['title']: the title, i.e. the part before the |
3239 * $piece['parts']: the parameter array
3240 * $piece['lineStart']: whether the brace was at the start of a line