Merge "parser: document what 0/null rev IDs do when given to Parser::parse"
[lhc/web/wiklou.git] / includes / parser / Parser.php
1 <?php
2 /**
3 * PHP parser that converts wiki markup to HTML.
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License along
16 * with this program; if not, write to the Free Software Foundation, Inc.,
17 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
18 * http://www.gnu.org/copyleft/gpl.html
19 *
20 * @file
21 * @ingroup Parser
22 */
23 use MediaWiki\Config\ServiceOptions;
24 use MediaWiki\Linker\LinkRenderer;
25 use MediaWiki\Linker\LinkRendererFactory;
26 use MediaWiki\Linker\LinkTarget;
27 use MediaWiki\MediaWikiServices;
28 use MediaWiki\Special\SpecialPageFactory;
29 use Psr\Log\NullLogger;
30 use Wikimedia\ScopedCallback;
31 use Psr\Log\LoggerInterface;
32
33 /**
34 * @defgroup Parser Parser
35 */
36
37 /**
38 * PHP Parser - Processes wiki markup (which uses a more user-friendly
39 * syntax, such as "[[link]]" for making links), and provides a one-way
40 * transformation of that wiki markup into (X)HTML output / markup
41 * (which in turn the browser understands, and can display).
42 *
43 * There are seven main entry points into the Parser class:
44 *
45 * - Parser::parse()
46 * produces HTML output
47 * - Parser::preSaveTransform()
48 * produces altered wiki markup
49 * - Parser::preprocess()
50 * removes HTML comments and expands templates
51 * - Parser::cleanSig() and Parser::cleanSigInSig()
52 * cleans a signature before saving it to preferences
53 * - Parser::getSection()
54 * return the content of a section from an article for section editing
55 * - Parser::replaceSection()
56 * replaces a section by number inside an article
57 * - Parser::getPreloadText()
58 * removes <noinclude> sections and <includeonly> tags
59 *
60 * @warning $wgUser or $wgTitle or $wgRequest or $wgLang. Keep them away!
61 *
62 * @par Settings:
63 * $wgNamespacesWithSubpages
64 *
65 * @par Settings only within ParserOptions:
66 * $wgAllowExternalImages
67 * $wgAllowSpecialInclusion
68 * $wgInterwikiMagic
69 * $wgMaxArticleSize
70 *
71 * @ingroup Parser
72 */
73 class Parser {
/**
 * Version of the ParserOutput format. Bump it whenever that format
 * changes in an incompatible way, so that the parser cache can
 * automatically discard old data.
 */
const VERSION = '1.6.4';

/**
 * Version of the serialiseHalfParsedText() output. Bump it whenever
 * that output changes in an incompatible way.
 */
const HALF_PARSED_VERSION = 2;

// Flags for Parser::setFunctionHook
const SFH_NO_HASH = 1;
const SFH_OBJECT_ARGS = 2;

// Constants needed for external link processing.
// Matches everything except bracket, space, or control characters.
// \p{Zs} is the Unicode 'separator, space' category. It covers the space 0x20
// as well as U+3000 IDEOGRAPHIC SPACE (T21052).
// \x{FFFD} is the Unicode replacement character, which Preprocessor_DOM
// uses to replace invalid HTML characters.
const EXT_LINK_URL_CLASS = '[^][<>"\\x00-\\x20\\x7F\p{Zs}\x{FFFD}]';
// Simplified expression to match an IPv4 or IPv6 address, or
// at least one character of a host name (embeds EXT_LINK_URL_CLASS)
const EXT_LINK_ADDR = '(?:[0-9.]+|\\[(?i:[0-9a-f:.]+)\\]|[^][<>"\\x00-\\x20\\x7F\p{Zs}\x{FFFD}])';
// RegExp to make image URLs (embeds IPv6 part of EXT_LINK_ADDR)
// phpcs:ignore Generic.Files.LineLength
const EXT_IMAGE_REGEX = '/^(http:\/\/|https:\/\/)((?:\\[(?i:[0-9a-f:.]+)\\])?[^][<>"\\x00-\\x20\\x7F\p{Zs}\x{FFFD}]+)
\\/([A-Za-z0-9_.,~%\\-+&;#*?!=()@\\x80-\\xFF]+)\\.((?i)gif|png|jpg|jpeg)$/Sxu';

// Regular expression for a non-newline space
const SPACE_NOT_NL = '(?:\t|&nbsp;|&\#0*160;|&\#[Xx]0*[Aa]0;|\p{Zs})';

// Flags for preprocessToDom
const PTD_FOR_INCLUSION = 1;

// Allowed values for $this->mOutputType.
// Parameter to startExternalParse().
const OT_HTML = 1; // like parse()
const OT_WIKI = 2; // like preSaveTransform()
const OT_PREPROCESS = 3; // like preprocess()
const OT_MSG = 3; // shares its value with OT_PREPROCESS
const OT_PLAIN = 4; // like extractSections() - portions of the original are returned unchanged.

/**
 * @var string Prefix and suffix for temporary replacement strings
 * for the multipass parser.
 *
 * \x7f should never appear in input as it's disallowed in XML.
 * Putting it at the front also gives a little extra robustness,
 * since it shouldn't match when butted up against identifier-like
 * string constructs.
 *
 * The marker must not consist of all title characters, or else it will
 * change the behavior of <nowiki> in a link.
 *
 * The marker must contain a character that needs escaping in attributes;
 * otherwise someone could put a strip marker in an attribute to get around
 * escaping quote marks, and break out of the attribute. Hence the added
 * `'".
 */
const MARKER_SUFFIX = "-QINU`\"'\x7f";
const MARKER_PREFIX = "\x7f'\"`UNIQ-";

// Markers used for wrapping the table of contents
const TOC_START = '<mw:toc>';
const TOC_END = '</mw:toc>';

/** @var int Assume that no output will later be saved this many seconds after parsing */
const MAX_TTS = 900;

// ---- Persistent state ----
public $mTagHooks = [];
public $mTransparentTagHooks = [];
public $mFunctionHooks = [];
public $mFunctionSynonyms = [ 0 => [], 1 => [] ];
public $mFunctionTagHooks = [];
public $mStripList = [];
public $mDefaultStripList = [];
public $mVarCache = [];
public $mImageParams = [];
public $mImageParamsMagicArray = [];
public $mMarkerIndex = 0;
/**
 * @var bool Whether firstCallInit still needs to be called
 */
public $mFirstCall = true;

// ---- Initialised by initialiseVariables() ----

/**
 * @var MagicWordArray
 */
public $mVariables;

/**
 * @var MagicWordArray
 */
public $mSubstWords;

/**
 * @deprecated since 1.34, there should be no need to use this
 * @var array
 */
public $mConf;

// ---- Initialised in constructor ----
public $mExtLinkBracketedRegex;
public $mUrlProtocols;

// ---- Initialized in getPreprocessor() ----
/** @var Preprocessor */
public $mPreprocessor;

// ---- Cleared with clearState() ----
/**
 * @var ParserOutput
 */
public $mOutput;
public $mAutonumber;

/**
 * @var StripState
 */
public $mStripState;

public $mIncludeCount;
/**
 * @var LinkHolderArray
 */
public $mLinkHolders;

public $mLinkID;
public $mIncludeSizes;
public $mPPNodeCount;
public $mGeneratedPPNodeCount;
public $mHighestExpansionDepth;
public $mDefaultSort;
public $mTplRedirCache;
public $mHeadings;
public $mDoubleUnderscores;
/** Number of expensive parser function calls */
public $mExpensiveFunctionCount;
public $mShowToc;
public $mForceTocPosition;
/** @var array */
public $mTplDomCache;

/**
 * @var User User object; only used when doing pre-save transform
 */
public $mUser;

// ---- Temporary ----
// These are variables reset at least once per parse regardless of $clearState

/**
 * @var ParserOptions
 */
public $mOptions;

/**
 * @var Title Title context, used for self-link rendering and similar things
 */
public $mTitle;
/** Output type, one of the OT_xxx constants */
public $mOutputType;
/** Shortcut alias, see setOutputType() */
public $ot;
/** The revision object of the specified revision ID */
public $mRevisionObject;
/** ID to display in {{REVISIONID}} tags */
public $mRevisionId;
/** The timestamp of the specified revision ID */
public $mRevisionTimestamp;
/** User to display in {{REVISIONUSER}} tag */
public $mRevisionUser;
/** Size to display in {{REVISIONSIZE}} variable */
public $mRevisionSize;
/** The revision ID which was used to fetch the timestamp */
public $mRevIdForTs;
/** For {{PAGESIZE}} on current page. */
public $mInputSize = false;

/**
 * @var array Array with the language name of each language link (i.e. the
 * interwiki prefix) in the key, value arbitrary. Used to avoid sending
 * duplicate language links to the ParserOutput.
 */
public $mLangLinkLanguages;

/**
 * @var MapCacheLRU|null
 * @since 1.24
 *
 * A cache of the current revisions of titles. Keys are $title->getPrefixedDbKey()
 */
public $currentRevisionCache;

/**
 * @var bool|string Recursive call protection.
 * This variable should be treated as if it were private.
 */
public $mInParse = false;

/** @var SectionProfiler */
protected $mProfiler;

/**
 * @var LinkRenderer
 */
protected $mLinkRenderer;

/** @var MagicWordFactory */
private $magicWordFactory;

/** @var Language */
private $contLang;

/** @var ParserFactory */
private $factory;

/** @var SpecialPageFactory */
private $specialPageFactory;

/**
 * Named $svcOptions rather than $options (as it is elsewhere) to avoid
 * confusion with $mOptions, which is public and widely used, and with the
 * local variable $options used for ParserOptions throughout this file.
 *
 * @var ServiceOptions
 */
private $svcOptions;

/** @var LinkRendererFactory */
private $linkRendererFactory;

/** @var NamespaceInfo */
private $nsInfo;

/** @var LoggerInterface */
private $logger;

/**
 * Names of the options the constructor requires from its ServiceOptions.
 *
 * TODO Make this a const when HHVM support is dropped (T192166)
 *
 * @var array
 * @since 1.33
 */
public static $constructorOptions = [
	// See $wgParserConf documentation
	'class',
	'preprocessorClass',
	// See documentation for the corresponding config options
	'ArticlePath',
	'EnableScaryTranscluding',
	'ExtraInterlanguageLinkPrefixes',
	'FragmentMode',
	'LanguageCode',
	'MaxSigChars',
	'MaxTocLevel',
	'MiserMode',
	'ScriptPath',
	'Server',
	'ServerName',
	'ShowHostnames',
	'Sitename',
	'StylePath',
	'TranscludeCacheExpiry',
];
329
/**
 * Constructing parsers directly is deprecated! Use a ParserFactory.
 *
 * Two calling conventions are accepted:
 *  - New (1.34+): the first argument is a ServiceOptions object and the
 *    remaining arguments are as documented below.
 *  - Pre-1.34: the first argument is the ParserConf array (or empty), the
 *    seventh is a Config, the eighth a LinkRendererFactory and the ninth a
 *    NamespaceInfo. That convention has no logger argument, so a NullLogger
 *    is always used.
 *
 * @param ServiceOptions|array|null $svcOptions ServiceOptions (new convention),
 *   or a ParserConf array / null (pre-1.34 convention)
 * @param MagicWordFactory|null $magicWordFactory
 * @param Language|null $contLang Content language
 * @param ParserFactory|null $factory
 * @param string|null $urlProtocols As returned from wfUrlProtocols()
 * @param SpecialPageFactory|null $spFactory
 * @param LinkRendererFactory|null $linkRendererFactory
 * @param NamespaceInfo|null $nsInfo
 * @param LoggerInterface|null $logger
 */
public function __construct(
	$svcOptions = null,
	MagicWordFactory $magicWordFactory = null,
	Language $contLang = null,
	ParserFactory $factory = null,
	$urlProtocols = null,
	SpecialPageFactory $spFactory = null,
	$linkRendererFactory = null,
	$nsInfo = null,
	$logger = null
) {
	if ( !$svcOptions || is_array( $svcOptions ) ) {
		// Pre-1.34 calling convention is the first parameter is just ParserConf, the seventh is
		// Config, and the eighth is LinkRendererFactory.
		$this->mConf = (array)$svcOptions;
		if ( empty( $this->mConf['class'] ) ) {
			$this->mConf['class'] = self::class;
		}
		if ( empty( $this->mConf['preprocessorClass'] ) ) {
			$this->mConf['preprocessorClass'] = self::getDefaultPreprocessorClass();
		}
		$this->svcOptions = new ServiceOptions( self::$constructorOptions,
			$this->mConf, func_num_args() > 6
				? func_get_arg( 6 ) : MediaWikiServices::getInstance()->getMainConfig()
		);
		// The positional arguments are shifted by one relative to the new
		// signature. Read each slot before the parameter occupying it is
		// overwritten, because func_get_arg() reports current values.
		$linkRendererFactory = func_num_args() > 7 ? func_get_arg( 7 ) : null;
		$nsInfo = func_num_args() > 8 ? func_get_arg( 8 ) : null;
		// The ninth positional argument, which the new signature names
		// $logger, has just been consumed as NamespaceInfo. The legacy
		// convention carries no logger, so make sure that object is not
		// mistaken for one below.
		$logger = null;
	} else {
		// New calling convention
		$svcOptions->assertRequiredOptions( self::$constructorOptions );
		// $this->mConf is public, so we'll keep those two options there as well for
		// compatibility until it's removed
		$this->mConf = [
			'class' => $svcOptions->get( 'class' ),
			'preprocessorClass' => $svcOptions->get( 'preprocessorClass' ),
		];
		$this->svcOptions = $svcOptions;
	}

	$this->mUrlProtocols = $urlProtocols ?? wfUrlProtocols();
	$this->mExtLinkBracketedRegex = '/\[(((?i)' . $this->mUrlProtocols . ')' .
		self::EXT_LINK_ADDR .
		self::EXT_LINK_URL_CLASS . '*)\p{Zs}*([^\]\\x00-\\x08\\x0a-\\x1F\\x{FFFD}]*?)\]/Su';

	// Any collaborator that was not injected is pulled from the global
	// service container.
	$this->magicWordFactory = $magicWordFactory ??
		MediaWikiServices::getInstance()->getMagicWordFactory();

	$this->contLang = $contLang ?? MediaWikiServices::getInstance()->getContentLanguage();

	$this->factory = $factory ?? MediaWikiServices::getInstance()->getParserFactory();
	$this->specialPageFactory = $spFactory ??
		MediaWikiServices::getInstance()->getSpecialPageFactory();
	$this->linkRendererFactory = $linkRendererFactory ??
		MediaWikiServices::getInstance()->getLinkRendererFactory();
	$this->nsInfo = $nsInfo ?? MediaWikiServices::getInstance()->getNamespaceInfo();
	$this->logger = $logger ?: new NullLogger();
}
400
/**
 * Drop every property on destruction, to reduce memory usage and the
 * impact of circular references.
 */
public function __destruct() {
	// The link holder array goes first, before the general sweep.
	if ( isset( $this->mLinkHolders ) ) {
		unset( $this->mLinkHolders );
	}
	foreach ( $this as $property => $unused ) {
		unset( $this->$property );
	}
}
412
/**
 * Reset per-instance state on the clone and allow extensions to clean up
 * when the parser is cloned (via the ParserCloned hook).
 */
public function __clone() {
	$this->mInParse = false;

	// T58226: Creating a reference "to" an object field turns the field
	// itself into a reference (until the other reference goes out of
	// scope), and cloning copies reference fields as references. Both are
	// defined PHP5 behaviors, inconvenient as that is when old hooks from
	// PHP4 days pass fields by reference.
	$fieldsToDetach = [ 'mStripState', 'mVarCache' ];
	foreach ( $fieldsToDetach as $field ) {
		// Take a plain (non-reference) copy of the field, then rebind the
		// field so that it references that new copy.
		$copy = $this->$field;
		$this->$field =& $copy;
		unset( $copy );
	}

	Hooks::run( 'ParserCloned', [ $this ] );
}
435
/**
 * Name of the preprocessor class to use when none is otherwise specified.
 *
 * @since 1.34
 * @deprecated since 1.34, removing configurability of preprocessor
 * @return string Class name
 */
public static function getDefaultPreprocessorClass() {
	return Preprocessor_Hash::class;
}
446
/**
 * Perform the one-time initialisation needed before the parser is first
 * used: register the core parser functions and tag hooks, initialise the
 * variables, and fire the ParserFirstCallInit hook. Later calls do nothing.
 */
public function firstCallInit() {
	if ( $this->mFirstCall ) {
		// Clear the flag before doing any work.
		$this->mFirstCall = false;

		CoreParserFunctions::register( $this );
		CoreTagHooks::register( $this );
		$this->initialiseVariables();

		// Avoid PHP 7.1 warning from passing $this by reference
		$self = $this;
		Hooks::run( 'ParserFirstCallInit', [ &$self ] );
	}
}
464
/**
 * Reset the parser's per-parse state to its initial values, running the
 * first-call initialisation if necessary, and fire the ParserClearState hook.
 *
 * @private
 */
public function clearState() {
	$this->firstCallInit();
	$this->resetOutput();

	// Counters and link bookkeeping
	$this->mAutonumber = 0;
	$this->mIncludeCount = [];
	$this->mLinkHolders = new LinkHolderArray( $this );
	$this->mLinkID = 0;

	// Revision context
	$this->mRevisionSize = null;
	$this->mRevisionUser = null;
	$this->mRevisionId = null;
	$this->mRevisionTimestamp = null;
	$this->mRevisionObject = null;

	$this->mVarCache = [];
	$this->mUser = null;
	$this->mLangLinkLanguages = [];
	$this->currentRevisionCache = null;

	$this->mStripState = new StripState( $this );

	// Clear these on every parse, T6549
	$this->mTplDomCache = [];
	$this->mTplRedirCache = [];

	$this->mShowToc = true;
	$this->mForceTocPosition = false;
	$this->mIncludeSizes = [
		'post-expand' => 0,
		'arg' => 0,
	];
	$this->mPPNodeCount = 0;
	$this->mGeneratedPPNodeCount = 0;
	$this->mHighestExpansionDepth = 0;
	$this->mDefaultSort = false;
	$this->mHeadings = [];
	$this->mDoubleUnderscores = [];
	$this->mExpensiveFunctionCount = 0;

	// Fix cloning: discard a preprocessor that is bound to a different parser
	if ( isset( $this->mPreprocessor ) && $this->mPreprocessor->parser !== $this ) {
		$this->mPreprocessor = null;
	}

	$this->mProfiler = new SectionProfiler();

	// Avoid PHP 7.1 warning from passing $this by reference
	$self = $this;
	Hooks::run( 'ParserClearState', [ &$self ] );
}
514
/**
 * Replace the current ParserOutput with a fresh one and register its
 * recordOption() method as the watcher on the current ParserOptions.
 */
public function resetOutput() {
	$this->mOutput = new ParserOutput();
	$watcher = [ $this->mOutput, 'recordOption' ];
	$this->mOptions->registerWatcher( $watcher );
}
522
/**
 * Convert wikitext to HTML.
 * Do not call this function recursively.
 *
 * The revision-related fields of the parser are saved before parsing and
 * restored afterwards, so a $revid given here only applies to this call.
 *
 * @param string $text Text we want to parse
 * @param-taint $text escapes_htmlnoent
 * @param Title $title
 * @param ParserOptions $options
 * @param bool $linestart
 * @param bool $clearState
 * @param int|null $revid ID of the revision being rendered. This is used to render
 *  REVISION* magic words. 0 means that any current revision will be used. Null means
 *  that {{REVISIONID}}/{{REVISIONUSER}} will be empty and {{REVISIONTIMESTAMP}} will
 *  use the current timestamp.
 * @return ParserOutput A ParserOutput
 * @return-taint escaped
 */
public function parse(
	$text, Title $title, ParserOptions $options,
	$linestart = true, $clearState = true, $revid = null
) {
	if ( $clearState ) {
		// Strip markers are built with U+007F DELETE, so that character
		// must not occur in the input text.
		$text = strtr( $text, "\x7f", "?" );
		// Held in a local for the rest of the call; NOTE(review): presumably
		// this guards against recursive parsing - confirm against lock().
		$magicScopeVariable = $this->lock();
	}
	// Strip U+0000 NULL (T159174)
	$text = str_replace( "\000", '', $text );

	$this->startParse( $title, $options, self::OT_HTML, $clearState );

	$this->currentRevisionCache = null;
	$this->mInputSize = strlen( $text );
	if ( $this->mOptions->getEnableLimitReport() ) {
		$this->mOutput->resetParseStartTime();
	}

	// Remember the revision context so it can be put back at the end.
	$prevRevId = $this->mRevisionId;
	$prevRevObject = $this->mRevisionObject;
	$prevRevTimestamp = $this->mRevisionTimestamp;
	$prevRevUser = $this->mRevisionUser;
	$prevRevSize = $this->mRevisionSize;
	if ( $revid !== null ) {
		// An explicit revision ID invalidates everything derived from the old one.
		$this->mRevisionId = $revid;
		$this->mRevisionObject = null;
		$this->mRevisionTimestamp = null;
		$this->mRevisionUser = null;
		$this->mRevisionSize = null;
	}

	// Avoid PHP 7.1 warning from passing $this by reference
	$self = $this;
	Hooks::run( 'ParserBeforeStrip', [ &$self, &$text, &$this->mStripState ] );
	// No more strip!
	Hooks::run( 'ParserAfterStrip', [ &$self, &$text, &$this->mStripState ] );
	$text = $this->internalParse( $text );
	Hooks::run( 'ParserAfterParse', [ &$self, &$text, &$this->mStripState ] );

	$text = $this->internalParseHalfParsed( $text, true, $linestart );

	/**
	 * A converted title will be provided in the output object if title and
	 * content conversion are enabled, the article text does not contain
	 * a conversion-suppressing double-underscore tag, and no
	 * {{DISPLAYTITLE:...}} is present. DISPLAYTITLE takes precedence over
	 * automatic link conversion.
	 */
	$skipTitleConversion = $options->getDisableTitleConversion()
		|| isset( $this->mDoubleUnderscores['nocontentconvert'] )
		|| isset( $this->mDoubleUnderscores['notitleconvert'] )
		|| $this->mOutput->getDisplayTitle() !== false;
	if ( !$skipTitleConversion ) {
		$convertedTitle = $this->getTargetLanguage()->getConvRuleTitle();
		if ( !$convertedTitle ) {
			// No explicit conversion rule; convert the title itself.
			$convertedTitle = $this->getTargetLanguage()->convertTitle( $title );
		}
		$this->mOutput->setTitleText( $convertedTitle );
	}

	// Compute runtime adaptive expiry if set
	$this->mOutput->finalizeAdaptiveCacheExpiry();

	// Warn if too many heavyweight parser functions were used
	if ( $this->mExpensiveFunctionCount > $this->mOptions->getExpensiveParserFunctionLimit() ) {
		$this->limitationWarn(
			'expensive-parserfunction',
			$this->mExpensiveFunctionCount,
			$this->mOptions->getExpensiveParserFunctionLimit()
		);
	}

	// Information on limits, for the benefit of users who try to skirt them
	if ( $this->mOptions->getEnableLimitReport() ) {
		$text .= $this->makeLimitReport();
	}

	// Wrap non-interface parser output in a <div> so it can be targeted
	// with CSS (T37247)
	$wrapperClass = $this->mOptions->getWrapOutputClass();
	if ( $wrapperClass !== false && !$this->mOptions->getInterfaceMessage() ) {
		$this->mOutput->addWrapperDivClass( $wrapperClass );
	}

	$this->mOutput->setText( $text );

	// Put the caller's revision context back and drop per-parse data.
	$this->mRevisionId = $prevRevId;
	$this->mRevisionObject = $prevRevObject;
	$this->mRevisionTimestamp = $prevRevTimestamp;
	$this->mRevisionUser = $prevRevUser;
	$this->mRevisionSize = $prevRevSize;
	$this->mInputSize = false;
	$this->currentRevisionCache = null;

	return $this->mOutput;
}
640
/**
 * Record the limit report data on the current ParserOutput and build the
 * HTML comments (limit report plus transclusion timing profile) that
 * describe it.
 *
 * @return string HTML comment text to append to the parser output
 */
protected function makeLimitReport() {
	$includeLimit = $this->mOptions->getMaxIncludeSize();

	// CPU time is only recorded when it is available.
	$cpuSeconds = $this->mOutput->getTimeSinceStart( 'cpu' );
	if ( $cpuSeconds !== null ) {
		$this->mOutput->setLimitReportData(
			'limitreport-cputime',
			sprintf( "%.3f", $cpuSeconds )
		);
	}

	$wallSeconds = $this->mOutput->getTimeSinceStart( 'wall' );
	$this->mOutput->setLimitReportData(
		'limitreport-walltime',
		sprintf( "%.3f", $wallSeconds )
	);

	// Each entry is a [ used, limit ] pair.
	$usageVersusLimit = [
		'limitreport-ppvisitednodes' =>
			[ $this->mPPNodeCount, $this->mOptions->getMaxPPNodeCount() ],
		'limitreport-ppgeneratednodes' =>
			[ $this->mGeneratedPPNodeCount, $this->mOptions->getMaxGeneratedPPNodeCount() ],
		'limitreport-postexpandincludesize' =>
			[ $this->mIncludeSizes['post-expand'], $includeLimit ],
		'limitreport-templateargumentsize' =>
			[ $this->mIncludeSizes['arg'], $includeLimit ],
		'limitreport-expansiondepth' =>
			[ $this->mHighestExpansionDepth, $this->mOptions->getMaxPPExpandDepth() ],
		'limitreport-expensivefunctioncount' =>
			[ $this->mExpensiveFunctionCount, $this->mOptions->getExpensiveParserFunctionLimit() ],
	];
	foreach ( $usageVersusLimit as $reportKey => $pair ) {
		$this->mOutput->setLimitReportData( $reportKey, $pair );
	}

	foreach ( $this->mStripState->getLimitReport() as list( $stripKey, $stripValue ) ) {
		$this->mOutput->setLimitReportData( $stripKey, $stripValue );
	}

	Hooks::run( 'ParserLimitReportPrepare', [ $this, $this->mOutput ] );

	// Fixed header lines of the textual report
	$limitReport = "NewPP limit report\n";
	if ( $this->svcOptions->get( 'ShowHostnames' ) ) {
		$limitReport .= 'Parsed by ' . wfHostname() . "\n";
	}
	$limitReport .= 'Cached time: ' . $this->mOutput->getCacheTime() . "\n";
	$limitReport .= 'Cache expiry: ' . $this->mOutput->getCacheExpiry() . "\n";
	$dynamicLabel = $this->mOutput->hasDynamicContent() ? 'true' : 'false';
	$limitReport .= 'Dynamic content: ' . $dynamicLabel . "\n";
	$limitReport .= 'Complications: [' . implode( ', ', $this->mOutput->getAllFlags() ) . "]\n";

	// One line per recorded datum, unless a hook handler takes over formatting.
	foreach ( $this->mOutput->getLimitReportData() as $key => $value ) {
		$useDefaultFormat = Hooks::run( 'ParserLimitReportFormat',
			[ $key, &$value, &$limitReport, false, false ]
		);
		if ( !$useDefaultFormat ) {
			continue;
		}
		$keyMsg = wfMessage( $key )->inLanguage( 'en' )->useDatabase( false );
		$valueMsg = wfMessage( [ "$key-value-text", "$key-value" ] )
			->inLanguage( 'en' )->useDatabase( false );
		if ( !$valueMsg->exists() ) {
			$valueMsg = new RawMessage( '$1' );
		}
		if ( !$keyMsg->isDisabled() && !$valueMsg->isDisabled() ) {
			$valueMsg->params( $value );
			$limitReport .= $keyMsg->text() . ': ' . $valueMsg->text() . "\n";
		}
	}
	// This is not really HTML output, so decode the entities and then
	// re-encode whatever needs hiding inside an HTML comment.
	$limitReport = htmlspecialchars_decode( $limitReport );

	// Sanitize for comment. Note '‐' in the replacement is U+2010,
	// which looks much like the problematic '-'.
	$limitReport = str_replace( [ '-', '&' ], [ '‐', '&amp;' ], $limitReport );
	$text = "\n<!-- \n" . $limitReport . "-->\n";

	// Add on template profiling data in human/machine readable way
	$statsByFunc = $this->mProfiler->getFunctionStats();
	uasort( $statsByFunc, function ( $left, $right ) {
		// descending order of real time
		return $right['real'] <=> $left['real'];
	} );
	$profileReport = [];
	foreach ( array_slice( $statsByFunc, 0, 10 ) as $entry ) {
		$profileReport[] = sprintf(
			"%6.2f%% %8.3f %6d %s",
			$entry['%real'],
			$entry['real'],
			$entry['calls'],
			htmlspecialchars( $entry['name'] )
		);
	}
	$text .= "<!--\nTransclusion expansion time report (%,ms,calls,template)\n";
	$text .= implode( "\n", $profileReport ) . "\n-->\n";

	$this->mOutput->setLimitReportData( 'limitreport-timingprofile', $profileReport );

	// Add other cache related metadata
	if ( $this->svcOptions->get( 'ShowHostnames' ) ) {
		$this->mOutput->setLimitReportData( 'cachereport-origin', wfHostname() );
	}
	$this->mOutput->setLimitReportData(
		'cachereport-timestamp',
		$this->mOutput->getCacheTime()
	);
	$this->mOutput->setLimitReportData(
		'cachereport-ttl',
		$this->mOutput->getCacheExpiry()
	);
	$this->mOutput->setLimitReportData(
		'cachereport-transientcontent',
		$this->mOutput->hasDynamicContent()
	);

	// Log pages whose generated node count exceeds a tenth of the limit.
	if ( $this->mGeneratedPPNodeCount > $this->mOptions->getMaxGeneratedPPNodeCount() / 10 ) {
		wfDebugLog(
			'generated-pp-node-count',
			$this->mGeneratedPPNodeCount . ' ' . $this->mTitle->getPrefixedDBkey()
		);
	}
	return $text;
}
756
/**
 * Half-parse wikitext to half-parsed HTML. This recursive parser entry point
 * can be called from an extension tag hook.
 *
 * The output of this function IS NOT SAFE PARSED HTML; it is "half-parsed"
 * instead: lists and links have not been fully parsed yet, and strip
 * markers are still present.
 *
 * Use recursiveTagParseFully() to fully parse wikitext to output-safe HTML.
 *
 * This is the function to use if you're a parser tag hook that wants to
 * parse wikitext before or after applying additional transformations and
 * intends to *return the result as hook output*, which makes it go through
 * the rest of the parsing process automatically.
 *
 * If $frame is not provided, then template variables (e.g., {{{1}}}) within
 * $text are not expanded
 *
 * @param string $text Text extension wants to have parsed
 * @param-taint $text escapes_htmlnoent
 * @param bool|PPFrame $frame The frame to use for expanding any template variables
 * @return string UNSAFE half-parsed HTML
 * @return-taint escaped
 */
public function recursiveTagParse( $text, $frame = false ) {
	// Avoid PHP 7.1 warning from passing $this by reference
	$self = $this;
	$hookArgs = [ &$self, &$text, &$this->mStripState ];
	Hooks::run( 'ParserBeforeStrip', $hookArgs );
	Hooks::run( 'ParserAfterStrip', $hookArgs );
	return $this->internalParse( $text, false, $frame );
}
789
/**
 * Fully parse wikitext to fully parsed HTML. This recursive parser entry
 * point can be called from an extension tag hook.
 *
 * The result is fully-parsed HTML that is safe for output. A parser tag
 * hook might want to use recursiveTagParse() instead.
 *
 * If $frame is not provided, then template variables (e.g., {{{1}}}) within
 * $text are not expanded
 *
 * @since 1.25
 *
 * @param string $text Text extension wants to have parsed
 * @param-taint $text escapes_htmlnoent
 * @param bool|PPFrame $frame The frame to use for expanding any template variables
 * @return string Fully parsed HTML
 * @return-taint escaped
 */
public function recursiveTagParseFully( $text, $frame = false ) {
	$halfParsed = $this->recursiveTagParse( $text, $frame );
	return $this->internalParseHalfParsed( $halfParsed, false );
}
814
/**
 * Expand templates and variables in the text, producing valid, static wikitext.
 * Also removes comments.
 * Do not call this function recursively.
 *
 * @param string $text
 * @param Title|null $title
 * @param ParserOptions $options
 * @param int|null $revid Revision ID to set on the parser; left untouched when null
 * @param bool|PPFrame $frame
 * @return mixed|string
 */
public function preprocess( $text, Title $title = null,
	ParserOptions $options, $revid = null, $frame = false
) {
	$magicScopeVariable = $this->lock();
	$this->startParse( $title, $options, self::OT_PREPROCESS, true );
	if ( $revid !== null ) {
		$this->mRevisionId = $revid;
	}
	// Avoid PHP 7.1 warning from passing $this by reference
	$self = $this;
	Hooks::run( 'ParserBeforeStrip', [ &$self, &$text, &$this->mStripState ] );
	Hooks::run( 'ParserAfterStrip', [ &$self, &$text, &$this->mStripState ] );
	$expanded = $this->replaceVariables( $text, $frame );
	return $this->mStripState->unstripBoth( $expanded );
}
842
/**
 * Recursive parser entry point that can be called from an extension tag
 * hook. Replaces variables in the text, then unstrips the result.
 *
 * @param string $text Text to be expanded
 * @param bool|PPFrame $frame The frame to use for expanding any template variables
 * @return string
 * @since 1.19
 */
public function recursivePreprocess( $text, $frame = false ) {
	$expanded = $this->replaceVariables( $text, $frame );
	return $this->mStripState->unstripBoth( $expanded );
}
857
/**
 * Process the wikitext for the "?preload=" feature. (T7210)
 *
 * "<noinclude>", "<includeonly>" etc. are parsed as for template
 * transclusion; comments, templates, arguments, tags hooks and parser
 * functions are untouched.
 *
 * @param string $text
 * @param Title $title
 * @param ParserOptions $options
 * @param array $params Parameters substituted into $text as message parameters
 * @return string
 */
public function getPreloadText( $text, Title $title, ParserOptions $options, $params = [] ) {
	// Substitute the parameters by treating the text as a raw message.
	$text = ( new RawMessage( $text ) )->params( $params )->plain();

	// Parser (re)initialisation
	$magicScopeVariable = $this->lock();
	$this->startParse( $title, $options, self::OT_PLAIN, true );

	$expandFlags = PPFrame::NO_ARGS | PPFrame::NO_TEMPLATES;
	$dom = $this->preprocessToDom( $text, self::PTD_FOR_INCLUSION );
	$expanded = $this->getPreprocessor()->newFrame()->expand( $dom, $expandFlags );
	return $this->mStripState->unstripBoth( $expanded );
}
885
/**
 * Set the current user.
 * Only meant for use during a pre-save transform.
 *
 * @param User|null $user User object, or null to reset
 */
public function setUser( $user ) {
	$this->mUser = $user;
}
895
/**
 * Set the context title.
 *
 * A falsy value is replaced by the title 'NO TITLE'. A title carrying a
 * fragment is stored without that fragment.
 *
 * @param Title|null $t
 */
public function setTitle( $t ) {
	if ( !$t ) {
		$t = Title::newFromText( 'NO TITLE' );
	}

	// Strip the fragment to avoid various odd effects
	$this->mTitle = $t->hasFragment() ? $t->createFragmentTarget( '' ) : $t;
}
913
/**
 * Get the context Title currently stored on the parser.
 *
 * @return Title|null
 */
public function getTitle() {
	return $this->mTitle;
}
922
/**
 * Combined getter/setter for the Title object, implemented with wfSetVar().
 *
 * @param Title|null $x Title object or null to just get the current one
 * @return Title The value returned by wfSetVar()
 */
public function Title( $x = null ) {
	$result = wfSetVar( $this->mTitle, $x );
	return $result;
}
932
/**
 * Set the output type and rebuild the $this->ot shortcut flags.
 *
 * @param int $ot New value (one of the self::OT_* constants)
 */
public function setOutputType( $ot ) {
	$this->mOutputType = $ot;

	# Shortcut alias: one boolean per known output type
	$knownTypes = [
		'html' => self::OT_HTML,
		'wiki' => self::OT_WIKI,
		'pre' => self::OT_PREPROCESS,
		'plain' => self::OT_PLAIN,
	];
	$flags = [];
	foreach ( $knownTypes as $key => $type ) {
		$flags[$key] = $ot == $type;
	}
	$this->ot = $flags;
}
948
/**
 * Combined getter/setter for the output type, implemented with wfSetVar().
 *
 * Note that, unlike setOutputType(), this does not touch $this->ot.
 *
 * @param int|null $x New value or null to just get the current one
 * @return int The value returned by wfSetVar()
 */
public function OutputType( $x = null ) {
	$result = wfSetVar( $this->mOutputType, $x );
	return $result;
}
958
/**
 * Get the ParserOutput object currently stored on the parser.
 *
 * @return ParserOutput
 */
public function getOutput() {
	return $this->mOutput;
}
967
/**
 * Get the ParserOptions object currently stored on the parser.
 *
 * @return ParserOptions
 */
public function getOptions() {
	return $this->mOptions;
}
976
/**
 * Combined getter/setter for the ParserOptions object, implemented with
 * wfSetVar().
 *
 * @param ParserOptions|null $x New value or null to just get the current one
 * @return ParserOptions The value returned by wfSetVar()
 */
public function Options( $x = null ) {
	$result = wfSetVar( $this->mOptions, $x );
	return $result;
}
986
/**
 * Hand out the current link ID and advance the counter.
 *
 * @return int The counter value before it was incremented
 */
public function nextLinkID() {
	$current = $this->mLinkID++;
	return $current;
}
993
/**
 * Overwrite the link ID counter used by nextLinkID().
 *
 * @param int $id
 */
public function setLinkID( $id ) {
	$this->mLinkID = $id;
}
1000
/**
 * Get a language object for use in parser functions such as {{FORMATNUM:}}.
 * This simply delegates to getTargetLanguage().
 *
 * @return Language
 */
public function getFunctionLang() {
	return $this->getTargetLanguage();
}
1008
/**
 * Get the target language for the content being parsed. This is usually the
 * language that the content is in.
 *
 * Resolution order: the target language set on the ParserOptions, then the
 * user language object when parsing an interface message, and finally the
 * page language of the current title.
 *
 * @since 1.19
 *
 * @throws MWException If none of the options apply and no title is set
 * @return Language
 */
public function getTargetLanguage() {
	$explicit = $this->mOptions->getTargetLanguage();
	if ( $explicit !== null ) {
		return $explicit;
	}

	if ( $this->mOptions->getInterfaceMessage() ) {
		return $this->mOptions->getUserLangObj();
	}

	if ( $this->mTitle === null ) {
		throw new MWException( __METHOD__ . ': $this->mTitle is null' );
	}

	return $this->mTitle->getPageLanguage();
}
1031
/**
 * Get the language object for language conversion.
 * This is a plain alias of getTargetLanguage().
 *
 * @deprecated since 1.32, just use getTargetLanguage()
 * @return Language
 */
public function getConverterLanguage() {
	return $this->getTargetLanguage();
}
1040
/**
 * Get the User to parse for: $this->mUser when it has been set,
 * otherwise the user of the ParserOptions object.
 *
 * @return User
 */
public function getUser() {
	return $this->mUser !== null
		? $this->mUser
		: $this->mOptions->getUser();
}
1053
/**
 * Get the preprocessor object, creating it on first use.
 *
 * The class to instantiate is read from the 'preprocessorClass' service
 * option; the instance is then reused for later calls.
 *
 * @return Preprocessor
 */
public function getPreprocessor() {
	if ( isset( $this->mPreprocessor ) ) {
		return $this->mPreprocessor;
	}

	$preprocessorClass = $this->svcOptions->get( 'preprocessorClass' );
	$this->mPreprocessor = new $preprocessorClass( $this );
	return $this->mPreprocessor;
}
1066
/**
 * Get a LinkRenderer instance to make links with.
 *
 * The renderer is created lazily and configured with the stub threshold of
 * the parser options that are current at that moment.
 *
 * @since 1.28
 * @return LinkRenderer
 */
public function getLinkRenderer() {
	if ( $this->mLinkRenderer ) {
		return $this->mLinkRenderer;
	}

	// XXX We make the LinkRenderer with current options and then cache it forever
	$renderer = $this->linkRendererFactory->create();
	$this->mLinkRenderer = $renderer;
	$renderer->setStubThreshold( $this->getOptions()->getStubThreshold() );

	return $this->mLinkRenderer;
}
1084
/**
 * Get the MagicWordFactory held by this Parser.
 *
 * @since 1.32
 * @return MagicWordFactory
 */
public function getMagicWordFactory() {
	return $this->magicWordFactory;
}
1094
/**
 * Get the content language held by this Parser.
 *
 * @since 1.32
 * @return Language
 */
public function getContentLanguage() {
	return $this->contLang;
}
1104
/**
 * Replaces all occurrences of HTML-style comments and the given tags
 * in the text with a unique marker and returns the resulting text. The
 * output parameter $matches will be an associative array filled with data
 * in the form:
 *
 * @code
 *   'UNIQ-xxxxx' => [
 *     'element',
 *     'tag content',
 *     [ 'param' => 'x' ],
 *     '<element param="x">tag content</element>' ]
 * @endcode
 *
 * For a self-closing tag (<tag />) the content entry is null. A tag or
 * comment without an end marker swallows the rest of the text.
 *
 * @param array $elements List of element names. Comments are always extracted.
 * @param string $text Source text string.
 * @param array &$matches Out parameter, Array: extracted tags
 * @return string Stripped text
 */
public static function extractTagsAndParams( $elements, $text, &$matches ) {
	# Marker counter; static, so it keeps counting across calls
	static $n = 1;
	$matches = [];
	$stripped = '';

	$tagAlternation = implode( '|', $elements );
	$openRegex = "/<($tagAlternation)(\\s+[^>]*?|\\s*?)(\/?>)|<(!--)/i";

	while ( $text != '' ) {
		$parts = preg_split( $openRegex, $text, 2, PREG_SPLIT_DELIM_CAPTURE );
		$stripped .= $parts[0];
		$partCount = count( $parts );
		if ( $partCount < 5 ) {
			# Nothing (more) to extract
			break;
		}

		if ( $partCount > 5 ) {
			# comment: only the fourth capture group took part in the match
			$element = $parts[4];
			$attributes = '';
			$close = '';
			$inside = $parts[5];
		} else {
			# tag
			$element = $parts[1];
			$attributes = $parts[2];
			$close = $parts[3];
			$inside = $parts[4];
		}

		$marker = self::MARKER_PREFIX . "-$element-" . sprintf( '%08X', $n++ ) . self::MARKER_SUFFIX;
		$stripped .= $marker;

		if ( $close === '/>' ) {
			# Empty element tag, <tag />
			$content = null;
			$tail = null;
			$text = $inside;
		} else {
			$endRegex = $element === '!--'
				? '/(-->)/'
				: "/(<\\/$element\\s*>)/i";
			$pieces = preg_split( $endRegex, $inside, 2, PREG_SPLIT_DELIM_CAPTURE );
			$content = $pieces[0];
			if ( count( $pieces ) < 3 ) {
				# No end tag -- let it run out to the end of the text.
				$tail = '';
				$text = '';
			} else {
				$tail = $pieces[1];
				$text = $pieces[2];
			}
		}

		$matches[$marker] = [
			$element,
			$content,
			Sanitizer::decodeTagAttributes( $attributes ),
			"<$element$attributes$close$content$tail"
		];
	}

	return $stripped;
}
1181
/**
 * Get the list of strippable XML-like elements stored on the parser.
 *
 * @return array
 */
public function getStripList() {
	return $this->mStripList;
}
1190
/**
 * Get the StripState object currently used by the parser.
 *
 * @return StripState
 */
public function getStripState() {
	return $this->mStripState;
}
1199
/**
 * Add an item to the strip state.
 *
 * Returns the unique marker which must be inserted into the stripped text.
 * The marker will be replaced with the original text in unstrip().
 *
 * @param string $text
 *
 * @return string The generated marker
 */
public function insertStripItem( $text ) {
	# Take the current marker index and advance it for the next item
	$index = $this->mMarkerIndex++;
	$marker = self::MARKER_PREFIX . "-item-{$index}-" . self::MARKER_SUFFIX;
	$this->mStripState->addGeneral( $marker, $text );
	return $marker;
}
1215
/**
 * Parse the wiki syntax used to render tables.
 *
 * The text is processed line by line. Each open table has one entry on
 * every "history" stack below, so nested tables are tracked independently.
 * Lines outside of any table are passed through unchanged. Tables that are
 * still open at the end of the text are closed automatically.
 *
 * @private
 * @param string $text
 * @return string
 */
public function doTableStuff( $text ) {
	$lines = StringUtils::explode( "\n", $text );
	$out = '';
	$td_history = []; # Is currently a td tag open?
	$last_tag_history = []; # Save history of last tag activated (td, th or caption)
	$tr_history = []; # Is currently a tr tag open?
	$tr_attributes = []; # history of tr attributes
	$has_opened_tr = []; # Did this table open a <tr> element?
	$indent_level = 0; # indent level of the table

	foreach ( $lines as $outLine ) {
		$line = trim( $outLine );

		if ( $line === '' ) { # empty line, go to next line
			$out .= $outLine . "\n";
			continue;
		}

		$first_character = $line[0];
		$first_two = substr( $line, 0, 2 );
		$matches = [];

		if ( preg_match( '/^(:*)\s*\{\|(.*)$/', $line, $matches ) ) {
			# First check if we are starting a new table
			$indent_level = strlen( $matches[1] );

			$attributes = $this->mStripState->unstripBoth( $matches[2] );
			$attributes = Sanitizer::fixTagAttributes( $attributes, 'table' );

			$outLine = str_repeat( '<dl><dd>', $indent_level ) . "<table{$attributes}>";
			array_push( $td_history, false );
			array_push( $last_tag_history, '' );
			array_push( $tr_history, false );
			array_push( $tr_attributes, '' );
			array_push( $has_opened_tr, false );
		} elseif ( count( $td_history ) == 0 ) {
			# Not inside a table: don't do any of the following
			$out .= $outLine . "\n";
			continue;
		} elseif ( $first_two === '|}' ) {
			# We are ending a table
			$line = '</table>' . substr( $line, 2 );
			$last_tag = array_pop( $last_tag_history );

			if ( !array_pop( $has_opened_tr ) ) {
				# A table without any row gets one empty row
				$line = "<tr><td></td></tr>{$line}";
			}

			if ( array_pop( $tr_history ) ) {
				$line = "</tr>{$line}";
			}

			if ( array_pop( $td_history ) ) {
				$line = "</{$last_tag}>{$line}";
			}
			array_pop( $tr_attributes );
			if ( $indent_level > 0 ) {
				$outLine = rtrim( $line ) . str_repeat( '</dd></dl>', $indent_level );
			} else {
				$outLine = $line;
			}
		} elseif ( $first_two === '|-' ) {
			# Now we have a table row
			$line = preg_replace( '#^\|-+#', '', $line );

			# Whats after the tag is now only attributes
			$attributes = $this->mStripState->unstripBoth( $line );
			$attributes = Sanitizer::fixTagAttributes( $attributes, 'tr' );
			# Remember the attributes; the <tr> itself is emitted with the first cell
			array_pop( $tr_attributes );
			array_push( $tr_attributes, $attributes );

			$line = '';
			$last_tag = array_pop( $last_tag_history );
			array_pop( $has_opened_tr );
			array_push( $has_opened_tr, true );

			if ( array_pop( $tr_history ) ) {
				$line = '</tr>';
			}

			if ( array_pop( $td_history ) ) {
				$line = "</{$last_tag}>{$line}";
			}

			$outLine = $line;
			array_push( $tr_history, false );
			array_push( $td_history, false );
			array_push( $last_tag_history, '' );
		} elseif ( $first_character === '|'
			|| $first_character === '!'
			|| $first_two === '|+'
		) {
			# This might be cell elements, td, th or captions
			if ( $first_two === '|+' ) {
				$first_character = '+';
				$line = substr( $line, 2 );
			} else {
				$line = substr( $line, 1 );
			}

			// Implies both are valid for table headings.
			if ( $first_character === '!' ) {
				$line = StringUtils::replaceMarkup( '!!', '||', $line );
			}

			# Split up multiple cells on the same line.
			# FIXME : This can result in improper nesting of tags processed
			# by earlier parser steps.
			$cells = explode( '||', $line );

			$outLine = '';

			# Loop through each table cell
			foreach ( $cells as $cell ) {
				$previous = '';
				if ( $first_character !== '+' ) {
					# Cells (but not captions) need an open row
					$tr_after = array_pop( $tr_attributes );
					if ( !array_pop( $tr_history ) ) {
						$previous = "<tr{$tr_after}>\n";
					}
					array_push( $tr_history, true );
					array_push( $tr_attributes, '' );
					array_pop( $has_opened_tr );
					array_push( $has_opened_tr, true );
				}

				$last_tag = array_pop( $last_tag_history );

				if ( array_pop( $td_history ) ) {
					# Close the previous cell of this table first
					$previous = "</{$last_tag}>\n{$previous}";
				}

				if ( $first_character === '|' ) {
					$last_tag = 'td';
				} elseif ( $first_character === '!' ) {
					$last_tag = 'th';
				} elseif ( $first_character === '+' ) {
					$last_tag = 'caption';
				} else {
					$last_tag = '';
				}

				array_push( $last_tag_history, $last_tag );

				# A cell could contain both parameters and data
				$cell_data = explode( '|', $cell, 2 );

				# T2553: Note that a '|' inside an invalid link should not
				# be mistaken as delimiting cell parameters
				# Bug T153140: Neither should language converter markup.
				if ( preg_match( '/\[\[|-\{/', $cell_data[0] ) === 1 ) {
					$cell = "{$previous}<{$last_tag}>" . trim( $cell );
				} elseif ( count( $cell_data ) == 1 ) {
					// Whitespace in cells is trimmed
					$cell = "{$previous}<{$last_tag}>" . trim( $cell_data[0] );
				} else {
					$attributes = $this->mStripState->unstripBoth( $cell_data[0] );
					$attributes = Sanitizer::fixTagAttributes( $attributes, $last_tag );
					// Whitespace in cells is trimmed
					$cell = "{$previous}<{$last_tag}{$attributes}>" . trim( $cell_data[1] );
				}

				$outLine .= $cell;
				array_push( $td_history, true );
			}
		}
		$out .= $outLine . "\n";
	}

	# Closing open cell, tr && table
	while ( count( $td_history ) > 0 ) {
		# The stacks are pushed and popped together, so the top of
		# $last_tag_history names the cell element that is still open.
		$last_tag = array_pop( $last_tag_history );
		if ( array_pop( $td_history ) ) {
			# Close with the tag that was actually opened (td, th or caption)
			# rather than always emitting </td>.
			$out .= "</{$last_tag}>\n";
		}
		if ( array_pop( $tr_history ) ) {
			$out .= "</tr>\n";
		}
		if ( !array_pop( $has_opened_tr ) ) {
			$out .= "<tr><td></td></tr>\n";
		}

		$out .= "</table>\n";
	}

	# Remove trailing line-ending (b/c)
	if ( substr( $out, -1 ) === "\n" ) {
		$out = substr( $out, 0, -1 );
	}

	# special case: don't return empty table
	if ( $out === "<table>\n<tr><td></td></tr>\n</table>" ) {
		$out = '';
	}

	return $out;
}
1419
/**
 * Helper function for parse() that transforms wiki markup into half-parsed
 * HTML. Only called for $mOutputType == self::OT_HTML.
 *
 * The steps run in a fixed order: variable/template expansion, HTML
 * sanitization, tables, horizontal rules, double-underscore handling,
 * headings, internal links, quotes, external links, magic links and
 * finally heading formatting.
 *
 * @private
 *
 * @param string $text The text to parse
 * @param-taint $text escapes_html
 * @param bool $isMain Whether this is being called from the main parse() function
 * @param PPFrame|bool $frame A pre-processor frame
 *
 * @return string
 */
public function internalParse( $text, $isMain = true, $frame = false ) {
	$origText = $text;

	// Avoid PHP 7.1 warning from passing $this by reference
	$parser = $this;

	# Hook to suspend the parser in this state
	$proceed = Hooks::run( 'ParserBeforeInternalParse', [ &$parser, &$text, &$this->mStripState ] );
	if ( !$proceed ) {
		return $text;
	}

	if ( !$frame ) {
		# No frame given: use old-style replaceVariables
		$text = $this->replaceVariables( $text );
	} else {
		# A frame was given, so use it for replacing any variables.
		# The frame depth tells how include/noinclude tags should be handled:
		# depth=0 means this is the top-level document; otherwise it's an
		# included document.
		$flag = $frame->depth ? self::PTD_FOR_INCLUSION : 0;
		$dom = $this->preprocessToDom( $text, $flag );
		$text = $frame->expand( $dom );
	}

	Hooks::run( 'InternalParseBeforeSanitize', [ &$parser, &$text, &$this->mStripState ] );
	$text = Sanitizer::removeHTMLtags(
		$text,
		[ $this, 'attributeStripCallback' ],
		false,
		array_keys( $this->mTransparentTagHooks ),
		[],
		[ $this, 'addTrackingCategory' ]
	);
	Hooks::run( 'InternalParseBeforeLinks', [ &$parser, &$text, &$this->mStripState ] );

	# Tables need to come after variable replacement for things to work
	# properly; putting them before other transformations should keep
	# exciting things like link expansions from showing up in surprising
	# places.
	$text = $this->doTableStuff( $text );

	# Four or more dashes at the start of a line become a horizontal rule
	$text = preg_replace( '/(^|\n)-----*/', '\\1<hr />', $text );

	$text = $this->doDoubleUnderscore( $text );
	$text = $this->doHeadings( $text );
	$text = $this->replaceInternalLinks( $text );
	$text = $this->doAllQuotes( $text );
	$text = $this->replaceExternalLinks( $text );

	# replaceInternalLinks may sometimes leave behind
	# absolute URLs, which have to be masked to hide them from replaceExternalLinks
	$text = str_replace( self::MARKER_PREFIX . 'NOPARSE', '', $text );

	$text = $this->doMagicLinks( $text );

	return $this->formatHeadings( $text, $origText, $isMain );
}
1495
/**
 * Helper function for parse() that transforms half-parsed HTML into fully
 * parsed HTML.
 *
 * @param string $text
 * @param bool $isMain Whether to run the ParserAfterUnstrip,
 *   ParserBeforeTidy and ParserAfterTidy hooks
 * @param bool $linestart Passed through to doBlockLevels()
 * @return string
 */
private function internalParseHalfParsed( $text, $isMain = true, $linestart = true ) {
	$text = $this->mStripState->unstripGeneral( $text );

	// Avoid PHP 7.1 warning from passing $this by reference
	$parser = $this;

	if ( $isMain ) {
		Hooks::run( 'ParserAfterUnstrip', [ &$parser, &$text ] );
	}

	# Clean up special characters, only run once, next-to-last before doBlockLevels
	$text = Sanitizer::armorFrenchSpaces( $text );

	$text = $this->doBlockLevels( $text, $linestart );

	$this->replaceLinkHolders( $text );

	/**
	 * The input doesn't get language converted if
	 * a) It's disabled
	 * b) Content isn't converted
	 * c) It's a conversion table
	 * d) it is an interface message (which is in the user language)
	 */
	$shouldConvert = !$this->mOptions->getDisableContentConversion()
		&& !isset( $this->mDoubleUnderscores['nocontentconvert'] )
		&& !$this->mOptions->getInterfaceMessage();
	if ( $shouldConvert ) {
		# The position of the convert() call should not be changed. it
		# assumes that the links are all replaced and the only thing left
		# is the <nowiki> mark.
		$text = $this->getTargetLanguage()->convert( $text );
	}

	$text = $this->mStripState->unstripNoWiki( $text );

	if ( $isMain ) {
		Hooks::run( 'ParserBeforeTidy', [ &$parser, &$text ] );
	}

	$text = $this->replaceTransparentTags( $text );
	$text = $this->mStripState->unstripGeneral( $text );

	$text = Sanitizer::normalizeCharReferences( $text );

	if ( !MWTidy::isEnabled() ) {
		# attempt to sanitize at least some nesting problems
		# (T4702 and quite a few others)
		# This code path is buggy and deprecated!
		wfDeprecated( 'disabling tidy', '1.33' );
		$tidyregs = [
			# ''Something [http://www.cool.com cool''] -->
			# <i>Something</i><a href="http://www.cool.com"..><i>cool></i></a>
			'/(<([bi])>)(<([bi])>)?([^<]*)(<\/?a[^<]*>)([^<]*)(<\/\\4>)?(<\/\\2>)/' =>
				'\\1\\3\\5\\8\\9\\6\\1\\3\\7\\8\\9',
			# fix up an anchor inside another anchor, only
			# at least for a single single nested link (T5695)
			'/(<a[^>]+>)([^<]*)(<a[^>]+>[^<]*)<\/a>(.*)<\/a>/' =>
				'\\1\\2</a>\\3</a>\\1\\4</a>',
			# fix div inside inline elements- doBlockLevels won't wrap a line which
			# contains a div, so fix it up here; replace
			# div with escaped text
			'/(<([aib]) [^>]+>)([^<]*)(<div([^>]*)>)(.*)(<\/div>)([^<]*)(<\/\\2>)/' =>
				'\\1\\3&lt;div\\5&gt;\\6&lt;/div&gt;\\8\\9',
			# remove empty italic or bold tag pairs, some
			# introduced by rules above
			'/<([bi])><\/\\1>/' => '',
		];

		$text = preg_replace(
			array_keys( $tidyregs ),
			array_values( $tidyregs ),
			$text
		);
	} elseif ( $this->mOptions->getTidy() ) {
		$text = MWTidy::tidy( $text );
	}

	if ( $isMain ) {
		Hooks::run( 'ParserAfterTidy', [ &$parser, &$text ] );
	}

	return $text;
}
1590
/**
 * Replace special strings like "ISBN xxx" and "RFC xxx" with
 * magic external links.
 *
 * A single regular expression finds anchors and HTML elements (which are
 * skipped), free external links, RFC/PMID numbers and ISBNs; every match
 * is handed to magicLinkCallback().
 *
 * DML
 * @private
 *
 * @param string $text
 *
 * @return string
 */
public function doMagicLinks( $text ) {
	$prots = wfUrlProtocolsWithoutProtRel();
	$urlChar = self::EXT_LINK_URL_CLASS;
	$addr = self::EXT_LINK_ADDR;
	$space = self::SPACE_NOT_NL; # non-newline space
	$spdash = "(?:-|$space)"; # a dash or a non-newline space
	$spaces = "$space++"; # possessive match of 1 or more spaces

	# The first part is single-quoted so that \t, \r and \n reach PCRE
	# unchanged; the second part is double-quoted for interpolation.
	$pattern = '!(?: # Start cases
		(<a[ \t\r\n>].*?</a>) | # m[1]: Skip link text
		(<.*?>) | # m[2]: Skip stuff inside HTML elements' . "
		(\b # m[3]: Free external links
			(?i:$prots)
			($addr$urlChar*) # m[4]: Post-protocol path
		) |
		\b(?:RFC|PMID) $spaces # m[5]: RFC or PMID, capture number
			([0-9]+)\b |
		\bISBN $spaces ( # m[6]: ISBN, capture number
			(?: 97[89] $spdash? )? # optional 13-digit ISBN prefix
			(?: [0-9] $spdash? ){9} # 9 digits with opt. delimiters
			[0-9Xx] # check digit
		)\b
	)!xu";

	return preg_replace_callback( $pattern, [ $this, 'magicLinkCallback' ], $text );
}
1627
/**
 * Callback for doMagicLinks(): turn one regex match into HTML.
 *
 * Matches of anchors and HTML elements, as well as magic links whose type
 * is disabled in the parser options, are returned unchanged.
 *
 * @throws MWException If an RFC/PMID match starts with neither keyword
 * @param array $m Match array; see the group numbering in doMagicLinks()
 * @return string HTML
 */
public function magicLinkCallback( $m ) {
	$whole = $m[0];

	if ( ( isset( $m[1] ) && $m[1] !== '' ) || ( isset( $m[2] ) && $m[2] !== '' ) ) {
		# Skip anchor / skip HTML element
		return $whole;
	}

	if ( isset( $m[3] ) && $m[3] !== '' ) {
		# Free external link
		return $this->makeFreeExternalLink( $whole, strlen( $m[4] ) );
	}

	if ( isset( $m[5] ) && $m[5] !== '' ) {
		# RFC or PMID
		if ( substr( $whole, 0, 3 ) === 'RFC' ) {
			$enabled = $this->mOptions->getMagicRFCLinks();
			$keyword = 'RFC';
			$urlmsg = 'rfcurl';
			$cssClass = 'mw-magiclink-rfc';
			$trackingCat = 'magiclink-tracking-rfc';
		} elseif ( substr( $whole, 0, 4 ) === 'PMID' ) {
			$enabled = $this->mOptions->getMagicPMIDLinks();
			$keyword = 'PMID';
			$urlmsg = 'pubmedurl';
			$cssClass = 'mw-magiclink-pmid';
			$trackingCat = 'magiclink-tracking-pmid';
		} else {
			throw new MWException( __METHOD__ . ': unrecognised match type "' .
				substr( $whole, 0, 20 ) . '"' );
		}
		if ( !$enabled ) {
			return $whole;
		}

		$id = $m[5];
		$url = wfMessage( $urlmsg, $id )->inContentLanguage()->text();
		$this->addTrackingCategory( $trackingCat );
		return Linker::makeExternalLink( $url, "{$keyword} {$id}", true, $cssClass, [], $this->mTitle );
	}

	if ( isset( $m[6] ) && $m[6] !== '' && $this->mOptions->getMagicISBNLinks() ) {
		# ISBN: normalise every non-newline space to a plain space for display
		$isbn = preg_replace( '/' . self::SPACE_NOT_NL . '/', ' ', $m[6] );
		# The link target uses the bare number: no separators, upper-case X
		$num = strtr( $isbn, [
			'-' => '',
			' ' => '',
			'x' => 'X',
		] );
		$this->addTrackingCategory( 'magiclink-tracking-isbn' );
		return $this->getLinkRenderer()->makeKnownLink(
			SpecialPage::getTitleFor( 'Booksources', $num ),
			"ISBN $isbn",
			[
				'class' => 'internal mw-magiclink-isbn',
				'title' => false // suppress title attribute
			]
		);
	}

	return $whole;
}
1695
/**
 * Make a free external link, given a user-supplied URL.
 *
 * Escaped angle brackets / non-breaking spaces and trailing punctuation
 * are split off the URL and appended after the link as plain text. If
 * nothing but the protocol would remain, the input is returned as text.
 *
 * @param string $url
 * @param int $numPostProto
 *   The number of characters after the protocol.
 * @return string HTML
 * @private
 */
public function makeFreeExternalLink( $url, $numPostProto ) {
	$trail = '';

	# The characters '<' and '>' (which were escaped by
	# removeHTMLtags()) should not be included in
	# URLs, per RFC 2396.
	# Make &nbsp; terminate a URL as well (bug T84937)
	$entityMatch = [];
	$hasTerminator = preg_match(
		'/&(lt|gt|nbsp|#x0*(3[CcEe]|[Aa]0)|#0*(60|62|160));/',
		$url,
		$entityMatch,
		PREG_OFFSET_CAPTURE
	);
	if ( $hasTerminator ) {
		$cutAt = $entityMatch[0][1];
		$trail = substr( $url, $cutAt ) . $trail;
		$url = substr( $url, 0, $cutAt );
	}

	# Move trailing punctuation to $trail
	$sep = ',;\.:!?';
	# If there is no left bracket, then consider right brackets fair game too
	if ( strpos( $url, '(' ) === false ) {
		$sep .= ')';
	}

	$reversedUrl = strrev( $url );
	$numSepChars = strspn( $reversedUrl, $sep );
	# Don't break a trailing HTML entity by moving the ; into $trail
	# This is in hot code, so use substr_compare to avoid having to
	# create a new string object for the comparison
	if ( $numSepChars && substr_compare( $url, ";", -$numSepChars, 1 ) === 0 ) {
		# more optimization: instead of running preg_match with a $
		# anchor, which can be slow, do the match on the reversed
		# string starting at the desired offset.
		# un-reversed regexp is: /&([a-z]+|#x[\da-f]+|#\d+)$/i
		$endsInEntity = preg_match(
			'/\G([a-z]+|[\da-f]+x#|\d+#)&/i', $reversedUrl, $entityMatch, 0, $numSepChars
		);
		if ( $endsInEntity ) {
			$numSepChars--;
		}
	}
	if ( $numSepChars ) {
		$trail = substr( $url, -$numSepChars ) . $trail;
		$url = substr( $url, 0, -$numSepChars );
	}

	# Verify that we still have a real URL after trail removal, and
	# not just lone protocol
	if ( strlen( $trail ) >= $numPostProto ) {
		return $url . $trail;
	}

	$url = Sanitizer::cleanUrl( $url );

	# Is this an external image?
	$html = $this->maybeMakeExternalImage( $url );
	if ( $html === false ) {
		# Not an image, make a link
		$label = $this->getTargetLanguage()->getConverter()->markNoConversion( $url );
		$html = Linker::makeExternalLink(
			$url,
			$label,
			true,
			'free',
			$this->getExternalLinkAttribs( $url ),
			$this->mTitle
		);
		# Register it in the output object...
		$this->mOutput->addExternalLink( $url );
	}

	return $html . $trail;
}
1770
/**
 * Parse headers and return html.
 *
 * Lines wrapped in one to six '=' characters are turned into <h1>..<h6>.
 * The deepest level is handled first so that a line wrapped in several '='
 * is not consumed by a shallower level.
 *
 * @private
 *
 * @param string $text
 *
 * @return string
 */
public function doHeadings( $text ) {
	foreach ( [ 6, 5, 4, 3, 2, 1 ] as $level ) {
		$equals = str_repeat( '=', $level );
		// Trim non-newline whitespace from headings
		// Using \s* will break for: "==\n===\n" and parse as <h2>=</h2>
		$text = preg_replace(
			"/^(?:$equals)[ \\t]*(.+?)[ \\t]*(?:$equals)\\s*$/m",
			"<h$level>\\1</h$level>",
			$text
		);
	}
	return $text;
}
1789
/**
 * Replace single quotes with HTML markup.
 *
 * Every line is passed through doQuotes() on its own, so bold/italic
 * markup never spans a line break.
 *
 * @private
 *
 * @param string $text
 *
 * @return string The altered text
 */
public function doAllQuotes( $text ) {
	$converted = [];
	foreach ( StringUtils::explode( "\n", $text ) as $line ) {
		$converted[] = $this->doQuotes( $line );
	}
	return implode( "\n", $converted );
}
1807
/**
 * Helper function for doAllQuotes(): convert the apostrophe markup of a
 * single line into <i> and <b> tags.
 *
 * Runs of two, three and five apostrophes mean italic, bold and
 * bold-italic. Other run lengths are reduced to one of these, the surplus
 * apostrophes becoming plain text. Tags still open at the end of the line
 * are closed.
 *
 * @param string $text One line of text
 *
 * @return string
 */
public function doQuotes( $text ) {
	# Odd indexes of $arr hold the apostrophe runs, even indexes the text between them
	$arr = preg_split( "/(''+)/", $text, -1, PREG_SPLIT_DELIM_CAPTURE );
	$countarr = count( $arr );
	if ( $countarr == 1 ) {
		# No markup on this line
		return $text;
	}

	// First, do some preliminary work. This may shift some apostrophes from
	// being mark-up to being text. It also counts the number of occurrences
	// of bold and italics mark-ups.
	$numbold = 0;
	$numitalics = 0;
	for ( $i = 1; $i < $countarr; $i += 2 ) {
		$thislen = strlen( $arr[$i] );
		// If there are ever four apostrophes, assume the first is supposed to
		// be text, and the remaining three constitute mark-up for bold text.
		// (T15227: ''''foo'''' turns into ' ''' foo ' ''')
		if ( $thislen == 4 ) {
			$arr[$i - 1] .= "'";
			$arr[$i] = "'''";
			$thislen = 3;
		} elseif ( $thislen > 5 ) {
			// If there are more than 5 apostrophes in a row, assume they're all
			// text except for the last 5.
			// (T15227: ''''''foo'''''' turns into ' ''''' foo ' ''''')
			$arr[$i - 1] .= str_repeat( "'", $thislen - 5 );
			$arr[$i] = "'''''";
			$thislen = 5;
		}
		// Count the number of occurrences of bold and italics mark-ups.
		if ( $thislen == 2 ) {
			$numitalics++;
		} elseif ( $thislen == 3 ) {
			$numbold++;
		} elseif ( $thislen == 5 ) {
			$numitalics++;
			$numbold++;
		}
	}

	// If there is an odd number of both bold and italics, it is likely
	// that one of the bold ones was meant to be an apostrophe followed
	// by italics. Which one we cannot know for certain, but it is more
	// likely to be one that has a single-letter word before it.
	if ( ( $numbold % 2 == 1 ) && ( $numitalics % 2 == 1 ) ) {
		$firstsingleletterword = -1;
		$firstmultiletterword = -1;
		$firstspace = -1;
		for ( $i = 1; $i < $countarr; $i += 2 ) {
			if ( strlen( $arr[$i] ) == 3 ) {
				$x1 = substr( $arr[$i - 1], -1 );
				$x2 = substr( $arr[$i - 1], -2, 1 );
				if ( $x1 === ' ' ) {
					if ( $firstspace == -1 ) {
						$firstspace = $i;
					}
				} elseif ( $x2 === ' ' ) {
					$firstsingleletterword = $i;
					// if $firstsingleletterword is set, we don't
					// look at the other options, so we can bail early.
					break;
				} elseif ( $firstmultiletterword == -1 ) {
					$firstmultiletterword = $i;
				}
			}
		}

		// If there is a single-letter word, use it!
		if ( $firstsingleletterword > -1 ) {
			$arr[$firstsingleletterword] = "''";
			$arr[$firstsingleletterword - 1] .= "'";
		} elseif ( $firstmultiletterword > -1 ) {
			// If not, but there's a multi-letter word, use that one.
			$arr[$firstmultiletterword] = "''";
			$arr[$firstmultiletterword - 1] .= "'";
		} elseif ( $firstspace > -1 ) {
			// ... otherwise use the first one that has neither.
			// (notice that it is possible for all three to be -1 if, for example,
			// there is only one pentuple-apostrophe in the line)
			$arr[$firstspace] = "''";
			$arr[$firstspace - 1] .= "'";
		}
	}

	// Now let's actually convert our apostrophic mush to HTML!
	// $state names the currently open tags in opening order ('', 'i', 'b',
	// 'ib', 'bi'). 'both' means a ''''' was seen and the nesting order is not
	// decided yet; text is collected in $buffer until the next markup.
	$output = '';
	$buffer = '';
	$state = '';
	$i = 0;
	foreach ( $arr as $r ) {
		if ( ( $i % 2 ) == 0 ) {
			if ( $state === 'both' ) {
				$buffer .= $r;
			} else {
				$output .= $r;
			}
		} else {
			$thislen = strlen( $r );
			if ( $thislen == 2 ) {
				if ( $state === 'i' ) {
					$output .= '</i>';
					$state = '';
				} elseif ( $state === 'bi' ) {
					$output .= '</i>';
					$state = 'b';
				} elseif ( $state === 'ib' ) {
					$output .= '</b></i><b>';
					$state = 'b';
				} elseif ( $state === 'both' ) {
					$output .= '<b><i>' . $buffer . '</i>';
					$state = 'b';
				} else { // $state can be 'b' or ''
					$output .= '<i>';
					$state .= 'i';
				}
			} elseif ( $thislen == 3 ) {
				if ( $state === 'b' ) {
					$output .= '</b>';
					$state = '';
				} elseif ( $state === 'bi' ) {
					$output .= '</i></b><i>';
					$state = 'i';
				} elseif ( $state === 'ib' ) {
					$output .= '</b>';
					$state = 'i';
				} elseif ( $state === 'both' ) {
					$output .= '<i><b>' . $buffer . '</b>';
					$state = 'i';
				} else { // $state can be 'i' or ''
					$output .= '<b>';
					$state .= 'b';
				}
			} elseif ( $thislen == 5 ) {
				if ( $state === 'b' ) {
					$output .= '</b><i>';
					$state = 'i';
				} elseif ( $state === 'i' ) {
					$output .= '</i><b>';
					$state = 'b';
				} elseif ( $state === 'bi' ) {
					$output .= '</i></b>';
					$state = '';
				} elseif ( $state === 'ib' ) {
					$output .= '</b></i>';
					$state = '';
				} elseif ( $state === 'both' ) {
					$output .= '<i><b>' . $buffer . '</b></i>';
					$state = '';
				} else { // ($state == '')
					$buffer = '';
					$state = 'both';
				}
			}
		}
		$i++;
	}
	// Now close all remaining tags. Notice that the order is important.
	if ( $state === 'b' || $state === 'ib' ) {
		$output .= '</b>';
	}
	if ( $state === 'i' || $state === 'bi' || $state === 'ib' ) {
		$output .= '</i>';
	}
	if ( $state === 'bi' ) {
		$output .= '</b>';
	}
	// There might be lonely ''''', so make sure we have a buffer.
	// Compare against '' explicitly: a plain truthiness test would treat a
	// buffer of "0" as empty and silently drop that text.
	if ( $state === 'both' && $buffer !== '' ) {
		$output .= '<b><i>' . $buffer . '</i></b>';
	}
	return $output;
}
1987
/**
 * Replace bracketed external links (REL) with rendered links
 *
 * The text is split with $this->mExtLinkBracketedRegex; every match is
 * rendered through Linker::makeExternalLink() and its URL is registered
 * with the parser output.
 *
 * Note: this is all very hackish and the order of execution matters a lot.
 * Make sure to run tests/parser/parserTests.php if you change this code.
 *
 * @private
 *
 * @param string $text Text to process
 *
 * @throws MWException If preg_split() fails on the bracketed-link regex
 * @return string The text with bracketed external links replaced
 */
public function replaceExternalLinks( $text ) {
	$pieces = preg_split( $this->mExtLinkBracketedRegex, $text, -1, PREG_SPLIT_DELIM_CAPTURE );
	if ( $pieces === false ) {
		throw new MWException( "PCRE needs to be compiled with "
			. "--enable-unicode-properties in order for MediaWiki to function" );
	}

	# Whatever precedes the first link is copied over unchanged
	$result = array_shift( $pieces );

	# The remaining pieces are consumed four at a time:
	# URL, protocol (not needed here), link text, and the text following the link
	$pieceCount = count( $pieces );
	for ( $pos = 0; $pos < $pieceCount; $pos += 4 ) {
		$url = $pieces[$pos];
		$label = $pieces[$pos + 2];
		$trail = $pieces[$pos + 3];

		# The characters '<' and '>' (which were escaped by removeHTMLtags())
		# should not be included in URLs, per RFC 2396: cut the URL at the
		# first escaped bracket and move the remainder into the link text.
		$bracket = [];
		if ( preg_match( '/&(lt|gt);/', $url, $bracket, PREG_OFFSET_CAPTURE ) ) {
			$cut = $bracket[0][1];
			$label = substr( $url, $cut ) . ' ' . $label;
			$url = substr( $url, 0, $cut );
		}

		# If the link text is an image URL, replace it with an <img> tag
		# This happened by accident in the original parser, but some people used it extensively
		$image = $this->maybeMakeExternalImage( $label );
		if ( $image !== false ) {
			$label = $image;
		}

		if ( $label == '' ) {
			# No link text, e.g. [http://domain.tld/some.link]: autonumber it.
			# The trail is left alone in this case.
			$label = '[' . $this->getTargetLanguage()->formatNum( ++$this->mAutonumber ) . ']';
			$linkType = 'autonumber';
			$trailHead = '';
		} else {
			# Have link text, e.g. [http://domain.tld/some.link text]s
			# Check for trail
			$linkType = 'text';
			list( $trailHead, $trail ) = Linker::splitTrail( $trail );
		}

		// Excluding protocol-relative URLs may avoid many false positives.
		if ( preg_match( '/^(?:' . wfUrlProtocolsWithoutProtRel() . ')/', $label ) ) {
			$label = $this->getTargetLanguage()->getConverter()->markNoConversion( $label );
		}

		$url = Sanitizer::cleanUrl( $url );

		# Use the encoded URL
		# This means that users can paste URLs directly into the text
		# Funny characters like ö aren't valid in URLs anyway
		# This was changed in August 2004
		$linkHtml = Linker::makeExternalLink( $url, $label, false, $linkType,
			$this->getExternalLinkAttribs( $url ), $this->mTitle );
		$result .= $linkHtml . $trailHead . $trail;

		# Register link in the output object.
		$this->mOutput->addExternalLink( $url );
	}

	return $result;
}
2069
/**
 * Get the rel attribute for a particular external link.
 *
 * Returns 'nofollow' when $wgNoFollowLinks is enabled, unless the namespace
 * of $title is listed in $wgNoFollowNsExceptions or $url matches
 * $wgNoFollowDomainExceptions.
 *
 * @since 1.21
 * @param string|bool $url Optional URL, to extract the domain from for rel =>
 *  nofollow if appropriate
 * @param LinkTarget|null $title Optional LinkTarget, for wgNoFollowNsExceptions lookups
 * @return string|null Rel attribute for $url
 */
public static function getExternalLinkRel( $url = false, $title = null ) {
	global $wgNoFollowLinks, $wgNoFollowNsExceptions, $wgNoFollowDomainExceptions;

	if ( !$wgNoFollowLinks ) {
		return null;
	}

	# A namespace exception can only apply when a title was given. This has
	# to be checked explicitly: in_array() compares loosely, so a "no
	# namespace" placeholder such as false would be considered equal to
	# namespace ID 0 and wrongly exempt the link.
	if ( $title && in_array( $title->getNamespace(), $wgNoFollowNsExceptions ) ) {
		return null;
	}

	if ( wfMatchesDomainList( $url, $wgNoFollowDomainExceptions ) ) {
		return null;
	}

	return 'nofollow';
}
2089
/**
 * Get an associative array of additional HTML attributes appropriate for a
 * particular external link. This currently may include rel => nofollow
 * (depending on configuration, namespace, and the URL's domain) and/or a
 * target attribute (depending on configuration).
 *
 * The 'rel' key is always present in the result; its value is null when no
 * rel value applies.
 *
 * @param string $url URL to extract the domain from for rel =>
 *  nofollow if appropriate
 * @return array Associative array of HTML attributes
 */
public function getExternalLinkAttribs( $url ) {
	$attribs = [];
	$rel = self::getExternalLinkRel( $url, $this->mTitle );

	$target = $this->mOptions->getExternalLinkTarget();
	if ( $target ) {
		$attribs['target'] = $target;
		if ( !in_array( $target, [ '_self', '_parent', '_top' ] ) ) {
			// T133507. New windows can navigate parent cross-origin.
			// Including noreferrer due to lacking browser
			// support of noopener. Eventually noreferrer should be removed.
			// getExternalLinkRel() returns null when no rel value applies, so
			// only add a separator when there is an existing value to
			// separate from; otherwise the result would start with a space.
			if ( $rel !== null && $rel !== '' ) {
				$rel .= ' ';
			}
			$rel .= 'noreferrer noopener';
		}
	}
	$attribs['rel'] = $rel;
	return $attribs;
}
2120
/**
 * Replace unusual escape codes in a URL with their equivalent characters
 *
 * This generally follows the syntax defined in RFC 3986, with special
 * consideration for HTTP query strings. Unsafe raw characters are
 * percent-encoded first; then the fragment, the query and the remaining
 * scheme/path part are each passed through normalizeUrlComponent() with
 * their own set of characters that must stay escaped.
 *
 * @param string $url
 * @return string
 */
public static function normalizeLinkUrl( $url ) {
	# Test for RFC 3986 IPv6 syntax
	$scheme = '[a-z][a-z0-9+.-]*:';
	$userinfo = '(?:[a-z0-9\-._~!$&\'()*+,;=:]|%[0-9a-f]{2})*';
	$ipv6Host = '\\[((?:[0-9a-f:]|%3[0-A]|%[46][1-6])+)\\]';

	# Holds the decoded address if the host is a valid bracketed IP literal
	$isIPv6 = false;
	$hostMatch = [];
	if ( preg_match( "<^(?:{$scheme})?//(?:{$userinfo}@)?{$ipv6Host}(?:[:/?#].*|)$>i", $url, $hostMatch ) ) {
		$decodedHost = rawurldecode( $hostMatch[1] );
		if ( IP::isValid( $decodedHost ) ) {
			$isIPv6 = $decodedHost;
		}
	}

	# Make sure unsafe characters are encoded
	$url = preg_replace_callback(
		'/[\x00-\x20"<>\[\\\\\]^`{|}\x7F-\xFF]/',
		function ( $unsafeChar ) {
			return rawurlencode( $unsafeChar[0] );
		},
		$url
	);

	# The URL is processed from the back to the front: each component found
	# is prepended to $normalized and $end moves to where that component began.
	$normalized = '';
	$end = strlen( $url );
	$trailingParts = [
		# Fragment part - 'fragment'
		'#' => '"#%<>[\]^`{|}',
		# Query part - 'query' minus &=+;
		'?' => '"#%<>[\]^`{|}&=+;',
	];
	foreach ( $trailingParts as $delimiter => $unsafe ) {
		$start = strpos( $url, $delimiter );
		if ( $start !== false && $start < $end ) {
			$normalized = self::normalizeUrlComponent(
				substr( $url, $start, $end - $start ), $unsafe ) . $normalized;
			$end = $start;
		}
	}

	# Scheme and path part - 'pchar'
	# (we assume no userinfo or encoded colons in the host)
	$normalized = self::normalizeUrlComponent(
		substr( $url, 0, $end ), '"#%<>[\]^`{|}/?' ) . $normalized;

	if ( $isIPv6 === false ) {
		return $normalized;
	}

	# Fix IPv6 syntax: the brackets around the host were percent-encoded
	# above, so put the literal brackets back
	$ipv6Host = "%5B({$isIPv6})%5D";
	return preg_replace(
		"<^((?:{$scheme})?//(?:{$userinfo}@)?){$ipv6Host}(?=[:/?#]|$)>i",
		"$1[$2]",
		$normalized
	);
}
2187
/**
 * Normalize the percent-escapes in one component of a URL
 *
 * An escape is replaced by the character it stands for when that character
 * has a byte value between 33 and 126 and is not listed in $unsafe. Every
 * other escape is kept, with its hex digits converted to upper case.
 *
 * @param string $component Part of a URL
 * @param string $unsafe Characters that must remain percent-encoded
 * @return string
 */
private static function normalizeUrlComponent( $component, $unsafe ) {
	return preg_replace_callback(
		'/%[0-9A-Fa-f]{2}/',
		function ( $escape ) use ( $unsafe ) {
			$decoded = urldecode( $escape[0] );
			$code = ord( $decoded );
			$isPrintable = $code > 32 && $code < 127;
			if ( !$isPrintable || strpos( $unsafe, $decoded ) !== false ) {
				# Leave it escaped, but use uppercase for a-f
				return strtoupper( $escape[0] );
			}
			# Unescape it
			return $decoded;
		},
		$component
	);
}
2202
/**
 * Make an image out of a URL if that is allowed, either through the global
 * option, through the "external images from" exception, or through the
 * on-wiki whitelist
 *
 * @param string $url
 *
 * @return string|bool The result of Linker::makeExternalImage(), or false
 *  if the URL was not turned into an image
 */
private function maybeMakeExternalImage( $url ) {
	# Could be either a single string or an array of strings
	$allowedFrom = $this->mOptions->getAllowExternalImagesFrom();
	$prefixMatches = false;
	if ( !empty( $allowedFrom ) ) {
		if ( is_array( $allowedFrom ) ) {
			foreach ( $allowedFrom as $allowedPrefix ) {
				if ( strpos( $url, $allowedPrefix ) === 0 ) {
					$prefixMatches = true;
					break;
				}
			}
		} else {
			$prefixMatches = ( strpos( $url, $allowedFrom ) === 0 );
		}
	}

	$html = false;
	if ( ( $this->mOptions->getAllowExternalImages() || $prefixMatches )
		&& preg_match( self::EXT_IMAGE_REGEX, $url )
	) {
		# Image found
		$html = Linker::makeExternalImage( $url );
	}

	# Nothing produced so far: fall back to the on-wiki whitelist, if enabled
	if ( !$html && $this->mOptions->getEnableImageWhitelist()
		&& preg_match( self::EXT_IMAGE_REGEX, $url )
	) {
		$entries = explode(
			"\n",
			wfMessage( 'external_image_whitelist' )->inContentLanguage()->text()
		);

		foreach ( $entries as $entry ) {
			# Ignore blank entries and comments
			$skip = ( $entry === '' || strpos( $entry, '#' ) === 0 );
			# Each entry is a regex fragment: escape the delimiter and match case-insensitively
			if ( !$skip && preg_match( '/' . str_replace( '/', '\\/', $entry ) . '/i', $url ) ) {
				# Image matches a whitelist entry
				$html = Linker::makeExternalImage( $url );
				break;
			}
		}
	}

	return $html;
}
2260
/**
 * Process [[ ]] wikilinks
 *
 * Runs replaceInternalLinks2() on the text and merges the link holders it
 * returns into $this->mLinkHolders.
 *
 * @param string $s
 *
 * @return string Processed text
 *
 * @private
 */
public function replaceInternalLinks( $s ) {
	$allHolders = $this->mLinkHolders;
	# replaceInternalLinks2() takes $s by reference and rewrites it in place
	$newHolders = $this->replaceInternalLinks2( $s );
	$allHolders->merge( $newHolders );

	return $s;
}
2274
2275 /**
2276 * Process [[ ]] wikilinks (RIL)
 *
 * The text in $s is rewritten in place: recognised links are replaced by
 * rendered HTML or by link-holder placeholders, while anything that does
 * not form a valid link is put back as it was. Language links and
 * categories are registered with the parser output and removed from the
 * text.
 *
2277 * @param string &$s Text to process; replaced with the processed text
2278 * @throws MWException If $this->mTitle is null
2279 * @return LinkHolderArray Holders created for links that still await their existence check
2280 *
2281 * @private
2282 */
2283 public function replaceInternalLinks2( &$s ) {
# The title character class and both regexes are built on the first call
# and kept in static locals for later calls
2284 static $tc = false, $e1, $e1_img;
2285 # the % is needed to support urlencoded titles as well
2286 if ( !$tc ) {
2287 $tc = Title::legalChars() . '#%';
2288 # Match a link having the form [[namespace:link|alternate]]trail
2289 $e1 = "/^([{$tc}]+)(?:\\|(.+?))?]](.*)\$/sD";
2290 # Match cases where there is no "]]", which might still be images
2291 $e1_img = "/^([{$tc}]+)\\|(.*)\$/sD";
2292 }
2293
2294 $holders = new LinkHolderArray( $this );
2295
2296 # split the entire text string on occurrences of [[
2297 $a = StringUtils::explode( '[[', ' ' . $s );
2298 # get the first element (all text up to first [[), and remove the space we added
2299 $s = $a->current();
2300 $a->next();
2301 $line = $a->current(); # Workaround for broken ArrayIterator::next() that returns "void"
2302 $s = substr( $s, 1 );
2303
2304 $useLinkPrefixExtension = $this->getTargetLanguage()->linkPrefixExtension();
2305 $e2 = null;
2306 if ( $useLinkPrefixExtension ) {
2307 # Match the end of a line for a word that's not followed by whitespace,
2308 # e.g. in the case of 'The Arab al[[Razi]]', 'al' will be matched
2309 $charset = $this->contLang->linkPrefixCharset();
2310 $e2 = "/^((?>.*[^$charset]|))(.+)$/sDu";
2311 }
2312
2313 if ( is_null( $this->mTitle ) ) {
2314 throw new MWException( __METHOD__ . ": \$this->mTitle is null\n" );
2315 }
# Language links are only registered when the page being parsed is not a
# talk page (see the interwiki check below)
2316 $nottalk = !$this->mTitle->isTalkPage();
2317
# Remember the prefix found in front of the first link; it takes
# precedence on the first pass through the loop below
2318 if ( $useLinkPrefixExtension ) {
2319 $m = [];
2320 if ( preg_match( $e2, $s, $m ) ) {
2321 $first_prefix = $m[2];
2322 } else {
2323 $first_prefix = false;
2324 }
2325 } else {
2326 $prefix = '';
2327 }
2328
2329 $useSubpages = $this->areSubpagesAllowed();
2330
2331 # Loop for each link
2332 for ( ; $line !== false && $line !== null; $a->next(), $line = $a->current() ) {
2333 # Check for excessive memory usage
2334 if ( $holders->isBig() ) {
2335 # Too big
2336 # Do the existence check, replace the link holders and clear the array
2337 $holders->replace( $s );
2338 $holders->clear();
2339 }
2340
# Split the link prefix off the end of the text accumulated so far
2341 if ( $useLinkPrefixExtension ) {
2342 if ( preg_match( $e2, $s, $m ) ) {
2343 list( , $s, $prefix ) = $m;
2344 } else {
2345 $prefix = '';
2346 }
2347 # first link
2348 if ( $first_prefix ) {
2349 $prefix = $first_prefix;
2350 $first_prefix = false;
2351 }
2352 }
2353
# Set when this segment has link text but no closing ]], which may be an
# image whose caption contains a link
2354 $might_be_img = false;
2355
2356 if ( preg_match( $e1, $line, $m ) ) { # page with normal text or alt
2357 $text = $m[2];
2358 # If we get a ] at the beginning of $m[3] that means we have a link that's something like:
2359 # [[Image:Foo.jpg|[http://example.com desc]]] <- having three ] in a row breaks the match,
2360 # the real problem is with the $e1 regex
2361 # See T1500.
2362 # Still some problems for cases where the ] is meant to be outside punctuation,
2363 # and no image is in sight. See T4095.
2364 if ( $text !== ''
2365 && substr( $m[3], 0, 1 ) === ']'
2366 && strpos( $text, '[' ) !== false
2367 ) {
2368 $text .= ']'; # so that replaceExternalLinks($text) works later
2369 $m[3] = substr( $m[3], 1 );
2370 }
2371 # fix up urlencoded title texts
2372 if ( strpos( $m[1], '%' ) !== false ) {
2373 # Should anchors '#' also be rejected?
2374 $m[1] = str_replace( [ '<', '>' ], [ '&lt;', '&gt;' ], rawurldecode( $m[1] ) );
2375 }
2376 $trail = $m[3];
2377 } elseif ( preg_match( $e1_img, $line, $m ) ) {
2378 # Invalid, but might be an image with a link in its caption
2379 $might_be_img = true;
2380 $text = $m[2];
2381 if ( strpos( $m[1], '%' ) !== false ) {
2382 $m[1] = str_replace( [ '<', '>' ], [ '&lt;', '&gt;' ], rawurldecode( $m[1] ) );
2383 }
2384 $trail = "";
2385 } else { # Invalid form; output directly
2386 $s .= $prefix . '[[' . $line;
2387 continue;
2388 }
2389
2390 $origLink = ltrim( $m[1], ' ' );
2391
2392 # Don't allow internal links to pages containing
2393 # PROTO: where PROTO is a valid URL protocol; these
2394 # should be external links.
2395 if ( preg_match( '/^(?i:' . $this->mUrlProtocols . ')/', $origLink ) ) {
2396 $s .= $prefix . '[[' . $line;
2397 continue;
2398 }
2399
2400 # Make subpage if necessary
2401 if ( $useSubpages ) {
2402 $link = $this->maybeDoSubpageLink( $origLink, $text );
2403 } else {
2404 $link = $origLink;
2405 }
2406
2407 // \x7f isn't a default legal title char, so most likely strip
2408 // markers will force us into the "invalid form" path above. But,
2409 // just in case, let's assert that xmlish tags aren't valid in
2410 // the title position.
2411 $unstrip = $this->mStripState->killMarkers( $link );
2412 $noMarkers = ( $unstrip === $link );
2413
2414 $nt = $noMarkers ? Title::newFromText( $link ) : null;
2415 if ( $nt === null ) {
2416 $s .= $prefix . '[[' . $line;
2417 continue;
2418 }
2419
2420 $ns = $nt->getNamespace();
2421 $iw = $nt->getInterwiki();
2422
# False when the target starts with ':'; the language link, file and
# category handling below is skipped in that case
2423 $noforce = ( substr( $origLink, 0, 1 ) !== ':' );
2424
2425 if ( $might_be_img ) { # if this is actually an invalid link
2426 if ( $ns == NS_FILE && $noforce ) { # but might be an image
2427 $found = false;
2428 while ( true ) {
2429 # look at the next 'line' to see if we can close it there
2430 $a->next();
2431 $next_line = $a->current();
2432 if ( $next_line === false || $next_line === null ) {
2433 break;
2434 }
2435 $m = explode( ']]', $next_line, 3 );
2436 if ( count( $m ) == 3 ) {
2437 # the first ]] closes the inner link, the second the image
2438 $found = true;
2439 $text .= "[[{$m[0]}]]{$m[1]}";
2440 $trail = $m[2];
2441 break;
2442 } elseif ( count( $m ) == 2 ) {
2443 # if there's exactly one ]] that's fine, we'll keep looking
2444 $text .= "[[{$m[0]}]]{$m[1]}";
2445 } else {
2446 # if $next_line is invalid too, we need look no further
2447 $text .= '[[' . $next_line;
2448 break;
2449 }
2450 }
2451 if ( !$found ) {
2452 # we couldn't find the end of this imageLink, so output it raw
2453 # but don't ignore what might be perfectly normal links in the text we've examined
2454 $holders->merge( $this->replaceInternalLinks2( $text ) );
2455 $s .= "{$prefix}[[$link|$text";
2456 # note: no $trail, because without an end, there *is* no trail
2457 continue;
2458 }
2459 } else { # it's not an image, so output it raw
2460 $s .= "{$prefix}[[$link|$text";
2461 # note: no $trail, because without an end, there *is* no trail
2462 continue;
2463 }
2464 }
2465
# With no link text given, the target itself is used as the text
2466 $wasblank = ( $text == '' );
2467 if ( $wasblank ) {
2468 $text = $link;
2469 if ( !$noforce ) {
2470 # Strip off leading ':'
2471 $text = substr( $text, 1 );
2472 }
2473 } else {
2474 # T6598 madness. Handle the quotes only if they come from the alternate part
2475 # [[Lista d''e paise d''o munno]] -> <a href="...">Lista d''e paise d''o munno</a>
2476 # [[Criticism of Harry Potter|Criticism of ''Harry Potter'']]
2477 # -> <a href="Criticism of Harry Potter">Criticism of <i>Harry Potter</i></a>
2478 $text = $this->doQuotes( $text );
2479 }
2480
2481 # Link not escaped by : , create the various objects
2482 if ( $noforce && !$nt->wasLocalInterwiki() ) {
2483 # Interwikis
2484 if (
2485 $iw && $this->mOptions->getInterwikiMagic() && $nottalk && (
2486 Language::fetchLanguageName( $iw, null, 'mw' ) ||
2487 in_array( $iw, $this->svcOptions->get( 'ExtraInterlanguageLinkPrefixes' ) )
2488 )
2489 ) {
2490 # T26502: filter duplicates
2491 if ( !isset( $this->mLangLinkLanguages[$iw] ) ) {
2492 $this->mLangLinkLanguages[$iw] = true;
2493 $this->mOutput->addLanguageLink( $nt->getFullText() );
2494 }
2495
2496 /**
2497 * Strip the whitespace interwiki links produce, see T10897
2498 */
2499 $s = rtrim( $s . $prefix ) . $trail; # T175416
2500 continue;
2501 }
2502
2503 if ( $ns == NS_FILE ) {
2504 if ( !wfIsBadImage( $nt->getDBkey(), $this->mTitle ) ) {
2505 if ( $wasblank ) {
2506 # if no parameters were passed, $text
2507 # becomes something like "File:Foo.png",
2508 # which we don't want to pass on to the
2509 # image generator
2510 $text = '';
2511 } else {
2512 # recursively parse links inside the image caption
2513 # actually, this will parse them in any other parameters, too,
2514 # but it might be hard to fix that, and it doesn't matter ATM
2515 $text = $this->replaceExternalLinks( $text );
2516 $holders->merge( $this->replaceInternalLinks2( $text ) );
2517 }
2518 # cloak any absolute URLs inside the image markup, so replaceExternalLinks() won't touch them
2519 $s .= $prefix . $this->armorLinks(
2520 $this->makeImage( $nt, $text, $holders ) ) . $trail;
2521 continue;
2522 }
2523 } elseif ( $ns == NS_CATEGORY ) {
2524 /**
2525 * Strip the whitespace Category links produce, see T2087
2526 */
2527 $s = rtrim( $s . $prefix ) . $trail; # T2087, T87753
2528
2529 if ( $wasblank ) {
2530 $sortkey = $this->getDefaultSort();
2531 } else {
2532 $sortkey = $text;
2533 }
2534 $sortkey = Sanitizer::decodeCharReferences( $sortkey );
2535 $sortkey = str_replace( "\n", '', $sortkey );
2536 $sortkey = $this->getTargetLanguage()->convertCategoryKey( $sortkey );
2537 $this->mOutput->addCategory( $nt->getDBkey(), $sortkey );
2538
2539 continue;
2540 }
2541 }
2542
2543 # Self-link checking. For some languages, variants of the title are checked in
2544 # LinkHolderArray::doVariants() to allow batching the existence checks necessary
2545 # for linking to a different variant.
2546 if ( $ns != NS_SPECIAL && $nt->equals( $this->mTitle ) && !$nt->hasFragment() ) {
2547 $s .= $prefix . Linker::makeSelfLinkObj( $nt, $text, '', $trail );
2548 continue;
2549 }
2550
2551 # NS_MEDIA is a pseudo-namespace for linking directly to a file
2552 # @todo FIXME: Should do batch file existence checks, see comment below
2553 if ( $ns == NS_MEDIA ) {
2554 # Give extensions a chance to select the file revision for us
2555 $options = [];
2556 $descQuery = false;
2557 Hooks::run( 'BeforeParserFetchFileAndTitle',
2558 [ $this, $nt, &$options, &$descQuery ] );
2559 # Fetch and register the file (file title may be different via hooks)
2560 list( $file, $nt ) = $this->fetchFileAndTitle( $nt, $options );
2561 # Cloak with NOPARSE to avoid replacement in replaceExternalLinks
2562 $s .= $prefix . $this->armorLinks(
2563 Linker::makeMediaLinkFile( $nt, $file, $text ) ) . $trail;
2564 continue;
2565 }
2566
2567 # Some titles, such as valid special pages or files in foreign repos, should
2568 # be shown as bluelinks even though they're not included in the page table
2569 # @todo FIXME: isAlwaysKnown() can be expensive for file links; we should really do
2570 # batch file existence checks for NS_FILE and NS_MEDIA
2571 if ( $iw == '' && $nt->isAlwaysKnown() ) {
2572 $this->mOutput->addLink( $nt );
2573 $s .= $this->makeKnownLinkHolder( $nt, $text, $trail, $prefix );
2574 } else {
2575 # Links will be added to the output link list after checking
2576 $s .= $holders->makeHolder( $nt, $text, [], $trail, $prefix );
2577 }
2578 }
2579 return $holders;
2580 }
2581
/**
 * Render a forced-blue link inline; protect against double expansion of
 * URLs if we're in a mode that prepends full URL prefixes to internal links.
 *
 * The trail is split first so that the part belonging to the link ends up
 * inside it, while the rest is appended after the armored link HTML.
 *
 * @param Title $nt
 * @param string $text Link text; the escaped prefixed title is used when empty
 * @param string $trail
 * @param string $prefix
 * @return string HTML-wikitext mix oh yuck
 */
protected function makeKnownLinkHolder( $nt, $text = '', $trail = '', $prefix = '' ) {
	$trailParts = Linker::splitTrail( $trail );
	$insideLink = $trailParts[0];
	$afterLink = $trailParts[1];

	$label = ( $text == '' ) ? htmlspecialchars( $nt->getPrefixedText() ) : $text;

	$renderer = $this->getLinkRenderer();
	$anchor = $renderer->makeKnownLink(
		$nt,
		new HtmlArmor( $prefix . $label . $insideLink )
	);

	return $this->armorLinks( $anchor ) . $afterLink;
}
2608
/**
 * Insert a NOPARSE hacky thing into any inline links in a chunk that's
 * going to go through further parsing steps before inline URL expansion.
 *
 * Every URL protocol from $this->mUrlProtocols that starts at a word
 * boundary gets self::MARKER_PREFIX . "NOPARSE" put in front of it.
 *
 * Not needed quite as much as it used to be since free links are a bit
 * more sensible these days. But bracketed links are still an issue.
 *
 * @param string $text More-or-less HTML
 * @return string Less-or-more HTML with NOPARSE bits
 */
public function armorLinks( $text ) {
	$protocolPattern = '/\b((?i)' . $this->mUrlProtocols . ')/';
	$armored = self::MARKER_PREFIX . "NOPARSE$1";

	return preg_replace( $protocolPattern, $armored, $text );
}
2623
/**
 * Return true if subpage links should be expanded on this page, i.e. if
 * the namespace of the current title has subpages.
 * @return bool
 */
public function areSubpagesAllowed() {
	$nsInfo = $this->nsInfo;
	# Some namespaces don't allow subpages
	$currentNs = $this->mTitle->getNamespace();

	return $nsInfo->hasSubpages( $currentNs );
}
2632
/**
 * Handle link to subpage if necessary
 *
 * Delegates to Linker::normalizeSubpageLink() with the current title as
 * the context page.
 *
 * @param string $target The source of the link
 * @param string &$text The link text, modified as necessary
 * @return string The full name of the link
 * @private
 */
public function maybeDoSubpageLink( $target, &$text ) {
	$contextTitle = $this->mTitle;
	$fullName = Linker::normalizeSubpageLink( $contextTitle, $target, $text );

	return $fullName;
}
2644
/**
 * Make lists from lines starting with ':', '*', '#', etc. (DBL)
 *
 * Thin wrapper around BlockLevelPass::doBlockLevels().
 *
 * @param string $text
 * @param bool $linestart Whether or not this is at the start of a line.
 * @private
 * @return string The lists rendered as HTML
 */
public function doBlockLevels( $text, $linestart ) {
	$rendered = BlockLevelPass::doBlockLevels( $text, $linestart );

	return $rendered;
}
2656
2657 /**
2658 * Return value of a magic variable (like PAGENAME)
2659 *
2660 * @private
2661 *
2662 * @param string $index Magic variable identifier as mapped in MagicWordFactory::$mVariableIDs
2663 * @param bool|PPFrame $frame
2664 *
2665 * @throws MWException
2666 * @return string
2667 */
2668 public function getVariableValue( $index, $frame = false ) {
2669 if ( is_null( $this->mTitle ) ) {
2670 // If no title set, bad things are going to happen
2671 // later. Title should always be set since this
2672 // should only be called in the middle of a parse
2673 // operation (but the unit-tests do funky stuff)
2674 throw new MWException( __METHOD__ . ' Should only be '
2675 . ' called while parsing (no title set)' );
2676 }
2677
2678 // Avoid PHP 7.1 warning from passing $this by reference
2679 $parser = $this;
2680
2681 /**
2682 * Some of these require message or data lookups and can be
2683 * expensive to check many times.
2684 */
2685 if (
2686 Hooks::run( 'ParserGetVariableValueVarCache', [ &$parser, &$this->mVarCache ] ) &&
2687 isset( $this->mVarCache[$index] )
2688 ) {
2689 return $this->mVarCache[$index];
2690 }
2691
2692 $ts = wfTimestamp( TS_UNIX, $this->mOptions->getTimestamp() );
2693 Hooks::run( 'ParserGetVariableValueTs', [ &$parser, &$ts ] );
2694
2695 $pageLang = $this->getFunctionLang();
2696
2697 switch ( $index ) {
2698 case '!':
2699 $value = '|';
2700 break;
2701 case 'currentmonth':
2702 $value = $pageLang->formatNum( MWTimestamp::getInstance( $ts )->format( 'm' ), true );
2703 break;
2704 case 'currentmonth1':
2705 $value = $pageLang->formatNum( MWTimestamp::getInstance( $ts )->format( 'n' ), true );
2706 break;
2707 case 'currentmonthname':
2708 $value = $pageLang->getMonthName( MWTimestamp::getInstance( $ts )->format( 'n' ) );
2709 break;
2710 case 'currentmonthnamegen':
2711 $value = $pageLang->getMonthNameGen( MWTimestamp::getInstance( $ts )->format( 'n' ) );
2712 break;
2713 case 'currentmonthabbrev':
2714 $value = $pageLang->getMonthAbbreviation( MWTimestamp::getInstance( $ts )->format( 'n' ) );
2715 break;
2716 case 'currentday':
2717 $value = $pageLang->formatNum( MWTimestamp::getInstance( $ts )->format( 'j' ), true );
2718 break;
2719 case 'currentday2':
2720 $value = $pageLang->formatNum( MWTimestamp::getInstance( $ts )->format( 'd' ), true );
2721 break;
2722 case 'localmonth':
2723 $value = $pageLang->formatNum( MWTimestamp::getLocalInstance( $ts )->format( 'm' ), true );
2724 break;
2725 case 'localmonth1':
2726 $value = $pageLang->formatNum( MWTimestamp::getLocalInstance( $ts )->format( 'n' ), true );
2727 break;
2728 case 'localmonthname':
2729 $value = $pageLang->getMonthName( MWTimestamp::getLocalInstance( $ts )->format( 'n' ) );
2730 break;
2731 case 'localmonthnamegen':
2732 $value = $pageLang->getMonthNameGen( MWTimestamp::getLocalInstance( $ts )->format( 'n' ) );
2733 break;
2734 case 'localmonthabbrev':
2735 $value = $pageLang->getMonthAbbreviation( MWTimestamp::getLocalInstance( $ts )->format( 'n' ) );
2736 break;
2737 case 'localday':
2738 $value = $pageLang->formatNum( MWTimestamp::getLocalInstance( $ts )->format( 'j' ), true );
2739 break;
2740 case 'localday2':
2741 $value = $pageLang->formatNum( MWTimestamp::getLocalInstance( $ts )->format( 'd' ), true );
2742 break;
2743 case 'pagename':
2744 $value = wfEscapeWikiText( $this->mTitle->getText() );
2745 break;
2746 case 'pagenamee':
2747 $value = wfEscapeWikiText( $this->mTitle->getPartialURL() );
2748 break;
2749 case 'fullpagename':
2750 $value = wfEscapeWikiText( $this->mTitle->getPrefixedText() );
2751 break;
2752 case 'fullpagenamee':
2753 $value = wfEscapeWikiText( $this->mTitle->getPrefixedURL() );
2754 break;
2755 case 'subpagename':
2756 $value = wfEscapeWikiText( $this->mTitle->getSubpageText() );
2757 break;
2758 case 'subpagenamee':
2759 $value = wfEscapeWikiText( $this->mTitle->getSubpageUrlForm() );
2760 break;
2761 case 'rootpagename':
2762 $value = wfEscapeWikiText( $this->mTitle->getRootText() );
2763 break;
2764 case 'rootpagenamee':
2765 $value = wfEscapeWikiText( wfUrlencode( str_replace(
2766 ' ',
2767 '_',
2768 $this->mTitle->getRootText()
2769 ) ) );
2770 break;
2771 case 'basepagename':
2772 $value = wfEscapeWikiText( $this->mTitle->getBaseText() );
2773 break;
2774 case 'basepagenamee':
2775 $value = wfEscapeWikiText( wfUrlencode( str_replace(
2776 ' ',
2777 '_',
2778 $this->mTitle->getBaseText()
2779 ) ) );
2780 break;
2781 case 'talkpagename':
2782 if ( $this->mTitle->canHaveTalkPage() ) {
2783 $talkPage = $this->mTitle->getTalkPage();
2784 $value = wfEscapeWikiText( $talkPage->getPrefixedText() );
2785 } else {
2786 $value = '';
2787 }
2788 break;
2789 case 'talkpagenamee':
2790 if ( $this->mTitle->canHaveTalkPage() ) {
2791 $talkPage = $this->mTitle->getTalkPage();
2792 $value = wfEscapeWikiText( $talkPage->getPrefixedURL() );
2793 } else {
2794 $value = '';
2795 }
2796 break;
2797 case 'subjectpagename':
2798 $subjPage = $this->mTitle->getSubjectPage();
2799 $value = wfEscapeWikiText( $subjPage->getPrefixedText() );
2800 break;
2801 case 'subjectpagenamee':
2802 $subjPage = $this->mTitle->getSubjectPage();
2803 $value = wfEscapeWikiText( $subjPage->getPrefixedURL() );
2804 break;
2805 case 'pageid': // requested in T25427
2806 # Inform the edit saving system that getting the canonical output
2807 # after page insertion requires a parse that used that exact page ID
2808 $this->setOutputFlag( 'vary-page-id', '{{PAGEID}} used' );
2809 $value = $this->mTitle->getArticleID();
2810 if ( !$value ) {
2811 $value = $this->mOptions->getSpeculativePageId();
2812 if ( $value ) {
2813 $this->mOutput->setSpeculativePageIdUsed( $value );
2814 }
2815 }
2816 break;
2817 case 'revisionid':
2818 if (
2819 $this->svcOptions->get( 'MiserMode' ) &&
2820 !$this->mOptions->getInterfaceMessage() &&
2821 // @TODO: disallow this word on all namespaces
2822 $this->nsInfo->isContent( $this->mTitle->getNamespace() )
2823 ) {
2824 // Use a stub result instead of the actual revision ID in order to avoid
2825 // double parses on page save but still allow preview detection (T137900)
2826 if ( $this->getRevisionId() || $this->mOptions->getSpeculativeRevId() ) {
2827 $value = '-';
2828 } else {
2829 $this->setOutputFlag( 'vary-revision-exists', '{{REVISIONID}} used' );
2830 $value = '';
2831 }
2832 } else {
2833 # Inform the edit saving system that getting the canonical output after
2834 # revision insertion requires a parse that used that exact revision ID
2835 $this->setOutputFlag( 'vary-revision-id', '{{REVISIONID}} used' );
2836 $value = $this->getRevisionId();
2837 if ( $value === 0 ) {
2838 $rev = $this->getRevisionObject();
2839 $value = $rev ? $rev->getId() : $value;
2840 }
2841 if ( !$value ) {
2842 $value = $this->mOptions->getSpeculativeRevId();
2843 if ( $value ) {
2844 $this->mOutput->setSpeculativeRevIdUsed( $value );
2845 }
2846 }
2847 }
2848 break;
2849 case 'revisionday':
2850 $value = (int)$this->getRevisionTimestampSubstring( 6, 2, self::MAX_TTS, $index );
2851 break;
2852 case 'revisionday2':
2853 $value = $this->getRevisionTimestampSubstring( 6, 2, self::MAX_TTS, $index );
2854 break;
2855 case 'revisionmonth':
2856 $value = $this->getRevisionTimestampSubstring( 4, 2, self::MAX_TTS, $index );
2857 break;
2858 case 'revisionmonth1':
2859 $value = (int)$this->getRevisionTimestampSubstring( 4, 2, self::MAX_TTS, $index );
2860 break;
2861 case 'revisionyear':
2862 $value = $this->getRevisionTimestampSubstring( 0, 4, self::MAX_TTS, $index );
2863 break;
2864 case 'revisiontimestamp':
2865 $value = $this->getRevisionTimestampSubstring( 0, 14, self::MAX_TTS, $index );
2866 break;
2867 case 'revisionuser':
2868 # Inform the edit saving system that getting the canonical output after
2869 # revision insertion requires a parse that used the actual user ID
2870 $this->setOutputFlag( 'vary-user', '{{REVISIONUSER}} used' );
2871 $value = $this->getRevisionUser();
2872 break;
2873 case 'revisionsize':
2874 $value = $this->getRevisionSize();
2875 break;
2876 case 'namespace':
2877 $value = str_replace( '_', ' ',
2878 $this->contLang->getNsText( $this->mTitle->getNamespace() ) );
2879 break;
2880 case 'namespacee':
2881 $value = wfUrlencode( $this->contLang->getNsText( $this->mTitle->getNamespace() ) );
2882 break;
2883 case 'namespacenumber':
2884 $value = $this->mTitle->getNamespace();
2885 break;
2886 case 'talkspace':
2887 $value = $this->mTitle->canHaveTalkPage()
2888 ? str_replace( '_', ' ', $this->mTitle->getTalkNsText() )
2889 : '';
2890 break;
2891 case 'talkspacee':
2892 $value = $this->mTitle->canHaveTalkPage() ? wfUrlencode( $this->mTitle->getTalkNsText() ) : '';
2893 break;
2894 case 'subjectspace':
2895 $value = str_replace( '_', ' ', $this->mTitle->getSubjectNsText() );
2896 break;
2897 case 'subjectspacee':
2898 $value = ( wfUrlencode( $this->mTitle->getSubjectNsText() ) );
2899 break;
2900 case 'currentdayname':
2901 $value = $pageLang->getWeekdayName( (int)MWTimestamp::getInstance( $ts )->format( 'w' ) + 1 );
2902 break;
2903 case 'currentyear':
2904 $value = $pageLang->formatNum( MWTimestamp::getInstance( $ts )->format( 'Y' ), true );
2905 break;
2906 case 'currenttime':
2907 $value = $pageLang->time( wfTimestamp( TS_MW, $ts ), false, false );
2908 break;
2909 case 'currenthour':
2910 $value = $pageLang->formatNum( MWTimestamp::getInstance( $ts )->format( 'H' ), true );
2911 break;
2912 case 'currentweek':
2913 # @bug T6594 PHP5 has it zero padded, PHP4 does not, cast to
2914 # int to remove the padding
2915 $value = $pageLang->formatNum( (int)MWTimestamp::getInstance( $ts )->format( 'W' ) );
2916 break;
2917 case 'currentdow':
2918 $value = $pageLang->formatNum( MWTimestamp::getInstance( $ts )->format( 'w' ) );
2919 break;
2920 case 'localdayname':
2921 $value = $pageLang->getWeekdayName(
2922 (int)MWTimestamp::getLocalInstance( $ts )->format( 'w' ) + 1
2923 );
2924 break;
2925 case 'localyear':
2926 $value = $pageLang->formatNum( MWTimestamp::getLocalInstance( $ts )->format( 'Y' ), true );
2927 break;
2928 case 'localtime':
2929 $value = $pageLang->time(
2930 MWTimestamp::getLocalInstance( $ts )->format( 'YmdHis' ),
2931 false,
2932 false
2933 );
2934 break;
2935 case 'localhour':
2936 $value = $pageLang->formatNum( MWTimestamp::getLocalInstance( $ts )->format( 'H' ), true );
2937 break;
2938 case 'localweek':
2939 # @bug T6594 PHP5 has it zero padded, PHP4 does not, cast to
2940 # int to remove the padding
2941 $value = $pageLang->formatNum( (int)MWTimestamp::getLocalInstance( $ts )->format( 'W' ) );
2942 break;
2943 case 'localdow':
2944 $value = $pageLang->formatNum( MWTimestamp::getLocalInstance( $ts )->format( 'w' ) );
2945 break;
2946 case 'numberofarticles':
2947 $value = $pageLang->formatNum( SiteStats::articles() );
2948 break;
2949 case 'numberoffiles':
2950 $value = $pageLang->formatNum( SiteStats::images() );
2951 break;
2952 case 'numberofusers':
2953 $value = $pageLang->formatNum( SiteStats::users() );
2954 break;
2955 case 'numberofactiveusers':
2956 $value = $pageLang->formatNum( SiteStats::activeUsers() );
2957 break;
2958 case 'numberofpages':
2959 $value = $pageLang->formatNum( SiteStats::pages() );
2960 break;
2961 case 'numberofadmins':
2962 $value = $pageLang->formatNum( SiteStats::numberingroup( 'sysop' ) );
2963 break;
2964 case 'numberofedits':
2965 $value = $pageLang->formatNum( SiteStats::edits() );
2966 break;
2967 case 'currenttimestamp':
2968 $value = wfTimestamp( TS_MW, $ts );
2969 break;
2970 case 'localtimestamp':
2971 $value = MWTimestamp::getLocalInstance( $ts )->format( 'YmdHis' );
2972 break;
2973 case 'currentversion':
2974 $value = SpecialVersion::getVersion();
2975 break;
2976 case 'articlepath':
2977 return $this->svcOptions->get( 'ArticlePath' );
2978 case 'sitename':
2979 return $this->svcOptions->get( 'Sitename' );
2980 case 'server':
2981 return $this->svcOptions->get( 'Server' );
2982 case 'servername':
2983 return $this->svcOptions->get( 'ServerName' );
2984 case 'scriptpath':
2985 return $this->svcOptions->get( 'ScriptPath' );
2986 case 'stylepath':
2987 return $this->svcOptions->get( 'StylePath' );
2988 case 'directionmark':
2989 return $pageLang->getDirMark();
2990 case 'contentlanguage':
2991 return $this->svcOptions->get( 'LanguageCode' );
2992 case 'pagelanguage':
2993 $value = $pageLang->getCode();
2994 break;
2995 case 'cascadingsources':
2996 $value = CoreParserFunctions::cascadingsources( $this );
2997 break;
2998 default:
2999 $ret = null;
3000 Hooks::run(
3001 'ParserGetVariableValueSwitch',
3002 [ &$parser, &$this->mVarCache, &$index, &$ret, &$frame ]
3003 );
3004
3005 return $ret;
3006 }
3007
3008 if ( $index ) {
3009 $this->mVarCache[$index] = $value;
3010 }
3011
3012 return $value;
3013 }
3014
3015 /**
3016 * @param int $start
3017 * @param int $len
3018 * @param int $mtts Max time-till-save; sets vary-revision-timestamp if result changes by then
3019 * @param string $variable Parser variable name
3020 * @return string
3021 */
private function getRevisionTimestampSubstring( $start, $len, $mtts, $variable ) {
	# Requested slice of the revision timestamp as returned by getRevisionTimestamp()
	$slice = substr( $this->getRevisionTimestamp(), $start, $len );

	# With a revision object already available there is nothing to flag
	if ( $this->getRevisionObject() ) {
		return $slice;
	}

	# No revision object yet: take the same slice of the timestamp $mtts seconds
	# ahead of the current wall-clock time (time(), not the parser options' time)
	# so it can be compared against the slice rendered above.
	$futureStamp = $this->contLang->userAdjust( wfTimestamp( TS_MW, time() + $mtts ), '' );
	$futureSlice = substr( $futureStamp, $start, $len );

	if ( $slice !== $futureSlice ) {
		# The rendered value would differ by then, so tell the edit saving system
		# that the canonical output needs a parse with the actual revision timestamp
		$this->setOutputFlag( 'vary-revision-timestamp', "$variable used" );
	}

	return $slice;
}
3046
3047 /**
3048 * initialise the magic variables (like CURRENTMONTHNAME) and substitution modifiers
3049 *
3050 * @private
3051 */
public function initialiseVariables() {
	$factory = $this->magicWordFactory;

	# Look up both ID lists first, then build the two arrays from them
	$idsOfVariables = $factory->getVariableIDs();
	$idsOfSubstWords = $factory->getSubstIDs();

	# mVariables is matched against variable names, mSubstWords against
	# subst:/safesubst: prefixes (see braceSubstitution())
	$this->mVariables = $factory->newArray( $idsOfVariables );
	$this->mSubstWords = $factory->newArray( $idsOfSubstWords );
}
3059
3060 /**
3061 * Preprocess some wikitext and return the document tree.
3062 * This is the ghost of replace_variables().
3063 *
3064 * @param string $text The text to parse
3065 * @param int $flags Bitwise combination of:
3066 * - self::PTD_FOR_INCLUSION: Handle "<noinclude>" and "<includeonly>" as if the text is being
3067 * included. Default is to assume a direct page view.
3068 *
3069 * The generated DOM tree must depend only on the input text and the flags.
3070 * The DOM tree must be the same in OT_HTML and OT_WIKI mode, to avoid a regression of T6899.
3071 *
3072 * Any flag added to the $flags parameter here, or any other parameter liable to cause a
3073 * change in the DOM tree for a given text, must be passed through the section identifier
3074 * in the section edit link and thus back to extractSections().
3075 *
3076 * The output of this function is currently only cached in process memory, but a persistent
3077 * cache may be implemented at a later date which takes further advantage of these strict
3078 * dependency requirements.
3079 *
3080 * @return PPNode
3081 */
public function preprocessToDom( $text, $flags = 0 ) {
	# Thin pass-through: the preprocessor builds the document tree
	return $this->getPreprocessor()->preprocessToObj( $text, $flags );
}
3086
3087 /**
3088 * Return a three-element array: leading whitespace, string contents, trailing whitespace
3089 *
3090 * @param string $s
3091 *
3092 * @return array
3093 */
public static function splitWhitespace( $s ) {
	# Strip the left side first, then the right side of what remains, so the
	# three returned pieces concatenate back to the original string.
	$ltrimmed = ltrim( $s );
	$trimmed = rtrim( $ltrimmed );

	$leadLen = strlen( $s ) - strlen( $ltrimmed );
	$trailLen = strlen( $ltrimmed ) - strlen( $trimmed );

	# Only call substr() for a non-empty run: before PHP 8.0, substr( '', 0, 0 )
	# returns false, which would leak a boolean into the result for empty input.
	$leading = $leadLen > 0 ? substr( $s, 0, $leadLen ) : '';
	$trailing = $trailLen > 0 ? substr( $ltrimmed, -$trailLen ) : '';

	return [ $leading, $trimmed, $trailing ];
}
3106
3107 /**
3108 * Replace magic variables, templates, and template arguments
3109 * with the appropriate text. Templates are substituted recursively,
3110 * taking care to avoid infinite loops.
3111 *
3112 * Note that the substitution depends on value of $mOutputType:
3113 * self::OT_WIKI: only {{subst:}} templates
3114 * self::OT_PREPROCESS: templates but not extension tags
3115 * self::OT_HTML: all templates and extension tags
3116 *
3117 * @param string $text The text to transform
3118 * @param false|PPFrame|array $frame Object describing the arguments passed to the
3119 * template. Arguments may also be provided as an associative array, as
3120 * was the usual case before MW1.12. Providing arguments this way may be
3121 * useful for extensions wishing to perform variable replacement
3122 * explicitly.
3123 * @param bool $argsOnly Only do argument (triple-brace) expansion, not
3124 * double-brace expansion.
3125 * @return string
3126 */
public function replaceVariables( $text, $frame = false, $argsOnly = false ) {
	# Empty text needs no work; text above the include size limit is
	# returned untouched rather than expanded.
	$length = strlen( $text );
	if ( $length < 1 ) {
		return $text;
	}
	if ( $length > $this->mOptions->getMaxIncludeSize() ) {
		return $text;
	}

	# Normalise $frame to a PPFrame instance
	if ( !( $frame instanceof PPFrame ) ) {
		if ( $frame === false ) {
			# No arguments supplied: start from a fresh frame
			$frame = $this->getPreprocessor()->newFrame();
		} else {
			# Plain (array) arguments: wrap them in a custom frame
			$this->logger->debug(
				__METHOD__ . " called using plain parameters instead of " .
				"a PPFrame instance. Creating custom frame."
			);
			$frame = $this->getPreprocessor()->newCustomFrame( $frame );
		}
	}

	# Build the document tree, then expand it; with $argsOnly, templates
	# (double braces) are left alone
	$tree = $this->preprocessToDom( $text );
	$expandFlags = $argsOnly ? PPFrame::NO_TEMPLATES : 0;

	return $frame->expand( $tree, $expandFlags );
}
3150
3151 /**
3152 * Clean up argument array - refactored in 1.9 so parserfunctions can use it, too.
3153 *
3154 * @param array $args
3155 *
3156 * @return array
3157 */
public static function createAssocArgs( $args ) {
	$assocArgs = [];
	# Next positional key; only arguments without '=' consume a position
	$index = 1;
	foreach ( $args as $arg ) {
		# Split on the first '=' only, so values may themselves contain '='
		$pieces = explode( '=', $arg, 2 );
		if ( count( $pieces ) === 1 ) {
			# Positional argument: stored as given (not trimmed)
			$assocArgs[$index++] = $arg;
		} else {
			# Named argument: both name and value are trimmed. trim() always
			# returns a string, so no false-checks are needed; an empty name
			# or value is stored as ''.
			$assocArgs[trim( $pieces[0] )] = trim( $pieces[1] );
		}
	}

	return $assocArgs;
}
3179
3180 /**
3181 * Warn the user when a parser limitation is reached
 * Will warn the user at most once per limitation type
3183 *
3184 * The results are shown during preview and run through the Parser (See EditPage.php)
3185 *
3186 * @param string $limitationType Should be one of:
3187 * 'expensive-parserfunction' (corresponding messages:
3188 * 'expensive-parserfunction-warning',
3189 * 'expensive-parserfunction-category')
3190 * 'post-expand-template-argument' (corresponding messages:
3191 * 'post-expand-template-argument-warning',
3192 * 'post-expand-template-argument-category')
3193 * 'post-expand-template-inclusion' (corresponding messages:
3194 * 'post-expand-template-inclusion-warning',
3195 * 'post-expand-template-inclusion-category')
3196 * 'node-count-exceeded' (corresponding messages:
3197 * 'node-count-exceeded-warning',
3198 * 'node-count-exceeded-category')
3199 * 'expansion-depth-exceeded' (corresponding messages:
3200 * 'expansion-depth-exceeded-warning',
3201 * 'expansion-depth-exceeded-category')
3202 * @param string|int|null $current Current value
3203 * @param string|int|null $max Maximum allowed, when an explicit limit has been
3204 * exceeded, provide the values (optional)
3205 */
public function limitationWarn( $limitationType, $current = '', $max = '' ) {
	# Passing $current and $max is harmless even when the message ignores them.
	# ->inLanguage( $this->mOptions->getUserLangObj() ) is deliberately not used:
	# the warning is only shown during preview, and it would split the parser
	# cache unnecessarily.
	$message = wfMessage( "$limitationType-warning" )->numParams( $current, $max );
	$this->mOutput->addWarning( $message->text() );

	# Also record the matching tracking category
	$this->addTrackingCategory( "$limitationType-category" );
}
3215
3216 /**
3217 * Return the text of a template, after recursively
3218 * replacing any variables or templates within the template.
3219 *
3220 * @param array $piece The parts of the template
3221 * $piece['title']: the title, i.e. the part before the |
3222 * $piece['parts']: the parameter array
3223 * $piece['lineStart']: whether the brace was at the start of a line
3224 * @param PPFrame $frame The current frame, contains template arguments
3225 * @throws Exception
3226 * @return string|array The text of the template
3227 */
public function braceSubstitution( $piece, $frame ) {
	# Overview of the stages below, each skipped once $found is set:
	#  1. subst:/safesubst: handling (may keep the call as literal wikitext)
	#  2. magic variables (only when there are no arguments)
	#  3. msgnw:/msg:/raw: prefix stripping
	#  4. parser functions ("name:arg")
	#  5. title resolution and recursion depth check
	#  6. loading the template (special page, local page or interwiki),
	#     followed by the template loop check
	# Afterwards the result is expanded, armoured or escaped as needed and
	# checked against the post-expand include size limit.

	// Flags

	// $text has been filled
	$found = false;
	// wiki markup in $text should be escaped
	$nowiki = false;
	// $text is HTML, armour it against wikitext transformation
	$isHTML = false;
	// Force interwiki transclusion to be done in raw mode not rendered
	$forceRawInterwiki = false;
	// $text is a DOM node needing expansion in a child frame
	$isChildObj = false;
	// $text is a DOM node needing expansion in the current frame
	$isLocalObj = false;

	# Title object, where $text came from
	$title = false;

	# $part1 is the bit before the first |, and must contain only title characters.
	# Various prefixes will be stripped from it later.
	$titleWithSpaces = $frame->expand( $piece['title'] );
	$part1 = trim( $titleWithSpaces );
	$titleText = false;

	# Original title text preserved for various purposes
	$originalTitle = $part1;

	# $args is a list of argument nodes, starting from index 0, not including $part1
	# @todo FIXME: If piece['parts'] is null then the call to getLength()
	# below won't work b/c this $args isn't an object
	$args = ( $piece['parts'] == null ) ? [] : $piece['parts'];

	$profileSection = null; // profile templates

	# SUBST
	if ( !$found ) {
		$substMatch = $this->mSubstWords->matchStartAndRemove( $part1 );

		# Possibilities for substMatch: "subst", "safesubst" or FALSE
		# Decide whether to expand template or keep wikitext as-is.
		if ( $this->ot['wiki'] ) {
			if ( $substMatch === false ) {
				$literal = true; # literal when in PST with no prefix
			} else {
				$literal = false; # expand when in PST with subst: or safesubst:
			}
		} else {
			if ( $substMatch == 'subst' ) {
				$literal = true; # literal when not in PST with plain subst:
			} else {
				$literal = false; # expand when not in PST with safesubst: or no prefix
			}
		}
		if ( $literal ) {
			$text = $frame->virtualBracketedImplode( '{{', '|', '}}', $titleWithSpaces, $args );
			$isLocalObj = true;
			$found = true;
		}
	}

	# Variables
	if ( !$found && $args->getLength() == 0 ) {
		$id = $this->mVariables->matchStartToEnd( $part1 );
		if ( $id !== false ) {
			$text = $this->getVariableValue( $id, $frame );
			if ( $this->magicWordFactory->getCacheTTL( $id ) > -1 ) {
				$this->mOutput->updateCacheExpiry(
					$this->magicWordFactory->getCacheTTL( $id ) );
			}
			$found = true;
		}
	}

	# MSG, MSGNW and RAW
	if ( !$found ) {
		# Check for MSGNW:
		$mwMsgnw = $this->magicWordFactory->get( 'msgnw' );
		if ( $mwMsgnw->matchStartAndRemove( $part1 ) ) {
			$nowiki = true;
		} else {
			# Remove obsolete MSG:
			$mwMsg = $this->magicWordFactory->get( 'msg' );
			$mwMsg->matchStartAndRemove( $part1 );
		}

		# Check for RAW:
		$mwRaw = $this->magicWordFactory->get( 'raw' );
		if ( $mwRaw->matchStartAndRemove( $part1 ) ) {
			$forceRawInterwiki = true;
		}
	}

	# Parser functions
	if ( !$found ) {
		$colonPos = strpos( $part1, ':' );
		if ( $colonPos !== false ) {
			$func = substr( $part1, 0, $colonPos );
			# First function argument is the text after the colon; the
			# remaining ones are the unexpanded argument nodes
			$funcArgs = [ trim( substr( $part1, $colonPos + 1 ) ) ];
			$argsLength = $args->getLength();
			for ( $i = 0; $i < $argsLength; $i++ ) {
				$funcArgs[] = $args->item( $i );
			}

			$result = $this->callParserFunction( $frame, $func, $funcArgs );

			// Extract any forwarded flags
			if ( isset( $result['title'] ) ) {
				$title = $result['title'];
			}
			if ( isset( $result['found'] ) ) {
				$found = $result['found'];
			}
			if ( array_key_exists( 'text', $result ) ) {
				// a string or null
				$text = $result['text'];
			}
			if ( isset( $result['nowiki'] ) ) {
				$nowiki = $result['nowiki'];
			}
			if ( isset( $result['isHTML'] ) ) {
				$isHTML = $result['isHTML'];
			}
			if ( isset( $result['forceRawInterwiki'] ) ) {
				$forceRawInterwiki = $result['forceRawInterwiki'];
			}
			if ( isset( $result['isChildObj'] ) ) {
				$isChildObj = $result['isChildObj'];
			}
			if ( isset( $result['isLocalObj'] ) ) {
				$isLocalObj = $result['isLocalObj'];
			}
		}
	}

	# Finish mangling title and then check for loops.
	# Set $title to a Title object and $titleText to the PDBK
	if ( !$found ) {
		$ns = NS_TEMPLATE;
		# Split the title into page and subpage
		$subpage = '';
		$relative = $this->maybeDoSubpageLink( $part1, $subpage );
		if ( $part1 !== $relative ) {
			$part1 = $relative;
			$ns = $this->mTitle->getNamespace();
		}
		$title = Title::newFromText( $part1, $ns );
		if ( $title ) {
			$titleText = $title->getPrefixedText();
			# Check for language variants if the template is not found
			if ( $this->getTargetLanguage()->hasVariants() && $title->getArticleID() == 0 ) {
				$this->getTargetLanguage()->findVariantLink( $part1, $title, true );
			}
			# Do recursion depth check
			$limit = $this->mOptions->getMaxTemplateDepth();
			if ( $frame->depth >= $limit ) {
				$found = true;
				$text = '<span class="error">'
					. wfMessage( 'parser-template-recursion-depth-warning' )
						->numParams( $limit )->inContentLanguage()->text()
					. '</span>';
			}
		}
	}

	# Load from database
	if ( !$found && $title ) {
		$profileSection = $this->mProfiler->scopedProfileIn( $title->getPrefixedDBkey() );
		if ( !$title->isExternal() ) {
			if ( $title->isSpecialPage()
				&& $this->mOptions->getAllowSpecialInclusion()
				&& $this->ot['html']
			) {
				$specialPage = $this->specialPageFactory->getPage( $title->getDBkey() );
				// Pass the template arguments as URL parameters.
				// "uselang" will have no effect since the Language object
				// is forced to the one defined in ParserOptions.
				$pageArgs = [];
				$argsLength = $args->getLength();
				for ( $i = 0; $i < $argsLength; $i++ ) {
					$bits = $args->item( $i )->splitArg();
					# Only arguments whose 'index' is empty are forwarded
					if ( strval( $bits['index'] ) === '' ) {
						$name = trim( $frame->expand( $bits['name'], PPFrame::STRIP_COMMENTS ) );
						$value = trim( $frame->expand( $bits['value'] ) );
						$pageArgs[$name] = $value;
					}
				}

				// Create a new context to execute the special page
				$context = new RequestContext;
				$context->setTitle( $title );
				$context->setRequest( new FauxRequest( $pageArgs ) );
				if ( $specialPage && $specialPage->maxIncludeCacheTime() === 0 ) {
					$context->setUser( $this->getUser() );
				} else {
					// If this page is cached, then we better not be per user.
					$context->setUser( User::newFromName( '127.0.0.1', false ) );
				}
				$context->setLanguage( $this->mOptions->getUserLangObj() );
				$ret = $this->specialPageFactory->capturePath( $title, $context, $this->getLinkRenderer() );
				if ( $ret ) {
					$text = $context->getOutput()->getHTML();
					$this->mOutput->addOutputPageMetadata( $context->getOutput() );
					$found = true;
					$isHTML = true;
					if ( $specialPage && $specialPage->maxIncludeCacheTime() !== false ) {
						$this->mOutput->updateRuntimeAdaptiveExpiry(
							$specialPage->maxIncludeCacheTime()
						);
					}
				}
			} elseif ( $this->nsInfo->isNonincludable( $title->getNamespace() ) ) {
				$found = false; # access denied
				$this->logger->debug(
					__METHOD__ .
					": template inclusion denied for " . $title->getPrefixedDBkey()
				);
			} else {
				list( $text, $title ) = $this->getTemplateDom( $title );
				if ( $text !== false ) {
					$found = true;
					$isChildObj = true;
				}
			}

			# If the title is valid but undisplayable, make a link to it
			if ( !$found && ( $this->ot['html'] || $this->ot['pre'] ) ) {
				$text = "[[:$titleText]]";
				$found = true;
			}
		} elseif ( $title->isTrans() ) {
			# Interwiki transclusion
			if ( $this->ot['html'] && !$forceRawInterwiki ) {
				$text = $this->interwikiTransclude( $title, 'render' );
				$isHTML = true;
			} else {
				$text = $this->interwikiTransclude( $title, 'raw' );
				# Preprocess it like a template
				$text = $this->preprocessToDom( $text, self::PTD_FOR_INCLUSION );
				$isChildObj = true;
			}
			$found = true;
		}

		# Do infinite loop check
		# This has to be done after redirect resolution to avoid infinite loops via redirects
		if ( !$frame->loopCheck( $title ) ) {
			$found = true;
			$text = '<span class="error">'
				. wfMessage( 'parser-template-loop-warning', $titleText )->inContentLanguage()->text()
				. '</span>';
			$this->addTrackingCategory( 'template-loop-category' );
			$this->mOutput->addWarning( wfMessage( 'template-loop-warning',
				wfEscapeWikiText( $titleText ) )->text() );
			$this->logger->debug( __METHOD__ . ": template loop broken at '$titleText'" );
		}
	}

	# If we haven't found text to substitute by now, we're done
	# Recover the source wikitext and return it
	if ( !$found ) {
		$text = $frame->virtualBracketedImplode( '{{', '|', '}}', $titleWithSpaces, $args );
		if ( $profileSection ) {
			$this->mProfiler->scopedProfileOut( $profileSection );
		}
		return [ 'object' => $text ];
	}

	# Expand DOM-style return values in a child frame
	if ( $isChildObj ) {
		# Clean up argument array
		$newFrame = $frame->newChild( $args, $title );

		if ( $nowiki ) {
			$text = $newFrame->expand( $text, PPFrame::RECOVER_ORIG );
		} elseif ( $titleText !== false && $newFrame->isEmpty() ) {
			# Expansion is eligible for the empty-frame cache
			$text = $newFrame->cachedExpand( $titleText, $text );
		} else {
			# Uncached expansion
			$text = $newFrame->expand( $text );
		}
	}
	if ( $isLocalObj && $nowiki ) {
		$text = $frame->expand( $text, PPFrame::RECOVER_ORIG );
		$isLocalObj = false;
	}

	if ( $profileSection ) {
		$this->mProfiler->scopedProfileOut( $profileSection );
	}

	# Replace raw HTML by a placeholder
	if ( $isHTML ) {
		$text = $this->insertStripItem( $text );
	} elseif ( $nowiki && ( $this->ot['html'] || $this->ot['pre'] ) ) {
		# Escape nowiki-style return values
		$text = wfEscapeWikiText( $text );
	} elseif ( is_string( $text )
		&& !$piece['lineStart']
		&& preg_match( '/^(?:{\\||:|;|#|\*)/', $text )
	) {
		# T2529: if the template begins with a table or block-level
		# element, it should be treated as beginning a new line.
		# This behavior is somewhat controversial.
		$text = "\n" . $text;
	}

	if ( is_string( $text ) && !$this->incrementIncludeSize( 'post-expand', strlen( $text ) ) ) {
		# Error, oversize inclusion
		if ( $titleText !== false ) {
			# Make a working, properly escaped link if possible (T25588)
			$text = "[[:$titleText]]";
		} else {
			# This will probably not be a working link, but at least it may
			# provide some hint of where the problem is.
			# preg_replace() returns the new string and leaves its subject
			# untouched, so the result must be assigned back; otherwise a
			# leading colon is kept and the link below becomes "[[::...]]".
			$originalTitle = preg_replace( '/^:/', '', $originalTitle );
			$text = "[[:$originalTitle]]";
		}
		$text .= $this->insertStripItem( '<!-- WARNING: template omitted, '
			. 'post-expand include size too large -->' );
		$this->limitationWarn( 'post-expand-template-inclusion' );
	}

	# Objects still needing expansion in the current frame go back under
	# 'object'; everything else is returned as 'text'
	if ( $isLocalObj ) {
		$ret = [ 'object' => $text ];
	} else {
		$ret = [ 'text' => $text ];
	}

	return $ret;
}
3560
3561 /**
3562 * Call a parser function and return an array with text and flags.
3563 *
3564 * The returned array will always contain a boolean 'found', indicating
3565 * whether the parser function was found or not. It may also contain the
3566 * following:
3567 * text: string|object, resulting wikitext or PP DOM object
3568 * isHTML: bool, $text is HTML, armour it against wikitext transformation
3569 * isChildObj: bool, $text is a DOM node needing expansion in a child frame
3570 * isLocalObj: bool, $text is a DOM node needing expansion in the current frame
3571 * nowiki: bool, wiki markup in $text should be escaped
3572 *
3573 * @since 1.21
3574 * @param PPFrame $frame The current frame, contains template arguments
3575 * @param string $function Function name
3576 * @param array $args Arguments to the function
3577 * @throws MWException
3578 * @return array
3579 */
public function callParserFunction( $frame, $function, array $args = [] ) {
	# Resolve the function name to its hook ID.
	# Case sensitive functions
	if ( isset( $this->mFunctionSynonyms[1][$function] ) ) {
		$function = $this->mFunctionSynonyms[1][$function];
	} else {
		# Case insensitive functions
		$function = $this->contLang->lc( $function );
		if ( isset( $this->mFunctionSynonyms[0][$function] ) ) {
			$function = $this->mFunctionSynonyms[0][$function];
		} else {
			# Unknown function: only the 'found' flag is returned
			return [ 'found' => false ];
		}
	}

	list( $callback, $flags ) = $this->mFunctionHooks[$function];

	// Avoid PHP 7.1 warning from passing $this by reference
	$parser = $this;

	# The parser is always the first callback argument
	$allArgs = [ &$parser ];
	if ( $flags & self::SFH_OBJECT_ARGS ) {
		# Convert arguments to PPNodes and collect for appending to $allArgs
		$funcArgs = [];
		foreach ( $args as $k => $v ) {
			if ( $v instanceof PPNode || $k === 0 ) {
				# Nodes, and the first argument, are passed through unchanged
				$funcArgs[] = $v;
			} else {
				# Go through getPreprocessor() like the other methods of this
				# class, rather than reading $this->mPreprocessor directly
				# NOTE(review): presumably getPreprocessor() creates the
				# preprocessor on demand — confirm
				$funcArgs[] = $this->getPreprocessor()->newPartNodeArray( [ $k => $v ] )->item( 0 );
			}
		}

		# Add a frame parameter, and pass the arguments as an array
		$allArgs[] = $frame;
		$allArgs[] = $funcArgs;
	} else {
		# Convert arguments to plain text and append to $allArgs
		foreach ( $args as $k => $v ) {
			if ( $v instanceof PPNode ) {
				$allArgs[] = trim( $frame->expand( $v ) );
			} elseif ( is_int( $k ) && $k >= 0 ) {
				$allArgs[] = trim( $v );
			} else {
				# Named argument given as key => value: rebuild "key=value"
				$allArgs[] = trim( "$k=$v" );
			}
		}
	}

	$result = $callback( ...$allArgs );

	# The interface for function hooks allows them to return a wikitext
	# string or an array containing the string and any flags. This mungs
	# things around to match what this method should return.
	if ( !is_array( $result ) ) {
		$result = [
			'found' => true,
			'text' => $result,
		];
	} else {
		# Element 0 is the text unless an explicit 'text' key is set
		if ( isset( $result[0] ) && !isset( $result['text'] ) ) {
			$result['text'] = $result[0];
		}
		unset( $result[0] );
		# 'found' defaults to true but a hook-supplied value wins
		$result += [
			'found' => true,
		];
	}

	$noparse = true;
	$preprocessFlags = 0;
	if ( isset( $result['noparse'] ) ) {
		$noparse = $result['noparse'];
	}
	if ( isset( $result['preprocessFlags'] ) ) {
		$preprocessFlags = $result['preprocessFlags'];
	}

	# When the hook asks for parsing, turn its text into a DOM tree to be
	# expanded in a child frame by the caller
	if ( !$noparse ) {
		$result['text'] = $this->preprocessToDom( $result['text'], $preprocessFlags );
		$result['isChildObj'] = true;
	}

	return $result;
}
3663
3664 /**
3665 * Get the semi-parsed DOM representation of a template with a given title,
3666 * and its redirect destination title. Cached.
3667 *
3668 * @param Title $title
3669 *
3670 * @return array
3671 */
public function getTemplateDom( $title ) {
	$requested = $title;
	$key = $title->getPrefixedDBkey();

	# Follow a previously recorded redirect for this title, if there is one
	if ( isset( $this->mTplRedirCache[$key] ) ) {
		list( $targetNs, $targetDbKey ) = $this->mTplRedirCache[$key];
		$title = Title::makeTitle( $targetNs, $targetDbKey );
		$key = $title->getPrefixedDBkey();
	}

	# Serve from the in-process DOM cache when an entry is set
	if ( isset( $this->mTplDomCache[$key] ) ) {
		return [ $this->mTplDomCache[$key], $title ];
	}

	# Cache miss: fetch the text; the fetch may hand back a different title
	list( $wikitext, $title ) = $this->fetchTemplateAndTitle( $title );

	if ( $wikitext !== false ) {
		$result = $this->preprocessToDom( $wikitext, self::PTD_FOR_INCLUSION );
		$this->mTplDomCache[$key] = $result;

		# Remember the redirect so later lookups of the requested title
		# resolve to the final title's cache entry
		if ( !$title->equals( $requested ) ) {
			$this->mTplRedirCache[$requested->getPrefixedDBkey()] =
				[ $title->getNamespace(), $title->getDBkey() ];
		}
	} else {
		# Nothing could be fetched; record the failure under the same key
		$result = false;
		$this->mTplDomCache[$key] = false;
	}

	return [ $result, $title ];
}
3703
3704 /**
3705 * Fetch the current revision of a given title. Note that the revision
3706 * (and even the title) may not exist in the database, so everything
3707 * contributing to the output of the parser should use this method
3708 * where possible, rather than getting the revisions themselves. This
3709 * method also caches its results, so using it benefits performance.
3710 *
3711 * @since 1.24
3712 * @param Title $title
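 * @return Revision|bool Result of the current-revision callback; the default
 *   callback, Parser::statelessFetchRevision(), returns false if missing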
3714 */
public function fetchCurrentRevisionOfTitle( $title ) {
	$key = $title->getPrefixedDBkey();

	# Create the LRU cache (100 entries) on first use
	if ( !$this->currentRevisionCache ) {
		$this->currentRevisionCache = new MapCacheLRU( 100 );
	}

	if ( $this->currentRevisionCache->has( $key ) ) {
		return $this->currentRevisionCache->get( $key );
	}

	# Not cached yet: ask the configured callback and store whatever it returns.
	# Defaults to Parser::statelessFetchRevision()
	$this->currentRevisionCache->set(
		$key,
		call_user_func( $this->mOptions->getCurrentRevisionCallback(), $title, $this )
	);

	return $this->currentRevisionCache->get( $key );
}
3728
3729 /**
3730 * @param Title $title
3731 * @return bool
3732 * @since 1.34
3733 */
public function isCurrentRevisionOfTitleCached( $title ) {
	# The lookup key must be the one fetchCurrentRevisionOfTitle() stores
	# under, i.e. the prefixed DB key. Using the prefixed text instead would
	# miss every entry for which the two forms differ.
	return (
		$this->currentRevisionCache &&
		$this->currentRevisionCache->has( $title->getPrefixedDBkey() )
	);
}
3740
3741 /**
 * Wrapper around Revision::newKnownCurrent to allow passing additional parameters
 * without passing them on to it.
3744 *
3745 * @since 1.24
3746 * @param Title $title
3747 * @param Parser|bool $parser
3748 * @return Revision|bool False if missing
3749 */
public static function statelessFetchRevision( Title $title, $parser = false ) {
	# $parser is accepted but not used here; the lookup goes to a replica
	# database connection
	return Revision::newKnownCurrent( wfGetDB( DB_REPLICA ), $title );
}
3755
3756 /**
3757 * Fetch the unparsed text of a template and register a reference to it.
3758 * @param Title $title
3759 * @return array ( string or false, Title )
3760 */
public function fetchTemplateAndTitle( $title ) {
	# Ask the configured callback for the template.
	# Defaults to Parser::statelessFetchTemplate()
	$fetched = call_user_func( $this->mOptions->getTemplateCallback(), $title, $this );

	$rev = $fetched['revision'] ?? null;
	$text = $fetched['text'];
	if ( is_string( $text ) ) {
		// We use U+007F DELETE to distinguish strip markers from regular text
		$text = strtr( $text, "\x7f", "?" );
	}
	$finalTitle = $fetched['finalTitle'] ?? $title;

	# Register every dependency reported by the callback
	$deps = $fetched['deps'] ?? [];
	foreach ( $deps as $dep ) {
		$this->mOutput->addTemplate( $dep['title'], $dep['page_id'], $dep['rev_id'] );

		if ( !$dep['title']->equals( $this->getTitle() ) || !( $rev instanceof Revision ) ) {
			continue;
		}
		// Self-transclusion; final result may change based on the new page version
		$this->setOutputFlag( 'vary-revision-sha1', 'Self transclusion' );
		$this->getOutput()->setRevisionUsedSha1Base36( $rev->getSha1() );
	}

	return [ $text, $finalTitle ];
}
3783
3784 /**
3785 * Fetch the unparsed text of a template and register a reference to it.
3786 * @param Title $title
3787 * @return string|bool
3788 */
public function fetchTemplate( $title ) {
	# Only the text is wanted; the final title is discarded
	list( $text ) = $this->fetchTemplateAndTitle( $title );

	return $text;
}
3792
/**
 * Static function to get a template.
 * Can be overridden via ParserOptions::setTemplateCallback().
 *
 * Makes at most two passes: one for the requested title and one for the
 * redirect target of its content, collecting a dependency entry for each
 * page consulted.
 *
 * @param Title $title
 * @param bool|Parser $parser
 *
 * @return array Map with the keys 'revision', 'text', 'finalTitle' and 'deps'
 */
public static function statelessFetchTemplate( $title, $parser = false ) {
	$text = false;
	$skip = false;
	$finalTitle = $title;
	$deps = [];
	$rev = null;

	# Fetch the article, following up to 1 redirect
	for ( $pass = 0; $pass < 2 && is_object( $title ); $pass++ ) {
		# Extensions may choose the revision themselves, or veto the fetch
		$id = false; # false means "assume current"
		Hooks::run( 'BeforeParserFetchTemplateAndtitle',
			[ $parser, $title, &$skip, &$id ] );

		if ( $skip ) {
			$text = false;
			$deps[] = [
				'title' => $title,
				'page_id' => $title->getArticleID(),
				'rev_id' => null
			];
			break;
		}

		# Work out which revision to read
		if ( $id ) {
			$rev = Revision::newFromId( $id );
		} elseif ( $parser ) {
			$rev = $parser->fetchCurrentRevisionOfTitle( $title );
		} else {
			$rev = Revision::newFromTitle( $title );
		}
		$revId = $rev ? $rev->getId() : 0;

		# No current revision means no page; remember that in the link cache
		if ( $id === false && !$rev ) {
			MediaWikiServices::getInstance()->getLinkCache()->addBadLinkObj( $title );
		}

		$deps[] = [
			'title' => $title,
			'page_id' => $title->getArticleID(),
			'rev_id' => $revId
		];
		if ( $rev && !$title->equals( $rev->getTitle() ) ) {
			# The revision belongs to a different title; register that one as well
			$deps[] = [
				'title' => $rev->getTitle(),
				'page_id' => $rev->getPage(),
				'rev_id' => $revId
			];
		}

		if ( $rev ) {
			$content = $rev->getContent();
			if ( $content ) {
				$text = $content->getWikitextForTransclusion();
			} else {
				$text = null;
			}

			Hooks::run( 'ParserFetchTemplate',
				[ $parser, $title, $rev, &$text, &$deps ] );

			if ( $text === null || $text === false ) {
				$text = false;
				break;
			}
		} elseif ( $title->getNamespace() == NS_MEDIAWIKI ) {
			$messageKey = MediaWikiServices::getInstance()->getContentLanguage()
				->lcfirst( $title->getText() );
			$message = wfMessage( $messageKey )->inContentLanguage();
			if ( !$message->exists() ) {
				$text = false;
				break;
			}
			$content = $message->content();
			$text = $message->plain();
		} else {
			break;
		}

		if ( !$content ) {
			break;
		}

		# Redirect? Continue with the target (if any) on the next pass
		$finalTitle = $title;
		$title = $content->getRedirectTarget();
	}

	return [
		'revision' => $rev,
		'text' => $text,
		'finalTitle' => $finalTitle,
		'deps' => $deps
	];
}
3890
/**
 * Fetch a file and its title and register a reference to it.
 * If 'broken' is a key in $options then the file will appear as a broken thumbnail.
 *
 * The image is registered on the output under the requested title and, when
 * the file found carries a different title, under that title as well.
 *
 * @param Title $title
 * @param array $options Array of options to RepoGroup::findFile
 * @return array ( File or false, Title of file )
 */
public function fetchFileAndTitle( $title, $options = [] ) {
	$file = $this->fetchFileNoRegister( $title, $options );

	if ( $file ) {
		$time = $file->getTimestamp();
		$sha1 = $file->getSha1();
	} else {
		$time = false;
		$sha1 = false;
	}

	# Register the dependency under the name that was asked for...
	$this->mOutput->addImage( $title->getDBkey(), $time, $sha1 );

	if ( $file && !$title->equals( $file->getTitle() ) ) {
		# ...and again under the title the file actually has
		$title = $file->getTitle();
		$this->mOutput->addImage( $title->getDBkey(), $time, $sha1 );
	}

	return [ $file, $title ];
}
3912
/**
 * Helper function for fetchFileAndTitle.
 *
 * Also useful if you need to fetch a file but not use it yet,
 * for example to get the file's handler.
 *
 * Lookup strategy, in order:
 *  - 'broken' set in $options: no lookup at all, false is returned
 *  - 'sha1' set in $options: lookup by (sha1, timestamp)
 *  - otherwise: lookup by (name, timestamp)
 *
 * @param Title $title
 * @param array $options Array of options to RepoGroup::findFile
 * @return File|bool
 */
protected function fetchFileNoRegister( $title, $options = [] ) {
	if ( isset( $options['broken'] ) ) {
		// Broken thumbnail forced by hook
		return false;
	}

	// Use the RepoGroup service for both lookups instead of mixing it with
	// the legacy RepoGroup::singleton() accessor.
	$repoGroup = MediaWikiServices::getInstance()->getRepoGroup();

	if ( isset( $options['sha1'] ) ) {
		// Get by (sha1,timestamp)
		return $repoGroup->findFileFromKey( $options['sha1'], $options );
	}

	// Get by (name,timestamp)
	return $repoGroup->findFile( $title, $options );
}
3933
/**
 * Transclude an interwiki link.
 *
 * The remote page is requested over HTTP and the response is kept in the
 * main WAN object cache. Every failure path yields a content-language
 * message instead of the page text.
 *
 * @param Title $title
 * @param string $action Usually one of (raw, render)
 *
 * @return string
 */
public function interwikiTransclude( $title, $action ) {
	if ( !$this->svcOptions->get( 'EnableScaryTranscluding' ) ) {
		return wfMessage( 'scarytranscludedisabled' )->inContentLanguage()->text();
	}

	$url = $title->getFullURL( [ 'action' => $action ] );
	if ( strlen( $url ) > 1024 ) {
		return wfMessage( 'scarytranscludetoolong' )->inContentLanguage()->text();
	}

	$wikiId = $title->getTransWikiID(); // remote wiki ID or false

	// Captured here because __METHOD__ is evaluated outside the closure below
	$caller = __METHOD__;
	$cache = MediaWikiServices::getInstance()->getMainWANObjectCache();

	$cacheKey = $cache->makeGlobalKey(
		'interwiki-transclude',
		( $wikiId !== false ) ? $wikiId : 'external',
		sha1( $url )
	);

	$checkKeys = [];
	if ( $wikiId !== false ) {
		$checkKeys[] = $cache->makeGlobalKey( 'interwiki-page', $wikiId, $title->getDBkey() );
	}

	$fetchRemote = function ( $oldValue, &$ttl ) use ( $url, $caller, $cache ) {
		$request = MWHttpRequest::factory( $url, [], $caller );

		$status = $request->execute(); // Status object
		$succeeded = $status->isOK();
		if ( !$succeeded ) {
			// Failed fetches are not cached
			$ttl = $cache::TTL_UNCACHEABLE;
		} elseif ( $request->getResponseHeader( 'X-Database-Lagged' ) !== null ) {
			// The remote reported lag; cap how long the result is kept
			$ttl = min( $cache::TTL_LAGGED, $ttl );
		}

		return [
			'text' => $succeeded ? $request->getContent() : null,
			'code' => $request->getStatus()
		];
	};

	$data = $cache->getWithSetCallback(
		$cacheKey,
		$this->svcOptions->get( 'TranscludeCacheExpiry' ),
		$fetchRemote,
		[
			'checkKeys' => $checkKeys,
			'pcGroup' => 'interwiki-transclude:5',
			'pcTTL' => $cache::TTL_PROC_LONG
		]
	);

	if ( is_string( $data['text'] ) ) {
		return $data['text'];
	}

	if ( $data['code'] != 200 ) {
		// Though we failed to fetch the content, this status is useless.
		return wfMessage( 'scarytranscludefailed-httpstatus' )
			->params( $url, $data['code'] )->inContentLanguage()->text();
	}

	return wfMessage( 'scarytranscludefailed', $url )->inContentLanguage()->text();
}
4000
/**
 * Triple brace replacement -- used for template arguments
 * @private
 *
 * Resolves a {{{name|default}}} piece against the frame: the frame's own
 * argument wins, then the supplied default, and failing both the original
 * triple-brace construct is rebuilt.
 *
 * @param array $piece Must provide the keys 'title' and 'parts'
 * @param PPFrame $frame
 *
 * @return array Either [ 'object' => ... ] or [ 'text' => ... ]
 */
public function argSubstitution( $piece, $frame ) {
	$parts = $piece['parts'];
	$rawName = $frame->expand( $piece['title'] );
	$text = $frame->getArgument( trim( $rawName ) );
	$object = false;
	$sizeWarning = false;

	if ( $text === false && $parts->getLength() > 0
		&& ( $this->ot['html']
			|| $this->ot['pre']
			|| ( $this->ot['wiki'] && $frame->isTemplate() )
		)
	) {
		# The frame has no such argument; fall back to the supplied default
		$object = $parts->item( 0 )->getChildren();
	}

	if ( !$this->incrementIncludeSize( 'arg', strlen( $text ) ) ) {
		$sizeWarning = '<!-- WARNING: argument omitted, expansion size too large -->';
		$this->limitationWarn( 'post-expand-template-argument' );
	}

	if ( $text === false && $object === false ) {
		# Neither an argument nor a default: put the braces back
		$object = $frame->virtualBracketedImplode( '{{{', '|', '}}}', $rawName, $parts );
	}

	if ( $sizeWarning !== false ) {
		$text .= $sizeWarning;
	}

	return ( $object !== false )
		? [ 'object' => $object ]
		: [ 'text' => $text ];
}
4046
/**
 * Return the text to be used for a given extension tag.
 * This is the ghost of strip().
 *
 * In HTML output mode (or for function tag hooks) the registered hook is run
 * and its result returned directly or hidden behind a strip marker; in all
 * other modes the tag is rebuilt as text and hidden behind a strip marker.
 *
 * @param array $params Associative array of parameters:
 *     name       PPNode for the tag name
 *     attr       PPNode for unparsed text where tag attributes are thought to be
 *     attributes Optional associative array of parsed attributes
 *     inner      Contents of extension element
 *     close      Optional closing tag, expanded with $frame; only consulted
 *                when the tag is rebuilt as text. Missing or null means no
 *                closing text is appended.
 *     noClose    Original text did not have a close tag
 * @param PPFrame $frame
 *
 * @throws MWException If the marker type requested by a hook is not one of
 *   'none', 'nowiki' or 'general'
 * @return string The hook output itself (marker type 'none'), an expansion
 *   error string passed through unchanged, or a strip marker
 */
public function extensionSubstitution( $params, $frame ) {
	static $errorStr = '<span class="error">';
	static $errorLen = 20;

	$name = $frame->expand( $params['name'] );
	if ( substr( $name, 0, $errorLen ) === $errorStr ) {
		// Probably expansion depth or node count exceeded. Just punt the
		// error up.
		return $name;
	}

	$attrText = !isset( $params['attr'] ) ? null : $frame->expand( $params['attr'] );
	// Only inspect the text when there is some; substr() on null is pointless
	if ( $attrText !== null && substr( $attrText, 0, $errorLen ) === $errorStr ) {
		// See above
		return $attrText;
	}

	// We can't safely check if the expansion for $content resulted in an
	// error, because the content could happen to be the error string
	// (T149622).
	$content = !isset( $params['inner'] ) ? null : $frame->expand( $params['inner'] );

	// The marker uses the tag name as written, before any lowercasing below
	$marker = self::MARKER_PREFIX . "-$name-"
		. sprintf( '%08X', $this->mMarkerIndex++ ) . self::MARKER_SUFFIX;

	$isFunctionTag = isset( $this->mFunctionTagHooks[strtolower( $name )] ) &&
		( $this->ot['html'] || $this->ot['pre'] );
	if ( $isFunctionTag ) {
		$markerType = 'none';
	} else {
		$markerType = 'general';
	}
	if ( $this->ot['html'] || $isFunctionTag ) {
		$name = strtolower( $name );
		$attributes = Sanitizer::decodeTagAttributes( $attrText );
		if ( isset( $params['attributes'] ) ) {
			// Array union: attributes parsed from the text take precedence
			$attributes += $params['attributes'];
		}

		if ( isset( $this->mTagHooks[$name] ) ) {
			$output = call_user_func_array( $this->mTagHooks[$name],
				[ $content, $attributes, $this, $frame ] );
		} elseif ( isset( $this->mFunctionTagHooks[$name] ) ) {
			list( $callback, ) = $this->mFunctionTagHooks[$name];

			// Avoid PHP 7.1 warning from passing $this by reference
			$parser = $this;
			$output = call_user_func_array( $callback, [ &$parser, $frame, $content, $attributes ] );
		} else {
			$output = '<span class="error">Invalid tag extension name: ' .
				htmlspecialchars( $name ) . '</span>';
		}

		if ( is_array( $output ) ) {
			// Extract flags
			$flags = $output;
			$output = $flags[0];
			if ( isset( $flags['markerType'] ) ) {
				$markerType = $flags['markerType'];
			}
		}
	} else {
		if ( $attrText === null ) {
			$attrText = '';
		}
		if ( isset( $params['attributes'] ) ) {
			foreach ( $params['attributes'] as $attrName => $attrValue ) {
				$attrText .= ' ' . htmlspecialchars( $attrName ) . '="' .
					htmlspecialchars( $attrValue ) . '"';
			}
		}
		if ( $content === null ) {
			$output = "<$name$attrText/>";
		} else {
			// isset() treats a missing 'close' key like null, matching how
			// 'attr' and 'inner' are read above, instead of raising an
			// undefined-index notice.
			$close = isset( $params['close'] ) ? $frame->expand( $params['close'] ) : '';
			if ( substr( $close, 0, $errorLen ) === $errorStr ) {
				// See above
				return $close;
			}
			$output = "<$name$attrText>$content$close";
		}
	}

	if ( $markerType === 'none' ) {
		return $output;
	} elseif ( $markerType === 'nowiki' ) {
		$this->mStripState->addNoWiki( $marker, $output );
	} elseif ( $markerType === 'general' ) {
		$this->mStripState->addGeneral( $marker, $output );
	} else {
		throw new MWException( __METHOD__ . ': invalid marker type' );
	}
	return $marker;
}
4156
/**
 * Add to one of the include size counters, unless doing so would push it
 * past the configured maximum include size.
 *
 * @param string $type The type of expansion
 * @param int $size The size of the text
 * @return bool False if this inclusion would take it over the maximum
 *   (the counter is then left untouched), true otherwise
 */
public function incrementIncludeSize( $type, $size ) {
	$projected = $this->mIncludeSizes[$type] + $size;
	if ( $projected <= $this->mOptions->getMaxIncludeSize() ) {
		$this->mIncludeSizes[$type] += $size;
		return true;
	}

	return false;
}
4172
/**
 * Increment the expensive function count
 *
 * The counter is always bumped, even once the limit has been passed.
 *
 * @return bool False if the limit has been exceeded
 */
public function incrementExpensiveFunctionCount() {
	$limit = $this->mOptions->getExpensiveParserFunctionLimit();

	return ++$this->mExpensiveFunctionCount <= $limit;
}
4182
/**
 * Strip double-underscore items like __NOGALLERY__ and __NOTOC__
 * Fills $this->mDoubleUnderscores, returns the modified text
 *
 * Besides stripping, the switches found are applied to the parser state and
 * to the output (TOC flags, gallery flag, index policy, tracking categories,
 * page properties).
 *
 * @param string $text
 *
 * @return string
 */
public function doDoubleUnderscore( $text ) {
	# __TOC__ is handled first because its position has to be recorded
	$tocWord = $this->magicWordFactory->get( 'toc' );
	if ( $tocWord->match( $text ) ) {
		$this->mShowToc = true;
		$this->mForceTocPosition = true;

		# The first occurrence becomes a placeholder that is filled in with
		# the TOC at the end; any further occurrences are dropped.
		$text = $tocWord->replace( '<!--MWTOC\'"-->', $text, 1 );
		$text = $tocWord->replace( '', $text );
	}

	# Match and remove all the remaining switches
	$this->mDoubleUnderscores = $this->magicWordFactory
		->getDoubleUnderscoreArray()
		->matchAndRemove( $text );

	if ( isset( $this->mDoubleUnderscores['nogallery'] ) ) {
		$this->mOutput->mNoGallery = true;
	}

	# __NOTOC__ loses against an explicitly positioned __TOC__
	if ( !$this->mForceTocPosition && isset( $this->mDoubleUnderscores['notoc'] ) ) {
		$this->mShowToc = false;
	}

	if ( isset( $this->mDoubleUnderscores['hiddencat'] )
		&& $this->mTitle->getNamespace() == NS_CATEGORY
	) {
		$this->addTrackingCategory( 'hidden-category-category' );
	}

	# (T10068) Allow control over whether robots index a page.
	# 'index' is applied after 'noindex' on purpose:
	# __INDEX__ always overrides __NOINDEX__, see T16899
	$indexSwitches = [
		'noindex' => 'noindex-category',
		'index' => 'index-category',
	];
	foreach ( $indexSwitches as $policy => $trackingCategory ) {
		if ( isset( $this->mDoubleUnderscores[$policy] ) && $this->mTitle->canUseNoindex() ) {
			$this->mOutput->setIndexPolicy( $policy );
			$this->addTrackingCategory( $trackingCategory );
		}
	}

	# Cache all double underscores in the database
	foreach ( array_keys( $this->mDoubleUnderscores ) as $switchName ) {
		$this->mOutput->setProperty( $switchName, '' );
	}

	return $text;
}
4238
/**
 * Add a tracking category to the output, resolved against the title
 * currently being parsed.
 *
 * @see ParserOutput::addTrackingCategory()
 * @param string $msg Message key
 * @return bool Whether the addition was successful
 */
public function addTrackingCategory( $msg ) {
	$wasAdded = $this->mOutput->addTrackingCategory( $msg, $this->mTitle );

	return $wasAdded;
}
4247
/**
 * This function accomplishes several tasks:
 * 1) Auto-number headings if that option is enabled
 * 2) Add an [edit] link to sections for users who have enabled the option and can edit the page
 * 3) Add a Table of contents on the top for users who have enabled the option
 * 4) Auto-anchor headings
 *
 * It loops through all headlines, collects the necessary data, then splits up the
 * string and re-inserts the newly formatted headlines.
 *
 * @param string $text
 * @param string $origText Original, untouched wikitext
 * @param bool $isMain When true, the collected section data is stored on the
 *   output and the TOC may be added to the text before the first heading
 * @return string
 * @private
 */
public function formatHeadings( $text, $origText, $isMain = true ) {
	# Inhibit editsection links if requested in the page
	if ( isset( $this->mDoubleUnderscores['noeditsection'] ) ) {
		$maybeShowEditLink = false;
	} else {
		$maybeShowEditLink = true; /* Actual presence will depend on post-cache transforms */
	}

	# Get all headlines for numbering them and adding funky stuff like [edit]
	# links - this is for later, but we need the number of headlines right now
	# NOTE: white space in headings have been trimmed in doHeadings. They shouldn't
	# be trimmed here since whitespace in HTML headings is significant.
	$matches = [];
	$numMatches = preg_match_all(
		'/<H(?P<level>[1-6])(?P<attrib>.*?>)(?P<header>[\s\S]*?)<\/H[1-6] *>/i',
		$text,
		$matches
	);

	# if there are fewer than 4 headlines in the article, do not show TOC
	# unless it's been explicitly enabled.
	$enoughToc = $this->mShowToc &&
		( ( $numMatches >= 4 ) || $this->mForceTocPosition );

	# Allow user to stipulate that a page should have a "new section"
	# link added via __NEWSECTIONLINK__
	if ( isset( $this->mDoubleUnderscores['newsectionlink'] ) ) {
		$this->mOutput->setNewSection( true );
	}

	# Allow user to remove the "new section"
	# link via __NONEWSECTIONLINK__
	if ( isset( $this->mDoubleUnderscores['nonewsectionlink'] ) ) {
		$this->mOutput->hideNewSection( true );
	}

	# if the string __FORCETOC__ (not case-sensitive) occurs in the HTML,
	# override above conditions and always show TOC above first header
	if ( isset( $this->mDoubleUnderscores['forcetoc'] ) ) {
		$this->mShowToc = true;
		$enoughToc = true;
	}

	# headline counter
	$headlineCount = 0;
	$numVisible = 0;

	# Ugh .. the TOC should have neat indentation levels which can be
	# passed to the skin functions. These are determined here
	$toc = '';
	$full = '';
	$head = [];
	$sublevelCount = [];
	$levelCount = [];
	$level = 0;
	$prevlevel = 0;
	$toclevel = 0;
	$prevtoclevel = 0;
	$markerRegex = self::MARKER_PREFIX . "-h-(\d+)-" . self::MARKER_SUFFIX;
	$baseTitleText = $this->mTitle->getPrefixedDBkey();
	# The original wikitext is re-preprocessed in OT_WIKI mode so that the
	# offset of each section can be measured; the mode is restored after the loop
	$oldType = $this->mOutputType;
	$this->setOutputType( self::OT_WIKI );
	$frame = $this->getPreprocessor()->newFrame();
	$root = $this->preprocessToDom( $origText );
	$node = $root->getFirstChild();
	$byteOffset = 0;
	$tocraw = [];
	# Lower-cased anchors handed out so far, used to keep ids unique
	$refers = [];

	$headlines = $numMatches !== false ? $matches[3] : [];

	$maxTocLevel = $this->svcOptions->get( 'MaxTocLevel' );
	foreach ( $headlines as $headline ) {
		$isTemplate = false;
		$titleText = false;
		$sectionIndex = false;
		$numbering = '';
		$markerMatches = [];
		if ( preg_match( "/^$markerRegex/", $headline, $markerMatches ) ) {
			$serial = $markerMatches[1];
			list( $titleText, $sectionIndex ) = $this->mHeadings[$serial];
			$isTemplate = ( $titleText != $baseTitleText );
			$headline = preg_replace( "/^$markerRegex\\s*/", "", $headline );
		}

		if ( $toclevel ) {
			$prevlevel = $level;
		}
		$level = $matches[1][$headlineCount];

		if ( $level > $prevlevel ) {
			# Increase TOC level
			$toclevel++;
			$sublevelCount[$toclevel] = 0;
			if ( $toclevel < $maxTocLevel ) {
				$prevtoclevel = $toclevel;
				$toc .= Linker::tocIndent();
				$numVisible++;
			}
		} elseif ( $level < $prevlevel && $toclevel > 1 ) {
			# Decrease TOC level, find level to jump to

			for ( $i = $toclevel; $i > 0; $i-- ) {
				if ( $levelCount[$i] == $level ) {
					# Found last matching level
					$toclevel = $i;
					break;
				} elseif ( $levelCount[$i] < $level ) {
					# Found first matching level below current level
					$toclevel = $i + 1;
					break;
				}
			}
			if ( $i == 0 ) {
				$toclevel = 1;
			}
			if ( $toclevel < $maxTocLevel ) {
				if ( $prevtoclevel < $maxTocLevel ) {
					# Unindent only if the previous toc level was shown :p
					$toc .= Linker::tocUnindent( $prevtoclevel - $toclevel );
					$prevtoclevel = $toclevel;
				} else {
					$toc .= Linker::tocLineEnd();
				}
			}
		} else {
			# No change in level, end TOC line
			if ( $toclevel < $maxTocLevel ) {
				$toc .= Linker::tocLineEnd();
			}
		}

		$levelCount[$toclevel] = $level;

		# count number of headlines for each level
		$sublevelCount[$toclevel]++;
		$dot = 0;
		for ( $i = 1; $i <= $toclevel; $i++ ) {
			if ( !empty( $sublevelCount[$i] ) ) {
				if ( $dot ) {
					$numbering .= '.';
				}
				$numbering .= $this->getTargetLanguage()->formatNum( $sublevelCount[$i] );
				$dot = 1;
			}
		}

		# The safe header is a version of the header text safe to use for links

		# Remove link placeholders by the link text.
		#     <!--LINK number-->
		# turns into
		#     link text with suffix
		# Do this before unstrip since link text can contain strip markers
		$safeHeadline = $this->replaceLinkHoldersText( $headline );

		# Avoid insertion of weird stuff like <math> by expanding the relevant sections
		$safeHeadline = $this->mStripState->unstripBoth( $safeHeadline );

		# Remove any <style> or <script> tags (T198618)
		$safeHeadline = preg_replace(
			'#<(style|script)(?: [^>]*[^>/])?>.*?</\1>#is',
			'',
			$safeHeadline
		);

		# Strip out HTML (first regex removes any tag not allowed)
		# Allowed tags are:
		# * <sup> and <sub> (T10393)
		# * <i> (T28375)
		# * <b> (r105284)
		# * <bdi> (T74884)
		# * <span dir="rtl"> and <span dir="ltr"> (T37167)
		# * <s> and <strike> (T35715)
		# We strip any parameter from accepted tags (second regex), except dir="rtl|ltr" from <span>,
		# to allow setting directionality in toc items.
		$tocline = preg_replace(
			[
				'#<(?!/?(span|sup|sub|bdi|i|b|s|strike)(?: [^>]*)?>).*?>#',
				'#<(/?(?:span(?: dir="(?:rtl|ltr)")?|sup|sub|bdi|i|b|s|strike))(?: .*?)?>#'
			],
			[ '', '<$1>' ],
			$safeHeadline
		);

		# Strip '<span></span>', which is the result from the above if
		# <span id="foo"></span> is used to produce an additional anchor
		# for a section.
		$tocline = str_replace( '<span></span>', '', $tocline );

		$tocline = trim( $tocline );

		# For the anchor, strip out HTML-y stuff period
		$safeHeadline = preg_replace( '/<.*?>/', '', $safeHeadline );
		$safeHeadline = Sanitizer::normalizeSectionNameWhitespace( $safeHeadline );

		# Save headline for section edit hint before it's escaped
		$headlineHint = $safeHeadline;

		# Decode HTML entities
		$safeHeadline = Sanitizer::decodeCharReferences( $safeHeadline );

		$safeHeadline = self::normalizeSectionName( $safeHeadline );

		$fallbackHeadline = Sanitizer::escapeIdForAttribute( $safeHeadline, Sanitizer::ID_FALLBACK );
		$linkAnchor = Sanitizer::escapeIdForLink( $safeHeadline );
		$safeHeadline = Sanitizer::escapeIdForAttribute( $safeHeadline, Sanitizer::ID_PRIMARY );
		if ( $fallbackHeadline === $safeHeadline ) {
			# No reason to have both (in fact, we can't)
			$fallbackHeadline = false;
		}

		# HTML IDs must be case-insensitively unique for IE compatibility (T12721).
		# @todo FIXME: We may be changing them depending on the current locale.
		$arrayKey = strtolower( $safeHeadline );
		if ( $fallbackHeadline === false ) {
			$fallbackArrayKey = false;
		} else {
			$fallbackArrayKey = strtolower( $fallbackHeadline );
		}

		# Create the anchor for linking from the TOC to the section
		$anchor = $safeHeadline;
		$fallbackAnchor = $fallbackHeadline;
		if ( isset( $refers[$arrayKey] ) ) {
			// phpcs:ignore Generic.Formatting.DisallowMultipleStatements
			for ( $i = 2; isset( $refers["${arrayKey}_$i"] ); ++$i );
			$anchor .= "_$i";
			$linkAnchor .= "_$i";
			$refers["${arrayKey}_$i"] = true;
		} else {
			$refers[$arrayKey] = true;
		}
		# Only touch $refers for the fallback when there actually is one.
		# With no fallback the key would be boolean false, which PHP stores
		# as array key 0 and which would then collide with a heading whose
		# id is "0".
		if ( $fallbackHeadline !== false ) {
			if ( isset( $refers[$fallbackArrayKey] ) ) {
				// phpcs:ignore Generic.Formatting.DisallowMultipleStatements
				for ( $i = 2; isset( $refers["${fallbackArrayKey}_$i"] ); ++$i );
				$fallbackAnchor .= "_$i";
				$refers["${fallbackArrayKey}_$i"] = true;
			} else {
				$refers[$fallbackArrayKey] = true;
			}
		}

		# Don't number the heading if it is the only one (looks silly)
		if ( count( $matches[3] ) > 1 && $this->mOptions->getNumberHeadings() ) {
			# the two are different if the line contains a link
			$headline = Html::element(
				'span',
				[ 'class' => 'mw-headline-number' ],
				$numbering
			) . ' ' . $headline;
		}

		if ( $enoughToc && ( !isset( $maxTocLevel ) || $toclevel < $maxTocLevel ) ) {
			$toc .= Linker::tocLine( $linkAnchor, $tocline,
				$numbering, $toclevel, ( $isTemplate ? false : $sectionIndex ) );
		}

		# Add the section to the section tree
		# Find the DOM node for this header
		$noOffset = ( $isTemplate || $sectionIndex === false );
		while ( $node && !$noOffset ) {
			if ( $node->getName() === 'h' ) {
				$bits = $node->splitHeading();
				if ( $bits['i'] == $sectionIndex ) {
					break;
				}
			}
			# NOTE(review): despite the name, this adds mb_strlen(), i.e. a
			# character count, not a byte count
			$byteOffset += mb_strlen( $this->mStripState->unstripBoth(
				$frame->expand( $node, PPFrame::RECOVER_ORIG ) ) );
			$node = $node->getNextSibling();
		}
		$tocraw[] = [
			'toclevel' => $toclevel,
			'level' => $level,
			'line' => $tocline,
			'number' => $numbering,
			'index' => ( $isTemplate ? 'T-' : '' ) . $sectionIndex,
			'fromtitle' => $titleText,
			'byteoffset' => ( $noOffset ? null : $byteOffset ),
			'anchor' => $anchor,
		];

		# give headline the correct <h#> tag
		if ( $maybeShowEditLink && $sectionIndex !== false ) {
			// Output edit section links as markers with styles that can be customized by skins
			if ( $isTemplate ) {
				# Put a T flag in the section identifier, to indicate to extractSections()
				# that sections inside <includeonly> should be counted.
				$editsectionPage = $titleText;
				$editsectionSection = "T-$sectionIndex";
				$editsectionContent = null;
			} else {
				$editsectionPage = $this->mTitle->getPrefixedText();
				$editsectionSection = $sectionIndex;
				$editsectionContent = $headlineHint;
			}
			// We use a bit of pseudo-xml for editsection markers. The
			// language converter is run later on. Using a UNIQ style marker
			// leads to the converter screwing up the tokens when it
			// converts stuff. And trying to insert strip tags fails too. At
			// this point all real inputted tags have already been escaped,
			// so we don't have to worry about a user trying to input one of
			// these markers directly. We use a page and section attribute
			// to stop the language converter from converting these
			// important bits of data, but put the headline hint inside a
			// content block because the language converter is supposed to
			// be able to convert that piece of data.
			// Gets replaced with html in ParserOutput::getText
			$editlink = '<mw:editsection page="' . htmlspecialchars( $editsectionPage );
			$editlink .= '" section="' . htmlspecialchars( $editsectionSection ) . '"';
			if ( $editsectionContent !== null ) {
				$editlink .= '>' . $editsectionContent . '</mw:editsection>';
			} else {
				$editlink .= '/>';
			}
		} else {
			$editlink = '';
		}
		$head[$headlineCount] = Linker::makeHeadline( $level,
			$matches['attrib'][$headlineCount], $anchor, $headline,
			$editlink, $fallbackAnchor );

		$headlineCount++;
	}

	$this->setOutputType( $oldType );

	# Never ever show TOC if no headers
	if ( $numVisible < 1 ) {
		$enoughToc = false;
	}

	if ( $enoughToc ) {
		if ( $prevtoclevel > 0 && $prevtoclevel < $maxTocLevel ) {
			$toc .= Linker::tocUnindent( $prevtoclevel - 1 );
		}
		$toc = Linker::tocList( $toc, $this->mOptions->getUserLangObj() );
		$this->mOutput->setTOCHTML( $toc );
		$toc = self::TOC_START . $toc . self::TOC_END;
	}

	if ( $isMain ) {
		$this->mOutput->setSections( $tocraw );
	}

	# split up and insert constructed headlines
	$blocks = preg_split( '/<H[1-6].*?>[\s\S]*?<\/H[1-6]>/i', $text );
	$i = 0;

	// build an array of document sections
	$sections = [];
	foreach ( $blocks as $block ) {
		// $head is zero-based, sections aren't.
		if ( empty( $head[$i - 1] ) ) {
			$sections[$i] = $block;
		} else {
			$sections[$i] = $head[$i - 1] . $block;
		}

		/**
		 * Send a hook, one per section.
		 * The idea here is to be able to make section-level DIVs, but to do so in a
		 * lower-impact, more correct way than r50769
		 *
		 * $this : caller
		 * $section : the section number
		 * &$sectionContent : ref to the content of the section
		 * $maybeShowEditLinks : boolean describing whether this section has an edit link
		 */
		Hooks::run( 'ParserSectionCreate', [ $this, $i, &$sections[$i], $maybeShowEditLink ] );

		$i++;
	}

	if ( $enoughToc && $isMain && !$this->mForceTocPosition ) {
		// append the TOC at the beginning
		// Top anchor now in skin
		$sections[0] .= $toc . "\n";
	}

	$full .= implode( '', $sections );

	if ( $this->mForceTocPosition ) {
		// Fill the placeholder left behind for an explicit __TOC__
		return str_replace( '<!--MWTOC\'"-->', $toc, $full );
	} else {
		return $full;
	}
}
4652
/**
 * Transform wiki markup when saving a page by doing "\r\n" -> "\n"
 * conversion, substituting signatures, {{subst:}} templates, etc.
 *
 * The substitution pass only runs when the options enable the pre-save
 * transform; NULL stripping, line-ending normalisation and unstripping
 * always happen.
 *
 * @param string $text The text to transform
 * @param Title $title The Title object for the current article
 * @param User $user The User object describing the current user
 * @param ParserOptions $options Parsing options
 * @param bool $clearState Whether to clear the parser state first
 * @return string The altered wiki markup
 */
public function preSaveTransform( $text, Title $title, User $user,
	ParserOptions $options, $clearState = true
) {
	if ( $clearState ) {
		// Never read, but must stay alive until this method returns
		// NOTE(review): presumably lock() hands back a scope guard that
		// releases the lock when destroyed -- confirm against lock()
		$scopedLock = $this->lock();
	}
	$this->startParse( $title, $options, self::OT_WIKI, $clearState );
	$this->setUser( $user );

	// Strip U+0000 NULL (T159174)
	$withoutNulls = str_replace( "\000", '', $text );

	// Line endings are still normalized here for backwards-compatibility
	// with other code that just calls PST, although TextContent subclasses
	// should already have taken care of it
	$wikitext = TextContent::normalizeLineEndings( $withoutNulls );

	if ( $options->getPreSaveTransform() ) {
		$wikitext = $this->pstPass2( $wikitext, $user );
	}
	$wikitext = $this->mStripState->unstripBoth( $wikitext );

	$this->setUser( null ); # Reset

	return $wikitext;
}
4690
/**
 * Pre-save transform helper function
 *
 * Runs {{subst:}} expansion, replaces the ~~~ / ~~~~ / ~~~~~ signature forms
 * and expands the "pipe trick" link shorthands.
 *
 * @param string $text
 * @param User $user
 *
 * @return string
 */
private function pstPass2( $text, $user ) {
	# Note: This is the timestamp saved as hardcoded wikitext to the database, we use
	# $this->contLang here in order to give everyone the same signature and use the default one
	# rather than the one selected in each user's preferences. (see also T14815)
	$ts = $this->mOptions->getTimestamp();
	$timestamp = MWTimestamp::getLocalInstance( $ts );
	$ts = $timestamp->format( 'YmdHis' );
	$tzMsg = $timestamp->getTimezoneMessage()->inContentLanguage()->text();

	$d = $this->contLang->timeanddate( $ts, false, false ) . " ($tzMsg)";

	# Variable replacement
	# Because mOutputType is OT_WIKI, this will only process {{subst:xxx}} type tags
	$text = $this->replaceVariables( $text );

	# This works almost by chance, as the replaceVariables are done before the getUserSig(),
	# which may corrupt this parser instance via its wfMessage()->text() call-

	# Signatures
	if ( strpos( $text, '~~~' ) !== false ) {
		$sigText = $this->getUserSig( $user );
		# strtr() with an array tries the longest key first, so the five- and
		# four-tilde forms are matched before the three-tilde one
		$text = strtr( $text, [
			'~~~~~' => $d,
			'~~~~' => "$sigText $d",
			'~~~' => $sigText
		] );
		# The main two signature forms used above are time-sensitive
		$this->setOutputFlag( 'user-signature', 'User signature detected' );
	}

	# Context links ("pipe tricks"): [[|name]] and [[name (context)|]]
	$tc = '[' . Title::legalChars() . ']';
	$nc = '[ _0-9A-Za-z\x80-\xff-]'; # Namespaces can use non-ascii!

	# The full-width characters below are spelled as \u{...} escapes so that
	# they cannot be silently folded into their ASCII look-alikes. With plain
	# ASCII parentheses $p4 degenerates into a bare capture group and matches
	# every "[[xy|]]" link.

	// [[ns:page (context)|]]
	$p1 = "/\[\[(:?$nc+:|:|)($tc+?)( ?\\($tc+\\))\\|]]/";
	// [[ns:page(context)|]] with full-width parentheses U+FF08 / U+FF09
	// (double-width brackets, added in r40257)
	$p4 = "/\[\[(:?$nc+:|:|)($tc+?)( ?\u{FF08}$tc+\u{FF09})\\|]]/";
	// [[ns:page (context), context|]] (using either single or double-width
	// comma, the latter being U+FF0C)
	$p3 = "/\[\[(:?$nc+:|:|)($tc+?)( ?\\($tc+\\)|)((?:, |\u{FF0C})$tc+|)\\|]]/";
	// [[|page]] (reverse pipe trick: add context from page title)
	$p2 = "/\[\[\\|($tc+)]]/";

	# try $p1 first, to turn "[[A, B (C)|]]" into "[[A, B (C)|A, B]]"
	$text = preg_replace( $p1, '[[\\1\\2\\3|\\2]]', $text );
	$text = preg_replace( $p4, '[[\\1\\2\\3|\\2]]', $text );
	$text = preg_replace( $p3, '[[\\1\\2\\3\\4|\\2]]', $text );

	$t = $this->mTitle->getText();
	$m = [];
	if ( preg_match( "/^($nc+:|)$tc+?( \\($tc+\\))$/", $t, $m ) ) {
		# Current title looks like "ns:page (context)": reuse ns and context
		$text = preg_replace( $p2, "[[$m[1]\\1$m[2]|\\1]]", $text );
	} elseif ( preg_match( "/^($nc+:|)$tc+?(, $tc+|)$/", $t, $m ) && "$m[1]$m[2]" != '' ) {
		# Current title looks like "ns:page, context": reuse ns and context
		$text = preg_replace( $p2, "[[$m[1]\\1$m[2]|\\1]]", $text );
	} else {
		# if there's no context, don't bother duplicating the title
		$text = preg_replace( $p2, '[[\\1]]', $text );
	}

	return $text;
}
4760
/**
 * Build the signature wikitext for a user, normalized so that it is
 * validated and ready to insert.
 *
 * Callers that already know the nickname or the fancySig preference can
 * pass them in to avoid looking them up on the user object.
 * Do not reuse this parser instance after calling getUserSig(),
 * as it may have changed.
 *
 * @param User &$user
 * @param string|bool $nickname Nickname to use or false to use user's default nickname
 * @param bool|null $fancySig whether the nickname is the complete signature
 *  or null to use default value
 * @return string
 */
public function getUserSig( &$user, $nickname = false, $fancySig = null ) {
	$username = $user->getName();

	# Anything the caller did not supply is read from the user object
	if ( $nickname === false ) {
		$nickname = $user->getOption( 'nickname' );
	}
	if ( $fancySig === null ) {
		$fancySig = $user->getBoolOption( 'fancysig' );
	}

	# Loose comparison on purpose: null and the empty string both select the user name
	if ( $nickname == null ) {
		$nickname = $username;
	}

	$maxSigChars = $this->svcOptions->get( 'MaxSigChars' );
	if ( mb_strlen( $nickname ) > $maxSigChars ) {
		$nickname = $username;
		$this->logger->debug( __METHOD__ . ": $username has overlong signature." );
	} elseif ( $fancySig !== false ) {
		# The signature may contain markup, so it has to validate first
		if ( $this->validateSig( $nickname ) !== false ) {
			# Valid: clean it up (if needed) and use it verbatim
			return $this->cleanSig( $nickname, true );
		}
		# Invalid: fall back to the default signature
		$nickname = $username;
		$this->logger->debug( __METHOD__ . ": $username has bad XML tags in signature." );
	}

	# Reaching this point means a plain link to the user page is produced.
	# Strip tildes first so the nickname cannot smuggle in a sig in a sig.
	$msgName = $user->isAnon() ? 'signature-anon' : 'signature';
	$userText = wfEscapeWikiText( $username );
	$nickText = wfEscapeWikiText( self::cleanSigInSig( $nickname ) );

	$msg = wfMessage( $msgName, $userText, $nickText )->inContentLanguage();
	return $msg->title( $this->getTitle() )->text();
}
4815
/**
 * Reject signatures that are not a well-formed XML fragment.
 *
 * @param string $text Signature text to check
 * @return string|bool The unchanged input when it is well formed, false otherwise.
 */
public function validateSig( $text ) {
	if ( !Xml::isWellFormedXmlFragment( $text ) ) {
		return false;
	}

	return $text;
}
4825
/**
 * Clean up signature text
 *
 * 1) Strip 3, 4 or 5 tildes out of signatures @see cleanSigInSig
 * 2) Substitute all transclusions
 *
 * @param string $text
 * @param bool $parsing Whether we're cleaning (preferences save) or parsing.
 *  When false, the parser state is set up here from $wgTitle and default
 *  ParserOptions, and strip markers are resolved before returning.
 * @return string Signature text
 */
public function cleanSig( $text, $parsing = false ) {
	$standalone = !$parsing;

	if ( $standalone ) {
		global $wgTitle;
		# Kept in a local only for its lifetime.
		# NOTE(review): presumably a scoped lock released when this method returns - confirm in lock()
		$lockScope = $this->lock();
		$this->startParse( $wgTitle, new ParserOptions(), self::OT_PREPROCESS, true );
	}

	# Option to disable this feature
	if ( !$this->mOptions->getCleanSignatures() ) {
		return $text;
	}

	# @todo FIXME: Regex doesn't respect extension tags or nowiki
	#  => Move this logic to braceSubstitution()
	$subst = $this->magicWordFactory->get( 'subst' );
	# Matches every "{{" that is not already followed by a subst keyword
	$nonSubstBraces = '/\{\{(?!(?:' . $subst->getBaseRegex() . '))/x' . $subst->getRegexCase();
	$substPrefix = '{{' . $subst->getSynonym( 0 );

	$text = self::cleanSigInSig( preg_replace( $nonSubstBraces, $substPrefix, $text ) );
	$dom = $this->preprocessToDom( $text );
	$text = $this->getPreprocessor()->newFrame()->expand( $dom );

	return $standalone ? $this->mStripState->unstripBoth( $text ) : $text;
}
4866
/**
 * Remove signature shortcuts (runs of 3, 4 or 5 tildes) from a signature.
 *
 * Matching is greedy and non-overlapping, so a run of six tildes leaves
 * a single tilde behind.
 *
 * @param string $text
 * @return string Signature text with /~{3,5}/ removed
 */
public static function cleanSigInSig( $text ) {
	return preg_replace( '/~{3,5}/', '', $text );
}
4877
/**
 * Prepare the state that parse() would normally set up, so that external
 * code can call individual class members with confidence.
 *
 * @param Title|null $title
 * @param ParserOptions $options
 * @param int $outputType
 * @param bool $clearState
 * @param int|null $revId Revision ID to record; null leaves the current one untouched
 */
public function startExternalParse( Title $title = null, ParserOptions $options,
	$outputType, $clearState = true, $revId = null
) {
	$this->startParse( $title, $options, $outputType, $clearState );

	if ( $revId === null ) {
		return;
	}
	$this->mRevisionId = $revId;
}
4896
/**
 * Install title, options and output type on this parser and optionally
 * reset its state.
 *
 * @param Title|null $title
 * @param ParserOptions $options
 * @param int $outputType
 * @param bool $clearState Whether to call clearState() after the setup
 */
private function startParse( Title $title = null, ParserOptions $options,
	$outputType, $clearState = true
) {
	$this->setTitle( $title );
	$this->mOptions = $options;
	$this->setOutputType( $outputType );

	if ( !$clearState ) {
		return;
	}
	$this->clearState();
}
4913
/**
 * Wrapper for preprocess()
 *
 * Re-entrant calls (made while an outer transformMsg() is still running)
 * return $text unchanged instead of recursing.
 *
 * @param string $text The text to preprocess
 * @param ParserOptions $options
 * @param Title|null $title Title object or null to use $wgTitle
 * @return string
 */
public function transformMsg( $text, $options, $title = null ) {
	static $executing = false;

	# Guard against infinite recursion
	if ( $executing ) {
		return $text;
	}
	$executing = true;

	try {
		if ( !$title ) {
			global $wgTitle;
			$title = $wgTitle;
		}

		return $this->preprocess( $text, $title, $options );
	} finally {
		# Release the guard even when preprocess() throws; otherwise the flag
		# would stay set and every later call would return its input untransformed.
		$executing = false;
	}
}
4941
/**
 * Create an HTML-style tag, e.g. "<yourtag>special text</yourtag>"
 * The callback should have the following form:
 *    function myParserHook( $text, $params, $parser, $frame ) { ... }
 *
 * Transform and return $text. Use $parser for any required context, e.g. use
 * $parser->getTitle() and $parser->getOptions() not $wgTitle or $wgOut->mParserOptions
 *
 * Hooks may return extended information by returning an array, of which the
 * first numbered element (index 0) must be the return string, and all other
 * entries are extracted into local variables within an internal function
 * in the Parser class.
 *
 * This interface (introduced r61913) appears to be undocumented, but
 * 'markerType' is used by some core tag hooks to override which strip
 * array their results are placed in. **Use great caution if attempting
 * this interface, as it is not documented and injudicious use could smash
 * private variables.**
 *
 * @param string $tag The tag to use, e.g. 'hook' for "<hook>"; it is lowercased before use
 * @param callable $callback The callback function (and object) to use for the tag
 * @throws MWException If the tag name contains "<", ">", CR or LF
 * @return callable|null The old value of the mTagHooks array associated with the hook
 */
public function setHook( $tag, callable $callback ) {
	$tag = strtolower( $tag );
	if ( preg_match( '/[<>\r\n]/', $tag, $m ) ) {
		throw new MWException( "Invalid character {$m[0]} in setHook('$tag', ...) call" );
	}
	$oldVal = $this->mTagHooks[$tag] ?? null;
	$this->mTagHooks[$tag] = $callback;
	# Strict comparison: a loose in_array() treats numeric-looking tag names
	# such as '1e1' and '10' as equal and would skip the registration.
	if ( !in_array( $tag, $this->mStripList, true ) ) {
		$this->mStripList[] = $tag;
	}

	return $oldVal;
}
4979
/**
 * As setHook(), but letting the contents be parsed.
 *
 * Transparent tag hooks are like regular XML-style tag hooks, except they
 * operate late in the transformation sequence, on HTML instead of wikitext.
 *
 * This is probably obsoleted by things dealing with parser frames?
 * The only extension currently using it is geoserver.
 *
 * @since 1.10
 * @todo better document or deprecate this
 *
 * @param string $tag The tag to use, e.g. 'hook' for "<hook>"; it is lowercased before use
 * @param callable $callback The callback function (and object) to use for the tag
 * @throws MWException If the tag name contains "<", ">", CR or LF
 * @return callable|null The old value of the mTransparentTagHooks array associated with the hook
 */
public function setTransparentTagHook( $tag, callable $callback ) {
	$tag = strtolower( $tag );
	if ( preg_match( '/[<>\r\n]/', $tag, $m ) ) {
		# The message names this method, so the failing call can be found by searching for it
		throw new MWException( "Invalid character {$m[0]} in setTransparentTagHook('$tag', ...) call" );
	}
	$oldVal = $this->mTransparentTagHooks[$tag] ?? null;
	$this->mTransparentTagHooks[$tag] = $callback;

	return $oldVal;
}
5007
/**
 * Remove all tag hooks and function tag hooks, and reset the strip list
 * to its default contents.
 */
public function clearTagHooks() {
	$this->mStripList = $this->mDefaultStripList;
	$this->mFunctionTagHooks = [];
	$this->mTagHooks = [];
}
5016
/**
 * Create a function, e.g. {{sum:1|2|3}}
 * The callback function should have the form:
 *    function myParserFunction( &$parser, $arg1, $arg2, $arg3 ) { ... }
 *
 * Or with Parser::SFH_OBJECT_ARGS:
 *    function myParserFunction( $parser, $frame, $args ) { ... }
 *
 * The callback may either return the text result of the function, or an array with the text
 * in element 0, and a number of flags in the other elements. The names of the flags are
 * specified in the keys. Valid flags are:
 *   found                     The text returned is valid, stop processing the template. This
 *                             is on by default.
 *   nowiki                    Wiki markup in the return value should be escaped
 *   isHTML                    The returned text is HTML, armour it against wikitext transformation
 *
 * @param string $id The magic word ID
 * @param callable $callback The callback function (and object) to use
 * @param int $flags A combination of the following flags:
 *     Parser::SFH_NO_HASH      No leading hash, i.e. {{plural:...}} instead of {{#if:...}}
 *
 *     Parser::SFH_OBJECT_ARGS  Pass the template arguments as PPNode objects instead of text.
 *     This allows for conditional expansion of the parse tree, allowing you to eliminate dead
 *     branches and thus speed up parsing. It is also possible to analyse the parse tree of
 *     the arguments, and to control the way they are expanded.
 *
 *     The $frame parameter is a PPFrame. This can be used to produce expanded text from the
 *     arguments, for instance:
 *         $text = isset( $args[0] ) ? $frame->expand( $args[0] ) : '';
 *
 *     For technical reasons, $args[0] is pre-expanded and will be a string. This may change in
 *     future versions. Please call $frame->expand() on it anyway so that your code keeps
 *     working if/when this is changed.
 *
 *     If you want whitespace to be trimmed from $args, you need to do it yourself, post-
 *     expansion.
 *
 *     Please read the documentation in includes/parser/Preprocessor.php for more information
 *     about the methods available in PPFrame and PPNode.
 *
 * @throws MWException If $id is not a magic word identifier
 * @return callable|null The old callback function for this name, or null if there was none
 */
public function setFunctionHook( $id, callable $callback, $flags = 0 ) {
	# Resolve the magic word before touching any state, so that an invalid
	# identifier cannot leave a hook registered without any synonyms.
	$mw = $this->magicWordFactory->get( $id );
	if ( !$mw ) {
		throw new MWException( __METHOD__ . '() expecting a magic word identifier.' );
	}

	$oldVal = $this->mFunctionHooks[$id][0] ?? null;
	$this->mFunctionHooks[$id] = [ $callback, $flags ];

	# Add to function cache
	$synonyms = $mw->getSynonyms();
	# 0 or 1; used as the first-level key of mFunctionSynonyms
	$sensitive = intval( $mw->isCaseSensitive() );

	foreach ( $synonyms as $syn ) {
		# Case
		if ( !$sensitive ) {
			$syn = $this->contLang->lc( $syn );
		}
		# Add leading hash
		if ( !( $flags & self::SFH_NO_HASH ) ) {
			$syn = '#' . $syn;
		}
		# Remove trailing colon
		if ( substr( $syn, -1, 1 ) === ':' ) {
			$syn = substr( $syn, 0, -1 );
		}
		$this->mFunctionSynonyms[$sensitive][$syn] = $id;
	}
	return $oldVal;
}
5090
/**
 * List the identifiers of every registered function hook.
 *
 * firstCallInit() runs first, before the hook table is read.
 *
 * @return array
 */
public function getFunctionHooks() {
	$this->firstCallInit();
	$hookIds = array_keys( $this->mFunctionHooks );

	return $hookIds;
}
5100
/**
 * Create a tag function, e.g. "<test>some stuff</test>".
 * Unlike tag hooks, tag functions are parsed at preprocessor level.
 * Unlike parser functions, their content is not preprocessed.
 *
 * @param string $tag Tag name; it is lowercased before use
 * @param callable $callback
 * @param int $flags
 * @throws MWException If the tag name contains "<", ">", CR or LF
 * @return array|null The previous [ callback, flags ] pair registered for the tag, or null
 */
public function setFunctionTagHook( $tag, callable $callback, $flags ) {
	$tag = strtolower( $tag );
	if ( preg_match( '/[<>\r\n]/', $tag, $m ) ) {
		throw new MWException( "Invalid character {$m[0]} in setFunctionTagHook('$tag', ...) call" );
	}
	$old = $this->mFunctionTagHooks[$tag] ?? null;
	$this->mFunctionTagHooks[$tag] = [ $callback, $flags ];

	# Strict comparison: a loose in_array() treats numeric-looking tag names
	# such as '1e1' and '10' as equal and would skip the registration.
	if ( !in_array( $tag, $this->mStripList, true ) ) {
		$this->mStripList[] = $tag;
	}

	return $old;
}
5125
/**
 * Replace "<!--LINK-->" link placeholders in the buffer with actual links.
 * The work is delegated to the parser's link holder array.
 *
 * @param string &$text Buffer that is handed to the link holder array for replacement
 * @param int $options Not used by this method
 */
public function replaceLinkHolders( &$text, $options = 0 ) {
	$holders = $this->mLinkHolders;
	$holders->replace( $text );
}
5136
/**
 * Replace "<!--LINK-->" link placeholders with the plain text of the
 * links (not HTML-formatted).
 *
 * @param string $text
 * @return string
 */
public function replaceLinkHoldersText( $text ) {
	$plain = $this->mLinkHolders->replaceText( $text );

	return $plain;
}
5147
/**
 * Renders an image gallery from a text with one line per image.
 * Text labels may be given by using |-style alternative text. E.g.
 *   Image:one.jpg|The number "1"
 *   Image:tree.jpg|A tree
 * given as text will return the HTML of a gallery with two images,
 * labeled 'The number "1"' and
 * 'A tree'.
 *
 * @param string $text Gallery body, one image per line
 * @param array $params Gallery tag attributes; the keys read here are 'mode',
 *  'showfilename', 'caption', 'perrow', 'widths' and 'heights'
 * @return string HTML
 */
public function renderImageGallery( $text, $params ) {
	$mode = $params['mode'] ?? false;

	try {
		$ig = ImageGalleryBase::factory( $mode );
	} catch ( Exception $e ) {
		// If invalid type set, fallback to default.
		$ig = ImageGalleryBase::factory( false );
	}

	$ig->setContextTitle( $this->mTitle );
	$ig->setShowBytes( false );
	$ig->setShowDimensions( false );
	$ig->setShowFilename( false );
	$ig->setParser( $this );
	$ig->setHideBadImages();
	$ig->setAttributes( Sanitizer::validateTagAttributes( $params, 'ul' ) );

	// File names are hidden by default (see above); only the mere presence
	// of the attribute turns them on.
	if ( isset( $params['showfilename'] ) ) {
		$ig->setShowFilename( true );
	}
	if ( isset( $params['caption'] ) ) {
		// NOTE: We aren't passing a frame here or below.  Frame info
		// is currently opaque to Parsoid, which acts on OT_PREPROCESS.
		// See T107332#4030581
		$caption = $this->recursiveTagParse( $params['caption'] );
		$ig->setCaptionHtml( $caption );
	}
	if ( isset( $params['perrow'] ) ) {
		$ig->setPerRow( $params['perrow'] );
	}
	if ( isset( $params['widths'] ) ) {
		$ig->setWidths( $params['widths'] );
	}
	if ( isset( $params['heights'] ) ) {
		$ig->setHeights( $params['heights'] );
	}
	$ig->setAdditionalOptions( $params );

	// Avoid PHP 7.1 warning from passing $this by reference
	$parser = $this;
	Hooks::run( 'BeforeParserrenderImageGallery', [ &$parser, &$ig ] );

	$lines = StringUtils::explode( "\n", $text );
	foreach ( $lines as $line ) {
		# match lines like these:
		# Image:someimage.jpg|This is some image
		$matches = [];
		preg_match( "/^([^|]+)(\\|(.*))?$/", $line, $matches );
		# Skip empty lines
		if ( count( $matches ) === 0 ) {
			continue;
		}

		if ( strpos( $matches[0], '%' ) !== false ) {
			$matches[1] = rawurldecode( $matches[1] );
		}
		$title = Title::newFromText( $matches[1], NS_FILE );
		if ( $title === null ) {
			# Bogus title. Ignore these so we don't bomb out later.
			continue;
		}

		# We need to get what handler the file uses, to figure out parameters.
		# Note, a hook can override the file name, and choose an entirely different
		# file (which potentially could be of a different type and have different handler).
		$options = [];
		$descQuery = false;
		Hooks::run( 'BeforeParserFetchFileAndTitle',
			[ $this, $title, &$options, &$descQuery ] );
		# Don't register it now, as TraditionalImageGallery does that later.
		$file = $this->fetchFileNoRegister( $title, $options );
		$handler = $file ? $file->getHandler() : false;

		$paramMap = [
			'img_alt' => 'gallery-internal-alt',
			'img_link' => 'gallery-internal-link',
		];
		if ( $handler ) {
			$paramMap += $handler->getParamMap();
			// We don't want people to specify per-image widths.
			// Additionally the width parameter would need special casing anyhow.
			unset( $paramMap['img_width'] );
		}

		$mwArray = $this->magicWordFactory->newArray( array_keys( $paramMap ) );

		$label = '';
		$alt = '';
		$link = '';
		$handlerOptions = [];
		if ( isset( $matches[3] ) ) {
			// look for an |alt= definition while trying not to break existing
			// captions with multiple pipes (|) in it, until a more sensible grammar
			// is defined for images in galleries

			// FIXME: Doing recursiveTagParse at this stage, and the trim before
			// splitting on '|' is a bit odd, and different from makeImage.
			$matches[3] = $this->recursiveTagParse( trim( $matches[3] ) );
			// Protect LanguageConverter markup
			$parameterMatches = StringUtils::delimiterExplode(
				'-{', '}-', '|', $matches[3], true /* nested */
			);

			foreach ( $parameterMatches as $parameterMatch ) {
				list( $magicName, $match ) = $mwArray->matchVariableStartToEnd( $parameterMatch );
				if ( $magicName ) {
					$paramName = $paramMap[$magicName];

					switch ( $paramName ) {
						case 'gallery-internal-alt':
							$alt = $this->stripAltText( $match, false );
							break;
						case 'gallery-internal-link':
							$linkValue = $this->stripAltText( $match, false );
							// The "{", "|" and "}" must be escaped: an unescaped "|" is
							// regex alternation, which made this match (and truncate) any
							// value that merely starts with "-{R" or ends with "}-".
							if ( preg_match( '/^-\{R\|(.*)\}-$/', $linkValue ) ) {
								// Result of LanguageConverter::markNoConversion
								// invoked on an external link.
								// Drop the leading "-{R|" (4 bytes) and trailing "}-" (2 bytes).
								$linkValue = substr( $linkValue, 4, -2 );
							}
							list( $type, $target ) = $this->parseLinkParameter( $linkValue );
							if ( $type === 'link-url' ) {
								$link = $target;
								$this->mOutput->addExternalLink( $target );
							} elseif ( $type === 'link-title' ) {
								$link = $target->getLinkURL();
								$this->mOutput->addLink( $target );
							}
							break;
						default:
							// Must be a handler specific parameter. $paramMap only
							// contains such entries when a handler exists.
							if ( $handler->validateParam( $paramName, $match ) ) {
								$handlerOptions[$paramName] = $match;
							} else {
								// Guess not, consider it as caption.
								$this->logger->debug(
									"$parameterMatch failed parameter validation" );
								$label = $parameterMatch;
							}
					}

				} else {
					// Last pipe wins.
					$label = $parameterMatch;
				}
			}
		}

		$ig->add( $title, $label, $alt, $link, $handlerOptions );
	}
	$html = $ig->toHTML();
	Hooks::run( 'AfterParserFetchFileAndTitle', [ $this, $ig, &$html ] );
	return $html;
}
5320
/**
 * Get the image parameter map and the matching magic word array for a
 * media handler. Results are cached per handler class on this parser.
 *
 * @param MediaHandler $handler Media handler, or a falsy value when there is none
 * @return array Two elements: a map of magic word ID => [ type, name ], and the
 *  magic word array built from that map's keys
 */
public function getImageParams( $handler ) {
	# A missing handler is cached under the empty class name
	$handlerClass = $handler ? get_class( $handler ) : '';

	if ( !isset( $this->mImageParams[$handlerClass] ) ) {
		# Built-in parameter names, grouped by parameter type
		static $builtinNames = [
			'horizAlign' => [ 'left', 'right', 'center', 'none' ],
			'vertAlign' => [ 'baseline', 'sub', 'super', 'top', 'text-top', 'middle',
				'bottom', 'text-bottom' ],
			'frame' => [ 'thumbnail', 'manualthumb', 'framed', 'frameless',
				'upright', 'border', 'link', 'alt', 'class' ],
		];
		# Derived once and then shared by every later call
		static $builtinMap;
		if ( !$builtinMap ) {
			$builtinMap = [];
			foreach ( $builtinNames as $paramType => $paramNames ) {
				foreach ( $paramNames as $paramName ) {
					// For grep: img_left, img_right, img_center, img_none,
					// img_baseline, img_sub, img_super, img_top, img_text_top, img_middle,
					// img_bottom, img_text_bottom,
					// img_thumbnail, img_manualthumb, img_framed, img_frameless, img_upright,
					// img_border, img_link, img_alt, img_class
					$magicId = str_replace( '-', '_', "img_$paramName" );
					$builtinMap[$magicId] = [ $paramType, $paramName ];
				}
			}
		}

		# Handler parameters are layered on top of the built-in ones
		$combined = $builtinMap;
		if ( $handler ) {
			foreach ( $handler->getParamMap() as $magicId => $handlerParam ) {
				$combined[$magicId] = [ 'handler', $handlerParam ];
			}
		}

		$this->mImageParams[$handlerClass] = $combined;
		$this->mImageParamsMagicArray[$handlerClass] =
			$this->magicWordFactory->newArray( array_keys( $combined ) );
	}

	return [
		$this->mImageParams[$handlerClass],
		$this->mImageParamsMagicArray[$handlerClass]
	];
}
5370
/**
 * Parse image options text and use it to make an image
 *
 * The options text has the form "options|alt text". Options are:
 *  * thumbnail  make a thumbnail with enlarge-icon and caption, alignment depends on lang
 *  * left       no resizing, just left align. label is used for alt= only
 *  * right      same, but right aligned
 *  * none       same, but not aligned
 *  * ___px      scale to ___ pixels width, no aligning. e.g. use in taxobox
 *  * center     center the image
 *  * frame      Keep original image size, no magnify-button.
 *  * framed     Same as "frame"
 *  * frameless  like 'thumb' but without a frame. Keeps user preferences for width
 *  * upright    reduce width for upright images, rounded to full __0 px
 *  * border     draw a 1px border around the image
 *  * alt        Text for HTML alt attribute (defaults to empty)
 *  * class      Set a class for img node
 *  * link       Set the target of the image link. Can be external, interwiki, or local
 * vertical-align values (no % or length right now):
 *  * baseline, sub, super, top, text-top, middle, bottom, text-bottom
 *
 * @param Title $title
 * @param string $options
 * @param LinkHolderArray|bool $holders
 * @return string HTML
 */
public function makeImage( $title, $options, $holders = false ) {
	# Split into parts, protecting LanguageConverter markup
	$parts = StringUtils::delimiterExplode(
		'-{', '}-', '|', $options, true /* allow nesting */
	);

	# Let extensions pick the file revision for us
	$fetchOptions = [];
	$descQuery = false;
	Hooks::run( 'BeforeParserFetchFileAndTitle',
		[ $this, $title, &$fetchOptions, &$descQuery ] );
	# Fetch and register the file (file title may be different via hooks)
	list( $file, $title ) = $this->fetchFileAndTitle( $title, $fetchOptions );

	# Parameter map for this file's handler (false when the file is missing)
	$handler = $file ? $file->getHandler() : false;
	list( $paramMap, $mwArray ) = $this->getImageParams( $handler );

	if ( !$file ) {
		$this->addTrackingCategory( 'broken-file-category' );
	}

	# Walk over the input parameters
	$caption = '';
	$params = [
		'frame' => [],
		'handler' => [],
		'horizAlign' => [],
		'vertAlign' => [],
	];
	$seenformat = false;
	foreach ( $parts as $part ) {
		$part = trim( $part );
		list( $magicName, $value ) = $mwArray->matchVariableStartToEnd( $part );
		$validated = false;

		if ( isset( $paramMap[$magicName] ) ) {
			list( $type, $paramName ) = $paramMap[$magicName];

			if ( $type === 'handler' && $paramName === 'width' ) {
				# Special case; width and height come in one variable together.
				# Each dimension is validated and stored independently.
				$dimensions = self::parseWidthParam( $value );
				foreach ( [ 'width', 'height' ] as $dimension ) {
					if ( isset( $dimensions[$dimension] )
						&& $handler->validateParam( $dimension, $dimensions[$dimension] )
					) {
						$params[$type][$dimension] = $dimensions[$dimension];
						$validated = true;
					}
				}
				# else no validation -- T15436
			} else {
				if ( $type === 'handler' ) {
					# Handler parameters are validated by the handler itself
					$validated = $handler->validateParam( $paramName, $value );
				} else {
					# Internal parameters
					switch ( $paramName ) {
						case 'manualthumb':
						case 'alt':
						case 'class':
							# @todo FIXME: Possibly check validity here for
							# manualthumb? downstream behavior seems odd with
							# missing manual thumbs.
							$validated = true;
							$value = $this->stripAltText( $value, $holders );
							break;
						case 'link':
							# The parameter is stored under the link type returned
							# here, not under the name 'link'
							list( $paramName, $value ) =
								$this->parseLinkParameter(
									$this->stripAltText( $value, $holders )
								);
							if ( $paramName ) {
								$validated = true;
								if ( $paramName === 'no-link' ) {
									$value = true;
								}
								if ( ( $paramName === 'link-url' ) && $this->mOptions->getExternalLinkTarget() ) {
									$params[$type]['link-target'] = $this->mOptions->getExternalLinkTarget();
								}
							}
							break;
						case 'frameless':
						case 'framed':
						case 'thumbnail':
							// use first appearing option, discard others.
							$validated = !$seenformat;
							$seenformat = true;
							break;
						default:
							# Most other things appear to be empty or numeric...
							$validated = ( $value === false || is_numeric( trim( $value ) ) );
					}
				}

				if ( $validated ) {
					$params[$type][$paramName] = $value;
				}
			}
		}

		# Anything that did not validate as a parameter becomes the caption; the last one wins
		if ( !$validated ) {
			$caption = $part;
		}
	}

	# Alignment: the first key recorded for each kind is the one used
	if ( $params['horizAlign'] ) {
		$params['frame']['align'] = key( $params['horizAlign'] );
	}
	if ( $params['vertAlign'] ) {
		$params['frame']['valign'] = key( $params['vertAlign'] );
	}

	$params['frame']['caption'] = $caption;

	# Will the image be presented in a frame, with the caption below?
	$imageIsFramed = isset( $params['frame']['frame'] )
		|| isset( $params['frame']['framed'] )
		|| isset( $params['frame']['thumbnail'] )
		|| isset( $params['frame']['manualthumb'] );

	# In the old days, [[Image:Foo|text...]] would set alt text.  Later it
	# came to also set the caption, ordinary text after the image -- which
	# makes no sense, because that just repeats the text multiple times in
	# screen readers.  It *also* came to set the title attribute.
	# Now that we have an alt attribute, we should not set the alt text to
	# equal the caption: that's worse than useless, it just repeats the
	# text.  This is the framed/thumbnail case.  If there's no caption, we
	# use the unnamed parameter for alt text as well, just for the time
	# being, if the unnamed param is set and the alt param is not.
	# For the future, we need to figure out if we want to tweak this more,
	# e.g., introducing a title= parameter for the title; ignoring the
	# unnamed parameter entirely for images without a caption; adding an
	# explicit caption= parameter and preserving the old magic unnamed
	# parameter for BC; ...
	$altMissing = !isset( $params['frame']['alt'] );
	if ( $imageIsFramed ) {
		# Framed image: with neither caption nor alt text, use the filename as
		# alt text so that screen readers at least get some description
		if ( $altMissing && $caption === '' ) {
			$params['frame']['alt'] = $title->getText();
		}
		# Do not set $params['frame']['title'] because tooltips don't make sense
		# for framed images
	} else {
		# Inline image
		if ( $altMissing ) {
			# No alt text: use the "caption", or the filename when there is none
			$params['frame']['alt'] = $caption !== ''
				? $this->stripAltText( $caption, $holders )
				: $title->getText();
		}
		# Use the "caption" for the tooltip text
		$params['frame']['title'] = $this->stripAltText( $caption, $holders );
	}
	$params['handler']['targetlang'] = $this->getTargetLanguage()->getCode();

	Hooks::run( 'ParserMakeImageParams', [ $title, $file, &$params, $this ] );

	# Linker does the rest
	$time = $fetchOptions['time'] ?? false;
	$ret = Linker::makeImageLink( $this, $title, $file, $params['frame'], $params['handler'],
		$time, $descQuery, $this->mOptions->getThumbSize() );

	# Give the handler a chance to modify the parser object
	if ( $handler ) {
		$handler->parserTransformHook( $this, $file );
	}

	return $ret;
}
5580
/**
 * Parse the value of 'link' parameter in image syntax (`[[File:Foo.jpg|link=<value>]]`).
 *
 * Adds an entry to appropriate link tables.
 *
 * @since 1.32
 * @param string $value
 * @return array of `[ type, target ]`, where:
 *   - `type` is one of:
 *     - `null`: Given value is not a valid link target, use default
 *     - `'no-link'`: Given value is empty, do not generate a link
 *     - `'link-url'`: Given value is a valid external link
 *     - `'link-title'`: Given value is a valid internal link
 *   - `target` is:
 *     - When `type` is `null` or `'no-link'`: `false`
 *     - When `type` is `'link-url'`: URL string corresponding to given value
 *     - When `type` is `'link-title'`: Title object corresponding to given value
 */
public function parseLinkParameter( $value ) {
	# Empty value: explicitly no link
	if ( $value === '' ) {
		return [ 'no-link', false ];
	}

	$prots = $this->mUrlProtocols;
	if ( preg_match( "/^((?i)$prots)/", $value ) ) {
		# Starts with a known protocol, so it is only ever treated as an
		# external link; a malformed URL is rejected, not retried as a title
		$chars = self::EXT_LINK_URL_CLASS;
		$addr = self::EXT_LINK_ADDR;
		if ( !preg_match( "/^((?i)$prots)$addr$chars*$/u", $value ) ) {
			return [ null, false ];
		}
		$this->mOutput->addExternalLink( $value );
		return [ 'link-url', $value ];
	}

	# Anything else has to be a valid page title
	$linkTitle = Title::newFromText( $value );
	if ( !$linkTitle ) {
		return [ null, false ];
	}
	$this->mOutput->addLink( $linkTitle );
	return [ 'link-title', $linkTitle ];
}
5623
5624 /**
5625 * @param string $caption
5626 * @param LinkHolderArray|bool $holders
5627 * @return mixed|string
5628 */
protected function stripAltText( $caption, $holders ) {
	# Overview: expand link placeholders and strip markers in the caption,
	# escape "&" in front of legacy semicolon-less entity names, and finally
	# pass the result through Sanitizer::stripAllTags().

	# Strip bad stuff out of the title (tooltip). We can't just use
	# replaceLinkHoldersText() here, because if this function is called
	# from replaceInternalLinks2(), mLinkHolders won't be up-to-date.
	if ( $holders ) {
		# Caller supplied its own holder array: resolve placeholders from it
		$tooltip = $holders->replaceText( $caption );
	} else {
		$tooltip = $this->replaceLinkHoldersText( $caption );
	}

	# make sure there are no placeholders in thumbnail attributes
	# that are later expanded to html- so expand them now and
	# remove the tags
	$tooltip = $this->mStripState->unstripBoth( $tooltip );
	# Compatibility hack! In HTML certain entity references not terminated
	# by a semicolon are decoded (but not if we're in an attribute; that's
	# how link URLs get away without properly escaping & in queries).
	# But wikitext has always required semicolon-termination of entities,
	# so encode & where needed to avoid decode of semicolon-less entities.
	# See T209236 and
	# https://www.w3.org/TR/html5/syntax.html#named-character-references
	# T210437 discusses moving this workaround to Sanitizer::stripAllTags.
	# Only the "&" itself is consumed by the match; the entity name is checked
	# in a lookahead, so the replacement leaves the name untouched.
	$tooltip = preg_replace( "/
		&                            # 1. entity prefix
		(?=                          # 2. followed by:
		(?:                          #  a. one of the legacy semicolon-less named entities
			A(?:Elig|MP|acute|circ|grave|ring|tilde|uml)|
			C(?:OPY|cedil)|E(?:TH|acute|circ|grave|uml)|
			GT|I(?:acute|circ|grave|uml)|LT|Ntilde|
			O(?:acute|circ|grave|slash|tilde|uml)|QUOT|REG|THORN|
			U(?:acute|circ|grave|uml)|Yacute|
			a(?:acute|c(?:irc|ute)|elig|grave|mp|ring|tilde|uml)|brvbar|
			c(?:cedil|edil|urren)|cent(?!erdot;)|copy(?!sr;)|deg|
			divide(?!ontimes;)|e(?:acute|circ|grave|th|uml)|
			frac(?:1(?:2|4)|34)|
			gt(?!c(?:c|ir)|dot|lPar|quest|r(?:a(?:pprox|rr)|dot|eq(?:less|qless)|less|sim);)|
			i(?:acute|circ|excl|grave|quest|uml)|laquo|
			lt(?!c(?:c|ir)|dot|hree|imes|larr|quest|r(?:Par|i(?:e|f|));)|
			m(?:acr|i(?:cro|ddot))|n(?:bsp|tilde)|
			not(?!in(?:E|dot|v(?:a|b|c)|)|ni(?:v(?:a|b|c)|);)|
			o(?:acute|circ|grave|rd(?:f|m)|slash|tilde|uml)|
			p(?:lusmn|ound)|para(?!llel;)|quot|r(?:aquo|eg)|
			s(?:ect|hy|up(?:1|2|3)|zlig)|thorn|times(?!b(?:ar|)|d;)|
			u(?:acute|circ|grave|ml|uml)|y(?:acute|en|uml)
		)
		(?:[^;]|$))                  #  b. and not followed by a semicolon
		# S = study, for efficiency
		/Sx", '&amp;', $tooltip );
	# Last step: reduce the result to plain text
	$tooltip = Sanitizer::stripAllTags( $tooltip );

	return $tooltip;
}
5681
5682 /**
5683 * Set a flag in the output object indicating that the content is dynamic and
5684 * shouldn't be cached.
5685 * @deprecated since 1.28; use getOutput()->updateCacheExpiry()
5686 */
public function disableCache() {
	// Validate first: previously the debug message below was emitted even when
	// the call then failed, leaving a misleading "marked as uncacheable" entry.
	if ( !$this->mOutput ) {
		throw new MWException( __METHOD__ .
			" can only be called when actually parsing something" );
	}
	$this->logger->debug( "Parser output marked as uncacheable." );
	// An expiry of 0 is the "new style" way to request no caching.
	$this->mOutput->updateCacheExpiry( 0 );
}
5695
5696 /**
5697 * Callback from the Sanitizer for expanding items found in HTML attribute
5698 * values, so they can be safely tested and escaped.
5699 *
5700 * @param string &$text
5701 * @param bool|PPFrame $frame
5702 * @return string
5703 */
public function attributeStripCallback( &$text, $frame = false ) {
	// $text is taken by reference, so the caller's variable receives the
	// expanded value as well as the return value.
	$text = $this->replaceVariables( $text, $frame );
	return $text = $this->mStripState->unstripBoth( $text );
}
5709
5710 /**
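 * Get the names of all registered tag hooks: transparent tag hooks,
 * regular tag hooks and function tag hooks, in that order.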
5712 *
5713 * @return array
5714 */
public function getTags() {
	// Initialise before reading the hook tables (presumably this is what
	// registers the hooks read below; confirm against firstCallInit()).
	$this->firstCallInit();

	$transparentTags = array_keys( $this->mTransparentTagHooks );
	$regularTags = array_keys( $this->mTagHooks );
	$functionTags = array_keys( $this->mFunctionTagHooks );

	return array_merge( $transparentTags, $regularTags, $functionTags );
}
5723
5724 /**
5725 * @since 1.32
5726 * @return array
5727 */
public function getFunctionSynonyms() {
	// firstCallInit() runs before the synonym table is read.
	$this->firstCallInit();
	$synonyms = $this->mFunctionSynonyms;
	return $synonyms;
}
5732
5733 /**
5734 * @since 1.32
5735 * @return string
5736 */
public function getUrlProtocols() {
	// Plain accessor; the same value is interpolated into regexes elsewhere in this class.
	$protocols = $this->mUrlProtocols;
	return $protocols;
}
5740
5741 /**
5742 * Replace transparent tags in $text with the values given by the callbacks.
5743 *
5744 * Transparent tag hooks are like regular XML-style tag hooks, except they
5745 * operate late in the transformation sequence, on HTML instead of wikitext.
5746 *
5747 * @param string $text
5748 *
5749 * @return string
5750 */
public function replaceTransparentTags( $text ) {
	// Pull every registered transparent tag out of $text; $found is filled
	// with marker => [ element, content, params, original tag text ].
	$found = [];
	$tagNames = array_keys( $this->mTransparentTagHooks );
	$text = self::extractTagsAndParams( $tagNames, $text, $found );

	$map = [];
	foreach ( $found as $marker => $data ) {
		list( $element, $content, $params, $tag ) = $data;
		$name = strtolower( $element );
		// Tags without a registered hook are put back verbatim.
		$map[$marker] = isset( $this->mTransparentTagHooks[$name] )
			? call_user_func_array(
				$this->mTransparentTagHooks[$name],
				[ $content, $params, $this ]
			)
			: $tag;
	}

	// Swap each marker for its hook output (or the original tag).
	return strtr( $text, $map );
}
5772
5773 /**
5774 * Break wikitext input into sections, and either pull or replace
5775 * some particular section's text.
5776 *
5777 * External callers should use the getSection and replaceSection methods.
5778 *
5779 * @param string $text Page wikitext
5780 * @param string|int $sectionId A section identifier string of the form:
5781 * "<flag1> - <flag2> - ... - <section number>"
5782 *
5783 * Currently the only recognised flag is "T", which means the target section number
5784 * was derived during a template inclusion parse, in other words this is a template
5785 * section edit link. If no flags are given, it was an ordinary section edit link.
5786 * This flag is required to avoid a section numbering mismatch when a section is
5787 * enclosed by "<includeonly>" (T8563).
5788 *
5789 * The section number 0 pulls the text before the first heading; other numbers will
5790 * pull the given section along with its lower-level subsections. If the section is
 * not found, $mode=get will return $newText, and $mode=replace will return $text.
5792 *
5793 * Section 0 is always considered to exist, even if it only contains the empty
5794 * string. If $text is the empty string and section 0 is replaced, $newText is
5795 * returned.
5796 *
5797 * @param string $mode One of "get" or "replace"
5798 * @param string $newText Replacement text for section data.
5799 * @return string For "get", the extracted section text.
5800 * for "replace", the whole page with the section replaced.
5801 */
private function extractSections( $text, $sectionId, $mode, $newText = '' ) {
	global $wgTitle; # not generally used but removes an ugly failure mode

	# Hold the parser lock for the duration of this call; per lock(), it is
	# released when the returned object goes out of scope (including on the
	# early returns below).
	$magicScopeVariable = $this->lock();
	$this->startParse( $wgTitle, new ParserOptions, self::OT_PLAIN, true );
	$outText = '';
	$frame = $this->getPreprocessor()->newFrame();

	# Process section extraction flags
	# The last '-'-separated part is the section index; the parts before it are flags.
	$flags = 0;
	$sectionParts = explode( '-', $sectionId );
	$sectionIndex = array_pop( $sectionParts );
	foreach ( $sectionParts as $part ) {
		if ( $part === 'T' ) {
			$flags |= self::PTD_FOR_INCLUSION;
		}
	}

	# Check for empty input
	if ( strval( $text ) === '' ) {
		# Only sections 0 and T-0 exist in an empty document
		# NOTE(review): loose comparison; on PHP < 8 a non-numeric index string
		# also compares equal to 0 -- confirm this is intended.
		if ( $sectionIndex == 0 ) {
			if ( $mode === 'get' ) {
				return '';
			}

			return $newText;
		} else {
			if ( $mode === 'get' ) {
				return $newText;
			}

			return $text;
		}
	}

	# Preprocess the text
	$root = $this->preprocessToDom( $text, $flags );

	# <h> nodes indicate section breaks
	# They can only occur at the top level, so we can find them by iterating the root's children
	$node = $root->getFirstChild();

	# Find the target section
	if ( $sectionIndex == 0 ) {
		# Section zero doesn't nest, level=big
		$targetLevel = 1000;
	} else {
		while ( $node ) {
			if ( $node->getName() === 'h' ) {
				$bits = $node->splitHeading();
				if ( $bits['i'] == $sectionIndex ) {
					$targetLevel = $bits['level'];
					break;
				}
			}
			# In replace mode, everything before the target section is copied through unchanged
			if ( $mode === 'replace' ) {
				$outText .= $frame->expand( $node, PPFrame::RECOVER_ORIG );
			}
			$node = $node->getNextSibling();
		}
	}

	if ( !$node ) {
		# Not found
		if ( $mode === 'get' ) {
			return $newText;
		} else {
			return $text;
		}
	}

	# Find the end of the section, including nested sections
	# The walk stops at the first other heading whose level is not deeper
	# than the target's; deeper headings count as subsections.
	do {
		if ( $node->getName() === 'h' ) {
			$bits = $node->splitHeading();
			$curLevel = $bits['level'];
			if ( $bits['i'] != $sectionIndex && $curLevel <= $targetLevel ) {
				break;
			}
		}
		# In get mode the section's own nodes are collected; in replace mode they are dropped
		if ( $mode === 'get' ) {
			$outText .= $frame->expand( $node, PPFrame::RECOVER_ORIG );
		}
		$node = $node->getNextSibling();
	} while ( $node );

	# Write out the remainder (in replace mode only)
	if ( $mode === 'replace' ) {
		# Output the replacement text
		# Add two newlines on -- trailing whitespace in $newText is conventionally
		# stripped by the editor, so we need both newlines to restore the paragraph gap
		# Only add trailing whitespace if there is newText
		if ( $newText != "" ) {
			$outText .= $newText . "\n\n";
		}

		while ( $node ) {
			$outText .= $frame->expand( $node, PPFrame::RECOVER_ORIG );
			$node = $node->getNextSibling();
		}
	}

	# NOTE(review): $outText is only ever built by string concatenation in this
	# method, so this check looks always true -- confirm before removing.
	if ( is_string( $outText ) ) {
		# Re-insert stripped tags
		$outText = rtrim( $this->mStripState->unstripBoth( $outText ) );
	}

	return $outText;
}
5912
5913 /**
 * This function returns the text of a section, specified by a section identifier ($sectionId).
5915 * A section is text under a heading like == Heading == or \<h1\>Heading\</h1\>, or
5916 * the first section before any such heading (section 0).
5917 *
5918 * If a section contains subsections, these are also returned.
5919 *
5920 * @param string $text Text to look in
5921 * @param string|int $sectionId Section identifier as a number or string
5922 * (e.g. 0, 1 or 'T-1').
5923 * @param string $defaultText Default to return if section is not found
5924 *
5925 * @return string Text of the requested section
5926 */
public function getSection( $text, $sectionId, $defaultText = '' ) {
	// In 'get' mode the fourth argument is what comes back when the section is missing.
	$sectionText = $this->extractSections( $text, $sectionId, 'get', $defaultText );
	return $sectionText;
}
5930
5931 /**
 * This function returns $oldText after the content of the section
 * specified by $sectionId has been replaced with $newText. If the target
 * section does not exist, $oldText is returned unchanged.
5935 *
5936 * @param string $oldText Former text of the article
5937 * @param string|int $sectionId Section identifier as a number or string
5938 * (e.g. 0, 1 or 'T-1').
5939 * @param string $newText Replacing text
5940 *
5941 * @return string Modified text
5942 */
public function replaceSection( $oldText, $sectionId, $newText ) {
	// 'replace' mode returns the whole page with the section swapped out.
	$updatedText = $this->extractSections( $oldText, $sectionId, 'replace', $newText );
	return $updatedText;
}
5946
5947 /**
5948 * Get the ID of the revision we are parsing
5949 *
5950 * The return value will be either:
5951 * - a) Positive, indicating a specific revision ID (current or old)
5952 * - b) Zero, meaning the revision ID is specified by getCurrentRevisionCallback()
5953 * - c) Null, meaning the parse is for preview mode and there is no revision
5954 *
5955 * @return int|null
5956 */
public function getRevisionId() {
	// May be a positive ID, 0 or null; see the docblock for what each means.
	$revisionId = $this->mRevisionId;
	return $revisionId;
}
5960
5961 /**
5962 * Get the revision object for $this->mRevisionId
5963 *
5964 * @return Revision|null Either a Revision object or null
5965 * @since 1.23 (public since 1.23)
5966 */
public function getRevisionObject() {
	$cached = $this->mRevisionObject;
	if ( $cached ) {
		return $cached;
	}

	// The callback is consulted even when mRevisionId is null, so that a
	// not-yet-saved revision can be injected (e.g. for self-transclusion
	// previews). It may also hand back an already-loaded OLD revision, so
	// it cannot simply be skipped when mRevisionId is set.
	$callback = $this->mOptions->getCurrentRevisionCallback();
	$title = $this->getTitle();
	$revision = call_user_func( $callback, $title, $this );

	$wantedId = $this->mRevisionId;
	if ( $revision ) {
		if ( $wantedId === null ) {
			// Preview mode: a revision that already has an ID is most likely
			// the page's current revision, which is not what we want here.
			// Report "no revision", and do not cache that answer.
			if ( $revision->getId() ) {
				return null;
			}
		} elseif ( $wantedId && $revision->getId() != $wantedId ) {
			// The callback should already match mRevisionId; if it does not,
			// fall back to loading the requested revision by ID.
			$revision = Revision::newFromId( $wantedId );
		}
	}

	$this->mRevisionObject = $revision;

	return $revision;
}
6005
6006 /**
6007 * Get the timestamp associated with the current revision, adjusted for
6008 * the default server-local timestamp
6009 * @return string TS_MW timestamp
6010 */
public function getRevisionTimestamp() {
	if ( $this->mRevisionTimestamp === null ) {
		# Prefer the revision's own timestamp; otherwise use the one from the parser options
		$revision = $this->getRevisionObject();
		if ( $revision ) {
			$rawTimestamp = $revision->getTimestamp();
		} else {
			$rawTimestamp = $this->mOptions->getTimestamp();
		}
		# Record the timestamp before any time zone adjustment
		$this->mOutput->setRevisionTimestampUsed( $rawTimestamp );

		# The empty-string timezone argument selects the site-default offset
		# rather than the user's setting. The value ends up in the parser
		# cache and is served to everyone, so it must not vary per visitor.
		$this->mRevisionTimestamp = $this->contLang->userAdjust( $rawTimestamp, '' );
	}

	return $this->mRevisionTimestamp;
}
6030
6031 /**
6032 * Get the name of the user that edited the last revision
6033 *
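 * @return string|null User name; null when no revision object is available
 *   and neither the 'wiki' output type nor preview mode applies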
6035 */
public function getRevisionUser() {
	// Already resolved on an earlier call.
	if ( $this->mRevisionUser !== null ) {
		return $this->mRevisionUser;
	}

	$revision = $this->getRevisionObject();
	if ( $revision ) {
		$this->mRevisionUser = $revision->getUserText();
	} elseif ( $this->ot['wiki'] || $this->mOptions->getIsPreview() ) {
		# When this is being subst:ed there is no revision yet,
		# so fall back to the current user's name
		$this->mRevisionUser = $this->getUser()->getName();
	}

	// Still null here if neither branch above applied.
	return $this->mRevisionUser;
}
6050
6051 /**
6052 * Get the size of the revision
6053 *
6054 * @return int|null Revision size
6055 */
public function getRevisionSize() {
	if ( $this->mRevisionSize === null ) {
		$revision = $this->getRevisionObject();

		# When this variable is subst:ed there is no revision object yet, so
		# use the parser input size instead, because the substitution itself
		# will change the size.
		$this->mRevisionSize = $revision ? $revision->getSize() : $this->mInputSize;
	}

	return $this->mRevisionSize;
}
6071
6072 /**
6073 * Mutator for $mDefaultSort
6074 *
6075 * @param string $sort New value
6076 */
public function setDefaultSort( $sort ) {
	// Remember the key on the parser, then mirror it into the output's
	// 'defaultsort' property.
	$this->mDefaultSort = $sort;
	$output = $this->mOutput;
	$output->setProperty( 'defaultsort', $sort );
}
6081
6082 /**
6083 * Accessor for $mDefaultSort
6084 * Will use the empty string if none is set.
6085 *
6086 * This value is treated as a prefix, so the
6087 * empty string is equivalent to sorting by
6088 * page name.
6089 *
6090 * @return string
6091 */
public function getDefaultSort() {
	// false is the "nothing set" sentinel; report it as the empty string.
	return $this->mDefaultSort === false ? '' : $this->mDefaultSort;
}
6099
6100 /**
6101 * Accessor for $mDefaultSort
6102 * Unlike getDefaultSort(), will return false if none is set
6103 *
6104 * @return string|bool
6105 */
public function getCustomDefaultSort() {
	// Returned as stored, including the false sentinel used when nothing is set.
	$sortKey = $this->mDefaultSort;
	return $sortKey;
}
6109
/**
 * Turn already-stripped heading text into a section name: normalize its
 * whitespace, decode character references, then apply normalizeSectionName().
 *
 * @param string $text
 * @return string
 */
private static function getSectionNameFromStrippedText( $text ) {
	$collapsed = Sanitizer::normalizeSectionNameWhitespace( $text );
	$decoded = Sanitizer::decodeCharReferences( $collapsed );
	return self::normalizeSectionName( $decoded );
}
6116
/**
 * Build a link anchor ('#' followed by the escaped ID) for a section name.
 *
 * @param string $sectionName
 * @return string Anchor (starting with '#')
 */
private static function makeAnchor( $sectionName ) {
	$id = Sanitizer::escapeIdForLink( $sectionName );
	return '#' . $id;
}
6120
/**
 * Build an anchor for a section name, using the fallback ID encoding when
 * the second 'FragmentMode' entry is 'legacy'.
 *
 * @param string $sectionName
 * @return string Anchor (starting with '#')
 */
private function makeLegacyAnchor( $sectionName ) {
	$fragmentMode = $this->svcOptions->get( 'FragmentMode' );

	if ( ( $fragmentMode[1] ?? null ) === 'legacy' ) {
		// ForAttribute() and ForLink() are the same for legacy encoding
		$id = Sanitizer::escapeIdForAttribute( $sectionName, Sanitizer::ID_FALLBACK );
		return "#$id";
	}

	$id = Sanitizer::escapeIdForLink( $sectionName );
	return "#$id";
}
6132
6133 /**
6134 * Try to guess the section anchor name based on a wikitext fragment
6135 * presumably extracted from a heading, for example "Header" from
6136 * "== Header ==".
6137 *
6138 * @param string $text
6139 * @return string Anchor (starting with '#')
6140 */
public function guessSectionNameFromWikiText( $text ) {
	# Wikitext links would break the anchor, so strip them out first
	$stripped = $this->stripSectionName( $text );
	return self::makeAnchor( self::getSectionNameFromStrippedText( $stripped ) );
}
6147
6148 /**
6149 * Same as guessSectionNameFromWikiText(), but produces legacy anchors
6150 * instead, if possible. For use in redirects, since various versions
6151 * of Microsoft browsers interpret Location: headers as something other
6152 * than UTF-8, resulting in breakage.
6153 *
6154 * @param string $text The section name
6155 * @return string Anchor (starting with '#')
6156 */
public function guessLegacySectionNameFromWikiText( $text ) {
	# Wikitext links would break the anchor, so strip them out first
	$stripped = $this->stripSectionName( $text );
	return $this->makeLegacyAnchor( self::getSectionNameFromStrippedText( $stripped ) );
}
6163
6164 /**
6165 * Like guessSectionNameFromWikiText(), but takes already-stripped text as input.
6166 * @param string $text Section name (plain text)
6167 * @return string Anchor (starting with '#')
6168 */
public static function guessSectionNameFromStrippedText( $text ) {
	// No wikitext stripping here: the input is expected to be plain text already.
	return self::makeAnchor( self::getSectionNameFromStrippedText( $text ) );
}
6173
6174 /**
6175 * Apply the same normalization as code making links to this section would
6176 *
6177 * @param string $text
6178 * @return string
6179 */
private static function normalizeSectionName( $text ) {
	# T90902: ensure the same normalization is applied for IDs as to links
	$parser = MediaWikiServices::getInstance()->getTitleParser();
	try {
		// Parse the text as a fragment-only title and keep the normalized fragment.
		return $parser->splitTitleString( "#$text" )['fragment'];
	} catch ( MalformedTitleException $ex ) {
		// Not parseable as a title: leave the text as it was.
		return $text;
	}
}
6191
6192 /**
6193 * Strips a text string of wikitext for use in a section anchor
6194 *
6195 * Accepts a text string and then removes all wikitext from the
6196 * string and leaves only the resultant text (i.e. the result of
6197 * [[User:WikiSysop|Sysop]] would be "Sysop" and the result of
6198 * [[User:WikiSysop]] would be "User:WikiSysop") - this is intended
 * to create valid section anchors by mimicking the output of the
6200 * parser when headings are parsed.
6201 *
6202 * @param string $text Text string to be stripped of wikitext
6203 * for use in a Section anchor
6204 * @return string Filtered text string
6205 */
public function stripSectionName( $text ) {
	# Internal links: [[target|label]] keeps the label, [[target]] keeps the target
	$pipedLink = '/\[\[:?([^[|]+)\|([^[]+)\]\]/';
	$plainLink = '/\[\[:?([^[]+)\|?\]\]/';

	# External links: [url label] keeps the label
	# @todo FIXME: Not tolerant to blank link text
	# I.E. [https://www.mediawiki.org] will render as [1] or something depending
	# on how many empty links there are on the page - need to figure that out.
	$externalLink = '/\[(?i:' . $this->mUrlProtocols . ')([^ ]+?) ([^[]+)\]/';

	$text = preg_replace( $pipedLink, '$2', $text );
	$text = preg_replace( $plainLink, '$1', $text );
	$text = preg_replace( $externalLink, '$2', $text );

	# Parse wikitext quotes (italics & bold)
	$text = $this->doQuotes( $text );

	# Strip HTML tags
	return StringUtils::delimiterReplace( '<', '>', '', $text );
}
6224
6225 /**
6226 * strip/replaceVariables/unstrip for preprocessor regression testing
6227 *
6228 * @param string $text
6229 * @param Title $title
6230 * @param ParserOptions $options
6231 * @param int $outputType
6232 *
6233 * @return string
6234 */
public function testSrvus( $text, Title $title, ParserOptions $options,
	$outputType = self::OT_HTML
) {
	// The lock object must stay referenced until this method returns.
	$magicScopeVariable = $this->lock();
	$this->startParse( $title, $options, $outputType, true );

	// replaceVariables -> unstrip -> remove HTML tags
	$expanded = $this->replaceVariables( $text );
	$unstripped = $this->mStripState->unstripBoth( $expanded );
	return Sanitizer::removeHTMLtags( $unstripped );
}
6246
6247 /**
6248 * @param string $text
6249 * @param Title $title
6250 * @param ParserOptions $options
6251 * @return string
6252 */
public function testPst( $text, Title $title, ParserOptions $options ) {
	// Run a pre-save transform on behalf of the user carried by $options.
	$user = $options->getUser();
	return $this->preSaveTransform( $text, $title, $user, $options );
}
6256
6257 /**
6258 * @param string $text
6259 * @param Title $title
6260 * @param ParserOptions $options
6261 * @return string
6262 */
public function testPreprocess( $text, Title $title, ParserOptions $options ) {
	// Same pipeline as testSrvus(), but with the preprocess output type.
	$outputType = self::OT_PREPROCESS;
	return $this->testSrvus( $text, $title, $options, $outputType );
}
6266
6267 /**
6268 * Call a callback function on all regions of the given text that are not
6269 * inside strip markers, and replace those regions with the return value
6270 * of the callback. For example, with input:
6271 *
6272 * aaa<MARKER>bbb
6273 *
6274 * This will call the callback function twice, with 'aaa' and 'bbb'. Those
6275 * two strings will be replaced with the value returned by the callback in
6276 * each case.
6277 *
6278 * @param string $s
6279 * @param callable $callback
6280 *
6281 * @return string
6282 */
public function markerSkipCallback( $s, $callback ) {
	$length = strlen( $s );
	$suffixLength = strlen( self::MARKER_SUFFIX );
	$result = '';

	for ( $pos = 0; $pos < $length; $pos = $end ) {
		$start = strpos( $s, self::MARKER_PREFIX, $pos );
		if ( $start === false ) {
			// No further markers: the rest of the string is ordinary text.
			$result .= call_user_func( $callback, substr( $s, $pos ) );
			break;
		}

		// Ordinary text in front of the marker goes through the callback.
		$result .= call_user_func( $callback, substr( $s, $pos, $start - $pos ) );

		$suffixAt = strpos( $s, self::MARKER_SUFFIX, $start );
		if ( $suffixAt === false ) {
			// Unterminated marker: copy the remainder through untouched.
			$result .= substr( $s, $start );
			break;
		}

		// Copy the complete marker verbatim and continue right after it.
		$end = $suffixAt + $suffixLength;
		$result .= substr( $s, $start, $end - $start );
	}

	return $result;
}
6306
6307 /**
6308 * Remove any strip markers found in the given text.
6309 *
6310 * @param string $text
6311 * @return string
6312 */
public function killMarkers( $text ) {
	// Delegates to the current strip state.
	$stripState = $this->mStripState;
	return $stripState->killMarkers( $text );
}
6316
6317 /**
6318 * Save the parser state required to convert the given half-parsed text to
6319 * HTML. "Half-parsed" in this context means the output of
6320 * recursiveTagParse() or internalParse(). This output has strip markers
6321 * from replaceVariables (extensionSubstitution() etc.), and link
6322 * placeholders from replaceLinkHolders().
6323 *
6324 * Returns an array which can be serialized and stored persistently. This
6325 * array can later be loaded into another parser instance with
6326 * unserializeHalfParsedText(). The text can then be safely incorporated into
6327 * the return value of a parser hook.
6328 *
6329 * @deprecated since 1.31
6330 * @param string $text
6331 *
6332 * @return array
6333 */
public function serializeHalfParsedText( $text ) {
	wfDeprecated( __METHOD__, '1.31' );

	// Only the strip-state and link-holder entries for $text are requested.
	$stripState = $this->mStripState->getSubState( $text );
	$linkHolders = $this->mLinkHolders->getSubArray( $text );

	return [
		'text' => $text,
		'version' => self::HALF_PARSED_VERSION,
		'stripState' => $stripState,
		'linkHolders' => $linkHolders,
	];
}
6344
6345 /**
6346 * Load the parser state given in the $data array, which is assumed to
6347 * have been generated by serializeHalfParsedText(). The text contents is
6348 * extracted from the array, and its markers are transformed into markers
6349 * appropriate for the current Parser instance. This transformed text is
6350 * returned, and can be safely included in the return value of a parser
6351 * hook.
6352 *
6353 * If the $data array has been stored persistently, the caller should first
6354 * check whether it is still valid, by calling isValidHalfParsedText().
6355 *
6356 * @deprecated since 1.31
6357 * @param array $data Serialized data
6358 * @throws MWException
6359 * @return string
6360 */
public function unserializeHalfParsedText( $data ) {
	wfDeprecated( __METHOD__, '1.31' );

	$versionMatches = isset( $data['version'] )
		&& $data['version'] == self::HALF_PARSED_VERSION;
	if ( !$versionMatches ) {
		throw new MWException( __METHOD__ . ': invalid version' );
	}

	# Both merge steps work on a list of texts, so wrap the single text in an array
	$pieces = [ $data['text'] ];

	# First, extract the strip state.
	$pieces = $this->mStripState->merge( $data['stripState'], $pieces );

	# Now renumber links
	$pieces = $this->mLinkHolders->mergeForeign( $data['linkHolders'], $pieces );

	# Should be good to go.
	return $pieces[0];
}
6377
6378 /**
6379 * Returns true if the given array, presumed to be generated by
6380 * serializeHalfParsedText(), is compatible with the current version of the
6381 * parser.
6382 *
6383 * @deprecated since 1.31
6384 * @param array $data
6385 *
6386 * @return bool
6387 */
public function isValidHalfParsedText( $data ) {
	wfDeprecated( __METHOD__, '1.31' );

	// Data without a version marker can never be valid.
	if ( !isset( $data['version'] ) ) {
		return false;
	}

	return $data['version'] == self::HALF_PARSED_VERSION;
}
6392
6393 /**
 * Parse a width parameter of an image link, like 300px or 200x300px
6395 *
6396 * @param string $value
6397 * @param bool $parseHeight
6398 *
6399 * @return array
6400 * @since 1.20
6401 */
public static function parseWidthParam( $value, $parseHeight = true ) {
	if ( $value === '' ) {
		return [];
	}

	# (T15500) In both cases (width/height and width only),
	# permit trailing "px" for backward compatibility.
	$dims = [];
	if ( $parseHeight && preg_match( '/^([0-9]*)x([0-9]*)\s*(?:px)?\s*$/', $value, $dims ) ) {
		// "WxH" form; a missing number on either side becomes 0 via intval().
		return [
			'width' => intval( $dims[1] ),
			'height' => intval( $dims[2] ),
		];
	}

	if ( preg_match( '/^[0-9]*\s*(?:px)?\s*$/', $value ) ) {
		// Width-only form; intval() reads the leading digits and ignores "px".
		return [ 'width' => intval( $value ) ];
	}

	// Not a recognised size specification.
	return [];
}
6421
6422 /**
6423 * Lock the current instance of the parser.
6424 *
6425 * This is meant to stop someone from calling the parser
6426 * recursively and messing up all the strip state.
6427 *
6428 * @throws MWException If parser is in a parse
6429 * @return ScopedCallback The lock will be released once the return value goes out of scope.
6430 */
protected function lock() {
	if ( $this->mInParse ) {
		throw new MWException( "Parser state cleared while parsing. "
			. "Did you call Parser::parse recursively? Lock is held by: " . $this->mInParse );
	}

	// Store the backtrace of the lock owner, so that a later attempt to lock
	// again can report who is holding the lock.
	$this->mInParse = ( new Exception )->getTraceAsString();

	// The callback clears the lock flag again.
	return new ScopedCallback( function () {
		$this->mInParse = false;
	} );
}
6448
6449 /**
6450 * Strip outer <p></p> tag from the HTML source of a single paragraph.
6451 *
6452 * Returns original HTML if the <p/> tag has any attributes, if there's no wrapping <p/> tag,
6453 * or if there is more than one <p/> tag in the input HTML.
6454 *
6455 * @param string $html
6456 * @return string
6457 * @since 1.24
6458 */
public static function stripOuterParagraph( $html ) {
	$parts = [];
	// The input must be exactly one attribute-less <p>...</p>, optionally with
	// a newline before the closing tag and/or at the very end.
	if ( !preg_match( '/^<p>(.*)\n?<\/p>\n?$/sU', $html, $parts ) ) {
		return $html;
	}

	// A closing tag inside the captured body means more than one paragraph:
	// leave the input alone in that case.
	$inner = $parts[1];
	return strpos( $inner, '</p>' ) === false ? $inner : $html;
}
6467
6468 /**
6469 * Return this parser if it is not doing anything, otherwise
6470 * get a fresh parser. You can use this method by doing
6471 * $newParser = $oldParser->getFreshParser(), or more simply
6472 * $oldParser->getFreshParser()->parse( ... );
6473 * if you're unsure if $oldParser is safe to use.
6474 *
6475 * @since 1.24
6476 * @return Parser A parser object that is not parsing anything
6477 */
public function getFreshParser() {
	// A parser that is in the middle of a parse must not be reused;
	// in that case ask the factory for a new one.
	return $this->mInParse ? $this->factory->create() : $this;
}
6485
6486 /**
 * Sets up the PHP implementation of OOUI for use in this request
6488 * and instructs OutputPage to enable OOUI for itself.
6489 *
6490 * @since 1.26
6491 */
public function enableOOUI() {
	// Set OOUI up first, then flag the parser output so OOUI gets enabled for it.
	OutputPage::setupOOUI();
	$output = $this->mOutput;
	$output->setEnableOOUI( true );
}
6496
6497 /**
6498 * @param string $flag
6499 * @param string $reason
6500 */
protected function setOutputFlag( $flag, $reason ) {
	$this->mOutput->setFlag( $flag );

	// Record which page got the flag and why in the debug log.
	$name = $this->mTitle->getPrefixedText();
	$message = __METHOD__ . ": set $flag flag on '$name'; $reason";
	$this->logger->debug( $message );
}
6506 }