Deprecate setting Parser::mTitle to null
[lhc/web/wiklou.git] / includes / parser / Parser.php
1 <?php
2 /**
3 * PHP parser that converts wiki markup to HTML.
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License along
16 * with this program; if not, write to the Free Software Foundation, Inc.,
17 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
18 * http://www.gnu.org/copyleft/gpl.html
19 *
20 * @file
21 * @ingroup Parser
22 */
23 use MediaWiki\BadFileLookup;
24 use MediaWiki\Config\ServiceOptions;
25 use MediaWiki\Linker\LinkRenderer;
26 use MediaWiki\Linker\LinkRendererFactory;
27 use MediaWiki\Linker\LinkTarget;
28 use MediaWiki\MediaWikiServices;
29 use MediaWiki\Special\SpecialPageFactory;
30 use Psr\Log\NullLogger;
31 use Wikimedia\ScopedCallback;
32 use Psr\Log\LoggerInterface;
33
34 /**
35 * @defgroup Parser Parser
36 */
37
38 /**
39 * PHP Parser - Processes wiki markup (which uses a more user-friendly
40 * syntax, such as "[[link]]" for making links), and provides a one-way
41 * transformation of that wiki markup into (X)HTML output / markup
42 * (which in turn the browser understands, and can display).
43 *
44 * There are seven main entry points into the Parser class:
45 *
46 * - Parser::parse()
47 * produces HTML output
48 * - Parser::preSaveTransform()
49 * produces altered wiki markup
50 * - Parser::preprocess()
51 * removes HTML comments and expands templates
52 * - Parser::cleanSig() and Parser::cleanSigInSig()
53 * cleans a signature before saving it to preferences
54 * - Parser::getSection()
55 * return the content of a section from an article for section editing
56 * - Parser::replaceSection()
57 * replaces a section by number inside an article
58 * - Parser::getPreloadText()
59 * removes <noinclude> sections and <includeonly> tags
60 *
61 * @warning $wgUser or $wgTitle or $wgRequest or $wgLang. Keep them away!
62 *
63 * @par Settings:
64 * $wgNamespacesWithSubpages
65 *
66 * @par Settings only within ParserOptions:
67 * $wgAllowExternalImages
68 * $wgAllowSpecialInclusion
69 * $wgInterwikiMagic
70 * $wgMaxArticleSize
71 *
72 * @ingroup Parser
73 */
74 class Parser {
75 /**
76 * Update this version number when the ParserOutput format
77 * changes in an incompatible way, so the parser cache
78 * can automatically discard old data.
79 */
80 const VERSION = '1.6.4';
81
82 /**
83 * Update this version number when the output of serialiseHalfParsedText()
84 * changes in an incompatible way
85 */
86 const HALF_PARSED_VERSION = 2;
87
88 # Flags for Parser::setFunctionHook
89 const SFH_NO_HASH = 1;
90 const SFH_OBJECT_ARGS = 2;
91
92 # Constants needed for external link processing
93 # Everything except bracket, space, or control characters
94 # \p{Zs} is unicode 'separator, space' category. It covers the space 0x20
95 # as well as U+3000 (IDEOGRAPHIC SPACE), for T21052
96 # \x{FFFD} is the Unicode replacement character, which Preprocessor_DOM
97 # uses to replace invalid HTML characters.
98 const EXT_LINK_URL_CLASS = '[^][<>"\\x00-\\x20\\x7F\p{Zs}\x{FFFD}]';
99 # Simplified expression to match an IPv4 or IPv6 address, or
100 # at least one character of a host name (embeds EXT_LINK_URL_CLASS)
101 const EXT_LINK_ADDR = '(?:[0-9.]+|\\[(?i:[0-9a-f:.]+)\\]|[^][<>"\\x00-\\x20\\x7F\p{Zs}\x{FFFD}])';
102 # RegExp to make image URLs (embeds IPv6 part of EXT_LINK_ADDR)
103 // phpcs:ignore Generic.Files.LineLength
104 const EXT_IMAGE_REGEX = '/^(http:\/\/|https:\/\/)((?:\\[(?i:[0-9a-f:.]+)\\])?[^][<>"\\x00-\\x20\\x7F\p{Zs}\x{FFFD}]+)
105 \\/([A-Za-z0-9_.,~%\\-+&;#*?!=()@\\x80-\\xFF]+)\\.((?i)gif|png|jpg|jpeg)$/Sxu';
106
107 # Regular expression for a non-newline space
108 const SPACE_NOT_NL = '(?:\t|&nbsp;|&\#0*160;|&\#[Xx]0*[Aa]0;|\p{Zs})';
109
110 # Flags for preprocessToDom
111 const PTD_FOR_INCLUSION = 1;
112
113 # Allowed values for $this->mOutputType
114 # Parameter to startExternalParse().
115 const OT_HTML = 1; # like parse()
116 const OT_WIKI = 2; # like preSaveTransform()
117 const OT_PREPROCESS = 3; # like preprocess()
118 const OT_MSG = 3;
119 const OT_PLAIN = 4; # like extractSections() - portions of the original are returned unchanged.
120
121 /**
122 * @var string Prefix and suffix for temporary replacement strings
123 * for the multipass parser.
124 *
125 * \x7f should never appear in input as it's disallowed in XML.
126 * Using it at the front also gives us a little extra robustness
127 * since it shouldn't match when butted up against identifier-like
128 * string constructs.
129 *
130 * Must not consist of all title characters, or else it will change
131 * the behavior of <nowiki> in a link.
132 *
133 * Must have a character that needs escaping in attributes, otherwise
134 * someone could put a strip marker in an attribute, to get around
135 * escaping quote marks, and break out of the attribute. Thus we add
136 * `'".
137 */
138 const MARKER_SUFFIX = "-QINU`\"'\x7f";
139 const MARKER_PREFIX = "\x7f'\"`UNIQ-";
140
141 # Markers used for wrapping the table of contents
142 const TOC_START = '<mw:toc>';
143 const TOC_END = '</mw:toc>';
144
145 /** @var int Assume that no output will later be saved this many seconds after parsing */
146 const MAX_TTS = 900;
147
148 # Persistent:
149 public $mTagHooks = [];
150 public $mTransparentTagHooks = [];
151 public $mFunctionHooks = [];
152 public $mFunctionSynonyms = [ 0 => [], 1 => [] ];
153 public $mFunctionTagHooks = [];
154 public $mStripList = [];
155 public $mDefaultStripList = [];
156 public $mVarCache = [];
157 public $mImageParams = [];
158 public $mImageParamsMagicArray = [];
159 public $mMarkerIndex = 0;
160 /**
161 * @var bool Whether firstCallInit still needs to be called
162 */
163 public $mFirstCall = true;
164
165 # Initialised by initialiseVariables()
166
167 /**
168 * @var MagicWordArray
169 */
170 public $mVariables;
171
172 /**
173 * @var MagicWordArray
174 */
175 public $mSubstWords;
176
177 /**
178 * @deprecated since 1.34, there should be no need to use this
179 * @var array
180 */
181 public $mConf;
182
183 # Initialised in constructor
184 public $mExtLinkBracketedRegex, $mUrlProtocols;
185
186 # Initialized in getPreprocessor()
187 /** @var Preprocessor */
188 public $mPreprocessor;
189
190 # Cleared with clearState():
191 /**
192 * @var ParserOutput
193 */
194 public $mOutput;
195 public $mAutonumber;
196
197 /**
198 * @var StripState
199 */
200 public $mStripState;
201
202 public $mIncludeCount;
203 /**
204 * @var LinkHolderArray
205 */
206 public $mLinkHolders;
207
208 public $mLinkID;
209 public $mIncludeSizes, $mPPNodeCount, $mGeneratedPPNodeCount, $mHighestExpansionDepth;
210 public $mDefaultSort;
211 public $mTplRedirCache, $mHeadings, $mDoubleUnderscores;
212 public $mExpensiveFunctionCount; # number of expensive parser function calls
213 public $mShowToc, $mForceTocPosition;
214 /** @var array */
215 public $mTplDomCache;
216
217 /**
218 * @var User
219 */
220 public $mUser; # User object; only used when doing pre-save transform
221
222 # Temporary
223 # These are variables reset at least once per parse regardless of $clearState
224
225 /**
226 * @var ParserOptions
227 */
228 public $mOptions;
229
230 /**
231 * Since 1.34, leaving `mTitle` uninitialized or setting `mTitle` to
232 * `null` is deprecated.
233 *
234 * @internal
235 * @var Title|null
236 */
237 public $mTitle; # Title context, used for self-link rendering and similar things
238 public $mOutputType; # Output type, one of the OT_xxx constants
239 public $ot; # Shortcut alias, see setOutputType()
240 public $mRevisionObject; # The revision object of the specified revision ID
241 public $mRevisionId; # ID to display in {{REVISIONID}} tags
242 public $mRevisionTimestamp; # The timestamp of the specified revision ID
243 public $mRevisionUser; # User to display in {{REVISIONUSER}} tag
244 public $mRevisionSize; # Size to display in {{REVISIONSIZE}} variable
245 public $mRevIdForTs; # The revision ID which was used to fetch the timestamp
246 public $mInputSize = false; # For {{PAGESIZE}} on current page.
247
248 /**
249 * @var array Array with the language name of each language link (i.e. the
250 * interwiki prefix) in the key, value arbitrary. Used to avoid sending
251 * duplicate language links to the ParserOutput.
252 */
253 public $mLangLinkLanguages;
254
255 /**
256 * @var MapCacheLRU|null
257 * @since 1.24
258 *
259 * A cache of the current revisions of titles. Keys are $title->getPrefixedDbKey()
260 */
261 public $currentRevisionCache;
262
263 /**
264 * @var bool|string Recursive call protection.
265 * This variable should be treated as if it were private.
266 */
267 public $mInParse = false;
268
269 /** @var SectionProfiler */
270 protected $mProfiler;
271
272 /**
273 * @var LinkRenderer
274 */
275 protected $mLinkRenderer;
276
277 /** @var MagicWordFactory */
278 private $magicWordFactory;
279
280 /** @var Language */
281 private $contLang;
282
283 /** @var ParserFactory */
284 private $factory;
285
286 /** @var SpecialPageFactory */
287 private $specialPageFactory;
288
289 /**
290 * This is called $svcOptions instead of $options like elsewhere to avoid confusion with
291 * $mOptions, which is public and widely used, and also with the local variable $options used
292 * for ParserOptions throughout this file.
293 *
294 * @var ServiceOptions
295 */
296 private $svcOptions;
297
298 /** @var LinkRendererFactory */
299 private $linkRendererFactory;
300
301 /** @var NamespaceInfo */
302 private $nsInfo;
303
304 /** @var LoggerInterface */
305 private $logger;
306
307 /** @var BadFileLookup */
308 private $badFileLookup;
309
310 /**
311 * TODO Make this a const when HHVM support is dropped (T192166)
312 *
313 * @var array
314 * @since 1.33
315 */
316 public static $constructorOptions = [
317 // See $wgParserConf documentation
318 'class',
319 'preprocessorClass',
320 // See documentation for the corresponding config options
321 'ArticlePath',
322 'EnableScaryTranscluding',
323 'ExtraInterlanguageLinkPrefixes',
324 'FragmentMode',
325 'LanguageCode',
326 'MaxSigChars',
327 'MaxTocLevel',
328 'MiserMode',
329 'ScriptPath',
330 'Server',
331 'ServerName',
332 'ShowHostnames',
333 'Sitename',
334 'StylePath',
335 'TranscludeCacheExpiry',
336 ];
337
/**
 * Constructing parsers directly is deprecated! Use a ParserFactory.
 *
 * Two calling conventions are accepted:
 *  - Current: $svcOptions is a ServiceOptions instance and the remaining
 *    parameters are as documented below.
 *  - Pre-1.34: $svcOptions is the parser configuration array (or empty), the
 *    seventh argument is a Config, the eighth a LinkRendererFactory and the
 *    ninth a NamespaceInfo.
 *
 * Any collaborator left null is fetched from MediaWikiServices, except the
 * logger, which falls back to a NullLogger.
 *
 * @param ServiceOptions|array|null $svcOptions
 * @param MagicWordFactory|null $magicWordFactory
 * @param Language|null $contLang Content language
 * @param ParserFactory|null $factory
 * @param string|null $urlProtocols As returned from wfUrlProtocols()
 * @param SpecialPageFactory|null $spFactory
 * @param LinkRendererFactory|null $linkRendererFactory
 * @param NamespaceInfo|null $nsInfo
 * @param LoggerInterface|null $logger
 * @param BadFileLookup|null $badFileLookup
 */
public function __construct(
	$svcOptions = null,
	MagicWordFactory $magicWordFactory = null,
	Language $contLang = null,
	ParserFactory $factory = null,
	$urlProtocols = null,
	SpecialPageFactory $spFactory = null,
	$linkRendererFactory = null,
	$nsInfo = null,
	$logger = null,
	BadFileLookup $badFileLookup = null
) {
	if ( !$svcOptions || is_array( $svcOptions ) ) {
		// Pre-1.34 calling convention is the first parameter is just ParserConf, the seventh is
		// Config, and the eighth is LinkRendererFactory.
		$this->mConf = (array)$svcOptions;
		if ( empty( $this->mConf['class'] ) ) {
			$this->mConf['class'] = self::class;
		}
		if ( empty( $this->mConf['preprocessorClass'] ) ) {
			$this->mConf['preprocessorClass'] = self::getDefaultPreprocessorClass();
		}
		// func_get_arg() reports the *current* value of a parameter, so each positional
		// argument has to be read before the parameter occupying that slot is overwritten.
		// The statement order below must therefore be kept.
		$this->svcOptions = new ServiceOptions( self::$constructorOptions,
			$this->mConf, func_num_args() > 6
				? func_get_arg( 6 ) : MediaWikiServices::getInstance()->getMainConfig()
		);
		$linkRendererFactory = func_num_args() > 7 ? func_get_arg( 7 ) : null;
		$nsInfo = func_num_args() > 8 ? func_get_arg( 8 ) : null;
	} else {
		// New calling convention
		$svcOptions->assertRequiredOptions( self::$constructorOptions );
		// $this->mConf is public, so we'll keep those two options there as well for
		// compatibility until it's removed
		$this->mConf = [
			'class' => $svcOptions->get( 'class' ),
			'preprocessorClass' => $svcOptions->get( 'preprocessorClass' ),
		];
		$this->svcOptions = $svcOptions;
	}

	$this->mUrlProtocols = $urlProtocols ?? wfUrlProtocols();
	$this->mExtLinkBracketedRegex = '/\[(((?i)' . $this->mUrlProtocols . ')' .
		self::EXT_LINK_ADDR .
		self::EXT_LINK_URL_CLASS . '*)\p{Zs}*([^\]\\x00-\\x08\\x0a-\\x1F\\x{FFFD}]*?)\]/Su';

	$this->magicWordFactory = $magicWordFactory ??
		MediaWikiServices::getInstance()->getMagicWordFactory();

	$this->contLang = $contLang ?? MediaWikiServices::getInstance()->getContentLanguage();

	$this->factory = $factory ?? MediaWikiServices::getInstance()->getParserFactory();
	$this->specialPageFactory = $spFactory ??
		MediaWikiServices::getInstance()->getSpecialPageFactory();
	$this->linkRendererFactory = $linkRendererFactory ??
		MediaWikiServices::getInstance()->getLinkRendererFactory();
	$this->nsInfo = $nsInfo ?? MediaWikiServices::getInstance()->getNamespaceInfo();
	// With the pre-1.34 convention the ninth positional argument is a NamespaceInfo, and it
	// is also bound to $logger. Accept only real loggers so that $this->logger always holds
	// a LoggerInterface as declared.
	$this->logger = $logger instanceof LoggerInterface ? $logger : new NullLogger();
	$this->badFileLookup = $badFileLookup ??
		MediaWikiServices::getInstance()->getBadFileLookup();
}
412
/**
 * Unset every property of this object on destruction, to reduce memory
 * usage and limit the impact of circular references.
 */
public function __destruct() {
	if ( isset( $this->mLinkHolders ) ) {
		// @phan-suppress-next-line PhanTypeObjectUnsetDeclaredProperty
		unset( $this->mLinkHolders );
	}
	// Work from a snapshot of the property names that are still set
	foreach ( array_keys( get_object_vars( $this ) ) as $property ) {
		unset( $this->$property );
	}
}
426
/**
 * Clone handler: clears the recursion guard on the copy, detaches fields
 * that may be held by reference, and fires the 'ParserCloned' hook so that
 * extensions can clean up.
 */
public function __clone() {
	$this->mInParse = false;

	// T58226: Taking a reference "to" an object field turns the field itself
	// into a reference (for as long as the other reference is alive), and
	// cloning copies reference fields as references into the new object.
	// Both are defined PHP5 behaviors, inconvenient as they are when old
	// PHP4-era hooks pass fields by reference.
	foreach ( [ 'mStripState', 'mVarCache' ] as $field ) {
		// Take a plain (non-reference) copy of the value, then rebind the
		// field so that it refers to that new copy only.
		$detached = $this->$field;
		$this->$field =& $detached;
		unset( $detached );
	}

	Hooks::run( 'ParserCloned', [ $this ] );
}
449
/**
 * Name of the preprocessor class to use when none has been configured.
 *
 * @since 1.34
 * @deprecated since 1.34, removing configurability of preprocessor
 * @return string
 */
public static function getDefaultPreprocessorClass() {
	$defaultClass = Preprocessor_Hash::class;
	return $defaultClass;
}
460
/**
 * One-time initialisation, performed on the first call only: registers the
 * core parser functions and tag hooks, initialises the variables and runs
 * the 'ParserFirstCallInit' hook. Later calls do nothing.
 */
public function firstCallInit() {
	if ( $this->mFirstCall ) {
		$this->mFirstCall = false;

		CoreParserFunctions::register( $this );
		CoreTagHooks::register( $this );
		$this->initialiseVariables();

		// The hook takes the parser by reference; go through a local alias
		// to avoid the PHP 7.1 warning about passing $this by reference
		$self = $this;
		Hooks::run( 'ParserFirstCallInit', [ &$self ] );
	}
}
478
/**
 * Reset all per-parse state of the parser, then run the 'ParserClearState' hook.
 *
 * @private
 */
public function clearState() {
	$this->firstCallInit();
	$this->resetOutput();
	$this->mAutonumber = 0;
	$this->mIncludeCount = [];
	$this->mLinkHolders = new LinkHolderArray( $this );
	$this->mLinkID = 0;

	# Forget everything about the revision being rendered
	$this->mRevisionSize = null;
	$this->mRevisionUser = null;
	$this->mRevisionId = null;
	$this->mRevisionTimestamp = null;
	$this->mRevisionObject = null;

	$this->mVarCache = [];
	$this->mUser = null;
	$this->mLangLinkLanguages = [];
	$this->currentRevisionCache = null;

	$this->mStripState = new StripState( $this );

	# Clear these on every parse, T6549
	$this->mTplDomCache = [];
	$this->mTplRedirCache = [];

	$this->mShowToc = true;
	$this->mForceTocPosition = false;
	$this->mIncludeSizes = [ 'post-expand' => 0, 'arg' => 0 ];
	$this->mPPNodeCount = 0;
	$this->mGeneratedPPNodeCount = 0;
	$this->mHighestExpansionDepth = 0;
	$this->mDefaultSort = false;
	$this->mHeadings = [];
	$this->mDoubleUnderscores = [];
	$this->mExpensiveFunctionCount = 0;

	# Fix cloning: drop a preprocessor that is bound to a different parser
	if ( isset( $this->mPreprocessor ) && $this->mPreprocessor->parser !== $this ) {
		$this->mPreprocessor = null;
	}

	$this->mProfiler = new SectionProfiler();

	// The hook takes the parser by reference; go through a local alias to
	// avoid the PHP 7.1 warning about passing $this by reference
	$self = $this;
	Hooks::run( 'ParserClearState', [ &$self ] );
}
528
/**
 * Replace the current ParserOutput with a fresh one and register its
 * recordOption() method as a watcher on the current ParserOptions.
 */
public function resetOutput() {
	$freshOutput = new ParserOutput;
	$this->mOutput = $freshOutput;
	$this->mOptions->registerWatcher( [ $freshOutput, 'recordOption' ] );
}
536
/**
 * Convert wikitext to HTML
 * Do not call this function recursively.
 *
 * The revision-related fields of the parser are saved on entry and put back
 * before returning, so a $revid given here only applies to this call.
 *
 * @param string $text Text we want to parse
 * @param-taint $text escapes_htmlnoent
 * @param Title $title
 * @param ParserOptions $options
 * @param bool $linestart
 * @param bool $clearState
 * @param int|null $revid ID of the revision being rendered. This is used to render
 *  REVISION* magic words. 0 means that any current revision will be used. Null means
 *  that {{REVISIONID}}/{{REVISIONUSER}} will be empty and {{REVISIONTIMESTAMP}} will
 *  use the current timestamp.
 * @return ParserOutput A ParserOutput
 * @return-taint escaped
 */
public function parse(
	$text, Title $title, ParserOptions $options,
	$linestart = true, $clearState = true, $revid = null
) {
	if ( $clearState ) {
		// Strip markers are built with U+007F DELETE, so that character
		// must not be present in the input text.
		$text = strtr( $text, "\x7f", "?" );
		// Kept in a local for the duration of this call
		// (presumably releases the lock when it goes out of scope; see lock())
		$magicScopeVariable = $this->lock();
	}
	// Strip U+0000 NULL (T159174)
	$text = str_replace( "\000", '', $text );

	$this->startParse( $title, $options, self::OT_HTML, $clearState );

	$this->currentRevisionCache = null;
	$this->mInputSize = strlen( $text );
	if ( $this->mOptions->getEnableLimitReport() ) {
		$this->mOutput->resetParseStartTime();
	}

	// Remember the revision context so that it can be put back at the end
	$savedRevisionState = [
		$this->mRevisionId,
		$this->mRevisionObject,
		$this->mRevisionTimestamp,
		$this->mRevisionUser,
		$this->mRevisionSize,
	];
	if ( $revid !== null ) {
		$this->mRevisionId = $revid;
		$this->mRevisionObject = null;
		$this->mRevisionTimestamp = null;
		$this->mRevisionUser = null;
		$this->mRevisionSize = null;
	}

	// The hooks take the parser by reference; go through a local alias to
	// avoid the PHP 7.1 warning about passing $this by reference
	$self = $this;
	Hooks::run( 'ParserBeforeStrip', [ &$self, &$text, &$this->mStripState ] );
	# No more strip!
	Hooks::run( 'ParserAfterStrip', [ &$self, &$text, &$this->mStripState ] );
	$text = $this->internalParse( $text );
	Hooks::run( 'ParserAfterParse', [ &$self, &$text, &$this->mStripState ] );

	$text = $this->internalParseHalfParsed( $text, true, $linestart );

	/**
	 * A converted title will be provided in the output object if title and
	 * content conversion are enabled, the article text does not contain
	 * a conversion-suppressing double-underscore tag, and no
	 * {{DISPLAYTITLE:...}} is present. DISPLAYTITLE takes precedence over
	 * automatic link conversion.
	 */
	$provideConvertedTitle = !$options->getDisableTitleConversion()
		&& !isset( $this->mDoubleUnderscores['nocontentconvert'] )
		&& !isset( $this->mDoubleUnderscores['notitleconvert'] )
		&& $this->mOutput->getDisplayTitle() === false;
	if ( $provideConvertedTitle ) {
		$ruleTitle = $this->getTargetLanguage()->getConvRuleTitle();
		// Fall back to converting the page title when no rule title is set
		$this->mOutput->setTitleText(
			$ruleTitle ?: $this->getTargetLanguage()->convertTitle( $title )
		);
	}

	# Compute runtime adaptive expiry if set
	$this->mOutput->finalizeAdaptiveCacheExpiry();

	# Warn if too many heavyweight parser functions were used
	if ( $this->mExpensiveFunctionCount > $this->mOptions->getExpensiveParserFunctionLimit() ) {
		$this->limitationWarn(
			'expensive-parserfunction',
			$this->mExpensiveFunctionCount,
			$this->mOptions->getExpensiveParserFunctionLimit()
		);
	}

	# Information on limits, for the benefit of users who try to skirt them
	if ( $this->mOptions->getEnableLimitReport() ) {
		$text .= $this->makeLimitReport();
	}

	# Wrap non-interface parser output in a <div> so it can be targeted
	# with CSS (T37247)
	$wrapperClass = $this->mOptions->getWrapOutputClass();
	if ( $wrapperClass !== false && !$this->mOptions->getInterfaceMessage() ) {
		$this->mOutput->addWrapperDivClass( $wrapperClass );
	}

	$this->mOutput->setText( $text );

	// Put back the revision context saved above
	[
		$this->mRevisionId,
		$this->mRevisionObject,
		$this->mRevisionTimestamp,
		$this->mRevisionUser,
		$this->mRevisionSize,
	] = $savedRevisionState;
	$this->mInputSize = false;
	$this->currentRevisionCache = null;

	return $this->mOutput;
}
654
/**
 * Record the limit report data in the current ParserOutput and build the
 * limit report, which is returned as HTML comments.
 *
 * @return string
 */
protected function makeLimitReport() {
	$maxIncludeSize = $this->mOptions->getMaxIncludeSize();

	// CPU time is recorded only when a value is available
	$cpuSeconds = $this->mOutput->getTimeSinceStart( 'cpu' );
	if ( $cpuSeconds !== null ) {
		$this->mOutput->setLimitReportData( 'limitreport-cputime', sprintf( "%.3f", $cpuSeconds ) );
	}

	$wallSeconds = $this->mOutput->getTimeSinceStart( 'wall' );
	$this->mOutput->setLimitReportData( 'limitreport-walltime', sprintf( "%.3f", $wallSeconds ) );

	// Each entry is a [ current value, limit ] pair; the order here is the
	// order in which the data is recorded.
	$usageAndLimits = [
		'limitreport-ppvisitednodes' =>
			[ $this->mPPNodeCount, $this->mOptions->getMaxPPNodeCount() ],
		'limitreport-ppgeneratednodes' =>
			[ $this->mGeneratedPPNodeCount, $this->mOptions->getMaxGeneratedPPNodeCount() ],
		'limitreport-postexpandincludesize' =>
			[ $this->mIncludeSizes['post-expand'], $maxIncludeSize ],
		'limitreport-templateargumentsize' =>
			[ $this->mIncludeSizes['arg'], $maxIncludeSize ],
		'limitreport-expansiondepth' =>
			[ $this->mHighestExpansionDepth, $this->mOptions->getMaxPPExpandDepth() ],
		'limitreport-expensivefunctioncount' =>
			[ $this->mExpensiveFunctionCount, $this->mOptions->getExpensiveParserFunctionLimit() ],
	];
	foreach ( $usageAndLimits as $reportKey => $pair ) {
		$this->mOutput->setLimitReportData( $reportKey, $pair );
	}

	foreach ( $this->mStripState->getLimitReport() as [ $stripKey, $stripValue ] ) {
		$this->mOutput->setLimitReportData( $stripKey, $stripValue );
	}

	Hooks::run( 'ParserLimitReportPrepare', [ $this, $this->mOutput ] );

	// Fixed header lines of the textual report
	$limitReport = "NewPP limit report\n";
	if ( $this->svcOptions->get( 'ShowHostnames' ) ) {
		$limitReport .= 'Parsed by ' . wfHostname() . "\n";
	}
	$limitReport .= 'Cached time: ' . $this->mOutput->getCacheTime() . "\n";
	$limitReport .= 'Cache expiry: ' . $this->mOutput->getCacheExpiry() . "\n";
	$dynamicLabel = $this->mOutput->hasDynamicContent() ? 'true' : 'false';
	$limitReport .= 'Dynamic content: ' . $dynamicLabel . "\n";
	$limitReport .= 'Complications: [' . implode( ', ', $this->mOutput->getAllFlags() ) . "]\n";

	// One line per recorded datum, unless the hook takes over the formatting
	foreach ( $this->mOutput->getLimitReportData() as $key => $value ) {
		$useDefaultFormat = Hooks::run( 'ParserLimitReportFormat',
			[ $key, &$value, &$limitReport, false, false ]
		);
		if ( !$useDefaultFormat ) {
			continue;
		}

		$keyMsg = wfMessage( $key )->inLanguage( 'en' )->useDatabase( false );
		$valueMsg = wfMessage( [ "$key-value-text", "$key-value" ] )
			->inLanguage( 'en' )->useDatabase( false );
		if ( !$valueMsg->exists() ) {
			$valueMsg = new RawMessage( '$1' );
		}
		if ( $keyMsg->isDisabled() || $valueMsg->isDisabled() ) {
			continue;
		}
		$valueMsg->params( $value );
		$limitReport .= $keyMsg->text() . ': ' . $valueMsg->text() . "\n";
	}

	// Since we're not really outputting HTML, decode the entities and
	// then re-encode the things that need hiding inside HTML comments.
	$limitReport = htmlspecialchars_decode( $limitReport );

	// Sanitize for comment. Note '‐' in the replacement is U+2010,
	// which looks much like the problematic '-'.
	$limitReport = str_replace( [ '-', '&' ], [ '‐', '&amp;' ], $limitReport );
	$text = "\n<!-- \n$limitReport-->\n";

	// Add on template profiling data in human/machine readable way:
	// the ten entries with the largest 'real' value, largest first
	$statsByFunction = $this->mProfiler->getFunctionStats();
	uasort( $statsByFunction, static function ( $left, $right ) {
		return $right['real'] <=> $left['real'];
	} );
	$profileReport = [];
	foreach ( array_slice( $statsByFunction, 0, 10 ) as $stat ) {
		$profileReport[] = sprintf(
			"%6.2f%% %8.3f %6d %s",
			$stat['%real'],
			$stat['real'],
			$stat['calls'],
			htmlspecialchars( $stat['name'] )
		);
	}
	$text .= "<!--\nTransclusion expansion time report (%,ms,calls,template)\n";
	$text .= implode( "\n", $profileReport ) . "\n-->\n";

	$this->mOutput->setLimitReportData( 'limitreport-timingprofile', $profileReport );

	// Add other cache related metadata
	if ( $this->svcOptions->get( 'ShowHostnames' ) ) {
		$this->mOutput->setLimitReportData( 'cachereport-origin', wfHostname() );
	}
	$this->mOutput->setLimitReportData( 'cachereport-timestamp', $this->mOutput->getCacheTime() );
	$this->mOutput->setLimitReportData( 'cachereport-ttl', $this->mOutput->getCacheExpiry() );
	$this->mOutput->setLimitReportData(
		'cachereport-transientcontent',
		$this->mOutput->hasDynamicContent()
	);

	// Log pages whose generated node count exceeds a tenth of the limit
	if ( $this->mGeneratedPPNodeCount > $this->mOptions->getMaxGeneratedPPNodeCount() / 10 ) {
		wfDebugLog(
			'generated-pp-node-count',
			$this->mGeneratedPPNodeCount . ' ' . $this->mTitle->getPrefixedDBkey()
		);
	}
	return $text;
}
770
/**
 * Half-parse wikitext to half-parsed HTML. This recursive parser entry point
 * can be called from an extension tag hook.
 *
 * The output of this function IS NOT SAFE PARSED HTML; it is "half-parsed"
 * instead, which means that lists and links have not been fully parsed yet,
 * and strip markers are still present.
 *
 * Use recursiveTagParseFully() to fully parse wikitext to output-safe HTML.
 *
 * Use this function if you're a parser tag hook and you want to parse
 * wikitext before or after applying additional transformations, and you
 * intend to *return the result as hook output*, which will cause it to go
 * through the rest of parsing process automatically.
 *
 * If $frame is not provided, then template variables (e.g., {{{1}}}) within
 * $text are not expanded
 *
 * @param string $text Text extension wants to have parsed
 * @param-taint $text escapes_htmlnoent
 * @param bool|PPFrame $frame The frame to use for expanding any template variables
 * @return string UNSAFE half-parsed HTML
 * @return-taint escaped
 */
public function recursiveTagParse( $text, $frame = false ) {
	// The hooks take the parser by reference; go through a local alias to
	// avoid the PHP 7.1 warning about passing $this by reference
	$self = $this;
	foreach ( [ 'ParserBeforeStrip', 'ParserAfterStrip' ] as $hookName ) {
		Hooks::run( $hookName, [ &$self, &$text, &$this->mStripState ] );
	}
	return $this->internalParse( $text, false, $frame );
}
803
/**
 * Fully parse wikitext to fully parsed HTML. This recursive parser entry
 * point can be called from an extension tag hook.
 *
 * The output of this function is fully-parsed HTML that is safe for output.
 * If you're a parser tag hook, you might want to use recursiveTagParse()
 * instead.
 *
 * If $frame is not provided, then template variables (e.g., {{{1}}}) within
 * $text are not expanded
 *
 * @since 1.25
 *
 * @param string $text Text extension wants to have parsed
 * @param-taint $text escapes_htmlnoent
 * @param bool|PPFrame $frame The frame to use for expanding any template variables
 * @return string Fully parsed HTML
 * @return-taint escaped
 */
public function recursiveTagParseFully( $text, $frame = false ) {
	// First the half-parse, then the remaining passes on its result
	$halfParsed = $this->recursiveTagParse( $text, $frame );
	return $this->internalParseHalfParsed( $halfParsed, false );
}
828
/**
 * Expand templates and variables in the text, producing valid, static wikitext.
 * Also removes comments.
 * Do not call this function recursively.
 *
 * $title must always be passed, because a required parameter follows it, but
 * it may be null; it is handed to startParse() as given.
 *
 * @param string $text
 * @param Title|null $title
 * @param ParserOptions $options
 * @param int|null $revid Used as the revision ID for this parse when not null
 * @param bool|PPFrame $frame
 * @return mixed|string
 */
public function preprocess( $text, ?Title $title,
	ParserOptions $options, $revid = null, $frame = false
) {
	// Kept in a local for the duration of this call
	// (presumably releases the lock when it goes out of scope; see lock())
	$magicScopeVariable = $this->lock();
	$this->startParse( $title, $options, self::OT_PREPROCESS, true );
	if ( $revid !== null ) {
		$this->mRevisionId = $revid;
	}
	// Avoid PHP 7.1 warning from passing $this by reference
	$parser = $this;
	Hooks::run( 'ParserBeforeStrip', [ &$parser, &$text, &$this->mStripState ] );
	Hooks::run( 'ParserAfterStrip', [ &$parser, &$text, &$this->mStripState ] );
	$text = $this->replaceVariables( $text, $frame );
	$text = $this->mStripState->unstripBoth( $text );
	return $text;
}
856
/**
 * Recursive parser entry point that can be called from an extension tag
 * hook. Runs replaceVariables() on the text and then unstrips the result.
 *
 * @param string $text Text to be expanded
 * @param bool|PPFrame $frame The frame to use for expanding any template variables
 * @return string
 * @since 1.19
 */
public function recursivePreprocess( $text, $frame = false ) {
	$expanded = $this->replaceVariables( $text, $frame );
	// The strip state is looked up only after the expansion has finished
	return $this->mStripState->unstripBoth( $expanded );
}
871
/**
 * Process the wikitext for the "?preload=" feature. (T7210)
 *
 * "<noinclude>", "<includeonly>" etc. are parsed as for template
 * transclusion, comments, templates, arguments, tags hooks and parser
 * functions are untouched.
 *
 * @param string $text
 * @param Title $title
 * @param ParserOptions $options
 * @param array $params Parameters passed to a RawMessage built from $text
 * @return string
 */
public function getPreloadText( $text, Title $title, ParserOptions $options, $params = [] ) {
	// Apply the message parameters to the raw text first
	$text = ( new RawMessage( $text ) )->params( $params )->plain();

	# Parser (re)initialisation
	$magicScopeVariable = $this->lock();
	$this->startParse( $title, $options, self::OT_PLAIN, true );

	// Expand as for inclusion, with the no-arguments and no-templates flags set
	$expandFlags = PPFrame::NO_ARGS | PPFrame::NO_TEMPLATES;
	$dom = $this->preprocessToDom( $text, self::PTD_FOR_INCLUSION );
	$expanded = $this->getPreprocessor()->newFrame()->expand( $dom, $expandFlags );
	return $this->mStripState->unstripBoth( $expanded );
}
899
/**
 * Set the current user.
 * Should only be used when doing pre-save transform.
 *
 * @param User|null $user User object or null (to reset)
 */
public function setUser( $user ) {
	// Stored as given; no validation is done here
	$this->mUser = $user;
}
909
/**
 * Set the context title.
 *
 * When no title is given, a placeholder title "Badtitle/Parser" in the
 * special namespace is used instead. Any fragment on the title is removed
 * before it is stored.
 *
 * @param Title|null $t
 */
public function setTitle( ?Title $t = null ) {
	if ( !$t ) {
		$t = Title::makeTitle( NS_SPECIAL, 'Badtitle/Parser' );
	}

	# Strip the fragment to avoid various odd effects
	$this->mTitle = $t->hasFragment() ? $t->createFragmentTarget( '' ) : $t;
}
927
/**
 * Accessor for the Title object.
 *
 * Since 1.34, leaving `mTitle` uninitialized as `null` is deprecated; a
 * deprecation notice is raised when that is the case, and null is still
 * returned.
 *
 * @return Title|null
 */
public function getTitle() : ?Title {
	$title = $this->mTitle;
	if ( $title === null ) {
		wfDeprecated( 'Parser title should never be null', '1.34' );
	}
	return $title;
}
941
/**
 * Accessor/mutator for the Title object.
 *
 * Delegates to wfSetVar() on the mTitle property; unlike setTitle(), no
 * fragment stripping is applied here.
 *
 * @param Title|null $x Title object or null to just get the current one
 * @return Title|null
 */
public function Title( ?Title $x = null ) : ?Title {
	return wfSetVar( $this->mTitle, $x );
}
951
/**
 * Set the output type.
 *
 * Also rebuilds the $this->ot shortcut array, which holds one boolean per
 * known output type ('html', 'wiki', 'pre', 'plain').
 *
 * @param int $ot New value
 */
public function setOutputType( $ot ) {
	$this->mOutputType = $ot;

	# Shortcut alias
	$known = [
		'html' => self::OT_HTML,
		'wiki' => self::OT_WIKI,
		'pre' => self::OT_PREPROCESS,
		'plain' => self::OT_PLAIN,
	];
	$shortcuts = [];
	foreach ( $known as $key => $type ) {
		$shortcuts[$key] = $ot == $type;
	}
	$this->ot = $shortcuts;
}
967
/**
 * Accessor/mutator for the output type.
 *
 * Delegates to wfSetVar() on mOutputType. Note that, unlike
 * setOutputType(), this does not touch the $this->ot shortcut array.
 *
 * @param int|null $x New value or null to just get the current one
 * @return int
 */
public function OutputType( $x = null ) {
	return wfSetVar( $this->mOutputType, $x );
}
977
/**
 * Accessor for the ParserOutput object held in mOutput.
 *
 * @return ParserOutput
 */
public function getOutput() {
	return $this->mOutput;
}
986
/**
 * Accessor for the ParserOptions object held in mOptions.
 *
 * @return ParserOptions
 */
public function getOptions() {
	return $this->mOptions;
}
995
/**
 * Accessor/mutator for the ParserOptions object.
 *
 * Delegates to wfSetVar() on mOptions.
 *
 * @param ParserOptions|null $x New value or null to just get the current one
 * @return ParserOptions Current ParserOptions object
 */
public function Options( $x = null ) {
	return wfSetVar( $this->mOptions, $x );
}
1005
/**
 * Return the current link ID and advance the internal counter by one.
 *
 * @return int
 */
public function nextLinkID() {
	$current = $this->mLinkID;
	$this->mLinkID++;
	return $current;
}
1012
/**
 * Overwrite the link ID counter that nextLinkID() hands out next.
 *
 * @param int $id
 */
public function setLinkID( $id ) {
	$this->mLinkID = $id;
}
1019
/**
 * Get a language object for use in parser functions such as {{FORMATNUM:}}.
 *
 * This is simply the target language, see getTargetLanguage().
 *
 * @return Language
 */
public function getFunctionLang() {
	return $this->getTargetLanguage();
}
1027
/**
 * Get the target language for the content being parsed. This is usually the
 * language that the content is in.
 *
 * Resolution order: the target language set on the parser options, then the
 * user language object when parsing an interface message, then the page
 * language of the context title.
 *
 * @since 1.19
 *
 * @throws MWException If none of the options apply and no title is set
 * @return Language
 */
public function getTargetLanguage() {
	$options = $this->mOptions;

	$target = $options->getTargetLanguage();
	if ( $target !== null ) {
		return $target;
	}

	if ( $options->getInterfaceMessage() ) {
		return $options->getUserLangObj();
	}

	if ( $this->mTitle === null ) {
		throw new MWException( __METHOD__ . ': $this->mTitle is null' );
	}

	return $this->mTitle->getPageLanguage();
}
1050
/**
 * Get the language object for language conversion.
 *
 * Plain alias of getTargetLanguage(), which either returns a Language or
 * throws.
 *
 * @deprecated since 1.32, just use getTargetLanguage()
 * @return Language
 */
public function getConverterLanguage() {
	return $this->getTargetLanguage();
}
1059
/**
 * Get the User object to work with: the one set through setUser() when
 * present, otherwise the user of the ParserOptions object.
 *
 * @return User
 */
public function getUser() {
	return $this->mUser ?? $this->mOptions->getUser();
}
1072
/**
 * Get the preprocessor object, creating it on first use.
 *
 * The class to instantiate is read from the 'preprocessorClass' service
 * option and receives this parser as its constructor argument.
 *
 * @return Preprocessor
 */
public function getPreprocessor() {
	if ( isset( $this->mPreprocessor ) ) {
		return $this->mPreprocessor;
	}

	$class = $this->svcOptions->get( 'preprocessorClass' );
	$this->mPreprocessor = new $class( $this );
	return $this->mPreprocessor;
}
1085
/**
 * Get a LinkRenderer instance to make links with.
 *
 * The renderer is created lazily from the link renderer factory and gets
 * the stub threshold of the parser options that are current at that time.
 *
 * @since 1.28
 * @return LinkRenderer
 */
public function getLinkRenderer() {
	if ( $this->mLinkRenderer ) {
		return $this->mLinkRenderer;
	}

	// XXX We make the LinkRenderer with current options and then cache it forever
	$this->mLinkRenderer = $this->linkRendererFactory->create();
	$this->mLinkRenderer->setStubThreshold( $this->getOptions()->getStubThreshold() );

	return $this->mLinkRenderer;
}
1103
/**
 * Accessor for the MagicWordFactory that this Parser is using.
 *
 * @since 1.32
 * @return MagicWordFactory
 */
public function getMagicWordFactory() {
	return $this->magicWordFactory;
}
1113
/**
 * Accessor for the content language that this Parser is using.
 *
 * @since 1.32
 * @return Language
 */
public function getContentLanguage() {
	return $this->contLang;
}
1123
/**
 * Replaces all occurrences of HTML-style comments and the given tags
 * in the text with a unique marker and returns the new text. The output
 * parameter $matches will be an associative array filled with data in
 * the form:
 *
 * @code
 *   'UNIQ-xxxxx' => [
 *     'element',
 *     'tag content',
 *     [ 'param' => 'x' ],
 *     '<element param="x">tag content</element>' ]
 * @endcode
 *
 * The marker number comes from a function-static counter, so it keeps
 * increasing across calls.
 *
 * @param array $elements List of element names. Comments are always extracted.
 * @param string $text Source text string.
 * @param array &$matches Out parameter, Array: extracted tags
 * @return string Stripped text
 */
public static function extractTagsAndParams( $elements, $text, &$matches ) {
	static $n = 1;
	$stripped = '';
	$matches = [];

	$taglist = implode( '|', $elements );
	$start = "/<($taglist)(\\s+[^>]*?|\\s*?)(\/?>)|<(!--)/i";

	while ( $text != '' ) {
		$p = preg_split( $start, $text, 2, PREG_SPLIT_DELIM_CAPTURE );
		$stripped .= $p[0];
		$pieces = count( $p );
		if ( $pieces < 5 ) {
			# No further opening tag or comment
			break;
		}
		if ( $pieces > 5 ) {
			# comment
			$element = $p[4];
			$attributes = '';
			$close = '';
			$inside = $p[5];
		} else {
			# tag
			[ , $element, $attributes, $close, $inside ] = $p;
		}

		$marker = self::MARKER_PREFIX . "-$element-" . sprintf( '%08X', $n++ ) . self::MARKER_SUFFIX;
		$stripped .= $marker;

		if ( $close === '/>' ) {
			# Empty element tag, <tag />
			$content = null;
			$text = $inside;
			$tail = null;
		} else {
			$end = $element === '!--' ? '/(-->)/' : "/(<\\/$element\\s*>)/i";
			$rest = preg_split( $end, $inside, 2, PREG_SPLIT_DELIM_CAPTURE );
			$content = $rest[0];
			if ( count( $rest ) < 3 ) {
				# No end tag -- let it run out to the end of the text.
				$tail = '';
				$text = '';
			} else {
				[ , $tail, $text ] = $rest;
			}
		}

		$matches[$marker] = [
			$element,
			$content,
			Sanitizer::decodeTagAttributes( $attributes ),
			"<$element$attributes$close$content$tail",
		];
	}
	return $stripped;
}
1200
/**
 * Accessor for the list of strippable XML-like elements (mStripList).
 *
 * @return array
 */
public function getStripList() {
	return $this->mStripList;
}
1209
/**
 * Accessor for the StripState object (mStripState).
 *
 * @return StripState
 */
public function getStripState() {
	return $this->mStripState;
}
1218
/**
 * Add an item to the strip state.
 *
 * Builds a marker from the current marker index, advances that index and
 * registers the text under the marker as a "general" strip item. The
 * returned marker must be inserted into the stripped text; it will be
 * replaced with the original text in unstrip().
 *
 * @param string $text
 *
 * @return string The unique marker
 */
public function insertStripItem( $text ) {
	$index = $this->mMarkerIndex;
	$this->mMarkerIndex++;

	$marker = self::MARKER_PREFIX . "-item-{$index}-" . self::MARKER_SUFFIX;
	$this->mStripState->addGeneral( $marker, $text );
	return $marker;
}
1234
/**
 * Parse the wiki syntax used to render tables.
 *
 * Works line by line and keeps one stack entry per currently open
 * (possibly nested) table in each of the history arrays below.
 *
 * @private
 * @param string $text
 * @return string
 */
public function doTableStuff( $text ) {
	$lines = StringUtils::explode( "\n", $text );
	$out = '';
	$tdHistory = []; # Is currently a td tag open?
	$lastTagHistory = []; # Save history of last tag activated (td, th or caption)
	$trHistory = []; # Is currently a tr tag open?
	$trAttributes = []; # history of tr attributes
	$hasOpenedTr = []; # Did this table open a <tr> element?
	$indentLevel = 0; # indent level of the table

	foreach ( $lines as $outLine ) {
		$line = trim( $outLine );

		if ( $line === '' ) {
			# Empty line, go to next line
			$out .= $outLine . "\n";
			continue;
		}

		$firstCharacter = $line[0];
		$firstTwo = substr( $line, 0, 2 );
		$matches = [];

		if ( preg_match( '/^(:*)\s*\{\|(.*)$/', $line, $matches ) ) {
			# First check if we are starting a new table
			$indentLevel = strlen( $matches[1] );

			$attributes = $this->mStripState->unstripBoth( $matches[2] );
			$attributes = Sanitizer::fixTagAttributes( $attributes, 'table' );

			$outLine = str_repeat( '<dl><dd>', $indentLevel ) . "<table{$attributes}>";
			$tdHistory[] = false;
			$lastTagHistory[] = '';
			$trHistory[] = false;
			$trAttributes[] = '';
			$hasOpenedTr[] = false;
		} elseif ( count( $tdHistory ) == 0 ) {
			# Not inside a table: don't do any of the following
			$out .= $outLine . "\n";
			continue;
		} elseif ( $firstTwo === '|}' ) {
			# We are ending a table
			$line = '</table>' . substr( $line, 2 );
			$lastTag = array_pop( $lastTagHistory );

			if ( !array_pop( $hasOpenedTr ) ) {
				$line = "<tr><td></td></tr>{$line}";
			}

			if ( array_pop( $trHistory ) ) {
				$line = "</tr>{$line}";
			}

			if ( array_pop( $tdHistory ) ) {
				$line = "</{$lastTag}>{$line}";
			}
			array_pop( $trAttributes );

			$outLine = $indentLevel > 0
				? rtrim( $line ) . str_repeat( '</dd></dl>', $indentLevel )
				: $line;
		} elseif ( $firstTwo === '|-' ) {
			# Now we have a table row
			$line = preg_replace( '#^\|-+#', '', $line );

			# Whats after the tag is now only attributes
			$attributes = $this->mStripState->unstripBoth( $line );
			$attributes = Sanitizer::fixTagAttributes( $attributes, 'tr' );
			array_pop( $trAttributes );
			$trAttributes[] = $attributes;

			$line = '';
			$lastTag = array_pop( $lastTagHistory );
			array_pop( $hasOpenedTr );
			$hasOpenedTr[] = true;

			if ( array_pop( $trHistory ) ) {
				$line = '</tr>';
			}

			if ( array_pop( $tdHistory ) ) {
				$line = "</{$lastTag}>{$line}";
			}

			$outLine = $line;
			$trHistory[] = false;
			$tdHistory[] = false;
			$lastTagHistory[] = '';
		} elseif ( $firstCharacter === '|'
			|| $firstCharacter === '!'
			|| $firstTwo === '|+'
		) {
			# This might be cell elements, td, th or captions
			if ( $firstTwo === '|+' ) {
				$firstCharacter = '+';
				$line = substr( $line, 2 );
			} else {
				$line = substr( $line, 1 );
			}

			// Implies both are valid for table headings.
			if ( $firstCharacter === '!' ) {
				$line = StringUtils::replaceMarkup( '!!', '||', $line );
			}

			# Split up multiple cells on the same line.
			# FIXME : This can result in improper nesting of tags processed
			# by earlier parser steps.
			$cells = explode( '||', $line );

			$outLine = '';

			# Loop through each table cell
			foreach ( $cells as $cell ) {
				$previous = '';
				if ( $firstCharacter !== '+' ) {
					$trAfter = array_pop( $trAttributes );
					if ( !array_pop( $trHistory ) ) {
						$previous = "<tr{$trAfter}>\n";
					}
					$trHistory[] = true;
					$trAttributes[] = '';
					array_pop( $hasOpenedTr );
					$hasOpenedTr[] = true;
				}

				$lastTag = array_pop( $lastTagHistory );

				if ( array_pop( $tdHistory ) ) {
					$previous = "</{$lastTag}>\n{$previous}";
				}

				if ( $firstCharacter === '|' ) {
					$lastTag = 'td';
				} elseif ( $firstCharacter === '!' ) {
					$lastTag = 'th';
				} elseif ( $firstCharacter === '+' ) {
					$lastTag = 'caption';
				} else {
					$lastTag = '';
				}

				$lastTagHistory[] = $lastTag;

				# A cell could contain both parameters and data
				$cellData = explode( '|', $cell, 2 );

				# T2553: Note that a '|' inside an invalid link should not
				# be mistaken as delimiting cell parameters
				# Bug T153140: Neither should language converter markup.
				if ( preg_match( '/\[\[|-\{/', $cellData[0] ) === 1 ) {
					$cell = "{$previous}<{$lastTag}>" . trim( $cell );
				} elseif ( count( $cellData ) == 1 ) {
					// Whitespace in cells is trimmed
					$cell = "{$previous}<{$lastTag}>" . trim( $cellData[0] );
				} else {
					$attributes = $this->mStripState->unstripBoth( $cellData[0] );
					$attributes = Sanitizer::fixTagAttributes( $attributes, $lastTag );
					// Whitespace in cells is trimmed
					$cell = "{$previous}<{$lastTag}{$attributes}>" . trim( $cellData[1] );
				}

				$outLine .= $cell;
				$tdHistory[] = true;
			}
		}
		$out .= $outLine . "\n";
	}

	# Closing open td, tr && table
	while ( count( $tdHistory ) > 0 ) {
		if ( array_pop( $tdHistory ) ) {
			$out .= "</td>\n";
		}
		if ( array_pop( $trHistory ) ) {
			$out .= "</tr>\n";
		}
		if ( !array_pop( $hasOpenedTr ) ) {
			$out .= "<tr><td></td></tr>\n";
		}

		$out .= "</table>\n";
	}

	# Remove trailing line-ending (b/c)
	if ( substr( $out, -1 ) === "\n" ) {
		$out = substr( $out, 0, -1 );
	}

	# special case: don't return empty table
	if ( $out === "<table>\n<tr><td></td></tr>\n</table>" ) {
		$out = '';
	}

	return $out;
}
1438
/**
 * Helper function for parse() that transforms wiki markup into half-parsed
 * HTML. Only called for $mOutputType == self::OT_HTML.
 *
 * The order of the individual transformation steps below matters.
 *
 * @private
 *
 * @param string $text The text to parse
 * @param-taint $text escapes_html
 * @param bool $isMain Whether this is being called from the main parse() function
 * @param PPFrame|bool $frame A pre-processor frame
 *
 * @return string
 */
public function internalParse( $text, $isMain = true, $frame = false ) {
	$origText = $text;

	// Avoid PHP 7.1 warning from passing $this by reference
	$parser = $this;

	# Hook to suspend the parser in this state
	$proceed = Hooks::run( 'ParserBeforeInternalParse', [ &$parser, &$text, &$this->mStripState ] );
	if ( !$proceed ) {
		return $text;
	}

	if ( $frame ) {
		# A frame was provided: use it for replacing any variables.
		# The frame depth tells how include/noinclude tags should be handled:
		# depth=0 means this is the top-level document; otherwise it's an included document
		$flag = $frame->depth ? self::PTD_FOR_INCLUSION : 0;
		$dom = $this->preprocessToDom( $text, $flag );
		$text = $frame->expand( $dom );
	} else {
		# No frame provided: use old-style replaceVariables
		$text = $this->replaceVariables( $text );
	}

	Hooks::run( 'InternalParseBeforeSanitize', [ &$parser, &$text, &$this->mStripState ] );
	$text = Sanitizer::removeHTMLtags(
		$text,
		[ $this, 'attributeStripCallback' ],
		false,
		array_keys( $this->mTransparentTagHooks ),
		[],
		[ $this, 'addTrackingCategory' ]
	);
	Hooks::run( 'InternalParseBeforeLinks', [ &$parser, &$text, &$this->mStripState ] );

	# Tables need to come after variable replacement for things to work
	# properly; putting them before other transformations should keep
	# exciting things like link expansions from showing up in surprising
	# places.
	$text = $this->doTableStuff( $text );

	# Horizontal rules
	$text = preg_replace( '/(^|\n)-----*/', '\\1<hr />', $text );

	$text = $this->doDoubleUnderscore( $text );

	$text = $this->doHeadings( $text );
	$text = $this->replaceInternalLinks( $text );
	$text = $this->doAllQuotes( $text );
	$text = $this->replaceExternalLinks( $text );

	# replaceInternalLinks may sometimes leave behind
	# absolute URLs, which have to be masked to hide them from replaceExternalLinks
	$text = str_replace( self::MARKER_PREFIX . 'NOPARSE', '', $text );

	$text = $this->doMagicLinks( $text );
	return $this->formatHeadings( $text, $origText, $isMain );
}
1514
/**
 * Helper function for parse() that transforms half-parsed HTML into fully
 * parsed HTML.
 *
 * The hooks in here are only run when $isMain is true.
 *
 * @param string $text
 * @param bool $isMain
 * @param bool $linestart
 * @return string
 */
private function internalParseHalfParsed( $text, $isMain = true, $linestart = true ) {
	$text = $this->mStripState->unstripGeneral( $text );

	// Avoid PHP 7.1 warning from passing $this by reference
	$parser = $this;

	if ( $isMain ) {
		Hooks::run( 'ParserAfterUnstrip', [ &$parser, &$text ] );
	}

	# Clean up special characters, only run once, next-to-last before doBlockLevels
	$text = Sanitizer::armorFrenchSpaces( $text );

	$text = $this->doBlockLevels( $text, $linestart );

	$this->replaceLinkHolders( $text );

	/**
	 * The input doesn't get language converted if
	 * a) It's disabled
	 * b) Content isn't converted
	 * c) It's a conversion table
	 * d) it is an interface message (which is in the user language)
	 */
	if ( !$this->mOptions->getDisableContentConversion()
		&& !isset( $this->mDoubleUnderscores['nocontentconvert'] )
		&& !$this->mOptions->getInterfaceMessage()
	) {
		# The position of the convert() call should not be changed. it
		# assumes that the links are all replaced and the only thing left
		# is the <nowiki> mark.
		$text = $this->getTargetLanguage()->convert( $text );
	}

	$text = $this->mStripState->unstripNoWiki( $text );

	if ( $isMain ) {
		Hooks::run( 'ParserBeforeTidy', [ &$parser, &$text ] );
	}

	$text = $this->replaceTransparentTags( $text );
	$text = $this->mStripState->unstripGeneral( $text );

	$text = Sanitizer::normalizeCharReferences( $text );

	if ( MWTidy::isEnabled() ) {
		if ( $this->mOptions->getTidy() ) {
			$text = MWTidy::tidy( $text );
		}
	} else {
		# attempt to sanitize at least some nesting problems
		# (T4702 and quite a few others)
		# This code path is buggy and deprecated!
		wfDeprecated( 'disabling tidy', '1.33' );
		$tidyregs = [
			# ''Something [http://www.cool.com cool''] -->
			# <i>Something</i><a href="http://www.cool.com"..><i>cool></i></a>
			'/(<([bi])>)(<([bi])>)?([^<]*)(<\/?a[^<]*>)([^<]*)(<\/\\4>)?(<\/\\2>)/' =>
				'\\1\\3\\5\\8\\9\\6\\1\\3\\7\\8\\9',
			# fix up an anchor inside another anchor, only
			# at least for a single single nested link (T5695)
			'/(<a[^>]+>)([^<]*)(<a[^>]+>[^<]*)<\/a>(.*)<\/a>/' =>
				'\\1\\2</a>\\3</a>\\1\\4</a>',
			# fix div inside inline elements- doBlockLevels won't wrap a line which
			# contains a div, so fix it up here; replace
			# div with escaped text
			'/(<([aib]) [^>]+>)([^<]*)(<div([^>]*)>)(.*)(<\/div>)([^<]*)(<\/\\2>)/' =>
				'\\1\\3&lt;div\\5&gt;\\6&lt;/div&gt;\\8\\9',
			# remove empty italic or bold tag pairs, some
			# introduced by rules above
			'/<([bi])><\/\\1>/' => '',
		];

		$patterns = array_keys( $tidyregs );
		$replacements = array_values( $tidyregs );
		$text = preg_replace( $patterns, $replacements, $text );
	}

	if ( $isMain ) {
		Hooks::run( 'ParserAfterTidy', [ &$parser, &$text ] );
	}

	return $text;
}
1609
/**
 * Replace special strings like "ISBN xxx" and "RFC xxx" with
 * magic external links.
 *
 * The pattern also matches existing anchors and HTML tags so that the
 * callback can skip them, as well as free external links. Each match is
 * handed to magicLinkCallback().
 *
 * DML
 * @private
 *
 * @param string $text
 *
 * @return string
 */
public function doMagicLinks( $text ) {
	$prots = wfUrlProtocolsWithoutProtRel();
	$urlChar = self::EXT_LINK_URL_CLASS;
	$addr = self::EXT_LINK_ADDR;
	$space = self::SPACE_NOT_NL; # non-newline space
	$spdash = "(?:-|$space)"; # a dash or a non-newline space
	$spaces = "$space++"; # possessive match of 1 or more spaces

	# The first part is single-quoted so that \t, \r and \n reach PCRE
	# verbatim; the second part is double-quoted for variable interpolation.
	$pattern = '!(?:                            # Start cases
		(<a[ \t\r\n>].*?</a>) |      # m[1]: Skip link text
		(<.*?>) |                    # m[2]: Skip stuff inside HTML elements' . "
		(\b                          # m[3]: Free external links
			(?i:$prots)
			($addr$urlChar*)         # m[4]: Post-protocol path
		) |
		\b(?:RFC|PMID) $spaces       # m[5]: RFC or PMID, capture number
			([0-9]+)\b |
		\bISBN $spaces (             # m[6]: ISBN, capture number
			(?: 97[89] $spdash? )?   #  optional 13-digit ISBN prefix
			(?: [0-9]  $spdash? ){9} #  9 digits with opt. delimiters
			[0-9Xx]                  #  check digit
		)\b
	)!xu";

	return preg_replace_callback( $pattern, [ $this, 'magicLinkCallback' ], $text );
}
1646
/**
 * Callback for doMagicLinks(): turn one regex match into its replacement.
 *
 * Match groups: 1 = existing anchor, 2 = HTML element, 3/4 = free external
 * link and its post-protocol part, 5 = RFC/PMID number, 6 = ISBN number.
 * Anything that is skipped or disabled is returned unchanged.
 *
 * @throws MWException If group 5 matched but the text starts with neither RFC nor PMID
 * @param array $m
 * @return string HTML
 */
public function magicLinkCallback( $m ) {
	if ( isset( $m[1] ) && $m[1] !== '' ) {
		# Skip anchor
		return $m[0];
	}

	if ( isset( $m[2] ) && $m[2] !== '' ) {
		# Skip HTML element
		return $m[0];
	}

	if ( isset( $m[3] ) && $m[3] !== '' ) {
		# Free external link
		return $this->makeFreeExternalLink( $m[0], strlen( $m[4] ) );
	}

	if ( isset( $m[5] ) && $m[5] !== '' ) {
		# RFC or PMID
		if ( substr( $m[0], 0, 3 ) === 'RFC' ) {
			if ( !$this->mOptions->getMagicRFCLinks() ) {
				return $m[0];
			}
			$keyword = 'RFC';
			$urlmsg = 'rfcurl';
			$cssClass = 'mw-magiclink-rfc';
			$trackingCat = 'magiclink-tracking-rfc';
		} elseif ( substr( $m[0], 0, 4 ) === 'PMID' ) {
			if ( !$this->mOptions->getMagicPMIDLinks() ) {
				return $m[0];
			}
			$keyword = 'PMID';
			$urlmsg = 'pubmedurl';
			$cssClass = 'mw-magiclink-pmid';
			$trackingCat = 'magiclink-tracking-pmid';
		} else {
			throw new MWException( __METHOD__ . ': unrecognised match type "' .
				substr( $m[0], 0, 20 ) . '"' );
		}
		$id = $m[5];
		$url = wfMessage( $urlmsg, $id )->inContentLanguage()->text();
		$this->addTrackingCategory( $trackingCat );
		return Linker::makeExternalLink( $url, "{$keyword} {$id}", true, $cssClass, [], $this->mTitle );
	}

	if ( isset( $m[6] ) && $m[6] !== '' && $this->mOptions->getMagicISBNLinks() ) {
		# ISBN
		$space = self::SPACE_NOT_NL; # non-newline space
		$isbn = preg_replace( "/$space/", ' ', $m[6] );
		# Digits only (plus upper-case check character) for the link target
		$num = strtr( $isbn, [
			'-' => '',
			' ' => '',
			'x' => 'X',
		] );
		$this->addTrackingCategory( 'magiclink-tracking-isbn' );
		return $this->getLinkRenderer()->makeKnownLink(
			SpecialPage::getTitleFor( 'Booksources', $num ),
			"ISBN $isbn",
			[
				'class' => 'internal mw-magiclink-isbn',
				'title' => false // suppress title attribute
			]
		);
	}

	return $m[0];
}
1714
/**
 * Make a free external link, given a user-supplied URL.
 *
 * Trailing entities for '<', '>' and non-breaking space as well as
 * trailing punctuation are split off the URL and appended after the link.
 * If nothing but the protocol would remain, the input is returned as is.
 *
 * @param string $url
 * @param int $numPostProto
 *   The number of characters after the protocol.
 * @return string HTML
 * @private
 */
public function makeFreeExternalLink( $url, $numPostProto ) {
	$trail = '';

	# The characters '<' and '>' (which were escaped by
	# removeHTMLtags()) should not be included in
	# URLs, per RFC 2396.
	# Make &nbsp; terminate a URL as well (bug T84937)
	$m2 = [];
	$hasEntity = preg_match(
		'/&(lt|gt|nbsp|#x0*(3[CcEe]|[Aa]0)|#0*(60|62|160));/',
		$url,
		$m2,
		PREG_OFFSET_CAPTURE
	);
	if ( $hasEntity ) {
		$entityPos = $m2[0][1];
		$trail = substr( $url, $entityPos ) . $trail;
		$url = substr( $url, 0, $entityPos );
	}

	# Move trailing punctuation to $trail
	$sep = ',;\.:!?';
	# If there is no left bracket, then consider right brackets fair game too
	if ( strpos( $url, '(' ) === false ) {
		$sep .= ')';
	}

	$urlRev = strrev( $url );
	$numSepChars = strspn( $urlRev, $sep );
	# Don't break a trailing HTML entity by moving the ; into $trail
	# This is in hot code, so use substr_compare to avoid having to
	# create a new string object for the comparison
	if ( $numSepChars && substr_compare( $url, ";", -$numSepChars, 1 ) === 0 ) {
		# more optimization: instead of running preg_match with a $
		# anchor, which can be slow, do the match on the reversed
		# string starting at the desired offset.
		# un-reversed regexp is: /&([a-z]+|#x[\da-f]+|#\d+)$/i
		if ( preg_match( '/\G([a-z]+|[\da-f]+x#|\d+#)&/i', $urlRev, $m2, 0, $numSepChars ) ) {
			$numSepChars--;
		}
	}
	if ( $numSepChars ) {
		$trail = substr( $url, -$numSepChars ) . $trail;
		$url = substr( $url, 0, -$numSepChars );
	}

	# Verify that we still have a real URL after trail removal, and
	# not just lone protocol
	if ( strlen( $trail ) >= $numPostProto ) {
		return $url . $trail;
	}

	$url = Sanitizer::cleanUrl( $url );

	# Is this an external image?
	$html = $this->maybeMakeExternalImage( $url );
	if ( $html === false ) {
		# Not an image, make a link
		$linkText = $this->getTargetLanguage()->getConverter()->markNoConversion( $url );
		$html = Linker::makeExternalLink(
			$url,
			$linkText,
			true,
			'free',
			$this->getExternalLinkAttribs( $url ),
			$this->mTitle
		);
		# Register it in the output object...
		$this->mOutput->addExternalLink( $url );
	}
	return $html . $trail;
}
1789
/**
 * Parse headers and return html.
 *
 * Lines wrapped in 1 to 6 equals signs on both sides become <h1> to <h6>.
 * The deepest level is handled first so that "===x===" is not consumed
 * as a level 1 or 2 heading.
 *
 * @private
 *
 * @param string $text
 *
 * @return string
 */
public function doHeadings( $text ) {
	foreach ( [ 6, 5, 4, 3, 2, 1 ] as $level ) {
		$equals = str_repeat( '=', $level );
		// Trim non-newline whitespace from headings
		// Using \s* will break for: "==\n===\n" and parse as <h2>=</h2>
		$pattern = "/^(?:$equals)[ \\t]*(.+?)[ \\t]*(?:$equals)\\s*$/m";
		$text = preg_replace( $pattern, "<h$level>\\1</h$level>", $text );
	}
	return $text;
}
1808
/**
 * Replace single quotes with HTML markup.
 *
 * Runs doQuotes() on every line of the text separately and joins the
 * results with newlines again.
 *
 * @private
 *
 * @param string $text
 *
 * @return string The altered text
 */
public function doAllQuotes( $text ) {
	$converted = '';
	foreach ( StringUtils::explode( "\n", $text ) as $line ) {
		$converted .= $this->doQuotes( $line ) . "\n";
	}
	# Drop the newline added after the last line
	return substr( $converted, 0, -1 );
}
1826
/**
 * Helper function for doAllQuotes(): convert the apostrophe markup of a
 * single line ('' = italics, ''' = bold, ''''' = both) into <i>/<b> tags.
 *
 * Tags still open at the end of the line are closed.
 *
 * @param string $text
 *
 * @return string
 */
public function doQuotes( $text ) {
	# Odd indexes of $arr hold the apostrophe runs, even indexes the text between them
	$arr = preg_split( "/(''+)/", $text, -1, PREG_SPLIT_DELIM_CAPTURE );
	$countarr = count( $arr );
	if ( $countarr == 1 ) {
		return $text;
	}

	// First, do some preliminary work. This may shift some apostrophes from
	// being mark-up to being text. It also counts the number of occurrences
	// of bold and italics mark-ups.
	$numbold = 0;
	$numitalics = 0;
	for ( $i = 1; $i < $countarr; $i += 2 ) {
		$thislen = strlen( $arr[$i] );
		// If there are ever four apostrophes, assume the first is supposed to
		// be text, and the remaining three constitute mark-up for bold text.
		// (T15227: ''''foo'''' turns into ' ''' foo ' ''')
		if ( $thislen == 4 ) {
			$arr[$i - 1] .= "'";
			$arr[$i] = "'''";
			$thislen = 3;
		} elseif ( $thislen > 5 ) {
			// If there are more than 5 apostrophes in a row, assume they're all
			// text except for the last 5.
			// (T15227: ''''''foo'''''' turns into ' ''''' foo ' ''''')
			$arr[$i - 1] .= str_repeat( "'", $thislen - 5 );
			$arr[$i] = "'''''";
			$thislen = 5;
		}
		// Count the number of occurrences of bold and italics mark-ups.
		if ( $thislen == 2 ) {
			$numitalics++;
		} elseif ( $thislen == 3 ) {
			$numbold++;
		} elseif ( $thislen == 5 ) {
			$numitalics++;
			$numbold++;
		}
	}

	// If there is an odd number of both bold and italics, it is likely
	// that one of the bold ones was meant to be an apostrophe followed
	// by italics. Which one we cannot know for certain, but it is more
	// likely to be one that has a single-letter word before it.
	if ( ( $numbold % 2 == 1 ) && ( $numitalics % 2 == 1 ) ) {
		$firstsingleletterword = -1;
		$firstmultiletterword = -1;
		$firstspace = -1;
		for ( $i = 1; $i < $countarr; $i += 2 ) {
			if ( strlen( $arr[$i] ) == 3 ) {
				$x1 = substr( $arr[$i - 1], -1 );
				$x2 = substr( $arr[$i - 1], -2, 1 );
				if ( $x1 === ' ' ) {
					if ( $firstspace == -1 ) {
						$firstspace = $i;
					}
				} elseif ( $x2 === ' ' ) {
					$firstsingleletterword = $i;
					// if $firstsingleletterword is set, we don't
					// look at the other options, so we can bail early.
					break;
				} elseif ( $firstmultiletterword == -1 ) {
					$firstmultiletterword = $i;
				}
			}
		}

		// If there is a single-letter word, use it!
		if ( $firstsingleletterword > -1 ) {
			$arr[$firstsingleletterword] = "''";
			$arr[$firstsingleletterword - 1] .= "'";
		} elseif ( $firstmultiletterword > -1 ) {
			// If not, but there's a multi-letter word, use that one.
			$arr[$firstmultiletterword] = "''";
			$arr[$firstmultiletterword - 1] .= "'";
		} elseif ( $firstspace > -1 ) {
			// ... otherwise use the first one that has neither.
			// (notice that it is possible for all three to be -1 if, for example,
			// there is only one pentuple-apostrophe in the line)
			$arr[$firstspace] = "''";
			$arr[$firstspace - 1] .= "'";
		}
	}

	// Now let's actually convert our apostrophic mush to HTML!
	// $state is one of '', 'i', 'b', 'bi', 'ib' (tags in opening order) or
	// 'both' (''''' seen, order not yet known; text is held in $buffer).
	$output = '';
	$buffer = '';
	$state = '';
	$i = 0;
	foreach ( $arr as $r ) {
		if ( ( $i % 2 ) == 0 ) {
			if ( $state === 'both' ) {
				$buffer .= $r;
			} else {
				$output .= $r;
			}
		} else {
			$thislen = strlen( $r );
			if ( $thislen == 2 ) {
				if ( $state === 'i' ) {
					$output .= '</i>';
					$state = '';
				} elseif ( $state === 'bi' ) {
					$output .= '</i>';
					$state = 'b';
				} elseif ( $state === 'ib' ) {
					$output .= '</b></i><b>';
					$state = 'b';
				} elseif ( $state === 'both' ) {
					$output .= '<b><i>' . $buffer . '</i>';
					$state = 'b';
				} else { // $state can be 'b' or ''
					$output .= '<i>';
					$state .= 'i';
				}
			} elseif ( $thislen == 3 ) {
				if ( $state === 'b' ) {
					$output .= '</b>';
					$state = '';
				} elseif ( $state === 'bi' ) {
					$output .= '</i></b><i>';
					$state = 'i';
				} elseif ( $state === 'ib' ) {
					$output .= '</b>';
					$state = 'i';
				} elseif ( $state === 'both' ) {
					$output .= '<i><b>' . $buffer . '</b>';
					$state = 'i';
				} else { // $state can be 'i' or ''
					$output .= '<b>';
					$state .= 'b';
				}
			} elseif ( $thislen == 5 ) {
				if ( $state === 'b' ) {
					$output .= '</b><i>';
					$state = 'i';
				} elseif ( $state === 'i' ) {
					$output .= '</i><b>';
					$state = 'b';
				} elseif ( $state === 'bi' ) {
					$output .= '</i></b>';
					$state = '';
				} elseif ( $state === 'ib' ) {
					$output .= '</b></i>';
					$state = '';
				} elseif ( $state === 'both' ) {
					$output .= '<i><b>' . $buffer . '</b></i>';
					$state = '';
				} else { // ($state == '')
					$buffer = '';
					$state = 'both';
				}
			}
		}
		$i++;
	}
	// Now close all remaining tags. Notice that the order is important.
	if ( $state === 'b' || $state === 'ib' ) {
		$output .= '</b>';
	}
	if ( $state === 'i' || $state === 'bi' || $state === 'ib' ) {
		$output .= '</i>';
	}
	if ( $state === 'bi' ) {
		$output .= '</b>';
	}
	// There might be lonely ''''', so make sure we have a buffer.
	// Compare against '' explicitly: a plain truthiness check would treat
	// the buffered text "0" as empty and silently drop it from the output.
	if ( $state === 'both' && $buffer !== '' ) {
		$output .= '<b><i>' . $buffer . '</i></b>';
	}
	return $output;
}
2006
/**
 * Turn bracketed external links, e.g. [http://domain.tld/some.link text],
 * into HTML anchors.
 *
 * The text is split on the bracketed-link regex. Each match contributes the
 * URL, the protocol, the link text and the text that follows the link.
 *
 * Note: this is all very hackish and the order of execution matters a lot.
 * Make sure to run tests/parser/parserTests.php if you change this code.
 *
 * @private
 *
 * @param string $text Text to scan for bracketed external links
 *
 * @throws MWException If the text could not be split with the link regex
 * @return string The text with bracketed links replaced
 */
public function replaceExternalLinks( $text ) {
	$segments = preg_split( $this->mExtLinkBracketedRegex, $text, -1, PREG_SPLIT_DELIM_CAPTURE );
	// @phan-suppress-next-line PhanTypeComparisonFromArray See phan issue #3161
	if ( $segments === false ) {
		throw new MWException( "PCRE needs to be compiled with "
			. "--enable-unicode-properties in order for MediaWiki to function" );
	}

	# Whatever precedes the first link is passed through as-is
	$html = array_shift( $segments );

	# What is left comes in groups of four entries:
	# URL, protocol (not needed here), link text, text following the link
	$total = count( $segments );
	for ( $pos = 0; $pos < $total; $pos += 4 ) {
		$url = $segments[$pos];
		$label = $segments[$pos + 2];
		$trail = $segments[$pos + 3];

		# The characters '<' and '>' (which were escaped by
		# removeHTMLtags()) should not be included in
		# URLs, per RFC 2396: end the URL at the first such entity and
		# push the remainder into the link text.
		$entity = [];
		if ( preg_match( '/&(lt|gt);/', $url, $entity, PREG_OFFSET_CAPTURE ) ) {
			$cut = $entity[0][1];
			$label = substr( $url, $cut ) . ' ' . $label;
			$url = substr( $url, 0, $cut );
		}

		# If the link text is an image URL, replace it with an <img> tag
		# This happened by accident in the original parser, but some people used it extensively
		$image = $this->maybeMakeExternalImage( $label );
		if ( $image !== false ) {
			$label = $image;
		}

		if ( $label == '' ) {
			# No link text, e.g. [http://domain.tld/some.link]: autonumber it
			$linktype = 'autonumber';
			$inside = '';
			$label = '[' . $this->getTargetLanguage()->formatNum( ++$this->mAutonumber ) . ']';
		} else {
			# Have link text, e.g. [http://domain.tld/some.link text]s
			# Check for trail
			$linktype = 'text';
			list( $inside, $trail ) = Linker::splitTrail( $trail );
		}

		// Excluding protocol-relative URLs may avoid many false positives.
		$labelIsUrl = preg_match( '/^(?:' . wfUrlProtocolsWithoutProtRel() . ')/', $label );
		if ( $labelIsUrl ) {
			$label = $this->getTargetLanguage()->getConverter()->markNoConversion( $label );
		}

		$url = Sanitizer::cleanUrl( $url );

		# Use the encoded URL
		# This means that users can paste URLs directly into the text
		# Funny characters like ö aren't valid in URLs anyway
		# This was changed in August 2004
		$anchor = Linker::makeExternalLink(
			$url,
			$label,
			false,
			$linktype,
			$this->getExternalLinkAttribs( $url ),
			$this->mTitle
		);
		$html .= $anchor . $inside . $trail;

		# Register link in the output object.
		$this->mOutput->addExternalLink( $url );
	}

	return $html;
}
2089
/**
 * Get the rel attribute for a particular external link.
 *
 * Returns 'nofollow' when $wgNoFollowLinks is enabled and neither the
 * namespace of $title nor the domain of $url is listed as an exception.
 *
 * @since 1.21
 * @param string|bool $url Optional URL, to extract the domain from for rel =>
 *  nofollow if appropriate
 * @param LinkTarget|null $title Optional LinkTarget, for wgNoFollowNsExceptions lookups.
 *  When no title is given, no namespace exception is applied.
 * @return string|null Rel attribute for $url
 */
public static function getExternalLinkRel( $url = false, $title = null ) {
	global $wgNoFollowLinks, $wgNoFollowNsExceptions, $wgNoFollowDomainExceptions;

	if ( !$wgNoFollowLinks ) {
		return null;
	}

	# Only consult the namespace exceptions when a title is actually known.
	# Passing a "no title" placeholder of false through the loosely comparing
	# in_array() would match a namespace ID of 0 and wrongly exempt links
	# that have no title at all.
	if ( $title && in_array( $title->getNamespace(), $wgNoFollowNsExceptions ) ) {
		return null;
	}

	if ( wfMatchesDomainList( $url, $wgNoFollowDomainExceptions ) ) {
		return null;
	}

	return 'nofollow';
}
2109
/**
 * Get an associative array of additional HTML attributes appropriate for a
 * particular external link. This currently may include rel => nofollow
 * (depending on configuration, namespace, and the URL's domain) and/or a
 * target attribute (depending on configuration).
 *
 * The 'rel' key is always present; its value is null when no rel value
 * applies to the link.
 *
 * @param string $url URL to extract the domain from for rel =>
 *  nofollow if appropriate
 * @return array Associative array of HTML attributes
 */
public function getExternalLinkAttribs( $url ) {
	$attribs = [];
	$rel = self::getExternalLinkRel( $url, $this->mTitle );

	$target = $this->mOptions->getExternalLinkTarget();
	if ( $target ) {
		$attribs['target'] = $target;
		if ( !in_array( $target, [ '_self', '_parent', '_top' ] ) ) {
			// T133507. New windows can navigate parent cross-origin.
			// Including noreferrer due to lacking browser
			// support of noopener. Eventually noreferrer should be removed.
			//
			// getExternalLinkRel() returns null when there is nothing to
			// add, so only insert the separating space when a value is
			// already present; otherwise the result would start with a
			// stray space.
			if ( $rel !== null && $rel !== '' ) {
				$rel .= ' ';
			}
			$rel .= 'noreferrer noopener';
		}
	}
	$attribs['rel'] = $rel;
	return $attribs;
}
2140
/**
 * Normalise the percent-encoding used in a URL.
 *
 * Characters from a fixed unsafe set are percent-encoded first. After that
 * the fragment, the query and the remainder of the URL are each passed
 * through normalizeUrlComponent() with a component-specific list of
 * characters that must stay escaped. A bracketed IPv6 host accepted by
 * IP::isValid() gets its literal brackets restored at the end.
 *
 * This generally follows the syntax defined in RFC 3986, with special
 * consideration for HTTP query strings.
 *
 * @param string $url
 * @return string
 */
public static function normalizeLinkUrl( $url ) {
	# Building blocks for spotting RFC 3986 IPv6 syntax
	$scheme = '[a-z][a-z0-9+.-]*:';
	$userinfo = '(?:[a-z0-9\-._~!$&\'()*+,;=:]|%[0-9a-f]{2})*';
	$ipv6Host = '\\[((?:[0-9a-f:]|%3[0-A]|%[46][1-6])+)\\]';

	# Decoded IPv6 address of the host, or false if the host is not one
	$address = false;
	$hostMatch = [];
	if ( preg_match( "<^(?:{$scheme})?//(?:{$userinfo}@)?{$ipv6Host}(?:[:/?#].*|)$>i", $url, $hostMatch ) ) {
		$candidate = rawurldecode( $hostMatch[1] );
		if ( IP::isValid( $candidate ) ) {
			$address = $candidate;
		}
	}

	# Make sure unsafe characters are encoded
	$url = preg_replace_callback( '/[\x00-\x20"<>\[\\\\\]^`{|}\x7F-\xFF]/',
		function ( $hit ) {
			return rawurlencode( $hit[0] );
		},
		$url
	);

	# Peel components off the end of the URL: first the fragment, then the
	# query (which additionally keeps &=+; escaped). Each entry maps the
	# component's leading delimiter to the characters left escaped in it.
	$trailingParts = [
		'#' => '"#%<>[\]^`{|}',
		'?' => '"#%<>[\]^`{|}&=+;',
	];
	$tail = '';
	$end = strlen( $url );
	foreach ( $trailingParts as $delimiter => $keepEscaped ) {
		$pos = strpos( $url, $delimiter );
		if ( $pos !== false && $pos < $end ) {
			$piece = substr( $url, $pos, $end - $pos );
			$tail = self::normalizeUrlComponent( $piece, $keepEscaped ) . $tail;
			$end = $pos;
		}
	}

	# Scheme and path part - 'pchar'
	# (we assume no userinfo or encoded colons in the host)
	$head = substr( $url, 0, $end );
	$result = self::normalizeUrlComponent( $head, '"#%<>[\]^`{|}/?' ) . $tail;

	if ( $address === false ) {
		return $result;
	}

	# Fix IPv6 syntax: the brackets around the host were encoded above
	$encodedHost = "%5B({$address})%5D";
	return preg_replace(
		"<^((?:{$scheme})?//(?:{$userinfo}@)?){$encodedHost}(?=[:/?#]|$)>i",
		"$1[$2]",
		$result
	);
}
2207
/**
 * Normalise the percent-escapes found in one component of a URL.
 *
 * An escape is decoded when it stands for a byte in the range 33..126 that
 * is not listed in $unsafe; every other escape is kept, with its hex
 * digits converted to upper case.
 *
 * @param string $component Part of a URL
 * @param string $unsafe Characters that must remain percent-encoded
 * @return string
 */
private static function normalizeUrlComponent( $component, $unsafe ) {
	return preg_replace_callback(
		'/%[0-9A-Fa-f]{2}/',
		function ( $hit ) use ( $unsafe ) {
			$escape = $hit[0];
			$decoded = urldecode( $escape );
			$code = ord( $decoded );
			$isPrintable = ( $code > 32 && $code < 127 );
			if ( !$isPrintable || strpos( $unsafe, $decoded ) !== false ) {
				# Leave it escaped, but use uppercase for a-f
				return strtoupper( $escape );
			}
			# Unescape it
			return $decoded;
		},
		$component
	);
}
2222
/**
 * Make an image out of a URL if that is allowed, either through the global
 * option, through the list of allowed URL prefixes, or through the on-wiki
 * whitelist.
 *
 * @param string $url
 *
 * @return string|bool Result of Linker::makeExternalImage() when the URL
 *  qualifies as an image, false when it does not
 */
private function maybeMakeExternalImage( $url ) {
	# The option holds either a single prefix string or an array of prefixes
	$allowedPrefixes = $this->mOptions->getAllowExternalImagesFrom();
	$prefixMatched = false;
	if ( !empty( $allowedPrefixes ) ) {
		$candidates = is_array( $allowedPrefixes ) ? $allowedPrefixes : [ $allowedPrefixes ];
		foreach ( $candidates as $candidate ) {
			if ( strpos( $url, $candidate ) === 0 ) {
				$prefixMatched = true;
				break;
			}
		}
	}

	$html = false;
	if ( $this->mOptions->getAllowExternalImages() || $prefixMatched ) {
		if ( preg_match( self::EXT_IMAGE_REGEX, $url ) ) {
			# Image found
			$html = Linker::makeExternalImage( $url );
		}
	}

	# The on-wiki whitelist is only consulted if nothing was produced above
	if ( $html
		|| !$this->mOptions->getEnableImageWhitelist()
		|| !preg_match( self::EXT_IMAGE_REGEX, $url )
	) {
		return $html;
	}

	$entries = explode(
		"\n",
		wfMessage( 'external_image_whitelist' )->inContentLanguage()->text()
	);
	foreach ( $entries as $entry ) {
		# Ignore blank entries/comments
		if ( $entry === '' || strpos( $entry, '#' ) === 0 ) {
			continue;
		}
		# Sanitize the regex fragment and make it case-insensitive
		$pattern = '/' . str_replace( '/', '\\/', $entry ) . '/i';
		if ( preg_match( $pattern, $url ) ) {
			# Image matches a whitelist entry
			return Linker::makeExternalImage( $url );
		}
	}

	return $html;
}
2280
/**
 * Process [[ ]] wikilinks
 *
 * Runs replaceInternalLinks2() on the text and merges the link holders it
 * returns into this parser's own holder array.
 *
 * @param string $s
 *
 * @return string Processed text
 *
 * @private
 */
public function replaceInternalLinks( $s ) {
	$ownHolders = $this->mLinkHolders;
	# $s is passed by reference and rewritten by the call
	$newHolders = $this->replaceInternalLinks2( $s );
	$ownHolders->merge( $newHolders );
	return $s;
}
2294
/**
 * Process [[ ]] wikilinks (RIL)
 *
 * The text is split on "[[" and every piece is examined in turn. Pieces
 * that do not form a usable link are appended to the output unchanged.
 * Interlanguage links and category links are registered on the output
 * object and produce no inline text. File links are rendered through
 * makeImage(). Self links and NS_MEDIA links are rendered inline. All
 * other links become either an inline known link or a placeholder in the
 * returned LinkHolderArray.
 *
 * @param string &$s Text to process; replaced in place by the processed text
 * @throws MWException If $this->mTitle is null
 * @return LinkHolderArray Holders for the links that were not rendered inline
 *
 * @private
 */
public function replaceInternalLinks2( &$s ) {
	# The regexes are built once and kept in static variables
	static $tc = false, $e1, $e1_img;
	# the % is needed to support urlencoded titles as well
	if ( !$tc ) {
		$tc = Title::legalChars() . '#%';
		# Match a link having the form [[namespace:link|alternate]]trail
		$e1 = "/^([{$tc}]+)(?:\\|(.+?))?]](.*)\$/sD";
		# Match cases where there is no "]]", which might still be images
		$e1_img = "/^([{$tc}]+)\\|(.*)\$/sD";
	}

	$holders = new LinkHolderArray( $this );

	# split the entire text string on occurrences of [[
	$a = StringUtils::explode( '[[', ' ' . $s );
	# get the first element (all text up to first [[), and remove the space we added
	$s = $a->current();
	$a->next();
	$line = $a->current(); # Workaround for broken ArrayIterator::next() that returns "void"
	$s = substr( $s, 1 );

	if ( is_null( $this->mTitle ) ) {
		throw new MWException( __METHOD__ . ": \$this->mTitle is null\n" );
	}
	# Interlanguage links are only treated specially outside talk pages (see below)
	$nottalk = !$this->mTitle->isTalkPage();

	$useLinkPrefixExtension = $this->getTargetLanguage()->linkPrefixExtension();
	$e2 = null;
	if ( $useLinkPrefixExtension ) {
		# Match the end of a line for a word that's not followed by whitespace,
		# e.g. in the case of 'The Arab al[[Razi]]', 'al' will be matched
		$charset = $this->contLang->linkPrefixCharset();
		$e2 = "/^((?>.*[^$charset]|))(.+)$/sDu";
		$m = [];
		if ( preg_match( $e2, $s, $m ) ) {
			$first_prefix = $m[2];
		} else {
			$first_prefix = false;
		}
	} else {
		$prefix = '';
	}

	$useSubpages = $this->areSubpagesAllowed();

	# Loop for each link
	for ( ; $line !== false && $line !== null; $a->next(), $line = $a->current() ) {
		# Check for excessive memory usage
		if ( $holders->isBig() ) {
			# Too big
			# Do the existence check, replace the link holders and clear the array
			$holders->replace( $s );
			$holders->clear();
		}

		if ( $useLinkPrefixExtension ) {
			# Move the link prefix from the end of the output so far into $prefix
			if ( preg_match( $e2, $s, $m ) ) {
				list( , $s, $prefix ) = $m;
			} else {
				$prefix = '';
			}
			# first link
			if ( $first_prefix ) {
				$prefix = $first_prefix;
				$first_prefix = false;
			}
		}

		$might_be_img = false;

		if ( preg_match( $e1, $line, $m ) ) { # page with normal text or alt
			$text = $m[2];
			# If we get a ] at the beginning of $m[3] that means we have a link that's something like:
			# [[Image:Foo.jpg|[http://example.com desc]]] <- having three ] in a row breaks the match,
			# the real problem is with the $e1 regex
			# See T1500.
			# Still some problems for cases where the ] is meant to be outside punctuation,
			# and no image is in sight. See T4095.
			if ( $text !== ''
				&& substr( $m[3], 0, 1 ) === ']'
				&& strpos( $text, '[' ) !== false
			) {
				$text .= ']'; # so that replaceExternalLinks($text) works later
				$m[3] = substr( $m[3], 1 );
			}
			# fix up urlencoded title texts
			if ( strpos( $m[1], '%' ) !== false ) {
				# Should anchors '#' also be rejected?
				$m[1] = str_replace( [ '<', '>' ], [ '&lt;', '&gt;' ], rawurldecode( $m[1] ) );
			}
			$trail = $m[3];
		} elseif ( preg_match( $e1_img, $line, $m ) ) {
			# Invalid, but might be an image with a link in its caption
			$might_be_img = true;
			$text = $m[2];
			if ( strpos( $m[1], '%' ) !== false ) {
				$m[1] = str_replace( [ '<', '>' ], [ '&lt;', '&gt;' ], rawurldecode( $m[1] ) );
			}
			$trail = "";
		} else { # Invalid form; output directly
			$s .= $prefix . '[[' . $line;
			continue;
		}

		$origLink = ltrim( $m[1], ' ' );

		# Don't allow internal links to pages containing
		# PROTO: where PROTO is a valid URL protocol; these
		# should be external links.
		if ( preg_match( '/^(?i:' . $this->mUrlProtocols . ')/', $origLink ) ) {
			$s .= $prefix . '[[' . $line;
			continue;
		}

		# Make subpage if necessary
		if ( $useSubpages ) {
			$link = $this->maybeDoSubpageLink( $origLink, $text );
		} else {
			$link = $origLink;
		}

		// \x7f isn't a default legal title char, so most likely strip
		// markers will force us into the "invalid form" path above. But,
		// just in case, let's assert that xmlish tags aren't valid in
		// the title position.
		$unstrip = $this->mStripState->killMarkers( $link );
		$noMarkers = ( $unstrip === $link );

		# A target that contained strip markers, or that is not a valid
		# title, is output as plain text
		$nt = $noMarkers ? Title::newFromText( $link ) : null;
		if ( $nt === null ) {
			$s .= $prefix . '[[' . $line;
			continue;
		}

		$ns = $nt->getNamespace();
		$iw = $nt->getInterwiki();

		# A leading ':' forces an ordinary link
		$noforce = ( substr( $origLink, 0, 1 ) !== ':' );

		if ( $might_be_img ) { # if this is actually an invalid link
			if ( $ns == NS_FILE && $noforce ) { # but might be an image
				$found = false;
				while ( true ) {
					# look at the next 'line' to see if we can close it there
					$a->next();
					$next_line = $a->current();
					if ( $next_line === false || $next_line === null ) {
						break;
					}
					$m = explode( ']]', $next_line, 3 );
					if ( count( $m ) == 3 ) {
						# the first ]] closes the inner link, the second the image
						$found = true;
						$text .= "[[{$m[0]}]]{$m[1]}";
						$trail = $m[2];
						break;
					} elseif ( count( $m ) == 2 ) {
						# if there's exactly one ]] that's fine, we'll keep looking
						$text .= "[[{$m[0]}]]{$m[1]}";
					} else {
						# if $next_line is invalid too, we need look no further
						$text .= '[[' . $next_line;
						break;
					}
				}
				if ( !$found ) {
					# we couldn't find the end of this imageLink, so output it raw
					# but don't ignore what might be perfectly normal links in the text we've examined
					$holders->merge( $this->replaceInternalLinks2( $text ) );
					$s .= "{$prefix}[[$link|$text";
					# note: no $trail, because without an end, there *is* no trail
					continue;
				}
			} else { # it's not an image, so output it raw
				$s .= "{$prefix}[[$link|$text";
				# note: no $trail, because without an end, there *is* no trail
				continue;
			}
		}

		$wasblank = ( $text == '' );
		if ( $wasblank ) {
			# No link text was given: use the target itself
			$text = $link;
			if ( !$noforce ) {
				# Strip off leading ':'
				$text = substr( $text, 1 );
			}
		} else {
			# T6598 madness. Handle the quotes only if they come from the alternate part
			# [[Lista d''e paise d''o munno]] -> <a href="...">Lista d''e paise d''o munno</a>
			# [[Criticism of Harry Potter|Criticism of ''Harry Potter'']]
			#    -> <a href="Criticism of Harry Potter">Criticism of <i>Harry Potter</i></a>
			$text = $this->doQuotes( $text );
		}

		# Link not escaped by : , create the various objects
		if ( $noforce && !$nt->wasLocalInterwiki() ) {
			# Interwikis
			if (
				$iw && $this->mOptions->getInterwikiMagic() && $nottalk && (
					Language::fetchLanguageName( $iw, null, 'mw' ) ||
					in_array( $iw, $this->svcOptions->get( 'ExtraInterlanguageLinkPrefixes' ) )
				)
			) {
				# T26502: filter duplicates
				if ( !isset( $this->mLangLinkLanguages[$iw] ) ) {
					$this->mLangLinkLanguages[$iw] = true;
					$this->mOutput->addLanguageLink( $nt->getFullText() );
				}

				/**
				 * Strip the whitespace interwiki links produce, see T10897
				 */
				$s = rtrim( $s . $prefix ) . $trail; # T175416
				continue;
			}

			if ( $ns == NS_FILE ) {
				# A file reported as bad by the lookup is not rendered as an
				# image; it falls through to the ordinary link handling below
				if ( !$this->badFileLookup->isBadFile( $nt->getDBkey(), $this->mTitle ) ) {
					if ( $wasblank ) {
						# if no parameters were passed, $text
						# becomes something like "File:Foo.png",
						# which we don't want to pass on to the
						# image generator
						$text = '';
					} else {
						# recursively parse links inside the image caption
						# actually, this will parse them in any other parameters, too,
						# but it might be hard to fix that, and it doesn't matter ATM
						$text = $this->replaceExternalLinks( $text );
						$holders->merge( $this->replaceInternalLinks2( $text ) );
					}
					# cloak any absolute URLs inside the image markup, so replaceExternalLinks() won't touch them
					$s .= $prefix . $this->armorLinks(
						$this->makeImage( $nt, $text, $holders ) ) . $trail;
					continue;
				}
			} elseif ( $ns == NS_CATEGORY ) {
				/**
				 * Strip the whitespace Category links produce, see T2087
				 */
				$s = rtrim( $s . $prefix ) . $trail; # T2087, T87753

				# The link text, if any, is used as the sort key
				if ( $wasblank ) {
					$sortkey = $this->getDefaultSort();
				} else {
					$sortkey = $text;
				}
				$sortkey = Sanitizer::decodeCharReferences( $sortkey );
				$sortkey = str_replace( "\n", '', $sortkey );
				$sortkey = $this->getTargetLanguage()->convertCategoryKey( $sortkey );
				$this->mOutput->addCategory( $nt->getDBkey(), $sortkey );

				continue;
			}
		}

		# Self-link checking. For some languages, variants of the title are checked in
		# LinkHolderArray::doVariants() to allow batching the existence checks necessary
		# for linking to a different variant.
		if ( $ns != NS_SPECIAL && $nt->equals( $this->mTitle ) && !$nt->hasFragment() ) {
			$s .= $prefix . Linker::makeSelfLinkObj( $nt, $text, '', $trail );
			continue;
		}

		# NS_MEDIA is a pseudo-namespace for linking directly to a file
		# @todo FIXME: Should do batch file existence checks, see comment below
		if ( $ns == NS_MEDIA ) {
			# Give extensions a chance to select the file revision for us
			$options = [];
			$descQuery = false;
			Hooks::run( 'BeforeParserFetchFileAndTitle',
				[ $this, $nt, &$options, &$descQuery ] );
			# Fetch and register the file (file title may be different via hooks)
			list( $file, $nt ) = $this->fetchFileAndTitle( $nt, $options );
			# Cloak with NOPARSE to avoid replacement in replaceExternalLinks
			$s .= $prefix . $this->armorLinks(
				Linker::makeMediaLinkFile( $nt, $file, $text ) ) . $trail;
			continue;
		}

		# Some titles, such as valid special pages or files in foreign repos, should
		# be shown as bluelinks even though they're not included in the page table
		# @todo FIXME: isAlwaysKnown() can be expensive for file links; we should really do
		# batch file existence checks for NS_FILE and NS_MEDIA
		if ( $iw == '' && $nt->isAlwaysKnown() ) {
			$this->mOutput->addLink( $nt );
			$s .= $this->makeKnownLinkHolder( $nt, $text, $trail, $prefix );
		} else {
			# Links will be added to the output link list after checking
			$s .= $holders->makeHolder( $nt, $text, [], $trail, $prefix );
		}
	}
	return $holders;
}
2598
/**
 * Render a forced-blue link inline; protect against double expansion of
 * URLs if we're in a mode that prepends full URL prefixes to internal links.
 *
 * The part of the trail that belongs to the link is split off and placed
 * inside the link together with the prefix; the rest of the trail is
 * appended after the armored link.
 *
 * @param Title $nt
 * @param string $text Link text; the escaped prefixed title text is used when empty
 * @param string $trail
 * @param string $prefix
 * @return string HTML-wikitext mix oh yuck
 */
protected function makeKnownLinkHolder( $nt, $text = '', $trail = '', $prefix = '' ) {
	list( $linkedTrail, $remainder ) = Linker::splitTrail( $trail );

	if ( $text == '' ) {
		$text = htmlspecialchars( $nt->getPrefixedText() );
	}

	$renderer = $this->getLinkRenderer();
	$label = new HtmlArmor( $prefix . $text . $linkedTrail );
	$anchor = $renderer->makeKnownLink( $nt, $label );

	return $this->armorLinks( $anchor ) . $remainder;
}
2625
/**
 * Insert a NOPARSE hacky thing into any inline links in a chunk that's
 * going to go through further parsing steps before inline URL expansion.
 *
 * Every URL protocol that starts at a word boundary gets the marker
 * prefix followed by "NOPARSE" inserted in front of it.
 *
 * Not needed quite as much as it used to be since free links are a bit
 * more sensible these days. But bracketed links are still an issue.
 *
 * @param string $text More-or-less HTML
 * @return string Less-or-more HTML with NOPARSE bits
 */
public function armorLinks( $text ) {
	$protocolPattern = '/\b((?i)' . $this->mUrlProtocols . ')/';
	$armored = self::MARKER_PREFIX . "NOPARSE$1";
	return preg_replace( $protocolPattern, $armored, $text );
}
2640
/**
 * Tell whether subpage links should be expanded on the page being parsed.
 *
 * @return bool Result of the subpage check for the namespace of the current title
 */
public function areSubpagesAllowed() {
	# Some namespaces don't allow subpages
	$nsInfo = $this->nsInfo;
	$namespace = $this->mTitle->getNamespace();
	return $nsInfo->hasSubpages( $namespace );
}
2649
/**
 * Handle link to subpage if necessary
 *
 * Delegates to Linker::normalizeSubpageLink() using the current title as
 * the context.
 *
 * @param string $target The source of the link
 * @param string &$text The link text, modified as necessary
 * @return string The full name of the link
 * @private
 */
public function maybeDoSubpageLink( $target, &$text ) {
	$contextTitle = $this->mTitle;
	$fullName = Linker::normalizeSubpageLink( $contextTitle, $target, $text );
	return $fullName;
}
2661
/**
 * Make lists from lines starting with ':', '*', '#', etc. (DBL)
 *
 * Thin wrapper around BlockLevelPass::doBlockLevels().
 *
 * @param string $text
 * @param bool $linestart Whether or not this is at the start of a line.
 * @private
 * @return string The lists rendered as HTML
 */
public function doBlockLevels( $text, $linestart ) {
	$rendered = BlockLevelPass::doBlockLevels( $text, $linestart );
	return $rendered;
}
2673
2674 /**
2675 * Return value of a magic variable (like PAGENAME)
2676 *
2677 * @private
2678 *
2679 * @param string $index Magic variable identifier as mapped in MagicWordFactory::$mVariableIDs
2680 * @param bool|PPFrame $frame
2681 *
2682 * @throws MWException
2683 * @return string
2684 */
2685 public function getVariableValue( $index, $frame = false ) {
2686 if ( is_null( $this->mTitle ) ) {
2687 // If no title set, bad things are going to happen
2688 // later. Title should always be set since this
2689 // should only be called in the middle of a parse
2690 // operation (but the unit-tests do funky stuff)
2691 throw new MWException( __METHOD__ . ' Should only be '
2692 . ' called while parsing (no title set)' );
2693 }
2694
2695 // Avoid PHP 7.1 warning from passing $this by reference
2696 $parser = $this;
2697
2698 /**
2699 * Some of these require message or data lookups and can be
2700 * expensive to check many times.
2701 */
2702 if (
2703 Hooks::run( 'ParserGetVariableValueVarCache', [ &$parser, &$this->mVarCache ] ) &&
2704 isset( $this->mVarCache[$index] )
2705 ) {
2706 return $this->mVarCache[$index];
2707 }
2708
2709 $ts = wfTimestamp( TS_UNIX, $this->mOptions->getTimestamp() );
2710 Hooks::run( 'ParserGetVariableValueTs', [ &$parser, &$ts ] );
2711
2712 $pageLang = $this->getFunctionLang();
2713
2714 switch ( $index ) {
2715 case '!':
2716 $value = '|';
2717 break;
2718 case 'currentmonth':
2719 $value = $pageLang->formatNum( MWTimestamp::getInstance( $ts )->format( 'm' ), true );
2720 break;
2721 case 'currentmonth1':
2722 $value = $pageLang->formatNum( MWTimestamp::getInstance( $ts )->format( 'n' ), true );
2723 break;
2724 case 'currentmonthname':
2725 $value = $pageLang->getMonthName( MWTimestamp::getInstance( $ts )->format( 'n' ) );
2726 break;
2727 case 'currentmonthnamegen':
2728 $value = $pageLang->getMonthNameGen( MWTimestamp::getInstance( $ts )->format( 'n' ) );
2729 break;
2730 case 'currentmonthabbrev':
2731 $value = $pageLang->getMonthAbbreviation( MWTimestamp::getInstance( $ts )->format( 'n' ) );
2732 break;
2733 case 'currentday':
2734 $value = $pageLang->formatNum( MWTimestamp::getInstance( $ts )->format( 'j' ), true );
2735 break;
2736 case 'currentday2':
2737 $value = $pageLang->formatNum( MWTimestamp::getInstance( $ts )->format( 'd' ), true );
2738 break;
2739 case 'localmonth':
2740 $value = $pageLang->formatNum( MWTimestamp::getLocalInstance( $ts )->format( 'm' ), true );
2741 break;
2742 case 'localmonth1':
2743 $value = $pageLang->formatNum( MWTimestamp::getLocalInstance( $ts )->format( 'n' ), true );
2744 break;
2745 case 'localmonthname':
2746 $value = $pageLang->getMonthName( MWTimestamp::getLocalInstance( $ts )->format( 'n' ) );
2747 break;
2748 case 'localmonthnamegen':
2749 $value = $pageLang->getMonthNameGen( MWTimestamp::getLocalInstance( $ts )->format( 'n' ) );
2750 break;
2751 case 'localmonthabbrev':
2752 $value = $pageLang->getMonthAbbreviation( MWTimestamp::getLocalInstance( $ts )->format( 'n' ) );
2753 break;
2754 case 'localday':
2755 $value = $pageLang->formatNum( MWTimestamp::getLocalInstance( $ts )->format( 'j' ), true );
2756 break;
2757 case 'localday2':
2758 $value = $pageLang->formatNum( MWTimestamp::getLocalInstance( $ts )->format( 'd' ), true );
2759 break;
2760 case 'pagename':
2761 $value = wfEscapeWikiText( $this->mTitle->getText() );
2762 break;
2763 case 'pagenamee':
2764 $value = wfEscapeWikiText( $this->mTitle->getPartialURL() );
2765 break;
2766 case 'fullpagename':
2767 $value = wfEscapeWikiText( $this->mTitle->getPrefixedText() );
2768 break;
2769 case 'fullpagenamee':
2770 $value = wfEscapeWikiText( $this->mTitle->getPrefixedURL() );
2771 break;
2772 case 'subpagename':
2773 $value = wfEscapeWikiText( $this->mTitle->getSubpageText() );
2774 break;
2775 case 'subpagenamee':
2776 $value = wfEscapeWikiText( $this->mTitle->getSubpageUrlForm() );
2777 break;
2778 case 'rootpagename':
2779 $value = wfEscapeWikiText( $this->mTitle->getRootText() );
2780 break;
2781 case 'rootpagenamee':
2782 $value = wfEscapeWikiText( wfUrlencode( str_replace(
2783 ' ',
2784 '_',
2785 $this->mTitle->getRootText()
2786 ) ) );
2787 break;
2788 case 'basepagename':
2789 $value = wfEscapeWikiText( $this->mTitle->getBaseText() );
2790 break;
2791 case 'basepagenamee':
2792 $value = wfEscapeWikiText( wfUrlencode( str_replace(
2793 ' ',
2794 '_',
2795 $this->mTitle->getBaseText()
2796 ) ) );
2797 break;
2798 case 'talkpagename':
2799 if ( $this->mTitle->canHaveTalkPage() ) {
2800 $talkPage = $this->mTitle->getTalkPage();
2801 $value = wfEscapeWikiText( $talkPage->getPrefixedText() );
2802 } else {
2803 $value = '';
2804 }
2805 break;
2806 case 'talkpagenamee':
2807 if ( $this->mTitle->canHaveTalkPage() ) {
2808 $talkPage = $this->mTitle->getTalkPage();
2809 $value = wfEscapeWikiText( $talkPage->getPrefixedURL() );
2810 } else {
2811 $value = '';
2812 }
2813 break;
2814 case 'subjectpagename':
2815 $subjPage = $this->mTitle->getSubjectPage();
2816 $value = wfEscapeWikiText( $subjPage->getPrefixedText() );
2817 break;
2818 case 'subjectpagenamee':
2819 $subjPage = $this->mTitle->getSubjectPage();
2820 $value = wfEscapeWikiText( $subjPage->getPrefixedURL() );
2821 break;
2822 case 'pageid': // requested in T25427
2823 # Inform the edit saving system that getting the canonical output
2824 # after page insertion requires a parse that used that exact page ID
2825 $this->setOutputFlag( 'vary-page-id', '{{PAGEID}} used' );
2826 $value = $this->mTitle->getArticleID();
2827 if ( !$value ) {
2828 $value = $this->mOptions->getSpeculativePageId();
2829 if ( $value ) {
2830 $this->mOutput->setSpeculativePageIdUsed( $value );
2831 }
2832 }
2833 break;
2834 case 'revisionid':
2835 if (
2836 $this->svcOptions->get( 'MiserMode' ) &&
2837 !$this->mOptions->getInterfaceMessage() &&
2838 // @TODO: disallow this word on all namespaces
2839 $this->nsInfo->isContent( $this->mTitle->getNamespace() )
2840 ) {
2841 // Use a stub result instead of the actual revision ID in order to avoid
2842 // double parses on page save but still allow preview detection (T137900)
2843 if ( $this->getRevisionId() || $this->mOptions->getSpeculativeRevId() ) {
2844 $value = '-';
2845 } else {
2846 $this->setOutputFlag( 'vary-revision-exists', '{{REVISIONID}} used' );
2847 $value = '';
2848 }
2849 } else {
2850 # Inform the edit saving system that getting the canonical output after
2851 # revision insertion requires a parse that used that exact revision ID
2852 $this->setOutputFlag( 'vary-revision-id', '{{REVISIONID}} used' );
2853 $value = $this->getRevisionId();
2854 if ( $value === 0 ) {
2855 $rev = $this->getRevisionObject();
2856 $value = $rev ? $rev->getId() : $value;
2857 }
2858 if ( !$value ) {
2859 $value = $this->mOptions->getSpeculativeRevId();
2860 if ( $value ) {
2861 $this->mOutput->setSpeculativeRevIdUsed( $value );
2862 }
2863 }
2864 }
2865 break;
2866 case 'revisionday':
2867 $value = (int)$this->getRevisionTimestampSubstring( 6, 2, self::MAX_TTS, $index );
2868 break;
2869 case 'revisionday2':
2870 $value = $this->getRevisionTimestampSubstring( 6, 2, self::MAX_TTS, $index );
2871 break;
2872 case 'revisionmonth':
2873 $value = $this->getRevisionTimestampSubstring( 4, 2, self::MAX_TTS, $index );
2874 break;
2875 case 'revisionmonth1':
2876 $value = (int)$this->getRevisionTimestampSubstring( 4, 2, self::MAX_TTS, $index );
2877 break;
2878 case 'revisionyear':
2879 $value = $this->getRevisionTimestampSubstring( 0, 4, self::MAX_TTS, $index );
2880 break;
2881 case 'revisiontimestamp':
2882 $value = $this->getRevisionTimestampSubstring( 0, 14, self::MAX_TTS, $index );
2883 break;
2884 case 'revisionuser':
2885 # Inform the edit saving system that getting the canonical output after
2886 # revision insertion requires a parse that used the actual user ID
2887 $this->setOutputFlag( 'vary-user', '{{REVISIONUSER}} used' );
2888 $value = $this->getRevisionUser();
2889 break;
2890 case 'revisionsize':
2891 $value = $this->getRevisionSize();
2892 break;
2893 case 'namespace':
2894 $value = str_replace( '_', ' ',
2895 $this->contLang->getNsText( $this->mTitle->getNamespace() ) );
2896 break;
2897 case 'namespacee':
2898 $value = wfUrlencode( $this->contLang->getNsText( $this->mTitle->getNamespace() ) );
2899 break;
2900 case 'namespacenumber':
2901 $value = $this->mTitle->getNamespace();
2902 break;
2903 case 'talkspace':
2904 $value = $this->mTitle->canHaveTalkPage()
2905 ? str_replace( '_', ' ', $this->mTitle->getTalkNsText() )
2906 : '';
2907 break;
2908 case 'talkspacee':
2909 $value = $this->mTitle->canHaveTalkPage() ? wfUrlencode( $this->mTitle->getTalkNsText() ) : '';
2910 break;
2911 case 'subjectspace':
2912 $value = str_replace( '_', ' ', $this->mTitle->getSubjectNsText() );
2913 break;
2914 case 'subjectspacee':
2915 $value = ( wfUrlencode( $this->mTitle->getSubjectNsText() ) );
2916 break;
2917 case 'currentdayname':
2918 $value = $pageLang->getWeekdayName( (int)MWTimestamp::getInstance( $ts )->format( 'w' ) + 1 );
2919 break;
2920 case 'currentyear':
2921 $value = $pageLang->formatNum( MWTimestamp::getInstance( $ts )->format( 'Y' ), true );
2922 break;
2923 case 'currenttime':
2924 $value = $pageLang->time( wfTimestamp( TS_MW, $ts ), false, false );
2925 break;
2926 case 'currenthour':
2927 $value = $pageLang->formatNum( MWTimestamp::getInstance( $ts )->format( 'H' ), true );
2928 break;
2929 case 'currentweek':
2930 # @bug T6594 PHP5 has it zero padded, PHP4 does not, cast to
2931 # int to remove the padding
2932 $value = $pageLang->formatNum( (int)MWTimestamp::getInstance( $ts )->format( 'W' ) );
2933 break;
2934 case 'currentdow':
2935 $value = $pageLang->formatNum( MWTimestamp::getInstance( $ts )->format( 'w' ) );
2936 break;
2937 case 'localdayname':
2938 $value = $pageLang->getWeekdayName(
2939 (int)MWTimestamp::getLocalInstance( $ts )->format( 'w' ) + 1
2940 );
2941 break;
2942 case 'localyear':
2943 $value = $pageLang->formatNum( MWTimestamp::getLocalInstance( $ts )->format( 'Y' ), true );
2944 break;
2945 case 'localtime':
2946 $value = $pageLang->time(
2947 MWTimestamp::getLocalInstance( $ts )->format( 'YmdHis' ),
2948 false,
2949 false
2950 );
2951 break;
2952 case 'localhour':
2953 $value = $pageLang->formatNum( MWTimestamp::getLocalInstance( $ts )->format( 'H' ), true );
2954 break;
2955 case 'localweek':
2956 # @bug T6594 PHP5 has it zero padded, PHP4 does not, cast to
2957 # int to remove the padding
2958 $value = $pageLang->formatNum( (int)MWTimestamp::getLocalInstance( $ts )->format( 'W' ) );
2959 break;
2960 case 'localdow':
2961 $value = $pageLang->formatNum( MWTimestamp::getLocalInstance( $ts )->format( 'w' ) );
2962 break;
2963 case 'numberofarticles':
2964 $value = $pageLang->formatNum( SiteStats::articles() );
2965 break;
2966 case 'numberoffiles':
2967 $value = $pageLang->formatNum( SiteStats::images() );
2968 break;
2969 case 'numberofusers':
2970 $value = $pageLang->formatNum( SiteStats::users() );
2971 break;
2972 case 'numberofactiveusers':
2973 $value = $pageLang->formatNum( SiteStats::activeUsers() );
2974 break;
2975 case 'numberofpages':
2976 $value = $pageLang->formatNum( SiteStats::pages() );
2977 break;
2978 case 'numberofadmins':
2979 $value = $pageLang->formatNum( SiteStats::numberingroup( 'sysop' ) );
2980 break;
2981 case 'numberofedits':
2982 $value = $pageLang->formatNum( SiteStats::edits() );
2983 break;
2984 case 'currenttimestamp':
2985 $value = wfTimestamp( TS_MW, $ts );
2986 break;
2987 case 'localtimestamp':
2988 $value = MWTimestamp::getLocalInstance( $ts )->format( 'YmdHis' );
2989 break;
2990 case 'currentversion':
2991 $value = SpecialVersion::getVersion();
2992 break;
2993 case 'articlepath':
2994 return $this->svcOptions->get( 'ArticlePath' );
2995 case 'sitename':
2996 return $this->svcOptions->get( 'Sitename' );
2997 case 'server':
2998 return $this->svcOptions->get( 'Server' );
2999 case 'servername':
3000 return $this->svcOptions->get( 'ServerName' );
3001 case 'scriptpath':
3002 return $this->svcOptions->get( 'ScriptPath' );
3003 case 'stylepath':
3004 return $this->svcOptions->get( 'StylePath' );
3005 case 'directionmark':
3006 return $pageLang->getDirMark();
3007 case 'contentlanguage':
3008 return $this->svcOptions->get( 'LanguageCode' );
3009 case 'pagelanguage':
3010 $value = $pageLang->getCode();
3011 break;
3012 case 'cascadingsources':
3013 $value = CoreParserFunctions::cascadingsources( $this );
3014 break;
3015 default:
3016 $ret = null;
3017 Hooks::run(
3018 'ParserGetVariableValueSwitch',
3019 [ &$parser, &$this->mVarCache, &$index, &$ret, &$frame ]
3020 );
3021
3022 return $ret;
3023 }
3024
3025 if ( $index ) {
3026 $this->mVarCache[$index] = $value;
3027 }
3028
3029 return $value;
3030 }
3031
/**
 * Return a slice of the revision timestamp and flag the output when that
 * slice could come out differently once the revision is actually saved.
 *
 * @param int $start Offset handed to substr()
 * @param int $len Length handed to substr()
 * @param int $mtts Max time-till-save; sets vary-revision-timestamp if result changes by then
 * @param string $variable Parser variable name
 * @return string
 */
private function getRevisionTimestampSubstring( $start, $len, $mtts, $variable ) {
	# Slice of the timezone-adjusted timestamp to be used for this revision
	$current = substr( $this->getRevisionTimestamp(), $start, $len );

	if ( $this->getRevisionObject() ) {
		# A revision is already associated, so no vary flag is needed
		return $current;
	}

	# Compute the same slice for the timezone-adjusted timestamp $mtts seconds
	# in the future. This future is relative to the current time and not that
	# of the parser options, so the rendered value can be compared to the one
	# derived from the timestamp specified by the parser options.
	$futureTimestamp = wfTimestamp( TS_MW, time() + $mtts );
	$future = substr( $this->contLang->userAdjust( $futureTimestamp, '' ), $start, $len );

	if ( $current !== $future ) {
		# Inform the edit saving system that getting the canonical output after
		# revision insertion requires a parse that used an actual revision timestamp
		$this->setOutputFlag( 'vary-revision-timestamp', "$variable used" );
	}

	return $current;
}
3063
/**
 * Build the magic word arrays for variables (like CURRENTMONTHNAME) and for
 * substitution modifiers, and store them in mVariables and mSubstWords.
 *
 * @private
 */
public function initialiseVariables() {
	$factory = $this->magicWordFactory;

	$idsForVariables = $factory->getVariableIDs();
	$idsForSubst = $factory->getSubstIDs();

	$this->mVariables = $factory->newArray( $idsForVariables );
	$this->mSubstWords = $factory->newArray( $idsForSubst );
}
3076
/**
 * Run the preprocessor over some wikitext and return the resulting document tree.
 * This is the ghost of replace_variables().
 *
 * The generated DOM tree must depend only on the input text and the flags.
 * The DOM tree must be the same in OT_HTML and OT_WIKI mode, to avoid a regression of T6899.
 *
 * Any flag added to the $flags parameter here, or any other parameter liable to cause a
 * change in the DOM tree for a given text, must be passed through the section identifier
 * in the section edit link and thus back to extractSections().
 *
 * The output of this function is currently only cached in process memory, but a persistent
 * cache may be implemented at a later date which takes further advantage of these strict
 * dependency requirements.
 *
 * @param string $text The text to parse
 * @param int $flags Bitwise combination of:
 *   - self::PTD_FOR_INCLUSION: Handle "<noinclude>" and "<includeonly>" as if the text is being
 *     included. Default is to assume a direct page view.
 *
 * @return PPNode
 */
public function preprocessToDom( $text, $flags = 0 ) {
	return $this->getPreprocessor()->preprocessToObj( $text, $flags );
}
3103
/**
 * Split a string into its leading whitespace, its trimmed contents and its
 * trailing whitespace, returned as a three-element array in that order.
 *
 * @param string $s
 *
 * @return array
 */
public static function splitWhitespace( $s ) {
	$withoutLeading = ltrim( $s );
	$leadingLength = strlen( $s ) - strlen( $withoutLeading );

	$core = rtrim( $withoutLeading );
	$trailingLength = strlen( $withoutLeading ) - strlen( $core );

	$leading = substr( $s, 0, $leadingLength );
	# A negative offset of zero would return the whole string, hence the guard
	$trailing = $trailingLength > 0 ? substr( $withoutLeading, -$trailingLength ) : '';

	return [ $leading, $core, $trailing ];
}
3123
/**
 * Expand magic variables, templates and template arguments in some wikitext.
 * Templates are substituted recursively, taking care to avoid infinite loops.
 *
 * Note that the substitution depends on value of $mOutputType:
 *  self::OT_WIKI: only {{subst:}} templates
 *  self::OT_PREPROCESS: templates but not extension tags
 *  self::OT_HTML: all templates and extension tags
 *
 * Empty text, and text longer than the max include size from the parser
 * options, is returned unchanged.
 *
 * @param string $text The text to transform
 * @param false|PPFrame|array $frame Object describing the arguments passed to the
 *   template. Arguments may also be provided as an associative array, as
 *   was the usual case before MW1.12. Providing arguments this way may be
 *   useful for extensions wishing to perform variable replacement
 *   explicitly.
 * @param bool $argsOnly Only do argument (triple-brace) expansion, not
 *   double-brace expansion.
 * @return string
 */
public function replaceVariables( $text, $frame = false, $argsOnly = false ) {
	# Is there any text?
	$length = strlen( $text );
	if ( $length < 1 ) {
		return $text;
	}
	# Prevent too big inclusions!
	if ( $length > $this->mOptions->getMaxIncludeSize() ) {
		return $text;
	}

	# Normalise $frame to a PPFrame instance
	if ( !( $frame instanceof PPFrame ) ) {
		if ( $frame === false ) {
			$frame = $this->getPreprocessor()->newFrame();
		} else {
			$this->logger->debug(
				__METHOD__ . " called using plain parameters instead of " .
				"a PPFrame instance. Creating custom frame."
			);
			$frame = $this->getPreprocessor()->newCustomFrame( $frame );
		}
	}

	$tree = $this->preprocessToDom( $text );
	$expandFlags = 0;
	if ( $argsOnly ) {
		$expandFlags = PPFrame::NO_TEMPLATES;
	}

	return $frame->expand( $tree, $expandFlags );
}
3167
/**
 * Clean up argument array - refactored in 1.9 so parserfunctions can use it, too.
 *
 * Arguments without an "=" are stored under consecutive integer keys starting
 * at 1, exactly as given. Arguments of the form "name=value" are split at the
 * first "=", both sides are trimmed, and the value is stored under the name.
 *
 * @param array $args
 *
 * @return array
 */
public static function createAssocArgs( $args ) {
	$assocArgs = [];
	$index = 1;
	foreach ( $args as $arg ) {
		# Split on the first "=" only; later ones belong to the value
		$pair = explode( '=', $arg, 2 );
		if ( count( $pair ) === 1 ) {
			$assocArgs[$index++] = $arg;
		} else {
			# trim() always returns a string, so no false-checks are needed here
			$assocArgs[trim( $pair[0] )] = trim( $pair[1] );
		}
	}

	return $assocArgs;
}
3196
/**
 * Warn the user when a parser limitation is reached.
 * Will warn the user at most once per limitation type.
 *
 * The results are shown during preview and run through the Parser (See EditPage.php)
 *
 * @param string $limitationType Should be one of:
 *   'expensive-parserfunction' (corresponding messages:
 *       'expensive-parserfunction-warning',
 *       'expensive-parserfunction-category')
 *   'post-expand-template-argument' (corresponding messages:
 *       'post-expand-template-argument-warning',
 *       'post-expand-template-argument-category')
 *   'post-expand-template-inclusion' (corresponding messages:
 *       'post-expand-template-inclusion-warning',
 *       'post-expand-template-inclusion-category')
 *   'node-count-exceeded' (corresponding messages:
 *       'node-count-exceeded-warning',
 *       'node-count-exceeded-category')
 *   'expansion-depth-exceeded' (corresponding messages:
 *       'expansion-depth-exceeded-warning',
 *       'expansion-depth-exceeded-category')
 * @param string|int|null $current Current value
 * @param string|int|null $max Maximum allowed, when an explicit limit has been
 *   exceeded, provide the values (optional)
 */
public function limitationWarn( $limitationType, $current = '', $max = '' ) {
	# Passing $current and $max does no harm when the message does not use them.
	# Not doing ->inLanguage( $this->mOptions->getUserLangObj() ), since this is shown
	# only during preview, and that would split the parser cache unnecessarily.
	$message = wfMessage( "$limitationType-warning" )->numParams( $current, $max );

	$this->mOutput->addWarning( $message->text() );
	$this->addTrackingCategory( "$limitationType-category" );
}
3232
/**
 * Return the text of a template, after recursively
 * replacing any variables or templates within the template.
 *
 * The double-brace construct is tried, in order, as: a literal to be kept
 * (subst handling), a magic variable, a parser function, and finally a
 * transclusion (special page, local template or interwiki). If none of these
 * produces text, the original wikitext is recovered and returned.
 *
 * @param array $piece The parts of the template
 *   $piece['title']: the title, i.e. the part before the |
 *   $piece['parts']: the parameter array
 *   $piece['lineStart']: whether the brace was at the start of a line
 * @param PPFrame $frame The current frame, contains template arguments
 * @throws Exception
 * @return string|array The text of the template. The visible return paths all
 *   produce an array with a single key: 'object' when the value still needs
 *   expansion in the current frame, 'text' otherwise.
 */
public function braceSubstitution( $piece, $frame ) {
	// Flags

	// $text has been filled
	$found = false;
	// wiki markup in $text should be escaped
	$nowiki = false;
	// $text is HTML, armour it against wikitext transformation
	$isHTML = false;
	// Force interwiki transclusion to be done in raw mode not rendered
	$forceRawInterwiki = false;
	// $text is a DOM node needing expansion in a child frame
	$isChildObj = false;
	// $text is a DOM node needing expansion in the current frame
	$isLocalObj = false;

	# Title object, where $text came from
	$title = false;

	# $part1 is the bit before the first |, and must contain only title characters.
	# Various prefixes will be stripped from it later.
	$titleWithSpaces = $frame->expand( $piece['title'] );
	$part1 = trim( $titleWithSpaces );
	$titleText = false;

	# Original title text preserved for various purposes
	$originalTitle = $part1;

	# $args is a list of argument nodes, starting from index 0, not including $part1
	# @todo FIXME: If piece['parts'] is null then the call to getLength()
	# below won't work b/c this $args isn't an object
	$args = ( $piece['parts'] == null ) ? [] : $piece['parts'];

	$profileSection = null; // profile templates

	# SUBST
	if ( !$found ) {
		$substMatch = $this->mSubstWords->matchStartAndRemove( $part1 );

		# Possibilities for substMatch: "subst", "safesubst" or FALSE
		# Decide whether to expand template or keep wikitext as-is.
		if ( $this->ot['wiki'] ) {
			if ( $substMatch === false ) {
				$literal = true; # literal when in PST with no prefix
			} else {
				$literal = false; # expand when in PST with subst: or safesubst:
			}
		} else {
			if ( $substMatch == 'subst' ) {
				$literal = true; # literal when not in PST with plain subst:
			} else {
				$literal = false; # expand when not in PST with safesubst: or no prefix
			}
		}
		if ( $literal ) {
			$text = $frame->virtualBracketedImplode( '{{', '|', '}}', $titleWithSpaces, $args );
			$isLocalObj = true;
			$found = true;
		}
	}

	# Variables
	if ( !$found && $args->getLength() == 0 ) {
		$id = $this->mVariables->matchStartToEnd( $part1 );
		if ( $id !== false ) {
			$text = $this->getVariableValue( $id, $frame );
			if ( $this->magicWordFactory->getCacheTTL( $id ) > -1 ) {
				$this->mOutput->updateCacheExpiry(
					$this->magicWordFactory->getCacheTTL( $id ) );
			}
			$found = true;
		}
	}

	# MSG, MSGNW and RAW
	if ( !$found ) {
		# Check for MSGNW:
		$mwMsgnw = $this->magicWordFactory->get( 'msgnw' );
		if ( $mwMsgnw->matchStartAndRemove( $part1 ) ) {
			$nowiki = true;
		} else {
			# Remove obsolete MSG:
			$mwMsg = $this->magicWordFactory->get( 'msg' );
			$mwMsg->matchStartAndRemove( $part1 );
		}

		# Check for RAW:
		$mwRaw = $this->magicWordFactory->get( 'raw' );
		if ( $mwRaw->matchStartAndRemove( $part1 ) ) {
			$forceRawInterwiki = true;
		}
	}

	# Parser functions
	if ( !$found ) {
		$colonPos = strpos( $part1, ':' );
		if ( $colonPos !== false ) {
			$func = substr( $part1, 0, $colonPos );
			$funcArgs = [ trim( substr( $part1, $colonPos + 1 ) ) ];
			$argsLength = $args->getLength();
			for ( $i = 0; $i < $argsLength; $i++ ) {
				$funcArgs[] = $args->item( $i );
			}

			$result = $this->callParserFunction( $frame, $func, $funcArgs );

			// Extract any forwarded flags
			if ( isset( $result['title'] ) ) {
				$title = $result['title'];
			}
			if ( isset( $result['found'] ) ) {
				$found = $result['found'];
			}
			if ( array_key_exists( 'text', $result ) ) {
				// a string or null
				$text = $result['text'];
			}
			if ( isset( $result['nowiki'] ) ) {
				$nowiki = $result['nowiki'];
			}
			if ( isset( $result['isHTML'] ) ) {
				$isHTML = $result['isHTML'];
			}
			if ( isset( $result['forceRawInterwiki'] ) ) {
				$forceRawInterwiki = $result['forceRawInterwiki'];
			}
			if ( isset( $result['isChildObj'] ) ) {
				$isChildObj = $result['isChildObj'];
			}
			if ( isset( $result['isLocalObj'] ) ) {
				$isLocalObj = $result['isLocalObj'];
			}
		}
	}

	# Finish mangling title and then check for loops.
	# Set $title to a Title object and $titleText to the PDBK
	if ( !$found ) {
		$ns = NS_TEMPLATE;
		# Split the title into page and subpage
		$subpage = '';
		$relative = $this->maybeDoSubpageLink( $part1, $subpage );
		if ( $part1 !== $relative ) {
			$part1 = $relative;
			$ns = $this->mTitle->getNamespace();
		}
		$title = Title::newFromText( $part1, $ns );
		if ( $title ) {
			$titleText = $title->getPrefixedText();
			# Check for language variants if the template is not found
			if ( $this->getTargetLanguage()->hasVariants() && $title->getArticleID() == 0 ) {
				$this->getTargetLanguage()->findVariantLink( $part1, $title, true );
			}
			# Do recursion depth check
			$limit = $this->mOptions->getMaxTemplateDepth();
			if ( $frame->depth >= $limit ) {
				$found = true;
				$text = '<span class="error">'
					. wfMessage( 'parser-template-recursion-depth-warning' )
						->numParams( $limit )->inContentLanguage()->text()
					. '</span>';
			}
		}
	}

	# Load from database
	if ( !$found && $title ) {
		$profileSection = $this->mProfiler->scopedProfileIn( $title->getPrefixedDBkey() );
		if ( !$title->isExternal() ) {
			if ( $title->isSpecialPage()
				&& $this->mOptions->getAllowSpecialInclusion()
				&& $this->ot['html']
			) {
				$specialPage = $this->specialPageFactory->getPage( $title->getDBkey() );
				// Pass the template arguments as URL parameters.
				// "uselang" will have no effect since the Language object
				// is forced to the one defined in ParserOptions.
				$pageArgs = [];
				$argsLength = $args->getLength();
				for ( $i = 0; $i < $argsLength; $i++ ) {
					$bits = $args->item( $i )->splitArg();
					if ( strval( $bits['index'] ) === '' ) {
						$name = trim( $frame->expand( $bits['name'], PPFrame::STRIP_COMMENTS ) );
						$value = trim( $frame->expand( $bits['value'] ) );
						$pageArgs[$name] = $value;
					}
				}

				// Create a new context to execute the special page
				$context = new RequestContext;
				$context->setTitle( $title );
				$context->setRequest( new FauxRequest( $pageArgs ) );
				if ( $specialPage && $specialPage->maxIncludeCacheTime() === 0 ) {
					$context->setUser( $this->getUser() );
				} else {
					// If this page is cached, then we better not be per user.
					$context->setUser( User::newFromName( '127.0.0.1', false ) );
				}
				$context->setLanguage( $this->mOptions->getUserLangObj() );
				$ret = $this->specialPageFactory->capturePath( $title, $context, $this->getLinkRenderer() );
				if ( $ret ) {
					$text = $context->getOutput()->getHTML();
					$this->mOutput->addOutputPageMetadata( $context->getOutput() );
					$found = true;
					$isHTML = true;
					if ( $specialPage && $specialPage->maxIncludeCacheTime() !== false ) {
						$this->mOutput->updateRuntimeAdaptiveExpiry(
							$specialPage->maxIncludeCacheTime()
						);
					}
				}
			} elseif ( $this->nsInfo->isNonincludable( $title->getNamespace() ) ) {
				$found = false; # access denied
				$this->logger->debug(
					__METHOD__ .
					": template inclusion denied for " . $title->getPrefixedDBkey()
				);
			} else {
				list( $text, $title ) = $this->getTemplateDom( $title );
				if ( $text !== false ) {
					$found = true;
					$isChildObj = true;
				}
			}

			# If the title is valid but undisplayable, make a link to it
			if ( !$found && ( $this->ot['html'] || $this->ot['pre'] ) ) {
				$text = "[[:$titleText]]";
				$found = true;
			}
		} elseif ( $title->isTrans() ) {
			# Interwiki transclusion
			if ( $this->ot['html'] && !$forceRawInterwiki ) {
				$text = $this->interwikiTransclude( $title, 'render' );
				$isHTML = true;
			} else {
				$text = $this->interwikiTransclude( $title, 'raw' );
				# Preprocess it like a template
				$text = $this->preprocessToDom( $text, self::PTD_FOR_INCLUSION );
				$isChildObj = true;
			}
			$found = true;
		}

		# Do infinite loop check
		# This has to be done after redirect resolution to avoid infinite loops via redirects
		if ( !$frame->loopCheck( $title ) ) {
			$found = true;
			$text = '<span class="error">'
				. wfMessage( 'parser-template-loop-warning', $titleText )->inContentLanguage()->text()
				. '</span>';
			$this->addTrackingCategory( 'template-loop-category' );
			$this->mOutput->addWarning( wfMessage( 'template-loop-warning',
				wfEscapeWikiText( $titleText ) )->text() );
			$this->logger->debug( __METHOD__ . ": template loop broken at '$titleText'" );
		}
	}

	# If we haven't found text to substitute by now, we're done
	# Recover the source wikitext and return it
	if ( !$found ) {
		$text = $frame->virtualBracketedImplode( '{{', '|', '}}', $titleWithSpaces, $args );
		if ( $profileSection ) {
			$this->mProfiler->scopedProfileOut( $profileSection );
		}
		return [ 'object' => $text ];
	}

	# Expand DOM-style return values in a child frame
	if ( $isChildObj ) {
		# Clean up argument array
		$newFrame = $frame->newChild( $args, $title );

		if ( $nowiki ) {
			$text = $newFrame->expand( $text, PPFrame::RECOVER_ORIG );
		} elseif ( $titleText !== false && $newFrame->isEmpty() ) {
			# Expansion is eligible for the empty-frame cache
			$text = $newFrame->cachedExpand( $titleText, $text );
		} else {
			# Uncached expansion
			$text = $newFrame->expand( $text );
		}
	}
	if ( $isLocalObj && $nowiki ) {
		$text = $frame->expand( $text, PPFrame::RECOVER_ORIG );
		$isLocalObj = false;
	}

	if ( $profileSection ) {
		$this->mProfiler->scopedProfileOut( $profileSection );
	}

	# Replace raw HTML by a placeholder
	if ( $isHTML ) {
		$text = $this->insertStripItem( $text );
	} elseif ( $nowiki && ( $this->ot['html'] || $this->ot['pre'] ) ) {
		# Escape nowiki-style return values
		$text = wfEscapeWikiText( $text );
	} elseif ( is_string( $text )
		&& !$piece['lineStart']
		&& preg_match( '/^(?:{\\||:|;|#|\*)/', $text )
	) {
		# T2529: if the template begins with a table or block-level
		# element, it should be treated as beginning a new line.
		# This behavior is somewhat controversial.
		$text = "\n" . $text;
	}

	if ( is_string( $text ) && !$this->incrementIncludeSize( 'post-expand', strlen( $text ) ) ) {
		# Error, oversize inclusion
		if ( $titleText !== false ) {
			# Make a working, properly escaped link if possible (T25588)
			$text = "[[:$titleText]]";
		} else {
			# This will probably not be a working link, but at least it may
			# provide some hint of where the problem is.
			# preg_replace() returns the new string rather than modifying its
			# argument, so the result has to be assigned back; otherwise a
			# leading colon would be kept and doubled in the link below.
			$originalTitle = preg_replace( '/^:/', '', $originalTitle );
			$text = "[[:$originalTitle]]";
		}
		$text .= $this->insertStripItem( '<!-- WARNING: template omitted, '
			. 'post-expand include size too large -->' );
		$this->limitationWarn( 'post-expand-template-inclusion' );
	}

	if ( $isLocalObj ) {
		$ret = [ 'object' => $text ];
	} else {
		$ret = [ 'text' => $text ];
	}

	return $ret;
}
3577
/**
 * Call a parser function and return an array with text and flags.
 *
 * The returned array will always contain a boolean 'found', indicating
 * whether the parser function was found or not. It may also contain the
 * following:
 *  text: string|object, resulting wikitext or PP DOM object
 *  isHTML: bool, $text is HTML, armour it against wikitext transformation
 *  isChildObj: bool, $text is a DOM node needing expansion in a child frame
 *  isLocalObj: bool, $text is a DOM node needing expansion in the current frame
 *  nowiki: bool, wiki markup in $text should be escaped
 *
 * Two keys of the function hook's own return array are consumed here:
 *  noparse: bool, defaults to true; when false the text is run through
 *    preprocessToDom() and 'isChildObj' is set to true
 *  preprocessFlags: int, defaults to 0; flags passed to preprocessToDom()
 *
 * @since 1.21
 * @param PPFrame $frame The current frame, contains template arguments
 * @param string $function Function name
 * @param array $args Arguments to the function
 * @throws MWException
 * @return array
 */
public function callParserFunction( $frame, $function, array $args = [] ) {
	# Case sensitive functions
	if ( isset( $this->mFunctionSynonyms[1][$function] ) ) {
		$function = $this->mFunctionSynonyms[1][$function];
	} else {
		# Case insensitive functions
		$function = $this->contLang->lc( $function );
		if ( isset( $this->mFunctionSynonyms[0][$function] ) ) {
			$function = $this->mFunctionSynonyms[0][$function];
		} else {
			return [ 'found' => false ];
		}
	}

	list( $callback, $flags ) = $this->mFunctionHooks[$function];

	// Avoid PHP 7.1 warning from passing $this by reference
	$parser = $this;

	$allArgs = [ &$parser ];
	if ( $flags & self::SFH_OBJECT_ARGS ) {
		# Convert arguments to PPNodes and collect for appending to $allArgs
		$funcArgs = [];
		foreach ( $args as $k => $v ) {
			if ( $v instanceof PPNode || $k === 0 ) {
				$funcArgs[] = $v;
			} else {
				# Go through the accessor, as the other methods of this class do,
				# instead of reading the mPreprocessor property directly
				$funcArgs[] = $this->getPreprocessor()->newPartNodeArray( [ $k => $v ] )->item( 0 );
			}
		}

		# Add a frame parameter, and pass the arguments as an array
		$allArgs[] = $frame;
		$allArgs[] = $funcArgs;
	} else {
		# Convert arguments to plain text and append to $allArgs
		foreach ( $args as $k => $v ) {
			if ( $v instanceof PPNode ) {
				$allArgs[] = trim( $frame->expand( $v ) );
			} elseif ( is_int( $k ) && $k >= 0 ) {
				$allArgs[] = trim( $v );
			} else {
				$allArgs[] = trim( "$k=$v" );
			}
		}
	}

	$result = $callback( ...$allArgs );

	# The interface for function hooks allows them to return a wikitext
	# string or an array containing the string and any flags. This mungs
	# things around to match what this method should return.
	if ( !is_array( $result ) ) {
		$result = [
			'found' => true,
			'text' => $result,
		];
	} else {
		if ( isset( $result[0] ) && !isset( $result['text'] ) ) {
			$result['text'] = $result[0];
		}
		unset( $result[0] );
		$result += [
			'found' => true,
		];
	}

	$noparse = true;
	$preprocessFlags = 0;
	if ( isset( $result['noparse'] ) ) {
		$noparse = $result['noparse'];
	}
	if ( isset( $result['preprocessFlags'] ) ) {
		$preprocessFlags = $result['preprocessFlags'];
	}

	if ( !$noparse ) {
		$result['text'] = $this->preprocessToDom( $result['text'], $preprocessFlags );
		$result['isChildObj'] = true;
	}

	return $result;
}
3680
/**
 * Get the semi-parsed DOM representation of a template with a given title,
 * and its redirect destination title. Cached.
 *
 * @param Title $title
 *
 * @return array Two elements: the DOM (or false when the template text could
 *   not be fetched) and the Title the text finally came from
 */
public function getTemplateDom( $title ) {
	$requestedTitle = $title;
	$domKey = $title->getPrefixedDBkey();

	# Follow a redirect recorded by an earlier call, if there is one
	if ( isset( $this->mTplRedirCache[$domKey] ) ) {
		list( $targetNs, $targetDbKey ) = $this->mTplRedirCache[$domKey];
		$title = Title::makeTitle( $targetNs, $targetDbKey );
		$domKey = $title->getPrefixedDBkey();
	}

	# Cache hit; a cached false (failed fetch) counts as a hit as well
	if ( isset( $this->mTplDomCache[$domKey] ) ) {
		return [ $this->mTplDomCache[$domKey], $title ];
	}

	# Cache miss, go to the database
	list( $wikitext, $title ) = $this->fetchTemplateAndTitle( $title );

	if ( $wikitext === false ) {
		$this->mTplDomCache[$domKey] = false;
		return [ false, $title ];
	}

	$tree = $this->preprocessToDom( $wikitext, self::PTD_FOR_INCLUSION );
	$this->mTplDomCache[$domKey] = $tree;

	# Remember where the requested title ended up when it differs
	if ( !$title->equals( $requestedTitle ) ) {
		$redirKey = $requestedTitle->getPrefixedDBkey();
		$this->mTplRedirCache[$redirKey] = [ $title->getNamespace(), $title->getDBkey() ];
	}

	return [ $tree, $title ];
}
3720
/**
 * Fetch the current revision of a given title. Note that the revision
 * (and even the title) may not exist in the database, so everything
 * contributing to the output of the parser should use this method
 * where possible, rather than getting the revisions themselves. This
 * method also caches its results, so using it benefits performance.
 *
 * @since 1.24
 * @param Title $title
 * @return Revision|bool Whatever the current-revision callback from the
 *   parser options returned for this title
 */
public function fetchCurrentRevisionOfTitle( $title ) {
	if ( !$this->currentRevisionCache ) {
		$this->currentRevisionCache = new MapCacheLRU( 100 );
	}

	$key = $title->getPrefixedDBkey();
	if ( $this->currentRevisionCache->has( $key ) ) {
		return $this->currentRevisionCache->get( $key );
	}

	$this->currentRevisionCache->set(
		$key,
		// Defaults to Parser::statelessFetchRevision()
		call_user_func( $this->mOptions->getCurrentRevisionCallback(), $title, $this )
	);

	return $this->currentRevisionCache->get( $key );
}
3745
/**
 * Check whether fetchCurrentRevisionOfTitle() already holds a cached entry
 * for the given title.
 *
 * @param Title $title
 * @return bool
 * @since 1.34
 */
public function isCurrentRevisionOfTitleCached( $title ) {
	# Must be the same key fetchCurrentRevisionOfTitle() stores under, i.e. the
	# prefixed DB key; the prefixed text form would not match that key
	# whenever the two forms of the title differ.
	return (
		$this->currentRevisionCache &&
		$this->currentRevisionCache->has( $title->getPrefixedDBkey() )
	);
}
3757
/**
 * Wrapper around Revision::newKnownCurrent() to allow passing additional
 * parameters without passing them on to it. The lookup uses a replica
 * database connection.
 *
 * @since 1.24
 * @param Title $title
 * @param Parser|bool $parser Accepted but not used here
 * @return Revision|bool False if missing
 */
public static function statelessFetchRevision( Title $title, $parser = false ) {
	return Revision::newKnownCurrent( wfGetDB( DB_REPLICA ), $title );
}
3772
/**
 * Fetch the unparsed text of a template and register a reference to it.
 *
 * The text comes from the template callback of the parser options. Every
 * dependency reported by the callback is added to the parser output.
 *
 * @param Title $title
 * @return array ( string or false, Title )
 */
public function fetchTemplateAndTitle( $title ) {
	// Defaults to Parser::statelessFetchTemplate()
	$fetch = $this->mOptions->getTemplateCallback();
	$fetched = call_user_func( $fetch, $title, $this );

	$text = $fetched['text'];
	if ( is_string( $text ) ) {
		// We use U+007F DELETE to distinguish strip markers from regular text
		$text = strtr( $text, "\x7f", "?" );
	}

	$revision = $fetched['revision'] ?? null;
	$dependencies = $fetched['deps'] ?? [];
	foreach ( $dependencies as $dep ) {
		$this->mOutput->addTemplate( $dep['title'], $dep['page_id'], $dep['rev_id'] );

		if ( $dep['title']->equals( $this->getTitle() ) && $revision instanceof Revision ) {
			// Self-transclusion; final result may change based on the new page version
			$this->setOutputFlag( 'vary-revision-sha1', 'Self transclusion' );
			$this->getOutput()->setRevisionUsedSha1Base36( $revision->getSha1() );
		}
	}

	return [ $text, $fetched['finalTitle'] ?? $title ];
}
3800
/**
 * Fetch the unparsed text of a template and register a reference to it.
 *
 * Convenience form of fetchTemplateAndTitle() that returns only the
 * text element of its result.
 *
 * @param Title $title
 * @return string|bool
 */
public function fetchTemplate( $title ) {
	list( $text ) = $this->fetchTemplateAndTitle( $title );

	return $text;
}
3809
/**
 * Static function to get a template.
 * Can be overridden via ParserOptions::setTemplateCallback().
 *
 * Makes at most two passes: one for the requested title and one more
 * for the redirect target of the content found there, if any. Each title
 * visited is recorded in the returned 'deps' list.
 *
 * @param Title $title
 * @param bool|Parser $parser
 *
 * @return array With keys 'revision', 'text', 'finalTitle' and 'deps'
 */
public static function statelessFetchTemplate( $title, $parser = false ) {
	$wikitext = false;
	$skip = false;
	$resolvedTitle = $title;
	$dependencies = [];
	$revision = null;

	# Fetch the article, following at most one redirect
	$pass = 0;
	while ( $pass < 2 && is_object( $title ) ) {
		$pass++;

		# Give extensions a chance to select the revision instead
		$revIdOverride = false; # Assume current
		Hooks::run( 'BeforeParserFetchTemplateAndtitle',
			[ $parser, $title, &$skip, &$revIdOverride ] );

		if ( $skip ) {
			# A hook vetoed the fetch; still record the title as a dependency
			$wikitext = false;
			$dependencies[] = [
				'title' => $title,
				'page_id' => $title->getArticleID(),
				'rev_id' => null
			];
			break;
		}

		# Get the revision
		if ( !$revIdOverride ) {
			$revision = $parser
				? $parser->fetchCurrentRevisionOfTitle( $title )
				: Revision::newFromTitle( $title );
		} else {
			$revision = Revision::newFromId( $revIdOverride );
		}
		$revisionId = $revision ? $revision->getId() : 0;

		# If there is no current revision, there is no page
		if ( !$revision && $revIdOverride === false ) {
			MediaWikiServices::getInstance()->getLinkCache()->addBadLinkObj( $title );
		}

		$dependencies[] = [
			'title' => $title,
			'page_id' => $title->getArticleID(),
			'rev_id' => $revisionId
		];

		if ( $revision ) {
			if ( !$title->equals( $revision->getTitle() ) ) {
				# We fetched a rev from a different title; register it too...
				$dependencies[] = [
					'title' => $revision->getTitle(),
					'page_id' => $revision->getPage(),
					'rev_id' => $revisionId
				];
			}

			$content = $revision->getContent();
			$wikitext = $content ? $content->getWikitextForTransclusion() : null;

			Hooks::run( 'ParserFetchTemplate',
				[ $parser, $title, $revision, &$wikitext, &$dependencies ] );

			if ( $wikitext === null || $wikitext === false ) {
				$wikitext = false;
				break;
			}
		} elseif ( $title->getNamespace() != NS_MEDIAWIKI ) {
			# No revision and not an interface message: nothing to return
			break;
		} else {
			# Fall back to the interface message named by the title
			$contLang = MediaWikiServices::getInstance()->getContentLanguage();
			$message = wfMessage( $contLang->lcfirst( $title->getText() ) )
				->inContentLanguage();
			if ( !$message->exists() ) {
				$wikitext = false;
				break;
			}
			$content = $message->content();
			$wikitext = $message->plain();
		}

		if ( !$content ) {
			break;
		}

		# Redirect?
		$resolvedTitle = $title;
		$title = $content->getRedirectTarget();
	}

	return [
		'revision' => $revision,
		'text' => $wikitext,
		'finalTitle' => $resolvedTitle,
		'deps' => $dependencies
	];
}
3907
/**
 * Fetch a file and its title and register a reference to it.
 * If 'broken' is a key in $options then the file will appear as a broken thumbnail.
 *
 * The requested title is always registered as an image dependency. If
 * the file found reports a different title, that title is registered
 * as well and is the one returned.
 *
 * @param Title $title
 * @param array $options Array of options to RepoGroup::findFile
 * @return array ( File or false, Title of file )
 */
public function fetchFileAndTitle( $title, $options = [] ) {
	$file = $this->fetchFileNoRegister( $title, $options );

	$timestamp = false;
	$hash = false;
	if ( $file ) {
		$timestamp = $file->getTimestamp();
		$hash = $file->getSha1();
	}

	# Register the file as a dependency...
	$this->mOutput->addImage( $title->getDBkey(), $timestamp, $hash );

	if ( $file ) {
		$actualTitle = $file->getTitle();
		if ( !$title->equals( $actualTitle ) ) {
			# Update fetched file title
			$title = $actualTitle;
			$this->mOutput->addImage( $title->getDBkey(), $timestamp, $hash );
		}
	}

	return [ $file, $title ];
}
3929
/**
 * Helper function for fetchFileAndTitle.
 *
 * Also useful if you need to fetch a file but not use it yet,
 * for example to get the file's handler.
 *
 * Lookup strategy, in order:
 *  - 'broken' key present in $options: no lookup, false is returned
 *  - 'sha1' key set: lookup by (sha1,timestamp) via findFileFromKey()
 *  - otherwise: lookup by (name,timestamp) via findFile()
 *
 * @param Title $title
 * @param array $options Array of options to RepoGroup::findFile
 * @return File|bool
 */
protected function fetchFileNoRegister( $title, $options = [] ) {
	if ( isset( $options['broken'] ) ) {
		// broken thumbnail forced by hook
		return false;
	}

	// Use the service container for both lookups rather than mixing it
	// with the RepoGroup::singleton() accessor
	$repoGroup = MediaWikiServices::getInstance()->getRepoGroup();

	if ( isset( $options['sha1'] ) ) {
		// get by (sha1,timestamp)
		return $repoGroup->findFileFromKey( $options['sha1'], $options );
	}

	// get by (name,timestamp)
	return $repoGroup->findFile( $title, $options );
}
3950
/**
 * Transclude an interwiki link.
 *
 * The remote page is requested over HTTP and the response is kept in the
 * main WAN object cache, keyed by the SHA-1 of the request URL. When the
 * fetch fails, a localised error message (content language) is returned
 * instead of page text.
 *
 * @param Title $title
 * @param string $action Usually one of (raw, render)
 *
 * @return string
 */
public function interwikiTransclude( $title, $action ) {
	if ( !$this->svcOptions->get( 'EnableScaryTranscluding' ) ) {
		return wfMessage( 'scarytranscludedisabled' )->inContentLanguage()->text();
	}

	$url = $title->getFullURL( [ 'action' => $action ] );
	if ( strlen( $url ) > 1024 ) {
		return wfMessage( 'scarytranscludetoolong' )->inContentLanguage()->text();
	}

	$wikiId = $title->getTransWikiID(); // remote wiki ID or false
	$hasWikiId = ( $wikiId !== false );

	// __METHOD__ would name the closure if read inside it, so capture it here
	$fname = __METHOD__;
	$cache = MediaWikiServices::getInstance()->getMainWANObjectCache();

	$cacheKey = $cache->makeGlobalKey(
		'interwiki-transclude',
		$hasWikiId ? $wikiId : 'external',
		sha1( $url )
	);
	$expiry = $this->svcOptions->get( 'TranscludeCacheExpiry' );

	$fetch = function ( $oldValue, &$ttl ) use ( $url, $fname, $cache ) {
		$req = MWHttpRequest::factory( $url, [], $fname );

		$status = $req->execute(); // Status object
		if ( $status->isOK() ) {
			if ( $req->getResponseHeader( 'X-Database-Lagged' ) !== null ) {
				$ttl = min( $cache::TTL_LAGGED, $ttl );
			}
		} else {
			$ttl = $cache::TTL_UNCACHEABLE;
		}

		return [
			'text' => $status->isOK() ? $req->getContent() : null,
			'code' => $req->getStatus()
		];
	};

	$checkKeys = $hasWikiId
		? [ $cache->makeGlobalKey( 'interwiki-page', $wikiId, $title->getDBkey() ) ]
		: [];

	$data = $cache->getWithSetCallback(
		$cacheKey,
		$expiry,
		$fetch,
		[
			'checkKeys' => $checkKeys,
			'pcGroup' => 'interwiki-transclude:5',
			'pcTTL' => $cache::TTL_PROC_LONG
		]
	);

	if ( is_string( $data['text'] ) ) {
		return $data['text'];
	}

	if ( $data['code'] != 200 ) {
		// No content and a non-200 status: report the status code
		return wfMessage( 'scarytranscludefailed-httpstatus' )
			->params( $url, $data['code'] )->inContentLanguage()->text();
	}

	// No content although the status was 200: generic failure message
	return wfMessage( 'scarytranscludefailed', $url )->inContentLanguage()->text();
}
4017
/**
 * Triple brace replacement -- used for template arguments
 * @private
 *
 * Resolution order: the value the frame holds for the argument name;
 * otherwise the first supplied default part (in html/pre output, or in
 * wiki output inside a template frame); otherwise the original
 * triple-brace construct rebuilt as a node.
 *
 * @param array $piece With keys 'title' and 'parts'
 * @param PPFrame $frame
 *
 * @return array Either [ 'object' => node ] or [ 'text' => string ]
 */
public function argSubstitution( $piece, $frame ) {
	$defaults = $piece['parts'];
	$rawName = $frame->expand( $piece['title'] );
	$value = $frame->getArgument( trim( $rawName ) );
	$unmatched = ( $value === false );
	$node = false;

	if ( $unmatched && $defaults->getLength() > 0
		&& ( $this->ot['html']
			|| $this->ot['pre']
			|| ( $this->ot['wiki'] && $frame->isTemplate() )
		)
	) {
		# No match in frame, use the supplied default
		$node = $defaults->item( 0 )->getChildren();
	}

	$warning = false;
	if ( !$this->incrementIncludeSize( 'arg', strlen( $value ) ) ) {
		$warning = '<!-- WARNING: argument omitted, expansion size too large -->';
		$this->limitationWarn( 'post-expand-template-argument' );
	}

	if ( $unmatched && $node === false ) {
		# No match anywhere
		$node = $frame->virtualBracketedImplode( '{{{', '|', '}}}', $rawName, $defaults );
	}

	if ( $warning !== false ) {
		$value .= $warning;
	}

	return $node !== false
		? [ 'object' => $node ]
		: [ 'text' => $value ];
}
4063
/**
 * Return the text to be used for a given extension tag.
 * This is the ghost of strip().
 *
 * In HTML output (or for function tag hooks in html/pre output) the
 * registered hook is called and its output is stored behind a strip
 * marker. Otherwise the tag is reassembled as text from its name,
 * attributes, content and closing tag.
 *
 * @param array $params Associative array of parameters:
 *     name       PPNode for the tag name
 *     attr       PPNode for unparsed text where tag attributes are thought to be
 *     attributes Optional associative array of parsed attributes
 *     inner      Contents of extension element
 *     close      Optional closing tag, expanded through $frame; treated as
 *                empty when absent or null
 *     noClose    Original text did not have a close tag
 * @param PPFrame $frame
 *
 * @throws MWException If a hook returns an unknown marker type
 * @return string
 */
public function extensionSubstitution( $params, $frame ) {
	static $errorStr = '<span class="error">';
	static $errorLen = 20;

	$name = $frame->expand( $params['name'] );
	if ( substr( $name, 0, $errorLen ) === $errorStr ) {
		// Probably expansion depth or node count exceeded. Just punt the
		// error up.
		return $name;
	}

	$attrText = !isset( $params['attr'] ) ? null : $frame->expand( $params['attr'] );
	if ( $attrText !== null && substr( $attrText, 0, $errorLen ) === $errorStr ) {
		// See above
		return $attrText;
	}

	// We can't safely check if the expansion for $content resulted in an
	// error, because the content could happen to be the error string
	// (T149622).
	$content = !isset( $params['inner'] ) ? null : $frame->expand( $params['inner'] );

	$marker = self::MARKER_PREFIX . "-$name-"
		. sprintf( '%08X', $this->mMarkerIndex++ ) . self::MARKER_SUFFIX;

	$isFunctionTag = isset( $this->mFunctionTagHooks[strtolower( $name )] ) &&
		( $this->ot['html'] || $this->ot['pre'] );
	if ( $isFunctionTag ) {
		$markerType = 'none';
	} else {
		$markerType = 'general';
	}
	if ( $this->ot['html'] || $isFunctionTag ) {
		$name = strtolower( $name );
		$attributes = Sanitizer::decodeTagAttributes( $attrText );
		if ( isset( $params['attributes'] ) ) {
			// Attributes parsed from the tag text win over the supplied ones
			$attributes += $params['attributes'];
		}

		if ( isset( $this->mTagHooks[$name] ) ) {
			$output = call_user_func_array( $this->mTagHooks[$name],
				[ $content, $attributes, $this, $frame ] );
		} elseif ( isset( $this->mFunctionTagHooks[$name] ) ) {
			list( $callback, ) = $this->mFunctionTagHooks[$name];

			// Avoid PHP 7.1 warning from passing $this by reference
			$parser = $this;
			$output = call_user_func_array( $callback, [ &$parser, $frame, $content, $attributes ] );
		} else {
			$output = '<span class="error">Invalid tag extension name: ' .
				htmlspecialchars( $name ) . '</span>';
		}

		if ( is_array( $output ) ) {
			// Extract flags
			$flags = $output;
			$output = $flags[0];
			if ( isset( $flags['markerType'] ) ) {
				$markerType = $flags['markerType'];
			}
		}
	} else {
		// Not rendering: rebuild the tag as text
		if ( $attrText === null ) {
			$attrText = '';
		}
		if ( isset( $params['attributes'] ) ) {
			foreach ( $params['attributes'] as $attrName => $attrValue ) {
				$attrText .= ' ' . htmlspecialchars( $attrName ) . '="' .
					htmlspecialchars( $attrValue ) . '"';
			}
		}
		if ( $content === null ) {
			$output = "<$name$attrText/>";
		} else {
			// isset() rather than is_null(): callers may omit 'close'
			// entirely, as they may omit 'attr' and 'inner'
			$close = isset( $params['close'] ) ? $frame->expand( $params['close'] ) : '';
			if ( substr( $close, 0, $errorLen ) === $errorStr ) {
				// See above
				return $close;
			}
			$output = "<$name$attrText>$content$close";
		}
	}

	if ( $markerType === 'none' ) {
		return $output;
	} elseif ( $markerType === 'nowiki' ) {
		$this->mStripState->addNoWiki( $marker, $output );
	} elseif ( $markerType === 'general' ) {
		$this->mStripState->addGeneral( $marker, $output );
	} else {
		throw new MWException( __METHOD__ . ': invalid marker type' );
	}
	return $marker;
}
4173
/**
 * Increment an include size counter
 *
 * The counter is only updated when the new total stays within the
 * maximum include size from the parser options.
 *
 * @param string $type The type of expansion
 * @param int $size The size of the text
 * @return bool False if this inclusion would take it over the maximum, true otherwise
 */
public function incrementIncludeSize( $type, $size ) {
	$newTotal = $this->mIncludeSizes[$type] + $size;
	if ( $newTotal > $this->mOptions->getMaxIncludeSize() ) {
		return false;
	}

	$this->mIncludeSizes[$type] = $newTotal;
	return true;
}
4189
/**
 * Increment the expensive function count
 *
 * The counter is incremented unconditionally, even once the limit has
 * already been passed.
 *
 * @return bool False if the limit has been exceeded
 */
public function incrementExpensiveFunctionCount() {
	$count = ++$this->mExpensiveFunctionCount;

	return $count <= $this->mOptions->getExpensiveParserFunctionLimit();
}
4199
/**
 * Strip double-underscore items like __NOGALLERY__ and __NOTOC__
 * Fills $this->mDoubleUnderscores, returns the modified text
 *
 * The first __TOC__ is replaced by a placeholder comment and any further
 * ones are removed. Every switch found is also stored as a property on
 * the parser output.
 *
 * @param string $text
 *
 * @return string
 */
public function doDoubleUnderscore( $text ) {
	# The position of __TOC__ needs to be recorded
	$tocWord = $this->magicWordFactory->get( 'toc' );
	if ( $tocWord->match( $text ) ) {
		$this->mShowToc = true;
		$this->mForceTocPosition = true;

		# Put a placeholder where the first one was; it is filled in
		# with the TOC at the end.
		$text = $tocWord->replace( '<!--MWTOC\'"-->', $text, 1 );

		# Drop all the others.
		$text = $tocWord->replace( '', $text );
	}

	# Now match and remove the rest of them
	$switches = $this->magicWordFactory->getDoubleUnderscoreArray();
	$this->mDoubleUnderscores = $switches->matchAndRemove( $text );

	if ( isset( $this->mDoubleUnderscores['nogallery'] ) ) {
		$this->mOutput->mNoGallery = true;
	}

	if ( !$this->mForceTocPosition && isset( $this->mDoubleUnderscores['notoc'] ) ) {
		$this->mShowToc = false;
	}

	if ( isset( $this->mDoubleUnderscores['hiddencat'] )
		&& $this->mTitle->getNamespace() == NS_CATEGORY
	) {
		$this->addTrackingCategory( 'hidden-category-category' );
	}

	# (T10068) Allow control over whether robots index a page.
	# __INDEX__ always overrides __NOINDEX__, see T16899; hence 'index'
	# is handled last.
	$indexSwitches = [
		'noindex' => 'noindex-category',
		'index' => 'index-category',
	];
	foreach ( $indexSwitches as $policy => $trackingCategory ) {
		if ( isset( $this->mDoubleUnderscores[$policy] ) && $this->mTitle->canUseNoindex() ) {
			$this->mOutput->setIndexPolicy( $policy );
			$this->addTrackingCategory( $trackingCategory );
		}
	}

	# Cache all double underscores in the database
	foreach ( array_keys( $this->mDoubleUnderscores ) as $switchName ) {
		$this->mOutput->setProperty( $switchName, '' );
	}

	return $text;
}
4255
/**
 * Add a tracking category to the parser output, for the title
 * currently being parsed.
 *
 * @see ParserOutput::addTrackingCategory()
 * @param string $msg Message key
 * @return bool Whether the addition was successful
 */
public function addTrackingCategory( $msg ) {
	$added = $this->mOutput->addTrackingCategory( $msg, $this->mTitle );

	return $added;
}
4264
/**
 * This function accomplishes several tasks:
 * 1) Auto-number headings if that option is enabled
 * 2) Add an [edit] link to sections for users who have enabled the option and can edit the page
 * 3) Add a Table of contents on the top for users who have enabled the option
 * 4) Auto-anchor headings
 *
 * It loops through all headlines, collects the necessary data, then splits up the
 * string and re-inserts the newly formatted headlines.
 *
 * Side effects: may set the new-section flags and the TOC HTML on the
 * parser output, stores the section list on the output when $isMain is
 * true, and runs the 'ParserSectionCreate' hook once per section.
 *
 * @param string $text
 * @param string $origText Original, untouched wikitext
 * @param bool $isMain
 * @return string
 * @private
 */
public function formatHeadings( $text, $origText, $isMain = true ) {
	# Inhibit editsection links if requested in the page
	if ( isset( $this->mDoubleUnderscores['noeditsection'] ) ) {
		$maybeShowEditLink = false;
	} else {
		$maybeShowEditLink = true; /* Actual presence will depend on post-cache transforms */
	}

	# Get all headlines for numbering them and adding funky stuff like [edit]
	# links - this is for later, but we need the number of headlines right now
	# NOTE: white space in headings have been trimmed in doHeadings. They shouldn't
	# be trimmed here since whitespace in HTML headings is significant.
	$matches = [];
	$numMatches = preg_match_all(
		'/<H(?P<level>[1-6])(?P<attrib>.*?>)(?P<header>[\s\S]*?)<\/H[1-6] *>/i',
		$text,
		$matches
	);

	# if there are fewer than 4 headlines in the article, do not show TOC
	# unless it's been explicitly enabled.
	$enoughToc = $this->mShowToc &&
		( ( $numMatches >= 4 ) || $this->mForceTocPosition );

	# Allow user to stipulate that a page should have a "new section"
	# link added via __NEWSECTIONLINK__
	if ( isset( $this->mDoubleUnderscores['newsectionlink'] ) ) {
		$this->mOutput->setNewSection( true );
	}

	# Allow user to remove the "new section"
	# link via __NONEWSECTIONLINK__
	if ( isset( $this->mDoubleUnderscores['nonewsectionlink'] ) ) {
		$this->mOutput->hideNewSection( true );
	}

	# if the string __FORCETOC__ (not case-sensitive) occurs in the HTML,
	# override above conditions and always show TOC above first header
	if ( isset( $this->mDoubleUnderscores['forcetoc'] ) ) {
		$this->mShowToc = true;
		$enoughToc = true;
	}

	# headline counter
	$headlineCount = 0;
	$numVisible = 0;

	# Ugh .. the TOC should have neat indentation levels which can be
	# passed to the skin functions. These are determined here
	$toc = '';
	$head = [];
	$sublevelCount = [];
	$levelCount = [];
	$level = 0;
	$prevlevel = 0;
	$toclevel = 0;
	$prevtoclevel = 0;
	$markerRegex = self::MARKER_PREFIX . "-h-(\d+)-" . self::MARKER_SUFFIX;
	$baseTitleText = $this->mTitle->getPrefixedDBkey();
	$oldType = $this->mOutputType;
	$this->setOutputType( self::OT_WIKI );
	$frame = $this->getPreprocessor()->newFrame();
	$root = $this->preprocessToDom( $origText );
	$node = $root->getFirstChild();
	$byteOffset = 0;
	$tocraw = [];
	# Lower-cased ids already handed out, used to keep anchors unique
	$refers = [];

	$headlines = $numMatches !== false ? $matches[3] : [];

	$maxTocLevel = $this->svcOptions->get( 'MaxTocLevel' );
	foreach ( $headlines as $headline ) {
		$isTemplate = false;
		$titleText = false;
		$sectionIndex = false;
		$numbering = '';
		$markerMatches = [];
		if ( preg_match( "/^$markerRegex/", $headline, $markerMatches ) ) {
			$serial = $markerMatches[1];
			list( $titleText, $sectionIndex ) = $this->mHeadings[$serial];
			$isTemplate = ( $titleText != $baseTitleText );
			$headline = preg_replace( "/^$markerRegex\\s*/", "", $headline );
		}

		if ( $toclevel ) {
			$prevlevel = $level;
		}
		$level = $matches[1][$headlineCount];

		if ( $level > $prevlevel ) {
			# Increase TOC level
			$toclevel++;
			$sublevelCount[$toclevel] = 0;
			if ( $toclevel < $maxTocLevel ) {
				$prevtoclevel = $toclevel;
				$toc .= Linker::tocIndent();
				$numVisible++;
			}
		} elseif ( $level < $prevlevel && $toclevel > 1 ) {
			# Decrease TOC level, find level to jump to

			for ( $i = $toclevel; $i > 0; $i-- ) {
				if ( $levelCount[$i] == $level ) {
					# Found last matching level
					$toclevel = $i;
					break;
				} elseif ( $levelCount[$i] < $level ) {
					# Found first matching level below current level
					$toclevel = $i + 1;
					break;
				}
			}
			if ( $i == 0 ) {
				$toclevel = 1;
			}
			if ( $toclevel < $maxTocLevel ) {
				if ( $prevtoclevel < $maxTocLevel ) {
					# Unindent only if the previous toc level was shown :p
					$toc .= Linker::tocUnindent( $prevtoclevel - $toclevel );
					$prevtoclevel = $toclevel;
				} else {
					$toc .= Linker::tocLineEnd();
				}
			}
		} else {
			# No change in level, end TOC line
			if ( $toclevel < $maxTocLevel ) {
				$toc .= Linker::tocLineEnd();
			}
		}

		$levelCount[$toclevel] = $level;

		# count number of headlines for each level
		$sublevelCount[$toclevel]++;
		$dot = 0;
		for ( $i = 1; $i <= $toclevel; $i++ ) {
			if ( !empty( $sublevelCount[$i] ) ) {
				if ( $dot ) {
					$numbering .= '.';
				}
				$numbering .= $this->getTargetLanguage()->formatNum( $sublevelCount[$i] );
				$dot = 1;
			}
		}

		# The safe header is a version of the header text safe to use for links

		# Remove link placeholders by the link text.
		#     <!--LINK number-->
		# turns into
		#     link text with suffix
		# Do this before unstrip since link text can contain strip markers
		$safeHeadline = $this->replaceLinkHoldersText( $headline );

		# Avoid insertion of weird stuff like <math> by expanding the relevant sections
		$safeHeadline = $this->mStripState->unstripBoth( $safeHeadline );

		# Remove any <style> or <script> tags (T198618)
		$safeHeadline = preg_replace(
			'#<(style|script)(?: [^>]*[^>/])?>.*?</\1>#is',
			'',
			$safeHeadline
		);

		# Strip out HTML (first regex removes any tag not allowed)
		# Allowed tags are:
		# * <sup> and <sub> (T10393)
		# * <i> (T28375)
		# * <b> (r105284)
		# * <bdi> (T74884)
		# * <span dir="rtl"> and <span dir="ltr"> (T37167)
		# * <s> and <strike> (T35715)
		# We strip any parameter from accepted tags (second regex), except dir="rtl|ltr" from <span>,
		# to allow setting directionality in toc items.
		$tocline = preg_replace(
			[
				'#<(?!/?(span|sup|sub|bdi|i|b|s|strike)(?: [^>]*)?>).*?>#',
				'#<(/?(?:span(?: dir="(?:rtl|ltr)")?|sup|sub|bdi|i|b|s|strike))(?: .*?)?>#'
			],
			[ '', '<$1>' ],
			$safeHeadline
		);

		# Strip '<span></span>', which is the result from the above if
		# <span id="foo"></span> is used to produce an additional anchor
		# for a section.
		$tocline = str_replace( '<span></span>', '', $tocline );

		$tocline = trim( $tocline );

		# For the anchor, strip out HTML-y stuff period
		$safeHeadline = preg_replace( '/<.*?>/', '', $safeHeadline );
		$safeHeadline = Sanitizer::normalizeSectionNameWhitespace( $safeHeadline );

		# Save headline for section edit hint before it's escaped
		$headlineHint = $safeHeadline;

		# Decode HTML entities
		$safeHeadline = Sanitizer::decodeCharReferences( $safeHeadline );

		$safeHeadline = self::normalizeSectionName( $safeHeadline );

		$fallbackHeadline = Sanitizer::escapeIdForAttribute( $safeHeadline, Sanitizer::ID_FALLBACK );
		$linkAnchor = Sanitizer::escapeIdForLink( $safeHeadline );
		$safeHeadline = Sanitizer::escapeIdForAttribute( $safeHeadline, Sanitizer::ID_PRIMARY );
		if ( $fallbackHeadline === $safeHeadline ) {
			# No reason to have both (in fact, we can't)
			$fallbackHeadline = false;
		}

		# HTML IDs must be case-insensitively unique for IE compatibility (T12721).
		# @todo FIXME: We may be changing them depending on the current locale.
		$arrayKey = strtolower( $safeHeadline );
		if ( $fallbackHeadline === false ) {
			$fallbackArrayKey = false;
		} else {
			$fallbackArrayKey = strtolower( $fallbackHeadline );
		}

		# Create the anchor for linking from the TOC to the section
		$anchor = $safeHeadline;
		$fallbackAnchor = $fallbackHeadline;
		if ( isset( $refers[$arrayKey] ) ) {
			// phpcs:ignore Generic.Formatting.DisallowMultipleStatements
			for ( $i = 2; isset( $refers["{$arrayKey}_$i"] ); ++$i );
			$anchor .= "_$i";
			$linkAnchor .= "_$i";
			$refers["{$arrayKey}_$i"] = true;
		} else {
			$refers[$arrayKey] = true;
		}
		# Only track the fallback id when there is one. Recording the
		# boolean false as an array key would store it under key 0 and
		# make a later heading whose id is "0" look like a duplicate.
		if ( $fallbackHeadline !== false ) {
			if ( isset( $refers[$fallbackArrayKey] ) ) {
				// phpcs:ignore Generic.Formatting.DisallowMultipleStatements
				for ( $i = 2; isset( $refers["{$fallbackArrayKey}_$i"] ); ++$i );
				$fallbackAnchor .= "_$i";
				$refers["{$fallbackArrayKey}_$i"] = true;
			} else {
				$refers[$fallbackArrayKey] = true;
			}
		}

		# Don't number the heading if it is the only one (looks silly)
		if ( count( $matches[3] ) > 1 && $this->mOptions->getNumberHeadings() ) {
			# the two are different if the line contains a link
			$headline = Html::element(
				'span',
				[ 'class' => 'mw-headline-number' ],
				$numbering
			) . ' ' . $headline;
		}

		if ( $enoughToc && ( !isset( $maxTocLevel ) || $toclevel < $maxTocLevel ) ) {
			$toc .= Linker::tocLine( $linkAnchor, $tocline,
				$numbering, $toclevel, ( $isTemplate ? false : $sectionIndex ) );
		}

		# Add the section to the section tree
		# Find the DOM node for this header
		$noOffset = ( $isTemplate || $sectionIndex === false );
		while ( $node && !$noOffset ) {
			if ( $node->getName() === 'h' ) {
				$bits = $node->splitHeading();
				if ( $bits['i'] == $sectionIndex ) {
					break;
				}
			}
			$byteOffset += mb_strlen( $this->mStripState->unstripBoth(
				$frame->expand( $node, PPFrame::RECOVER_ORIG ) ) );
			$node = $node->getNextSibling();
		}
		$tocraw[] = [
			'toclevel' => $toclevel,
			'level' => $level,
			'line' => $tocline,
			'number' => $numbering,
			'index' => ( $isTemplate ? 'T-' : '' ) . $sectionIndex,
			'fromtitle' => $titleText,
			'byteoffset' => ( $noOffset ? null : $byteOffset ),
			'anchor' => $anchor,
		];

		# give headline the correct <h#> tag
		if ( $maybeShowEditLink && $sectionIndex !== false ) {
			// Output edit section links as markers with styles that can be customized by skins
			if ( $isTemplate ) {
				# Put a T flag in the section identifier, to indicate to extractSections()
				# that sections inside <includeonly> should be counted.
				$editsectionPage = $titleText;
				$editsectionSection = "T-$sectionIndex";
				$editsectionContent = null;
			} else {
				$editsectionPage = $this->mTitle->getPrefixedText();
				$editsectionSection = $sectionIndex;
				$editsectionContent = $headlineHint;
			}
			// We use a bit of pseudo-xml for editsection markers. The
			// language converter is run later on. Using a UNIQ style marker
			// leads to the converter screwing up the tokens when it
			// converts stuff. And trying to insert strip tags fails too. At
			// this point all real inputted tags have already been escaped,
			// so we don't have to worry about a user trying to input one of
			// these markers directly. We use a page and section attribute
			// to stop the language converter from converting these
			// important bits of data, but put the headline hint inside a
			// content block because the language converter is supposed to
			// be able to convert that piece of data.
			// Gets replaced with html in ParserOutput::getText
			$editlink = '<mw:editsection page="' . htmlspecialchars( $editsectionPage );
			$editlink .= '" section="' . htmlspecialchars( $editsectionSection ) . '"';
			if ( $editsectionContent !== null ) {
				$editlink .= '>' . $editsectionContent . '</mw:editsection>';
			} else {
				$editlink .= '/>';
			}
		} else {
			$editlink = '';
		}
		$head[$headlineCount] = Linker::makeHeadline( $level,
			$matches['attrib'][$headlineCount], $anchor, $headline,
			$editlink, $fallbackAnchor );

		$headlineCount++;
	}

	$this->setOutputType( $oldType );

	# Never ever show TOC if no headers
	if ( $numVisible < 1 ) {
		$enoughToc = false;
	}

	if ( $enoughToc ) {
		if ( $prevtoclevel > 0 && $prevtoclevel < $maxTocLevel ) {
			$toc .= Linker::tocUnindent( $prevtoclevel - 1 );
		}
		$toc = Linker::tocList( $toc, $this->mOptions->getUserLangObj() );
		$this->mOutput->setTOCHTML( $toc );
		$toc = self::TOC_START . $toc . self::TOC_END;
	}

	if ( $isMain ) {
		$this->mOutput->setSections( $tocraw );
	}

	# split up and insert constructed headlines
	$blocks = preg_split( '/<H[1-6].*?>[\s\S]*?<\/H[1-6]>/i', $text );
	$i = 0;

	// build an array of document sections
	$sections = [];
	foreach ( $blocks as $block ) {
		// $head is zero-based, sections aren't.
		if ( empty( $head[$i - 1] ) ) {
			$sections[$i] = $block;
		} else {
			$sections[$i] = $head[$i - 1] . $block;
		}

		/**
		 * Send a hook, one per section.
		 * The idea here is to be able to make section-level DIVs, but to do so in a
		 * lower-impact, more correct way than r50769
		 *
		 * $this : caller
		 * $section : the section number
		 * &$sectionContent : ref to the content of the section
		 * $maybeShowEditLinks : boolean describing whether this section has an edit link
		 */
		Hooks::run( 'ParserSectionCreate', [ $this, $i, &$sections[$i], $maybeShowEditLink ] );

		$i++;
	}

	if ( $enoughToc && $isMain && !$this->mForceTocPosition ) {
		// append the TOC at the beginning
		// Top anchor now in skin
		$sections[0] .= $toc . "\n";
	}

	$full = implode( '', $sections );

	if ( $this->mForceTocPosition ) {
		return str_replace( '<!--MWTOC\'"-->', $toc, $full );
	} else {
		return $full;
	}
}
4669
/**
 * Transform wiki markup when saving a page by doing "\r\n" -> "\n"
 * conversion, substituting signatures, {{subst:}} templates, etc.
 *
 * The substitution pass only runs when the options enable the pre-save
 * transform; NULL stripping and line-ending normalization always run.
 *
 * @param string $text The text to transform
 * @param Title $title The Title object for the current article
 * @param User $user The User object describing the current user
 * @param ParserOptions $options Parsing options
 * @param bool $clearState Whether to clear the parser state first
 * @return string The altered wiki markup
 */
public function preSaveTransform( $text, Title $title, User $user,
	ParserOptions $options, $clearState = true
) {
	// Keep the value returned by lock() alive until this method returns.
	// NOTE(review): presumably a scoped callback that unlocks on
	// destruction - confirm against lock().
	$scopedLock = $clearState ? $this->lock() : null;

	$this->startParse( $title, $options, self::OT_WIKI, $clearState );
	$this->setUser( $user );

	// Strip U+0000 NULL (T159174)
	$withoutNulls = str_replace( "\000", '', $text );

	// We still normalize line endings for backwards-compatibility
	// with other code that just calls PST, but this should already
	// be handled in TextContent subclasses
	$wikitext = TextContent::normalizeLineEndings( $withoutNulls );

	if ( $options->getPreSaveTransform() ) {
		$wikitext = $this->pstPass2( $wikitext, $user );
	}
	$result = $this->mStripState->unstripBoth( $wikitext );

	$this->setUser( null ); # Reset

	return $result;
}
4707
4708 /**
4709 * Pre-save transform helper function
4710 *
4711 * @param string $text
4712 * @param User $user
4713 *
4714 * @return string
4715 */
private function pstPass2( $text, $user ) {
	# Note: This is the timestamp saved as hardcoded wikitext to the database, we use
	# $this->contLang here in order to give everyone the same signature and use the default one
	# rather than the one selected in each user's preferences. (see also T14815)
	$ts = $this->mOptions->getTimestamp();
	$timestamp = MWTimestamp::getLocalInstance( $ts );
	$ts = $timestamp->format( 'YmdHis' );
	$tzMsg = $timestamp->getTimezoneMessage()->inContentLanguage()->text();

	# Date string used for ~~~~~ and appended to ~~~~
	$d = $this->contLang->timeanddate( $ts, false, false ) . " ($tzMsg)";

	# Variable replacement
	# Because mOutputType is OT_WIKI, this will only process {{subst:xxx}} type tags
	$text = $this->replaceVariables( $text );

	# This works almost by chance, as the replaceVariables are done before the getUserSig(),
	# which may corrupt this parser instance via its wfMessage()->text() call.

	# Signatures
	if ( strpos( $text, '~~~' ) !== false ) {
		$sigText = $this->getUserSig( $user );
		# strtr() with an array always tries the longest key first, so five
		# tildes are never consumed as "four plus one".
		$text = strtr( $text, [
			'~~~~~' => $d,
			'~~~~' => "$sigText $d",
			'~~~' => $sigText
		] );
		# The main two signature forms used above are time-sensitive
		$this->setOutputFlag( 'user-signature', 'User signature detected' );
	}

	# Context links ("pipe tricks"): [[|name]] and [[name (context)|]]
	$tc = '[' . Title::legalChars() . ']';
	$nc = '[ _0-9A-Za-z\x80-\xff-]'; # Namespaces can use non-ascii!

	# Full-width punctuation, spelled as explicit UTF-8 bytes so that the
	# characters cannot be silently folded to their ASCII look-alikes.
	# With ASCII parentheses $p4 would turn into a plain nested group and
	# match every "[[Foo|]]" link, producing "[[Foo|F]]".
	$fwOpen = "\xEF\xBC\x88"; # U+FF08 FULLWIDTH LEFT PARENTHESIS
	$fwClose = "\xEF\xBC\x89"; # U+FF09 FULLWIDTH RIGHT PARENTHESIS
	$fwComma = "\xEF\xBC\x8C"; # U+FF0C FULLWIDTH COMMA

	// [[ns:page (context)|]]
	$p1 = "/\[\[(:?$nc+:|:|)($tc+?)( ?\\($tc+\\))\\|]]/";
	// [[ns:page(context)|]] (double-width brackets, added in r40257)
	$p4 = "/\[\[(:?$nc+:|:|)($tc+?)( ?{$fwOpen}$tc+{$fwClose})\\|]]/";
	// [[ns:page (context), context|]] (using either single or double-width comma)
	$p3 = "/\[\[(:?$nc+:|:|)($tc+?)( ?\\($tc+\\)|)((?:, |{$fwComma})$tc+|)\\|]]/";
	// [[|page]] (reverse pipe trick: add context from page title)
	$p2 = "/\[\[\\|($tc+)]]/";

	# try $p1 first, to turn "[[A, B (C)|]]" into "[[A, B (C)|A, B]]"
	$text = preg_replace( $p1, '[[\\1\\2\\3|\\2]]', $text );
	$text = preg_replace( $p4, '[[\\1\\2\\3|\\2]]', $text );
	$text = preg_replace( $p3, '[[\\1\\2\\3\\4|\\2]]', $text );

	# Reverse pipe trick: borrow the namespace prefix and the trailing
	# "(context)" or ", context" part from the title being saved.
	$t = $this->mTitle->getText();
	$m = [];
	if ( preg_match( "/^($nc+:|)$tc+?( \\($tc+\\))$/", $t, $m ) ) {
		$text = preg_replace( $p2, "[[$m[1]\\1$m[2]|\\1]]", $text );
	} elseif ( preg_match( "/^($nc+:|)$tc+?(, $tc+|)$/", $t, $m ) && "$m[1]$m[2]" != '' ) {
		$text = preg_replace( $p2, "[[$m[1]\\1$m[2]|\\1]]", $text );
	} else {
		# if there's no context, don't bother duplicating the title
		$text = preg_replace( $p2, '[[\\1]]', $text );
	}

	return $text;
}
4777
4778 /**
4779 * Fetch the user's signature text, if any, and normalize to
4780 * validated, ready-to-insert wikitext.
4781 * If you have pre-fetched the nickname or the fancySig option, you can
4782 * specify them here to save a database query.
4783 * Do not reuse this parser instance after calling getUserSig(),
4784 * as it may have changed.
4785 *
4786 * @param User &$user
4787 * @param string|bool $nickname Nickname to use or false to use user's default nickname
4788 * @param bool|null $fancySig whether the nickname is the complete signature
4789 * or null to use default value
4790 * @return string
4791 */
public function getUserSig( &$user, $nickname = false, $fancySig = null ) {
	$username = $user->getName();

	# Anything the caller did not pre-fetch is read from the user's options
	if ( $nickname === false ) {
		$nickname = $user->getOption( 'nickname' );
	}
	if ( $fancySig === null ) {
		$fancySig = $user->getBoolOption( 'fancysig' );
	}

	# No nickname configured: sign with the plain user name
	if ( $nickname == null ) {
		$nickname = $username;
	}

	$tooLong = mb_strlen( $nickname ) > $this->svcOptions->get( 'MaxSigChars' );
	if ( $tooLong ) {
		$nickname = $username;
		$this->logger->debug( __METHOD__ . ": $username has overlong signature." );
	} elseif ( $fancySig !== false ) {
		# The nickname is a complete signature and may contain markup
		if ( $this->validateSig( $nickname ) !== false ) {
			# Well-formed: clean it up (if needed) and use it as-is
			return $this->cleanSig( $nickname, true );
		}

		# Not well-formed: fall back to the default signature
		$nickname = $username;
		$this->logger->debug( __METHOD__ . ": $username has bad XML tags in signature." );
	}

	# A nickname must not itself contain a signature
	$nickname = self::cleanSigInSig( $nickname );

	# Still here, so build the default signature: a link to the user page
	$userText = wfEscapeWikiText( $username );
	$nickText = wfEscapeWikiText( $nickname );
	$msgName = $user->isAnon() ? 'signature-anon' : 'signature';

	$message = wfMessage( $msgName, $userText, $nickText );

	return $message->inContentLanguage()
		->title( $this->getTitle() )
		->text();
}
4832
4833 /**
4834 * Check that the user's signature contains no bad XML
4835 *
4836 * @param string $text
4837 * @return string|bool An expanded string, or false if invalid.
4838 */
public function validateSig( $text ) {
	// The text is handed back untouched when it is a well-formed XML
	// fragment; anything else is reported as invalid.
	if ( Xml::isWellFormedXmlFragment( $text ) ) {
		return $text;
	}

	return false;
}
4842
4843 /**
4844 * Clean up signature text
4845 *
4846 * 1) Strip 3, 4 or 5 tildes out of signatures @see cleanSigInSig
4847 * 2) Substitute all transclusions
4848 *
4849 * @param string $text
4850 * @param bool $parsing Whether we're cleaning (preferences save) or parsing
4851 * @return string Signature text
4852 */
public function cleanSig( $text, $parsing = false ) {
	$standalone = !$parsing;

	if ( $standalone ) {
		# Called outside of a parse: lock the parser and set up fresh state.
		# The lock scope object must stay referenced until we return.
		global $wgTitle;
		$lockScope = $this->lock();
		$this->startParse( $wgTitle, new ParserOptions, self::OT_PREPROCESS, true );
	}

	# Option to disable this feature
	if ( !$this->mOptions->getCleanSignatures() ) {
		return $text;
	}

	# Prefix every "{{" that is not already followed by the subst magic word
	# with it, so that all transclusions get substituted.
	# @todo FIXME: Regex doesn't respect extension tags or nowiki
	#  => Move this logic to braceSubstitution()
	$subst = $this->magicWordFactory->get( 'subst' );
	$notSubstRegex = '/\{\{(?!(?:' . $subst->getBaseRegex() . '))/x' . $subst->getRegexCase();
	$forcedSubst = '{{' . $subst->getSynonym( 0 );

	$text = self::cleanSigInSig( preg_replace( $notSubstRegex, $forcedSubst, $text ) );

	# Expand the result through the preprocessor
	$dom = $this->preprocessToDom( $text );
	$expanded = $this->getPreprocessor()->newFrame()->expand( $dom );

	return $standalone
		? $this->mStripState->unstripBoth( $expanded )
		: $expanded;
}
4883
4884 /**
4885 * Strip 3, 4 or 5 tildes out of signatures.
4886 *
4887 * @param string $text
4888 * @return string Signature text with /~{3,5}/ removed
4889 */
public static function cleanSigInSig( $text ) {
	// Remove every run of three to five consecutive tildes
	return preg_replace( '/~{3,5}/', '', $text );
}
4894
4895 /**
4896 * Set up some variables which are usually set up in parse()
4897 * so that an external function can call some class members with confidence
4898 *
4899 * @param Title|null $title
4900 * @param ParserOptions $options
4901 * @param int $outputType
4902 * @param bool $clearState
4903 * @param int|null $revId
4904 */
public function startExternalParse( Title $title = null, ParserOptions $options,
	$outputType, $clearState = true, $revId = null
) {
	$this->startParse( $title, $options, $outputType, $clearState );

	// Only touch the revision ID when the caller supplied one
	if ( $revId === null ) {
		return;
	}
	$this->mRevisionId = $revId;
}
4913
4914 /**
4915 * @param Title|null $title
4916 * @param ParserOptions $options
4917 * @param int $outputType
4918 * @param bool $clearState
4919 */
private function startParse( Title $title = null, ParserOptions $options,
	$outputType, $clearState = true
) {
	// Title, options and output type are installed first; the state is
	// cleared afterwards, and only on request.
	$this->setTitle( $title );
	$this->mOptions = $options;
	$this->setOutputType( $outputType );

	if ( !$clearState ) {
		return;
	}
	$this->clearState();
}
4930
4931 /**
4932 * Wrapper for preprocess()
4933 *
4934 * @param string $text The text to preprocess
4935 * @param ParserOptions $options
4936 * @param Title|null $title Title object or null to use $wgTitle
4937 * @return string
4938 */
public function transformMsg( $text, $options, $title = null ) {
	// Shared by all calls of this method; set while a call is in progress.
	static $executing = false;

	# Guard against infinite recursion: a nested call returns its input as-is
	if ( $executing ) {
		return $text;
	}

	if ( !$title ) {
		global $wgTitle;
		$title = $wgTitle;
	}

	$executing = true;
	try {
		return $this->preprocess( $text, $title, $options );
	} finally {
		// Always release the guard. If preprocess() throws and the flag is
		// left set, every later call in this request would silently return
		// its text untransformed.
		$executing = false;
	}
}
4958
4959 /**
4960 * Create an HTML-style tag, e.g. "<yourtag>special text</yourtag>"
4961 * The callback should have the following form:
4962 * function myParserHook( $text, $params, $parser, $frame ) { ... }
4963 *
4964 * Transform and return $text. Use $parser for any required context, e.g. use
4965 * $parser->getTitle() and $parser->getOptions() not $wgTitle or $wgOut->mParserOptions
4966 *
4967 * Hooks may return extended information by returning an array, of which the
4968 * first numbered element (index 0) must be the return string, and all other
4969 * entries are extracted into local variables within an internal function
4970 * in the Parser class.
4971 *
4972 * This interface (introduced r61913) appears to be undocumented, but
4973 * 'markerType' is used by some core tag hooks to override which strip
4974 * array their results are placed in. **Use great caution if attempting
4975 * this interface, as it is not documented and injudicious use could smash
4976 * private variables.**
4977 *
4978 * @param string $tag The tag to use, e.g. 'hook' for "<hook>"
4979 * @param callable $callback The callback function (and object) to use for the tag
4980 * @throws MWException
4981 * @return callable|null The old value of the mTagHooks array associated with the hook
4982 */
public function setHook( $tag, callable $callback ) {
	// Tag names are stored lower-cased
	$tag = strtolower( $tag );
	if ( preg_match( '/[<>\r\n]/', $tag, $m ) ) {
		throw new MWException( "Invalid character {$m[0]} in setHook('$tag', ...) call" );
	}

	$oldVal = $this->mTagHooks[$tag] ?? null;
	$this->mTagHooks[$tag] = $callback;

	// Strict comparison: with loose comparison numeric-looking names such as
	// '10' and '1e1' are considered equal, and the second one would never be
	// added to the strip list.
	if ( !in_array( $tag, $this->mStripList, true ) ) {
		$this->mStripList[] = $tag;
	}

	return $oldVal;
}
4996
4997 /**
4998 * As setHook(), but letting the contents be parsed.
4999 *
5000 * Transparent tag hooks are like regular XML-style tag hooks, except they
5001 * operate late in the transformation sequence, on HTML instead of wikitext.
5002 *
5003 * This is probably obsoleted by things dealing with parser frames?
5004 * The only extension currently using it is geoserver.
5005 *
5006 * @since 1.10
5007 * @todo better document or deprecate this
5008 *
5009 * @param string $tag The tag to use, e.g. 'hook' for "<hook>"
5010 * @param callable $callback The callback function (and object) to use for the tag
5011 * @throws MWException
5012 * @return callable|null The old value of the mTransparentTagHooks array associated with the hook
5013 */
public function setTransparentTagHook( $tag, callable $callback ) {
	// Tag names are stored lower-cased
	$tag = strtolower( $tag );
	if ( preg_match( '/[<>\r\n]/', $tag, $m ) ) {
		// The message names this method; it used to refer to a
		// non-existent "setTransparentHook".
		throw new MWException(
			"Invalid character {$m[0]} in setTransparentTagHook('$tag', ...) call"
		);
	}

	$oldVal = $this->mTransparentTagHooks[$tag] ?? null;
	$this->mTransparentTagHooks[$tag] = $callback;

	return $oldVal;
}
5024
5025 /**
5026 * Remove all tag hooks
5027 */
public function clearTagHooks() {
	// Put the strip list back to its default and drop the registered tag
	// hooks and function tag hooks.
	$this->mStripList = $this->mDefaultStripList;
	$this->mFunctionTagHooks = [];
	$this->mTagHooks = [];
}
5033
5034 /**
5035 * Create a function, e.g. {{sum:1|2|3}}
5036 * The callback function should have the form:
5037 * function myParserFunction( &$parser, $arg1, $arg2, $arg3 ) { ... }
5038 *
5039 * Or with Parser::SFH_OBJECT_ARGS:
5040 * function myParserFunction( $parser, $frame, $args ) { ... }
5041 *
5042 * The callback may either return the text result of the function, or an array with the text
5043 * in element 0, and a number of flags in the other elements. The names of the flags are
5044 * specified in the keys. Valid flags are:
5045 * found The text returned is valid, stop processing the template. This
5046 * is on by default.
5047 * nowiki Wiki markup in the return value should be escaped
5048 * isHTML The returned text is HTML, armour it against wikitext transformation
5049 *
5050 * @param string $id The magic word ID
5051 * @param callable $callback The callback function (and object) to use
5052 * @param int $flags A combination of the following flags:
5053 * Parser::SFH_NO_HASH No leading hash, i.e. {{plural:...}} instead of {{#if:...}}
5054 *
5055 * Parser::SFH_OBJECT_ARGS Pass the template arguments as PPNode objects instead of text.
5056 * This allows for conditional expansion of the parse tree, allowing you to eliminate dead
5057 * branches and thus speed up parsing. It is also possible to analyse the parse tree of
5058 * the arguments, and to control the way they are expanded.
5059 *
5060 * The $frame parameter is a PPFrame. This can be used to produce expanded text from the
5061 * arguments, for instance:
5062 * $text = isset( $args[0] ) ? $frame->expand( $args[0] ) : '';
5063 *
5064 * For technical reasons, $args[0] is pre-expanded and will be a string. This may change in
5065 * future versions. Please call $frame->expand() on it anyway so that your code keeps
5066 * working if/when this is changed.
5067 *
5068 * If you want whitespace to be trimmed from $args, you need to do it yourself, post-
5069 * expansion.
5070 *
5071 * Please read the documentation in includes/parser/Preprocessor.php for more information
5072 * about the methods available in PPFrame and PPNode.
5073 *
5074 * @throws MWException
5075 * @return callable|null The old callback function for this name, or null if there was none
5076 */
public function setFunctionHook( $id, callable $callback, $flags = 0 ) {
	# Remember the callback being replaced, if any
	$previous = null;
	if ( isset( $this->mFunctionHooks[$id] ) ) {
		$previous = $this->mFunctionHooks[$id][0];
	}
	$this->mFunctionHooks[$id] = [ $callback, $flags ];

	# Add to function cache
	$magicWord = $this->magicWordFactory->get( $id );
	if ( !$magicWord ) {
		throw new MWException( __METHOD__ . '() expecting a magic word identifier.' );
	}

	$synonyms = $magicWord->getSynonyms();
	# 1 for case-sensitive magic words, 0 otherwise; doubles as the first
	# index into mFunctionSynonyms
	$caseKey = intval( $magicWord->isCaseSensitive() );
	$addHash = !( $flags & self::SFH_NO_HASH );

	foreach ( $synonyms as $synonym ) {
		# Case-insensitive synonyms are stored lower-cased
		$key = $caseKey ? $synonym : $this->contLang->lc( $synonym );

		# Add leading hash
		if ( $addHash ) {
			$key = '#' . $key;
		}

		# Remove trailing colon
		if ( substr( $key, -1 ) === ':' ) {
			$key = substr( $key, 0, -1 );
		}

		$this->mFunctionSynonyms[$caseKey][$key] = $id;
	}

	return $previous;
}
5107
5108 /**
5109 * Get all registered function hook identifiers
5110 *
5111 * @return array
5112 */
public function getFunctionHooks() {
	// Run first-call initialisation before reading the hook table
	$this->firstCallInit();

	$hookIds = array_keys( $this->mFunctionHooks );

	return $hookIds;
}
5117
5118 /**
5119 * Create a tag function, e.g. "<test>some stuff</test>".
5120 * Unlike tag hooks, tag functions are parsed at preprocessor level.
5121 * Unlike parser functions, their content is not preprocessed.
5122 * @param string $tag
5123 * @param callable $callback
5124 * @param int $flags
5125 * @throws MWException
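5126 * @return array|null The entry previously stored for this tag (a [ callback, flags ] pair when it was registered through this method), or null if there was none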
5127 */
public function setFunctionTagHook( $tag, callable $callback, $flags ) {
	// Tag names are stored lower-cased
	$tag = strtolower( $tag );
	if ( preg_match( '/[<>\r\n]/', $tag, $m ) ) {
		throw new MWException( "Invalid character {$m[0]} in setFunctionTagHook('$tag', ...) call" );
	}

	$old = $this->mFunctionTagHooks[$tag] ?? null;
	$this->mFunctionTagHooks[$tag] = [ $callback, $flags ];

	// Strict comparison: with loose comparison numeric-looking names such as
	// '10' and '1e1' are considered equal, and the second one would never be
	// added to the strip list.
	if ( !in_array( $tag, $this->mStripList, true ) ) {
		$this->mStripList[] = $tag;
	}

	return $old;
}
5142
5143 /**
5144 * Replace "<!--LINK-->" link placeholders with actual links, in the buffer
5145 * Placeholders created in Linker::link()
5146 *
5147 * @param string &$text
5148 * @param int $options
5149 */
public function replaceLinkHolders( &$text, $options = 0 ) {
	// $options is accepted but not used here; $text is modified in place
	$holders = $this->mLinkHolders;
	$holders->replace( $text );
}
5153
5154 /**
5155 * Replace "<!--LINK-->" link placeholders with plain text of links
5156 * (not HTML-formatted).
5157 *
5158 * @param string $text
5159 * @return string
5160 */
public function replaceLinkHoldersText( $text ) {
	// Delegates to the current link holder array
	$holders = $this->mLinkHolders;

	return $holders->replaceText( $text );
}
5164
5165 /**
5166 * Renders an image gallery from a text with one line per image.
5167 * text labels may be given by using |-style alternative text. E.g.
5168 * Image:one.jpg|The number "1"
5169 * Image:tree.jpg|A tree
5170 * given as text will return the HTML of a gallery with two images,
5171 * labeled 'The number "1"' and
5172 * 'A tree'.
5173 *
5174 * @param string $text
5175 * @param array $params
5176 * @return string HTML
5177 */
public function renderImageGallery( $text, $params ) {
	# Gallery implementation requested through the "mode" attribute, if any
	$mode = false;
	if ( isset( $params['mode'] ) ) {
		$mode = $params['mode'];
	}

	try {
		$ig = ImageGalleryBase::factory( $mode );
	} catch ( Exception $e ) {
		// If invalid type set, fallback to default.
		$ig = ImageGalleryBase::factory( false );
	}

	$ig->setContextTitle( $this->mTitle );
	$ig->setShowBytes( false );
	$ig->setShowDimensions( false );
	$ig->setShowFilename( false );
	$ig->setParser( $this );
	$ig->setHideBadImages();
	$ig->setAttributes( Sanitizer::validateTagAttributes( $params, 'ul' ) );

	if ( isset( $params['showfilename'] ) ) {
		$ig->setShowFilename( true );
	} else {
		$ig->setShowFilename( false );
	}
	if ( isset( $params['caption'] ) ) {
		// NOTE: We aren't passing a frame here or below. Frame info
		// is currently opaque to Parsoid, which acts on OT_PREPROCESS.
		// See T107332#4030581
		$caption = $this->recursiveTagParse( $params['caption'] );
		$ig->setCaptionHtml( $caption );
	}
	if ( isset( $params['perrow'] ) ) {
		$ig->setPerRow( $params['perrow'] );
	}
	if ( isset( $params['widths'] ) ) {
		$ig->setWidths( $params['widths'] );
	}
	if ( isset( $params['heights'] ) ) {
		$ig->setHeights( $params['heights'] );
	}
	$ig->setAdditionalOptions( $params );

	// Avoid PHP 7.1 warning from passing $this by reference
	$parser = $this;
	Hooks::run( 'BeforeParserrenderImageGallery', [ &$parser, &$ig ] );

	$lines = StringUtils::explode( "\n", $text );
	foreach ( $lines as $line ) {
		# match lines like these:
		# Image:someimage.jpg|This is some image
		$matches = [];
		preg_match( "/^([^|]+)(\\|(.*))?$/", $line, $matches );
		# Skip empty lines
		if ( count( $matches ) == 0 ) {
			continue;
		}

		# Only decode when the line contains a percent sign at all
		if ( strpos( $matches[0], '%' ) !== false ) {
			$matches[1] = rawurldecode( $matches[1] );
		}
		$title = Title::newFromText( $matches[1], NS_FILE );
		if ( is_null( $title ) ) {
			# Bogus title. Ignore these so we don't bomb out later.
			continue;
		}

		# We need to get what handler the file uses, to figure out parameters.
		# Note, a hook can override the file name, and choose an entirely different
		# file (which potentially could be of a different type and have different handler).
		$options = [];
		$descQuery = false;
		Hooks::run( 'BeforeParserFetchFileAndTitle',
			[ $this, $title, &$options, &$descQuery ] );
		# Don't register it now, as TraditionalImageGallery does that later.
		$file = $this->fetchFileNoRegister( $title, $options );
		$handler = $file ? $file->getHandler() : false;

		$paramMap = [
			'img_alt' => 'gallery-internal-alt',
			'img_link' => 'gallery-internal-link',
		];
		if ( $handler ) {
			$paramMap += $handler->getParamMap();
			// We don't want people to specify per-image widths.
			// Additionally the width parameter would need special casing anyhow.
			unset( $paramMap['img_width'] );
		}

		$mwArray = $this->magicWordFactory->newArray( array_keys( $paramMap ) );

		$label = '';
		$alt = '';
		$link = '';
		$handlerOptions = [];
		if ( isset( $matches[3] ) ) {
			// look for an |alt= definition while trying not to break existing
			// captions with multiple pipes (|) in it, until a more sensible grammar
			// is defined for images in galleries

			// FIXME: Doing recursiveTagParse at this stage, and the trim before
			// splitting on '|' is a bit odd, and different from makeImage.
			$matches[3] = $this->recursiveTagParse( trim( $matches[3] ) );
			// Protect LanguageConverter markup
			$parameterMatches = StringUtils::delimiterExplode(
				'-{', '}-', '|', $matches[3], true /* nested */
			);

			foreach ( $parameterMatches as $parameterMatch ) {
				list( $magicName, $match ) = $mwArray->matchVariableStartToEnd( $parameterMatch );
				if ( $magicName ) {
					$paramName = $paramMap[$magicName];

					switch ( $paramName ) {
						case 'gallery-internal-alt':
							$alt = $this->stripAltText( $match, false );
							break;
						case 'gallery-internal-link':
							$linkValue = $this->stripAltText( $match, false );
							// The pipe and braces are escaped on purpose: an
							// unescaped "|" is an alternation, which made this
							// match any value merely starting with "-{R" or
							// ending with "}-" and then cut characters off
							// ordinary link targets.
							if ( preg_match( '/^-\{R\|(.*)\}-$/', $linkValue ) ) {
								// Result of LanguageConverter::markNoConversion
								// invoked on an external link.
								// Drop the leading "-{R|" and the trailing "}-".
								$linkValue = substr( $linkValue, 4, -2 );
							}
							list( $type, $target ) = $this->parseLinkParameter( $linkValue );
							if ( $type === 'link-url' ) {
								$link = $target;
								$this->mOutput->addExternalLink( $target );
							} elseif ( $type === 'link-title' ) {
								$link = $target->getLinkURL();
								$this->mOutput->addLink( $target );
							}
							break;
						default:
							// Must be a handler specific parameter: any name
							// other than the two internal ones was added to
							// $paramMap from $handler->getParamMap() above.
							if ( $handler->validateParam( $paramName, $match ) ) {
								$handlerOptions[$paramName] = $match;
							} else {
								// Guess not, consider it as caption.
								$this->logger->debug(
									"$parameterMatch failed parameter validation" );
								$label = $parameterMatch;
							}
					}

				} else {
					// Last pipe wins.
					$label = $parameterMatch;
				}
			}
		}

		$ig->add( $title, $label, $alt, $link, $handlerOptions );
	}
	$html = $ig->toHTML();
	Hooks::run( 'AfterParserFetchFileAndTitle', [ $this, $ig, &$html ] );
	return $html;
}
5337
5338 /**
5339 * @param MediaHandler|bool $handler Media handler, or false when there is none
5340 * @return array
5341 */
public function getImageParams( $handler ) {
	# Handler-independent parameter names, grouped by parameter type
	static $internalParamNames = [
		'horizAlign' => [ 'left', 'right', 'center', 'none' ],
		'vertAlign' => [ 'baseline', 'sub', 'super', 'top', 'text-top', 'middle',
			'bottom', 'text-bottom' ],
		'frame' => [ 'thumbnail', 'manualthumb', 'framed', 'frameless',
			'upright', 'border', 'link', 'alt', 'class' ],
	];
	# Magic word ID => [ type, name ], built once from the list above
	static $internalParamMap;

	# Results are cached per handler class; '' stands for "no handler"
	$handlerClass = $handler ? get_class( $handler ) : '';

	if ( !isset( $this->mImageParams[$handlerClass] ) ) {
		if ( !$internalParamMap ) {
			$internalParamMap = [];
			foreach ( $internalParamNames as $type => $names ) {
				foreach ( $names as $name ) {
					// For grep: img_left, img_right, img_center, img_none,
					// img_baseline, img_sub, img_super, img_top, img_text_top, img_middle,
					// img_bottom, img_text_bottom,
					// img_thumbnail, img_manualthumb, img_framed, img_frameless, img_upright,
					// img_border, img_link, img_alt, img_class
					$magicName = str_replace( '-', '_', "img_$name" );
					$internalParamMap[$magicName] = [ $type, $name ];
				}
			}
		}

		# Start from the internal parameters and add the handler's own
		$paramMap = $internalParamMap;
		if ( $handler ) {
			foreach ( $handler->getParamMap() as $magic => $paramName ) {
				$paramMap[$magic] = [ 'handler', $paramName ];
			}
		}

		$this->mImageParams[$handlerClass] = $paramMap;
		$this->mImageParamsMagicArray[$handlerClass] =
			$this->magicWordFactory->newArray( array_keys( $paramMap ) );
	}

	return [
		$this->mImageParams[$handlerClass],
		$this->mImageParamsMagicArray[$handlerClass]
	];
}
5387
5388 /**
5389 * Parse image options text and use it to make an image
5390 *
5391 * @param Title $title
5392 * @param string $options
5393 * @param LinkHolderArray|bool $holders
5394 * @return string HTML
5395 */
public function makeImage( $title, $options, $holders = false ) {
	# The options text has the form "options|alt text".
	# Options are:
	#  * thumbnail  make a thumbnail with enlarge-icon and caption, alignment depends on lang
	#  * left       no resizing, just left align. label is used for alt= only
	#  * right      same, but right aligned
	#  * none       same, but not aligned
	#  * ___px      scale to ___ pixels width, no aligning. e.g. use in taxobox
	#  * center     center the image
	#  * frame      Keep original image size, no magnify-button.
	#  * framed     Same as "frame"
	#  * frameless  like 'thumb' but without a frame. Keeps user preferences for width
	#  * upright    reduce width for upright images, rounded to full __0 px
	#  * border     draw a 1px border around the image
	#  * alt        Text for HTML alt attribute (defaults to empty)
	#  * class      Set a class for img node
	#  * link       Set the target of the image link. Can be external, interwiki, or local
	# vertical-align values (no % or length right now):
	#  * baseline
	#  * sub
	#  * super
	#  * top
	#  * text-top
	#  * middle
	#  * bottom
	#  * text-bottom

	# Split on pipes while keeping LanguageConverter markup in one piece
	$parts = StringUtils::delimiterExplode(
		'-{', '}-', '|', $options, true /* allow nesting */
	);

	# Give extensions a chance to select the file revision for us
	$fetchOptions = [];
	$descQuery = false;
	Hooks::run( 'BeforeParserFetchFileAndTitle',
		[ $this, $title, &$fetchOptions, &$descQuery ] );
	# Fetch and register the file (file title may be different via hooks)
	list( $file, $title ) = $this->fetchFileAndTitle( $title, $fetchOptions );

	# Parameter map for this file's handler (or for "no handler")
	$handler = $file ? $file->getHandler() : false;
	list( $paramMap, $mwArray ) = $this->getImageParams( $handler );

	if ( !$file ) {
		$this->addTrackingCategory( 'broken-file-category' );
	}

	# Sort every part into one of the parameter groups; a part that does
	# not validate as a parameter becomes the caption (the last one wins).
	$caption = '';
	$params = [
		'frame' => [],
		'handler' => [],
		'horizAlign' => [],
		'vertAlign' => [],
	];
	$seenformat = false;
	foreach ( $parts as $part ) {
		$part = trim( $part );
		list( $magicName, $value ) = $mwArray->matchVariableStartToEnd( $part );
		$validated = false;

		if ( isset( $paramMap[$magicName] ) ) {
			list( $type, $paramName ) = $paramMap[$magicName];

			if ( $type === 'handler' && $paramName === 'width' ) {
				# Special case; width and height come in one variable together.
				# Each dimension is validated and stored on its own.
				$dimensions = self::parseWidthParam( $value );
				foreach ( [ 'width', 'height' ] as $dimension ) {
					if ( isset( $dimensions[$dimension] )
						&& $handler->validateParam( $dimension, $dimensions[$dimension] )
					) {
						$params[$type][$dimension] = $dimensions[$dimension];
						$validated = true;
					}
				}
				# else no validation -- T15436
			} else {
				if ( $type === 'handler' ) {
					# Validate handler parameter
					$validated = $handler->validateParam( $paramName, $value );
				} elseif ( $paramName === 'manualthumb'
					|| $paramName === 'alt'
					|| $paramName === 'class'
				) {
					# @todo FIXME: Possibly check validity here for
					# manualthumb? downstream behavior seems odd with
					# missing manual thumbs.
					$validated = true;
					$value = $this->stripAltText( $value, $holders );
				} elseif ( $paramName === 'link' ) {
					# The parameter is stored under the link type returned
					# by parseLinkParameter(), not under "link".
					list( $paramName, $value ) = $this->parseLinkParameter(
						$this->stripAltText( $value, $holders )
					);
					if ( $paramName ) {
						$validated = true;
						if ( $paramName === 'no-link' ) {
							$value = true;
						}
						if ( $paramName === 'link-url' && $this->mOptions->getExternalLinkTarget() ) {
							$params[$type]['link-target'] = $this->mOptions->getExternalLinkTarget();
						}
					}
				} elseif ( $paramName === 'frameless'
					|| $paramName === 'framed'
					|| $paramName === 'thumbnail'
				) {
					// use first appearing option, discard others.
					$validated = !$seenformat;
					$seenformat = true;
				} else {
					# Most other things appear to be empty or numeric...
					$validated = ( $value === false || is_numeric( trim( $value ) ) );
				}

				if ( $validated ) {
					$params[$type][$paramName] = $value;
				}
			}
		}

		if ( !$validated ) {
			$caption = $part;
		}
	}

	# Process alignment parameters
	if ( $params['horizAlign'] ) {
		$params['frame']['align'] = key( $params['horizAlign'] );
	}
	if ( $params['vertAlign'] ) {
		$params['frame']['valign'] = key( $params['vertAlign'] );
	}

	$params['frame']['caption'] = $caption;

	# Will the image be presented in a frame, with the caption below?
	$imageIsFramed = isset( $params['frame']['frame'] )
		|| isset( $params['frame']['framed'] )
		|| isset( $params['frame']['thumbnail'] )
		|| isset( $params['frame']['manualthumb'] );

	# In the old days, [[Image:Foo|text...]] would set alt text. Later it
	# came to also set the caption, ordinary text after the image -- which
	# makes no sense, because that just repeats the text multiple times in
	# screen readers. It *also* came to set the title attribute.
	# Now that we have an alt attribute, we should not set the alt text to
	# equal the caption: that's worse than useless, it just repeats the
	# text. This is the framed/thumbnail case. If there's no caption, we
	# use the unnamed parameter for alt text as well, just for the time
	# being, if the unnamed param is set and the alt param is not.
	# For the future, we need to figure out if we want to tweak this more,
	# e.g., introducing a title= parameter for the title; ignoring the
	# unnamed parameter entirely for images without a caption; adding an
	# explicit caption= parameter and preserving the old magic unnamed
	# parameter for BC; ...
	$hasAlt = isset( $params['frame']['alt'] );
	if ( $imageIsFramed ) {
		# Framed image
		if ( $caption === '' && !$hasAlt ) {
			# No caption or alt text, add the filename as the alt text so
			# that screen readers at least get some description of the image
			$params['frame']['alt'] = $title->getText();
		}
		# Do not set $params['frame']['title'] because tooltips don't make sense
		# for framed images
	} else {
		# Inline image
		if ( !$hasAlt ) {
			# No alt text: use the "caption" if there is one, otherwise
			# fall back to the filename
			$params['frame']['alt'] = $caption !== ''
				? $this->stripAltText( $caption, $holders )
				: $title->getText();
		}
		# Use the "caption" for the tooltip text
		$params['frame']['title'] = $this->stripAltText( $caption, $holders );
	}
	$params['handler']['targetlang'] = $this->getTargetLanguage()->getCode();

	Hooks::run( 'ParserMakeImageParams', [ $title, $file, &$params, $this ] );

	# Linker does the rest
	$time = $fetchOptions['time'] ?? false;
	$ret = Linker::makeImageLink( $this, $title, $file, $params['frame'], $params['handler'],
		$time, $descQuery, $this->mOptions->getThumbSize() );

	# Give the handler a chance to modify the parser object
	if ( $handler ) {
		$handler->parserTransformHook( $this, $file );
	}

	return $ret;
}
5597
/**
 * Interpret the value of the 'link' parameter in image syntax
 * (`[[File:Foo.jpg|link=<value>]]`).
 *
 * A recognised target is also registered with the parser output, so that
 * it ends up in the appropriate link table.
 *
 * @since 1.32
 * @param string $value
 * @return array Pair of `[ type, target ]`, where:
 *  - `type` is one of:
 *    - `null`: Given value is not a valid link target, use default
 *    - `'no-link'`: Given value is empty, do not generate a link
 *    - `'link-url'`: Given value is a valid external link
 *    - `'link-title'`: Given value is a valid internal link
 *  - `target` is:
 *    - When `type` is `null` or `'no-link'`: `false`
 *    - When `type` is `'link-url'`: URL string corresponding to given value
 *    - When `type` is `'link-title'`: Title object corresponding to given value
 */
public function parseLinkParameter( $value ) {
	# Empty value: the caller asked for no link at all
	if ( $value === '' ) {
		return [ 'no-link', false ];
	}

	$protocols = $this->mUrlProtocols;
	if ( preg_match( "/^((?i)$protocols)/", $value ) ) {
		# Starts with a known protocol, so it can only be an external link;
		# the whole value must then be a well-formed URL.
		$address = self::EXT_LINK_ADDR;
		$urlChars = self::EXT_LINK_URL_CLASS;
		if ( !preg_match( "/^((?i)$protocols){$address}{$urlChars}*$/u", $value ) ) {
			return [ null, false ];
		}
		$this->mOutput->addExternalLink( $value );
		return [ 'link-url', $value ];
	}

	# Anything else is treated as the name of a wiki page
	$linkTitle = Title::newFromText( $value );
	if ( !$linkTitle ) {
		return [ null, false ];
	}
	$this->mOutput->addLink( $linkTitle );
	return [ 'link-title', $linkTitle ];
}
5640
/**
 * Reduce an image caption to plain text usable as an alt or title attribute.
 *
 * @param string $caption
 * @param LinkHolderArray|bool $holders Link holders to resolve placeholders
 *  with; when falsy, the parser's own replaceLinkHoldersText() is used.
 * @return mixed|string
 */
protected function stripAltText( $caption, $holders ) {
	# Strip bad stuff out of the title (tooltip). replaceLinkHoldersText()
	# alone is not enough: when we are called from replaceInternalLinks2(),
	# mLinkHolders is not up-to-date, so prefer the holders we were given.
	$plain = $holders
		? $holders->replaceText( $caption )
		: $this->replaceLinkHoldersText( $caption );

	# Thumbnail attributes must not carry placeholders that would later be
	# expanded to HTML, so expand them now; the tags are removed below.
	$plain = $this->mStripState->unstripBoth( $plain );

	# Compatibility hack! In HTML certain entity references not terminated
	# by a semicolon are decoded (but not if we're in an attribute; that's
	# how link URLs get away without properly escaping & in queries).
	# But wikitext has always required semicolon-termination of entities,
	# so encode & where needed to avoid decode of semicolon-less entities.
	# See T209236 and
	# https://www.w3.org/TR/html5/syntax.html#named-character-references
	# T210437 discusses moving this workaround to Sanitizer::stripAllTags.
	$plain = preg_replace( "/
		&                    # 1. entity prefix
		(?=                  # 2. followed by:
		(?:                  #  a. one of the legacy semicolon-less named entities
			A(?:Elig|MP|acute|circ|grave|ring|tilde|uml)|
			C(?:OPY|cedil)|E(?:TH|acute|circ|grave|uml)|
			GT|I(?:acute|circ|grave|uml)|LT|Ntilde|
			O(?:acute|circ|grave|slash|tilde|uml)|QUOT|REG|THORN|
			U(?:acute|circ|grave|uml)|Yacute|
			a(?:acute|c(?:irc|ute)|elig|grave|mp|ring|tilde|uml)|brvbar|
			c(?:cedil|edil|urren)|cent(?!erdot;)|copy(?!sr;)|deg|
			divide(?!ontimes;)|e(?:acute|circ|grave|th|uml)|
			frac(?:1(?:2|4)|34)|
			gt(?!c(?:c|ir)|dot|lPar|quest|r(?:a(?:pprox|rr)|dot|eq(?:less|qless)|less|sim);)|
			i(?:acute|circ|excl|grave|quest|uml)|laquo|
			lt(?!c(?:c|ir)|dot|hree|imes|larr|quest|r(?:Par|i(?:e|f|));)|
			m(?:acr|i(?:cro|ddot))|n(?:bsp|tilde)|
			not(?!in(?:E|dot|v(?:a|b|c)|)|ni(?:v(?:a|b|c)|);)|
			o(?:acute|circ|grave|rd(?:f|m)|slash|tilde|uml)|
			p(?:lusmn|ound)|para(?!llel;)|quot|r(?:aquo|eg)|
			s(?:ect|hy|up(?:1|2|3)|zlig)|thorn|times(?!b(?:ar|)|d;)|
			u(?:acute|circ|grave|ml|uml)|y(?:acute|en|uml)
		)
		(?:[^;]|$))          #  b. and not followed by a semicolon
		# S = study, for efficiency
		/Sx", '&amp;', $plain );

	return Sanitizer::stripAllTags( $plain );
}
5698
/**
 * Flag the output object as dynamic content that must not be cached.
 *
 * @deprecated since 1.28; use getOutput()->updateCacheExpiry()
 * @throws MWException If there is no output object, i.e. nothing is being parsed
 */
public function disableCache() {
	$this->logger->debug( "Parser output marked as uncacheable." );
	if ( $this->mOutput ) {
		// new style, for consistency: an expiry of zero
		$this->mOutput->updateCacheExpiry( 0 );
		return;
	}
	throw new MWException(
		__METHOD__ . " can only be called when actually parsing something"
	);
}
5712
/**
 * Sanitizer callback: expand items found inside HTML attribute values so
 * that they can be safely tested and escaped.
 *
 * The expanded text is both written back through $text and returned.
 *
 * @param string &$text
 * @param bool|PPFrame $frame
 * @return string
 */
public function attributeStripCallback( &$text, $frame = false ) {
	# Expand variables first, then resolve the strip markers in the result
	$text = $this->mStripState->unstripBoth(
		$this->replaceVariables( $text, $frame )
	);
	return $text;
}
5726
/**
 * List the names of all registered tag hooks: transparent tag hooks first,
 * then regular tag hooks, then function tag hooks.
 *
 * @return array
 */
public function getTags() {
	$this->firstCallInit();

	$transparentTags = array_keys( $this->mTransparentTagHooks );
	$regularTags = array_keys( $this->mTagHooks );
	$functionTags = array_keys( $this->mFunctionTagHooks );

	return array_merge( $transparentTags, $regularTags, $functionTags );
}
5740
/**
 * Accessor for the function synonym table, after making sure that
 * first-call initialisation has run.
 *
 * @since 1.32
 * @return array
 */
public function getFunctionSynonyms() {
	$this->firstCallInit();

	return $this->mFunctionSynonyms;
}
5749
/**
 * Accessor for the URL protocols string held by this parser, which the
 * parser interpolates into its link-matching regular expressions.
 *
 * @since 1.32
 * @return string
 */
public function getUrlProtocols() {
	return $this->mUrlProtocols;
}
5757
/**
 * Substitute transparent tags in $text with the output of their callbacks.
 *
 * Transparent tag hooks are like regular XML-style tag hooks, except they
 * operate late in the transformation sequence, on HTML instead of wikitext.
 *
 * @param string $text
 *
 * @return string
 */
public function replaceTransparentTags( $text ) {
	$found = [];
	$tagNames = array_keys( $this->mTransparentTagHooks );
	$text = self::extractTagsAndParams( $tagNames, $text, $found );

	# Map each extraction marker to the text that should take its place
	$substitutions = [];
	foreach ( $found as $marker => $data ) {
		list( $element, $content, $params, $tag ) = $data;
		$hook = $this->mTransparentTagHooks[strtolower( $element )] ?? null;
		# Without a hook for this element, put the original tag text back
		$substitutions[$marker] = $hook === null
			? $tag
			: call_user_func( $hook, $content, $params, $this );
	}

	return strtr( $text, $substitutions );
}
5789
/**
 * Split wikitext into sections and either fetch or replace the text of
 * one particular section.
 *
 * External callers should use the getSection and replaceSection methods.
 *
 * @param string $text Page wikitext
 * @param string|int $sectionId A section identifier string of the form:
 *   "<flag1> - <flag2> - ... - <section number>"
 *
 * Currently the only recognised flag is "T", which means the target section number
 * was derived during a template inclusion parse, in other words this is a template
 * section edit link. If no flags are given, it was an ordinary section edit link.
 * This flag is required to avoid a section numbering mismatch when a section is
 * enclosed by "<includeonly>" (T8563).
 *
 * The section number 0 pulls the text before the first heading; other numbers will
 * pull the given section along with its lower-level subsections. If the section is
 * not found, $mode=get will return $newtext, and $mode=replace will return $text.
 *
 * Section 0 is always considered to exist, even if it only contains the empty
 * string. If $text is the empty string and section 0 is replaced, $newText is
 * returned.
 *
 * @param string $mode One of "get" or "replace"
 * @param string $newText Replacement text for section data.
 * @return string For "get", the extracted section text.
 *   for "replace", the whole page with the section replaced.
 */
private function extractSections( $text, $sectionId, $mode, $newText = '' ) {
	global $wgTitle; # not generally used but removes an ugly failure mode

	# Held until this function returns; releasing it unlocks the parser
	$magicScopeVariable = $this->lock();
	$this->startParse( $wgTitle, new ParserOptions, self::OT_PLAIN, true );

	$isGet = ( $mode === 'get' );
	$isReplace = ( $mode === 'replace' );
	$result = '';
	$frame = $this->getPreprocessor()->newFrame();

	# The last dash-separated part is the section number, the rest are flags
	$idParts = explode( '-', $sectionId );
	$sectionIndex = array_pop( $idParts );
	$flags = in_array( 'T', $idParts, true ) ? self::PTD_FOR_INCLUSION : 0;

	# An empty document only has sections 0 and T-0
	if ( strval( $text ) === '' ) {
		if ( $sectionIndex == 0 ) {
			return $isGet ? '' : $newText;
		}
		return $isGet ? $newText : $text;
	}

	# Preprocess the text. <h> nodes indicate section breaks; they can only
	# occur at the top level, so walking the root's children finds them all.
	$root = $this->preprocessToDom( $text, $flags );
	$node = $root->getFirstChild();

	# Locate the target section
	if ( $sectionIndex == 0 ) {
		# Section zero doesn't nest, level=big
		$targetLevel = 1000;
	} else {
		for ( ; $node; $node = $node->getNextSibling() ) {
			if ( $node->getName() === 'h' ) {
				$heading = $node->splitHeading();
				if ( $heading['i'] == $sectionIndex ) {
					$targetLevel = $heading['level'];
					break;
				}
			}
			# In replace mode everything before the target is kept as-is
			if ( $isReplace ) {
				$result .= $frame->expand( $node, PPFrame::RECOVER_ORIG );
			}
		}
	}

	# Ran off the end of the document: the section does not exist
	if ( !$node ) {
		return $isGet ? $newText : $text;
	}

	# Walk to the end of the section, nested subsections included. The
	# section stops at the next heading of the same or a higher level.
	for ( ; $node; $node = $node->getNextSibling() ) {
		if ( $node->getName() === 'h' ) {
			$heading = $node->splitHeading();
			if ( $heading['i'] != $sectionIndex && $heading['level'] <= $targetLevel ) {
				break;
			}
		}
		if ( $isGet ) {
			$result .= $frame->expand( $node, PPFrame::RECOVER_ORIG );
		}
	}

	if ( $isReplace ) {
		# Emit the replacement text followed by two newlines: trailing
		# whitespace in $newText is conventionally stripped by the editor,
		# so both are needed to restore the paragraph gap. Nothing is
		# added when there is no replacement text.
		if ( $newText != "" ) {
			$result .= $newText . "\n\n";
		}

		# Then copy out whatever follows the replaced section
		for ( ; $node; $node = $node->getNextSibling() ) {
			$result .= $frame->expand( $node, PPFrame::RECOVER_ORIG );
		}
	}

	if ( is_string( $result ) ) {
		# Re-insert stripped tags
		$result = rtrim( $this->mStripState->unstripBoth( $result ) );
	}

	return $result;
}
5929
/**
 * Fetch the text of one section, identified by number.
 *
 * A section is text under a heading like == Heading == or \<h1\>Heading\</h1\>, or
 * the first section before any such heading (section 0). Subsections of
 * the requested section are included in the result.
 *
 * @param string $text Text to look in
 * @param string|int $sectionId Section identifier as a number or string
 * (e.g. 0, 1 or 'T-1').
 * @param string $defaultText Default to return if section is not found
 *
 * @return string Text of the requested section
 */
public function getSection( $text, $sectionId, $defaultText = '' ) {
	# In 'get' mode the fourth argument doubles as the not-found fallback
	return $this->extractSections( $text, $sectionId, 'get', $defaultText );
}
5947
/**
 * Return $oldText with the content of the section identified by
 * $sectionId swapped for $newText. When the target section does not
 * exist, $oldText comes back unchanged.
 *
 * @param string $oldText Former text of the article
 * @param string|int $sectionId Section identifier as a number or string
 * (e.g. 0, 1 or 'T-1').
 * @param string $newText Replacing text
 *
 * @return string Modified text
 */
public function replaceSection( $oldText, $sectionId, $newText ) {
	return $this->extractSections( $oldText, $sectionId, 'replace', $newText );
}
5963
/**
 * Accessor for the ID of the revision being parsed.
 *
 * The value is one of:
 * - a) Positive, indicating a specific revision ID (current or old)
 * - b) Zero, meaning the revision ID is specified by getCurrentRevisionCallback()
 * - c) Null, meaning the parse is for preview mode and there is no revision
 *
 * @return int|null
 */
public function getRevisionId() {
	return $this->mRevisionId;
}
5977
/**
 * Get the revision object for $this->mRevisionId, caching it on first use.
 *
 * @return Revision|null Either a Revision object or null
 * @since 1.23 (public since 1.23)
 */
public function getRevisionObject() {
	if ( $this->mRevisionObject ) {
		return $this->mRevisionObject;
	}

	// NOTE: the callback is consulted even when mRevisionId is null, which
	// is useful when parsing a revision that has not been saved yet.
	// NOTE: the callback may be used to inject an OLD revision that was
	// already loaded, so "current" is a bit of a misnomer; it cannot simply
	// be skipped when mRevisionId is set.
	$fetchRevision = $this->mOptions->getCurrentRevisionCallback();
	$rev = call_user_func( $fetchRevision, $this->getTitle(), $this );

	if ( $rev ) {
		if ( $this->mRevisionId === null ) {
			// Preview mode. An existing (saved) revision from the callback
			// is probably the page's current revision, which is not what we
			// want here, so ignore it. The callback still had to be called
			// so that an unsaved revision can be injected, e.g. for
			// self-transclusion previews.
			if ( $rev->getId() ) {
				return null;
			}
		} elseif ( $this->mRevisionId && $rev->getId() != $this->mRevisionId ) {
			// When parsing for a new revision the callback should already
			// yield the matching object. If it does not, fetch by
			// mRevisionId for sanity.
			$rev = Revision::newFromId( $this->mRevisionId );
		}
	}

	$this->mRevisionObject = $rev;

	return $this->mRevisionObject;
}
6022
/**
 * Get the timestamp associated with the current revision, adjusted for
 * the default server-local timestamp. The result is cached.
 *
 * @return string TS_MW timestamp
 */
public function getRevisionTimestamp() {
	if ( $this->mRevisionTimestamp === null ) {
		# Prefer the revision's own timestamp, else the one from the options
		$rev = $this->getRevisionObject();
		$rawTimestamp = $rev
			? $rev->getTimestamp()
			: $this->mOptions->getTimestamp();
		# The output records the timestamp before any time zone adjustment
		$this->mOutput->setRevisionTimestampUsed( $rawTimestamp );

		# The cryptic '' timezone parameter selects the site-default
		# timezone offset rather than the user's setting. The value is
		# saved into the parser cache, served to other users and possibly
		# used inside links, so it must be the same for every visitor.
		$this->mRevisionTimestamp = $this->contLang->userAdjust( $rawTimestamp, '' );
	}

	return $this->mRevisionTimestamp;
}
6047
/**
 * Get the name of the user that edited the last revision.
 *
 * The name is cached once determined. It stays null when there is no
 * revision object and the parse is neither a wikitext output parse nor a
 * preview.
 *
 * @return string|null User name
 */
public function getRevisionUser() {
	if ( $this->mRevisionUser !== null ) {
		return $this->mRevisionUser;
	}

	$rev = $this->getRevisionObject();
	if ( $rev ) {
		$this->mRevisionUser = $rev->getUserText();
	} elseif ( $this->ot['wiki'] || $this->mOptions->getIsPreview() ) {
		# When this template is subst:ed the revision id is blank, so fall
		# back to the current user's name
		$this->mRevisionUser = $this->getUser()->getName();
	}

	return $this->mRevisionUser;
}
6067
/**
 * Get the size of the revision, cached after the first call.
 *
 * @return int|null Revision size
 */
public function getRevisionSize() {
	if ( $this->mRevisionSize !== null ) {
		return $this->mRevisionSize;
	}

	# When this variable is subst:ed the revision id is blank. The parser
	# input size is used in that case, because the substitution itself
	# changes the size.
	$rev = $this->getRevisionObject();
	$this->mRevisionSize = $rev ? $rev->getSize() : $this->mInputSize;

	return $this->mRevisionSize;
}
6088
/**
 * Mutator for $mDefaultSort. The value is also stored on the output
 * object as its 'defaultsort' property.
 *
 * @param string $sort New value
 */
public function setDefaultSort( $sort ) {
	$this->mDefaultSort = $sort;

	# Keep the output's 'defaultsort' property in step with the parser
	$this->mOutput->setProperty( 'defaultsort', $sort );
}
6098
/**
 * Accessor for $mDefaultSort that substitutes the empty string when no
 * sort key has been set.
 *
 * The value is treated as a prefix, so the empty string is equivalent to
 * sorting by page name.
 *
 * @return string
 */
public function getDefaultSort() {
	# false is the "nothing set" marker
	return $this->mDefaultSort !== false ? $this->mDefaultSort : '';
}
6116
/**
 * Raw accessor for $mDefaultSort.
 * Unlike getDefaultSort(), this returns false when no sort key is set.
 *
 * @return string|bool
 */
public function getCustomDefaultSort() {
	return $this->mDefaultSort;
}
6126
/**
 * Turn already-stripped heading text into a section name by running it
 * through, in order, Sanitizer::normalizeSectionNameWhitespace(),
 * Sanitizer::decodeCharReferences() and self::normalizeSectionName().
 *
 * @param string $text
 * @return string
 */
private static function getSectionNameFromStrippedText( $text ) {
	$spaced = Sanitizer::normalizeSectionNameWhitespace( $text );
	$decoded = Sanitizer::decodeCharReferences( $spaced );

	return self::normalizeSectionName( $decoded );
}
6133
/**
 * Build an anchor from a section name: '#' followed by the name as
 * escaped by Sanitizer::escapeIdForLink().
 *
 * @param string $sectionName
 * @return string Anchor (starting with '#')
 */
private static function makeAnchor( $sectionName ) {
	$id = Sanitizer::escapeIdForLink( $sectionName );

	return "#$id";
}
6137
/**
 * Build an anchor from a section name, using the fallback ID encoding
 * when the second entry of the 'FragmentMode' option is 'legacy', and
 * the regular link encoding otherwise.
 *
 * @param string $sectionName
 * @return string Anchor (starting with '#')
 */
private function makeLegacyAnchor( $sectionName ) {
	$fragmentMode = $this->svcOptions->get( 'FragmentMode' );
	$useLegacy = ( $fragmentMode[1] ?? null ) === 'legacy';

	// ForAttribute() and ForLink() are the same for legacy encoding
	$id = $useLegacy
		? Sanitizer::escapeIdForAttribute( $sectionName, Sanitizer::ID_FALLBACK )
		: Sanitizer::escapeIdForLink( $sectionName );

	return '#' . $id;
}
6149
/**
 * Guess the section anchor for a wikitext fragment that presumably came
 * from a heading, for example "Header" from "== Header ==".
 *
 * @param string $text
 * @return string Anchor (starting with '#')
 */
public function guessSectionNameFromWikiText( $text ) {
	# Wikitext links break the anchor, so strip the markup first
	$stripped = $this->stripSectionName( $text );

	return self::makeAnchor( self::getSectionNameFromStrippedText( $stripped ) );
}
6164
/**
 * Variant of guessSectionNameFromWikiText() that produces legacy anchors
 * where possible. Meant for redirects, since various versions of
 * Microsoft browsers interpret Location: headers as something other
 * than UTF-8, resulting in breakage.
 *
 * @param string $text The section name
 * @return string Anchor (starting with '#')
 */
public function guessLegacySectionNameFromWikiText( $text ) {
	# Wikitext links break the anchor, so strip the markup first
	$stripped = $this->stripSectionName( $text );

	return $this->makeLegacyAnchor( self::getSectionNameFromStrippedText( $stripped ) );
}
6180
/**
 * Counterpart of guessSectionNameFromWikiText() for input that has
 * already been stripped of wikitext.
 *
 * @param string $text Section name (plain text)
 * @return string Anchor (starting with '#')
 */
public static function guessSectionNameFromStrippedText( $text ) {
	return self::makeAnchor( self::getSectionNameFromStrippedText( $text ) );
}
6190
/**
 * Normalize a section name the same way that code making links to the
 * section would.
 *
 * @param string $text
 * @return string The fragment as split out by the title parser, or $text
 *  unchanged when the title parser rejects it as malformed
 */
private static function normalizeSectionName( $text ) {
	# T90902: ensure the same normalization is applied for IDs as to links
	/** @var MediaWikiTitleCodec $titleParser */
	$titleParser = MediaWikiServices::getInstance()->getTitleParser();
	'@phan-var MediaWikiTitleCodec $titleParser';

	try {
		# Parse the name as the fragment part of a title string
		return $titleParser->splitTitleString( '#' . $text )['fragment'];
	} catch ( MalformedTitleException $ex ) {
		return $text;
	}
}
6210
/**
 * Strip wikitext from a string so it can be used in a section anchor.
 *
 * Only the resultant text is left behind (i.e. the result of
 * [[User:WikiSysop|Sysop]] would be "Sysop" and the result of
 * [[User:WikiSysop]] would be "User:WikiSysop"). The intent is to create
 * valid section anchors by mimicking the output of the parser when
 * headings are parsed.
 *
 * @param string $text Text string to be stripped of wikitext
 * for use in a Section anchor
 * @return string Filtered text string
 */
public function stripSectionName( $text ) {
	# Internal links: piped links keep their label, plain links their target
	$pipedLink = '/\[\[:?([^[|]+)\|([^[]+)\]\]/';
	$plainLink = '/\[\[:?([^[]+)\|?\]\]/';
	$text = preg_replace( $pipedLink, '$2', $text );
	$text = preg_replace( $plainLink, '$1', $text );

	# External links keep their label
	# @todo FIXME: Not tolerant to blank link text
	# I.E. [https://www.mediawiki.org] will render as [1] or something depending
	# on how many empty links there are on the page - need to figure that out.
	$externalLink = '/\[(?i:' . $this->mUrlProtocols . ')([^ ]+?) ([^[]+)\]/';
	$text = preg_replace( $externalLink, '$2', $text );

	# Wikitext quotes (italics & bold) become HTML tags here...
	$text = $this->doQuotes( $text );

	# ...and all HTML tags are then removed
	return StringUtils::delimiterReplace( '<', '>', '', $text );
}
6243
/**
 * Preprocessor regression-testing helper: replaceVariables, then unstrip,
 * then removal of HTML tags by the Sanitizer.
 *
 * @param string $text
 * @param Title $title
 * @param ParserOptions $options
 * @param int $outputType
 *
 * @return string
 */
public function testSrvus( $text, Title $title, ParserOptions $options,
	$outputType = self::OT_HTML
) {
	# Held until this function returns; releasing it unlocks the parser
	$magicScopeVariable = $this->lock();
	$this->startParse( $title, $options, $outputType, true );

	$expanded = $this->replaceVariables( $text );
	$unstripped = $this->mStripState->unstripBoth( $expanded );

	return Sanitizer::removeHTMLtags( $unstripped );
}
6265
/**
 * Testing helper: run preSaveTransform() on behalf of the user taken
 * from the given parser options.
 *
 * @param string $text
 * @param Title $title
 * @param ParserOptions $options
 * @return string
 */
public function testPst( $text, Title $title, ParserOptions $options ) {
	$user = $options->getUser();

	return $this->preSaveTransform( $text, $title, $user, $options );
}
6275
/**
 * Testing helper: testSrvus() with the output type fixed to
 * self::OT_PREPROCESS.
 *
 * @param string $text
 * @param Title $title
 * @param ParserOptions $options
 * @return string
 */
public function testPreprocess( $text, Title $title, ParserOptions $options ) {
	$outputType = self::OT_PREPROCESS;

	return $this->testSrvus( $text, $title, $options, $outputType );
}
6285
/**
 * Apply a callback to every region of the text that lies outside strip
 * markers, replacing each such region with the callback's return value.
 * For example, with input:
 *
 *  aaa<MARKER>bbb
 *
 * the callback is called twice, with 'aaa' and 'bbb', and those two
 * strings are replaced with whatever it returned in each case. The
 * markers themselves are copied through untouched.
 *
 * @param string $s
 * @param callable $callback
 *
 * @return string
 */
public function markerSkipCallback( $s, $callback ) {
	$length = strlen( $s );
	$result = '';
	$pos = 0;

	while ( $pos < $length ) {
		$markerStart = strpos( $s, self::MARKER_PREFIX, $pos );
		if ( $markerStart === false ) {
			# No more markers: the rest of the string is one plain region
			$result .= call_user_func( $callback, substr( $s, $pos ) );
			break;
		}

		# Plain region in front of the marker
		$result .= call_user_func( $callback, substr( $s, $pos, $markerStart - $pos ) );

		$suffixStart = strpos( $s, self::MARKER_SUFFIX, $markerStart );
		if ( $suffixStart === false ) {
			# Unterminated marker: copy the remainder verbatim
			$result .= substr( $s, $markerStart );
			break;
		}

		# Copy the complete marker and continue right after it
		$markerEnd = $suffixStart + strlen( self::MARKER_SUFFIX );
		$result .= substr( $s, $markerStart, $markerEnd - $markerStart );
		$pos = $markerEnd;
	}

	return $result;
}
6325
/**
 * Remove any strip markers found in the given text, by delegating to
 * the strip state.
 *
 * @param string $text
 * @return string
 */
public function killMarkers( $text ) {
	$stripState = $this->mStripState;

	return $stripState->killMarkers( $text );
}
6335
/**
 * Capture the parser state needed to turn the given half-parsed text
 * into HTML. "Half-parsed" in this context means the output of
 * recursiveTagParse() or internalParse(). This output has strip markers
 * from replaceVariables (extensionSubstitution() etc.), and link
 * placeholders from replaceLinkHolders().
 *
 * The returned array can be serialized and stored persistently, and
 * later loaded into another parser instance with
 * unserializeHalfParsedText(). The text can then be safely incorporated
 * into the return value of a parser hook.
 *
 * @deprecated since 1.31
 * @param string $text
 *
 * @return array
 */
public function serializeHalfParsedText( $text ) {
	wfDeprecated( __METHOD__, '1.31' );

	# Only the parts of the strip state and link holders that $text uses
	return [
		'text' => $text,
		'version' => self::HALF_PARSED_VERSION,
		'stripState' => $this->mStripState->getSubState( $text ),
		'linkHolders' => $this->mLinkHolders->getSubArray( $text )
	];
}
6363
/**
 * Load parser state from the $data array, which is assumed to have been
 * produced by serializeHalfParsedText(). The text is taken out of the
 * array and its markers are transformed into markers appropriate for
 * the current Parser instance. The transformed text is returned, and can
 * be safely included in the return value of a parser hook.
 *
 * If the $data array has been stored persistently, the caller should first
 * check whether it is still valid, by calling isValidHalfParsedText().
 *
 * @deprecated since 1.31
 * @param array $data Serialized data
 * @throws MWException If the data carries no version or a different one
 * @return string
 */
public function unserializeHalfParsedText( $data ) {
	wfDeprecated( __METHOD__, '1.31' );

	$version = $data['version'] ?? null;
	if ( $version === null || $version != self::HALF_PARSED_VERSION ) {
		throw new MWException( __METHOD__ . ': invalid version' );
	}

	# Merge the foreign strip state first...
	$texts = [ $data['text'] ];
	$texts = $this->mStripState->merge( $data['stripState'], $texts );

	# ...then renumber the links
	$texts = $this->mLinkHolders->mergeForeign( $data['linkHolders'], $texts );

	# Should be good to go.
	return $texts[0];
}
6396
/**
 * Tell whether the given array, presumed to come from
 * serializeHalfParsedText(), is compatible with the current version of
 * the parser.
 *
 * @deprecated since 1.31
 * @param array $data
 *
 * @return bool
 */
public function isValidHalfParsedText( $data ) {
	wfDeprecated( __METHOD__, '1.31' );

	# A missing (or null) version never matches
	$version = $data['version'] ?? null;

	return $version !== null && $version == self::HALF_PARSED_VERSION;
}
6411
/**
 * Parse the width parameter of an image link, such as 300px or 200x300px.
 *
 * @param string $value
 * @param bool $parseHeight Whether the "<width>x<height>" form is accepted
 *
 * @return array Empty when $value is empty or not recognised; otherwise it
 *  has a 'width' key and, for the "<width>x<height>" form, a 'height' key.
 *  A missing number comes out as 0.
 * @since 1.20
 */
public static function parseWidthParam( $value, $parseHeight = true ) {
	if ( $value === '' ) {
		return [];
	}

	# (T15500) In both cases (width/height and width only),
	# permit trailing "px" for backward compatibility.
	$dimensions = [];
	if ( $parseHeight
		&& preg_match( '/^([0-9]*)x([0-9]*)\s*(?:px)?\s*$/', $value, $dimensions )
	) {
		return [
			'width' => intval( $dimensions[1] ),
			'height' => intval( $dimensions[2] ),
		];
	}

	if ( preg_match( '/^[0-9]*\s*(?:px)?\s*$/', $value ) ) {
		return [ 'width' => intval( $value ) ];
	}

	return [];
}
6440
/**
 * Lock the current instance of the parser.
 *
 * This is meant to stop someone from calling the parser
 * recursively and messing up all the strip state.
 *
 * @throws MWException If parser is in a parse
 * @return ScopedCallback The lock will be released once the return value goes out of scope.
 */
protected function lock() {
	if ( $this->mInParse ) {
		throw new MWException( "Parser state cleared while parsing. "
			. "Did you call Parser::parse recursively? Lock is held by: " . $this->mInParse );
	}

	// The lock value is the backtrace of whoever took it, so that a second
	// locking attempt can report the owner for easier debugging
	$this->mInParse = ( new Exception )->getTraceAsString();

	// Invoking the returned callback clears the lock again
	return new ScopedCallback( function () {
		$this->mInParse = false;
	} );
}
6467
/**
 * Remove the outer <p></p> tag from the HTML source of a single paragraph.
 *
 * The HTML is returned as given if the <p/> tag has any attributes, if
 * there's no wrapping <p/> tag, or if there is more than one <p/> tag in
 * the input HTML.
 *
 * @param string $html
 * @return string
 * @since 1.24
 */
public static function stripOuterParagraph( $html ) {
	$parts = [];
	$isWrapped = preg_match( '/^<p>(.*)\n?<\/p>\n?$/sU', $html, $parts );

	# A closing tag inside the captured text means several paragraphs
	$isSingleParagraph = $isWrapped && strpos( $parts[1], '</p>' ) === false;

	return $isSingleParagraph ? $parts[1] : $html;
}
6486
/**
 * Return this parser if it is idle, otherwise a newly created one.
 *
 * Use it as $newParser = $oldParser->getFreshParser(), or more simply
 * $oldParser->getFreshParser()->parse( ... );
 * if you're unsure if $oldParser is safe to use.
 *
 * @since 1.24
 * @return Parser A parser object that is not parsing anything
 */
public function getFreshParser() {
	# mInParse is set while this instance is locked for a parse
	return $this->mInParse ? $this->factory->create() : $this;
}
6504
/**
 * Sets up the PHP implementation of OOUI for use in this request
 * and instructs OutputPage to enable OOUI for itself.
 *
 * @since 1.26
 */
public function enableOOUI() {
	OutputPage::setupOOUI();

	# The flag travels with the parser output
	$this->mOutput->setEnableOOUI( true );
}
6515
/**
 * Set a flag on the parser output and write a debug log entry saying
 * which flag was set, for which page, and why.
 *
 * @param string $flag Name of the flag, passed to the output's setFlag()
 * @param string $reason Free-form explanation, used only in the log entry
 */
protected function setOutputFlag( $flag, $reason ) {
	$this->mOutput->setFlag( $flag );

	# A null title is deprecated but still possible. The title is only
	# needed for the log message, so don't let logging turn into a fatal
	# error when it is missing.
	$name = $this->mTitle ? $this->mTitle->getPrefixedText() : '(no title)';
	$this->logger->debug( __METHOD__ . ": set $flag flag on '$name'; $reason" );
}
6525 }