X-Git-Url: https://git.heureux-cyclage.org/?a=blobdiff_plain;f=includes%2Fparser%2FParser.php;h=81e23add64043b87d7d3b72423a10f421011a6c2;hb=4077b57759756ecd0d25427ec9598feb28a28ac1;hp=dcb2c89db100bca4a7f65a8693ac952c38c64809;hpb=69286527352e8f7b59a4b78adc327b8336de9312;p=lhc%2Fweb%2Fwiklou.git
diff --git a/includes/parser/Parser.php b/includes/parser/Parser.php
index dcb2c89db1..81e23add64 100644
--- a/includes/parser/Parser.php
+++ b/includes/parser/Parser.php
@@ -273,25 +273,30 @@ class Parser {
/** @var SpecialPageFactory */
private $specialPageFactory;
+ /** @var Config */
+ private $siteConfig;
+
/**
- * @param array $conf See $wgParserConf documentation
+ * @param array $parserConf See $wgParserConf documentation
* @param MagicWordFactory|null $magicWordFactory
* @param Language|null $contLang Content language
* @param ParserFactory|null $factory
* @param string|null $urlProtocols As returned from wfUrlProtocols()
* @param SpecialPageFactory|null $spFactory
+ * @param Config|null $siteConfig
*/
public function __construct(
- array $conf = [], MagicWordFactory $magicWordFactory = null, Language $contLang = null,
- ParserFactory $factory = null, $urlProtocols = null, SpecialPageFactory $spFactory = null
+ array $parserConf = [], MagicWordFactory $magicWordFactory = null,
+ Language $contLang = null, ParserFactory $factory = null, $urlProtocols = null,
+ SpecialPageFactory $spFactory = null, Config $siteConfig = null
) {
- $this->mConf = $conf;
+ $this->mConf = $parserConf;
$this->mUrlProtocols = $urlProtocols ?? wfUrlProtocols();
$this->mExtLinkBracketedRegex = '/\[(((?i)' . $this->mUrlProtocols . ')' .
self::EXT_LINK_ADDR .
self::EXT_LINK_URL_CLASS . '*)\p{Zs}*([^\]\\x00-\\x08\\x0a-\\x1F\\x{FFFD}]*?)\]/Su';
- if ( isset( $conf['preprocessorClass'] ) ) {
- $this->mPreprocessorClass = $conf['preprocessorClass'];
+ if ( isset( $parserConf['preprocessorClass'] ) ) {
+ $this->mPreprocessorClass = $parserConf['preprocessorClass'];
} elseif ( wfIsHHVM() ) {
# Under HHVM Preprocessor_Hash is much faster than Preprocessor_DOM
$this->mPreprocessorClass = Preprocessor_Hash::class;
@@ -314,6 +319,7 @@ class Parser {
$this->factory = $factory ?? $services->getParserFactory();
$this->specialPageFactory = $spFactory ?? $services->getSpecialPageFactory();
+ $this->siteConfig = $siteConfig ?? MediaWikiServices::getInstance()->getMainConfig();
}
/**
@@ -542,8 +548,6 @@ class Parser {
* @return string
*/
protected function makeLimitReport() {
- global $wgShowHostnames;
-
$maxIncludeSize = $this->mOptions->getMaxIncludeSize();
$cpuTime = $this->mOutput->getTimeSinceStart( 'cpu' );
@@ -584,7 +588,7 @@ class Parser {
Hooks::run( 'ParserLimitReportPrepare', [ $this, $this->mOutput ] );
$limitReport = "NewPP limit report\n";
- if ( $wgShowHostnames ) {
+ if ( $this->siteConfig->get( 'ShowHostnames' ) ) {
$limitReport .= 'Parsed by ' . wfHostname() . "\n";
}
$limitReport .= 'Cached time: ' . $this->mOutput->getCacheTime() . "\n";
@@ -635,7 +639,7 @@ class Parser {
$this->mOutput->setLimitReportData( 'limitreport-timingprofile', $profileReport );
// Add other cache related metadata
- if ( $wgShowHostnames ) {
+ if ( $this->siteConfig->get( 'ShowHostnames' ) ) {
$this->mOutput->setLimitReportData( 'cachereport-origin', wfHostname() );
}
$this->mOutput->setLimitReportData( 'cachereport-timestamp',
@@ -1448,6 +1452,8 @@ class Parser {
} else {
# attempt to sanitize at least some nesting problems
# (T4702 and quite a few others)
+ # This code path is buggy and deprecated!
+ wfDeprecated( 'disabling tidy', '1.33' );
$tidyregs = [
# ''Something [http://www.cool.com cool''] -->
# <i>Something</i><a href="http://www.cool.com"..><i>cool></i></a>
@@ -2022,7 +2028,19 @@ class Parser {
* @return string
*/
public static function normalizeLinkUrl( $url ) {
- # First, make sure unsafe characters are encoded
+ # Test for RFC 3986 IPv6 syntax
+ $scheme = '[a-z][a-z0-9+.-]*:';
+ $userinfo = '(?:[a-z0-9\-._~!$&\'()*+,;=:]|%[0-9a-f]{2})*';
+ $ipv6Host = '\\[((?:[0-9a-f:]|%3[0-A]|%[46][1-6])+)\\]';
+ if ( preg_match( "<^(?:{$scheme})?//(?:{$userinfo}@)?{$ipv6Host}(?:[:/?#].*|)$>i", $url, $m ) &&
+ IP::isValid( rawurldecode( $m[1] ) )
+ ) {
+ $isIPv6 = rawurldecode( $m[1] );
+ } else {
+ $isIPv6 = false;
+ }
+
+ # Make sure unsafe characters are encoded
$url = preg_replace_callback( '/[\x00-\x20"<>\[\\\\\]^`{|}\x7F-\xFF]/',
function ( $m ) {
return rawurlencode( $m[0] );
@@ -2054,6 +2072,16 @@ class Parser {
$ret = self::normalizeUrlComponent(
substr( $url, 0, $end ), '"#%<>[\]^`{|}/?' ) . $ret;
+ # Fix IPv6 syntax
+ if ( $isIPv6 !== false ) {
+ $ipv6Host = "%5B({$isIPv6})%5D";
+ $ret = preg_replace(
+ "<^((?:{$scheme})?//(?:{$userinfo}@)?){$ipv6Host}(?=[:/?#]|$)>i",
+ "$1[$2]",
+ $ret
+ );
+ }
+
return $ret;
}
@@ -2153,8 +2181,6 @@ class Parser {
* @private
*/
public function replaceInternalLinks2( &$s ) {
- global $wgExtraInterlanguageLinkPrefixes;
-
static $tc = false, $e1, $e1_img;
# the % is needed to support urlencoded titles as well
if ( !$tc ) {
@@ -2359,7 +2385,7 @@ class Parser {
if (
$iw && $this->mOptions->getInterwikiMagic() && $nottalk && (
Language::fetchLanguageName( $iw, null, 'mw' ) ||
- in_array( $iw, $wgExtraInterlanguageLinkPrefixes )
+ in_array( $iw, $this->siteConfig->get( 'ExtraInterlanguageLinkPrefixes' ) )
)
) {
# T26502: filter duplicates
@@ -2541,9 +2567,6 @@ class Parser {
* @return string
*/
public function getVariableValue( $index, $frame = false ) {
- global $wgSitename, $wgServer, $wgServerName;
- global $wgArticlePath, $wgScriptPath, $wgStylePath;
-
if ( is_null( $this->mTitle ) ) {
// If no title set, bad things are going to happen
// later. Title should always be set since this
@@ -2845,22 +2868,21 @@ class Parser {
$value = SpecialVersion::getVersion();
break;
case 'articlepath':
- return $wgArticlePath;
+ return $this->siteConfig->get( 'ArticlePath' );
case 'sitename':
- return $wgSitename;
+ return $this->siteConfig->get( 'Sitename' );
case 'server':
- return $wgServer;
+ return $this->siteConfig->get( 'Server' );
case 'servername':
- return $wgServerName;
+ return $this->siteConfig->get( 'ServerName' );
case 'scriptpath':
- return $wgScriptPath;
+ return $this->siteConfig->get( 'ScriptPath' );
case 'stylepath':
- return $wgStylePath;
+ return $this->siteConfig->get( 'StylePath' );
case 'directionmark':
return $pageLang->getDirMark();
case 'contentlanguage':
- global $wgLanguageCode;
- return $wgLanguageCode;
+ return $this->siteConfig->get( 'LanguageCode' );
case 'pagelanguage':
$value = $pageLang->getCode();
break;
@@ -3803,9 +3825,7 @@ class Parser {
* @return string
*/
public function interwikiTransclude( $title, $action ) {
- global $wgEnableScaryTranscluding, $wgTranscludeCacheExpiry;
-
- if ( !$wgEnableScaryTranscluding ) {
+ if ( !$this->siteConfig->get( 'EnableScaryTranscluding' ) ) {
return wfMessage( 'scarytranscludedisabled' )->inContentLanguage()->text();
}
@@ -3825,7 +3845,7 @@ class Parser {
( $wikiId !== false ) ? $wikiId : 'external',
sha1( $url )
),
- $wgTranscludeCacheExpiry,
+ $this->siteConfig->get( 'TranscludeCacheExpiry' ),
function ( $oldValue, &$ttl ) use ( $url, $fname, $cache ) {
$req = MWHttpRequest::factory( $url, [], $fname );
@@ -4127,8 +4147,6 @@ class Parser {
* @private
*/
public function formatHeadings( $text, $origText, $isMain = true ) {
- global $wgMaxTocLevel;
-
# Inhibit editsection links if requested in the page
if ( isset( $this->mDoubleUnderscores['noeditsection'] ) ) {
$maybeShowEditLink = false;
@@ -4199,6 +4217,7 @@ class Parser {
$headlines = $numMatches !== false ? $matches[3] : [];
+ $maxTocLevel = $this->siteConfig->get( 'MaxTocLevel' );
foreach ( $headlines as $headline ) {
$isTemplate = false;
$titleText = false;
@@ -4221,7 +4240,7 @@ class Parser {
# Increase TOC level
$toclevel++;
$sublevelCount[$toclevel] = 0;
- if ( $toclevel < $wgMaxTocLevel ) {
+ if ( $toclevel < $maxTocLevel ) {
$prevtoclevel = $toclevel;
$toc .= Linker::tocIndent();
$numVisible++;
@@ -4243,8 +4262,8 @@ class Parser {
if ( $i == 0 ) {
$toclevel = 1;
}
- if ( $toclevel < $wgMaxTocLevel ) {
- if ( $prevtoclevel < $wgMaxTocLevel ) {
+ if ( $toclevel < $maxTocLevel ) {
+ if ( $prevtoclevel < $maxTocLevel ) {
# Unindent only if the previous toc level was shown :p
$toc .= Linker::tocUnindent( $prevtoclevel - $toclevel );
$prevtoclevel = $toclevel;
@@ -4254,7 +4273,7 @@ class Parser {
}
} else {
# No change in level, end TOC line
- if ( $toclevel < $wgMaxTocLevel ) {
+ if ( $toclevel < $maxTocLevel ) {
$toc .= Linker::tocLineEnd();
}
}
@@ -4379,7 +4398,7 @@ class Parser {
) . ' ' . $headline;
}
- if ( $enoughToc && ( !isset( $wgMaxTocLevel ) || $toclevel < $wgMaxTocLevel ) ) {
+ if ( $enoughToc && ( !isset( $maxTocLevel ) || $toclevel < $maxTocLevel ) ) {
$toc .= Linker::tocLine( $linkAnchor, $tocline,
$numbering, $toclevel, ( $isTemplate ? false : $sectionIndex ) );
}
@@ -4460,7 +4479,7 @@ class Parser {
}
if ( $enoughToc ) {
- if ( $prevtoclevel > 0 && $prevtoclevel < $wgMaxTocLevel ) {
+ if ( $prevtoclevel > 0 && $prevtoclevel < $maxTocLevel ) {
$toc .= Linker::tocUnindent( $prevtoclevel - 1 );
}
$toc = Linker::tocList( $toc, $this->mOptions->getUserLangObj() );
@@ -4639,8 +4658,6 @@ class Parser {
* @return string
*/
public function getUserSig( &$user, $nickname = false, $fancySig = null ) {
- global $wgMaxSigChars;
-
$username = $user->getName();
# If not given, retrieve from the user object.
@@ -4654,7 +4671,7 @@ class Parser {
$nickname = $nickname == null ? $username : $nickname;
- if ( mb_strlen( $nickname ) > $wgMaxSigChars ) {
+ if ( mb_strlen( $nickname ) > $this->siteConfig->get( 'MaxSigChars' ) ) {
$nickname = $username;
wfDebug( __METHOD__ . ": $username has overlong signature.\n" );
} elseif ( $fancySig !== false ) {
@@ -5049,9 +5066,10 @@ class Parser {
$ig->setShowFilename( false );
}
if ( isset( $params['caption'] ) ) {
- $caption = $params['caption'];
- $caption = htmlspecialchars( $caption );
- $caption = $this->replaceInternalLinks( $caption );
+ // NOTE: We aren't passing a frame here or below. Frame info
+ // is currently opaque to Parsoid, which acts on OT_PREPROCESS.
+ // See T107332#4030581
+ $caption = $this->recursiveTagParse( $params['caption'] );
$ig->setCaptionHtml( $caption );
}
if ( isset( $params['perrow'] ) ) {
@@ -5140,7 +5158,7 @@ class Parser {
$alt = $this->stripAltText( $match, false );
break;
case 'gallery-internal-link':
- $linkValue = strip_tags( $this->replaceLinkHoldersText( $match ) );
+ $linkValue = $this->stripAltText( $match, false );
if ( preg_match( '/^-{R|(.*)}-$/', $linkValue ) ) {
// Result of LanguageConverter::markNoConversion
// invoked on an external link.
@@ -5265,6 +5283,8 @@ class Parser {
# * bottom
# * text-bottom
+ global $wgMediaInTargetLanguage;
+
# Protect LanguageConverter markup when splitting into parts
$parts = StringUtils::delimiterExplode(
'-{', '}-', '|', $options, true /* allow nesting */
@@ -5334,7 +5354,10 @@ class Parser {
$value = $this->stripAltText( $value, $holders );
break;
case 'link':
- list( $paramName, $value ) = $this->parseLinkParameter( $value );
+ list( $paramName, $value ) =
+ $this->parseLinkParameter(
+ $this->stripAltText( $value, $holders )
+ );
if ( $paramName ) {
$validated = true;
if ( $paramName === 'no-link' ) {
@@ -5422,6 +5445,9 @@ class Parser {
# Use the "caption" for the tooltip text
$params['frame']['title'] = $this->stripAltText( $caption, $holders );
}
+ if ( $wgMediaInTargetLanguage ) {
+ $params['handler']['targetlang'] = $this->getTargetLanguage()->getCode();
+ }
Hooks::run( 'ParserMakeImageParams', [ $title, $file, &$params, $this ] );
@@ -5499,6 +5525,40 @@ class Parser {
# that are later expanded to html- so expand them now and
# remove the tags
$tooltip = $this->mStripState->unstripBoth( $tooltip );
+ # Compatibility hack! In HTML certain entity references not terminated
+ # by a semicolon are decoded (but not if we're in an attribute; that's
+ # how link URLs get away without properly escaping & in queries).
+ # But wikitext has always required semicolon-termination of entities,
+ # so encode & as &amp; where needed to avoid decode of semicolon-less entities.
+ # See T209236 and
+ # https://www.w3.org/TR/html5/syntax.html#named-character-references
+ # T210437 discusses moving this workaround to Sanitizer::stripAllTags.
+ $tooltip = preg_replace( "/
+ & # 1. entity prefix
+ (?= # 2. followed by:
+ (?: # a. one of the legacy semicolon-less named entities
+ A(?:Elig|MP|acute|circ|grave|ring|tilde|uml)|
+ C(?:OPY|cedil)|E(?:TH|acute|circ|grave|uml)|
+ GT|I(?:acute|circ|grave|uml)|LT|Ntilde|
+ O(?:acute|circ|grave|slash|tilde|uml)|QUOT|REG|THORN|
+ U(?:acute|circ|grave|uml)|Yacute|
+ a(?:acute|c(?:irc|ute)|elig|grave|mp|ring|tilde|uml)|brvbar|
+ c(?:cedil|edil|urren)|cent(?!erdot;)|copy(?!sr;)|deg|
+ divide(?!ontimes;)|e(?:acute|circ|grave|th|uml)|
+ frac(?:1(?:2|4)|34)|
+ gt(?!c(?:c|ir)|dot|lPar|quest|r(?:a(?:pprox|rr)|dot|eq(?:less|qless)|less|sim);)|
+ i(?:acute|circ|excl|grave|quest|uml)|laquo|
+ lt(?!c(?:c|ir)|dot|hree|imes|larr|quest|r(?:Par|i(?:e|f|));)|
+ m(?:acr|i(?:cro|ddot))|n(?:bsp|tilde)|
+ not(?!in(?:E|dot|v(?:a|b|c)|)|ni(?:v(?:a|b|c)|);)|
+ o(?:acute|circ|grave|rd(?:f|m)|slash|tilde|uml)|
+ p(?:lusmn|ound)|para(?!llel;)|quot|r(?:aquo|eg)|
+ s(?:ect|hy|up(?:1|2|3)|zlig)|thorn|times(?!b(?:ar|)|d;)|
+ u(?:acute|circ|grave|ml|uml)|y(?:acute|en|uml)
+ )
+ (?:[^;]|$)) # b. and not followed by a semicolon
+ # S = study, for efficiency
+ /Sx", '&amp;', $tooltip );
$tooltip = Sanitizer::stripAllTags( $tooltip );
return $tooltip;
@@ -5931,9 +5991,9 @@ class Parser {
return '#' . Sanitizer::escapeIdForLink( $sectionName );
}
- private static function makeLegacyAnchor( $sectionName ) {
- global $wgFragmentMode;
- if ( isset( $wgFragmentMode[1] ) && $wgFragmentMode[1] === 'legacy' ) {
+ private function makeLegacyAnchor( $sectionName ) {
+ $fragmentMode = $this->siteConfig->get( 'FragmentMode' );
+ if ( isset( $fragmentMode[1] ) && $fragmentMode[1] === 'legacy' ) {
// ForAttribute() and ForLink() are the same for legacy encoding
$id = Sanitizer::escapeIdForAttribute( $sectionName, Sanitizer::ID_FALLBACK );
} else {
@@ -5971,7 +6031,7 @@ class Parser {
# Strip out wikitext links(they break the anchor)
$text = $this->stripSectionName( $text );
$sectionName = self::getSectionNameFromStrippedText( $text );
- return self::makeLegacyAnchor( $sectionName );
+ return $this->makeLegacyAnchor( $sectionName );
}
/**