X-Git-Url: https://git.heureux-cyclage.org/?a=blobdiff_plain;f=includes%2Fparser%2FParser.php;h=81e23add64043b87d7d3b72423a10f421011a6c2;hb=4077b57759756ecd0d25427ec9598feb28a28ac1;hp=dcb2c89db100bca4a7f65a8693ac952c38c64809;hpb=d69b967481649acbf8fa3d4e60df821bd24fda01;p=lhc%2Fweb%2Fwiklou.git diff --git a/includes/parser/Parser.php b/includes/parser/Parser.php index dcb2c89db1..81e23add64 100644 --- a/includes/parser/Parser.php +++ b/includes/parser/Parser.php @@ -273,25 +273,30 @@ class Parser { /** @var SpecialPageFactory */ private $specialPageFactory; + /** @var Config */ + private $siteConfig; + /** - * @param array $conf See $wgParserConf documentation + * @param array $parserConf See $wgParserConf documentation * @param MagicWordFactory|null $magicWordFactory * @param Language|null $contLang Content language * @param ParserFactory|null $factory * @param string|null $urlProtocols As returned from wfUrlProtocols() * @param SpecialPageFactory|null $spFactory + * @param Config|null $siteConfig */ public function __construct( - array $conf = [], MagicWordFactory $magicWordFactory = null, Language $contLang = null, - ParserFactory $factory = null, $urlProtocols = null, SpecialPageFactory $spFactory = null + array $parserConf = [], MagicWordFactory $magicWordFactory = null, + Language $contLang = null, ParserFactory $factory = null, $urlProtocols = null, + SpecialPageFactory $spFactory = null, Config $siteConfig = null ) { - $this->mConf = $conf; + $this->mConf = $parserConf; $this->mUrlProtocols = $urlProtocols ?? wfUrlProtocols(); $this->mExtLinkBracketedRegex = '/\[(((?i)' . $this->mUrlProtocols . ')' . self::EXT_LINK_ADDR . self::EXT_LINK_URL_CLASS . '*)\p{Zs}*([^\]\\x00-\\x08\\x0a-\\x1F\\x{FFFD}]*?)\]/Su'; - if ( isset( $conf['preprocessorClass'] ) ) { - $this->mPreprocessorClass = $conf['preprocessorClass']; + if ( isset( $parserConf['preprocessorClass'] ) ) { + $this->mPreprocessorClass = $parserConf['preprocessorClass']; } elseif ( wfIsHHVM() ) { # Under HHVM Preprocessor_Hash is much faster than Preprocessor_DOM $this->mPreprocessorClass = Preprocessor_Hash::class; @@ -314,6 +319,7 @@ class Parser { $this->factory = $factory ?? $services->getParserFactory(); $this->specialPageFactory = $spFactory ?? $services->getSpecialPageFactory(); + $this->siteConfig = $siteConfig ?? MediaWikiServices::getInstance()->getMainConfig(); } /** @@ -542,8 +548,6 @@ class Parser { * @return string */ protected function makeLimitReport() { - global $wgShowHostnames; - $maxIncludeSize = $this->mOptions->getMaxIncludeSize(); $cpuTime = $this->mOutput->getTimeSinceStart( 'cpu' ); @@ -584,7 +588,7 @@ class Parser { Hooks::run( 'ParserLimitReportPrepare', [ $this, $this->mOutput ] ); $limitReport = "NewPP limit report\n"; - if ( $wgShowHostnames ) { + if ( $this->siteConfig->get( 'ShowHostnames' ) ) { $limitReport .= 'Parsed by ' . wfHostname() . "\n"; } $limitReport .= 'Cached time: ' . $this->mOutput->getCacheTime() . "\n"; @@ -635,7 +639,7 @@ class Parser { $this->mOutput->setLimitReportData( 'limitreport-timingprofile', $profileReport ); // Add other cache related metadata - if ( $wgShowHostnames ) { + if ( $this->siteConfig->get( 'ShowHostnames' ) ) { $this->mOutput->setLimitReportData( 'cachereport-origin', wfHostname() ); } $this->mOutput->setLimitReportData( 'cachereport-timestamp', @@ -1448,6 +1452,8 @@ class Parser { } else { # attempt to sanitize at least some nesting problems # (T4702 and quite a few others) + # This code path is buggy and deprecated! + wfDeprecated( 'disabling tidy', '1.33' ); $tidyregs = [ # ''Something [http://www.cool.com cool''] --> # Somethingcool> @@ -2022,7 +2028,19 @@ class Parser { * @return string */ public static function normalizeLinkUrl( $url ) { - # First, make sure unsafe characters are encoded + # Test for RFC 3986 IPv6 syntax + $scheme = '[a-z][a-z0-9+.-]*:'; + $userinfo = '(?:[a-z0-9\-._~!$&\'()*+,;=:]|%[0-9a-f]{2})*'; + $ipv6Host = '\\[((?:[0-9a-f:]|%3[0-A]|%[46][1-6])+)\\]'; + if ( preg_match( "<^(?:{$scheme})?//(?:{$userinfo}@)?{$ipv6Host}(?:[:/?#].*|)$>i", $url, $m ) && + IP::isValid( rawurldecode( $m[1] ) ) + ) { + $isIPv6 = rawurldecode( $m[1] ); + } else { + $isIPv6 = false; + } + + # Make sure unsafe characters are encoded $url = preg_replace_callback( '/[\x00-\x20"<>\[\\\\\]^`{|}\x7F-\xFF]/', function ( $m ) { return rawurlencode( $m[0] ); @@ -2054,6 +2072,16 @@ class Parser { $ret = self::normalizeUrlComponent( substr( $url, 0, $end ), '"#%<>[\]^`{|}/?' ) . $ret; + # Fix IPv6 syntax + if ( $isIPv6 !== false ) { + $ipv6Host = "%5B({$isIPv6})%5D"; + $ret = preg_replace( + "<^((?:{$scheme})?//(?:{$userinfo}@)?){$ipv6Host}(?=[:/?#]|$)>i", + "$1[$2]", + $ret + ); + } + return $ret; } @@ -2153,8 +2181,6 @@ class Parser { * @private */ public function replaceInternalLinks2( &$s ) { - global $wgExtraInterlanguageLinkPrefixes; - static $tc = false, $e1, $e1_img; # the % is needed to support urlencoded titles as well if ( !$tc ) { @@ -2359,7 +2385,7 @@ class Parser { if ( $iw && $this->mOptions->getInterwikiMagic() && $nottalk && ( Language::fetchLanguageName( $iw, null, 'mw' ) || - in_array( $iw, $wgExtraInterlanguageLinkPrefixes ) + in_array( $iw, $this->siteConfig->get( 'ExtraInterlanguageLinkPrefixes' ) ) ) ) { # T26502: filter duplicates @@ -2541,9 +2567,6 @@ class Parser { * @return string */ public function getVariableValue( $index, $frame = false ) { - global $wgSitename, $wgServer, $wgServerName; - global $wgArticlePath, $wgScriptPath, $wgStylePath; - if ( is_null( $this->mTitle ) ) { // If no title set, bad things are going to happen // later. Title should always be set since this @@ -2845,22 +2868,21 @@ class Parser { $value = SpecialVersion::getVersion(); break; case 'articlepath': - return $wgArticlePath; + return $this->siteConfig->get( 'ArticlePath' ); case 'sitename': - return $wgSitename; + return $this->siteConfig->get( 'Sitename' ); case 'server': - return $wgServer; + return $this->siteConfig->get( 'Server' ); case 'servername': - return $wgServerName; + return $this->siteConfig->get( 'ServerName' ); case 'scriptpath': - return $wgScriptPath; + return $this->siteConfig->get( 'ScriptPath' ); case 'stylepath': - return $wgStylePath; + return $this->siteConfig->get( 'StylePath' ); case 'directionmark': return $pageLang->getDirMark(); case 'contentlanguage': - global $wgLanguageCode; - return $wgLanguageCode; + return $this->siteConfig->get( 'LanguageCode' ); case 'pagelanguage': $value = $pageLang->getCode(); break; @@ -3803,9 +3825,7 @@ class Parser { * @return string */ public function interwikiTransclude( $title, $action ) { - global $wgEnableScaryTranscluding, $wgTranscludeCacheExpiry; - - if ( !$wgEnableScaryTranscluding ) { + if ( !$this->siteConfig->get( 'EnableScaryTranscluding' ) ) { return wfMessage( 'scarytranscludedisabled' )->inContentLanguage()->text(); } @@ -3825,7 +3845,7 @@ class Parser { ( $wikiId !== false ) ? $wikiId : 'external', sha1( $url ) ), - $wgTranscludeCacheExpiry, + $this->siteConfig->get( 'TranscludeCacheExpiry' ), function ( $oldValue, &$ttl ) use ( $url, $fname, $cache ) { $req = MWHttpRequest::factory( $url, [], $fname ); @@ -4127,8 +4147,6 @@ class Parser { * @private */ public function formatHeadings( $text, $origText, $isMain = true ) { - global $wgMaxTocLevel; - # Inhibit editsection links if requested in the page if ( isset( $this->mDoubleUnderscores['noeditsection'] ) ) { $maybeShowEditLink = false; @@ -4199,6 +4217,7 @@ class Parser { $headlines = $numMatches !== false ? $matches[3] : []; + $maxTocLevel = $this->siteConfig->get( 'MaxTocLevel' ); foreach ( $headlines as $headline ) { $isTemplate = false; $titleText = false; @@ -4221,7 +4240,7 @@ class Parser { # Increase TOC level $toclevel++; $sublevelCount[$toclevel] = 0; - if ( $toclevel < $wgMaxTocLevel ) { + if ( $toclevel < $maxTocLevel ) { $prevtoclevel = $toclevel; $toc .= Linker::tocIndent(); $numVisible++; @@ -4243,8 +4262,8 @@ class Parser { if ( $i == 0 ) { $toclevel = 1; } - if ( $toclevel < $wgMaxTocLevel ) { - if ( $prevtoclevel < $wgMaxTocLevel ) { + if ( $toclevel < $maxTocLevel ) { + if ( $prevtoclevel < $maxTocLevel ) { # Unindent only if the previous toc level was shown :p $toc .= Linker::tocUnindent( $prevtoclevel - $toclevel ); $prevtoclevel = $toclevel; @@ -4254,7 +4273,7 @@ class Parser { } } else { # No change in level, end TOC line - if ( $toclevel < $wgMaxTocLevel ) { + if ( $toclevel < $maxTocLevel ) { $toc .= Linker::tocLineEnd(); } } @@ -4379,7 +4398,7 @@ class Parser { ) . ' ' . $headline; } - if ( $enoughToc && ( !isset( $wgMaxTocLevel ) || $toclevel < $wgMaxTocLevel ) ) { + if ( $enoughToc && ( !isset( $maxTocLevel ) || $toclevel < $maxTocLevel ) ) { $toc .= Linker::tocLine( $linkAnchor, $tocline, $numbering, $toclevel, ( $isTemplate ? false : $sectionIndex ) ); } @@ -4460,7 +4479,7 @@ class Parser { } if ( $enoughToc ) { - if ( $prevtoclevel > 0 && $prevtoclevel < $wgMaxTocLevel ) { + if ( $prevtoclevel > 0 && $prevtoclevel < $maxTocLevel ) { $toc .= Linker::tocUnindent( $prevtoclevel - 1 ); } $toc = Linker::tocList( $toc, $this->mOptions->getUserLangObj() ); @@ -4639,8 +4658,6 @@ class Parser { * @return string */ public function getUserSig( &$user, $nickname = false, $fancySig = null ) { - global $wgMaxSigChars; - $username = $user->getName(); # If not given, retrieve from the user object. @@ -4654,7 +4671,7 @@ class Parser { $nickname = $nickname == null ? $username : $nickname; - if ( mb_strlen( $nickname ) > $wgMaxSigChars ) { + if ( mb_strlen( $nickname ) > $this->siteConfig->get( 'MaxSigChars' ) ) { $nickname = $username; wfDebug( __METHOD__ . ": $username has overlong signature.\n" ); } elseif ( $fancySig !== false ) { @@ -5049,9 +5066,10 @@ class Parser { $ig->setShowFilename( false ); } if ( isset( $params['caption'] ) ) { - $caption = $params['caption']; - $caption = htmlspecialchars( $caption ); - $caption = $this->replaceInternalLinks( $caption ); + // NOTE: We aren't passing a frame here or below. Frame info + // is currently opaque to Parsoid, which acts on OT_PREPROCESS. + // See T107332#4030581 + $caption = $this->recursiveTagParse( $params['caption'] ); $ig->setCaptionHtml( $caption ); } if ( isset( $params['perrow'] ) ) { @@ -5140,7 +5158,7 @@ class Parser { $alt = $this->stripAltText( $match, false ); break; case 'gallery-internal-link': - $linkValue = strip_tags( $this->replaceLinkHoldersText( $match ) ); + $linkValue = $this->stripAltText( $match, false ); if ( preg_match( '/^-{R|(.*)}-$/', $linkValue ) ) { // Result of LanguageConverter::markNoConversion // invoked on an external link. @@ -5265,6 +5283,8 @@ class Parser { # * bottom # * text-bottom + global $wgMediaInTargetLanguage; + # Protect LanguageConverter markup when splitting into parts $parts = StringUtils::delimiterExplode( '-{', '}-', '|', $options, true /* allow nesting */ @@ -5334,7 +5354,10 @@ class Parser { $value = $this->stripAltText( $value, $holders ); break; case 'link': - list( $paramName, $value ) = $this->parseLinkParameter( $value ); + list( $paramName, $value ) = + $this->parseLinkParameter( + $this->stripAltText( $value, $holders ) + ); if ( $paramName ) { $validated = true; if ( $paramName === 'no-link' ) { @@ -5422,6 +5445,9 @@ class Parser { # Use the "caption" for the tooltip text $params['frame']['title'] = $this->stripAltText( $caption, $holders ); } + if ( $wgMediaInTargetLanguage ) { + $params['handler']['targetlang'] = $this->getTargetLanguage()->getCode(); + } Hooks::run( 'ParserMakeImageParams', [ $title, $file, &$params, $this ] ); @@ -5499,6 +5525,40 @@ class Parser { # that are later expanded to html- so expand them now and # remove the tags $tooltip = $this->mStripState->unstripBoth( $tooltip ); + # Compatibility hack! In HTML certain entity references not terminated + # by a semicolon are decoded (but not if we're in an attribute; that's + # how link URLs get away without properly escaping & in queries). + # But wikitext has always required semicolon-termination of entities, + # so encode & where needed to avoid decode of semicolon-less entities. + # See T209236 and + # https://www.w3.org/TR/html5/syntax.html#named-character-references + # T210437 discusses moving this workaround to Sanitizer::stripAllTags. + $tooltip = preg_replace( "/ + & # 1. entity prefix + (?= # 2. followed by: + (?: # a. one of the legacy semicolon-less named entities + A(?:Elig|MP|acute|circ|grave|ring|tilde|uml)| + C(?:OPY|cedil)|E(?:TH|acute|circ|grave|uml)| + GT|I(?:acute|circ|grave|uml)|LT|Ntilde| + O(?:acute|circ|grave|slash|tilde|uml)|QUOT|REG|THORN| + U(?:acute|circ|grave|uml)|Yacute| + a(?:acute|c(?:irc|ute)|elig|grave|mp|ring|tilde|uml)|brvbar| + c(?:cedil|edil|urren)|cent(?!erdot;)|copy(?!sr;)|deg| + divide(?!ontimes;)|e(?:acute|circ|grave|th|uml)| + frac(?:1(?:2|4)|34)| + gt(?!c(?:c|ir)|dot|lPar|quest|r(?:a(?:pprox|rr)|dot|eq(?:less|qless)|less|sim);)| + i(?:acute|circ|excl|grave|quest|uml)|laquo| + lt(?!c(?:c|ir)|dot|hree|imes|larr|quest|r(?:Par|i(?:e|f|));)| + m(?:acr|i(?:cro|ddot))|n(?:bsp|tilde)| + not(?!in(?:E|dot|v(?:a|b|c)|)|ni(?:v(?:a|b|c)|);)| + o(?:acute|circ|grave|rd(?:f|m)|slash|tilde|uml)| + p(?:lusmn|ound)|para(?!llel;)|quot|r(?:aquo|eg)| + s(?:ect|hy|up(?:1|2|3)|zlig)|thorn|times(?!b(?:ar|)|d;)| + u(?:acute|circ|grave|ml|uml)|y(?:acute|en|uml) + ) + (?:[^;]|$)) # b. and not followed by a semicolon + # S = study, for efficiency + /Sx", '&', $tooltip ); $tooltip = Sanitizer::stripAllTags( $tooltip ); return $tooltip; @@ -5931,9 +5991,9 @@ class Parser { return '#' . Sanitizer::escapeIdForLink( $sectionName ); } - private static function makeLegacyAnchor( $sectionName ) { - global $wgFragmentMode; - if ( isset( $wgFragmentMode[1] ) && $wgFragmentMode[1] === 'legacy' ) { + private function makeLegacyAnchor( $sectionName ) { + $fragmentMode = $this->siteConfig->get( 'FragmentMode' ); + if ( isset( $fragmentMode[1] ) && $fragmentMode[1] === 'legacy' ) { // ForAttribute() and ForLink() are the same for legacy encoding $id = Sanitizer::escapeIdForAttribute( $sectionName, Sanitizer::ID_FALLBACK ); } else { @@ -5971,7 +6031,7 @@ class Parser { # Strip out wikitext links(they break the anchor) $text = $this->stripSectionName( $text ); $sectionName = self::getSectionNameFromStrippedText( $text ); - return self::makeLegacyAnchor( $sectionName ); + return $this->makeLegacyAnchor( $sectionName ); } /**