X-Git-Url: https://git.heureux-cyclage.org/?a=blobdiff_plain;f=includes%2Fparser%2FParser.php;h=58f0c0ce698a8c392cde0f719da71e8023ae5111;hb=47818f1b44a3525dff1c07778aaef65d06b8ae89;hp=f2e47dc36acbaa2eb8901f99b765b3d4cad23b11;hpb=f428f95ac49e71bde47bc982e31ff60b3ef41706;p=lhc%2Fweb%2Fwiklou.git diff --git a/includes/parser/Parser.php b/includes/parser/Parser.php index f2e47dc36a..58f0c0ce69 100644 --- a/includes/parser/Parser.php +++ b/includes/parser/Parser.php @@ -406,13 +406,6 @@ class Parser { $text, Title $title, ParserOptions $options, $linestart = true, $clearState = true, $revid = null ) { - /** - * First pass--just handle sections, pass the rest off - * to internalParse() which does all the real work. - */ - - global $wgShowHostnames; - if ( $clearState ) { // We use U+007F DELETE to construct strip markers, so we have to make // sure that this character does not occur in the input text. @@ -474,7 +467,7 @@ class Parser { } } - # Done parsing! Compute runtime adaptive expiry if set + # Compute runtime adaptive expiry if set $this->mOutput->finalizeAdaptiveCacheExpiry(); # Warn if too many heavyweight parser functions were used @@ -485,110 +478,9 @@ class Parser { ); } - # Information on include size limits, for the benefit of users who try to skirt them + # Information on limits, for the benefit of users who try to skirt them if ( $this->mOptions->getEnableLimitReport() ) { - $max = $this->mOptions->getMaxIncludeSize(); - - $cpuTime = $this->mOutput->getTimeSinceStart( 'cpu' ); - if ( $cpuTime !== null ) { - $this->mOutput->setLimitReportData( 'limitreport-cputime', - sprintf( "%.3f", $cpuTime ) - ); - } - - $wallTime = $this->mOutput->getTimeSinceStart( 'wall' ); - $this->mOutput->setLimitReportData( 'limitreport-walltime', - sprintf( "%.3f", $wallTime ) - ); - - $this->mOutput->setLimitReportData( 'limitreport-ppvisitednodes', - [ $this->mPPNodeCount, $this->mOptions->getMaxPPNodeCount() ] - ); - $this->mOutput->setLimitReportData( 
'limitreport-ppgeneratednodes', - [ $this->mGeneratedPPNodeCount, $this->mOptions->getMaxGeneratedPPNodeCount() ] - ); - $this->mOutput->setLimitReportData( 'limitreport-postexpandincludesize', - [ $this->mIncludeSizes['post-expand'], $max ] - ); - $this->mOutput->setLimitReportData( 'limitreport-templateargumentsize', - [ $this->mIncludeSizes['arg'], $max ] - ); - $this->mOutput->setLimitReportData( 'limitreport-expansiondepth', - [ $this->mHighestExpansionDepth, $this->mOptions->getMaxPPExpandDepth() ] - ); - $this->mOutput->setLimitReportData( 'limitreport-expensivefunctioncount', - [ $this->mExpensiveFunctionCount, $this->mOptions->getExpensiveParserFunctionLimit() ] - ); - Hooks::run( 'ParserLimitReportPrepare', [ $this, $this->mOutput ] ); - - $limitReport = "NewPP limit report\n"; - if ( $wgShowHostnames ) { - $limitReport .= 'Parsed by ' . wfHostname() . "\n"; - } - $limitReport .= 'Cached time: ' . $this->mOutput->getCacheTime() . "\n"; - $limitReport .= 'Cache expiry: ' . $this->mOutput->getCacheExpiry() . "\n"; - $limitReport .= 'Dynamic content: ' . - ( $this->mOutput->hasDynamicContent() ? 'true' : 'false' ) . - "\n"; - - foreach ( $this->mOutput->getLimitReportData() as $key => $value ) { - if ( Hooks::run( 'ParserLimitReportFormat', - [ $key, &$value, &$limitReport, false, false ] - ) ) { - $keyMsg = wfMessage( $key )->inLanguage( 'en' )->useDatabase( false ); - $valueMsg = wfMessage( [ "$key-value-text", "$key-value" ] ) - ->inLanguage( 'en' )->useDatabase( false ); - if ( !$valueMsg->exists() ) { - $valueMsg = new RawMessage( '$1' ); - } - if ( !$keyMsg->isDisabled() && !$valueMsg->isDisabled() ) { - $valueMsg->params( $value ); - $limitReport .= "{$keyMsg->text()}: {$valueMsg->text()}\n"; - } - } - } - // Since we're not really outputting HTML, decode the entities and - // then re-encode the things that need hiding inside HTML comments. 
- $limitReport = htmlspecialchars_decode( $limitReport ); - // Run deprecated hook - Hooks::run( 'ParserLimitReport', [ $this, &$limitReport ], '1.22' ); - - // Sanitize for comment. Note '‐' in the replacement is U+2010, - // which looks much like the problematic '-'. - $limitReport = str_replace( [ '-', '&' ], [ '‐', '&amp;' ], $limitReport ); - $text .= "\n<!-- \n$limitReport-->\n"; - - // Add on template profiling data in human/machine readable way - $dataByFunc = $this->mProfiler->getFunctionStats(); - uasort( $dataByFunc, function ( $a, $b ) { - return $a['real'] < $b['real']; // descending order - } ); - $profileReport = []; - foreach ( array_slice( $dataByFunc, 0, 10 ) as $item ) { - $profileReport[] = sprintf( "%6.2f%% %8.3f %6d %s", - $item['%real'], $item['real'], $item['calls'], - htmlspecialchars( $item['name'] ) ); - } - $text .= "<!--\nTransclusion expansion time report (%,ms,calls,template)\n"; - $text .= implode( "\n", $profileReport ) . "\n-->\n"; - - $this->mOutput->setLimitReportData( 'limitreport-timingprofile', $profileReport ); - - // Add other cache related metadata - if ( $wgShowHostnames ) { - $this->mOutput->setLimitReportData( 'cachereport-origin', wfHostname() ); - } - $this->mOutput->setLimitReportData( 'cachereport-timestamp', - $this->mOutput->getCacheTime() ); - $this->mOutput->setLimitReportData( 'cachereport-ttl', - $this->mOutput->getCacheExpiry() ); - $this->mOutput->setLimitReportData( 'cachereport-transientcontent', - $this->mOutput->hasDynamicContent() ); - - if ( $this->mGeneratedPPNodeCount > $this->mOptions->getMaxGeneratedPPNodeCount() / 10 ) { - wfDebugLog( 'generated-pp-node-count', $this->mGeneratedPPNodeCount . ' ' . - $this->mTitle->getPrefixedDBkey() ); - } + $text .= $this->makeLimitReport(); } # Wrap non-interface parser output in a <div>
so it can be targeted @@ -611,6 +503,120 @@ class Parser { return $this->mOutput; } + /** + * Set the limit report data in the current ParserOutput, and return the + * limit report HTML comment. + * + * @return string + */ + protected function makeLimitReport() { + global $wgShowHostnames; + + $maxIncludeSize = $this->mOptions->getMaxIncludeSize(); + + $cpuTime = $this->mOutput->getTimeSinceStart( 'cpu' ); + if ( $cpuTime !== null ) { + $this->mOutput->setLimitReportData( 'limitreport-cputime', + sprintf( "%.3f", $cpuTime ) + ); + } + + $wallTime = $this->mOutput->getTimeSinceStart( 'wall' ); + $this->mOutput->setLimitReportData( 'limitreport-walltime', + sprintf( "%.3f", $wallTime ) + ); + + $this->mOutput->setLimitReportData( 'limitreport-ppvisitednodes', + [ $this->mPPNodeCount, $this->mOptions->getMaxPPNodeCount() ] + ); + $this->mOutput->setLimitReportData( 'limitreport-ppgeneratednodes', + [ $this->mGeneratedPPNodeCount, $this->mOptions->getMaxGeneratedPPNodeCount() ] + ); + $this->mOutput->setLimitReportData( 'limitreport-postexpandincludesize', + [ $this->mIncludeSizes['post-expand'], $maxIncludeSize ] + ); + $this->mOutput->setLimitReportData( 'limitreport-templateargumentsize', + [ $this->mIncludeSizes['arg'], $maxIncludeSize ] + ); + $this->mOutput->setLimitReportData( 'limitreport-expansiondepth', + [ $this->mHighestExpansionDepth, $this->mOptions->getMaxPPExpandDepth() ] + ); + $this->mOutput->setLimitReportData( 'limitreport-expensivefunctioncount', + [ $this->mExpensiveFunctionCount, $this->mOptions->getExpensiveParserFunctionLimit() ] + ); + Hooks::run( 'ParserLimitReportPrepare', [ $this, $this->mOutput ] ); + + $limitReport = "NewPP limit report\n"; + if ( $wgShowHostnames ) { + $limitReport .= 'Parsed by ' . wfHostname() . "\n"; + } + $limitReport .= 'Cached time: ' . $this->mOutput->getCacheTime() . "\n"; + $limitReport .= 'Cache expiry: ' . $this->mOutput->getCacheExpiry() . "\n"; + $limitReport .= 'Dynamic content: ' . 
+ ( $this->mOutput->hasDynamicContent() ? 'true' : 'false' ) . + "\n"; + + foreach ( $this->mOutput->getLimitReportData() as $key => $value ) { + if ( Hooks::run( 'ParserLimitReportFormat', + [ $key, &$value, &$limitReport, false, false ] + ) ) { + $keyMsg = wfMessage( $key )->inLanguage( 'en' )->useDatabase( false ); + $valueMsg = wfMessage( [ "$key-value-text", "$key-value" ] ) + ->inLanguage( 'en' )->useDatabase( false ); + if ( !$valueMsg->exists() ) { + $valueMsg = new RawMessage( '$1' ); + } + if ( !$keyMsg->isDisabled() && !$valueMsg->isDisabled() ) { + $valueMsg->params( $value ); + $limitReport .= "{$keyMsg->text()}: {$valueMsg->text()}\n"; + } + } + } + // Since we're not really outputting HTML, decode the entities and + // then re-encode the things that need hiding inside HTML comments. + $limitReport = htmlspecialchars_decode( $limitReport ); + // Run deprecated hook + Hooks::run( 'ParserLimitReport', [ $this, &$limitReport ], '1.22' ); + + // Sanitize for comment. Note '‐' in the replacement is U+2010, + // which looks much like the problematic '-'. 
+ $limitReport = str_replace( [ '-', '&' ], [ '‐', '&amp;' ], $limitReport ); + $text = "\n<!-- \n$limitReport-->\n"; + + // Add on template profiling data in human/machine readable way + $dataByFunc = $this->mProfiler->getFunctionStats(); + uasort( $dataByFunc, function ( $a, $b ) { + return $a['real'] == $b['real'] ? 0 : ( $a['real'] < $b['real'] ? 1 : -1 ); // descending order + } ); + $profileReport = []; + foreach ( array_slice( $dataByFunc, 0, 10 ) as $item ) { + $profileReport[] = sprintf( "%6.2f%% %8.3f %6d %s", + $item['%real'], $item['real'], $item['calls'], + htmlspecialchars( $item['name'] ) ); + } + $text .= "<!--\nTransclusion expansion time report (%,ms,calls,template)\n"; + $text .= implode( "\n", $profileReport ) . "\n-->\n"; + + $this->mOutput->setLimitReportData( 'limitreport-timingprofile', $profileReport ); + + // Add other cache related metadata + if ( $wgShowHostnames ) { + $this->mOutput->setLimitReportData( 'cachereport-origin', wfHostname() ); + } + $this->mOutput->setLimitReportData( 'cachereport-timestamp', + $this->mOutput->getCacheTime() ); + $this->mOutput->setLimitReportData( 'cachereport-ttl', + $this->mOutput->getCacheExpiry() ); + $this->mOutput->setLimitReportData( 'cachereport-transientcontent', + $this->mOutput->hasDynamicContent() ); + + if ( $this->mGeneratedPPNodeCount > $this->mOptions->getMaxGeneratedPPNodeCount() / 10 ) { + wfDebugLog( 'generated-pp-node-count', $this->mGeneratedPPNodeCount . ' ' . + $this->mTitle->getPrefixedDBkey() ); + } + return $text; + } + /** * Half-parse wikitext to half-parsed HTML. This recursive parser entry point * can be called from an extension tag hook. @@ -3944,7 +3950,7 @@ class Parser { $this->mForceTocPosition = true; # Set a placeholder. At the end we'll fill it in with the TOC. - $text = $mw->replace( '', $text, 1 ); + $text = $mw->replace( '', $text, 1 ); # Only keep the first one. 
$text = $mw->replace( '', $text ); @@ -4206,6 +4212,9 @@ class Parser { # Decode HTML entities $safeHeadline = Sanitizer::decodeCharReferences( $safeHeadline ); + + $safeHeadline = self::normalizeSectionName( $safeHeadline ); + $fallbackHeadline = Sanitizer::escapeIdForAttribute( $safeHeadline, Sanitizer::ID_FALLBACK ); $linkAnchor = Sanitizer::escapeIdForLink( $safeHeadline ); $safeHeadline = Sanitizer::escapeIdForAttribute( $safeHeadline, Sanitizer::ID_PRIMARY ); @@ -4387,7 +4396,7 @@ class Parser { $full .= implode( '', $sections ); if ( $this->mForceTocPosition ) { - return str_replace( '', $toc, $full ); + return str_replace( '', $toc, $full ); } else { return $full; } @@ -5753,21 +5762,60 @@ class Parser { return $this->mDefaultSort; } + /** + * Turn already-stripped heading text into a normalized section name. + * + * @param string $text Section name (plain text) + * @return string Normalized section name + */ + private static function getSectionNameFromStrippedText( $text ) { + $text = Sanitizer::normalizeSectionNameWhitespace( $text ); + $text = Sanitizer::decodeCharReferences( $text ); + $text = self::normalizeSectionName( $text ); + return $text; + } + + /** + * Build the link anchor for a section name using Sanitizer::escapeIdForLink(). + * + * @param string $sectionName + * @return string Anchor (starting with '#') + */ + private static function makeAnchor( $sectionName ) { + return '#' . Sanitizer::escapeIdForLink( $sectionName ); + } + + /** + * Build the anchor for a section name using the fallback ID encoding when $wgFragmentMode[1] is 'legacy'. + * + * @param string $sectionName + * @return string Anchor (starting with '#') + */ + private static function makeLegacyAnchor( $sectionName ) { + global $wgFragmentMode; + if ( isset( $wgFragmentMode[1] ) && $wgFragmentMode[1] === 'legacy' ) { + // ForAttribute() and ForLink() are the same for legacy encoding + $id = Sanitizer::escapeIdForAttribute( $sectionName, Sanitizer::ID_FALLBACK ); + } else { + $id = Sanitizer::escapeIdForLink( $sectionName ); + } + + return "#$id"; + } + /** * Try to guess the section anchor name based on a wikitext fragment * presumably extracted from a heading, for example "Header" from * "== Header ==". 
* * @param string $text - * - * @return string + * @return string Anchor (starting with '#') */ public function guessSectionNameFromWikiText( $text ) { # Strip out wikitext links(they break the anchor) $text = $this->stripSectionName( $text ); - $text = Sanitizer::normalizeSectionNameWhitespace( $text ); - $text = Sanitizer::decodeCharReferences( $text ); - return '#' . Sanitizer::escapeIdForLink( $text ); + $sectionName = self::getSectionNameFromStrippedText( $text ); + return self::makeAnchor( $sectionName ); } /** @@ -5777,24 +5825,41 @@ class Parser { * than UTF-8, resulting in breakage. * * @param string $text The section name - * @return string An anchor + * @return string Anchor (starting with '#') */ public function guessLegacySectionNameFromWikiText( $text ) { - global $wgFragmentMode; - # Strip out wikitext links(they break the anchor) $text = $this->stripSectionName( $text ); - $text = Sanitizer::normalizeSectionNameWhitespace( $text ); - $text = Sanitizer::decodeCharReferences( $text ); + $sectionName = self::getSectionNameFromStrippedText( $text ); + return self::makeLegacyAnchor( $sectionName ); + } - if ( isset( $wgFragmentMode[1] ) && $wgFragmentMode[1] === 'legacy' ) { - // ForAttribute() and ForLink() are the same for legacy encoding - $id = Sanitizer::escapeIdForAttribute( $text, Sanitizer::ID_FALLBACK ); - } else { - $id = Sanitizer::escapeIdForLink( $text ); - } + /** + * Like guessSectionNameFromWikiText(), but takes already-stripped text as input. 
+ * @param string $text Section name (plain text) + * @return string Anchor (starting with '#') + */ + public static function guessSectionNameFromStrippedText( $text ) { + $sectionName = self::getSectionNameFromStrippedText( $text ); + return self::makeAnchor( $sectionName ); + } - return "#$id"; + /** + * Apply the same normalization as code making links to this section would + * + * @param string $text + * @return string + */ + private static function normalizeSectionName( $text ) { + # T90902: ensure the same normalization is applied for IDs as to links + $titleParser = MediaWikiServices::getInstance()->getTitleParser(); + try { + + $parts = $titleParser->splitTitleString( "#$text" ); + } catch ( MalformedTitleException $ex ) { + return $text; + } + return $parts['fragment']; } /**