From 129067c907ea65f621ab64cdfff59fd2b28091e1 Mon Sep 17 00:00:00 2001 From: Max Semenik Date: Thu, 2 Nov 2017 19:35:11 -0700 Subject: [PATCH] Remove nbsp and similar characters from section IDs Bug: T90902 Change-Id: I71bdb7dd43c3e532287290e3c691d9739da45475 --- RELEASE-NOTES-1.31 | 1 + includes/parser/Parser.php | 24 ++++++++++++++++++++++++ tests/parser/parserTests.txt | 14 ++++++++++++++ 3 files changed, 39 insertions(+) diff --git a/RELEASE-NOTES-1.31 b/RELEASE-NOTES-1.31 index 4bfcfcb5de..3688163f23 100644 --- a/RELEASE-NOTES-1.31 +++ b/RELEASE-NOTES-1.31 @@ -41,6 +41,7 @@ production. * … === Bug fixes in 1.31 === +* (T90902) Non-breaking space in header ID breaks anchor * … === Action API changes in 1.31 === diff --git a/includes/parser/Parser.php b/includes/parser/Parser.php index f2e47dc36a..3548da9581 100644 --- a/includes/parser/Parser.php +++ b/includes/parser/Parser.php @@ -4206,6 +4206,9 @@ class Parser { # Decode HTML entities $safeHeadline = Sanitizer::decodeCharReferences( $safeHeadline ); + + $safeHeadline = $this->normalizeSectionName( $safeHeadline ); + $fallbackHeadline = Sanitizer::escapeIdForAttribute( $safeHeadline, Sanitizer::ID_FALLBACK ); $linkAnchor = Sanitizer::escapeIdForLink( $safeHeadline ); $safeHeadline = Sanitizer::escapeIdForAttribute( $safeHeadline, Sanitizer::ID_PRIMARY ); @@ -5767,6 +5770,8 @@ class Parser { $text = $this->stripSectionName( $text ); $text = Sanitizer::normalizeSectionNameWhitespace( $text ); $text = Sanitizer::decodeCharReferences( $text ); + $text = $this->normalizeSectionName( $text ); + return '#' . Sanitizer::escapeIdForLink( $text ); } @@ -5786,6 +5791,7 @@ class Parser { $text = $this->stripSectionName( $text ); $text = Sanitizer::normalizeSectionNameWhitespace( $text ); $text = Sanitizer::decodeCharReferences( $text ); + $text = $this->normalizeSectionName( $text ); if ( isset( $wgFragmentMode[1] ) && $wgFragmentMode[1] === 'legacy' ) { // ForAttribute() and ForLink() are the same for legacy encoding @@ -5797,6 +5803,24 @@ class Parser { return "#$id"; } + /** + * Apply the same normalization as code making links to this section would + * + * @param string $text + * @return string + */ + private function normalizeSectionName( $text ) { + # T90902: ensure the same normalization is applied for IDs as to links + $titleParser = MediaWikiServices::getInstance()->getTitleParser(); + try { + + $parts = $titleParser->splitTitleString( "#$text" ); + } catch ( MalformedTitleException $ex ) { + return $text; + } + return $parts['fragment']; + } + /** * Strips a text string of wikitext for use in a section anchor * diff --git a/tests/parser/parserTests.txt b/tests/parser/parserTests.txt index 3c861ea10b..1204dbd715 100644 --- a/tests/parser/parserTests.txt +++ b/tests/parser/parserTests.txt @@ -29536,3 +29536,17 @@ wgFragmentMode=[ 'html5' ]

#啤酒 #啤酒

!! end + +!! test +T90902: Normalize weird characters in section IDs +!! config +wgFragmentMode=[ 'html5', 'legacy' ] +!! wikitext +== Foo bar == +[[#Foo bar]] + +!! html/php +

Foo bar[edit]

+

#Foo bar +

+!! end -- 2.20.1