Do not double decode HTML entities for IDs
author: Fomafix <fomafix@googlemail.com>
Mon, 2 May 2016 05:14:45 +0000 (05:14 +0000)
committer: Fomafix <fomafix@googlemail.com>
Tue, 12 Sep 2017 13:42:17 +0000 (15:42 +0200)
* in links (T103714)
* in indicators (T104196)

This change removes the automatic Sanitizer::decodeCharReferences call from
Sanitizer::escapeId and Sanitizer::escapeIdInternal. Where decoding of
HTML entities is wanted, an explicit call to
Sanitizer::decodeCharReferences is added.

Explicitly decode HTML entities in non-local autocomments. (T104311)

Bug: T103714
Bug: T104196
Bug: T104311
Change-Id: I88e8e2077e6f5eec2b232391f7818370894a62dc

includes/Linker.php
includes/Sanitizer.php
includes/parser/Parser.php
tests/parser/parserTests.txt
tests/phpunit/includes/SanitizerTest.php

index dccd99c..4110575 100644 (file)
@@ -1175,7 +1175,7 @@ class Linker {
                                                        $sectionTitle = Title::newFromText( '#' . $section );
                                                } else {
                                                        $sectionTitle = Title::makeTitleSafe( $title->getNamespace(),
-                                                               $title->getDBkey(), $section );
+                                                               $title->getDBkey(), Sanitizer::decodeCharReferences( $section ) );
                                                }
                                                if ( $sectionTitle ) {
                                                        $link = Linker::makeCommentLink( $sectionTitle, $wgLang->getArrow(), $wikiId, 'noclasses' );
index 7d17cd1..a7f963a 100644 (file)
@@ -1203,8 +1203,6 @@ class Sanitizer {
                global $wgExperimentalHtmlIds;
                $options = (array)$options;
 
-               $id = self::decodeCharReferences( $id );
-
                if ( $wgExperimentalHtmlIds && !in_array( 'legacy', $options ) ) {
                        $id = preg_replace( '/[ \t\n\r\f_\'"&#%]+/', '_', $id );
                        $id = trim( $id, '_' );
@@ -1313,8 +1311,6 @@ class Sanitizer {
         * @return string
         */
        private static function escapeIdInternal( $id, $mode ) {
-               $id = self::decodeCharReferences( $id );
-
                switch ( $mode ) {
                        case 'html5':
                                $id = str_replace( ' ', '_', $id );
index ff4936d..e901f6f 100644 (file)
@@ -4204,6 +4204,8 @@ class Parser {
                        # Save headline for section edit hint before it's escaped
                        $headlineHint = $safeHeadline;
 
+                       # Decode HTML entities
+                       $safeHeadline = Sanitizer::decodeCharReferences( $safeHeadline );
                        $fallbackHeadline = Sanitizer::escapeIdForAttribute( $safeHeadline, Sanitizer::ID_FALLBACK );
                        $linkAnchor = Sanitizer::escapeIdForLink( $safeHeadline );
                        $safeHeadline = Sanitizer::escapeIdForAttribute( $safeHeadline, Sanitizer::ID_PRIMARY );
@@ -5764,6 +5766,7 @@ class Parser {
                # Strip out wikitext links(they break the anchor)
                $text = $this->stripSectionName( $text );
                $text = Sanitizer::normalizeSectionNameWhitespace( $text );
+               $text = Sanitizer::decodeCharReferences( $text );
                return '#' . Sanitizer::escapeIdForLink( $text );
        }
 
@@ -5782,6 +5785,7 @@ class Parser {
                # Strip out wikitext links(they break the anchor)
                $text = $this->stripSectionName( $text );
                $text = Sanitizer::normalizeSectionNameWhitespace( $text );
+               $text = Sanitizer::decodeCharReferences( $text );
 
                if ( isset( $wgFragmentMode[1] ) && $wgFragmentMode[1] === 'legacy' ) {
                        // ForAttribute() and ForLink() are the same for legacy encoding
index 00d2538..77854b7 100644 (file)
@@ -29279,7 +29279,7 @@ Decoding of HTML entities in headings and links for IDs and link fragments (T103
 [[#A&B&amp;C&amp;amp;D&amp;amp;amp;E]]
 !! html/php
 <h2><span class="mw-headline" id="A.26B.26C.26amp.3BD.26amp.3Bamp.3BE">A&amp;B&amp;C&amp;amp;D&amp;amp;amp;E</span><span class="mw-editsection"><span class="mw-editsection-bracket">[</span><a href="/index.php?title=Parser_test&amp;action=edit&amp;section=1" title="Edit section: A&amp;B&amp;C&amp;amp;D&amp;amp;amp;E">edit</a><span class="mw-editsection-bracket">]</span></span></h2>
-<p><a href="#A.26B.26C.26D.26amp.3BE">#A&amp;B&amp;C&amp;amp;D&amp;amp;amp;E</a>
+<p><a href="#A.26B.26C.26amp.3BD.26amp.3Bamp.3BE">#A&amp;B&amp;C&amp;amp;D&amp;amp;amp;E</a>
 </p>
 !! end
 
index 7472fb9..13fed56 100644 (file)
@@ -376,7 +376,7 @@ class SanitizerTest extends MediaWikiTestCase {
                        [ '\'', '.27' ],
                        [ '§', '.C2.A7' ],
                        [ 'Test:A & B/Here', 'Test:A_.26_B.2FHere' ],
-                       [ 'A&B&amp;C&amp;amp;D&amp;amp;amp;E', 'A.26B.26C.26amp.3BD.26amp.3Bamp.3BE' ],
+                       [ 'A&B&amp;C&amp;amp;D&amp;amp;amp;E', 'A.26B.26amp.3BC.26amp.3Bamp.3BD.26amp.3Bamp.3Bamp.3BE' ],
                ];
        }
 
@@ -453,10 +453,11 @@ class SanitizerTest extends MediaWikiTestCase {
 
        public function provideEscapeIdForStuff() {
                // Test inputs and outputs
-               $text = 'foo тест_#%!\'()[]:<>';
-               $legacyEncoded = 'foo_.D1.82.D0.B5.D1.81.D1.82_.23.25.21.27.28.29.5B.5D:.3C.3E';
-               $html5Encoded = 'foo_тест_#%!\'()[]:<>';
-               $html5Experimental = 'foo_тест_!_()[]:<>';
+               $text = 'foo тест_#%!\'()[]:<>&&amp;&amp;amp;';
+               $legacyEncoded = 'foo_.D1.82.D0.B5.D1.81.D1.82_.23.25.21.27.28.29.5B.5D:.3C.3E' .
+                       '.26.26amp.3B.26amp.3Bamp.3B';
+               $html5Encoded = 'foo_тест_#%!\'()[]:<>&&amp;&amp;amp;';
+               $html5Experimental = 'foo_тест_!_()[]:<>_amp;_amp;amp;';
 
                // Settings: last element is $wgExternalInterwikiFragmentMode, the rest is $wgFragmentMode
                $legacy = [ 'legacy', 'legacy' ];