Don't percent-encode HTML5 IDs
author Max Semenik <maxsem.wiki@gmail.com>
Fri, 1 Sep 2017 00:48:42 +0000 (17:48 -0700)
committer Max Semenik <maxsem.wiki@gmail.com>
Tue, 12 Sep 2017 01:22:04 +0000 (18:22 -0700)
During the TechCom meeting, it was decided this is the better way.

Bug: T152540

Change-Id: I6c3ec1c407225b4e925b7373bf52208e2f6b6c4a

includes/DefaultSettings.php
includes/Linker.php
includes/Sanitizer.php
resources/src/mediawiki/mediawiki.util.js
tests/parser/parserTests.txt
tests/phpunit/includes/SanitizerTest.php
tests/qunit/suites/resources/mediawiki/mediawiki.util.test.js

index 86b1bdc..5b77d16 100644 (file)
@@ -3394,8 +3394,8 @@ $wgExperimentalHtmlIds = false;
 /**
  * How should section IDs be encoded?
  * This array can contain 1 or 2 elements, each of them can be one of:
- * - 'html5'  is modern HTML5 style encoding with minimal escaping. Allows to
- *            display Unicode characters in many browsers' address bars.
+ * - 'html5'  is modern HTML5 style encoding with minimal escaping. Displays Unicode
+ *            characters in most browsers' address bars.
  * - 'legacy' is old MediaWiki-style encoding, e.g. 啤酒 turns into .E5.95.A4.E9.85.92
  * - 'html5-legacy' corresponds to DEPRECATED $wgExperimentalHtmlIds mode. DO NOT use
  *            it for anything but migration off that mode (see below).
index aedb704..dccd99c 100644 (file)
@@ -1539,10 +1539,16 @@ class Linker {
                if ( $sectionIndex !== false ) {
                        $classes .= " tocsection-$sectionIndex";
                }
-               return "\n<li class=\"$classes\"><a href=\"#" .
-                       $anchor . '"><span class="tocnumber">' .
-                       $tocnumber . '</span> <span class="toctext">' .
-                       $tocline . '</span></a>';
+
+               // \n<li class="$classes"><a href="#$anchor"><span class="tocnumber">
+               // $tocnumber</span> <span class="toctext">$tocline</span></a>
+               return "\n" . Html::openElement( 'li', [ 'class' => $classes ] )
+                       . Html::rawElement( 'a',
+                               [ 'href' => "#$anchor" ],
+                               Html::element( 'span', [ 'class' => 'tocnumber' ], $tocnumber )
+                                       . ' '
+                                       . Html::rawElement( 'span', [ 'class' => 'toctext' ], $tocline )
+                       );
        }
 
        /**
index ed09701..7d17cd1 100644 (file)
@@ -1284,7 +1284,6 @@ class Sanitizer {
                $mode = $wgFragmentMode[self::ID_PRIMARY];
 
                $id = self::escapeIdInternal( $id, $mode );
-               $id = self::urlEscapeId( $id, $mode );
 
                return $id;
        }
@@ -1302,23 +1301,6 @@ class Sanitizer {
                global $wgExternalInterwikiFragmentMode;
 
                $id = self::escapeIdInternal( $id, $wgExternalInterwikiFragmentMode );
-               $id = self::urlEscapeId( $id, $wgExternalInterwikiFragmentMode );
-
-               return $id;
-       }
-
-       /**
-        * Helper for escapeIdFor*() functions. URL-escapes the ID if needed.
-        *
-        * @param string $id String to escape
-        * @param string $mode One of modes from $wgFragmentMode
-        * @return string
-        */
-       private static function urlEscapeId( $id, $mode ) {
-               if ( $mode === 'html5' ) {
-                       $id = urlencode( $id );
-                       $id = str_replace( '%3A', ':', $id );
-               }
 
                return $id;
        }
index 34f7eba..fb34a89 100644 (file)
                 * @return {string} Encoded string
                 */
                escapeIdForLink: function ( str ) {
-                       var mode = mw.config.get( 'wgFragmentMode' )[ 0 ],
-                               id = escapeIdInternal( str, mode );
-
-                       if ( mode === 'html5' ) {
-                               id = encodeURIComponent( id ).replace( /%3A/g, ':' );
-                       }
+                       var mode = mw.config.get( 'wgFragmentMode' )[ 0 ];
 
-                       return id;
+                       return escapeIdInternal( str, mode );
                },
 
                /**
index 3f93793..bf2679f 100644 (file)
@@ -29307,10 +29307,10 @@ wgFragmentMode=[ 'html5', 'legacy' ]
 <ul>
 <li class="toclevel-1 tocsection-1"><a href="#Foo_bar"><span class="tocnumber">1</span> <span class="toctext">Foo bar</span></a></li>
 <li class="toclevel-1 tocsection-2"><a href="#foo_Bar_2"><span class="tocnumber">2</span> <span class="toctext">foo Bar</span></a></li>
-<li class="toclevel-1 tocsection-3"><a href="#%D0%A2%D0%B5%D1%81%D1%82"><span class="tocnumber">3</span> <span class="toctext">Тест</span></a></li>
-<li class="toclevel-1 tocsection-4"><a href="#%D0%A2%D0%B5%D1%81%D1%82_2"><span class="tocnumber">4</span> <span class="toctext">Тест</span></a></li>
-<li class="toclevel-1 tocsection-5"><a href="#%D1%82%D0%B5%D1%81%D1%82"><span class="tocnumber">5</span> <span class="toctext">тест</span></a></li>
-<li class="toclevel-1 tocsection-6"><a href="#Hey_%3C_%23_%22_%3E_%25_:_%27"><span class="tocnumber">6</span> <span class="toctext">Hey &lt; # " &gt;&#160;%&#160;: '</span></a></li>
+<li class="toclevel-1 tocsection-3"><a href="#Тест"><span class="tocnumber">3</span> <span class="toctext">Тест</span></a></li>
+<li class="toclevel-1 tocsection-4"><a href="#Тест_2"><span class="tocnumber">4</span> <span class="toctext">Тест</span></a></li>
+<li class="toclevel-1 tocsection-5"><a href="#тест"><span class="tocnumber">5</span> <span class="toctext">тест</span></a></li>
+<li class="toclevel-1 tocsection-6"><a href="#Hey_&lt;_#_&quot;_&gt;_%_:_'"><span class="tocnumber">6</span> <span class="toctext">Hey &lt; # " &gt;&#160;%&#160;: '</span></a></li>
 </ul>
 </div>
 
@@ -29320,9 +29320,9 @@ wgFragmentMode=[ 'html5', 'legacy' ]
 <h2><span id=".D0.A2.D0.B5.D1.81.D1.82_2"></span><span class="mw-headline" id="Тест_2">Тест</span><span class="mw-editsection"><span class="mw-editsection-bracket">[</span><a href="/index.php?title=Parser_test&amp;action=edit&amp;section=4" title="Edit section: Тест">edit</a><span class="mw-editsection-bracket">]</span></span></h2>
 <h2><span id=".D1.82.D0.B5.D1.81.D1.82"></span><span class="mw-headline" id="тест">тест</span><span class="mw-editsection"><span class="mw-editsection-bracket">[</span><a href="/index.php?title=Parser_test&amp;action=edit&amp;section=5" title="Edit section: тест">edit</a><span class="mw-editsection-bracket">]</span></span></h2>
 <h2><span id="Hey_.3C_.23_.22_.3E_.25_:_.27"></span><span class="mw-headline" id="Hey_&lt;_#_&quot;_&gt;_%_:_'">Hey &lt; # " &gt;&#160;%&#160;: '</span><span class="mw-editsection"><span class="mw-editsection-bracket">[</span><a href="/index.php?title=Parser_test&amp;action=edit&amp;section=6" title="Edit section: Hey &lt; # &quot; &gt; % : '">edit</a><span class="mw-editsection-bracket">]</span></span></h2>
-<p><a href="#Foo_bar">#Foo bar</a> <a href="#foo_Bar">#foo Bar</a> <a href="#%D0%A2%D0%B5%D1%81%D1%82">#Тест</a> <a href="#%D1%82%D0%B5%D1%81%D1%82">#тест</a> <a href="#Hey_%3C_%23_%22_%3E_%25_:_%27">#Hey &lt; # " &gt;&#160;%&#160;: '</a>
-</p><p>%F0%9F%92%A9 <span id="%F0%9F%92%A9"></span>
-</p><p><a href="#%E5%95%A4%E9%85%92">#啤酒</a> <a href="#%E5%95%A4%E9%85%92">#啤酒</a>
+<p><a href="#Foo_bar">#Foo bar</a> <a href="#foo_Bar">#foo Bar</a> <a href="#Тест">#Тест</a> <a href="#тест">#тест</a> <a href="#Hey_&lt;_#_&quot;_&gt;_%_:_'">#Hey &lt; # " &gt;&#160;%&#160;: '</a>
+</p><p>💩 <span id="💩"></span>
+</p><p><a href="#啤酒">#啤酒</a> <a href="#啤酒">#啤酒</a>
 </p>
 !! end
 
@@ -29401,10 +29401,10 @@ wgFragmentMode=[ 'html5' ]
 <ul>
 <li class="toclevel-1 tocsection-1"><a href="#Foo_bar"><span class="tocnumber">1</span> <span class="toctext">Foo bar</span></a></li>
 <li class="toclevel-1 tocsection-2"><a href="#foo_Bar_2"><span class="tocnumber">2</span> <span class="toctext">foo Bar</span></a></li>
-<li class="toclevel-1 tocsection-3"><a href="#%D0%A2%D0%B5%D1%81%D1%82"><span class="tocnumber">3</span> <span class="toctext">Тест</span></a></li>
-<li class="toclevel-1 tocsection-4"><a href="#%D0%A2%D0%B5%D1%81%D1%82_2"><span class="tocnumber">4</span> <span class="toctext">Тест</span></a></li>
-<li class="toclevel-1 tocsection-5"><a href="#%D1%82%D0%B5%D1%81%D1%82"><span class="tocnumber">5</span> <span class="toctext">тест</span></a></li>
-<li class="toclevel-1 tocsection-6"><a href="#Hey_%3C_%23_%22_%3E_%25_:_%27"><span class="tocnumber">6</span> <span class="toctext">Hey &lt; # " &gt;&#160;%&#160;: '</span></a></li>
+<li class="toclevel-1 tocsection-3"><a href="#Тест"><span class="tocnumber">3</span> <span class="toctext">Тест</span></a></li>
+<li class="toclevel-1 tocsection-4"><a href="#Тест_2"><span class="tocnumber">4</span> <span class="toctext">Тест</span></a></li>
+<li class="toclevel-1 tocsection-5"><a href="#тест"><span class="tocnumber">5</span> <span class="toctext">тест</span></a></li>
+<li class="toclevel-1 tocsection-6"><a href="#Hey_&lt;_#_&quot;_&gt;_%_:_'"><span class="tocnumber">6</span> <span class="toctext">Hey &lt; # " &gt;&#160;%&#160;: '</span></a></li>
 </ul>
 </div>
 
@@ -29414,8 +29414,8 @@ wgFragmentMode=[ 'html5' ]
 <h2><span class="mw-headline" id="Тест_2">Тест</span><span class="mw-editsection"><span class="mw-editsection-bracket">[</span><a href="/index.php?title=Parser_test&amp;action=edit&amp;section=4" title="Edit section: Тест">edit</a><span class="mw-editsection-bracket">]</span></span></h2>
 <h2><span class="mw-headline" id="тест">тест</span><span class="mw-editsection"><span class="mw-editsection-bracket">[</span><a href="/index.php?title=Parser_test&amp;action=edit&amp;section=5" title="Edit section: тест">edit</a><span class="mw-editsection-bracket">]</span></span></h2>
 <h2><span class="mw-headline" id="Hey_&lt;_#_&quot;_&gt;_%_:_'">Hey &lt; # " &gt;&#160;%&#160;: '</span><span class="mw-editsection"><span class="mw-editsection-bracket">[</span><a href="/index.php?title=Parser_test&amp;action=edit&amp;section=6" title="Edit section: Hey &lt; # &quot; &gt; % : '">edit</a><span class="mw-editsection-bracket">]</span></span></h2>
-<p><a href="#Foo_bar">#Foo bar</a> <a href="#foo_Bar">#foo Bar</a> <a href="#%D0%A2%D0%B5%D1%81%D1%82">#Тест</a> <a href="#%D1%82%D0%B5%D1%81%D1%82">#тест</a> <a href="#Hey_%3C_%23_%22_%3E_%25_:_%27">#Hey &lt; # " &gt;&#160;%&#160;: '</a>
-</p><p>%F0%9F%92%A9 <span id="%F0%9F%92%A9"></span>
-</p><p><a href="#%E5%95%A4%E9%85%92">#啤酒</a> <a href="#%E5%95%A4%E9%85%92">#啤酒</a>
+<p><a href="#Foo_bar">#Foo bar</a> <a href="#foo_Bar">#foo Bar</a> <a href="#Тест">#Тест</a> <a href="#тест">#тест</a> <a href="#Hey_&lt;_#_&quot;_&gt;_%_:_'">#Hey &lt; # " &gt;&#160;%&#160;: '</a>
+</p><p>💩 <span id="💩"></span>
+</p><p><a href="#啤酒">#啤酒</a> <a href="#啤酒">#啤酒</a>
 </p>
 !! end
index d506623..7472fb9 100644 (file)
@@ -456,7 +456,6 @@ class SanitizerTest extends MediaWikiTestCase {
                $text = 'foo тест_#%!\'()[]:<>';
                $legacyEncoded = 'foo_.D1.82.D0.B5.D1.81.D1.82_.23.25.21.27.28.29.5B.5D:.3C.3E';
                $html5Encoded = 'foo_тест_#%!\'()[]:<>';
-               $html5Escaped = 'foo_%D1%82%D0%B5%D1%81%D1%82_%23%25%21%27%28%29%5B%5D:%3C%3E';
                $html5Experimental = 'foo_тест_!_()[]:<>';
 
                // Settings: last element is $wgExternalInterwikiFragmentMode, the rest is $wgFragmentMode
@@ -484,20 +483,20 @@ class SanitizerTest extends MediaWikiTestCase {
                        // New world: HTML5 links, legacy fallbacks
                        [ 'Attribute', $newLegacy, $text, $html5Encoded, Sanitizer::ID_PRIMARY ],
                        [ 'Attribute', $newLegacy, $text, $legacyEncoded, Sanitizer::ID_FALLBACK ],
-                       [ 'Link', $newLegacy, $text, $html5Escaped ],
+                       [ 'Link', $newLegacy, $text, $html5Encoded ],
                        [ 'ExternalInterwiki', $newLegacy, $text, $legacyEncoded ],
 
                        // Distant future: no legacy fallbacks, but still linking to leagacy wikis
                        [ 'Attribute', $new, $text, $html5Encoded, Sanitizer::ID_PRIMARY ],
                        [ 'Attribute', $new, $text, false, Sanitizer::ID_FALLBACK ],
-                       [ 'Link', $new, $text, $html5Escaped ],
+                       [ 'Link', $new, $text, $html5Encoded ],
                        [ 'ExternalInterwiki', $new, $text, $legacyEncoded ],
 
                        // Just before the heat death of universe: external interwikis are also HTML5 \m/
                        [ 'Attribute', $allNew, $text, $html5Encoded, Sanitizer::ID_PRIMARY ],
                        [ 'Attribute', $allNew, $text, false, Sanitizer::ID_FALLBACK ],
-                       [ 'Link', $allNew, $text, $html5Escaped ],
-                       [ 'ExternalInterwiki', $allNew, $text, $html5Escaped ],
+                       [ 'Link', $allNew, $text, $html5Encoded ],
+                       [ 'ExternalInterwiki', $allNew, $text, $html5Encoded ],
 
                        // Someone flipped $wgExperimentalHtmlIds on
                        [ 'Attribute', $experimentalLegacy, $text, $html5Experimental, Sanitizer::ID_PRIMARY ],
@@ -508,7 +507,7 @@ class SanitizerTest extends MediaWikiTestCase {
                        // Migration from $wgExperimentalHtmlIds to modern HTML5
                        [ 'Attribute', $newExperimental, $text, $html5Encoded, Sanitizer::ID_PRIMARY ],
                        [ 'Attribute', $newExperimental, $text, $html5Experimental, Sanitizer::ID_FALLBACK ],
-                       [ 'Link', $newExperimental, $text, $html5Escaped ],
+                       [ 'Link', $newExperimental, $text, $html5Encoded ],
                        [ 'ExternalInterwiki', $newExperimental, $text, $legacyEncoded ],
                ];
        }
index 2efe9cd..bb27626 100644 (file)
                // Test cases are kept in sync with SanitizerTest.php
                var text = 'foo тест_#%!\'()[]:<>',
                        legacyEncoded = 'foo_.D1.82.D0.B5.D1.81.D1.82_.23.25.21.27.28.29.5B.5D:.3C.3E',
-                       html5Escaped = 'foo_%D1%82%D0%B5%D1%81%D1%82_%23%25!\'()%5B%5D:%3C%3E',
+                       html5Encoded = 'foo_тест_#%!\'()[]:<>',
                        html5Experimental = 'foo_тест_!_()[]:<>',
                        // Settings: this is wgFragmentMode
                        legacy = [ 'legacy' ],
                        // Transition to a new world: legacy links with HTML5 fallback
                        [ legacyNew, text, legacyEncoded ],
                        // New world: HTML5 links, legacy fallbacks
-                       [ newLegacy, text, html5Escaped ],
+                       [ newLegacy, text, html5Encoded ],
                        // Distant future: no legacy fallbacks
-                       [ allNew, text, html5Escaped ],
+                       [ allNew, text, html5Encoded ],
                        // Someone flipped wgExperimentalHtmlIds on
                        [ experimentalLegacy, text, html5Experimental ],
                        // Migration from wgExperimentalHtmlIds to modern HTML5
-                       [ newExperimental, text, html5Escaped ]
+                       [ newExperimental, text, html5Encoded ]
                ], function ( index, testCase ) {
                        mw.config.set( 'wgFragmentMode', testCase[ 0 ] );