Merge "Fix Special:Undelete search - use variable and not request param"
authorjenkins-bot <jenkins-bot@gerrit.wikimedia.org>
Wed, 2 Aug 2017 12:38:28 +0000 (12:38 +0000)
committerGerrit Code Review <gerrit@wikimedia.org>
Wed, 2 Aug 2017 12:38:28 +0000 (12:38 +0000)
31 files changed:
RELEASE-NOTES-1.30
autoload.php
includes/DefaultSettings.php
includes/EditPage.php
includes/Linker.php
includes/Sanitizer.php
includes/Setup.php
includes/Title.php
includes/actions/InfoAction.php
includes/api/ApiMain.php
includes/htmlform/HTMLForm.php
includes/htmlform/HTMLFormField.php
includes/htmlform/OOUIHTMLForm.php
includes/htmlform/fields/HTMLFormFieldCloner.php
includes/htmlform/fields/HTMLRadioField.php
includes/page/ImagePage.php
includes/parser/Parser.php
includes/resourceloader/ResourceLoaderMediaWikiUtilModule.php [new file with mode: 0644]
includes/skins/BaseTemplate.php
includes/skins/Skin.php
includes/specials/SpecialListgrants.php
includes/specials/SpecialListgrouprights.php
includes/specials/SpecialVersion.php
includes/specials/pagers/AllMessagesTablePager.php
resources/Resources.php
resources/src/mediawiki.action/mediawiki.action.edit.preview.js
resources/src/mediawiki/mediawiki.util.js
tests/parser/ParserTestRunner.php
tests/parser/parserTests.txt
tests/phpunit/includes/SanitizerTest.php
tests/qunit/suites/resources/mediawiki/mediawiki.util.test.js

index c5ab81a..452cb35 100644 (file)
@@ -26,6 +26,13 @@ section).
   array. This allows dependency injection to be used for ResourceLoader modules.
 * $wgExceptionHooks has been removed.
 * (T45547) $wgUsePigLatinVariant added (off by default).
+* (T152540) MediaWiki now supports a section ID escaping style that allows to display
+  non-Latin characters verbatim on many modern browsers. This is controlled by the
+  new configuration setting, $wgFragmentMode.
+* $wgExperimentalHtmlIds is now deprecated and will be removed in a future version,
+  use $wgFragmentMode to migrate off it to a modern alternative.
+* $wgExternalInterwikiFragmentMode was introduced to control how fragments in
+  sinterwikis going outside of current wiki farm are encoded.
 
 === New features in 1.30 ===
 * (T37247) Output from Parser::parse() will now be wrapped in a div with
@@ -143,6 +150,14 @@ changes to languages because of Phabricator reports.
   MediaWikiServices instead. Access to the underlying BagOStuff is possible
   through the new ParserCache::getCacheStorage() method.
 * .mw-ui-constructive CSS class (deprecated in 1.27) was removed.
+* Sanitizer::escapeId() was deprecated, use escapeIdForAttribute(),
+  escapeIdForLink() or escapeIdForExternalInterwiki() instead.
+* Title::escapeFragmentForURL() was deprecated, use one of the aforementioned
+  Sanitizer functions or, if possible, Title::getFragmentForURL().
+* Second parameter to Sanitizer::escapeIdReferenceList() ($options) now does
+  nothing and is deprecated.
+* mw.util.escapeId() was deprecated, use escapeIdForAttribute() or
+  escapeIdForLink().
 
 == Compatibility ==
 MediaWiki 1.30 requires PHP 5.5.9 or later. There is experimental support for
index 2bf1d4c..d44a305 100644 (file)
@@ -1231,6 +1231,7 @@ $wgAutoloadLocalClasses = [
        'ResourceLoaderJqueryMsgModule' => __DIR__ . '/includes/resourceloader/ResourceLoaderJqueryMsgModule.php',
        'ResourceLoaderLanguageDataModule' => __DIR__ . '/includes/resourceloader/ResourceLoaderLanguageDataModule.php',
        'ResourceLoaderLanguageNamesModule' => __DIR__ . '/includes/resourceloader/ResourceLoaderLanguageNamesModule.php',
+       'ResourceLoaderMediaWikiUtilModule' => __DIR__ . '/includes/resourceloader/ResourceLoaderMediaWikiUtilModule.php',
        'ResourceLoaderModule' => __DIR__ . '/includes/resourceloader/ResourceLoaderModule.php',
        'ResourceLoaderOOUIFileModule' => __DIR__ . '/includes/resourceloader/ResourceLoaderOOUIFileModule.php',
        'ResourceLoaderOOUIImageModule' => __DIR__ . '/includes/resourceloader/ResourceLoaderOOUIImageModule.php',
index b6d75ce..8e38121 100644 (file)
@@ -3372,16 +3372,56 @@ $wgApiFrameOptions = 'DENY';
 $wgDisableOutputCompression = false;
 
 /**
- * Should we allow a broader set of characters in id attributes, per HTML5?  If
- * not, use only HTML 4-compatible IDs.  This option is for testing -- when the
- * functionality is ready, it will be on by default with no option.
+ * Abandoned experiment with HTML5-style ID escaping. Normalized IDs a bit
+ * too aggressively, breaking preexisting content (particularly Cite).
+ * See T29733, T29694, T29474.
  *
- * Currently this appears to work fine in all browsers, but it's disabled by
- * default because it normalizes id's a bit too aggressively, breaking preexisting
- * content (particularly Cite).  See T29733, T29694, T29474.
+ * @deprecated since 1.30, use $wgFragmentMode
  */
 $wgExperimentalHtmlIds = false;
 
+/**
+ * How should section IDs be encoded?
+ * This array can contain 1 or 2 elements, each of them can be one of:
+ * - 'html5'  is modern HTML5 style encoding with minimal escaping. Allows to
+ *            display Unicode characters in many browsers' address bars.
+ * - 'legacy' is old MediaWiki-style encoding, e.g. 啤酒 turns into .E5.95.A4.E9.85.92
+ * - 'html5-legacy' corresponds to DEPRECATED $wgExperimentalHtmlIds mode. DO NOT use
+ *            it for anything but migration off that mode (see below).
+ *
+ * The first element of this array specifies the primary mode of escaping IDs. This
+ * is what users will see when they e.g. follow an [[#internal link]] to a section of
+ * a page.
+ *
+ * The optional second element defines a fallback mode, useful for migrations.
+ * If present, it will direct MediaWiki to add empty <span>s to every section with its
+ * id attribute set to fallback encoded title so that links using the previous encoding
+ * would still work.
+ *
+ * Example: you want to migrate your wiki from 'legacy' to 'html5'
+ *
+ * On the first step, set this variable to [ 'legacy', 'html5' ]. After a while, when
+ * all caches (parser, HTTP, etc.) contain only pages generated with this setting,
+ * flip the value to [ 'html5', 'legacy' ]. This will result in all internal links being
+ * generated in the new encoding while old links (both external and cached internal) will
+ * still work. After a long time, you might want to ditch backwards compatibility and
+ * set it to [ 'html5' ]. After all, pages get edited, breaking incoming links no matter which
+ * fragment mode is used.
+ *
+ * @since 1.30
+ */
+$wgFragmentMode = [ 'legacy' ];
+
+/**
+ * Which ID escaping mode should be used for external interwiki links? See documentation
+ * for $wgFragmentMode above for details of each mode. Because you can't control external sites,
+ * this setting should probably always be 'legacy', unless every wiki you link to has converted
+ * to 'html5'.
+ *
+ * @since 1.30
+ */
+$wgExternalInterwikiFragmentMode = 'legacy';
+
 /**
  * Abstract list of footer icons for skins in place of old copyrightico and poweredbyico code
  * You can add new icons to the built in copyright or poweredby, or you can create
index fc77006..9d83fbd 100644 (file)
@@ -1698,7 +1698,7 @@ class EditPage {
                global $wgParser;
 
                if ( $this->sectiontitle !== '' ) {
-                       $sectionanchor = $wgParser->guessLegacySectionNameFromWikiText( $this->sectiontitle );
+                       $sectionanchor = $this->guessSectionName( $this->sectiontitle );
                        // If no edit summary was specified, create one automatically from the section
                        // title and have it link to the new section. Otherwise, respect the summary as
                        // passed.
@@ -1708,7 +1708,7 @@ class EditPage {
                                        ->rawParams( $cleanSectionTitle )->inContentLanguage()->text();
                        }
                } elseif ( $this->summary !== '' ) {
-                       $sectionanchor = $wgParser->guessLegacySectionNameFromWikiText( $this->summary );
+                       $sectionanchor = $this->guessSectionName( $this->summary );
                        # This is a new section, so create a link to the new section
                        # in the revision summary.
                        $cleanSummary = $wgParser->stripSectionName( $this->summary );
@@ -1743,7 +1743,7 @@ class EditPage {
         * time.
         */
        public function internalAttemptSave( &$result, $bot = false ) {
-               global $wgUser, $wgRequest, $wgParser, $wgMaxArticleSize;
+               global $wgUser, $wgRequest, $wgMaxArticleSize;
                global $wgContentHandlerUseDB;
 
                $status = Status::newGood();
@@ -2117,7 +2117,7 @@ class EditPage {
                                # We can't deal with anchors, includes, html etc in the header for now,
                                # headline would need to be parsed to improve this.
                                if ( $hasmatch && strlen( $matches[2] ) > 0 ) {
-                                       $sectionanchor = $wgParser->guessLegacySectionNameFromWikiText( $matches[2] );
+                                       $sectionanchor = $this->guessSectionName( $matches[2] );
                                }
                        }
                        $result['sectionanchor'] = $sectionanchor;
@@ -4795,4 +4795,27 @@ HTML
                }
                return $wikitext;
        }
+
+       /**
+        * Turns section name wikitext into anchors for use in HTTP redirects. Various
+        * versions of Microsoft browsers misinterpret fragment encoding of Location: headers
+        * resulting in mojibake in address bar. Redirect them to legacy section IDs,
+        * if possible. All the other browsers get HTML5 if the wiki is configured for it, to
+        * spread the new style links more efficiently.
+        *
+        * @param string $text
+        * @return string
+        */
+       private function guessSectionName( $text ) {
+               global $wgParser;
+
+               // Detect Microsoft browsers
+               $userAgent = $this->context->getRequest()->getHeader( 'User-Agent' );
+               if ( $userAgent && preg_match( '/MSIE|Edge/', $userAgent ) ) {
+                       // ...and redirect them to legacy encoding, if available
+                       return $wgParser->guessLegacySectionNameFromWikiText( $text );
+               }
+               // Meanwhile, real browsers get real anchors
+               return $wgParser->guessSectionNameFromWikiText( $text );
+       }
 }
index 4aae3ba..2ca851c 100644 (file)
@@ -1608,22 +1608,24 @@ class Linker {
         *   a space and ending with '>'
         *   This *must* be at least '>' for no attribs
         * @param string $anchor The anchor to give the headline (the bit after the #)
-        * @param string $html Html for the text of the header
+        * @param string $html HTML for the text of the header
         * @param string $link HTML to add for the section edit link
-        * @param bool|string $legacyAnchor A second, optional anchor to give for
+        * @param string|bool $fallbackAnchor A second, optional anchor to give for
         *   backward compatibility (false to omit)
         *
         * @return string HTML headline
         */
        public static function makeHeadline( $level, $attribs, $anchor, $html,
-               $link, $legacyAnchor = false
+               $link, $fallbackAnchor = false
        ) {
+               $anchorEscaped = htmlspecialchars( $anchor );
                $ret = "<h$level$attribs"
-                       . "<span class=\"mw-headline\" id=\"$anchor\">$html</span>"
+                       . "<span class=\"mw-headline\" id=\"$anchorEscaped\">$html</span>"
                        . $link
                        . "</h$level>";
-               if ( $legacyAnchor !== false ) {
-                       $ret = "<div id=\"$legacyAnchor\"></div>$ret";
+               if ( $fallbackAnchor !== false && $fallbackAnchor !== $anchor ) {
+                       $fallbackAnchor = htmlspecialchars( $fallbackAnchor );
+                       $ret = "<div id=\"$fallbackAnchor\"></div>$ret";
                }
                return $ret;
        }
index 2def06a..907da16 100644 (file)
@@ -56,6 +56,21 @@ class Sanitizer {
        const EVIL_URI_PATTERN = '!(^|\s|\*/\s*)(javascript|vbscript)([^\w]|$)!i';
        const XMLNS_ATTRIBUTE_PATTERN = "/^xmlns:[:A-Z_a-z-.0-9]+$/";
 
+       /**
+        * Tells escapeUrlForHtml() to encode the ID using the wiki's primary encoding.
+        *
+        * @since 1.30
+        */
+       const ID_PRIMARY = 0;
+
+       /**
+        * Tells escapeUrlForHtml() to encode the ID using the fallback encoding, or return false
+        * if no fallback is configured.
+        *
+        * @since 1.30
+        */
+       const ID_FALLBACK = 1;
+
        /**
         * List of all named character entities defined in HTML 4.01
         * https://www.w3.org/TR/html4/sgml/entities.html
@@ -800,7 +815,7 @@ class Sanitizer {
 
                        # Escape HTML id attributes
                        if ( $attribute === 'id' ) {
-                               $value = self::escapeId( $value, 'noninitial' );
+                               $value = self::escapeIdForAttribute( $value, Sanitizer::ID_PRIMARY );
                        }
 
                        # Escape HTML id reference lists
@@ -1164,6 +1179,8 @@ class Sanitizer {
         * ambiguous if it's part of something that looks like a percent escape
         * (which don't work reliably in fragments cross-browser).
         *
+        * @deprecated since 1.30, use one of this class' escapeIdFor*() functions
+        *
         * @see https://www.w3.org/TR/html401/types.html#type-name Valid characters
         *   in the id and name attributes
         * @see https://www.w3.org/TR/html401/struct/links.html#h-12.2.3 Anchors with
@@ -1215,21 +1232,146 @@ class Sanitizer {
                return $id;
        }
 
+       /**
+        * Given a section name or other user-generated or otherwise unsafe string, escapes it to be
+        * a valid HTML id attribute.
+        *
+        * WARNING: unlike escapeId(), the output of this function is not guaranteed to be HTML safe,
+        * be sure to use proper escaping.
+        *
+        * @param string $id String to escape
+        * @param int $mode One of ID_* constants, specifying whether the primary or fallback encoding
+        *     should be used.
+        * @return string|bool Escaped ID or false if fallback encoding is requested but it's not
+        *     configured.
+        *
+        * @since 1.30
+        */
+       public static function escapeIdForAttribute( $id, $mode = self::ID_PRIMARY ) {
+               global $wgFragmentMode;
+
+               if ( !isset( $wgFragmentMode[$mode] ) ) {
+                       if ( $mode === self::ID_PRIMARY ) {
+                               throw new UnexpectedValueException( '$wgFragmentMode is configured with no primary mode' );
+                       }
+                       return false;
+               }
+
+               $internalMode = $wgFragmentMode[$mode];
+
+               return self::escapeIdInternal( $id, $internalMode );
+       }
+
+       /**
+        * Given a section name or other user-generated or otherwise unsafe string, escapes it to be
+        * a valid URL fragment.
+        *
+        * WARNING: unlike escapeId(), the output of this function is not guaranteed to be HTML safe,
+        * be sure to use proper escaping.
+        *
+        * @param string $id String to escape
+        * @return string Escaped ID
+        *
+        * @since 1.30
+        */
+       public static function escapeIdForLink( $id ) {
+               global $wgFragmentMode;
+
+               if ( !isset( $wgFragmentMode[self::ID_PRIMARY] ) ) {
+                       throw new UnexpectedValueException( '$wgFragmentMode is configured with no primary mode' );
+               }
+
+               $mode = $wgFragmentMode[self::ID_PRIMARY];
+
+               $id = self::escapeIdInternal( $id, $mode );
+               $id = self::urlEscapeId( $id, $mode );
+
+               return $id;
+       }
+
+       /**
+        * Given a section name or other user-generated or otherwise unsafe string, escapes it to be
+        * a valid URL fragment for external interwikis.
+        *
+        * @param string $id String to escape
+        * @return string Escaped ID
+        *
+        * @since 1.30
+        */
+       public static function escapeIdForExternalInterwiki( $id ) {
+               global $wgExternalInterwikiFragmentMode;
+
+               $id = self::escapeIdInternal( $id, $wgExternalInterwikiFragmentMode );
+               $id = self::urlEscapeId( $id, $wgExternalInterwikiFragmentMode );
+
+               return $id;
+       }
+
+       /**
+        * Helper for escapeIdFor*() functions. URL-escapes the ID if needed.
+        *
+        * @param string $id String to escape
+        * @param string $mode One of modes from $wgFragmentMode
+        * @return string
+        */
+       private static function urlEscapeId( $id, $mode ) {
+               if ( $mode === 'html5' ) {
+                       $id = urlencode( $id );
+                       $id = str_replace( '%3A', ':', $id );
+               }
+
+               return $id;
+       }
+
+       /**
+        * Helper for escapeIdFor*() functions. Performs most of the actual escaping.
+        *
+        * @param string $id String to escape
+        * @param string $mode One of modes from $wgFragmentMode
+        * @return string
+        */
+       private static function escapeIdInternal( $id, $mode ) {
+               $id = Sanitizer::decodeCharReferences( $id );
+
+               switch ( $mode ) {
+                       case 'html5':
+                               $id = str_replace( ' ', '_', $id );
+                               break;
+                       case 'legacy':
+                               // This corresponds to 'noninitial' mode of the old escapeId()
+                               static $replace = [
+                                       '%3A' => ':',
+                                       '%' => '.'
+                               ];
+
+                               $id = urlencode( str_replace( ' ', '_', $id ) );
+                               $id = strtr( $id, $replace );
+                               break;
+                       case 'html5-legacy':
+                               $id = preg_replace( '/[ \t\n\r\f_\'"&#%]+/', '_', $id );
+                               $id = trim( $id, '_' );
+                               if ( $id === '' ) {
+                                       // Must have been all whitespace to start with.
+                                       $id = '_';
+                               }
+                               break;
+                       default:
+                               throw new InvalidArgumentException( "Invalid mode '$mode' passed to '" . __METHOD__ );
+               }
+
+               return $id;
+       }
+
        /**
         * Given a string containing a space delimited list of ids, escape each id
         * to match ids escaped by the escapeId() function.
         *
+        * @todo wfDeprecated() uses of $options in 1.31, remove completely in 1.32
+        *
         * @since 1.27
         *
         * @param string $referenceString Space delimited list of ids
-        * @param string|array $options String or array of strings (default is array()):
-        *   'noninitial': This is a non-initial fragment of an id, not a full id,
-        *       so don't pay attention if the first character isn't valid at the
-        *       beginning of an id.  Only matters if $wgExperimentalHtmlIds is
-        *       false.
-        *   'legacy': Behave the way the old HTML 4-based ID escaping worked even
-        *       if $wgExperimentalHtmlIds is used, so we can generate extra
-        *       anchors and links won't break.
+        * @param string|array $options Deprecated and does nothing.
         * @return string
         */
        static function escapeIdReferenceList( $referenceString, $options = [] ) {
@@ -1238,7 +1380,7 @@ class Sanitizer {
 
                # Escape each token as an id
                foreach ( $references as &$ref ) {
-                       $ref = self::escapeId( $ref, $options );
+                       $ref = self::escapeIdForAttribute( $ref );
                }
 
                # Merge the array back to a space delimited list string
index 3d5bee2..68e3d96 100644 (file)
@@ -282,6 +282,11 @@ foreach ( $wgForeignFileRepos as &$repo ) {
 }
 unset( $repo ); // no global pollution; destroy reference
 
+// Convert this deprecated setting to modern system
+if ( $wgExperimentalHtmlIds ) {
+       $wgFragmentMode = [ 'html5-legacy', 'legacy' ];
+}
+
 $rcMaxAgeDays = $wgRCMaxAge / ( 3600 * 24 );
 if ( $wgRCFilterByAge ) {
        // Trim down $wgRCLinkDays so that it only lists links which are valid
index 0a2f868..7b3e3a5 100644 (file)
@@ -748,6 +748,8 @@ class Title implements LinkTarget {
        /**
         * Escape a text fragment, say from a link, for a URL
         *
+        * @deprecated since 1.30, use Sanitizer::escapeIdForLink() or escapeIdForExternalInterwiki()
+        *
         * @param string $fragment Containing a URL or link fragment (after the "#")
         * @return string Escaped string
         */
@@ -1397,14 +1399,16 @@ class Title implements LinkTarget {
 
        /**
         * Get the fragment in URL form, including the "#" character if there is one
+        *
         * @return string Fragment in URL form
         */
        public function getFragmentForURL() {
                if ( !$this->hasFragment() ) {
                        return '';
-               } else {
-                       return '#' . self::escapeFragmentForURL( $this->getFragment() );
+               } elseif ( $this->isExternal() && !$this->getTransWikiID() ) {
+                       return '#' . Sanitizer::escapeIdForExternalInterwiki( $this->getFragment() );
                }
+               return '#' . Sanitizer::escapeIdForLink( $this->getFragment() );
        }
 
        /**
index baec944..68dda37 100644 (file)
@@ -156,8 +156,8 @@ class InfoAction extends FormlessAction {
         * @return string The HTML.
         */
        protected function makeHeader( $header, $canonicalId ) {
-               $spanAttribs = [ 'class' => 'mw-headline', 'id' => Sanitizer::escapeId( $header ) ];
-               $h2Attribs = [ 'id' => Sanitizer::escapeId( $canonicalId ) ];
+               $spanAttribs = [ 'class' => 'mw-headline', 'id' => Sanitizer::escapeIdForAttribute( $header ) ];
+               $h2Attribs = [ 'id' => Sanitizer::escapeIdForAttribute( $canonicalId ) ];
 
                return Html::rawElement( 'h2', $h2Attribs, Html::element( 'span', $spanAttribs, $header ) );
        }
index b7d4529..6468235 100644 (file)
@@ -1931,14 +1931,15 @@ class ApiMain extends ApiBase {
 
                        $header = $this->msg( 'api-help-datatypes-header' )->parse();
 
-                       // Add an additional span with sanitized ID
-                       if ( !$this->getConfig()->get( 'ExperimentalHtmlIds' ) ) {
-                               $header = Html::element( 'span', [ 'id' => Sanitizer::escapeId( 'main/datatypes' ) ] ) .
-                                       $header;
-                       }
-                       $help['datatypes'] .= Html::rawElement( 'h' . min( 6, $level ),
-                               [ 'id' => 'main/datatypes', 'class' => 'apihelp-header' ],
-                               $header
+                       $id = Sanitizer::escapeIdForAttribute( 'main/datatypes', Sanitizer::ID_PRIMARY );
+                       $idFallback = Sanitizer::escapeIdForAttribute( 'main/datatypes', Sanitizer::ID_FALLBACK );
+
+                       $help['datatypes'] .= Linker::makeHeadline( min( 6, $level ),
+                               ' class="apihelp-header"',
+                               $id,
+                               $header,
+                               '',
+                               $idFallback
                        );
                        $help['datatypes'] .= $this->msg( 'api-help-datatypes' )->parseAsBlock();
                        if ( !isset( $tocData['main/datatypes'] ) ) {
@@ -1953,15 +1954,15 @@ class ApiMain extends ApiBase {
                                ];
                        }
 
-                       // Add an additional span with sanitized ID
-                       if ( !$this->getConfig()->get( 'ExperimentalHtmlIds' ) ) {
-                               $header = Html::element( 'span', [ 'id' => Sanitizer::escapeId( 'main/credits' ) ] ) .
-                                       $header;
-                       }
                        $header = $this->msg( 'api-credits-header' )->parse();
-                       $help['credits'] .= Html::rawElement( 'h' . min( 6, $level ),
-                               [ 'id' => 'main/credits', 'class' => 'apihelp-header' ],
-                               $header
+                       $id = Sanitizer::escapeIdForAttribute( 'main/credits', Sanitizer::ID_PRIMARY );
+                       $idFallback = Sanitizer::escapeIdForAttribute( 'main/credits', Sanitizer::ID_FALLBACK );
+                       $help['credits'] .= Linker::makeHeadline( min( 6, $level ),
+                               ' class="apihelp-header"',
+                               $id,
+                               $header,
+                               '',
+                               $idFallback
                        );
                        $help['credits'] .= $this->msg( 'api-credits' )->useDatabase( false )->parseAsBlock();
                        if ( !isset( $tocData['main/credits'] ) ) {
index d4351e0..702c2eb 100644 (file)
@@ -1692,7 +1692,7 @@ class HTMLForm extends ContextSource {
 
                                        $attributes = [];
                                        if ( $fieldsetIDPrefix ) {
-                                               $attributes['id'] = Sanitizer::escapeId( "$fieldsetIDPrefix$key" );
+                                               $attributes['id'] = Sanitizer::escapeIdForAttribute( "$fieldsetIDPrefix$key" );
                                        }
                                        $subsectionHtml .= $this->wrapFieldSetSection( $legend, $section, $attributes );
                                } else {
@@ -1741,7 +1741,7 @@ class HTMLForm extends ContextSource {
                ];
 
                if ( $sectionName ) {
-                       $attribs['id'] = Sanitizer::escapeId( $sectionName );
+                       $attribs['id'] = Sanitizer::escapeIdForAttribute( $sectionName );
                }
 
                if ( $displayFormat === 'table' ) {
index 7cb83e2..77ddc1a 100644 (file)
@@ -416,8 +416,8 @@ abstract class HTMLFormField {
                        $this->mDir = $params['dir'];
                }
 
-               $validName = Sanitizer::escapeId( $this->mName );
-               $validName = str_replace( [ '.5B', '.5D' ], [ '[', ']' ], $validName );
+               $validName = urlencode( $this->mName );
+               $validName = str_replace( [ '%5B', '%5D' ], [ '[', ']' ], $validName );
                if ( $this->mName != $validName && !isset( $params['nodata'] ) ) {
                        throw new MWException( "Invalid name '{$this->mName}' passed to " . __METHOD__ );
                }
@@ -430,7 +430,7 @@ abstract class HTMLFormField {
 
                if ( isset( $params['id'] ) ) {
                        $id = $params['id'];
-                       $validId = Sanitizer::escapeId( $id );
+                       $validId = urlencode( $id );
 
                        if ( $id != $validId ) {
                                throw new MWException( "Invalid id '$id' passed to " . __METHOD__ );
index 9dd37b3..e47de61 100644 (file)
@@ -180,7 +180,7 @@ class OOUIHTMLForm extends HTMLForm {
                        'items' => $fieldsHtml,
                ];
                if ( $sectionName ) {
-                       $config['id'] = Sanitizer::escapeId( $sectionName );
+                       $config['id'] = Sanitizer::escapeIdForAttribute( $sectionName );
                }
                if ( is_string( $this->mWrapperLegend ) ) {
                        $config['label'] = $this->mWrapperLegend;
index dd9184b..53c6835 100644 (file)
@@ -93,9 +93,9 @@ class HTMLFormFieldCloner extends HTMLFormField {
                                $info['name'] = $name;
                        }
                        if ( isset( $info['id'] ) ) {
-                               $info['id'] = Sanitizer::escapeId( "{$this->mID}--$key--{$info['id']}" );
+                               $info['id'] = Sanitizer::escapeIdForAttribute( "{$this->mID}--$key--{$info['id']}" );
                        } else {
-                               $info['id'] = Sanitizer::escapeId( "{$this->mID}--$key--$fieldname" );
+                               $info['id'] = Sanitizer::escapeIdForAttribute( "{$this->mID}--$key--$fieldname" );
                        }
                        // Copy the hide-if rules to "child" fields, so that the JavaScript code handling them
                        // (resources/src/mediawiki/htmlform/hide-if.js) doesn't have to handle nested fields.
@@ -313,7 +313,7 @@ class HTMLFormFieldCloner extends HTMLFormField {
                                'type' => 'submit',
                                'formnovalidate' => true,
                                'name' => $name,
-                               'id' => Sanitizer::escapeId( "{$this->mID}--$key--delete" ),
+                               'id' => Sanitizer::escapeIdForAttribute( "{$this->mID}--$key--delete" ),
                                'cssclass' => 'mw-htmlform-cloner-delete-button',
                                'default' => $this->getMessage( $label )->text(),
                        ], $this->mParent );
@@ -386,7 +386,7 @@ class HTMLFormFieldCloner extends HTMLFormField {
                        'type' => 'submit',
                        'formnovalidate' => true,
                        'name' => $name,
-                       'id' => Sanitizer::escapeId( "{$this->mID}--create" ),
+                       'id' => Sanitizer::escapeIdForAttribute( "{$this->mID}--create" ),
                        'cssclass' => 'mw-htmlform-cloner-create-button',
                        'default' => $this->getMessage( $label )->text(),
                ], $this->mParent );
index 06ec372..77ea7cd 100644 (file)
@@ -90,7 +90,7 @@ class HTMLRadioField extends HTMLFormField {
                                $html .= Html::rawElement( 'h1', [], $label ) . "\n";
                                $html .= $this->formatOptions( $info, $value );
                        } else {
-                               $id = Sanitizer::escapeId( $this->mID . "-$info" );
+                               $id = Sanitizer::escapeIdForAttribute( $this->mID . "-$info" );
                                $classes = [ 'mw-htmlform-flatlist-item' ];
                                if ( $wgUseMediaWikiUIEverywhere || $this->mParent instanceof VFormHTMLForm ) {
                                        $classes[] = 'mw-ui-radio';
index d37700b..b870831 100644 (file)
@@ -254,15 +254,16 @@ class ImagePage extends Article {
                $r .= "<table id=\"mw_metadata\" class=\"mw_metadata\">\n";
                foreach ( $metadata as $type => $stuff ) {
                        foreach ( $stuff as $v ) {
-                               # @todo FIXME: Why is this using escapeId for a class?!
-                               $class = Sanitizer::escapeId( $v['id'] );
+                               $class = str_replace( ' ', '_', $v['id'] );
                                if ( $type == 'collapsed' ) {
                                        // Handled by mediawiki.action.view.metadata module.
                                        $class .= ' collapsable';
                                }
-                               $r .= "<tr class=\"$class\">\n";
-                               $r .= "<th>{$v['name']}</th>\n";
-                               $r .= "<td>{$v['value']}</td>\n</tr>";
+                               $r .= Html::rawElement( 'tr',
+                                       [ 'class' => $class ],
+                                       Html::rawElement( 'th', [], $v['name'] )
+                                               . Html::rawElement( 'td', [], $v['value'] )
+                               );
                        }
                }
                $r .= "</table>\n</div>\n";
index b035b02..88439db 100644 (file)
@@ -4035,7 +4035,7 @@ class Parser {
         * @private
         */
        public function formatHeadings( $text, $origText, $isMain = true ) {
-               global $wgMaxTocLevel, $wgExperimentalHtmlIds;
+               global $wgMaxTocLevel;
 
                # Inhibit editsection links if requested in the page
                if ( isset( $this->mDoubleUnderscores['noeditsection'] ) ) {
@@ -4229,61 +4229,44 @@ class Parser {
                        # Save headline for section edit hint before it's escaped
                        $headlineHint = $safeHeadline;
 
-                       if ( $wgExperimentalHtmlIds ) {
-                               # For reverse compatibility, provide an id that's
-                               # HTML4-compatible, like we used to.
-                               # It may be worth noting, academically, that it's possible for
-                               # the legacy anchor to conflict with a non-legacy headline
-                               # anchor on the page.  In this case likely the "correct" thing
-                               # would be to either drop the legacy anchors or make sure
-                               # they're numbered first.  However, this would require people
-                               # to type in section names like "abc_.D7.93.D7.90.D7.A4"
-                               # manually, so let's not bother worrying about it.
-                               $legacyHeadline = Sanitizer::escapeId( $safeHeadline,
-                                       [ 'noninitial', 'legacy' ] );
-                               $safeHeadline = Sanitizer::escapeId( $safeHeadline );
-
-                               if ( $legacyHeadline == $safeHeadline ) {
-                                       # No reason to have both (in fact, we can't)
-                                       $legacyHeadline = false;
-                               }
-                       } else {
-                               $legacyHeadline = false;
-                               $safeHeadline = Sanitizer::escapeId( $safeHeadline,
-                                       'noninitial' );
+                       $fallbackHeadline = Sanitizer::escapeIdForAttribute( $safeHeadline, Sanitizer::ID_FALLBACK );
+                       $linkAnchor = Sanitizer::escapeIdForLink( $safeHeadline );
+                       $safeHeadline = Sanitizer::escapeIdForAttribute( $safeHeadline, Sanitizer::ID_PRIMARY );
+                       if ( $fallbackHeadline === $safeHeadline ) {
+                               # No reason to have both (in fact, we can't)
+                               $fallbackHeadline = false;
                        }
 
-                       # HTML names must be case-insensitively unique (T12721).
-                       # This does not apply to Unicode characters per
-                       # https://www.w3.org/TR/html5/infrastructure.html#case-sensitivity-and-string-comparison
+                       # HTML IDs must be case-insensitively unique for IE compatibility (T12721).
                        # @todo FIXME: We may be changing them depending on the current locale.
                        $arrayKey = strtolower( $safeHeadline );
-                       if ( $legacyHeadline === false ) {
-                               $legacyArrayKey = false;
+                       if ( $fallbackHeadline === false ) {
+                               $fallbackArrayKey = false;
                        } else {
-                               $legacyArrayKey = strtolower( $legacyHeadline );
+                               $fallbackArrayKey = strtolower( $fallbackHeadline );
                        }
 
                        # Create the anchor for linking from the TOC to the section
                        $anchor = $safeHeadline;
-                       $legacyAnchor = $legacyHeadline;
+                       $fallbackAnchor = $fallbackHeadline;
                        if ( isset( $refers[$arrayKey] ) ) {
                                // @codingStandardsIgnoreStart
                                for ( $i = 2; isset( $refers["${arrayKey}_$i"] ); ++$i );
                                // @codingStandardsIgnoreEnd
                                $anchor .= "_$i";
+                               $linkAnchor .= "_$i";
                                $refers["${arrayKey}_$i"] = true;
                        } else {
                                $refers[$arrayKey] = true;
                        }
-                       if ( $legacyHeadline !== false && isset( $refers[$legacyArrayKey] ) ) {
+                       if ( $fallbackHeadline !== false && isset( $refers[$fallbackArrayKey] ) ) {
                                // @codingStandardsIgnoreStart
-                               for ( $i = 2; isset( $refers["${legacyArrayKey}_$i"] ); ++$i );
+                               for ( $i = 2; isset( $refers["${fallbackArrayKey}_$i"] ); ++$i );
                                // @codingStandardsIgnoreEnd
-                               $legacyAnchor .= "_$i";
-                               $refers["${legacyArrayKey}_$i"] = true;
+                               $fallbackAnchor .= "_$i";
+                               $refers["${fallbackArrayKey}_$i"] = true;
                        } else {
-                               $refers[$legacyArrayKey] = true;
+                               $refers[$fallbackArrayKey] = true;
                        }
 
                        # Don't number the heading if it is the only one (looks silly)
@@ -4297,7 +4280,7 @@ class Parser {
                        }
 
                        if ( $enoughToc && ( !isset( $wgMaxTocLevel ) || $toclevel < $wgMaxTocLevel ) ) {
-                               $toc .= Linker::tocLine( $anchor, $tocline,
+                               $toc .= Linker::tocLine( $linkAnchor, $tocline,
                                        $numbering, $toclevel, ( $isTemplate ? false : $sectionIndex ) );
                        }
 
@@ -4364,7 +4347,7 @@ class Parser {
                        }
                        $head[$headlineCount] = Linker::makeHeadline( $level,
                                $matches['attrib'][$headlineCount], $anchor, $headline,
-                               $editlink, $legacyAnchor );
+                               $editlink, $fallbackAnchor );
 
                        $headlineCount++;
                }
@@ -5806,22 +5789,33 @@ class Parser {
                # Strip out wikitext links(they break the anchor)
                $text = $this->stripSectionName( $text );
                $text = Sanitizer::normalizeSectionNameWhitespace( $text );
-               return '#' . Sanitizer::escapeId( $text, 'noninitial' );
+               return '#' . Sanitizer::escapeIdForLink( $text );
        }
 
        /**
         * Same as guessSectionNameFromWikiText(), but produces legacy anchors
-        * instead.  For use in redirects, since IE6 interprets Redirect: headers
-        * as something other than UTF-8 (apparently?), resulting in breakage.
+        * instead, if possible. For use in redirects, since various versions
+        * of Microsoft browsers interpret Location: headers as something other
+        * than UTF-8, resulting in breakage.
         *
         * @param string $text The section name
         * @return string An anchor
         */
        public function guessLegacySectionNameFromWikiText( $text ) {
+               global $wgFragmentMode;
+
                # Strip out wikitext links(they break the anchor)
                $text = $this->stripSectionName( $text );
                $text = Sanitizer::normalizeSectionNameWhitespace( $text );
-               return '#' . Sanitizer::escapeId( $text, [ 'noninitial', 'legacy' ] );
+
+               if ( isset( $wgFragmentMode[1] ) && $wgFragmentMode[1] === 'legacy' ) {
+                       // ForAttribute() and ForLink() are the same for legacy encoding
+                       $id = Sanitizer::escapeIdForAttribute( $text, Sanitizer::ID_FALLBACK );
+               } else {
+                       $id = Sanitizer::escapeIdForLink( $text );
+               }
+
+               return "#$id";
        }
 
        /**
diff --git a/includes/resourceloader/ResourceLoaderMediaWikiUtilModule.php b/includes/resourceloader/ResourceLoaderMediaWikiUtilModule.php
new file mode 100644 (file)
index 0000000..1fe3434
--- /dev/null
@@ -0,0 +1,46 @@
+<?php
+/**
+ * ResourceLoader mediawiki.util module
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ * http://www.gnu.org/copyleft/gpl.html
+ *
+ * @file
+ */
+
+/**
+ * ResourceLoader module for mediawiki.util
+ *
+ * @since 1.30
+ */
+class ResourceLoaderMediaWikiUtilModule extends ResourceLoaderFileModule {
+       /**
+        * @inheritdoc
+        */
+       public function getScript( ResourceLoaderContext $context ) {
+               return ResourceLoader::makeConfigSetScript(
+                               [ 'wgFragmentMode' => $this->getConfig()->get( 'FragmentMode' ) ]
+                       )
+                       . "\n"
+                       . parent::getScript( $context );
+       }
+
+       /**
+        * @inheritdoc
+        */
+       public function enableModuleContentVersion() {
+               return true;
+       }
+}
index 0b7fc2f..aad676f 100644 (file)
@@ -678,7 +678,7 @@ abstract class BaseTemplate extends QuickTemplate {
                }
                foreach ( $validFooterIcons as $blockName => $footerIcons ) {
                        $html .= Html::openElement( 'div', [
-                               'id' => 'f-' . Sanitizer::escapeId( $blockName ) . 'ico',
+                               'id' => Sanitizer::escapeIdForAttribute( "f-{$blockName}ico" ),
                                'class' => 'footer-icons'
                        ] );
                        foreach ( $footerIcons as $icon ) {
@@ -691,7 +691,7 @@ abstract class BaseTemplate extends QuickTemplate {
                        foreach ( $validFooterLinks as $aLink ) {
                                $html .= Html::rawElement(
                                        'li',
-                                       [ 'id' => Sanitizer::escapeId( $aLink ) ],
+                                       [ 'id' => Sanitizer::escapeIdForAttribute( $aLink ) ],
                                        $this->get( $aLink )
                                );
                        }
@@ -734,7 +734,7 @@ abstract class BaseTemplate extends QuickTemplate {
                        $out .= Html::rawElement(
                                'div',
                                [
-                                       'id' => Sanitizer::escapeId( "mw-indicator-$id" ),
+                                       'id' => Sanitizer::escapeIdForAttribute( "mw-indicator-$id" ),
                                        'class' => 'mw-indicator',
                                ],
                                $content
index 40905a5..849362a 100644 (file)
@@ -1375,8 +1375,8 @@ abstract class Skin extends ContextSource {
                                        $bar[$heading][] = array_merge( [
                                                'text' => $text,
                                                'href' => $href,
-                                               'id' => 'n-' . Sanitizer::escapeId( strtr( $line[1], ' ', '-' ), 'noninitial' ),
-                                               'active' => false
+                                               'id' => Sanitizer::escapeIdForAttribute( 'n-' . strtr( $line[1], ' ', '-' ) ),
+                                               'active' => false,
                                        ], $extraAttribs );
                                } else {
                                        continue;
index 2c92410..1a04eec 100644 (file)
@@ -69,7 +69,7 @@ class SpecialListGrants extends SpecialPage {
                                $grantCellHtml = '<ul><li>' . implode( "</li>\n<li>", $descs ) . '</li></ul>';
                        }
 
-                       $id = \Sanitizer::escapeId( $grant );
+                       $id = Sanitizer::escapeIdForAttribute( $grant );
                        $out->addHTML( \Html::rawElement( 'tr', [ 'id' => $id ],
                                "<td>" .
                                $this->msg(
index 7a25e55..2315887 100644 (file)
@@ -126,7 +126,7 @@ class SpecialListGroupRights extends SpecialPage {
                                ? $groupsRemoveFromSelf[$group]
                                : [];
 
-                       $id = $group == '*' ? false : Sanitizer::escapeId( $group );
+                       $id = $group == '*' ? false : Sanitizer::escapeIdForAttribute( $group );
                        $out->addHTML( Html::rawElement( 'tr', [ 'id' => $id ], "
                                <td>$grouppage$grouplink</td>
                                        <td>" .
index 30c4a0b..3ea1d03 100644 (file)
@@ -840,7 +840,7 @@ class SpecialVersion extends SpecialPage {
                // Finally! Create the table
                $html = Html::openElement( 'tr', [
                                'class' => 'mw-version-ext',
-                               'id' => Sanitizer::escapeId( 'mw-version-ext-' . $type . '-' . $extension['name'] )
+                               'id' => Sanitizer::escapeIdForAttribute( 'mw-version-ext-' . $type . '-' . $extension['name'] )
                        ]
                );
 
index ca1b7dc..e6a0f0b 100644 (file)
@@ -375,7 +375,9 @@ class AllMessagesTablePager extends TablePager {
                }
 
                if ( !$isSecond ) {
-                       $arr['id'] = Sanitizer::escapeId( 'msg_' . $this->getLanguage()->lcfirst( $row->am_title ) );
+                       $arr['id'] = Sanitizer::escapeIdForAttribute(
+                               'msg_' . $this->getLanguage()->lcfirst( $row->am_title )
+                       );
                }
 
                return $arr;
index 9860328..1663b50 100644 (file)
@@ -1379,6 +1379,7 @@ return [
                ]
        ],
        'mediawiki.util' => [
+               'class' => 'ResourceLoaderMediaWikiUtilModule',
                'scripts' => 'resources/src/mediawiki/mediawiki.util.js',
                'dependencies' => [
                        'jquery.accessKeyLabel',
index 2b6fc9d..706e1fe 100644 (file)
                                        newList.push(
                                                $( '<div>' )
                                                        .addClass( 'mw-indicator' )
-                                                       .attr( 'id', mw.util.escapeId( 'mw-indicator-' + name ) )
+                                                       .attr( 'id', mw.util.escapeIdForAttribute( 'mw-indicator-' + name ) )
                                                        .html( indicator )
                                                        .get( 0 ),
                                                // Add a whitespace between the <div>s because
index 4844e21..d0ec585 100644 (file)
@@ -1,12 +1,59 @@
 ( function ( mw, $ ) {
        'use strict';
 
+       var util;
+
+       /**
+        * Encode the string like PHP's rawurlencode
+        * @ignore
+        *
+        * @param {string} str String to be encoded.
+        * @return {string} Encoded string
+        */
+       function rawurlencode( str ) {
+               str = String( str );
+               return encodeURIComponent( str )
+                       .replace( /!/g, '%21' ).replace( /'/g, '%27' ).replace( /\(/g, '%28' )
+                       .replace( /\)/g, '%29' ).replace( /\*/g, '%2A' ).replace( /~/g, '%7E' );
+       }
+
+       /**
+        * Private helper function used by util.escapeId*()
+        * @ignore
+        *
+        * @param {string} str String to be encoded
+        * @param {string} mode Encoding mode, see documentation for $wgFragmentMode
+        *     in DefaultSettings.php
+        * @return {string} Encoded string
+        */
+       function escapeIdInternal( str, mode ) {
+               str = String( str );
+
+               switch ( mode ) {
+                       case 'html5':
+                               return str.replace( / /g, '_' );
+                       case 'html5-legacy':
+                               str = str.replace( /[ \t\n\r\f_'"&#%]+/g, '_' )
+                                       .replace( /^_+|_+$/, '' );
+                               if ( str === '' ) {
+                                       str = '_';
+                               }
+                               return str;
+                       case 'legacy':
+                               return rawurlencode( str.replace( / /g, '_' ) )
+                                       .replace( /%3A/g, ':' )
+                                       .replace( /%/g, '.' );
+                       default:
+                               throw new Error( 'Unrecognized ID escaping mode ' + mode );
+               }
+       }
+
        /**
         * Utility library
         * @class mw.util
         * @singleton
         */
-       var util = {
+       util = {
 
                /* Main body */
 
                 * @param {string} str String to be encoded.
                 * @return {string} Encoded string
                 */
-               rawurlencode: function ( str ) {
-                       str = String( str );
-                       return encodeURIComponent( str )
-                               .replace( /!/g, '%21' ).replace( /'/g, '%27' ).replace( /\(/g, '%28' )
-                               .replace( /\)/g, '%29' ).replace( /\*/g, '%2A' ).replace( /~/g, '%7E' );
-               },
+               rawurlencode: rawurlencode,
 
                /**
-                * Encode the string like Sanitizer::escapeId in PHP
+                * Encode the string like Sanitizer::escapeId() in PHP
+                * @deprecated since 1.30 use escapeIdForAttribute() or escapeIdForLink()
                 *
                 * @param {string} str String to be encoded.
                 * @return {string} Encoded string
                 */
                escapeId: function ( str ) {
-                       str = String( str );
-                       return util.rawurlencode( str.replace( / /g, '_' ) )
-                               .replace( /%3A/g, ':' )
-                               .replace( /%/g, '.' );
+                       return escapeIdInternal( str, 'legacy' );
+               },
+
+               /**
+                * Encode string into HTML id compatible form suitable for use in HTML
+                * Analog to PHP Sanitizer::escapeIdForAttribute()
+                *
+                * @since 1.30
+                *
+                * @param {string} str String to encode
+                * @return {string} Encoded string
+                */
+               escapeIdForAttribute: function ( str ) {
+                       var mode = mw.config.get( 'wgFragmentMode' )[ 0 ];
+
+                       return escapeIdInternal( str, mode );
+               },
+
+               /**
+                * Encode string into HTML id compatible form suitable for use in links
+                * Analog to PHP Sanitizer::escapeIdForLink()
+                *
+                * @since 1.30
+                *
+                * @param {string} str String to encode
+                * @return {string} Encoded string
+                */
+               escapeIdForLink: function ( str ) {
+                       var mode = mw.config.get( 'wgFragmentMode' )[ 0 ],
+                               id = escapeIdInternal( str, mode );
+
+                       if ( mode === 'html5' ) {
+                               id = encodeURIComponent( id ).replace( /%3A/g, ':' );
+                       }
+
+                       return id;
                },
 
                /**
 
                        // Append the encoded fragment
                        if ( fragment.length ) {
-                               url += '#' + util.escapeId( fragment );
+                               url += '#' + util.escapeIdForLink( fragment );
                        }
 
                        return url;
index feed77f..e578418 100644 (file)
@@ -1055,6 +1055,8 @@ class ParserTestRunner {
                        // wgEnableMagicLinks={"ISBN":false, "PMID":false, "RFC":false}
                        'wgEnableMagicLinks' => self::getOptionValue( 'wgEnableMagicLinks', $opts, [] )
                                + [ 'ISBN' => true, 'PMID' => true, 'RFC' => true ],
+                       // Test with legacy encoding by default until HTML5 is very stable and default
+                       'wgFragmentMode' => [ 'legacy' ],
                ];
 
                if ( $config ) {
index f8ba742..ab79b59 100644 (file)
@@ -28386,3 +28386,144 @@ showindicators
 1&2&3&amp;4&amp;amp;5=Indicator
 
 !! end
+
+!! test
+HTML5 ids: fallback to legacy
+!! config
+wgFragmentMode=[ 'html5', 'legacy' ]
+!! wikitext
+== Foo bar ==
+
+== foo Bar ==
+
+== Тест ==
+
+== Тест ==
+
+== тест ==
+
+== Hey < # " > % : ' ==
+[[#Foo bar]] [[#foo Bar]] [[#Тест]] [[#тест]] [[#Hey < # " > % : ']]
+
+{{anchorencode:💩}} <span id="{{anchorencode:💩}}"></span>
+
+<!-- These two links should produce identical HTML -->
+[[#啤酒]] [[#%E5%95%A4%E9%85%92]]
+
+!! html/php
+<div id="toc" class="toc"><div class="toctitle"><h2>Contents</h2></div>
+<ul>
+<li class="toclevel-1 tocsection-1"><a href="#Foo_bar"><span class="tocnumber">1</span> <span class="toctext">Foo bar</span></a></li>
+<li class="toclevel-1 tocsection-2"><a href="#foo_Bar_2"><span class="tocnumber">2</span> <span class="toctext">foo Bar</span></a></li>
+<li class="toclevel-1 tocsection-3"><a href="#%D0%A2%D0%B5%D1%81%D1%82"><span class="tocnumber">3</span> <span class="toctext">Тест</span></a></li>
+<li class="toclevel-1 tocsection-4"><a href="#%D0%A2%D0%B5%D1%81%D1%82_2"><span class="tocnumber">4</span> <span class="toctext">Тест</span></a></li>
+<li class="toclevel-1 tocsection-5"><a href="#%D1%82%D0%B5%D1%81%D1%82"><span class="tocnumber">5</span> <span class="toctext">тест</span></a></li>
+<li class="toclevel-1 tocsection-6"><a href="#Hey_%3C_%23_%22_%3E_%25_:_%27"><span class="tocnumber">6</span> <span class="toctext">Hey &lt; # " &gt;&#160;%&#160;: '</span></a></li>
+</ul>
+</div>
+
+<h2><span class="mw-headline" id="Foo_bar">Foo bar</span><span class="mw-editsection"><span class="mw-editsection-bracket">[</span><a href="/index.php?title=Parser_test&amp;action=edit&amp;section=1" title="Edit section: Foo bar">edit</a><span class="mw-editsection-bracket">]</span></span></h2>
+<h2><span class="mw-headline" id="foo_Bar_2">foo Bar</span><span class="mw-editsection"><span class="mw-editsection-bracket">[</span><a href="/index.php?title=Parser_test&amp;action=edit&amp;section=2" title="Edit section: foo Bar">edit</a><span class="mw-editsection-bracket">]</span></span></h2>
+<div id=".D0.A2.D0.B5.D1.81.D1.82"></div><h2><span class="mw-headline" id="Тест">Тест</span><span class="mw-editsection"><span class="mw-editsection-bracket">[</span><a href="/index.php?title=Parser_test&amp;action=edit&amp;section=3" title="Edit section: Тест">edit</a><span class="mw-editsection-bracket">]</span></span></h2>
+<div id=".D0.A2.D0.B5.D1.81.D1.82_2"></div><h2><span class="mw-headline" id="Тест_2">Тест</span><span class="mw-editsection"><span class="mw-editsection-bracket">[</span><a href="/index.php?title=Parser_test&amp;action=edit&amp;section=4" title="Edit section: Тест">edit</a><span class="mw-editsection-bracket">]</span></span></h2>
+<div id=".D1.82.D0.B5.D1.81.D1.82"></div><h2><span class="mw-headline" id="тест">тест</span><span class="mw-editsection"><span class="mw-editsection-bracket">[</span><a href="/index.php?title=Parser_test&amp;action=edit&amp;section=5" title="Edit section: тест">edit</a><span class="mw-editsection-bracket">]</span></span></h2>
+<div id="Hey_.3C_.23_.22_.3E_.25_:_.27"></div><h2><span class="mw-headline" id="Hey_&lt;_#_&quot;_&gt;_%_:_'">Hey &lt; # " &gt;&#160;%&#160;: '</span><span class="mw-editsection"><span class="mw-editsection-bracket">[</span><a href="/index.php?title=Parser_test&amp;action=edit&amp;section=6" title="Edit section: Hey &lt; # &quot; &gt; % : '">edit</a><span class="mw-editsection-bracket">]</span></span></h2>
+<p><a href="#Foo_bar">#Foo bar</a> <a href="#foo_Bar">#foo Bar</a> <a href="#%D0%A2%D0%B5%D1%81%D1%82">#Тест</a> <a href="#%D1%82%D0%B5%D1%81%D1%82">#тест</a> <a href="#Hey_%3C_%23_%22_%3E_%25_:_%27">#Hey &lt; # " &gt;&#160;%&#160;: '</a>
+</p><p>%F0%9F%92%A9 <span id="%F0%9F%92%A9"></span>
+</p><p><a href="#%E5%95%A4%E9%85%92">#啤酒</a> <a href="#%E5%95%A4%E9%85%92">#啤酒</a>
+</p>
+!! end
+
+!! test
+HTML5 ids: legacy with a fallback to modern
+!! config
+wgFragmentMode=[ 'legacy', 'html5' ]
+!! wikitext
+== Foo bar ==
+
+== foo Bar ==
+
+== Тест ==
+
+== Тест ==
+
+== тест ==
+
+== Hey < # " > % : ' ==
+[[#Foo bar]] [[#foo Bar]] [[#Тест]] [[#тест]] [[#Hey < # " > % : ']]
+
+{{anchorencode:💩}} <span id="{{anchorencode:💩}}"></span>
+
+<!-- These two links should produce identical HTML -->
+[[#啤酒]] [[#%E5%95%A4%E9%85%92]]
+
+!! html/php
+<div id="toc" class="toc"><div class="toctitle"><h2>Contents</h2></div>
+<ul>
+<li class="toclevel-1 tocsection-1"><a href="#Foo_bar"><span class="tocnumber">1</span> <span class="toctext">Foo bar</span></a></li>
+<li class="toclevel-1 tocsection-2"><a href="#foo_Bar_2"><span class="tocnumber">2</span> <span class="toctext">foo Bar</span></a></li>
+<li class="toclevel-1 tocsection-3"><a href="#.D0.A2.D0.B5.D1.81.D1.82"><span class="tocnumber">3</span> <span class="toctext">Тест</span></a></li>
+<li class="toclevel-1 tocsection-4"><a href="#.D0.A2.D0.B5.D1.81.D1.82_2"><span class="tocnumber">4</span> <span class="toctext">Тест</span></a></li>
+<li class="toclevel-1 tocsection-5"><a href="#.D1.82.D0.B5.D1.81.D1.82"><span class="tocnumber">5</span> <span class="toctext">тест</span></a></li>
+<li class="toclevel-1 tocsection-6"><a href="#Hey_.3C_.23_.22_.3E_.25_:_.27"><span class="tocnumber">6</span> <span class="toctext">Hey &lt; # " &gt;&#160;%&#160;: '</span></a></li>
+</ul>
+</div>
+
+<h2><span class="mw-headline" id="Foo_bar">Foo bar</span><span class="mw-editsection"><span class="mw-editsection-bracket">[</span><a href="/index.php?title=Parser_test&amp;action=edit&amp;section=1" title="Edit section: Foo bar">edit</a><span class="mw-editsection-bracket">]</span></span></h2>
+<h2><span class="mw-headline" id="foo_Bar_2">foo Bar</span><span class="mw-editsection"><span class="mw-editsection-bracket">[</span><a href="/index.php?title=Parser_test&amp;action=edit&amp;section=2" title="Edit section: foo Bar">edit</a><span class="mw-editsection-bracket">]</span></span></h2>
+<div id="Тест"></div><h2><span class="mw-headline" id=".D0.A2.D0.B5.D1.81.D1.82">Тест</span><span class="mw-editsection"><span class="mw-editsection-bracket">[</span><a href="/index.php?title=Parser_test&amp;action=edit&amp;section=3" title="Edit section: Тест">edit</a><span class="mw-editsection-bracket">]</span></span></h2>
+<div id="Тест_2"></div><h2><span class="mw-headline" id=".D0.A2.D0.B5.D1.81.D1.82_2">Тест</span><span class="mw-editsection"><span class="mw-editsection-bracket">[</span><a href="/index.php?title=Parser_test&amp;action=edit&amp;section=4" title="Edit section: Тест">edit</a><span class="mw-editsection-bracket">]</span></span></h2>
+<div id="тест"></div><h2><span class="mw-headline" id=".D1.82.D0.B5.D1.81.D1.82">тест</span><span class="mw-editsection"><span class="mw-editsection-bracket">[</span><a href="/index.php?title=Parser_test&amp;action=edit&amp;section=5" title="Edit section: тест">edit</a><span class="mw-editsection-bracket">]</span></span></h2>
+<div id="Hey_&lt;_#_&quot;_&gt;_%_:_'"></div><h2><span class="mw-headline" id="Hey_.3C_.23_.22_.3E_.25_:_.27">Hey &lt; # " &gt;&#160;%&#160;: '</span><span class="mw-editsection"><span class="mw-editsection-bracket">[</span><a href="/index.php?title=Parser_test&amp;action=edit&amp;section=6" title="Edit section: Hey &lt; # &quot; &gt; % : '">edit</a><span class="mw-editsection-bracket">]</span></span></h2>
+<p><a href="#Foo_bar">#Foo bar</a> <a href="#foo_Bar">#foo Bar</a> <a href="#.D0.A2.D0.B5.D1.81.D1.82">#Тест</a> <a href="#.D1.82.D0.B5.D1.81.D1.82">#тест</a> <a href="#Hey_.3C_.23_.22_.3E_.25_:_.27">#Hey &lt; # " &gt;&#160;%&#160;: '</a>
+</p><p>.F0.9F.92.A9 <span id=".F0.9F.92.A9"></span>
+</p><p><a href="#.E5.95.A4.E9.85.92">#啤酒</a> <a href="#.E5.95.A4.E9.85.92">#啤酒</a>
+</p>
+!! end
+
+!! test
+HTML5 ids: no legacy
+!! config
+wgFragmentMode=[ 'html5' ]
+!! wikitext
+== Foo bar ==
+
+== foo Bar ==
+
+== Тест ==
+
+== Тест ==
+
+== тест ==
+
+== Hey < # " > % : ' ==
+[[#Foo bar]] [[#foo Bar]] [[#Тест]] [[#тест]] [[#Hey < # " > % : ']]
+
+{{anchorencode:💩}} <span id="{{anchorencode:💩}}"></span>
+
+<!-- These two links should produce identical HTML -->
+[[#啤酒]] [[#%E5%95%A4%E9%85%92]]
+
+!! html/php
+<div id="toc" class="toc"><div class="toctitle"><h2>Contents</h2></div>
+<ul>
+<li class="toclevel-1 tocsection-1"><a href="#Foo_bar"><span class="tocnumber">1</span> <span class="toctext">Foo bar</span></a></li>
+<li class="toclevel-1 tocsection-2"><a href="#foo_Bar_2"><span class="tocnumber">2</span> <span class="toctext">foo Bar</span></a></li>
+<li class="toclevel-1 tocsection-3"><a href="#%D0%A2%D0%B5%D1%81%D1%82"><span class="tocnumber">3</span> <span class="toctext">Тест</span></a></li>
+<li class="toclevel-1 tocsection-4"><a href="#%D0%A2%D0%B5%D1%81%D1%82_2"><span class="tocnumber">4</span> <span class="toctext">Тест</span></a></li>
+<li class="toclevel-1 tocsection-5"><a href="#%D1%82%D0%B5%D1%81%D1%82"><span class="tocnumber">5</span> <span class="toctext">тест</span></a></li>
+<li class="toclevel-1 tocsection-6"><a href="#Hey_%3C_%23_%22_%3E_%25_:_%27"><span class="tocnumber">6</span> <span class="toctext">Hey &lt; # " &gt;&#160;%&#160;: '</span></a></li>
+</ul>
+</div>
+
+<h2><span class="mw-headline" id="Foo_bar">Foo bar</span><span class="mw-editsection"><span class="mw-editsection-bracket">[</span><a href="/index.php?title=Parser_test&amp;action=edit&amp;section=1" title="Edit section: Foo bar">edit</a><span class="mw-editsection-bracket">]</span></span></h2>
+<h2><span class="mw-headline" id="foo_Bar_2">foo Bar</span><span class="mw-editsection"><span class="mw-editsection-bracket">[</span><a href="/index.php?title=Parser_test&amp;action=edit&amp;section=2" title="Edit section: foo Bar">edit</a><span class="mw-editsection-bracket">]</span></span></h2>
+<h2><span class="mw-headline" id="Тест">Тест</span><span class="mw-editsection"><span class="mw-editsection-bracket">[</span><a href="/index.php?title=Parser_test&amp;action=edit&amp;section=3" title="Edit section: Тест">edit</a><span class="mw-editsection-bracket">]</span></span></h2>
+<h2><span class="mw-headline" id="Тест_2">Тест</span><span class="mw-editsection"><span class="mw-editsection-bracket">[</span><a href="/index.php?title=Parser_test&amp;action=edit&amp;section=4" title="Edit section: Тест">edit</a><span class="mw-editsection-bracket">]</span></span></h2>
+<h2><span class="mw-headline" id="тест">тест</span><span class="mw-editsection"><span class="mw-editsection-bracket">[</span><a href="/index.php?title=Parser_test&amp;action=edit&amp;section=5" title="Edit section: тест">edit</a><span class="mw-editsection-bracket">]</span></span></h2>
+<h2><span class="mw-headline" id="Hey_&lt;_#_&quot;_&gt;_%_:_'">Hey &lt; # " &gt;&#160;%&#160;: '</span><span class="mw-editsection"><span class="mw-editsection-bracket">[</span><a href="/index.php?title=Parser_test&amp;action=edit&amp;section=6" title="Edit section: Hey &lt; # &quot; &gt; % : '">edit</a><span class="mw-editsection-bracket">]</span></span></h2>
+<p><a href="#Foo_bar">#Foo bar</a> <a href="#foo_Bar">#foo Bar</a> <a href="#%D0%A2%D0%B5%D1%81%D1%82">#Тест</a> <a href="#%D1%82%D0%B5%D1%81%D1%82">#тест</a> <a href="#Hey_%3C_%23_%22_%3E_%25_:_%27">#Hey &lt; # " &gt;&#160;%&#160;: '</a>
+</p><p>%F0%9F%92%A9 <span id="%F0%9F%92%A9"></span>
+</p><p><a href="#%E5%95%A4%E9%85%92">#啤酒</a> <a href="#%E5%95%A4%E9%85%92">#啤酒</a>
+</p>
+!! end
index 6d093b0..d506623 100644 (file)
@@ -3,6 +3,8 @@
 /**
  * @todo Tests covering decodeCharReferences can be refactored into a single
  * method and dataprovider.
+ *
+ * @group Sanitizer
  */
 class SanitizerTest extends MediaWikiTestCase {
 
@@ -379,7 +381,7 @@ class SanitizerTest extends MediaWikiTestCase {
        }
 
        /**
-        * Test escapeIdReferenceList for consistency with escapeId
+        * Test escapeIdReferenceList for consistency with escapeIdForAttribute
         *
         * @dataProvider provideEscapeIdReferenceList
         * @covers Sanitizer::escapeIdReferenceList
@@ -387,9 +389,9 @@ class SanitizerTest extends MediaWikiTestCase {
        public function testEscapeIdReferenceList( $referenceList, $id1, $id2 ) {
                $this->assertEquals(
                        Sanitizer::escapeIdReferenceList( $referenceList, 'noninitial' ),
-                       Sanitizer::escapeId( $id1, 'noninitial' )
+                       Sanitizer::escapeIdForAttribute( $id1 )
                                . ' '
-                               . Sanitizer::escapeId( $id2, 'noninitial' )
+                               . Sanitizer::escapeIdForAttribute( $id2 )
                );
        }
 
@@ -422,4 +424,119 @@ class SanitizerTest extends MediaWikiTestCase {
                        [ 'data-mwfoo', true ], // could be false but this is how it's implemented currently
                ];
        }
+
+       /**
+        * @dataProvider provideEscapeIdForStuff
+        *
+        * @covers Sanitizer::escapeIdForAttribute()
+        * @covers Sanitizer::escapeIdForLink()
+        * @covers Sanitizer::escapeIdForExternalInterwiki()
+        * @covers Sanitizer::escapeIdInternal()
+        * @covers Sanitizer::urlEscapeId()
+        *
+        * @param string $stuff
+        * @param string[] $config
+        * @param string $id
+        * @param string|false $expected
+        * @param int|null $mode
+        */
+       public function testEscapeIdForStuff( $stuff, array $config, $id, $expected, $mode = null ) {
+               $func = "Sanitizer::escapeIdFor{$stuff}";
+               $iwFlavor = array_pop( $config );
+               $this->setMwGlobals( [
+                       'wgFragmentMode' => $config,
+                       'wgExternalInterwikiFragmentMode' => $iwFlavor,
+               ] );
+               $escaped = call_user_func( $func, $id, $mode );
+               self::assertEquals( $expected, $escaped );
+       }
+
+       public function provideEscapeIdForStuff() {
+               // Test inputs and outputs
+               $text = 'foo тест_#%!\'()[]:<>';
+               $legacyEncoded = 'foo_.D1.82.D0.B5.D1.81.D1.82_.23.25.21.27.28.29.5B.5D:.3C.3E';
+               $html5Encoded = 'foo_тест_#%!\'()[]:<>';
+               $html5Escaped = 'foo_%D1%82%D0%B5%D1%81%D1%82_%23%25%21%27%28%29%5B%5D:%3C%3E';
+               $html5Experimental = 'foo_тест_!_()[]:<>';
+
+               // Settings: last element is $wgExternalInterwikiFragmentMode, the rest is $wgFragmentMode
+               $legacy = [ 'legacy', 'legacy' ];
+               $legacyNew = [ 'legacy', 'html5', 'legacy' ];
+               $newLegacy = [ 'html5', 'legacy', 'legacy' ];
+               $new = [ 'html5', 'legacy' ];
+               $allNew = [ 'html5', 'html5' ];
+               $experimentalLegacy = [ 'html5-legacy', 'legacy', 'legacy' ];
+               $newExperimental = [ 'html5', 'html5-legacy', 'legacy' ];
+
+               return [
+                       // Pure legacy: how MW worked before 2017
+                       [ 'Attribute', $legacy, $text, $legacyEncoded, Sanitizer::ID_PRIMARY ],
+                       [ 'Attribute', $legacy, $text, false, Sanitizer::ID_FALLBACK ],
+                       [ 'Link', $legacy, $text, $legacyEncoded ],
+                       [ 'ExternalInterwiki', $legacy, $text, $legacyEncoded ],
+
+                       // Transition to a new world: legacy links with HTML5 fallback
+                       [ 'Attribute', $legacyNew, $text, $legacyEncoded, Sanitizer::ID_PRIMARY ],
+                       [ 'Attribute', $legacyNew, $text, $html5Encoded, Sanitizer::ID_FALLBACK ],
+                       [ 'Link', $legacyNew, $text, $legacyEncoded ],
+                       [ 'ExternalInterwiki', $legacyNew, $text, $legacyEncoded ],
+
+                       // New world: HTML5 links, legacy fallbacks
+                       [ 'Attribute', $newLegacy, $text, $html5Encoded, Sanitizer::ID_PRIMARY ],
+                       [ 'Attribute', $newLegacy, $text, $legacyEncoded, Sanitizer::ID_FALLBACK ],
+                       [ 'Link', $newLegacy, $text, $html5Escaped ],
+                       [ 'ExternalInterwiki', $newLegacy, $text, $legacyEncoded ],
+
+                       // Distant future: no legacy fallbacks, but still linking to leagacy wikis
+                       [ 'Attribute', $new, $text, $html5Encoded, Sanitizer::ID_PRIMARY ],
+                       [ 'Attribute', $new, $text, false, Sanitizer::ID_FALLBACK ],
+                       [ 'Link', $new, $text, $html5Escaped ],
+                       [ 'ExternalInterwiki', $new, $text, $legacyEncoded ],
+
+                       // Just before the heat death of universe: external interwikis are also HTML5 \m/
+                       [ 'Attribute', $allNew, $text, $html5Encoded, Sanitizer::ID_PRIMARY ],
+                       [ 'Attribute', $allNew, $text, false, Sanitizer::ID_FALLBACK ],
+                       [ 'Link', $allNew, $text, $html5Escaped ],
+                       [ 'ExternalInterwiki', $allNew, $text, $html5Escaped ],
+
+                       // Someone flipped $wgExperimentalHtmlIds on
+                       [ 'Attribute', $experimentalLegacy, $text, $html5Experimental, Sanitizer::ID_PRIMARY ],
+                       [ 'Attribute', $experimentalLegacy, $text, $legacyEncoded, Sanitizer::ID_FALLBACK ],
+                       [ 'Link', $experimentalLegacy, $text, $html5Experimental ],
+                       [ 'ExternalInterwiki', $experimentalLegacy, $text, $legacyEncoded ],
+
+                       // Migration from $wgExperimentalHtmlIds to modern HTML5
+                       [ 'Attribute', $newExperimental, $text, $html5Encoded, Sanitizer::ID_PRIMARY ],
+                       [ 'Attribute', $newExperimental, $text, $html5Experimental, Sanitizer::ID_FALLBACK ],
+                       [ 'Link', $newExperimental, $text, $html5Escaped ],
+                       [ 'ExternalInterwiki', $newExperimental, $text, $legacyEncoded ],
+               ];
+       }
+
+       /**
+        * @expectedException InvalidArgumentException
+        * @covers Sanitizer::escapeIdInternal()
+        */
+       public function testInvalidFragmentThrows() {
+               $this->setMwGlobals( 'wgFragmentMode', [ 'boom!' ] );
+               Sanitizer::escapeIdForAttribute( 'This should throw' );
+       }
+
+       /**
+        * @expectedException UnexpectedValueException
+        * @covers Sanitizer::escapeIdForAttribute()
+        */
+       public function testNoPrimaryFragmentModeThrows() {
+               $this->setMwGlobals( 'wgFragmentMode', [ 666 => 'html5' ] );
+               Sanitizer::escapeIdForAttribute( 'This should throw' );
+       }
+
+       /**
+        * @expectedException UnexpectedValueException
+        * @covers Sanitizer::escapeIdForLink()
+        */
+       public function testNoPrimaryFragmentModeThrows2() {
+               $this->setMwGlobals( 'wgFragmentMode', [ 666 => 'html5' ] );
+               Sanitizer::escapeIdForLink( 'This should throw' );
+       }
 }
index da04c8d..2efe9cd 100644 (file)
@@ -93,7 +93,7 @@
        } );
 
        QUnit.test( 'escapeId', function ( assert ) {
-               mw.config.set( 'wgExperimentalHtmlIds', false );
+               mw.config.set( 'wgFragmentMode', [ 'legacy' ] );
                $.each( {
                        '+': '.2B',
                        '&': '.26',
                } );
        } );
 
+       QUnit.test( 'escapeIdForAttribute', function ( assert ) {
+               // Test cases are kept in sync with SanitizerTest.php
+               var text = 'foo тест_#%!\'()[]:<>',
+                       legacyEncoded = 'foo_.D1.82.D0.B5.D1.81.D1.82_.23.25.21.27.28.29.5B.5D:.3C.3E',
+                       html5Encoded = 'foo_тест_#%!\'()[]:<>',
+                       html5Experimental = 'foo_тест_!_()[]:<>',
+                       // Settings: this is $wgFragmentMode
+                       legacy = [ 'legacy' ],
+                       legacyNew = [ 'legacy', 'html5' ],
+                       newLegacy = [ 'html5', 'legacy' ],
+                       allNew = [ 'html5' ],
+                       experimentalLegacy = [ 'html5-legacy', 'legacy' ],
+                       newExperimental = [ 'html5', 'html5-legacy' ];
+
+               // Test cases are kept in sync with SanitizerTest.php
+               $.each( [
+                       // Pure legacy: how MW worked before 2017
+                       [ legacy, text, legacyEncoded ],
+                       // Transition to a new world: legacy links with HTML5 fallback
+                       [ legacyNew, text, legacyEncoded ],
+                       // New world: HTML5 links, legacy fallbacks
+                       [ newLegacy, text, html5Encoded ],
+                       // Distant future: no legacy fallbacks
+                       [ allNew, text, html5Encoded ],
+                       // Someone flipped $wgExperimentalHtmlIds on
+                       [ experimentalLegacy, text, html5Experimental ],
+                       // Migration from $wgExperimentalHtmlIds to modern HTML5
+                       [ newExperimental, text, html5Encoded ]
+               ], function ( index, testCase ) {
+                       mw.config.set( 'wgFragmentMode', testCase[ 0 ] );
+
+                       assert.equal( util.escapeIdForAttribute( testCase[ 1 ] ), testCase[ 2 ] );
+               } );
+       } );
+
+       QUnit.test( 'escapeIdForLink', function ( assert ) {
+               // Test cases are kept in sync with SanitizerTest.php
+               var text = 'foo тест_#%!\'()[]:<>',
+                       legacyEncoded = 'foo_.D1.82.D0.B5.D1.81.D1.82_.23.25.21.27.28.29.5B.5D:.3C.3E',
+                       html5Escaped = 'foo_%D1%82%D0%B5%D1%81%D1%82_%23%25!\'()%5B%5D:%3C%3E',
+                       html5Experimental = 'foo_тест_!_()[]:<>',
+                       // Settings: this is wgFragmentMode
+                       legacy = [ 'legacy' ],
+                       legacyNew = [ 'legacy', 'html5' ],
+                       newLegacy = [ 'html5', 'legacy' ],
+                       allNew = [ 'html5' ],
+                       experimentalLegacy = [ 'html5-legacy', 'legacy' ],
+                       newExperimental = [ 'html5', 'html5-legacy' ];
+
+               $.each( [
+                       // Pure legacy: how MW worked before 2017
+                       [ legacy, text, legacyEncoded ],
+                       // Transition to a new world: legacy links with HTML5 fallback
+                       [ legacyNew, text, legacyEncoded ],
+                       // New world: HTML5 links, legacy fallbacks
+                       [ newLegacy, text, html5Escaped ],
+                       // Distant future: no legacy fallbacks
+                       [ allNew, text, html5Escaped ],
+                       // Someone flipped wgExperimentalHtmlIds on
+                       [ experimentalLegacy, text, html5Experimental ],
+                       // Migration from wgExperimentalHtmlIds to modern HTML5
+                       [ newExperimental, text, html5Escaped ]
+               ], function ( index, testCase ) {
+                       mw.config.set( 'wgFragmentMode', testCase[ 0 ] );
+
+                       assert.equal( util.escapeIdForLink( testCase[ 1 ] ), testCase[ 2 ] );
+               } );
+       } );
+
        QUnit.test( 'wikiUrlencode', function ( assert ) {
                assert.equal( util.wikiUrlencode( 'Test:A & B/Here' ), 'Test:A_%26_B/Here' );
                // See also wfUrlencodeTest.php#provideURLS