Merge "Sanitizer: Allow attribute names to use any Unicode "Letter" or "Number""
author jenkins-bot <jenkins-bot@gerrit.wikimedia.org>
Wed, 12 Jul 2017 15:13:19 +0000 (15:13 +0000)
committer Gerrit Code Review <gerrit@wikimedia.org>
Wed, 12 Jul 2017 15:13:19 +0000 (15:13 +0000)
1  2 
includes/Sanitizer.php

diff --combined includes/Sanitizer.php
@@@ -339,8 -339,8 +339,8 @@@ class Sanitizer 
         */
        static function getAttribsRegex() {
                if ( self::$attribsRegex === null ) {
-                       $attribFirst = '[:A-Z_a-z0-9]';
-                       $attrib = '[:A-Z_a-z-.0-9]';
+                       $attribFirst = "[:_\p{L}\p{N}]";
+                       $attrib = "[:_\.\-\p{L}\p{N}]";
                        $space = '[\x09\x0a\x0c\x0d\x20]';
                        self::$attribsRegex =
                                "/(?:^|$space)({$attribFirst}{$attrib}*)
                                                | '([^']*)(?:'|\$)
                                                | (((?!$space|>).)*)
                                        )
-                               )?(?=$space|\$)/sx";
+                               )?(?=$space|\$)/sxu";
                }
                return self::$attribsRegex;
        }
                        }
  
                        # Strip javascript "expression" from stylesheets.
 -                      # http://msdn.microsoft.com/workshop/author/dhtml/overview/recalc.asp
 +                      # https://msdn.microsoft.com/en-us/library/ms537634.aspx
                        if ( $attribute == 'style' ) {
                                $value = Sanitizer::checkCss( $value );
                        }
         * @return string normalized css
         */
        public static function normalizeCss( $value ) {
 -
                // Decode character references like &#123;
                $value = Sanitizer::decodeCharReferences( $value );