X-Git-Url: https://git.heureux-cyclage.org/?a=blobdiff_plain;f=includes%2FSanitizer.php;h=c02bdc996f8de6167ce0149ceea8052413d65f2b;hb=7ab2c2e2c3db05e4135d5cf2f117db13f837ff13;hp=ddaf1b2d99d1c29db5a22f71f5576f150783221d;hpb=8b703a7fa00a388cdee36bdfd2f444c72a6b6e50;p=lhc%2Fweb%2Fwiklou.git diff --git a/includes/Sanitizer.php b/includes/Sanitizer.php index ddaf1b2d99..c02bdc996f 100644 --- a/includes/Sanitizer.php +++ b/includes/Sanitizer.php @@ -349,9 +349,6 @@ class Sanitizer { \"([^<\"]*)\" | '([^<']*)' | ([a-zA-Z0-9!#$%&()*,\\-.\\/:;<>?@[\\]^_`{|}~]+) - | (\#[0-9a-fA-F]+) # Technically wrong, but lots of - # colors are specified like this. - # We'll be normalizing it. ) )?(?=$space|\$)/sx"; } @@ -966,7 +963,8 @@ class Sanitizer { $value = self::normalizeCss( $value ); // Reject problematic keywords and control characters - if ( preg_match( '/[\000-\010\013\016-\037\177]/', $value ) ) { + if ( preg_match( '/[\000-\010\013\016-\037\177]/', $value ) || + strpos( $value, UtfNormal\Constants::UTF8_REPLACEMENT ) !== false ) { return '/* invalid control char */'; } elseif ( preg_match( '! expression @@ -1263,10 +1261,7 @@ class Sanitizer { * @return string */ private static function getTagAttributeCallback( $set ) { - if ( isset( $set[6] ) ) { - # Illegal #XXXXXX color with no quotes. - return $set[6]; - } elseif ( isset( $set[5] ) ) { + if ( isset( $set[5] ) ) { # No quotes. return $set[5]; } elseif ( isset( $set[4] ) ) { @@ -1276,9 +1271,10 @@ class Sanitizer { # Double-quoted return $set[3]; } elseif ( !isset( $set[2] ) ) { - # In XHTML, attributes must have a value. - # For 'reduced' form, return explicitly the attribute name here. - return $set[1]; + # In XHTML, attributes must have a value so return an empty string. + # See "Empty attribute syntax", + # http://www.w3.org/TR/html5/syntax.html#syntax-attribute-name + return ""; } else { throw new MWException( "Tag conditions not met. This should never happen and is a bug." ); } @@ -1398,15 +1394,19 @@ class Sanitizer { } /** - * Returns true if a given Unicode codepoint is a valid character in XML. + * Returns true if a given Unicode codepoint is a valid character in + * both HTML5 and XML. * @param int $codepoint * @return bool */ private static function validateCodepoint( $codepoint ) { + # U+000C is valid in HTML5 but not allowed in XML. + # U+000D is valid in XML but not allowed in HTML5. + # U+007F - U+009F are disallowed in HTML5 (control characters). return $codepoint == 0x09 || $codepoint == 0x0a - || $codepoint == 0x0d - || ( $codepoint >= 0x20 && $codepoint <= 0xd7ff ) + || ( $codepoint >= 0x20 && $codepoint <= 0x7e ) + || ( $codepoint >= 0xa0 && $codepoint <= 0xd7ff ) || ( $codepoint >= 0xe000 && $codepoint <= 0xfffd ) || ( $codepoint >= 0x10000 && $codepoint <= 0x10ffff ); } @@ -1808,6 +1808,11 @@ class Sanitizer { $host = preg_replace( $strip, '', $host ); + // IPv6 host names are bracketed with []. Url-decode these. + if ( substr_compare( "//%5B", $host, 0, 5 ) === 0 && preg_match( '!^//%5B([0-9A-Fa-f:.]+)%5D((:\d+)?)$!', $host, $matches ) ) { + $host = '//[' . $matches[1] . ']' . $matches[2]; + } + // @todo FIXME: Validate hostnames here return $protocol . $host . $rest;