Merge "SpecialMovepage: Convert form to use OOUI controls"
[lhc/web/wiklou.git] / includes / Sanitizer.php
index 387f24f..de63af7 100644 (file)
@@ -346,12 +346,9 @@ class Sanitizer {
                                  ($space*=$space*
                                        (?:
                                         # The attribute value: quoted or alone
-                                         \"([^<\"]*)\"
-                                        | '([^<']*)'
+                                         \"([^<\"]*)(?:\"|\$)
+                                        | '([^<']*)(?:'|\$)
                                         |  ([a-zA-Z0-9!#$%&()*,\\-.\\/:;<>?@[\\]^_`{|}~]+)
-                                        |  (\#[0-9a-fA-F]+) # Technically wrong, but lots of
-                                                                                # colors are specified like this.
-                                                                                # We'll be normalizing it.
                                        )
                                )?(?=$space|\$)/sx";
                }
@@ -457,15 +454,13 @@ class Sanitizer {
        public static function removeHTMLtags( $text, $processCallback = null,
                $args = array(), $extratags = array(), $removetags = array()
        ) {
-               global $wgUseTidy;
-
                extract( self::getRecognizedTagData( $extratags, $removetags ) );
 
                # Remove HTML comments
                $text = Sanitizer::removeHTMLcomments( $text );
                $bits = explode( '<', $text );
                $text = str_replace( '>', '&gt;', array_shift( $bits ) );
-               if ( !$wgUseTidy ) {
+               if ( !MWTidy::isEnabled() ) {
                        $tagstack = $tablestack = array();
                        foreach ( $bits as $x ) {
                                $regs = array();
@@ -966,7 +961,8 @@ class Sanitizer {
                $value = self::normalizeCss( $value );
 
                // Reject problematic keywords and control characters
-               if ( preg_match( '/[\000-\010\013\016-\037\177]/', $value ) ) {
+               if ( preg_match( '/[\000-\010\013\016-\037\177]/', $value ) ||
+                       strpos( $value, UtfNormal\Constants::UTF8_REPLACEMENT ) !== false ) {
                        return '/* invalid control char */';
                } elseif ( preg_match(
                        '! expression
@@ -1263,10 +1259,7 @@ class Sanitizer {
         * @return string
         */
        private static function getTagAttributeCallback( $set ) {
-               if ( isset( $set[6] ) ) {
-                       # Illegal #XXXXXX color with no quotes.
-                       return $set[6];
-               } elseif ( isset( $set[5] ) ) {
+               if ( isset( $set[5] ) ) {
                        # No quotes.
                        return $set[5];
                } elseif ( isset( $set[4] ) ) {
@@ -1399,15 +1392,19 @@ class Sanitizer {
        }
 
        /**
-        * Returns true if a given Unicode codepoint is a valid character in XML.
+        * Returns true if a given Unicode codepoint is a valid character in
+        * both HTML5 and XML.
         * @param int $codepoint
         * @return bool
         */
        private static function validateCodepoint( $codepoint ) {
+               # U+000C is valid in HTML5 but not allowed in XML.
+               # U+000D is valid in XML but not allowed in HTML5.
+               # U+007F - U+009F are disallowed in HTML5 (control characters).
                return $codepoint == 0x09
                        || $codepoint == 0x0a
-                       || $codepoint == 0x0d
-                       || ( $codepoint >= 0x20 && $codepoint <= 0xd7ff )
+                       || ( $codepoint >= 0x20 && $codepoint <= 0x7e )
+                       || ( $codepoint >= 0xa0 && $codepoint <= 0xd7ff )
                        || ( $codepoint >= 0xe000 && $codepoint <= 0xfffd )
                        || ( $codepoint >= 0x10000 && $codepoint <= 0x10ffff );
        }
@@ -1809,6 +1806,11 @@ class Sanitizer {
 
                        $host = preg_replace( $strip, '', $host );
 
+                       // IPv6 host names are bracketed with [].  Url-decode these.
+                       if ( substr_compare( "//%5B", $host, 0, 5 ) === 0 && preg_match( '!^//%5B([0-9A-Fa-f:.]+)%5D((:\d+)?)$!', $host, $matches ) ) {
+                               $host = '//[' . $matches[1] . ']' . $matches[2];
+                       }
+
                        // @todo FIXME: Validate hostnames here
 
                        return $protocol . $host . $rest;