Reverted r111188: backport conflict fodder
[lhc/web/wiklou.git] / includes / Sanitizer.php
index 83cd5fd..196abd9 100644 (file)
@@ -33,7 +33,7 @@ class Sanitizer {
         * Regular expression to match various types of character references in
         * Sanitizer::normalizeCharReferences and Sanitizer::decodeCharReferences
         */
-       const CHAR_REFS_REGEX = 
+       const CHAR_REFS_REGEX =
                '/&([A-Za-z0-9\x80-\xff]+);
                 |&\#([0-9]+);
                 |&\#[xX]([0-9A-Fa-f]+);
@@ -335,7 +335,7 @@ class Sanitizer {
                        $attribFirst = '[:A-Z_a-z0-9]';
                        $attrib = '[:A-Z_a-z-.0-9]';
                        $space = '[\x09\x0a\x0d\x20]';
-                       self::$attribsRegex = 
+                       self::$attribsRegex =
                                "/(?:^|$space)({$attribFirst}{$attrib}*)
                                  ($space*=$space*
                                        (?:
@@ -457,16 +457,26 @@ class Sanitizer {
                                                                # and see if we find a match below them
                                                                $optstack = array();
                                                                array_push( $optstack, $ot );
-                                                               $ot = @array_pop( $tagstack );
+                                                               wfSuppressWarnings();
+                                                               $ot = array_pop( $tagstack );
+                                                               wfRestoreWarnings();
                                                                while ( $ot != $t && isset( $htmlsingleallowed[$ot] ) ) {
                                                                        array_push( $optstack, $ot );
-                                                                       $ot = @array_pop( $tagstack );
+                                                                       wfSuppressWarnings();
+                                                                       $ot = array_pop( $tagstack );
+                                                                       wfRestoreWarnings();
                                                                }
                                                                if ( $t != $ot ) {
                                                                        # No match. Push the optional elements back again
                                                                        $badtag = true;
-                                                                       while ( $ot = @array_pop( $optstack ) ) {
+                                                                       wfSuppressWarnings();
+                                                                       $ot = array_pop( $optstack );
+                                                                       wfRestoreWarnings();
+                                                                       while ( $ot ) {
                                                                                array_push( $tagstack, $ot );
+                                                                               wfSuppressWarnings();
+                                                                               $ot = array_pop( $optstack );
+                                                                               wfRestoreWarnings();
                                                                        }
                                                                }
                                                        } else {
@@ -608,7 +618,7 @@ class Sanitizer {
         * This does not validate properties, so you should ensure that you call
         * validateTagAttributes AFTER this to ensure that the resulting style rule
         * this may add is safe.
-        * 
+        *
         * - Converts most presentational attributes like align into inline css
         *
         * @param $attribs Array
@@ -617,19 +627,19 @@ class Sanitizer {
         */
        static function fixDeprecatedAttributes( $attribs, $element ) {
                global $wgHtml5, $wgCleanupPresentationalAttributes;
-               
+
                // presentational attributes were removed from html5, we can leave them
                // in when html5 is turned off
                if ( !$wgHtml5 || !$wgCleanupPresentationalAttributes ) {
                        return $attribs;
                }
-               
+
                $table = array( 'table' );
                $cells = array( 'td', 'th' );
                $colls = array( 'col', 'colgroup' );
                $tblocks = array( 'tbody', 'tfoot', 'thead' );
                $h = array( 'h1', 'h2', 'h3', 'h4', 'h5', 'h6' );
-               
+
                $presentationalAttribs = array(
                        'align' => array( 'text-align', array_merge( array( 'caption', 'hr', 'div', 'p', 'tr' ), $table, $cells, $colls, $tblocks, $h ) ),
                        'clear' => array( 'clear', array( 'br' ) ),
@@ -640,40 +650,53 @@ class Sanitizer {
                        'valign' => array( 'vertical-align', array_merge( $cells, $colls, $tblocks ) ),
                        'width' => array( 'width', array_merge( array( 'hr', 'pre' ), $table, $cells, $colls ) ),
                );
-               
+
+               // Ensure that any upper case or mixed case attributes are converted to lowercase
+               foreach ( $attribs as $attribute => $value ) {
+                       if ( $attribute !== strtolower( $attribute ) && array_key_exists( strtolower( $attribute ), $presentationalAttribs ) ) {
+                               $attribs[strtolower( $attribute )] = $value;
+                               unset( $attribs[$attribute] );
+                       }
+               }
+
                $style = "";
                foreach ( $presentationalAttribs as $attribute => $info ) {
                        list( $property, $elements ) = $info;
-                       
+
                        // Skip if this attribute is not relevant to this element
                        if ( !in_array( $element, $elements ) ) {
                                continue;
                        }
-                       
+
                        // Skip if the attribute is not used
                        if ( !array_key_exists( $attribute, $attribs ) ) {
                                continue;
                        }
-                       
+
                        $value = $attribs[$attribute];
-                       
+
                        // For nowrap the value should be nowrap instead of whatever text is in the value
                        if ( $attribute === 'nowrap' ) {
                                $value = 'nowrap';
                        }
-                       
+
+                       // clear="all" is clear: both; in css
+                       if ( $attribute === 'clear' && strtolower( $value ) === 'all' ) {
+                               $value = 'both';
+                       }
+
                        // Size based properties should have px applied to them if they have no unit
                        if ( in_array( $attribute, array( 'height', 'width', 'size' ) ) ) {
                                if ( preg_match( '/^[\d.]+$/', $value ) ) {
                                        $value = "{$value}px";
                                }
                        }
-                       
+
                        $style .= " $property: $value;";
-                       
+
                        unset( $attribs[$attribute] );
                }
-               
+
                if ( $style ) {
                        // Prepend our style rules so that they can be overridden by user css
                        if ( isset($attribs['style']) ) {
@@ -681,7 +704,7 @@ class Sanitizer {
                        }
                        $attribs['style'] = trim($style);
                }
-               
+
                return $attribs;
        }
 
@@ -753,7 +776,7 @@ class Sanitizer {
                        }
 
                        //RDFa and microdata properties allow URLs, URIs and/or CURIs. check them for sanity
-                       if ( $attribute === 'rel' || $attribute === 'rev' || 
+                       if ( $attribute === 'rel' || $attribute === 'rev' ||
                                $attribute === 'about' || $attribute === 'property' || $attribute === 'resource' || #RDFa
                                $attribute === 'datatype' || $attribute === 'typeof' ||                             #RDFa
                                $attribute === 'itemid' || $attribute === 'itemprop' || $attribute === 'itemref' || #HTML5 microdata
@@ -761,7 +784,7 @@ class Sanitizer {
 
                                //Paranoia. Allow "simple" values but suppress javascript
                                if ( preg_match( self::EVIL_URI_PATTERN, $value ) ) {
-                                       continue; 
+                                       continue;
                                }
                        }
 
@@ -826,7 +849,7 @@ class Sanitizer {
         * returned string may contain character references given certain
         * clever input strings. These character references must
         * be escaped before the return value is embedded in HTML.
-        * 
+        *
         * @param $value String
         * @return String
         */
@@ -848,7 +871,7 @@ class Sanitizer {
                        $space = '[\\x20\\t\\r\\n\\f]';
                        $nl = '(?:\\n|\\r\\n|\\r|\\f)';
                        $backslash = '\\\\';
-                       $decodeRegex = "/ $backslash 
+                       $decodeRegex = "/ $backslash
                                (?:
                                        ($nl) |  # 1. Line continuation
                                        ([0-9A-Fa-f]{1,6})$space? |  # 2. character number
@@ -858,7 +881,7 @@ class Sanitizer {
                }
                $value = preg_replace_callback( $decodeRegex,
                        array( __CLASS__, 'cssDecodeCallback' ), $value );
-               
+
                // Remove any comments; IE gets token splitting wrong
                // This must be done AFTER decoding character references and
                // escape sequences, because those steps can introduce comments
@@ -1433,7 +1456,7 @@ class Sanitizer {
                if ( $wgAllowRdfaAttributes ) {
                        #RDFa attributes as specified in section 9 of http://www.w3.org/TR/2008/REC-rdfa-syntax-20081014
                        $common = array_merge( $common, array(
-                           'about', 'property', 'resource', 'datatype', 'typeof', 
+                           'about', 'property', 'resource', 'datatype', 'typeof',
                        ) );
                }
 
@@ -1550,7 +1573,7 @@ class Sanitizer {
                        'th'         => array_merge( $common, $tablecell, $tablealign ),
 
                        # 12.2 # NOTE: <a> is not allowed directly, but the attrib whitelist is used from the Parser object
-                       'a'          => array_merge( $common, array( 'href', 'rel', 'rev' ) ), # rel/rev esp. for RDFa 
+                       'a'          => array_merge( $common, array( 'href', 'rel', 'rev' ) ), # rel/rev esp. for RDFa
 
                        # 13.2
                        # Not usually allowed, but may be used for extension-style hooks
@@ -1641,7 +1664,7 @@ class Sanitizer {
                $url = Sanitizer::decodeCharReferences( $url );
 
                # Escape any control characters introduced by the above step
-               $url = preg_replace_callback( '/[\][<>"\\x00-\\x20\\x7F\|]/', 
+               $url = preg_replace_callback( '/[\][<>"\\x00-\\x20\\x7F\|]/',
                        array( __CLASS__, 'cleanUrlCallback' ), $url );
 
                # Validate hostname portion
@@ -1665,7 +1688,7 @@ class Sanitizer {
                                \xe1\xa0\x8d| # 180d MONGOLIAN FREE VARIATION SELECTOR THREE
                                \xe2\x80\x8c| # 200c ZERO WIDTH NON-JOINER
                                \xe2\x80\x8d| # 200d ZERO WIDTH JOINER
-                               [\xef\xb8\x80-\xef\xb8\x8f] # fe00-fe00f VARIATION SELECTOR-1-16
+                               [\xef\xb8\x80-\xef\xb8\x8f] # fe00-fe0f VARIATION SELECTOR-1-16
                                /xuD";
 
                        $host = preg_replace( $strip, '', $host );