Merge "Stop using the unholy trinity in DatabaseError"
[lhc/web/wiklou.git] / includes / Sanitizer.php
index 499d821..3384af0 100644 (file)
@@ -865,6 +865,27 @@ class Sanitizer {
                $value = preg_replace_callback( $decodeRegex,
                        array( __CLASS__, 'cssDecodeCallback' ), $value );
 
+               // Normalize Halfwidth and Fullwidth Unicode block that IE6 might treat as ascii
+               $value = preg_replace_callback(
+                       '/[!-z]/u', // U+FF01 to U+FF5A
+                       function ( $matches ) {
+                               $cp = utf8ToCodepoint( $matches[0] );
+                               if ( $cp === false ) {
+                                       return '';
+                               }
+                               return chr( $cp - 65248 ); // ASCII range \x21-\x7A
+                       },
+                       $value
+               );
+
+               // Convert more characters IE6 might treat as ascii
+               // U+0280, U+0274, U+207F, U+029F, U+026A, U+207D, U+208D
+               $value = str_replace(
+                       array( 'ʀ', 'ɴ', 'ⁿ', 'ʟ', 'ɪ', '⁽', '₍' ),
+                       array( 'r', 'n', 'n', 'l', 'i', '(', '(' ),
+                       $value
+               );
+
                // Let the value through if it's nothing but a single comment, to
                // allow other functions which may reject it to pass some error
                // message through.
@@ -885,8 +906,24 @@ class Sanitizer {
                        }
                }
 
+               // S followed by repeat, iteration, or prolonged sound marks,
+               // which IE will treat as "ss"
+               $value = preg_replace(
+                       '/s(?:
+                               \xE3\x80\xB1 | # U+3031
+                               \xE3\x82\x9D | # U+309D
+                               \xE3\x83\xBC | # U+30FC
+                               \xE3\x83\xBD | # U+30FD
+                               \xEF\xB9\xBC | # U+FE7C
+                               \xEF\xB9\xBD | # U+FE7D
+                               \xEF\xBD\xB0   # U+FF70
+                       )/ix',
+                       'ss',
+                       $value
+               );
+
                // Reject problematic keywords and control characters
-               if ( preg_match( '/[\000-\010\016-\037\177]/', $value ) ) {
+               if ( preg_match( '/[\000-\010\013\016-\037\177]/', $value ) ) {
                        return '/* invalid control char */';
                } elseif ( preg_match( '! expression | filter\s*: | accelerator\s*: | url\s*\( | image\s*\( | image-set\s*\( !ix', $value ) ) {
                        return '/* insecure input */';
@@ -1484,7 +1521,7 @@ class Sanitizer {
                }
 
                $block = array_merge( $common, array( 'align' ) );
-               $tablealign = array( 'align', 'char', 'charoff', 'valign' );
+               $tablealign = array( 'align', 'valign' );
                $tablecell = array(
                        'abbr',
                        'axis',
@@ -1504,7 +1541,7 @@ class Sanitizer {
                        # 7.5.4
                        'div'        => $block,
                        'center'     => $common, # deprecated
-                       'span'       => $block, # ??
+                       'span'       => $common,
 
                        # 7.5.5
                        'h1'         => $block,
@@ -1518,7 +1555,7 @@ class Sanitizer {
                        # address
 
                        # 8.2.4
-                       # bdo
+                       'bdo'        => $common,
 
                        # 9.2.1
                        'em'         => $common,
@@ -1534,7 +1571,7 @@ class Sanitizer {
 
                        # 9.2.2
                        'blockquote' => array_merge( $common, array( 'cite' ) ),
-                       # q
+                       'q'          => array_merge( $common, array( 'cite' ) ),
 
                        # 9.2.3
                        'sub'        => $common,
@@ -1544,10 +1581,10 @@ class Sanitizer {
                        'p'          => $block,
 
                        # 9.3.2
-                       'br'         => array( 'id', 'class', 'title', 'style', 'clear' ),
+                       'br'         => array_merge( $common, array( 'clear' ) ),
 
                        # http://www.whatwg.org/html/text-level-semantics.html#the-wbr-element
-                       'wbr'        => array( 'id', 'class', 'title', 'style' ),
+                       'wbr'        => $common,
 
                        # 9.3.4
                        'pre'        => array_merge( $common, array( 'width' ) ),
@@ -1574,16 +1611,16 @@ class Sanitizer {
                                                                ) ),
 
                        # 11.2.2
-                       'caption'    => array_merge( $common, array( 'align' ) ),
+                       'caption'    => $block,
 
                        # 11.2.3
-                       'thead'      => array_merge( $common, $tablealign ),
-                       'tfoot'      => array_merge( $common, $tablealign ),
-                       'tbody'      => array_merge( $common, $tablealign ),
+                       'thead'      => $common,
+                       'tfoot'      => $common,
+                       'tbody'      => $common,
 
                        # 11.2.4
-                       'colgroup'   => array_merge( $common, array( 'span', 'width' ), $tablealign ),
-                       'col'        => array_merge( $common, array( 'span', 'width' ), $tablealign ),
+                       'colgroup'   => array_merge( $common, array( 'span' ) ),
+                       'col'        => array_merge( $common, array( 'span' ) ),
 
                        # 11.2.5
                        'tr'         => array_merge( $common, array( 'bgcolor' ), $tablealign ),
@@ -1618,7 +1655,7 @@ class Sanitizer {
                        # basefont
 
                        # 15.3
-                       'hr'         => array_merge( $common, array( 'noshade', 'size', 'width' ) ),
+                       'hr'         => array_merge( $common, array( 'width' ) ),
 
                        # HTML Ruby annotation text module, simple ruby only.
                        # http://www.whatwg.org/html/text-level-semantics.html#the-ruby-element