Merge "Stop using the unholy trinity in DatabaseError"
[lhc/web/wiklou.git] / includes / Sanitizer.php
index 4dbc9dd..3384af0 100644 (file)
@@ -865,6 +865,27 @@ class Sanitizer {
                $value = preg_replace_callback( $decodeRegex,
                        array( __CLASS__, 'cssDecodeCallback' ), $value );
 
+               // Normalize Halfwidth and Fullwidth Unicode block that IE6 might treat as ascii
+               $value = preg_replace_callback(
+                       '/[!-z]/u', // U+FF01 to U+FF5A
+                       function ( $matches ) {
+                               $cp = utf8ToCodepoint( $matches[0] );
+                               if ( $cp === false ) {
+                                       return '';
+                               }
+                               return chr( $cp - 65248 ); // ASCII range \x21-\x7A
+                       },
+                       $value
+               );
+
+               // Convert more characters IE6 might treat as ascii
+               // U+0280, U+0274, U+207F, U+029F, U+026A, U+207D, U+208D
+               $value = str_replace(
+                       array( 'ʀ', 'ɴ', 'ⁿ', 'ʟ', 'ɪ', '⁽', '₍' ),
+                       array( 'r', 'n', 'n', 'l', 'i', '(', '(' ),
+                       $value
+               );
+
                // Let the value through if it's nothing but a single comment, to
                // allow other functions which may reject it to pass some error
                // message through.
@@ -885,8 +906,24 @@ class Sanitizer {
                        }
                }
 
+               // S followed by repeat, iteration, or prolonged sound marks,
+               // which IE will treat as "ss"
+               $value = preg_replace(
+                       '/s(?:
+                               \xE3\x80\xB1 | # U+3031
+                               \xE3\x82\x9D | # U+309D
+                               \xE3\x83\xBC | # U+30FC
+                               \xE3\x83\xBD | # U+30FD
+                               \xEF\xB9\xBC | # U+FE7C
+                               \xEF\xB9\xBD | # U+FE7D
+                               \xEF\xBD\xB0   # U+FF70
+                       )/ix',
+                       'ss',
+                       $value
+               );
+
                // Reject problematic keywords and control characters
-               if ( preg_match( '/[\000-\010\016-\037\177]/', $value ) ) {
+               if ( preg_match( '/[\000-\010\013\016-\037\177]/', $value ) ) {
                        return '/* invalid control char */';
                } elseif ( preg_match( '! expression | filter\s*: | accelerator\s*: | url\s*\( | image\s*\( | image-set\s*\( !ix', $value ) ) {
                        return '/* insecure input */';