Check return value of preg_match in Sanitizer.php
[lhc/web/wiklou.git] / includes / Sanitizer.php
index a2de004..96193a7 100644 (file)
@@ -41,7 +41,7 @@ class Sanitizer {
 
        /**
         * Acceptable tag name charset from HTML5 parsing spec
-        * http://dev.w3.org/html5/spec-preview/tokenization.html#tag-open-state
+        * http://www.w3.org/TR/html5/syntax.html#tag-open-state
         */
        const ELEMENT_BITS_REGEX = '!^(/?)([A-Za-z][^\t\n\v />\0]*+)([^>]*?)(/?>)([^<]*)$!';
 
@@ -573,27 +573,25 @@ class Sanitizer {
                } else {
                        # this might be possible using tidy itself
                        foreach ( $bits as $x ) {
-                               preg_match( self::ELEMENT_BITS_REGEX, $x, $regs );
-
-                               wfSuppressWarnings();
-                               list( /* $qbar */, $slash, $t, $params, $brace, $rest ) = $regs;
-                               wfRestoreWarnings();
+                               if ( preg_match( self::ELEMENT_BITS_REGEX, $x, $regs ) ) {
+                                       list( /* $qbar */, $slash, $t, $params, $brace, $rest ) = $regs;
 
-                               $badtag = false;
-                               if ( isset( $htmlelements[$t = strtolower( $t )] ) ) {
-                                       if ( is_callable( $processCallback ) ) {
-                                               call_user_func_array( $processCallback, array( &$params, $args ) );
-                                       }
+                                       $badtag = false;
+                                       if ( isset( $htmlelements[$t = strtolower( $t )] ) ) {
+                                               if ( is_callable( $processCallback ) ) {
+                                                       call_user_func_array( $processCallback, array( &$params, $args ) );
+                                               }
 
-                                       if ( !Sanitizer::validateTag( $params, $t ) ) {
-                                               $badtag = true;
-                                       }
+                                               if ( !Sanitizer::validateTag( $params, $t ) ) {
+                                                       $badtag = true;
+                                               }
 
-                                       $newparams = Sanitizer::fixTagAttributes( $params, $t );
-                                       if ( !$badtag ) {
-                                               $rest = str_replace( '>', '&gt;', $rest );
-                                               $text .= "<$slash$t$newparams$brace$rest";
-                                               continue;
+                                               $newparams = Sanitizer::fixTagAttributes( $params, $t );
+                                               if ( !$badtag ) {
+                                                       $rest = str_replace( '>', '&gt;', $rest );
+                                                       $text .= "<$slash$t$newparams$brace$rest";
+                                                       continue;
+                                               }
                                        }
                                }
                                $text .= '&lt;' . str_replace( '>', '&gt;', $x );
@@ -865,7 +863,7 @@ class Sanitizer {
                $value = preg_replace_callback(
                         '/[！-［］-ｚ]/u', // U+FF01 to U+FF5A, excluding U+FF3C (bug 58088)
                        function ( $matches ) {
-                               $cp = utf8ToCodepoint( $matches[0] );
+                               $cp = UtfNormal\Utils::utf8ToCodepoint( $matches[0] );
                                if ( $cp === false ) {
                                        return '';
                                }
@@ -971,7 +969,7 @@ class Sanitizer {
                        // Line continuation
                        return '';
                } elseif ( $matches[2] !== '' ) {
-                       $char = codepointToUtf8( hexdec( $matches[2] ) );
+                       $char = UtfNormal\Utils::codepointToUtf8( hexdec( $matches[2] ) );
                } elseif ( $matches[3] !== '' ) {
                        $char = $matches[3];
                } else {
@@ -1452,9 +1450,9 @@ class Sanitizer {
         */
        static function decodeChar( $codepoint ) {
                if ( Sanitizer::validateCodepoint( $codepoint ) ) {
-                       return codepointToUtf8( $codepoint );
+                       return UtfNormal\Utils::codepointToUtf8( $codepoint );
                } else {
-                       return UTF8_REPLACEMENT;
+                       return UtfNormal\Constants::UTF8_REPLACEMENT;
                }
        }
 
@@ -1471,7 +1469,7 @@ class Sanitizer {
                        $name = self::$htmlEntityAliases[$name];
                }
                if ( isset( self::$htmlEntities[$name] ) ) {
-                       return codepointToUtf8( self::$htmlEntities[$name] );
+                       return UtfNormal\Utils::codepointToUtf8( self::$htmlEntities[$name] );
                } else {
                        return "&$name;";
                }