Fix regression in ICU-mode UTF-8 verification: U+FFFF is forbidden

author Brion Vibber <brion@users.mediawiki.org>

Sun, 14 Nov 2004 21:36:43 +0000 (21:36 +0000)

committer Brion Vibber <brion@users.mediawiki.org>

Sun, 14 Nov 2004 21:36:43 +0000 (21:36 +0000)
author Brion Vibber <brion@users.mediawiki.org>
Sun, 14 Nov 2004 21:36:43 +0000 (21:36 +0000)
committer Brion Vibber <brion@users.mediawiki.org>
Sun, 14 Nov 2004 21:36:43 +0000 (21:36 +0000)
diff --git a/includes/normal/CleanUpTest.php b/includes/normal/CleanUpTest.php

index badade6..e9156ab 100644 (file)
--- a/includes/normal/CleanUpTest.php
+++ b/includes/normal/CleanUpTest.php
@@ -330,6 +330,14 @@ class CleanUpTest extends PHPUnit_TestCase {
                         bin2hex( $expect ),
                         bin2hex( UtfNormal::cleanUp( $text ) ) );
         }
+
+       function testForbiddenRegression() {
+               $text   = "\xef\xbf\xbf"; # U+FFFF, illegal char
+               $expect = "\xef\xbf\xbd";
+               $this->assertEquals(
+                       bin2hex( $expect ),
+                       bin2hex( UtfNormal::cleanUp( $text ) ) );
+       }
  }
  
  
diff --git a/includes/normal/UtfNormal.php b/includes/normal/UtfNormal.php

index a4c095c..62461d6 100644 (file)
--- a/includes/normal/UtfNormal.php
+++ b/includes/normal/UtfNormal.php
@@ -132,11 +132,12 @@ class UtfNormal {
                                 '/[\x00-\x08\x0b\x0c\x0e-\x1f]/',
                                 UTF8_REPLACEMENT,
                                 $string );
-                       $str = str_replace( UTF8_FFFE, UTF8_REPLACEMENT, $string );
+                       $string = str_replace( UTF8_FFFE, UTF8_REPLACEMENT, $string );
+                       $string = str_replace( UTF8_FFFF, UTF8_REPLACEMENT, $string );
                         
                         # UnicodeString constructor fails if the string ends with a
                         # head byte. Add a junk char at the end, we'll strip it off.
-                       return rtrim( utf8_normalize( $str . "\x01", UNORM_NFC ), "\x01" );
+                       return rtrim( utf8_normalize( $string . "\x01", UNORM_NFC ), "\x01" );
                 } elseif( UtfNormal::quickIsNFCVerify( $string ) ) {
                         # Side effect -- $string has had UTF-8 errors cleaned up.
                         return $string;
author	Brion Vibber <brion@users.mediawiki.org>
	Sun, 14 Nov 2004 21:36:43 +0000 (21:36 +0000)
committer	Brion Vibber <brion@users.mediawiki.org>
	Sun, 14 Nov 2004 21:36:43 +0000 (21:36 +0000)
includes/normal/CleanUpTest.php		patch | blob | history
includes/normal/UtfNormal.php		patch | blob | history