Break UTF-8 support class from the case conversion tables; grab tables from memcached...

[lhc/web/wiklou.git] / includes / Utf8Case.php
diff --git a/includes/Utf8Case.php b/includes/Utf8Case.php

index bcf4eeb..996070a 100644 (file)
--- a/includes/Utf8Case.php
+++ b/includes/Utf8Case.php
@@ -1,12 +1,11 @@
  <?
-$wgInputEncoding    = "utf-8";
-$wgOutputEncoding      = "utf-8";
-
  # Simple 1:1 upper/lowercase switching arrays for utf-8 text
  # Won't get context-sensitive things yet
  
  # Hack for bugs in ucfirst() and company
-# TODO: store this in shared memory or something
+
+# These are pulled from memcached if possible, as this is faster than filling
+# up a big array manually. See also languages/LanguageUtf8.php
  
  $wikiUpperChars = array(
         "a" => "A",
@@ -1494,49 +1493,4 @@ $wikiLowerChars = array (
         "\xf0\x90\x90\xa5" => "\xf0\x90\x91\x8d"
  );
  
-# Base stuff useful to all UTF-8 based language files
-class LanguageUtf8 extends Language {
-
-       function ucfirst( $string ) {
-               # For most languages, this is a wrapper for ucfirst()
-               # But that doesn't work right in a UTF-8 locale
-               global $wikiUpperChars, $wikiLowerChars;
-               return preg_replace (
-               "/^([\\x00-\\x7f]|[\\xc0-\\xff][\\x80-\\xbf]*)/e",
-               "strtr ( \"\$1\" , \$wikiUpperChars )",
-               $string );
-       }
-       
-       function stripForSearch( $string ) {
-               # MySQL fulltext index doesn't grok utf-8, so we
-               # need to fold cases and convert to hex
-               global $wikiLowerChars;
-               return preg_replace(
-                 "/([\\xc0-\\xff][\\x80-\\xbf]*)/e",
-                 "'U8' . bin2hex( strtr( \"\$1\", \$wikiLowerChars ) )",
-                 $string );
-       }
-
-       function fallback8bitEncoding() {
-               # Windows codepage 1252 is a superset of iso 8859-1
-               # override this to use a different source encoding to
-               # translate incoming 8-bit URLs.
-               return "windows-1252";
-       }
-
-       function checkTitleEncoding( $s ) {
-               global $wgInputEncoding;
-
-               # Check for non-UTF-8 URLs
-               $ishigh = preg_match( '/[\x80-\xff]/', $s);
-               if(!$ishigh) return $s;
-               
-               $isutf8 = preg_match( '/^([\x00-\x7f]|[\xc0-\xdf][\x80-\xbf]|' .
-                '[\xe0-\xef][\x80-\xbf]{2}|[\xf0-\xf7][\x80-\xbf]{3})+$/', $s );
-               if( $isutf8 ) return $s;
-
-               return $this->iconv( $this->fallback8bitEncoding(), "utf-8", $s );
-       }
-}
-
  ?>
 \ No newline at end of file