Break UTF-8 support class from the case conversion tables; grab tables from memcached...
author Brion Vibber <brion@users.mediawiki.org>
Sat, 30 Aug 2003 09:39:34 +0000 (09:39 +0000)
committer Brion Vibber <brion@users.mediawiki.org>
Sat, 30 Aug 2003 09:39:34 +0000 (09:39 +0000)
12 files changed:
includes/Setup.php
includes/Utf8Case.php
languages/Language.php
languages/LanguageAr.php
languages/LanguageEo.php
languages/LanguageHe.php
languages/LanguageHi.php
languages/LanguageHu.php
languages/LanguageJa.php
languages/LanguageKo.php
languages/LanguageMl.php
languages/LanguageUtf8.php [new file with mode: 0644]

index 3974d36..039d72c 100644 (file)
@@ -7,7 +7,6 @@
 
 global $IP;
 include_once( "$IP/GlobalFunctions.php" );
-include_once( "$IP/Language.php" );
 include_once( "$IP/Namespace.php" );
 include_once( "$IP/Skin.php" );
 include_once( "$IP/OutputPage.php" );
@@ -17,7 +16,7 @@ include_once( "$IP/User.php" );
 include_once( "$IP/LinkCache.php" );
 include_once( "$IP/Title.php" );
 include_once( "$IP/Article.php" );
-require( "$IP/MemCachedClient.inc.php" );
+include_once( "$IP/MemCachedClient.inc.php" );
 
 wfDebug( "\n\n" );
 
@@ -37,10 +36,12 @@ if( $wgUseMemCached ) {
        $wgMemc->set_debug( $wgMemCachedDebug );
 }
 
+include_once( "$IP/Language.php" );
+
 $wgOut = new OutputPage();
 $wgLangClass = "Language" . ucfirst( $wgLanguageCode );
 if( ! class_exists( $wgLangClass ) ) {
-       include_once( "$IP/Utf8Case.php" );
+       include_once( "$IP/LanguageUtf8.php" );
        $wgLangClass = "LanguageUtf8";
 }
 $wgLang = new $wgLangClass();
index bcf4eeb..996070a 100644 (file)
@@ -1,12 +1,11 @@
 <?
-$wgInputEncoding    = "utf-8";
-$wgOutputEncoding      = "utf-8";
-
 # Simple 1:1 upper/lowercase switching arrays for utf-8 text
 # Won't get context-sensitive things yet
 
 # Hack for bugs in ucfirst() and company
-# TODO: store this in shared memory or something
+
+# These are pulled from memcached if possible, as this is faster than filling
+# up a big array manually. See also languages/LanguageUtf8.php
 
 $wikiUpperChars = array(
        "a" => "A",
@@ -1494,49 +1493,4 @@ $wikiLowerChars = array (
        "\xf0\x90\x90\xa5" => "\xf0\x90\x91\x8d"
 );
 
-# Base stuff useful to all UTF-8 based language files
-class LanguageUtf8 extends Language {
-
-       function ucfirst( $string ) {
-               # For most languages, this is a wrapper for ucfirst()
-               # But that doesn't work right in a UTF-8 locale
-               global $wikiUpperChars, $wikiLowerChars;
-               return preg_replace (
-               "/^([\\x00-\\x7f]|[\\xc0-\\xff][\\x80-\\xbf]*)/e",
-               "strtr ( \"\$1\" , \$wikiUpperChars )",
-               $string );
-       }
-       
-       function stripForSearch( $string ) {
-               # MySQL fulltext index doesn't grok utf-8, so we
-               # need to fold cases and convert to hex
-               global $wikiLowerChars;
-               return preg_replace(
-                 "/([\\xc0-\\xff][\\x80-\\xbf]*)/e",
-                 "'U8' . bin2hex( strtr( \"\$1\", \$wikiLowerChars ) )",
-                 $string );
-       }
-
-       function fallback8bitEncoding() {
-               # Windows codepage 1252 is a superset of iso 8859-1
-               # override this to use difference source encoding to
-               # translate incoming 8-bit URLs.
-               return "windows-1252";
-       }
-
-       function checkTitleEncoding( $s ) {
-               global $wgInputEncoding;
-
-               # Check for non-UTF-8 URLs
-               $ishigh = preg_match( '/[\x80-\xff]/', $s);
-               if(!$ishigh) return $s;
-               
-               $isutf8 = preg_match( '/^([\x00-\x7f]|[\xc0-\xdf][\x80-\xbf]|' .
-                '[\xe0-\xef][\x80-\xbf]{2}|[\xf0-\xf7][\x80-\xbf]{3})+$/', $s );
-               if( $isutf8 ) return $s;
-
-               return $this->iconv( $this->fallback8bitEncoding(), "utf-8", $s );
-       }
-}
-
 ?>
\ No newline at end of file
index eb5699d..f93afd2 100644 (file)
@@ -276,7 +276,7 @@ this</a> (alternative: like this<a href=\"\" class=\"internal\">?</a>).",
        "Recentchangeslinked" => "",
        "Movepage"              => "",
        "Booksources"   => "External book sources",
-"Categories" => "Page categories",
+"Categories" => "Page categories"
 );
 
 /* private */ $wgSysopSpecialPagesEn = array(
index 59ffc48..7cca4f6 100644 (file)
@@ -1,6 +1,6 @@
 <?
 # See language.doc
-include_once("Utf8Case.php");
+include_once("LanguageUtf8.php");
 
 class LanguageAr extends LanguageUtf8 {
        # TODO: TRANSLATION!
index 8f26d96..e32386f 100644 (file)
@@ -1,7 +1,5 @@
 <?
-include("Utf8Case.php");
-$wgInputEncoding       = "utf-8";
-$wgOutputEncoding      = "utf-8";
+include("LanguageUtf8.php");
 $wgEditEncoding                = "x";
 
 # See language.doc
index fe1ef23..775aa04 100644 (file)
@@ -1,6 +1,6 @@
 <?
 
-include_once("Utf8Case.php");
+include_once("LanguageUtf8.php");
 
 # NOTE: To turn off "Current Events" in the sidebar,
 # set "currentevents" => "-"
index 2d94bee..7bb997c 100644 (file)
@@ -1,6 +1,6 @@
 <?
 
-include( "Utf8Case.php" );
+include( "LanguageUtf8.php" );
 
 # NOTE: To turn off "Current Events" in the sidebar,
 # set "currentevents" => "-"
index 2ade066..009a0d4 100644 (file)
@@ -1,6 +1,6 @@
 <?
 
-include_once("Utf8Case.php");
+include_once("LanguageUtf8.php");
 
 # NOTE: To turn off "Current Events" in the sidebar,
 # set "currentevents" => "-"
index ce09f9d..7e45e5a 100644 (file)
@@ -1,6 +1,6 @@
 <?php
 global $IP;
-include_once( "$IP/Utf8Case.php" );
+include_once( "$IP/LanguageUtf8.php" );
 
 # The names of the namespaces can be set here, but the numbers
 # are magical, so don't change or move them!  The Namespace class
index 7294029..5da5bda 100644 (file)
@@ -1,6 +1,6 @@
 <?
 global $IP;
-include_once( "$IP/Utf8Case.php" );
+include_once( "$IP/LanguageUtf8.php" );
 
 # The names of the namespaces can be set here, but the numbers
 # are magical, so don't change or move them!  The Namespace class
index 1da1733..27be655 100644 (file)
@@ -1,7 +1,7 @@
 <?
 # See language.doc
 
-include_once( "Utf8Case.php" );
+include_once( "LanguageUtf8.php" );
 
 class LanguageMl extends LanguageUtf8 {
        # Inherit everything
diff --git a/languages/LanguageUtf8.php b/languages/LanguageUtf8.php
new file mode 100644 (file)
index 0000000..5106512
--- /dev/null
@@ -0,0 +1,60 @@
+<?
+
+$wgInputEncoding    = "utf-8";
+$wgOutputEncoding      = "utf-8";
+
+$wikiUpperChars = $wgMemc->get( $key1 = "$wgDBname:utf8:upper" );
+$wikiLowerChars = $wgMemc->get( $key2 = "$wgDBname:utf8:lower" );
+
+if(($wikiUpperChars === false) || ($wikiLowerChars === false)) {
+       include_once("$IP/Utf8Case.php");
+       $wgMemc->set( $key1, $wikiUpperChars );
+       $wgMemc->set( $key2, $wikiLowerChars );
+}
+
+# Base stuff useful to all UTF-8 based language files
+class LanguageUtf8 extends Language {
+
+       function ucfirst( $string ) {
+               # For most languages, this is a wrapper for ucfirst()
+               # But that doesn't work right in a UTF-8 locale
+               global $wikiUpperChars, $wikiLowerChars;
+               return preg_replace (
+               "/^([\\x00-\\x7f]|[\\xc0-\\xff][\\x80-\\xbf]*)/e",
+               "strtr ( \"\$1\" , \$wikiUpperChars )",
+               $string );
+       }
+       
+       function stripForSearch( $string ) {
+               # MySQL fulltext index doesn't grok utf-8, so we
+               # need to fold cases and convert to hex
+               global $wikiLowerChars;
+               return preg_replace(
+                 "/([\\xc0-\\xff][\\x80-\\xbf]*)/e",
+                 "'U8' . bin2hex( strtr( \"\$1\", \$wikiLowerChars ) )",
+                 $string );
+       }
+
+       function fallback8bitEncoding() {
+               # Windows codepage 1252 is a superset of iso 8859-1
+               # override this to use difference source encoding to
+               # translate incoming 8-bit URLs.
+               return "windows-1252";
+       }
+
+       function checkTitleEncoding( $s ) {
+               global $wgInputEncoding;
+
+               # Check for non-UTF-8 URLs
+               $ishigh = preg_match( '/[\x80-\xff]/', $s);
+               if(!$ishigh) return $s;
+               
+               $isutf8 = preg_match( '/^([\x00-\x7f]|[\xc0-\xdf][\x80-\xbf]|' .
+                '[\xe0-\xef][\x80-\xbf]{2}|[\xf0-\xf7][\x80-\xbf]{3})+$/', $s );
+               if( $isutf8 ) return $s;
+
+               return $this->iconv( $this->fallback8bitEncoding(), "utf-8", $s );
+       }
+}
+
+?>