Localisation updates. Import from tl.wikipedia.org.

[lhc/web/wiklou.git] / languages / Language.php
diff --git a/languages/Language.php b/languages/Language.php

index 4c878d0..b238c5f 100644 (file)
--- a/languages/Language.php
+++ b/languages/Language.php
@@ -74,6 +74,7 @@ class Language {
         static public $mMergeableAliasListKeys = array( 'specialPageAliases' );
  
         static public $mLocalisationCache = array();
+       static public $mLangObjCache = array();
  
         static public $mWeekdayMsgs = array(
                 'sunday', 'monday', 'tuesday', 'wednesday', 'thursday',
@@ -130,12 +131,25 @@ class Language {
         );
  
         /**
-        * Create a language object for a given language code
+        * Get a cached language object for a given language code
+        *
+        * Objects are memoised per code in self::$mLangObjCache; a cache
+        * miss is filled by calling self::newFromCode().
+        *
+        * @param $code String: language code, also used as the cache key
+        * @return object the cached result of self::newFromCode( $code )
          */
         static function factory( $code ) {
+               if ( !isset( self::$mLangObjCache[$code] ) ) {
+                       if( count( self::$mLangObjCache ) > 10 ) { // checked before inserting, so up to 11 entries can accumulate
+                               // Bound memory use: once more than 10 objects are cached, drop them all and start over.
+                               self::$mLangObjCache = array();
+                       }
+                       self::$mLangObjCache[$code] = self::newFromCode( $code ); // build and remember the object on a miss
+               }
+               return self::$mLangObjCache[$code];
+       }
+
+       /**
+        * Create a language object for a given language code
+        */
+       protected static function newFromCode( $code ) {
                 global $IP;
                 static $recursionLevel = 0;
-
                 if ( $code == 'en' ) {
                         $class = 'Language';
                 } else {
@@ -156,13 +170,12 @@ class Language {
                 if( ! class_exists( $class ) ) {
                         $fallback = Language::getFallbackFor( $code );
                         ++$recursionLevel;
-                       $lang = Language::factory( $fallback );
+                       $lang = Language::newFromCode( $fallback );
                         --$recursionLevel;
                         $lang->setCode( $code );
                 } else {
                         $lang = new $class;
                 }
-
                 return $lang;
         }
  
@@ -402,21 +415,13 @@ class Language {
         }
  
         /**
-        * Ugly hack to get a message maybe from the MediaWiki namespace, if this
-        * language object is the content or user language.
+        * Get a message from the MediaWiki namespace.
+        *
+        * Delegates to wfMsgExt(), passing the 'parsemag' option and this
+        * object as the 'language' option, instead of comparing this object's
+        * code against $wgContLang / $wgLang as the removed code did.
+        *
+        * @param $msg String: message name
+        * @return string
          */
         function getMessageFromDB( $msg ) {
-               global $wgContLang, $wgLang;
-               if ( $wgContLang->getCode() == $this->getCode() ) {
-                       # Content language
-                       return wfMsgForContent( $msg );
-               } elseif ( $wgLang->getCode() == $this->getCode() ) {
-                       # User language
-                       return wfMsg( $msg );
-               } else {
-                       # Neither, get from localisation
-                       return $this->getMessage( $msg );
-               }
+               return wfMsgExt( $msg, array( 'parsemag', 'language' => $this ) );
         }
  
         function getLanguageName( $code ) {
@@ -1522,25 +1527,72 @@ class Language {
                         return $string;
                 }
  
-               # MySQL fulltext index doesn't grok utf-8, so we
-               # need to fold cases and convert to hex
  
                 wfProfileIn( __METHOD__ );
-               if( function_exists( 'mb_strtolower' ) ) {
-                       $out = preg_replace(
-                               "/([\\xc0-\\xff][\\x80-\\xbf]*)/e",
-                               "'U8' . bin2hex( \"$1\" )",
-                               mb_strtolower( $string ) );
-               } else {
-                       list( , $wikiLowerChars ) = self::getCaseMaps();
+               
+               // MySQL fulltext index doesn't grok utf-8, so we
+               // need to fold cases and convert to hex
+               $out = preg_replace_callback(
+                       "/([\\xc0-\\xff][\\x80-\\xbf]*)/",
+                       array( $this, 'stripForSearchCallback' ),
+                       $this->lc( $string ) );
+               
+               // And to add insult to injury, the default indexing
+               // ignores short words... Pad them so we can pass them
+               // through without reconfiguring the server...
+               $minLength = $this->minSearchLength();
+               if( $minLength > 1 ) {
+                       $n = $minLength-1;
                         $out = preg_replace(
-                               "/([\\xc0-\\xff][\\x80-\\xbf]*)/e",
-                               "'U8' . bin2hex( strtr( \"\$1\", \$wikiLowerChars ) )",
-                               $string );
+                               "/\b(\w{1,$n})\b/",
+                               "$1U800",
+                               $out );
                 }
+               
+               // Periods within things like hostnames and IP addresses
+               // are also important -- we want a search for "example.com"
+               // or "192.168.1.1" to work sanely.
+               //
+               // MySQL's search seems to ignore them, so you'd match on
+               // "example.wikipedia.com" and "192.168.83.1" as well.
+               $out = preg_replace(
+                       "/(\w)\.(\w|\*)/u",
+                       "$1U82e$2",
+                       $out );
+               
                 wfProfileOut( __METHOD__ );
                 return $out;
         }
+       
+       /**
+        * Armor a case-folded UTF-8 string to get through MySQL's
+        * fulltext search without being mucked up by funny charset
+        * settings or anything else of the sort.
+        *
+        * Registered as the preg_replace_callback() handler for the
+        * multi-byte sequence pattern used when stripping text for search.
+        *
+        * @param $matches Array: regex match data; only capture group 1 is read
+        * @return string 'U8' followed by the hex encoding of the matched bytes
+        */
+       protected function stripForSearchCallback( $matches ) {
+               return 'U8' . bin2hex( $matches[1] );
+       }
+       
+       /**
+        * Check MySQL server's ft_min_word_len setting so we know
+        * if we need to pad short words...
+        *
+        * The server is queried at most once per object; the answer is
+        * memoised in $this->minSearchLength.
+        *
+        * @return int the server's ft_min_word_len value, or 0 when the
+        *   variable was not returned by the query
+        */
+       protected function minSearchLength() {
+               if( !isset( $this->minSearchLength ) ) {
+                       $sql = "show global variables like 'ft\\_min\\_word\\_len'"; // "\\_" reaches MySQL as "\_", so LIKE matches a literal underscore
+                       $dbr = wfGetDB( DB_SLAVE );
+                       $result = $dbr->query( $sql );
+                       $row = $result->fetchObject(); // only the first row is needed
+                       $result->free();
+                       
+                       if( $row && $row->Variable_name == 'ft_min_word_len' ) { // only trust the row if it is the variable we asked for
+                               $this->minSearchLength = intval( $row->Value );
+                       } else {
+                               $this->minSearchLength = 0; // 0 fails the caller's "$minLength > 1" test, so no padding is applied
+                       }
+               }
+               return $this->minSearchLength;
+       }
  
         function convertForSearchResult( $termsArray ) {
                 # some languages, e.g. Chinese, need to do a conversion
@@ -1769,7 +1821,7 @@ class Language {
                                         $aliases[$code] = $this->fixSpecialPageAliases( $aliases[$code] );
                                         /* Merge the aliases, THIS will break if there is special page name
                                         * which looks like a numerical key, thanks to PHP...
-                                       * See the comments for wfArrayMerge in GlobalSettings.php. */
+                                       * See the array_merge_recursive manual entry */
                                         $this->mExtendedSpecialPageAliases = array_merge_recursive(
                                                 $this->mExtendedSpecialPageAliases, $aliases[$code] );
  
@@ -1821,7 +1873,8 @@ class Language {
           * </code>
           *
           * See LanguageGu.php for the Gujarati implementation and
-         * LanguageIs.php for the , => . and . => , implementation.
+         * $separatorTransformTable in MessagesIs.php for
+         * the , => . and . => , implementation.
           *
           * @todo check if it's viable to use localeconv() for the decimal
           *       separator thing.
@@ -1891,9 +1944,9 @@ class Language {
                         if ($i == $m) {
                                 $s = $l[$i];
                         } else if ($i == $m - 1) {
-                               $s = $l[$i] . ' ' . $this->getMessageFromDB( 'and' ) . ' ' . $s;
+                               $s = $l[$i] . $this->getMessageFromDB( 'and' ) . $this->getMessageFromDB( 'word-separator' ) . $s;
                         } else {
-                               $s = $l[$i] . ', ' . $s;
+                               $s = $l[$i] . $this->getMessageFromDB( 'comma-separator' ) . $s;
                         }
                 }
                 return $s;
@@ -1903,33 +1956,23 @@ class Language {
          * Take a list of strings and build a locale-friendly comma-separated
          * list, using the local comma-separator message.
          * @param $list array of strings to put in a comma list
-        * @param $forContent bool Use $wgContentLang instead of the UI lang
          * @return string
          */
         function commaList( $list, $forContent = false ) { // NOTE(review): after this change $forContent is no longer read by the body
-               $params = array( 'escapenoentities' );
-               if ( $forContent === true ) {
-                       $params[] = 'content';
-               }
                 return implode(
                         $list,
-                       wfMsgExt( 'comma-separator', $params ) );
+                       wfMsgExt( 'comma-separator', array( 'escapenoentities', 'language' => $this ) ) ); // NOTE(review): array-first implode() order; PHP 8 requires the separator first
         }
         
         /**
          * Same as commaList, but separate it with the pipe instead.
+        *
+        * The separator is the 'pipe-separator' message, fetched through
+        * wfMsgExt() with the 'escapenoentities' option and this object as
+        * the 'language' option.
+        *
+        * NOTE(review): implode() is called with the array first and the
+        * separator second; PHP 8 only accepts implode( separator, array ).
+        *
          * @param $list array of strings to put in a pipe list
-        * @param $forContent bool Use $wgContentLang instead of the UI lang
          * @return string
          */
-       function pipeList( $list, $forContent = false ) {
-               $params = array( 'escapenoentities' );
-               if ( $forContent === true ) {
-                       $params[] = 'content';
-               }
+       function pipeList( $list ) {
                 return implode(
                         $list,
-                       wfMsgExt( 'pipe-separator', $params ) );
+                       wfMsgExt( 'pipe-separator', array( 'escapenoentities', 'language' => $this ) ) );
         }
  
         /**
@@ -2240,7 +2283,7 @@ class Language {
          */
         static function loadLocalisation( $code, $disableCache = false ) {
                 static $recursionGuard = array();
-               global $wgMemc, $wgCheckSerialized;
+               global $wgMemc, $wgEnableSerializedMessages, $wgCheckSerialized;
  
                 if ( !$code ) {
                         throw new MWException( "Invalid language code requested" );
@@ -2255,16 +2298,18 @@ class Language {
                         wfProfileIn( __METHOD__ );
  
                         # Try the serialized directory
-                       $cache = wfGetPrecompiledData( self::getFileName( "Messages", $code, '.ser' ) );
-                       if ( $cache ) {
-                               if ( $wgCheckSerialized && self::isLocalisationOutOfDate( $cache ) ) {
-                                       $cache = false;
-                                       wfDebug( "Language::loadLocalisation(): precompiled data file for $code is out of date\n" );
-                               } else {
-                                       self::$mLocalisationCache[$code] = $cache;
-                                       wfDebug( "Language::loadLocalisation(): got localisation for $code from precompiled data file\n" );
-                                       wfProfileOut( __METHOD__ );
-                                       return self::$mLocalisationCache[$code]['deps'];
+                       if( $wgEnableSerializedMessages ) {
+                               $cache = wfGetPrecompiledData( self::getFileName( "Messages", $code, '.ser' ) );
+                               if ( $cache ) {
+                                       if ( $wgCheckSerialized && self::isLocalisationOutOfDate( $cache ) ) {
+                                               $cache = false;
+                                               wfDebug( "Language::loadLocalisation(): precompiled data file for $code is out of date\n" );
+                                       } else {
+                                               self::$mLocalisationCache[$code] = $cache;
+                                               wfDebug( "Language::loadLocalisation(): got localisation for $code from precompiled data file\n" );
+                                               wfProfileOut( __METHOD__ );
+                                               return self::$mLocalisationCache[$code]['deps'];
+                                       }
                                 }
                         }