* (bug 1130) Reorder old title checks; use title null instead of empty
[lhc/web/wiklou.git] / languages / LanguageConverter.php
index 8b066fe..d4a0b10 100644 (file)
@@ -18,6 +18,8 @@ class LanguageConverter {
        var $mCacheKey;
        var $mLangObj;
        var $mMarkup;
+       var $mFlags;
+       var $mUcfirst = false;
        /**
      * Constructor
         *
@@ -25,22 +27,25 @@ class LanguageConverter {
      * @param array $variants the supported variants of this language
      * @param array $variantfallback the fallback language of each variant
      * @param array $markup array defining the markup used for manual conversion
+        * @param array $flags array defining the custom strings that map to the flags
      * @access public
      */
        function LanguageConverter($langobj, $maincode, 
                                                                $variants=array(), 
                                                                $variantfallbacks=array(), 
-                                                               $markup=array('begin'=>'-{',
-                                                                                         'codesep'=>':',
-                                                                                         'varsep'=>';',
-                                                                                         'end'=>'}-')) {
+                                                               $markup=array(),
+                                                               $flags = array()) {
                global $wgDBname;
                $this->mLangObj = $langobj;
                $this->mMainLanguageCode = $maincode;
                $this->mVariants = $variants;           
                $this->mVariantFallbacks = $variantfallbacks;
                $this->mCacheKey = $wgDBname . ":conversiontables";
-               $this->mMarkup = $markup;
+               $m = array('begin'=>'-{', 'flagsep'=>'|', 'codesep'=>':',
+                                  'varsep'=>';', 'end'=>'}-');
+               $this->mMarkup = array_merge($m, $markup);
+               $f = array('A'=>'A', 'T'=>'T');
+               $this->mFlags = array_merge($f, $flags);
        }
 
        /**
@@ -87,23 +92,26 @@ class LanguageConverter {
                // get language variant preference from logged in users 
                if(is_object($wgUser) && $wgUser->isLoggedIn() )  {
                        $this->mPreferredVariant = $wgUser->getOption('variant');
+                       return $this->mPreferredVariant;
                }
 
                # FIXME rewrite code for parsing http header. The current code
                # is written specific for detecting zh- variants
                if( !$this->mPreferredVariant ) {
-                       // see if some zh- variant is set in the http header,
-                       $this->mPreferredVariant=$this->mMainLanguageCode;
+                       // see if some supported language variant is set in the
+                       // http header, but we don't set the mPreferredVariant
+                       // variable in case this is called before the user's
+                       // preference is loaded
+                       $pv=$this->mMainLanguageCode;
                        if(array_key_exists('HTTP_ACCEPT_LANGUAGE', $_SERVER)) {
                                $header = str_replace( '_', '-', strtolower($_SERVER["HTTP_ACCEPT_LANGUAGE"]));
                                $zh = strstr($header, 'zh-');
                                if($zh) {
-                                       $this->mPreferredVariant = substr($zh,0,5);
+                                       $pv = substr($zh,0,5);
                                }
                        }
+                       return $pv;
                }
-
-               return $this->mPreferredVariant;
        }
 
        /**
@@ -116,6 +124,7 @@ class LanguageConverter {
      */
        function autoConvert($text, $toVariant=false) {
                $fname="LanguageConverter::autoConvert";
+
                wfProfileIn( $fname );
 
                if(!$this->mTablesLoaded)
@@ -126,24 +135,19 @@ class LanguageConverter {
                if(!in_array($toVariant, $this->mVariants))
                        return $text;
 
-               $ret = '';
 
-               $a = explode('<', $text);
-               $a0 = array_shift($a);
-               $ret .= strtr($a0, $this->mTables[$toVariant]);
-               foreach( $a as $aa ) {
-                       $b = explode('>', $aa, 2);
-                       $ret .= '<' . $b[0];
-                       if(sizeof($b) == 2)
-                               $ret .= '>' . strtr($b[1], $this->mTables[$toVariant]);
-               }
+               $reg = '/<[^>]+>|&[a-z#][a-z0-9]+;|'.UNIQ_PREFIX.'-[a-zA-Z0-9]+/';
+               $matches = preg_split($reg, $text, -1, PREG_SPLIT_OFFSET_CAPTURE);
+
 
-#              /* put back the marker if any */
-#              if(!empty($reg)) {
-#                      $reg = '<'.$reg.'>';
-#                      $ret = preg_replace('/'.$reg.'/', '${1}', $ret);
-#              }
-#
+               $m = array_shift($matches);
+               $ret = strtr($m[0], $this->mTables[$toVariant]);
+               $mstart = $m[1]+strlen($m[0]);
+               foreach($matches as $m) {
+                       $ret .= substr($text, $mstart, $m[1]-$mstart);
+                       $ret .= strtr($m[0], $this->mTables[$toVariant]);
+                       $mstart = $m[1] + strlen($m[0]);
+               }               
                wfProfileOut( $fname );
                return $ret;
        }
@@ -186,6 +190,13 @@ class LanguageConverter {
      */
        function convert( $text , $isTitle=false) {
                global $wgDisableLangConversion;
+               global $wgTitle;
+
+               /* don't do anything if this is the conversion table */
+               if($wgTitle->getNamespace() == NS_MEDIAWIKI &&
+                  strpos($wgTitle->getText(), "Conversiontable")!==false)
+                       return $text;
+
                if($wgDisableLangConversion)
                        return $text; 
 
@@ -226,101 +237,77 @@ class LanguageConverter {
                if( !$this->mDoContentConvert )
                        return $text;
 
-               $search = array('/('.UNIQ_PREFIX.'-[a-zA-Z0-9]+)/', //nowiki marker
-                                       '/(&[a-z#][a-z0-9]+;)/', //html entities
-                        );
-               $replace = $this->mMarkup['begin'].'${1}'.$this->mMarkup['end'];
-
-               $text = preg_replace($search, $replace, $text);
-
                $plang = $this->getPreferredVariant();
                $fallback = $this->mVariantFallbacks[$plang];
+
                $tarray = explode($this->mMarkup['begin'], $text);
                $tfirst = array_shift($tarray);
                $text = $this->autoConvert($tfirst);
                foreach($tarray as $txt) {
                        $marked = explode($this->mMarkup['end'], $txt);
-               
-                       //strip &nbsp; since it interferes with the parsing, plus,
-                       //all spaces should be stripped in this tag anyway.
-                       $marked[0] = str_replace('&nbsp;', '', $marked[0]);
-
-                       /* see if this conversion has special meaning
-                          # for article title:
-                                -{T|zh-cn:foo;zh-tw:bar}-
-                          # convert all occurence of foo/bar in this article:
-                                -{A|zh-cn:foo;zh-tw:bar}-
-                       */
-                       $flag = '';
-                       $choice = false;
-                       $tt = explode("|", $marked[0], 2);
+                       $flags = array();
+                       $tt = explode($this->mMarkup['flagsep'], $marked[0], 2);
+
                        if(sizeof($tt) == 2) {
-                               $flag = trim($tt[0]);
-                               $choice = explode(";", $tt[1]);
+                               $f = explode($this->mMarkup['varsep'], $tt[0]);
+                               foreach($f as $ff) {
+                                       $ff = trim($ff);
+                                       if(array_key_exists($ff, $this->mFlags) &&
+                                               !array_key_exists($this->mFlags[$ff], $flags))
+                                               $flags[] = $this->mFlags[$ff];
+                               }
+                               $rules = $tt[1];
                        }
+                       else
+                               $rules = $marked[0];
 
-                       if(!$choice) {
-                               $choice = explode($this->mMarkup['varsep'], $marked[0]);
-                       }
-                       $disp = '';
-                       $carray = array();
-                       if(!array_key_exists(1, $choice)) {
-                               /* a single choice */
-                               $disp = $choice[0];
+#FIXME: may cause trouble here...
+                       //strip &nbsp; since it interferes with the parsing, plus,
+                       //all spaces should be stripped in this tag anyway.
+                       $rules = str_replace('&nbsp;', '', $rules);
 
-                               /* fill the carray if the conversion is for the whole article*/
-                               if($flag == 'A') {
-                                       foreach($this->mVariants as $v) {
-                                               $carray[$v] = $disp;
-                                       }
-                               }
-                       } 
-                       else {
-                               foreach($choice as $c) {
-                                       $v = explode($this->mMarkup['codesep'], $c);
-                                       if(sizeof($v) != 2) // syntax error, skip
-                                               continue;
-                                       $carray[trim($v[0])] = trim($v[1]);
-                               }
-                               if(array_key_exists($plang, $carray))
-                                       $disp = $carray[$plang];
-                               else if(array_key_exists($fallback, $carray))
-                                       $disp = $carray[$fallback];
-                       }
-                       if(empty($disp)) { // syntax error
-                               $text .= $marked[0];
-                       }
-                       else {  
-                               if($flag == 'T') // for title only
+                       $carray = $this->parseManualRule($rules, $flags);
+                       $disp = '';
+                       if(array_key_exists($plang, $carray))
+                               $disp = $carray[$plang];
+                       else if(array_key_exists($fallback, $carray))
+                               $disp = $carray[$fallback];
+                       if($disp) {
+                               if(in_array('T',  $flags))
                                        $this->mTitleDisplay = $disp;
-                               else {
+                               else
                                        $text .= $disp;
-                                       if($flag == 'A') {
-                                               /* modify the conversion table for this session*/
-
-                                               /* fill in the missing variants, if any,
-                                                   with fallbacks */ 
-                                               foreach($this->mVariants as $v) {
-                                                       if(!array_key_exists($v, $carray)) {
-                                                               $vf = $this->getVariantFallback($v);
-                                                               if(array_key_exists($vf, $carray))
-                                                                       $carray[$v] = $carray[$vf];
-                                                       }
+
+                               if(in_array('A', $flags)) {
+                                       /* modify the conversion table for this session*/
+
+                                       /* fill in the missing variants, if any,
+                                           with fallbacks */ 
+                                       foreach($this->mVariants as $v) {
+                                               if(!array_key_exists($v, $carray)) {
+                                                       $vf = $this->getVariantFallback($v);
+                                                       if(array_key_exists($vf, $carray))
+                                                               $carray[$v] = $carray[$vf];
                                                }
-                                               foreach($this->mVariants as $vfrom) {
-                                                       if(!array_key_exists($vfrom, $carray))
+                                       }
+
+                                       foreach($this->mVariants as $vfrom) {
+                                               if(!array_key_exists($vfrom, $carray))
+                                                       continue;
+                                               foreach($this->mVariants as $vto) {
+                                                       if($vfrom == $vto)
+                                                               continue;
+                                                       if(!array_key_exists($vto, $carray))
                                                                continue;
-                                                       foreach($this->mVariants as $vto) {
-                                                               if($vfrom == $vto)
-                                                                       continue;
-                                                               if(!array_key_exists($vto, $carray))
-                                                                       continue;
-                                                               $this->mTables[$vto][$carray[$vfrom]] = $carray[$vto];
-                                                       }
+                                                       $this->mTables[$vto][$carray[$vfrom]] = $carray[$vto];
+
                                                }
                                        }
                                }
                        }
+                       else {
+                               $text .= $marked[0];
+                       }
                        if(array_key_exists(1, $marked))
                                $text .= $this->autoConvert($marked[1]);
                }
@@ -328,6 +315,31 @@ class LanguageConverter {
                return $text;
        }
 
+       /**
+        * parse the manually marked conversion rule
+        * @param string $rules the text of the rule
+        * @return array of the translation in each variant
+        * @access private
+        */
+       function parseManualRule($rules, $flags=array()) {
+
+               $choice = explode($this->mMarkup['varsep'], $rules);
+               $carray = array();
+               if(sizeof($choice) == 1) {
+                       /* a single choice */
+                       foreach($this->mVariants as $v)
+                               $carray[$v] = $choice[0];
+               }
+               else {
+                       foreach($choice as $c) {
+                               $v = explode($this->mMarkup['codesep'], $c);
+                               if(sizeof($v) != 2) // syntax error, skip
+                                       continue;
+                               $carray[trim($v[0])] = trim($v[1]);
+                       }
+               }
+               return $carray;
+       }
 
        /**
         * if a language supports multiple variants, it is
@@ -345,7 +357,10 @@ class LanguageConverter {
                static $cache=array();
                global $wgDisableLangConversion;
                $pref = $this->getPreferredVariant();
-               if( $count > 50 )
+               $ns=0;
+               if(is_object($nt))
+                       $ns = $nt->getNamespace();
+               if( $count > 50 && $ns != NS_CATEGORY )
                        return;
                $count++;
                $variants = $this->autoConvertToAllVariants($link);
@@ -358,7 +373,7 @@ class LanguageConverter {
                        $varnt = Title::newFromText( $v );
                        if( $varnt && $varnt->getArticleID() > 0 ) {
                                $nt = $varnt;
-                               if( !$wgDisableLangConversion && $pref != 'zh' )
+                               if( !$wgDisableLangConversion )
                                        $link = $v;
                                break;
                        }
@@ -460,20 +475,6 @@ class LanguageConverter {
      */
        function postLoadTables() {}
 
-       /* deprecated? */
-       function updateTablexxxx($code, $table) {
-               global $wgMemc;
-               if(!$this->mTablesLoaded)
-                       $this->loadTables();
-
-               $this->mTables[$code] = array_merge($this->mTables[$code], $table);
-               if($this->lockCache()) {
-                       $wgMemc->delete($this->mCacheKey);
-                       $wgMemc->set($this->mCacheKey, $this->mTables, 43200);
-                       $this->unlockCache();
-               }
-       }
-
     /**
      * Reload the conversion tables
      * 
@@ -543,11 +544,11 @@ class LanguageConverter {
 
 
                // parse the mappings in this page
-               $blocks = explode('-{', $txt);
+               $blocks = explode($this->mMarkup['begin'], $txt);
                array_shift($blocks);
                $ret = array(); 
                foreach($blocks as $block) {
-                       $mappings = explode('}-', $block, 2);
+                       $mappings = explode($this->mMarkup['end'], $block, 2);
                        $stripped = str_replace(array("'", '"', '*','#'), '', $mappings[0]);
                        $table = explode( ';', $stripped );
                        foreach( $table as $t ) {
@@ -569,6 +570,12 @@ class LanguageConverter {
                                $ret = array_merge($ret, $s);
                        }
                }
+               
+               if ($this->mUcfirst) {
+                       foreach ($ret as $k => $v) {
+                               $ret[LanguageUtf8::ucfirst($k)] = LanguageUtf8::ucfirst($v);
+                       }
+               }
                return $ret;
        }
 
@@ -580,10 +587,23 @@ class LanguageConverter {
         * @return string the tagged text
        */
        function markNoConversion($text) {
+               # don't mark if already marked
+               if(strpos($text, $this->mMarkup['begin']) ||
+                  strpos($text, $this->mMarkup['end']))
+                       return $text;
+
                $ret = $this->mMarkup['begin'] . $text . $this->mMarkup['end'];
+               return $ret;
        }
 
        /**
+        * Convert the sorting key for category links, so that keys which are variants
+        * of each other map to the same key. This base version returns $key unchanged.
+       */
+       function convertCategoryKey( $key ) {
+               return $key;
+       }
+       /**
      * hook to refresh the cache of conversion tables when 
      * MediaWiki:conversiontable* is updated
      * @access private