X-Git-Url: http://git.heureux-cyclage.org/?a=blobdiff_plain;f=languages%2FLanguageConverter.php;h=d4a0b10eb0f272d502ad4a4e46b68958345f4f54;hb=0ea2bcdbeaa453f00324c14f90b71f545e1d706c;hp=8b066fef8b3b5cb3c11c464b0e513fc977901496;hpb=62711a5d850a868cf2e536a0f9e0e8e09015f17f;p=lhc%2Fweb%2Fwiklou.git diff --git a/languages/LanguageConverter.php b/languages/LanguageConverter.php index 8b066fef8b..d4a0b10eb0 100644 --- a/languages/LanguageConverter.php +++ b/languages/LanguageConverter.php @@ -18,6 +18,8 @@ class LanguageConverter { var $mCacheKey; var $mLangObj; var $mMarkup; + var $mFlags; + var $mUcfirst = false; /** * Constructor * @@ -25,22 +27,25 @@ class LanguageConverter { * @param array $variants the supported variants of this language * @param array $variantfallback the fallback language of each variant * @param array $markup array defining the markup used for manual conversion + * @param array $flags array defining the custom strings that maps to the flags * @access public */ function LanguageConverter($langobj, $maincode, $variants=array(), $variantfallbacks=array(), - $markup=array('begin'=>'-{', - 'codesep'=>':', - 'varsep'=>';', - 'end'=>'}-')) { + $markup=array(), + $flags = array()) { global $wgDBname; $this->mLangObj = $langobj; $this->mMainLanguageCode = $maincode; $this->mVariants = $variants; $this->mVariantFallbacks = $variantfallbacks; $this->mCacheKey = $wgDBname . ":conversiontables"; - $this->mMarkup = $markup; + $m = array('begin'=>'-{', 'flagsep'=>'|', 'codesep'=>':', + 'varsep'=>';', 'end'=>'}-'); + $this->mMarkup = array_merge($m, $markup); + $f = array('A'=>'A', 'T'=>'T'); + $this->mFlags = array_merge($f, $flags); } /** @@ -87,23 +92,26 @@ class LanguageConverter { // get language variant preference from logged in users if(is_object($wgUser) && $wgUser->isLoggedIn() ) { $this->mPreferredVariant = $wgUser->getOption('variant'); + return $this->mPreferredVariant; } # FIXME rewrite code for parsing http header. The current code # is written specific for detecting zh- variants if( !$this->mPreferredVariant ) { - // see if some zh- variant is set in the http header, - $this->mPreferredVariant=$this->mMainLanguageCode; + // see if some supported language variant is set in the + // http header, but we don't set the mPreferredVariant + // variable in case this is called before the user's + // preference is loaded + $pv=$this->mMainLanguageCode; if(array_key_exists('HTTP_ACCEPT_LANGUAGE', $_SERVER)) { $header = str_replace( '_', '-', strtolower($_SERVER["HTTP_ACCEPT_LANGUAGE"])); $zh = strstr($header, 'zh-'); if($zh) { - $this->mPreferredVariant = substr($zh,0,5); + $pv = substr($zh,0,5); } } + return $pv; } - - return $this->mPreferredVariant; } /** @@ -116,6 +124,7 @@ class LanguageConverter { */ function autoConvert($text, $toVariant=false) { $fname="LanguageConverter::autoConvert"; + wfProfileIn( $fname ); if(!$this->mTablesLoaded) @@ -126,24 +135,19 @@ class LanguageConverter { if(!in_array($toVariant, $this->mVariants)) return $text; - $ret = ''; - $a = explode('<', $text); - $a0 = array_shift($a); - $ret .= strtr($a0, $this->mTables[$toVariant]); - foreach( $a as $aa ) { - $b = explode('>', $aa, 2); - $ret .= '<' . $b[0]; - if(sizeof($b) == 2) - $ret .= '>' . strtr($b[1], $this->mTables[$toVariant]); - } + $reg = '/<[^>]+>|&[a-z#][a-z0-9]+;|'.UNIQ_PREFIX.'-[a-zA-Z0-9]+/'; + $matches = preg_split($reg, $text, -1, PREG_SPLIT_OFFSET_CAPTURE); + -# /* put back the marker if any */ -# if(!empty($reg)) { -# $reg = '<'.$reg.'>'; -# $ret = preg_replace('/'.$reg.'/', '${1}', $ret); -# } -# + $m = array_shift($matches); + $ret = strtr($m[0], $this->mTables[$toVariant]); + $mstart = $m[1]+strlen($m[0]); + foreach($matches as $m) { + $ret .= substr($text, $mstart, $m[1]-$mstart); + $ret .= strtr($m[0], $this->mTables[$toVariant]); + $mstart = $m[1] + strlen($m[0]); + } wfProfileOut( $fname ); return $ret; } @@ -186,6 +190,13 @@ class LanguageConverter { */ function convert( $text , $isTitle=false) { global $wgDisableLangConversion; + global $wgTitle; + + /* don't do anything if this is the conversion table */ + if($wgTitle->getNamespace() == NS_MEDIAWIKI && + strpos($wgTitle->getText(), "Conversiontable")!==false) + return $text; + if($wgDisableLangConversion) return $text; @@ -226,101 +237,77 @@ class LanguageConverter { if( !$this->mDoContentConvert ) return $text; - $search = array('/('.UNIQ_PREFIX.'-[a-zA-Z0-9]+)/', //nowiki marker - '/(&[a-z#][a-z0-9]+;)/', //html entities - ); - $replace = $this->mMarkup['begin'].'${1}'.$this->mMarkup['end']; - - $text = preg_replace($search, $replace, $text); - $plang = $this->getPreferredVariant(); $fallback = $this->mVariantFallbacks[$plang]; + $tarray = explode($this->mMarkup['begin'], $text); $tfirst = array_shift($tarray); $text = $this->autoConvert($tfirst); foreach($tarray as $txt) { $marked = explode($this->mMarkup['end'], $txt); - - //strip   since it interferes with the parsing, plus, - //all spaces should be stripped in this tag anyway. - $marked[0] = str_replace(' ', '', $marked[0]); - - /* see if this conversion has special meaning - # for article title: - -{T|zh-cn:foo;zh-tw:bar}- - # convert all occurence of foo/bar in this article: - -{A|zh-cn:foo;zh-tw:bar}- - */ - $flag = ''; - $choice = false; - $tt = explode("|", $marked[0], 2); + $flags = array(); + $tt = explode($this->mMarkup['flagsep'], $marked[0], 2); + if(sizeof($tt) == 2) { - $flag = trim($tt[0]); - $choice = explode(";", $tt[1]); + $f = explode($this->mMarkup['varsep'], $tt[0]); + foreach($f as $ff) { + $ff = trim($ff); + if(array_key_exists($ff, $this->mFlags) && + !array_key_exists($this->mFlags[$ff], $flags)) + $flags[] = $this->mFlags[$ff]; + } + $rules = $tt[1]; } + else + $rules = $marked[0]; - if(!$choice) { - $choice = explode($this->mMarkup['varsep'], $marked[0]); - } - $disp = ''; - $carray = array(); - if(!array_key_exists(1, $choice)) { - /* a single choice */ - $disp = $choice[0]; +#FIXME: may cause trouble here... + //strip   since it interferes with the parsing, plus, + //all spaces should be stripped in this tag anyway. + $rules = str_replace(' ', '', $rules); - /* fill the carray if the conversion is for the whole article*/ - if($flag == 'A') { - foreach($this->mVariants as $v) { - $carray[$v] = $disp; - } - } - } - else { - foreach($choice as $c) { - $v = explode($this->mMarkup['codesep'], $c); - if(sizeof($v) != 2) // syntax error, skip - continue; - $carray[trim($v[0])] = trim($v[1]); - } - if(array_key_exists($plang, $carray)) - $disp = $carray[$plang]; - else if(array_key_exists($fallback, $carray)) - $disp = $carray[$fallback]; - } - if(empty($disp)) { // syntax error - $text .= $marked[0]; - } - else { - if($flag == 'T') // for title only + $carray = $this->parseManualRule($rules, $flags); + $disp = ''; + if(array_key_exists($plang, $carray)) + $disp = $carray[$plang]; + else if(array_key_exists($fallback, $carray)) + $disp = $carray[$fallback]; + if($disp) { + if(in_array('T', $flags)) $this->mTitleDisplay = $disp; - else { + else $text .= $disp; - if($flag == 'A') { - /* modify the conversion table for this session*/ - - /* fill in the missing variants, if any, - with fallbacks */ - foreach($this->mVariants as $v) { - if(!array_key_exists($v, $carray)) { - $vf = $this->getVariantFallback($v); - if(array_key_exists($vf, $carray)) - $carray[$v] = $carray[$vf]; - } + + if(in_array('A', $flags)) { + /* modify the conversion table for this session*/ + + /* fill in the missing variants, if any, + with fallbacks */ + foreach($this->mVariants as $v) { + if(!array_key_exists($v, $carray)) { + $vf = $this->getVariantFallback($v); + if(array_key_exists($vf, $carray)) + $carray[$v] = $carray[$vf]; } - foreach($this->mVariants as $vfrom) { - if(!array_key_exists($vfrom, $carray)) + } + + foreach($this->mVariants as $vfrom) { + if(!array_key_exists($vfrom, $carray)) + continue; + foreach($this->mVariants as $vto) { + if($vfrom == $vto) + continue; + if(!array_key_exists($vto, $carray)) continue; - foreach($this->mVariants as $vto) { - if($vfrom == $vto) - continue; - if(!array_key_exists($vto, $carray)) - continue; - $this->mTables[$vto][$carray[$vfrom]] = $carray[$vto]; - } + $this->mTables[$vto][$carray[$vfrom]] = $carray[$vto]; + } } } } + else { + $text .= $marked[0]; + } if(array_key_exists(1, $marked)) $text .= $this->autoConvert($marked[1]); } @@ -328,6 +315,31 @@ class LanguageConverter { return $text; } + /** + * parse the manually marked conversion rule + * @param string $rule the text of the rule + * @return array of the translation in each variant + * @access private + */ + function parseManualRule($rules, $flags=array()) { + + $choice = explode($this->mMarkup['varsep'], $rules); + $carray = array(); + if(sizeof($choice) == 1) { + /* a single choice */ + foreach($this->mVariants as $v) + $carray[$v] = $choice[0]; + } + else { + foreach($choice as $c) { + $v = explode($this->mMarkup['codesep'], $c); + if(sizeof($v) != 2) // syntax error, skip + continue; + $carray[trim($v[0])] = trim($v[1]); + } + } + return $carray; + } /** * if a language supports multiple variants, it is @@ -345,7 +357,10 @@ class LanguageConverter { static $cache=array(); global $wgDisableLangConversion; $pref = $this->getPreferredVariant(); - if( $count > 50 ) + $ns=0; + if(is_object($nt)) + $ns = $nt->getNamespace(); + if( $count > 50 && $ns != NS_CATEGORY ) return; $count++; $variants = $this->autoConvertToAllVariants($link); @@ -358,7 +373,7 @@ class LanguageConverter { $varnt = Title::newFromText( $v ); if( $varnt && $varnt->getArticleID() > 0 ) { $nt = $varnt; - if( !$wgDisableLangConversion && $pref != 'zh' ) + if( !$wgDisableLangConversion ) $link = $v; break; } @@ -460,20 +475,6 @@ class LanguageConverter { */ function postLoadTables() {} - /* deprecated? */ - function updateTablexxxx($code, $table) { - global $wgMemc; - if(!$this->mTablesLoaded) - $this->loadTables(); - - $this->mTables[$code] = array_merge($this->mTables[$code], $table); - if($this->lockCache()) { - $wgMemc->delete($this->mCacheKey); - $wgMemc->set($this->mCacheKey, $this->mTables, 43200); - $this->unlockCache(); - } - } - /** * Reload the conversion tables * @@ -543,11 +544,11 @@ class LanguageConverter { // parse the mappings in this page - $blocks = explode('-{', $txt); + $blocks = explode($this->mMarkup['begin'], $txt); array_shift($blocks); $ret = array(); foreach($blocks as $block) { - $mappings = explode('}-', $block, 2); + $mappings = explode($this->mMarkup['end'], $block, 2); $stripped = str_replace(array("'", '"', '*','#'), '', $mappings[0]); $table = explode( ';', $stripped ); foreach( $table as $t ) { @@ -569,6 +570,12 @@ class LanguageConverter { $ret = array_merge($ret, $s); } } + + if ($this->mUcfirst) { + foreach ($ret as $k => $v) { + $ret[LanguageUtf8::ucfirst($k)] = LanguageUtf8::ucfirst($v); + } + } return $ret; } @@ -580,10 +587,23 @@ class LanguageConverter { * @return string the tagged text */ function markNoConversion($text) { + # don't mark if already marked + if(strpos($text, $this->mMarkup['begin']) || + strpos($text, $this->mMarkup['end'])) + return $text; + $ret = $this->mMarkup['begin'] . $text . $this->mMarkup['end']; + return $ret; } /** + * convert the sorting key for category links. this should make different + * keys that are variants of each other map to the same key + */ + function convertCategoryKey( $key ) { + return $key; + } + /** * hook to refresh the cache of conversion tables when * MediaWiki:conversiontable* is updated * @access private