X-Git-Url: https://git.heureux-cyclage.org/?a=blobdiff_plain;f=languages%2FLanguageConverter.php;h=b54fcbc4a829d8ab6ef64d971819f87e3ed96bab;hb=dc07a4cae2e76e9b31cfc762794cbcce07c84a19;hp=86b9da9b15aea166488df1f143ba0ca1cdeab33b;hpb=81549b37eae59bededf6286019e9a4374349db21;p=lhc%2Fweb%2Fwiklou.git diff --git a/languages/LanguageConverter.php b/languages/LanguageConverter.php index 86b9da9b15..b54fcbc4a8 100644 --- a/languages/LanguageConverter.php +++ b/languages/LanguageConverter.php @@ -21,10 +21,7 @@ class LanguageConverter { var $mVariants, $mVariantFallbacks, $mVariantNames; var $mTablesLoaded = false; var $mTables; - var $mManualAddTables; - var $mManualRemoveTables; var $mNamespaceTables; - var $mTitleDisplay=''; var $mDoTitleConvert=true, $mDoContentConvert=true; var $mManualLevel; // 'bidirectional' 'unidirectional' 'disable' for each variants var $mTitleFromFlag = false; @@ -34,6 +31,8 @@ class LanguageConverter { var $mFlags; var $mDescCodeSep = ':',$mDescVarSep = ';'; var $mUcfirst = false; + var $mTitleOriginal = ''; + var $mTitleDisplay = ''; const CACHE_VERSION_KEY = 'VERSION 6'; @@ -48,7 +47,7 @@ class LanguageConverter { * @param array $manualLevel limit for supported variants * @public */ - function __construct($langobj, $maincode, + function __construct( $langobj, $maincode, $variants=array(), $variantfallbacks=array(), $markup=array(), @@ -56,7 +55,13 @@ class LanguageConverter { $manualLevel = array() ) { $this->mLangObj = $langobj; $this->mMainLanguageCode = $maincode; - $this->mVariants = $variants; + + global $wgDisabledVariants; + $this->mVariants = array(); + foreach( $variants as $variant ) { + if( !in_array( $variant, $wgDisabledVariants ) ) + $this->mVariants[] = $variant; + } $this->mVariantFallbacks = $variantfallbacks; global $wgLanguageNames; $this->mVariantNames = $wgLanguageNames; @@ -88,8 +93,6 @@ class LanguageConverter { $this->mManualLevel[$v]=array_key_exists($v,$manualLevel) ?$manualLevel[$v] :'bidirectional'; - $this->mManualAddTables[$v] = array(); - $this->mManualRemoveTables[$v] = array(); $this->mNamespaceTables[$v] = array(); $this->mFlags[$v] = $v; } @@ -123,13 +126,15 @@ class LanguageConverter { /** * get preferred language variants. * @param boolean $fromUser Get it from $wgUser's preferences + * @param boolean $fromHeader Get it from Accept-Language * @return string the preferred language code * @public */ - function getPreferredVariant( $fromUser = true ) { - global $wgUser, $wgRequest, $wgVariantArticlePath, $wgDefaultLanguageVariant; + function getPreferredVariant( $fromUser = true, $fromHeader = false ) { + global $wgUser, $wgRequest, $wgVariantArticlePath, $wgDefaultLanguageVariant, $wgOut; - if($this->mPreferredVariant) + // bug 21974, don't return $this->mPreferredVariant if $fromUser = false + if( $fromUser && $this->mPreferredVariant ) return $this->mPreferredVariant; // figure out user lang without constructing wgLang to avoid infinite recursion @@ -171,34 +176,63 @@ class LanguageConverter { } // see if default variant is globaly set - if($wgDefaultLanguageVariant != false && in_array( $wgDefaultLanguageVariant, $this->mVariants )){ + if($wgDefaultLanguageVariant != false && in_array( $wgDefaultLanguageVariant, $this->mVariants )){ $this->mPreferredVariant = $wgDefaultLanguageVariant; return $this->mPreferredVariant; } - # FIXME rewrite code for parsing http header. The current code - # is written specific for detecting zh- variants if( !$this->mPreferredVariant ) { // see if some supported language variant is set in the // http header, but we don't set the mPreferredVariant // variable in case this is called before the user's // preference is loaded - $pv=$this->mMainLanguageCode; - if(array_key_exists('HTTP_ACCEPT_LANGUAGE', $_SERVER)) { - $header = str_replace( '_', '-', strtolower($_SERVER["HTTP_ACCEPT_LANGUAGE"])); - $zh = strstr($header, $pv.'-'); - if($zh) { - $ary = split("[,;]",$zh); - $pv = $ary[0]; + if( $fromHeader && array_key_exists( 'HTTP_ACCEPT_LANGUAGE', $_SERVER ) ) { + $acceptLanguage = strtolower( $_SERVER['HTTP_ACCEPT_LANGUAGE'] ); + // explode by comma + $result = explode(',', $acceptLanguage); + + $languages = array(); + + foreach( $result as $elem ) { + // if $elem likes 'zh-cn;q=0.9' + if(($posi = strpos( $elem, ';' )) !== false ) { + // get the real language code likes 'zh-cn' + $languages[] = substr( $elem, 0, $posi ); + } + else { + $languages[] = $elem; + } + } + + $fallback_languages = array(); + foreach( $languages as $language ) { + // strip whitespace + $language = trim( $language ); + if( in_array( $language, $this->mVariants ) ) { + return $language; + } + else { + // To see if there are fallbacks of current language. + // We record these fallback variants, and process + // them later. + $fallbacks = $this->getVariantFallbacks( $language ); + if( is_string( $fallbacks ) ) + $fallback_languages[] = $fallbacks; + elseif( is_array( $fallbacks ) ) + $fallback_languages = array_merge( $fallback_languages, $fallbacks ); + } + } + + // process fallback languages now + $fallback_languages = array_unique( $fallback_languages ); + foreach( $fallback_languages as $language ) { + if( in_array( $language, $this->mVariants ) ) { + return $language; + } } } - // don't try to return bad variant - if(in_array( $pv, $this->mVariants )) - return $pv; } - return $this->mMainLanguageCode; - } /** @@ -273,13 +307,26 @@ class LanguageConverter { // enable convertsion of 'xxxxtranslate($m[0], $toVariant); + // Let's convert the trtext only once, + // it would give us more performance improvement + $notrtext[] = $mark; + $trtext .= $m[0] . $trtextmark; $mstart = $m[1] + strlen($m[0]); } + $notrtext[] = ''; + $trtext = $this->translate( $trtext, $toVariant ); + $trtext = StringUtils::explode( $trtextmark, $trtext ); + foreach( $trtext as $t ) { + $ret .= array_shift($notrtext); + $ret .= $t; + } wfProfileOut( $fname ); return $ret; } @@ -295,9 +342,13 @@ class LanguageConverter { */ function translate( $text, $variant ) { wfProfileIn( __METHOD__ ); - if( !$this->mTablesLoaded ) - $this->loadTables(); - $text = $this->mTables[$variant]->replace( $text ); + // If $text is empty or only includes spaces, do nothing + // Otherwise translate it + if( trim($text) ) { + if( !$this->mTablesLoaded ) + $this->loadTables(); + $text = $this->mTables[$variant]->replace( $text ); + } wfProfileOut( __METHOD__ ); return $text; } @@ -360,7 +411,7 @@ class LanguageConverter { * prepare manual conversion table * @private */ - function prepareManualConv( $convRule ){ + function applyManualConv( $convRule ){ // use syntax -{T|zh:TitleZh;zh-tw:TitleTw}- for custom conversion in title $title = $convRule->getTitle(); if( $title ){ @@ -371,38 +422,20 @@ class LanguageConverter { //apply manual conversion table to global table $convTable = $convRule->getConvTable(); $action = $convRule->getRulesAction(); - foreach( $convTable as $v => $t ) { - if( !in_array( $v, $this->mVariants ) )continue; - if( $action=="add" ) { - foreach( $t as $from => $to ) { + foreach( $convTable as $variant => $pair ) { + if( !in_array( $variant, $this->mVariants ) )continue; + if( $action == 'add' ) { + foreach( $pair as $from => $to ) { // to ensure that $from and $to not be left blank // so $this->translate() could always return a string if ( $from || $to ) // more efficient than array_merge(), about 2.5 times. - $this->mManualAddTables[$v][$from] = $to; + $this->mTables[$variant]->setPair( $from, $to ); } } - elseif ( $action == "remove" ) { - foreach ( $t as $from=>$to ) { - if ( $from || $to ) - $this->mManualRemoveTables[$v][$from] = $to; - } - } - } - } - - /** - * apply manual conversion from $this->mManualAddTables and $this->mManualRemoveTables - * @private - */ - function applyManualConv(){ - //apply manual conversion table to global table - foreach($this->mVariants as $v) { - if (count($this->mManualAddTables[$v]) > 0) { - $this->mTables[$v]->mergeArray($this->mManualAddTables[$v]); + elseif ( $action == 'remove' ) { + $this->mTables[$variant]->removeArray( $pair ); } - if (count($this->mManualRemoveTables[$v]) > 0) - $this->mTables[$v]->removeArray($this->mManualRemoveTables[$v]); } } @@ -424,8 +457,9 @@ class LanguageConverter { $text = $this->convert( $text ); - if ( $this->mTitleFromFlag ) - $parser->mOutput->setTitleText( $this->mTitleDisplay ); + $this->convertTitle(); + $parser->mOutput->setTitleText( $this->mTitleDisplay ); + return $text; } @@ -446,34 +480,36 @@ class LanguageConverter { } /** - * convert title + * Pre convert title. Store the original title $this->mTitleOrginal; + * store the default converted title to $this->mTitleDisplay. * @private */ - function convertTitle( $text, $variant ){ - global $wgDisableTitleConversion, $wgUser; - - // check for global param and __NOTC__ tag - if( $wgDisableTitleConversion || !$this->mDoTitleConvert || $wgUser->getOption('noconvertlink') == 1 ) { - $this->mTitleDisplay = $text; - return $text; - } - - // use the title from the T flag if any - if( $this->mTitleFromFlag ){ - $this->mTitleFromFlag = false; - return $this->mTitleDisplay; - } + function preConvertTitle( $text, $variant ){ + $this->mTitleOriginal = $text; + + $text = $this->convertNamespace( $text, $variant ); + $this->mTitleDisplay = $this->convert( $text ); + } - global $wgRequest; + /** + * convert title + * @private + */ + function convertTitle(){ + global $wgDisableTitleConversion, $wgUser, $wgRequest; $isredir = $wgRequest->getText( 'redirect', 'yes' ); $action = $wgRequest->getText( 'action' ); $linkconvert = $wgRequest->getText( 'linkconvert', 'yes' ); - if ( $isredir == 'no' || $action == 'edit' || $action == 'submit' || $linkconvert == 'no' ) { - return $text; - } else { - $text = $this->convertNamespace( $text, $variant ); - $this->mTitleDisplay = $this->convert( $text ); - return $this->mTitleDisplay; + + // check for the global variable, __NOTC__ magic word, and user setting + if( $wgDisableTitleConversion || !$this->mDoTitleConvert || + $wgUser->getOption('noconvertlink') == 1 ) { + $this->mTitleDisplay = $this->mTitleOriginal; + } + + // check for GET params + elseif ( $isredir == 'no' || $action == 'edit' || $linkconvert == 'no' ) { + $this->mTitleDisplay = $this->mTitleOriginal; } } @@ -511,36 +547,37 @@ class LanguageConverter { $plang = $this->getPreferredVariant(); // for title convertion - if ( $isTitle ) return $this->convertTitle( $text, $plang ); + if ( $isTitle ) { + $this->preConvertTitle( $text, $plang ); + return $text; + } $tarray = StringUtils::explode( $this->mMarkup['end'], $text ); $text = ''; - $marks = array(); foreach ( $tarray as $txt ) { + $marked = explode( $this->mMarkup['begin'], $txt, 2 ); + + if( $this->mDoContentConvert ) + // Bug 19620: should convert a string immediately after a new rule added. + $text .= $this->autoConvert( $marked[0], $plang ); + else + $text .= $marked[0]; + if ( array_key_exists( 1, $marked ) ) { $crule = new ConverterRule($marked[1], $this); $crule->parse( $plang ); - $marked[1] = $crule->getDisplay(); - $this->prepareManualConv( $crule ); + $text .= $crule->getDisplay(); + $this->applyManualConv( $crule ); } else - $marked[0] .= $this->mMarkup['end']; - array_push( $marks, $marked ); - } - $this->applyManualConv(); - foreach ( $marks as $marked ) { - if( $this->mDoContentConvert ) - $text .= $this->autoConvert( $marked[0], $plang ); - else - $text .= $marked[0]; - if( array_key_exists( 1, $marked ) ) - $text .= $marked[1]; + $text .= $this->mMarkup['end']; + } + // Remove the last delimiter (wasn't real) $text = substr( $text, 0, -strlen( $this->mMarkup['end'] ) ); - return $text; } @@ -558,6 +595,11 @@ class LanguageConverter { * @public */ function findVariantLink( &$link, &$nt, $ignoreOtherCond = false ) { + # If the article has already existed, there is no need to + # check it again, otherwise it may cause a fault. + if ( is_object( $nt ) && $nt->exists() ) + return; + global $wgDisableLangConversion, $wgDisableTitleConversion, $wgRequest, $wgUser; $isredir = $wgRequest->getText( 'redirect', 'yes' ); $action = $wgRequest->getText( 'action' ); @@ -571,7 +613,7 @@ class LanguageConverter { || $action == 'submit' || $linkconvert == 'no' || $wgUser->getOption('noconvertlink') == 1 ) ) ) return; - if(is_object($nt)) + if ( is_object( $nt ) ) $ns = $nt->getNamespace(); $variants = $this->autoConvertToAllVariants($link); @@ -621,33 +663,6 @@ class LanguageConverter { return $this->mTitleDisplay; } - /** - * a write lock to the cache - * - * @private - */ - function lockCache() { - global $wgMemc; - $success = false; - for($i=0; $i<30; $i++) { - if($success = $wgMemc->add($this->mCacheKey . "lock", 1, 10)) - break; - sleep(1); - } - return $success; - } - - /** - * unlock cache - * - * @private - */ - function unlockCache() { - global $wgMemc; - $wgMemc->delete($this->mCacheKey . "lock"); - } - - /** * Load default conversion tables * This method must be implemented in derived class @@ -688,11 +703,8 @@ class LanguageConverter { $this->postLoadTables(); $this->mTables[self::CACHE_VERSION_KEY] = true; - - if($this->lockCache()) { - $wgMemc->set($this->mCacheKey, $this->mTables, 43200); - $this->unlockCache(); - } + + $wgMemc->set($this->mCacheKey, $this->mTables, 43200); wfProfileOut( __METHOD__.'-recache' ); } wfProfileOut( __METHOD__ ); @@ -866,6 +878,9 @@ class LanguageConverter { * @public */ function armourMath($text){ + // we need to convert '-{' and '}-' to '-{' and '}-' + // to avoid a unwanted '}-' appeared after the math-image. + $text = strtr( $text, array('-{' => '-{', '}-' => '}-') ); $ret = $this->mMarkup['begin'] . 'R|' . $text . $this->mMarkup['end']; return $ret; } @@ -879,8 +894,9 @@ class LanguageConverter { class ConverterRule { var $mText; // original text in -{text}- var $mConverter; // LanguageConverter object - var $mManualCodeError='code error!'; - var $mRuleDisplay = '',$mRuleTitle=false; + var $mManualCodeError = 'code error!'; + var $mRuleDisplay = ''; + var $mRuleTitle = false; var $mRules = '';// string : the text of the rules var $mRulesAction = 'none'; var $mFlags = array(); @@ -895,11 +911,11 @@ class ConverterRule { * @param object $converter a LanguageConverter object * @access public */ - function __construct($text,$converter){ + function __construct( $text, $converter ){ $this->mText = $text; - $this->mConverter=$converter; - foreach($converter->mVariants as $v){ - $this->mConvTable[$v]=array(); + $this->mConverter = $converter; + foreach( $converter->mVariants as $v ){ + $this->mConvTable[$v] = array(); } } @@ -910,11 +926,11 @@ class ConverterRule { * @return string Translated text * @public */ - function getTextInBidtable($variants){ - if(is_string($variants)){ $variants=array($variants); } - if(!is_array($variants)) return false; - foreach ($variants as $variant){ - if(array_key_exists($variant, $this->mBidtable)){ + function getTextInBidtable( $variants ){ + if( is_string( $variants ) ){ $variants = array( $variants ); } + if( !is_array( $variants ) ) return false; + foreach( $variants as $variant ){ + if( array_key_exists( $variant, $this->mBidtable ) ){ return $this->mBidtable[$variant]; } } @@ -976,10 +992,12 @@ class ConverterRule { $flags=array_diff($flags,array('S')); $flags_temp = array(); foreach ($variants as $variant) { + // try to find flags like "zh-hans", "zh-hant" + // allow syntaxes like "-{zh-hans;zh-hant|XXXX}-" if ( in_array($variant, $flags) ) $flags_temp[] = $variant; } - if ( count($flags_temp) == 0 ) + if ( count($flags_temp) !== 0 ) $flags = $flags_temp; } if ( count($flags) == 0 ) @@ -1000,19 +1018,34 @@ class ConverterRule { $markup = $this->mConverter->mMarkup; $variants = $this->mConverter->mVariants; + // varsep_pattern for preg_split: + // text should be splited by ";" only if a valid variant + // name exist after the markup, for example: + // -{zh-hans:xxx;zh-hant:yyy;}- + // we should split it as: + // array( + // [0] => 'zh-hans:xxx' + // [1] => 'zh-hant:yyy' + // [2] => '' + // ) $varsep_pattern = '/' . $markup['varsep'] . '\s*' . '(?='; - foreach( $variants as $variant ) - $varsep_pattern .= $variant . '\s*' . $markup['codesep'] . '|'; + foreach( $variants as $variant ) { + $varsep_pattern .= $variant . '\s*' . $markup['codesep'] . '|'; // zh-hans:xxx;zh-hant:yyy + $varsep_pattern .= '[^;]*?' . $markup['unidsep'] . '\s*' . $variant + . '\s*' . $markup['codesep'] . '|'; // xxx=>zh-hans:yyy; xxx=>zh-hant:zzz + } $varsep_pattern .= '\s*$)/'; $choice = preg_split($varsep_pattern, $rules); + foreach( $choice as $c ) { $v = explode($markup['codesep'], $c, 2); if( count($v) != 2 ) continue;// syntax error, skip $to = trim($v[1]); $v = trim($v[0]); - $u = explode($markup['unidsep'], $v); + $u = explode($markup['unidsep'], $v, 2); + // if $to is empty, strtr() could return a wrong result if( count($u) == 1 && $to && in_array( $v, $variants ) ) { $bidtable[$v] = $to; } else if(count($u) == 2){ @@ -1054,13 +1087,13 @@ class ConverterRule { * Parse rules conversion * @private */ - function getRuleConvertedStr($variant,$doConvert){ + function getRuleConvertedStr( $variant, $doConvert ){ $bidtable = $this->mBidtable; $unidtable = $this->mUnidtable; if( count($bidtable) + count($unidtable) == 0 ){ return $this->mRules; - } elseif ($doConvert){// the text converted + } elseif ( $doConvert ){// the text converted // display current variant in bidirectional array $disp = $this->getTextInBidtable($variant); // or display current variant in fallbacks @@ -1129,9 +1162,9 @@ class ConverterRule { fill to convert tables */ $allow_unid = $manLevel[$v]=='bidirectional' || $manLevel[$v]=='unidirectional'; - if($allow_unid && array_key_exists($v,$unidtable)){ - $ct=$this->mConvTable[$v]; - $this->mConvTable[$v] = array_merge($ct,$unidtable[$v]); + if( $allow_unid && array_key_exists( $v, $unidtable ) ){ + $ct = $this->mConvTable[$v]; + $this->mConvTable[$v] = array_merge($ct, $unidtable[$v]); } } } @@ -1148,14 +1181,19 @@ class ConverterRule { $this->parseFlags(); $flags = $this->mFlags; - //convert to specified variant + // convert to specified variant + // syntax: -{zh-hans;zh-hant[;...]|}- if( count( array_diff( $flags, $variants ) ) == 0 and count( $flags ) != 0 ) { - if ( in_array( $variant, $flags ) ) + if ( in_array( $variant, $flags ) ) // check if current variant in flags + // then convert to current language $this->mRules = $this->mConverter->autoConvert( $this->mRules, $variant ); - else { + else { // if current variant no in flags, + // then we check its fallback variants. $variantFallbacks = $this->mConverter->getVariantFallbacks($variant); foreach ( $variantFallbacks as $variantFallback ) { + // if current variant's fallback exist in flags if ( in_array( $variantFallback, $flags ) ) { + // then convert to fallback language $this->mRules = $this->mConverter->autoConvert( $this->mRules, $variantFallback ); break; }