X-Git-Url: https://git.heureux-cyclage.org/?a=blobdiff_plain;f=languages%2FLanguageConverter.php;h=a1f6681de133ff9ac796904e29e84d576365cf39;hb=22b4e4eff7bb1a04133b19166b778dd8085b5066;hp=63b98f1292296ed6b9e94cb8fe217c7eccc21520;hpb=2a59db5897296b30af277abba8e9a4d1471da27e;p=lhc%2Fweb%2Fwiklou.git diff --git a/languages/LanguageConverter.php b/languages/LanguageConverter.php index 63b98f1292..a1f6681de1 100644 --- a/languages/LanguageConverter.php +++ b/languages/LanguageConverter.php @@ -18,11 +18,10 @@ class LanguageConverter { var $mPreferredVariant=''; var $mMainLanguageCode; - var $mVariants, $mVariantFallbacks, $mLanguageNames; + var $mVariants, $mVariantFallbacks, $mVariantNames; var $mTablesLoaded = false; var $mTables; - var $mManualAddTables; - var $mManualRemoveTables; + var $mNamespaceTables; var $mTitleDisplay=''; var $mDoTitleConvert=true, $mDoContentConvert=true; var $mManualLevel; // 'bidirectional' 'unidirectional' 'disable' for each variants @@ -47,7 +46,7 @@ class LanguageConverter { * @param array $manualLevel limit for supported variants * @public */ - function __construct($langobj, $maincode, + function __construct( $langobj, $maincode, $variants=array(), $variantfallbacks=array(), $markup=array(), @@ -55,10 +54,16 @@ class LanguageConverter { $manualLevel = array() ) { $this->mLangObj = $langobj; $this->mMainLanguageCode = $maincode; - $this->mVariants = $variants; + + global $wgDisabledVariants; + $this->mVariants = array(); + foreach( $variants as $variant ) { + if( !in_array( $variant, $wgDisabledVariants ) ) + $this->mVariants[] = $variant; + } $this->mVariantFallbacks = $variantfallbacks; global $wgLanguageNames; - $this->mLanguageNames = $wgLanguageNames; + $this->mVariantNames = $wgLanguageNames; $this->mCacheKey = wfMemcKey( 'conversiontables', $maincode ); $m = array( 'begin'=>'-{', @@ -87,8 +92,8 @@ class LanguageConverter { $this->mManualLevel[$v]=array_key_exists($v,$manualLevel) ?$manualLevel[$v] :'bidirectional'; - $this->mManualAddTables[$v] = array(); - $this->mManualRemoveTables[$v] = array(); + $this->mNamespaceTables[$v] = array(); + $this->mFlags[$v] = $v; } } @@ -117,9 +122,116 @@ class LanguageConverter { return $this->mMainLanguageCode; } - // this method body in Language class + /** + * get preferred language variants. + * @param boolean $fromUser Get it from $wgUser's preferences + * @return string the preferred language code + * @public + */ function getPreferredVariant( $fromUser = true ) { - return $this->mLangObj->getPreferredVariant( $fromUser ); + global $wgUser, $wgRequest, $wgVariantArticlePath, $wgDefaultLanguageVariant; + + if($this->mPreferredVariant) + return $this->mPreferredVariant; + + // figure out user lang without constructing wgLang to avoid infinite recursion + if( $fromUser ) + $defaultUserLang = $wgUser->getOption( 'language' ); + else + $defaultUserLang = $this->mMainLanguageCode; + $userLang = $wgRequest->getVal( 'uselang', $defaultUserLang ); + // see if interface language is same as content, if not, prevent conversion + if( ! in_array( $userLang, $this->mVariants ) ){ + $this->mPreferredVariant = $this->mMainLanguageCode; // no conversion + return $this->mPreferredVariant; + } + + // see if the preference is set in the request + $req = $wgRequest->getText( 'variant' ); + if( in_array( $req, $this->mVariants ) ) { + $this->mPreferredVariant = $req; + return $req; + } + + // check the syntax /code/ArticleTitle + if($wgVariantArticlePath!=false && isset($_SERVER['SCRIPT_NAME'])){ + // Note: SCRIPT_NAME probably won't hold the correct value if PHP is run as CGI + // (it will hold path to php.cgi binary), and might not exist on some very old PHP installations + $scriptBase = basename( $_SERVER['SCRIPT_NAME'] ); + if(in_array($scriptBase,$this->mVariants)){ + $this->mPreferredVariant = $scriptBase; + return $this->mPreferredVariant; + } + } + + // get language variant preference from logged in users + // Don't call this on stub objects because that causes infinite + // recursion during initialisation + if( $fromUser && $wgUser->isLoggedIn() ) { + $this->mPreferredVariant = $wgUser->getOption('variant'); + return $this->mPreferredVariant; + } + + // see if default variant is globaly set + if($wgDefaultLanguageVariant != false && in_array( $wgDefaultLanguageVariant, $this->mVariants )){ + $this->mPreferredVariant = $wgDefaultLanguageVariant; + return $this->mPreferredVariant; + } + + if( !$this->mPreferredVariant ) { + // see if some supported language variant is set in the + // http header, but we don't set the mPreferredVariant + // variable in case this is called before the user's + // preference is loaded + if( array_key_exists( 'HTTP_ACCEPT_LANGUAGE', $_SERVER ) ) { + $acceptLanguage = strtolower( $_SERVER['HTTP_ACCEPT_LANGUAGE'] ); + + // explode by comma + $result = explode(',', $acceptLanguage); + + $languages = array(); + + foreach( $result as $elem ) { + // if $elem likes 'zh-cn;q=0.9' + if(($posi = strpos( $elem, ';' )) !== false ) { + // get the real language code likes 'zh-cn' + $languages[] = substr( $elem, 0, $posi ); + } + else { + $languages[] = $elem; + } + } + + $fallback_languages = array(); + foreach( $languages as $language ) { + // strip whitespace + $language = trim( $language ); + if( in_array( $language, $this->mVariants ) ) { + return $language; + } + else { + // To see if there are fallbacks of current language. + // We record these fallback variants, and process + // them later. + $fallbacks = $this->getVariantFallbacks( $language ); + if( is_string( $fallbacks ) ) + $fallback_languages[] = $fallbacks; + elseif( is_array( $fallbacks ) ) + $fallback_languages = array_merge( $fallback_languages, $fallbacks ); + } + } + + // process fallback languages now + $fallback_languages = array_unique( $fallback_languages ); + foreach( $fallback_languages as $language ) { + if( in_array( $language, $this->mVariants ) ) { + return $language; + } + } + } + } + + return $this->mMainLanguageCode; } /** @@ -281,10 +393,10 @@ class LanguageConverter { * prepare manual conversion table * @private */ - function prepareManualConv($convRule){ + function applyManualConv( $convRule ){ // use syntax -{T|zh:TitleZh;zh-tw:TitleTw}- for custom conversion in title $title = $convRule->getTitle(); - if($title){ + if( $title ){ $this->mTitleFromFlag = true; $this->mTitleDisplay = $title; } @@ -292,41 +404,23 @@ class LanguageConverter { //apply manual conversion table to global table $convTable = $convRule->getConvTable(); $action = $convRule->getRulesAction(); - foreach($convTable as $v=>$t) { - if( !in_array($v,$this->mVariants) )continue; + foreach( $convTable as $variant => $pair ) { + if( !in_array( $variant, $this->mVariants ) )continue; if( $action=="add" ) { - foreach($t as $from=>$to) { + foreach( $pair as $from => $to ) { // to ensure that $from and $to not be left blank // so $this->translate() could always return a string - if ($from || $to) + if ( $from || $to ) // more efficient than array_merge(), about 2.5 times. - $this->mManualAddTables[$v][$from] = $to; + $this->mTables[$variant]->setPair( $from, $to ); } } - elseif ( $action=="remove" ) { - foreach($t as $from=>$to) { - if ($from || $to) - $this->mManualRemoveTables[$v][$from] = $to; - } + elseif ( $action == "remove" ) { + $this->mTables[$variant]->removeArray( $pair ); } } } - /** - * apply manual conversion from $this->mManualAddTables and $this->mManualRemoveTables - * @private - */ - function applyManualConv(){ - //apply manual conversion table to global table - foreach($this->mVariants as $v) { - if (count($this->mManualAddTables[$v]) > 0) { - $this->mTables[$v]->mergeArray($this->mManualAddTables[$v]); - } - if (count($this->mManualRemoveTables[$v]) > 0) - $this->mTables[$v]->removeArray($this->mManualRemoveTables[$v]); - } - } - /** * Convert text using a parser object for context * @public @@ -340,19 +434,37 @@ class LanguageConverter { return $text; } - if($wgDisableLangConversion) + if ( $wgDisableLangConversion ) return $text; $text = $this->convert( $text ); - $parser->mOutput->setTitleText( $this->mTitleDisplay ); + + if ( $this->mTitleFromFlag ) + $parser->mOutput->setTitleText( $this->mTitleDisplay ); return $text; } + + /** + * convert namespace + * @param string $title the title included namespace + * @return array of string + * @private + */ + function convertNamespace( $title, $variant ) { + $splittitle = explode( ':', $title ); + if (count($splittitle) < 2) + return $title; + if ( isset( $this->mNamespaceTables[$variant][$splittitle[0]] ) ) + $splittitle[0] = $this->mNamespaceTables[$variant][$splittitle[0]]; + $ret = implode(':', $splittitle ); + return $ret; + } /** - * convert title + * convert title * @private */ - function convertTitle($text){ + function convertTitle( $text, $variant ){ global $wgDisableTitleConversion, $wgUser; // check for global param and __NOTC__ tag @@ -362,7 +474,7 @@ class LanguageConverter { } // use the title from the T flag if any - if($this->mTitleFromFlag){ + if( $this->mTitleFromFlag ){ $this->mTitleFromFlag = false; return $this->mTitleDisplay; } @@ -374,7 +486,8 @@ class LanguageConverter { if ( $isredir == 'no' || $action == 'edit' || $action == 'submit' || $linkconvert == 'no' ) { return $text; } else { - $this->mTitleDisplay = $this->convert($text); + $text = $this->convertNamespace( $text, $variant ); + $this->mTitleDisplay = $this->convert( $text ); return $this->mTitleDisplay; } } @@ -395,7 +508,7 @@ class LanguageConverter { * @return string converted text * @public */ - function convert( $text , $isTitle=false) { + function convert( $text, $isTitle = false ) { $mw =& MagicWord::get( 'notitleconvert' ); if( $mw->matchAndRemove( $text ) ) @@ -407,41 +520,40 @@ class LanguageConverter { // no conversion if redirecting $mw =& MagicWord::get( 'redirect' ); - if( $mw->matchStart( $text )) + if( $mw->matchStart( $text ) ) return $text; + $plang = $this->getPreferredVariant(); + // for title convertion - if ($isTitle) return $this->convertTitle($text); + if ( $isTitle ) return $this->convertTitle( $text, $plang ); - $plang = $this->getPreferredVariant(); - $tarray = StringUtils::explode($this->mMarkup['end'], $text); + $tarray = StringUtils::explode( $this->mMarkup['end'], $text ); $text = ''; - $marks = array(); - foreach($tarray as $txt) { - $marked = explode($this->mMarkup['begin'], $txt, 2); - if (array_key_exists(1, $marked)) { - $crule = new ConverterRule($marked[1], $this); - $crule->parse($plang); - $marked[1] = $crule->getDisplay(); - $this->prepareManualConv($crule); - } - else - $marked[0] .= $this->mMarkup['end']; - array_push($marks, $marked); - } - $this->applyManualConv(); - foreach ($marks as $marked) { + foreach ( $tarray as $txt ) { + + $marked = explode( $this->mMarkup['begin'], $txt, 2 ); + if( $this->mDoContentConvert ) - $text .= $this->autoConvert($marked[0],$plang); + // Bug 19620: should convert a string immediately after a new rule added. + $text .= $this->autoConvert( $marked[0], $plang ); else $text .= $marked[0]; - if( array_key_exists(1, $marked) ) - $text .= $marked[1]; + + if ( array_key_exists( 1, $marked ) ) { + $crule = new ConverterRule($marked[1], $this); + $crule->parse( $plang ); + $text .= $crule->getDisplay(); + $this->applyManualConv( $crule ); + } + else + $text .= $this->mMarkup['end']; + } + // Remove the last delimiter (wasn't real) $text = substr( $text, 0, -strlen( $this->mMarkup['end'] ) ); - return $text; } @@ -453,10 +565,17 @@ class LanguageConverter { * * @param string $link the name of the link * @param mixed $nt the title object of the link + * @param boolean $ignoreOtherCond: to disable other conditions when + * we need to transclude a template or update a category's link * @return null the input parameters may be modified upon return * @public */ - function findVariantLink( &$link, &$nt, $forTemplate = false ) { + function findVariantLink( &$link, &$nt, $ignoreOtherCond = false ) { + # If the article has already existed, there is no need to + # check it again, otherwise it may cause a fault. + if ( is_object( $nt ) && $nt->exists() ) + return; + global $wgDisableLangConversion, $wgDisableTitleConversion, $wgRequest, $wgUser; $isredir = $wgRequest->getText( 'redirect', 'yes' ); $action = $wgRequest->getText( 'action' ); @@ -466,12 +585,11 @@ class LanguageConverter { $ns=NS_MAIN; - if ( $disableLinkConversion || ( !$forTemplate && ( $isredir == 'no' || $action == 'edit' - || $action == 'submit' || $linkconvert == 'no' || $wgUser->getOption('noconvertlink') == 1 ) ) ) { + if ( $disableLinkConversion || ( !$ignoreOtherCond && ( $isredir == 'no' || $action == 'edit' + || $action == 'submit' || $linkconvert == 'no' || $wgUser->getOption('noconvertlink') == 1 ) ) ) return; - } - if(is_object($nt)) + if ( is_object( $nt ) ) $ns = $nt->getNamespace(); $variants = $this->autoConvertToAllVariants($link); @@ -496,7 +614,7 @@ class LanguageConverter { foreach( $titles as $varnt ) { if( $varnt->getArticleID() > 0 ) { $nt = $varnt; - $link = $v; + $link = $varnt->getText(); break; } } @@ -766,6 +884,9 @@ class LanguageConverter { * @public */ function armourMath($text){ + // we need to convert '-{' and '}-' to '-{' and '}-' + // to avoid a unwanted '}-' appeared after the math-image. + $text = strtr( $text, array('-{' => '-{', '}-' => '}-') ); $ret = $this->mMarkup['begin'] . 'R|' . $text . $this->mMarkup['end']; return $ret; } @@ -774,7 +895,7 @@ class LanguageConverter { /** * parser for rules of language conversion , parse rules in -{ }- tag * @ingroup Language - * @author fdcn + * @author fdcn , PhiLiP */ class ConverterRule { var $mText; // original text in -{text}- @@ -836,6 +957,7 @@ class ConverterRule { $flags = array(); $markup = $this->mConverter->mMarkup; $validFlags = $this->mConverter->mFlags; + $variants = $this->mConverter->mVariants; $tt = explode($markup['flagsep'], $text, 2); if(count($tt) == 2) { @@ -867,14 +989,23 @@ class ConverterRule { if(in_array('D',$flags)) $temp[] = 'D'; $flags = $temp; } else { - if ( in_array('A',$flags)) { + if ( in_array('A',$flags) ) { $flags[]='+'; $flags[]='S'; } if ( in_array('D',$flags) ) $flags=array_diff($flags,array('S')); + $flags_temp = array(); + foreach ($variants as $variant) { + // try to find flags like "zh-hans", "zh-hant" + // allow syntaxes like "-{zh-hans;zh-hant|XXXX}-" + if ( in_array($variant, $flags) ) + $flags_temp[] = $variant; + } + if ( count($flags_temp) !== 0 ) + $flags = $flags_temp; } - if ( count($flags)==0 ) + if ( count($flags) == 0 ) $flags = array('S'); $this->mRules=$rules; $this->mFlags=$flags; @@ -890,26 +1021,49 @@ class ConverterRule { $bidtable = array(); $unidtable = array(); $markup = $this->mConverter->mMarkup; + $variants = $this->mConverter->mVariants; + + // varsep_pattern for preg_split: + // text should be splited by ";" only if a valid variant + // name exist after the markup, for example: + // -{zh-hans:xxx;zh-hant:yyy;}- + // we should split it as: + // array( + // [0] => 'zh-hans:xxx' + // [1] => 'zh-hant:yyy' + // [2] => '' + // ) + $varsep_pattern = '/' . $markup['varsep'] . '\s*' . '(?='; + foreach( $variants as $variant ) { + $varsep_pattern .= $variant . '\s*' . $markup['codesep'] . '|'; // zh-hans:xxx;zh-hant:yyy + $varsep_pattern .= '[^;]*?' . $markup['unidsep'] . '\s*' . $variant + . '\s*' . $markup['codesep'] . '|'; // xxx=>zh-hans:yyy; xxx=>zh-hant:zzz + } + $varsep_pattern .= '\s*$)/'; - $choice = explode($markup['varsep'], $rules ); - foreach($choice as $c) { - $v = explode($markup['codesep'], $c); - if(count($v) != 2) + $choice = preg_split($varsep_pattern, $rules); + + foreach( $choice as $c ) { + $v = explode($markup['codesep'], $c, 2); + if( count($v) != 2 ) continue;// syntax error, skip - $to=trim($v[1]); - $v=trim($v[0]); - $u = explode($markup['unidsep'], $v); - if(count($u) == 1) { + $to = trim($v[1]); + $v = trim($v[0]); + $u = explode($markup['unidsep'], $v); + // if $to is empty, strtr() could return a wrong result + if( count($u) == 1 && $to && in_array( $v, $variants ) ) { $bidtable[$v] = $to; } else if(count($u) == 2){ - $from=trim($u[0]);$v=trim($u[1]); - if( array_key_exists($v,$unidtable) && !is_array($unidtable[$v]) ) - $unidtable[$v]=array($from=>$to); - else - $unidtable[$v][$from]=$to; + $from = trim($u[0]); + $v = trim($u[1]); + if( array_key_exists( $v, $unidtable ) && !is_array( $unidtable[$v] ) + && $to && in_array( $v, $variants ) ) + $unidtable[$v] = array( $from=>$to ); + elseif ( $to && in_array( $v, $variants ) ) + $unidtable[$v][$from] = $to; } // syntax error, pass - if (!array_key_exists($v,$this->mConverter->mLanguageNames)){ + if ( !array_key_exists( $v, $this->mConverter->mVariantNames ) ){ $bidtable = array(); $unidtable = array(); break; @@ -927,10 +1081,10 @@ class ConverterRule { $varsep = $this->mConverter->mDescVarSep; $text=''; foreach($this->mBidtable as $k => $v) - $text .= $this->mConverter->mLanguageNames[$k]."$codesep$v$varsep"; + $text .= $this->mConverter->mVariantNames[$k]."$codesep$v$varsep"; foreach($this->mUnidtable as $k => $a) foreach($a as $from=>$to) - $text.="$from⇒".$this->mConverter->mLanguageNames[$k]."$codesep$to$varsep"; + $text.=$from.'⇒'.$this->mConverter->mVariantNames[$k]."$codesep$to$varsep"; return $text; } @@ -1025,16 +1179,35 @@ class ConverterRule { * @public */ function parse($variant){ - if(!$variant) $variant = $this->mConverter->getPreferredVariant(); + if(!$variant) + $variant = $this->mConverter->getPreferredVariant(); + $variants = $this->mConverter->mVariants; $this->parseFlags(); $flags = $this->mFlags; - if( !in_array('R',$flags) || !in_array('N',$flags) ){ - //FIXME: may cause trouble here... - //strip   since it interferes with the parsing, plus, - //all spaces should be stripped in this tag anyway. - $this->mRules = str_replace(' ', '', $this->mRules); + // convert to specified variant + // syntax: -{zh-hans;zh-hant[;...]|}- + if( count( array_diff( $flags, $variants ) ) == 0 and count( $flags ) != 0 ) { + if ( in_array( $variant, $flags ) ) // check if current variant in flags + // then convert to current language + $this->mRules = $this->mConverter->autoConvert( $this->mRules, $variant ); + else { // if current variant no in flags, + // then we check its fallback variants. + $variantFallbacks = $this->mConverter->getVariantFallbacks($variant); + foreach ( $variantFallbacks as $variantFallback ) { + // if current variant's fallback exist in flags + if ( in_array( $variantFallback, $flags ) ) { + // then convert to fallback language + $this->mRules = $this->mConverter->autoConvert( $this->mRules, $variantFallback ); + break; + } + } + } + $this->mFlags = $flags = array('R'); + } + + if( !in_array( 'R', $flags ) || !in_array( 'N', $flags ) ) { // decode => HTML entities modified by Sanitizer::removeHTMLtags $this->mRules = str_replace('=>','=>',$this->mRules); @@ -1042,7 +1215,7 @@ class ConverterRule { } $rules = $this->mRules; - if(count($this->mBidtable)==0 && count($this->mUnidtable)==0){ + if( count( $this->mBidtable ) == 0 && count( $this->mUnidtable ) == 0 ){ if(in_array('+',$flags) || in_array('-',$flags)) // fill all variants if text in -{A/H/-|text} without rules foreach($this->mConverter->mVariants as $v) @@ -1056,7 +1229,7 @@ class ConverterRule { $this->mRuleDisplay = $rules; } elseif ( in_array('N',$flags) ){ // proces N flag: output current variant name - $this->mRuleDisplay = $this->mConverter->mLanguageNames[trim($rules)]; + $this->mRuleDisplay = $this->mConverter->mVariantNames[trim($rules)]; } elseif ( in_array('D',$flags) ){ // proces D flag: output rules description $this->mRuleDisplay = $this->getRulesDesc();