X-Git-Url: https://git.heureux-cyclage.org/?a=blobdiff_plain;f=languages%2FLanguageConverter.php;h=a1f6681de133ff9ac796904e29e84d576365cf39;hb=22b4e4eff7bb1a04133b19166b778dd8085b5066;hp=701381ab49425accac16b6b095cf585ae98228ef;hpb=ed5a6072afcadb5c84aed4582e856f796f47d782;p=lhc%2Fweb%2Fwiklou.git diff --git a/languages/LanguageConverter.php b/languages/LanguageConverter.php index 701381ab49..a1f6681de1 100644 --- a/languages/LanguageConverter.php +++ b/languages/LanguageConverter.php @@ -21,8 +21,7 @@ class LanguageConverter { var $mVariants, $mVariantFallbacks, $mVariantNames; var $mTablesLoaded = false; var $mTables; - var $mManualAddTables; - var $mManualRemoveTables; + var $mNamespaceTables; var $mTitleDisplay=''; var $mDoTitleConvert=true, $mDoContentConvert=true; var $mManualLevel; // 'bidirectional' 'unidirectional' 'disable' for each variants @@ -47,7 +46,7 @@ class LanguageConverter { * @param array $manualLevel limit for supported variants * @public */ - function __construct($langobj, $maincode, + function __construct( $langobj, $maincode, $variants=array(), $variantfallbacks=array(), $markup=array(), @@ -55,7 +54,13 @@ class LanguageConverter { $manualLevel = array() ) { $this->mLangObj = $langobj; $this->mMainLanguageCode = $maincode; - $this->mVariants = $variants; + + global $wgDisabledVariants; + $this->mVariants = array(); + foreach( $variants as $variant ) { + if( !in_array( $variant, $wgDisabledVariants ) ) + $this->mVariants[] = $variant; + } $this->mVariantFallbacks = $variantfallbacks; global $wgLanguageNames; $this->mVariantNames = $wgLanguageNames; @@ -87,8 +92,8 @@ class LanguageConverter { $this->mManualLevel[$v]=array_key_exists($v,$manualLevel) ?$manualLevel[$v] :'bidirectional'; - $this->mManualAddTables[$v] = array(); - $this->mManualRemoveTables[$v] = array(); + $this->mNamespaceTables[$v] = array(); + $this->mFlags[$v] = $v; } } @@ -173,29 +178,60 @@ class LanguageConverter { return $this->mPreferredVariant; } - # FIXME rewrite code for parsing http header. The current code - # is written specific for detecting zh- variants if( !$this->mPreferredVariant ) { // see if some supported language variant is set in the // http header, but we don't set the mPreferredVariant // variable in case this is called before the user's // preference is loaded - $pv=$this->mMainLanguageCode; - if(array_key_exists('HTTP_ACCEPT_LANGUAGE', $_SERVER)) { - $header = str_replace( '_', '-', strtolower($_SERVER["HTTP_ACCEPT_LANGUAGE"])); - $zh = strstr($header, $pv.'-'); - if($zh) { - $ary = split("[,;]",$zh); - $pv = $ary[0]; + if( array_key_exists( 'HTTP_ACCEPT_LANGUAGE', $_SERVER ) ) { + $acceptLanguage = strtolower( $_SERVER['HTTP_ACCEPT_LANGUAGE'] ); + + // explode by comma + $result = explode(',', $acceptLanguage); + + $languages = array(); + + foreach( $result as $elem ) { + // if $elem likes 'zh-cn;q=0.9' + if(($posi = strpos( $elem, ';' )) !== false ) { + // get the real language code likes 'zh-cn' + $languages[] = substr( $elem, 0, $posi ); + } + else { + $languages[] = $elem; + } + } + + $fallback_languages = array(); + foreach( $languages as $language ) { + // strip whitespace + $language = trim( $language ); + if( in_array( $language, $this->mVariants ) ) { + return $language; + } + else { + // To see if there are fallbacks of current language. + // We record these fallback variants, and process + // them later. + $fallbacks = $this->getVariantFallbacks( $language ); + if( is_string( $fallbacks ) ) + $fallback_languages[] = $fallbacks; + elseif( is_array( $fallbacks ) ) + $fallback_languages = array_merge( $fallback_languages, $fallbacks ); + } + } + + // process fallback languages now + $fallback_languages = array_unique( $fallback_languages ); + foreach( $fallback_languages as $language ) { + if( in_array( $language, $this->mVariants ) ) { + return $language; + } } } - // don't try to return bad variant - if(in_array( $pv, $this->mVariants )) - return $pv; } return $this->mMainLanguageCode; - } /** @@ -357,7 +393,7 @@ class LanguageConverter { * prepare manual conversion table * @private */ - function prepareManualConv( $convRule ){ + function applyManualConv( $convRule ){ // use syntax -{T|zh:TitleZh;zh-tw:TitleTw}- for custom conversion in title $title = $convRule->getTitle(); if( $title ){ @@ -368,41 +404,23 @@ class LanguageConverter { //apply manual conversion table to global table $convTable = $convRule->getConvTable(); $action = $convRule->getRulesAction(); - foreach( $convTable as $v => $t ) { - if( !in_array( $v, $this->mVariants ) )continue; + foreach( $convTable as $variant => $pair ) { + if( !in_array( $variant, $this->mVariants ) )continue; if( $action=="add" ) { - foreach( $t as $from => $to ) { + foreach( $pair as $from => $to ) { // to ensure that $from and $to not be left blank // so $this->translate() could always return a string if ( $from || $to ) // more efficient than array_merge(), about 2.5 times. - $this->mManualAddTables[$v][$from] = $to; + $this->mTables[$variant]->setPair( $from, $to ); } } elseif ( $action == "remove" ) { - foreach ( $t as $from=>$to ) { - if ( $from || $to ) - $this->mManualRemoveTables[$v][$from] = $to; - } + $this->mTables[$variant]->removeArray( $pair ); } } } - /** - * apply manual conversion from $this->mManualAddTables and $this->mManualRemoveTables - * @private - */ - function applyManualConv(){ - //apply manual conversion table to global table - foreach($this->mVariants as $v) { - if (count($this->mManualAddTables[$v]) > 0) { - $this->mTables[$v]->mergeArray($this->mManualAddTables[$v]); - } - if (count($this->mManualRemoveTables[$v]) > 0) - $this->mTables[$v]->removeArray($this->mManualRemoveTables[$v]); - } - } - /** * Convert text using a parser object for context * @public @@ -425,12 +443,28 @@ class LanguageConverter { $parser->mOutput->setTitleText( $this->mTitleDisplay ); return $text; } + + /** + * convert namespace + * @param string $title the title included namespace + * @return array of string + * @private + */ + function convertNamespace( $title, $variant ) { + $splittitle = explode( ':', $title ); + if (count($splittitle) < 2) + return $title; + if ( isset( $this->mNamespaceTables[$variant][$splittitle[0]] ) ) + $splittitle[0] = $this->mNamespaceTables[$variant][$splittitle[0]]; + $ret = implode(':', $splittitle ); + return $ret; + } /** - * convert title + * convert title * @private */ - function convertTitle( $text ){ + function convertTitle( $text, $variant ){ global $wgDisableTitleConversion, $wgUser; // check for global param and __NOTC__ tag @@ -452,7 +486,8 @@ class LanguageConverter { if ( $isredir == 'no' || $action == 'edit' || $action == 'submit' || $linkconvert == 'no' ) { return $text; } else { - $this->mTitleDisplay = $this->convert($text); + $text = $this->convertNamespace( $text, $variant ); + $this->mTitleDisplay = $this->convert( $text ); return $this->mTitleDisplay; } } @@ -488,38 +523,37 @@ class LanguageConverter { if( $mw->matchStart( $text ) ) return $text; + $plang = $this->getPreferredVariant(); + // for title convertion - if ( $isTitle ) return $this->convertTitle( $text ); + if ( $isTitle ) return $this->convertTitle( $text, $plang ); - $plang = $this->getPreferredVariant(); $tarray = StringUtils::explode( $this->mMarkup['end'], $text ); $text = ''; - $marks = array(); foreach ( $tarray as $txt ) { + $marked = explode( $this->mMarkup['begin'], $txt, 2 ); + + if( $this->mDoContentConvert ) + // Bug 19620: should convert a string immediately after a new rule added. + $text .= $this->autoConvert( $marked[0], $plang ); + else + $text .= $marked[0]; + if ( array_key_exists( 1, $marked ) ) { $crule = new ConverterRule($marked[1], $this); $crule->parse( $plang ); - $marked[1] = $crule->getDisplay(); - $this->prepareManualConv( $crule ); + $text .= $crule->getDisplay(); + $this->applyManualConv( $crule ); } else - $marked[0] .= $this->mMarkup['end']; - array_push( $marks, $marked ); - } - $this->applyManualConv(); - foreach ( $marks as $marked ) { - if( $this->mDoContentConvert ) - $text .= $this->autoConvert( $marked[0], $plang ); - else - $text .= $marked[0]; - if( array_key_exists( 1, $marked ) ) - $text .= $marked[1]; + $text .= $this->mMarkup['end']; + } + // Remove the last delimiter (wasn't real) $text = substr( $text, 0, -strlen( $this->mMarkup['end'] ) ); - return $text; } @@ -537,6 +571,11 @@ class LanguageConverter { * @public */ function findVariantLink( &$link, &$nt, $ignoreOtherCond = false ) { + # If the article has already existed, there is no need to + # check it again, otherwise it may cause a fault. + if ( is_object( $nt ) && $nt->exists() ) + return; + global $wgDisableLangConversion, $wgDisableTitleConversion, $wgRequest, $wgUser; $isredir = $wgRequest->getText( 'redirect', 'yes' ); $action = $wgRequest->getText( 'action' ); @@ -550,7 +589,7 @@ class LanguageConverter { || $action == 'submit' || $linkconvert == 'no' || $wgUser->getOption('noconvertlink') == 1 ) ) ) return; - if(is_object($nt)) + if ( is_object( $nt ) ) $ns = $nt->getNamespace(); $variants = $this->autoConvertToAllVariants($link); @@ -845,6 +884,9 @@ class LanguageConverter { * @public */ function armourMath($text){ + // we need to convert '-{' and '}-' to '-{' and '}-' + // to avoid a unwanted '}-' appeared after the math-image. + $text = strtr( $text, array('-{' => '-{', '}-' => '}-') ); $ret = $this->mMarkup['begin'] . 'R|' . $text . $this->mMarkup['end']; return $ret; } @@ -853,7 +895,7 @@ class LanguageConverter { /** * parser for rules of language conversion , parse rules in -{ }- tag * @ingroup Language - * @author fdcn + * @author fdcn , PhiLiP */ class ConverterRule { var $mText; // original text in -{text}- @@ -915,6 +957,7 @@ class ConverterRule { $flags = array(); $markup = $this->mConverter->mMarkup; $validFlags = $this->mConverter->mFlags; + $variants = $this->mConverter->mVariants; $tt = explode($markup['flagsep'], $text, 2); if(count($tt) == 2) { @@ -946,14 +989,23 @@ class ConverterRule { if(in_array('D',$flags)) $temp[] = 'D'; $flags = $temp; } else { - if ( in_array('A',$flags)) { + if ( in_array('A',$flags) ) { $flags[]='+'; $flags[]='S'; } if ( in_array('D',$flags) ) $flags=array_diff($flags,array('S')); + $flags_temp = array(); + foreach ($variants as $variant) { + // try to find flags like "zh-hans", "zh-hant" + // allow syntaxes like "-{zh-hans;zh-hant|XXXX}-" + if ( in_array($variant, $flags) ) + $flags_temp[] = $variant; + } + if ( count($flags_temp) !== 0 ) + $flags = $flags_temp; } - if ( count($flags)==0 ) + if ( count($flags) == 0 ) $flags = array('S'); $this->mRules=$rules; $this->mFlags=$flags; @@ -969,26 +1021,49 @@ class ConverterRule { $bidtable = array(); $unidtable = array(); $markup = $this->mConverter->mMarkup; + $variants = $this->mConverter->mVariants; + + // varsep_pattern for preg_split: + // text should be splited by ";" only if a valid variant + // name exist after the markup, for example: + // -{zh-hans:xxx;zh-hant:yyy;}- + // we should split it as: + // array( + // [0] => 'zh-hans:xxx' + // [1] => 'zh-hant:yyy' + // [2] => '' + // ) + $varsep_pattern = '/' . $markup['varsep'] . '\s*' . '(?='; + foreach( $variants as $variant ) { + $varsep_pattern .= $variant . '\s*' . $markup['codesep'] . '|'; // zh-hans:xxx;zh-hant:yyy + $varsep_pattern .= '[^;]*?' . $markup['unidsep'] . '\s*' . $variant + . '\s*' . $markup['codesep'] . '|'; // xxx=>zh-hans:yyy; xxx=>zh-hant:zzz + } + $varsep_pattern .= '\s*$)/'; + + $choice = preg_split($varsep_pattern, $rules); - $choice = explode($markup['varsep'], $rules ); - foreach($choice as $c) { - $v = explode($markup['codesep'], $c); - if(count($v) != 2) + foreach( $choice as $c ) { + $v = explode($markup['codesep'], $c, 2); + if( count($v) != 2 ) continue;// syntax error, skip - $to=trim($v[1]); - $v=trim($v[0]); - $u = explode($markup['unidsep'], $v); - if(count($u) == 1) { + $to = trim($v[1]); + $v = trim($v[0]); + $u = explode($markup['unidsep'], $v); + // if $to is empty, strtr() could return a wrong result + if( count($u) == 1 && $to && in_array( $v, $variants ) ) { $bidtable[$v] = $to; } else if(count($u) == 2){ - $from=trim($u[0]);$v=trim($u[1]); - if( array_key_exists($v,$unidtable) && !is_array($unidtable[$v]) ) - $unidtable[$v]=array($from=>$to); - else - $unidtable[$v][$from]=$to; + $from = trim($u[0]); + $v = trim($u[1]); + if( array_key_exists( $v, $unidtable ) && !is_array( $unidtable[$v] ) + && $to && in_array( $v, $variants ) ) + $unidtable[$v] = array( $from=>$to ); + elseif ( $to && in_array( $v, $variants ) ) + $unidtable[$v][$from] = $to; } // syntax error, pass - if (!array_key_exists($v,$this->mConverter->mVariantNames)){ + if ( !array_key_exists( $v, $this->mConverter->mVariantNames ) ){ $bidtable = array(); $unidtable = array(); break; @@ -1104,16 +1179,35 @@ class ConverterRule { * @public */ function parse($variant){ - if(!$variant) $variant = $this->mConverter->getPreferredVariant(); + if(!$variant) + $variant = $this->mConverter->getPreferredVariant(); + $variants = $this->mConverter->mVariants; $this->parseFlags(); $flags = $this->mFlags; - if( !in_array('R',$flags) || !in_array('N',$flags) ){ - //FIXME: may cause trouble here... - //strip   since it interferes with the parsing, plus, - //all spaces should be stripped in this tag anyway. - $this->mRules = str_replace(' ', '', $this->mRules); + // convert to specified variant + // syntax: -{zh-hans;zh-hant[;...]|}- + if( count( array_diff( $flags, $variants ) ) == 0 and count( $flags ) != 0 ) { + if ( in_array( $variant, $flags ) ) // check if current variant in flags + // then convert to current language + $this->mRules = $this->mConverter->autoConvert( $this->mRules, $variant ); + else { // if current variant no in flags, + // then we check its fallback variants. + $variantFallbacks = $this->mConverter->getVariantFallbacks($variant); + foreach ( $variantFallbacks as $variantFallback ) { + // if current variant's fallback exist in flags + if ( in_array( $variantFallback, $flags ) ) { + // then convert to fallback language + $this->mRules = $this->mConverter->autoConvert( $this->mRules, $variantFallback ); + break; + } + } + } + $this->mFlags = $flags = array('R'); + } + + if( !in_array( 'R', $flags ) || !in_array( 'N', $flags ) ) { // decode => HTML entities modified by Sanitizer::removeHTMLtags $this->mRules = str_replace('=>','=>',$this->mRules); @@ -1121,7 +1215,7 @@ class ConverterRule { } $rules = $this->mRules; - if(count($this->mBidtable)==0 && count($this->mUnidtable)==0){ + if( count( $this->mBidtable ) == 0 && count( $this->mUnidtable ) == 0 ){ if(in_array('+',$flags) || in_array('-',$flags)) // fill all variants if text in -{A/H/-|text} without rules foreach($this->mConverter->mVariants as $v)