X-Git-Url: http://git.heureux-cyclage.org/?a=blobdiff_plain;f=languages%2FLanguageConverter.php;h=5c2289a77af9cc81cd2f519ecde65a2c1a996a13;hb=c38e2e499c4533a33cff30829a46c0e93d0de002;hp=23d532c728b8d4b364bbe164f317869e7ee7871d;hpb=3b147f876a0c7450025d2fe4e8c7117cb2be50d0;p=lhc%2Fweb%2Fwiklou.git diff --git a/languages/LanguageConverter.php b/languages/LanguageConverter.php index 23d532c728..5c2289a77a 100644 --- a/languages/LanguageConverter.php +++ b/languages/LanguageConverter.php @@ -1,11 +1,24 @@ , shinjiman , PhiLiP */ class LanguageConverter { - var $mPreferredVariant = ''; // The User's preferred variant var $mMainLanguageCode; var $mVariants, $mVariantFallbacks, $mVariantNames; var $mTablesLoaded = false; var $mTables; - var $mNamespaceTables; // 'bidirectional' 'unidirectional' 'disable' for each variant var $mManualLevel; var $mCacheKey; var $mLangObj; - var $mMarkup; var $mFlags; var $mDescCodeSep = ':', $mDescVarSep = ';'; var $mUcfirst = false; - var $mHeaderVariant; var $mConvRuleTitle = false; + var $mURLVariant; + var $mUserVariant; + var $mHeaderVariant; + var $mMaxDepth = 10; + var $mVarSeparatorPattern; const CACHE_VERSION_KEY = 'VERSION 6'; @@ -39,45 +53,25 @@ class LanguageConverter { * Constructor * * @param $langobj The Language Object - * @param string $maincode the main language code of this language - * @param array $variants the supported variants of this language - * @param array $variantfallback the fallback language of each variant - * @param array $markup array defining the markup used for manual conversion - * @param array $flags array defining the custom strings that maps to the - * flags - * @param array $manualLevel limit for supported variants - * @public + * @param $maincode String: the main language code of this language + * @param $variants Array: the supported variants of this language + * @param $variantfallbacks Array: the fallback language of each variant + * @param $flags Array: defining the custom strings that maps to the flags + * @param $manualLevel Array: limit for supported variants */ - function __construct( $langobj, $maincode, + public function __construct( $langobj, $maincode, $variants = array(), $variantfallbacks = array(), - $markup = array(), $flags = array(), $manualLevel = array() ) { + global $wgDisabledVariants, $wgLanguageNames; $this->mLangObj = $langobj; $this->mMainLanguageCode = $maincode; - - global $wgDisabledVariants; - $this->mVariants = array(); - foreach ( $variants as $variant ) { - if ( !in_array( $variant, $wgDisabledVariants ) ) { - $this->mVariants[] = $variant; - } - } + $this->mVariants = array_diff( $variants, $wgDisabledVariants ); $this->mVariantFallbacks = $variantfallbacks; - global $wgLanguageNames; $this->mVariantNames = $wgLanguageNames; $this->mCacheKey = wfMemcKey( 'conversiontables', $maincode ); - $m = array( - 'begin' => '-{', - 'flagsep' => '|', - 'unidsep' => '=>', // for unidirectional conversion - 'codesep' => ':', - 'varsep' => ';', - 'end' => '}-' - ); - $this->mMarkup = array_merge( $m, $markup ); - $f = array( + $defaultflags = array( // 'S' show converted text // '+' add rules for alltext // 'E' the gave flags is error @@ -88,25 +82,27 @@ class LanguageConverter { 'D' => 'D', // convert description (subclass implement) '-' => '-', // remove convert (not implement) 'H' => 'H', // add rule for convert code - // (but no display in placed code ) + // (but no display in placed code ) 'N' => 'N' // current variant name ); - $this->mFlags = array_merge( $f, $flags ); + $this->mFlags = array_merge( $defaultflags, $flags ); foreach ( $this->mVariants as $v ) { if ( array_key_exists( $v, $manualLevel ) ) { $this->mManualLevel[$v] = $manualLevel[$v]; } else { $this->mManualLevel[$v] = 'bidirectional'; } - $this->mNamespaceTables[$v] = array(); $this->mFlags[$v] = $v; } } /** - * @public + * Get all valid variants. + * Call this instead of using $this->mVariants directly. + * + * @return Array: contains all valid variants */ - function getVariants() { + public function getVariants() { return $this->mVariants; } @@ -117,82 +113,145 @@ class LanguageConverter { * when zh-sg is preferred but not defined, we will pick zh-hans * in this case. Right now this is only used by zh. * - * @param string $v The language code of the variant - * @return string array The code of the fallback language or false if there - * is no fallback - * @public + * @param $variant String: the language code of the variant + * @return String: The code of the fallback language or the + * main code if there is no fallback */ - function getVariantFallbacks( $v ) { - if ( isset( $this->mVariantFallbacks[$v] ) ) { - return $this->mVariantFallbacks[$v]; + public function getVariantFallbacks( $variant ) { + if ( isset( $this->mVariantFallbacks[$variant] ) ) { + return $this->mVariantFallbacks[$variant]; } return $this->mMainLanguageCode; } /** - * Get preferred language variants. - * @param boolean $fromUser Get it from $wgUser's preferences - * @param boolean $fromHeader Get it from Accept-Language - * @return string the preferred language code - * @public + * Get the title produced by the conversion rule. + * @return String: The converted title text */ - function getPreferredVariant( $fromUser = true, $fromHeader = false ) { - global $wgUser, $wgRequest, $wgVariantArticlePath, - $wgDefaultLanguageVariant, $wgOut; + public function getConvRuleTitle() { + return $this->mConvRuleTitle; + } - // see if the preference is set in the request - $req = $wgRequest->getText( 'variant' ); + /** + * Get preferred language variant. + * @return String: the preferred language code + */ + public function getPreferredVariant() { + global $wgDefaultLanguageVariant, $wgUser; - if ( !$req ) { - $req = $wgRequest->getVal( 'uselang' ); - } + $req = $this->getURLVariant(); - if ( $fromUser && !$req ) { + if ( $wgUser->isLoggedIn() && !$req ) { $req = $this->getUserVariant(); } - if ( $fromHeader && !$req ) { + elseif ( !$req ) { $req = $this->getHeaderVariant(); } if ( $wgDefaultLanguageVariant && !$req ) { - $req = $wgDefaultLanguageVariant; + $req = $this->validateVariant( $wgDefaultLanguageVariant ); + } + + // This function, unlike the other get*Variant functions, is + // not memoized (i.e. there return value is not cached) since + // new information might appear during processing after this + // is first called. + if ( $req ) { + return $req; + } + return $this->mMainLanguageCode; + } + + /** + * Get default variant. + * This function would not be affected by user's settings or headers + * @return String: the default variant code + */ + public function getDefaultVariant() { + global $wgDefaultLanguageVariant; + + $req = $this->getURLVariant(); + + if ( $wgDefaultLanguageVariant && !$req ) { + $req = $this->validateVariant( $wgDefaultLanguageVariant ); } - if ( in_array( $req, $this->mVariants ) ) { + if ( $req ) { return $req; } return $this->mMainLanguageCode; } /** - * Determine the user has a variant set. + * Validate the variant + * @param $variant String: the variant to validate + * @return Mixed: returns the variant if it is valid, null otherwise + */ + protected function validateVariant( $variant = null ) { + if ( $variant !== null && + in_array( $variant, $this->mVariants ) ) { + return $variant; + } + return null; + } + + /** + * Get the variant specified in the URL * - * @returns mixed variant if one found, false otherwise. + * @return Mixed: variant if one found, false otherwise. */ - function getUserVariant() { + public function getURLVariant() { + global $wgRequest; + + if ( $this->mURLVariant ) { + return $this->mURLVariant; + } + + // see if the preference is set in the request + $ret = $wgRequest->getText( 'variant' ); + + if ( !$ret ) { + $ret = $wgRequest->getVal( 'uselang' ); + } + + return $this->mURLVariant = $this->validateVariant( $ret ); + } + + /** + * Determine if the user has a variant set. + * + * @return Mixed: variant if one found, false otherwise. + */ + protected function getUserVariant() { global $wgUser; + // memoizing this function wreaks havoc on parserTest.php + /* if ( $this->mUserVariant ) { */ + /* return $this->mUserVariant; */ + /* } */ + // get language variant preference from logged in users // Don't call this on stub objects because that causes infinite // recursion during initialisation if ( $wgUser->isLoggedIn() ) { - return $wgUser->getOption( 'variant' ); + $ret = $wgUser->getOption( 'variant' ); } else { // figure out user lang without constructing wgLang to avoid // infinite recursion - return $wgUser->getOption( 'language' ); + $ret = $wgUser->getOption( 'language' ); } - } + return $this->mUserVariant = $this->validateVariant( $ret ); + } /** * Determine the language variant from the Accept-Language header. * - * @returns mixed variant if one found, false otherwise. + * @return Mixed: variant if one found, false otherwise. */ - function getHeaderVariant() { + protected function getHeaderVariant() { global $wgRequest; if ( $this->mHeaderVariant ) { @@ -200,60 +259,44 @@ class LanguageConverter { } // see if some supported language variant is set in the - // http header, but we don't set the mPreferredVariant - // variable in case this is called before the user's - // preference is loaded - - $acceptLanguage = $wgRequest->getHeader( 'Accept-Language' ); - if ( !$acceptLanguage ) { - return false; + // http header. + $languages = array_keys( $wgRequest->getAcceptLang() ); + if ( empty( $languages ) ) { + return null; } - // explode by comma - $result = explode( ',', strtolower( $acceptLanguage ) ); - - $languages = array(); + $fallback_languages = array(); + foreach ( $languages as $language ) { + $this->mHeaderVariant = $this->validateVariant( $language ); + if ( $this->mHeaderVariant ) { + break; + } - foreach ( $result as $elem ) { - // if $elem likes 'zh-cn;q=0.9' - if ( ( $posi = strpos( $elem, ';' ) ) !== false ) { - // get the real language code likes 'zh-cn' - $languages[] = substr( $elem, 0, $posi ); - } else { - $languages[] = $elem; + // To see if there are fallbacks of current language. + // We record these fallback variants, and process + // them later. + $fallbacks = $this->getVariantFallbacks( $language ); + if ( is_string( $fallbacks ) ) { + $fallback_languages[] = $fallbacks; + } elseif ( is_array( $fallbacks ) ) { + $fallback_languages = + array_merge( $fallback_languages, + $fallbacks ); } } - $fallback_languages = array(); - foreach ( $languages as $language ) { - // strip whitespace - $language = trim( $language ); - if ( in_array( $language, $this->mVariants ) ) { - $this->mHeaderVariant = $language; - return $language; - } else { - // To see if there are fallbacks of current language. - // We record these fallback variants, and process - // them later. - $fallbacks = $this->getVariantFallbacks( $language ); - if ( is_string( $fallbacks ) ) { - $fallback_languages[] = $fallbacks; - } elseif ( is_array( $fallbacks ) ) { - $fallback_languages = - array_merge( $fallback_languages, - $fallbacks ); + if ( !$this->mHeaderVariant ) { + // process fallback languages now + $fallback_languages = array_unique( $fallback_languages ); + foreach ( $fallback_languages as $language ) { + $this->mHeaderVariant = $this->validateVariant( $language ); + if ( $this->mHeaderVariant ) { + break; } } } - // process fallback languages now - $fallback_languages = array_unique( $fallback_languages ); - foreach ( $fallback_languages as $language ) { - if ( in_array( $language, $this->mVariants ) ) { - $this->mHeaderVariant = $language; - return $language; - } - } + return $this->mHeaderVariant; } /** @@ -262,42 +305,49 @@ class LanguageConverter { * To convert text in "title" or "alt", like 'textgetPreferredVariant(); $title = $matches[1]; - $text = $matches[2]; + $text = $matches[2]; + // we convert captions except URL if ( !strpos( $text, '://' ) ) { $text = $this->translate( $text, $toVariant ); } - return " $title=\"$text\""; + + // remove HTML tags to prevent disrupting the layout + $text = preg_replace( '/<[^>]+>/', '', $text ); + // escape HTML special chars to prevent disrupting the layout + $text = htmlspecialchars( $text ); + + return " {$title}=\"{$text}\""; } /** * Dictionary-based conversion. + * This function would not parse the conversion rules. + * If you want to parse rules, try to use convert() or + * convertTo(). * - * @param string $text the text to be converted - * @param string $toVariant the target language code - * @return string the converted text - * @private + * @param $text String: the text to be converted + * @param $toVariant String: the target language code + * @return String: the converted text */ - function autoConvert( $text, $toVariant = false ) { - $fname = 'LanguageConverter::autoConvert'; - - wfProfileIn( $fname ); + public function autoConvert( $text, $toVariant = false ) { + wfProfileIn( __METHOD__ ); - if ( !$this->mTablesLoaded ) { - $this->loadTables(); - } + $this->loadTables(); if ( !$toVariant ) { $toVariant = $this->getPreferredVariant(); - } - if ( !in_array( $toVariant, $this->mVariants ) ) { - return $text; + if ( !$toVariant ) { + wfProfileOut( __METHOD__ ); + return $text; + } } /* we convert everything except: @@ -357,28 +407,25 @@ class LanguageConverter { $ret .= array_shift( $notrtext ); $ret .= $t; } - wfProfileOut( $fname ); + wfProfileOut( __METHOD__ ); return $ret; } /** * Translate a string to a variant. - * Doesn't process markup or do any of that other stuff, for that use - * convert(). + * Doesn't parse rules or do any of that other stuff, for that use + * convert() or convertTo(). * - * @param string $text Text to convert - * @param string $variant Variant language code - * @return string Translated text - * @private + * @param $text String: text to convert + * @param $variant String: variant language code + * @return String: translated text */ - function translate( $text, $variant ) { + protected function translate( $text, $variant ) { wfProfileIn( __METHOD__ ); // If $text is empty or only includes spaces, do nothing // Otherwise translate it if ( trim( $text ) ) { - if ( !$this->mTablesLoaded ) { - $this->loadTables(); - } + $this->loadTables(); $text = $this->mTables[$variant]->replace( $text ); } wfProfileOut( __METHOD__ ); @@ -386,78 +433,60 @@ class LanguageConverter { } /** - * Convert text to all supported variants. + * Call translate() to convert text to all valid variants. * - * @param string $text the text to be converted - * @return array of string - * @public + * @param $text String: the text to be converted + * @return Array: variant => converted text */ - function autoConvertToAllVariants( $text ) { - $fname = 'LanguageConverter::autoConvertToAllVariants'; - wfProfileIn( $fname ); - if ( !$this->mTablesLoaded ) { - $this->loadTables(); - } + public function autoConvertToAllVariants( $text ) { + wfProfileIn( __METHOD__ ); + $this->loadTables(); $ret = array(); foreach ( $this->mVariants as $variant ) { $ret[$variant] = $this->translate( $text, $variant ); } - wfProfileOut( $fname ); + wfProfileOut( __METHOD__ ); return $ret; } /** - * Convert link text to all supported variants. + * Convert link text to all valid variants. + * In the first, this function only convert text outside the + * "-{" "}-" markups. Since the "{" and "}" are not allowed in + * titles, the text will get all converted always. + * So I removed this feature and deprecated the function. * - * @param string $text the text to be converted - * @return array of string - * @public + * @param $text String: the text to be converted + * @return Array: variant => converted text + * @deprecated Use autoConvertToAllVariants() instead */ - function convertLinkToAllVariants( $text ) { - if ( !$this->mTablesLoaded ) { - $this->loadTables(); - } - - $ret = array(); - $tarray = explode( $this->mMarkup['begin'], $text ); - $tfirst = array_shift( $tarray ); - - foreach ( $this->mVariants as $variant ) { - $ret[$variant] = $this->translate( $tfirst, $variant ); - } - - foreach ( $tarray as $txt ) { - $marked = explode( $this->mMarkup['end'], $txt, 2 ); - - foreach ( $this->mVariants as $variant ) { - $ret[$variant] .= $this->mMarkup['begin'] . $marked[0] . - $this->mMarkup['end']; - if ( array_key_exists( 1, $marked ) ) { - $ret[$variant] .= $this->translate( $marked[1], $variant ); - } - } - - } - - return $ret; + public function convertLinkToAllVariants( $text ) { + return $this->autoConvertToAllVariants( $text ); } /** - * Prepare manual conversion table. - * @private + * Apply manual conversion rules. + * + * @param $convRule Object: Object of ConverterRule */ - function applyManualConv( $convRule ) { - // use syntax -{T|zh:TitleZh;zh-tw:TitleTw}- for custom - // conversion in title - $this->mConvRuleTitle = $convRule->getTitle(); - - // apply manual conversion table to global table + protected function applyManualConv( $convRule ) { + // Use syntax -{T|zh-cn:TitleCN; zh-tw:TitleTw}- to custom + // title conversion. + // Bug 24072: $mConvRuleTitle was overwritten by other manual + // rule(s) not for title, this breaks the title conversion. + $newConvRuleTitle = $convRule->getTitle(); + if ( $newConvRuleTitle ) { + // So I add an empty check for getTitle() + $this->mConvRuleTitle = $newConvRuleTitle; + } + + // merge/remove manual conversion rules to/from global table $convTable = $convRule->getConvTable(); $action = $convRule->getRulesAction(); foreach ( $convTable as $variant => $pair ) { - if ( !in_array( $variant, $this->mVariants ) ) { + if ( !$this->validateVariant( $variant ) ) { continue; } @@ -477,47 +506,33 @@ class LanguageConverter { } /** - * Convert namespace. - * @param string $title the title included namespace - * @return array of string - * @private - */ - function convertNamespace( $title, $variant ) { - $splittitle = explode( ':', $title ); - if ( count( $splittitle ) < 2 ) { - return $title; - } - if ( isset( $this->mNamespaceTables[$variant][$splittitle[0]] ) ) { - $splittitle[0] = $this->mNamespaceTables[$variant][$splittitle[0]]; - } - $ret = implode( ':', $splittitle ); - return $ret; - } - - /** - * Convert a text fragment. + * Auto convert a Title object to a readable string in the + * preferred variant. * - * @param string $text text to be converted - * @param string $plang preferred variant - * @return string converted text - * @private + *@param $title Object: a object of Title + *@return String: converted title text */ - function convertFragment( $text, $plang ) { - $marked = explode( $this->mMarkup['begin'], $text, 2 ); - $converted = ''; - - $converted .= $this->autoConvert( $marked[0], $plang ); - - if ( array_key_exists( 1, $marked ) ) { - $crule = new ConverterRule( $marked[1], $this ); - $crule->parse( $plang ); - $converted .= $crule->getDisplay(); - $this->applyManualConv( $crule ); + public function convertTitle( $title ) { + $variant = $this->getPreferredVariant(); + $index = $title->getNamespace(); + if ( $index === NS_MAIN ) { + $text = ''; } else { - $converted .= $this->mMarkup['end']; + // first let's check if a message has given us a converted name + $nsConvKey = 'conversion-ns' . $index; + if ( !wfEmptyMsg( $nsConvKey ) ) { + $text = wfMsgForContentNoTrans( $nsConvKey ); + } else { + // the message does not exist, try retrieve it from the current + // variant's namespace names. + $langObj = $this->mLangObj->factory( $variant ); + $text = $langObj->getFormattedNsText( $index ); + } + $text .= ':'; } - - return $converted; + $text .= $title->getText(); + $text = $this->translate( $text, $variant ); + return $text; } /** @@ -531,26 +546,136 @@ class LanguageConverter { * -{flags|code1:text1;code2:text2;...}- or * -{text}- in which case no conversion should take place for text * - * @param string $text text to be converted - * @return string converted text - * @public + * @param $text String: text to be converted + * @return String: converted text + */ + public function convert( $text ) { + $variant = $this->getPreferredVariant(); + return $this->convertTo( $text, $variant ); + } + + /** + * Same as convert() except a extra parameter to custom variant. + * + * @param $text String: text to be converted + * @param $variant String: the target variant code + * @return String: converted text */ - function convert( $text ) { + public function convertTo( $text, $variant ) { global $wgDisableLangConversion; if ( $wgDisableLangConversion ) return $text; + return $this->recursiveConvertTopLevel( $text, $variant ); + } + + /** + * Recursively convert text on the outside. Allow to use nested + * markups to custom rules. + * + * @param $text String: text to be converted + * @param $variant String: the target variant code + * @param $depth Integer: depth of recursion + * @return String: converted text + */ + protected function recursiveConvertTopLevel( $text, $variant, $depth = 0 ) { + $startPos = 0; + $out = ''; + $length = strlen( $text ); + while ( $startPos < $length ) { + $pos = strpos( $text, '-{', $startPos ); + + if ( $pos === false ) { + // No more markup, append final segment + $out .= $this->autoConvert( substr( $text, $startPos ), $variant ); + return $out; + } - $plang = $this->getPreferredVariant(); + // Markup found + // Append initial segment + $out .= $this->autoConvert( substr( $text, $startPos, $pos - $startPos ), $variant ); - $tarray = StringUtils::explode( $this->mMarkup['end'], $text ); - $converted = ''; + // Advance position + $startPos = $pos; + + // Do recursive conversion + $out .= $this->recursiveConvertRule( $text, $variant, $startPos, $depth + 1 ); + } + + return $out; + } + + /** + * Recursively convert text on the inside. + * + * @param $text String: text to be converted + * @param $variant String: the target variant code + * @param $depth Integer: depth of recursion + * @return String: converted text + */ + protected function recursiveConvertRule( $text, $variant, &$startPos, $depth = 0 ) { + // Quick sanity check (no function calls) + if ( $text[$startPos] !== '-' || $text[$startPos + 1] !== '{' ) { + throw new MWException( __METHOD__ . ': invalid input string' ); + } + + $startPos += 2; + $inner = ''; + $warningDone = false; + $length = strlen( $text ); + + while ( $startPos < $length ) { + $m = false; + preg_match( '/-\{|\}-/', $text, $m, PREG_OFFSET_CAPTURE, $startPos ); + if ( !$m ) { + // Unclosed rule + break; + } - foreach ( $tarray as $txt ) { - $converted .= $this->convertFragment( $txt, $plang ); + $token = $m[0][0]; + $pos = $m[0][1]; + + // Markup found + // Append initial segment + $inner .= substr( $text, $startPos, $pos - $startPos ); + + // Advance position + $startPos = $pos; + + switch ( $token ) { + case '-{': + // Check max depth + if ( $depth >= $this->mMaxDepth ) { + $inner .= '-{'; + if ( !$warningDone ) { + $inner .= '' . + wfMsgForContent( 'language-converter-depth-warning', + $this->mMaxDepth ) . + ''; + $warningDone = true; + } + $startPos += 2; + continue; + } + // Recursively parse another rule + $inner .= $this->recursiveConvertRule( $text, $variant, $startPos, $depth + 1 ); + break; + case '}-': + // Apply the rule + $startPos += 2; + $rule = new ConverterRule( $inner, $this ); + $rule->parse( $variant ); + $this->applyManualConv( $rule ); + return $rule->getDisplay(); + default: + throw new MWException( __METHOD__ . ': invalid regex match' ); + } } - // Remove the last delimiter (wasn't real) - $converted = substr( $converted, 0, - strlen( $this->mMarkup['end'] ) ); - return $converted; + // Unclosed rule + if ( $startPos < $length ) { + $inner .= substr( $text, $startPos ); + } + $startPos = $length; + return '-{' . $this->autoConvert( $inner, $variant ); } /** @@ -559,14 +684,13 @@ class LanguageConverter { * actually exists in another variant. This function * tries to find it. See e.g. LanguageZh.php * - * @param string $link the name of the link - * @param mixed $nt the title object of the link - * @param boolean $ignoreOtherCond: to disable other conditions when - * we need to transclude a template or update a category's link - * @return null the input parameters may be modified upon return - * @public + * @param $link String: the name of the link + * @param $nt Mixed: the title object of the link + * @param $ignoreOtherCond Boolean: to disable other conditions when + * we need to transclude a template or update a category's link + * @return Null, the input parameters may be modified upon return */ - function findVariantLink( &$link, &$nt, $ignoreOtherCond = false ) { + public function findVariantLink( &$link, &$nt, $ignoreOtherCond = false ) { # If the article has already existed, there is no need to # check it again, otherwise it may cause a fault. if ( is_object( $nt ) && $nt->exists() ) { @@ -599,7 +723,7 @@ class LanguageConverter { } $variants = $this->autoConvertToAllVariants( $link ); - if ( $variants == false ) { // give up + if ( !$variants ) { // give up return; } @@ -627,12 +751,10 @@ class LanguageConverter { } } - /** + /** * Returns language specific hash options. - * - * @public */ - function getExtraHashOptions() { + public function getExtraHashOptions() { $variant = $this->getPreferredVariant(); return '!' . $variant ; } @@ -653,10 +775,10 @@ class LanguageConverter { * @private */ function loadTables( $fromcache = true ) { - global $wgMemc; if ( $this->mTablesLoaded ) { return; } + global $wgMemc; wfProfileIn( __METHOD__ ); $this->mTablesLoaded = true; $this->mTables = false; @@ -666,7 +788,7 @@ class LanguageConverter { wfProfileOut( __METHOD__ . '-cache' ); } if ( !$this->mTables - || !isset( $this->mTables[self::CACHE_VERSION_KEY] ) ) { + || !array_key_exists( self::CACHE_VERSION_KEY, $this->mTables ) ) { wfProfileIn( __METHOD__ . '-recache' ); // not in cache, or we need a fresh reload. // we will first load the default tables @@ -686,13 +808,13 @@ class LanguageConverter { wfProfileOut( __METHOD__ ); } - /** + /** * Hook for post processig after conversion tables are loaded. * */ function postLoadTables() { } - /** + /** * Reload the conversion tables. * * @private @@ -721,13 +843,8 @@ class LanguageConverter { * */ function parseCachedTable( $code, $subpage = '', $recursive = true ) { - global $wgMessageCache; static $parsed = array(); - if ( !is_object( $wgMessageCache ) ) { - return array(); - } - $key = 'Conversiontable/' . $code; if ( $subpage ) { $key .= '/' . $subpage; @@ -737,7 +854,12 @@ class LanguageConverter { } if ( strpos( $code, '/' ) === false ) { - $txt = $wgMessageCache->get( 'Conversiontable', true, $code ); + $txt = MessageCache::singleton()->get( 'Conversiontable', true, $code ); + if ( $txt === false ) { + # FIXME: this method doesn't seem to be expecting + # this possible outcome... + $txt = '<Conversiontable>'; + } } else { $title = Title::makeTitleSafe( NS_MEDIAWIKI, "Conversiontable/$code" ); @@ -753,14 +875,14 @@ class LanguageConverter { // [[MediaWiki:conversiontable/zh-xx/...|...]] $linkhead = $this->mLangObj->getNsText( NS_MEDIAWIKI ) . ':Conversiontable'; - $subs = explode( '[[', $txt ); + $subs = StringUtils::explode( '[[', $txt ); $sublinks = array(); foreach ( $subs as $sub ) { $link = explode( ']]', $sub, 2 ); if ( count( $link ) != 2 ) { continue; } - $b = explode( '|', $link[0] ); + $b = explode( '|', $link[0], 2 ); $b = explode( '/', trim( $b[0] ), 3 ); if ( count( $b ) == 3 ) { $sublink = $b[2]; @@ -773,18 +895,22 @@ class LanguageConverter { } } - // parse the mappings in this page - $blocks = explode( $this->mMarkup['begin'], $txt ); - array_shift( $blocks ); + $blocks = StringUtils::explode( '-{', $txt ); $ret = array(); + $first = true; foreach ( $blocks as $block ) { - $mappings = explode( $this->mMarkup['end'], $block, 2 ); + if ( $first ) { + // Skip the part before the first -{ + $first = false; + continue; + } + $mappings = explode( '}-', $block, 2 ); $stripped = str_replace( array( "'", '"', '*', '#' ), '', $mappings[0] ); - $table = explode( ';', $stripped ); + $table = StringUtils::explode( ';', $stripped ); foreach ( $table as $t ) { - $m = explode( '=>', $t ); + $m = explode( '=>', $t, 3 ); if ( count( $m ) != 2 ) continue; // trim any trailling comments starting with '//' @@ -794,7 +920,6 @@ class LanguageConverter { } $parsed[$key] = true; - // recursively parse the subpages if ( $recursive ) { foreach ( $sublinks as $link ) { @@ -805,7 +930,7 @@ class LanguageConverter { if ( $this->mUcfirst ) { foreach ( $ret as $k => $v ) { - $ret[Language::ucfirst( $k )] = Language::ucfirst( $v ); + $ret[$this->mLangObj->ucfirst( $k )] = $this->mLangObj->ucfirst( $v ); } } return $ret; @@ -815,18 +940,17 @@ class LanguageConverter { * Enclose a string with the "no conversion" tag. This is used by * various functions in the Parser. * - * @param string $text text to be tagged for no conversion - * @return string the tagged text - * @public + * @param $text String: text to be tagged for no conversion + * @param $noParse Unused (?) + * @return String: the tagged text */ - function markNoConversion( $text, $noParse = false ) { + public function markNoConversion( $text, $noParse = false ) { # don't mark if already marked - if ( strpos( $text, $this->mMarkup['begin'] ) - || strpos( $text, $this->mMarkup['end'] ) ) { + if ( strpos( $text, '-{' ) || strpos( $text, '}-' ) ) { return $text; } - $ret = $this->mMarkup['begin'] . 'R|' . $text . $this->mMarkup['end']; + $ret = "-{R|$text}-"; return $ret; } @@ -851,7 +975,7 @@ class LanguageConverter { $t = explode( '/', $title, 3 ); $c = count( $t ); if ( $c > 1 && $t[0] == 'Conversiontable' ) { - if ( in_array( $t[1], $this->mVariants ) ) { + if ( $this->validateVariant( $t[1] ) ) { $this->reloadTables(); } } @@ -861,15 +985,42 @@ class LanguageConverter { /** * Armour rendered math against conversion. - * Wrap math into rawoutput -{R| math }- syntax. - * @public + * Escape special chars in parsed math text.(in most cases are img elements) */ - function armourMath( $text ) { - // we need to convert '-{' and '}-' to '-{' and '}-' - // to avoid a unwanted '}-' appeared after the math-image. + public function armourMath( $text ) { + // convert '-{' and '}-' to '-{' and '}-' to prevent + // any unwanted markup appearing in the math image tag. $text = strtr( $text, array( '-{' => '-{', '}-' => '}-' ) ); - $ret = $this->mMarkup['begin'] . 'R|' . $text . $this->mMarkup['end']; - return $ret; + return $text; + } + + /** + * Get the cached separator pattern for ConverterRule::parseRules() + */ + function getVarSeparatorPattern() { + if ( is_null( $this->mVarSeparatorPattern ) ) { + // varsep_pattern for preg_split: + // text should be splited by ";" only if a valid variant + // name exist after the markup, for example: + // -{zh-hans:xxx;zh-hant:\ + // yyy;}- + // we should split it as: + // array( + // [0] => 'zh-hans:xxx' + // [1] => 'zh-hant:yyy' + // [2] => '' + // ) + $pat = '/;\s*(?='; + foreach ( $this->mVariants as $variant ) { + // zh-hans:xxx;zh-hant:yyy + $pat .= $variant . '\s*:|'; + // xxx=>zh-hans:yyy; xxx=>zh-hant:zzz + $pat .= '[^;]*?=>\s*' . $variant . '\s*:|'; + } + $pat .= '\s*$)/'; + $this->mVarSeparatorPattern = $pat; + } + return $this->mVarSeparatorPattern; } } @@ -887,6 +1038,7 @@ class ConverterRule { var $mRules = '';// string : the text of the rules var $mRulesAction = 'none'; var $mFlags = array(); + var $mVariantFlags = array(); var $mConvTable = array(); var $mBidtable = array();// array of the translation in each variant var $mUnidtable = array();// array of the translation in each variant @@ -894,34 +1046,27 @@ class ConverterRule { /** * Constructor * - * @param string $text the text between -{ and }- - * @param object $converter a LanguageConverter object - * @access public + * @param $text String: the text between -{ and }- + * @param $converter LanguageConverter object */ - function __construct( $text, $converter ) { + public function __construct( $text, $converter ) { $this->mText = $text; $this->mConverter = $converter; - foreach ( $converter->mVariants as $v ) { - $this->mConvTable[$v] = array(); - } } /** * Check if variants array in convert array. * - * @param string $variant Variant language code - * @return string Translated text - * @public + * @param $variants Array or string: variant language code + * @return String: translated text */ - function getTextInBidtable( $variants ) { - if ( is_string( $variants ) ) { - $variants = array( $variants ); - } - if ( !is_array( $variants ) ) { + public function getTextInBidtable( $variants ) { + $variants = (array)$variants; + if ( !$variants ) { return false; } foreach ( $variants as $variant ) { - if ( array_key_exists( $variant, $this->mBidtable ) ) { + if ( isset( $this->mBidtable[$variant] ) ) { return $this->mBidtable[$variant]; } } @@ -934,74 +1079,60 @@ class ConverterRule { */ function parseFlags() { $text = $this->mText; - if ( strlen( $text ) < 2 ) { - $this->mFlags = array( 'R' ); - $this->mRules = $text; - return; - } - $flags = array(); - $markup = $this->mConverter->mMarkup; - $validFlags = $this->mConverter->mFlags; - $variants = $this->mConverter->mVariants; + $variantFlags = array(); - $tt = explode( $markup['flagsep'], $text, 2 ); - if ( count( $tt ) == 2 ) { - $f = explode( $markup['varsep'], $tt[0] ); + $sepPos = strpos( $text, '|' ); + if ( $sepPos !== false ) { + $validFlags = $this->mConverter->mFlags; + $f = StringUtils::explode( ';', substr( $text, 0, $sepPos ) ); foreach ( $f as $ff ) { $ff = trim( $ff ); - if ( array_key_exists( $ff, $validFlags ) - && !in_array( $validFlags[$ff], $flags ) ) { - $flags[] = $validFlags[$ff]; + if ( isset( $validFlags[$ff] ) ) { + $flags[$validFlags[$ff]] = true; } } - $rules = $tt[1]; - } else { - $rules = $text; - } - - // check flags - if ( in_array( 'R', $flags ) ) { - $flags = array( 'R' );// remove other flags - } elseif ( in_array( 'N', $flags ) ) { - $flags = array( 'N' );// remove other flags - } elseif ( in_array( '-', $flags ) ) { - $flags = array( '-' );// remove other flags - } elseif ( count( $flags ) == 1 && $flags[0] == 'T' ) { - $flags[] = 'H'; - } elseif ( in_array( 'H', $flags ) ) { + $text = strval( substr( $text, $sepPos + 1 ) ); + } + + if ( !$flags ) { + $flags['S'] = true; + } elseif ( isset( $flags['R'] ) ) { + $flags = array( 'R' => true );// remove other flags + } elseif ( isset( $flags['N'] ) ) { + $flags = array( 'N' => true );// remove other flags + } elseif ( isset( $flags['-'] ) ) { + $flags = array( '-' => true );// remove other flags + } elseif ( count( $flags ) == 1 && isset( $flags['T'] ) ) { + $flags['H'] = true; + } elseif ( isset( $flags['H'] ) ) { // replace A flag, and remove other flags except T - $temp = array( '+', 'H' ); - if ( in_array( 'T', $flags ) ) { - $temp[] = 'T'; + $temp = array( '+' => true, 'H' => true ); + if ( isset( $flags['T'] ) ) { + $temp['T'] = true; } - if ( in_array( 'D', $flags ) ) { - $temp[] = 'D'; + if ( isset( $flags['D'] ) ) { + $temp['D'] = true; } $flags = $temp; } else { - if ( in_array( 'A', $flags ) ) { - $flags[] = '+'; - $flags[] = 'S'; - } - if ( in_array( 'D', $flags ) ) { - $flags = array_diff( $flags, array( 'S' ) ); + if ( isset( $flags['A'] ) ) { + $flags['+'] = true; + $flags['S'] = true; } - $flags_temp = array(); - foreach ( $variants as $variant ) { - // try to find flags like "zh-hans", "zh-hant" - // allow syntaxes like "-{zh-hans;zh-hant|XXXX}-" - if ( in_array( $variant, $flags ) ) - $flags_temp[] = $variant; + if ( isset( $flags['D'] ) ) { + unset( $flags['S'] ); } - if ( count( $flags_temp ) !== 0 ) { - $flags = $flags_temp; + // try to find flags like "zh-hans", "zh-hant" + // allow syntaxes like "-{zh-hans;zh-hant|XXXX}-" + $variantFlags = array_intersect( array_keys( $flags ), $this->mConverter->mVariants ); + if ( $variantFlags ) { + $variantFlags = array_flip( $variantFlags ); + $flags = array(); } } - if ( count( $flags ) == 0 ) { - $flags = array( 'S' ); - } - $this->mRules = $rules; + $this->mVariantFlags = $variantFlags; + $this->mRules = $text; $this->mFlags = $flags; } @@ -1011,50 +1142,28 @@ class ConverterRule { */ function parseRules() { $rules = $this->mRules; - $flags = $this->mFlags; $bidtable = array(); $unidtable = array(); - $markup = $this->mConverter->mMarkup; $variants = $this->mConverter->mVariants; - - // varsep_pattern for preg_split: - // text should be splited by ";" only if a valid variant - // name exist after the markup, for example: - // -{zh-hans:xxx;zh-hant:\ - // yyy;}- - // we should split it as: - // array( - // [0] => 'zh-hans:xxx' - // [1] => 'zh-hant:yyy' - // [2] => '' - // ) - $varsep_pattern = '/' . $markup['varsep'] . '\s*' . '(?='; - foreach ( $variants as $variant ) { - // zh-hans:xxx;zh-hant:yyy - $varsep_pattern .= $variant . '\s*' . $markup['codesep'] . '|'; - // xxx=>zh-hans:yyy; xxx=>zh-hant:zzz - $varsep_pattern .= '[^;]*?' . $markup['unidsep'] . '\s*' . $variant - . '\s*' . $markup['codesep'] . '|'; - } - $varsep_pattern .= '\s*$)/'; + $varsep_pattern = $this->mConverter->getVarSeparatorPattern(); $choice = preg_split( $varsep_pattern, $rules ); foreach ( $choice as $c ) { - $v = explode( $markup['codesep'], $c, 2 ); + $v = explode( ':', $c, 2 ); if ( count( $v ) != 2 ) { // syntax error, skip continue; } $to = trim( $v[1] ); $v = trim( $v[0] ); - $u = explode( $markup['unidsep'], $v, 2 ); + $u = explode( '=>', $v, 2 ); // if $to is empty, strtr() could return a wrong result if ( count( $u ) == 1 && $to && in_array( $v, $variants ) ) { $bidtable[$v] = $to; } elseif ( count( $u ) == 2 ) { $from = trim( $u[0] ); - $v = trim( $u[1] ); + $v = trim( $u[1] ); if ( array_key_exists( $v, $unidtable ) && !is_array( $unidtable[$v] ) && $to @@ -1065,7 +1174,7 @@ class ConverterRule { } } // syntax error, pass - if ( !array_key_exists( $v, $this->mConverter->mVariantNames ) ) { + if ( !isset( $this->mConverter->mVariantNames[$v] ) ) { $bidtable = array(); $unidtable = array(); break; @@ -1138,7 +1247,12 @@ class ConverterRule { * @private */ function generateConvTable() { - $flags = $this->mFlags; + // Special case optimisation + if ( !$this->mBidtable && !$this->mUnidtable ) { + $this->mConvTable = array(); + return; + } + $bidtable = $this->mBidtable; $unidtable = $this->mUnidtable; $manLevel = $this->mConverter->mManualLevel; @@ -1148,7 +1262,7 @@ class ConverterRule { /* for bidirectional array fill in the missing variants, if any, with fallbacks */ - if ( !array_key_exists( $v, $bidtable ) ) { + if ( !isset( $bidtable[$v] ) ) { $variantFallbacks = $this->mConverter->getVariantFallbacks( $v ); $vf = $this->getTextInBidtable( $variantFallbacks ); @@ -1157,7 +1271,7 @@ class ConverterRule { } } - if ( array_key_exists( $v, $bidtable ) ) { + if ( isset( $bidtable[$v] ) ) { foreach ( $vmarked as $vo ) { // use syntax: -{A|zh:WordZh;zh-tw:WordTw}- // or -{H|zh:WordZh;zh-tw:WordTw}- @@ -1174,11 +1288,14 @@ class ConverterRule { $vmarked[] = $v; } /*for unidirectional array fill to convert tables */ - if ( ( $manLevel[$v] == 'bidirectional' - || $manLevel[$v] == 'unidirectional' ) - && array_key_exists( $v, $unidtable ) ) { - $ct = $this->mConvTable[$v]; - $this->mConvTable[$v] = array_merge( $ct, $unidtable[$v] ); + if ( ( $manLevel[$v] == 'bidirectional' || $manLevel[$v] == 'unidirectional' ) + && isset( $unidtable[$v] ) ) + { + if ( isset( $this->mConvTable[$v] ) ) { + $this->mConvTable[$v] = array_merge( $this->mConvTable[$v], $unidtable[$v] ); + } else { + $this->mConvTable[$v] = $unidtable[$v]; + } } } } @@ -1192,16 +1309,14 @@ class ConverterRule { $variant = $this->mConverter->getPreferredVariant(); } - $variants = $this->mConverter->mVariants; $this->parseFlags(); $flags = $this->mFlags; // convert to specified variant // syntax: -{zh-hans;zh-hant[;...]|}- - if ( count( array_diff( $flags, $variants ) ) == 0 - and count( $flags ) != 0 ) { + if ( $this->mVariantFlags ) { // check if current variant in flags - if ( in_array( $variant, $flags ) ) { + if ( isset( $this->mVariantFlags[$variant] ) ) { // then convert to current language $this->mRules = $this->mConverter->autoConvert( $this->mRules, $variant ); @@ -1211,7 +1326,7 @@ class ConverterRule { $this->mConverter->getVariantFallbacks( $variant ); foreach ( $variantFallbacks as $variantFallback ) { // if current variant's fallback exist in flags - if ( in_array( $variantFallback, $flags ) ) { + if ( isset( $this->mVariantFlags[$variantFallback] ) ) { // then convert to fallback language $this->mRules = $this->mConverter->autoConvert( $this->mRules, @@ -1220,57 +1335,72 @@ class ConverterRule { } } } - $this->mFlags = $flags = array( 'R' ); + $this->mFlags = $flags = array( 'R' => true ); } - if ( !in_array( 'R', $flags ) || !in_array( 'N', $flags ) ) { + if ( !isset( $flags['R'] ) && !isset( $flags['N'] ) ) { // decode => HTML entities modified by Sanitizer::removeHTMLtags $this->mRules = str_replace( '=>', '=>', $this->mRules ); - $this->parseRules(); } $rules = $this->mRules; - if ( count( $this->mBidtable ) == 0 - && count( $this->mUnidtable ) == 0 ) { - if ( in_array( '+', $flags ) || in_array( '-', $flags ) ) { + if ( !$this->mBidtable && !$this->mUnidtable ) { + if ( isset( $flags['+'] ) || isset( $flags['-'] ) ) { // fill all variants if text in -{A/H/-|text} without rules foreach ( $this->mConverter->mVariants as $v ) { $this->mBidtable[$v] = $rules; } - } elseif ( !in_array( 'N', $flags ) && !in_array( 'T', $flags ) ) { - $this->mFlags = $flags = array( 'R' ); + } elseif ( !isset( $flags['N'] ) && !isset( $flags['T'] ) ) { + $this->mFlags = $flags = array( 'R' => true ); } } - if ( in_array( 'R', $flags ) ) { - // if we don't do content convert, still strip the -{}- tags - $this->mRuleDisplay = $rules; - } elseif ( in_array( 'N', $flags ) ) { - // proces N flag: output current variant name - $this->mRuleDisplay = - $this->mConverter->mVariantNames[ trim( $rules ) ]; - } elseif ( in_array( 'D', $flags ) ) { - // proces D flag: output rules description - $this->mRuleDisplay = $this->getRulesDesc(); - } elseif ( in_array( 'H', $flags ) || in_array( '-', $flags ) ) { - // proces H,- flag or T only: output nothing - $this->mRuleDisplay = ''; - } elseif ( in_array( 'S', $flags ) ) { - $this->mRuleDisplay = $this->getRuleConvertedStr( $variant ); - } else { - $this->mRuleDisplay = $this->mManualCodeError; - } - // process T flag - if ( in_array( 'T', $flags ) ) { - $this->mRuleTitle = $this->getRuleConvertedStr( $variant ); - } - - if ( in_array( '-', $flags ) ) { - $this->mRulesAction = 'remove'; + $this->mRuleDisplay = false; + foreach ( $flags as $flag => $unused ) { + switch ( $flag ) { + case 'R': + // if we don't do content convert, still strip the -{}- tags + $this->mRuleDisplay = $rules; + break; + case 'N': + // process N flag: output current variant name + $ruleVar = trim( $rules ); + if ( isset( $this->mConverter->mVariantNames[$ruleVar] ) ) { + $this->mRuleDisplay = $this->mConverter->mVariantNames[$ruleVar]; + } else { + $this->mRuleDisplay = ''; + } + break; + case 'D': + // process D flag: output rules description + $this->mRuleDisplay = $this->getRulesDesc(); + break; + case 'H': + // process H,- flag or T only: output nothing + $this->mRuleDisplay = ''; + break; + case '-': + $this->mRulesAction = 'remove'; + $this->mRuleDisplay = ''; + break; + case '+': + $this->mRulesAction = 'add'; + $this->mRuleDisplay = ''; + break; + case 'S': + $this->mRuleDisplay = $this->getRuleConvertedStr( $variant ); + break; + case 'T': + $this->mRuleTitle = $this->getRuleConvertedStr( $variant ); + $this->mRuleDisplay = ''; + break; + default: + // ignore unknown flags (but see error case below) + } } - if ( in_array( '+', $flags ) ) { - $this->mRulesAction = 'add'; + if ( $this->mRuleDisplay === false ) { + $this->mRuleDisplay = $this->mManualCodeError; } $this->generateConvTable();