* @author fdcn
 * @author shinjiman
 * @author PhiLiP
 */
class LanguageConverter {
	/**
	 * languages supporting variants
	 * @since 1.20
	 * @var array
	 */
	public static $languagesWithVariants = [
		'en',
		'gan',
		'iu',
		'kk',
		'ku',
		'shi',
		'sr',
		'tg',
		'uz',
		'zh',
	];

	/** Main language code, as passed to the constructor. */
	public $mMainLanguageCode;

	/**
	 * @var string[]
	 */
	public $mVariants;
	public $mVariantFallbacks;
	public $mVariantNames;
	public $mTablesLoaded = false;
	public $mTables;

	// 'bidirectional' 'unidirectional' 'disable' for each variant
	public $mManualLevel;

	public $mLangObj;
	public $mFlags;
	public $mDescCodeSep = ':', $mDescVarSep = ';';
	public $mUcfirst = false;
	public $mConvRuleTitle = false;

	// Memoized results of getURLVariant(), getUserVariant() and
	// getHeaderVariant() respectively.
	public $mURLVariant;
	public $mUserVariant;
	public $mHeaderVariant;

	public $mMaxDepth = 10;
	public $mVarSeparatorPattern;

	const CACHE_VERSION_KEY = 'VERSION 7';

	/**
	 * @param Language $langobj
	 * @param string $maincode The main language code of this language
	 * @param string[] $variants The supported variants of this language
	 * @param array $variantfallbacks The fallback language of each variant
	 * @param array $flags Defining the custom strings that maps to the flags
	 * @param array $manualLevel Limit for supported variants
	 */
	public function __construct(
		$langobj,
		$maincode,
		$variants = [],
		$variantfallbacks = [],
		$flags = [],
		$manualLevel = []
	) {
		global $wgDisabledVariants;

		$this->mLangObj = $langobj;
		$this->mMainLanguageCode = $maincode;
		// Variants disabled by configuration are dropped up front, so every
		// later lookup against mVariants only sees enabled variants.
		$this->mVariants = array_diff( $variants, $wgDisabledVariants );
		$this->mVariantFallbacks = $variantfallbacks;
		$this->mVariantNames = Language::fetchLanguageNames();

		$defaultflags = [
			// 'S' show converted text
			// '+' add rules for alltext
			// 'E' the given flags are in error
			// these flags above are reserved for program
			'A' => 'A', // add rule for convert code (all text convert)
			'T' => 'T', // title convert
			'R' => 'R', // raw content
			'D' => 'D', // convert description (subclass implement)
			'-' => '-', // remove convert (not implement)
			'H' => 'H', // add rule for convert code (but no display in placed code)
			'N' => 'N', // current variant name
		];
		// Caller-supplied flags override the defaults on key collision.
		$this->mFlags = array_merge( $defaultflags, $flags );

		foreach ( $this->mVariants as $v ) {
			// Variants without an explicit manual level are bidirectional.
			if ( array_key_exists( $v, $manualLevel ) ) {
				$this->mManualLevel[$v] = $manualLevel[$v];
			} else {
				$this->mManualLevel[$v] = 'bidirectional';
			}
			// Each variant code is also registered as a flag mapping to itself.
			$this->mFlags[$v] = $v;
		}
	}

	/**
	 * Get all valid variants.
	 * Call this instead of using $this->mVariants directly.
	 *
	 * @return string[] Contains all valid variants
	 */
	public function getVariants() {
		return $this->mVariants;
	}

	/**
	 * In case some variant is not defined in the markup, we need
	 * to have some fallback. For example, in zh, normally people
	 * will define zh-hans and zh-hant, but less so for zh-sg or zh-hk.
	 * when zh-sg is preferred but not defined, we will pick zh-hans
	 * in this case. Right now this is only used by zh.
	 *
	 * @param string $variant The language code of the variant
	 * @return string|array The code of the fallback language or the
	 *   main code if there is no fallback
	 */
	public function getVariantFallbacks( $variant ) {
		if ( isset( $this->mVariantFallbacks[$variant] ) ) {
			return $this->mVariantFallbacks[$variant];
		}
		return $this->mMainLanguageCode;
	}

	/**
	 * Get the title produced by the conversion rule.
	 * @return string The converted title text
	 */
	public function getConvRuleTitle() {
		return $this->mConvRuleTitle;
	}

	/**
	 * Get preferred language variant.
	 *
	 * Sources are consulted in this order: the URL, then the user
	 * preference (for loadable, logged-in users) or otherwise the
	 * Accept-Language header, then $wgDefaultLanguageVariant. If none
	 * yields a valid variant the main language code is returned.
	 *
	 * @return string The preferred language code
	 */
	public function getPreferredVariant() {
		global $wgDefaultLanguageVariant, $wgUser;

		$req = $this->getURLVariant();

		if ( $wgUser->isSafeToLoad() && $wgUser->isLoggedIn() && !$req ) {
			$req = $this->getUserVariant();
		} elseif ( !$req ) {
			$req = $this->getHeaderVariant();
		}

		if ( $wgDefaultLanguageVariant && !$req ) {
			$req = $this->validateVariant( $wgDefaultLanguageVariant );
		}

		// This function, unlike the other get*Variant functions, is
		// not memoized (i.e. their return value is not cached) since
		// new information might appear during processing after this
		// is first called.
		if ( $this->validateVariant( $req ) ) {
			return $req;
		}
		return $this->mMainLanguageCode;
	}

	/**
	 * Get default variant.
	 * This function would not be affected by user's settings
	 * @return string The default variant code
	 */
	public function getDefaultVariant() {
		global $wgDefaultLanguageVariant;

		$req = $this->getURLVariant();

		if ( !$req ) {
			$req = $this->getHeaderVariant();
		}

		if ( $wgDefaultLanguageVariant && !$req ) {
			$req = $this->validateVariant( $wgDefaultLanguageVariant );
		}

		if ( $req ) {
			return $req;
		}
		return $this->mMainLanguageCode;
	}

	/**
	 * Validate the variant
	 * @param string $variant The variant to validate
	 * @return mixed Returns the variant if it is valid, null otherwise
	 */
	public function validateVariant( $variant = null ) {
		// Strict comparison: variant codes are strings, and a loose
		// comparison lets non-string input (e.g. an integer array key taken
		// from the Accept-Language list, or boolean true) match spuriously.
		if ( $variant !== null && in_array( $variant, $this->mVariants, true ) ) {
			return $variant;
		}
		return null;
	}

	/**
	 * Get the variant specified in the URL
	 *
	 * Reads the 'variant' request parameter, falling back to 'uselang'.
	 * A valid result is memoized in $this->mURLVariant.
	 *
	 * @return mixed Variant if one found, null otherwise.
	 */
	public function getURLVariant() {
		global $wgRequest;

		if ( $this->mURLVariant ) {
			return $this->mURLVariant;
		}

		// see if the preference is set in the request
		$ret = $wgRequest->getText( 'variant' );

		if ( !$ret ) {
			$ret = $wgRequest->getVal( 'uselang' );
		}

		$this->mURLVariant = $this->validateVariant( $ret );
		return $this->mURLVariant;
	}

	/**
	 * Determine if the user has a variant set.
	 *
	 * @return mixed Variant if one found; false if the user object is not
	 *   safe to load; null if the stored preference is not a valid variant.
	 */
	protected function getUserVariant() {
		global $wgUser, $wgContLang;

		// memoizing this function wreaks havoc on parserTest.php
		/*
		if ( $this->mUserVariant ) {
			return $this->mUserVariant;
		}
		*/

		// Get language variant preference from logged in users
		// Don't call this on stub objects because that causes infinite
		// recursion during initialisation
		if ( !$wgUser->isSafeToLoad() ) {
			return false;
		}
		if ( $wgUser->isLoggedIn() ) {
			// The preference key depends on whether this converter's main
			// language is the content language.
			if ( $this->mMainLanguageCode == $wgContLang->getCode() ) {
				$ret = $wgUser->getOption( 'variant' );
			} else {
				$ret = $wgUser->getOption( 'variant-' . $this->mMainLanguageCode );
			}
		} else {
			// figure out user lang without constructing wgLang to avoid
			// infinite recursion
			$ret = $wgUser->getOption( 'language' );
		}

		$this->mUserVariant = $this->validateVariant( $ret );
		return $this->mUserVariant;
	}

	/**
	 * Determine the language variant from the Accept-Language header.
	 *
	 * Languages from the header are tried in order; if none is itself a
	 * valid variant, the fallbacks collected for those languages are
	 * tried next. A valid result is memoized in $this->mHeaderVariant.
	 *
	 * @return mixed Variant if one found, null otherwise.
	 */
	protected function getHeaderVariant() {
		global $wgRequest;

		if ( $this->mHeaderVariant ) {
			return $this->mHeaderVariant;
		}

		// see if some supported language variant is set in the
		// HTTP header.
		$languages = array_keys( $wgRequest->getAcceptLang() );
		if ( empty( $languages ) ) {
			return null;
		}

		$fallbackLanguages = [];
		foreach ( $languages as $language ) {
			$this->mHeaderVariant = $this->validateVariant( $language );
			if ( $this->mHeaderVariant ) {
				break;
			}

			// To see if there are fallbacks of current language.
			// We record these fallback variants, and process
			// them later.
			$fallbacks = $this->getVariantFallbacks( $language );
			if ( is_string( $fallbacks ) && $fallbacks !== $this->mMainLanguageCode ) {
				$fallbackLanguages[] = $fallbacks;
			} elseif ( is_array( $fallbacks ) ) {
				$fallbackLanguages =
					array_merge( $fallbackLanguages, $fallbacks );
			}
		}

		if ( !$this->mHeaderVariant ) {
			// process fallback languages now
			$fallback_languages = array_unique( $fallbackLanguages );
			foreach ( $fallback_languages as $language ) {
				$this->mHeaderVariant = $this->validateVariant( $language );
				if ( $this->mHeaderVariant ) {
					break;
				}
			}
		}

		return $this->mHeaderVariant;
	}

	/**
	 * Dictionary-based conversion.
	 * This function would not parse the conversion rules.
	 * If you want to parse rules, try to use convert() or
	 * convertTo().
* * @param string $text The text to be converted * @param bool|string $toVariant The target language code * @return string The converted text */ public function autoConvert( $text, $toVariant = false ) { $this->loadTables(); if ( !$toVariant ) { $toVariant = $this->getPreferredVariant(); if ( !$toVariant ) { return $text; } } if ( $this->guessVariant( $text, $toVariant ) ) { return $text; } /* we convert everything except: 1. HTML markups (anything between < and >) 2. HTML entities 3. placeholders created by the parser IMPORTANT: Beware of failure from pcre.backtrack_limit (T124404). Minimize use of backtracking where possible. */ $marker = '|' . Parser::MARKER_PREFIX . '[^\x7f]++\x7f'; // this one is needed when the text is inside an HTML markup $htmlfix = '|<[^>\004]++(?=\004$)|^[^<>]*+>'; // Optimize for the common case where these tags have // few or no children. Thus try and possesively get as much as // possible, and only engage in backtracking when we hit a '<'. // disable convert to variants between tags $codefix = '[^<]*+(?:(?:(?!<\/code>).)[^<]*+)*+<\/code>|'; // disable conversion of