<?php
-
/**
* Contains the LanguageConverter class and ConverterRule class
- * @ingroup Language
*
- * @license http://www.gnu.org/copyleft/gpl.html GNU General Public License
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ * http://www.gnu.org/copyleft/gpl.html
+ *
* @file
+ * @ingroup Language
*/
/**
'D' => 'D', // convert description (subclass implement)
'-' => '-', // remove convert (not implement)
'H' => 'H', // add rule for convert code
- // (but no display in placed code )
+ // (but no display in placed code )
'N' => 'N' // current variant name
);
$this->mFlags = array_merge( $defaultflags, $flags );
*
* @param $variant String: the language code of the variant
* @return String: The code of the fallback language or the
- * main code if there is no fallback
+ * main code if there is no fallback
*/
public function getVariantFallbacks( $variant ) {
if ( isset( $this->mVariantFallbacks[$variant] ) ) {
/**
* Get preferred language variant.
- * @param $fromUser Boolean: get it from $wgUser's preferences
- * @param $fromHeader Boolean: get it from Accept-Language
* @return String: the preferred language code
*/
- public function getPreferredVariant( $fromUser = true, $fromHeader = false ) {
- global $wgDefaultLanguageVariant;
+ public function getPreferredVariant() {
+ global $wgDefaultLanguageVariant, $wgUser;
$req = $this->getURLVariant();
- if ( $fromUser && !$req ) {
+ if ( $wgUser->isLoggedIn() && !$req ) {
$req = $this->getUserVariant();
}
- if ( $fromHeader && !$req ) {
+ elseif ( !$req ) {
$req = $this->getHeaderVariant();
}
return $this->mMainLanguageCode;
}
+ /**
+ * Get default variant.
+ * This function is not affected by the user's settings or request headers.
+ * @return String: the default variant code
+ */
+ public function getDefaultVariant() {
+     global $wgDefaultLanguageVariant;
+
+     // A variant requested through the URL takes precedence.
+     $variant = $this->getURLVariant();
+
+     // Otherwise run the configured site default through validateVariant().
+     if ( !$variant && $wgDefaultLanguageVariant ) {
+         $variant = $this->validateVariant( $wgDefaultLanguageVariant );
+     }
+
+     // Nothing usable so far: fall back to the main language code.
+     return $variant ? $variant : $this->mMainLanguageCode;
+ }
+
/**
* Validate the variant
* @param $variant String: the variant to validate
*
* @return Mixed: variant if one found, false otherwise.
*/
- protected function getURLVariant() {
+ public function getURLVariant() {
global $wgRequest;
- $ret = null;
if ( $this->mURLVariant ) {
return $this->mURLVariant;
*/
protected function getUserVariant() {
global $wgUser;
- $ret = null;
// memoizing this function wreaks havoc on parserTest.php
/* if ( $this->mUserVariant ) { */
return $this->mUserVariant = $this->validateVariant( $ret );
}
-
/**
* Determine the language variant from the Accept-Language header.
*
*/
protected function getHeaderVariant() {
global $wgRequest;
- $ret = null;
if ( $this->mHeaderVariant ) {
return $this->mHeaderVariant;
// see if some supported language variant is set in the
// http header.
- $acceptLanguage = $wgRequest->getHeader( 'Accept-Language' );
- if ( !$acceptLanguage ) {
+ $languages = array_keys( $wgRequest->getAcceptLang() );
+ if ( empty( $languages ) ) {
return null;
}
- // explode by comma
- $result = StringUtils::explode( ',', strtolower( $acceptLanguage ) );
- $languages = array();
-
- foreach ( $result as $elem ) {
- // if $elem likes 'zh-cn;q=0.9'
- if ( ( $posi = strpos( $elem, ';' ) ) !== false ) {
- // get the real language code likes 'zh-cn'
- $languages[] = substr( $elem, 0, $posi );
- } else {
- $languages[] = $elem;
- }
- }
-
$fallback_languages = array();
foreach ( $languages as $language ) {
- // strip whitespace
- $language = trim( $language );
$this->mHeaderVariant = $this->validateVariant( $language );
if ( $this->mHeaderVariant ) {
break;
* @return String like ' alt="yyyy"' or ' title="yyyy"'
*/
protected function captionConvert( $matches ) {
+ // $matches[1] is used as the attribute name and $matches[2] as its value.
+ // TODO: cache the preferred variant for the duration of each
+ // autoConvert() run instead of looking it up for every caption;
+ // this should improve performance.
$toVariant = $this->getPreferredVariant();
$title = $matches[1];
- $text = $matches[2];
+ $text = $matches[2];
+
// we convert captions except URL
+ // NOTE: strpos() yields 0 for a match at offset 0, which is falsy too,
+ // so text that begins with '://' is still translated.
if ( !strpos( $text, '://' ) ) {
$text = $this->translate( $text, $toVariant );
}
- return " $title=\"$text\"";
+
+ // strip anything that looks like an HTML tag so it cannot disrupt the layout
+ $text = preg_replace( '/<[^>]+>/', '', $text );
+ // escape HTML special chars so the value stays inside the quoted attribute
+ // NOTE(review): presumably $matches[2] is raw attribute text; if it is
+ // already entity-encoded this double-encodes it - confirm against the caller.
+ $text = htmlspecialchars( $text );
+
+ // rebuild the attribute with a leading space, e.g. ' alt="yyyy"'
+ return " {$title}=\"{$text}\"";
}
/**
if ( !$toVariant ) {
$toVariant = $this->getPreferredVariant();
if ( !$toVariant ) {
+ wfProfileOut( __METHOD__ );
return $text;
}
}
}
/**
- * Prepare manual conversion table.
- * @private
+ * Apply manual conversion rules.
+ *
+ * @param $convRule Object: Object of ConverterRule
*/
- function applyManualConv( $convRule ) {
+ protected function applyManualConv( $convRule ) {
// Use syntax -{T|zh-cn:TitleCN; zh-tw:TitleTw}- to custom
// title conversion.
- // Bug 24072: mConvRuleTitle won't work if the title conversion
- // rule was followed by other manual conversion rule(s).
+ // Bug 24072: $mConvRuleTitle was overwritten by other manual
+ // rule(s) not for title, this breaks the title conversion.
$newConvRuleTitle = $convRule->getTitle();
- if( $newConvRuleTitle ) {
+ if ( $newConvRuleTitle ) {
+ // Only overwrite the stored title rule when getTitle() is non-empty.
$this->mConvRuleTitle = $newConvRuleTitle;
}
- // apply manual conversion table to global table
+ // merge/remove manual conversion rules to/from global table
$convTable = $convRule->getConvTable();
$action = $convRule->getRulesAction();
foreach ( $convTable as $variant => $pair ) {
}
/**
- * Convert text to different variants of a language. The automatic
- * conversion is done in autoConvert(). Here we parse the text
- * marked with -{}-, which specifies special conversions of the
- * text that can not be accomplished in autoConvert().
- *
- * Syntax of the markup:
- * -{code1:text1;code2:text2;...}- or
- * -{flags|code1:text1;code2:text2;...}- or
- * -{text}- in which case no conversion should take place for text
+ * Auto convert a Title object to a readable string in the
+ * preferred variant.
*
- * @param $text String: text to be converted
- * @return String: converted text
- */
- public function convert( $text ) {
- global $wgDisableLangConversion;
- if ( $wgDisableLangConversion ) return $text;
-
- $variant = $this->getPreferredVariant();
-
- return $this->recursiveConvertTopLevel( $text, $variant );
- }
-
- /**
- * Convert a Title object to a readable string in the preferred variant
+ * @param $title Object: a Title object
+ * @return String: converted title text
*/
public function convertTitle( $title ) {
$variant = $this->getPreferredVariant();
$text .= ':';
}
$text .= $title->getText();
- $text = $this->autoConvert( $text, $variant );
+ $text = $this->translate( $text, $variant );
return $text;
}
+ /**
+ * Convert text to different variants of a language. The automatic
+ * conversion is done in autoConvert(). Here we parse the text
+ * marked with -{}-, which specifies special conversions of the
+ * text that can not be accomplished in autoConvert().
+ *
+ * Syntax of the markup:
+ * -{code1:text1;code2:text2;...}- or
+ * -{flags|code1:text1;code2:text2;...}- or
+ * -{text}- in which case no conversion should take place for text
+ *
+ * @param $text String: text to be converted
+ * @return String: converted text
+ */
+ public function convert( $text ) {
+     // Delegate to convertTo() with the variant from getPreferredVariant().
+     return $this->convertTo( $text, $this->getPreferredVariant() );
+ }
+
+ /**
+ * Same as convert(), except that the target variant is passed explicitly.
+ *
+ * @param $text String: text to be converted
+ * @param $variant String: the target variant code
+ * @return String: converted text
+ */
+ public function convertTo( $text, $variant ) {
+     global $wgDisableLangConversion;
+
+     // With conversion disabled site-wide, hand the input back untouched;
+     // otherwise start the recursive conversion at the top level.
+     return $wgDisableLangConversion
+         ? $text
+         : $this->recursiveConvertTopLevel( $text, $variant );
+ }
+
+ /**
+ * Recursively convert the text outside of -{ }- markup. Nested markup
+ * may be used to define custom rules.
+ *
+ * @param $text String: text to be converted
+ * @param $variant String: the target variant code
+ * @param $depth Integer: depth of recursion
+ * @return String: converted text
+ */
protected function recursiveConvertTopLevel( $text, $variant, $depth = 0 ) {
$startPos = 0;
$out = '';
$length = strlen( $text );
while ( $startPos < $length ) {
- $m = false;
$pos = strpos( $text, '-{', $startPos );
-
+
if ( $pos === false ) {
// No more markup, append final segment
$out .= $this->autoConvert( substr( $text, $startPos ), $variant );
- $startPos = $length;
return $out;
}
return $out;
}
+ /**
+ * Recursively convert text on the inside.
+ *
+ * @param $text String: text to be converted
+ * @param $variant String: the target variant code
+ * @param $depth Integer: depth of recursion
+ * @return String: converted text
+ */
protected function recursiveConvertRule( $text, $variant, &$startPos, $depth = 0 ) {
// Quick sanity check (no function calls)
if ( $text[$startPos] !== '-' || $text[$startPos + 1] !== '{' ) {
- throw new MWException( __METHOD__.': invalid input string' );
+ throw new MWException( __METHOD__ . ': invalid input string' );
}
$startPos += 2;
$inner .= '-{';
if ( !$warningDone ) {
$inner .= '<span class="error">' .
- wfMsgForContent( 'language-converter-depth-warning',
+ wfMsgForContent( 'language-converter-depth-warning',
$this->mMaxDepth ) .
'</span>';
$warningDone = true;
$this->applyManualConv( $rule );
return $rule->getDisplay();
default:
- throw new MWException( __METHOD__.': invalid regex match' );
+ throw new MWException( __METHOD__ . ': invalid regex match' );
}
}
* @param $link String: the name of the link
* @param $nt Mixed: the title object of the link
* @param $ignoreOtherCond Boolean: to disable other conditions when
- * we need to transclude a template or update a category's link
+ * we need to transclude a template or update a category's link
* @return Null, the input parameters may be modified upon return
*/
public function findVariantLink( &$link, &$nt, $ignoreOtherCond = false ) {
}
}
- /**
+ /**
* Returns language specific hash options.
*/
public function getExtraHashOptions() {
wfProfileOut( __METHOD__ );
}
- /**
+ /**
* Hook for post-processing after conversion tables are loaded.
*
*/
function postLoadTables() {
    // Intentionally empty: this implementation does nothing.
}
- /**
+ /**
* Reload the conversion tables.
*
* @private
*
*/
function parseCachedTable( $code, $subpage = '', $recursive = true ) {
- global $wgMessageCache;
static $parsed = array();
- if ( !is_object( $wgMessageCache ) ) {
- return array();
- }
-
$key = 'Conversiontable/' . $code;
if ( $subpage ) {
$key .= '/' . $subpage;
}
if ( strpos( $code, '/' ) === false ) {
- $txt = $wgMessageCache->get( 'Conversiontable', true, $code );
- if( $txt === false ){
+ $txt = MessageCache::singleton()->get( 'Conversiontable', true, $code );
+ if ( $txt === false ) {
# FIXME: this method doesn't seem to be expecting
# this possible outcome...
$txt = '<Conversiontable>';
}
}
-
// parse the mappings in this page
$blocks = StringUtils::explode( '-{', $txt );
$ret = array();
}
$parsed[$key] = true;
-
// recursively parse the subpages
if ( $recursive ) {
foreach ( $sublinks as $link ) {
if ( $this->mUcfirst ) {
foreach ( $ret as $k => $v ) {
- $ret[Language::ucfirst( $k )] = Language::ucfirst( $v );
+ $ret[$this->mLangObj->ucfirst( $k )] = $this->mLangObj->ucfirst( $v );
}
}
return $ret;
/**
* Armour rendered math against conversion.
- * Wrap math into rawoutput -{R| math }- syntax.
+ * Escape special chars in parsed math text (in most cases img elements).
*/
public function armourMath( $text ) {
- // we need to convert '-{' and '}-' to '-&#123;' and '&#125;-'
- // to avoid a unwanted '}-' appeared after the math-image.
+ // Takes the parsed math text and returns it with every '-{' and '}-'
+ // rewritten as '-&#123;' and '&#125;-', so that no converter markup
+ // can appear inside the math image tag. The replacement values must
+ // differ from the keys; an identity map would make this a no-op.
$text = strtr( $text, array( '-{' => '-&#123;', '}-' => '&#125;-' ) );
- $ret = "-{R|$text}-";
- return $ret;
+ return $text;
}
/**
// text should be split by ";" only if a valid variant
// name exists after the markup, for example:
// -{zh-hans:<span style="font-size:120%;">xxx</span>;zh-hant:\
- // <span style="font-size:120%;">yyy</span>;}-
+ // <span style="font-size:120%;">yyy</span>;}-
// we should split it as:
// array(
// [0] => 'zh-hans:<span style="font-size:120%;">xxx</span>'
*/
function parseRules() {
$rules = $this->mRules;
- $flags = $this->mFlags;
$bidtable = array();
$unidtable = array();
$variants = $this->mConverter->mVariants;
$bidtable[$v] = $to;
} elseif ( count( $u ) == 2 ) {
$from = trim( $u[0] );
- $v = trim( $u[1] );
+ $v = trim( $u[1] );
if ( array_key_exists( $v, $unidtable )
&& !is_array( $unidtable[$v] )
&& $to
}
/*for unidirectional array fill to convert tables */
if ( ( $manLevel[$v] == 'bidirectional' || $manLevel[$v] == 'unidirectional' )
- && isset( $unidtable[$v] ) )
+ && isset( $unidtable[$v] ) )
{
if ( isset( $this->mConvTable[$v] ) ) {
$this->mConvTable[$v] = array_merge( $this->mConvTable[$v], $unidtable[$v] );
$variant = $this->mConverter->getPreferredVariant();
}
- $variants = $this->mConverter->mVariants;
$this->parseFlags();
$flags = $this->mFlags;