<?php
/**
- * @package MediaWiki
- * @subpackage Language
+ * @addtogroup Language
*
* @author Zhengzhu Feng <zhengzhu@gmail.com>
* @license http://www.gnu.org/copyleft/gpl.html GNU General Public License
var $mTables;
var $mTitleDisplay='';
var $mDoTitleConvert=true, $mDoContentConvert=true;
+ var $mTitleFromFlag = false;
var $mCacheKey;
var $mLangObj;
var $mMarkup;
$variantfallbacks=array(),
$markup=array(),
$flags = array()) {
- global $wgDBname;
$this->mLangObj = $langobj;
$this->mMainLanguageCode = $maincode;
$this->mVariants = $variants;
$this->mVariantFallbacks = $variantfallbacks;
- $this->mCacheKey = $wgDBname . ":conversiontables";
+ $this->mCacheKey = wfMemcKey( 'conversiontables', $maincode );
$m = array('begin'=>'-{', 'flagsep'=>'|', 'codesep'=>':',
'varsep'=>';', 'end'=>'}-');
$this->mMarkup = array_merge($m, $markup);
- $f = array('A'=>'A', 'T'=>'T');
+ $f = array('A'=>'A', 'T'=>'T', 'R' => 'R');
$this->mFlags = array_merge($f, $flags);
}
/**
- * get preferred language variants.
+ * get preferred language variants.
+ * @param boolean $fromUser Get it from $wgUser's preferences
* @return string the preferred language code
* @access public
*/
- function getPreferredVariant() {
- global $wgUser, $wgRequest;
+ function getPreferredVariant( $fromUser = true ) {
+ global $wgUser, $wgRequest, $wgVariantArticlePath, $wgDefaultLanguageVariant;
if($this->mPreferredVariant)
return $this->mPreferredVariant;
return $req;
}
+ // check the syntax /code/ArticleTitle
+ if($wgVariantArticlePath!=false && isset($_SERVER['SCRIPT_NAME'])){
+ // Note: SCRIPT_NAME probably won't hold the correct value if PHP is run as CGI
+ // (it will hold path to php.cgi binary), and might not exist on some very old PHP installations
+ $scriptBase = basename( $_SERVER['SCRIPT_NAME'] );
+ if(in_array($scriptBase,$this->mVariants)){
+ $this->mPreferredVariant = $scriptBase;
+ return $this->mPreferredVariant;
+ }
+ }
+
// get language variant preference from logged in users
- if(is_object($wgUser) && $wgUser->isLoggedIn() ) {
+ // Don't call this on stub objects because that causes infinite
+ // recursion during initialisation
+ if( $fromUser && $wgUser->isLoggedIn() ) {
$this->mPreferredVariant = $wgUser->getOption('variant');
return $this->mPreferredVariant;
}
+ // see if the default variant is globally set
+ if($wgDefaultLanguageVariant != false && in_array( $wgDefaultLanguageVariant, $this->mVariants )){
+ $this->mPreferredVariant = $wgDefaultLanguageVariant;
+ return $this->mPreferredVariant;
+ }
+
# FIXME rewrite code for parsing http header. The current code
# is written specific for detecting zh- variants
if( !$this->mPreferredVariant ) {
$pv=$this->mMainLanguageCode;
if(array_key_exists('HTTP_ACCEPT_LANGUAGE', $_SERVER)) {
$header = str_replace( '_', '-', strtolower($_SERVER["HTTP_ACCEPT_LANGUAGE"]));
- $zh = strstr($header, 'zh-');
+ $zh = strstr($header, $pv.'-');
if($zh) {
$pv = substr($zh,0,5);
}
}
- return $pv;
+ // don't try to return bad variant
+ if(in_array( $pv, $this->mVariants ))
+ return $pv;
}
+
+ return $this->mMainLanguageCode;
+
}
/**
$marker = "";
// this one is needed when the text is inside an html markup
- $htmlfix = '|<[^>]+=\"[^(>=)]*$|^[^(<>=\")]*\"[^>]*>';
+ $htmlfix = '|<[^>]+$|^[^<>]*>';
- $reg = '/<[^>]+>|&[a-z#][a-z0-9]+;' . $marker . $htmlfix . '/';
+ // disable convert to variants between <code></code> tags
+ $codefix = '<code>.+?<\/code>|';
+ // disable conversion of <script type="text/javascript"> ... </script>
+ $scriptfix = '<script.*?>.*?<\/script>|';
+
+ $reg = '/'.$codefix . $scriptfix . '<[^>]+>|&[a-zA-Z#][a-z0-9]+;' . $marker . $htmlfix . '/s';
$matches = preg_split($reg, $text, -1, PREG_SPLIT_OFFSET_CAPTURE);
-
$m = array_shift($matches);
- $ret = strtr($m[0], $this->mTables[$toVariant]);
+
+ $ret = $this->translate($m[0], $toVariant);
$mstart = $m[1]+strlen($m[0]);
foreach($matches as $m) {
$ret .= substr($text, $mstart, $m[1]-$mstart);
- $ret .= strtr($m[0], $this->mTables[$toVariant]);
+ $ret .= $this->translate($m[0], $toVariant);
$mstart = $m[1] + strlen($m[0]);
}
wfProfileOut( $fname );
return $ret;
}
+ /**
+  * Translate a string to a variant.
+  * Doesn't process markup or do any of that other stuff, for that use convert()
+  *
+  * Loads the conversion tables on first use. When no table object is
+  * available for $variant, the text is returned unchanged.
+  *
+  * @param string $text Text to convert
+  * @param string $variant Variant language code
+  * @return string Translated text
+  */
+ function translate( $text, $variant ) {
+ 	wfProfileIn( __METHOD__ );
+ 	if( !$this->mTablesLoaded )
+ 		$this->loadTables();
+ 	// mTables can hold entries that are not table objects (e.g. the
+ 	// 'VERSION 2' cache marker) and callers may pass an unknown code;
+ 	// calling replace() on such an entry would be a fatal error.
+ 	if( isset( $this->mTables[$variant] ) && is_object( $this->mTables[$variant] ) )
+ 		$text = $this->mTables[$variant]->replace( $text );
+ 	wfProfileOut( __METHOD__ );
+ 	return $text;
+ }
+
/**
* convert text to all supported variants
*
* @param string $text the text to be converted
* @return array of string
- * @private
+ * @public
*/
function autoConvertToAllVariants($text) {
$fname="LanguageConverter::autoConvertToAllVariants";
$ret = array();
foreach($this->mVariants as $variant) {
- $ret[$variant] = strtr($text, $this->mTables[$variant]);
+ $ret[$variant] = $this->translate($text, $variant);
}
+
wfProfileOut( $fname );
return $ret;
}
+ /**
+  * Convert link text to every supported variant.
+  *
+  * Text outside the -{ ... }- markup is passed through translate();
+  * marked sections are copied verbatim, delimiters included.
+  *
+  * @param string $text the text to be converted
+  * @return array of string, keyed by variant code
+  * @public
+  */
+ function convertLinkToAllVariants($text) {
+ 	if( !$this->mTablesLoaded )
+ 		$this->loadTables();
+
+ 	$open = $this->mMarkup['begin'];
+ 	$close = $this->mMarkup['end'];
+
+ 	// everything before the first opening marker is ordinary text
+ 	$segments = explode($open, $text);
+ 	$leading = array_shift($segments);
+
+ 	$ret = array();
+ 	foreach($this->mVariants as $variant) {
+ 		$ret[$variant] = $this->translate($leading, $variant);
+ 	}
+
+ 	foreach($segments as $segment) {
+ 		// $parts[0] is the marked content, $parts[1] (if any) the text after it
+ 		$parts = explode($close, $segment, 2);
+ 		$verbatim = $open . $parts[0] . $close;
+ 		$hasTail = array_key_exists(1, $parts);
+
+ 		foreach($this->mVariants as $variant) {
+ 			$ret[$variant] .= $verbatim;
+ 			if($hasTail) {
+ 				$ret[$variant] .= $this->translate($parts[1], $variant);
+ 			}
+ 		}
+ 	}
+
+ 	return $ret;
+ }
+
+
/**
* Convert text using a parser object for context
*/
function parserConvert( $text, &$parser ) {
global $wgDisableLangConversion;
/* don't do anything if this is the conversion table */
- if ( $parser->mTitle->getNamespace() == NS_MEDIAWIKI &&
- strpos($parser->mTitle->getText, "Conversiontable") !== false )
+ if ( $parser->getTitle()->getNamespace() == NS_MEDIAWIKI &&
+ strpos($parser->mTitle->getText(), "Conversiontable") !== false )
{
return $text;
}
return $text;
}
+ /**
+  * Parse flags with syntax -{FLAG| ... }-
+  *
+  * A flag prefix is only recognised when the first character of $marked
+  * is a key of $this->mFlags and is immediately followed by the
+  * configured flag separator. Otherwise $marked is returned untouched
+  * with an empty flag list.
+  *
+  * @param string $marked the content found between the begin/end markers
+  * @return array two elements: the rule text, and the list of flags
+  */
+ function parseFlags($marked){
+ 	$sep = $this->mMarkup['flagsep'];
+ 	$sepLen = strlen($sep);
+
+ 	// process flag only if the flag is valid; the check uses the
+ 	// configured separator and the flag keys, the same things the
+ 	// parsing below relies on
+ 	if( $sepLen == 0 || strlen($marked) < 1 + $sepLen
+ 		|| !array_key_exists($marked[0], $this->mFlags)
+ 		|| substr($marked, 1, $sepLen) !== $sep )
+ 		return array($marked,array());
+
+ 	$flags = array();
+ 	$tt = explode($sep, $marked, 2);
+
+ 	if(sizeof($tt) == 2) {
+ 		$f = explode($this->mMarkup['varsep'], $tt[0]);
+ 		foreach($f as $ff) {
+ 			$ff = trim($ff);
+ 			// $flags is a plain list, so duplicates have to be
+ 			// detected by value, not by key
+ 			if(array_key_exists($ff, $this->mFlags) &&
+ 				!in_array($this->mFlags[$ff], $flags, true))
+ 				$flags[] = $this->mFlags[$ff];
+ 		}
+ 		$rules = $tt[1];
+ 	}
+ 	else
+ 		$rules = $marked;
+
+ 	if( !in_array('R',$flags) ){
+ 		//FIXME: may cause trouble here...
+ 		//strip since it interferes with the parsing, plus,
+ 		//all spaces should be stripped in this tag anyway.
+ 		$rules = str_replace(' ', '', $rules);
+ 	}
+
+ 	return array($rules,$flags);
+ }
+
/**
* convert text to different variants of a language. the automatic
* conversion is done in autoConvert(). here we parse the text
return $text;
if( $isTitle ) {
+
+ // use the title from the T flag if any
+ if($this->mTitleFromFlag){
+ $this->mTitleFromFlag = false;
+ return $this->mTitleDisplay;
+ }
+
+ // check for __NOTC__ tag
if( !$this->mDoTitleConvert ) {
$this->mTitleDisplay = $text;
return $text;
}
- if( !empty($this->mTitleDisplay))
- return $this->mTitleDisplay;
global $wgRequest;
$isredir = $wgRequest->getText( 'redirect', 'yes' );
return $text;
}
else {
- $this->mTitleDisplay = $this->autoConvert($text);
+ $this->mTitleDisplay = $this->convert($text);
return $this->mTitleDisplay;
}
}
- if( !$this->mDoContentConvert )
- return $text;
-
$plang = $this->getPreferredVariant();
if( isset( $this->mVariantFallbacks[$plang] ) ) {
$fallback = $this->mVariantFallbacks[$plang];
} else {
- // This sounds... bad?
- $fallback = '';
+ $fallback = $this->mMainLanguageCode;
}
$tarray = explode($this->mMarkup['begin'], $text);
$tfirst = array_shift($tarray);
- $text = $this->autoConvert($tfirst);
- foreach($tarray as $txt) {
+ if($this->mDoContentConvert)
+ $text = $this->autoConvert($tfirst);
+ else
+ $text = $tfirst;
+ foreach($tarray as $txt) {
$marked = explode($this->mMarkup['end'], $txt, 2);
- $flags = array();
- $tt = explode($this->mMarkup['flagsep'], $marked[0], 2);
- if(sizeof($tt) == 2) {
- $f = explode($this->mMarkup['varsep'], $tt[0]);
- foreach($f as $ff) {
- $ff = trim($ff);
- if(array_key_exists($ff, $this->mFlags) &&
- !array_key_exists($this->mFlags[$ff], $flags))
- $flags[] = $this->mFlags[$ff];
+ // strip the flags from syntax like -{T| ... }-
+ list($rules,$flags) = $this->parseFlags($marked[0]);
+
+ // process R flag: output raw content of -{ ... }-
+ if( in_array('R',$flags) ){
+ $disp = $rules;
+ } else if( $this->mDoContentConvert){
+ // parse the contents -{ ... }-
+ $carray = $this->parseManualRule($rules, $flags);
+
+ $disp = '';
+ if(array_key_exists($plang, $carray)) {
+ $disp = $carray[$plang];
+ } else if(array_key_exists($fallback, $carray)) {
+ $disp = $carray[$fallback];
}
- $rules = $tt[1];
+ } else{
+ // if we don't do content convert, still strip the -{}- tags
+ $disp = $rules;
+ $flags = array();
}
- else
- $rules = $marked[0];
-
-#FIXME: may cause trouble here...
- //strip since it interferes with the parsing, plus,
- //all spaces should be stripped in this tag anyway.
- $rules = str_replace(' ', '', $rules);
-
- $carray = $this->parseManualRule($rules, $flags);
- $disp = '';
- if(array_key_exists($plang, $carray))
- $disp = $carray[$plang];
- else if(array_key_exists($fallback, $carray))
- $disp = $carray[$fallback];
+
if($disp) {
- if(in_array('T', $flags))
+ // use syntax -{T|zh:TitleZh;zh-tw:TitleTw}- for custom conversion in title
+ if(in_array('T', $flags)){
+ $this->mTitleFromFlag = true;
$this->mTitleDisplay = $disp;
+ }
else
$text .= $disp;
+ // use syntax -{A|zh:WordZh;zh-tw:WordTw}- to introduce a custom mapping between
+ // words WordZh and WordTw in the whole text
if(in_array('A', $flags)) {
- /* modify the conversion table for this session*/
/* fill in the missing variants, if any,
with fallbacks */
continue;
if(!array_key_exists($vto, $carray))
continue;
- $this->mTables[$vto][$carray[$vfrom]] = $carray[$vto];
-
+ $this->mTables[$vto]->setPair($carray[$vfrom], $carray[$vto]);
}
}
}
else {
$text .= $marked[0];
}
- if(array_key_exists(1, $marked))
- $text .= $this->autoConvert($marked[1]);
+ if(array_key_exists(1, $marked)){
+ if( $this->mDoContentConvert )
+ $text .= $this->autoConvert($marked[1]);
+ else
+ $text .= $marked[1];
+ }
}
return $text;
* @access public
*/
function findVariantLink( &$link, &$nt ) {
- static $count=0; //used to limit this operation
- static $cache=array();
global $wgDisableLangConversion;
- $pref = $this->getPreferredVariant();
- $ns=0;
+ $linkBatch = new LinkBatch();
+
+ $ns=NS_MAIN;
+
if(is_object($nt))
$ns = $nt->getNamespace();
- if( $count > 50 && $ns != NS_CATEGORY )
- return;
- $count++;
+
$variants = $this->autoConvertToAllVariants($link);
if($variants == false) //give up
return;
+
+ $titles = array();
+
foreach( $variants as $v ) {
- if(isset($cache[$v]))
- continue;
- $cache[$v] = 1;
- $varnt = Title::newFromText( $v, $ns );
- if( $varnt && $varnt->getArticleID() > 0 ) {
+ if($v != $link){
+ $varnt = Title::newFromText( $v, $ns );
+ if(!is_null($varnt)){
+ $linkBatch->addObj($varnt);
+ $titles[]=$varnt;
+ }
+ }
+ }
+
+ // fetch all variants in single query
+ $linkBatch->execute();
+
+ foreach( $titles as $varnt ) {
+ if( $varnt->getArticleID() > 0 ) {
$nt = $varnt;
if( !$wgDisableLangConversion )
$link = $v;
global $wgMemc;
if( $this->mTablesLoaded )
return;
+ wfProfileIn( __METHOD__ );
$this->mTablesLoaded = true;
+ $this->mTables = false;
if($fromcache) {
+ wfProfileIn( __METHOD__.'-cache' );
$this->mTables = $wgMemc->get( $this->mCacheKey );
- if( !empty( $this->mTables ) ) //all done
- return;
- }
- // not in cache, or we need a fresh reload.
- // we will first load the default tables
- // then update them using things in MediaWiki:Zhconversiontable/*
- global $wgMessageCache;
- $this->loadDefaultTables();
- foreach($this->mVariants as $var) {
- $cached = $this->parseCachedTable($var);
- $this->mTables[$var] = array_merge($this->mTables[$var], $cached);
+ wfProfileOut( __METHOD__.'-cache' );
}
+ if ( !$this->mTables || !isset( $this->mTables['VERSION 2'] ) ) {
+ wfProfileIn( __METHOD__.'-recache' );
+ // not in cache, or we need a fresh reload.
+ // we will first load the default tables
+ // then update them using things in MediaWiki:Zhconversiontable/*
+ $this->loadDefaultTables();
+ foreach($this->mVariants as $var) {
+ $cached = $this->parseCachedTable($var);
+ $this->mTables[$var]->mergeArray($cached);
+ }
- $this->postLoadTables();
+ $this->postLoadTables();
+ $this->mTables['VERSION 2'] = true;
- if($this->lockCache()) {
- $wgMemc->set($this->mCacheKey, $this->mTables, 43200);
- $this->unlockCache();
+ if($this->lockCache()) {
+ $wgMemc->set($this->mCacheKey, $this->mTables, 43200);
+ $this->unlockCache();
+ }
+ wfProfileOut( __METHOD__.'-recache' );
}
+ wfProfileOut( __METHOD__ );
}
/**
if ($this->mUcfirst) {
foreach ($ret as $k => $v) {
- $ret[LanguageUtf8::ucfirst($k)] = LanguageUtf8::ucfirst($v);
+ $ret[Language::ucfirst($k)] = Language::ucfirst($v);
}
}
return $ret;
* @param string $text text to be tagged for no conversion
* @return string the tagged text
*/
- function markNoConversion($text) {
+ function markNoConversion($text, $noParse=false) {
# don't mark if already marked
if(strpos($text, $this->mMarkup['begin']) ||
strpos($text, $this->mMarkup['end']))
* MediaWiki:conversiontable* is updated
* @private
*/
- function OnArticleSaveComplete($article, $user, $text, $summary, $isminor, $iswatch, $section) {
+ function OnArticleSaveComplete($article, $user, $text, $summary, $isminor, $iswatch, $section, $flags, $revision) {
$titleobj = $article->getTitle();
if($titleobj->getNamespace() == NS_MEDIAWIKI) {
/*
}
return true;
}
+
+ /**
+  * Armour rendered math against conversion.
+  * Wraps the given text in the raw-output syntax -{R| math }- using the
+  * configured begin and end markers.
+  *
+  * @param string $text rendered math
+  * @return string the wrapped text
+  */
+ function armourMath($text){
+ 	return $this->mMarkup['begin'] . 'R|' . $text . $this->mMarkup['end'];
+ }
+
+
}
-?>
+