follow-up r59522. ONLY add Accept-Language when a valid variant has been found out.
[lhc/web/wiklou.git] / languages / LanguageConverter.php
1 <?php
2
3 /**
4 * Contains the LanguageConverter class and ConverterRule class
5 * @ingroup Language
6 *
7 * @license http://www.gnu.org/copyleft/gpl.html GNU General Public License
8 * @file
9 */
10
11 /**
12 * base class for language convert
13 * @ingroup Language
14 *
15 * @author Zhengzhu Feng <zhengzhu@gmail.com>
16 * @maintainers fdcn <fdcn64@gmail.com>, shinjiman <shinjiman@gmail.com>, PhiLiP <philip.npc@gmail.com>
17 */
18 class LanguageConverter {
// Preferred variant once it has been resolved; getPreferredVariant()
// returns this value directly while it is non-empty.
var $mPreferredVariant = '';
// Main language code, as passed to the constructor.
var $mMainLanguageCode;
// Supported variant codes (the constructor drops $wgDisabledVariants).
var $mVariants;
// Fallback(s) configured per variant code.
var $mVariantFallbacks;
// Copy of $wgLanguageNames taken in the constructor.
var $mVariantNames;
// Set to true by loadTables() once it has run.
var $mTablesLoaded = false;
// Conversion tables indexed by variant code; also carries the
// CACHE_VERSION_KEY marker entry.
var $mTables;
// Namespace name replacements indexed by variant code.
var $mNamespaceTables;
// Cleared by convert() when the 'notitleconvert' magic word is found.
var $mDoTitleConvert = true;
// Cleared by convert() when the 'nocontentconvert' magic word is found.
var $mDoContentConvert = true;
// 'bidirectional' 'unidirectional' 'disable' for each variant
var $mManualLevel;
// True once a manual rule has supplied a title (see applyManualConv()).
var $mTitleFromFlag = false;
// Key under which the conversion tables are stored in $wgMemc.
var $mCacheKey;
// Language object handed to the constructor.
var $mLangObj;
// Markup strings for manual conversion ('begin', 'flagsep', 'unidsep',
// 'codesep', 'varsep', 'end').
var $mMarkup;
// Map of the flag strings accepted in -{flags|...}- to their flag.
var $mFlags;
// Separators used by ConverterRule::getRulesDesc().
var $mDescCodeSep = ':';
var $mDescVarSep = ';';
// When true, parseCachedTable() also adds ucfirst'ed copies of each pair.
var $mUcfirst = false;
// Title as given to preConvertTitle().
var $mTitleOriginal = '';
// Title to display (converted, or taken from a manual rule).
var $mTitleDisplay = '';

// loadTables() only accepts cached tables that contain this key.
const CACHE_VERSION_KEY = 'VERSION 6';
38
/**
 * Constructor
 *
 * @param object $langobj the language object this converter is attached to
 *               (stored in mLangObj)
 * @param string $maincode the main language code of this language
 * @param array $variants the supported variants of this language
 * @param array $variantfallbacks the fallback language(s) of each variant
 * @param array $markup overrides for the markup used for manual conversion
 * @param array $flags custom strings that map to the conversion flags
 * @param array $manualLevel manual conversion level per variant; variants
 *              not listed default to 'bidirectional'
 * @public
 */
function __construct( $langobj, $maincode,
		$variants = array(),
		$variantfallbacks = array(),
		$markup = array(),
		$flags = array(),
		$manualLevel = array() ) {
	global $wgDisabledVariants, $wgLanguageNames;

	$this->mLangObj = $langobj;
	$this->mMainLanguageCode = $maincode;

	// keep only the variants that are not disabled by configuration
	$this->mVariants = array();
	foreach ( $variants as $code ) {
		if ( in_array( $code, $wgDisabledVariants ) ) {
			continue;
		}
		$this->mVariants[] = $code;
	}

	$this->mVariantFallbacks = $variantfallbacks;
	$this->mVariantNames = $wgLanguageNames;
	$this->mCacheKey = wfMemcKey( 'conversiontables', $maincode );

	// default markup; entries in $markup take precedence
	$defaultMarkup = array(
		'begin' => '-{',
		'flagsep' => '|',
		'unidsep' => '=>', // for unidirectional conversion
		'codesep' => ':',
		'varsep' => ';',
		'end' => '}-'
	);
	$this->mMarkup = array_merge( $defaultMarkup, $markup );

	// default flags; entries in $flags take precedence.
	// 'S' (show converted text), '+' (add rules for all text) and
	// 'E' (the given flags are erroneous) are reserved for the program.
	$defaultFlags = array(
		'A' => 'A', // add rule for convert code (all text convert)
		'T' => 'T', // title convert
		'R' => 'R', // raw content
		'D' => 'D', // convert description (subclass implement)
		'-' => '-', // remove convert (not implemented)
		'H' => 'H', // add rule for convert code (but no display in placed code)
		'N' => 'N'  // current variant name
	);
	$this->mFlags = array_merge( $defaultFlags, $flags );

	// per-variant setup: manual level, empty namespace table, and the
	// variant code itself registered as a flag
	foreach ( $this->mVariants as $code ) {
		if ( array_key_exists( $code, $manualLevel ) ) {
			$this->mManualLevel[$code] = $manualLevel[$code];
		} else {
			$this->mManualLevel[$code] = 'bidirectional';
		}
		$this->mNamespaceTables[$code] = array();
		$this->mFlags[$code] = $code;
	}
}
100
/**
 * Get the variant codes supported by this converter.
 *
 * @return array the list built by the constructor
 * @public
 */
function getVariants() {
	$supported = $this->mVariants;
	return $supported;
}
107
/**
 * In case some variant is not defined in the markup, we need
 * to have some fallback. For example, in zh, normally people
 * will define zh-hans and zh-hant, but less so for zh-sg or zh-hk.
 * When zh-sg is preferred but not defined, we will pick zh-hans
 * in this case. Right now this is only used by zh.
 *
 * @param string $v the language code of the variant
 * @return mixed the configured fallback for $v (callers handle both a
 *         string and an array), or the main language code when no
 *         fallback is configured
 * @public
 */
function getVariantFallbacks( $v ) {
	return isset( $this->mVariantFallbacks[$v] )
		? $this->mVariantFallbacks[$v]
		: $this->mMainLanguageCode;
}
125
/**
 * Get the preferred language variant.
 *
 * The sources below are consulted in order; the first one that applies wins:
 *  1. the value already stored in mPreferredVariant
 *  2. the interface language ('uselang' request value, defaulting to the
 *     user's 'language' option or the main code): when it is not one of
 *     our variants, the main language code is returned (no conversion)
 *  3. the 'variant' request value
 *  4. the base name of SCRIPT_NAME, when $wgVariantArticlePath is set
 *  5. the 'variant' option of a logged-in user
 *  6. $wgDefaultLanguageVariant
 *  7. the Accept-Language request header (result is not stored)
 *  8. the main language code
 *
 * @param boolean $fromUser Get it from $wgUser's preferences
 * @return string the preferred language code
 * @public
 */
function getPreferredVariant( $fromUser = true ) {
	global $wgUser, $wgRequest, $wgVariantArticlePath, $wgDefaultLanguageVariant;

	if ( $this->mPreferredVariant ) {
		return $this->mPreferredVariant;
	}

	// figure out user lang without constructing wgLang to avoid infinite recursion
	if ( $fromUser ) {
		$defaultUserLang = $wgUser->getOption( 'language' );
	} else {
		$defaultUserLang = $this->mMainLanguageCode;
	}
	$userLang = $wgRequest->getVal( 'uselang', $defaultUserLang );
	// see if interface language is same as content, if not, prevent conversion
	if ( !in_array( $userLang, $this->mVariants ) ) {
		$this->mPreferredVariant = $this->mMainLanguageCode; // no conversion
		return $this->mPreferredVariant;
	}

	// see if the preference is set in the request
	$req = $wgRequest->getText( 'variant' );
	if ( in_array( $req, $this->mVariants ) ) {
		$this->mPreferredVariant = $req;
		return $req;
	}

	// check the syntax /code/ArticleTitle
	if ( $wgVariantArticlePath != false && isset( $_SERVER['SCRIPT_NAME'] ) ) {
		// Note: SCRIPT_NAME probably won't hold the correct value if PHP is run as CGI
		// (it will hold path to php.cgi binary), and might not exist on some very old PHP installations
		$scriptBase = basename( $_SERVER['SCRIPT_NAME'] );
		if ( in_array( $scriptBase, $this->mVariants ) ) {
			$this->mPreferredVariant = $scriptBase;
			return $this->mPreferredVariant;
		}
	}

	// get language variant preference from logged in users
	// Don't call this on stub objects because that causes infinite
	// recursion during initialisation
	if ( $fromUser && $wgUser->isLoggedIn() ) {
		$this->mPreferredVariant = $wgUser->getOption( 'variant' );
		return $this->mPreferredVariant;
	}

	// see if default variant is globally set
	if ( $wgDefaultLanguageVariant != false
		&& in_array( $wgDefaultLanguageVariant, $this->mVariants ) ) {
		$this->mPreferredVariant = $wgDefaultLanguageVariant;
		return $this->mPreferredVariant;
	}

	// see if some supported language variant is set in the
	// http header, but we don't set the mPreferredVariant
	// variable in case this is called before the user's
	// preference is loaded
	if ( array_key_exists( 'HTTP_ACCEPT_LANGUAGE', $_SERVER ) ) {
		$acceptLanguage = strtolower( $_SERVER['HTTP_ACCEPT_LANGUAGE'] );

		$headerVariant = null;
		$fallbackLanguages = array();
		foreach ( explode( ',', $acceptLanguage ) as $elem ) {
			// if $elem looks like 'zh-cn;q=0.9', keep only the code 'zh-cn'
			$posi = strpos( $elem, ';' );
			if ( $posi !== false ) {
				$elem = substr( $elem, 0, $posi );
			}
			// strip whitespace
			$language = trim( $elem );

			if ( in_array( $language, $this->mVariants ) ) {
				$headerVariant = $language;
				break;
			}

			// To see if there are fallbacks of current language.
			// We record these fallback variants, and process
			// them later.
			$fallbacks = $this->getVariantFallbacks( $language );
			if ( is_string( $fallbacks ) ) {
				$fallbackLanguages[] = $fallbacks;
			} elseif ( is_array( $fallbacks ) ) {
				$fallbackLanguages = array_merge( $fallbackLanguages, $fallbacks );
			}
		}

		// process fallback languages now
		if ( $headerVariant === null ) {
			foreach ( array_unique( $fallbackLanguages ) as $language ) {
				if ( in_array( $language, $this->mVariants ) ) {
					$headerVariant = $language;
					break;
				}
			}
		}

		// Only act on the header when it actually produced a supported
		// variant. Previously a header without any usable language made
		// this method add the headers below and return null instead of
		// a language code.
		if ( $headerVariant !== null ) {
			// bug 21672: Add Accept-Language to Vary and XVO headers
			// to help Squid to determine user's preferred local language
			// ONLY add Accept-Language when a variant has been found out
			// thanks to Liangent's help
			if ( $headerVariant !== $this->mMainLanguageCode ) {
				global $wgOut, $wgUseXVO;
				$wgOut->addVaryHeader( 'Accept-Language' );
				if ( $wgUseXVO ) {
					$wgOut->addXVOHeader( 'Accept-Language' );
				}
			}
			return $headerVariant;
		}
	}

	return $this->mMainLanguageCode;
}
255
/**
 * Callback for preg_replace_callback() that converts captions.
 *
 * Handles the text in "title" or "alt" attributes, like
 * '<img alt="text" ... ' or '<span title="text" ... '
 *
 * @param array $matches index 1 is the attribute name, index 2 its value
 * @return string like ' alt="yyyy"' or ' title="yyyy"'
 * @private
 */
function captionConvert( $matches ) {
	$toVariant = $this->getPreferredVariant();
	list( , $title, $text ) = $matches;

	// we convert captions except URL.
	// strpos() is falsy both for "not found" and for a match at offset 0,
	// so only a '://' found after the first character blocks conversion.
	$foundScheme = (bool)strpos( $text, '://' );
	if ( !$foundScheme ) {
		$text = $this->translate( $text, $toVariant );
	}

	return " $title=\"$text\"";
}
274
/**
 * dictionary-based conversion
 *
 * Splits $text on everything that must stay untouched (HTML tags, HTML
 * entities, parser place holders, and the content of code/script/pre
 * elements), translates the pieces in between, and converts the text of
 * "title"/"alt" attributes found inside the untouched parts.
 *
 * @param string $text the text to be converted
 * @param string $toVariant the target language code; the preferred
 *               variant is used when this is empty
 * @return string the converted text, or $text unchanged when the target
 *         is not a supported variant
 * @private
 */
function autoConvert( $text, $toVariant = false ) {
	$fname = "LanguageConverter::autoConvert";

	wfProfileIn( $fname );

	if ( !$this->mTablesLoaded ) {
		$this->loadTables();
	}

	if ( !$toVariant ) {
		$toVariant = $this->getPreferredVariant();
	}
	if ( !in_array( $toVariant, $this->mVariants ) ) {
		// leave the profiling section opened above before bailing out;
		// this exit path used to skip the matching wfProfileOut()
		wfProfileOut( $fname );
		return $text;
	}

	/* we convert everything except:
	   1. html markups (anything between < and >)
	   2. html entities
	   3. place holders created by the parser
	*/
	global $wgParser;
	if ( isset( $wgParser ) && $wgParser->UniqPrefix() != '' ) {
		$marker = '|' . $wgParser->UniqPrefix() . '[\-a-zA-Z0-9]+';
	} else {
		$marker = "";
	}

	// this one is needed when the text is inside an html markup
	$htmlfix = '|<[^>]+$|^[^<>]*>';

	// disable convert to variants between <code></code> tags
	$codefix = '<code>.+?<\/code>|';
	// disable conversion of <script type="text/javascript"> ... </script>
	$scriptfix = '<script.*?>.*?<\/script>|';
	// disable conversion of <pre xxxx> ... </pre>
	$prefix = '<pre.*?>.*?<\/pre>|';

	$reg = '/' . $codefix . $scriptfix . $prefix . '<[^>]+>|&[a-zA-Z#][a-z0-9]+;' . $marker . $htmlfix . '/s';

	// each entry is array( piece of convertible text, its offset in $text )
	$matches = preg_split( $reg, $text, -1, PREG_SPLIT_OFFSET_CAPTURE );

	$m = array_shift( $matches );

	$ret = $this->translate( $m[0], $toVariant );
	// offset just past the piece handled so far
	$mstart = $m[1] + strlen( $m[0] );

	// enable conversion of '<img alt="xxxx" ... ' or '<span title="xxxx" ... '
	$captionpattern = '/\s(title|alt)\s*=\s*"([\s\S]*?)"/';

	$trtext = '';
	$trtextmark = "\0";
	$notrtext = array();
	foreach ( $matches as $m ) {
		// the untouched text between the previous piece and this one
		$mark = substr( $text, $mstart, $m[1] - $mstart );
		$mark = preg_replace_callback( $captionpattern, array( &$this, 'captionConvert' ), $mark );
		// Let's convert the trtext only once,
		// it would give us more performance improvement
		$notrtext[] = $mark;
		$trtext .= $m[0] . $trtextmark;
		$mstart = $m[1] + strlen( $m[0] );
	}
	// pairs with the empty piece that follows the final separator
	$notrtext[] = '';
	$trtext = $this->translate( $trtext, $toVariant );
	$trtext = StringUtils::explode( $trtextmark, $trtext );
	// interleave the untouched parts with the translated pieces
	foreach ( $trtext as $t ) {
		$ret .= array_shift( $notrtext );
		$ret .= $t;
	}
	wfProfileOut( $fname );
	return $ret;
}
351
/**
 * Translate a string to a variant.
 * Doesn't process markup or do any of that other stuff, for that use convert()
 *
 * @param string $text Text to convert
 * @param string $variant Variant language code; indexes mTables directly
 * @return string Translated text
 * @private
 */
function translate( $text, $variant ) {
	wfProfileIn( __METHOD__ );

	// Nothing to do when the trimmed text is falsy (empty, only
	// whitespace, or "0"); otherwise run it through the variant's table.
	$needsTranslation = (bool)trim( $text );
	if ( $needsTranslation ) {
		if ( !$this->mTablesLoaded ) {
			$this->loadTables();
		}
		$table = $this->mTables[$variant];
		$text = $table->replace( $text );
	}

	wfProfileOut( __METHOD__ );
	return $text;
}
373
/**
 * Convert text to all supported variants.
 *
 * @param string $text the text to be converted
 * @return array translated text indexed by variant code
 * @public
 */
function autoConvertToAllVariants( $text ) {
	$fname = "LanguageConverter::autoConvertToAllVariants";
	wfProfileIn( $fname );

	if ( !$this->mTablesLoaded ) {
		$this->loadTables();
	}

	$converted = array();
	foreach ( $this->mVariants as $code ) {
		$converted[$code] = $this->translate( $text, $code );
	}

	wfProfileOut( $fname );
	return $converted;
}
395
/**
 * Convert link text to all supported variants.
 *
 * Text enclosed in the manual conversion markup is copied through
 * verbatim (markers included); only the text around it is translated.
 *
 * @param string $text the text to be converted
 * @return array converted text indexed by variant code
 * @public
 */
function convertLinkToAllVariants( $text ) {
	if ( !$this->mTablesLoaded ) {
		$this->loadTables();
	}

	$begin = $this->mMarkup['begin'];
	$end = $this->mMarkup['end'];

	// everything before the first begin marker is plain text
	$segments = explode( $begin, $text );
	$leading = array_shift( $segments );

	$ret = array();
	foreach ( $this->mVariants as $variant ) {
		$ret[$variant] = $this->translate( $leading, $variant );
	}

	// every remaining segment started with a begin marker
	foreach ( $segments as $segment ) {
		$marked = explode( $end, $segment, 2 );
		$verbatim = $begin . $marked[0] . $end;
		$hasTail = array_key_exists( 1, $marked );

		foreach ( $this->mVariants as $variant ) {
			$ret[$variant] .= $verbatim;
			if ( $hasTail ) {
				$ret[$variant] .= $this->translate( $marked[1], $variant );
			}
		}
	}

	return $ret;
}
427
/**
 * Apply a parsed manual conversion rule to this converter: take over
 * its title (if any) and add/remove its pairs in the conversion tables.
 *
 * @param object $convRule rule providing getTitle(), getConvTable()
 *               and getRulesAction()
 * @private
 */
function applyManualConv( $convRule ) {
	// use syntax -{T|zh:TitleZh;zh-tw:TitleTw}- for custom conversion in title
	$ruleTitle = $convRule->getTitle();
	if ( $ruleTitle ) {
		$this->mTitleFromFlag = true;
		$this->mTitleDisplay = $ruleTitle;
	}

	// apply manual conversion table to global table
	$convTable = $convRule->getConvTable();
	$action = $convRule->getRulesAction();
	foreach ( $convTable as $variant => $pairs ) {
		if ( !in_array( $variant, $this->mVariants ) ) {
			continue;
		}
		$table = $this->mTables[$variant];
		switch ( $action ) {
			case 'add':
				foreach ( $pairs as $from => $to ) {
					// to ensure that $from and $to not be left blank
					// so $this->translate() could always return a string
					if ( $from || $to ) {
						// more efficient than array_merge(), about 2.5 times.
						$table->setPair( $from, $to );
					}
				}
				break;
			case 'remove':
				$table->removeArray( $pairs );
				break;
		}
	}
}
459
/**
 * Convert text using a parser object for context.
 *
 * Besides converting $text, this pushes the display title into the
 * parser's output object.
 *
 * @param string $text text to convert
 * @param object $parser parser providing getTitle(), mTitle and mOutput
 * @return string the converted text, or $text unchanged when the page is
 *         a conversion table or conversion is disabled
 * @public
 */
function parserConvert( $text, &$parser ) {
	global $wgDisableLangConversion;

	/* don't do anything if this is the conversion table */
	$isConversionTable = $parser->getTitle()->getNamespace() == NS_MEDIAWIKI
		&& strpos( $parser->mTitle->getText(), "Conversiontable" ) !== false;

	if ( $isConversionTable || $wgDisableLangConversion ) {
		return $text;
	}

	$converted = $this->convert( $text );

	$this->convertTitle();
	$parser->mOutput->setTitleText( $this->mTitleDisplay );

	return $converted;
}
483
/**
 * Convert the namespace part of a title.
 *
 * @param string $title the title, possibly including a namespace
 * @param string $variant variant code whose namespace table is used
 * @return string the title with its first ':'-separated part replaced
 *         when the variant's namespace table has an entry for it;
 *         otherwise the title as given
 * @private
 */
function convertNamespace( $title, $variant ) {
	$parts = explode( ':', $title );
	if ( count( $parts ) < 2 ) {
		// no namespace separator at all
		return $title;
	}

	$prefix = $parts[0];
	if ( isset( $this->mNamespaceTables[$variant][$prefix] ) ) {
		$parts[0] = $this->mNamespaceTables[$variant][$prefix];
	}

	return implode( ':', $parts );
}
499
/**
 * Pre convert title. Stores the original title in $this->mTitleOriginal
 * and the default converted title in $this->mTitleDisplay.
 *
 * @param string $text the title text
 * @param string $variant variant code used for the namespace part
 * @private
 */
function preConvertTitle( $text, $variant ) {
	$this->mTitleOriginal = $text;

	$withNamespace = $this->convertNamespace( $text, $variant );
	$this->mTitleDisplay = $this->convert( $withNamespace );
}
511
/**
 * Decide whether the display title may stay converted; when title
 * conversion is switched off, mTitleDisplay is reset to the original title.
 *
 * @private
 */
function convertTitle() {
	global $wgDisableTitleConversion, $wgUser, $wgRequest;

	$isredir = $wgRequest->getText( 'redirect', 'yes' );
	$action = $wgRequest->getText( 'action' );
	$linkconvert = $wgRequest->getText( 'linkconvert', 'yes' );

	// GET params that switch title conversion off
	$offByRequest = $isredir == 'no' || $action == 'edit' || $linkconvert == 'no';

	// the global variable, the __NOTC__ magic word and the user setting
	// are checked first; the request params only matter when none applies
	if ( $wgDisableTitleConversion
		|| !$this->mDoTitleConvert
		|| $wgUser->getOption( 'noconvertlink' ) == 1
		|| $offByRequest
	) {
		$this->mTitleDisplay = $this->mTitleOriginal;
	}
}
533
/**
 * convert text to different variants of a language. the automatic
 * conversion is done in autoConvert(). here we parse the text
 * marked with -{}-, which specifies special conversions of the
 * text that can not be accomplished in autoConvert()
 *
 * syntax of the markup:
 * -{code1:text1;code2:text2;...}- or
 * -{flags|code1:text1;code2:text2;...}- or
 * -{text}- in which case no conversion should take place for text
 *
 * Side effects: the 'notitleconvert' / 'nocontentconvert' magic words
 * are removed from the text and switch off mDoTitleConvert /
 * mDoContentConvert; every manual rule found is applied to the
 * conversion tables as soon as it has been parsed.
 *
 * @param string $text text to be converted
 * @param bool $isTitle whether this conversion is for the article title;
 *             if so the title is pre-converted and $text is returned as is
 * @return string converted text
 * @public
 */
function convert( $text, $isTitle = false ) {

	$mw =& MagicWord::get( 'notitleconvert' );
	if ( $mw->matchAndRemove( $text ) ) {
		$this->mDoTitleConvert = false;
	}
	$mw =& MagicWord::get( 'nocontentconvert' );
	if ( $mw->matchAndRemove( $text ) ) {
		$this->mDoContentConvert = false;
	}

	// no conversion if redirecting
	$mw =& MagicWord::get( 'redirect' );
	if ( $mw->matchStart( $text ) ) {
		return $text;
	}

	$plang = $this->getPreferredVariant();

	// for title conversion
	if ( $isTitle ) {
		$this->preConvertTitle( $text, $plang );
		return $text;
	}

	$endMarker = $this->mMarkup['end'];
	$tarray = StringUtils::explode( $endMarker, $text );
	$text = '';
	// whether the output currently ends with an end marker added below
	$endsWithAddedMarker = false;

	foreach ( $tarray as $txt ) {

		$marked = explode( $this->mMarkup['begin'], $txt, 2 );

		if ( $this->mDoContentConvert ) {
			// Bug 19620: should convert a string immediately after a new rule added.
			$text .= $this->autoConvert( $marked[0], $plang );
		} else {
			$text .= $marked[0];
		}

		if ( array_key_exists( 1, $marked ) ) {
			$crule = new ConverterRule( $marked[1], $this );
			$crule->parse( $plang );
			$text .= $crule->getDisplay();
			$this->applyManualConv( $crule );
			$endsWithAddedMarker = false;
		} else {
			// chunk without a rule: put back the marker explode() consumed
			$text .= $endMarker;
			$endsWithAddedMarker = true;
		}

	}

	// Remove the last delimiter (wasn't real). When the text ends inside
	// an unclosed rule no delimiter was appended, so stripping blindly
	// would cut off the end of the rule's output.
	if ( $endsWithAddedMarker ) {
		$text = substr( $text, 0, -strlen( $endMarker ) );
	}
	return $text;
}
601
/**
 * If a language supports multiple variants, it is possible that a
 * link that does not exist in one variant actually exists in another
 * variant. This function tries to find it. See e.g. LanguageZh.php
 *
 * @param string $link the name of the link; replaced by the text of the
 *               first existing variant title, if one is found
 * @param mixed $nt the title object of the link; replaced likewise
 * @param boolean $ignoreOtherCond: to disable other conditions when
 *                we need to transclude a template or update a category's link
 * @return null the input parameters may be modified upon return
 * @public
 */
function findVariantLink( &$link, &$nt, $ignoreOtherCond = false ) {
	# If the article has already existed, there is no need to
	# check it again, otherwise it may cause a fault.
	$haveTitle = is_object( $nt );
	if ( $haveTitle && $nt->exists() ) {
		return;
	}

	global $wgDisableLangConversion, $wgDisableTitleConversion, $wgRequest, $wgUser;
	$isredir = $wgRequest->getText( 'redirect', 'yes' );
	$action = $wgRequest->getText( 'action' );
	$linkconvert = $wgRequest->getText( 'linkconvert', 'yes' );
	$disableLinkConversion = $wgDisableLangConversion || $wgDisableTitleConversion;
	$linkBatch = new LinkBatch();

	if ( $disableLinkConversion ) {
		return;
	}
	// request parameters and the user option only count when the
	// caller did not ask to ignore them
	if ( !$ignoreOtherCond
		&& ( $isredir == 'no' || $action == 'edit' || $action == 'submit'
			|| $linkconvert == 'no' || $wgUser->getOption( 'noconvertlink' ) == 1 )
	) {
		return;
	}

	$ns = $haveTitle ? $nt->getNamespace() : NS_MAIN;

	$variants = $this->autoConvertToAllVariants( $link );
	if ( $variants == false ) { // give up
		return;
	}

	// collect the titles of all variants that differ from the link itself
	$titles = array();
	foreach ( $variants as $v ) {
		if ( $v == $link ) {
			continue;
		}
		$varnt = Title::newFromText( $v, $ns );
		if ( is_null( $varnt ) ) {
			continue;
		}
		$linkBatch->addObj( $varnt );
		$titles[] = $varnt;
	}

	// fetch all variants in single query
	$linkBatch->execute();

	// the first variant title with an article id wins
	foreach ( $titles as $varnt ) {
		if ( $varnt->getArticleID() > 0 ) {
			$nt = $varnt;
			$link = $varnt->getText();
			break;
		}
	}
}
664
/**
 * Returns language specific hash options: '!' followed by the
 * preferred variant.
 *
 * @return string
 * @public
 */
function getExtraHashOptions() {
	return '!' . $this->getPreferredVariant();
}
674
/**
 * Get the title text as defined in the body of the article text.
 *
 * @return string the current value of mTitleDisplay
 * @public
 */
function getParsedTitle() {
	$display = $this->mTitleDisplay;
	return $display;
}
683
/**
 * Load default conversion tables.
 * This method must be implemented in derived classes; the base
 * implementation only aborts via wfDie() with a message naming the class.
 *
 * @private
 */
function loadDefaultTables() {
	$name = get_class( $this );
	$message = "Must implement loadDefaultTables() method in class $name";
	wfDie( $message );
}
694
/**
 * Load conversion tables either from the cache or from their sources.
 * Does nothing when the tables are already marked as loaded.
 *
 * @param bool $fromcache whether the cached copy may be used
 * @private
 */
function loadTables( $fromcache = true ) {
	global $wgMemc;

	if ( $this->mTablesLoaded ) {
		return;
	}

	wfProfileIn( __METHOD__ );
	$this->mTablesLoaded = true;
	$this->mTables = false;

	if ( $fromcache ) {
		wfProfileIn( __METHOD__ . '-cache' );
		$this->mTables = $wgMemc->get( $this->mCacheKey );
		wfProfileOut( __METHOD__ . '-cache' );
	}

	// a cached copy is only usable when it carries the version marker
	$cacheIsUsable = $this->mTables && isset( $this->mTables[self::CACHE_VERSION_KEY] );
	if ( !$cacheIsUsable ) {
		wfProfileIn( __METHOD__ . '-recache' );
		// not in cache, or we need a fresh reload.
		// we will first load the default tables
		// then update them using things in MediaWiki:Zhconversiontable/*
		$this->loadDefaultTables();
		foreach ( $this->mVariants as $variant ) {
			$overrides = $this->parseCachedTable( $variant );
			$this->mTables[$variant]->mergeArray( $overrides );
		}

		$this->postLoadTables();
		$this->mTables[self::CACHE_VERSION_KEY] = true;

		// third argument is the expiry passed to the cache (43200)
		$wgMemc->set( $this->mCacheKey, $this->mTables, 43200 );
		wfProfileOut( __METHOD__ . '-recache' );
	}

	wfProfileOut( __METHOD__ );
}
730
/**
 * Hook for post processing after the conversion tables are loaded.
 * loadTables() calls it right before the tables are written to the
 * cache. The base implementation does nothing.
 */
function postLoadTables() {
	// intentionally empty
}
736
/**
 * Reload the conversion tables, bypassing the cached copy.
 *
 * @private
 */
function reloadTables() {
	// drop whatever is loaded at the moment
	if ( $this->mTables ) {
		unset( $this->mTables );
	}

	// loadTables() returns early unless this flag is cleared
	$this->mTablesLoaded = false;
	$this->loadTables( false );
}
748
749
/**
 * parse the conversion table stored in the cache
 *
 * the tables should be in blocks of the following form:
 *		-{
 *			word => word ;
 *			word => word ;
 *			...
 *		}-
 *
 * to make the tables more manageable, subpages are allowed
 * and will be parsed recursively if $recursive=true
 *
 * Every code/subpage combination is handled only once per request
 * (tracked in a static list); later calls for it return an empty array.
 *
 * NOTE(review): $subpage only contributes to that tracking key; the
 * text that gets fetched depends on $code alone. Confirm this is intended.
 *
 * @param string $code variant code, may contain a '/'
 * @param string $subpage subpage name, '' for the table page itself
 * @param bool $recursive whether linked subpages are parsed as well
 * @return array the mappings found, as 'from' => 'to'
 */
function parseCachedTable( $code, $subpage = '', $recursive = true ) {
	global $wgMessageCache;
	static $parsed = array();

	if ( !is_object( $wgMessageCache ) ) {
		return array();
	}

	$key = 'Conversiontable/' . $code;
	if ( $subpage ) {
		$key .= '/' . $subpage;
	}

	if ( array_key_exists( $key, $parsed ) ) {
		return array();
	}

	if ( strpos( $code, '/' ) === false ) {
		$txt = $wgMessageCache->get( 'Conversiontable', true, $code );
	} else {
		$title = Title::makeTitleSafe( NS_MEDIAWIKI, "Conversiontable/$code" );
		if ( $title && $title->exists() ) {
			$article = new Article( $title );
			$txt = $article->getContents();
		} else {
			$txt = '';
		}
	}

	// get all subpage links of the form
	// [[MediaWiki:conversiontable/zh-xx/...|...]]
	$linkhead = $this->mLangObj->getNsText( NS_MEDIAWIKI ) . ':Conversiontable';
	$subs = explode( '[[', $txt );
	$sublinks = array();
	foreach ( $subs as $sub ) {
		$link = explode( ']]', $sub, 2 );
		if ( count( $link ) != 2 ) {
			continue;
		}
		$b = explode( '|', $link[0] );
		$b = explode( '/', trim( $b[0] ), 3 );
		// A target without any '/' (e.g. a link to the bare
		// Conversiontable page) has no variant part, so it can never
		// be one of our subpages. Skip it instead of reading a
		// missing array offset below.
		if ( !isset( $b[1] ) ) {
			continue;
		}
		$sublink = isset( $b[2] ) ? $b[2] : '';

		if ( $b[0] == $linkhead && $b[1] == $code ) {
			$sublinks[] = $sublink;
		}
	}

	// parse the mappings in this page
	$blocks = explode( $this->mMarkup['begin'], $txt );
	array_shift( $blocks );
	$ret = array();
	foreach ( $blocks as $block ) {
		$mappings = explode( $this->mMarkup['end'], $block, 2 );
		// quotes and wiki list markers are dropped before parsing
		$stripped = str_replace( array( "'", '"', '*', '#' ), '', $mappings[0] );
		$table = explode( ';', $stripped );
		foreach ( $table as $t ) {
			$m = explode( '=>', $t );
			if ( count( $m ) != 2 ) {
				continue;
			}
			// trim any trailing comments starting with '//'
			$tt = explode( '//', $m[1], 2 );
			$ret[trim( $m[0] )] = trim( $tt[0] );
		}
	}
	$parsed[$key] = true;

	// recursively parse the subpages
	if ( $recursive ) {
		foreach ( $sublinks as $link ) {
			$s = $this->parseCachedTable( $code, $link, $recursive );
			$ret = array_merge( $ret, $s );
		}
	}

	// optionally add a copy of every pair with both sides ucfirst'ed
	if ( $this->mUcfirst ) {
		foreach ( $ret as $k => $v ) {
			$ret[Language::ucfirst( $k )] = Language::ucfirst( $v );
		}
	}
	return $ret;
}
847
/**
 * Enclose a string with the "no conversion" tag. This is used by
 * various functions in the Parser.
 *
 * @param string $text text to be tagged for no conversion
 * @param bool $noParse not used by this implementation
 * @return string the tagged text, or $text unchanged when it is
 *         considered to be marked already
 * @public
 */
function markNoConversion( $text, $noParse = false ) {
	$markup = $this->mMarkup;

	# don't mark if already marked
	# NOTE(review): strpos() yields 0 for a marker at the very start of
	# $text, which is falsy here, so such a marker is not detected.
	# Confirm whether that is intended before changing it.
	if ( strpos( $text, $markup['begin'] ) || strpos( $text, $markup['end'] ) ) {
		return $text;
	}

	return $markup['begin'] . 'R|' . $text . $markup['end'];
}
865
/**
 * Convert the sorting key for category links. This should make different
 * keys that are variants of each other map to the same key.
 * The base implementation leaves the key untouched.
 *
 * @param string $key the sorting key
 * @return string the key, unchanged
 */
function convertCategoryKey( $key ) {
	$converted = $key;
	return $converted;
}
/**
 * Hook to refresh the cache of conversion tables when
 * MediaWiki:Conversiontable/<variant>[/...] is updated.
 * Only $article is looked at; the other parameters are unused here.
 *
 * @return bool always true
 * @private
 */
function OnArticleSaveComplete( $article, $user, $text, $summary, $isminor, $iswatch, $section, $flags, $revision ) {
	$titleobj = $article->getTitle();
	if ( $titleobj->getNamespace() != NS_MEDIAWIKI ) {
		return true;
	}

	// 'Conversiontable/<variant>/<rest>' => at most three parts
	$parts = explode( '/', $titleobj->getDBkey(), 3 );
	$isTablePage = count( $parts ) > 1 && $parts[0] == 'Conversiontable';
	if ( $isTablePage && in_array( $parts[1], $this->mVariants ) ) {
		$this->reloadTables();
	}

	return true;
}
892
/**
 * Armour rendered math against conversion.
 * Wraps math into the raw output syntax -{R| math }-
 *
 * @param string $text the rendered math
 * @return string the wrapped text
 * @public
 */
function armourMath( $text ) {
	// we need to convert '-{' and '}-' to '-&#123;' and '&#125;-'
	// to avoid an unwanted '}-' appearing after the math-image.
	$escaped = strtr( $text, array( '-{' => '-&#123;', '}-' => '&#125;-' ) );

	return $this->mMarkup['begin'] . 'R|' . $escaped . $this->mMarkup['end'];
}
905 }
906
907 /**
908 * parser for rules of language conversion , parse rules in -{ }- tag
909 * @ingroup Language
910 * @author fdcn <fdcn64@gmail.com>, PhiLiP <philip.npc@gmail.com>
911 */
912 class ConverterRule {
// original text in -{text}-
var $mText;
// LanguageConverter object this rule belongs to
var $mConverter;
// markup for reporting a code error
var $mManualCodeError = '<strong class="error">code error!</strong>';
var $mRuleDisplay = '';
var $mRuleTitle = false;
// string: the text of the rules (what remains once the flags are split off)
var $mRules = '';
var $mRulesAction = 'none';
// flags in effect, filled in by parseFlags()
var $mFlags = array();
// indexed by variant code; the constructor creates one empty entry per variant
var $mConvTable = array();
// bidirectional rules: variant code => text
var $mBidtable = array();
// unidirectional rules: variant code => array( from => to )
var $mUnidtable = array();
924
/**
 * Constructor
 *
 * @param string $text the text between -{ and }-
 * @param object $converter a LanguageConverter object
 * @access public
 */
function __construct( $text, $converter ) {
	$this->mText = $text;
	$this->mConverter = $converter;

	// start with an empty conversion table for every supported variant
	$codes = $converter->mVariants;
	foreach ( $codes as $code ) {
		$this->mConvTable[$code] = array();
	}
}
939
/**
 * Look up the bidirectional table entry of the first given variant
 * that has one.
 *
 * @param mixed $variants a variant code, or an array of variant codes
 *              in order of preference
 * @return mixed the text of the first variant found in the table;
 *         false when none is found or $variants is neither a string
 *         nor an array
 * @public
 */
function getTextInBidtable( $variants ) {
	if ( is_string( $variants ) ) {
		$variants = array( $variants );
	} elseif ( !is_array( $variants ) ) {
		return false;
	}

	foreach ( $variants as $code ) {
		if ( array_key_exists( $code, $this->mBidtable ) ) {
			return $this->mBidtable[$code];
		}
	}

	return false;
}
957
/**
 * Parse flags with syntax -{FLAG| ... }-
 *
 * Splits mText into the flag part and the rule part, normalises the
 * flags, and stores the results in mFlags and mRules.
 *
 * @private
 */
function parseFlags() {
	$text = $this->mText;

	// too short to carry any flag: treat as raw content
	if ( strlen( $text ) < 2 ) {
		$this->mFlags = array( 'R' );
		$this->mRules = $text;
		return;
	}

	$markup = $this->mConverter->mMarkup;
	$validFlags = $this->mConverter->mFlags;
	$variants = $this->mConverter->mVariants;

	// split "flags|rules"; without a flag separator everything is rules
	$flags = array();
	$parts = explode( $markup['flagsep'], $text, 2 );
	if ( count( $parts ) != 2 ) {
		$rules = $text;
	} else {
		list( $flagText, $rules ) = $parts;
		foreach ( explode( $markup['varsep'], $flagText ) as $candidate ) {
			$candidate = trim( $candidate );
			if ( !array_key_exists( $candidate, $validFlags ) ) {
				continue;
			}
			// map to the canonical flag, ignoring duplicates
			$flag = $validFlags[$candidate];
			if ( !in_array( $flag, $flags ) ) {
				$flags[] = $flag;
			}
		}
	}

	// check flags
	if ( in_array( 'R', $flags ) ) {
		// remove other flags
		$flags = array( 'R' );
	} elseif ( in_array( 'N', $flags ) ) {
		// remove other flags
		$flags = array( 'N' );
	} elseif ( in_array( '-', $flags ) ) {
		// remove other flags
		$flags = array( '-' );
	} elseif ( count( $flags ) == 1 && $flags[0] == 'T' ) {
		// a lone T also gets H
		$flags[] = 'H';
	} elseif ( in_array( 'H', $flags ) ) {
		// replace A flag, and remove other flags except T and D
		$kept = array( '+', 'H' );
		if ( in_array( 'T', $flags ) ) {
			$kept[] = 'T';
		}
		if ( in_array( 'D', $flags ) ) {
			$kept[] = 'D';
		}
		$flags = $kept;
	} else {
		if ( in_array( 'A', $flags ) ) {
			$flags[] = '+';
			$flags[] = 'S';
		}
		if ( in_array( 'D', $flags ) ) {
			$flags = array_diff( $flags, array( 'S' ) );
		}
		// try to find flags like "zh-hans", "zh-hant"
		// allow syntaxes like "-{zh-hans;zh-hant|XXXX}-"
		$variantFlags = array();
		foreach ( $variants as $variant ) {
			if ( in_array( $variant, $flags ) ) {
				$variantFlags[] = $variant;
			}
		}
		// variant flags, when present, replace all other flags
		if ( count( $variantFlags ) !== 0 ) {
			$flags = $variantFlags;
		}
	}

	// no flag at all means: show the converted text
	if ( count( $flags ) == 0 ) {
		$flags = array( 'S' );
	}

	$this->mRules = $rules;
	$this->mFlags = $flags;
}
1026
/**
 * Generate the conversion tables from the rule text.
 *
 * Fills mBidtable (variant => text) from rules like "zh-hans:xxx" and
 * mUnidtable (variant => array( from => to )) from rules like
 * "xxx=>zh-hans:yyy". If any rule names something that is not a key of
 * the converter's mVariantNames, both tables are emptied.
 *
 * @private
 */
function parseRules() {
	$rules = $this->mRules;
	$bidtable = array();
	$unidtable = array();
	$markup = $this->mConverter->mMarkup;
	$variants = $this->mConverter->mVariants;

	// The separators and variant codes are literal strings (the same
	// values are passed to explode() below), so they must be quoted
	// before they are embedded in a regular expression.
	$varsep = preg_quote( $markup['varsep'], '/' );
	$codesep = preg_quote( $markup['codesep'], '/' );
	$unidsep = preg_quote( $markup['unidsep'], '/' );

	// varsep_pattern for preg_split:
	// text should be split by ";" only if a valid variant
	// name exists after the markup, for example:
	//  -{zh-hans:<span style="font-size:120%;">xxx</span>;zh-hant:<span style="font-size:120%;">yyy</span>;}-
	// we should split it as:
	//  array(
	//    [0] => 'zh-hans:<span style="font-size:120%;">xxx</span>'
	//    [1] => 'zh-hant:<span style="font-size:120%;">yyy</span>'
	//    [2] => ''
	//  )
	$varsep_pattern = '/' . $varsep . '\s*' . '(?=';
	foreach ( $variants as $variant ) {
		$quotedVariant = preg_quote( $variant, '/' );
		// zh-hans:xxx;zh-hant:yyy
		$varsep_pattern .= $quotedVariant . '\s*' . $codesep . '|';
		// xxx=>zh-hans:yyy; xxx=>zh-hant:zzz
		$varsep_pattern .= '[^;]*?' . $unidsep . '\s*' . $quotedVariant
			. '\s*' . $codesep . '|';
	}
	$varsep_pattern .= '\s*$)/';

	$choice = preg_split( $varsep_pattern, $rules );

	foreach ( $choice as $c ) {
		$v = explode( $markup['codesep'], $c, 2 );
		if ( count( $v ) != 2 ) {
			continue; // syntax error, skip
		}
		$to = trim( $v[1] );
		$v = trim( $v[0] );
		$u = explode( $markup['unidsep'], $v, 2 );
		// if $to is empty, strtr() could return a wrong result
		if ( count( $u ) == 1 && $to && in_array( $v, $variants ) ) {
			$bidtable[$v] = $to;
		} elseif ( count( $u ) == 2 ) {
			$from = trim( $u[0] );
			$v = trim( $u[1] );
			// entries of $unidtable are always arrays, so adding the
			// pair covers both the first and any later rule of a variant
			if ( $to && in_array( $v, $variants ) ) {
				$unidtable[$v][$from] = $to;
			}
		}
		// syntax error, pass
		if ( !array_key_exists( $v, $this->mConverter->mVariantNames ) ) {
			$bidtable = array();
			$unidtable = array();
			break;
		}
	}
	$this->mBidtable = $bidtable;
	$this->mUnidtable = $unidtable;
}
1088
/**
 * Describe the parsed rules as a single string.
 *
 * Every bidirectional entry is rendered as
 * "<variant name><code separator><text><variant separator>", and every
 * unidirectional entry as
 * "<from>⇒<variant name><code separator><text><variant separator>",
 * using the converter's mVariantNames, mDescCodeSep and mDescVarSep.
 *
 * @return string the concatenated description ('' when both tables are empty)
 * @private
 */
function getRulesDesc() {
	$names = $this->mConverter->mVariantNames;
	$codeSep = $this->mConverter->mDescCodeSep;
	$varSep = $this->mConverter->mDescVarSep;

	$desc = '';
	foreach ( $this->mBidtable as $code => $target ) {
		$desc .= $names[$code] . $codeSep . $target . $varSep;
	}
	foreach ( $this->mUnidtable as $code => $pairs ) {
		foreach ( $pairs as $from => $to ) {
			$desc .= $from . '⇒' . $names[$code] . $codeSep . $to . $varSep;
		}
	}
	return $desc;
}
1103
/**
 * Pick the text of this rule to show for a given variant.
 *
 * When conversion is off, or no rule table has any entry, the raw rule
 * text ($this->mRules) is returned unchanged. Otherwise the candidates
 * are tried in this order, stopping at the first non-empty one:
 *  1. the bidirectional entry for $variant
 *  2. the bidirectional entry found through the variant's fallbacks
 *  3. the first unidirectional target registered for $variant
 *  4. only if manual conversion is 'disable' for $variant: the first
 *     bidirectional text, or else the first unidirectional target
 *
 * @param string $variant variant code to look up
 * @param bool $doConvert whether conversion should be applied at all
 * @return mixed the chosen text; when nothing matches, whatever the last
 *  lookup produced is returned as is
 * @private
 */
function getRuleConvertedStr( $variant, $doConvert ) {
	$bid = $this->mBidtable;
	$unid = $this->mUnidtable;

	// nothing to choose from, or no conversion wanted: raw rule text
	if ( !$doConvert || count( $bid ) + count( $unid ) == 0 ) {
		return $this->mRules;
	}

	// current variant in the bidirectional table
	$text = $this->getTextInBidtable( $variant );

	// otherwise one of its fallbacks
	if ( !$text ) {
		$fallbacks = $this->mConverter->getVariantFallbacks( $variant );
		$text = $this->getTextInBidtable( $fallbacks );
	}

	// otherwise the first unidirectional target of the current variant
	if ( !$text && array_key_exists( $variant, $unid ) ) {
		$targets = array_values( $unid[$variant] );
		$text = $targets[0];
	}

	// otherwise, with manual conversion disabled, the first text available
	if ( !$text && $this->mConverter->mManualLevel[$variant] == 'disable' ) {
		if ( count( $bid ) > 0 ) {
			$texts = array_values( $bid );
			$text = $texts[0];
		} else {
			$groups = array_values( $unid );
			$targets = array_values( $groups[0] );
			$text = $targets[0];
		}
	}

	return $text;
}
1142
/**
 * Fill $this->mConvTable, the per-variant "source text => replacement"
 * map, from the parsed rule tables.
 *
 * For the bidirectional table, variants without an entry of their own
 * first borrow the text of one of their fallbacks. Every pair of
 * variants that have a text is then cross-mapped, but only into the
 * tables of variants whose manual level is 'bidirectional'.
 *
 * Unidirectional entries are added for variants whose manual level is
 * 'bidirectional' or 'unidirectional'; they overwrite an existing
 * mapping that has the same source text.
 *
 * @private
 */
function generateConvTable() {
	$bidtable = $this->mBidtable;
	$unidtable = $this->mUnidtable;
	$manLevel = $this->mConverter->mManualLevel;

	$vmarked = array();
	foreach ( $this->mConverter->mVariants as $v ) {
		/* for bidirectional array
			fill in the missing variants, if any,
			with fallbacks */
		if ( !array_key_exists( $v, $bidtable ) ) {
			$variantFallbacks = $this->mConverter->getVariantFallbacks( $v );
			$vf = $this->getTextInBidtable( $variantFallbacks );
			if ( $vf ) {
				$bidtable[$v] = $vf;
			}
		}

		if ( array_key_exists( $v, $bidtable ) ) {
			foreach ( $vmarked as $vo ) {
				// use syntax: -{A|zh:WordZh;zh-tw:WordTw}-
				// or -{H|zh:WordZh;zh-tw:WordTw}- or -{-|zh:WordZh;zh-tw:WordTw}-
				// to introduce a custom mapping between
				// words WordZh and WordTw in the whole text
				if ( $manLevel[$v] == 'bidirectional' ) {
					$this->mConvTable[$v][$bidtable[$vo]] = $bidtable[$v];
				}
				if ( $manLevel[$vo] == 'bidirectional' ) {
					$this->mConvTable[$vo][$bidtable[$v]] = $bidtable[$vo];
				}
			}
			$vmarked[] = $v;
		}

		/* for unidirectional array
			fill to convert tables */
		$allow_unid = $manLevel[$v] == 'bidirectional'
			|| $manLevel[$v] == 'unidirectional';
		if ( $allow_unid && array_key_exists( $v, $unidtable ) ) {
			// Copy entry by entry instead of using array_merge():
			// array_merge() renumbers integer keys, and PHP stores a
			// decimal source text such as "2009" as an integer key, so
			// the mapping would lose its source text. Assigning per key
			// also works when no table exists for this variant yet.
			foreach ( $unidtable[$v] as $from => $to ) {
				$this->mConvTable[$v][$from] = $to;
			}
		}
	}
}
1189
/**
 * Parse the flags and rules of this -{...}- markup and compute what it
 * displays, the title it sets (T flag), the action to take on the
 * conversion table ('add' for '+', 'remove' for '-') and the
 * conversion table itself.
 *
 * Results are stored in $this->mRuleDisplay, $this->mRuleTitle,
 * $this->mRulesAction and $this->mConvTable.
 *
 * @param string $variant variant code to render for; when empty the
 *  converter's preferred variant is used
 * @public
 */
function parse( $variant ) {
	if ( !$variant ) {
		$variant = $this->mConverter->getPreferredVariant();
	}

	$variants = $this->mConverter->mVariants;
	$this->parseFlags();
	$flags = $this->mFlags;

	// convert to specified variant
	// syntax: -{zh-hans;zh-hant[;...]|<text to convert>}-
	if ( count( $flags ) != 0 && count( array_diff( $flags, $variants ) ) == 0 ) {
		if ( in_array( $variant, $flags ) ) {
			// the current variant is listed:
			// convert <text to convert> to the current variant
			$this->mRules = $this->mConverter->autoConvert( $this->mRules, $variant );
		} else {
			// otherwise use the first fallback of the current variant
			// that is listed, if any
			$variantFallbacks = $this->mConverter->getVariantFallbacks( $variant );
			foreach ( $variantFallbacks as $variantFallback ) {
				if ( in_array( $variantFallback, $flags ) ) {
					$this->mRules = $this->mConverter->autoConvert( $this->mRules, $variantFallback );
					break;
				}
			}
		}
		// the text is final now, output it as is
		$this->mFlags = $flags = array( 'R' );
	}

	// Rules are parsed only when neither R (raw output) nor N (variant
	// name) is set; both conditions must hold, otherwise raw text
	// would be entity-decoded and interpreted as conversion rules.
	if ( !in_array( 'R', $flags ) && !in_array( 'N', $flags ) ) {
		// decode => HTML entities modified by Sanitizer::removeHTMLtags
		$this->mRules = str_replace( '=&gt;', '=>', $this->mRules );

		$this->parseRules();
	}
	$rules = $this->mRules;

	if ( count( $this->mBidtable ) == 0 && count( $this->mUnidtable ) == 0 ) {
		if ( in_array( '+', $flags ) || in_array( '-', $flags ) ) {
			// fill all variants if text in -{A/H/-|text} without rules
			foreach ( $this->mConverter->mVariants as $v ) {
				$this->mBidtable[$v] = $rules;
			}
		} elseif ( !in_array( 'N', $flags ) && !in_array( 'T', $flags ) ) {
			$this->mFlags = $flags = array( 'R' );
		}
	}

	if ( in_array( 'R', $flags ) ) {
		// if we don't do content convert, still strip the -{}- tags
		$this->mRuleDisplay = $rules;
	} elseif ( in_array( 'N', $flags ) ) {
		// process N flag: output the name of the variant given as text;
		// an unknown code yields an empty display
		$code = trim( $rules );
		if ( array_key_exists( $code, $this->mConverter->mVariantNames ) ) {
			$this->mRuleDisplay = $this->mConverter->mVariantNames[$code];
		} else {
			$this->mRuleDisplay = '';
		}
	} elseif ( in_array( 'D', $flags ) ) {
		// process D flag: output rules description
		$this->mRuleDisplay = $this->getRulesDesc();
	} elseif ( in_array( 'H', $flags ) || in_array( '-', $flags ) ) {
		// process H,- flag or T only: output nothing
		$this->mRuleDisplay = '';
	} elseif ( in_array( 'S', $flags ) ) {
		$this->mRuleDisplay = $this->getRuleConvertedStr( $variant,
			$this->mConverter->mDoContentConvert );
	} else {
		$this->mRuleDisplay = $this->mManualCodeError;
	}

	// process T flag
	if ( in_array( 'T', $flags ) ) {
		$this->mRuleTitle = $this->getRuleConvertedStr( $variant,
			$this->mConverter->mDoTitleConvert );
	}

	if ( in_array( '-', $flags ) ) {
		$this->mRulesAction = 'remove';
	}
	if ( in_array( '+', $flags ) ) {
		$this->mRulesAction = 'add';
	}

	$this->generateConvTable();
}
1271
/**
 * Not implemented yet: the body is empty, so a call returns nothing.
 *
 * @public
 */
function hasRules() {
	// TODO:
}
1278
/**
 * Get the text to display in place of the -{...}- markup,
 * as computed by parse().
 *
 * @return mixed the current value of $this->mRuleDisplay
 * @public
 */
function getDisplay() {
	return $this->mRuleDisplay;
}
/**
 * Get the converted title; parse() sets it only when the rule
 * carries the T flag.
 *
 * @return mixed the current value of $this->mRuleTitle
 * @public
 */
function getTitle() {
	return $this->mRuleTitle;
}
1293
/**
 * Tell how this rule should affect the conversion table.
 * parse() sets the action to 'add' for the '+' flag and to 'remove'
 * for the '-' flag.
 *
 * @return mixed the current value of $this->mRulesAction
 * @public
 */
function getRulesAction() {
	return $this->mRulesAction;
}
1301
/**
 * Get the conversion table built by generateConvTable() from the
 * bidirectional and unidirectional rule tables.
 *
 * @return mixed the current value of $this->mConvTable
 * @public
 */
function getConvTable() {
	return $this->mConvTable;
}
1309
/**
 * Get the rule text of this markup; parse() may have rewritten it.
 *
 * @return mixed the current value of $this->mRules
 * @public
 */
function getRules() {
	return $this->mRules;
}
1317
/**
 * Get the flags of this markup as left by parseFlags() and parse().
 *
 * @return mixed the current value of $this->mFlags
 * @public
 */
function getFlags() {
	return $this->mFlags;
}
1325 }