languages/classes/LanguageZh.php

   1 <?php
   2 /**
   3   * @addtogroup Language
   4   */
   5 require_once( dirname(__FILE__).'/../LanguageConverter.php' );
   6 require_once( dirname(__FILE__).'/LanguageZh_cn.php' );
   7
   8 class ZhConverter extends LanguageConverter {
   9         function loadDefaultTables() {
  10                 require( "includes/ZhConversion.php" );
  11                 $this->mTables = array(
  12                         'zh-cn' => new ReplacementArray( $zh2CN ),
  13                         'zh-tw' => new ReplacementArray( $zh2TW ),
  14                         'zh-sg' => new ReplacementArray( array_merge($zh2CN, $zh2SG) ),
  15                         'zh-hk' => new ReplacementArray( array_merge($zh2TW, $zh2HK) ),
  16                         'zh' => new ReplacementArray
  17                 );
  18         }
  19
  20         function postLoadTables() {
  21                 $this->mTables['zh-sg']->merge( $this->mTables['zh-cn'] );
  22                 $this->mTables['zh-hk']->merge( $this->mTables['zh-tw'] );
  23     }
  24
  25         /* there shouldn't be any latin text in Chinese conversion, so no need
  26            to mark anything.
  27            $noParse is there for compatibility with LanguageConvert::markNoConversion
  28     */
  29         function markNoConversion($text, $noParse = false) {
  30                 return $text;
  31         }
  32
  33         function convertCategoryKey( $key ) {
  34                 return $this->autoConvert( $key, 'zh-cn' );
  35         }
  36 }
  37
  38
  39 /* class that handles both Traditional and Simplified Chinese
  40    right now it only distinguish zh_cn, zh_tw, zh_sg and zh_hk.
  41 */
  42 class LanguageZh extends LanguageZh_cn {
  43
  44         function __construct() {
  45                 global $wgHooks;
  46                 parent::__construct();
  47                 $this->mConverter = new ZhConverter($this, 'zh',
  48                                             array('zh', 'zh-cn', 'zh-tw', 'zh-sg', 'zh-hk'),
  49                                                                                         array('zh'=>'zh-cn',
  50                                                                                                   'zh-cn'=>'zh-sg',
  51                                                                                                   'zh-sg'=>'zh-cn',
  52                                                                                                   'zh-tw'=>'zh-hk',
  53                                                                                                   'zh-hk'=>'zh-tw'));
  54                 $wgHooks['ArticleSaveComplete'][] = $this->mConverter;
  55         }
  56
  57
  58         # this should give much better diff info
  59         function segmentForDiff( $text ) {
  60                 return preg_replace(
  61                         "/([\\xc0-\\xff][\\x80-\\xbf]*)/e",
  62                         "' ' .\"$1\"", $text);
  63         }
  64
  65         function unsegmentForDiff( $text ) {
  66                 return preg_replace(
  67                         "/ ([\\xc0-\\xff][\\x80-\\xbf]*)/e",
  68                         "\"$1\"", $text);
  69         }
  70
  71         // word segmentation
  72         function stripForSearch( $string ) {
  73                 $fname="LanguageZh::stripForSearch";
  74                 wfProfileIn( $fname );
  75
  76                 // eventually this should be a word segmentation
  77                 // for now just treat each character as a word
  78                 $t = preg_replace(
  79                                 "/([\\xc0-\\xff][\\x80-\\xbf]*)/e",
  80                                 "' ' .\"$1\"", $string);
  81
  82         //always convert to zh-cn before indexing. it should be
  83                 //better to use zh-cn for search, since conversion from
  84                 //Traditional to Simplified is less ambiguous than the
  85                 //other way around
  86
  87                 $t = $this->mConverter->autoConvert($t, 'zh-cn');
  88                 $t = parent::stripForSearch( $t );
  89                 wfProfileOut( $fname );
  90                 return $t;
  91
  92         }
  93
  94         function convertForSearchResult( $termsArray ) {
  95                 $terms = implode( '|', $termsArray );
  96                 $terms = implode( '|', $this->mConverter->autoConvertToAllVariants( $terms ) );
  97                 $ret = array_unique( explode('|', $terms) );
  98                 return $ret;
  99         }
 100
 101 }
 102 ?>