It broke things
[lhc/web/wiklou.git] / languages / LanguageZh.php
1 <?php
2 /**
3 * @package MediaWiki
4 * @subpackage Language
5 */
6 require_once( dirname(__FILE__).'/LanguageConverter.php' );
7 require_once( dirname(__FILE__).'/LanguageZh_cn.php' );
8
/**
 * Language converter for the Chinese variants handled in this file
 * (zh, zh-cn, zh-tw, zh-sg, zh-hk).
 */
class ZhConverter extends LanguageConverter {
	/**
	 * Fill $this->mTables with the built-in conversion tables.
	 *
	 * The $zh2CN, $zh2TW, $zh2SG and $zh2HK arrays are expected to be
	 * defined by includes/ZhConversion.php, which the require below
	 * pulls into this method's local scope.
	 */
	function loadDefaultTables() {
		require( "includes/ZhConversion.php" );

		$this->mTables = array(
			'zh-cn' => $zh2CN,
			'zh-tw' => $zh2TW,
			// zh-sg and zh-hk start from the zh-cn / zh-tw tables and add
			// their own entries on top (later array_merge arguments win).
			'zh-sg' => array_merge( $zh2CN, $zh2SG ),
			'zh-hk' => array_merge( $zh2TW, $zh2HK ),
			// Plain 'zh' has no conversion table of its own.
			'zh' => array(),
		);
	}

	/**
	 * Re-layer the regional tables on top of their base tables.
	 *
	 * For each (regional, base) pair the regional table is rebuilt as
	 * base entries overridden by the regional entries.
	 * NOTE(review): presumably called after extra tables have been loaded
	 * into mTables by the parent class -- confirm against LanguageConverter.
	 */
	function postLoadTables() {
		$baseOf = array(
			'zh-sg' => 'zh-cn',
			'zh-hk' => 'zh-tw',
		);

		foreach ( $baseOf as $regional => $base ) {
			$this->mTables[$regional] = array_merge(
				$this->mTables[$base],
				$this->mTables[$regional]
			);
		}
	}

	/**
	 * Return the text untouched: there should not be any Latin text in
	 * Chinese conversion, so nothing needs to be marked as "no conversion".
	 *
	 * @param string $text
	 * @return string the same $text, unchanged
	 */
	function markNoConversion( $text ) {
		return $text;
	}

	/**
	 * Convert a category key using the zh-cn variant.
	 *
	 * @param string $key
	 * @return mixed whatever autoConvert() yields for the zh-cn variant
	 */
	function convertCategoryKey( $key ) {
		$converted = $this->autoConvert( $key, 'zh-cn' );
		return $converted;
	}
}
36
37
/**
 * Language class that handles both Traditional and Simplified Chinese.
 * Right now it only distinguishes zh-cn, zh-tw, zh-sg and zh-hk.
 */
class LanguageZh extends LanguageZh_cn {

	/**
	 * Set up the variant converter and hook it into article saves.
	 *
	 * The converter is built with 'zh' as its main code, the five codes
	 * zh / zh-cn / zh-tw / zh-sg / zh-hk, and a code => code map.
	 * NOTE(review): the map is presumably the per-variant fallback table
	 * (e.g. zh-cn falls back to zh-sg) -- confirm against LanguageConverter.
	 */
	function __construct() {
		global $wgHooks;
		parent::__construct();
		$this->mConverter = new ZhConverter( $this, 'zh',
			array( 'zh', 'zh-cn', 'zh-tw', 'zh-sg', 'zh-hk' ),
			array(
				'zh' => 'zh-cn',
				'zh-cn' => 'zh-sg',
				'zh-sg' => 'zh-cn',
				'zh-tw' => 'zh-hk',
				'zh-hk' => 'zh-tw',
			)
		);
		// The converter object itself is registered as the hook handler.
		$wgHooks['ArticleSaveComplete'][] = $this->mConverter;
	}

	/**
	 * Insert a space before every multi-byte character so that diffs work
	 * per character instead of per (space-less) line of text.
	 *
	 * The pattern is byte-oriented on purpose: a lead byte 0xC0-0xFF
	 * followed by any continuation bytes 0x80-0xBF.
	 *
	 * @param string $text
	 * @return string segmented text
	 */
	function segmentForDiff( $text ) {
		// Plain back-reference replacement; the former /e (eval) modifier
		// is deprecated since PHP 5.5 and removed in PHP 7.
		return preg_replace(
			"/([\\xc0-\\xff][\\x80-\\xbf]*)/",
			' $1', $text );
	}

	/**
	 * Undo segmentForDiff(): drop the single space that precedes each
	 * multi-byte character.
	 *
	 * @param string $text
	 * @return string unsegmented text
	 */
	function unsegmentForDiff( $text ) {
		return preg_replace(
			"/ ([\\xc0-\\xff][\\x80-\\xbf]*)/",
			'$1', $text );
	}

	/**
	 * Prepare a string for the search index.
	 *
	 * Eventually this should do real word segmentation; for now each
	 * multi-byte character is treated as a word by prefixing it with a
	 * space. The text is then always converted to zh-cn before indexing,
	 * since conversion from Traditional to Simplified is less ambiguous
	 * than the other way around, and finally handed to the parent
	 * implementation.
	 *
	 * @param string $string
	 * @return mixed result of the parent stripForSearch()
	 */
	function stripForSearch( $string ) {
		$fname = __METHOD__; // "LanguageZh::stripForSearch"
		wfProfileIn( $fname );

		// Same byte-level pattern as segmentForDiff(), without the
		// removed /e modifier.
		$t = preg_replace(
			"/([\\xc0-\\xff][\\x80-\\xbf]*)/",
			' $1', $string );

		$t = $this->mConverter->autoConvert( $t, 'zh-cn' );
		$t = parent::stripForSearch( $t );

		wfProfileOut( $fname );
		return $t;
	}

	/**
	 * Expand a list of search terms to cover every variant.
	 *
	 * The terms are joined with '|', run through the converter for all
	 * variants, joined again, then split back on '|' and de-duplicated.
	 *
	 * @param array $termsArray search terms
	 * @return array unique terms across all variants (keys are preserved
	 *               by array_unique, so they may be non-contiguous)
	 */
	function convertForSearchResult( $termsArray ) {
		$terms = implode( '|', $termsArray );
		$terms = implode( '|', $this->mConverter->autoConvertToAllVariants( $terms ) );
		return array_unique( explode( '|', $terms ) );
	}

}
101 ?>