Merged my changes from REL1_4
[lhc/web/wiklou.git] / includes / ZhClient.php
1 <?php
2 /**
3 * Client for querying zhdaemon
4 *
5 * @package MediaWiki
6 */
7
/**
 * Socket client for zhdaemon.
 *
 * Protocol as spoken by this class: a request is one command line
 * ("CONV <variant> <length>", "CONV ALL <length>" or "SEG <length>")
 * terminated by "\n" and followed by the text itself. The reply starts with
 * a status line "<status> <n>\n"; when <status> is "ERROR", <n> is an error
 * code, otherwise <n> is the number of payload bytes that follow.
 */
class ZhClient {
	var $mHost, $mPort, $mFP, $mConnected;

	/**
	 * Constructor: stores the endpoint and immediately tries to connect.
	 *
	 * @param string $host host name or address of zhdaemon
	 * @param int $port port zhdaemon listens on
	 * @access private
	 */
	function ZhClient($host, $port) {
		$this->mHost = $host;
		$this->mPort = $port;
		$this->mConnected = $this->connect();
	}

	/**
	 * Check if connection to zhdaemon is successful
	 *
	 * @return bool
	 * @access public
	 */
	function isconnected() {
		return $this->mConnected;
	}

	/**
	 * Establish connection
	 *
	 * Warnings from fsockopen() are suppressed; failure is reported only
	 * through the return value.
	 *
	 * @return bool true when the socket was opened
	 * @access private
	 */
	function connect() {
		wfSuppressWarnings();
		$this->mFP = fsockopen($this->mHost, $this->mPort, $errno, $errstr, 30);
		wfRestoreWarnings();
		return $this->mFP ? true : false;
	}

	/**
	 * Query the daemon and return the result
	 *
	 * @param string $request complete request (command line plus text)
	 * @return mixed payload string on success; false when not connected,
	 *   on I/O failure, on a malformed status line, on a daemon error, or
	 *   when fewer bytes than announced could be read
	 * @access private
	 */
	function query($request) {
		if(!$this->mConnected || !$this->mFP) {
			return false;
		}

		if(fwrite($this->mFP, $request) === false) {
			return false;
		}

		$statusLine = fgets($this->mFP, 1024);
		if($statusLine === false) {
			return false;
		}

		// Strip the line terminator so the second field is a clean token.
		$fields = explode(' ', trim($statusLine));
		if(count($fields) < 2) {
			return false;
		}
		$status = $fields[0];
		$len = $fields[1];

		if($status == 'ERROR') {
			// $len is actually the error code here; escape it because it
			// comes from the network and is printed into the page.
			print "zhdaemon error " . htmlspecialchars($len) . "<br />\n";
			return false;
		}
		if(!preg_match('/^[0-9]+$/', $len)) {
			return false;
		}
		$len = intval($len);

		$bytesread = 0;
		$data = '';
		// Test the byte count first: feof() on a socket the peer keeps open
		// blocks until timeout, so it must not run once everything is read.
		while($bytesread < $len && !feof($this->mFP)) {
			$str = fread($this->mFP, $len - $bytesread);
			if($str === false || $str === '') {
				// No progress (error or timeout): stop instead of spinning.
				break;
			}
			$bytesread += strlen($str);
			$data .= $str;
		}

		// data should be of length $len. otherwise something is wrong
		if(strlen($data) != $len) {
			return false;
		}
		return $data;
	}

	/**
	 * Convert the input to a different language variant
	 *
	 * @param string $text input text
	 * @param string $tolang language variant
	 * @return string the converted text, or $text unchanged when the daemon
	 *   could not be queried or returned nothing
	 * @access public
	 */
	function convert($text, $tolang) {
		$len = strlen($text);
		$result = $this->query("CONV $tolang $len\n$text");
		if($result === false || $result === '') {
			return $text;
		}
		return $result;
	}

	/**
	 * Convert the input to all possible variants
	 *
	 * The payload has the form "<code> <len>;<code> <len>;...|<data>", where
	 * <data> is the concatenation of the converted strings in the listed
	 * order and each <len> is the byte length of the matching string.
	 *
	 * @param string $text input text
	 * @return mixed array langcode => converted_string (codes lower-cased),
	 *   or false when the query failed or the payload is malformed
	 * @access public
	 */
	function convertToAllVariants($text) {
		$len = strlen($text);
		$result = $this->query("CONV ALL $len\n$text");
		if(!$result) {
			return false;
		}

		$parts = explode('|', $result, 2);
		if(count($parts) < 2) {
			return false;
		}
		$infoline = $parts[0];
		$data = $parts[1];

		$ret = array();
		$offset = 0;
		foreach(explode(';', $infoline) as $variant) {
			$pair = explode(' ', $variant);
			if(count($pair) < 2) {
				return false;
			}
			$variantLen = intval($pair[1]);
			$ret[strtolower($pair[0])] = substr($data, $offset, $variantLen);
			$offset += $variantLen;
		}
		return $ret;
	}

	/**
	 * Perform word segmentation
	 *
	 * @param string $text input text
	 * @return string segmented text
	 * @access public
	 */
	function segment($text) {
		$len = strlen($text);
		$result = $this->query("SEG $len\n$text");
		if(!$result) {// fallback to character based segmentation
			$result = ZhClientFake::segment($text);
		}
		return $result;
	}

	/**
	 * Close the connection
	 *
	 * Safe to call when the connection was never opened or is already
	 * closed; afterwards isconnected() reports false.
	 *
	 * @access public
	 */
	function close() {
		if($this->mFP) {
			fclose($this->mFP);
		}
		$this->mFP = false;
		$this->mConnected = false;
	}
}
146
147
/**
 * In-process counterpart of ZhClient.
 *
 * Exposes the same public methods (isconnected, convert,
 * convertToAllVariants, segment, close) but performs conversion with strtr()
 * lookup tables instead of talking to zhdaemon.
 */
class ZhClientFake {
	var $mZh2TW, $mZh2CN, $mZh2SG, $mZh2HK;

	/**
	 * Constructor: loads the four conversion tables from $wgMemc and, when
	 * any of them is missing or empty, reloads all of them from
	 * includes/ZhConversion.php and stores them back in the cache.
	 */
	function ZhClientFake() {
		global $wgMemc, $wgDBname;

		$keyTW = "$wgDBname:zhConvert:tw";
		$keyCN = "$wgDBname:zhConvert:cn";
		$keySG = "$wgDBname:zhConvert:sg";
		$keyHK = "$wgDBname:zhConvert:hk";

		$this->mZh2TW = $wgMemc->get($keyTW);
		$this->mZh2CN = $wgMemc->get($keyCN);
		$this->mZh2SG = $wgMemc->get($keySG);
		$this->mZh2HK = $wgMemc->get($keyHK);

		$cacheIncomplete = empty($this->mZh2TW) || empty($this->mZh2CN)
			|| empty($this->mZh2SG) || empty($this->mZh2HK);
		if($cacheIncomplete) {
			// The required file is expected to define $zh2TW, $zh2CN,
			// $zh2HK and $zh2SG in this function's scope.
			require("includes/ZhConversion.php");
			$this->mZh2TW = $zh2TW;
			$this->mZh2CN = $zh2CN;
			$this->mZh2HK = $zh2HK;
			$this->mZh2SG = $zh2SG;
			$wgMemc->set($keyTW, $this->mZh2TW);
			$wgMemc->set($keyCN, $this->mZh2CN);
			$wgMemc->set($keySG, $this->mZh2SG);
			$wgMemc->set($keyHK, $this->mZh2HK);
		}
	}

	/**
	 * There is no real connection, so this always reports success.
	 *
	 * @return bool always true
	 * @access public
	 */
	function isconnected() {
		return true;
	}

	/**
	 * Convert to zh-tw
	 *
	 * @access private
	 */
	function zh2tw($text) {
		return strtr($text, $this->mZh2TW);
	}

	/**
	 * Convert to zh-cn
	 *
	 * @access private
	 */
	function zh2cn($text) {
		return strtr($text, $this->mZh2CN);
	}

	/**
	 * Convert to zh-sg: the zh-cn table is applied first, then the zh-sg
	 * table on top of that result.
	 *
	 * @access private
	 */
	function zh2sg($text) {
		return strtr(strtr($text, $this->mZh2CN), $this->mZh2SG);
	}

	/**
	 * Convert to zh-hk: the zh-tw table is applied first, then the zh-hk
	 * table on top of that result.
	 *
	 * @access private
	 */
	function zh2hk($text) {
		return strtr(strtr($text, $this->mZh2TW), $this->mZh2HK);
	}

	/**
	 * Convert the input to a different language variant
	 *
	 * @param string $text input text
	 * @param string $tolang language variant
	 * @return string the converted text; $text unchanged for any variant
	 *   other than zh-cn, zh-tw, zh-sg and zh-hk
	 * @access public
	 */
	function convert($text, $tolang) {
		switch($tolang) {
			case 'zh-cn':
				return $this->zh2cn($text);
			case 'zh-tw':
				return $this->zh2tw($text);
			case 'zh-sg':
				return $this->zh2sg($text);
			case 'zh-hk':
				return $this->zh2hk($text);
			default:
				return $text;
		}
	}

	/**
	 * Convert the input to all supported variants
	 *
	 * @param string $text input text
	 * @return array langcode => converted_string
	 * @access public
	 */
	function convertToAllVariants($text) {
		$ret = array();
		$ret['zh-cn'] = $this->zh2cn($text);
		$ret['zh-tw'] = $this->zh2tw($text);
		$ret['zh-sg'] = $this->zh2sg($text);
		$ret['zh-hk'] = $this->zh2hk($text);
		return $ret;
	}

	/**
	 * Perform "fake" word segmentation, i.e. treating each character as a word
	 *
	 * Every multi-byte sequence (a lead byte 0xC0-0xFF followed by any
	 * number of 0x80-0xBF bytes) gets a space inserted in front of it.
	 * With mbstring the whole text is lower-cased first; without it only
	 * the multi-byte sequences are lower-cased, via $wikiLowerChars.
	 *
	 * Does not use $this, so ZhClient::segment() can call it statically.
	 *
	 * @param string $text input text
	 * @return string segmented text
	 * @access public
	 */
	function segment($text) {
		/* adapted from LanguageZh_cn::stripForSearch()
		   here we will first separate the single characters,
		   and let the caller convert it to hex
		 */
		global $wikiLowerChars;
		$charPattern = "/([\\xc0-\\xff][\\x80-\\xbf]*)/";

		if( function_exists( 'mb_strtolower' ) ) {
			// A plain back-reference replaces the former /e (eval)
			// replacement, which newer PHP versions no longer support.
			return preg_replace( $charPattern, ' $1', mb_strtolower( $text ) );
		}

		// With one capture group the split result alternates: even indexes
		// hold the text between matches, odd indexes the matched sequences.
		$pieces = preg_split( $charPattern, $text, -1, PREG_SPLIT_DELIM_CAPTURE );
		if( !is_array( $pieces ) ) {
			return $text;
		}
		$segmented = '';
		foreach( $pieces as $index => $piece ) {
			if( $index % 2 == 0 ) {
				$segmented .= $piece;
				continue;
			}
			if( is_array( $wikiLowerChars ) && count( $wikiLowerChars ) > 0 ) {
				$piece = strtr( $piece, $wikiLowerChars );
			}
			$segmented .= ' ' . $piece;
		}
		return $segmented;
	}

	/**
	 * Close the fake connection (nothing to do)
	 *
	 * @access public
	 */
	function close() { }
}
279
280 ?>