<?php
/**
- * Client for querying zhdaemon
+ * Client for querying zhdaemon.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
*
- * @package MediaWiki
- * @version $Id$
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ * http://www.gnu.org/copyleft/gpl.html
+ *
+ * @file
*/
+/**
+ * Client for querying zhdaemon
+ */
class ZhClient {
var $mHost, $mPort, $mFP, $mConnected;
/**
* Constructor
*
- * @access private
+ * @param $host
+ * @param $port
+ *
+ * @return ZhClient
*/
- function ZhClient($host, $port) {
+ function __construct( $host, $port ) {
$this->mHost = $host;
$this->mPort = $port;
$this->mConnected = $this->connect();
/**
* Check if connection to zhdaemon is successful
*
- * @access public
+ * @return bool
*/
function isconnected() {
return $this->mConnected;
- * Establish conncetion
+ * Establish connection
*
* @access private
+ *
+ * @return bool
*/
function connect() {
wfSuppressWarnings();
- $this->mFP = fsockopen($this->mHost, $this->mPort, $errno, $errstr, 30);
+ // Pre-initialise the by-reference error outputs filled in by fsockopen()
+ $errno = $errstr = '';
+ // The last argument (30) is the connection timeout in seconds
+ $this->mFP = fsockopen( $this->mHost, $this->mPort, $errno, $errstr, 30 );
wfRestoreWarnings();
- if(!$this->mFP) {
- return false;
- }
- return true;
+ // fsockopen() yields false on failure, so report success only when a
+ // handle was actually obtained (the result is stored in mConnected).
+ return (bool)$this->mFP;
}
/**
* Query the daemon and return the result
*
* @access private
+ *
+ * @param $request String: complete request to write to the daemon socket
+ *
+ * @return string|bool the response payload, or false when not connected,
+ *  when the daemon reports an error, or when the payload is truncated
*/
- function query($request) {
- if(!$this->mConnected)
+ function query( $request ) {
+ if ( !$this->mConnected ) {
return false;
+ }
- fwrite($this->mFP, $request);
+ fwrite( $this->mFP, $request );
- $result=fgets($this->mFP, 1024);
+ // The first response line is read as "<status> <length>"
+ $result = fgets( $this->mFP, 1024 );
- list($status, $len) = explode(" ", $result);
- if($status == 'ERROR') {
- //$len is actually the error code...
+ list( $status, $len ) = explode( ' ', $result );
+ if( $status == 'ERROR' ) {
+ // $len is actually the error code...
print "zhdaemon error $len<br />\n";
return false;
}
- $bytesread=0;
- $data='';
- while(!feof($this->mFP) && $bytesread<$len) {
- $str= fread($this->mFP, $len-$bytesread);
- $bytesread += strlen($str);
+ // Keep reading until $len bytes have arrived or the stream ends
+ $bytesread = 0;
+ $data = '';
+ while( !feof( $this->mFP ) && $bytesread < $len ) {
+ $str = fread( $this->mFP, $len - $bytesread );
+ $bytesread += strlen( $str );
$data .= $str;
}
- //data should be of length $len. otherwise something is wrong
- if(strlen($data) != $len)
- return false;
- return $data;
+ // data should be of length $len. otherwise something is wrong
+ if ( strlen( $data ) != $len ) {
+ return false;
+ }
+ // Callers use the return value as the payload, so hand back the data
+ // itself rather than the result of the length comparison.
+ return $data;
}
/**
* Convert the input to a different language variant
*
- * @param string $text input text
- * @param string $tolang language variant
+ * @param $text String: input text
+ * @param $tolang String: language variant
* @return string the converted text
- * @access public
*/
- function convert($text, $tolang) {
- $len = strlen($text);
+ function convert( $text, $tolang ) {
+ // The request carries the byte length of the text on the command line,
+ // followed by a newline and the text itself.
+ $len = strlen( $text );
$q = "CONV $tolang $len\n$text";
- $result = $this->query($q);
- if(!$result)
+ $result = $this->query( $q );
+ // Any falsy result from query() falls back to the unconverted input
+ if ( !$result ) {
$result = $text;
+ }
return $result;
}
/**
- * Convert the input to all possible variants
+ * Convert the input to all possible variants
*
- * @param string $text input text
+ * @param $text String: input text
* @return array langcode => converted_string
- * @access public
- */
- function convertToAllVariants($text) {
- $len = strlen($text);
+ */
+ function convertToAllVariants( $text ) {
+ // Request: "CONV ALL <byte length>\n<text>"
+ $len = strlen( $text );
$q = "CONV ALL $len\n$text";
- $result = $this->query($q);
- if(!$result)
+ $result = $this->query( $q );
+ // A falsy result from query() is reported to the caller as false
+ if ( !$result ) {
return false;
- list($infoline, $data) = explode('|', $result);
- $info = explode(";", $infoline);
+ }
+ // Split only at the first '|': everything before it is the info line,
+ // everything after it (further '|' included) is the converted data.
+ list( $infoline, $data ) = explode( '|', $result, 2 );
+ // The info line is a ';'-separated list of "<code> <length>" entries
+ $info = explode( ';', $infoline );
$ret = array();
- $i=0;
- foreach($info as $code => $len) {
- $ret[strtolower($code)] = substr($data, $i, $len);
- $i+=$len+1;
+ $i = 0;
+ // NOTE(review): $len is reused here and no longer holds the request length.
+ // Slices are taken back to back with no separator byte skipped between
+ // variants - presumably matching the daemon's output; TODO confirm.
+ foreach( $info as $variant ) {
+ list( $code, $len ) = explode( ' ', $variant );
+ $ret[strtolower( $code )] = substr( $data, $i, $len );
+ $i += $len;
}
return $ret;
- }
+ }
+
/**
* Perform word segmentation
*
- * @param string $text input text
+ * @param $text String: input text
* @return string segmented text
- * @access public
*/
- function segment($text) {
- $len = strlen($text);
+ function segment( $text ) {
+ // Request: "SEG <byte length>\n<text>"
+ $len = strlen( $text );
$q = "SEG $len\n$text";
- $result = $this->query($q);
- if(!$result) {// fallback to character based segmentation
- $result = ZhClientFake::segment($text);
+ $result = $this->query( $q );
+ if ( !$result ) {
+ // Fallback to character based segmentation. Calling $this->segment()
+ // again here would recurse without end, because the same failing query
+ // would simply be repeated. Instead, put a space in front of every
+ // multi-byte UTF-8 sequence, as the removed ZhClientFake::segment() did
+ // (without the deprecated /e modifier).
+ // NOTE(review): without mbstring the text is not lower-cased, because
+ // the old $wikiLowerChars table is not available here.
+ $lower = function_exists( 'mb_strtolower' ) ? mb_strtolower( $text ) : $text;
+ $result = preg_replace( '/([\xc0-\xff][\x80-\xbf]*)/', ' $1', $lower );
}
return $result;
}
/**
* Close the connection
- *
- * @access public
*/
function close() {
- fclose($this->mFP);
+ // mFP is false when fsockopen() failed in connect(); only close a handle
+ // that is still a live resource.
+ if ( is_resource( $this->mFP ) ) {
+ fclose( $this->mFP );
+ }
}
}
-
-
-class ZhClientFake {
- function ZhClientFake() {
- global $wgMemc, $wgDBname;
- $this->zh2TW = $wgMemc->get($key1 = "$wgDBname:zhConvert:tw");
- $this->zh2CN = $wgMemc->get($key2 = "$wgDBname:zhConvert:cn");
- $this->zh2SG = $wgMemc->get($key3 = "$wgDBname:zhConvert:sg");
- $this->zh2HK = $wgMemc->get($key4 = "$wgDBname:zhConvert:hk");
- if(empty($this->zh2TW) || empty($this->zh2CN) || empty($this->zh2SG) || empty($this->zh2HK)) {
- require("includes/ZhConversion.php");
- $this->zh2TW = $zh2TW;
- $this->zh2CN = $zh2CN;
- $this->zh2HK = $zh2HK;
- $this->zh2SG = $zh2SG;
- $wgMemc->set($key1, $this->zh2TW);
- $wgMemc->set($key2, $this->zh2CN);
- $wgMemc->set($key3, $this->zh2SG);
- $wgMemc->set($key4, $this->zh2HK);
- }
- }
-
- function isconnected() {
- return true;
- }
-
- /**
- * Convert to zh-tw
- *
- * @access private
- */
- function zh2tw($text) {
- return strtr($text, $this->zh2TW);
- }
-
- /**
- * Convert to zh-cn
- *
- * @access private
- */
- function zh2cn($text) {
- return strtr($text, $this->zh2CN);
- }
-
- /**
- * Convert to zh-sg
- *
- * @access private
- */
- function zh2sg($text) {
- return strtr(strtr($text, $this->zh2CN), $this->zh2SG);
- }
-
- /**
- * Convert to zh-hk
- *
- * @access private
- */
- function zh2hk($text) {
- return strtr(strtr($text, $this->zh2TW), $this->zh2HK);
- }
-
- /**
- * Convert the input to a different language variant
- *
- * @param string $text input text
- * @param string $tolang language variant
- * @return string the converted text
- * @access public
- */
- function convert($text, $tolang) {
- $t = '';
- switch($tolang) {
- case 'zh-cn':
- $t = $this->zh2cn($text);
- break;
- case 'zh-tw':
- $t = $this->zh2tw($text);
- break;
- case 'zh-sg':
- $t = $this->zh2sg($text);
- break;
- case 'zh-hk':
- $t = $this->zh2hk($text);
- break;
- default:
- $t = $text;
- }
- return $t;
- }
-
- function convertToAllVariants($text) {
- $ret = array();
- $ret['zh-cn'] = $this->zh2cn($text);
- $ret['zh-tw'] = $this->zh2tw($text);
- $ret['zh-sg'] = $this->zh2sg($text);
- $ret['zh-hk'] = $this->zh2hk($text);
- return $ret;
- }
-
- /**
- * Perform "fake" word segmentation, i.e. treating each character as a word
- *
- * @param string $text input text
- * @return string segmented text
- * @access public
- */
- function segment($text) {
- /* adapted from LanguageZh_cn.stripForSearch()
- here we will first separate the single characters,
- and let the caller conver it to hex
- */
- if( function_exists( 'mb_strtolower' ) ) {
- return preg_replace(
- "/([\\xc0-\\xff][\\x80-\\xbf]*)/e",
- "' ' .\"$1\"",
- mb_strtolower( $text ) );
- } else {
- global $wikiLowerChars;
- return preg_replace(
- "/([\\xc0-\\xff][\\x80-\\xbf]*)/e",
- "' ' . strtr( \"\$1\", \$wikiLowerChars )",
- $text );
- }
- }
-
- /**
- * Close the fake connection
- *
- * @access public
- */
- function close() { }
-}
-
-?>
\ No newline at end of file