testPngNativetZtxt requires zlib extension
[lhc/web/wiklou.git] / includes / ZhClient.php
index 8a6d8e2..4299841 100644 (file)
@@ -1,20 +1,40 @@
 <?php
 /**
- * Client for querying zhdaemon
+ * Client for querying zhdaemon.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
  *
- * @package MediaWiki
- * @version $Id$
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ * http://www.gnu.org/copyleft/gpl.html
+ *
+ * @file
  */
 
+/**
+ * Client for querying zhdaemon
+ */
 class ZhClient {
        var $mHost, $mPort, $mFP, $mConnected;
 
        /**
         * Constructor
         *
-        * @access private
+        * @param $host
+        * @param $port
+        *
+        * @return ZhClient
         */
-       function ZhClient($host, $port) {
+       function __construct( $host, $port ) {
                $this->mHost = $host;
                $this->mPort = $port;
                $this->mConnected = $this->connect();
@@ -23,7 +43,7 @@ class ZhClient {
        /**
         * Check if connection to zhdaemon is successful
         *
-        * @access public
+        * @return bool
         */
        function isconnected() {
                return $this->mConnected;
@@ -33,207 +53,112 @@ class ZhClient {
         * Establish conncetion
         *
         * @access private
+        *
+        * @return bool
         */
        function connect() {
-               $this->mFP = fsockopen($this->mHost, $this->mPort, $errno, $errstr, 30);
-               if(!$this->mFP) {
-                       return false;
-               }
-               return true;
+               wfSuppressWarnings();
+               $errno = $errstr = '';
+               $this->mFP = fsockopen( $this->mHost, $this->mPort, $errno, $errstr, 30 );
+               wfRestoreWarnings();
+               return !$this->mFP;
        }
 
        /**
         * Query the daemon and return the result
         *
         * @access private
+        *
+        * @return string
         */
-       function query($request) {
-               if(!$this->mConnected)
+       function query( $request ) {
+               if ( !$this->mConnected ) {
                        return false;
+               }
 
-               fwrite($this->mFP, $request);
+               fwrite( $this->mFP, $request );
 
-               $result=fgets($this->mFP, 1024);
+               $result = fgets( $this->mFP, 1024 );
 
-               list($status, $len) = explode(" ", $result);
-               if($status == 'ERROR') {
-                       //$len is actually the error code...
+               list( $status, $len ) = explode( ' ', $result );
+               if( $status == 'ERROR' ) {
+                       // $len is actually the error code...
                        print "zhdaemon error $len<br />\n";
                        return false;
                }
-               $bytesread=0;
-               $data='';
-               while(!feof($this->mFP) && $bytesread<$len) {
-                       $str= fread($this->mFP, $len-$bytesread);
-                       $bytesread += strlen($str);
+               $bytesread = 0;
+               $data = '';
+               while( !feof( $this->mFP ) && $bytesread < $len ) {
+                       $str = fread( $this->mFP, $len - $bytesread );
+                       $bytesread += strlen( $str );
                        $data .= $str;
                }
-               return $data;
+               // data should be of length $len. otherwise something is wrong
+               return strlen( $data ) == $len;
        }
 
        /**
         * Convert the input to a different language variant
         *
-        * @param string $text input text
-        * @param string $tolang language variant
+        * @param $text String: input text; handed back unchanged when the query gives no usable result
+        * @param $tolang String: language variant, sent verbatim in the CONV request line
         * @return string the converted text
-        * @access public
         */
-       function convert($text, $tolang) {
-               $len = strlen($text);
+       function convert( $text, $tolang ) {
+               $len = strlen( $text ); // length announced to the daemon in the request line
                $q = "CONV $tolang $len\n$text";
-               $result = $this->query($q);
-               if(!$result)
-                       $result = $text;
-               return $result;
-       }
-
-       /**
-        * Perform word segmentation
-        *
-        * @param string $text input text
-        * @return string segmented text
-        * @access public
-        */
-       function segment($text) {
-               $len = strlen($text);
-               $q = "SEG $len\n$text";
-               $result = $this->query($q);
-               if(!$result)
+               $result = $this->query( $q );
+               if ( !$result ) { // query() gave a falsy value: fall back to the untouched input
                        $result = $text;
+               }
                return $result;
        }
 
        /**
-        * Close the connection
+        * Convert the input to all possible variants
         *
-        * @access public
+        * @param $text String: input text
+        * @return array|bool langcode => converted_string, or false when the query gives no usable result
         */
-       function close() {
-               fclose($this->mFP);
-       }
-}
-
-
-class ZhClientFake {
-
-       function ZhClientFake() {
-               global $wgMemc, $wgDBname;
-               $this->zh2TW = $wgMemc->get($key1 = "$wgDBname:zhConvert:tw");
-               $this->zh2CN = $wgMemc->get($key2 = "$wgDBname:zhConvert:cn");
-               $this->zh2SG = $wgMemc->get($key3 = "$wgDBname:zhConvert:sg");
-               $this->zh2HK = $wgMemc->get($key4 = "$wgDBname:zhConvert:hk");
-               if(empty($this->zh2TW) || empty($this->zh2CN) || empty($this->zh2SG) || empty($this->zh2HK)) {
-                       require_once("includes/ZhConversion.php");
-                       $this->zh2TW = $zh2TW;
-                       $this->zh2CN = $zh2CN;
-                       $this->zh2HK = $zh2HK;
-                       $this->zh2SG = $zh2SG;
-                       $wgMemc->set($key1, $this->zh2TW);
-                       $wgMemc->set($key2, $this->zh2CN);
-                       $wgMemc->set($key3, $this->zh2SG);
-                       $wgMemc->set($key4, $this->zh2HK);
+       function convertToAllVariants( $text ) {
+               $len = strlen( $text ); // length announced to the daemon in the request line
+               $q = "CONV ALL $len\n$text";
+               $result = $this->query( $q );
+               if ( !$result ) { // no usable reply from query()
+                       return false;
                }
-       }
-
-       function isconnected() {
-               return true;
-       }
-
-       /**
-        * Convert to zh-tw
-        *
-        * @access private
-        */
-       function zh2tw($text) {
-               return strtr($text, $this->zh2TW);
-       }
-
-       /**
-        * Convert to zh-cn
-        *
-        * @access private
-        */
-       function zh2cn($text) {
-               return strtr($text, $this->zh2CN);
-       }
-
-       /**
-        * Convert to zh-sg
-        *
-        * @access private
-        */
-       function zh2sg($text) {
-               return strtr(strtr($text, $this->zh2CN), $this->zh2SG);
-       }
-
-       /**
-        * Convert to zh-hk
-        *
-        * @access private
-        */
-       function zh2hk($text) {
-               return strtr(strtr($text, $this->zh2TW), $this->zh2HK);
-       }
-
-       /**
-        * Convert the input to a different language variant
-        *
-        * @param string $text input text
-        * @param string $tolang language variant
-        * @return string the converted text
-        * @access public
-        */
-       function convert($text, $tolang) {
-               $t = '';
-               switch($tolang) {
-        case 'zh-cn':
-                       $t = $this->zh2cn($text);
-                       break;
-               case 'zh-tw':
-                       $t = $this->zh2tw($text);
-                       break;
-               case 'zh-sg':
-                       $t = $this->zh2sg($text);
-                       break;
-               case 'zh-hk':
-                       $t = $this->zh2hk($text);
-                       break;
-               default:
-                       $t = $text;
+               list( $infoline, $data ) = explode( '|', $result, 2 ); // NOTE(review): assumes the reply contains a '|'
+               $info = explode( ';', $infoline ); // one entry per variant
+               $ret = array();
+               $i = 0; // offset of the current variant's text within $data
+               foreach( $info as $variant ) {
+                       list( $code, $len ) = explode( ' ', $variant ); // parsed as "<code> <length>"; $len is reused here
+                       $ret[strtolower( $code )] = substr( $data, $i, $len );
+                       $i += $len; // the texts are read back to back, with no separator
                }
-               return $t;
+               return $ret;
        }
 
        /**
-        * Perform "fake" word segmentation, i.e. treating each character as a word
+        * Perform word segmentation
         *
-        * @param string $text input text
+        * @param $text String: input text
         * @return string segmented text
-        * @access public
         */
-       function segment($text) {
-               /* copied from LanguageZh_cn.stripForSearch() */
-               if( function_exists( 'mb_strtolower' ) ) {
-                       return preg_replace(
-                               "/([\\xc0-\\xff][\\x80-\\xbf]*)/e",
-                               "' U8' . bin2hex( \"$1\" )",
-                               mb_strtolower( $string ) );
-               } else {
-                       global $wikiLowerChars;
-                       return preg_replace(
-                               "/([\\xc0-\\xff][\\x80-\\xbf]*)/e",
-                               "' U8' . bin2hex( strtr( \"\$1\", \$wikiLowerChars ) )",
-                               $string );
+       function segment( $text ) {
+               $len = strlen( $text );
+               $q = "SEG $len\n$text";
+               $result = $this->query( $q );
+               if ( !$result ) { // fallback to character based segmentation
+                       $result = $this->segment( $text );
                }
+               return $result;
        }
 
        /**
-        * Close the fake connection
-        *
-        * @access public
+        * Close the connection
         */
-       function close() {      }
+       function close() {
+               fclose( $this->mFP );
+       }
 }
-
-?>
\ No newline at end of file