Revert accidental commit
[lhc/web/wiklou.git] / languages / Language.php
index 4cd4ffa..3497877 100644 (file)
@@ -59,7 +59,6 @@ class Language {
 
        var $mNamespaceIds, $namespaceNames, $namespaceAliases;
        var $dateFormatStrings = array();
-       var $minSearchLength;
        var $mExtendedSpecialPageAliases;
 
        /**
@@ -1687,85 +1686,27 @@ class Language {
        function hasWordBreaks() {
                return true;
        }
-
+       
        /**
-        * Some languages have special punctuation to strip out
-        * or characters which need to be converted for MySQL's
-        * indexing to grok it correctly. Make such changes here.
-        *
+        * Some languages, such as Chinese, require word segmentation.
+        * Override this in a derived class to specify such segmentation.
+        * 
         * @param $string String
         * @return String
         */
-       function stripForSearch( $string, $doStrip = true ) {
-               global $wgDBtype;
-               if ( $wgDBtype != 'mysql' || $doStrip == false ) {
-                       return $string;
-               }
-
-               wfProfileIn( __METHOD__ );
-
-               // MySQL fulltext index doesn't grok utf-8, so we
-               // need to fold cases and convert to hex
-               $out = preg_replace_callback(
-                       "/([\\xc0-\\xff][\\x80-\\xbf]*)/",
-                       array( $this, 'stripForSearchCallback' ),
-                       $this->lc( $string ) );
-
-               // And to add insult to injury, the default indexing
-               // ignores short words... Pad them so we can pass them
-               // through without reconfiguring the server...
-               $minLength = $this->minSearchLength();
-               if( $minLength > 1 ) {
-                       $n = $minLength-1;
-                       $out = preg_replace(
-                               "/\b(\w{1,$n})\b/",
-                               "$1u800",
-                               $out );
-               }
-
-               // Periods within things like hostnames and IP addresses
-               // are also important -- we want a search for "example.com"
-               // or "192.168.1.1" to work sanely.
-               //
-               // MySQL's search seems to ignore them, so you'd match on
-               // "example.wikipedia.com" and "192.168.83.1" as well.
-               $out = preg_replace(
-                       "/(\w)\.(\w|\*)/u",
-                       "$1u82e$2",
-                       $out );
-
-               wfProfileOut( __METHOD__ );
-               return $out;
-       }
-
-       /**
-        * Armor a case-folded UTF-8 string to get through MySQL's
-        * fulltext search without being mucked up by funny charset
-        * settings or anything else of the sort.
-        */
-       protected function stripForSearchCallback( $matches ) {
-               return 'u8' . bin2hex( $matches[1] );
+       function wordSegmentation( $string ) {
+               return $string;
        }
 
        /**
-        * Check MySQL server's ft_min_word_len setting so we know
-        * if we need to pad short words...
+        * Some languages have special punctuation that needs to be normalized.
+        * Make such changes here.
+        *
+        * @param $string String
+        * @return String
         */
-       protected function minSearchLength() {
-               if( is_null( $this->minSearchLength ) ) {
-                       $sql = "show global variables like 'ft\\_min\\_word\\_len'";
-                       $dbr = wfGetDB( DB_SLAVE );
-                       $result = $dbr->query( $sql );
-                       $row = $result->fetchObject();
-                       $result->free();
-
-                       if( $row && $row->Variable_name == 'ft_min_word_len' ) {
-                               $this->minSearchLength = intval( $row->Value );
-                       } else {
-                               $this->minSearchLength = 0;
-                       }
-               }
-               return $this->minSearchLength;
+       function normalizeForSearch( $string ) {
+               return $string;
        }
 
        /**
@@ -1778,7 +1719,7 @@ class Language {
                return $string;
        }
 
-       protected static function wordSegmentation( $string, $pattern ) {
+       protected static function insertSpace( $string, $pattern ) {
                $string = preg_replace( $pattern, " $1 ", $string );
                $string = preg_replace( '/ +/', ' ', $string );
                return $string;
@@ -2223,6 +2164,7 @@ class Language {
                if ( strlen( $string ) <= abs( $length ) ) {
                        return $string;
                }
+               $stringOriginal = $string;
                if( $length > 0 ) {
                        $string = substr( $string, 0, $length );
                        $char = ord( $string[strlen( $string ) - 1] );
@@ -2236,7 +2178,8 @@ class Language {
                                # We chopped in the middle of a character; remove it
                                $string = $m[1];
                        }
-                       return $string . $ellipsis;
+                       $string = $string . $ellipsis;
+
                } else {
                        $string = substr( $string, $length );
                        $char = ord( $string[0] );
@@ -2244,7 +2187,13 @@ class Language {
                                # We chopped in the middle of a character; remove the whole thing
                                $string = preg_replace( '/^[\x80-\xbf]+/', '', $string );
                        }
-                       return $ellipsis . $string;
+                       $string = $ellipsis . $string;
+               }
+               # Do not truncate if the ellipsis actually makes the string longer. Bug 22181
+               if ( strlen( $string ) < strlen( $stringOriginal ) ) {
+                       return $string;
+               } else {
+                       return $stringOriginal;
                }
        }