Localization update for he.
[lhc/web/wiklou.git] / languages / Language.php
index facd0ed..aa1a65c 100644 (file)
@@ -1198,8 +1198,9 @@ class Language {
         *       http://en.wikipedia.org/wiki/Minguo_calendar
         *       http://en.wikipedia.org/wiki/Japanese_era_name
         *
-        * @param $ts String: 14-character timestamp, calender name
-        * @return array converted year, month, day
+        * @param $ts String: 14-character timestamp
+        * @param $cName String: calendar name
+        * @return Array: converted year, month, day
         */
        private static function tsToYear( $ts, $cName ) {
                $gy = substr( $ts, 0, 4 );
@@ -1366,8 +1367,7 @@ class Language {
                        if( $usePrefs ) {
                                $datePreference = $wgUser->getDatePreference();
                        } else {
-                               $options = User::getDefaultOptions();
-                               $datePreference = (string)$options['date'];
+                               $datePreference = (string)User::getDefaultOption( 'date' );
                        }
                } else {
                        $datePreference = (string)$usePrefs;
@@ -1686,16 +1686,27 @@ class Language {
        function hasWordBreaks() {
                return true;
        }
+       
+       /**
+        * Some languages, such as Chinese, require word segmentation.
+        * Derived classes override this to perform it; this default is a no-op.
+        *
+        * @param $string String: text to segment
+        * @return String: the input, returned unchanged by this base implementation
+        */
+       function segmentByWord( $string ) {
+               return $string;
+       }
 
        /**
-        * Some languages have special punctuation to strip out.
+        * Some languages have special punctuation that needs to be normalized.
         * Make such changes here.
         *
         * @param $string String
         * @return String
         */
-       function stripForSearch( $string, $doStrip = true ) {
-               return $string;
+       function normalizeForSearch( $string ) {
+               return self::convertDoubleWidth($string);
        }
 
        /**
@@ -1703,12 +1714,21 @@ class Language {
         * range: ff00-ff5f ~= 0020-007f
         */
        protected static function convertDoubleWidth( $string ) {
-               $string = preg_replace( '/\xef\xbc([\x80-\xbf])/e', 'chr((ord("$1") & 0x3f) + 0x20)', $string );
-               $string = preg_replace( '/\xef\xbd([\x80-\x99])/e', 'chr((ord("$1") & 0x3f) + 0x60)', $string );
+               static $full = null;
+               static $half = null;
+
+               if( $full === null ) {
+                       $fullWidth = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz";
+                       $halfWidth = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz";
+                       $full = str_split( $fullWidth, 3 );
+                       $half = str_split( $halfWidth );
+               }
+
+               $string = str_replace( $full, $half, $string );
                return $string;
        }
 
-       protected static function wordSegmentation( $string, $pattern ) {
+       protected static function insertSpace( $string, $pattern ) {
                $string = preg_replace( $pattern, " $1 ", $string );
                $string = preg_replace( '/ +/', ' ', $string );
                return $string;
@@ -2143,50 +2163,205 @@ class Language {
         */
        function truncate( $string, $length, $ellipsis = '...' ) {
                # Use the localized ellipsis character
-               if( $ellipsis == '...' ) {
+               if ( $ellipsis == '...' ) {
                        $ellipsis = wfMsgExt( 'ellipsis', array( 'escapenoentities', 'language' => $this ) );
                }
-
-               if( $length == 0 ) {
+               # Check if there is no need to truncate
+               if ( $length == 0 ) {
                        return $ellipsis;
-               }
-               if ( strlen( $string ) <= abs( $length ) ) {
+               } elseif ( strlen( $string ) <= abs( $length ) ) {
                        return $string;
                }
                $stringOriginal = $string;
-               if( $length > 0 ) {
-                       $string = substr( $string, 0, $length );
-                       $char = ord( $string[strlen( $string ) - 1] );
-                       $m = array();
-                       if ($char >= 0xc0) {
-                               # We got the first byte only of a multibyte char; remove it.
-                               $string = substr( $string, 0, -1 );
-                       } elseif( $char >= 0x80 &&
-                                 preg_match( '/^(.*)(?:[\xe0-\xef][\x80-\xbf]|' .
-                                             '[\xf0-\xf7][\x80-\xbf]{1,2})$/', $string, $m ) ) {
-                               # We chopped in the middle of a character; remove it
-                               $string = $m[1];
+               if ( $length > 0 ) {
+                       $string = substr( $string, 0, $length ); // xyz...
+                       $string = $this->removeBadCharLast( $string );
+                       $string = $string . $ellipsis;
+               } else {
+                       $string = substr( $string, $length ); // ...xyz
+                       $string = $this->removeBadCharFirst( $string );
+                       $string = $ellipsis . $string;
+               }
+               # Do not truncate if the ellipsis makes the string longer/equal (bug 22181)
+               if ( strlen( $string ) < strlen( $stringOriginal ) ) {
+                       return $string;
+               } else {
+                       return $stringOriginal;
+               }
+       }
+
+       /**
+        * Remove bytes that represent an incomplete Unicode character
+        * at the end of string (e.g. bytes of the char are missing)
+        *
+        * @param $string String
+        * @return string
+        */
+       protected function removeBadCharLast( $string ) {
+               $char = ord( $string[strlen( $string ) - 1] ); // value of the final byte; NOTE(review): assumes $string is non-empty - confirm callers
+               $m = array(); // receives the preg_match() captures
+               if ( $char >= 0xc0 ) {
+                       # We got the first byte only of a multibyte char; remove it.
+                       $string = substr( $string, 0, -1 );
+               } elseif ( $char >= 0x80 &&
+                     preg_match( '/^(.*)(?:[\xe0-\xef][\x80-\xbf]|' .
+                                 '[\xf0-\xf7][\x80-\xbf]{1,2})$/', $string, $m ) )
+               {
+                       # We chopped in the middle of a character; remove it
+                       $string = $m[1]; // keep only the text captured before the incomplete sequence
+               }
+               return $string;
+       }
+
+       /**
+        * Remove bytes that represent an incomplete Unicode character
+        * at the start of string (e.g. bytes of the char are missing)
+        *
+        * @param $string String
+        * @return string
+        */
+       protected function removeBadCharFirst( $string ) {
+               $char = ord( $string[0] ); // value of the first byte; NOTE(review): assumes $string is non-empty - confirm callers
+               if ( $char >= 0x80 && $char < 0xc0 ) {
+                       # We chopped in the middle of a character; remove the whole thing
+                       $string = preg_replace( '/^[\x80-\xbf]+/', '', $string ); // drop every leading byte in 0x80-0xbf
+               }
+               return $string;
+       }
+
+       /**
+        * Truncate a string of valid HTML to a specified length in bytes,
+        * appending an optional string (e.g. for ellipses), and return valid HTML
+        *
+        * This is only intended for styled/linked text, such as HTML with
+        * tags like <span> and <a>, where the tags are self-contained (valid HTML)
+        *
+        * Note: tries to fix broken HTML with MWTidy
+        *
+        * @param string $text String to truncate
+        * @param int $length (zero/positive) Maximum length (excluding ellipses)
+        * @param string $ellipsis String to append to the truncated text
+        * @return string
+        */
+       function truncateHtml( $text, $length, $ellipsis = '...' ) {
+               # Use the localized ellipsis character
+               if ( $ellipsis == '...' ) {
+                       $ellipsis = wfMsgExt( 'ellipsis', array( 'escapenoentities', 'language' => $this ) );
+               }
+               # Check if there is no need to truncate
+               if ( $length <= 0 ) {
+                       return $ellipsis; // no text shown, nothing to format
+               } elseif ( strlen($text) <= $length ) {
+                       return $text; // string short enough even *with* HTML
+               }
+               $text = MWTidy::tidy( $text ); // fix tags
+               $displayLen = 0; // innerHTML length so far
+               $testingEllipsis = false; // checking if ellipses will make string longer/equal?
+               $tagType = 0; // 0-open, 1-close
+               $bracketState = 0; // 1-tag start, 2-tag name, 0-neither
+               $entityState = 0; // 0-not entity, 1-entity
+               $tag = $ret = $ch = '';
+               $openTags = array();
+               $textLen = strlen($text);
+               for( $pos = 0; $pos < $textLen; ++$pos ) {
+                       $ch = $text[$pos];
+                       $lastCh = $pos ? $text[$pos-1] : '';
+                       $ret .= $ch; // add to result string
+                       if ( $ch == '<' ) {
+                               $this->truncate_endBracket( $tag, $tagType, $lastCh, $openTags ); // for bad HTML
+                               $entityState = 0; // for bad HTML
+                               $bracketState = 1; // tag started (checking for slash)
+                       } elseif ( $ch == '>' ) {
+                               $this->truncate_endBracket( $tag, $tagType, $lastCh, $openTags );
+                               $entityState = 0; // for bad HTML
+                               $bracketState = 0; // out of brackets
+                       } elseif ( $bracketState == 1 ) {
+                               if ( $ch == '/' ) {
+                                       $tagType = 1; // close tag (e.g. "</span>")
+                               } else {
+                                       $tagType = 0; // open tag (e.g. "<span>")
+                                       $tag .= $ch;
+                               }
+                               $bracketState = 2; // building tag name
+                       } elseif ( $bracketState == 2 ) {
+                               if ( $ch != ' ' ) {
+                                       $tag .= $ch;
+                               } else {
+                                       // Name found (e.g. "<a href=..."), add on tag attributes...
+                                       $pos += $this->truncate_skip( $ret, $text, "<>", $pos + 1 );
+                               }
+                       } elseif ( $bracketState == 0 ) {
+                               if ( $entityState ) {
+                                       if ( $ch == ';' ) {
+                                               $entityState = 0;
+                                               $displayLen++; // entity is one displayed char
+                                       }
+                               } else {
+                                       if ( $ch == '&' ) {
+                                               $entityState = 1; // entity found, (e.g. "&nbsp;")
+                                       } else {
+                                               $displayLen++; // this char is displayed
+                                               // Add on the other display text after this...
+                                               $skipped = $this->truncate_skip(
+                                                       $ret, $text, "<>&", $pos + 1, $length - $displayLen );
+                                               $displayLen += $skipped;
+                                               $pos += $skipped;
+                                       }
+                               }
                        }
-                       # Do not truncate if the ellipsis actually make the string longer. Bug 22181
-                       if ( strlen( $string ) + strlen( $ellipsis ) < strlen( $stringOriginal ) ) {
-                               return $string . $ellipsis;
-                       } else {
-                               return $stringOriginal;
+                       # Consider truncation once the display length has reached the maximum.
+                       # Double-check that we're not in the middle of a bracket/entity...
+                       if ( $displayLen >= $length && $bracketState == 0 && $entityState == 0 ) {
+                               if ( !$testingEllipsis ) {
+                                       $testingEllipsis = true;
+                                       # Save where we are; we will truncate here unless
+                                       # the ellipsis actually makes the string longer.
+                                       $pOpenTags = $openTags; // save state
+                                       $pRet = $ret; // save state
+                               } elseif ( $displayLen > ($length + strlen($ellipsis)) ) {
+                                       # Ellipsis won't make string longer/equal, the truncation point was OK.
+                                       $openTags = $pOpenTags; // reload state
+                                       $ret = $this->removeBadCharLast( $pRet ); // reload state, multi-byte char fix
+                                       $ret .= $ellipsis; // add ellipsis
+                                       break;
+                               }
                        }
+               }
+               if ( $displayLen == 0 ) {
+                       return ''; // no text shown, nothing to format
+               }
+               $this->truncate_endBracket( $tag, $tagType, $text[$textLen-1], $openTags ); // for bad HTML; args in ( tag, tagType, lastCh, openTags ) order
+               while ( count( $openTags ) > 0 ) {
+                       $ret .= '</' . array_pop( $openTags ) . '>'; // close open tags
+               }
+               return $ret;
+       }
 
-               } else {
-                       $string = substr( $string, $length );
-                       $char = ord( $string[0] );
-                       if( $char >= 0x80 && $char < 0xc0 ) {
-                               # We chopped in the middle of a character; remove the whole thing
-                               $string = preg_replace( '/^[\x80-\xbf]+/', '', $string );
-                       }
-                       # Do not truncate if the ellipsis actually make the string longer. Bug 22181
-                       if ( strlen( $string ) + strlen( $ellipsis ) < strlen( $stringOriginal ) ) {
-                               return $ellipsis . $string;
-                       } else {
-                               return $stringOriginal;
+       // truncateHtml() helper: like strcspn(), but also appends the skipped chars to $ret.
+       // Returns the number of chars skipped (0 when $start is at or past the end of $text).
+       private function truncate_skip( &$ret, $text, $search, $start, $len = -1 ) {
+               $skipCount = 0;
+               if( $start < strlen($text) ) {
+                       $skipCount = strcspn( $text, $search, $start, $len ); // NOTE(review): $len (default -1) is forwarded as-is; confirm strcspn()'s negative-length handling is what is intended
+                       $ret .= substr( $text, $start, $skipCount ); // copy the skipped run into the caller's result
+               }
+               return $skipCount;
+       }
+
+       // truncateHtml() helper: called when a '<' or '>' ends the tag name collected in $tag.
+       // (a) push $tag onto $openTags ($tagType 0, $lastCh not '/') or pop it ($tagType 1, matching top entry)
+       // (b) clear $tag value
+       private function truncate_endBracket( &$tag, $tagType, $lastCh, &$openTags ) {
+               $tag = ltrim( $tag );
+               if( $tag != '' ) {
+                       if( $tagType == 0 && $lastCh != '/' ) {
+                               $openTags[] = $tag; // tag opened (didn't close itself)
+                       } else if( $tagType == 1 ) {
+                               if( $openTags && $tag == $openTags[count($openTags)-1] ) {
+                                       array_pop( $openTags ); // tag closed
+                               }
                        }
+                       $tag = ''; // reset so the next tag name starts empty
                }
        }
 
@@ -2365,7 +2540,7 @@ class Language {
         *
         * @param $link String: the name of the link
         * @param $nt Mixed: the title object of the link
-        * @param boolean $ignoreOtherCond: to disable other conditions when
+        * @param $ignoreOtherCond Boolean: to disable other conditions when
         *      we need to transclude a template or update a category's link
         * @return null the input parameters may be modified upon return
         */
@@ -2544,19 +2719,19 @@ class Language {
 
        function formatTimePeriod( $seconds ) {
                if ( $seconds < 10 ) {
-                       return $this->formatNum( sprintf( "%.1f", $seconds ) ) . wfMsg( 'seconds-abbrev' );
+                       return $this->formatNum( sprintf( "%.1f", $seconds ) ) . ' ' . wfMsg( 'seconds-abbrev' );
                } elseif ( $seconds < 60 ) {
-                       return $this->formatNum( round( $seconds ) ) . wfMsg( 'seconds-abbrev' );
+                       return $this->formatNum( round( $seconds ) ) . ' ' . wfMsg( 'seconds-abbrev' );
                } elseif ( $seconds < 3600 ) {
-                       return $this->formatNum( floor( $seconds / 60 ) ) . wfMsg( 'minutes-abbrev' ) .
-                               $this->formatNum( round( fmod( $seconds, 60 ) ) ) . wfMsg( 'seconds-abbrev' );
+                       return $this->formatNum( floor( $seconds / 60 ) ) . ' ' . wfMsg( 'minutes-abbrev' ) . ' ' .
+                               $this->formatNum( round( fmod( $seconds, 60 ) ) ) . ' ' . wfMsg( 'seconds-abbrev' );
                } else {
                        $hours = floor( $seconds / 3600 );
                        $minutes = floor( ( $seconds - $hours * 3600 ) / 60 );
                        $secondsPart = round( $seconds - $hours * 3600 - $minutes * 60 );
-                       return $this->formatNum( $hours ) . wfMsg( 'hours-abbrev' ) .
-                               $this->formatNum( $minutes ) . wfMsg( 'minutes-abbrev' ) .
-                               $this->formatNum( $secondsPart ) . wfMsg( 'seconds-abbrev' );
+                       return $this->formatNum( $hours ) . ' ' . wfMsg( 'hours-abbrev' ) . ' ' .
+                               $this->formatNum( $minutes ) . ' ' . wfMsg( 'minutes-abbrev' ) . ' ' .
+                               $this->formatNum( $secondsPart ) . ' ' . wfMsg( 'seconds-abbrev' );
                }
        }