X-Git-Url: http://git.heureux-cyclage.org/?a=blobdiff_plain;f=languages%2FLanguage.php;h=aa1a65c34abe2cf75d6c24636ddb70a0add5b1cf;hb=0372e9b3f679aa761e3741f1bc4e9270e821e55b;hp=facd0edee6dc3f1aad34b3e077dc7647b4de84e1;hpb=ae8e8bcdd0c00ff6d862eaf6e3473c055f4ae54a;p=lhc%2Fweb%2Fwiklou.git diff --git a/languages/Language.php b/languages/Language.php index facd0edee6..aa1a65c34a 100644 --- a/languages/Language.php +++ b/languages/Language.php @@ -1198,8 +1198,9 @@ class Language { * http://en.wikipedia.org/wiki/Minguo_calendar * http://en.wikipedia.org/wiki/Japanese_era_name * - * @param $ts String: 14-character timestamp, calender name - * @return array converted year, month, day + * @param $ts String: 14-character timestamp + * @param $cName String: calender name + * @return Array: converted year, month, day */ private static function tsToYear( $ts, $cName ) { $gy = substr( $ts, 0, 4 ); @@ -1366,8 +1367,7 @@ class Language { if( $usePrefs ) { $datePreference = $wgUser->getDatePreference(); } else { - $options = User::getDefaultOptions(); - $datePreference = (string)$options['date']; + $datePreference = (string)User::getDefaultOption( 'date' ); } } else { $datePreference = (string)$usePrefs; @@ -1686,16 +1686,27 @@ class Language { function hasWordBreaks() { return true; } + + /** + * Some languages such as Chinese require word segmentation, + * Specify such segmentation when overridden in derived class. + * + * @param $string String + * @return String + */ + function segmentByWord( $string ) { + return $string; + } /** - * Some languages have special punctuation to strip out. + * Some languages have special punctuation need to be normalized. * Make such changes here. * * @param $string String * @return String */ - function stripForSearch( $string, $doStrip = true ) { - return $string; + function normalizeForSearch( $string ) { + return self::convertDoubleWidth($string); } /** @@ -1703,12 +1714,21 @@ class Language { * range: ff00-ff5f ~= 0020-007f */ protected static function convertDoubleWidth( $string ) { - $string = preg_replace( '/\xef\xbc([\x80-\xbf])/e', 'chr((ord("$1") & 0x3f) + 0x20)', $string ); - $string = preg_replace( '/\xef\xbd([\x80-\x99])/e', 'chr((ord("$1") & 0x3f) + 0x60)', $string ); + static $full = null; + static $half = null; + + if( $full === null ) { + $fullWidth = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"; + $halfWidth = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"; + $full = str_split( $fullWidth, 3 ); + $half = str_split( $halfWidth ); + } + + $string = str_replace( $full, $half, $string ); return $string; } - protected static function wordSegmentation( $string, $pattern ) { + protected static function insertSpace( $string, $pattern ) { $string = preg_replace( $pattern, " $1 ", $string ); $string = preg_replace( '/ +/', ' ', $string ); return $string; @@ -2143,50 +2163,205 @@ class Language { */ function truncate( $string, $length, $ellipsis = '...' ) { # Use the localized ellipsis character - if( $ellipsis == '...' ) { + if ( $ellipsis == '...' ) { $ellipsis = wfMsgExt( 'ellipsis', array( 'escapenoentities', 'language' => $this ) ); } - - if( $length == 0 ) { + # Check if there is no need to truncate + if ( $length == 0 ) { return $ellipsis; - } - if ( strlen( $string ) <= abs( $length ) ) { + } elseif ( strlen( $string ) <= abs( $length ) ) { return $string; } $stringOriginal = $string; - if( $length > 0 ) { - $string = substr( $string, 0, $length ); - $char = ord( $string[strlen( $string ) - 1] ); - $m = array(); - if ($char >= 0xc0) { - # We got the first byte only of a multibyte char; remove it. - $string = substr( $string, 0, -1 ); - } elseif( $char >= 0x80 && - preg_match( '/^(.*)(?:[\xe0-\xef][\x80-\xbf]|' . - '[\xf0-\xf7][\x80-\xbf]{1,2})$/', $string, $m ) ) { - # We chopped in the middle of a character; remove it - $string = $m[1]; + if ( $length > 0 ) { + $string = substr( $string, 0, $length ); // xyz... + $string = $this->removeBadCharLast( $string ); + $string = $string . $ellipsis; + } else { + $string = substr( $string, $length ); // ...xyz + $string = $this->removeBadCharFirst( $string ); + $string = $ellipsis . $string; + } + # Do not truncate if the ellipsis makes the string longer/equal (bug 22181) + if ( strlen( $string ) < strlen( $stringOriginal ) ) { + return $string; + } else { + return $stringOriginal; + } + } + + /** + * Remove bytes that represent an incomplete Unicode character + * at the end of string (e.g. bytes of the char are missing) + * + * @param $string String + * @return string + */ + protected function removeBadCharLast( $string ) { + $char = ord( $string[strlen( $string ) - 1] ); + $m = array(); + if ( $char >= 0xc0 ) { + # We got the first byte only of a multibyte char; remove it. + $string = substr( $string, 0, -1 ); + } elseif ( $char >= 0x80 && + preg_match( '/^(.*)(?:[\xe0-\xef][\x80-\xbf]|' . + '[\xf0-\xf7][\x80-\xbf]{1,2})$/', $string, $m ) ) + { + # We chopped in the middle of a character; remove it + $string = $m[1]; + } + return $string; + } + + /** + * Remove bytes that represent an incomplete Unicode character + * at the start of string (e.g. bytes of the char are missing) + * + * @param $string String + * @return string + */ + protected function removeBadCharFirst( $string ) { + $char = ord( $string[0] ); + if ( $char >= 0x80 && $char < 0xc0 ) { + # We chopped in the middle of a character; remove the whole thing + $string = preg_replace( '/^[\x80-\xbf]+/', '', $string ); + } + return $string; + } + + /* + * Truncate a string of valid HTML to a specified length in bytes, + * appending an optional string (e.g. for ellipses), and return valid HTML + * + * This is only intended for styled/linked text, such as HTML with + * tags like and , were the tags are self-contained (valid HTML) + * + * Note: tries to fix broken HTML with MWTidy + * + * @param string $text String to truncate + * @param int $length (zero/positive) Maximum length (excluding ellipses) + * @param string $ellipsis String to append to the truncated text + * @returns string + */ + function truncateHtml( $text, $length, $ellipsis = '...' ) { + # Use the localized ellipsis character + if ( $ellipsis == '...' ) { + $ellipsis = wfMsgExt( 'ellipsis', array( 'escapenoentities', 'language' => $this ) ); + } + # Check if there is no need to truncate + if ( $length <= 0 ) { + return $ellipsis; // no text shown, nothing to format + } elseif ( strlen($text) <= $length ) { + return $text; // string short enough even *with* HTML + } + $text = MWTidy::tidy( $text ); // fix tags + $displayLen = 0; // innerHTML legth so far + $testingEllipsis = false; // checking if ellipses will make string longer/equal? + $tagType = 0; // 0-open, 1-close + $bracketState = 0; // 1-tag start, 2-tag name, 0-neither + $entityState = 0; // 0-not entity, 1-entity + $tag = $ret = $ch = ''; + $openTags = array(); + $textLen = strlen($text); + for( $pos = 0; $pos < $textLen; ++$pos ) { + $ch = $text[$pos]; + $lastCh = $pos ? $text[$pos-1] : ''; + $ret .= $ch; // add to result string + if ( $ch == '<' ) { + $this->truncate_endBracket( $tag, $tagType, $lastCh, $openTags ); // for bad HTML + $entityState = 0; // for bad HTML + $bracketState = 1; // tag started (checking for backslash) + } elseif ( $ch == '>' ) { + $this->truncate_endBracket( $tag, $tagType, $lastCh, $openTags ); + $entityState = 0; // for bad HTML + $bracketState = 0; // out of brackets + } elseif ( $bracketState == 1 ) { + if ( $ch == '/' ) { + $tagType = 1; // close tag (e.g. "") + } else { + $tagType = 0; // open tag (e.g. "") + $tag .= $ch; + } + $bracketState = 2; // building tag name + } elseif ( $bracketState == 2 ) { + if ( $ch != ' ' ) { + $tag .= $ch; + } else { + // Name found (e.g. "truncate_skip( $ret, $text, "<>", $pos + 1 ); + } + } elseif ( $bracketState == 0 ) { + if ( $entityState ) { + if ( $ch == ';' ) { + $entityState = 0; + $displayLen++; // entity is one displayed char + } + } else { + if ( $ch == '&' ) { + $entityState = 1; // entity found, (e.g. " ") + } else { + $displayLen++; // this char is displayed + // Add on the other display text after this... + $skipped = $this->truncate_skip( + $ret, $text, "<>&", $pos + 1, $length - $displayLen ); + $displayLen += $skipped; + $pos += $skipped; + } + } } - # Do not truncate if the ellipsis actually make the string longer. Bug 22181 - if ( strlen( $string ) + strlen( $ellipsis ) < strlen( $stringOriginal ) ) { - return $string . $ellipsis; - } else { - return $stringOriginal; + # Consider truncation once the display length has reached the maximim. + # Double-check that we're not in the middle of a bracket/entity... + if ( $displayLen >= $length && $bracketState == 0 && $entityState == 0 ) { + if ( !$testingEllipsis ) { + $testingEllipsis = true; + # Save where we are; we will truncate here unless + # the ellipsis actually makes the string longer. + $pOpenTags = $openTags; // save state + $pRet = $ret; // save state + } elseif ( $displayLen > ($length + strlen($ellipsis)) ) { + # Ellipsis won't make string longer/equal, the truncation point was OK. + $openTags = $pOpenTags; // reload state + $ret = $this->removeBadCharLast( $pRet ); // reload state, multi-byte char fix + $ret .= $ellipsis; // add ellipsis + break; + } } + } + if ( $displayLen == 0 ) { + return ''; // no text shown, nothing to format + } + $this->truncate_endBracket( $tag, $text[$textLen-1], $tagType, $openTags ); // for bad HTML + while ( count( $openTags ) > 0 ) { + $ret .= ''; // close open tags + } + return $ret; + } - } else { - $string = substr( $string, $length ); - $char = ord( $string[0] ); - if( $char >= 0x80 && $char < 0xc0 ) { - # We chopped in the middle of a character; remove the whole thing - $string = preg_replace( '/^[\x80-\xbf]+/', '', $string ); - } - # Do not truncate if the ellipsis actually make the string longer. Bug 22181 - if ( strlen( $string ) + strlen( $ellipsis ) < strlen( $stringOriginal ) ) { - return $ellipsis . $string; - } else { - return $stringOriginal; + // truncateHtml() helper function + // like strcspn() but adds the skipped chars to $ret + private function truncate_skip( &$ret, $text, $search, $start, $len = -1 ) { + $skipCount = 0; + if( $start < strlen($text) ) { + $skipCount = strcspn( $text, $search, $start, $len ); + $ret .= substr( $text, $start, $skipCount ); + } + return $skipCount; + } + + // truncateHtml() helper function + // (a) push or pop $tag from $openTags as needed + // (b) clear $tag value + private function truncate_endBracket( &$tag, $tagType, $lastCh, &$openTags ) { + $tag = ltrim( $tag ); + if( $tag != '' ) { + if( $tagType == 0 && $lastCh != '/' ) { + $openTags[] = $tag; // tag opened (didn't close itself) + } else if( $tagType == 1 ) { + if( $openTags && $tag == $openTags[count($openTags)-1] ) { + array_pop( $openTags ); // tag closed + } } + $tag = ''; } } @@ -2365,7 +2540,7 @@ class Language { * * @param $link String: the name of the link * @param $nt Mixed: the title object of the link - * @param boolean $ignoreOtherCond: to disable other conditions when + * @param $ignoreOtherCond Boolean: to disable other conditions when * we need to transclude a template or update a category's link * @return null the input parameters may be modified upon return */ @@ -2544,19 +2719,19 @@ class Language { function formatTimePeriod( $seconds ) { if ( $seconds < 10 ) { - return $this->formatNum( sprintf( "%.1f", $seconds ) ) . wfMsg( 'seconds-abbrev' ); + return $this->formatNum( sprintf( "%.1f", $seconds ) ) . ' ' . wfMsg( 'seconds-abbrev' ); } elseif ( $seconds < 60 ) { - return $this->formatNum( round( $seconds ) ) . wfMsg( 'seconds-abbrev' ); + return $this->formatNum( round( $seconds ) ) . ' ' . wfMsg( 'seconds-abbrev' ); } elseif ( $seconds < 3600 ) { - return $this->formatNum( floor( $seconds / 60 ) ) . wfMsg( 'minutes-abbrev' ) . - $this->formatNum( round( fmod( $seconds, 60 ) ) ) . wfMsg( 'seconds-abbrev' ); + return $this->formatNum( floor( $seconds / 60 ) ) . ' ' . wfMsg( 'minutes-abbrev' ) . ' ' . + $this->formatNum( round( fmod( $seconds, 60 ) ) ) . ' ' . wfMsg( 'seconds-abbrev' ); } else { $hours = floor( $seconds / 3600 ); $minutes = floor( ( $seconds - $hours * 3600 ) / 60 ); $secondsPart = round( $seconds - $hours * 3600 - $minutes * 60 ); - return $this->formatNum( $hours ) . wfMsg( 'hours-abbrev' ) . - $this->formatNum( $minutes ) . wfMsg( 'minutes-abbrev' ) . - $this->formatNum( $secondsPart ) . wfMsg( 'seconds-abbrev' ); + return $this->formatNum( $hours ) . ' ' . wfMsg( 'hours-abbrev' ) . ' ' . + $this->formatNum( $minutes ) . ' ' . wfMsg( 'minutes-abbrev' ) . ' ' . + $this->formatNum( $secondsPart ) . ' ' . wfMsg( 'seconds-abbrev' ); } }