X-Git-Url: https://git.heureux-cyclage.org/?a=blobdiff_plain;f=languages%2FLanguageUtf8.php;h=d738624b77b490de3c32adbd78356cdf6cf9d34f;hb=502d86767181553745afd7103bc8e8573da9a138;hp=8b86bd08fee082a9f086bd6e2c61cc9e9908a99d;hpb=85558b19f982834ce55616416bdd131d5331590f;p=lhc%2Fweb%2Fwiklou.git diff --git a/languages/LanguageUtf8.php b/languages/LanguageUtf8.php index 8b86bd08fe..d738624b77 100644 --- a/languages/LanguageUtf8.php +++ b/languages/LanguageUtf8.php @@ -1,19 +1,29 @@ get( $key1 = "$wgDBname:utf8:upper" ); $wikiLowerChars = $wgMemc->get( $key2 = "$wgDBname:utf8:lower" ); - + if(empty( $wikiUpperChars) || empty($wikiLowerChars )) { require_once( "includes/Utf8Case.php" ); $wgMemc->set( $key1, $wikiUpperChars ); @@ -21,36 +31,71 @@ if (function_exists('mb_internal_encoding')) { } } -# Base stuff useful to all UTF-8 based language files +/** + * Base stuff useful to all UTF-8 based language files + * @package MediaWiki + */ class LanguageUtf8 extends Language { - # These two functions use mbstring library, if it is loaded - # or compiled and character mapping arrays otherwise. + # These functions use mbstring library, if it is loaded + # or compiled and character mapping arrays otherwise. # In case of language-specific character mismatch # it should be dealt with in Language classes. - function ucfirst( $string ) { - if (function_exists('mb_strtoupper')) { - return mb_strtoupper(mb_substr($string,0,1)).mb_substr($string,1); - } else { - global $wikiUpperChars; - return preg_replace ( - "/^([a-z]|[\\xc0-\\xff][\\x80-\\xbf]*)/e", - "strtr ( \"\$1\" , \$wikiUpperChars )", - $string ); - } + function ucfirst( $str ) { + return LanguageUtf8::uc( $str, true ); } - - function lcfirst( $string ) { - if (function_exists('mb_strtolower')) { - return mb_strtolower(mb_substr($string,0,1)).mb_substr($string,1); - } else { - global $wikiLowerChars; - return preg_replace ( - "/^([A-Z]|[\\xc0-\\xff][\\x80-\\xbf]*)/e", - "strtr ( \"\$1\" , \$wikiLowerChars )", - $string ); - } + + function uc( $str, $first = false ) { + if ( function_exists( 'mb_strtoupper' ) ) + if ( $first ) + if ( LanguageUtf8::isMultibyte( $str ) ) + return mb_strtoupper( mb_substr( $str, 0, 1 ) ) . mb_substr( $str, 1 ); + else + return ucfirst( $str ); + else + return LanguageUtf8::isMultibyte( $str ) ? mb_strtoupper( $str ) : strtoupper( $str ); + else + if ( LanguageUtf8::isMultibyte( $str ) ) { + global $wikiUpperChars; + $x = $first ? '^' : ''; + return preg_replace( + "/$x([a-z]|[\\xc0-\\xff][\\x80-\\xbf]*)/e", + "strtr( \"\$1\" , \$wikiUpperChars )", + $str + ); + } else + return $first ? ucfirst( $str ) : strtoupper( $str ); + } + + function lcfirst( $str ) { + return LanguageUtf8::lc( $str, true ); + } + + function lc( $str, $first = false ) { + if ( function_exists( 'mb_strtolower' ) ) + if ( $first ) + if ( LanguageUtf8::isMultibyte( $str ) ) + return mb_strtolower( mb_substr( $str, 0, 1 ) ) . mb_substr( $str, 1 ); + else + return strtolower( substr( $str, 0, 1 ) ) . substr( $str, 1 ); + else + return LanguageUtf8::isMultibyte( $str ) ? mb_strtolower( $str ) : strtolower( $str ); + else + if ( LanguageUtf8::isMultibyte( $str ) ) { + global $wikiLowerChars; + $x = $first ? '^' : ''; + return preg_replace( + "/$x([A-Z]|[\\xc0-\\xff][\\x80-\\xbf]*)/e", + "strtr( \"\$1\" , \$wikiLowerChars )", + $str + ); + } else + return $first ? strtolower( substr( $str, 0, 1 ) ) . substr( $str, 1 ) : strtolower( $str ); + } + + function isMultibyte( $str ) { + return (bool)preg_match( '/^[\x80-\xff]/', $str ); } function stripForSearch( $string ) { @@ -61,18 +106,22 @@ class LanguageUtf8 extends Language { # all strtolower on stripped output or argument # should be removed and all stripForSearch # methods adjusted to that. + + wfProfileIn( "LanguageUtf8::stripForSearch" ); if( function_exists( 'mb_strtolower' ) ) { - return preg_replace( + $out = preg_replace( "/([\\xc0-\\xff][\\x80-\\xbf]*)/e", "'U8' . bin2hex( \"$1\" )", mb_strtolower( $string ) ); } else { global $wikiLowerChars; - return preg_replace( + $out = preg_replace( "/([\\xc0-\\xff][\\x80-\\xbf]*)/e", "'U8' . bin2hex( strtr( \"\$1\", \$wikiLowerChars ) )", $string ); } + wfProfileOut( "LanguageUtf8::stripForSearch" ); + return $out; } function fallback8bitEncoding() { @@ -85,10 +134,13 @@ class LanguageUtf8 extends Language { function checkTitleEncoding( $s ) { global $wgInputEncoding; + if( is_array( $s ) ) { + wfDebugDieBacktrace( 'Given array to checkTitleEncoding.' ); + } # Check for non-UTF-8 URLs $ishigh = preg_match( '/[\x80-\xff]/', $s); if(!$ishigh) return $s; - + $isutf8 = preg_match( '/^([\x00-\x7f]|[\xc0-\xdf][\x80-\xbf]|' . '[\xe0-\xef][\x80-\xbf]{2}|[\xf0-\xf7][\x80-\xbf]{3})+$/', $s ); if( $isutf8 ) return $s; @@ -99,7 +151,7 @@ class LanguageUtf8 extends Language { function firstChar( $s ) { preg_match( '/^([\x00-\x7f]|[\xc0-\xdf][\x80-\xbf]|' . '[\xe0-\xef][\x80-\xbf]{2}|[\xf0-\xf7][\x80-\xbf]{3})/', $s, $matches); - + return isset( $matches[1] ) ? $matches[1] : ""; }