X-Git-Url: https://git.heureux-cyclage.org/?a=blobdiff_plain;f=languages%2FLanguageUtf8.php;h=d738624b77b490de3c32adbd78356cdf6cf9d34f;hb=502d86767181553745afd7103bc8e8573da9a138;hp=8b86bd08fee082a9f086bd6e2c61cc9e9908a99d;hpb=85558b19f982834ce55616416bdd131d5331590f;p=lhc%2Fweb%2Fwiklou.git

diff --git a/languages/LanguageUtf8.php b/languages/LanguageUtf8.php
index 8b86bd08fe..d738624b77 100644
--- a/languages/LanguageUtf8.php
+++ b/languages/LanguageUtf8.php
@@ -1,19 +1,29 @@
 <?php
-#$Id$
+/**
+  * @package MediaWiki
+  * @subpackage Language
+  */
+
 if( defined( "MEDIAWIKI" ) ) {
 
+# This file and LanguageLatin1.php may be included from within functions, so
+# we need to have global statements
+
+global $wgInputEncoding, $wgOutputEncoding, $wikiUpperChars, $wikiLowerChars;
+global $wgDBname, $wgMemc;
+
 $wgInputEncoding    = "UTF-8";
 $wgOutputEncoding	= "UTF-8";
 
-if (function_exists('mb_internal_encoding')) {
+if( function_exists( 'mb_strtoupper' ) ) {
 	mb_internal_encoding('UTF-8');
 } else {
 	# Hack our own case conversion routines
-	
+
 	# Loading serialized arrays is faster than parsing code :P
 	$wikiUpperChars = $wgMemc->get( $key1 = "$wgDBname:utf8:upper" );
 	$wikiLowerChars = $wgMemc->get( $key2 = "$wgDBname:utf8:lower" );
-	
+
 	if(empty( $wikiUpperChars) || empty($wikiLowerChars )) {
 		require_once( "includes/Utf8Case.php" );
 		$wgMemc->set( $key1, $wikiUpperChars );
@@ -21,36 +31,71 @@ if (function_exists('mb_internal_encoding')) {
 	}
 }
 
-# Base stuff useful to all UTF-8 based language files
+/**
+ * Base stuff useful to all UTF-8 based language files
+ * @package MediaWiki
+ */
 class LanguageUtf8 extends Language {
 
-	# These two functions use mbstring library, if it is loaded
-	# or compiled and character mapping arrays otherwise. 
+	# These functions use the mbstring library if it is loaded or compiled in,
+	# and fall back to character mapping arrays otherwise.
 	# In case of language-specific character mismatch
 	# it should be dealt with in Language classes.
 
-	function ucfirst( $string ) {
-		if (function_exists('mb_strtoupper')) {
-			return mb_strtoupper(mb_substr($string,0,1)).mb_substr($string,1);
-		} else {
-		    global $wikiUpperChars;
-		    return preg_replace (
-        	    "/^([a-z]|[\\xc0-\\xff][\\x80-\\xbf]*)/e",
-        	    "strtr ( \"\$1\" , \$wikiUpperChars )",
-        	    $string );
-		}
+	function ucfirst( $str ) { # Upper-case only the first character of $str
+		return LanguageUtf8::uc( $str, true ); # true selects uc()'s first-character-only mode
 	}
-	
-	function lcfirst( $string ) {
-		if (function_exists('mb_strtolower')) {
-			return mb_strtolower(mb_substr($string,0,1)).mb_substr($string,1);
-		} else {
-		    global $wikiLowerChars;
-		    return preg_replace (
-        	    "/^([A-Z]|[\\xc0-\\xff][\\x80-\\xbf]*)/e",
-        	    "strtr ( \"\$1\" , \$wikiLowerChars )",
-        	    $string );
-		}
+
+	function uc( $str, $first = false ) { # Upper-case $str, or only its first character when $first is true
+		if ( function_exists( 'mb_strtoupper' ) ) # mbstring is available
+			if ( $first )
+				if ( LanguageUtf8::isMultibyte( $str ) )
+					return mb_strtoupper( mb_substr( $str, 0, 1 ) ) . mb_substr( $str, 1 );
+				else
+					return ucfirst( $str ); # unqualified call: PHP's built-in ucfirst(), not this class's method
+			else
+				return LanguageUtf8::isMultibyte( $str ) ? mb_strtoupper( $str ) : strtoupper( $str );
+		else # no mbstring: use the $wikiUpperChars table for multibyte input
+			if ( LanguageUtf8::isMultibyte( $str ) ) {
+				global $wikiUpperChars;
+				$x = $first ? '^' : ''; # anchor the pattern so only a match at the start is replaced
+				return preg_replace(
+					"/$x([a-z]|[\\xc0-\\xff][\\x80-\\xbf]*)/e", # a lower-case ASCII letter, or a 0xc0-0xff byte plus any following 0x80-0xbf bytes
+					"strtr( \"\$1\" , \$wikiUpperChars )", # evaluated as PHP code for each match (/e modifier)
+					$str
+				);
+			} else
+				return $first ? ucfirst( $str ) : strtoupper( $str );
+	}
+
+	function lcfirst( $str ) { # Lower-case only the first character of $str
+		return LanguageUtf8::lc( $str, true ); # true selects lc()'s first-character-only mode
+	}
+
+	function lc( $str, $first = false ) { # Lower-case $str, or only its first character when $first is true
+		if ( function_exists( 'mb_strtolower' ) ) # mbstring is available
+			if ( $first )
+				if ( LanguageUtf8::isMultibyte( $str ) )
+					return mb_strtolower( mb_substr( $str, 0, 1 ) ) . mb_substr( $str, 1 );
+				else
+					return strtolower( substr( $str, 0, 1 ) ) . substr( $str, 1 ); # byte-wise lowering of the first byte only
+			else
+				return LanguageUtf8::isMultibyte( $str ) ? mb_strtolower( $str ) : strtolower( $str );
+		else # no mbstring: use the $wikiLowerChars table for multibyte input
+			if ( LanguageUtf8::isMultibyte( $str ) ) {
+				global $wikiLowerChars;
+				$x = $first ? '^' : ''; # anchor the pattern so only a match at the start is replaced
+				return preg_replace(
+					"/$x([A-Z]|[\\xc0-\\xff][\\x80-\\xbf]*)/e", # an upper-case ASCII letter, or a 0xc0-0xff byte plus any following 0x80-0xbf bytes
+					"strtr( \"\$1\" , \$wikiLowerChars )", # evaluated as PHP code for each match (/e modifier)
+					$str
+				);
+			} else
+				return $first ? strtolower( substr( $str, 0, 1 ) ) . substr( $str, 1 ) : strtolower( $str );
+	}
+
+	function isMultibyte( $str ) { # Returns true if $str contains a byte in 0x80-0xff at any position, false otherwise
+		return (bool)preg_match( '/[\x80-\xff]/', $str ); # unanchored: a high byte anywhere counts, not only in the first position
 	}
 
 	function stripForSearch( $string ) {
@@ -61,18 +106,22 @@ class LanguageUtf8 extends Language {
 		# all strtolower on stripped output or argument
 		# should be removed and all stripForSearch
 		# methods adjusted to that.
+
+		wfProfileIn( "LanguageUtf8::stripForSearch" );
 		if( function_exists( 'mb_strtolower' ) ) {
-			return preg_replace(
+			$out = preg_replace(
 				"/([\\xc0-\\xff][\\x80-\\xbf]*)/e",
 				"'U8' . bin2hex( \"$1\" )",
 				mb_strtolower( $string ) );
 		} else {
 			global $wikiLowerChars;
-			return preg_replace(
+			$out = preg_replace(
 				"/([\\xc0-\\xff][\\x80-\\xbf]*)/e",
 				"'U8' . bin2hex( strtr( \"\$1\", \$wikiLowerChars ) )",
 				$string );
 		}
+		wfProfileOut( "LanguageUtf8::stripForSearch" );
+		return $out;
 	}
 
 	function fallback8bitEncoding() {
@@ -85,10 +134,13 @@ class LanguageUtf8 extends Language {
 	function checkTitleEncoding( $s ) {
 		global $wgInputEncoding;
 
+		if( is_array( $s ) ) {
+			wfDebugDieBacktrace( 'Given array to checkTitleEncoding.' );
+		}
 		# Check for non-UTF-8 URLs
 		$ishigh = preg_match( '/[\x80-\xff]/', $s);
 		if(!$ishigh) return $s;
-		
+
 		$isutf8 = preg_match( '/^([\x00-\x7f]|[\xc0-\xdf][\x80-\xbf]|' .
                 '[\xe0-\xef][\x80-\xbf]{2}|[\xf0-\xf7][\x80-\xbf]{3})+$/', $s );
 		if( $isutf8 ) return $s;
@@ -99,7 +151,7 @@ class LanguageUtf8 extends Language {
 	function firstChar( $s ) {
 		preg_match( '/^([\x00-\x7f]|[\xc0-\xdf][\x80-\xbf]|' .
 		'[\xe0-\xef][\x80-\xbf]{2}|[\xf0-\xf7][\x80-\xbf]{3})/', $s, $matches);
-		
+
 		return isset( $matches[1] ) ? $matches[1] : "";
 	}