Estonian translation from wiki
[lhc/web/wiklou.git] / languages / LanguageUtf8.php
1 <?
2
# This language file reads and emits UTF-8.
$wgInputEncoding = $wgOutputEncoding = "utf-8";

# Cache keys for the case-mapping tables, namespaced by database name.
$key1 = "$wgDBname:utf8:upper";
$key2 = "$wgDBname:utf8:lower";

# Ask the shared cache for both tables.
$wikiUpperChars = $wgMemc->get( $key1 );
$wikiLowerChars = $wgMemc->get( $key2 );

# A strict false from get() is treated as a cache miss. If either table is
# missing, load Utf8Case.php (presumably it defines both $wikiUpperChars and
# $wikiLowerChars -- confirm against that file) and store both tables back.
if ( !( $wikiUpperChars !== false && $wikiLowerChars !== false ) ) {
	include_once( "Utf8Case.php" );
	$wgMemc->set( $key1, $wikiUpperChars );
	$wgMemc->set( $key2, $wikiLowerChars );
}
14
/**
 * Base stuff useful to all UTF-8 based language files.
 *
 * Relies on the global tables $wikiUpperChars and $wikiLowerChars, which are
 * passed to strtr() as replacement maps.
 */
class LanguageUtf8 extends Language {

	/**
	 * Upper-case the first character of a string.
	 *
	 * For most languages this would be a wrapper for PHP's ucfirst(), but
	 * that doesn't work right in a UTF-8 locale, so the leading character
	 * (one ASCII byte, or a lead byte followed by its continuation bytes)
	 * is mapped through $wikiUpperChars instead.
	 *
	 * @param string $string Input text.
	 * @return string The text with its first character mapped; unchanged if
	 *                the string is empty or starts with a stray continuation byte.
	 */
	function ucfirst( $string ) {
		global $wikiUpperChars;
		# A callback replaces the old /e (eval) modifier, which was removed
		# in PHP 7 and also added a backslash before a leading quote.
		return preg_replace_callback(
			'/^([\x00-\x7f]|[\xc0-\xff][\x80-\xbf]*)/',
			function ( $matches ) use ( $wikiUpperChars ) {
				return strtr( $matches[1], $wikiUpperChars );
			},
			$string );
	}

	/**
	 * Prepare text for the search index.
	 *
	 * MySQL fulltext index doesn't grok utf-8, so every multi-byte sequence
	 * is case-folded through $wikiLowerChars and replaced by 'U8' followed
	 * by the hex dump of its bytes. ASCII bytes are left untouched.
	 *
	 * @param string $string Input text.
	 * @return string Text with each non-ASCII sequence replaced by a hex token.
	 */
	function stripForSearch( $string ) {
		global $wikiLowerChars;
		# Callback form instead of the removed /e modifier.
		return preg_replace_callback(
			'/([\xc0-\xff][\x80-\xbf]*)/',
			function ( $matches ) use ( $wikiLowerChars ) {
				return 'U8' . bin2hex( strtr( $matches[1], $wikiLowerChars ) );
			},
			$string );
	}

	/**
	 * Name of the legacy encoding assumed for non-UTF-8 input.
	 *
	 * Windows codepage 1252 is a superset of iso 8859-1. Override this to
	 * use a different source encoding to translate incoming 8-bit URLs.
	 *
	 * @return string Encoding name passed to iconv as the source charset.
	 */
	function fallback8bitEncoding() {
		return "windows-1252";
	}

	/**
	 * Make sure a title string is UTF-8, converting it if it is not.
	 *
	 * @param string $s Raw title text.
	 * @return string $s unchanged when it contains no high bytes or already
	 *                has the byte structure of UTF-8; otherwise the result of
	 *                converting it from fallback8bitEncoding() to utf-8.
	 */
	function checkTitleEncoding( $s ) {
		# Pure ASCII needs no conversion.
		$ishigh = preg_match( '/[\x80-\xff]/', $s );
		if ( !$ishigh ) {
			return $s;
		}

		# Structural check only: sequences of 1 to 4 bytes with the right
		# lead/continuation byte ranges.
		$isutf8 = preg_match( '/^([\x00-\x7f]|[\xc0-\xdf][\x80-\xbf]|' .
			'[\xe0-\xef][\x80-\xbf]{2}|[\xf0-\xf7][\x80-\xbf]{3})+$/', $s );
		if ( $isutf8 ) {
			return $s;
		}

		# Not UTF-8: treat it as the legacy 8-bit encoding and convert.
		# iconv() is not defined in this class; presumably inherited from Language.
		return $this->iconv( $this->fallback8bitEncoding(), "utf-8", $s );
	}
}
59
60 ?>