7ec447e703a1e99232d419c28661f1cd3d4b63ab
[lhc/web/wiklou.git] / languages / classes / LanguageEo.php
1 <?php
2
/**
 * Esperanto (Esperanto)
 *
 * @ingroup Language
 * @author Brion Vibber <brion@pobox.com>
 */
class LanguageEo extends Language {
	/**
	 * Wrapper for charset conversions.
	 *
	 * In most languages, this calls through to standard system iconv(), but
	 * for Esperanto we're also adding a special pseudo-charset to convert
	 * accented characters to/from the ASCII-friendly "X" surrogate coding:
	 *
	 *   cx = ĉ    cxx = cx
	 *   gx = ĝ    gxx = gx
	 *   hx = ĥ    hxx = hx
	 *   jx = ĵ    jxx = jx
	 *   sx = ŝ    sxx = sx
	 *   ux = ŭ    uxx = ux
	 *   xx = x    (only directly after one of the letters above)
	 *
	 * http://en.wikipedia.org/wiki/Esperanto_orthography#X-system
	 * http://eo.wikipedia.org/wiki/X-sistemo
	 *
	 * X-conversion is applied, in either direction, between "utf-8" and "x" charsets;
	 * this comes into effect when input is run through $wgRequest->getText() and the
	 * $wgEditEncoding is set to 'x'.
	 *
	 * Charset names are compared case-insensitively. Any other charset pair is
	 * delegated to the parent implementation.
	 *
	 * In the long run, this should be moved out of here and into the client-side
	 * editor behavior; the original server-side translation system dates to 2002-2003
	 * when many browsers with really bad Unicode support were still in use.
	 *
	 * @param string $in input character set
	 * @param string $out output character set
	 * @param string $string text to be converted
	 * @return string
	 */
	public function iconv( $in, $out, $string ) {
		if ( strcasecmp( $in, 'x' ) === 0 && strcasecmp( $out, 'utf-8' ) === 0 ) {
			# X-system -> UTF-8.
			# Group 1: a base letter plus at most one "x" (the accent marker).
			# Group 2: any number of "xx" pairs (escaped literal x's).
			# The (?!x) look-ahead forces the whole run of x's to be consumed,
			# so the parity of the run decides whether the first x is a marker.
			return preg_replace_callback(
				'/([cghjsu]x?)((?:xx)*)(?!x)/i',
				array( $this, 'strrtxuCallback' ),
				$string
			);
		}

		if ( strcasecmp( $in, 'UTF-8' ) === 0 && strcasecmp( $out, 'x' ) === 0 ) {
			# UTF-8 -> X-system.
			# Double x's only if they follow one of the letters c, g, h, j, s, u
			# or one of their accented forms; x's elsewhere are left alone.
			return preg_replace_callback(
				'/((?:[cghjsu]|\xc4[\x88\x89\x9c\x9d\xa4\xa5\xb4\xb5]|\xc5[\x9c\x9d\xac\xad])x*)/i',
				array( $this, 'strrtuxCallback' ),
				$string
			);
		}

		return parent::iconv( $in, $out, $string );
	}

	/**
	 * preg_replace_callback() handler for the UTF-8 -> X-system direction.
	 *
	 * Within the matched run, every accented letter becomes its base letter
	 * followed by "x", and every literal x is doubled so that it cannot be
	 * mistaken for an accent marker when converting back.
	 *
	 * @param array $matches regex match; index 1 holds a base or accented
	 *   letter followed by zero or more x's
	 * @return string the X-system spelling of the matched text
	 */
	public function strrtuxCallback( $matches ) {
		static $ux = array(
			'x' => 'xx', 'X' => 'Xx',
			"\xc4\x88" => "Cx", "\xc4\x89" => "cx",
			"\xc4\x9c" => "Gx", "\xc4\x9d" => "gx",
			"\xc4\xa4" => "Hx", "\xc4\xa5" => "hx",
			"\xc4\xb4" => "Jx", "\xc4\xb5" => "jx",
			"\xc5\x9c" => "Sx", "\xc5\x9d" => "sx",
			"\xc5\xac" => "Ux", "\xc5\xad" => "ux"
		);

		return strtr( $matches[1], $ux );
	}

	/**
	 * preg_replace_callback() handler for the X-system -> UTF-8 direction.
	 *
	 * @param array $matches regex match; index 1 holds a base letter with an
	 *   optional trailing x (accent marker), index 2 holds zero or more
	 *   "xx" pairs (escaped literal x's)
	 * @return string the UTF-8 spelling of the matched text
	 */
	public function strrtxuCallback( $matches ) {
		static $xu = array(
			'xx' => 'x', 'xX' => 'x',
			'Xx' => 'X', 'XX' => 'X',
			"Cx" => "\xc4\x88", "CX" => "\xc4\x88",
			"cx" => "\xc4\x89", "cX" => "\xc4\x89",
			"Gx" => "\xc4\x9c", "GX" => "\xc4\x9c",
			"gx" => "\xc4\x9d", "gX" => "\xc4\x9d",
			"Hx" => "\xc4\xa4", "HX" => "\xc4\xa4",
			"hx" => "\xc4\xa5", "hX" => "\xc4\xa5",
			"Jx" => "\xc4\xb4", "JX" => "\xc4\xb4",
			"jx" => "\xc4\xb5", "jX" => "\xc4\xb5",
			"Sx" => "\xc5\x9c", "SX" => "\xc5\x9c",
			"sx" => "\xc5\x9d", "sX" => "\xc5\x9d",
			"Ux" => "\xc5\xac", "UX" => "\xc5\xac",
			"ux" => "\xc5\xad", "uX" => "\xc5\xad"
		);

		# The two groups must be translated separately: translating the joined
		# text would let a bare base letter from group 1 pair up with the first
		# x of group 2 and wrongly produce an accented letter.
		$letter = strtr( $matches[1], $xu );
		$escapedXs = strtr( $matches[2], $xu );

		return $letter . $escapedXs;
	}

	/**
	 * Normalise the encoding of an incoming title string.
	 *
	 * A string that contains high-bit bytes but does not look like UTF-8 is
	 * assumed to be Latin-1 and is re-encoded as UTF-8. Everything else is
	 * returned unchanged.
	 *
	 * Conversion of X-system backwards-compatibility URLs (running titles that
	 * contain [cghjsu]x through iconv( 'x', 'utf-8', ... )) is disabled here,
	 * so such titles pass through untouched.
	 *
	 * @param string $s title text as received
	 * @return string title text, re-encoded only in the Latin-1 case
	 */
	public function checkTitleEncoding( $s ) {
		# Only strings with high-bit bytes can need re-encoding, so the UTF-8
		# shape check is skipped entirely for pure-ASCII input.
		$looksLatin1 = preg_match( '/[\x80-\xff]/', $s )
			&& !preg_match( '/^([\x00-\x7f]|[\xc0-\xdf][\x80-\xbf]|' .
				'[\xe0-\xef][\x80-\xbf]{2}|[\xf0-\xf7][\x80-\xbf]{3})+$/', $s );

		if ( $looksLatin1 ) {
			# Assume Latin1
			# NOTE(review): utf8_encode() is deprecated as of PHP 8.2; it is kept
			# here so that no additional extension dependency is introduced.
			return utf8_encode( $s );
		}

		return $s;
	}

	/**
	 * Select the "x" pseudo-charset as the edit encoding by setting the
	 * global $wgEditEncoding.
	 */
	public function initEncoding() {
		global $wgEditEncoding;
		$wgEditEncoding = 'x';
	}
}