(bug 37755) Set robot meta tags for 'view source' pages
[lhc/web/wiklou.git] / languages / classes / LanguageEo.php
1 <?php
2 /**
3 * Esperanto (Esperanto) specific code.
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License along
16 * with this program; if not, write to the Free Software Foundation, Inc.,
17 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
18 * http://www.gnu.org/copyleft/gpl.html
19 *
20 * @file
21 * @author Brion Vibber <brion@pobox.com>
22 * @ingroup Language
23 */
24
25 /**
26 * Esperanto (Esperanto)
27 *
28 * @ingroup Language
29 */
class LanguageEo extends Language {
	/**
	 * Wrapper for charset conversions.
	 *
	 * In most languages, this calls through to standard system iconv(), but
	 * for Esperanto we're also adding a special pseudo-charset to convert
	 * accented characters to/from the ASCII-friendly "X" surrogate coding:
	 *
	 *   cx = ĉ     cxx = cx
	 *   gx = ĝ     gxx = gx
	 *   hx = ĥ     hxx = hx
	 *   jx = ĵ     jxx = jx
	 *   sx = ŝ     sxx = sx
	 *   ux = ŭ     uxx = ux
	 *
	 * An "x" is only doubled/undoubled when it directly follows one of the
	 * letters c, g, h, j, s, u (or, when encoding, one of their accented
	 * forms); both patterns are anchored on those letters, so an "x" anywhere
	 * else in the text is left untouched in both directions.
	 *
	 * http://en.wikipedia.org/wiki/Esperanto_orthography#X-system
	 * http://eo.wikipedia.org/wiki/X-sistemo
	 *
	 * X-conversion is applied, in either direction, between "utf-8" and "x" charsets;
	 * this comes into effect when input is run through $wgRequest->getText() and the
	 * $wgEditEncoding is set to 'x'.
	 *
	 * In the long run, this should be moved out of here and into the client-side
	 * editor behavior; the original server-side translation system dates to 2002-2003
	 * when many browsers with really bad Unicode support were still in use.
	 *
	 * @param string $in input character set (compared case-insensitively)
	 * @param string $out output character set (compared case-insensitively)
	 * @param string $string text to be converted
	 * @return string converted text; for the x <-> utf-8 pairs the input is
	 *   returned unchanged if the regex engine reports a failure
	 */
	public function iconv( $in, $out, $string ) {
		if ( strcasecmp( $in, 'x' ) === 0 && strcasecmp( $out, 'utf-8' ) === 0 ) {
			# Group 1: base letter plus an optional surrogate x.
			# Group 2: any number of escaped (doubled) x's.
			# The lookahead forces the whole run of x's to be consumed, so the
			# odd/even count decides whether the first x is a surrogate.
			$converted = preg_replace_callback(
				'/([cghjsu]x?)((?:xx)*)(?!x)/i',
				array( $this, 'strrtxuCallback' ), $string );
		} elseif ( strcasecmp( $in, 'UTF-8' ) === 0 && strcasecmp( $out, 'x' ) === 0 ) {
			# Double the x's only when they follow a letter that can carry the
			# accent (or its already-accented UTF-8 form).
			$converted = preg_replace_callback(
				'/((?:[cghjsu]|\xc4[\x88\x89\x9c\x9d\xa4\xa5\xb4\xb5]|\xc5[\x9c\x9d\xac\xad])x*)/i',
				array( $this, 'strrtuxCallback' ), $string );
		} else {
			return parent::iconv( $in, $out, $string );
		}

		# preg_replace_callback() returns null when PCRE fails; hand back the
		# unconverted text rather than letting null escape as the result.
		return $converted === null ? $string : $converted;
	}

	/**
	 * preg_replace_callback() handler for the UTF-8 -> X direction.
	 *
	 * Every x in the matched run is doubled and every accented letter is
	 * replaced by its base letter followed by a lowercase x.
	 *
	 * @param $matches array regex match; index 1 holds a base or accented
	 *   letter followed by zero or more x's
	 * @return string
	 */
	public function strrtuxCallback( $matches ) {
		static $ux = array(
			'x' => 'xx', 'X' => 'Xx',
			"\xc4\x88" => "Cx", "\xc4\x89" => "cx",
			"\xc4\x9c" => "Gx", "\xc4\x9d" => "gx",
			"\xc4\xa4" => "Hx", "\xc4\xa5" => "hx",
			"\xc4\xb4" => "Jx", "\xc4\xb5" => "jx",
			"\xc5\x9c" => "Sx", "\xc5\x9d" => "sx",
			"\xc5\xac" => "Ux", "\xc5\xad" => "ux"
		);
		return strtr( $matches[1], $ux );
	}

	/**
	 * preg_replace_callback() handler for the X -> UTF-8 direction.
	 *
	 * A letter+x pair in group 1 becomes the accented letter (a bare letter
	 * has no table entry and passes through); each doubled x in group 2
	 * collapses to a single x, taking the case of the first x of the pair.
	 *
	 * @param $matches array regex match; index 1 holds the base letter with
	 *   an optional surrogate x, index 2 the run of doubled x's (may be empty)
	 * @return string
	 */
	public function strrtxuCallback( $matches ) {
		static $xu = array(
			'xx' => 'x', 'xX' => 'x',
			'Xx' => 'X', 'XX' => 'X',
			"Cx" => "\xc4\x88", "CX" => "\xc4\x88",
			"cx" => "\xc4\x89", "cX" => "\xc4\x89",
			"Gx" => "\xc4\x9c", "GX" => "\xc4\x9c",
			"gx" => "\xc4\x9d", "gX" => "\xc4\x9d",
			"Hx" => "\xc4\xa4", "HX" => "\xc4\xa4",
			"hx" => "\xc4\xa5", "hX" => "\xc4\xa5",
			"Jx" => "\xc4\xb4", "JX" => "\xc4\xb4",
			"jx" => "\xc4\xb5", "jX" => "\xc4\xb5",
			"Sx" => "\xc5\x9c", "SX" => "\xc5\x9c",
			"sx" => "\xc5\x9d", "sX" => "\xc5\x9d",
			"Ux" => "\xc5\xac", "UX" => "\xc5\xac",
			"ux" => "\xc5\xad", "uX" => "\xc5\xad"
		);
		return strtr( $matches[1], $xu ) . strtr( $matches[2], $xu );
	}

	/**
	 * Normalise the encoding of a title string.
	 *
	 * A string that contains high-bit bytes but does not validate as UTF-8 is
	 * assumed to be Latin-1 and is converted to UTF-8. Anything else is
	 * returned unchanged; X-system surrogates in titles are not decoded here.
	 *
	 * @param $s string
	 * @return string
	 */
	public function checkTitleEncoding( $s ) {
		$ishigh = preg_match( '/[\x80-\xff]/', $s );
		$isutf = preg_match( '/^([\x00-\x7f]|[\xc0-\xdf][\x80-\xbf]|' .
			'[\xe0-\xef][\x80-\xbf]{2}|[\xf0-\xf7][\x80-\xbf]{3})+$/', $s );

		if ( $ishigh && !$isutf ) {
			# Assume Latin1
			# NOTE(review): utf8_encode() is deprecated as of PHP 8.2; pick a
			# replacement once the minimum supported PHP version is known.
			return utf8_encode( $s );
		}

		return $s;
	}

	/**
	 * Switch the edit encoding to the "x" pseudo-charset by assigning 'x' to
	 * the global $wgEditEncoding.
	 */
	public function initEncoding() {
		global $wgEditEncoding;
		$wgEditEncoding = 'x';
	}
}