* changed variable list as per comment on r79954 left only wgDBtype
[lhc/web/wiklou.git] / includes / Fallback.php
1 <?php
2
3 /**
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License along
15 * with this program; if not, write to the Free Software Foundation, Inc.,
16 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
17 * http://www.gnu.org/copyleft/gpl.html
18 *
19 */
20
/**
 * Fallback functions for PHP installed without iconv or mbstring support.
 *
 * All string routines in this class assume UTF-8 input; where an encoding
 * parameter exists it is accepted for signature compatibility and ignored.
 */
class Fallback {

	/**
	 * Very rough stand-in for iconv().
	 *
	 * Only conversions between UTF-8 and ISO-8859-1 are actually performed
	 * (through utf8_decode() / utf8_encode()); a trailing "//IGNORE" on the
	 * target charset is stripped, and any other charset pair returns the
	 * input unchanged.
	 *
	 * NOTE: utf8_decode() and utf8_encode() are deprecated as of PHP 8.2.
	 * They are kept because this class is only meant to be used when
	 * neither iconv nor mbstring is available to replace them.
	 *
	 * @param $from String: source charset name
	 * @param $to String: target charset name, optionally suffixed with //IGNORE
	 * @param $string String: text to convert
	 * @return String
	 */
	public static function iconv( $from, $to, $string ) {
		if ( substr( $to, -8 ) == '//IGNORE' ) {
			$to = substr( $to, 0, strlen( $to ) - 8 );
		}
		if ( strcasecmp( $from, $to ) == 0 ) {
			// Same charset on both sides: nothing to convert
			return $string;
		}
		if ( strcasecmp( $from, 'utf-8' ) == 0 ) {
			return utf8_decode( $string );
		}
		if ( strcasecmp( $to, 'utf-8' ) == 0 ) {
			return utf8_encode( $string );
		}
		return $string;
	}

	/**
	 * Fallback implementation for mb_substr, hardcoded to UTF-8.
	 * Attempts to be at least _moderately_ efficient; best optimized
	 * for relatively small offset and count values -- about 5x slower
	 * than native mb_string in my testing.
	 *
	 * Larger offsets are still fairly efficient for Latin text, but
	 * can be up to 100x slower than native if the text is heavily
	 * multibyte and we have to slog through a few hundred kb.
	 *
	 * @param $str String
	 * @param $start Integer: first character to keep; negative counts from the end
	 * @param $count Mixed: number of characters to keep (negative drops that
	 *   many from the end); 'end' or null means "through the end of the string"
	 * @return String
	 */
	public static function mb_substr( $str, $start, $count = 'end' ) {
		$start = intval( $start );
		if ( $start !== 0 ) {
			$split = self::mb_substr_split_unicode( $str, $start );
			$str = substr( $str, $split );
		}

		// null is what native mb_substr() uses for "no length limit"
		if ( $count !== 'end' && $count !== null ) {
			$split = self::mb_substr_split_unicode( $str, intval( $count ) );
			$str = substr( $str, 0, $split );
		}

		return $str;
	}

	/**
	 * Translate a character position in a UTF-8 string into a byte position.
	 *
	 * @param $str String
	 * @param $splitPos Integer: character position; a negative value is
	 *   counted backwards from the end of the string
	 * @return Integer: byte offset of that character boundary, always within
	 *   0..strlen( $str )
	 */
	public static function mb_substr_split_unicode( $str, $splitPos ) {
		if ( $splitPos == 0 ) {
			return 0;
		}

		$byteLen = strlen( $str );

		if ( $splitPos > 0 ) {
			if ( $splitPos > 256 ) {
				// Optimize large string offsets by skipping ahead N bytes.
				// This will cut out most of our slow time on Latin-based text,
				// and 1/2 to 1/3 on East European and Asian scripts.
				// N characters take at least N bytes, so this never overshoots;
				// it is capped at the string length for out-of-range requests.
				$bytePos = min( $splitPos, $byteLen );
				while ( $bytePos < $byteLen && $str[$bytePos] >= "\x80" && $str[$bytePos] < "\xc0" ) {
					++$bytePos;
				}
				// Use our own counter: the mbstring extension may be absent
				$charPos = self::mb_strlen( substr( $str, 0, $bytePos ) );
			} else {
				$charPos = 0;
				$bytePos = 0;
			}

			// Walk forward one character at a time, stopping at the end of the
			// string so an oversized request neither runs past the buffer nor
			// spins for $splitPos iterations.
			while ( $bytePos < $byteLen && $charPos++ < $splitPos ) {
				++$bytePos;
				// Move past any tail bytes
				while ( $bytePos < $byteLen && $str[$bytePos] >= "\x80" && $str[$bytePos] < "\xc0" ) {
					++$bytePos;
				}
			}
		} else {
			$splitPosX = $splitPos + 1;
			$charPos = 0; // relative to end of string; we don't care about the actual char position here
			$bytePos = $byteLen;
			while ( $bytePos > 0 && $charPos-- >= $splitPosX ) {
				--$bytePos;
				// Move past any tail bytes
				while ( $bytePos > 0 && $str[$bytePos] >= "\x80" && $str[$bytePos] < "\xc0" ) {
					--$bytePos;
				}
			}
		}

		return $bytePos;
	}

	/**
	 * Fallback implementation of mb_strlen, hardcoded to UTF-8.
	 *
	 * Counts every byte that starts a character (ASCII bytes and multibyte
	 * lead bytes) and skips continuation bytes.
	 *
	 * @param string $str
	 * @param string $enc optional encoding; ignored
	 * @return int
	 */
	public static function mb_strlen( $str, $enc = '' ) {
		$counts = count_chars( $str );
		$total = 0;

		// Count ASCII bytes
		for ( $i = 0; $i < 0x80; $i++ ) {
			$total += $counts[$i];
		}

		// Count multibyte sequence heads
		for ( $i = 0xc0; $i < 0xff; $i++ ) {
			$total += $counts[$i];
		}
		return $total;
	}

	/**
	 * Fallback implementation of mb_strpos, hardcoded to UTF-8.
	 *
	 * Both $offset and the return value are measured in characters. PCRE
	 * works in bytes, so the offset is converted to a byte position before
	 * matching and the match position is converted back afterwards.
	 *
	 * @param $haystack String
	 * @param $needle String
	 * @param $offset Integer: optional start position, in characters;
	 *   negative counts from the end
	 * @param $encoding String: optional encoding; ignored
	 * @return Mixed: character position of the first match, or false if none
	 */
	public static function mb_strpos( $haystack, $needle, $offset = 0, $encoding = '' ) {
		$needle = preg_quote( $needle, '/' );
		$byteOffset = self::mb_substr_split_unicode( $haystack, intval( $offset ) );

		$ar = array();
		preg_match( '/' . $needle . '/u', $haystack, $ar, PREG_OFFSET_CAPTURE, $byteOffset );

		if ( isset( $ar[0][1] ) ) {
			// PREG_OFFSET_CAPTURE reports bytes; callers expect characters
			return self::mb_strlen( substr( $haystack, 0, $ar[0][1] ) );
		} else {
			return false;
		}
	}

	/**
	 * Fallback implementation of mb_strrpos, hardcoded to UTF-8.
	 *
	 * Both $offset and the return value are measured in characters. PCRE
	 * works in bytes, so the offset is converted to a byte position before
	 * matching and the match position is converted back afterwards.
	 *
	 * @param $haystack String
	 * @param $needle String
	 * @param $offset Integer: optional position, in characters, at which
	 *   scanning starts; negative counts from the end
	 * @param $encoding String: optional encoding; ignored
	 * @return Mixed: character position of the last match, or false if none
	 */
	public static function mb_strrpos( $haystack, $needle, $offset = 0, $encoding = '' ) {
		$needle = preg_quote( $needle, '/' );
		$byteOffset = self::mb_substr_split_unicode( $haystack, intval( $offset ) );

		$ar = array();
		preg_match_all( '/' . $needle . '/u', $haystack, $ar, PREG_OFFSET_CAPTURE, $byteOffset );

		if ( isset( $ar[0] ) && count( $ar[0] ) > 0 ) {
			$last = $ar[0][count( $ar[0] ) - 1];
			if ( isset( $last[1] ) ) {
				// PREG_OFFSET_CAPTURE reports bytes; callers expect characters
				return self::mb_strlen( substr( $haystack, 0, $last[1] ) );
			}
		}
		return false;
	}

}