includes/Fallback.php

   1 <?php
   2
   3 /**
   4  * This program is free software; you can redistribute it and/or modify
   5  * it under the terms of the GNU General Public License as published by
   6  * the Free Software Foundation; either version 2 of the License, or
   7  * (at your option) any later version.
   8  *
   9  * This program is distributed in the hope that it will be useful,
  10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12  * GNU General Public License for more details.
  13  *
  14  * You should have received a copy of the GNU General Public License along
  15  * with this program; if not, write to the Free Software Foundation, Inc.,
  16  * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
  17  * http://www.gnu.org/copyleft/gpl.html
  18  *
  19  */
  20
  21 /**
  22  * Fallback functions for PHP installed without mbstring support
  23  */
  24 class Fallback {
  25
  26         public static function iconv( $from, $to, $string ) {
  27                 if ( substr( $to, -8 ) == '//IGNORE' ) {
  28                         $to = substr( $to, 0, strlen( $to ) - 8 );
  29                 }
  30                 if( strcasecmp( $from, $to ) == 0 ) {
  31                         return $string;
  32                 }
  33                 if( strcasecmp( $from, 'utf-8' ) == 0 ) {
  34                         return utf8_decode( $string );
  35                 }
  36                 if( strcasecmp( $to, 'utf-8' ) == 0 ) {
  37                         return utf8_encode( $string );
  38                 }
  39                 return $string;
  40         }
  41
  42         /**
  43          * Fallback implementation for mb_substr, hardcoded to UTF-8.
  44          * Attempts to be at least _moderately_ efficient; best optimized
  45          * for relatively small offset and count values -- about 5x slower
  46          * than native mb_string in my testing.
  47          *
  48          * Larger offsets are still fairly efficient for Latin text, but
  49          * can be up to 100x slower than native if the text is heavily
  50          * multibyte and we have to slog through a few hundred kb.
  51          */
  52         public static function mb_substr( $str, $start, $count='end' ) {
  53                 if( $start != 0 ) {
  54                         $split = self::mb_substr_split_unicode( $str, intval( $start ) );
  55                         $str = substr( $str, $split );
  56                 }
  57
  58                 if( $count !== 'end' ) {
  59                         $split = self::mb_substr_split_unicode( $str, intval( $count ) );
  60                         $str = substr( $str, 0, $split );
  61                 }
  62
  63                 return $str;
  64         }
  65
  66         public static function mb_substr_split_unicode( $str, $splitPos ) {
  67                 if( $splitPos == 0 ) {
  68                         return 0;
  69                 }
  70
  71                 $byteLen = strlen( $str );
  72
  73                 if( $splitPos > 0 ) {
  74                         if( $splitPos > 256 ) {
  75                                 // Optimize large string offsets by skipping ahead N bytes.
  76                                 // This will cut out most of our slow time on Latin-based text,
  77                                 // and 1/2 to 1/3 on East European and Asian scripts.
  78                                 $bytePos = $splitPos;
  79                                 while ( $bytePos < $byteLen && $str{$bytePos} >= "\x80" && $str{$bytePos} < "\xc0" ) {
  80                                         ++$bytePos;
  81                                 }
  82                                 $charPos = mb_strlen( substr( $str, 0, $bytePos ) );
  83                         } else {
  84                                 $charPos = 0;
  85                                 $bytePos = 0;
  86                         }
  87
  88                         while( $charPos++ < $splitPos ) {
  89                                 ++$bytePos;
  90                                 // Move past any tail bytes
  91                                 while ( $bytePos < $byteLen && $str{$bytePos} >= "\x80" && $str{$bytePos} < "\xc0" ) {
  92                                         ++$bytePos;
  93                                 }
  94                         }
  95                 } else {
  96                         $splitPosX = $splitPos + 1;
  97                         $charPos = 0; // relative to end of string; we don't care about the actual char position here
  98                         $bytePos = $byteLen;
  99                         while( $bytePos > 0 && $charPos-- >= $splitPosX ) {
 100                                 --$bytePos;
 101                                 // Move past any tail bytes
 102                                 while ( $bytePos > 0 && $str{$bytePos} >= "\x80" && $str{$bytePos} < "\xc0" ) {
 103                                         --$bytePos;
 104                                 }
 105                         }
 106                 }
 107
 108                 return $bytePos;
 109         }
 110
 111         /**
 112          * Fallback implementation of mb_strlen, hardcoded to UTF-8.
 113          * @param string $str
 114          * @param string $enc optional encoding; ignored
 115          * @return int
 116          */
 117         public static function mb_strlen( $str, $enc = '' ) {
 118                 $counts = count_chars( $str );
 119                 $total = 0;
 120
 121                 // Count ASCII bytes
 122                 for( $i = 0; $i < 0x80; $i++ ) {
 123                         $total += $counts[$i];
 124                 }
 125
 126                 // Count multibyte sequence heads
 127                 for( $i = 0xc0; $i < 0xff; $i++ ) {
 128                         $total += $counts[$i];
 129                 }
 130                 return $total;
 131         }
 132
 133
 134         /**
 135          * Fallback implementation of mb_strpos, hardcoded to UTF-8.
 136          * @param $haystack String
 137          * @param $needle String
 138          * @param $offset String: optional start position
 139          * @param $encoding String: optional encoding; ignored
 140          * @return int
 141          */
 142         public static function mb_strpos( $haystack, $needle, $offset = 0, $encoding = '' ) {
 143                 $needle = preg_quote( $needle, '/' );
 144
 145                 $ar = array();
 146                 preg_match( '/' . $needle . '/u', $haystack, $ar, PREG_OFFSET_CAPTURE, $offset );
 147
 148                 if( isset( $ar[0][1] ) ) {
 149                         return $ar[0][1];
 150                 } else {
 151                         return false;
 152                 }
 153         }
 154
 155         /**
 156          * Fallback implementation of mb_strrpos, hardcoded to UTF-8.
 157          * @param $haystack String
 158          * @param $needle String
 159          * @param $offset String: optional start position
 160          * @param $encoding String: optional encoding; ignored
 161          * @return int
 162          */
 163         public static function mb_strrpos( $haystack, $needle, $offset = 0, $encoding = '' ) {
 164                 $needle = preg_quote( $needle, '/' );
 165
 166                 $ar = array();
 167                 preg_match_all( '/' . $needle . '/u', $haystack, $ar, PREG_OFFSET_CAPTURE, $offset );
 168
 169                 if( isset( $ar[0] ) && count( $ar[0] ) > 0 &&
 170                         isset( $ar[0][count( $ar[0] ) - 1][1] ) ) {
 171                         return $ar[0][count( $ar[0] ) - 1][1];
 172                 } else {
 173                         return false;
 174                 }
 175         }
 176
 177 }