includes/Fallback.php

   1 <?php
   2 /**
   3  * Fallback functions for PHP installed without mbstring support.
   4  *
   5  * This program is free software; you can redistribute it and/or modify
   6  * it under the terms of the GNU General Public License as published by
   7  * the Free Software Foundation; either version 2 of the License, or
   8  * (at your option) any later version.
   9  *
  10  * This program is distributed in the hope that it will be useful,
  11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  13  * GNU General Public License for more details.
  14  *
  15  * You should have received a copy of the GNU General Public License along
  16  * with this program; if not, write to the Free Software Foundation, Inc.,
  17  * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
  18  * http://www.gnu.org/copyleft/gpl.html
  19  *
  20  * @file
  21  */
  22
  23 /**
  24  * Fallback functions for PHP installed without mbstring support
  25  */
  26 class Fallback {
  27
  28         /**
  29          * @param $from
  30          * @param $to
  31          * @param $string
  32          * @return string
  33          */
  34         public static function iconv( $from, $to, $string ) {
  35                 if ( substr( $to, -8 ) == '//IGNORE' ) {
  36                         $to = substr( $to, 0, strlen( $to ) - 8 );
  37                 }
  38                 if ( strcasecmp( $from, $to ) == 0 ) {
  39                         return $string;
  40                 }
  41                 if ( strcasecmp( $from, 'utf-8' ) == 0 ) {
  42                         return utf8_decode( $string );
  43                 }
  44                 if ( strcasecmp( $to, 'utf-8' ) == 0 ) {
  45                         return utf8_encode( $string );
  46                 }
  47                 return $string;
  48         }
  49
  50         /**
  51          * Fallback implementation for mb_substr, hardcoded to UTF-8.
  52          * Attempts to be at least _moderately_ efficient; best optimized
  53          * for relatively small offset and count values -- about 5x slower
  54          * than native mb_string in my testing.
  55          *
  56          * Larger offsets are still fairly efficient for Latin text, but
  57          * can be up to 100x slower than native if the text is heavily
  58          * multibyte and we have to slog through a few hundred kb.
  59          *
  60          * @param $str
  61          * @param $start
  62          * @param $count string
  63          *
  64          * @return string
  65          */
  66         public static function mb_substr( $str, $start, $count = 'end' ) {
  67                 if ( $start != 0 ) {
  68                         $split = self::mb_substr_split_unicode( $str, intval( $start ) );
  69                         $str = substr( $str, $split );
  70                 }
  71
  72                 if ( $count !== 'end' ) {
  73                         $split = self::mb_substr_split_unicode( $str, intval( $count ) );
  74                         $str = substr( $str, 0, $split );
  75                 }
  76
  77                 return $str;
  78         }
  79
  80         /**
  81          * @param $str
  82          * @param $splitPos
  83          * @return int
  84          */
  85         public static function mb_substr_split_unicode( $str, $splitPos ) {
  86                 if ( $splitPos == 0 ) {
  87                         return 0;
  88                 }
  89
  90                 $byteLen = strlen( $str );
  91
  92                 if ( $splitPos > 0 ) {
  93                         if ( $splitPos > 256 ) {
  94                                 // Optimize large string offsets by skipping ahead N bytes.
  95                                 // This will cut out most of our slow time on Latin-based text,
  96                                 // and 1/2 to 1/3 on East European and Asian scripts.
  97                                 $bytePos = $splitPos;
  98                                 while ( $bytePos < $byteLen && $str[$bytePos] >= "\x80" && $str[$bytePos] < "\xc0" ) {
  99                                         ++$bytePos;
 100                                 }
 101                                 $charPos = mb_strlen( substr( $str, 0, $bytePos ) );
 102                         } else {
 103                                 $charPos = 0;
 104                                 $bytePos = 0;
 105                         }
 106
 107                         while ( $charPos++ < $splitPos ) {
 108                                 ++$bytePos;
 109                                 // Move past any tail bytes
 110                                 while ( $bytePos < $byteLen && $str[$bytePos] >= "\x80" && $str[$bytePos] < "\xc0" ) {
 111                                         ++$bytePos;
 112                                 }
 113                         }
 114                 } else {
 115                         $splitPosX = $splitPos + 1;
 116                         $charPos = 0; // relative to end of string; we don't care about the actual char position here
 117                         $bytePos = $byteLen;
 118                         while ( $bytePos > 0 && $charPos-- >= $splitPosX ) {
 119                                 --$bytePos;
 120                                 // Move past any tail bytes
 121                                 while ( $bytePos > 0 && $str[$bytePos] >= "\x80" && $str[$bytePos] < "\xc0" ) {
 122                                         --$bytePos;
 123                                 }
 124                         }
 125                 }
 126
 127                 return $bytePos;
 128         }
 129
 130         /**
 131          * Fallback implementation of mb_strlen, hardcoded to UTF-8.
 132          * @param string $str
 133          * @param string $enc optional encoding; ignored
 134          * @return int
 135          */
 136         public static function mb_strlen( $str, $enc = '' ) {
 137                 $counts = count_chars( $str );
 138                 $total = 0;
 139
 140                 // Count ASCII bytes
 141                 for ( $i = 0; $i < 0x80; $i++ ) {
 142                         $total += $counts[$i];
 143                 }
 144
 145                 // Count multibyte sequence heads
 146                 for ( $i = 0xc0; $i < 0xff; $i++ ) {
 147                         $total += $counts[$i];
 148                 }
 149                 return $total;
 150         }
 151
 152         /**
 153          * Fallback implementation of mb_strpos, hardcoded to UTF-8.
 154          * @param $haystack String
 155          * @param $needle String
 156          * @param string $offset optional start position
 157          * @param string $encoding optional encoding; ignored
 158          * @return int
 159          */
 160         public static function mb_strpos( $haystack, $needle, $offset = 0, $encoding = '' ) {
 161                 $needle = preg_quote( $needle, '/' );
 162
 163                 $ar = array();
 164                 preg_match( '/' . $needle . '/u', $haystack, $ar, PREG_OFFSET_CAPTURE, $offset );
 165
 166                 if ( isset( $ar[0][1] ) ) {
 167                         return $ar[0][1];
 168                 } else {
 169                         return false;
 170                 }
 171         }
 172
 173         /**
 174          * Fallback implementation of mb_strrpos, hardcoded to UTF-8.
 175          * @param $haystack String
 176          * @param $needle String
 177          * @param string $offset optional start position
 178          * @param string $encoding optional encoding; ignored
 179          * @return int
 180          */
 181         public static function mb_strrpos( $haystack, $needle, $offset = 0, $encoding = '' ) {
 182                 $needle = preg_quote( $needle, '/' );
 183
 184                 $ar = array();
 185                 preg_match_all( '/' . $needle . '/u', $haystack, $ar, PREG_OFFSET_CAPTURE, $offset );
 186
 187                 if ( isset( $ar[0] ) && count( $ar[0] ) > 0 &&
 188                         isset( $ar[0][count( $ar[0] ) - 1][1] ) ) {
 189                         return $ar[0][count( $ar[0] ) - 1][1];
 190                 } else {
 191                         return false;
 192                 }
 193         }
 194 }