lib/pkp/classes/core/String.inc.php

   1 <?php
   2
   3 /**
   4  * @file classes/core/String.inc.php
   5  *
   6  * Copyright (c) 2000-2009 John Willinsky
   7  * Distributed under the GNU GPL v2. For full terms see the file docs/COPYING.
   8  *
   9  * @class String
  10  * @ingroup core
  11  *
  12  * @brief String manipulation wrapper class.
  13  *
  14  */
  15
  16 // $Id: String.inc.php,v 1.18 2009/08/11 21:59:55 mj Exp $
  17
  18 /*
 * Perl-compatible regular expression (PCRE) constants:
  20  * These are defined application-wide for consistency
  21  */
  22
  23 // common URL syntax
  24 define('PCRE_URL', '(http|https|ftp):\/\/(([A-Z0-9][A-Z0-9_-]*)(\.[A-Z0-9][A-Z0-9_-]*)+)(:(\d+))?(\/.)?');
  25
  26 // RFC-2822 email addresses
  27 define('PCRE_EMAIL_ADDRESS',
  28         '[-a-z0-9!#\$%&\'\*\+\/=\?\^_\`\{\|\}~]' . '+' . // One or more atom characters.
  29         '(\.' . '[-a-z0-9!#\$%&\'\*\+\/=\?\^_\`\{\|\}~]' . '+)*'. // Followed by zero or more dot separated sets of one or more atom characters.
  30         '@'. // Followed by an "at" character.
  31         '(' . '([a-z0-9]([-a-z0-9]*[a-z0-9]+)?)' . '{1,63}\.)+'. // Followed by one or max 63 domain characters (dot separated).
  32         '([a-z0-9]([-a-z0-9]*[a-z0-9]+)?)' . '{2,63}' // Must be followed by one set consisting a period of two or max 63 domain characters.
  33         );
  34
  35 class String {
  36         /**
  37         * Perform initialization required for the string wrapper library.
  38         */
  39         function init() {
  40                 $clientCharset = strtolower(Config::getVar('i18n', 'client_charset'));
  41
  42                 // Check if mbstring is installed (requires PHP >= 4.3.0)
  43                 if (String::hasMBString()) {
  44                         // mbstring routines are available
  45                         define('ENABLE_MBSTRING', true);
  46
  47                         // Set up required ini settings for mbstring
  48                         // FIXME Do any other mbstring settings need to be set?
  49                         mb_internal_encoding($clientCharset);
  50                         mb_substitute_character('63');          // question mark
  51                 }
  52
  53                 // Define modifier to be used in regexp_* routines
  54                 // FIXME Should non-UTF-8 encodings be supported with mbstring?
  55                 if ($clientCharset == 'utf-8' && String::hasPCREUTF8()) {
  56                         define('PCRE_UTF8', 'u');
  57                 } else {
  58                         define('PCRE_UTF8', '');
  59                 }
  60         }
  61
  62         /**
  63         * Check if server has the mbstring library.
  64         * Currently requires PHP >= 4.3.0 (for mb_strtolower, mb_strtoupper,
  65         * and mb_substr_count)
  66         * @return boolean
  67         */
  68         function hasMBString() {
  69                 static $hasMBString;
  70                 if (isset($hasMBString)) return $hasMBString;
  71
  72                 // If string overloading is active, it will break many of the
  73                 // native implementations. mbstring.func_overload must be set
  74                 // to 0, 1 or 4 in php.ini (string overloading disabled).
  75                 if (ini_get('mbstring.func_overload') && defined('MB_OVERLOAD_STRING')) {
  76                         $hasMBString = false;
  77                 } else {
  78                         $hasMBString = (
  79                         extension_loaded('mbstring') &&
  80                         function_exists('mb_strlen') &&
  81                         function_exists('mb_strpos') &&
  82                         function_exists('mb_strrpos') &&
  83                         function_exists('mb_substr') &&
  84                         function_exists('mb_strtolower') &&
  85                         function_exists('mb_strtoupper') &&
  86                         function_exists('mb_substr_count') &&
  87                         function_exists('mb_send_mail')
  88                         );
  89                 }
  90                 return $hasMBString;
  91         }
  92
  93         /**
  94         * Check if server supports the PCRE_UTF8 modifier.
  95         * @return boolean
  96         */
  97         function hasPCREUTF8() {
  98                 // The PCRE_UTF8 modifier is only supported on PHP >= 4.1.0 (*nix) or PHP >= 4.2.3 (win32)
  99                 // Evil check to see if PCRE_UTF8 is supported
 100                 if (@preg_match('//u', '')) {
 101                         return true;
 102                 } else {
 103                         return false;
 104                 }
 105         }
 106
 107         //
 108         // Wrappers for basic string manipulation routines.
 109         // See the phputf8 documentation for usage.
 110         //
 111
 112         /**
 113         * @see http://ca.php.net/manual/en/function.strlen.php
 114         */
 115         function strlen($string) {
 116                 if (defined('ENABLE_MBSTRING')) {
 117                         require_once 'mbstring/core.php';
 118                 } else {
 119                         require_once 'utils/unicode.php';
 120                         require_once 'native/core.php';
 121                 }
 122                 return utf8_strlen($string);
 123         }
 124
 125         /**
 126         * @see http://ca.php.net/manual/en/function.strpos.php
 127         */
 128         function strpos($haystack, $needle, $offset = 0) {
 129                 if (defined('ENABLE_MBSTRING')) {
 130                         require_once 'mbstring/core.php';
 131                 } else {
 132                         require_once 'utils/unicode.php';
 133                         require_once 'native/core.php';
 134                 }
 135                 return utf8_strpos($haystack, $needle, $offset);
 136         }
 137
 138         /**
 139         * @see http://ca.php.net/manual/en/function.strrpos.php
 140         */
 141         function strrpos($haystack, $needle) {
 142                 if (defined('ENABLE_MBSTRING')) {
 143                         require_once 'mbstring/core.php';
 144                 } else {
 145                         require_once 'utils/unicode.php';
 146                         require_once 'native/core.php';
 147                 }
 148                 return utf8_strrpos($haystack, $needle, $offset);
 149         }
 150
 151         /**
 152         * @see http://ca.php.net/manual/en/function.substr.php
 153         */
 154         function substr($string, $start, $length = false) {
 155                 if (defined('ENABLE_MBSTRING')) {
 156                         require_once 'mbstring/core.php';
 157                 } else {
 158                         require_once 'utils/unicode.php';
 159                         require_once 'native/core.php';
 160                 }
 161                 return utf8_substr($string, $start, $length);
 162         }
 163
 164         /**
 165         * @see http://ca.php.net/manual/en/function.strtolower.php
 166         */
 167         function strtolower($string) {
 168                 if (defined('ENABLE_MBSTRING')) {
 169                         require_once 'mbstring/core.php';
 170                 } else {
 171                         require_once 'utils/unicode.php';
 172                         require_once 'native/core.php';
 173                 }
 174                 return utf8_strtolower($string);
 175         }
 176
 177         /**
 178         * @see http://ca.php.net/manual/en/function.strtoupper.php
 179         */
 180         function strtoupper($string) {
 181                 if (defined('ENABLE_MBSTRING')) {
 182                         require_once 'mbstring/core.php';
 183                 } else {
 184                         require_once 'utils/unicode.php';
 185                         require_once 'native/core.php';
 186                 }
 187                 return utf8_strtoupper($string);
 188         }
 189
 190         /**
 191         * @see http://ca.php.net/manual/en/function.substr_count.php
 192         */
 193         function substr_count($haystack, $needle) {
 194                 if (defined('ENABLE_MBSTRING')) {
 195                         return mb_substr_count($haystack, $needle); // Requires PHP >= 4.3.0
 196                 } else {
 197                         return substr_count($haystack, $needle);
 198                 }
 199         }
 200
 201         /**
 202         * @see http://ca.php.net/manual/en/function.encode_mime_header.php
 203         */
 204         function encode_mime_header($string) {
 205                 if (defined('ENABLE_MBSTRING')) {
 206                         return mb_encode_mimeheader($string, mb_internal_encoding(), 'B', MAIL_EOL);
 207                 }  else {
 208                         return $string;
 209                 }
 210         }
 211
 212         /**
 213         * @see http://ca.php.net/manual/en/function.mail.php
 214         */
 215         function mail($to, $subject, $message, $additional_headers = '', $additional_parameters = '') {
 216                 // Cannot use mb_send_mail as it base64 encodes the whole body of the email,
 217                 // making it useless for multipart emails
 218                 if (empty($additional_parameters)) {
 219                         return mail($to, $subject, $message, $additional_headers);
 220                 } else {
 221                         return mail($to, $subject, $message, $additional_headers, $additional_parameters);
 222                 }
 223         }
 224
 225         //
 226         // Wrappers for PCRE-compatible regular expression routines.
 227         // See the php.net documentation for usage.
 228         //
 229
 230         /**
 231         * @see http://ca.php.net/manual/en/function.regexp_quote.php
 232         */
 233         function regexp_quote($string, $delimiter = '/') {
 234                 return preg_quote($string, $delimiter);
 235         }
 236
 237         /**
 238         * @see http://ca.php.net/manual/en/function.regexp_grep.php
 239         */
 240         function regexp_grep($pattern, $input) {
 241                 if (PCRE_UTF8 && !String::utf8_compliant($input)) $input = String::utf8_bad_strip($input);
 242                 return preg_grep($pattern . PCRE_UTF8, $input);
 243         }
 244
 245         /**
 246         * @see http://ca.php.net/manual/en/function.regexp_match.php
 247         */
 248         function regexp_match($pattern, $subject) {
 249                 if (PCRE_UTF8 && !String::utf8_compliant($subject)) $subject = String::utf8_bad_strip($subject);
 250                 return preg_match($pattern . PCRE_UTF8, $subject);
 251         }
 252
 253         /**
 254         * @see http://ca.php.net/manual/en/function.regexp_match_get.php
 255         */
 256         function regexp_match_get($pattern, $subject, &$matches) {
 257                 // NOTE: This function was created since PHP < 5.x does not support optional reference parameters
 258                 if (PCRE_UTF8 && !String::utf8_compliant($subject)) $subject = String::utf8_bad_strip($subject);
 259                 return preg_match($pattern . PCRE_UTF8, $subject, $matches);
 260         }
 261
 262         /**
 263         * @see http://ca.php.net/manual/en/function.regexp_match_all.php
 264         */
 265         function regexp_match_all($pattern, $subject, &$matches) {
 266                 if (PCRE_UTF8 && !String::utf8_compliant($subject)) $subject = String::utf8_bad_strip($subject);
 267                 return preg_match_all($pattern . PCRE_UTF8, $subject, $matches);
 268         }
 269
 270         /**
 271         * @see http://ca.php.net/manual/en/function.regexp_replace.php
 272         */
 273         function regexp_replace($pattern, $replacement, $subject, $limit = -1) {
 274                 if (PCRE_UTF8 && !String::utf8_compliant($subject)) $subject = String::utf8_bad_strip($subject);
 275                 return preg_replace($pattern . PCRE_UTF8, $replacement, $subject, $limit);
 276         }
 277
 278         /**
 279         * @see http://ca.php.net/manual/en/function.regexp_replace_callback.php
 280         */
 281         function regexp_replace_callback($pattern, $callback, $subject, $limit = -1) {
 282                 if (PCRE_UTF8 && !String::utf8_compliant($subject)) $subject = String::utf8_bad_strip($subject);
 283                 return preg_replace_callback($pattern . PCRE_UTF8, $callback, $subject, $limit);
 284         }
 285
 286         /**
 287         * @see http://ca.php.net/manual/en/function.regexp_split.php
 288         */
 289         function regexp_split($pattern, $subject, $limit = -1) {
 290                 if (PCRE_UTF8 && !String::utf8_compliant($subject)) $subject = String::utf8_bad_strip($subject);
 291                 return preg_split($pattern . PCRE_UTF8, $subject, $limit);
 292         }
 293
 294         /**
 295         * @see http://ca.php.net/manual/en/function.mime_content_type.php
 296         */
 297         function mime_content_type($filename) {
 298                 if (function_exists('mime_content_type')) {
 299                         return mime_content_type($filename);
 300                 } elseif (function_exists('finfo_open')) {
 301                         $localeFiles =& Registry::get('fileInfo', true, null);
 302                         if ($fi === null) {
 303                                 $fi = finfo_open(FILEINFO_MIME, Config::getVar('finfo', 'mime_database_path'));
 304                         }
 305                         if ($fi !== false) {
 306                                 return strtok(finfo_file($fi, $filename), ' ;');
 307                         }
 308                 }
 309
 310                 // Fall back on an external "file" tool
 311                 $f = escapeshellarg($filename);
 312                 $result = trim(`file --brief --mime $f`);
 313                 // Make sure we just return the mime type.
 314                 if (($i = strpos($result, ';')) !== false) {
 315                         $result = trim(substr($result, 0, $i));
 316                 }
 317                 return $result;
 318         }
 319
 320
 321         /**
 322         * Strip unsafe HTML from the input text. Covers XSS attacks like scripts,
 323         * onclick(...) attributes, javascript: urls, and special characters.
 324         * @param $input string input string
 325         * @return string
 326         */
 327         function stripUnsafeHtml($input) {
 328                 // Parts of this implementation were taken from Horde:
 329                 // see http://cvs.horde.org/co.php/framework/MIME/MIME/Viewer/html.php.
 330
 331                 $allowedHtml = Config::getVar('security', 'allowed_html');
 332                 if ($allowedHtml == '') $allowedHtml = '<a> <em> <strong> <cite> <code> <ul> <ol> <li> <dl> <dt> <dd> <b> <i> <u> <img> <sup> <sub> <br> <p>';
 333
 334                 $html = strip_tags($input, $allowedHtml);
 335
 336                 // Change space entities to space characters
 337                 $html = preg_replace('/&#(x0*20|0*32);?/i', ' ', $html);
 338
 339                 // Remove non-printable characters
 340                 $html = preg_replace('/&#x?0*([9A-D]|1[0-3]);/i', '&nbsp;', $html);
 341                 $html = preg_replace('/&#x?0*[9A-D]([^0-9A-F]|$)/i', '&nbsp\\1', $html);
 342                 $html = preg_replace('/&#0*(9|1[0-3])([^0-9]|$)/i', '&nbsp\\2', $html);
 343
 344                 // Remove overly long numeric entities
 345                 $html = preg_replace('/&#x?0*[0-9A-F]{6,};?/i', '&nbsp;', $html);
 346
 347                 /* Get all attribute="javascript:foo()" tags. This is
 348                 * essentially the regex /(=|url\()("?)[^>]* script:/ but
 349                 * expanded to catch camouflage with spaces and entities. */
 350                 $preg   = '/((&#0*61;?|&#x0*3D;?|=)|'
 351                         . '((u|&#0*85;?|&#x0*55;?|&#0*117;?|&#x0*75;?)\s*'
 352                         . '(r|&#0*82;?|&#x0*52;?|&#0*114;?|&#x0*72;?)\s*'
 353                         . '(l|&#0*76;?|&#x0*4c;?|&#0*108;?|&#x0*6c;?)\s*'
 354                         . '(\()))\s*'
 355                         . '(&#0*34;?|&#x0*22;?|"|&#0*39;?|&#x0*27;?|\')?'
 356                         . '[^>]*\s*'
 357                         . '(s|&#0*83;?|&#x0*53;?|&#0*115;?|&#x0*73;?)\s*'
 358                         . '(c|&#0*67;?|&#x0*43;?|&#0*99;?|&#x0*63;?)\s*'
 359                         . '(r|&#0*82;?|&#x0*52;?|&#0*114;?|&#x0*72;?)\s*'
 360                         . '(i|&#0*73;?|&#x0*49;?|&#0*105;?|&#x0*69;?)\s*'
 361                         . '(p|&#0*80;?|&#x0*50;?|&#0*112;?|&#x0*70;?)\s*'
 362                         . '(t|&#0*84;?|&#x0*54;?|&#0*116;?|&#x0*74;?)\s*'
 363                         . '(:|&#0*58;?|&#x0*3a;?)/i';
 364                 $html = preg_replace($preg, '\1\8PKPCleaned', $html);
 365
 366                 /* Get all on<foo>="bar()". NEVER allow these. */
 367                 $html = preg_replace('/([\s"\']+'
 368                         . '(o|&#0*79;?|&#0*4f;?|&#0*111;?|&#0*6f;?)'
 369                         . '(n|&#0*78;?|&#0*4e;?|&#0*110;?|&#0*6e;?)'
 370                         . '\w+)\s*=/i', '\1PKPCleaned=', $html);
 371
 372                 $pattern = array(
 373                         '|<([^>]*)&{.*}([^>]*)>|',
 374                         '|<([^>]*)mocha:([^>]*)>|i',
 375                         '|<([^>]*)binding:([^>]*)>|i'
 376                 );
 377                 $replace = array('<&{;}\3>', '<\1PKPCleaned:\2>', '<\1PKPCleaned:\2>');
 378                 $html = preg_replace($pattern, $replace, $html);
 379
 380                 return $html;
 381         }
 382
 383         //
 384         // Wrappers for UTF-8 validation routines
 385         // See the phputf8 documentation for usage.
 386         //
 387
 388         /**
 389         * Detect whether a string contains non-ascii multibyte sequences in the UTF-8 range
 390         * @param $input string input string
 391         * @return boolean
 392         */
 393         function utf8_is_valid($str) {
 394                 require_once 'utils/validation.php';
 395                 return utf8_is_valid($str);
 396         }
 397
 398         /**
 399         * Tests whether a string complies as UTF-8; faster and less strict than utf8_is_valid
 400         * see lib/phputf8/utils/validation.php for more details
 401         * @param $input string input string
 402         * @return boolean
 403         */
 404         function utf8_compliant($str) {
 405                 require_once 'utils/validation.php';
 406                 return utf8_compliant($str);
 407         }
 408
 409         /**
 410         * Locates the first bad byte in a UTF-8 string returning it's byte index in the string
 411         * @param $input string input string
 412         * @return string
 413         */
 414         function utf8_bad_find($str) {
 415                 require_once 'utils/bad.php';
 416                 return utf8_bad_find($str);
 417         }
 418
 419         /**
 420         * Strips out any bad bytes from a UTF-8 string and returns the rest
 421         * @param $input string input string
 422         * @return string
 423         */
 424         function utf8_bad_strip($str) {
 425                 require_once 'utils/bad.php';
 426                 return utf8_bad_strip($str);
 427         }
 428
 429         /**
 430         * Replace bad bytes with an alternative character - ASCII character
 431         * @param $str string input string
 432         * @param $replace string optional
 433         * @return string
 434         */
 435         function utf8_bad_replace($str, $replace = '?') {
 436                 require_once 'utils/bad.php';
 437                 return utf8_bad_replace($str, $replace);
 438         }
 439
 440         /**
 441         * Replace bad bytes with an alternative character - ASCII character
 442         * @param $input string input string
 443         * @return string
 444         */
 445         function utf8_strip_ascii_ctrl($str) {
 446                 require_once 'utils/ascii.php';
 447                 return utf8_strip_ascii_ctrl($str);
 448         }
 449
 450         /**
 451         * Normalize a string in an unknown (non-UTF8) encoding into a valid UTF-8 sequence
 452         * @param $input string input string
 453         * @return string
 454         */
 455         function utf8_normalize($str) {
 456                 import('core.Transcoder');
 457
 458                 if (String::hasMBString()) {
 459                         // NB: CP-1252 often segfaults; we've left it out here but it will detect as 'ISO-8859-1'
 460                         $mb_encoding_order = 'UTF-8, UTF-7, ASCII, ISO-8859-1, EUC-JP, SJIS, eucJP-win, SJIS-win, JIS, ISO-2022-JP';
 461
 462                         if (checkPhpVersion('4.3.8')) {
 463                                 $detected_encoding = mb_detect_encoding($str, $mb_encoding_order, FALSE);
 464                         } else {
 465                                 $detected_encoding = mb_detect_encoding($str, $mb_encoding_order);
 466                         }
 467
 468                 } elseif (function_exists('iconv') && strlen(iconv('CP1252', 'UTF-8', $str)) != strlen(iconv('ISO-8859-1', 'UTF-8', $str))) {
 469                         // use iconv to detect CP-1252, assuming default ISO-8859-1
 470                         $detected_encoding = 'CP1252';
 471                 } else {
 472                         // assume ISO-8859-1, PHP default
 473                         $detected_encoding = 'ISO-8859-1';
 474                 }
 475
 476                 // transcode CP-1252/ISO-8859-1 into HTML entities; this works because CP-1252 is mapped onto ISO-8859-1
 477                 if ('ISO-8859-1' == $detected_encoding || 'CP1252' == $detected_encoding) {
 478                         $trans = new Transcoder('CP1252', 'HTML-ENTITIES');
 479                         $str = $trans->trans($str);
 480                 }
 481
 482                 // transcode from detected encoding to to UTF-8
 483                 $trans = new Transcoder($detected_encoding, 'UTF-8');
 484                 $str = $trans->trans($str);
 485
 486                 return $str;
 487         }
 488
 489         /**
 490         * Returns the UTF-8 string corresponding to the unicode value
 491         * Does not require any multibyte PHP libraries
 492         * (from php.net, courtesy - romans@void.lv)
 493         * @param $num int
 494         * @return string
 495         */
 496         function code2utf ($num) {
 497                 if ($num < 128) return chr($num);
 498                 if ($num < 2048) return chr(($num >> 6) + 192) . chr(($num & 63) + 128);
 499                 if ($num < 65536) return chr(($num >> 12) + 224) . chr((($num >> 6) & 63) + 128) . chr(($num & 63) + 128);
 500                 if ($num < 2097152) return chr(($num >> 18) + 240) . chr((($num >> 12) & 63) + 128) . chr((($num >> 6) & 63) + 128) . chr(($num & 63) + 128);
 501                 return '';
 502         }
 503
 504         /**
 505         * Convert UTF-8 encoded characters in a string to escaped HTML entities
 506         * This is a helper function for transcoding into HTML or XML for output
 507         * @param $input string input string
 508         * @return string
 509         */
 510         function utf2html ($str) {
 511                 $ret = "";
 512                 $max = strlen($str);
 513                 $last = 0;  // keeps the index of the last regular character
 514
 515                 for ($i=0; $i<$max; $i++) {
 516                         $c = $str{$i};
 517                         $c1 = ord($c);
 518                         if ($c1>>5 == 6) {                                                                              // 110x xxxx, 110 prefix for 2 bytes unicode
 519                                 $ret .= substr($str, $last, $i-$last);                  // append all the regular characters we've passed
 520                                 $c1 &= 31;                                                                                                      // remove the 3 bit two bytes prefix
 521                                 $c2 = ord($str{++$i});                                                          // the next byte
 522                                 $c2 &= 63;                                                                                                      // remove the 2 bit trailing byte prefix
 523                                 $c2 |= (($c1 & 3) << 6);                                                        // last 2 bits of c1 become first 2 of c2
 524                                 $c1 >>= 2;                                                                                                      // c1 shifts 2 to the right
 525                                 $ret .= "&#" . ($c1 * 0x100 + $c2) . ";";       // this is the fastest string concatenation
 526                                 $last = $i+1;
 527                         }
 528                         elseif ($c1>>4 == 14) {                                                                 // 1110 xxxx, 110 prefix for 3 bytes unicode
 529                                 $ret .= substr($str, $last, $i-$last);                  // append all the regular characters we've passed
 530                                 $c2 = ord($str{++$i});                                                          // the next byte
 531                                 $c3 = ord($str{++$i});                                                          // the third byte
 532                                 $c1 &= 15;                                                                                              // remove the 4 bit three bytes prefix
 533                                 $c2 &= 63;                                                                                              // remove the 2 bit trailing byte prefix
 534                                 $c3 &= 63;                                                                                              // remove the 2 bit trailing byte prefix
 535                                 $c3 |= (($c2 & 3) << 6);                                                        // last 2 bits of c2 become first 2 of c3
 536                                 $c2 >>=2;                                                                                                       //c2 shifts 2 to the right
 537                                 $c2 |= (($c1 & 15) << 4);                                                       // last 4 bits of c1 become first 4 of c2
 538                                 $c1 >>= 4;                                                                                              // c1 shifts 4 to the right
 539                                 $ret .= '&#' . (($c1 * 0x10000) + ($c2 * 0x100) + $c3) . ';'; // this is the fastest string concatenation
 540                                 $last = $i+1;
 541                         }
 542                 }
 543                 $str=$ret . substr($str, $last, $i); // append the last batch of regular characters
 544
 545                 return $str;
 546         }
 547
 548         /**
 549         * Convert numeric HTML entities in a string to UTF-8 encoded characters
 550         * This is a native alternative to the buggy html_entity_decode() using UTF8
 551         * @param $str string input string
 552         * @return string
 553         */
 554         function html2utf($str) {
 555                 // convert named entities to numeric entities
 556                 $str = strtr($str, String::getHTMLEntities());
 557
 558                 // use PCRE-aware replace function to replace numeric entities
 559                 $str = String::regexp_replace('~&#x([0-9a-f]+);~ei', 'String::code2utf(hexdec("\\1"))', $str);
 560                 $str = String::regexp_replace('~&#([0-9]+);~e', 'String::code2utf(\\1)', $str);
 561
 562                 return $str;
 563         }
 564
 565         /**
 566         * Return an associative array of named->numeric HTML entities
 567         * Required to support HTML functions without objects in PHP4/PHP5
 568         * From php.net: function.get-html-translation-table.php
 569         * @return string
 570         */
 571         function getHTMLEntities () {
 572                 // define the conversion table
 573                 $html_entities = array(
 574                         "&Aacute;" => "&#193;", "&aacute;" => "&#225;", "&Acirc;" => "&#194;",
 575                         "&acirc;" => "&#226;",  "&acute;" => "&#180;",  "&AElig;" => "&#198;",
 576                         "&aelig;" => "&#230;",  "&Agrave;" => "&#192;", "&agrave;" => "&#224;",
 577                         "&alefsym;" => "&#8501;","&Alpha;" => "&#913;", "&alpha;" => "&#945;",
 578                         "&amp;" => "&#38;",     "&and;" => "&#8743;",   "&ang;" => "&#8736;",
 579                         "&apos;" => "&#39;",    "&Aring;" => "&#197;",  "&aring;" => "&#229;",
 580                         "&asymp;" => "&#8776;", "&Atilde;" => "&#195;", "&atilde;" => "&#227;",
 581                         "&Auml;" => "&#196;",   "&auml;" => "&#228;",   "&bdquo;" => "&#8222;",
 582                         "&Beta;" => "&#914;",   "&beta;" => "&#946;",   "&brvbar;" => "&#166;",
 583                         "&bull;" => "&#8226;",  "&cap;" => "&#8745;",   "&Ccedil;" => "&#199;",
 584                         "&ccedil;" => "&#231;", "&cedil;" => "&#184;",  "&cent;" => "&#162;",
 585                         "&Chi;" => "&#935;",    "&chi;" => "&#967;",    "&circ;" => "&#94;",
 586                         "&clubs;" => "&#9827;", "&cong;" => "&#8773;",  "&copy;" => "&#169;",
 587                         "&crarr;" => "&#8629;", "&cup;" => "&#8746;",   "&curren;" => "&#164;",
 588                         "&dagger;" => "&#8224;","&Dagger;" => "&#8225;", "&darr;" => "&#8595;",
 589                         "&dArr;" => "&#8659;",  "&deg;" => "&#176;",    "&Delta;" => "&#916;",
 590                         "&delta;" => "&#948;",  "&diams;" => "&#9830;", "&divide;" => "&#247;",
 591                         "&Eacute;" => "&#201;", "&eacute;" => "&#233;", "&Ecirc;" => "&#202;",
 592                         "&ecirc;" => "&#234;",  "&Egrave;" => "&#200;", "&egrave;" => "&#232;",
 593                         "&empty;" => "&#8709;", "&emsp;" => "&#8195;",  "&ensp;" => "&#8194;",
 594                         "&Epsilon;" => "&#917;","&epsilon;" => "&#949;","&equiv;" => "&#8801;",
 595                         "&Eta;" => "&#919;",    "&eta;" => "&#951;",    "&ETH;" => "&#208;",
 596                         "&eth;" => "&#240;",    "&Euml;" => "&#203;",   "&euml;" => "&#235;",
 597                         "&euro;" => "&#8364;",  "&exist;" => "&#8707;", "&fnof;" => "&#402;",
 598                         "&forall;" => "&#8704;","&frac12;" => "&#189;", "&frac14;" => "&#188;",
 599                         "&frac34;" => "&#190;", "&frasl;" => "&#8260;", "&Gamma;" => "&#915;",
 600                         "&gamma;" => "&#947;",  "&ge;" => "&#8805;",    "&gt;" => "&#62;",
 601                         "&harr;" => "&#8596;",  "&hArr;" => "&#8660;",  "&hearts;" => "&#9829;",
 602                         "&hellip;" => "&#8230;","&Iacute;" => "&#205;", "&iacute;" => "&#237;",
 603                         "&Icirc;" => "&#206;",  "&icirc;" => "&#238;",  "&iexcl;" => "&#161;",
 604                         "&Igrave;" => "&#204;", "&igrave;" => "&#236;", "&image;" => "&#8465;",
 605                         "&infin;" => "&#8734;", "&int;" => "&#8747;",   "&Iota;" => "&#921;",
 606                         "&iota;" => "&#953;",   "&iquest;" => "&#191;", "&isin;" => "&#8712;",
 607                         "&Iuml;" => "&#207;",   "&iuml;" => "&#239;",   "&Kappa;" => "&#922;",
 608                         "&kappa;" => "&#954;",  "&Lambda;" => "&#923;", "&lambda;" => "&#955;",
 609                         "&lang;" => "&#9001;",  "&laquo;" => "&#171;",  "&larr;" => "&#8592;",
 610                         "&lArr;" => "&#8656;",  "&lceil;" => "&#8968;",
 611                         "&ldquo;" => "&#8220;", "&le;" => "&#8804;",    "&lfloor;" => "&#8970;",
 612                         "&lowast;" => "&#8727;","&loz;" => "&#9674;",   "&lrm;" => "&#8206;",
 613                         "&lsaquo;" => "&#8249;","&lsquo;" => "&#8216;", "&lt;" => "&#60;",
 614                         "&macr;" => "&#175;",   "&mdash;" => "&#8212;", "&micro;" => "&#181;",
 615                         "&middot;" => "&#183;", "&minus;" => "&#45;",   "&Mu;" => "&#924;",
 616                         "&mu;" => "&#956;",     "&nabla;" => "&#8711;", "&nbsp;" => "&#160;",
 617                         "&ndash;" => "&#8211;", "&ne;" => "&#8800;",    "&ni;" => "&#8715;",
 618                         "&not;" => "&#172;",    "&notin;" => "&#8713;", "&nsub;" => "&#8836;",
 619                         "&Ntilde;" => "&#209;", "&ntilde;" => "&#241;", "&Nu;" => "&#925;",
 620                         "&nu;" => "&#957;",     "&Oacute;" => "&#211;", "&oacute;" => "&#243;",
 621                         "&Ocirc;" => "&#212;",  "&ocirc;" => "&#244;",  "&OElig;" => "&#338;",
 622                         "&oelig;" => "&#339;",  "&Ograve;" => "&#210;", "&ograve;" => "&#242;",
 623                         "&oline;" => "&#8254;", "&Omega;" => "&#937;",  "&omega;" => "&#969;",
 624                         "&Omicron;" => "&#927;","&omicron;" => "&#959;","&oplus;" => "&#8853;",
 625                         "&or;" => "&#8744;",    "&ordf;" => "&#170;",   "&ordm;" => "&#186;",
 626                         "&Oslash;" => "&#216;", "&oslash;" => "&#248;", "&Otilde;" => "&#213;",
 627                         "&otilde;" => "&#245;", "&otimes;" => "&#8855;","&Ouml;" => "&#214;",
 628                         "&ouml;" => "&#246;",   "&para;" => "&#182;",   "&part;" => "&#8706;",
 629                         "&permil;" => "&#8240;","&perp;" => "&#8869;",  "&Phi;" => "&#934;",
 630                         "&phi;" => "&#966;",    "&Pi;" => "&#928;",     "&pi;" => "&#960;",
 631                         "&piv;" => "&#982;",    "&plusmn;" => "&#177;", "&pound;" => "&#163;",
 632                         "&prime;" => "&#8242;", "&Prime;" => "&#8243;", "&prod;" => "&#8719;",
 633                         "&prop;" => "&#8733;",  "&Psi;" => "&#936;",    "&psi;" => "&#968;",
 634                         "&quot;" => "&#34;",    "&radic;" => "&#8730;", "&rang;" => "&#9002;",
 635                         "&raquo;" => "&#187;",  "&rarr;" => "&#8594;",  "&rArr;" => "&#8658;",
 636                         "&rceil;" => "&#8969;", "&rdquo;" => "&#8221;", "&real;" => "&#8476;",
 637                         "&reg;" => "&#174;",    "&rfloor;" => "&#8971;","&Rho;" => "&#929;",
 638                         "&rho;" => "&#961;",    "&rlm;" => "&#8207;",   "&rsaquo;" => "&#8250;",
 639                         "&rsquo;" => "&#8217;", "&sbquo;" => "&#8218;", "&Scaron;" => "&#352;",
 640                         "&scaron;" => "&#353;", "&sdot;" => "&#8901;",  "&sect;" => "&#167;",
 641                         "&shy;" => "&#173;",    "&Sigma;" => "&#931;",  "&sigma;" => "&#963;",
 642                         "&sigmaf;" => "&#962;", "&sim;" => "&#8764;",   "&spades;" => "&#9824;",
 643                         "&sub;" => "&#8834;",   "&sube;" => "&#8838;",  "&sum;" => "&#8721;",
 644                         "&sup1;" => "&#185;",   "&sup2;" => "&#178;",   "&sup3;" => "&#179;",
 645                         "&sup;" => "&#8835;",   "&supe;" => "&#8839;",  "&szlig;" => "&#223;",
 646                         "&Tau;" => "&#932;",    "&tau;" => "&#964;",    "&there4;" => "&#8756;",
 647                         "&Theta;" => "&#920;",  "&theta;" => "&#952;",  "&thetasym;" => "&#977;",
 648                         "&thinsp;" => "&#8201;","&THORN;" => "&#222;",  "&thorn;" => "&#254;",
 649                         "&tilde;" => "&#126;",  "&times;" => "&#215;",  "&trade;" => "&#8482;",
 650                         "&Uacute;" => "&#218;", "&uacute;" => "&#250;", "&uarr;" => "&#8593;",
 651                         "&uArr;" => "&#8657;",  "&Ucirc;" => "&#219;",  "&ucirc;" => "&#251;",
 652                         "&Ugrave;" => "&#217;", "&ugrave;" => "&#249;", "&uml;" => "&#168;",
 653                         "&upsih;" => "&#978;",  "&Upsilon;" => "&#933;","&upsilon;" => "&#965;",
 654                         "&Uuml;" => "&#220;",   "&uuml;" => "&#252;",   "&weierp;" => "&#8472;",
 655                         "&Xi;" => "&#926;",     "&xi;" => "&#958;",     "&Yacute;" => "&#221;",
 656                         "&yacute;" => "&#253;", "&yen;" => "&#165;",    "&yuml;" => "&#255;",
 657                         "&Yuml;" => "&#376;",   "&Zeta;" => "&#918;",   "&zeta;" => "&#950;",
 658                         "&zwj;" => "&#8205;",   "&zwnj;" => "&#8204;"
 659                 );
 660
 661                 return $html_entities;
 662         }
 663
 664         /**
 665         * Wrapper around fputcsv for systems that may or may not support it
 666         * (i.e. PHP before 5.1.0); see PHP documentation for fputcsv.
 667         */
 668         function fputcsv(&$handle, $fields = array(), $delimiter = ',', $enclosure = '"') {
 669                 // From PHP website, thanks to boefje at hotmail dot com
 670                 if (function_exists('fputcsv')) {
 671                         return fputcsv($handle, $fields, $delimiter, $enclosure);
 672                 }
 673                 $str = '';
 674                 $escape_char = '\\';
 675                 foreach ($fields as $value) {
 676                         if (    strpos($value, $delimiter) !== false ||
 677                                 strpos($value, $enclosure) !== false ||
 678                                 strpos($value, "\n") !== false ||
 679                                 strpos($value, "\r") !== false ||
 680                                 strpos($value, "\t") !== false ||
 681                                 strpos($value, ' ') !== false
 682                         ) {
 683                                 $str2 = $enclosure;
 684                                 $escaped = 0;
 685                                 $len = strlen($value);
 686                                 for ($i=0; $i<$len; $i++) {
 687                                         if ($value[$i] == $escape_char) $escaped = 1;
 688                                         elseif (!$escaped && $value[$i] == $enclosure) $str2 .= $enclosure;
 689                                         else $escaped = 0;
 690                                         $str2 .= $value[$i];
 691                                 }
 692                                 $str2 .= $enclosure;
 693                                 $str .= $str2 . $delimiter;
 694                         } else {
 695                                 $str .= $value . $delimiter;
 696                         }
 697                 }
 698                 $str = substr($str, 0, -1);
 699                 $str .= "\n";
 700                 return fwrite($handle, $str);
 701         }
 702
 703         /**
 704         * Construct a JSON string to use for AJAX communication
 705         * @param $status string The status of an event (e.g. false if form validation fails)
 706         * @param $content string The message to be delivered back to the calling script
 707         * @return string
 708         */
 709         function buildJSON($status = 'true', $content) {
 710                 return "{'status': $status, 'content': '$content'}";
 711         }
 712 }
 713
 714 ?>