inc/geshi.php

   1 <?php
   2 /**
   3  * GeSHi - Generic Syntax Highlighter
   4  *
   5  * The GeSHi class for Generic Syntax Highlighting. Please refer to the
   6  * documentation at http://qbnz.com/highlighter/documentation.php for more
   7  * information about how to use this class.
   8  *
   9  * For changes, release notes, TODOs etc, see the relevant files in the docs/
  10  * directory.
  11  *
  12  *   This file is part of GeSHi.
  13  *
  14  *  GeSHi is free software; you can redistribute it and/or modify
  15  *  it under the terms of the GNU General Public License as published by
  16  *  the Free Software Foundation; either version 2 of the License, or
  17  *  (at your option) any later version.
  18  *
  19  *  GeSHi is distributed in the hope that it will be useful,
  20  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
  21  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  22  *  GNU General Public License for more details.
  23  *
  24  *  You should have received a copy of the GNU General Public License
  25  *  along with GeSHi; if not, write to the Free Software
  26  *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  27  *
  28  * @package    geshi
  29  * @subpackage core
  30  * @author     Nigel McNie <nigel@geshi.org>, Benny Baumann <BenBE@omorphia.de>
  31  * @copyright  (C) 2004 - 2007 Nigel McNie, (C) 2007 - 2008 Benny Baumann
  32  * @license    http://gnu.org/copyleft/gpl.html GNU GPL
  33  *
  34  */
  35
  36 //
  37 // GeSHi Constants
  38 // You should use these constant names in your programs instead of
  39 // their values - you never know when a value may change in a future
  40 // version
  41 //
  42
  43 /** The version of this GeSHi file */
  44 define('GESHI_VERSION', '1.0.8.6');
  45
  46 // Define the root directory for the GeSHi code tree
  47 if (!defined('GESHI_ROOT')) {
  48     /** The root directory for GeSHi */
  49     define('GESHI_ROOT', dirname(__FILE__) . DIRECTORY_SEPARATOR);
  50 }
  51 /** The language file directory for GeSHi
  52     @access private */
  53 define('GESHI_LANG_ROOT', GESHI_ROOT . 'geshi' . DIRECTORY_SEPARATOR);
  54
  55 // Define if GeSHi should be paranoid about security
  56 if (!defined('GESHI_SECURITY_PARANOID')) {
  57     /** Tells GeSHi to be paranoid about security settings */
  58     define('GESHI_SECURITY_PARANOID', false);
  59 }
  60
  61 // Line numbers - use with enable_line_numbers()
  62 /** Use no line numbers when building the result */
  63 define('GESHI_NO_LINE_NUMBERS', 0);
  64 /** Use normal line numbers when building the result */
  65 define('GESHI_NORMAL_LINE_NUMBERS', 1);
  66 /** Use fancy line numbers when building the result */
  67 define('GESHI_FANCY_LINE_NUMBERS', 2);
  68
  69 // Container HTML type
  70 /** Use nothing to surround the source */
  71 define('GESHI_HEADER_NONE', 0);
  72 /** Use a "div" to surround the source */
  73 define('GESHI_HEADER_DIV', 1);
  74 /** Use a "pre" to surround the source */
  75 define('GESHI_HEADER_PRE', 2);
  76 /** Use a pre to wrap lines when line numbers are enabled or to wrap the whole code. */
  77 define('GESHI_HEADER_PRE_VALID', 3);
  78 /**
  79  * Use a "table" to surround the source:
  80  *
  81  *  <table>
  82  *    <thead><tr><td colspan="2">$header</td></tr></thead>
  83  *    <tbody><tr><td><pre>$linenumbers</pre></td><td><pre>$code></pre></td></tr></tbody>
  84  *    <tfooter><tr><td colspan="2">$footer</td></tr></tfoot>
  85  *  </table>
  86  *
  87  * this is essentially only a workaround for Firefox, see sf#1651996 or take a look at
  88  * https://bugzilla.mozilla.org/show_bug.cgi?id=365805
  89  * @note when linenumbers are disabled this is essentially the same as GESHI_HEADER_PRE
  90  */
  91 define('GESHI_HEADER_PRE_TABLE', 4);
  92
  93 // Capatalisation constants
  94 /** Lowercase keywords found */
  95 define('GESHI_CAPS_NO_CHANGE', 0);
  96 /** Uppercase keywords found */
  97 define('GESHI_CAPS_UPPER', 1);
  98 /** Leave keywords found as the case that they are */
  99 define('GESHI_CAPS_LOWER', 2);
 100
 101 // Link style constants
 102 /** Links in the source in the :link state */
 103 define('GESHI_LINK', 0);
 104 /** Links in the source in the :hover state */
 105 define('GESHI_HOVER', 1);
 106 /** Links in the source in the :active state */
 107 define('GESHI_ACTIVE', 2);
 108 /** Links in the source in the :visited state */
 109 define('GESHI_VISITED', 3);
 110
 111 // Important string starter/finisher
 112 // Note that if you change these, they should be as-is: i.e., don't
 113 // write them as if they had been run through htmlentities()
 114 /** The starter for important parts of the source */
 115 define('GESHI_START_IMPORTANT', '<BEGIN GeSHi>');
 116 /** The ender for important parts of the source */
 117 define('GESHI_END_IMPORTANT', '<END GeSHi>');
 118
 119 /**#@+
 120  *  @access private
 121  */
 122 // When strict mode applies for a language
 123 /** Strict mode never applies (this is the most common) */
 124 define('GESHI_NEVER', 0);
 125 /** Strict mode *might* apply, and can be enabled or
 126     disabled by {@link GeSHi->enable_strict_mode()} */
 127 define('GESHI_MAYBE', 1);
 128 /** Strict mode always applies */
 129 define('GESHI_ALWAYS', 2);
 130
 131 // Advanced regexp handling constants, used in language files
 132 /** The key of the regex array defining what to search for */
 133 define('GESHI_SEARCH', 0);
 134 /** The key of the regex array defining what bracket group in a
 135     matched search to use as a replacement */
 136 define('GESHI_REPLACE', 1);
 137 /** The key of the regex array defining any modifiers to the regular expression */
 138 define('GESHI_MODIFIERS', 2);
 139 /** The key of the regex array defining what bracket group in a
 140     matched search to put before the replacement */
 141 define('GESHI_BEFORE', 3);
 142 /** The key of the regex array defining what bracket group in a
 143     matched search to put after the replacement */
 144 define('GESHI_AFTER', 4);
 145 /** The key of the regex array defining a custom keyword to use
 146     for this regexp's html tag class */
 147 define('GESHI_CLASS', 5);
 148
 149 /** Used in language files to mark comments */
 150 define('GESHI_COMMENTS', 0);
 151
 152 /** Used to work around missing PHP features **/
 153 define('GESHI_PHP_PRE_433', !(version_compare(PHP_VERSION, '4.3.3') === 1));
 154
 155 /** make sure we can call stripos **/
 156 if (!function_exists('stripos')) {
 157     // the offset param of preg_match is not supported below PHP 4.3.3
 158     if (GESHI_PHP_PRE_433) {
 159         /**
 160          * @ignore
 161          */
 162         function stripos($haystack, $needle, $offset = null) {
 163             if (!is_null($offset)) {
 164                 $haystack = substr($haystack, $offset);
 165             }
 166             if (preg_match('/'. preg_quote($needle, '/') . '/', $haystack, $match, PREG_OFFSET_CAPTURE)) {
 167                 return $match[0][1];
 168             }
 169             return false;
 170         }
 171     }
 172     else {
 173         /**
 174          * @ignore
 175          */
 176         function stripos($haystack, $needle, $offset = null) {
 177             if (preg_match('/'. preg_quote($needle, '/') . '/', $haystack, $match, PREG_OFFSET_CAPTURE, $offset)) {
 178                 return $match[0][1];
 179             }
 180             return false;
 181         }
 182     }
 183 }
 184
 185 /** some old PHP / PCRE subpatterns only support up to xxx subpatterns in
 186     regular expressions. Set this to false if your PCRE lib is up to date
 187     @see GeSHi->optimize_regexp_list()
 188     **/
 189 define('GESHI_MAX_PCRE_SUBPATTERNS', 500);
 190 /** it's also important not to generate too long regular expressions
 191     be generous here... but keep in mind, that when reaching this limit we
 192     still have to close open patterns. 12k should do just fine on a 16k limit.
 193     @see GeSHi->optimize_regexp_list()
 194     **/
 195 define('GESHI_MAX_PCRE_LENGTH', 12288);
 196
 197 //Number format specification
 198 /** Basic number format for integers */
 199 define('GESHI_NUMBER_INT_BASIC', 1);        //Default integers \d+
 200 /** Enhanced number format for integers like seen in C */
 201 define('GESHI_NUMBER_INT_CSTYLE', 2);       //Default C-Style \d+[lL]?
 202 /** Number format to highlight binary numbers with a suffix "b" */
 203 define('GESHI_NUMBER_BIN_SUFFIX', 16);           //[01]+[bB]
 204 /** Number format to highlight binary numbers with a prefix % */
 205 define('GESHI_NUMBER_BIN_PREFIX_PERCENT', 32);   //%[01]+
 206 /** Number format to highlight binary numbers with a prefix 0b (C) */
 207 define('GESHI_NUMBER_BIN_PREFIX_0B', 64);        //0b[01]+
 208 /** Number format to highlight octal numbers with a leading zero */
 209 define('GESHI_NUMBER_OCT_PREFIX', 256);           //0[0-7]+
 210 /** Number format to highlight octal numbers with a prefix 0o (logtalk) */
 211 define('GESHI_NUMBER_OCT_PREFIX_0O', 512);           //0[0-7]+
 212 /** Number format to highlight octal numbers with a suffix of o */
 213 define('GESHI_NUMBER_OCT_SUFFIX', 1024);           //[0-7]+[oO]
 214 /** Number format to highlight hex numbers with a prefix 0x */
 215 define('GESHI_NUMBER_HEX_PREFIX', 4096);           //0x[0-9a-fA-F]+
 216 /** Number format to highlight hex numbers with a suffix of h */
 217 define('GESHI_NUMBER_HEX_SUFFIX', 8192);           //[0-9][0-9a-fA-F]*h
 218 /** Number format to highlight floating-point numbers without support for scientific notation */
 219 define('GESHI_NUMBER_FLT_NONSCI', 65536);          //\d+\.\d+
 220 /** Number format to highlight floating-point numbers without support for scientific notation */
 221 define('GESHI_NUMBER_FLT_NONSCI_F', 131072);       //\d+(\.\d+)?f
 222 /** Number format to highlight floating-point numbers with support for scientific notation (E) and optional leading zero */
 223 define('GESHI_NUMBER_FLT_SCI_SHORT', 262144);      //\.\d+e\d+
 224 /** Number format to highlight floating-point numbers with support for scientific notation (E) and required leading digit */
 225 define('GESHI_NUMBER_FLT_SCI_ZERO', 524288);       //\d+(\.\d+)?e\d+
 226 //Custom formats are passed by RX array
 227
 228 // Error detection - use these to analyse faults
 229 /** No sourcecode to highlight was specified
 230  * @deprecated
 231  */
 232 define('GESHI_ERROR_NO_INPUT', 1);
 233 /** The language specified does not exist */
 234 define('GESHI_ERROR_NO_SUCH_LANG', 2);
 235 /** GeSHi could not open a file for reading (generally a language file) */
 236 define('GESHI_ERROR_FILE_NOT_READABLE', 3);
 237 /** The header type passed to {@link GeSHi->set_header_type()} was invalid */
 238 define('GESHI_ERROR_INVALID_HEADER_TYPE', 4);
 239 /** The line number type passed to {@link GeSHi->enable_line_numbers()} was invalid */
 240 define('GESHI_ERROR_INVALID_LINE_NUMBER_TYPE', 5);
 241 /**#@-*/
 242
 243
 244 /**
 245  * The GeSHi Class.
 246  *
 247  * Please refer to the documentation for GeSHi 1.0.X that is available
 248  * at http://qbnz.com/highlighter/documentation.php for more information
 249  * about how to use this class.
 250  *
 251  * @package   geshi
 252  * @author    Nigel McNie <nigel@geshi.org>, Benny Baumann <BenBE@omorphia.de>
 253  * @copyright (C) 2004 - 2007 Nigel McNie, (C) 2007 - 2008 Benny Baumann
 254  */
 255 class GeSHi {
 256     /**#@+
 257      * @access private
 258      */
 259     /**
 260      * The source code to highlight
 261      * @var string
 262      */
 263     var $source = '';
 264
 265     /**
 266      * The language to use when highlighting
 267      * @var string
 268      */
 269     var $language = '';
 270
 271     /**
 272      * The data for the language used
 273      * @var array
 274      */
 275     var $language_data = array();
 276
 277     /**
 278      * The path to the language files
 279      * @var string
 280      */
 281     var $language_path = GESHI_LANG_ROOT;
 282
    /**
     * The code of the last error (one of the GESHI_ERROR_* constants),
     * or false if there is no error
     * @var int|false
     * @todo check err reporting works
     */
 288     var $error = false;
 289
 290     /**
 291      * Possible error messages
 292      * @var array
 293      */
 294     var $error_messages = array(
 295         GESHI_ERROR_NO_SUCH_LANG => 'GeSHi could not find the language {LANGUAGE} (using path {PATH})',
 296         GESHI_ERROR_FILE_NOT_READABLE => 'The file specified for load_from_file was not readable',
 297         GESHI_ERROR_INVALID_HEADER_TYPE => 'The header type specified is invalid',
 298         GESHI_ERROR_INVALID_LINE_NUMBER_TYPE => 'The line number type specified is invalid'
 299     );
 300
 301     /**
 302      * Whether highlighting is strict or not
 303      * @var boolean
 304      */
 305     var $strict_mode = false;
 306
 307     /**
 308      * Whether to use CSS classes in output
 309      * @var boolean
 310      */
 311     var $use_classes = false;
 312
 313     /**
 314      * The type of header to use. Can be one of the following
 315      * values:
 316      *
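     * - GESHI_HEADER_PRE: Source is outputted in a "pre" HTML element.
     * - GESHI_HEADER_DIV: Source is outputted in a "div" HTML element.
     * - GESHI_HEADER_NONE: No header is outputted.
     * - GESHI_HEADER_PRE_VALID: A "pre" wraps each line when line numbers
     *   are enabled, otherwise the whole code.
     * - GESHI_HEADER_PRE_TABLE: Source is outputted in a "table" HTML element.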
 320      *
 321      * @var int
 322      */
 323     var $header_type = GESHI_HEADER_PRE;
 324
 325     /**
 326      * Array of permissions for which lexics should be highlighted
 327      * @var array
 328      */
 329     var $lexic_permissions = array(
 330         'KEYWORDS' =>    array(),
 331         'COMMENTS' =>    array('MULTI' => true),
 332         'REGEXPS' =>     array(),
 333         'ESCAPE_CHAR' => true,
 334         'BRACKETS' =>    true,
 335         'SYMBOLS' =>     false,
 336         'STRINGS' =>     true,
 337         'NUMBERS' =>     true,
 338         'METHODS' =>     true,
 339         'SCRIPT' =>      true
 340     );
 341
 342     /**
 343      * The time it took to parse the code
 344      * @var double
 345      */
 346     var $time = 0;
 347
 348     /**
 349      * The content of the header block
 350      * @var string
 351      */
 352     var $header_content = '';
 353
 354     /**
 355      * The content of the footer block
 356      * @var string
 357      */
 358     var $footer_content = '';
 359
 360     /**
 361      * The style of the header block
 362      * @var string
 363      */
 364     var $header_content_style = '';
 365
 366     /**
 367      * The style of the footer block
 368      * @var string
 369      */
 370     var $footer_content_style = '';
 371
 372     /**
 373      * Tells if a block around the highlighted source should be forced
 374      * if not using line numbering
 375      * @var boolean
 376      */
 377     var $force_code_block = false;
 378
 379     /**
 380      * The styles for hyperlinks in the code
 381      * @var array
 382      */
 383     var $link_styles = array();
 384
 385     /**
 386      * Whether important blocks should be recognised or not
 387      * @var boolean
 388      * @deprecated
 389      * @todo REMOVE THIS FUNCTIONALITY!
 390      */
 391     var $enable_important_blocks = false;
 392
 393     /**
 394      * Styles for important parts of the code
 395      * @var string
 396      * @deprecated
 397      * @todo As above - rethink the whole idea of important blocks as it is buggy and
 398      * will be hard to implement in 1.2
 399      */
 400     var $important_styles = 'font-weight: bold; color: red;'; // Styles for important parts of the code
 401
 402     /**
 403      * Whether CSS IDs should be added to the code
 404      * @var boolean
 405      */
 406     var $add_ids = false;
 407
 408     /**
 409      * Lines that should be highlighted extra
 410      * @var array
 411      */
 412     var $highlight_extra_lines = array();
 413
 414     /**
 415      * Styles of lines that should be highlighted extra
 416      * @var array
 417      */
 418     var $highlight_extra_lines_styles = array();
 419
 420     /**
 421      * Styles of extra-highlighted lines
 422      * @var string
 423      */
 424     var $highlight_extra_lines_style = 'background-color: #ffc;';
 425
 426     /**
 427      * The line ending
 428      * If null, nl2br() will be used on the result string.
 429      * Otherwise, all instances of \n will be replaced with $line_ending
 430      * @var string
 431      */
 432     var $line_ending = null;
 433
 434     /**
     * Number at which line numbering starts
 436      * @var int
 437      */
 438     var $line_numbers_start = 1;
 439
 440     /**
 441      * The overall style for this code block
 442      * @var string
 443      */
 444     var $overall_style = 'font-family:monospace;';
 445
 446     /**
 447      *  The style for the actual code
 448      * @var string
 449      */
 450     var $code_style = 'font: normal normal 1em/1.2em monospace; margin:0; padding:0; background:none; vertical-align:top;';
 451
 452     /**
 453      * The overall class for this code block
 454      * @var string
 455      */
 456     var $overall_class = '';
 457
 458     /**
 459      * The overall ID for this code block
 460      * @var string
 461      */
 462     var $overall_id = '';
 463
 464     /**
 465      * Line number styles
 466      * @var string
 467      */
 468     var $line_style1 = 'font-weight: normal; vertical-align:top;';
 469
 470     /**
 471      * Line number styles for fancy lines
 472      * @var string
 473      */
 474     var $line_style2 = 'font-weight: bold; vertical-align:top;';
 475
 476     /**
 477      * Style for line numbers when GESHI_HEADER_PRE_TABLE is chosen
 478      * @var string
 479      */
 480     var $table_linenumber_style = 'width:1px;text-align:right;margin:0;padding:0 2px;vertical-align:top;';
 481
 482     /**
     * How line numbers are displayed (one of the GESHI_*_LINE_NUMBERS constants)
     * @var int
 485      */
 486     var $line_numbers = GESHI_NO_LINE_NUMBERS;
 487
 488     /**
 489      * Flag to decide if multi line spans are allowed. Set it to false to make sure
 490      * each tag is closed before and reopened after each linefeed.
 491      * @var boolean
 492      */
 493     var $allow_multiline_span = true;
 494
 495     /**
 496      * The "nth" value for fancy line highlighting
 497      * @var int
 498      */
 499     var $line_nth_row = 0;
 500
 501     /**
 502      * The size of tab stops
 503      * @var int
 504      */
 505     var $tab_width = 8;
 506
 507     /**
     * Should we use language-defined tab stop widths?
     * @var boolean
 510      */
 511     var $use_language_tab_width = false;
 512
 513     /**
 514      * Default target for keyword links
 515      * @var string
 516      */
 517     var $link_target = '';
 518
 519     /**
 520      * The encoding to use for entity encoding
 521      * NOTE: Used with Escape Char Sequences to fix UTF-8 handling (cf. SF#2037598)
 522      * @var string
 523      */
 524     var $encoding = 'utf-8';
 525
 526     /**
 527      * Should keywords be linked?
 528      * @var boolean
 529      */
 530     var $keyword_links = true;
 531
 532     /**
 533      * Currently loaded language file
 534      * @var string
 535      * @since 1.0.7.22
 536      */
 537     var $loaded_language = '';
 538
 539     /**
     * Whether the caches needed for parsing are built or not
 541      *
 542      * @var bool
 543      * @since 1.0.8
 544      */
 545     var $parse_cache_built = false;
 546
 547     /**
 548      * Work around for Suhosin Patch with disabled /e modifier
 549      *
 550      * Note from suhosins author in config file:
 551      * <blockquote>
 552      *   The /e modifier inside <code>preg_replace()</code> allows code execution.
 553      *   Often it is the cause for remote code execution exploits. It is wise to
 554      *   deactivate this feature and test where in the application it is used.
 555      *   The developer using the /e modifier should be made aware that he should
 556      *   use <code>preg_replace_callback()</code> instead
 557      * </blockquote>
 558      *
 559      * @var array
 560      * @since 1.0.8
 561      */
 562     var $_kw_replace_group = 0;
 563     var $_rx_key = 0;
 564
 565     /**
 566      * some "callback parameters" for handle_multiline_regexps
 567      *
 568      * @since 1.0.8
 569      * @access private
 570      * @var string
 571      */
 572     var $_hmr_before = '';
 573     var $_hmr_replace = '';
 574     var $_hmr_after = '';
 575     var $_hmr_key = 0;
 576
 577     /**#@-*/
 578
 579     /**
 580      * Creates a new GeSHi object, with source and language
 581      *
 582      * @param string The source code to highlight
 583      * @param string The language to highlight the source with
 584      * @param string The path to the language file directory. <b>This
 585      *               is deprecated!</b> I've backported the auto path
 586      *               detection from the 1.1.X dev branch, so now it
 587      *               should be automatically set correctly. If you have
 588      *               renamed the language directory however, you will
 589      *               still need to set the path using this parameter or
 590      *               {@link GeSHi->set_language_path()}
 591      * @since 1.0.0
 592      */
 593     function GeSHi($source = '', $language = '', $path = '') {
 594         if (!empty($source)) {
 595             $this->set_source($source);
 596         }
 597         if (!empty($language)) {
 598             $this->set_language($language);
 599         }
 600         $this->set_language_path($path);
 601     }
 602
 603     /**
 604      * Returns an error message associated with the last GeSHi operation,
 605      * or false if no error has occured
 606      *
 607      * @return string|false An error message if there has been an error, else false
 608      * @since  1.0.0
 609      */
 610     function error() {
 611         if ($this->error) {
 612             //Put some template variables for debugging here ...
 613             $debug_tpl_vars = array(
 614                 '{LANGUAGE}' => $this->language,
 615                 '{PATH}' => $this->language_path
 616             );
 617             $msg = str_replace(
 618                 array_keys($debug_tpl_vars),
 619                 array_values($debug_tpl_vars),
 620                 $this->error_messages[$this->error]);
 621
 622             return "<br /><strong>GeSHi Error:</strong> $msg (code {$this->error})<br />";
 623         }
 624         return false;
 625     }
 626
 627     /**
 628      * Gets a human-readable language name (thanks to Simon Patterson
 629      * for the idea :))
 630      *
 631      * @return string The name for the current language
 632      * @since  1.0.2
 633      */
 634     function get_language_name() {
 635         if (GESHI_ERROR_NO_SUCH_LANG == $this->error) {
 636             return $this->language_data['LANG_NAME'] . ' (Unknown Language)';
 637         }
 638         return $this->language_data['LANG_NAME'];
 639     }
 640
 641     /**
 642      * Sets the source code for this object
 643      *
 644      * @param string The source code to highlight
 645      * @since 1.0.0
 646      */
 647     function set_source($source) {
 648         $this->source = $source;
 649         $this->highlight_extra_lines = array();
 650     }
 651
 652     /**
 653      * Sets the language for this object
 654      *
 655      * @note since 1.0.8 this function won't reset language-settings by default anymore!
 656      *       if you need this set $force_reset = true
 657      *
 658      * @param string The name of the language to use
 659      * @since 1.0.0
 660      */
 661     function set_language($language, $force_reset = false) {
 662         if ($force_reset) {
 663             $this->loaded_language = false;
 664         }
 665
 666         //Clean up the language name to prevent malicious code injection
 667         $language = preg_replace('#[^a-zA-Z0-9\-_]#', '', $language);
 668
 669         $language = strtolower($language);
 670
 671         //Retreive the full filename
 672         $file_name = $this->language_path . $language . '.php';
 673         if ($file_name == $this->loaded_language) {
 674             // this language is already loaded!
 675             return;
 676         }
 677
 678         $this->language = $language;
 679
 680         $this->error = false;
 681         $this->strict_mode = GESHI_NEVER;
 682
 683         //Check if we can read the desired file
 684         if (!is_readable($file_name)) {
 685             $this->error = GESHI_ERROR_NO_SUCH_LANG;
 686             return;
 687         }
 688
 689         // Load the language for parsing
 690         $this->load_language($file_name);
 691     }
 692
 693     /**
 694      * Sets the path to the directory containing the language files. Note
 695      * that this path is relative to the directory of the script that included
 696      * geshi.php, NOT geshi.php itself.
 697      *
 698      * @param string The path to the language directory
 699      * @since 1.0.0
 700      * @deprecated The path to the language files should now be automatically
 701      *             detected, so this method should no longer be needed. The
 702      *             1.1.X branch handles manual setting of the path differently
 703      *             so this method will disappear in 1.2.0.
 704      */
 705     function set_language_path($path) {
 706         if(strpos($path,':')) {
 707             //Security Fix to prevent external directories using fopen wrappers.
 708             if(DIRECTORY_SEPARATOR == "\\") {
 709                 if(!preg_match('#^[a-zA-Z]:#', $path) || false !== strpos($path, ':', 2)) {
 710                     return;
 711                 }
 712             } else {
 713                 return;
 714             }
 715         }
 716         if(preg_match('#[^/a-zA-Z0-9_\.\-\\\s:]#', $path)) {
 717             //Security Fix to prevent external directories using fopen wrappers.
 718             return;
 719         }
 720         if(GESHI_SECURITY_PARANOID && false !== strpos($path, '/.')) {
 721             //Security Fix to prevent external directories using fopen wrappers.
 722             return;
 723         }
 724         if(GESHI_SECURITY_PARANOID && false !== strpos($path, '..')) {
 725             //Security Fix to prevent external directories using fopen wrappers.
 726             return;
 727         }
 728         if ($path) {
 729             $this->language_path = ('/' == $path[strlen($path) - 1]) ? $path : $path . '/';
 730             $this->set_language($this->language); // otherwise set_language_path has no effect
 731         }
 732     }
 733
 734     /**
 735      * Sets the type of header to be used.
 736      *
 737      * If GESHI_HEADER_DIV is used, the code is surrounded in a "div".This
 738      * means more source code but more control over tab width and line-wrapping.
 739      * GESHI_HEADER_PRE means that a "pre" is used - less source, but less
 740      * control. Default is GESHI_HEADER_PRE.
 741      *
 742      * From 1.0.7.2, you can use GESHI_HEADER_NONE to specify that no header code
 743      * should be outputted.
 744      *
 745      * @param int The type of header to be used
 746      * @since 1.0.0
 747      */
 748     function set_header_type($type) {
 749         //Check if we got a valid header type
 750         if (!in_array($type, array(GESHI_HEADER_NONE, GESHI_HEADER_DIV,
 751             GESHI_HEADER_PRE, GESHI_HEADER_PRE_VALID, GESHI_HEADER_PRE_TABLE))) {
 752             $this->error = GESHI_ERROR_INVALID_HEADER_TYPE;
 753             return;
 754         }
 755
 756         //Set that new header type
 757         $this->header_type = $type;
 758     }
 759
 760     /**
 761      * Sets the styles for the code that will be outputted
 762      * when this object is parsed. The style should be a
 763      * string of valid stylesheet declarations
 764      *
 765      * @param string  The overall style for the outputted code block
 766      * @param boolean Whether to merge the styles with the current styles or not
 767      * @since 1.0.0
 768      */
 769     function set_overall_style($style, $preserve_defaults = false) {
 770         if (!$preserve_defaults) {
 771             $this->overall_style = $style;
 772         } else {
 773             $this->overall_style .= $style;
 774         }
 775     }
 776
 777     /**
 778      * Sets the overall classname for this block of code. This
 779      * class can then be used in a stylesheet to style this object's
 780      * output
 781      *
 782      * @param string The class name to use for this block of code
 783      * @since 1.0.0
 784      */
 785     function set_overall_class($class) {
 786         $this->overall_class = $class;
 787     }
 788
 789     /**
 790      * Sets the overall id for this block of code. This id can then
 791      * be used in a stylesheet to style this object's output
 792      *
 793      * @param string The ID to use for this block of code
 794      * @since 1.0.0
 795      */
 796     function set_overall_id($id) {
 797         $this->overall_id = $id;
 798     }
 799
 800     /**
 801      * Sets whether CSS classes should be used to highlight the source. Default
 802      * is off, calling this method with no arguments will turn it on
 803      *
 804      * @param boolean Whether to turn classes on or not
 805      * @since 1.0.0
 806      */
 807     function enable_classes($flag = true) {
 808         $this->use_classes = ($flag) ? true : false;
 809     }
 810
 811     /**
 812      * Sets the style for the actual code. This should be a string
 813      * containing valid stylesheet declarations. If $preserve_defaults is
 814      * true, then styles are merged with the default styles, with the
 815      * user defined styles having priority
 816      *
 817      * Note: Use this method to override any style changes you made to
 818      * the line numbers if you are using line numbers, else the line of
 819      * code will have the same style as the line number! Consult the
 820      * GeSHi documentation for more information about this.
 821      *
 822      * @param string  The style to use for actual code
 823      * @param boolean Whether to merge the current styles with the new styles
 824      * @since 1.0.2
 825      */
 826     function set_code_style($style, $preserve_defaults = false) {
 827         if (!$preserve_defaults) {
 828             $this->code_style = $style;
 829         } else {
 830             $this->code_style .= $style;
 831         }
 832     }
 833
 834     /**
 835      * Sets the styles for the line numbers.
 836      *
 837      * @param string The style for the line numbers that are "normal"
 838      * @param string|boolean If a string, this is the style of the line
 839      *        numbers that are "fancy", otherwise if boolean then this
 840      *        defines whether the normal styles should be merged with the
 841      *        new normal styles or not
 842      * @param boolean If set, is the flag for whether to merge the "fancy"
 843      *        styles with the current styles or not
 844      * @since 1.0.2
 845      */
 846     function set_line_style($style1, $style2 = '', $preserve_defaults = false) {
 847         //Check if we got 2 or three parameters
 848         if (is_bool($style2)) {
 849             $preserve_defaults = $style2;
 850             $style2 = '';
 851         }
 852
 853         //Actually set the new styles
 854         if (!$preserve_defaults) {
 855             $this->line_style1 = $style1;
 856             $this->line_style2 = $style2;
 857         } else {
 858             $this->line_style1 .= $style1;
 859             $this->line_style2 .= $style2;
 860         }
 861     }
 862
 863     /**
 864      * Sets whether line numbers should be displayed.
 865      *
 866      * Valid values for the first parameter are:
 867      *
 868      *  - GESHI_NO_LINE_NUMBERS: Line numbers will not be displayed
 869      *  - GESHI_NORMAL_LINE_NUMBERS: Line numbers will be displayed
 870      *  - GESHI_FANCY_LINE_NUMBERS: Fancy line numbers will be displayed
 871      *
 872      * For fancy line numbers, the second parameter is used to signal which lines
 873      * are to be fancy. For example, if the value of this parameter is 5 then every
 874      * 5th line will be fancy.
 875      *
 876      * @param int How line numbers should be displayed
 877      * @param int Defines which lines are fancy
 878      * @since 1.0.0
 879      */
 880     function enable_line_numbers($flag, $nth_row = 5) {
 881         if (GESHI_NO_LINE_NUMBERS != $flag && GESHI_NORMAL_LINE_NUMBERS != $flag
 882             && GESHI_FANCY_LINE_NUMBERS != $flag) {
 883             $this->error = GESHI_ERROR_INVALID_LINE_NUMBER_TYPE;
 884         }
 885         $this->line_numbers = $flag;
 886         $this->line_nth_row = $nth_row;
 887     }
 888
 889     /**
     * Sets whether spans and other HTML markup generated by GeSHi can
 891      * span over multiple lines or not. Defaults to true to reduce overhead.
 892      * Set it to false if you want to manipulate the output or manually display
 893      * the code in an ordered list.
 894      *
     * @param boolean Whether multiline spans are allowed or not
 896      * @since 1.0.7.22
 897      */
 898     function enable_multiline_span($flag) {
 899         $this->allow_multiline_span = (bool) $flag;
 900     }
 901
 902     /**
 903      * Get current setting for multiline spans, see GeSHi->enable_multiline_span().
 904      *
 905      * @see enable_multiline_span
 906      * @return bool
 907      */
    function get_multiline_span() {
        // Plain accessor: hands back the flag stored by enable_multiline_span()
        return $this->allow_multiline_span;
    }
 911
 912     /**
 913      * Sets the style for a keyword group. If $preserve_defaults is
 914      * true, then styles are merged with the default styles, with the
 915      * user defined styles having priority
 916      *
 917      * @param int     The key of the keyword group to change the styles of
 918      * @param string  The style to make the keywords
 919      * @param boolean Whether to merge the new styles with the old or just
 920      *                to overwrite them
 921      * @since 1.0.0
 922      */
 923     function set_keyword_group_style($key, $style, $preserve_defaults = false) {
 924         //Set the style for this keyword group
 925         if (!$preserve_defaults) {
 926             $this->language_data['STYLES']['KEYWORDS'][$key] = $style;
 927         } else {
 928             $this->language_data['STYLES']['KEYWORDS'][$key] .= $style;
 929         }
 930
 931         //Update the lexic permissions
 932         if (!isset($this->lexic_permissions['KEYWORDS'][$key])) {
 933             $this->lexic_permissions['KEYWORDS'][$key] = true;
 934         }
 935     }
 936
 937     /**
 938      * Turns highlighting on/off for a keyword group
 939      *
 940      * @param int     The key of the keyword group to turn on or off
 941      * @param boolean Whether to turn highlighting for that group on or off
 942      * @since 1.0.0
 943      */
 944     function set_keyword_group_highlighting($key, $flag = true) {
 945         $this->lexic_permissions['KEYWORDS'][$key] = ($flag) ? true : false;
 946     }
 947
 948     /**
 949      * Sets the styles for comment groups.  If $preserve_defaults is
 950      * true, then styles are merged with the default styles, with the
 951      * user defined styles having priority
 952      *
 953      * @param int     The key of the comment group to change the styles of
 954      * @param string  The style to make the comments
 955      * @param boolean Whether to merge the new styles with the old or just
 956      *                to overwrite them
 957      * @since 1.0.0
 958      */
 959     function set_comments_style($key, $style, $preserve_defaults = false) {
 960         if (!$preserve_defaults) {
 961             $this->language_data['STYLES']['COMMENTS'][$key] = $style;
 962         } else {
 963             $this->language_data['STYLES']['COMMENTS'][$key] .= $style;
 964         }
 965     }
 966
 967     /**
 968      * Turns highlighting on/off for comment groups
 969      *
 970      * @param int     The key of the comment group to turn on or off
 971      * @param boolean Whether to turn highlighting for that group on or off
 972      * @since 1.0.0
 973      */
 974     function set_comments_highlighting($key, $flag = true) {
 975         $this->lexic_permissions['COMMENTS'][$key] = ($flag) ? true : false;
 976     }
 977
 978     /**
 979      * Sets the styles for escaped characters. If $preserve_defaults is
 980      * true, then styles are merged with the default styles, with the
 981      * user defined styles having priority
 982      *
 983      * @param string  The style to make the escape characters
 984      * @param boolean Whether to merge the new styles with the old or just
     *                to overwrite them
     * @param int     Tells the group of escape characters for which style should be set.
 986      * @since 1.0.0
 987      */
 988     function set_escape_characters_style($style, $preserve_defaults = false, $group = 0) {
 989         if (!$preserve_defaults) {
 990             $this->language_data['STYLES']['ESCAPE_CHAR'][$group] = $style;
 991         } else {
 992             $this->language_data['STYLES']['ESCAPE_CHAR'][$group] .= $style;
 993         }
 994     }
 995
 996     /**
 997      * Turns highlighting on/off for escaped characters
 998      *
 999      * @param boolean Whether to turn highlighting for escape characters on or off
1000      * @since 1.0.0
1001      */
1002     function set_escape_characters_highlighting($flag = true) {
1003         $this->lexic_permissions['ESCAPE_CHAR'] = ($flag) ? true : false;
1004     }
1005
1006     /**
1007      * Sets the styles for brackets. If $preserve_defaults is
1008      * true, then styles are merged with the default styles, with the
1009      * user defined styles having priority
1010      *
1011      * This method is DEPRECATED: use set_symbols_style instead.
1012      * This method will be removed in 1.2.X
1013      *
1014      * @param string  The style to make the brackets
1015      * @param boolean Whether to merge the new styles with the old or just
1016      *                to overwrite them
1017      * @since 1.0.0
1018      * @deprecated In favour of set_symbols_style
1019      */
1020     function set_brackets_style($style, $preserve_defaults = false) {
1021         if (!$preserve_defaults) {
1022             $this->language_data['STYLES']['BRACKETS'][0] = $style;
1023         } else {
1024             $this->language_data['STYLES']['BRACKETS'][0] .= $style;
1025         }
1026     }
1027
1028     /**
1029      * Turns highlighting on/off for brackets
1030      *
1031      * This method is DEPRECATED: use set_symbols_highlighting instead.
     * This method will be removed in 1.2.X
1033      *
1034      * @param boolean Whether to turn highlighting for brackets on or off
1035      * @since 1.0.0
1036      * @deprecated In favour of set_symbols_highlighting
1037      */
1038     function set_brackets_highlighting($flag) {
1039         $this->lexic_permissions['BRACKETS'] = ($flag) ? true : false;
1040     }
1041
1042     /**
1043      * Sets the styles for symbols. If $preserve_defaults is
1044      * true, then styles are merged with the default styles, with the
1045      * user defined styles having priority
1046      *
1047      * @param string  The style to make the symbols
1048      * @param boolean Whether to merge the new styles with the old or just
1049      *                to overwrite them
1050      * @param int     Tells the group of symbols for which style should be set.
1051      * @since 1.0.1
1052      */
1053     function set_symbols_style($style, $preserve_defaults = false, $group = 0) {
1054         // Update the style of symbols
1055         if (!$preserve_defaults) {
1056             $this->language_data['STYLES']['SYMBOLS'][$group] = $style;
1057         } else {
1058             $this->language_data['STYLES']['SYMBOLS'][$group] .= $style;
1059         }
1060
1061         // For backward compatibility
1062         if (0 == $group) {
1063             $this->set_brackets_style ($style, $preserve_defaults);
1064         }
1065     }
1066
1067     /**
1068      * Turns highlighting on/off for symbols
1069      *
1070      * @param boolean Whether to turn highlighting for symbols on or off
1071      * @since 1.0.0
1072      */
1073     function set_symbols_highlighting($flag) {
1074         // Update lexic permissions for this symbol group
1075         $this->lexic_permissions['SYMBOLS'] = ($flag) ? true : false;
1076
1077         // For backward compatibility
1078         $this->set_brackets_highlighting ($flag);
1079     }
1080
1081     /**
1082      * Sets the styles for strings. If $preserve_defaults is
1083      * true, then styles are merged with the default styles, with the
1084      * user defined styles having priority
1085      *
     * @param string  The style to make the strings
1087      * @param boolean Whether to merge the new styles with the old or just
1088      *                to overwrite them
1089      * @param int     Tells the group of strings for which style should be set.
1090      * @since 1.0.0
1091      */
1092     function set_strings_style($style, $preserve_defaults = false, $group = 0) {
1093         if (!$preserve_defaults) {
1094             $this->language_data['STYLES']['STRINGS'][$group] = $style;
1095         } else {
1096             $this->language_data['STYLES']['STRINGS'][$group] .= $style;
1097         }
1098     }
1099
1100     /**
1101      * Turns highlighting on/off for strings
1102      *
1103      * @param boolean Whether to turn highlighting for strings on or off
1104      * @since 1.0.0
1105      */
1106     function set_strings_highlighting($flag) {
1107         $this->lexic_permissions['STRINGS'] = ($flag) ? true : false;
1108     }
1109
1110     /**
1111      * Sets the styles for strict code blocks. If $preserve_defaults is
1112      * true, then styles are merged with the default styles, with the
1113      * user defined styles having priority
1114      *
1115      * @param string  The style to make the script blocks
1116      * @param boolean Whether to merge the new styles with the old or just
1117      *                to overwrite them
1118      * @param int     Tells the group of script blocks for which style should be set.
1119      * @since 1.0.8.4
1120      */
1121     function set_script_style($style, $preserve_defaults = false, $group = 0) {
1122         // Update the style of symbols
1123         if (!$preserve_defaults) {
1124             $this->language_data['STYLES']['SCRIPT'][$group] = $style;
1125         } else {
1126             $this->language_data['STYLES']['SCRIPT'][$group] .= $style;
1127         }
1128     }
1129
1130     /**
1131      * Sets the styles for numbers. If $preserve_defaults is
1132      * true, then styles are merged with the default styles, with the
1133      * user defined styles having priority
1134      *
1135      * @param string  The style to make the numbers
1136      * @param boolean Whether to merge the new styles with the old or just
1137      *                to overwrite them
1138      * @param int     Tells the group of numbers for which style should be set.
1139      * @since 1.0.0
1140      */
1141     function set_numbers_style($style, $preserve_defaults = false, $group = 0) {
1142         if (!$preserve_defaults) {
1143             $this->language_data['STYLES']['NUMBERS'][$group] = $style;
1144         } else {
1145             $this->language_data['STYLES']['NUMBERS'][$group] .= $style;
1146         }
1147     }
1148
1149     /**
1150      * Turns highlighting on/off for numbers
1151      *
1152      * @param boolean Whether to turn highlighting for numbers on or off
1153      * @since 1.0.0
1154      */
1155     function set_numbers_highlighting($flag) {
1156         $this->lexic_permissions['NUMBERS'] = ($flag) ? true : false;
1157     }
1158
1159     /**
1160      * Sets the styles for methods. $key is a number that references the
1161      * appropriate "object splitter" - see the language file for the language
1162      * you are highlighting to get this number. If $preserve_defaults is
1163      * true, then styles are merged with the default styles, with the
1164      * user defined styles having priority
1165      *
1166      * @param int     The key of the object splitter to change the styles of
1167      * @param string  The style to make the methods
1168      * @param boolean Whether to merge the new styles with the old or just
1169      *                to overwrite them
1170      * @since 1.0.0
1171      */
1172     function set_methods_style($key, $style, $preserve_defaults = false) {
1173         if (!$preserve_defaults) {
1174             $this->language_data['STYLES']['METHODS'][$key] = $style;
1175         } else {
1176             $this->language_data['STYLES']['METHODS'][$key] .= $style;
1177         }
1178     }
1179
1180     /**
1181      * Turns highlighting on/off for methods
1182      *
1183      * @param boolean Whether to turn highlighting for methods on or off
1184      * @since 1.0.0
1185      */
1186     function set_methods_highlighting($flag) {
1187         $this->lexic_permissions['METHODS'] = ($flag) ? true : false;
1188     }
1189
1190     /**
1191      * Sets the styles for regexps. If $preserve_defaults is
1192      * true, then styles are merged with the default styles, with the
1193      * user defined styles having priority
1194      *
     * @param int     The key of the regular expression group to change the styles of
     * @param string  The style to make the regular expression matches
1196      * @param boolean Whether to merge the new styles with the old or just
1197      *                to overwrite them
1198      * @since 1.0.0
1199      */
1200     function set_regexps_style($key, $style, $preserve_defaults = false) {
1201         if (!$preserve_defaults) {
1202             $this->language_data['STYLES']['REGEXPS'][$key] = $style;
1203         } else {
1204             $this->language_data['STYLES']['REGEXPS'][$key] .= $style;
1205         }
1206     }
1207
1208     /**
1209      * Turns highlighting on/off for regexps
1210      *
1211      * @param int     The key of the regular expression group to turn on or off
1212      * @param boolean Whether to turn highlighting for the regular expression group on or off
1213      * @since 1.0.0
1214      */
1215     function set_regexps_highlighting($key, $flag) {
1216         $this->lexic_permissions['REGEXPS'][$key] = ($flag) ? true : false;
1217     }
1218
1219     /**
1220      * Sets whether a set of keywords are checked for in a case sensitive manner
1221      *
1222      * @param int The key of the keyword group to change the case sensitivity of
1223      * @param boolean Whether to check in a case sensitive manner or not
1224      * @since 1.0.0
1225      */
1226     function set_case_sensitivity($key, $case) {
1227         $this->language_data['CASE_SENSITIVE'][$key] = ($case) ? true : false;
1228     }
1229
1230     /**
1231      * Sets the case that keywords should use when found. Use the constants:
1232      *
1233      *  - GESHI_CAPS_NO_CHANGE: leave keywords as-is
1234      *  - GESHI_CAPS_UPPER: convert all keywords to uppercase where found
1235      *  - GESHI_CAPS_LOWER: convert all keywords to lowercase where found
1236      *
1237      * @param int A constant specifying what to do with matched keywords
1238      * @since 1.0.1
1239      */
1240     function set_case_keywords($case) {
1241         if (in_array($case, array(
1242             GESHI_CAPS_NO_CHANGE, GESHI_CAPS_UPPER, GESHI_CAPS_LOWER))) {
1243             $this->language_data['CASE_KEYWORDS'] = $case;
1244         }
1245     }
1246
1247     /**
1248      * Sets how many spaces a tab is substituted for
1249      *
     * Widths below one are reset to the default width of 8
1251      *
1252      * @param int The tab width
1253      * @since 1.0.0
1254      */
1255     function set_tab_width($width) {
1256         $this->tab_width = intval($width);
1257
1258         //Check if it fit's the constraints:
1259         if ($this->tab_width < 1) {
1260             //Return it to the default
1261             $this->tab_width = 8;
1262         }
1263     }
1264
1265     /**
     * Sets whether or not to use tab-stop width specified by language
1267      *
1268      * @param boolean Whether to use language-specific tab-stop widths
1269      * @since 1.0.7.20
1270      */
1271     function set_use_language_tab_width($use) {
1272         $this->use_language_tab_width = (bool) $use;
1273     }
1274
1275     /**
1276      * Returns the tab width to use, based on the current language and user
1277      * preference
1278      *
1279      * @return int Tab width
1280      * @since 1.0.7.20
1281      */
1282     function get_real_tab_width() {
1283         if (!$this->use_language_tab_width ||
1284             !isset($this->language_data['TAB_WIDTH'])) {
1285             return $this->tab_width;
1286         } else {
1287             return $this->language_data['TAB_WIDTH'];
1288         }
1289     }
1290
1291     /**
1292      * Enables/disables strict highlighting. Default is off, calling this
1293      * method without parameters will turn it on. See documentation
1294      * for more details on strict mode and where to use it.
1295      *
1296      * @param boolean Whether to enable strict mode or not
1297      * @since 1.0.0
1298      */
1299     function enable_strict_mode($mode = true) {
1300         if (GESHI_MAYBE == $this->language_data['STRICT_MODE_APPLIES']) {
1301             $this->strict_mode = ($mode) ? GESHI_ALWAYS : GESHI_NEVER;
1302         }
1303     }
1304
1305     /**
1306      * Disables all highlighting
1307      *
1308      * @since 1.0.0
1309      * @todo  Rewrite with array traversal
1310      * @deprecated In favour of enable_highlighting
1311      */
    function disable_highlighting() {
        // Deprecated wrapper: forwards to enable_highlighting() with the
        // flag set to false
        $this->enable_highlighting(false);
    }
1315
1316     /**
1317      * Enables all highlighting
1318      *
1319      * The optional flag parameter was added in version 1.0.7.21 and can be used
1320      * to enable (true) or disable (false) all highlighting.
1321      *
1322      * @since 1.0.0
1323      * @param boolean A flag specifying whether to enable or disable all highlighting
1324      * @todo  Rewrite with array traversal
1325      */
1326     function enable_highlighting($flag = true) {
1327         $flag = $flag ? true : false;
1328         foreach ($this->lexic_permissions as $key => $value) {
1329             if (is_array($value)) {
1330                 foreach ($value as $k => $v) {
1331                     $this->lexic_permissions[$key][$k] = $flag;
1332                 }
1333             } else {
1334                 $this->lexic_permissions[$key] = $flag;
1335             }
1336         }
1337
1338         // Context blocks
1339         $this->enable_important_blocks = $flag;
1340     }
1341
1342     /**
1343      * Given a file extension, this method returns either a valid geshi language
1344      * name, or the empty string if it couldn't be found
1345      *
1346      * @param string The extension to get a language name for
1347      * @param array  A lookup array to use instead of the default one
1348      * @since 1.0.5
1349      * @todo Re-think about how this method works (maybe make it private and/or make it
1350      *       a extension->lang lookup?)
1351      * @todo static?
1352      */
1353     function get_language_name_from_extension( $extension, $lookup = array() ) {
1354         if ( !is_array($lookup) || empty($lookup)) {
1355             $lookup = array(
1356                 'abap' => array('abap'),
1357                 'actionscript' => array('as'),
1358                 'ada' => array('a', 'ada', 'adb', 'ads'),
1359                 'apache' => array('conf'),
1360                 'asm' => array('ash', 'asm', 'inc'),
1361                 'asp' => array('asp'),
1362                 'bash' => array('sh'),
1363                 'bf' => array('bf'),
1364                 'c' => array('c', 'h'),
1365                 'c_mac' => array('c', 'h'),
1366                 'caddcl' => array(),
1367                 'cadlisp' => array(),
1368                 'cdfg' => array('cdfg'),
1369                 'cobol' => array('cbl'),
1370                 'cpp' => array('cpp', 'hpp', 'C', 'H', 'CPP', 'HPP'),
1371                 'csharp' => array('cs'),
1372                 'css' => array('css'),
1373                 'd' => array('d'),
1374                 'delphi' => array('dpk', 'dpr', 'pp', 'pas'),
1375                 'diff' => array('diff', 'patch'),
1376                 'dos' => array('bat', 'cmd'),
1377                 'gdb' => array('kcrash', 'crash', 'bt'),
1378                 'gettext' => array('po', 'pot'),
1379                 'gml' => array('gml'),
1380                 'gnuplot' => array('plt'),
1381                 'groovy' => array('groovy'),
1382                 'haskell' => array('hs'),
1383                 'html4strict' => array('html', 'htm'),
1384                 'ini' => array('ini', 'desktop'),
1385                 'java' => array('java'),
1386                 'javascript' => array('js'),
1387                 'klonec' => array('kl1'),
1388                 'klonecpp' => array('klx'),
1389                 'latex' => array('tex'),
1390                 'lisp' => array('lisp'),
1391                 'lua' => array('lua'),
1392                 'matlab' => array('m'),
1393                 'mpasm' => array(),
1394                 'mysql' => array('sql'),
1395                 'nsis' => array(),
1396                 'objc' => array(),
1397                 'oobas' => array(),
1398                 'oracle8' => array(),
1399                 'oracle10' => array(),
1400                 'pascal' => array('pas'),
1401                 'perl' => array('pl', 'pm'),
1402                 'php' => array('php', 'php5', 'phtml', 'phps'),
1403                 'povray' => array('pov'),
1404                 'providex' => array('pvc', 'pvx'),
1405                 'prolog' => array('pl'),
1406                 'python' => array('py'),
1407                 'qbasic' => array('bi'),
1408                 'reg' => array('reg'),
1409                 'ruby' => array('rb'),
1410                 'sas' => array('sas'),
1411                 'scala' => array('scala'),
1412                 'scheme' => array('scm'),
1413                 'scilab' => array('sci'),
1414                 'smalltalk' => array('st'),
1415                 'smarty' => array(),
1416                 'tcl' => array('tcl'),
1417                 'vb' => array('bas'),
1418                 'vbnet' => array(),
1419                 'visualfoxpro' => array(),
1420                 'whitespace' => array('ws'),
1421                 'xml' => array('xml', 'svg', 'xrc'),
1422                 'z80' => array('z80', 'asm', 'inc')
1423             );
1424         }
1425
1426         foreach ($lookup as $lang => $extensions) {
1427             if (in_array($extension, $extensions)) {
1428                 return $lang;
1429             }
1430         }
1431         return '';
1432     }
1433
1434     /**
1435      * Given a file name, this method loads its contents in, and attempts
1436      * to set the language automatically. An optional lookup table can be
1437      * passed for looking up the language name. If not specified a default
1438      * table is used
1439      *
1440      * The language table is in the form
1441      * <pre>array(
1442      *   'lang_name' => array('extension', 'extension', ...),
1443      *   'lang_name' ...
1444      * );</pre>
1445      *
1446      * @param string The filename to load the source from
1447      * @param array  A lookup array to use instead of the default one
1448      * @todo Complete rethink of this and above method
1449      * @since 1.0.5
1450      */
1451     function load_from_file($file_name, $lookup = array()) {
1452         if (is_readable($file_name)) {
1453             $this->set_source(file_get_contents($file_name));
1454             $this->set_language($this->get_language_name_from_extension(substr(strrchr($file_name, '.'), 1), $lookup));
1455         } else {
1456             $this->error = GESHI_ERROR_FILE_NOT_READABLE;
1457         }
1458     }
1459
1460     /**
1461      * Adds a keyword to a keyword group for highlighting
1462      *
1463      * @param int    The key of the keyword group to add the keyword to
1464      * @param string The word to add to the keyword group
1465      * @since 1.0.0
1466      */
1467     function add_keyword($key, $word) {
1468         if (!in_array($word, $this->language_data['KEYWORDS'][$key])) {
1469             $this->language_data['KEYWORDS'][$key][] = $word;
1470
1471             //NEW in 1.0.8 don't recompile the whole optimized regexp, simply append it
1472             if ($this->parse_cache_built) {
1473                 $subkey = count($this->language_data['CACHED_KEYWORD_LISTS'][$key]) - 1;
1474                 $this->language_data['CACHED_KEYWORD_LISTS'][$key][$subkey] .= '|' . preg_quote($word, '/');
1475             }
1476         }
1477     }
1478
1479     /**
1480      * Removes a keyword from a keyword group
1481      *
1482      * @param int    The key of the keyword group to remove the keyword from
1483      * @param string The word to remove from the keyword group
     * @param bool   Whether to automatically recompile the optimized regexp list or not.
1485      *               Note: if you set this to false and @see GeSHi->parse_code() was already called once,
1486      *               for the current language, you have to manually call @see GeSHi->optimize_keyword_group()
1487      *               or the removed keyword will stay in cache and still be highlighted! On the other hand
1488      *               it might be too expensive to recompile the regexp list for every removal if you want to
1489      *               remove a lot of keywords.
1490      * @since 1.0.0
1491      */
1492     function remove_keyword($key, $word, $recompile = true) {
1493         $key_to_remove = array_search($word, $this->language_data['KEYWORDS'][$key]);
1494         if ($key_to_remove !== false) {
1495             unset($this->language_data['KEYWORDS'][$key][$key_to_remove]);
1496
1497             //NEW in 1.0.8, optionally recompile keyword group
1498             if ($recompile && $this->parse_cache_built) {
1499                 $this->optimize_keyword_group($key);
1500             }
1501         }
1502     }
1503
1504     /**
1505      * Creates a new keyword group
1506      *
1507      * @param int    The key of the keyword group to create
1508      * @param string The styles for the keyword group
     * @param boolean Whether the keyword group is case sensitive or not
1510      * @param array  The words to use for the keyword group
1511      * @since 1.0.0
1512      */
1513     function add_keyword_group($key, $styles, $case_sensitive = true, $words = array()) {
1514         $words = (array) $words;
1515         if  (empty($words)) {
1516             // empty word lists mess up highlighting
1517             return false;
1518         }
1519
1520         //Add the new keyword group internally
1521         $this->language_data['KEYWORDS'][$key] = $words;
1522         $this->lexic_permissions['KEYWORDS'][$key] = true;
1523         $this->language_data['CASE_SENSITIVE'][$key] = $case_sensitive;
1524         $this->language_data['STYLES']['KEYWORDS'][$key] = $styles;
1525
1526         //NEW in 1.0.8, cache keyword regexp
1527         if ($this->parse_cache_built) {
1528             $this->optimize_keyword_group($key);
1529         }
1530     }
1531
1532     /**
1533      * Removes a keyword group
1534      *
1535      * @param int    The key of the keyword group to remove
1536      * @since 1.0.0
1537      */
1538     function remove_keyword_group ($key) {
1539         //Remove the keyword group internally
1540         unset($this->language_data['KEYWORDS'][$key]);
1541         unset($this->lexic_permissions['KEYWORDS'][$key]);
1542         unset($this->language_data['CASE_SENSITIVE'][$key]);
1543         unset($this->language_data['STYLES']['KEYWORDS'][$key]);
1544
1545         //NEW in 1.0.8
1546         unset($this->language_data['CACHED_KEYWORD_LISTS'][$key]);
1547     }
1548
1549     /**
1550      * compile optimized regexp list for keyword group
1551      *
1552      * @param int   The key of the keyword group to compile & optimize
1553      * @since 1.0.8
1554      */
1555     function optimize_keyword_group($key) {
1556         $this->language_data['CACHED_KEYWORD_LISTS'][$key] =
1557             $this->optimize_regexp_list($this->language_data['KEYWORDS'][$key]);
1558         $space_as_whitespace = false;
1559         if(isset($this->language_data['PARSER_CONTROL'])) {
1560             if(isset($this->language_data['PARSER_CONTROL']['KEYWORDS'])) {
1561                 if(isset($this->language_data['PARSER_CONTROL']['KEYWORDS']['SPACE_AS_WHITESPACE'])) {
1562                     $space_as_whitespace = $this->language_data['PARSER_CONTROL']['KEYWORDS']['SPACE_AS_WHITESPACE'];
1563                 }
1564                 if(isset($this->language_data['PARSER_CONTROL']['KEYWORDS'][$key]['SPACE_AS_WHITESPACE'])) {
1565                     if(isset($this->language_data['PARSER_CONTROL']['KEYWORDS'][$key]['SPACE_AS_WHITESPACE'])) {
1566                         $space_as_whitespace = $this->language_data['PARSER_CONTROL']['KEYWORDS'][$key]['SPACE_AS_WHITESPACE'];
1567                     }
1568                 }
1569             }
1570         }
1571         if($space_as_whitespace) {
1572             foreach($this->language_data['CACHED_KEYWORD_LISTS'][$key] as $rxk => $rxv) {
1573                 $this->language_data['CACHED_KEYWORD_LISTS'][$key][$rxk] =
1574                     str_replace(" ", "\\s+", $rxv);
1575             }
1576         }
1577     }
1578
1579     /**
1580      * Sets the content of the header block
1581      *
1582      * @param string The content of the header block
1583      * @since 1.0.2
1584      */
    function set_header_content($content) {
        // Stored exactly as given; nothing is validated or escaped here
        $this->header_content = $content;
    }
1588
1589     /**
1590      * Sets the content of the footer block
1591      *
1592      * @param string The content of the footer block
1593      * @since 1.0.2
1594      */
    function set_footer_content($content) {
        // Stored exactly as given; nothing is validated or escaped here
        $this->footer_content = $content;
    }
1598
1599     /**
1600      * Sets the style for the header content
1601      *
1602      * @param string The style for the header content
1603      * @since 1.0.2
1604      */
    function set_header_content_style($style) {
        // Replaces any previously set header style; there is no merge option
        $this->header_content_style = $style;
    }
1608
1609     /**
1610      * Sets the style for the footer content
1611      *
1612      * @param string The style for the footer content
1613      * @since 1.0.2
1614      */
    function set_footer_content_style($style) {
        // Replaces any previously set footer style; there is no merge option
        $this->footer_content_style = $style;
    }
1618
1619     /**
1620      * Sets whether to force a surrounding block around
1621      * the highlighted code or not
1622      *
1623      * @param boolean Tells whether to enable or disable this feature
1624      * @since 1.0.7.20
1625      */
1626     function enable_inner_code_block($flag) {
1627         $this->force_code_block = (bool)$flag;
1628     }
1629
1630     /**
1631      * Sets the base URL to be used for keywords
1632      *
1633      * @param int The key of the keyword group to set the URL for
1634      * @param string The URL to set for the group. If {FNAME} is in
1635      *               the url somewhere, it is replaced by the keyword
1636      *               that the URL is being made for
1637      * @since 1.0.2
1638      */
1639     function set_url_for_keyword_group($group, $url) {
1640         $this->language_data['URLS'][$group] = $url;
1641     }
1642
1643     /**
1644      * Sets styles for links in code
1645      *
1646      * @param int A constant that specifies what state the style is being
1647      *            set for - e.g. :hover or :visited
1648      * @param string The styles to use for that state
1649      * @since 1.0.2
1650      */
1651     function set_link_styles($type, $styles) {
1652         $this->link_styles[$type] = $styles;
1653     }
1654
1655     /**
1656      * Sets the target for links in code
1657      *
1658      * @param string The target for links in the code, e.g. _blank
1659      * @since 1.0.3
1660      */
1661     function set_link_target($target) {
1662         if (!$target) {
1663             $this->link_target = '';
1664         } else {
1665             $this->link_target = ' target="' . $target . '"';
1666         }
1667     }
1668
1669     /**
1670      * Sets styles for important parts of the code
1671      *
1672      * @param string The styles to use on important parts of the code
1673      * @since 1.0.2
1674      */
1675     function set_important_styles($styles) {
1676         $this->important_styles = $styles;
1677     }
1678
1679     /**
1680      * Sets whether context-important blocks are highlighted
1681      *
1682      * @param boolean Tells whether to enable or disable highlighting of important blocks
1683      * @todo REMOVE THIS SHIZ FROM GESHI!
1684      * @deprecated
1685      * @since 1.0.2
1686      */
1687     function enable_important_blocks($flag) {
1688         $this->enable_important_blocks = ( $flag ) ? true : false;
1689     }
1690
1691     /**
1692      * Whether CSS IDs should be added to each line
1693      *
1694      * @param boolean If true, IDs will be added to each line.
1695      * @since 1.0.2
1696      */
1697     function enable_ids($flag = true) {
1698         $this->add_ids = ($flag) ? true : false;
1699     }
1700
1701     /**
1702      * Specifies which lines to highlight extra
1703      *
1704      * The extra style parameter was added in 1.0.7.21.
1705      *
1706      * @param mixed An array of line numbers to highlight, or just a line
1707      *              number on its own.
1708      * @param string A string specifying the style to use for this line.
1709      *              If null is specified, the default style is used.
1710      *              If false is specified, the line will be removed from
1711      *              special highlighting
1712      * @since 1.0.2
1713      * @todo  Some data replication here that could be cut down on
1714      */
1715     function highlight_lines_extra($lines, $style = null) {
1716         if (is_array($lines)) {
1717             //Split up the job using single lines at a time
1718             foreach ($lines as $line) {
1719                 $this->highlight_lines_extra($line, $style);
1720             }
1721         } else {
1722             //Mark the line as being highlighted specially
1723             $lines = intval($lines);
1724             $this->highlight_extra_lines[$lines] = $lines;
1725
1726             //Decide on which style to use
1727             if ($style === null) { //Check if we should use default style
1728                 unset($this->highlight_extra_lines_styles[$lines]);
1729             } else if ($style === false) { //Check if to remove this line
1730                 unset($this->highlight_extra_lines[$lines]);
1731                 unset($this->highlight_extra_lines_styles[$lines]);
1732             } else {
1733                 $this->highlight_extra_lines_styles[$lines] = $style;
1734             }
1735         }
1736     }
1737
1738     /**
1739      * Sets the style for extra-highlighted lines
1740      *
1741      * @param string The style for extra-highlighted lines
1742      * @since 1.0.2
1743      */
1744     function set_highlight_lines_extra_style($styles) {
1745         $this->highlight_extra_lines_style = $styles;
1746     }
1747
1748     /**
1749      * Sets the line-ending
1750      *
1751      * @param string The new line-ending
1752      * @since 1.0.2
1753      */
1754     function set_line_ending($line_ending) {
1755         $this->line_ending = (string)$line_ending;
1756     }
1757
1758     /**
1759      * Sets what number line numbers should start at. Should
1760      * be a positive integer, and will be converted to one.
1761      *
1762      * <b>Warning:</b> Using this method will add the "start"
1763      * attribute to the &lt;ol&gt; that is used for line numbering.
1764      * This is <b>not</b> valid XHTML strict, so if that's what you
1765      * care about then don't use this method. Firefox is getting
1766      * support for the CSS method of doing this in 1.1 and Opera
1767      * has support for the CSS method, but (of course) IE doesn't
1768      * so it's not worth doing it the CSS way yet.
1769      *
1770      * @param int The number to start line numbers at
1771      * @since 1.0.2
1772      */
1773     function start_line_numbers_at($number) {
1774         $this->line_numbers_start = abs(intval($number));
1775     }
1776
1777     /**
1778      * Sets the encoding used for htmlspecialchars(), for international
1779      * support.
1780      *
1781      * NOTE: This is not needed for now because htmlspecialchars() is not
1782      * being used (it has a security hole in PHP4 that has not been patched).
1783      * Maybe in a future version it may make a return for speed reasons, but
1784      * I doubt it.
1785      *
1786      * @param string The encoding to use for the source
1787      * @since 1.0.3
1788      */
1789     function set_encoding($encoding) {
1790         if ($encoding) {
1791           $this->encoding = strtolower($encoding);
1792         }
1793     }
1794
1795     /**
1796      * Turns linking of keywords on or off.
1797      *
1798      * @param boolean If true, links will be added to keywords
1799      * @since 1.0.2
1800      */
1801     function enable_keyword_links($enable = true) {
1802         $this->keyword_links = (bool) $enable;
1803     }
1804
1805     /**
1806      * Setup caches needed for styling. This is automatically called in
1807      * parse_code() and get_stylesheet() when appropriate. This function helps
1808      * stylesheet generators as they rely on some style information being
1809      * preprocessed
1810      *
1811      * @since 1.0.8
1812      * @access private
1813      */
1814     function build_style_cache() {
1815         //Build the style cache needed to highlight numbers appropriate
1816         if($this->lexic_permissions['NUMBERS']) {
1817             //First check what way highlighting information for numbers are given
1818             if(!isset($this->language_data['NUMBERS'])) {
1819                 $this->language_data['NUMBERS'] = 0;
1820             }
1821
1822             if(is_array($this->language_data['NUMBERS'])) {
1823                 $this->language_data['NUMBERS_CACHE'] = $this->language_data['NUMBERS'];
1824             } else {
1825                 $this->language_data['NUMBERS_CACHE'] = array();
1826                 if(!$this->language_data['NUMBERS']) {
1827                     $this->language_data['NUMBERS'] =
1828                         GESHI_NUMBER_INT_BASIC |
1829                         GESHI_NUMBER_FLT_NONSCI;
1830                 }
1831
1832                 for($i = 0, $j = $this->language_data['NUMBERS']; $j > 0; ++$i, $j>>=1) {
1833                     //Rearrange style indices if required ...
1834                     if(isset($this->language_data['STYLES']['NUMBERS'][1<<$i])) {
1835                         $this->language_data['STYLES']['NUMBERS'][$i] =
1836                             $this->language_data['STYLES']['NUMBERS'][1<<$i];
1837                         unset($this->language_data['STYLES']['NUMBERS'][1<<$i]);
1838                     }
1839
1840                     //Check if this bit is set for highlighting
1841                     if($j&1) {
1842                         //So this bit is set ...
1843                         //Check if it belongs to group 0 or the actual stylegroup
1844                         if(isset($this->language_data['STYLES']['NUMBERS'][$i])) {
1845                             $this->language_data['NUMBERS_CACHE'][$i] = 1 << $i;
1846                         } else {
1847                             if(!isset($this->language_data['NUMBERS_CACHE'][0])) {
1848                                 $this->language_data['NUMBERS_CACHE'][0] = 0;
1849                             }
1850                             $this->language_data['NUMBERS_CACHE'][0] |= 1 << $i;
1851                         }
1852                     }
1853                 }
1854             }
1855         }
1856     }
1857
1858     /**
1859      * Setup caches needed for parsing. This is automatically called in parse_code() when appropriate.
1860      * This function makes stylesheet generators much faster as they do not need these caches.
1861      *
1862      * @since 1.0.8
1863      * @access private
1864      */
1865     function build_parse_cache() {
1866         // cache symbol regexp
1867         //As this is a costy operation, we avoid doing it for multiple groups ...
1868         //Instead we perform it for all symbols at once.
1869         //
1870         //For this to work, we need to reorganize the data arrays.
1871         if ($this->lexic_permissions['SYMBOLS'] && !empty($this->language_data['SYMBOLS'])) {
1872             $this->language_data['MULTIPLE_SYMBOL_GROUPS'] = count($this->language_data['STYLES']['SYMBOLS']) > 1;
1873
1874             $this->language_data['SYMBOL_DATA'] = array();
1875             $symbol_preg_multi = array(); // multi char symbols
1876             $symbol_preg_single = array(); // single char symbols
1877             foreach ($this->language_data['SYMBOLS'] as $key => $symbols) {
1878                 if (is_array($symbols)) {
1879                     foreach ($symbols as $sym) {
1880                         $sym = $this->hsc($sym);
1881                         if (!isset($this->language_data['SYMBOL_DATA'][$sym])) {
1882                             $this->language_data['SYMBOL_DATA'][$sym] = $key;
1883                             if (isset($sym[1])) { // multiple chars
1884                                 $symbol_preg_multi[] = preg_quote($sym, '/');
1885                             } else { // single char
1886                                 if ($sym == '-') {
1887                                     // don't trigger range out of order error
1888                                     $symbol_preg_single[] = '\-';
1889                                 } else {
1890                                     $symbol_preg_single[] = preg_quote($sym, '/');
1891                                 }
1892                             }
1893                         }
1894                     }
1895                 } else {
1896                     $symbols = $this->hsc($symbols);
1897                     if (!isset($this->language_data['SYMBOL_DATA'][$symbols])) {
1898                         $this->language_data['SYMBOL_DATA'][$symbols] = 0;
1899                         if (isset($symbols[1])) { // multiple chars
1900                             $symbol_preg_multi[] = preg_quote($symbols, '/');
1901                         } else if ($symbols == '-') {
1902                             // don't trigger range out of order error
1903                             $symbol_preg_single[] = '\-';
1904                         } else { // single char
1905                             $symbol_preg_single[] = preg_quote($symbols, '/');
1906                         }
1907                     }
1908                 }
1909             }
1910
1911             //Now we have an array with each possible symbol as the key and the style as the actual data.
1912             //This way we can set the correct style just the moment we highlight ...
1913             //
1914             //Now we need to rewrite our array to get a search string that
1915             $symbol_preg = array();
1916             if (!empty($symbol_preg_multi)) {
1917                 rsort($symbol_preg_multi);
1918                 $symbol_preg[] = implode('|', $symbol_preg_multi);
1919             }
1920             if (!empty($symbol_preg_single)) {
1921                 rsort($symbol_preg_single);
1922                 $symbol_preg[] = '[' . implode('', $symbol_preg_single) . ']';
1923             }
1924             $this->language_data['SYMBOL_SEARCH'] = implode("|", $symbol_preg);
1925         }
1926
1927         // cache optimized regexp for keyword matching
1928         // remove old cache
1929         $this->language_data['CACHED_KEYWORD_LISTS'] = array();
1930         foreach (array_keys($this->language_data['KEYWORDS']) as $key) {
1931             if (!isset($this->lexic_permissions['KEYWORDS'][$key]) ||
1932                     $this->lexic_permissions['KEYWORDS'][$key]) {
1933                 $this->optimize_keyword_group($key);
1934             }
1935         }
1936
1937         // brackets
1938         if ($this->lexic_permissions['BRACKETS']) {
1939             $this->language_data['CACHE_BRACKET_MATCH'] = array('[', ']', '(', ')', '{', '}');
1940             if (!$this->use_classes && isset($this->language_data['STYLES']['BRACKETS'][0])) {
1941                 $this->language_data['CACHE_BRACKET_REPLACE'] = array(
1942                     '<| style="' . $this->language_data['STYLES']['BRACKETS'][0] . '">&#91;|>',
1943                     '<| style="' . $this->language_data['STYLES']['BRACKETS'][0] . '">&#93;|>',
1944                     '<| style="' . $this->language_data['STYLES']['BRACKETS'][0] . '">&#40;|>',
1945                     '<| style="' . $this->language_data['STYLES']['BRACKETS'][0] . '">&#41;|>',
1946                     '<| style="' . $this->language_data['STYLES']['BRACKETS'][0] . '">&#123;|>',
1947                     '<| style="' . $this->language_data['STYLES']['BRACKETS'][0] . '">&#125;|>',
1948                 );
1949             }
1950             else {
1951                 $this->language_data['CACHE_BRACKET_REPLACE'] = array(
1952                     '<| class="br0">&#91;|>',
1953                     '<| class="br0">&#93;|>',
1954                     '<| class="br0">&#40;|>',
1955                     '<| class="br0">&#41;|>',
1956                     '<| class="br0">&#123;|>',
1957                     '<| class="br0">&#125;|>',
1958                 );
1959             }
1960         }
1961
1962         //Build the parse cache needed to highlight numbers appropriate
1963         if($this->lexic_permissions['NUMBERS']) {
1964             //Check if the style rearrangements have been processed ...
1965             //This also does some preprocessing to check which style groups are useable ...
1966             if(!isset($this->language_data['NUMBERS_CACHE'])) {
1967                 $this->build_style_cache();
1968             }
1969
1970             //Number format specification
1971             //All this formats are matched case-insensitively!
1972             static $numbers_format = array(
1973                 GESHI_NUMBER_INT_BASIC =>
1974                     '(?<![0-9a-z_\.%])(?<![\d\.]e[+\-])([1-9]\d*?|0)(?![0-9a-z]|\.(?:[eE][+\-]?)?\d)',
1975                 GESHI_NUMBER_INT_CSTYLE =>
1976                     '(?<![0-9a-z_\.%])(?<![\d\.]e[+\-])([1-9]\d*?|0)l(?![0-9a-z]|\.(?:[eE][+\-]?)?\d)',
1977                 GESHI_NUMBER_BIN_SUFFIX =>
1978                     '(?<![0-9a-z_\.])(?<![\d\.]e[+\-])[01]+?[bB](?![0-9a-z]|\.(?:[eE][+\-]?)?\d)',
1979                 GESHI_NUMBER_BIN_PREFIX_PERCENT =>
1980                     '(?<![0-9a-z_\.%])(?<![\d\.]e[+\-])%[01]+?(?![0-9a-z]|\.(?:[eE][+\-]?)?\d)',
1981                 GESHI_NUMBER_BIN_PREFIX_0B =>
1982                     '(?<![0-9a-z_\.%])(?<![\d\.]e[+\-])0b[01]+?(?![0-9a-z]|\.(?:[eE][+\-]?)?\d)',
1983                 GESHI_NUMBER_OCT_PREFIX =>
1984                     '(?<![0-9a-z_\.])(?<![\d\.]e[+\-])0[0-7]+?(?![0-9a-z]|\.(?:[eE][+\-]?)?\d)',
1985                 GESHI_NUMBER_OCT_PREFIX_0O =>
1986                     '(?<![0-9a-z_\.%])(?<![\d\.]e[+\-])0o[0-7]+?(?![0-9a-z]|\.(?:[eE][+\-]?)?\d)',
1987                 GESHI_NUMBER_OCT_SUFFIX =>
1988                     '(?<![0-9a-z_\.])(?<![\d\.]e[+\-])[0-7]+?o(?![0-9a-z]|\.(?:[eE][+\-]?)?\d)',
1989                 GESHI_NUMBER_HEX_PREFIX =>
1990                     '(?<![0-9a-z_\.])(?<![\d\.]e[+\-])0x[0-9a-fA-F]+?(?![0-9a-z]|\.(?:[eE][+\-]?)?\d)',
1991                 GESHI_NUMBER_HEX_SUFFIX =>
1992                     '(?<![0-9a-z_\.])(?<![\d\.]e[+\-])\d[0-9a-fA-F]*?[hH](?![0-9a-z]|\.(?:[eE][+\-]?)?\d)',
1993                 GESHI_NUMBER_FLT_NONSCI =>
1994                     '(?<![0-9a-z_\.])(?<![\d\.]e[+\-])\d+?\.\d+?(?![0-9a-z]|\.(?:[eE][+\-]?)?\d)',
1995                 GESHI_NUMBER_FLT_NONSCI_F =>
1996                     '(?<![0-9a-z_\.])(?<![\d\.]e[+\-])(?:\d+?(?:\.\d*?)?|\.\d+?)f(?![0-9a-z]|\.(?:[eE][+\-]?)?\d)',
1997                 GESHI_NUMBER_FLT_SCI_SHORT =>
1998                     '(?<![0-9a-z_\.])(?<![\d\.]e[+\-])\.\d+?(?:e[+\-]?\d+?)?(?![0-9a-z]|\.(?:[eE][+\-]?)?\d)',
1999                 GESHI_NUMBER_FLT_SCI_ZERO =>
2000                     '(?<![0-9a-z_\.])(?<![\d\.]e[+\-])(?:\d+?(?:\.\d*?)?|\.\d+?)(?:e[+\-]?\d+?)?(?![0-9a-z]|\.(?:[eE][+\-]?)?\d)'
2001                 );
2002
2003             //At this step we have an associative array with flag groups for a
2004             //specific style or an string denoting a regexp given its index.
2005             $this->language_data['NUMBERS_RXCACHE'] = array();
2006             foreach($this->language_data['NUMBERS_CACHE'] as $key => $rxdata) {
2007                 if(is_string($rxdata)) {
2008                     $regexp = $rxdata;
2009                 } else {
2010                     //This is a bitfield of number flags to highlight:
2011                     //Build an array, implode them together and make this the actual RX
2012                     $rxuse = array();
2013                     for($i = 1; $i <= $rxdata; $i<<=1) {
2014                         if($rxdata & $i) {
2015                             $rxuse[] = $numbers_format[$i];
2016                         }
2017                     }
2018                     $regexp = implode("|", $rxuse);
2019                 }
2020
2021                 $this->language_data['NUMBERS_RXCACHE'][$key] =
2022                     "/(?<!<\|\/)(?<!<\|!REG3XP)(?<!<\|\/NUM!)(?<!\d\/>)($regexp)(?!(?:<DOT>|[^\<])+>)(?![^<]*>)(?!\|>)(?!\/>)/i"; //
2023             }
2024         }
2025
2026         $this->parse_cache_built = true;
2027     }
2028
2029     /**
2030      * Returns the code in $this->source, highlighted and surrounded by the
     * necessary HTML.
2032      *
2033      * This should only be called ONCE, cos it's SLOW! If you want to highlight
2034      * the same source multiple times, you're better off doing a whole lot of
2035      * str_replaces to replace the &lt;span&gt;s
2036      *
2037      * @since 1.0.0
2038      */
2039     function parse_code () {
2040         // Start the timer
2041         $start_time = microtime();
2042
2043         // Replace all newlines to a common form.
2044         $code = str_replace("\r\n", "\n", $this->source);
2045         $code = str_replace("\r", "\n", $code);
2046
2047         // Firstly, if there is an error, we won't highlight
2048         if ($this->error) {
2049             //Escape the source for output
2050             $result = $this->hsc($this->source);
2051
2052             //This fix is related to SF#1923020, but has to be applied regardless of
2053             //actually highlighting symbols.
2054             $result = str_replace(array('<SEMI>', '<PIPE>'), array(';', '|'), $result);
2055
2056             // Timing is irrelevant
2057             $this->set_time($start_time, $start_time);
2058             $this->finalise($result);
2059             return $result;
2060         }
2061
2062         // make sure the parse cache is up2date
2063         if (!$this->parse_cache_built) {
2064             $this->build_parse_cache();
2065         }
2066
2067         // Initialise various stuff
2068         $length           = strlen($code);
2069         $COMMENT_MATCHED  = false;
2070         $stuff_to_parse   = '';
2071         $endresult        = '';
2072
2073         // "Important" selections are handled like multiline comments
2074         // @todo GET RID OF THIS SHIZ
2075         if ($this->enable_important_blocks) {
2076             $this->language_data['COMMENT_MULTI'][GESHI_START_IMPORTANT] = GESHI_END_IMPORTANT;
2077         }
2078
2079         if ($this->strict_mode) {
2080             // Break the source into bits. Each bit will be a portion of the code
2081             // within script delimiters - for example, HTML between < and >
2082             $k = 0;
2083             $parts = array();
2084             $matches = array();
2085             $next_match_pointer = null;
2086             // we use a copy to unset delimiters on demand (when they are not found)
2087             $delim_copy = $this->language_data['SCRIPT_DELIMITERS'];
2088             $i = 0;
2089             while ($i < $length) {
2090                 $next_match_pos = $length + 1; // never true
2091                 foreach ($delim_copy as $dk => $delimiters) {
2092                     if(is_array($delimiters)) {
2093                         foreach ($delimiters as $open => $close) {
2094                             // make sure the cache is setup properly
2095                             if (!isset($matches[$dk][$open])) {
2096                                 $matches[$dk][$open] = array(
2097                                     'next_match' => -1,
2098                                     'dk' => $dk,
2099
2100                                     'open' => $open, // needed for grouping of adjacent code blocks (see below)
2101                                     'open_strlen' => strlen($open),
2102
2103                                     'close' => $close,
2104                                     'close_strlen' => strlen($close),
2105                                 );
2106                             }
2107                             // Get the next little bit for this opening string
2108                             if ($matches[$dk][$open]['next_match'] < $i) {
2109                                 // only find the next pos if it was not already cached
2110                                 $open_pos = strpos($code, $open, $i);
2111                                 if ($open_pos === false) {
2112                                     // no match for this delimiter ever
2113                                     unset($delim_copy[$dk][$open]);
2114                                     continue;
2115                                 }
2116                                 $matches[$dk][$open]['next_match'] = $open_pos;
2117                             }
2118                             if ($matches[$dk][$open]['next_match'] < $next_match_pos) {
2119                                 //So we got a new match, update the close_pos
2120                                 $matches[$dk][$open]['close_pos'] =
2121                                     strpos($code, $close, $matches[$dk][$open]['next_match']+1);
2122
2123                                 $next_match_pointer =& $matches[$dk][$open];
2124                                 $next_match_pos = $matches[$dk][$open]['next_match'];
2125                             }
2126                         }
2127                     } else {
2128                         //So we should match an RegExp as Strict Block ...
2129                         /**
2130                          * The value in $delimiters is expected to be an RegExp
2131                          * containing exactly 2 matching groups:
2132                          *  - Group 1 is the opener
2133                          *  - Group 2 is the closer
2134                          */
2135                         if(!GESHI_PHP_PRE_433 && //Needs proper rewrite to work with PHP >=4.3.0; 4.3.3 is guaranteed to work.
2136                             preg_match($delimiters, $code, $matches_rx, PREG_OFFSET_CAPTURE, $i)) {
2137                             //We got a match ...
2138                             if(isset($matches_rx['start']) && isset($matches_rx['end']))
2139                             {
2140                                 $matches[$dk] = array(
2141                                     'next_match' => $matches_rx['start'][1],
2142                                     'dk' => $dk,
2143
2144                                     'close_strlen' => strlen($matches_rx['end'][0]),
2145                                     'close_pos' => $matches_rx['end'][1],
2146                                     );
2147                             } else {
2148                                 $matches[$dk] = array(
2149                                     'next_match' => $matches_rx[1][1],
2150                                     'dk' => $dk,
2151
2152                                     'close_strlen' => strlen($matches_rx[2][0]),
2153                                     'close_pos' => $matches_rx[2][1],
2154                                     );
2155                             }
2156                         } else {
2157                             // no match for this delimiter ever
2158                             unset($delim_copy[$dk]);
2159                             continue;
2160                         }
2161
2162                         if ($matches[$dk]['next_match'] <= $next_match_pos) {
2163                             $next_match_pointer =& $matches[$dk];
2164                             $next_match_pos = $matches[$dk]['next_match'];
2165                         }
2166                     }
2167                 }
2168
2169                 // non-highlightable text
2170                 $parts[$k] = array(
2171                     1 => substr($code, $i, $next_match_pos - $i)
2172                 );
2173                 ++$k;
2174
2175                 if ($next_match_pos > $length) {
2176                     // out of bounds means no next match was found
2177                     break;
2178                 }
2179
2180                 // highlightable code
2181                 $parts[$k][0] = $next_match_pointer['dk'];
2182
2183                 //Only combine for non-rx script blocks
2184                 if(is_array($delim_copy[$next_match_pointer['dk']])) {
2185                     // group adjacent script blocks, e.g. <foobar><asdf> should be one block, not three!
2186                     $i = $next_match_pos + $next_match_pointer['open_strlen'];
2187                     while (true) {
2188                         $close_pos = strpos($code, $next_match_pointer['close'], $i);
2189                         if ($close_pos == false) {
2190                             break;
2191                         }
2192                         $i = $close_pos + $next_match_pointer['close_strlen'];
2193                         if ($i == $length) {
2194                             break;
2195                         }
2196                         if ($code[$i] == $next_match_pointer['open'][0] && ($next_match_pointer['open_strlen'] == 1 ||
2197                             substr($code, $i, $next_match_pointer['open_strlen']) == $next_match_pointer['open'])) {
2198                             // merge adjacent but make sure we don't merge things like <tag><!-- comment -->
2199                             foreach ($matches as $submatches) {
2200                                 foreach ($submatches as $match) {
2201                                     if ($match['next_match'] == $i) {
2202                                         // a different block already matches here!
2203                                         break 3;
2204                                     }
2205                                 }
2206                             }
2207                         } else {
2208                             break;
2209                         }
2210                     }
2211                 } else {
2212                     $close_pos = $next_match_pointer['close_pos'] + $next_match_pointer['close_strlen'];
2213                     $i = $close_pos;
2214                 }
2215
2216                 if ($close_pos === false) {
2217                     // no closing delimiter found!
2218                     $parts[$k][1] = substr($code, $next_match_pos);
2219                     ++$k;
2220                     break;
2221                 } else {
2222                     $parts[$k][1] = substr($code, $next_match_pos, $i - $next_match_pos);
2223                     ++$k;
2224                 }
2225             }
2226             unset($delim_copy, $next_match_pointer, $next_match_pos, $matches);
2227             $num_parts = $k;
2228
2229             if ($num_parts == 1 && $this->strict_mode == GESHI_MAYBE) {
2230                 // when we have only one part, we don't have anything to highlight at all.
2231                 // if we have a "maybe" strict language, this should be handled as highlightable code
2232                 $parts = array(
2233                     0 => array(
2234                         0 => '',
2235                         1 => ''
2236                     ),
2237                     1 => array(
2238                         0 => null,
2239                         1 => $parts[0][1]
2240                     )
2241                 );
2242                 $num_parts = 2;
2243             }
2244
2245         } else {
2246             // Not strict mode - simply dump the source into
2247             // the array at index 1 (the first highlightable block)
2248             $parts = array(
2249                 0 => array(
2250                     0 => '',
2251                     1 => ''
2252                 ),
2253                 1 => array(
2254                     0 => null,
2255                     1 => $code
2256                 )
2257             );
2258             $num_parts = 2;
2259         }
2260
2261         //Unset variables we won't need any longer
2262         unset($code);
2263
2264         //Preload some repeatedly used values regarding hardquotes ...
2265         $hq = isset($this->language_data['HARDQUOTE']) ? $this->language_data['HARDQUOTE'][0] : false;
2266         $hq_strlen = strlen($hq);
2267
2268         //Preload if line numbers are to be generated afterwards
2269         //Added a check if line breaks should be forced even without line numbers, fixes SF#1727398
2270         $check_linenumbers = $this->line_numbers != GESHI_NO_LINE_NUMBERS ||
2271             !empty($this->highlight_extra_lines) || !$this->allow_multiline_span;
2272
2273         //preload the escape char for faster checking ...
2274         $escaped_escape_char = $this->hsc($this->language_data['ESCAPE_CHAR']);
2275
2276         // this is used for single-line comments
2277         $sc_disallowed_before = "";
2278         $sc_disallowed_after = "";
2279
2280         if (isset($this->language_data['PARSER_CONTROL'])) {
2281             if (isset($this->language_data['PARSER_CONTROL']['COMMENTS'])) {
2282                 if (isset($this->language_data['PARSER_CONTROL']['COMMENTS']['DISALLOWED_BEFORE'])) {
2283                     $sc_disallowed_before = $this->language_data['PARSER_CONTROL']['COMMENTS']['DISALLOWED_BEFORE'];
2284                 }
2285                 if (isset($this->language_data['PARSER_CONTROL']['COMMENTS']['DISALLOWED_AFTER'])) {
2286                     $sc_disallowed_after = $this->language_data['PARSER_CONTROL']['COMMENTS']['DISALLOWED_AFTER'];
2287                 }
2288             }
2289         }
2290
2291         //Fix for SF#1932083: Multichar Quotemarks unsupported
2292         $is_string_starter = array();
2293         if ($this->lexic_permissions['STRINGS']) {
2294             foreach ($this->language_data['QUOTEMARKS'] as $quotemark) {
2295                 if (!isset($is_string_starter[$quotemark[0]])) {
2296                     $is_string_starter[$quotemark[0]] = (string)$quotemark;
2297                 } else if (is_string($is_string_starter[$quotemark[0]])) {
2298                     $is_string_starter[$quotemark[0]] = array(
2299                         $is_string_starter[$quotemark[0]],
2300                         $quotemark);
2301                 } else {
2302                     $is_string_starter[$quotemark[0]][] = $quotemark;
2303                 }
2304             }
2305         }
2306
2307         // Now we go through each part. We know that even-indexed parts are
2308         // code that shouldn't be highlighted, and odd-indexed parts should
2309         // be highlighted
2310         for ($key = 0; $key < $num_parts; ++$key) {
2311             $STRICTATTRS = '';
2312
2313             // Even-indexed parts are not highlighted: escape them and move on
2314             if (!($key & 1)) {
2315                 // Else not a block to highlight
2316                 $endresult .= $this->hsc($parts[$key][1]);
2317                 unset($parts[$key]);
2318                 continue;
2319             }
2320
2321             $result = '';
2322             $part = $parts[$key][1];
2323
2324             $highlight_part = true;
2325             if ($this->strict_mode && !is_null($parts[$key][0])) {
2326                 // get the class key for this block of code
2327                 $script_key = $parts[$key][0];
2328                 $highlight_part = $this->language_data['HIGHLIGHT_STRICT_BLOCK'][$script_key];
2329                 if ($this->language_data['STYLES']['SCRIPT'][$script_key] != '' &&
2330                     $this->lexic_permissions['SCRIPT']) {
2331                     // Add a span element around the source to
2332                     // highlight the overall source block
2333                     if (!$this->use_classes &&
2334                         $this->language_data['STYLES']['SCRIPT'][$script_key] != '') {
2335                         $attributes = ' style="' . $this->language_data['STYLES']['SCRIPT'][$script_key] . '"';
2336                     } else {
2337                         $attributes = ' class="sc' . $script_key . '"';
2338                     }
2339                     $result .= "<span$attributes>";
2340                     $STRICTATTRS = $attributes;
2341                 }
2342             }
2343
2344             if ($highlight_part) {
2345                 // Now, highlight the code in this block. This code
2346                 // is really the engine of GeSHi (along with the method
2347                 // parse_non_string_part).
2348
2349                 // cache comment regexps incrementally
2350                 $next_comment_regexp_key = '';
2351                 $next_comment_regexp_pos = -1;
2352                 $next_comment_multi_pos = -1;
2353                 $next_comment_single_pos = -1;
2354                 $comment_regexp_cache_per_key = array();
2355                 $comment_multi_cache_per_key = array();
2356                 $comment_single_cache_per_key = array();
2357                 $next_open_comment_multi = '';
2358                 $next_comment_single_key = '';
2359                 $escape_regexp_cache_per_key = array();
2360                 $next_escape_regexp_key = '';
2361                 $next_escape_regexp_pos = -1;
2362
2363                 $length = strlen($part);
2364                 for ($i = 0; $i < $length; ++$i) {
2365                     // Get the next char
2366                     $char = $part[$i];
2367                     $char_len = 1;
2368
2369                     // update regexp comment cache if needed
2370                     if (isset($this->language_data['COMMENT_REGEXP']) && $next_comment_regexp_pos < $i) {
2371                         $next_comment_regexp_pos = $length;
2372                         foreach ($this->language_data['COMMENT_REGEXP'] as $comment_key => $regexp) {
2373                             $match_i = false;
2374                             if (isset($comment_regexp_cache_per_key[$comment_key]) &&
2375                                 ($comment_regexp_cache_per_key[$comment_key]['pos'] >= $i ||
2376                                  $comment_regexp_cache_per_key[$comment_key]['pos'] === false)) {
2377                                 // we have already matched something
2378                                 if ($comment_regexp_cache_per_key[$comment_key]['pos'] === false) {
2379                                     // this comment is never matched
2380                                     continue;
2381                                 }
2382                                 $match_i = $comment_regexp_cache_per_key[$comment_key]['pos'];
2383                             } else if (
2384                                 //This is to allow use of the offset parameter in preg_match and stay as compatible with older PHP versions as possible
2385                                 (GESHI_PHP_PRE_433 && preg_match($regexp, substr($part, $i), $match, PREG_OFFSET_CAPTURE)) ||
2386                                 (!GESHI_PHP_PRE_433 && preg_match($regexp, $part, $match, PREG_OFFSET_CAPTURE, $i))
2387                                 ) {
2388                                 $match_i = $match[0][1];
2389                                 if (GESHI_PHP_PRE_433) {
2390                                     $match_i += $i;
2391                                 }
2392
2393                                 $comment_regexp_cache_per_key[$comment_key] = array(
2394                                     'key' => $comment_key,
2395                                     'length' => strlen($match[0][0]),
2396                                     'pos' => $match_i
2397                                 );
2398                             } else {
2399                                 $comment_regexp_cache_per_key[$comment_key]['pos'] = false;
2400                                 continue;
2401                             }
2402
2403                             if ($match_i !== false && $match_i < $next_comment_regexp_pos) {
2404                                 $next_comment_regexp_pos = $match_i;
2405                                 $next_comment_regexp_key = $comment_key;
2406                                 if ($match_i === $i) {
2407                                     break;
2408                                 }
2409                             }
2410                         }
2411                     }
2412
2413                     $string_started = false;
2414
2415                     if (isset($is_string_starter[$char])) {
2416                         // Possibly the start of a new string ...
2417
2418                         //Check which starter it was ...
2419                         //Fix for SF#1932083: Multichar Quotemarks unsupported
2420                         if (is_array($is_string_starter[$char])) {
2421                             $char_new = '';
2422                             foreach ($is_string_starter[$char] as $testchar) {
2423                                 if ($testchar === substr($part, $i, strlen($testchar)) &&
2424                                     strlen($testchar) > strlen($char_new)) {
2425                                     $char_new = $testchar;
2426                                     $string_started = true;
2427                                 }
2428                             }
2429                             if ($string_started) {
2430                                 $char = $char_new;
2431                             }
2432                         } else {
2433                             $testchar = $is_string_starter[$char];
2434                             if ($testchar === substr($part, $i, strlen($testchar))) {
2435                                 $char = $testchar;
2436                                 $string_started = true;
2437                             }
2438                         }
2439                         $char_len = strlen($char);
2440                     }
2441
2442                     if ($string_started && ($i != $next_comment_regexp_pos)) {
2443                         // Hand out the correct style information for this string
2444                         $string_key = array_search($char, $this->language_data['QUOTEMARKS']);
2445                         if (!isset($this->language_data['STYLES']['STRINGS'][$string_key]) ||
2446                             !isset($this->language_data['STYLES']['ESCAPE_CHAR'][$string_key])) {
2447                             $string_key = 0;
2448                         }
2449
2450                         // parse the stuff before this
2451                         $result .= $this->parse_non_string_part($stuff_to_parse);
2452                         $stuff_to_parse = '';
2453
2454                         if (!$this->use_classes) {
2455                             $string_attributes = ' style="' . $this->language_data['STYLES']['STRINGS'][$string_key] . '"';
2456                         } else {
2457                             $string_attributes = ' class="st'.$string_key.'"';
2458                         }
2459
2460                         // now handle the string
2461                         $string = "<span$string_attributes>" . GeSHi::hsc($char);
2462                         $start = $i + $char_len;
2463                         $string_open = true;
2464
2465                         if(empty($this->language_data['ESCAPE_REGEXP'])) {
2466                             $next_escape_regexp_pos = $length;
2467                         }
2468
2469                         do {
2470                             //Get the regular ending pos ...
2471                             $close_pos = strpos($part, $char, $start);
2472                             if(false === $close_pos) {
2473                                 $close_pos = $length;
2474                             }
2475
2476                             if($this->lexic_permissions['ESCAPE_CHAR']) {
2477                                 // update escape regexp cache if needed
2478                                 if (isset($this->language_data['ESCAPE_REGEXP']) && $next_escape_regexp_pos < $start) {
2479                                     $next_escape_regexp_pos = $length;
2480                                     foreach ($this->language_data['ESCAPE_REGEXP'] as $escape_key => $regexp) {
2481                                         $match_i = false;
2482                                         if (isset($escape_regexp_cache_per_key[$escape_key]) &&
2483                                             ($escape_regexp_cache_per_key[$escape_key]['pos'] >= $start ||
2484                                              $escape_regexp_cache_per_key[$escape_key]['pos'] === false)) {
2485                                             // we have already matched something
2486                                             if ($escape_regexp_cache_per_key[$escape_key]['pos'] === false) {
2487                                                 // this escape regexp is never matched
2488                                                 continue;
2489                                             }
2490                                             $match_i = $escape_regexp_cache_per_key[$escape_key]['pos'];
2491                                         } else if (
2492                                             //This is to allow use of the offset parameter in preg_match and stay as compatible with older PHP versions as possible
2493                                             (GESHI_PHP_PRE_433 && preg_match($regexp, substr($part, $start), $match, PREG_OFFSET_CAPTURE)) ||
2494                                             (!GESHI_PHP_PRE_433 && preg_match($regexp, $part, $match, PREG_OFFSET_CAPTURE, $start))
2495                                             ) {
2496                                             $match_i = $match[0][1];
2497                                             if (GESHI_PHP_PRE_433) {
2498                                                 $match_i += $start;
2499                                             }
2500
2501                                             $escape_regexp_cache_per_key[$escape_key] = array(
2502                                                 'key' => $escape_key,
2503                                                 'length' => strlen($match[0][0]),
2504                                                 'pos' => $match_i
2505                                             );
2506                                         } else {
2507                                             $escape_regexp_cache_per_key[$escape_key]['pos'] = false;
2508                                             continue;
2509                                         }
2510
2511                                         if ($match_i !== false && $match_i < $next_escape_regexp_pos) {
2512                                             $next_escape_regexp_pos = $match_i;
2513                                             $next_escape_regexp_key = $escape_key;
2514                                             if ($match_i === $start) {
2515                                                 break;
2516                                             }
2517                                         }
2518                                     }
2519                                 }
2520
2521                                 //Find the next simple escape position
2522                                 if('' != $this->language_data['ESCAPE_CHAR']) {
2523                                     $simple_escape = strpos($part, $this->language_data['ESCAPE_CHAR'], $start);
2524                                     if(false === $simple_escape) {
2525                                         $simple_escape = $length;
2526                                     }
2527                                 } else {
2528                                     $simple_escape = $length;
2529                                 }
2530                             } else {
2531                                 $next_escape_regexp_pos = $length;
2532                                 $simple_escape = $length;
2533                             }
2534
2535                             if($simple_escape < $next_escape_regexp_pos &&
2536                                 $simple_escape < $length &&
2537                                 $simple_escape < $close_pos) {
2538                                 //The next escape sequence is a simple one ...
2539                                 $es_pos = $simple_escape;
2540
2541                                 //Add the stuff not in the string yet ...
2542                                 $string .= $this->hsc(substr($part, $start, $es_pos - $start));
2543
2544                                 //Get the style for this escaped char ...
2545                                 if (!$this->use_classes) {
2546                                     $escape_char_attributes = ' style="' . $this->language_data['STYLES']['ESCAPE_CHAR'][0] . '"';
2547                                 } else {
2548                                     $escape_char_attributes = ' class="es0"';
2549                                 }
2550
2551                                 //Add the style for the escape char ...
2552                                 $string .= "<span$escape_char_attributes>" .
2553                                     GeSHi::hsc($this->language_data['ESCAPE_CHAR']);
2554
2555                                 //Get the byte AFTER the ESCAPE_CHAR we just found
2556                                 $es_char = $part[$es_pos + 1];
2557                                 if ($es_char == "\n") {
2558                                     // don't put a newline around newlines
2559                                     $string .= "</span>\n";
2560                                     $start = $es_pos + 2;
2561                                 } else if (ord($es_char) >= 128) {
2562                                     //This is a non-ASCII char (UTF8 or single byte)
2563                                     //This code tries to work around SF#2037598 ...
2564                                     if(function_exists('mb_substr')) {
2565                                         $es_char_m = mb_substr(substr($part, $es_pos+1, 16), 0, 1, $this->encoding);
2566                                         $string .= $es_char_m . '</span>';
2567                                     } else if (!GESHI_PHP_PRE_433 && 'utf-8' == $this->encoding) {
2568                                         if(preg_match("/[\xC2-\xDF][\x80-\xBF]".
2569                                             "|\xE0[\xA0-\xBF][\x80-\xBF]".
2570                                             "|[\xE1-\xEC\xEE\xEF][\x80-\xBF]{2}".
2571                                             "|\xED[\x80-\x9F][\x80-\xBF]".
2572                                             "|\xF0[\x90-\xBF][\x80-\xBF]{2}".
2573                                             "|[\xF1-\xF3][\x80-\xBF]{3}".
2574                                             "|\xF4[\x80-\x8F][\x80-\xBF]{2}/s",
2575                                             $part, $es_char_m, null, $es_pos + 1)) {
2576                                             $es_char_m = $es_char_m[0];
2577                                         } else {
2578                                             $es_char_m = $es_char;
2579                                         }
2580                                         $string .= $this->hsc($es_char_m) . '</span>';
2581                                     } else {
2582                                         $es_char_m = $this->hsc($es_char);
2583                                     }
2584                                     $start = $es_pos + strlen($es_char_m) + 1;
2585                                 } else {
2586                                     $string .= $this->hsc($es_char) . '</span>';
2587                                     $start = $es_pos + 2;
2588                                 }
2589                             } else if ($next_escape_regexp_pos < $length &&
2590                                 $next_escape_regexp_pos < $close_pos) {
2591                                 $es_pos = $next_escape_regexp_pos;
2592                                 //Add the stuff not in the string yet ...
2593                                 $string .= $this->hsc(substr($part, $start, $es_pos - $start));
2594
2595                                 //Get the key and length of this match ...
2596                                 $escape = $escape_regexp_cache_per_key[$next_escape_regexp_key];
2597                                 $escape_str = substr($part, $es_pos, $escape['length']);
2598                                 $escape_key = $escape['key'];
2599
2600                                 //Get the style for this escaped char ...
2601                                 if (!$this->use_classes) {
2602                                     $escape_char_attributes = ' style="' . $this->language_data['STYLES']['ESCAPE_CHAR'][$escape_key] . '"';
2603                                 } else {
2604                                     $escape_char_attributes = ' class="es' . $escape_key . '"';
2605                                 }
2606
2607                                 //Add the style for the escape char ...
2608                                 $string .= "<span$escape_char_attributes>" .
2609                                     $this->hsc($escape_str) . '</span>';
2610
2611                                 $start = $es_pos + $escape['length'];
2612                             } else {
2613                                 //Copy the remainder of the string ...
2614                                 $string .= $this->hsc(substr($part, $start, $close_pos - $start + $char_len)) . '</span>';
2615                                 $start = $close_pos + $char_len;
2616                                 $string_open = false;
2617                             }
2618                         } while($string_open);
2619
2620                         if ($check_linenumbers) {
2621                             // Are line numbers used? If so, we should end the string before
2622                             // the newline and begin it again (so when <li>s are put in the source
2623                             // remains XHTML compliant)
2624                             // note to self: This opens up possibility of config files specifying
2625                             // that languages can/cannot have multiline strings???
2626                             $string = str_replace("\n", "</span>\n<span$string_attributes>", $string);
2627                         }
2628
2629                         $result .= $string;
2630                         $string = '';
2631                         $i = $start - 1;
2632                         continue;
2633                     } else if ($this->lexic_permissions['STRINGS'] && $hq && $hq[0] == $char &&
2634                         substr($part, $i, $hq_strlen) == $hq && ($i != $next_comment_regexp_pos)) {
2635                         // The start of a hard quoted string
2636                         if (!$this->use_classes) {
2637                             $string_attributes = ' style="' . $this->language_data['STYLES']['STRINGS']['HARD'] . '"';
2638                             $escape_char_attributes = ' style="' . $this->language_data['STYLES']['ESCAPE_CHAR']['HARD'] . '"';
2639                         } else {
2640                             $string_attributes = ' class="st_h"';
2641                             $escape_char_attributes = ' class="es_h"';
2642                         }
2643                         // parse the stuff before this
2644                         $result .= $this->parse_non_string_part($stuff_to_parse);
2645                         $stuff_to_parse = '';
2646
2647                         // now handle the string
2648                         $string = '';
2649
2650                         // look for closing quote
2651                         $start = $i + $hq_strlen;
2652                         while ($close_pos = strpos($part, $this->language_data['HARDQUOTE'][1], $start)) {
2653                             $start = $close_pos + 1;
2654                             if ($this->lexic_permissions['ESCAPE_CHAR'] && $part[$close_pos - 1] == $this->language_data['HARDCHAR']) {
2655                                 // make sure this quote is not escaped
2656                                 foreach ($this->language_data['HARDESCAPE'] as $hardescape) {
2657                                     if (substr($part, $close_pos - 1, strlen($hardescape)) == $hardescape) {
2658                                         // check whether this quote is escaped or if it is something like '\\'
2659                                         $escape_char_pos = $close_pos - 1;
2660                                         while ($escape_char_pos > 0
2661                                                 && $part[$escape_char_pos - 1] == $this->language_data['HARDCHAR']) {
2662                                             --$escape_char_pos;
2663                                         }
2664                                         if (($close_pos - $escape_char_pos) & 1) {
2665                                             // uneven number of escape chars => this quote is escaped
2666                                             continue 2;
2667                                         }
2668                                     }
2669                                 }
2670                             }
2671
2672                             // found closing quote
2673                             break;
2674                         }
2675
2676                         //Found the closing delimiter?
2677                         if (!$close_pos) {
2678                             // span till the end of this $part when no closing delimiter is found
2679                             $close_pos = $length;
2680                         }
2681
2682                         //Get the actual string
2683                         $string = substr($part, $i, $close_pos - $i + 1);
2684                         $i = $close_pos;
2685
2686                         // handle escape chars and encode html chars
2687                         // (special because when we have escape chars within our string they may not be escaped)
2688                         if ($this->lexic_permissions['ESCAPE_CHAR'] && $this->language_data['ESCAPE_CHAR']) {
2689                             $start = 0;
2690                             $new_string = '';
2691                             while ($es_pos = strpos($string, $this->language_data['ESCAPE_CHAR'], $start)) {
2692                                 // html escape stuff before
2693                                 $new_string .= $this->hsc(substr($string, $start, $es_pos - $start));
2694                                 // check if this is a hard escape
2695                                 foreach ($this->language_data['HARDESCAPE'] as $hardescape) {
2696                                     if (substr($string, $es_pos, strlen($hardescape)) == $hardescape) {
2697                                         // indeed, this is a hardescape
2698                                         $new_string .= "<span$escape_char_attributes>" .
2699                                             $this->hsc($hardescape) . '</span>';
2700                                         $start = $es_pos + strlen($hardescape);
2701                                         continue 2;
2702                                     }
2703                                 }
2704                                 // not a hard escape, but a normal escape
2705                                 // they come in pairs of two
2706                                 $c = 0;
2707                                 while (isset($string[$es_pos + $c]) && isset($string[$es_pos + $c + 1])
2708                                     && $string[$es_pos + $c] == $this->language_data['ESCAPE_CHAR']
2709                                     && $string[$es_pos + $c + 1] == $this->language_data['ESCAPE_CHAR']) {
2710                                     $c += 2;
2711                                 }
2712                                 if ($c) {
2713                                     $new_string .= "<span$escape_char_attributes>" .
2714                                         str_repeat($escaped_escape_char, $c) .
2715                                         '</span>';
2716                                     $start = $es_pos + $c;
2717                                 } else {
2718                                     // this is just a single lonely escape char...
2719                                     $new_string .= $escaped_escape_char;
2720                                     $start = $es_pos + 1;
2721                                 }
2722                             }
2723                             $string = $new_string . $this->hsc(substr($string, $start));
2724                         } else {
2725                             $string = $this->hsc($string);
2726                         }
2727
2728                         if ($check_linenumbers) {
2729                             // Are line numbers used? If so, we should end the string before
2730                             // the newline and begin it again (so when <li>s are put in the source
2731                             // remains XHTML compliant)
2732                             // note to self: This opens up possibility of config files specifying
2733                             // that languages can/cannot have multiline strings???
2734                             $string = str_replace("\n", "</span>\n<span$string_attributes>", $string);
2735                         }
2736
2737                         $result .= "<span$string_attributes>" . $string . '</span>';
2738                         $string = '';
2739                         continue;
2740                     } else {
2741                         //Have a look for regexp comments
2742                         if ($i == $next_comment_regexp_pos) {
2743                             $COMMENT_MATCHED = true;
2744                             $comment = $comment_regexp_cache_per_key[$next_comment_regexp_key];
2745                             $test_str = $this->hsc(substr($part, $i, $comment['length']));
2746
2747                             //@todo If remove important do remove here
2748                             if ($this->lexic_permissions['COMMENTS']['MULTI']) {
2749                                 if (!$this->use_classes) {
2750                                     $attributes = ' style="' . $this->language_data['STYLES']['COMMENTS'][$comment['key']] . '"';
2751                                 } else {
2752                                     $attributes = ' class="co' . $comment['key'] . '"';
2753                                 }
2754
2755                                 $test_str = "<span$attributes>" . $test_str . "</span>";
2756
2757                                 // Short-cut through all the multiline code
2758                                 if ($check_linenumbers) {
2759                                     // strreplace to put close span and open span around multiline newlines
2760                                     $test_str = str_replace(
2761                                         "\n", "</span>\n<span$attributes>",
2762                                         str_replace("\n ", "\n&nbsp;", $test_str)
2763                                     );
2764                                 }
2765                             }
2766
2767                             $i += $comment['length'] - 1;
2768
2769                             // parse the rest
2770                             $result .= $this->parse_non_string_part($stuff_to_parse);
2771                             $stuff_to_parse = '';
2772                         }
2773
2774                         // If we haven't matched a regexp comment, try multi-line comments
2775                         if (!$COMMENT_MATCHED) {
2776                             // Is this a multiline comment?
2777                             if (!empty($this->language_data['COMMENT_MULTI']) && $next_comment_multi_pos < $i) {
2778                                 $next_comment_multi_pos = $length;
2779                                 foreach ($this->language_data['COMMENT_MULTI'] as $open => $close) {
2780                                     $match_i = false;
2781                                     if (isset($comment_multi_cache_per_key[$open]) &&
2782                                         ($comment_multi_cache_per_key[$open] >= $i ||
2783                                          $comment_multi_cache_per_key[$open] === false)) {
2784                                         // we have already matched something
2785                                         if ($comment_multi_cache_per_key[$open] === false) {
2786                                             // this comment is never matched
2787                                             continue;
2788                                         }
2789                                         $match_i = $comment_multi_cache_per_key[$open];
2790                                     } else if (($match_i = stripos($part, $open, $i)) !== false) {
2791                                         $comment_multi_cache_per_key[$open] = $match_i;
2792                                     } else {
2793                                         $comment_multi_cache_per_key[$open] = false;
2794                                         continue;
2795                                     }
2796                                     if ($match_i !== false && $match_i < $next_comment_multi_pos) {
2797                                         $next_comment_multi_pos = $match_i;
2798                                         $next_open_comment_multi = $open;
2799                                         if ($match_i === $i) {
2800                                             break;
2801                                         }
2802                                     }
2803                                 }
2804                             }
2805                             if ($i == $next_comment_multi_pos) {
2806                                 $open = $next_open_comment_multi;
2807                                 $close = $this->language_data['COMMENT_MULTI'][$open];
2808                                 $open_strlen = strlen($open);
2809                                 $close_strlen = strlen($close);
2810                                 $COMMENT_MATCHED = true;
2811                                 $test_str_match = $open;
2812                                 //@todo If remove important do remove here
2813                                 if ($this->lexic_permissions['COMMENTS']['MULTI'] ||
2814                                     $open == GESHI_START_IMPORTANT) {
2815                                     if ($open != GESHI_START_IMPORTANT) {
2816                                         if (!$this->use_classes) {
2817                                             $attributes = ' style="' . $this->language_data['STYLES']['COMMENTS']['MULTI'] . '"';
2818                                         } else {
2819                                             $attributes = ' class="coMULTI"';
2820                                         }
2821                                         $test_str = "<span$attributes>" . $this->hsc($open);
2822                                     } else {
2823                                         if (!$this->use_classes) {
2824                                             $attributes = ' style="' . $this->important_styles . '"';
2825                                         } else {
2826                                             $attributes = ' class="imp"';
2827                                         }
2828
2829                                         // We don't include the start of the comment if it's an
2830                                         // "important" part
2831                                         $test_str = "<span$attributes>";
2832                                     }
2833                                 } else {
2834                                     $test_str = $this->hsc($open);
2835                                 }
2836
2837                                 $close_pos = strpos( $part, $close, $i + $open_strlen );
2838
2839                                 if ($close_pos === false) {
2840                                     $close_pos = $length;
2841                                 }
2842
2843                                 // Short-cut through all the multiline code
2844                                 $rest_of_comment = $this->hsc(substr($part, $i + $open_strlen, $close_pos - $i - $open_strlen + $close_strlen));
2845                                 if (($this->lexic_permissions['COMMENTS']['MULTI'] ||
2846                                     $test_str_match == GESHI_START_IMPORTANT) &&
2847                                     $check_linenumbers) {
2848
2849                                     // strreplace to put close span and open span around multiline newlines
2850                                     $test_str .= str_replace(
2851                                         "\n", "</span>\n<span$attributes>",
2852                                         str_replace("\n ", "\n&nbsp;", $rest_of_comment)
2853                                     );
2854                                 } else {
2855                                     $test_str .= $rest_of_comment;
2856                                 }
2857
2858                                 if ($this->lexic_permissions['COMMENTS']['MULTI'] ||
2859                                     $test_str_match == GESHI_START_IMPORTANT) {
2860                                     $test_str .= '</span>';
2861                                 }
2862
2863                                 $i = $close_pos + $close_strlen - 1;
2864
2865                                 // parse the rest
2866                                 $result .= $this->parse_non_string_part($stuff_to_parse);
2867                                 $stuff_to_parse = '';
2868                             }
2869                         }
2870
2871                         // If we haven't matched a multiline comment, try single-line comments
2872                         if (!$COMMENT_MATCHED) {
2873                             // cache potential single line comment occurances
2874                             if (!empty($this->language_data['COMMENT_SINGLE']) && $next_comment_single_pos < $i) {
2875                                 $next_comment_single_pos = $length;
2876                                 foreach ($this->language_data['COMMENT_SINGLE'] as $comment_key => $comment_mark) {
2877                                     $match_i = false;
2878                                     if (isset($comment_single_cache_per_key[$comment_key]) &&
2879                                         ($comment_single_cache_per_key[$comment_key] >= $i ||
2880                                          $comment_single_cache_per_key[$comment_key] === false)) {
2881                                         // we have already matched something
2882                                         if ($comment_single_cache_per_key[$comment_key] === false) {
2883                                             // this comment is never matched
2884                                             continue;
2885                                         }
2886                                         $match_i = $comment_single_cache_per_key[$comment_key];
2887                                     } else if (
2888                                         // case sensitive comments
2889                                         ($this->language_data['CASE_SENSITIVE'][GESHI_COMMENTS] &&
2890                                         ($match_i = stripos($part, $comment_mark, $i)) !== false) ||
2891                                         // non case sensitive
2892                                         (!$this->language_data['CASE_SENSITIVE'][GESHI_COMMENTS] &&
2893                                           (($match_i = strpos($part, $comment_mark, $i)) !== false))) {
2894                                         $comment_single_cache_per_key[$comment_key] = $match_i;
2895                                     } else {
2896                                         $comment_single_cache_per_key[$comment_key] = false;
2897                                         continue;
2898                                     }
2899                                     if ($match_i !== false && $match_i < $next_comment_single_pos) {
2900                                         $next_comment_single_pos = $match_i;
2901                                         $next_comment_single_key = $comment_key;
2902                                         if ($match_i === $i) {
2903                                             break;
2904                                         }
2905                                     }
2906                                 }
2907                             }
2908                             if ($next_comment_single_pos == $i) {
2909                                 $comment_key = $next_comment_single_key;
2910                                 $comment_mark = $this->language_data['COMMENT_SINGLE'][$comment_key];
2911                                 $com_len = strlen($comment_mark);
2912
2913                                 // This check will find special variables like $# in bash
2914                                 // or compiler directives of Delphi beginning {$
2915                                 if ((empty($sc_disallowed_before) || ($i == 0) ||
2916                                     (false === strpos($sc_disallowed_before, $part[$i-1]))) &&
2917                                     (empty($sc_disallowed_after) || ($length <= $i + $com_len) ||
2918                                     (false === strpos($sc_disallowed_after, $part[$i + $com_len]))))
2919                                 {
2920                                     // this is a valid comment
2921                                     $COMMENT_MATCHED = true;
2922                                     if ($this->lexic_permissions['COMMENTS'][$comment_key]) {
2923                                         if (!$this->use_classes) {
2924                                             $attributes = ' style="' . $this->language_data['STYLES']['COMMENTS'][$comment_key] . '"';
2925                                         } else {
2926                                             $attributes = ' class="co' . $comment_key . '"';
2927                                         }
2928                                         $test_str = "<span$attributes>" . $this->hsc($this->change_case($comment_mark));
2929                                     } else {
2930                                         $test_str = $this->hsc($comment_mark);
2931                                     }
2932
2933                                     //Check if this comment is the last in the source
2934                                     $close_pos = strpos($part, "\n", $i);
2935                                     $oops = false;
2936                                     if ($close_pos === false) {
2937                                         $close_pos = $length;
2938                                         $oops = true;
2939                                     }
2940                                     $test_str .= $this->hsc(substr($part, $i + $com_len, $close_pos - $i - $com_len));
2941                                     if ($this->lexic_permissions['COMMENTS'][$comment_key]) {
2942                                         $test_str .= "</span>";
2943                                     }
2944
2945                                     // Take into account that the comment might be the last in the source
2946                                     if (!$oops) {
2947                                       $test_str .= "\n";
2948                                     }
2949
2950                                     $i = $close_pos;
2951
2952                                     // parse the rest
2953                                     $result .= $this->parse_non_string_part($stuff_to_parse);
2954                                     $stuff_to_parse = '';
2955                                 }
2956                             }
2957                         }
2958                     }
2959
2960                     // Where are we adding this char?
2961                     if (!$COMMENT_MATCHED) {
2962                         $stuff_to_parse .= $char;
2963                     } else {
2964                         $result .= $test_str;
2965                         unset($test_str);
2966                         $COMMENT_MATCHED = false;
2967                     }
2968                 }
2969                 // Parse the last bit
2970                 $result .= $this->parse_non_string_part($stuff_to_parse);
2971                 $stuff_to_parse = '';
2972             } else {
2973                 $result .= $this->hsc($part);
2974             }
2975             // Close the <span> that surrounds the block
2976             if ($STRICTATTRS != '') {
2977                 $result = str_replace("\n", "</span>\n<span$STRICTATTRS>", $result);
2978                 $result .= '</span>';
2979             }
2980
2981             $endresult .= $result;
2982             unset($part, $parts[$key], $result);
2983         }
2984
2985         //This fix is related to SF#1923020, but has to be applied regardless of
2986         //actually highlighting symbols.
2987         /** NOTE: memorypeak #3 */
2988         $endresult = str_replace(array('<SEMI>', '<PIPE>'), array(';', '|'), $endresult);
2989
2990 //        // Parse the last stuff (redundant?)
2991 //        $result .= $this->parse_non_string_part($stuff_to_parse);
2992
2993         // Lop off the very first and last spaces
2994 //        $result = substr($result, 1, -1);
2995
2996         // We're finished: stop timing
2997         $this->set_time($start_time, microtime());
2998
2999         $this->finalise($endresult);
3000         return $endresult;
3001     }
3002
3003     /**
3004      * Swaps out spaces and tabs for HTML indentation. Not needed if
3005      * the code is in a pre block...
3006      *
3007      * @param  string The source to indent (reference!)
3008      * @since  1.0.0
3009      * @access private
3010      */
3011     function indent(&$result) {
3012         /// Replace tabs with the correct number of spaces
3013         if (false !== strpos($result, "\t")) {
3014             $lines = explode("\n", $result);
3015             $result = null;//Save memory while we process the lines individually
3016             $tab_width = $this->get_real_tab_width();
3017             $tab_string = '&nbsp;' . str_repeat(' ', $tab_width);
3018
3019             for ($key = 0, $n = count($lines); $key < $n; $key++) {
3020                 $line = $lines[$key];
3021                 if (false === strpos($line, "\t")) {
3022                     continue;
3023                 }
3024
3025                 $pos = 0;
3026                 $length = strlen($line);
3027                 $lines[$key] = ''; // reduce memory
3028
3029                 $IN_TAG = false;
3030                 for ($i = 0; $i < $length; ++$i) {
3031                     $char = $line[$i];
3032                     // Simple engine to work out whether we're in a tag.
3033                     // If we are we modify $pos. This is so we ignore HTML
3034                     // in the line and only workout the tab replacement
3035                     // via the actual content of the string
3036                     // This test could be improved to include strings in the
3037                     // html so that < or > would be allowed in user's styles
3038                     // (e.g. quotes: '<' '>'; or similar)
3039                     if ($IN_TAG) {
3040                         if ('>' == $char) {
3041                             $IN_TAG = false;
3042                         }
3043                         $lines[$key] .= $char;
3044                     } else if ('<' == $char) {
3045                         $IN_TAG = true;
3046                         $lines[$key] .= '<';
3047                     } else if ('&' == $char) {
3048                         $substr = substr($line, $i + 3, 5);
3049                         $posi = strpos($substr, ';');
3050                         if (false === $posi) {
3051                             ++$pos;
3052                         } else {
3053                             $pos -= $posi+2;
3054                         }
3055                         $lines[$key] .= $char;
3056                     } else if ("\t" == $char) {
3057                         $str = '';
3058                         // OPTIMISE - move $strs out. Make an array:
3059                         // $tabs = array(
3060                         //  1 => '&nbsp;',
3061                         //  2 => '&nbsp; ',
3062                         //  3 => '&nbsp; &nbsp;' etc etc
3063                         // to use instead of building a string every time
3064                         $tab_end_width = $tab_width - ($pos % $tab_width); //Moved out of the look as it doesn't change within the loop
3065                         if (($pos & 1) || 1 == $tab_end_width) {
3066                             $str .= substr($tab_string, 6, $tab_end_width);
3067                         } else {
3068                             $str .= substr($tab_string, 0, $tab_end_width+5);
3069                         }
3070                         $lines[$key] .= $str;
3071                         $pos += $tab_end_width;
3072
3073                         if (false === strpos($line, "\t", $i + 1)) {
3074                             $lines[$key] .= substr($line, $i + 1);
3075                             break;
3076                         }
3077                     } else if (0 == $pos && ' ' == $char) {
3078                         $lines[$key] .= '&nbsp;';
3079                         ++$pos;
3080                     } else {
3081                         $lines[$key] .= $char;
3082                         ++$pos;
3083                     }
3084                 }
3085             }
3086             $result = implode("\n", $lines);
3087             unset($lines);//We don't need the lines separated beyond this --- free them!
3088         }
3089         // Other whitespace
3090         // BenBE: Fix to reduce the number of replacements to be done
3091         $result = preg_replace('/^ /m', '&nbsp;', $result);
3092         $result = str_replace('  ', ' &nbsp;', $result);
3093
3094         if ($this->line_numbers == GESHI_NO_LINE_NUMBERS && $this->header_type != GESHI_HEADER_PRE_TABLE) {
3095             if ($this->line_ending === null) {
3096                 $result = nl2br($result);
3097             } else {
3098                 $result = str_replace("\n", $this->line_ending, $result);
3099             }
3100         }
3101     }
3102
3103     /**
3104      * Changes the case of a keyword for those languages where a change is asked for
3105      *
3106      * @param  string The keyword to change the case of
3107      * @return string The keyword with its case changed
3108      * @since  1.0.0
3109      * @access private
3110      */
3111     function change_case($instr) {
3112         switch ($this->language_data['CASE_KEYWORDS']) {
3113             case GESHI_CAPS_UPPER:
3114                 return strtoupper($instr);
3115             case GESHI_CAPS_LOWER:
3116                 return strtolower($instr);
3117             default:
3118                 return $instr;
3119         }
3120     }
3121
3122     /**
3123      * Handles replacements of keywords to include markup and links if requested
3124      *
3125      * @param  string The keyword to add the Markup to
3126      * @return The HTML for the match found
3127      * @since  1.0.8
3128      * @access private
3129      *
3130      * @todo   Get rid of ender in keyword links
3131      */
3132     function handle_keyword_replace($match) {
3133         $k = $this->_kw_replace_group;
3134         $keyword = $match[0];
3135
3136         $before = '';
3137         $after = '';
3138
3139         if ($this->keyword_links) {
3140             // Keyword links have been ebabled
3141
3142             if (isset($this->language_data['URLS'][$k]) &&
3143                 $this->language_data['URLS'][$k] != '') {
3144                 // There is a base group for this keyword
3145
3146                 // Old system: strtolower
3147                 //$keyword = ( $this->language_data['CASE_SENSITIVE'][$group] ) ? $keyword : strtolower($keyword);
3148                 // New system: get keyword from language file to get correct case
3149                 if (!$this->language_data['CASE_SENSITIVE'][$k] &&
3150                     strpos($this->language_data['URLS'][$k], '{FNAME}') !== false) {
3151                     foreach ($this->language_data['KEYWORDS'][$k] as $word) {
3152                         if (strcasecmp($word, $keyword) == 0) {
3153                             break;
3154                         }
3155                     }
3156                 } else {
3157                     $word = $keyword;
3158                 }
3159
3160                 $before = '<|UR1|"' .
3161                     str_replace(
3162                         array(
3163                             '{FNAME}',
3164                             '{FNAMEL}',
3165                             '{FNAMEU}',
3166                             '.'),
3167                         array(
3168                             str_replace('+', '%20', urlencode($this->hsc($word))),
3169                             str_replace('+', '%20', urlencode($this->hsc(strtolower($word)))),
3170                             str_replace('+', '%20', urlencode($this->hsc(strtoupper($word)))),
3171                             '<DOT>'),
3172                         $this->language_data['URLS'][$k]
3173                     ) . '">';
3174                 $after = '</a>';
3175             }
3176         }
3177
3178         return $before . '<|/'. $k .'/>' . $this->change_case($keyword) . '|>' . $after;
3179     }
3180
3181     /**
3182      * handles regular expressions highlighting-definitions with callback functions
3183      *
3184      * @note this is a callback, don't use it directly
3185      *
3186      * @param array the matches array
3187      * @return The highlighted string
3188      * @since 1.0.8
3189      * @access private
3190      */
3191     function handle_regexps_callback($matches) {
3192         // before: "' style=\"' . call_user_func(\"$func\", '\\1') . '\"\\1|>'",
3193         return  ' style="' . call_user_func($this->language_data['STYLES']['REGEXPS'][$this->_rx_key], $matches[1]) . '"'. $matches[1] . '|>';
3194     }
3195
3196     /**
3197      * handles newlines in REGEXPS matches. Set the _hmr_* vars before calling this
3198      *
3199      * @note this is a callback, don't use it directly
3200      *
3201      * @param array the matches array
3202      * @return string
3203      * @since 1.0.8
3204      * @access private
3205      */
3206     function handle_multiline_regexps($matches) {
3207         $before = $this->_hmr_before;
3208         $after = $this->_hmr_after;
3209         if ($this->_hmr_replace) {
3210             $replace = $this->_hmr_replace;
3211             $search = array();
3212
3213             foreach (array_keys($matches) as $k) {
3214                 $search[] = '\\' . $k;
3215             }
3216
3217             $before = str_replace($search, $matches, $before);
3218             $after = str_replace($search, $matches, $after);
3219             $replace = str_replace($search, $matches, $replace);
3220         } else {
3221             $replace = $matches[0];
3222         }
3223         return $before
3224                     . '<|!REG3XP' . $this->_hmr_key .'!>'
3225                         . str_replace("\n", "|>\n<|!REG3XP" . $this->_hmr_key . '!>', $replace)
3226                     . '|>'
3227               . $after;
3228     }
3229
3230     /**
3231      * Takes a string that has no strings or comments in it, and highlights
3232      * stuff like keywords, numbers and methods.
3233      *
3234      * @param string The string to parse for keyword, numbers etc.
3235      * @since 1.0.0
3236      * @access private
3237      * @todo BUGGY! Why? Why not build string and return?
3238      */
3239     function parse_non_string_part($stuff_to_parse) {
3240         $stuff_to_parse = ' ' . $this->hsc($stuff_to_parse);
3241
3242         // Highlight keywords
3243         $disallowed_before = "(?<![a-zA-Z0-9\$_\|\#;>|^&";
3244         $disallowed_after = "(?![a-zA-Z0-9_\|%\\-&;";
3245         if ($this->lexic_permissions['STRINGS']) {
3246             $quotemarks = preg_quote(implode($this->language_data['QUOTEMARKS']), '/');
3247             $disallowed_before .= $quotemarks;
3248             $disallowed_after .= $quotemarks;
3249         }
3250         $disallowed_before .= "])";
3251         $disallowed_after .= "])";
3252
3253         $parser_control_pergroup = false;
3254         if (isset($this->language_data['PARSER_CONTROL'])) {
3255             if (isset($this->language_data['PARSER_CONTROL']['KEYWORDS'])) {
3256                 $x = 0; // check wether per-keyword-group parser_control is enabled
3257                 if (isset($this->language_data['PARSER_CONTROL']['KEYWORDS']['DISALLOWED_BEFORE'])) {
3258                     $disallowed_before = $this->language_data['PARSER_CONTROL']['KEYWORDS']['DISALLOWED_BEFORE'];
3259                     ++$x;
3260                 }
3261                 if (isset($this->language_data['PARSER_CONTROL']['KEYWORDS']['DISALLOWED_AFTER'])) {
3262                     $disallowed_after = $this->language_data['PARSER_CONTROL']['KEYWORDS']['DISALLOWED_AFTER'];
3263                     ++$x;
3264                 }
3265                 $parser_control_pergroup = (count($this->language_data['PARSER_CONTROL']['KEYWORDS']) - $x) > 0;
3266             }
3267         }
3268
3269         foreach (array_keys($this->language_data['KEYWORDS']) as $k) {
3270             if (!isset($this->lexic_permissions['KEYWORDS'][$k]) ||
3271             $this->lexic_permissions['KEYWORDS'][$k]) {
3272
3273                 $case_sensitive = $this->language_data['CASE_SENSITIVE'][$k];
3274                 $modifiers = $case_sensitive ? '' : 'i';
3275
3276                 // NEW in 1.0.8 - per-keyword-group parser control
3277                 $disallowed_before_local = $disallowed_before;
3278                 $disallowed_after_local = $disallowed_after;
3279                 if ($parser_control_pergroup && isset($this->language_data['PARSER_CONTROL']['KEYWORDS'][$k])) {
3280                     if (isset($this->language_data['PARSER_CONTROL']['KEYWORDS'][$k]['DISALLOWED_BEFORE'])) {
3281                         $disallowed_before_local =
3282                             $this->language_data['PARSER_CONTROL']['KEYWORDS'][$k]['DISALLOWED_BEFORE'];
3283                     }
3284
3285                     if (isset($this->language_data['PARSER_CONTROL']['KEYWORDS'][$k]['DISALLOWED_AFTER'])) {
3286                         $disallowed_after_local =
3287                             $this->language_data['PARSER_CONTROL']['KEYWORDS'][$k]['DISALLOWED_AFTER'];
3288                     }
3289                 }
3290
3291                 $this->_kw_replace_group = $k;
3292
3293                 //NEW in 1.0.8, the cached regexp list
3294                 // since we don't want PHP / PCRE to crash due to too large patterns we split them into smaller chunks
3295                 for ($set = 0, $set_length = count($this->language_data['CACHED_KEYWORD_LISTS'][$k]); $set <  $set_length; ++$set) {
3296                     $keywordset =& $this->language_data['CACHED_KEYWORD_LISTS'][$k][$set];
3297                     // Might make a more unique string for putting the number in soon
3298                     // Basically, we don't put the styles in yet because then the styles themselves will
3299                     // get highlighted if the language has a CSS keyword in it (like CSS, for example ;))
3300                     $stuff_to_parse = preg_replace_callback(
3301                         "/$disallowed_before_local({$keywordset})(?!\<DOT\>(?:htm|php))$disallowed_after_local/$modifiers",
3302                         array($this, 'handle_keyword_replace'),
3303                         $stuff_to_parse
3304                         );
3305                 }
3306             }
3307         }
3308
3309         // Regular expressions
3310         foreach ($this->language_data['REGEXPS'] as $key => $regexp) {
3311             if ($this->lexic_permissions['REGEXPS'][$key]) {
3312                 if (is_array($regexp)) {
3313                     if ($this->line_numbers != GESHI_NO_LINE_NUMBERS) {
3314                         // produce valid HTML when we match multiple lines
3315                         $this->_hmr_replace = $regexp[GESHI_REPLACE];
3316                         $this->_hmr_before = $regexp[GESHI_BEFORE];
3317                         $this->_hmr_key = $key;
3318                         $this->_hmr_after = $regexp[GESHI_AFTER];
3319                         $stuff_to_parse = preg_replace_callback(
3320                             "/" . $regexp[GESHI_SEARCH] . "/{$regexp[GESHI_MODIFIERS]}",
3321                             array($this, 'handle_multiline_regexps'),
3322                             $stuff_to_parse);
3323                         $this->_hmr_replace = false;
3324                         $this->_hmr_before = '';
3325                         $this->_hmr_after = '';
3326                     } else {
3327                         $stuff_to_parse = preg_replace(
3328                             '/' . $regexp[GESHI_SEARCH] . '/' . $regexp[GESHI_MODIFIERS],
3329                             $regexp[GESHI_BEFORE] . '<|!REG3XP'. $key .'!>' . $regexp[GESHI_REPLACE] . '|>' . $regexp[GESHI_AFTER],
3330                             $stuff_to_parse);
3331                     }
3332                 } else {
3333                     if ($this->line_numbers != GESHI_NO_LINE_NUMBERS) {
3334                         // produce valid HTML when we match multiple lines
3335                         $this->_hmr_key = $key;
3336                         $stuff_to_parse = preg_replace_callback( "/(" . $regexp . ")/",
3337                                               array($this, 'handle_multiline_regexps'), $stuff_to_parse);
3338                         $this->_hmr_key = '';
3339                     } else {
3340                         $stuff_to_parse = preg_replace( "/(" . $regexp . ")/", "<|!REG3XP$key!>\\1|>", $stuff_to_parse);
3341                     }
3342                 }
3343             }
3344         }
3345
3346         // Highlight numbers. As of 1.0.8 we support different types of numbers
3347         $numbers_found = false;
3348         if ($this->lexic_permissions['NUMBERS'] && preg_match('#\d#', $stuff_to_parse )) {
3349             $numbers_found = true;
3350
3351             //For each of the formats ...
3352             foreach($this->language_data['NUMBERS_RXCACHE'] as $id => $regexp) {
3353                 //Check if it should be highlighted ...
3354                 $stuff_to_parse = preg_replace($regexp, "<|/NUM!$id/>\\1|>", $stuff_to_parse);
3355             }
3356         }
3357
3358         //
3359         // Now that's all done, replace /[number]/ with the correct styles
3360         //
3361         foreach (array_keys($this->language_data['KEYWORDS']) as $k) {
3362             if (!$this->use_classes) {
3363                 $attributes = ' style="' .
3364                     (isset($this->language_data['STYLES']['KEYWORDS'][$k]) ?
3365                     $this->language_data['STYLES']['KEYWORDS'][$k] : "") . '"';
3366             } else {
3367                 $attributes = ' class="kw' . $k . '"';
3368             }
3369             $stuff_to_parse = str_replace("<|/$k/>", "<|$attributes>", $stuff_to_parse);
3370         }
3371
3372         if ($numbers_found) {
3373             // Put number styles in
3374             foreach($this->language_data['NUMBERS_RXCACHE'] as $id => $regexp) {
3375                 //Commented out for now, as this needs some review ...
3376                 //                if ($numbers_permissions & $id) {
3377                 //Get the appropriate style ...
3378                 //Checking for unset styles is done by the style cache builder ...
3379                 if (!$this->use_classes) {
3380                     $attributes = ' style="' . $this->language_data['STYLES']['NUMBERS'][$id] . '"';
3381                 } else {
3382                     $attributes = ' class="nu'.$id.'"';
3383                 }
3384
3385                 //Set in the correct styles ...
3386                 $stuff_to_parse = str_replace("/NUM!$id/", $attributes, $stuff_to_parse);
3387                 //                }
3388             }
3389         }
3390
3391         // Highlight methods and fields in objects
3392         if ($this->lexic_permissions['METHODS'] && $this->language_data['OOLANG']) {
3393             $oolang_spaces = "[\s]*";
3394             $oolang_before = "";
3395             $oolang_after = "[a-zA-Z][a-zA-Z0-9_]*";
3396             if (isset($this->language_data['PARSER_CONTROL'])) {
3397                 if (isset($this->language_data['PARSER_CONTROL']['OOLANG'])) {
3398                     if (isset($this->language_data['PARSER_CONTROL']['OOLANG']['MATCH_BEFORE'])) {
3399                         $oolang_before = $this->language_data['PARSER_CONTROL']['OOLANG']['MATCH_BEFORE'];
3400                     }
3401                     if (isset($this->language_data['PARSER_CONTROL']['OOLANG']['MATCH_AFTER'])) {
3402                         $oolang_after = $this->language_data['PARSER_CONTROL']['OOLANG']['MATCH_AFTER'];
3403                     }
3404                     if (isset($this->language_data['PARSER_CONTROL']['OOLANG']['MATCH_SPACES'])) {
3405                         $oolang_spaces = $this->language_data['PARSER_CONTROL']['OOLANG']['MATCH_SPACES'];
3406                     }
3407                 }
3408             }
3409
3410             foreach ($this->language_data['OBJECT_SPLITTERS'] as $key => $splitter) {
3411                 if (false !== strpos($stuff_to_parse, $splitter)) {
3412                     if (!$this->use_classes) {
3413                         $attributes = ' style="' . $this->language_data['STYLES']['METHODS'][$key] . '"';
3414                     } else {
3415                         $attributes = ' class="me' . $key . '"';
3416                     }
3417                     $stuff_to_parse = preg_replace("/($oolang_before)(" . preg_quote($this->language_data['OBJECT_SPLITTERS'][$key], '/') . ")($oolang_spaces)($oolang_after)/", "\\1\\2\\3<|$attributes>\\4|>", $stuff_to_parse);
3418                 }
3419             }
3420         }
3421
3422         //
3423         // Highlight brackets. Yes, I've tried adding a semi-colon to this list.
3424         // You try it, and see what happens ;)
3425         // TODO: Fix lexic permissions not converting entities if shouldn't
3426         // be highlighting regardless
3427         //
3428         if ($this->lexic_permissions['BRACKETS']) {
3429             $stuff_to_parse = str_replace( $this->language_data['CACHE_BRACKET_MATCH'],
3430                               $this->language_data['CACHE_BRACKET_REPLACE'], $stuff_to_parse );
3431         }
3432
3433
3434         //FIX for symbol highlighting ...
3435         if ($this->lexic_permissions['SYMBOLS'] && !empty($this->language_data['SYMBOLS'])) {
3436             //Get all matches and throw away those witin a block that is already highlighted... (i.e. matched by a regexp)
3437             $n_symbols = preg_match_all("/<\|(?:<DOT>|[^>])+>(?:(?!\|>).*?)\|>|<\/a>|(?:" . $this->language_data['SYMBOL_SEARCH'] . ")+(?![^<]+?>)/", $stuff_to_parse, $pot_symbols, PREG_OFFSET_CAPTURE | PREG_SET_ORDER);
3438             $global_offset = 0;
3439             for ($s_id = 0; $s_id < $n_symbols; ++$s_id) {
3440                 $symbol_match = $pot_symbols[$s_id][0][0];
3441                 if (strpos($symbol_match, '<') !== false || strpos($symbol_match, '>') !== false) {
3442                     // already highlighted blocks _must_ include either < or >
3443                     // so if this conditional applies, we have to skip this match
3444                     // BenBE: UNLESS the block contains <SEMI> or <PIPE>
3445                     if(strpos($symbol_match, '<SEMI>') === false &&
3446                         strpos($symbol_match, '<PIPE>') === false) {
3447                         continue;
3448                     }
3449                 }
3450
3451                 // if we reach this point, we have a valid match which needs to be highlighted
3452
3453                 $symbol_length = strlen($symbol_match);
3454                 $symbol_offset = $pot_symbols[$s_id][0][1];
3455                 unset($pot_symbols[$s_id]);
3456                 $symbol_end = $symbol_length + $symbol_offset;
3457                 $symbol_hl = "";
3458
3459                 // if we have multiple styles, we have to handle them properly
3460                 if ($this->language_data['MULTIPLE_SYMBOL_GROUPS']) {
3461                     $old_sym = -1;
3462                     // Split the current stuff to replace into its atomic symbols ...
3463                     preg_match_all("/" . $this->language_data['SYMBOL_SEARCH'] . "/", $symbol_match, $sym_match_syms, PREG_PATTERN_ORDER);
3464                     foreach ($sym_match_syms[0] as $sym_ms) {
3465                         //Check if consequtive symbols belong to the same group to save output ...
3466                         if (isset($this->language_data['SYMBOL_DATA'][$sym_ms])
3467                             && ($this->language_data['SYMBOL_DATA'][$sym_ms] != $old_sym)) {
3468                             if (-1 != $old_sym) {
3469                                 $symbol_hl .= "|>";
3470                             }
3471                             $old_sym = $this->language_data['SYMBOL_DATA'][$sym_ms];
3472                             if (!$this->use_classes) {
3473                                 $symbol_hl .= '<| style="' . $this->language_data['STYLES']['SYMBOLS'][$old_sym] . '">';
3474                             } else {
3475                                 $symbol_hl .= '<| class="sy' . $old_sym . '">';
3476                             }
3477                         }
3478                         $symbol_hl .= $sym_ms;
3479                     }
3480                     unset($sym_match_syms);
3481
3482                     //Close remaining tags and insert the replacement at the right position ...
3483                     //Take caution if symbol_hl is empty to avoid doubled closing spans.
3484                     if (-1 != $old_sym) {
3485                         $symbol_hl .= "|>";
3486                     }
3487                 } else {
3488                     if (!$this->use_classes) {
3489                         $symbol_hl = '<| style="' . $this->language_data['STYLES']['SYMBOLS'][0] . '">';
3490                     } else {
3491                         $symbol_hl = '<| class="sy0">';
3492                     }
3493                     $symbol_hl .= $symbol_match . '|>';
3494                 }
3495
3496                 $stuff_to_parse = substr_replace($stuff_to_parse, $symbol_hl, $symbol_offset + $global_offset, $symbol_length);
3497
3498                 // since we replace old text with something of different size,
3499                 // we'll have to keep track of the differences
3500                 $global_offset += strlen($symbol_hl) - $symbol_length;
3501             }
3502         }
3503         //FIX for symbol highlighting ...
3504
3505         // Add class/style for regexps
3506         foreach (array_keys($this->language_data['REGEXPS']) as $key) {
3507             if ($this->lexic_permissions['REGEXPS'][$key]) {
3508                 if (is_callable($this->language_data['STYLES']['REGEXPS'][$key])) {
3509                     $this->_rx_key = $key;
3510                     $stuff_to_parse = preg_replace_callback("/!REG3XP$key!(.*)\|>/U",
3511                         array($this, 'handle_regexps_callback'),
3512                         $stuff_to_parse);
3513                 } else {
3514                     if (!$this->use_classes) {
3515                         $attributes = ' style="' . $this->language_data['STYLES']['REGEXPS'][$key] . '"';
3516                     } else {
3517                         if (is_array($this->language_data['REGEXPS'][$key]) &&
3518                             array_key_exists(GESHI_CLASS, $this->language_data['REGEXPS'][$key])) {
3519                             $attributes = ' class="' .
3520                                 $this->language_data['REGEXPS'][$key][GESHI_CLASS] . '"';
3521                         } else {
3522                            $attributes = ' class="re' . $key . '"';
3523                         }
3524                     }
3525                     $stuff_to_parse = str_replace("!REG3XP$key!", "$attributes", $stuff_to_parse);
3526                 }
3527             }
3528         }
3529
3530         // Replace <DOT> with . for urls
3531         $stuff_to_parse = str_replace('<DOT>', '.', $stuff_to_parse);
3532         // Replace <|UR1| with <a href= for urls also
3533         if (isset($this->link_styles[GESHI_LINK])) {
3534             if ($this->use_classes) {
3535                 $stuff_to_parse = str_replace('<|UR1|', '<a' . $this->link_target . ' href=', $stuff_to_parse);
3536             } else {
3537                 $stuff_to_parse = str_replace('<|UR1|', '<a' . $this->link_target . ' style="' . $this->link_styles[GESHI_LINK] . '" href=', $stuff_to_parse);
3538             }
3539         } else {
3540             $stuff_to_parse = str_replace('<|UR1|', '<a' . $this->link_target . ' href=', $stuff_to_parse);
3541         }
3542
3543         //
3544         // NOW we add the span thingy ;)
3545         //
3546
3547         $stuff_to_parse = str_replace('<|', '<span', $stuff_to_parse);
3548         $stuff_to_parse = str_replace ( '|>', '</span>', $stuff_to_parse );
3549         return substr($stuff_to_parse, 1);
3550     }
3551
/**
 * Sets the time taken to parse the code
 *
 * Each argument is split on a single space and both parts are used, i.e.
 * the "msec sec" string form that microtime() produces when called
 * without arguments.
 *
 * @param microtime The time when parsing started
 * @param microtime The time when parsing ended
 * @since 1.0.2
 * @access private
 */
function set_time($start_time, $end_time) {
    $start = explode(' ', $start_time);
    $end = explode(' ', $end_time);

    // Subtract the whole seconds and the fractional parts separately.
    // Adding the (large) epoch seconds to the (tiny) fraction first would
    // round the fraction to the precision available at that magnitude and
    // lose sub-microsecond accuracy in the resulting difference.
    $this->time = ($end[1] - $start[1]) + ($end[0] - $start[0]);
}
3565
/**
 * Reports the parse duration previously stored by set_time()
 *
 * @return double The time taken to parse the code
 * @since  1.0.2
 */
function get_time() {
    $elapsed = $this->time;
    return $elapsed;
}
3575
/**
 * Recursively merges any number of arrays; on key collisions the value
 * from the later array replaces the value from the earlier one
 *
 * When both sides hold a value under the same key and the later value is
 * an array, the two values are merged by a recursive call instead of being
 * replaced. If any argument is not an array an E_USER_WARNING is raised
 * and false is returned.
 *
 * @return array|false The merged array, or false on invalid arguments
 * @since 1.0.8
 * @access private
 */
function merge_arrays() {
    $inputs = func_get_args();

    // Argument validation. Can be dropped if performance matters more; the
    // merge loop below would then emit its own warning for non-arrays.
    foreach ($inputs as $position => $candidate) {
        if (!is_array($candidate)) {
            // array_merge_recursive yields nothing useful in this case either
            trigger_error('Argument #' . ($position + 1) . ' is not an array - trying to merge array with scalar! Returning false!', E_USER_WARNING);
            return false;
        }
    }

    // The first array always forms the base of the result
    $merged = $inputs[0];

    // Layer every remaining array on top of the base, in call order
    foreach (array_slice($inputs, 1) as $overlay) {
        foreach ($overlay as $name => $item) {
            if (isset($merged[$name]) && is_array($item)) {
                // If the existing entry is a scalar the recursive call
                // raises the warning above and the entry becomes false
                // (the two structures are incompatible).
                $merged[$name] = $this->merge_arrays($merged[$name], $item);
                continue;
            }
            $merged[$name] = $item;
        }
    }

    return $merged;
}
3614
/**
 * Loads a language definition file and resets the state derived from it
 *
 * The file is pulled in with require and is expected to populate a local
 * $language_data array. Afterwards the lexic permissions are initialised
 * from that data, PARSER_CONTROL/ENABLE_FLAGS overrides are applied, and an
 * optional sibling "*.style.php" file may override the language's STYLES.
 *
 * @param string The filename of the language file you want to load
 * @since 1.0.0
 * @access private
 * @todo Needs to load keys for lexic permissions for keywords, regexps etc
 */
function load_language($file_name) {
    // Loading the same file twice in a row is a no-op
    if ($file_name == $this->loaded_language) {
        return;
    }

    // Reset everything that depends on the previously loaded language
    $this->loaded_language = $file_name;
    $this->parse_cache_built = false;
    $this->enable_highlighting();

    // The language file fills $language_data in this scope; start empty so
    // the assignment below is defined even if the file sets nothing
    $language_data = array();
    require $file_name;

    // No validation of the loaded structure is performed here
    $this->language_data = $language_data;

    // Strict mode follows whatever the language declares
    $this->strict_mode = $this->language_data['STRICT_MODE_APPLIES'];

    // By default every lexic is highlighted; keyword groups are only
    // enabled when they actually contain something
    foreach ($this->language_data['KEYWORDS'] as $group => $words) {
        $this->lexic_permissions['KEYWORDS'][$group] = !empty($words);
    }
    foreach (array_keys($this->language_data['COMMENT_SINGLE']) as $comment_id) {
        $this->lexic_permissions['COMMENTS'][$comment_id] = true;
    }
    foreach (array_keys($this->language_data['REGEXPS']) as $regexp_id) {
        $this->lexic_permissions['REGEXPS'][$regexp_id] = true;
    }

    // empty() is deliberate: it copes with a missing PARSER_CONTROL entry,
    // a missing ENABLE_FLAGS entry, and false/null values without raising
    // a notice
    if (!empty($this->language_data['PARSER_CONTROL']['ENABLE_FLAGS'])) {
        foreach ($this->language_data['PARSER_CONTROL']['ENABLE_FLAGS'] as $flag => $value) {
            // Anything other than GESHI_NEVER counts as "enabled"
            $perm = $value !== GESHI_NEVER;

            if ($flag == 'ALL') {
                $this->enable_highlighting($perm);
            } elseif (isset($this->lexic_permissions[$flag])) {
                // Unknown flags never reach this branch and are ignored
                if (is_array($this->lexic_permissions[$flag])) {
                    foreach (array_keys($this->lexic_permissions[$flag]) as $sub_key) {
                        $this->lexic_permissions[$flag][$sub_key] = $perm;
                    }
                } else {
                    $this->lexic_permissions[$flag] = $perm;
                }
            }
        }
        unset($this->language_data['PARSER_CONTROL']['ENABLE_FLAGS']);
    }

    // Hard escapes need an escape character even when the language did not
    // name a dedicated one, so fall back to the regular ESCAPE_CHAR
    if (!isset($this->language_data['HARDCHAR'])) {
        $this->language_data['HARDCHAR'] = $this->language_data['ESCAPE_CHAR'];
    }

    // Since 1.0.8 styles may be overridden from a separate file. Its name is
    // the language file name minus its last four characters (presumably the
    // ".php" extension) plus ".style.php".
    $style_filename = substr($file_name, 0, -4) . '.style.php';
    if (!is_readable($style_filename)) {
        return;
    }

    // Make sure no stale $style_data (e.g. set by the language file) survives
    unset($style_data);

    // The style file is expected to define $style_data in this scope
    include $style_filename;

    // Overlay the new styles on top of the language's own defaults
    if (isset($style_data) && is_array($style_data)) {
        $this->language_data['STYLES'] =
            $this->merge_arrays($this->language_data['STYLES'], $style_data);
    }
}
3713
/**
 * Takes the parsed code and various options, and creates the HTML
 * surrounding it to make it look nice.
 *
 * The string passed in is replaced in place by the result of header(),
 * followed by the code (split on "\n" and wrapped line by line), followed
 * by the result of footer(). Lines are emitted as <li> items when line
 * numbers are enabled and the header type is not GESHI_HEADER_PRE_TABLE;
 * otherwise they are emitted as plain lines, optionally wrapped in <span>s
 * for fancy/extra-highlighted lines (and, for GESHI_HEADER_PRE_TABLE with
 * line numbers, preceded by a separate cell holding the line numbers).
 *
 * @param  string The code already parsed (reference!)
 * @since  1.0.0
 * @access private
 */
function finalise(&$parsed_code) {
    // Remove end parts of important declarations
    // This is BUGGY!! My fault for bad code: fix coming in 1.2
    // @todo Remove this crap
    if ($this->enable_important_blocks &&
        (strpos($parsed_code, $this->hsc(GESHI_START_IMPORTANT)) === false)) {
        $parsed_code = str_replace($this->hsc(GESHI_END_IMPORTANT), '', $parsed_code);
    }

    // Add HTML whitespace stuff if we're using the <div> header
    if ($this->header_type != GESHI_HEADER_PRE && $this->header_type != GESHI_HEADER_PRE_VALID) {
        $this->indent($parsed_code);
    }

    // purge some unnecessary stuff: spans that contain nothing but
    // whitespace are replaced by that whitespace
    /** NOTE: memorypeak #1 */
    $parsed_code = preg_replace('#<span[^>]+>(\s*)</span>#', '\\1', $parsed_code);

    // If we are using IDs for line numbers, there needs to be an overall
    // ID set to prevent collisions.
    if ($this->add_ids && !$this->overall_id) {
        $this->overall_id = 'geshi-' . substr(md5(microtime()), 0, 4);
    }

    // Get code into lines
    /** NOTE: memorypeak #2 */
    $code = explode("\n", $parsed_code);
    $parsed_code = $this->header();

    // If we're using line numbers, we insert <li>s and appropriate
    // markup to style them (otherwise we don't need to do anything)
    if ($this->line_numbers != GESHI_NO_LINE_NUMBERS && $this->header_type != GESHI_HEADER_PRE_TABLE) {
        // If we're using the <pre> header, we shouldn't add newlines because
        // the <pre> will line-break them (and the <li>s already do this for us)
        $ls = ($this->header_type != GESHI_HEADER_PRE && $this->header_type != GESHI_HEADER_PRE_VALID) ? "\n" : '';

        // Foreach line... ($i is advanced inside the body so that it is
        // 0-based for the fancy-line test and 1-based for ids/extra lines)
        for ($i = 0, $n = count($code); $i < $n;) {
            //Reset the attributes for a new line ...
            $attrs = array();

            // Make lines have at least one space in them if they're empty
            // BenBE: Checking emptiness using trim instead of relying on blanks
            if ('' == trim($code[$i])) {
                $code[$i] = '&nbsp;';
            }

            // If this is a "special line"...
            if ($this->line_numbers == GESHI_FANCY_LINE_NUMBERS &&
                $i % $this->line_nth_row == ($this->line_nth_row - 1)) {
                // Set the attributes to style the line
                if ($this->use_classes) {
                    $attrs['class'][] = 'li2';
                    $def_attr = ' class="de2"';
                } else {
                    $attrs['style'][] = $this->line_style2;
                    // This style "covers up" the special styles set for special lines
                    // so that styles applied to special lines don't apply to the actual
                    // code on that line
                    $def_attr = ' style="' . $this->code_style . '"';
                }
            } else {
                if ($this->use_classes) {
                    $attrs['class'][] = 'li1';
                    $def_attr = ' class="de1"';
                } else {
                    $attrs['style'][] = $this->line_style1;
                    $def_attr = ' style="' . $this->code_style . '"';
                }
            }

            //Check which type of tag to insert for this line
            if ($this->header_type == GESHI_HEADER_PRE_VALID) {
                $start = "<pre$def_attr>";
                $end = '</pre>';
            } else {
                // Span or div?
                $start = "<div$def_attr>";
                $end = '</div>';
            }

            // From here on $i is the 1-based number of the current line
            ++$i;

            // Are we supposed to use ids? If so, add them
            if ($this->add_ids) {
                $attrs['id'][] = "$this->overall_id-$i";
            }

            //Is this some line with extra styles???
            if (in_array($i, $this->highlight_extra_lines)) {
                if ($this->use_classes) {
                    if (isset($this->highlight_extra_lines_styles[$i])) {
                        $attrs['class'][] = "lx$i";
                    } else {
                        $attrs['class'][] = "ln-xtra";
                    }
                } else {
                    array_push($attrs['style'], $this->get_line_style($i));
                }
            }

            // Add in the line surrounded by appropriate list HTML
            $attr_string = '';
            foreach ($attrs as $key => $attr) {
                $attr_string .= ' ' . $key . '="' . implode(' ', $attr) . '"';
            }

            $parsed_code .= "<li$attr_string>$start{$code[$i-1]}$end</li>$ls";
            // Release each line as soon as it has been emitted
            unset($code[$i - 1]);
        }
    } else {
        $n = count($code);
        if ($this->use_classes) {
            $attributes = ' class="de1"';
        } else {
            $attributes = ' style="'. $this->code_style .'"';
        }
        if ($this->header_type == GESHI_HEADER_PRE_VALID) {
            $parsed_code .= '<pre'. $attributes .'>';
        } elseif ($this->header_type == GESHI_HEADER_PRE_TABLE) {
            if ($this->line_numbers != GESHI_NO_LINE_NUMBERS) {
                if ($this->use_classes) {
                    $attrs = ' class="ln"';
                } else {
                    $attrs = ' style="'. $this->table_linenumber_style .'"';
                }
                $parsed_code .= '<td'.$attrs.'><pre'.$attributes.'>';
                // get linenumbers
                // we don't merge it with the for below, since it should be better for
                // memory consumption this way
                // @todo: but... actually it would still be somewhat nice to merge the two loops
                //        the mem peaks are at different positions
                for ($i = 0; $i < $n; ++$i) {
                    // 1-based line number; the extra-line settings are looked
                    // up with it, exactly as the <li> branch above does
                    $line_no = $i + 1;
                    $close = 0;
                    // fancy lines
                    if ($this->line_numbers == GESHI_FANCY_LINE_NUMBERS &&
                        $i % $this->line_nth_row == ($this->line_nth_row - 1)) {
                        // Set the attributes to style the line
                        if ($this->use_classes) {
                            $parsed_code .= '<span class="xtra li2"><span class="de2">';
                        } else {
                            // This style "covers up" the special styles set for special lines
                            // so that styles applied to special lines don't apply to the actual
                            // code on that line
                            $parsed_code .= '<span style="display:block;' . $this->line_style2 . '">'
                                              .'<span style="' . $this->code_style .'">';
                        }
                        $close += 2;
                    }
                    //Is this some line with extra styles???
                    if (in_array($line_no, $this->highlight_extra_lines)) {
                        if ($this->use_classes) {
                            if (isset($this->highlight_extra_lines_styles[$line_no])) {
                                $parsed_code .= "<span class=\"xtra lx$line_no\">";
                            } else {
                                $parsed_code .= "<span class=\"xtra ln-xtra\">";
                            }
                        } else {
                            $parsed_code .= "<span style=\"display:block;" . $this->get_line_style($line_no) . "\">";
                        }
                        ++$close;
                    }
                    $parsed_code .= $this->line_numbers_start + $i;
                    if ($close) {
                        $parsed_code .= str_repeat('</span>', $close);
                    } else if ($i != $n) {
                        // NOTE(review): $i never equals $n inside this loop, so
                        // a newline follows every unwrapped number, including
                        // the last one. Kept as is to preserve the output.
                        $parsed_code .= "\n";
                    }
                }
                $parsed_code .= '</pre></td><td'.$attributes.'>';
            }
            $parsed_code .= '<pre'. $attributes .'>';
        }
        // No line numbers, but still need to handle highlighting lines extra.
        // Have to use divs so the full width of the code is highlighted
        $close = 0;
        for ($i = 0; $i < $n; ++$i) {
            // 1-based line number used for every extra-line lookup below
            $line_no = $i + 1;

            // Make lines have at least one space in them if they're empty
            // BenBE: Checking emptiness using trim instead of relying on blanks
            if ('' == trim($code[$i])) {
                $code[$i] = '&nbsp;';
            }
            // fancy lines
            if ($this->line_numbers == GESHI_FANCY_LINE_NUMBERS &&
                $i % $this->line_nth_row == ($this->line_nth_row - 1)) {
                // Set the attributes to style the line
                if ($this->use_classes) {
                    $parsed_code .= '<span class="xtra li2"><span class="de2">';
                } else {
                    // This style "covers up" the special styles set for special lines
                    // so that styles applied to special lines don't apply to the actual
                    // code on that line
                    $parsed_code .= '<span style="display:block;' . $this->line_style2 . '">'
                                      .'<span style="' . $this->code_style .'">';
                }
                $close += 2;
            }
            //Is this some line with extra styles???
            if (in_array($line_no, $this->highlight_extra_lines)) {
                if ($this->use_classes) {
                    if (isset($this->highlight_extra_lines_styles[$line_no])) {
                        $parsed_code .= "<span class=\"xtra lx$line_no\">";
                    } else {
                        $parsed_code .= "<span class=\"xtra ln-xtra\">";
                    }
                } else {
                    $parsed_code .= "<span style=\"display:block;" . $this->get_line_style($line_no) . "\">";
                }
                ++$close;
            }

            $parsed_code .= $code[$i];

            if ($close) {
                // Wrapped lines are closed instead of being newline-terminated
                $parsed_code .= str_repeat('</span>', $close);
                $close = 0;
            }
            elseif ($i + 1 < $n) {
                $parsed_code .= "\n";
            }
            // Release each line as soon as it has been emitted
            unset($code[$i]);
        }

        if ($this->header_type == GESHI_HEADER_PRE_VALID || $this->header_type == GESHI_HEADER_PRE_TABLE) {
            $parsed_code .= '</pre>';
        }
        if ($this->header_type == GESHI_HEADER_PRE_TABLE && $this->line_numbers != GESHI_NO_LINE_NUMBERS) {
            $parsed_code .= '</td>';
        }
    }

    $parsed_code .= $this->footer();
}
3962
/**
 * Builds the opening markup of the code block, including its attributes
 * and the optional user-supplied header content
 *
 * Note: when line numbers are enabled and the header type is none of
 * GESHI_HEADER_NONE, GESHI_HEADER_PRE, GESHI_HEADER_DIV,
 * GESHI_HEADER_PRE_VALID or GESHI_HEADER_PRE_TABLE, no value is returned.
 *
 * @return string The header for the code block
 * @since  1.0.0
 * @access private
 */
function header() {
    /**
     * @todo   Document behaviour change - class is outputted regardless of whether
     *         we're using classes or not. Same with style
     */
    // class attribute: the language name, plus the overall class if one is set
    $css_names = $this->_genCSSName($this->language);
    if ($this->overall_class != '') {
        $css_names .= ' ' . $this->_genCSSName($this->overall_class);
    }
    $attributes = ' class="' . $css_names . '"';

    if ($this->overall_id != '') {
        $attributes .= ' id="' . $this->overall_id . '"';
    }
    // An inline overall style is only emitted when classes are not in use
    if (!$this->use_classes && $this->overall_style != '') {
        $attributes .= ' style="' . $this->overall_style . '"';
    }

    // The list only needs a start attribute when numbering doesn't begin at 1
    $ol_attributes = ($this->line_numbers_start != 1)
        ? ' start="' . $this->line_numbers_start . '"'
        : '';

    $numbered = $this->line_numbers != GESHI_NO_LINE_NUMBERS;
    $type = $this->header_type;

    // Wrap the user-supplied header content, if there is any
    $header = $this->header_content;
    if ($header) {
        // Newlines are stripped for both <pre>-based header types
        if ($type == GESHI_HEADER_PRE || $type == GESHI_HEADER_PRE_VALID) {
            $header = str_replace("\n", '', $header);
        }
        $header = $this->replace_keywords($header);

        $attr = $this->use_classes
            ? ' class="head"'
            : ' style="' . $this->header_content_style . '"';

        if ($numbered && $type == GESHI_HEADER_PRE_TABLE) {
            // Table layout with a line-number column: span both columns
            $header = '<thead><tr><td colspan="2" ' . $attr . '>' . $header . '</td></tr></thead>';
        } else {
            $header = '<div' . $attr . '>' . $header . '</div>';
        }
    }

    // Extra wrapper opened for un-numbered output when a code block is forced
    $code_block = $this->force_code_block ? '<div>' : '';

    // No outer container at all: only the list (or the forced block) opens
    if (GESHI_HEADER_NONE == $type) {
        return $numbered
            ? $header . '<ol' . $attributes . $ol_attributes . '>'
            : $header . $code_block;
    }

    // Without line numbers everything that is not <pre> uses a <div>
    if (!$numbered) {
        $tag = ($type == GESHI_HEADER_PRE) ? 'pre' : 'div';
        return '<' . $tag . $attributes . '>' . $header . $code_block;
    }

    // With line numbers the container depends on the header type
    switch ($type) {
        case GESHI_HEADER_PRE:
            return '<pre' . $attributes . '>' . $header . '<ol' . $ol_attributes . '>';
        case GESHI_HEADER_DIV:
        case GESHI_HEADER_PRE_VALID:
            return '<div' . $attributes . '>' . $header . '<ol' . $ol_attributes . '>';
        case GESHI_HEADER_PRE_TABLE:
            return '<table' . $attributes . '>' . $header . '<tbody><tr class="li1">';
    }
    // Any other header type falls through without a return value
}
4042
4043     /**
4044      * Returns the footer for the code block.
4045      *
4046      * @return string The footer for the code block
4047      * @since  1.0.0
4048      * @access private
4049      */
4050     function footer() {
4051         $footer = $this->footer_content;
4052         if ($footer) {
4053             if ($this->header_type == GESHI_HEADER_PRE) {
4054                 $footer = str_replace("\n", '', $footer);;
4055             }
4056             $footer = $this->replace_keywords($footer);
4057
4058             if ($this->use_classes) {
4059                 $attr = ' class="foot"';
4060             } else {
4061                 $attr = " style=\"{$this->footer_content_style}\"";
4062             }
4063             if ($this->header_type == GESHI_HEADER_PRE_TABLE && $this->line_numbers != GESHI_NO_LINE_NUMBERS) {
4064                 $footer = "<tfoot><tr><td colspan=\"2\">$footer</td></tr></tfoot>";
4065             } else {
4066                 $footer = "<div$attr>$footer</div>";
4067             }
4068         }
4069
4070         if (GESHI_HEADER_NONE == $this->header_type) {
4071             return ($this->line_numbers != GESHI_NO_LINE_NUMBERS) ? '</ol>' . $footer : $footer;
4072         }
4073
4074         if ($this->header_type == GESHI_HEADER_DIV || $this->header_type == GESHI_HEADER_PRE_VALID) {
4075             if ($this->line_numbers != GESHI_NO_LINE_NUMBERS) {
4076                 return "</ol>$footer</div>";
4077             }
4078             return ($this->force_code_block ? '</div>' : '') .
4079                 "$footer</div>";
4080         }
4081         elseif ($this->header_type == GESHI_HEADER_PRE_TABLE) {
4082             if ($this->line_numbers != GESHI_NO_LINE_NUMBERS) {
4083                 return "</tr></tbody>$footer</table>";
4084             }
4085             return ($this->force_code_block ? '</div>' : '') .
4086                 "$footer</div>";
4087         }
4088         else {
4089             if ($this->line_numbers != GESHI_NO_LINE_NUMBERS) {
4090                 return "</ol>$footer</pre>";
4091             }
4092             return ($this->force_code_block ? '</div>' : '') .
4093                 "$footer</pre>";
4094         }
4095     }
4096
4097     /**
4098      * Replaces certain keywords in the header and footer with
4099      * certain configuration values
4100      *
4101      * @param  string The header or footer content to do replacement on
4102      * @return string The header or footer with replaced keywords
4103      * @since  1.0.2
4104      * @access private
4105      */
4106     function replace_keywords($instr) {
4107         $keywords = $replacements = array();
4108
4109         $keywords[] = '<TIME>';
4110         $keywords[] = '{TIME}';
4111         $replacements[] = $replacements[] = number_format($time = $this->get_time(), 3);
4112
4113         $keywords[] = '<LANGUAGE>';
4114         $keywords[] = '{LANGUAGE}';
4115         $replacements[] = $replacements[] = $this->language_data['LANG_NAME'];
4116
4117         $keywords[] = '<VERSION>';
4118         $keywords[] = '{VERSION}';
4119         $replacements[] = $replacements[] = GESHI_VERSION;
4120
4121         $keywords[] = '<SPEED>';
4122         $keywords[] = '{SPEED}';
4123         if ($time <= 0) {
4124             $speed = 'N/A';
4125         } else {
4126             $speed = strlen($this->source) / $time;
4127             if ($speed >= 1024) {
4128                 $speed = sprintf("%.2f KB/s", $speed / 1024.0);
4129             } else {
4130                 $speed = sprintf("%.0f B/s", $speed);
4131             }
4132         }
4133         $replacements[] = $replacements[] = $speed;
4134
4135         return str_replace($keywords, $replacements, $instr);
4136     }
4137
4138     /**
4139      * Secure replacement for PHP built-in function htmlspecialchars().
4140      *
4141      * See ticket #427 (http://wush.net/trac/wikka/ticket/427) for the rationale
4142      * for this replacement function.
4143      *
4144      * The INTERFACE for this function is almost the same as that for
4145      * htmlspecialchars(), with the same default for quote style; however, there
4146      * is no 'charset' parameter. The reason for this is as follows:
4147      *
4148      * The PHP docs say:
4149      *      "The third argument charset defines character set used in conversion."
4150      *
4151      * I suspect PHP's htmlspecialchars() is working at the byte-value level and
     * thus _needs_ to know (or assume) a character set because the special
4153      * characters to be replaced could exist at different code points in
4154      * different character sets. (If indeed htmlspecialchars() works at
4155      * byte-value level that goes some  way towards explaining why the
4156      * vulnerability would exist in this function, too, and not only in
4157      * htmlentities() which certainly is working at byte-value level.)
4158      *
4159      * This replacement function however works at character level and should
4160      * therefore be "immune" to character set differences - so no charset
4161      * parameter is needed or provided. If a third parameter is passed, it will
4162      * be silently ignored.
4163      *
4164      * In the OUTPUT there is a minor difference in that we use '&#39;' instead
4165      * of PHP's '&#039;' for a single quote: this provides compatibility with
4166      *      get_html_translation_table(HTML_SPECIALCHARS, ENT_QUOTES)
4167      * (see comment by mikiwoz at yahoo dot co dot uk on
4168      * http://php.net/htmlspecialchars); it also matches the entity definition
4169      * for XML 1.0
4170      * (http://www.w3.org/TR/xhtml1/dtds.html#a_dtd_Special_characters).
4171      * Like PHP we use a numeric character reference instead of '&apos;' for the
4172      * single quote. For the other special characters we use the named entity
4173      * references, as PHP is doing.
4174      *
4175      * @author      {@link http://wikkawiki.org/JavaWoman Marjolein Katsma}
4176      *
4177      * @license     http://www.gnu.org/copyleft/lgpl.html
4178      *              GNU Lesser General Public License
4179      * @copyright   Copyright 2007, {@link http://wikkawiki.org/CreditsPage
4180      *              Wikka Development Team}
4181      *
4182      * @access      private
4183      * @param       string  $string string to be converted
4184      * @param       integer $quote_style
4185      *                      - ENT_COMPAT:   escapes &, <, > and double quote (default)
4186      *                      - ENT_NOQUOTES: escapes only &, < and >
4187      *                      - ENT_QUOTES:   escapes &, <, >, double and single quotes
4188      * @return      string  converted string
4189      * @since       1.0.7.18
4190      */
4191     function hsc($string, $quote_style = ENT_COMPAT) {
4192         // init
4193         static $aTransSpecchar = array(
4194             '&' => '&amp;',
4195             '"' => '&quot;',
4196             '<' => '&lt;',
4197             '>' => '&gt;',
4198
4199             //This fix is related to SF#1923020, but has to be applied
4200             //regardless of actually highlighting symbols.
4201
4202             //Circumvent a bug with symbol highlighting
4203             //This is required as ; would produce undesirable side-effects if it
4204             //was not to be processed as an entity.
4205             ';' => '<SEMI>', // Force ; to be processed as entity
4206             '|' => '<PIPE>' // Force | to be processed as entity
4207             );                      // ENT_COMPAT set
4208
4209         switch ($quote_style) {
4210             case ENT_NOQUOTES: // don't convert double quotes
4211                 unset($aTransSpecchar['"']);
4212                 break;
4213             case ENT_QUOTES: // convert single quotes as well
4214                 $aTransSpecchar["'"] = '&#39;'; // (apos) htmlspecialchars() uses '&#039;'
4215                 break;
4216         }
4217
4218         // return translated string
4219         return strtr($string, $aTransSpecchar);
4220     }
4221
4222     function _genCSSName($name){
4223         return (is_numeric($name[0]) ? '_' : '') . $name;
4224     }
4225
4226     /**
4227      * Returns a stylesheet for the highlighted code. If $economy mode
4228      * is true, we only return the stylesheet declarations that matter for
4229      * this code block instead of the whole thing
4230      *
4231      * @param  boolean Whether to use economy mode or not
4232      * @return string A stylesheet built on the data for the current language
4233      * @since  1.0.0
4234      */
4235     function get_stylesheet($economy_mode = true) {
4236         // If there's an error, chances are that the language file
4237         // won't have populated the language data file, so we can't
4238         // risk getting a stylesheet...
4239         if ($this->error) {
4240             return '';
4241         }
4242
4243         //Check if the style rearrangements have been processed ...
4244         //This also does some preprocessing to check which style groups are useable ...
4245         if(!isset($this->language_data['NUMBERS_CACHE'])) {
4246             $this->build_style_cache();
4247         }
4248
4249         // First, work out what the selector should be. If there's an ID,
4250         // that should be used, the same for a class. Otherwise, a selector
4251         // of '' means that these styles will be applied anywhere
4252         if ($this->overall_id) {
4253             $selector = '#' . $this->_genCSSName($this->overall_id);
4254         } else {
4255             $selector = '.' . $this->_genCSSName($this->language);
4256             if ($this->overall_class) {
4257                 $selector .= '.' . $this->_genCSSName($this->overall_class);
4258             }
4259         }
4260         $selector .= ' ';
4261
4262         // Header of the stylesheet
4263         if (!$economy_mode) {
4264             $stylesheet = "/**\n".
4265                 " * GeSHi Dynamically Generated Stylesheet\n".
4266                 " * --------------------------------------\n".
4267                 " * Dynamically generated stylesheet for {$this->language}\n".
4268                 " * CSS class: {$this->overall_class}, CSS id: {$this->overall_id}\n".
4269                 " * GeSHi (C) 2004 - 2007 Nigel McNie, 2007 - 2008 Benny Baumann\n" .
4270                 " * (http://qbnz.com/highlighter/ and http://geshi.org/)\n".
4271                 " * --------------------------------------\n".
4272                 " */\n";
4273         } else {
4274             $stylesheet = "/**\n".
4275                 " * GeSHi (C) 2004 - 2007 Nigel McNie, 2007 - 2008 Benny Baumann\n" .
4276                 " * (http://qbnz.com/highlighter/ and http://geshi.org/)\n".
4277                 " */\n";
4278         }
4279
4280         // Set the <ol> to have no effect at all if there are line numbers
4281         // (<ol>s have margins that should be destroyed so all layout is
4282         // controlled by the set_overall_style method, which works on the
4283         // <pre> or <div> container). Additionally, set default styles for lines
4284         if (!$economy_mode || $this->line_numbers != GESHI_NO_LINE_NUMBERS) {
4285             //$stylesheet .= "$selector, {$selector}ol, {$selector}ol li {margin: 0;}\n";
4286             $stylesheet .= "$selector.de1, $selector.de2 {{$this->code_style}}\n";
4287         }
4288
4289         // Add overall styles
4290         // note: neglect economy_mode, empty styles are meaningless
4291         if ($this->overall_style != '') {
4292             $stylesheet .= "$selector {{$this->overall_style}}\n";
4293         }
4294
4295         // Add styles for links
4296         // note: economy mode does not make _any_ sense here
4297         //       either the style is empty and thus no selector is needed
4298         //       or the appropriate key is given.
4299         foreach ($this->link_styles as $key => $style) {
4300             if ($style != '') {
4301                 switch ($key) {
4302                     case GESHI_LINK:
4303                         $stylesheet .= "{$selector}a:link {{$style}}\n";
4304                         break;
4305                     case GESHI_HOVER:
4306                         $stylesheet .= "{$selector}a:hover {{$style}}\n";
4307                         break;
4308                     case GESHI_ACTIVE:
4309                         $stylesheet .= "{$selector}a:active {{$style}}\n";
4310                         break;
4311                     case GESHI_VISITED:
4312                         $stylesheet .= "{$selector}a:visited {{$style}}\n";
4313                         break;
4314                 }
4315             }
4316         }
4317
4318         // Header and footer
4319         // note: neglect economy_mode, empty styles are meaningless
4320         if ($this->header_content_style != '') {
4321             $stylesheet .= "$selector.head {{$this->header_content_style}}\n";
4322         }
4323         if ($this->footer_content_style != '') {
4324             $stylesheet .= "$selector.foot {{$this->footer_content_style}}\n";
4325         }
4326
4327         // Styles for important stuff
4328         // note: neglect economy_mode, empty styles are meaningless
4329         if ($this->important_styles != '') {
4330             $stylesheet .= "$selector.imp {{$this->important_styles}}\n";
4331         }
4332
4333         // Simple line number styles
4334         if ((!$economy_mode || $this->line_numbers != GESHI_NO_LINE_NUMBERS) && $this->line_style1 != '') {
4335             $stylesheet .= "{$selector}li, {$selector}.li1 {{$this->line_style1}}\n";
4336         }
4337         if ((!$economy_mode || $this->line_numbers != GESHI_NO_LINE_NUMBERS) && $this->table_linenumber_style != '') {
4338             $stylesheet .= "{$selector}.ln {{$this->table_linenumber_style}}\n";
4339         }
4340         // If there is a style set for fancy line numbers, echo it out
4341         if ((!$economy_mode || $this->line_numbers == GESHI_FANCY_LINE_NUMBERS) && $this->line_style2 != '') {
4342             $stylesheet .= "{$selector}.li2 {{$this->line_style2}}\n";
4343         }
4344
4345         // note: empty styles are meaningless
4346         foreach ($this->language_data['STYLES']['KEYWORDS'] as $group => $styles) {
4347             if ($styles != '' && (!$economy_mode ||
4348                 (isset($this->lexic_permissions['KEYWORDS'][$group]) &&
4349                 $this->lexic_permissions['KEYWORDS'][$group]))) {
4350                 $stylesheet .= "$selector.kw$group {{$styles}}\n";
4351             }
4352         }
4353         foreach ($this->language_data['STYLES']['COMMENTS'] as $group => $styles) {
4354             if ($styles != '' && (!$economy_mode ||
4355                 (isset($this->lexic_permissions['COMMENTS'][$group]) &&
4356                 $this->lexic_permissions['COMMENTS'][$group]) ||
4357                 (!empty($this->language_data['COMMENT_REGEXP']) &&
4358                 !empty($this->language_data['COMMENT_REGEXP'][$group])))) {
4359                 $stylesheet .= "$selector.co$group {{$styles}}\n";
4360             }
4361         }
4362         foreach ($this->language_data['STYLES']['ESCAPE_CHAR'] as $group => $styles) {
4363             if ($styles != '' && (!$economy_mode || $this->lexic_permissions['ESCAPE_CHAR'])) {
4364                 // NEW: since 1.0.8 we have to handle hardescapes
4365                 if ($group === 'HARD') {
4366                     $group = '_h';
4367                 }
4368                 $stylesheet .= "$selector.es$group {{$styles}}\n";
4369             }
4370         }
4371         foreach ($this->language_data['STYLES']['BRACKETS'] as $group => $styles) {
4372             if ($styles != '' && (!$economy_mode || $this->lexic_permissions['BRACKETS'])) {
4373                 $stylesheet .= "$selector.br$group {{$styles}}\n";
4374             }
4375         }
4376         foreach ($this->language_data['STYLES']['SYMBOLS'] as $group => $styles) {
4377             if ($styles != '' && (!$economy_mode || $this->lexic_permissions['SYMBOLS'])) {
4378                 $stylesheet .= "$selector.sy$group {{$styles}}\n";
4379             }
4380         }
4381         foreach ($this->language_data['STYLES']['STRINGS'] as $group => $styles) {
4382             if ($styles != '' && (!$economy_mode || $this->lexic_permissions['STRINGS'])) {
4383                 // NEW: since 1.0.8 we have to handle hardquotes
4384                 if ($group === 'HARD') {
4385                     $group = '_h';
4386                 }
4387                 $stylesheet .= "$selector.st$group {{$styles}}\n";
4388             }
4389         }
4390         foreach ($this->language_data['STYLES']['NUMBERS'] as $group => $styles) {
4391             if ($styles != '' && (!$economy_mode || $this->lexic_permissions['NUMBERS'])) {
4392                 $stylesheet .= "$selector.nu$group {{$styles}}\n";
4393             }
4394         }
4395         foreach ($this->language_data['STYLES']['METHODS'] as $group => $styles) {
4396             if ($styles != '' && (!$economy_mode || $this->lexic_permissions['METHODS'])) {
4397                 $stylesheet .= "$selector.me$group {{$styles}}\n";
4398             }
4399         }
4400         // note: neglect economy_mode, empty styles are meaningless
4401         foreach ($this->language_data['STYLES']['SCRIPT'] as $group => $styles) {
4402             if ($styles != '') {
4403                 $stylesheet .= "$selector.sc$group {{$styles}}\n";
4404             }
4405         }
4406         foreach ($this->language_data['STYLES']['REGEXPS'] as $group => $styles) {
4407             if ($styles != '' && (!$economy_mode ||
4408                 (isset($this->lexic_permissions['REGEXPS'][$group]) &&
4409                 $this->lexic_permissions['REGEXPS'][$group]))) {
4410                 if (is_array($this->language_data['REGEXPS'][$group]) &&
4411                     array_key_exists(GESHI_CLASS, $this->language_data['REGEXPS'][$group])) {
4412                     $stylesheet .= "$selector.";
4413                     $stylesheet .= $this->language_data['REGEXPS'][$group][GESHI_CLASS];
4414                     $stylesheet .= " {{$styles}}\n";
4415                 } else {
4416                     $stylesheet .= "$selector.re$group {{$styles}}\n";
4417                 }
4418             }
4419         }
4420         // Styles for lines being highlighted extra
4421         if (!$economy_mode || (count($this->highlight_extra_lines)!=count($this->highlight_extra_lines_styles))) {
4422             $stylesheet .= "{$selector}.ln-xtra, {$selector}li.ln-xtra, {$selector}div.ln-xtra {{$this->highlight_extra_lines_style}}\n";
4423         }
4424         $stylesheet .= "{$selector}span.xtra { display:block; }\n";
4425         foreach ($this->highlight_extra_lines_styles as $lineid => $linestyle) {
4426             $stylesheet .= "{$selector}.lx$lineid, {$selector}li.lx$lineid, {$selector}div.lx$lineid {{$linestyle}}\n";
4427         }
4428
4429         return $stylesheet;
4430     }
4431
4432     /**
     * Gets the style that is used for the specified line
4434      *
4435      * @param int The line number information is requested for
4436      * @access private
4437      * @since 1.0.7.21
4438      */
4439     function get_line_style($line) {
4440         //$style = null;
4441         $style = null;
4442         if (isset($this->highlight_extra_lines_styles[$line])) {
4443             $style = $this->highlight_extra_lines_styles[$line];
4444         } else { // if no "extra" style assigned
4445             $style = $this->highlight_extra_lines_style;
4446         }
4447
4448         return $style;
4449     }
4450
4451     /**
    * this function creates an optimized regular expression list
4453     * of an array of strings.
4454     *
4455     * Example:
4456     * <code>$list = array('faa', 'foo', 'foobar');
4457     *          => string 'f(aa|oo(bar)?)'</code>
4458     *
4459     * @param $list array of (unquoted) strings
4460     * @param $regexp_delimiter your regular expression delimiter, @see preg_quote()
    * @return array list of regular expression strings (alternations)
4462     * @author Milian Wolff <mail@milianw.de>
4463     * @since 1.0.8
4464     * @access private
4465     */
    function optimize_regexp_list($list, $regexp_delimiter = '/') {
        // Builds a prefix tree ("tokens") out of the sorted, preg_quote()d
        // entries and converts it into one or more alternation strings.
        // The result is an array of regexp strings: a new string is started
        // when the length or subpattern limits below are exceeded.

        // characters a key fragment must not start with when a key gets
        // split (see the in_array() checks further down)
        $regex_chars = array('.', '\\', '+', '*', '?', '[', '^', ']', '$',
            '(', ')', '{', '}', '=', '!', '<', '>', '|', ':', $regexp_delimiter);
        // sorting puts entries sharing a prefix next to each other; the
        // loop only ever compares an entry with the keys of its predecessor
        sort($list);
        $regexp_list = array('');
        // number of '(?:' groups in the regexp string currently being built
        $num_subpatterns = 0;
        // index of the regexp string currently being built
        $list_key = 0;

        // the tokens which we will use to generate the regexp list
        $tokens = array();
        // key path (one key per nesting level) of the entry added last
        $prev_keys = array();
        // go through all entries of the list and generate the token list
        $cur_len = 0;
        for ($i = 0, $i_max = count($list); $i < $i_max; ++$i) {
            if ($cur_len > GESHI_MAX_PCRE_LENGTH) {
                // seems like the length of this pcre is growing exorbitantly
                // => flush the tokens collected so far into a new regexp
                $regexp_list[++$list_key] = $this->_optimize_regexp_list_tokens_to_string($tokens);
                $num_subpatterns = substr_count($regexp_list[$list_key], '(?:');
                $tokens = array();
                $cur_len = 0;
            }
            $level = 0;
            $entry = preg_quote((string) $list[$i], $regexp_delimiter);
            // reference into the token tree, starting at its root
            $pointer = &$tokens;
            // properly assign the new entry to the correct position in the token array
            // possibly generate smaller common denominator keys
            while (true) {
                // get the common denominator
                if (isset($prev_keys[$level])) {
                    if ($prev_keys[$level] == $entry) {
                        // this is a duplicate entry, skip it
                        // (continues with the next list entry)
                        continue 2;
                    }
                    // count the leading characters shared with the previous key
                    $char = 0;
                    while (isset($entry[$char]) && isset($prev_keys[$level][$char])
                            && $entry[$char] == $prev_keys[$level][$char]) {
                        ++$char;
                    }
                    if ($char > 0) {
                        // this entry has at least some chars in common with the current key
                        if ($char == strlen($prev_keys[$level])) {
                            // current key is totally matched, i.e. this entry has just some bits appended
                            $pointer = &$pointer[$prev_keys[$level]];
                        } else {
                            // only part of the keys match
                            $new_key_part1 = substr($prev_keys[$level], 0, $char);
                            $new_key_part2 = substr($prev_keys[$level], $char);

                            if (in_array($new_key_part1[0], $regex_chars)
                                || in_array($new_key_part2[0], $regex_chars)) {
                                // this is bad, a regex char as first character
                                // => do not split, store the entry as a key of its own
                                $pointer[$entry] = array('' => true);
                                array_splice($prev_keys, $level, count($prev_keys), $entry);
                                $cur_len += strlen($entry);
                                // the next pass of the while loop now finds
                                // $prev_keys[$level] == $entry and moves on
                                // to the next list entry
                                continue;
                            } else {
                                // relocate previous tokens
                                $pointer[$new_key_part1] = array($new_key_part2 => $pointer[$prev_keys[$level]]);
                                unset($pointer[$prev_keys[$level]]);
                                $pointer = &$pointer[$new_key_part1];
                                // recreate key index
                                array_splice($prev_keys, $level, count($prev_keys), array($new_key_part1, $new_key_part2));
                                $cur_len += strlen($new_key_part2);
                            }
                        }
                        // descend one level and go on with the unmatched rest
                        ++$level;
                        $entry = substr($entry, $char);
                        continue;
                    }
                    // else: fall through, i.e. no common denominator was found
                }
                if ($level == 0 && !empty($tokens)) {
                    // we can dump current tokens into the string and throw them away afterwards
                    $new_entry = $this->_optimize_regexp_list_tokens_to_string($tokens);
                    $new_subpatterns = substr_count($new_entry, '(?:');
                    if (GESHI_MAX_PCRE_SUBPATTERNS && $num_subpatterns + $new_subpatterns > GESHI_MAX_PCRE_SUBPATTERNS) {
                        // too many subpatterns for the current regexp: start a new one
                        $regexp_list[++$list_key] = $new_entry;
                        $num_subpatterns = $new_subpatterns;
                    } else {
                        // append to the current regexp as a further alternative
                        if (!empty($regexp_list[$list_key])) {
                            $new_entry = '|' . $new_entry;
                        }
                        $regexp_list[$list_key] .= $new_entry;
                        $num_subpatterns += $new_subpatterns;
                    }
                    $tokens = array();
                    $cur_len = 0;
                }
                // no further common denominator found
                // (the '' key marks the end of a complete entry)
                $pointer[$entry] = array('' => true);
                array_splice($prev_keys, $level, count($prev_keys), $entry);

                $cur_len += strlen($entry);
                break;
            }
            unset($list[$i]);
        }
        // make sure the last tokens get converted as well
        $new_entry = $this->_optimize_regexp_list_tokens_to_string($tokens);
        if (GESHI_MAX_PCRE_SUBPATTERNS && $num_subpatterns + substr_count($new_entry, '(?:') > GESHI_MAX_PCRE_SUBPATTERNS) {
            // only move on to a new slot if the current one is in use
            if ( !empty($regexp_list[$list_key]) ) {
              ++$list_key;
            }
            $regexp_list[$list_key] = $new_entry;
        } else {
            if (!empty($regexp_list[$list_key])) {
                $new_entry = '|' . $new_entry;
            }
            $regexp_list[$list_key] .= $new_entry;
        }
        return $regexp_list;
    }
4578     /**
    * this function creates the appropriate regexp string of a token array
4580     * you should not call this function directly, @see $this->optimize_regexp_list().
4581     *
4582     * @param &$tokens array of tokens
    * @param $recursed bool to know whether we recursed or not
4584     * @return string
4585     * @author Milian Wolff <mail@milianw.de>
4586     * @since 1.0.8
4587     * @access private
4588     */
4589     function _optimize_regexp_list_tokens_to_string(&$tokens, $recursed = false) {
4590         $list = '';
4591         foreach ($tokens as $token => $sub_tokens) {
4592             $list .= $token;
4593             $close_entry = isset($sub_tokens['']);
4594             unset($sub_tokens['']);
4595             if (!empty($sub_tokens)) {
4596                 $list .= '(?:' . $this->_optimize_regexp_list_tokens_to_string($sub_tokens, true) . ')';
4597                 if ($close_entry) {
4598                     // make sub_tokens optional
4599                     $list .= '?';
4600                 }
4601             }
4602             $list .= '|';
4603         }
4604         if (!$recursed) {
4605             // do some optimizations
4606             // common trailing strings
4607             // BUGGY!
4608             //$list = preg_replace_callback('#(?<=^|\:|\|)\w+?(\w+)(?:\|.+\1)+(?=\|)#', create_function(
4609             //    '$matches', 'return "(?:" . preg_replace("#" . preg_quote($matches[1], "#") . "(?=\||$)#", "", $matches[0]) . ")" . $matches[1];'), $list);
4610             // (?:p)? => p?
4611             $list = preg_replace('#\(\?\:(.)\)\?#', '\1?', $list);
4612             // (?:a|b|c|d|...)? => [abcd...]?
4613             // TODO: a|bb|c => [ac]|bb
4614             static $callback_2;
4615             if (!isset($callback_2)) {
4616                 $callback_2 = create_function('$matches', 'return "[" . str_replace("|", "", $matches[1]) . "]";');
4617             }
4618             $list = preg_replace_callback('#\(\?\:((?:.\|)+.)\)#', $callback_2, $list);
4619         }
4620         // return $list without trailing pipe
4621         return substr($list, 0, -1);
4622     }
4623 } // End Class GeSHi
4624
4625
if (!function_exists('geshi_highlight')) {
    /**
     * Convenience wrapper for highlighting a piece of code in one call,
     * modelled after PHP's highlight_string(). The result is wrapped in
     * <code>...</code> and generated without a GeSHi header container.
     *
     * @param string The code to highlight
     * @param string The language to highlight the code in
     * @param string The path to the language files. Can be left out, as
     *               from version 1.0.7 the path should be detected
     *               automatically
     * @param boolean Whether to return the result instead of echoing it
     * @return mixed The highlighted code if $return is true; otherwise
     *               false if GeSHi reported an error and true if not
     * @since 1.0.2
     */
    function geshi_highlight($string, $language, $path = null, $return = false) {
        $highlighter = new GeSHi($string, $language, $path);
        $highlighter->set_header_type(GESHI_HEADER_NONE);

        $markup = '<code>' . $highlighter->parse_code() . '</code>';
        if ($return) {
            return $markup;
        }

        echo $markup;

        // Report success/failure of the highlighting to the caller
        return !$highlighter->error();
    }
}
4654
4655 ?>