3 * GeSHi - Generic Syntax Highlighter
5 * The GeSHi class for Generic Syntax Highlighting. Please refer to the
6 * documentation at http://qbnz.com/highlighter/documentation.php for more
7 * information about how to use this class.
9 * For changes, release notes, TODOs etc, see the relevant files in the docs/
12 * This file is part of GeSHi.
14 * GeSHi is free software; you can redistribute it and/or modify
15 * it under the terms of the GNU General Public License as published by
16 * the Free Software Foundation; either version 2 of the License, or
17 * (at your option) any later version.
19 * GeSHi is distributed in the hope that it will be useful,
20 * but WITHOUT ANY WARRANTY; without even the implied warranty of
21 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
22 * GNU General Public License for more details.
24 * You should have received a copy of the GNU General Public License
25 * along with GeSHi; if not, write to the Free Software
26 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
30 * @author Nigel McNie <nigel@geshi.org>, Benny Baumann <BenBE@omorphia.de>
31 * @copyright (C) 2004 - 2007 Nigel McNie, (C) 2007 - 2008 Benny Baumann
32 * @license http://gnu.org/copyleft/gpl.html GNU GPL
38 // You should use these constant names in your programs instead of
39 // their values - you never know when a value may change in a future
43 /** The version of this GeSHi file */
44 define('GESHI_VERSION', '1.0.8.6');
46 // Define the root directory for the GeSHi code tree
47 if (!defined('GESHI_ROOT')) {
48 /** The root directory for GeSHi */
49 define('GESHI_ROOT', dirname(__FILE__
) . DIRECTORY_SEPARATOR
);
51 /** The language file directory for GeSHi
53 define('GESHI_LANG_ROOT', GESHI_ROOT
. 'geshi' . DIRECTORY_SEPARATOR
);
55 // Define if GeSHi should be paranoid about security
56 if (!defined('GESHI_SECURITY_PARANOID')) {
57 /** Tells GeSHi to be paranoid about security settings */
58 define('GESHI_SECURITY_PARANOID', false);
61 // Line numbers - use with enable_line_numbers()
62 /** Use no line numbers when building the result */
63 define('GESHI_NO_LINE_NUMBERS', 0);
64 /** Use normal line numbers when building the result */
65 define('GESHI_NORMAL_LINE_NUMBERS', 1);
66 /** Use fancy line numbers when building the result */
67 define('GESHI_FANCY_LINE_NUMBERS', 2);
69 // Container HTML type
70 /** Use nothing to surround the source */
71 define('GESHI_HEADER_NONE', 0);
72 /** Use a "div" to surround the source */
73 define('GESHI_HEADER_DIV', 1);
74 /** Use a "pre" to surround the source */
75 define('GESHI_HEADER_PRE', 2);
76 /** Use a pre to wrap lines when line numbers are enabled or to wrap the whole code. */
77 define('GESHI_HEADER_PRE_VALID', 3);
79 * Use a "table" to surround the source:
82 * <thead><tr><td colspan="2">$header</td></tr></thead>
83 * <tbody><tr><td><pre>$linenumbers</pre></td><td><pre>$code</pre></td></tr></tbody>
84 * <tfoot><tr><td colspan="2">$footer</td></tr></tfoot>
87 * this is essentially only a workaround for Firefox, see sf#1651996 or take a look at
88 * https://bugzilla.mozilla.org/show_bug.cgi?id=365805
89 * @note when linenumbers are disabled this is essentially the same as GESHI_HEADER_PRE
91 define('GESHI_HEADER_PRE_TABLE', 4);
93 // Capitalisation constants
94 /** Leave keywords found as the case that they are */
95 define('GESHI_CAPS_NO_CHANGE', 0);
96 /** Uppercase keywords found */
97 define('GESHI_CAPS_UPPER', 1);
98 /** Lowercase keywords found */
99 define('GESHI_CAPS_LOWER', 2);
101 // Link style constants
102 /** Links in the source in the :link state */
103 define('GESHI_LINK', 0);
104 /** Links in the source in the :hover state */
105 define('GESHI_HOVER', 1);
106 /** Links in the source in the :active state */
107 define('GESHI_ACTIVE', 2);
108 /** Links in the source in the :visited state */
109 define('GESHI_VISITED', 3);
111 // Important string starter/finisher
112 // Note that if you change these, they should be as-is: i.e., don't
113 // write them as if they had been run through htmlentities()
114 /** The starter for important parts of the source */
115 define('GESHI_START_IMPORTANT', '<BEGIN GeSHi>');
116 /** The ender for important parts of the source */
117 define('GESHI_END_IMPORTANT', '<END GeSHi>');
122 // When strict mode applies for a language
123 /** Strict mode never applies (this is the most common) */
124 define('GESHI_NEVER', 0);
125 /** Strict mode *might* apply, and can be enabled or
126 disabled by {@link GeSHi->enable_strict_mode()} */
127 define('GESHI_MAYBE', 1);
128 /** Strict mode always applies */
129 define('GESHI_ALWAYS', 2);
131 // Advanced regexp handling constants, used in language files
132 /** The key of the regex array defining what to search for */
133 define('GESHI_SEARCH', 0);
134 /** The key of the regex array defining what bracket group in a
135 matched search to use as a replacement */
136 define('GESHI_REPLACE', 1);
137 /** The key of the regex array defining any modifiers to the regular expression */
138 define('GESHI_MODIFIERS', 2);
139 /** The key of the regex array defining what bracket group in a
140 matched search to put before the replacement */
141 define('GESHI_BEFORE', 3);
142 /** The key of the regex array defining what bracket group in a
143 matched search to put after the replacement */
144 define('GESHI_AFTER', 4);
145 /** The key of the regex array defining a custom keyword to use
146 for this regexp's html tag class */
147 define('GESHI_CLASS', 5);
149 /** Used in language files to mark comments */
150 define('GESHI_COMMENTS', 0);
152 /** Used to work around missing PHP features **/
153 define('GESHI_PHP_PRE_433', !(version_compare(PHP_VERSION
, '4.3.3') === 1));
155 /** make sure we can call stripos **/
if (!function_exists('stripos')) {
    // the offset param of preg_match is not supported below PHP 4.3.3
    if (GESHI_PHP_PRE_433) {
        /**
         * Case-insensitive strpos() replacement for PHP versions without stripos().
         *
         * @param string $haystack The string to search in
         * @param string $needle   The string to search for
         * @param int    $offset   Position in $haystack at which to start searching
         * @return int|false Position of the first match in $haystack, or false if not found
         * @ignore
         */
        function stripos($haystack, $needle, $offset = null) {
            $shift = 0;
            if (!is_null($offset)) {
                // Searching a truncated copy: remember how much was cut off so the
                // result can be reported relative to the original haystack.
                $shift = (int) $offset;
                $haystack = substr($haystack, $shift);
            }
            // The "i" modifier is what makes this stripos() rather than strpos()
            if (preg_match('/'. preg_quote($needle, '/') . '/i', $haystack, $match, PREG_OFFSET_CAPTURE)) {
                return $match[0][1] + $shift;
            }
            return false;
        }
    } else {
        /**
         * Case-insensitive strpos() replacement for PHP versions without stripos().
         *
         * @param string $haystack The string to search in
         * @param string $needle   The string to search for
         * @param int    $offset   Position in $haystack at which to start searching
         * @return int|false Position of the first match in $haystack, or false if not found
         * @ignore
         */
        function stripos($haystack, $needle, $offset = null) {
            // preg_match() reports offsets relative to the full subject, so no
            // correction is needed here; null is normalised to offset 0.
            if (preg_match('/'. preg_quote($needle, '/') . '/i', $haystack, $match, PREG_OFFSET_CAPTURE, (int) $offset)) {
                return $match[0][1];
            }
            return false;
        }
    }
}
185 /** some old PHP / PCRE versions only support up to xxx subpatterns in
186 regular expressions. Set this to false if your PCRE lib is up to date
187 @see GeSHi->optimize_regexp_list()
189 define('GESHI_MAX_PCRE_SUBPATTERNS', 500);
190 /** it's also important not to generate too long regular expressions
191 be generous here... but keep in mind, that when reaching this limit we
192 still have to close open patterns. 12k should do just fine on a 16k limit.
193 @see GeSHi->optimize_regexp_list()
195 define('GESHI_MAX_PCRE_LENGTH', 12288);
197 //Number format specification
198 /** Basic number format for integers */
199 define('GESHI_NUMBER_INT_BASIC', 1); //Default integers \d+
200 /** Enhanced number format for integers like seen in C */
201 define('GESHI_NUMBER_INT_CSTYLE', 2); //Default C-Style \d+[lL]?
202 /** Number format to highlight binary numbers with a suffix "b" */
203 define('GESHI_NUMBER_BIN_SUFFIX', 16); //[01]+[bB]
204 /** Number format to highlight binary numbers with a prefix % */
205 define('GESHI_NUMBER_BIN_PREFIX_PERCENT', 32); //%[01]+
206 /** Number format to highlight binary numbers with a prefix 0b (C) */
207 define('GESHI_NUMBER_BIN_PREFIX_0B', 64); //0b[01]+
208 /** Number format to highlight octal numbers with a leading zero */
209 define('GESHI_NUMBER_OCT_PREFIX', 256); //0[0-7]+
210 /** Number format to highlight octal numbers with a prefix 0o (logtalk) */
211 define('GESHI_NUMBER_OCT_PREFIX_0O', 512); //0[0-7]+
212 /** Number format to highlight octal numbers with a suffix of o */
213 define('GESHI_NUMBER_OCT_SUFFIX', 1024); //[0-7]+[oO]
214 /** Number format to highlight hex numbers with a prefix 0x */
215 define('GESHI_NUMBER_HEX_PREFIX', 4096); //0x[0-9a-fA-F]+
216 /** Number format to highlight hex numbers with a suffix of h */
217 define('GESHI_NUMBER_HEX_SUFFIX', 8192); //[0-9][0-9a-fA-F]*h
218 /** Number format to highlight floating-point numbers without support for scientific notation */
219 define('GESHI_NUMBER_FLT_NONSCI', 65536); //\d+\.\d+
220 /** Number format to highlight floating-point numbers with a trailing "f" suffix, without support for scientific notation */
221 define('GESHI_NUMBER_FLT_NONSCI_F', 131072); //\d+(\.\d+)?f
222 /** Number format to highlight floating-point numbers with support for scientific notation (E) and optional leading zero */
223 define('GESHI_NUMBER_FLT_SCI_SHORT', 262144); //\.\d+e\d+
224 /** Number format to highlight floating-point numbers with support for scientific notation (E) and required leading digit */
225 define('GESHI_NUMBER_FLT_SCI_ZERO', 524288); //\d+(\.\d+)?e\d+
226 //Custom formats are passed by RX array
228 // Error detection - use these to analyse faults
229 /** No sourcecode to highlight was specified
232 define('GESHI_ERROR_NO_INPUT', 1);
233 /** The language specified does not exist */
234 define('GESHI_ERROR_NO_SUCH_LANG', 2);
235 /** GeSHi could not open a file for reading (generally a language file) */
236 define('GESHI_ERROR_FILE_NOT_READABLE', 3);
237 /** The header type passed to {@link GeSHi->set_header_type()} was invalid */
238 define('GESHI_ERROR_INVALID_HEADER_TYPE', 4);
239 /** The line number type passed to {@link GeSHi->enable_line_numbers()} was invalid */
240 define('GESHI_ERROR_INVALID_LINE_NUMBER_TYPE', 5);
247 * Please refer to the documentation for GeSHi 1.0.X that is available
248 * at http://qbnz.com/highlighter/documentation.php for more information
249 * about how to use this class.
252 * @author Nigel McNie <nigel@geshi.org>, Benny Baumann <BenBE@omorphia.de>
253 * @copyright (C) 2004 - 2007 Nigel McNie, (C) 2007 - 2008 Benny Baumann
260 * The source code to highlight
266 * The language to use when highlighting
272 * The data for the language used
275 var $language_data = array();
278 * The path to the language files
281 var $language_path = GESHI_LANG_ROOT
;
284 * The error message associated with an error
286 * @todo check err reporting works
291 * Possible error messages
294 var $error_messages = array(
295 GESHI_ERROR_NO_SUCH_LANG
=> 'GeSHi could not find the language {LANGUAGE} (using path {PATH})',
296 GESHI_ERROR_FILE_NOT_READABLE
=> 'The file specified for load_from_file was not readable',
297 GESHI_ERROR_INVALID_HEADER_TYPE
=> 'The header type specified is invalid',
298 GESHI_ERROR_INVALID_LINE_NUMBER_TYPE
=> 'The line number type specified is invalid'
302 * Whether highlighting is strict or not
305 var $strict_mode = false;
308 * Whether to use CSS classes in output
311 var $use_classes = false;
314 * The type of header to use. Can be one of the following
317 * - GESHI_HEADER_PRE: Source is outputted in a "pre" HTML element.
318 * - GESHI_HEADER_DIV: Source is outputted in a "div" HTML element.
319 * - GESHI_HEADER_NONE: No header is outputted.
323 var $header_type = GESHI_HEADER_PRE
;
326 * Array of permissions for which lexics should be highlighted
329 var $lexic_permissions = array(
330 'KEYWORDS' => array(),
331 'COMMENTS' => array('MULTI' => true),
332 'REGEXPS' => array(),
333 'ESCAPE_CHAR' => true,
343 * The time it took to parse the code
349 * The content of the header block
352 var $header_content = '';
355 * The content of the footer block
358 var $footer_content = '';
361 * The style of the header block
364 var $header_content_style = '';
367 * The style of the footer block
370 var $footer_content_style = '';
373 * Tells if a block around the highlighted source should be forced
374 * if not using line numbering
377 var $force_code_block = false;
380 * The styles for hyperlinks in the code
383 var $link_styles = array();
386 * Whether important blocks should be recognised or not
389 * @todo REMOVE THIS FUNCTIONALITY!
391 var $enable_important_blocks = false;
394 * Styles for important parts of the code
397 * @todo As above - rethink the whole idea of important blocks as it is buggy and
398 * will be hard to implement in 1.2
400 var $important_styles = 'font-weight: bold; color: red;'; // Styles for important parts of the code
403 * Whether CSS IDs should be added to the code
406 var $add_ids = false;
409 * Lines that should be highlighted extra
412 var $highlight_extra_lines = array();
415 * Styles of lines that should be highlighted extra
418 var $highlight_extra_lines_styles = array();
421 * Styles of extra-highlighted lines
424 var $highlight_extra_lines_style = 'background-color: #ffc;';
428 * If null, nl2br() will be used on the result string.
429 * Otherwise, all instances of \n will be replaced with $line_ending
432 var $line_ending = null;
435 * Number at which line numbers should start at
438 var $line_numbers_start = 1;
441 * The overall style for this code block
444 var $overall_style = 'font-family:monospace;';
447 * The style for the actual code
450 var $code_style = 'font: normal normal 1em/1.2em monospace; margin:0; padding:0; background:none; vertical-align:top;';
453 * The overall class for this code block
456 var $overall_class = '';
459 * The overall ID for this code block
462 var $overall_id = '';
468 var $line_style1 = 'font-weight: normal; vertical-align:top;';
471 * Line number styles for fancy lines
474 var $line_style2 = 'font-weight: bold; vertical-align:top;';
477 * Style for line numbers when GESHI_HEADER_PRE_TABLE is chosen
480 var $table_linenumber_style = 'width:1px;text-align:right;margin:0;padding:0 2px;vertical-align:top;';
483 * Flag for how line numbers are displayed
486 var $line_numbers = GESHI_NO_LINE_NUMBERS
;
489 * Flag to decide if multi line spans are allowed. Set it to false to make sure
490 * each tag is closed before and reopened after each linefeed.
493 var $allow_multiline_span = true;
496 * The "nth" value for fancy line highlighting
499 var $line_nth_row = 0;
502 * The size of tab stops
508 * Should we use language-defined tab stop widths?
511 var $use_language_tab_width = false;
514 * Default target for keyword links
517 var $link_target = '';
520 * The encoding to use for entity encoding
521 * NOTE: Used with Escape Char Sequences to fix UTF-8 handling (cf. SF#2037598)
524 var $encoding = 'utf-8';
527 * Should keywords be linked?
530 var $keyword_links = true;
533 * Currently loaded language file
537 var $loaded_language = '';
540 * Whether the caches needed for parsing are built or not
545 var $parse_cache_built = false;
548 * Work around for Suhosin Patch with disabled /e modifier
550 * Note from suhosins author in config file:
552 * The /e modifier inside <code>preg_replace()</code> allows code execution.
553 * Often it is the cause for remote code execution exploits. It is wise to
554 * deactivate this feature and test where in the application it is used.
555 * The developer using the /e modifier should be made aware that he should
556 * use <code>preg_replace_callback()</code> instead
562 var $_kw_replace_group = 0;
566 * some "callback parameters" for handle_multiline_regexps
572 var $_hmr_before = '';
573 var $_hmr_replace = '';
574 var $_hmr_after = '';
580 * Creates a new GeSHi object, with source and language
582 * @param string The source code to highlight
583 * @param string The language to highlight the source with
584 * @param string The path to the language file directory. <b>This
585 * is deprecated!</b> I've backported the auto path
586 * detection from the 1.1.X dev branch, so now it
587 * should be automatically set correctly. If you have
588 * renamed the language directory however, you will
589 * still need to set the path using this parameter or
590 * {@link GeSHi->set_language_path()}
/**
 * Creates a new GeSHi object, with source and language.
 *
 * Unified constructor: used directly by PHP 5 and later, and reached through
 * the GeSHi() wrapper below on PHP 4.
 *
 * @param string The source code to highlight
 * @param string The language to highlight the source with
 * @param string The path to the language file directory (deprecated; passed
 *               on to set_language_path() unchanged)
 */
function __construct($source = '', $language = '', $path = '') {
    // empty() is also true for the string "0", which is a legitimate source;
    // only skip strings that are really empty.
    $has_source = is_string($source) ? ('' !== $source) : !empty($source);
    if ($has_source) {
        $this->set_source($source);
    }
    if (!empty($language)) {
        $this->set_language($language);
    }
    $this->set_language_path($path);
}

/**
 * PHP 4 style constructor, kept so that PHP 4 and any code calling
 * GeSHi() explicitly keep working. Delegates to __construct().
 *
 * @param string The source code to highlight
 * @param string The language to highlight the source with
 * @param string The path to the language file directory (deprecated)
 */
function GeSHi($source = '', $language = '', $path = '') {
    $this->__construct($source, $language, $path);
}
604 * Returns an error message associated with the last GeSHi operation,
605 * or false if no error has occurred
607 * @return string|false An error message if there has been an error, else false
612 //Put some template variables for debugging here ...
613 $debug_tpl_vars = array(
614 '{LANGUAGE}' => $this->language
,
615 '{PATH}' => $this->language_path
618 array_keys($debug_tpl_vars),
619 array_values($debug_tpl_vars),
620 $this->error_messages
[$this->error
]);
622 return "<br /><strong>GeSHi Error:</strong> $msg (code {$this->error})<br />";
628 * Gets a human-readable language name (thanks to Simon Patterson
631 * @return string The name for the current language
/**
 * Gets a human-readable name for the current language.
 *
 * Reads the 'LANG_NAME' entry of the loaded language data. When the last
 * recorded error is GESHI_ERROR_NO_SUCH_LANG the name is suffixed with
 * ' (Unknown Language)'.
 *
 * @return string The name for the current language (empty when no language
 *                data provides one)
 */
function get_language_name() {
    // language_data starts out as an empty array, so the key may be absent
    $name = isset($this->language_data['LANG_NAME'])
        ? $this->language_data['LANG_NAME']
        : '';

    if (GESHI_ERROR_NO_SUCH_LANG == $this->error) {
        return $name . ' (Unknown Language)';
    }
    return $name;
}
642 * Sets the source code for this object
644 * @param string The source code to highlight
/**
 * Sets the source code for this object.
 *
 * Also clears the list of extra-highlighted lines.
 *
 * @param string The source code to highlight
 */
function set_source($source) {
    $this->highlight_extra_lines = array();
    $this->source = $source;
}
653 * Sets the language for this object
655 * @note since 1.0.8 this function won't reset language-settings by default anymore!
656 * if you need this set $force_reset = true
658 * @param string The name of the language to use
/**
 * Sets the language for this object.
 *
 * The language name is reduced to letters, digits, "-" and "_" and then
 * lowercased before it is turned into "<language_path><name>.php". Nothing
 * happens when that file name equals the recorded loaded_language. Otherwise
 * the error state and strict mode are reset and the file is passed to
 * load_language(), or GESHI_ERROR_NO_SUCH_LANG is recorded when the file is
 * not readable.
 *
 * @param string  The name of the language to use
 * @param boolean When true, the recorded loaded_language is cleared first so
 *                the "already loaded" shortcut cannot apply
 */
function set_language($language, $force_reset = false) {
    if ($force_reset) {
        $this->loaded_language = false;
    }

    // Clean up the language name to prevent malicious code injection
    $language = strtolower(preg_replace('#[^a-zA-Z0-9\-_]#', '', $language));

    // Retrieve the full filename
    $file_name = $this->language_path . $language . '.php';
    if ($file_name == $this->loaded_language) {
        // this language is already loaded!
        return;
    }

    $this->language    = $language;
    $this->error       = false;
    $this->strict_mode = GESHI_NEVER;

    if (is_readable($file_name)) {
        // Load the language for parsing
        $this->load_language($file_name);
    } else {
        $this->error = GESHI_ERROR_NO_SUCH_LANG;
    }
}
694 * Sets the path to the directory containing the language files. Note
695 * that this path is relative to the directory of the script that included
696 * geshi.php, NOT geshi.php itself.
698 * @param string The path to the language directory
700 * @deprecated The path to the language files should now be automatically
701 * detected, so this method should no longer be needed. The
702 * 1.1.X branch handles manual setting of the path differently
703 * so this method will disappear in 1.2.0.
/**
 * Sets the path to the directory containing the language files.
 *
 * The path is silently ignored (nothing is changed) when it fails any of
 * the checks below, which exist to keep fopen wrappers and external
 * directories out. An accepted, non-empty path gets a trailing "/" if it
 * lacks one and the current language is then set again via set_language().
 *
 * @param string The path to the language directory
 * @deprecated The path to the language files should be detected automatically
 */
function set_language_path($path) {
    if (strpos($path, ':')) {
        // A colon is only tolerated as the drive separator on systems whose
        // directory separator is "\", and only once at the drive position.
        if (DIRECTORY_SEPARATOR != "\\") {
            return;
        }
        if (!preg_match('#^[a-zA-Z]:#', $path) || false !== strpos($path, ':', 2)) {
            return;
        }
    }

    // Reject any character outside the whitelist
    if (preg_match('#[^/a-zA-Z0-9_\.\-\\\s:]#', $path)) {
        return;
    }

    // In paranoid mode also refuse "/." and ".." sequences
    if (GESHI_SECURITY_PARANOID
        && (false !== strpos($path, '/.') || false !== strpos($path, '..'))) {
        return;
    }

    if (!$path) {
        return;
    }

    $last_char = $path[strlen($path) - 1];
    $this->language_path = ('/' == $last_char) ? $path : $path . '/';
    $this->set_language($this->language); // otherwise set_language_path has no effect
}
735 * Sets the type of header to be used.
737 * If GESHI_HEADER_DIV is used, the code is surrounded in a "div". This
738 * means more source code but more control over tab width and line-wrapping.
739 * GESHI_HEADER_PRE means that a "pre" is used - less source, but less
740 * control. Default is GESHI_HEADER_PRE.
742 * From 1.0.7.2, you can use GESHI_HEADER_NONE to specify that no header code
743 * should be outputted.
745 * @param int The type of header to be used
/**
 * Sets the type of header to be used.
 *
 * Accepts GESHI_HEADER_NONE, GESHI_HEADER_DIV, GESHI_HEADER_PRE,
 * GESHI_HEADER_PRE_VALID and GESHI_HEADER_PRE_TABLE, given as an integer or
 * as a string of digits. Any other value records
 * GESHI_ERROR_INVALID_HEADER_TYPE and leaves the header type untouched.
 *
 * @param int The type of header to be used
 * @return false|null false when the type is invalid, nothing otherwise
 */
function set_header_type($type) {
    $valid_types = array(
        GESHI_HEADER_NONE,
        GESHI_HEADER_DIV,
        GESHI_HEADER_PRE,
        GESHI_HEADER_PRE_VALID,
        GESHI_HEADER_PRE_TABLE
    );

    // A loose in_array() treats every non-numeric string as equal to 0 on
    // PHP < 8 and would accept it as GESHI_HEADER_NONE, so compare strictly
    // after making sure the value really is integral.
    $is_integral = is_int($type) || (is_string($type) && preg_match('/^\d+$/', $type));
    if (!$is_integral || !in_array((int) $type, $valid_types, true)) {
        $this->error = GESHI_ERROR_INVALID_HEADER_TYPE;
        return false;
    }

    // Set that new header type
    $this->header_type = $type;
}
761 * Sets the styles for the code that will be outputted
762 * when this object is parsed. The style should be a
763 * string of valid stylesheet declarations
765 * @param string The overall style for the outputted code block
766 * @param boolean Whether to merge the styles with the current styles or not
/**
 * Sets the style for the whole code block. The style should be a string of
 * valid stylesheet declarations.
 *
 * @param string  The overall style for the outputted code block
 * @param boolean When true the style is appended to the current overall
 *                style, otherwise it replaces it
 */
function set_overall_style($style, $preserve_defaults = false) {
    if ($preserve_defaults) {
        $this->overall_style .= $style;
        return;
    }
    $this->overall_style = $style;
}
778 * Sets the overall classname for this block of code. This
779 * class can then be used in a stylesheet to style this object's
782 * @param string The class name to use for this block of code
/**
 * Stores the overall class name for this block of code.
 *
 * @param string The class name to use for this block of code
 */
function set_overall_class($class)
{
    $this->overall_class = $class;
}
790 * Sets the overall id for this block of code. This id can then
791 * be used in a stylesheet to style this object's output
793 * @param string The ID to use for this block of code
/**
 * Stores the overall id for this block of code.
 *
 * @param string The ID to use for this block of code
 */
function set_overall_id($id)
{
    $this->overall_id = $id;
}
801 * Sets whether CSS classes should be used to highlight the source. Default
802 * is off, calling this method with no arguments will turn it on
804 * @param boolean Whether to turn classes on or not
/**
 * Sets whether CSS classes should be used to highlight the source.
 * Calling this method with no arguments turns classes on.
 *
 * @param boolean Whether to turn classes on or not
 */
function enable_classes($flag = true) {
    // Normalise any truthy/falsy value to a real boolean
    $this->use_classes = (bool) $flag;
}
812 * Sets the style for the actual code. This should be a string
813 * containing valid stylesheet declarations. If $preserve_defaults is
814 * true, then styles are merged with the default styles, with the
815 * user defined styles having priority
817 * Note: Use this method to override any style changes you made to
818 * the line numbers if you are using line numbers, else the line of
819 * code will have the same style as the line number! Consult the
820 * GeSHi documentation for more information about this.
822 * @param string The style to use for actual code
823 * @param boolean Whether to merge the current styles with the new styles
/**
 * Sets the style for the actual code. This should be a string containing
 * valid stylesheet declarations.
 *
 * @param string  The style to use for actual code
 * @param boolean When true the style is appended to the current code style,
 *                otherwise it replaces it
 */
function set_code_style($style, $preserve_defaults = false) {
    if ($preserve_defaults) {
        $this->code_style .= $style;
        return;
    }
    $this->code_style = $style;
}
835 * Sets the styles for the line numbers.
837 * @param string The style for the line numbers that are "normal"
838 * @param string|boolean If a string, this is the style of the line
839 * numbers that are "fancy", otherwise if boolean then this
840 * defines whether the normal styles should be merged with the
841 * new normal styles or not
842 * @param boolean If set, is the flag for whether to merge the "fancy"
843 * styles with the current styles or not
/**
 * Sets the styles for the line numbers.
 *
 * May be called as (style1, style2, preserve) or as (style1, preserve): a
 * boolean second argument is taken as the preserve flag and the fancy style
 * is then treated as an empty string.
 *
 * @param string         The style for the line numbers that are "normal"
 * @param string|boolean The style for "fancy" line numbers, or the
 *                       preserve flag (see above)
 * @param boolean        When true both styles are appended to the current
 *                       ones, otherwise they replace them
 */
function set_line_style($style1, $style2 = '', $preserve_defaults = false) {
    // Two-argument form: shift the boolean into its proper slot
    if (is_bool($style2)) {
        $preserve_defaults = $style2;
        $style2 = '';
    }

    if ($preserve_defaults) {
        $this->line_style1 .= $style1;
        $this->line_style2 .= $style2;
        return;
    }

    $this->line_style1 = $style1;
    $this->line_style2 = $style2;
}
864 * Sets whether line numbers should be displayed.
866 * Valid values for the first parameter are:
868 * - GESHI_NO_LINE_NUMBERS: Line numbers will not be displayed
869 * - GESHI_NORMAL_LINE_NUMBERS: Line numbers will be displayed
870 * - GESHI_FANCY_LINE_NUMBERS: Fancy line numbers will be displayed
872 * For fancy line numbers, the second parameter is used to signal which lines
873 * are to be fancy. For example, if the value of this parameter is 5 then every
874 * 5th line will be fancy.
876 * @param int How line numbers should be displayed
877 * @param int Defines which lines are fancy
/**
 * Sets whether line numbers should be displayed.
 *
 * Valid values for the first parameter are GESHI_NO_LINE_NUMBERS,
 * GESHI_NORMAL_LINE_NUMBERS and GESHI_FANCY_LINE_NUMBERS. Any other value
 * records GESHI_ERROR_INVALID_LINE_NUMBER_TYPE and leaves the current
 * line-number settings untouched.
 *
 * @param int How line numbers should be displayed
 * @param int Defines which lines are fancy (every nth line)
 */
function enable_line_numbers($flag, $nth_row = 5) {
    if (GESHI_NO_LINE_NUMBERS != $flag && GESHI_NORMAL_LINE_NUMBERS != $flag
        && GESHI_FANCY_LINE_NUMBERS != $flag) {
        $this->error = GESHI_ERROR_INVALID_LINE_NUMBER_TYPE;
        // Do not store a flag that has just been reported as invalid
        return;
    }
    $this->line_numbers = $flag;
    $this->line_nth_row = $nth_row;
}
890 * Sets whether spans and other HTML markup generated by GeSHi can
891 * span over multiple lines or not. Defaults to true to reduce overhead.
892 * Set it to false if you want to manipulate the output or manually display
893 * the code in an ordered list.
895 * @param boolean Whether multiline spans are allowed or not
/**
 * Sets whether generated markup may span several lines.
 *
 * @param boolean Whether multiline spans are allowed or not
 */
function enable_multiline_span($flag)
{
    // Stored as a real boolean regardless of what was passed in
    $this->allow_multiline_span = (bool) $flag;
}
903 * Get current setting for multiline spans, see GeSHi->enable_multiline_span().
905 * @see enable_multiline_span
/**
 * Returns the current setting for multiline spans.
 *
 * @see enable_multiline_span
 * @return boolean The value last stored in allow_multiline_span
 */
function get_multiline_span()
{
    return $this->allow_multiline_span;
}
913 * Sets the style for a keyword group. If $preserve_defaults is
914 * true, then styles are merged with the default styles, with the
915 * user defined styles having priority
917 * @param int The key of the keyword group to change the styles of
918 * @param string The style to make the keywords
919 * @param boolean Whether to merge the new styles with the old or just
/**
 * Sets the style for a keyword group.
 *
 * @param int     The key of the keyword group to change the styles of
 * @param string  The style to make the keywords
 * @param boolean When true the style is appended to the group's current
 *                style, otherwise it replaces it
 */
function set_keyword_group_style($key, $style, $preserve_defaults = false) {
    // Appending to a group that has no style yet would read an undefined
    // array key; in that case simply start from the new style.
    if ($preserve_defaults && isset($this->language_data['STYLES']['KEYWORDS'][$key])) {
        $this->language_data['STYLES']['KEYWORDS'][$key] .= $style;
    } else {
        $this->language_data['STYLES']['KEYWORDS'][$key] = $style;
    }

    // Update the lexic permissions: a group seen for the first time is
    // switched on, an existing on/off choice is kept.
    if (!isset($this->lexic_permissions['KEYWORDS'][$key])) {
        $this->lexic_permissions['KEYWORDS'][$key] = true;
    }
}
938 * Turns highlighting on/off for a keyword group
940 * @param int The key of the keyword group to turn on or off
941 * @param boolean Whether to turn highlighting for that group on or off
/**
 * Turns highlighting on/off for a keyword group.
 *
 * @param int     The key of the keyword group to turn on or off
 * @param boolean Whether to turn highlighting for that group on or off
 */
function set_keyword_group_highlighting($key, $flag = true) {
    $this->lexic_permissions['KEYWORDS'][$key] = (bool) $flag;
}
949 * Sets the styles for comment groups. If $preserve_defaults is
950 * true, then styles are merged with the default styles, with the
951 * user defined styles having priority
953 * @param int The key of the comment group to change the styles of
954 * @param string The style to make the comments
955 * @param boolean Whether to merge the new styles with the old or just
/**
 * Sets the style for a comment group.
 *
 * @param int     The key of the comment group to change the styles of
 * @param string  The style to make the comments
 * @param boolean When true the style is appended to the group's current
 *                style, otherwise it replaces it
 */
function set_comments_style($key, $style, $preserve_defaults = false) {
    // Only append when there is an existing style; appending to a missing
    // key would read an undefined array key.
    if ($preserve_defaults && isset($this->language_data['STYLES']['COMMENTS'][$key])) {
        $this->language_data['STYLES']['COMMENTS'][$key] .= $style;
    } else {
        $this->language_data['STYLES']['COMMENTS'][$key] = $style;
    }
}
968 * Turns highlighting on/off for comment groups
970 * @param int The key of the comment group to turn on or off
971 * @param boolean Whether to turn highlighting for that group on or off
/**
 * Turns highlighting on/off for a comment group.
 *
 * @param int     The key of the comment group to turn on or off
 * @param boolean Whether to turn highlighting for that group on or off
 */
function set_comments_highlighting($key, $flag = true) {
    $this->lexic_permissions['COMMENTS'][$key] = (bool) $flag;
}
979 * Sets the styles for escaped characters. If $preserve_defaults is
980 * true, then styles are merged with the default styles, with the
981 * user defined styles having priority
983 * @param string The style to make the escape characters
984 * @param boolean Whether to merge the new styles with the old or just
/**
 * Sets the style for escaped characters.
 *
 * @param string  The style to make the escape characters
 * @param boolean When true the style is appended to the group's current
 *                style, otherwise it replaces it
 * @param int     The escape character group whose style is set (default 0)
 */
function set_escape_characters_style($style, $preserve_defaults = false, $group = 0) {
    // Only append when there is an existing style; appending to a missing
    // key would read an undefined array key.
    if ($preserve_defaults && isset($this->language_data['STYLES']['ESCAPE_CHAR'][$group])) {
        $this->language_data['STYLES']['ESCAPE_CHAR'][$group] .= $style;
    } else {
        $this->language_data['STYLES']['ESCAPE_CHAR'][$group] = $style;
    }
}
997 * Turns highlighting on/off for escaped characters
999 * @param boolean Whether to turn highlighting for escape characters on or off
/**
 * Turns highlighting on/off for escaped characters.
 *
 * @param boolean Whether to turn highlighting for escape characters on or off
 */
function set_escape_characters_highlighting($flag = true) {
    $this->lexic_permissions['ESCAPE_CHAR'] = (bool) $flag;
}
1007 * Sets the styles for brackets. If $preserve_defaults is
1008 * true, then styles are merged with the default styles, with the
1009 * user defined styles having priority
1011 * This method is DEPRECATED: use set_symbols_style instead.
1012 * This method will be removed in 1.2.X
1014 * @param string The style to make the brackets
1015 * @param boolean Whether to merge the new styles with the old or just
1018 * @deprecated In favour of set_symbols_style
/**
 * Sets the style for brackets (stored under style group 0).
 *
 * @param string  The style to make the brackets
 * @param boolean When true the style is appended to the current bracket
 *                style, otherwise it replaces it
 * @deprecated In favour of set_symbols_style
 */
function set_brackets_style($style, $preserve_defaults = false) {
    // Only append when there is an existing style; appending to a missing
    // key would read an undefined array key.
    if ($preserve_defaults && isset($this->language_data['STYLES']['BRACKETS'][0])) {
        $this->language_data['STYLES']['BRACKETS'][0] .= $style;
    } else {
        $this->language_data['STYLES']['BRACKETS'][0] = $style;
    }
}
1029 * Turns highlighting on/off for brackets
1031 * This method is DEPRECATED: use set_symbols_highlighting instead.
1032 * This method will be removed in 1.2.X
1034 * @param boolean Whether to turn highlighting for brackets on or off
1036 * @deprecated In favour of set_symbols_highlighting
/**
 * Turns highlighting on/off for brackets.
 *
 * @param boolean Whether to turn highlighting for brackets on or off
 * @deprecated In favour of set_symbols_highlighting
 */
function set_brackets_highlighting($flag) {
    $this->lexic_permissions['BRACKETS'] = (bool) $flag;
}
1043 * Sets the styles for symbols. If $preserve_defaults is
1044 * true, then styles are merged with the default styles, with the
1045 * user defined styles having priority
1047 * @param string The style to make the symbols
1048 * @param boolean Whether to merge the new styles with the old or just
1050 * @param int Tells the group of symbols for which style should be set.
/**
 * Sets the style for a symbol group.
 *
 * For group 0 the same style is also forwarded to set_brackets_style() for
 * backward compatibility.
 *
 * @param string  The style to make the symbols
 * @param boolean When true the style is appended to the group's current
 *                style, otherwise it replaces it
 * @param int     The group of symbols for which the style should be set
 */
function set_symbols_style($style, $preserve_defaults = false, $group = 0) {
    // Only append when there is an existing style; appending to a missing
    // key would read an undefined array key.
    if ($preserve_defaults && isset($this->language_data['STYLES']['SYMBOLS'][$group])) {
        $this->language_data['STYLES']['SYMBOLS'][$group] .= $style;
    } else {
        $this->language_data['STYLES']['SYMBOLS'][$group] = $style;
    }

    // For backward compatibility
    if ($group == 0) {
        $this->set_brackets_style($style, $preserve_defaults);
    }
}
1068 * Turns highlighting on/off for symbols
1070 * @param boolean Whether to turn highlighting for symbols on or off
/**
 * Turns highlighting on/off for symbols. The same flag is forwarded to
 * set_brackets_highlighting() for backward compatibility.
 *
 * @param boolean Whether to turn highlighting for symbols on or off
 */
function set_symbols_highlighting($flag) {
    $this->lexic_permissions['SYMBOLS'] = (bool) $flag;

    // For backward compatibility
    $this->set_brackets_highlighting($flag);
}
1082 * Sets the styles for strings. If $preserve_defaults is
1083 * true, then styles are merged with the default styles, with the
1084 * user defined styles having priority
1086 * @param string The style to make the strings
1087 * @param boolean Whether to merge the new styles with the old or just
1089 * @param int Tells the group of strings for which style should be set.
/**
 * Sets the style for a string group.
 *
 * @param string  The style to make the strings
 * @param boolean When true the style is appended to the group's current
 *                style, otherwise it replaces it
 * @param int     The group of strings for which the style should be set
 */
function set_strings_style($style, $preserve_defaults = false, $group = 0) {
    // Only append when there is an existing style; appending to a missing
    // key would read an undefined array key.
    if ($preserve_defaults && isset($this->language_data['STYLES']['STRINGS'][$group])) {
        $this->language_data['STYLES']['STRINGS'][$group] .= $style;
    } else {
        $this->language_data['STYLES']['STRINGS'][$group] = $style;
    }
}
1101 * Turns highlighting on/off for strings
1103 * @param boolean Whether to turn highlighting for strings on or off
/**
 * Turns highlighting on/off for strings.
 *
 * @param boolean Whether to turn highlighting for strings on or off
 */
function set_strings_highlighting($flag) {
    $this->lexic_permissions['STRINGS'] = (bool) $flag;
}
1111 * Sets the styles for strict code blocks. If $preserve_defaults is
1112 * true, then styles are merged with the default styles, with the
1113 * user defined styles having priority
1115 * @param string The style to make the script blocks
1116 * @param boolean Whether to merge the new styles with the old or just
1118 * @param int Tells the group of script blocks for which style should be set.
/**
 * Sets the style for a strict-mode script block group.
 *
 * @param string  The style to make the script blocks
 * @param boolean When true the style is appended to the group's current
 *                style, otherwise it replaces it
 * @param int     The group of script blocks for which the style should be set
 */
function set_script_style($style, $preserve_defaults = false, $group = 0) {
    // Update the style of the script block group. Only append when there is
    // an existing style; appending to a missing key would read an undefined
    // array key.
    if ($preserve_defaults && isset($this->language_data['STYLES']['SCRIPT'][$group])) {
        $this->language_data['STYLES']['SCRIPT'][$group] .= $style;
    } else {
        $this->language_data['STYLES']['SCRIPT'][$group] = $style;
    }
}
1131 * Sets the styles for numbers. If $preserve_defaults is
1132 * true, then styles are merged with the default styles, with the
1133 * user defined styles having priority
1135 * @param string The style to make the numbers
1136 * @param boolean Whether to merge the new styles with the old or just
1138 * @param int Tells the group of numbers for which style should be set.
/**
 * Sets the style for a number group.
 *
 * @param string  The style to make the numbers
 * @param boolean When true the style is appended to the group's current
 *                style, otherwise it replaces it
 * @param int     The group of numbers for which the style should be set
 */
function set_numbers_style($style, $preserve_defaults = false, $group = 0) {
    // Only append when there is an existing style; appending to a missing
    // key would read an undefined array key.
    if ($preserve_defaults && isset($this->language_data['STYLES']['NUMBERS'][$group])) {
        $this->language_data['STYLES']['NUMBERS'][$group] .= $style;
    } else {
        $this->language_data['STYLES']['NUMBERS'][$group] = $style;
    }
}
/**
 * Switches number highlighting on or off.
 *
 * @param boolean $flag Truthy to highlight numbers, falsy to leave them plain
 */
function set_numbers_highlighting($flag) {
    $this->lexic_permissions['NUMBERS'] = (bool) $flag;
}
/**
 * Sets the styles for methods. $key is a number that references the
 * appropriate "object splitter" - see the language file for the language
 * you are highlighting to get this number. If $preserve_defaults is
 * true, then styles are merged with the default styles, with the
 * user defined styles having priority
 *
 * @param int     $key               The key of the object splitter to change the styles of
 * @param string  $style             The style to make the methods
 * @param boolean $preserve_defaults Whether to merge the new styles with the old or just
 *                                   to overwrite them
 */
function set_methods_style($key, $style, $preserve_defaults = false) {
    // Only append when the splitter already has a style; assigning in the
    // other case avoids an "undefined index" notice on a fresh key.
    if ($preserve_defaults && isset($this->language_data['STYLES']['METHODS'][$key])) {
        $this->language_data['STYLES']['METHODS'][$key] .= $style;
    } else {
        $this->language_data['STYLES']['METHODS'][$key] = $style;
    }
}
/**
 * Switches method highlighting on or off.
 *
 * @param boolean $flag Truthy to highlight methods, falsy to leave them plain
 */
function set_methods_highlighting($flag) {
    $this->lexic_permissions['METHODS'] = (bool) $flag;
}
/**
 * Sets the styles for regexps. If $preserve_defaults is
 * true, then styles are merged with the default styles, with the
 * user defined styles having priority
 *
 * @param int     $key               The key of the regular expression group to change the style of
 * @param string  $style             The style to make the regular expression matches
 * @param boolean $preserve_defaults Whether to merge the new styles with the old or just
 *                                   to overwrite them
 */
function set_regexps_style($key, $style, $preserve_defaults = false) {
    // Only append when the regexp group already has a style; assigning in
    // the other case avoids an "undefined index" notice on a fresh key.
    if ($preserve_defaults && isset($this->language_data['STYLES']['REGEXPS'][$key])) {
        $this->language_data['STYLES']['REGEXPS'][$key] .= $style;
    } else {
        $this->language_data['STYLES']['REGEXPS'][$key] = $style;
    }
}
/**
 * Switches highlighting on or off for a single regular expression group.
 *
 * @param int     $key  The key of the regular expression group to turn on or off
 * @param boolean $flag Truthy to highlight the group, falsy to leave it plain
 */
function set_regexps_highlighting($key, $flag) {
    $this->lexic_permissions['REGEXPS'][$key] = (bool) $flag;
}
/**
 * Chooses whether a keyword group is matched case sensitively.
 *
 * @param int     $key  The key of the keyword group to change the case sensitivity of
 * @param boolean $case Truthy to match in a case sensitive manner, falsy otherwise
 */
function set_case_sensitivity($key, $case) {
    $this->language_data['CASE_SENSITIVE'][$key] = (bool) $case;
}
/**
 * Sets the case that keywords should use when found. Use the constants:
 *
 *  - GESHI_CAPS_NO_CHANGE: leave keywords as-is
 *  - GESHI_CAPS_UPPER: convert all keywords to uppercase where found
 *  - GESHI_CAPS_LOWER: convert all keywords to lowercase where found
 *
 * Any other value leaves the current setting untouched.
 *
 * @param int $case A constant specifying what to do with matched keywords
 */
function set_case_keywords($case) {
    $valid_modes = array(
        GESHI_CAPS_NO_CHANGE,
        GESHI_CAPS_UPPER,
        GESHI_CAPS_LOWER
    );

    // Silently ignore anything that is not one of the three known modes
    if (!in_array($case, $valid_modes)) {
        return;
    }

    $this->language_data['CASE_KEYWORDS'] = $case;
}
/**
 * Sets how many spaces a tab is substituted for
 *
 * The value is converted with intval(); a result below 1 is not kept and
 * the width is set to the default of 8 instead.
 *
 * @param int $width The tab width
 */
function set_tab_width($width) {
    $requested = intval($width);

    // Anything below 1 does not fit the constraints: use the default
    $this->tab_width = ($requested < 1) ? 8 : $requested;
}
/**
 * Sets whether or not to use the tab-stop width specified by the language
 *
 * @param boolean $use Whether to use language-specific tab-stop widths
 */
function set_use_language_tab_width($use) {
    $this->use_language_tab_width = $use ? true : false;
}
/**
 * Returns the tab width to use, based on the current language and user
 * preference
 *
 * The language's TAB_WIDTH is returned only when language-specific widths
 * are enabled and the language defines one; otherwise the user-set width.
 *
 * @return int Tab width
 */
function get_real_tab_width() {
    $language_has_width = isset($this->language_data['TAB_WIDTH']);

    if ($this->use_language_tab_width && $language_has_width) {
        return $this->language_data['TAB_WIDTH'];
    }

    return $this->tab_width;
}
/**
 * Enables/disables strict highlighting. Default is off, calling this
 * method without parameters will turn it on. See documentation
 * for more details on strict mode and where to use it.
 *
 * The call only has an effect when the language declares
 * STRICT_MODE_APPLIES as GESHI_MAYBE; otherwise nothing is changed.
 *
 * @param boolean $mode Whether to enable strict mode or not
 */
function enable_strict_mode($mode = true) {
    // Languages that do not leave the choice open keep their own setting
    if (GESHI_MAYBE != $this->language_data['STRICT_MODE_APPLIES']) {
        return;
    }

    if ($mode) {
        $this->strict_mode = GESHI_ALWAYS;
    } else {
        $this->strict_mode = GESHI_NEVER;
    }
}
/**
 * Disables all highlighting
 *
 * Thin wrapper that forwards to enable_highlighting() with false.
 *
 * @todo Rewrite with array traversal
 * @deprecated In favour of enable_highlighting
 */
function disable_highlighting() {
    $enable = false;
    $this->enable_highlighting($enable);
}
/**
 * Enables all highlighting
 *
 * The optional flag parameter was added in version 1.0.7.21 and can be used
 * to enable (true) or disable (false) all highlighting.
 *
 * Every entry of the lexic permission table is set to the flag; entries
 * that are arrays have each of their members set individually. The
 * important-blocks switch follows the same flag.
 *
 * @param boolean $flag A flag specifying whether to enable or disable all highlighting
 * @todo Rewrite with array traversal
 */
function enable_highlighting($flag = true) {
    $enabled = (bool) $flag;

    foreach (array_keys($this->lexic_permissions) as $type) {
        if (!is_array($this->lexic_permissions[$type])) {
            $this->lexic_permissions[$type] = $enabled;
            continue;
        }

        // Grouped permissions: flip every group of this type
        foreach (array_keys($this->lexic_permissions[$type]) as $group) {
            $this->lexic_permissions[$type][$group] = $enabled;
        }
    }

    // Context blocks
    $this->enable_important_blocks = $enabled;
}
/**
 * Given a file extension, this method returns either a valid geshi language
 * name, or the empty string if it couldn't be found
 *
 * Matching is exact and case sensitive (e.g. 'c' maps to "c" while 'C' maps
 * to "cpp"). When several languages list the same extension, the first one
 * in table order wins.
 *
 * @param string $extension The extension to get a language name for
 * @param array  $lookup    A lookup array to use instead of the default one,
 *                          in the form 'lang_name' => array('extension', ...)
 * @return string The language name, or '' if no language lists the extension
 * @todo Re-think about how this method works (maybe make it private and/or make it
 *       a extension->lang lookup?)
 */
function get_language_name_from_extension( $extension, $lookup = array() ) {
    if ( !is_array($lookup) || empty($lookup)) {
        // NOTE(review): the listing this table was taken from skips a few
        // source lines inside the array; confirm no entries are missing.
        $lookup = array(
            'abap' => array('abap'),
            'actionscript' => array('as'),
            'ada' => array('a', 'ada', 'adb', 'ads'),
            'apache' => array('conf'),
            'asm' => array('ash', 'asm', 'inc'),
            'asp' => array('asp'),
            'bash' => array('sh'),
            'bf' => array('bf'),
            'c' => array('c', 'h'),
            'c_mac' => array('c', 'h'),
            'caddcl' => array(),
            'cadlisp' => array(),
            'cdfg' => array('cdfg'),
            'cobol' => array('cbl'),
            'cpp' => array('cpp', 'hpp', 'C', 'H', 'CPP', 'HPP'),
            'csharp' => array('cs'),
            'css' => array('css'),
            'delphi' => array('dpk', 'dpr', 'pp', 'pas'),
            'diff' => array('diff', 'patch'),
            'dos' => array('bat', 'cmd'),
            'gdb' => array('kcrash', 'crash', 'bt'),
            'gettext' => array('po', 'pot'),
            'gml' => array('gml'),
            'gnuplot' => array('plt'),
            'groovy' => array('groovy'),
            'haskell' => array('hs'),
            'html4strict' => array('html', 'htm'),
            'ini' => array('ini', 'desktop'),
            'java' => array('java'),
            'javascript' => array('js'),
            'klonec' => array('kl1'),
            'klonecpp' => array('klx'),
            'latex' => array('tex'),
            'lisp' => array('lisp'),
            'lua' => array('lua'),
            'matlab' => array('m'),
            'mysql' => array('sql'),
            'oracle8' => array(),
            'oracle10' => array(),
            'pascal' => array('pas'),
            'perl' => array('pl', 'pm'),
            'php' => array('php', 'php5', 'phtml', 'phps'),
            'povray' => array('pov'),
            'providex' => array('pvc', 'pvx'),
            'prolog' => array('pl'),
            'python' => array('py'),
            'qbasic' => array('bi'),
            'reg' => array('reg'),
            'ruby' => array('rb'),
            'sas' => array('sas'),
            'scala' => array('scala'),
            'scheme' => array('scm'),
            'scilab' => array('sci'),
            'smalltalk' => array('st'),
            'smarty' => array(),
            'tcl' => array('tcl'),
            'vb' => array('bas'),
            'visualfoxpro' => array(),
            'whitespace' => array('ws'),
            'xml' => array('xml', 'svg', 'xrc'),
            'z80' => array('z80', 'asm', 'inc')
        );
    }

    // Compare as strings with ===. A loose in_array() treats numeric-looking
    // strings such as '1e1' and '10' as equal, which could pick the wrong
    // language. The (array) cast tolerates a custom table that maps a
    // language to a single extension string instead of a list.
    $extension = (string) $extension;
    foreach ($lookup as $lang => $extensions) {
        foreach ((array) $extensions as $candidate) {
            if ((string) $candidate === $extension) {
                return $lang;
            }
        }
    }

    return '';
}
/**
 * Given a file name, this method loads its contents in, and attempts
 * to set the language automatically. An optional lookup table can be
 * passed for looking up the language name. If not specified a default
 * table is used
 *
 * The language table is in the form
 * <pre>array(
 *   'lang_name' => array('extension', 'extension', ...),
 *   'lang_name' ...
 * );</pre>
 *
 * If the file cannot be read, the error state is set to
 * GESHI_ERROR_FILE_NOT_READABLE and neither source nor language is changed.
 *
 * @param string $file_name The filename to load the source from
 * @param array  $lookup    A lookup array to use instead of the default one
 * @todo Complete rethink of this and above method
 */
function load_from_file($file_name, $lookup = array()) {
    // is_readable() can succeed for things file_get_contents() cannot load
    // (e.g. a directory), so the read result is checked as well.
    $contents = is_readable($file_name) ? file_get_contents($file_name) : false;
    if ($contents === false) {
        $this->error = GESHI_ERROR_FILE_NOT_READABLE;
        return;
    }

    $this->set_source($contents);

    // pathinfo() looks at the final path component only, so a dot in a
    // directory name ("dir.d/file") is not mistaken for an extension, and a
    // name without any dot yields '' rather than false.
    $extension = pathinfo($file_name, PATHINFO_EXTENSION);
    $this->set_language($this->get_language_name_from_extension($extension, $lookup));
}
/**
 * Adds a keyword to a keyword group for highlighting
 *
 * A word that is already in the group is not added again. If the parse
 * cache has been built, the cached regexp list of the group is updated too.
 *
 * @param int    $key  The key of the keyword group to add the keyword to
 * @param string $word The word to add to the keyword group
 */
function add_keyword($key, $word) {
    // Tolerate a group that does not exist yet instead of handing a
    // non-array to in_array()
    $existing = isset($this->language_data['KEYWORDS'][$key])
        ? (array) $this->language_data['KEYWORDS'][$key]
        : array();

    // Strict comparison: loosely, numeric-looking keywords such as '1e3'
    // and '1000' would count as the same word
    if (in_array($word, $existing, true)) {
        return;
    }

    $this->language_data['KEYWORDS'][$key][] = $word;

    if (!$this->parse_cache_built) {
        return;
    }

    // The group may have no cached list (the cache builder skips groups
    // whose highlighting is disabled). Appending to index -1 would create a
    // bogus '|word' entry, so compile the group from scratch instead.
    if (empty($this->language_data['CACHED_KEYWORD_LISTS'][$key])) {
        $this->optimize_keyword_group($key);
        return;
    }

    //NEW in 1.0.8 don't recompile the whole optimized regexp, simply append it
    $subkey = count($this->language_data['CACHED_KEYWORD_LISTS'][$key]) - 1;
    $this->language_data['CACHED_KEYWORD_LISTS'][$key][$subkey] .= '|' . preg_quote($word, '/');
}
/**
 * Removes a keyword from a keyword group
 *
 * Nothing happens when the group does not exist or does not contain the word.
 *
 * @param int    $key       The key of the keyword group to remove the keyword from
 * @param string $word      The word to remove from the keyword group
 * @param bool   $recompile Whether to automatically recompile the optimized regexp list or not.
 *               Note: if you set this to false and @see GeSHi->parse_code() was already called once,
 *               for the current language, you have to manually call @see GeSHi->optimize_keyword_group()
 *               or the removed keyword will stay in cache and still be highlighted! On the other hand
 *               it might be too expensive to recompile the regexp list for every removal if you want to
 *               remove a lot of keywords.
 */
function remove_keyword($key, $word, $recompile = true) {
    // A missing group has nothing to remove; bail out rather than pass a
    // non-array to array_search()
    if (!isset($this->language_data['KEYWORDS'][$key]) ||
        !is_array($this->language_data['KEYWORDS'][$key])) {
        return;
    }

    // Strict search so that numeric-looking keywords ('1e3' vs '1000') are
    // not confused with each other
    $key_to_remove = array_search($word, $this->language_data['KEYWORDS'][$key], true);
    if ($key_to_remove === false) {
        return;
    }

    unset($this->language_data['KEYWORDS'][$key][$key_to_remove]);

    //NEW in 1.0.8, optionally recompile keyword group
    if ($recompile && $this->parse_cache_built) {
        $this->optimize_keyword_group($key);
    }
}
/**
 * Creates a new keyword group
 *
 * The group is enabled for highlighting as soon as it is created. A call
 * with an empty word list is ignored.
 *
 * @param int     $key            The key of the keyword group to create
 * @param string  $styles         The styles for the keyword group
 * @param boolean $case_sensitive Whether the keyword group is case sensitive or not
 * @param array   $words          The words to use for the keyword group
 */
function add_keyword_group($key, $styles, $case_sensitive = true, $words = array()) {
    $word_list = (array) $words;

    // empty word lists mess up highlighting
    if (!$word_list) {
        return;
    }

    // Register the group: words, style and case handling in the language
    // data, and the permission that switches it on
    $this->language_data['KEYWORDS'][$key] = $word_list;
    $this->language_data['STYLES']['KEYWORDS'][$key] = $styles;
    $this->language_data['CASE_SENSITIVE'][$key] = $case_sensitive;
    $this->lexic_permissions['KEYWORDS'][$key] = true;

    //NEW in 1.0.8, cache keyword regexp
    if ($this->parse_cache_built) {
        $this->optimize_keyword_group($key);
    }
}
/**
 * Removes a keyword group
 *
 * Drops the group's words, permission, case setting, style and its cached
 * regexp list.
 *
 * @param int $key The key of the keyword group to remove
 */
function remove_keyword_group ($key) {
    // Remove everything registered for the group, cache included
    unset(
        $this->language_data['KEYWORDS'][$key],
        $this->lexic_permissions['KEYWORDS'][$key],
        $this->language_data['CASE_SENSITIVE'][$key],
        $this->language_data['STYLES']['KEYWORDS'][$key],
        $this->language_data['CACHED_KEYWORD_LISTS'][$key]
    );
}
/**
 * Compiles the optimized regexp list for a keyword group
 *
 * The result of optimize_regexp_list() for the group's keywords is stored in
 * CACHED_KEYWORD_LISTS. When SPACE_AS_WHITESPACE is switched on in
 * PARSER_CONTROL (globally for keywords, or for this group, the group
 * setting taking precedence), every literal space in the cached regexps is
 * replaced by "\s+".
 *
 * @param int $key The key of the keyword group to compile & optimize
 */
function optimize_keyword_group($key) {
    $this->language_data['CACHED_KEYWORD_LISTS'][$key] =
        $this->optimize_regexp_list($this->language_data['KEYWORDS'][$key]);

    // isset() on a nested index is false as soon as any level is missing, so
    // one check per setting replaces the former ladder of nested isset()s
    // (which also tested the per-group setting twice).
    $space_as_whitespace = false;
    if (isset($this->language_data['PARSER_CONTROL']['KEYWORDS']['SPACE_AS_WHITESPACE'])) {
        $space_as_whitespace = $this->language_data['PARSER_CONTROL']['KEYWORDS']['SPACE_AS_WHITESPACE'];
    }
    // A per-group setting overrides the global one
    if (isset($this->language_data['PARSER_CONTROL']['KEYWORDS'][$key]['SPACE_AS_WHITESPACE'])) {
        $space_as_whitespace = $this->language_data['PARSER_CONTROL']['KEYWORDS'][$key]['SPACE_AS_WHITESPACE'];
    }

    if (!$space_as_whitespace) {
        return;
    }

    foreach ($this->language_data['CACHED_KEYWORD_LISTS'][$key] as $rxk => $rxv) {
        $this->language_data['CACHED_KEYWORD_LISTS'][$key][$rxk] =
            str_replace(" ", "\\s+", $rxv);
    }
}
/**
 * Stores the content to use for the header block
 *
 * @param string $content The content of the header block
 */
function set_header_content($content) {
    $header = $content;
    $this->header_content = $header;
}
/**
 * Stores the content to use for the footer block
 *
 * @param string $content The content of the footer block
 */
function set_footer_content($content) {
    $footer = $content;
    $this->footer_content = $footer;
}
/**
 * Stores the style to apply to the header content
 *
 * @param string $style The style for the header content
 */
function set_header_content_style($style) {
    $header_style = $style;
    $this->header_content_style = $header_style;
}
/**
 * Stores the style to apply to the footer content
 *
 * @param string $style The style for the footer content
 */
function set_footer_content_style($style) {
    $footer_style = $style;
    $this->footer_content_style = $footer_style;
}
/**
 * Sets whether to force a surrounding block around
 * the highlighted code or not
 *
 * @param boolean $flag Tells whether to enable or disable this feature
 */
function enable_inner_code_block($flag) {
    $this->force_code_block = $flag ? true : false;
}
/**
 * Sets the base URL to be used for keywords
 *
 * @param int    $group The key of the keyword group to set the URL for
 * @param string $url   The URL to set for the group. If {FNAME} is in
 *                      the url somewhere, it is replaced by the keyword
 *                      that the URL is being made for
 */
function set_url_for_keyword_group($group, $url) {
    $base_url = $url;
    $this->language_data['URLS'][$group] = $base_url;
}
/**
 * Sets styles for links in code
 *
 * @param int    $type   A constant that specifies what state the style is being
 *                       set for - e.g. :hover or :visited
 * @param string $styles The styles to use for that state
 */
function set_link_styles($type, $styles) {
    $state_styles = $styles;
    $this->link_styles[$type] = $state_styles;
}
/**
 * Sets the target for links in code
 *
 * The value is stored as a ready-to-use attribute fragment
 * (' target="..."'). An empty target clears the fragment so that no
 * target attribute is produced at all.
 *
 * @param string $target The target for links in the code, e.g. _blank
 */
function set_link_target($target) {
    // Without this guard an empty value would end up as target=""
    if (!$target) {
        $this->link_target = '';
    } else {
        $this->link_target = ' target="' . $target . '"';
    }
}
/**
 * Stores the styles used for important parts of the code
 *
 * @param string $styles The styles to use on important parts of the code
 */
function set_important_styles($styles) {
    $important = $styles;
    $this->important_styles = $important;
}
/**
 * Sets whether context-important blocks are highlighted
 *
 * @param boolean $flag Tells whether to enable or disable highlighting of important blocks
 * @todo REMOVE THIS SHIZ FROM GESHI!
 */
function enable_important_blocks($flag) {
    $this->enable_important_blocks = (bool) $flag;
}
/**
 * Whether CSS IDs should be added to each line
 *
 * @param boolean $flag If true, IDs will be added to each line.
 */
function enable_ids($flag = true) {
    $this->add_ids = (bool) $flag;
}
/**
 * Specifies which lines to highlight extra
 *
 * The extra style parameter was added in 1.0.7.21.
 *
 * @param mixed  $lines An array of line numbers to highlight, or just a line
 *                      number on its own.
 * @param string $style A string specifying the style to use for this line.
 *                      If null is specified, the default style is used.
 *                      If false is specified, the line will be removed from
 *                      special highlighting
 * @todo  Some data replication here that could be cut down on
 */
function highlight_lines_extra($lines, $style = null) {
    if (is_array($lines)) {
        // Handle every entry on its own, with the same style
        foreach ($lines as $single_line) {
            $this->highlight_lines_extra($single_line, $style);
        }
        return;
    }

    $line_no = intval($lines);

    if ($style === false) {
        // Take the line out of special highlighting altogether
        unset($this->highlight_extra_lines[$line_no]);
        unset($this->highlight_extra_lines_styles[$line_no]);
        return;
    }

    // Mark the line as being highlighted specially
    $this->highlight_extra_lines[$line_no] = $line_no;

    if ($style === null) {
        // Default style: drop any line-specific style
        unset($this->highlight_extra_lines_styles[$line_no]);
    } else {
        $this->highlight_extra_lines_styles[$line_no] = $style;
    }
}
/**
 * Stores the default style for extra-highlighted lines
 *
 * @param string $styles The style for extra-highlighted lines
 */
function set_highlight_lines_extra_style($styles) {
    $extra_style = $styles;
    $this->highlight_extra_lines_style = $extra_style;
}
/**
 * Sets the line-ending
 *
 * The value is converted to a string before it is stored.
 *
 * @param string $line_ending The new line-ending
 */
function set_line_ending($line_ending) {
    $this->line_ending = strval($line_ending);
}
/**
 * Sets what number line numbers should start at. Should
 * be a positive integer, and will be converted to one.
 *
 * <b>Warning:</b> Using this method will add the "start"
 * attribute to the &lt;ol&gt; that is used for line numbering.
 * This is <b>not</b> valid XHTML strict, so if that's what you
 * care about then don't use this method. Firefox is getting
 * support for the CSS method of doing this in 1.1 and Opera
 * has support for the CSS method, but (of course) IE doesn't
 * so it's not worth doing it the CSS way yet.
 *
 * @param int $number The number to start line numbers at
 */
function start_line_numbers_at($number) {
    // Integer conversion first, then drop the sign
    $start = (int) $number;
    $this->line_numbers_start = abs($start);
}
/**
 * Sets the encoding used for htmlspecialchars(), for international
 * support.
 *
 * The encoding name is stored in lower case. An empty value is ignored so
 * that it cannot replace the encoding that is currently configured.
 *
 * @param string $encoding The encoding to use for the source
 */
function set_encoding($encoding) {
    // Keep the current encoding when nothing usable was passed
    if ($encoding) {
        $this->encoding = strtolower($encoding);
    }
}
/**
 * Turns linking of keywords on or off.
 *
 * @param boolean $enable If true, links will be added to keywords
 */
function enable_keyword_links($enable = true) {
    $this->keyword_links = $enable ? true : false;
}
1806 * Setup caches needed for styling. This is automatically called in
1807 * parse_code() and get_stylesheet() when appropriate. This function helps
1808 * stylesheet generators as they rely on some style information being
// Prepares language_data['NUMBERS_CACHE'] and re-indexes the NUMBERS styles.
// Nothing is done unless number highlighting is permitted.
1814 function build_style_cache() {
1815 //Build the style cache needed to highlight numbers appropriate
1816 if($this->lexic_permissions
['NUMBERS']) {
1817 //First check what way highlighting information for numbers are given
// A language without a NUMBERS entry is treated as having the value 0.
1818 if(!isset($this->language_data
['NUMBERS'])) {
1819 $this->language_data
['NUMBERS'] = 0;
// An array-valued NUMBERS setting is copied into the cache as it is.
1822 if(is_array($this->language_data
['NUMBERS'])) {
1823 $this->language_data
['NUMBERS_CACHE'] = $this->language_data
['NUMBERS'];
// Otherwise NUMBERS is used as an integer bitmask and the cache starts out empty.
1825 $this->language_data
['NUMBERS_CACHE'] = array();
// A zero mask falls back to basic integers plus non-scientific floats.
1826 if(!$this->language_data
['NUMBERS']) {
1827 $this->language_data
['NUMBERS'] =
1828 GESHI_NUMBER_INT_BASIC |
1829 GESHI_NUMBER_FLT_NONSCI
;
// Walk the mask from the lowest bit upwards: $i is the bit position and $j
// is the mask shifted right by $i; the loop stops once no bits remain.
1832 for($i = 0, $j = $this->language_data
['NUMBERS']; $j > 0; ++
$i, $j>>=1) {
1833 //Rearrange style indices if required ...
// A style stored under the flag value (1 << $i) is moved to the bit position $i.
1834 if(isset($this->language_data
['STYLES']['NUMBERS'][1<<$i])) {
1835 $this->language_data
['STYLES']['NUMBERS'][$i] =
1836 $this->language_data
['STYLES']['NUMBERS'][1<<$i];
1837 unset($this->language_data
['STYLES']['NUMBERS'][1<<$i]);
1840 //Check if this bit is set for highlighting
1842 //So this bit is set ...
1843 //Check if it belongs to group 0 or the actual stylegroup
// A flag with a style of its own gets its own cache entry keyed by $i.
1844 if(isset($this->language_data
['STYLES']['NUMBERS'][$i])) {
1845 $this->language_data
['NUMBERS_CACHE'][$i] = 1 << $i;
// The remaining statements create cache entry 0 on first use and OR the
// flag into it (presumably the branch for flags without their own style,
// going by the comment above - confirm against the full file).
1847 if(!isset($this->language_data
['NUMBERS_CACHE'][0])) {
1848 $this->language_data
['NUMBERS_CACHE'][0] = 0;
1850 $this->language_data
['NUMBERS_CACHE'][0] |
= 1 << $i;
1859 * Setup caches needed for parsing. This is automatically called in parse_code() when appropriate.
1860 * This function makes stylesheet generators much faster as they do not need these caches.
// Builds the lookup data used while parsing: the symbol search regexp, the
// cached keyword regexp lists, the bracket replacement tables and the number
// regexps. Sets parse_cache_built to true when done.
1865 function build_parse_cache() {
1866 // cache symbol regexp
1867 //As this is a costy operation, we avoid doing it for multiple groups ...
1868 //Instead we perform it for all symbols at once.
1870 //For this to work, we need to reorganize the data arrays.
// Only done when symbol highlighting is permitted and the language defines symbols.
1871 if ($this->lexic_permissions
['SYMBOLS'] && !empty($this->language_data
['SYMBOLS'])) {
1872 $this->language_data
['MULTIPLE_SYMBOL_GROUPS'] = count($this->language_data
['STYLES']['SYMBOLS']) > 1;
// SYMBOL_DATA maps each escaped symbol (via hsc()) to the key of its group.
1874 $this->language_data
['SYMBOL_DATA'] = array();
1875 $symbol_preg_multi = array(); // multi char symbols
1876 $symbol_preg_single = array(); // single char symbols
1877 foreach ($this->language_data
['SYMBOLS'] as $key => $symbols) {
// An array entry is a group of symbols; the first group to list a symbol keeps it.
1878 if (is_array($symbols)) {
1879 foreach ($symbols as $sym) {
1880 $sym = $this->hsc($sym);
1881 if (!isset($this->language_data
['SYMBOL_DATA'][$sym])) {
1882 $this->language_data
['SYMBOL_DATA'][$sym] = $key;
// isset($sym[1]) is true when the escaped symbol is longer than one character.
1883 if (isset($sym[1])) { // multiple chars
1884 $symbol_preg_multi[] = preg_quote($sym, '/');
1885 } else { // single char
1887 // don't trigger range out of order error
1888 $symbol_preg_single[] = '\-';
1890 $symbol_preg_single[] = preg_quote($sym, '/');
// Symbols that are not wrapped in an array are recorded under group 0.
1896 $symbols = $this->hsc($symbols);
1897 if (!isset($this->language_data
['SYMBOL_DATA'][$symbols])) {
1898 $this->language_data
['SYMBOL_DATA'][$symbols] = 0;
1899 if (isset($symbols[1])) { // multiple chars
1900 $symbol_preg_multi[] = preg_quote($symbols, '/');
1901 } else if ($symbols == '-') {
1902 // don't trigger range out of order error
1903 $symbol_preg_single[] = '\-';
1904 } else { // single char
1905 $symbol_preg_single[] = preg_quote($symbols, '/');
1911 //Now we have an array with each possible symbol as the key and the style as the actual data.
1912 //This way we can set the correct style just the moment we highlight ...
1914 //Now we need to rewrite our array to get a search string that
// Multi-character symbols become an alternation, single characters a
// character class; both lists are reverse-sorted first (presumably so that
// longer symbols sharing a prefix come first - confirm).
1915 $symbol_preg = array();
1916 if (!empty($symbol_preg_multi)) {
1917 rsort($symbol_preg_multi);
1918 $symbol_preg[] = implode('|', $symbol_preg_multi);
1920 if (!empty($symbol_preg_single)) {
1921 rsort($symbol_preg_single);
1922 $symbol_preg[] = '[' . implode('', $symbol_preg_single) . ']';
1924 $this->language_data
['SYMBOL_SEARCH'] = implode("|", $symbol_preg);
1927 // cache optimized regexp for keyword matching
// The keyword cache is reset, then rebuilt for every group that is not
// explicitly disabled in lexic_permissions.
1929 $this->language_data
['CACHED_KEYWORD_LISTS'] = array();
1930 foreach (array_keys($this->language_data
['KEYWORDS']) as $key) {
1931 if (!isset($this->lexic_permissions
['KEYWORDS'][$key]) ||
1932 $this->lexic_permissions
['KEYWORDS'][$key]) {
1933 $this->optimize_keyword_group($key);
// Brackets: CACHE_BRACKET_MATCH lists the six bracket characters and
// CACHE_BRACKET_REPLACE holds the replacement for each, in the same order.
1938 if ($this->lexic_permissions
['BRACKETS']) {
1939 $this->language_data
['CACHE_BRACKET_MATCH'] = array('[', ']', '(', ')', '{', '}');
// Inline-style variant: used when classes are off and a style for bracket group 0 exists.
1940 if (!$this->use_classes
&& isset($this->language_data
['STYLES']['BRACKETS'][0])) {
1941 $this->language_data
['CACHE_BRACKET_REPLACE'] = array(
1942 '<| style="' . $this->language_data
['STYLES']['BRACKETS'][0] . '">[|>',
1943 '<| style="' . $this->language_data
['STYLES']['BRACKETS'][0] . '">]|>',
1944 '<| style="' . $this->language_data
['STYLES']['BRACKETS'][0] . '">(|>',
1945 '<| style="' . $this->language_data
['STYLES']['BRACKETS'][0] . '">)|>',
1946 '<| style="' . $this->language_data
['STYLES']['BRACKETS'][0] . '">{|>',
1947 '<| style="' . $this->language_data
['STYLES']['BRACKETS'][0] . '">}|>',
// Class-based variant: the same replacements using the CSS class "br0".
1951 $this->language_data
['CACHE_BRACKET_REPLACE'] = array(
1952 '<| class="br0">[|>',
1953 '<| class="br0">]|>',
1954 '<| class="br0">(|>',
1955 '<| class="br0">)|>',
1956 '<| class="br0">{|>',
1957 '<| class="br0">}|>',
1962 //Build the parse cache needed to highlight numbers appropriate
1963 if($this->lexic_permissions
['NUMBERS']) {
1964 //Check if the style rearrangements have been processed ...
1965 //This also does some preprocessing to check which style groups are useable ...
1966 if(!isset($this->language_data
['NUMBERS_CACHE'])) {
1967 $this->build_style_cache();
1970 //Number format specification
1971 //All this formats are matched case-insensitively!
// Maps each GESHI_NUMBER_* flag to the regexp fragment that matches that
// number format; declared static, so the table is created only once.
1972 static $numbers_format = array(
1973 GESHI_NUMBER_INT_BASIC
=>
1974 '(?<![0-9a-z_\.%])(?<![\d\.]e[+\-])([1-9]\d*?|0)(?![0-9a-z]|\.(?:[eE][+\-]?)?\d)',
1975 GESHI_NUMBER_INT_CSTYLE
=>
1976 '(?<![0-9a-z_\.%])(?<![\d\.]e[+\-])([1-9]\d*?|0)l(?![0-9a-z]|\.(?:[eE][+\-]?)?\d)',
1977 GESHI_NUMBER_BIN_SUFFIX
=>
1978 '(?<![0-9a-z_\.])(?<![\d\.]e[+\-])[01]+?[bB](?![0-9a-z]|\.(?:[eE][+\-]?)?\d)',
1979 GESHI_NUMBER_BIN_PREFIX_PERCENT
=>
1980 '(?<![0-9a-z_\.%])(?<![\d\.]e[+\-])%[01]+?(?![0-9a-z]|\.(?:[eE][+\-]?)?\d)',
1981 GESHI_NUMBER_BIN_PREFIX_0B
=>
1982 '(?<![0-9a-z_\.%])(?<![\d\.]e[+\-])0b[01]+?(?![0-9a-z]|\.(?:[eE][+\-]?)?\d)',
1983 GESHI_NUMBER_OCT_PREFIX
=>
1984 '(?<![0-9a-z_\.])(?<![\d\.]e[+\-])0[0-7]+?(?![0-9a-z]|\.(?:[eE][+\-]?)?\d)',
1985 GESHI_NUMBER_OCT_PREFIX_0O
=>
1986 '(?<![0-9a-z_\.%])(?<![\d\.]e[+\-])0o[0-7]+?(?![0-9a-z]|\.(?:[eE][+\-]?)?\d)',
1987 GESHI_NUMBER_OCT_SUFFIX
=>
1988 '(?<![0-9a-z_\.])(?<![\d\.]e[+\-])[0-7]+?o(?![0-9a-z]|\.(?:[eE][+\-]?)?\d)',
1989 GESHI_NUMBER_HEX_PREFIX
=>
1990 '(?<![0-9a-z_\.])(?<![\d\.]e[+\-])0x[0-9a-fA-F]+?(?![0-9a-z]|\.(?:[eE][+\-]?)?\d)',
1991 GESHI_NUMBER_HEX_SUFFIX
=>
1992 '(?<![0-9a-z_\.])(?<![\d\.]e[+\-])\d[0-9a-fA-F]*?[hH](?![0-9a-z]|\.(?:[eE][+\-]?)?\d)',
1993 GESHI_NUMBER_FLT_NONSCI
=>
1994 '(?<![0-9a-z_\.])(?<![\d\.]e[+\-])\d+?\.\d+?(?![0-9a-z]|\.(?:[eE][+\-]?)?\d)',
1995 GESHI_NUMBER_FLT_NONSCI_F
=>
1996 '(?<![0-9a-z_\.])(?<![\d\.]e[+\-])(?:\d+?(?:\.\d*?)?|\.\d+?)f(?![0-9a-z]|\.(?:[eE][+\-]?)?\d)',
1997 GESHI_NUMBER_FLT_SCI_SHORT
=>
1998 '(?<![0-9a-z_\.])(?<![\d\.]e[+\-])\.\d+?(?:e[+\-]?\d+?)?(?![0-9a-z]|\.(?:[eE][+\-]?)?\d)',
1999 GESHI_NUMBER_FLT_SCI_ZERO
=>
2000 '(?<![0-9a-z_\.])(?<![\d\.]e[+\-])(?:\d+?(?:\.\d*?)?|\.\d+?)(?:e[+\-]?\d+?)?(?![0-9a-z]|\.(?:[eE][+\-]?)?\d)'
2003 //At this step we have an associative array with flag groups for a
2004 //specific style or an string denoting a regexp given its index.
// NUMBERS_RXCACHE receives one complete regexp per NUMBERS_CACHE entry.
2005 $this->language_data
['NUMBERS_RXCACHE'] = array();
2006 foreach($this->language_data
['NUMBERS_CACHE'] as $key => $rxdata) {
2007 if(is_string($rxdata)) {
2010 //This is a bitfield of number flags to highlight:
2011 //Build an array, implode them together and make this the actual RX
// $i runs over the powers of two up to $rxdata, i.e. over every possible flag bit.
2013 for($i = 1; $i <= $rxdata; $i<<=1) {
2015 $rxuse[] = $numbers_format[$i];
2018 $regexp = implode("|", $rxuse);
// The pattern is wrapped in look-behind/look-ahead assertions that refer to
// the parser's internal markers (<|, |>, />, <DOT>), and is case-insensitive.
2021 $this->language_data
['NUMBERS_RXCACHE'][$key] =
2022 "/(?<!<\|\/)(?<!<\|!REG3XP)(?<!<\|\/NUM!)(?<!\d\/>)($regexp)(?!(?:<DOT>|[^\<])+>)(?![^<]*>)(?!\|>)(?!\/>)/i"; //
// Record that the cache exists so that callers do not rebuild it.
2026 $this->parse_cache_built
= true;
2030 * Returns the code in $this->source, highlighted and surrounded by the
2033 * This should only be called ONCE, cos it's SLOW! If you want to highlight
2034 * the same source multiple times, you're better off doing a whole lot of
2035 * str_replaces to replace the <span>s
2039 function parse_code () {
2041 $start_time = microtime();
2043 // Replace all newlines to a common form.
2044 $code = str_replace("\r\n", "\n", $this->source
);
2045 $code = str_replace("\r", "\n", $code);
2047 // Firstly, if there is an error, we won't highlight
2049 //Escape the source for output
2050 $result = $this->hsc($this->source
);
2052 //This fix is related to SF#1923020, but has to be applied regardless of
2053 //actually highlighting symbols.
2054 $result = str_replace(array('<SEMI>', '<PIPE>'), array(';', '|'), $result);
2056 // Timing is irrelevant
2057 $this->set_time($start_time, $start_time);
2058 $this->finalise($result);
2062 // make sure the parse cache is up2date
2063 if (!$this->parse_cache_built
) {
2064 $this->build_parse_cache();
2067 // Initialise various stuff
2068 $length = strlen($code);
2069 $COMMENT_MATCHED = false;
2070 $stuff_to_parse = '';
2073 // "Important" selections are handled like multiline comments
2074 // @todo GET RID OF THIS SHIZ
2075 if ($this->enable_important_blocks
) {
2076 $this->language_data
['COMMENT_MULTI'][GESHI_START_IMPORTANT
] = GESHI_END_IMPORTANT
;
2079 if ($this->strict_mode
) {
2080 // Break the source into bits. Each bit will be a portion of the code
2081 // within script delimiters - for example, HTML between < and >
2085 $next_match_pointer = null;
2086 // we use a copy to unset delimiters on demand (when they are not found)
2087 $delim_copy = $this->language_data
['SCRIPT_DELIMITERS'];
2089 while ($i < $length) {
2090 $next_match_pos = $length +
1; // never true
2091 foreach ($delim_copy as $dk => $delimiters) {
2092 if(is_array($delimiters)) {
2093 foreach ($delimiters as $open => $close) {
2094 // make sure the cache is setup properly
2095 if (!isset($matches[$dk][$open])) {
2096 $matches[$dk][$open] = array(
2100 'open' => $open, // needed for grouping of adjacent code blocks (see below)
2101 'open_strlen' => strlen($open),
2104 'close_strlen' => strlen($close),
2107 // Get the next little bit for this opening string
2108 if ($matches[$dk][$open]['next_match'] < $i) {
2109 // only find the next pos if it was not already cached
2110 $open_pos = strpos($code, $open, $i);
2111 if ($open_pos === false) {
2112 // no match for this delimiter ever
2113 unset($delim_copy[$dk][$open]);
2116 $matches[$dk][$open]['next_match'] = $open_pos;
2118 if ($matches[$dk][$open]['next_match'] < $next_match_pos) {
2119 //So we got a new match, update the close_pos
2120 $matches[$dk][$open]['close_pos'] =
2121 strpos($code, $close, $matches[$dk][$open]['next_match']+
1);
2123 $next_match_pointer =& $matches[$dk][$open];
2124 $next_match_pos = $matches[$dk][$open]['next_match'];
2128 //So we should match an RegExp as Strict Block ...
2130 * The value in $delimiters is expected to be an RegExp
2131 * containing exactly 2 matching groups:
2132 * - Group 1 is the opener
2133 * - Group 2 is the closer
2135 if(!GESHI_PHP_PRE_433
&& //Needs proper rewrite to work with PHP >=4.3.0; 4.3.3 is guaranteed to work.
2136 preg_match($delimiters, $code, $matches_rx, PREG_OFFSET_CAPTURE
, $i)) {
2137 //We got a match ...
2138 if(isset($matches_rx['start']) && isset($matches_rx['end']))
2140 $matches[$dk] = array(
2141 'next_match' => $matches_rx['start'][1],
2144 'close_strlen' => strlen($matches_rx['end'][0]),
2145 'close_pos' => $matches_rx['end'][1],
2148 $matches[$dk] = array(
2149 'next_match' => $matches_rx[1][1],
2152 'close_strlen' => strlen($matches_rx[2][0]),
2153 'close_pos' => $matches_rx[2][1],
2157 // no match for this delimiter ever
2158 unset($delim_copy[$dk]);
2162 if ($matches[$dk]['next_match'] <= $next_match_pos) {
2163 $next_match_pointer =& $matches[$dk];
2164 $next_match_pos = $matches[$dk]['next_match'];
2169 // non-highlightable text
2171 1 => substr($code, $i, $next_match_pos - $i)
2175 if ($next_match_pos > $length) {
2176 // out of bounds means no next match was found
2180 // highlightable code
2181 $parts[$k][0] = $next_match_pointer['dk'];
2183 //Only combine for non-rx script blocks
2184 if(is_array($delim_copy[$next_match_pointer['dk']])) {
2185 // group adjacent script blocks, e.g. <foobar><asdf> should be one block, not three!
2186 $i = $next_match_pos +
$next_match_pointer['open_strlen'];
2188 $close_pos = strpos($code, $next_match_pointer['close'], $i);
2189 if ($close_pos == false) {
2192 $i = $close_pos +
$next_match_pointer['close_strlen'];
2193 if ($i == $length) {
2196 if ($code[$i] == $next_match_pointer['open'][0] && ($next_match_pointer['open_strlen'] == 1 ||
2197 substr($code, $i, $next_match_pointer['open_strlen']) == $next_match_pointer['open'])) {
2198 // merge adjacent but make sure we don't merge things like <tag><!-- comment -->
2199 foreach ($matches as $submatches) {
2200 foreach ($submatches as $match) {
2201 if ($match['next_match'] == $i) {
2202 // a different block already matches here!
2212 $close_pos = $next_match_pointer['close_pos'] +
$next_match_pointer['close_strlen'];
2216 if ($close_pos === false) {
2217 // no closing delimiter found!
2218 $parts[$k][1] = substr($code, $next_match_pos);
2222 $parts[$k][1] = substr($code, $next_match_pos, $i - $next_match_pos);
2226 unset($delim_copy, $next_match_pointer, $next_match_pos, $matches);
2229 if ($num_parts == 1 && $this->strict_mode
== GESHI_MAYBE
) {
2230 // when we have only one part, we don't have anything to highlight at all.
2231 // if we have a "maybe" strict language, this should be handled as highlightable code
2246 // Not strict mode - simply dump the source into
2247 // the array at index 1 (the first highlightable block)
2261 //Unset variables we won't need any longer
2264 //Preload some repeatedly used values regarding hardquotes ...
2265 $hq = isset($this->language_data
['HARDQUOTE']) ?
$this->language_data
['HARDQUOTE'][0] : false;
2266 $hq_strlen = strlen($hq);
2268 //Preload if line numbers are to be generated afterwards
2269 //Added a check if line breaks should be forced even without line numbers, fixes SF#1727398
2270 $check_linenumbers = $this->line_numbers
!= GESHI_NO_LINE_NUMBERS ||
2271 !empty($this->highlight_extra_lines
) ||
!$this->allow_multiline_span
;
2273 //preload the escape char for faster checking ...
2274 $escaped_escape_char = $this->hsc($this->language_data
['ESCAPE_CHAR']);
2276 // this is used for single-line comments
2277 $sc_disallowed_before = "";
2278 $sc_disallowed_after = "";
2280 if (isset($this->language_data
['PARSER_CONTROL'])) {
2281 if (isset($this->language_data
['PARSER_CONTROL']['COMMENTS'])) {
2282 if (isset($this->language_data
['PARSER_CONTROL']['COMMENTS']['DISALLOWED_BEFORE'])) {
2283 $sc_disallowed_before = $this->language_data
['PARSER_CONTROL']['COMMENTS']['DISALLOWED_BEFORE'];
2285 if (isset($this->language_data
['PARSER_CONTROL']['COMMENTS']['DISALLOWED_AFTER'])) {
2286 $sc_disallowed_after = $this->language_data
['PARSER_CONTROL']['COMMENTS']['DISALLOWED_AFTER'];
2291 //Fix for SF#1932083: Multichar Quotemarks unsupported
2292 $is_string_starter = array();
2293 if ($this->lexic_permissions
['STRINGS']) {
2294 foreach ($this->language_data
['QUOTEMARKS'] as $quotemark) {
2295 if (!isset($is_string_starter[$quotemark[0]])) {
2296 $is_string_starter[$quotemark[0]] = (string)$quotemark;
2297 } else if (is_string($is_string_starter[$quotemark[0]])) {
2298 $is_string_starter[$quotemark[0]] = array(
2299 $is_string_starter[$quotemark[0]],
2302 $is_string_starter[$quotemark[0]][] = $quotemark;
2307 // Now we go through each part. We know that even-indexed parts are
2308 // code that shouldn't be highlighted, and odd-indexed parts should
2310 for ($key = 0; $key < $num_parts; ++
$key) {
2313 // If this block should be highlighted...
2315 // Else not a block to highlight
2316 $endresult .= $this->hsc($parts[$key][1]);
2317 unset($parts[$key]);
2322 $part = $parts[$key][1];
2324 $highlight_part = true;
2325 if ($this->strict_mode
&& !is_null($parts[$key][0])) {
2326 // get the class key for this block of code
2327 $script_key = $parts[$key][0];
2328 $highlight_part = $this->language_data
['HIGHLIGHT_STRICT_BLOCK'][$script_key];
2329 if ($this->language_data
['STYLES']['SCRIPT'][$script_key] != '' &&
2330 $this->lexic_permissions
['SCRIPT']) {
2331 // Add a span element around the source to
2332 // highlight the overall source block
2333 if (!$this->use_classes
&&
2334 $this->language_data
['STYLES']['SCRIPT'][$script_key] != '') {
2335 $attributes = ' style="' . $this->language_data
['STYLES']['SCRIPT'][$script_key] . '"';
2337 $attributes = ' class="sc' . $script_key . '"';
2339 $result .= "<span$attributes>";
2340 $STRICTATTRS = $attributes;
2344 if ($highlight_part) {
2345 // Now, highlight the code in this block. This code
2346 // is really the engine of GeSHi (along with the method
2347 // parse_non_string_part).
2349 // cache comment regexps incrementally
2350 $next_comment_regexp_key = '';
2351 $next_comment_regexp_pos = -1;
2352 $next_comment_multi_pos = -1;
2353 $next_comment_single_pos = -1;
2354 $comment_regexp_cache_per_key = array();
2355 $comment_multi_cache_per_key = array();
2356 $comment_single_cache_per_key = array();
2357 $next_open_comment_multi = '';
2358 $next_comment_single_key = '';
2359 $escape_regexp_cache_per_key = array();
2360 $next_escape_regexp_key = '';
2361 $next_escape_regexp_pos = -1;
2363 $length = strlen($part);
2364 for ($i = 0; $i < $length; ++
$i) {
2365 // Get the next char
2369 // update regexp comment cache if needed
2370 if (isset($this->language_data
['COMMENT_REGEXP']) && $next_comment_regexp_pos < $i) {
2371 $next_comment_regexp_pos = $length;
2372 foreach ($this->language_data
['COMMENT_REGEXP'] as $comment_key => $regexp) {
2374 if (isset($comment_regexp_cache_per_key[$comment_key]) &&
2375 ($comment_regexp_cache_per_key[$comment_key]['pos'] >= $i ||
2376 $comment_regexp_cache_per_key[$comment_key]['pos'] === false)) {
2377 // we have already matched something
2378 if ($comment_regexp_cache_per_key[$comment_key]['pos'] === false) {
2379 // this comment is never matched
2382 $match_i = $comment_regexp_cache_per_key[$comment_key]['pos'];
2384 //This is to allow use of the offset parameter in preg_match and stay as compatible with older PHP versions as possible
2385 (GESHI_PHP_PRE_433
&& preg_match($regexp, substr($part, $i), $match, PREG_OFFSET_CAPTURE
)) ||
2386 (!GESHI_PHP_PRE_433
&& preg_match($regexp, $part, $match, PREG_OFFSET_CAPTURE
, $i))
2388 $match_i = $match[0][1];
2389 if (GESHI_PHP_PRE_433
) {
2393 $comment_regexp_cache_per_key[$comment_key] = array(
2394 'key' => $comment_key,
2395 'length' => strlen($match[0][0]),
2399 $comment_regexp_cache_per_key[$comment_key]['pos'] = false;
2403 if ($match_i !== false && $match_i < $next_comment_regexp_pos) {
2404 $next_comment_regexp_pos = $match_i;
2405 $next_comment_regexp_key = $comment_key;
2406 if ($match_i === $i) {
2413 $string_started = false;
2415 if (isset($is_string_starter[$char])) {
2416 // Possibly the start of a new string ...
2418 //Check which starter it was ...
2419 //Fix for SF#1932083: Multichar Quotemarks unsupported
2420 if (is_array($is_string_starter[$char])) {
2422 foreach ($is_string_starter[$char] as $testchar) {
2423 if ($testchar === substr($part, $i, strlen($testchar)) &&
2424 strlen($testchar) > strlen($char_new)) {
2425 $char_new = $testchar;
2426 $string_started = true;
2429 if ($string_started) {
2433 $testchar = $is_string_starter[$char];
2434 if ($testchar === substr($part, $i, strlen($testchar))) {
2436 $string_started = true;
2439 $char_len = strlen($char);
2442 if ($string_started && ($i != $next_comment_regexp_pos)) {
2443 // Hand out the correct style information for this string
2444 $string_key = array_search($char, $this->language_data
['QUOTEMARKS']);
2445 if (!isset($this->language_data
['STYLES']['STRINGS'][$string_key]) ||
2446 !isset($this->language_data
['STYLES']['ESCAPE_CHAR'][$string_key])) {
2450 // parse the stuff before this
2451 $result .= $this->parse_non_string_part($stuff_to_parse);
2452 $stuff_to_parse = '';
2454 if (!$this->use_classes
) {
2455 $string_attributes = ' style="' . $this->language_data
['STYLES']['STRINGS'][$string_key] . '"';
2457 $string_attributes = ' class="st'.$string_key.'"';
2460 // now handle the string
2461 $string = "<span$string_attributes>" . GeSHi
::hsc($char);
2462 $start = $i +
$char_len;
2463 $string_open = true;
2465 if(empty($this->language_data
['ESCAPE_REGEXP'])) {
2466 $next_escape_regexp_pos = $length;
2470 //Get the regular ending pos ...
2471 $close_pos = strpos($part, $char, $start);
2472 if(false === $close_pos) {
2473 $close_pos = $length;
2476 if($this->lexic_permissions
['ESCAPE_CHAR']) {
2477 // update escape regexp cache if needed
2478 if (isset($this->language_data
['ESCAPE_REGEXP']) && $next_escape_regexp_pos < $start) {
2479 $next_escape_regexp_pos = $length;
2480 foreach ($this->language_data
['ESCAPE_REGEXP'] as $escape_key => $regexp) {
2482 if (isset($escape_regexp_cache_per_key[$escape_key]) &&
2483 ($escape_regexp_cache_per_key[$escape_key]['pos'] >= $start ||
2484 $escape_regexp_cache_per_key[$escape_key]['pos'] === false)) {
2485 // we have already matched something
2486 if ($escape_regexp_cache_per_key[$escape_key]['pos'] === false) {
2487 // this comment is never matched
2490 $match_i = $escape_regexp_cache_per_key[$escape_key]['pos'];
2492 //This is to allow use of the offset parameter in preg_match and stay as compatible with older PHP versions as possible
2493 (GESHI_PHP_PRE_433
&& preg_match($regexp, substr($part, $start), $match, PREG_OFFSET_CAPTURE
)) ||
2494 (!GESHI_PHP_PRE_433
&& preg_match($regexp, $part, $match, PREG_OFFSET_CAPTURE
, $start))
2496 $match_i = $match[0][1];
2497 if (GESHI_PHP_PRE_433
) {
2501 $escape_regexp_cache_per_key[$escape_key] = array(
2502 'key' => $escape_key,
2503 'length' => strlen($match[0][0]),
2507 $escape_regexp_cache_per_key[$escape_key]['pos'] = false;
2511 if ($match_i !== false && $match_i < $next_escape_regexp_pos) {
2512 $next_escape_regexp_pos = $match_i;
2513 $next_escape_regexp_key = $escape_key;
2514 if ($match_i === $start) {
2521 //Find the next simple escape position
2522 if('' != $this->language_data
['ESCAPE_CHAR']) {
2523 $simple_escape = strpos($part, $this->language_data
['ESCAPE_CHAR'], $start);
2524 if(false === $simple_escape) {
2525 $simple_escape = $length;
2528 $simple_escape = $length;
2531 $next_escape_regexp_pos = $length;
2532 $simple_escape = $length;
2535 if($simple_escape < $next_escape_regexp_pos &&
2536 $simple_escape < $length &&
2537 $simple_escape < $close_pos) {
2538 //The next escape sequence is a simple one ...
2539 $es_pos = $simple_escape;
2541 //Add the stuff not in the string yet ...
2542 $string .= $this->hsc(substr($part, $start, $es_pos - $start));
2544 //Get the style for this escaped char ...
2545 if (!$this->use_classes
) {
2546 $escape_char_attributes = ' style="' . $this->language_data
['STYLES']['ESCAPE_CHAR'][0] . '"';
2548 $escape_char_attributes = ' class="es0"';
2551 //Add the style for the escape char ...
2552 $string .= "<span$escape_char_attributes>" .
2553 GeSHi
::hsc($this->language_data
['ESCAPE_CHAR']);
2555 //Get the byte AFTER the ESCAPE_CHAR we just found
2556 $es_char = $part[$es_pos +
1];
2557 if ($es_char == "\n") {
2558 // don't put a newline around newlines
2559 $string .= "</span>\n";
2560 $start = $es_pos +
2;
2561 } else if (ord($es_char) >= 128) {
2562 //This is a non-ASCII char (UTF8 or single byte)
2563 //This code tries to work around SF#2037598 ...
2564 if(function_exists('mb_substr')) {
2565 $es_char_m = mb_substr(substr($part, $es_pos+
1, 16), 0, 1, $this->encoding
);
2566 $string .= $es_char_m . '</span>';
2567 } else if (!GESHI_PHP_PRE_433
&& 'utf-8' == $this->encoding
) {
2568 if(preg_match("/[\xC2-\xDF][\x80-\xBF]".
2569 "|\xE0[\xA0-\xBF][\x80-\xBF]".
2570 "|[\xE1-\xEC\xEE\xEF][\x80-\xBF]{2}".
2571 "|\xED[\x80-\x9F][\x80-\xBF]".
2572 "|\xF0[\x90-\xBF][\x80-\xBF]{2}".
2573 "|[\xF1-\xF3][\x80-\xBF]{3}".
2574 "|\xF4[\x80-\x8F][\x80-\xBF]{2}/s",
2575 $part, $es_char_m, null, $es_pos +
1)) {
2576 $es_char_m = $es_char_m[0];
2578 $es_char_m = $es_char;
2580 $string .= $this->hsc($es_char_m) . '</span>';
2582 $es_char_m = $this->hsc($es_char);
2584 $start = $es_pos +
strlen($es_char_m) +
1;
2586 $string .= $this->hsc($es_char) . '</span>';
2587 $start = $es_pos +
2;
2589 } else if ($next_escape_regexp_pos < $length &&
2590 $next_escape_regexp_pos < $close_pos) {
2591 $es_pos = $next_escape_regexp_pos;
2592 //Add the stuff not in the string yet ...
2593 $string .= $this->hsc(substr($part, $start, $es_pos - $start));
2595 //Get the key and length of this match ...
2596 $escape = $escape_regexp_cache_per_key[$next_escape_regexp_key];
2597 $escape_str = substr($part, $es_pos, $escape['length']);
2598 $escape_key = $escape['key'];
2600 //Get the style for this escaped char ...
2601 if (!$this->use_classes
) {
2602 $escape_char_attributes = ' style="' . $this->language_data
['STYLES']['ESCAPE_CHAR'][$escape_key] . '"';
2604 $escape_char_attributes = ' class="es' . $escape_key . '"';
2607 //Add the style for the escape char ...
2608 $string .= "<span$escape_char_attributes>" .
2609 $this->hsc($escape_str) . '</span>';
2611 $start = $es_pos +
$escape['length'];
2613 //Copy the remainder of the string ...
2614 $string .= $this->hsc(substr($part, $start, $close_pos - $start +
$char_len)) . '</span>';
2615 $start = $close_pos +
$char_len;
2616 $string_open = false;
2618 } while($string_open);
2620 if ($check_linenumbers) {
2621 // Are line numbers used? If, we should end the string before
2622 // the newline and begin it again (so when <li>s are put in the source
2623 // remains XHTML compliant)
2624 // note to self: This opens up possibility of config files specifying
2625 // that languages can/cannot have multiline strings???
2626 $string = str_replace("\n", "</span>\n<span$string_attributes>", $string);
2633 } else if ($this->lexic_permissions
['STRINGS'] && $hq && $hq[0] == $char &&
2634 substr($part, $i, $hq_strlen) == $hq && ($i != $next_comment_regexp_pos)) {
2635 // The start of a hard quoted string
2636 if (!$this->use_classes
) {
2637 $string_attributes = ' style="' . $this->language_data
['STYLES']['STRINGS']['HARD'] . '"';
2638 $escape_char_attributes = ' style="' . $this->language_data
['STYLES']['ESCAPE_CHAR']['HARD'] . '"';
2640 $string_attributes = ' class="st_h"';
2641 $escape_char_attributes = ' class="es_h"';
2643 // parse the stuff before this
2644 $result .= $this->parse_non_string_part($stuff_to_parse);
2645 $stuff_to_parse = '';
2647 // now handle the string
2650 // look for closing quote
2651 $start = $i +
$hq_strlen;
2652 while ($close_pos = strpos($part, $this->language_data
['HARDQUOTE'][1], $start)) {
2653 $start = $close_pos +
1;
2654 if ($this->lexic_permissions
['ESCAPE_CHAR'] && $part[$close_pos - 1] == $this->language_data
['HARDCHAR']) {
2655 // make sure this quote is not escaped
2656 foreach ($this->language_data
['HARDESCAPE'] as $hardescape) {
2657 if (substr($part, $close_pos - 1, strlen($hardescape)) == $hardescape) {
2658 // check whether this quote is escaped or if it is something like '\\'
2659 $escape_char_pos = $close_pos - 1;
2660 while ($escape_char_pos > 0
2661 && $part[$escape_char_pos - 1] == $this->language_data
['HARDCHAR']) {
2664 if (($close_pos - $escape_char_pos) & 1) {
2665 // uneven number of escape chars => this quote is escaped
2672 // found closing quote
2676 //Found the closing delimiter?
2678 // span till the end of this $part when no closing delimiter is found
2679 $close_pos = $length;
2682 //Get the actual string
2683 $string = substr($part, $i, $close_pos - $i +
1);
2686 // handle escape chars and encode html chars
2687 // (special because when we have escape chars within our string they may not be escaped)
2688 if ($this->lexic_permissions
['ESCAPE_CHAR'] && $this->language_data
['ESCAPE_CHAR']) {
2691 while ($es_pos = strpos($string, $this->language_data
['ESCAPE_CHAR'], $start)) {
2692 // html-escape the stuff before
2693 $new_string .= $this->hsc(substr($string, $start, $es_pos - $start));
2694 // check if this is a hard escape
2695 foreach ($this->language_data
['HARDESCAPE'] as $hardescape) {
2696 if (substr($string, $es_pos, strlen($hardescape)) == $hardescape) {
2697 // indeed, this is a hardescape
2698 $new_string .= "<span$escape_char_attributes>" .
2699 $this->hsc($hardescape) . '</span>';
2700 $start = $es_pos +
strlen($hardescape);
2704 // not a hard escape, but a normal escape
2705 // they come in pairs of two
2707 while (isset($string[$es_pos +
$c]) && isset($string[$es_pos +
$c +
1])
2708 && $string[$es_pos +
$c] == $this->language_data
['ESCAPE_CHAR']
2709 && $string[$es_pos +
$c +
1] == $this->language_data
['ESCAPE_CHAR']) {
2713 $new_string .= "<span$escape_char_attributes>" .
2714 str_repeat($escaped_escape_char, $c) .
2716 $start = $es_pos +
$c;
2718 // this is just a single lonely escape char...
2719 $new_string .= $escaped_escape_char;
2720 $start = $es_pos +
1;
2723 $string = $new_string . $this->hsc(substr($string, $start));
2725 $string = $this->hsc($string);
2728 if ($check_linenumbers) {
2729 // Are line numbers used? If, we should end the string before
2730 // the newline and begin it again (so when <li>s are put in the source
2731 // remains XHTML compliant)
2732 // note to self: This opens up possibility of config files specifying
2733 // that languages can/cannot have multiline strings???
2734 $string = str_replace("\n", "</span>\n<span$string_attributes>", $string);
2737 $result .= "<span$string_attributes>" . $string . '</span>';
2741 //Have a look for regexp comments
2742 if ($i == $next_comment_regexp_pos) {
2743 $COMMENT_MATCHED = true;
2744 $comment = $comment_regexp_cache_per_key[$next_comment_regexp_key];
2745 $test_str = $this->hsc(substr($part, $i, $comment['length']));
2747 //@todo If remove important do remove here
2748 if ($this->lexic_permissions
['COMMENTS']['MULTI']) {
2749 if (!$this->use_classes
) {
2750 $attributes = ' style="' . $this->language_data
['STYLES']['COMMENTS'][$comment['key']] . '"';
2752 $attributes = ' class="co' . $comment['key'] . '"';
2755 $test_str = "<span$attributes>" . $test_str . "</span>";
2757 // Short-cut through all the multiline code
2758 if ($check_linenumbers) {
2759 // strreplace to put close span and open span around multiline newlines
2760 $test_str = str_replace(
2761 "\n", "</span>\n<span$attributes>",
2762 str_replace("\n ", "\n ", $test_str)
2767 $i +
= $comment['length'] - 1;
2770 $result .= $this->parse_non_string_part($stuff_to_parse);
2771 $stuff_to_parse = '';
2774 // If we haven't matched a regexp comment, try multi-line comments
2775 if (!$COMMENT_MATCHED) {
2776 // Is this a multiline comment?
2777 if (!empty($this->language_data
['COMMENT_MULTI']) && $next_comment_multi_pos < $i) {
2778 $next_comment_multi_pos = $length;
2779 foreach ($this->language_data
['COMMENT_MULTI'] as $open => $close) {
2781 if (isset($comment_multi_cache_per_key[$open]) &&
2782 ($comment_multi_cache_per_key[$open] >= $i ||
2783 $comment_multi_cache_per_key[$open] === false)) {
2784 // we have already matched something
2785 if ($comment_multi_cache_per_key[$open] === false) {
2786 // this comment is never matched
2789 $match_i = $comment_multi_cache_per_key[$open];
2790 } else if (($match_i = stripos($part, $open, $i)) !== false) {
2791 $comment_multi_cache_per_key[$open] = $match_i;
2793 $comment_multi_cache_per_key[$open] = false;
2796 if ($match_i !== false && $match_i < $next_comment_multi_pos) {
2797 $next_comment_multi_pos = $match_i;
2798 $next_open_comment_multi = $open;
2799 if ($match_i === $i) {
2805 if ($i == $next_comment_multi_pos) {
2806 $open = $next_open_comment_multi;
2807 $close = $this->language_data
['COMMENT_MULTI'][$open];
2808 $open_strlen = strlen($open);
2809 $close_strlen = strlen($close);
2810 $COMMENT_MATCHED = true;
2811 $test_str_match = $open;
2812 //@todo If remove important do remove here
2813 if ($this->lexic_permissions
['COMMENTS']['MULTI'] ||
2814 $open == GESHI_START_IMPORTANT
) {
2815 if ($open != GESHI_START_IMPORTANT
) {
2816 if (!$this->use_classes
) {
2817 $attributes = ' style="' . $this->language_data
['STYLES']['COMMENTS']['MULTI'] . '"';
2819 $attributes = ' class="coMULTI"';
2821 $test_str = "<span$attributes>" . $this->hsc($open);
2823 if (!$this->use_classes
) {
2824 $attributes = ' style="' . $this->important_styles
. '"';
2826 $attributes = ' class="imp"';
2829 // We don't include the start of the comment if it's an
2831 $test_str = "<span$attributes>";
2834 $test_str = $this->hsc($open);
2837 $close_pos = strpos( $part, $close, $i +
$open_strlen );
2839 if ($close_pos === false) {
2840 $close_pos = $length;
2843 // Short-cut through all the multiline code
2844 $rest_of_comment = $this->hsc(substr($part, $i +
$open_strlen, $close_pos - $i - $open_strlen +
$close_strlen));
2845 if (($this->lexic_permissions
['COMMENTS']['MULTI'] ||
2846 $test_str_match == GESHI_START_IMPORTANT
) &&
2847 $check_linenumbers) {
2849 // strreplace to put close span and open span around multiline newlines
2850 $test_str .= str_replace(
2851 "\n", "</span>\n<span$attributes>",
2852 str_replace("\n ", "\n ", $rest_of_comment)
2855 $test_str .= $rest_of_comment;
2858 if ($this->lexic_permissions
['COMMENTS']['MULTI'] ||
2859 $test_str_match == GESHI_START_IMPORTANT
) {
2860 $test_str .= '</span>';
2863 $i = $close_pos +
$close_strlen - 1;
2866 $result .= $this->parse_non_string_part($stuff_to_parse);
2867 $stuff_to_parse = '';
2871 // If we haven't matched a multiline comment, try single-line comments
2872 if (!$COMMENT_MATCHED) {
2873 // cache potential single line comment occurrences
2874 if (!empty($this->language_data
['COMMENT_SINGLE']) && $next_comment_single_pos < $i) {
2875 $next_comment_single_pos = $length;
2876 foreach ($this->language_data
['COMMENT_SINGLE'] as $comment_key => $comment_mark) {
2878 if (isset($comment_single_cache_per_key[$comment_key]) &&
2879 ($comment_single_cache_per_key[$comment_key] >= $i ||
2880 $comment_single_cache_per_key[$comment_key] === false)) {
2881 // we have already matched something
2882 if ($comment_single_cache_per_key[$comment_key] === false) {
2883 // this comment is never matched
2886 $match_i = $comment_single_cache_per_key[$comment_key];
2888 // case sensitive comments
2889 ($this->language_data
['CASE_SENSITIVE'][GESHI_COMMENTS
] &&
2890 ($match_i = stripos($part, $comment_mark, $i)) !== false) ||
2891 // non case sensitive
2892 (!$this->language_data
['CASE_SENSITIVE'][GESHI_COMMENTS
] &&
2893 (($match_i = strpos($part, $comment_mark, $i)) !== false))) {
2894 $comment_single_cache_per_key[$comment_key] = $match_i;
2896 $comment_single_cache_per_key[$comment_key] = false;
2899 if ($match_i !== false && $match_i < $next_comment_single_pos) {
2900 $next_comment_single_pos = $match_i;
2901 $next_comment_single_key = $comment_key;
2902 if ($match_i === $i) {
2908 if ($next_comment_single_pos == $i) {
2909 $comment_key = $next_comment_single_key;
2910 $comment_mark = $this->language_data
['COMMENT_SINGLE'][$comment_key];
2911 $com_len = strlen($comment_mark);
2913 // This check will find special variables like $# in bash
2914 // or compiler directives of Delphi beginning {$
2915 if ((empty($sc_disallowed_before) ||
($i == 0) ||
2916 (false === strpos($sc_disallowed_before, $part[$i-1]))) &&
2917 (empty($sc_disallowed_after) ||
($length <= $i +
$com_len) ||
2918 (false === strpos($sc_disallowed_after, $part[$i +
$com_len]))))
2920 // this is a valid comment
2921 $COMMENT_MATCHED = true;
2922 if ($this->lexic_permissions
['COMMENTS'][$comment_key]) {
2923 if (!$this->use_classes
) {
2924 $attributes = ' style="' . $this->language_data
['STYLES']['COMMENTS'][$comment_key] . '"';
2926 $attributes = ' class="co' . $comment_key . '"';
2928 $test_str = "<span$attributes>" . $this->hsc($this->change_case($comment_mark));
2930 $test_str = $this->hsc($comment_mark);
2933 //Check if this comment is the last in the source
2934 $close_pos = strpos($part, "\n", $i);
2936 if ($close_pos === false) {
2937 $close_pos = $length;
2940 $test_str .= $this->hsc(substr($part, $i +
$com_len, $close_pos - $i - $com_len));
2941 if ($this->lexic_permissions
['COMMENTS'][$comment_key]) {
2942 $test_str .= "</span>";
2945 // Take into account that the comment might be the last in the source
2953 $result .= $this->parse_non_string_part($stuff_to_parse);
2954 $stuff_to_parse = '';
2960 // Where are we adding this char?
2961 if (!$COMMENT_MATCHED) {
2962 $stuff_to_parse .= $char;
2964 $result .= $test_str;
2966 $COMMENT_MATCHED = false;
2969 // Parse the last bit
2970 $result .= $this->parse_non_string_part($stuff_to_parse);
2971 $stuff_to_parse = '';
2973 $result .= $this->hsc($part);
2975 // Close the <span> that surrounds the block
2976 if ($STRICTATTRS != '') {
2977 $result = str_replace("\n", "</span>\n<span$STRICTATTRS>", $result);
2978 $result .= '</span>';
2981 $endresult .= $result;
2982 unset($part, $parts[$key], $result);
2985 //This fix is related to SF#1923020, but has to be applied regardless of
2986 //actually highlighting symbols.
2987 /** NOTE: memorypeak #3 */
2988 $endresult = str_replace(array('<SEMI>', '<PIPE>'), array(';', '|'), $endresult);
2990 // // Parse the last stuff (redundant?)
2991 // $result .= $this->parse_non_string_part($stuff_to_parse);
2993 // Lop off the very first and last spaces
2994 // $result = substr($result, 1, -1);
2996 // We're finished: stop timing
2997 $this->set_time($start_time, microtime());
2999 $this->finalise($endresult);
3004 * Swaps out spaces and tabs for HTML indentation. Not needed if
3005 * the code is in a pre block...
3007 * @param string The source to indent (reference!)
3011 function indent(&$result) {
3012 /// Replace tabs with the correct number of spaces
3013 if (false !== strpos($result, "\t")) {
3014 $lines = explode("\n", $result);
3015 $result = null;//Save memory while we process the lines individually
3016 $tab_width = $this->get_real_tab_width();
3017 $tab_string = ' ' . str_repeat(' ', $tab_width);
3019 for ($key = 0, $n = count($lines); $key < $n; $key++
) {
3020 $line = $lines[$key];
3021 if (false === strpos($line, "\t")) {
3026 $length = strlen($line);
3027 $lines[$key] = ''; // reduce memory
3030 for ($i = 0; $i < $length; ++
$i) {
3032 // Simple engine to work out whether we're in a tag.
3033 // If we are we modify $pos. This is so we ignore HTML
3034 // in the line and only workout the tab replacement
3035 // via the actual content of the string
3036 // This test could be improved to include strings in the
3037 // html so that < or > would be allowed in user's styles
3038 // (e.g. quotes: '<' '>'; or similar)
3043 $lines[$key] .= $char;
3044 } else if ('<' == $char) {
3046 $lines[$key] .= '<';
3047 } else if ('&' == $char) {
3048 $substr = substr($line, $i +
3, 5);
3049 $posi = strpos($substr, ';');
3050 if (false === $posi) {
3055 $lines[$key] .= $char;
3056 } else if ("\t" == $char) {
3058 // OPTIMISE - move $strs out. Make an array:
3062 // 3 => ' ' etc etc
3063 // to use instead of building a string every time
3064 $tab_end_width = $tab_width - ($pos %
$tab_width); //Moved out of the look as it doesn't change within the loop
3065 if (($pos & 1) ||
1 == $tab_end_width) {
3066 $str .= substr($tab_string, 6, $tab_end_width);
3068 $str .= substr($tab_string, 0, $tab_end_width+
5);
3070 $lines[$key] .= $str;
3071 $pos +
= $tab_end_width;
3073 if (false === strpos($line, "\t", $i +
1)) {
3074 $lines[$key] .= substr($line, $i +
1);
3077 } else if (0 == $pos && ' ' == $char) {
3078 $lines[$key] .= ' ';
3081 $lines[$key] .= $char;
3086 $result = implode("\n", $lines);
3087 unset($lines);//We don't need the lines separated beyond this --- free them!
3090 // BenBE: Fix to reduce the number of replacements to be done
3091 $result = preg_replace('/^ /m', ' ', $result);
3092 $result = str_replace(' ', ' ', $result);
3094 if ($this->line_numbers
== GESHI_NO_LINE_NUMBERS
&& $this->header_type
!= GESHI_HEADER_PRE_TABLE
) {
3095 if ($this->line_ending
=== null) {
3096 $result = nl2br($result);
3098 $result = str_replace("\n", $this->line_ending
, $result);
/**
 * Changes the case of a keyword for those languages where a change is asked for
 *
 * The conversion is selected by the language data's CASE_KEYWORDS entry:
 * GESHI_CAPS_UPPER upper-cases the keyword, GESHI_CAPS_LOWER lower-cases it,
 * and any other value leaves the keyword exactly as it was passed in.
 *
 * @param string The keyword to change the case of
 * @return string The keyword with its case changed
 */
function change_case($instr) {
    switch ($this->language_data['CASE_KEYWORDS']) {
        case GESHI_CAPS_UPPER:
            return strtoupper($instr);
        case GESHI_CAPS_LOWER:
            return strtolower($instr);
        default:
            // No case change requested: hand the keyword back untouched
            // rather than falling off the end and returning null.
            return $instr;
    }
}
3123 * Handles replacements of keywords to include markup and links if requested
3125 * @param string The keyword to add the Markup to
3126 * @return The HTML for the match found
3130 * @todo Get rid of ender in keyword links
3132 function handle_keyword_replace($match) {
3133 $k = $this->_kw_replace_group
;
3134 $keyword = $match[0];
3139 if ($this->keyword_links
) {
3140 // Keyword links have been enabled
3142 if (isset($this->language_data
['URLS'][$k]) &&
3143 $this->language_data
['URLS'][$k] != '') {
3144 // There is a base group for this keyword
3146 // Old system: strtolower
3147 //$keyword = ( $this->language_data['CASE_SENSITIVE'][$group] ) ? $keyword : strtolower($keyword);
3148 // New system: get keyword from language file to get correct case
3149 if (!$this->language_data
['CASE_SENSITIVE'][$k] &&
3150 strpos($this->language_data
['URLS'][$k], '{FNAME}') !== false) {
3151 foreach ($this->language_data
['KEYWORDS'][$k] as $word) {
3152 if (strcasecmp($word, $keyword) == 0) {
3160 $before = '<|UR1|"' .
3168 str_replace('+', '%20', urlencode($this->hsc($word))),
3169 str_replace('+', '%20', urlencode($this->hsc(strtolower($word)))),
3170 str_replace('+', '%20', urlencode($this->hsc(strtoupper($word)))),
3172 $this->language_data
['URLS'][$k]
3178 return $before . '<|/'. $k .'/>' . $this->change_case($keyword) . '|>' . $after;
/**
 * handles regular expressions highlighting-definitions with callback functions
 *
 * Looks up the callable stored under STYLES/REGEXPS for the regexp key held
 * in $this->_rx_key, calls it with the first captured group, and wraps the
 * returned value in a style attribute placed in front of that group.
 *
 * @note this is a callback, don't use it directly
 *
 * @param array the matches array
 * @return The highlighted string
 */
function handle_regexps_callback($matches) {
    // The language file supplies a callable instead of a fixed style string
    $style_callback = $this->language_data['STYLES']['REGEXPS'][$this->_rx_key];
    $captured = $matches[1];

    $style = call_user_func($style_callback, $captured);

    return ' style="' . $style . '"' . $captured . '|>';
}
/**
 * handles newlines in REGEXPS matches. Set the _hmr_* vars before calling this
 *
 * When $this->_hmr_replace is set, the placeholders \0, \1, ... in the
 * before/after/replace templates are substituted with the corresponding
 * entries of $matches; otherwise the whole match is used as the replacement.
 * The replacement is then wrapped in the internal REG3XP marker for
 * $this->_hmr_key, and the marker is closed and reopened around every
 * newline so that no marker spans a line break.
 *
 * @note this is a callback, don't use it directly
 *
 * @param array the matches array
 * @return string The match wrapped in per-line REG3XP markers
 */
function handle_multiline_regexps($matches) {
    $before = $this->_hmr_before;
    $after = $this->_hmr_after;

    if ($this->_hmr_replace) {
        $replace = $this->_hmr_replace;

        // One placeholder per captured group: \0, \1, \2, ...
        $search = array();
        foreach (array_keys($matches) as $k) {
            $search[] = '\\' . $k;
        }

        $before = str_replace($search, $matches, $before);
        $after = str_replace($search, $matches, $after);
        $replace = str_replace($search, $matches, $replace);
    } else {
        $replace = $matches[0];
    }

    // Open the marker, close/reopen it at each newline, then close the last one
    return $before
        . '<|!REG3XP' . $this->_hmr_key .'!>'
        . str_replace("\n", "|>\n<|!REG3XP" . $this->_hmr_key . '!>', $replace)
        . '|>'
        . $after;
}
3231 * Takes a string that has no strings or comments in it, and highlights
3232 * stuff like keywords, numbers and methods.
3234 * @param string The string to parse for keyword, numbers etc.
3237 * @todo BUGGY! Why? Why not build string and return?
3239 function parse_non_string_part($stuff_to_parse) {
3240 $stuff_to_parse = ' ' . $this->hsc($stuff_to_parse);
3242 // Highlight keywords
3243 $disallowed_before = "(?<![a-zA-Z0-9\$_\|\#;>|^&";
3244 $disallowed_after = "(?![a-zA-Z0-9_\|%\\-&;";
3245 if ($this->lexic_permissions
['STRINGS']) {
3246 $quotemarks = preg_quote(implode($this->language_data
['QUOTEMARKS']), '/');
3247 $disallowed_before .= $quotemarks;
3248 $disallowed_after .= $quotemarks;
3250 $disallowed_before .= "])";
3251 $disallowed_after .= "])";
3253 $parser_control_pergroup = false;
3254 if (isset($this->language_data
['PARSER_CONTROL'])) {
3255 if (isset($this->language_data
['PARSER_CONTROL']['KEYWORDS'])) {
3256 $x = 0; // check wether per-keyword-group parser_control is enabled
3257 if (isset($this->language_data
['PARSER_CONTROL']['KEYWORDS']['DISALLOWED_BEFORE'])) {
3258 $disallowed_before = $this->language_data
['PARSER_CONTROL']['KEYWORDS']['DISALLOWED_BEFORE'];
3261 if (isset($this->language_data
['PARSER_CONTROL']['KEYWORDS']['DISALLOWED_AFTER'])) {
3262 $disallowed_after = $this->language_data
['PARSER_CONTROL']['KEYWORDS']['DISALLOWED_AFTER'];
3265 $parser_control_pergroup = (count($this->language_data
['PARSER_CONTROL']['KEYWORDS']) - $x) > 0;
3269 foreach (array_keys($this->language_data
['KEYWORDS']) as $k) {
3270 if (!isset($this->lexic_permissions
['KEYWORDS'][$k]) ||
3271 $this->lexic_permissions
['KEYWORDS'][$k]) {
3273 $case_sensitive = $this->language_data
['CASE_SENSITIVE'][$k];
3274 $modifiers = $case_sensitive ?
'' : 'i';
3276 // NEW in 1.0.8 - per-keyword-group parser control
3277 $disallowed_before_local = $disallowed_before;
3278 $disallowed_after_local = $disallowed_after;
3279 if ($parser_control_pergroup && isset($this->language_data
['PARSER_CONTROL']['KEYWORDS'][$k])) {
3280 if (isset($this->language_data
['PARSER_CONTROL']['KEYWORDS'][$k]['DISALLOWED_BEFORE'])) {
3281 $disallowed_before_local =
3282 $this->language_data
['PARSER_CONTROL']['KEYWORDS'][$k]['DISALLOWED_BEFORE'];
3285 if (isset($this->language_data
['PARSER_CONTROL']['KEYWORDS'][$k]['DISALLOWED_AFTER'])) {
3286 $disallowed_after_local =
3287 $this->language_data
['PARSER_CONTROL']['KEYWORDS'][$k]['DISALLOWED_AFTER'];
3291 $this->_kw_replace_group
= $k;
3293 //NEW in 1.0.8, the cached regexp list
3294 // since we don't want PHP / PCRE to crash due to too large patterns we split them into smaller chunks
3295 for ($set = 0, $set_length = count($this->language_data
['CACHED_KEYWORD_LISTS'][$k]); $set < $set_length; ++
$set) {
3296 $keywordset =& $this->language_data
['CACHED_KEYWORD_LISTS'][$k][$set];
3297 // Might make a more unique string for putting the number in soon
3298 // Basically, we don't put the styles in yet because then the styles themselves will
3299 // get highlighted if the language has a CSS keyword in it (like CSS, for example ;))
3300 $stuff_to_parse = preg_replace_callback(
3301 "/$disallowed_before_local({$keywordset})(?!\<DOT\>(?:htm|php))$disallowed_after_local/$modifiers",
3302 array($this, 'handle_keyword_replace'),
3309 // Regular expressions
3310 foreach ($this->language_data
['REGEXPS'] as $key => $regexp) {
3311 if ($this->lexic_permissions
['REGEXPS'][$key]) {
3312 if (is_array($regexp)) {
3313 if ($this->line_numbers
!= GESHI_NO_LINE_NUMBERS
) {
3314 // produce valid HTML when we match multiple lines
3315 $this->_hmr_replace
= $regexp[GESHI_REPLACE
];
3316 $this->_hmr_before
= $regexp[GESHI_BEFORE
];
3317 $this->_hmr_key
= $key;
3318 $this->_hmr_after
= $regexp[GESHI_AFTER
];
3319 $stuff_to_parse = preg_replace_callback(
3320 "/" . $regexp[GESHI_SEARCH
] . "/{$regexp[GESHI_MODIFIERS]}",
3321 array($this, 'handle_multiline_regexps'),
3323 $this->_hmr_replace
= false;
3324 $this->_hmr_before
= '';
3325 $this->_hmr_after
= '';
3327 $stuff_to_parse = preg_replace(
3328 '/' . $regexp[GESHI_SEARCH
] . '/' . $regexp[GESHI_MODIFIERS
],
3329 $regexp[GESHI_BEFORE
] . '<|!REG3XP'. $key .'!>' . $regexp[GESHI_REPLACE
] . '|>' . $regexp[GESHI_AFTER
],
3333 if ($this->line_numbers
!= GESHI_NO_LINE_NUMBERS
) {
3334 // produce valid HTML when we match multiple lines
3335 $this->_hmr_key
= $key;
3336 $stuff_to_parse = preg_replace_callback( "/(" . $regexp . ")/",
3337 array($this, 'handle_multiline_regexps'), $stuff_to_parse);
3338 $this->_hmr_key
= '';
3340 $stuff_to_parse = preg_replace( "/(" . $regexp . ")/", "<|!REG3XP$key!>\\1|>", $stuff_to_parse);
3346 // Highlight numbers. As of 1.0.8 we support different types of numbers
3347 $numbers_found = false;
3348 if ($this->lexic_permissions
['NUMBERS'] && preg_match('#\d#', $stuff_to_parse )) {
3349 $numbers_found = true;
3351 //For each of the formats ...
3352 foreach($this->language_data
['NUMBERS_RXCACHE'] as $id => $regexp) {
3353 //Check if it should be highlighted ...
3354 $stuff_to_parse = preg_replace($regexp, "<|/NUM!$id/>\\1|>", $stuff_to_parse);
3359 // Now that's all done, replace /[number]/ with the correct styles
3361 foreach (array_keys($this->language_data
['KEYWORDS']) as $k) {
3362 if (!$this->use_classes
) {
3363 $attributes = ' style="' .
3364 (isset($this->language_data
['STYLES']['KEYWORDS'][$k]) ?
3365 $this->language_data
['STYLES']['KEYWORDS'][$k] : "") . '"';
3367 $attributes = ' class="kw' . $k . '"';
3369 $stuff_to_parse = str_replace("<|/$k/>", "<|$attributes>", $stuff_to_parse);
3372 if ($numbers_found) {
3373 // Put number styles in
3374 foreach($this->language_data
['NUMBERS_RXCACHE'] as $id => $regexp) {
3375 //Commented out for now, as this needs some review ...
3376 // if ($numbers_permissions & $id) {
3377 //Get the appropriate style ...
3378 //Checking for unset styles is done by the style cache builder ...
3379 if (!$this->use_classes
) {
3380 $attributes = ' style="' . $this->language_data
['STYLES']['NUMBERS'][$id] . '"';
3382 $attributes = ' class="nu'.$id.'"';
3385 //Set in the correct styles ...
3386 $stuff_to_parse = str_replace("/NUM!$id/", $attributes, $stuff_to_parse);
3391 // Highlight methods and fields in objects
3392 if ($this->lexic_permissions
['METHODS'] && $this->language_data
['OOLANG']) {
3393 $oolang_spaces = "[\s]*";
3394 $oolang_before = "";
3395 $oolang_after = "[a-zA-Z][a-zA-Z0-9_]*";
3396 if (isset($this->language_data
['PARSER_CONTROL'])) {
3397 if (isset($this->language_data
['PARSER_CONTROL']['OOLANG'])) {
3398 if (isset($this->language_data
['PARSER_CONTROL']['OOLANG']['MATCH_BEFORE'])) {
3399 $oolang_before = $this->language_data
['PARSER_CONTROL']['OOLANG']['MATCH_BEFORE'];
3401 if (isset($this->language_data
['PARSER_CONTROL']['OOLANG']['MATCH_AFTER'])) {
3402 $oolang_after = $this->language_data
['PARSER_CONTROL']['OOLANG']['MATCH_AFTER'];
3404 if (isset($this->language_data
['PARSER_CONTROL']['OOLANG']['MATCH_SPACES'])) {
3405 $oolang_spaces = $this->language_data
['PARSER_CONTROL']['OOLANG']['MATCH_SPACES'];
3410 foreach ($this->language_data
['OBJECT_SPLITTERS'] as $key => $splitter) {
3411 if (false !== strpos($stuff_to_parse, $splitter)) {
3412 if (!$this->use_classes
) {
3413 $attributes = ' style="' . $this->language_data
['STYLES']['METHODS'][$key] . '"';
3415 $attributes = ' class="me' . $key . '"';
3417 $stuff_to_parse = preg_replace("/($oolang_before)(" . preg_quote($this->language_data
['OBJECT_SPLITTERS'][$key], '/') . ")($oolang_spaces)($oolang_after)/", "\\1\\2\\3<|$attributes>\\4|>", $stuff_to_parse);
3423 // Highlight brackets. Yes, I've tried adding a semi-colon to this list.
3424 // You try it, and see what happens ;)
3425 // TODO: Fix lexic permissions not converting entities if shouldn't
3426 // be highlighting regardless
3428 if ($this->lexic_permissions
['BRACKETS']) {
3429 $stuff_to_parse = str_replace( $this->language_data
['CACHE_BRACKET_MATCH'],
3430 $this->language_data
['CACHE_BRACKET_REPLACE'], $stuff_to_parse );
3434 //FIX for symbol highlighting ...
3435 if ($this->lexic_permissions
['SYMBOLS'] && !empty($this->language_data
['SYMBOLS'])) {
3436 //Get all matches and throw away those witin a block that is already highlighted... (i.e. matched by a regexp)
3437 $n_symbols = preg_match_all("/<\|(?:<DOT>|[^>])+>(?:(?!\|>).*?)\|>|<\/a>|(?:" . $this->language_data
['SYMBOL_SEARCH'] . ")+(?![^<]+?>)/", $stuff_to_parse, $pot_symbols, PREG_OFFSET_CAPTURE | PREG_SET_ORDER
);
3439 for ($s_id = 0; $s_id < $n_symbols; ++
$s_id) {
3440 $symbol_match = $pot_symbols[$s_id][0][0];
3441 if (strpos($symbol_match, '<') !== false ||
strpos($symbol_match, '>') !== false) {
3442 // already highlighted blocks _must_ include either < or >
3443 // so if this conditional applies, we have to skip this match
3444 // BenBE: UNLESS the block contains <SEMI> or <PIPE>
3445 if(strpos($symbol_match, '<SEMI>') === false &&
3446 strpos($symbol_match, '<PIPE>') === false) {
3451 // if we reach this point, we have a valid match which needs to be highlighted
3453 $symbol_length = strlen($symbol_match);
3454 $symbol_offset = $pot_symbols[$s_id][0][1];
3455 unset($pot_symbols[$s_id]);
3456 $symbol_end = $symbol_length +
$symbol_offset;
3459 // if we have multiple styles, we have to handle them properly
3460 if ($this->language_data
['MULTIPLE_SYMBOL_GROUPS']) {
3462 // Split the current stuff to replace into its atomic symbols ...
3463 preg_match_all("/" . $this->language_data
['SYMBOL_SEARCH'] . "/", $symbol_match, $sym_match_syms, PREG_PATTERN_ORDER
);
3464 foreach ($sym_match_syms[0] as $sym_ms) {
3465 //Check if consequtive symbols belong to the same group to save output ...
3466 if (isset($this->language_data
['SYMBOL_DATA'][$sym_ms])
3467 && ($this->language_data
['SYMBOL_DATA'][$sym_ms] != $old_sym)) {
3468 if (-1 != $old_sym) {
3471 $old_sym = $this->language_data
['SYMBOL_DATA'][$sym_ms];
3472 if (!$this->use_classes
) {
3473 $symbol_hl .= '<| style="' . $this->language_data
['STYLES']['SYMBOLS'][$old_sym] . '">';
3475 $symbol_hl .= '<| class="sy' . $old_sym . '">';
3478 $symbol_hl .= $sym_ms;
3480 unset($sym_match_syms);
3482 //Close remaining tags and insert the replacement at the right position ...
3483 //Take caution if symbol_hl is empty to avoid doubled closing spans.
3484 if (-1 != $old_sym) {
3488 if (!$this->use_classes
) {
3489 $symbol_hl = '<| style="' . $this->language_data
['STYLES']['SYMBOLS'][0] . '">';
3491 $symbol_hl = '<| class="sy0">';
3493 $symbol_hl .= $symbol_match . '|>';
3496 $stuff_to_parse = substr_replace($stuff_to_parse, $symbol_hl, $symbol_offset +
$global_offset, $symbol_length);
3498 // since we replace old text with something of different size,
3499 // we'll have to keep track of the differences
3500 $global_offset +
= strlen($symbol_hl) - $symbol_length;
3503 //FIX for symbol highlighting ...
3505 // Add class/style for regexps
3506 foreach (array_keys($this->language_data
['REGEXPS']) as $key) {
3507 if ($this->lexic_permissions
['REGEXPS'][$key]) {
3508 if (is_callable($this->language_data
['STYLES']['REGEXPS'][$key])) {
3509 $this->_rx_key
= $key;
3510 $stuff_to_parse = preg_replace_callback("/!REG3XP$key!(.*)\|>/U",
3511 array($this, 'handle_regexps_callback'),
3514 if (!$this->use_classes
) {
3515 $attributes = ' style="' . $this->language_data
['STYLES']['REGEXPS'][$key] . '"';
3517 if (is_array($this->language_data
['REGEXPS'][$key]) &&
3518 array_key_exists(GESHI_CLASS
, $this->language_data
['REGEXPS'][$key])) {
3519 $attributes = ' class="' .
3520 $this->language_data
['REGEXPS'][$key][GESHI_CLASS
] . '"';
3522 $attributes = ' class="re' . $key . '"';
3525 $stuff_to_parse = str_replace("!REG3XP$key!", "$attributes", $stuff_to_parse);
3530 // Replace <DOT> with . for urls
3531 $stuff_to_parse = str_replace('<DOT>', '.', $stuff_to_parse);
3532 // Replace <|UR1| with <a href= for urls also
3533 if (isset($this->link_styles
[GESHI_LINK
])) {
3534 if ($this->use_classes
) {
3535 $stuff_to_parse = str_replace('<|UR1|', '<a' . $this->link_target
. ' href=', $stuff_to_parse);
3537 $stuff_to_parse = str_replace('<|UR1|', '<a' . $this->link_target
. ' style="' . $this->link_styles
[GESHI_LINK
] . '" href=', $stuff_to_parse);
3540 $stuff_to_parse = str_replace('<|UR1|', '<a' . $this->link_target
. ' href=', $stuff_to_parse);
3544 // NOW we add the span thingy ;)
3547 $stuff_to_parse = str_replace('<|', '<span', $stuff_to_parse);
3548 $stuff_to_parse = str_replace ( '|>', '</span>', $stuff_to_parse );
3549 return substr($stuff_to_parse, 1);
/**
 * Sets the time taken to parse the code
 *
 * Each argument is split on a single space and its two parts are summed,
 * which matches the "fraction seconds" string form produced by microtime().
 * The difference (end - start) is stored in $this->time.
 *
 * @param microtime The time when parsing started
 * @param microtime The time when parsing ended
 */
function set_time($start_time, $end_time) {
    $start = explode(' ', $start_time);
    $end = explode(' ', $end_time);
    $this->time = $end[0] + $end[1] - $start[0] - $start[1];
}
/**
 * Gets the time taken to parse the code
 *
 * @return double The time taken to parse the code
 */
function get_time() {
    // NOTE(review): body restored from context - set_time() stores the
    // elapsed time in $this->time; confirm against the pristine file.
    return $this->time;
}
/**
 * Merges arrays recursively, overwriting values of the first array with values of later arrays
 *
 * Accepts any number of array arguments. Keys present in a later array
 * replace the value from earlier arrays, except when the later value is an
 * array and the key already exists: then both values are merged recursively.
 *
 * @return array|false The merged array, or false (after an E_USER_WARNING)
 *                     if any argument is not an array
 */
function merge_arrays() {
    $arrays = func_get_args();
    $narrays = count($arrays);

    // check arguments
    // comment out if more performance is necessary (in this case the foreach loop will trigger a warning if the argument is not an array)
    for ($i = 0; $i < $narrays; $i++) {
        if (!is_array($arrays[$i])) {
            // also array_merge_recursive returns nothing in this case
            trigger_error('Argument #' . ($i+1) . ' is not an array - trying to merge array with scalar! Returning false!', E_USER_WARNING);
            return false;
        }
    }

    // the first array is in the output set in every case
    $ret = $arrays[0];

    // merge $ret with the remaining arrays
    for ($i = 1; $i < $narrays; $i++) {
        foreach ($arrays[$i] as $key => $value) {
            if (is_array($value) && isset($ret[$key])) {
                // if $ret[$key] is not an array you try to merge an scalar value with an array - the result is not defined (incompatible arrays)
                // in this case the call will trigger an E_USER_WARNING and the $ret[$key] will be false.
                $ret[$key] = $this->merge_arrays($ret[$key], $value);
            } else {
                $ret[$key] = $value;
            }
        }
    }

    return $ret;
}
/**
 * Gets language information and stores it for later use
 *
 * Includes the given language file (which is expected to populate a local
 * $language_data array), stores the result in $this->language_data, resets
 * the lexic permissions to their defaults, applies any
 * PARSER_CONTROL/ENABLE_FLAGS overrides, and finally merges in an optional
 * "<name>.style.php" override file located next to the language file.
 *
 * @param string The filename of the language file you want to load
 * @todo Needs to load keys for lexic permissions for keywords, regexps etc
 */
function load_language($file_name) {
    if ($file_name == $this->loaded_language) {
        // this file is already loaded!
        return;
    }

    //Prepare some stuff before actually loading the language file
    $this->loaded_language = $file_name;
    $this->parse_cache_built = false;
    $this->enable_highlighting();
    $language_data = array();

    //Load the language file
    require $file_name;

    // Perhaps some checking might be added here later to check that
    // $language data is a valid thing but maybe not
    $this->language_data = $language_data;

    // Set strict mode if should be set
    $this->strict_mode = $this->language_data['STRICT_MODE_APPLIES'];

    // Set permissions for all lexics to true
    // so they'll be highlighted by default
    // (keyword groups that are empty are switched off instead)
    foreach (array_keys($this->language_data['KEYWORDS']) as $key) {
        if (!empty($this->language_data['KEYWORDS'][$key])) {
            $this->lexic_permissions['KEYWORDS'][$key] = true;
        } else {
            $this->lexic_permissions['KEYWORDS'][$key] = false;
        }
    }

    foreach (array_keys($this->language_data['COMMENT_SINGLE']) as $key) {
        $this->lexic_permissions['COMMENTS'][$key] = true;
    }
    foreach (array_keys($this->language_data['REGEXPS']) as $key) {
        $this->lexic_permissions['REGEXPS'][$key] = true;
    }

    // for BenBE and future code reviews:
    // we can use empty here since we only check for existance and emptiness of an array
    // if it is not an array at all but rather false or null this will work as intended as well
    // even if $this->language_data['PARSER_CONTROL'] is undefined this won't trigger a notice
    if (!empty($this->language_data['PARSER_CONTROL']['ENABLE_FLAGS'])) {
        foreach ($this->language_data['PARSER_CONTROL']['ENABLE_FLAGS'] as $flag => $value) {
            // it's either true or false and maybe is true as well
            $perm = $value !== GESHI_NEVER;
            if ($flag == 'ALL') {
                $this->enable_highlighting($perm);
                continue;
            }
            if (!isset($this->lexic_permissions[$flag])) {
                // unknown lexic permission
                continue;
            }
            if (is_array($this->lexic_permissions[$flag])) {
                // per-group permissions: apply the flag to every group
                foreach ($this->lexic_permissions[$flag] as $key => $val) {
                    $this->lexic_permissions[$flag][$key] = $perm;
                }
            } else {
                $this->lexic_permissions[$flag] = $perm;
            }
        }
        unset($this->language_data['PARSER_CONTROL']['ENABLE_FLAGS']);
    }

    //Fix: Problem where hardescapes weren't handled if no ESCAPE_CHAR was given
    //You need to set one for HARDESCAPES only in this case.
    if (!isset($this->language_data['HARDCHAR'])) {
        $this->language_data['HARDCHAR'] = $this->language_data['ESCAPE_CHAR'];
    }

    //NEW in 1.0.8: Allow styles to be loaded from a separate file to override defaults
    // (the last four characters of the file name - presumably ".php" - are
    // replaced by ".style.php")
    $style_filename = substr($file_name, 0, -4) . '.style.php';
    if (is_readable($style_filename)) {
        //Clear any style_data that could have been set before ...
        if (isset($style_data)) {
            unset($style_data);
        }

        //Read the Style Information from the style file
        include $style_filename;

        //Apply the new styles to our current language styles
        if (isset($style_data) && is_array($style_data)) {
            $this->language_data['STYLES'] =
                $this->merge_arrays($this->language_data['STYLES'], $style_data);
        }
    }
}
3715 * Takes the parsed code and various options, and creates the HTML
3716 * surrounding it to make it look nice.
3718 * @param string The code already parsed (reference!)
3722 function finalise(&$parsed_code) {
3723 // Remove end parts of important declarations
3724 // This is BUGGY!! My fault for bad code: fix coming in 1.2
3725 // @todo Remove this crap
3726 if ($this->enable_important_blocks
&&
3727 (strpos($parsed_code, $this->hsc(GESHI_START_IMPORTANT
)) === false)) {
3728 $parsed_code = str_replace($this->hsc(GESHI_END_IMPORTANT
), '', $parsed_code);
3731 // Add HTML whitespace stuff if we're using the <div> header
3732 if ($this->header_type
!= GESHI_HEADER_PRE
&& $this->header_type
!= GESHI_HEADER_PRE_VALID
) {
3733 $this->indent($parsed_code);
3736 // purge some unnecessary stuff
3737 /** NOTE: memorypeak #1 */
3738 $parsed_code = preg_replace('#<span[^>]+>(\s*)</span>#', '\\1', $parsed_code);
3740 // If we are using IDs for line numbers, there needs to be an overall
3741 // ID set to prevent collisions.
3742 if ($this->add_ids
&& !$this->overall_id
) {
3743 $this->overall_id
= 'geshi-' . substr(md5(microtime()), 0, 4);
3746 // Get code into lines
3747 /** NOTE: memorypeak #2 */
3748 $code = explode("\n", $parsed_code);
3749 $parsed_code = $this->header();
3751 // If we're using line numbers, we insert <li>s and appropriate
3752 // markup to style them (otherwise we don't need to do anything)
3753 if ($this->line_numbers
!= GESHI_NO_LINE_NUMBERS
&& $this->header_type
!= GESHI_HEADER_PRE_TABLE
) {
3754 // If we're using the <pre> header, we shouldn't add newlines because
3755 // the <pre> will line-break them (and the <li>s already do this for us)
3756 $ls = ($this->header_type
!= GESHI_HEADER_PRE
&& $this->header_type
!= GESHI_HEADER_PRE_VALID
) ?
"\n" : '';
3758 // Set vars to defaults for following loop
3762 for ($i = 0, $n = count($code); $i < $n;) {
3763 //Reset the attributes for a new line ...
3766 // Make lines have at least one space in them if they're empty
3767 // BenBE: Checking emptiness using trim instead of relying on blanks
3768 if ('' == trim($code[$i])) {
3769 $code[$i] = ' ';
3772 // If this is a "special line"...
3773 if ($this->line_numbers
== GESHI_FANCY_LINE_NUMBERS
&&
3774 $i %
$this->line_nth_row
== ($this->line_nth_row
- 1)) {
3775 // Set the attributes to style the line
3776 if ($this->use_classes
) {
3777 //$attr = ' class="li2"';
3778 $attrs['class'][] = 'li2';
3779 $def_attr = ' class="de2"';
3781 //$attr = ' style="' . $this->line_style2 . '"';
3782 $attrs['style'][] = $this->line_style2
;
3783 // This style "covers up" the special styles set for special lines
3784 // so that styles applied to special lines don't apply to the actual
3785 // code on that line
3786 $def_attr = ' style="' . $this->code_style
. '"';
3789 if ($this->use_classes
) {
3790 //$attr = ' class="li1"';
3791 $attrs['class'][] = 'li1';
3792 $def_attr = ' class="de1"';
3794 //$attr = ' style="' . $this->line_style1 . '"';
3795 $attrs['style'][] = $this->line_style1
;
3796 $def_attr = ' style="' . $this->code_style
. '"';
3800 //Check which type of tag to insert for this line
3801 if ($this->header_type
== GESHI_HEADER_PRE_VALID
) {
3802 $start = "<pre$def_attr>";
3806 $start = "<div$def_attr>";
3812 // Are we supposed to use ids? If so, add them
3813 if ($this->add_ids
) {
3814 $attrs['id'][] = "$this->overall_id-$i";
3817 //Is this some line with extra styles???
3818 if (in_array($i, $this->highlight_extra_lines
)) {
3819 if ($this->use_classes
) {
3820 if (isset($this->highlight_extra_lines_styles
[$i])) {
3821 $attrs['class'][] = "lx$i";
3823 $attrs['class'][] = "ln-xtra";
3826 array_push($attrs['style'], $this->get_line_style($i));
3830 // Add in the line surrounded by appropriate list HTML
3832 foreach ($attrs as $key => $attr) {
3833 $attr_string .= ' ' . $key . '="' . implode(' ', $attr) . '"';
3836 $parsed_code .= "<li$attr_string>$start{$code[$i-1]}$end</li>$ls";
3837 unset($code[$i - 1]);
3841 if ($this->use_classes
) {
3842 $attributes = ' class="de1"';
3844 $attributes = ' style="'. $this->code_style
.'"';
3846 if ($this->header_type
== GESHI_HEADER_PRE_VALID
) {
3847 $parsed_code .= '<pre'. $attributes .'>';
3848 } elseif ($this->header_type
== GESHI_HEADER_PRE_TABLE
) {
3849 if ($this->line_numbers
!= GESHI_NO_LINE_NUMBERS
) {
3850 if ($this->use_classes
) {
3851 $attrs = ' class="ln"';
3853 $attrs = ' style="'. $this->table_linenumber_style
.'"';
3855 $parsed_code .= '<td'.$attrs.'><pre'.$attributes.'>';
3857 // we don't merge it with the for below, since it should be better for
3858 // memory consumption this way
3859 // @todo: but... actually it would still be somewhat nice to merge the two loops
3860 // the mem peaks are at different positions
3861 for ($i = 0; $i < $n; ++
$i) {
3864 if ($this->line_numbers
== GESHI_FANCY_LINE_NUMBERS
&&
3865 $i %
$this->line_nth_row
== ($this->line_nth_row
- 1)) {
3866 // Set the attributes to style the line
3867 if ($this->use_classes
) {
3868 $parsed_code .= '<span class="xtra li2"><span class="de2">';
3870 // This style "covers up" the special styles set for special lines
3871 // so that styles applied to special lines don't apply to the actual
3872 // code on that line
3873 $parsed_code .= '<span style="display:block;' . $this->line_style2
. '">'
3874 .'<span style="' . $this->code_style
.'">';
3878 //Is this some line with extra styles???
3879 if (in_array($i +
1, $this->highlight_extra_lines
)) {
3880 if ($this->use_classes
) {
3881 if (isset($this->highlight_extra_lines_styles
[$i])) {
3882 $parsed_code .= "<span class=\"xtra lx$i\">";
3884 $parsed_code .= "<span class=\"xtra ln-xtra\">";
3887 $parsed_code .= "<span style=\"display:block;" . $this->get_line_style($i) . "\">";
3891 $parsed_code .= $this->line_numbers_start +
$i;
3893 $parsed_code .= str_repeat('</span>', $close);
3894 } else if ($i != $n) {
3895 $parsed_code .= "\n";
3898 $parsed_code .= '</pre></td><td'.$attributes.'>';
3900 $parsed_code .= '<pre'. $attributes .'>';
3902 // No line numbers, but still need to handle highlighting lines extra.
3903 // Have to use divs so the full width of the code is highlighted
3905 for ($i = 0; $i < $n; ++
$i) {
3906 // Make lines have at least one space in them if they're empty
3907 // BenBE: Checking emptiness using trim instead of relying on blanks
3908 if ('' == trim($code[$i])) {
3909 $code[$i] = ' ';
3912 if ($this->line_numbers
== GESHI_FANCY_LINE_NUMBERS
&&
3913 $i %
$this->line_nth_row
== ($this->line_nth_row
- 1)) {
3914 // Set the attributes to style the line
3915 if ($this->use_classes
) {
3916 $parsed_code .= '<span class="xtra li2"><span class="de2">';
3918 // This style "covers up" the special styles set for special lines
3919 // so that styles applied to special lines don't apply to the actual
3920 // code on that line
3921 $parsed_code .= '<span style="display:block;' . $this->line_style2
. '">'
3922 .'<span style="' . $this->code_style
.'">';
3926 //Is this some line with extra styles???
3927 if (in_array($i +
1, $this->highlight_extra_lines
)) {
3928 if ($this->use_classes
) {
3929 if (isset($this->highlight_extra_lines_styles
[$i])) {
3930 $parsed_code .= "<span class=\"xtra lx$i\">";
3932 $parsed_code .= "<span class=\"xtra ln-xtra\">";
3935 $parsed_code .= "<span style=\"display:block;" . $this->get_line_style($i) . "\">";
3940 $parsed_code .= $code[$i];
3943 $parsed_code .= str_repeat('</span>', $close);
3946 elseif ($i +
1 < $n) {
3947 $parsed_code .= "\n";
3952 if ($this->header_type
== GESHI_HEADER_PRE_VALID ||
$this->header_type
== GESHI_HEADER_PRE_TABLE
) {
3953 $parsed_code .= '</pre>';
3955 if ($this->header_type
== GESHI_HEADER_PRE_TABLE
&& $this->line_numbers
!= GESHI_NO_LINE_NUMBERS
) {
3956 $parsed_code .= '</td>';
3960 $parsed_code .= $this->footer();
3964 * Creates the header for the code block (with correct attributes)
3966 * @return string The header for the code block
3971 // Get attributes needed
3973 * @todo Document behaviour change - class is outputted regardless of whether
3974 * we're using classes or not. Same with style
3976 $attributes = ' class="' . $this->_genCSSName($this->language
);
3977 if ($this->overall_class
!= '') {
3978 $attributes .= " ".$this->_genCSSName($this->overall_class
);
3982 if ($this->overall_id
!= '') {
3983 $attributes .= " id=\"{$this->overall_id}\"";
3985 if ($this->overall_style
!= '' && !$this->use_classes
) {
3986 $attributes .= ' style="' . $this->overall_style
. '"';
3989 $ol_attributes = '';
3991 if ($this->line_numbers_start
!= 1) {
3992 $ol_attributes .= ' start="' . $this->line_numbers_start
. '"';
3995 // Get the header HTML
3996 $header = $this->header_content
;
3998 if ($this->header_type
== GESHI_HEADER_PRE ||
$this->header_type
== GESHI_HEADER_PRE_VALID
) {
3999 $header = str_replace("\n", '', $header);
4001 $header = $this->replace_keywords($header);
4003 if ($this->use_classes
) {
4004 $attr = ' class="head"';
4006 $attr = " style=\"{$this->header_content_style}\"";
4008 if ($this->header_type
== GESHI_HEADER_PRE_TABLE
&& $this->line_numbers
!= GESHI_NO_LINE_NUMBERS
) {
4009 $header = "<thead><tr><td colspan=\"2\" $attr>$header</td></tr></thead>";
4011 $header = "<div$attr>$header</div>";
4015 if (GESHI_HEADER_NONE
== $this->header_type
) {
4016 if ($this->line_numbers
!= GESHI_NO_LINE_NUMBERS
) {
4017 return "$header<ol$attributes$ol_attributes>";
4019 return $header . ($this->force_code_block ?
'<div>' : '');
4022 // Work out what to return and do it
4023 if ($this->line_numbers
!= GESHI_NO_LINE_NUMBERS
) {
4024 if ($this->header_type
== GESHI_HEADER_PRE
) {
4025 return "<pre$attributes>$header<ol$ol_attributes>";
4026 } else if ($this->header_type
== GESHI_HEADER_DIV ||
4027 $this->header_type
== GESHI_HEADER_PRE_VALID
) {
4028 return "<div$attributes>$header<ol$ol_attributes>";
4029 } else if ($this->header_type
== GESHI_HEADER_PRE_TABLE
) {
4030 return "<table$attributes>$header<tbody><tr class=\"li1\">";
4033 if ($this->header_type
== GESHI_HEADER_PRE
) {
4034 return "<pre$attributes>$header" .
4035 ($this->force_code_block ?
'<div>' : '');
4037 return "<div$attributes>$header" .
4038 ($this->force_code_block ?
'<div>' : '');
4044 * Returns the footer for the code block.
4046 * @return string The footer for the code block
4051 $footer = $this->footer_content
;
4053 if ($this->header_type
== GESHI_HEADER_PRE
) {
4054 $footer = str_replace("\n", '', $footer);;
4056 $footer = $this->replace_keywords($footer);
4058 if ($this->use_classes
) {
4059 $attr = ' class="foot"';
4061 $attr = " style=\"{$this->footer_content_style}\"";
4063 if ($this->header_type
== GESHI_HEADER_PRE_TABLE
&& $this->line_numbers
!= GESHI_NO_LINE_NUMBERS
) {
4064 $footer = "<tfoot><tr><td colspan=\"2\">$footer</td></tr></tfoot>";
4066 $footer = "<div$attr>$footer</div>";
4070 if (GESHI_HEADER_NONE
== $this->header_type
) {
4071 return ($this->line_numbers
!= GESHI_NO_LINE_NUMBERS
) ?
'</ol>' . $footer : $footer;
4074 if ($this->header_type
== GESHI_HEADER_DIV ||
$this->header_type
== GESHI_HEADER_PRE_VALID
) {
4075 if ($this->line_numbers
!= GESHI_NO_LINE_NUMBERS
) {
4076 return "</ol>$footer</div>";
4078 return ($this->force_code_block ?
'</div>' : '') .
4081 elseif ($this->header_type
== GESHI_HEADER_PRE_TABLE
) {
4082 if ($this->line_numbers
!= GESHI_NO_LINE_NUMBERS
) {
4083 return "</tr></tbody>$footer</table>";
4085 return ($this->force_code_block ?
'</div>' : '') .
4089 if ($this->line_numbers
!= GESHI_NO_LINE_NUMBERS
) {
4090 return "</ol>$footer</pre>";
4092 return ($this->force_code_block ?
'</div>' : '') .
/**
 * Replaces certain keywords in the header and footer with
 * certain configuration values
 *
 * Recognised placeholders (each in a <NAME> and a {NAME} spelling):
 * TIME (parse time, 3 decimals), LANGUAGE (LANG_NAME of the loaded
 * language), VERSION (GESHI_VERSION) and SPEED (source bytes per second).
 *
 * @param string The header or footer content to do replacement on
 * @return string The header or footer with replaced keywords
 */
function replace_keywords($instr) {
    $keywords = $replacements = array();

    // every value is pushed twice: once for the <NAME> form and once for
    // the {NAME} form, keeping $keywords and $replacements index-aligned
    $keywords[] = '<TIME>';
    $keywords[] = '{TIME}';
    $replacements[] = $replacements[] = number_format($time = $this->get_time(), 3);

    $keywords[] = '<LANGUAGE>';
    $keywords[] = '{LANGUAGE}';
    $replacements[] = $replacements[] = $this->language_data['LANG_NAME'];

    $keywords[] = '<VERSION>';
    $keywords[] = '{VERSION}';
    $replacements[] = $replacements[] = GESHI_VERSION;

    $keywords[] = '<SPEED>';
    $keywords[] = '{SPEED}';
    if ($time <= 0) {
        // no (measurable) parse time: a rate cannot be computed and the
        // division below would be a division by zero
        $speed = 'N/A';
    } else {
        $speed = strlen($this->source) / $time;
        if ($speed >= 1024) {
            $speed = sprintf("%.2f KB/s", $speed / 1024.0);
        } else {
            $speed = sprintf("%.0f B/s", $speed);
        }
    }
    $replacements[] = $replacements[] = $speed;

    return str_replace($keywords, $replacements, $instr);
}
4139 * Secure replacement for PHP built-in function htmlspecialchars().
4141 * See ticket #427 (http://wush.net/trac/wikka/ticket/427) for the rationale
4142 * for this replacement function.
4144 * The INTERFACE for this function is almost the same as that for
4145 * htmlspecialchars(), with the same default for quote style; however, there
4146 * is no 'charset' parameter. The reason for this is as follows:
4149 * "The third argument charset defines character set used in conversion."
4151 * I suspect PHP's htmlspecialchars() is working at the byte-value level and
4152 * thus _needs_ to know (or asssume) a character set because the special
4153 * characters to be replaced could exist at different code points in
4154 * different character sets. (If indeed htmlspecialchars() works at
4155 * byte-value level that goes some way towards explaining why the
4156 * vulnerability would exist in this function, too, and not only in
4157 * htmlentities() which certainly is working at byte-value level.)
4159 * This replacement function however works at character level and should
4160 * therefore be "immune" to character set differences - so no charset
4161 * parameter is needed or provided. If a third parameter is passed, it will
4162 * be silently ignored.
4164 * In the OUTPUT there is a minor difference in that we use '&#39;' instead
4165 * of PHP's '&#039;' for a single quote: this provides compatibility with
4166 * get_html_translation_table(HTML_SPECIALCHARS, ENT_QUOTES)
4167 * (see comment by mikiwoz at yahoo dot co dot uk on
4168 * http://php.net/htmlspecialchars); it also matches the entity definition
4170 * (http://www.w3.org/TR/xhtml1/dtds.html#a_dtd_Special_characters).
4171 * Like PHP we use a numeric character reference instead of '&apos;' for the
4172 * single quote. For the other special characters we use the named entity
4173 * references, as PHP is doing.
4175 * @author {@link http://wikkawiki.org/JavaWoman Marjolein Katsma}
4177 * @license http://www.gnu.org/copyleft/lgpl.html
4178 * GNU Lesser General Public License
4179 * @copyright Copyright 2007, {@link http://wikkawiki.org/CreditsPage
4180 * Wikka Development Team}
4183 * @param string $string string to be converted
4184 * @param integer $quote_style
4185 * - ENT_COMPAT: escapes &, <, > and double quote (default)
4186 * - ENT_NOQUOTES: escapes only &, < and >
4187 * - ENT_QUOTES: escapes &, <, >, double and single quotes
4188 * @return string converted string
function hsc($string, $quote_style = ENT_COMPAT) {
    // Escapes HTML special characters in $string and returns the result.
    // $quote_style selects the quote handling: ENT_COMPAT (default) escapes
    // double quotes, ENT_NOQUOTES escapes no quotes, ENT_QUOTES escapes
    // double and single quotes. In addition ';' and '|' are always replaced
    // by the internal <SEMI> / <PIPE> markers.

    // init: the base table is built once and must never be modified
    static $aTransSpecchar = array(
        '&' => '&amp;',
        '"' => '&quot;',
        '<' => '&lt;',
        '>' => '&gt;',

        //This fix is related to SF#1923020, but has to be applied
        //regardless of actually highlighting symbols.

        //Circumvent a bug with symbol highlighting
        //This is required as ; would produce undesirable side-effects if it
        //was not to be processed as an entity.
        ';' => '<SEMI>', // Force ; to be processed as entity
        '|' => '<PIPE>' // Force | to be processed as entity
        );                      // ENT_COMPAT set

    // Adjust a per-call copy. Changing the static table itself would leak
    // the quote style of one call into every later call (e.g. a single
    // ENT_NOQUOTES call would stop double quotes being escaped for good).
    $aTrans = $aTransSpecchar;
    switch ($quote_style) {
        case ENT_NOQUOTES: // don't convert double quotes
            unset($aTrans['"']);
            break;
        case ENT_QUOTES: // convert single quotes as well
            $aTrans["'"] = '&#39;'; // (apos) htmlspecialchars() uses '&#039;'
            break;
    }

    // return translated string
    return strtr($string, $aTrans);
}
/**
 * Turns a name into something usable as a CSS class/id name by prefixing
 * it with an underscore when its first character is numeric.
 *
 * @param string The name to convert
 * @return string The name, prefixed with '_' if it starts with a digit
 */
function _genCSSName($name){
    // isset() guards the offset access: an empty name has no first
    // character and is returned unchanged instead of raising a warning
    return ((isset($name[0]) && is_numeric($name[0])) ? '_' : '') . $name;
}
4227 * Returns a stylesheet for the highlighted code. If $economy mode
4228 * is true, we only return the stylesheet declarations that matter for
4229 * this code block instead of the whole thing
4231 * @param boolean Whether to use economy mode or not
4232 * @return string A stylesheet built on the data for the current language
/**
 * Returns a stylesheet for the highlighted code. If $economy_mode
 * is true, only the declarations that matter for this code block
 * (enabled lexics, active line-number mode) are emitted instead of
 * the whole set. Empty styles are never emitted in either mode.
 *
 * @param  boolean $economy_mode Whether to use economy mode or not
 * @return string  A stylesheet built on the data for the current language,
 *                 or '' if the object is in an error state
 */
function get_stylesheet($economy_mode = true) {
    // If there's an error, chances are that the language file
    // won't have populated the language data file, so we can't
    // risk getting a stylesheet...
    if ($this->error) {
        return '';
    }

    //Check if the style rearrangements have been processed ...
    //This also does some preprocessing to check which style groups are useable ...
    if (!isset($this->language_data['NUMBERS_CACHE'])) {
        $this->build_style_cache();
    }

    // First, work out what the selector should be. If there's an ID,
    // that should be used, the same for a class.
    if ($this->overall_id) {
        $selector = '#' . $this->_genCSSName($this->overall_id);
    } else {
        $selector = '.' . $this->_genCSSName($this->language);
        if ($this->overall_class) {
            $selector .= '.' . $this->_genCSSName($this->overall_class);
        }
    }
    // Trailing space: every rule below is a descendant selector
    // ("{$selector}li" => ".php li", "$selector.kw1" => ".php .kw1").
    $selector .= ' ';

    // Header of the stylesheet
    if (!$economy_mode) {
        $stylesheet = "/**\n".
            " * GeSHi Dynamically Generated Stylesheet\n".
            " * --------------------------------------\n".
            " * Dynamically generated stylesheet for {$this->language}\n".
            " * CSS class: {$this->overall_class}, CSS id: {$this->overall_id}\n".
            " * GeSHi (C) 2004 - 2007 Nigel McNie, 2007 - 2008 Benny Baumann\n" .
            " * (http://qbnz.com/highlighter/ and http://geshi.org/)\n".
            " * --------------------------------------\n".
            " */\n";
    } else {
        $stylesheet = "/**\n".
            " * GeSHi (C) 2004 - 2007 Nigel McNie, 2007 - 2008 Benny Baumann\n" .
            " * (http://qbnz.com/highlighter/ and http://geshi.org/)\n".
            " */\n";
    }

    // Set the <ol> to have no effect at all if there are line numbers
    // (<ol>s have margins that should be destroyed so all layout is
    // controlled by the set_overall_style method, which works on the
    // <pre> or <div> container). Additionally, set default styles for lines
    if (!$economy_mode || $this->line_numbers != GESHI_NO_LINE_NUMBERS) {
        //$stylesheet .= "$selector, {$selector}ol, {$selector}ol li {margin: 0;}\n";
        $stylesheet .= "$selector.de1, $selector.de2 {{$this->code_style}}\n";
    }

    // Add overall styles
    // note: neglect economy_mode, empty styles are meaningless
    if ($this->overall_style != '') {
        $stylesheet .= "$selector {{$this->overall_style}}\n";
    }

    // Add styles for links
    // note: economy mode does not make _any_ sense here
    //       either the style is empty and thus no selector is needed
    //       or the appropriate key is given.
    foreach ($this->link_styles as $key => $style) {
        if ($style != '') {
            switch ($key) {
                case GESHI_LINK:
                    $stylesheet .= "{$selector}a:link {{$style}}\n";
                    break;
                case GESHI_HOVER:
                    $stylesheet .= "{$selector}a:hover {{$style}}\n";
                    break;
                case GESHI_ACTIVE:
                    $stylesheet .= "{$selector}a:active {{$style}}\n";
                    break;
                case GESHI_VISITED:
                    $stylesheet .= "{$selector}a:visited {{$style}}\n";
                    break;
            }
        }
    }

    // Header and footer
    // note: neglect economy_mode, empty styles are meaningless
    if ($this->header_content_style != '') {
        $stylesheet .= "$selector.head {{$this->header_content_style}}\n";
    }
    if ($this->footer_content_style != '') {
        $stylesheet .= "$selector.foot {{$this->footer_content_style}}\n";
    }

    // Styles for important stuff
    // note: neglect economy_mode, empty styles are meaningless
    if ($this->important_styles != '') {
        $stylesheet .= "$selector.imp {{$this->important_styles}}\n";
    }

    // Simple line number styles
    if ((!$economy_mode || $this->line_numbers != GESHI_NO_LINE_NUMBERS) && $this->line_style1 != '') {
        $stylesheet .= "{$selector}li, {$selector}.li1 {{$this->line_style1}}\n";
    }
    if ((!$economy_mode || $this->line_numbers != GESHI_NO_LINE_NUMBERS) && $this->table_linenumber_style != '') {
        $stylesheet .= "{$selector}.ln {{$this->table_linenumber_style}}\n";
    }
    // If there is a style set for fancy line numbers, echo it out
    if ((!$economy_mode || $this->line_numbers == GESHI_FANCY_LINE_NUMBERS) && $this->line_style2 != '') {
        $stylesheet .= "{$selector}.li2 {{$this->line_style2}}\n";
    }

    // Per-lexic styles. In economy mode a group is only emitted when the
    // corresponding lexic permission is switched on.
    // note: empty styles are meaningless
    foreach ($this->language_data['STYLES']['KEYWORDS'] as $group => $styles) {
        if ($styles != '' && (!$economy_mode ||
            (isset($this->lexic_permissions['KEYWORDS'][$group]) &&
            $this->lexic_permissions['KEYWORDS'][$group]))) {
            $stylesheet .= "$selector.kw$group {{$styles}}\n";
        }
    }
    foreach ($this->language_data['STYLES']['COMMENTS'] as $group => $styles) {
        // Comment groups backed by a COMMENT_REGEXP are emitted even when
        // no explicit permission entry exists for them.
        if ($styles != '' && (!$economy_mode ||
            (isset($this->lexic_permissions['COMMENTS'][$group]) &&
            $this->lexic_permissions['COMMENTS'][$group]) ||
            (!empty($this->language_data['COMMENT_REGEXP']) &&
            !empty($this->language_data['COMMENT_REGEXP'][$group])))) {
            $stylesheet .= "$selector.co$group {{$styles}}\n";
        }
    }
    foreach ($this->language_data['STYLES']['ESCAPE_CHAR'] as $group => $styles) {
        if ($styles != '' && (!$economy_mode || $this->lexic_permissions['ESCAPE_CHAR'])) {
            // NEW: since 1.0.8 we have to handle hardescapes
            if ($group === 'HARD') {
                $group = '_h';
            }
            $stylesheet .= "$selector.es$group {{$styles}}\n";
        }
    }
    foreach ($this->language_data['STYLES']['BRACKETS'] as $group => $styles) {
        if ($styles != '' && (!$economy_mode || $this->lexic_permissions['BRACKETS'])) {
            $stylesheet .= "$selector.br$group {{$styles}}\n";
        }
    }
    foreach ($this->language_data['STYLES']['SYMBOLS'] as $group => $styles) {
        if ($styles != '' && (!$economy_mode || $this->lexic_permissions['SYMBOLS'])) {
            $stylesheet .= "$selector.sy$group {{$styles}}\n";
        }
    }
    foreach ($this->language_data['STYLES']['STRINGS'] as $group => $styles) {
        if ($styles != '' && (!$economy_mode || $this->lexic_permissions['STRINGS'])) {
            // NEW: since 1.0.8 we have to handle hardquotes
            if ($group === 'HARD') {
                $group = '_h';
            }
            $stylesheet .= "$selector.st$group {{$styles}}\n";
        }
    }
    foreach ($this->language_data['STYLES']['NUMBERS'] as $group => $styles) {
        if ($styles != '' && (!$economy_mode || $this->lexic_permissions['NUMBERS'])) {
            $stylesheet .= "$selector.nu$group {{$styles}}\n";
        }
    }
    foreach ($this->language_data['STYLES']['METHODS'] as $group => $styles) {
        if ($styles != '' && (!$economy_mode || $this->lexic_permissions['METHODS'])) {
            $stylesheet .= "$selector.me$group {{$styles}}\n";
        }
    }
    // note: neglect economy_mode, empty styles are meaningless
    foreach ($this->language_data['STYLES']['SCRIPT'] as $group => $styles) {
        if ($styles != '') {
            $stylesheet .= "$selector.sc$group {{$styles}}\n";
        }
    }
    foreach ($this->language_data['STYLES']['REGEXPS'] as $group => $styles) {
        if ($styles != '' && (!$economy_mode ||
            (isset($this->lexic_permissions['REGEXPS'][$group]) &&
            $this->lexic_permissions['REGEXPS'][$group]))) {
            // A regexp definition may name its own CSS class via GESHI_CLASS;
            // otherwise the generic "re<group>" class is used.
            if (is_array($this->language_data['REGEXPS'][$group]) &&
                array_key_exists(GESHI_CLASS, $this->language_data['REGEXPS'][$group])) {
                $stylesheet .= "$selector.";
                $stylesheet .= $this->language_data['REGEXPS'][$group][GESHI_CLASS];
                $stylesheet .= " {{$styles}}\n";
            } else {
                $stylesheet .= "$selector.re$group {{$styles}}\n";
            }
        }
    }

    // Styles for lines being highlighted extra. In economy mode the generic
    // rule is only needed when not every extra line has its own style.
    if (!$economy_mode || (count($this->highlight_extra_lines)!=count($this->highlight_extra_lines_styles))) {
        $stylesheet .= "{$selector}.ln-xtra, {$selector}li.ln-xtra, {$selector}div.ln-xtra {{$this->highlight_extra_lines_style}}\n";
    }
    $stylesheet .= "{$selector}span.xtra { display:block; }\n";
    foreach ($this->highlight_extra_lines_styles as $lineid => $linestyle) {
        $stylesheet .= "{$selector}.lx$lineid, {$selector}li.lx$lineid, {$selector}div.lx$lineid {{$linestyle}}\n";
    }

    return $stylesheet;
}
4433 * Gets the style that is used for the specified line
4435 * @param int The line number information is requested for
/**
 * Looks up the style used for a line that is highlighted extra.
 *
 * @param  int $line The line number information is requested for
 * @return mixed The style registered for exactly this line, or the general
 *               "extra lines" style when no line-specific one is set
 */
function get_line_style($line) {
    // A line-specific style takes precedence over the shared one.
    if (isset($this->highlight_extra_lines_styles[$line])) {
        return $this->highlight_extra_lines_styles[$line];
    }

    // if no "extra" style assigned
    return $this->highlight_extra_lines_style;
}
4452 * This function creates an optimized regular expression list
4453 * of an array of strings.
4456 * <code>$list = array('faa', 'foo', 'foobar');
4457 * => string 'f(aa|oo(bar)?)'</code>
4459 * @param $list array of (unquoted) strings
4460 * @param $regexp_delimiter your regular expression delimiter, @see preg_quote()
4461 * @return string for regular expression
4462 * @author Milian Wolff <mail@milianw.de>
/**
 * Creates an optimized list of regular expressions out of an array of
 * strings by merging common prefixes, e.g.
 * array('faa', 'foo', 'foobar') => 'f(?:aa|oo(?:bar)?)'.
 *
 * The entries are sorted, quoted with preg_quote() and inserted into a
 * prefix tree ($tokens). Whenever a top-level branch is complete it is
 * serialised and appended to the current result string; a new result
 * string is started when GESHI_MAX_PCRE_LENGTH or
 * GESHI_MAX_PCRE_SUBPATTERNS would be exceeded.
 *
 * @param  array  $list             array of (unquoted) strings
 * @param  string $regexp_delimiter your regular expression delimiter, @see preg_quote()
 * @return array  list of regexp strings (at least one element, possibly '')
 */
function optimize_regexp_list($list, $regexp_delimiter = '/') {
    // characters a split-off key part must not start with
    $regex_chars = array('.', '\\', '+', '*', '?', '[', '^', ']', '$',
        '(', ')', '{', '}', '=', '!', '<', '>', '|', ':', $regexp_delimiter);
    // prefix merging only looks at the previous entry, so order matters
    sort($list);
    $regexp_list = array('');
    $num_subpatterns = 0;
    $list_key = 0;

    // the tokens which we will use to generate the regexp list
    $tokens = array();
    // path of keys (one per tree level) taken by the previous entry
    $prev_keys = array();
    // go through all entries of the list and generate the token list
    $cur_len = 0;
    for ($i = 0, $i_max = count($list); $i < $i_max; ++$i) {
        if ($cur_len > GESHI_MAX_PCRE_LENGTH) {
            // seems like the length of this pcre is growing exorbitantly
            $regexp_list[++$list_key] = $this->_optimize_regexp_list_tokens_to_string($tokens);
            $num_subpatterns = substr_count($regexp_list[$list_key], '(?:');
            $tokens = array();
            $cur_len = 0;
        }
        $level = 0;
        $entry = preg_quote((string) $list[$i], $regexp_delimiter);
        $pointer = &$tokens;
        // properly assign the new entry to the correct position in the token array
        // possibly generate smaller common denominator keys
        while (true) {
            // get the common denominator
            if (isset($prev_keys[$level])) {
                if ($prev_keys[$level] == $entry) {
                    // this is a duplicate entry, skip it
                    continue 2;
                }
                // length of the common prefix of entry and previous key
                $char = 0;
                while (isset($entry[$char]) && isset($prev_keys[$level][$char])
                        && $entry[$char] == $prev_keys[$level][$char]) {
                    ++$char;
                }
                if ($char > 0) {
                    // this entry has at least some chars in common with the current key
                    if ($char == strlen($prev_keys[$level])) {
                        // current key is totally matched, i.e. this entry has just some bits appended
                        $pointer = &$pointer[$prev_keys[$level]];
                    } else {
                        // only part of the keys match
                        $new_key_part1 = substr($prev_keys[$level], 0, $char);
                        $new_key_part2 = substr($prev_keys[$level], $char);

                        if (in_array($new_key_part1[0], $regex_chars)
                            || in_array($new_key_part2[0], $regex_chars)) {
                            // this is bad, a regex char as first character
                            $pointer[$entry] = array('' => true);
                            array_splice($prev_keys, $level, count($prev_keys), $entry);
                            $cur_len += strlen($entry);
                            // next pass sees the entry as its own previous
                            // key and leaves via the duplicate check above
                            continue;
                        } else {
                            // relocate previous tokens
                            $pointer[$new_key_part1] = array($new_key_part2 => $pointer[$prev_keys[$level]]);
                            unset($pointer[$prev_keys[$level]]);
                            $pointer = &$pointer[$new_key_part1];
                            // recreate key index
                            array_splice($prev_keys, $level, count($prev_keys), array($new_key_part1, $new_key_part2));
                            $cur_len += strlen($new_key_part2);
                        }
                    }
                    // descend one level with the unmatched remainder
                    ++$level;
                    $entry = substr($entry, $char);
                    continue;
                }
                // else: fall through, i.e. no common denominator was found
            }
            if ($level == 0 && !empty($tokens)) {
                // we can dump current tokens into the string and throw them away afterwards
                $new_entry = $this->_optimize_regexp_list_tokens_to_string($tokens);
                $new_subpatterns = substr_count($new_entry, '(?:');
                if (GESHI_MAX_PCRE_SUBPATTERNS && $num_subpatterns + $new_subpatterns > GESHI_MAX_PCRE_SUBPATTERNS) {
                    $regexp_list[++$list_key] = $new_entry;
                    $num_subpatterns = $new_subpatterns;
                } else {
                    if (!empty($regexp_list[$list_key])) {
                        $new_entry = '|' . $new_entry;
                    }
                    $regexp_list[$list_key] .= $new_entry;
                    $num_subpatterns += $new_subpatterns;
                }
                $tokens = array();
                $cur_len = 0;
            }
            // no further common denominator found
            $pointer[$entry] = array('' => true);
            array_splice($prev_keys, $level, count($prev_keys), $entry);

            $cur_len += strlen($entry);
            break;
        }
        // the entry is consumed; drop it from the local copy of the list
        unset($list[$i]);
    }
    // make sure the last tokens get converted as well
    $new_entry = $this->_optimize_regexp_list_tokens_to_string($tokens);
    if (GESHI_MAX_PCRE_SUBPATTERNS && $num_subpatterns + substr_count($new_entry, '(?:') > GESHI_MAX_PCRE_SUBPATTERNS) {
        if (!empty($regexp_list[$list_key])) {
            ++$list_key;
        }
        $regexp_list[$list_key] = $new_entry;
    } else {
        if (!empty($regexp_list[$list_key])) {
            $new_entry = '|' . $new_entry;
        }
        $regexp_list[$list_key] .= $new_entry;
    }
    return $regexp_list;
}
4579 * This function creates the appropriate regexp string of a token array.
4580 * You should not call this function directly, @see $this->optimize_regexp_list().
4582 * @param &$tokens array of tokens
4583 * @param $recursed bool to know whether we recursed or not
4585 * @author Milian Wolff <mail@milianw.de>
/**
 * Serialises a token tree into a regexp alternation string.
 * Internal helper of optimize_regexp_list(); do not call it directly.
 *
 * Each key of $tokens is a literal prefix; its value is the array of
 * continuations. A '' key inside that array marks that the prefix is a
 * complete entry on its own, so the continuation group becomes optional.
 *
 * @param  array   &$tokens  array of tokens (not modified by this method)
 * @param  boolean $recursed true when called for a sub-tree; the final
 *                           clean-up passes only run on the outermost call
 * @return string  the alternation, without a trailing pipe
 */
function _optimize_regexp_list_tokens_to_string(&$tokens, $recursed = false) {
    $list = '';
    foreach ($tokens as $token => $sub_tokens) {
        $list .= $token;
        $close_entry = isset($sub_tokens['']);
        // $sub_tokens is a by-value copy, so this leaves $tokens untouched
        unset($sub_tokens['']);
        if (!empty($sub_tokens)) {
            $list .= '(?:' . $this->_optimize_regexp_list_tokens_to_string($sub_tokens, true) . ')';
            if ($close_entry) {
                // make sub_tokens optional
                $list .= '?';
            }
        }
        $list .= '|';
    }
    if (!$recursed) {
        // do some optimizations
        // (the "common trailing strings" optimisation is commented out in
        // the original source and stays disabled)

        // (?:p)? => p?
        $list = preg_replace('#\(\?\:(.)\)\?#', '\1?', $list);
        // (?:a|b|c|d|...)? => [abcd...]?
        // TODO: a|bb|c => [ac]|bb
        // A method callback replaces create_function(), which is
        // deprecated since PHP 7.2 and removed in PHP 8.0.
        $list = preg_replace_callback('#\(\?\:((?:.\|)+.)\)#',
            array($this, '_optimize_regexp_list_alternation_to_class'), $list);
    }
    // return $list without trailing pipe
    return substr($list, 0, -1);
}

/**
 * preg_replace_callback() handler: turns the single-character alternation
 * captured in $matches[1] ("a|b|c") into a character class ("[abc]").
 *
 * @param  array  $matches match data; index 1 holds the alternation body
 * @return string the equivalent character class
 */
function _optimize_regexp_list_alternation_to_class($matches) {
    return '[' . str_replace('|', '', $matches[1]) . ']';
}
4623 } // End Class GeSHi
4626 if (!function_exists('geshi_highlight')) {
4628 * Easy way to highlight stuff. Behaves just like highlight_string
4630 * @param string The code to highlight
4631 * @param string The language to highlight the code in
4632 * @param string The path to the language files. You can leave this blank if you need
4633 * as from version 1.0.7 the path should be automatically detected
4634 * @param boolean Whether to return the result or to echo
4635 * @return string The code highlighted (if $return is true)
4638 function geshi_highlight($string, $language, $path = null, $return = false) {
4639 $geshi = new GeSHi($string, $language, $path);
4640 $geshi->set_header_type(GESHI_HEADER_NONE
);
4643 return '<code>' . $geshi->parse_code() . '</code>';
4646 echo '<code>' . $geshi->parse_code() . '</code>';
4648 if ($geshi->error()) {