2 if(!defined('DOKU_INC')) die('meh.');
3 require_once DOKU_INC
. 'inc/parser/lexer.php';
4 require_once DOKU_INC
. 'inc/parser/handler.php';
8 * Define various types of modes used by the parser - they are used to
9 * populate the list of modes another mode accepts
12 $PARSER_MODES = array(
13 // containers are complex modes that can contain many other modes
// hr breaks the principle, but it shouldn't be used in tables / lists,
// so it is put here
16 'container' => array('listblock','table','quote','hr'),
// some modes are allowed inside the base mode only
19 'baseonly' => array('header'),
21 // modes for styling text -- footnote behaves similar to styling
22 'formatting' => array('strong', 'emphasis', 'underline', 'monospace',
23 'subscript', 'superscript', 'deleted', 'footnote'),
// modes where the token is simply replaced - they cannot contain any
// other modes
27 'substition' => array('acronym','smiley','wordblock','entity',
28 'camelcaselink', 'internallink','media',
29 'externallink','linebreak','emaillink',
30 'windowssharelink','filelink','notoc',
31 'nocache','multiplyentity','quotes','rss'),
33 // modes which have a start and end token but inside which
34 // no other modes should be applied
35 'protected' => array('preformatted','code','file','php','html','htmlblock','phpblock'),
// inside this mode no wiki markup should be applied, but line endings
// and whitespace aren't preserved
39 'disabled' => array('unformatted'),
41 // used to mark paragraph boundaries
42 'paragraphs' => array('eol')
45 //-------------------------------------------------------------------
48 * Sets up the Lexer with modes and points it to the Handler
49 * For an intro to the Lexer see: wiki:parser
59 var $connected = false;
/**
 * Stores the base mode and creates the lexer on first use.
 *
 * The mode is kept under the 'base' key of $this->modes. If no lexer exists
 * yet, a Doku_Lexer is created with the handler and 'base' as start mode.
 * The base mode's Lexer property is then bound to the parser's lexer.
 *
 * @param object $BaseMode mode object to register as 'base'
 */
function addBaseMode(& $BaseMode) {
    $this->modes['base'] = & $BaseMode;

    if ( !$this->Lexer ) {
        // plain assignment: "= & new" is deprecated since PHP 5.3 and is a
        // parse error from PHP 7 on
        $this->Lexer = new Doku_Lexer($this->Handler, 'base', true);
    }

    $this->modes['base']->Lexer = & $this->Lexer;
}
70 * PHP preserves order of associative elements
71 * Mode sequence is important
/**
 * Registers a mode object under the given name.
 *
 * When no 'base' mode has been registered yet, a Doku_Parser_Mode_base is
 * added first. The mode receives a reference to the parser's lexer. Modes
 * are kept in the order they were added.
 *
 * @param string $name key under which the mode is stored
 * @param object $Mode mode object to register
 */
function addMode($name, & $Mode) {
    $baseRegistered = isset($this->modes['base']);

    if ( !$baseRegistered ) {
        $this->addBaseMode(new Doku_Parser_Mode_base());
    }

    $this->modes[$name] = & $Mode;
    $Mode->Lexer = & $this->Lexer;
}
/**
 * Connects every registered mode to the modes that accept it.
 *
 * For each mode other than 'base', preConnect() is called first, then
 * connectTo() once for every registered mode whose accepts() returns true
 * for it, and finally postConnect(). The work is done only once; later
 * calls return immediately.
 */
function connectModes() {

    if ( $this->connected ) {
        return;
    }

    foreach ( array_keys($this->modes) as $mode ) {

        // Base isn't connected to anything.
        // Strict comparison on purpose: array_keys() returns integer-like
        // keys as ints, and 0 == 'base' is true before PHP 8.
        if ( $mode === 'base' ) {
            continue;
        }

        $this->modes[$mode]->preConnect();

        foreach ( array_keys($this->modes) as $cm ) {
            if ( $this->modes[$cm]->accepts($mode) ) {
                $this->modes[$mode]->connectTo($cm);
            }
        }

        $this->modes[$mode]->postConnect();
    }

    $this->connected = true;
}
110 function parse($doc) {
111 if ( $this->Lexer
) {
112 $this->connectModes();
113 // Normalize CRs and pad doc
114 $doc = "\n".str_replace("\r\n","\n",$doc)."\n";
115 $this->Lexer
->parse($doc);
116 $this->Handler
->_finalize();
117 return $this->Handler
->calls
;
125 //-------------------------------------------------------------------
127 * This class and all the subclasses below are
128 * used to reduce the effort required to register
129 * modes with the Lexer. For performance these
130 * could all be eliminated later perhaps, or
131 * the Parser could be serialized to a file once
132 * all modes are registered
134 * @author Harry Fuecks <hfuecks@gmail.com>
class Doku_Parser_Mode {

    // lexer the mode registers its patterns with; assigned from outside
    var $Lexer;

    // names of the modes that may start inside this mode
    var $allowedModes = array();

    /**
     * Returns a number used to determine in which order modes are added.
     *
     * This default implementation only raises an E_USER_WARNING naming the
     * concrete class.
     */
    function getSort() {
        trigger_error('getSort() not implemented in '.get_class($this), E_USER_WARNING);
    }

    // Called before any calls to connectTo
    function preConnect() {}

    // Called with the name of a mode this mode should be connected to
    function connectTo($mode) {}

    // Called after all calls to connectTo
    function postConnect() {}

    /**
     * Tells whether the named mode is listed in $allowedModes.
     *
     * Names are compared as strings with strict equality, so an integer key
     * such as 0 no longer matches arbitrary mode names (a loose 0 == 'strong'
     * is true before PHP 8), while 10 still matches '10'.
     *
     * @param string|int $mode mode name to look up
     * @return bool
     */
    function accepts($mode) {
        $allowed = array_map('strval', (array) $this->allowedModes);
        return in_array((string) $mode, $allowed, true);
    }
}
162 //-------------------------------------------------------------------
class Doku_Parser_Mode_base extends Doku_Parser_Mode {

    /**
     * Fills $allowedModes from the container, baseonly, paragraphs,
     * formatting, substition, protected and disabled groups of $PARSER_MODES.
     *
     * Declared as __construct() because PHP 8 no longer treats a method named
     * after its class as the constructor.
     */
    function __construct() {
        global $PARSER_MODES;

        $this->allowedModes = array_merge(
            $PARSER_MODES['container'],
            $PARSER_MODES['baseonly'],
            $PARSER_MODES['paragraphs'],
            $PARSER_MODES['formatting'],
            $PARSER_MODES['substition'],
            $PARSER_MODES['protected'],
            $PARSER_MODES['disabled']
        );
    }

    /**
     * PHP 4 style constructor name, kept so explicit calls to it still work.
     */
    function Doku_Parser_Mode_base() {
        $this->__construct();
    }
}
184 //-------------------------------------------------------------------
class Doku_Parser_Mode_footnote extends Doku_Parser_Mode {

    /**
     * Fills $allowedModes from the container, formatting, substition,
     * protected and disabled groups of $PARSER_MODES, minus 'footnote' itself.
     *
     * Declared as __construct() because PHP 8 no longer treats a method named
     * after its class as the constructor.
     */
    function __construct() {
        global $PARSER_MODES;

        $this->allowedModes = array_merge(
            $PARSER_MODES['container'],
            $PARSER_MODES['formatting'],
            $PARSER_MODES['substition'],
            $PARSER_MODES['protected'],
            $PARSER_MODES['disabled']
        );

        // only unset when found: array_search() returns false on a miss, and
        // using false as an index would remove element 0 instead
        $self = array_search('footnote', $this->allowedModes, true);
        if ( $self !== false ) {
            unset($this->allowedModes[$self]);
        }
    }

    /**
     * PHP 4 style constructor name, kept so explicit calls to it still work.
     */
    function Doku_Parser_Mode_footnote() {
        $this->__construct();
    }

    /**
     * Adds the footnote entry pattern to the given mode.
     *
     * @param string $mode name of the mode being connected to
     */
    function connectTo($mode) {
        $this->Lexer->addEntryPattern(
            '\x28\x28(?=.*\x29\x29)', $mode, 'footnote'
        );
    }

    /**
     * Adds the exit pattern of the footnote mode.
     */
    function postConnect() {
        $this->Lexer->addExitPattern(
            '\x29\x29', 'footnote'
        );
    }
}
218 //-------------------------------------------------------------------
219 class Doku_Parser_Mode_header
extends Doku_Parser_Mode
{
221 function preConnect() {
222 //we're not picky about the closing ones, two are enough
223 $this->Lexer
->addSpecialPattern(
224 '[ \t]*={2,}[^\n]+={2,}[ \t]*(?=\n)',
235 //-------------------------------------------------------------------
class Doku_Parser_Mode_notoc extends Doku_Parser_Mode {

    /**
     * Registers the literal ~~NOTOC~~ as a special pattern of the given mode,
     * handled as 'notoc'.
     *
     * @param string $mode name of the mode being connected to
     */
    function connectTo($mode) {
        $macro = '~~NOTOC~~';
        $this->Lexer->addSpecialPattern($macro, $mode, 'notoc');
    }
}
247 //-------------------------------------------------------------------
class Doku_Parser_Mode_nocache extends Doku_Parser_Mode {

    /**
     * Registers the literal ~~NOCACHE~~ as a special pattern of the given
     * mode, handled as 'nocache'.
     *
     * @param string $mode name of the mode being connected to
     */
    function connectTo($mode) {
        $macro = '~~NOCACHE~~';
        $this->Lexer->addSpecialPattern($macro, $mode, 'nocache');
    }
}
259 //-------------------------------------------------------------------
class Doku_Parser_Mode_linebreak extends Doku_Parser_Mode {

    /**
     * Registers the forced line break pattern in the given mode: two
     * backslashes followed by a space or tab, or directly by a newline.
     *
     * @param string $mode name of the mode being connected to
     */
    function connectTo($mode) {
        $pattern = '\x5C{2}(?:[ \t]|(?=\n))';
        $this->Lexer->addSpecialPattern($pattern, $mode, 'linebreak');
    }
}
271 //-------------------------------------------------------------------
class Doku_Parser_Mode_eol extends Doku_Parser_Mode {

    /**
     * Registers the end-of-line pattern in the given mode, except for the
     * 'listblock' and 'table' modes.
     *
     * @param string $mode name of the mode being connected to
     */
    function connectTo($mode) {
        $badModes = array('listblock','table');

        if ( !in_array($mode, $badModes) ) {
            // see FS#1652, pattern extended to swallow preceding whitespace to avoid issues with lines that only contain whitespace
            $this->Lexer->addSpecialPattern('(?:^[ \t]*)?\n', $mode, 'eol');
        }
    }
}
288 //-------------------------------------------------------------------
class Doku_Parser_Mode_hr extends Doku_Parser_Mode {

    /**
     * Registers the horizontal rule pattern in the given mode: a line made of
     * four or more dashes, optionally surrounded by spaces or tabs.
     *
     * @param string $mode name of the mode being connected to
     */
    function connectTo($mode) {
        $pattern = '\n[ \t]*-{4,}[ \t]*(?=\n)';
        $this->Lexer->addSpecialPattern($pattern, $mode, 'hr');
    }
}
300 //-------------------------------------------------------------------
301 class Doku_Parser_Mode_formatting
extends Doku_Parser_Mode
{
304 var $formatting = array (
306 'entry'=>'\*\*(?=.*\*\*)',
312 'entry'=>'//(?=[^\x00]*[^:])', //hack for bugs #384 #763 #1468
317 'underline'=> array (
318 'entry'=>'__(?=.*__)',
323 'monospace'=> array (
324 'entry'=>'\x27\x27(?=.*\x27\x27)',
329 'subscript'=> array (
330 'entry'=>'<sub>(?=.*</sub>)',
335 'superscript'=> array (
336 'entry'=>'<sup>(?=.*</sup>)',
342 'entry'=>'<del>(?=.*</del>)',
348 function Doku_Parser_Mode_formatting($type) {
349 global $PARSER_MODES;
351 if ( !array_key_exists($type, $this->formatting
) ) {
352 trigger_error('Invalid formatting type '.$type, E_USER_WARNING
);
// formatting may contain other formatting but not itself
358 $modes = $PARSER_MODES['formatting'];
359 $key = array_search($type, $modes);
360 if ( is_int($key) ) {
364 $this->allowedModes
= array_merge (
366 $PARSER_MODES['substition'],
367 $PARSER_MODES['disabled']
371 function connectTo($mode) {
373 // Can't nest formatting in itself
374 if ( $mode == $this->type
) {
378 $this->Lexer
->addEntryPattern(
379 $this->formatting
[$this->type
]['entry'],
385 function postConnect() {
387 $this->Lexer
->addExitPattern(
388 $this->formatting
[$this->type
]['exit'],
395 return $this->formatting
[$this->type
]['sort'];
399 //-------------------------------------------------------------------
class Doku_Parser_Mode_listblock extends Doku_Parser_Mode {

    /**
     * Fills $allowedModes from the formatting, substition, disabled and
     * protected groups of $PARSER_MODES.
     *
     * Declared as __construct() because PHP 8 no longer treats a method named
     * after its class as the constructor.
     */
    function __construct() {
        global $PARSER_MODES;

        $this->allowedModes = array_merge(
            $PARSER_MODES['formatting'],
            $PARSER_MODES['substition'],
            $PARSER_MODES['disabled'],
            $PARSER_MODES['protected']
        );
    }

    /**
     * PHP 4 style constructor name, kept so explicit calls to it still work.
     */
    function Doku_Parser_Mode_listblock() {
        $this->__construct();
    }

    /**
     * Registers the list item markers (space- or tab-indented "-" or "*" at
     * the start of a line) as entry patterns of the given mode and as inner
     * patterns of the listblock mode.
     *
     * @param string $mode name of the mode being connected to
     */
    function connectTo($mode) {
        $this->Lexer->addEntryPattern('\n {2,}[\-\*]', $mode, 'listblock');
        $this->Lexer->addEntryPattern('\n\t{1,}[\-\*]', $mode, 'listblock');

        $this->Lexer->addPattern('\n {2,}[\-\*]', 'listblock');
        $this->Lexer->addPattern('\n\t{1,}[\-\*]', 'listblock');
    }

    /**
     * Registers a newline as the exit pattern of the listblock mode.
     */
    function postConnect() {
        $this->Lexer->addExitPattern('\n', 'listblock');
    }
}
433 //-------------------------------------------------------------------
class Doku_Parser_Mode_table extends Doku_Parser_Mode {

    /**
     * Fills $allowedModes from the formatting, substition, disabled and
     * protected groups of $PARSER_MODES.
     *
     * Declared as __construct() because PHP 8 no longer treats a method named
     * after its class as the constructor.
     */
    function __construct() {
        global $PARSER_MODES;

        $this->allowedModes = array_merge(
            $PARSER_MODES['formatting'],
            $PARSER_MODES['substition'],
            $PARSER_MODES['disabled'],
            $PARSER_MODES['protected']
        );
    }

    /**
     * PHP 4 style constructor name, kept so explicit calls to it still work.
     */
    function Doku_Parser_Mode_table() {
        $this->__construct();
    }

    /**
     * Registers a line starting with ^ or | as entry into the table mode.
     *
     * @param string $mode name of the mode being connected to
     */
    function connectTo($mode) {
        $this->Lexer->addEntryPattern('\n\^', $mode, 'table');
        $this->Lexer->addEntryPattern('\n\|', $mode, 'table');
    }

    /**
     * Registers the inner patterns of the table mode and a newline as its
     * exit pattern.
     */
    function postConnect() {
        // registration order is kept exactly as listed
        $inner = array(
            '\n\^',
            '\n\|',
            '[\t ]*:::[\t ]*(?=[\|\^])',
            '[\t ]+',
            '\^',
            '\|',
        );

        foreach ( $inner as $pattern ) {
            $this->Lexer->addPattern($pattern, 'table');
        }

        $this->Lexer->addExitPattern('\n', 'table');
    }
}
467 //-------------------------------------------------------------------
class Doku_Parser_Mode_unformatted extends Doku_Parser_Mode {

    /**
     * Registers both entry patterns in the given mode: <nowiki> enters
     * 'unformatted', %% enters 'unformattedalt'. Each requires its closing
     * counterpart to follow.
     *
     * @param string $mode name of the mode being connected to
     */
    function connectTo($mode) {
        $lexer = $this->Lexer;
        $lexer->addEntryPattern('<nowiki>(?=.*</nowiki>)', $mode, 'unformatted');
        $lexer->addEntryPattern('%%(?=.*%%)', $mode, 'unformattedalt');
    }

    /**
     * Registers the exit patterns of both variants and maps
     * 'unformattedalt' onto the 'unformatted' handler.
     */
    function postConnect() {
        $lexer = $this->Lexer;
        $lexer->addExitPattern('</nowiki>', 'unformatted');
        $lexer->addExitPattern('%%', 'unformattedalt');
        $lexer->mapHandler('unformattedalt', 'unformatted');
    }
}
486 //-------------------------------------------------------------------
class Doku_Parser_Mode_php extends Doku_Parser_Mode {

    /**
     * Registers both entry patterns in the given mode: <php> enters 'php',
     * <PHP> enters 'phpblock'. Each requires its closing tag to follow.
     *
     * @param string $mode name of the mode being connected to
     */
    function connectTo($mode) {
        $lexer = $this->Lexer;
        $lexer->addEntryPattern('<php>(?=.*</php>)', $mode, 'php');
        $lexer->addEntryPattern('<PHP>(?=.*</PHP>)', $mode, 'phpblock');
    }

    /**
     * Registers the closing tags as exit patterns of 'php' and 'phpblock'.
     */
    function postConnect() {
        $lexer = $this->Lexer;
        $lexer->addExitPattern('</php>', 'php');
        $lexer->addExitPattern('</PHP>', 'phpblock');
    }
}
504 //-------------------------------------------------------------------
class Doku_Parser_Mode_html extends Doku_Parser_Mode {

    /**
     * Registers both entry patterns in the given mode: <html> enters 'html',
     * <HTML> enters 'htmlblock'. Each requires its closing tag to follow.
     *
     * @param string $mode name of the mode being connected to
     */
    function connectTo($mode) {
        $lexer = $this->Lexer;
        $lexer->addEntryPattern('<html>(?=.*</html>)', $mode, 'html');
        $lexer->addEntryPattern('<HTML>(?=.*</HTML>)', $mode, 'htmlblock');
    }

    /**
     * Registers the closing tags as exit patterns of 'html' and 'htmlblock'.
     */
    function postConnect() {
        $lexer = $this->Lexer;
        $lexer->addExitPattern('</html>', 'html');
        $lexer->addExitPattern('</HTML>', 'htmlblock');
    }
}
522 //-------------------------------------------------------------------
523 class Doku_Parser_Mode_preformatted
extends Doku_Parser_Mode
{
525 function connectTo($mode) {
526 // Has hard coded awareness of lists...
527 $this->Lexer
->addEntryPattern('\n (?![\*\-])',$mode,'preformatted');
528 $this->Lexer
->addEntryPattern('\n\t(?![\*\-])',$mode,'preformatted');
530 // How to effect a sub pattern with the Lexer!
531 $this->Lexer
->addPattern('\n ','preformatted');
532 $this->Lexer
->addPattern('\n\t','preformatted');
536 function postConnect() {
537 $this->Lexer
->addExitPattern('\n','preformatted');
545 //-------------------------------------------------------------------
class Doku_Parser_Mode_code extends Doku_Parser_Mode {

    /**
     * Registers "<code" as entry into the code mode, provided a closing
     * </code> follows.
     *
     * @param string $mode name of the mode being connected to
     */
    function connectTo($mode) {
        $entry = '<code(?=.*</code>)';
        $this->Lexer->addEntryPattern($entry, $mode, 'code');
    }

    /**
     * Registers </code> as the exit pattern of the code mode.
     */
    function postConnect() {
        $exit = '</code>';
        $this->Lexer->addExitPattern($exit, 'code');
    }
}
561 //-------------------------------------------------------------------
class Doku_Parser_Mode_file extends Doku_Parser_Mode {

    /**
     * Registers "<file" as entry into the file mode, provided a closing
     * </file> follows.
     *
     * @param string $mode name of the mode being connected to
     */
    function connectTo($mode) {
        $entry = '<file(?=.*</file>)';
        $this->Lexer->addEntryPattern($entry, $mode, 'file');
    }

    /**
     * Registers </file> as the exit pattern of the file mode.
     */
    function postConnect() {
        $exit = '</file>';
        $this->Lexer->addExitPattern($exit, 'file');
    }
}
577 //-------------------------------------------------------------------
class Doku_Parser_Mode_quote extends Doku_Parser_Mode {

    /**
     * Fills $allowedModes from the formatting, substition, disabled and
     * protected groups of $PARSER_MODES.
     *
     * Declared as __construct() because PHP 8 no longer treats a method named
     * after its class as the constructor.
     */
    function __construct() {
        global $PARSER_MODES;

        $this->allowedModes = array_merge(
            $PARSER_MODES['formatting'],
            $PARSER_MODES['substition'],
            $PARSER_MODES['disabled'],
            $PARSER_MODES['protected']
        );
    }

    /**
     * PHP 4 style constructor name, kept so explicit calls to it still work.
     */
    function Doku_Parser_Mode_quote() {
        $this->__construct();
    }

    /**
     * Registers a line starting with one or more ">" as entry into the
     * quote mode.
     *
     * @param string $mode name of the mode being connected to
     */
    function connectTo($mode) {
        $this->Lexer->addEntryPattern('\n>{1,}', $mode, 'quote');
    }

    /**
     * Registers the same marker as inner pattern of the quote mode and a
     * newline as its exit pattern.
     */
    function postConnect() {
        $this->Lexer->addPattern('\n>{1,}', 'quote');
        $this->Lexer->addExitPattern('\n', 'quote');
    }
}
608 //-------------------------------------------------------------------
609 class Doku_Parser_Mode_acronym
extends Doku_Parser_Mode
{
611 var $acronyms = array();
/**
 * Stores the acronyms after sorting them with the _compare() callback.
 *
 * Declared as __construct() because PHP 8 no longer treats a method named
 * after its class as the constructor.
 *
 * @param array $acronyms acronym strings
 */
function __construct($acronyms) {
    usort($acronyms, array($this, '_compare'));
    $this->acronyms = $acronyms;
}

/**
 * PHP 4 style constructor name, kept so explicit calls to it still work.
 *
 * @param array $acronyms acronym strings
 */
function Doku_Parser_Mode_acronym($acronyms) {
    $this->__construct($acronyms);
}
/**
 * Builds $this->pattern: an alternation of all escaped acronyms that must be
 * preceded by the start of the subject or a boundary character and followed
 * by a boundary character. Does nothing when there are no acronyms.
 */
function preConnect() {
    if ( count($this->acronyms) == 0 ) {
        return;
    }

    $bound = '[\x00-\x2f\x3a-\x40\x5b-\x60\x7b-\x7f]';

    $escaped = array_map('Doku_Lexer_Escape', $this->acronyms);
    $alternatives = join('|', $escaped);

    $this->pattern = '(?<=^|'.$bound.')(?:'.$alternatives.')(?='.$bound.')';
}
/**
 * Registers the acronym pattern in the given mode, provided there are
 * acronyms and a non-empty pattern.
 *
 * @param string $mode name of the mode being connected to
 */
function connectTo($mode) {
    $hasAcronyms = count($this->acronyms) > 0;

    // short-circuit keeps the pattern untouched when there are no acronyms
    if ( $hasAcronyms && strlen($this->pattern) > 0 ) {
        $this->Lexer->addSpecialPattern($this->pattern, $mode, 'acronym');
    }
}
640 * sort callback to order by string length descending
642 function _compare($a,$b) {
645 if ($a_len > $b_len) {
647 } else if ($a_len < $b_len) {
655 //-------------------------------------------------------------------
656 class Doku_Parser_Mode_smiley
extends Doku_Parser_Mode
{
658 var $smileys = array();
/**
 * Stores the smileys this mode should recognise.
 *
 * Declared as __construct() because PHP 8 no longer treats a method named
 * after its class as the constructor.
 *
 * @param array $smileys smiley strings
 */
function __construct($smileys) {
    $this->smileys = $smileys;
}

/**
 * PHP 4 style constructor name, kept so explicit calls to it still work.
 *
 * @param array $smileys smiley strings
 */
function Doku_Parser_Mode_smiley($smileys) {
    $this->__construct($smileys);
}
665 function preConnect() {
666 if(!count($this->smileys
) ||
$this->pattern
!= '') return;
669 foreach ( $this->smileys
as $smiley ) {
670 $this->pattern
.= $sep.'(?<=\W|^)'.Doku_Lexer_Escape($smiley).'(?=\W|$)';
/**
 * Registers the smiley pattern in the given mode, provided there are
 * smileys and a non-empty pattern.
 *
 * @param string $mode name of the mode being connected to
 */
function connectTo($mode) {
    $hasSmileys = count($this->smileys) > 0;

    // short-circuit keeps the pattern untouched when there are no smileys
    if ( $hasSmileys && strlen($this->pattern) > 0 ) {
        $this->Lexer->addSpecialPattern($this->pattern, $mode, 'smiley');
    }
}
688 //-------------------------------------------------------------------
689 class Doku_Parser_Mode_wordblock
extends Doku_Parser_Mode
{
691 var $badwords = array();
/**
 * Stores the words this mode should match.
 *
 * Declared as __construct() because PHP 8 no longer treats a method named
 * after its class as the constructor.
 *
 * @param array $badwords word strings
 */
function __construct($badwords) {
    $this->badwords = $badwords;
}

/**
 * PHP 4 style constructor name, kept so explicit calls to it still work.
 *
 * @param array $badwords word strings
 */
function Doku_Parser_Mode_wordblock($badwords) {
    $this->__construct($badwords);
}
698 function preConnect() {
700 if ( count($this->badwords
) == 0 ||
$this->pattern
!= '') {
705 foreach ( $this->badwords
as $badword ) {
706 $this->pattern
.= $sep.'(?<=\b)(?i)'.Doku_Lexer_Escape($badword).'(?-i)(?=\b)';
/**
 * Registers the wordblock pattern in the given mode unless the pattern is
 * empty.
 *
 * @param string $mode name of the mode being connected to
 */
function connectTo($mode) {
    $patternLength = strlen($this->pattern);

    if ( $patternLength <= 0 ) {
        return;
    }

    $this->Lexer->addSpecialPattern($this->pattern, $mode, 'wordblock');
}
723 //-------------------------------------------------------------------
724 class Doku_Parser_Mode_entity
extends Doku_Parser_Mode
{
726 var $entities = array();
/**
 * Stores the entities this mode should recognise.
 *
 * Declared as __construct() because PHP 8 no longer treats a method named
 * after its class as the constructor.
 *
 * @param array $entities entity strings
 */
function __construct($entities) {
    $this->entities = $entities;
}

/**
 * PHP 4 style constructor name, kept so explicit calls to it still work.
 *
 * @param array $entities entity strings
 */
function Doku_Parser_Mode_entity($entities) {
    $this->__construct($entities);
}
733 function preConnect() {
734 if(!count($this->entities
) ||
$this->pattern
!= '') return;
737 foreach ( $this->entities
as $entity ) {
738 $this->pattern
.= $sep.Doku_Lexer_Escape($entity);
/**
 * Registers the entity pattern in the given mode, provided there are
 * entities and a non-empty pattern.
 *
 * @param string $mode name of the mode being connected to
 */
function connectTo($mode) {
    $hasEntities = count($this->entities) > 0;

    // short-circuit keeps the pattern untouched when there are no entities
    if ( $hasEntities && strlen($this->pattern) > 0 ) {
        $this->Lexer->addSpecialPattern($this->pattern, $mode, 'entity');
    }
}
756 //-------------------------------------------------------------------
757 // Implements the 640x480 replacement
class Doku_Parser_Mode_multiplyentity extends Doku_Parser_Mode {

    /**
     * Registers the "number x number" pattern (e.g. 640x480) in the given
     * mode, handled as 'multiplyentity'.
     *
     * @param string $mode name of the mode being connected to
     */
    function connectTo($mode) {
        $pattern = '(?<=\b)(?:[1-9]|\d{2,})[xX]\d+(?=\b)';
        $this->Lexer->addSpecialPattern($pattern, $mode, 'multiplyentity');
    }
}
773 //-------------------------------------------------------------------
774 class Doku_Parser_Mode_quotes
extends Doku_Parser_Mode
{
776 function connectTo($mode) {
779 $ws = '\s/\#~:+=&%@\-\x28\x29\]\[{}><"\''; // whitespace
782 if($conf['typography'] == 2){
783 $this->Lexer
->addSpecialPattern(
784 "(?<=^|[$ws])'(?=[^$ws$punc])",$mode,'singlequoteopening'
786 $this->Lexer
->addSpecialPattern(
787 "(?<=^|[^$ws]|[$punc])'(?=$|[$ws$punc])",$mode,'singlequoteclosing'
789 $this->Lexer
->addSpecialPattern(
790 "(?<=^|[^$ws$punc])'(?=$|[^$ws$punc])",$mode,'apostrophe'
794 $this->Lexer
->addSpecialPattern(
795 "(?<=^|[$ws])\"(?=[^$ws$punc])",$mode,'doublequoteopening'
797 $this->Lexer
->addSpecialPattern(
798 "\"",$mode,'doublequoteclosing'
809 //-------------------------------------------------------------------
class Doku_Parser_Mode_camelcaselink extends Doku_Parser_Mode {

    /**
     * Registers the CamelCase word pattern in the given mode, handled as
     * 'camelcaselink'.
     *
     * @param string $mode name of the mode being connected to
     */
    function connectTo($mode) {
        $pattern = '\b[A-Z]+[a-z]+[A-Z][A-Za-z]*\b';
        $this->Lexer->addSpecialPattern($pattern, $mode, 'camelcaselink');
    }
}
823 //-------------------------------------------------------------------
class Doku_Parser_Mode_internallink extends Doku_Parser_Mode {

    /**
     * Registers the [[...]] pattern in the given mode, handled as
     * 'internallink'.
     *
     * @param string $mode name of the mode being connected to
     */
    function connectTo($mode) {
        $pattern = "\[\[.+?\]\]";
        $this->Lexer->addSpecialPattern($pattern, $mode, 'internallink');
    }
}
836 //-------------------------------------------------------------------
class Doku_Parser_Mode_media extends Doku_Parser_Mode {

    /**
     * Registers the {{...}} pattern in the given mode, handled as 'media'.
     *
     * @param string $mode name of the mode being connected to
     */
    function connectTo($mode) {
        $pattern = "\{\{[^\}]+\}\}";
        $this->Lexer->addSpecialPattern($pattern, $mode, 'media');
    }
}
849 //-------------------------------------------------------------------
class Doku_Parser_Mode_rss extends Doku_Parser_Mode {

    /**
     * Registers the {{rss>...}} pattern in the given mode, handled as 'rss'.
     *
     * @param string $mode name of the mode being connected to
     */
    function connectTo($mode) {
        $pattern = "\{\{rss>[^\}]+\}\}";
        $this->Lexer->addSpecialPattern($pattern, $mode, 'rss');
    }
}
861 //-------------------------------------------------------------------
862 class Doku_Parser_Mode_externallink
extends Doku_Parser_Mode
{
863 var $schemes = array();
864 var $patterns = array();
866 function preConnect() {
867 if(count($this->patterns
)) return;
870 $gunk = '/\#~:.?+=&%@!\-';
873 $any = $ltrs.$gunk.$punc;
875 $this->schemes
= getSchemes();
876 foreach ( $this->schemes
as $scheme ) {
877 $this->patterns
[] = '\b(?i)'.$scheme.'(?-i)://['.$any.']+?(?=['.$punc.']*[^'.$any.'])';
880 $this->patterns
[] = '\b(?i)www?(?-i)\.['.$host.']+?\.['.$host.']+?['.$any.']+?(?=['.$punc.']*[^'.$any.'])';
881 $this->patterns
[] = '\b(?i)ftp?(?-i)\.['.$host.']+?\.['.$host.']+?['.$any.']+?(?=['.$punc.']*[^'.$any.'])';
/**
 * Registers every entry of $this->patterns as a special pattern of the
 * given mode, handled as 'externallink'.
 *
 * @param string $mode name of the mode being connected to
 */
function connectTo($mode) {
    $lexer = $this->Lexer;

    foreach ( $this->patterns as $linkPattern ) {
        $lexer->addSpecialPattern($linkPattern, $mode, 'externallink');
    }
}
896 //-------------------------------------------------------------------
897 class Doku_Parser_Mode_filelink
extends Doku_Parser_Mode
{
901 function preConnect() {
904 $gunk = '/\#~:.?+=&%@!\-';
907 $any = $ltrs.$gunk.$punc;
909 $this->pattern
= '\b(?i)file(?-i)://['.$any.']+?['.
910 $punc.']*[^'.$any.']';
/**
 * Registers $this->pattern as a special pattern of the given mode, handled
 * as 'filelink'.
 *
 * @param string $mode name of the mode being connected to
 */
function connectTo($mode) {
    $linkPattern = $this->pattern;
    $this->Lexer->addSpecialPattern($linkPattern, $mode, 'filelink');
}
923 //-------------------------------------------------------------------
class Doku_Parser_Mode_windowssharelink extends Doku_Parser_Mode {

    /**
     * Prepares the pattern for Windows share paths: two backslashes, a name,
     * then one or more backslash-separated segments.
     */
    function preConnect() {
        $sharePattern = "\\\\\\\\\w+?(?:\\\\[\w$]+)+";
        $this->pattern = $sharePattern;
    }

    /**
     * Registers the prepared pattern in the given mode, handled as
     * 'windowssharelink'.
     *
     * @param string $mode name of the mode being connected to
     */
    function connectTo($mode) {
        $sharePattern = $this->pattern;
        $this->Lexer->addSpecialPattern($sharePattern, $mode, 'windowssharelink');
    }
}
942 //-------------------------------------------------------------------
class Doku_Parser_Mode_emaillink extends Doku_Parser_Mode {

    /**
     * Registers an e-mail address wrapped in angle brackets as a special
     * pattern of the given mode, handled as 'emaillink'.
     *
     * @param string $mode name of the mode being connected to
     */
    function connectTo($mode) {
        // the address pattern itself is defined in inc/mail.php
        $pattern = '<'.PREG_PATTERN_VALID_EMAIL.'>';
        $this->Lexer->addSpecialPattern($pattern, $mode, 'emaillink');
    }
}
956 //Setup VIM: ex: et ts=4 enc=utf-8 :