3 +---------------------------------------------------------------------------------+
4 | Copyright (c) 2010 César Rodas and Menéame Comunicacions S.L. |
5 +---------------------------------------------------------------------------------+
6 | Redistribution and use in source and binary forms, with or without |
7 | modification, are permitted provided that the following conditions are met: |
8 | 1. Redistributions of source code must retain the above copyright |
9 | notice, this list of conditions and the following disclaimer. |
11 | 2. Redistributions in binary form must reproduce the above copyright |
12 | notice, this list of conditions and the following disclaimer in the |
13 | documentation and/or other materials provided with the distribution. |
15 | 3. All advertising materials mentioning features or use of this software |
16 | must display the following acknowledgement: |
17 | This product includes software developed by César D. Rodas. |
19 | 4. Neither the name of the César D. Rodas nor the |
20 | names of its contributors may be used to endorse or promote products |
21 | derived from this software without specific prior written permission. |
23 | THIS SOFTWARE IS PROVIDED BY CÉSAR D. RODAS ''AS IS'' AND ANY |
24 | EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED |
25 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE |
26 | DISCLAIMED. IN NO EVENT SHALL CÉSAR D. RODAS BE LIABLE FOR ANY |
27 | DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES |
28 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; |
29 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND |
30 | ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
31 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS |
32 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE |
33 +---------------------------------------------------------------------------------+
34 | Authors: César Rodas <crodas@php.net> |
35 +---------------------------------------------------------------------------------+
/**
 * Thin subclass of Haanga_Compiler_Parser.
 *
 * It adds no behaviour of its own; it only provides a shorter class name so
 * the parser's constants can be referenced more easily
 * (e.g. HG_Parser::T_AND).
 */
class HG_Parser extends Haanga_Compiler_Parser
{
}
45 * Hand-written Tokenizer class inspired by SQLite's tokenize.c
48 class Haanga_Compiler_Tokenizer
50 /* they are case sensitive and sorted! */
51 static $keywords = array(
52 'AND' => HG_Parser
::T_AND
,
53 'FALSE' => HG_Parser
::T_FALSE
,
54 'NOT' => HG_Parser
::T_NOT
,
55 'OR' => HG_Parser
::T_OR
,
56 'TRUE' => HG_Parser
::T_TRUE
,
57 '_(' => HG_Parser
::T_INTL
,
58 'as' => HG_Parser
::T_AS
,
59 'autoescape' => HG_Parser
::T_AUTOESCAPE
,
60 'block' => HG_Parser
::T_BLOCK
,
61 'by' => HG_Parser
::T_BY
,
62 'else' => HG_Parser
::T_ELSE
,
63 'empty' => HG_Parser
::T_EMPTY
,
64 'extends' => HG_Parser
::T_EXTENDS
,
65 'filter' => HG_Parser
::T_FILTER
,
66 'for' => HG_Parser
::T_FOR
,
67 'if' => HG_Parser
::T_IF
,
68 'ifchanged' => HG_Parser
::T_IFCHANGED
,
69 'ifequal' => HG_Parser
::T_IFEQUAL
,
70 'ifnotequal' => HG_Parser
::T_IFNOTEQUAL
,
71 'in' => HG_Parser
::T_IN
,
72 'include' => HG_Parser
::T_INCLUDE
,
73 'load' => HG_Parser
::T_LOAD
,
74 'not' => HG_Parser
::T_NOT
,
75 'regroup' => HG_Parser
::T_REGROUP
,
76 'set' => HG_Parser
::T_SET
,
77 'spacefull' => HG_Parser
::T_SPACEFULL
,
78 'step' => HG_Parser
::T_STEP
,
79 'with' => HG_Parser
::T_WITH
,
82 /* common operations */
83 static $operators_single = array(
84 '!' => HG_Parser
::T_NOT
,
85 '%' => HG_Parser
::T_MOD
,
86 '&' => HG_Parser
::T_BITWISE
,
87 '(' => HG_Parser
::T_LPARENT
,
88 ')' => HG_Parser
::T_RPARENT
,
89 '*' => HG_Parser
::T_TIMES
,
90 '+' => HG_Parser
::T_PLUS
,
91 ',' => HG_Parser
::T_COMMA
,
92 '-' => HG_Parser
::T_MINUS
,
93 '.' => HG_Parser
::T_DOT
,
94 '/' => HG_Parser
::T_DIV
,
95 ':' => HG_Parser
::T_COLON
,
96 '<' => HG_Parser
::T_LT
,
97 '=' => HG_Parser
::T_ASSIGN
,
98 '>' => HG_Parser
::T_GT
,
99 '[' => HG_Parser
::T_BRACKETS_OPEN
,
100 ']' => HG_Parser
::T_BRACKETS_CLOSE
,
101 '|' => HG_Parser
::T_PIPE
,
103 static $operators = array(
104 '!==' => HG_Parser
::T_NE
,
105 '!=' => HG_Parser
::T_NE
,
106 '&&' => HG_Parser
::T_AND
,
107 '->' => HG_Parser
::T_OBJ
,
108 '..' => HG_Parser
::T_DOTDOT
,
109 '<<' => HG_Parser
::T_BITWISE
,
110 '<=' => HG_Parser
::T_LE
,
111 '===' => HG_Parser
::T_EQ
,
112 '==' => HG_Parser
::T_EQ
,
113 '>=' => HG_Parser
::T_GE
,
114 '>>' => HG_Parser
::T_BITWISE
,
115 '||' => HG_Parser
::T_OR
,
118 static $close_tags = array();
120 static $open_tag = "{%";
121 static $end_tag = "%}";
122 static $open_comment = "{#";
123 static $end_comment = "#}";
124 static $open_print = "{{";
125 static $end_print = "}}";
130 public $status = self
::IN_NONE
;
137 function __construct($data, $compiler, $file)
140 $this->compiler
= $compiler;
144 $this->length
= strlen($data);
147 /*$tmp1 = self::$operators;
150 var_dump($tmp2, $tmp1 === $tmp2);die(); /**/
152 self
::$close_tags =array(
153 self
::$end_tag => HG_Parser
::T_TAG_CLOSE
,
154 self
::$end_print => HG_Parser
::T_PRINT_CLOSE
,
158 $this->open_tags
= array(
159 self
::$open_tag => HG_Parser
::T_TAG_OPEN
,
160 self
::$open_print => HG_Parser
::T_PRINT_OPEN
,
161 self
::$open_comment => HG_Parser
::T_COMMENT
,
169 if ($this->length
== $this->N
) {
170 if ($this->status
!= self
::IN_NONE
&& $this->status
!= self
::IN_HTML
) {
171 $this->Error("Unexpected end");
176 if ($this->status
== self
::IN_NONE
) {
178 $data = substr($this->data
, $i, 12);
180 static $lencache = array();
181 foreach ($this->open_tags
as $value => $token) {
182 if (!isset($lencache[$value])) {
183 $lencache[$value] = strlen($value);
185 $len = $lencache[$value];
186 if (strncmp($data, $value, $len) == 0) {
187 $this->value
= $value;
188 $this->token
= $token;
190 switch ($this->token
) {
191 case HG_Parser
::T_TAG_OPEN
:
192 $this->status
= self
::IN_TAG
;
194 case HG_Parser
::T_COMMENT
:
195 $zdata = & $this->data
;
197 if (($pos=strpos($zdata, self
::$end_comment, $i)) === FALSE) {
198 $this->error("unexpected end");
201 $this->value
= substr($zdata, $i, $pos-2);
202 $this->status
= self
::IN_NONE
;
205 case HG_Parser
::T_PRINT_OPEN
:
206 $this->status
= self
::IN_ECHO
;
213 $this->status
= self
::IN_HTML
;
216 switch ($this->status
)
227 if (empty($this->token
)) {
228 if ($this->status
!= self
::IN_NONE
&& $this->status
!= self
::IN_HTML
) {
229 $this->Error("Unexpected end");
238 function yylex_html()
240 $data = &$this->data
;
243 foreach ($this->open_tags
as $value => $status) {
244 $pos = strpos($data, $value, $i);
245 if ($pos === FALSE) {
248 if (!isset($lowest_pos) ||
$lowest_pos > $pos) {
253 if (isset($lowest_pos)) {
254 $this->value
= substr($data, $i, $lowest_pos-$i);
255 $this->token
= HG_Parser
::T_HTML
;
256 $this->status
= self
::IN_NONE
;
257 $i +
= $lowest_pos - $i;
259 $this->value
= substr($data, $i);
260 $this->token
= HG_Parser
::T_HTML
;
264 $this->line +
= substr_count($this->value
, "\n");
269 function yylex_main()
271 $data = &$this->data
;
273 for ($i=&$this->N
; is_null($this->token
) && $i < $this->length
; ++
$i) {
281 while ($data[++
$i] != $end) {
284 switch ($data[++
$i]) {
299 if ($data[$i] == "\n") {
304 if (!isset($data[$i+
1])) {
305 $this->Error("unclosed string");
308 $this->value
= $value;
309 $this->token
= HG_Parser
::T_STRING
;
314 case '0': case '1': case '2': case '3': case '4':
315 case '5': case '6': case '7': case '8': case '9':
318 for ($e=0; $i < $this->length
; ++
$e, ++
$i) {
320 case '0': case '1': case '2': case '3': case '4':
321 case '5': case '6': case '7': case '8': case '9':
329 $this->error("Invalid number");
333 break 2; /* break the main loop */
336 if (!$this->is_token_end($data[$i]) &&
337 !isset(self
::$operators_single[$data[$i]]) ||
$value[$e-1] == '.') {
338 $this->error("Unexpected '{$data[$i]}'");
340 $this->value
= $value;
341 $this->token
= HG_Parser
::T_NUMERIC
;
345 case "\n": case " ": case "\t": case "\r": case "\f":
346 for (; is_null($this->token
) && $i < $this->length
; ++
$i) {
350 case " ": case "\t": case "\r": case "\f":
353 if ($data[$i+
1] != '.') {
354 $this->token
= HG_Parser
::T_CONCAT
;
360 /* break main loop */
361 /* and decrease because last processed byte */
362 /* wasn't a dot (T_CONCAT) */
367 break; /* whitespaces are ignored */
369 if (!$this->getTag() && !$this->getOperator()) {
370 $alpha = $this->getAlpha();
371 if ($alpha === FALSE) {
372 $this->error("error: unexpected ".substr($data, $i));
376 $tag = Haanga_Extension
::getInstance('Tag');
378 $value = $tag->isValid($alpha);
379 $this->token
= $value ?
$value : HG_Parser
::T_ALPHA
;
380 $this->value
= $alpha;
387 if ($this->token
== HG_Parser
::T_TAG_CLOSE ||
388 $this->token
== HG_Parser
::T_PRINT_CLOSE
) {
389 $this->status
= self
::IN_NONE
;
396 static $lencache = array();
399 $data = substr($this->data
, $i, 12);
400 foreach (self
::$close_tags as $value => $token) {
401 if (!isset($lencache[$value])) {
402 $lencache[$value] = strlen($value);
404 $len = $lencache[$value];
405 if (strncmp($data, $value, $len) == 0) {
406 $this->token
= $token;
407 $this->value
= $value;
413 foreach (self
::$keywords as $value => $token) {
414 if (!isset($lencache[$value])) {
415 $lencache[$value] = strlen($value);
417 $len = $lencache[$value];
418 switch (strncmp($data, $value, $len)) {
422 if (isset($data[$len]) && !$this->is_token_end($data[$len])) {
423 /* probably a variable name TRUEfoo (and not TRUE) */
426 $this->token
= $token;
427 $this->value
= $value;
433 /* /end([a-zA-Z][a-zA-Z0-9]*)/ */
434 if (strncmp($data, "end", 3) == 0) {
435 $this->value
= $this->getAlpha();
436 $this->token
= HG_Parser
::T_CUSTOM_END
;
/**
 * Abort tokenizing by throwing a Haanga_Compiler_Exception.
 *
 * The message is $text followed by " in <file>:<line>", using the
 * tokenizer's current file and line properties.
 *
 * @param string $text Description of the problem
 *
 * @throws Haanga_Compiler_Exception always
 */
function Error($text)
{
    $message = "{$text} in {$this->file}:{$this->line}";

    throw new Haanga_Compiler_Exception($message);
}
448 function getOperator()
450 static $lencache = array();
453 $data = substr($this->data
, $i, 12);
455 foreach (self
::$operators as $value => $token) {
456 if (!isset($lencache[$value])) {
457 $lencache[$value] = strlen($value);
459 $len = $lencache[$value];
460 switch (strncmp($data, $value, $len)) {
462 if (strlen($data) == $len) {
467 $this->token
= $token;
468 $this->value
= $value;
474 $data = $this->data
[$i];
475 foreach (self
::$operators_single as $value => $token) {
476 if ($value == $data) {
477 $this->token
= $token;
478 $this->value
= $value;
481 } else if ($value > $data) {
492 * Return TRUE if $letter is a valid "token_end". We use token_end
493 * to avoid confusing the T_ALPHA "TRUEfoo" with TRUE followed by foo (T_ALPHA)
495 * @param string $letter
499 protected function is_token_end($letter)
503 ('a' <= $letter && 'z' >= $letter) ||
504 ('A' <= $letter && 'Z' >= $letter) ||
505 ('0' <= $letter && '9' >= $letter) ||
512 /* [a-zA-Z_][a-zA-Z0-9_]* */
514 $data = &$this->data
;
516 if ( !('a' <= $data[$i] && 'z' >= $data[$i]) &&
517 !('A' <= $data[$i] && 'Z' >= $data[$i]) && $data[$i] != '_') {
522 for (; $i < $this->length
; ++
$i) {
524 ('a' <= $data[$i] && 'z' >= $data[$i]) ||
525 ('A' <= $data[$i] && 'Z' >= $data[$i]) ||
526 ('0' <= $data[$i] && '9' >= $data[$i]) ||
/**
 * Tokenize $template and push every token through the parser.
 *
 * A Haanga_Compiler_Tokenizer and a Haanga_Compiler_Parser are created, the
 * compiler is attached to the parser, and each token produced by yylex() is
 * handed to doParse(). After the last token, doParse(0, 0) is called to
 * signal the end of input.
 *
 * If anything throws while tokens are being fed, the parser is flushed with
 * doParse(0, 0) on a best-effort basis and the ORIGINAL exception is
 * re-thrown. The cleanup call uses its own catch variable so a failure
 * during cleanup can never replace the exception that actually caused the
 * abort.
 *
 * @param string $template Template source to compile
 * @param mixed  $compiler Compiler object, stored on the parser as ->compiler
 * @param string $file     File name passed to the tokenizer and the parser
 *
 * @return array The parser's body, cast to array
 *
 * @throws Exception whatever the tokenizer or the parser raised
 */
static function init($template, $compiler, $file='')
{
    $lexer  = new Haanga_Compiler_Tokenizer($template, $compiler, $file);
    $parser = new Haanga_Compiler_Parser($lexer, $file);

    $parser->compiler = $compiler;

    try {
        /* feed tokens until the lexer reports there is nothing left */
        while ($lexer->yylex()) {
            $parser->doParse($lexer->token, $lexer->value);
        }
    } catch (Exception $e) {
        /* destroy the parser */
        try {
            $parser->doParse(0, 0);
        } catch (Exception $cleanupError) {
            /* ignored on purpose: $e is the failure the caller must see */
        }
        throw $e; /* re-throw exception */
    }

    $parser->doParse(0, 0);

    return (array)$parser->body;
}