+ Moved variables resolution in the compiler Parser. (Temp. solution until the new...
[haanga.git] / lib / Haanga / Compiler / Tokenizer.php
blob7db11392a1b0cfdcaefe639cd87699d1d5f59349
1 <?php
2 /*
3 +---------------------------------------------------------------------------------+
4 | Copyright (c) 2010 César Rodas and Menéame Comunicacions S.L. |
5 +---------------------------------------------------------------------------------+
6 | Redistribution and use in source and binary forms, with or without |
7 | modification, are permitted provided that the following conditions are met: |
8 | 1. Redistributions of source code must retain the above copyright |
9 | notice, this list of conditions and the following disclaimer. |
10 | |
11 | 2. Redistributions in binary form must reproduce the above copyright |
12 | notice, this list of conditions and the following disclaimer in the |
13 | documentation and/or other materials provided with the distribution. |
14 | |
15 | 3. All advertising materials mentioning features or use of this software |
16 | must display the following acknowledgement: |
17 | This product includes software developed by César D. Rodas. |
18 | |
19 | 4. Neither the name of the César D. Rodas nor the |
20 | names of its contributors may be used to endorse or promote products |
21 | derived from this software without specific prior written permission. |
22 | |
23 | THIS SOFTWARE IS PROVIDED BY CÉSAR D. RODAS ''AS IS'' AND ANY |
24 | EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED |
25 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE |
26 | DISCLAIMED. IN NO EVENT SHALL CÉSAR D. RODAS BE LIABLE FOR ANY |
27 | DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES |
28 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; |
29 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND |
30 | ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
31 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS |
32 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE |
33 +---------------------------------------------------------------------------------+
34 | Authors: César Rodas <crodas@php.net> |
35 +---------------------------------------------------------------------------------+
/**
 *  Empty subclass of the generated parser. It exists only to make
 *  references to the token constants easier (HG_Parser::T_*).
 */
class HG_Parser extends Haanga_Compiler_Parser
{
}
44 /**
45 * Hand-written Tokenizer class inspired by SQLite's tokenize.c
48 class Haanga_Compiler_Tokenizer
/**
 *  Reserved words recognised inside tags, mapped to their parser token.
 *
 *  Matching is case sensitive. The entries are listed in ascending byte
 *  order (upper case, then '_', then lower case); getTag() stops scanning
 *  as soon as a keyword compares greater than the input, so keep the
 *  table sorted when adding words.
 */
public static $keywords = array(
    'AND'        => HG_Parser::T_AND,
    'FALSE'      => HG_Parser::T_FALSE,
    'NOT'        => HG_Parser::T_NOT,
    'OR'         => HG_Parser::T_OR,
    'TRUE'       => HG_Parser::T_TRUE,
    '_('         => HG_Parser::T_INTL,
    'as'         => HG_Parser::T_AS,
    'autoescape' => HG_Parser::T_AUTOESCAPE,
    'block'      => HG_Parser::T_BLOCK,
    'by'         => HG_Parser::T_BY,
    'else'       => HG_Parser::T_ELSE,
    'empty'      => HG_Parser::T_EMPTY,
    'extends'    => HG_Parser::T_EXTENDS,
    'filter'     => HG_Parser::T_FILTER,
    'for'        => HG_Parser::T_FOR,
    'if'         => HG_Parser::T_IF,
    'ifchanged'  => HG_Parser::T_IFCHANGED,
    'ifequal'    => HG_Parser::T_IFEQUAL,
    'ifnotequal' => HG_Parser::T_IFNOTEQUAL,
    'in'         => HG_Parser::T_IN,
    'include'    => HG_Parser::T_INCLUDE,
    'load'       => HG_Parser::T_LOAD,
    'not'        => HG_Parser::T_NOT,
    'regroup'    => HG_Parser::T_REGROUP,
    'set'        => HG_Parser::T_SET,
    'spacefull'  => HG_Parser::T_SPACEFULL,
    'step'       => HG_Parser::T_STEP,
    'with'       => HG_Parser::T_WITH,
);
/**
 *  Single-character operators and punctuation, mapped to their parser
 *  token. Consulted by getOperator() only after no multi-character
 *  operator matched.
 *
 *  Keep the entries in ascending byte order: a lookup may stop early at
 *  the first key that sorts after the input character.
 */
public static $operators_single = array(
    '!' => HG_Parser::T_NOT,
    '%' => HG_Parser::T_MOD,
    '&' => HG_Parser::T_BITWISE,
    '(' => HG_Parser::T_LPARENT,
    ')' => HG_Parser::T_RPARENT,
    '*' => HG_Parser::T_TIMES,
    '+' => HG_Parser::T_PLUS,
    ',' => HG_Parser::T_COMMA,
    '-' => HG_Parser::T_MINUS,
    '.' => HG_Parser::T_DOT,
    '/' => HG_Parser::T_DIV,
    ':' => HG_Parser::T_COLON,
    '<' => HG_Parser::T_LT,
    '=' => HG_Parser::T_ASSIGN,
    '>' => HG_Parser::T_GT,
    '[' => HG_Parser::T_BRACKETS_OPEN,
    ']' => HG_Parser::T_BRACKETS_CLOSE,
    '|' => HG_Parser::T_PIPE,
);
/**
 *  Multi-character operators, mapped to their parser token.
 *
 *  getOperator() walks this table in order and returns the first entry
 *  that prefixes the input, so a longer spelling must precede any
 *  operator it starts with ('!==' before '!=', '===' before '==').
 */
public static $operators = array(
    '!==' => HG_Parser::T_NE,
    '!='  => HG_Parser::T_NE,
    '&&'  => HG_Parser::T_AND,
    '->'  => HG_Parser::T_OBJ,
    '..'  => HG_Parser::T_DOTDOT,
    '::'  => HG_Parser::T_CLASS,
    '<<'  => HG_Parser::T_BITWISE,
    '<='  => HG_Parser::T_LE,
    '===' => HG_Parser::T_EQ,
    '=='  => HG_Parser::T_EQ,
    '>='  => HG_Parser::T_GE,
    '>>'  => HG_Parser::T_BITWISE,
    '||'  => HG_Parser::T_OR,
);
/* closing delimiter => token; rebuilt by the constructor */
public static $close_tags = array();

/* template delimiters */
public static $open_tag     = "{%";
public static $end_tag      = "%}";
public static $open_comment = "{#";
public static $end_comment  = "#}";
public static $open_print   = "{{";
public static $end_print    = "}}";

/* opening delimiter => token; filled by the constructor */
public $open_tags;
/* text of the token produced by the last yylex() call */
public $value;
/* parser token id produced by the last yylex() call (NULL if none) */
public $token;
/* one of the IN_* constants below */
public $status = self::IN_NONE;

/*
 * Properties assigned by the constructor. They used to be created
 * dynamically, which is deprecated as of PHP 8.2; they are declared
 * public so their visibility is unchanged.
 */
public $data;     /* template source being tokenized */
public $compiler; /* compiler object handed to the constructor */
public $line;     /* current line number, starting at 1 */
public $N;        /* current byte offset inside $data */
public $file;     /* file name used in error messages */
public $length;   /* strlen($data) */

/* tokenizer states */
const IN_NONE = 0;
const IN_HTML = 1;
const IN_TAG  = 2;
const IN_ECHO = 3;
/**
 *  Prepare the tokenizer for one template.
 *
 *  @param string $data     Template source
 *  @param object $compiler Compiler object, stored as-is
 *  @param string $file     File name, used in error messages
 */
function __construct($data, $compiler, $file)
{
    /* input and bookkeeping */
    $this->data     = $data;
    $this->length   = strlen($data);
    $this->N        = 0;
    $this->line     = 1;
    $this->file     = $file;
    $this->compiler = $compiler;

    /* delimiter tables are built from the (overridable) static delimiters */
    $this->open_tags = array(
        self::$open_tag     => HG_Parser::T_TAG_OPEN,
        self::$open_print   => HG_Parser::T_PRINT_OPEN,
        self::$open_comment => HG_Parser::T_COMMENT,
    );

    self::$close_tags = array(
        self::$end_tag   => HG_Parser::T_TAG_CLOSE,
        self::$end_print => HG_Parser::T_PRINT_CLOSE,
    );
}
/**
 *  Produce the next token.
 *
 *  On success $this->token and $this->value describe the token and the
 *  offset $this->N has been moved past it.
 *
 *  @return bool TRUE if a token was produced, FALSE at the end of input
 *
 *  @throws Haanga_Compiler_Exception if the input ends inside a tag, a
 *          print block or a comment
 */
function yylex()
{
    $this->token = NULL;

    if ($this->length == $this->N) {
        if ($this->status != self::IN_NONE && $this->status != self::IN_HTML) {
            $this->Error("Unexpected end");
        }
        return FALSE;
    }

    if ($this->status == self::IN_NONE) {
        $i    = &$this->N;
        $data = substr($this->data, $i, 12);

        static $lencache = array();
        foreach ($this->open_tags as $value => $token) {
            if (!isset($lencache[$value])) {
                $lencache[$value] = strlen($value);
            }
            $len = $lencache[$value];
            if (strncmp($data, $value, $len) != 0) {
                continue;
            }

            /* an opening delimiter starts at the current offset */
            $this->value = $value;
            $this->token = $token;
            $i += $len;

            switch ($token) {
            case HG_Parser::T_TAG_OPEN:
                $this->status = self::IN_TAG;
                break;
            case HG_Parser::T_COMMENT:
                /* the whole comment is consumed as a single token */
                $pos = strpos($this->data, self::$end_comment, $i);
                if ($pos === FALSE) {
                    $this->Error("unexpected end");
                }

                /* substr() takes a length, not an end offset */
                $this->value  = substr($this->data, $i, $pos - $i);
                /* keep line numbers right after multi-line comments */
                $this->line  += substr_count($this->value, "\n");
                $this->status = self::IN_NONE;
                $i = $pos + strlen(self::$end_comment);
                break;
            case HG_Parser::T_PRINT_OPEN:
                $this->status = self::IN_ECHO;
                break;
            }

            return TRUE;
        }

        /* no delimiter here: what follows is plain HTML */
        $this->status = self::IN_HTML;
    }

    switch ($this->status) {
    case self::IN_TAG:
    case self::IN_ECHO:
        $this->yylex_main();
        break;
    default:
        $this->yylex_html();
    }

    if (empty($this->token)) {
        if ($this->status != self::IN_NONE && $this->status != self::IN_HTML) {
            $this->Error("Unexpected end");
        }
        return FALSE;
    }

    return TRUE;
}
/**
 *  Emit a T_HTML token with everything from the current offset up to
 *  the nearest opening delimiter, or up to the end of the input when no
 *  delimiter follows. Newlines inside the chunk are added to the line
 *  counter.
 */
function yylex_html()
{
    $offset = &$this->N;
    $source = &$this->data;

    /* nearest opening delimiter at or after the current offset */
    $nearest = NULL;
    foreach ($this->open_tags as $delimiter => $unused) {
        $found = strpos($source, $delimiter, $offset);
        if ($found !== FALSE && ($nearest === NULL || $nearest > $found)) {
            $nearest = $found;
        }
    }

    $this->token = HG_Parser::T_HTML;
    if ($nearest === NULL) {
        /* no more tags: the remainder of the template is HTML */
        $this->value = substr($source, $offset);
        $offset      = $this->length;
    } else {
        $this->value  = substr($source, $offset, $nearest - $offset);
        $this->status = self::IN_NONE;
        $offset       = $nearest;
    }

    $this->line += substr_count($this->value, "\n");
}
/**
 *  Tokenize the inside of a tag ({% ... %}) or a print block ({{ ... }}).
 *
 *  Recognises quoted strings, numbers, whitespace (including the
 *  whitespace + '.' concatenation operator), closing delimiters,
 *  keywords, operators and identifiers. When a closing delimiter is
 *  produced the state goes back to IN_NONE.
 *
 *  Look-ahead reads are guarded with isset(), so a template that ends
 *  in the middle of a tag is reported through Error() instead of first
 *  raising "uninitialized string offset" diagnostics.
 *
 *  @throws Haanga_Compiler_Exception on malformed input
 */
function yylex_main()
{
    $data = &$this->data;

    for ($i=&$this->N; is_null($this->token) && $i < $this->length; ++$i) {
        switch ($data[$i]) {

        /* strings {{{ */
        case '"':
        case "'":
            $end   = $data[$i];
            $value = "";
            if (!isset($data[$i+1])) {
                /* the opening quote is the last byte of the input */
                $this->Error("unclosed string");
            }
            while ($data[++$i] != $end) {
                switch ($data[$i]) {
                case "\\":
                    if (!isset($data[$i+1])) {
                        /* a backslash is the last byte of the input */
                        $this->Error("unclosed string");
                    }
                    switch ($data[++$i]) {
                    case "n":
                        $value .= "\n";
                        break;
                    case "t":
                        $value .= "\t";
                        break;
                    default:
                        /* any other escaped byte is taken literally */
                        $value .= $data[$i];
                    }
                    break;
                default:
                    if ($data[$i] == "\n") {
                        $this->line++;
                    }
                    $value .= $data[$i];
                }
                if (!isset($data[$i+1])) {
                    $this->Error("unclosed string");
                }
            }
            $this->value = $value;
            $this->token = HG_Parser::T_STRING;
            /* the loop's ++$i steps over the closing quote */
            break;
        /* }}} */

        /* number {{{ */
        case '0': case '1': case '2': case '3': case '4':
        case '5': case '6': case '7': case '8': case '9':
            $value = "";
            $dot   = FALSE;
            /* $e counts the bytes consumed, i.e. strlen($value) */
            for ($e=0; $i < $this->length; ++$e, ++$i) {
                switch ($data[$i]) {
                case '0': case '1': case '2': case '3': case '4':
                case '5': case '6': case '7': case '8': case '9':
                    $value .= $data[$i];
                    break;
                case '.':
                    if (!$dot) {
                        $value .= ".";
                        $dot    = TRUE;
                    } else {
                        $this->Error("Invalid number");
                    }
                    break;
                default:
                    break 2; /* break the main loop */
                }
            }
            /* byte after the number; '' when the input ends here */
            $next = isset($data[$i]) ? $data[$i] : '';
            if (!$this->is_token_end($next) &&
                !isset(self::$operators_single[$next]) || $value[$e-1] == '.') {
                $this->Error("Unexpected '{$next}'");
            }
            $this->value = $value;
            $this->token = HG_Parser::T_NUMERIC;
            /* leave the loop without ++$i: $i is already past the number */
            break 2;
        /* }}} */

        case "\n": case " ": case "\t": case "\r": case "\f":
            for (; is_null($this->token) && $i < $this->length; ++$i) {
                switch ($data[$i]) {
                case "\n":
                    $this->line++;
                case " ": case "\t": case "\r": case "\f":
                    break;
                case '.':
                    /* whitespace followed by a single dot is a concat */
                    if (!isset($data[$i+1]) || $data[$i+1] != '.') {
                        $this->token = HG_Parser::T_CONCAT;
                        $this->value = '.';
                        $i++;
                        return;
                    }
                    /* '..' is not a concat: fall through */
                default:
                    /* break main loop */
                    /* and decrease because last processed byte */
                    /* wasn't a dot (T_CONCAT) */
                    --$i;
                    break 2;
                }
            }
            break; /* whitespaces are ignored */

        default:
            if (!$this->getTag() && !$this->getOperator()) {
                $alpha = $this->getAlpha();
                if ($alpha === FALSE) {
                    $this->Error("error: unexpected ".substr($data, $i));
                }
                static $tag=NULL;
                if (!$tag) {
                    $tag = Haanga_Extension::getInstance('Tag');
                }
                /* custom tags get their own token, the rest are T_ALPHA */
                $value = $tag->isValid($alpha);
                $this->token = $value ? $value : HG_Parser::T_ALPHA;
                $this->value = $alpha;
            }
            /* the helpers already advanced the offset */
            break 2;
        }
    }

    if ($this->token == HG_Parser::T_TAG_CLOSE ||
        $this->token == HG_Parser::T_PRINT_CLOSE) {
        $this->status = self::IN_NONE;
    }
}
/**
 *  Try to read a closing delimiter, a keyword or a custom "end*" tag at
 *  the current offset.
 *
 *  On success $this->token / $this->value are set and the offset is
 *  moved past the match.
 *
 *  @return bool TRUE if something was matched, FALSE otherwise
 */
function getTag()
{
    static $lencache = array();

    $i    = &$this->N;
    $data = substr($this->data, $i, 12);

    foreach (self::$close_tags as $value => $token) {
        if (!isset($lencache[$value])) {
            $lencache[$value] = strlen($value);
        }
        $len = $lencache[$value];
        if (strncmp($data, $value, $len) == 0) {
            $this->token = $token;
            $this->value = $value;
            $i += $len;
            return TRUE;
        }
    }

    foreach (self::$keywords as $value => $token) {
        if (!isset($lencache[$value])) {
            $lencache[$value] = strlen($value);
        }
        $len = $lencache[$value];
        $cmp = strncmp($data, $value, $len);

        if ($cmp < 0) {
            /*
             * The table is sorted, so no later keyword can match.
             * Any negative result counts: strncmp() only guarantees
             * the sign, not the value -1, before PHP 8.2.
             */
            break;
        }
        if ($cmp > 0) {
            continue;
        }
        if (isset($data[$len]) && !$this->is_token_end($data[$len])) {
            /* probably a variable name TRUEfoo (and not TRUE) */
            continue;
        }

        $this->token = $token;
        $this->value = $value;
        $i += $len;
        return TRUE;
    }

    /* /end([a-zA-Z][a-zA-Z0-9]*)/ */
    if (strncmp($data, "end", 3) == 0) {
        $this->value = $this->getAlpha();
        $this->token = HG_Parser::T_CUSTOM_END;
        return TRUE;
    }

    return FALSE;
}
/**
 *  Abort tokenizing: throw an exception whose message is $text followed
 *  by the current file name and line number.
 *
 *  @param string $text Error description
 *
 *  @throws Haanga_Compiler_Exception always
 */
function Error($text)
{
    $where = $this->file.":".$this->line;

    throw new Haanga_Compiler_Exception("{$text} in {$where}");
}
/**
 *  Try to read an operator at the current offset. Multi-character
 *  operators are tried first, in table order, then single characters.
 *
 *  On success $this->token / $this->value are set and the offset is
 *  moved past the operator.
 *
 *  @return bool TRUE if an operator was matched, FALSE otherwise
 */
function getOperator()
{
    static $lencache = array();

    $i    = &$this->N;
    $data = substr($this->data, $i, 12);

    /*
     * Every entry is tested. The former early exit relied on strncmp()
     * returning exactly -1 and could skip '!=' or '==' when the input
     * ended right after the operand (e.g. "!=5").
     */
    foreach (self::$operators as $value => $token) {
        if (!isset($lencache[$value])) {
            $lencache[$value] = strlen($value);
        }
        $len = $lencache[$value];
        if (strncmp($data, $value, $len) == 0) {
            $this->token = $token;
            $this->value = $value;
            $i += $len;
            return TRUE;
        }
    }

    /* single-character operators: direct lookup instead of a scan */
    $char = $this->data[$i];
    if (isset(self::$operators_single[$char])) {
        $this->token = self::$operators_single[$char];
        $this->value = $char;
        $i += 1;
        return TRUE;
    }

    return FALSE;
}
/**
 *  Tell whether $letter may follow a complete token, i.e. whether it
 *  is anything but a letter, a digit or an underscore. It is used to
 *  tell the keyword TRUE apart from an identifier such as TRUEfoo.
 *
 *  @param string $letter
 *
 *  @return bool
 */
protected function is_token_end($letter)
{
    /* [^a-zA-Z0-9_] */
    if ($letter == "_") {
        return FALSE;
    }
    if ($letter >= 'a' && $letter <= 'z') {
        return FALSE;
    }
    if ($letter >= 'A' && $letter <= 'Z') {
        return FALSE;
    }
    if ($letter >= '0' && $letter <= '9') {
        return FALSE;
    }

    return TRUE;
}
/**
 *  Read an identifier ([a-zA-Z_][a-zA-Z0-9_]*) at the current offset
 *  and move the offset past it.
 *
 *  @return string|bool The identifier, or FALSE (offset untouched) when
 *                      the current byte cannot start one
 */
function getAlpha()
{
    $pos  = &$this->N;
    $text = &$this->data;

    /* first byte: letter or underscore only */
    $first    = $text[$pos];
    $isLetter = ($first >= 'a' && $first <= 'z') ||
                ($first >= 'A' && $first <= 'Z');
    if (!$isLetter && $first != '_') {
        return FALSE;
    }

    /* following bytes: letters, digits or underscores */
    $word = "";
    while ($pos < $this->length) {
        $c = $text[$pos];
        $isWordByte = ($c >= 'a' && $c <= 'z') ||
                      ($c >= 'A' && $c <= 'Z') ||
                      ($c >= '0' && $c <= '9') ||
                      $c == "_";
        if (!$isWordByte) {
            break;
        }
        $word .= $c;
        ++$pos;
    }

    return $word;
}
/**
 *  Line number the tokenizer has reached so far.
 *
 *  @return int
 */
function getLine()
{
    return $this->line;
}
/**
 *  Tokenize and parse a template in one go.
 *
 *  @param string $template Template source
 *  @param object $compiler Compiler object, handed to the lexer and parser
 *  @param string $file     File name used in error messages
 *
 *  @return array The parser's body, cast to array
 *
 *  @throws Exception Whatever the lexer or the parser raised first
 */
public static function init($template, $compiler, $file='')
{
    $lexer  = new Haanga_Compiler_Tokenizer($template, $compiler, $file);
    $parser = new Haanga_Compiler_Parser($lexer, $file);

    $parser->compiler = $compiler;

    try {
        while ($lexer->yylex()) {
            $parser->doParse($lexer->token, $lexer->value);
        }
    } catch (Exception $e) {
        /* destroy the parser */
        try {
            $parser->doParse(0, 0);
        } catch (Exception $ignored) {
            /*
             * A failure while tearing the parser down is dropped on
             * purpose. It is caught under another name so that it
             * cannot replace the original error.
             */
        }
        throw $e; /* re-throw the original exception */
    }

    $parser->doParse(0, 0);

    return (array)$parser->body;
}