D-Bus: Remove extra semicolon to avoid C warning
[vala-lang.git] / vala / valageniescanner.vala
blobc853827632f8b6cda40ab72c132e23b426eb4c49
/* valageniescanner.vala
 *
 * Copyright (C) 2008 Jamie McCracken, Jürg Billeter
 * Based on code by Jürg Billeter
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 *
 * Author:
 * 	Jamie McCracken jamiemcc gnome org
 */
24 using GLib;
/**
 * Lexical scanner for Genie source files.
 */
29 public class Vala.Genie.Scanner {
/** Source file whose mapped contents are tokenized. */
public SourceFile source_file { get; private set; }

/** Spaces per indentation level; 0 makes the scanner count tabs instead. */
public int indent_spaces { get; set; }

/* start of the mapped buffer, read position, and one-past-the-end */
char* begin;
char* current;
char* end;

/* line/column of `current`, copied into token source locations */
int line;
int column;

/* indentation bookkeeping used to emit INDENT / DEDENT tokens */
int current_indent_level;
int indent_level;
int pending_dedents;

/* track open parens and braces for automatic line continuations */
int open_parens_count;
int open_brace_count;

/* type of the most recently returned token */
TokenType last_token;
/* set once the first ordinary token has been scanned */
bool parse_started;

Comment _comment;

Conditional[] conditional_stack;
/*
 * One entry of conditional_stack.
 * NOTE(review): presumably the state of an open preprocessor conditional
 * handled by pp_directive (); its users are not visible in this part of
 * the file, so confirm there.
 */
struct Conditional {
	public bool matched;
	public bool else_found;
	public bool skip_section;
}
/* stack of nested lexical contexts; read_token () dispatches on its top entry */
State[] state_stack;

/* lexical contexts that can be pushed on state_stack */
enum State {
	PARENS,
	BRACE,
	BRACKET,
	REGEX_LITERAL,
	TEMPLATE,
	TEMPLATE_PART
}
/**
 * Creates a scanner positioned at the first character of //source_file//.
 *
 * @param source_file file whose mapped contents will be tokenized
 */
public Scanner (SourceFile source_file) {
	this.source_file = source_file;

	// the buffer is delimited by [begin, end); scanning starts at begin
	char* contents = source_file.get_mapped_contents ();
	this.begin = contents;
	this.end = contents + source_file.get_mapped_length ();
	this.current = contents;

	// source positions are 1-based
	this.line = 1;
	this.column = 1;

	// no indentation seen yet; 0 spaces per level selects tab counting
	this._indent_spaces = 0;
	this.current_indent_level = 0;
	this.indent_level = 0;
	this.pending_dedents = 0;

	this.open_parens_count = 0;
	this.open_brace_count = 0;

	this.last_token = TokenType.NONE;
	this.parse_started = false;
}
/* true when the innermost lexical context is a template string */
bool in_template () {
	int depth = state_stack.length;
	if (depth == 0) {
		return false;
	}
	return state_stack[depth - 1] == State.TEMPLATE;
}
/* true when the innermost lexical context is State.TEMPLATE_PART */
bool in_template_part () {
	int depth = state_stack.length;
	if (depth == 0) {
		return false;
	}
	return state_stack[depth - 1] == State.TEMPLATE_PART;
}
/* true for characters allowed inside an identifier: alphanumerics and '_' */
bool is_ident_char (char c) {
	if (c == '_') {
		return true;
	}
	return c.isalnum ();
}
/* true when the innermost lexical context is a regular expression literal */
bool in_regex_literal () {
	int depth = state_stack.length;
	if (depth == 0) {
		return false;
	}
	return state_stack[depth - 1] == State.REGEX_LITERAL;
}
/**
 * Reads the next token while inside a regular expression literal.
 *
 * Returns either the literal body (REGEX_LITERAL), or the closing
 * delimiter together with any trailing i/s/m/x modifiers
 * (CLOSE_REGEX_LITERAL), or EOF. An unterminated literal is reported,
 * the regex state is popped and scanning continues with read_token ().
 *
 * @param token_begin set to the location of the first character of the token
 * @param token_end set to the location of the last character of the token
 * @return the type of the token that was read
 */
public TokenType read_regex_token (out SourceLocation token_begin, out SourceLocation token_end) {
	TokenType type;
	char* begin = current;
	token_begin.pos = begin;
	token_begin.line = line;
	token_begin.column = column;

	// -1 means "one column per byte"; otherwise the number of columns consumed
	int token_length_in_chars = -1;

	if (current >= end) {
		type = TokenType.EOF;
	} else {
		switch (current[0]) {
		case '/':
			type = TokenType.CLOSE_REGEX_LITERAL;
			current++;
			// count the '/' explicitly so that modifiers are added on top of it
			token_length_in_chars = 1;
			state_stack.length--;
			var fl_i = false;
			var fl_s = false;
			var fl_m = false;
			var fl_x = false;
			// never look at bytes beyond the end of the buffer
			while (current < end && (current[0] == 'i' || current[0] == 's' || current[0] == 'm' || current[0] == 'x')) {
				switch (current[0]) {
				case 'i':
					if (fl_i) {
						Report.error (new SourceReference (source_file, line, column + token_length_in_chars, line, column + token_length_in_chars), "modifier 'i' used more than once");
					}
					fl_i = true;
					break;
				case 's':
					if (fl_s) {
						Report.error (new SourceReference (source_file, line, column + token_length_in_chars, line, column + token_length_in_chars), "modifier 's' used more than once");
					}
					fl_s = true;
					break;
				case 'm':
					if (fl_m) {
						Report.error (new SourceReference (source_file, line, column + token_length_in_chars, line, column + token_length_in_chars), "modifier 'm' used more than once");
					}
					fl_m = true;
					break;
				case 'x':
					if (fl_x) {
						Report.error (new SourceReference (source_file, line, column + token_length_in_chars, line, column + token_length_in_chars), "modifier 'x' used more than once");
					}
					fl_x = true;
					break;
				}
				current++;
				token_length_in_chars++;
			}
			break;
		default:
			type = TokenType.REGEX_LITERAL;
			token_length_in_chars = 0;
			while (current < end && current[0] != '/') {
				if (current[0] == '\\') {
					current++;
					token_length_in_chars++;
					if (current >= end) {
						break;
					}

					switch (current[0]) {
					case '\'': case '"': case '\\': case '/':
					case '^': case '$': case '.':
					case '[': case ']': case '{': case '}': case '(': case ')':
					case '?': case '*': case '+': case '-':
					case '#': case '&': case '~': case ':': case ';':
					case '<': case '>': case '|': case '%': case '=': case '@':
					case '0':
					case 'b': case 'B': case 'f': case 'n': case 'r': case 't':
					case 'a': case 'A': case 'p': case 'P': case 'e':
					case 'd': case 'D': case 's': case 'S': case 'w': case 'W':
					case 'G': case 'z': case 'Z':
						current++;
						token_length_in_chars++;
						break;
					case 'x':
						// hexadecimal escape character
						current++;
						token_length_in_chars++;
						while (current < end && current[0].isxdigit ()) {
							current++;
							token_length_in_chars++;
						}
						break;
					default:
						Report.error (new SourceReference (source_file, line, column + token_length_in_chars, line, column + token_length_in_chars), "invalid escape sequence");
						break;
					}
				} else if (current[0] == '\n') {
					// regex literals may not span lines
					break;
				} else {
					unichar u = ((string) current).get_char_validated ((long) (end - current));
					if (u != (unichar) (-1)) {
						current += u.to_utf8 (null);
						token_length_in_chars++;
					} else {
						current++;
						Report.error (new SourceReference (source_file, line, column + token_length_in_chars, line, column + token_length_in_chars), "invalid UTF-8 character");
					}
				}
			}
			if (current >= end || current[0] == '\n') {
				// the closing delimiter of a regex literal is '/'
				Report.error (new SourceReference (source_file, line, column + token_length_in_chars, line, column + token_length_in_chars), "syntax error, expected /");
				state_stack.length--;
				return read_token (out token_begin, out token_end);
			}
			break;
		}
	}

	if (token_length_in_chars < 0) {
		column += (int) (current - begin);
	} else {
		column += token_length_in_chars;
	}

	token_end.pos = current;
	token_end.line = line;
	token_end.column = column - 1;

	return type;
}
/**
 * Moves the scanner to //location//.
 *
 * The conditional and state stacks are discarded.
 *
 * @param location position, line and column to continue scanning from
 */
public void seek (SourceLocation location) {
	// drop any nested context collected so far
	state_stack = null;
	conditional_stack = null;

	column = location.column;
	line = location.line;
	current = location.pos;
}
/*
 * Classifies the len characters starting at begin: returns the keyword
 * token type when they spell a Genie keyword, TokenType.IDENTIFIER
 * otherwise.
 *
 * Candidates are narrowed by length and first character; each remaining
 * candidate is then compared with matches ().
 */
TokenType get_identifier_or_keyword (char* begin, int len) {
	switch (len) {
	case 2:
		switch (begin[0]) {
		case 'a':
			if (matches (begin, "as")) return TokenType.AS;
			break;
		case 'd':
			if (matches (begin, "do")) return TokenType.DO;
			break;
		case 'i':
			if (matches (begin, "if")) return TokenType.IF;
			if (matches (begin, "in")) return TokenType.IN;
			if (matches (begin, "is")) return TokenType.IS;
			break;
		case 'o':
			if (matches (begin, "of")) return TokenType.OF;
			if (matches (begin, "or")) return TokenType.OP_OR;
			break;
		case 't':
			if (matches (begin, "to")) return TokenType.TO;
			break;
		}
		break;
	case 3:
		switch (begin[0]) {
		case 'a':
			if (matches (begin, "and")) return TokenType.OP_AND;
			break;
		case 'd':
			if (matches (begin, "def")) return TokenType.DEF;
			break;
		case 'f':
			if (matches (begin, "for")) return TokenType.FOR;
			break;
		case 'g':
			if (matches (begin, "get")) return TokenType.GET;
			break;
		case 'i':
			if (matches (begin, "isa")) return TokenType.ISA;
			break;
		case 'n':
			if (matches (begin, "new")) return TokenType.NEW;
			if (matches (begin, "not")) return TokenType.OP_NEG;
			break;
		case 'o':
			if (matches (begin, "out")) return TokenType.OUT;
			break;
		case 'r':
			if (matches (begin, "ref")) return TokenType.REF;
			break;
		case 's':
			if (matches (begin, "set")) return TokenType.SET;
			break;
		case 't':
			if (matches (begin, "try")) return TokenType.TRY;
			break;
		case 'v':
			if (matches (begin, "var")) return TokenType.VAR;
			break;
		}
		break;
	case 4:
		switch (begin[0]) {
		case 'c':
			if (matches (begin, "case")) return TokenType.CASE;
			break;
		case 'd':
			if (matches (begin, "dict")) return TokenType.DICT;
			break;
		case 'e':
			if (matches (begin, "else")) return TokenType.ELSE;
			if (matches (begin, "enum")) return TokenType.ENUM;
			break;
		case 'i':
			if (matches (begin, "init")) return TokenType.INIT;
			break;
		case 'l':
			if (matches (begin, "list")) return TokenType.LIST;
			if (matches (begin, "lock")) return TokenType.LOCK;
			break;
		case 'n':
			if (matches (begin, "null")) return TokenType.NULL;
			break;
		case 'p':
			if (matches (begin, "pass")) return TokenType.PASS;
			if (matches (begin, "prop")) return TokenType.PROP;
			break;
		case 's':
			// Genie spells the instance reference "self"
			if (matches (begin, "self")) return TokenType.THIS;
			break;
		case 't':
			if (matches (begin, "true")) return TokenType.TRUE;
			break;
		case 'u':
			if (matches (begin, "uses")) return TokenType.USES;
			break;
		case 'v':
			if (matches (begin, "void")) return TokenType.VOID;
			break;
		case 'w':
			if (matches (begin, "weak")) return TokenType.WEAK;
			if (matches (begin, "when")) return TokenType.WHEN;
			break;
		}
		break;
	case 5:
		switch (begin[0]) {
		case 'a':
			if (matches (begin, "array")) return TokenType.ARRAY;
			if (matches (begin, "async")) return TokenType.ASYNC;
			break;
		case 'b':
			if (matches (begin, "break")) return TokenType.BREAK;
			break;
		case 'c':
			if (matches (begin, "class")) return TokenType.CLASS;
			if (matches (begin, "const")) return TokenType.CONST;
			break;
		case 'e':
			if (matches (begin, "event")) return TokenType.EVENT;
			break;
		case 'f':
			if (matches (begin, "false")) return TokenType.FALSE;
			if (matches (begin, "final")) return TokenType.FINAL;
			break;
		case 'o':
			if (matches (begin, "owned")) return TokenType.OWNED;
			break;
		case 'p':
			if (matches (begin, "print")) return TokenType.PRINT;
			break;
		case 's':
			if (matches (begin, "super")) return TokenType.SUPER;
			break;
		case 'r':
			if (matches (begin, "raise")) return TokenType.RAISE;
			break;
		case 'w':
			if (matches (begin, "while")) return TokenType.WHILE;
			break;
		case 'y':
			if (matches (begin, "yield")) return TokenType.YIELD;
			break;
		}
		break;
	case 6:
		switch (begin[0]) {
		case 'a':
			if (matches (begin, "assert")) return TokenType.ASSERT;
			break;
		case 'd':
			if (matches (begin, "delete")) return TokenType.DELETE;
			if (matches (begin, "downto")) return TokenType.DOWNTO;
			break;
		case 'e':
			if (matches (begin, "except")) return TokenType.EXCEPT;
			if (matches (begin, "extern")) return TokenType.EXTERN;
			break;
		case 'i':
			if (matches (begin, "inline")) return TokenType.INLINE;
			break;
		case 'p':
			if (matches (begin, "params")) return TokenType.PARAMS;
			if (matches (begin, "public")) return TokenType.PUBLIC;
			break;
		case 'r':
			if (matches (begin, "raises")) return TokenType.RAISES;
			if (matches (begin, "return")) return TokenType.RETURN;
			break;
		case 's':
			if (matches (begin, "sizeof")) return TokenType.SIZEOF;
			if (matches (begin, "static")) return TokenType.STATIC;
			if (matches (begin, "struct")) return TokenType.STRUCT;
			break;
		case 't':
			if (matches (begin, "typeof")) return TokenType.TYPEOF;
			break;
		}
		break;
	case 7:
		switch (begin[0]) {
		case 'd':
			if (matches (begin, "default")) return TokenType.DEFAULT;
			if (matches (begin, "dynamic")) return TokenType.DYNAMIC;
			break;
		case 'e':
			if (matches (begin, "ensures")) return TokenType.ENSURES;
			break;
		case 'f':
			if (matches (begin, "finally")) return TokenType.FINALLY;
			break;
		case 'p':
			if (matches (begin, "private")) return TokenType.PRIVATE;
			break;
		case 'u':
			if (matches (begin, "unowned")) return TokenType.UNOWNED;
			break;
		case 'v':
			if (matches (begin, "virtual")) return TokenType.VIRTUAL;
			break;
		}
		break;
	case 8:
		switch (begin[0]) {
		case 'a':
			if (matches (begin, "abstract")) return TokenType.ABSTRACT;
			break;
		case 'c':
			if (matches (begin, "continue")) return TokenType.CONTINUE;
			break;
		case 'd':
			if (matches (begin, "delegate")) return TokenType.DELEGATE;
			break;
		case 'i':
			if (matches (begin, "internal")) return TokenType.INTERNAL;
			break;
		case 'o':
			if (matches (begin, "override")) return TokenType.OVERRIDE;
			break;
		case 'r':
			if (matches (begin, "readonly")) return TokenType.READONLY;
			if (matches (begin, "requires")) return TokenType.REQUIRES;
			break;
		case 'v':
			if (matches (begin, "volatile")) return TokenType.VOLATILE;
			break;
		}
		break;
	case 9:
		switch (begin[0]) {
		case 'c':
			if (matches (begin, "construct")) return TokenType.CONSTRUCT;
			break;
		case 'e':
			// Genie's "exception" keyword maps to the errordomain token
			if (matches (begin, "exception")) return TokenType.ERRORDOMAIN;
			break;
		case 'i':
			if (matches (begin, "interface")) return TokenType.INTERFACE;
			break;
		case 'n':
			if (matches (begin, "namespace")) return TokenType.NAMESPACE;
			break;
		case 'p':
			if (matches (begin, "protected")) return TokenType.PROTECTED;
			break;
		case 'w':
			if (matches (begin, "writeonly")) return TokenType.WRITEONLY;
			break;
		}
		break;
	case 10:
		if (matches (begin, "implements")) return TokenType.IMPLEMENTS;
		break;
	}

	return TokenType.IDENTIFIER;
}
/**
 * Reads the next token while inside a template string (@"...").
 *
 * Produces CLOSE_TEMPLATE for the closing quote, IDENTIFIER for a
 * `$name` reference, TEMPLATE_STRING_LITERAL for literal text and for
 * `$$`, or EOF. A `$(` switches to regular tokenizing via read_token ()
 * until the matching ')'. Except for the closing quote, every token
 * pushes State.TEMPLATE_PART so that the next read_token () call emits
 * a COMMA separator.
 *
 * @param token_begin set to the location of the first character of the token
 * @param token_end set to the location of the last character of the token
 * @return the type of the token that was read
 */
public TokenType read_template_token (out SourceLocation token_begin, out SourceLocation token_end) {
	TokenType type;
	char* begin = current;
	token_begin.pos = begin;
	token_begin.line = line;
	token_begin.column = column;

	// -1 means "one column per byte"; otherwise the number of columns consumed
	int token_length_in_chars = -1;

	if (current >= end) {
		type = TokenType.EOF;
	} else {
		switch (current[0]) {
		case '"':
			type = TokenType.CLOSE_TEMPLATE;
			current++;
			state_stack.length--;
			break;
		case '$':
			token_begin.pos++; // $ is not part of following token
			current++;
			// '\0' stands in for "end of input" so that no byte beyond the buffer is read
			char next = (current < end) ? current[0] : '\0';
			if (next.isalpha () || next == '_') {
				while (current < end && is_ident_char (current[0])) {
					current++;
				}
				type = TokenType.IDENTIFIER;
				state_stack += State.TEMPLATE_PART;
			} else if (next == '(') {
				current++;
				column += 2;
				// the matching ')' is scanned by read_token (), which decrements
				// the counter; keep both sides balanced
				open_parens_count++;
				state_stack += State.PARENS;
				return read_token (out token_begin, out token_end);
			} else if (next == '$') {
				type = TokenType.TEMPLATE_STRING_LITERAL;
				current++;
				state_stack += State.TEMPLATE_PART;
			} else {
				Report.error (new SourceReference (source_file, line, column + 1, line, column + 1), "unexpected character");
				// the '$' has been skipped; account for its column before retrying
				column++;
				return read_template_token (out token_begin, out token_end);
			}
			break;
		default:
			type = TokenType.TEMPLATE_STRING_LITERAL;
			token_length_in_chars = 0;
			while (current < end && current[0] != '"' && current[0] != '$') {
				if (current[0] == '\\') {
					current++;
					token_length_in_chars++;
					if (current >= end) {
						break;
					}

					switch (current[0]) {
					case '\'':
					case '"':
					case '\\':
					case '0':
					case 'b':
					case 'f':
					case 'n':
					case 'r':
					case 't':
						current++;
						token_length_in_chars++;
						break;
					case 'x':
						// hexadecimal escape character
						current++;
						token_length_in_chars++;
						while (current < end && current[0].isxdigit ()) {
							current++;
							token_length_in_chars++;
						}
						break;
					default:
						Report.error (new SourceReference (source_file, line, column + token_length_in_chars, line, column + token_length_in_chars), "invalid escape sequence");
						break;
					}
				} else if (current[0] == '\n') {
					// template strings may not span lines
					break;
				} else {
					unichar u = ((string) current).get_char_validated ((long) (end - current));
					if (u != (unichar) (-1)) {
						current += u.to_utf8 (null);
						token_length_in_chars++;
					} else {
						current++;
						Report.error (new SourceReference (source_file, line, column + token_length_in_chars, line, column + token_length_in_chars), "invalid UTF-8 character");
					}
				}
			}
			if (current >= end || current[0] == '\n') {
				Report.error (new SourceReference (source_file, line, column + token_length_in_chars, line, column + token_length_in_chars), "syntax error, expected \"");
				state_stack.length--;
				return read_token (out token_begin, out token_end);
			}
			state_stack += State.TEMPLATE_PART;
			break;
		}
	}

	if (token_length_in_chars < 0) {
		column += (int) (current - begin);
	} else {
		column += token_length_in_chars;
	}

	token_end.pos = current;
	token_end.line = line;
	token_end.column = column - 1;

	return type;
}
/**
 * Reads the next token from the source file.
 *
 * Dispatches to the template / regex sub-scanners when the state stack
 * says so. Otherwise it emits, in this order of priority: pending
 * DEDENT tokens, an EOL for the first newline after a token, INDENT /
 * DEDENT tokens derived from the indentation of the next non-empty
 * line, and finally a regular token.
 *
 * @param token_begin set to the location of the first character of the token
 * @param token_end set to the location of the last character of the token
 * @return the type of the token that was read
 */
public TokenType read_token (out SourceLocation token_begin, out SourceLocation token_end) {
	if (in_template ()) {
		return read_template_token (out token_begin, out token_end);
	} else if (in_template_part ()) {
		// template parts are separated by an artificial, zero-width comma
		state_stack.length--;

		token_begin.pos = current;
		token_begin.line = line;
		token_begin.column = column;

		token_end.pos = current;
		token_end.line = line;
		token_end.column = column - 1;

		return TokenType.COMMA;
	} else if (in_regex_literal ()) {
		return read_regex_token (out token_begin, out token_end);
	}

	/* emit dedents if outstanding before checking any other chars */

	if (pending_dedents > 0) {
		pending_dedents--;
		indent_level--;

		token_begin.pos = current;
		token_begin.line = line;
		token_begin.column = column;

		token_end.pos = current;
		token_end.line = line;
		token_end.column = column;

		last_token = TokenType.DEDENT;

		return TokenType.DEDENT;
	}

	if ((_indent_spaces == 0) || (last_token != TokenType.EOL)) {
		/* scrub whitespace (excluding newlines) and comments */
		space ();
	}

	/* handle explicit line continuation (lines ending with "\") */
	// "end - 1" keeps the look-ahead at current[1] inside the buffer
	while (current < end - 1 && current[0] == '\\' && current[1] == '\n') {
		current += 2;
		line++;
		skip_space_tabs ();
	}

	/* handle automatic line continuations (when inside parens or braces) */
	while (current < end && current[0] == '\n' && (open_parens_count > 0 || open_brace_count > 0)) {
		current++;
		line++;
		skip_space_tabs ();
	}

	/* handle non-consecutive new line once parsing is underway - EOL */
	if (newline () && parse_started && last_token != TokenType.EOL && last_token != TokenType.SEMICOLON) {
		token_begin.pos = current;
		token_begin.line = line;
		token_begin.column = column;

		token_end.pos = current;
		token_end.line = line;
		token_end.column = column;

		last_token = TokenType.EOL;

		return TokenType.EOL;
	}

	while (skip_newlines ()) {
		token_begin.pos = current;
		token_begin.line = line;
		token_begin.column = column;

		current_indent_level = count_tabs ();

		/* if it's an empty new line then ignore */
		if (current_indent_level == -1) {
			continue;
		}

		if (current_indent_level > indent_level) {
			indent_level = current_indent_level;

			token_end.pos = current;
			token_end.line = line;
			token_end.column = column;

			last_token = TokenType.INDENT;

			return TokenType.INDENT;
		} else if (current_indent_level < indent_level) {
			// one level is closed now, the rest on the following calls
			indent_level--;

			pending_dedents = (indent_level - current_indent_level);

			token_end.pos = current;
			token_end.line = line;
			token_end.column = column;

			last_token = TokenType.DEDENT;

			return TokenType.DEDENT;
		}
	}

	TokenType type;
	char* begin = current;
	token_begin.pos = begin;
	token_begin.line = line;
	token_begin.column = column;

	// -1 means "one column per byte"; otherwise the number of columns consumed
	int token_length_in_chars = -1;

	parse_started = true;

	if (current >= end) {
		// close all blocks that are still open before reporting EOF
		if (indent_level > 0) {
			indent_level--;

			pending_dedents = indent_level;

			type = TokenType.DEDENT;
		} else {
			type = TokenType.EOF;
		}
	} else if (current[0].isalpha () || current[0] == '_') {
		int len = 0;
		while (current < end && is_ident_char (current[0])) {
			current++;
			len++;
		}
		type = get_identifier_or_keyword (begin, len);
	} else if (current[0] == '@') {
		if (current < end - 1 && current[1] == '"') {
			type = TokenType.OPEN_TEMPLATE;
			current += 2;
			state_stack += State.TEMPLATE;
		} else {
			token_begin.pos++; // @ is not part of the identifier
			current++;
			while (current < end && is_ident_char (current[0])) {
				current++;
			}
			type = TokenType.IDENTIFIER;
		}
	} else if (current[0].isdigit ()) {
		while (current < end && current[0].isdigit ()) {
			current++;
		}
		type = TokenType.INTEGER_LITERAL;
		if (current < end && current[0].tolower () == 'l') {
			current++;
			if (current < end && current[0].tolower () == 'l') {
				current++;
			}
		} else if (current < end && current[0].tolower () == 'u') {
			current++;
			if (current < end && current[0].tolower () == 'l') {
				current++;
				if (current < end && current[0].tolower () == 'l') {
					current++;
				}
			}
		} else if (current < end - 1 && current[0] == '.' && current[1].isdigit ()) {
			current++;
			while (current < end && current[0].isdigit ()) {
				current++;
			}
			if (current < end && current[0].tolower () == 'e') {
				current++;
				if (current < end && (current[0] == '+' || current[0] == '-')) {
					current++;
				}
				while (current < end && current[0].isdigit ()) {
					current++;
				}
			}
			if (current < end && current[0].tolower () == 'f') {
				current++;
			}
			type = TokenType.REAL_LITERAL;
		} else if (current < end - 1 && current == begin + 1
		           && begin[0] == '0' && begin[1] == 'x' && begin[2].isxdigit ()) {
			// hexadecimal integer literal
			// ("end - 1" keeps the look-ahead at begin[2] inside the buffer)
			current++;
			while (current < end && current[0].isxdigit ()) {
				current++;
			}
		} else if (current < end && is_ident_char (current[0])) {
			// allow identifiers to start with a digit
			// as long as they contain at least one char
			while (current < end && is_ident_char (current[0])) {
				current++;
			}
			type = TokenType.IDENTIFIER;
		}
	} else {
		switch (current[0]) {
		case '{':
			type = TokenType.OPEN_BRACE;
			open_brace_count++;
			state_stack += State.BRACE;
			current++;
			break;
		case '}':
			type = TokenType.CLOSE_BRACE;
			// a stray '}' must not drive the counter negative, otherwise
			// later braces would no longer enable line continuation
			if (open_brace_count > 0) {
				open_brace_count--;
			}
			if (state_stack.length > 0) {
				state_stack.length--;
			}
			current++;
			break;
		case '(':
			type = TokenType.OPEN_PARENS;
			open_parens_count++;
			state_stack += State.PARENS;
			current++;
			break;
		case ')':
			type = TokenType.CLOSE_PARENS;
			// see '}' above: never let the counter go negative
			if (open_parens_count > 0) {
				open_parens_count--;
			}
			current++;
			if (state_stack.length > 0) {
				state_stack.length--;
			}
			if (in_template ()) {
				// end of a $(...) expression inside a template string
				type = TokenType.COMMA;
			}
			break;
		case '[':
			type = TokenType.OPEN_BRACKET;
			state_stack += State.BRACKET;
			current++;
			break;
		case ']':
			type = TokenType.CLOSE_BRACKET;
			if (state_stack.length > 0) {
				state_stack.length--;
			}
			current++;
			break;
		case '.':
			type = TokenType.DOT;
			current++;
			if (current < end - 1) {
				if (current[0] == '.' && current[1] == '.') {
					type = TokenType.ELLIPSIS;
					current += 2;
				}
			}
			break;
		case ':':
			type = TokenType.COLON;
			current++;
			break;
		case ',':
			type = TokenType.COMMA;
			current++;
			break;
		case ';':
			type = TokenType.SEMICOLON;
			current++;
			break;
		case '#':
			type = TokenType.HASH;
			current++;
			break;
		case '?':
			type = TokenType.INTERR;
			current++;
			break;
		case '|':
			type = TokenType.BITWISE_OR;
			current++;
			if (current < end) {
				switch (current[0]) {
				case '=':
					type = TokenType.ASSIGN_BITWISE_OR;
					current++;
					break;
				case '|':
					type = TokenType.OP_OR;
					current++;
					break;
				}
			}
			break;
		case '&':
			type = TokenType.BITWISE_AND;
			current++;
			if (current < end) {
				switch (current[0]) {
				case '=':
					type = TokenType.ASSIGN_BITWISE_AND;
					current++;
					break;
				case '&':
					type = TokenType.OP_AND;
					current++;
					break;
				}
			}
			break;
		case '^':
			type = TokenType.CARRET;
			current++;
			if (current < end && current[0] == '=') {
				type = TokenType.ASSIGN_BITWISE_XOR;
				current++;
			}
			break;
		case '~':
			type = TokenType.TILDE;
			current++;
			break;
		case '=':
			type = TokenType.ASSIGN;
			current++;
			if (current < end) {
				switch (current[0]) {
				case '=':
					type = TokenType.OP_EQ;
					current++;
					break;
				case '>':
					type = TokenType.LAMBDA;
					current++;
					break;
				}
			}
			break;
		case '<':
			type = TokenType.OP_LT;
			current++;
			if (current < end) {
				switch (current[0]) {
				case '=':
					type = TokenType.OP_LE;
					current++;
					break;
				case '<':
					type = TokenType.OP_SHIFT_LEFT;
					current++;
					if (current < end && current[0] == '=') {
						type = TokenType.ASSIGN_SHIFT_LEFT;
						current++;
					}
					break;
				}
			}
			break;
		case '>':
			type = TokenType.OP_GT;
			current++;
			if (current < end && current[0] == '=') {
				type = TokenType.OP_GE;
				current++;
			}
			break;
		case '!':
			type = TokenType.OP_NEG;
			current++;
			if (current < end && current[0] == '=') {
				type = TokenType.OP_NE;
				current++;
			}
			break;
		case '+':
			type = TokenType.PLUS;
			current++;
			if (current < end) {
				switch (current[0]) {
				case '=':
					type = TokenType.ASSIGN_ADD;
					current++;
					break;
				case '+':
					type = TokenType.OP_INC;
					current++;
					break;
				}
			}
			break;
		case '-':
			type = TokenType.MINUS;
			current++;
			if (current < end) {
				switch (current[0]) {
				case '=':
					type = TokenType.ASSIGN_SUB;
					current++;
					break;
				case '-':
					type = TokenType.OP_DEC;
					current++;
					break;
				case '>':
					type = TokenType.OP_PTR;
					current++;
					break;
				}
			}
			break;
		case '*':
			type = TokenType.STAR;
			current++;
			if (current < end && current[0] == '=') {
				type = TokenType.ASSIGN_MUL;
				current++;
			}
			break;
		case '/':
			// after these tokens an operand is expected, so '/' opens a
			// regex literal; anywhere else it is the division operator
			switch (last_token) {
			case TokenType.ASSIGN:
			case TokenType.COMMA:
			case TokenType.MINUS:
			case TokenType.OP_AND:
			case TokenType.OP_DEC:
			case TokenType.OP_EQ:
			case TokenType.OP_GE:
			case TokenType.OP_GT:
			case TokenType.OP_INC:
			case TokenType.OP_LE:
			case TokenType.OP_LT:
			case TokenType.OP_NE:
			case TokenType.OP_NEG:
			case TokenType.OP_OR:
			case TokenType.OPEN_BRACE:
			case TokenType.OPEN_PARENS:
			case TokenType.PLUS:
			case TokenType.RETURN:
				type = TokenType.OPEN_REGEX_LITERAL;
				state_stack += State.REGEX_LITERAL;
				current++;
				break;
			default:
				type = TokenType.DIV;
				current++;
				if (current < end && current[0] == '=') {
					type = TokenType.ASSIGN_DIV;
					current++;
				}
				break;
			}
			break;

		case '%':
			type = TokenType.PERCENT;
			current++;
			if (current < end && current[0] == '=') {
				type = TokenType.ASSIGN_PERCENT;
				current++;
			}
			break;
		case '\'':
		case '"':
			if (begin[0] == '\'') {
				type = TokenType.CHARACTER_LITERAL;
			} else if (current < end - 6 && begin[1] == '"' && begin[2] == '"') {
				type = TokenType.VERBATIM_STRING_LITERAL;
				token_length_in_chars = 6;
				current += 3;
				while (current < end - 4) {
					if (current[0] == '"' && current[1] == '"' && current[2] == '"') {
						break;
					} else if (current[0] == '\n') {
						current++;
						line++;
						column = 1;
						token_length_in_chars = 3;
					} else {
						unichar u = ((string) current).get_char_validated ((long) (end - current));
						if (u != (unichar) (-1)) {
							current += u.to_utf8 (null);
							token_length_in_chars++;
						} else {
							// skip the offending byte, otherwise this loop never terminates
							current++;
							Report.error (new SourceReference (source_file, line, column + token_length_in_chars, line, column + token_length_in_chars), "invalid UTF-8 character");
						}
					}
				}
				// a multi-byte character may have moved current close to the end;
				// only inspect three bytes when they are inside the buffer
				if (current < end - 2 && current[0] == '"' && current[1] == '"' && current[2] == '"') {
					current += 3;
				} else {
					Report.error (new SourceReference (source_file, line, column + token_length_in_chars, line, column + token_length_in_chars), "syntax error, expected \"\"\"");
				}
				break;
			} else {
				type = TokenType.STRING_LITERAL;
			}
			// character and ordinary string literals share the remaining code;
			// begin[0] is the delimiter to look for
			token_length_in_chars = 2;
			current++;
			while (current < end && current[0] != begin[0]) {
				if (current[0] == '\\') {
					current++;
					token_length_in_chars++;
					if (current >= end) {
						break;
					}

					switch (current[0]) {
					case '\'':
					case '"':
					case '\\':
					case '0':
					case 'b':
					case 'f':
					case 'n':
					case 'r':
					case 't':
						current++;
						token_length_in_chars++;
						break;
					case 'x':
						// hexadecimal escape character
						current++;
						token_length_in_chars++;
						while (current < end && current[0].isxdigit ()) {
							current++;
							token_length_in_chars++;
						}
						break;
					default:
						Report.error (new SourceReference (source_file, line, column + token_length_in_chars, line, column + token_length_in_chars), "invalid escape sequence");
						break;
					}
				} else if (current[0] == '\n') {
					break;
				} else {
					unichar u = ((string) current).get_char_validated ((long) (end - current));
					if (u != (unichar) (-1)) {
						current += u.to_utf8 (null);
						token_length_in_chars++;
					} else {
						current++;
						Report.error (new SourceReference (source_file, line, column + token_length_in_chars, line, column + token_length_in_chars), "invalid UTF-8 character");
					}
				}
			}
			if (current < end && current[0] != '\n') {
				current++;
			} else {
				Report.error (new SourceReference (source_file, line, column + token_length_in_chars, line, column + token_length_in_chars), "syntax error, expected %c".printf (begin[0]));
			}
			break;
		default:
			unichar u = ((string) current).get_char_validated ((long) (end - current));
			if (u != (unichar) (-1)) {
				current += u.to_utf8 (null);
				Report.error (new SourceReference (source_file, line, column, line, column), "syntax error, unexpected character");
			} else {
				current++;
				Report.error (new SourceReference (source_file, line, column, line, column), "invalid UTF-8 character");
			}
			// skip the character and try again
			column++;
			last_token = TokenType.STRING_LITERAL;
			return read_token (out token_begin, out token_end);
		}
	}

	if (token_length_in_chars < 0) {
		column += (int) (current - begin);
	} else {
		column += token_length_in_chars;
	}

	token_end.pos = current;
	token_end.line = line;
	token_end.column = column - 1;
	last_token = type;

	return type;
}
/**
 * Measures the indentation at the current position and advances past it.
 *
 * When indent_spaces is zero, leading tab characters are counted;
 * otherwise leading spaces are counted and divided by indent_spaces.
 * Any following whitespace and comments are skipped as well.
 *
 * @return the indentation level, or -1 if the scanner ends up on a line
 *         feed, i.e. the line holds no code
 */
int count_tabs () {
	int indent_units;
	char* indent_start = current;

	if (_indent_spaces != 0) {
		// space based indentation: a level is _indent_spaces blanks wide
		while (current < end && current[0] == ' ') {
			current++;
			column++;
		}
		indent_units = ((int) (current - indent_start)) / _indent_spaces;
	} else {
		// tab based indentation: one tab per level
		while (current < end && current[0] == '\t') {
			current++;
			column++;
		}
		indent_units = (int) (current - indent_start);
	}

	/* skip comments, whitespace and anything else that is not code */
	space ();

	bool line_is_empty = (current < end) && (current[0] == '\n');
	if (line_is_empty) {
		return -1;
	}

	return indent_units;
}
/**
 * Compares the bytes at a buffer position against a keyword.
 *
 * Only the first keyword.length bytes of the buffer are inspected; the
 * caller has to make sure that many bytes are readable.
 *
 * @param begin   position in the buffer to compare from
 * @param keyword text to compare against
 * @return true if every byte of keyword equals the corresponding byte at begin
 */
bool matches (char* begin, string keyword) {
	char* expected = (char*) keyword;
	long expected_len = keyword.length;

	int pos = 0;
	while (pos < expected_len) {
		if (begin[pos] != expected[pos]) {
			return false;
		}
		pos++;
	}

	return true;
}
/**
 * Skips whitespace on the current line (the line feed itself is kept).
 *
 * If the scanner then stands on a '#' in column 1, the preprocessing
 * directive is handled through pp_directive ().
 *
 * @return true if whitespace was skipped or a directive was processed
 */
bool whitespace () {
	char* run_start = current;

	while (current < end && current[0] != '\n' && current[0].isspace ()) {
		current++;
		column++;
	}

	bool at_directive = (column == 1) && (current < end) && (current[0] == '#');
	if (at_directive) {
		pp_directive ();
		return true;
	}

	// something was skipped exactly when the position moved
	return current != run_start;
}
/**
 * Tells whether the scanner currently stands on a line feed.
 *
 * Nothing is consumed.
 *
 * @return true if input is left and the current character is '\n'
 */
inline bool newline () {
	// `end` is one past the last mapped byte (begin + mapped length), so the
	// position has to be checked before it is dereferenced
	return current < end && current[0] == '\n';
}
/**
 * Consumes consecutive line feeds.
 *
 * For every line feed the line counter is advanced, the column is reset
 * to 1 and the current indent level is reset to 0.
 *
 * @return true if at least one line feed was consumed
 */
bool skip_newlines () {
	char* first = current;

	for (; newline (); current++) {
		line++;
		column = 1;
		current_indent_level = 0;
	}

	return current != first;
}
/**
 * Skips one comment starting at the current position, if there is one.
 *
 * Both single-line (double slash) and delimited (slash-star) comments are
 * recognised. Comment text is handed to push_comment () when a source
 * reference was recorded for it: for single-line comments only while
 * collecting file comments, for delimited comments when they start with
 * an extra '*' or while collecting file comments.
 *
 * @param file_comment true while the comments at the head of the file are
 *                     collected; a delimited comment starting with an
 *                     extra '*' is then left unconsumed
 * @return true if a comment was consumed or an unterminated delimited
 *         comment was reported, false if no comment was consumed
 */
bool comment (bool file_comment = false) {
	if (current > end - 2
	    || current[0] != '/'
	    || (current[1] != '/' && current[1] != '*')) {
		return false;
	}

	if (current[1] == '/') {
		// single-line comment

		SourceReference source_reference = null;
		if (file_comment) {
			source_reference = new SourceReference (source_file, line, column, line, column);
		}

		current += 2;

		// start of the comment text; the `begin` field points at the start
		// of the whole file and must not be used for slicing here
		char* comment_begin = current;

		// skip until end of line or end of file
		while (current < end && current[0] != '\n') {
			current++;
		}

		/* do not ignore EOL if comment does not exclusively occupy the line */
		// the loop above may have stopped at `end`, so check before reading
		if (current < end && current[0] == '\n' && last_token == TokenType.EOL) {
			current++;
			line++;
			column = 1;
			current_indent_level = 0;
		}

		if (source_reference != null) {
			push_comment (((string) comment_begin).substring (0, (long) (current - comment_begin)), source_reference, file_comment);
		}
	} else {
		// delimited comment

		// only the two opening characters are known to be readable; the
		// third one exists only if at least three bytes are left
		bool starts_with_star = current < end - 2 && current[2] == '*';

		SourceReference source_reference = null;
		if (file_comment && starts_with_star) {
			return false;
		}

		if (starts_with_star || file_comment) {
			source_reference = new SourceReference (source_file, line, column, line, column);
		}

		current += 2;
		char* comment_begin = current;

		while (current < end - 1
		       && (current[0] != '*' || current[1] != '/')) {
			if (current[0] == '\n') {
				line++;
				column = 0;
			}
			current++;
			column++;
		}

		// fewer than two bytes left means the terminator was not found; this
		// also covers an opener that sits at the very end of the input
		if (current >= end - 1) {
			Report.error (new SourceReference (source_file, line, column, line, column), "syntax error, expected */");
			return true;
		}

		if (source_reference != null) {
			string comment = ((string) comment_begin).substring (0, (long) (current - comment_begin));
			push_comment (comment, source_reference, file_comment);
		}

		// step over the terminator
		current += 2;
		column += 2;
	}

	return true;
}
/**
 * Consumes a run of tab characters.
 *
 * @return true if at least one tab was consumed
 */
bool skip_tabs () {
	char* run_start = current;

	for (; current < end && current[0] == '\t'; current++) {
		column++;
	}

	return current != run_start;
}
/**
 * Repeatedly skips whitespace, tabs and comments until none of the three
 * helpers consumes anything any more.
 */
void skip_space_tabs () {
	while (true) {
		// helpers are tried in order; the first one that succeeds restarts the round
		if (!whitespace () && !skip_tabs () && !comment ()) {
			break;
		}
	}
}
/**
 * Repeatedly skips whitespace and comments until neither helper
 * consumes anything any more.
 */
void space () {
	while (true) {
		if (!whitespace () && !comment ()) {
			break;
		}
	}
}
/**
 * Skips whitespace and comments, scanning the comments in file-comment
 * mode (comment (true)), until neither helper consumes anything any more.
 */
public void parse_file_comments () {
	while (true) {
		if (!whitespace () && !comment (true)) {
			break;
		}
	}
}
/**
 * Stores the text of a scanned comment.
 *
 * Text starting with '*' becomes the pending comment returned by
 * pop_comment (). A file comment is added to the source file instead and
 * clears the pending comment.
 *
 * @param comment_item     comment text without its delimiters
 * @param source_reference location recorded for the comment
 * @param file_comment     true if the comment belongs to the file itself
 */
void push_comment (string comment_item, SourceReference source_reference, bool file_comment) {
	bool starts_with_star = (comment_item[0] == '*');
	if (starts_with_star) {
		_comment = new Comment (comment_item, source_reference);
	}

	if (!file_comment) {
		return;
	}

	source_file.add_comment (new Comment (comment_item, source_reference));
	_comment = null;
}
/**
 * Hands out the pending comment and forgets it.
 *
 * @return the saved comment, or null if none is pending
 */
public Comment? pop_comment () {
	Comment? pending = _comment;
	_comment = null;
	return pending;
}
/**
 * Skips whitespace inside a preprocessing directive; the line feed that
 * ends the directive is not consumed.
 *
 * @return true if at least one character was skipped
 */
bool pp_whitespace () {
	char* run_start = current;

	for (; current < end && current[0] != '\n' && current[0].isspace (); current++) {
		column++;
	}

	return current != run_start;
}
/**
 * Processes the preprocessing directive at the current position.
 *
 * The scanner must stand on the '#'. The directive name is read and
 * dispatched to the matching parse_pp_* method; unknown names are
 * reported. If the innermost conditional is then marked to be skipped,
 * input is consumed up to the start of the next line whose first
 * non-whitespace character is '#', or up to the end of the input.
 */
void pp_directive () {
	// step over the hash sign
	current++;
	column++;

	pp_whitespace ();

	// read the directive name
	char* name_start = current;
	while (current < end && current[0].isalnum ()) {
		current++;
		column++;
	}
	int name_len = (int) (current - name_start);

	if (name_len == 2 && matches (name_start, "if")) {
		parse_pp_if ();
	} else if (name_len == 4 && matches (name_start, "elif")) {
		parse_pp_elif ();
	} else if (name_len == 4 && matches (name_start, "else")) {
		parse_pp_else ();
	} else if (name_len == 5 && matches (name_start, "endif")) {
		parse_pp_endif ();
	} else {
		Report.error (new SourceReference (source_file, line, column - name_len, line, column), "syntax error, invalid preprocessing directive");
	}

	bool skipping = conditional_stack.length > 0
	                && conditional_stack[conditional_stack.length - 1].skip_section;
	if (!skipping) {
		return;
	}

	// skip lines until the next preprocessing directive
	bool only_blanks_so_far = false;
	while (current < end) {
		char c = current[0];

		if (only_blanks_so_far && c == '#') {
			// rewind to the first column so the directive is scanned from the line start
			current -= (column - 1);
			column = 1;
			return;
		}

		if (c == '\n') {
			line++;
			column = 0;
			only_blanks_so_far = true;
		} else if (!c.isspace ()) {
			only_blanks_so_far = false;
		}

		current++;
		column++;
	}
}
/**
 * Expects the end of a preprocessing directive line.
 *
 * Trailing whitespace is skipped; an error is reported unless the
 * scanner then stands on a line feed. The line feed is not consumed.
 */
void pp_eol () {
	pp_whitespace ();

	bool at_line_end = current < end && current[0] == '\n';
	if (!at_line_end) {
		Report.error (new SourceReference (source_file, line, column, line, column), "syntax error, expected newline");
	}
}
/**
 * Handles an #if directive.
 *
 * Evaluates the condition and pushes a new entry on the conditional
 * stack. The entry is marked as matched when the condition holds and the
 * enclosing conditional (if any) is not being skipped; otherwise it is
 * marked to be skipped.
 */
void parse_pp_if () {
	pp_whitespace ();
	bool condition = parse_pp_expression ();
	pp_eol ();

	conditional_stack += Conditional ();

	int top = conditional_stack.length - 1;
	// the outermost conditional has no parent that could be skipping
	bool parent_active = (top == 0) || !conditional_stack[top - 1].skip_section;

	if (condition && parent_active) {
		// condition true => process code within if
		conditional_stack[top].matched = true;
	} else {
		// skip lines until next preprocessing directive
		conditional_stack[top].skip_section = true;
	}
}
/**
 * Handles an #elif directive.
 *
 * The condition is always parsed. An error is reported if no conditional
 * is open or its else_found flag is set. The branch becomes active when
 * the condition holds, no earlier branch matched and the enclosing
 * conditional (if any) is not being skipped; otherwise it is skipped.
 */
void parse_pp_elif () {
	pp_whitespace ();
	bool condition = parse_pp_expression ();
	pp_eol ();

	int depth = conditional_stack.length;
	if (depth == 0 || conditional_stack[depth - 1].else_found) {
		Report.error (new SourceReference (source_file, line, column, line, column), "syntax error, unexpected #elif");
		return;
	}

	int top = depth - 1;
	bool parent_active = (depth == 1) || !conditional_stack[top - 1].skip_section;
	bool take_branch = condition && !conditional_stack[top].matched && parent_active;

	if (take_branch) {
		// condition true => process code within this branch
		conditional_stack[top].matched = true;
		conditional_stack[top].skip_section = false;
	} else {
		// skip lines until next preprocessing directive
		conditional_stack[top].skip_section = true;
	}
}
/**
 * Handles an #else directive.
 *
 * An error is reported if no conditional is open or the open conditional
 * already has an #else. Otherwise the #else is recorded and the branch
 * becomes active when no earlier branch matched and the enclosing
 * conditional (if any) is not being skipped; otherwise it is skipped.
 */
void parse_pp_else () {
	pp_eol ();

	if (conditional_stack.length == 0 || conditional_stack[conditional_stack.length - 1].else_found) {
		Report.error (new SourceReference (source_file, line, column, line, column), "syntax error, unexpected #else");
		return;
	}

	int top = conditional_stack.length - 1;

	// remember the #else so that a second #else or a later #elif of the
	// same conditional is reported by the else_found checks
	conditional_stack[top].else_found = true;

	if (!conditional_stack[top].matched
	    && (conditional_stack.length == 1 || !conditional_stack[top - 1].skip_section)) {
		// no earlier branch matched => process code within else
		conditional_stack[top].matched = true;
		conditional_stack[top].skip_section = false;
	} else {
		// skip lines until next preprocessing directive
		conditional_stack[top].skip_section = true;
	}
}
/**
 * Handles an #endif directive: closes the innermost conditional, or
 * reports an error if none is open.
 */
void parse_pp_endif () {
	pp_eol ();

	if (conditional_stack.length > 0) {
		// drop the innermost entry
		conditional_stack.length--;
		return;
	}

	Report.error (new SourceReference (source_file, line, column, line, column), "syntax error, unexpected #endif");
}
/**
 * Reads an identifier in a preprocessing expression and evaluates it.
 *
 * The literals "true" and "false" evaluate to themselves; any other
 * identifier is looked up with source_file.context.is_defined ().
 *
 * @return the value of the symbol, or false if no identifier was found
 *         (an error is reported in that case)
 */
bool parse_pp_symbol () {
	char* name_start = current;
	while (current < end && is_ident_char (current[0])) {
		current++;
		column++;
	}
	int name_len = (int) (current - name_start);

	if (name_len == 0) {
		Report.error (new SourceReference (source_file, line, column, line, column), "syntax error, expected identifier");
		return false;
	}

	string identifier = ((string) name_start).substring (0, name_len);

	switch (identifier) {
	case "true":
		return true;
	case "false":
		return false;
	default:
		return source_file.context.is_defined (identifier);
	}
}
/**
 * Parses a primary preprocessing expression: a symbol or a parenthesised
 * expression.
 *
 * @return the value of the expression, or false after reporting an error
 *         when neither form is found
 */
bool parse_pp_primary_expression () {
	if (current < end) {
		if (is_ident_char (current[0])) {
			return parse_pp_symbol ();
		}

		if (current[0] == '(') {
			// opening parenthesis
			current++;
			column++;

			pp_whitespace ();
			bool inner = parse_pp_expression ();
			pp_whitespace ();

			if (current >= end || current[0] != ')') {
				Report.error (new SourceReference (source_file, line, column, line, column), "syntax error, expected `)'");
			} else {
				current++;
				column++;
			}

			return inner;
		}
	}

	// end of input or a character that cannot start an expression
	Report.error (new SourceReference (source_file, line, column, line, column), "syntax error, expected identifier");
	return false;
}
/**
 * Parses a preprocessing expression with any number of leading '!'
 * operators.
 *
 * @return the value of the operand, negated once per '!' consumed
 */
bool parse_pp_unary_expression () {
	bool negate = false;

	// consume the chain of negations first, then evaluate the operand once
	while (current < end && current[0] == '!') {
		current++;
		column++;
		pp_whitespace ();
		negate = !negate;
	}

	bool operand = parse_pp_primary_expression ();

	if (negate) {
		return !operand;
	}
	return operand;
}
/**
 * Parses a chain of unary expressions joined by "==" or "!=".
 *
 * @return the value of the chain, evaluated from left to right
 */
bool parse_pp_equality_expression () {
	bool result = parse_pp_unary_expression ();
	pp_whitespace ();

	// both operators are two characters long and end in '='
	while (current < end - 1
	       && current[1] == '='
	       && (current[0] == '=' || current[0] == '!')) {
		bool is_inequality = (current[0] == '!');

		current += 2;
		column += 2;
		pp_whitespace ();

		bool operand = parse_pp_unary_expression ();
		if (is_inequality) {
			result = (result != operand);
		} else {
			result = (result == operand);
		}
	}

	return result;
}
/**
 * Parses a chain of equality expressions joined by "&&".
 *
 * Every operand is parsed, whatever the value accumulated so far.
 *
 * @return true if all operands evaluated to true
 */
bool parse_pp_and_expression () {
	bool result = parse_pp_equality_expression ();
	pp_whitespace ();

	while (true) {
		bool at_operator = current < end - 1 && current[0] == '&' && current[1] == '&';
		if (!at_operator) {
			break;
		}

		current += 2;
		column += 2;
		pp_whitespace ();

		bool operand = parse_pp_equality_expression ();
		if (!operand) {
			result = false;
		}
	}

	return result;
}
/**
 * Parses a chain of and-expressions joined by "||".
 *
 * Every operand is parsed, whatever the value accumulated so far.
 *
 * @return true if at least one operand evaluated to true
 */
bool parse_pp_or_expression () {
	bool result = parse_pp_and_expression ();
	pp_whitespace ();

	while (true) {
		bool at_operator = current < end - 1 && current[0] == '|' && current[1] == '|';
		if (!at_operator) {
			break;
		}

		current += 2;
		column += 2;
		pp_whitespace ();

		bool operand = parse_pp_and_expression ();
		if (operand) {
			result = true;
		}
	}

	return result;
}
/**
 * Parses a complete preprocessing expression by delegating to
 * parse_pp_or_expression ().
 *
 * @return the value of the expression
 */
bool parse_pp_expression () {
	bool value = parse_pp_or_expression ();
	return value;
}