Release 0.7.8
[vala-lang.git] / vala / valascanner.vala
blob109dca03f83eee773334c0747a632454fde89b6a
1 /* valascanner.vala
3 * Copyright (C) 2008-2009 Jürg Billeter
5 * This library is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU Lesser General Public
7 * License as published by the Free Software Foundation; either
8 * version 2.1 of the License, or (at your option) any later version.
10 * This library is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * Lesser General Public License for more details.
15 * You should have received a copy of the GNU Lesser General Public
16 * License along with this library; if not, write to the Free Software
17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19 * Author:
20 * Jürg Billeter <j@bitron.ch>
23 using GLib;
25 /**
26 * Lexical scanner for Vala source files.
28 public class Vala.Scanner {
/**
 * Source file whose mapped contents are tokenized by this scanner.
 */
public SourceFile source_file { get; private set; }

// read cursor and one-past-the-end pointer into the mapped file contents
char* current;
char* end;

// position of `current` in the source file; both start at 1
int line;
int column;

// last comment whose text starts with '*', handed out by pop_comment ()
Comment _comment;

// one entry per open #if block, innermost last
Conditional[] conditional_stack;

struct Conditional {
	// a branch of this #if/#elif/#else chain has already been selected
	public bool matched;
	// tested by the #elif and #else handlers to reject misplaced directives
	public bool else_found;
	// while set, pp_directive () skips lines up to the next directive
	public bool skip_section;
}

// currently open brackets and string templates, innermost last
State[] state_stack;

enum State {
	PARENS,
	BRACE,
	BRACKET,
	// inside a string template, between its parts
	TEMPLATE,
	// a template part was just emitted; the next read_token () call pops it
	TEMPLATE_PART
}
/**
 * Creates a scanner positioned at the first character of a source file.
 *
 * @param source_file file whose mapped contents are scanned
 */
public Scanner (SourceFile source_file) {
	this.source_file = source_file;

	// the cursor starts at the first mapped byte, `end` is one past the last
	current = source_file.get_mapped_contents ();
	end = current + source_file.get_mapped_length ();

	line = 1;
	column = 1;
}
// true if the innermost scanner state is State.TEMPLATE
bool in_template () {
	int depth = state_stack.length;
	if (depth == 0) {
		return false;
	}
	return state_stack[depth - 1] == State.TEMPLATE;
}
// true if the innermost scanner state is State.TEMPLATE_PART
bool in_template_part () {
	int depth = state_stack.length;
	if (depth == 0) {
		return false;
	}
	return state_stack[depth - 1] == State.TEMPLATE_PART;
}
// true for characters that may appear inside an identifier:
// alphanumeric characters and the underscore
bool is_ident_char (char c) {
	if (c == '_') {
		return true;
	}
	return c.isalnum ();
}
/**
 * Classifies an identifier-shaped lexeme as a keyword or a plain identifier.
 *
 * @param begin pointer to the first character of the lexeme
 * @param len   length of the lexeme
 * @return the matching keyword token type, or TokenType.IDENTIFIER
 */
public static TokenType get_identifier_or_keyword (char* begin, int len) {
	// dispatch on the length, then on the first character; the remaining
	// candidates are compared one after the other
	switch (len) {
	case 2:
		switch (begin[0]) {
		case 'a':
			if (matches (begin, "as")) return TokenType.AS;
			break;
		case 'd':
			if (matches (begin, "do")) return TokenType.DO;
			break;
		case 'i':
			// with length 2 and a leading 'i' the second character decides
			if (begin[1] == 'f') return TokenType.IF;
			if (begin[1] == 'n') return TokenType.IN;
			if (begin[1] == 's') return TokenType.IS;
			break;
		}
		break;
	case 3:
		switch (begin[0]) {
		case 'f':
			if (matches (begin, "for")) return TokenType.FOR;
			break;
		case 'g':
			if (matches (begin, "get")) return TokenType.GET;
			break;
		case 'n':
			if (matches (begin, "new")) return TokenType.NEW;
			break;
		case 'o':
			if (matches (begin, "out")) return TokenType.OUT;
			break;
		case 'r':
			if (matches (begin, "ref")) return TokenType.REF;
			break;
		case 's':
			if (matches (begin, "set")) return TokenType.SET;
			break;
		case 't':
			if (matches (begin, "try")) return TokenType.TRY;
			break;
		case 'v':
			if (matches (begin, "var")) return TokenType.VAR;
			break;
		}
		break;
	case 4:
		switch (begin[0]) {
		case 'b':
			if (matches (begin, "base")) return TokenType.BASE;
			break;
		case 'c':
			if (matches (begin, "case")) return TokenType.CASE;
			break;
		case 'e':
			if (matches (begin, "else")) return TokenType.ELSE;
			if (matches (begin, "enum")) return TokenType.ENUM;
			break;
		case 'l':
			if (matches (begin, "lock")) return TokenType.LOCK;
			break;
		case 'n':
			if (matches (begin, "null")) return TokenType.NULL;
			break;
		case 't':
			if (matches (begin, "this")) return TokenType.THIS;
			if (matches (begin, "true")) return TokenType.TRUE;
			break;
		case 'v':
			if (matches (begin, "void")) return TokenType.VOID;
			break;
		case 'w':
			if (matches (begin, "weak")) return TokenType.WEAK;
			break;
		}
		break;
	case 5:
		switch (begin[0]) {
		case 'a':
			if (matches (begin, "async")) return TokenType.ASYNC;
			break;
		case 'b':
			if (matches (begin, "break")) return TokenType.BREAK;
			break;
		case 'c':
			if (matches (begin, "catch")) return TokenType.CATCH;
			if (matches (begin, "class")) return TokenType.CLASS;
			if (matches (begin, "const")) return TokenType.CONST;
			break;
		case 'f':
			if (matches (begin, "false")) return TokenType.FALSE;
			break;
		case 'o':
			if (matches (begin, "owned")) return TokenType.OWNED;
			break;
		case 't':
			if (matches (begin, "throw")) return TokenType.THROW;
			break;
		case 'u':
			if (matches (begin, "using")) return TokenType.USING;
			break;
		case 'w':
			if (matches (begin, "while")) return TokenType.WHILE;
			break;
		case 'y':
			if (matches (begin, "yield")) return TokenType.YIELD;
			break;
		}
		break;
	case 6:
		switch (begin[0]) {
		case 'd':
			if (matches (begin, "delete")) return TokenType.DELETE;
			break;
		case 'e':
			if (matches (begin, "extern")) return TokenType.EXTERN;
			break;
		case 'i':
			if (matches (begin, "inline")) return TokenType.INLINE;
			break;
		case 'p':
			if (matches (begin, "params")) return TokenType.PARAMS;
			if (matches (begin, "public")) return TokenType.PUBLIC;
			break;
		case 'r':
			if (matches (begin, "return")) return TokenType.RETURN;
			break;
		case 's':
			if (matches (begin, "signal")) return TokenType.SIGNAL;
			if (matches (begin, "sizeof")) return TokenType.SIZEOF;
			if (matches (begin, "static")) return TokenType.STATIC;
			if (matches (begin, "struct")) return TokenType.STRUCT;
			if (matches (begin, "switch")) return TokenType.SWITCH;
			break;
		case 't':
			if (matches (begin, "throws")) return TokenType.THROWS;
			if (matches (begin, "typeof")) return TokenType.TYPEOF;
			break;
		}
		break;
	case 7:
		switch (begin[0]) {
		case 'd':
			if (matches (begin, "default")) return TokenType.DEFAULT;
			if (matches (begin, "dynamic")) return TokenType.DYNAMIC;
			break;
		case 'e':
			if (matches (begin, "ensures")) return TokenType.ENSURES;
			break;
		case 'f':
			if (matches (begin, "finally")) return TokenType.FINALLY;
			if (matches (begin, "foreach")) return TokenType.FOREACH;
			break;
		case 'p':
			if (matches (begin, "private")) return TokenType.PRIVATE;
			break;
		case 'u':
			if (matches (begin, "unowned")) return TokenType.UNOWNED;
			break;
		case 'v':
			if (matches (begin, "virtual")) return TokenType.VIRTUAL;
			break;
		}
		break;
	case 8:
		switch (begin[0]) {
		case 'a':
			if (matches (begin, "abstract")) return TokenType.ABSTRACT;
			break;
		case 'c':
			if (matches (begin, "continue")) return TokenType.CONTINUE;
			break;
		case 'd':
			if (matches (begin, "delegate")) return TokenType.DELEGATE;
			break;
		case 'i':
			if (matches (begin, "internal")) return TokenType.INTERNAL;
			break;
		case 'o':
			if (matches (begin, "override")) return TokenType.OVERRIDE;
			break;
		case 'r':
			if (matches (begin, "requires")) return TokenType.REQUIRES;
			break;
		case 'v':
			if (matches (begin, "volatile")) return TokenType.VOLATILE;
			break;
		}
		break;
	case 9:
		switch (begin[0]) {
		case 'c':
			if (matches (begin, "construct")) return TokenType.CONSTRUCT;
			break;
		case 'i':
			if (matches (begin, "interface")) return TokenType.INTERFACE;
			break;
		case 'n':
			if (matches (begin, "namespace")) return TokenType.NAMESPACE;
			break;
		case 'p':
			if (matches (begin, "protected")) return TokenType.PROTECTED;
			break;
		}
		break;
	case 11:
		if (matches (begin, "errordomain")) return TokenType.ERRORDOMAIN;
		break;
	}

	// no keyword of this length matched
	return TokenType.IDENTIFIER;
}
/**
 * Scans a numeric literal starting at `current` and advances past it.
 *
 * Handles hexadecimal (0x...) and decimal integers, an optional fractional
 * part, an optional exponent and the type suffixes l/ll, u/ul/ull, f and d.
 * A digit sequence directly followed by identifier characters is scanned
 * as one identifier.
 *
 * @return INTEGER_LITERAL, REAL_LITERAL or IDENTIFIER
 */
TokenType read_number () {
	var type = TokenType.INTEGER_LITERAL;

	// integer part
	if (current < end - 2 && current[0] == '0'
	    && current[1] == 'x' && current[2].isxdigit ()) {
		// hexadecimal integer literal
		current += 2;
		while (current < end && current[0].isxdigit ()) {
			current++;
		}
	} else {
		// decimal number
		while (current < end && current[0].isdigit ()) {
			current++;
		}
	}

	// fractional part, only if a digit follows the dot
	if (current < end - 1 && current[0] == '.' && current[1].isdigit ()) {
		type = TokenType.REAL_LITERAL;
		current++;
		while (current < end && current[0].isdigit ()) {
			current++;
		}
	}

	// exponent part
	if (current < end && current[0].tolower () == 'e') {
		type = TokenType.REAL_LITERAL;
		current++;
		if (current < end && (current[0] == '+' || current[0] == '-')) {
			current++;
		}
		while (current < end && current[0].isdigit ()) {
			current++;
		}
	}

	// type suffix
	if (current < end) {
		// remembered before the suffix is read: an f/d suffix turns the
		// literal into a real one but must not disable the identifier check
		bool real_literal = (type == TokenType.REAL_LITERAL);

		switch (current[0]) {
		case 'l':
		case 'L':
			if (type == TokenType.INTEGER_LITERAL) {
				current++;
				if (current < end && current[0].tolower () == 'l') {
					current++;
				}
			}
			break;
		case 'u':
		case 'U':
			if (type == TokenType.INTEGER_LITERAL) {
				current++;
				if (current < end && current[0].tolower () == 'l') {
					current++;
					if (current < end && current[0].tolower () == 'l') {
						current++;
					}
				}
			}
			break;
		case 'f':
		case 'F':
		case 'd':
		case 'D':
			type = TokenType.REAL_LITERAL;
			current++;
			break;
		}

		// the suffix may have moved `current` to the end of the buffer,
		// so re-check the bound before looking at the next character
		if (!real_literal && current < end && is_ident_char (current[0])) {
			// allow identifiers to start with a digit
			// as long as they contain at least one char
			while (current < end && is_ident_char (current[0])) {
				current++;
			}
			type = TokenType.IDENTIFIER;
		}
	}

	return type;
}
/**
 * Reads the next token while the scanner is inside a string template.
 *
 * Produces CLOSE_TEMPLATE for the closing quote, IDENTIFIER for `$name`,
 * TEMPLATE_STRING_LITERAL for literal text and for `$$`, and hands over to
 * read_token () after `$(`.
 *
 * @param token_begin receives the location of the first character of the token
 * @param token_end   receives the location of the end of the token
 * @return the type of the token that was read
 */
public TokenType read_template_token (out SourceLocation token_begin, out SourceLocation token_end) {
	TokenType type;
	char* begin = current;
	token_begin.pos = begin;
	token_begin.line = line;
	token_begin.column = column;

	// stays negative when the token length in characters equals its length
	// in bytes; otherwise counts characters for the column update below
	int token_length_in_chars = -1;

	if (current >= end) {
		type = TokenType.EOF;
	} else {
		switch (current[0]) {
		case '"':
			type = TokenType.CLOSE_TEMPLATE;
			current++;
			// leave the template state, guarding against an empty stack
			// since this method is public
			if (state_stack.length > 0) {
				state_stack.length--;
			}
			break;
		case '$':
			token_begin.pos++; // $ is not part of following token
			current++;
			// the '$' may be the last byte of the file, so every look at
			// the following character needs a bounds check
			if (current < end && (current[0].isalpha () || current[0] == '_')) {
				while (current < end && is_ident_char (current[0])) {
					current++;
				}
				type = TokenType.IDENTIFIER;
				state_stack += State.TEMPLATE_PART;
			} else if (current < end && current[0] == '(') {
				current++;
				column += 2;
				state_stack += State.PARENS;
				return read_token (out token_begin, out token_end);
			} else if (current < end && current[0] == '$') {
				type = TokenType.TEMPLATE_STRING_LITERAL;
				current++;
				state_stack += State.TEMPLATE_PART;
			} else {
				Report.error (new SourceReference (source_file, line, column + 1, line, column + 1), "unexpected character");
				// account for the consumed '$' so that the following
				// token is reported at the right column
				column++;
				return read_template_token (out token_begin, out token_end);
			}
			break;
		default:
			type = TokenType.TEMPLATE_STRING_LITERAL;
			token_length_in_chars = 0;
			while (current < end && current[0] != '"' && current[0] != '$') {
				if (current[0] == '\\') {
					current++;
					token_length_in_chars++;
					if (current >= end) {
						break;
					}

					switch (current[0]) {
					case '\'':
					case '"':
					case '\\':
					case '0':
					case 'b':
					case 'f':
					case 'n':
					case 'r':
					case 't':
						current++;
						token_length_in_chars++;
						break;
					case 'x':
						// hexadecimal escape character
						current++;
						token_length_in_chars++;
						while (current < end && current[0].isxdigit ()) {
							current++;
							token_length_in_chars++;
						}
						break;
					default:
						Report.error (new SourceReference (source_file, line, column + token_length_in_chars, line, column + token_length_in_chars), "invalid escape sequence");
						break;
					}
				} else if (current[0] == '\n') {
					break;
				} else {
					unichar u = ((string) current).get_char_validated ((long) (end - current));
					// -1 marks an invalid sequence, -2 an incomplete one;
					// neither may be passed to to_utf8 ()
					if (u != (unichar) (-1) && u != (unichar) (-2)) {
						current += u.to_utf8 (null);
						token_length_in_chars++;
					} else {
						current++;
						Report.error (new SourceReference (source_file, line, column + token_length_in_chars, line, column + token_length_in_chars), "invalid UTF-8 character");
					}
				}
			}
			if (current >= end || current[0] == '\n') {
				// templates must be closed on the line they were opened on
				Report.error (new SourceReference (source_file, line, column + token_length_in_chars, line, column + token_length_in_chars), "syntax error, expected \"");
				if (state_stack.length > 0) {
					state_stack.length--;
				}
				return read_token (out token_begin, out token_end);
			}
			state_stack += State.TEMPLATE_PART;
			break;
		}
	}

	if (token_length_in_chars < 0) {
		column += (int) (current - begin);
	} else {
		column += token_length_in_chars;
	}

	token_end.pos = current;
	token_end.line = line;
	token_end.column = column - 1;

	return type;
}
/**
 * Reads the next token from the source file.
 *
 * Whitespace, comments and preprocessing directives in front of the token
 * are skipped via space (). Inside a string template the work is delegated
 * to read_template_token ().
 *
 * @param token_begin receives the location of the first character of the token
 * @param token_end   receives the location of the last character of the token
 * @return the type of the token that was read, TokenType.EOF at end of input
 */
public TokenType read_token (out SourceLocation token_begin, out SourceLocation token_end) {
	if (in_template ()) {
		return read_template_token (out token_begin, out token_end);
	} else if (in_template_part ()) {
		// a template part was just returned: emit an empty COMMA token
		// (presumably so the parser sees the parts as a comma-separated list)
		state_stack.length--;

		token_begin.pos = current;
		token_begin.line = line;
		token_begin.column = column;

		token_end.pos = current;
		token_end.line = line;
		token_end.column = column - 1;

		return TokenType.COMMA;
	}

	space ();

	TokenType type;
	char* begin = current;
	token_begin.pos = begin;
	token_begin.line = line;
	token_begin.column = column;

	// stays negative when the token length in characters equals its length
	// in bytes; otherwise counts characters for the column update below
	int token_length_in_chars = -1;

	if (current >= end) {
		type = TokenType.EOF;
	} else if (current[0].isalpha () || current[0] == '_') {
		int len = 0;
		while (current < end && is_ident_char (current[0])) {
			current++;
			len++;
		}
		type = get_identifier_or_keyword (begin, len);
	} else if (current[0] == '@') {
		if (current < end - 1 && current[1] == '"') {
			type = TokenType.OPEN_TEMPLATE;
			current += 2;
			state_stack += State.TEMPLATE;
		} else {
			token_begin.pos++; // @ is not part of the identifier
			current++;
			// whatever follows '@' is an identifier, even a keyword
			while (current < end && is_ident_char (current[0])) {
				current++;
			}
			type = TokenType.IDENTIFIER;
		}
	} else if (current[0].isdigit ()) {
		type = read_number ();
	} else {
		switch (current[0]) {
		case '{':
			type = TokenType.OPEN_BRACE;
			current++;
			state_stack += State.BRACE;
			break;
		case '}':
			type = TokenType.CLOSE_BRACE;
			current++;
			// an unbalanced closing bracket must not underflow the stack
			if (state_stack.length > 0) {
				state_stack.length--;
			}
			break;
		case '(':
			type = TokenType.OPEN_PARENS;
			current++;
			state_stack += State.PARENS;
			break;
		case ')':
			type = TokenType.CLOSE_PARENS;
			current++;
			if (state_stack.length > 0) {
				state_stack.length--;
			}
			if (in_template ()) {
				// this parenthesis closes a $(...) template expression
				type = TokenType.COMMA;
			}
			break;
		case '[':
			type = TokenType.OPEN_BRACKET;
			current++;
			state_stack += State.BRACKET;
			break;
		case ']':
			type = TokenType.CLOSE_BRACKET;
			current++;
			if (state_stack.length > 0) {
				state_stack.length--;
			}
			break;
		case '.':
			type = TokenType.DOT;
			current++;
			if (current < end - 1) {
				if (current[0] == '.' && current[1] == '.') {
					type = TokenType.ELLIPSIS;
					current += 2;
				}
			}
			break;
		case ':':
			type = TokenType.COLON;
			current++;
			if (current < end && current[0] == ':') {
				type = TokenType.DOUBLE_COLON;
				current++;
			}
			break;
		case ',':
			type = TokenType.COMMA;
			current++;
			break;
		case ';':
			type = TokenType.SEMICOLON;
			current++;
			break;
		case '#':
			type = TokenType.HASH;
			current++;
			break;
		case '?':
			type = TokenType.INTERR;
			current++;
			break;
		case '|':
			type = TokenType.BITWISE_OR;
			current++;
			if (current < end) {
				switch (current[0]) {
				case '=':
					type = TokenType.ASSIGN_BITWISE_OR;
					current++;
					break;
				case '|':
					type = TokenType.OP_OR;
					current++;
					break;
				}
			}
			break;
		case '&':
			type = TokenType.BITWISE_AND;
			current++;
			if (current < end) {
				switch (current[0]) {
				case '=':
					type = TokenType.ASSIGN_BITWISE_AND;
					current++;
					break;
				case '&':
					type = TokenType.OP_AND;
					current++;
					break;
				}
			}
			break;
		case '^':
			type = TokenType.CARRET;
			current++;
			if (current < end && current[0] == '=') {
				type = TokenType.ASSIGN_BITWISE_XOR;
				current++;
			}
			break;
		case '~':
			type = TokenType.TILDE;
			current++;
			break;
		case '=':
			type = TokenType.ASSIGN;
			current++;
			if (current < end) {
				switch (current[0]) {
				case '=':
					type = TokenType.OP_EQ;
					current++;
					break;
				case '>':
					type = TokenType.LAMBDA;
					current++;
					break;
				}
			}
			break;
		case '<':
			type = TokenType.OP_LT;
			current++;
			if (current < end) {
				switch (current[0]) {
				case '=':
					type = TokenType.OP_LE;
					current++;
					break;
				case '<':
					type = TokenType.OP_SHIFT_LEFT;
					current++;
					if (current < end && current[0] == '=') {
						type = TokenType.ASSIGN_SHIFT_LEFT;
						current++;
					}
					break;
				}
			}
			break;
		case '>':
			type = TokenType.OP_GT;
			current++;
			if (current < end && current[0] == '=') {
				type = TokenType.OP_GE;
				current++;
			}
			break;
		case '!':
			type = TokenType.OP_NEG;
			current++;
			if (current < end && current[0] == '=') {
				type = TokenType.OP_NE;
				current++;
			}
			break;
		case '+':
			type = TokenType.PLUS;
			current++;
			if (current < end) {
				switch (current[0]) {
				case '=':
					type = TokenType.ASSIGN_ADD;
					current++;
					break;
				case '+':
					type = TokenType.OP_INC;
					current++;
					break;
				}
			}
			break;
		case '-':
			type = TokenType.MINUS;
			current++;
			if (current < end) {
				switch (current[0]) {
				case '=':
					type = TokenType.ASSIGN_SUB;
					current++;
					break;
				case '-':
					type = TokenType.OP_DEC;
					current++;
					break;
				case '>':
					type = TokenType.OP_PTR;
					current++;
					break;
				}
			}
			break;
		case '*':
			type = TokenType.STAR;
			current++;
			if (current < end && current[0] == '=') {
				type = TokenType.ASSIGN_MUL;
				current++;
			}
			break;
		case '/':
			type = TokenType.DIV;
			current++;
			if (current < end && current[0] == '=') {
				type = TokenType.ASSIGN_DIV;
				current++;
			}
			break;
		case '%':
			type = TokenType.PERCENT;
			current++;
			if (current < end && current[0] == '=') {
				type = TokenType.ASSIGN_PERCENT;
				current++;
			}
			break;
		case '\'':
		case '"':
			if (begin[0] == '\'') {
				type = TokenType.CHARACTER_LITERAL;
			} else if (current < end - 6 && begin[1] == '"' && begin[2] == '"') {
				type = TokenType.VERBATIM_STRING_LITERAL;
				// the opening and closing triple quotes
				token_length_in_chars = 6;
				current += 3;
				while (current < end - 4) {
					if (current[0] == '"' && current[1] == '"' && current[2] == '"' && current[3] != '"') {
						break;
					} else if (current[0] == '\n') {
						current++;
						line++;
						column = 1;
						// only the closing quotes remain to be counted
						token_length_in_chars = 3;
					} else {
						unichar u = ((string) current).get_char_validated ((long) (end - current));
						// -1 marks an invalid sequence, -2 an incomplete
						// one; neither may be passed to to_utf8 ()
						if (u != (unichar) (-1) && u != (unichar) (-2)) {
							current += u.to_utf8 (null);
							token_length_in_chars++;
						} else {
							// step over the offending byte, otherwise this
							// loop would never terminate
							current++;
							Report.error (new SourceReference (source_file, line, column + token_length_in_chars, line, column + token_length_in_chars), "invalid UTF-8 character");
						}
					}
				}
				// a multi-byte character may have moved `current` closer
				// than three bytes to the end of the buffer
				if (current < end - 2 && current[0] == '"' && current[1] == '"' && current[2] == '"') {
					current += 3;
				} else {
					Report.error (new SourceReference (source_file, line, column + token_length_in_chars, line, column + token_length_in_chars), "syntax error, expected \"\"\"");
				}
				break;
			} else {
				type = TokenType.STRING_LITERAL;
			}
			// the opening and closing quote
			token_length_in_chars = 2;
			current++;
			while (current < end && current[0] != begin[0]) {
				if (current[0] == '\\') {
					current++;
					token_length_in_chars++;
					if (current >= end) {
						break;
					}

					switch (current[0]) {
					case '\'':
					case '"':
					case '\\':
					case '0':
					case 'b':
					case 'f':
					case 'n':
					case 'r':
					case 't':
						current++;
						token_length_in_chars++;
						break;
					case 'x':
						// hexadecimal escape character
						current++;
						token_length_in_chars++;
						while (current < end && current[0].isxdigit ()) {
							current++;
							token_length_in_chars++;
						}
						break;
					default:
						Report.error (new SourceReference (source_file, line, column + token_length_in_chars, line, column + token_length_in_chars), "invalid escape sequence");
						break;
					}
				} else if (current[0] == '\n') {
					break;
				} else {
					unichar u = ((string) current).get_char_validated ((long) (end - current));
					if (u != (unichar) (-1) && u != (unichar) (-2)) {
						current += u.to_utf8 (null);
						token_length_in_chars++;
					} else {
						current++;
						Report.error (new SourceReference (source_file, line, column + token_length_in_chars, line, column + token_length_in_chars), "invalid UTF-8 character");
					}
				}
			}
			if (current < end && current[0] != '\n') {
				// step over the closing quote
				current++;
			} else {
				Report.error (new SourceReference (source_file, line, column + token_length_in_chars, line, column + token_length_in_chars), "syntax error, expected %c".printf (begin[0]));
			}
			break;
		default:
			// not the start of any token: report it, skip one character
			// and try again
			unichar u = ((string) current).get_char_validated ((long) (end - current));
			if (u != (unichar) (-1) && u != (unichar) (-2)) {
				current += u.to_utf8 (null);
				Report.error (new SourceReference (source_file, line, column, line, column), "syntax error, unexpected character");
			} else {
				current++;
				Report.error (new SourceReference (source_file, line, column, line, column), "invalid UTF-8 character");
			}
			column++;
			return read_token (out token_begin, out token_end);
		}
	}

	if (token_length_in_chars < 0) {
		column += (int) (current - begin);
	} else {
		column += token_length_in_chars;
	}

	token_end.pos = current;
	token_end.line = line;
	token_end.column = column - 1;

	return type;
}
// Returns true if the bytes at `begin` start with `keyword`.
//
// The caller must guarantee that at least as many bytes as the keyword is
// long are readable at `begin`. The comparison runs up to the keyword's
// NUL terminator, so it is bounded by the keyword's length in bytes and
// not by its length in characters.
static bool matches (char* begin, string keyword) {
	char* keyword_array = (char*) keyword;
	for (int i = 0; keyword_array[i] != '\0'; i++) {
		if (begin[i] != keyword_array[i]) {
			return false;
		}
	}
	return true;
}
// Skips whitespace within the current line; a newline is never consumed.
// Returns true if at least one character was skipped.
bool pp_whitespace () {
	char* start = current;
	while (current < end && current[0] != '\n' && current[0].isspace ()) {
		current++;
		column++;
	}
	return current != start;
}
// Handles a preprocessing directive; `current` points at its hash sign.
// If the innermost conditional is being skipped afterwards, all lines up
// to the next line starting with a directive are consumed as well.
void pp_directive () {
	// step over the hash sign
	current++;
	column++;

	pp_whitespace ();

	// the directive name is the following run of alphanumeric characters
	char* name = current;
	while (current < end && current[0].isalnum ()) {
		current++;
		column++;
	}
	int name_len = (int) (current - name);

	if (name_len == 2 && matches (name, "if")) {
		parse_pp_if ();
	} else if (name_len == 4 && matches (name, "elif")) {
		parse_pp_elif ();
	} else if (name_len == 4 && matches (name, "else")) {
		parse_pp_else ();
	} else if (name_len == 5 && matches (name, "endif")) {
		parse_pp_endif ();
	} else {
		Report.error (new SourceReference (source_file, line, column - name_len, line, column), "syntax error, invalid preprocessing directive");
	}

	// the handlers above may have changed the stack, so look at it only now
	int depth = conditional_stack.length;
	if (depth == 0 || !conditional_stack[depth - 1].skip_section) {
		return;
	}

	// skip lines until next preprocessing directive
	bool at_line_start = false;
	while (current < end) {
		char c = current[0];
		if (at_line_start && c == '#') {
			// go back to begin of line
			current -= (column - 1);
			column = 1;
			return;
		}
		if (c == '\n') {
			line++;
			column = 0;
			at_line_start = true;
		} else if (!c.isspace ()) {
			at_line_start = false;
		}
		current++;
		column++;
	}
}
// Skips trailing whitespace of a directive line and reports an error
// unless a newline follows. The newline itself is not consumed.
void pp_eol () {
	pp_whitespace ();

	bool at_newline = (current < end && current[0] == '\n');
	if (!at_newline) {
		Report.error (new SourceReference (source_file, line, column, line, column), "syntax error, expected newline");
	}
}
// Handles #if: evaluates the condition and opens a new conditional.
void parse_pp_if () {
	pp_whitespace ();
	bool condition = parse_pp_expression ();
	pp_eol ();

	conditional_stack += Conditional ();

	int top = conditional_stack.length - 1;
	// code is only processed if the enclosing conditional, if any,
	// is not being skipped
	bool parent_active = (top == 0 || !conditional_stack[top - 1].skip_section);

	if (condition && parent_active) {
		// condition true => process code within if
		conditional_stack[top].matched = true;
	} else {
		// skip lines until next preprocessing directive
		conditional_stack[top].skip_section = true;
	}
}
// Handles #elif: selects this branch if its condition holds and no
// earlier branch of the same conditional was selected.
void parse_pp_elif () {
	pp_whitespace ();
	bool condition = parse_pp_expression ();
	pp_eol ();

	int top = conditional_stack.length - 1;
	if (top < 0 || conditional_stack[top].else_found) {
		Report.error (new SourceReference (source_file, line, column, line, column), "syntax error, unexpected #elif");
		return;
	}

	bool parent_active = (top == 0 || !conditional_stack[top - 1].skip_section);

	if (condition && !conditional_stack[top].matched && parent_active) {
		// condition true => process code within if
		conditional_stack[top].matched = true;
		conditional_stack[top].skip_section = false;
	} else {
		// skip lines until next preprocessing directive
		conditional_stack[top].skip_section = true;
	}
}
// Handles #else: selects this branch if no earlier branch of the same
// conditional was selected. A second #else, or an #else without an open
// conditional, is reported as an error and otherwise ignored.
void parse_pp_else () {
	pp_eol ();

	int top = conditional_stack.length - 1;
	if (top < 0 || conditional_stack[top].else_found) {
		Report.error (new SourceReference (source_file, line, column, line, column), "syntax error, unexpected #else");
		return;
	}

	// remember the #else so that a further #else or #elif of the same
	// conditional is rejected by the else_found checks
	conditional_stack[top].else_found = true;

	bool parent_active = (top == 0 || !conditional_stack[top - 1].skip_section);

	if (!conditional_stack[top].matched && parent_active) {
		// no branch taken so far => process code within else
		conditional_stack[top].matched = true;
		conditional_stack[top].skip_section = false;
	} else {
		// skip lines until next preprocessing directive
		conditional_stack[top].skip_section = true;
	}
}
// Handles #endif: closes the innermost conditional.
void parse_pp_endif () {
	pp_eol ();

	if (conditional_stack.length > 0) {
		conditional_stack.length--;
	} else {
		Report.error (new SourceReference (source_file, line, column, line, column), "syntax error, unexpected #endif");
	}
}
// Parses a symbol in a preprocessor expression and returns its value:
// "true" and "false" are literals, any other name is looked up in the
// code context of the source file.
bool parse_pp_symbol () {
	char* start = current;
	while (current < end && is_ident_char (current[0])) {
		current++;
		column++;
	}

	int len = (int) (current - start);
	if (len == 0) {
		Report.error (new SourceReference (source_file, line, column, line, column), "syntax error, expected identifier");
		return false;
	}

	string identifier = ((string) start).ndup (len);
	if (identifier == "true") {
		return true;
	}
	if (identifier == "false") {
		return false;
	}
	return source_file.context.is_defined (identifier);
}
// Parses a symbol or a parenthesized preprocessor expression.
// Returns false after reporting an error if neither is present.
bool parse_pp_primary_expression () {
	if (current < end) {
		if (is_ident_char (current[0])) {
			return parse_pp_symbol ();
		}

		if (current[0] == '(') {
			current++;
			column++;
			pp_whitespace ();
			bool inner = parse_pp_expression ();
			pp_whitespace ();
			if (current < end && current[0] == ')') {
				current++;
				column++;
			} else {
				Report.error (new SourceReference (source_file, line, column, line, column), "syntax error, expected `)'");
			}
			return inner;
		}
	}

	// end of input or a character that cannot start an expression
	Report.error (new SourceReference (source_file, line, column, line, column), "syntax error, expected identifier");
	return false;
}
// Parses an optionally negated primary expression.
bool parse_pp_unary_expression () {
	bool negated = (current < end && current[0] == '!');
	if (!negated) {
		return parse_pp_primary_expression ();
	}

	current++;
	column++;
	pp_whitespace ();
	bool operand = parse_pp_unary_expression ();
	return !operand;
}
// Parses a left-associative chain of unary expressions joined by == or !=.
bool parse_pp_equality_expression () {
	bool left = parse_pp_unary_expression ();
	pp_whitespace ();

	// both operators are two characters long and end in '='
	while (current < end - 1 && current[1] == '='
	       && (current[0] == '=' || current[0] == '!')) {
		bool negate = (current[0] == '!');
		current += 2;
		column += 2;
		pp_whitespace ();
		bool right = parse_pp_unary_expression ();
		if (negate) {
			left = (left != right);
		} else {
			left = (left == right);
		}
	}

	return left;
}
// Parses a chain of equality expressions joined by &&.
bool parse_pp_and_expression () {
	bool result = parse_pp_equality_expression ();
	pp_whitespace ();

	while (current < end - 1 && current[0] == '&' && current[1] == '&') {
		current += 2;
		column += 2;
		pp_whitespace ();
		// the operand is always parsed, whatever the value so far,
		// because its text has to be consumed
		bool operand = parse_pp_equality_expression ();
		result = result && operand;
	}

	return result;
}
// Parses a chain of and-expressions joined by ||.
bool parse_pp_or_expression () {
	bool result = parse_pp_and_expression ();
	pp_whitespace ();

	while (current < end - 1 && current[0] == '|' && current[1] == '|') {
		current += 2;
		column += 2;
		pp_whitespace ();
		// the operand is always parsed, whatever the value so far,
		// because its text has to be consumed
		bool operand = parse_pp_and_expression ();
		result = result || operand;
	}

	return result;
}
// Entry point of the preprocessor expression parser; || is the operator
// parsed at the outermost level.
bool parse_pp_expression () {
	bool result = parse_pp_or_expression ();
	return result;
}
// Skips whitespace including newlines, keeping line and column up to date.
// A '#' found at the beginning of a line is handled as a preprocessing
// directive. Returns true if anything was skipped or a directive was handled.
bool whitespace () {
	char* start = current;
	bool at_line_start = (column == 1);

	while (current < end && current[0].isspace ()) {
		if (current[0] == '\n') {
			line++;
			// becomes 1 through the increment below
			column = 0;
			at_line_start = true;
		}
		current++;
		column++;
	}

	if (at_line_start && current < end && current[0] == '#') {
		pp_directive ();
		return true;
	}

	return current != start;
}
// Skips one comment starting at `current`, if there is one.
//
// With file_comment set, every comment is passed to push_comment (), but a
// comment starting with "/**" is left in place and false is returned.
// Without it, only comments starting with "/**" are passed on.
//
// Returns true if a comment was consumed, including an unterminated one.
bool comment (bool file_comment = false) {
	if (current > end - 2
	    || current[0] != '/'
	    || (current[1] != '/' && current[1] != '*')) {
		return false;
	}

	if (current[1] == '/') {
		SourceReference source_reference = null;
		if (file_comment) {
			source_reference = new SourceReference (source_file, line, column, line, column);
		}

		// single-line comment
		current += 2;
		char* begin = current;

		// skip until end of line or end of file
		while (current < end && current[0] != '\n') {
			current++;
		}

		if (source_reference != null) {
			push_comment (((string) begin).ndup ((long) (current - begin)), source_reference, file_comment);
		}
	} else {
		SourceReference source_reference = null;

		// only two bytes are known to be readable at this point
		bool doc_comment = (current < end - 2 && current[2] == '*');

		if (file_comment && doc_comment) {
			return false;
		}

		if (doc_comment || file_comment) {
			source_reference = new SourceReference (source_file, line, column, line, column);
		}

		// step over the opening delimiter
		current += 2;
		column += 2;

		char* begin = current;
		while (current < end - 1
		       && (current[0] != '*' || current[1] != '/')) {
			if (current[0] == '\n') {
				line++;
				column = 0;
			}
			current++;
			column++;
		}

		if (current >= end - 1) {
			// no closing delimiter before the end of the file
			Report.error (new SourceReference (source_file, line, column, line, column), "syntax error, expected */");
			// the unterminated comment extends to the end of the input
			current = end;
			return true;
		}

		if (source_reference != null) {
			push_comment (((string) begin).ndup ((long) (current - begin)), source_reference, file_comment);
		}

		// step over the closing delimiter
		current += 2;
		column += 2;
	}

	return true;
}
// Skips any mix of whitespace, preprocessing directives and comments.
void space () {
	bool skipped;
	do {
		skipped = whitespace () || comment ();
	} while (skipped);
}
/**
 * Skips whitespace and comments at the current position, scanning the
 * comments in file comment mode, until neither is found.
 */
public void parse_file_comments () {
	bool skipped;
	do {
		skipped = whitespace () || comment (true);
	} while (skipped);
}
// Records a scanned comment. Text starting with '*' is kept as the
// pending comment; file comments are added to the source file instead
// and clear the pending comment.
void push_comment (string comment_item, SourceReference source_reference, bool file_comment) {
	bool starts_with_star = (comment_item[0] == '*');
	if (starts_with_star) {
		_comment = new Comment (comment_item, source_reference);
	}

	if (!file_comment) {
		return;
	}

	source_file.add_comment (new Comment (comment_item, source_reference));
	_comment = null;
}
/**
 * Clears and returns the content of the comment stack.
 *
 * @return saved comment, or null if there is none
 */
public Comment? pop_comment () {
	var saved = _comment;
	_comment = null;
	return saved;
}