codegen: Fix floating reference regression with Variants
[vala-gnome.git] / vala / valascanner.vala
blob718b654717d07b37b7a87fb37360ef0da12296b5
/* valascanner.vala
 *
 * Copyright (C) 2008-2012  Jürg Billeter
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
 *
 * Author:
 * 	Jürg Billeter <j@bitron.ch>
 * 	Jukka-Pekka Iivonen <jp0409@jippii.fi>
 */
24 using GLib;
/**
 * Lexical scanner for Vala source files.
 */
29 public class Vala.Scanner {
30 public SourceFile source_file { get; private set; }
32 TokenType previous;
33 char* current;
34 char* end;
36 int line;
37 int column;
39 Comment _comment;
41 Conditional[] conditional_stack;
/**
 * Bookkeeping for one open preprocessor conditional (#if ... #endif).
 */
struct Conditional {
	// set once a branch of this conditional has been selected for processing
	public bool matched;
	// consulted by the #elif and #else handlers to reject a directive that follows #else
	public bool else_found;
	// true while the lines of the current branch have to be skipped
	public bool skip_section;
}
49 State[] state_stack;
/**
 * Kinds of nested lexical context tracked on the scanner's state stack.
 */
enum State {
	// pushed on '(' and when a template expression "$(" starts
	PARENS,
	// pushed on '{'
	BRACE,
	// pushed on '['
	BRACKET,
	// pushed when a template string is opened with @"
	TEMPLATE,
	// pushed after a piece of a template; the next read_token () call pops it and emits a COMMA
	TEMPLATE_PART,
	// pushed when '/' opens a regular expression literal
	REGEX_LITERAL
}
/**
 * Creates a scanner positioned at the start of the given source file.
 *
 * @param source_file the file whose mapped contents are scanned
 */
public Scanner (SourceFile source_file) {
	this.source_file = source_file;

	// the scan window is [current, end) over the mapped file contents
	current = source_file.get_mapped_contents ();
	end = current + source_file.get_mapped_length ();

	// line and column numbering starts at 1
	column = 1;
	line = 1;
}
/**
 * Repositions the scanner and discards all nested scanning state.
 *
 * @param location the position, line and column to continue scanning from
 */
public void seek (SourceLocation location) {
	// both stacks describe context at the old position, so drop them
	state_stack = null;
	conditional_stack = null;

	column = location.column;
	line = location.line;
	current = location.pos;
}
/**
 * Tells whether the innermost scanning state is State.TEMPLATE.
 */
bool in_template () {
	int depth = state_stack.length;
	if (depth == 0) {
		return false;
	}
	return state_stack[depth - 1] == State.TEMPLATE;
}
/**
 * Tells whether the innermost scanning state is State.TEMPLATE_PART.
 */
bool in_template_part () {
	int depth = state_stack.length;
	if (depth == 0) {
		return false;
	}
	return state_stack[depth - 1] == State.TEMPLATE_PART;
}
/**
 * Tells whether the innermost scanning state is State.REGEX_LITERAL.
 */
bool in_regex_literal () {
	int depth = state_stack.length;
	if (depth == 0) {
		return false;
	}
	return state_stack[depth - 1] == State.REGEX_LITERAL;
}
/**
 * Tells whether a character may appear inside an identifier.
 *
 * @param c the character to classify
 * @return true for '_' and for alphanumeric characters
 */
bool is_ident_char (char c) {
	if (c == '_') {
		return true;
	}
	return c.isalnum ();
}
/**
 * Builds a source reference relative to the current scanner position.
 *
 * @param offset number of columns added to the current column for the start of the reference
 * @param length extent of the reference; added to both the position pointer and the column
 * @return a new reference into the scanned source file, on the current line
 */
SourceReference get_source_reference (int offset, int length = 0) {
	int first_column = column + offset;
	var from = SourceLocation (current, line, first_column);
	var to = SourceLocation (current + length, line, first_column + length);
	return new SourceReference (source_file, from, to);
}
/**
 * Reads the next token while the scanner is inside a regular expression literal.
 *
 * The result is either the literal body (REGEX_LITERAL) or the closing '/' together with
 * any trailing i/s/m/x modifiers (CLOSE_REGEX_LITERAL). If the body runs into a newline or
 * the end of the input, an error is reported, the regex state is popped and scanning
 * continues with read_token ().
 *
 * @param token_begin set to the location of the first character of the token
 * @param token_end set to the location of the last character of the token
 * @return the type of the token that was read
 */
public TokenType read_regex_token (out SourceLocation token_begin, out SourceLocation token_end) {
	TokenType type;
	char* begin = current;
	token_begin = SourceLocation (begin, line, column);

	// -1 means "derive the column advance from the number of bytes consumed"
	int token_length_in_chars = -1;

	if (current >= end) {
		type = TokenType.EOF;
	} else {
		switch (current[0]) {
		case '/':
			type = TokenType.CLOSE_REGEX_LITERAL;
			current++;
			state_stack.length--;
			var fl_i = false;
			var fl_s = false;
			var fl_m = false;
			var fl_x = false;
			// The closing token is pure ASCII, so token_length_in_chars stays at -1 and the
			// column is advanced by the byte count, covering '/' and every modifier.
			while (current < end && (current[0] == 'i' || current[0] == 's' || current[0] == 'm' || current[0] == 'x')) {
				// column offset of this modifier relative to the closing '/'
				int modifier_offset = (int) (current - begin);
				switch (current[0]) {
				case 'i':
					if (fl_i) {
						Report.error (get_source_reference (modifier_offset), "modifier 'i' used more than once");
					}
					fl_i = true;
					break;
				case 's':
					if (fl_s) {
						Report.error (get_source_reference (modifier_offset), "modifier 's' used more than once");
					}
					fl_s = true;
					break;
				case 'm':
					if (fl_m) {
						Report.error (get_source_reference (modifier_offset), "modifier 'm' used more than once");
					}
					fl_m = true;
					break;
				case 'x':
					if (fl_x) {
						Report.error (get_source_reference (modifier_offset), "modifier 'x' used more than once");
					}
					fl_x = true;
					break;
				}
				current++;
			}
			break;
		default:
			type = TokenType.REGEX_LITERAL;
			token_length_in_chars = 0;
			while (current < end && current[0] != '/') {
				if (current[0] == '\\') {
					current++;
					token_length_in_chars++;
					if (current >= end) {
						break;
					}

					switch (current[0]) {
					case '\'':
					case '"':
					case '\\':
					case '/':
					case '^':
					case '$':
					case '.':
					case '[':
					case ']':
					case '{':
					case '}':
					case '(':
					case ')':
					case '?':
					case '*':
					case '+':
					case '-':
					case '#':
					case '&':
					case '~':
					case ':':
					case ';':
					case '<':
					case '>':
					case '|':
					case '%':
					case '=':
					case '@':
					case '0':
					case 'b':
					case 'B':
					case 'f':
					case 'n':
					case 'N':
					case 'r':
					case 'R':
					case 't':
					case 'v':
					case 'a':
					case 'A':
					case 'p':
					case 'P':
					case 'e':
					case 'd':
					case 'D':
					case 's':
					case 'S':
					case 'w':
					case 'W':
					case 'G':
					case 'z':
					case 'Z':
						current++;
						token_length_in_chars++;
						break;
					case 'u':
						// u escape character has four hex digits
						current++;
						token_length_in_chars++;
						int digit_length;
						for (digit_length = 0; digit_length < 4 && current < end && current[0].isxdigit (); digit_length++) {
							current++;
							token_length_in_chars++;
						}
						if (digit_length != 4) {
							Report.error (get_source_reference (token_length_in_chars), "\\u requires four hex digits");
						}
						break;
					case 'x':
						// hexadecimal escape character requires at least one hex digit
						current++;
						token_length_in_chars++;
						int digit_length;
						for (digit_length = 0; current < end && current[0].isxdigit (); digit_length++) {
							current++;
							token_length_in_chars++;
						}
						if (digit_length < 1) {
							Report.error (get_source_reference (token_length_in_chars), "\\x requires at least one hex digit");
						}
						break;
					default:
						// back references \1 through \99
						if (current[0].isdigit ()) {
							current++;
							token_length_in_chars++;
							// the optional second digit must still lie inside the input
							if (current < end && current[0].isdigit ()) {
								current++;
								token_length_in_chars++;
							}
						} else {
							Report.error (get_source_reference (token_length_in_chars), "invalid escape sequence");
						}
						break;
					}
				} else if (current[0] == '\n') {
					// a regex literal may not span lines; handled as an error below
					break;
				} else {
					unichar u = ((string) current).get_char_validated ((long) (end - current));
					if (u != (unichar) (-1)) {
						current += u.to_utf8 (null);
						token_length_in_chars++;
					} else {
						current++;
						Report.error (get_source_reference (token_length_in_chars), "invalid UTF-8 character");
					}
				}
			}
			if (current >= end || current[0] == '\n') {
				// NOTE(review): the message names '"' although the literal is closed by '/';
				// kept unchanged because other code may rely on the exact text.
				Report.error (get_source_reference (token_length_in_chars), "syntax error, expected \"");
				state_stack.length--;
				return read_token (out token_begin, out token_end);
			}
			break;
		}
	}

	if (token_length_in_chars < 0) {
		column += (int) (current - begin);
	} else {
		column += token_length_in_chars;
	}

	token_end = SourceLocation (current, line, column - 1);

	return type;
}
/**
 * Classifies an identifier-shaped token.
 *
 * @param begin pointer to the first character of the token
 * @param len number of characters in the token
 * @return the matching keyword token type, or TokenType.IDENTIFIER when the text is not a keyword
 */
public static TokenType get_identifier_or_keyword (char* begin, int len) {
	// keywords are between 2 and 11 characters long
	if (len < 2 || len > 11) {
		return TokenType.IDENTIFIER;
	}

	// dispatch on the first letter, then compare against the keywords of the right length
	switch (begin[0]) {
	case 'a':
		if (len == 2 && matches (begin, "as")) return TokenType.AS;
		if (len == 5 && matches (begin, "async")) return TokenType.ASYNC;
		if (len == 8 && matches (begin, "abstract")) return TokenType.ABSTRACT;
		break;
	case 'b':
		if (len == 4 && matches (begin, "base")) return TokenType.BASE;
		if (len == 5 && matches (begin, "break")) return TokenType.BREAK;
		break;
	case 'c':
		if (len == 4 && matches (begin, "case")) return TokenType.CASE;
		if (len == 5) {
			if (matches (begin, "catch")) return TokenType.CATCH;
			if (matches (begin, "class")) return TokenType.CLASS;
			if (matches (begin, "const")) return TokenType.CONST;
		}
		if (len == 8 && matches (begin, "continue")) return TokenType.CONTINUE;
		if (len == 9 && matches (begin, "construct")) return TokenType.CONSTRUCT;
		break;
	case 'd':
		if (len == 2 && matches (begin, "do")) return TokenType.DO;
		if (len == 6 && matches (begin, "delete")) return TokenType.DELETE;
		if (len == 7) {
			if (matches (begin, "default")) return TokenType.DEFAULT;
			if (matches (begin, "dynamic")) return TokenType.DYNAMIC;
		}
		if (len == 8 && matches (begin, "delegate")) return TokenType.DELEGATE;
		break;
	case 'e':
		if (len == 4) {
			if (matches (begin, "else")) return TokenType.ELSE;
			if (matches (begin, "enum")) return TokenType.ENUM;
		}
		if (len == 6 && matches (begin, "extern")) return TokenType.EXTERN;
		if (len == 7 && matches (begin, "ensures")) return TokenType.ENSURES;
		if (len == 11 && matches (begin, "errordomain")) return TokenType.ERRORDOMAIN;
		break;
	case 'f':
		if (len == 3 && matches (begin, "for")) return TokenType.FOR;
		if (len == 5 && matches (begin, "false")) return TokenType.FALSE;
		if (len == 7) {
			if (matches (begin, "finally")) return TokenType.FINALLY;
			if (matches (begin, "foreach")) return TokenType.FOREACH;
		}
		break;
	case 'g':
		if (len == 3 && matches (begin, "get")) return TokenType.GET;
		break;
	case 'i':
		if (len == 2) {
			if (matches (begin, "if")) return TokenType.IF;
			if (matches (begin, "in")) return TokenType.IN;
			if (matches (begin, "is")) return TokenType.IS;
		}
		if (len == 6 && matches (begin, "inline")) return TokenType.INLINE;
		if (len == 8 && matches (begin, "internal")) return TokenType.INTERNAL;
		if (len == 9 && matches (begin, "interface")) return TokenType.INTERFACE;
		break;
	case 'l':
		if (len == 4 && matches (begin, "lock")) return TokenType.LOCK;
		break;
	case 'n':
		if (len == 3 && matches (begin, "new")) return TokenType.NEW;
		if (len == 4 && matches (begin, "null")) return TokenType.NULL;
		if (len == 9 && matches (begin, "namespace")) return TokenType.NAMESPACE;
		break;
	case 'o':
		if (len == 3 && matches (begin, "out")) return TokenType.OUT;
		if (len == 5 && matches (begin, "owned")) return TokenType.OWNED;
		if (len == 8 && matches (begin, "override")) return TokenType.OVERRIDE;
		break;
	case 'p':
		if (len == 6) {
			if (matches (begin, "params")) return TokenType.PARAMS;
			if (matches (begin, "public")) return TokenType.PUBLIC;
		}
		if (len == 7 && matches (begin, "private")) return TokenType.PRIVATE;
		if (len == 9 && matches (begin, "protected")) return TokenType.PROTECTED;
		break;
	case 'r':
		if (len == 3 && matches (begin, "ref")) return TokenType.REF;
		if (len == 6 && matches (begin, "return")) return TokenType.RETURN;
		if (len == 8 && matches (begin, "requires")) return TokenType.REQUIRES;
		break;
	case 's':
		if (len == 3 && matches (begin, "set")) return TokenType.SET;
		if (len == 6) {
			if (matches (begin, "sealed")) return TokenType.SEALED;
			if (matches (begin, "signal")) return TokenType.SIGNAL;
			if (matches (begin, "sizeof")) return TokenType.SIZEOF;
			if (matches (begin, "static")) return TokenType.STATIC;
			if (matches (begin, "struct")) return TokenType.STRUCT;
			if (matches (begin, "switch")) return TokenType.SWITCH;
		}
		break;
	case 't':
		if (len == 3 && matches (begin, "try")) return TokenType.TRY;
		if (len == 4) {
			if (matches (begin, "this")) return TokenType.THIS;
			if (matches (begin, "true")) return TokenType.TRUE;
		}
		if (len == 5 && matches (begin, "throw")) return TokenType.THROW;
		if (len == 6) {
			if (matches (begin, "throws")) return TokenType.THROWS;
			if (matches (begin, "typeof")) return TokenType.TYPEOF;
		}
		break;
	case 'u':
		if (len == 5 && matches (begin, "using")) return TokenType.USING;
		if (len == 6 && matches (begin, "unlock")) return TokenType.UNLOCK;
		if (len == 7 && matches (begin, "unowned")) return TokenType.UNOWNED;
		break;
	case 'v':
		if (len == 3 && matches (begin, "var")) return TokenType.VAR;
		if (len == 4 && matches (begin, "void")) return TokenType.VOID;
		if (len == 7 && matches (begin, "virtual")) return TokenType.VIRTUAL;
		if (len == 8 && matches (begin, "volatile")) return TokenType.VOLATILE;
		break;
	case 'w':
		if (len == 4 && matches (begin, "weak")) return TokenType.WEAK;
		if (len == 5 && matches (begin, "while")) return TokenType.WHILE;
		break;
	case 'y':
		if (len == 5 && matches (begin, "yield")) return TokenType.YIELD;
		break;
	}

	return TokenType.IDENTIFIER;
}
/**
 * Scans a numeric literal starting at the current position.
 *
 * Handles hexadecimal and decimal integers, an optional fractional part, an optional
 * exponent and the l/ll/u/ul/ull/f/d type suffixes. A digit sequence that is directly
 * followed by identifier characters is classified as an identifier instead.
 *
 * @return INTEGER_LITERAL, REAL_LITERAL or IDENTIFIER
 */
TokenType read_number () {
	var type = TokenType.INTEGER_LITERAL;

	// integer part
	if (current < end - 2 && current[0] == '0'
	    && current[1] == 'x' && current[2].isxdigit ()) {
		// hexadecimal integer literal
		current += 2;
		while (current < end && current[0].isxdigit ()) {
			current++;
		}
	} else {
		// decimal number
		while (current < end && current[0].isdigit ()) {
			current++;
		}
	}

	// fractional part
	if (current < end - 1 && current[0] == '.' && current[1].isdigit ()) {
		type = TokenType.REAL_LITERAL;
		current++;
		while (current < end && current[0].isdigit ()) {
			current++;
		}
	}

	// exponent part
	if (current < end && current[0].tolower () == 'e') {
		type = TokenType.REAL_LITERAL;
		current++;
		if (current < end && (current[0] == '+' || current[0] == '-')) {
			current++;
		}
		while (current < end && current[0].isdigit ()) {
			current++;
		}
	}

	// type suffix
	if (current < end) {
		// remembered before the f/d suffix below can turn an integer into a real
		bool real_literal = (type == TokenType.REAL_LITERAL);

		switch (current[0]) {
		case 'l':
		case 'L':
			if (type == TokenType.INTEGER_LITERAL) {
				current++;
				if (current < end && current[0].tolower () == 'l') {
					current++;
				}
			}
			break;
		case 'u':
		case 'U':
			if (type == TokenType.INTEGER_LITERAL) {
				current++;
				if (current < end && current[0].tolower () == 'l') {
					current++;
					if (current < end && current[0].tolower () == 'l') {
						current++;
					}
				}
			}
			break;
		case 'f':
		case 'F':
		case 'd':
		case 'D':
			type = TokenType.REAL_LITERAL;
			current++;
			break;
		}

		// The suffix may have consumed the last byte of the input, so check the bound
		// before looking at the next character.
		if (!real_literal && current < end && is_ident_char (current[0])) {
			// allow identifiers to start with a digit
			// as long as they contain at least one char
			while (current < end && is_ident_char (current[0])) {
				current++;
			}
			type = TokenType.IDENTIFIER;
		}
	}

	return type;
}
/**
 * Reads the next token while the scanner is inside a template string (@"...").
 *
 * Produces CLOSE_TEMPLATE for the closing quote, IDENTIFIER for "$name",
 * TEMPLATE_STRING_LITERAL for literal text and for "$$", and hands over to read_token ()
 * after "$(" so that the embedded expression is scanned as ordinary code.
 *
 * @param token_begin set to the location of the first character of the token
 * @param token_end set to the location of the last character of the token
 * @return the type of the token that was read
 */
public TokenType read_template_token (out SourceLocation token_begin, out SourceLocation token_end) {
	TokenType type;
	char* begin = current;
	token_begin = SourceLocation (begin, line, column);

	// -1 means "derive the column advance from the number of bytes consumed"
	int token_length_in_chars = -1;

	if (current >= end) {
		type = TokenType.EOF;
	} else {
		switch (current[0]) {
		case '"':
			type = TokenType.CLOSE_TEMPLATE;
			current++;
			state_stack.length--;
			break;
		case '$':
			token_begin.pos++; // $ is not part of following token
			current++;
			// every look at the character after '$' is guarded, as '$' may end the input
			if (current < end && (current[0].isalpha () || current[0] == '_')) {
				while (current < end && is_ident_char (current[0])) {
					current++;
				}
				type = TokenType.IDENTIFIER;
				state_stack += State.TEMPLATE_PART;
			} else if (current < end && current[0] == '(') {
				current++;
				column += 2;
				state_stack += State.PARENS;
				return read_token (out token_begin, out token_end);
			} else if (current < end && current[0] == '$') {
				type = TokenType.TEMPLATE_STRING_LITERAL;
				current++;
				state_stack += State.TEMPLATE_PART;
			} else {
				Report.error (get_source_reference (1), "unexpected character");
				// account for the consumed '$' so that later columns stay correct
				column++;
				return read_template_token (out token_begin, out token_end);
			}
			break;
		default:
			type = TokenType.TEMPLATE_STRING_LITERAL;
			token_length_in_chars = 0;
			while (current < end && current[0] != '"' && current[0] != '$') {
				if (current[0] == '\\') {
					current++;
					token_length_in_chars++;
					if (current >= end) {
						break;
					}

					switch (current[0]) {
					case '\'':
					case '"':
					case '\\':
					case '0':
					case 'b':
					case 'f':
					case 'n':
					case 'r':
					case 't':
					case 'v':
						current++;
						token_length_in_chars++;
						break;
					case 'u':
						// u escape character has four hex digits
						current++;
						token_length_in_chars++;
						int digit_length;
						for (digit_length = 0; digit_length < 4 && current < end && current[0].isxdigit (); digit_length++) {
							current++;
							token_length_in_chars++;
						}
						if (digit_length != 4) {
							Report.error (get_source_reference (token_length_in_chars), "\\u requires four hex digits");
						}
						break;
					case 'x':
						// hexadecimal escape character requires at least one hex digit
						current++;
						token_length_in_chars++;
						int digit_length;
						for (digit_length = 0; current < end && current[0].isxdigit (); digit_length++) {
							current++;
							token_length_in_chars++;
						}
						if (digit_length < 1) {
							Report.error (get_source_reference (token_length_in_chars), "\\x requires at least one hex digit");
						}
						break;
					default:
						Report.error (get_source_reference (token_length_in_chars), "invalid escape sequence");
						break;
					}
				} else if (current[0] == '\n') {
					// template text may span lines; restart the column count on the new line
					current++;
					line++;
					column = 1;
					token_length_in_chars = 1;
				} else {
					unichar u = ((string) current).get_char_validated ((long) (end - current));
					if (u != (unichar) (-1)) {
						current += u.to_utf8 (null);
						token_length_in_chars++;
					} else {
						current++;
						Report.error (get_source_reference (token_length_in_chars), "invalid UTF-8 character");
					}
				}
			}
			if (current >= end) {
				Report.error (get_source_reference (token_length_in_chars), "syntax error, expected \"");
				state_stack.length--;
				return read_token (out token_begin, out token_end);
			}
			state_stack += State.TEMPLATE_PART;
			break;
		}
	}

	if (token_length_in_chars < 0) {
		column += (int) (current - begin);
	} else {
		column += token_length_in_chars;
	}

	token_end = SourceLocation (current, line, column - 1);

	return type;
}
/**
 * Reads the next token from the input.
 *
 * Depending on the innermost scanning state the work is delegated to
 * read_template_token () or read_regex_token (); after a template part a synthetic COMMA
 * is emitted. Otherwise whitespace is skipped via space () and an identifier, keyword,
 * number, operator, character literal or string literal is scanned. Unexpected or invalid
 * characters are reported and skipped.
 *
 * @param token_begin set to the location of the first character of the token
 * @param token_end set to the location of the last character of the token
 * @return the type of the token that was read
 */
public TokenType read_token (out SourceLocation token_begin, out SourceLocation token_end) {
	if (in_template ()) {
		return read_template_token (out token_begin, out token_end);
	} else if (in_template_part ()) {
		state_stack.length--;

		// zero-width separator between the parts of a template
		token_begin = SourceLocation (current, line, column);
		token_end = SourceLocation (current, line, column - 1);

		return TokenType.COMMA;
	} else if (in_regex_literal ()) {
		return read_regex_token (out token_begin, out token_end);
	}

	space ();

	TokenType type;
	char* begin = current;
	token_begin = SourceLocation (begin, line, column);

	// -1 means "derive the column advance from the number of bytes consumed"
	int token_length_in_chars = -1;

	if (current >= end) {
		type = TokenType.EOF;
	} else if (current[0].isalpha () || current[0] == '_') {
		int len = 0;
		while (current < end && is_ident_char (current[0])) {
			current++;
			len++;
		}
		type = get_identifier_or_keyword (begin, len);
	} else if (current[0] == '@') {
		if (current < end - 1 && current[1] == '"') {
			type = TokenType.OPEN_TEMPLATE;
			current += 2;
			state_stack += State.TEMPLATE;
		} else {
			token_begin.pos++; // @ is not part of the identifier
			current++;
			while (current < end && is_ident_char (current[0])) {
				current++;
			}
			type = TokenType.IDENTIFIER;
		}
	} else if (current[0].isdigit ()) {
		type = read_number ();
	} else {
		switch (current[0]) {
		case '{':
			type = TokenType.OPEN_BRACE;
			current++;
			state_stack += State.BRACE;
			break;
		case '}':
			type = TokenType.CLOSE_BRACE;
			current++;
			if (state_stack.length > 0) {
				state_stack.length--;
			}
			break;
		case '(':
			type = TokenType.OPEN_PARENS;
			current++;
			state_stack += State.PARENS;
			break;
		case ')':
			type = TokenType.CLOSE_PARENS;
			current++;
			if (state_stack.length > 0) {
				state_stack.length--;
			}
			if (in_template ()) {
				// this ')' closes a "$(" template expression
				type = TokenType.COMMA;
			}
			break;
		case '[':
			type = TokenType.OPEN_BRACKET;
			current++;
			state_stack += State.BRACKET;
			break;
		case ']':
			type = TokenType.CLOSE_BRACKET;
			current++;
			if (state_stack.length > 0) {
				state_stack.length--;
			}
			break;
		case '.':
			type = TokenType.DOT;
			current++;
			if (current < end - 1) {
				if (current[0] == '.' && current[1] == '.') {
					type = TokenType.ELLIPSIS;
					current += 2;
				}
			}
			break;
		case ':':
			type = TokenType.COLON;
			current++;
			if (current < end && current[0] == ':') {
				type = TokenType.DOUBLE_COLON;
				current++;
			}
			break;
		case ',':
			type = TokenType.COMMA;
			current++;
			break;
		case ';':
			type = TokenType.SEMICOLON;
			current++;
			break;
		case '#':
			type = TokenType.HASH;
			current++;
			break;
		case '?':
			type = TokenType.INTERR;
			current++;
			if (current < end && current[0] == '?') {
				type = TokenType.OP_COALESCING;
				current++;
			}
			break;
		case '|':
			type = TokenType.BITWISE_OR;
			current++;
			if (current < end) {
				switch (current[0]) {
				case '=':
					type = TokenType.ASSIGN_BITWISE_OR;
					current++;
					break;
				case '|':
					type = TokenType.OP_OR;
					current++;
					break;
				}
			}
			break;
		case '&':
			type = TokenType.BITWISE_AND;
			current++;
			if (current < end) {
				switch (current[0]) {
				case '=':
					type = TokenType.ASSIGN_BITWISE_AND;
					current++;
					break;
				case '&':
					type = TokenType.OP_AND;
					current++;
					break;
				}
			}
			break;
		case '^':
			type = TokenType.CARRET;
			current++;
			if (current < end && current[0] == '=') {
				type = TokenType.ASSIGN_BITWISE_XOR;
				current++;
			}
			break;
		case '~':
			type = TokenType.TILDE;
			current++;
			break;
		case '=':
			type = TokenType.ASSIGN;
			current++;
			if (current < end) {
				switch (current[0]) {
				case '=':
					type = TokenType.OP_EQ;
					current++;
					break;
				case '>':
					type = TokenType.LAMBDA;
					current++;
					break;
				}
			}
			break;
		case '<':
			type = TokenType.OP_LT;
			current++;
			if (current < end) {
				switch (current[0]) {
				case '=':
					type = TokenType.OP_LE;
					current++;
					break;
				case '<':
					type = TokenType.OP_SHIFT_LEFT;
					current++;
					if (current < end && current[0] == '=') {
						type = TokenType.ASSIGN_SHIFT_LEFT;
						current++;
					}
					break;
				}
			}
			break;
		case '>':
			type = TokenType.OP_GT;
			current++;
			if (current < end && current[0] == '=') {
				type = TokenType.OP_GE;
				current++;
			}
			break;
		case '!':
			type = TokenType.OP_NEG;
			current++;
			if (current < end && current[0] == '=') {
				type = TokenType.OP_NE;
				current++;
			}
			break;
		case '+':
			type = TokenType.PLUS;
			current++;
			if (current < end) {
				switch (current[0]) {
				case '=':
					type = TokenType.ASSIGN_ADD;
					current++;
					break;
				case '+':
					type = TokenType.OP_INC;
					current++;
					break;
				}
			}
			break;
		case '-':
			type = TokenType.MINUS;
			current++;
			if (current < end) {
				switch (current[0]) {
				case '=':
					type = TokenType.ASSIGN_SUB;
					current++;
					break;
				case '-':
					type = TokenType.OP_DEC;
					current++;
					break;
				case '>':
					type = TokenType.OP_PTR;
					current++;
					break;
				}
			}
			break;
		case '*':
			type = TokenType.STAR;
			current++;
			if (current < end && current[0] == '=') {
				type = TokenType.ASSIGN_MUL;
				current++;
			}
			break;
		case '/':
			// after these tokens a '/' opens a regex literal; otherwise it is a division
			switch (previous) {
			case TokenType.ASSIGN:
			case TokenType.COMMA:
			case TokenType.MINUS:
			case TokenType.OP_AND:
			case TokenType.OP_COALESCING:
			case TokenType.OP_EQ:
			case TokenType.OP_GE:
			case TokenType.OP_GT:
			case TokenType.OP_LE:
			case TokenType.OP_LT:
			case TokenType.OP_NE:
			case TokenType.OP_NEG:
			case TokenType.OP_OR:
			case TokenType.OPEN_BRACE:
			case TokenType.OPEN_PARENS:
			case TokenType.PLUS:
			case TokenType.RETURN:
				type = TokenType.OPEN_REGEX_LITERAL;
				state_stack += State.REGEX_LITERAL;
				current++;
				break;
			default:
				type = TokenType.DIV;
				current++;
				if (current < end && current[0] == '=') {
					type = TokenType.ASSIGN_DIV;
					current++;
				}
				break;
			}
			break;
		case '%':
			type = TokenType.PERCENT;
			current++;
			if (current < end && current[0] == '=') {
				type = TokenType.ASSIGN_PERCENT;
				current++;
			}
			break;
		case '\'':
		case '"':
			if (begin[0] == '\'') {
				type = TokenType.CHARACTER_LITERAL;
			} else if (current < end - 6 && begin[1] == '"' && begin[2] == '"') {
				type = TokenType.VERBATIM_STRING_LITERAL;
				// counts the opening and the closing triple quote
				token_length_in_chars = 6;
				current += 3;
				while (current < end - 4) {
					if (current[0] == '"' && current[1] == '"' && current[2] == '"' && current[3] != '"') {
						break;
					} else if (current[0] == '\n') {
						current++;
						line++;
						column = 1;
						// only the closing triple quote remains to be counted on the new line
						token_length_in_chars = 3;
					} else {
						unichar u = ((string) current).get_char_validated ((long) (end - current));
						if (u != (unichar) (-1)) {
							current += u.to_utf8 (null);
							token_length_in_chars++;
						} else {
							// skip the offending byte; without this the loop never terminates
							current++;
							Report.error (get_source_reference (token_length_in_chars), "invalid UTF-8 character");
						}
					}
				}
				// all three quote characters must lie inside the input
				if (current < end - 2 && current[0] == '"' && current[1] == '"' && current[2] == '"') {
					current += 3;
				} else {
					Report.error (get_source_reference (token_length_in_chars), "syntax error, expected \"\"\"");
				}
				break;
			} else {
				type = TokenType.STRING_LITERAL;
			}
			// counts the opening and the closing quote
			token_length_in_chars = 2;
			current++;
			while (current < end && current[0] != begin[0]) {
				if (current[0] == '\\') {
					current++;
					token_length_in_chars++;
					if (current >= end) {
						break;
					}

					switch (current[0]) {
					case '\'':
					case '"':
					case '\\':
					case '0':
					case 'b':
					case 'f':
					case 'n':
					case 'r':
					case 't':
					case 'v':
					case '$':
						current++;
						token_length_in_chars++;
						break;
					case 'u':
						// u escape character has four hex digits
						current++;
						token_length_in_chars++;
						int digit_length;
						for (digit_length = 0; digit_length < 4 && current < end && current[0].isxdigit (); digit_length++) {
							current++;
							token_length_in_chars++;
						}
						if (digit_length != 4) {
							Report.error (get_source_reference (token_length_in_chars), "\\u requires four hex digits");
						}
						break;
					case 'x':
						// hexadecimal escape character requires at least one hex digit
						current++;
						token_length_in_chars++;
						int digit_length;
						for (digit_length = 0; current < end && current[0].isxdigit (); digit_length++) {
							current++;
							token_length_in_chars++;
						}
						if (digit_length < 1) {
							Report.error (get_source_reference (token_length_in_chars), "\\x requires at least one hex digit");
						}
						break;
					default:
						Report.error (get_source_reference (token_length_in_chars), "invalid escape sequence");
						break;
					}
				} else if (current[0] == '\n') {
					current++;
					line++;
					column = 1;
					// only the closing quote remains to be counted on the new line
					token_length_in_chars = 1;
				} else {
					unichar u = ((string) current).get_char_validated ((long) (end - current));
					if (u != (unichar) (-1)) {
						current += u.to_utf8 (null);
						token_length_in_chars++;
					} else {
						current++;
						Report.error (get_source_reference (token_length_in_chars), "invalid UTF-8 character");
					}
				}
				if (current < end && begin[0] == '\'' && current[0] != '\'') {
					// multiple characters in single character literal
					Report.error (get_source_reference (token_length_in_chars), "invalid character literal");
				}
			}
			if (current < end) {
				// step over the closing quote
				current++;
			} else {
				Report.error (get_source_reference (token_length_in_chars), "syntax error, expected %c".printf (begin[0]));
			}
			break;
		default:
			unichar u = ((string) current).get_char_validated ((long) (end - current));
			if (u != (unichar) (-1)) {
				current += u.to_utf8 (null);
				Report.error (get_source_reference (0), "syntax error, unexpected character");
			} else {
				current++;
				Report.error (get_source_reference (0), "invalid UTF-8 character");
			}
			// skip the offending character and try again
			column++;
			return read_token (out token_begin, out token_end);
		}
	}

	if (token_length_in_chars < 0) {
		column += (int) (current - begin);
	} else {
		column += token_length_in_chars;
	}

	token_end = SourceLocation (current, line, column - 1);
	// remembered to tell a regex literal from a division at the next '/'
	previous = type;

	return type;
}
/**
 * Compares the start of a character buffer with a keyword.
 *
 * @param begin buffer holding at least as many characters as the keyword is long
 * @param keyword the text to compare against
 * @return true if the first keyword.length characters of the buffer equal the keyword
 */
static bool matches (char* begin, string keyword) {
	char* expected = (char*) keyword;
	long count = keyword.length;
	int pos = 0;
	while (pos < count) {
		if (begin[pos] != expected[pos]) {
			return false;
		}
		pos++;
	}
	return true;
}
/**
 * Skips whitespace on the current line; a newline is never consumed.
 *
 * @return true if at least one character was skipped
 */
bool pp_whitespace () {
	char* start = current;
	while (current < end && current[0] != '\n' && current[0].isspace ()) {
		current++;
		column++;
	}
	return current != start;
}
/**
 * Skips any mix of same-line whitespace and comments.
 */
void pp_space () {
	bool consumed;
	do {
		// comment () is only tried when no whitespace was skipped in this round
		consumed = pp_whitespace () || comment ();
	} while (consumed);
}
/**
 * Handles a preprocessing directive; the scanner is positioned on its '#'.
 *
 * A "#!" at the very start of the file is skipped up to the end of the line. Otherwise the
 * directive name is read and dispatched to the matching parse_pp_* handler. If the
 * innermost conditional is in skip mode afterwards, input is discarded up to the next line
 * whose first non-whitespace character is '#'.
 */
void pp_directive () {
	// step over the '#'
	current++;
	column++;

	bool at_file_start = (line == 1 && column == 2);
	if (at_file_start && current < end && current[0] == '!') {
		// hash bang "#!": ignore everything up to the end of the line or file
		while (current < end && current[0] != '\n') {
			current++;
		}
		return;
	}

	pp_space ();

	// collect the directive name
	char* name = current;
	int name_length = 0;
	for (; current < end && current[0].isalnum (); current++) {
		column++;
		name_length++;
	}

	if (name_length == 5 && matches (name, "endif")) {
		parse_pp_endif ();
	} else if (name_length == 4 && matches (name, "else")) {
		parse_pp_else ();
	} else if (name_length == 4 && matches (name, "elif")) {
		parse_pp_elif ();
	} else if (name_length == 2 && matches (name, "if")) {
		parse_pp_if ();
	} else {
		Report.error (get_source_reference (-name_length, name_length), "syntax error, invalid preprocessing directive");
	}

	int depth = conditional_stack.length;
	if (depth == 0 || !conditional_stack[depth - 1].skip_section) {
		return;
	}

	// discard lines until the next preprocessing directive
	bool at_line_start = false;
	while (current < end) {
		char c = current[0];
		if (at_line_start && c == '#') {
			// rewind to the first column so the directive is scanned from the line start
			current -= (column - 1);
			column = 1;
			return;
		}
		if (c == '\n') {
			line++;
			// becomes 1 through the increment below
			column = 0;
			at_line_start = true;
		} else if (!c.isspace ()) {
			at_line_start = false;
		}
		current++;
		column++;
	}
}
/**
 * Skips trailing whitespace and comments, then reports an error unless the next character
 * is a newline.
 */
void pp_eol () {
	pp_space ();
	if (current < end && current[0] == '\n') {
		return;
	}
	Report.error (get_source_reference (0), "syntax error, expected newline");
}
/**
 * Handles "#if": evaluates the condition and opens a new conditional.
 */
void parse_pp_if () {
	pp_space ();
	bool condition = parse_pp_expression ();
	pp_eol ();

	conditional_stack += Conditional ();

	int top = conditional_stack.length - 1;
	// the new branch can only be active if the enclosing conditional is not being skipped
	bool parent_active = (top == 0 || !conditional_stack[top - 1].skip_section);
	if (condition && parent_active) {
		// condition holds: process the code inside the #if
		conditional_stack[top].matched = true;
	} else {
		// skip lines until the next preprocessing directive
		conditional_stack[top].skip_section = true;
	}
}
1358 void parse_pp_elif () {
1359 pp_space ();
1361 bool condition = parse_pp_expression ();
1363 pp_eol ();
1365 if (conditional_stack.length == 0 || conditional_stack[conditional_stack.length - 1].else_found) {
1366 Report.error (get_source_reference (0), "syntax error, unexpected #elif");
1367 return;
1370 if (condition && !conditional_stack[conditional_stack.length - 1].matched
1371 && (conditional_stack.length == 1 || !conditional_stack[conditional_stack.length - 2].skip_section)) {
1372 // condition true => process code within if
1373 conditional_stack[conditional_stack.length - 1].matched = true;
1374 conditional_stack[conditional_stack.length - 1].skip_section = false;
1375 } else {
1376 // skip lines until next preprocessing directive
1377 conditional_stack[conditional_stack.length - 1].skip_section = true;
1381 void parse_pp_else () {
1382 pp_eol ();
1384 if (conditional_stack.length == 0 || conditional_stack[conditional_stack.length - 1].else_found) {
1385 Report.error (get_source_reference (0), "syntax error, unexpected #else");
1386 return;
1389 if (!conditional_stack[conditional_stack.length - 1].matched
1390 && (conditional_stack.length == 1 || !conditional_stack[conditional_stack.length - 2].skip_section)) {
1391 // condition true => process code within if
1392 conditional_stack[conditional_stack.length - 1].matched = true;
1393 conditional_stack[conditional_stack.length - 1].skip_section = false;
1394 } else {
1395 // skip lines until next preprocessing directive
1396 conditional_stack[conditional_stack.length - 1].skip_section = true;
1400 void parse_pp_endif () {
1401 pp_eol ();
1403 if (conditional_stack.length == 0) {
1404 Report.error (get_source_reference (0), "syntax error, unexpected #endif");
1405 return;
1408 conditional_stack.length--;
/**
 * Reads an identifier at the cursor and evaluates it as a preprocessor
 * symbol.
 *
 * The literals `true' and `false' evaluate to themselves; any other
 * identifier is looked up with source_file.context.is_defined ().
 *
 * @return the value of the symbol; false (after reporting a syntax
 *         error) if no identifier character is found at the cursor
 */
bool parse_pp_symbol () {
	char* start = current;
	while (current < end && is_ident_char (current[0])) {
		current++;
		column++;
	}

	long len = (long) (current - start);
	if (len == 0) {
		Report.error (get_source_reference (0), "syntax error, expected identifier");
		return false;
	}

	string identifier = ((string) start).substring (0, len);
	switch (identifier) {
	case "true":
		return true;
	case "false":
		return false;
	default:
		return source_file.context.is_defined (identifier);
	}
}
/**
 * Parses a primary preprocessor expression: either a symbol or a
 * parenthesized expression.
 *
 * @return the value of the expression; false after a syntax error was
 *         reported because neither form starts at the cursor
 */
bool parse_pp_primary_expression () {
	if (current < end && is_ident_char (current[0])) {
		return parse_pp_symbol ();
	}

	if (current >= end || current[0] != '(') {
		Report.error (get_source_reference (0), "syntax error, expected identifier");
		return false;
	}

	// parenthesized expression: step over `('
	current++;
	column++;
	pp_space ();
	bool inner = parse_pp_expression ();
	pp_space ();

	if (current < end && current[0] == ')') {
		current++;
		column++;
	} else {
		// the value parsed so far is still returned
		Report.error (get_source_reference (0), "syntax error, expected `)'");
	}

	return inner;
}
/**
 * Parses a unary preprocessor expression: any number of `!' prefixes
 * followed by a primary expression.
 *
 * @return the (possibly negated) value of the operand
 */
bool parse_pp_unary_expression () {
	if (current >= end || current[0] != '!') {
		return parse_pp_primary_expression ();
	}

	// step over `!' and negate whatever follows
	current++;
	column++;
	pp_space ();

	bool operand = parse_pp_unary_expression ();
	return !operand;
}
/**
 * Parses a left-associative chain of unary expressions joined by `=='
 * or `!='.
 *
 * Blanks are skipped after every operand, so a following operator is
 * recognized both inside this chain (`A == B != C') and by the callers
 * (`X && A == B && C').
 *
 * @return the value of the whole chain
 */
bool parse_pp_equality_expression () {
	bool left = parse_pp_unary_expression ();
	pp_space ();

	// both operators are two characters long and end in `='
	while (current < end - 1
	       && current[1] == '='
	       && (current[0] == '=' || current[0] == '!')) {
		bool negated = (current[0] == '!');

		current += 2;
		column += 2;
		pp_space ();

		bool right = parse_pp_unary_expression ();
		left = negated ? (left != right) : (left == right);

		// Skip blanks behind the right-hand operand as well; otherwise
		// the next operator is hidden behind the whitespace and the
		// directive is rejected with "expected newline".
		pp_space ();
	}

	return left;
}
/**
 * Parses a chain of equality expressions joined by `&&'.
 *
 * Every operand is parsed regardless of the value accumulated so far,
 * because parsing is what advances the cursor.
 *
 * @return true if all operands evaluated to true
 */
bool parse_pp_and_expression () {
	bool result = parse_pp_equality_expression ();
	pp_space ();

	while (true) {
		bool at_operator = (current < end - 1 && current[0] == '&' && current[1] == '&');
		if (!at_operator) {
			break;
		}

		current += 2;
		column += 2;
		pp_space ();

		bool operand = parse_pp_equality_expression ();
		if (!operand) {
			result = false;
		}
	}

	return result;
}
/**
 * Parses a chain of and-expressions joined by `||'.
 *
 * Every operand is parsed regardless of the value accumulated so far,
 * because parsing is what advances the cursor.
 *
 * @return true if at least one operand evaluated to true
 */
bool parse_pp_or_expression () {
	bool result = parse_pp_and_expression ();
	pp_space ();

	while (true) {
		bool at_operator = (current < end - 1 && current[0] == '|' && current[1] == '|');
		if (!at_operator) {
			break;
		}

		current += 2;
		column += 2;
		pp_space ();

		bool operand = parse_pp_and_expression ();
		if (operand) {
			result = true;
		}
	}

	return result;
}
/**
 * Entry point of the preprocessor expression grammar; delegates to the
 * or-expression rule.
 *
 * @return the value of the expression at the cursor
 */
bool parse_pp_expression () {
	bool result = parse_pp_or_expression ();
	return result;
}
/**
 * Skips whitespace at the cursor, keeping line and column up to date,
 * and hands over to pp_directive () when a `#' follows at the beginning
 * of a line.
 *
 * @return true if whitespace was consumed or a directive was processed
 */
bool whitespace () {
	char* start = current;
	// a `#' counts as a directive if only whitespace precedes it on its line
	bool at_line_start = (column == 1);

	for (; current < end && current[0].isspace (); current++, column++) {
		if (current[0] == '\n') {
			line++;
			// the loop increment moves this to 1 for the next line
			column = 0;
			at_line_start = true;
		}
	}

	if (at_line_start && current < end && current[0] == '#') {
		pp_directive ();
		return true;
	}

	return current != start;
}
/**
 * Skips a single-line (`//') or multi-line comment at the cursor.
 *
 * Comment text is handed to push_comment () for every comment when
 * file_comment is set, and otherwise only for multi-line comments whose
 * third character is `*'. When file_comment is set, a comment starting
 * with `/**' is left untouched and false is returned.
 *
 * @param file_comment whether comments are being collected as file
 *                     comments
 * @return true if a comment was consumed (also when it is unterminated
 *         and an error was reported), false if no comment starts here
 */
bool comment (bool file_comment = false) {
	if (current == null
	    || current > end - 2
	    || current[0] != '/'
	    || (current[1] != '/' && current[1] != '*')) {
		return false;
	}

	if (current[1] == '/') {
		SourceReference source_reference = null;
		if (file_comment) {
			source_reference = get_source_reference (0);
		}

		// single-line comment; the column is not tracked here because
		// whitespace () resets it at the newline that ends the comment
		current += 2;
		char* begin = current;

		// skip until end of line or end of file
		while (current < end && current[0] != '\n') {
			current++;
		}

		if (source_reference != null) {
			push_comment (((string) begin).substring (0, (long) (current - begin)), source_reference, file_comment);
		}

		return true;
	}

	// multi-line comment
	SourceReference source_reference = null;

	// Only the two characters of the opener are known to be inside the
	// buffer; look at the third one only if it exists.
	bool third_is_star = (current < end - 2 && current[2] == '*');

	if (file_comment && third_is_star) {
		return false;
	}

	if (third_is_star || file_comment) {
		source_reference = get_source_reference (0);
	}

	current += 2;
	column += 2;

	char* begin = current;
	while (current < end - 1
	       && (current[0] != '*' || current[1] != '/')) {
		if (current[0] == '\n') {
			line++;
			column = 0;
		}
		current++;
		column++;
	}

	// Fewer than two characters are left, so no terminator was found.
	// This also covers a comment opener that ends the file, where the
	// cursor is already at the end and must not be advanced any further.
	if (current >= end - 1) {
		Report.error (get_source_reference (0), "syntax error, expected */");
		return true;
	}

	if (source_reference != null) {
		push_comment (((string) begin).substring (0, (long) (current - begin)), source_reference, file_comment);
	}

	// step over the terminator
	current += 2;
	column += 2;

	return true;
}
/**
 * Skips any run of whitespace and comments at the cursor.
 */
void space () {
	bool skipped;
	do {
		skipped = whitespace () || comment ();
	} while (skipped);
}
/**
 * Skips whitespace and comments at the cursor, scanning the comments in
 * file-comment mode.
 */
public void parse_file_comments () {
	bool skipped;
	do {
		skipped = whitespace () || comment (true);
	} while (skipped);
}
/**
 * Records the text of a scanned comment.
 *
 * Text starting with `*' becomes the pending comment; a comment that
 * was already pending is moved to the source file first. In
 * file-comment mode the text is added to the source file and the
 * pending comment is cleared.
 *
 * @param comment_item     comment text without its delimiters
 * @param source_reference location of the comment
 * @param file_comment     whether the comment was scanned in
 *                         file-comment mode
 */
void push_comment (string comment_item, SourceReference source_reference, bool file_comment) {
	bool starts_with_star = comment_item.has_prefix ("*");

	if (starts_with_star && _comment != null) {
		// extra doc comment, add it to source file comments
		source_file.add_comment (_comment);
	}
	if (starts_with_star) {
		_comment = new Comment (comment_item, source_reference);
	}

	if (file_comment) {
		source_file.add_comment (new Comment (comment_item, source_reference));
		_comment = null;
	}
}
/**
 * Clears and returns the content of the comment stack.
 *
 * @return saved comment, or null if none is pending
 */
public Comment? pop_comment () {
	Comment? saved = _comment;
	_comment = null;
	return saved;
}