glib-2.0: add g_strsignal binding
[vala-lang.git] / vala / valascanner.vala
blob955b9a0944ce2d2abe311cac82801ce3b011507b
/* valascanner.vala
 *
 * Copyright (C) 2008-2010  Jürg Billeter
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301  USA
 *
 * Author:
 * 	Jürg Billeter <j@bitron.ch>
 * 	Jukka-Pekka Iivonen <jp0409@jippii.fi>
 */
24 using GLib;
/**
 * Lexical scanner for Vala source files.
 */
29 public class Vala.Scanner {
30 public SourceFile source_file { get; private set; }
32 TokenType previous;
33 char* current;
34 char* end;
36 int line;
37 int column;
39 Comment _comment;
41 Conditional[] conditional_stack;
43 struct Conditional {
44 public bool matched;
45 public bool else_found;
46 public bool skip_section;
49 State[] state_stack;
51 enum State {
52 PARENS,
53 BRACE,
54 BRACKET,
55 TEMPLATE,
56 TEMPLATE_PART,
57 REGEX_LITERAL
60 public Scanner (SourceFile source_file) {
61 this.source_file = source_file;
63 char* begin = source_file.get_mapped_contents ();
64 end = begin + source_file.get_mapped_length ();
66 current = begin;
68 line = 1;
69 column = 1;
72 public void seek (SourceLocation location) {
73 current = location.pos;
74 line = location.line;
75 column = location.column;
77 conditional_stack = null;
78 state_stack = null;
81 bool in_template () {
82 return (state_stack.length > 0 && state_stack[state_stack.length - 1] == State.TEMPLATE);
85 bool in_template_part () {
86 return (state_stack.length > 0 && state_stack[state_stack.length - 1] == State.TEMPLATE_PART);
89 bool in_regex_literal () {
90 return (state_stack.length > 0 && state_stack[state_stack.length - 1] == State.REGEX_LITERAL);
93 bool is_ident_char (char c) {
94 return (c.isalnum () || c == '_');
/**
 * Reads the next token while the scanner is inside a regular expression
 * literal.
 *
 * Produces either the closing slash together with its modifiers
 * (CLOSE_REGEX_LITERAL) or the literal text up to the closing slash
 * (REGEX_LITERAL). An unterminated literal is reported, the regex state is
 * left and scanning continues with read_token.
 *
 * @param token_begin receives the position of the first byte of the token
 * @param token_end receives the position just past the token; its column is
 *                  the column of the last character
 * @return the type of the token read, EOF at the end of the input
 */
public TokenType read_regex_token (out SourceLocation token_begin, out SourceLocation token_end) {
	TokenType type;
	char* begin = current;
	token_begin.pos = begin;
	token_begin.line = line;
	token_begin.column = column;

	// negative means "use the byte length of the token as column width"
	int token_length_in_chars = -1;

	if (current >= end) {
		type = TokenType.EOF;
	} else {
		switch (current[0]) {
		case '/':
			type = TokenType.CLOSE_REGEX_LITERAL;
			current++;
			state_stack.length--;
			// the closing slash itself occupies one column; counting from 1
			// keeps the error columns and the final column consistent with
			// the number of bytes consumed (slash plus modifiers)
			token_length_in_chars = 1;
			var fl_i = false;
			var fl_s = false;
			var fl_m = false;
			var fl_x = false;
			// never look at bytes beyond the end of the mapped file
			while (current < end && (current[0] == 'i' || current[0] == 's' || current[0] == 'm' || current[0] == 'x')) {
				switch (current[0]) {
				case 'i':
					if (fl_i) {
						Report.error (new SourceReference (source_file, line, column + token_length_in_chars, line, column + token_length_in_chars), "modifier 'i' used more than once");
					}
					fl_i = true;
					break;
				case 's':
					if (fl_s) {
						Report.error (new SourceReference (source_file, line, column + token_length_in_chars, line, column + token_length_in_chars), "modifier 's' used more than once");
					}
					fl_s = true;
					break;
				case 'm':
					if (fl_m) {
						Report.error (new SourceReference (source_file, line, column + token_length_in_chars, line, column + token_length_in_chars), "modifier 'm' used more than once");
					}
					fl_m = true;
					break;
				case 'x':
					if (fl_x) {
						Report.error (new SourceReference (source_file, line, column + token_length_in_chars, line, column + token_length_in_chars), "modifier 'x' used more than once");
					}
					fl_x = true;
					break;
				}
				current++;
				token_length_in_chars++;
			}
			break;
		default:
			type = TokenType.REGEX_LITERAL;
			token_length_in_chars = 0;
			while (current < end && current[0] != '/') {
				if (current[0] == '\\') {
					current++;
					token_length_in_chars++;
					if (current >= end) {
						break;
					}

					switch (current[0]) {
					case '\'': case '"': case '\\': case '/': case '^': case '$':
					case '.': case '[': case ']': case '{': case '}': case '(':
					case ')': case '?': case '*': case '+': case '-': case '#':
					case '&': case '~': case ':': case ';': case '<': case '>':
					case '|': case '%': case '=': case '@': case '0':
					case 'b': case 'B': case 'f': case 'n': case 'r': case 't':
					case 'a': case 'A': case 'p': case 'P': case 'e':
					case 'd': case 'D': case 's': case 'S': case 'w': case 'W':
					case 'G': case 'z': case 'Z':
						current++;
						token_length_in_chars++;
						break;
					case 'x':
						// hexadecimal escape character
						current++;
						token_length_in_chars++;
						while (current < end && current[0].isxdigit ()) {
							current++;
							token_length_in_chars++;
						}
						break;
					default:
						// the offending character is not consumed here; the
						// next iteration scans it as ordinary literal text
						Report.error (new SourceReference (source_file, line, column + token_length_in_chars, line, column + token_length_in_chars), "invalid escape sequence");
						break;
					}
				} else if (current[0] == '\n') {
					// regex literals may not span lines
					break;
				} else {
					unichar u = ((string) current).get_char_validated ((long) (end - current));
					if (u != (unichar) (-1)) {
						current += u.to_utf8 (null);
						token_length_in_chars++;
					} else {
						current++;
						Report.error (new SourceReference (source_file, line, column + token_length_in_chars, line, column + token_length_in_chars), "invalid UTF-8 character");
					}
				}
			}
			if (current >= end || current[0] == '\n') {
				Report.error (new SourceReference (source_file, line, column + token_length_in_chars, line, column + token_length_in_chars), "syntax error, expected \"");
				state_stack.length--;
				return read_token (out token_begin, out token_end);
			}
			break;
		}
	}

	if (token_length_in_chars < 0) {
		column += (int) (current - begin);
	} else {
		column += token_length_in_chars;
	}

	token_end.pos = current;
	token_end.line = line;
	token_end.column = column - 1;

	return type;
}
/**
 * Classifies an identifier-shaped run of bytes as a keyword or a plain
 * identifier.
 *
 * Candidates are grouped by length, so every comparison looks at exactly
 * len bytes of begin.
 *
 * @param begin first byte of the word
 * @param len number of bytes in the word
 * @return the keyword token type, or TokenType.IDENTIFIER if the word is
 *         not a keyword
 */
public static TokenType get_identifier_or_keyword (char* begin, int len) {
	switch (len) {
	case 2:
		if (matches (begin, "as")) return TokenType.AS;
		if (matches (begin, "do")) return TokenType.DO;
		if (matches (begin, "if")) return TokenType.IF;
		if (matches (begin, "in")) return TokenType.IN;
		if (matches (begin, "is")) return TokenType.IS;
		break;
	case 3:
		if (matches (begin, "for")) return TokenType.FOR;
		if (matches (begin, "get")) return TokenType.GET;
		if (matches (begin, "new")) return TokenType.NEW;
		if (matches (begin, "out")) return TokenType.OUT;
		if (matches (begin, "ref")) return TokenType.REF;
		if (matches (begin, "set")) return TokenType.SET;
		if (matches (begin, "try")) return TokenType.TRY;
		if (matches (begin, "var")) return TokenType.VAR;
		break;
	case 4:
		if (matches (begin, "base")) return TokenType.BASE;
		if (matches (begin, "case")) return TokenType.CASE;
		if (matches (begin, "else")) return TokenType.ELSE;
		if (matches (begin, "enum")) return TokenType.ENUM;
		if (matches (begin, "lock")) return TokenType.LOCK;
		if (matches (begin, "null")) return TokenType.NULL;
		if (matches (begin, "this")) return TokenType.THIS;
		if (matches (begin, "true")) return TokenType.TRUE;
		if (matches (begin, "void")) return TokenType.VOID;
		if (matches (begin, "weak")) return TokenType.WEAK;
		break;
	case 5:
		if (matches (begin, "async")) return TokenType.ASYNC;
		if (matches (begin, "break")) return TokenType.BREAK;
		if (matches (begin, "catch")) return TokenType.CATCH;
		if (matches (begin, "class")) return TokenType.CLASS;
		if (matches (begin, "const")) return TokenType.CONST;
		if (matches (begin, "false")) return TokenType.FALSE;
		if (matches (begin, "owned")) return TokenType.OWNED;
		if (matches (begin, "throw")) return TokenType.THROW;
		if (matches (begin, "using")) return TokenType.USING;
		if (matches (begin, "while")) return TokenType.WHILE;
		if (matches (begin, "yield")) return TokenType.YIELD;
		break;
	case 6:
		if (matches (begin, "delete")) return TokenType.DELETE;
		if (matches (begin, "extern")) return TokenType.EXTERN;
		if (matches (begin, "inline")) return TokenType.INLINE;
		if (matches (begin, "params")) return TokenType.PARAMS;
		if (matches (begin, "public")) return TokenType.PUBLIC;
		if (matches (begin, "return")) return TokenType.RETURN;
		if (matches (begin, "sealed")) return TokenType.SEALED;
		if (matches (begin, "signal")) return TokenType.SIGNAL;
		if (matches (begin, "sizeof")) return TokenType.SIZEOF;
		if (matches (begin, "static")) return TokenType.STATIC;
		if (matches (begin, "struct")) return TokenType.STRUCT;
		if (matches (begin, "switch")) return TokenType.SWITCH;
		if (matches (begin, "throws")) return TokenType.THROWS;
		if (matches (begin, "typeof")) return TokenType.TYPEOF;
		break;
	case 7:
		if (matches (begin, "default")) return TokenType.DEFAULT;
		if (matches (begin, "dynamic")) return TokenType.DYNAMIC;
		if (matches (begin, "ensures")) return TokenType.ENSURES;
		if (matches (begin, "finally")) return TokenType.FINALLY;
		if (matches (begin, "foreach")) return TokenType.FOREACH;
		if (matches (begin, "private")) return TokenType.PRIVATE;
		if (matches (begin, "unowned")) return TokenType.UNOWNED;
		if (matches (begin, "virtual")) return TokenType.VIRTUAL;
		break;
	case 8:
		if (matches (begin, "abstract")) return TokenType.ABSTRACT;
		if (matches (begin, "continue")) return TokenType.CONTINUE;
		if (matches (begin, "delegate")) return TokenType.DELEGATE;
		if (matches (begin, "internal")) return TokenType.INTERNAL;
		if (matches (begin, "override")) return TokenType.OVERRIDE;
		if (matches (begin, "requires")) return TokenType.REQUIRES;
		if (matches (begin, "volatile")) return TokenType.VOLATILE;
		break;
	case 9:
		if (matches (begin, "construct")) return TokenType.CONSTRUCT;
		if (matches (begin, "interface")) return TokenType.INTERFACE;
		if (matches (begin, "namespace")) return TokenType.NAMESPACE;
		if (matches (begin, "protected")) return TokenType.PROTECTED;
		break;
	case 11:
		if (matches (begin, "errordomain")) return TokenType.ERRORDOMAIN;
		break;
	}

	// not a keyword of this length
	return TokenType.IDENTIFIER;
}
/**
 * Scans a numeric literal starting at the current position.
 *
 * Handles hexadecimal integers ("0x..."), decimal integers, an optional
 * fractional part, an optional exponent and the type suffixes l/ll, u/ul/ull,
 * f and d. A number that is directly followed by identifier characters and
 * is not a real literal (by fraction or exponent) is scanned as an
 * identifier instead.
 *
 * @return INTEGER_LITERAL, REAL_LITERAL or IDENTIFIER
 */
TokenType read_number () {
	var type = TokenType.INTEGER_LITERAL;

	// integer part
	if (current < end - 2 && current[0] == '0'
	    && current[1] == 'x' && current[2].isxdigit ()) {
		// hexadecimal integer literal
		current += 2;
		while (current < end && current[0].isxdigit ()) {
			current++;
		}
	} else {
		// decimal number
		while (current < end && current[0].isdigit ()) {
			current++;
		}
	}

	// fractional part
	if (current < end - 1 && current[0] == '.' && current[1].isdigit ()) {
		type = TokenType.REAL_LITERAL;
		current++;
		while (current < end && current[0].isdigit ()) {
			current++;
		}
	}

	// exponent part
	if (current < end && current[0].tolower () == 'e') {
		type = TokenType.REAL_LITERAL;
		current++;
		if (current < end && (current[0] == '+' || current[0] == '-')) {
			current++;
		}
		while (current < end && current[0].isdigit ()) {
			current++;
		}
	}

	// type suffix
	if (current < end) {
		// remembered before the suffix is applied: an f/d suffix alone does
		// not protect the literal from being turned into an identifier below
		bool real_literal = (type == TokenType.REAL_LITERAL);

		switch (current[0]) {
		case 'l':
		case 'L':
			if (type == TokenType.INTEGER_LITERAL) {
				current++;
				if (current < end && current[0].tolower () == 'l') {
					current++;
				}
			}
			break;
		case 'u':
		case 'U':
			if (type == TokenType.INTEGER_LITERAL) {
				current++;
				if (current < end && current[0].tolower () == 'l') {
					current++;
					if (current < end && current[0].tolower () == 'l') {
						current++;
					}
				}
			}
			break;
		case 'f':
		case 'F':
		case 'd':
		case 'D':
			type = TokenType.REAL_LITERAL;
			current++;
			break;
		}

		// the suffix may have consumed the last byte of the file, so the
		// cursor has to be re-checked before it is dereferenced
		if (!real_literal && current < end && is_ident_char (current[0])) {
			// allow identifiers to start with a digit
			// as long as they contain at least one char
			while (current < end && is_ident_char (current[0])) {
				current++;
			}
			type = TokenType.IDENTIFIER;
		}
	}

	return type;
}
/**
 * Reads the next token while the scanner is inside a string template.
 *
 * Produces the closing quote (CLOSE_TEMPLATE), an interpolated identifier
 * after '$', the start of a "$(" expression (delegated to read_token), or a
 * run of literal text (TEMPLATE_STRING_LITERAL). Identifier and text tokens
 * push State.TEMPLATE_PART so that the next call to read_token yields a
 * separating COMMA.
 *
 * @param token_begin receives the position of the first byte of the token
 * @param token_end receives the position just past the token; its column is
 *                  the column of the last character
 * @return the type of the token read, EOF at the end of the input
 */
public TokenType read_template_token (out SourceLocation token_begin, out SourceLocation token_end) {
	TokenType type;
	char* begin = current;
	token_begin.pos = begin;
	token_begin.line = line;
	token_begin.column = column;

	// negative means "use the byte length of the token as column width"
	int token_length_in_chars = -1;

	if (current >= end) {
		type = TokenType.EOF;
	} else {
		switch (current[0]) {
		case '"':
			type = TokenType.CLOSE_TEMPLATE;
			current++;
			state_stack.length--;
			break;
		case '$':
			token_begin.pos++; // $ is not part of following token
			current++;
			// '$' may be the last byte of the file: every look at the
			// following character is guarded by a bounds check
			if (current < end && (current[0].isalpha () || current[0] == '_')) {
				while (current < end && is_ident_char (current[0])) {
					current++;
				}
				type = TokenType.IDENTIFIER;
				state_stack += State.TEMPLATE_PART;
			} else if (current < end && current[0] == '(') {
				current++;
				column += 2;
				state_stack += State.PARENS;
				return read_token (out token_begin, out token_end);
			} else if (current < end && current[0] == '$') {
				type = TokenType.TEMPLATE_STRING_LITERAL;
				current++;
				state_stack += State.TEMPLATE_PART;
			} else {
				Report.error (new SourceReference (source_file, line, column + 1, line, column + 1), "unexpected character");
				// account for the '$' that has been consumed so that the
				// token scanned next starts at the right column
				column++;
				return read_template_token (out token_begin, out token_end);
			}
			break;
		default:
			type = TokenType.TEMPLATE_STRING_LITERAL;
			token_length_in_chars = 0;
			while (current < end && current[0] != '"' && current[0] != '$') {
				if (current[0] == '\\') {
					current++;
					token_length_in_chars++;
					if (current >= end) {
						break;
					}

					switch (current[0]) {
					case '\'':
					case '"':
					case '\\':
					case '0':
					case 'b':
					case 'f':
					case 'n':
					case 'r':
					case 't':
						current++;
						token_length_in_chars++;
						break;
					case 'x':
						// hexadecimal escape character
						current++;
						token_length_in_chars++;
						while (current < end && current[0].isxdigit ()) {
							current++;
							token_length_in_chars++;
						}
						break;
					default:
						// the offending character is not consumed here; the
						// next iteration scans it as ordinary literal text
						Report.error (new SourceReference (source_file, line, column + token_length_in_chars, line, column + token_length_in_chars), "invalid escape sequence");
						break;
					}
				} else if (current[0] == '\n') {
					current++;
					line++;
					column = 1;
					// template text carries no closing quote, so nothing has
					// been counted yet on the new line
					token_length_in_chars = 0;
				} else {
					unichar u = ((string) current).get_char_validated ((long) (end - current));
					if (u != (unichar) (-1)) {
						current += u.to_utf8 (null);
						token_length_in_chars++;
					} else {
						current++;
						Report.error (new SourceReference (source_file, line, column + token_length_in_chars, line, column + token_length_in_chars), "invalid UTF-8 character");
					}
				}
			}
			if (current >= end) {
				Report.error (new SourceReference (source_file, line, column + token_length_in_chars, line, column + token_length_in_chars), "syntax error, expected \"");
				state_stack.length--;
				return read_token (out token_begin, out token_end);
			}
			state_stack += State.TEMPLATE_PART;
			break;
		}
	}

	if (token_length_in_chars < 0) {
		column += (int) (current - begin);
	} else {
		column += token_length_in_chars;
	}

	token_end.pos = current;
	token_end.line = line;
	token_end.column = column - 1;

	return type;
}
/**
 * Reads the next token from the source file.
 *
 * Inside a string template or a regex literal the work is delegated to
 * read_template_token or read_regex_token. After a template part a
 * zero-width COMMA is produced. Otherwise space () is called first
 * (defined elsewhere in this file; presumably it skips whitespace and
 * comments) and one regular token is scanned. The type of that token is
 * remembered in `previous`, which decides whether a later '/' opens a regex
 * literal.
 *
 * @param token_begin receives the position of the first byte of the token
 * @param token_end receives the position just past the token; its column is
 *                  the column of the last character
 * @return the type of the token read, EOF at the end of the input
 */
public TokenType read_token (out SourceLocation token_begin, out SourceLocation token_end) {
	if (in_template ()) {
		return read_template_token (out token_begin, out token_end);
	} else if (in_template_part ()) {
		state_stack.length--;

		token_begin.pos = current;
		token_begin.line = line;
		token_begin.column = column;

		token_end.pos = current;
		token_end.line = line;
		token_end.column = column - 1;

		return TokenType.COMMA;
	} else if (in_regex_literal ()) {
		return read_regex_token (out token_begin, out token_end);
	}

	space ();

	TokenType type;
	char* begin = current;
	token_begin.pos = begin;
	token_begin.line = line;
	token_begin.column = column;

	// negative means "use the byte length of the token as column width"
	int token_length_in_chars = -1;

	if (current >= end) {
		type = TokenType.EOF;
	} else if (current[0].isalpha () || current[0] == '_') {
		int len = 0;
		while (current < end && is_ident_char (current[0])) {
			current++;
			len++;
		}
		type = get_identifier_or_keyword (begin, len);
	} else if (current[0] == '@' && source_file.context.profile != Profile.DOVA) {
		if (current < end - 1 && current[1] == '"') {
			type = TokenType.OPEN_TEMPLATE;
			current += 2;
			state_stack += State.TEMPLATE;
		} else {
			token_begin.pos++; // @ is not part of the identifier
			current++;
			while (current < end && is_ident_char (current[0])) {
				current++;
			}
			type = TokenType.IDENTIFIER;
		}
	} else if (current[0].isdigit ()) {
		type = read_number ();
	} else {
		switch (current[0]) {
		case '{':
			type = TokenType.OPEN_BRACE;
			current++;
			state_stack += State.BRACE;
			break;
		case '}':
			type = TokenType.CLOSE_BRACE;
			current++;
			if (state_stack.length > 0) {
				state_stack.length--;
			}
			break;
		case '(':
			type = TokenType.OPEN_PARENS;
			current++;
			state_stack += State.PARENS;
			break;
		case ')':
			type = TokenType.CLOSE_PARENS;
			current++;
			if (state_stack.length > 0) {
				state_stack.length--;
			}
			if (in_template ()) {
				// closes a "$(" expression inside a template
				type = TokenType.COMMA;
			}
			break;
		case '[':
			type = TokenType.OPEN_BRACKET;
			current++;
			state_stack += State.BRACKET;
			break;
		case ']':
			type = TokenType.CLOSE_BRACKET;
			current++;
			if (state_stack.length > 0) {
				state_stack.length--;
			}
			break;
		case '.':
			type = TokenType.DOT;
			current++;
			if (current < end - 1) {
				if (current[0] == '.' && current[1] == '.') {
					type = TokenType.ELLIPSIS;
					current += 2;
				}
			}
			break;
		case ':':
			type = TokenType.COLON;
			current++;
			if (current < end && current[0] == ':') {
				type = TokenType.DOUBLE_COLON;
				current++;
			}
			break;
		case ',':
			type = TokenType.COMMA;
			current++;
			break;
		case ';':
			type = TokenType.SEMICOLON;
			current++;
			break;
		case '#':
			type = TokenType.HASH;
			current++;
			break;
		case '?':
			type = TokenType.INTERR;
			current++;
			if (current < end && current[0] == '?') {
				type = TokenType.OP_COALESCING;
				current++;
			}
			break;
		case '|':
			type = TokenType.BITWISE_OR;
			current++;
			if (current < end) {
				switch (current[0]) {
				case '=':
					type = TokenType.ASSIGN_BITWISE_OR;
					current++;
					break;
				case '|':
					type = TokenType.OP_OR;
					current++;
					break;
				}
			}
			break;
		case '&':
			type = TokenType.BITWISE_AND;
			current++;
			if (current < end) {
				switch (current[0]) {
				case '=':
					type = TokenType.ASSIGN_BITWISE_AND;
					current++;
					break;
				case '&':
					type = TokenType.OP_AND;
					current++;
					break;
				}
			}
			break;
		case '^':
			type = TokenType.CARRET;
			current++;
			if (current < end && current[0] == '=') {
				type = TokenType.ASSIGN_BITWISE_XOR;
				current++;
			}
			break;
		case '~':
			type = TokenType.TILDE;
			current++;
			break;
		case '=':
			type = TokenType.ASSIGN;
			current++;
			if (current < end) {
				switch (current[0]) {
				case '=':
					type = TokenType.OP_EQ;
					current++;
					break;
				case '>':
					type = TokenType.LAMBDA;
					current++;
					break;
				}
			}
			break;
		case '<':
			type = TokenType.OP_LT;
			current++;
			if (current < end) {
				switch (current[0]) {
				case '=':
					type = TokenType.OP_LE;
					current++;
					break;
				case '<':
					type = TokenType.OP_SHIFT_LEFT;
					current++;
					if (current < end && current[0] == '=') {
						type = TokenType.ASSIGN_SHIFT_LEFT;
						current++;
					}
					break;
				}
			}
			break;
		case '>':
			type = TokenType.OP_GT;
			current++;
			if (current < end && current[0] == '=') {
				type = TokenType.OP_GE;
				current++;
			}
			break;
		case '!':
			type = TokenType.OP_NEG;
			current++;
			if (current < end && current[0] == '=') {
				type = TokenType.OP_NE;
				current++;
			}
			break;
		case '+':
			type = TokenType.PLUS;
			current++;
			if (current < end) {
				switch (current[0]) {
				case '=':
					type = TokenType.ASSIGN_ADD;
					current++;
					break;
				case '+':
					type = TokenType.OP_INC;
					current++;
					break;
				}
			}
			break;
		case '-':
			type = TokenType.MINUS;
			current++;
			if (current < end) {
				switch (current[0]) {
				case '=':
					type = TokenType.ASSIGN_SUB;
					current++;
					break;
				case '-':
					type = TokenType.OP_DEC;
					current++;
					break;
				case '>':
					type = TokenType.OP_PTR;
					current++;
					break;
				}
			}
			break;
		case '*':
			type = TokenType.STAR;
			current++;
			if (current < end && current[0] == '=') {
				type = TokenType.ASSIGN_MUL;
				current++;
			}
			break;
		case '/':
			// after one of these tokens a '/' opens a regex literal,
			// after anything else it is a division operator
			switch (previous) {
			case TokenType.ASSIGN:
			case TokenType.COMMA:
			case TokenType.MINUS:
			case TokenType.OP_AND:
			case TokenType.OP_COALESCING:
			case TokenType.OP_EQ:
			case TokenType.OP_GE:
			case TokenType.OP_GT:
			case TokenType.OP_LE:
			case TokenType.OP_LT:
			case TokenType.OP_NE:
			case TokenType.OP_NEG:
			case TokenType.OP_OR:
			case TokenType.OPEN_BRACE:
			case TokenType.OPEN_PARENS:
			case TokenType.PLUS:
			case TokenType.RETURN:
				type = TokenType.OPEN_REGEX_LITERAL;
				state_stack += State.REGEX_LITERAL;
				current++;
				break;
			default:
				type = TokenType.DIV;
				current++;
				if (current < end && current[0] == '=') {
					type = TokenType.ASSIGN_DIV;
					current++;
				}
				break;
			}
			break;
		case '%':
			type = TokenType.PERCENT;
			current++;
			if (current < end && current[0] == '=') {
				type = TokenType.ASSIGN_PERCENT;
				current++;
			}
			break;
		case '\'':
		case '"':
			if (begin[0] == '\'') {
				type = TokenType.CHARACTER_LITERAL;
			} else if (current < end - 6 && begin[1] == '"' && begin[2] == '"') {
				type = TokenType.VERBATIM_STRING_LITERAL;
				// three opening and three closing quotes
				token_length_in_chars = 6;
				current += 3;
				while (current < end - 4) {
					if (current[0] == '"' && current[1] == '"' && current[2] == '"' && current[3] != '"') {
						break;
					} else if (current[0] == '\n') {
						current++;
						line++;
						column = 1;
						// only the closing quotes remain to be counted
						token_length_in_chars = 3;
					} else {
						unichar u = ((string) current).get_char_validated ((long) (end - current));
						if (u != (unichar) (-1)) {
							current += u.to_utf8 (null);
							token_length_in_chars++;
						} else {
							// step over the invalid byte; without this the
							// loop would report the same byte forever
							current++;
							Report.error (new SourceReference (source_file, line, column + token_length_in_chars, line, column + token_length_in_chars), "invalid UTF-8 character");
						}
					}
				}
				if (current[0] == '"' && current[1] == '"' && current[2] == '"') {
					current += 3;
				} else {
					Report.error (new SourceReference (source_file, line, column + token_length_in_chars, line, column + token_length_in_chars), "syntax error, expected \"\"\"");
				}
				break;
			} else {
				type = TokenType.STRING_LITERAL;
			}
			// opening and closing quote
			token_length_in_chars = 2;
			current++;
			while (current < end && current[0] != begin[0]) {
				if (current[0] == '\\') {
					current++;
					token_length_in_chars++;
					if (current >= end) {
						break;
					}

					switch (current[0]) {
					case '\'':
					case '"':
					case '\\':
					case '0':
					case 'b':
					case 'f':
					case 'n':
					case 'r':
					case 't':
					case '$':
						current++;
						token_length_in_chars++;
						break;
					case 'x':
						// hexadecimal escape character
						current++;
						token_length_in_chars++;
						while (current < end && current[0].isxdigit ()) {
							current++;
							token_length_in_chars++;
						}
						break;
					default:
						// the offending character is not consumed here; the
						// next iteration scans it as ordinary literal text
						Report.error (new SourceReference (source_file, line, column + token_length_in_chars, line, column + token_length_in_chars), "invalid escape sequence");
						break;
					}
				} else if (current[0] == '\n') {
					current++;
					line++;
					column = 1;
					// only the closing quote remains to be counted
					token_length_in_chars = 1;
				} else {
					if (type == TokenType.STRING_LITERAL && source_file.context.profile == Profile.DOVA && current[0] == '$') {
						// string template
						type = TokenType.OPEN_TEMPLATE;
						current = begin;
						state_stack += State.TEMPLATE;
						break;
					}
					unichar u = ((string) current).get_char_validated ((long) (end - current));
					if (u != (unichar) (-1)) {
						current += u.to_utf8 (null);
						token_length_in_chars++;
					} else {
						current++;
						Report.error (new SourceReference (source_file, line, column + token_length_in_chars, line, column + token_length_in_chars), "invalid UTF-8 character");
					}
					if (current < end && begin[0] == '\'' && current[0] != '\'') {
						// multiple characters in single character literal
						Report.error (new SourceReference (source_file, line, column + token_length_in_chars, line, column + token_length_in_chars), "invalid character literal");
					}
				}
			}
			if (current < end) {
				current++;
			} else {
				Report.error (new SourceReference (source_file, line, column + token_length_in_chars, line, column + token_length_in_chars), "syntax error, expected %c".printf (begin[0]));
			}
			break;
		default:
			// not the start of any token: report it, skip it and try again
			unichar u = ((string) current).get_char_validated ((long) (end - current));
			if (u != (unichar) (-1)) {
				current += u.to_utf8 (null);
				Report.error (new SourceReference (source_file, line, column, line, column), "syntax error, unexpected character");
			} else {
				current++;
				Report.error (new SourceReference (source_file, line, column, line, column), "invalid UTF-8 character");
			}
			column++;
			return read_token (out token_begin, out token_end);
		}
	}

	if (token_length_in_chars < 0) {
		column += (int) (current - begin);
	} else {
		column += token_length_in_chars;
	}

	token_end.pos = current;
	token_end.line = line;
	token_end.column = column - 1;
	previous = type;

	return type;
}
/**
 * Tells whether the bytes at begin start with the given keyword.
 *
 * Exactly keyword.length bytes of begin are compared, stopping at the
 * first difference.
 *
 * @param begin text to test
 * @param keyword expected prefix
 * @return true if all bytes of keyword are found at begin
 */
static bool matches (char* begin, string keyword) {
	char* expected = (char*) keyword;
	long remaining = keyword.length;
	int i = 0;
	while (i < remaining) {
		if (begin[i] != expected[i]) {
			return false;
		}
		i++;
	}
	return true;
}
/**
 * Skips whitespace on the current line, stopping before a newline.
 *
 * @return true if at least one whitespace character was skipped
 */
bool pp_whitespace () {
	char* start = current;
	while (current < end) {
		char c = current[0];
		// the newline ends a preprocessing directive and is left in place
		if (c == '\n' || !c.isspace ()) {
			break;
		}
		current++;
		column++;
	}
	return current != start;
}
/**
 * Handles a preprocessing directive; the cursor is on the '#'.
 *
 * A "#!" at the very start of the file is skipped up to the end of the
 * line. Otherwise the directive name is read and dispatched to the handler
 * for #if, #elif, #else or #endif. If the innermost conditional is in a
 * skipped section afterwards, input is consumed up to the start of the next
 * line that begins with a directive, or to the end of the file.
 */
void pp_directive () {
	// step over the hash sign
	current++;
	column++;

	bool at_file_start = (line == 1 && column == 2);
	if (at_file_start && current < end && current[0] == '!') {
		// hash bang: #!
		// skip until end of line or end of file
		while (current < end && current[0] != '\n') {
			current++;
		}
		return;
	}

	pp_whitespace ();

	// directive name
	char* name = current;
	while (current < end && current[0].isalnum ()) {
		current++;
		column++;
	}
	int len = (int) (current - name);

	if (len == 2 && matches (name, "if")) {
		parse_pp_if ();
	} else if (len == 4 && matches (name, "elif")) {
		parse_pp_elif ();
	} else if (len == 4 && matches (name, "else")) {
		parse_pp_else ();
	} else if (len == 5 && matches (name, "endif")) {
		parse_pp_endif ();
	} else {
		Report.error (new SourceReference (source_file, line, column - len, line, column), "syntax error, invalid preprocessing directive");
	}

	int depth = conditional_stack.length;
	if (depth == 0 || !conditional_stack[depth - 1].skip_section) {
		// the following lines are scanned normally
		return;
	}

	// skip lines until next preprocessing directive
	// at_line_start: only whitespace has been seen since the last newline
	bool at_line_start = false;
	while (current < end) {
		char c = current[0];
		if (at_line_start && c == '#') {
			// go back to begin of line
			current -= (column - 1);
			column = 1;
			return;
		}
		if (c == '\n') {
			line++;
			// becomes 1 again with the increment below
			column = 0;
			at_line_start = true;
		} else if (!c.isspace ()) {
			at_line_start = false;
		}
		current++;
		column++;
	}
}
/**
 * Expects the end of a preprocessing directive.
 *
 * Skips trailing whitespace and reports an error unless the cursor is then
 * on a newline. The newline itself is not consumed.
 */
void pp_eol () {
	pp_whitespace ();
	if (current < end && current[0] == '\n') {
		return;
	}
	Report.error (new SourceReference (source_file, line, column, line, column), "syntax error, expected newline");
}
/**
 * Handles "#if": evaluates the condition and opens a new conditional.
 *
 * The branch is processed only if the condition holds and the enclosing
 * conditional, if any, is not being skipped.
 */
void parse_pp_if () {
	pp_whitespace ();

	bool condition = parse_pp_expression ();

	pp_eol ();

	conditional_stack += Conditional ();
	int top = conditional_stack.length - 1;

	bool enclosing_active = (top == 0 || !conditional_stack[top - 1].skip_section);
	if (condition && enclosing_active) {
		// condition true => process code within if
		conditional_stack[top].matched = true;
	} else {
		// skip lines until next preprocessing directive
		conditional_stack[top].skip_section = true;
	}
}
/**
 * Handles "#elif": evaluates the condition and switches the innermost
 * conditional to this branch.
 *
 * The branch is processed only if the condition holds, no earlier branch
 * of the same conditional was taken and the enclosing conditional, if any,
 * is not being skipped. An #elif without an open conditional, or on a
 * conditional flagged else_found, is reported as an error.
 */
void parse_pp_elif () {
	pp_whitespace ();

	bool condition = parse_pp_expression ();

	pp_eol ();

	int top = conditional_stack.length - 1;
	if (top < 0 || conditional_stack[top].else_found) {
		Report.error (new SourceReference (source_file, line, column, line, column), "syntax error, unexpected #elif");
		return;
	}

	bool enclosing_active = (top == 0 || !conditional_stack[top - 1].skip_section);
	if (condition && !conditional_stack[top].matched && enclosing_active) {
		// condition true => process code within if
		conditional_stack[top].matched = true;
		conditional_stack[top].skip_section = false;
	} else {
		// skip lines until next preprocessing directive
		conditional_stack[top].skip_section = true;
	}
}
/**
 * Processes the remainder of an #else directive.
 *
 * An error is reported when there is no open conditional or when the
 * open conditional already contains an #else. Otherwise the section is
 * processed only if no earlier branch of this conditional matched and
 * the enclosing conditional is not being skipped.
 */
void parse_pp_else () {
	pp_eol ();

	if (conditional_stack.length == 0 || conditional_stack[conditional_stack.length - 1].else_found) {
		Report.error (new SourceReference (source_file, line, column, line, column), "syntax error, unexpected #else");
		return;
	}

	int top = conditional_stack.length - 1;

	// record the #else; the else_found checks in parse_pp_elif and above
	// rely on this flag to reject a later #elif or a second #else
	conditional_stack[top].else_found = true;

	bool parent_active = (top == 0 || !conditional_stack[top - 1].skip_section);

	if (!conditional_stack[top].matched && parent_active) {
		// no branch matched so far => process code within else
		conditional_stack[top].matched = true;
		conditional_stack[top].skip_section = false;
	} else {
		// skip lines until next preprocessing directive
		conditional_stack[top].skip_section = true;
	}
}
/**
 * Processes the remainder of an #endif directive by removing the
 * innermost entry from the conditional stack. Reports a syntax error
 * when the stack is empty.
 */
void parse_pp_endif () {
	pp_eol ();

	if (conditional_stack.length > 0) {
		// drop the innermost conditional
		conditional_stack.length--;
		return;
	}

	Report.error (new SourceReference (source_file, line, column, line, column), "syntax error, unexpected #endif");
}
/**
 * Consumes a run of identifier characters and evaluates it as a
 * preprocessor symbol.
 *
 * @return true for the literal "true", false for the literal "false",
 *         otherwise whatever source_file.context.is_defined () returns
 *         for the identifier; false after reporting an error when no
 *         identifier character is present
 */
bool parse_pp_symbol () {
	int length = 0;
	while (current < end && is_ident_char (current[0])) {
		current++;
		column++;
		length++;
	}

	if (length == 0) {
		Report.error (new SourceReference (source_file, line, column, line, column), "syntax error, expected identifier");
		return false;
	}

	// the identifier is the `length' bytes just consumed
	string name = ((string) (current - length)).ndup (length);

	if (name == "true") {
		return true;
	}
	if (name == "false") {
		return false;
	}
	return source_file.context.is_defined (name);
}
/**
 * Parses a primary preprocessor expression: either a symbol or a
 * parenthesized expression.
 *
 * @return the value of the expression, or false after reporting an
 *         error when neither form is found
 */
bool parse_pp_primary_expression () {
	if (current < end && is_ident_char (current[0])) {
		return parse_pp_symbol ();
	}

	if (current < end && current[0] == '(') {
		current++;
		column++;
		pp_whitespace ();
		bool result = parse_pp_expression ();
		pp_whitespace ();
		if (current >= end || current[0] != ')') {
			Report.error (new SourceReference (source_file, line, column, line, column), "syntax error, expected `)'");
		} else {
			current++;
			column++;
		}
		// the inner value is returned even if the closing parenthesis is missing
		return result;
	}

	// end of input or a character that cannot start an expression
	Report.error (new SourceReference (source_file, line, column, line, column), "syntax error, expected identifier");
	return false;
}
/**
 * Parses a unary preprocessor expression, i.e. a primary expression
 * preceded by any number of `!' operators.
 *
 * @return the value of the expression with all negations applied
 */
bool parse_pp_unary_expression () {
	if (current >= end || current[0] != '!') {
		return parse_pp_primary_expression ();
	}

	current++;
	column++;
	pp_whitespace ();

	bool operand = parse_pp_unary_expression ();
	return !operand;
}
/**
 * Parses a chain of unary expressions joined by `==' or `!=' and
 * evaluates it from left to right.
 *
 * @return the value of the whole chain
 */
bool parse_pp_equality_expression () {
	bool left = parse_pp_unary_expression ();
	pp_whitespace ();

	while (current < end - 1 && current[1] == '=' && (current[0] == '=' || current[0] == '!')) {
		bool negated = (current[0] == '!');

		current += 2;
		column += 2;
		pp_whitespace ();

		bool right = parse_pp_unary_expression ();
		left = negated ? (left != right) : (left == right);

		// the unary parser stops directly behind its operand; run
		// pp_whitespace () here so that a further `==' / `!=' separated
		// by blanks is still recognised as part of this chain
		pp_whitespace ();
	}

	return left;
}
/**
 * Parses a chain of equality expressions joined by `&&'.
 *
 * Every operand is parsed, even when the result is already known to
 * be false.
 *
 * @return true if all operands evaluated to true
 */
bool parse_pp_and_expression () {
	bool result = parse_pp_equality_expression ();
	pp_whitespace ();

	while (true) {
		bool at_operator = (current < end - 1 && current[0] == '&' && current[1] == '&');
		if (!at_operator) {
			break;
		}

		current += 2;
		column += 2;
		pp_whitespace ();

		bool operand = parse_pp_equality_expression ();
		if (!operand) {
			result = false;
		}
	}

	return result;
}
/**
 * Parses a chain of and-expressions joined by `||'.
 *
 * Every operand is parsed, even when the result is already known to
 * be true.
 *
 * @return true if at least one operand evaluated to true
 */
bool parse_pp_or_expression () {
	bool result = parse_pp_and_expression ();
	pp_whitespace ();

	while (true) {
		bool at_operator = (current < end - 1 && current[0] == '|' && current[1] == '|');
		if (!at_operator) {
			break;
		}

		current += 2;
		column += 2;
		pp_whitespace ();

		bool operand = parse_pp_and_expression ();
		if (operand) {
			result = true;
		}
	}

	return result;
}
/**
 * Entry point for preprocessor expressions; delegates to the
 * or-expression parser.
 *
 * @return the value of the parsed expression
 */
bool parse_pp_expression () {
	bool value = parse_pp_or_expression ();
	return value;
}
/**
 * Consumes whitespace, keeping line and column up to date, and hands
 * over to pp_directive () when a `#' is found at the beginning of a
 * line (optionally preceded by whitespace).
 *
 * @return true if whitespace was consumed or a directive was processed
 */
bool whitespace () {
	bool consumed = false;
	bool at_line_start = (column == 1);

	while (current < end && current[0].isspace ()) {
		bool newline = (current[0] == '\n');

		current++;
		consumed = true;

		if (newline) {
			// the character after a newline is column 1 of the next line
			line++;
			column = 1;
			at_line_start = true;
		} else {
			column++;
		}
	}

	if (at_line_start && current < end && current[0] == '#') {
		pp_directive ();
		return true;
	}

	return consumed;
}
/**
 * Consumes a single-line or multi-line comment at the current
 * position.
 *
 * Comments are passed to push_comment () when file_comment is set or,
 * for multi-line comments, when the text starts with `*'.
 *
 * @param file_comment whether comments are collected as file comments;
 *                     in that mode a multi-line comment starting with
 *                     `/' `*' `*' is left untouched
 * @return true if a comment was consumed (also after reporting an
 *         unterminated multi-line comment), false otherwise
 */
bool comment (bool file_comment = false) {
	if (current == null
	    || current > end - 2
	    || current[0] != '/'
	    || (current[1] != '/' && current[1] != '*')) {
		return false;
	}

	if (current[1] == '/') {
		SourceReference source_reference = null;
		if (file_comment) {
			source_reference = new SourceReference (source_file, line, column, line, column);
		}

		// single-line comment
		current += 2;
		char* begin = current;

		// skip until end of line or end of file; column is not advanced
		// here, whitespace () resets it at the newline that follows
		while (current < end && current[0] != '\n') {
			current++;
		}

		if (source_reference != null) {
			push_comment (((string) begin).ndup ((long) (current - begin)), source_reference, file_comment);
		}
	} else {
		SourceReference source_reference = null;

		// only inspect the third character when it lies inside the buffer
		bool starts_with_star = (current < end - 2 && current[2] == '*');

		if (file_comment && starts_with_star) {
			return false;
		}

		if (starts_with_star || file_comment) {
			source_reference = new SourceReference (source_file, line, column, line, column);
		}

		// step over the opening delimiter, keeping column in sync
		current += 2;
		column += 2;

		char* begin = current;
		while (current < end - 1
		       && (current[0] != '*' || current[1] != '/')) {
			if (current[0] == '\n') {
				line++;
				column = 0;
			}
			current++;
			column++;
		}

		// fewer than two characters left: the closing delimiter is missing
		if (current >= end - 1) {
			Report.error (new SourceReference (source_file, line, column, line, column), "syntax error, expected */");
			return true;
		}

		if (source_reference != null) {
			push_comment (((string) begin).ndup ((long) (current - begin)), source_reference, file_comment);
		}

		// step over the closing delimiter
		current += 2;
		column += 2;
	}

	return true;
}
/**
 * Skips any mix of whitespace and comments at the current position.
 */
void space () {
	while (true) {
		// comment () is only tried when whitespace () consumed nothing
		if (!whitespace () && !comment ()) {
			break;
		}
	}
}
/**
 * Skips whitespace and comments at the current position, calling
 * comment () in file comment mode.
 */
public void parse_file_comments () {
	while (true) {
		// comment () is only tried when whitespace () consumed nothing
		if (!whitespace () && !comment (true)) {
			break;
		}
	}
}
/**
 * Records a comment found by the scanner.
 *
 * A comment whose text starts with `*' becomes the pending comment; a
 * previously pending comment is added to the source file first. In
 * file comment mode the comment is added to the source file and the
 * pending comment is cleared.
 *
 * @param comment_item     comment text without the delimiters
 * @param source_reference location of the comment
 * @param file_comment     whether the comment is a file comment
 */
void push_comment (string comment_item, SourceReference source_reference, bool file_comment) {
	bool starts_with_star = (comment_item[0] == '*');
	if (starts_with_star) {
		if (_comment != null) {
			// extra doc comment, add it to source file comments
			source_file.add_comment (_comment);
		}
		_comment = new Comment (comment_item, source_reference);
	}

	if (!file_comment) {
		return;
	}

	source_file.add_comment (new Comment (comment_item, source_reference));
	_comment = null;
}
/**
 * Hands out the pending comment, if any, and forgets it.
 *
 * @return the saved comment, or null when none is pending
 */
public Comment? pop_comment () {
	Comment? pending = _comment;
	_comment = null;
	return pending;
}