libxml-2.0: Make Doc.save_format_file return an int
[vala-lang.git] / vala / valageniescanner.vala
blob87e9b457b13975cda40a5d37503e3710106a6344
1 /* valageniescanner.vala
3 * Copyright (C) 2008 Jamie McCracken, Jürg Billeter
4 * Based on code by Jürg Billeter
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20 * Author:
21 * Jamie McCracken jamiemcc gnome org
24 using GLib;
/**
 * Lexical scanner for Genie source files.
 */
29 public class Vala.Genie.Scanner {
/** Source file being scanned; assigned by the constructor. */
public SourceFile source_file { get; private set; }

/**
 * Number of spaces per indentation level. The value 0 (set by the
 * constructor) makes count_tabs () count tab characters instead.
 */
public int indent_spaces { get; set; }

/* start of the mapped file contents, current read position, one past the last byte */
char* begin;
char* current;
char* end;

/* 1-based position of `current` */
int line;
int column;

/* indentation level of the line being read, level already reported to
 * the caller, and DEDENT tokens still to be emitted by read_token () */
int current_indent_level;
int indent_level;
int pending_dedents;

/* track open parens and braces for automatic line continuations */
int open_parens_count;
int open_brace_count;

/* type of the token most recently returned by read_token () */
TokenType last_token;
/* set once read_token () has reached its first regular token */
bool parse_started;

/* NOTE(review): presumably the most recently scanned comment; its use is outside this excerpt */
Comment _comment;

/* cleared by seek (); NOTE(review): presumably maintained by the preprocessor code outside this excerpt */
Conditional[] conditional_stack;
/*
 * One entry of conditional_stack.
 * NOTE(review): field meanings are inferred from their names (state of a
 * preprocessor conditional); the code using them is outside this excerpt.
 */
struct Conditional {
	public bool matched;
	public bool else_found;
	public bool skip_section;
}
/* stack of lexer states; the top entry selects which read_*_token () routine runs */
State[] state_stack;

/* lexer contexts pushed onto state_stack while scanning nested constructs */
enum State {
	PARENS,
	BRACE,
	BRACKET,
	REGEX_LITERAL,
	TEMPLATE,
	TEMPLATE_PART
}
/**
 * Creates a scanner positioned at the first byte of the mapped contents
 * of source_file, at line 1, column 1, with tab-based indentation.
 */
public Scanner (SourceFile source_file) {
	this.source_file = source_file;

	/* buffer bounds and read cursor */
	begin = source_file.get_mapped_contents ();
	end = begin + source_file.get_mapped_length ();
	current = begin;

	/* position tracking */
	line = 1;
	column = 1;

	/* indentation tracking; 0 spaces means indentation is counted in tabs */
	_indent_spaces = 0;
	indent_level = 0;
	current_indent_level = 0;
	pending_dedents = 0;

	/* automatic line-continuation tracking */
	open_brace_count = 0;
	open_parens_count = 0;

	last_token = TokenType.NONE;
	parse_started = false;
}
/* true when the innermost lexer state is State.TEMPLATE */
bool in_template () {
	int depth = state_stack.length;
	if (depth <= 0) {
		return false;
	}
	return state_stack[depth - 1] == State.TEMPLATE;
}
/* true when the innermost lexer state is State.TEMPLATE_PART */
bool in_template_part () {
	int depth = state_stack.length;
	if (depth <= 0) {
		return false;
	}
	return state_stack[depth - 1] == State.TEMPLATE_PART;
}
/* true for characters allowed inside an identifier: ASCII letters, digits and '_' */
bool is_ident_char (char c) {
	if (c == '_') {
		return true;
	}
	return c.isalnum ();
}
/* true when the innermost lexer state is State.REGEX_LITERAL */
bool in_regex_literal () {
	int depth = state_stack.length;
	if (depth <= 0) {
		return false;
	}
	return state_stack[depth - 1] == State.REGEX_LITERAL;
}
/**
 * Reads the next token while inside a regular expression literal.
 *
 * Returns CLOSE_REGEX_LITERAL for the terminating '/' together with any
 * trailing modifiers (i, s, m, x), REGEX_LITERAL for the pattern text up
 * to the next unescaped '/', or EOF at the end of the buffer. If the
 * pattern is not terminated on the same line, an error is reported, the
 * regex state is popped and scanning continues with read_token ().
 *
 * @param token_begin receives the location of the first character of the token
 * @param token_end receives the location of the last character of the token
 */
public TokenType read_regex_token (out SourceLocation token_begin, out SourceLocation token_end) {
	TokenType type;
	char* begin = current;
	token_begin.pos = begin;
	token_begin.line = line;
	token_begin.column = column;

	/* negative means "use the byte length"; otherwise the length in characters */
	int token_length_in_chars = -1;

	if (current >= end) {
		type = TokenType.EOF;
	} else {
		switch (current[0]) {
		case '/':
			type = TokenType.CLOSE_REGEX_LITERAL;
			current++;
			/* the '/' itself counts as one character, so that the column
			 * advance below covers the slash plus every modifier */
			token_length_in_chars = 1;
			state_stack.length--;
			var fl_i = false;
			var fl_s = false;
			var fl_m = false;
			var fl_x = false;
			/* the mapped buffer is not guaranteed to be NUL-terminated */
			while (current < end && (current[0] == 'i' || current[0] == 's' || current[0] == 'm' || current[0] == 'x')) {
				bool duplicate = false;
				switch (current[0]) {
				case 'i':
					duplicate = fl_i;
					fl_i = true;
					break;
				case 's':
					duplicate = fl_s;
					fl_s = true;
					break;
				case 'm':
					duplicate = fl_m;
					fl_m = true;
					break;
				case 'x':
					duplicate = fl_x;
					fl_x = true;
					break;
				}
				if (duplicate) {
					Report.error (new SourceReference (source_file, line, column + token_length_in_chars, line, column + token_length_in_chars), "modifier '%c' used more than once".printf (current[0]));
				}
				current++;
				token_length_in_chars++;
			}
			break;
		default:
			type = TokenType.REGEX_LITERAL;
			token_length_in_chars = 0;
			while (current < end && current[0] != '/') {
				if (current[0] == '\\') {
					current++;
					token_length_in_chars++;
					if (current >= end) {
						break;
					}

					switch (current[0]) {
					case '\'':
					case '"':
					case '\\':
					case '/':
					case '^':
					case '$':
					case '.':
					case '[':
					case ']':
					case '{':
					case '}':
					case '(':
					case ')':
					case '?':
					case '*':
					case '+':
					case '-':
					case '#':
					case '&':
					case '~':
					case ':':
					case ';':
					case '<':
					case '>':
					case '|':
					case '%':
					case '=':
					case '@':
					case '0':
					case 'b':
					case 'B':
					case 'f':
					case 'n':
					case 'r':
					case 't':
					case 'a':
					case 'A':
					case 'p':
					case 'P':
					case 'e':
					case 'd':
					case 'D':
					case 's':
					case 'S':
					case 'w':
					case 'W':
					case 'G':
					case 'z':
					case 'Z':
						current++;
						token_length_in_chars++;
						break;
					case 'x':
						// hexadecimal escape character
						current++;
						token_length_in_chars++;
						while (current < end && current[0].isxdigit ()) {
							current++;
							token_length_in_chars++;
						}
						break;
					default:
						/* the offending character is not consumed here; the
						 * next loop iteration scans it as ordinary text */
						Report.error (new SourceReference (source_file, line, column + token_length_in_chars, line, column + token_length_in_chars), "invalid escape sequence");
						break;
					}
				} else if (current[0] == '\n') {
					break;
				} else {
					unichar u = ((string) current).get_char_validated ((long) (end - current));
					if (u != (unichar) (-1)) {
						current += u.to_utf8 (null);
						token_length_in_chars++;
					} else {
						current++;
						Report.error (new SourceReference (source_file, line, column + token_length_in_chars, line, column + token_length_in_chars), "invalid UTF-8 character");
					}
				}
			}
			if (current >= end || current[0] == '\n') {
				Report.error (new SourceReference (source_file, line, column + token_length_in_chars, line, column + token_length_in_chars), "syntax error, expected /");
				state_stack.length--;
				/* account for the characters consumed above before
				 * handing over to the regular tokenizer */
				column += token_length_in_chars;
				return read_token (out token_begin, out token_end);
			}
			break;
		}
	}

	if (token_length_in_chars < 0) {
		column += (int) (current - begin);
	} else {
		column += token_length_in_chars;
	}

	token_end.pos = current;
	token_end.line = line;
	token_end.column = column - 1;

	return type;
}
/**
 * Repositions the scanner at location and discards the conditional and
 * lexer-state stacks. Indentation tracking and last_token are left
 * untouched.
 */
public void seek (SourceLocation location) {
	conditional_stack = null;
	state_stack = null;

	column = location.column;
	line = location.line;
	current = location.pos;
}
/*
 * Classifies the len characters starting at begin as a Genie keyword or
 * as a plain identifier. Candidates are selected by length and then by
 * first character; anything that is not an exact keyword match yields
 * TokenType.IDENTIFIER.
 */
TokenType get_identifier_or_keyword (char* begin, int len) {
	switch (len) {
	case 2:
		switch (begin[0]) {
		case 'a':
			if (matches (begin, "as")) return TokenType.AS;
			break;
		case 'd':
			if (matches (begin, "do")) return TokenType.DO;
			break;
		case 'i':
			if (matches (begin, "if")) return TokenType.IF;
			if (matches (begin, "in")) return TokenType.IN;
			if (matches (begin, "is")) return TokenType.IS;
			break;
		case 'o':
			if (matches (begin, "of")) return TokenType.OF;
			if (matches (begin, "or")) return TokenType.OP_OR;
			break;
		case 't':
			if (matches (begin, "to")) return TokenType.TO;
			break;
		}
		break;
	case 3:
		switch (begin[0]) {
		case 'a':
			if (matches (begin, "and")) return TokenType.OP_AND;
			break;
		case 'd':
			if (matches (begin, "def")) return TokenType.DEF;
			break;
		case 'f':
			if (matches (begin, "for")) return TokenType.FOR;
			break;
		case 'g':
			if (matches (begin, "get")) return TokenType.GET;
			break;
		case 'i':
			if (matches (begin, "isa")) return TokenType.ISA;
			break;
		case 'n':
			if (matches (begin, "new")) return TokenType.NEW;
			if (matches (begin, "not")) return TokenType.OP_NEG;
			break;
		case 'o':
			if (matches (begin, "out")) return TokenType.OUT;
			break;
		case 'r':
			if (matches (begin, "ref")) return TokenType.REF;
			break;
		case 's':
			if (matches (begin, "set")) return TokenType.SET;
			break;
		case 't':
			if (matches (begin, "try")) return TokenType.TRY;
			break;
		case 'v':
			if (matches (begin, "var")) return TokenType.VAR;
			break;
		}
		break;
	case 4:
		switch (begin[0]) {
		case 'c':
			if (matches (begin, "case")) return TokenType.CASE;
			break;
		case 'd':
			if (matches (begin, "dict")) return TokenType.DICT;
			break;
		case 'e':
			if (matches (begin, "else")) return TokenType.ELSE;
			if (matches (begin, "enum")) return TokenType.ENUM;
			break;
		case 'i':
			if (matches (begin, "init")) return TokenType.INIT;
			break;
		case 'l':
			if (matches (begin, "list")) return TokenType.LIST;
			if (matches (begin, "lock")) return TokenType.LOCK;
			break;
		case 'n':
			if (matches (begin, "null")) return TokenType.NULL;
			break;
		case 'p':
			if (matches (begin, "pass")) return TokenType.PASS;
			if (matches (begin, "prop")) return TokenType.PROP;
			break;
		case 's':
			if (matches (begin, "self")) return TokenType.THIS;
			break;
		case 't':
			if (matches (begin, "true")) return TokenType.TRUE;
			break;
		case 'u':
			if (matches (begin, "uses")) return TokenType.USES;
			break;
		case 'v':
			if (matches (begin, "void")) return TokenType.VOID;
			break;
		case 'w':
			if (matches (begin, "weak")) return TokenType.WEAK;
			if (matches (begin, "when")) return TokenType.WHEN;
			break;
		}
		break;
	case 5:
		switch (begin[0]) {
		case 'a':
			if (matches (begin, "array")) return TokenType.ARRAY;
			if (matches (begin, "async")) return TokenType.ASYNC;
			break;
		case 'b':
			if (matches (begin, "break")) return TokenType.BREAK;
			break;
		case 'c':
			if (matches (begin, "class")) return TokenType.CLASS;
			if (matches (begin, "const")) return TokenType.CONST;
			break;
		case 'e':
			if (matches (begin, "event")) return TokenType.EVENT;
			break;
		case 'f':
			if (matches (begin, "false")) return TokenType.FALSE;
			if (matches (begin, "final")) return TokenType.FINAL;
			break;
		case 'o':
			if (matches (begin, "owned")) return TokenType.OWNED;
			break;
		case 'p':
			if (matches (begin, "print")) return TokenType.PRINT;
			break;
		case 's':
			if (matches (begin, "super")) return TokenType.SUPER;
			break;
		case 'r':
			if (matches (begin, "raise")) return TokenType.RAISE;
			break;
		case 'w':
			if (matches (begin, "while")) return TokenType.WHILE;
			break;
		case 'y':
			if (matches (begin, "yield")) return TokenType.YIELD;
			break;
		}
		break;
	case 6:
		switch (begin[0]) {
		case 'a':
			if (matches (begin, "assert")) return TokenType.ASSERT;
			break;
		case 'd':
			if (matches (begin, "delete")) return TokenType.DELETE;
			if (matches (begin, "downto")) return TokenType.DOWNTO;
			break;
		case 'e':
			if (matches (begin, "except")) return TokenType.EXCEPT;
			if (matches (begin, "extern")) return TokenType.EXTERN;
			break;
		case 'i':
			if (matches (begin, "inline")) return TokenType.INLINE;
			break;
		case 'p':
			if (matches (begin, "params")) return TokenType.PARAMS;
			if (matches (begin, "public")) return TokenType.PUBLIC;
			break;
		case 'r':
			if (matches (begin, "raises")) return TokenType.RAISES;
			if (matches (begin, "return")) return TokenType.RETURN;
			break;
		case 's':
			if (matches (begin, "sizeof")) return TokenType.SIZEOF;
			if (matches (begin, "static")) return TokenType.STATIC;
			if (matches (begin, "struct")) return TokenType.STRUCT;
			break;
		case 't':
			if (matches (begin, "typeof")) return TokenType.TYPEOF;
			break;
		}
		break;
	case 7:
		switch (begin[0]) {
		case 'd':
			if (matches (begin, "default")) return TokenType.DEFAULT;
			if (matches (begin, "dynamic")) return TokenType.DYNAMIC;
			break;
		case 'e':
			if (matches (begin, "ensures")) return TokenType.ENSURES;
			break;
		case 'f':
			if (matches (begin, "finally")) return TokenType.FINALLY;
			break;
		case 'p':
			if (matches (begin, "private")) return TokenType.PRIVATE;
			break;
		case 'u':
			if (matches (begin, "unowned")) return TokenType.UNOWNED;
			break;
		case 'v':
			if (matches (begin, "virtual")) return TokenType.VIRTUAL;
			break;
		}
		break;
	case 8:
		switch (begin[0]) {
		case 'a':
			if (matches (begin, "abstract")) return TokenType.ABSTRACT;
			break;
		case 'c':
			if (matches (begin, "continue")) return TokenType.CONTINUE;
			break;
		case 'd':
			if (matches (begin, "delegate")) return TokenType.DELEGATE;
			break;
		case 'i':
			if (matches (begin, "internal")) return TokenType.INTERNAL;
			break;
		case 'o':
			if (matches (begin, "override")) return TokenType.OVERRIDE;
			break;
		case 'r':
			if (matches (begin, "readonly")) return TokenType.READONLY;
			if (matches (begin, "requires")) return TokenType.REQUIRES;
			break;
		case 'v':
			if (matches (begin, "volatile")) return TokenType.VOLATILE;
			break;
		}
		break;
	case 9:
		switch (begin[0]) {
		case 'c':
			if (matches (begin, "construct")) return TokenType.CONSTRUCT;
			break;
		case 'e':
			if (matches (begin, "exception")) return TokenType.ERRORDOMAIN;
			break;
		case 'i':
			if (matches (begin, "interface")) return TokenType.INTERFACE;
			break;
		case 'n':
			if (matches (begin, "namespace")) return TokenType.NAMESPACE;
			break;
		case 'p':
			if (matches (begin, "protected")) return TokenType.PROTECTED;
			break;
		case 'w':
			if (matches (begin, "writeonly")) return TokenType.WRITEONLY;
			break;
		}
		break;
	case 10:
		if (matches (begin, "implements")) return TokenType.IMPLEMENTS;
		break;
	}

	return TokenType.IDENTIFIER;
}
/**
 * Reads the next token while inside a string template (@"...").
 *
 * Returns CLOSE_TEMPLATE for the closing quote, IDENTIFIER for a
 * "$name" reference, TEMPLATE_STRING_LITERAL for literal text (including
 * the "$$" escape) or EOF at the end of the buffer. "$(" pushes a PARENS
 * state and continues with read_token () so that the embedded expression
 * is tokenized normally. An unterminated template is reported, the
 * template state is popped and scanning continues with read_token ().
 *
 * @param token_begin receives the location of the first character of the token
 * @param token_end receives the location of the last character of the token
 */
public TokenType read_template_token (out SourceLocation token_begin, out SourceLocation token_end) {
	TokenType type;
	char* begin = current;
	token_begin.pos = begin;
	token_begin.line = line;
	token_begin.column = column;

	/* negative means "use the byte length"; otherwise the length in characters */
	int token_length_in_chars = -1;

	if (current >= end) {
		type = TokenType.EOF;
	} else {
		switch (current[0]) {
		case '"':
			type = TokenType.CLOSE_TEMPLATE;
			current++;
			state_stack.length--;
			break;
		case '$':
			token_begin.pos++; // $ is not part of following token
			current++;
			/* every look-ahead is bounds-checked: the '$' may be the
			 * last byte of the buffer */
			if (current < end && (current[0].isalpha () || current[0] == '_')) {
				while (current < end && is_ident_char (current[0])) {
					current++;
				}
				type = TokenType.IDENTIFIER;
				state_stack += State.TEMPLATE_PART;
			} else if (current < end && current[0] == '(') {
				current++;
				column += 2;
				state_stack += State.PARENS;
				/* balance the decrement performed by read_token () when
				 * it reaches the matching ')' */
				open_parens_count++;
				return read_token (out token_begin, out token_end);
			} else if (current < end && current[0] == '$') {
				type = TokenType.TEMPLATE_STRING_LITERAL;
				current++;
				state_stack += State.TEMPLATE_PART;
			} else {
				Report.error (new SourceReference (source_file, line, column + 1, line, column + 1), "unexpected character");
				/* the '$' has been consumed; keep the column in step */
				column++;
				return read_template_token (out token_begin, out token_end);
			}
			break;
		default:
			type = TokenType.TEMPLATE_STRING_LITERAL;
			token_length_in_chars = 0;
			while (current < end && current[0] != '"' && current[0] != '$') {
				if (current[0] == '\\') {
					current++;
					token_length_in_chars++;
					if (current >= end) {
						break;
					}

					switch (current[0]) {
					case '\'':
					case '"':
					case '\\':
					case '0':
					case 'b':
					case 'f':
					case 'n':
					case 'r':
					case 't':
						current++;
						token_length_in_chars++;
						break;
					case 'x':
						// hexadecimal escape character
						current++;
						token_length_in_chars++;
						while (current < end && current[0].isxdigit ()) {
							current++;
							token_length_in_chars++;
						}
						break;
					default:
						/* the offending character is not consumed here; the
						 * next loop iteration scans it as ordinary text */
						Report.error (new SourceReference (source_file, line, column + token_length_in_chars, line, column + token_length_in_chars), "invalid escape sequence");
						break;
					}
				} else if (current[0] == '\n') {
					break;
				} else {
					unichar u = ((string) current).get_char_validated ((long) (end - current));
					if (u != (unichar) (-1)) {
						current += u.to_utf8 (null);
						token_length_in_chars++;
					} else {
						current++;
						Report.error (new SourceReference (source_file, line, column + token_length_in_chars, line, column + token_length_in_chars), "invalid UTF-8 character");
					}
				}
			}
			if (current >= end || current[0] == '\n') {
				Report.error (new SourceReference (source_file, line, column + token_length_in_chars, line, column + token_length_in_chars), "syntax error, expected \"");
				state_stack.length--;
				/* account for the characters consumed above before
				 * handing over to the regular tokenizer */
				column += token_length_in_chars;
				return read_token (out token_begin, out token_end);
			}
			state_stack += State.TEMPLATE_PART;
			break;
		}
	}

	if (token_length_in_chars < 0) {
		column += (int) (current - begin);
	} else {
		column += token_length_in_chars;
	}

	token_end.pos = current;
	token_end.line = line;
	token_end.column = column - 1;

	return type;
}
/**
 * Reads the next token from the source file.
 *
 * Dispatches to read_template_token () / read_regex_token () when the
 * innermost lexer state asks for it, emits a synthetic COMMA after each
 * template part, and otherwise produces, in this order of precedence:
 * pending DEDENT tokens, EOL for the first newline after a statement,
 * INDENT / DEDENT for indentation changes on the next non-empty line,
 * and finally identifiers, keywords, literals and operators. At the end
 * of the buffer the remaining indentation levels are closed with DEDENT
 * tokens before EOF is returned.
 *
 * @param token_begin receives the location of the first character of the token
 * @param token_end receives the location of the last character of the token
 */
public TokenType read_token (out SourceLocation token_begin, out SourceLocation token_end) {
	if (in_template ()) {
		return read_template_token (out token_begin, out token_end);
	} else if (in_template_part ()) {
		state_stack.length--;

		token_begin.pos = current;
		token_begin.line = line;
		token_begin.column = column;

		token_end.pos = current;
		token_end.line = line;
		token_end.column = column - 1;

		/* zero-width separator between the parts of a template */
		return TokenType.COMMA;
	} else if (in_regex_literal ()) {
		return read_regex_token (out token_begin, out token_end);
	}

	/* emit dedents if outstanding before checking any other chars */

	if (pending_dedents > 0) {
		pending_dedents--;
		indent_level--;

		token_begin.pos = current;
		token_begin.line = line;
		token_begin.column = column;

		token_end.pos = current;
		token_end.line = line;
		token_end.column = column;

		last_token = TokenType.DEDENT;

		return TokenType.DEDENT;
	}

	if ((_indent_spaces == 0) || (last_token != TokenType.EOL)) {
		/* scrub whitespace (excluding newlines) and comments */
		space ();
	}

	/* handle explicit line continuation (lines ending with "\") */
	while (current < end - 1 && current[0] == '\\' && current[1] == '\n') {
		current += 2;
		line++;
		skip_space_tabs ();
	}

	/* handle automatic line continuations (when inside parens or braces) */
	while (current < end && current[0] == '\n' && (open_parens_count > 0 || open_brace_count > 0)) {
		current++;
		line++;
		skip_space_tabs ();
	}

	/* handle non-consecutive new line once parsing is underway - EOL */
	if (current < end && newline () && parse_started && last_token != TokenType.EOL && last_token != TokenType.SEMICOLON) {
		token_begin.pos = current;
		token_begin.line = line;
		token_begin.column = column;

		token_end.pos = current;
		token_end.line = line;
		token_end.column = column;

		last_token = TokenType.EOL;

		return TokenType.EOL;
	}

	while (skip_newlines ()) {
		token_begin.pos = current;
		token_begin.line = line;
		token_begin.column = column;

		current_indent_level = count_tabs ();

		/* if its an empty new line then ignore */
		if (current_indent_level == -1) {
			continue;
		}

		if (current_indent_level > indent_level) {
			indent_level = current_indent_level;

			token_end.pos = current;
			token_end.line = line;
			token_end.column = column;

			last_token = TokenType.INDENT;

			return TokenType.INDENT;
		} else if (current_indent_level < indent_level) {
			/* one DEDENT is returned now, the rest on subsequent calls */
			indent_level--;

			pending_dedents = (indent_level - current_indent_level);

			token_end.pos = current;
			token_end.line = line;
			token_end.column = column;

			last_token = TokenType.DEDENT;

			return TokenType.DEDENT;
		}
	}

	TokenType type;
	char* begin = current;
	token_begin.pos = begin;
	token_begin.line = line;
	token_begin.column = column;

	/* negative means "use the byte length"; otherwise the length in characters */
	int token_length_in_chars = -1;

	parse_started = true;

	if (current >= end) {
		if (indent_level > 0) {
			/* close the open indentation levels before reporting EOF */
			indent_level--;

			pending_dedents = indent_level;

			type = TokenType.DEDENT;
		} else {
			type = TokenType.EOF;
		}
	} else if (current[0].isalpha () || current[0] == '_') {
		int len = 0;
		while (current < end && is_ident_char (current[0])) {
			current++;
			len++;
		}
		type = get_identifier_or_keyword (begin, len);
	} else if (current[0] == '@') {
		if (current < end - 1 && current[1] == '"') {
			type = TokenType.OPEN_TEMPLATE;
			current += 2;
			state_stack += State.TEMPLATE;
		} else {
			token_begin.pos++; // @ is not part of the identifier
			current++;
			while (current < end && is_ident_char (current[0])) {
				current++;
			}
			type = TokenType.IDENTIFIER;
		}
	} else if (current[0].isdigit ()) {
		while (current < end && current[0].isdigit ()) {
			current++;
		}
		type = TokenType.INTEGER_LITERAL;
		if (current < end && current[0].tolower () == 'l') {
			current++;
			if (current < end && current[0].tolower () == 'l') {
				current++;
			}
		} else if (current < end && current[0].tolower () == 'u') {
			current++;
			if (current < end && current[0].tolower () == 'l') {
				current++;
				if (current < end && current[0].tolower () == 'l') {
					current++;
				}
			}
		} else if (current < end - 1 && current[0] == '.' && current[1].isdigit ()) {
			current++;
			while (current < end && current[0].isdigit ()) {
				current++;
			}
			if (current < end && current[0].tolower () == 'e') {
				current++;
				if (current < end && (current[0] == '+' || current[0] == '-')) {
					current++;
				}
				while (current < end && current[0].isdigit ()) {
					current++;
				}
			}
			if (current < end && current[0].tolower () == 'f') {
				current++;
			}
			type = TokenType.REAL_LITERAL;
		} else if (current < end - 1 && current == begin + 1
		           && begin[0] == '0' && begin[1] == 'x' && begin[2].isxdigit ()) {
			// hexadecimal integer literal
			current++;
			while (current < end && current[0].isxdigit ()) {
				current++;
			}
		} else if (current < end && is_ident_char (current[0])) {
			// allow identifiers to start with a digit
			// as long as they contain at least one char
			while (current < end && is_ident_char (current[0])) {
				current++;
			}
			type = TokenType.IDENTIFIER;
		}
	} else {
		switch (current[0]) {
		case '{':
			type = TokenType.OPEN_BRACE;
			open_brace_count++;
			state_stack += State.BRACE;
			current++;
			break;
		case '}':
			type = TokenType.CLOSE_BRACE;
			/* an unmatched closer must not drive the counter or the
			 * state stack below zero */
			if (open_brace_count > 0) {
				open_brace_count--;
			}
			if (state_stack.length > 0) {
				state_stack.length--;
			}
			current++;
			break;
		case '(':
			type = TokenType.OPEN_PARENS;
			open_parens_count++;
			state_stack += State.PARENS;
			current++;
			break;
		case ')':
			type = TokenType.CLOSE_PARENS;
			if (open_parens_count > 0) {
				open_parens_count--;
			}
			current++;
			if (state_stack.length > 0) {
				state_stack.length--;
			}
			if (in_template ()) {
				/* end of a "$(...)" template expression */
				type = TokenType.COMMA;
			}
			break;
		case '[':
			type = TokenType.OPEN_BRACKET;
			state_stack += State.BRACKET;
			current++;
			break;
		case ']':
			type = TokenType.CLOSE_BRACKET;
			if (state_stack.length > 0) {
				state_stack.length--;
			}
			current++;
			break;
		case '.':
			type = TokenType.DOT;
			current++;
			if (current < end - 1) {
				if (current[0] == '.' && current[1] == '.') {
					type = TokenType.ELLIPSIS;
					current += 2;
				}
			}
			break;
		case ':':
			type = TokenType.COLON;
			current++;
			break;
		case ',':
			type = TokenType.COMMA;
			current++;
			break;
		case ';':
			type = TokenType.SEMICOLON;
			current++;
			break;
		case '#':
			type = TokenType.HASH;
			current++;
			break;
		case '?':
			type = TokenType.INTERR;
			current++;
			break;
		case '|':
			type = TokenType.BITWISE_OR;
			current++;
			if (current < end) {
				switch (current[0]) {
				case '=':
					type = TokenType.ASSIGN_BITWISE_OR;
					current++;
					break;
				case '|':
					type = TokenType.OP_OR;
					current++;
					break;
				}
			}
			break;
		case '&':
			type = TokenType.BITWISE_AND;
			current++;
			if (current < end) {
				switch (current[0]) {
				case '=':
					type = TokenType.ASSIGN_BITWISE_AND;
					current++;
					break;
				case '&':
					type = TokenType.OP_AND;
					current++;
					break;
				}
			}
			break;
		case '^':
			type = TokenType.CARRET;
			current++;
			if (current < end && current[0] == '=') {
				type = TokenType.ASSIGN_BITWISE_XOR;
				current++;
			}
			break;
		case '~':
			type = TokenType.TILDE;
			current++;
			break;
		case '=':
			type = TokenType.ASSIGN;
			current++;
			if (current < end) {
				switch (current[0]) {
				case '=':
					type = TokenType.OP_EQ;
					current++;
					break;
				case '>':
					type = TokenType.LAMBDA;
					current++;
					break;
				}
			}
			break;
		case '<':
			type = TokenType.OP_LT;
			current++;
			if (current < end) {
				switch (current[0]) {
				case '=':
					type = TokenType.OP_LE;
					current++;
					break;
				case '<':
					type = TokenType.OP_SHIFT_LEFT;
					current++;
					if (current < end && current[0] == '=') {
						type = TokenType.ASSIGN_SHIFT_LEFT;
						current++;
					}
					break;
				}
			}
			break;
		case '>':
			type = TokenType.OP_GT;
			current++;
			if (current < end && current[0] == '=') {
				type = TokenType.OP_GE;
				current++;
			}
			break;
		case '!':
			type = TokenType.OP_NEG;
			current++;
			if (current < end && current[0] == '=') {
				type = TokenType.OP_NE;
				current++;
			}
			break;
		case '+':
			type = TokenType.PLUS;
			current++;
			if (current < end) {
				switch (current[0]) {
				case '=':
					type = TokenType.ASSIGN_ADD;
					current++;
					break;
				case '+':
					type = TokenType.OP_INC;
					current++;
					break;
				}
			}
			break;
		case '-':
			type = TokenType.MINUS;
			current++;
			if (current < end) {
				switch (current[0]) {
				case '=':
					type = TokenType.ASSIGN_SUB;
					current++;
					break;
				case '-':
					type = TokenType.OP_DEC;
					current++;
					break;
				case '>':
					type = TokenType.OP_PTR;
					current++;
					break;
				}
			}
			break;
		case '*':
			type = TokenType.STAR;
			current++;
			if (current < end && current[0] == '=') {
				type = TokenType.ASSIGN_MUL;
				current++;
			}
			break;
		case '/':
			/* after one of these tokens a '/' opens a regular expression
			 * literal; after anything else it is a division operator */
			switch (last_token) {
			case TokenType.ASSIGN:
			case TokenType.COMMA:
			case TokenType.MINUS:
			case TokenType.OP_AND:
			case TokenType.OP_DEC:
			case TokenType.OP_EQ:
			case TokenType.OP_GE:
			case TokenType.OP_GT:
			case TokenType.OP_INC:
			case TokenType.OP_LE:
			case TokenType.OP_LT:
			case TokenType.OP_NE:
			case TokenType.OP_NEG:
			case TokenType.OP_OR:
			case TokenType.OPEN_BRACE:
			case TokenType.OPEN_PARENS:
			case TokenType.PLUS:
			case TokenType.RETURN:
				type = TokenType.OPEN_REGEX_LITERAL;
				state_stack += State.REGEX_LITERAL;
				current++;
				break;
			default:
				type = TokenType.DIV;
				current++;
				if (current < end && current[0] == '=') {
					type = TokenType.ASSIGN_DIV;
					current++;
				}
				break;
			}
			break;

		case '%':
			type = TokenType.PERCENT;
			current++;
			if (current < end && current[0] == '=') {
				type = TokenType.ASSIGN_PERCENT;
				current++;
			}
			break;
		case '\'':
		case '"':
			if (begin[0] == '\'') {
				type = TokenType.CHARACTER_LITERAL;
			} else if (current <= end - 6 && begin[1] == '"' && begin[2] == '"') {
				/* a verbatim string needs at least the six quote characters */
				type = TokenType.VERBATIM_STRING_LITERAL;
				token_length_in_chars = 6;
				current += 3;
				/* stop while three bytes are still readable, so that the
				 * terminator is found even at the very end of the buffer */
				while (current <= end - 3) {
					if (current[0] == '"' && current[1] == '"' && current[2] == '"') {
						break;
					} else if (current[0] == '\n') {
						current++;
						line++;
						column = 1;
						token_length_in_chars = 3;
					} else {
						unichar u = ((string) current).get_char_validated ((long) (end - current));
						if (u != (unichar) (-1)) {
							current += u.to_utf8 (null);
							token_length_in_chars++;
						} else {
							/* skip the offending byte; without this the
							 * loop would never terminate */
							current++;
							Report.error (new SourceReference (source_file, line, column + token_length_in_chars, line, column + token_length_in_chars), "invalid UTF-8 character");
						}
					}
				}
				if (current <= end - 3 && current[0] == '"' && current[1] == '"' && current[2] == '"') {
					current += 3;
				} else {
					Report.error (new SourceReference (source_file, line, column + token_length_in_chars, line, column + token_length_in_chars), "syntax error, expected \"\"\"");
				}
				break;
			} else {
				type = TokenType.STRING_LITERAL;
			}

			/* character and ordinary string literals share the same scanning */
			token_length_in_chars = 2;
			current++;
			while (current < end && current[0] != begin[0]) {
				if (current[0] == '\\') {
					current++;
					token_length_in_chars++;
					if (current >= end) {
						break;
					}

					switch (current[0]) {
					case '\'':
					case '"':
					case '\\':
					case '0':
					case 'b':
					case 'f':
					case 'n':
					case 'r':
					case 't':
						current++;
						token_length_in_chars++;
						break;
					case 'x':
						// hexadecimal escape character
						current++;
						token_length_in_chars++;
						while (current < end && current[0].isxdigit ()) {
							current++;
							token_length_in_chars++;
						}
						break;
					default:
						/* the offending character is not consumed here; the
						 * next loop iteration scans it as ordinary text */
						Report.error (new SourceReference (source_file, line, column + token_length_in_chars, line, column + token_length_in_chars), "invalid escape sequence");
						break;
					}
				} else if (current[0] == '\n') {
					break;
				} else {
					unichar u = ((string) current).get_char_validated ((long) (end - current));
					if (u != (unichar) (-1)) {
						current += u.to_utf8 (null);
						token_length_in_chars++;
					} else {
						current++;
						Report.error (new SourceReference (source_file, line, column + token_length_in_chars, line, column + token_length_in_chars), "invalid UTF-8 character");
					}
				}
			}
			if (current < end && current[0] != '\n') {
				current++;
			} else {
				Report.error (new SourceReference (source_file, line, column + token_length_in_chars, line, column + token_length_in_chars), "syntax error, expected %c".printf (begin[0]));
			}
			break;
		default:
			unichar u = ((string) current).get_char_validated ((long) (end - current));
			if (u != (unichar) (-1)) {
				current += u.to_utf8 (null);
				Report.error (new SourceReference (source_file, line, column, line, column), "syntax error, unexpected character");
			} else {
				current++;
				Report.error (new SourceReference (source_file, line, column, line, column), "invalid UTF-8 character");
			}
			column++;
			/* NOTE(review): last_token is deliberately set to a literal
			 * here before retrying; presumably so the skipped character
			 * does not influence EOL / regex detection - confirm */
			last_token = TokenType.STRING_LITERAL;
			return read_token (out token_begin, out token_end);
		}
	}

	if (token_length_in_chars < 0) {
		column += (int) (current - begin);
	} else {
		column += token_length_in_chars;
	}

	token_end.pos = current;
	token_end.line = line;
	token_end.column = column - 1;
	last_token = type;

	return type;
}
/**
 * Consumes the leading indentation of the current line and converts it
 * into an indentation level.
 *
 * Tabs are counted when indent_spaces is zero; otherwise spaces are
 * counted and divided by indent_spaces.
 *
 * @return the indentation level, or -1 if the line contains no code
 */
int count_tabs () {
	// the indentation unit is a tab unless a space width has been configured
	char indent_char = (_indent_spaces == 0) ? '\t' : ' ';

	int run_length = 0;
	while (current < end && current[0] == indent_char) {
		current++;
		column++;
		run_length++;
	}

	int levels = (_indent_spaces == 0) ? run_length : run_length / _indent_spaces;

	/* ignore comments and whitespace and other lines that contain no code */
	space ();

	if (current < end && current[0] == '\n') {
		return -1;
	}

	return levels;
}
/**
 * Checks whether the bytes at begin start with keyword.
 *
 * The caller must make sure that at least as many bytes as the keyword
 * is long are readable at begin.
 *
 * @param begin   start of the text to compare
 * @param keyword text that is expected at begin
 * @return true if every byte of keyword matches
 */
bool matches (char* begin, string keyword) {
	char* expected = (char*) keyword;
	long length = keyword.len ();

	// advance as long as both sides agree; a full walk means a match
	long offset = 0;
	while (offset < length && begin[offset] == expected[offset]) {
		offset++;
	}

	return offset == length;
}
/**
 * Skips whitespace other than newlines and, if a '#' is then found in
 * column 1, processes the preprocessing directive that starts there.
 *
 * @return true if whitespace was skipped or a directive was processed
 */
bool whitespace () {
	char* start = current;

	while (current < end && current[0] != '\n' && current[0].isspace ()) {
		current++;
		column++;
	}

	// a hash sign in the first column introduces a preprocessing directive
	if (column == 1 && current < end && current[0] == '#') {
		pp_directive ();
		return true;
	}

	return current != start;
}
/**
 * Tells whether the scanner is positioned on a newline character.
 *
 * The position is checked against the end of the buffer first, so this
 * never reads past the mapped file contents (which are not guaranteed
 * to be terminated).
 *
 * @return true if the current character exists and is '\n'
 */
inline bool newline () {
	return current < end && current[0] == '\n';
}
/**
 * Consumes consecutive newline characters, updating the line counter
 * and resetting column and indentation level for every line skipped.
 *
 * @return true if at least one newline was consumed
 */
bool skip_newlines () {
	char* start = current;

	while (newline ()) {
		current++;

		// a fresh line starts in column 1 without indentation
		line++;
		column = 1;
		current_indent_level = 0;
	}

	return current != start;
}
/**
 * Skips a single-line or delimited comment at the current position.
 *
 * Documentation comments and, when file_comment is set, plain comments
 * are handed to push_comment ().
 *
 * @param file_comment true while scanning the comments at the top of the
 *                     file; a documentation comment ends that phase and
 *                     is left unconsumed
 * @return true if a comment was consumed (an unterminated delimited
 *         comment is reported as an error and also yields true)
 */
bool comment (bool file_comment = false) {
	// both characters of the comment opener must lie inside the buffer
	if (current > end - 2
	    || current[0] != '/'
	    || (current[1] != '/' && current[1] != '*')) {
		return false;
	}

	if (current[1] == '/') {
		// single-line comment

		SourceReference source_reference = null;
		if (file_comment) {
			source_reference = new SourceReference (source_file, line, column, line, column);
		}

		current += 2;

		// remember where the comment text starts; using the `begin' field
		// here would copy everything from the start of the file
		char* comment_begin = current;

		// skip until end of line or end of file
		while (current < end && current[0] != '\n') {
			current++;
		}

		// the comment text ends in front of the line break
		char* comment_end = current;

		/* do not ignore EOL if comment does not exclusively occupy the line */
		if (current < end && current[0] == '\n' && last_token == TokenType.EOL) {
			current++;
			line++;
			column = 1;
			current_indent_level = 0;
		}

		if (source_reference != null) {
			push_comment (((string) comment_begin).ndup ((long) (comment_end - comment_begin)), source_reference, file_comment);
		}
	} else {
		// delimited comment

		// a third character may not exist if the file ends right after the opener
		bool doc_comment = (current + 2 < end && current[2] == '*');

		if (file_comment && doc_comment) {
			// documentation comments belong to the following symbol
			return false;
		}

		SourceReference source_reference = null;
		if (doc_comment || file_comment) {
			source_reference = new SourceReference (source_file, line, column, line, column);
		}

		current += 2;
		char* comment_begin = current;

		while (current < end - 1
		       && (current[0] != '*' || current[1] != '/')) {
			if (current[0] == '\n') {
				line++;
				column = 0;
			}
			current++;
			column++;
		}

		// fewer than two characters left means the terminator is missing;
		// `>=' also covers a file that ends directly after the opener
		if (current >= end - 1) {
			Report.error (new SourceReference (source_file, line, column, line, column), "syntax error, expected */");
			return true;
		}

		if (source_reference != null) {
			string comment = ((string) comment_begin).ndup ((long) (current - comment_begin));
			push_comment (comment, source_reference, file_comment);
		}

		// step over the terminator
		current += 2;
		column += 2;
	}

	return true;
}
/**
 * Consumes a run of tab characters.
 *
 * @return true if at least one tab was consumed
 */
bool skip_tabs () {
	char* start = current;

	while (current < end && current[0] == '\t') {
		current++;
		column++;
	}

	return current != start;
}
/**
 * Skips whitespace, tabs and comments until none of them is left at
 * the current position.
 */
void skip_space_tabs () {
	bool consumed;
	do {
		consumed = whitespace () || skip_tabs () || comment ();
	} while (consumed);
}
/**
 * Skips whitespace and comments until neither is left at the current
 * position.
 */
void space () {
	bool consumed;
	do {
		consumed = whitespace () || comment ();
	} while (consumed);
}
/**
 * Skips whitespace and comments at the current position, scanning the
 * comments in file-comment mode.
 */
public void parse_file_comments () {
	bool consumed;
	do {
		consumed = whitespace () || comment (true);
	} while (consumed);
}
/**
 * Stores a scanned comment.
 *
 * A comment whose text starts with '*' becomes the pending comment. A
 * file comment is added to the source file instead, and the pending
 * comment is cleared.
 *
 * @param comment_item     text of the comment
 * @param source_reference location of the comment
 * @param file_comment     true if the comment was scanned as a file comment
 */
void push_comment (string comment_item, SourceReference source_reference, bool file_comment) {
	bool starts_with_star = (comment_item[0] == '*');
	if (starts_with_star) {
		_comment = new Comment (comment_item, source_reference);
	}

	if (!file_comment) {
		return;
	}

	// file comments are kept by the source file, not by the scanner
	source_file.add_comment (new Comment (comment_item, source_reference));
	_comment = null;
}
/**
 * Hands the pending comment to the caller and clears it.
 *
 * @return the saved comment, or null if none is pending
 */
public Comment? pop_comment () {
	Comment? pending = _comment;
	_comment = null;
	return pending;
}
/**
 * Skips whitespace inside a preprocessing directive; the newline that
 * ends the directive is never consumed.
 *
 * @return true if at least one character was skipped
 */
bool pp_whitespace () {
	char* start = current;

	while (current < end && current[0] != '\n' && current[0].isspace ()) {
		current++;
		column++;
	}

	return current != start;
}
/**
 * Processes the preprocessing directive at the current position, which
 * must be on the introducing '#'.
 *
 * If the innermost conditional is marked to be skipped afterwards, all
 * following text is consumed up to the start of the next line whose
 * first non-whitespace character is '#'.
 */
void pp_directive () {
	// step over the hash sign
	current++;
	column++;

	pp_whitespace ();

	// read the directive name
	char* name_start = current;
	while (current < end && current[0].isalnum ()) {
		current++;
		column++;
	}
	int name_length = (int) (current - name_start);

	if (name_length == 2 && matches (name_start, "if")) {
		parse_pp_if ();
	} else if (name_length == 4 && matches (name_start, "elif")) {
		parse_pp_elif ();
	} else if (name_length == 4 && matches (name_start, "else")) {
		parse_pp_else ();
	} else if (name_length == 5 && matches (name_start, "endif")) {
		parse_pp_endif ();
	} else {
		Report.error (new SourceReference (source_file, line, column - name_length, line, column), "syntax error, invalid preprocessing directive");
	}

	bool skipping = conditional_stack.length > 0
	                && conditional_stack[conditional_stack.length - 1].skip_section;
	if (!skipping) {
		return;
	}

	// skip lines until next preprocessing directive
	bool only_space_on_line = false;
	while (current < end) {
		char c = current[0];

		if (only_space_on_line && c == '#') {
			// rewind to the first column so the directive is scanned normally
			current -= (column - 1);
			column = 1;
			return;
		}

		if (c == '\n') {
			line++;
			column = 0;
			only_space_on_line = true;
		} else if (!c.isspace ()) {
			only_space_on_line = false;
		}

		current++;
		column++;
	}
}
/**
 * Skips trailing whitespace of a directive and reports an error unless
 * a newline follows.
 */
void pp_eol () {
	pp_whitespace ();

	bool at_newline = (current < end && current[0] == '\n');
	if (!at_newline) {
		Report.error (new SourceReference (source_file, line, column, line, column), "syntax error, expected newline");
	}
}
/**
 * Handles `#if': evaluates the condition and pushes a new entry onto
 * the conditional stack.
 */
void parse_pp_if () {
	pp_whitespace ();

	bool condition = parse_pp_expression ();

	pp_eol ();

	conditional_stack += Conditional ();

	int top = conditional_stack.length - 1;

	// the section is only processed if the enclosing one is not being skipped
	bool parent_active = (top == 0) || !conditional_stack[top - 1].skip_section;

	if (condition && parent_active) {
		// condition true => process code within if
		conditional_stack[top].matched = true;
	} else {
		// skip lines until next preprocessing directive
		conditional_stack[top].skip_section = true;
	}
}
/**
 * Handles `#elif': evaluates the condition and updates the innermost
 * conditional. The branch is taken only if no earlier branch matched
 * and the enclosing section is not being skipped.
 */
void parse_pp_elif () {
	pp_whitespace ();

	bool condition = parse_pp_expression ();

	pp_eol ();

	int depth = conditional_stack.length;
	if (depth == 0 || conditional_stack[depth - 1].else_found) {
		Report.error (new SourceReference (source_file, line, column, line, column), "syntax error, unexpected #elif");
		return;
	}

	int top = depth - 1;
	bool parent_active = (depth == 1) || !conditional_stack[depth - 2].skip_section;

	if (condition && !conditional_stack[top].matched && parent_active) {
		// first branch whose condition holds => process its code
		conditional_stack[top].matched = true;
		conditional_stack[top].skip_section = false;
	} else {
		// skip lines until next preprocessing directive
		conditional_stack[top].skip_section = true;
	}
}
/**
 * Handles `#else': the section is processed if no earlier branch of the
 * innermost conditional matched and the enclosing section is not being
 * skipped.
 *
 * The conditional is marked as having seen its `#else', so that a later
 * `#elif' or second `#else' of the same conditional is reported as an
 * error.
 */
void parse_pp_else () {
	pp_eol ();

	if (conditional_stack.length == 0 || conditional_stack[conditional_stack.length - 1].else_found) {
		Report.error (new SourceReference (source_file, line, column, line, column), "syntax error, unexpected #else");
		return;
	}

	// remember the #else; without this the else_found checks could never trigger
	conditional_stack[conditional_stack.length - 1].else_found = true;

	if (!conditional_stack[conditional_stack.length - 1].matched
	    && (conditional_stack.length == 1 || !conditional_stack[conditional_stack.length - 2].skip_section)) {
		// no branch matched so far => process code within else
		conditional_stack[conditional_stack.length - 1].matched = true;
		conditional_stack[conditional_stack.length - 1].skip_section = false;
	} else {
		// skip lines until next preprocessing directive
		conditional_stack[conditional_stack.length - 1].skip_section = true;
	}
}
/**
 * Handles `#endif': removes the innermost entry from the conditional
 * stack, or reports an error if there is no open conditional.
 */
void parse_pp_endif () {
	pp_eol ();

	if (conditional_stack.length > 0) {
		conditional_stack.length--;
	} else {
		Report.error (new SourceReference (source_file, line, column, line, column), "syntax error, unexpected #endif");
	}
}
/**
 * Reads an identifier of a preprocessing expression and evaluates it.
 *
 * @return the value of the literals `true' and `false'; for any other
 *         identifier, whether the code context reports it as defined;
 *         false (after reporting an error) if no identifier was found
 */
bool parse_pp_symbol () {
	char* start = current;
	while (current < end && is_ident_char (current[0])) {
		current++;
		column++;
	}

	int length = (int) (current - start);
	if (length == 0) {
		Report.error (new SourceReference (source_file, line, column, line, column), "syntax error, expected identifier");
		return false;
	}

	string identifier = ((string) start).ndup (length);

	if (identifier == "true") {
		return true;
	}
	if (identifier == "false") {
		return false;
	}
	return source_file.context.is_defined (identifier);
}
/**
 * Parses a primary preprocessing expression: an identifier or a
 * parenthesized expression.
 *
 * @return the value of the expression, or false after reporting an
 *         error if neither form is found
 */
bool parse_pp_primary_expression () {
	if (current < end && is_ident_char (current[0])) {
		return parse_pp_symbol ();
	}

	// anything but an opening parenthesis (including end of input) is an error
	if (current >= end || current[0] != '(') {
		Report.error (new SourceReference (source_file, line, column, line, column), "syntax error, expected identifier");
		return false;
	}

	// step over '('
	current++;
	column++;

	pp_whitespace ();
	bool inner = parse_pp_expression ();
	pp_whitespace ();

	if (current < end && current[0] == ')') {
		current++;
		column++;
	} else {
		Report.error (new SourceReference (source_file, line, column, line, column), "syntax error, expected `)'");
	}

	return inner;
}
/**
 * Parses a primary expression preceded by any number of `!' operators.
 *
 * @return the value of the primary expression, negated once for every
 *         `!' in front of it
 */
bool parse_pp_unary_expression () {
	// consume all leading negations and only remember their parity
	bool negate = false;
	while (current < end && current[0] == '!') {
		current++;
		column++;
		pp_whitespace ();
		negate = !negate;
	}

	bool operand = parse_pp_primary_expression ();

	return negate ? !operand : operand;
}
/**
 * Parses unary expressions combined with `==' and `!='.
 *
 * @return the value of the expression
 */
bool parse_pp_equality_expression () {
	bool result = parse_pp_unary_expression ();
	pp_whitespace ();

	// both operators are two characters long and end in '='
	while (current < end - 1
	       && current[1] == '='
	       && (current[0] == '=' || current[0] == '!')) {
		bool inequality = (current[0] == '!');

		current += 2;
		column += 2;
		pp_whitespace ();

		bool operand = parse_pp_unary_expression ();
		result = inequality ? (result != operand) : (result == operand);
	}

	return result;
}
/**
 * Parses equality expressions combined with `&&'.
 *
 * Every operand is parsed, whatever the value of the operands in front
 * of it.
 *
 * @return true if all operands are true
 */
bool parse_pp_and_expression () {
	bool result = parse_pp_equality_expression ();
	pp_whitespace ();

	while (true) {
		bool at_operator = current < end - 1 && current[0] == '&' && current[1] == '&';
		if (!at_operator) {
			break;
		}

		current += 2;
		column += 2;
		pp_whitespace ();

		bool operand = parse_pp_equality_expression ();
		if (!operand) {
			result = false;
		}
	}

	return result;
}
/**
 * Parses and-expressions combined with `||'.
 *
 * Every operand is parsed, whatever the value of the operands in front
 * of it.
 *
 * @return true if at least one operand is true
 */
bool parse_pp_or_expression () {
	bool result = parse_pp_and_expression ();
	pp_whitespace ();

	while (true) {
		bool at_operator = current < end - 1 && current[0] == '|' && current[1] == '|';
		if (!at_operator) {
			break;
		}

		current += 2;
		column += 2;
		pp_whitespace ();

		bool operand = parse_pp_and_expression ();
		if (operand) {
			result = true;
		}
	}

	return result;
}
/**
 * Parses a complete preprocessing expression.
 *
 * @return the value of the expression
 */
bool parse_pp_expression () {
	// the or-expression is the entry rule of the expression grammar
	bool value = parse_pp_or_expression ();
	return value;
}