Add `owned' type modifier and `(owned)' cast to replace `#', add `unowned'
[vala-lang.git] / vala / valascanner.vala
blob390f8161298514d32346a83dbb2717fbd07b869b
1 /* valascanner.vala
3 * Copyright (C) 2008 Jürg Billeter
5 * This library is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU Lesser General Public
7 * License as published by the Free Software Foundation; either
8 * version 2.1 of the License, or (at your option) any later version.
10 * This library is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * Lesser General Public License for more details.
15 * You should have received a copy of the GNU Lesser General Public
16 * License along with this library; if not, write to the Free Software
17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19 * Author:
20 * Jürg Billeter <j@bitron.ch>
23 using GLib;
24 using Gee;
26 /**
27 * Lexical scanner for Vala source files.
29 public class Vala.Scanner {
// Source file whose mapped contents are tokenized by this scanner.
30 public SourceFile source_file { get; private set; }
// Next unread byte of the mapped file contents.
32 char* current;
// One past the last byte of the mapped file contents.
33 char* end;
// Line of `current'; the constructor starts it at 1.
35 int line;
// Column of `current'; the constructor starts it at 1.
36 int column;
// Comment text collected by push_comment () that has not been popped yet.
38 string _comment;
/**
 * Creates a scanner positioned at the first byte of the given source file.
 *
 * @param source_file file whose mapped contents are tokenized
 */
public Scanner (SourceFile source_file) {
	this.source_file = source_file;

	// scanning window: [current, end) over the mapped file contents
	current = source_file.get_mapped_contents ();
	end = current + source_file.get_mapped_length ();

	// positions are reported 1-based
	line = 1;
	column = 1;
}
/**
 * Tells whether a character may appear inside an identifier.
 *
 * @param c character to test
 * @return true for alphanumeric characters and the underscore
 */
bool is_ident_char (char c) {
	if (c == '_') {
		return true;
	}
	return c.isalnum ();
}
/**
 * Classifies an identifier-shaped lexeme as a keyword or a plain identifier.
 *
 * @param begin first character of the lexeme
 * @param len   number of characters in the lexeme
 * @return the token type of the keyword spelled by the lexeme, or
 *         TokenType.IDENTIFIER if it is not a keyword
 */
TokenType get_identifier_or_keyword (char* begin, int len) {
	// Candidates are narrowed down by length and first character; the
	// remaining few keywords are compared in full by matches ().
	switch (len) {
	case 2:
		switch (begin[0]) {
		case 'a':
			if (matches (begin, "as")) return TokenType.AS;
			break;
		case 'd':
			if (matches (begin, "do")) return TokenType.DO;
			break;
		case 'i':
			if (matches (begin, "if")) return TokenType.IF;
			if (matches (begin, "in")) return TokenType.IN;
			if (matches (begin, "is")) return TokenType.IS;
			break;
		}
		break;
	case 3:
		switch (begin[0]) {
		case 'f':
			if (matches (begin, "for")) return TokenType.FOR;
			break;
		case 'g':
			if (matches (begin, "get")) return TokenType.GET;
			break;
		case 'n':
			if (matches (begin, "new")) return TokenType.NEW;
			break;
		case 'o':
			if (matches (begin, "out")) return TokenType.OUT;
			break;
		case 'r':
			if (matches (begin, "ref")) return TokenType.REF;
			break;
		case 's':
			if (matches (begin, "set")) return TokenType.SET;
			break;
		case 't':
			if (matches (begin, "try")) return TokenType.TRY;
			break;
		case 'v':
			if (matches (begin, "var")) return TokenType.VAR;
			break;
		}
		break;
	case 4:
		switch (begin[0]) {
		case 'b':
			if (matches (begin, "base")) return TokenType.BASE;
			break;
		case 'c':
			if (matches (begin, "case")) return TokenType.CASE;
			break;
		case 'e':
			if (matches (begin, "else")) return TokenType.ELSE;
			if (matches (begin, "enum")) return TokenType.ENUM;
			break;
		case 'l':
			if (matches (begin, "lock")) return TokenType.LOCK;
			break;
		case 'n':
			if (matches (begin, "null")) return TokenType.NULL;
			break;
		case 't':
			if (matches (begin, "this")) return TokenType.THIS;
			if (matches (begin, "true")) return TokenType.TRUE;
			break;
		case 'v':
			if (matches (begin, "void")) return TokenType.VOID;
			break;
		case 'w':
			if (matches (begin, "weak")) return TokenType.WEAK;
			break;
		}
		break;
	case 5:
		switch (begin[0]) {
		case 'b':
			if (matches (begin, "break")) return TokenType.BREAK;
			break;
		case 'c':
			if (matches (begin, "catch")) return TokenType.CATCH;
			if (matches (begin, "class")) return TokenType.CLASS;
			if (matches (begin, "const")) return TokenType.CONST;
			break;
		case 'f':
			if (matches (begin, "false")) return TokenType.FALSE;
			break;
		case 'o':
			if (matches (begin, "owned")) return TokenType.OWNED;
			break;
		case 't':
			if (matches (begin, "throw")) return TokenType.THROW;
			break;
		case 'u':
			if (matches (begin, "using")) return TokenType.USING;
			break;
		case 'w':
			if (matches (begin, "while")) return TokenType.WHILE;
			break;
		case 'y':
			if (matches (begin, "yield")) return TokenType.YIELD;
			break;
		}
		break;
	case 6:
		switch (begin[0]) {
		case 'd':
			if (matches (begin, "delete")) return TokenType.DELETE;
			break;
		case 'e':
			if (matches (begin, "extern")) return TokenType.EXTERN;
			break;
		case 'i':
			if (matches (begin, "inline")) return TokenType.INLINE;
			break;
		case 'p':
			if (matches (begin, "params")) return TokenType.PARAMS;
			if (matches (begin, "public")) return TokenType.PUBLIC;
			break;
		case 'r':
			if (matches (begin, "return")) return TokenType.RETURN;
			break;
		case 's':
			if (matches (begin, "signal")) return TokenType.SIGNAL;
			if (matches (begin, "sizeof")) return TokenType.SIZEOF;
			if (matches (begin, "static")) return TokenType.STATIC;
			if (matches (begin, "struct")) return TokenType.STRUCT;
			if (matches (begin, "switch")) return TokenType.SWITCH;
			break;
		case 't':
			if (matches (begin, "throws")) return TokenType.THROWS;
			if (matches (begin, "typeof")) return TokenType.TYPEOF;
			break;
		case 'y':
			if (matches (begin, "yields")) return TokenType.YIELDS;
			break;
		}
		break;
	case 7:
		switch (begin[0]) {
		case 'd':
			if (matches (begin, "default")) return TokenType.DEFAULT;
			if (matches (begin, "dynamic")) return TokenType.DYNAMIC;
			break;
		case 'e':
			if (matches (begin, "ensures")) return TokenType.ENSURES;
			break;
		case 'f':
			if (matches (begin, "finally")) return TokenType.FINALLY;
			if (matches (begin, "foreach")) return TokenType.FOREACH;
			break;
		case 'p':
			if (matches (begin, "private")) return TokenType.PRIVATE;
			break;
		case 'u':
			if (matches (begin, "unowned")) return TokenType.UNOWNED;
			break;
		case 'v':
			if (matches (begin, "virtual")) return TokenType.VIRTUAL;
			break;
		}
		break;
	case 8:
		switch (begin[0]) {
		case 'a':
			if (matches (begin, "abstract")) return TokenType.ABSTRACT;
			break;
		case 'c':
			if (matches (begin, "continue")) return TokenType.CONTINUE;
			break;
		case 'd':
			if (matches (begin, "delegate")) return TokenType.DELEGATE;
			break;
		case 'i':
			if (matches (begin, "internal")) return TokenType.INTERNAL;
			break;
		case 'o':
			if (matches (begin, "override")) return TokenType.OVERRIDE;
			break;
		case 'r':
			if (matches (begin, "requires")) return TokenType.REQUIRES;
			break;
		case 'v':
			if (matches (begin, "volatile")) return TokenType.VOLATILE;
			break;
		}
		break;
	case 9:
		switch (begin[0]) {
		case 'c':
			if (matches (begin, "construct")) return TokenType.CONSTRUCT;
			break;
		case 'i':
			if (matches (begin, "interface")) return TokenType.INTERFACE;
			break;
		case 'n':
			if (matches (begin, "namespace")) return TokenType.NAMESPACE;
			break;
		case 'p':
			if (matches (begin, "protected")) return TokenType.PROTECTED;
			break;
		}
		break;
	case 11:
		if (matches (begin, "errordomain")) return TokenType.ERRORDOMAIN;
		break;
	}

	// not a keyword
	return TokenType.IDENTIFIER;
}
/**
 * Reads the next token from the source file.
 *
 * Whitespace and comments in front of the token are skipped first.
 * Characters that cannot start a token are reported as errors and skipped.
 *
 * @param token_begin receives the position, line and column of the first
 *                    character of the token
 * @param token_end   receives the position just past the token together
 *                    with the line and column of its last character
 * @return type of the token, TokenType.EOF once the input is exhausted
 */
public TokenType read_token (out SourceLocation token_begin, out SourceLocation token_end) {
	space ();

	TokenType type;
	char* begin = current;
	token_begin.pos = begin;
	token_begin.line = line;
	token_begin.column = column;

	// Number of columns the token occupies. Stays negative for tokens made
	// of single-byte characters only, where the byte length is used instead.
	int token_length_in_chars = -1;

	if (current >= end) {
		type = TokenType.EOF;
	} else if (current[0].isalpha () || current[0] == '_') {
		int len = 0;
		while (current < end && is_ident_char (current[0])) {
			current++;
			len++;
		}
		type = get_identifier_or_keyword (begin, len);
	} else if (current[0] == '@') {
		token_begin.pos++; // @ is not part of the identifier
		current++;
		while (current < end && is_ident_char (current[0])) {
			current++;
		}
		type = TokenType.IDENTIFIER;
	} else if (current[0].isdigit ()) {
		while (current < end && current[0].isdigit ()) {
			current++;
		}
		type = TokenType.INTEGER_LITERAL;
		if (current < end && current[0].tolower () == 'l') {
			// l or ll suffix
			current++;
			if (current < end && current[0].tolower () == 'l') {
				current++;
			}
		} else if (current < end && current[0].tolower () == 'u') {
			// u, ul or ull suffix
			current++;
			if (current < end && current[0].tolower () == 'l') {
				current++;
				if (current < end && current[0].tolower () == 'l') {
					current++;
				}
			}
		} else if (current < end - 1 && current[0] == '.' && current[1].isdigit ()) {
			// fractional part, optional exponent, optional f suffix
			current++;
			while (current < end && current[0].isdigit ()) {
				current++;
			}
			if (current < end && current[0].tolower () == 'e') {
				current++;
				if (current < end && (current[0] == '+' || current[0] == '-')) {
					current++;
				}
				while (current < end && current[0].isdigit ()) {
					current++;
				}
			}
			if (current < end && current[0].tolower () == 'f') {
				current++;
			}
			type = TokenType.REAL_LITERAL;
		} else if (current < end - 1 && current == begin + 1
		           && begin[0] == '0' && begin[1] == 'x' && begin[2].isxdigit ()) {
			// hexadecimal integer literal
			// (current < end - 1 guarantees that begin[2] is inside the input)
			current++;
			while (current < end && current[0].isxdigit ()) {
				current++;
			}
		} else if (current < end && is_ident_char (current[0])) {
			// allow identifiers to start with a digit
			// as long as they contain at least one char
			while (current < end && is_ident_char (current[0])) {
				current++;
			}
			type = TokenType.IDENTIFIER;
		}
	} else {
		switch (current[0]) {
		case '{':
			type = TokenType.OPEN_BRACE;
			current++;
			break;
		case '}':
			type = TokenType.CLOSE_BRACE;
			current++;
			break;
		case '(':
			type = TokenType.OPEN_PARENS;
			current++;
			break;
		case ')':
			type = TokenType.CLOSE_PARENS;
			current++;
			break;
		case '[':
			type = TokenType.OPEN_BRACKET;
			current++;
			break;
		case ']':
			type = TokenType.CLOSE_BRACKET;
			current++;
			break;
		case '.':
			type = TokenType.DOT;
			current++;
			if (current < end - 1) {
				if (current[0] == '.' && current[1] == '.') {
					type = TokenType.ELLIPSIS;
					current += 2;
				}
			}
			break;
		case ':':
			type = TokenType.COLON;
			current++;
			if (current < end && current[0] == ':') {
				type = TokenType.DOUBLE_COLON;
				current++;
			}
			break;
		case ',':
			type = TokenType.COMMA;
			current++;
			break;
		case ';':
			type = TokenType.SEMICOLON;
			current++;
			break;
		case '#':
			type = TokenType.HASH;
			current++;
			break;
		case '?':
			type = TokenType.INTERR;
			current++;
			break;
		case '|':
			type = TokenType.BITWISE_OR;
			current++;
			if (current < end) {
				switch (current[0]) {
				case '=':
					type = TokenType.ASSIGN_BITWISE_OR;
					current++;
					break;
				case '|':
					type = TokenType.OP_OR;
					current++;
					break;
				}
			}
			break;
		case '&':
			type = TokenType.BITWISE_AND;
			current++;
			if (current < end) {
				switch (current[0]) {
				case '=':
					type = TokenType.ASSIGN_BITWISE_AND;
					current++;
					break;
				case '&':
					type = TokenType.OP_AND;
					current++;
					break;
				}
			}
			break;
		case '^':
			type = TokenType.CARRET;
			current++;
			if (current < end && current[0] == '=') {
				type = TokenType.ASSIGN_BITWISE_XOR;
				current++;
			}
			break;
		case '~':
			type = TokenType.TILDE;
			current++;
			break;
		case '=':
			type = TokenType.ASSIGN;
			current++;
			if (current < end) {
				switch (current[0]) {
				case '=':
					type = TokenType.OP_EQ;
					current++;
					break;
				case '>':
					type = TokenType.LAMBDA;
					current++;
					break;
				}
			}
			break;
		case '<':
			type = TokenType.OP_LT;
			current++;
			if (current < end) {
				switch (current[0]) {
				case '=':
					type = TokenType.OP_LE;
					current++;
					break;
				case '<':
					type = TokenType.OP_SHIFT_LEFT;
					current++;
					if (current < end && current[0] == '=') {
						type = TokenType.ASSIGN_SHIFT_LEFT;
						current++;
					}
					break;
				}
			}
			break;
		case '>':
			type = TokenType.OP_GT;
			current++;
			if (current < end && current[0] == '=') {
				type = TokenType.OP_GE;
				current++;
			}
			break;
		case '!':
			type = TokenType.OP_NEG;
			current++;
			if (current < end && current[0] == '=') {
				type = TokenType.OP_NE;
				current++;
			}
			break;
		case '+':
			type = TokenType.PLUS;
			current++;
			if (current < end) {
				switch (current[0]) {
				case '=':
					type = TokenType.ASSIGN_ADD;
					current++;
					break;
				case '+':
					type = TokenType.OP_INC;
					current++;
					break;
				}
			}
			break;
		case '-':
			type = TokenType.MINUS;
			current++;
			if (current < end) {
				switch (current[0]) {
				case '=':
					type = TokenType.ASSIGN_SUB;
					current++;
					break;
				case '-':
					type = TokenType.OP_DEC;
					current++;
					break;
				case '>':
					type = TokenType.OP_PTR;
					current++;
					break;
				}
			}
			break;
		case '*':
			type = TokenType.STAR;
			current++;
			if (current < end && current[0] == '=') {
				type = TokenType.ASSIGN_MUL;
				current++;
			}
			break;
		case '/':
			type = TokenType.DIV;
			current++;
			if (current < end && current[0] == '=') {
				type = TokenType.ASSIGN_DIV;
				current++;
			}
			break;
		case '%':
			type = TokenType.PERCENT;
			current++;
			if (current < end && current[0] == '=') {
				type = TokenType.ASSIGN_PERCENT;
				current++;
			}
			break;
		case '\'':
		case '"':
			if (begin[0] == '\'') {
				type = TokenType.CHARACTER_LITERAL;
			} else if (current <= end - 6 && begin[1] == '"' && begin[2] == '"') {
				// verbatim string: needs room for both """ delimiters
				type = TokenType.VERBATIM_STRING_LITERAL;
				token_length_in_chars = 6;
				current += 3;
				// once fewer than three bytes remain the closing
				// delimiter cannot fit any more
				while (current <= end - 3) {
					if (current[0] == '"' && current[1] == '"' && current[2] == '"') {
						break;
					} else if (current[0] == '\n') {
						current++;
						line++;
						column = 1;
						// only the closing delimiter counts on the new line
						token_length_in_chars = 3;
					} else {
						unichar u = ((string) current).get_char_validated ((long) (end - current));
						// -1: invalid sequence, -2: sequence cut off by the end of the input
						if (u != (unichar) (-1) && u != (unichar) (-2)) {
							current += u.to_utf8 (null);
							token_length_in_chars++;
						} else {
							// skip the offending byte, the loop would never terminate otherwise
							current++;
							Report.error (new SourceReference (source_file, line, column + token_length_in_chars, line, column + token_length_in_chars), "invalid UTF-8 character");
						}
					}
				}
				if (current <= end - 3 && current[0] == '"' && current[1] == '"' && current[2] == '"') {
					current += 3;
				} else {
					Report.error (new SourceReference (source_file, line, column + token_length_in_chars, line, column + token_length_in_chars), "syntax error, expected \"\"\"");
				}
				break;
			} else {
				type = TokenType.STRING_LITERAL;
			}

			// character literal or regular string literal, both are
			// terminated by the character they started with
			token_length_in_chars = 2;
			current++;
			while (current < end && current[0] != begin[0]) {
				if (current[0] == '\\') {
					current++;
					token_length_in_chars++;
					if (current < end && current[0] == 'x') {
						// hexadecimal escape character
						current++;
						token_length_in_chars++;
						while (current < end && current[0].isxdigit ()) {
							current++;
							token_length_in_chars++;
						}
					} else if (current < end) {
						// escaped character; a backslash at the very end of
						// the input has nothing to escape
						current++;
						token_length_in_chars++;
					}
				} else if (current[0] == '\n') {
					// literals must not span lines
					break;
				} else {
					unichar u = ((string) current).get_char_validated ((long) (end - current));
					// -1: invalid sequence, -2: sequence cut off by the end of the input
					if (u != (unichar) (-1) && u != (unichar) (-2)) {
						current += u.to_utf8 (null);
						token_length_in_chars++;
					} else {
						// skip the offending byte, the loop would never terminate otherwise
						current++;
						Report.error (new SourceReference (source_file, line, column + token_length_in_chars, line, column + token_length_in_chars), "invalid UTF-8 character");
					}
				}
			}
			if (current < end && current[0] != '\n') {
				// consume the closing quote
				current++;
			} else {
				Report.error (new SourceReference (source_file, line, column + token_length_in_chars, line, column + token_length_in_chars), "syntax error, expected %c".printf (begin[0]));
			}
			break;
		default:
			unichar u = ((string) current).get_char_validated ((long) (end - current));
			// -1: invalid sequence, -2: sequence cut off by the end of the input
			if (u != (unichar) (-1) && u != (unichar) (-2)) {
				current += u.to_utf8 (null);
				Report.error (new SourceReference (source_file, line, column, line, column), "syntax error, unexpected character");
			} else {
				current++;
				Report.error (new SourceReference (source_file, line, column, line, column), "invalid UTF-8 character");
			}
			// the character is dropped, continue with whatever follows it
			column++;
			return read_token (out token_begin, out token_end);
		}
	}

	if (token_length_in_chars < 0) {
		column += (int) (current - begin);
	} else {
		column += token_length_in_chars;
	}

	token_end.pos = current;
	token_end.line = line;
	token_end.column = column - 1;

	return type;
}
/**
 * Compares the start of a lexeme with a keyword.
 *
 * Only as many characters as the keyword has are compared, so the caller
 * has to make sure that the lexeme is at least that long.
 *
 * @param begin   first character of the lexeme
 * @param keyword keyword to compare with
 * @return true if the lexeme starts with the keyword
 */
bool matches (char* begin, string keyword) {
	char* expected = keyword;
	long count = keyword.len ();

	// advance up to the first difference
	int i = 0;
	while (i < count && begin[i] == expected[i]) {
		i++;
	}

	return i >= count;
}
/**
 * Skips a run of whitespace characters, keeping line and column up to date.
 *
 * @return true if at least one character has been skipped
 */
bool whitespace () {
	char* start = current;

	for (; current < end && current[0].isspace (); current++) {
		if (current[0] == '\n') {
			// the character after a line break is in column 1
			line++;
			column = 1;
		} else {
			column++;
		}
	}

	return current != start;
}
/**
 * Skips a single-line or delimited comment at the current position and
 * hands its text to push_comment ().
 *
 * A comment starting on the first line is passed on as file comment. An
 * unterminated delimited comment is reported as error.
 *
 * @return true if a comment started at the current position
 */
bool comment () {
	if (current > end - 2
	    || current[0] != '/'
	    || (current[1] != '/' && current[1] != '*')) {
		return false;
	}

	if (current[1] == '/') {
		// single-line comment
		current += 2;
		char* begin = current;
		// skip until end of line or end of file
		// (no column bookkeeping needed, the line ends here)
		while (current < end && current[0] != '\n') {
			current++;
		}
		push_comment (((string) begin).ndup ((long) (current - begin)), line == 1);
	} else {
		// delimited comment
		current += 2;
		// account for the opening delimiter, tokens following the comment
		// on the same line would be reported two columns too far left otherwise
		column += 2;
		char* begin = current;
		int begin_line = line;
		while (current < end - 1
		       && (current[0] != '*' || current[1] != '/')) {
			if (current[0] == '\n') {
				line++;
				column = 0;
			}
			current++;
			column++;
		}
		// The loop only stops in front of end - 1 when it has found the
		// closing delimiter. `>=' also covers a comment opened by the very
		// last two bytes of the input, where current is already at end.
		if (current >= end - 1) {
			Report.error (new SourceReference (source_file, line, column, line, column), "syntax error, expected */");
			return true;
		}
		push_comment (((string) begin).ndup ((long) (current - begin)), begin_line == 1);
		current += 2;
		column += 2;
	}

	return true;
}
/**
 * Skips any mix of whitespace and comments at the current position.
 */
void space () {
	bool skipped;
	do {
		// comment () is only tried if no whitespace has been skipped
		skipped = whitespace () || comment ();
	} while (skipped);
}
/**
 * Appends a comment to the pending comment text.
 *
 * @param comment_item text of the comment
 * @param file_comment whether the accumulated text is stored as the
 *                     comment of the source file instead of being kept
 *                     for pop_comment ()
 */
void push_comment (string comment_item, bool file_comment) {
	// pending comments are separated by a line break
	string merged = (_comment == null) ? comment_item : "%s\n%s".printf (_comment, comment_item);

	if (file_comment) {
		source_file.comment = merged;
		_comment = null;
		return;
	}

	_comment = merged;
}
/**
 * Clears and returns the content of the comment stack.
 *
 * Tab characters are removed from the returned text.
 *
 * @return saved comment, or null if no comment is pending
 */
public string? pop_comment () {
	if (_comment == null) {
		return null;
	}

	string text = _comment;
	_comment = null;

	// Copy byte by byte and leave out the tabs. A tab is a single byte that
	// never occurs inside a multi-byte UTF-8 sequence, so this is safe for
	// non-ASCII comments. Erasing at a character offset is not, because
	// StringBuilder.erase () expects a byte position.
	var result = new StringBuilder ();
	for (char* cursor = text; cursor[0] != '\0'; cursor++) {
		if (cursor[0] != '\t') {
			result.append_c (cursor[0]);
		}
	}

	return result.str;
}