3 * Copyright (C) 2008-2009 Jürg Billeter
5 * This library is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU Lesser General Public
7 * License as published by the Free Software Foundation; either
8 * version 2.1 of the License, or (at your option) any later version.
10 * This library is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * Lesser General Public License for more details.
15 * You should have received a copy of the GNU Lesser General Public
16 * License along with this library; if not, write to the Free Software
17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20 * Jürg Billeter <j@bitron.ch>
27 * Lexical scanner for Vala source files.
29 public class Vala
.Scanner
{
// Source file handled by this scanner: readable from outside, assignable
// only from within the class (private setter).
30 public SourceFile source_file
{ get; private set; }
// Array of Conditional entries used as a stack by the preprocessor methods:
// parse_pp_if appends an entry, parse_pp_endif shrinks the length, and the
// other directive handlers inspect the last element.
40 Conditional
[] conditional_stack
;
// NOTE(review): read elsewhere as conditional_stack[...].else_found, so this
// is presumably a member of the Conditional type - confirm the owning type.
44 public bool else_found
;
// NOTE(review): read elsewhere as conditional_stack[...].skip_section, so this
// is presumably a member of the Conditional type as well - confirm.
45 public bool skip_section
;
// Creates a scanner for the given source file: stores the file and derives
// the scan boundaries from its mapped contents.
48 public Scanner (SourceFile source_file
) {
49 this
.source_file
= source_file
;
// begin is the address of the first mapped byte.
51 char* begin
= source_file
.get_mapped_contents ();
// end is begin plus the mapped length, i.e. one past the last mapped byte;
// the scanning loops compare current against it.
52 end
= begin
+ source_file
.get_mapped_length ();
// Returns true when c may appear inside an identifier, i.e. when it is an
// alphanumeric character or an underscore.
60 bool is_ident_char (char c
) {
61 return (c
.isalnum () || c
== '_');
// Classifies the text starting at begin: returns the keyword token type when
// one of the keyword comparisons below succeeds, TokenType.IDENTIFIER otherwise.
// begin points at the first character of the candidate; len is its length.
// NOTE(review): matches() only compares as many characters as the keyword has,
// so len is presumably used to select the group of same-length keywords - confirm.
64 public static TokenType
get_identifier_or_keyword (char* begin
, int len
) {
// two-letter keywords
69 if (matches (begin
, "as")) return TokenType
.AS
;
72 if (matches (begin
, "do")) return TokenType
.DO
;
// three-letter keywords
89 if (matches (begin
, "for")) return TokenType
.FOR
;
92 if (matches (begin
, "get")) return TokenType
.GET
;
95 if (matches (begin
, "new")) return TokenType
.NEW
;
98 if (matches (begin
, "out")) return TokenType
.OUT
;
101 if (matches (begin
, "ref")) return TokenType
.REF
;
104 if (matches (begin
, "set")) return TokenType
.SET
;
107 if (matches (begin
, "try")) return TokenType
.TRY
;
110 if (matches (begin
, "var")) return TokenType
.VAR
;
// four-letter keywords
117 if (matches (begin
, "base")) return TokenType
.BASE
;
120 if (matches (begin
, "case")) return TokenType
.CASE
;
125 if (matches (begin
, "else")) return TokenType
.ELSE
;
128 if (matches (begin
, "enum")) return TokenType
.ENUM
;
133 if (matches (begin
, "lock")) return TokenType
.LOCK
;
136 if (matches (begin
, "null")) return TokenType
.NULL
;
141 if (matches (begin
, "this")) return TokenType
.THIS
;
144 if (matches (begin
, "true")) return TokenType
.TRUE
;
149 if (matches (begin
, "void")) return TokenType
.VOID
;
152 if (matches (begin
, "weak")) return TokenType
.WEAK
;
// five-letter keywords
159 if (matches (begin
, "async")) return TokenType
.ASYNC
;
162 if (matches (begin
, "break")) return TokenType
.BREAK
;
167 if (matches (begin
, "catch")) return TokenType
.CATCH
;
170 if (matches (begin
, "class")) return TokenType
.CLASS
;
173 if (matches (begin
, "const")) return TokenType
.CONST
;
178 if (matches (begin
, "false")) return TokenType
.FALSE
;
181 if (matches (begin
, "owned")) return TokenType
.OWNED
;
184 if (matches (begin
, "throw")) return TokenType
.THROW
;
187 if (matches (begin
, "using")) return TokenType
.USING
;
190 if (matches (begin
, "while")) return TokenType
.WHILE
;
193 if (matches (begin
, "yield")) return TokenType
.YIELD
;
// six-letter keywords
200 if (matches (begin
, "delete")) return TokenType
.DELETE
;
203 if (matches (begin
, "extern")) return TokenType
.EXTERN
;
206 if (matches (begin
, "inline")) return TokenType
.INLINE
;
211 if (matches (begin
, "params")) return TokenType
.PARAMS
;
214 if (matches (begin
, "public")) return TokenType
.PUBLIC
;
219 if (matches (begin
, "return")) return TokenType
.RETURN
;
226 if (matches (begin
, "signal")) return TokenType
.SIGNAL
;
229 if (matches (begin
, "sizeof")) return TokenType
.SIZEOF
;
236 if (matches (begin
, "static")) return TokenType
.STATIC
;
239 if (matches (begin
, "struct")) return TokenType
.STRUCT
;
244 if (matches (begin
, "switch")) return TokenType
.SWITCH
;
251 if (matches (begin
, "throws")) return TokenType
.THROWS
;
254 if (matches (begin
, "typeof")) return TokenType
.TYPEOF
;
// seven-letter keywords
265 if (matches (begin
, "default")) return TokenType
.DEFAULT
;
268 if (matches (begin
, "dynamic")) return TokenType
.DYNAMIC
;
273 if (matches (begin
, "ensures")) return TokenType
.ENSURES
;
278 if (matches (begin
, "finally")) return TokenType
.FINALLY
;
281 if (matches (begin
, "foreach")) return TokenType
.FOREACH
;
286 if (matches (begin
, "private")) return TokenType
.PRIVATE
;
289 if (matches (begin
, "unowned")) return TokenType
.UNOWNED
;
292 if (matches (begin
, "virtual")) return TokenType
.VIRTUAL
;
// eight-letter keywords
299 if (matches (begin
, "abstract")) return TokenType
.ABSTRACT
;
302 if (matches (begin
, "continue")) return TokenType
.CONTINUE
;
305 if (matches (begin
, "delegate")) return TokenType
.DELEGATE
;
308 if (matches (begin
, "internal")) return TokenType
.INTERNAL
;
311 if (matches (begin
, "override")) return TokenType
.OVERRIDE
;
314 if (matches (begin
, "requires")) return TokenType
.REQUIRES
;
317 if (matches (begin
, "volatile")) return TokenType
.VOLATILE
;
// nine-letter keywords
324 if (matches (begin
, "construct")) return TokenType
.CONSTRUCT
;
327 if (matches (begin
, "interface")) return TokenType
.INTERFACE
;
330 if (matches (begin
, "namespace")) return TokenType
.NAMESPACE
;
333 if (matches (begin
, "protected")) return TokenType
.PROTECTED
;
// eleven-letter keyword
338 if (matches (begin
, "errordomain")) return TokenType
.ERRORDOMAIN
;
// no keyword comparison succeeded: plain identifier
341 return TokenType
.IDENTIFIER
;
// Scans a numeric literal at current and determines its token type. The type
// starts as INTEGER_LITERAL and is changed to REAL_LITERAL when a fraction,
// an exponent or (presumably) a real suffix is found, or to IDENTIFIER when
// identifier characters directly follow the digits.
344 TokenType
read_number () {
345 var type
= TokenType
.INTEGER_LITERAL
;
// "0x" followed by at least one hex digit; the "end - 2" bound keeps
// current[2] inside the buffer.
348 if (current
< end
- 2 && current
[0] == '0'
349 && current
[1] == 'x' && current
[2].isxdigit ()) {
350 // hexadecimal integer literal
// consume the hex digits
352 while (current
< end
&& current
[0].isxdigit ()) {
// consume the decimal digits of the integer part
357 while (current
< end
&& current
[0].isdigit ()) {
// a '.' only starts a fraction when a digit follows it
363 if (current
< end
- 1 && current
[0] == '.' && current
[1].isdigit ()) {
364 type
= TokenType
.REAL_LITERAL
;
// consume the fractional digits
366 while (current
< end
&& current
[0].isdigit ()) {
// exponent marker, accepted in either case via tolower ()
372 if (current
< end
&& current
[0].tolower () == 'e') {
373 type
= TokenType
.REAL_LITERAL
;
// optional sign of the exponent
375 if (current
< end
&& (current
[0] == '+' || current
[0] == '-')) {
// consume the exponent digits
378 while (current
< end
&& current
[0].isdigit ()) {
// remember whether the literal was already real before the character
// following the digits is examined
385 bool real_literal
= (type
== TokenType
.REAL_LITERAL
);
// dispatch on the character after the digits
// NOTE(review): presumably literal type suffixes - confirm against the case labels
387 switch (current
[0]) {
// the following checks apply to integer literals only and look for 'l'
// characters in either case
390 if (type
== TokenType
.INTEGER_LITERAL
) {
392 if (current
< end
&& current
[0].tolower () == 'l') {
399 if (type
== TokenType
.INTEGER_LITERAL
) {
401 if (current
< end
&& current
[0].tolower () == 'l') {
403 if (current
< end
&& current
[0].tolower () == 'l') {
413 type
= TokenType
.REAL_LITERAL
;
// only literals that were not real before the suffix check may continue as
// an identifier
418 if (!real_literal
&& is_ident_char (current
[0])) {
419 // allow identifiers to start with a digit
420 // as long as they contain at least one char
421 while (current
< end
&& is_ident_char (current
[0])) {
424 type
= TokenType
.IDENTIFIER
;
// Reads the next token starting at current.
// token_begin receives the position, line and column where the token starts;
// token_end receives the position after the token together with the line and
// the column of its last character. Returns the type of the token that was read.
431 public TokenType
read_token (out SourceLocation token_begin
, out SourceLocation token_end
) {
// remember where the token starts
435 char* begin
= current
;
436 token_begin
.pos
= begin
;
437 token_begin
.line
= line
;
438 token_begin
.column
= column
;
// -1 means no character count was tracked: the column is then advanced by
// the byte distance (current - begin) at the end of this method. The
// character and string literal branches set an explicit count instead.
440 int token_length_in_chars
= -1;
// end of input
442 if (current
>= end
) {
443 type
= TokenType
.EOF
;
// identifier or keyword: starts with a letter or an underscore
444 } else if (current
[0].isalpha () || current
[0] == '_') {
446 while (current
< end
&& is_ident_char (current
[0])) {
450 type
= get_identifier_or_keyword (begin
, len
);
// identifier prefixed with '@': always an IDENTIFIER token
451 } else if (current
[0] == '@') {
452 token_begin
.pos
++; // @ is not part of the identifier
455 while (current
< end
&& is_ident_char (current
[0])) {
459 type
= TokenType
.IDENTIFIER
;
// numeric literal: starts with a digit
460 } else if (current
[0].isdigit ()) {
461 type
= read_number ();
// everything else: dispatch on the current character
463 switch (current
[0]) {
465 type
= TokenType
.OPEN_BRACE
;
469 type
= TokenType
.CLOSE_BRACE
;
473 type
= TokenType
.OPEN_PARENS
;
477 type
= TokenType
.CLOSE_PARENS
;
481 type
= TokenType
.OPEN_BRACKET
;
485 type
= TokenType
.CLOSE_BRACKET
;
489 type
= TokenType
.DOT
;
// two further dots turn the token into an ellipsis; the "end - 1" bound
// keeps current[1] inside the buffer
491 if (current
< end
- 1) {
492 if (current
[0] == '.' && current
[1] == '.') {
493 type
= TokenType
.ELLIPSIS
;
499 type
= TokenType
.COLON
;
// a second ':' makes it a double colon
501 if (current
< end
&& current
[0] == ':') {
502 type
= TokenType
.DOUBLE_COLON
;
507 type
= TokenType
.COMMA
;
511 type
= TokenType
.SEMICOLON
;
515 type
= TokenType
.HASH
;
519 type
= TokenType
.INTERR
;
523 type
= TokenType
.BITWISE_OR
;
// the character after the operator selects the assignment or logical variant
526 switch (current
[0]) {
528 type
= TokenType
.ASSIGN_BITWISE_OR
;
532 type
= TokenType
.OP_OR
;
539 type
= TokenType
.BITWISE_AND
;
542 switch (current
[0]) {
544 type
= TokenType
.ASSIGN_BITWISE_AND
;
548 type
= TokenType
.OP_AND
;
555 type
= TokenType
.CARRET
;
// a following '=' selects the compound assignment
557 if (current
< end
&& current
[0] == '=') {
558 type
= TokenType
.ASSIGN_BITWISE_XOR
;
563 type
= TokenType
.TILDE
;
567 type
= TokenType
.ASSIGN
;
570 switch (current
[0]) {
572 type
= TokenType
.OP_EQ
;
576 type
= TokenType
.LAMBDA
;
583 type
= TokenType
.OP_LT
;
586 switch (current
[0]) {
588 type
= TokenType
.OP_LE
;
592 type
= TokenType
.OP_SHIFT_LEFT
;
// a '=' after the shift operator makes it a shift assignment
594 if (current
< end
&& current
[0] == '=') {
595 type
= TokenType
.ASSIGN_SHIFT_LEFT
;
603 type
= TokenType
.OP_GT
;
605 if (current
< end
&& current
[0] == '=') {
606 type
= TokenType
.OP_GE
;
611 type
= TokenType
.OP_NEG
;
613 if (current
< end
&& current
[0] == '=') {
614 type
= TokenType
.OP_NE
;
619 type
= TokenType
.PLUS
;
622 switch (current
[0]) {
624 type
= TokenType
.ASSIGN_ADD
;
628 type
= TokenType
.OP_INC
;
635 type
= TokenType
.MINUS
;
638 switch (current
[0]) {
640 type
= TokenType
.ASSIGN_SUB
;
644 type
= TokenType
.OP_DEC
;
648 type
= TokenType
.OP_PTR
;
655 type
= TokenType
.STAR
;
657 if (current
< end
&& current
[0] == '=') {
658 type
= TokenType
.ASSIGN_MUL
;
663 type
= TokenType
.DIV
;
665 if (current
< end
&& current
[0] == '=') {
666 type
= TokenType
.ASSIGN_DIV
;
671 type
= TokenType
.PERCENT
;
673 if (current
< end
&& current
[0] == '=') {
674 type
= TokenType
.ASSIGN_PERCENT
;
// quoted literals: the opening characters decide between a character
// literal, a verbatim (triple-quoted) string and a regular string literal
680 if (begin
[0] == '\'') {
681 type
= TokenType
.CHARACTER_LITERAL
;
682 } else if (current
< end
- 6 && begin
[1] == '"' && begin
[2] == '"') {
683 type
= TokenType
.VERBATIM_STRING_LITERAL
;
// NOTE(review): 6 presumably accounts for the opening and closing triple quotes
684 token_length_in_chars
= 6;
// scan for the closing triple quote
686 while (current
< end
- 4) {
687 if (current
[0] == '"' && current
[1] == '"' && current
[2] == '"') {
689 } else if (current
[0] == '\n') {
693 token_length_in_chars
= 3;
// decode one UTF-8 character from the remaining bytes; a result of
// (unichar) -1 is reported as an invalid character below
695 unichar u
= ((string) current
).get_char_validated ((long) (end
- current
));
696 if (u
!= (unichar
) (-1)) {
// advance by the encoded byte length but count a single character
697 current
+= u
.to_utf8 (null);
698 token_length_in_chars
++;
700 Report
.error (new
SourceReference (source_file
, line
, column
+ token_length_in_chars
, line
, column
+ token_length_in_chars
), "invalid UTF-8 character");
// after the loop the closing triple quote is expected at current
704 if (current
[0] == '"' && current
[1] == '"' && current
[2] == '"') {
707 Report
.error (new
SourceReference (source_file
, line
, column
+ token_length_in_chars
, line
, column
+ token_length_in_chars
), "syntax error, expected \"\"\"");
711 type
= TokenType
.STRING_LITERAL
;
// NOTE(review): 2 presumably accounts for the opening and closing quote
713 token_length_in_chars
= 2;
// scan until the closing quote, which is the same character that opened
// the literal (begin[0])
715 while (current
< end
&& current
[0] != begin
[0]) {
// backslash escape sequence
716 if (current
[0] == '\\') {
718 token_length_in_chars
++;
719 if (current
>= end
) {
// dispatch on the character following the backslash
723 switch (current
[0]) {
734 token_length_in_chars
++;
737 // hexadecimal escape character
739 token_length_in_chars
++;
740 while (current
< end
&& current
[0].isxdigit ()) {
742 token_length_in_chars
++;
746 Report
.error (new
SourceReference (source_file
, line
, column
+ token_length_in_chars
, line
, column
+ token_length_in_chars
), "invalid escape sequence");
749 } else if (current
[0] == '\n') {
// ordinary character inside the literal: decode it as UTF-8
752 unichar u
= ((string) current
).get_char_validated ((long) (end
- current
));
753 if (u
!= (unichar
) (-1)) {
754 current
+= u
.to_utf8 (null);
755 token_length_in_chars
++;
758 Report
.error (new
SourceReference (source_file
, line
, column
+ token_length_in_chars
, line
, column
+ token_length_in_chars
), "invalid UTF-8 character");
// the loop above stops at the closing quote, a newline or the end of input
762 if (current
< end
&& current
[0] != '\n') {
765 Report
.error (new
SourceReference (source_file
, line
, column
+ token_length_in_chars
, line
, column
+ token_length_in_chars
), "syntax error, expected %c".printf (begin
[0]));
// character that starts no known token: decode it so that either an
// unexpected-character or an invalid-UTF-8 error can be reported
769 unichar u
= ((string) current
).get_char_validated ((long) (end
- current
));
770 if (u
!= (unichar
) (-1)) {
771 current
+= u
.to_utf8 (null);
772 Report
.error (new
SourceReference (source_file
, line
, column
, line
, column
), "syntax error, unexpected character");
775 Report
.error (new
SourceReference (source_file
, line
, column
, line
, column
), "invalid UTF-8 character");
// NOTE(review): presumably continues after the reported error by reading
// the following token recursively - confirm which branch this belongs to
778 return read_token (out token_begin
, out token_end
);
// advance the column: by the byte length when no character count was
// tracked, otherwise by the counted characters
782 if (token_length_in_chars
< 0) {
783 column
+= (int) (current
- begin
);
785 column
+= token_length_in_chars
;
// pos is the position after the token; column has already been advanced
// past the token, so column - 1 is the column of its last character
788 token_end
.pos
= current
;
789 token_end
.line
= line
;
790 token_end
.column
= column
- 1;
// Compares the characters at begin with keyword, character by character, over
// the length of keyword only. Text at begin may continue past the keyword, so
// callers that need an exact match also compare lengths (pp_directive does).
// NOTE(review): presumably returns false on the first mismatch and true
// otherwise - confirm against the return statements.
795 static bool matches (char* begin
, string keyword
) {
// view the keyword as raw characters for indexed comparison
796 char* keyword_array
= keyword
;
797 long len
= keyword
.len ();
798 for (int i
= 0; i
< len
; i
++) {
// mismatch at position i
799 if (begin
[i
] != keyword_array
[i
]) {
// Skips whitespace inside a preprocessing directive. The loop stops at a
// newline, so it never leaves the current line.
806 bool pp_whitespace () {
808 while (current
< end
&& current
[0].isspace () && current
[0] != '\n') {
// Handles a preprocessing directive: reads the directive name, dispatches on
// if / elif / else / endif, and, while the innermost conditional is marked
// skip_section, skips source lines until the next directive.
816 void pp_directive () {
// start of the directive name
823 char* begin
= current
;
// the name consists of alphanumeric characters
825 while (current
< end
&& current
[0].isalnum ()) {
// len is compared first because matches () only checks as many characters
// as the keyword has
831 if (len
== 2 && matches (begin
, "if")) {
833 } else if (len
== 4 && matches (begin
, "elif")) {
835 } else if (len
== 4 && matches (begin
, "else")) {
837 } else if (len
== 5 && matches (begin
, "endif")) {
// the reported range spans the directive name (column - len to column)
840 Report
.error (new
SourceReference (source_file
, line
, column
- len
, line
, column
), "syntax error, invalid preprocessing directive");
// the innermost conditional decides whether the following lines are skipped
843 if (conditional_stack
.length
> 0
844 && conditional_stack
[conditional_stack
.length
- 1].skip_section
) {
845 // skip lines until next preprocessing directive
847 while (current
< end
) {
// a '#' while at the beginning of a line starts the next directive
848 if (bol
&& current
[0] == '#') {
849 // go back to begin of line
850 current
-= (column
- 1);
854 if (current
[0] == '\n') {
// a non-space character is checked separately from whitespace here
// NOTE(review): presumably it clears the beginning-of-line state - confirm
858 } else if (!current
[0].isspace ()) {
// expect a newline at current; end of input is reported as an error too
869 if (current
>= end
|| current
[0] != '\n') {
870 Report
.error (new
SourceReference (source_file
, line
, column
, line
, column
), "syntax error, expected newline");
// Handles an #if directive: evaluates the condition, pushes a new Conditional
// onto conditional_stack and marks it either as matched or as a section to skip.
874 void parse_pp_if () {
877 bool condition
= parse_pp_expression ();
// push a fresh entry for this #if
881 conditional_stack
+= Conditional ();
// the branch is only taken when the condition holds and the enclosing
// conditional (index length - 2), if there is one, is not being skipped
883 if (condition
&& (conditional_stack
.length
== 1 || !conditional_stack
[conditional_stack
.length
- 2].skip_section
)) {
884 // condition true => process code within if
885 conditional_stack
[conditional_stack
.length
- 1].matched
= true;
887 // skip lines until next preprocessing directive
888 conditional_stack
[conditional_stack
.length
- 1].skip_section
= true;
// Handles an #elif directive: evaluates the condition, rejects an #elif that
// has no open conditional or follows an #else, and activates this branch only
// when no earlier branch of the same conditional matched.
892 void parse_pp_elif () {
895 bool condition
= parse_pp_expression ();
// #elif without an open #if, or after #else, is a syntax error
899 if (conditional_stack
.length
== 0 || conditional_stack
[conditional_stack
.length
- 1].else_found
) {
900 Report
.error (new
SourceReference (source_file
, line
, column
, line
, column
), "syntax error, unexpected #elif");
// take this branch only if the condition holds, no earlier branch matched,
// and the enclosing conditional (if any) is not being skipped
904 if (condition
&& !conditional_stack
[conditional_stack
.length
- 1].matched
905 && (conditional_stack
.length
== 1 || !conditional_stack
[conditional_stack
.length
- 2].skip_section
)) {
906 // condition true => process code within if
907 conditional_stack
[conditional_stack
.length
- 1].matched
= true;
908 conditional_stack
[conditional_stack
.length
- 1].skip_section
= false;
910 // skip lines until next preprocessing directive
911 conditional_stack
[conditional_stack
.length
- 1].skip_section
= true;
// Handles an #else directive: rejects an #else that has no open conditional
// or follows another #else, and activates the else branch only when no
// earlier branch of the same conditional matched.
915 void parse_pp_else () {
// #else without an open #if, or a second #else, is a syntax error
918 if (conditional_stack
.length
== 0 || conditional_stack
[conditional_stack
.length
- 1].else_found
) {
919 Report
.error (new
SourceReference (source_file
, line
, column
, line
, column
), "syntax error, unexpected #else");
// take the else branch only if no earlier branch matched and the enclosing
// conditional (if any) is not being skipped
923 if (!conditional_stack
[conditional_stack
.length
- 1].matched
924 && (conditional_stack
.length
== 1 || !conditional_stack
[conditional_stack
.length
- 2].skip_section
)) {
925 // no earlier branch matched => process code within else
926 conditional_stack
[conditional_stack
.length
- 1].matched
= true;
927 conditional_stack
[conditional_stack
.length
- 1].skip_section
= false;
929 // skip lines until next preprocessing directive
930 conditional_stack
[conditional_stack
.length
- 1].skip_section
= true;
// Handles an #endif directive: reports an error when no conditional is open
// and removes the innermost conditional from the stack.
934 void parse_pp_endif () {
// #endif without a matching #if
937 if (conditional_stack
.length
== 0) {
938 Report
.error (new
SourceReference (source_file
, line
, column
, line
, column
), "syntax error, unexpected #endif");
// pop the innermost conditional
942 conditional_stack
.length
--;
// Parses an identifier in a preprocessor expression and evaluates it. The
// names "true" and "false" are special-cased; any other name is looked up
// with source_file.context.is_defined ().
945 bool parse_pp_symbol () {
// consume the identifier characters
947 while (current
< end
&& is_ident_char (current
[0])) {
954 Report
.error (new
SourceReference (source_file
, line
, column
, line
, column
), "syntax error, expected identifier");
// copy the len characters that end at current
958 string identifier
= ((string) (current
- len
)).ndup (len
);
960 if (identifier
== "true") {
962 } else if (identifier
== "false") {
// any other identifier: ask the context whether the symbol is defined
965 defined
= source_file
.context
.is_defined (identifier
);
// Parses a primary preprocessor expression: either a symbol or a
// parenthesized expression. Anything else, including the end of input, is
// reported as a syntax error.
971 bool parse_pp_primary_expression () {
// nothing left to parse
972 if (current
>= end
) {
973 Report
.error (new
SourceReference (source_file
, line
, column
, line
, column
), "syntax error, expected identifier");
// symbol
974 } else if (is_ident_char (current
[0])) {
975 return parse_pp_symbol ();
// parenthesized sub-expression
976 } else if (current
[0] == '(') {
980 bool result
= parse_pp_expression ();
// the sub-expression must be closed by ')'
982 if (current
< end
&& current
[0] == ')') {
986 Report
.error (new
SourceReference (source_file
, line
, column
, line
, column
), "syntax error, expected `)'");
// neither a symbol nor a parenthesized expression
990 Report
.error (new
SourceReference (source_file
, line
, column
, line
, column
), "syntax error, expected identifier");
// Parses a unary preprocessor expression: a leading '!' negates the value of
// the unary expression that follows it; otherwise the expression is a primary
// expression.
995 bool parse_pp_unary_expression () {
996 if (current
< end
&& current
[0] == '!') {
// recursion allows repeated negation
1000 return !parse_pp_unary_expression ();
1003 return parse_pp_primary_expression ();
// Parses a preprocessor equality expression: unary expressions combined with
// the == and != operators, evaluated on boolean operands.
1006 bool parse_pp_equality_expression () {
1007 bool left
= parse_pp_unary_expression ();
// "==" operator; the "end - 1" bound keeps current[1] inside the buffer
1010 if (current
< end
- 1 && current
[0] == '=' && current
[1] == '=') {
1014 bool right
= parse_pp_unary_expression ();
1015 left
= (left
== right
);
// "!=" operator
1016 } else if (current
< end
- 1 && current
[0] == '!' && current
[1] == '=') {
1020 bool right
= parse_pp_unary_expression ();
1021 left
= (left
!= right
);
// Parses a preprocessor conjunction: equality expressions joined by "&&",
// folded from left to right.
1029 bool parse_pp_and_expression () {
1030 bool left
= parse_pp_equality_expression ();
// each "&&" combines the value so far with the next operand
1032 while (current
< end
- 1 && current
[0] == '&' && current
[1] == '&') {
// the right operand is parsed before it is combined, so it is always
// consumed from the input even when left is already false
1036 bool right
= parse_pp_equality_expression ();
1037 left
= left
&& right
;
// Parses a preprocessor disjunction: "&&" expressions joined by "||", folded
// from left to right.
1042 bool parse_pp_or_expression () {
1043 bool left
= parse_pp_and_expression ();
// each "||" combines the value so far with the next operand
1045 while (current
< end
- 1 && current
[0] == '|' && current
[1] == '|') {
// the right operand is parsed before it is combined, so it is always
// consumed from the input even when left is already true
1049 bool right
= parse_pp_and_expression ();
1050 left
= left
|| right
;
// Entry point for preprocessor expressions; delegates to the "||" level,
// which is the lowest-precedence rule among the parse_pp_* methods.
1055 bool parse_pp_expression () {
1056 return parse_pp_or_expression ();
// Skips whitespace, including newlines, starting at current.
1059 bool whitespace () {
// bol records whether scanning started at the beginning of a line (column 1)
1061 bool bol
= (column
== 1);
1062 while (current
< end
&& current
[0].isspace ()) {
// newlines are handled separately from other whitespace
// NOTE(review): presumably to update the line and column counters - confirm
1063 if (current
[0] == '\n') {
// a '#' reached while still at the beginning of a line is treated specially
// NOTE(review): presumably handed to pp_directive - confirm
1072 if (bol
&& current
< end
&& current
[0] == '#') {
// Tries to consume a comment at current. Both the single-line form (two
// slashes) and the multi-line form (slash-star ... star-slash) are handled.
// When a source reference was created for the comment, its text is passed to
// push_comment together with file_comment.
1079 bool comment (bool file_comment
= false) {
// not a comment unless at least two characters remain and they are a slash
// followed by a slash or a star
1080 if (current
> end
- 2
1081 || current
[0] != '/'
1082 || (current
[1] != '/' && current
[1] != '*')) {
1086 if (current
[1] == '/') {
1087 SourceReference source_reference
= null;
1089 source_reference
= new
SourceReference (source_file
, line
, column
, line
, column
);
1092 // single-line comment
// start of the comment text
1094 char* begin
= current
;
1096 // skip until end of line or end of file
1097 while (current
< end
&& current
[0] != '\n') {
// hand over the text between begin and current
1101 if (source_reference
!= null) {
1102 push_comment (((string) begin
).ndup ((long) (current
- begin
)), source_reference
, file_comment
);
// multi-line comment
1105 SourceReference source_reference
= null;
// NOTE(review): the guard at the top only ensures two readable characters;
// confirm that current[2] is within bounds here.
1107 if (file_comment
&& current
[2] == '*') {
// a source reference is created for comments whose third character is a
// star, and for any comment when file_comment is set
1111 if (current
[2] == '*' || file_comment
) {
1112 source_reference
= new
SourceReference (source_file
, line
, column
, line
, column
);
// start of the comment text
1117 char* begin
= current
;
// scan until the closing star-slash; the "end - 1" bound keeps current[1]
// inside the buffer
1118 while (current
< end
- 1
1119 && (current
[0] != '*' || current
[1] != '/')) {
1120 if (current
[0] == '\n') {
// reaching end - 1 means the loop stopped without finding the terminator
1128 if (current
== end
- 1) {
1129 Report
.error (new
SourceReference (source_file
, line
, column
, line
, column
), "syntax error, expected */");
// hand over the text between begin and current
1133 if (source_reference
!= null) {
1134 push_comment (((string) begin
).ndup ((long) (current
- begin
)), source_reference
, file_comment
);
1145 while (whitespace () || comment ()) {
// Consumes leading whitespace and comments, passing file_comment = true to
// comment (). The loop runs until neither whitespace () nor comment (true)
// reports success.
1149 public void parse_file_comments () {
1150 while (whitespace () || comment (true)) {
// Records a scanned comment. comment_item is the comment text,
// source_reference its location, and file_comment the flag passed through
// from comment ().
1154 void push_comment (string comment_item
, SourceReference source_reference
, bool file_comment
) {
// text starting with '*' becomes the pending comment held in _comment
1155 if (comment_item
[0] == '*') {
1156 _comment
= new
Comment (comment_item
, source_reference
);
// register the comment with the source file
// NOTE(review): confirm under which condition this call runs
1160 source_file
.add_comment (new
Comment (comment_item
, source_reference
));
1166 * Clears and returns the content of the comment stack.
1168 * @return saved comment
1170 public Comment?
pop_comment () {
1171 if (_comment
== null) {
1175 var comment
= _comment
;