/* valageniescanner.vala
 *
 * Copyright (C) 2008 Jamie McCracken, Jürg Billeter
 * Based on code by Jürg Billeter
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 *
 * Jamie McCracken jamiemcc gnome org
 */

/**
 * Lexical scanner for Genie source files.
 */
29 public class Vala
.Genie
.Scanner
{
30 public SourceFile source_file
{ get; private set; }
32 public int indent_spaces
{ get; set;}
41 int current_indent_level
;
45 /* track open parens and braces for automatic line continuations */
46 int open_parens_count
;
54 Conditional
[] conditional_stack
;
58 public bool else_found
;
59 public bool skip_section
;
73 public Scanner (SourceFile source_file
) {
74 this
.source_file
= source_file
;
76 begin
= source_file
.get_mapped_contents ();
77 end
= begin
+ source_file
.get_mapped_length ();
84 current_indent_level
= 0;
88 open_parens_count
= 0;
91 parse_started
= false;
92 last_token
= TokenType
.NONE
;
/**
 * Checks whether the innermost scanner state is State.TEMPLATE.
 *
 * @return true when the state stack is non-empty and its top entry is State.TEMPLATE
 */
bool in_template () {
	int depth = state_stack.length;
	return depth > 0 && state_stack[depth - 1] == State.TEMPLATE;
}
/**
 * Checks whether the innermost scanner state is State.TEMPLATE_PART.
 *
 * @return true when the state stack is non-empty and its top entry is State.TEMPLATE_PART
 */
bool in_template_part () {
	int depth = state_stack.length;
	return depth > 0 && state_stack[depth - 1] == State.TEMPLATE_PART;
}
/**
 * Tells whether a character may appear inside an identifier.
 *
 * @param c the character to classify
 * @return true when c is an underscore or is alphanumeric according to char.isalnum ()
 */
bool is_ident_char (char c) {
	return c == '_' || c.isalnum ();
}
/**
 * Checks whether the innermost scanner state is State.REGEX_LITERAL.
 *
 * @return true when the state stack is non-empty and its top entry is State.REGEX_LITERAL
 */
bool in_regex_literal () {
	int depth = state_stack.length;
	return depth > 0 && state_stack[depth - 1] == State.REGEX_LITERAL;
}
113 public TokenType
read_regex_token (out SourceLocation token_begin
, out SourceLocation token_end
) {
115 char* begin
= current
;
116 token_begin
.pos
= begin
;
117 token_begin
.line
= line
;
118 token_begin
.column
= column
;
120 int token_length_in_chars
= -1;
122 if (current
>= end
) {
123 type
= TokenType
.EOF
;
125 switch (current
[0]) {
127 type
= TokenType
.CLOSE_REGEX_LITERAL
;
129 state_stack
.length
--;
134 while (current
[0] == 'i' || current
[0] == 's' || current
[0] == 'm' || current
[0] == 'x') {
135 switch (current
[0]) {
138 Report
.error (new
SourceReference (source_file
, line
, column
+ token_length_in_chars
, line
, column
+ token_length_in_chars
), "modifier 'i' used more than once");
144 Report
.error (new
SourceReference (source_file
, line
, column
+ token_length_in_chars
, line
, column
+ token_length_in_chars
), "modifier 's' used more than once");
150 Report
.error (new
SourceReference (source_file
, line
, column
+ token_length_in_chars
, line
, column
+ token_length_in_chars
), "modifier 'm' used more than once");
156 Report
.error (new
SourceReference (source_file
, line
, column
+ token_length_in_chars
, line
, column
+ token_length_in_chars
), "modifier 'x' used more than once");
162 token_length_in_chars
++;
166 type
= TokenType
.REGEX_LITERAL
;
167 token_length_in_chars
= 0;
168 while (current
< end
&& current
[0] != '/') {
169 if (current
[0] == '\\') {
171 token_length_in_chars
++;
172 if (current
>= end
) {
176 switch (current
[0]) {
227 token_length_in_chars
++;
230 // hexadecimal escape character
232 token_length_in_chars
++;
233 while (current
< end
&& current
[0].isxdigit ()) {
235 token_length_in_chars
++;
239 Report
.error (new
SourceReference (source_file
, line
, column
+ token_length_in_chars
, line
, column
+ token_length_in_chars
), "invalid escape sequence");
242 } else if (current
[0] == '\n') {
245 unichar u
= ((string) current
).get_char_validated ((long) (end
- current
));
246 if (u
!= (unichar
) (-1)) {
247 current
+= u
.to_utf8 (null);
248 token_length_in_chars
++;
251 Report
.error (new
SourceReference (source_file
, line
, column
+ token_length_in_chars
, line
, column
+ token_length_in_chars
), "invalid UTF-8 character");
255 if (current
>= end
|| current
[0] == '\n') {
256 Report
.error (new
SourceReference (source_file
, line
, column
+ token_length_in_chars
, line
, column
+ token_length_in_chars
), "syntax error, expected \"");
257 state_stack
.length
--;
258 return read_token (out token_begin
, out token_end
);
264 if (token_length_in_chars
< 0) {
265 column
+= (int) (current
- begin
);
267 column
+= token_length_in_chars
;
270 token_end
.pos
= current
;
271 token_end
.line
= line
;
272 token_end
.column
= column
- 1;
278 public void seek (SourceLocation location
) {
279 current
= location
.pos
;
280 line
= location
.line
;
281 column
= location
.column
;
283 conditional_stack
= null;
287 TokenType
get_identifier_or_keyword (char* begin
, int len
) {
292 if (matches (begin
, "as")) return TokenType
.AS
;
295 if (matches (begin
, "do")) return TokenType
.DO
;
308 if (matches (begin
, "of")) return TokenType
.OF
;
310 if (matches (begin
, "or")) return TokenType
.OP_OR
;
313 if (matches (begin
, "to")) return TokenType
.TO
;
320 if (matches (begin
, "and")) return TokenType
.OP_AND
;
323 if (matches (begin
, "def")) return TokenType
.DEF
;
326 if (matches (begin
, "for")) return TokenType
.FOR
;
329 if (matches (begin
, "get")) return TokenType
.GET
;
332 if (matches (begin
, "isa")) return TokenType
.ISA
;
337 if (matches (begin
, "new")) return TokenType
.NEW
;
340 if (matches (begin
, "not")) return TokenType
.OP_NEG
;
345 if (matches (begin
, "out")) return TokenType
.OUT
;
348 if (matches (begin
, "ref")) return TokenType
.REF
;
351 if (matches (begin
, "set")) return TokenType
.SET
;
354 if (matches (begin
, "try")) return TokenType
.TRY
;
357 if (matches (begin
, "var")) return TokenType
.VAR
;
364 if (matches (begin
, "case")) return TokenType
.CASE
;
367 if (matches (begin
, "dict")) return TokenType
.DICT
;
372 if (matches (begin
, "else")) return TokenType
.ELSE
;
375 if (matches (begin
, "enum")) return TokenType
.ENUM
;
380 if (matches (begin
, "init")) return TokenType
.INIT
;
385 if (matches (begin
, "list")) return TokenType
.LIST
;
388 if (matches (begin
, "lock")) return TokenType
.LOCK
;
394 if (matches (begin
, "null")) return TokenType
.NULL
;
399 if (matches (begin
, "pass")) return TokenType
.PASS
;
402 if (matches (begin
, "prop")) return TokenType
.PROP
;
407 if (matches (begin
, "self")) return TokenType
.THIS
;
410 if (matches (begin
, "true")) return TokenType
.TRUE
;
413 if (matches (begin
, "uses")) return TokenType
.USES
;
416 if (matches (begin
, "void")) return TokenType
.VOID
;
421 if (matches (begin
, "weak")) return TokenType
.WEAK
;
424 if (matches (begin
, "when")) return TokenType
.WHEN
;
435 if (matches (begin
, "array")) return TokenType
.ARRAY
;
438 if (matches (begin
, "async")) return TokenType
.ASYNC
;
443 if (matches (begin
, "break")) return TokenType
.BREAK
;
448 if (matches (begin
, "class")) return TokenType
.CLASS
;
451 if (matches (begin
, "const")) return TokenType
.CONST
;
456 if (matches (begin
, "event")) return TokenType
.EVENT
;
461 if (matches (begin
, "false")) return TokenType
.FALSE
;
464 if (matches (begin
, "final")) return TokenType
.FINAL
;
469 if (matches (begin
, "owned")) return TokenType
.OWNED
;
472 if (matches (begin
, "print")) return TokenType
.PRINT
;
475 if (matches (begin
, "super")) return TokenType
.SUPER
;
478 if (matches (begin
, "raise")) return TokenType
.RAISE
;
481 if (matches (begin
, "while")) return TokenType
.WHILE
;
484 if (matches (begin
, "yield")) return TokenType
.YIELD
;
491 if (matches (begin
, "assert")) return TokenType
.ASSERT
;
496 if (matches (begin
, "delete")) return TokenType
.DELETE
;
499 if (matches (begin
, "downto")) return TokenType
.DOWNTO
;
508 if (matches (begin
, "except")) return TokenType
.EXCEPT
;
511 if (matches (begin
, "extern")) return TokenType
.EXTERN
;
518 if (matches (begin
, "inline")) return TokenType
.INLINE
;
523 if (matches (begin
, "params")) return TokenType
.PARAMS
;
526 if (matches (begin
, "public")) return TokenType
.PUBLIC
;
533 if (matches (begin
, "raises")) return TokenType
.RAISES
;
536 if (matches (begin
, "return")) return TokenType
.RETURN
;
543 if (matches (begin
, "sizeof")) return TokenType
.SIZEOF
;
548 if (matches (begin
, "static")) return TokenType
.STATIC
;
551 if (matches (begin
, "struct")) return TokenType
.STRUCT
;
558 if (matches (begin
, "typeof")) return TokenType
.TYPEOF
;
567 if (matches (begin
, "default")) return TokenType
.DEFAULT
;
570 if (matches (begin
, "dynamic")) return TokenType
.DYNAMIC
;
575 if (matches (begin
, "ensures")) return TokenType
.ENSURES
;
580 if (matches (begin
, "finally")) return TokenType
.FINALLY
;
585 if (matches (begin
, "private")) return TokenType
.PRIVATE
;
588 if (matches (begin
, "unowned")) return TokenType
.UNOWNED
;
591 if (matches (begin
, "virtual")) return TokenType
.VIRTUAL
;
598 if (matches (begin
, "abstract")) return TokenType
.ABSTRACT
;
601 if (matches (begin
, "continue")) return TokenType
.CONTINUE
;
604 if (matches (begin
, "delegate")) return TokenType
.DELEGATE
;
607 if (matches (begin
, "internal")) return TokenType
.INTERNAL
;
610 if (matches (begin
, "override")) return TokenType
.OVERRIDE
;
615 if (matches (begin
, "readonly")) return TokenType
.READONLY
;
618 if (matches (begin
, "requires")) return TokenType
.REQUIRES
;
623 if (matches (begin
, "volatile")) return TokenType
.VOLATILE
;
630 if (matches (begin
, "construct")) return TokenType
.CONSTRUCT
;
633 if (matches (begin
, "exception")) return TokenType
.ERRORDOMAIN
;
636 if (matches (begin
, "interface")) return TokenType
.INTERFACE
;
639 if (matches (begin
, "namespace")) return TokenType
.NAMESPACE
;
642 if (matches (begin
, "protected")) return TokenType
.PROTECTED
;
645 if (matches (begin
, "writeonly")) return TokenType
.WRITEONLY
;
652 if (matches (begin
, "implements")) return TokenType
.IMPLEMENTS
;
657 return TokenType
.IDENTIFIER
;
661 public TokenType
read_template_token (out SourceLocation token_begin
, out SourceLocation token_end
) {
663 char* begin
= current
;
664 token_begin
.pos
= begin
;
665 token_begin
.line
= line
;
666 token_begin
.column
= column
;
668 int token_length_in_chars
= -1;
670 if (current
>= end
) {
671 type
= TokenType
.EOF
;
673 switch (current
[0]) {
675 type
= TokenType
.CLOSE_TEMPLATE
;
677 state_stack
.length
--;
680 token_begin
.pos
++; // $ is not part of following token
682 if (current
[0].isalpha () || current
[0] == '_') {
684 while (current
< end
&& is_ident_char (current
[0])) {
688 type
= TokenType
.IDENTIFIER
;
689 state_stack
+= State
.TEMPLATE_PART
;
690 } else if (current
[0] == '(') {
693 state_stack
+= State
.PARENS
;
694 return read_token (out token_begin
, out token_end
);
695 } else if (current
[0] == '$') {
696 type
= TokenType
.TEMPLATE_STRING_LITERAL
;
698 state_stack
+= State
.TEMPLATE_PART
;
700 Report
.error (new
SourceReference (source_file
, line
, column
+ 1, line
, column
+ 1), "unexpected character");
701 return read_template_token (out token_begin
, out token_end
);
705 type
= TokenType
.TEMPLATE_STRING_LITERAL
;
706 token_length_in_chars
= 0;
707 while (current
< end
&& current
[0] != '"' && current
[0] != '$') {
708 if (current
[0] == '\\') {
710 token_length_in_chars
++;
711 if (current
>= end
) {
715 switch (current
[0]) {
726 token_length_in_chars
++;
729 // hexadecimal escape character
731 token_length_in_chars
++;
732 while (current
< end
&& current
[0].isxdigit ()) {
734 token_length_in_chars
++;
738 Report
.error (new
SourceReference (source_file
, line
, column
+ token_length_in_chars
, line
, column
+ token_length_in_chars
), "invalid escape sequence");
741 } else if (current
[0] == '\n') {
744 unichar u
= ((string) current
).get_char_validated ((long) (end
- current
));
745 if (u
!= (unichar
) (-1)) {
746 current
+= u
.to_utf8 (null);
747 token_length_in_chars
++;
750 Report
.error (new
SourceReference (source_file
, line
, column
+ token_length_in_chars
, line
, column
+ token_length_in_chars
), "invalid UTF-8 character");
754 if (current
>= end
|| current
[0] == '\n') {
755 Report
.error (new
SourceReference (source_file
, line
, column
+ token_length_in_chars
, line
, column
+ token_length_in_chars
), "syntax error, expected \"");
756 state_stack
.length
--;
757 return read_token (out token_begin
, out token_end
);
759 state_stack
+= State
.TEMPLATE_PART
;
764 if (token_length_in_chars
< 0) {
765 column
+= (int) (current
- begin
);
767 column
+= token_length_in_chars
;
770 token_end
.pos
= current
;
771 token_end
.line
= line
;
772 token_end
.column
= column
- 1;
778 public TokenType
read_token (out SourceLocation token_begin
, out SourceLocation token_end
) {
781 if (in_template ()) {
782 return read_template_token (out token_begin
, out token_end
);
783 } else if (in_template_part ()) {
784 state_stack
.length
--;
786 token_begin
.pos
= current
;
787 token_begin
.line
= line
;
788 token_begin
.column
= column
;
790 token_end
.pos
= current
;
791 token_end
.line
= line
;
792 token_end
.column
= column
- 1;
794 return TokenType
.COMMA
;
795 } else if (in_regex_literal ()) {
796 return read_regex_token (out token_begin
, out token_end
);
801 /* emit dedents if outstanding before checking any other chars */
803 if (pending_dedents
> 0) {
808 token_begin
.pos
= current
;
809 token_begin
.line
= line
;
810 token_begin
.column
= column
;
812 token_end
.pos
= current
;
813 token_end
.line
= line
;
814 token_end
.column
= column
;
816 last_token
= TokenType
.DEDENT
;
818 return TokenType
.DEDENT
;
821 if ((_indent_spaces
== 0 ) || (last_token
!= TokenType
.EOL
)) {
822 /* scrub whitespace (excluding newlines) and comments */
827 /* handle explicit line continuation (lines ending with "\") */
828 while (current
< end
&& current
[0] == '\\' && current
[1] == '\n') {
834 /* handle automatic line continuations (when inside parens or braces) */
835 while (current
< end
&& current
[0] == '\n' && (open_parens_count
> 0 || open_brace_count
> 0)) {
842 /* handle non-consecutive new line once parsing is underway - EOL */
843 if (newline () && parse_started
&& last_token
!= TokenType
.EOL
&& last_token
!= TokenType
.SEMICOLON
) {
844 token_begin
.pos
= current
;
845 token_begin
.line
= line
;
846 token_begin
.column
= column
;
848 token_end
.pos
= current
;
849 token_end
.line
= line
;
850 token_end
.column
= column
;
852 last_token
= TokenType
.EOL
;
854 return TokenType
.EOL
;
858 while (skip_newlines ()) {
859 token_begin
.pos
= current
;
860 token_begin
.line
= line
;
861 token_begin
.column
= column
;
863 current_indent_level
= count_tabs ();
865 /* if it's an empty new line then ignore */
866 if (current_indent_level
== -1) {
870 if (current_indent_level
> indent_level
) {
871 indent_level
= current_indent_level
;
873 token_end
.pos
= current
;
874 token_end
.line
= line
;
875 token_end
.column
= column
;
877 last_token
= TokenType
.INDENT
;
879 return TokenType
.INDENT
;
880 } else if (current_indent_level
< indent_level
) {
883 pending_dedents
= (indent_level
- current_indent_level
);
885 token_end
.pos
= current
;
886 token_end
.line
= line
;
887 token_end
.column
= column
;
889 last_token
= TokenType
.DEDENT
;
891 return TokenType
.DEDENT
;
896 char* begin
= current
;
897 token_begin
.pos
= begin
;
898 token_begin
.line
= line
;
899 token_begin
.column
= column
;
901 int token_length_in_chars
= -1;
903 parse_started
= true;
905 if (current
>= end
) {
906 if (indent_level
> 0) {
909 pending_dedents
= indent_level
;
911 type
= TokenType
.DEDENT
;
913 type
= TokenType
.EOF
;
915 } else if (current
[0].isalpha () || current
[0] == '_') {
917 while (current
< end
&& is_ident_char (current
[0])) {
921 type
= get_identifier_or_keyword (begin
, len
);
922 } else if (current
[0] == '@') {
923 if (current
< end
- 1 && current
[1] == '"') {
924 type
= TokenType
.OPEN_TEMPLATE
;
926 state_stack
+= State
.TEMPLATE
;
928 token_begin
.pos
++; // @ is not part of the identifier
931 while (current
< end
&& is_ident_char (current
[0])) {
935 type
= TokenType
.IDENTIFIER
;
937 } else if (current
[0].isdigit ()) {
938 while (current
< end
&& current
[0].isdigit ()) {
941 type
= TokenType
.INTEGER_LITERAL
;
942 if (current
< end
&& current
[0].tolower () == 'l') {
944 if (current
< end
&& current
[0].tolower () == 'l') {
947 } else if (current
< end
&& current
[0].tolower () == 'u') {
949 if (current
< end
&& current
[0].tolower () == 'l') {
951 if (current
< end
&& current
[0].tolower () == 'l') {
955 } else if (current
< end
- 1 && current
[0] == '.' && current
[1].isdigit ()) {
957 while (current
< end
&& current
[0].isdigit ()) {
960 if (current
< end
&& current
[0].tolower () == 'e') {
962 if (current
< end
&& (current
[0] == '+' || current
[0] == '-')) {
965 while (current
< end
&& current
[0].isdigit ()) {
969 if (current
< end
&& current
[0].tolower () == 'f') {
972 type
= TokenType
.REAL_LITERAL
;
973 } else if (current
< end
&& current
== begin
+ 1
974 && begin
[0] == '0' && begin
[1] == 'x' && begin
[2].isxdigit ()) {
975 // hexadecimal integer literal
977 while (current
< end
&& current
[0].isxdigit ()) {
980 } else if (current
< end
&& is_ident_char (current
[0])) {
981 // allow identifiers to start with a digit
982 // as long as they contain at least one char
983 while (current
< end
&& is_ident_char (current
[0])) {
986 type
= TokenType
.IDENTIFIER
;
989 switch (current
[0]) {
991 type
= TokenType
.OPEN_BRACE
;
993 state_stack
+= State
.BRACE
;
997 type
= TokenType
.CLOSE_BRACE
;
999 if (state_stack
.length
> 0) {
1000 state_stack
.length
--;
1005 type
= TokenType
.OPEN_PARENS
;
1006 open_parens_count
++;
1007 state_stack
+= State
.PARENS
;
1011 type
= TokenType
.CLOSE_PARENS
;
1012 open_parens_count
--;
1014 if (state_stack
.length
> 0) {
1015 state_stack
.length
--;
1017 if (in_template ()) {
1018 type
= TokenType
.COMMA
;
1022 type
= TokenType
.OPEN_BRACKET
;
1023 state_stack
+= State
.BRACKET
;
1027 type
= TokenType
.CLOSE_BRACKET
;
1028 if (state_stack
.length
> 0) {
1029 state_stack
.length
--;
1034 type
= TokenType
.DOT
;
1036 if (current
< end
- 1) {
1037 if (current
[0] == '.' && current
[1] == '.') {
1038 type
= TokenType
.ELLIPSIS
;
1044 type
= TokenType
.COLON
;
1048 type
= TokenType
.COMMA
;
1052 type
= TokenType
.SEMICOLON
;
1056 type
= TokenType
.HASH
;
1060 type
= TokenType
.INTERR
;
1064 type
= TokenType
.BITWISE_OR
;
1066 if (current
< end
) {
1067 switch (current
[0]) {
1069 type
= TokenType
.ASSIGN_BITWISE_OR
;
1073 type
= TokenType
.OP_OR
;
1080 type
= TokenType
.BITWISE_AND
;
1082 if (current
< end
) {
1083 switch (current
[0]) {
1085 type
= TokenType
.ASSIGN_BITWISE_AND
;
1089 type
= TokenType
.OP_AND
;
1096 type
= TokenType
.CARRET
;
1098 if (current
< end
&& current
[0] == '=') {
1099 type
= TokenType
.ASSIGN_BITWISE_XOR
;
1104 type
= TokenType
.TILDE
;
1108 type
= TokenType
.ASSIGN
;
1110 if (current
< end
) {
1111 switch (current
[0]) {
1113 type
= TokenType
.OP_EQ
;
1117 type
= TokenType
.LAMBDA
;
1124 type
= TokenType
.OP_LT
;
1126 if (current
< end
) {
1127 switch (current
[0]) {
1129 type
= TokenType
.OP_LE
;
1133 type
= TokenType
.OP_SHIFT_LEFT
;
1135 if (current
< end
&& current
[0] == '=') {
1136 type
= TokenType
.ASSIGN_SHIFT_LEFT
;
1144 type
= TokenType
.OP_GT
;
1146 if (current
< end
&& current
[0] == '=') {
1147 type
= TokenType
.OP_GE
;
1152 type
= TokenType
.OP_NEG
;
1154 if (current
< end
&& current
[0] == '=') {
1155 type
= TokenType
.OP_NE
;
1160 type
= TokenType
.PLUS
;
1162 if (current
< end
) {
1163 switch (current
[0]) {
1165 type
= TokenType
.ASSIGN_ADD
;
1169 type
= TokenType
.OP_INC
;
1176 type
= TokenType
.MINUS
;
1178 if (current
< end
) {
1179 switch (current
[0]) {
1181 type
= TokenType
.ASSIGN_SUB
;
1185 type
= TokenType
.OP_DEC
;
1189 type
= TokenType
.OP_PTR
;
1196 type
= TokenType
.STAR
;
1198 if (current
< end
&& current
[0] == '=') {
1199 type
= TokenType
.ASSIGN_MUL
;
1204 switch (last_token
) {
1205 case TokenType
.ASSIGN
:
1206 case TokenType
.COMMA
:
1207 case TokenType
.MINUS
:
1208 case TokenType
.OP_AND
:
1209 case TokenType
.OP_DEC
:
1210 case TokenType
.OP_EQ
:
1211 case TokenType
.OP_GE
:
1212 case TokenType
.OP_GT
:
1213 case TokenType
.OP_INC
:
1214 case TokenType
.OP_LE
:
1215 case TokenType
.OP_LT
:
1216 case TokenType
.OP_NE
:
1217 case TokenType
.OP_NEG
:
1218 case TokenType
.OP_OR
:
1219 case TokenType
.OPEN_BRACE
:
1220 case TokenType
.OPEN_PARENS
:
1221 case TokenType
.PLUS
:
1222 case TokenType
.RETURN
:
1223 type
= TokenType
.OPEN_REGEX_LITERAL
;
1224 state_stack
+= State
.REGEX_LITERAL
;
1228 type
= TokenType
.DIV
;
1230 if (current
< end
&& current
[0] == '=') {
1231 type
= TokenType
.ASSIGN_DIV
;
1239 type
= TokenType
.PERCENT
;
1241 if (current
< end
&& current
[0] == '=') {
1242 type
= TokenType
.ASSIGN_PERCENT
;
1248 if (begin
[0] == '\'') {
1249 type
= TokenType
.CHARACTER_LITERAL
;
1250 } else if (current
< end
- 6 && begin
[1] == '"' && begin
[2] == '"') {
1251 type
= TokenType
.VERBATIM_STRING_LITERAL
;
1252 token_length_in_chars
= 6;
1254 while (current
< end
- 4) {
1255 if (current
[0] == '"' && current
[1] == '"' && current
[2] == '"') {
1257 } else if (current
[0] == '\n') {
1261 token_length_in_chars
= 3;
1263 unichar u
= ((string) current
).get_char_validated ((long) (end
- current
));
1264 if (u
!= (unichar
) (-1)) {
1265 current
+= u
.to_utf8 (null);
1266 token_length_in_chars
++;
1268 Report
.error (new
SourceReference (source_file
, line
, column
+ token_length_in_chars
, line
, column
+ token_length_in_chars
), "invalid UTF-8 character");
1272 if (current
[0] == '"' && current
[1] == '"' && current
[2] == '"') {
1275 Report
.error (new
SourceReference (source_file
, line
, column
+ token_length_in_chars
, line
, column
+ token_length_in_chars
), "syntax error, expected \"\"\"");
1279 type
= TokenType
.STRING_LITERAL
;
1281 token_length_in_chars
= 2;
1283 while (current
< end
&& current
[0] != begin
[0]) {
1284 if (current
[0] == '\\') {
1286 token_length_in_chars
++;
1287 if (current
>= end
) {
1291 switch (current
[0]) {
1302 token_length_in_chars
++;
1305 // hexadecimal escape character
1307 token_length_in_chars
++;
1308 while (current
< end
&& current
[0].isxdigit ()) {
1310 token_length_in_chars
++;
1314 Report
.error (new
SourceReference (source_file
, line
, column
+ token_length_in_chars
, line
, column
+ token_length_in_chars
), "invalid escape sequence");
1317 } else if (current
[0] == '\n') {
1320 unichar u
= ((string) current
).get_char_validated ((long) (end
- current
));
1321 if (u
!= (unichar
) (-1)) {
1322 current
+= u
.to_utf8 (null);
1323 token_length_in_chars
++;
1326 Report
.error (new
SourceReference (source_file
, line
, column
+ token_length_in_chars
, line
, column
+ token_length_in_chars
), "invalid UTF-8 character");
1330 if (current
< end
&& current
[0] != '\n') {
1333 Report
.error (new
SourceReference (source_file
, line
, column
+ token_length_in_chars
, line
, column
+ token_length_in_chars
), "syntax error, expected %c".printf (begin
[0]));
1337 unichar u
= ((string) current
).get_char_validated ((long) (end
- current
));
1338 if (u
!= (unichar
) (-1)) {
1339 current
+= u
.to_utf8 (null);
1340 Report
.error (new
SourceReference (source_file
, line
, column
, line
, column
), "syntax error, unexpected character");
1343 Report
.error (new
SourceReference (source_file
, line
, column
, line
, column
), "invalid UTF-8 character");
1346 last_token
= TokenType
.STRING_LITERAL
;
1347 return read_token (out token_begin
, out token_end
);
1351 if (token_length_in_chars
< 0) {
1352 column
+= (int) (current
- begin
);
1354 column
+= token_length_in_chars
;
1357 token_end
.pos
= current
;
1358 token_end
.line
= line
;
1359 token_end
.column
= column
- 1;
1371 if (_indent_spaces
== 0) {
1372 while (current
< end
&& current
[0] == '\t') {
1378 int space_count
= 0;
1379 while (current
< end
&& current
[0] == ' ') {
1385 tab_count
= space_count
/ _indent_spaces
;
1389 /* ignore comments and whitespace and other lines that contain no code */
1393 if ((current
< end
) && (current
[0] == '\n')) return -1;
1398 bool matches (char* begin
, string keyword
) {
1399 char* keyword_array
= (char *) keyword
;
1400 long len
= keyword
.length
;
1401 for (int i
= 0; i
< len
; i
++) {
1402 if (begin
[i
] != keyword_array
[i
]) {
1409 bool whitespace () {
1411 while (current
< end
&& current
[0].isspace () && current
[0] != '\n' ) {
1418 if ((column
== 1) && (current
< end
) && (current
[0] == '#')) {
1426 inline
bool newline () {
1427 if (current
[0] == '\n') {
1434 bool skip_newlines () {
1435 bool new_lines
= false;
1437 while (newline ()) {
1442 current_indent_level
= 0;
1450 bool comment (bool file_comment
= false) {
1451 if (current
> end
- 2
1452 || current
[0] != '/'
1453 || (current
[1] != '/' && current
[1] != '*')) {
1458 if (current
[1] == '/') {
1459 // single-line comment
1461 SourceReference source_reference
= null;
1463 source_reference
= new
SourceReference (source_file
, line
, column
, line
, column
);
1468 // skip until end of line or end of file
1469 while (current
< end
&& current
[0] != '\n') {
1473 /* do not ignore EOL if comment does not exclusively occupy the line */
1474 if (current
[0] == '\n' && last_token
== TokenType
.EOL
) {
1478 current_indent_level
= 0;
1481 if (source_reference
!= null) {
1482 push_comment (((string) begin
).substring (0, (long) (current
- begin
)), source_reference
, file_comment
);
1486 // delimited comment
1487 SourceReference source_reference
= null;
1488 if (file_comment
&& current
[2] == '*') {
1492 if (current
[2] == '*' || file_comment
) {
1493 source_reference
= new
SourceReference (source_file
, line
, column
, line
, column
);
1497 char* begin
= current
;
1499 while (current
< end
- 1
1500 && (current
[0] != '*' || current
[1] != '/')) {
1501 if (current
[0] == '\n') {
1508 if (current
== end
- 1) {
1509 Report
.error (new
SourceReference (source_file
, line
, column
, line
, column
), "syntax error, expected */");
1513 if (source_reference
!= null) {
1514 string comment
= ((string) begin
).substring (0, (long) (current
- begin
));
1515 push_comment (comment
, source_reference
, file_comment
);
1527 while (current
< end
&& current
[0] == '\t' ) {
1536 void skip_space_tabs () {
1537 while (whitespace () || skip_tabs () || comment () ) {
1543 while (whitespace () || comment ()) {
1547 public void parse_file_comments () {
1548 while (whitespace () || comment (true)) {
1553 void push_comment (string comment_item
, SourceReference source_reference
, bool file_comment
) {
1554 if (comment_item
[0] == '*') {
1555 _comment
= new
Comment (comment_item
, source_reference
);
1559 source_file
.add_comment (new
Comment (comment_item
, source_reference
));
/**
 * Clears and returns the content of the comment stack.
 *
 * @return saved comment
 */
1569 public Comment?
pop_comment () {
1570 if (_comment
== null) {
1574 var comment
= _comment
;
1579 bool pp_whitespace () {
1581 while (current
< end
&& current
[0].isspace () && current
[0] != '\n') {
1589 void pp_directive () {
1596 char* begin
= current
;
1598 while (current
< end
&& current
[0].isalnum ()) {
1604 if (len
== 2 && matches (begin
, "if")) {
1606 } else if (len
== 4 && matches (begin
, "elif")) {
1608 } else if (len
== 4 && matches (begin
, "else")) {
1610 } else if (len
== 5 && matches (begin
, "endif")) {
1613 Report
.error (new
SourceReference (source_file
, line
, column
- len
, line
, column
), "syntax error, invalid preprocessing directive");
1616 if (conditional_stack
.length
> 0
1617 && conditional_stack
[conditional_stack
.length
- 1].skip_section
) {
1618 // skip lines until next preprocessing directive
1620 while (current
< end
) {
1621 if (bol
&& current
[0] == '#') {
1622 // go back to begin of line
1623 current
-= (column
- 1);
1627 if (current
[0] == '\n') {
1631 } else if (!current
[0].isspace ()) {
1642 if (current
>= end
|| current
[0] != '\n') {
1643 Report
.error (new
SourceReference (source_file
, line
, column
, line
, column
), "syntax error, expected newline");
1647 void parse_pp_if () {
1650 bool condition
= parse_pp_expression ();
1654 conditional_stack
+= Conditional ();
1656 if (condition
&& (conditional_stack
.length
== 1 || !conditional_stack
[conditional_stack
.length
- 2].skip_section
)) {
1657 // condition true => process code within if
1658 conditional_stack
[conditional_stack
.length
- 1].matched
= true;
1660 // skip lines until next preprocessing directive
1661 conditional_stack
[conditional_stack
.length
- 1].skip_section
= true;
1665 void parse_pp_elif () {
1668 bool condition
= parse_pp_expression ();
1672 if (conditional_stack
.length
== 0 || conditional_stack
[conditional_stack
.length
- 1].else_found
) {
1673 Report
.error (new
SourceReference (source_file
, line
, column
, line
, column
), "syntax error, unexpected #elif");
1677 if (condition
&& !conditional_stack
[conditional_stack
.length
- 1].matched
1678 && (conditional_stack
.length
== 1 || !conditional_stack
[conditional_stack
.length
- 2].skip_section
)) {
1679 // condition true and no earlier branch matched => process code within elif
1680 conditional_stack
[conditional_stack
.length
- 1].matched
= true;
1681 conditional_stack
[conditional_stack
.length
- 1].skip_section
= false;
1683 // skip lines until next preprocessing directive
1684 conditional_stack
[conditional_stack
.length
- 1].skip_section
= true;
1688 void parse_pp_else () {
1691 if (conditional_stack
.length
== 0 || conditional_stack
[conditional_stack
.length
- 1].else_found
) {
1692 Report
.error (new
SourceReference (source_file
, line
, column
, line
, column
), "syntax error, unexpected #else");
1696 if (!conditional_stack
[conditional_stack
.length
- 1].matched
1697 && (conditional_stack
.length
== 1 || !conditional_stack
[conditional_stack
.length
- 2].skip_section
)) {
1698 // no earlier branch matched => process code within else
1699 conditional_stack
[conditional_stack
.length
- 1].matched
= true;
1700 conditional_stack
[conditional_stack
.length
- 1].skip_section
= false;
1702 // skip lines until next preprocessing directive
1703 conditional_stack
[conditional_stack
.length
- 1].skip_section
= true;
1707 void parse_pp_endif () {
1710 if (conditional_stack
.length
== 0) {
1711 Report
.error (new
SourceReference (source_file
, line
, column
, line
, column
), "syntax error, unexpected #endif");
1715 conditional_stack
.length
--;
1718 bool parse_pp_symbol () {
1720 while (current
< end
&& is_ident_char (current
[0])) {
1727 Report
.error (new
SourceReference (source_file
, line
, column
, line
, column
), "syntax error, expected identifier");
1731 string identifier
= ((string) (current
- len
)).substring (0, len
);
1733 if (identifier
== "true") {
1735 } else if (identifier
== "false") {
1738 defined
= source_file
.context
.is_defined (identifier
);
1744 bool parse_pp_primary_expression () {
1745 if (current
>= end
) {
1746 Report
.error (new
SourceReference (source_file
, line
, column
, line
, column
), "syntax error, expected identifier");
1747 } else if (is_ident_char (current
[0])) {
1748 return parse_pp_symbol ();
1749 } else if (current
[0] == '(') {
1753 bool result
= parse_pp_expression ();
1755 if (current
< end
&& current
[0] == ')') {
1759 Report
.error (new
SourceReference (source_file
, line
, column
, line
, column
), "syntax error, expected `)'");
1763 Report
.error (new
SourceReference (source_file
, line
, column
, line
, column
), "syntax error, expected identifier");
1768 bool parse_pp_unary_expression () {
1769 if (current
< end
&& current
[0] == '!') {
1773 return !parse_pp_unary_expression ();
1776 return parse_pp_primary_expression ();
1779 bool parse_pp_equality_expression () {
1780 bool left
= parse_pp_unary_expression ();
1783 if (current
< end
- 1 && current
[0] == '=' && current
[1] == '=') {
1787 bool right
= parse_pp_unary_expression ();
1788 left
= (left
== right
);
1789 } else if (current
< end
- 1 && current
[0] == '!' && current
[1] == '=') {
1793 bool right
= parse_pp_unary_expression ();
1794 left
= (left
!= right
);
1802 bool parse_pp_and_expression () {
1803 bool left
= parse_pp_equality_expression ();
1805 while (current
< end
- 1 && current
[0] == '&' && current
[1] == '&') {
1809 bool right
= parse_pp_equality_expression ();
1810 left
= left
&& right
;
1815 bool parse_pp_or_expression () {
1816 bool left
= parse_pp_and_expression ();
1818 while (current
< end
- 1 && current
[0] == '|' && current
[1] == '|') {
1822 bool right
= parse_pp_and_expression ();
1823 left
= left
|| right
;
1828 bool parse_pp_expression () {
1829 return parse_pp_or_expression ();