1 /* valageniescanner.vala
3 * Copyright (C) 2008 Jamie McCracken, Jürg Billeter
4 * Based on code by Jürg Billeter
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21 * Jamie McCracken jamiemcc gnome org
27 * Lexical scanner for Genie source files.
// Scanner for Genie source text; the token-reading entry point visible in this file is read_token ().
29 public class Vala
.Genie
.Scanner
{
// File being scanned; assigned by the constructor.
30 public SourceFile source_file
{ get; private set; }
// Spaces per indentation level; the `_indent_spaces == 0` checks in this file select tab counting instead.
32 public int indent_spaces
{ get; set;}
// Indentation level of the line being scanned (set from count_tabs () in read_token ()).
41 int current_indent_level
;
45 /* track open parens and braces for automatic line continuations */
46 int open_parens_count
;
// Stack of open preprocessor conditionals; grown by parse_pp_if () and shrunk by parse_pp_endif ().
54 Conditional
[] conditional_stack
;
// NOTE(review): presumably members of the Conditional element type (they are accessed as
// conditional_stack[...].else_found / .skip_section); the enclosing declaration is not visible in this view.
58 public bool else_found
;
59 public bool skip_section
;
// Creates a scanner over the mapped contents of source_file and resets the
// indentation, open-paren and token-tracking state.
73 public Scanner (SourceFile source_file
) {
74 this
.source_file
= source_file
;
// begin and end delimit the mapped buffer: end is begin plus the mapped length
76 begin
= source_file
.get_mapped_contents ();
77 end
= begin
+ source_file
.get_mapped_length ();
84 current_indent_level
= 0;
88 open_parens_count
= 0;
// no token has been produced yet
91 parse_started
= false;
92 last_token
= TokenType
.NONE
;
// NOTE(review): the method header is not visible in this view; presumably this is the body of
// in_template (), which read_token () calls — confirm.
// True when state_stack is non-empty and its top entry is State.TEMPLATE.
97 return (state_stack
.length
> 0 && state_stack
[state_stack
.length
- 1] == State
.TEMPLATE
);
/* Reports whether the innermost scanner state is State.TEMPLATE_PART. */
bool in_template_part () {
	int depth = state_stack.length;
	if (depth <= 0) {
		// nothing on the state stack, so no template part can be active
		return false;
	}
	return state_stack[depth - 1] == State.TEMPLATE_PART;
}
/* Tells whether c may appear inside an identifier: alphanumeric (per char.isalnum ()) or an underscore. */
bool is_ident_char (char c) {
	if (c == '_') {
		return true;
	}
	return c.isalnum ();
}
/* Reports whether the innermost scanner state is State.REGEX_LITERAL. */
bool in_regex_literal () {
	int depth = state_stack.length;
	if (depth <= 0) {
		// nothing on the state stack, so no regex literal can be open
		return false;
	}
	return state_stack[depth - 1] == State.REGEX_LITERAL;
}
// Scans one token while a regex literal is open (read_token () calls this when in_regex_literal () holds).
// Produces EOF at end of input, CLOSE_REGEX_LITERAL (followed by optional i/s/m/x modifier
// characters) or REGEX_LITERAL for the literal's text.
// token_begin / token_end receive the position, line and column of the token.
113 public TokenType
read_regex_token (out SourceLocation token_begin
, out SourceLocation token_end
) {
115 char* begin
= current
;
116 token_begin
.pos
= begin
;
117 token_begin
.line
= line
;
118 token_begin
.column
= column
;
// -1 means the column advance is taken from the byte distance current - begin (see the end of this method)
120 int token_length_in_chars
= -1;
122 if (current
>= end
) {
123 type
= TokenType
.EOF
;
125 switch (current
[0]) {
127 type
= TokenType
.CLOSE_REGEX_LITERAL
;
// the literal is finished: drop the regex state
129 state_stack
.length
--;
// consume trailing modifier characters; a repeated modifier is reported as an error
134 while (current
[0] == 'i' || current
[0] == 's' || current
[0] == 'm' || current
[0] == 'x') {
135 switch (current
[0]) {
138 Report
.error (new
SourceReference (source_file
, line
, column
+ token_length_in_chars
, line
, column
+ token_length_in_chars
), "modifier 'i' used more than once");
144 Report
.error (new
SourceReference (source_file
, line
, column
+ token_length_in_chars
, line
, column
+ token_length_in_chars
), "modifier 's' used more than once");
150 Report
.error (new
SourceReference (source_file
, line
, column
+ token_length_in_chars
, line
, column
+ token_length_in_chars
), "modifier 'm' used more than once");
156 Report
.error (new
SourceReference (source_file
, line
, column
+ token_length_in_chars
, line
, column
+ token_length_in_chars
), "modifier 'x' used more than once");
162 token_length_in_chars
++;
// text of the literal: scan up to the next '/', handling backslash escapes
166 type
= TokenType
.REGEX_LITERAL
;
167 token_length_in_chars
= 0;
168 while (current
< end
&& current
[0] != '/') {
169 if (current
[0] == '\\') {
171 token_length_in_chars
++;
172 if (current
>= end
) {
176 switch (current
[0]) {
227 token_length_in_chars
++;
230 // hexadecimal escape character
232 token_length_in_chars
++;
233 while (current
< end
&& current
[0].isxdigit ()) {
235 token_length_in_chars
++;
239 Report
.error (new
SourceReference (source_file
, line
, column
+ token_length_in_chars
, line
, column
+ token_length_in_chars
), "invalid escape sequence");
242 } else if (current
[0] == '\n') {
// any other character: decode one UTF-8 sequence so the length is counted in characters, not bytes
245 unichar u
= ((string) current
).get_char_validated ((long) (end
- current
));
246 if (u
!= (unichar
) (-1)) {
247 current
+= u
.to_utf8 (null);
248 token_length_in_chars
++;
251 Report
.error (new
SourceReference (source_file
, line
, column
+ token_length_in_chars
, line
, column
+ token_length_in_chars
), "invalid UTF-8 character");
// the literal ran into end of input or end of line without being closed
// NOTE(review): the loop above stops at '/', yet the message says `expected "` — confirm the wording is intended
255 if (current
>= end
|| current
[0] == '\n') {
256 Report
.error (new
SourceReference (source_file
, line
, column
+ token_length_in_chars
, line
, column
+ token_length_in_chars
), "syntax error, expected \"");
// abandon the regex state and continue with ordinary scanning
257 state_stack
.length
--;
258 return read_token (out token_begin
, out token_end
);
// advance the column: by bytes when no character count was kept, by characters otherwise
264 if (token_length_in_chars
< 0) {
265 column
+= (int) (current
- begin
);
267 column
+= token_length_in_chars
;
270 token_end
.pos
= current
;
271 token_end
.line
= line
;
272 token_end
.column
= column
- 1;
// Repositions the scanner at location (position, line and column) and
// discards the preprocessor conditional stack.
278 public void seek (SourceLocation location
) {
279 current
= location
.pos
;
280 line
= location
.line
;
281 column
= location
.column
;
283 conditional_stack
= null;
// Maps the identifier text starting at begin to its Genie keyword token; falls back to
// TokenType.IDENTIFIER when no keyword applies.
// Some keywords map to differently named tokens: "or" -> OP_OR, "and" -> OP_AND, "not" -> OP_NEG,
// "self" -> THIS, "exception" -> ERRORDOMAIN.
// NOTE(review): the visible checks are grouped by keyword length (2 to 10 characters); the statements
// that use len to choose a group are not visible in this view.
287 TokenType
get_identifier_or_keyword (char* begin
, int len
) {
// 2-character keywords
292 if (matches (begin
, "as")) return TokenType
.AS
;
295 if (matches (begin
, "do")) return TokenType
.DO
;
308 if (matches (begin
, "of")) return TokenType
.OF
;
310 if (matches (begin
, "or")) return TokenType
.OP_OR
;
313 if (matches (begin
, "to")) return TokenType
.TO
;
// 3-character keywords
320 if (matches (begin
, "and")) return TokenType
.OP_AND
;
323 if (matches (begin
, "def")) return TokenType
.DEF
;
326 if (matches (begin
, "for")) return TokenType
.FOR
;
329 if (matches (begin
, "get")) return TokenType
.GET
;
332 if (matches (begin
, "isa")) return TokenType
.ISA
;
337 if (matches (begin
, "new")) return TokenType
.NEW
;
340 if (matches (begin
, "not")) return TokenType
.OP_NEG
;
345 if (matches (begin
, "out")) return TokenType
.OUT
;
348 if (matches (begin
, "ref")) return TokenType
.REF
;
351 if (matches (begin
, "set")) return TokenType
.SET
;
354 if (matches (begin
, "try")) return TokenType
.TRY
;
357 if (matches (begin
, "var")) return TokenType
.VAR
;
// 4-character keywords
364 if (matches (begin
, "case")) return TokenType
.CASE
;
367 if (matches (begin
, "dict")) return TokenType
.DICT
;
372 if (matches (begin
, "else")) return TokenType
.ELSE
;
375 if (matches (begin
, "enum")) return TokenType
.ENUM
;
380 if (matches (begin
, "init")) return TokenType
.INIT
;
385 if (matches (begin
, "list")) return TokenType
.LIST
;
388 if (matches (begin
, "lock")) return TokenType
.LOCK
;
394 if (matches (begin
, "null")) return TokenType
.NULL
;
399 if (matches (begin
, "pass")) return TokenType
.PASS
;
402 if (matches (begin
, "prop")) return TokenType
.PROP
;
407 if (matches (begin
, "self")) return TokenType
.THIS
;
410 if (matches (begin
, "true")) return TokenType
.TRUE
;
413 if (matches (begin
, "uses")) return TokenType
.USES
;
416 if (matches (begin
, "void")) return TokenType
.VOID
;
421 if (matches (begin
, "weak")) return TokenType
.WEAK
;
424 if (matches (begin
, "when")) return TokenType
.WHEN
;
// 5-character keywords
435 if (matches (begin
, "array")) return TokenType
.ARRAY
;
438 if (matches (begin
, "async")) return TokenType
.ASYNC
;
443 if (matches (begin
, "break")) return TokenType
.BREAK
;
448 if (matches (begin
, "class")) return TokenType
.CLASS
;
451 if (matches (begin
, "const")) return TokenType
.CONST
;
456 if (matches (begin
, "event")) return TokenType
.EVENT
;
461 if (matches (begin
, "false")) return TokenType
.FALSE
;
464 if (matches (begin
, "final")) return TokenType
.FINAL
;
469 if (matches (begin
, "owned")) return TokenType
.OWNED
;
472 if (matches (begin
, "print")) return TokenType
.PRINT
;
475 if (matches (begin
, "super")) return TokenType
.SUPER
;
478 if (matches (begin
, "raise")) return TokenType
.RAISE
;
481 if (matches (begin
, "while")) return TokenType
.WHILE
;
484 if (matches (begin
, "yield")) return TokenType
.YIELD
;
// 6-character keywords
491 if (matches (begin
, "assert")) return TokenType
.ASSERT
;
496 if (matches (begin
, "delete")) return TokenType
.DELETE
;
499 if (matches (begin
, "downto")) return TokenType
.DOWNTO
;
508 if (matches (begin
, "except")) return TokenType
.EXCEPT
;
511 if (matches (begin
, "extern")) return TokenType
.EXTERN
;
518 if (matches (begin
, "inline")) return TokenType
.INLINE
;
523 if (matches (begin
, "params")) return TokenType
.PARAMS
;
526 if (matches (begin
, "public")) return TokenType
.PUBLIC
;
533 if (matches (begin
, "raises")) return TokenType
.RAISES
;
536 if (matches (begin
, "return")) return TokenType
.RETURN
;
543 if (matches (begin
, "sizeof")) return TokenType
.SIZEOF
;
548 if (matches (begin
, "static")) return TokenType
.STATIC
;
551 if (matches (begin
, "struct")) return TokenType
.STRUCT
;
558 if (matches (begin
, "typeof")) return TokenType
.TYPEOF
;
// 7-character keywords
567 if (matches (begin
, "default")) return TokenType
.DEFAULT
;
570 if (matches (begin
, "dynamic")) return TokenType
.DYNAMIC
;
575 if (matches (begin
, "ensures")) return TokenType
.ENSURES
;
580 if (matches (begin
, "finally")) return TokenType
.FINALLY
;
585 if (matches (begin
, "private")) return TokenType
.PRIVATE
;
588 if (matches (begin
, "unowned")) return TokenType
.UNOWNED
;
591 if (matches (begin
, "virtual")) return TokenType
.VIRTUAL
;
// 8-character keywords
598 if (matches (begin
, "abstract")) return TokenType
.ABSTRACT
;
601 if (matches (begin
, "continue")) return TokenType
.CONTINUE
;
604 if (matches (begin
, "delegate")) return TokenType
.DELEGATE
;
607 if (matches (begin
, "internal")) return TokenType
.INTERNAL
;
610 if (matches (begin
, "override")) return TokenType
.OVERRIDE
;
615 if (matches (begin
, "readonly")) return TokenType
.READONLY
;
618 if (matches (begin
, "requires")) return TokenType
.REQUIRES
;
623 if (matches (begin
, "volatile")) return TokenType
.VOLATILE
;
// 9-character keywords
630 if (matches (begin
, "construct")) return TokenType
.CONSTRUCT
;
633 if (matches (begin
, "exception")) return TokenType
.ERRORDOMAIN
;
636 if (matches (begin
, "interface")) return TokenType
.INTERFACE
;
639 if (matches (begin
, "namespace")) return TokenType
.NAMESPACE
;
642 if (matches (begin
, "protected")) return TokenType
.PROTECTED
;
645 if (matches (begin
, "writeonly")) return TokenType
.WRITEONLY
;
// 10-character keywords
652 if (matches (begin
, "implements")) return TokenType
.IMPLEMENTS
;
// no keyword applied
657 return TokenType
.IDENTIFIER
;
// Scans one token while a string template is open (read_token () calls this when in_template () holds).
// Produces EOF at end of input, CLOSE_TEMPLATE, an IDENTIFIER or parenthesised expression
// introduced by '$', or TEMPLATE_STRING_LITERAL for literal text.
// token_begin / token_end receive the position, line and column of the token.
661 public TokenType
read_template_token (out SourceLocation token_begin
, out SourceLocation token_end
) {
663 char* begin
= current
;
664 token_begin
.pos
= begin
;
665 token_begin
.line
= line
;
666 token_begin
.column
= column
;
// -1 means the column advance is taken from the byte distance current - begin (see the end of this method)
668 int token_length_in_chars
= -1;
670 if (current
>= end
) {
671 type
= TokenType
.EOF
;
673 switch (current
[0]) {
675 type
= TokenType
.CLOSE_TEMPLATE
;
// the template is finished: drop the template state
677 state_stack
.length
--;
680 token_begin
.pos
++; // $ is not part of following token
// $name: a plain identifier
682 if (current
[0].isalpha () || current
[0] == '_') {
684 while (current
< end
&& is_ident_char (current
[0])) {
688 type
= TokenType
.IDENTIFIER
;
689 state_stack
+= State
.TEMPLATE_PART
;
// $(...): an embedded expression, scanned by read_token () in the PARENS state
690 } else if (current
[0] == '(') {
693 state_stack
+= State
.PARENS
;
694 return read_token (out token_begin
, out token_end
);
// $$: yields a TEMPLATE_STRING_LITERAL token
695 } else if (current
[0] == '$') {
696 type
= TokenType
.TEMPLATE_STRING_LITERAL
;
698 state_stack
+= State
.TEMPLATE_PART
;
// anything else after '$' is an error; scanning of the template continues
700 Report
.error (new
SourceReference (source_file
, line
, column
+ 1, line
, column
+ 1), "unexpected character");
701 return read_template_token (out token_begin
, out token_end
);
// literal text of the template, up to the closing quote or the next '$'
705 type
= TokenType
.TEMPLATE_STRING_LITERAL
;
706 token_length_in_chars
= 0;
707 while (current
< end
&& current
[0] != '"' && current
[0] != '$') {
708 if (current
[0] == '\\') {
710 token_length_in_chars
++;
711 if (current
>= end
) {
715 switch (current
[0]) {
726 token_length_in_chars
++;
729 // hexadecimal escape character
731 token_length_in_chars
++;
732 while (current
< end
&& current
[0].isxdigit ()) {
734 token_length_in_chars
++;
738 Report
.error (new
SourceReference (source_file
, line
, column
+ token_length_in_chars
, line
, column
+ token_length_in_chars
), "invalid escape sequence");
741 } else if (current
[0] == '\n') {
// any other character: decode one UTF-8 sequence so the length is counted in characters, not bytes
744 unichar u
= ((string) current
).get_char_validated ((long) (end
- current
));
745 if (u
!= (unichar
) (-1)) {
746 current
+= u
.to_utf8 (null);
747 token_length_in_chars
++;
750 Report
.error (new
SourceReference (source_file
, line
, column
+ token_length_in_chars
, line
, column
+ token_length_in_chars
), "invalid UTF-8 character");
// the template ran into end of input or end of line without being closed:
// report it, abandon the template state and continue with ordinary scanning
754 if (current
>= end
|| current
[0] == '\n') {
755 Report
.error (new
SourceReference (source_file
, line
, column
+ token_length_in_chars
, line
, column
+ token_length_in_chars
), "syntax error, expected \"");
756 state_stack
.length
--;
757 return read_token (out token_begin
, out token_end
);
759 state_stack
+= State
.TEMPLATE_PART
;
// advance the column: by bytes when no character count was kept, by characters otherwise
764 if (token_length_in_chars
< 0) {
765 column
+= (int) (current
- begin
);
767 column
+= token_length_in_chars
;
770 token_end
.pos
= current
;
771 token_end
.line
= line
;
772 token_end
.column
= column
- 1;
// Scans the next token and fills token_begin / token_end with its location.
// Delegates to read_template_token () / read_regex_token () while a string template or regex
// literal is open, and produces the layout tokens EOL, INDENT and DEDENT from newlines and
// indentation before scanning ordinary tokens.
778 public TokenType
read_token (out SourceLocation token_begin
, out SourceLocation token_end
) {
781 if (in_template ()) {
782 return read_template_token (out token_begin
, out token_end
);
// a template part has just been scanned: pop its state and return a COMMA located at the current position
783 } else if (in_template_part ()) {
784 state_stack
.length
--;
786 token_begin
.pos
= current
;
787 token_begin
.line
= line
;
788 token_begin
.column
= column
;
790 token_end
.pos
= current
;
791 token_end
.line
= line
;
792 token_end
.column
= column
- 1;
794 return TokenType
.COMMA
;
795 } else if (in_regex_literal ()) {
796 return read_regex_token (out token_begin
, out token_end
);
801 /* emit dedents if outstanding before checking any other chars */
803 if (pending_dedents
> 0) {
808 token_begin
.pos
= current
;
809 token_begin
.line
= line
;
810 token_begin
.column
= column
;
812 token_end
.pos
= current
;
813 token_end
.line
= line
;
814 token_end
.column
= column
;
816 last_token
= TokenType
.DEDENT
;
818 return TokenType
.DEDENT
;
821 if ((_indent_spaces
== 0 ) || (last_token
!= TokenType
.EOL
)) {
822 /* scrub whitespace (excluding newlines) and comments */
827 /* handle explicit line continuation (lines ending with "\") */
// NOTE(review): current[1] is read after only a `current < end` check — confirm the buffer
// always has a readable byte after the last character
828 while (current
< end
&& current
[0] == '\\' && current
[1] == '\n') {
834 /* handle automatic line continuations (when inside parens or braces) */
835 while (current
< end
&& current
[0] == '\n' && (open_parens_count
> 0 || open_brace_count
> 0)) {
842 /* handle non-consecutive new line once parsing is underway - EOL */
843 if (newline () && parse_started
&& last_token
!= TokenType
.EOL
&& last_token
!= TokenType
.SEMICOLON
) {
844 token_begin
.pos
= current
;
845 token_begin
.line
= line
;
846 token_begin
.column
= column
;
848 token_end
.pos
= current
;
849 token_end
.line
= line
;
850 token_end
.column
= column
;
852 last_token
= TokenType
.EOL
;
854 return TokenType
.EOL
;
// after one or more newlines, compare the new line's indentation with the current level
858 while (skip_newlines ()) {
859 token_begin
.pos
= current
;
860 token_begin
.line
= line
;
861 token_begin
.column
= column
;
863 current_indent_level
= count_tabs ();
865 /* if it's an empty new line then ignore */
866 if (current_indent_level
== -1) {
// deeper indentation opens a block
870 if (current_indent_level
> indent_level
) {
871 indent_level
= current_indent_level
;
873 token_end
.pos
= current
;
874 token_end
.line
= line
;
875 token_end
.column
= column
;
877 last_token
= TokenType
.INDENT
;
879 return TokenType
.INDENT
;
// shallower indentation closes blocks; the number of closed levels is kept in pending_dedents,
// which the check near the top of this method turns into further DEDENT tokens
880 } else if (current_indent_level
< indent_level
) {
883 pending_dedents
= (indent_level
- current_indent_level
);
885 token_end
.pos
= current
;
886 token_end
.line
= line
;
887 token_end
.column
= column
;
889 last_token
= TokenType
.DEDENT
;
891 return TokenType
.DEDENT
;
// ordinary token scanning starts here
896 char* begin
= current
;
897 token_begin
.pos
= begin
;
898 token_begin
.line
= line
;
899 token_begin
.column
= column
;
// -1 means the column advance is taken from the byte distance current - begin (see the end of this method)
901 int token_length_in_chars
= -1;
903 parse_started
= true;
// end of input: open indentation levels are closed with DEDENT before EOF is reported
905 if (current
>= end
) {
906 if (indent_level
> 0) {
909 pending_dedents
= indent_level
;
911 type
= TokenType
.DEDENT
;
913 type
= TokenType
.EOF
;
// identifiers and keywords
915 } else if (current
[0].isalpha () || current
[0] == '_') {
917 while (current
< end
&& is_ident_char (current
[0])) {
921 type
= get_identifier_or_keyword (begin
, len
);
// '@' followed by '"' opens a string template; otherwise '@' prefixes an identifier
922 } else if (current
[0] == '@') {
923 if (current
< end
- 1 && current
[1] == '"') {
924 type
= TokenType
.OPEN_TEMPLATE
;
926 state_stack
+= State
.TEMPLATE
;
928 token_begin
.pos
++; // @ is not part of the identifier
931 while (current
< end
&& is_ident_char (current
[0])) {
935 type
= TokenType
.IDENTIFIER
;
// numeric literals: integers with optional l/u suffixes, reals, hexadecimal integers
937 } else if (current
[0].isdigit ()) {
938 while (current
< end
&& current
[0].isdigit ()) {
941 type
= TokenType
.INTEGER_LITERAL
;
942 if (current
< end
&& current
[0].tolower () == 'l') {
944 if (current
< end
&& current
[0].tolower () == 'l') {
947 } else if (current
< end
&& current
[0].tolower () == 'u') {
949 if (current
< end
&& current
[0].tolower () == 'l') {
951 if (current
< end
&& current
[0].tolower () == 'l') {
// a '.' followed by a digit continues the number as a real literal
955 } else if (current
< end
- 1 && current
[0] == '.' && current
[1].isdigit ()) {
957 while (current
< end
&& current
[0].isdigit ()) {
960 if (current
< end
&& current
[0].tolower () == 'e') {
962 if (current
< end
&& (current
[0] == '+' || current
[0] == '-')) {
965 while (current
< end
&& current
[0].isdigit ()) {
969 if (current
< end
&& current
[0].tolower () == 'f') {
972 type
= TokenType
.REAL_LITERAL
;
973 } else if (current
< end
&& current
== begin
+ 1
974 && begin
[0] == '0' && begin
[1] == 'x' && begin
[2].isxdigit ()) {
975 // hexadecimal integer literal
977 while (current
< end
&& current
[0].isxdigit ()) {
980 } else if (current
< end
&& is_ident_char (current
[0])) {
981 // allow identifiers to start with a digit
982 // as long as they contain at least one char
983 while (current
< end
&& is_ident_char (current
[0])) {
986 type
= TokenType
.IDENTIFIER
;
// punctuation and operators; brackets also push/pop state_stack and parens update open_parens_count
989 switch (current
[0]) {
991 type
= TokenType
.OPEN_BRACE
;
993 state_stack
+= State
.BRACE
;
997 type
= TokenType
.CLOSE_BRACE
;
999 state_stack
.length
--;
1003 type
= TokenType
.OPEN_PARENS
;
1004 open_parens_count
++;
1005 state_stack
+= State
.PARENS
;
1009 type
= TokenType
.CLOSE_PARENS
;
1010 open_parens_count
--;
1012 state_stack
.length
--;
// a parenthesis that closes back into a template is reported as COMMA instead
1013 if (in_template ()) {
1014 type
= TokenType
.COMMA
;
1018 type
= TokenType
.OPEN_BRACKET
;
1019 state_stack
+= State
.BRACKET
;
1023 type
= TokenType
.CLOSE_BRACKET
;
1024 state_stack
.length
--;
1028 type
= TokenType
.DOT
;
1030 if (current
< end
- 1) {
1031 if (current
[0] == '.' && current
[1] == '.') {
1032 type
= TokenType
.ELLIPSIS
;
1038 type
= TokenType
.COLON
;
1042 type
= TokenType
.COMMA
;
1046 type
= TokenType
.SEMICOLON
;
1050 type
= TokenType
.HASH
;
1054 type
= TokenType
.INTERR
;
1058 type
= TokenType
.BITWISE_OR
;
1060 if (current
< end
) {
1061 switch (current
[0]) {
1063 type
= TokenType
.ASSIGN_BITWISE_OR
;
1067 type
= TokenType
.OP_OR
;
1074 type
= TokenType
.BITWISE_AND
;
1076 if (current
< end
) {
1077 switch (current
[0]) {
1079 type
= TokenType
.ASSIGN_BITWISE_AND
;
1083 type
= TokenType
.OP_AND
;
1090 type
= TokenType
.CARRET
;
1092 if (current
< end
&& current
[0] == '=') {
1093 type
= TokenType
.ASSIGN_BITWISE_XOR
;
1098 type
= TokenType
.TILDE
;
1102 type
= TokenType
.ASSIGN
;
1104 if (current
< end
) {
1105 switch (current
[0]) {
1107 type
= TokenType
.OP_EQ
;
1111 type
= TokenType
.LAMBDA
;
1118 type
= TokenType
.OP_LT
;
1120 if (current
< end
) {
1121 switch (current
[0]) {
1123 type
= TokenType
.OP_LE
;
1127 type
= TokenType
.OP_SHIFT_LEFT
;
1129 if (current
< end
&& current
[0] == '=') {
1130 type
= TokenType
.ASSIGN_SHIFT_LEFT
;
1138 type
= TokenType
.OP_GT
;
1140 if (current
< end
&& current
[0] == '=') {
1141 type
= TokenType
.OP_GE
;
1146 type
= TokenType
.OP_NEG
;
1148 if (current
< end
&& current
[0] == '=') {
1149 type
= TokenType
.OP_NE
;
1154 type
= TokenType
.PLUS
;
1156 if (current
< end
) {
1157 switch (current
[0]) {
1159 type
= TokenType
.ASSIGN_ADD
;
1163 type
= TokenType
.OP_INC
;
1170 type
= TokenType
.MINUS
;
1172 if (current
< end
) {
1173 switch (current
[0]) {
1175 type
= TokenType
.ASSIGN_SUB
;
1179 type
= TokenType
.OP_DEC
;
1183 type
= TokenType
.OP_PTR
;
1190 type
= TokenType
.STAR
;
1192 if (current
< end
&& current
[0] == '=') {
1193 type
= TokenType
.ASSIGN_MUL
;
// a regex literal is opened when the previous token is one of the kinds listed below;
// otherwise the character is scanned as DIV / ASSIGN_DIV
1198 switch (last_token
) {
1199 case TokenType
.ASSIGN
:
1200 case TokenType
.COMMA
:
1201 case TokenType
.MINUS
:
1202 case TokenType
.OP_AND
:
1203 case TokenType
.OP_DEC
:
1204 case TokenType
.OP_EQ
:
1205 case TokenType
.OP_GE
:
1206 case TokenType
.OP_GT
:
1207 case TokenType
.OP_INC
:
1208 case TokenType
.OP_LE
:
1209 case TokenType
.OP_LT
:
1210 case TokenType
.OP_NE
:
1211 case TokenType
.OP_NEG
:
1212 case TokenType
.OP_OR
:
1213 case TokenType
.OPEN_BRACE
:
1214 case TokenType
.OPEN_PARENS
:
1215 case TokenType
.PLUS
:
1216 case TokenType
.RETURN
:
1217 type
= TokenType
.OPEN_REGEX_LITERAL
;
1218 state_stack
+= State
.REGEX_LITERAL
;
1222 type
= TokenType
.DIV
;
1224 if (current
< end
&& current
[0] == '=') {
1225 type
= TokenType
.ASSIGN_DIV
;
1233 type
= TokenType
.PERCENT
;
1235 if (current
< end
&& current
[0] == '=') {
1236 type
= TokenType
.ASSIGN_PERCENT
;
// quoted literals: character literal, triple-quoted verbatim string, or ordinary string
1242 if (begin
[0] == '\'') {
1243 type
= TokenType
.CHARACTER_LITERAL
;
1244 } else if (current
< end
- 6 && begin
[1] == '"' && begin
[2] == '"') {
1245 type
= TokenType
.VERBATIM_STRING_LITERAL
;
1246 token_length_in_chars
= 6;
1248 while (current
< end
- 4) {
1249 if (current
[0] == '"' && current
[1] == '"' && current
[2] == '"') {
1251 } else if (current
[0] == '\n') {
1255 token_length_in_chars
= 3;
1257 unichar u
= ((string) current
).get_char_validated ((long) (end
- current
));
1258 if (u
!= (unichar
) (-1)) {
1259 current
+= u
.to_utf8 (null);
1260 token_length_in_chars
++;
1262 Report
.error (new
SourceReference (source_file
, line
, column
+ token_length_in_chars
, line
, column
+ token_length_in_chars
), "invalid UTF-8 character");
1266 if (current
[0] == '"' && current
[1] == '"' && current
[2] == '"') {
1269 Report
.error (new
SourceReference (source_file
, line
, column
+ token_length_in_chars
, line
, column
+ token_length_in_chars
), "syntax error, expected \"\"\"");
1273 type
= TokenType
.STRING_LITERAL
;
1275 token_length_in_chars
= 2;
// scan up to the matching quote character (the one the literal started with)
1277 while (current
< end
&& current
[0] != begin
[0]) {
1278 if (current
[0] == '\\') {
1280 token_length_in_chars
++;
1281 if (current
>= end
) {
1285 switch (current
[0]) {
1296 token_length_in_chars
++;
1299 // hexadecimal escape character
1301 token_length_in_chars
++;
1302 while (current
< end
&& current
[0].isxdigit ()) {
1304 token_length_in_chars
++;
1308 Report
.error (new
SourceReference (source_file
, line
, column
+ token_length_in_chars
, line
, column
+ token_length_in_chars
), "invalid escape sequence");
1311 } else if (current
[0] == '\n') {
1314 unichar u
= ((string) current
).get_char_validated ((long) (end
- current
));
1315 if (u
!= (unichar
) (-1)) {
1316 current
+= u
.to_utf8 (null);
1317 token_length_in_chars
++;
1320 Report
.error (new
SourceReference (source_file
, line
, column
+ token_length_in_chars
, line
, column
+ token_length_in_chars
), "invalid UTF-8 character");
1324 if (current
< end
&& current
[0] != '\n') {
1327 Report
.error (new
SourceReference (source_file
, line
, column
+ token_length_in_chars
, line
, column
+ token_length_in_chars
), "syntax error, expected %c".printf (begin
[0]));
// a character that starts no token: report it, then continue with the next token
1331 unichar u
= ((string) current
).get_char_validated ((long) (end
- current
));
1332 if (u
!= (unichar
) (-1)) {
1333 current
+= u
.to_utf8 (null);
1334 Report
.error (new
SourceReference (source_file
, line
, column
, line
, column
), "syntax error, unexpected character");
1337 Report
.error (new
SourceReference (source_file
, line
, column
, line
, column
), "invalid UTF-8 character");
1340 last_token
= TokenType
.STRING_LITERAL
;
1341 return read_token (out token_begin
, out token_end
);
// advance the column: by bytes when no character count was kept, by characters otherwise
1345 if (token_length_in_chars
< 0) {
1346 column
+= (int) (current
- begin
);
1348 column
+= token_length_in_chars
;
1351 token_end
.pos
= current
;
1352 token_end
.line
= line
;
1353 token_end
.column
= column
- 1;
// NOTE(review): the method header is not visible in this view; presumably this is the body of
// count_tabs (), whose result read_token () stores in current_indent_level — confirm.
// With _indent_spaces == 0 leading tabs are scanned; otherwise leading spaces are counted and
// divided by _indent_spaces. -1 is returned for a line that holds no code.
1365 if (_indent_spaces
== 0) {
1366 while (current
< end
&& current
[0] == '\t') {
1372 int space_count
= 0;
1373 while (current
< end
&& current
[0] == ' ') {
// integer division: a partial indentation step is dropped
1379 tab_count
= space_count
/ _indent_spaces
;
1383 /* ignore comments and whitespace and other lines that contain no code */
1387 if ((current
< end
) && (current
[0] == '\n')) return -1;
// Compares the bytes at begin with keyword, one character at a time, over keyword.len () characters.
// Only the keyword's length is examined, so the caller is responsible for the length of the
// text at begin (pp_directive () checks len before calling).
// NOTE(review): the return statements are not visible in this view.
1392 bool matches (char* begin
, string keyword
) {
1393 char* keyword_array
= (char *) keyword
;
1394 long len
= keyword
.len ();
1395 for (int i
= 0; i
< len
; i
++) {
1396 if (begin
[i
] != keyword_array
[i
]) {
// Scans over whitespace characters other than '\n'.
// NOTE(review): the return value is not visible in this view; callers use it as a loop
// condition, so presumably it reports whether anything was consumed — confirm.
1403 bool whitespace () {
1405 while (current
< end
&& current
[0].isspace () && current
[0] != '\n' ) {
// a '#' in the first column is treated specially
// NOTE(review): presumably this hands over to pp_directive (); the call is not visible here
1412 if ((column
== 1) && (current
< end
) && (current
[0] == '#')) {
// Tests whether the scanner is positioned at a '\n' character.
// NOTE(review): current[0] is read without a visible `current < end` check, and the statements
// executed when the test succeeds are not visible in this view.
1420 inline
bool newline () {
1421 if (current
[0] == '\n') {
// Loops while newline () succeeds and resets current_indent_level to 0.
// NOTE(review): presumably returns new_lines, i.e. whether at least one newline was seen;
// the loop body and the return statement are not visible in this view.
1428 bool skip_newlines () {
1429 bool new_lines
= false;
1431 while (newline ()) {
1436 current_indent_level
= 0;
// Recognises a comment at the current position: a single-line comment starting with "//" or a
// delimited comment starting with "/*". Comment text is handed to push_comment () when a source
// reference was created for it.
// file_comment: passed through to push_comment (); parse_file_comments () calls this with true.
1444 bool comment (bool file_comment
= false) {
// not a comment unless at least two characters remain and they are "//" or "/*"
1445 if (current
> end
- 2
1446 || current
[0] != '/'
1447 || (current
[1] != '/' && current
[1] != '*')) {
1452 if (current
[1] == '/') {
1453 // single-line comment
1455 SourceReference source_reference
= null;
1457 source_reference
= new
SourceReference (source_file
, line
, column
, line
, column
);
1462 // skip until end of line or end of file
1463 while (current
< end
&& current
[0] != '\n') {
1467 /* do not ignore EOL if comment does not exclusively occupy the line */
1468 if (current
[0] == '\n' && last_token
== TokenType
.EOL
) {
1472 current_indent_level
= 0;
1475 if (source_reference
!= null) {
1476 push_comment (((string) begin
).ndup ((long) (current
- begin
)), source_reference
, file_comment
);
1480 // delimited comment
1481 SourceReference source_reference
= null;
1482 if (file_comment
&& current
[2] == '*') {
// a reference is created when the comment starts with "/**" or when file comments are being collected
1486 if (current
[2] == '*' || file_comment
) {
1487 source_reference
= new
SourceReference (source_file
, line
, column
, line
, column
);
1491 char* begin
= current
;
// scan for the closing "*/"
1493 while (current
< end
- 1
1494 && (current
[0] != '*' || current
[1] != '/')) {
1495 if (current
[0] == '\n') {
// reaching the last character without finding the terminator is an error
1502 if (current
== end
- 1) {
1503 Report
.error (new
SourceReference (source_file
, line
, column
, line
, column
), "syntax error, expected */");
1507 if (source_reference
!= null) {
1508 string comment
= ((string) begin
).ndup ((long) (current
- begin
));
1509 push_comment (comment
, source_reference
, file_comment
);
// NOTE(review): the method header is not visible in this view; presumably this loop belongs to
// skip_tabs (), which skip_space_tabs () calls — confirm.
// Scans over consecutive tab characters.
1521 while (current
< end
&& current
[0] == '\t' ) {
// Repeats whitespace (), skip_tabs () and comment () until none of them succeeds.
// NOTE(review): the loop body is not visible in this view.
1530 void skip_space_tabs () {
1531 while (whitespace () || skip_tabs () || comment () ) {
// NOTE(review): the method header is not visible in this view; presumably this loop belongs to a
// helper that skips whitespace and comments but not tabs specifically — confirm.
1537 while (whitespace () || comment ()) {
// Repeats whitespace () and comment (true) until neither succeeds, so that the comments
// encountered are processed as file comments.
// NOTE(review): the loop body is not visible in this view.
1541 public void parse_file_comments () {
1542 while (whitespace () || comment (true)) {
// Records a scanned comment.
// A comment whose text starts with '*' is stored in _comment (the value pop_comment () hands out);
// a Comment is also added to source_file via add_comment ().
// NOTE(review): the condition guarding the add_comment () call is not visible in this view;
// presumably it depends on file_comment — confirm.
1547 void push_comment (string comment_item
, SourceReference source_reference
, bool file_comment
) {
1548 if (comment_item
[0] == '*') {
1549 _comment
= new
Comment (comment_item
, source_reference
);
1553 source_file
.add_comment (new
Comment (comment_item
, source_reference
));
// Hands out the comment saved in _comment, if any.
// NOTE(review): the statements that clear _comment and return are not visible in this view.
1559 * Clears and returns the content of the comment stack.
1561 * @return saved comment
1563 public Comment?
pop_comment () {
1564 if (_comment
== null) {
1568 var comment
= _comment
;
// Scans over whitespace characters other than '\n' while processing a preprocessor directive.
// NOTE(review): the loop body and the return value are not visible in this view.
1573 bool pp_whitespace () {
1575 while (current
< end
&& current
[0].isspace () && current
[0] != '\n') {
// Processes one preprocessor directive.
// Reads the alphanumeric directive name, recognises if / elif / else / endif (anything else is
// reported as invalid) and, while the innermost conditional has skip_section set, scans forward
// to the next line that starts with '#'.
1583 void pp_directive () {
1590 char* begin
= current
;
// the directive name is a run of alphanumeric characters
1592 while (current
< end
&& current
[0].isalnum ()) {
// matches () compares only as many characters as the keyword has, so len is checked first
1598 if (len
== 2 && matches (begin
, "if")) {
1600 } else if (len
== 4 && matches (begin
, "elif")) {
1602 } else if (len
== 4 && matches (begin
, "else")) {
1604 } else if (len
== 5 && matches (begin
, "endif")) {
1607 Report
.error (new
SourceReference (source_file
, line
, column
- len
, line
, column
), "syntax error, invalid preprocessing directive");
1610 if (conditional_stack
.length
> 0
1611 && conditional_stack
[conditional_stack
.length
- 1].skip_section
) {
1612 // skip lines until next preprocessing directive
1614 while (current
< end
) {
1615 if (bol
&& current
[0] == '#') {
1616 // go back to begin of line
1617 current
-= (column
- 1);
1621 if (current
[0] == '\n') {
1625 } else if (!current
[0].isspace ()) {
// a directive must be followed by end of line
1636 if (current
>= end
|| current
[0] != '\n') {
1637 Report
.error (new
SourceReference (source_file
, line
, column
, line
, column
), "syntax error, expected newline");
// Handles #if: evaluates the condition and pushes a new Conditional onto conditional_stack.
// The new entry is marked matched when the condition holds and the enclosing conditional (if any)
// is not being skipped; the other visible assignment sets skip_section on the new entry.
1641 void parse_pp_if () {
1644 bool condition
= parse_pp_expression ();
1648 conditional_stack
+= Conditional ();
// length == 1 means the entry just pushed is the outermost conditional
1650 if (condition
&& (conditional_stack
.length
== 1 || !conditional_stack
[conditional_stack
.length
- 2].skip_section
)) {
1651 // condition true => process code within if
1652 conditional_stack
[conditional_stack
.length
- 1].matched
= true;
1654 // skip lines until next preprocessing directive
1655 conditional_stack
[conditional_stack
.length
- 1].skip_section
= true;
// Handles #elif: evaluates the condition and updates the innermost Conditional.
// Reports an error when there is no open conditional or its #else was already seen.
// The branch is taken only if the condition holds, no earlier branch of this conditional matched
// and the enclosing conditional (if any) is not being skipped.
1659 void parse_pp_elif () {
1662 bool condition
= parse_pp_expression ();
1666 if (conditional_stack
.length
== 0 || conditional_stack
[conditional_stack
.length
- 1].else_found
) {
1667 Report
.error (new
SourceReference (source_file
, line
, column
, line
, column
), "syntax error, unexpected #elif");
// NOTE(review): the code below indexes conditional_stack[length - 1]; presumably the error
// branch above returns first (not visible in this view) — confirm
1671 if (condition
&& !conditional_stack
[conditional_stack
.length
- 1].matched
1672 && (conditional_stack
.length
== 1 || !conditional_stack
[conditional_stack
.length
- 2].skip_section
)) {
1673 // condition true => process code within if
1674 conditional_stack
[conditional_stack
.length
- 1].matched
= true;
1675 conditional_stack
[conditional_stack
.length
- 1].skip_section
= false;
1677 // skip lines until next preprocessing directive
1678 conditional_stack
[conditional_stack
.length
- 1].skip_section
= true;
// Handles #else: updates the innermost Conditional.
// Reports an error when there is no open conditional or its #else was already seen.
// The else branch is taken only if no earlier branch of this conditional matched and the
// enclosing conditional (if any) is not being skipped.
1682 void parse_pp_else () {
1685 if (conditional_stack
.length
== 0 || conditional_stack
[conditional_stack
.length
- 1].else_found
) {
1686 Report
.error (new
SourceReference (source_file
, line
, column
, line
, column
), "syntax error, unexpected #else");
// NOTE(review): the code below indexes conditional_stack[length - 1]; presumably the error
// branch above returns first (not visible in this view) — confirm
1690 if (!conditional_stack
[conditional_stack
.length
- 1].matched
1691 && (conditional_stack
.length
== 1 || !conditional_stack
[conditional_stack
.length
- 2].skip_section
)) {
1692 // condition true => process code within if
1693 conditional_stack
[conditional_stack
.length
- 1].matched
= true;
1694 conditional_stack
[conditional_stack
.length
- 1].skip_section
= false;
1696 // skip lines until next preprocessing directive
1697 conditional_stack
[conditional_stack
.length
- 1].skip_section
= true;
// Handles #endif: reports an error when no conditional is open, otherwise removes the
// innermost Conditional from conditional_stack.
1701 void parse_pp_endif () {
1704 if (conditional_stack
.length
== 0) {
1705 Report
.error (new
SourceReference (source_file
, line
, column
, line
, column
), "syntax error, unexpected #endif");
// NOTE(review): presumably the error branch above returns before this decrement (not visible here)
1709 conditional_stack
.length
--;
// Parses an identifier inside a preprocessor expression and evaluates it.
// "true" and "false" are treated as literals (the values assigned are not visible in this view);
// any other identifier is looked up with source_file.context.is_defined ().
1712 bool parse_pp_symbol () {
1714 while (current
< end
&& is_ident_char (current
[0])) {
1721 Report
.error (new
SourceReference (source_file
, line
, column
, line
, column
), "syntax error, expected identifier");
// the identifier text is the len bytes that end at current
1725 string identifier
= ((string) (current
- len
)).ndup (len
);
1727 if (identifier
== "true") {
1729 } else if (identifier
== "false") {
1732 defined
= source_file
.context
.is_defined (identifier
);
// Parses a primary preprocessor expression: a symbol or a parenthesised expression.
// End of input, a missing ')' or any other character is reported as a syntax error.
1738 bool parse_pp_primary_expression () {
1739 if (current
>= end
) {
1740 Report
.error (new
SourceReference (source_file
, line
, column
, line
, column
), "syntax error, expected identifier");
1741 } else if (is_ident_char (current
[0])) {
1742 return parse_pp_symbol ();
1743 } else if (current
[0] == '(') {
1747 bool result
= parse_pp_expression ();
1749 if (current
< end
&& current
[0] == ')') {
1753 Report
.error (new
SourceReference (source_file
, line
, column
, line
, column
), "syntax error, expected `)'");
1757 Report
.error (new
SourceReference (source_file
, line
, column
, line
, column
), "syntax error, expected identifier");
// Parses a unary preprocessor expression: a leading '!' negates the unary expression that
// follows (recursively); otherwise a primary expression is parsed.
1762 bool parse_pp_unary_expression () {
1763 if (current
< end
&& current
[0] == '!') {
1767 return !parse_pp_unary_expression ();
1770 return parse_pp_primary_expression ();
// Parses unary expressions joined by "==" or "!=" and folds them into a boolean result.
// NOTE(review): the enclosing loop and the return statement are not visible in this view.
1773 bool parse_pp_equality_expression () {
1774 bool left
= parse_pp_unary_expression ();
// `end - 1` guarantees that both operator characters are inside the buffer
1777 if (current
< end
- 1 && current
[0] == '=' && current
[1] == '=') {
1781 bool right
= parse_pp_unary_expression ();
1782 left
= (left
== right
);
1783 } else if (current
< end
- 1 && current
[0] == '!' && current
[1] == '=') {
1787 bool right
= parse_pp_unary_expression ();
1788 left
= (left
!= right
);
// Parses equality expressions joined by "&&".
// The right operand is parsed before it is combined, so it is always consumed from the input
// even when left is already false.
1796 bool parse_pp_and_expression () {
1797 bool left
= parse_pp_equality_expression ();
1799 while (current
< end
- 1 && current
[0] == '&' && current
[1] == '&') {
1803 bool right
= parse_pp_equality_expression ();
1804 left
= left
&& right
;
// Parses and-expressions joined by "||".
// The right operand is parsed before it is combined, so it is always consumed from the input
// even when left is already true.
1809 bool parse_pp_or_expression () {
1810 bool left
= parse_pp_and_expression ();
1812 while (current
< end
- 1 && current
[0] == '|' && current
[1] == '|') {
1816 bool right
= parse_pp_and_expression ();
1817 left
= left
|| right
;
/* Entry point for preprocessor expressions; hands over to the "||" rule, which sits at the top of the rule chain. */
bool parse_pp_expression () {
	bool result = parse_pp_or_expression ();
	return result;
}