vala/valageniescanner.vala

   1 /* valageniescanner.vala
   2  *
   3  * Copyright (C) 2008  Jamie McCracken, Jürg Billeter
   4  * Based on code by Jürg Billeter
   5  *
   6  * This library is free software; you can redistribute it and/or
   7  * modify it under the terms of the GNU Lesser General Public
   8  * License as published by the Free Software Foundation; either
   9  * version 2.1 of the License, or (at your option) any later version.
  10
  11  * This library is distributed in the hope that it will be useful,
  12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14  * Lesser General Public License for more details.
  15
  16  * You should have received a copy of the GNU Lesser General Public
  17  * License along with this library; if not, write to the Free Software
  18  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301  USA
  19  *
  20  * Author:
  21  *      Jamie McCracken jamiemcc gnome org
  22  */
  23
  24 using GLib;
  25
  26 /**
  27  * Lexical scanner for Genie source files.
  28  */
  29 public class Vala.Genie.Scanner {
  30         public SourceFile source_file { get; private set; }
  31
  32         public int indent_spaces { get; set;}
  33
  34         char* begin;
  35         char* current;
  36         char* end;
  37
  38         int line;
  39         int column;
  40
  41         int current_indent_level;
  42         int indent_level;
  43         int pending_dedents;
  44
  45         /* track open parens and braces for automatic line continuations */
  46         int open_parens_count;
  47         int open_brace_count;
  48
  49         TokenType last_token;
  50         bool parse_started;
  51
  52         Comment _comment;
  53
  54         Conditional[] conditional_stack;
  55
  56         struct Conditional {
  57                 public bool matched;
  58                 public bool else_found;
  59                 public bool skip_section;
  60         }
  61
  62         State[] state_stack;
  63
  64         enum State {
  65                 PARENS,
  66                 BRACE,
  67                 BRACKET,
  68                 REGEX_LITERAL,
  69                 TEMPLATE,
  70                 TEMPLATE_PART
  71         }
  72
  73         public Scanner (SourceFile source_file) {
  74                 this.source_file = source_file;
  75
  76                 begin = source_file.get_mapped_contents ();
  77                 end = begin + source_file.get_mapped_length ();
  78
  79                 current = begin;
  80
  81                 _indent_spaces = 0;
  82                 line = 1;
  83                 column = 1;
  84                 current_indent_level = 0;
  85                 indent_level = 0;
  86                 pending_dedents = 0;
  87
  88                 open_parens_count = 0;
  89                 open_brace_count = 0;
  90
  91                 parse_started = false;
  92                 last_token = TokenType.NONE;
  93
  94         }
  95
  96         bool in_template () {
  97                 return (state_stack.length > 0 && state_stack[state_stack.length - 1] == State.TEMPLATE);
  98         }
  99
 100         bool in_template_part () {
 101                 return (state_stack.length > 0 && state_stack[state_stack.length - 1] == State.TEMPLATE_PART);
 102         }
 103
 104         bool is_ident_char (char c) {
 105                 return (c.isalnum () || c == '_');
 106         }
 107
 108         bool in_regex_literal () {
 109                 return (state_stack.length > 0 && state_stack[state_stack.length - 1] == State.REGEX_LITERAL);
 110         }
 111
 112
 113         public TokenType read_regex_token (out SourceLocation token_begin, out SourceLocation token_end) {
 114                 TokenType type;
 115                 char* begin = current;
 116                 token_begin.pos = begin;
 117                 token_begin.line = line;
 118                 token_begin.column = column;
 119
 120                 int token_length_in_chars = -1;
 121
 122                 if (current >= end) {
 123                         type = TokenType.EOF;
 124                 } else {
 125                         switch (current[0]) {
 126                         case '/':
 127                                 type = TokenType.CLOSE_REGEX_LITERAL;
 128                                 current++;
 129                                 state_stack.length--;
 130                                 var fl_i = false;
 131                                 var fl_s = false;
 132                                 var fl_m = false;
 133                                 var fl_x = false;
 134                                 while (current[0] == 'i' || current[0] == 's' || current[0] == 'm' || current[0] == 'x') {
 135                                         switch (current[0]) {
 136                                         case 'i':
 137                                                 if (fl_i) {
 138                                                         Report.error (new SourceReference (source_file, line, column + token_length_in_chars, line, column + token_length_in_chars), "modifier 'i' used more than once");
 139                                                 }
 140                                                 fl_i = true;
 141                                                 break;
 142                                         case 's':
 143                                                 if (fl_s) {
 144                                                         Report.error (new SourceReference (source_file, line, column + token_length_in_chars, line, column + token_length_in_chars), "modifier 's' used more than once");
 145                                                 }
 146                                                 fl_s = true;
 147                                                 break;
 148                                         case 'm':
 149                                                 if (fl_m) {
 150                                                         Report.error (new SourceReference (source_file, line, column + token_length_in_chars, line, column + token_length_in_chars), "modifier 'm' used more than once");
 151                                                 }
 152                                                 fl_m = true;
 153                                                 break;
 154                                         case 'x':
 155                                                 if (fl_x) {
 156                                                         Report.error (new SourceReference (source_file, line, column + token_length_in_chars, line, column + token_length_in_chars), "modifier 'x' used more than once");
 157                                                 }
 158                                                 fl_x = true;
 159                                                 break;
 160                                         }
 161                                         current++;
 162                                         token_length_in_chars++;
 163                                 }
 164                                 break;
 165                         default:
 166                                 type = TokenType.REGEX_LITERAL;
 167                                 token_length_in_chars = 0;
 168                                 while (current < end && current[0] != '/') {
 169                                         if (current[0] == '\\') {
 170                                                 current++;
 171                                                 token_length_in_chars++;
 172                                                 if (current >= end) {
 173                                                         break;
 174                                                 }
 175
 176                                                 switch (current[0]) {
 177                                                 case '\'':
 178                                                 case '"':
 179                                                 case '\\':
 180                                                 case '/':
 181                                                 case '^':
 182                                                 case '$':
 183                                                 case '.':
 184                                                 case '[':
 185                                                 case ']':
 186                                                 case '{':
 187                                                 case '}':
 188                                                 case '(':
 189                                                 case ')':
 190                                                 case '?':
 191                                                 case '*':
 192                                                 case '+':
 193                                                 case '-':
 194                                                 case '#':
 195                                                 case '&':
 196                                                 case '~':
 197                                                 case ':':
 198                                                 case ';':
 199                                                 case '<':
 200                                                 case '>':
 201                                                 case '|':
 202                                                 case '%':
 203                                                 case '=':
 204                                                 case '@':
 205                                                 case '0':
 206                                                 case 'b':
 207                                                 case 'B':
 208                                                 case 'f':
 209                                                 case 'n':
 210                                                 case 'r':
 211                                                 case 't':
 212                                                 case 'a':
 213                                                 case 'A':
 214                                                 case 'p':
 215                                                 case 'P':
 216                                                 case 'e':
 217                                                 case 'd':
 218                                                 case 'D':
 219                                                 case 's':
 220                                                 case 'S':
 221                                                 case 'w':
 222                                                 case 'W':
 223                                                 case 'G':
 224                                                 case 'z':
 225                                                 case 'Z':
 226                                                         current++;
 227                                                         token_length_in_chars++;
 228                                                         break;
 229                                                 case 'x':
 230                                                         // hexadecimal escape character
 231                                                         current++;
 232                                                         token_length_in_chars++;
 233                                                         while (current < end && current[0].isxdigit ()) {
 234                                                                 current++;
 235                                                                 token_length_in_chars++;
 236                                                         }
 237                                                         break;
 238                                                 default:
 239                                                         Report.error (new SourceReference (source_file, line, column + token_length_in_chars, line, column + token_length_in_chars), "invalid escape sequence");
 240                                                         break;
 241                                                 }
 242                                         } else if (current[0] == '\n') {
 243                                                 break;
 244                                         } else {
 245                                                 unichar u = ((string) current).get_char_validated ((long) (end - current));
 246                                                 if (u != (unichar) (-1)) {
 247                                                         current += u.to_utf8 (null);
 248                                                         token_length_in_chars++;
 249                                                 } else {
 250                                                         current++;
 251                                                         Report.error (new SourceReference (source_file, line, column + token_length_in_chars, line, column + token_length_in_chars), "invalid UTF-8 character");
 252                                                 }
 253                                         }
 254                                 }
 255                                 if (current >= end || current[0] == '\n') {
 256                                         Report.error (new SourceReference (source_file, line, column + token_length_in_chars, line, column + token_length_in_chars), "syntax error, expected \"");
 257                                         state_stack.length--;
 258                                         return read_token (out token_begin, out token_end);
 259                                 }
 260                                 break;
 261                         }
 262                 }
 263
 264                 if (token_length_in_chars < 0) {
 265                         column += (int) (current - begin);
 266                 } else {
 267                         column += token_length_in_chars;
 268                 }
 269
 270                 token_end.pos = current;
 271                 token_end.line = line;
 272                 token_end.column = column - 1;
 273
 274                 return type;
 275         }
 276
 277
 278         public void seek (SourceLocation location) {
 279                 current = location.pos;
 280                 line = location.line;
 281                 column = location.column;
 282
 283                 conditional_stack = null;
 284                 state_stack = null;
 285         }
 286
 287         TokenType get_identifier_or_keyword (char* begin, int len) {
 288                 switch (len) {
 289                 case 2:
 290                         switch (begin[0]) {
 291                         case 'a':
 292                                 if (matches (begin, "as")) return TokenType.AS;
 293                                 break;
 294                         case 'd':
 295                                 if (matches (begin, "do")) return TokenType.DO;
 296                                 break;
 297                         case 'i':
 298                                 switch (begin[1]) {
 299                                 case 'f':
 300                                         return TokenType.IF;
 301                                 case 'n':
 302                                         return TokenType.IN;
 303                                 case 's':
 304                                         return TokenType.IS;
 305                                 }
 306                                 break;
 307                         case 'o':
 308                                 if (matches (begin, "of")) return TokenType.OF;
 309
 310                                 if (matches (begin, "or")) return TokenType.OP_OR;
 311                                 break;
 312                         case 't':
 313                                 if (matches (begin, "to")) return TokenType.TO;
 314                                 break;
 315                         }
 316                         break;
 317                 case 3:
 318                         switch (begin[0]) {
 319                         case 'a':
 320                                 if (matches (begin, "and")) return TokenType.OP_AND;
 321                                 break;
 322                         case 'd':
 323                                 if (matches (begin, "def")) return TokenType.DEF;
 324                                 break;
 325                         case 'f':
 326                                 if (matches (begin, "for")) return TokenType.FOR;
 327                                 break;
 328                         case 'g':
 329                                 if (matches (begin, "get")) return TokenType.GET;
 330                                 break;
 331                         case 'i':
 332                                 if (matches (begin, "isa")) return TokenType.ISA;
 333                                 break;
 334                         case 'n':
 335                                 switch (begin[1]) {
 336                                 case 'e':
 337                                         if (matches (begin, "new")) return TokenType.NEW;
 338                                         break;
 339                                 case 'o':
 340                                         if (matches (begin, "not")) return TokenType.OP_NEG;
 341                                         break;
 342                                 }
 343                                 break;
 344                         case 'o':
 345                                 if (matches (begin, "out")) return TokenType.OUT;
 346                                 break;
 347                         case 'r':
 348                                 if (matches (begin, "ref")) return TokenType.REF;
 349                                 break;
 350                         case 's':
 351                                 if (matches (begin, "set")) return TokenType.SET;
 352                                 break;
 353                         case 't':
 354                                 if (matches (begin, "try")) return TokenType.TRY;
 355                                 break;
 356                         case 'v':
 357                                 if (matches (begin, "var")) return TokenType.VAR;
 358                                 break;
 359                         }
 360                         break;
 361                 case 4:
 362                         switch (begin[0]) {
 363                         case 'c':
 364                                 if (matches (begin, "case")) return TokenType.CASE;
 365                                 break;
 366                         case 'd':
 367                                 if (matches (begin, "dict")) return TokenType.DICT;
 368                                 break;
 369                         case 'e':
 370                                 switch (begin[1]) {
 371                                 case 'l':
 372                                         if (matches (begin, "else")) return TokenType.ELSE;
 373                                         break;
 374                                 case 'n':
 375                                         if (matches (begin, "enum")) return TokenType.ENUM;
 376                                         break;
 377                                 }
 378                                 break;
 379                         case 'i':
 380                                 if (matches (begin, "init")) return TokenType.INIT;
 381                                 break;
 382                         case 'l':
 383                                 switch (begin[1]) {
 384                                 case 'i':
 385                                         if (matches (begin, "list")) return TokenType.LIST;
 386                                         break;
 387                                 case 'o':
 388                                         if (matches (begin, "lock")) return TokenType.LOCK;
 389                                         break;
 390                                 }
 391                                 break;
 392
 393                         case 'n':
 394                                 if (matches (begin, "null")) return TokenType.NULL;
 395                                 break;
 396                         case 'p':
 397                                 switch (begin[1]) {
 398                                 case 'a':
 399                                         if (matches (begin, "pass")) return TokenType.PASS;
 400                                         break;
 401                                 case 'r':
 402                                         if (matches (begin, "prop")) return TokenType.PROP;
 403                                         break;
 404                                 }
 405                                 break;
 406                         case 's':
 407                                 if (matches (begin, "self")) return TokenType.THIS;
 408                                 break;
 409                         case 't':
 410                                 if (matches (begin, "true")) return TokenType.TRUE;
 411                                 break;
 412                         case 'u':
 413                                 if (matches (begin, "uses")) return TokenType.USES;
 414                                 break;
 415                         case 'v':
 416                                 if (matches (begin, "void")) return TokenType.VOID;
 417                                 break;
 418                         case 'w':
 419                                 switch (begin[1]) {
 420                                 case 'e':
 421                                         if (matches (begin, "weak")) return TokenType.WEAK;
 422                                         break;
 423                                 case 'h':
 424                                         if (matches (begin, "when")) return TokenType.WHEN;
 425                                         break;
 426                                 }
 427                                 break;
 428                         }
 429                         break;
 430                 case 5:
 431                         switch (begin[0]) {
 432                         case 'a':
 433                                 switch (begin[1]) {
 434                                 case 'r':
 435                                         if (matches (begin, "array")) return TokenType.ARRAY;
 436                                         break;
 437                                 case 's':
 438                                         if (matches (begin, "async")) return TokenType.ASYNC;
 439                                         break;
 440                                 }
 441                                 break;
 442                         case 'b':
 443                                 if (matches (begin, "break")) return TokenType.BREAK;
 444                                 break;
 445                         case 'c':
 446                                 switch (begin[1]) {
 447                                 case 'l':
 448                                         if (matches (begin, "class")) return TokenType.CLASS;
 449                                         break;
 450                                 case 'o':
 451                                         if (matches (begin, "const")) return TokenType.CONST;
 452                                         break;
 453                                 }
 454                                 break;
 455                         case 'e':
 456                                 if (matches (begin, "event")) return TokenType.EVENT;
 457                                 break;
 458                         case 'f':
 459                                 switch (begin[1]) {
 460                                 case 'a':
 461                                         if (matches (begin, "false")) return TokenType.FALSE;
 462                                         break;
 463                                 case 'i':
 464                                         if (matches (begin, "final")) return TokenType.FINAL;
 465                                         break;
 466                                 }
 467                                 break;
 468                         case 'o':
 469                                 if (matches (begin, "owned")) return TokenType.OWNED;
 470                                 break;
 471                         case 'p':
 472                                 if (matches (begin, "print")) return TokenType.PRINT;
 473                                 break;
 474                         case 's':
 475                                 if (matches (begin, "super")) return TokenType.SUPER;
 476                                 break;
 477                         case 'r':
 478                                 if (matches (begin, "raise")) return TokenType.RAISE;
 479                                 break;
 480                         case 'w':
 481                                 if (matches (begin, "while")) return TokenType.WHILE;
 482                                 break;
 483                         case 'y':
 484                                 if (matches (begin, "yield")) return TokenType.YIELD;
 485                                 break;
 486                         }
 487                         break;
 488                 case 6:
 489                         switch (begin[0]) {
 490                         case 'a':
 491                                 if (matches (begin, "assert")) return TokenType.ASSERT;
 492                                 break;
 493                         case 'd':
 494                                 switch (begin[1]) {
 495                                 case 'e':
 496                                         if (matches (begin, "delete")) return TokenType.DELETE;
 497                                         break;
 498                                 case 'o':
 499                                         if (matches (begin, "downto")) return TokenType.DOWNTO;
 500                                         break;
 501                                 }
 502                                 break;
 503                         case 'e':
 504                                 switch (begin[1]) {
 505                                 case 'x':
 506                                         switch (begin[2]) {
 507                                         case 'c':
 508                                                 if (matches (begin, "except")) return TokenType.EXCEPT;
 509                                                 break;
 510                                         case 't':
 511                                                 if (matches (begin, "extern")) return TokenType.EXTERN;
 512                                                 break;
 513                                         }
 514                                         break;
 515                                 }
 516                                 break;
 517                         case 'i':
 518                                 if (matches (begin, "inline")) return TokenType.INLINE;
 519                                 break;
 520                         case 'p':
 521                                 switch (begin[1]) {
 522                                 case 'a':
 523                                         if (matches (begin, "params")) return TokenType.PARAMS;
 524                                         break;
 525                                 case 'u':
 526                                         if (matches (begin, "public")) return TokenType.PUBLIC;
 527                                         break;
 528                                 }
 529                                 break;
 530                         case 'r':
 531                                 switch (begin[1]) {
 532                                 case 'a':
 533                                         if (matches (begin, "raises")) return TokenType.RAISES;
 534                                         break;
 535                                 case 'e':
 536                                         if (matches (begin, "return")) return TokenType.RETURN;
 537                                         break;
 538                                 }
 539                                 break;
 540                         case 's':
 541                                 switch (begin[1]) {
 542                                 case 'i':
 543                                         if (matches (begin, "sizeof")) return TokenType.SIZEOF;
 544                                         break;
 545                                 case 't':
 546                                         switch (begin[2]) {
 547                                         case 'a':
 548                                                 if (matches (begin, "static")) return TokenType.STATIC;
 549                                                 break;
 550                                         case 'r':
 551                                                 if (matches (begin, "struct")) return TokenType.STRUCT;
 552                                                 break;
 553                                         }
 554                                         break;
 555                                 }
 556                                 break;
 557                         case 't':
 558                                 if (matches (begin, "typeof")) return TokenType.TYPEOF;
 559                                 break;
 560                         }
 561                         break;
 562                 case 7:
 563                         switch (begin[0]) {
 564                         case 'd':
 565                                 switch (begin[1]) {
 566                                 case 'e':
 567                                         if (matches (begin, "default")) return TokenType.DEFAULT;
 568                                         break;
 569                                 case 'y':
 570                                         if (matches (begin, "dynamic")) return TokenType.DYNAMIC;
 571                                         break;
 572                                 }
 573                                 break;
 574                         case 'e':
 575                                 if (matches (begin, "ensures")) return TokenType.ENSURES;
 576                                 break;
 577                         case 'f':
 578                                 switch (begin[1]) {
 579                                 case 'i':
 580                                         if (matches (begin, "finally")) return TokenType.FINALLY;
 581                                         break;
 582                                 }
 583                                 break;
 584                         case 'p':
 585                                 if (matches (begin, "private")) return TokenType.PRIVATE;
 586                                 break;
 587                         case 'u':
 588                                 if (matches (begin, "unowned")) return TokenType.UNOWNED;
 589                                 break;
 590                         case 'v':
 591                                 if (matches (begin, "virtual")) return TokenType.VIRTUAL;
 592                                 break;
 593                         }
 594                         break;
 595                 case 8:
 596                         switch (begin[0]) {
 597                         case 'a':
 598                                 if (matches (begin, "abstract")) return TokenType.ABSTRACT;
 599                                 break;
 600                         case 'c':
 601                                 if (matches (begin, "continue")) return TokenType.CONTINUE;
 602                                 break;
 603                         case 'd':
 604                                 if (matches (begin, "delegate")) return TokenType.DELEGATE;
 605                                 break;
 606                         case 'i':
 607                                 if (matches (begin, "internal")) return TokenType.INTERNAL;
 608                                 break;
 609                         case 'o':
 610                                 if (matches (begin, "override")) return TokenType.OVERRIDE;
 611                                 break;
 612                         case 'r':
 613                                 switch (begin[2]) {
 614                                 case 'a':
 615                                         if (matches (begin, "readonly")) return TokenType.READONLY;
 616                                         break;
 617                                 case 'q':
 618                                         if (matches (begin, "requires")) return TokenType.REQUIRES;
 619                                         break;
 620                                 }
 621                                 break;
 622                         case 'v':
 623                                 if (matches (begin, "volatile")) return TokenType.VOLATILE;
 624                                 break;
 625                         }
 626                         break;
 627                 case 9:
 628                         switch (begin[0]) {
 629                         case 'c':
 630                                 if (matches (begin, "construct")) return TokenType.CONSTRUCT;
 631                                 break;
 632                         case 'e':
 633                                 if (matches (begin, "exception")) return TokenType.ERRORDOMAIN;
 634                                 break;
 635                         case 'i':
 636                                 if (matches (begin, "interface")) return TokenType.INTERFACE;
 637                                 break;
 638                         case 'n':
 639                                 if (matches (begin, "namespace")) return TokenType.NAMESPACE;
 640                                 break;
 641                         case 'p':
 642                                 if (matches (begin, "protected")) return TokenType.PROTECTED;
 643                                 break;
 644                         case 'w':
 645                                 if (matches (begin, "writeonly")) return TokenType.WRITEONLY;
 646                                 break;
 647                         }
 648                         break;
 649                 case 10:
 650                         switch (begin[0]) {
 651                         case 'i':
 652                                 if (matches (begin, "implements")) return TokenType.IMPLEMENTS;
 653                                 break;
 654                         }
 655                         break;
 656                 }
 657                 return TokenType.IDENTIFIER;
 658         }
 659
 660
 661         public TokenType read_template_token (out SourceLocation token_begin, out SourceLocation token_end) {
 662                 TokenType type;
 663                 char* begin = current;
 664                 token_begin.pos = begin;
 665                 token_begin.line = line;
 666                 token_begin.column = column;
 667
 668                 int token_length_in_chars = -1;
 669
 670                 if (current >= end) {
 671                         type = TokenType.EOF;
 672                 } else {
 673                         switch (current[0]) {
 674                         case '"':
 675                                 type = TokenType.CLOSE_TEMPLATE;
 676                                 current++;
 677                                 state_stack.length--;
 678                                 break;
 679                         case '$':
 680                                 token_begin.pos++; // $ is not part of following token
 681                                 current++;
 682                                 if (current[0].isalpha () || current[0] == '_') {
 683                                         int len = 0;
 684                                         while (current < end && is_ident_char (current[0])) {
 685                                                 current++;
 686                                                 len++;
 687                                         }
 688                                         type = TokenType.IDENTIFIER;
 689                                         state_stack += State.TEMPLATE_PART;
 690                                 } else if (current[0] == '(') {
 691                                         current++;
 692                                         column += 2;
 693                                         state_stack += State.PARENS;
 694                                         return read_token (out token_begin, out token_end);
 695                                 } else if (current[0] == '$') {
 696                                         type = TokenType.TEMPLATE_STRING_LITERAL;
 697                                         current++;
 698                                         state_stack += State.TEMPLATE_PART;
 699                                 } else {
 700                                         Report.error (new SourceReference (source_file, line, column + 1, line, column + 1), "unexpected character");
 701                                         return read_template_token (out token_begin, out token_end);
 702                                 }
 703                                 break;
 704                         default:
 705                                 type = TokenType.TEMPLATE_STRING_LITERAL;
 706                                 token_length_in_chars = 0;
 707                                 while (current < end && current[0] != '"' && current[0] != '$') {
 708                                         if (current[0] == '\\') {
 709                                                 current++;
 710                                                 token_length_in_chars++;
 711                                                 if (current >= end) {
 712                                                         break;
 713                                                 }
 714
 715                                                 switch (current[0]) {
 716                                                 case '\'':
 717                                                 case '"':
 718                                                 case '\\':
 719                                                 case '0':
 720                                                 case 'b':
 721                                                 case 'f':
 722                                                 case 'n':
 723                                                 case 'r':
 724                                                 case 't':
 725                                                         current++;
 726                                                         token_length_in_chars++;
 727                                                         break;
 728                                                 case 'x':
 729                                                         // hexadecimal escape character
 730                                                         current++;
 731                                                         token_length_in_chars++;
 732                                                         while (current < end && current[0].isxdigit ()) {
 733                                                                 current++;
 734                                                                 token_length_in_chars++;
 735                                                         }
 736                                                         break;
 737                                                 default:
 738                                                         Report.error (new SourceReference (source_file, line, column + token_length_in_chars, line, column + token_length_in_chars), "invalid escape sequence");
 739                                                         break;
 740                                                 }
 741                                         } else if (current[0] == '\n') {
 742                                                 break;
 743                                         } else {
 744                                                 unichar u = ((string) current).get_char_validated ((long) (end - current));
 745                                                 if (u != (unichar) (-1)) {
 746                                                         current += u.to_utf8 (null);
 747                                                         token_length_in_chars++;
 748                                                 } else {
 749                                                         current++;
 750                                                         Report.error (new SourceReference (source_file, line, column + token_length_in_chars, line, column + token_length_in_chars), "invalid UTF-8 character");
 751                                                 }
 752                                         }
 753                                 }
 754                                 if (current >= end || current[0] == '\n') {
 755                                         Report.error (new SourceReference (source_file, line, column + token_length_in_chars, line, column + token_length_in_chars), "syntax error, expected \"");
 756                                         state_stack.length--;
 757                                         return read_token (out token_begin, out token_end);
 758                                 }
 759                                 state_stack += State.TEMPLATE_PART;
 760                                 break;
 761                         }
 762                 }
 763
 764                 if (token_length_in_chars < 0) {
 765                         column += (int) (current - begin);
 766                 } else {
 767                         column += token_length_in_chars;
 768                 }
 769
 770                 token_end.pos = current;
 771                 token_end.line = line;
 772                 token_end.column = column - 1;
 773
 774                 return type;
 775         }
 776
 777
 778         public TokenType read_token (out SourceLocation token_begin, out SourceLocation token_end) {
 779
 780
 781                 if (in_template ()) {
 782                         return read_template_token (out token_begin, out token_end);
 783                 } else if (in_template_part ()) {
 784                         state_stack.length--;
 785
 786                         token_begin.pos = current;
 787                         token_begin.line = line;
 788                         token_begin.column = column;
 789
 790                         token_end.pos = current;
 791                         token_end.line = line;
 792                         token_end.column = column - 1;
 793
 794                         return TokenType.COMMA;
 795                 } else if (in_regex_literal ()) {
 796                         return read_regex_token (out token_begin, out token_end);
 797                 }
 798
 799
 800
 801                 /* emit dedents if outstanding before checking any other chars */
 802
 803                 if (pending_dedents > 0) {
 804                         pending_dedents--;
 805                         indent_level--;
 806
 807
 808                         token_begin.pos = current;
 809                         token_begin.line = line;
 810                         token_begin.column = column;
 811
 812                         token_end.pos = current;
 813                         token_end.line = line;
 814                         token_end.column = column;
 815
 816                         last_token = TokenType.DEDENT;
 817
 818                         return TokenType.DEDENT;
 819                 }
 820
 821                 if ((_indent_spaces == 0 ) || (last_token != TokenType.EOL)) {
 822                         /* scrub whitespace (excluding newlines) and comments */
 823                         space ();
 824                 }
 825
 826
 827                 /* handle explicit line continuation (lines ending with "\") */
 828                 while (current < end && current[0] == '\\' && current[1] == '\n') {
 829                         current += 2;
 830                         line++;
 831                         skip_space_tabs ();
 832                 }
 833
 834                 /* handle automatic line continuations (when inside parens or braces) */
 835                 while (current < end && current[0] == '\n' && (open_parens_count > 0 || open_brace_count > 0)) {
 836                     current++;
 837                         line++;
 838                         skip_space_tabs ();
 839                 }
 840
 841
 842                 /* handle non-consecutive new line once parsing is underway - EOL */
 843                 if (newline () && parse_started && last_token != TokenType.EOL && last_token != TokenType.SEMICOLON) {
 844                         token_begin.pos = current;
 845                         token_begin.line = line;
 846                         token_begin.column = column;
 847
 848                         token_end.pos = current;
 849                         token_end.line = line;
 850                         token_end.column = column;
 851
 852                         last_token = TokenType.EOL;
 853
 854                         return TokenType.EOL;
 855                 }
 856
 857
 858                 while (skip_newlines ()) {
 859                         token_begin.pos = current;
 860                         token_begin.line = line;
 861                         token_begin.column = column;
 862
 863                         current_indent_level = count_tabs ();
 864
 865                         /* if its an empty new line then ignore */
 866                         if (current_indent_level == -1)  {
 867                                 continue;
 868                         }
 869
 870                         if (current_indent_level > indent_level) {
 871                                 indent_level = current_indent_level;
 872
 873                                 token_end.pos = current;
 874                                 token_end.line = line;
 875                                 token_end.column = column;
 876
 877                                 last_token = TokenType.INDENT;
 878
 879                                 return TokenType.INDENT;
 880                         } else if (current_indent_level < indent_level) {
 881                                 indent_level--;
 882
 883                                 pending_dedents = (indent_level - current_indent_level);
 884
 885                                 token_end.pos = current;
 886                                 token_end.line = line;
 887                                 token_end.column = column;
 888
 889                                 last_token = TokenType.DEDENT;
 890
 891                                 return TokenType.DEDENT;
 892                         }
 893                 }
 894
 895                 TokenType type;
 896                 char* begin = current;
 897                 token_begin.pos = begin;
 898                 token_begin.line = line;
 899                 token_begin.column = column;
 900
 901                 int token_length_in_chars = -1;
 902
 903                 parse_started = true;
 904
 905                 if (current >= end) {
 906                         if (indent_level > 0) {
 907                                 indent_level--;
 908
 909                                 pending_dedents = indent_level;
 910
 911                                 type = TokenType.DEDENT;
 912                         } else {
 913                                 type = TokenType.EOF;
 914                         }
 915                 } else if (current[0].isalpha () || current[0] == '_') {
 916                         int len = 0;
 917                         while (current < end && is_ident_char (current[0])) {
 918                                 current++;
 919                                 len++;
 920                         }
 921                         type = get_identifier_or_keyword (begin, len);
 922                 } else if (current[0] == '@') {
 923                         if (current < end - 1 && current[1] == '"') {
 924                                 type = TokenType.OPEN_TEMPLATE;
 925                                 current += 2;
 926                                 state_stack += State.TEMPLATE;
 927                         } else {
 928                                 token_begin.pos++; // @ is not part of the identifier
 929                                 current++;
 930                                 int len = 0;
 931                                 while (current < end && is_ident_char (current[0])) {
 932                                         current++;
 933                                         len++;
 934                                 }
 935                                 type = TokenType.IDENTIFIER;
 936                         }
 937                 } else if (current[0].isdigit ()) {
 938                         while (current < end && current[0].isdigit ()) {
 939                                 current++;
 940                         }
 941                         type = TokenType.INTEGER_LITERAL;
 942                         if (current < end && current[0].tolower () == 'l') {
 943                                 current++;
 944                                 if (current < end && current[0].tolower () == 'l') {
 945                                         current++;
 946                                 }
 947                         } else if (current < end && current[0].tolower () == 'u') {
 948                                 current++;
 949                                 if (current < end && current[0].tolower () == 'l') {
 950                                         current++;
 951                                         if (current < end && current[0].tolower () == 'l') {
 952                                                 current++;
 953                                         }
 954                                 }
 955                         } else if (current < end - 1 && current[0] == '.' && current[1].isdigit ()) {
 956                                 current++;
 957                                 while (current < end && current[0].isdigit ()) {
 958                                         current++;
 959                                 }
 960                                 if (current < end && current[0].tolower () == 'e') {
 961                                         current++;
 962                                         if (current < end && (current[0] == '+' || current[0] == '-')) {
 963                                                 current++;
 964                                         }
 965                                         while (current < end && current[0].isdigit ()) {
 966                                                 current++;
 967                                         }
 968                                 }
 969                                 if (current < end && current[0].tolower () == 'f') {
 970                                         current++;
 971                                 }
 972                                 type = TokenType.REAL_LITERAL;
 973                         } else if (current < end && current == begin + 1
 974                                    && begin[0] == '0' && begin[1] == 'x' && begin[2].isxdigit ()) {
 975                                 // hexadecimal integer literal
 976                                 current++;
 977                                 while (current < end && current[0].isxdigit ()) {
 978                                         current++;
 979                                 }
 980                         } else if (current < end && is_ident_char (current[0])) {
 981                                 // allow identifiers to start with a digit
 982                                 // as long as they contain at least one char
 983                                 while (current < end && is_ident_char (current[0])) {
 984                                         current++;
 985                                 }
 986                                 type = TokenType.IDENTIFIER;
 987                         }
 988                 } else {
 989                         switch (current[0]) {
 990                         case '{':
 991                                 type = TokenType.OPEN_BRACE;
 992                                 open_brace_count++;
 993                                 state_stack += State.BRACE;
 994                                 current++;
 995                                 break;
 996                         case '}':
 997                                 type = TokenType.CLOSE_BRACE;
 998                                 open_brace_count--;
 999                                 state_stack.length--;
1000                                 current++;
1001                                 break;
1002                         case '(':
1003                                 type = TokenType.OPEN_PARENS;
1004                                 open_parens_count++;
1005                                 state_stack += State.PARENS;
1006                                 current++;
1007                                 break;
1008                         case ')':
1009                                 type = TokenType.CLOSE_PARENS;
1010                                 open_parens_count--;
1011                                 current++;
1012                                 state_stack.length--;
1013                                 if (in_template ()) {
1014                                         type = TokenType.COMMA;
1015                                 }
1016                                 break;
1017                         case '[':
1018                                 type = TokenType.OPEN_BRACKET;
1019                                 state_stack += State.BRACKET;
1020                                 current++;
1021                                 break;
1022                         case ']':
1023                                 type = TokenType.CLOSE_BRACKET;
1024                                 state_stack.length--;
1025                                 current++;
1026                                 break;
1027                         case '.':
1028                                 type = TokenType.DOT;
1029                                 current++;
1030                                 if (current < end - 1) {
1031                                         if (current[0] == '.' && current[1] == '.') {
1032                                                 type = TokenType.ELLIPSIS;
1033                                                 current += 2;
1034                                         }
1035                                 }
1036                                 break;
1037                         case ':':
1038                                 type = TokenType.COLON;
1039                                 current++;
1040                                 break;
1041                         case ',':
1042                                 type = TokenType.COMMA;
1043                                 current++;
1044                                 break;
1045                         case ';':
1046                                 type = TokenType.SEMICOLON;
1047                                 current++;
1048                                 break;
1049                         case '#':
1050                                 type = TokenType.HASH;
1051                                 current++;
1052                                 break;
1053                         case '?':
1054                                 type = TokenType.INTERR;
1055                                 current++;
1056                                 break;
1057                         case '|':
1058                                 type = TokenType.BITWISE_OR;
1059                                 current++;
1060                                 if (current < end) {
1061                                         switch (current[0]) {
1062                                         case '=':
1063                                                 type = TokenType.ASSIGN_BITWISE_OR;
1064                                                 current++;
1065                                                 break;
1066                                         case '|':
1067                                                 type = TokenType.OP_OR;
1068                                                 current++;
1069                                                 break;
1070                                         }
1071                                 }
1072                                 break;
1073                         case '&':
1074                                 type = TokenType.BITWISE_AND;
1075                                 current++;
1076                                 if (current < end) {
1077                                         switch (current[0]) {
1078                                         case '=':
1079                                                 type = TokenType.ASSIGN_BITWISE_AND;
1080                                                 current++;
1081                                                 break;
1082                                         case '&':
1083                                                 type = TokenType.OP_AND;
1084                                                 current++;
1085                                                 break;
1086                                         }
1087                                 }
1088                                 break;
1089                         case '^':
1090                                 type = TokenType.CARRET;
1091                                 current++;
1092                                 if (current < end && current[0] == '=') {
1093                                         type = TokenType.ASSIGN_BITWISE_XOR;
1094                                         current++;
1095                                 }
1096                                 break;
1097                         case '~':
1098                                 type = TokenType.TILDE;
1099                                 current++;
1100                                 break;
1101                         case '=':
1102                                 type = TokenType.ASSIGN;
1103                                 current++;
1104                                 if (current < end) {
1105                                         switch (current[0]) {
1106                                         case '=':
1107                                                 type = TokenType.OP_EQ;
1108                                                 current++;
1109                                                 break;
1110                                         case '>':
1111                                                 type = TokenType.LAMBDA;
1112                                                 current++;
1113                                                 break;
1114                                         }
1115                                 }
1116                                 break;
1117                         case '<':
1118                                 type = TokenType.OP_LT;
1119                                 current++;
1120                                 if (current < end) {
1121                                         switch (current[0]) {
1122                                         case '=':
1123                                                 type = TokenType.OP_LE;
1124                                                 current++;
1125                                                 break;
1126                                         case '<':
1127                                                 type = TokenType.OP_SHIFT_LEFT;
1128                                                 current++;
1129                                                 if (current < end && current[0] == '=') {
1130                                                         type = TokenType.ASSIGN_SHIFT_LEFT;
1131                                                         current++;
1132                                                 }
1133                                                 break;
1134                                         }
1135                                 }
1136                                 break;
1137                         case '>':
1138                                 type = TokenType.OP_GT;
1139                                 current++;
1140                                 if (current < end && current[0] == '=') {
1141                                         type = TokenType.OP_GE;
1142                                         current++;
1143                                 }
1144                                 break;
1145                         case '!':
1146                                 type = TokenType.OP_NEG;
1147                                 current++;
1148                                 if (current < end && current[0] == '=') {
1149                                         type = TokenType.OP_NE;
1150                                         current++;
1151                                 }
1152                                 break;
1153                         case '+':
1154                                 type = TokenType.PLUS;
1155                                 current++;
1156                                 if (current < end) {
1157                                         switch (current[0]) {
1158                                         case '=':
1159                                                 type = TokenType.ASSIGN_ADD;
1160                                                 current++;
1161                                                 break;
1162                                         case '+':
1163                                                 type = TokenType.OP_INC;
1164                                                 current++;
1165                                                 break;
1166                                         }
1167                                 }
1168                                 break;
1169                         case '-':
1170                                 type = TokenType.MINUS;
1171                                 current++;
1172                                 if (current < end) {
1173                                         switch (current[0]) {
1174                                         case '=':
1175                                                 type = TokenType.ASSIGN_SUB;
1176                                                 current++;
1177                                                 break;
1178                                         case '-':
1179                                                 type = TokenType.OP_DEC;
1180                                                 current++;
1181                                                 break;
1182                                         case '>':
1183                                                 type = TokenType.OP_PTR;
1184                                                 current++;
1185                                                 break;
1186                                         }
1187                                 }
1188                                 break;
1189                         case '*':
1190                                 type = TokenType.STAR;
1191                                 current++;
1192                                 if (current < end && current[0] == '=') {
1193                                         type = TokenType.ASSIGN_MUL;
1194                                         current++;
1195                                 }
1196                                 break;
1197                         case '/':
1198                                 switch (last_token) {
1199                                 case TokenType.ASSIGN:
1200                                 case TokenType.COMMA:
1201                                 case TokenType.MINUS:
1202                                 case TokenType.OP_AND:
1203                                 case TokenType.OP_DEC:
1204                                 case TokenType.OP_EQ:
1205                                 case TokenType.OP_GE:
1206                                 case TokenType.OP_GT:
1207                                 case TokenType.OP_INC:
1208                                 case TokenType.OP_LE:
1209                                 case TokenType.OP_LT:
1210                                 case TokenType.OP_NE:
1211                                 case TokenType.OP_NEG:
1212                                 case TokenType.OP_OR:
1213                                 case TokenType.OPEN_BRACE:
1214                                 case TokenType.OPEN_PARENS:
1215                                 case TokenType.PLUS:
1216                                 case TokenType.RETURN:
1217                                         type = TokenType.OPEN_REGEX_LITERAL;
1218                                         state_stack += State.REGEX_LITERAL;
1219                                         current++;
1220                                         break;
1221                                 default:
1222                                         type = TokenType.DIV;
1223                                         current++;
1224                                         if (current < end && current[0] == '=') {
1225                                                 type = TokenType.ASSIGN_DIV;
1226                                                 current++;
1227                                         }
1228                                         break;
1229                                 }
1230                                 break;
1231
1232                         case '%':
1233                                 type = TokenType.PERCENT;
1234                                 current++;
1235                                 if (current < end && current[0] == '=') {
1236                                         type = TokenType.ASSIGN_PERCENT;
1237                                         current++;
1238                                 }
1239                                 break;
1240                         case '\'':
1241                         case '"':
1242                                 if (begin[0] == '\'') {
1243                                         type = TokenType.CHARACTER_LITERAL;
1244                                 } else if (current < end - 6 && begin[1] == '"' && begin[2] == '"') {
1245                                         type = TokenType.VERBATIM_STRING_LITERAL;
1246                                         token_length_in_chars = 6;
1247                                         current += 3;
1248                                         while (current < end - 4) {
1249                                                 if (current[0] == '"' && current[1] == '"' && current[2] == '"') {
1250                                                         break;
1251                                                 } else if (current[0] == '\n') {
1252                                                         current++;
1253                                                         line++;
1254                                                         column = 1;
1255                                                         token_length_in_chars = 3;
1256                                                 } else {
1257                                                         unichar u = ((string) current).get_char_validated ((long) (end - current));
1258                                                         if (u != (unichar) (-1)) {
1259                                                                 current += u.to_utf8 (null);
1260                                                                 token_length_in_chars++;
1261                                                         } else {
1262                                                                 Report.error (new SourceReference (source_file, line, column + token_length_in_chars, line, column + token_length_in_chars), "invalid UTF-8 character");
1263                                                         }
1264                                                 }
1265                                         }
1266                                         if (current[0] == '"' && current[1] == '"' && current[2] == '"') {
1267                                                 current += 3;
1268                                         } else {
1269                                                 Report.error (new SourceReference (source_file, line, column + token_length_in_chars, line, column + token_length_in_chars), "syntax error, expected \"\"\"");
1270                                         }
1271                                         break;
1272                                 } else {
1273                                         type = TokenType.STRING_LITERAL;
1274                                 }
1275                                 token_length_in_chars = 2;
1276                                 current++;
1277                                 while (current < end && current[0] != begin[0]) {
1278                                         if (current[0] == '\\') {
1279                                                 current++;
1280                                                 token_length_in_chars++;
1281                                                 if (current >= end) {
1282                                                         break;
1283                                                 }
1284
1285                                                 switch (current[0]) {
1286                                                 case '\'':
1287                                                 case '"':
1288                                                 case '\\':
1289                                                 case '0':
1290                                                 case 'b':
1291                                                 case 'f':
1292                                                 case 'n':
1293                                                 case 'r':
1294                                                 case 't':
1295                                                         current++;
1296                                                         token_length_in_chars++;
1297                                                         break;
1298                                                 case 'x':
1299                                                         // hexadecimal escape character
1300                                                         current++;
1301                                                         token_length_in_chars++;
1302                                                         while (current < end && current[0].isxdigit ()) {
1303                                                                 current++;
1304                                                                 token_length_in_chars++;
1305                                                         }
1306                                                         break;
1307                                                 default:
1308                                                         Report.error (new SourceReference (source_file, line, column + token_length_in_chars, line, column + token_length_in_chars), "invalid escape sequence");
1309                                                         break;
1310                                                 }
1311                                         } else if (current[0] == '\n') {
1312                                                 break;
1313                                         } else {
1314                                                 unichar u = ((string) current).get_char_validated ((long) (end - current));
1315                                                 if (u != (unichar) (-1)) {
1316                                                         current += u.to_utf8 (null);
1317                                                         token_length_in_chars++;
1318                                                 } else {
1319                                                         current++;
1320                                                         Report.error (new SourceReference (source_file, line, column + token_length_in_chars, line, column + token_length_in_chars), "invalid UTF-8 character");
1321                                                 }
1322                                         }
1323                                 }
1324                                 if (current < end && current[0] != '\n') {
1325                                         current++;
1326                                 } else {
1327                                         Report.error (new SourceReference (source_file, line, column + token_length_in_chars, line, column + token_length_in_chars), "syntax error, expected %c".printf (begin[0]));
1328                                 }
1329                                 break;
1330                         default:
1331                                 unichar u = ((string) current).get_char_validated ((long) (end - current));
1332                                 if (u != (unichar) (-1)) {
1333                                         current += u.to_utf8 (null);
1334                                         Report.error (new SourceReference (source_file, line, column, line, column), "syntax error, unexpected character");
1335                                 } else {
1336                                         current++;
1337                                         Report.error (new SourceReference (source_file, line, column, line, column), "invalid UTF-8 character");
1338                                 }
1339                                 column++;
1340                                 last_token = TokenType.STRING_LITERAL;
1341                                 return read_token (out token_begin, out token_end);
1342                         }
1343                 }
1344
1345                 if (token_length_in_chars < 0) {
1346                         column += (int) (current - begin);
1347                 } else {
1348                         column += token_length_in_chars;
1349                 }
1350
1351                 token_end.pos = current;
1352                 token_end.line = line;
1353                 token_end.column = column - 1;
1354                 last_token = type;
1355
1356                 return type;
1357         }
1358
1359         int count_tabs ()
1360         {
1361
1362                 int tab_count = 0;
1363
1364
1365                 if (_indent_spaces == 0) {
1366                         while (current < end && current[0] == '\t') {
1367                                 current++;
1368                                 column++;
1369                                 tab_count++;
1370                         }
1371                 } else {
1372                         int space_count = 0;
1373                         while (current < end && current[0] == ' ') {
1374                                 current++;
1375                                 column++;
1376                                 space_count++;
1377                         }
1378
1379                         tab_count = space_count / _indent_spaces;
1380
1381                 }
1382
1383                 /* ignore comments and whitspace and other lines that contain no code */
1384
1385                 space ();
1386
1387                 if ((current < end) && (current[0] == '\n')) return -1;
1388
1389                 return tab_count;
1390         }
1391
1392         bool matches (char* begin, string keyword) {
1393                 char* keyword_array = (char *) keyword;
1394                 long len = keyword.len ();
1395                 for (int i = 0; i < len; i++) {
1396                         if (begin[i] != keyword_array[i]) {
1397                                 return false;
1398                         }
1399                 }
1400                 return true;
1401         }
1402
1403         bool whitespace () {
1404                 bool found = false;
1405                 while (current < end && current[0].isspace () && current[0] != '\n' ) {
1406
1407                         found = true;
1408                         current++;
1409                         column++;
1410                 }
1411
1412                 if ((column == 1) && (current < end) && (current[0] == '#')) {
1413                         pp_directive ();
1414                         return true;
1415                 }
1416
1417                 return found;
1418         }
1419
1420         inline bool newline () {
1421                 if (current[0] == '\n') {
1422                         return true;
1423                 }
1424
1425                 return false;
1426         }
1427
1428         bool skip_newlines () {
1429                 bool new_lines = false;
1430
1431                 while (newline ()) {
1432                         current++;
1433
1434                         line++;
1435                         column = 1;
1436                         current_indent_level = 0;
1437
1438                         new_lines = true;
1439                 }
1440
1441                 return new_lines;
1442         }
1443
1444         bool comment (bool file_comment = false) {
1445                 if (current > end - 2
1446                     || current[0] != '/'
1447                     || (current[1] != '/' && current[1] != '*')) {
1448                         return false;
1449                 }
1450
1451
1452                 if (current[1] == '/') {
1453                         // single-line comment
1454
1455                         SourceReference source_reference = null;
1456                         if (file_comment) {
1457                                 source_reference = new SourceReference (source_file, line, column, line, column);
1458                         }
1459
1460                         current += 2;
1461
1462                         // skip until end of line or end of file
1463                         while (current < end && current[0] != '\n') {
1464                                 current++;
1465                         }
1466
1467                         /* do not ignore EOL if comment does not exclusively occupy the line */
1468                         if (current[0] == '\n' && last_token == TokenType.EOL) {
1469                                 current++;
1470                                 line++;
1471                                 column = 1;
1472                                 current_indent_level = 0;
1473                         }
1474
1475                         if (source_reference != null) {
1476                                 push_comment (((string) begin).ndup ((long) (current - begin)), source_reference, file_comment);
1477                         }
1478
1479                 } else {
1480                         // delimited comment
1481                         SourceReference source_reference = null;
1482                         if (file_comment && current[2] == '*') {
1483                                 return false;
1484                         }
1485
1486             if (current[2] == '*' || file_comment) {
1487                                 source_reference = new SourceReference (source_file, line, column, line, column);
1488                         }
1489
1490                         current += 2;
1491                         char* begin = current;
1492
1493                         while (current < end - 1
1494                                && (current[0] != '*' || current[1] != '/')) {
1495                                 if (current[0] == '\n') {
1496                                         line++;
1497                                         column = 0;
1498                                 }
1499                                 current++;
1500                                 column++;
1501                         }
1502                         if (current == end - 1) {
1503                                 Report.error (new SourceReference (source_file, line, column, line, column), "syntax error, expected */");
1504                                 return true;
1505                         }
1506
1507                         if (source_reference != null) {
1508                                 string comment = ((string) begin).ndup ((long) (current - begin));
1509                                 push_comment (comment, source_reference, file_comment);
1510                         }
1511
1512                         current += 2;
1513                         column += 2;
1514                 }
1515
1516                 return true;
1517         }
1518
1519         bool skip_tabs () {
1520                 bool found = false;
1521                 while (current < end && current[0] == '\t' ) {
1522                         current++;
1523                         column++;
1524                         found = true;
1525                 }
1526
1527                 return found;
1528         }
1529
1530         void skip_space_tabs () {
1531                 while (whitespace () || skip_tabs () || comment () ) {
1532                 }
1533
1534         }
1535
1536         void space () {
1537                 while (whitespace () || comment ()) {
1538                 }
1539         }
1540
1541     public void parse_file_comments () {
1542                 while (whitespace () || comment (true)) {
1543                 }
1544
1545         }
1546
1547         void push_comment (string comment_item, SourceReference source_reference, bool file_comment) {
1548                 if (comment_item[0] == '*') {
1549                         _comment = new Comment (comment_item, source_reference);
1550                 }
1551
1552                 if (file_comment) {
1553                         source_file.add_comment (new Comment (comment_item, source_reference));
1554                         _comment = null;
1555                 }
1556         }
1557
1558         /**
1559          * Clears and returns the content of the comment stack.
1560          *
1561          * @return saved comment
1562          */
1563         public Comment? pop_comment () {
1564                 if (_comment == null) {
1565                         return null;
1566                 }
1567
1568                 var comment = _comment;
1569                 _comment = null;
1570                 return comment;
1571         }
1572
1573         bool pp_whitespace () {
1574                 bool found = false;
1575                 while (current < end && current[0].isspace () && current[0] != '\n') {
1576                         found = true;
1577                         current++;
1578                         column++;
1579                 }
1580                 return found;
1581         }
1582
1583         void pp_directive () {
1584                 // hash sign
1585                 current++;
1586                 column++;
1587
1588                 pp_whitespace ();
1589
1590                 char* begin = current;
1591                 int len = 0;
1592                 while (current < end && current[0].isalnum ()) {
1593                         current++;
1594                         column++;
1595                         len++;
1596                 }
1597
1598                 if (len == 2 && matches (begin, "if")) {
1599                         parse_pp_if ();
1600                 } else if (len == 4 && matches (begin, "elif")) {
1601                         parse_pp_elif ();
1602                 } else if (len == 4 && matches (begin, "else")) {
1603                         parse_pp_else ();
1604                 } else if (len == 5 && matches (begin, "endif")) {
1605                         parse_pp_endif ();
1606                 } else {
1607                         Report.error (new SourceReference (source_file, line, column - len, line, column), "syntax error, invalid preprocessing directive");
1608                 }
1609
1610                 if (conditional_stack.length > 0
1611                     && conditional_stack[conditional_stack.length - 1].skip_section) {
1612                         // skip lines until next preprocessing directive
1613                         bool bol = false;
1614                         while (current < end) {
1615                                 if (bol && current[0] == '#') {
1616                                         // go back to begin of line
1617                                         current -= (column - 1);
1618                                         column = 1;
1619                                         return;
1620                                 }
1621                                 if (current[0] == '\n') {
1622                                         line++;
1623                                         column = 0;
1624                                         bol = true;
1625                                 } else if (!current[0].isspace ()) {
1626                                         bol = false;
1627                                 }
1628                                 current++;
1629                                 column++;
1630                         }
1631                 }
1632         }
1633
1634         void pp_eol () {
1635                 pp_whitespace ();
1636                 if (current >= end || current[0] != '\n') {
1637                         Report.error (new SourceReference (source_file, line, column, line, column), "syntax error, expected newline");
1638                 }
1639         }
1640
1641         void parse_pp_if () {
1642                 pp_whitespace ();
1643
1644                 bool condition = parse_pp_expression ();
1645
1646                 pp_eol ();
1647
1648                 conditional_stack += Conditional ();
1649
1650                 if (condition && (conditional_stack.length == 1 || !conditional_stack[conditional_stack.length - 2].skip_section)) {
1651                         // condition true => process code within if
1652                         conditional_stack[conditional_stack.length - 1].matched = true;
1653                 } else {
1654                         // skip lines until next preprocessing directive
1655                         conditional_stack[conditional_stack.length - 1].skip_section = true;
1656                 }
1657         }
1658
1659         void parse_pp_elif () {
1660                 pp_whitespace ();
1661
1662                 bool condition = parse_pp_expression ();
1663
1664                 pp_eol ();
1665
1666                 if (conditional_stack.length == 0 || conditional_stack[conditional_stack.length - 1].else_found) {
1667                         Report.error (new SourceReference (source_file, line, column, line, column), "syntax error, unexpected #elif");
1668                         return;
1669                 }
1670
1671                 if (condition && !conditional_stack[conditional_stack.length - 1].matched
1672                     && (conditional_stack.length == 1 || !conditional_stack[conditional_stack.length - 2].skip_section)) {
1673                         // condition true => process code within if
1674                         conditional_stack[conditional_stack.length - 1].matched = true;
1675                         conditional_stack[conditional_stack.length - 1].skip_section = false;
1676                 } else {
1677                         // skip lines until next preprocessing directive
1678                         conditional_stack[conditional_stack.length - 1].skip_section = true;
1679                 }
1680         }
1681
1682         void parse_pp_else () {
1683                 pp_eol ();
1684
1685                 if (conditional_stack.length == 0 || conditional_stack[conditional_stack.length - 1].else_found) {
1686                         Report.error (new SourceReference (source_file, line, column, line, column), "syntax error, unexpected #else");
1687                         return;
1688                 }
1689
1690                 if (!conditional_stack[conditional_stack.length - 1].matched
1691                     && (conditional_stack.length == 1 || !conditional_stack[conditional_stack.length - 2].skip_section)) {
1692                         // condition true => process code within if
1693                         conditional_stack[conditional_stack.length - 1].matched = true;
1694                         conditional_stack[conditional_stack.length - 1].skip_section = false;
1695                 } else {
1696                         // skip lines until next preprocessing directive
1697                         conditional_stack[conditional_stack.length - 1].skip_section = true;
1698                 }
1699         }
1700
1701         void parse_pp_endif () {
1702                 pp_eol ();
1703
1704                 if (conditional_stack.length == 0) {
1705                         Report.error (new SourceReference (source_file, line, column, line, column), "syntax error, unexpected #endif");
1706                         return;
1707                 }
1708
1709                 conditional_stack.length--;
1710         }
1711
1712         bool parse_pp_symbol () {
1713                 int len = 0;
1714                 while (current < end && is_ident_char (current[0])) {
1715                         current++;
1716                         column++;
1717                         len++;
1718                 }
1719
1720                 if (len == 0) {
1721                         Report.error (new SourceReference (source_file, line, column, line, column), "syntax error, expected identifier");
1722                         return false;
1723                 }
1724
1725                 string identifier = ((string) (current - len)).ndup (len);
1726                 bool defined;
1727                 if (identifier == "true") {
1728                         defined = true;
1729                 } else if (identifier == "false") {
1730                         defined = false;
1731                 } else {
1732                         defined = source_file.context.is_defined (identifier);
1733                 }
1734
1735                 return defined;
1736         }
1737
1738         bool parse_pp_primary_expression () {
1739                 if (current >= end) {
1740                         Report.error (new SourceReference (source_file, line, column, line, column), "syntax error, expected identifier");
1741                 } else if (is_ident_char (current[0])) {
1742                         return parse_pp_symbol ();
1743                 } else if (current[0] == '(') {
1744                         current++;
1745                         column++;
1746                         pp_whitespace ();
1747                         bool result = parse_pp_expression ();
1748                         pp_whitespace ();
1749                         if (current < end && current[0] ==  ')') {
1750                                 current++;
1751                                 column++;
1752                         } else {
1753                                 Report.error (new SourceReference (source_file, line, column, line, column), "syntax error, expected `)'");
1754                         }
1755                         return result;
1756                 } else {
1757                         Report.error (new SourceReference (source_file, line, column, line, column), "syntax error, expected identifier");
1758                 }
1759                 return false;
1760         }
1761
1762         bool parse_pp_unary_expression () {
1763                 if (current < end && current[0] == '!') {
1764                         current++;
1765                         column++;
1766                         pp_whitespace ();
1767                         return !parse_pp_unary_expression ();
1768                 }
1769
1770                 return parse_pp_primary_expression ();
1771         }
1772
1773         bool parse_pp_equality_expression () {
1774                 bool left = parse_pp_unary_expression ();
1775                 pp_whitespace ();
1776                 while (true) {
1777                         if (current < end - 1 && current[0] == '=' && current[1] == '=') {
1778                                 current += 2;
1779                                 column += 2;
1780                                 pp_whitespace ();
1781                                 bool right = parse_pp_unary_expression ();
1782                                 left = (left == right);
1783                         } else if (current < end - 1 && current[0] == '!' && current[1] == '=') {
1784                                 current += 2;
1785                                 column += 2;
1786                                 pp_whitespace ();
1787                                 bool right = parse_pp_unary_expression ();
1788                                 left = (left != right);
1789                         } else {
1790                                 break;
1791                         }
1792                 }
1793                 return left;
1794         }
1795
1796         bool parse_pp_and_expression () {
1797                 bool left = parse_pp_equality_expression ();
1798                 pp_whitespace ();
1799                 while (current < end - 1 && current[0] == '&' && current[1] == '&') {
1800                         current += 2;
1801                         column += 2;
1802                         pp_whitespace ();
1803                         bool right = parse_pp_equality_expression ();
1804                         left = left && right;
1805                 }
1806                 return left;
1807         }
1808
1809         bool parse_pp_or_expression () {
1810                 bool left = parse_pp_and_expression ();
1811                 pp_whitespace ();
1812                 while (current < end - 1 && current[0] == '|' && current[1] == '|') {
1813                         current += 2;
1814                         column += 2;
1815                         pp_whitespace ();
1816                         bool right = parse_pp_and_expression ();
1817                         left = left || right;
1818                 }
1819                 return left;
1820         }
1821
1822         bool parse_pp_expression () {
1823                 return parse_pp_or_expression ();
1824         }
1825 }
1826