vala/valascanner.vala

   1 /* valascanner.vala
   2  *
   3  * Copyright (C) 2008-2009  Jürg Billeter
   4  *
   5  * This library is free software; you can redistribute it and/or
   6  * modify it under the terms of the GNU Lesser General Public
   7  * License as published by the Free Software Foundation; either
   8  * version 2.1 of the License, or (at your option) any later version.
   9
  10  * This library is distributed in the hope that it will be useful,
  11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  13  * Lesser General Public License for more details.
  14
  15  * You should have received a copy of the GNU Lesser General Public
  16  * License along with this library; if not, write to the Free Software
  17  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301  USA
  18  *
  19  * Author:
  20  *      Jürg Billeter <j@bitron.ch>
  21  */
  22
  23 using GLib;
  24 using Gee;
  25
  26 /**
  27  * Lexical scanner for Vala source files.
  28  */
  29 public class Vala.Scanner {
  30         public SourceFile source_file { get; private set; } // file being scanned; assigned by the constructor
  31
             // Read cursor and the address one past the last byte to scan. The
             // constructor points both into the mapped contents of source_file.
  32         char* current;
  33         char* end;
  34
             // Line and column reported for the next token (read_token () copies
             // them into token_begin); the constructor starts both at 1.
  35         int line;
  36         int column;
  37
             // NOTE(review): not referenced in the part of the class reviewed here;
             // presumably the most recently scanned comment - confirm.
  38         Comment _comment;
  39
             // NOTE(review): not referenced in the part of the class reviewed here;
             // presumably one entry per open conditional section - confirm against
             // the code that pushes and pops it.
  40         Conditional[] conditional_stack;
  41
  42         struct Conditional {
                     // Element type of conditional_stack: three independent flags.
                     // NOTE(review): nothing in the reviewed part of the file reads or
                     // writes these flags. Going by the names only, they presumably
                     // record that a branch of the section was already taken, that
                     // its else branch was seen, and that its content is being
                     // skipped - confirm against the code that sets them.
  43                 public bool matched;
  44                 public bool else_found;
  45                 public bool skip_section;
  46         }
  47
  48         public Scanner (SourceFile source_file) {
                     // Creates a scanner positioned on the first byte of the mapped
                     // contents of source_file, at line 1, column 1.
                     this.source_file = source_file;

                     // [current, end) delimits the bytes still to be scanned
                     current = source_file.get_mapped_contents ();
                     end = current + source_file.get_mapped_length ();

                     column = 1;
                     line = 1;
  58         }
  59
  60         bool is_ident_char (char c) {
                     // An identifier character is an underscore or any character
                     // for which isalnum () holds.
                     if (c == '_') {
                             return true;
                     }
                     return c.isalnum ();
  62         }
  63
  64         public static TokenType get_identifier_or_keyword (char* begin, int len) {
                     // Classifies the len characters starting at begin: returns the
                     // token type of the keyword they spell, or TokenType.IDENTIFIER
                     // when they spell none of the keywords tested below.
                     //
                     // Candidates are grouped by length. Within a group the leading
                     // character(s) select the only keyword that can still apply, so
                     // matches () runs at most once per call.
                     switch (len) {
                     case 2:
                             if (begin[0] == 'a' && matches (begin, "as")) return TokenType.AS;
                             if (begin[0] == 'd' && matches (begin, "do")) return TokenType.DO;
                             if (begin[0] == 'i') {
                                     // first character is known, the second one decides
                                     if (begin[1] == 'f') return TokenType.IF;
                                     if (begin[1] == 'n') return TokenType.IN;
                                     if (begin[1] == 's') return TokenType.IS;
                             }
                             break;
                     case 3:
                             if (begin[0] == 'f' && matches (begin, "for")) return TokenType.FOR;
                             if (begin[0] == 'g' && matches (begin, "get")) return TokenType.GET;
                             if (begin[0] == 'n' && matches (begin, "new")) return TokenType.NEW;
                             if (begin[0] == 'o' && matches (begin, "out")) return TokenType.OUT;
                             if (begin[0] == 'r' && matches (begin, "ref")) return TokenType.REF;
                             if (begin[0] == 's' && matches (begin, "set")) return TokenType.SET;
                             if (begin[0] == 't' && matches (begin, "try")) return TokenType.TRY;
                             if (begin[0] == 'v' && matches (begin, "var")) return TokenType.VAR;
                             break;
                     case 4:
                             if (begin[0] == 'b' && matches (begin, "base")) return TokenType.BASE;
                             if (begin[0] == 'c' && matches (begin, "case")) return TokenType.CASE;
                             if (begin[0] == 'e') {
                                     if (begin[1] == 'l' && matches (begin, "else")) return TokenType.ELSE;
                                     if (begin[1] == 'n' && matches (begin, "enum")) return TokenType.ENUM;
                             }
                             if (begin[0] == 'l' && matches (begin, "lock")) return TokenType.LOCK;
                             if (begin[0] == 'n' && matches (begin, "null")) return TokenType.NULL;
                             if (begin[0] == 't') {
                                     if (begin[1] == 'h' && matches (begin, "this")) return TokenType.THIS;
                                     if (begin[1] == 'r' && matches (begin, "true")) return TokenType.TRUE;
                             }
                             if (begin[0] == 'v' && matches (begin, "void")) return TokenType.VOID;
                             if (begin[0] == 'w' && matches (begin, "weak")) return TokenType.WEAK;
                             break;
                     case 5:
                             if (begin[0] == 'a' && matches (begin, "async")) return TokenType.ASYNC;
                             if (begin[0] == 'b' && matches (begin, "break")) return TokenType.BREAK;
                             if (begin[0] == 'c') {
                                     if (begin[1] == 'a' && matches (begin, "catch")) return TokenType.CATCH;
                                     if (begin[1] == 'l' && matches (begin, "class")) return TokenType.CLASS;
                                     if (begin[1] == 'o' && matches (begin, "const")) return TokenType.CONST;
                             }
                             if (begin[0] == 'f' && matches (begin, "false")) return TokenType.FALSE;
                             if (begin[0] == 'o' && matches (begin, "owned")) return TokenType.OWNED;
                             if (begin[0] == 't' && matches (begin, "throw")) return TokenType.THROW;
                             if (begin[0] == 'u' && matches (begin, "using")) return TokenType.USING;
                             if (begin[0] == 'w' && matches (begin, "while")) return TokenType.WHILE;
                             if (begin[0] == 'y' && matches (begin, "yield")) return TokenType.YIELD;
                             break;
                     case 6:
                             if (begin[0] == 'd' && matches (begin, "delete")) return TokenType.DELETE;
                             if (begin[0] == 'e' && matches (begin, "extern")) return TokenType.EXTERN;
                             if (begin[0] == 'i' && matches (begin, "inline")) return TokenType.INLINE;
                             if (begin[0] == 'p') {
                                     if (begin[1] == 'a' && matches (begin, "params")) return TokenType.PARAMS;
                                     if (begin[1] == 'u' && matches (begin, "public")) return TokenType.PUBLIC;
                             }
                             if (begin[0] == 'r' && matches (begin, "return")) return TokenType.RETURN;
                             if (begin[0] == 's') {
                                     // "si", "st" and "sw" each need the third character too
                                     if (begin[1] == 'i') {
                                             if (begin[2] == 'g' && matches (begin, "signal")) return TokenType.SIGNAL;
                                             if (begin[2] == 'z' && matches (begin, "sizeof")) return TokenType.SIZEOF;
                                     }
                                     if (begin[1] == 't') {
                                             if (begin[2] == 'a' && matches (begin, "static")) return TokenType.STATIC;
                                             if (begin[2] == 'r' && matches (begin, "struct")) return TokenType.STRUCT;
                                     }
                                     if (begin[1] == 'w' && matches (begin, "switch")) return TokenType.SWITCH;
                             }
                             if (begin[0] == 't') {
                                     if (begin[1] == 'h' && matches (begin, "throws")) return TokenType.THROWS;
                                     if (begin[1] == 'y' && matches (begin, "typeof")) return TokenType.TYPEOF;
                             }
                             break;
                     case 7:
                             if (begin[0] == 'd') {
                                     if (begin[1] == 'e' && matches (begin, "default")) return TokenType.DEFAULT;
                                     if (begin[1] == 'y' && matches (begin, "dynamic")) return TokenType.DYNAMIC;
                             }
                             if (begin[0] == 'e' && matches (begin, "ensures")) return TokenType.ENSURES;
                             if (begin[0] == 'f') {
                                     if (begin[1] == 'i' && matches (begin, "finally")) return TokenType.FINALLY;
                                     if (begin[1] == 'o' && matches (begin, "foreach")) return TokenType.FOREACH;
                             }
                             if (begin[0] == 'p' && matches (begin, "private")) return TokenType.PRIVATE;
                             if (begin[0] == 'u' && matches (begin, "unowned")) return TokenType.UNOWNED;
                             if (begin[0] == 'v' && matches (begin, "virtual")) return TokenType.VIRTUAL;
                             break;
                     case 8:
                             if (begin[0] == 'a' && matches (begin, "abstract")) return TokenType.ABSTRACT;
                             if (begin[0] == 'c' && matches (begin, "continue")) return TokenType.CONTINUE;
                             if (begin[0] == 'd' && matches (begin, "delegate")) return TokenType.DELEGATE;
                             if (begin[0] == 'i' && matches (begin, "internal")) return TokenType.INTERNAL;
                             if (begin[0] == 'o' && matches (begin, "override")) return TokenType.OVERRIDE;
                             if (begin[0] == 'r' && matches (begin, "requires")) return TokenType.REQUIRES;
                             if (begin[0] == 'v' && matches (begin, "volatile")) return TokenType.VOLATILE;
                             break;
                     case 9:
                             if (begin[0] == 'c' && matches (begin, "construct")) return TokenType.CONSTRUCT;
                             if (begin[0] == 'i' && matches (begin, "interface")) return TokenType.INTERFACE;
                             if (begin[0] == 'n' && matches (begin, "namespace")) return TokenType.NAMESPACE;
                             if (begin[0] == 'p' && matches (begin, "protected")) return TokenType.PROTECTED;
                             break;
                     case 11:
                             // the only candidate of this length
                             if (matches (begin, "errordomain")) return TokenType.ERRORDOMAIN;
                             break;
                     }

                     // no keyword of this length and spelling
                     return TokenType.IDENTIFIER;
 342         }
 343
 344         TokenType read_number () {
                     // Scans the numeric token starting at current and leaves current
                     // on the first character after it. read_token () calls this when
                     // current[0] is a digit.
                     //
                     // Returns TokenType.INTEGER_LITERAL for a plain decimal or 0x
                     // hexadecimal number (optionally followed by an l/ll/u/ul/ull
                     // suffix, any case), TokenType.REAL_LITERAL when a fractional
                     // part, an exponent or an f/d suffix is present, and
                     // TokenType.IDENTIFIER when identifier characters directly follow
                     // a number that has neither fractional part nor exponent.
 345                 var type = TokenType.INTEGER_LITERAL;
 346
 347                 // integer part
 348                 if (current < end - 2 && current[0] == '0'
 349                     && current[1] == 'x' && current[2].isxdigit ()) {
 350                         // hexadecimal integer literal
 351                         current += 2;
 352                         while (current < end && current[0].isxdigit ()) {
 353                                 current++;
 354                         }
 355                 } else {
 356                         // decimal number
 357                         while (current < end && current[0].isdigit ()) {
 358                                 current++;
 359                         }
 360                 }
 361
 362                 // fractional part
                     // (requires a digit after the dot, otherwise the dot is left
                     // for the caller to scan as a separate token)
 363                 if (current < end - 1 && current[0] == '.' && current[1].isdigit ()) {
 364                         type = TokenType.REAL_LITERAL;
 365                         current++;
 366                         while (current < end && current[0].isdigit ()) {
 367                                 current++;
 368                         }
 369                 }
 370
 371                 // exponent part
 372                 if (current < end && current[0].tolower () == 'e') {
 373                         type = TokenType.REAL_LITERAL;
 374                         current++;
 375                         if (current < end && (current[0] == '+' || current[0] == '-')) {
 376                                 current++;
 377                         }
 378                         while (current < end && current[0].isdigit ()) {
 379                                 current++;
 380                         }
 381                 }
 382
 383                 // type suffix
 384                 if (current < end) {
                             // remembered before the suffix is examined: an f/d suffix
                             // alone does not prevent the identifier fallback below
 385                         bool real_literal = (type == TokenType.REAL_LITERAL);
 386
 387                         switch (current[0]) {
 388                         case 'l':
 389                         case 'L':
 390                                 if (type == TokenType.INTEGER_LITERAL) {
 391                                         current++;
 392                                         if (current < end && current[0].tolower () == 'l') {
 393                                                 current++;
 394                                         }
 395                                 }
 396                                 break;
 397                         case 'u':
 398                         case 'U':
 399                                 if (type == TokenType.INTEGER_LITERAL) {
 400                                         current++;
 401                                         if (current < end && current[0].tolower () == 'l') {
 402                                                 current++;
 403                                                 if (current < end && current[0].tolower () == 'l') {
 404                                                         current++;
 405                                                 }
 406                                         }
 407                                 }
 408                                 break;
 409                         case 'f':
 410                         case 'F':
 411                         case 'd':
 412                         case 'D':
 413                                 type = TokenType.REAL_LITERAL;
 414                                 current++;
 415                                 break;
 416                         }
 417
                             // The suffix handling above may have advanced current to end
                             // (input ending in e.g. "1u" or "2f"), so the bound has to be
                             // checked again before current[0] is read.
 418                         if (!real_literal && current < end && is_ident_char (current[0])) {
 419                                 // allow identifiers to start with a digit
 420                                 // as long as they contain at least one char
 421                                 while (current < end && is_ident_char (current[0])) {
 422                                         current++;
 423                                 }
 424                                 type = TokenType.IDENTIFIER;
 425                         }
 426                 }
 427
 428                 return type;
 429         }
 430
 431         public TokenType read_token (out SourceLocation token_begin, out SourceLocation token_end) {
 432                 space ();
 433
 434                 TokenType type;
 435                 char* begin = current;
 436                 token_begin.pos = begin;
 437                 token_begin.line = line;
 438                 token_begin.column = column;
 439
 440                 int token_length_in_chars = -1;
 441
 442                 if (current >= end) {
 443                         type = TokenType.EOF;
 444                 } else if (current[0].isalpha () || current[0] == '_') {
 445                         int len = 0;
 446                         while (current < end && is_ident_char (current[0])) {
 447                                 current++;
 448                                 len++;
 449                         }
 450                         type = get_identifier_or_keyword (begin, len);
 451                 } else if (current[0] == '@') {
 452                         token_begin.pos++; // @ is not part of the identifier
 453                         current++;
 454                         int len = 0;
 455                         while (current < end && is_ident_char (current[0])) {
 456                                 current++;
 457                                 len++;
 458                         }
 459                         type = TokenType.IDENTIFIER;
 460                 } else if (current[0].isdigit ()) {
 461                         type = read_number ();
 462                 } else {
 463                         switch (current[0]) {
 464                         case '{':
 465                                 type = TokenType.OPEN_BRACE;
 466                                 current++;
 467                                 break;
 468                         case '}':
 469                                 type = TokenType.CLOSE_BRACE;
 470                                 current++;
 471                                 break;
 472                         case '(':
 473                                 type = TokenType.OPEN_PARENS;
 474                                 current++;
 475                                 break;
 476                         case ')':
 477                                 type = TokenType.CLOSE_PARENS;
 478                                 current++;
 479                                 break;
 480                         case '[':
 481                                 type = TokenType.OPEN_BRACKET;
 482                                 current++;
 483                                 break;
 484                         case ']':
 485                                 type = TokenType.CLOSE_BRACKET;
 486                                 current++;
 487                                 break;
 488                         case '.':
 489                                 type = TokenType.DOT;
 490                                 current++;
 491                                 if (current < end - 1) {
 492                                         if (current[0] == '.' && current[1] == '.') {
 493                                                 type = TokenType.ELLIPSIS;
 494                                                 current += 2;
 495                                         }
 496                                 }
 497                                 break;
 498                         case ':':
 499                                 type = TokenType.COLON;
 500                                 current++;
 501                                 if (current < end && current[0] == ':') {
 502                                         type = TokenType.DOUBLE_COLON;
 503                                         current++;
 504                                 }
 505                                 break;
 506                         case ',':
 507                                 type = TokenType.COMMA;
 508                                 current++;
 509                                 break;
 510                         case ';':
 511                                 type = TokenType.SEMICOLON;
 512                                 current++;
 513                                 break;
 514                         case '#':
 515                                 type = TokenType.HASH;
 516                                 current++;
 517                                 break;
 518                         case '?':
 519                                 type = TokenType.INTERR;
 520                                 current++;
 521                                 break;
 522                         case '|':
 523                                 type = TokenType.BITWISE_OR;
 524                                 current++;
 525                                 if (current < end) {
 526                                         switch (current[0]) {
 527                                         case '=':
 528                                                 type = TokenType.ASSIGN_BITWISE_OR;
 529                                                 current++;
 530                                                 break;
 531                                         case '|':
 532                                                 type = TokenType.OP_OR;
 533                                                 current++;
 534                                                 break;
 535                                         }
 536                                 }
 537                                 break;
 538                         case '&':
 539                                 type = TokenType.BITWISE_AND;
 540                                 current++;
 541                                 if (current < end) {
 542                                         switch (current[0]) {
 543                                         case '=':
 544                                                 type = TokenType.ASSIGN_BITWISE_AND;
 545                                                 current++;
 546                                                 break;
 547                                         case '&':
 548                                                 type = TokenType.OP_AND;
 549                                                 current++;
 550                                                 break;
 551                                         }
 552                                 }
 553                                 break;
 554                         case '^':
 555                                 type = TokenType.CARRET;
 556                                 current++;
 557                                 if (current < end && current[0] == '=') {
 558                                         type = TokenType.ASSIGN_BITWISE_XOR;
 559                                         current++;
 560                                 }
 561                                 break;
 562                         case '~':
 563                                 type = TokenType.TILDE;
 564                                 current++;
 565                                 break;
 566                         case '=':
 567                                 type = TokenType.ASSIGN;
 568                                 current++;
 569                                 if (current < end) {
 570                                         switch (current[0]) {
 571                                         case '=':
 572                                                 type = TokenType.OP_EQ;
 573                                                 current++;
 574                                                 break;
 575                                         case '>':
 576                                                 type = TokenType.LAMBDA;
 577                                                 current++;
 578                                                 break;
 579                                         }
 580                                 }
 581                                 break;
 582                         case '<':
 583                                 type = TokenType.OP_LT;
 584                                 current++;
 585                                 if (current < end) {
 586                                         switch (current[0]) {
 587                                         case '=':
 588                                                 type = TokenType.OP_LE;
 589                                                 current++;
 590                                                 break;
 591                                         case '<':
 592                                                 type = TokenType.OP_SHIFT_LEFT;
 593                                                 current++;
 594                                                 if (current < end && current[0] == '=') {
 595                                                         type = TokenType.ASSIGN_SHIFT_LEFT;
 596                                                         current++;
 597                                                 }
 598                                                 break;
 599                                         }
 600                                 }
 601                                 break;
 602                         case '>':
 603                                 type = TokenType.OP_GT;
 604                                 current++;
 605                                 if (current < end && current[0] == '=') {
 606                                         type = TokenType.OP_GE;
 607                                         current++;
 608                                 }
 609                                 break;
 610                         case '!':
 611                                 type = TokenType.OP_NEG;
 612                                 current++;
 613                                 if (current < end && current[0] == '=') {
 614                                         type = TokenType.OP_NE;
 615                                         current++;
 616                                 }
 617                                 break;
 618                         case '+':
 619                                 type = TokenType.PLUS;
 620                                 current++;
 621                                 if (current < end) {
 622                                         switch (current[0]) {
 623                                         case '=':
 624                                                 type = TokenType.ASSIGN_ADD;
 625                                                 current++;
 626                                                 break;
 627                                         case '+':
 628                                                 type = TokenType.OP_INC;
 629                                                 current++;
 630                                                 break;
 631                                         }
 632                                 }
 633                                 break;
 634                         case '-':
 635                                 type = TokenType.MINUS;
 636                                 current++;
 637                                 if (current < end) {
 638                                         switch (current[0]) {
 639                                         case '=':
 640                                                 type = TokenType.ASSIGN_SUB;
 641                                                 current++;
 642                                                 break;
 643                                         case '-':
 644                                                 type = TokenType.OP_DEC;
 645                                                 current++;
 646                                                 break;
 647                                         case '>':
 648                                                 type = TokenType.OP_PTR;
 649                                                 current++;
 650                                                 break;
 651                                         }
 652                                 }
 653                                 break;
 654                         case '*':
 655                                 type = TokenType.STAR;
 656                                 current++;
 657                                 if (current < end && current[0] == '=') {
 658                                         type = TokenType.ASSIGN_MUL;
 659                                         current++;
 660                                 }
 661                                 break;
 662                         case '/':
 663                                 type = TokenType.DIV;
 664                                 current++;
 665                                 if (current < end && current[0] == '=') {
 666                                         type = TokenType.ASSIGN_DIV;
 667                                         current++;
 668                                 }
 669                                 break;
 670                         case '%':
 671                                 type = TokenType.PERCENT;
 672                                 current++;
 673                                 if (current < end && current[0] == '=') {
 674                                         type = TokenType.ASSIGN_PERCENT;
 675                                         current++;
 676                                 }
 677                                 break;
 678                         case '\'':
 679                         case '"':
 680                                 if (begin[0] == '\'') {
 681                                         type = TokenType.CHARACTER_LITERAL;
 682                                 } else if (current < end - 6 && begin[1] == '"' && begin[2] == '"') {
 683                                         type = TokenType.VERBATIM_STRING_LITERAL;
 684                                         token_length_in_chars = 6;
 685                                         current += 3;
 686                                         while (current < end - 4) {
 687                                                 if (current[0] == '"' && current[1] == '"' && current[2] == '"') {
 688                                                         break;
 689                                                 } else if (current[0] == '\n') {
 690                                                         current++;
 691                                                         line++;
 692                                                         column = 1;
 693                                                         token_length_in_chars = 3;
 694                                                 } else {
 695                                                         unichar u = ((string) current).get_char_validated ((long) (end - current));
 696                                                         if (u != (unichar) (-1)) {
 697                                                                 current += u.to_utf8 (null);
 698                                                                 token_length_in_chars++;
 699                                                         } else {
 700                                                                 Report.error (new SourceReference (source_file, line, column + token_length_in_chars, line, column + token_length_in_chars), "invalid UTF-8 character");
 701                                                         }
 702                                                 }
 703                                         }
 704                                         if (current[0] == '"' && current[1] == '"' && current[2] == '"') {
 705                                                 current += 3;
 706                                         } else {
 707                                                 Report.error (new SourceReference (source_file, line, column + token_length_in_chars, line, column + token_length_in_chars), "syntax error, expected \"\"\"");
 708                                         }
 709                                         break;
 710                                 } else {
 711                                         type = TokenType.STRING_LITERAL;
 712                                 }
 713                                 token_length_in_chars = 2;
 714                                 current++;
 715                                 while (current < end && current[0] != begin[0]) {
 716                                         if (current[0] == '\\') {
 717                                                 current++;
 718                                                 token_length_in_chars++;
 719                                                 if (current >= end) {
 720                                                         break;
 721                                                 }
 722
 723                                                 switch (current[0]) {
 724                                                 case '\'':
 725                                                 case '"':
 726                                                 case '\\':
 727                                                 case '0':
 728                                                 case 'b':
 729                                                 case 'f':
 730                                                 case 'n':
 731                                                 case 'r':
 732                                                 case 't':
 733                                                         current++;
 734                                                         token_length_in_chars++;
 735                                                         break;
 736                                                 case 'x':
 737                                                         // hexadecimal escape character
 738                                                         current++;
 739                                                         token_length_in_chars++;
 740                                                         while (current < end && current[0].isxdigit ()) {
 741                                                                 current++;
 742                                                                 token_length_in_chars++;
 743                                                         }
 744                                                         break;
 745                                                 default:
 746                                                         Report.error (new SourceReference (source_file, line, column + token_length_in_chars, line, column + token_length_in_chars), "invalid escape sequence");
 747                                                         break;
 748                                                 }
 749                                         } else if (current[0] == '\n') {
 750                                                 break;
 751                                         } else {
 752                                                 unichar u = ((string) current).get_char_validated ((long) (end - current));
 753                                                 if (u != (unichar) (-1)) {
 754                                                         current += u.to_utf8 (null);
 755                                                         token_length_in_chars++;
 756                                                 } else {
 757                                                         current++;
 758                                                         Report.error (new SourceReference (source_file, line, column + token_length_in_chars, line, column + token_length_in_chars), "invalid UTF-8 character");
 759                                                 }
 760                                         }
 761                                 }
 762                                 if (current < end && current[0] != '\n') {
 763                                         current++;
 764                                 } else {
 765                                         Report.error (new SourceReference (source_file, line, column + token_length_in_chars, line, column + token_length_in_chars), "syntax error, expected %c".printf (begin[0]));
 766                                 }
 767                                 break;
 768                         default:
 769                                 unichar u = ((string) current).get_char_validated ((long) (end - current));
 770                                 if (u != (unichar) (-1)) {
 771                                         current += u.to_utf8 (null);
 772                                         Report.error (new SourceReference (source_file, line, column, line, column), "syntax error, unexpected character");
 773                                 } else {
 774                                         current++;
 775                                         Report.error (new SourceReference (source_file, line, column, line, column), "invalid UTF-8 character");
 776                                 }
 777                                 column++;
 778                                 return read_token (out token_begin, out token_end);
 779                         }
 780                 }
 781
 782                 if (token_length_in_chars < 0) {
 783                         column += (int) (current - begin);
 784                 } else {
 785                         column += token_length_in_chars;
 786                 }
 787
 788                 token_end.pos = current;
 789                 token_end.line = line;
 790                 token_end.column = column - 1;
 791
 792                 return type;
 793         }
 794
 795         static bool matches (char* begin, string keyword) {
 796                 char* keyword_array = keyword;
 797                 long len = keyword.len ();
 798                 for (int i = 0; i < len; i++) {
 799                         if (begin[i] != keyword_array[i]) {
 800                                 return false;
 801                         }
 802                 }
 803                 return true;
 804         }
 805
 806         bool pp_whitespace () {
 807                 bool found = false;
 808                 while (current < end && current[0].isspace () && current[0] != '\n') {
 809                         found = true;
 810                         current++;
 811                         column++;
 812                 }
 813                 return found;
 814         }
 815
 816         void pp_directive () {
 817                 // hash sign
 818                 current++;
 819                 column++;
 820
 821                 pp_whitespace ();
 822
 823                 char* begin = current;
 824                 int len = 0;
 825                 while (current < end && current[0].isalnum ()) {
 826                         current++;
 827                         column++;
 828                         len++;
 829                 }
 830
 831                 if (len == 2 && matches (begin, "if")) {
 832                         parse_pp_if ();
 833                 } else if (len == 4 && matches (begin, "elif")) {
 834                         parse_pp_elif ();
 835                 } else if (len == 4 && matches (begin, "else")) {
 836                         parse_pp_else ();
 837                 } else if (len == 5 && matches (begin, "endif")) {
 838                         parse_pp_endif ();
 839                 } else {
 840                         Report.error (new SourceReference (source_file, line, column - len, line, column), "syntax error, invalid preprocessing directive");
 841                 }
 842
 843                 if (conditional_stack.length > 0
 844                     && conditional_stack[conditional_stack.length - 1].skip_section) {
 845                         // skip lines until next preprocessing directive
 846                         bool bol = false;
 847                         while (current < end) {
 848                                 if (bol && current[0] == '#') {
 849                                         // go back to begin of line
 850                                         current -= (column - 1);
 851                                         column = 1;
 852                                         return;
 853                                 }
 854                                 if (current[0] == '\n') {
 855                                         line++;
 856                                         column = 0;
 857                                         bol = true;
 858                                 } else if (!current[0].isspace ()) {
 859                                         bol = false;
 860                                 }
 861                                 current++;
 862                                 column++;
 863                         }
 864                 }
 865         }
 866
 867         void pp_eol () {
 868                 pp_whitespace ();
 869                 if (current >= end || current[0] != '\n') {
 870                         Report.error (new SourceReference (source_file, line, column, line, column), "syntax error, expected newline");
 871                 }
 872         }
 873
 874         void parse_pp_if () {
 875                 pp_whitespace ();
 876
 877                 bool condition = parse_pp_expression ();
 878
 879                 pp_eol ();
 880
 881                 conditional_stack += Conditional ();
 882
 883                 if (condition && (conditional_stack.length == 1 || !conditional_stack[conditional_stack.length - 2].skip_section)) {
 884                         // condition true => process code within if
 885                         conditional_stack[conditional_stack.length - 1].matched = true;
 886                 } else {
 887                         // skip lines until next preprocessing directive
 888                         conditional_stack[conditional_stack.length - 1].skip_section = true;
 889                 }
 890         }
 891
 892         void parse_pp_elif () {
 893                 pp_whitespace ();
 894
 895                 bool condition = parse_pp_expression ();
 896
 897                 pp_eol ();
 898
 899                 if (conditional_stack.length == 0 || conditional_stack[conditional_stack.length - 1].else_found) {
 900                         Report.error (new SourceReference (source_file, line, column, line, column), "syntax error, unexpected #elif");
 901                         return;
 902                 }
 903
 904                 if (condition && !conditional_stack[conditional_stack.length - 1].matched
 905                     && (conditional_stack.length == 1 || !conditional_stack[conditional_stack.length - 2].skip_section)) {
 906                         // condition true => process code within if
 907                         conditional_stack[conditional_stack.length - 1].matched = true;
 908                         conditional_stack[conditional_stack.length - 1].skip_section = false;
 909                 } else {
 910                         // skip lines until next preprocessing directive
 911                         conditional_stack[conditional_stack.length - 1].skip_section = true;
 912                 }
 913         }
 914
 915         void parse_pp_else () {
 916                 pp_eol ();
 917
 918                 if (conditional_stack.length == 0 || conditional_stack[conditional_stack.length - 1].else_found) {
 919                         Report.error (new SourceReference (source_file, line, column, line, column), "syntax error, unexpected #else");
 920                         return;
 921                 }
 922
 923                 if (!conditional_stack[conditional_stack.length - 1].matched
 924                     && (conditional_stack.length == 1 || !conditional_stack[conditional_stack.length - 2].skip_section)) {
 925                         // condition true => process code within if
 926                         conditional_stack[conditional_stack.length - 1].matched = true;
 927                         conditional_stack[conditional_stack.length - 1].skip_section = false;
 928                 } else {
 929                         // skip lines until next preprocessing directive
 930                         conditional_stack[conditional_stack.length - 1].skip_section = true;
 931                 }
 932         }
 933
 934         void parse_pp_endif () {
 935                 pp_eol ();
 936
 937                 if (conditional_stack.length == 0) {
 938                         Report.error (new SourceReference (source_file, line, column, line, column), "syntax error, unexpected #endif");
 939                         return;
 940                 }
 941
 942                 conditional_stack.length--;
 943         }
 944
 945         bool parse_pp_symbol () {
 946                 int len = 0;
 947                 while (current < end && is_ident_char (current[0])) {
 948                         current++;
 949                         column++;
 950                         len++;
 951                 }
 952
 953                 if (len == 0) {
 954                         Report.error (new SourceReference (source_file, line, column, line, column), "syntax error, expected identifier");
 955                         return false;
 956                 }
 957
 958                 string identifier = ((string) (current - len)).ndup (len);
 959                 bool defined;
 960                 if (identifier == "true") {
 961                         defined = true;
 962                 } else if (identifier == "false") {
 963                         defined = false;
 964                 } else {
 965                         defined = source_file.context.is_defined (identifier);
 966                 }
 967
 968                 return defined;
 969         }
 970
 971         bool parse_pp_primary_expression () {
 972                 if (current >= end) {
 973                         Report.error (new SourceReference (source_file, line, column, line, column), "syntax error, expected identifier");
 974                 } else if (is_ident_char (current[0])) {
 975                         return parse_pp_symbol ();
 976                 } else if (current[0] == '(') {
 977                         current++;
 978                         column++;
 979                         pp_whitespace ();
 980                         bool result = parse_pp_expression ();
 981                         pp_whitespace ();
 982                         if (current < end && current[0] ==  ')') {
 983                                 current++;
 984                                 column++;
 985                         } else {
 986                                 Report.error (new SourceReference (source_file, line, column, line, column), "syntax error, expected `)'");
 987                         }
 988                         return result;
 989                 } else {
 990                         Report.error (new SourceReference (source_file, line, column, line, column), "syntax error, expected identifier");
 991                 }
 992                 return false;
 993         }
 994
 995         bool parse_pp_unary_expression () {
 996                 if (current < end && current[0] == '!') {
 997                         current++;
 998                         column++;
 999                         pp_whitespace ();
1000                         return !parse_pp_unary_expression ();
1001                 }
1002
1003                 return parse_pp_primary_expression ();
1004         }
1005
1006         bool parse_pp_equality_expression () {
1007                 bool left = parse_pp_unary_expression ();
1008                 pp_whitespace ();
1009                 while (true) {
1010                         if (current < end - 1 && current[0] == '=' && current[1] == '=') {
1011                                 current += 2;
1012                                 column += 2;
1013                                 pp_whitespace ();
1014                                 bool right = parse_pp_unary_expression ();
1015                                 left = (left == right);
1016                         } else if (current < end - 1 && current[0] == '!' && current[1] == '=') {
1017                                 current += 2;
1018                                 column += 2;
1019                                 pp_whitespace ();
1020                                 bool right = parse_pp_unary_expression ();
1021                                 left = (left != right);
1022                         } else {
1023                                 break;
1024                         }
1025                 }
1026                 return left;
1027         }
1028
1029         bool parse_pp_and_expression () {
1030                 bool left = parse_pp_equality_expression ();
1031                 pp_whitespace ();
1032                 while (current < end - 1 && current[0] == '&' && current[1] == '&') {
1033                         current += 2;
1034                         column += 2;
1035                         pp_whitespace ();
1036                         bool right = parse_pp_equality_expression ();
1037                         left = left && right;
1038                 }
1039                 return left;
1040         }
1041
1042         bool parse_pp_or_expression () {
1043                 bool left = parse_pp_and_expression ();
1044                 pp_whitespace ();
1045                 while (current < end - 1 && current[0] == '|' && current[1] == '|') {
1046                         current += 2;
1047                         column += 2;
1048                         pp_whitespace ();
1049                         bool right = parse_pp_and_expression ();
1050                         left = left || right;
1051                 }
1052                 return left;
1053         }
1054
1055         bool parse_pp_expression () {
1056                 return parse_pp_or_expression ();
1057         }
1058
1059         bool whitespace () {
1060                 bool found = false;
1061                 bool bol = (column == 1);
1062                 while (current < end && current[0].isspace ()) {
1063                         if (current[0] == '\n') {
1064                                 line++;
1065                                 column = 0;
1066                                 bol = true;
1067                         }
1068                         found = true;
1069                         current++;
1070                         column++;
1071                 }
1072                 if (bol && current < end && current[0] == '#') {
1073                         pp_directive ();
1074                         return true;
1075                 }
1076                 return found;
1077         }
1078
1079         bool comment (bool file_comment = false) {
1080                 if (current > end - 2
1081                     || current[0] != '/'
1082                     || (current[1] != '/' && current[1] != '*')) {
1083                         return false;
1084                 }
1085
1086                 if (current[1] == '/') {
1087                         SourceReference source_reference = null;
1088                         if (file_comment) {
1089                                 source_reference = new SourceReference (source_file, line, column, line, column);
1090                         }
1091
1092                         // single-line comment
1093                         current += 2;
1094                         char* begin = current;
1095
1096                         // skip until end of line or end of file
1097                         while (current < end && current[0] != '\n') {
1098                                 current++;
1099                         }
1100
1101                         if (source_reference != null) {
1102                                 push_comment (((string) begin).ndup ((long) (current - begin)), source_reference, file_comment);
1103                         }
1104                 } else {
1105                         SourceReference source_reference = null;
1106
1107                         if (file_comment && current[2] == '*') {
1108                                 return false;
1109                         }
1110
1111                         if (current[2] == '*' || file_comment) {
1112                                 source_reference = new SourceReference (source_file, line, column, line, column);
1113                         }
1114
1115                         current += 2;
1116
1117                         char* begin = current;
1118                         while (current < end - 1
1119                                && (current[0] != '*' || current[1] != '/')) {
1120                                 if (current[0] == '\n') {
1121                                         line++;
1122                                         column = 0;
1123                                 }
1124                                 current++;
1125                                 column++;
1126                         }
1127
1128                         if (current == end - 1) {
1129                                 Report.error (new SourceReference (source_file, line, column, line, column), "syntax error, expected */");
1130                                 return true;
1131                         }
1132
1133                         if (source_reference != null) {
1134                                 push_comment (((string) begin).ndup ((long) (current - begin)), source_reference, file_comment);
1135                         }
1136
1137                         current += 2;
1138                         column += 2;
1139                 }
1140
1141                 return true;
1142         }
1143
1144         void space () {
1145                 while (whitespace () || comment ()) {
1146                 }
1147         }
1148
1149         public void parse_file_comments () {
1150                 while (whitespace () || comment (true)) {
1151                 }
1152         }
1153
1154         void push_comment (string comment_item, SourceReference source_reference, bool file_comment) {
1155                 if (comment_item[0] == '*') {
1156                         _comment = new Comment (comment_item, source_reference);
1157                 }
1158
1159                 if (file_comment) {
1160                         source_file.add_comment (new Comment (comment_item, source_reference));
1161                         _comment = null;
1162                 }
1163         }
1164
1165         /**
1166          * Clears and returns the content of the comment stack.
1167          *
1168          * @return saved comment
1169          */
1170         public Comment? pop_comment () {
1171                 if (_comment == null) {
1172                         return null;
1173                 }
1174
1175                 var comment = _comment;
1176                 _comment = null;
1177                 return comment;
1178         }
1179 }
1180