vala/valascanner.vala

   1 /* valascanner.vala
   2  *
   3  * Copyright (C) 2008-2009  Jürg Billeter
   4  *
   5  * This library is free software; you can redistribute it and/or
   6  * modify it under the terms of the GNU Lesser General Public
   7  * License as published by the Free Software Foundation; either
   8  * version 2.1 of the License, or (at your option) any later version.
   9
  10  * This library is distributed in the hope that it will be useful,
  11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  13  * Lesser General Public License for more details.
  14
  15  * You should have received a copy of the GNU Lesser General Public
  16  * License along with this library; if not, write to the Free Software
  17  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301  USA
  18  *
  19  * Author:
  20  *      Jürg Billeter <j@bitron.ch>
  21  */
  22
  23 using GLib;
  24 using Gee;
  25
  26 /**
  27  * Lexical scanner for Vala source files.
  28  */
  29 public class Vala.Scanner {
  30         public SourceFile source_file { get; private set; }
  31
  32         char* current;
  33         char* end;
  34
  35         int line;
  36         int column;
  37
  38         string _comment;
  39
  40         public Scanner (SourceFile source_file) {
  41                 this.source_file = source_file;
  42
  43                 char* begin = source_file.get_mapped_contents ();
  44                 end = begin + source_file.get_mapped_length ();
  45
  46                 current = begin;
  47
  48                 line = 1;
  49                 column = 1;
  50         }
  51
  52         bool is_ident_char (char c) {
  53                 return (c.isalnum () || c == '_');
  54         }
  55
  56         TokenType get_identifier_or_keyword (char* begin, int len) {
  57                 switch (len) {
  58                 case 2:
  59                         switch (begin[0]) {
  60                         case 'a':
  61                                 if (matches (begin, "as")) return TokenType.AS;
  62                                 break;
  63                         case 'd':
  64                                 if (matches (begin, "do")) return TokenType.DO;
  65                                 break;
  66                         case 'i':
  67                                 switch (begin[1]) {
  68                                 case 'f':
  69                                         return TokenType.IF;
  70                                 case 'n':
  71                                         return TokenType.IN;
  72                                 case 's':
  73                                         return TokenType.IS;
  74                                 }
  75                                 break;
  76                         }
  77                         break;
  78                 case 3:
  79                         switch (begin[0]) {
  80                         case 'f':
  81                                 if (matches (begin, "for")) return TokenType.FOR;
  82                                 break;
  83                         case 'g':
  84                                 if (matches (begin, "get")) return TokenType.GET;
  85                                 break;
  86                         case 'n':
  87                                 if (matches (begin, "new")) return TokenType.NEW;
  88                                 break;
  89                         case 'o':
  90                                 if (matches (begin, "out")) return TokenType.OUT;
  91                                 break;
  92                         case 'r':
  93                                 if (matches (begin, "ref")) return TokenType.REF;
  94                                 break;
  95                         case 's':
  96                                 if (matches (begin, "set")) return TokenType.SET;
  97                                 break;
  98                         case 't':
  99                                 if (matches (begin, "try")) return TokenType.TRY;
 100                                 break;
 101                         case 'v':
 102                                 if (matches (begin, "var")) return TokenType.VAR;
 103                                 break;
 104                         }
 105                         break;
 106                 case 4:
 107                         switch (begin[0]) {
 108                         case 'b':
 109                                 if (matches (begin, "base")) return TokenType.BASE;
 110                                 break;
 111                         case 'c':
 112                                 if (matches (begin, "case")) return TokenType.CASE;
 113                                 break;
 114                         case 'e':
 115                                 switch (begin[1]) {
 116                                 case 'l':
 117                                         if (matches (begin, "else")) return TokenType.ELSE;
 118                                         break;
 119                                 case 'n':
 120                                         if (matches (begin, "enum")) return TokenType.ENUM;
 121                                         break;
 122                                 }
 123                                 break;
 124                         case 'l':
 125                                 if (matches (begin, "lock")) return TokenType.LOCK;
 126                                 break;
 127                         case 'n':
 128                                 if (matches (begin, "null")) return TokenType.NULL;
 129                                 break;
 130                         case 't':
 131                                 switch (begin[1]) {
 132                                 case 'h':
 133                                         if (matches (begin, "this")) return TokenType.THIS;
 134                                         break;
 135                                 case 'r':
 136                                         if (matches (begin, "true")) return TokenType.TRUE;
 137                                         break;
 138                                 }
 139                                 break;
 140                         case 'v':
 141                                 if (matches (begin, "void")) return TokenType.VOID;
 142                                 break;
 143                         case 'w':
 144                                 if (matches (begin, "weak")) return TokenType.WEAK;
 145                                 break;
 146                         }
 147                         break;
 148                 case 5:
 149                         switch (begin[0]) {
 150                         case 'b':
 151                                 if (matches (begin, "break")) return TokenType.BREAK;
 152                                 break;
 153                         case 'c':
 154                                 switch (begin[1]) {
 155                                 case 'a':
 156                                         if (matches (begin, "catch")) return TokenType.CATCH;
 157                                         break;
 158                                 case 'l':
 159                                         if (matches (begin, "class")) return TokenType.CLASS;
 160                                         break;
 161                                 case 'o':
 162                                         if (matches (begin, "const")) return TokenType.CONST;
 163                                         break;
 164                                 }
 165                                 break;
 166                         case 'f':
 167                                 if (matches (begin, "false")) return TokenType.FALSE;
 168                                 break;
 169                         case 'o':
 170                                 if (matches (begin, "owned")) return TokenType.OWNED;
 171                                 break;
 172                         case 't':
 173                                 if (matches (begin, "throw")) return TokenType.THROW;
 174                                 break;
 175                         case 'u':
 176                                 if (matches (begin, "using")) return TokenType.USING;
 177                                 break;
 178                         case 'w':
 179                                 if (matches (begin, "while")) return TokenType.WHILE;
 180                                 break;
 181                         case 'y':
 182                                 if (matches (begin, "yield")) return TokenType.YIELD;
 183                                 break;
 184                         }
 185                         break;
 186                 case 6:
 187                         switch (begin[0]) {
 188                         case 'd':
 189                                 if (matches (begin, "delete")) return TokenType.DELETE;
 190                                 break;
 191                         case 'e':
 192                                 if (matches (begin, "extern")) return TokenType.EXTERN;
 193                                 break;
 194                         case 'i':
 195                                 if (matches (begin, "inline")) return TokenType.INLINE;
 196                                 break;
 197                         case 'p':
 198                                 switch (begin[1]) {
 199                                 case 'a':
 200                                         if (matches (begin, "params")) return TokenType.PARAMS;
 201                                         break;
 202                                 case 'u':
 203                                         if (matches (begin, "public")) return TokenType.PUBLIC;
 204                                         break;
 205                                 }
 206                                 break;
 207                         case 'r':
 208                                 if (matches (begin, "return")) return TokenType.RETURN;
 209                                 break;
 210                         case 's':
 211                                 switch (begin[1]) {
 212                                 case 'i':
 213                                         switch (begin[2]) {
 214                                         case 'g':
 215                                                 if (matches (begin, "signal")) return TokenType.SIGNAL;
 216                                                 break;
 217                                         case 'z':
 218                                                 if (matches (begin, "sizeof")) return TokenType.SIZEOF;
 219                                                 break;
 220                                         }
 221                                         break;
 222                                 case 't':
 223                                         switch (begin[2]) {
 224                                         case 'a':
 225                                                 if (matches (begin, "static")) return TokenType.STATIC;
 226                                                 break;
 227                                         case 'r':
 228                                                 if (matches (begin, "struct")) return TokenType.STRUCT;
 229                                                 break;
 230                                         }
 231                                         break;
 232                                 case 'w':
 233                                         if (matches (begin, "switch")) return TokenType.SWITCH;
 234                                         break;
 235                                 }
 236                                 break;
 237                         case 't':
 238                                 switch (begin[1]) {
 239                                 case 'h':
 240                                         if (matches (begin, "throws")) return TokenType.THROWS;
 241                                         break;
 242                                 case 'y':
 243                                         if (matches (begin, "typeof")) return TokenType.TYPEOF;
 244                                         break;
 245                                 }
 246                                 break;
 247                         case 'y':
 248                                 if (matches (begin, "yields")) return TokenType.YIELDS;
 249                                 break;
 250                         }
 251                         break;
 252                 case 7:
 253                         switch (begin[0]) {
 254                         case 'd':
 255                                 switch (begin[1]) {
 256                                 case 'e':
 257                                         if (matches (begin, "default")) return TokenType.DEFAULT;
 258                                         break;
 259                                 case 'y':
 260                                         if (matches (begin, "dynamic")) return TokenType.DYNAMIC;
 261                                         break;
 262                                 }
 263                                 break;
 264                         case 'e':
 265                                 if (matches (begin, "ensures")) return TokenType.ENSURES;
 266                                 break;
 267                         case 'f':
 268                                 switch (begin[1]) {
 269                                 case 'i':
 270                                         if (matches (begin, "finally")) return TokenType.FINALLY;
 271                                         break;
 272                                 case 'o':
 273                                         if (matches (begin, "foreach")) return TokenType.FOREACH;
 274                                         break;
 275                                 }
 276                                 break;
 277                         case 'p':
 278                                 if (matches (begin, "private")) return TokenType.PRIVATE;
 279                                 break;
 280                         case 'u':
 281                                 if (matches (begin, "unowned")) return TokenType.UNOWNED;
 282                                 break;
 283                         case 'v':
 284                                 if (matches (begin, "virtual")) return TokenType.VIRTUAL;
 285                                 break;
 286                         }
 287                         break;
 288                 case 8:
 289                         switch (begin[0]) {
 290                         case 'a':
 291                                 if (matches (begin, "abstract")) return TokenType.ABSTRACT;
 292                                 break;
 293                         case 'c':
 294                                 if (matches (begin, "continue")) return TokenType.CONTINUE;
 295                                 break;
 296                         case 'd':
 297                                 if (matches (begin, "delegate")) return TokenType.DELEGATE;
 298                                 break;
 299                         case 'i':
 300                                 if (matches (begin, "internal")) return TokenType.INTERNAL;
 301                                 break;
 302                         case 'o':
 303                                 if (matches (begin, "override")) return TokenType.OVERRIDE;
 304                                 break;
 305                         case 'r':
 306                                 if (matches (begin, "requires")) return TokenType.REQUIRES;
 307                                 break;
 308                         case 'v':
 309                                 if (matches (begin, "volatile")) return TokenType.VOLATILE;
 310                                 break;
 311                         }
 312                         break;
 313                 case 9:
 314                         switch (begin[0]) {
 315                         case 'c':
 316                                 if (matches (begin, "construct")) return TokenType.CONSTRUCT;
 317                                 break;
 318                         case 'i':
 319                                 if (matches (begin, "interface")) return TokenType.INTERFACE;
 320                                 break;
 321                         case 'n':
 322                                 if (matches (begin, "namespace")) return TokenType.NAMESPACE;
 323                                 break;
 324                         case 'p':
 325                                 if (matches (begin, "protected")) return TokenType.PROTECTED;
 326                                 break;
 327                         }
 328                         break;
 329                 case 11:
 330                         if (matches (begin, "errordomain")) return TokenType.ERRORDOMAIN;
 331                         break;
 332                 }
 333                 return TokenType.IDENTIFIER;
 334         }
 335
 336         TokenType read_number () {
 337                 var type = TokenType.INTEGER_LITERAL;
 338
 339                 // integer part
 340                 if (current < end - 2 && current[0] == '0'
 341                     && current[1] == 'x' && current[2].isxdigit ()) {
 342                         // hexadecimal integer literal
 343                         current += 2;
 344                         while (current < end && current[0].isxdigit ()) {
 345                                 current++;
 346                         }
 347                 } else {
 348                         // decimal number
 349                         while (current < end && current[0].isdigit ()) {
 350                                 current++;
 351                         }
 352                 }
 353
 354                 // fractional part
 355                 if (current < end - 1 && current[0] == '.' && current[1].isdigit ()) {
 356                         type = TokenType.REAL_LITERAL;
 357                         current++;
 358                         while (current < end && current[0].isdigit ()) {
 359                                 current++;
 360                         }
 361                 }
 362
 363                 // exponent part
 364                 if (current < end && current[0].tolower () == 'e') {
 365                         type = TokenType.REAL_LITERAL;
 366                         current++;
 367                         if (current < end && (current[0] == '+' || current[0] == '-')) {
 368                                 current++;
 369                         }
 370                         while (current < end && current[0].isdigit ()) {
 371                                 current++;
 372                         }
 373                 }
 374
 375                 // type suffix
 376                 if (current < end) {
 377                         bool real_literal = (type == TokenType.REAL_LITERAL);
 378
 379                         switch (current[0]) {
 380                         case 'l':
 381                         case 'L':
 382                                 if (type == TokenType.INTEGER_LITERAL) {
 383                                         current++;
 384                                         if (current < end && current[0].tolower () == 'l') {
 385                                                 current++;
 386                                         }
 387                                 }
 388                                 break;
 389                         case 'u':
 390                         case 'U':
 391                                 if (type == TokenType.INTEGER_LITERAL) {
 392                                         current++;
 393                                         if (current < end && current[0].tolower () == 'l') {
 394                                                 current++;
 395                                                 if (current < end && current[0].tolower () == 'l') {
 396                                                         current++;
 397                                                 }
 398                                         }
 399                                 }
 400                                 break;
 401                         case 'f':
 402                         case 'F':
 403                         case 'd':
 404                         case 'D':
 405                                 type = TokenType.REAL_LITERAL;
 406                                 current++;
 407                                 break;
 408                         }
 409
 410                         if (!real_literal && is_ident_char (current[0])) {
 411                                 // allow identifiers to start with a digit
 412                                 // as long as they contain at least one char
 413                                 while (current < end && is_ident_char (current[0])) {
 414                                         current++;
 415                                 }
 416                                 type = TokenType.IDENTIFIER;
 417                         }
 418                 }
 419
 420                 return type;
 421         }
 422
 423         public TokenType read_token (out SourceLocation token_begin, out SourceLocation token_end) {
 424                 space ();
 425
 426                 TokenType type;
 427                 char* begin = current;
 428                 token_begin.pos = begin;
 429                 token_begin.line = line;
 430                 token_begin.column = column;
 431
 432                 int token_length_in_chars = -1;
 433
 434                 if (current >= end) {
 435                         type = TokenType.EOF;
 436                 } else if (current[0].isalpha () || current[0] == '_') {
 437                         int len = 0;
 438                         while (current < end && is_ident_char (current[0])) {
 439                                 current++;
 440                                 len++;
 441                         }
 442                         type = get_identifier_or_keyword (begin, len);
 443                 } else if (current[0] == '@') {
 444                         token_begin.pos++; // @ is not part of the identifier
 445                         current++;
 446                         int len = 0;
 447                         while (current < end && is_ident_char (current[0])) {
 448                                 current++;
 449                                 len++;
 450                         }
 451                         type = TokenType.IDENTIFIER;
 452                 } else if (current[0].isdigit ()) {
 453                         type = read_number ();
 454                 } else {
 455                         switch (current[0]) {
 456                         case '{':
 457                                 type = TokenType.OPEN_BRACE;
 458                                 current++;
 459                                 break;
 460                         case '}':
 461                                 type = TokenType.CLOSE_BRACE;
 462                                 current++;
 463                                 break;
 464                         case '(':
 465                                 type = TokenType.OPEN_PARENS;
 466                                 current++;
 467                                 break;
 468                         case ')':
 469                                 type = TokenType.CLOSE_PARENS;
 470                                 current++;
 471                                 break;
 472                         case '[':
 473                                 type = TokenType.OPEN_BRACKET;
 474                                 current++;
 475                                 break;
 476                         case ']':
 477                                 type = TokenType.CLOSE_BRACKET;
 478                                 current++;
 479                                 break;
 480                         case '.':
 481                                 type = TokenType.DOT;
 482                                 current++;
 483                                 if (current < end - 1) {
 484                                         if (current[0] == '.' && current[1] == '.') {
 485                                                 type = TokenType.ELLIPSIS;
 486                                                 current += 2;
 487                                         }
 488                                 }
 489                                 break;
 490                         case ':':
 491                                 type = TokenType.COLON;
 492                                 current++;
 493                                 if (current < end && current[0] == ':') {
 494                                         type = TokenType.DOUBLE_COLON;
 495                                         current++;
 496                                 }
 497                                 break;
 498                         case ',':
 499                                 type = TokenType.COMMA;
 500                                 current++;
 501                                 break;
 502                         case ';':
 503                                 type = TokenType.SEMICOLON;
 504                                 current++;
 505                                 break;
 506                         case '#':
 507                                 type = TokenType.HASH;
 508                                 current++;
 509                                 break;
 510                         case '?':
 511                                 type = TokenType.INTERR;
 512                                 current++;
 513                                 break;
 514                         case '|':
 515                                 type = TokenType.BITWISE_OR;
 516                                 current++;
 517                                 if (current < end) {
 518                                         switch (current[0]) {
 519                                         case '=':
 520                                                 type = TokenType.ASSIGN_BITWISE_OR;
 521                                                 current++;
 522                                                 break;
 523                                         case '|':
 524                                                 type = TokenType.OP_OR;
 525                                                 current++;
 526                                                 break;
 527                                         }
 528                                 }
 529                                 break;
 530                         case '&':
 531                                 type = TokenType.BITWISE_AND;
 532                                 current++;
 533                                 if (current < end) {
 534                                         switch (current[0]) {
 535                                         case '=':
 536                                                 type = TokenType.ASSIGN_BITWISE_AND;
 537                                                 current++;
 538                                                 break;
 539                                         case '&':
 540                                                 type = TokenType.OP_AND;
 541                                                 current++;
 542                                                 break;
 543                                         }
 544                                 }
 545                                 break;
 546                         case '^':
 547                                 type = TokenType.CARRET;
 548                                 current++;
 549                                 if (current < end && current[0] == '=') {
 550                                         type = TokenType.ASSIGN_BITWISE_XOR;
 551                                         current++;
 552                                 }
 553                                 break;
 554                         case '~':
 555                                 type = TokenType.TILDE;
 556                                 current++;
 557                                 break;
 558                         case '=':
 559                                 type = TokenType.ASSIGN;
 560                                 current++;
 561                                 if (current < end) {
 562                                         switch (current[0]) {
 563                                         case '=':
 564                                                 type = TokenType.OP_EQ;
 565                                                 current++;
 566                                                 break;
 567                                         case '>':
 568                                                 type = TokenType.LAMBDA;
 569                                                 current++;
 570                                                 break;
 571                                         }
 572                                 }
 573                                 break;
 574                         case '<':
 575                                 type = TokenType.OP_LT;
 576                                 current++;
 577                                 if (current < end) {
 578                                         switch (current[0]) {
 579                                         case '=':
 580                                                 type = TokenType.OP_LE;
 581                                                 current++;
 582                                                 break;
 583                                         case '<':
 584                                                 type = TokenType.OP_SHIFT_LEFT;
 585                                                 current++;
 586                                                 if (current < end && current[0] == '=') {
 587                                                         type = TokenType.ASSIGN_SHIFT_LEFT;
 588                                                         current++;
 589                                                 }
 590                                                 break;
 591                                         }
 592                                 }
 593                                 break;
 594                         case '>':
 595                                 type = TokenType.OP_GT;
 596                                 current++;
 597                                 if (current < end && current[0] == '=') {
 598                                         type = TokenType.OP_GE;
 599                                         current++;
 600                                 }
 601                                 break;
 602                         case '!':
 603                                 type = TokenType.OP_NEG;
 604                                 current++;
 605                                 if (current < end && current[0] == '=') {
 606                                         type = TokenType.OP_NE;
 607                                         current++;
 608                                 }
 609                                 break;
 610                         case '+':
 611                                 type = TokenType.PLUS;
 612                                 current++;
 613                                 if (current < end) {
 614                                         switch (current[0]) {
 615                                         case '=':
 616                                                 type = TokenType.ASSIGN_ADD;
 617                                                 current++;
 618                                                 break;
 619                                         case '+':
 620                                                 type = TokenType.OP_INC;
 621                                                 current++;
 622                                                 break;
 623                                         }
 624                                 }
 625                                 break;
 626                         case '-':
 627                                 type = TokenType.MINUS;
 628                                 current++;
 629                                 if (current < end) {
 630                                         switch (current[0]) {
 631                                         case '=':
 632                                                 type = TokenType.ASSIGN_SUB;
 633                                                 current++;
 634                                                 break;
 635                                         case '-':
 636                                                 type = TokenType.OP_DEC;
 637                                                 current++;
 638                                                 break;
 639                                         case '>':
 640                                                 type = TokenType.OP_PTR;
 641                                                 current++;
 642                                                 break;
 643                                         }
 644                                 }
 645                                 break;
 646                         case '*':
 647                                 type = TokenType.STAR;
 648                                 current++;
 649                                 if (current < end && current[0] == '=') {
 650                                         type = TokenType.ASSIGN_MUL;
 651                                         current++;
 652                                 }
 653                                 break;
 654                         case '/':
 655                                 type = TokenType.DIV;
 656                                 current++;
 657                                 if (current < end && current[0] == '=') {
 658                                         type = TokenType.ASSIGN_DIV;
 659                                         current++;
 660                                 }
 661                                 break;
 662                         case '%':
 663                                 type = TokenType.PERCENT;
 664                                 current++;
 665                                 if (current < end && current[0] == '=') {
 666                                         type = TokenType.ASSIGN_PERCENT;
 667                                         current++;
 668                                 }
 669                                 break;
 670                         case '\'':
 671                         case '"':
 672                                 if (begin[0] == '\'') {
 673                                         type = TokenType.CHARACTER_LITERAL;
 674                                 } else if (current < end - 6 && begin[1] == '"' && begin[2] == '"') {
 675                                         type = TokenType.VERBATIM_STRING_LITERAL;
 676                                         token_length_in_chars = 6;
 677                                         current += 3;
 678                                         while (current < end - 4) {
 679                                                 if (current[0] == '"' && current[1] == '"' && current[2] == '"') {
 680                                                         break;
 681                                                 } else if (current[0] == '\n') {
 682                                                         current++;
 683                                                         line++;
 684                                                         column = 1;
 685                                                         token_length_in_chars = 3;
 686                                                 } else {
 687                                                         unichar u = ((string) current).get_char_validated ((long) (end - current));
 688                                                         if (u != (unichar) (-1)) {
 689                                                                 current += u.to_utf8 (null);
 690                                                                 token_length_in_chars++;
 691                                                         } else {
 692                                                                 Report.error (new SourceReference (source_file, line, column + token_length_in_chars, line, column + token_length_in_chars), "invalid UTF-8 character");
 693                                                         }
 694                                                 }
 695                                         }
 696                                         if (current[0] == '"' && current[1] == '"' && current[2] == '"') {
 697                                                 current += 3;
 698                                         } else {
 699                                                 Report.error (new SourceReference (source_file, line, column + token_length_in_chars, line, column + token_length_in_chars), "syntax error, expected \"\"\"");
 700                                         }
 701                                         break;
 702                                 } else {
 703                                         type = TokenType.STRING_LITERAL;
 704                                 }
 705                                 token_length_in_chars = 2;
 706                                 current++;
 707                                 while (current < end && current[0] != begin[0]) {
 708                                         if (current[0] == '\\') {
 709                                                 current++;
 710                                                 token_length_in_chars++;
 711                                                 if (current < end && current[0] == 'x') {
 712                                                         // hexadecimal escape character
 713                                                         current++;
 714                                                         token_length_in_chars++;
 715                                                         while (current < end && current[0].isxdigit ()) {
 716                                                                 current++;
 717                                                                 token_length_in_chars++;
 718                                                         }
 719                                                 } else {
 720                                                         current++;
 721                                                         token_length_in_chars++;
 722                                                 }
 723                                         } else if (current[0] == '\n') {
 724                                                 break;
 725                                         } else {
 726                                                 unichar u = ((string) current).get_char_validated ((long) (end - current));
 727                                                 if (u != (unichar) (-1)) {
 728                                                         current += u.to_utf8 (null);
 729                                                         token_length_in_chars++;
 730                                                 } else {
 731                                                         Report.error (new SourceReference (source_file, line, column + token_length_in_chars, line, column + token_length_in_chars), "invalid UTF-8 character");
 732                                                 }
 733                                         }
 734                                 }
 735                                 if (current < end && current[0] != '\n') {
 736                                         current++;
 737                                 } else {
 738                                         Report.error (new SourceReference (source_file, line, column + token_length_in_chars, line, column + token_length_in_chars), "syntax error, expected %c".printf (begin[0]));
 739                                 }
 740                                 break;
 741                         default:
 742                                 unichar u = ((string) current).get_char_validated ((long) (end - current));
 743                                 if (u != (unichar) (-1)) {
 744                                         current += u.to_utf8 (null);
 745                                         Report.error (new SourceReference (source_file, line, column, line, column), "syntax error, unexpected character");
 746                                 } else {
 747                                         current++;
 748                                         Report.error (new SourceReference (source_file, line, column, line, column), "invalid UTF-8 character");
 749                                 }
 750                                 column++;
 751                                 return read_token (out token_begin, out token_end);
 752                         }
 753                 }
 754
 755                 if (token_length_in_chars < 0) {
 756                         column += (int) (current - begin);
 757                 } else {
 758                         column += token_length_in_chars;
 759                 }
 760
 761                 token_end.pos = current;
 762                 token_end.line = line;
 763                 token_end.column = column - 1;
 764
 765                 return type;
 766         }
 767
 768         bool matches (char* begin, string keyword) {
 769                 char* keyword_array = keyword;
 770                 long len = keyword.len ();
 771                 for (int i = 0; i < len; i++) {
 772                         if (begin[i] != keyword_array[i]) {
 773                                 return false;
 774                         }
 775                 }
 776                 return true;
 777         }
 778
 779         bool whitespace () {
 780                 bool found = false;
 781                 while (current < end && current[0].isspace ()) {
 782                         if (current[0] == '\n') {
 783                                 line++;
 784                                 column = 0;
 785                         }
 786                         found = true;
 787                         current++;
 788                         column++;
 789                 }
 790                 return found;
 791         }
 792
 793         bool comment () {
 794                 if (current > end - 2
 795                     || current[0] != '/'
 796                     || (current[1] != '/' && current[1] != '*')) {
 797                         return false;
 798                 }
 799
 800                 if (current[1] == '/') {
 801                         // single-line comment
 802                         current += 2;
 803                         char* begin = current;
 804                         // skip until end of line or end of file
 805                         while (current < end && current[0] != '\n') {
 806                                 current++;
 807                         }
 808                         push_comment (((string) begin).ndup ((long) (current - begin)), line == 1);
 809                 } else {
 810                         // delimited comment
 811                         current += 2;
 812                         char* begin = current;
 813                         int begin_line = line;
 814                         while (current < end - 1
 815                                && (current[0] != '*' || current[1] != '/')) {
 816                                 if (current[0] == '\n') {
 817                                         line++;
 818                                         column = 0;
 819                                 }
 820                                 current++;
 821                                 column++;
 822                         }
 823                         if (current == end - 1) {
 824                                 Report.error (new SourceReference (source_file, line, column, line, column), "syntax error, expected */");
 825                                 return true;
 826                         }
 827                         push_comment (((string) begin).ndup ((long) (current - begin)), begin_line == 1);
 828                         current += 2;
 829                         column += 2;
 830                 }
 831
 832                 return true;
 833         }
 834
 835         void space () {
 836                 while (whitespace () || comment ()) {
 837                 }
 838         }
 839
 840         void push_comment (string comment_item, bool file_comment) {
 841                 if (_comment == null) {
 842                         _comment = comment_item;
 843                 } else {
 844                         _comment = "%s\n%s".printf (_comment, comment_item);
 845                 }
 846                 if (file_comment) {
 847                         source_file.comment = _comment;
 848                         _comment = null;
 849                 }
 850         }
 851
 852         /**
 853          * Clears and returns the content of the comment stack.
 854          *
 855          * @return saved comment
 856          */
 857         public string? pop_comment () {
 858                 if (_comment == null) {
 859                         return null;
 860                 }
 861
 862                 var result = new StringBuilder (_comment);
 863                 _comment = null;
 864
 865                 weak string index;
 866                 while ((index = result.str.chr (-1, '\t')) != null) {
 867                         result.erase (result.str.pointer_to_offset (index), 1);
 868                 }
 869
 870                 return result.str;
 871         }
 872 }
 873