gcc/d/dmd/lexer.d

   1 /**
   2  * Implements the lexical analyzer, which converts source code into lexical tokens.
   3  *
   4  * Specification: $(LINK2 https://dlang.org/spec/lex.html, Lexical)
   5  *
   6  * Copyright:   Copyright (C) 1999-2021 by The D Language Foundation, All Rights Reserved
   7  * Authors:     $(LINK2 http://www.digitalmars.com, Walter Bright)
   8  * License:     $(LINK2 http://www.boost.org/LICENSE_1_0.txt, Boost License 1.0)
   9  * Source:      $(LINK2 https://github.com/dlang/dmd/blob/master/src/dmd/lexer.d, _lexer.d)
  10  * Documentation:  https://dlang.org/phobos/dmd_lexer.html
  11  * Coverage:    https://codecov.io/gh/dlang/dmd/src/master/src/dmd/lexer.d
  12  */
  13
  14 module dmd.lexer;
  15
  16 import core.stdc.ctype;
  17 import core.stdc.errno;
  18 import core.stdc.stdarg;
  19 import core.stdc.stdio;
  20 import core.stdc.stdlib : getenv;
  21 import core.stdc.string;
  22 import core.stdc.time;
  23
  24 import dmd.entity;
  25 import dmd.errors;
  26 import dmd.globals;
  27 import dmd.id;
  28 import dmd.identifier;
  29 import dmd.root.array;
  30 import dmd.root.ctfloat;
  31 import dmd.common.outbuffer;
  32 import dmd.root.port;
  33 import dmd.root.rmem;
  34 import dmd.root.string;
  35 import dmd.tokens;
  36 import dmd.utf;
  37 import dmd.utils;
  38
  39 nothrow:
  40
  41 private enum LS = 0x2028;       // UTF line separator
  42 private enum PS = 0x2029;       // UTF paragraph separator
  43
  44 /********************************************
  45  * Do our own char maps
  46  */
  47 private static immutable cmtable = () {
  48     ubyte[256] table;
  49     foreach (const c; 0 .. table.length)
  50     {
  51         if ('0' <= c && c <= '7')
  52             table[c] |= CMoctal;
  53         if (c_isxdigit(c))
  54             table[c] |= CMhex;
  55         if (c_isalnum(c) || c == '_')
  56             table[c] |= CMidchar;
  57
  58         switch (c)
  59         {
  60             case 'x': case 'X':
  61             case 'b': case 'B':
  62                 table[c] |= CMzerosecond;
  63                 break;
  64
  65             case '0': .. case '9':
  66             case 'e': case 'E':
  67             case 'f': case 'F':
  68             case 'l': case 'L':
  69             case 'p': case 'P':
  70             case 'u': case 'U':
  71             case 'i':
  72             case '.':
  73             case '_':
  74                 table[c] |= CMzerosecond | CMdigitsecond;
  75                 break;
  76
  77             default:
  78                 break;
  79         }
  80
  81         switch (c)
  82         {
  83             case '\\':
  84             case '\n':
  85             case '\r':
  86             case 0:
  87             case 0x1A:
  88             case '\'':
  89                 break;
  90             default:
  91                 if (!(c & 0x80))
  92                     table[c] |= CMsinglechar;
  93                 break;
  94         }
  95     }
  96     return table;
  97 }();
  98
  99 private
 100 {
 101     enum CMoctal  = 0x1;
 102     enum CMhex    = 0x2;
 103     enum CMidchar = 0x4;
 104     enum CMzerosecond = 0x8;
 105     enum CMdigitsecond = 0x10;
 106     enum CMsinglechar = 0x20;
 107 }
 108
 109 private bool isoctal(const char c) pure @nogc @safe
 110 {
 111     return (cmtable[c] & CMoctal) != 0;
 112 }
 113
 114 private bool ishex(const char c) pure @nogc @safe
 115 {
 116     return (cmtable[c] & CMhex) != 0;
 117 }
 118
 119 private bool isidchar(const char c) pure @nogc @safe
 120 {
 121     return (cmtable[c] & CMidchar) != 0;
 122 }
 123
 124 private bool isZeroSecond(const char c) pure @nogc @safe
 125 {
 126     return (cmtable[c] & CMzerosecond) != 0;
 127 }
 128
 129 private bool isDigitSecond(const char c) pure @nogc @safe
 130 {
 131     return (cmtable[c] & CMdigitsecond) != 0;
 132 }
 133
 134 private bool issinglechar(const char c) pure @nogc @safe
 135 {
 136     return (cmtable[c] & CMsinglechar) != 0;
 137 }
 138
 139 private bool c_isxdigit(const int c) pure @nogc @safe
 140 {
 141     return (( c >= '0' && c <= '9') ||
 142             ( c >= 'a' && c <= 'f') ||
 143             ( c >= 'A' && c <= 'F'));
 144 }
 145
 146 private bool c_isalnum(const int c) pure @nogc @safe
 147 {
 148     return (( c >= '0' && c <= '9') ||
 149             ( c >= 'a' && c <= 'z') ||
 150             ( c >= 'A' && c <= 'Z'));
 151 }
 152
 153 unittest
 154 {
 155     //printf("lexer.unittest\n");
 156     /* Not much here, just trying things out.
 157      */
 158     string text = "int"; // We rely on the implicit null-terminator
 159     scope Lexer lex1 = new Lexer(null, text.ptr, 0, text.length, 0, 0);
 160     TOK tok;
 161     tok = lex1.nextToken();
 162     //printf("tok == %s, %d, %d\n", Token::toChars(tok), tok, TOK.int32);
 163     assert(tok == TOK.int32);
 164     tok = lex1.nextToken();
 165     assert(tok == TOK.endOfFile);
 166     tok = lex1.nextToken();
 167     assert(tok == TOK.endOfFile);
 168     tok = lex1.nextToken();
 169     assert(tok == TOK.endOfFile);
 170 }
 171
 172 unittest
 173 {
 174     // We don't want to see Lexer error output during these tests.
 175     uint errors = global.startGagging();
 176     scope(exit) global.endGagging(errors);
 177
 178     // Test malformed input: even malformed input should end in a TOK.endOfFile.
 179     static immutable char[][] testcases =
 180     [   // Testcase must end with 0 or 0x1A.
 181         [0], // not malformed, but pathological
 182         ['\'', 0],
 183         ['\'', 0x1A],
 184         ['{', '{', 'q', '{', 0],
 185         [0xFF, 0],
 186         [0xFF, 0x80, 0],
 187         [0xFF, 0xFF, 0],
 188         [0xFF, 0xFF, 0],
 189         ['x', '"', 0x1A],
 190     ];
 191
 192     foreach (testcase; testcases)
 193     {
 194         scope Lexer lex2 = new Lexer(null, testcase.ptr, 0, testcase.length-1, 0, 0);
 195         TOK tok = lex2.nextToken();
 196         size_t iterations = 1;
 197         while ((tok != TOK.endOfFile) && (iterations++ < testcase.length))
 198         {
 199             tok = lex2.nextToken();
 200         }
 201         assert(tok == TOK.endOfFile);
 202         tok = lex2.nextToken();
 203         assert(tok == TOK.endOfFile);
 204     }
 205 }
 206
 207 version (DMDLIB)
 208 {
 209     version = LocOffset;
 210 }
 211
 212 /***********************************************************
 213  */
 214 class Lexer
 215 {
 216     private __gshared OutBuffer stringbuffer;
 217
 218     Loc scanloc;            // for error messages
 219     Loc prevloc;            // location of token before current
 220
 221     const(char)* p;         // current character
 222
 223     Token token;
 224
 225     // For ImportC
 226     bool Ccompile;              /// true if compiling ImportC
 227
 228     // The following are valid only if (Ccompile == true)
 229     ubyte longsize;             /// size of C long, 4 or 8
 230     ubyte long_doublesize;      /// size of C long double, 8 or D real.sizeof
 231     ubyte wchar_tsize;          /// size of C wchar_t, 2 or 4
 232
 233     structalign_t packalign;    /// current state of #pragma pack alignment (ImportC)
 234
 235     private
 236     {
 237         const(char)* base;      // pointer to start of buffer
 238         const(char)* end;       // pointer to last element of buffer
 239         const(char)* line;      // start of current line
 240
 241         bool doDocComment;      // collect doc comment information
 242         bool anyToken;          // seen at least one token
 243         bool commentToken;      // comments are TOK.comment's
 244         int inTokenStringConstant; // can be larger than 1 when in nested q{} strings
 245         int lastDocLine;        // last line of previous doc comment
 246
 247         Token* tokenFreelist;
 248
 249         // ImportC #pragma pack stack
 250         Array!Identifier* records;      // identifiers (or null)
 251         Array!structalign_t* packs;     // parallel alignment values
 252     }
 253
 254   nothrow:
 255
 256     /*********************
 257      * Creates a Lexer for the source code base[begoffset..endoffset+1].
 258      * The last character, base[endoffset], must be null (0) or EOF (0x1A).
 259      *
 260      * Params:
 261      *  filename = used for error messages
 262      *  base = source code, must be terminated by a null (0) or EOF (0x1A) character
 263      *  begoffset = starting offset into base[]
 264      *  endoffset = the last offset to read into base[]
 265      *  doDocComment = handle documentation comments
 266      *  commentToken = comments become TOK.comment's
 267      */
 268     this(const(char)* filename, const(char)* base, size_t begoffset,
 269         size_t endoffset, bool doDocComment, bool commentToken) pure
 270     {
 271         scanloc = Loc(filename, 1, 1);
 272         //printf("Lexer::Lexer(%p,%d)\n",base,length);
 273         //printf("lexer.filename = %s\n", filename);
 274         token = Token.init;
 275         this.base = base;
 276         this.end = base + endoffset;
 277         p = base + begoffset;
 278         line = p;
 279         this.doDocComment = doDocComment;
 280         this.commentToken = commentToken;
 281         this.inTokenStringConstant = 0;
 282         this.lastDocLine = 0;
 283         this.packalign.setDefault();
 284         //initKeywords();
 285         /* If first line starts with '#!', ignore the line
 286          */
 287         if (p && p[0] == '#' && p[1] == '!')
 288         {
 289             p += 2;
 290             while (1)
 291             {
 292                 char c = *p++;
 293                 switch (c)
 294                 {
 295                 case 0:
 296                 case 0x1A:
 297                     p--;
 298                     goto case;
 299                 case '\n':
 300                     break;
 301                 default:
 302                     continue;
 303                 }
 304                 break;
 305             }
 306             endOfLine();
 307         }
 308     }
 309
 310     /// Returns: a newly allocated `Token`.
 311     Token* allocateToken() pure nothrow @safe
 312     {
 313         if (tokenFreelist)
 314         {
 315             Token* t = tokenFreelist;
 316             tokenFreelist = t.next;
 317             t.next = null;
 318             return t;
 319         }
 320         return new Token();
 321     }
 322
 323     /// Frees the given token by returning it to the freelist.
 324     private void releaseToken(Token* token) pure nothrow @nogc @safe
 325     {
 326         if (mem.isGCEnabled)
 327             *token = Token.init;
 328         token.next = tokenFreelist;
 329         tokenFreelist = token;
 330     }
 331
 332     final TOK nextToken()
 333     {
 334         prevloc = token.loc;
 335         if (token.next)
 336         {
 337             Token* t = token.next;
 338             memcpy(&token, t, Token.sizeof);
 339             releaseToken(t);
 340         }
 341         else
 342         {
 343             scan(&token);
 344         }
 345         //printf(token.toChars());
 346         return token.value;
 347     }
 348
 349     /***********************
 350      * Look ahead at next token's value.
 351      */
 352     final TOK peekNext()
 353     {
 354         return peek(&token).value;
 355     }
 356
 357     /***********************
 358      * Look 2 tokens ahead at value.
 359      */
 360     final TOK peekNext2()
 361     {
 362         Token* t = peek(&token);
 363         return peek(t).value;
 364     }
 365
 366     /****************************
 367      * Turn next token in buffer into a token.
 368      */
 369     final void scan(Token* t)
 370     {
 371         const lastLine = scanloc.linnum;
 372         Loc startLoc;
 373         t.blockComment = null;
 374         t.lineComment = null;
 375
 376         while (1)
 377         {
 378             t.ptr = p;
 379             //printf("p = %p, *p = '%c'\n",p,*p);
 380             t.loc = loc();
 381             switch (*p)
 382             {
 383             case 0:
 384             case 0x1A:
 385                 t.value = TOK.endOfFile; // end of file
 386                 // Intentionally not advancing `p`, such that subsequent calls keep returning TOK.endOfFile.
 387                 return;
 388             case ' ':
 389             case '\t':
 390             case '\v':
 391             case '\f':
 392                 p++;
 393                 continue; // skip white space
 394             case '\r':
 395                 p++;
 396                 if (*p != '\n') // if CR stands by itself
 397                 {
 398                     endOfLine();
 399                     goto skipFourSpaces;
 400                 }
 401                 continue; // skip white space
 402             case '\n':
 403                 p++;
 404                 endOfLine();
 405                 skipFourSpaces:
 406                 while (*(cast(uint*)p) == 0x20202020) //' ' == 0x20
 407                 {
 408                     p+=4;
 409                 }
 410                 continue; // skip white space
 411             case '0':
 412                 if (!isZeroSecond(p[1]))        // if numeric literal does not continue
 413                 {
 414                     ++p;
 415                     t.unsvalue = 0;
 416                     t.value = TOK.int32Literal;
 417                     return;
 418                 }
 419                 goto Lnumber;
 420
 421             case '1': .. case '9':
 422                 if (!isDigitSecond(p[1]))       // if numeric literal does not continue
 423                 {
 424                     t.unsvalue = *p - '0';
 425                     ++p;
 426                     t.value = TOK.int32Literal;
 427                     return;
 428                 }
 429             Lnumber:
 430                 t.value = number(t);
 431                 return;
 432
 433             case '\'':
 434                 if (issinglechar(p[1]) && p[2] == '\'')
 435                 {
 436                     t.unsvalue = p[1];        // simple one character literal
 437                     t.value = Ccompile ? TOK.int32Literal : TOK.charLiteral;
 438                     p += 3;
 439                 }
 440                 else if (Ccompile)
 441                 {
 442                     clexerCharConstant(*t, 0);
 443                 }
 444                 else
 445                 {
 446                     t.value = charConstant(t);
 447                 }
 448                 return;
 449
 450             case 'u':
 451             case 'U':
 452             case 'L':
 453                 if (!Ccompile)
 454                     goto case_ident;
 455                 if (p[1] == '\'')       // C wide character constant
 456                 {
 457                     char c = *p;
 458                     if (c == 'L')       // convert L to u or U
 459                         c = (wchar_tsize == 4) ? 'u' : 'U';
 460                     ++p;
 461                     clexerCharConstant(*t, c);
 462                     return;
 463                 }
 464                 else if (p[1] == '\"')  // C wide string literal
 465                 {
 466                     const c = *p;
 467                     ++p;
 468                     escapeStringConstant(t);
 469                     t.postfix = c == 'L' ? (wchar_tsize == 2 ? 'w' : 'd') :
 470                                 c == 'u' ? 'w' :
 471                                 'd';
 472                     return;
 473                 }
 474                 goto case_ident;
 475
 476             case 'r':
 477                 if (p[1] != '"')
 478                     goto case_ident;
 479                 p++;
 480                 goto case '`';
 481             case '`':
 482                 wysiwygStringConstant(t);
 483                 return;
 484             case 'x':
 485                 if (p[1] != '"')
 486                     goto case_ident;
 487                 p++;
 488                 auto start = p;
 489                 OutBuffer hexString;
 490                 t.value = hexStringConstant(t);
 491                 hexString.write(start[0 .. p - start]);
 492                 error("Built-in hex string literals are obsolete, use `std.conv.hexString!%s` instead.", hexString.extractChars());
 493                 return;
 494             case 'q':
 495                 if (p[1] == '"')
 496                 {
 497                     p++;
 498                     delimitedStringConstant(t);
 499                     return;
 500                 }
 501                 else if (p[1] == '{')
 502                 {
 503                     p++;
 504                     tokenStringConstant(t);
 505                     return;
 506                 }
 507                 else
 508                     goto case_ident;
 509             case '"':
 510                 escapeStringConstant(t);
 511                 return;
 512             case 'a':
 513             case 'b':
 514             case 'c':
 515             case 'd':
 516             case 'e':
 517             case 'f':
 518             case 'g':
 519             case 'h':
 520             case 'i':
 521             case 'j':
 522             case 'k':
 523             case 'l':
 524             case 'm':
 525             case 'n':
 526             case 'o':
 527             case 'p':
 528                 /*case 'q': case 'r':*/
 529             case 's':
 530             case 't':
 531             //case 'u':
 532             case 'v':
 533             case 'w':
 534                 /*case 'x':*/
 535             case 'y':
 536             case 'z':
 537             case 'A':
 538             case 'B':
 539             case 'C':
 540             case 'D':
 541             case 'E':
 542             case 'F':
 543             case 'G':
 544             case 'H':
 545             case 'I':
 546             case 'J':
 547             case 'K':
 548             //case 'L':
 549             case 'M':
 550             case 'N':
 551             case 'O':
 552             case 'P':
 553             case 'Q':
 554             case 'R':
 555             case 'S':
 556             case 'T':
 557             //case 'U':
 558             case 'V':
 559             case 'W':
 560             case 'X':
 561             case 'Y':
 562             case 'Z':
 563             case '_':
 564             case_ident:
 565                 {
 566                     while (1)
 567                     {
 568                         const c = *++p;
 569                         if (isidchar(c))
 570                             continue;
 571                         else if (c & 0x80)
 572                         {
 573                             const s = p;
 574                             const u = decodeUTF();
 575                             if (isUniAlpha(u))
 576                                 continue;
 577                             error("char 0x%04x not allowed in identifier", u);
 578                             p = s;
 579                         }
 580                         break;
 581                     }
 582                     Identifier id = Identifier.idPool(cast(char*)t.ptr, cast(uint)(p - t.ptr));
 583                     t.ident = id;
 584                     t.value = cast(TOK)id.getValue();
 585
 586                     anyToken = 1;
 587
 588                     /* Different keywords for C and D
 589                      */
 590                     if (Ccompile)
 591                     {
 592                         if (t.value != TOK.identifier)
 593                         {
 594                             t.value = Ckeywords[t.value];  // filter out D keywords
 595                         }
 596                     }
 597                     else if (t.value >= FirstCKeyword)
 598                         t.value = TOK.identifier;       // filter out C keywords
 599
 600                     else if (*t.ptr == '_') // if special identifier token
 601                     {
 602                         // Lazy initialization
 603                         TimeStampInfo.initialize(t.loc);
 604
 605                         if (id == Id.DATE)
 606                         {
 607                             t.ustring = TimeStampInfo.date.ptr;
 608                             goto Lstr;
 609                         }
 610                         else if (id == Id.TIME)
 611                         {
 612                             t.ustring = TimeStampInfo.time.ptr;
 613                             goto Lstr;
 614                         }
 615                         else if (id == Id.VENDOR)
 616                         {
 617                             t.ustring = global.vendor.xarraydup.ptr;
 618                             goto Lstr;
 619                         }
 620                         else if (id == Id.TIMESTAMP)
 621                         {
 622                             t.ustring = TimeStampInfo.timestamp.ptr;
 623                         Lstr:
 624                             t.value = TOK.string_;
 625                             t.postfix = 0;
 626                             t.len = cast(uint)strlen(t.ustring);
 627                         }
 628                         else if (id == Id.VERSIONX)
 629                         {
 630                             t.value = TOK.int64Literal;
 631                             t.unsvalue = global.versionNumber();
 632                         }
 633                         else if (id == Id.EOFX)
 634                         {
 635                             t.value = TOK.endOfFile;
 636                             // Advance scanner to end of file
 637                             while (!(*p == 0 || *p == 0x1A))
 638                                 p++;
 639                         }
 640                     }
 641                     //printf("t.value = %d\n",t.value);
 642                     return;
 643                 }
 644             case '/':
 645                 p++;
 646                 switch (*p)
 647                 {
 648                 case '=':
 649                     p++;
 650                     t.value = TOK.divAssign;
 651                     return;
 652                 case '*':
 653                     p++;
 654                     startLoc = loc();
 655                     while (1)
 656                     {
 657                         while (1)
 658                         {
 659                             const c = *p;
 660                             switch (c)
 661                             {
 662                             case '/':
 663                                 break;
 664                             case '\n':
 665                                 endOfLine();
 666                                 p++;
 667                                 continue;
 668                             case '\r':
 669                                 p++;
 670                                 if (*p != '\n')
 671                                     endOfLine();
 672                                 continue;
 673                             case 0:
 674                             case 0x1A:
 675                                 error("unterminated /* */ comment");
 676                                 p = end;
 677                                 t.loc = loc();
 678                                 t.value = TOK.endOfFile;
 679                                 return;
 680                             default:
 681                                 if (c & 0x80)
 682                                 {
 683                                     const u = decodeUTF();
 684                                     if (u == PS || u == LS)
 685                                         endOfLine();
 686                                 }
 687                                 p++;
 688                                 continue;
 689                             }
 690                             break;
 691                         }
 692                         p++;
 693                         if (p[-2] == '*' && p - 3 != t.ptr)
 694                             break;
 695                     }
 696                     if (commentToken)
 697                     {
 698                         t.loc = startLoc;
 699                         t.value = TOK.comment;
 700                         return;
 701                     }
 702                     else if (doDocComment && t.ptr[2] == '*' && p - 4 != t.ptr)
 703                     {
 704                         // if /** but not /**/
 705                         getDocComment(t, lastLine == startLoc.linnum, startLoc.linnum - lastDocLine > 1);
 706                         lastDocLine = scanloc.linnum;
 707                     }
 708                     continue;
 709                 case '/': // do // style comments
 710                     startLoc = loc();
 711                     while (1)
 712                     {
 713                         const c = *++p;
 714                         switch (c)
 715                         {
 716                         case '\n':
 717                             break;
 718                         case '\r':
 719                             if (p[1] == '\n')
 720                                 p++;
 721                             break;
 722                         case 0:
 723                         case 0x1A:
 724                             if (commentToken)
 725                             {
 726                                 p = end;
 727                                 t.loc = startLoc;
 728                                 t.value = TOK.comment;
 729                                 return;
 730                             }
 731                             if (doDocComment && t.ptr[2] == '/')
 732                             {
 733                                 getDocComment(t, lastLine == startLoc.linnum, startLoc.linnum - lastDocLine > 1);
 734                                 lastDocLine = scanloc.linnum;
 735                             }
 736                             p = end;
 737                             t.loc = loc();
 738                             t.value = TOK.endOfFile;
 739                             return;
 740                         default:
 741                             if (c & 0x80)
 742                             {
 743                                 const u = decodeUTF();
 744                                 if (u == PS || u == LS)
 745                                     break;
 746                             }
 747                             continue;
 748                         }
 749                         break;
 750                     }
 751                     if (commentToken)
 752                     {
 753                         p++;
 754                         endOfLine();
 755                         t.loc = startLoc;
 756                         t.value = TOK.comment;
 757                         return;
 758                     }
 759                     if (doDocComment && t.ptr[2] == '/')
 760                     {
 761                         getDocComment(t, lastLine == startLoc.linnum, startLoc.linnum - lastDocLine > 1);
 762                         lastDocLine = scanloc.linnum;
 763                     }
 764                     p++;
 765                     endOfLine();
 766                     continue;
 767                 case '+':
 768                     {
 769                         int nest;
 770                         startLoc = loc();
 771                         p++;
 772                         nest = 1;
 773                         while (1)
 774                         {
 775                             char c = *p;
 776                             switch (c)
 777                             {
 778                             case '/':
 779                                 p++;
 780                                 if (*p == '+')
 781                                 {
 782                                     p++;
 783                                     nest++;
 784                                 }
 785                                 continue;
 786                             case '+':
 787                                 p++;
 788                                 if (*p == '/')
 789                                 {
 790                                     p++;
 791                                     if (--nest == 0)
 792                                         break;
 793                                 }
 794                                 continue;
 795                             case '\r':
 796                                 p++;
 797                                 if (*p != '\n')
 798                                     endOfLine();
 799                                 continue;
 800                             case '\n':
 801                                 endOfLine();
 802                                 p++;
 803                                 continue;
 804                             case 0:
 805                             case 0x1A:
 806                                 error("unterminated /+ +/ comment");
 807                                 p = end;
 808                                 t.loc = loc();
 809                                 t.value = TOK.endOfFile;
 810                                 return;
 811                             default:
 812                                 if (c & 0x80)
 813                                 {
 814                                     uint u = decodeUTF();
 815                                     if (u == PS || u == LS)
 816                                         endOfLine();
 817                                 }
 818                                 p++;
 819                                 continue;
 820                             }
 821                             break;
 822                         }
 823                         if (commentToken)
 824                         {
 825                             t.loc = startLoc;
 826                             t.value = TOK.comment;
 827                             return;
 828                         }
 829                         if (doDocComment && t.ptr[2] == '+' && p - 4 != t.ptr)
 830                         {
 831                             // if /++ but not /++/
 832                             getDocComment(t, lastLine == startLoc.linnum, startLoc.linnum - lastDocLine > 1);
 833                             lastDocLine = scanloc.linnum;
 834                         }
 835                         continue;
 836                     }
 837                 default:
 838                     break;
 839                 }
 840                 t.value = TOK.div;
 841                 return;
 842             case '.':
 843                 p++;
 844                 if (isdigit(*p))
 845                 {
 846                     /* Note that we don't allow ._1 and ._ as being
 847                      * valid floating point numbers.
 848                      */
 849                     p--;
 850                     t.value = inreal(t);
 851                 }
 852                 else if (p[0] == '.')
 853                 {
 854                     if (p[1] == '.')
 855                     {
 856                         p += 2;
 857                         t.value = TOK.dotDotDot;
 858                     }
 859                     else
 860                     {
 861                         p++;
 862                         t.value = TOK.slice;
 863                     }
 864                 }
 865                 else
 866                     t.value = TOK.dot;
 867                 return;
 868             case '&':
 869                 p++;
 870                 if (*p == '=')
 871                 {
 872                     p++;
 873                     t.value = TOK.andAssign;
 874                 }
 875                 else if (*p == '&')
 876                 {
 877                     p++;
 878                     t.value = TOK.andAnd;
 879                 }
 880                 else
 881                     t.value = TOK.and;
 882                 return;
 883             case '|':
 884                 p++;
 885                 if (*p == '=')
 886                 {
 887                     p++;
 888                     t.value = TOK.orAssign;
 889                 }
 890                 else if (*p == '|')
 891                 {
 892                     p++;
 893                     t.value = TOK.orOr;
 894                 }
 895                 else
 896                     t.value = TOK.or;
 897                 return;
 898             case '-':
 899                 p++;
 900                 if (*p == '=')
 901                 {
 902                     p++;
 903                     t.value = TOK.minAssign;
 904                 }
 905                 else if (*p == '-')
 906                 {
 907                     p++;
 908                     t.value = TOK.minusMinus;
 909                 }
 910                 else if (*p == '>')
 911                 {
 912                     ++p;
 913                     t.value = TOK.arrow;
 914                 }
 915                 else
 916                     t.value = TOK.min;
 917                 return;
 918             case '+':
 919                 p++;
 920                 if (*p == '=')
 921                 {
 922                     p++;
 923                     t.value = TOK.addAssign;
 924                 }
 925                 else if (*p == '+')
 926                 {
 927                     p++;
 928                     t.value = TOK.plusPlus;
 929                 }
 930                 else
 931                     t.value = TOK.add;
 932                 return;
 933             case '<':
 934                 p++;
 935                 if (*p == '=')
 936                 {
 937                     p++;
 938                     t.value = TOK.lessOrEqual; // <=
 939                 }
 940                 else if (*p == '<')
 941                 {
 942                     p++;
 943                     if (*p == '=')
 944                     {
 945                         p++;
 946                         t.value = TOK.leftShiftAssign; // <<=
 947                     }
 948                     else
 949                         t.value = TOK.leftShift; // <<
 950                 }
 951                 else if (*p == ':' && Ccompile)
 952                 {
 953                     ++p;
 954                     t.value = TOK.leftBracket;  // <:
 955                 }
 956                 else if (*p == '%' && Ccompile)
 957                 {
 958                     ++p;
 959                     t.value = TOK.leftCurly;    // <%
 960                 }
 961                 else
 962                     t.value = TOK.lessThan; // <
 963                 return;
 964             case '>':
 965                 p++;
 966                 if (*p == '=')
 967                 {
 968                     p++;
 969                     t.value = TOK.greaterOrEqual; // >=
 970                 }
 971                 else if (*p == '>')
 972                 {
 973                     p++;
 974                     if (*p == '=')
 975                     {
 976                         p++;
 977                         t.value = TOK.rightShiftAssign; // >>=
 978                     }
 979                     else if (*p == '>')
 980                     {
 981                         p++;
 982                         if (*p == '=')
 983                         {
 984                             p++;
 985                             t.value = TOK.unsignedRightShiftAssign; // >>>=
 986                         }
 987                         else
 988                             t.value = TOK.unsignedRightShift; // >>>
 989                     }
 990                     else
 991                         t.value = TOK.rightShift; // >>
 992                 }
 993                 else
 994                     t.value = TOK.greaterThan; // >
 995                 return;
 996             case '!':
 997                 p++;
 998                 if (*p == '=')
 999                 {
1000                     p++;
1001                     t.value = TOK.notEqual; // !=
1002                 }
1003                 else
1004                     t.value = TOK.not; // !
1005                 return;
1006             case '=':
1007                 p++;
1008                 if (*p == '=')
1009                 {
1010                     p++;
1011                     t.value = TOK.equal; // ==
1012                 }
1013                 else if (*p == '>')
1014                 {
1015                     p++;
1016                     t.value = TOK.goesTo; // =>
1017                 }
1018                 else
1019                     t.value = TOK.assign; // =
1020                 return;
1021             case '~':
1022                 p++;
1023                 if (*p == '=')
1024                 {
1025                     p++;
1026                     t.value = TOK.concatenateAssign; // ~=
1027                 }
1028                 else
1029                     t.value = TOK.tilde; // ~
1030                 return;
1031             case '^':
1032                 p++;
1033                 if (*p == '^')
1034                 {
1035                     p++;
1036                     if (*p == '=')
1037                     {
1038                         p++;
1039                         t.value = TOK.powAssign; // ^^=
1040                     }
1041                     else
1042                         t.value = TOK.pow; // ^^
1043                 }
1044                 else if (*p == '=')
1045                 {
1046                     p++;
1047                     t.value = TOK.xorAssign; // ^=
1048                 }
1049                 else
1050                     t.value = TOK.xor; // ^
1051                 return;
1052             case '(':
1053                 p++;
1054                 t.value = TOK.leftParenthesis;
1055                 return;
1056             case ')':
1057                 p++;
1058                 t.value = TOK.rightParenthesis;
1059                 return;
1060             case '[':
1061                 p++;
1062                 t.value = TOK.leftBracket;
1063                 return;
1064             case ']':
1065                 p++;
1066                 t.value = TOK.rightBracket;
1067                 return;
1068             case '{':
1069                 p++;
1070                 t.value = TOK.leftCurly;
1071                 return;
1072             case '}':
1073                 p++;
1074                 t.value = TOK.rightCurly;
1075                 return;
1076             case '?':
1077                 p++;
1078                 t.value = TOK.question;
1079                 return;
1080             case ',':
1081                 p++;
1082                 t.value = TOK.comma;
1083                 return;
1084             case ';':
1085                 p++;
1086                 t.value = TOK.semicolon;
1087                 return;
1088             case ':':
1089                 p++;
1090                 if (*p == ':')
1091                 {
1092                     ++p;
1093                     t.value = TOK.colonColon;
1094                 }
1095                 else if (*p == '>' && Ccompile)
1096                 {
1097                     ++p;
1098                     t.value = TOK.rightBracket;
1099                 }
1100                 else
1101                     t.value = TOK.colon;
1102                 return;
1103             case '$':
1104                 p++;
1105                 t.value = TOK.dollar;
1106                 return;
1107             case '@':
1108                 p++;
1109                 t.value = TOK.at;
1110                 return;
1111             case '*':
1112                 p++;
1113                 if (*p == '=')
1114                 {
1115                     p++;
1116                     t.value = TOK.mulAssign;
1117                 }
1118                 else
1119                     t.value = TOK.mul;
1120                 return;
1121             case '%':
1122                 p++;
1123                 if (*p == '=')
1124                 {
1125                     p++;
1126                     t.value = TOK.modAssign;
1127                 }
1128                 else if (*p == '>' && Ccompile)
1129                 {
1130                     ++p;
1131                     t.value = TOK.rightCurly;
1132                 }
1133                 else if (*p == ':' && Ccompile)
1134                 {
1135                     goto case '#';      // %: means #
1136                 }
1137                 else
1138                     t.value = TOK.mod;
1139                 return;
1140             case '#':
1141                 {
1142                     p++;
1143                     Token n;
1144                     scan(&n);
1145                     if (Ccompile && n.value == TOK.int32Literal)
1146                     {
1147                         poundLine(n, true);
1148                         continue;
1149                     }
1150                     if (n.value == TOK.identifier)
1151                     {
1152                         if (n.ident == Id.line)
1153                         {
1154                             poundLine(n, false);
1155                             continue;
1156                         }
1157                         else if (n.ident == Id.__pragma && Ccompile)
1158                         {
1159                             pragmaDirective(scanloc);
1160                             continue;
1161                         }
1162                         else
1163                         {
1164                             const locx = loc();
1165                             warning(locx, "C preprocessor directive `#%s` is not supported", n.ident.toChars());
1166                         }
1167                     }
1168                     else if (n.value == TOK.if_)
1169                     {
1170                         error("C preprocessor directive `#if` is not supported, use `version` or `static if`");
1171                     }
1172                     t.value = TOK.pound;
1173                     return;
1174                 }
1175             default:
1176                 {
1177                     dchar c = *p;
1178                     if (c & 0x80)
1179                     {
1180                         c = decodeUTF();
1181                         // Check for start of unicode identifier
1182                         if (isUniAlpha(c))
1183                             goto case_ident;
1184                         if (c == PS || c == LS)
1185                         {
1186                             endOfLine();
1187                             p++;
1188                             continue;
1189                         }
1190                     }
1191                     if (c < 0x80 && isprint(c))
1192                         error("character '%c' is not a valid token", c);
1193                     else
1194                         error("character 0x%02x is not a valid token", c);
1195                     p++;
1196                     continue;
1197                 }
1198             }
1199         }
1200     }
1201
1202     final Token* peek(Token* ct)
1203     {
1204         Token* t;
1205         if (ct.next)
1206             t = ct.next;
1207         else
1208         {
1209             t = allocateToken();
1210             scan(t);
1211             ct.next = t;
1212         }
1213         return t;
1214     }
1215
1216     /*********************************
1217      * tk is on the opening (.
1218      * Look ahead and return token that is past the closing ).
1219      */
1220     final Token* peekPastParen(Token* tk)
1221     {
1222         //printf("peekPastParen()\n");
1223         int parens = 1;
1224         int curlynest = 0;
1225         while (1)
1226         {
1227             tk = peek(tk);
1228             //tk.print();
1229             switch (tk.value)
1230             {
1231             case TOK.leftParenthesis:
1232                 parens++;
1233                 continue;
1234             case TOK.rightParenthesis:
1235                 --parens;
1236                 if (parens)
1237                     continue;
1238                 tk = peek(tk);
1239                 break;
1240             case TOK.leftCurly:
1241                 curlynest++;
1242                 continue;
1243             case TOK.rightCurly:
1244                 if (--curlynest >= 0)
1245                     continue;
1246                 break;
1247             case TOK.semicolon:
1248                 if (curlynest)
1249                     continue;
1250                 break;
1251             case TOK.endOfFile:
1252                 break;
1253             default:
1254                 continue;
1255             }
1256             return tk;
1257         }
1258     }
1259
1260     /*******************************************
1261      * Parse escape sequence.
1262      */
1263     private uint escapeSequence()
1264     {
1265         return Lexer.escapeSequence(token.loc, p, Ccompile);
1266     }
1267
1268     /********
1269      * Parse the given string literal escape sequence into a single character.
          * `sequence` is expected to point at the character that follows the
          * backslash; the switch below dispatches on that character.
          *
          * Recognized forms: the simple escapes (quote, double quote, `?`,
          * backslash, `a b f n r t v`), the hex escapes `x` (2 digits), `u`
          * (4 digits) and `U` (8 digits), one to three octal digits, and, only
          * when `Ccompile` is false, named character entities `&name;`.
          * Malformed input is reported through `.error` at `loc`; a value is
          * still returned in every case.
1270      * D https://dlang.org/spec/lex.html#escape_sequences
1271      * C11 6.4.4.4
1272      * Params:
1273      *  loc = location to use for error messages
1274      *  sequence = pointer to string with escape sequence to parse. Updated to
1275      *             point past the end of the escape sequence
1276      *  Ccompile = true for compile C11 escape sequences
1277      * Returns:
1278      *  the escape sequence as a single character
          *  (a backslash if the sequence starts at end of file, in which case
          *  the pointer is left on the end-of-file character)
1279      */
1280     private static dchar escapeSequence(const ref Loc loc, ref const(char)* sequence, bool Ccompile)
1281     {
1282         const(char)* p = sequence; // cache sequence reference on stack
              // Write the local cursor back to the caller on every exit path.
1283         scope(exit) sequence = p;
1284
1285         uint c = *p;
1286         int ndigits;
1287         switch (c)
1288         {
              // These four stand for themselves: c already holds the result.
1289         case '\'':
1290         case '"':
1291         case '?':
1292         case '\\':
              // Shared tail for all one-letter escapes: step over the letter.
1293         Lconsume:
1294             p++;
1295             break;
1296         case 'a':
1297             c = 7;
1298             goto Lconsume;
1299         case 'b':
1300             c = 8;
1301             goto Lconsume;
1302         case 'f':
1303             c = 12;
1304             goto Lconsume;
1305         case 'n':
1306             c = 10;
1307             goto Lconsume;
1308         case 'r':
1309             c = 13;
1310             goto Lconsume;
1311         case 't':
1312             c = 9;
1313             goto Lconsume;
1314         case 'v':
1315             c = 11;
1316             goto Lconsume;
              // Hex escapes differ only in the number of digits expected.
1317         case 'u':
1318             ndigits = 4;
1319             goto Lhex;
1320         case 'U':
1321             ndigits = 8;
1322             goto Lhex;
1323         case 'x':
1324             ndigits = 2;
1325         Lhex:
                  // Step over the x/u/U letter and look at the first digit.
1326             p++;
1327             c = *p;
1328             if (ishex(cast(char)c))
1329             {
1330                 uint v = 0;
1331                 int n = 0;
                      // Accumulate digits into v. The loop ends after exactly
                      // ndigits digits, or early (with an error) at the first
                      // non-hex character, which is left unconsumed.
1332                 while (1)
1333                 {
1334                     if (isdigit(cast(char)c))
1335                         c -= '0';
1336                     else if (islower(c))
1337                         c -= 'a' - 10;
1338                     else
1339                         c -= 'A' - 10;
1340                     v = v * 16 + c;
1341                     c = *++p;
1342                     if (++n == ndigits)
1343                         break;
1344                     if (!ishex(cast(char)c))
1345                     {
1346                         .error(loc, "escape hex sequence has %d hex digits instead of %d", n, ndigits);
1347                         break;
1348                     }
1349                 }
                      // Only the 4- and 8-digit forms (u, U) are checked for being
                      // a valid dchar; the 2-digit x form is exempt.
1350                 if (ndigits != 2 && !utf_isValidDchar(v))
1351                 {
1352                     .error(loc, "invalid UTF character \\U%08x", v);
1353                     v = '?'; // recover with valid UTF character
1354                 }
1355                 c = v;
1356             }
1357             else
1358             {
                      // `sequence` has not been written back yet (that happens at
                      // scope exit), so sequence[0] is still the x/u/U letter.
                      // The offending character is skipped.
1359                 .error(loc, "undefined escape hex sequence \\%c%c", sequence[0], c);
1360                 p++;
1361             }
1362             break;
1363         case '&':
                  // In C mode there are no named entities: handle '&' like any
                  // other unrecognized character.
1364             if (Ccompile)
1365                 goto default;
1366
1367             // named character entity
                  // idstart is the first character after '&'. Letters are accepted
                  // anywhere in the name, digits anywhere but the first position.
1368             for (const idstart = ++p; 1; p++)
1369             {
1370                 switch (*p)
1371                 {
1372                 case ';':
1373                     c = HtmlNamedEntity(idstart, p - idstart);
                          // A result of ~0 is treated as "no such entity".
1374                     if (c == ~0)
1375                     {
1376                         .error(loc, "unnamed character entity &%.*s;", cast(int)(p - idstart), idstart);
1377                         c = '?';
1378                     }
                          // Step over the terminating ';'.
1379                     p++;
1380                     break;
1381                 default:
1382                     if (isalpha(*p) || (p != idstart && isdigit(*p)))
1383                         continue;
                          // The printed length includes the offending character;
                          // p is left pointing at it.
1384                     .error(loc, "unterminated named entity &%.*s;", cast(int)(p - idstart + 1), idstart);
1385                     c = '?';
1386                     break;
1387                 }
                      // Reached from either `break` above: leave the for loop.
1388                 break;
1389             }
1390             break;
1391         case 0:
1392         case 0x1A:
1393             // end of file
                  // Return the backslash itself and do not advance p.
1394             c = '\\';
1395             break;
1396         default:
1397             if (isoctal(cast(char)c))
1398             {
1399                 uint v = 0;
1400                 int n = 0;
                      // Consume at most three octal digits.
1401                 do
1402                 {
1403                     v = v * 8 + (c - '0');
1404                     c = *++p;
1405                 }
1406                 while (++n < 3 && isoctal(cast(char)c));
1407                 c = v;
                      // Values above 0xFF are reported but still returned.
1408                 if (c > 0xFF)
1409                     .error(loc, "escape octal sequence \\%03o is larger than \\377", c);
1410             }
1411             else
1412             {
                      // Unknown escape: report it, skip the character and return it.
1413                 .error(loc, "undefined escape sequence \\%c", c);
1414                 p++;
1415             }
1416             break;
1417         }
1418         return c;
1419     }
1420
1421     /**
1422     Lex a wysiwyg string. `p` must be pointing to the first character before the
1423     contents of the string literal. The character pointed to by `p` will be used as
1424     the terminating character (i.e. backtick or double-quote).
1425     Params:
1426         result = pointer to the token that accepts the result
1427     */
1428     private void wysiwygStringConstant(Token* result)
1429     {
1430         result.value = TOK.string_;
1431         Loc start = loc();
1432         auto terminator = p[0];
1433         p++;
1434         stringbuffer.setsize(0);
1435         while (1)
1436         {
1437             dchar c = p[0];
1438             p++;
1439             switch (c)
1440             {
1441             case '\n':
1442                 endOfLine();
1443                 break;
1444             case '\r':
1445                 if (p[0] == '\n')
1446                     continue; // ignore
1447                 c = '\n'; // treat EndOfLine as \n character
1448                 endOfLine();
1449                 break;
1450             case 0:
1451             case 0x1A:
1452                 error("unterminated string constant starting at %s", start.toChars());
1453                 result.setString();
1454                 // rewind `p` so it points to the EOF character
1455                 p--;
1456                 return;
1457             default:
1458                 if (c == terminator)
1459                 {
1460                     result.setString(stringbuffer);
1461                     stringPostfix(result);
1462                     return;
1463                 }
1464                 else if (c & 0x80)
1465                 {
1466                     p--;
1467                     const u = decodeUTF();
1468                     p++;
1469                     if (u == PS || u == LS)
1470                         endOfLine();
1471                     stringbuffer.writeUTF8(u);
1472                     continue;
1473                 }
1474                 break;
1475             }
1476             stringbuffer.writeByte(c);
1477         }
1478     }
1479
1480     /**************************************
1481      * Lex hex strings:
1482      *      x"0A ae 34FE BD"
1483      */
1484     private TOK hexStringConstant(Token* t)
1485     {
1486         Loc start = loc();
1487         uint n = 0;
1488         uint v = ~0; // dead assignment, needed to suppress warning
1489         p++;
1490         stringbuffer.setsize(0);
1491         while (1)
1492         {
1493             dchar c = *p++;
1494             switch (c)
1495             {
1496             case ' ':
1497             case '\t':
1498             case '\v':
1499             case '\f':
1500                 continue; // skip white space
1501             case '\r':
1502                 if (*p == '\n')
1503                     continue; // ignore '\r' if followed by '\n'
1504                 // Treat isolated '\r' as if it were a '\n'
1505                 goto case '\n';
1506             case '\n':
1507                 endOfLine();
1508                 continue;
1509             case 0:
1510             case 0x1A:
1511                 error("unterminated string constant starting at %s", start.toChars());
1512                 t.setString();
1513                 // decrement `p`, because it needs to point to the next token (the 0 or 0x1A character is the TOK.endOfFile token).
1514                 p--;
1515                 return TOK.hexadecimalString;
1516             case '"':
1517                 if (n & 1)
1518                 {
1519                     error("odd number (%d) of hex characters in hex string", n);
1520                     stringbuffer.writeByte(v);
1521                 }
1522                 t.setString(stringbuffer);
1523                 stringPostfix(t);
1524                 return TOK.hexadecimalString;
1525             default:
1526                 if (c >= '0' && c <= '9')
1527                     c -= '0';
1528                 else if (c >= 'a' && c <= 'f')
1529                     c -= 'a' - 10;
1530                 else if (c >= 'A' && c <= 'F')
1531                     c -= 'A' - 10;
1532                 else if (c & 0x80)
1533                 {
1534                     p--;
1535                     const u = decodeUTF();
1536                     p++;
1537                     if (u == PS || u == LS)
1538                         endOfLine();
1539                     else
1540                         error("non-hex character \\u%04x in hex string", u);
1541                 }
1542                 else
1543                     error("non-hex character '%c' in hex string", c);
1544                 if (n & 1)
1545                 {
1546                     v = (v << 4) | c;
1547                     stringbuffer.writeByte(v);
1548                 }
1549                 else
1550                     v = c;
1551                 n++;
1552                 break;
1553             }
1554         }
1555         assert(0); // see bug 15731
1556     }
1557
1558     /**
1559     Lex a delimited string. Some examples of delimited strings are:
1560     ---
1561     q"(foo(xxx))"      // "foo(xxx)"
1562     q"[foo$(LPAREN)]"  // "foo$(LPAREN)"
1563     q"/foo]/"          // "foo]"
1564     q"HERE
1565     foo
1566     HERE"              // "foo\n"
1567     ---
1568     It is assumed that `p` points to the opening double-quote '"'.
1569     Params:
1570         result = pointer to the token that accepts the result
1571     */
1572     private void delimitedStringConstant(Token* result)
1573     {
1574         result.value = TOK.string_;
1575         Loc start = loc();
1576         dchar delimleft = 0;
1577         dchar delimright = 0;
1578         uint nest = 1;
1579         uint nestcount = ~0; // dead assignment, needed to suppress warning
1580         Identifier hereid = null;
1581         uint blankrol = 0;
1582         uint startline = 0;
1583         p++;
1584         stringbuffer.setsize(0);
1585         while (1)
1586         {
1587             dchar c = *p++;
1588             //printf("c = '%c'\n", c);
1589             switch (c)
1590             {
1591             case '\n':
1592             Lnextline:
1593                 endOfLine();
1594                 startline = 1;
1595                 if (blankrol)
1596                 {
1597                     blankrol = 0;
1598                     continue;
1599                 }
1600                 if (hereid)
1601                 {
1602                     stringbuffer.writeUTF8(c);
1603                     continue;
1604                 }
1605                 break;
1606             case '\r':
1607                 if (*p == '\n')
1608                     continue; // ignore
1609                 c = '\n'; // treat EndOfLine as \n character
1610                 goto Lnextline;
1611             case 0:
1612             case 0x1A:
1613                 error("unterminated delimited string constant starting at %s", start.toChars());
1614                 result.setString();
1615                 // decrement `p`, because it needs to point to the next token (the 0 or 0x1A character is the TOK.endOfFile token).
1616                 p--;
1617                 return;
1618             default:
1619                 if (c & 0x80)
1620                 {
1621                     p--;
1622                     c = decodeUTF();
1623                     p++;
1624                     if (c == PS || c == LS)
1625                         goto Lnextline;
1626                 }
1627                 break;
1628             }
1629             if (delimleft == 0)
1630             {
1631                 delimleft = c;
1632                 nest = 1;
1633                 nestcount = 1;
1634                 if (c == '(')
1635                     delimright = ')';
1636                 else if (c == '{')
1637                     delimright = '}';
1638                 else if (c == '[')
1639                     delimright = ']';
1640                 else if (c == '<')
1641                     delimright = '>';
1642                 else if (isalpha(c) || c == '_' || (c >= 0x80 && isUniAlpha(c)))
1643                 {
1644                     // Start of identifier; must be a heredoc
1645                     Token tok;
1646                     p--;
1647                     scan(&tok); // read in heredoc identifier
1648                     if (tok.value != TOK.identifier)
1649                     {
1650                         error("identifier expected for heredoc, not %s", tok.toChars());
1651                         delimright = c;
1652                     }
1653                     else
1654                     {
1655                         hereid = tok.ident;
1656                         //printf("hereid = '%s'\n", hereid.toChars());
1657                         blankrol = 1;
1658                     }
1659                     nest = 0;
1660                 }
1661                 else
1662                 {
1663                     delimright = c;
1664                     nest = 0;
1665                     if (isspace(c))
1666                         error("delimiter cannot be whitespace");
1667                 }
1668             }
1669             else
1670             {
1671                 if (blankrol)
1672                 {
1673                     error("heredoc rest of line should be blank");
1674                     blankrol = 0;
1675                     continue;
1676                 }
1677                 if (nest == 1)
1678                 {
1679                     if (c == delimleft)
1680                         nestcount++;
1681                     else if (c == delimright)
1682                     {
1683                         nestcount--;
1684                         if (nestcount == 0)
1685                             goto Ldone;
1686                     }
1687                 }
1688                 else if (c == delimright)
1689                     goto Ldone;
1690                 if (startline && (isalpha(c) || c == '_' || (c >= 0x80 && isUniAlpha(c))) && hereid)
1691                 {
1692                     Token tok;
1693                     auto psave = p;
1694                     p--;
1695                     scan(&tok); // read in possible heredoc identifier
1696                     //printf("endid = '%s'\n", tok.ident.toChars());
1697                     if (tok.value == TOK.identifier && tok.ident is hereid)
1698                     {
1699                         /* should check that rest of line is blank
1700                          */
1701                         goto Ldone;
1702                     }
1703                     p = psave;
1704                 }
1705                 stringbuffer.writeUTF8(c);
1706                 startline = 0;
1707             }
1708         }
1709     Ldone:
1710         if (*p == '"')
1711             p++;
1712         else if (hereid)
1713             error("delimited string must end in %s\"", hereid.toChars());
1714         else
1715             error("delimited string must end in %c\"", delimright);
1716         result.setString(stringbuffer);
1717         stringPostfix(result);
1718     }
1719
1720     /**
1721     Lex a token string. Some examples of token strings are:
1722     ---
1723     q{ foo(xxx) }    // " foo(xxx) "
1724     q{foo$(LPAREN)}  // "foo$(LPAREN)"
1725     q{{foo}"}"}      // "{foo}"}""
1726     ---
1727     It is assumed that `p` points to the opening curly-brace.
1728     Params:
1729         result = pointer to the token that accepts the result
1730     */
1731     private void tokenStringConstant(Token* result)
1732     {
1733         result.value = TOK.string_;
1734
1735         uint nest = 1;
1736         const start = loc();
1737         const pstart = ++p;
1738         inTokenStringConstant++;
1739         scope(exit) inTokenStringConstant--;
1740         while (1)
1741         {
1742             Token tok;
1743             scan(&tok);
1744             switch (tok.value)
1745             {
1746             case TOK.leftCurly:
1747                 nest++;
1748                 continue;
1749             case TOK.rightCurly:
1750                 if (--nest == 0)
1751                 {
1752                     result.setString(pstart, p - 1 - pstart);
1753                     stringPostfix(result);
1754                     return;
1755                 }
1756                 continue;
1757             case TOK.endOfFile:
1758                 error("unterminated token string constant starting at %s", start.toChars());
1759                 result.setString();
1760                 return;
1761             default:
1762                 continue;
1763             }
1764         }
1765     }
1766
1767     /**
1768     Scan a quoted string while building the processed string value by
1769     handling escape sequences. The result is returned in the given `t` token.
1770     This function assumes that `p` currently points to the opening quote
1771     of the string.
1772     Params:
1773         t = the token to set the resulting string to
1774     * References:
1775     *   D https://dlang.org/spec/lex.html#double_quoted_strings
1776     *   ImportC C11 6.4.5
1777     */
1778     private void escapeStringConstant(Token* t)
1779     {
1780         t.value = TOK.string_;
1781
1782         const start = loc();
1783         const tc = *p++;        // opening quote
1784         stringbuffer.setsize(0);
1785         while (1)
1786         {
1787             dchar c = *p++;
1788             switch (c)
1789             {
1790             case '\\':
1791                 switch (*p)
1792                 {
1793                 case '&':
1794                     if (Ccompile)
1795                         goto default;
1796                     goto case;
1797
1798                 case 'u':
1799                 case 'U':
1800                     c = escapeSequence();
1801                     stringbuffer.writeUTF8(c);
1802                     continue;
1803                 default:
1804                     c = escapeSequence();
1805                     break;
1806                 }
1807                 break;
1808             case '\n':
1809                 endOfLine();
1810                 if (Ccompile)
1811                     goto Lunterminated;
1812                 break;
1813             case '\r':
1814                 if (*p == '\n')
1815                     continue; // ignore
1816                 c = '\n'; // treat EndOfLine as \n character
1817                 endOfLine();
1818                 if (Ccompile)
1819                     goto Lunterminated;
1820                 break;
1821             case '\'':
1822             case '"':
1823                 if (c != tc)
1824                     goto default;
1825                 t.setString(stringbuffer);
1826                 if (!Ccompile)
1827                     stringPostfix(t);
1828                 return;
1829             case 0:
1830             case 0x1A:
1831                 // decrement `p`, because it needs to point to the next token (the 0 or 0x1A character is the TOK.endOfFile token).
1832                 p--;
1833             Lunterminated:
1834                 error("unterminated string constant starting at %s", start.toChars());
1835                 t.setString();
1836                 return;
1837             default:
1838                 if (c & 0x80)
1839                 {
1840                     p--;
1841                     c = decodeUTF();
1842                     if (c == LS || c == PS)
1843                     {
1844                         c = '\n';
1845                         endOfLine();
1846                         if (Ccompile)
1847                             goto Lunterminated;
1848                     }
1849                     p++;
1850                     stringbuffer.writeUTF8(c);
1851                     continue;
1852                 }
1853                 break;
1854             }
1855             stringbuffer.writeByte(c);
1856         }
1857     }
1858
1859     /**************************************
1860      * Reference:
1861      *    https://dlang.org/spec/lex.html#characterliteral
1862      */
1863     private TOK charConstant(Token* t)
1864     {
1865         TOK tk = TOK.charLiteral;
1866         //printf("Lexer::charConstant\n");
1867         p++;
1868         dchar c = *p++;
1869         switch (c)
1870         {
1871         case '\\':
1872             switch (*p)
1873             {
1874             case 'u':
1875                 t.unsvalue = escapeSequence();
1876                 tk = TOK.wcharLiteral;
1877                 break;
1878             case 'U':
1879             case '&':
1880                 t.unsvalue = escapeSequence();
1881                 tk = TOK.dcharLiteral;
1882                 break;
1883             default:
1884                 t.unsvalue = escapeSequence();
1885                 break;
1886             }
1887             break;
1888         case '\n':
1889         L1:
1890             endOfLine();
1891             goto case;
1892         case '\r':
1893             goto case '\'';
1894         case 0:
1895         case 0x1A:
1896             // decrement `p`, because it needs to point to the next token (the 0 or 0x1A character is the TOK.endOfFile token).
1897             p--;
1898             goto case;
1899         case '\'':
1900             error("unterminated character constant");
1901             t.unsvalue = '?';
1902             return tk;
1903         default:
1904             if (c & 0x80)
1905             {
1906                 p--;
1907                 c = decodeUTF();
1908                 p++;
1909                 if (c == LS || c == PS)
1910                     goto L1;
1911                 if (c < 0xD800 || (c >= 0xE000 && c < 0xFFFE))
1912                     tk = TOK.wcharLiteral;
1913                 else
1914                     tk = TOK.dcharLiteral;
1915             }
1916             t.unsvalue = c;
1917             break;
1918         }
1919         if (*p != '\'')
1920         {
1921             while (*p != '\'' && *p != 0x1A && *p != 0 && *p != '\n' &&
1922                     *p != '\r' && *p != ';' && *p != ')' && *p != ']' && *p != '}')
1923             {
1924                 if (*p & 0x80)
1925                 {
1926                     const s = p;
1927                     c = decodeUTF();
1928                     if (c == LS || c == PS)
1929                     {
1930                         p = s;
1931                         break;
1932                     }
1933                 }
1934                 p++;
1935             }
1936
1937             if (*p == '\'')
1938             {
1939                 error("character constant has multiple characters");
1940                 p++;
1941             }
1942             else
1943                 error("unterminated character constant");
1944             t.unsvalue = '?';
1945             return tk;
1946         }
1947         p++;
1948         return tk;
1949     }
1950
1951     /***************************************
1952      * Lex C character constant.
1953      * Parser is on the opening quote.
1954      * Params:
1955      *  t = token to fill in
1956      *  prefix = one of `u`, `U` or 0.
1957      * Reference:
1958      *  C11 6.4.4.4
1959      */
1960     private void clexerCharConstant(ref Token t, char prefix)
1961     {
1962         escapeStringConstant(&t);
1963         const(char)[] str = t.ustring[0 .. t.len];
1964         const n = str.length;
1965         const loc = t.loc;
1966         if (n == 0)
1967         {
1968             error(loc, "empty character constant");
1969             t.value = TOK.semicolon;
1970             return;
1971         }
1972
1973         uint u;
1974         switch (prefix)
1975         {
1976             case 0:
1977                 if (n == 1) // fast case
1978                 {
1979                     u = str[0];
1980                 }
1981                 else if (n > 4)
1982                     error(loc, "max number of chars in character literal is 4, had %d",
1983                         cast(int)n);
1984                 else
1985                 {
1986                     foreach (i, c; str)
1987                         (cast(char*)&u)[n - 1 - i] = c;
1988                 }
1989                 break;
1990
1991             case 'u':
1992                 dchar d1;
1993                 size_t idx;
1994                 auto msg = utf_decodeChar(str, idx, d1);
1995                 dchar d2 = 0;
1996                 if (idx < n && !msg)
1997                     msg = utf_decodeChar(str, idx, d2);
1998                 if (msg)
1999                     error(loc, "%s", msg);
2000                 else if (idx < n)
2001                     error(loc, "max number of chars in 16 bit character literal is 2, had %d",
2002                         (n + 1) >> 1);
2003                 else if (d1 > 0x1_0000)
2004                     error(loc, "%d does not fit in 16 bits", d1);
2005                 else if (d2 > 0x1_0000)
2006                     error(loc, "%d does not fit in 16 bits", d2);
2007                 u = d1;
2008                 if (d2)
2009                     u = (d1 << 16) | d2;
2010                 break;
2011
2012             case 'U':
2013                 dchar d;
2014                 size_t idx;
2015                 auto msg = utf_decodeChar(str, idx, d);
2016                 if (msg)
2017                     error(loc, "%s", msg);
2018                 else if (idx < n)
2019                     error(loc, "max number of chars in 32 bit character literal is 1, had %d",
2020                         (n + 3) >> 2);
2021                 u = d;
2022                 break;
2023
2024             default:
2025                 assert(0);
2026         }
2027         t.value = TOK.int32Literal;
2028         t.unsvalue = u;
2029     }
2030
2031     /***************************************
2032      * Get postfix of string literal.
2033      */
2034     private void stringPostfix(Token* t) pure @nogc
2035     {
2036         switch (*p)
2037         {
2038         case 'c':
2039         case 'w':
2040         case 'd':
2041             t.postfix = *p;
2042             p++;
2043             break;
2044         default:
2045             t.postfix = 0;
2046             break;
2047         }
2048     }
2049
2050     /**************************************
2051      * Read in a number.
2052      * If it's an integer, store it in tok.TKutok.Vlong.
2053      *      integers can be decimal, octal or hex
2054      *      Handle the suffixes U, UL, LU, L, etc.
2055      * If it's double, store it in tok.TKutok.Vdouble.
2056      * Returns:
2057      *      TKnum
2058      *      TKdouble,...
2059      */
2060     private TOK number(Token* t)
2061     {
2062         int base = 10;
2063         const start = p;
2064         uinteger_t n = 0; // unsigned >=64 bit integer type
2065         int d;
2066         bool err = false;
2067         bool overflow = false;
2068         bool anyBinaryDigitsNoSingleUS = false;
2069         bool anyHexDigitsNoSingleUS = false;
2070         dchar c = *p;
2071         if (c == '0')
2072         {
2073             ++p;
2074             c = *p;
2075             switch (c)
2076             {
2077             case '0':
2078             case '1':
2079             case '2':
2080             case '3':
2081             case '4':
2082             case '5':
2083             case '6':
2084             case '7':
2085                 base = 8;
2086                 break;
2087
2088             case '8':
2089             case '9':
2090                 if (Ccompile)
2091                     error("octal digit expected, not `%c`", c);
2092                 base = 8;
2093                 break;
2094             case 'x':
2095             case 'X':
2096                 ++p;
2097                 base = 16;
2098                 break;
2099             case 'b':
2100             case 'B':
2101                 if (Ccompile)
2102                     error("binary constants not allowed");
2103                 ++p;
2104                 base = 2;
2105                 break;
2106             case '.':
2107                 if (p[1] == '.')
2108                     goto Ldone; // if ".."
2109                 if (isalpha(p[1]) || p[1] == '_' || p[1] & 0x80)
2110                     goto Ldone; // if ".identifier" or ".unicode"
2111                 goto Lreal; // '.' is part of current token
2112             case 'i':
2113             case 'f':
2114             case 'F':
2115                 goto Lreal;
2116             case '_':
2117                 if (Ccompile)
2118                     error("embedded `_` not allowed");
2119                 ++p;
2120                 base = 8;
2121                 break;
2122             case 'L':
2123                 if (p[1] == 'i')
2124                     goto Lreal;
2125                 break;
2126             default:
2127                 break;
2128             }
2129         }
2130         while (1)
2131         {
2132             c = *p;
2133             switch (c)
2134             {
2135             case '0':
2136             case '1':
2137             case '2':
2138             case '3':
2139             case '4':
2140             case '5':
2141             case '6':
2142             case '7':
2143             case '8':
2144             case '9':
2145                 ++p;
2146                 d = c - '0';
2147                 break;
2148             case 'a':
2149             case 'b':
2150             case 'c':
2151             case 'd':
2152             case 'e':
2153             case 'f':
2154             case 'A':
2155             case 'B':
2156             case 'C':
2157             case 'D':
2158             case 'E':
2159             case 'F':
2160                 ++p;
2161                 if (base != 16)
2162                 {
2163                     if (c == 'e' || c == 'E' || c == 'f' || c == 'F')
2164                         goto Lreal;
2165                 }
2166                 if (c >= 'a')
2167                     d = c + 10 - 'a';
2168                 else
2169                     d = c + 10 - 'A';
2170                 break;
2171             case 'L':
2172                 if (p[1] == 'i')
2173                     goto Lreal;
2174                 goto Ldone;
2175             case '.':
2176                 if (p[1] == '.')
2177                     goto Ldone; // if ".."
2178                 if (base <= 10 && n > 0 && (isalpha(p[1]) || p[1] == '_' || p[1] & 0x80))
2179                     goto Ldone; // if ".identifier" or ".unicode"
2180                 if (base == 16 && (!ishex(p[1]) || p[1] == '_' || p[1] & 0x80))
2181                     goto Ldone; // if ".identifier" or ".unicode"
2182                 if (base == 2)
2183                     goto Ldone; // if ".identifier" or ".unicode"
2184                 goto Lreal; // otherwise as part of a floating point literal
2185             case 'p':
2186             case 'P':
2187             case 'i':
2188             Lreal:
2189                 p = start;
2190                 return inreal(t);
2191             case '_':
2192                 if (Ccompile)
2193                     goto default;
2194                 ++p;
2195                 continue;
2196             default:
2197                 goto Ldone;
2198             }
2199             // got a digit here, set any necessary flags, check for errors
2200             anyHexDigitsNoSingleUS = true;
2201             anyBinaryDigitsNoSingleUS = true;
2202             if (!err && d >= base)
2203             {
2204                 error("%s digit expected, not `%c`", base == 2 ? "binary".ptr :
2205                                                      base == 8 ? "octal".ptr :
2206                                                      "decimal".ptr, c);
2207                 err = true;
2208             }
2209             // Avoid expensive overflow check if we aren't at risk of overflow
2210             if (n <= 0x0FFF_FFFF_FFFF_FFFFUL)
2211                 n = n * base + d;
2212             else
2213             {
2214                 import core.checkedint : mulu, addu;
2215
2216                 n = mulu(n, base, overflow);
2217                 n = addu(n, d, overflow);
2218             }
2219         }
2220     Ldone:
2221         if (overflow && !err)
2222         {
2223             error("integer overflow");
2224             err = true;
2225         }
2226         if ((base == 2 && !anyBinaryDigitsNoSingleUS) ||
2227             (base == 16 && !anyHexDigitsNoSingleUS))
2228             error("`%.*s` isn't a valid integer literal, use `%.*s0` instead", cast(int)(p - start), start, 2, start);
2229
2230         t.unsvalue = n;
2231
2232         if (Ccompile)
2233             return cnumber(base, n);
2234
2235         enum FLAGS : int
2236         {
2237             none = 0,
2238             decimal = 1, // decimal
2239             unsigned = 2, // u or U suffix
2240             long_ = 4, // L suffix
2241         }
2242
2243         FLAGS flags = (base == 10) ? FLAGS.decimal : FLAGS.none;
2244         // Parse trailing 'u', 'U', 'l' or 'L' in any combination
2245         const psuffix = p;
2246         while (1)
2247         {
2248             FLAGS f;
2249             switch (*p)
2250             {
2251             case 'U':
2252             case 'u':
2253                 f = FLAGS.unsigned;
2254                 goto L1;
2255             case 'l':
2256                 f = FLAGS.long_;
2257                 error("lower case integer suffix 'l' is not allowed. Please use 'L' instead");
2258                 goto L1;
2259             case 'L':
2260                 f = FLAGS.long_;
2261             L1:
2262                 p++;
2263                 if ((flags & f) && !err)
2264                 {
2265                     error("unrecognized token");
2266                     err = true;
2267                 }
2268                 flags = cast(FLAGS)(flags | f);
2269                 continue;
2270             default:
2271                 break;
2272             }
2273             break;
2274         }
2275         if (base == 8 && n >= 8)
2276         {
2277             if (err)
2278                 // can't translate invalid octal value, just show a generic message
2279                 error("octal literals larger than 7 are no longer supported");
2280             else
2281                 error("octal literals `0%llo%.*s` are no longer supported, use `std.conv.octal!%llo%.*s` instead",
2282                     n, cast(int)(p - psuffix), psuffix, n, cast(int)(p - psuffix), psuffix);
2283         }
2284         TOK result;
2285         switch (flags)
2286         {
2287         case FLAGS.none:
2288             /* Octal or Hexadecimal constant.
2289              * First that fits: int, uint, long, ulong
2290              */
2291             if (n & 0x8000000000000000L)
2292                 result = TOK.uns64Literal;
2293             else if (n & 0xFFFFFFFF00000000L)
2294                 result = TOK.int64Literal;
2295             else if (n & 0x80000000)
2296                 result = TOK.uns32Literal;
2297             else
2298                 result = TOK.int32Literal;
2299             break;
2300         case FLAGS.decimal:
2301             /* First that fits: int, long, long long
2302              */
2303             if (n & 0x8000000000000000L)
2304             {
2305                 result = TOK.uns64Literal;
2306             }
2307             else if (n & 0xFFFFFFFF80000000L)
2308                 result = TOK.int64Literal;
2309             else
2310                 result = TOK.int32Literal;
2311             break;
2312         case FLAGS.unsigned:
2313         case FLAGS.decimal | FLAGS.unsigned:
2314             /* First that fits: uint, ulong
2315              */
2316             if (n & 0xFFFFFFFF00000000L)
2317                 result = TOK.uns64Literal;
2318             else
2319                 result = TOK.uns32Literal;
2320             break;
2321         case FLAGS.decimal | FLAGS.long_:
2322             if (n & 0x8000000000000000L)
2323             {
2324                 if (!err)
2325                 {
2326                     error("signed integer overflow");
2327                     err = true;
2328                 }
2329                 result = TOK.uns64Literal;
2330             }
2331             else
2332                 result = TOK.int64Literal;
2333             break;
2334         case FLAGS.long_:
2335             if (n & 0x8000000000000000L)
2336                 result = TOK.uns64Literal;
2337             else
2338                 result = TOK.int64Literal;
2339             break;
2340         case FLAGS.unsigned | FLAGS.long_:
2341         case FLAGS.decimal | FLAGS.unsigned | FLAGS.long_:
2342             result = TOK.uns64Literal;
2343             break;
2344         default:
2345             debug
2346             {
2347                 printf("%x\n", flags);
2348             }
2349             assert(0);
2350         }
2351         return result;
2352     }
2353
2354     /**************************************
2355      * Lex C integer-suffix
2356      * Params:
2357      *  base = number base
2358      *  n = raw integer value
2359      * Returns:
2360      *  token value
2361      */
2362     private TOK cnumber(int base, uinteger_t n)
2363     {
2364         /* C11 6.4.4.1
2365          * Parse trailing suffixes:
2366          *   u or U
2367          *   l or L
2368          *   ll or LL
2369          */
2370         enum FLAGS : uint
2371         {
2372             octalhex = 1, // octal or hexadecimal
2373             decimal  = 2, // decimal
2374             unsigned = 4, // u or U suffix
2375             long_    = 8, // l or L suffix
2376             llong    = 0x10 // ll or LL
2377         }
2378         FLAGS flags = (base == 10) ? FLAGS.decimal : FLAGS.octalhex;
2379         bool err;
2380     Lsuffixes:
2381         while (1)
2382         {
2383             FLAGS f;
2384             const cs = *p;
2385             switch (cs)
2386             {
2387                 case 'U':
2388                 case 'u':
2389                     f = FLAGS.unsigned;
2390                     break;
2391
2392                 case 'l':
2393                 case 'L':
2394                     f = FLAGS.long_;
2395                     if (cs == p[1])
2396                     {
2397                         f = FLAGS.long_ | FLAGS.llong;
2398                         ++p;
2399                     }
2400                     break;
2401
2402                 default:
2403                     break Lsuffixes;
2404             }
2405             ++p;
2406             if ((flags & f) && !err)
2407             {
2408                 error("duplicate integer suffixes");
2409                 err = true;
2410             }
2411             flags = cast(FLAGS)(flags | f);
2412         }
2413
2414         void overflow()
2415         {
2416             error("integer overflow");
2417         }
2418
2419         TOK result = TOK.int32Literal;     // default
2420         switch (flags)
2421         {
2422             /* Since D doesn't have a variable sized `long` or `unsigned long` type,
2423              * this code deviates from C by picking D int, uint, long, or ulong instead
2424              */
2425
2426             case FLAGS.octalhex:
2427                 /* Octal or Hexadecimal constant.
2428                  * First that fits: int, unsigned, long, unsigned long,
2429                  * long long, unsigned long long
2430                  */
2431                 if (longsize == 4)
2432                 {
2433                     if (n & 0x8000000000000000L)
2434                         result = TOK.uns64Literal;
2435                     else if (n & 0xFFFFFFFF00000000L)
2436                         result = TOK.int64Literal;
2437                     else if (n & 0x80000000)
2438                         result = TOK.uns32Literal;
2439                     else
2440                         result = TOK.int32Literal;
2441                 }
2442                 else
2443                 {
2444                     if (n & 0x8000000000000000L)
2445                         result = TOK.uns64Literal;      // unsigned long
2446                     else if (n & 0xFFFFFFFF00000000L)
2447                         result = TOK.int64Literal;      // long
2448                     else if (n & 0x80000000)
2449                         result = TOK.uns32Literal;
2450                     else
2451                         result = TOK.int32Literal;
2452                 }
2453                 break;
2454
2455             case FLAGS.decimal:
2456                 /* First that fits: int, long, long long
2457                  */
2458                 if (longsize == 4)
2459                 {
2460                     if (n & 0x8000000000000000L)
2461                         result = TOK.uns64Literal;
2462                     else if (n & 0xFFFFFFFF80000000L)
2463                         result = TOK.int64Literal;
2464                     else
2465                         result = TOK.int32Literal;
2466                 }
2467                 else
2468                 {
2469                     if (n & 0x8000000000000000L)
2470                         result = TOK.uns64Literal;      // unsigned long
2471                     else if (n & 0xFFFFFFFF80000000L)
2472                         result = TOK.int64Literal;      // long
2473                     else
2474                         result = TOK.int32Literal;
2475                 }
2476                 break;
2477
2478             case FLAGS.octalhex | FLAGS.unsigned:
2479             case FLAGS.decimal | FLAGS.unsigned:
2480                 /* First that fits: unsigned, unsigned long, unsigned long long
2481                  */
2482                 if (longsize == 4)
2483                 {
2484                     if (n & 0xFFFFFFFF00000000L)
2485                         result = TOK.uns64Literal;
2486                     else
2487                         result = TOK.uns32Literal;
2488                 }
2489                 else
2490                 {
2491                     if (n & 0xFFFFFFFF00000000L)
2492                         result = TOK.uns64Literal;      // unsigned long
2493                     else
2494                         result = TOK.uns32Literal;
2495                 }
2496                 break;
2497
2498             case FLAGS.decimal | FLAGS.long_:
2499                 /* First that fits: long, long long
2500                  */
2501                 if (longsize == 4)
2502                 {
2503                     if (n & 0x8000000000000000L)
2504                         overflow();
2505                     else if (n & 0xFFFFFFFF_80000000L)
2506                         result = TOK.int64Literal;
2507                     else
2508                         result = TOK.int32Literal;      // long
2509                 }
2510                 else
2511                 {
2512                     if (n & 0x8000000000000000L)
2513                         overflow();
2514                     else
2515                         result = TOK.int64Literal;      // long
2516                 }
2517                 break;
2518
2519             case FLAGS.octalhex | FLAGS.long_:
2520                 /* First that fits: long, unsigned long, long long,
2521                  * unsigned long long
2522                  */
2523                 if (longsize == 4)
2524                 {
2525                     if (n & 0x8000000000000000L)
2526                         result = TOK.uns64Literal;
2527                     else if (n & 0xFFFFFFFF00000000L)
2528                         result = TOK.int64Literal;
2529                     else if (n & 0x80000000)
2530                         result = TOK.uns32Literal;      // unsigned long
2531                     else
2532                         result = TOK.int32Literal;      // long
2533                 }
2534                 else
2535                 {
2536                     if (n & 0x80000000_00000000L)
2537                         result = TOK.uns64Literal;      // unsigned long
2538                     else
2539                         result = TOK.int64Literal;      // long
2540                 }
2541                 break;
2542
2543             case FLAGS.octalhex | FLAGS.unsigned | FLAGS.long_:
2544             case FLAGS.decimal  | FLAGS.unsigned | FLAGS.long_:
2545                 /* First that fits: unsigned long, unsigned long long
2546                  */
2547                 if (longsize == 4)
2548                 {
2549                     if (n & 0xFFFFFFFF00000000L)
2550                         result = TOK.uns64Literal;
2551                     else
2552                         result = TOK.uns32Literal;      // unsigned long
2553                 }
2554                 else
2555                 {
2556                     result = TOK.uns64Literal;  // unsigned long
2557                 }
2558                 break;
2559
2560             case FLAGS.octalhex | FLAGS.long_ | FLAGS.llong:
2561                 /* First that fits: long long, unsigned long long
2562                  */
2563                 if (n & 0x8000000000000000L)
2564                     result = TOK.uns64Literal;
2565                 else
2566                     result = TOK.int64Literal;
2567                 break;
2568
2569             case FLAGS.decimal | FLAGS.long_ | FLAGS.llong:
2570                 /* long long
2571                  */
2572                 result = TOK.int64Literal;
2573                 break;
2574
2575             case FLAGS.octalhex | FLAGS.long_ | FLAGS.unsigned | FLAGS.llong:
2576             case FLAGS.decimal  | FLAGS.long_ | FLAGS.unsigned | FLAGS.llong:
2577                 result = TOK.uns64Literal;
2578                 break;
2579
2580             default:
2581                 debug printf("%x\n",flags);
2582                 assert(0);
2583         }
2584         return result;
2585     }
2586
2587     /**************************************
2588      * Read in characters, converting them to real.
2589      * Bugs:
2590      *      Exponent overflow not detected.
2591      *      Too much requested precision is not detected.
2592      */
2593     private TOK inreal(Token* t)
2594     {
2595         //printf("Lexer::inreal()\n");
2596         debug
2597         {
2598             assert(*p == '.' || isdigit(*p));
2599         }
2600         bool isWellformedString = true;
2601         stringbuffer.setsize(0);
2602         auto pstart = p;
2603         bool hex = false;
2604         dchar c = *p++;
2605         // Leading '0x'
2606         if (c == '0')
2607         {
2608             c = *p++;
2609             if (c == 'x' || c == 'X')
2610             {
2611                 hex = true;
2612                 c = *p++;
2613             }
2614         }
2615         // Digits to left of '.'
2616         while (1)
2617         {
2618             if (c == '.')
2619             {
2620                 c = *p++;
2621                 break;
2622             }
2623             if (isdigit(c) || (hex && isxdigit(c)) || c == '_')
2624             {
2625                 c = *p++;
2626                 continue;
2627             }
2628             break;
2629         }
2630         // Digits to right of '.'
2631         while (1)
2632         {
2633             if (isdigit(c) || (hex && isxdigit(c)) || c == '_')
2634             {
2635                 c = *p++;
2636                 continue;
2637             }
2638             break;
2639         }
2640         if (c == 'e' || c == 'E' || (hex && (c == 'p' || c == 'P')))
2641         {
2642             c = *p++;
2643             if (c == '-' || c == '+')
2644             {
2645                 c = *p++;
2646             }
2647             bool anyexp = false;
2648             while (1)
2649             {
2650                 if (isdigit(c))
2651                 {
2652                     anyexp = true;
2653                     c = *p++;
2654                     continue;
2655                 }
2656                 if (c == '_')
2657                 {
2658                     if (Ccompile)
2659                         error("embedded `_` in numeric literals not allowed");
2660                     c = *p++;
2661                     continue;
2662                 }
2663                 if (!anyexp)
2664                 {
2665                     error("missing exponent");
2666                     isWellformedString = false;
2667                 }
2668                 break;
2669             }
2670         }
2671         else if (hex)
2672         {
2673             error("exponent required for hex float");
2674             isWellformedString = false;
2675         }
2676         --p;
2677         while (pstart < p)
2678         {
2679             if (*pstart != '_')
2680                 stringbuffer.writeByte(*pstart);
2681             ++pstart;
2682         }
2683         stringbuffer.writeByte(0);
2684         auto sbufptr = cast(const(char)*)stringbuffer[].ptr;
2685         TOK result;
2686         bool isOutOfRange = false;
2687         t.floatvalue = (isWellformedString ? CTFloat.parse(sbufptr, &isOutOfRange) : CTFloat.zero);
2688         switch (*p)
2689         {
2690         case 'F':
2691         case 'f':
2692             if (isWellformedString && !isOutOfRange)
2693                 isOutOfRange = Port.isFloat32LiteralOutOfRange(sbufptr);
2694             result = TOK.float32Literal;
2695             p++;
2696             break;
2697         default:
2698             if (isWellformedString && !isOutOfRange)
2699                 isOutOfRange = Port.isFloat64LiteralOutOfRange(sbufptr);
2700             result = TOK.float64Literal;
2701             break;
2702         case 'l':
2703             if (!Ccompile)
2704                 error("use 'L' suffix instead of 'l'");
2705             goto case 'L';
2706         case 'L':
2707             ++p;
2708             if (Ccompile && long_doublesize == 8)
2709                 goto default;
2710             result = TOK.float80Literal;
2711             break;
2712         }
2713         if ((*p == 'i' || *p == 'I') && !Ccompile)
2714         {
2715             if (*p == 'I')
2716                 error("use 'i' suffix instead of 'I'");
2717             p++;
2718             switch (result)
2719             {
2720             case TOK.float32Literal:
2721                 result = TOK.imaginary32Literal;
2722                 break;
2723             case TOK.float64Literal:
2724                 result = TOK.imaginary64Literal;
2725                 break;
2726             case TOK.float80Literal:
2727                 result = TOK.imaginary80Literal;
2728                 break;
2729             default:
2730                 break;
2731             }
2732         }
2733         const isLong = (result == TOK.float80Literal || result == TOK.imaginary80Literal);
2734         if (isOutOfRange && !isLong)
2735         {
2736             const char* suffix = (result == TOK.float32Literal || result == TOK.imaginary32Literal) ? "f" : "";
2737             error(scanloc, "number `%s%s` is not representable", sbufptr, suffix);
2738         }
2739         debug
2740         {
2741             switch (result)
2742             {
2743             case TOK.float32Literal:
2744             case TOK.float64Literal:
2745             case TOK.float80Literal:
2746             case TOK.imaginary32Literal:
2747             case TOK.imaginary64Literal:
2748             case TOK.imaginary80Literal:
2749                 break;
2750             default:
2751                 assert(0);
2752             }
2753         }
2754         return result;
2755     }
2756
2757     final Loc loc() pure @nogc
2758     {
2759         scanloc.charnum = cast(uint)(1 + p - line);
2760         version (LocOffset)
2761             scanloc.fileOffset = cast(uint)(p - base);
2762         return scanloc;
2763     }
2764
2765     final void error(const(char)* format, ...)
2766     {
2767         va_list args;
2768         va_start(args, format);
2769         .verror(token.loc, format, args);
2770         va_end(args);
2771     }
2772
2773     final void error(const ref Loc loc, const(char)* format, ...)
2774     {
2775         va_list args;
2776         va_start(args, format);
2777         .verror(loc, format, args);
2778         va_end(args);
2779     }
2780
2781     final void deprecation(const(char)* format, ...)
2782     {
2783         va_list args;
2784         va_start(args, format);
2785         .vdeprecation(token.loc, format, args);
2786         va_end(args);
2787     }
2788
2789     /*********************************************
2790      * Parse line/file preprocessor directive:
2791      *    #line linnum [filespec]
2792      * Allow __LINE__ for linnum, and __FILE__ for filespec.
2793      * Accept linemarker format:
2794      *    # linnum [filespec] {flags}
2795      * There can be zero or more flags, which are one of the digits 1..4, and
2796      * must be in ascending order. The flags are ignored.
2797      * Params:
2798      *  tok = token we're on, which is linnum of linemarker
2799      *  linemarker = true if line marker format and lexer is on linnum
2800      * References:
2801      *  linemarker https://gcc.gnu.org/onlinedocs/gcc-11.1.0/cpp/Preprocessor-Output.html
2802      */
2803     private void poundLine(ref Token tok, bool linemarker)
2804     {
2805         auto linnum = this.scanloc.linnum;
2806         const(char)* filespec = null;
2807         const loc = this.loc();
2808         bool flags;
2809
2810         if (!linemarker)
2811             scan(&tok);
2812         if (tok.value == TOK.int32Literal || tok.value == TOK.int64Literal)
2813         {
2814             const lin = cast(int)(tok.unsvalue - 1);
2815             if (lin != tok.unsvalue - 1)
2816                 error("line number `%lld` out of range", cast(ulong)tok.unsvalue);
2817             else
2818                 linnum = lin;
2819         }
2820         else if (tok.value == TOK.line)  // #line __LINE__
2821         {
2822         }
2823         else
2824             goto Lerr;
2825         while (1)
2826         {
2827             switch (*p)
2828             {
2829             case 0:
2830             case 0x1A:
2831             case '\n':
2832             Lnewline:
2833                 if (!inTokenStringConstant)
2834                 {
2835                     this.scanloc.linnum = linnum;
2836                     if (filespec)
2837                         this.scanloc.filename = filespec;
2838                 }
2839                 return;
2840             case '\r':
2841                 p++;
2842                 if (*p != '\n')
2843                 {
2844                     p--;
2845                     goto Lnewline;
2846                 }
2847                 continue;
2848             case ' ':
2849             case '\t':
2850             case '\v':
2851             case '\f':
2852                 p++;
2853                 continue; // skip white space
2854             case '_':
2855                 if (filespec || flags)
2856                     goto Lerr;
2857                 if (memcmp(p, "__FILE__".ptr, 8) == 0)
2858                 {
2859                     p += 8;
2860                     filespec = mem.xstrdup(scanloc.filename);
2861                     continue;
2862                 }
2863                 goto Lerr;
2864             case '"':
2865                 if (filespec || flags)
2866                     goto Lerr;
2867                 stringbuffer.setsize(0);
2868                 p++;
2869                 while (1)
2870                 {
2871                     uint c;
2872                     c = *p;
2873                     switch (c)
2874                     {
2875                     case '\n':
2876                     case '\r':
2877                     case 0:
2878                     case 0x1A:
2879                         goto Lerr;
2880                     case '"':
2881                         stringbuffer.writeByte(0);
2882                         filespec = mem.xstrdup(cast(const(char)*)stringbuffer[].ptr);
2883                         p++;
2884                         break;
2885                     default:
2886                         if (c & 0x80)
2887                         {
2888                             uint u = decodeUTF();
2889                             if (u == PS || u == LS)
2890                                 goto Lerr;
2891                         }
2892                         stringbuffer.writeByte(c);
2893                         p++;
2894                         continue;
2895                     }
2896                     break;
2897                 }
2898                 continue;
2899
2900             case '1':
2901             case '2':
2902             case '3':
2903             case '4':
2904                 flags = true;   // linemarker flags seen
2905                 ++p;
2906                 if ('0' <= *p && *p <= '9')
2907                     goto Lerr;  // only one digit allowed
2908                 continue;
2909
2910             default:
2911                 if (*p & 0x80)
2912                 {
2913                     uint u = decodeUTF();
2914                     if (u == PS || u == LS)
2915                         goto Lnewline;
2916                 }
2917                 goto Lerr;
2918             }
2919         }
2920     Lerr:
2921         if (linemarker)
2922             error(loc, "# integer [\"filespec\"] { 1 | 2 | 3 | 4 }\\n expected");
2923         else
2924             error(loc, "#line integer [\"filespec\"]\\n expected");
2925     }
2926
2927     /*********************************************
2928      * C11 6.10.6 Pragma directive
2929      * # pragma pp-tokens(opt) new-line
2930      * The C preprocessor sometimes leaves pragma directives in
2931      * the preprocessed output. Ignore them.
2932      * Upon return, p is at start of next line.
2933      */
2934     private void pragmaDirective(const ref Loc loc)
2935     {
2936         Token n;
2937         scan(&n);
2938         if (n.value == TOK.identifier && n.ident == Id.pack)
2939             return pragmaPack(loc);
2940         skipToNextLine();
2941     }
2942
2943     /*********
2944      * ImportC
2945      * # pragma pack
2946      * https://gcc.gnu.org/onlinedocs/gcc-4.4.4/gcc/Structure_002dPacking-Pragmas.html
2947      * https://docs.microsoft.com/en-us/cpp/preprocessor/pack
2948      * Scanner is on the `pack`
2949      * Params:
2950      *  startloc = location to use for error messages
2951      */
2952     private void pragmaPack(const ref Loc startloc)
2953     {
2954         const loc = startloc;
2955         Token n;
2956         scan(&n);
2957         if (n.value != TOK.leftParenthesis)
2958         {
2959             error(loc, "left parenthesis expected to follow `#pragma pack`");
2960             skipToNextLine();
2961             return;
2962         }
2963
2964         void closingParen()
2965         {
2966             if (n.value != TOK.rightParenthesis)
2967             {
2968                 error(loc, "right parenthesis expected to close `#pragma pack(`");
2969             }
2970             skipToNextLine();
2971         }
2972
2973         void setPackAlign(ref const Token t)
2974         {
2975             const n = t.unsvalue;
2976             if (n < 1 || n & (n - 1) || ushort.max < n)
2977                 error(loc, "pack must be an integer positive power of 2, not 0x%llx", cast(ulong)n);
2978             packalign.set(cast(uint)n);
2979             packalign.setPack(true);
2980         }
2981
2982         scan(&n);
2983
2984         if (!records)
2985         {
2986             records = new Array!Identifier;
2987             packs = new Array!structalign_t;
2988         }
2989
2990         /* # pragma pack ( show )
2991          */
2992         if (n.value == TOK.identifier && n.ident == Id.show)
2993         {
2994             if (packalign.isDefault())
2995                 warning(startloc, "current pack attribute is default");
2996             else
2997                 warning(startloc, "current pack attribute is %d", packalign.get());
2998             scan(&n);
2999             return closingParen();
3000         }
3001         /* # pragma pack ( push )
3002          * # pragma pack ( push , identifier )
3003          * # pragma pack ( push , integer )
3004          * # pragma pack ( push , identifier , integer )
3005          */
3006         if (n.value == TOK.identifier && n.ident == Id.push)
3007         {
3008             scan(&n);
3009             Identifier record = null;
3010             if (n.value == TOK.comma)
3011             {
3012                 scan(&n);
3013                 if (n.value == TOK.identifier)
3014                 {
3015                     record = n.ident;
3016                     scan(&n);
3017                     if (n.value == TOK.comma)
3018                     {
3019                         scan(&n);
3020                         if (n.value == TOK.int32Literal)
3021                         {
3022                             setPackAlign(n);
3023                             scan(&n);
3024                         }
3025                         else
3026                             error(loc, "alignment value expected, not `%s`", n.toChars());
3027                     }
3028                 }
3029                 else if (n.value == TOK.int32Literal)
3030                 {
3031                     setPackAlign(n);
3032                     scan(&n);
3033                 }
3034                 else
3035                     error(loc, "alignment value expected, not `%s`", n.toChars());
3036             }
3037             this.records.push(record);
3038             this.packs.push(packalign);
3039             return closingParen();
3040         }
3041         /* # pragma pack ( pop )
3042          * # pragma pack ( pop PopList )
3043          * PopList :
3044          *    , IdentifierOrInteger
3045          *    , IdentifierOrInteger PopList
3046          * IdentifierOrInteger:
3047          *      identifier
3048          *      integer
3049          */
3050         if (n.value == TOK.identifier && n.ident == Id.pop)
3051         {
3052             scan(&n);
3053             while (n.value == TOK.comma)
3054             {
3055                 scan(&n);
3056                 if (n.value == TOK.identifier)
3057                 {
3058                     for (size_t len = this.records.length; len; --len)
3059                     {
3060                         if ((*this.records)[len - 1] == n.ident)
3061                         {
3062                             packalign = (*this.packs)[len - 1];
3063                             this.records.setDim(len - 1);
3064                             this.packs.setDim(len - 1);
3065                             break;
3066                         }
3067                     }
3068                     scan(&n);
3069                 }
3070                 else if (n.value == TOK.int32Literal)
3071                 {
3072                     setPackAlign(n);
3073                     this.records.push(null);
3074                     this.packs.push(packalign);
3075                     scan(&n);
3076                 }
3077             }
3078             return closingParen();
3079         }
3080         /* # pragma pack ( integer )
3081          */
3082         if (n.value == TOK.int32Literal)
3083         {
3084             setPackAlign(n);
3085             scan(&n);
3086             return closingParen();
3087         }
3088         /* # pragma pack ( )
3089          */
3090         if (n.value == TOK.rightParenthesis)
3091         {
3092             packalign.setDefault();
3093             return closingParen();
3094         }
3095
3096         error(loc, "unrecognized `#pragma pack(%s)`", n.toChars());
3097         skipToNextLine();
3098     }
3099
3100     /***************************************
3101      * Scan forward to start of next line.
3102      */
3103     private void skipToNextLine()
3104     {
3105         while (1)
3106         {
3107             switch (*p)
3108             {
3109             case 0:
3110             case 0x1A:
3111                 return; // do not advance p
3112
3113             case '\n':
3114                 ++p;
3115                 break;
3116
3117             case '\r':
3118                 ++p;
3119                 if (p[0] == '\n')
3120                    ++p;
3121                 break;
3122
3123             default:
3124                 if (*p & 0x80)
3125                 {
3126                     const u = decodeUTF();
3127                     if (u == PS || u == LS)
3128                     {
3129                         ++p;
3130                         break;
3131                     }
3132                 }
3133                 ++p;
3134                 continue;
3135             }
3136             break;
3137         }
3138         endOfLine();
3139     }
3140
3141     /********************************************
3142      * Decode UTF character.
3143      * Issue error messages for invalid sequences.
3144      * Return decoded character, advance p to last character in UTF sequence.
3145      */
3146     private uint decodeUTF()
3147     {
3148         const s = p;
3149         assert(*s & 0x80);
3150         // Check length of remaining string up to 4 UTF-8 characters
3151         size_t len;
3152         for (len = 1; len < 4 && s[len]; len++)
3153         {
3154         }
3155         size_t idx = 0;
3156         dchar u;
3157         const msg = utf_decodeChar(s[0 .. len], idx, u);
3158         p += idx - 1;
3159         if (msg)
3160         {
3161             error("%.*s", cast(int)msg.length, msg.ptr);
3162         }
3163         return u;
3164     }
3165
3166     /***************************************************
3167      * Parse doc comment embedded between t.ptr and p.
3168      * Remove trailing blanks and tabs from lines.
3169      * Replace all newlines with \n.
3170      * Remove leading comment character from each line.
3171      * Decide if it's a lineComment or a blockComment.
3172      * Append to previous one for this token.
3173      *
3174      * If newParagraph is true, an extra newline will be
3175      * added between adjoining doc comments.
3176      */
3177     private void getDocComment(Token* t, uint lineComment, bool newParagraph) pure
3178     {
3179         /* ct tells us which kind of comment it is: '/', '*', or '+'
3180          */
3181         const ct = t.ptr[2];
3182         /* Start of comment text skips over / * *, / + +, or / / /
3183          */
3184         const(char)* q = t.ptr + 3; // start of comment text
3185         const(char)* qend = p;
3186         if (ct == '*' || ct == '+')
3187             qend -= 2;
3188         /* Scan over initial row of ****'s or ++++'s or ////'s
3189          */
3190         for (; q < qend; q++)
3191         {
3192             if (*q != ct)
3193                 break;
3194         }
3195         /* Remove leading spaces until start of the comment
3196          */
3197         int linestart = 0;
3198         if (ct == '/')
3199         {
3200             while (q < qend && (*q == ' ' || *q == '\t'))
3201                 ++q;
3202         }
3203         else if (q < qend)
3204         {
3205             if (*q == '\r')
3206             {
3207                 ++q;
3208                 if (q < qend && *q == '\n')
3209                     ++q;
3210                 linestart = 1;
3211             }
3212             else if (*q == '\n')
3213             {
3214                 ++q;
3215                 linestart = 1;
3216             }
3217         }
3218         /* Remove trailing row of ****'s or ++++'s
3219          */
3220         if (ct != '/')
3221         {
3222             for (; q < qend; qend--)
3223             {
3224                 if (qend[-1] != ct)
3225                     break;
3226             }
3227         }
3228         /* Comment is now [q .. qend].
3229          * Canonicalize it into buf[].
3230          */
3231         OutBuffer buf;
3232
3233         void trimTrailingWhitespace()
3234         {
3235             const s = buf[];
3236             auto len = s.length;
3237             while (len && (s[len - 1] == ' ' || s[len - 1] == '\t'))
3238                 --len;
3239             buf.setsize(len);
3240         }
3241
3242         for (; q < qend; q++)
3243         {
3244             char c = *q;
3245             switch (c)
3246             {
3247             case '*':
3248             case '+':
3249                 if (linestart && c == ct)
3250                 {
3251                     linestart = 0;
3252                     /* Trim preceding whitespace up to preceding \n
3253                      */
3254                     trimTrailingWhitespace();
3255                     continue;
3256                 }
3257                 break;
3258             case ' ':
3259             case '\t':
3260                 break;
3261             case '\r':
3262                 if (q[1] == '\n')
3263                     continue; // skip the \r
3264                 goto Lnewline;
3265             default:
3266                 if (c == 226)
3267                 {
3268                     // If LS or PS
3269                     if (q[1] == 128 && (q[2] == 168 || q[2] == 169))
3270                     {
3271                         q += 2;
3272                         goto Lnewline;
3273                     }
3274                 }
3275                 linestart = 0;
3276                 break;
3277             Lnewline:
3278                 c = '\n'; // replace all newlines with \n
3279                 goto case;
3280             case '\n':
3281                 linestart = 1;
3282                 /* Trim trailing whitespace
3283                  */
3284                 trimTrailingWhitespace();
3285                 break;
3286             }
3287             buf.writeByte(c);
3288         }
3289         /* Trim trailing whitespace (if the last line does not have newline)
3290          */
3291         trimTrailingWhitespace();
3292
3293         // Always end with a newline
3294         const s = buf[];
3295         if (s.length == 0 || s[$ - 1] != '\n')
3296             buf.writeByte('\n');
3297
3298         // It's a line comment if the start of the doc comment comes
3299         // after other non-whitespace on the same line.
3300         auto dc = (lineComment && anyToken) ? &t.lineComment : &t.blockComment;
3301         // Combine with previous doc comment, if any
3302         if (*dc)
3303             *dc = combineComments(*dc, buf[], newParagraph).toDString();
3304         else
3305             *dc = buf.extractSlice(true);
3306     }
3307
3308     /********************************************
3309      * Combine two document comments into one,
3310      * separated by an extra newline if newParagraph is true.
3311      */
3312     static const(char)* combineComments(const(char)[] c1, const(char)[] c2, bool newParagraph) pure
3313     {
3314         //printf("Lexer::combineComments('%s', '%s', '%i')\n", c1, c2, newParagraph);
3315         const(int) newParagraphSize = newParagraph ? 1 : 0; // Size of the combining '\n'
3316         if (!c1)
3317             return c2.ptr;
3318         if (!c2)
3319             return c1.ptr;
3320
3321         int insertNewLine = 0;
3322         if (c1.length && c1[$ - 1] != '\n')
3323             insertNewLine = 1;
3324         const retSize = c1.length + insertNewLine + newParagraphSize + c2.length;
3325         auto p = cast(char*)mem.xmalloc_noscan(retSize + 1);
3326         p[0 .. c1.length] = c1[];
3327         if (insertNewLine)
3328             p[c1.length] = '\n';
3329         if (newParagraph)
3330             p[c1.length + insertNewLine] = '\n';
3331         p[retSize - c2.length .. retSize] = c2[];
3332         p[retSize] = 0;
3333         return p;
3334     }
3335
3336     /**************************
3337      * `p` should be at start of next line
3338      */
3339     private void endOfLine() pure @nogc @safe
3340     {
3341         scanloc.linnum++;
3342         line = p;
3343     }
3344 }
3345
/// Support for `__DATE__`, `__TIME__`, and `__TIMESTAMP__`
private struct TimeStampInfo
{
    private __gshared bool initdone = false;

    // Note: Those properties need to be guarded by a call to `initialize`
    // The API isn't safe, and quite brittle, but it was left this way
    // over performance concerns.
    // This is currently only called once, from the lexer.
    __gshared char[11 + 1] date;        // "Mmm dd yyyy" + 0
    __gshared char[8 + 1] time;         // "hh:mm:ss" + 0
    __gshared char[24 + 1] timestamp;   // first 24 characters of the ctime() string + 0

    /**
     * Fill `date`, `time` and `timestamp` once; later calls are no-ops.
     *
     * The time is taken from the environment variable `SOURCE_DATE_EPOCH`
     * when it is set, otherwise from the system clock. A value that is not
     * a decimal number, or that `ctime` cannot convert, is reported as an
     * error and the epoch (0) is used instead.
     *
     * Params:
     *  loc = location used for the error message
     */
    public static void initialize(const ref Loc loc) nothrow
    {
        if (initdone)
            return;

        initdone = true;
        time_t ct;
        // https://issues.dlang.org/show_bug.cgi?id=20444
        const(char)* epoch = getenv("SOURCE_DATE_EPOCH");
        if (epoch)
        {
            if (!ct.parseDigits(epoch.toDString()))
                error(loc, "Value of environment variable `SOURCE_DATE_EPOCH` should be a valid UNIX timestamp, not: `%s`", epoch);
        }
        else
            .time(&ct);

        const(char)* p = ctime(&ct);
        if (!p)
        {
            // ctime() returns null when the value cannot be expressed as a
            // calendar time. Do not rely on an assert that vanishes in
            // release builds: diagnose and fall back to the epoch.
            if (epoch)
                error(loc, "Value of environment variable `SOURCE_DATE_EPOCH` should be a valid UNIX timestamp, not: `%s`", epoch);
            ct = 0;
            p = ctime(&ct);
        }
        assert(p);
        // ctime() layout: "Www Mmm dd hh:mm:ss yyyy\n"
        sprintf(&date[0], "%.6s %.4s", p + 4, p + 20);
        sprintf(&time[0], "%.8s", p + 11);
        sprintf(&timestamp[0], "%.24s", p);
    }
}
3381
// Valid escape sequences: each must decode to the expected character,
// consume the whole input, and emit no diagnostic.
unittest
{
    import dmd.console;

    // Any diagnostic issued while these cases run is a test failure.
    nothrow bool failOnDiagnostic(const ref Loc loc, Color headerColor, const(char)* header,
                                  const(char)* format, va_list ap, const(char)* p1, const(char)* p2)
    {
        assert(0);
    }
    diagnosticHandler = &failOnDiagnostic;

    // Decode `sequence` (the text following the backslash) and compare.
    static void check(T)(string sequence, T expected, bool Ccompile = false)
    {
        const(char)* cursor = sequence.ptr;
        const decoded = Lexer.escapeSequence(Loc.initial, cursor, Ccompile);
        assert(expected == decoded);
        assert(cursor == sequence.ptr + sequence.length);
    }

    // simple escapes
    check(`'`, '\'');
    check(`"`, '"');
    check(`?`, '?');
    check(`\`, '\\');
    check(`0`, '\0');
    check(`a`, '\a');
    check(`b`, '\b');
    check(`f`, '\f');
    check(`n`, '\n');
    check(`r`, '\r');
    check(`t`, '\t');
    check(`v`, '\v');

    // two-digit hex escapes
    check(`x00`, 0x00);
    check(`xff`, 0xff);
    check(`xFF`, 0xff);
    check(`xa7`, 0xa7);
    check(`x3c`, 0x3c);
    check(`xe2`, 0xe2);

    // octal escapes
    check(`1`, '\1');
    check(`42`, '\42');
    check(`357`, '\357');

    // four-digit unicode escapes
    check(`u1234`, '\u1234');
    check(`uf0e4`, '\uf0e4');

    // eight-digit unicode escapes
    check(`U0001f603`, '\U0001f603');

    // named character entities
    check(`&quot;`, '"');
    check(`&lt;`, '<');
    check(`&gt;`, '>');

    diagnosticHandler = null;
}
// Malformed escape sequences: each must emit exactly the expected error
// text, return the expected fallback value and consume the expected
// number of characters.
unittest
{
    import dmd.console;
    string expected;
    bool gotError;

    // Checks that the diagnostic is an error with the expected message.
    nothrow bool expectDiagnosticHandler(const ref Loc loc, Color headerColor, const(char)* header,
                                         const(char)* format, va_list ap, const(char)* p1, const(char)* p2)
    {
        assert(cast(Classification)headerColor == Classification.error);

        gotError = true;
        char[100] buffer = void;
        // Bounded formatting: never write past `buffer`, and fail loudly
        // if a message would not fit instead of overrunning the stack.
        const written = vsnprintf(buffer.ptr, buffer.length, format, ap);
        assert(written >= 0 && written < buffer.length);
        auto actual = buffer[0 .. written];
        assert(expected == actual);
        return true;
    }

    diagnosticHandler = &expectDiagnosticHandler;
    // Restore the default handler even when an assertion below fails.
    scope (exit) diagnosticHandler = null;

    void test(string sequence, string expectedError, dchar expectedReturnValue, uint expectedScanLength, bool Ccompile = false)
    {
        // The errors raised here are intentional; keep the global count unchanged.
        uint errors = global.errors;
        gotError = false;
        expected = expectedError;
        auto p = cast(const(char)*)sequence.ptr;
        auto actualReturnValue = Lexer.escapeSequence(Loc.initial, p, Ccompile);
        assert(gotError);
        assert(expectedReturnValue == actualReturnValue);

        auto actualScanLength = p - sequence.ptr;
        assert(expectedScanLength == actualScanLength);
        global.errors = errors;
    }

    test("c", `undefined escape sequence \c`, 'c', 1);
    test("!", `undefined escape sequence \!`, '!', 1);
    test("&quot;", `undefined escape sequence \&`, '&', 1, true);

    test("x1", `escape hex sequence has 1 hex digits instead of 2`, '\x01', 2);

    test("u1"  , `escape hex sequence has 1 hex digits instead of 4`,   0x1, 2);
    test("u12" , `escape hex sequence has 2 hex digits instead of 4`,  0x12, 3);
    test("u123", `escape hex sequence has 3 hex digits instead of 4`, 0x123, 4);

    test("U0"      , `escape hex sequence has 1 hex digits instead of 8`,       0x0, 2);
    test("U00"     , `escape hex sequence has 2 hex digits instead of 8`,      0x00, 3);
    test("U000"    , `escape hex sequence has 3 hex digits instead of 8`,     0x000, 4);
    test("U0000"   , `escape hex sequence has 4 hex digits instead of 8`,    0x0000, 5);
    test("U0001f"  , `escape hex sequence has 5 hex digits instead of 8`,   0x0001f, 6);
    test("U0001f6" , `escape hex sequence has 6 hex digits instead of 8`,  0x0001f6, 7);
    test("U0001f60", `escape hex sequence has 7 hex digits instead of 8`, 0x0001f60, 8);

    test("ud800"    , `invalid UTF character \U0000d800`, '?', 5);
    test("udfff"    , `invalid UTF character \U0000dfff`, '?', 5);
    test("U00110000", `invalid UTF character \U00110000`, '?', 9);

    test("xg0"      , `undefined escape hex sequence \xg`, 'g', 2);
    test("ug000"    , `undefined escape hex sequence \ug`, 'g', 2);
    test("Ug0000000", `undefined escape hex sequence \Ug`, 'g', 2);

    test("&BAD;", `unnamed character entity &BAD;`  , '?', 5);
    test("&quot", `unterminated named entity &quot;`, '?', 5);

    test("400", `escape octal sequence \400 is larger than \377`, 0x100, 3);
}