d: Merge upstream dmd 568496d5b, druntime 178c44ff, phobos 574bf883b.
[official-gcc.git] / gcc / d / dmd / lexer.d
blobe2b4199b80a23952120bd5c9ff82df8862325580
/**
 * Implements the lexical analyzer, which converts source code into lexical tokens.
 *
 * Specification: $(LINK2 https://dlang.org/spec/lex.html, Lexical)
 *
 * Copyright:   Copyright (C) 1999-2021 by The D Language Foundation, All Rights Reserved
 * Authors:     $(LINK2 http://www.digitalmars.com, Walter Bright)
 * License:     $(LINK2 http://www.boost.org/LICENSE_1_0.txt, Boost License 1.0)
 * Source:      $(LINK2 https://github.com/dlang/dmd/blob/master/src/dmd/lexer.d, _lexer.d)
 * Documentation:  https://dlang.org/phobos/dmd_lexer.html
 * Coverage:    https://codecov.io/gh/dlang/dmd/src/master/src/dmd/lexer.d
 */
14 module dmd.lexer;
16 import core.stdc.ctype;
17 import core.stdc.errno;
18 import core.stdc.stdarg;
19 import core.stdc.stdio;
20 import core.stdc.stdlib : getenv;
21 import core.stdc.string;
22 import core.stdc.time;
24 import dmd.entity;
25 import dmd.errors;
26 import dmd.globals;
27 import dmd.id;
28 import dmd.identifier;
29 import dmd.root.array;
30 import dmd.root.ctfloat;
31 import dmd.common.outbuffer;
32 import dmd.root.port;
33 import dmd.root.rmem;
34 import dmd.root.string;
35 import dmd.tokens;
36 import dmd.utf;
37 import dmd.utils;
39 nothrow:
/// Unicode code points that the lexer treats as line terminators.
private enum
{
    LS = 0x2028, /// UTF line separator
    PS = 0x2029, /// UTF paragraph separator
}
/********************************************
 * Character classification table, computed at compile time.
 *
 * Entry `cmtable[c]` is a bit set of `CM*` flags describing byte value `c`.
 * It is queried by `isoctal`, `ishex`, `isidchar`, `isZeroSecond`,
 * `isDigitSecond` and `issinglechar`.
 */
private static immutable cmtable = () {
    ubyte[256] map;
    foreach (const ch; 0 .. map.length)
    {
        ubyte flags = 0;

        if (ch >= '0' && ch <= '7')
            flags |= CMoctal;
        if (c_isxdigit(ch))
            flags |= CMhex;
        if (ch == '_' || c_isalnum(ch))
            flags |= CMidchar;

        // Characters that may follow the first digit of a numeric literal.
        switch (ch)
        {
            case '0': .. case '9':
            case 'e': case 'E':
            case 'f': case 'F':
            case 'l': case 'L':
            case 'p': case 'P':
            case 'u': case 'U':
            case 'i':
            case '.':
            case '_':
                // valid after a leading '0' and after a leading '1'..'9'
                flags |= CMzerosecond | CMdigitsecond;
                break;

            case 'x': case 'X':
            case 'b': case 'B':
                // radix prefixes: only valid after a leading '0'
                flags |= CMzerosecond;
                break;

            default:
                break;
        }

        // ASCII characters that can stand alone between two single quotes.
        const bool needsSpecialHandling =
            ch == '\\' || ch == '\n' || ch == '\r' ||
            ch == 0 || ch == 0x1A || ch == '\'';
        if (!needsSpecialHandling && (ch & 0x80) == 0)
            flags |= CMsinglechar;

        map[ch] = flags;
    }
    return map;
}();
/// Bit flags stored in `cmtable`, one bit per character class.
private enum
{
    CMoctal       = 1 << 0, /// '0' .. '7'
    CMhex         = 1 << 1, /// hexadecimal digit
    CMidchar      = 1 << 2, /// ASCII letter, digit or '_'
    CMzerosecond  = 1 << 3, /// may follow a leading '0' in a numeric literal
    CMdigitsecond = 1 << 4, /// may follow a leading '1' .. '9' in a numeric literal
    CMsinglechar  = 1 << 5, /// may appear unescaped as a one-character literal
}
/// Returns: `true` if `c` has the `CMoctal` flag set in `cmtable`.
private bool isoctal(const char c) pure @nogc @safe
{
    const flags = cmtable[c];
    return (flags & CMoctal) != 0;
}
/// Returns: `true` if `c` has the `CMhex` flag set in `cmtable`.
private bool ishex(const char c) pure @nogc @safe
{
    const flags = cmtable[c];
    return (flags & CMhex) != 0;
}
/// Returns: `true` if `c` has the `CMidchar` flag set in `cmtable`.
private bool isidchar(const char c) pure @nogc @safe
{
    const flags = cmtable[c];
    return (flags & CMidchar) != 0;
}
/// Returns: `true` if `c` has the `CMzerosecond` flag set in `cmtable`,
/// i.e. a numeric literal starting with '0' continues with `c`.
private bool isZeroSecond(const char c) pure @nogc @safe
{
    const flags = cmtable[c];
    return (flags & CMzerosecond) != 0;
}
/// Returns: `true` if `c` has the `CMdigitsecond` flag set in `cmtable`,
/// i.e. a numeric literal starting with '1' .. '9' continues with `c`.
private bool isDigitSecond(const char c) pure @nogc @safe
{
    const flags = cmtable[c];
    return (flags & CMdigitsecond) != 0;
}
/// Returns: `true` if `c` has the `CMsinglechar` flag set in `cmtable`.
private bool issinglechar(const char c) pure @nogc @safe
{
    const flags = cmtable[c];
    return (flags & CMsinglechar) != 0;
}
/// Locale-independent test for an ASCII hexadecimal digit.
/// Returns: `true` if `c` is in '0'..'9', 'a'..'f' or 'A'..'F'.
private bool c_isxdigit(const int c) pure @nogc @safe
{
    if (c >= '0' && c <= '9')
        return true;
    if (c >= 'a' && c <= 'f')
        return true;
    return c >= 'A' && c <= 'F';
}
/// Locale-independent test for an ASCII letter or decimal digit.
/// Returns: `true` if `c` is in '0'..'9', 'a'..'z' or 'A'..'Z'.
private bool c_isalnum(const int c) pure @nogc @safe
{
    if (c >= '0' && c <= '9')
        return true;
    if (c >= 'a' && c <= 'z')
        return true;
    return c >= 'A' && c <= 'Z';
}
unittest
{
    // Smoke test: lex the single keyword `int`, then check that the lexer
    // keeps reporting end-of-file on every further request.
    string text = "int"; // We rely on the implicit null-terminator
    scope Lexer lex1 = new Lexer(null, text.ptr, 0, text.length, 0, 0);

    assert(lex1.nextToken() == TOK.int32);
    foreach (_; 0 .. 3)
        assert(lex1.nextToken() == TOK.endOfFile);
}
unittest
{
    // Keep the lexer's diagnostics quiet while feeding it broken input.
    uint errors = global.startGagging();
    scope(exit) global.endGagging(errors);

    // Malformed input must still terminate with TOK.endOfFile.
    // Every testcase ends with 0 or 0x1A.
    static immutable char[][] testcases =
    [
        [0], // not malformed, but pathological
        ['\'', 0],
        ['\'', 0x1A],
        ['{', '{', 'q', '{', 0],
        [0xFF, 0],
        [0xFF, 0x80, 0],
        [0xFF, 0xFF, 0],
        [0xFF, 0xFF, 0], // listed twice in the original table
        ['x', '"', 0x1A],
    ];

    foreach (testcase; testcases)
    {
        scope Lexer lex2 = new Lexer(null, testcase.ptr, 0, testcase.length-1, 0, 0);
        TOK tok = lex2.nextToken();

        // Bound the number of tokens by the input length so that a lexer
        // which never reaches end-of-file fails the assert instead of hanging.
        for (size_t iterations = 1;
             tok != TOK.endOfFile && iterations < testcase.length;
             ++iterations)
        {
            tok = lex2.nextToken();
        }
        assert(tok == TOK.endOfFile);

        // End-of-file must be sticky.
        tok = lex2.nextToken();
        assert(tok == TOK.endOfFile);
    }
}
// Builds with version DMDLIB also turn on version LocOffset.
version (DMDLIB) version = LocOffset;
212 /***********************************************************
214 class Lexer
216 private __gshared OutBuffer stringbuffer;
218 Loc scanloc; // for error messages
219 Loc prevloc; // location of token before current
221 const(char)* p; // current character
223 Token token;
225 // For ImportC
226 bool Ccompile; /// true if compiling ImportC
228 // The following are valid only if (Ccompile == true)
229 ubyte longsize; /// size of C long, 4 or 8
230 ubyte long_doublesize; /// size of C long double, 8 or D real.sizeof
231 ubyte wchar_tsize; /// size of C wchar_t, 2 or 4
233 structalign_t packalign; /// current state of #pragma pack alignment (ImportC)
235 private
237 const(char)* base; // pointer to start of buffer
238 const(char)* end; // pointer to last element of buffer
239 const(char)* line; // start of current line
241 bool doDocComment; // collect doc comment information
242 bool anyToken; // seen at least one token
243 bool commentToken; // comments are TOK.comment's
244 int inTokenStringConstant; // can be larger than 1 when in nested q{} strings
245 int lastDocLine; // last line of previous doc comment
247 Token* tokenFreelist;
249 // ImportC #pragma pack stack
250 Array!Identifier* records; // identifers (or null)
251 Array!structalign_t* packs; // parallel alignment values
254 nothrow:
/*********************
 * Creates a Lexer for the source code base[begoffset..endoffset+1].
 * The last character, base[endoffset], must be null (0) or EOF (0x1A).
 *
 * If the text starts with `#!`, that whole first line is skipped.
 *
 * Params:
 *  filename = used for error messages
 *  base = source code, must be terminated by a null (0) or EOF (0x1A) character
 *  begoffset = starting offset into base[]
 *  endoffset = the last offset to read into base[]
 *  doDocComment = handle documentation comments
 *  commentToken = comments become TOK.comment's
 */
this(const(char)* filename, const(char)* base, size_t begoffset,
    size_t endoffset, bool doDocComment, bool commentToken) pure
{
    scanloc = Loc(filename, 1, 1);
    token = Token.init;

    // Buffer bounds and scan position.
    this.base = base;
    this.end = base + endoffset;
    this.p = base + begoffset;
    this.line = this.p;

    // Scanner options and state.
    this.doDocComment = doDocComment;
    this.commentToken = commentToken;
    this.inTokenStringConstant = 0;
    this.lastDocLine = 0;
    this.packalign.setDefault();

    // Nothing more to do unless the first line starts with '#!'.
    if (p is null || p[0] != '#' || p[1] != '!')
        return;

    // Skip the '#!' line: stop on the terminator itself, or just past '\n'.
    p += 2;
    while (*p != 0 && *p != 0x1A)
    {
        if (*p++ == '\n')
            break;
    }
    endOfLine();
}
/// Returns: a `Token` taken from the freelist when one is available,
/// otherwise a newly allocated `Token`.
Token* allocateToken() pure nothrow @safe
{
    if (tokenFreelist is null)
        return new Token();

    // Pop the head of the freelist and detach it from the chain.
    Token* recycled = tokenFreelist;
    tokenFreelist = recycled.next;
    recycled.next = null;
    return recycled;
}
/// Returns `token` to the freelist so `allocateToken` can hand it out again.
/// The token is reset to `Token.init` first when `mem.isGCEnabled` is set.
private void releaseToken(Token* token) pure nothrow @nogc @safe
{
    if (mem.isGCEnabled)
    {
        *token = Token.init;
    }

    // Push onto the head of the freelist.
    token.next = tokenFreelist;
    tokenFreelist = token;
}
/// Advances to the next token, making it the current `token`.
/// A token already produced by `peek` is reused; otherwise the source is scanned.
/// Returns: the value of the new current token.
final TOK nextToken()
{
    prevloc = token.loc;

    Token* queued = token.next;
    if (queued is null)
    {
        scan(&token);
        return token.value;
    }

    // Promote the look-ahead token to current and recycle its storage.
    memcpy(&token, queued, Token.sizeof);
    releaseToken(queued);
    //printf(token.toChars());
    return token.value;
}
/***********************
 * Look ahead at next token's value.
 * Returns: the value of the token following the current one
 */
final TOK peekNext()
{
    Token* following = peek(&token);
    return following.value;
}
/***********************
 * Look 2 tokens ahead at value.
 * Returns: the value of the second token after the current one
 */
final TOK peekNext2()
{
    return peek(peek(&token)).value;
}
/****************************
 * Turn next token in buffer into a token.
 *
 * White space and (unless `commentToken` is set) comments are skipped.
 * On end of input `p` is deliberately left on the terminator so that every
 * further call keeps producing TOK.endOfFile.
 *
 * Params:
 *  t = the token to fill in
 */
final void scan(Token* t)
{
    const lastLine = scanloc.linnum;
    Loc startLoc;
    t.blockComment = null;
    t.lineComment = null;

    while (1)
    {
        t.ptr = p;
        //printf("p = %p, *p = '%c'\n",p,*p);
        t.loc = loc();
        switch (*p)
        {
        case 0:
        case 0x1A:
            t.value = TOK.endOfFile; // end of file
            // Intentionally not advancing `p`, such that subsequent calls keep returning TOK.endOfFile.
            return;
        case ' ':
        case '\t':
        case '\v':
        case '\f':
            p++;
            continue; // skip white space
        case '\r':
            p++;
            if (*p != '\n') // if CR stands by itself
            {
                endOfLine();
                goto skipFourSpaces;
            }
            continue; // skip white space
        case '\n':
            p++;
            endOfLine();
            skipFourSpaces:
            // Fast path for indentation: consume four blanks per comparison.
            while (*(cast(uint*)p) == 0x20202020) //' ' == 0x20
            {
                p+=4;
            }
            continue; // skip white space
        case '0':
            if (!isZeroSecond(p[1])) // if numeric literal does not continue
            {
                ++p;
                t.unsvalue = 0;
                t.value = TOK.int32Literal;
                return;
            }
            goto Lnumber;

        case '1': .. case '9':
            if (!isDigitSecond(p[1])) // if numeric literal does not continue
            {
                t.unsvalue = *p - '0';
                ++p;
                t.value = TOK.int32Literal;
                return;
            }
        Lnumber:
            t.value = number(t);
            return;

        case '\'':
            if (issinglechar(p[1]) && p[2] == '\'')
            {
                t.unsvalue = p[1]; // simple one character literal
                t.value = Ccompile ? TOK.int32Literal : TOK.charLiteral;
                p += 3;
            }
            else if (Ccompile)
            {
                clexerCharConstant(*t, 0);
            }
            else
            {
                t.value = charConstant(t);
            }
            return;

        case 'u':
        case 'U':
        case 'L':
            if (!Ccompile)
                goto case_ident;
            if (p[1] == '\'') // C wide character constant
            {
                char c = *p;
                // L'x' has type wchar_t: use the 16-bit prefix 'u' when wchar_t
                // is 2 bytes and the 32-bit prefix 'U' otherwise, matching the
                // string-literal postfix selection below.
                if (c == 'L') // convert L to u or U
                    c = (wchar_tsize == 2) ? 'u' : 'U';
                ++p;
                clexerCharConstant(*t, c);
                return;
            }
            else if (p[1] == '\"') // C wide string literal
            {
                const c = *p;
                ++p;
                escapeStringConstant(t);
                t.postfix = c == 'L' ? (wchar_tsize == 2 ? 'w' : 'd') :
                            c == 'u' ? 'w' :
                            'd';
                return;
            }
            goto case_ident;

        case 'r':
            if (p[1] != '"')
                goto case_ident;
            p++;
            goto case '`';
        case '`':
            wysiwygStringConstant(t);
            return;
        case 'x':
            if (p[1] != '"')
                goto case_ident;
            p++;
            auto start = p;
            OutBuffer hexString;
            t.value = hexStringConstant(t);
            hexString.write(start[0 .. p - start]);
            error("Built-in hex string literals are obsolete, use `std.conv.hexString!%s` instead.", hexString.extractChars());
            return;
        case 'q':
            if (p[1] == '"')
            {
                p++;
                delimitedStringConstant(t);
                return;
            }
            else if (p[1] == '{')
            {
                p++;
                tokenStringConstant(t);
                return;
            }
            else
                goto case_ident;
        case '"':
            escapeStringConstant(t);
            return;
        // Identifier start characters. 'q', 'r', 'u', 'x', 'L' and 'U' have
        // their own cases above and jump to case_ident when they do not
        // introduce a literal.
        case 'a': .. case 'p':
        /*case 'q': case 'r':*/
        case 's':
        case 't':
        //case 'u':
        case 'v':
        case 'w':
        /*case 'x':*/
        case 'y':
        case 'z':
        case 'A': .. case 'K':
        //case 'L':
        case 'M': .. case 'T':
        //case 'U':
        case 'V': .. case 'Z':
        case '_':
        case_ident:
            {
                // Consume the remaining identifier characters.
                while (1)
                {
                    const c = *++p;
                    if (isidchar(c))
                        continue;
                    else if (c & 0x80)
                    {
                        const s = p;
                        const u = decodeUTF();
                        if (isUniAlpha(u))
                            continue;
                        error("char 0x%04x not allowed in identifier", u);
                        p = s;
                    }
                    break;
                }
                Identifier id = Identifier.idPool(cast(char*)t.ptr, cast(uint)(p - t.ptr));
                t.ident = id;
                t.value = cast(TOK)id.getValue();

                anyToken = 1;

                /* Different keywords for C and D
                 */
                if (Ccompile)
                {
                    if (t.value != TOK.identifier)
                    {
                        t.value = Ckeywords[t.value]; // filter out D keywords
                    }
                }
                else if (t.value >= FirstCKeyword)
                    t.value = TOK.identifier; // filter out C keywords

                else if (*t.ptr == '_') // if special identifier token
                {
                    // Lazy initialization
                    TimeStampInfo.initialize(t.loc);

                    if (id == Id.DATE)
                    {
                        t.ustring = TimeStampInfo.date.ptr;
                        goto Lstr;
                    }
                    else if (id == Id.TIME)
                    {
                        t.ustring = TimeStampInfo.time.ptr;
                        goto Lstr;
                    }
                    else if (id == Id.VENDOR)
                    {
                        t.ustring = global.vendor.xarraydup.ptr;
                        goto Lstr;
                    }
                    else if (id == Id.TIMESTAMP)
                    {
                        t.ustring = TimeStampInfo.timestamp.ptr;
                    Lstr:
                        t.value = TOK.string_;
                        t.postfix = 0;
                        t.len = cast(uint)strlen(t.ustring);
                    }
                    else if (id == Id.VERSIONX)
                    {
                        t.value = TOK.int64Literal;
                        t.unsvalue = global.versionNumber();
                    }
                    else if (id == Id.EOFX)
                    {
                        t.value = TOK.endOfFile;
                        // Advance scanner to end of file
                        while (!(*p == 0 || *p == 0x1A))
                            p++;
                    }
                }
                //printf("t.value = %d\n",t.value);
                return;
            }
        case '/':
            p++;
            switch (*p)
            {
            case '=':
                p++;
                t.value = TOK.divAssign;
                return;
            case '*':
                p++;
                startLoc = loc();
                while (1)
                {
                    // Advance to the next '/' while keeping line counts right.
                    while (1)
                    {
                        const c = *p;
                        switch (c)
                        {
                        case '/':
                            break;
                        case '\n':
                            endOfLine();
                            p++;
                            continue;
                        case '\r':
                            p++;
                            if (*p != '\n')
                                endOfLine();
                            continue;
                        case 0:
                        case 0x1A:
                            error("unterminated /* */ comment");
                            p = end;
                            t.loc = loc();
                            t.value = TOK.endOfFile;
                            return;
                        default:
                            if (c & 0x80)
                            {
                                const u = decodeUTF();
                                if (u == PS || u == LS)
                                    endOfLine();
                            }
                            p++;
                            continue;
                        }
                        break;
                    }
                    p++;
                    // The comment ends at "*/", but the '*' of the opening
                    // "/*" does not count (so "/*/" is not a full comment).
                    if (p[-2] == '*' && p - 3 != t.ptr)
                        break;
                }
                if (commentToken)
                {
                    t.loc = startLoc;
                    t.value = TOK.comment;
                    return;
                }
                else if (doDocComment && t.ptr[2] == '*' && p - 4 != t.ptr)
                {
                    // if /** but not /**/
                    getDocComment(t, lastLine == startLoc.linnum, startLoc.linnum - lastDocLine > 1);
                    lastDocLine = scanloc.linnum;
                }
                continue;
            case '/': // do // style comments
                startLoc = loc();
                while (1)
                {
                    const c = *++p;
                    switch (c)
                    {
                    case '\n':
                        break;
                    case '\r':
                        if (p[1] == '\n')
                            p++;
                        break;
                    case 0:
                    case 0x1A:
                        if (commentToken)
                        {
                            p = end;
                            t.loc = startLoc;
                            t.value = TOK.comment;
                            return;
                        }
                        if (doDocComment && t.ptr[2] == '/')
                        {
                            getDocComment(t, lastLine == startLoc.linnum, startLoc.linnum - lastDocLine > 1);
                            lastDocLine = scanloc.linnum;
                        }
                        p = end;
                        t.loc = loc();
                        t.value = TOK.endOfFile;
                        return;
                    default:
                        if (c & 0x80)
                        {
                            const u = decodeUTF();
                            if (u == PS || u == LS)
                                break;
                        }
                        continue;
                    }
                    break;
                }
                if (commentToken)
                {
                    p++;
                    endOfLine();
                    t.loc = startLoc;
                    t.value = TOK.comment;
                    return;
                }
                if (doDocComment && t.ptr[2] == '/')
                {
                    getDocComment(t, lastLine == startLoc.linnum, startLoc.linnum - lastDocLine > 1);
                    lastDocLine = scanloc.linnum;
                }
                p++;
                endOfLine();
                continue;
            case '+':
                {
                    // Nesting comment: `nest` counts the open "/+" levels.
                    int nest;
                    startLoc = loc();
                    p++;
                    nest = 1;
                    while (1)
                    {
                        char c = *p;
                        switch (c)
                        {
                        case '/':
                            p++;
                            if (*p == '+')
                            {
                                p++;
                                nest++;
                            }
                            continue;
                        case '+':
                            p++;
                            if (*p == '/')
                            {
                                p++;
                                if (--nest == 0)
                                    break;
                            }
                            continue;
                        case '\r':
                            p++;
                            if (*p != '\n')
                                endOfLine();
                            continue;
                        case '\n':
                            endOfLine();
                            p++;
                            continue;
                        case 0:
                        case 0x1A:
                            error("unterminated /+ +/ comment");
                            p = end;
                            t.loc = loc();
                            t.value = TOK.endOfFile;
                            return;
                        default:
                            if (c & 0x80)
                            {
                                uint u = decodeUTF();
                                if (u == PS || u == LS)
                                    endOfLine();
                            }
                            p++;
                            continue;
                        }
                        break;
                    }
                    if (commentToken)
                    {
                        t.loc = startLoc;
                        t.value = TOK.comment;
                        return;
                    }
                    if (doDocComment && t.ptr[2] == '+' && p - 4 != t.ptr)
                    {
                        // if /++ but not /++/
                        getDocComment(t, lastLine == startLoc.linnum, startLoc.linnum - lastDocLine > 1);
                        lastDocLine = scanloc.linnum;
                    }
                    continue;
                }
            default:
                break;
            }
            t.value = TOK.div;
            return;
        case '.':
            p++;
            if (isdigit(*p))
            {
                /* Note that we don't allow ._1 and ._ as being
                 * valid floating point numbers.
                 */
                p--;
                t.value = inreal(t);
            }
            else if (p[0] == '.')
            {
                if (p[1] == '.')
                {
                    p += 2;
                    t.value = TOK.dotDotDot;
                }
                else
                {
                    p++;
                    t.value = TOK.slice;
                }
            }
            else
                t.value = TOK.dot;
            return;
        case '&':
            p++;
            if (*p == '=')
            {
                p++;
                t.value = TOK.andAssign;
            }
            else if (*p == '&')
            {
                p++;
                t.value = TOK.andAnd;
            }
            else
                t.value = TOK.and;
            return;
        case '|':
            p++;
            if (*p == '=')
            {
                p++;
                t.value = TOK.orAssign;
            }
            else if (*p == '|')
            {
                p++;
                t.value = TOK.orOr;
            }
            else
                t.value = TOK.or;
            return;
        case '-':
            p++;
            if (*p == '=')
            {
                p++;
                t.value = TOK.minAssign;
            }
            else if (*p == '-')
            {
                p++;
                t.value = TOK.minusMinus;
            }
            else if (*p == '>')
            {
                ++p;
                t.value = TOK.arrow;
            }
            else
                t.value = TOK.min;
            return;
        case '+':
            p++;
            if (*p == '=')
            {
                p++;
                t.value = TOK.addAssign;
            }
            else if (*p == '+')
            {
                p++;
                t.value = TOK.plusPlus;
            }
            else
                t.value = TOK.add;
            return;
        case '<':
            p++;
            if (*p == '=')
            {
                p++;
                t.value = TOK.lessOrEqual; // <=
            }
            else if (*p == '<')
            {
                p++;
                if (*p == '=')
                {
                    p++;
                    t.value = TOK.leftShiftAssign; // <<=
                }
                else
                    t.value = TOK.leftShift; // <<
            }
            else if (*p == ':' && Ccompile)
            {
                ++p;
                t.value = TOK.leftBracket; // <:
            }
            else if (*p == '%' && Ccompile)
            {
                ++p;
                t.value = TOK.leftCurly; // <%
            }
            else
                t.value = TOK.lessThan; // <
            return;
        case '>':
            p++;
            if (*p == '=')
            {
                p++;
                t.value = TOK.greaterOrEqual; // >=
            }
            else if (*p == '>')
            {
                p++;
                if (*p == '=')
                {
                    p++;
                    t.value = TOK.rightShiftAssign; // >>=
                }
                else if (*p == '>')
                {
                    p++;
                    if (*p == '=')
                    {
                        p++;
                        t.value = TOK.unsignedRightShiftAssign; // >>>=
                    }
                    else
                        t.value = TOK.unsignedRightShift; // >>>
                }
                else
                    t.value = TOK.rightShift; // >>
            }
            else
                t.value = TOK.greaterThan; // >
            return;
        case '!':
            p++;
            if (*p == '=')
            {
                p++;
                t.value = TOK.notEqual; // !=
            }
            else
                t.value = TOK.not; // !
            return;
        case '=':
            p++;
            if (*p == '=')
            {
                p++;
                t.value = TOK.equal; // ==
            }
            else if (*p == '>')
            {
                p++;
                t.value = TOK.goesTo; // =>
            }
            else
                t.value = TOK.assign; // =
            return;
        case '~':
            p++;
            if (*p == '=')
            {
                p++;
                t.value = TOK.concatenateAssign; // ~=
            }
            else
                t.value = TOK.tilde; // ~
            return;
        case '^':
            p++;
            if (*p == '^')
            {
                p++;
                if (*p == '=')
                {
                    p++;
                    t.value = TOK.powAssign; // ^^=
                }
                else
                    t.value = TOK.pow; // ^^
            }
            else if (*p == '=')
            {
                p++;
                t.value = TOK.xorAssign; // ^=
            }
            else
                t.value = TOK.xor; // ^
            return;
        case '(':
            p++;
            t.value = TOK.leftParenthesis;
            return;
        case ')':
            p++;
            t.value = TOK.rightParenthesis;
            return;
        case '[':
            p++;
            t.value = TOK.leftBracket;
            return;
        case ']':
            p++;
            t.value = TOK.rightBracket;
            return;
        case '{':
            p++;
            t.value = TOK.leftCurly;
            return;
        case '}':
            p++;
            t.value = TOK.rightCurly;
            return;
        case '?':
            p++;
            t.value = TOK.question;
            return;
        case ',':
            p++;
            t.value = TOK.comma;
            return;
        case ';':
            p++;
            t.value = TOK.semicolon;
            return;
        case ':':
            p++;
            if (*p == ':')
            {
                ++p;
                t.value = TOK.colonColon;
            }
            else if (*p == '>' && Ccompile)
            {
                ++p;
                t.value = TOK.rightBracket;
            }
            else
                t.value = TOK.colon;
            return;
        case '$':
            p++;
            t.value = TOK.dollar;
            return;
        case '@':
            p++;
            t.value = TOK.at;
            return;
        case '*':
            p++;
            if (*p == '=')
            {
                p++;
                t.value = TOK.mulAssign;
            }
            else
                t.value = TOK.mul;
            return;
        case '%':
            p++;
            if (*p == '=')
            {
                p++;
                t.value = TOK.modAssign;
            }
            else if (*p == '>' && Ccompile)
            {
                ++p;
                t.value = TOK.rightCurly;
            }
            else if (*p == ':' && Ccompile)
            {
                goto case '#'; // %: means #
            }
            else
                t.value = TOK.mod;
            return;
        case '#':
            {
                p++;
                Token n;
                scan(&n);
                if (Ccompile && n.value == TOK.int32Literal)
                {
                    poundLine(n, true);
                    continue;
                }
                if (n.value == TOK.identifier)
                {
                    if (n.ident == Id.line)
                    {
                        poundLine(n, false);
                        continue;
                    }
                    else if (n.ident == Id.__pragma && Ccompile)
                    {
                        pragmaDirective(scanloc);
                        continue;
                    }
                    else
                    {
                        const locx = loc();
                        warning(locx, "C preprocessor directive `#%s` is not supported", n.ident.toChars());
                    }
                }
                else if (n.value == TOK.if_)
                {
                    error("C preprocessor directive `#if` is not supported, use `version` or `static if`");
                }
                t.value = TOK.pound;
                return;
            }
        default:
            {
                dchar c = *p;
                if (c & 0x80)
                {
                    c = decodeUTF();
                    // Check for start of unicode identifier
                    if (isUniAlpha(c))
                        goto case_ident;
                    if (c == PS || c == LS)
                    {
                        endOfLine();
                        p++;
                        continue;
                    }
                }
                if (c < 0x80 && isprint(c))
                    error("character '%c' is not a valid token", c);
                else
                    error("character 0x%02x is not a valid token", c);
                p++;
                continue;
            }
        }
    }
}
/// Returns: the token following `ct`. If it has not been scanned yet, it is
/// scanned now and linked in as `ct.next`.
final Token* peek(Token* ct)
{
    if (ct.next is null)
    {
        Token* fresh = allocateToken();
        scan(fresh);
        ct.next = fresh;
    }
    return ct.next;
}
/*********************************
 * tk is on the opening (.
 * Look ahead and return token that is past the closing ).
 *
 * The search also stops, returning the token it stopped on, at an unbalanced
 * `}`, at a `;` outside of any curly braces, or at end of file.
 */
final Token* peekPastParen(Token* tk)
{
    //printf("peekPastParen()\n");
    int parenDepth = 1;
    int braceDepth = 0;
    while (1)
    {
        tk = peek(tk);
        //tk.print();
        const kind = tk.value;
        if (kind == TOK.leftParenthesis)
        {
            ++parenDepth;
        }
        else if (kind == TOK.rightParenthesis)
        {
            if (--parenDepth == 0)
                return peek(tk); // the token after the matching ')'
        }
        else if (kind == TOK.leftCurly)
        {
            ++braceDepth;
        }
        else if (kind == TOK.rightCurly)
        {
            if (--braceDepth < 0)
                return tk; // '}' without a matching '{'
        }
        else if (kind == TOK.semicolon)
        {
            if (braceDepth == 0)
                return tk;
        }
        else if (kind == TOK.endOfFile)
        {
            return tk;
        }
    }
}
/*******************************************
 * Parse escape sequence.
 *
 * Forwards to the static overload with the current token's location, the
 * scan pointer `p` (which is advanced) and the `Ccompile` setting.
 */
private uint escapeSequence()
{
    const dchar decoded = Lexer.escapeSequence(token.loc, p, Ccompile);
    return decoded;
}
/********
 * Parse the given string literal escape sequence into a single character.
 * D https://dlang.org/spec/lex.html#escape_sequences
 * C11 6.4.4.4
 * Params:
 *  loc = location to use for error messages
 *  sequence = pointer to string with escape sequence to parse. Updated to
 *             point past the end of the escape sequence
 *  Ccompile = true for compile C11 escape sequences
 * Returns:
 *  the escape sequence as a single character
 */
private static dchar escapeSequence(const ref Loc loc, ref const(char)* sequence, bool Ccompile)
{
    const(char)* p = sequence; // cache sequence reference on stack
    scope(exit) sequence = p;

    uint c = *p;
    int ndigits;
    switch (c)
    {
    case '\'':
    case '"':
    case '?':
    case '\\':
    Lconsume:
        p++;
        break;
    case 'a':
        c = 7;
        goto Lconsume;
    case 'b':
        c = 8;
        goto Lconsume;
    case 'f':
        c = 12;
        goto Lconsume;
    case 'n':
        c = 10;
        goto Lconsume;
    case 'r':
        c = 13;
        goto Lconsume;
    case 't':
        c = 9;
        goto Lconsume;
    case 'v':
        c = 11;
        goto Lconsume;
    case 'u':
        ndigits = 4;
        goto Lhex;
    case 'U':
        ndigits = 8;
        goto Lhex;
    case 'x':
        ndigits = 2;
    Lhex:
        p++;
        c = *p;
        if (ishex(cast(char)c))
        {
            uint v = 0;
            int n = 0;
            while (1)
            {
                if (isdigit(cast(char)c))
                    c -= '0';
                else if (islower(c))
                    c -= 'a' - 10;
                else
                    c -= 'A' - 10;
                v = v * 16 + c;
                c = *++p;
                if (++n == ndigits)
                    break;
                if (!ishex(cast(char)c))
                {
                    .error(loc, "escape hex sequence has %d hex digits instead of %d", n, ndigits);
                    break;
                }
            }
            if (ndigits != 2 && !utf_isValidDchar(v))
            {
                .error(loc, "invalid UTF character \\U%08x", v);
                v = '?'; // recover with valid UTF character
            }
            c = v;
        }
        else
        {
            .error(loc, "undefined escape hex sequence \\%c%c", sequence[0], c);
            // Skip the offending character, but never step over the
            // end-of-file sentinel: callers keep reading from `p`.
            if (c != 0 && c != 0x1A)
                p++;
        }
        break;
    case '&':
        if (Ccompile)
            goto default;

        // named character entity
        for (const idstart = ++p; 1; p++)
        {
            switch (*p)
            {
            case ';':
                c = HtmlNamedEntity(idstart, p - idstart);
                if (c == ~0)
                {
                    .error(loc, "unnamed character entity &%.*s;", cast(int)(p - idstart), idstart);
                    c = '?';
                }
                p++;
                break;
            default:
                // entity names are letters, with digits allowed after the first character
                if (isalpha(*p) || (p != idstart && isdigit(*p)))
                    continue;
                .error(loc, "unterminated named entity &%.*s;", cast(int)(p - idstart + 1), idstart);
                c = '?';
                break;
            }
            break;
        }
        break;
    case 0:
    case 0x1A:
        // end of file
        c = '\\';
        break;
    default:
        if (isoctal(cast(char)c))
        {
            // up to three octal digits
            uint v = 0;
            int n = 0;
            do
            {
                v = v * 8 + (c - '0');
                c = *++p;
            }
            while (++n < 3 && isoctal(cast(char)c));
            c = v;
            if (c > 0xFF)
                .error(loc, "escape octal sequence \\%03o is larger than \\377", c);
        }
        else
        {
            .error(loc, "undefined escape sequence \\%c", c);
            p++;
        }
        break;
    }
    return c;
}
/**
Lex a wysiwyg string. `p` must be pointing to the first character before the
contents of the string literal. The character pointed to by `p` will be used as
the terminating character (i.e. backtick or double-quote).

No escape processing is done. A lone `\r` is stored as `\n`, and the `\r` of a
`\r\n` pair is dropped.
Params:
    result = pointer to the token that accepts the result
*/
private void wysiwygStringConstant(Token* result)
{
    result.value = TOK.string_;
    Loc start = loc();
    const terminator = *p;
    p++;
    stringbuffer.setsize(0);
    for (;;)
    {
        dchar c = *p;
        p++;
        if (c == '\n')
        {
            endOfLine();
        }
        else if (c == '\r')
        {
            if (*p == '\n')
                continue; // ignore
            c = '\n'; // treat EndOfLine as \n character
            endOfLine();
        }
        else if (c == 0 || c == 0x1A)
        {
            error("unterminated string constant starting at %s", start.toChars());
            result.setString();
            // rewind `p` so it points to the EOF character
            p--;
            return;
        }
        else if (c == terminator)
        {
            result.setString(stringbuffer);
            stringPostfix(result);
            return;
        }
        else if (c & 0x80)
        {
            // Re-read the multi-byte character from its first byte.
            p--;
            const u = decodeUTF();
            p++;
            if (u == PS || u == LS)
                endOfLine();
            stringbuffer.writeUTF8(u);
            continue;
        }
        stringbuffer.writeByte(c);
    }
}
/**************************************
 * Lex hex strings:
 *      x"0A ae 34FE BD"
 *
 * White space and line breaks between digits are ignored; each pair of hex
 * digits produces one byte of the string.
 * Params:
 *  t = the token that accepts the resulting string
 * Returns:
 *  TOK.hexadecimalString
 */
private TOK hexStringConstant(Token* t)
{
    Loc start = loc();
    uint n = 0;  // number of hex digits seen so far
    uint v = ~0; // dead assignment, needed to suppress warning
    p++;
    stringbuffer.setsize(0);
    while (1)
    {
        dchar c = *p++;
        switch (c)
        {
        case ' ':
        case '\t':
        case '\v':
        case '\f':
            continue; // skip white space
        case '\r':
            if (*p == '\n')
                continue; // ignore '\r' if followed by '\n'
            // Treat isolated '\r' as if it were a '\n'
            goto case '\n';
        case '\n':
            endOfLine();
            continue;
        case 0:
        case 0x1A:
            error("unterminated string constant starting at %s", start.toChars());
            t.setString();
            // decrement `p`, because it needs to point to the next token (the 0 or 0x1A character is the TOK.endOfFile token).
            p--;
            return TOK.hexadecimalString;
        case '"':
            if (n & 1)
            {
                error("odd number (%d) of hex characters in hex string", n);
                stringbuffer.writeByte(v);
            }
            t.setString(stringbuffer);
            stringPostfix(t);
            return TOK.hexadecimalString;
        default:
            if (c >= '0' && c <= '9')
                c -= '0';
            else if (c >= 'a' && c <= 'f')
                c -= 'a' - 10;
            else if (c >= 'A' && c <= 'F')
                c -= 'A' - 10;
            else if (c & 0x80)
            {
                p--;
                const u = decodeUTF();
                p++;
                if (u == PS || u == LS)
                {
                    // A Unicode line break is white space, exactly like
                    // '\n' above: it must not be counted as a hex digit.
                    endOfLine();
                    continue;
                }
                error("non-hex character \\u%04x in hex string", u);
            }
            else
                error("non-hex character '%c' in hex string", c);
            if (n & 1)
            {
                // second digit of a pair: emit the completed byte
                v = (v << 4) | c;
                stringbuffer.writeByte(v);
            }
            else
                v = c;
            n++;
            break;
        }
    }
    assert(0); // see bug 15731
}
/**
Lex a delimited string. Some examples of delimited strings are:
---
q"(foo(xxx))"      // "foo(xxx)"
q"[foo$(LPAREN)]"  // "foo$(LPAREN)"
q"/foo]/"          // "foo]"
q"HERE
foo
HERE"              // "foo\n"
---
It is assumed that `p` points to the opening double-quote '"'.
Params:
    result = pointer to the token that accepts the result
*/
private void delimitedStringConstant(Token* result)
{
    result.value = TOK.string_;
    Loc start = loc();
    dchar delimleft = 0;
    dchar delimright = 0;
    uint nest = 1;
    uint nestcount = ~0; // dead assignment, needed to suppress warning
    Identifier hereid = null;
    uint blankrol = 0;
    uint startline = 0;
    p++;
    stringbuffer.setsize(0);
    while (1)
    {
        dchar c = *p++;
        //printf("c = '%c'\n", c);
        switch (c)
        {
        case '\n':
        Lnextline:
            endOfLine();
            startline = 1;
            if (blankrol)
            {
                blankrol = 0;
                continue;
            }
            if (hereid)
            {
                stringbuffer.writeUTF8(c);
                continue;
            }
            break;
        case '\r':
            if (*p == '\n')
                continue; // ignore
            c = '\n'; // treat EndOfLine as \n character
            goto Lnextline;
        case 0:
        case 0x1A:
            error("unterminated delimited string constant starting at %s", start.toChars());
            result.setString();
            // decrement `p`, because it needs to point to the next token (the 0 or 0x1A character is the TOK.endOfFile token).
            p--;
            return;
        default:
            if (c & 0x80)
            {
                p--;
                c = decodeUTF();
                p++;
                if (c == PS || c == LS)
                    goto Lnextline;
            }
            break;
        }
        if (delimleft == 0)
        {
            // First character after the quote selects the delimiter.
            delimleft = c;
            nest = 1;
            nestcount = 1;
            if (c == '(')
                delimright = ')';
            else if (c == '{')
                delimright = '}';
            else if (c == '[')
                delimright = ']';
            else if (c == '<')
                delimright = '>';
            // The C ctype functions are only defined for unsigned char values
            // and EOF, so a decoded non-ASCII code point must not reach them.
            else if ((c < 0x80 && isalpha(c)) || c == '_' || (c >= 0x80 && isUniAlpha(c)))
            {
                // Start of identifier; must be a heredoc
                Token tok;
                p--;
                scan(&tok); // read in heredoc identifier
                if (tok.value != TOK.identifier)
                {
                    error("identifier expected for heredoc, not %s", tok.toChars());
                    delimright = c;
                }
                else
                {
                    hereid = tok.ident;
                    //printf("hereid = '%s'\n", hereid.toChars());
                    blankrol = 1;
                }
                nest = 0;
            }
            else
            {
                delimright = c;
                nest = 0;
                if (c < 0x80 && isspace(c))
                    error("delimiter cannot be whitespace");
            }
        }
        else
        {
            if (blankrol)
            {
                error("heredoc rest of line should be blank");
                blankrol = 0;
                continue;
            }
            if (nest == 1)
            {
                if (c == delimleft)
                    nestcount++;
                else if (c == delimright)
                {
                    nestcount--;
                    if (nestcount == 0)
                        goto Ldone;
                }
            }
            else if (c == delimright)
                goto Ldone;
            if (startline && ((c < 0x80 && isalpha(c)) || c == '_' || (c >= 0x80 && isUniAlpha(c))) && hereid)
            {
                Token tok;
                auto psave = p;
                p--;
                scan(&tok); // read in possible heredoc identifier
                //printf("endid = '%s'\n", tok.ident.toChars());
                if (tok.value == TOK.identifier && tok.ident is hereid)
                {
                    /* should check that rest of line is blank
                     */
                    goto Ldone;
                }
                p = psave;
            }
            stringbuffer.writeUTF8(c);
            startline = 0;
        }
    }
Ldone:
    if (*p == '"')
        p++;
    else if (hereid)
        error("delimited string must end in %s\"", hereid.toChars());
    else
        error("delimited string must end in %c\"", delimright);
    result.setString(stringbuffer);
    stringPostfix(result);
}
/**
Lex a token string. Some examples of token strings are:
---
q{ foo(xxx) }    // " foo(xxx) "
q{foo$(LPAREN)}  // "foo$(LPAREN)"
q{{foo}"}"}      // "{foo}"}""
---
On entry `p` must point at the opening curly brace. The literal's text is
the raw source between the outermost braces; nested tokens are lexed with
`scan` only to find the matching closing brace.
Params:
    result = pointer to the token that accepts the result
*/
private void tokenStringConstant(Token* result)
{
    result.value = TOK.string_;

    // Take the location before stepping over the `{`, so diagnostics
    // point at the start of the literal.
    const startLoc = loc();
    const bodyStart = ++p;      // first character after the opening brace
    uint depth = 1;             // the opening brace is already counted

    // Tell nested scans that they run inside a token string.
    inTokenStringConstant++;
    scope(exit) inTokenStringConstant--;

    for (;;)
    {
        Token inner;
        scan(&inner);

        if (inner.value == TOK.leftCurly)
        {
            depth++;
        }
        else if (inner.value == TOK.rightCurly)
        {
            if (--depth != 0)
                continue;
            // `p` is just past the closing brace; exclude that brace.
            result.setString(bodyStart, p - 1 - bodyStart);
            stringPostfix(result);
            return;
        }
        else if (inner.value == TOK.endOfFile)
        {
            error("unterminated token string constant starting at %s", startLoc.toChars());
            result.setString();
            return;
        }
        // every other token is simply part of the string body
    }
}
// escapeStringConstant: lexes a quoted literal starting at `p`, decodes escape
// sequences into `stringbuffer`, and stores the resulting bytes in token `t`.
// On an unterminated literal an error is reported and `t` gets an empty string.
1768 Scan a quoted string while building the processed string value by
1769 handling escape sequences. The result is returned in the given `t` token.
1770 This function assumes that `p` currently points to the opening quote
1771 of the string.
1772 Params:
1773 t = the token to set the resulting string to
1774 * References:
1775 * D https://dlang.org/spec/lex.html#double_quoted_strings
1776 * ImportC C11 6.4.5
1778 private void escapeStringConstant(Token* t)
1780 t.value = TOK.string_;
// Location of the opening quote, used only for the "unterminated" diagnostic.
1782 const start = loc();
// `tc` remembers which quote character opened the literal; a `'` or `"` that
// differs from it is treated as ordinary content (see `c != tc` below).
1783 const tc = *p++; // opening quote
1784 stringbuffer.setsize(0);
1785 while (1)
1787 dchar c = *p++;
1788 switch (c)
1790 case '\\':
// `p` is on the character after the backslash. `\u`, `\U` and (outside C mode)
// `\&` results are written as UTF-8; all other escape results fall through to
// the single-byte `writeByte` at the bottom of the loop.
1791 switch (*p)
1793 case '&':
1794 if (Ccompile)
1795 goto default;
1796 goto case;
1798 case 'u':
1799 case 'U':
1800 c = escapeSequence();
1801 stringbuffer.writeUTF8(c);
1802 continue;
1803 default:
1804 c = escapeSequence();
1805 break;
1807 break;
1808 case '\n':
1809 endOfLine();
// In C mode a raw newline ends the literal with an error; otherwise the
// newline is kept as part of the string.
1810 if (Ccompile)
1811 goto Lunterminated;
1812 break;
1813 case '\r':
// For "\r\n" the `\r` is dropped; the `\n` is handled by the next iteration.
1814 if (*p == '\n')
1815 continue; // ignore
1816 c = '\n'; // treat EndOfLine as \n character
1817 endOfLine();
1818 if (Ccompile)
1819 goto Lunterminated;
1820 break;
1821 case '\'':
1822 case '"':
1823 if (c != tc)
1824 goto default;
// Matching closing quote: publish the buffer. The postfix (c/w/d) is only
// looked for when not compiling C.
1825 t.setString(stringbuffer);
1826 if (!Ccompile)
1827 stringPostfix(t);
1828 return;
1829 case 0:
1830 case 0x1A:
1831 // decrement `p`, because it needs to point to the next token (the 0 or 0x1A character is the TOK.endOfFile token).
1832 p--;
1833 Lunterminated:
1834 error("unterminated string constant starting at %s", start.toChars());
1835 t.setString();
1836 return;
1837 default:
1838 if (c & 0x80)
// Non-ASCII lead byte: step back onto it and decode the whole sequence.
// decodeUTF() leaves `p` on the last byte of the sequence, hence the `p++`
// further down. LS/PS count as line endings and are stored as '\n'.
1840 p--;
1841 c = decodeUTF();
1842 if (c == LS || c == PS)
1844 c = '\n';
1845 endOfLine();
1846 if (Ccompile)
1847 goto Lunterminated;
1849 p++;
1850 stringbuffer.writeUTF8(c);
1851 continue;
1853 break;
// Reached for plain ASCII characters, newlines, and single-byte escape results.
1855 stringbuffer.writeByte(c);
// charConstant: lexes a D character literal. On entry `p` is on the opening
// quote. Stores the character value in `t.unsvalue` and returns the literal
// kind (charLiteral, wcharLiteral or dcharLiteral). On malformed input an
// error is reported and `t.unsvalue` is set to '?'.
1859 /**************************************
1860 * Reference:
1861 * https://dlang.org/spec/lex.html#characterliteral
1863 private TOK charConstant(Token* t)
1865 TOK tk = TOK.charLiteral;
1866 //printf("Lexer::charConstant\n");
// Skip the opening quote, then fetch the first character of the literal.
1867 p++;
1868 dchar c = *p++;
1869 switch (c)
1871 case '\\':
// The escape kind decides the literal type: `\u` gives wchar, `\U` and `\&`
// give dchar, anything else leaves the default charLiteral.
1872 switch (*p)
1874 case 'u':
1875 t.unsvalue = escapeSequence();
1876 tk = TOK.wcharLiteral;
1877 break;
1878 case 'U':
1879 case '&':
1880 t.unsvalue = escapeSequence();
1881 tk = TOK.dcharLiteral;
1882 break;
1883 default:
1884 t.unsvalue = escapeSequence();
1885 break;
1887 break;
// A line ending, end of file, or an immediate closing quote all funnel into
// the "unterminated character constant" error under `case '\''`.
// NOTE(review): the `L1` label targeted by `goto L1` below is not visible in
// this excerpt; it presumably sits directly after `case '\n':` - confirm.
1888 case '\n':
1890 endOfLine();
1891 goto case;
1892 case '\r':
1893 goto case '\'';
1894 case 0:
1895 case 0x1A:
1896 // decrement `p`, because it needs to point to the next token (the 0 or 0x1A character is the TOK.endOfFile token).
1897 p--;
1898 goto case;
1899 case '\'':
1900 error("unterminated character constant");
1901 t.unsvalue = '?';
1902 return tk;
1903 default:
1904 if (c & 0x80)
// Non-ASCII: re-decode from the lead byte. decodeUTF() leaves `p` on the last
// byte of the sequence, so `p++` steps past it. The decoded value then picks
// wchar or dchar according to the range test below.
1906 p--;
1907 c = decodeUTF();
1908 p++;
1909 if (c == LS || c == PS)
1910 goto L1;
1911 if (c < 0xD800 || (c >= 0xE000 && c < 0xFFFE))
1912 tk = TOK.wcharLiteral;
1913 else
1914 tk = TOK.dcharLiteral;
1916 t.unsvalue = c;
1917 break;
1919 if (*p != '\'')
// Error recovery: no closing quote directly after the character. Scan ahead
// on the same line, stopping at a quote, end of file, a line ending, or one
// of `; ) ] }`, to tell "multiple characters" from "unterminated".
1921 while (*p != '\'' && *p != 0x1A && *p != 0 && *p != '\n' &&
1922 *p != '\r' && *p != ';' && *p != ')' && *p != ']' && *p != '}')
1924 if (*p & 0x80)
1926 const s = p;
1927 c = decodeUTF();
1928 if (c == LS || c == PS)
// Unicode line separators also stop the scan; restore `p` to the start of
// the sequence so it is not consumed here.
1930 p = s;
1931 break;
1934 p++;
1937 if (*p == '\'')
1939 error("character constant has multiple characters");
1940 p++;
1942 else
1943 error("unterminated character constant");
1944 t.unsvalue = '?';
1945 return tk;
// Normal path: consume the closing quote.
1947 p++;
1948 return tk;
/***************************************
 * Lex C character constant.
 * Parser is on the opening quote.
 *
 * The literal is first lexed like a string by `escapeStringConstant`; its
 * bytes are then folded into one integer and the token is rewritten as an
 * `int32Literal`. An empty constant reports an error and yields a
 * `semicolon` token instead.
 *
 * Params:
 *      t = token to fill in
 *      prefix = one of `u`, `U` or 0.
 * Reference:
 *      C11 6.4.4.4
 */
private void clexerCharConstant(ref Token t, char prefix)
{
    escapeStringConstant(&t);
    const(char)[] str = t.ustring[0 .. t.len];
    const n = str.length;
    const loc = t.loc;
    if (n == 0)
    {
        error(loc, "empty character constant");
        t.value = TOK.semicolon;
        return;
    }

    uint u;
    switch (prefix)
    {
        case 0:
            if (n == 1) // fast case
            {
                u = str[0];
            }
            else if (n > 4)
                error(loc, "max number of chars in character literal is 4, had %d",
                    cast(int)n);
            else
            {
                // Pack the characters big-end first ('ab' == 'a' * 256 + 'b').
                // Shifting instead of poking bytes through `cast(char*)&u`
                // keeps the value independent of host byte order.
                foreach (c; str)
                    u = (u << 8) | c;
            }
            break;

        case 'u':
            dchar d1;
            size_t idx;
            auto msg = utf_decodeChar(str, idx, d1);
            dchar d2 = 0;
            if (idx < n && !msg)
                msg = utf_decodeChar(str, idx, d2);
            if (msg)
                // `msg` is a D slice, not a C string: print it with an
                // explicit length, as decodeUTF() does.
                error(loc, "%.*s", cast(int)msg.length, msg.ptr);
            else if (idx < n)
                error(loc, "max number of chars in 16 bit character literal is 2, had %d",
                    cast(int)((n + 1) >> 1));
            else if (d1 >= 0x1_0000)   // 0x1_0000 itself needs 17 bits
                error(loc, "%d does not fit in 16 bits", d1);
            else if (d2 >= 0x1_0000)
                error(loc, "%d does not fit in 16 bits", d2);
            u = d1;
            if (d2)
                u = (d1 << 16) | d2;
            break;

        case 'U':
            dchar d;
            size_t idx;
            auto msg = utf_decodeChar(str, idx, d);
            if (msg)
                error(loc, "%.*s", cast(int)msg.length, msg.ptr);
            else if (idx < n)
                error(loc, "max number of chars in 32 bit character literal is 1, had %d",
                    cast(int)((n + 3) >> 2));
            u = d;
            break;

        default:
            assert(0);
    }
    t.value = TOK.int32Literal;
    t.unsvalue = u;
}
/***************************************
 * Record the optional postfix character of a string literal.
 *
 * If `p` is on `c`, `w` or `d`, that character is stored in `t.postfix`
 * and consumed; otherwise `t.postfix` is cleared and `p` stays put.
 */
private void stringPostfix(Token* t) pure @nogc
{
    const ch = *p;
    if (ch == 'c' || ch == 'w' || ch == 'd')
    {
        t.postfix = ch;
        p++;
    }
    else
    {
        t.postfix = 0;
    }
}
// number: lexes an integer literal starting at `p` (binary, octal, decimal or
// hexadecimal), stores its value in `t.unsvalue` and returns the literal token
// kind. Input that turns out to be a floating point literal is handed to
// inreal(); in C mode the suffix handling is delegated to cnumber().
2050 /**************************************
2051 * Read in a number.
2052 * If it's an integer, store it in tok.TKutok.Vlong.
2053 * integers can be decimal, octal or hex
2054 * Handle the suffixes U, UL, LU, L, etc.
2055 * If it's double, store it in tok.TKutok.Vdouble.
2056 * Returns:
2057 * TKnum
2058 * TKdouble,...
2060 private TOK number(Token* t)
2062 int base = 10;
// `start` lets the Lreal path rewind and lets diagnostics print the literal text.
2063 const start = p;
2064 uinteger_t n = 0; // unsigned >=64 bit integer type
2065 int d;
// `err` suppresses follow-up diagnostics once one error has been reported.
2066 bool err = false;
2067 bool overflow = false;
2068 bool anyBinaryDigitsNoSingleUS = false;
2069 bool anyHexDigitsNoSingleUS = false;
2070 dchar c = *p;
// A leading `0` selects the base from the character that follows it.
// Characters that can only continue a floating point literal jump to Lreal.
2071 if (c == '0')
2073 ++p;
2074 c = *p;
2075 switch (c)
2077 case '0':
2078 case '1':
2079 case '2':
2080 case '3':
2081 case '4':
2082 case '5':
2083 case '6':
2084 case '7':
2085 base = 8;
2086 break;
2088 case '8':
2089 case '9':
// Base is still set to 8, so the `d >= base` check in the digit loop applies
// to this digit as well.
2090 if (Ccompile)
2091 error("octal digit expected, not `%c`", c);
2092 base = 8;
2093 break;
2094 case 'x':
2095 case 'X':
2096 ++p;
2097 base = 16;
2098 break;
2099 case 'b':
2100 case 'B':
2101 if (Ccompile)
2102 error("binary constants not allowed");
2103 ++p;
2104 base = 2;
2105 break;
2106 case '.':
2107 if (p[1] == '.')
2108 goto Ldone; // if ".."
2109 if (isalpha(p[1]) || p[1] == '_' || p[1] & 0x80)
2110 goto Ldone; // if ".identifier" or ".unicode"
2111 goto Lreal; // '.' is part of current token
2112 case 'i':
2113 case 'f':
2114 case 'F':
2115 goto Lreal;
2116 case '_':
2117 if (Ccompile)
2118 error("embedded `_` not allowed");
2119 ++p;
2120 base = 8;
2121 break;
2122 case 'L':
2123 if (p[1] == 'i')
2124 goto Lreal;
2125 break;
2126 default:
2127 break;
// Digit loop: accumulate digits into `n`. Any character that cannot be part
// of an integer either ends the literal (Ldone) or reclassifies it as a
// floating point literal (Lreal).
2130 while (1)
2132 c = *p;
2133 switch (c)
2135 case '0':
2136 case '1':
2137 case '2':
2138 case '3':
2139 case '4':
2140 case '5':
2141 case '6':
2142 case '7':
2143 case '8':
2144 case '9':
2145 ++p;
2146 d = c - '0';
2147 break;
2148 case 'a':
2149 case 'b':
2150 case 'c':
2151 case 'd':
2152 case 'e':
2153 case 'f':
2154 case 'A':
2155 case 'B':
2156 case 'C':
2157 case 'D':
2158 case 'E':
2159 case 'F':
2160 ++p;
// Outside base 16, `e`/`E` (exponent) and `f`/`F` (float suffix) mean this is
// a floating point literal. Other letters fall through as digits and are
// rejected by the `d >= base` check below.
2161 if (base != 16)
2163 if (c == 'e' || c == 'E' || c == 'f' || c == 'F')
2164 goto Lreal;
2166 if (c >= 'a')
2167 d = c + 10 - 'a';
2168 else
2169 d = c + 10 - 'A';
2170 break;
2171 case 'L':
2172 if (p[1] == 'i')
2173 goto Lreal;
2174 goto Ldone;
2175 case '.':
2176 if (p[1] == '.')
2177 goto Ldone; // if ".."
2178 if (base <= 10 && n > 0 && (isalpha(p[1]) || p[1] == '_' || p[1] & 0x80))
2179 goto Ldone; // if ".identifier" or ".unicode"
2180 if (base == 16 && (!ishex(p[1]) || p[1] == '_' || p[1] & 0x80))
2181 goto Ldone; // if ".identifier" or ".unicode"
2182 if (base == 2)
2183 goto Ldone; // if ".identifier" or ".unicode"
2184 goto Lreal; // otherwise as part of a floating point literal
2185 case 'p':
2186 case 'P':
2187 case 'i':
2188 Lreal:
// Rewind to the first character and re-lex the whole literal as a real.
2189 p = start;
2190 return inreal(t);
2191 case '_':
// Underscore separators are skipped, except in C mode where `_` ends the literal.
2192 if (Ccompile)
2193 goto default;
2194 ++p;
2195 continue;
2196 default:
2197 goto Ldone;
2199 // got a digit here, set any necessary flags, check for errors
2200 anyHexDigitsNoSingleUS = true;
2201 anyBinaryDigitsNoSingleUS = true;
2202 if (!err && d >= base)
2204 error("%s digit expected, not `%c`", base == 2 ? "binary".ptr :
2205 base == 8 ? "octal".ptr :
2206 "decimal".ptr, c);
2207 err = true;
2209 // Avoid expensive overflow check if we aren't at risk of overflow
// With n <= 0x0FFF_FFFF_FFFF_FFFF, n * base + d still fits in 64 bits for
// base <= 16 and d <= 15.
2210 if (n <= 0x0FFF_FFFF_FFFF_FFFFUL)
2211 n = n * base + d;
2212 else
2214 import core.checkedint : mulu, addu;
2216 n = mulu(n, base, overflow);
2217 n = addu(n, d, overflow);
2220 Ldone:
2221 if (overflow && !err)
2223 error("integer overflow");
2224 err = true;
// A `0b` or `0x` prefix with no digits after it is rejected; the message
// suggests the two-character prefix followed by `0`.
2226 if ((base == 2 && !anyBinaryDigitsNoSingleUS) ||
2227 (base == 16 && !anyHexDigitsNoSingleUS))
2228 error("`%.*s` isn't a valid integer literal, use `%.*s0` instead", cast(int)(p - start), start, 2, start);
2230 t.unsvalue = n;
// C sources use their own suffix rules and result types.
2232 if (Ccompile)
2233 return cnumber(base, n);
2235 enum FLAGS : int
2237 none = 0,
2238 decimal = 1, // decimal
2239 unsigned = 2, // u or U suffix
2240 long_ = 4, // L suffix
2243 FLAGS flags = (base == 10) ? FLAGS.decimal : FLAGS.none;
2244 // Parse trailing 'u', 'U', 'l' or 'L' in any combination
// `psuffix` marks where the suffix text starts, for the octal diagnostic below.
2245 const psuffix = p;
2246 while (1)
2248 FLAGS f;
2249 switch (*p)
2251 case 'U':
2252 case 'u':
2253 f = FLAGS.unsigned;
2254 goto L1;
2255 case 'l':
2256 f = FLAGS.long_;
2257 error("lower case integer suffix 'l' is not allowed. Please use 'L' instead");
2258 goto L1;
2259 case 'L':
2260 f = FLAGS.long_;
// NOTE(review): the `L1:` label targeted above is not visible in this excerpt;
// it presumably sits here, before `p++` - confirm.
2262 p++;
// A repeated suffix kind (for example `UU`) is reported once.
2263 if ((flags & f) && !err)
2265 error("unrecognized token");
2266 err = true;
2268 flags = cast(FLAGS)(flags | f);
2269 continue;
2270 default:
2271 break;
2273 break;
// Octal literals are only accepted for values below 8.
2275 if (base == 8 && n >= 8)
2277 if (err)
2278 // can't translate invalid octal value, just show a generic message
2279 error("octal literals larger than 7 are no longer supported");
2280 else
2281 error("octal literals `0%llo%.*s` are no longer supported, use `std.conv.octal!%llo%.*s` instead",
2282 n, cast(int)(p - psuffix), psuffix, n, cast(int)(p - psuffix), psuffix);
// Choose the token kind from the suffix flags and the magnitude of `n`.
2284 TOK result;
2285 switch (flags)
2287 case FLAGS.none:
2288 /* Octal or Hexadecimal constant.
2289 * First that fits: int, uint, long, ulong
2291 if (n & 0x8000000000000000L)
2292 result = TOK.uns64Literal;
2293 else if (n & 0xFFFFFFFF00000000L)
2294 result = TOK.int64Literal;
2295 else if (n & 0x80000000)
2296 result = TOK.uns32Literal;
2297 else
2298 result = TOK.int32Literal;
2299 break;
2300 case FLAGS.decimal:
2301 /* First that fits: int, long, long long
2303 if (n & 0x8000000000000000L)
2305 result = TOK.uns64Literal;
2307 else if (n & 0xFFFFFFFF80000000L)
2308 result = TOK.int64Literal;
2309 else
2310 result = TOK.int32Literal;
2311 break;
2312 case FLAGS.unsigned:
2313 case FLAGS.decimal | FLAGS.unsigned:
2314 /* First that fits: uint, ulong
2316 if (n & 0xFFFFFFFF00000000L)
2317 result = TOK.uns64Literal;
2318 else
2319 result = TOK.uns32Literal;
2320 break;
2321 case FLAGS.decimal | FLAGS.long_:
2322 if (n & 0x8000000000000000L)
2324 if (!err)
2326 error("signed integer overflow");
2327 err = true;
2329 result = TOK.uns64Literal;
2331 else
2332 result = TOK.int64Literal;
2333 break;
2334 case FLAGS.long_:
2335 if (n & 0x8000000000000000L)
2336 result = TOK.uns64Literal;
2337 else
2338 result = TOK.int64Literal;
2339 break;
2340 case FLAGS.unsigned | FLAGS.long_:
2341 case FLAGS.decimal | FLAGS.unsigned | FLAGS.long_:
2342 result = TOK.uns64Literal;
2343 break;
2344 default:
2345 debug
2347 printf("%x\n", flags);
2349 assert(0);
2351 return result;
// cnumber: consumes the C integer suffix at `p` and picks the token kind for
// the already-parsed value `n`, based on the suffix flags, the number base
// and `longsize`.
2354 /**************************************
2355 * Lex C integer-suffix
2356 * Params:
2357 * base = number base
2358 * n = raw integer value
2359 * Returns:
2360 * token value
2362 private TOK cnumber(int base, uinteger_t n)
2364 /* C11 6.4.4.1
2365 * Parse trailing suffixes:
2366 * u or U
2367 * l or L
2368 * ll or LL
2370 enum FLAGS : uint
2372 octalhex = 1, // octal or hexadecimal
2373 decimal = 2, // decimal
2374 unsigned = 4, // u or U suffix
2375 long_ = 8, // l or L suffix
2376 llong = 0x10 // ll or LL
// Every base other than 10 (including base 2) is classified as octalhex.
2378 FLAGS flags = (base == 10) ? FLAGS.decimal : FLAGS.octalhex;
2379 bool err;
2380 Lsuffixes:
2381 while (1)
2383 FLAGS f;
2384 const cs = *p;
2385 switch (cs)
2387 case 'U':
2388 case 'u':
2389 f = FLAGS.unsigned;
2390 break;
2392 case 'l':
2393 case 'L':
2394 f = FLAGS.long_;
// `ll` / `LL` must repeat the same letter case; mixed `lL` is not combined.
// The extra `++p` here consumes the first letter of the pair.
2395 if (cs == p[1])
2397 f = FLAGS.long_ | FLAGS.llong;
2398 ++p;
2400 break;
2402 default:
2403 break Lsuffixes;
2405 ++p;
// A suffix kind that was already seen is reported once.
2406 if ((flags & f) && !err)
2408 error("duplicate integer suffixes");
2409 err = true;
2411 flags = cast(FLAGS)(flags | f);
// Local helper used by the signed `long` cases. After it reports the error,
// `result` keeps its default value of int32Literal.
2414 void overflow()
2416 error("integer overflow");
2419 TOK result = TOK.int32Literal; // default
2420 switch (flags)
2422 /* Since D doesn't have a variable sized `long` or `unsigned long` type,
2423 * this code deviates from C by picking D int, uint, long, or ulong instead
// NOTE(review): in the unsuffixed and `unsigned`-only cases below, the
// `longsize == 4` branch and its `else` branch select identical token kinds.
2426 case FLAGS.octalhex:
2427 /* Octal or Hexadecimal constant.
2428 * First that fits: int, unsigned, long, unsigned long,
2429 * long long, unsigned long long
2431 if (longsize == 4)
2433 if (n & 0x8000000000000000L)
2434 result = TOK.uns64Literal;
2435 else if (n & 0xFFFFFFFF00000000L)
2436 result = TOK.int64Literal;
2437 else if (n & 0x80000000)
2438 result = TOK.uns32Literal;
2439 else
2440 result = TOK.int32Literal;
2442 else
2444 if (n & 0x8000000000000000L)
2445 result = TOK.uns64Literal; // unsigned long
2446 else if (n & 0xFFFFFFFF00000000L)
2447 result = TOK.int64Literal; // long
2448 else if (n & 0x80000000)
2449 result = TOK.uns32Literal;
2450 else
2451 result = TOK.int32Literal;
2453 break;
2455 case FLAGS.decimal:
2456 /* First that fits: int, long, long long
2458 if (longsize == 4)
2460 if (n & 0x8000000000000000L)
2461 result = TOK.uns64Literal;
2462 else if (n & 0xFFFFFFFF80000000L)
2463 result = TOK.int64Literal;
2464 else
2465 result = TOK.int32Literal;
2467 else
2469 if (n & 0x8000000000000000L)
2470 result = TOK.uns64Literal; // unsigned long
2471 else if (n & 0xFFFFFFFF80000000L)
2472 result = TOK.int64Literal; // long
2473 else
2474 result = TOK.int32Literal;
2476 break;
2478 case FLAGS.octalhex | FLAGS.unsigned:
2479 case FLAGS.decimal | FLAGS.unsigned:
2480 /* First that fits: unsigned, unsigned long, unsigned long long
2482 if (longsize == 4)
2484 if (n & 0xFFFFFFFF00000000L)
2485 result = TOK.uns64Literal;
2486 else
2487 result = TOK.uns32Literal;
2489 else
2491 if (n & 0xFFFFFFFF00000000L)
2492 result = TOK.uns64Literal; // unsigned long
2493 else
2494 result = TOK.uns32Literal;
2496 break;
2498 case FLAGS.decimal | FLAGS.long_:
2499 /* First that fits: long, long long
2501 if (longsize == 4)
2503 if (n & 0x8000000000000000L)
2504 overflow();
2505 else if (n & 0xFFFFFFFF_80000000L)
2506 result = TOK.int64Literal;
2507 else
2508 result = TOK.int32Literal; // long
2510 else
2512 if (n & 0x8000000000000000L)
2513 overflow();
2514 else
2515 result = TOK.int64Literal; // long
2517 break;
2519 case FLAGS.octalhex | FLAGS.long_:
2520 /* First that fits: long, unsigned long, long long,
2521 * unsigned long long
2523 if (longsize == 4)
2525 if (n & 0x8000000000000000L)
2526 result = TOK.uns64Literal;
2527 else if (n & 0xFFFFFFFF00000000L)
2528 result = TOK.int64Literal;
2529 else if (n & 0x80000000)
2530 result = TOK.uns32Literal; // unsigned long
2531 else
2532 result = TOK.int32Literal; // long
2534 else
2536 if (n & 0x80000000_00000000L)
2537 result = TOK.uns64Literal; // unsigned long
2538 else
2539 result = TOK.int64Literal; // long
2541 break;
2543 case FLAGS.octalhex | FLAGS.unsigned | FLAGS.long_:
2544 case FLAGS.decimal | FLAGS.unsigned | FLAGS.long_:
2545 /* First that fits: unsigned long, unsigned long long
2547 if (longsize == 4)
2549 if (n & 0xFFFFFFFF00000000L)
2550 result = TOK.uns64Literal;
2551 else
2552 result = TOK.uns32Literal; // unsigned long
2554 else
2556 result = TOK.uns64Literal; // unsigned long
2558 break;
2560 case FLAGS.octalhex | FLAGS.long_ | FLAGS.llong:
2561 /* First that fits: long long, unsigned long long
2563 if (n & 0x8000000000000000L)
2564 result = TOK.uns64Literal;
2565 else
2566 result = TOK.int64Literal;
2567 break;
2569 case FLAGS.decimal | FLAGS.long_ | FLAGS.llong:
2570 /* long long
// NOTE(review): no check of the top bit of `n` here, unlike the
// `decimal | long_` case above - confirm this is intended.
2572 result = TOK.int64Literal;
2573 break;
2575 case FLAGS.octalhex | FLAGS.long_ | FLAGS.unsigned | FLAGS.llong:
2576 case FLAGS.decimal | FLAGS.long_ | FLAGS.unsigned | FLAGS.llong:
2577 result = TOK.uns64Literal;
2578 break;
2580 default:
2581 debug printf("%x\n",flags);
2582 assert(0);
2584 return result;
// inreal: lexes a floating point literal starting at `p`, stores the parsed
// value in `t.floatvalue` and returns the float or imaginary literal token
// kind selected by the suffix.
2587 /**************************************
2588 * Read in characters, converting them to real.
2589 * Bugs:
2590 * Exponent overflow not detected.
2591 * Too much requested precision is not detected.
2593 private TOK inreal(Token* t)
2595 //printf("Lexer::inreal()\n");
2596 debug
2598 assert(*p == '.' || isdigit(*p));
// Cleared when a syntax error is found; the value is then set to zero instead
// of being parsed.
2600 bool isWellformedString = true;
2601 stringbuffer.setsize(0);
2602 auto pstart = p;
2603 bool hex = false;
// Throughout the scan `c` holds the current character and `p` is one past it.
2604 dchar c = *p++;
2605 // Leading '0x'
2606 if (c == '0')
2608 c = *p++;
2609 if (c == 'x' || c == 'X')
2611 hex = true;
2612 c = *p++;
2615 // Digits to left of '.'
2616 while (1)
2618 if (c == '.')
2620 c = *p++;
2621 break;
2623 if (isdigit(c) || (hex && isxdigit(c)) || c == '_')
2625 c = *p++;
2626 continue;
2628 break;
2630 // Digits to right of '.'
2631 while (1)
2633 if (isdigit(c) || (hex && isxdigit(c)) || c == '_')
2635 c = *p++;
2636 continue;
2638 break;
// Exponent: `e`/`E` is always accepted here, `p`/`P` only for hex literals.
// An optional sign may follow.
2640 if (c == 'e' || c == 'E' || (hex && (c == 'p' || c == 'P')))
2642 c = *p++;
2643 if (c == '-' || c == '+')
2645 c = *p++;
2647 bool anyexp = false;
2648 while (1)
2650 if (isdigit(c))
2652 anyexp = true;
2653 c = *p++;
2654 continue;
2656 if (c == '_')
2658 if (Ccompile)
2659 error("embedded `_` in numeric literals not allowed");
2660 c = *p++;
2661 continue;
2663 if (!anyexp)
2665 error("missing exponent");
2666 isWellformedString = false;
2668 break;
2671 else if (hex)
2673 error("exponent required for hex float");
2674 isWellformedString = false;
// Step back onto the first character that is not part of the number.
2676 --p;
// Copy the literal text without `_` separators into stringbuffer and
// NUL-terminate it for the parser.
2677 while (pstart < p)
2679 if (*pstart != '_')
2680 stringbuffer.writeByte(*pstart);
2681 ++pstart;
2683 stringbuffer.writeByte(0);
2684 auto sbufptr = cast(const(char)*)stringbuffer[].ptr;
2685 TOK result;
2686 bool isOutOfRange = false;
2687 t.floatvalue = (isWellformedString ? CTFloat.parse(sbufptr, &isOutOfRange) : CTFloat.zero);
// The suffix selects the precision. The 32-bit and 64-bit paths run an extra
// range check for their type.
2688 switch (*p)
2690 case 'F':
2691 case 'f':
2692 if (isWellformedString && !isOutOfRange)
2693 isOutOfRange = Port.isFloat32LiteralOutOfRange(sbufptr);
2694 result = TOK.float32Literal;
2695 p++;
2696 break;
2697 default:
2698 if (isWellformedString && !isOutOfRange)
2699 isOutOfRange = Port.isFloat64LiteralOutOfRange(sbufptr);
2700 result = TOK.float64Literal;
2701 break;
2702 case 'l':
2703 if (!Ccompile)
2704 error("use 'L' suffix instead of 'l'");
2705 goto case 'L';
2706 case 'L':
2707 ++p;
// In C mode, when `long_doublesize` is 8 the literal takes the 64-bit path
// (`goto default`).
2708 if (Ccompile && long_doublesize == 8)
2709 goto default;
2710 result = TOK.float80Literal;
2711 break;
// A trailing `i` (or the rejected `I`) turns the literal into its imaginary
// counterpart; this is skipped in C mode.
2713 if ((*p == 'i' || *p == 'I') && !Ccompile)
2715 if (*p == 'I')
2716 error("use 'i' suffix instead of 'I'");
2717 p++;
2718 switch (result)
2720 case TOK.float32Literal:
2721 result = TOK.imaginary32Literal;
2722 break;
2723 case TOK.float64Literal:
2724 result = TOK.imaginary64Literal;
2725 break;
2726 case TOK.float80Literal:
2727 result = TOK.imaginary80Literal;
2728 break;
2729 default:
2730 break;
// The "not representable" error is not issued for 80-bit literals.
2733 const isLong = (result == TOK.float80Literal || result == TOK.imaginary80Literal);
2734 if (isOutOfRange && !isLong)
2736 const char* suffix = (result == TOK.float32Literal || result == TOK.imaginary32Literal) ? "f" : "";
2737 error(scanloc, "number `%s%s` is not representable", sbufptr, suffix);
2739 debug
2741 switch (result)
2743 case TOK.float32Literal:
2744 case TOK.float64Literal:
2745 case TOK.float80Literal:
2746 case TOK.imaginary32Literal:
2747 case TOK.imaginary64Literal:
2748 case TOK.imaginary80Literal:
2749 break;
2750 default:
2751 assert(0);
2754 return result;
/**
 * Refresh `scanloc` from the current scan pointer and return it.
 *
 * The column is the 1-based distance of `p` from `line`. Under
 * `version (LocOffset)` the offset of `p` from `base` is stored as well.
 */
final Loc loc() pure @nogc
{
    const column = 1 + p - line;
    scanloc.charnum = cast(uint)column;
    version (LocOffset)
    {
        const offset = p - base;
        scanloc.fileOffset = cast(uint)offset;
    }
    return scanloc;
}
/**
 * Report an error at the location of the current token (`token.loc`).
 * Params:
 *      format = printf-style format string; its arguments follow
 */
final void error(const(char)* format, ...)
{
    va_list ap;
    va_start(ap, format);
    scope(exit) va_end(ap);
    .verror(token.loc, format, ap);
}
/**
 * Report an error at an explicitly given location.
 * Params:
 *      loc = location to attach to the message
 *      format = printf-style format string; its arguments follow
 */
final void error(const ref Loc loc, const(char)* format, ...)
{
    va_list ap;
    va_start(ap, format);
    scope(exit) va_end(ap);
    .verror(loc, format, ap);
}
/**
 * Report a deprecation at the location of the current token (`token.loc`).
 * Params:
 *      format = printf-style format string; its arguments follow
 */
final void deprecation(const(char)* format, ...)
{
    va_list ap;
    va_start(ap, format);
    scope(exit) va_end(ap);
    .vdeprecation(token.loc, format, ap);
}
/*********************************************
 * Parse line/file preprocessor directive:
 *    #line linnum [filespec]
 * Allow __LINE__ for linnum, and __FILE__ for filespec.
 * Accept linemarker format:
 *    # linnum [filespec] {flags}
 * There can be zero or more flags, which are one of the digits 1..4, and
 * must be in ascending order. The flags are ignored.
 *
 * The new line number and file name are written to `scanloc` when the end
 * of the directive line is reached, and only when not inside a token
 * string. On a syntax error a diagnostic is issued and `scanloc` is left
 * unchanged.
 *
 * Params:
 *      tok = token we're on, which is linnum of linemarker
 *      linemarker = true if line marker format and lexer is on linnum
 * References:
 *      linemarker https://gcc.gnu.org/onlinedocs/gcc-11.1.0/cpp/Preprocessor-Output.html
 */
private void poundLine(ref Token tok, bool linemarker)
{
    auto linnum = this.scanloc.linnum;
    const(char)* filespec = null;
    const loc = this.loc();
    bool flags;     // set once a linemarker flag digit has been seen

    if (!linemarker)
        scan(&tok);
    if (tok.value == TOK.int32Literal || tok.value == TOK.int64Literal)
    {
        // Stored one less than the given number; the value must survive a
        // round trip through `int`.
        const lin = cast(int)(tok.unsvalue - 1);
        if (lin != tok.unsvalue - 1)
            error("line number `%lld` out of range", cast(ulong)tok.unsvalue);
        else
            linnum = lin;
    }
    else if (tok.value == TOK.line) // #line __LINE__
    {
        // keep the current line number
    }
    else
        goto Lerr;
    while (1)
    {
        switch (*p)
        {
        case 0:
        case 0x1A:
        case '\n':
        Lnewline:
            if (!inTokenStringConstant)
            {
                this.scanloc.linnum = linnum;
                if (filespec)
                    this.scanloc.filename = filespec;
            }
            return;
        case '\r':
            p++;
            if (*p != '\n')
            {
                // lone '\r' is a line ending by itself
                p--;
                goto Lnewline;
            }
            continue;
        case ' ':
        case '\t':
        case '\v':
        case '\f':
            p++;
            continue; // skip white space
        case '_':
            if (filespec || flags)
                goto Lerr;
            if (memcmp(p, "__FILE__".ptr, 8) == 0)
            {
                p += 8;
                filespec = mem.xstrdup(scanloc.filename);
                continue;
            }
            goto Lerr;
        case '"':
            if (filespec || flags)
                goto Lerr;
            stringbuffer.setsize(0);
            p++;
            while (1)
            {
                uint c;
                c = *p;
                switch (c)
                {
                case '\n':
                case '\r':
                case 0:
                case 0x1A:
                    goto Lerr;
                case '"':
                    stringbuffer.writeByte(0);
                    filespec = mem.xstrdup(cast(const(char)*)stringbuffer[].ptr);
                    p++;
                    break;
                default:
                    if (c & 0x80)
                    {
                        // decodeUTF() leaves `p` on the last byte of the
                        // sequence, so the whole code point must be written
                        // here. Writing only the lead byte `c` would drop
                        // the continuation bytes and corrupt non-ASCII
                        // file names.
                        const u = decodeUTF();
                        if (u == PS || u == LS)
                            goto Lerr;
                        stringbuffer.writeUTF8(u);
                    }
                    else
                        stringbuffer.writeByte(c);
                    p++;
                    continue;
                }
                break;
            }
            continue;

        case '1':
        case '2':
        case '3':
        case '4':
            flags = true;   // linemarker flags seen
            ++p;
            if ('0' <= *p && *p <= '9')
                goto Lerr;  // only one digit allowed
            continue;

        default:
            if (*p & 0x80)
            {
                // Unicode line/paragraph separators also end the directive.
                uint u = decodeUTF();
                if (u == PS || u == LS)
                    goto Lnewline;
            }
            goto Lerr;
        }
    }
Lerr:
    if (linemarker)
        error(loc, "# integer [\"filespec\"] { 1 | 2 | 3 | 4 }\\n expected");
    else
        error(loc, "#line integer [\"filespec\"]\\n expected");
}
/*********************************************
 * C11 6.10.6 Pragma directive
 *    # pragma pp-tokens(opt) new-line
 * The C preprocessor sometimes leaves pragma directives in
 * the preprocessed output. `#pragma pack` is forwarded to `pragmaPack`;
 * every other pragma is ignored by skipping to the next line.
 * Params:
 *      loc = location passed on to `pragmaPack` for its messages
 */
private void pragmaDirective(const ref Loc loc)
{
    Token next;
    scan(&next);
    const isPack = next.value == TOK.identifier && next.ident == Id.pack;
    if (isPack)
        pragmaPack(loc);
    else
        skipToNextLine();
}
/*********
 * ImportC
 *    # pragma pack
 * https://gcc.gnu.org/onlinedocs/gcc-4.4.4/gcc/Structure_002dPacking-Pragmas.html
 * https://docs.microsoft.com/en-us/cpp/preprocessor/pack
 * Scanner is on the `pack`.
 *
 * Handles the `show`, `push`, `pop`, `integer` and empty forms. It updates
 * `packalign` and the parallel `records` / `packs` stacks, and consumes the
 * rest of the directive line on every path.
 *
 * Params:
 *      startloc = location to use for error messages
 */
private void pragmaPack(const ref Loc startloc)
{
    const loc = startloc;
    Token n;    // current token inside the directive

    // True when the current token is the identifier `id`.
    bool onIdentifier(Identifier id)
    {
        return n.value == TOK.identifier && n.ident == id;
    }

    // Complain unless the current token is `)`, then drop the rest of the line.
    void finishDirective()
    {
        if (n.value != TOK.rightParenthesis)
            error(loc, "right parenthesis expected to close `#pragma pack(`");
        skipToNextLine();
    }

    // Make the integer in `t` the current pack alignment. An invalid value
    // is reported but still stored.
    void applyAlignment(ref const Token t)
    {
        const value = t.unsvalue;
        const bool invalid = value < 1 || (value & (value - 1)) || ushort.max < value;
        if (invalid)
            error(loc, "pack must be an integer positive power of 2, not 0x%llx", cast(ulong)value);
        packalign.set(cast(uint)value);
        packalign.setPack(true);
    }

    scan(&n);
    if (n.value != TOK.leftParenthesis)
    {
        error(loc, "left parenthesis expected to follow `#pragma pack`");
        skipToNextLine();
        return;
    }

    scan(&n);

    // The push/pop stacks are created the first time a pack pragma is seen.
    if (!records)
    {
        records = new Array!Identifier;
        packs = new Array!structalign_t;
    }

    /* # pragma pack ( show )
     */
    if (onIdentifier(Id.show))
    {
        if (packalign.isDefault())
            warning(startloc, "current pack attribute is default");
        else
            warning(startloc, "current pack attribute is %d", packalign.get());
        scan(&n);
        finishDirective();
        return;
    }

    /* # pragma pack ( push )
     * # pragma pack ( push , identifier )
     * # pragma pack ( push , integer )
     * # pragma pack ( push , identifier , integer )
     */
    if (onIdentifier(Id.push))
    {
        Identifier record = null;
        scan(&n);
        if (n.value == TOK.comma)
        {
            scan(&n);
            if (n.value == TOK.identifier)
            {
                record = n.ident;
                scan(&n);
                if (n.value == TOK.comma)
                {
                    scan(&n);
                    if (n.value == TOK.int32Literal)
                    {
                        applyAlignment(n);
                        scan(&n);
                    }
                    else
                        error(loc, "alignment value expected, not `%s`", n.toChars());
                }
            }
            else if (n.value == TOK.int32Literal)
            {
                applyAlignment(n);
                scan(&n);
            }
            else
                error(loc, "alignment value expected, not `%s`", n.toChars());
        }
        // The entry is pushed after any new alignment has been applied.
        this.records.push(record);
        this.packs.push(packalign);
        finishDirective();
        return;
    }

    /* # pragma pack ( pop )
     * # pragma pack ( pop PopList )
     * PopList :
     *    , IdentifierOrInteger
     *    , IdentifierOrInteger PopList
     * IdentifierOrInteger:
     *    identifier
     *    integer
     */
    if (onIdentifier(Id.pop))
    {
        // NOTE(review): a bare `pop` with no PopList skips the loop and so
        // leaves `packalign` and both stacks untouched - confirm intended.
        scan(&n);
        while (n.value == TOK.comma)
        {
            scan(&n);
            if (n.value == TOK.identifier)
            {
                // Search from the top of the stack for the named record;
                // restore its alignment and drop it and everything above it.
                for (size_t i = this.records.length; i-- > 0;)
                {
                    if ((*this.records)[i] == n.ident)
                    {
                        packalign = (*this.packs)[i];
                        this.records.setDim(i);
                        this.packs.setDim(i);
                        break;
                    }
                }
                scan(&n);
            }
            else if (n.value == TOK.int32Literal)
            {
                applyAlignment(n);
                this.records.push(null);
                this.packs.push(packalign);
                scan(&n);
            }
        }
        finishDirective();
        return;
    }

    /* # pragma pack ( integer )
     */
    if (n.value == TOK.int32Literal)
    {
        applyAlignment(n);
        scan(&n);
        finishDirective();
        return;
    }

    /* # pragma pack ( )
     */
    if (n.value == TOK.rightParenthesis)
    {
        packalign.setDefault();
        finishDirective();
        return;
    }

    error(loc, "unrecognized `#pragma pack(%s)`", n.toChars());
    skipToNextLine();
}
/***************************************
 * Scan forward to start of next line.
 *
 * Recognised line endings are `\n`, `\r`, `\r\n` and the Unicode LS / PS
 * separators. If end of file (0 or 0x1A) comes first, `p` is left on that
 * character and `endOfLine` is not called.
 */
private void skipToNextLine()
{
    for (;;)
    {
        const ch = *p;

        if (ch == 0 || ch == 0x1A)
            return; // do not advance p

        if (ch == '\n')
        {
            ++p;
            break;
        }

        if (ch == '\r')
        {
            ++p;
            if (*p == '\n')
                ++p;
            break;
        }

        if (ch & 0x80)
        {
            // decodeUTF() leaves p on the last byte of the sequence
            const u = decodeUTF();
            if (u == PS || u == LS)
            {
                ++p;
                break;
            }
        }
        ++p;
    }
    endOfLine();
}
/********************************************
 * Decode the UTF-8 sequence that starts at `p`.
 * Issue error messages for invalid sequences.
 * Returns: the decoded character; `p` is advanced to the last byte of the
 * sequence (not past it).
 */
private uint decodeUTF()
{
    const seq = p;
    assert(*seq & 0x80);

    // Offer the decoder at most 4 bytes, stopping early at a 0 byte.
    size_t avail = 1;
    while (avail < 4 && seq[avail])
        ++avail;

    size_t consumed = 0;
    dchar decoded;
    const msg = utf_decodeChar(seq[0 .. avail], consumed, decoded);
    p += consumed - 1;
    if (msg)
        error("%.*s", cast(int)msg.length, msg.ptr);
    return decoded;
}
// getDocComment: extracts the text of the doc comment spanning t.ptr .. p,
// normalises it into a local buffer, and stores or appends the result in
// either t.lineComment or t.blockComment.
3166 /***************************************************
3167 * Parse doc comment embedded between t.ptr and p.
3168 * Remove trailing blanks and tabs from lines.
3169 * Replace all newlines with \n.
3170 * Remove leading comment character from each line.
3171 * Decide if it's a lineComment or a blockComment.
3172 * Append to previous one for this token.
3174 * If newParagraph is true, an extra newline will be
3175 * added between adjoining doc comments.
3177 private void getDocComment(Token* t, uint lineComment, bool newParagraph) pure
3179 /* ct tells us which kind of comment it is: '/', '*', or '+'
3181 const ct = t.ptr[2];
3182 /* Start of comment text skips over / * *, / + +, or / / /
3184 const(char)* q = t.ptr + 3; // start of comment text
3185 const(char)* qend = p;
// Block comments end with a two-character closer, which is excluded here.
3186 if (ct == '*' || ct == '+')
3187 qend -= 2;
3188 /* Scan over initial row of ****'s or ++++'s or ////'s
3190 for (; q < qend; q++)
3192 if (*q != ct)
3193 break;
3195 /* Remove leading spaces until start of the comment
// `linestart` is nonzero while only whitespace has been seen on the current
// line. In that state a leading comment character is dropped.
3197 int linestart = 0;
3198 if (ct == '/')
3200 while (q < qend && (*q == ' ' || *q == '\t'))
3201 ++q;
3203 else if (q < qend)
// For block comments, a line ending directly after the opening row is skipped.
3205 if (*q == '\r')
3207 ++q;
3208 if (q < qend && *q == '\n')
3209 ++q;
3210 linestart = 1;
3212 else if (*q == '\n')
3214 ++q;
3215 linestart = 1;
3218 /* Remove trailing row of ****'s or ++++'s
3220 if (ct != '/')
3222 for (; q < qend; qend--)
3224 if (qend[-1] != ct)
3225 break;
3228 /* Comment is now [q .. qend].
3229 * Canonicalize it into buf[].
3231 OutBuffer buf;
// Local helper: drops spaces and tabs from the end of `buf`.
3233 void trimTrailingWhitespace()
3235 const s = buf[];
3236 auto len = s.length;
3237 while (len && (s[len - 1] == ' ' || s[len - 1] == '\t'))
3238 --len;
3239 buf.setsize(len);
3242 for (; q < qend; q++)
3244 char c = *q;
3245 switch (c)
3247 case '*':
3248 case '+':
// The first comment character on a line is dropped, together with the
// whitespace already buffered before it.
3249 if (linestart && c == ct)
3251 linestart = 0;
3252 /* Trim preceding whitespace up to preceding \n
3254 trimTrailingWhitespace();
3255 continue;
3257 break;
3258 case ' ':
3259 case '\t':
// Whitespace is copied but does not end the `linestart` state.
3260 break;
3261 case '\r':
3262 if (q[1] == '\n')
3263 continue; // skip the \r
3264 goto Lnewline;
3265 default:
// 226, 128, 168/169 are the bytes 0xE2 0x80 0xA8/0xA9, the UTF-8 encodings of
// LS (U+2028) and PS (U+2029); both are treated as newlines.
3266 if (c == 226)
3268 // If LS or PS
3269 if (q[1] == 128 && (q[2] == 168 || q[2] == 169))
3271 q += 2;
3272 goto Lnewline;
3275 linestart = 0;
3276 break;
3277 Lnewline:
3278 c = '\n'; // replace all newlines with \n
3279 goto case;
3280 case '\n':
3281 linestart = 1;
3282 /* Trim trailing whitespace
3284 trimTrailingWhitespace();
3285 break;
3287 buf.writeByte(c);
3289 /* Trim trailing whitespace (if the last line does not have newline)
3291 trimTrailingWhitespace();
3293 // Always end with a newline
3294 const s = buf[];
3295 if (s.length == 0 || s[$ - 1] != '\n')
3296 buf.writeByte('\n');
3298 // It's a line comment if the start of the doc comment comes
3299 // after other non-whitespace on the same line.
3300 auto dc = (lineComment && anyToken) ? &t.lineComment : &t.blockComment;
3301 // Combine with previous doc comment, if any
3302 if (*dc)
3303 *dc = combineComments(*dc, buf[], newParagraph).toDString();
3304 else
3305 *dc = buf.extractSlice(true);
/********************************************
 * Combine two document comments into one.
 *
 * If `c1` is null, `c2.ptr` is returned unchanged, and vice versa.
 * Otherwise a new 0-terminated buffer is allocated holding `c1`, a newline
 * if `c1` is non-empty and does not already end in one, one more newline
 * if `newParagraph` is true, and then `c2`.
 *
 * Params:
 *  c1 = first comment
 *  c2 = second comment
 *  newParagraph = separate the two comments by an extra newline
 * Returns: pointer to the combined, 0-terminated text
 */
static const(char)* combineComments(const(char)[] c1, const(char)[] c2, bool newParagraph) pure
{
    //printf("Lexer::combineComments('%s', '%s', '%i')\n", c1, c2, newParagraph);
    if (!c1)
        return c2.ptr;
    if (!c2)
        return c1.ptr;

    // c1 needs a terminating '\n' only if it has text that lacks one
    const bool needSeparator = c1.length && c1[$ - 1] != '\n';

    const total = c1.length + (needSeparator ? 1 : 0) + (newParagraph ? 1 : 0) + c2.length;
    auto dst = cast(char*)mem.xmalloc_noscan(total + 1);

    // Fill the buffer front to back, tracking the write position
    size_t at = 0;
    dst[at .. at + c1.length] = c1[];
    at += c1.length;
    if (needSeparator)
        dst[at++] = '\n';
    if (newParagraph)
        dst[at++] = '\n';
    dst[at .. at + c2.length] = c2[];
    dst[total] = 0;
    return dst;
}
/**************************
 * Record the start of a new line: remember `p` as the beginning of the
 * line and bump the line number of the scan location.
 * `p` should be at start of next line.
 */
private void endOfLine() pure @nogc @safe
{
    line = p;
    ++scanloc.linnum;
}
/// Support for `__DATE__`, `__TIME__`, and `__TIMESTAMP__`
private struct TimeStampInfo
{
    private __gshared bool initdone = false;

    // Note: Those properties need to be guarded by a call to `initialize`
    // The API isn't safe, and quite brittle, but it was left this way
    // over performance concerns.
    // This is currently only called once, from the lexer.
    __gshared char[11 + 1] date;      // 11 characters + terminating 0
    __gshared char[8 + 1] time;       // 8 characters + terminating 0
    __gshared char[24 + 1] timestamp; // 24 characters + terminating 0

    /**
     * Fill `date`, `time` and `timestamp` once; later calls return
     * immediately.
     *
     * The time is taken from the environment variable `SOURCE_DATE_EPOCH`
     * when it is set, otherwise from the system clock. If the variable is
     * set but cannot be parsed, an error is issued at `loc`.
     *
     * Params:
     *  loc = location used when reporting an invalid `SOURCE_DATE_EPOCH`
     */
    public static void initialize(const ref Loc loc) nothrow
    {
        if (initdone)
            return;

        initdone = true;
        time_t ct;
        // https://issues.dlang.org/show_bug.cgi?id=20444
        if (auto p = getenv("SOURCE_DATE_EPOCH"))
        {
            if (!ct.parseDigits(p.toDString()))
                error(loc, "Value of environment variable `SOURCE_DATE_EPOCH` should be a valid UNIX timestamp, not: `%s`", p);
        }
        else
            .time(&ct); // leading dot: the C function, not the `time` member
        const p = ctime(&ct);
        assert(p);
        // Slice the pieces out of the ctime() text by fixed offsets.
        // snprintf bounds each write by the size of the destination buffer.
        snprintf(&date[0], date.length, "%.6s %.4s", p + 4, p + 20);
        snprintf(&time[0], time.length, "%.8s", p + 11);
        snprintf(&timestamp[0], timestamp.length, "%.24s", p);
    }
}
// Well-formed escape sequences: each must decode to the expected value,
// consume the whole input, and raise no diagnostic.
unittest
{
    import dmd.console;

    // Any diagnostic emitted while this handler is installed fails the test
    nothrow bool assertDiagnosticHandler(const ref Loc loc, Color headerColor, const(char)* header,
                                         const(char)* format, va_list ap, const(char)* p1, const(char)* p2)
    {
        assert(0);
    }

    diagnosticHandler = &assertDiagnosticHandler;

    // Decode `sequence` (the text following the backslash) and verify both
    // the result and that the cursor ends up just past the sequence.
    static void test(T)(string sequence, T expected, bool Ccompile = false)
    {
        auto cursor = cast(const(char)*)sequence.ptr;
        const decoded = Lexer.escapeSequence(Loc.initial, cursor, Ccompile);
        assert(expected == decoded);
        const pastEnd = sequence.ptr + sequence.length;
        assert(cursor == pastEnd);
    }

    // Simple single-character escapes
    test(`'`, '\'');
    test(`"`, '"');
    test(`?`, '?');
    test(`\`, '\\');
    test(`0`, '\0');
    test(`a`, '\a');
    test(`b`, '\b');
    test(`f`, '\f');
    test(`n`, '\n');
    test(`r`, '\r');
    test(`t`, '\t');
    test(`v`, '\v');

    // Two-digit hex escapes
    test(`x00`, 0x00);
    test(`xff`, 0xff);
    test(`xFF`, 0xff);
    test(`xa7`, 0xa7);
    test(`x3c`, 0x3c);
    test(`xe2`, 0xe2);

    // Octal escapes
    test(`1`, '\1');
    test(`42`, '\42');
    test(`357`, '\357');

    // Four-digit Unicode escapes
    test(`u1234`, '\u1234');
    test(`uf0e4`, '\uf0e4');

    // Eight-digit Unicode escapes
    test(`U0001f603`, '\U0001f603');

    // Named character entities
    test(`&quot;`, '"');
    test(`&lt;`, '<');
    test(`&gt;`, '>');

    diagnosticHandler = null;
}
// Malformed escape sequences: each must raise exactly the expected error
// message, return the expected fallback value, and consume the expected
// number of characters.
unittest
{
    import dmd.console;
    string expected;
    bool gotError;

    // Formats the diagnostic and compares it with the message in `expected`
    nothrow bool expectDiagnosticHandler(const ref Loc loc, Color headerColor, const(char)* header,
                                         const(char)* format, va_list ap, const(char)* p1, const(char)* p2)
    {
        assert(cast(Classification)headerColor == Classification.error);

        gotError = true;
        char[100] buffer = void;
        // Bounded formatting: never write past `buffer`, and make sure the
        // message was not truncated before slicing by the returned length.
        const written = vsnprintf(buffer.ptr, buffer.length, format, ap);
        assert(written >= 0 && cast(size_t)written < buffer.length);
        auto actual = buffer[0 .. written];
        assert(expected == actual);
        return true;
    }

    diagnosticHandler = &expectDiagnosticHandler;

    // Decode `sequence` (the text following the backslash), requiring an
    // error with text `expectedError`. `global.errors` is restored afterwards
    // so the expected failures do not count as real errors.
    void test(string sequence, string expectedError, dchar expectedReturnValue, uint expectedScanLength, bool Ccompile = false)
    {
        uint errors = global.errors;
        gotError = false;
        expected = expectedError;
        auto p = cast(const(char)*)sequence.ptr;
        auto actualReturnValue = Lexer.escapeSequence(Loc.initial, p, Ccompile);
        assert(gotError);
        assert(expectedReturnValue == actualReturnValue);

        auto actualScanLength = p - sequence.ptr;
        assert(expectedScanLength == actualScanLength);
        global.errors = errors;
    }

    // Unknown escape characters (named entities are not accepted when Ccompile is set)
    test("c", `undefined escape sequence \c`, 'c', 1);
    test("!", `undefined escape sequence \!`, '!', 1);
    test("&quot;", `undefined escape sequence \&`, '&', 1, true);

    // Too few hex digits
    test("x1", `escape hex sequence has 1 hex digits instead of 2`, '\x01', 2);

    test("u1"  , `escape hex sequence has 1 hex digits instead of 4`,   0x1, 2);
    test("u12" , `escape hex sequence has 2 hex digits instead of 4`,  0x12, 3);
    test("u123", `escape hex sequence has 3 hex digits instead of 4`, 0x123, 4);

    test("U0"      , `escape hex sequence has 1 hex digits instead of 8`,       0x0, 2);
    test("U00"     , `escape hex sequence has 2 hex digits instead of 8`,      0x00, 3);
    test("U000"    , `escape hex sequence has 3 hex digits instead of 8`,     0x000, 4);
    test("U0000"   , `escape hex sequence has 4 hex digits instead of 8`,    0x0000, 5);
    test("U0001f"  , `escape hex sequence has 5 hex digits instead of 8`,   0x0001f, 6);
    test("U0001f6" , `escape hex sequence has 6 hex digits instead of 8`,  0x0001f6, 7);
    test("U0001f60", `escape hex sequence has 7 hex digits instead of 8`, 0x0001f60, 8);

    // Values that are not valid UTF characters
    test("ud800"    , `invalid UTF character \U0000d800`, '?', 5);
    test("udfff"    , `invalid UTF character \U0000dfff`, '?', 5);
    test("U00110000", `invalid UTF character \U00110000`, '?', 9);

    // Non-hex character right after the x/u/U introducer
    test("xg0"      , `undefined escape hex sequence \xg`, 'g', 2);
    test("ug000"    , `undefined escape hex sequence \ug`, 'g', 2);
    test("Ug0000000", `undefined escape hex sequence \Ug`, 'g', 2);

    // Named entity problems
    test("&BAD;", `unnamed character entity &BAD;`  , '?', 5);
    test("&quot", `unterminated named entity &quot;`, '?', 5);

    // Octal value out of range
    test("400", `escape octal sequence \400 is larger than \377`, 0x100, 3);

    diagnosticHandler = null;
}