[lld/COFF] Demangle symbol name in discarded section relocation error message (#119726)
[llvm-project.git] / llvm / lib / AsmParser / LLLexer.cpp
blob1b8e033134f51bdf42072d6c5b71de1d72c140ba
1 //===- LLLexer.cpp - Lexer for .ll Files ----------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // Implement the Lexer for .ll files.
11 //===----------------------------------------------------------------------===//
13 #include "llvm/AsmParser/LLLexer.h"
14 #include "llvm/ADT/APInt.h"
15 #include "llvm/ADT/STLExtras.h"
16 #include "llvm/ADT/StringExtras.h"
17 #include "llvm/ADT/Twine.h"
18 #include "llvm/IR/DerivedTypes.h"
19 #include "llvm/IR/Instruction.h"
20 #include "llvm/Support/ErrorHandling.h"
21 #include "llvm/Support/SourceMgr.h"
22 #include <cassert>
23 #include <cctype>
24 #include <cstdio>
26 using namespace llvm;
28 // Both the lexer and parser can issue error messages. If the lexer issues a
29 // lexer error, since we do not terminate execution immediately, usually that
30 // is followed by the parser issuing a parser error. However, the error issued
31 // by the lexer is more relevant in that case as opposed to potentially more
32 // generic parser error. So instead of always recording the last error message
33 // use the `Priority` to establish a priority, with Lexer > Parser > None. We
34 // record the issued message only if the message has same or higher priority
35 // than the existing one. This prevents lexer errors from being overwritten by
36 // parser errors.
37 void LLLexer::Error(LocTy ErrorLoc, const Twine &Msg,
38 LLLexer::ErrorPriority Priority) {
39 if (Priority < ErrorInfo.Priority)
40 return;
41 ErrorInfo.Error = SM.GetMessage(ErrorLoc, SourceMgr::DK_Error, Msg);
42 ErrorInfo.Priority = Priority;
45 void LLLexer::Warning(LocTy WarningLoc, const Twine &Msg) const {
46 SM.PrintMessage(WarningLoc, SourceMgr::DK_Warning, Msg);
49 //===----------------------------------------------------------------------===//
50 // Helper functions.
51 //===----------------------------------------------------------------------===//
53 // atoull - Convert an ascii string of decimal digits into the unsigned long
54 // long representation... this does not have to do input error checking,
55 // because we know that the input will be matched by a suitable regex...
57 uint64_t LLLexer::atoull(const char *Buffer, const char *End) {
58 uint64_t Result = 0;
59 for (; Buffer != End; Buffer++) {
60 uint64_t OldRes = Result;
61 Result *= 10;
62 Result += *Buffer-'0';
63 if (Result < OldRes) { // overflow detected.
64 LexError("constant bigger than 64 bits detected");
65 return 0;
68 return Result;
71 uint64_t LLLexer::HexIntToVal(const char *Buffer, const char *End) {
72 uint64_t Result = 0;
73 for (; Buffer != End; ++Buffer) {
74 uint64_t OldRes = Result;
75 Result *= 16;
76 Result += hexDigitValue(*Buffer);
78 if (Result < OldRes) { // overflow detected.
79 LexError("constant bigger than 64 bits detected");
80 return 0;
83 return Result;
86 void LLLexer::HexToIntPair(const char *Buffer, const char *End,
87 uint64_t Pair[2]) {
88 Pair[0] = 0;
89 if (End - Buffer >= 16) {
90 for (int i = 0; i < 16; i++, Buffer++) {
91 assert(Buffer != End);
92 Pair[0] *= 16;
93 Pair[0] += hexDigitValue(*Buffer);
96 Pair[1] = 0;
97 for (int i = 0; i < 16 && Buffer != End; i++, Buffer++) {
98 Pair[1] *= 16;
99 Pair[1] += hexDigitValue(*Buffer);
101 if (Buffer != End)
102 LexError("constant bigger than 128 bits detected");
105 /// FP80HexToIntPair - translate an 80 bit FP80 number (20 hexits) into
106 /// { low64, high16 } as usual for an APInt.
107 void LLLexer::FP80HexToIntPair(const char *Buffer, const char *End,
108 uint64_t Pair[2]) {
109 Pair[1] = 0;
110 for (int i=0; i<4 && Buffer != End; i++, Buffer++) {
111 assert(Buffer != End);
112 Pair[1] *= 16;
113 Pair[1] += hexDigitValue(*Buffer);
115 Pair[0] = 0;
116 for (int i = 0; i < 16 && Buffer != End; i++, Buffer++) {
117 Pair[0] *= 16;
118 Pair[0] += hexDigitValue(*Buffer);
120 if (Buffer != End)
121 LexError("constant bigger than 128 bits detected");
124 // UnEscapeLexed - Run through the specified buffer and change \xx codes to the
125 // appropriate character.
126 static void UnEscapeLexed(std::string &Str) {
127 if (Str.empty()) return;
129 char *Buffer = &Str[0], *EndBuffer = Buffer+Str.size();
130 char *BOut = Buffer;
131 for (char *BIn = Buffer; BIn != EndBuffer; ) {
132 if (BIn[0] == '\\') {
133 if (BIn < EndBuffer-1 && BIn[1] == '\\') {
134 *BOut++ = '\\'; // Two \ becomes one
135 BIn += 2;
136 } else if (BIn < EndBuffer-2 &&
137 isxdigit(static_cast<unsigned char>(BIn[1])) &&
138 isxdigit(static_cast<unsigned char>(BIn[2]))) {
139 *BOut = hexDigitValue(BIn[1]) * 16 + hexDigitValue(BIn[2]);
140 BIn += 3; // Skip over handled chars
141 ++BOut;
142 } else {
143 *BOut++ = *BIn++;
145 } else {
146 *BOut++ = *BIn++;
149 Str.resize(BOut-Buffer);
/// isLabelChar - Return true if C may appear in a label, i.e. it matches
/// [-a-zA-Z$._0-9].
static bool isLabelChar(char C) {
  switch (C) {
  case '-':
  case '$':
  case '.':
  case '_':
    return true;
  default:
    // The cast keeps the argument in the range <cctype> functions accept.
    return isalnum(static_cast<unsigned char>(C)) != 0;
  }
}
158 /// isLabelTail - Return true if this pointer points to a valid end of a label.
159 static const char *isLabelTail(const char *CurPtr) {
160 while (true) {
161 if (CurPtr[0] == ':') return CurPtr+1;
162 if (!isLabelChar(CurPtr[0])) return nullptr;
163 ++CurPtr;
167 //===----------------------------------------------------------------------===//
168 // Lexer definition.
169 //===----------------------------------------------------------------------===//
171 LLLexer::LLLexer(StringRef StartBuf, SourceMgr &SM, SMDiagnostic &Err,
172 LLVMContext &C)
173 : CurBuf(StartBuf), ErrorInfo(Err), SM(SM), Context(C) {
174 CurPtr = CurBuf.begin();
177 int LLLexer::getNextChar() {
178 char CurChar = *CurPtr++;
179 switch (CurChar) {
180 default: return (unsigned char)CurChar;
181 case 0:
182 // A nul character in the stream is either the end of the current buffer or
183 // a random nul in the file. Disambiguate that here.
184 if (CurPtr-1 != CurBuf.end())
185 return 0; // Just whitespace.
187 // Otherwise, return end of file.
188 --CurPtr; // Another call to lex will return EOF again.
189 return EOF;
193 lltok::Kind LLLexer::LexToken() {
194 while (true) {
195 TokStart = CurPtr;
197 int CurChar = getNextChar();
198 switch (CurChar) {
199 default:
200 // Handle letters: [a-zA-Z_]
201 if (isalpha(static_cast<unsigned char>(CurChar)) || CurChar == '_')
202 return LexIdentifier();
203 return lltok::Error;
204 case EOF: return lltok::Eof;
205 case 0:
206 case ' ':
207 case '\t':
208 case '\n':
209 case '\r':
210 // Ignore whitespace.
211 continue;
212 case '+': return LexPositive();
213 case '@': return LexAt();
214 case '$': return LexDollar();
215 case '%': return LexPercent();
216 case '"': return LexQuote();
217 case '.':
218 if (const char *Ptr = isLabelTail(CurPtr)) {
219 CurPtr = Ptr;
220 StrVal.assign(TokStart, CurPtr-1);
221 return lltok::LabelStr;
223 if (CurPtr[0] == '.' && CurPtr[1] == '.') {
224 CurPtr += 2;
225 return lltok::dotdotdot;
227 return lltok::Error;
228 case ';':
229 SkipLineComment();
230 continue;
231 case '!': return LexExclaim();
232 case '^':
233 return LexCaret();
234 case ':':
235 return lltok::colon;
236 case '#': return LexHash();
237 case '0': case '1': case '2': case '3': case '4':
238 case '5': case '6': case '7': case '8': case '9':
239 case '-':
240 return LexDigitOrNegative();
241 case '=': return lltok::equal;
242 case '[': return lltok::lsquare;
243 case ']': return lltok::rsquare;
244 case '{': return lltok::lbrace;
245 case '}': return lltok::rbrace;
246 case '<': return lltok::less;
247 case '>': return lltok::greater;
248 case '(': return lltok::lparen;
249 case ')': return lltok::rparen;
250 case ',': return lltok::comma;
251 case '*': return lltok::star;
252 case '|': return lltok::bar;
253 case '/':
254 if (getNextChar() != '*')
255 return lltok::Error;
256 if (SkipCComment())
257 return lltok::Error;
258 continue;
263 void LLLexer::SkipLineComment() {
264 while (true) {
265 if (CurPtr[0] == '\n' || CurPtr[0] == '\r' || getNextChar() == EOF)
266 return;
270 /// This skips C-style /**/ comments. Returns true if there
271 /// was an error.
272 bool LLLexer::SkipCComment() {
273 while (true) {
274 int CurChar = getNextChar();
275 switch (CurChar) {
276 case EOF:
277 LexError("unterminated comment");
278 return true;
279 case '*':
280 // End of the comment?
281 CurChar = getNextChar();
282 if (CurChar == '/')
283 return false;
284 if (CurChar == EOF) {
285 LexError("unterminated comment");
286 return true;
292 /// Lex all tokens that start with an @ character.
293 /// GlobalVar @\"[^\"]*\"
294 /// GlobalVar @[-a-zA-Z$._][-a-zA-Z$._0-9]*
295 /// GlobalVarID @[0-9]+
296 lltok::Kind LLLexer::LexAt() {
297 return LexVar(lltok::GlobalVar, lltok::GlobalID);
300 lltok::Kind LLLexer::LexDollar() {
301 if (const char *Ptr = isLabelTail(TokStart)) {
302 CurPtr = Ptr;
303 StrVal.assign(TokStart, CurPtr - 1);
304 return lltok::LabelStr;
307 // Handle DollarStringConstant: $\"[^\"]*\"
308 if (CurPtr[0] == '"') {
309 ++CurPtr;
311 while (true) {
312 int CurChar = getNextChar();
314 if (CurChar == EOF) {
315 LexError("end of file in COMDAT variable name");
316 return lltok::Error;
318 if (CurChar == '"') {
319 StrVal.assign(TokStart + 2, CurPtr - 1);
320 UnEscapeLexed(StrVal);
321 if (StringRef(StrVal).contains(0)) {
322 LexError("NUL character is not allowed in names");
323 return lltok::Error;
325 return lltok::ComdatVar;
330 // Handle ComdatVarName: $[-a-zA-Z$._][-a-zA-Z$._0-9]*
331 if (ReadVarName())
332 return lltok::ComdatVar;
334 return lltok::Error;
337 /// ReadString - Read a string until the closing quote.
338 lltok::Kind LLLexer::ReadString(lltok::Kind kind) {
339 const char *Start = CurPtr;
340 while (true) {
341 int CurChar = getNextChar();
343 if (CurChar == EOF) {
344 LexError("end of file in string constant");
345 return lltok::Error;
347 if (CurChar == '"') {
348 StrVal.assign(Start, CurPtr-1);
349 UnEscapeLexed(StrVal);
350 return kind;
355 /// ReadVarName - Read the rest of a token containing a variable name.
356 bool LLLexer::ReadVarName() {
357 const char *NameStart = CurPtr;
358 if (isalpha(static_cast<unsigned char>(CurPtr[0])) ||
359 CurPtr[0] == '-' || CurPtr[0] == '$' ||
360 CurPtr[0] == '.' || CurPtr[0] == '_') {
361 ++CurPtr;
362 while (isalnum(static_cast<unsigned char>(CurPtr[0])) ||
363 CurPtr[0] == '-' || CurPtr[0] == '$' ||
364 CurPtr[0] == '.' || CurPtr[0] == '_')
365 ++CurPtr;
367 StrVal.assign(NameStart, CurPtr);
368 return true;
370 return false;
373 // Lex an ID: [0-9]+. On success, the ID is stored in UIntVal and Token is
374 // returned, otherwise the Error token is returned.
375 lltok::Kind LLLexer::LexUIntID(lltok::Kind Token) {
376 if (!isdigit(static_cast<unsigned char>(CurPtr[0])))
377 return lltok::Error;
379 for (++CurPtr; isdigit(static_cast<unsigned char>(CurPtr[0])); ++CurPtr)
380 /*empty*/;
382 uint64_t Val = atoull(TokStart + 1, CurPtr);
383 if ((unsigned)Val != Val)
384 LexError("invalid value number (too large)");
385 UIntVal = unsigned(Val);
386 return Token;
389 lltok::Kind LLLexer::LexVar(lltok::Kind Var, lltok::Kind VarID) {
390 // Handle StringConstant: \"[^\"]*\"
391 if (CurPtr[0] == '"') {
392 ++CurPtr;
394 while (true) {
395 int CurChar = getNextChar();
397 if (CurChar == EOF) {
398 LexError("end of file in global variable name");
399 return lltok::Error;
401 if (CurChar == '"') {
402 StrVal.assign(TokStart+2, CurPtr-1);
403 UnEscapeLexed(StrVal);
404 if (StringRef(StrVal).contains(0)) {
405 LexError("NUL character is not allowed in names");
406 return lltok::Error;
408 return Var;
413 // Handle VarName: [-a-zA-Z$._][-a-zA-Z$._0-9]*
414 if (ReadVarName())
415 return Var;
417 // Handle VarID: [0-9]+
418 return LexUIntID(VarID);
421 /// Lex all tokens that start with a % character.
422 /// LocalVar ::= %\"[^\"]*\"
423 /// LocalVar ::= %[-a-zA-Z$._][-a-zA-Z$._0-9]*
424 /// LocalVarID ::= %[0-9]+
425 lltok::Kind LLLexer::LexPercent() {
426 return LexVar(lltok::LocalVar, lltok::LocalVarID);
429 /// Lex all tokens that start with a " character.
430 /// QuoteLabel "[^"]+":
431 /// StringConstant "[^"]*"
432 lltok::Kind LLLexer::LexQuote() {
433 lltok::Kind kind = ReadString(lltok::StringConstant);
434 if (kind == lltok::Error || kind == lltok::Eof)
435 return kind;
437 if (CurPtr[0] == ':') {
438 ++CurPtr;
439 if (StringRef(StrVal).contains(0)) {
440 LexError("NUL character is not allowed in names");
441 kind = lltok::Error;
442 } else {
443 kind = lltok::LabelStr;
447 return kind;
450 /// Lex all tokens that start with a ! character.
451 /// !foo
452 /// !
453 lltok::Kind LLLexer::LexExclaim() {
454 // Lex a metadata name as a MetadataVar.
455 if (isalpha(static_cast<unsigned char>(CurPtr[0])) ||
456 CurPtr[0] == '-' || CurPtr[0] == '$' ||
457 CurPtr[0] == '.' || CurPtr[0] == '_' || CurPtr[0] == '\\') {
458 ++CurPtr;
459 while (isalnum(static_cast<unsigned char>(CurPtr[0])) ||
460 CurPtr[0] == '-' || CurPtr[0] == '$' ||
461 CurPtr[0] == '.' || CurPtr[0] == '_' || CurPtr[0] == '\\')
462 ++CurPtr;
464 StrVal.assign(TokStart+1, CurPtr); // Skip !
465 UnEscapeLexed(StrVal);
466 return lltok::MetadataVar;
468 return lltok::exclaim;
471 /// Lex all tokens that start with a ^ character.
472 /// SummaryID ::= ^[0-9]+
473 lltok::Kind LLLexer::LexCaret() {
474 // Handle SummaryID: ^[0-9]+
475 return LexUIntID(lltok::SummaryID);
478 /// Lex all tokens that start with a # character.
479 /// AttrGrpID ::= #[0-9]+
480 /// Hash ::= #
481 lltok::Kind LLLexer::LexHash() {
482 // Handle AttrGrpID: #[0-9]+
483 if (isdigit(static_cast<unsigned char>(CurPtr[0])))
484 return LexUIntID(lltok::AttrGrpID);
485 return lltok::hash;
488 /// Lex a label, integer type, keyword, or hexadecimal integer constant.
489 /// Label [-a-zA-Z$._0-9]+:
490 /// IntegerType i[0-9]+
491 /// Keyword sdiv, float, ...
492 /// HexIntConstant [us]0x[0-9A-Fa-f]+
///
/// The candidates are tried in a fixed order: label, integer type, exact
/// keyword, type keyword, instruction keyword, DWARF / debug-info names,
/// [us]0x hex constant, and finally the "cc<N>" special case. If nothing
/// matches, lltok::Error is returned with CurPtr reset to TokStart+1.
493 lltok::Kind LLLexer::LexIdentifier() {
// StartChar points at the second character of the identifier: the first one
// is read back below as CurPtr[-1] / StartChar-1.
494 const char *StartChar = CurPtr;
// IntEnd is left null only when the identifier starts with 'i' (a possible
// integer type). Otherwise it is pinned to StartChar so that the
// "IntEnd != StartChar" integer-type test further down cannot succeed.
495 const char *IntEnd = CurPtr[-1] == 'i' ? nullptr : StartChar;
// KeywordEnd will mark the first character that is not [A-Za-z0-9_].
496 const char *KeywordEnd = nullptr;
// Scan the maximal run of label characters, recording where the digit-only
// prefix (IntEnd) and the keyword-character prefix (KeywordEnd) stop.
498 for (; isLabelChar(*CurPtr); ++CurPtr) {
499 // If we decide this is an integer, remember the end of the sequence.
500 if (!IntEnd && !isdigit(static_cast<unsigned char>(*CurPtr)))
501 IntEnd = CurPtr;
502 if (!KeywordEnd && !isalnum(static_cast<unsigned char>(*CurPtr)) &&
503 *CurPtr != '_')
504 KeywordEnd = CurPtr;
507 // If we stopped due to a colon, unless we were directed to ignore it,
508 // this really is a label.
509 if (!IgnoreColonInIdentifiers && *CurPtr == ':') {
// The label text runs from the identifier's first character up to, but not
// including, the colon; the post-increment then consumes the colon.
510 StrVal.assign(StartChar-1, CurPtr++);
511 return lltok::LabelStr;
514 // Otherwise, this wasn't a label. If this was valid as an integer type,
515 // return it.
516 if (!IntEnd) IntEnd = CurPtr;
517 if (IntEnd != StartChar) {
// Only the digits belong to the type token; rewind CurPtr to just after them.
518 CurPtr = IntEnd;
519 uint64_t NumBits = atoull(StartChar, CurPtr);
520 if (NumBits < IntegerType::MIN_INT_BITS ||
521 NumBits > IntegerType::MAX_INT_BITS) {
522 LexError("bitwidth for integer type out of range");
523 return lltok::Error;
525 TyVal = IntegerType::get(Context, NumBits);
526 return lltok::Type;
529 // Otherwise, this was a letter sequence. See which keyword this is.
530 if (!KeywordEnd) KeywordEnd = CurPtr;
531 CurPtr = KeywordEnd;
// Step back so that Keyword includes the identifier's first character.
532 --StartChar;
533 StringRef Keyword(StartChar, CurPtr - StartChar);
// KEYWORD(X): if the identifier is exactly the spelling X, return the token
// lltok::kw_X.
535 #define KEYWORD(STR) \
536 do { \
537 if (Keyword == #STR) \
538 return lltok::kw_##STR; \
539 } while (false)
541 KEYWORD(true); KEYWORD(false);
542 KEYWORD(declare); KEYWORD(define);
543 KEYWORD(global); KEYWORD(constant);
545 KEYWORD(dso_local);
546 KEYWORD(dso_preemptable);
548 KEYWORD(private);
549 KEYWORD(internal);
550 KEYWORD(available_externally);
551 KEYWORD(linkonce);
552 KEYWORD(linkonce_odr);
553 KEYWORD(weak); // Use as a linkage, and a modifier for "cmpxchg".
554 KEYWORD(weak_odr);
555 KEYWORD(appending);
556 KEYWORD(dllimport);
557 KEYWORD(dllexport);
558 KEYWORD(common);
559 KEYWORD(default);
560 KEYWORD(hidden);
561 KEYWORD(protected);
562 KEYWORD(unnamed_addr);
563 KEYWORD(local_unnamed_addr);
564 KEYWORD(externally_initialized);
565 KEYWORD(extern_weak);
566 KEYWORD(external);
567 KEYWORD(thread_local);
568 KEYWORD(localdynamic);
569 KEYWORD(initialexec);
570 KEYWORD(localexec);
571 KEYWORD(zeroinitializer);
572 KEYWORD(undef);
573 KEYWORD(null);
574 KEYWORD(none);
575 KEYWORD(poison);
576 KEYWORD(to);
577 KEYWORD(caller);
578 KEYWORD(within);
579 KEYWORD(from);
580 KEYWORD(tail);
581 KEYWORD(musttail);
582 KEYWORD(notail);
583 KEYWORD(target);
584 KEYWORD(triple);
585 KEYWORD(source_filename);
586 KEYWORD(unwind);
587 KEYWORD(datalayout);
588 KEYWORD(volatile);
589 KEYWORD(atomic);
590 KEYWORD(unordered);
591 KEYWORD(monotonic);
592 KEYWORD(acquire);
593 KEYWORD(release);
594 KEYWORD(acq_rel);
595 KEYWORD(seq_cst);
596 KEYWORD(syncscope);
598 KEYWORD(nnan);
599 KEYWORD(ninf);
600 KEYWORD(nsz);
601 KEYWORD(arcp);
602 KEYWORD(contract);
603 KEYWORD(reassoc);
604 KEYWORD(afn);
605 KEYWORD(fast);
606 KEYWORD(nuw);
607 KEYWORD(nsw);
608 KEYWORD(nusw);
609 KEYWORD(exact);
610 KEYWORD(disjoint);
611 KEYWORD(inbounds);
612 KEYWORD(nneg);
613 KEYWORD(samesign);
614 KEYWORD(inrange);
615 KEYWORD(addrspace);
616 KEYWORD(section);
617 KEYWORD(partition);
618 KEYWORD(code_model);
619 KEYWORD(alias);
620 KEYWORD(ifunc);
621 KEYWORD(module);
622 KEYWORD(asm);
623 KEYWORD(sideeffect);
624 KEYWORD(inteldialect);
625 KEYWORD(gc);
626 KEYWORD(prefix);
627 KEYWORD(prologue);
629 KEYWORD(no_sanitize_address);
630 KEYWORD(no_sanitize_hwaddress);
631 KEYWORD(sanitize_address_dyninit);
633 KEYWORD(ccc);
634 KEYWORD(fastcc);
635 KEYWORD(coldcc);
636 KEYWORD(cfguard_checkcc);
637 KEYWORD(x86_stdcallcc);
638 KEYWORD(x86_fastcallcc);
639 KEYWORD(x86_thiscallcc);
640 KEYWORD(x86_vectorcallcc);
641 KEYWORD(arm_apcscc);
642 KEYWORD(arm_aapcscc);
643 KEYWORD(arm_aapcs_vfpcc);
644 KEYWORD(aarch64_vector_pcs);
645 KEYWORD(aarch64_sve_vector_pcs);
646 KEYWORD(aarch64_sme_preservemost_from_x0);
647 KEYWORD(aarch64_sme_preservemost_from_x1);
648 KEYWORD(aarch64_sme_preservemost_from_x2);
649 KEYWORD(msp430_intrcc);
650 KEYWORD(avr_intrcc);
651 KEYWORD(avr_signalcc);
652 KEYWORD(ptx_kernel);
653 KEYWORD(ptx_device);
654 KEYWORD(spir_kernel);
655 KEYWORD(spir_func);
656 KEYWORD(intel_ocl_bicc);
657 KEYWORD(x86_64_sysvcc);
658 KEYWORD(win64cc);
659 KEYWORD(x86_regcallcc);
660 KEYWORD(swiftcc);
661 KEYWORD(swifttailcc);
662 KEYWORD(anyregcc);
663 KEYWORD(preserve_mostcc);
664 KEYWORD(preserve_allcc);
665 KEYWORD(preserve_nonecc);
666 KEYWORD(ghccc);
667 KEYWORD(x86_intrcc);
668 KEYWORD(hhvmcc);
669 KEYWORD(hhvm_ccc);
670 KEYWORD(cxx_fast_tlscc);
671 KEYWORD(amdgpu_vs);
672 KEYWORD(amdgpu_ls);
673 KEYWORD(amdgpu_hs);
674 KEYWORD(amdgpu_es);
675 KEYWORD(amdgpu_gs);
676 KEYWORD(amdgpu_ps);
677 KEYWORD(amdgpu_cs);
678 KEYWORD(amdgpu_cs_chain);
679 KEYWORD(amdgpu_cs_chain_preserve);
680 KEYWORD(amdgpu_kernel);
681 KEYWORD(amdgpu_gfx);
682 KEYWORD(tailcc);
683 KEYWORD(m68k_rtdcc);
684 KEYWORD(graalcc);
685 KEYWORD(riscv_vector_cc);
687 KEYWORD(cc);
688 KEYWORD(c);
690 KEYWORD(attributes);
691 KEYWORD(sync);
692 KEYWORD(async);
// Apply KEYWORD to every attribute display name supplied by Attributes.inc
// through its ATTRIBUTE_ENUM entries.
694 #define GET_ATTR_NAMES
695 #define ATTRIBUTE_ENUM(ENUM_NAME, DISPLAY_NAME) \
696 KEYWORD(DISPLAY_NAME);
697 #include "llvm/IR/Attributes.inc"
699 KEYWORD(read);
700 KEYWORD(write);
701 KEYWORD(readwrite);
702 KEYWORD(argmem);
703 KEYWORD(inaccessiblemem);
704 KEYWORD(argmemonly);
705 KEYWORD(inaccessiblememonly);
706 KEYWORD(inaccessiblemem_or_argmemonly);
708 // nofpclass attribute
709 KEYWORD(all);
710 KEYWORD(nan);
711 KEYWORD(snan);
712 KEYWORD(qnan);
713 KEYWORD(inf);
714 // ninf already a keyword
715 KEYWORD(pinf);
716 KEYWORD(norm);
717 KEYWORD(nnorm);
718 KEYWORD(pnorm);
719 // sub already a keyword
720 KEYWORD(nsub);
721 KEYWORD(psub);
722 KEYWORD(zero);
723 KEYWORD(nzero);
724 KEYWORD(pzero);
726 KEYWORD(type);
727 KEYWORD(opaque);
729 KEYWORD(comdat);
731 // Comdat types
732 KEYWORD(any);
733 KEYWORD(exactmatch);
734 KEYWORD(largest);
735 KEYWORD(nodeduplicate);
736 KEYWORD(samesize);
738 KEYWORD(eq); KEYWORD(ne); KEYWORD(slt); KEYWORD(sgt); KEYWORD(sle);
739 KEYWORD(sge); KEYWORD(ult); KEYWORD(ugt); KEYWORD(ule); KEYWORD(uge);
740 KEYWORD(oeq); KEYWORD(one); KEYWORD(olt); KEYWORD(ogt); KEYWORD(ole);
741 KEYWORD(oge); KEYWORD(ord); KEYWORD(uno); KEYWORD(ueq); KEYWORD(une);
743 KEYWORD(xchg); KEYWORD(nand); KEYWORD(max); KEYWORD(min); KEYWORD(umax);
744 KEYWORD(umin); KEYWORD(fmax); KEYWORD(fmin);
745 KEYWORD(uinc_wrap);
746 KEYWORD(udec_wrap);
747 KEYWORD(usub_cond);
748 KEYWORD(usub_sat);
750 KEYWORD(splat);
751 KEYWORD(vscale);
752 KEYWORD(x);
753 KEYWORD(blockaddress);
754 KEYWORD(dso_local_equivalent);
755 KEYWORD(no_cfi);
756 KEYWORD(ptrauth);
758 // Metadata types.
759 KEYWORD(distinct);
761 // Use-list order directives.
762 KEYWORD(uselistorder);
763 KEYWORD(uselistorder_bb);
765 KEYWORD(personality);
766 KEYWORD(cleanup);
767 KEYWORD(catch);
768 KEYWORD(filter);
770 // Summary index keywords.
771 KEYWORD(path);
772 KEYWORD(hash);
773 KEYWORD(gv);
774 KEYWORD(guid);
775 KEYWORD(name);
776 KEYWORD(summaries);
777 KEYWORD(flags);
778 KEYWORD(blockcount);
779 KEYWORD(linkage);
780 KEYWORD(visibility);
781 KEYWORD(notEligibleToImport);
782 KEYWORD(live);
783 KEYWORD(dsoLocal);
784 KEYWORD(canAutoHide);
785 KEYWORD(importType);
786 KEYWORD(definition);
787 KEYWORD(declaration);
788 KEYWORD(function);
789 KEYWORD(insts);
790 KEYWORD(funcFlags);
791 KEYWORD(readNone);
792 KEYWORD(readOnly);
793 KEYWORD(noRecurse);
794 KEYWORD(returnDoesNotAlias);
795 KEYWORD(noInline);
796 KEYWORD(alwaysInline);
797 KEYWORD(noUnwind);
798 KEYWORD(mayThrow);
799 KEYWORD(hasUnknownCall);
800 KEYWORD(mustBeUnreachable);
801 KEYWORD(calls);
802 KEYWORD(callee);
803 KEYWORD(params);
804 KEYWORD(param);
805 KEYWORD(hotness);
806 KEYWORD(unknown);
807 KEYWORD(critical);
808 KEYWORD(relbf);
809 KEYWORD(variable);
810 KEYWORD(vTableFuncs);
811 KEYWORD(virtFunc);
812 KEYWORD(aliasee);
813 KEYWORD(refs);
814 KEYWORD(typeIdInfo);
815 KEYWORD(typeTests);
816 KEYWORD(typeTestAssumeVCalls);
817 KEYWORD(typeCheckedLoadVCalls);
818 KEYWORD(typeTestAssumeConstVCalls);
819 KEYWORD(typeCheckedLoadConstVCalls);
820 KEYWORD(vFuncId);
821 KEYWORD(offset);
822 KEYWORD(args);
823 KEYWORD(typeid);
824 KEYWORD(typeidCompatibleVTable);
825 KEYWORD(summary);
826 KEYWORD(typeTestRes);
827 KEYWORD(kind);
828 KEYWORD(unsat);
829 KEYWORD(byteArray);
830 KEYWORD(inline);
831 KEYWORD(single);
832 KEYWORD(allOnes);
833 KEYWORD(sizeM1BitWidth);
834 KEYWORD(alignLog2);
835 KEYWORD(sizeM1);
836 KEYWORD(bitMask);
837 KEYWORD(inlineBits);
838 KEYWORD(vcall_visibility);
839 KEYWORD(wpdResolutions);
840 KEYWORD(wpdRes);
841 KEYWORD(indir);
842 KEYWORD(singleImpl);
843 KEYWORD(branchFunnel);
844 KEYWORD(singleImplName);
845 KEYWORD(resByArg);
846 KEYWORD(byArg);
847 KEYWORD(uniformRetVal);
848 KEYWORD(uniqueRetVal);
849 KEYWORD(virtualConstProp);
850 KEYWORD(info);
851 KEYWORD(byte);
852 KEYWORD(bit);
853 KEYWORD(varFlags);
854 KEYWORD(callsites);
855 KEYWORD(clones);
856 KEYWORD(stackIds);
857 KEYWORD(allocs);
858 KEYWORD(versions);
859 KEYWORD(memProf);
860 KEYWORD(notcold);
862 #undef KEYWORD
864 // Keywords for types.
// TYPEKEYWORD(S, T): if the identifier is exactly S, store the type T in
// TyVal and return lltok::Type.
865 #define TYPEKEYWORD(STR, LLVMTY) \
866 do { \
867 if (Keyword == STR) { \
868 TyVal = LLVMTY; \
869 return lltok::Type; \
871 } while (false)
873 TYPEKEYWORD("void", Type::getVoidTy(Context));
874 TYPEKEYWORD("half", Type::getHalfTy(Context));
875 TYPEKEYWORD("bfloat", Type::getBFloatTy(Context));
876 TYPEKEYWORD("float", Type::getFloatTy(Context));
877 TYPEKEYWORD("double", Type::getDoubleTy(Context));
878 TYPEKEYWORD("x86_fp80", Type::getX86_FP80Ty(Context));
879 TYPEKEYWORD("fp128", Type::getFP128Ty(Context));
880 TYPEKEYWORD("ppc_fp128", Type::getPPC_FP128Ty(Context));
881 TYPEKEYWORD("label", Type::getLabelTy(Context));
882 TYPEKEYWORD("metadata", Type::getMetadataTy(Context));
883 TYPEKEYWORD("x86_amx", Type::getX86_AMXTy(Context));
884 TYPEKEYWORD("token", Type::getTokenTy(Context));
885 TYPEKEYWORD("ptr", PointerType::getUnqual(Context));
887 #undef TYPEKEYWORD
889 // Keywords for instructions.
// INSTKEYWORD(X, E): if the identifier is exactly X, store the opcode
// Instruction::E in UIntVal and return lltok::kw_X.
890 #define INSTKEYWORD(STR, Enum) \
891 do { \
892 if (Keyword == #STR) { \
893 UIntVal = Instruction::Enum; \
894 return lltok::kw_##STR; \
896 } while (false)
898 INSTKEYWORD(fneg, FNeg);
900 INSTKEYWORD(add, Add); INSTKEYWORD(fadd, FAdd);
901 INSTKEYWORD(sub, Sub); INSTKEYWORD(fsub, FSub);
902 INSTKEYWORD(mul, Mul); INSTKEYWORD(fmul, FMul);
903 INSTKEYWORD(udiv, UDiv); INSTKEYWORD(sdiv, SDiv); INSTKEYWORD(fdiv, FDiv);
904 INSTKEYWORD(urem, URem); INSTKEYWORD(srem, SRem); INSTKEYWORD(frem, FRem);
905 INSTKEYWORD(shl, Shl); INSTKEYWORD(lshr, LShr); INSTKEYWORD(ashr, AShr);
906 INSTKEYWORD(and, And); INSTKEYWORD(or, Or); INSTKEYWORD(xor, Xor);
907 INSTKEYWORD(icmp, ICmp); INSTKEYWORD(fcmp, FCmp);
909 INSTKEYWORD(phi, PHI);
910 INSTKEYWORD(call, Call);
911 INSTKEYWORD(trunc, Trunc);
912 INSTKEYWORD(zext, ZExt);
913 INSTKEYWORD(sext, SExt);
914 INSTKEYWORD(fptrunc, FPTrunc);
915 INSTKEYWORD(fpext, FPExt);
916 INSTKEYWORD(uitofp, UIToFP);
917 INSTKEYWORD(sitofp, SIToFP);
918 INSTKEYWORD(fptoui, FPToUI);
919 INSTKEYWORD(fptosi, FPToSI);
920 INSTKEYWORD(inttoptr, IntToPtr);
921 INSTKEYWORD(ptrtoint, PtrToInt);
922 INSTKEYWORD(bitcast, BitCast);
923 INSTKEYWORD(addrspacecast, AddrSpaceCast);
924 INSTKEYWORD(select, Select);
925 INSTKEYWORD(va_arg, VAArg);
926 INSTKEYWORD(ret, Ret);
927 INSTKEYWORD(br, Br);
928 INSTKEYWORD(switch, Switch);
929 INSTKEYWORD(indirectbr, IndirectBr);
930 INSTKEYWORD(invoke, Invoke);
931 INSTKEYWORD(resume, Resume);
932 INSTKEYWORD(unreachable, Unreachable);
933 INSTKEYWORD(callbr, CallBr);
935 INSTKEYWORD(alloca, Alloca);
936 INSTKEYWORD(load, Load);
937 INSTKEYWORD(store, Store);
938 INSTKEYWORD(cmpxchg, AtomicCmpXchg);
939 INSTKEYWORD(atomicrmw, AtomicRMW);
940 INSTKEYWORD(fence, Fence);
941 INSTKEYWORD(getelementptr, GetElementPtr);
943 INSTKEYWORD(extractelement, ExtractElement);
944 INSTKEYWORD(insertelement, InsertElement);
945 INSTKEYWORD(shufflevector, ShuffleVector);
946 INSTKEYWORD(extractvalue, ExtractValue);
947 INSTKEYWORD(insertvalue, InsertValue);
948 INSTKEYWORD(landingpad, LandingPad);
949 INSTKEYWORD(cleanupret, CleanupRet);
950 INSTKEYWORD(catchret, CatchRet);
951 INSTKEYWORD(catchswitch, CatchSwitch);
952 INSTKEYWORD(catchpad, CatchPad);
953 INSTKEYWORD(cleanuppad, CleanupPad);
955 INSTKEYWORD(freeze, Freeze);
957 #undef INSTKEYWORD
// DWKEYWORD(T, Tok): a prefix match, not an exact one. Any identifier that
// starts with "DW_<T>_" is returned as token Tok with its full spelling in
// StrVal.
959 #define DWKEYWORD(TYPE, TOKEN) \
960 do { \
961 if (Keyword.starts_with("DW_" #TYPE "_")) { \
962 StrVal.assign(Keyword.begin(), Keyword.end()); \
963 return lltok::TOKEN; \
965 } while (false)
967 DWKEYWORD(TAG, DwarfTag);
968 DWKEYWORD(ATE, DwarfAttEncoding);
969 DWKEYWORD(VIRTUALITY, DwarfVirtuality);
970 DWKEYWORD(LANG, DwarfLang);
971 DWKEYWORD(CC, DwarfCC);
972 DWKEYWORD(OP, DwarfOp);
973 DWKEYWORD(MACINFO, DwarfMacinfo);
975 #undef DWKEYWORD
977 // Keywords for debug record types.
// Matches "dbg_<STR>" exactly; StrVal receives STR without the "dbg_" prefix.
978 #define DBGRECORDTYPEKEYWORD(STR) \
979 do { \
980 if (Keyword == "dbg_" #STR) { \
981 StrVal = #STR; \
982 return lltok::DbgRecordType; \
984 } while (false)
986 DBGRECORDTYPEKEYWORD(value);
987 DBGRECORDTYPEKEYWORD(declare);
988 DBGRECORDTYPEKEYWORD(assign);
989 DBGRECORDTYPEKEYWORD(label);
990 #undef DBGRECORDTYPEKEYWORD
// The remaining debug-info spellings are returned with the full identifier
// text in StrVal: prefix matches for DIFlag*, DISPFlag* and CSK_*, exact
// matches for the emission-kind and name-table-kind names.
992 if (Keyword.starts_with("DIFlag")) {
993 StrVal.assign(Keyword.begin(), Keyword.end());
994 return lltok::DIFlag;
997 if (Keyword.starts_with("DISPFlag")) {
998 StrVal.assign(Keyword.begin(), Keyword.end());
999 return lltok::DISPFlag;
1002 if (Keyword.starts_with("CSK_")) {
1003 StrVal.assign(Keyword.begin(), Keyword.end());
1004 return lltok::ChecksumKind;
1007 if (Keyword == "NoDebug" || Keyword == "FullDebug" ||
1008 Keyword == "LineTablesOnly" || Keyword == "DebugDirectivesOnly") {
1009 StrVal.assign(Keyword.begin(), Keyword.end());
1010 return lltok::EmissionKind;
1013 if (Keyword == "GNU" || Keyword == "Apple" || Keyword == "None" ||
1014 Keyword == "Default") {
1015 StrVal.assign(Keyword.begin(), Keyword.end());
1016 return lltok::NameTableKind;
1019 // Check for [us]0x[0-9A-Fa-f]+ which are Hexadecimal constant generated by
1020 // the CFE to avoid forcing it to deal with 64-bit numbers.
1021 if ((TokStart[0] == 'u' || TokStart[0] == 's') &&
1022 TokStart[1] == '0' && TokStart[2] == 'x' &&
1023 isxdigit(static_cast<unsigned char>(TokStart[3]))) {
// len counts the characters after the three-character "[us]0x" prefix; each
// hex digit contributes four bits.
1024 int len = CurPtr-TokStart-3;
1025 uint32_t bits = len * 4;
1026 StringRef HexStr(TokStart + 3, len);
1027 if (!all_of(HexStr, isxdigit)) {
1028 // Bad token, return it as an error.
1029 CurPtr = TokStart+3;
1030 return lltok::Error;
1032 APInt Tmp(bits, HexStr, 16);
// Shrink the value to its active bits; an all-zero value keeps its width.
1033 uint32_t activeBits = Tmp.getActiveBits();
1034 if (activeBits > 0 && activeBits < bits)
1035 Tmp = Tmp.trunc(activeBits);
// The second APSInt argument is true for the 'u' prefix (unsigned).
1036 APSIntVal = APSInt(Tmp, TokStart[0] == 'u');
1037 return lltok::APSInt;
1040 // If this is "cc1234", return this as just "cc".
1041 if (TokStart[0] == 'c' && TokStart[1] == 'c') {
1042 CurPtr = TokStart+2;
1043 return lltok::kw_cc;
1046 // Finally, if this isn't known, return an error.
1047 CurPtr = TokStart+1;
1048 return lltok::Error;
1051 /// Lex all tokens that start with a 0x prefix, knowing they match and are not
1052 /// labels.
1053 /// HexFPConstant 0x[0-9A-Fa-f]+
1054 /// HexFP80Constant 0xK[0-9A-Fa-f]+
1055 /// HexFP128Constant 0xL[0-9A-Fa-f]+
1056 /// HexPPC128Constant 0xM[0-9A-Fa-f]+
1057 /// HexHalfConstant 0xH[0-9A-Fa-f]+
1058 /// HexBFloatConstant 0xR[0-9A-Fa-f]+
1059 lltok::Kind LLLexer::Lex0x() {
1060 CurPtr = TokStart + 2;
1062 char Kind;
1063 if ((CurPtr[0] >= 'K' && CurPtr[0] <= 'M') || CurPtr[0] == 'H' ||
1064 CurPtr[0] == 'R') {
1065 Kind = *CurPtr++;
1066 } else {
1067 Kind = 'J';
1070 if (!isxdigit(static_cast<unsigned char>(CurPtr[0]))) {
1071 // Bad token, return it as an error.
1072 CurPtr = TokStart+1;
1073 return lltok::Error;
1076 while (isxdigit(static_cast<unsigned char>(CurPtr[0])))
1077 ++CurPtr;
1079 if (Kind == 'J') {
1080 // HexFPConstant - Floating point constant represented in IEEE format as a
1081 // hexadecimal number for when exponential notation is not precise enough.
1082 // Half, BFloat, Float, and double only.
1083 APFloatVal = APFloat(APFloat::IEEEdouble(),
1084 APInt(64, HexIntToVal(TokStart + 2, CurPtr)));
1085 return lltok::APFloat;
1088 uint64_t Pair[2];
1089 switch (Kind) {
1090 default: llvm_unreachable("Unknown kind!");
1091 case 'K':
1092 // F80HexFPConstant - x87 long double in hexadecimal format (10 bytes)
1093 FP80HexToIntPair(TokStart+3, CurPtr, Pair);
1094 APFloatVal = APFloat(APFloat::x87DoubleExtended(), APInt(80, Pair));
1095 return lltok::APFloat;
1096 case 'L':
1097 // F128HexFPConstant - IEEE 128-bit in hexadecimal format (16 bytes)
1098 HexToIntPair(TokStart+3, CurPtr, Pair);
1099 APFloatVal = APFloat(APFloat::IEEEquad(), APInt(128, Pair));
1100 return lltok::APFloat;
1101 case 'M':
1102 // PPC128HexFPConstant - PowerPC 128-bit in hexadecimal format (16 bytes)
1103 HexToIntPair(TokStart+3, CurPtr, Pair);
1104 APFloatVal = APFloat(APFloat::PPCDoubleDouble(), APInt(128, Pair));
1105 return lltok::APFloat;
1106 case 'H':
1107 APFloatVal = APFloat(APFloat::IEEEhalf(),
1108 APInt(16,HexIntToVal(TokStart+3, CurPtr)));
1109 return lltok::APFloat;
1110 case 'R':
1111 // Brain floating point
1112 APFloatVal = APFloat(APFloat::BFloat(),
1113 APInt(16, HexIntToVal(TokStart + 3, CurPtr)));
1114 return lltok::APFloat;
1118 /// Lex tokens for a label or a numeric constant, possibly starting with -.
1119 /// Label [-a-zA-Z$._0-9]+:
1120 /// NInteger -[0-9]+
1121 /// FPConstant [-+]?[0-9]+[.][0-9]*([eE][-+]?[0-9]+)?
1122 /// PInteger [0-9]+
1123 /// HexFPConstant 0x[0-9A-Fa-f]+
1124 /// HexFP80Constant 0xK[0-9A-Fa-f]+
1125 /// HexFP128Constant 0xL[0-9A-Fa-f]+
1126 /// HexPPC128Constant 0xM[0-9A-Fa-f]+
1127 lltok::Kind LLLexer::LexDigitOrNegative() {
1128 // If the letter after the negative is not a number, this is probably a label.
1129 if (!isdigit(static_cast<unsigned char>(TokStart[0])) &&
1130 !isdigit(static_cast<unsigned char>(CurPtr[0]))) {
1131 // Okay, this is not a number after the -, it's probably a label.
1132 if (const char *End = isLabelTail(CurPtr)) {
1133 StrVal.assign(TokStart, End-1);
1134 CurPtr = End;
1135 return lltok::LabelStr;
1138 return lltok::Error;
1141 // At this point, it is either a label, int or fp constant.
1143 // Skip digits, we have at least one.
1144 for (; isdigit(static_cast<unsigned char>(CurPtr[0])); ++CurPtr)
1145 /*empty*/;
1147 // Check if this is a fully-numeric label:
1148 if (isdigit(TokStart[0]) && CurPtr[0] == ':') {
1149 uint64_t Val = atoull(TokStart, CurPtr);
1150 ++CurPtr; // Skip the colon.
1151 if ((unsigned)Val != Val)
1152 LexError("invalid value number (too large)");
1153 UIntVal = unsigned(Val);
1154 return lltok::LabelID;
1157 // Check to see if this really is a string label, e.g. "-1:".
1158 if (isLabelChar(CurPtr[0]) || CurPtr[0] == ':') {
1159 if (const char *End = isLabelTail(CurPtr)) {
1160 StrVal.assign(TokStart, End-1);
1161 CurPtr = End;
1162 return lltok::LabelStr;
1166 // If the next character is a '.', then it is a fp value, otherwise its
1167 // integer.
1168 if (CurPtr[0] != '.') {
1169 if (TokStart[0] == '0' && TokStart[1] == 'x')
1170 return Lex0x();
1171 APSIntVal = APSInt(StringRef(TokStart, CurPtr - TokStart));
1172 return lltok::APSInt;
1175 ++CurPtr;
1177 // Skip over [0-9]*([eE][-+]?[0-9]+)?
1178 while (isdigit(static_cast<unsigned char>(CurPtr[0]))) ++CurPtr;
1180 if (CurPtr[0] == 'e' || CurPtr[0] == 'E') {
1181 if (isdigit(static_cast<unsigned char>(CurPtr[1])) ||
1182 ((CurPtr[1] == '-' || CurPtr[1] == '+') &&
1183 isdigit(static_cast<unsigned char>(CurPtr[2])))) {
1184 CurPtr += 2;
1185 while (isdigit(static_cast<unsigned char>(CurPtr[0]))) ++CurPtr;
1189 APFloatVal = APFloat(APFloat::IEEEdouble(),
1190 StringRef(TokStart, CurPtr - TokStart));
1191 return lltok::APFloat;
1194 /// Lex a floating point constant starting with +.
1195 /// FPConstant [-+]?[0-9]+[.][0-9]*([eE][-+]?[0-9]+)?
1196 lltok::Kind LLLexer::LexPositive() {
1197 // If the letter after the negative is a number, this is probably not a
1198 // label.
1199 if (!isdigit(static_cast<unsigned char>(CurPtr[0])))
1200 return lltok::Error;
1202 // Skip digits.
1203 for (++CurPtr; isdigit(static_cast<unsigned char>(CurPtr[0])); ++CurPtr)
1204 /*empty*/;
1206 // At this point, we need a '.'.
1207 if (CurPtr[0] != '.') {
1208 CurPtr = TokStart+1;
1209 return lltok::Error;
1212 ++CurPtr;
1214 // Skip over [0-9]*([eE][-+]?[0-9]+)?
1215 while (isdigit(static_cast<unsigned char>(CurPtr[0]))) ++CurPtr;
1217 if (CurPtr[0] == 'e' || CurPtr[0] == 'E') {
1218 if (isdigit(static_cast<unsigned char>(CurPtr[1])) ||
1219 ((CurPtr[1] == '-' || CurPtr[1] == '+') &&
1220 isdigit(static_cast<unsigned char>(CurPtr[2])))) {
1221 CurPtr += 2;
1222 while (isdigit(static_cast<unsigned char>(CurPtr[0]))) ++CurPtr;
1226 APFloatVal = APFloat(APFloat::IEEEdouble(),
1227 StringRef(TokStart, CurPtr - TokStart));
1228 return lltok::APFloat;