[nfc][mlir][scf]: Define scf.for lower/upper bounds can be also negative or zero...
[llvm-project.git] / llvm / lib / MC / MCParser / MasmParser.cpp
blob78261c1f9fedb21f1b102bcbae1e44d03d16ba54
//===- MasmParser.cpp - Parser for Assembly Files -------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This class implements the parser for assembly files.
11 //===----------------------------------------------------------------------===//
13 #include "llvm/ADT/APFloat.h"
14 #include "llvm/ADT/APInt.h"
15 #include "llvm/ADT/ArrayRef.h"
16 #include "llvm/ADT/BitVector.h"
17 #include "llvm/ADT/STLExtras.h"
18 #include "llvm/ADT/SmallString.h"
19 #include "llvm/ADT/SmallVector.h"
20 #include "llvm/ADT/StringExtras.h"
21 #include "llvm/ADT/StringMap.h"
22 #include "llvm/ADT/StringRef.h"
23 #include "llvm/ADT/StringSwitch.h"
24 #include "llvm/ADT/Twine.h"
25 #include "llvm/BinaryFormat/Dwarf.h"
26 #include "llvm/DebugInfo/CodeView/SymbolRecord.h"
27 #include "llvm/MC/MCAsmInfo.h"
28 #include "llvm/MC/MCCodeView.h"
29 #include "llvm/MC/MCContext.h"
30 #include "llvm/MC/MCDirectives.h"
31 #include "llvm/MC/MCDwarf.h"
32 #include "llvm/MC/MCExpr.h"
33 #include "llvm/MC/MCInstPrinter.h"
34 #include "llvm/MC/MCInstrDesc.h"
35 #include "llvm/MC/MCInstrInfo.h"
36 #include "llvm/MC/MCParser/AsmCond.h"
37 #include "llvm/MC/MCParser/AsmLexer.h"
38 #include "llvm/MC/MCParser/MCAsmLexer.h"
39 #include "llvm/MC/MCParser/MCAsmParser.h"
40 #include "llvm/MC/MCParser/MCAsmParserExtension.h"
41 #include "llvm/MC/MCParser/MCParsedAsmOperand.h"
42 #include "llvm/MC/MCParser/MCTargetAsmParser.h"
43 #include "llvm/MC/MCRegisterInfo.h"
44 #include "llvm/MC/MCSection.h"
45 #include "llvm/MC/MCStreamer.h"
46 #include "llvm/MC/MCSubtargetInfo.h"
47 #include "llvm/MC/MCSymbol.h"
48 #include "llvm/MC/MCTargetOptions.h"
49 #include "llvm/Support/Casting.h"
50 #include "llvm/Support/CommandLine.h"
51 #include "llvm/Support/ErrorHandling.h"
52 #include "llvm/Support/Format.h"
53 #include "llvm/Support/MD5.h"
54 #include "llvm/Support/MathExtras.h"
55 #include "llvm/Support/MemoryBuffer.h"
56 #include "llvm/Support/Path.h"
57 #include "llvm/Support/SMLoc.h"
58 #include "llvm/Support/SourceMgr.h"
59 #include "llvm/Support/raw_ostream.h"
60 #include <algorithm>
61 #include <cassert>
62 #include <climits>
63 #include <cstddef>
64 #include <cstdint>
65 #include <ctime>
66 #include <deque>
67 #include <memory>
68 #include <optional>
69 #include <sstream>
70 #include <string>
71 #include <tuple>
72 #include <utility>
73 #include <vector>
75 using namespace llvm;
77 namespace {
79 /// Helper types for tracking macro definitions.
80 typedef std::vector<AsmToken> MCAsmMacroArgument;
81 typedef std::vector<MCAsmMacroArgument> MCAsmMacroArguments;
83 /// Helper class for storing information about an active macro instantiation.
84 struct MacroInstantiation {
85 /// The location of the instantiation.
86 SMLoc InstantiationLoc;
88 /// The buffer where parsing should resume upon instantiation completion.
89 unsigned ExitBuffer;
91 /// The location where parsing should resume upon instantiation completion.
92 SMLoc ExitLoc;
94 /// The depth of TheCondStack at the start of the instantiation.
95 size_t CondStackDepth;
98 struct ParseStatementInfo {
99 /// The parsed operands from the last parsed statement.
100 SmallVector<std::unique_ptr<MCParsedAsmOperand>, 8> ParsedOperands;
102 /// The opcode from the last parsed instruction.
103 unsigned Opcode = ~0U;
105 /// Was there an error parsing the inline assembly?
106 bool ParseError = false;
108 /// The value associated with a macro exit.
109 std::optional<std::string> ExitValue;
111 SmallVectorImpl<AsmRewrite> *AsmRewrites = nullptr;
113 ParseStatementInfo() = delete;
114 ParseStatementInfo(SmallVectorImpl<AsmRewrite> *rewrites)
115 : AsmRewrites(rewrites) {}
/// Kind of a struct field; selects which initializer representation is used.
enum FieldType {
  /// Initializer is an integer expression, stored as an MCExpr.
  FT_INTEGRAL,
  /// Initializer is a real number, stored as an APInt.
  FT_REAL,
  /// Initializer is a struct initializer, stored recursively.
  FT_STRUCT
};
124 struct FieldInfo;
125 struct StructInfo {
126 StringRef Name;
127 bool IsUnion = false;
128 bool Initializable = true;
129 unsigned Alignment = 0;
130 unsigned AlignmentSize = 0;
131 unsigned NextOffset = 0;
132 unsigned Size = 0;
133 std::vector<FieldInfo> Fields;
134 StringMap<size_t> FieldsByName;
136 FieldInfo &addField(StringRef FieldName, FieldType FT,
137 unsigned FieldAlignmentSize);
139 StructInfo() = default;
140 StructInfo(StringRef StructName, bool Union, unsigned AlignmentValue);
143 // FIXME: This should probably use a class hierarchy, raw pointers between the
144 // objects, and dynamic type resolution instead of a union. On the other hand,
145 // ownership then becomes much more complicated; the obvious thing would be to
146 // use BumpPtrAllocator, but the lack of a destructor makes that messy.
148 struct StructInitializer;
149 struct IntFieldInfo {
150 SmallVector<const MCExpr *, 1> Values;
152 IntFieldInfo() = default;
153 IntFieldInfo(const SmallVector<const MCExpr *, 1> &V) { Values = V; }
154 IntFieldInfo(SmallVector<const MCExpr *, 1> &&V) { Values = std::move(V); }
156 struct RealFieldInfo {
157 SmallVector<APInt, 1> AsIntValues;
159 RealFieldInfo() = default;
160 RealFieldInfo(const SmallVector<APInt, 1> &V) { AsIntValues = V; }
161 RealFieldInfo(SmallVector<APInt, 1> &&V) { AsIntValues = std::move(V); }
163 struct StructFieldInfo {
164 std::vector<StructInitializer> Initializers;
165 StructInfo Structure;
167 StructFieldInfo() = default;
168 StructFieldInfo(std::vector<StructInitializer> V, StructInfo S);
171 class FieldInitializer {
172 public:
173 FieldType FT;
174 union {
175 IntFieldInfo IntInfo;
176 RealFieldInfo RealInfo;
177 StructFieldInfo StructInfo;
180 ~FieldInitializer();
181 FieldInitializer(FieldType FT);
183 FieldInitializer(SmallVector<const MCExpr *, 1> &&Values);
184 FieldInitializer(SmallVector<APInt, 1> &&AsIntValues);
185 FieldInitializer(std::vector<StructInitializer> &&Initializers,
186 struct StructInfo Structure);
188 FieldInitializer(const FieldInitializer &Initializer);
189 FieldInitializer(FieldInitializer &&Initializer);
191 FieldInitializer &operator=(const FieldInitializer &Initializer);
192 FieldInitializer &operator=(FieldInitializer &&Initializer);
195 struct StructInitializer {
196 std::vector<FieldInitializer> FieldInitializers;
199 struct FieldInfo {
200 // Offset of the field within the containing STRUCT.
201 unsigned Offset = 0;
203 // Total size of the field (= LengthOf * Type).
204 unsigned SizeOf = 0;
206 // Number of elements in the field (1 if scalar, >1 if an array).
207 unsigned LengthOf = 0;
209 // Size of a single entry in this field, in bytes ("type" in MASM standards).
210 unsigned Type = 0;
212 FieldInitializer Contents;
214 FieldInfo(FieldType FT) : Contents(FT) {}
217 StructFieldInfo::StructFieldInfo(std::vector<StructInitializer> V,
218 StructInfo S) {
219 Initializers = std::move(V);
220 Structure = S;
223 StructInfo::StructInfo(StringRef StructName, bool Union,
224 unsigned AlignmentValue)
225 : Name(StructName), IsUnion(Union), Alignment(AlignmentValue) {}
227 FieldInfo &StructInfo::addField(StringRef FieldName, FieldType FT,
228 unsigned FieldAlignmentSize) {
229 if (!FieldName.empty())
230 FieldsByName[FieldName.lower()] = Fields.size();
231 Fields.emplace_back(FT);
232 FieldInfo &Field = Fields.back();
233 Field.Offset =
234 llvm::alignTo(NextOffset, std::min(Alignment, FieldAlignmentSize));
235 if (!IsUnion) {
236 NextOffset = std::max(NextOffset, Field.Offset);
238 AlignmentSize = std::max(AlignmentSize, FieldAlignmentSize);
239 return Field;
242 FieldInitializer::~FieldInitializer() {
243 switch (FT) {
244 case FT_INTEGRAL:
245 IntInfo.~IntFieldInfo();
246 break;
247 case FT_REAL:
248 RealInfo.~RealFieldInfo();
249 break;
250 case FT_STRUCT:
251 StructInfo.~StructFieldInfo();
252 break;
256 FieldInitializer::FieldInitializer(FieldType FT) : FT(FT) {
257 switch (FT) {
258 case FT_INTEGRAL:
259 new (&IntInfo) IntFieldInfo();
260 break;
261 case FT_REAL:
262 new (&RealInfo) RealFieldInfo();
263 break;
264 case FT_STRUCT:
265 new (&StructInfo) StructFieldInfo();
266 break;
270 FieldInitializer::FieldInitializer(SmallVector<const MCExpr *, 1> &&Values)
271 : FT(FT_INTEGRAL) {
272 new (&IntInfo) IntFieldInfo(std::move(Values));
275 FieldInitializer::FieldInitializer(SmallVector<APInt, 1> &&AsIntValues)
276 : FT(FT_REAL) {
277 new (&RealInfo) RealFieldInfo(std::move(AsIntValues));
280 FieldInitializer::FieldInitializer(
281 std::vector<StructInitializer> &&Initializers, struct StructInfo Structure)
282 : FT(FT_STRUCT) {
283 new (&StructInfo) StructFieldInfo(std::move(Initializers), Structure);
286 FieldInitializer::FieldInitializer(const FieldInitializer &Initializer)
287 : FT(Initializer.FT) {
288 switch (FT) {
289 case FT_INTEGRAL:
290 new (&IntInfo) IntFieldInfo(Initializer.IntInfo);
291 break;
292 case FT_REAL:
293 new (&RealInfo) RealFieldInfo(Initializer.RealInfo);
294 break;
295 case FT_STRUCT:
296 new (&StructInfo) StructFieldInfo(Initializer.StructInfo);
297 break;
301 FieldInitializer::FieldInitializer(FieldInitializer &&Initializer)
302 : FT(Initializer.FT) {
303 switch (FT) {
304 case FT_INTEGRAL:
305 new (&IntInfo) IntFieldInfo(Initializer.IntInfo);
306 break;
307 case FT_REAL:
308 new (&RealInfo) RealFieldInfo(Initializer.RealInfo);
309 break;
310 case FT_STRUCT:
311 new (&StructInfo) StructFieldInfo(Initializer.StructInfo);
312 break;
316 FieldInitializer &
317 FieldInitializer::operator=(const FieldInitializer &Initializer) {
318 if (FT != Initializer.FT) {
319 switch (FT) {
320 case FT_INTEGRAL:
321 IntInfo.~IntFieldInfo();
322 break;
323 case FT_REAL:
324 RealInfo.~RealFieldInfo();
325 break;
326 case FT_STRUCT:
327 StructInfo.~StructFieldInfo();
328 break;
331 FT = Initializer.FT;
332 switch (FT) {
333 case FT_INTEGRAL:
334 IntInfo = Initializer.IntInfo;
335 break;
336 case FT_REAL:
337 RealInfo = Initializer.RealInfo;
338 break;
339 case FT_STRUCT:
340 StructInfo = Initializer.StructInfo;
341 break;
343 return *this;
346 FieldInitializer &FieldInitializer::operator=(FieldInitializer &&Initializer) {
347 if (FT != Initializer.FT) {
348 switch (FT) {
349 case FT_INTEGRAL:
350 IntInfo.~IntFieldInfo();
351 break;
352 case FT_REAL:
353 RealInfo.~RealFieldInfo();
354 break;
355 case FT_STRUCT:
356 StructInfo.~StructFieldInfo();
357 break;
360 FT = Initializer.FT;
361 switch (FT) {
362 case FT_INTEGRAL:
363 IntInfo = Initializer.IntInfo;
364 break;
365 case FT_REAL:
366 RealInfo = Initializer.RealInfo;
367 break;
368 case FT_STRUCT:
369 StructInfo = Initializer.StructInfo;
370 break;
372 return *this;
375 /// The concrete assembly parser instance.
376 // Note that this is a full MCAsmParser, not an MCAsmParserExtension!
377 // It's a peer of AsmParser, not of COFFAsmParser, WasmAsmParser, etc.
378 class MasmParser : public MCAsmParser {
379 private:
380 AsmLexer Lexer;
381 MCContext &Ctx;
382 MCStreamer &Out;
383 const MCAsmInfo &MAI;
384 SourceMgr &SrcMgr;
385 SourceMgr::DiagHandlerTy SavedDiagHandler;
386 void *SavedDiagContext;
387 std::unique_ptr<MCAsmParserExtension> PlatformParser;
389 /// This is the current buffer index we're lexing from as managed by the
390 /// SourceMgr object.
391 unsigned CurBuffer;
393 /// time of assembly
394 struct tm TM;
396 BitVector EndStatementAtEOFStack;
398 AsmCond TheCondState;
399 std::vector<AsmCond> TheCondStack;
401 /// maps directive names to handler methods in parser
402 /// extensions. Extensions register themselves in this map by calling
403 /// addDirectiveHandler.
404 StringMap<ExtensionDirectiveHandler> ExtensionDirectiveMap;
406 /// maps assembly-time variable names to variables.
407 struct Variable {
408 enum RedefinableKind { NOT_REDEFINABLE, WARN_ON_REDEFINITION, REDEFINABLE };
410 StringRef Name;
411 RedefinableKind Redefinable = REDEFINABLE;
412 bool IsText = false;
413 std::string TextValue;
415 StringMap<Variable> Variables;
417 /// Stack of active struct definitions.
418 SmallVector<StructInfo, 1> StructInProgress;
420 /// Maps struct tags to struct definitions.
421 StringMap<StructInfo> Structs;
423 /// Maps data location names to types.
424 StringMap<AsmTypeInfo> KnownType;
426 /// Stack of active macro instantiations.
427 std::vector<MacroInstantiation*> ActiveMacros;
429 /// List of bodies of anonymous macros.
430 std::deque<MCAsmMacro> MacroLikeBodies;
432 /// Keeps track of how many .macro's have been instantiated.
433 unsigned NumOfMacroInstantiations;
435 /// The values from the last parsed cpp hash file line comment if any.
436 struct CppHashInfoTy {
437 StringRef Filename;
438 int64_t LineNumber;
439 SMLoc Loc;
440 unsigned Buf;
441 CppHashInfoTy() : LineNumber(0), Buf(0) {}
443 CppHashInfoTy CppHashInfo;
445 /// The filename from the first cpp hash file line comment, if any.
446 StringRef FirstCppHashFilename;
448 /// List of forward directional labels for diagnosis at the end.
449 SmallVector<std::tuple<SMLoc, CppHashInfoTy, MCSymbol *>, 4> DirLabels;
  /// AssemblerDialect. ~0U means unset value and use value provided by MAI.
452 /// Defaults to 1U, meaning Intel.
453 unsigned AssemblerDialect = 1U;
455 /// is Darwin compatibility enabled?
456 bool IsDarwin = false;
458 /// Are we parsing ms-style inline assembly?
459 bool ParsingMSInlineAsm = false;
461 /// Did we already inform the user about inconsistent MD5 usage?
462 bool ReportedInconsistentMD5 = false;
464 // Current <...> expression depth.
465 unsigned AngleBracketDepth = 0U;
467 // Number of locals defined.
468 uint16_t LocalCounter = 0;
470 public:
471 MasmParser(SourceMgr &SM, MCContext &Ctx, MCStreamer &Out,
472 const MCAsmInfo &MAI, struct tm TM, unsigned CB = 0);
473 MasmParser(const MasmParser &) = delete;
474 MasmParser &operator=(const MasmParser &) = delete;
475 ~MasmParser() override;
477 bool Run(bool NoInitialTextSection, bool NoFinalize = false) override;
479 void addDirectiveHandler(StringRef Directive,
480 ExtensionDirectiveHandler Handler) override {
481 ExtensionDirectiveMap[Directive] = Handler;
482 DirectiveKindMap.try_emplace(Directive, DK_HANDLER_DIRECTIVE);
485 void addAliasForDirective(StringRef Directive, StringRef Alias) override {
486 DirectiveKindMap[Directive] = DirectiveKindMap[Alias];
489 /// @name MCAsmParser Interface
490 /// {
492 SourceMgr &getSourceManager() override { return SrcMgr; }
493 MCAsmLexer &getLexer() override { return Lexer; }
494 MCContext &getContext() override { return Ctx; }
495 MCStreamer &getStreamer() override { return Out; }
497 CodeViewContext &getCVContext() { return Ctx.getCVContext(); }
499 unsigned getAssemblerDialect() override {
500 if (AssemblerDialect == ~0U)
501 return MAI.getAssemblerDialect();
502 else
503 return AssemblerDialect;
505 void setAssemblerDialect(unsigned i) override {
506 AssemblerDialect = i;
509 void Note(SMLoc L, const Twine &Msg, SMRange Range = std::nullopt) override;
510 bool Warning(SMLoc L, const Twine &Msg,
511 SMRange Range = std::nullopt) override;
512 bool printError(SMLoc L, const Twine &Msg,
513 SMRange Range = std::nullopt) override;
515 enum ExpandKind { ExpandMacros, DoNotExpandMacros };
516 const AsmToken &Lex(ExpandKind ExpandNextToken);
517 const AsmToken &Lex() override { return Lex(ExpandMacros); }
519 void setParsingMSInlineAsm(bool V) override {
520 ParsingMSInlineAsm = V;
521 // When parsing MS inline asm, we must lex 0b1101 and 0ABCH as binary and
522 // hex integer literals.
523 Lexer.setLexMasmIntegers(V);
525 bool isParsingMSInlineAsm() override { return ParsingMSInlineAsm; }
527 bool isParsingMasm() const override { return true; }
529 bool defineMacro(StringRef Name, StringRef Value) override;
531 bool lookUpField(StringRef Name, AsmFieldInfo &Info) const override;
532 bool lookUpField(StringRef Base, StringRef Member,
533 AsmFieldInfo &Info) const override;
535 bool lookUpType(StringRef Name, AsmTypeInfo &Info) const override;
537 bool parseMSInlineAsm(std::string &AsmString, unsigned &NumOutputs,
538 unsigned &NumInputs,
539 SmallVectorImpl<std::pair<void *, bool>> &OpDecls,
540 SmallVectorImpl<std::string> &Constraints,
541 SmallVectorImpl<std::string> &Clobbers,
542 const MCInstrInfo *MII, MCInstPrinter *IP,
543 MCAsmParserSemaCallback &SI) override;
545 bool parseExpression(const MCExpr *&Res);
546 bool parseExpression(const MCExpr *&Res, SMLoc &EndLoc) override;
547 bool parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc,
548 AsmTypeInfo *TypeInfo) override;
549 bool parseParenExpression(const MCExpr *&Res, SMLoc &EndLoc) override;
550 bool parseParenExprOfDepth(unsigned ParenDepth, const MCExpr *&Res,
551 SMLoc &EndLoc) override;
552 bool parseAbsoluteExpression(int64_t &Res) override;
554 /// Parse a floating point expression using the float \p Semantics
555 /// and set \p Res to the value.
556 bool parseRealValue(const fltSemantics &Semantics, APInt &Res);
558 /// Parse an identifier or string (as a quoted identifier)
559 /// and set \p Res to the identifier contents.
560 enum IdentifierPositionKind { StandardPosition, StartOfStatement };
561 bool parseIdentifier(StringRef &Res, IdentifierPositionKind Position);
562 bool parseIdentifier(StringRef &Res) override {
563 return parseIdentifier(Res, StandardPosition);
565 void eatToEndOfStatement() override;
567 bool checkForValidSection() override;
569 /// }
571 private:
572 bool expandMacros();
573 const AsmToken peekTok(bool ShouldSkipSpace = true);
575 bool parseStatement(ParseStatementInfo &Info,
576 MCAsmParserSemaCallback *SI);
577 bool parseCurlyBlockScope(SmallVectorImpl<AsmRewrite>& AsmStrRewrites);
578 bool parseCppHashLineFilenameComment(SMLoc L);
580 bool expandMacro(raw_svector_ostream &OS, StringRef Body,
581 ArrayRef<MCAsmMacroParameter> Parameters,
582 ArrayRef<MCAsmMacroArgument> A,
583 const std::vector<std::string> &Locals, SMLoc L);
585 /// Are we inside a macro instantiation?
586 bool isInsideMacroInstantiation() {return !ActiveMacros.empty();}
588 /// Handle entry to macro instantiation.
590 /// \param M The macro.
591 /// \param NameLoc Instantiation location.
592 bool handleMacroEntry(
593 const MCAsmMacro *M, SMLoc NameLoc,
594 AsmToken::TokenKind ArgumentEndTok = AsmToken::EndOfStatement);
596 /// Handle invocation of macro function.
598 /// \param M The macro.
599 /// \param NameLoc Invocation location.
600 bool handleMacroInvocation(const MCAsmMacro *M, SMLoc NameLoc);
602 /// Handle exit from macro instantiation.
603 void handleMacroExit();
605 /// Extract AsmTokens for a macro argument.
606 bool
607 parseMacroArgument(const MCAsmMacroParameter *MP, MCAsmMacroArgument &MA,
608 AsmToken::TokenKind EndTok = AsmToken::EndOfStatement);
610 /// Parse all macro arguments for a given macro.
611 bool
612 parseMacroArguments(const MCAsmMacro *M, MCAsmMacroArguments &A,
613 AsmToken::TokenKind EndTok = AsmToken::EndOfStatement);
615 void printMacroInstantiations();
617 bool expandStatement(SMLoc Loc);
619 void printMessage(SMLoc Loc, SourceMgr::DiagKind Kind, const Twine &Msg,
620 SMRange Range = std::nullopt) const {
621 ArrayRef<SMRange> Ranges(Range);
622 SrcMgr.PrintMessage(Loc, Kind, Msg, Ranges);
624 static void DiagHandler(const SMDiagnostic &Diag, void *Context);
626 bool lookUpField(const StructInfo &Structure, StringRef Member,
627 AsmFieldInfo &Info) const;
629 /// Should we emit DWARF describing this assembler source? (Returns false if
630 /// the source has .file directives, which means we don't want to generate
631 /// info describing the assembler source itself.)
632 bool enabledGenDwarfForAssembly();
634 /// Enter the specified file. This returns true on failure.
635 bool enterIncludeFile(const std::string &Filename);
637 /// Reset the current lexer position to that given by \p Loc. The
638 /// current token is not set; clients should ensure Lex() is called
639 /// subsequently.
641 /// \param InBuffer If not 0, should be the known buffer id that contains the
642 /// location.
643 void jumpToLoc(SMLoc Loc, unsigned InBuffer = 0,
644 bool EndStatementAtEOF = true);
646 /// Parse up to a token of kind \p EndTok and return the contents from the
647 /// current token up to (but not including) this token; the current token on
648 /// exit will be either this kind or EOF. Reads through instantiated macro
649 /// functions and text macros.
650 SmallVector<StringRef, 1> parseStringRefsTo(AsmToken::TokenKind EndTok);
651 std::string parseStringTo(AsmToken::TokenKind EndTok);
653 /// Parse up to the end of statement and return the contents from the current
654 /// token until the end of the statement; the current token on exit will be
655 /// either the EndOfStatement or EOF.
656 StringRef parseStringToEndOfStatement() override;
658 bool parseTextItem(std::string &Data);
660 unsigned getBinOpPrecedence(AsmToken::TokenKind K,
661 MCBinaryExpr::Opcode &Kind);
663 bool parseBinOpRHS(unsigned Precedence, const MCExpr *&Res, SMLoc &EndLoc);
664 bool parseParenExpr(const MCExpr *&Res, SMLoc &EndLoc);
665 bool parseBracketExpr(const MCExpr *&Res, SMLoc &EndLoc);
667 bool parseRegisterOrRegisterNumber(int64_t &Register, SMLoc DirectiveLoc);
669 bool parseCVFunctionId(int64_t &FunctionId, StringRef DirectiveName);
670 bool parseCVFileId(int64_t &FileId, StringRef DirectiveName);
672 // Generic (target and platform independent) directive parsing.
673 enum DirectiveKind {
674 DK_NO_DIRECTIVE, // Placeholder
675 DK_HANDLER_DIRECTIVE,
676 DK_ASSIGN,
677 DK_EQU,
678 DK_TEXTEQU,
679 DK_ASCII,
680 DK_ASCIZ,
681 DK_STRING,
682 DK_BYTE,
683 DK_SBYTE,
684 DK_WORD,
685 DK_SWORD,
686 DK_DWORD,
687 DK_SDWORD,
688 DK_FWORD,
689 DK_QWORD,
690 DK_SQWORD,
691 DK_DB,
692 DK_DD,
693 DK_DF,
694 DK_DQ,
695 DK_DW,
696 DK_REAL4,
697 DK_REAL8,
698 DK_REAL10,
699 DK_ALIGN,
700 DK_EVEN,
701 DK_ORG,
702 DK_ENDR,
703 DK_EXTERN,
704 DK_PUBLIC,
705 DK_COMM,
706 DK_COMMENT,
707 DK_INCLUDE,
708 DK_REPEAT,
709 DK_WHILE,
710 DK_FOR,
711 DK_FORC,
712 DK_IF,
713 DK_IFE,
714 DK_IFB,
715 DK_IFNB,
716 DK_IFDEF,
717 DK_IFNDEF,
718 DK_IFDIF,
719 DK_IFDIFI,
720 DK_IFIDN,
721 DK_IFIDNI,
722 DK_ELSEIF,
723 DK_ELSEIFE,
724 DK_ELSEIFB,
725 DK_ELSEIFNB,
726 DK_ELSEIFDEF,
727 DK_ELSEIFNDEF,
728 DK_ELSEIFDIF,
729 DK_ELSEIFDIFI,
730 DK_ELSEIFIDN,
731 DK_ELSEIFIDNI,
732 DK_ELSE,
733 DK_ENDIF,
734 DK_FILE,
735 DK_LINE,
736 DK_LOC,
737 DK_STABS,
738 DK_CV_FILE,
739 DK_CV_FUNC_ID,
740 DK_CV_INLINE_SITE_ID,
741 DK_CV_LOC,
742 DK_CV_LINETABLE,
743 DK_CV_INLINE_LINETABLE,
744 DK_CV_DEF_RANGE,
745 DK_CV_STRINGTABLE,
746 DK_CV_STRING,
747 DK_CV_FILECHECKSUMS,
748 DK_CV_FILECHECKSUM_OFFSET,
749 DK_CV_FPO_DATA,
750 DK_CFI_SECTIONS,
751 DK_CFI_STARTPROC,
752 DK_CFI_ENDPROC,
753 DK_CFI_DEF_CFA,
754 DK_CFI_DEF_CFA_OFFSET,
755 DK_CFI_ADJUST_CFA_OFFSET,
756 DK_CFI_DEF_CFA_REGISTER,
757 DK_CFI_OFFSET,
758 DK_CFI_REL_OFFSET,
759 DK_CFI_PERSONALITY,
760 DK_CFI_LSDA,
761 DK_CFI_REMEMBER_STATE,
762 DK_CFI_RESTORE_STATE,
763 DK_CFI_SAME_VALUE,
764 DK_CFI_RESTORE,
765 DK_CFI_ESCAPE,
766 DK_CFI_RETURN_COLUMN,
767 DK_CFI_SIGNAL_FRAME,
768 DK_CFI_UNDEFINED,
769 DK_CFI_REGISTER,
770 DK_CFI_WINDOW_SAVE,
771 DK_CFI_B_KEY_FRAME,
772 DK_MACRO,
773 DK_EXITM,
774 DK_ENDM,
775 DK_PURGE,
776 DK_ERR,
777 DK_ERRB,
778 DK_ERRNB,
779 DK_ERRDEF,
780 DK_ERRNDEF,
781 DK_ERRDIF,
782 DK_ERRDIFI,
783 DK_ERRIDN,
784 DK_ERRIDNI,
785 DK_ERRE,
786 DK_ERRNZ,
787 DK_ECHO,
788 DK_STRUCT,
789 DK_UNION,
790 DK_ENDS,
791 DK_END,
792 DK_PUSHFRAME,
793 DK_PUSHREG,
794 DK_SAVEREG,
795 DK_SAVEXMM128,
796 DK_SETFRAME,
797 DK_RADIX,
800 /// Maps directive name --> DirectiveKind enum, for directives parsed by this
801 /// class.
802 StringMap<DirectiveKind> DirectiveKindMap;
804 bool isMacroLikeDirective();
806 // Codeview def_range type parsing.
807 enum CVDefRangeType {
808 CVDR_DEFRANGE = 0, // Placeholder
809 CVDR_DEFRANGE_REGISTER,
810 CVDR_DEFRANGE_FRAMEPOINTER_REL,
811 CVDR_DEFRANGE_SUBFIELD_REGISTER,
812 CVDR_DEFRANGE_REGISTER_REL
815 /// Maps Codeview def_range types --> CVDefRangeType enum, for Codeview
816 /// def_range types parsed by this class.
817 StringMap<CVDefRangeType> CVDefRangeTypeMap;
819 // Generic (target and platform independent) directive parsing.
820 enum BuiltinSymbol {
821 BI_NO_SYMBOL, // Placeholder
822 BI_DATE,
823 BI_TIME,
824 BI_VERSION,
825 BI_FILECUR,
826 BI_FILENAME,
827 BI_LINE,
828 BI_CURSEG,
829 BI_CPU,
830 BI_INTERFACE,
831 BI_CODE,
832 BI_DATA,
833 BI_FARDATA,
834 BI_WORDSIZE,
835 BI_CODESIZE,
836 BI_DATASIZE,
837 BI_MODEL,
838 BI_STACK,
841 /// Maps builtin name --> BuiltinSymbol enum, for builtins handled by this
842 /// class.
843 StringMap<BuiltinSymbol> BuiltinSymbolMap;
845 const MCExpr *evaluateBuiltinValue(BuiltinSymbol Symbol, SMLoc StartLoc);
847 std::optional<std::string> evaluateBuiltinTextMacro(BuiltinSymbol Symbol,
848 SMLoc StartLoc);
850 // ".ascii", ".asciz", ".string"
851 bool parseDirectiveAscii(StringRef IDVal, bool ZeroTerminated);
853 // "byte", "word", ...
854 bool emitIntValue(const MCExpr *Value, unsigned Size);
855 bool parseScalarInitializer(unsigned Size,
856 SmallVectorImpl<const MCExpr *> &Values,
857 unsigned StringPadLength = 0);
858 bool parseScalarInstList(
859 unsigned Size, SmallVectorImpl<const MCExpr *> &Values,
860 const AsmToken::TokenKind EndToken = AsmToken::EndOfStatement);
861 bool emitIntegralValues(unsigned Size, unsigned *Count = nullptr);
862 bool addIntegralField(StringRef Name, unsigned Size);
863 bool parseDirectiveValue(StringRef IDVal, unsigned Size);
864 bool parseDirectiveNamedValue(StringRef TypeName, unsigned Size,
865 StringRef Name, SMLoc NameLoc);
867 // "real4", "real8", "real10"
868 bool emitRealValues(const fltSemantics &Semantics, unsigned *Count = nullptr);
869 bool addRealField(StringRef Name, const fltSemantics &Semantics, size_t Size);
870 bool parseDirectiveRealValue(StringRef IDVal, const fltSemantics &Semantics,
871 size_t Size);
872 bool parseRealInstList(
873 const fltSemantics &Semantics, SmallVectorImpl<APInt> &Values,
874 const AsmToken::TokenKind EndToken = AsmToken::EndOfStatement);
875 bool parseDirectiveNamedRealValue(StringRef TypeName,
876 const fltSemantics &Semantics,
877 unsigned Size, StringRef Name,
878 SMLoc NameLoc);
880 bool parseOptionalAngleBracketOpen();
881 bool parseAngleBracketClose(const Twine &Msg = "expected '>'");
883 bool parseFieldInitializer(const FieldInfo &Field,
884 FieldInitializer &Initializer);
885 bool parseFieldInitializer(const FieldInfo &Field,
886 const IntFieldInfo &Contents,
887 FieldInitializer &Initializer);
888 bool parseFieldInitializer(const FieldInfo &Field,
889 const RealFieldInfo &Contents,
890 FieldInitializer &Initializer);
891 bool parseFieldInitializer(const FieldInfo &Field,
892 const StructFieldInfo &Contents,
893 FieldInitializer &Initializer);
895 bool parseStructInitializer(const StructInfo &Structure,
896 StructInitializer &Initializer);
897 bool parseStructInstList(
898 const StructInfo &Structure, std::vector<StructInitializer> &Initializers,
899 const AsmToken::TokenKind EndToken = AsmToken::EndOfStatement);
901 bool emitFieldValue(const FieldInfo &Field);
902 bool emitFieldValue(const FieldInfo &Field, const IntFieldInfo &Contents);
903 bool emitFieldValue(const FieldInfo &Field, const RealFieldInfo &Contents);
904 bool emitFieldValue(const FieldInfo &Field, const StructFieldInfo &Contents);
906 bool emitFieldInitializer(const FieldInfo &Field,
907 const FieldInitializer &Initializer);
908 bool emitFieldInitializer(const FieldInfo &Field,
909 const IntFieldInfo &Contents,
910 const IntFieldInfo &Initializer);
911 bool emitFieldInitializer(const FieldInfo &Field,
912 const RealFieldInfo &Contents,
913 const RealFieldInfo &Initializer);
914 bool emitFieldInitializer(const FieldInfo &Field,
915 const StructFieldInfo &Contents,
916 const StructFieldInfo &Initializer);
918 bool emitStructInitializer(const StructInfo &Structure,
919 const StructInitializer &Initializer);
921 // User-defined types (structs, unions):
922 bool emitStructValues(const StructInfo &Structure, unsigned *Count = nullptr);
923 bool addStructField(StringRef Name, const StructInfo &Structure);
924 bool parseDirectiveStructValue(const StructInfo &Structure,
925 StringRef Directive, SMLoc DirLoc);
926 bool parseDirectiveNamedStructValue(const StructInfo &Structure,
927 StringRef Directive, SMLoc DirLoc,
928 StringRef Name);
930 // "=", "equ", "textequ"
931 bool parseDirectiveEquate(StringRef IDVal, StringRef Name,
932 DirectiveKind DirKind, SMLoc NameLoc);
934 bool parseDirectiveOrg(); // "org"
936 bool emitAlignTo(int64_t Alignment);
937 bool parseDirectiveAlign(); // "align"
938 bool parseDirectiveEven(); // "even"
940 // ".file", ".line", ".loc", ".stabs"
941 bool parseDirectiveFile(SMLoc DirectiveLoc);
942 bool parseDirectiveLine();
943 bool parseDirectiveLoc();
944 bool parseDirectiveStabs();
946 // ".cv_file", ".cv_func_id", ".cv_inline_site_id", ".cv_loc", ".cv_linetable",
947 // ".cv_inline_linetable", ".cv_def_range", ".cv_string"
948 bool parseDirectiveCVFile();
949 bool parseDirectiveCVFuncId();
950 bool parseDirectiveCVInlineSiteId();
951 bool parseDirectiveCVLoc();
952 bool parseDirectiveCVLinetable();
953 bool parseDirectiveCVInlineLinetable();
954 bool parseDirectiveCVDefRange();
955 bool parseDirectiveCVString();
956 bool parseDirectiveCVStringTable();
957 bool parseDirectiveCVFileChecksums();
958 bool parseDirectiveCVFileChecksumOffset();
959 bool parseDirectiveCVFPOData();
961 // .cfi directives
962 bool parseDirectiveCFIRegister(SMLoc DirectiveLoc);
963 bool parseDirectiveCFIWindowSave(SMLoc DirectiveLoc);
964 bool parseDirectiveCFISections();
965 bool parseDirectiveCFIStartProc();
966 bool parseDirectiveCFIEndProc();
967 bool parseDirectiveCFIDefCfaOffset(SMLoc DirectiveLoc);
968 bool parseDirectiveCFIDefCfa(SMLoc DirectiveLoc);
969 bool parseDirectiveCFIAdjustCfaOffset(SMLoc DirectiveLoc);
970 bool parseDirectiveCFIDefCfaRegister(SMLoc DirectiveLoc);
971 bool parseDirectiveCFIOffset(SMLoc DirectiveLoc);
972 bool parseDirectiveCFIRelOffset(SMLoc DirectiveLoc);
973 bool parseDirectiveCFIPersonalityOrLsda(bool IsPersonality);
974 bool parseDirectiveCFIRememberState(SMLoc DirectiveLoc);
975 bool parseDirectiveCFIRestoreState(SMLoc DirectiveLoc);
976 bool parseDirectiveCFISameValue(SMLoc DirectiveLoc);
977 bool parseDirectiveCFIRestore(SMLoc DirectiveLoc);
978 bool parseDirectiveCFIEscape(SMLoc DirectiveLoc);
979 bool parseDirectiveCFIReturnColumn(SMLoc DirectiveLoc);
980 bool parseDirectiveCFISignalFrame();
981 bool parseDirectiveCFIUndefined(SMLoc DirectiveLoc);
983 // macro directives
984 bool parseDirectivePurgeMacro(SMLoc DirectiveLoc);
985 bool parseDirectiveExitMacro(SMLoc DirectiveLoc, StringRef Directive,
986 std::string &Value);
987 bool parseDirectiveEndMacro(StringRef Directive);
988 bool parseDirectiveMacro(StringRef Name, SMLoc NameLoc);
990 bool parseDirectiveStruct(StringRef Directive, DirectiveKind DirKind,
991 StringRef Name, SMLoc NameLoc);
992 bool parseDirectiveNestedStruct(StringRef Directive, DirectiveKind DirKind);
993 bool parseDirectiveEnds(StringRef Name, SMLoc NameLoc);
994 bool parseDirectiveNestedEnds();
996 bool parseDirectiveExtern();
998 /// Parse a directive like ".globl" which accepts a single symbol (which
999 /// should be a label or an external).
1000 bool parseDirectiveSymbolAttribute(MCSymbolAttr Attr);
1002 bool parseDirectiveComm(bool IsLocal); // ".comm" and ".lcomm"
1004 bool parseDirectiveComment(SMLoc DirectiveLoc); // "comment"
1006 bool parseDirectiveInclude(); // "include"
1008 // "if" or "ife"
1009 bool parseDirectiveIf(SMLoc DirectiveLoc, DirectiveKind DirKind);
1010 // "ifb" or "ifnb", depending on ExpectBlank.
1011 bool parseDirectiveIfb(SMLoc DirectiveLoc, bool ExpectBlank);
1012 // "ifidn", "ifdif", "ifidni", or "ifdifi", depending on ExpectEqual and
1013 // CaseInsensitive.
1014 bool parseDirectiveIfidn(SMLoc DirectiveLoc, bool ExpectEqual,
1015 bool CaseInsensitive);
1016 // "ifdef" or "ifndef", depending on expect_defined
1017 bool parseDirectiveIfdef(SMLoc DirectiveLoc, bool expect_defined);
1018 // "elseif" or "elseife"
1019 bool parseDirectiveElseIf(SMLoc DirectiveLoc, DirectiveKind DirKind);
1020 // "elseifb" or "elseifnb", depending on ExpectBlank.
1021 bool parseDirectiveElseIfb(SMLoc DirectiveLoc, bool ExpectBlank);
1022 // ".elseifdef" or ".elseifndef", depending on expect_defined
1023 bool parseDirectiveElseIfdef(SMLoc DirectiveLoc, bool expect_defined);
1024 // "elseifidn", "elseifdif", "elseifidni", or "elseifdifi", depending on
1025 // ExpectEqual and CaseInsensitive.
1026 bool parseDirectiveElseIfidn(SMLoc DirectiveLoc, bool ExpectEqual,
1027 bool CaseInsensitive);
1028 bool parseDirectiveElse(SMLoc DirectiveLoc); // "else"
1029 bool parseDirectiveEndIf(SMLoc DirectiveLoc); // "endif"
1030 bool parseEscapedString(std::string &Data) override;
1031 bool parseAngleBracketString(std::string &Data) override;
1033 // Macro-like directives
1034 MCAsmMacro *parseMacroLikeBody(SMLoc DirectiveLoc);
1035 void instantiateMacroLikeBody(MCAsmMacro *M, SMLoc DirectiveLoc,
1036 raw_svector_ostream &OS);
1037 void instantiateMacroLikeBody(MCAsmMacro *M, SMLoc DirectiveLoc,
1038 SMLoc ExitLoc, raw_svector_ostream &OS);
1039 bool parseDirectiveRepeat(SMLoc DirectiveLoc, StringRef Directive);
1040 bool parseDirectiveFor(SMLoc DirectiveLoc, StringRef Directive);
1041 bool parseDirectiveForc(SMLoc DirectiveLoc, StringRef Directive);
1042 bool parseDirectiveWhile(SMLoc DirectiveLoc);
1044 // "_emit" or "__emit"
1045 bool parseDirectiveMSEmit(SMLoc DirectiveLoc, ParseStatementInfo &Info,
1046 size_t Len);
1048 // "align"
1049 bool parseDirectiveMSAlign(SMLoc DirectiveLoc, ParseStatementInfo &Info);
1051 // "end"
1052 bool parseDirectiveEnd(SMLoc DirectiveLoc);
1054 // ".err"
1055 bool parseDirectiveError(SMLoc DirectiveLoc);
1056 // ".errb" or ".errnb", depending on ExpectBlank.
1057 bool parseDirectiveErrorIfb(SMLoc DirectiveLoc, bool ExpectBlank);
1058 // ".errdef" or ".errndef", depending on ExpectDefined.
1059 bool parseDirectiveErrorIfdef(SMLoc DirectiveLoc, bool ExpectDefined);
1060 // ".erridn", ".errdif", ".erridni", or ".errdifi", depending on ExpectEqual
1061 // and CaseInsensitive.
1062 bool parseDirectiveErrorIfidn(SMLoc DirectiveLoc, bool ExpectEqual,
1063 bool CaseInsensitive);
1064 // ".erre" or ".errnz", depending on ExpectZero.
1065 bool parseDirectiveErrorIfe(SMLoc DirectiveLoc, bool ExpectZero);
1067 // ".radix"
1068 bool parseDirectiveRadix(SMLoc DirectiveLoc);
1070 // "echo"
1071 bool parseDirectiveEcho(SMLoc DirectiveLoc);
1073 void initializeDirectiveKindMap();
1074 void initializeCVDefRangeTypeMap();
1075 void initializeBuiltinSymbolMap();
1078 } // end anonymous namespace
1080 namespace llvm {
1082 extern cl::opt<unsigned> AsmMacroMaxNestingDepth;
1084 extern MCAsmParserExtension *createCOFFMasmParser();
1086 } // end namespace llvm
1088 enum { DEFAULT_ADDRSPACE = 0 };
1090 MasmParser::MasmParser(SourceMgr &SM, MCContext &Ctx, MCStreamer &Out,
1091 const MCAsmInfo &MAI, struct tm TM, unsigned CB)
1092 : Lexer(MAI), Ctx(Ctx), Out(Out), MAI(MAI), SrcMgr(SM),
1093 CurBuffer(CB ? CB : SM.getMainFileID()), TM(TM) {
1094 HadError = false;
1095 // Save the old handler.
1096 SavedDiagHandler = SrcMgr.getDiagHandler();
1097 SavedDiagContext = SrcMgr.getDiagContext();
1098 // Set our own handler which calls the saved handler.
1099 SrcMgr.setDiagHandler(DiagHandler, this);
1100 Lexer.setBuffer(SrcMgr.getMemoryBuffer(CurBuffer)->getBuffer());
1101 EndStatementAtEOFStack.push_back(true);
1103 // Initialize the platform / file format parser.
1104 switch (Ctx.getObjectFileType()) {
1105 case MCContext::IsCOFF:
1106 PlatformParser.reset(createCOFFMasmParser());
1107 break;
1108 default:
1109 report_fatal_error("llvm-ml currently supports only COFF output.");
1110 break;
1113 initializeDirectiveKindMap();
1114 PlatformParser->Initialize(*this);
1115 initializeCVDefRangeTypeMap();
1116 initializeBuiltinSymbolMap();
1118 NumOfMacroInstantiations = 0;
1121 MasmParser::~MasmParser() {
1122 assert((HadError || ActiveMacros.empty()) &&
1123 "Unexpected active macro instantiation!");
1125 // Restore the saved diagnostics handler and context for use during
1126 // finalization.
1127 SrcMgr.setDiagHandler(SavedDiagHandler, SavedDiagContext);
1130 void MasmParser::printMacroInstantiations() {
1131 // Print the active macro instantiation stack.
1132 for (std::vector<MacroInstantiation *>::const_reverse_iterator
1133 it = ActiveMacros.rbegin(),
1134 ie = ActiveMacros.rend();
1135 it != ie; ++it)
1136 printMessage((*it)->InstantiationLoc, SourceMgr::DK_Note,
1137 "while in macro instantiation");
1140 void MasmParser::Note(SMLoc L, const Twine &Msg, SMRange Range) {
1141 printPendingErrors();
1142 printMessage(L, SourceMgr::DK_Note, Msg, Range);
1143 printMacroInstantiations();
1146 bool MasmParser::Warning(SMLoc L, const Twine &Msg, SMRange Range) {
1147 if (getTargetParser().getTargetOptions().MCNoWarn)
1148 return false;
1149 if (getTargetParser().getTargetOptions().MCFatalWarnings)
1150 return Error(L, Msg, Range);
1151 printMessage(L, SourceMgr::DK_Warning, Msg, Range);
1152 printMacroInstantiations();
1153 return false;
1156 bool MasmParser::printError(SMLoc L, const Twine &Msg, SMRange Range) {
1157 HadError = true;
1158 printMessage(L, SourceMgr::DK_Error, Msg, Range);
1159 printMacroInstantiations();
1160 return true;
1163 bool MasmParser::enterIncludeFile(const std::string &Filename) {
1164 std::string IncludedFile;
1165 unsigned NewBuf =
1166 SrcMgr.AddIncludeFile(Filename, Lexer.getLoc(), IncludedFile);
1167 if (!NewBuf)
1168 return true;
1170 CurBuffer = NewBuf;
1171 Lexer.setBuffer(SrcMgr.getMemoryBuffer(CurBuffer)->getBuffer());
1172 EndStatementAtEOFStack.push_back(true);
1173 return false;
1176 void MasmParser::jumpToLoc(SMLoc Loc, unsigned InBuffer,
1177 bool EndStatementAtEOF) {
1178 CurBuffer = InBuffer ? InBuffer : SrcMgr.FindBufferContainingLoc(Loc);
1179 Lexer.setBuffer(SrcMgr.getMemoryBuffer(CurBuffer)->getBuffer(),
1180 Loc.getPointer(), EndStatementAtEOF);
1183 bool MasmParser::expandMacros() {
1184 const AsmToken &Tok = getTok();
1185 const std::string IDLower = Tok.getIdentifier().lower();
1187 const llvm::MCAsmMacro *M = getContext().lookupMacro(IDLower);
1188 if (M && M->IsFunction && peekTok().is(AsmToken::LParen)) {
1189 // This is a macro function invocation; expand it in place.
1190 const SMLoc MacroLoc = Tok.getLoc();
1191 const StringRef MacroId = Tok.getIdentifier();
1192 Lexer.Lex();
1193 if (handleMacroInvocation(M, MacroLoc)) {
1194 Lexer.UnLex(AsmToken(AsmToken::Error, MacroId));
1195 Lexer.Lex();
1197 return false;
1200 std::optional<std::string> ExpandedValue;
1201 auto BuiltinIt = BuiltinSymbolMap.find(IDLower);
1202 if (BuiltinIt != BuiltinSymbolMap.end()) {
1203 ExpandedValue =
1204 evaluateBuiltinTextMacro(BuiltinIt->getValue(), Tok.getLoc());
1205 } else {
1206 auto VarIt = Variables.find(IDLower);
1207 if (VarIt != Variables.end() && VarIt->getValue().IsText) {
1208 ExpandedValue = VarIt->getValue().TextValue;
1212 if (!ExpandedValue)
1213 return true;
1214 std::unique_ptr<MemoryBuffer> Instantiation =
1215 MemoryBuffer::getMemBufferCopy(*ExpandedValue, "<instantiation>");
1217 // Jump to the macro instantiation and prime the lexer.
1218 CurBuffer =
1219 SrcMgr.AddNewSourceBuffer(std::move(Instantiation), Tok.getEndLoc());
1220 Lexer.setBuffer(SrcMgr.getMemoryBuffer(CurBuffer)->getBuffer(), nullptr,
1221 /*EndStatementAtEOF=*/false);
1222 EndStatementAtEOFStack.push_back(false);
1223 Lexer.Lex();
1224 return false;
/// Advance to the next token and return a reference to it.
///
/// Before advancing, a current Error token is reported through Error(), and
/// a line comment carried by a current EndOfStatement token is forwarded to
/// the streamer when MAI.preserveAsmComments() is set. After advancing:
///  - while \p ExpandNextToken is ExpandMacros and the token is an
///    identifier, expandMacros() is applied (except for an identifier at the
///    start of a statement that is followed by EQU/TEXTEQU);
///  - Comment tokens are consumed (and forwarded when comments are kept);
///  - a backslash immediately followed by end-of-statement is skipped as a
///    line continuation;
///  - on Eof inside an included buffer, lexing resumes in the parent buffer.
1227 const AsmToken &MasmParser::Lex(ExpandKind ExpandNextToken) {
// Surface a lexer error that is still sitting in the current token.
1228 if (Lexer.getTok().is(AsmToken::Error))
1229 Error(Lexer.getErrLoc(), Lexer.getErr());
1231 // If it's an end of statement with a comment in it
1232 if (getTok().is(AsmToken::EndOfStatement)) {
1233 // If this is a line comment, output it. A token text that starts with
// '\n' or '\r' is a plain line break and is not forwarded.
1234 if (!getTok().getString().empty() && getTok().getString().front() != '\n' &&
1235 getTok().getString().front() != '\r' && MAI.preserveAsmComments())
1236 Out.addExplicitComment(Twine(getTok().getString()));
// Advance. StartOfStatement is sampled once here and is not refreshed inside
// the expansion loop below.
1239 const AsmToken *tok = &Lexer.Lex();
1240 bool StartOfStatement = Lexer.isAtStartOfStatement();
// Keep expanding while the current token is an identifier; expandMacros()
// returning true ends the loop.
1242 while (ExpandNextToken == ExpandMacros && tok->is(AsmToken::Identifier)) {
1243 if (StartOfStatement) {
// Peek one token ahead without consuming it.
1244 AsmToken NextTok;
1245 MutableArrayRef<AsmToken> Buf(NextTok);
1246 size_t ReadCount = Lexer.peekTokens(Buf);
1247 if (ReadCount && NextTok.is(AsmToken::Identifier) &&
1248 (NextTok.getString().equals_insensitive("equ") ||
1249 NextTok.getString().equals_insensitive("textequ"))) {
1250 // This looks like an EQU or TEXTEQU directive; don't expand the
1251 // identifier, allowing for redefinitions.
1252 break;
1255 if (expandMacros())
1256 break;
1259 // Parse comments here to be deferred until end of next statement.
1260 while (tok->is(AsmToken::Comment)) {
1261 if (MAI.preserveAsmComments())
1262 Out.addExplicitComment(Twine(tok->getString()));
1263 tok = &Lexer.Lex();
1266 // Recognize and bypass line continuations.
1267 while (tok->is(AsmToken::BackSlash) &&
1268 peekTok().is(AsmToken::EndOfStatement)) {
1269 // Eat both the backslash and the end of statement.
1270 Lexer.Lex();
1271 tok = &Lexer.Lex();
1274 if (tok->is(AsmToken::Eof)) {
1275 // If this is the end of an included file, pop the parent file off the
1276 // include stack.
1277 SMLoc ParentIncludeLoc = SrcMgr.getParentIncludeLoc(CurBuffer);
1278 if (ParentIncludeLoc != SMLoc()) {
1279 EndStatementAtEOFStack.pop_back();
1280 jumpToLoc(ParentIncludeLoc, 0, EndStatementAtEOFStack.back());
// NOTE(review): the recursive call does not forward ExpandNextToken -
// confirm that re-entering with the default is intended.
1281 return Lex();
// Eof of the outermost buffer: the stack is expected to be empty after this
// pop.
1283 EndStatementAtEOFStack.pop_back();
1284 assert(EndStatementAtEOFStack.empty());
1287 return *tok;
1290 const AsmToken MasmParser::peekTok(bool ShouldSkipSpace) {
1291 AsmToken Tok;
1293 MutableArrayRef<AsmToken> Buf(Tok);
1294 size_t ReadCount = Lexer.peekTokens(Buf, ShouldSkipSpace);
1296 if (ReadCount == 0) {
1297 // If this is the end of an included file, pop the parent file off the
1298 // include stack.
1299 SMLoc ParentIncludeLoc = SrcMgr.getParentIncludeLoc(CurBuffer);
1300 if (ParentIncludeLoc != SMLoc()) {
1301 EndStatementAtEOFStack.pop_back();
1302 jumpToLoc(ParentIncludeLoc, 0, EndStatementAtEOFStack.back());
1303 return peekTok(ShouldSkipSpace);
1305 EndStatementAtEOFStack.pop_back();
1306 assert(EndStatementAtEOFStack.empty());
1309 assert(ReadCount == 1);
1310 return Tok;
1313 bool MasmParser::enabledGenDwarfForAssembly() {
1314 // Check whether the user specified -g.
1315 if (!getContext().getGenDwarfForAssembly())
1316 return false;
1317 // If we haven't encountered any .file directives (which would imply that
1318 // the assembler source was produced with debug info already) then emit one
1319 // describing the assembler source file itself.
1320 if (getContext().getGenDwarfFileNumber() == 0) {
1321 // Use the first #line directive for this, if any. It's preprocessed, so
1322 // there is no checksum, and of course no source directive.
1323 if (!FirstCppHashFilename.empty())
1324 getContext().setMCLineTableRootFile(
1325 /*CUID=*/0, getContext().getCompilationDir(), FirstCppHashFilename,
1326 /*Cksum=*/std::nullopt, /*Source=*/std::nullopt);
1327 const MCDwarfFile &RootFile =
1328 getContext().getMCDwarfLineTable(/*CUID=*/0).getRootFile();
1329 getContext().setGenDwarfFileNumber(getStreamer().emitDwarfFileDirective(
1330 /*CUID=*/0, getContext().getCompilationDir(), RootFile.Name,
1331 RootFile.Checksum, RootFile.Source));
1333 return true;
/// Parse the whole input, statement by statement, and optionally finalize the
/// output stream.
///
/// \param NoInitialTextSection when false, Out.initSections() is called
///        before parsing starts.
/// \param NoFinalize when true, the end-of-parse symbol checks and
///        Out.finish() are skipped.
/// \returns true if an error was recorded by this parser or by the context.
1336 bool MasmParser::Run(bool NoInitialTextSection, bool NoFinalize) {
1337 // Create the initial section, if requested.
1338 if (!NoInitialTextSection)
1339 Out.initSections(false, getTargetParser().getSTI());
1341 // Prime the lexer.
1342 Lex();
// Snapshot the conditional-assembly state so that unbalanced conditionals can
// be detected once parsing is done.
1344 HadError = false;
1345 AsmCond StartingCondState = TheCondState;
1346 SmallVector<AsmRewrite, 4> AsmStrRewrites;
1348 // If we are generating dwarf for assembly source files save the initial text
1349 // section. (Don't use enabledGenDwarfForAssembly() here, as we aren't
1350 // emitting any actual debug info yet and haven't had a chance to parse any
1351 // embedded .file directives.)
1352 if (getContext().getGenDwarfForAssembly()) {
1353 MCSection *Sec = getStreamer().getCurrentSectionOnly();
// Give the section a begin symbol if it does not have one yet.
1354 if (!Sec->getBeginSymbol()) {
1355 MCSymbol *SectionStartSym = getContext().createTempSymbol();
1356 getStreamer().emitLabel(SectionStartSym);
1357 Sec->setBeginSymbol(SectionStartSym);
1359 bool InsertResult = getContext().addGenDwarfSection(Sec);
1360 assert(InsertResult && ".text section should not have debug info yet");
// Silences the unused-variable warning when asserts are compiled out.
1361 (void)InsertResult;
1364 getTargetParser().onBeginOfFile();
1366 // While we have input, parse each statement. The loop also continues at Eof
// while the current buffer still has a parent include location.
1367 while (Lexer.isNot(AsmToken::Eof) ||
1368 SrcMgr.getParentIncludeLoc(CurBuffer) != SMLoc()) {
1369 // Skip through the EOF at the end of an inclusion.
1370 if (Lexer.is(AsmToken::Eof))
1371 Lex();
1373 ParseStatementInfo Info(&AsmStrRewrites);
1374 bool Parsed = parseStatement(Info, nullptr);
1376 // If we have a Lexer Error we are on an Error Token. Load in Lexer Error
1377 // for printing ErrMsg via Lex() only if no (presumably better) parser error
1378 // exists.
1379 if (Parsed && !hasPendingError() && Lexer.getTok().is(AsmToken::Error)) {
1380 Lex();
1383 // parseStatement returned true so may need to emit an error.
1384 printPendingErrors();
1386 // Skipping to the next line if needed.
1387 if (Parsed && !getLexer().isAtStartOfStatement())
1388 eatToEndOfStatement();
1391 getTargetParser().onEndOfFile();
1392 printPendingErrors();
1394 // All errors should have been emitted.
1395 assert(!hasPendingError() && "unexpected error from parseStatement");
1397 getTargetParser().flushPendingInstructions(getStreamer());
// The conditional state must match the snapshot taken before parsing.
1399 if (TheCondState.TheCond != StartingCondState.TheCond ||
1400 TheCondState.Ignore != StartingCondState.Ignore)
1401 printError(getTok().getLoc(), "unmatched .ifs or .elses");
1402 // Check to see there are no empty DwarfFile slots. Only the first line
// table is inspected, and index 0 is allowed to be unnamed.
1403 const auto &LineTables = getContext().getMCDwarfLineTables();
1404 if (!LineTables.empty()) {
1405 unsigned Index = 0;
1406 for (const auto &File : LineTables.begin()->second.getMCDwarfFiles()) {
1407 if (File.Name.empty() && Index != 0)
1408 printError(getTok().getLoc(), "unassigned file number: " +
1409 Twine(Index) +
1410 " for .file directives");
1411 ++Index;
1415 // Check to see that all assembler local symbols were actually defined.
1416 // Targets that don't do subsections via symbols may not want this, though,
1417 // so conservatively exclude them. Only do this if we're finalizing, though,
1418 // as otherwise we won't necessarily have seen everything yet.
1419 if (!NoFinalize) {
1420 if (MAI.hasSubsectionsViaSymbols()) {
1421 for (const auto &TableEntry : getContext().getSymbols()) {
1422 MCSymbol *Sym = TableEntry.getValue().Symbol;
1423 // Variable symbols may not be marked as defined, so check those
1424 // explicitly. If we know it's a variable, we have a definition for
1425 // the purposes of this check.
1426 if (Sym && Sym->isTemporary() && !Sym->isVariable() &&
1427 !Sym->isDefined())
1428 // FIXME: We would really like to refer back to where the symbol was
1429 // first referenced for a source location. We need to add something
1430 // to track that. Currently, we just point to the end of the file.
1431 printError(getTok().getLoc(), "assembler local symbol '" +
1432 Sym->getName() + "' not defined");
1436 // Temporary symbols like the ones for directional jumps don't go in the
1437 // symbol table. They also need to be diagnosed in all (final) cases.
1438 for (std::tuple<SMLoc, CppHashInfoTy, MCSymbol *> &LocSym : DirLabels) {
1439 if (std::get<2>(LocSym)->isUndefined()) {
1440 // Reset the state of any "# line file" directives we've seen to the
1441 // context as it was at the diagnostic site.
1442 CppHashInfo = std::get<1>(LocSym);
1443 printError(std::get<0>(LocSym), "directional label undefined");
1448 // Finalize the output stream if there are no errors and if the client wants
1449 // us to.
1450 if (!HadError && !NoFinalize)
1451 Out.finish(Lexer.getLoc());
1453 return HadError || getContext().hadError();
1456 bool MasmParser::checkForValidSection() {
1457 if (!ParsingMSInlineAsm && !getStreamer().getCurrentSectionOnly()) {
1458 Out.initSections(false, getTargetParser().getSTI());
1459 return Error(getTok().getLoc(),
1460 "expected section directive before assembly directive");
1462 return false;
1465 /// Throw away the rest of the line for testing purposes.
1466 void MasmParser::eatToEndOfStatement() {
1467 while (Lexer.isNot(AsmToken::EndOfStatement)) {
1468 if (Lexer.is(AsmToken::Eof)) {
1469 SMLoc ParentIncludeLoc = SrcMgr.getParentIncludeLoc(CurBuffer);
1470 if (ParentIncludeLoc == SMLoc()) {
1471 break;
1474 EndStatementAtEOFStack.pop_back();
1475 jumpToLoc(ParentIncludeLoc, 0, EndStatementAtEOFStack.back());
1478 Lexer.Lex();
1481 // Eat EOL.
1482 if (Lexer.is(AsmToken::EndOfStatement))
1483 Lexer.Lex();
1486 SmallVector<StringRef, 1>
1487 MasmParser::parseStringRefsTo(AsmToken::TokenKind EndTok) {
1488 SmallVector<StringRef, 1> Refs;
1489 const char *Start = getTok().getLoc().getPointer();
1490 while (Lexer.isNot(EndTok)) {
1491 if (Lexer.is(AsmToken::Eof)) {
1492 SMLoc ParentIncludeLoc = SrcMgr.getParentIncludeLoc(CurBuffer);
1493 if (ParentIncludeLoc == SMLoc()) {
1494 break;
1496 Refs.emplace_back(Start, getTok().getLoc().getPointer() - Start);
1498 EndStatementAtEOFStack.pop_back();
1499 jumpToLoc(ParentIncludeLoc, 0, EndStatementAtEOFStack.back());
1500 Lexer.Lex();
1501 Start = getTok().getLoc().getPointer();
1502 } else {
1503 Lexer.Lex();
1506 Refs.emplace_back(Start, getTok().getLoc().getPointer() - Start);
1507 return Refs;
1510 std::string MasmParser::parseStringTo(AsmToken::TokenKind EndTok) {
1511 SmallVector<StringRef, 1> Refs = parseStringRefsTo(EndTok);
1512 std::string Str;
1513 for (StringRef S : Refs) {
1514 Str.append(S.str());
1516 return Str;
1519 StringRef MasmParser::parseStringToEndOfStatement() {
1520 const char *Start = getTok().getLoc().getPointer();
1522 while (Lexer.isNot(AsmToken::EndOfStatement) && Lexer.isNot(AsmToken::Eof))
1523 Lexer.Lex();
1525 const char *End = getTok().getLoc().getPointer();
1526 return StringRef(Start, End - Start);
1529 /// Parse a paren expression and return it.
1530 /// NOTE: This assumes the leading '(' has already been consumed.
1532 /// parenexpr ::= expr)
1534 bool MasmParser::parseParenExpr(const MCExpr *&Res, SMLoc &EndLoc) {
1535 if (parseExpression(Res))
1536 return true;
1537 EndLoc = Lexer.getTok().getEndLoc();
1538 return parseRParen();
1541 /// Parse a bracket expression and return it.
1542 /// NOTE: This assumes the leading '[' has already been consumed.
1544 /// bracketexpr ::= expr]
1546 bool MasmParser::parseBracketExpr(const MCExpr *&Res, SMLoc &EndLoc) {
1547 if (parseExpression(Res))
1548 return true;
1549 EndLoc = getTok().getEndLoc();
1550 if (parseToken(AsmToken::RBrac, "expected ']' in brackets expression"))
1551 return true;
1552 return false;
1555 /// Parse a primary expression and return it.
1556 /// primaryexpr ::= (parenexpr
1557 /// primaryexpr ::= symbol
1558 /// primaryexpr ::= number
1559 /// primaryexpr ::= '.'
1560 /// primaryexpr ::= ~,+,-,!,'not' primaryexpr
1561 /// primaryexpr ::= string
1562 /// (a string is interpreted as a 64-bit number in big-endian base-256)
///
/// \param Res      receives the parsed expression on success.
/// \param EndLoc   updated on most paths from the consumed token; some paths
///                 (string constants, @B/@F references, built-in values and
///                 field-offset constants) do not assign it here.
/// \param TypeInfo optional; when non-null and a symbol reference is built,
///                 receives the type recorded for the field or identifier.
/// \returns true on error, false on success.
1563 bool MasmParser::parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc,
1564 AsmTypeInfo *TypeInfo) {
1565 SMLoc FirstTokenLoc = getLexer().getLoc();
1566 AsmToken::TokenKind FirstTokenKind = Lexer.getKind();
1567 switch (FirstTokenKind) {
1568 default:
1569 return TokError("unknown token in expression");
1570 // If we have an error assume that we've already handled it.
1571 case AsmToken::Error:
1572 return true;
// Logical not: '!' primaryexpr.
1573 case AsmToken::Exclaim:
1574 Lex(); // Eat the operator.
1575 if (parsePrimaryExpr(Res, EndLoc, nullptr))
1576 return true;
1577 Res = MCUnaryExpr::createLNot(Res, getContext(), FirstTokenLoc);
1578 return false;
1579 case AsmToken::Dollar:
1580 case AsmToken::At:
1581 case AsmToken::Identifier: {
1582 StringRef Identifier;
1583 if (parseIdentifier(Identifier)) {
1584 // We may have failed but $ may be a valid token.
1585 if (getTok().is(AsmToken::Dollar)) {
1586 if (Lexer.getMAI().getDollarIsPC()) {
1587 Lex();
1588 // This is a '$' reference, which references the current PC. Emit a
1589 // temporary label to the streamer and refer to it.
1590 MCSymbol *Sym = Ctx.createTempSymbol();
1591 Out.emitLabel(Sym);
1592 Res = MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_None,
1593 getContext());
1594 EndLoc = FirstTokenLoc;
1595 return false;
1597 return Error(FirstTokenLoc, "invalid token in expression");
1600 // Parse named bitwise negation.
1601 if (Identifier.equals_insensitive("not")) {
1602 if (parsePrimaryExpr(Res, EndLoc, nullptr))
1603 return true;
1604 Res = MCUnaryExpr::createNot(Res, getContext(), FirstTokenLoc);
1605 return false;
1607 // Parse directional local label references.
1608 if (Identifier.equals_insensitive("@b") ||
1609 Identifier.equals_insensitive("@f")) {
1610 bool Before = Identifier.equals_insensitive("@b");
1611 MCSymbol *Sym = getContext().getDirectionalLocalSymbol(0, Before);
// A backward reference needs a label that has already been defined.
1612 if (Before && Sym->isUndefined())
1613 return Error(FirstTokenLoc, "Expected @@ label before @B reference");
1614 Res = MCSymbolRefExpr::create(Sym, getContext());
1615 return false;
1617 // Parse symbol variant. Either split the identifier at its first '@', or,
// for targets using parentheses, read the variant name from "(name)".
1618 std::pair<StringRef, StringRef> Split;
1619 if (!MAI.useParensForSymbolVariant()) {
1620 Split = Identifier.split('@');
1621 } else if (Lexer.is(AsmToken::LParen)) {
1622 Lex(); // eat '('.
1623 StringRef VName;
// NOTE(review): the result of parseIdentifier() is ignored here; a failure
// leaves VName empty.
1624 parseIdentifier(VName);
1625 // eat ')'.
1626 if (parseToken(AsmToken::RParen,
1627 "unexpected token in variant, expected ')'"))
1628 return true;
1629 Split = std::make_pair(Identifier, VName);
1632 EndLoc = SMLoc::getFromPointer(Identifier.end());
1634 // This is a symbol reference.
1635 StringRef SymbolName = Identifier;
1636 if (SymbolName.empty())
1637 return Error(getLexer().getLoc(), "expected a symbol reference");
1639 MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None;
1641 // Look up the symbol variant if used.
1642 if (!Split.second.empty()) {
1643 Variant = MCSymbolRefExpr::getVariantKindForName(Split.second);
1644 if (Variant != MCSymbolRefExpr::VK_Invalid) {
1645 SymbolName = Split.first;
1646 } else if (MAI.doesAllowAtInName() && !MAI.useParensForSymbolVariant()) {
// Unknown variant, but '@' is allowed in names: keep the full identifier as
// the symbol name.
1647 Variant = MCSymbolRefExpr::VK_None;
1648 } else {
1649 return Error(SMLoc::getFromPointer(Split.second.begin()),
1650 "invalid variant '" + Split.second + "'");
1654 // Find the field offset if used.
1655 AsmFieldInfo Info;
1656 Split = SymbolName.split('.');
1657 if (Split.second.empty()) {
// No '.' in the name: there is no field to look up.
1658 } else {
1659 SymbolName = Split.first;
// lookUpField() returning true is treated as "not found": retry with the
// first component after the '.' as the base and the remainder as the member.
1660 if (lookUpField(SymbolName, Split.second, Info)) {
1661 std::pair<StringRef, StringRef> BaseMember = Split.second.split('.');
1662 StringRef Base = BaseMember.first, Member = BaseMember.second;
1663 lookUpField(Base, Member, Info);
1664 } else if (Structs.count(SymbolName.lower())) {
1665 // This is actually a reference to a field offset.
1666 Res = MCConstantExpr::create(Info.Offset, getContext());
1667 return false;
1671 MCSymbol *Sym = getContext().getInlineAsmLabel(SymbolName);
1672 if (!Sym) {
1673 // If this is a built-in numeric value, treat it as a constant.
1674 auto BuiltinIt = BuiltinSymbolMap.find(SymbolName.lower());
1675 const BuiltinSymbol Symbol = (BuiltinIt == BuiltinSymbolMap.end())
1676 ? BI_NO_SYMBOL
1677 : BuiltinIt->getValue();
1678 if (Symbol != BI_NO_SYMBOL) {
1679 const MCExpr *Value = evaluateBuiltinValue(Symbol, FirstTokenLoc);
1680 if (Value) {
1681 Res = Value;
1682 return false;
1686 // Variables use case-insensitive symbol names; if this is a variable, we
1687 // find the symbol using its canonical name.
1688 auto VarIt = Variables.find(SymbolName.lower());
1689 if (VarIt != Variables.end())
1690 SymbolName = VarIt->second.Name;
1691 Sym = getContext().getOrCreateSymbol(SymbolName);
1694 // If this is an absolute variable reference, substitute it now to preserve
1695 // semantics in the face of reassignment.
1696 if (Sym->isVariable()) {
1697 auto V = Sym->getVariableValue(/*SetUsed=*/false);
// Constants are inlined only without a variant; a target expression decides
// for itself and overrides that choice.
1698 bool DoInline = isa<MCConstantExpr>(V) && !Variant;
1699 if (auto TV = dyn_cast<MCTargetExpr>(V))
1700 DoInline = TV->inlineAssignedExpr();
1701 if (DoInline) {
1702 if (Variant)
1703 return Error(EndLoc, "unexpected modifier on variable reference");
1704 Res = Sym->getVariableValue(/*SetUsed=*/false);
1705 return false;
1709 // Otherwise create a symbol ref, adding the field offset when it is
// non-zero.
1710 const MCExpr *SymRef =
1711 MCSymbolRefExpr::create(Sym, Variant, getContext(), FirstTokenLoc);
1712 if (Info.Offset) {
1713 Res = MCBinaryExpr::create(
1714 MCBinaryExpr::Add, SymRef,
1715 MCConstantExpr::create(Info.Offset, getContext()), getContext());
1716 } else {
1717 Res = SymRef;
// Report the type: the field's type if one was found, otherwise whatever
// KnownType records for the lower-cased identifier.
1719 if (TypeInfo) {
1720 if (Info.Type.Name.empty()) {
1721 auto TypeIt = KnownType.find(Identifier.lower());
1722 if (TypeIt != KnownType.end()) {
1723 Info.Type = TypeIt->second;
1727 *TypeInfo = Info.Type;
1729 return false;
1731 case AsmToken::BigNum:
1732 return TokError("literal value out of range for directive");
1733 case AsmToken::Integer: {
1734 int64_t IntVal = getTok().getIntVal();
1735 Res = MCConstantExpr::create(IntVal, getContext());
1736 EndLoc = Lexer.getTok().getEndLoc();
1737 Lex(); // Eat token.
1738 return false;
1740 case AsmToken::String: {
1741 // MASM strings (used as constants) are interpreted as big-endian base-256.
1742 SMLoc ValueLoc = getTok().getLoc();
1743 std::string Value;
1744 if (parseEscapedString(Value))
1745 return true;
// More than 8 characters cannot be packed into the 64-bit value below.
1746 if (Value.size() > 8)
1747 return Error(ValueLoc, "literal value out of range");
1748 uint64_t IntValue = 0;
1749 for (const unsigned char CharVal : Value)
1750 IntValue = (IntValue << 8) | CharVal;
1751 Res = MCConstantExpr::create(IntValue, getContext());
1752 return false;
1754 case AsmToken::Real: {
// The literal is parsed as an IEEE double and its bit pattern becomes the
// integer constant.
1755 APFloat RealVal(APFloat::IEEEdouble(), getTok().getString());
1756 uint64_t IntVal = RealVal.bitcastToAPInt().getZExtValue();
1757 Res = MCConstantExpr::create(IntVal, getContext());
1758 EndLoc = Lexer.getTok().getEndLoc();
1759 Lex(); // Eat token.
1760 return false;
1762 case AsmToken::Dot: {
1763 // This is a '.' reference, which references the current PC. Emit a
1764 // temporary label to the streamer and refer to it.
1765 MCSymbol *Sym = Ctx.createTempSymbol();
1766 Out.emitLabel(Sym);
1767 Res = MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_None, getContext());
1768 EndLoc = Lexer.getTok().getEndLoc();
1769 Lex(); // Eat the '.'.
1770 return false;
1772 case AsmToken::LParen:
1773 Lex(); // Eat the '('.
1774 return parseParenExpr(Res, EndLoc);
1775 case AsmToken::LBrac:
1776 if (!PlatformParser->HasBracketExpressions())
1777 return TokError("brackets expression not supported on this target");
1778 Lex(); // Eat the '['.
1779 return parseBracketExpr(Res, EndLoc);
1780 case AsmToken::Minus:
1781 Lex(); // Eat the operator.
1782 if (parsePrimaryExpr(Res, EndLoc, nullptr))
1783 return true;
1784 Res = MCUnaryExpr::createMinus(Res, getContext(), FirstTokenLoc);
1785 return false;
1786 case AsmToken::Plus:
1787 Lex(); // Eat the operator.
1788 if (parsePrimaryExpr(Res, EndLoc, nullptr))
1789 return true;
1790 Res = MCUnaryExpr::createPlus(Res, getContext(), FirstTokenLoc);
1791 return false;
1792 case AsmToken::Tilde:
1793 Lex(); // Eat the operator.
1794 if (parsePrimaryExpr(Res, EndLoc, nullptr))
1795 return true;
1796 Res = MCUnaryExpr::createNot(Res, getContext(), FirstTokenLoc);
1797 return false;
1798 // MIPS unary expression operators. The lexer won't generate these tokens if
1799 // MCAsmInfo::HasMipsExpressions is false for the target.
1800 case AsmToken::PercentCall16:
1801 case AsmToken::PercentCall_Hi:
1802 case AsmToken::PercentCall_Lo:
1803 case AsmToken::PercentDtprel_Hi:
1804 case AsmToken::PercentDtprel_Lo:
1805 case AsmToken::PercentGot:
1806 case AsmToken::PercentGot_Disp:
1807 case AsmToken::PercentGot_Hi:
1808 case AsmToken::PercentGot_Lo:
1809 case AsmToken::PercentGot_Ofst:
1810 case AsmToken::PercentGot_Page:
1811 case AsmToken::PercentGottprel:
1812 case AsmToken::PercentGp_Rel:
1813 case AsmToken::PercentHi:
1814 case AsmToken::PercentHigher:
1815 case AsmToken::PercentHighest:
1816 case AsmToken::PercentLo:
1817 case AsmToken::PercentNeg:
1818 case AsmToken::PercentPcrel_Hi:
1819 case AsmToken::PercentPcrel_Lo:
1820 case AsmToken::PercentTlsgd:
1821 case AsmToken::PercentTlsldm:
1822 case AsmToken::PercentTprel_Hi:
1823 case AsmToken::PercentTprel_Lo:
1824 Lex(); // Eat the operator.
1825 if (Lexer.isNot(AsmToken::LParen))
1826 return TokError("expected '(' after operator");
1827 Lex(); // Eat the '('.
1828 if (parseExpression(Res, EndLoc))
1829 return true;
1830 if (parseRParen())
1831 return true;
// A null result from the target parser is reported as failure.
1832 Res = getTargetParser().createTargetUnaryExpr(Res, FirstTokenKind, Ctx);
1833 return !Res;
1837 bool MasmParser::parseExpression(const MCExpr *&Res) {
1838 SMLoc EndLoc;
1839 return parseExpression(Res, EndLoc);
1842 /// This function checks if the next token is <string> type or arithmetic.
1843 /// string that begin with character '<' must end with character '>'.
1844 /// otherwise it is arithmetics.
1845 /// If the function returns a 'true' value,
1846 /// the End argument will be filled with the last location pointed to the '>'
1847 /// character.
1848 static bool isAngleBracketString(SMLoc &StrLoc, SMLoc &EndLoc) {
1849 assert((StrLoc.getPointer() != nullptr) &&
1850 "Argument to the function cannot be a NULL value");
1851 const char *CharPtr = StrLoc.getPointer();
1852 while ((*CharPtr != '>') && (*CharPtr != '\n') && (*CharPtr != '\r') &&
1853 (*CharPtr != '\0')) {
1854 if (*CharPtr == '!')
1855 CharPtr++;
1856 CharPtr++;
1858 if (*CharPtr == '>') {
1859 EndLoc = StrLoc.getFromPointer(CharPtr + 1);
1860 return true;
1862 return false;
1865 /// creating a string without the escape characters '!'.
1866 static std::string angleBracketString(StringRef BracketContents) {
1867 std::string Res;
1868 for (size_t Pos = 0; Pos < BracketContents.size(); Pos++) {
1869 if (BracketContents[Pos] == '!')
1870 Pos++;
1871 Res += BracketContents[Pos];
1873 return Res;
1876 /// Parse an expression and return it.
1878 /// expr ::= expr &&,|| expr -> lowest.
1879 /// expr ::= expr |,^,&,! expr
1880 /// expr ::= expr ==,!=,<>,<,<=,>,>= expr
1881 /// expr ::= expr <<,>> expr
1882 /// expr ::= expr +,- expr
1883 /// expr ::= expr *,/,% expr -> highest.
1884 /// expr ::= primaryexpr
1886 bool MasmParser::parseExpression(const MCExpr *&Res, SMLoc &EndLoc) {
1887 // Parse the expression.
1888 Res = nullptr;
1889 if (getTargetParser().parsePrimaryExpr(Res, EndLoc) ||
1890 parseBinOpRHS(1, Res, EndLoc))
1891 return true;
1893 // Try to constant fold it up front, if possible. Do not exploit
1894 // assembler here.
1895 int64_t Value;
1896 if (Res->evaluateAsAbsolute(Value))
1897 Res = MCConstantExpr::create(Value, getContext());
1899 return false;
1902 bool MasmParser::parseParenExpression(const MCExpr *&Res, SMLoc &EndLoc) {
1903 Res = nullptr;
1904 return parseParenExpr(Res, EndLoc) || parseBinOpRHS(1, Res, EndLoc);
1907 bool MasmParser::parseParenExprOfDepth(unsigned ParenDepth, const MCExpr *&Res,
1908 SMLoc &EndLoc) {
1909 if (parseParenExpr(Res, EndLoc))
1910 return true;
1912 for (; ParenDepth > 0; --ParenDepth) {
1913 if (parseBinOpRHS(1, Res, EndLoc))
1914 return true;
1916 // We don't Lex() the last RParen.
1917 // This is the same behavior as parseParenExpression().
1918 if (ParenDepth - 1 > 0) {
1919 EndLoc = getTok().getEndLoc();
1920 if (parseRParen())
1921 return true;
1924 return false;
1927 bool MasmParser::parseAbsoluteExpression(int64_t &Res) {
1928 const MCExpr *Expr;
1930 SMLoc StartLoc = Lexer.getLoc();
1931 if (parseExpression(Expr))
1932 return true;
1934 if (!Expr->evaluateAsAbsolute(Res, getStreamer().getAssemblerPtr()))
1935 return Error(StartLoc, "expected absolute expression");
1937 return false;
1940 static unsigned getGNUBinOpPrecedence(AsmToken::TokenKind K,
1941 MCBinaryExpr::Opcode &Kind,
1942 bool ShouldUseLogicalShr,
1943 bool EndExpressionAtGreater) {
1944 switch (K) {
1945 default:
1946 return 0; // not a binop.
1948 // Lowest Precedence: &&, ||
1949 case AsmToken::AmpAmp:
1950 Kind = MCBinaryExpr::LAnd;
1951 return 2;
1952 case AsmToken::PipePipe:
1953 Kind = MCBinaryExpr::LOr;
1954 return 1;
1956 // Low Precedence: ==, !=, <>, <, <=, >, >=
1957 case AsmToken::EqualEqual:
1958 Kind = MCBinaryExpr::EQ;
1959 return 3;
1960 case AsmToken::ExclaimEqual:
1961 case AsmToken::LessGreater:
1962 Kind = MCBinaryExpr::NE;
1963 return 3;
1964 case AsmToken::Less:
1965 Kind = MCBinaryExpr::LT;
1966 return 3;
1967 case AsmToken::LessEqual:
1968 Kind = MCBinaryExpr::LTE;
1969 return 3;
1970 case AsmToken::Greater:
1971 if (EndExpressionAtGreater)
1972 return 0;
1973 Kind = MCBinaryExpr::GT;
1974 return 3;
1975 case AsmToken::GreaterEqual:
1976 Kind = MCBinaryExpr::GTE;
1977 return 3;
1979 // Low Intermediate Precedence: +, -
1980 case AsmToken::Plus:
1981 Kind = MCBinaryExpr::Add;
1982 return 4;
1983 case AsmToken::Minus:
1984 Kind = MCBinaryExpr::Sub;
1985 return 4;
1987 // High Intermediate Precedence: |, &, ^
1988 case AsmToken::Pipe:
1989 Kind = MCBinaryExpr::Or;
1990 return 5;
1991 case AsmToken::Caret:
1992 Kind = MCBinaryExpr::Xor;
1993 return 5;
1994 case AsmToken::Amp:
1995 Kind = MCBinaryExpr::And;
1996 return 5;
1998 // Highest Precedence: *, /, %, <<, >>
1999 case AsmToken::Star:
2000 Kind = MCBinaryExpr::Mul;
2001 return 6;
2002 case AsmToken::Slash:
2003 Kind = MCBinaryExpr::Div;
2004 return 6;
2005 case AsmToken::Percent:
2006 Kind = MCBinaryExpr::Mod;
2007 return 6;
2008 case AsmToken::LessLess:
2009 Kind = MCBinaryExpr::Shl;
2010 return 6;
2011 case AsmToken::GreaterGreater:
2012 if (EndExpressionAtGreater)
2013 return 0;
2014 Kind = ShouldUseLogicalShr ? MCBinaryExpr::LShr : MCBinaryExpr::AShr;
2015 return 6;
2019 unsigned MasmParser::getBinOpPrecedence(AsmToken::TokenKind K,
2020 MCBinaryExpr::Opcode &Kind) {
2021 bool ShouldUseLogicalShr = MAI.shouldUseLogicalShr();
2022 return getGNUBinOpPrecedence(K, Kind, ShouldUseLogicalShr,
2023 AngleBracketDepth > 0);
2026 /// Parse all binary operators with precedence >= 'Precedence'.
2027 /// Res contains the LHS of the expression on input.
2028 bool MasmParser::parseBinOpRHS(unsigned Precedence, const MCExpr *&Res,
2029 SMLoc &EndLoc) {
2030 SMLoc StartLoc = Lexer.getLoc();
2031 while (true) {
2032 AsmToken::TokenKind TokKind = Lexer.getKind();
2033 if (Lexer.getKind() == AsmToken::Identifier) {
2034 TokKind = StringSwitch<AsmToken::TokenKind>(Lexer.getTok().getString())
2035 .CaseLower("and", AsmToken::Amp)
2036 .CaseLower("not", AsmToken::Exclaim)
2037 .CaseLower("or", AsmToken::Pipe)
2038 .CaseLower("xor", AsmToken::Caret)
2039 .CaseLower("shl", AsmToken::LessLess)
2040 .CaseLower("shr", AsmToken::GreaterGreater)
2041 .CaseLower("eq", AsmToken::EqualEqual)
2042 .CaseLower("ne", AsmToken::ExclaimEqual)
2043 .CaseLower("lt", AsmToken::Less)
2044 .CaseLower("le", AsmToken::LessEqual)
2045 .CaseLower("gt", AsmToken::Greater)
2046 .CaseLower("ge", AsmToken::GreaterEqual)
2047 .Default(TokKind);
2049 MCBinaryExpr::Opcode Kind = MCBinaryExpr::Add;
2050 unsigned TokPrec = getBinOpPrecedence(TokKind, Kind);
2052 // If the next token is lower precedence than we are allowed to eat, return
2053 // successfully with what we ate already.
2054 if (TokPrec < Precedence)
2055 return false;
2057 Lex();
2059 // Eat the next primary expression.
2060 const MCExpr *RHS;
2061 if (getTargetParser().parsePrimaryExpr(RHS, EndLoc))
2062 return true;
2064 // If BinOp binds less tightly with RHS than the operator after RHS, let
2065 // the pending operator take RHS as its LHS.
2066 MCBinaryExpr::Opcode Dummy;
2067 unsigned NextTokPrec = getBinOpPrecedence(Lexer.getKind(), Dummy);
2068 if (TokPrec < NextTokPrec && parseBinOpRHS(TokPrec + 1, RHS, EndLoc))
2069 return true;
2071 // Merge LHS and RHS according to operator.
2072 Res = MCBinaryExpr::create(Kind, Res, RHS, getContext(), StartLoc);
2076 /// ParseStatement:
2077 /// ::= % statement
2078 /// ::= EndOfStatement
2079 /// ::= Label* Directive ...Operands... EndOfStatement
2080 /// ::= Label* Identifier OperandList* EndOfStatement
2081 bool MasmParser::parseStatement(ParseStatementInfo &Info,
2082 MCAsmParserSemaCallback *SI) {
2083 assert(!hasPendingError() && "parseStatement started with pending error");
2084 // Eat initial spaces and comments.
2085 while (Lexer.is(AsmToken::Space))
2086 Lex();
2087 if (Lexer.is(AsmToken::EndOfStatement)) {
2088 // If this is a line comment we can drop it safely.
2089 if (getTok().getString().empty() || getTok().getString().front() == '\r' ||
2090 getTok().getString().front() == '\n')
2091 Out.addBlankLine();
2092 Lex();
2093 return false;
2096 // If preceded by an expansion operator, first expand all text macros and
2097 // macro functions.
2098 if (getTok().is(AsmToken::Percent)) {
2099 SMLoc ExpansionLoc = getTok().getLoc();
2100 if (parseToken(AsmToken::Percent) || expandStatement(ExpansionLoc))
2101 return true;
2104 // Statements always start with an identifier, unless we're dealing with a
2105 // processor directive (.386, .686, etc.) that lexes as a real.
2106 AsmToken ID = getTok();
2107 SMLoc IDLoc = ID.getLoc();
2108 StringRef IDVal;
2109 if (Lexer.is(AsmToken::HashDirective))
2110 return parseCppHashLineFilenameComment(IDLoc);
2111 if (Lexer.is(AsmToken::Dot)) {
2112 // Treat '.' as a valid identifier in this context.
2113 Lex();
2114 IDVal = ".";
2115 } else if (Lexer.is(AsmToken::Real)) {
2116 // Treat ".<number>" as a valid identifier in this context.
2117 IDVal = getTok().getString();
2118 Lex(); // always eat a token
2119 if (!IDVal.starts_with("."))
2120 return Error(IDLoc, "unexpected token at start of statement");
2121 } else if (parseIdentifier(IDVal, StartOfStatement)) {
2122 if (!TheCondState.Ignore) {
2123 Lex(); // always eat a token
2124 return Error(IDLoc, "unexpected token at start of statement");
2126 IDVal = "";
2129 // Handle conditional assembly here before checking for skipping. We
2130 // have to do this so that .endif isn't skipped in a ".if 0" block for
2131 // example.
2132 StringMap<DirectiveKind>::const_iterator DirKindIt =
2133 DirectiveKindMap.find(IDVal.lower());
2134 DirectiveKind DirKind = (DirKindIt == DirectiveKindMap.end())
2135 ? DK_NO_DIRECTIVE
2136 : DirKindIt->getValue();
2137 switch (DirKind) {
2138 default:
2139 break;
2140 case DK_IF:
2141 case DK_IFE:
2142 return parseDirectiveIf(IDLoc, DirKind);
2143 case DK_IFB:
2144 return parseDirectiveIfb(IDLoc, true);
2145 case DK_IFNB:
2146 return parseDirectiveIfb(IDLoc, false);
2147 case DK_IFDEF:
2148 return parseDirectiveIfdef(IDLoc, true);
2149 case DK_IFNDEF:
2150 return parseDirectiveIfdef(IDLoc, false);
2151 case DK_IFDIF:
2152 return parseDirectiveIfidn(IDLoc, /*ExpectEqual=*/false,
2153 /*CaseInsensitive=*/false);
2154 case DK_IFDIFI:
2155 return parseDirectiveIfidn(IDLoc, /*ExpectEqual=*/false,
2156 /*CaseInsensitive=*/true);
2157 case DK_IFIDN:
2158 return parseDirectiveIfidn(IDLoc, /*ExpectEqual=*/true,
2159 /*CaseInsensitive=*/false);
2160 case DK_IFIDNI:
2161 return parseDirectiveIfidn(IDLoc, /*ExpectEqual=*/true,
2162 /*CaseInsensitive=*/true);
2163 case DK_ELSEIF:
2164 case DK_ELSEIFE:
2165 return parseDirectiveElseIf(IDLoc, DirKind);
2166 case DK_ELSEIFB:
2167 return parseDirectiveElseIfb(IDLoc, true);
2168 case DK_ELSEIFNB:
2169 return parseDirectiveElseIfb(IDLoc, false);
2170 case DK_ELSEIFDEF:
2171 return parseDirectiveElseIfdef(IDLoc, true);
2172 case DK_ELSEIFNDEF:
2173 return parseDirectiveElseIfdef(IDLoc, false);
2174 case DK_ELSEIFDIF:
2175 return parseDirectiveElseIfidn(IDLoc, /*ExpectEqual=*/false,
2176 /*CaseInsensitive=*/false);
2177 case DK_ELSEIFDIFI:
2178 return parseDirectiveElseIfidn(IDLoc, /*ExpectEqual=*/false,
2179 /*CaseInsensitive=*/true);
2180 case DK_ELSEIFIDN:
2181 return parseDirectiveElseIfidn(IDLoc, /*ExpectEqual=*/true,
2182 /*CaseInsensitive=*/false);
2183 case DK_ELSEIFIDNI:
2184 return parseDirectiveElseIfidn(IDLoc, /*ExpectEqual=*/true,
2185 /*CaseInsensitive=*/true);
2186 case DK_ELSE:
2187 return parseDirectiveElse(IDLoc);
2188 case DK_ENDIF:
2189 return parseDirectiveEndIf(IDLoc);
2192 // Ignore the statement if in the middle of inactive conditional
2193 // (e.g. ".if 0").
2194 if (TheCondState.Ignore) {
2195 eatToEndOfStatement();
2196 return false;
2199 // FIXME: Recurse on local labels?
2201 // Check for a label.
2202 // ::= identifier ':'
2203 // ::= number ':'
2204 if (Lexer.is(AsmToken::Colon) && getTargetParser().isLabel(ID)) {
2205 if (checkForValidSection())
2206 return true;
2208 // identifier ':' -> Label.
2209 Lex();
2211 // Diagnose attempt to use '.' as a label.
2212 if (IDVal == ".")
2213 return Error(IDLoc, "invalid use of pseudo-symbol '.' as a label");
2215 // Diagnose attempt to use a variable as a label.
2217 // FIXME: Diagnostics. Note the location of the definition as a label.
2218 // FIXME: This doesn't diagnose assignment to a symbol which has been
2219 // implicitly marked as external.
2220 MCSymbol *Sym;
2221 if (ParsingMSInlineAsm && SI) {
2222 StringRef RewrittenLabel =
2223 SI->LookupInlineAsmLabel(IDVal, getSourceManager(), IDLoc, true);
2224 assert(!RewrittenLabel.empty() &&
2225 "We should have an internal name here.");
2226 Info.AsmRewrites->emplace_back(AOK_Label, IDLoc, IDVal.size(),
2227 RewrittenLabel);
2228 IDVal = RewrittenLabel;
2230 // Handle directional local labels
2231 if (IDVal == "@@") {
2232 Sym = Ctx.createDirectionalLocalSymbol(0);
2233 } else {
2234 Sym = getContext().getOrCreateSymbol(IDVal);
2237 // End of Labels should be treated as end of line for lexing
2238 // purposes but that information is not available to the Lexer who
2239 // does not understand Labels. This may cause us to see a Hash
2240 // here instead of a preprocessor line comment.
2241 if (getTok().is(AsmToken::Hash)) {
2242 std::string CommentStr = parseStringTo(AsmToken::EndOfStatement);
2243 Lexer.Lex();
2244 Lexer.UnLex(AsmToken(AsmToken::EndOfStatement, CommentStr));
2247 // Consume any end of statement token, if present, to avoid spurious
2248 // addBlankLine calls().
2249 if (getTok().is(AsmToken::EndOfStatement)) {
2250 Lex();
2253 getTargetParser().doBeforeLabelEmit(Sym, IDLoc);
2255 // Emit the label.
2256 if (!getTargetParser().isParsingMSInlineAsm())
2257 Out.emitLabel(Sym, IDLoc);
2259 // If we are generating dwarf for assembly source files then gather the
2260 // info to make a dwarf label entry for this label if needed.
2261 if (enabledGenDwarfForAssembly())
2262 MCGenDwarfLabelEntry::Make(Sym, &getStreamer(), getSourceManager(),
2263 IDLoc);
2265 getTargetParser().onLabelParsed(Sym);
2267 return false;
2270 // If macros are enabled, check to see if this is a macro instantiation.
2271 if (const MCAsmMacro *M = getContext().lookupMacro(IDVal.lower())) {
2272 return handleMacroEntry(M, IDLoc);
2275 // Otherwise, we have a normal instruction or directive.
2277 if (DirKind != DK_NO_DIRECTIVE) {
2278 // There are several entities interested in parsing directives:
2280 // 1. Asm parser extensions. For example, platform-specific parsers
2281 // (like the ELF parser) register themselves as extensions.
2282 // 2. The target-specific assembly parser. Some directives are target
2283 // specific or may potentially behave differently on certain targets.
2284 // 3. The generic directive parser implemented by this class. These are
2285 // all the directives that behave in a target and platform independent
2286 // manner, or at least have a default behavior that's shared between
2287 // all targets and platforms.
2289 getTargetParser().flushPendingInstructions(getStreamer());
2291 // Special-case handling of structure-end directives at higher priority,
2292 // since ENDS is overloaded as a segment-end directive.
2293 if (IDVal.equals_insensitive("ends") && StructInProgress.size() > 1 &&
2294 getTok().is(AsmToken::EndOfStatement)) {
2295 return parseDirectiveNestedEnds();
2298 // First, check the extension directive map to see if any extension has
2299 // registered itself to parse this directive.
2300 std::pair<MCAsmParserExtension *, DirectiveHandler> Handler =
2301 ExtensionDirectiveMap.lookup(IDVal.lower());
2302 if (Handler.first)
2303 return (*Handler.second)(Handler.first, IDVal, IDLoc);
2305 // Next, let the target-specific assembly parser try.
2306 if (ID.isNot(AsmToken::Identifier))
2307 return false;
2309 ParseStatus TPDirectiveReturn = getTargetParser().parseDirective(ID);
2310 assert(TPDirectiveReturn.isFailure() == hasPendingError() &&
2311 "Should only return Failure iff there was an error");
2312 if (TPDirectiveReturn.isFailure())
2313 return true;
2314 if (TPDirectiveReturn.isSuccess())
2315 return false;
2317 // Finally, if no one else is interested in this directive, it must be
2318 // generic and familiar to this class.
2319 switch (DirKind) {
2320 default:
2321 break;
2322 case DK_ASCII:
2323 return parseDirectiveAscii(IDVal, false);
2324 case DK_ASCIZ:
2325 case DK_STRING:
2326 return parseDirectiveAscii(IDVal, true);
2327 case DK_BYTE:
2328 case DK_SBYTE:
2329 case DK_DB:
2330 return parseDirectiveValue(IDVal, 1);
2331 case DK_WORD:
2332 case DK_SWORD:
2333 case DK_DW:
2334 return parseDirectiveValue(IDVal, 2);
2335 case DK_DWORD:
2336 case DK_SDWORD:
2337 case DK_DD:
2338 return parseDirectiveValue(IDVal, 4);
2339 case DK_FWORD:
2340 case DK_DF:
2341 return parseDirectiveValue(IDVal, 6);
2342 case DK_QWORD:
2343 case DK_SQWORD:
2344 case DK_DQ:
2345 return parseDirectiveValue(IDVal, 8);
2346 case DK_REAL4:
2347 return parseDirectiveRealValue(IDVal, APFloat::IEEEsingle(), 4);
2348 case DK_REAL8:
2349 return parseDirectiveRealValue(IDVal, APFloat::IEEEdouble(), 8);
2350 case DK_REAL10:
2351 return parseDirectiveRealValue(IDVal, APFloat::x87DoubleExtended(), 10);
2352 case DK_STRUCT:
2353 case DK_UNION:
2354 return parseDirectiveNestedStruct(IDVal, DirKind);
2355 case DK_ENDS:
2356 return parseDirectiveNestedEnds();
2357 case DK_ALIGN:
2358 return parseDirectiveAlign();
2359 case DK_EVEN:
2360 return parseDirectiveEven();
2361 case DK_ORG:
2362 return parseDirectiveOrg();
2363 case DK_EXTERN:
2364 return parseDirectiveExtern();
2365 case DK_PUBLIC:
2366 return parseDirectiveSymbolAttribute(MCSA_Global);
2367 case DK_COMM:
2368 return parseDirectiveComm(/*IsLocal=*/false);
2369 case DK_COMMENT:
2370 return parseDirectiveComment(IDLoc);
2371 case DK_INCLUDE:
2372 return parseDirectiveInclude();
2373 case DK_REPEAT:
2374 return parseDirectiveRepeat(IDLoc, IDVal);
2375 case DK_WHILE:
2376 return parseDirectiveWhile(IDLoc);
2377 case DK_FOR:
2378 return parseDirectiveFor(IDLoc, IDVal);
2379 case DK_FORC:
2380 return parseDirectiveForc(IDLoc, IDVal);
2381 case DK_FILE:
2382 return parseDirectiveFile(IDLoc);
2383 case DK_LINE:
2384 return parseDirectiveLine();
2385 case DK_LOC:
2386 return parseDirectiveLoc();
2387 case DK_STABS:
2388 return parseDirectiveStabs();
2389 case DK_CV_FILE:
2390 return parseDirectiveCVFile();
2391 case DK_CV_FUNC_ID:
2392 return parseDirectiveCVFuncId();
2393 case DK_CV_INLINE_SITE_ID:
2394 return parseDirectiveCVInlineSiteId();
2395 case DK_CV_LOC:
2396 return parseDirectiveCVLoc();
2397 case DK_CV_LINETABLE:
2398 return parseDirectiveCVLinetable();
2399 case DK_CV_INLINE_LINETABLE:
2400 return parseDirectiveCVInlineLinetable();
2401 case DK_CV_DEF_RANGE:
2402 return parseDirectiveCVDefRange();
2403 case DK_CV_STRING:
2404 return parseDirectiveCVString();
2405 case DK_CV_STRINGTABLE:
2406 return parseDirectiveCVStringTable();
2407 case DK_CV_FILECHECKSUMS:
2408 return parseDirectiveCVFileChecksums();
2409 case DK_CV_FILECHECKSUM_OFFSET:
2410 return parseDirectiveCVFileChecksumOffset();
2411 case DK_CV_FPO_DATA:
2412 return parseDirectiveCVFPOData();
2413 case DK_CFI_SECTIONS:
2414 return parseDirectiveCFISections();
2415 case DK_CFI_STARTPROC:
2416 return parseDirectiveCFIStartProc();
2417 case DK_CFI_ENDPROC:
2418 return parseDirectiveCFIEndProc();
2419 case DK_CFI_DEF_CFA:
2420 return parseDirectiveCFIDefCfa(IDLoc);
2421 case DK_CFI_DEF_CFA_OFFSET:
2422 return parseDirectiveCFIDefCfaOffset(IDLoc);
2423 case DK_CFI_ADJUST_CFA_OFFSET:
2424 return parseDirectiveCFIAdjustCfaOffset(IDLoc);
2425 case DK_CFI_DEF_CFA_REGISTER:
2426 return parseDirectiveCFIDefCfaRegister(IDLoc);
2427 case DK_CFI_OFFSET:
2428 return parseDirectiveCFIOffset(IDLoc);
2429 case DK_CFI_REL_OFFSET:
2430 return parseDirectiveCFIRelOffset(IDLoc);
2431 case DK_CFI_PERSONALITY:
2432 return parseDirectiveCFIPersonalityOrLsda(true);
2433 case DK_CFI_LSDA:
2434 return parseDirectiveCFIPersonalityOrLsda(false);
2435 case DK_CFI_REMEMBER_STATE:
2436 return parseDirectiveCFIRememberState(IDLoc);
2437 case DK_CFI_RESTORE_STATE:
2438 return parseDirectiveCFIRestoreState(IDLoc);
2439 case DK_CFI_SAME_VALUE:
2440 return parseDirectiveCFISameValue(IDLoc);
2441 case DK_CFI_RESTORE:
2442 return parseDirectiveCFIRestore(IDLoc);
2443 case DK_CFI_ESCAPE:
2444 return parseDirectiveCFIEscape(IDLoc);
2445 case DK_CFI_RETURN_COLUMN:
2446 return parseDirectiveCFIReturnColumn(IDLoc);
2447 case DK_CFI_SIGNAL_FRAME:
2448 return parseDirectiveCFISignalFrame();
2449 case DK_CFI_UNDEFINED:
2450 return parseDirectiveCFIUndefined(IDLoc);
2451 case DK_CFI_REGISTER:
2452 return parseDirectiveCFIRegister(IDLoc);
2453 case DK_CFI_WINDOW_SAVE:
2454 return parseDirectiveCFIWindowSave(IDLoc);
2455 case DK_EXITM:
2456 Info.ExitValue = "";
2457 return parseDirectiveExitMacro(IDLoc, IDVal, *Info.ExitValue);
2458 case DK_ENDM:
2459 Info.ExitValue = "";
2460 return parseDirectiveEndMacro(IDVal);
2461 case DK_PURGE:
2462 return parseDirectivePurgeMacro(IDLoc);
2463 case DK_END:
2464 return parseDirectiveEnd(IDLoc);
2465 case DK_ERR:
2466 return parseDirectiveError(IDLoc);
2467 case DK_ERRB:
2468 return parseDirectiveErrorIfb(IDLoc, true);
2469 case DK_ERRNB:
2470 return parseDirectiveErrorIfb(IDLoc, false);
2471 case DK_ERRDEF:
2472 return parseDirectiveErrorIfdef(IDLoc, true);
2473 case DK_ERRNDEF:
2474 return parseDirectiveErrorIfdef(IDLoc, false);
2475 case DK_ERRDIF:
2476 return parseDirectiveErrorIfidn(IDLoc, /*ExpectEqual=*/false,
2477 /*CaseInsensitive=*/false);
2478 case DK_ERRDIFI:
2479 return parseDirectiveErrorIfidn(IDLoc, /*ExpectEqual=*/false,
2480 /*CaseInsensitive=*/true);
2481 case DK_ERRIDN:
2482 return parseDirectiveErrorIfidn(IDLoc, /*ExpectEqual=*/true,
2483 /*CaseInsensitive=*/false);
2484 case DK_ERRIDNI:
2485 return parseDirectiveErrorIfidn(IDLoc, /*ExpectEqual=*/true,
2486 /*CaseInsensitive=*/true);
2487 case DK_ERRE:
2488 return parseDirectiveErrorIfe(IDLoc, true);
2489 case DK_ERRNZ:
2490 return parseDirectiveErrorIfe(IDLoc, false);
2491 case DK_RADIX:
2492 return parseDirectiveRadix(IDLoc);
2493 case DK_ECHO:
2494 return parseDirectiveEcho(IDLoc);
2497 return Error(IDLoc, "unknown directive");
2500 // We also check if this is allocating memory with user-defined type.
2501 auto IDIt = Structs.find(IDVal.lower());
2502 if (IDIt != Structs.end())
2503 return parseDirectiveStructValue(/*Structure=*/IDIt->getValue(), IDVal,
2504 IDLoc);
2506 // Non-conditional Microsoft directives sometimes follow their first argument.
2507 const AsmToken nextTok = getTok();
2508 const StringRef nextVal = nextTok.getString();
2509 const SMLoc nextLoc = nextTok.getLoc();
2511 const AsmToken afterNextTok = peekTok();
2513 // There are several entities interested in parsing infix directives:
2515 // 1. Asm parser extensions. For example, platform-specific parsers
2516 // (like the ELF parser) register themselves as extensions.
2517 // 2. The generic directive parser implemented by this class. These are
2518 // all the directives that behave in a target and platform independent
2519 // manner, or at least have a default behavior that's shared between
2520 // all targets and platforms.
2522 getTargetParser().flushPendingInstructions(getStreamer());
2524 // Special-case handling of structure-end directives at higher priority, since
2525 // ENDS is overloaded as a segment-end directive.
2526 if (nextVal.equals_insensitive("ends") && StructInProgress.size() == 1) {
2527 Lex();
2528 return parseDirectiveEnds(IDVal, IDLoc);
2531 // First, check the extension directive map to see if any extension has
2532 // registered itself to parse this directive.
2533 std::pair<MCAsmParserExtension *, DirectiveHandler> Handler =
2534 ExtensionDirectiveMap.lookup(nextVal.lower());
2535 if (Handler.first) {
2536 Lex();
2537 Lexer.UnLex(ID);
2538 return (*Handler.second)(Handler.first, nextVal, nextLoc);
2541 // If no one else is interested in this directive, it must be
2542 // generic and familiar to this class.
2543 DirKindIt = DirectiveKindMap.find(nextVal.lower());
2544 DirKind = (DirKindIt == DirectiveKindMap.end())
2545 ? DK_NO_DIRECTIVE
2546 : DirKindIt->getValue();
2547 switch (DirKind) {
2548 default:
2549 break;
2550 case DK_ASSIGN:
2551 case DK_EQU:
2552 case DK_TEXTEQU:
2553 Lex();
2554 return parseDirectiveEquate(nextVal, IDVal, DirKind, IDLoc);
2555 case DK_BYTE:
2556 if (afterNextTok.is(AsmToken::Identifier) &&
2557 afterNextTok.getString().equals_insensitive("ptr")) {
2558 // Size directive; part of an instruction.
2559 break;
2561 [[fallthrough]];
2562 case DK_SBYTE:
2563 case DK_DB:
2564 Lex();
2565 return parseDirectiveNamedValue(nextVal, 1, IDVal, IDLoc);
2566 case DK_WORD:
2567 if (afterNextTok.is(AsmToken::Identifier) &&
2568 afterNextTok.getString().equals_insensitive("ptr")) {
2569 // Size directive; part of an instruction.
2570 break;
2572 [[fallthrough]];
2573 case DK_SWORD:
2574 case DK_DW:
2575 Lex();
2576 return parseDirectiveNamedValue(nextVal, 2, IDVal, IDLoc);
2577 case DK_DWORD:
2578 if (afterNextTok.is(AsmToken::Identifier) &&
2579 afterNextTok.getString().equals_insensitive("ptr")) {
2580 // Size directive; part of an instruction.
2581 break;
2583 [[fallthrough]];
2584 case DK_SDWORD:
2585 case DK_DD:
2586 Lex();
2587 return parseDirectiveNamedValue(nextVal, 4, IDVal, IDLoc);
2588 case DK_FWORD:
2589 if (afterNextTok.is(AsmToken::Identifier) &&
2590 afterNextTok.getString().equals_insensitive("ptr")) {
2591 // Size directive; part of an instruction.
2592 break;
2594 [[fallthrough]];
2595 case DK_DF:
2596 Lex();
2597 return parseDirectiveNamedValue(nextVal, 6, IDVal, IDLoc);
2598 case DK_QWORD:
2599 if (afterNextTok.is(AsmToken::Identifier) &&
2600 afterNextTok.getString().equals_insensitive("ptr")) {
2601 // Size directive; part of an instruction.
2602 break;
2604 [[fallthrough]];
2605 case DK_SQWORD:
2606 case DK_DQ:
2607 Lex();
2608 return parseDirectiveNamedValue(nextVal, 8, IDVal, IDLoc);
2609 case DK_REAL4:
2610 Lex();
2611 return parseDirectiveNamedRealValue(nextVal, APFloat::IEEEsingle(), 4,
2612 IDVal, IDLoc);
2613 case DK_REAL8:
2614 Lex();
2615 return parseDirectiveNamedRealValue(nextVal, APFloat::IEEEdouble(), 8,
2616 IDVal, IDLoc);
2617 case DK_REAL10:
2618 Lex();
2619 return parseDirectiveNamedRealValue(nextVal, APFloat::x87DoubleExtended(),
2620 10, IDVal, IDLoc);
2621 case DK_STRUCT:
2622 case DK_UNION:
2623 Lex();
2624 return parseDirectiveStruct(nextVal, DirKind, IDVal, IDLoc);
2625 case DK_ENDS:
2626 Lex();
2627 return parseDirectiveEnds(IDVal, IDLoc);
2628 case DK_MACRO:
2629 Lex();
2630 return parseDirectiveMacro(IDVal, IDLoc);
2633 // Finally, we check if this is allocating a variable with user-defined type.
2634 auto NextIt = Structs.find(nextVal.lower());
2635 if (NextIt != Structs.end()) {
2636 Lex();
2637 return parseDirectiveNamedStructValue(/*Structure=*/NextIt->getValue(),
2638 nextVal, nextLoc, IDVal);
2641 // __asm _emit or __asm __emit
2642 if (ParsingMSInlineAsm && (IDVal == "_emit" || IDVal == "__emit" ||
2643 IDVal == "_EMIT" || IDVal == "__EMIT"))
2644 return parseDirectiveMSEmit(IDLoc, Info, IDVal.size());
2646 // __asm align
2647 if (ParsingMSInlineAsm && (IDVal == "align" || IDVal == "ALIGN"))
2648 return parseDirectiveMSAlign(IDLoc, Info);
2650 if (ParsingMSInlineAsm && (IDVal == "even" || IDVal == "EVEN"))
2651 Info.AsmRewrites->emplace_back(AOK_EVEN, IDLoc, 4);
2652 if (checkForValidSection())
2653 return true;
2655 // Canonicalize the opcode to lower case.
2656 std::string OpcodeStr = IDVal.lower();
2657 ParseInstructionInfo IInfo(Info.AsmRewrites);
2658 bool ParseHadError = getTargetParser().parseInstruction(IInfo, OpcodeStr, ID,
2659 Info.ParsedOperands);
2660 Info.ParseError = ParseHadError;
2662 // Dump the parsed representation, if requested.
2663 if (getShowParsedOperands()) {
2664 SmallString<256> Str;
2665 raw_svector_ostream OS(Str);
2666 OS << "parsed instruction: [";
2667 for (unsigned i = 0; i != Info.ParsedOperands.size(); ++i) {
2668 if (i != 0)
2669 OS << ", ";
2670 Info.ParsedOperands[i]->print(OS);
2672 OS << "]";
2674 printMessage(IDLoc, SourceMgr::DK_Note, OS.str());
2677 // Fail even if ParseInstruction erroneously returns false.
2678 if (hasPendingError() || ParseHadError)
2679 return true;
2681 // If we are generating dwarf for the current section then generate a .loc
2682 // directive for the instruction.
2683 if (!ParseHadError && enabledGenDwarfForAssembly() &&
2684 getContext().getGenDwarfSectionSyms().count(
2685 getStreamer().getCurrentSectionOnly())) {
2686 unsigned Line;
2687 if (ActiveMacros.empty())
2688 Line = SrcMgr.FindLineNumber(IDLoc, CurBuffer);
2689 else
2690 Line = SrcMgr.FindLineNumber(ActiveMacros.front()->InstantiationLoc,
2691 ActiveMacros.front()->ExitBuffer);
2693 // If we previously parsed a cpp hash file line comment then make sure the
2694 // current Dwarf File is for the CppHashFilename if not then emit the
2695 // Dwarf File table for it and adjust the line number for the .loc.
2696 if (!CppHashInfo.Filename.empty()) {
2697 unsigned FileNumber = getStreamer().emitDwarfFileDirective(
2698 0, StringRef(), CppHashInfo.Filename);
2699 getContext().setGenDwarfFileNumber(FileNumber);
2701 unsigned CppHashLocLineNo =
2702 SrcMgr.FindLineNumber(CppHashInfo.Loc, CppHashInfo.Buf);
2703 Line = CppHashInfo.LineNumber - 1 + (Line - CppHashLocLineNo);
2706 getStreamer().emitDwarfLocDirective(
2707 getContext().getGenDwarfFileNumber(), Line, 0,
2708 DWARF2_LINE_DEFAULT_IS_STMT ? DWARF2_FLAG_IS_STMT : 0, 0, 0,
2709 StringRef());
2712 // If parsing succeeded, match the instruction.
2713 if (!ParseHadError) {
2714 uint64_t ErrorInfo;
2715 if (getTargetParser().matchAndEmitInstruction(
2716 IDLoc, Info.Opcode, Info.ParsedOperands, Out, ErrorInfo,
2717 getTargetParser().isParsingMSInlineAsm()))
2718 return true;
2720 return false;
2723 // Parse and erase curly braces marking block start/end.
2724 bool MasmParser::parseCurlyBlockScope(
2725 SmallVectorImpl<AsmRewrite> &AsmStrRewrites) {
2726 // Identify curly brace marking block start/end.
2727 if (Lexer.isNot(AsmToken::LCurly) && Lexer.isNot(AsmToken::RCurly))
2728 return false;
2730 SMLoc StartLoc = Lexer.getLoc();
2731 Lex(); // Eat the brace.
2732 if (Lexer.is(AsmToken::EndOfStatement))
2733 Lex(); // Eat EndOfStatement following the brace.
2735 // Erase the block start/end brace from the output asm string.
2736 AsmStrRewrites.emplace_back(AOK_Skip, StartLoc, Lexer.getLoc().getPointer() -
2737 StartLoc.getPointer());
2738 return true;
2741 /// parseCppHashLineFilenameComment as this:
2742 /// ::= # number "filename"
2743 bool MasmParser::parseCppHashLineFilenameComment(SMLoc L) {
2744 Lex(); // Eat the hash token.
2745 // Lexer only ever emits HashDirective if it fully formed if it's
2746 // done the checking already so this is an internal error.
2747 assert(getTok().is(AsmToken::Integer) &&
2748 "Lexing Cpp line comment: Expected Integer");
2749 int64_t LineNumber = getTok().getIntVal();
2750 Lex();
2751 assert(getTok().is(AsmToken::String) &&
2752 "Lexing Cpp line comment: Expected String");
2753 StringRef Filename = getTok().getString();
2754 Lex();
2756 // Get rid of the enclosing quotes.
2757 Filename = Filename.substr(1, Filename.size() - 2);
2759 // Save the SMLoc, Filename and LineNumber for later use by diagnostics
2760 // and possibly DWARF file info.
2761 CppHashInfo.Loc = L;
2762 CppHashInfo.Filename = Filename;
2763 CppHashInfo.LineNumber = LineNumber;
2764 CppHashInfo.Buf = CurBuffer;
2765 if (FirstCppHashFilename.empty())
2766 FirstCppHashFilename = Filename;
2767 return false;
2770 /// will use the last parsed cpp hash line filename comment
2771 /// for the Filename and LineNo if any in the diagnostic.
2772 void MasmParser::DiagHandler(const SMDiagnostic &Diag, void *Context) {
2773 const MasmParser *Parser = static_cast<const MasmParser *>(Context);
2774 raw_ostream &OS = errs();
2776 const SourceMgr &DiagSrcMgr = *Diag.getSourceMgr();
2777 SMLoc DiagLoc = Diag.getLoc();
2778 unsigned DiagBuf = DiagSrcMgr.FindBufferContainingLoc(DiagLoc);
2779 unsigned CppHashBuf =
2780 Parser->SrcMgr.FindBufferContainingLoc(Parser->CppHashInfo.Loc);
2782 // Like SourceMgr::printMessage() we need to print the include stack if any
2783 // before printing the message.
2784 unsigned DiagCurBuffer = DiagSrcMgr.FindBufferContainingLoc(DiagLoc);
2785 if (!Parser->SavedDiagHandler && DiagCurBuffer &&
2786 DiagCurBuffer != DiagSrcMgr.getMainFileID()) {
2787 SMLoc ParentIncludeLoc = DiagSrcMgr.getParentIncludeLoc(DiagCurBuffer);
2788 DiagSrcMgr.PrintIncludeStack(ParentIncludeLoc, OS);
2791 // If we have not parsed a cpp hash line filename comment or the source
2792 // manager changed or buffer changed (like in a nested include) then just
2793 // print the normal diagnostic using its Filename and LineNo.
2794 if (!Parser->CppHashInfo.LineNumber || &DiagSrcMgr != &Parser->SrcMgr ||
2795 DiagBuf != CppHashBuf) {
2796 if (Parser->SavedDiagHandler)
2797 Parser->SavedDiagHandler(Diag, Parser->SavedDiagContext);
2798 else
2799 Diag.print(nullptr, OS);
2800 return;
2803 // Use the CppHashFilename and calculate a line number based on the
2804 // CppHashInfo.Loc and CppHashInfo.LineNumber relative to this Diag's SMLoc
2805 // for the diagnostic.
2806 const std::string &Filename = std::string(Parser->CppHashInfo.Filename);
2808 int DiagLocLineNo = DiagSrcMgr.FindLineNumber(DiagLoc, DiagBuf);
2809 int CppHashLocLineNo =
2810 Parser->SrcMgr.FindLineNumber(Parser->CppHashInfo.Loc, CppHashBuf);
2811 int LineNo =
2812 Parser->CppHashInfo.LineNumber - 1 + (DiagLocLineNo - CppHashLocLineNo);
2814 SMDiagnostic NewDiag(*Diag.getSourceMgr(), Diag.getLoc(), Filename, LineNo,
2815 Diag.getColumnNo(), Diag.getKind(), Diag.getMessage(),
2816 Diag.getLineContents(), Diag.getRanges());
2818 if (Parser->SavedDiagHandler)
2819 Parser->SavedDiagHandler(NewDiag, Parser->SavedDiagContext);
2820 else
2821 NewDiag.print(nullptr, OS);
2824 // This is similar to the IsIdentifierChar function in AsmLexer.cpp, but does
2825 // not accept '.'.
2826 static bool isMacroParameterChar(char C) {
2827 return isAlnum(C) || C == '_' || C == '$' || C == '@' || C == '?';
2830 bool MasmParser::expandMacro(raw_svector_ostream &OS, StringRef Body,
2831 ArrayRef<MCAsmMacroParameter> Parameters,
2832 ArrayRef<MCAsmMacroArgument> A,
2833 const std::vector<std::string> &Locals, SMLoc L) {
2834 unsigned NParameters = Parameters.size();
2835 if (NParameters != A.size())
2836 return Error(L, "Wrong number of arguments");
2837 StringMap<std::string> LocalSymbols;
2838 std::string Name;
2839 Name.reserve(6);
2840 for (StringRef Local : Locals) {
2841 raw_string_ostream LocalName(Name);
2842 LocalName << "??"
2843 << format_hex_no_prefix(LocalCounter++, 4, /*Upper=*/true);
2844 LocalSymbols.insert({Local, Name});
2845 Name.clear();
2848 std::optional<char> CurrentQuote;
2849 while (!Body.empty()) {
2850 // Scan for the next substitution.
2851 std::size_t End = Body.size(), Pos = 0;
2852 std::size_t IdentifierPos = End;
2853 for (; Pos != End; ++Pos) {
2854 // Find the next possible macro parameter, including preceding a '&'
2855 // inside quotes.
2856 if (Body[Pos] == '&')
2857 break;
2858 if (isMacroParameterChar(Body[Pos])) {
2859 if (!CurrentQuote)
2860 break;
2861 if (IdentifierPos == End)
2862 IdentifierPos = Pos;
2863 } else {
2864 IdentifierPos = End;
2867 // Track quotation status
2868 if (!CurrentQuote) {
2869 if (Body[Pos] == '\'' || Body[Pos] == '"')
2870 CurrentQuote = Body[Pos];
2871 } else if (Body[Pos] == CurrentQuote) {
2872 if (Pos + 1 != End && Body[Pos + 1] == CurrentQuote) {
2873 // Escaped quote, and quotes aren't identifier chars; skip
2874 ++Pos;
2875 continue;
2876 } else {
2877 CurrentQuote.reset();
2881 if (IdentifierPos != End) {
2882 // We've recognized an identifier before an apostrophe inside quotes;
2883 // check once to see if we can expand it.
2884 Pos = IdentifierPos;
2885 IdentifierPos = End;
2888 // Add the prefix.
2889 OS << Body.slice(0, Pos);
2891 // Check if we reached the end.
2892 if (Pos == End)
2893 break;
2895 unsigned I = Pos;
2896 bool InitialAmpersand = (Body[I] == '&');
2897 if (InitialAmpersand) {
2898 ++I;
2899 ++Pos;
2901 while (I < End && isMacroParameterChar(Body[I]))
2902 ++I;
2904 const char *Begin = Body.data() + Pos;
2905 StringRef Argument(Begin, I - Pos);
2906 const std::string ArgumentLower = Argument.lower();
2907 unsigned Index = 0;
2909 for (; Index < NParameters; ++Index)
2910 if (Parameters[Index].Name.equals_insensitive(ArgumentLower))
2911 break;
2913 if (Index == NParameters) {
2914 if (InitialAmpersand)
2915 OS << '&';
2916 auto it = LocalSymbols.find(ArgumentLower);
2917 if (it != LocalSymbols.end())
2918 OS << it->second;
2919 else
2920 OS << Argument;
2921 Pos = I;
2922 } else {
2923 for (const AsmToken &Token : A[Index]) {
2924 // In MASM, you can write '%expr'.
2925 // The prefix '%' evaluates the expression 'expr'
2926 // and uses the result as a string (e.g. replace %(1+2) with the
2927 // string "3").
2928 // Here, we identify the integer token which is the result of the
2929 // absolute expression evaluation and replace it with its string
2930 // representation.
2931 if (Token.getString().front() == '%' && Token.is(AsmToken::Integer))
2932 // Emit an integer value to the buffer.
2933 OS << Token.getIntVal();
2934 else
2935 OS << Token.getString();
2938 Pos += Argument.size();
2939 if (Pos < End && Body[Pos] == '&') {
2940 ++Pos;
2943 // Update the scan point.
2944 Body = Body.substr(Pos);
2947 return false;
2950 static bool isOperator(AsmToken::TokenKind kind) {
2951 switch (kind) {
2952 default:
2953 return false;
2954 case AsmToken::Plus:
2955 case AsmToken::Minus:
2956 case AsmToken::Tilde:
2957 case AsmToken::Slash:
2958 case AsmToken::Star:
2959 case AsmToken::Dot:
2960 case AsmToken::Equal:
2961 case AsmToken::EqualEqual:
2962 case AsmToken::Pipe:
2963 case AsmToken::PipePipe:
2964 case AsmToken::Caret:
2965 case AsmToken::Amp:
2966 case AsmToken::AmpAmp:
2967 case AsmToken::Exclaim:
2968 case AsmToken::ExclaimEqual:
2969 case AsmToken::Less:
2970 case AsmToken::LessEqual:
2971 case AsmToken::LessLess:
2972 case AsmToken::LessGreater:
2973 case AsmToken::Greater:
2974 case AsmToken::GreaterEqual:
2975 case AsmToken::GreaterGreater:
2976 return true;
2980 namespace {
2982 class AsmLexerSkipSpaceRAII {
2983 public:
2984 AsmLexerSkipSpaceRAII(AsmLexer &Lexer, bool SkipSpace) : Lexer(Lexer) {
2985 Lexer.setSkipSpace(SkipSpace);
2988 ~AsmLexerSkipSpaceRAII() {
2989 Lexer.setSkipSpace(true);
2992 private:
2993 AsmLexer &Lexer;
2996 } // end anonymous namespace
2998 bool MasmParser::parseMacroArgument(const MCAsmMacroParameter *MP,
2999 MCAsmMacroArgument &MA,
3000 AsmToken::TokenKind EndTok) {
3001 if (MP && MP->Vararg) {
3002 if (Lexer.isNot(EndTok)) {
3003 SmallVector<StringRef, 1> Str = parseStringRefsTo(EndTok);
3004 for (StringRef S : Str) {
3005 MA.emplace_back(AsmToken::String, S);
3008 return false;
3011 SMLoc StrLoc = Lexer.getLoc(), EndLoc;
3012 if (Lexer.is(AsmToken::Less) && isAngleBracketString(StrLoc, EndLoc)) {
3013 const char *StrChar = StrLoc.getPointer() + 1;
3014 const char *EndChar = EndLoc.getPointer() - 1;
3015 jumpToLoc(EndLoc, CurBuffer, EndStatementAtEOFStack.back());
3016 /// Eat from '<' to '>'.
3017 Lex();
3018 MA.emplace_back(AsmToken::String, StringRef(StrChar, EndChar - StrChar));
3019 return false;
3022 unsigned ParenLevel = 0;
3024 // Darwin doesn't use spaces to delmit arguments.
3025 AsmLexerSkipSpaceRAII ScopedSkipSpace(Lexer, IsDarwin);
3027 bool SpaceEaten;
3029 while (true) {
3030 SpaceEaten = false;
3031 if (Lexer.is(AsmToken::Eof) || Lexer.is(AsmToken::Equal))
3032 return TokError("unexpected token");
3034 if (ParenLevel == 0) {
3035 if (Lexer.is(AsmToken::Comma))
3036 break;
3038 if (Lexer.is(AsmToken::Space)) {
3039 SpaceEaten = true;
3040 Lex(); // Eat spaces.
3043 // Spaces can delimit parameters, but could also be part an expression.
3044 // If the token after a space is an operator, add the token and the next
3045 // one into this argument
3046 if (!IsDarwin) {
3047 if (isOperator(Lexer.getKind()) && Lexer.isNot(EndTok)) {
3048 MA.push_back(getTok());
3049 Lex();
3051 // Whitespace after an operator can be ignored.
3052 if (Lexer.is(AsmToken::Space))
3053 Lex();
3055 continue;
3058 if (SpaceEaten)
3059 break;
3062 // handleMacroEntry relies on not advancing the lexer here
3063 // to be able to fill in the remaining default parameter values
3064 if (Lexer.is(EndTok) && (EndTok != AsmToken::RParen || ParenLevel == 0))
3065 break;
3067 // Adjust the current parentheses level.
3068 if (Lexer.is(AsmToken::LParen))
3069 ++ParenLevel;
3070 else if (Lexer.is(AsmToken::RParen) && ParenLevel)
3071 --ParenLevel;
3073 // Append the token to the current argument list.
3074 MA.push_back(getTok());
3075 Lex();
3078 if (ParenLevel != 0)
3079 return TokError("unbalanced parentheses in argument");
3081 if (MA.empty() && MP) {
3082 if (MP->Required) {
3083 return TokError("missing value for required parameter '" + MP->Name +
3084 "'");
3085 } else {
3086 MA = MP->Value;
3089 return false;
3092 // Parse the macro instantiation arguments.
3093 bool MasmParser::parseMacroArguments(const MCAsmMacro *M,
3094 MCAsmMacroArguments &A,
3095 AsmToken::TokenKind EndTok) {
3096 const unsigned NParameters = M ? M->Parameters.size() : 0;
3097 bool NamedParametersFound = false;
3098 SmallVector<SMLoc, 4> FALocs;
3100 A.resize(NParameters);
3101 FALocs.resize(NParameters);
3103 // Parse two kinds of macro invocations:
3104 // - macros defined without any parameters accept an arbitrary number of them
3105 // - macros defined with parameters accept at most that many of them
3106 for (unsigned Parameter = 0; !NParameters || Parameter < NParameters;
3107 ++Parameter) {
3108 SMLoc IDLoc = Lexer.getLoc();
3109 MCAsmMacroParameter FA;
3111 if (Lexer.is(AsmToken::Identifier) && peekTok().is(AsmToken::Equal)) {
3112 if (parseIdentifier(FA.Name))
3113 return Error(IDLoc, "invalid argument identifier for formal argument");
3115 if (Lexer.isNot(AsmToken::Equal))
3116 return TokError("expected '=' after formal parameter identifier");
3118 Lex();
3120 NamedParametersFound = true;
3123 if (NamedParametersFound && FA.Name.empty())
3124 return Error(IDLoc, "cannot mix positional and keyword arguments");
3126 unsigned PI = Parameter;
3127 if (!FA.Name.empty()) {
3128 assert(M && "expected macro to be defined");
3129 unsigned FAI = 0;
3130 for (FAI = 0; FAI < NParameters; ++FAI)
3131 if (M->Parameters[FAI].Name == FA.Name)
3132 break;
3134 if (FAI >= NParameters) {
3135 return Error(IDLoc, "parameter named '" + FA.Name +
3136 "' does not exist for macro '" + M->Name + "'");
3138 PI = FAI;
3140 const MCAsmMacroParameter *MP = nullptr;
3141 if (M && PI < NParameters)
3142 MP = &M->Parameters[PI];
3144 SMLoc StrLoc = Lexer.getLoc();
3145 SMLoc EndLoc;
3146 if (Lexer.is(AsmToken::Percent)) {
3147 const MCExpr *AbsoluteExp;
3148 int64_t Value;
3149 /// Eat '%'.
3150 Lex();
3151 if (parseExpression(AbsoluteExp, EndLoc))
3152 return false;
3153 if (!AbsoluteExp->evaluateAsAbsolute(Value,
3154 getStreamer().getAssemblerPtr()))
3155 return Error(StrLoc, "expected absolute expression");
3156 const char *StrChar = StrLoc.getPointer();
3157 const char *EndChar = EndLoc.getPointer();
3158 AsmToken newToken(AsmToken::Integer,
3159 StringRef(StrChar, EndChar - StrChar), Value);
3160 FA.Value.push_back(newToken);
3161 } else if (parseMacroArgument(MP, FA.Value, EndTok)) {
3162 if (M)
3163 return addErrorSuffix(" in '" + M->Name + "' macro");
3164 else
3165 return true;
3168 if (!FA.Value.empty()) {
3169 if (A.size() <= PI)
3170 A.resize(PI + 1);
3171 A[PI] = FA.Value;
3173 if (FALocs.size() <= PI)
3174 FALocs.resize(PI + 1);
3176 FALocs[PI] = Lexer.getLoc();
3179 // At the end of the statement, fill in remaining arguments that have
3180 // default values. If there aren't any, then the next argument is
3181 // required but missing
3182 if (Lexer.is(EndTok)) {
3183 bool Failure = false;
3184 for (unsigned FAI = 0; FAI < NParameters; ++FAI) {
3185 if (A[FAI].empty()) {
3186 if (M->Parameters[FAI].Required) {
3187 Error(FALocs[FAI].isValid() ? FALocs[FAI] : Lexer.getLoc(),
3188 "missing value for required parameter "
3189 "'" +
3190 M->Parameters[FAI].Name + "' in macro '" + M->Name + "'");
3191 Failure = true;
3194 if (!M->Parameters[FAI].Value.empty())
3195 A[FAI] = M->Parameters[FAI].Value;
3198 return Failure;
3201 if (Lexer.is(AsmToken::Comma))
3202 Lex();
3205 return TokError("too many positional arguments");
3208 bool MasmParser::handleMacroEntry(const MCAsmMacro *M, SMLoc NameLoc,
3209 AsmToken::TokenKind ArgumentEndTok) {
3210 // Arbitrarily limit macro nesting depth (default matches 'as'). We can
3211 // eliminate this, although we should protect against infinite loops.
3212 unsigned MaxNestingDepth = AsmMacroMaxNestingDepth;
3213 if (ActiveMacros.size() == MaxNestingDepth) {
3214 std::ostringstream MaxNestingDepthError;
3215 MaxNestingDepthError << "macros cannot be nested more than "
3216 << MaxNestingDepth << " levels deep."
3217 << " Use -asm-macro-max-nesting-depth to increase "
3218 "this limit.";
3219 return TokError(MaxNestingDepthError.str());
3222 MCAsmMacroArguments A;
3223 if (parseMacroArguments(M, A, ArgumentEndTok))
3224 return true;
3226 // Macro instantiation is lexical, unfortunately. We construct a new buffer
3227 // to hold the macro body with substitutions.
3228 SmallString<256> Buf;
3229 StringRef Body = M->Body;
3230 raw_svector_ostream OS(Buf);
3232 if (expandMacro(OS, Body, M->Parameters, A, M->Locals, getTok().getLoc()))
3233 return true;
3235 // We include the endm in the buffer as our cue to exit the macro
3236 // instantiation.
3237 OS << "endm\n";
3239 std::unique_ptr<MemoryBuffer> Instantiation =
3240 MemoryBuffer::getMemBufferCopy(OS.str(), "<instantiation>");
3242 // Create the macro instantiation object and add to the current macro
3243 // instantiation stack.
3244 MacroInstantiation *MI = new MacroInstantiation{
3245 NameLoc, CurBuffer, getTok().getLoc(), TheCondStack.size()};
3246 ActiveMacros.push_back(MI);
3248 ++NumOfMacroInstantiations;
3250 // Jump to the macro instantiation and prime the lexer.
3251 CurBuffer = SrcMgr.AddNewSourceBuffer(std::move(Instantiation), SMLoc());
3252 Lexer.setBuffer(SrcMgr.getMemoryBuffer(CurBuffer)->getBuffer());
3253 EndStatementAtEOFStack.push_back(true);
3254 Lex();
3256 return false;
3259 void MasmParser::handleMacroExit() {
3260 // Jump to the token we should return to, and consume it.
3261 EndStatementAtEOFStack.pop_back();
3262 jumpToLoc(ActiveMacros.back()->ExitLoc, ActiveMacros.back()->ExitBuffer,
3263 EndStatementAtEOFStack.back());
3264 Lex();
3266 // Pop the instantiation entry.
3267 delete ActiveMacros.back();
3268 ActiveMacros.pop_back();
3271 bool MasmParser::handleMacroInvocation(const MCAsmMacro *M, SMLoc NameLoc) {
3272 if (!M->IsFunction)
3273 return Error(NameLoc, "cannot invoke macro procedure as function");
3275 if (parseToken(AsmToken::LParen, "invoking macro function '" + M->Name +
3276 "' requires arguments in parentheses") ||
3277 handleMacroEntry(M, NameLoc, AsmToken::RParen))
3278 return true;
3280 // Parse all statements in the macro, retrieving the exit value when it ends.
3281 std::string ExitValue;
3282 SmallVector<AsmRewrite, 4> AsmStrRewrites;
3283 while (Lexer.isNot(AsmToken::Eof)) {
3284 ParseStatementInfo Info(&AsmStrRewrites);
3285 bool Parsed = parseStatement(Info, nullptr);
3287 if (!Parsed && Info.ExitValue) {
3288 ExitValue = std::move(*Info.ExitValue);
3289 break;
3292 // If we have a Lexer Error we are on an Error Token. Load in Lexer Error
3293 // for printing ErrMsg via Lex() only if no (presumably better) parser error
3294 // exists.
3295 if (Parsed && !hasPendingError() && Lexer.getTok().is(AsmToken::Error)) {
3296 Lex();
3299 // parseStatement returned true so may need to emit an error.
3300 printPendingErrors();
3302 // Skipping to the next line if needed.
3303 if (Parsed && !getLexer().isAtStartOfStatement())
3304 eatToEndOfStatement();
3307 // Consume the right-parenthesis on the other side of the arguments.
3308 if (parseRParen())
3309 return true;
3311 // Exit values may require lexing, unfortunately. We construct a new buffer to
3312 // hold the exit value.
3313 std::unique_ptr<MemoryBuffer> MacroValue =
3314 MemoryBuffer::getMemBufferCopy(ExitValue, "<macro-value>");
3316 // Jump from this location to the instantiated exit value, and prime the
3317 // lexer.
3318 CurBuffer = SrcMgr.AddNewSourceBuffer(std::move(MacroValue), Lexer.getLoc());
3319 Lexer.setBuffer(SrcMgr.getMemoryBuffer(CurBuffer)->getBuffer(), nullptr,
3320 /*EndStatementAtEOF=*/false);
3321 EndStatementAtEOFStack.push_back(false);
3322 Lex();
3324 return false;
3327 /// parseIdentifier:
3328 /// ::= identifier
3329 /// ::= string
3330 bool MasmParser::parseIdentifier(StringRef &Res,
3331 IdentifierPositionKind Position) {
3332 // The assembler has relaxed rules for accepting identifiers, in particular we
3333 // allow things like '.globl $foo' and '.def @feat.00', which would normally
3334 // be separate tokens. At this level, we have already lexed so we cannot
3335 // (currently) handle this as a context dependent token, instead we detect
3336 // adjacent tokens and return the combined identifier.
3337 if (Lexer.is(AsmToken::Dollar) || Lexer.is(AsmToken::At)) {
3338 SMLoc PrefixLoc = getLexer().getLoc();
3340 // Consume the prefix character, and check for a following identifier.
3342 AsmToken nextTok = peekTok(false);
3344 if (nextTok.isNot(AsmToken::Identifier))
3345 return true;
3347 // We have a '$' or '@' followed by an identifier, make sure they are adjacent.
3348 if (PrefixLoc.getPointer() + 1 != nextTok.getLoc().getPointer())
3349 return true;
3351 // eat $ or @
3352 Lexer.Lex(); // Lexer's Lex guarantees consecutive token.
3353 // Construct the joined identifier and consume the token.
3354 Res =
3355 StringRef(PrefixLoc.getPointer(), getTok().getIdentifier().size() + 1);
3356 Lex(); // Parser Lex to maintain invariants.
3357 return false;
3360 if (Lexer.isNot(AsmToken::Identifier) && Lexer.isNot(AsmToken::String))
3361 return true;
3363 Res = getTok().getIdentifier();
3365 // Consume the identifier token - but if parsing certain directives, avoid
3366 // lexical expansion of the next token.
3367 ExpandKind ExpandNextToken = ExpandMacros;
3368 if (Position == StartOfStatement &&
3369 StringSwitch<bool>(Res)
3370 .CaseLower("echo", true)
3371 .CasesLower("ifdef", "ifndef", "elseifdef", "elseifndef", true)
3372 .Default(false)) {
3373 ExpandNextToken = DoNotExpandMacros;
3375 Lex(ExpandNextToken);
3377 return false;
3380 /// parseDirectiveEquate:
3381 /// ::= name "=" expression
3382 /// | name "equ" expression (not redefinable)
3383 /// | name "equ" text-list
3384 /// | name "textequ" text-list (redefinability unspecified)
3385 bool MasmParser::parseDirectiveEquate(StringRef IDVal, StringRef Name,
3386 DirectiveKind DirKind, SMLoc NameLoc) {
3387 auto BuiltinIt = BuiltinSymbolMap.find(Name.lower());
3388 if (BuiltinIt != BuiltinSymbolMap.end())
3389 return Error(NameLoc, "cannot redefine a built-in symbol");
3391 Variable &Var = Variables[Name.lower()];
3392 if (Var.Name.empty()) {
3393 Var.Name = Name;
3396 SMLoc StartLoc = Lexer.getLoc();
3397 if (DirKind == DK_EQU || DirKind == DK_TEXTEQU) {
3398 // "equ" and "textequ" both allow text expressions.
3399 std::string Value;
3400 std::string TextItem;
3401 if (!parseTextItem(TextItem)) {
3402 Value += TextItem;
3404 // Accept a text-list, not just one text-item.
3405 auto parseItem = [&]() -> bool {
3406 if (parseTextItem(TextItem))
3407 return TokError("expected text item");
3408 Value += TextItem;
3409 return false;
3411 if (parseOptionalToken(AsmToken::Comma) && parseMany(parseItem))
3412 return addErrorSuffix(" in '" + Twine(IDVal) + "' directive");
3414 if (!Var.IsText || Var.TextValue != Value) {
3415 switch (Var.Redefinable) {
3416 case Variable::NOT_REDEFINABLE:
3417 return Error(getTok().getLoc(), "invalid variable redefinition");
3418 case Variable::WARN_ON_REDEFINITION:
3419 if (Warning(NameLoc, "redefining '" + Name +
3420 "', already defined on the command line")) {
3421 return true;
3423 break;
3424 default:
3425 break;
3428 Var.IsText = true;
3429 Var.TextValue = Value;
3430 Var.Redefinable = Variable::REDEFINABLE;
3432 return false;
3435 if (DirKind == DK_TEXTEQU)
3436 return TokError("expected <text> in '" + Twine(IDVal) + "' directive");
3438 // Parse as expression assignment.
3439 const MCExpr *Expr;
3440 SMLoc EndLoc;
3441 if (parseExpression(Expr, EndLoc))
3442 return addErrorSuffix(" in '" + Twine(IDVal) + "' directive");
3443 StringRef ExprAsString = StringRef(
3444 StartLoc.getPointer(), EndLoc.getPointer() - StartLoc.getPointer());
3446 int64_t Value;
3447 if (!Expr->evaluateAsAbsolute(Value, getStreamer().getAssemblerPtr())) {
3448 if (DirKind == DK_ASSIGN)
3449 return Error(
3450 StartLoc,
3451 "expected absolute expression; not all symbols have known values",
3452 {StartLoc, EndLoc});
3454 // Not an absolute expression; define as a text replacement.
3455 if (!Var.IsText || Var.TextValue != ExprAsString) {
3456 switch (Var.Redefinable) {
3457 case Variable::NOT_REDEFINABLE:
3458 return Error(getTok().getLoc(), "invalid variable redefinition");
3459 case Variable::WARN_ON_REDEFINITION:
3460 if (Warning(NameLoc, "redefining '" + Name +
3461 "', already defined on the command line")) {
3462 return true;
3464 break;
3465 default:
3466 break;
3470 Var.IsText = true;
3471 Var.TextValue = ExprAsString.str();
3472 Var.Redefinable = Variable::REDEFINABLE;
3474 return false;
3477 MCSymbol *Sym = getContext().getOrCreateSymbol(Var.Name);
3479 const MCConstantExpr *PrevValue =
3480 Sym->isVariable() ? dyn_cast_or_null<MCConstantExpr>(
3481 Sym->getVariableValue(/*SetUsed=*/false))
3482 : nullptr;
3483 if (Var.IsText || !PrevValue || PrevValue->getValue() != Value) {
3484 switch (Var.Redefinable) {
3485 case Variable::NOT_REDEFINABLE:
3486 return Error(getTok().getLoc(), "invalid variable redefinition");
3487 case Variable::WARN_ON_REDEFINITION:
3488 if (Warning(NameLoc, "redefining '" + Name +
3489 "', already defined on the command line")) {
3490 return true;
3492 break;
3493 default:
3494 break;
3498 Var.IsText = false;
3499 Var.TextValue.clear();
3500 Var.Redefinable = (DirKind == DK_ASSIGN) ? Variable::REDEFINABLE
3501 : Variable::NOT_REDEFINABLE;
3503 Sym->setRedefinable(Var.Redefinable != Variable::NOT_REDEFINABLE);
3504 Sym->setVariableValue(Expr);
3505 Sym->setExternal(false);
3507 return false;
3510 bool MasmParser::parseEscapedString(std::string &Data) {
3511 if (check(getTok().isNot(AsmToken::String), "expected string"))
3512 return true;
3514 Data = "";
3515 char Quote = getTok().getString().front();
3516 StringRef Str = getTok().getStringContents();
3517 Data.reserve(Str.size());
3518 for (size_t i = 0, e = Str.size(); i != e; ++i) {
3519 Data.push_back(Str[i]);
3520 if (Str[i] == Quote) {
3521 // MASM treats doubled delimiting quotes as an escaped delimiting quote.
3522 // If we're escaping the string's trailing delimiter, we're definitely
3523 // missing a quotation mark.
3524 if (i + 1 == Str.size())
3525 return Error(getTok().getLoc(), "missing quotation mark in string");
3526 if (Str[i + 1] == Quote)
3527 ++i;
3531 Lex();
3532 return false;
3535 bool MasmParser::parseAngleBracketString(std::string &Data) {
3536 SMLoc EndLoc, StartLoc = getTok().getLoc();
3537 if (isAngleBracketString(StartLoc, EndLoc)) {
3538 const char *StartChar = StartLoc.getPointer() + 1;
3539 const char *EndChar = EndLoc.getPointer() - 1;
3540 jumpToLoc(EndLoc, CurBuffer, EndStatementAtEOFStack.back());
3541 // Eat from '<' to '>'.
3542 Lex();
3544 Data = angleBracketString(StringRef(StartChar, EndChar - StartChar));
3545 return false;
3547 return true;
3550 /// textItem ::= textLiteral | textMacroID | % constExpr
3551 bool MasmParser::parseTextItem(std::string &Data) {
3552 switch (getTok().getKind()) {
3553 default:
3554 return true;
3555 case AsmToken::Percent: {
3556 int64_t Res;
3557 if (parseToken(AsmToken::Percent) || parseAbsoluteExpression(Res))
3558 return true;
3559 Data = std::to_string(Res);
3560 return false;
3562 case AsmToken::Less:
3563 case AsmToken::LessEqual:
3564 case AsmToken::LessLess:
3565 case AsmToken::LessGreater:
3566 return parseAngleBracketString(Data);
3567 case AsmToken::Identifier: {
3568 // This must be a text macro; we need to expand it accordingly.
3569 StringRef ID;
3570 SMLoc StartLoc = getTok().getLoc();
3571 if (parseIdentifier(ID))
3572 return true;
3573 Data = ID.str();
3575 bool Expanded = false;
3576 while (true) {
3577 // Try to resolve as a built-in text macro
3578 auto BuiltinIt = BuiltinSymbolMap.find(ID.lower());
3579 if (BuiltinIt != BuiltinSymbolMap.end()) {
3580 std::optional<std::string> BuiltinText =
3581 evaluateBuiltinTextMacro(BuiltinIt->getValue(), StartLoc);
3582 if (!BuiltinText) {
3583 // Not a text macro; break without substituting
3584 break;
3586 Data = std::move(*BuiltinText);
3587 ID = StringRef(Data);
3588 Expanded = true;
3589 continue;
3592 // Try to resolve as a variable text macro
3593 auto VarIt = Variables.find(ID.lower());
3594 if (VarIt != Variables.end()) {
3595 const Variable &Var = VarIt->getValue();
3596 if (!Var.IsText) {
3597 // Not a text macro; break without substituting
3598 break;
3600 Data = Var.TextValue;
3601 ID = StringRef(Data);
3602 Expanded = true;
3603 continue;
3606 break;
3609 if (!Expanded) {
3610 // Not a text macro; not usable in TextItem context. Since we haven't used
3611 // the token, put it back for better error recovery.
3612 getLexer().UnLex(AsmToken(AsmToken::Identifier, ID));
3613 return true;
3615 return false;
3618 llvm_unreachable("unhandled token kind");
3621 /// parseDirectiveAscii:
3622 /// ::= ( .ascii | .asciz | .string ) [ "string" ( , "string" )* ]
3623 bool MasmParser::parseDirectiveAscii(StringRef IDVal, bool ZeroTerminated) {
3624 auto parseOp = [&]() -> bool {
3625 std::string Data;
3626 if (checkForValidSection() || parseEscapedString(Data))
3627 return true;
3628 getStreamer().emitBytes(Data);
3629 if (ZeroTerminated)
3630 getStreamer().emitBytes(StringRef("\0", 1));
3631 return false;
3634 if (parseMany(parseOp))
3635 return addErrorSuffix(" in '" + Twine(IDVal) + "' directive");
3636 return false;
3639 bool MasmParser::emitIntValue(const MCExpr *Value, unsigned Size) {
3640 // Special case constant expressions to match code generator.
3641 if (const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(Value)) {
3642 assert(Size <= 8 && "Invalid size");
3643 int64_t IntValue = MCE->getValue();
3644 if (!isUIntN(8 * Size, IntValue) && !isIntN(8 * Size, IntValue))
3645 return Error(MCE->getLoc(), "out of range literal value");
3646 getStreamer().emitIntValue(IntValue, Size);
3647 } else {
3648 const MCSymbolRefExpr *MSE = dyn_cast<MCSymbolRefExpr>(Value);
3649 if (MSE && MSE->getSymbol().getName() == "?") {
3650 // ? initializer; treat as 0.
3651 getStreamer().emitIntValue(0, Size);
3652 } else {
3653 getStreamer().emitValue(Value, Size, Value->getLoc());
3656 return false;
3659 bool MasmParser::parseScalarInitializer(unsigned Size,
3660 SmallVectorImpl<const MCExpr *> &Values,
3661 unsigned StringPadLength) {
3662 if (Size == 1 && getTok().is(AsmToken::String)) {
3663 std::string Value;
3664 if (parseEscapedString(Value))
3665 return true;
3666 // Treat each character as an initializer.
3667 for (const unsigned char CharVal : Value)
3668 Values.push_back(MCConstantExpr::create(CharVal, getContext()));
3670 // Pad the string with spaces to the specified length.
3671 for (size_t i = Value.size(); i < StringPadLength; ++i)
3672 Values.push_back(MCConstantExpr::create(' ', getContext()));
3673 } else {
3674 const MCExpr *Value;
3675 if (parseExpression(Value))
3676 return true;
3677 if (getTok().is(AsmToken::Identifier) &&
3678 getTok().getString().equals_insensitive("dup")) {
3679 Lex(); // Eat 'dup'.
3680 const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(Value);
3681 if (!MCE)
3682 return Error(Value->getLoc(),
3683 "cannot repeat value a non-constant number of times");
3684 const int64_t Repetitions = MCE->getValue();
3685 if (Repetitions < 0)
3686 return Error(Value->getLoc(),
3687 "cannot repeat value a negative number of times");
3689 SmallVector<const MCExpr *, 1> DuplicatedValues;
3690 if (parseToken(AsmToken::LParen,
3691 "parentheses required for 'dup' contents") ||
3692 parseScalarInstList(Size, DuplicatedValues) || parseRParen())
3693 return true;
3695 for (int i = 0; i < Repetitions; ++i)
3696 Values.append(DuplicatedValues.begin(), DuplicatedValues.end());
3697 } else {
3698 Values.push_back(Value);
3701 return false;
3704 bool MasmParser::parseScalarInstList(unsigned Size,
3705 SmallVectorImpl<const MCExpr *> &Values,
3706 const AsmToken::TokenKind EndToken) {
3707 while (getTok().isNot(EndToken) &&
3708 (EndToken != AsmToken::Greater ||
3709 getTok().isNot(AsmToken::GreaterGreater))) {
3710 parseScalarInitializer(Size, Values);
3712 // If we see a comma, continue, and allow line continuation.
3713 if (!parseOptionalToken(AsmToken::Comma))
3714 break;
3715 parseOptionalToken(AsmToken::EndOfStatement);
3717 return false;
3720 bool MasmParser::emitIntegralValues(unsigned Size, unsigned *Count) {
3721 SmallVector<const MCExpr *, 1> Values;
3722 if (checkForValidSection() || parseScalarInstList(Size, Values))
3723 return true;
3725 for (const auto *Value : Values) {
3726 emitIntValue(Value, Size);
3728 if (Count)
3729 *Count = Values.size();
3730 return false;
3733 // Add a field to the current structure.
3734 bool MasmParser::addIntegralField(StringRef Name, unsigned Size) {
3735 StructInfo &Struct = StructInProgress.back();
3736 FieldInfo &Field = Struct.addField(Name, FT_INTEGRAL, Size);
3737 IntFieldInfo &IntInfo = Field.Contents.IntInfo;
3739 Field.Type = Size;
3741 if (parseScalarInstList(Size, IntInfo.Values))
3742 return true;
3744 Field.SizeOf = Field.Type * IntInfo.Values.size();
3745 Field.LengthOf = IntInfo.Values.size();
3746 const unsigned FieldEnd = Field.Offset + Field.SizeOf;
3747 if (!Struct.IsUnion) {
3748 Struct.NextOffset = FieldEnd;
3750 Struct.Size = std::max(Struct.Size, FieldEnd);
3751 return false;
3754 /// parseDirectiveValue
3755 /// ::= (byte | word | ... ) [ expression (, expression)* ]
3756 bool MasmParser::parseDirectiveValue(StringRef IDVal, unsigned Size) {
3757 if (StructInProgress.empty()) {
3758 // Initialize data value.
3759 if (emitIntegralValues(Size))
3760 return addErrorSuffix(" in '" + Twine(IDVal) + "' directive");
3761 } else if (addIntegralField("", Size)) {
3762 return addErrorSuffix(" in '" + Twine(IDVal) + "' directive");
3765 return false;
3768 /// parseDirectiveNamedValue
3769 /// ::= name (byte | word | ... ) [ expression (, expression)* ]
3770 bool MasmParser::parseDirectiveNamedValue(StringRef TypeName, unsigned Size,
3771 StringRef Name, SMLoc NameLoc) {
3772 if (StructInProgress.empty()) {
3773 // Initialize named data value.
3774 MCSymbol *Sym = getContext().getOrCreateSymbol(Name);
3775 getStreamer().emitLabel(Sym);
3776 unsigned Count;
3777 if (emitIntegralValues(Size, &Count))
3778 return addErrorSuffix(" in '" + Twine(TypeName) + "' directive");
3780 AsmTypeInfo Type;
3781 Type.Name = TypeName;
3782 Type.Size = Size * Count;
3783 Type.ElementSize = Size;
3784 Type.Length = Count;
3785 KnownType[Name.lower()] = Type;
3786 } else if (addIntegralField(Name, Size)) {
3787 return addErrorSuffix(" in '" + Twine(TypeName) + "' directive");
3790 return false;
3793 static bool parseHexOcta(MasmParser &Asm, uint64_t &hi, uint64_t &lo) {
3794 if (Asm.getTok().isNot(AsmToken::Integer) &&
3795 Asm.getTok().isNot(AsmToken::BigNum))
3796 return Asm.TokError("unknown token in expression");
3797 SMLoc ExprLoc = Asm.getTok().getLoc();
3798 APInt IntValue = Asm.getTok().getAPIntVal();
3799 Asm.Lex();
3800 if (!IntValue.isIntN(128))
3801 return Asm.Error(ExprLoc, "out of range literal value");
3802 if (!IntValue.isIntN(64)) {
3803 hi = IntValue.getHiBits(IntValue.getBitWidth() - 64).getZExtValue();
3804 lo = IntValue.getLoBits(64).getZExtValue();
3805 } else {
3806 hi = 0;
3807 lo = IntValue.getZExtValue();
3809 return false;
3812 bool MasmParser::parseRealValue(const fltSemantics &Semantics, APInt &Res) {
3813 // We don't truly support arithmetic on floating point expressions, so we
3814 // have to manually parse unary prefixes.
3815 bool IsNeg = false;
3816 SMLoc SignLoc;
3817 if (getLexer().is(AsmToken::Minus)) {
3818 SignLoc = getLexer().getLoc();
3819 Lexer.Lex();
3820 IsNeg = true;
3821 } else if (getLexer().is(AsmToken::Plus)) {
3822 SignLoc = getLexer().getLoc();
3823 Lexer.Lex();
3826 if (Lexer.is(AsmToken::Error))
3827 return TokError(Lexer.getErr());
3828 if (Lexer.isNot(AsmToken::Integer) && Lexer.isNot(AsmToken::Real) &&
3829 Lexer.isNot(AsmToken::Identifier))
3830 return TokError("unexpected token in directive");
3832 // Convert to an APFloat.
3833 APFloat Value(Semantics);
3834 StringRef IDVal = getTok().getString();
3835 if (getLexer().is(AsmToken::Identifier)) {
3836 if (IDVal.equals_insensitive("infinity") || IDVal.equals_insensitive("inf"))
3837 Value = APFloat::getInf(Semantics);
3838 else if (IDVal.equals_insensitive("nan"))
3839 Value = APFloat::getNaN(Semantics, false, ~0);
3840 else if (IDVal.equals_insensitive("?"))
3841 Value = APFloat::getZero(Semantics);
3842 else
3843 return TokError("invalid floating point literal");
3844 } else if (IDVal.consume_back("r") || IDVal.consume_back("R")) {
3845 // MASM hexadecimal floating-point literal; no APFloat conversion needed.
3846 // To match ML64.exe, ignore the initial sign.
3847 unsigned SizeInBits = Value.getSizeInBits(Semantics);
3848 if (SizeInBits != (IDVal.size() << 2))
3849 return TokError("invalid floating point literal");
3851 // Consume the numeric token.
3852 Lex();
3854 Res = APInt(SizeInBits, IDVal, 16);
3855 if (SignLoc.isValid())
3856 return Warning(SignLoc, "MASM-style hex floats ignore explicit sign");
3857 return false;
3858 } else if (errorToBool(
3859 Value.convertFromString(IDVal, APFloat::rmNearestTiesToEven)
3860 .takeError())) {
3861 return TokError("invalid floating point literal");
3863 if (IsNeg)
3864 Value.changeSign();
3866 // Consume the numeric token.
3867 Lex();
3869 Res = Value.bitcastToAPInt();
3871 return false;
3874 bool MasmParser::parseRealInstList(const fltSemantics &Semantics,
3875 SmallVectorImpl<APInt> &ValuesAsInt,
3876 const AsmToken::TokenKind EndToken) {
3877 while (getTok().isNot(EndToken) ||
3878 (EndToken == AsmToken::Greater &&
3879 getTok().isNot(AsmToken::GreaterGreater))) {
3880 const AsmToken NextTok = peekTok();
3881 if (NextTok.is(AsmToken::Identifier) &&
3882 NextTok.getString().equals_insensitive("dup")) {
3883 const MCExpr *Value;
3884 if (parseExpression(Value) || parseToken(AsmToken::Identifier))
3885 return true;
3886 const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(Value);
3887 if (!MCE)
3888 return Error(Value->getLoc(),
3889 "cannot repeat value a non-constant number of times");
3890 const int64_t Repetitions = MCE->getValue();
3891 if (Repetitions < 0)
3892 return Error(Value->getLoc(),
3893 "cannot repeat value a negative number of times");
3895 SmallVector<APInt, 1> DuplicatedValues;
3896 if (parseToken(AsmToken::LParen,
3897 "parentheses required for 'dup' contents") ||
3898 parseRealInstList(Semantics, DuplicatedValues) || parseRParen())
3899 return true;
3901 for (int i = 0; i < Repetitions; ++i)
3902 ValuesAsInt.append(DuplicatedValues.begin(), DuplicatedValues.end());
3903 } else {
3904 APInt AsInt;
3905 if (parseRealValue(Semantics, AsInt))
3906 return true;
3907 ValuesAsInt.push_back(AsInt);
3910 // Continue if we see a comma. (Also, allow line continuation.)
3911 if (!parseOptionalToken(AsmToken::Comma))
3912 break;
3913 parseOptionalToken(AsmToken::EndOfStatement);
3916 return false;
3919 // Initialize real data values.
3920 bool MasmParser::emitRealValues(const fltSemantics &Semantics,
3921 unsigned *Count) {
3922 if (checkForValidSection())
3923 return true;
3925 SmallVector<APInt, 1> ValuesAsInt;
3926 if (parseRealInstList(Semantics, ValuesAsInt))
3927 return true;
3929 for (const APInt &AsInt : ValuesAsInt) {
3930 getStreamer().emitIntValue(AsInt);
3932 if (Count)
3933 *Count = ValuesAsInt.size();
3934 return false;
3937 // Add a real field to the current struct.
3938 bool MasmParser::addRealField(StringRef Name, const fltSemantics &Semantics,
3939 size_t Size) {
3940 StructInfo &Struct = StructInProgress.back();
3941 FieldInfo &Field = Struct.addField(Name, FT_REAL, Size);
3942 RealFieldInfo &RealInfo = Field.Contents.RealInfo;
3944 Field.SizeOf = 0;
3946 if (parseRealInstList(Semantics, RealInfo.AsIntValues))
3947 return true;
3949 Field.Type = RealInfo.AsIntValues.back().getBitWidth() / 8;
3950 Field.LengthOf = RealInfo.AsIntValues.size();
3951 Field.SizeOf = Field.Type * Field.LengthOf;
3953 const unsigned FieldEnd = Field.Offset + Field.SizeOf;
3954 if (!Struct.IsUnion) {
3955 Struct.NextOffset = FieldEnd;
3957 Struct.Size = std::max(Struct.Size, FieldEnd);
3958 return false;
3961 /// parseDirectiveRealValue
3962 /// ::= (real4 | real8 | real10) [ expression (, expression)* ]
3963 bool MasmParser::parseDirectiveRealValue(StringRef IDVal,
3964 const fltSemantics &Semantics,
3965 size_t Size) {
3966 if (StructInProgress.empty()) {
3967 // Initialize data value.
3968 if (emitRealValues(Semantics))
3969 return addErrorSuffix(" in '" + Twine(IDVal) + "' directive");
3970 } else if (addRealField("", Semantics, Size)) {
3971 return addErrorSuffix(" in '" + Twine(IDVal) + "' directive");
3973 return false;
3976 /// parseDirectiveNamedRealValue
3977 /// ::= name (real4 | real8 | real10) [ expression (, expression)* ]
3978 bool MasmParser::parseDirectiveNamedRealValue(StringRef TypeName,
3979 const fltSemantics &Semantics,
3980 unsigned Size, StringRef Name,
3981 SMLoc NameLoc) {
3982 if (StructInProgress.empty()) {
3983 // Initialize named data value.
3984 MCSymbol *Sym = getContext().getOrCreateSymbol(Name);
3985 getStreamer().emitLabel(Sym);
3986 unsigned Count;
3987 if (emitRealValues(Semantics, &Count))
3988 return addErrorSuffix(" in '" + TypeName + "' directive");
3990 AsmTypeInfo Type;
3991 Type.Name = TypeName;
3992 Type.Size = Size * Count;
3993 Type.ElementSize = Size;
3994 Type.Length = Count;
3995 KnownType[Name.lower()] = Type;
3996 } else if (addRealField(Name, Semantics, Size)) {
3997 return addErrorSuffix(" in '" + TypeName + "' directive");
3999 return false;
4002 bool MasmParser::parseOptionalAngleBracketOpen() {
4003 const AsmToken Tok = getTok();
4004 if (parseOptionalToken(AsmToken::LessLess)) {
4005 AngleBracketDepth++;
4006 Lexer.UnLex(AsmToken(AsmToken::Less, Tok.getString().substr(1)));
4007 return true;
4008 } else if (parseOptionalToken(AsmToken::LessGreater)) {
4009 AngleBracketDepth++;
4010 Lexer.UnLex(AsmToken(AsmToken::Greater, Tok.getString().substr(1)));
4011 return true;
4012 } else if (parseOptionalToken(AsmToken::Less)) {
4013 AngleBracketDepth++;
4014 return true;
4017 return false;
4020 bool MasmParser::parseAngleBracketClose(const Twine &Msg) {
4021 const AsmToken Tok = getTok();
4022 if (parseOptionalToken(AsmToken::GreaterGreater)) {
4023 Lexer.UnLex(AsmToken(AsmToken::Greater, Tok.getString().substr(1)));
4024 } else if (parseToken(AsmToken::Greater, Msg)) {
4025 return true;
4027 AngleBracketDepth--;
4028 return false;
4031 bool MasmParser::parseFieldInitializer(const FieldInfo &Field,
4032 const IntFieldInfo &Contents,
4033 FieldInitializer &Initializer) {
4034 SMLoc Loc = getTok().getLoc();
4036 SmallVector<const MCExpr *, 1> Values;
4037 if (parseOptionalToken(AsmToken::LCurly)) {
4038 if (Field.LengthOf == 1 && Field.Type > 1)
4039 return Error(Loc, "Cannot initialize scalar field with array value");
4040 if (parseScalarInstList(Field.Type, Values, AsmToken::RCurly) ||
4041 parseToken(AsmToken::RCurly))
4042 return true;
4043 } else if (parseOptionalAngleBracketOpen()) {
4044 if (Field.LengthOf == 1 && Field.Type > 1)
4045 return Error(Loc, "Cannot initialize scalar field with array value");
4046 if (parseScalarInstList(Field.Type, Values, AsmToken::Greater) ||
4047 parseAngleBracketClose())
4048 return true;
4049 } else if (Field.LengthOf > 1 && Field.Type > 1) {
4050 return Error(Loc, "Cannot initialize array field with scalar value");
4051 } else if (parseScalarInitializer(Field.Type, Values,
4052 /*StringPadLength=*/Field.LengthOf)) {
4053 return true;
4056 if (Values.size() > Field.LengthOf) {
4057 return Error(Loc, "Initializer too long for field; expected at most " +
4058 std::to_string(Field.LengthOf) + " elements, got " +
4059 std::to_string(Values.size()));
4061 // Default-initialize all remaining values.
4062 Values.append(Contents.Values.begin() + Values.size(), Contents.Values.end());
4064 Initializer = FieldInitializer(std::move(Values));
4065 return false;
4068 bool MasmParser::parseFieldInitializer(const FieldInfo &Field,
4069 const RealFieldInfo &Contents,
4070 FieldInitializer &Initializer) {
4071 const fltSemantics *Semantics;
4072 switch (Field.Type) {
4073 case 4:
4074 Semantics = &APFloat::IEEEsingle();
4075 break;
4076 case 8:
4077 Semantics = &APFloat::IEEEdouble();
4078 break;
4079 case 10:
4080 Semantics = &APFloat::x87DoubleExtended();
4081 break;
4082 default:
4083 llvm_unreachable("unknown real field type");
4086 SMLoc Loc = getTok().getLoc();
4088 SmallVector<APInt, 1> AsIntValues;
4089 if (parseOptionalToken(AsmToken::LCurly)) {
4090 if (Field.LengthOf == 1)
4091 return Error(Loc, "Cannot initialize scalar field with array value");
4092 if (parseRealInstList(*Semantics, AsIntValues, AsmToken::RCurly) ||
4093 parseToken(AsmToken::RCurly))
4094 return true;
4095 } else if (parseOptionalAngleBracketOpen()) {
4096 if (Field.LengthOf == 1)
4097 return Error(Loc, "Cannot initialize scalar field with array value");
4098 if (parseRealInstList(*Semantics, AsIntValues, AsmToken::Greater) ||
4099 parseAngleBracketClose())
4100 return true;
4101 } else if (Field.LengthOf > 1) {
4102 return Error(Loc, "Cannot initialize array field with scalar value");
4103 } else {
4104 AsIntValues.emplace_back();
4105 if (parseRealValue(*Semantics, AsIntValues.back()))
4106 return true;
4109 if (AsIntValues.size() > Field.LengthOf) {
4110 return Error(Loc, "Initializer too long for field; expected at most " +
4111 std::to_string(Field.LengthOf) + " elements, got " +
4112 std::to_string(AsIntValues.size()));
4114 // Default-initialize all remaining values.
4115 AsIntValues.append(Contents.AsIntValues.begin() + AsIntValues.size(),
4116 Contents.AsIntValues.end());
4118 Initializer = FieldInitializer(std::move(AsIntValues));
4119 return false;
4122 bool MasmParser::parseFieldInitializer(const FieldInfo &Field,
4123 const StructFieldInfo &Contents,
4124 FieldInitializer &Initializer) {
4125 SMLoc Loc = getTok().getLoc();
4127 std::vector<StructInitializer> Initializers;
4128 if (Field.LengthOf > 1) {
4129 if (parseOptionalToken(AsmToken::LCurly)) {
4130 if (parseStructInstList(Contents.Structure, Initializers,
4131 AsmToken::RCurly) ||
4132 parseToken(AsmToken::RCurly))
4133 return true;
4134 } else if (parseOptionalAngleBracketOpen()) {
4135 if (parseStructInstList(Contents.Structure, Initializers,
4136 AsmToken::Greater) ||
4137 parseAngleBracketClose())
4138 return true;
4139 } else {
4140 return Error(Loc, "Cannot initialize array field with scalar value");
4142 } else {
4143 Initializers.emplace_back();
4144 if (parseStructInitializer(Contents.Structure, Initializers.back()))
4145 return true;
4148 if (Initializers.size() > Field.LengthOf) {
4149 return Error(Loc, "Initializer too long for field; expected at most " +
4150 std::to_string(Field.LengthOf) + " elements, got " +
4151 std::to_string(Initializers.size()));
4153 // Default-initialize all remaining values.
4154 Initializers.insert(Initializers.end(),
4155 Contents.Initializers.begin() + Initializers.size(),
4156 Contents.Initializers.end());
4158 Initializer = FieldInitializer(std::move(Initializers), Contents.Structure);
4159 return false;
4162 bool MasmParser::parseFieldInitializer(const FieldInfo &Field,
4163 FieldInitializer &Initializer) {
4164 switch (Field.Contents.FT) {
4165 case FT_INTEGRAL:
4166 return parseFieldInitializer(Field, Field.Contents.IntInfo, Initializer);
4167 case FT_REAL:
4168 return parseFieldInitializer(Field, Field.Contents.RealInfo, Initializer);
4169 case FT_STRUCT:
4170 return parseFieldInitializer(Field, Field.Contents.StructInfo, Initializer);
4172 llvm_unreachable("Unhandled FieldType enum");
4175 bool MasmParser::parseStructInitializer(const StructInfo &Structure,
4176 StructInitializer &Initializer) {
4177 const AsmToken FirstToken = getTok();
4179 std::optional<AsmToken::TokenKind> EndToken;
4180 if (parseOptionalToken(AsmToken::LCurly)) {
4181 EndToken = AsmToken::RCurly;
4182 } else if (parseOptionalAngleBracketOpen()) {
4183 EndToken = AsmToken::Greater;
4184 AngleBracketDepth++;
4185 } else if (FirstToken.is(AsmToken::Identifier) &&
4186 FirstToken.getString() == "?") {
4187 // ? initializer; leave EndToken uninitialized to treat as empty.
4188 if (parseToken(AsmToken::Identifier))
4189 return true;
4190 } else {
4191 return Error(FirstToken.getLoc(), "Expected struct initializer");
4194 auto &FieldInitializers = Initializer.FieldInitializers;
4195 size_t FieldIndex = 0;
4196 if (EndToken) {
4197 // Initialize all fields with given initializers.
4198 while (getTok().isNot(*EndToken) && FieldIndex < Structure.Fields.size()) {
4199 const FieldInfo &Field = Structure.Fields[FieldIndex++];
4200 if (parseOptionalToken(AsmToken::Comma)) {
4201 // Empty initializer; use the default and continue. (Also, allow line
4202 // continuation.)
4203 FieldInitializers.push_back(Field.Contents);
4204 parseOptionalToken(AsmToken::EndOfStatement);
4205 continue;
4207 FieldInitializers.emplace_back(Field.Contents.FT);
4208 if (parseFieldInitializer(Field, FieldInitializers.back()))
4209 return true;
4211 // Continue if we see a comma. (Also, allow line continuation.)
4212 SMLoc CommaLoc = getTok().getLoc();
4213 if (!parseOptionalToken(AsmToken::Comma))
4214 break;
4215 if (FieldIndex == Structure.Fields.size())
4216 return Error(CommaLoc, "'" + Structure.Name +
4217 "' initializer initializes too many fields");
4218 parseOptionalToken(AsmToken::EndOfStatement);
4221 // Default-initialize all remaining fields.
4222 for (const FieldInfo &Field : llvm::drop_begin(Structure.Fields, FieldIndex))
4223 FieldInitializers.push_back(Field.Contents);
4225 if (EndToken) {
4226 if (*EndToken == AsmToken::Greater)
4227 return parseAngleBracketClose();
4229 return parseToken(*EndToken);
4232 return false;
4235 bool MasmParser::parseStructInstList(
4236 const StructInfo &Structure, std::vector<StructInitializer> &Initializers,
4237 const AsmToken::TokenKind EndToken) {
4238 while (getTok().isNot(EndToken) ||
4239 (EndToken == AsmToken::Greater &&
4240 getTok().isNot(AsmToken::GreaterGreater))) {
4241 const AsmToken NextTok = peekTok();
4242 if (NextTok.is(AsmToken::Identifier) &&
4243 NextTok.getString().equals_insensitive("dup")) {
4244 const MCExpr *Value;
4245 if (parseExpression(Value) || parseToken(AsmToken::Identifier))
4246 return true;
4247 const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(Value);
4248 if (!MCE)
4249 return Error(Value->getLoc(),
4250 "cannot repeat value a non-constant number of times");
4251 const int64_t Repetitions = MCE->getValue();
4252 if (Repetitions < 0)
4253 return Error(Value->getLoc(),
4254 "cannot repeat value a negative number of times");
4256 std::vector<StructInitializer> DuplicatedValues;
4257 if (parseToken(AsmToken::LParen,
4258 "parentheses required for 'dup' contents") ||
4259 parseStructInstList(Structure, DuplicatedValues) || parseRParen())
4260 return true;
4262 for (int i = 0; i < Repetitions; ++i)
4263 llvm::append_range(Initializers, DuplicatedValues);
4264 } else {
4265 Initializers.emplace_back();
4266 if (parseStructInitializer(Structure, Initializers.back()))
4267 return true;
4270 // Continue if we see a comma. (Also, allow line continuation.)
4271 if (!parseOptionalToken(AsmToken::Comma))
4272 break;
4273 parseOptionalToken(AsmToken::EndOfStatement);
4276 return false;
4279 bool MasmParser::emitFieldValue(const FieldInfo &Field,
4280 const IntFieldInfo &Contents) {
4281 // Default-initialize all values.
4282 for (const MCExpr *Value : Contents.Values) {
4283 if (emitIntValue(Value, Field.Type))
4284 return true;
4286 return false;
4289 bool MasmParser::emitFieldValue(const FieldInfo &Field,
4290 const RealFieldInfo &Contents) {
4291 for (const APInt &AsInt : Contents.AsIntValues) {
4292 getStreamer().emitIntValue(AsInt.getLimitedValue(),
4293 AsInt.getBitWidth() / 8);
4295 return false;
4298 bool MasmParser::emitFieldValue(const FieldInfo &Field,
4299 const StructFieldInfo &Contents) {
4300 for (const auto &Initializer : Contents.Initializers) {
4301 size_t Index = 0, Offset = 0;
4302 for (const auto &SubField : Contents.Structure.Fields) {
4303 getStreamer().emitZeros(SubField.Offset - Offset);
4304 Offset = SubField.Offset + SubField.SizeOf;
4305 emitFieldInitializer(SubField, Initializer.FieldInitializers[Index++]);
4308 return false;
4311 bool MasmParser::emitFieldValue(const FieldInfo &Field) {
4312 switch (Field.Contents.FT) {
4313 case FT_INTEGRAL:
4314 return emitFieldValue(Field, Field.Contents.IntInfo);
4315 case FT_REAL:
4316 return emitFieldValue(Field, Field.Contents.RealInfo);
4317 case FT_STRUCT:
4318 return emitFieldValue(Field, Field.Contents.StructInfo);
4320 llvm_unreachable("Unhandled FieldType enum");
4323 bool MasmParser::emitFieldInitializer(const FieldInfo &Field,
4324 const IntFieldInfo &Contents,
4325 const IntFieldInfo &Initializer) {
4326 for (const auto &Value : Initializer.Values) {
4327 if (emitIntValue(Value, Field.Type))
4328 return true;
4330 // Default-initialize all remaining values.
4331 for (const auto &Value :
4332 llvm::drop_begin(Contents.Values, Initializer.Values.size())) {
4333 if (emitIntValue(Value, Field.Type))
4334 return true;
4336 return false;
4339 bool MasmParser::emitFieldInitializer(const FieldInfo &Field,
4340 const RealFieldInfo &Contents,
4341 const RealFieldInfo &Initializer) {
4342 for (const auto &AsInt : Initializer.AsIntValues) {
4343 getStreamer().emitIntValue(AsInt.getLimitedValue(),
4344 AsInt.getBitWidth() / 8);
4346 // Default-initialize all remaining values.
4347 for (const auto &AsInt :
4348 llvm::drop_begin(Contents.AsIntValues, Initializer.AsIntValues.size())) {
4349 getStreamer().emitIntValue(AsInt.getLimitedValue(),
4350 AsInt.getBitWidth() / 8);
4352 return false;
4355 bool MasmParser::emitFieldInitializer(const FieldInfo &Field,
4356 const StructFieldInfo &Contents,
4357 const StructFieldInfo &Initializer) {
4358 for (const auto &Init : Initializer.Initializers) {
4359 if (emitStructInitializer(Contents.Structure, Init))
4360 return true;
4362 // Default-initialize all remaining values.
4363 for (const auto &Init : llvm::drop_begin(Contents.Initializers,
4364 Initializer.Initializers.size())) {
4365 if (emitStructInitializer(Contents.Structure, Init))
4366 return true;
4368 return false;
4371 bool MasmParser::emitFieldInitializer(const FieldInfo &Field,
4372 const FieldInitializer &Initializer) {
4373 switch (Field.Contents.FT) {
4374 case FT_INTEGRAL:
4375 return emitFieldInitializer(Field, Field.Contents.IntInfo,
4376 Initializer.IntInfo);
4377 case FT_REAL:
4378 return emitFieldInitializer(Field, Field.Contents.RealInfo,
4379 Initializer.RealInfo);
4380 case FT_STRUCT:
4381 return emitFieldInitializer(Field, Field.Contents.StructInfo,
4382 Initializer.StructInfo);
4384 llvm_unreachable("Unhandled FieldType enum");
4387 bool MasmParser::emitStructInitializer(const StructInfo &Structure,
4388 const StructInitializer &Initializer) {
4389 if (!Structure.Initializable)
4390 return Error(getLexer().getLoc(),
4391 "cannot initialize a value of type '" + Structure.Name +
4392 "'; 'org' was used in the type's declaration");
4393 size_t Index = 0, Offset = 0;
4394 for (const auto &Init : Initializer.FieldInitializers) {
4395 const auto &Field = Structure.Fields[Index++];
4396 getStreamer().emitZeros(Field.Offset - Offset);
4397 Offset = Field.Offset + Field.SizeOf;
4398 if (emitFieldInitializer(Field, Init))
4399 return true;
4401 // Default-initialize all remaining fields.
4402 for (const auto &Field : llvm::drop_begin(
4403 Structure.Fields, Initializer.FieldInitializers.size())) {
4404 getStreamer().emitZeros(Field.Offset - Offset);
4405 Offset = Field.Offset + Field.SizeOf;
4406 if (emitFieldValue(Field))
4407 return true;
4409 // Add final padding.
4410 if (Offset != Structure.Size)
4411 getStreamer().emitZeros(Structure.Size - Offset);
4412 return false;
4415 // Set data values from initializers.
4416 bool MasmParser::emitStructValues(const StructInfo &Structure,
4417 unsigned *Count) {
4418 std::vector<StructInitializer> Initializers;
4419 if (parseStructInstList(Structure, Initializers))
4420 return true;
4422 for (const auto &Initializer : Initializers) {
4423 if (emitStructInitializer(Structure, Initializer))
4424 return true;
4427 if (Count)
4428 *Count = Initializers.size();
4429 return false;
4432 // Declare a field in the current struct.
4433 bool MasmParser::addStructField(StringRef Name, const StructInfo &Structure) {
4434 StructInfo &OwningStruct = StructInProgress.back();
4435 FieldInfo &Field =
4436 OwningStruct.addField(Name, FT_STRUCT, Structure.AlignmentSize);
4437 StructFieldInfo &StructInfo = Field.Contents.StructInfo;
4439 StructInfo.Structure = Structure;
4440 Field.Type = Structure.Size;
4442 if (parseStructInstList(Structure, StructInfo.Initializers))
4443 return true;
4445 Field.LengthOf = StructInfo.Initializers.size();
4446 Field.SizeOf = Field.Type * Field.LengthOf;
4448 const unsigned FieldEnd = Field.Offset + Field.SizeOf;
4449 if (!OwningStruct.IsUnion) {
4450 OwningStruct.NextOffset = FieldEnd;
4452 OwningStruct.Size = std::max(OwningStruct.Size, FieldEnd);
4454 return false;
4457 /// parseDirectiveStructValue
4458 /// ::= struct-id (<struct-initializer> | {struct-initializer})
4459 /// [, (<struct-initializer> | {struct-initializer})]*
4460 bool MasmParser::parseDirectiveStructValue(const StructInfo &Structure,
4461 StringRef Directive, SMLoc DirLoc) {
4462 if (StructInProgress.empty()) {
4463 if (emitStructValues(Structure))
4464 return true;
4465 } else if (addStructField("", Structure)) {
4466 return addErrorSuffix(" in '" + Twine(Directive) + "' directive");
4469 return false;
4472 /// parseDirectiveNamedValue
4473 /// ::= name (byte | word | ... ) [ expression (, expression)* ]
4474 bool MasmParser::parseDirectiveNamedStructValue(const StructInfo &Structure,
4475 StringRef Directive,
4476 SMLoc DirLoc, StringRef Name) {
4477 if (StructInProgress.empty()) {
4478 // Initialize named data value.
4479 MCSymbol *Sym = getContext().getOrCreateSymbol(Name);
4480 getStreamer().emitLabel(Sym);
4481 unsigned Count;
4482 if (emitStructValues(Structure, &Count))
4483 return true;
4484 AsmTypeInfo Type;
4485 Type.Name = Structure.Name;
4486 Type.Size = Structure.Size * Count;
4487 Type.ElementSize = Structure.Size;
4488 Type.Length = Count;
4489 KnownType[Name.lower()] = Type;
4490 } else if (addStructField(Name, Structure)) {
4491 return addErrorSuffix(" in '" + Twine(Directive) + "' directive");
4494 return false;
4497 /// parseDirectiveStruct
4498 /// ::= <name> (STRUC | STRUCT | UNION) [fieldAlign] [, NONUNIQUE]
4499 /// (dataDir | generalDir | offsetDir | nestedStruct)+
4500 /// <name> ENDS
4501 ////// dataDir = data declaration
4502 ////// offsetDir = EVEN, ORG, ALIGN
4503 bool MasmParser::parseDirectiveStruct(StringRef Directive,
4504 DirectiveKind DirKind, StringRef Name,
4505 SMLoc NameLoc) {
4506 // We ignore NONUNIQUE; we do not support OPTION M510 or OPTION OLDSTRUCTS
4507 // anyway, so all field accesses must be qualified.
4508 AsmToken NextTok = getTok();
4509 int64_t AlignmentValue = 1;
4510 if (NextTok.isNot(AsmToken::Comma) &&
4511 NextTok.isNot(AsmToken::EndOfStatement) &&
4512 parseAbsoluteExpression(AlignmentValue)) {
4513 return addErrorSuffix(" in alignment value for '" + Twine(Directive) +
4514 "' directive");
4516 if (!isPowerOf2_64(AlignmentValue)) {
4517 return Error(NextTok.getLoc(), "alignment must be a power of two; was " +
4518 std::to_string(AlignmentValue));
4521 StringRef Qualifier;
4522 SMLoc QualifierLoc;
4523 if (parseOptionalToken(AsmToken::Comma)) {
4524 QualifierLoc = getTok().getLoc();
4525 if (parseIdentifier(Qualifier))
4526 return addErrorSuffix(" in '" + Twine(Directive) + "' directive");
4527 if (!Qualifier.equals_insensitive("nonunique"))
4528 return Error(QualifierLoc, "Unrecognized qualifier for '" +
4529 Twine(Directive) +
4530 "' directive; expected none or NONUNIQUE");
4533 if (parseEOL())
4534 return addErrorSuffix(" in '" + Twine(Directive) + "' directive");
4536 StructInProgress.emplace_back(Name, DirKind == DK_UNION, AlignmentValue);
4537 return false;
4540 /// parseDirectiveNestedStruct
4541 /// ::= (STRUC | STRUCT | UNION) [name]
4542 /// (dataDir | generalDir | offsetDir | nestedStruct)+
4543 /// ENDS
4544 bool MasmParser::parseDirectiveNestedStruct(StringRef Directive,
4545 DirectiveKind DirKind) {
4546 if (StructInProgress.empty())
4547 return TokError("missing name in top-level '" + Twine(Directive) +
4548 "' directive");
4550 StringRef Name;
4551 if (getTok().is(AsmToken::Identifier)) {
4552 Name = getTok().getIdentifier();
4553 parseToken(AsmToken::Identifier);
4555 if (parseEOL())
4556 return addErrorSuffix(" in '" + Twine(Directive) + "' directive");
4558 // Reserve space to ensure Alignment doesn't get invalidated when
4559 // StructInProgress grows.
4560 StructInProgress.reserve(StructInProgress.size() + 1);
4561 StructInProgress.emplace_back(Name, DirKind == DK_UNION,
4562 StructInProgress.back().Alignment);
4563 return false;
4566 bool MasmParser::parseDirectiveEnds(StringRef Name, SMLoc NameLoc) {
4567 if (StructInProgress.empty())
4568 return Error(NameLoc, "ENDS directive without matching STRUC/STRUCT/UNION");
4569 if (StructInProgress.size() > 1)
4570 return Error(NameLoc, "unexpected name in nested ENDS directive");
4571 if (StructInProgress.back().Name.compare_insensitive(Name))
4572 return Error(NameLoc, "mismatched name in ENDS directive; expected '" +
4573 StructInProgress.back().Name + "'");
4574 StructInfo Structure = StructInProgress.pop_back_val();
4575 // Pad to make the structure's size divisible by the smaller of its alignment
4576 // and the size of its largest field.
4577 Structure.Size = llvm::alignTo(
4578 Structure.Size, std::min(Structure.Alignment, Structure.AlignmentSize));
4579 Structs[Name.lower()] = Structure;
4581 if (parseEOL())
4582 return addErrorSuffix(" in ENDS directive");
4584 return false;
4587 bool MasmParser::parseDirectiveNestedEnds() {
4588 if (StructInProgress.empty())
4589 return TokError("ENDS directive without matching STRUC/STRUCT/UNION");
4590 if (StructInProgress.size() == 1)
4591 return TokError("missing name in top-level ENDS directive");
4593 if (parseEOL())
4594 return addErrorSuffix(" in nested ENDS directive");
4596 StructInfo Structure = StructInProgress.pop_back_val();
4597 // Pad to make the structure's size divisible by its alignment.
4598 Structure.Size = llvm::alignTo(Structure.Size, Structure.Alignment);
4600 StructInfo &ParentStruct = StructInProgress.back();
4601 if (Structure.Name.empty()) {
4602 // Anonymous substructures' fields are addressed as if they belong to the
4603 // parent structure - so we transfer them to the parent here.
4604 const size_t OldFields = ParentStruct.Fields.size();
4605 ParentStruct.Fields.insert(
4606 ParentStruct.Fields.end(),
4607 std::make_move_iterator(Structure.Fields.begin()),
4608 std::make_move_iterator(Structure.Fields.end()));
4609 for (const auto &FieldByName : Structure.FieldsByName) {
4610 ParentStruct.FieldsByName[FieldByName.getKey()] =
4611 FieldByName.getValue() + OldFields;
4614 unsigned FirstFieldOffset = 0;
4615 if (!Structure.Fields.empty() && !ParentStruct.IsUnion) {
4616 FirstFieldOffset = llvm::alignTo(
4617 ParentStruct.NextOffset,
4618 std::min(ParentStruct.Alignment, Structure.AlignmentSize));
4621 if (ParentStruct.IsUnion) {
4622 ParentStruct.Size = std::max(ParentStruct.Size, Structure.Size);
4623 } else {
4624 for (auto &Field : llvm::drop_begin(ParentStruct.Fields, OldFields))
4625 Field.Offset += FirstFieldOffset;
4627 const unsigned StructureEnd = FirstFieldOffset + Structure.Size;
4628 if (!ParentStruct.IsUnion) {
4629 ParentStruct.NextOffset = StructureEnd;
4631 ParentStruct.Size = std::max(ParentStruct.Size, StructureEnd);
4633 } else {
4634 FieldInfo &Field = ParentStruct.addField(Structure.Name, FT_STRUCT,
4635 Structure.AlignmentSize);
4636 StructFieldInfo &StructInfo = Field.Contents.StructInfo;
4637 Field.Type = Structure.Size;
4638 Field.LengthOf = 1;
4639 Field.SizeOf = Structure.Size;
4641 const unsigned StructureEnd = Field.Offset + Field.SizeOf;
4642 if (!ParentStruct.IsUnion) {
4643 ParentStruct.NextOffset = StructureEnd;
4645 ParentStruct.Size = std::max(ParentStruct.Size, StructureEnd);
4647 StructInfo.Structure = Structure;
4648 StructInfo.Initializers.emplace_back();
4649 auto &FieldInitializers = StructInfo.Initializers.back().FieldInitializers;
4650 for (const auto &SubField : Structure.Fields) {
4651 FieldInitializers.push_back(SubField.Contents);
4655 return false;
4658 /// parseDirectiveOrg
4659 /// ::= org expression
4660 bool MasmParser::parseDirectiveOrg() {
4661 const MCExpr *Offset;
4662 SMLoc OffsetLoc = Lexer.getLoc();
4663 if (checkForValidSection() || parseExpression(Offset))
4664 return true;
4665 if (parseEOL())
4666 return addErrorSuffix(" in 'org' directive");
4668 if (StructInProgress.empty()) {
4669 // Not in a struct; change the offset for the next instruction or data
4670 if (checkForValidSection())
4671 return addErrorSuffix(" in 'org' directive");
4673 getStreamer().emitValueToOffset(Offset, 0, OffsetLoc);
4674 } else {
4675 // Offset the next field of this struct
4676 StructInfo &Structure = StructInProgress.back();
4677 int64_t OffsetRes;
4678 if (!Offset->evaluateAsAbsolute(OffsetRes, getStreamer().getAssemblerPtr()))
4679 return Error(OffsetLoc,
4680 "expected absolute expression in 'org' directive");
4681 if (OffsetRes < 0)
4682 return Error(
4683 OffsetLoc,
4684 "expected non-negative value in struct's 'org' directive; was " +
4685 std::to_string(OffsetRes));
4686 Structure.NextOffset = static_cast<unsigned>(OffsetRes);
4688 // ORG-affected structures cannot be initialized
4689 Structure.Initializable = false;
4692 return false;
4695 bool MasmParser::emitAlignTo(int64_t Alignment) {
4696 if (StructInProgress.empty()) {
4697 // Not in a struct; align the next instruction or data
4698 if (checkForValidSection())
4699 return true;
4701 // Check whether we should use optimal code alignment for this align
4702 // directive.
4703 const MCSection *Section = getStreamer().getCurrentSectionOnly();
4704 assert(Section && "must have section to emit alignment");
4705 if (Section->useCodeAlign()) {
4706 getStreamer().emitCodeAlignment(Align(Alignment),
4707 &getTargetParser().getSTI(),
4708 /*MaxBytesToEmit=*/0);
4709 } else {
4710 // FIXME: Target specific behavior about how the "extra" bytes are filled.
4711 getStreamer().emitValueToAlignment(Align(Alignment), /*Value=*/0,
4712 /*ValueSize=*/1,
4713 /*MaxBytesToEmit=*/0);
4715 } else {
4716 // Align the next field of this struct
4717 StructInfo &Structure = StructInProgress.back();
4718 Structure.NextOffset = llvm::alignTo(Structure.NextOffset, Alignment);
4721 return false;
4724 /// parseDirectiveAlign
4725 /// ::= align expression
4726 bool MasmParser::parseDirectiveAlign() {
4727 SMLoc AlignmentLoc = getLexer().getLoc();
4728 int64_t Alignment;
4730 // Ignore empty 'align' directives.
4731 if (getTok().is(AsmToken::EndOfStatement)) {
4732 return Warning(AlignmentLoc,
4733 "align directive with no operand is ignored") &&
4734 parseEOL();
4736 if (parseAbsoluteExpression(Alignment) || parseEOL())
4737 return addErrorSuffix(" in align directive");
4739 // Always emit an alignment here even if we throw an error.
4740 bool ReturnVal = false;
4742 // Reject alignments that aren't either a power of two or zero, for ML.exe
4743 // compatibility. Alignment of zero is silently rounded up to one.
4744 if (Alignment == 0)
4745 Alignment = 1;
4746 if (!isPowerOf2_64(Alignment))
4747 ReturnVal |= Error(AlignmentLoc, "alignment must be a power of 2; was " +
4748 std::to_string(Alignment));
4750 if (emitAlignTo(Alignment))
4751 ReturnVal |= addErrorSuffix(" in align directive");
4753 return ReturnVal;
4756 /// parseDirectiveEven
4757 /// ::= even
4758 bool MasmParser::parseDirectiveEven() {
4759 if (parseEOL() || emitAlignTo(2))
4760 return addErrorSuffix(" in even directive");
4762 return false;
4765 /// parseDirectiveFile
4766 /// ::= .file filename
4767 /// ::= .file number [directory] filename [md5 checksum] [source source-text]
4768 bool MasmParser::parseDirectiveFile(SMLoc DirectiveLoc) {
4769 // FIXME: I'm not sure what this is.
4770 int64_t FileNumber = -1;
4771 if (getLexer().is(AsmToken::Integer)) {
4772 FileNumber = getTok().getIntVal();
4773 Lex();
4775 if (FileNumber < 0)
4776 return TokError("negative file number");
4779 std::string Path;
4781 // Usually the directory and filename together, otherwise just the directory.
4782 // Allow the strings to have escaped octal character sequence.
4783 if (check(getTok().isNot(AsmToken::String),
4784 "unexpected token in '.file' directive") ||
4785 parseEscapedString(Path))
4786 return true;
4788 StringRef Directory;
4789 StringRef Filename;
4790 std::string FilenameData;
4791 if (getLexer().is(AsmToken::String)) {
4792 if (check(FileNumber == -1,
4793 "explicit path specified, but no file number") ||
4794 parseEscapedString(FilenameData))
4795 return true;
4796 Filename = FilenameData;
4797 Directory = Path;
4798 } else {
4799 Filename = Path;
4802 uint64_t MD5Hi, MD5Lo;
4803 bool HasMD5 = false;
4805 std::optional<StringRef> Source;
4806 bool HasSource = false;
4807 std::string SourceString;
4809 while (!parseOptionalToken(AsmToken::EndOfStatement)) {
4810 StringRef Keyword;
4811 if (check(getTok().isNot(AsmToken::Identifier),
4812 "unexpected token in '.file' directive") ||
4813 parseIdentifier(Keyword))
4814 return true;
4815 if (Keyword == "md5") {
4816 HasMD5 = true;
4817 if (check(FileNumber == -1,
4818 "MD5 checksum specified, but no file number") ||
4819 parseHexOcta(*this, MD5Hi, MD5Lo))
4820 return true;
4821 } else if (Keyword == "source") {
4822 HasSource = true;
4823 if (check(FileNumber == -1,
4824 "source specified, but no file number") ||
4825 check(getTok().isNot(AsmToken::String),
4826 "unexpected token in '.file' directive") ||
4827 parseEscapedString(SourceString))
4828 return true;
4829 } else {
4830 return TokError("unexpected token in '.file' directive");
4834 if (FileNumber == -1) {
4835 // Ignore the directive if there is no number and the target doesn't support
4836 // numberless .file directives. This allows some portability of assembler
4837 // between different object file formats.
4838 if (getContext().getAsmInfo()->hasSingleParameterDotFile())
4839 getStreamer().emitFileDirective(Filename);
4840 } else {
4841 // In case there is a -g option as well as debug info from directive .file,
4842 // we turn off the -g option, directly use the existing debug info instead.
4843 // Throw away any implicit file table for the assembler source.
4844 if (Ctx.getGenDwarfForAssembly()) {
4845 Ctx.getMCDwarfLineTable(0).resetFileTable();
4846 Ctx.setGenDwarfForAssembly(false);
4849 std::optional<MD5::MD5Result> CKMem;
4850 if (HasMD5) {
4851 MD5::MD5Result Sum;
4852 for (unsigned i = 0; i != 8; ++i) {
4853 Sum[i] = uint8_t(MD5Hi >> ((7 - i) * 8));
4854 Sum[i + 8] = uint8_t(MD5Lo >> ((7 - i) * 8));
4856 CKMem = Sum;
4858 if (HasSource) {
4859 char *SourceBuf = static_cast<char *>(Ctx.allocate(SourceString.size()));
4860 memcpy(SourceBuf, SourceString.data(), SourceString.size());
4861 Source = StringRef(SourceBuf, SourceString.size());
4863 if (FileNumber == 0) {
4864 if (Ctx.getDwarfVersion() < 5)
4865 return Warning(DirectiveLoc, "file 0 not supported prior to DWARF-5");
4866 getStreamer().emitDwarfFile0Directive(Directory, Filename, CKMem, Source);
4867 } else {
4868 Expected<unsigned> FileNumOrErr = getStreamer().tryEmitDwarfFileDirective(
4869 FileNumber, Directory, Filename, CKMem, Source);
4870 if (!FileNumOrErr)
4871 return Error(DirectiveLoc, toString(FileNumOrErr.takeError()));
4873 // Alert the user if there are some .file directives with MD5 and some not.
4874 // But only do that once.
4875 if (!ReportedInconsistentMD5 && !Ctx.isDwarfMD5UsageConsistent(0)) {
4876 ReportedInconsistentMD5 = true;
4877 return Warning(DirectiveLoc, "inconsistent use of MD5 checksums");
4881 return false;
4884 /// parseDirectiveLine
4885 /// ::= .line [number]
4886 bool MasmParser::parseDirectiveLine() {
4887 int64_t LineNumber;
4888 if (getLexer().is(AsmToken::Integer)) {
4889 if (parseIntToken(LineNumber, "unexpected token in '.line' directive"))
4890 return true;
4891 (void)LineNumber;
4892 // FIXME: Do something with the .line.
4894 if (parseEOL())
4895 return true;
4897 return false;
4900 /// parseDirectiveLoc
4901 /// ::= .loc FileNumber [LineNumber] [ColumnPos] [basic_block] [prologue_end]
4902 /// [epilogue_begin] [is_stmt VALUE] [isa VALUE]
4903 /// The first number is a file number, must have been previously assigned with
4904 /// a .file directive, the second number is the line number and optionally the
4905 /// third number is a column position (zero if not specified). The remaining
4906 /// optional items are .loc sub-directives.
4907 bool MasmParser::parseDirectiveLoc() {
4908 int64_t FileNumber = 0, LineNumber = 0;
4909 SMLoc Loc = getTok().getLoc();
4910 if (parseIntToken(FileNumber, "unexpected token in '.loc' directive") ||
4911 check(FileNumber < 1 && Ctx.getDwarfVersion() < 5, Loc,
4912 "file number less than one in '.loc' directive") ||
4913 check(!getContext().isValidDwarfFileNumber(FileNumber), Loc,
4914 "unassigned file number in '.loc' directive"))
4915 return true;
4917 // optional
4918 if (getLexer().is(AsmToken::Integer)) {
4919 LineNumber = getTok().getIntVal();
4920 if (LineNumber < 0)
4921 return TokError("line number less than zero in '.loc' directive");
4922 Lex();
4925 int64_t ColumnPos = 0;
4926 if (getLexer().is(AsmToken::Integer)) {
4927 ColumnPos = getTok().getIntVal();
4928 if (ColumnPos < 0)
4929 return TokError("column position less than zero in '.loc' directive");
4930 Lex();
4933 auto PrevFlags = getContext().getCurrentDwarfLoc().getFlags();
4934 unsigned Flags = PrevFlags & DWARF2_FLAG_IS_STMT;
4935 unsigned Isa = 0;
4936 int64_t Discriminator = 0;
4938 auto parseLocOp = [&]() -> bool {
4939 StringRef Name;
4940 SMLoc Loc = getTok().getLoc();
4941 if (parseIdentifier(Name))
4942 return TokError("unexpected token in '.loc' directive");
4944 if (Name == "basic_block")
4945 Flags |= DWARF2_FLAG_BASIC_BLOCK;
4946 else if (Name == "prologue_end")
4947 Flags |= DWARF2_FLAG_PROLOGUE_END;
4948 else if (Name == "epilogue_begin")
4949 Flags |= DWARF2_FLAG_EPILOGUE_BEGIN;
4950 else if (Name == "is_stmt") {
4951 Loc = getTok().getLoc();
4952 const MCExpr *Value;
4953 if (parseExpression(Value))
4954 return true;
4955 // The expression must be the constant 0 or 1.
4956 if (const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(Value)) {
4957 int Value = MCE->getValue();
4958 if (Value == 0)
4959 Flags &= ~DWARF2_FLAG_IS_STMT;
4960 else if (Value == 1)
4961 Flags |= DWARF2_FLAG_IS_STMT;
4962 else
4963 return Error(Loc, "is_stmt value not 0 or 1");
4964 } else {
4965 return Error(Loc, "is_stmt value not the constant value of 0 or 1");
4967 } else if (Name == "isa") {
4968 Loc = getTok().getLoc();
4969 const MCExpr *Value;
4970 if (parseExpression(Value))
4971 return true;
4972 // The expression must be a constant greater or equal to 0.
4973 if (const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(Value)) {
4974 int Value = MCE->getValue();
4975 if (Value < 0)
4976 return Error(Loc, "isa number less than zero");
4977 Isa = Value;
4978 } else {
4979 return Error(Loc, "isa number not a constant value");
4981 } else if (Name == "discriminator") {
4982 if (parseAbsoluteExpression(Discriminator))
4983 return true;
4984 } else {
4985 return Error(Loc, "unknown sub-directive in '.loc' directive");
4987 return false;
4990 if (parseMany(parseLocOp, false /*hasComma*/))
4991 return true;
4993 getStreamer().emitDwarfLocDirective(FileNumber, LineNumber, ColumnPos, Flags,
4994 Isa, Discriminator, StringRef());
4996 return false;
4999 /// parseDirectiveStabs
5000 /// ::= .stabs string, number, number, number
5001 bool MasmParser::parseDirectiveStabs() {
5002 return TokError("unsupported directive '.stabs'");
5005 /// parseDirectiveCVFile
5006 /// ::= .cv_file number filename [checksum] [checksumkind]
5007 bool MasmParser::parseDirectiveCVFile() {
5008 SMLoc FileNumberLoc = getTok().getLoc();
5009 int64_t FileNumber;
5010 std::string Filename;
5011 std::string Checksum;
5012 int64_t ChecksumKind = 0;
5014 if (parseIntToken(FileNumber,
5015 "expected file number in '.cv_file' directive") ||
5016 check(FileNumber < 1, FileNumberLoc, "file number less than one") ||
5017 check(getTok().isNot(AsmToken::String),
5018 "unexpected token in '.cv_file' directive") ||
5019 parseEscapedString(Filename))
5020 return true;
5021 if (!parseOptionalToken(AsmToken::EndOfStatement)) {
5022 if (check(getTok().isNot(AsmToken::String),
5023 "unexpected token in '.cv_file' directive") ||
5024 parseEscapedString(Checksum) ||
5025 parseIntToken(ChecksumKind,
5026 "expected checksum kind in '.cv_file' directive") ||
5027 parseEOL())
5028 return true;
5031 Checksum = fromHex(Checksum);
5032 void *CKMem = Ctx.allocate(Checksum.size(), 1);
5033 memcpy(CKMem, Checksum.data(), Checksum.size());
5034 ArrayRef<uint8_t> ChecksumAsBytes(reinterpret_cast<const uint8_t *>(CKMem),
5035 Checksum.size());
5037 if (!getStreamer().emitCVFileDirective(FileNumber, Filename, ChecksumAsBytes,
5038 static_cast<uint8_t>(ChecksumKind)))
5039 return Error(FileNumberLoc, "file number already allocated");
5041 return false;
5044 bool MasmParser::parseCVFunctionId(int64_t &FunctionId,
5045 StringRef DirectiveName) {
5046 SMLoc Loc;
5047 return parseTokenLoc(Loc) ||
5048 parseIntToken(FunctionId, "expected function id in '" + DirectiveName +
5049 "' directive") ||
5050 check(FunctionId < 0 || FunctionId >= UINT_MAX, Loc,
5051 "expected function id within range [0, UINT_MAX)");
5054 bool MasmParser::parseCVFileId(int64_t &FileNumber, StringRef DirectiveName) {
5055 SMLoc Loc;
5056 return parseTokenLoc(Loc) ||
5057 parseIntToken(FileNumber, "expected integer in '" + DirectiveName +
5058 "' directive") ||
5059 check(FileNumber < 1, Loc, "file number less than one in '" +
5060 DirectiveName + "' directive") ||
5061 check(!getCVContext().isValidFileNumber(FileNumber), Loc,
5062 "unassigned file number in '" + DirectiveName + "' directive");
5065 /// parseDirectiveCVFuncId
5066 /// ::= .cv_func_id FunctionId
5068 /// Introduces a function ID that can be used with .cv_loc.
5069 bool MasmParser::parseDirectiveCVFuncId() {
5070 SMLoc FunctionIdLoc = getTok().getLoc();
5071 int64_t FunctionId;
5073 if (parseCVFunctionId(FunctionId, ".cv_func_id") || parseEOL())
5074 return true;
5076 if (!getStreamer().emitCVFuncIdDirective(FunctionId))
5077 return Error(FunctionIdLoc, "function id already allocated");
5079 return false;
5082 /// parseDirectiveCVInlineSiteId
5083 /// ::= .cv_inline_site_id FunctionId
5084 /// "within" IAFunc
5085 /// "inlined_at" IAFile IALine [IACol]
5087 /// Introduces a function ID that can be used with .cv_loc. Includes "inlined
5088 /// at" source location information for use in the line table of the caller,
5089 /// whether the caller is a real function or another inlined call site.
5090 bool MasmParser::parseDirectiveCVInlineSiteId() {
5091 SMLoc FunctionIdLoc = getTok().getLoc();
5092 int64_t FunctionId;
5093 int64_t IAFunc;
5094 int64_t IAFile;
5095 int64_t IALine;
5096 int64_t IACol = 0;
5098 // FunctionId
5099 if (parseCVFunctionId(FunctionId, ".cv_inline_site_id"))
5100 return true;
5102 // "within"
5103 if (check((getLexer().isNot(AsmToken::Identifier) ||
5104 getTok().getIdentifier() != "within"),
5105 "expected 'within' identifier in '.cv_inline_site_id' directive"))
5106 return true;
5107 Lex();
5109 // IAFunc
5110 if (parseCVFunctionId(IAFunc, ".cv_inline_site_id"))
5111 return true;
5113 // "inlined_at"
5114 if (check((getLexer().isNot(AsmToken::Identifier) ||
5115 getTok().getIdentifier() != "inlined_at"),
5116 "expected 'inlined_at' identifier in '.cv_inline_site_id' "
5117 "directive") )
5118 return true;
5119 Lex();
5121 // IAFile IALine
5122 if (parseCVFileId(IAFile, ".cv_inline_site_id") ||
5123 parseIntToken(IALine, "expected line number after 'inlined_at'"))
5124 return true;
5126 // [IACol]
5127 if (getLexer().is(AsmToken::Integer)) {
5128 IACol = getTok().getIntVal();
5129 Lex();
5132 if (parseEOL())
5133 return true;
5135 if (!getStreamer().emitCVInlineSiteIdDirective(FunctionId, IAFunc, IAFile,
5136 IALine, IACol, FunctionIdLoc))
5137 return Error(FunctionIdLoc, "function id already allocated");
5139 return false;
5142 /// parseDirectiveCVLoc
5143 /// ::= .cv_loc FunctionId FileNumber [LineNumber] [ColumnPos] [prologue_end]
5144 /// [is_stmt VALUE]
5145 /// The first number is a file number, must have been previously assigned with
5146 /// a .file directive, the second number is the line number and optionally the
5147 /// third number is a column position (zero if not specified). The remaining
5148 /// optional items are .loc sub-directives.
5149 bool MasmParser::parseDirectiveCVLoc() {
5150 SMLoc DirectiveLoc = getTok().getLoc();
5151 int64_t FunctionId, FileNumber;
5152 if (parseCVFunctionId(FunctionId, ".cv_loc") ||
5153 parseCVFileId(FileNumber, ".cv_loc"))
5154 return true;
5156 int64_t LineNumber = 0;
5157 if (getLexer().is(AsmToken::Integer)) {
5158 LineNumber = getTok().getIntVal();
5159 if (LineNumber < 0)
5160 return TokError("line number less than zero in '.cv_loc' directive");
5161 Lex();
5164 int64_t ColumnPos = 0;
5165 if (getLexer().is(AsmToken::Integer)) {
5166 ColumnPos = getTok().getIntVal();
5167 if (ColumnPos < 0)
5168 return TokError("column position less than zero in '.cv_loc' directive");
5169 Lex();
5172 bool PrologueEnd = false;
5173 uint64_t IsStmt = 0;
5175 auto parseOp = [&]() -> bool {
5176 StringRef Name;
5177 SMLoc Loc = getTok().getLoc();
5178 if (parseIdentifier(Name))
5179 return TokError("unexpected token in '.cv_loc' directive");
5180 if (Name == "prologue_end")
5181 PrologueEnd = true;
5182 else if (Name == "is_stmt") {
5183 Loc = getTok().getLoc();
5184 const MCExpr *Value;
5185 if (parseExpression(Value))
5186 return true;
5187 // The expression must be the constant 0 or 1.
5188 IsStmt = ~0ULL;
5189 if (const auto *MCE = dyn_cast<MCConstantExpr>(Value))
5190 IsStmt = MCE->getValue();
5192 if (IsStmt > 1)
5193 return Error(Loc, "is_stmt value not 0 or 1");
5194 } else {
5195 return Error(Loc, "unknown sub-directive in '.cv_loc' directive");
5197 return false;
5200 if (parseMany(parseOp, false /*hasComma*/))
5201 return true;
5203 getStreamer().emitCVLocDirective(FunctionId, FileNumber, LineNumber,
5204 ColumnPos, PrologueEnd, IsStmt, StringRef(),
5205 DirectiveLoc);
5206 return false;
5209 /// parseDirectiveCVLinetable
5210 /// ::= .cv_linetable FunctionId, FnStart, FnEnd
5211 bool MasmParser::parseDirectiveCVLinetable() {
5212 int64_t FunctionId;
5213 StringRef FnStartName, FnEndName;
5214 SMLoc Loc = getTok().getLoc();
5215 if (parseCVFunctionId(FunctionId, ".cv_linetable") ||
5216 parseToken(AsmToken::Comma,
5217 "unexpected token in '.cv_linetable' directive") ||
5218 parseTokenLoc(Loc) || check(parseIdentifier(FnStartName), Loc,
5219 "expected identifier in directive") ||
5220 parseToken(AsmToken::Comma,
5221 "unexpected token in '.cv_linetable' directive") ||
5222 parseTokenLoc(Loc) || check(parseIdentifier(FnEndName), Loc,
5223 "expected identifier in directive"))
5224 return true;
5226 MCSymbol *FnStartSym = getContext().getOrCreateSymbol(FnStartName);
5227 MCSymbol *FnEndSym = getContext().getOrCreateSymbol(FnEndName);
5229 getStreamer().emitCVLinetableDirective(FunctionId, FnStartSym, FnEndSym);
5230 return false;
5233 /// parseDirectiveCVInlineLinetable
5234 /// ::= .cv_inline_linetable PrimaryFunctionId FileId LineNum FnStart FnEnd
5235 bool MasmParser::parseDirectiveCVInlineLinetable() {
5236 int64_t PrimaryFunctionId, SourceFileId, SourceLineNum;
5237 StringRef FnStartName, FnEndName;
5238 SMLoc Loc = getTok().getLoc();
5239 if (parseCVFunctionId(PrimaryFunctionId, ".cv_inline_linetable") ||
5240 parseTokenLoc(Loc) ||
5241 parseIntToken(
5242 SourceFileId,
5243 "expected SourceField in '.cv_inline_linetable' directive") ||
5244 check(SourceFileId <= 0, Loc,
5245 "File id less than zero in '.cv_inline_linetable' directive") ||
5246 parseTokenLoc(Loc) ||
5247 parseIntToken(
5248 SourceLineNum,
5249 "expected SourceLineNum in '.cv_inline_linetable' directive") ||
5250 check(SourceLineNum < 0, Loc,
5251 "Line number less than zero in '.cv_inline_linetable' directive") ||
5252 parseTokenLoc(Loc) || check(parseIdentifier(FnStartName), Loc,
5253 "expected identifier in directive") ||
5254 parseTokenLoc(Loc) || check(parseIdentifier(FnEndName), Loc,
5255 "expected identifier in directive"))
5256 return true;
5258 if (parseEOL())
5259 return true;
5261 MCSymbol *FnStartSym = getContext().getOrCreateSymbol(FnStartName);
5262 MCSymbol *FnEndSym = getContext().getOrCreateSymbol(FnEndName);
5263 getStreamer().emitCVInlineLinetableDirective(PrimaryFunctionId, SourceFileId,
5264 SourceLineNum, FnStartSym,
5265 FnEndSym);
5266 return false;
5269 void MasmParser::initializeCVDefRangeTypeMap() {
5270 CVDefRangeTypeMap["reg"] = CVDR_DEFRANGE_REGISTER;
5271 CVDefRangeTypeMap["frame_ptr_rel"] = CVDR_DEFRANGE_FRAMEPOINTER_REL;
5272 CVDefRangeTypeMap["subfield_reg"] = CVDR_DEFRANGE_SUBFIELD_REGISTER;
5273 CVDefRangeTypeMap["reg_rel"] = CVDR_DEFRANGE_REGISTER_REL;
5276 /// parseDirectiveCVDefRange
5277 /// ::= .cv_def_range RangeStart RangeEnd (GapStart GapEnd)*, bytes*
5278 bool MasmParser::parseDirectiveCVDefRange() {
5279 SMLoc Loc;
5280 std::vector<std::pair<const MCSymbol *, const MCSymbol *>> Ranges;
5281 while (getLexer().is(AsmToken::Identifier)) {
5282 Loc = getLexer().getLoc();
5283 StringRef GapStartName;
5284 if (parseIdentifier(GapStartName))
5285 return Error(Loc, "expected identifier in directive");
5286 MCSymbol *GapStartSym = getContext().getOrCreateSymbol(GapStartName);
5288 Loc = getLexer().getLoc();
5289 StringRef GapEndName;
5290 if (parseIdentifier(GapEndName))
5291 return Error(Loc, "expected identifier in directive");
5292 MCSymbol *GapEndSym = getContext().getOrCreateSymbol(GapEndName);
5294 Ranges.push_back({GapStartSym, GapEndSym});
5297 StringRef CVDefRangeTypeStr;
5298 if (parseToken(
5299 AsmToken::Comma,
5300 "expected comma before def_range type in .cv_def_range directive") ||
5301 parseIdentifier(CVDefRangeTypeStr))
5302 return Error(Loc, "expected def_range type in directive");
5304 StringMap<CVDefRangeType>::const_iterator CVTypeIt =
5305 CVDefRangeTypeMap.find(CVDefRangeTypeStr);
5306 CVDefRangeType CVDRType = (CVTypeIt == CVDefRangeTypeMap.end())
5307 ? CVDR_DEFRANGE
5308 : CVTypeIt->getValue();
5309 switch (CVDRType) {
5310 case CVDR_DEFRANGE_REGISTER: {
5311 int64_t DRRegister;
5312 if (parseToken(AsmToken::Comma, "expected comma before register number in "
5313 ".cv_def_range directive") ||
5314 parseAbsoluteExpression(DRRegister))
5315 return Error(Loc, "expected register number");
5317 codeview::DefRangeRegisterHeader DRHdr;
5318 DRHdr.Register = DRRegister;
5319 DRHdr.MayHaveNoName = 0;
5320 getStreamer().emitCVDefRangeDirective(Ranges, DRHdr);
5321 break;
5323 case CVDR_DEFRANGE_FRAMEPOINTER_REL: {
5324 int64_t DROffset;
5325 if (parseToken(AsmToken::Comma,
5326 "expected comma before offset in .cv_def_range directive") ||
5327 parseAbsoluteExpression(DROffset))
5328 return Error(Loc, "expected offset value");
5330 codeview::DefRangeFramePointerRelHeader DRHdr;
5331 DRHdr.Offset = DROffset;
5332 getStreamer().emitCVDefRangeDirective(Ranges, DRHdr);
5333 break;
5335 case CVDR_DEFRANGE_SUBFIELD_REGISTER: {
5336 int64_t DRRegister;
5337 int64_t DROffsetInParent;
5338 if (parseToken(AsmToken::Comma, "expected comma before register number in "
5339 ".cv_def_range directive") ||
5340 parseAbsoluteExpression(DRRegister))
5341 return Error(Loc, "expected register number");
5342 if (parseToken(AsmToken::Comma,
5343 "expected comma before offset in .cv_def_range directive") ||
5344 parseAbsoluteExpression(DROffsetInParent))
5345 return Error(Loc, "expected offset value");
5347 codeview::DefRangeSubfieldRegisterHeader DRHdr;
5348 DRHdr.Register = DRRegister;
5349 DRHdr.MayHaveNoName = 0;
5350 DRHdr.OffsetInParent = DROffsetInParent;
5351 getStreamer().emitCVDefRangeDirective(Ranges, DRHdr);
5352 break;
5354 case CVDR_DEFRANGE_REGISTER_REL: {
5355 int64_t DRRegister;
5356 int64_t DRFlags;
5357 int64_t DRBasePointerOffset;
5358 if (parseToken(AsmToken::Comma, "expected comma before register number in "
5359 ".cv_def_range directive") ||
5360 parseAbsoluteExpression(DRRegister))
5361 return Error(Loc, "expected register value");
5362 if (parseToken(
5363 AsmToken::Comma,
5364 "expected comma before flag value in .cv_def_range directive") ||
5365 parseAbsoluteExpression(DRFlags))
5366 return Error(Loc, "expected flag value");
5367 if (parseToken(AsmToken::Comma, "expected comma before base pointer offset "
5368 "in .cv_def_range directive") ||
5369 parseAbsoluteExpression(DRBasePointerOffset))
5370 return Error(Loc, "expected base pointer offset value");
5372 codeview::DefRangeRegisterRelHeader DRHdr;
5373 DRHdr.Register = DRRegister;
5374 DRHdr.Flags = DRFlags;
5375 DRHdr.BasePointerOffset = DRBasePointerOffset;
5376 getStreamer().emitCVDefRangeDirective(Ranges, DRHdr);
5377 break;
5379 default:
5380 return Error(Loc, "unexpected def_range type in .cv_def_range directive");
5382 return true;
5385 /// parseDirectiveCVString
5386 /// ::= .cv_stringtable "string"
5387 bool MasmParser::parseDirectiveCVString() {
5388 std::string Data;
5389 if (checkForValidSection() || parseEscapedString(Data))
5390 return addErrorSuffix(" in '.cv_string' directive");
5392 // Put the string in the table and emit the offset.
5393 std::pair<StringRef, unsigned> Insertion =
5394 getCVContext().addToStringTable(Data);
5395 getStreamer().emitIntValue(Insertion.second, 4);
5396 return false;
5399 /// parseDirectiveCVStringTable
5400 /// ::= .cv_stringtable
5401 bool MasmParser::parseDirectiveCVStringTable() {
5402 getStreamer().emitCVStringTableDirective();
5403 return false;
5406 /// parseDirectiveCVFileChecksums
5407 /// ::= .cv_filechecksums
5408 bool MasmParser::parseDirectiveCVFileChecksums() {
5409 getStreamer().emitCVFileChecksumsDirective();
5410 return false;
5413 /// parseDirectiveCVFileChecksumOffset
5414 /// ::= .cv_filechecksumoffset fileno
5415 bool MasmParser::parseDirectiveCVFileChecksumOffset() {
5416 int64_t FileNo;
5417 if (parseIntToken(FileNo, "expected identifier in directive"))
5418 return true;
5419 if (parseEOL())
5420 return true;
5421 getStreamer().emitCVFileChecksumOffsetDirective(FileNo);
5422 return false;
5425 /// parseDirectiveCVFPOData
5426 /// ::= .cv_fpo_data procsym
5427 bool MasmParser::parseDirectiveCVFPOData() {
5428 SMLoc DirLoc = getLexer().getLoc();
5429 StringRef ProcName;
5430 if (parseIdentifier(ProcName))
5431 return TokError("expected symbol name");
5432 if (parseEOL("unexpected tokens"))
5433 return addErrorSuffix(" in '.cv_fpo_data' directive");
5434 MCSymbol *ProcSym = getContext().getOrCreateSymbol(ProcName);
5435 getStreamer().emitCVFPOData(ProcSym, DirLoc);
5436 return false;
5439 /// parseDirectiveCFISections
5440 /// ::= .cfi_sections section [, section]
5441 bool MasmParser::parseDirectiveCFISections() {
5442 StringRef Name;
5443 bool EH = false;
5444 bool Debug = false;
5446 if (parseIdentifier(Name))
5447 return TokError("Expected an identifier");
5449 if (Name == ".eh_frame")
5450 EH = true;
5451 else if (Name == ".debug_frame")
5452 Debug = true;
5454 if (getLexer().is(AsmToken::Comma)) {
5455 Lex();
5457 if (parseIdentifier(Name))
5458 return TokError("Expected an identifier");
5460 if (Name == ".eh_frame")
5461 EH = true;
5462 else if (Name == ".debug_frame")
5463 Debug = true;
5466 getStreamer().emitCFISections(EH, Debug);
5467 return false;
5470 /// parseDirectiveCFIStartProc
5471 /// ::= .cfi_startproc [simple]
5472 bool MasmParser::parseDirectiveCFIStartProc() {
5473 StringRef Simple;
5474 if (!parseOptionalToken(AsmToken::EndOfStatement)) {
5475 if (check(parseIdentifier(Simple) || Simple != "simple",
5476 "unexpected token") ||
5477 parseEOL())
5478 return addErrorSuffix(" in '.cfi_startproc' directive");
5481 // TODO(kristina): Deal with a corner case of incorrect diagnostic context
5482 // being produced if this directive is emitted as part of preprocessor macro
5483 // expansion which can *ONLY* happen if Clang's cc1as is the API consumer.
5484 // Tools like llvm-mc on the other hand are not affected by it, and report
5485 // correct context information.
5486 getStreamer().emitCFIStartProc(!Simple.empty(), Lexer.getLoc());
5487 return false;
5490 /// parseDirectiveCFIEndProc
5491 /// ::= .cfi_endproc
5492 bool MasmParser::parseDirectiveCFIEndProc() {
5493 getStreamer().emitCFIEndProc();
5494 return false;
5497 /// parse register name or number.
5498 bool MasmParser::parseRegisterOrRegisterNumber(int64_t &Register,
5499 SMLoc DirectiveLoc) {
5500 MCRegister RegNo;
5502 if (getLexer().isNot(AsmToken::Integer)) {
5503 if (getTargetParser().parseRegister(RegNo, DirectiveLoc, DirectiveLoc))
5504 return true;
5505 Register = getContext().getRegisterInfo()->getDwarfRegNum(RegNo, true);
5506 } else
5507 return parseAbsoluteExpression(Register);
5509 return false;
5512 /// parseDirectiveCFIDefCfa
5513 /// ::= .cfi_def_cfa register, offset
5514 bool MasmParser::parseDirectiveCFIDefCfa(SMLoc DirectiveLoc) {
5515 int64_t Register = 0, Offset = 0;
5516 if (parseRegisterOrRegisterNumber(Register, DirectiveLoc) ||
5517 parseToken(AsmToken::Comma, "unexpected token in directive") ||
5518 parseAbsoluteExpression(Offset))
5519 return true;
5521 getStreamer().emitCFIDefCfa(Register, Offset);
5522 return false;
5525 /// parseDirectiveCFIDefCfaOffset
5526 /// ::= .cfi_def_cfa_offset offset
5527 bool MasmParser::parseDirectiveCFIDefCfaOffset(SMLoc DirectiveLoc) {
5528 int64_t Offset = 0;
5529 if (parseAbsoluteExpression(Offset))
5530 return true;
5532 getStreamer().emitCFIDefCfaOffset(Offset, DirectiveLoc);
5533 return false;
5536 /// parseDirectiveCFIRegister
5537 /// ::= .cfi_register register, register
5538 bool MasmParser::parseDirectiveCFIRegister(SMLoc DirectiveLoc) {
5539 int64_t Register1 = 0, Register2 = 0;
5540 if (parseRegisterOrRegisterNumber(Register1, DirectiveLoc) ||
5541 parseToken(AsmToken::Comma, "unexpected token in directive") ||
5542 parseRegisterOrRegisterNumber(Register2, DirectiveLoc))
5543 return true;
5545 getStreamer().emitCFIRegister(Register1, Register2, DirectiveLoc);
5546 return false;
5549 /// parseDirectiveCFIWindowSave
5550 /// ::= .cfi_window_save
5551 bool MasmParser::parseDirectiveCFIWindowSave(SMLoc DirectiveLoc) {
5552 getStreamer().emitCFIWindowSave(DirectiveLoc);
5553 return false;
5556 /// parseDirectiveCFIAdjustCfaOffset
5557 /// ::= .cfi_adjust_cfa_offset adjustment
5558 bool MasmParser::parseDirectiveCFIAdjustCfaOffset(SMLoc DirectiveLoc) {
5559 int64_t Adjustment = 0;
5560 if (parseAbsoluteExpression(Adjustment))
5561 return true;
5563 getStreamer().emitCFIAdjustCfaOffset(Adjustment, DirectiveLoc);
5564 return false;
5567 /// parseDirectiveCFIDefCfaRegister
5568 /// ::= .cfi_def_cfa_register register
5569 bool MasmParser::parseDirectiveCFIDefCfaRegister(SMLoc DirectiveLoc) {
5570 int64_t Register = 0;
5571 if (parseRegisterOrRegisterNumber(Register, DirectiveLoc))
5572 return true;
5574 getStreamer().emitCFIDefCfaRegister(Register);
5575 return false;
5578 /// parseDirectiveCFIOffset
5579 /// ::= .cfi_offset register, offset
5580 bool MasmParser::parseDirectiveCFIOffset(SMLoc DirectiveLoc) {
5581 int64_t Register = 0;
5582 int64_t Offset = 0;
5584 if (parseRegisterOrRegisterNumber(Register, DirectiveLoc) ||
5585 parseToken(AsmToken::Comma, "unexpected token in directive") ||
5586 parseAbsoluteExpression(Offset))
5587 return true;
5589 getStreamer().emitCFIOffset(Register, Offset);
5590 return false;
5593 /// parseDirectiveCFIRelOffset
5594 /// ::= .cfi_rel_offset register, offset
5595 bool MasmParser::parseDirectiveCFIRelOffset(SMLoc DirectiveLoc) {
5596 int64_t Register = 0, Offset = 0;
5598 if (parseRegisterOrRegisterNumber(Register, DirectiveLoc) ||
5599 parseToken(AsmToken::Comma, "unexpected token in directive") ||
5600 parseAbsoluteExpression(Offset))
5601 return true;
5603 getStreamer().emitCFIRelOffset(Register, Offset, DirectiveLoc);
5604 return false;
5607 static bool isValidEncoding(int64_t Encoding) {
5608 if (Encoding & ~0xff)
5609 return false;
5611 if (Encoding == dwarf::DW_EH_PE_omit)
5612 return true;
5614 const unsigned Format = Encoding & 0xf;
5615 if (Format != dwarf::DW_EH_PE_absptr && Format != dwarf::DW_EH_PE_udata2 &&
5616 Format != dwarf::DW_EH_PE_udata4 && Format != dwarf::DW_EH_PE_udata8 &&
5617 Format != dwarf::DW_EH_PE_sdata2 && Format != dwarf::DW_EH_PE_sdata4 &&
5618 Format != dwarf::DW_EH_PE_sdata8 && Format != dwarf::DW_EH_PE_signed)
5619 return false;
5621 const unsigned Application = Encoding & 0x70;
5622 if (Application != dwarf::DW_EH_PE_absptr &&
5623 Application != dwarf::DW_EH_PE_pcrel)
5624 return false;
5626 return true;
5629 /// parseDirectiveCFIPersonalityOrLsda
5630 /// IsPersonality true for cfi_personality, false for cfi_lsda
5631 /// ::= .cfi_personality encoding, [symbol_name]
5632 /// ::= .cfi_lsda encoding, [symbol_name]
5633 bool MasmParser::parseDirectiveCFIPersonalityOrLsda(bool IsPersonality) {
5634 int64_t Encoding = 0;
5635 if (parseAbsoluteExpression(Encoding))
5636 return true;
5637 if (Encoding == dwarf::DW_EH_PE_omit)
5638 return false;
5640 StringRef Name;
5641 if (check(!isValidEncoding(Encoding), "unsupported encoding.") ||
5642 parseToken(AsmToken::Comma, "unexpected token in directive") ||
5643 check(parseIdentifier(Name), "expected identifier in directive"))
5644 return true;
5646 MCSymbol *Sym = getContext().getOrCreateSymbol(Name);
5648 if (IsPersonality)
5649 getStreamer().emitCFIPersonality(Sym, Encoding);
5650 else
5651 getStreamer().emitCFILsda(Sym, Encoding);
5652 return false;
5655 /// parseDirectiveCFIRememberState
5656 /// ::= .cfi_remember_state
5657 bool MasmParser::parseDirectiveCFIRememberState(SMLoc DirectiveLoc) {
5658 getStreamer().emitCFIRememberState(DirectiveLoc);
5659 return false;
5662 /// parseDirectiveCFIRestoreState
5663 /// ::= .cfi_remember_state
5664 bool MasmParser::parseDirectiveCFIRestoreState(SMLoc DirectiveLoc) {
5665 getStreamer().emitCFIRestoreState(DirectiveLoc);
5666 return false;
5669 /// parseDirectiveCFISameValue
5670 /// ::= .cfi_same_value register
5671 bool MasmParser::parseDirectiveCFISameValue(SMLoc DirectiveLoc) {
5672 int64_t Register = 0;
5674 if (parseRegisterOrRegisterNumber(Register, DirectiveLoc))
5675 return true;
5677 getStreamer().emitCFISameValue(Register, DirectiveLoc);
5678 return false;
5681 /// parseDirectiveCFIRestore
5682 /// ::= .cfi_restore register
5683 bool MasmParser::parseDirectiveCFIRestore(SMLoc DirectiveLoc) {
5684 int64_t Register = 0;
5685 if (parseRegisterOrRegisterNumber(Register, DirectiveLoc))
5686 return true;
5688 getStreamer().emitCFIRestore(Register);
5689 return false;
5692 /// parseDirectiveCFIEscape
5693 /// ::= .cfi_escape expression[,...]
5694 bool MasmParser::parseDirectiveCFIEscape(SMLoc DirectiveLoc) {
5695 std::string Values;
5696 int64_t CurrValue;
5697 if (parseAbsoluteExpression(CurrValue))
5698 return true;
5700 Values.push_back((uint8_t)CurrValue);
5702 while (getLexer().is(AsmToken::Comma)) {
5703 Lex();
5705 if (parseAbsoluteExpression(CurrValue))
5706 return true;
5708 Values.push_back((uint8_t)CurrValue);
5711 getStreamer().emitCFIEscape(Values, DirectiveLoc);
5712 return false;
5715 /// parseDirectiveCFIReturnColumn
5716 /// ::= .cfi_return_column register
5717 bool MasmParser::parseDirectiveCFIReturnColumn(SMLoc DirectiveLoc) {
5718 int64_t Register = 0;
5719 if (parseRegisterOrRegisterNumber(Register, DirectiveLoc))
5720 return true;
5721 getStreamer().emitCFIReturnColumn(Register);
5722 return false;
5725 /// parseDirectiveCFISignalFrame
5726 /// ::= .cfi_signal_frame
5727 bool MasmParser::parseDirectiveCFISignalFrame() {
5728 if (parseEOL())
5729 return true;
5731 getStreamer().emitCFISignalFrame();
5732 return false;
5735 /// parseDirectiveCFIUndefined
5736 /// ::= .cfi_undefined register
5737 bool MasmParser::parseDirectiveCFIUndefined(SMLoc DirectiveLoc) {
5738 int64_t Register = 0;
5740 if (parseRegisterOrRegisterNumber(Register, DirectiveLoc))
5741 return true;
5743 getStreamer().emitCFIUndefined(Register);
5744 return false;
5747 /// parseDirectiveMacro
5748 /// ::= name macro [parameters]
5749 /// ["LOCAL" identifiers]
5750 /// parameters ::= parameter [, parameter]*
5751 /// parameter ::= name ":" qualifier
5752 /// qualifier ::= "req" | "vararg" | "=" macro_argument
5753 bool MasmParser::parseDirectiveMacro(StringRef Name, SMLoc NameLoc) {
5754 MCAsmMacroParameters Parameters;
5755 while (getLexer().isNot(AsmToken::EndOfStatement)) {
5756 if (!Parameters.empty() && Parameters.back().Vararg)
5757 return Error(Lexer.getLoc(),
5758 "Vararg parameter '" + Parameters.back().Name +
5759 "' should be last in the list of parameters");
5761 MCAsmMacroParameter Parameter;
5762 if (parseIdentifier(Parameter.Name))
5763 return TokError("expected identifier in 'macro' directive");
5765 // Emit an error if two (or more) named parameters share the same name.
5766 for (const MCAsmMacroParameter& CurrParam : Parameters)
5767 if (CurrParam.Name.equals_insensitive(Parameter.Name))
5768 return TokError("macro '" + Name + "' has multiple parameters"
5769 " named '" + Parameter.Name + "'");
5771 if (Lexer.is(AsmToken::Colon)) {
5772 Lex(); // consume ':'
5774 if (parseOptionalToken(AsmToken::Equal)) {
5775 // Default value
5776 SMLoc ParamLoc;
5778 ParamLoc = Lexer.getLoc();
5779 if (parseMacroArgument(nullptr, Parameter.Value))
5780 return true;
5781 } else {
5782 SMLoc QualLoc;
5783 StringRef Qualifier;
5785 QualLoc = Lexer.getLoc();
5786 if (parseIdentifier(Qualifier))
5787 return Error(QualLoc, "missing parameter qualifier for "
5788 "'" +
5789 Parameter.Name + "' in macro '" + Name +
5790 "'");
5792 if (Qualifier.equals_insensitive("req"))
5793 Parameter.Required = true;
5794 else if (Qualifier.equals_insensitive("vararg"))
5795 Parameter.Vararg = true;
5796 else
5797 return Error(QualLoc,
5798 Qualifier + " is not a valid parameter qualifier for '" +
5799 Parameter.Name + "' in macro '" + Name + "'");
5803 Parameters.push_back(std::move(Parameter));
5805 if (getLexer().is(AsmToken::Comma))
5806 Lex();
5809 // Eat just the end of statement.
5810 Lexer.Lex();
5812 std::vector<std::string> Locals;
5813 if (getTok().is(AsmToken::Identifier) &&
5814 getTok().getIdentifier().equals_insensitive("local")) {
5815 Lex(); // Eat the LOCAL directive.
5817 StringRef ID;
5818 while (true) {
5819 if (parseIdentifier(ID))
5820 return true;
5821 Locals.push_back(ID.lower());
5823 // If we see a comma, continue (and allow line continuation).
5824 if (!parseOptionalToken(AsmToken::Comma))
5825 break;
5826 parseOptionalToken(AsmToken::EndOfStatement);
5830 // Consuming deferred text, so use Lexer.Lex to ignore Lexing Errors.
5831 AsmToken EndToken, StartToken = getTok();
5832 unsigned MacroDepth = 0;
5833 bool IsMacroFunction = false;
5834 // Lex the macro definition.
5835 while (true) {
5836 // Ignore Lexing errors in macros.
5837 while (Lexer.is(AsmToken::Error)) {
5838 Lexer.Lex();
5841 // Check whether we have reached the end of the file.
5842 if (getLexer().is(AsmToken::Eof))
5843 return Error(NameLoc, "no matching 'endm' in definition");
5845 // Otherwise, check whether we have reached the 'endm'... and determine if
5846 // this is a macro function.
5847 if (getLexer().is(AsmToken::Identifier)) {
5848 if (getTok().getIdentifier().equals_insensitive("endm")) {
5849 if (MacroDepth == 0) { // Outermost macro.
5850 EndToken = getTok();
5851 Lexer.Lex();
5852 if (getLexer().isNot(AsmToken::EndOfStatement))
5853 return TokError("unexpected token in '" + EndToken.getIdentifier() +
5854 "' directive");
5855 break;
5856 } else {
5857 // Otherwise we just found the end of an inner macro.
5858 --MacroDepth;
5860 } else if (getTok().getIdentifier().equals_insensitive("exitm")) {
5861 if (MacroDepth == 0 && peekTok().isNot(AsmToken::EndOfStatement)) {
5862 IsMacroFunction = true;
5864 } else if (isMacroLikeDirective()) {
5865 // We allow nested macros. Those aren't instantiated until the
5866 // outermost macro is expanded so just ignore them for now.
5867 ++MacroDepth;
5871 // Otherwise, scan til the end of the statement.
5872 eatToEndOfStatement();
5875 if (getContext().lookupMacro(Name.lower())) {
5876 return Error(NameLoc, "macro '" + Name + "' is already defined");
5879 const char *BodyStart = StartToken.getLoc().getPointer();
5880 const char *BodyEnd = EndToken.getLoc().getPointer();
5881 StringRef Body = StringRef(BodyStart, BodyEnd - BodyStart);
5882 MCAsmMacro Macro(Name, Body, std::move(Parameters), std::move(Locals),
5883 IsMacroFunction);
5884 DEBUG_WITH_TYPE("asm-macros", dbgs() << "Defining new macro:\n";
5885 Macro.dump());
5886 getContext().defineMacro(Name.lower(), std::move(Macro));
5887 return false;
5890 /// parseDirectiveExitMacro
5891 /// ::= "exitm" [textitem]
5892 bool MasmParser::parseDirectiveExitMacro(SMLoc DirectiveLoc,
5893 StringRef Directive,
5894 std::string &Value) {
5895 SMLoc EndLoc = getTok().getLoc();
5896 if (getTok().isNot(AsmToken::EndOfStatement) && parseTextItem(Value))
5897 return Error(EndLoc,
5898 "unable to parse text item in '" + Directive + "' directive");
5899 eatToEndOfStatement();
5901 if (!isInsideMacroInstantiation())
5902 return TokError("unexpected '" + Directive + "' in file, "
5903 "no current macro definition");
5905 // Exit all conditionals that are active in the current macro.
5906 while (TheCondStack.size() != ActiveMacros.back()->CondStackDepth) {
5907 TheCondState = TheCondStack.back();
5908 TheCondStack.pop_back();
5911 handleMacroExit();
5912 return false;
5915 /// parseDirectiveEndMacro
5916 /// ::= endm
5917 bool MasmParser::parseDirectiveEndMacro(StringRef Directive) {
5918 if (getLexer().isNot(AsmToken::EndOfStatement))
5919 return TokError("unexpected token in '" + Directive + "' directive");
5921 // If we are inside a macro instantiation, terminate the current
5922 // instantiation.
5923 if (isInsideMacroInstantiation()) {
5924 handleMacroExit();
5925 return false;
5928 // Otherwise, this .endmacro is a stray entry in the file; well formed
5929 // .endmacro directives are handled during the macro definition parsing.
5930 return TokError("unexpected '" + Directive + "' in file, "
5931 "no current macro definition");
5934 /// parseDirectivePurgeMacro
5935 /// ::= purge identifier ( , identifier )*
5936 bool MasmParser::parseDirectivePurgeMacro(SMLoc DirectiveLoc) {
5937 StringRef Name;
5938 while (true) {
5939 SMLoc NameLoc;
5940 if (parseTokenLoc(NameLoc) ||
5941 check(parseIdentifier(Name), NameLoc,
5942 "expected identifier in 'purge' directive"))
5943 return true;
5945 DEBUG_WITH_TYPE("asm-macros", dbgs()
5946 << "Un-defining macro: " << Name << "\n");
5947 if (!getContext().lookupMacro(Name.lower()))
5948 return Error(NameLoc, "macro '" + Name + "' is not defined");
5949 getContext().undefineMacro(Name.lower());
5951 if (!parseOptionalToken(AsmToken::Comma))
5952 break;
5953 parseOptionalToken(AsmToken::EndOfStatement);
5956 return false;
5959 bool MasmParser::parseDirectiveExtern() {
5960 // .extern is the default - but we still need to take any provided type info.
5961 auto parseOp = [&]() -> bool {
5962 StringRef Name;
5963 SMLoc NameLoc = getTok().getLoc();
5964 if (parseIdentifier(Name))
5965 return Error(NameLoc, "expected name");
5966 if (parseToken(AsmToken::Colon))
5967 return true;
5969 StringRef TypeName;
5970 SMLoc TypeLoc = getTok().getLoc();
5971 if (parseIdentifier(TypeName))
5972 return Error(TypeLoc, "expected type");
5973 if (!TypeName.equals_insensitive("proc")) {
5974 AsmTypeInfo Type;
5975 if (lookUpType(TypeName, Type))
5976 return Error(TypeLoc, "unrecognized type");
5977 KnownType[Name.lower()] = Type;
5980 MCSymbol *Sym = getContext().getOrCreateSymbol(Name);
5981 Sym->setExternal(true);
5982 getStreamer().emitSymbolAttribute(Sym, MCSA_Extern);
5984 return false;
5987 if (parseMany(parseOp))
5988 return addErrorSuffix(" in directive 'extern'");
5989 return false;
5992 /// parseDirectiveSymbolAttribute
5993 /// ::= { ".globl", ".weak", ... } [ identifier ( , identifier )* ]
5994 bool MasmParser::parseDirectiveSymbolAttribute(MCSymbolAttr Attr) {
5995 auto parseOp = [&]() -> bool {
5996 StringRef Name;
5997 SMLoc Loc = getTok().getLoc();
5998 if (parseIdentifier(Name))
5999 return Error(Loc, "expected identifier");
6000 MCSymbol *Sym = getContext().getOrCreateSymbol(Name);
6002 // Assembler local symbols don't make any sense here. Complain loudly.
6003 if (Sym->isTemporary())
6004 return Error(Loc, "non-local symbol required");
6006 if (!getStreamer().emitSymbolAttribute(Sym, Attr))
6007 return Error(Loc, "unable to emit symbol attribute");
6008 return false;
6011 if (parseMany(parseOp))
6012 return addErrorSuffix(" in directive");
6013 return false;
6016 /// parseDirectiveComm
6017 /// ::= ( .comm | .lcomm ) identifier , size_expression [ , align_expression ]
6018 bool MasmParser::parseDirectiveComm(bool IsLocal) {
6019 if (checkForValidSection())
6020 return true;
6022 SMLoc IDLoc = getLexer().getLoc();
6023 StringRef Name;
6024 if (parseIdentifier(Name))
6025 return TokError("expected identifier in directive");
6027 // Handle the identifier as the key symbol.
6028 MCSymbol *Sym = getContext().getOrCreateSymbol(Name);
6030 if (getLexer().isNot(AsmToken::Comma))
6031 return TokError("unexpected token in directive");
6032 Lex();
6034 int64_t Size;
6035 SMLoc SizeLoc = getLexer().getLoc();
6036 if (parseAbsoluteExpression(Size))
6037 return true;
6039 int64_t Pow2Alignment = 0;
6040 SMLoc Pow2AlignmentLoc;
6041 if (getLexer().is(AsmToken::Comma)) {
6042 Lex();
6043 Pow2AlignmentLoc = getLexer().getLoc();
6044 if (parseAbsoluteExpression(Pow2Alignment))
6045 return true;
6047 LCOMM::LCOMMType LCOMM = Lexer.getMAI().getLCOMMDirectiveAlignmentType();
6048 if (IsLocal && LCOMM == LCOMM::NoAlignment)
6049 return Error(Pow2AlignmentLoc, "alignment not supported on this target");
6051 // If this target takes alignments in bytes (not log) validate and convert.
6052 if ((!IsLocal && Lexer.getMAI().getCOMMDirectiveAlignmentIsInBytes()) ||
6053 (IsLocal && LCOMM == LCOMM::ByteAlignment)) {
6054 if (!isPowerOf2_64(Pow2Alignment))
6055 return Error(Pow2AlignmentLoc, "alignment must be a power of 2");
6056 Pow2Alignment = Log2_64(Pow2Alignment);
6060 if (parseEOL())
6061 return true;
6063 // NOTE: a size of zero for a .comm should create a undefined symbol
6064 // but a size of .lcomm creates a bss symbol of size zero.
6065 if (Size < 0)
6066 return Error(SizeLoc, "invalid '.comm' or '.lcomm' directive size, can't "
6067 "be less than zero");
6069 // NOTE: The alignment in the directive is a power of 2 value, the assembler
6070 // may internally end up wanting an alignment in bytes.
6071 // FIXME: Diagnose overflow.
6072 if (Pow2Alignment < 0)
6073 return Error(Pow2AlignmentLoc, "invalid '.comm' or '.lcomm' directive "
6074 "alignment, can't be less than zero");
6076 Sym->redefineIfPossible();
6077 if (!Sym->isUndefined())
6078 return Error(IDLoc, "invalid symbol redefinition");
6080 // Create the Symbol as a common or local common with Size and Pow2Alignment.
6081 if (IsLocal) {
6082 getStreamer().emitLocalCommonSymbol(Sym, Size,
6083 Align(1ULL << Pow2Alignment));
6084 return false;
6087 getStreamer().emitCommonSymbol(Sym, Size, Align(1ULL << Pow2Alignment));
6088 return false;
6091 /// parseDirectiveComment
6092 /// ::= comment delimiter [[text]]
6093 /// [[text]]
6094 /// [[text]] delimiter [[text]]
6095 bool MasmParser::parseDirectiveComment(SMLoc DirectiveLoc) {
6096 std::string FirstLine = parseStringTo(AsmToken::EndOfStatement);
6097 size_t DelimiterEnd = FirstLine.find_first_of("\b\t\v\f\r\x1A ");
6098 assert(DelimiterEnd != std::string::npos);
6099 StringRef Delimiter = StringRef(FirstLine).take_front(DelimiterEnd);
6100 if (Delimiter.empty())
6101 return Error(DirectiveLoc, "no delimiter in 'comment' directive");
6102 do {
6103 if (getTok().is(AsmToken::Eof))
6104 return Error(DirectiveLoc, "unmatched delimiter in 'comment' directive");
6105 Lex(); // eat end of statement
6106 } while (
6107 !StringRef(parseStringTo(AsmToken::EndOfStatement)).contains(Delimiter));
6108 return parseEOL();
6111 /// parseDirectiveInclude
6112 /// ::= include <filename>
6113 /// | include filename
6114 bool MasmParser::parseDirectiveInclude() {
6115 // Allow the strings to have escaped octal character sequence.
6116 std::string Filename;
6117 SMLoc IncludeLoc = getTok().getLoc();
6119 if (parseAngleBracketString(Filename))
6120 Filename = parseStringTo(AsmToken::EndOfStatement);
6121 if (check(Filename.empty(), "missing filename in 'include' directive") ||
6122 check(getTok().isNot(AsmToken::EndOfStatement),
6123 "unexpected token in 'include' directive") ||
6124 // Attempt to switch the lexer to the included file before consuming the
6125 // end of statement to avoid losing it when we switch.
6126 check(enterIncludeFile(Filename), IncludeLoc,
6127 "Could not find include file '" + Filename + "'"))
6128 return true;
6130 return false;
6133 /// parseDirectiveIf
6134 /// ::= .if{,eq,ge,gt,le,lt,ne} expression
6135 bool MasmParser::parseDirectiveIf(SMLoc DirectiveLoc, DirectiveKind DirKind) {
6136 TheCondStack.push_back(TheCondState);
6137 TheCondState.TheCond = AsmCond::IfCond;
6138 if (TheCondState.Ignore) {
6139 eatToEndOfStatement();
6140 } else {
6141 int64_t ExprValue;
6142 if (parseAbsoluteExpression(ExprValue) || parseEOL())
6143 return true;
6145 switch (DirKind) {
6146 default:
6147 llvm_unreachable("unsupported directive");
6148 case DK_IF:
6149 break;
6150 case DK_IFE:
6151 ExprValue = ExprValue == 0;
6152 break;
6155 TheCondState.CondMet = ExprValue;
6156 TheCondState.Ignore = !TheCondState.CondMet;
6159 return false;
6162 /// parseDirectiveIfb
6163 /// ::= .ifb textitem
6164 bool MasmParser::parseDirectiveIfb(SMLoc DirectiveLoc, bool ExpectBlank) {
6165 TheCondStack.push_back(TheCondState);
6166 TheCondState.TheCond = AsmCond::IfCond;
6168 if (TheCondState.Ignore) {
6169 eatToEndOfStatement();
6170 } else {
6171 std::string Str;
6172 if (parseTextItem(Str))
6173 return TokError("expected text item parameter for 'ifb' directive");
6175 if (parseEOL())
6176 return true;
6178 TheCondState.CondMet = ExpectBlank == Str.empty();
6179 TheCondState.Ignore = !TheCondState.CondMet;
6182 return false;
6185 /// parseDirectiveIfidn
6186 /// ::= ifidn textitem, textitem
6187 bool MasmParser::parseDirectiveIfidn(SMLoc DirectiveLoc, bool ExpectEqual,
6188 bool CaseInsensitive) {
6189 std::string String1, String2;
6191 if (parseTextItem(String1)) {
6192 if (ExpectEqual)
6193 return TokError("expected text item parameter for 'ifidn' directive");
6194 return TokError("expected text item parameter for 'ifdif' directive");
6197 if (Lexer.isNot(AsmToken::Comma)) {
6198 if (ExpectEqual)
6199 return TokError(
6200 "expected comma after first string for 'ifidn' directive");
6201 return TokError("expected comma after first string for 'ifdif' directive");
6203 Lex();
6205 if (parseTextItem(String2)) {
6206 if (ExpectEqual)
6207 return TokError("expected text item parameter for 'ifidn' directive");
6208 return TokError("expected text item parameter for 'ifdif' directive");
6211 TheCondStack.push_back(TheCondState);
6212 TheCondState.TheCond = AsmCond::IfCond;
6213 if (CaseInsensitive)
6214 TheCondState.CondMet =
6215 ExpectEqual == (StringRef(String1).equals_insensitive(String2));
6216 else
6217 TheCondState.CondMet = ExpectEqual == (String1 == String2);
6218 TheCondState.Ignore = !TheCondState.CondMet;
6220 return false;
6223 /// parseDirectiveIfdef
6224 /// ::= ifdef symbol
6225 /// | ifdef variable
6226 bool MasmParser::parseDirectiveIfdef(SMLoc DirectiveLoc, bool expect_defined) {
6227 TheCondStack.push_back(TheCondState);
6228 TheCondState.TheCond = AsmCond::IfCond;
6230 if (TheCondState.Ignore) {
6231 eatToEndOfStatement();
6232 } else {
6233 bool is_defined = false;
6234 MCRegister Reg;
6235 SMLoc StartLoc, EndLoc;
6236 is_defined =
6237 getTargetParser().tryParseRegister(Reg, StartLoc, EndLoc).isSuccess();
6238 if (!is_defined) {
6239 StringRef Name;
6240 if (check(parseIdentifier(Name), "expected identifier after 'ifdef'") ||
6241 parseEOL())
6242 return true;
6244 if (BuiltinSymbolMap.contains(Name.lower())) {
6245 is_defined = true;
6246 } else if (Variables.contains(Name.lower())) {
6247 is_defined = true;
6248 } else {
6249 MCSymbol *Sym = getContext().lookupSymbol(Name.lower());
6250 is_defined = (Sym && !Sym->isUndefined(false));
6254 TheCondState.CondMet = (is_defined == expect_defined);
6255 TheCondState.Ignore = !TheCondState.CondMet;
6258 return false;
6261 /// parseDirectiveElseIf
6262 /// ::= elseif expression
6263 bool MasmParser::parseDirectiveElseIf(SMLoc DirectiveLoc,
6264 DirectiveKind DirKind) {
6265 if (TheCondState.TheCond != AsmCond::IfCond &&
6266 TheCondState.TheCond != AsmCond::ElseIfCond)
6267 return Error(DirectiveLoc, "Encountered a .elseif that doesn't follow an"
6268 " .if or an .elseif");
6269 TheCondState.TheCond = AsmCond::ElseIfCond;
6271 bool LastIgnoreState = false;
6272 if (!TheCondStack.empty())
6273 LastIgnoreState = TheCondStack.back().Ignore;
6274 if (LastIgnoreState || TheCondState.CondMet) {
6275 TheCondState.Ignore = true;
6276 eatToEndOfStatement();
6277 } else {
6278 int64_t ExprValue;
6279 if (parseAbsoluteExpression(ExprValue))
6280 return true;
6282 if (parseEOL())
6283 return true;
6285 switch (DirKind) {
6286 default:
6287 llvm_unreachable("unsupported directive");
6288 case DK_ELSEIF:
6289 break;
6290 case DK_ELSEIFE:
6291 ExprValue = ExprValue == 0;
6292 break;
6295 TheCondState.CondMet = ExprValue;
6296 TheCondState.Ignore = !TheCondState.CondMet;
6299 return false;
6302 /// parseDirectiveElseIfb
6303 /// ::= elseifb textitem
6304 bool MasmParser::parseDirectiveElseIfb(SMLoc DirectiveLoc, bool ExpectBlank) {
6305 if (TheCondState.TheCond != AsmCond::IfCond &&
6306 TheCondState.TheCond != AsmCond::ElseIfCond)
6307 return Error(DirectiveLoc, "Encountered an elseif that doesn't follow an"
6308 " if or an elseif");
6309 TheCondState.TheCond = AsmCond::ElseIfCond;
6311 bool LastIgnoreState = false;
6312 if (!TheCondStack.empty())
6313 LastIgnoreState = TheCondStack.back().Ignore;
6314 if (LastIgnoreState || TheCondState.CondMet) {
6315 TheCondState.Ignore = true;
6316 eatToEndOfStatement();
6317 } else {
6318 std::string Str;
6319 if (parseTextItem(Str)) {
6320 if (ExpectBlank)
6321 return TokError("expected text item parameter for 'elseifb' directive");
6322 return TokError("expected text item parameter for 'elseifnb' directive");
6325 if (parseEOL())
6326 return true;
6328 TheCondState.CondMet = ExpectBlank == Str.empty();
6329 TheCondState.Ignore = !TheCondState.CondMet;
6332 return false;
6335 /// parseDirectiveElseIfdef
6336 /// ::= elseifdef symbol
6337 /// | elseifdef variable
6338 bool MasmParser::parseDirectiveElseIfdef(SMLoc DirectiveLoc,
6339 bool expect_defined) {
6340 if (TheCondState.TheCond != AsmCond::IfCond &&
6341 TheCondState.TheCond != AsmCond::ElseIfCond)
6342 return Error(DirectiveLoc, "Encountered an elseif that doesn't follow an"
6343 " if or an elseif");
6344 TheCondState.TheCond = AsmCond::ElseIfCond;
6346 bool LastIgnoreState = false;
6347 if (!TheCondStack.empty())
6348 LastIgnoreState = TheCondStack.back().Ignore;
6349 if (LastIgnoreState || TheCondState.CondMet) {
6350 TheCondState.Ignore = true;
6351 eatToEndOfStatement();
6352 } else {
6353 bool is_defined = false;
6354 MCRegister Reg;
6355 SMLoc StartLoc, EndLoc;
6356 is_defined =
6357 getTargetParser().tryParseRegister(Reg, StartLoc, EndLoc).isSuccess();
6358 if (!is_defined) {
6359 StringRef Name;
6360 if (check(parseIdentifier(Name),
6361 "expected identifier after 'elseifdef'") ||
6362 parseEOL())
6363 return true;
6365 if (BuiltinSymbolMap.contains(Name.lower())) {
6366 is_defined = true;
6367 } else if (Variables.contains(Name.lower())) {
6368 is_defined = true;
6369 } else {
6370 MCSymbol *Sym = getContext().lookupSymbol(Name);
6371 is_defined = (Sym && !Sym->isUndefined(false));
6375 TheCondState.CondMet = (is_defined == expect_defined);
6376 TheCondState.Ignore = !TheCondState.CondMet;
6379 return false;
6382 /// parseDirectiveElseIfidn
6383 /// ::= elseifidn textitem, textitem
6384 bool MasmParser::parseDirectiveElseIfidn(SMLoc DirectiveLoc, bool ExpectEqual,
6385 bool CaseInsensitive) {
6386 if (TheCondState.TheCond != AsmCond::IfCond &&
6387 TheCondState.TheCond != AsmCond::ElseIfCond)
6388 return Error(DirectiveLoc, "Encountered an elseif that doesn't follow an"
6389 " if or an elseif");
6390 TheCondState.TheCond = AsmCond::ElseIfCond;
6392 bool LastIgnoreState = false;
6393 if (!TheCondStack.empty())
6394 LastIgnoreState = TheCondStack.back().Ignore;
6395 if (LastIgnoreState || TheCondState.CondMet) {
6396 TheCondState.Ignore = true;
6397 eatToEndOfStatement();
6398 } else {
6399 std::string String1, String2;
6401 if (parseTextItem(String1)) {
6402 if (ExpectEqual)
6403 return TokError(
6404 "expected text item parameter for 'elseifidn' directive");
6405 return TokError("expected text item parameter for 'elseifdif' directive");
6408 if (Lexer.isNot(AsmToken::Comma)) {
6409 if (ExpectEqual)
6410 return TokError(
6411 "expected comma after first string for 'elseifidn' directive");
6412 return TokError(
6413 "expected comma after first string for 'elseifdif' directive");
6415 Lex();
6417 if (parseTextItem(String2)) {
6418 if (ExpectEqual)
6419 return TokError(
6420 "expected text item parameter for 'elseifidn' directive");
6421 return TokError("expected text item parameter for 'elseifdif' directive");
6424 if (CaseInsensitive)
6425 TheCondState.CondMet =
6426 ExpectEqual == (StringRef(String1).equals_insensitive(String2));
6427 else
6428 TheCondState.CondMet = ExpectEqual == (String1 == String2);
6429 TheCondState.Ignore = !TheCondState.CondMet;
6432 return false;
6435 /// parseDirectiveElse
6436 /// ::= else
6437 bool MasmParser::parseDirectiveElse(SMLoc DirectiveLoc) {
6438 if (parseEOL())
6439 return true;
6441 if (TheCondState.TheCond != AsmCond::IfCond &&
6442 TheCondState.TheCond != AsmCond::ElseIfCond)
6443 return Error(DirectiveLoc, "Encountered an else that doesn't follow an if"
6444 " or an elseif");
6445 TheCondState.TheCond = AsmCond::ElseCond;
6446 bool LastIgnoreState = false;
6447 if (!TheCondStack.empty())
6448 LastIgnoreState = TheCondStack.back().Ignore;
6449 if (LastIgnoreState || TheCondState.CondMet)
6450 TheCondState.Ignore = true;
6451 else
6452 TheCondState.Ignore = false;
6454 return false;
6457 /// parseDirectiveEnd
6458 /// ::= end
6459 bool MasmParser::parseDirectiveEnd(SMLoc DirectiveLoc) {
6460 if (parseEOL())
6461 return true;
6463 while (Lexer.isNot(AsmToken::Eof))
6464 Lexer.Lex();
6466 return false;
6469 /// parseDirectiveError
6470 /// ::= .err [message]
6471 bool MasmParser::parseDirectiveError(SMLoc DirectiveLoc) {
6472 if (!TheCondStack.empty()) {
6473 if (TheCondStack.back().Ignore) {
6474 eatToEndOfStatement();
6475 return false;
6479 std::string Message = ".err directive invoked in source file";
6480 if (Lexer.isNot(AsmToken::EndOfStatement))
6481 Message = parseStringTo(AsmToken::EndOfStatement);
6482 Lex();
6484 return Error(DirectiveLoc, Message);
6487 /// parseDirectiveErrorIfb
6488 /// ::= .errb textitem[, message]
6489 bool MasmParser::parseDirectiveErrorIfb(SMLoc DirectiveLoc, bool ExpectBlank) {
6490 if (!TheCondStack.empty()) {
6491 if (TheCondStack.back().Ignore) {
6492 eatToEndOfStatement();
6493 return false;
6497 std::string Text;
6498 if (parseTextItem(Text))
6499 return Error(getTok().getLoc(), "missing text item in '.errb' directive");
6501 std::string Message = ".errb directive invoked in source file";
6502 if (Lexer.isNot(AsmToken::EndOfStatement)) {
6503 if (parseToken(AsmToken::Comma))
6504 return addErrorSuffix(" in '.errb' directive");
6505 Message = parseStringTo(AsmToken::EndOfStatement);
6507 Lex();
6509 if (Text.empty() == ExpectBlank)
6510 return Error(DirectiveLoc, Message);
6511 return false;
6514 /// parseDirectiveErrorIfdef
6515 /// ::= .errdef name[, message]
6516 bool MasmParser::parseDirectiveErrorIfdef(SMLoc DirectiveLoc,
6517 bool ExpectDefined) {
6518 if (!TheCondStack.empty()) {
6519 if (TheCondStack.back().Ignore) {
6520 eatToEndOfStatement();
6521 return false;
6525 bool IsDefined = false;
6526 MCRegister Reg;
6527 SMLoc StartLoc, EndLoc;
6528 IsDefined =
6529 getTargetParser().tryParseRegister(Reg, StartLoc, EndLoc).isSuccess();
6530 if (!IsDefined) {
6531 StringRef Name;
6532 if (check(parseIdentifier(Name), "expected identifier after '.errdef'"))
6533 return true;
6535 if (BuiltinSymbolMap.contains(Name.lower())) {
6536 IsDefined = true;
6537 } else if (Variables.contains(Name.lower())) {
6538 IsDefined = true;
6539 } else {
6540 MCSymbol *Sym = getContext().lookupSymbol(Name);
6541 IsDefined = (Sym && !Sym->isUndefined(false));
6545 std::string Message = ".errdef directive invoked in source file";
6546 if (Lexer.isNot(AsmToken::EndOfStatement)) {
6547 if (parseToken(AsmToken::Comma))
6548 return addErrorSuffix(" in '.errdef' directive");
6549 Message = parseStringTo(AsmToken::EndOfStatement);
6551 Lex();
6553 if (IsDefined == ExpectDefined)
6554 return Error(DirectiveLoc, Message);
6555 return false;
6558 /// parseDirectiveErrorIfidn
6559 /// ::= .erridn textitem, textitem[, message]
6560 bool MasmParser::parseDirectiveErrorIfidn(SMLoc DirectiveLoc, bool ExpectEqual,
6561 bool CaseInsensitive) {
6562 if (!TheCondStack.empty()) {
6563 if (TheCondStack.back().Ignore) {
6564 eatToEndOfStatement();
6565 return false;
6569 std::string String1, String2;
6571 if (parseTextItem(String1)) {
6572 if (ExpectEqual)
6573 return TokError("expected string parameter for '.erridn' directive");
6574 return TokError("expected string parameter for '.errdif' directive");
6577 if (Lexer.isNot(AsmToken::Comma)) {
6578 if (ExpectEqual)
6579 return TokError(
6580 "expected comma after first string for '.erridn' directive");
6581 return TokError(
6582 "expected comma after first string for '.errdif' directive");
6584 Lex();
6586 if (parseTextItem(String2)) {
6587 if (ExpectEqual)
6588 return TokError("expected string parameter for '.erridn' directive");
6589 return TokError("expected string parameter for '.errdif' directive");
6592 std::string Message;
6593 if (ExpectEqual)
6594 Message = ".erridn directive invoked in source file";
6595 else
6596 Message = ".errdif directive invoked in source file";
6597 if (Lexer.isNot(AsmToken::EndOfStatement)) {
6598 if (parseToken(AsmToken::Comma))
6599 return addErrorSuffix(" in '.erridn' directive");
6600 Message = parseStringTo(AsmToken::EndOfStatement);
6602 Lex();
6604 if (CaseInsensitive)
6605 TheCondState.CondMet =
6606 ExpectEqual == (StringRef(String1).equals_insensitive(String2));
6607 else
6608 TheCondState.CondMet = ExpectEqual == (String1 == String2);
6609 TheCondState.Ignore = !TheCondState.CondMet;
6611 if ((CaseInsensitive &&
6612 ExpectEqual == StringRef(String1).equals_insensitive(String2)) ||
6613 (ExpectEqual == (String1 == String2)))
6614 return Error(DirectiveLoc, Message);
6615 return false;
6618 /// parseDirectiveErrorIfe
6619 /// ::= .erre expression[, message]
6620 bool MasmParser::parseDirectiveErrorIfe(SMLoc DirectiveLoc, bool ExpectZero) {
6621 if (!TheCondStack.empty()) {
6622 if (TheCondStack.back().Ignore) {
6623 eatToEndOfStatement();
6624 return false;
6628 int64_t ExprValue;
6629 if (parseAbsoluteExpression(ExprValue))
6630 return addErrorSuffix(" in '.erre' directive");
6632 std::string Message = ".erre directive invoked in source file";
6633 if (Lexer.isNot(AsmToken::EndOfStatement)) {
6634 if (parseToken(AsmToken::Comma))
6635 return addErrorSuffix(" in '.erre' directive");
6636 Message = parseStringTo(AsmToken::EndOfStatement);
6638 Lex();
6640 if ((ExprValue == 0) == ExpectZero)
6641 return Error(DirectiveLoc, Message);
6642 return false;
6645 /// parseDirectiveEndIf
6646 /// ::= .endif
6647 bool MasmParser::parseDirectiveEndIf(SMLoc DirectiveLoc) {
6648 if (parseEOL())
6649 return true;
6651 if ((TheCondState.TheCond == AsmCond::NoCond) || TheCondStack.empty())
6652 return Error(DirectiveLoc, "Encountered a .endif that doesn't follow "
6653 "an .if or .else");
6654 if (!TheCondStack.empty()) {
6655 TheCondState = TheCondStack.back();
6656 TheCondStack.pop_back();
6659 return false;
6662 void MasmParser::initializeDirectiveKindMap() {
6663 DirectiveKindMap["="] = DK_ASSIGN;
6664 DirectiveKindMap["equ"] = DK_EQU;
6665 DirectiveKindMap["textequ"] = DK_TEXTEQU;
6666 // DirectiveKindMap[".ascii"] = DK_ASCII;
6667 // DirectiveKindMap[".asciz"] = DK_ASCIZ;
6668 // DirectiveKindMap[".string"] = DK_STRING;
6669 DirectiveKindMap["byte"] = DK_BYTE;
6670 DirectiveKindMap["sbyte"] = DK_SBYTE;
6671 DirectiveKindMap["word"] = DK_WORD;
6672 DirectiveKindMap["sword"] = DK_SWORD;
6673 DirectiveKindMap["dword"] = DK_DWORD;
6674 DirectiveKindMap["sdword"] = DK_SDWORD;
6675 DirectiveKindMap["fword"] = DK_FWORD;
6676 DirectiveKindMap["qword"] = DK_QWORD;
6677 DirectiveKindMap["sqword"] = DK_SQWORD;
6678 DirectiveKindMap["real4"] = DK_REAL4;
6679 DirectiveKindMap["real8"] = DK_REAL8;
6680 DirectiveKindMap["real10"] = DK_REAL10;
6681 DirectiveKindMap["align"] = DK_ALIGN;
6682 DirectiveKindMap["even"] = DK_EVEN;
6683 DirectiveKindMap["org"] = DK_ORG;
6684 DirectiveKindMap["extern"] = DK_EXTERN;
6685 DirectiveKindMap["extrn"] = DK_EXTERN;
6686 DirectiveKindMap["public"] = DK_PUBLIC;
6687 // DirectiveKindMap[".comm"] = DK_COMM;
6688 DirectiveKindMap["comment"] = DK_COMMENT;
6689 DirectiveKindMap["include"] = DK_INCLUDE;
6690 DirectiveKindMap["repeat"] = DK_REPEAT;
6691 DirectiveKindMap["rept"] = DK_REPEAT;
6692 DirectiveKindMap["while"] = DK_WHILE;
6693 DirectiveKindMap["for"] = DK_FOR;
6694 DirectiveKindMap["irp"] = DK_FOR;
6695 DirectiveKindMap["forc"] = DK_FORC;
6696 DirectiveKindMap["irpc"] = DK_FORC;
6697 DirectiveKindMap["if"] = DK_IF;
6698 DirectiveKindMap["ife"] = DK_IFE;
6699 DirectiveKindMap["ifb"] = DK_IFB;
6700 DirectiveKindMap["ifnb"] = DK_IFNB;
6701 DirectiveKindMap["ifdef"] = DK_IFDEF;
6702 DirectiveKindMap["ifndef"] = DK_IFNDEF;
6703 DirectiveKindMap["ifdif"] = DK_IFDIF;
6704 DirectiveKindMap["ifdifi"] = DK_IFDIFI;
6705 DirectiveKindMap["ifidn"] = DK_IFIDN;
6706 DirectiveKindMap["ifidni"] = DK_IFIDNI;
6707 DirectiveKindMap["elseif"] = DK_ELSEIF;
6708 DirectiveKindMap["elseifdef"] = DK_ELSEIFDEF;
6709 DirectiveKindMap["elseifndef"] = DK_ELSEIFNDEF;
6710 DirectiveKindMap["elseifdif"] = DK_ELSEIFDIF;
6711 DirectiveKindMap["elseifidn"] = DK_ELSEIFIDN;
6712 DirectiveKindMap["else"] = DK_ELSE;
6713 DirectiveKindMap["end"] = DK_END;
6714 DirectiveKindMap["endif"] = DK_ENDIF;
6715 // DirectiveKindMap[".file"] = DK_FILE;
6716 // DirectiveKindMap[".line"] = DK_LINE;
6717 // DirectiveKindMap[".loc"] = DK_LOC;
6718 // DirectiveKindMap[".stabs"] = DK_STABS;
6719 // DirectiveKindMap[".cv_file"] = DK_CV_FILE;
6720 // DirectiveKindMap[".cv_func_id"] = DK_CV_FUNC_ID;
6721 // DirectiveKindMap[".cv_loc"] = DK_CV_LOC;
6722 // DirectiveKindMap[".cv_linetable"] = DK_CV_LINETABLE;
6723 // DirectiveKindMap[".cv_inline_linetable"] = DK_CV_INLINE_LINETABLE;
6724 // DirectiveKindMap[".cv_inline_site_id"] = DK_CV_INLINE_SITE_ID;
6725 // DirectiveKindMap[".cv_def_range"] = DK_CV_DEF_RANGE;
6726 // DirectiveKindMap[".cv_string"] = DK_CV_STRING;
6727 // DirectiveKindMap[".cv_stringtable"] = DK_CV_STRINGTABLE;
6728 // DirectiveKindMap[".cv_filechecksums"] = DK_CV_FILECHECKSUMS;
6729 // DirectiveKindMap[".cv_filechecksumoffset"] = DK_CV_FILECHECKSUM_OFFSET;
6730 // DirectiveKindMap[".cv_fpo_data"] = DK_CV_FPO_DATA;
6731 // DirectiveKindMap[".cfi_sections"] = DK_CFI_SECTIONS;
6732 // DirectiveKindMap[".cfi_startproc"] = DK_CFI_STARTPROC;
6733 // DirectiveKindMap[".cfi_endproc"] = DK_CFI_ENDPROC;
6734 // DirectiveKindMap[".cfi_def_cfa"] = DK_CFI_DEF_CFA;
6735 // DirectiveKindMap[".cfi_def_cfa_offset"] = DK_CFI_DEF_CFA_OFFSET;
6736 // DirectiveKindMap[".cfi_adjust_cfa_offset"] = DK_CFI_ADJUST_CFA_OFFSET;
6737 // DirectiveKindMap[".cfi_def_cfa_register"] = DK_CFI_DEF_CFA_REGISTER;
6738 // DirectiveKindMap[".cfi_offset"] = DK_CFI_OFFSET;
6739 // DirectiveKindMap[".cfi_rel_offset"] = DK_CFI_REL_OFFSET;
6740 // DirectiveKindMap[".cfi_personality"] = DK_CFI_PERSONALITY;
6741 // DirectiveKindMap[".cfi_lsda"] = DK_CFI_LSDA;
6742 // DirectiveKindMap[".cfi_remember_state"] = DK_CFI_REMEMBER_STATE;
6743 // DirectiveKindMap[".cfi_restore_state"] = DK_CFI_RESTORE_STATE;
6744 // DirectiveKindMap[".cfi_same_value"] = DK_CFI_SAME_VALUE;
6745 // DirectiveKindMap[".cfi_restore"] = DK_CFI_RESTORE;
6746 // DirectiveKindMap[".cfi_escape"] = DK_CFI_ESCAPE;
6747 // DirectiveKindMap[".cfi_return_column"] = DK_CFI_RETURN_COLUMN;
6748 // DirectiveKindMap[".cfi_signal_frame"] = DK_CFI_SIGNAL_FRAME;
6749 // DirectiveKindMap[".cfi_undefined"] = DK_CFI_UNDEFINED;
6750 // DirectiveKindMap[".cfi_register"] = DK_CFI_REGISTER;
6751 // DirectiveKindMap[".cfi_window_save"] = DK_CFI_WINDOW_SAVE;
6752 // DirectiveKindMap[".cfi_b_key_frame"] = DK_CFI_B_KEY_FRAME;
6753 // DirectiveKindMap[".cfi_val_offset"] = DK_CFI_VAL_OFFSET;
6754 DirectiveKindMap["macro"] = DK_MACRO;
6755 DirectiveKindMap["exitm"] = DK_EXITM;
6756 DirectiveKindMap["endm"] = DK_ENDM;
6757 DirectiveKindMap["purge"] = DK_PURGE;
6758 DirectiveKindMap[".err"] = DK_ERR;
6759 DirectiveKindMap[".errb"] = DK_ERRB;
6760 DirectiveKindMap[".errnb"] = DK_ERRNB;
6761 DirectiveKindMap[".errdef"] = DK_ERRDEF;
6762 DirectiveKindMap[".errndef"] = DK_ERRNDEF;
6763 DirectiveKindMap[".errdif"] = DK_ERRDIF;
6764 DirectiveKindMap[".errdifi"] = DK_ERRDIFI;
6765 DirectiveKindMap[".erridn"] = DK_ERRIDN;
6766 DirectiveKindMap[".erridni"] = DK_ERRIDNI;
6767 DirectiveKindMap[".erre"] = DK_ERRE;
6768 DirectiveKindMap[".errnz"] = DK_ERRNZ;
6769 DirectiveKindMap[".pushframe"] = DK_PUSHFRAME;
6770 DirectiveKindMap[".pushreg"] = DK_PUSHREG;
6771 DirectiveKindMap[".savereg"] = DK_SAVEREG;
6772 DirectiveKindMap[".savexmm128"] = DK_SAVEXMM128;
6773 DirectiveKindMap[".setframe"] = DK_SETFRAME;
6774 DirectiveKindMap[".radix"] = DK_RADIX;
6775 DirectiveKindMap["db"] = DK_DB;
6776 DirectiveKindMap["dd"] = DK_DD;
6777 DirectiveKindMap["df"] = DK_DF;
6778 DirectiveKindMap["dq"] = DK_DQ;
6779 DirectiveKindMap["dw"] = DK_DW;
6780 DirectiveKindMap["echo"] = DK_ECHO;
6781 DirectiveKindMap["struc"] = DK_STRUCT;
6782 DirectiveKindMap["struct"] = DK_STRUCT;
6783 DirectiveKindMap["union"] = DK_UNION;
6784 DirectiveKindMap["ends"] = DK_ENDS;
6787 bool MasmParser::isMacroLikeDirective() {
6788 if (getLexer().is(AsmToken::Identifier)) {
6789 bool IsMacroLike = StringSwitch<bool>(getTok().getIdentifier())
6790 .CasesLower("repeat", "rept", true)
6791 .CaseLower("while", true)
6792 .CasesLower("for", "irp", true)
6793 .CasesLower("forc", "irpc", true)
6794 .Default(false);
6795 if (IsMacroLike)
6796 return true;
6798 if (peekTok().is(AsmToken::Identifier) &&
6799 peekTok().getIdentifier().equals_insensitive("macro"))
6800 return true;
6802 return false;
6805 MCAsmMacro *MasmParser::parseMacroLikeBody(SMLoc DirectiveLoc) {
6806 AsmToken EndToken, StartToken = getTok();
6808 unsigned NestLevel = 0;
6809 while (true) {
6810 // Check whether we have reached the end of the file.
6811 if (getLexer().is(AsmToken::Eof)) {
6812 printError(DirectiveLoc, "no matching 'endm' in definition");
6813 return nullptr;
6816 if (isMacroLikeDirective())
6817 ++NestLevel;
6819 // Otherwise, check whether we have reached the endm.
6820 if (Lexer.is(AsmToken::Identifier) &&
6821 getTok().getIdentifier().equals_insensitive("endm")) {
6822 if (NestLevel == 0) {
6823 EndToken = getTok();
6824 Lex();
6825 if (Lexer.isNot(AsmToken::EndOfStatement)) {
6826 printError(getTok().getLoc(), "unexpected token in 'endm' directive");
6827 return nullptr;
6829 break;
6831 --NestLevel;
6834 // Otherwise, scan till the end of the statement.
6835 eatToEndOfStatement();
6838 const char *BodyStart = StartToken.getLoc().getPointer();
6839 const char *BodyEnd = EndToken.getLoc().getPointer();
6840 StringRef Body = StringRef(BodyStart, BodyEnd - BodyStart);
6842 // We Are Anonymous.
6843 MacroLikeBodies.emplace_back(StringRef(), Body, MCAsmMacroParameters());
6844 return &MacroLikeBodies.back();
6847 bool MasmParser::expandStatement(SMLoc Loc) {
6848 std::string Body = parseStringTo(AsmToken::EndOfStatement);
6849 SMLoc EndLoc = getTok().getLoc();
6851 MCAsmMacroParameters Parameters;
6852 MCAsmMacroArguments Arguments;
6854 StringMap<std::string> BuiltinValues;
6855 for (const auto &S : BuiltinSymbolMap) {
6856 const BuiltinSymbol &Sym = S.getValue();
6857 if (std::optional<std::string> Text = evaluateBuiltinTextMacro(Sym, Loc)) {
6858 BuiltinValues[S.getKey().lower()] = std::move(*Text);
6861 for (const auto &B : BuiltinValues) {
6862 MCAsmMacroParameter P;
6863 MCAsmMacroArgument A;
6864 P.Name = B.getKey();
6865 P.Required = true;
6866 A.push_back(AsmToken(AsmToken::String, B.getValue()));
6868 Parameters.push_back(std::move(P));
6869 Arguments.push_back(std::move(A));
6872 for (const auto &V : Variables) {
6873 const Variable &Var = V.getValue();
6874 if (Var.IsText) {
6875 MCAsmMacroParameter P;
6876 MCAsmMacroArgument A;
6877 P.Name = Var.Name;
6878 P.Required = true;
6879 A.push_back(AsmToken(AsmToken::String, Var.TextValue));
6881 Parameters.push_back(std::move(P));
6882 Arguments.push_back(std::move(A));
6885 MacroLikeBodies.emplace_back(StringRef(), Body, Parameters);
6886 MCAsmMacro M = MacroLikeBodies.back();
6888 // Expand the statement in a new buffer.
6889 SmallString<80> Buf;
6890 raw_svector_ostream OS(Buf);
6891 if (expandMacro(OS, M.Body, M.Parameters, Arguments, M.Locals, EndLoc))
6892 return true;
6893 std::unique_ptr<MemoryBuffer> Expansion =
6894 MemoryBuffer::getMemBufferCopy(OS.str(), "<expansion>");
6896 // Jump to the expanded statement and prime the lexer.
6897 CurBuffer = SrcMgr.AddNewSourceBuffer(std::move(Expansion), EndLoc);
6898 Lexer.setBuffer(SrcMgr.getMemoryBuffer(CurBuffer)->getBuffer());
6899 EndStatementAtEOFStack.push_back(false);
6900 Lex();
6901 return false;
6904 void MasmParser::instantiateMacroLikeBody(MCAsmMacro *M, SMLoc DirectiveLoc,
6905 raw_svector_ostream &OS) {
6906 instantiateMacroLikeBody(M, DirectiveLoc, /*ExitLoc=*/getTok().getLoc(), OS);
6908 void MasmParser::instantiateMacroLikeBody(MCAsmMacro *M, SMLoc DirectiveLoc,
6909 SMLoc ExitLoc,
6910 raw_svector_ostream &OS) {
6911 OS << "endm\n";
6913 std::unique_ptr<MemoryBuffer> Instantiation =
6914 MemoryBuffer::getMemBufferCopy(OS.str(), "<instantiation>");
6916 // Create the macro instantiation object and add to the current macro
6917 // instantiation stack.
6918 MacroInstantiation *MI = new MacroInstantiation{DirectiveLoc, CurBuffer,
6919 ExitLoc, TheCondStack.size()};
6920 ActiveMacros.push_back(MI);
6922 // Jump to the macro instantiation and prime the lexer.
6923 CurBuffer = SrcMgr.AddNewSourceBuffer(std::move(Instantiation), SMLoc());
6924 Lexer.setBuffer(SrcMgr.getMemoryBuffer(CurBuffer)->getBuffer());
6925 EndStatementAtEOFStack.push_back(true);
6926 Lex();
6929 /// parseDirectiveRepeat
6930 /// ::= ("repeat" | "rept") count
6931 /// body
6932 /// endm
6933 bool MasmParser::parseDirectiveRepeat(SMLoc DirectiveLoc, StringRef Dir) {
6934 const MCExpr *CountExpr;
6935 SMLoc CountLoc = getTok().getLoc();
6936 if (parseExpression(CountExpr))
6937 return true;
6939 int64_t Count;
6940 if (!CountExpr->evaluateAsAbsolute(Count, getStreamer().getAssemblerPtr())) {
6941 return Error(CountLoc, "unexpected token in '" + Dir + "' directive");
6944 if (check(Count < 0, CountLoc, "Count is negative") || parseEOL())
6945 return true;
6947 // Lex the repeat definition.
6948 MCAsmMacro *M = parseMacroLikeBody(DirectiveLoc);
6949 if (!M)
6950 return true;
6952 // Macro instantiation is lexical, unfortunately. We construct a new buffer
6953 // to hold the macro body with substitutions.
6954 SmallString<256> Buf;
6955 raw_svector_ostream OS(Buf);
6956 while (Count--) {
6957 if (expandMacro(OS, M->Body, {}, {}, M->Locals, getTok().getLoc()))
6958 return true;
6960 instantiateMacroLikeBody(M, DirectiveLoc, OS);
6962 return false;
6965 /// parseDirectiveWhile
6966 /// ::= "while" expression
6967 /// body
6968 /// endm
6969 bool MasmParser::parseDirectiveWhile(SMLoc DirectiveLoc) {
6970 const MCExpr *CondExpr;
6971 SMLoc CondLoc = getTok().getLoc();
6972 if (parseExpression(CondExpr))
6973 return true;
6975 // Lex the repeat definition.
6976 MCAsmMacro *M = parseMacroLikeBody(DirectiveLoc);
6977 if (!M)
6978 return true;
6980 // Macro instantiation is lexical, unfortunately. We construct a new buffer
6981 // to hold the macro body with substitutions.
6982 SmallString<256> Buf;
6983 raw_svector_ostream OS(Buf);
6984 int64_t Condition;
6985 if (!CondExpr->evaluateAsAbsolute(Condition, getStreamer().getAssemblerPtr()))
6986 return Error(CondLoc, "expected absolute expression in 'while' directive");
6987 if (Condition) {
6988 // Instantiate the macro, then resume at this directive to recheck the
6989 // condition.
6990 if (expandMacro(OS, M->Body, {}, {}, M->Locals, getTok().getLoc()))
6991 return true;
6992 instantiateMacroLikeBody(M, DirectiveLoc, /*ExitLoc=*/DirectiveLoc, OS);
6995 return false;
6998 /// parseDirectiveFor
6999 /// ::= ("for" | "irp") symbol [":" qualifier], <values>
7000 /// body
7001 /// endm
7002 bool MasmParser::parseDirectiveFor(SMLoc DirectiveLoc, StringRef Dir) {
7003 MCAsmMacroParameter Parameter;
7004 MCAsmMacroArguments A;
7005 if (check(parseIdentifier(Parameter.Name),
7006 "expected identifier in '" + Dir + "' directive"))
7007 return true;
7009 // Parse optional qualifier (default value, or "req")
7010 if (parseOptionalToken(AsmToken::Colon)) {
7011 if (parseOptionalToken(AsmToken::Equal)) {
7012 // Default value
7013 SMLoc ParamLoc;
7015 ParamLoc = Lexer.getLoc();
7016 if (parseMacroArgument(nullptr, Parameter.Value))
7017 return true;
7018 } else {
7019 SMLoc QualLoc;
7020 StringRef Qualifier;
7022 QualLoc = Lexer.getLoc();
7023 if (parseIdentifier(Qualifier))
7024 return Error(QualLoc, "missing parameter qualifier for "
7025 "'" +
7026 Parameter.Name + "' in '" + Dir +
7027 "' directive");
7029 if (Qualifier.equals_insensitive("req"))
7030 Parameter.Required = true;
7031 else
7032 return Error(QualLoc,
7033 Qualifier + " is not a valid parameter qualifier for '" +
7034 Parameter.Name + "' in '" + Dir + "' directive");
7038 if (parseToken(AsmToken::Comma,
7039 "expected comma in '" + Dir + "' directive") ||
7040 parseToken(AsmToken::Less,
7041 "values in '" + Dir +
7042 "' directive must be enclosed in angle brackets"))
7043 return true;
7045 while (true) {
7046 A.emplace_back();
7047 if (parseMacroArgument(&Parameter, A.back(), /*EndTok=*/AsmToken::Greater))
7048 return addErrorSuffix(" in arguments for '" + Dir + "' directive");
7050 // If we see a comma, continue, and allow line continuation.
7051 if (!parseOptionalToken(AsmToken::Comma))
7052 break;
7053 parseOptionalToken(AsmToken::EndOfStatement);
7056 if (parseToken(AsmToken::Greater,
7057 "values in '" + Dir +
7058 "' directive must be enclosed in angle brackets") ||
7059 parseEOL())
7060 return true;
7062 // Lex the for definition.
7063 MCAsmMacro *M = parseMacroLikeBody(DirectiveLoc);
7064 if (!M)
7065 return true;
7067 // Macro instantiation is lexical, unfortunately. We construct a new buffer
7068 // to hold the macro body with substitutions.
7069 SmallString<256> Buf;
7070 raw_svector_ostream OS(Buf);
7072 for (const MCAsmMacroArgument &Arg : A) {
7073 if (expandMacro(OS, M->Body, Parameter, Arg, M->Locals, getTok().getLoc()))
7074 return true;
7077 instantiateMacroLikeBody(M, DirectiveLoc, OS);
7079 return false;
7082 /// parseDirectiveForc
7083 /// ::= ("forc" | "irpc") symbol, <string>
7084 /// body
7085 /// endm
7086 bool MasmParser::parseDirectiveForc(SMLoc DirectiveLoc, StringRef Directive) {
7087 MCAsmMacroParameter Parameter;
7089 std::string Argument;
7090 if (check(parseIdentifier(Parameter.Name),
7091 "expected identifier in '" + Directive + "' directive") ||
7092 parseToken(AsmToken::Comma,
7093 "expected comma in '" + Directive + "' directive"))
7094 return true;
7095 if (parseAngleBracketString(Argument)) {
7096 // Match ml64.exe; treat all characters to end of statement as a string,
7097 // ignoring comment markers, then discard anything following a space (using
7098 // the C locale).
7099 Argument = parseStringTo(AsmToken::EndOfStatement);
7100 if (getTok().is(AsmToken::EndOfStatement))
7101 Argument += getTok().getString();
7102 size_t End = 0;
7103 for (; End < Argument.size(); ++End) {
7104 if (isSpace(Argument[End]))
7105 break;
7107 Argument.resize(End);
7109 if (parseEOL())
7110 return true;
7112 // Lex the irpc definition.
7113 MCAsmMacro *M = parseMacroLikeBody(DirectiveLoc);
7114 if (!M)
7115 return true;
7117 // Macro instantiation is lexical, unfortunately. We construct a new buffer
7118 // to hold the macro body with substitutions.
7119 SmallString<256> Buf;
7120 raw_svector_ostream OS(Buf);
7122 StringRef Values(Argument);
7123 for (std::size_t I = 0, End = Values.size(); I != End; ++I) {
7124 MCAsmMacroArgument Arg;
7125 Arg.emplace_back(AsmToken::Identifier, Values.substr(I, 1));
7127 if (expandMacro(OS, M->Body, Parameter, Arg, M->Locals, getTok().getLoc()))
7128 return true;
7131 instantiateMacroLikeBody(M, DirectiveLoc, OS);
7133 return false;
7136 bool MasmParser::parseDirectiveMSEmit(SMLoc IDLoc, ParseStatementInfo &Info,
7137 size_t Len) {
7138 const MCExpr *Value;
7139 SMLoc ExprLoc = getLexer().getLoc();
7140 if (parseExpression(Value))
7141 return true;
7142 const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(Value);
7143 if (!MCE)
7144 return Error(ExprLoc, "unexpected expression in _emit");
7145 uint64_t IntValue = MCE->getValue();
7146 if (!isUInt<8>(IntValue) && !isInt<8>(IntValue))
7147 return Error(ExprLoc, "literal value out of range for directive");
7149 Info.AsmRewrites->emplace_back(AOK_Emit, IDLoc, Len);
7150 return false;
7153 bool MasmParser::parseDirectiveMSAlign(SMLoc IDLoc, ParseStatementInfo &Info) {
7154 const MCExpr *Value;
7155 SMLoc ExprLoc = getLexer().getLoc();
7156 if (parseExpression(Value))
7157 return true;
7158 const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(Value);
7159 if (!MCE)
7160 return Error(ExprLoc, "unexpected expression in align");
7161 uint64_t IntValue = MCE->getValue();
7162 if (!isPowerOf2_64(IntValue))
7163 return Error(ExprLoc, "literal value not a power of two greater then zero");
7165 Info.AsmRewrites->emplace_back(AOK_Align, IDLoc, 5, Log2_64(IntValue));
7166 return false;
7169 bool MasmParser::parseDirectiveRadix(SMLoc DirectiveLoc) {
7170 const SMLoc Loc = getLexer().getLoc();
7171 std::string RadixStringRaw = parseStringTo(AsmToken::EndOfStatement);
7172 StringRef RadixString = StringRef(RadixStringRaw).trim();
7173 unsigned Radix;
7174 if (RadixString.getAsInteger(10, Radix)) {
7175 return Error(Loc,
7176 "radix must be a decimal number in the range 2 to 16; was " +
7177 RadixString);
7179 if (Radix < 2 || Radix > 16)
7180 return Error(Loc, "radix must be in the range 2 to 16; was " +
7181 std::to_string(Radix));
7182 getLexer().setMasmDefaultRadix(Radix);
7183 return false;
7186 /// parseDirectiveEcho
7187 /// ::= "echo" message
7188 bool MasmParser::parseDirectiveEcho(SMLoc DirectiveLoc) {
7189 std::string Message = parseStringTo(AsmToken::EndOfStatement);
7190 llvm::outs() << Message;
7191 if (!StringRef(Message).ends_with("\n"))
7192 llvm::outs() << '\n';
7193 return false;
7196 // We are comparing pointers, but the pointers are relative to a single string.
7197 // Thus, this should always be deterministic.
7198 static int rewritesSort(const AsmRewrite *AsmRewriteA,
7199 const AsmRewrite *AsmRewriteB) {
7200 if (AsmRewriteA->Loc.getPointer() < AsmRewriteB->Loc.getPointer())
7201 return -1;
7202 if (AsmRewriteB->Loc.getPointer() < AsmRewriteA->Loc.getPointer())
7203 return 1;
7205 // It's possible to have a SizeDirective, Imm/ImmPrefix and an Input/Output
7206 // rewrite to the same location. Make sure the SizeDirective rewrite is
7207 // performed first, then the Imm/ImmPrefix and finally the Input/Output. This
7208 // ensures the sort algorithm is stable.
7209 if (AsmRewritePrecedence[AsmRewriteA->Kind] >
7210 AsmRewritePrecedence[AsmRewriteB->Kind])
7211 return -1;
7213 if (AsmRewritePrecedence[AsmRewriteA->Kind] <
7214 AsmRewritePrecedence[AsmRewriteB->Kind])
7215 return 1;
7216 llvm_unreachable("Unstable rewrite sort.");
7219 bool MasmParser::defineMacro(StringRef Name, StringRef Value) {
7220 Variable &Var = Variables[Name.lower()];
7221 if (Var.Name.empty()) {
7222 Var.Name = Name;
7223 } else if (Var.Redefinable == Variable::NOT_REDEFINABLE) {
7224 return Error(SMLoc(), "invalid variable redefinition");
7225 } else if (Var.Redefinable == Variable::WARN_ON_REDEFINITION &&
7226 Warning(SMLoc(), "redefining '" + Name +
7227 "', already defined on the command line")) {
7228 return true;
7230 Var.Redefinable = Variable::WARN_ON_REDEFINITION;
7231 Var.IsText = true;
7232 Var.TextValue = Value.str();
7233 return false;
7236 bool MasmParser::lookUpField(StringRef Name, AsmFieldInfo &Info) const {
7237 const std::pair<StringRef, StringRef> BaseMember = Name.split('.');
7238 const StringRef Base = BaseMember.first, Member = BaseMember.second;
7239 return lookUpField(Base, Member, Info);
7242 bool MasmParser::lookUpField(StringRef Base, StringRef Member,
7243 AsmFieldInfo &Info) const {
7244 if (Base.empty())
7245 return true;
7247 AsmFieldInfo BaseInfo;
7248 if (Base.contains('.') && !lookUpField(Base, BaseInfo))
7249 Base = BaseInfo.Type.Name;
7251 auto StructIt = Structs.find(Base.lower());
7252 auto TypeIt = KnownType.find(Base.lower());
7253 if (TypeIt != KnownType.end()) {
7254 StructIt = Structs.find(TypeIt->second.Name.lower());
7256 if (StructIt != Structs.end())
7257 return lookUpField(StructIt->second, Member, Info);
7259 return true;
7262 bool MasmParser::lookUpField(const StructInfo &Structure, StringRef Member,
7263 AsmFieldInfo &Info) const {
7264 if (Member.empty()) {
7265 Info.Type.Name = Structure.Name;
7266 Info.Type.Size = Structure.Size;
7267 Info.Type.ElementSize = Structure.Size;
7268 Info.Type.Length = 1;
7269 return false;
7272 std::pair<StringRef, StringRef> Split = Member.split('.');
7273 const StringRef FieldName = Split.first, FieldMember = Split.second;
7275 auto StructIt = Structs.find(FieldName.lower());
7276 if (StructIt != Structs.end())
7277 return lookUpField(StructIt->second, FieldMember, Info);
7279 auto FieldIt = Structure.FieldsByName.find(FieldName.lower());
7280 if (FieldIt == Structure.FieldsByName.end())
7281 return true;
7283 const FieldInfo &Field = Structure.Fields[FieldIt->second];
7284 if (FieldMember.empty()) {
7285 Info.Offset += Field.Offset;
7286 Info.Type.Size = Field.SizeOf;
7287 Info.Type.ElementSize = Field.Type;
7288 Info.Type.Length = Field.LengthOf;
7289 if (Field.Contents.FT == FT_STRUCT)
7290 Info.Type.Name = Field.Contents.StructInfo.Structure.Name;
7291 else
7292 Info.Type.Name = "";
7293 return false;
7296 if (Field.Contents.FT != FT_STRUCT)
7297 return true;
7298 const StructFieldInfo &StructInfo = Field.Contents.StructInfo;
7300 if (lookUpField(StructInfo.Structure, FieldMember, Info))
7301 return true;
7303 Info.Offset += Field.Offset;
7304 return false;
7307 bool MasmParser::lookUpType(StringRef Name, AsmTypeInfo &Info) const {
7308 unsigned Size = StringSwitch<unsigned>(Name)
7309 .CasesLower("byte", "db", "sbyte", 1)
7310 .CasesLower("word", "dw", "sword", 2)
7311 .CasesLower("dword", "dd", "sdword", 4)
7312 .CasesLower("fword", "df", 6)
7313 .CasesLower("qword", "dq", "sqword", 8)
7314 .CaseLower("real4", 4)
7315 .CaseLower("real8", 8)
7316 .CaseLower("real10", 10)
7317 .Default(0);
7318 if (Size) {
7319 Info.Name = Name;
7320 Info.ElementSize = Size;
7321 Info.Length = 1;
7322 Info.Size = Size;
7323 return false;
7326 auto StructIt = Structs.find(Name.lower());
7327 if (StructIt != Structs.end()) {
7328 const StructInfo &Structure = StructIt->second;
7329 Info.Name = Name;
7330 Info.ElementSize = Structure.Size;
7331 Info.Length = 1;
7332 Info.Size = Structure.Size;
7333 return false;
7336 return true;
7339 bool MasmParser::parseMSInlineAsm(
7340 std::string &AsmString, unsigned &NumOutputs, unsigned &NumInputs,
7341 SmallVectorImpl<std::pair<void *, bool>> &OpDecls,
7342 SmallVectorImpl<std::string> &Constraints,
7343 SmallVectorImpl<std::string> &Clobbers, const MCInstrInfo *MII,
7344 MCInstPrinter *IP, MCAsmParserSemaCallback &SI) {
7345 SmallVector<void *, 4> InputDecls;
7346 SmallVector<void *, 4> OutputDecls;
7347 SmallVector<bool, 4> InputDeclsAddressOf;
7348 SmallVector<bool, 4> OutputDeclsAddressOf;
7349 SmallVector<std::string, 4> InputConstraints;
7350 SmallVector<std::string, 4> OutputConstraints;
7351 SmallVector<MCRegister, 4> ClobberRegs;
7353 SmallVector<AsmRewrite, 4> AsmStrRewrites;
7355 // Prime the lexer.
7356 Lex();
7358 // While we have input, parse each statement.
7359 unsigned InputIdx = 0;
7360 unsigned OutputIdx = 0;
7361 while (getLexer().isNot(AsmToken::Eof)) {
7362 // Parse curly braces marking block start/end.
7363 if (parseCurlyBlockScope(AsmStrRewrites))
7364 continue;
7366 ParseStatementInfo Info(&AsmStrRewrites);
7367 bool StatementErr = parseStatement(Info, &SI);
7369 if (StatementErr || Info.ParseError) {
7370 // Emit pending errors if any exist.
7371 printPendingErrors();
7372 return true;
7375 // No pending error should exist here.
7376 assert(!hasPendingError() && "unexpected error from parseStatement");
7378 if (Info.Opcode == ~0U)
7379 continue;
7381 const MCInstrDesc &Desc = MII->get(Info.Opcode);
7383 // Build the list of clobbers, outputs and inputs.
7384 for (unsigned i = 1, e = Info.ParsedOperands.size(); i != e; ++i) {
7385 MCParsedAsmOperand &Operand = *Info.ParsedOperands[i];
7387 // Register operand.
7388 if (Operand.isReg() && !Operand.needAddressOf() &&
7389 !getTargetParser().omitRegisterFromClobberLists(Operand.getReg())) {
7390 unsigned NumDefs = Desc.getNumDefs();
7391 // Clobber.
7392 if (NumDefs && Operand.getMCOperandNum() < NumDefs)
7393 ClobberRegs.push_back(Operand.getReg());
7394 continue;
7397 // Expr/Input or Output.
7398 StringRef SymName = Operand.getSymName();
7399 if (SymName.empty())
7400 continue;
7402 void *OpDecl = Operand.getOpDecl();
7403 if (!OpDecl)
7404 continue;
7406 StringRef Constraint = Operand.getConstraint();
7407 if (Operand.isImm()) {
7408 // Offset as immediate.
7409 if (Operand.isOffsetOfLocal())
7410 Constraint = "r";
7411 else
7412 Constraint = "i";
7415 bool isOutput = (i == 1) && Desc.mayStore();
7416 SMLoc Start = SMLoc::getFromPointer(SymName.data());
7417 if (isOutput) {
7418 ++InputIdx;
7419 OutputDecls.push_back(OpDecl);
7420 OutputDeclsAddressOf.push_back(Operand.needAddressOf());
7421 OutputConstraints.push_back(("=" + Constraint).str());
7422 AsmStrRewrites.emplace_back(AOK_Output, Start, SymName.size());
7423 } else {
7424 InputDecls.push_back(OpDecl);
7425 InputDeclsAddressOf.push_back(Operand.needAddressOf());
7426 InputConstraints.push_back(Constraint.str());
7427 if (Desc.operands()[i - 1].isBranchTarget())
7428 AsmStrRewrites.emplace_back(AOK_CallInput, Start, SymName.size());
7429 else
7430 AsmStrRewrites.emplace_back(AOK_Input, Start, SymName.size());
7434 // Consider implicit defs to be clobbers. Think of cpuid and push.
7435 llvm::append_range(ClobberRegs, Desc.implicit_defs());
7438 // Set the number of Outputs and Inputs.
7439 NumOutputs = OutputDecls.size();
7440 NumInputs = InputDecls.size();
7442 // Set the unique clobbers.
7443 array_pod_sort(ClobberRegs.begin(), ClobberRegs.end());
7444 ClobberRegs.erase(llvm::unique(ClobberRegs), ClobberRegs.end());
7445 Clobbers.assign(ClobberRegs.size(), std::string());
7446 for (unsigned I = 0, E = ClobberRegs.size(); I != E; ++I) {
7447 raw_string_ostream OS(Clobbers[I]);
7448 IP->printRegName(OS, ClobberRegs[I]);
7451 // Merge the various outputs and inputs. Output are expected first.
7452 if (NumOutputs || NumInputs) {
7453 unsigned NumExprs = NumOutputs + NumInputs;
7454 OpDecls.resize(NumExprs);
7455 Constraints.resize(NumExprs);
7456 for (unsigned i = 0; i < NumOutputs; ++i) {
7457 OpDecls[i] = std::make_pair(OutputDecls[i], OutputDeclsAddressOf[i]);
7458 Constraints[i] = OutputConstraints[i];
7460 for (unsigned i = 0, j = NumOutputs; i < NumInputs; ++i, ++j) {
7461 OpDecls[j] = std::make_pair(InputDecls[i], InputDeclsAddressOf[i]);
7462 Constraints[j] = InputConstraints[i];
7466 // Build the IR assembly string.
7467 std::string AsmStringIR;
7468 raw_string_ostream OS(AsmStringIR);
7469 StringRef ASMString =
7470 SrcMgr.getMemoryBuffer(SrcMgr.getMainFileID())->getBuffer();
7471 const char *AsmStart = ASMString.begin();
7472 const char *AsmEnd = ASMString.end();
7473 array_pod_sort(AsmStrRewrites.begin(), AsmStrRewrites.end(), rewritesSort);
7474 for (auto I = AsmStrRewrites.begin(), E = AsmStrRewrites.end(); I != E; ++I) {
7475 const AsmRewrite &AR = *I;
7476 // Check if this has already been covered by another rewrite...
7477 if (AR.Done)
7478 continue;
7479 AsmRewriteKind Kind = AR.Kind;
7481 const char *Loc = AR.Loc.getPointer();
7482 assert(Loc >= AsmStart && "Expected Loc to be at or after Start!");
7484 // Emit everything up to the immediate/expression.
7485 if (unsigned Len = Loc - AsmStart)
7486 OS << StringRef(AsmStart, Len);
7488 // Skip the original expression.
7489 if (Kind == AOK_Skip) {
7490 AsmStart = Loc + AR.Len;
7491 continue;
7494 unsigned AdditionalSkip = 0;
7495 // Rewrite expressions in $N notation.
7496 switch (Kind) {
7497 default:
7498 break;
7499 case AOK_IntelExpr:
7500 assert(AR.IntelExp.isValid() && "cannot write invalid intel expression");
7501 if (AR.IntelExp.NeedBracs)
7502 OS << "[";
7503 if (AR.IntelExp.hasBaseReg())
7504 OS << AR.IntelExp.BaseReg;
7505 if (AR.IntelExp.hasIndexReg())
7506 OS << (AR.IntelExp.hasBaseReg() ? " + " : "")
7507 << AR.IntelExp.IndexReg;
7508 if (AR.IntelExp.Scale > 1)
7509 OS << " * $$" << AR.IntelExp.Scale;
7510 if (AR.IntelExp.hasOffset()) {
7511 if (AR.IntelExp.hasRegs())
7512 OS << " + ";
7513 // Fuse this rewrite with a rewrite of the offset name, if present.
7514 StringRef OffsetName = AR.IntelExp.OffsetName;
7515 SMLoc OffsetLoc = SMLoc::getFromPointer(AR.IntelExp.OffsetName.data());
7516 size_t OffsetLen = OffsetName.size();
7517 auto rewrite_it = std::find_if(
7518 I, AsmStrRewrites.end(), [&](const AsmRewrite &FusingAR) {
7519 return FusingAR.Loc == OffsetLoc && FusingAR.Len == OffsetLen &&
7520 (FusingAR.Kind == AOK_Input ||
7521 FusingAR.Kind == AOK_CallInput);
7523 if (rewrite_it == AsmStrRewrites.end()) {
7524 OS << "offset " << OffsetName;
7525 } else if (rewrite_it->Kind == AOK_CallInput) {
7526 OS << "${" << InputIdx++ << ":P}";
7527 rewrite_it->Done = true;
7528 } else {
7529 OS << '$' << InputIdx++;
7530 rewrite_it->Done = true;
7533 if (AR.IntelExp.Imm || AR.IntelExp.emitImm())
7534 OS << (AR.IntelExp.emitImm() ? "$$" : " + $$") << AR.IntelExp.Imm;
7535 if (AR.IntelExp.NeedBracs)
7536 OS << "]";
7537 break;
7538 case AOK_Label:
7539 OS << Ctx.getAsmInfo()->getPrivateLabelPrefix() << AR.Label;
7540 break;
7541 case AOK_Input:
7542 OS << '$' << InputIdx++;
7543 break;
7544 case AOK_CallInput:
7545 OS << "${" << InputIdx++ << ":P}";
7546 break;
7547 case AOK_Output:
7548 OS << '$' << OutputIdx++;
7549 break;
7550 case AOK_SizeDirective:
7551 switch (AR.Val) {
7552 default: break;
7553 case 8: OS << "byte ptr "; break;
7554 case 16: OS << "word ptr "; break;
7555 case 32: OS << "dword ptr "; break;
7556 case 64: OS << "qword ptr "; break;
7557 case 80: OS << "xword ptr "; break;
7558 case 128: OS << "xmmword ptr "; break;
7559 case 256: OS << "ymmword ptr "; break;
7561 break;
7562 case AOK_Emit:
7563 OS << ".byte";
7564 break;
7565 case AOK_Align: {
7566 // MS alignment directives are measured in bytes. If the native assembler
7567 // measures alignment in bytes, we can pass it straight through.
7568 OS << ".align";
7569 if (getContext().getAsmInfo()->getAlignmentIsInBytes())
7570 break;
7572 // Alignment is in log2 form, so print that instead and skip the original
7573 // immediate.
7574 unsigned Val = AR.Val;
7575 OS << ' ' << Val;
7576 assert(Val < 10 && "Expected alignment less then 2^10.");
7577 AdditionalSkip = (Val < 4) ? 2 : Val < 7 ? 3 : 4;
7578 break;
7580 case AOK_EVEN:
7581 OS << ".even";
7582 break;
7583 case AOK_EndOfStatement:
7584 OS << "\n\t";
7585 break;
7588 // Skip the original expression.
7589 AsmStart = Loc + AR.Len + AdditionalSkip;
7592 // Emit the remainder of the asm string.
7593 if (AsmStart != AsmEnd)
7594 OS << StringRef(AsmStart, AsmEnd - AsmStart);
7596 AsmString = OS.str();
7597 return false;
7600 void MasmParser::initializeBuiltinSymbolMap() {
7601 // Numeric built-ins (supported in all versions)
7602 BuiltinSymbolMap["@version"] = BI_VERSION;
7603 BuiltinSymbolMap["@line"] = BI_LINE;
7605 // Text built-ins (supported in all versions)
7606 BuiltinSymbolMap["@date"] = BI_DATE;
7607 BuiltinSymbolMap["@time"] = BI_TIME;
7608 BuiltinSymbolMap["@filecur"] = BI_FILECUR;
7609 BuiltinSymbolMap["@filename"] = BI_FILENAME;
7610 BuiltinSymbolMap["@curseg"] = BI_CURSEG;
7612 // Some built-ins exist only for MASM32 (32-bit x86)
7613 if (getContext().getSubtargetInfo()->getTargetTriple().getArch() ==
7614 Triple::x86) {
7615 // Numeric built-ins
7616 // BuiltinSymbolMap["@cpu"] = BI_CPU;
7617 // BuiltinSymbolMap["@interface"] = BI_INTERFACE;
7618 // BuiltinSymbolMap["@wordsize"] = BI_WORDSIZE;
7619 // BuiltinSymbolMap["@codesize"] = BI_CODESIZE;
7620 // BuiltinSymbolMap["@datasize"] = BI_DATASIZE;
7621 // BuiltinSymbolMap["@model"] = BI_MODEL;
7623 // Text built-ins
7624 // BuiltinSymbolMap["@code"] = BI_CODE;
7625 // BuiltinSymbolMap["@data"] = BI_DATA;
7626 // BuiltinSymbolMap["@fardata?"] = BI_FARDATA;
7627 // BuiltinSymbolMap["@stack"] = BI_STACK;
7631 const MCExpr *MasmParser::evaluateBuiltinValue(BuiltinSymbol Symbol,
7632 SMLoc StartLoc) {
7633 switch (Symbol) {
7634 default:
7635 return nullptr;
7636 case BI_VERSION:
7637 // Match a recent version of ML.EXE.
7638 return MCConstantExpr::create(1427, getContext());
7639 case BI_LINE: {
7640 int64_t Line;
7641 if (ActiveMacros.empty())
7642 Line = SrcMgr.FindLineNumber(StartLoc, CurBuffer);
7643 else
7644 Line = SrcMgr.FindLineNumber(ActiveMacros.front()->InstantiationLoc,
7645 ActiveMacros.front()->ExitBuffer);
7646 return MCConstantExpr::create(Line, getContext());
7649 llvm_unreachable("unhandled built-in symbol");
7652 std::optional<std::string>
7653 MasmParser::evaluateBuiltinTextMacro(BuiltinSymbol Symbol, SMLoc StartLoc) {
7654 switch (Symbol) {
7655 default:
7656 return {};
7657 case BI_DATE: {
7658 // Current local date, formatted MM/DD/YY
7659 char TmpBuffer[sizeof("mm/dd/yy")];
7660 const size_t Len = strftime(TmpBuffer, sizeof(TmpBuffer), "%D", &TM);
7661 return std::string(TmpBuffer, Len);
7663 case BI_TIME: {
7664 // Current local time, formatted HH:MM:SS (24-hour clock)
7665 char TmpBuffer[sizeof("hh:mm:ss")];
7666 const size_t Len = strftime(TmpBuffer, sizeof(TmpBuffer), "%T", &TM);
7667 return std::string(TmpBuffer, Len);
7669 case BI_FILECUR:
7670 return SrcMgr
7671 .getMemoryBuffer(
7672 ActiveMacros.empty() ? CurBuffer : ActiveMacros.front()->ExitBuffer)
7673 ->getBufferIdentifier()
7674 .str();
7675 case BI_FILENAME:
7676 return sys::path::stem(SrcMgr.getMemoryBuffer(SrcMgr.getMainFileID())
7677 ->getBufferIdentifier())
7678 .upper();
7679 case BI_CURSEG:
7680 return getStreamer().getCurrentSectionOnly()->getName().str();
7682 llvm_unreachable("unhandled built-in symbol");
7685 /// Create an MCAsmParser instance.
7686 MCAsmParser *llvm::createMCMasmParser(SourceMgr &SM, MCContext &C,
7687 MCStreamer &Out, const MCAsmInfo &MAI,
7688 struct tm TM, unsigned CB) {
7689 return new MasmParser(SM, C, Out, MAI, TM, CB);