[RISCV] Fix mgather -> riscv.masked.strided.load combine not extending indices (...
[llvm-project.git] / llvm / lib / MC / MCParser / MasmParser.cpp
blob51563ea86a6c5246bebbd9e0cded795f53135ce2
1 //===- AsmParser.cpp - Parser for Assembly Files --------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This class implements the parser for assembly files.
11 //===----------------------------------------------------------------------===//
13 #include "llvm/ADT/APFloat.h"
14 #include "llvm/ADT/APInt.h"
15 #include "llvm/ADT/ArrayRef.h"
16 #include "llvm/ADT/BitVector.h"
17 #include "llvm/ADT/STLExtras.h"
18 #include "llvm/ADT/SmallString.h"
19 #include "llvm/ADT/SmallVector.h"
20 #include "llvm/ADT/StringExtras.h"
21 #include "llvm/ADT/StringMap.h"
22 #include "llvm/ADT/StringRef.h"
23 #include "llvm/ADT/StringSwitch.h"
24 #include "llvm/ADT/Twine.h"
25 #include "llvm/BinaryFormat/Dwarf.h"
26 #include "llvm/DebugInfo/CodeView/SymbolRecord.h"
27 #include "llvm/MC/MCAsmInfo.h"
28 #include "llvm/MC/MCCodeView.h"
29 #include "llvm/MC/MCContext.h"
30 #include "llvm/MC/MCDirectives.h"
31 #include "llvm/MC/MCDwarf.h"
32 #include "llvm/MC/MCExpr.h"
33 #include "llvm/MC/MCInstPrinter.h"
34 #include "llvm/MC/MCInstrDesc.h"
35 #include "llvm/MC/MCInstrInfo.h"
36 #include "llvm/MC/MCParser/AsmCond.h"
37 #include "llvm/MC/MCParser/AsmLexer.h"
38 #include "llvm/MC/MCParser/MCAsmLexer.h"
39 #include "llvm/MC/MCParser/MCAsmParser.h"
40 #include "llvm/MC/MCParser/MCAsmParserExtension.h"
41 #include "llvm/MC/MCParser/MCParsedAsmOperand.h"
42 #include "llvm/MC/MCParser/MCTargetAsmParser.h"
43 #include "llvm/MC/MCRegisterInfo.h"
44 #include "llvm/MC/MCSection.h"
45 #include "llvm/MC/MCStreamer.h"
46 #include "llvm/MC/MCSubtargetInfo.h"
47 #include "llvm/MC/MCSymbol.h"
48 #include "llvm/MC/MCTargetOptions.h"
49 #include "llvm/Support/Casting.h"
50 #include "llvm/Support/CommandLine.h"
51 #include "llvm/Support/ErrorHandling.h"
52 #include "llvm/Support/Format.h"
53 #include "llvm/Support/MD5.h"
54 #include "llvm/Support/MathExtras.h"
55 #include "llvm/Support/MemoryBuffer.h"
56 #include "llvm/Support/Path.h"
57 #include "llvm/Support/SMLoc.h"
58 #include "llvm/Support/SourceMgr.h"
59 #include "llvm/Support/raw_ostream.h"
60 #include <algorithm>
61 #include <cassert>
62 #include <climits>
63 #include <cstddef>
64 #include <cstdint>
65 #include <ctime>
66 #include <deque>
67 #include <memory>
68 #include <optional>
69 #include <sstream>
70 #include <string>
71 #include <tuple>
72 #include <utility>
73 #include <vector>
75 using namespace llvm;
77 namespace {
79 /// Helper types for tracking macro definitions.
80 typedef std::vector<AsmToken> MCAsmMacroArgument;
81 typedef std::vector<MCAsmMacroArgument> MCAsmMacroArguments;
83 /// Helper class for storing information about an active macro instantiation.
// It records where the instantiation happened, where parsing resumes once the
// instantiation completes (a buffer plus a location inside it), and how deep
// TheCondStack was when the instantiation started.
84 struct MacroInstantiation {
85 /// The location of the instantiation.
86 SMLoc InstantiationLoc;
88 /// The buffer where parsing should resume upon instantiation completion.
89 unsigned ExitBuffer;
91 /// The location where parsing should resume upon instantiation completion.
92 SMLoc ExitLoc;
94 /// The depth of TheCondStack at the start of the instantiation.
95 size_t CondStackDepth;
// Per-statement parse state: the operands and opcode of the last parsed
// statement, an inline-asm error flag, an optional macro exit value, and a
// pointer to the AsmRewrite list supplied at construction.
// There is no default constructor; a rewrite-list pointer must always be
// passed explicitly.
98 struct ParseStatementInfo {
99 /// The parsed operands from the last parsed statement.
100 SmallVector<std::unique_ptr<MCParsedAsmOperand>, 8> ParsedOperands;
102 /// The opcode from the last parsed instruction.
// Starts out as ~0U.
103 unsigned Opcode = ~0U;
105 /// Was there an error parsing the inline assembly?
106 bool ParseError = false;
108 /// The value associated with a macro exit.
109 std::optional<std::string> ExitValue;
// Rewrite list handed in through the constructor; null unless one was given.
111 SmallVectorImpl<AsmRewrite> *AsmRewrites = nullptr;
113 ParseStatementInfo() = delete;
// Stores \p rewrites in AsmRewrites; all other members keep their defaults.
114 ParseStatementInfo(SmallVectorImpl<AsmRewrite> *rewrites)
115 : AsmRewrites(rewrites) {}
// Discriminates the kind of a struct field and, through FieldInitializer::FT,
// which member of the FieldInitializer union is the active one.
118 enum FieldType {
119 FT_INTEGRAL, // Initializer: integer expression, stored as an MCExpr.
120 FT_REAL, // Initializer: real number, stored as an APInt.
121 FT_STRUCT // Initializer: struct initializer, stored recursively.
124 struct FieldInfo;
// Description of a STRUCT (or UNION, when IsUnion is set): its name, alignment
// settings, and the ordered list of its fields.
125 struct StructInfo {
126 StringRef Name;
127 bool IsUnion = false;
128 bool Initializable = true;
// Alignment limit given at construction; addField() caps each field's
// alignment at this value.
129 unsigned Alignment = 0;
// Largest FieldAlignmentSize passed to addField() so far.
130 unsigned AlignmentSize = 0;
// Offset from which addField() aligns the next field.
131 unsigned NextOffset = 0;
132 unsigned Size = 0;
// Fields in declaration order.
133 std::vector<FieldInfo> Fields;
// Lower-cased field name -> index into Fields (unnamed fields are absent).
134 StringMap<size_t> FieldsByName;
// Appends a field of kind FT, computes its offset and returns it.
136 FieldInfo &addField(StringRef FieldName, FieldType FT,
137 unsigned FieldAlignmentSize);
139 StructInfo() = default;
// Sets Name, IsUnion and Alignment; everything else keeps its default.
140 StructInfo(StringRef StructName, bool Union, unsigned AlignmentValue);
143 // FIXME: This should probably use a class hierarchy, raw pointers between the
144 // objects, and dynamic type resolution instead of a union. On the other hand,
145 // ownership then becomes much more complicated; the obvious thing would be to
146 // use BumpPtrAllocator, but the lack of a destructor makes that messy.
148 struct StructInitializer;
149 struct IntFieldInfo {
150 SmallVector<const MCExpr *, 1> Values;
152 IntFieldInfo() = default;
153 IntFieldInfo(const SmallVector<const MCExpr *, 1> &V) { Values = V; }
154 IntFieldInfo(SmallVector<const MCExpr *, 1> &&V) { Values = std::move(V); }
156 struct RealFieldInfo {
157 SmallVector<APInt, 1> AsIntValues;
159 RealFieldInfo() = default;
160 RealFieldInfo(const SmallVector<APInt, 1> &V) { AsIntValues = V; }
161 RealFieldInfo(SmallVector<APInt, 1> &&V) { AsIntValues = std::move(V); }
// Payload of a struct-typed field: a list of struct initializers together
// with a copy of the StructInfo describing the nested structure.
163 struct StructFieldInfo {
164 std::vector<StructInitializer> Initializers;
165 StructInfo Structure;
167 StructFieldInfo() = default;
// Defined out of line, after StructInitializer is a complete type.
168 StructFieldInfo(std::vector<StructInitializer> V, StructInfo S);
// Hand-written tagged union holding the initializer of one field.
// FT selects which of the three union members is alive: FT_INTEGRAL -> IntInfo,
// FT_REAL -> RealInfo, FT_STRUCT -> StructInfo. Because the members have
// non-trivial types, the constructors, destructor and assignment operators
// below create and destroy the active member explicitly.
171 class FieldInitializer {
172 public:
// Tag identifying the active union member.
173 FieldType FT;
174 union {
175 IntFieldInfo IntInfo;
176 RealFieldInfo RealInfo;
177 StructFieldInfo StructInfo;
// Destroys whichever member FT marks as active.
180 ~FieldInitializer();
// Default-constructs the member selected by FT.
181 FieldInitializer(FieldType FT);
// Each of the next three constructors fixes FT from its argument type
// (integral, real, struct respectively).
183 FieldInitializer(SmallVector<const MCExpr *, 1> &&Values);
184 FieldInitializer(SmallVector<APInt, 1> &&AsIntValues);
185 FieldInitializer(std::vector<StructInitializer> &&Initializers,
186 struct StructInfo Structure);
188 FieldInitializer(const FieldInitializer &Initializer);
189 FieldInitializer(FieldInitializer &&Initializer);
191 FieldInitializer &operator=(const FieldInitializer &Initializer);
192 FieldInitializer &operator=(FieldInitializer &&Initializer);
// Initializer for a whole struct value: a list of field initializers.
// NOTE(review): presumably one entry per field, in field order - confirm
// against parseStructInitializer / emitStructInitializer.
195 struct StructInitializer {
196 std::vector<FieldInitializer> FieldInitializers;
// One field of a StructInfo: where it sits, how large it is, and its contents.
199 struct FieldInfo {
200 // Offset of the field within the containing STRUCT.
201 unsigned Offset = 0;
203 // Total size of the field (= LengthOf * Type).
204 unsigned SizeOf = 0;
206 // Number of elements in the field (1 if scalar, >1 if an array).
207 unsigned LengthOf = 0;
209 // Size of a single entry in this field, in bytes ("type" in MASM standards).
210 unsigned Type = 0;
// Tagged-union payload; its kind is fixed by the FieldType given to the
// constructor.
212 FieldInitializer Contents;
// Creates a field whose Contents holds a default-constructed payload of kind FT.
214 FieldInfo(FieldType FT) : Contents(FT) {}
217 StructFieldInfo::StructFieldInfo(std::vector<StructInitializer> V,
218 StructInfo S) {
219 Initializers = std::move(V);
220 Structure = S;
// Creates an empty struct/union description: records the name, whether it is
// a union, and the alignment limit. All remaining members keep their in-class
// defaults.
223 StructInfo::StructInfo(StringRef StructName, bool Union,
224 unsigned AlignmentValue)
225 : Name(StructName), IsUnion(Union), Alignment(AlignmentValue) {}
227 FieldInfo &StructInfo::addField(StringRef FieldName, FieldType FT,
228 unsigned FieldAlignmentSize) {
229 if (!FieldName.empty())
230 FieldsByName[FieldName.lower()] = Fields.size();
231 Fields.emplace_back(FT);
232 FieldInfo &Field = Fields.back();
233 Field.Offset =
234 llvm::alignTo(NextOffset, std::min(Alignment, FieldAlignmentSize));
235 if (!IsUnion) {
236 NextOffset = std::max(NextOffset, Field.Offset);
238 AlignmentSize = std::max(AlignmentSize, FieldAlignmentSize);
239 return Field;
242 FieldInitializer::~FieldInitializer() {
243 switch (FT) {
244 case FT_INTEGRAL:
245 IntInfo.~IntFieldInfo();
246 break;
247 case FT_REAL:
248 RealInfo.~RealFieldInfo();
249 break;
250 case FT_STRUCT:
251 StructInfo.~StructFieldInfo();
252 break;
256 FieldInitializer::FieldInitializer(FieldType FT) : FT(FT) {
257 switch (FT) {
258 case FT_INTEGRAL:
259 new (&IntInfo) IntFieldInfo();
260 break;
261 case FT_REAL:
262 new (&RealInfo) RealFieldInfo();
263 break;
264 case FT_STRUCT:
265 new (&StructInfo) StructFieldInfo();
266 break;
270 FieldInitializer::FieldInitializer(SmallVector<const MCExpr *, 1> &&Values)
271 : FT(FT_INTEGRAL) {
272 new (&IntInfo) IntFieldInfo(std::move(Values));
275 FieldInitializer::FieldInitializer(SmallVector<APInt, 1> &&AsIntValues)
276 : FT(FT_REAL) {
277 new (&RealInfo) RealFieldInfo(std::move(AsIntValues));
280 FieldInitializer::FieldInitializer(
281 std::vector<StructInitializer> &&Initializers, struct StructInfo Structure)
282 : FT(FT_STRUCT) {
283 new (&StructInfo) StructFieldInfo(std::move(Initializers), Structure);
286 FieldInitializer::FieldInitializer(const FieldInitializer &Initializer)
287 : FT(Initializer.FT) {
288 switch (FT) {
289 case FT_INTEGRAL:
290 new (&IntInfo) IntFieldInfo(Initializer.IntInfo);
291 break;
292 case FT_REAL:
293 new (&RealInfo) RealFieldInfo(Initializer.RealInfo);
294 break;
295 case FT_STRUCT:
296 new (&StructInfo) StructFieldInfo(Initializer.StructInfo);
297 break;
301 FieldInitializer::FieldInitializer(FieldInitializer &&Initializer)
302 : FT(Initializer.FT) {
303 switch (FT) {
304 case FT_INTEGRAL:
305 new (&IntInfo) IntFieldInfo(Initializer.IntInfo);
306 break;
307 case FT_REAL:
308 new (&RealInfo) RealFieldInfo(Initializer.RealInfo);
309 break;
310 case FT_STRUCT:
311 new (&StructInfo) StructFieldInfo(Initializer.StructInfo);
312 break;
316 FieldInitializer &
317 FieldInitializer::operator=(const FieldInitializer &Initializer) {
318 if (FT != Initializer.FT) {
319 switch (FT) {
320 case FT_INTEGRAL:
321 IntInfo.~IntFieldInfo();
322 break;
323 case FT_REAL:
324 RealInfo.~RealFieldInfo();
325 break;
326 case FT_STRUCT:
327 StructInfo.~StructFieldInfo();
328 break;
331 FT = Initializer.FT;
332 switch (FT) {
333 case FT_INTEGRAL:
334 IntInfo = Initializer.IntInfo;
335 break;
336 case FT_REAL:
337 RealInfo = Initializer.RealInfo;
338 break;
339 case FT_STRUCT:
340 StructInfo = Initializer.StructInfo;
341 break;
343 return *this;
346 FieldInitializer &FieldInitializer::operator=(FieldInitializer &&Initializer) {
347 if (FT != Initializer.FT) {
348 switch (FT) {
349 case FT_INTEGRAL:
350 IntInfo.~IntFieldInfo();
351 break;
352 case FT_REAL:
353 RealInfo.~RealFieldInfo();
354 break;
355 case FT_STRUCT:
356 StructInfo.~StructFieldInfo();
357 break;
360 FT = Initializer.FT;
361 switch (FT) {
362 case FT_INTEGRAL:
363 IntInfo = Initializer.IntInfo;
364 break;
365 case FT_REAL:
366 RealInfo = Initializer.RealInfo;
367 break;
368 case FT_STRUCT:
369 StructInfo = Initializer.StructInfo;
370 break;
372 return *this;
375 /// The concrete assembly parser instance.
376 // Note that this is a full MCAsmParser, not an MCAsmParserExtension!
377 // It's a peer of AsmParser, not of COFFAsmParser, WasmAsmParser, etc.
378 class MasmParser : public MCAsmParser {
379 private:
380 AsmLexer Lexer;
381 MCContext &Ctx;
382 MCStreamer &Out;
383 const MCAsmInfo &MAI;
384 SourceMgr &SrcMgr;
385 SourceMgr::DiagHandlerTy SavedDiagHandler;
386 void *SavedDiagContext;
387 std::unique_ptr<MCAsmParserExtension> PlatformParser;
389 /// This is the current buffer index we're lexing from as managed by the
390 /// SourceMgr object.
391 unsigned CurBuffer;
393 /// time of assembly
394 struct tm TM;
396 BitVector EndStatementAtEOFStack;
398 AsmCond TheCondState;
399 std::vector<AsmCond> TheCondStack;
401 /// maps directive names to handler methods in parser
402 /// extensions. Extensions register themselves in this map by calling
403 /// addDirectiveHandler.
404 StringMap<ExtensionDirectiveHandler> ExtensionDirectiveMap;
406 /// maps assembly-time variable names to variables.
// One assembly-time variable: its name, whether it may be redefined, and -
// when IsText is set - its text value.
407 struct Variable {
// Redefinition policy; new variables default to REDEFINABLE.
408 enum RedefinableKind { NOT_REDEFINABLE, WARN_ON_REDEFINITION, REDEFINABLE };
410 StringRef Name;
411 RedefinableKind Redefinable = REDEFINABLE;
// NOTE(review): presumably TextValue is only meaningful when IsText is true -
// confirm against the equate handling.
412 bool IsText = false;
413 std::string TextValue;
415 StringMap<Variable> Variables;
417 /// Stack of active struct definitions.
418 SmallVector<StructInfo, 1> StructInProgress;
420 /// Maps struct tags to struct definitions.
421 StringMap<StructInfo> Structs;
423 /// Maps data location names to types.
424 StringMap<AsmTypeInfo> KnownType;
426 /// Stack of active macro instantiations.
427 std::vector<MacroInstantiation*> ActiveMacros;
429 /// List of bodies of anonymous macros.
430 std::deque<MCAsmMacro> MacroLikeBodies;
432 /// Keeps track of how many .macro's have been instantiated.
433 unsigned NumOfMacroInstantiations;
435 /// The values from the last parsed cpp hash file line comment if any.
436 struct CppHashInfoTy {
437 StringRef Filename;
438 int64_t LineNumber;
439 SMLoc Loc;
440 unsigned Buf;
441 CppHashInfoTy() : LineNumber(0), Buf(0) {}
443 CppHashInfoTy CppHashInfo;
445 /// The filename from the first cpp hash file line comment, if any.
446 StringRef FirstCppHashFilename;
448 /// List of forward directional labels for diagnosis at the end.
449 SmallVector<std::tuple<SMLoc, CppHashInfoTy, MCSymbol *>, 4> DirLabels;
451 /// AssemblerDialect. ~0U means the value is unset and the one provided by MAI is used.
452 /// Defaults to 1U, meaning Intel.
453 unsigned AssemblerDialect = 1U;
455 /// is Darwin compatibility enabled?
456 bool IsDarwin = false;
458 /// Are we parsing ms-style inline assembly?
459 bool ParsingMSInlineAsm = false;
461 /// Did we already inform the user about inconsistent MD5 usage?
462 bool ReportedInconsistentMD5 = false;
464 // Current <...> expression depth.
465 unsigned AngleBracketDepth = 0U;
467 // Number of locals defined.
468 uint16_t LocalCounter = 0;
470 public:
471 MasmParser(SourceMgr &SM, MCContext &Ctx, MCStreamer &Out,
472 const MCAsmInfo &MAI, struct tm TM, unsigned CB = 0);
473 MasmParser(const MasmParser &) = delete;
474 MasmParser &operator=(const MasmParser &) = delete;
475 ~MasmParser() override;
477 bool Run(bool NoInitialTextSection, bool NoFinalize = false) override;
479 void addDirectiveHandler(StringRef Directive,
480 ExtensionDirectiveHandler Handler) override {
481 ExtensionDirectiveMap[Directive] = Handler;
482 if (!DirectiveKindMap.contains(Directive)) {
483 DirectiveKindMap[Directive] = DK_HANDLER_DIRECTIVE;
487 void addAliasForDirective(StringRef Directive, StringRef Alias) override {
488 DirectiveKindMap[Directive] = DirectiveKindMap[Alias];
491 /// @name MCAsmParser Interface
492 /// {
494 SourceMgr &getSourceManager() override { return SrcMgr; }
495 MCAsmLexer &getLexer() override { return Lexer; }
496 MCContext &getContext() override { return Ctx; }
497 MCStreamer &getStreamer() override { return Out; }
499 CodeViewContext &getCVContext() { return Ctx.getCVContext(); }
501 unsigned getAssemblerDialect() override {
502 if (AssemblerDialect == ~0U)
503 return MAI.getAssemblerDialect();
504 else
505 return AssemblerDialect;
507 void setAssemblerDialect(unsigned i) override {
508 AssemblerDialect = i;
511 void Note(SMLoc L, const Twine &Msg, SMRange Range = std::nullopt) override;
512 bool Warning(SMLoc L, const Twine &Msg,
513 SMRange Range = std::nullopt) override;
514 bool printError(SMLoc L, const Twine &Msg,
515 SMRange Range = std::nullopt) override;
517 enum ExpandKind { ExpandMacros, DoNotExpandMacros };
518 const AsmToken &Lex(ExpandKind ExpandNextToken);
519 const AsmToken &Lex() override { return Lex(ExpandMacros); }
521 void setParsingMSInlineAsm(bool V) override {
522 ParsingMSInlineAsm = V;
523 // When parsing MS inline asm, we must lex 0b1101 and 0ABCH as binary and
524 // hex integer literals.
525 Lexer.setLexMasmIntegers(V);
527 bool isParsingMSInlineAsm() override { return ParsingMSInlineAsm; }
529 bool isParsingMasm() const override { return true; }
531 bool defineMacro(StringRef Name, StringRef Value) override;
533 bool lookUpField(StringRef Name, AsmFieldInfo &Info) const override;
534 bool lookUpField(StringRef Base, StringRef Member,
535 AsmFieldInfo &Info) const override;
537 bool lookUpType(StringRef Name, AsmTypeInfo &Info) const override;
539 bool parseMSInlineAsm(std::string &AsmString, unsigned &NumOutputs,
540 unsigned &NumInputs,
541 SmallVectorImpl<std::pair<void *, bool>> &OpDecls,
542 SmallVectorImpl<std::string> &Constraints,
543 SmallVectorImpl<std::string> &Clobbers,
544 const MCInstrInfo *MII, const MCInstPrinter *IP,
545 MCAsmParserSemaCallback &SI) override;
547 bool parseExpression(const MCExpr *&Res);
548 bool parseExpression(const MCExpr *&Res, SMLoc &EndLoc) override;
549 bool parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc,
550 AsmTypeInfo *TypeInfo) override;
551 bool parseParenExpression(const MCExpr *&Res, SMLoc &EndLoc) override;
552 bool parseParenExprOfDepth(unsigned ParenDepth, const MCExpr *&Res,
553 SMLoc &EndLoc) override;
554 bool parseAbsoluteExpression(int64_t &Res) override;
556 /// Parse a floating point expression using the float \p Semantics
557 /// and set \p Res to the value.
558 bool parseRealValue(const fltSemantics &Semantics, APInt &Res);
560 /// Parse an identifier or string (as a quoted identifier)
561 /// and set \p Res to the identifier contents.
562 enum IdentifierPositionKind { StandardPosition, StartOfStatement };
563 bool parseIdentifier(StringRef &Res, IdentifierPositionKind Position);
564 bool parseIdentifier(StringRef &Res) override {
565 return parseIdentifier(Res, StandardPosition);
567 void eatToEndOfStatement() override;
569 bool checkForValidSection() override;
571 /// }
573 private:
574 bool expandMacros();
575 const AsmToken peekTok(bool ShouldSkipSpace = true);
577 bool parseStatement(ParseStatementInfo &Info,
578 MCAsmParserSemaCallback *SI);
579 bool parseCurlyBlockScope(SmallVectorImpl<AsmRewrite>& AsmStrRewrites);
580 bool parseCppHashLineFilenameComment(SMLoc L);
582 bool expandMacro(raw_svector_ostream &OS, StringRef Body,
583 ArrayRef<MCAsmMacroParameter> Parameters,
584 ArrayRef<MCAsmMacroArgument> A,
585 const std::vector<std::string> &Locals, SMLoc L);
587 /// Are we inside a macro instantiation?
588 bool isInsideMacroInstantiation() {return !ActiveMacros.empty();}
590 /// Handle entry to macro instantiation.
592 /// \param M The macro.
593 /// \param NameLoc Instantiation location.
594 bool handleMacroEntry(
595 const MCAsmMacro *M, SMLoc NameLoc,
596 AsmToken::TokenKind ArgumentEndTok = AsmToken::EndOfStatement);
598 /// Handle invocation of macro function.
600 /// \param M The macro.
601 /// \param NameLoc Invocation location.
602 bool handleMacroInvocation(const MCAsmMacro *M, SMLoc NameLoc);
604 /// Handle exit from macro instantiation.
605 void handleMacroExit();
607 /// Extract AsmTokens for a macro argument.
608 bool
609 parseMacroArgument(const MCAsmMacroParameter *MP, MCAsmMacroArgument &MA,
610 AsmToken::TokenKind EndTok = AsmToken::EndOfStatement);
612 /// Parse all macro arguments for a given macro.
613 bool
614 parseMacroArguments(const MCAsmMacro *M, MCAsmMacroArguments &A,
615 AsmToken::TokenKind EndTok = AsmToken::EndOfStatement);
617 void printMacroInstantiations();
619 bool expandStatement(SMLoc Loc);
621 void printMessage(SMLoc Loc, SourceMgr::DiagKind Kind, const Twine &Msg,
622 SMRange Range = std::nullopt) const {
623 ArrayRef<SMRange> Ranges(Range);
624 SrcMgr.PrintMessage(Loc, Kind, Msg, Ranges);
626 static void DiagHandler(const SMDiagnostic &Diag, void *Context);
628 bool lookUpField(const StructInfo &Structure, StringRef Member,
629 AsmFieldInfo &Info) const;
631 /// Should we emit DWARF describing this assembler source? (Returns false if
632 /// the source has .file directives, which means we don't want to generate
633 /// info describing the assembler source itself.)
634 bool enabledGenDwarfForAssembly();
636 /// Enter the specified file. This returns true on failure.
637 bool enterIncludeFile(const std::string &Filename);
639 /// Reset the current lexer position to that given by \p Loc. The
640 /// current token is not set; clients should ensure Lex() is called
641 /// subsequently.
643 /// \param InBuffer If not 0, should be the known buffer id that contains the
644 /// location.
645 void jumpToLoc(SMLoc Loc, unsigned InBuffer = 0,
646 bool EndStatementAtEOF = true);
648 /// Parse up to a token of kind \p EndTok and return the contents from the
649 /// current token up to (but not including) this token; the current token on
650 /// exit will be either this kind or EOF. Reads through instantiated macro
651 /// functions and text macros.
652 SmallVector<StringRef, 1> parseStringRefsTo(AsmToken::TokenKind EndTok);
653 std::string parseStringTo(AsmToken::TokenKind EndTok);
655 /// Parse up to the end of statement and return the contents from the current
656 /// token until the end of the statement; the current token on exit will be
657 /// either the EndOfStatement or EOF.
658 StringRef parseStringToEndOfStatement() override;
660 bool parseTextItem(std::string &Data);
662 unsigned getBinOpPrecedence(AsmToken::TokenKind K,
663 MCBinaryExpr::Opcode &Kind);
665 bool parseBinOpRHS(unsigned Precedence, const MCExpr *&Res, SMLoc &EndLoc);
666 bool parseParenExpr(const MCExpr *&Res, SMLoc &EndLoc);
667 bool parseBracketExpr(const MCExpr *&Res, SMLoc &EndLoc);
669 bool parseRegisterOrRegisterNumber(int64_t &Register, SMLoc DirectiveLoc);
671 bool parseCVFunctionId(int64_t &FunctionId, StringRef DirectiveName);
672 bool parseCVFileId(int64_t &FileId, StringRef DirectiveName);
674 // Generic (target and platform independent) directive parsing.
// One enumerator per directive kind known to this class. DirectiveKindMap
// (declared after this enum) maps directive names to these values.
675 enum DirectiveKind {
676 DK_NO_DIRECTIVE, // Placeholder
// Kind that addDirectiveHandler() records for a directive name that has no
// DirectiveKindMap entry yet.
677 DK_HANDLER_DIRECTIVE,
678 DK_ASSIGN,
679 DK_EQU,
680 DK_TEXTEQU,
681 DK_ASCII,
682 DK_ASCIZ,
683 DK_STRING,
684 DK_BYTE,
685 DK_SBYTE,
686 DK_WORD,
687 DK_SWORD,
688 DK_DWORD,
689 DK_SDWORD,
690 DK_FWORD,
691 DK_QWORD,
692 DK_SQWORD,
693 DK_DB,
694 DK_DD,
695 DK_DF,
696 DK_DQ,
697 DK_DW,
698 DK_REAL4,
699 DK_REAL8,
700 DK_REAL10,
701 DK_ALIGN,
702 DK_EVEN,
703 DK_ORG,
704 DK_ENDR,
705 DK_EXTERN,
706 DK_PUBLIC,
707 DK_COMM,
708 DK_COMMENT,
709 DK_INCLUDE,
710 DK_REPEAT,
711 DK_WHILE,
712 DK_FOR,
713 DK_FORC,
714 DK_IF,
715 DK_IFE,
716 DK_IFB,
717 DK_IFNB,
718 DK_IFDEF,
719 DK_IFNDEF,
720 DK_IFDIF,
721 DK_IFDIFI,
722 DK_IFIDN,
723 DK_IFIDNI,
724 DK_ELSEIF,
725 DK_ELSEIFE,
726 DK_ELSEIFB,
727 DK_ELSEIFNB,
728 DK_ELSEIFDEF,
729 DK_ELSEIFNDEF,
730 DK_ELSEIFDIF,
731 DK_ELSEIFDIFI,
732 DK_ELSEIFIDN,
733 DK_ELSEIFIDNI,
734 DK_ELSE,
735 DK_ENDIF,
736 DK_FILE,
737 DK_LINE,
738 DK_LOC,
739 DK_STABS,
740 DK_CV_FILE,
741 DK_CV_FUNC_ID,
742 DK_CV_INLINE_SITE_ID,
743 DK_CV_LOC,
744 DK_CV_LINETABLE,
745 DK_CV_INLINE_LINETABLE,
746 DK_CV_DEF_RANGE,
747 DK_CV_STRINGTABLE,
748 DK_CV_STRING,
749 DK_CV_FILECHECKSUMS,
750 DK_CV_FILECHECKSUM_OFFSET,
751 DK_CV_FPO_DATA,
752 DK_CFI_SECTIONS,
753 DK_CFI_STARTPROC,
754 DK_CFI_ENDPROC,
755 DK_CFI_DEF_CFA,
756 DK_CFI_DEF_CFA_OFFSET,
757 DK_CFI_ADJUST_CFA_OFFSET,
758 DK_CFI_DEF_CFA_REGISTER,
759 DK_CFI_OFFSET,
760 DK_CFI_REL_OFFSET,
761 DK_CFI_PERSONALITY,
762 DK_CFI_LSDA,
763 DK_CFI_REMEMBER_STATE,
764 DK_CFI_RESTORE_STATE,
765 DK_CFI_SAME_VALUE,
766 DK_CFI_RESTORE,
767 DK_CFI_ESCAPE,
768 DK_CFI_RETURN_COLUMN,
769 DK_CFI_SIGNAL_FRAME,
770 DK_CFI_UNDEFINED,
771 DK_CFI_REGISTER,
772 DK_CFI_WINDOW_SAVE,
773 DK_CFI_B_KEY_FRAME,
774 DK_MACRO,
775 DK_EXITM,
776 DK_ENDM,
777 DK_PURGE,
778 DK_ERR,
779 DK_ERRB,
780 DK_ERRNB,
781 DK_ERRDEF,
782 DK_ERRNDEF,
783 DK_ERRDIF,
784 DK_ERRDIFI,
785 DK_ERRIDN,
786 DK_ERRIDNI,
787 DK_ERRE,
788 DK_ERRNZ,
789 DK_ECHO,
790 DK_STRUCT,
791 DK_UNION,
792 DK_ENDS,
793 DK_END,
794 DK_PUSHFRAME,
795 DK_PUSHREG,
796 DK_SAVEREG,
797 DK_SAVEXMM128,
798 DK_SETFRAME,
799 DK_RADIX,
802 /// Maps directive name --> DirectiveKind enum, for directives parsed by this
803 /// class.
804 StringMap<DirectiveKind> DirectiveKindMap;
806 bool isMacroLikeDirective();
808 // Codeview def_range type parsing.
// Kinds of Codeview def_range records; CVDefRangeTypeMap (declared after this
// enum) maps def_range type names to these values. The first enumerator is
// pinned to 0 and serves as a placeholder.
809 enum CVDefRangeType {
810 CVDR_DEFRANGE = 0, // Placeholder
811 CVDR_DEFRANGE_REGISTER,
812 CVDR_DEFRANGE_FRAMEPOINTER_REL,
813 CVDR_DEFRANGE_SUBFIELD_REGISTER,
814 CVDR_DEFRANGE_REGISTER_REL
817 /// Maps Codeview def_range types --> CVDefRangeType enum, for Codeview
818 /// def_range types parsed by this class.
819 StringMap<CVDefRangeType> CVDefRangeTypeMap;
821 // Builtin symbols handled by this class.
// BuiltinSymbolMap (declared after this enum) maps builtin names to these
// values; evaluateBuiltinValue() and evaluateBuiltinTextMacro() take one of
// them as their Symbol argument.
822 enum BuiltinSymbol {
823 BI_NO_SYMBOL, // Placeholder
824 BI_DATE,
825 BI_TIME,
826 BI_VERSION,
827 BI_FILECUR,
828 BI_FILENAME,
829 BI_LINE,
830 BI_CURSEG,
831 BI_CPU,
832 BI_INTERFACE,
833 BI_CODE,
834 BI_DATA,
835 BI_FARDATA,
836 BI_WORDSIZE,
837 BI_CODESIZE,
838 BI_DATASIZE,
839 BI_MODEL,
840 BI_STACK,
843 /// Maps builtin name --> BuiltinSymbol enum, for builtins handled by this
844 /// class.
845 StringMap<BuiltinSymbol> BuiltinSymbolMap;
847 const MCExpr *evaluateBuiltinValue(BuiltinSymbol Symbol, SMLoc StartLoc);
849 std::optional<std::string> evaluateBuiltinTextMacro(BuiltinSymbol Symbol,
850 SMLoc StartLoc);
852 // ".ascii", ".asciz", ".string"
853 bool parseDirectiveAscii(StringRef IDVal, bool ZeroTerminated);
855 // "byte", "word", ...
856 bool emitIntValue(const MCExpr *Value, unsigned Size);
857 bool parseScalarInitializer(unsigned Size,
858 SmallVectorImpl<const MCExpr *> &Values,
859 unsigned StringPadLength = 0);
860 bool parseScalarInstList(
861 unsigned Size, SmallVectorImpl<const MCExpr *> &Values,
862 const AsmToken::TokenKind EndToken = AsmToken::EndOfStatement);
863 bool emitIntegralValues(unsigned Size, unsigned *Count = nullptr);
864 bool addIntegralField(StringRef Name, unsigned Size);
865 bool parseDirectiveValue(StringRef IDVal, unsigned Size);
866 bool parseDirectiveNamedValue(StringRef TypeName, unsigned Size,
867 StringRef Name, SMLoc NameLoc);
869 // "real4", "real8", "real10"
870 bool emitRealValues(const fltSemantics &Semantics, unsigned *Count = nullptr);
871 bool addRealField(StringRef Name, const fltSemantics &Semantics, size_t Size);
872 bool parseDirectiveRealValue(StringRef IDVal, const fltSemantics &Semantics,
873 size_t Size);
874 bool parseRealInstList(
875 const fltSemantics &Semantics, SmallVectorImpl<APInt> &Values,
876 const AsmToken::TokenKind EndToken = AsmToken::EndOfStatement);
877 bool parseDirectiveNamedRealValue(StringRef TypeName,
878 const fltSemantics &Semantics,
879 unsigned Size, StringRef Name,
880 SMLoc NameLoc);
882 bool parseOptionalAngleBracketOpen();
883 bool parseAngleBracketClose(const Twine &Msg = "expected '>'");
885 bool parseFieldInitializer(const FieldInfo &Field,
886 FieldInitializer &Initializer);
887 bool parseFieldInitializer(const FieldInfo &Field,
888 const IntFieldInfo &Contents,
889 FieldInitializer &Initializer);
890 bool parseFieldInitializer(const FieldInfo &Field,
891 const RealFieldInfo &Contents,
892 FieldInitializer &Initializer);
893 bool parseFieldInitializer(const FieldInfo &Field,
894 const StructFieldInfo &Contents,
895 FieldInitializer &Initializer);
897 bool parseStructInitializer(const StructInfo &Structure,
898 StructInitializer &Initializer);
899 bool parseStructInstList(
900 const StructInfo &Structure, std::vector<StructInitializer> &Initializers,
901 const AsmToken::TokenKind EndToken = AsmToken::EndOfStatement);
903 bool emitFieldValue(const FieldInfo &Field);
904 bool emitFieldValue(const FieldInfo &Field, const IntFieldInfo &Contents);
905 bool emitFieldValue(const FieldInfo &Field, const RealFieldInfo &Contents);
906 bool emitFieldValue(const FieldInfo &Field, const StructFieldInfo &Contents);
908 bool emitFieldInitializer(const FieldInfo &Field,
909 const FieldInitializer &Initializer);
910 bool emitFieldInitializer(const FieldInfo &Field,
911 const IntFieldInfo &Contents,
912 const IntFieldInfo &Initializer);
913 bool emitFieldInitializer(const FieldInfo &Field,
914 const RealFieldInfo &Contents,
915 const RealFieldInfo &Initializer);
916 bool emitFieldInitializer(const FieldInfo &Field,
917 const StructFieldInfo &Contents,
918 const StructFieldInfo &Initializer);
920 bool emitStructInitializer(const StructInfo &Structure,
921 const StructInitializer &Initializer);
923 // User-defined types (structs, unions):
924 bool emitStructValues(const StructInfo &Structure, unsigned *Count = nullptr);
925 bool addStructField(StringRef Name, const StructInfo &Structure);
926 bool parseDirectiveStructValue(const StructInfo &Structure,
927 StringRef Directive, SMLoc DirLoc);
928 bool parseDirectiveNamedStructValue(const StructInfo &Structure,
929 StringRef Directive, SMLoc DirLoc,
930 StringRef Name);
932 // "=", "equ", "textequ"
933 bool parseDirectiveEquate(StringRef IDVal, StringRef Name,
934 DirectiveKind DirKind, SMLoc NameLoc);
936 bool parseDirectiveOrg(); // "org"
938 bool emitAlignTo(int64_t Alignment);
939 bool parseDirectiveAlign(); // "align"
940 bool parseDirectiveEven(); // "even"
942 // ".file", ".line", ".loc", ".stabs"
943 bool parseDirectiveFile(SMLoc DirectiveLoc);
944 bool parseDirectiveLine();
945 bool parseDirectiveLoc();
946 bool parseDirectiveStabs();
948 // ".cv_file", ".cv_func_id", ".cv_inline_site_id", ".cv_loc", ".cv_linetable",
949 // ".cv_inline_linetable", ".cv_def_range", ".cv_string"
950 bool parseDirectiveCVFile();
951 bool parseDirectiveCVFuncId();
952 bool parseDirectiveCVInlineSiteId();
953 bool parseDirectiveCVLoc();
954 bool parseDirectiveCVLinetable();
955 bool parseDirectiveCVInlineLinetable();
956 bool parseDirectiveCVDefRange();
957 bool parseDirectiveCVString();
958 bool parseDirectiveCVStringTable();
959 bool parseDirectiveCVFileChecksums();
960 bool parseDirectiveCVFileChecksumOffset();
961 bool parseDirectiveCVFPOData();
963 // .cfi directives
964 bool parseDirectiveCFIRegister(SMLoc DirectiveLoc);
965 bool parseDirectiveCFIWindowSave(SMLoc DirectiveLoc);
966 bool parseDirectiveCFISections();
967 bool parseDirectiveCFIStartProc();
968 bool parseDirectiveCFIEndProc();
969 bool parseDirectiveCFIDefCfaOffset(SMLoc DirectiveLoc);
970 bool parseDirectiveCFIDefCfa(SMLoc DirectiveLoc);
971 bool parseDirectiveCFIAdjustCfaOffset(SMLoc DirectiveLoc);
972 bool parseDirectiveCFIDefCfaRegister(SMLoc DirectiveLoc);
973 bool parseDirectiveCFIOffset(SMLoc DirectiveLoc);
974 bool parseDirectiveCFIRelOffset(SMLoc DirectiveLoc);
975 bool parseDirectiveCFIPersonalityOrLsda(bool IsPersonality);
976 bool parseDirectiveCFIRememberState(SMLoc DirectiveLoc);
977 bool parseDirectiveCFIRestoreState(SMLoc DirectiveLoc);
978 bool parseDirectiveCFISameValue(SMLoc DirectiveLoc);
979 bool parseDirectiveCFIRestore(SMLoc DirectiveLoc);
980 bool parseDirectiveCFIEscape(SMLoc DirectiveLoc);
981 bool parseDirectiveCFIReturnColumn(SMLoc DirectiveLoc);
982 bool parseDirectiveCFISignalFrame();
983 bool parseDirectiveCFIUndefined(SMLoc DirectiveLoc);
985 // macro directives
986 bool parseDirectivePurgeMacro(SMLoc DirectiveLoc);
987 bool parseDirectiveExitMacro(SMLoc DirectiveLoc, StringRef Directive,
988 std::string &Value);
989 bool parseDirectiveEndMacro(StringRef Directive);
990 bool parseDirectiveMacro(StringRef Name, SMLoc NameLoc);
992 bool parseDirectiveStruct(StringRef Directive, DirectiveKind DirKind,
993 StringRef Name, SMLoc NameLoc);
994 bool parseDirectiveNestedStruct(StringRef Directive, DirectiveKind DirKind);
995 bool parseDirectiveEnds(StringRef Name, SMLoc NameLoc);
996 bool parseDirectiveNestedEnds();
998 bool parseDirectiveExtern();
1000 /// Parse a directive like ".globl" which accepts a single symbol (which
1001 /// should be a label or an external).
1002 bool parseDirectiveSymbolAttribute(MCSymbolAttr Attr);
1004 bool parseDirectiveComm(bool IsLocal); // ".comm" and ".lcomm"
1006 bool parseDirectiveComment(SMLoc DirectiveLoc); // "comment"
1008 bool parseDirectiveInclude(); // "include"
1010 // "if" or "ife"
1011 bool parseDirectiveIf(SMLoc DirectiveLoc, DirectiveKind DirKind);
1012 // "ifb" or "ifnb", depending on ExpectBlank.
1013 bool parseDirectiveIfb(SMLoc DirectiveLoc, bool ExpectBlank);
1014 // "ifidn", "ifdif", "ifidni", or "ifdifi", depending on ExpectEqual and
1015 // CaseInsensitive.
1016 bool parseDirectiveIfidn(SMLoc DirectiveLoc, bool ExpectEqual,
1017 bool CaseInsensitive);
1018 // "ifdef" or "ifndef", depending on expect_defined
1019 bool parseDirectiveIfdef(SMLoc DirectiveLoc, bool expect_defined);
1020 // "elseif" or "elseife"
1021 bool parseDirectiveElseIf(SMLoc DirectiveLoc, DirectiveKind DirKind);
1022 // "elseifb" or "elseifnb", depending on ExpectBlank.
1023 bool parseDirectiveElseIfb(SMLoc DirectiveLoc, bool ExpectBlank);
1024 // ".elseifdef" or ".elseifndef", depending on expect_defined
1025 bool parseDirectiveElseIfdef(SMLoc DirectiveLoc, bool expect_defined);
1026 // "elseifidn", "elseifdif", "elseifidni", or "elseifdifi", depending on
1027 // ExpectEqual and CaseInsensitive.
1028 bool parseDirectiveElseIfidn(SMLoc DirectiveLoc, bool ExpectEqual,
1029 bool CaseInsensitive);
1030 bool parseDirectiveElse(SMLoc DirectiveLoc); // "else"
1031 bool parseDirectiveEndIf(SMLoc DirectiveLoc); // "endif"
1032 bool parseEscapedString(std::string &Data) override;
1033 bool parseAngleBracketString(std::string &Data) override;
1035 // Macro-like directives
1036 MCAsmMacro *parseMacroLikeBody(SMLoc DirectiveLoc);
1037 void instantiateMacroLikeBody(MCAsmMacro *M, SMLoc DirectiveLoc,
1038 raw_svector_ostream &OS);
1039 void instantiateMacroLikeBody(MCAsmMacro *M, SMLoc DirectiveLoc,
1040 SMLoc ExitLoc, raw_svector_ostream &OS);
1041 bool parseDirectiveRepeat(SMLoc DirectiveLoc, StringRef Directive);
1042 bool parseDirectiveFor(SMLoc DirectiveLoc, StringRef Directive);
1043 bool parseDirectiveForc(SMLoc DirectiveLoc, StringRef Directive);
1044 bool parseDirectiveWhile(SMLoc DirectiveLoc);
1046 // "_emit" or "__emit"
1047 bool parseDirectiveMSEmit(SMLoc DirectiveLoc, ParseStatementInfo &Info,
1048 size_t Len);
1050 // "align"
1051 bool parseDirectiveMSAlign(SMLoc DirectiveLoc, ParseStatementInfo &Info);
1053 // "end"
1054 bool parseDirectiveEnd(SMLoc DirectiveLoc);
1056 // ".err"
1057 bool parseDirectiveError(SMLoc DirectiveLoc);
1058 // ".errb" or ".errnb", depending on ExpectBlank.
1059 bool parseDirectiveErrorIfb(SMLoc DirectiveLoc, bool ExpectBlank);
1060 // ".errdef" or ".errndef", depending on ExpectDefined.
1061 bool parseDirectiveErrorIfdef(SMLoc DirectiveLoc, bool ExpectDefined);
1062 // ".erridn", ".errdif", ".erridni", or ".errdifi", depending on ExpectEqual
1063 // and CaseInsensitive.
1064 bool parseDirectiveErrorIfidn(SMLoc DirectiveLoc, bool ExpectEqual,
1065 bool CaseInsensitive);
1066 // ".erre" or ".errnz", depending on ExpectZero.
1067 bool parseDirectiveErrorIfe(SMLoc DirectiveLoc, bool ExpectZero);
1069 // ".radix"
1070 bool parseDirectiveRadix(SMLoc DirectiveLoc);
1072 // "echo"
1073 bool parseDirectiveEcho(SMLoc DirectiveLoc);
1075 void initializeDirectiveKindMap();
1076 void initializeCVDefRangeTypeMap();
1077 void initializeBuiltinSymbolMap();
1080 } // end anonymous namespace
// Declarations of entities whose definitions are not in this part of the file.
1082 namespace llvm {
// Command-line option declared extern here. Judging by its name it presumably
// bounds macro nesting depth -- confirm at its definition.
1084 extern cl::opt<unsigned> AsmMacroMaxNestingDepth;
// Factory for the COFF parser extension; the MasmParser constructor stores its
// result in PlatformParser.
1086 extern MCAsmParserExtension *createCOFFMasmParser();
1088 } // end namespace llvm
// Named constant with value 0.
1090 enum { DEFAULT_ADDRSPACE = 0 };
1092 MasmParser::MasmParser(SourceMgr &SM, MCContext &Ctx, MCStreamer &Out,
1093 const MCAsmInfo &MAI, struct tm TM, unsigned CB)
1094 : Lexer(MAI), Ctx(Ctx), Out(Out), MAI(MAI), SrcMgr(SM),
1095 CurBuffer(CB ? CB : SM.getMainFileID()), TM(TM) {
1096 HadError = false;
1097 // Save the old handler.
1098 SavedDiagHandler = SrcMgr.getDiagHandler();
1099 SavedDiagContext = SrcMgr.getDiagContext();
1100 // Set our own handler which calls the saved handler.
1101 SrcMgr.setDiagHandler(DiagHandler, this);
1102 Lexer.setBuffer(SrcMgr.getMemoryBuffer(CurBuffer)->getBuffer());
1103 EndStatementAtEOFStack.push_back(true);
1105 // Initialize the platform / file format parser.
1106 switch (Ctx.getObjectFileType()) {
1107 case MCContext::IsCOFF:
1108 PlatformParser.reset(createCOFFMasmParser());
1109 break;
1110 default:
1111 report_fatal_error("llvm-ml currently supports only COFF output.");
1112 break;
1115 initializeDirectiveKindMap();
1116 PlatformParser->Initialize(*this);
1117 initializeCVDefRangeTypeMap();
1118 initializeBuiltinSymbolMap();
1120 NumOfMacroInstantiations = 0;
1123 MasmParser::~MasmParser() {
1124 assert((HadError || ActiveMacros.empty()) &&
1125 "Unexpected active macro instantiation!");
1127 // Restore the saved diagnostics handler and context for use during
1128 // finalization.
1129 SrcMgr.setDiagHandler(SavedDiagHandler, SavedDiagContext);
1132 void MasmParser::printMacroInstantiations() {
1133 // Print the active macro instantiation stack.
1134 for (std::vector<MacroInstantiation *>::const_reverse_iterator
1135 it = ActiveMacros.rbegin(),
1136 ie = ActiveMacros.rend();
1137 it != ie; ++it)
1138 printMessage((*it)->InstantiationLoc, SourceMgr::DK_Note,
1139 "while in macro instantiation");
1142 void MasmParser::Note(SMLoc L, const Twine &Msg, SMRange Range) {
1143 printPendingErrors();
1144 printMessage(L, SourceMgr::DK_Note, Msg, Range);
1145 printMacroInstantiations();
1148 bool MasmParser::Warning(SMLoc L, const Twine &Msg, SMRange Range) {
1149 if (getTargetParser().getTargetOptions().MCNoWarn)
1150 return false;
1151 if (getTargetParser().getTargetOptions().MCFatalWarnings)
1152 return Error(L, Msg, Range);
1153 printMessage(L, SourceMgr::DK_Warning, Msg, Range);
1154 printMacroInstantiations();
1155 return false;
1158 bool MasmParser::printError(SMLoc L, const Twine &Msg, SMRange Range) {
1159 HadError = true;
1160 printMessage(L, SourceMgr::DK_Error, Msg, Range);
1161 printMacroInstantiations();
1162 return true;
1165 bool MasmParser::enterIncludeFile(const std::string &Filename) {
1166 std::string IncludedFile;
1167 unsigned NewBuf =
1168 SrcMgr.AddIncludeFile(Filename, Lexer.getLoc(), IncludedFile);
1169 if (!NewBuf)
1170 return true;
1172 CurBuffer = NewBuf;
1173 Lexer.setBuffer(SrcMgr.getMemoryBuffer(CurBuffer)->getBuffer());
1174 EndStatementAtEOFStack.push_back(true);
1175 return false;
1178 void MasmParser::jumpToLoc(SMLoc Loc, unsigned InBuffer,
1179 bool EndStatementAtEOF) {
1180 CurBuffer = InBuffer ? InBuffer : SrcMgr.FindBufferContainingLoc(Loc);
1181 Lexer.setBuffer(SrcMgr.getMemoryBuffer(CurBuffer)->getBuffer(),
1182 Loc.getPointer(), EndStatementAtEOF);
1185 bool MasmParser::expandMacros() {
1186 const AsmToken &Tok = getTok();
1187 const std::string IDLower = Tok.getIdentifier().lower();
1189 const llvm::MCAsmMacro *M = getContext().lookupMacro(IDLower);
1190 if (M && M->IsFunction && peekTok().is(AsmToken::LParen)) {
1191 // This is a macro function invocation; expand it in place.
1192 const SMLoc MacroLoc = Tok.getLoc();
1193 const StringRef MacroId = Tok.getIdentifier();
1194 Lexer.Lex();
1195 if (handleMacroInvocation(M, MacroLoc)) {
1196 Lexer.UnLex(AsmToken(AsmToken::Error, MacroId));
1197 Lexer.Lex();
1199 return false;
1202 std::optional<std::string> ExpandedValue;
1203 auto BuiltinIt = BuiltinSymbolMap.find(IDLower);
1204 if (BuiltinIt != BuiltinSymbolMap.end()) {
1205 ExpandedValue =
1206 evaluateBuiltinTextMacro(BuiltinIt->getValue(), Tok.getLoc());
1207 } else {
1208 auto VarIt = Variables.find(IDLower);
1209 if (VarIt != Variables.end() && VarIt->getValue().IsText) {
1210 ExpandedValue = VarIt->getValue().TextValue;
1214 if (!ExpandedValue)
1215 return true;
1216 std::unique_ptr<MemoryBuffer> Instantiation =
1217 MemoryBuffer::getMemBufferCopy(*ExpandedValue, "<instantiation>");
1219 // Jump to the macro instantiation and prime the lexer.
1220 CurBuffer =
1221 SrcMgr.AddNewSourceBuffer(std::move(Instantiation), Tok.getEndLoc());
1222 Lexer.setBuffer(SrcMgr.getMemoryBuffer(CurBuffer)->getBuffer(), nullptr,
1223 /*EndStatementAtEOF=*/false);
1224 EndStatementAtEOFStack.push_back(false);
1225 Lexer.Lex();
1226 return false;
// Advance to the next token and return a reference to it.
//
// Before advancing: an Error token at the current position is reported via
// Error(), and the text of an end-of-statement token that carries a comment is
// handed to the streamer when MAI.preserveAsmComments() is set.
// After advancing: identifiers are macro-expanded when ExpandNextToken is
// ExpandMacros, Comment tokens are consumed, backslash + end-of-statement
// pairs (line continuations) are skipped, and Eof of an included buffer
// returns to the parent include location.
1229 const AsmToken &MasmParser::Lex(ExpandKind ExpandNextToken) {
1230 if (Lexer.getTok().is(AsmToken::Error))
1231 Error(Lexer.getErrLoc(), Lexer.getErr());
1233 // If this is an end-of-statement token with a comment in it...
1234 if (getTok().is(AsmToken::EndOfStatement)) {
1235 // ...and it is a line comment (does not begin with a newline), output it.
1236 if (!getTok().getString().empty() && getTok().getString().front() != '\n' &&
1237 getTok().getString().front() != '\r' && MAI.preserveAsmComments())
1238 Out.addExplicitComment(Twine(getTok().getString()));
1241 const AsmToken *tok = &Lexer.Lex();
1242 bool StartOfStatement = Lexer.isAtStartOfStatement();
// NOTE(review): neither tok nor StartOfStatement is reassigned inside this
// loop; the condition presumably observes the lexer's current token through
// the reference returned by Lexer.Lex() -- confirm against the lexer.
1244 while (ExpandNextToken == ExpandMacros && tok->is(AsmToken::Identifier)) {
1245 if (StartOfStatement) {
// Peek one token ahead to see whether this statement is "<id> EQU/TEXTEQU".
1246 AsmToken NextTok;
1247 MutableArrayRef<AsmToken> Buf(NextTok);
1248 size_t ReadCount = Lexer.peekTokens(Buf);
1249 if (ReadCount && NextTok.is(AsmToken::Identifier) &&
1250 (NextTok.getString().equals_insensitive("equ") ||
1251 NextTok.getString().equals_insensitive("textequ"))) {
1252 // This looks like an EQU or TEXTEQU directive; don't expand the
1253 // identifier, allowing for redefinitions.
1254 break;
// expandMacros() returns true when there was nothing to expand.
1257 if (expandMacros())
1258 break;
1261 // Parse comments here to be deferred until end of next statement.
1262 while (tok->is(AsmToken::Comment)) {
1263 if (MAI.preserveAsmComments())
1264 Out.addExplicitComment(Twine(tok->getString()));
1265 tok = &Lexer.Lex();
1268 // Recognize and bypass line continuations.
1269 while (tok->is(AsmToken::BackSlash) &&
1270 peekTok().is(AsmToken::EndOfStatement)) {
1271 // Eat both the backslash and the end of statement.
1272 Lexer.Lex();
1273 tok = &Lexer.Lex();
1276 if (tok->is(AsmToken::Eof)) {
1277 // If this is the end of an included file, pop the parent file off the
1278 // include stack.
1279 SMLoc ParentIncludeLoc = SrcMgr.getParentIncludeLoc(CurBuffer);
1280 if (ParentIncludeLoc != SMLoc()) {
1281 EndStatementAtEOFStack.pop_back();
1282 jumpToLoc(ParentIncludeLoc, 0, EndStatementAtEOFStack.back());
1283 return Lex();
// Eof of the outermost buffer: the stack must now be empty.
1285 EndStatementAtEOFStack.pop_back();
1286 assert(EndStatementAtEOFStack.empty());
1289 return *tok;
1292 const AsmToken MasmParser::peekTok(bool ShouldSkipSpace) {
1293 AsmToken Tok;
1295 MutableArrayRef<AsmToken> Buf(Tok);
1296 size_t ReadCount = Lexer.peekTokens(Buf, ShouldSkipSpace);
1298 if (ReadCount == 0) {
1299 // If this is the end of an included file, pop the parent file off the
1300 // include stack.
1301 SMLoc ParentIncludeLoc = SrcMgr.getParentIncludeLoc(CurBuffer);
1302 if (ParentIncludeLoc != SMLoc()) {
1303 EndStatementAtEOFStack.pop_back();
1304 jumpToLoc(ParentIncludeLoc, 0, EndStatementAtEOFStack.back());
1305 return peekTok(ShouldSkipSpace);
1307 EndStatementAtEOFStack.pop_back();
1308 assert(EndStatementAtEOFStack.empty());
1311 assert(ReadCount == 1);
1312 return Tok;
1315 bool MasmParser::enabledGenDwarfForAssembly() {
1316 // Check whether the user specified -g.
1317 if (!getContext().getGenDwarfForAssembly())
1318 return false;
1319 // If we haven't encountered any .file directives (which would imply that
1320 // the assembler source was produced with debug info already) then emit one
1321 // describing the assembler source file itself.
1322 if (getContext().getGenDwarfFileNumber() == 0) {
1323 // Use the first #line directive for this, if any. It's preprocessed, so
1324 // there is no checksum, and of course no source directive.
1325 if (!FirstCppHashFilename.empty())
1326 getContext().setMCLineTableRootFile(
1327 /*CUID=*/0, getContext().getCompilationDir(), FirstCppHashFilename,
1328 /*Cksum=*/std::nullopt, /*Source=*/std::nullopt);
1329 const MCDwarfFile &RootFile =
1330 getContext().getMCDwarfLineTable(/*CUID=*/0).getRootFile();
1331 getContext().setGenDwarfFileNumber(getStreamer().emitDwarfFileDirective(
1332 /*CUID=*/0, getContext().getCompilationDir(), RootFile.Name,
1333 RootFile.Checksum, RootFile.Source));
1335 return true;
// Parse the whole input.
//
// Primes the lexer, parses statements until Eof is reached in a buffer with no
// parent include location, then runs the end-of-input checks (unmatched
// conditionals, unassigned file numbers, undefined local/directional symbols)
// and finalizes the streamer when requested.
//
// NoInitialTextSection: when false, Out.initSections() is called first.
// NoFinalize: when true, the undefined-symbol checks and Out.finish() are
//             skipped.
// Returns true if an error was recorded here or in the MCContext.
1338 bool MasmParser::Run(bool NoInitialTextSection, bool NoFinalize) {
1339 // Create the initial section, if requested.
1340 if (!NoInitialTextSection)
1341 Out.initSections(false, getTargetParser().getSTI());
1343 // Prime the lexer.
1344 Lex();
1346 HadError = false;
// Snapshot of the conditional state, compared after parsing to detect
// unbalanced conditionals.
1347 AsmCond StartingCondState = TheCondState;
1348 SmallVector<AsmRewrite, 4> AsmStrRewrites;
1350 // If we are generating dwarf for assembly source files save the initial text
1351 // section. (Don't use enabledGenDwarfForAssembly() here, as we aren't
1352 // emitting any actual debug info yet and haven't had a chance to parse any
1353 // embedded .file directives.)
1354 if (getContext().getGenDwarfForAssembly()) {
1355 MCSection *Sec = getStreamer().getCurrentSectionOnly();
1356 if (!Sec->getBeginSymbol()) {
1357 MCSymbol *SectionStartSym = getContext().createTempSymbol();
1358 getStreamer().emitLabel(SectionStartSym);
1359 Sec->setBeginSymbol(SectionStartSym);
1361 bool InsertResult = getContext().addGenDwarfSection(Sec);
1362 assert(InsertResult && ".text section should not have debug info yet");
// Silences the unused-variable warning when the assert is compiled out.
1363 (void)InsertResult;
1366 getTargetParser().onBeginOfFile();
1368 // While we have input, parse each statement.
1369 while (Lexer.isNot(AsmToken::Eof) ||
1370 SrcMgr.getParentIncludeLoc(CurBuffer) != SMLoc()) {
1371 // Skip through the EOF at the end of an inclusion.
1372 if (Lexer.is(AsmToken::Eof))
1373 Lex();
1375 ParseStatementInfo Info(&AsmStrRewrites);
// Parsed is true when parseStatement reported a failure.
1376 bool Parsed = parseStatement(Info, nullptr);
1378 // If we have a Lexer Error we are on an Error Token. Load in Lexer Error
1379 // for printing ErrMsg via Lex() only if no (presumably better) parser error
1380 // exists.
1381 if (Parsed && !hasPendingError() && Lexer.getTok().is(AsmToken::Error)) {
1382 Lex();
1385 // parseStatement returned true so may need to emit an error.
1386 printPendingErrors();
1388 // Skipping to the next line if needed.
1389 if (Parsed && !getLexer().isAtStartOfStatement())
1390 eatToEndOfStatement();
1393 getTargetParser().onEndOfFile();
1394 printPendingErrors();
1396 // All errors should have been emitted.
1397 assert(!hasPendingError() && "unexpected error from parseStatement");
1399 getTargetParser().flushPendingInstructions(getStreamer());
// The conditional state must match the snapshot taken before parsing.
1401 if (TheCondState.TheCond != StartingCondState.TheCond ||
1402 TheCondState.Ignore != StartingCondState.Ignore)
1403 printError(getTok().getLoc(), "unmatched .ifs or .elses");
1404 // Check to see there are no empty DwarfFile slots.
1405 const auto &LineTables = getContext().getMCDwarfLineTables();
1406 if (!LineTables.empty()) {
1407 unsigned Index = 0;
// Only the first line table is inspected; slot 0 is allowed to be unnamed.
1408 for (const auto &File : LineTables.begin()->second.getMCDwarfFiles()) {
1409 if (File.Name.empty() && Index != 0)
1410 printError(getTok().getLoc(), "unassigned file number: " +
1411 Twine(Index) +
1412 " for .file directives");
1413 ++Index;
1417 // Check to see that all assembler local symbols were actually defined.
1418 // Targets that don't do subsections via symbols may not want this, though,
1419 // so conservatively exclude them. Only do this if we're finalizing, though,
1420 // as otherwise we won't necessarily have seen everything yet.
1421 if (!NoFinalize) {
1422 if (MAI.hasSubsectionsViaSymbols()) {
1423 for (const auto &TableEntry : getContext().getSymbols()) {
1424 MCSymbol *Sym = TableEntry.getValue();
1425 // Variable symbols may not be marked as defined, so check those
1426 // explicitly. If we know it's a variable, we have a definition for
1427 // the purposes of this check.
1428 if (Sym->isTemporary() && !Sym->isVariable() && !Sym->isDefined())
1429 // FIXME: We would really like to refer back to where the symbol was
1430 // first referenced for a source location. We need to add something
1431 // to track that. Currently, we just point to the end of the file.
1432 printError(getTok().getLoc(), "assembler local symbol '" +
1433 Sym->getName() + "' not defined");
1437 // Temporary symbols like the ones for directional jumps don't go in the
1438 // symbol table. They also need to be diagnosed in all (final) cases.
1439 for (std::tuple<SMLoc, CppHashInfoTy, MCSymbol *> &LocSym : DirLabels) {
1440 if (std::get<2>(LocSym)->isUndefined()) {
1441 // Reset the state of any "# line file" directives we've seen to the
1442 // context as it was at the diagnostic site.
1443 CppHashInfo = std::get<1>(LocSym);
1444 printError(std::get<0>(LocSym), "directional label undefined");
1449 // Finalize the output stream if there are no errors and if the client wants
1450 // us to.
1451 if (!HadError && !NoFinalize)
1452 Out.finish(Lexer.getLoc());
1454 return HadError || getContext().hadError();
1457 bool MasmParser::checkForValidSection() {
1458 if (!ParsingMSInlineAsm && !getStreamer().getCurrentSectionOnly()) {
1459 Out.initSections(false, getTargetParser().getSTI());
1460 return Error(getTok().getLoc(),
1461 "expected section directive before assembly directive");
1463 return false;
1466 /// Throw away the rest of the line for testing purposes.
1467 void MasmParser::eatToEndOfStatement() {
1468 while (Lexer.isNot(AsmToken::EndOfStatement)) {
1469 if (Lexer.is(AsmToken::Eof)) {
1470 SMLoc ParentIncludeLoc = SrcMgr.getParentIncludeLoc(CurBuffer);
1471 if (ParentIncludeLoc == SMLoc()) {
1472 break;
1475 EndStatementAtEOFStack.pop_back();
1476 jumpToLoc(ParentIncludeLoc, 0, EndStatementAtEOFStack.back());
1479 Lexer.Lex();
1482 // Eat EOL.
1483 if (Lexer.is(AsmToken::EndOfStatement))
1484 Lexer.Lex();
1487 SmallVector<StringRef, 1>
1488 MasmParser::parseStringRefsTo(AsmToken::TokenKind EndTok) {
1489 SmallVector<StringRef, 1> Refs;
1490 const char *Start = getTok().getLoc().getPointer();
1491 while (Lexer.isNot(EndTok)) {
1492 if (Lexer.is(AsmToken::Eof)) {
1493 SMLoc ParentIncludeLoc = SrcMgr.getParentIncludeLoc(CurBuffer);
1494 if (ParentIncludeLoc == SMLoc()) {
1495 break;
1497 Refs.emplace_back(Start, getTok().getLoc().getPointer() - Start);
1499 EndStatementAtEOFStack.pop_back();
1500 jumpToLoc(ParentIncludeLoc, 0, EndStatementAtEOFStack.back());
1501 Lexer.Lex();
1502 Start = getTok().getLoc().getPointer();
1503 } else {
1504 Lexer.Lex();
1507 Refs.emplace_back(Start, getTok().getLoc().getPointer() - Start);
1508 return Refs;
1511 std::string MasmParser::parseStringTo(AsmToken::TokenKind EndTok) {
1512 SmallVector<StringRef, 1> Refs = parseStringRefsTo(EndTok);
1513 std::string Str;
1514 for (StringRef S : Refs) {
1515 Str.append(S.str());
1517 return Str;
1520 StringRef MasmParser::parseStringToEndOfStatement() {
1521 const char *Start = getTok().getLoc().getPointer();
1523 while (Lexer.isNot(AsmToken::EndOfStatement) && Lexer.isNot(AsmToken::Eof))
1524 Lexer.Lex();
1526 const char *End = getTok().getLoc().getPointer();
1527 return StringRef(Start, End - Start);
1530 /// Parse a paren expression and return it.
1531 /// NOTE: This assumes the leading '(' has already been consumed.
1533 /// parenexpr ::= expr)
1535 bool MasmParser::parseParenExpr(const MCExpr *&Res, SMLoc &EndLoc) {
1536 if (parseExpression(Res))
1537 return true;
1538 EndLoc = Lexer.getTok().getEndLoc();
1539 return parseRParen();
1542 /// Parse a bracket expression and return it.
1543 /// NOTE: This assumes the leading '[' has already been consumed.
1545 /// bracketexpr ::= expr]
1547 bool MasmParser::parseBracketExpr(const MCExpr *&Res, SMLoc &EndLoc) {
1548 if (parseExpression(Res))
1549 return true;
1550 EndLoc = getTok().getEndLoc();
1551 if (parseToken(AsmToken::RBrac, "expected ']' in brackets expression"))
1552 return true;
1553 return false;
1556 /// Parse a primary expression and return it.
1557 /// primaryexpr ::= (parenexpr
1558 /// primaryexpr ::= symbol
1559 /// primaryexpr ::= number
1560 /// primaryexpr ::= '.'
1561 /// primaryexpr ::= ~,+,-,'not' primaryexpr
1562 /// primaryexpr ::= string
1563 /// (a string is interpreted as a 64-bit number in big-endian base-256)
///
/// Dispatches on the kind of the current token.
/// \param Res receives the parsed expression on success.
/// \param EndLoc receives an end location on most, but not all, paths (see
///        the NOTE(review) comments below).
/// \param TypeInfo if non-null, receives type information for a symbol
///        reference; recursive calls for unary operators pass nullptr.
/// \returns true on error, false on success.
1564 bool MasmParser::parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc,
1565 AsmTypeInfo *TypeInfo) {
1566 SMLoc FirstTokenLoc = getLexer().getLoc();
1567 AsmToken::TokenKind FirstTokenKind = Lexer.getKind();
1568 switch (FirstTokenKind) {
1569 default:
1570 return TokError("unknown token in expression");
1571 // If we have an error assume that we've already handled it.
1572 case AsmToken::Error:
1573 return true;
// '!' operand: logical not.
1574 case AsmToken::Exclaim:
1575 Lex(); // Eat the operator.
1576 if (parsePrimaryExpr(Res, EndLoc, nullptr))
1577 return true;
1578 Res = MCUnaryExpr::createLNot(Res, getContext(), FirstTokenLoc);
1579 return false;
1580 case AsmToken::Dollar:
1581 case AsmToken::At:
1582 case AsmToken::Identifier: {
1583 StringRef Identifier;
1584 if (parseIdentifier(Identifier)) {
1585 // We may have failed but $ may be a valid token.
1586 if (getTok().is(AsmToken::Dollar)) {
1587 if (Lexer.getMAI().getDollarIsPC()) {
1588 Lex();
1589 // This is a '$' reference, which references the current PC. Emit a
1590 // temporary label to the streamer and refer to it.
1591 MCSymbol *Sym = Ctx.createTempSymbol();
1592 Out.emitLabel(Sym);
1593 Res = MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_None,
1594 getContext());
1595 EndLoc = FirstTokenLoc;
1596 return false;
1598 return Error(FirstTokenLoc, "invalid token in expression");
1601 // Parse named bitwise negation.
1602 if (Identifier.equals_insensitive("not")) {
1603 if (parsePrimaryExpr(Res, EndLoc, nullptr))
1604 return true;
1605 Res = MCUnaryExpr::createNot(Res, getContext(), FirstTokenLoc);
1606 return false;
1608 // Parse directional local label references.
1609 if (Identifier.equals_insensitive("@b") ||
1610 Identifier.equals_insensitive("@f")) {
1611 bool Before = Identifier.equals_insensitive("@b");
1612 MCSymbol *Sym = getContext().getDirectionalLocalSymbol(0, Before);
1613 if (Before && Sym->isUndefined())
1614 return Error(FirstTokenLoc, "Expected @@ label before @B reference");
// NOTE(review): EndLoc is not assigned on this path.
1615 Res = MCSymbolRefExpr::create(Sym, getContext());
1616 return false;
1618 // Parse symbol variant.
1619 std::pair<StringRef, StringRef> Split;
1620 if (!MAI.useParensForSymbolVariant()) {
1621 Split = Identifier.split('@');
1622 } else if (Lexer.is(AsmToken::LParen)) {
1623 Lex(); // eat '('.
1624 StringRef VName;
// NOTE(review): the result of parseIdentifier is not checked here.
1625 parseIdentifier(VName);
1626 // eat ')'.
1627 if (parseToken(AsmToken::RParen,
1628 "unexpected token in variant, expected ')'"))
1629 return true;
1630 Split = std::make_pair(Identifier, VName);
1633 EndLoc = SMLoc::getFromPointer(Identifier.end());
1635 // This is a symbol reference.
1636 StringRef SymbolName = Identifier;
1637 if (SymbolName.empty())
1638 return Error(getLexer().getLoc(), "expected a symbol reference");
1640 MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None;
1642 // Look up the symbol variant if used.
1643 if (!Split.second.empty()) {
1644 Variant = MCSymbolRefExpr::getVariantKindForName(Split.second);
1645 if (Variant != MCSymbolRefExpr::VK_Invalid) {
1646 SymbolName = Split.first;
1647 } else if (MAI.doesAllowAtInName() && !MAI.useParensForSymbolVariant()) {
// Unknown variant, but '@' is allowed in names: keep the full identifier
// as the symbol name.
1648 Variant = MCSymbolRefExpr::VK_None;
1649 } else {
1650 return Error(SMLoc::getFromPointer(Split.second.begin()),
1651 "invalid variant '" + Split.second + "'");
1655 // Find the field offset if used.
1656 AsmFieldInfo Info;
1657 Split = SymbolName.split('.');
// No '.' in the name: there is no field to look up.
1658 if (Split.second.empty()) {
1659 } else {
1660 SymbolName = Split.first;
// lookUpField presumably returns true on failure, like the other parse
// helpers in this file -- TODO confirm. On failure, retry with the text after
// the first '.' split again into base and member.
1661 if (lookUpField(SymbolName, Split.second, Info)) {
1662 std::pair<StringRef, StringRef> BaseMember = Split.second.split('.');
1663 StringRef Base = BaseMember.first, Member = BaseMember.second;
1664 lookUpField(Base, Member, Info);
1665 } else if (Structs.count(SymbolName.lower())) {
1666 // This is actually a reference to a field offset.
1667 Res = MCConstantExpr::create(Info.Offset, getContext());
1668 return false;
1672 MCSymbol *Sym = getContext().getInlineAsmLabel(SymbolName);
1673 if (!Sym) {
1674 // If this is a built-in numeric value, treat it as a constant.
1675 auto BuiltinIt = BuiltinSymbolMap.find(SymbolName.lower());
1676 const BuiltinSymbol Symbol = (BuiltinIt == BuiltinSymbolMap.end())
1677 ? BI_NO_SYMBOL
1678 : BuiltinIt->getValue();
1679 if (Symbol != BI_NO_SYMBOL) {
1680 const MCExpr *Value = evaluateBuiltinValue(Symbol, FirstTokenLoc);
1681 if (Value) {
1682 Res = Value;
1683 return false;
1687 // Variables use case-insensitive symbol names; if this is a variable, we
1688 // find the symbol using its canonical name.
1689 auto VarIt = Variables.find(SymbolName.lower());
1690 if (VarIt != Variables.end())
1691 SymbolName = VarIt->second.Name;
1692 Sym = getContext().getOrCreateSymbol(SymbolName);
1695 // If this is an absolute variable reference, substitute it now to preserve
1696 // semantics in the face of reassignment.
1697 if (Sym->isVariable()) {
1698 auto V = Sym->getVariableValue(/*SetUsed=*/false);
1699 bool DoInline = isa<MCConstantExpr>(V) && !Variant;
// A target expression decides for itself whether it is inlined.
1700 if (auto TV = dyn_cast<MCTargetExpr>(V))
1701 DoInline = TV->inlineAssignedExpr();
1702 if (DoInline) {
1703 if (Variant)
1704 return Error(EndLoc, "unexpected modifier on variable reference");
1705 Res = Sym->getVariableValue(/*SetUsed=*/false);
1706 return false;
1710 // Otherwise create a symbol ref.
1711 const MCExpr *SymRef =
1712 MCSymbolRefExpr::create(Sym, Variant, getContext(), FirstTokenLoc);
// A non-zero field offset is added on top of the symbol reference.
1713 if (Info.Offset) {
1714 Res = MCBinaryExpr::create(
1715 MCBinaryExpr::Add, SymRef,
1716 MCConstantExpr::create(Info.Offset, getContext()), getContext());
1717 } else {
1718 Res = SymRef;
1720 if (TypeInfo) {
// If the field lookup produced no type name, fall back to the type recorded
// in KnownType for the lower-cased identifier, if any.
1721 if (Info.Type.Name.empty()) {
1722 auto TypeIt = KnownType.find(Identifier.lower());
1723 if (TypeIt != KnownType.end()) {
1724 Info.Type = TypeIt->second;
1728 *TypeInfo = Info.Type;
1730 return false;
1732 case AsmToken::BigNum:
1733 return TokError("literal value out of range for directive");
1734 case AsmToken::Integer: {
1735 int64_t IntVal = getTok().getIntVal();
1736 Res = MCConstantExpr::create(IntVal, getContext());
1737 EndLoc = Lexer.getTok().getEndLoc();
1738 Lex(); // Eat token.
1739 return false;
1741 case AsmToken::String: {
1742 // MASM strings (used as constants) are interpreted as big-endian base-256.
1743 SMLoc ValueLoc = getTok().getLoc();
1744 std::string Value;
1745 if (parseEscapedString(Value))
1746 return true;
// More than 8 bytes cannot fit in the 64-bit accumulator below.
1747 if (Value.size() > 8)
1748 return Error(ValueLoc, "literal value out of range");
1749 uint64_t IntValue = 0;
1750 for (const unsigned char CharVal : Value)
1751 IntValue = (IntValue << 8) | CharVal;
// NOTE(review): EndLoc is not assigned in this case.
1752 Res = MCConstantExpr::create(IntValue, getContext());
1753 return false;
1755 case AsmToken::Real: {
// The literal is parsed as an IEEE double and its bit pattern is used as the
// integer constant.
1756 APFloat RealVal(APFloat::IEEEdouble(), getTok().getString());
1757 uint64_t IntVal = RealVal.bitcastToAPInt().getZExtValue();
1758 Res = MCConstantExpr::create(IntVal, getContext());
1759 EndLoc = Lexer.getTok().getEndLoc();
1760 Lex(); // Eat token.
1761 return false;
1763 case AsmToken::Dot: {
1764 // This is a '.' reference, which references the current PC. Emit a
1765 // temporary label to the streamer and refer to it.
1766 MCSymbol *Sym = Ctx.createTempSymbol();
1767 Out.emitLabel(Sym);
1768 Res = MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_None, getContext());
1769 EndLoc = Lexer.getTok().getEndLoc();
1770 Lex(); // Eat the '.'.
1771 return false;
1773 case AsmToken::LParen:
1774 Lex(); // Eat the '('.
1775 return parseParenExpr(Res, EndLoc);
1776 case AsmToken::LBrac:
1777 if (!PlatformParser->HasBracketExpressions())
1778 return TokError("brackets expression not supported on this target");
1779 Lex(); // Eat the '['.
1780 return parseBracketExpr(Res, EndLoc);
1781 case AsmToken::Minus:
1782 Lex(); // Eat the operator.
1783 if (parsePrimaryExpr(Res, EndLoc, nullptr))
1784 return true;
1785 Res = MCUnaryExpr::createMinus(Res, getContext(), FirstTokenLoc);
1786 return false;
1787 case AsmToken::Plus:
1788 Lex(); // Eat the operator.
1789 if (parsePrimaryExpr(Res, EndLoc, nullptr))
1790 return true;
1791 Res = MCUnaryExpr::createPlus(Res, getContext(), FirstTokenLoc);
1792 return false;
1793 case AsmToken::Tilde:
1794 Lex(); // Eat the operator.
1795 if (parsePrimaryExpr(Res, EndLoc, nullptr))
1796 return true;
1797 Res = MCUnaryExpr::createNot(Res, getContext(), FirstTokenLoc);
1798 return false;
1799 // MIPS unary expression operators. The lexer won't generate these tokens if
1800 // MCAsmInfo::HasMipsExpressions is false for the target.
1801 case AsmToken::PercentCall16:
1802 case AsmToken::PercentCall_Hi:
1803 case AsmToken::PercentCall_Lo:
1804 case AsmToken::PercentDtprel_Hi:
1805 case AsmToken::PercentDtprel_Lo:
1806 case AsmToken::PercentGot:
1807 case AsmToken::PercentGot_Disp:
1808 case AsmToken::PercentGot_Hi:
1809 case AsmToken::PercentGot_Lo:
1810 case AsmToken::PercentGot_Ofst:
1811 case AsmToken::PercentGot_Page:
1812 case AsmToken::PercentGottprel:
1813 case AsmToken::PercentGp_Rel:
1814 case AsmToken::PercentHi:
1815 case AsmToken::PercentHigher:
1816 case AsmToken::PercentHighest:
1817 case AsmToken::PercentLo:
1818 case AsmToken::PercentNeg:
1819 case AsmToken::PercentPcrel_Hi:
1820 case AsmToken::PercentPcrel_Lo:
1821 case AsmToken::PercentTlsgd:
1822 case AsmToken::PercentTlsldm:
1823 case AsmToken::PercentTprel_Hi:
1824 case AsmToken::PercentTprel_Lo:
1825 Lex(); // Eat the operator.
1826 if (Lexer.isNot(AsmToken::LParen))
1827 return TokError("expected '(' after operator");
1828 Lex(); // Eat the '('.
1829 if (parseExpression(Res, EndLoc))
1830 return true;
1831 if (parseRParen())
1832 return true;
// The target builds the unary expression; a null result is reported as failure.
1833 Res = getTargetParser().createTargetUnaryExpr(Res, FirstTokenKind, Ctx);
1834 return !Res;
1838 bool MasmParser::parseExpression(const MCExpr *&Res) {
1839 SMLoc EndLoc;
1840 return parseExpression(Res, EndLoc);
1843 /// This function checks if the next token is <string> type or arithmetic.
1844 /// string that begin with character '<' must end with character '>'.
1845 /// otherwise it is arithmetics.
1846 /// If the function returns a 'true' value,
1847 /// the End argument will be filled with the last location pointed to the '>'
1848 /// character.
1849 static bool isAngleBracketString(SMLoc &StrLoc, SMLoc &EndLoc) {
1850 assert((StrLoc.getPointer() != nullptr) &&
1851 "Argument to the function cannot be a NULL value");
1852 const char *CharPtr = StrLoc.getPointer();
1853 while ((*CharPtr != '>') && (*CharPtr != '\n') && (*CharPtr != '\r') &&
1854 (*CharPtr != '\0')) {
1855 if (*CharPtr == '!')
1856 CharPtr++;
1857 CharPtr++;
1859 if (*CharPtr == '>') {
1860 EndLoc = StrLoc.getFromPointer(CharPtr + 1);
1861 return true;
1863 return false;
1866 /// creating a string without the escape characters '!'.
1867 static std::string angleBracketString(StringRef BracketContents) {
1868 std::string Res;
1869 for (size_t Pos = 0; Pos < BracketContents.size(); Pos++) {
1870 if (BracketContents[Pos] == '!')
1871 Pos++;
1872 Res += BracketContents[Pos];
1874 return Res;
1877 /// Parse an expression and return it.
1879 /// expr ::= expr &&,|| expr -> lowest.
1880 /// expr ::= expr |,^,&,! expr
1881 /// expr ::= expr ==,!=,<>,<,<=,>,>= expr
1882 /// expr ::= expr <<,>> expr
1883 /// expr ::= expr +,- expr
1884 /// expr ::= expr *,/,% expr -> highest.
1885 /// expr ::= primaryexpr
1887 bool MasmParser::parseExpression(const MCExpr *&Res, SMLoc &EndLoc) {
1888 // Parse the expression.
1889 Res = nullptr;
1890 if (getTargetParser().parsePrimaryExpr(Res, EndLoc) ||
1891 parseBinOpRHS(1, Res, EndLoc))
1892 return true;
1894 // Try to constant fold it up front, if possible. Do not exploit
1895 // assembler here.
1896 int64_t Value;
1897 if (Res->evaluateAsAbsolute(Value))
1898 Res = MCConstantExpr::create(Value, getContext());
1900 return false;
1903 bool MasmParser::parseParenExpression(const MCExpr *&Res, SMLoc &EndLoc) {
1904 Res = nullptr;
1905 return parseParenExpr(Res, EndLoc) || parseBinOpRHS(1, Res, EndLoc);
1908 bool MasmParser::parseParenExprOfDepth(unsigned ParenDepth, const MCExpr *&Res,
1909 SMLoc &EndLoc) {
1910 if (parseParenExpr(Res, EndLoc))
1911 return true;
1913 for (; ParenDepth > 0; --ParenDepth) {
1914 if (parseBinOpRHS(1, Res, EndLoc))
1915 return true;
1917 // We don't Lex() the last RParen.
1918 // This is the same behavior as parseParenExpression().
1919 if (ParenDepth - 1 > 0) {
1920 EndLoc = getTok().getEndLoc();
1921 if (parseRParen())
1922 return true;
1925 return false;
1928 bool MasmParser::parseAbsoluteExpression(int64_t &Res) {
1929 const MCExpr *Expr;
1931 SMLoc StartLoc = Lexer.getLoc();
1932 if (parseExpression(Expr))
1933 return true;
1935 if (!Expr->evaluateAsAbsolute(Res, getStreamer().getAssemblerPtr()))
1936 return Error(StartLoc, "expected absolute expression");
1938 return false;
1941 static unsigned getGNUBinOpPrecedence(AsmToken::TokenKind K,
1942 MCBinaryExpr::Opcode &Kind,
1943 bool ShouldUseLogicalShr,
1944 bool EndExpressionAtGreater) {
1945 switch (K) {
1946 default:
1947 return 0; // not a binop.
1949 // Lowest Precedence: &&, ||
1950 case AsmToken::AmpAmp:
1951 Kind = MCBinaryExpr::LAnd;
1952 return 2;
1953 case AsmToken::PipePipe:
1954 Kind = MCBinaryExpr::LOr;
1955 return 1;
1957 // Low Precedence: ==, !=, <>, <, <=, >, >=
1958 case AsmToken::EqualEqual:
1959 Kind = MCBinaryExpr::EQ;
1960 return 3;
1961 case AsmToken::ExclaimEqual:
1962 case AsmToken::LessGreater:
1963 Kind = MCBinaryExpr::NE;
1964 return 3;
1965 case AsmToken::Less:
1966 Kind = MCBinaryExpr::LT;
1967 return 3;
1968 case AsmToken::LessEqual:
1969 Kind = MCBinaryExpr::LTE;
1970 return 3;
1971 case AsmToken::Greater:
1972 if (EndExpressionAtGreater)
1973 return 0;
1974 Kind = MCBinaryExpr::GT;
1975 return 3;
1976 case AsmToken::GreaterEqual:
1977 Kind = MCBinaryExpr::GTE;
1978 return 3;
1980 // Low Intermediate Precedence: +, -
1981 case AsmToken::Plus:
1982 Kind = MCBinaryExpr::Add;
1983 return 4;
1984 case AsmToken::Minus:
1985 Kind = MCBinaryExpr::Sub;
1986 return 4;
1988 // High Intermediate Precedence: |, &, ^
1989 case AsmToken::Pipe:
1990 Kind = MCBinaryExpr::Or;
1991 return 5;
1992 case AsmToken::Caret:
1993 Kind = MCBinaryExpr::Xor;
1994 return 5;
1995 case AsmToken::Amp:
1996 Kind = MCBinaryExpr::And;
1997 return 5;
1999 // Highest Precedence: *, /, %, <<, >>
2000 case AsmToken::Star:
2001 Kind = MCBinaryExpr::Mul;
2002 return 6;
2003 case AsmToken::Slash:
2004 Kind = MCBinaryExpr::Div;
2005 return 6;
2006 case AsmToken::Percent:
2007 Kind = MCBinaryExpr::Mod;
2008 return 6;
2009 case AsmToken::LessLess:
2010 Kind = MCBinaryExpr::Shl;
2011 return 6;
2012 case AsmToken::GreaterGreater:
2013 if (EndExpressionAtGreater)
2014 return 0;
2015 Kind = ShouldUseLogicalShr ? MCBinaryExpr::LShr : MCBinaryExpr::AShr;
2016 return 6;
2020 unsigned MasmParser::getBinOpPrecedence(AsmToken::TokenKind K,
2021 MCBinaryExpr::Opcode &Kind) {
2022 bool ShouldUseLogicalShr = MAI.shouldUseLogicalShr();
2023 return getGNUBinOpPrecedence(K, Kind, ShouldUseLogicalShr,
2024 AngleBracketDepth > 0);
2027 /// Parse all binary operators with precedence >= 'Precedence'.
2028 /// Res contains the LHS of the expression on input.
2029 bool MasmParser::parseBinOpRHS(unsigned Precedence, const MCExpr *&Res,
2030 SMLoc &EndLoc) {
2031 SMLoc StartLoc = Lexer.getLoc();
2032 while (true) {
2033 AsmToken::TokenKind TokKind = Lexer.getKind();
2034 if (Lexer.getKind() == AsmToken::Identifier) {
2035 TokKind = StringSwitch<AsmToken::TokenKind>(Lexer.getTok().getString())
2036 .CaseLower("and", AsmToken::Amp)
2037 .CaseLower("not", AsmToken::Exclaim)
2038 .CaseLower("or", AsmToken::Pipe)
2039 .CaseLower("xor", AsmToken::Caret)
2040 .CaseLower("shl", AsmToken::LessLess)
2041 .CaseLower("shr", AsmToken::GreaterGreater)
2042 .CaseLower("eq", AsmToken::EqualEqual)
2043 .CaseLower("ne", AsmToken::ExclaimEqual)
2044 .CaseLower("lt", AsmToken::Less)
2045 .CaseLower("le", AsmToken::LessEqual)
2046 .CaseLower("gt", AsmToken::Greater)
2047 .CaseLower("ge", AsmToken::GreaterEqual)
2048 .Default(TokKind);
2050 MCBinaryExpr::Opcode Kind = MCBinaryExpr::Add;
2051 unsigned TokPrec = getBinOpPrecedence(TokKind, Kind);
2053 // If the next token is lower precedence than we are allowed to eat, return
2054 // successfully with what we ate already.
2055 if (TokPrec < Precedence)
2056 return false;
2058 Lex();
2060 // Eat the next primary expression.
2061 const MCExpr *RHS;
2062 if (getTargetParser().parsePrimaryExpr(RHS, EndLoc))
2063 return true;
2065 // If BinOp binds less tightly with RHS than the operator after RHS, let
2066 // the pending operator take RHS as its LHS.
2067 MCBinaryExpr::Opcode Dummy;
2068 unsigned NextTokPrec = getBinOpPrecedence(Lexer.getKind(), Dummy);
2069 if (TokPrec < NextTokPrec && parseBinOpRHS(TokPrec + 1, RHS, EndLoc))
2070 return true;
2072 // Merge LHS and RHS according to operator.
2073 Res = MCBinaryExpr::create(Kind, Res, RHS, getContext(), StartLoc);
2077 /// ParseStatement:
2078 /// ::= % statement
2079 /// ::= EndOfStatement
2080 /// ::= Label* Directive ...Operands... EndOfStatement
2081 /// ::= Label* Identifier OperandList* EndOfStatement
2082 bool MasmParser::parseStatement(ParseStatementInfo &Info,
2083 MCAsmParserSemaCallback *SI) {
2084 assert(!hasPendingError() && "parseStatement started with pending error");
2085 // Eat initial spaces and comments.
2086 while (Lexer.is(AsmToken::Space))
2087 Lex();
2088 if (Lexer.is(AsmToken::EndOfStatement)) {
2089 // If this is a line comment we can drop it safely.
2090 if (getTok().getString().empty() || getTok().getString().front() == '\r' ||
2091 getTok().getString().front() == '\n')
2092 Out.addBlankLine();
2093 Lex();
2094 return false;
2097 // If preceded by an expansion operator, first expand all text macros and
2098 // macro functions.
2099 if (getTok().is(AsmToken::Percent)) {
2100 SMLoc ExpansionLoc = getTok().getLoc();
2101 if (parseToken(AsmToken::Percent) || expandStatement(ExpansionLoc))
2102 return true;
2105 // Statements always start with an identifier, unless we're dealing with a
2106 // processor directive (.386, .686, etc.) that lexes as a real.
2107 AsmToken ID = getTok();
2108 SMLoc IDLoc = ID.getLoc();
2109 StringRef IDVal;
2110 if (Lexer.is(AsmToken::HashDirective))
2111 return parseCppHashLineFilenameComment(IDLoc);
2112 if (Lexer.is(AsmToken::Dot)) {
2113 // Treat '.' as a valid identifier in this context.
2114 Lex();
2115 IDVal = ".";
2116 } else if (Lexer.is(AsmToken::Real)) {
2117 // Treat ".<number>" as a valid identifier in this context.
2118 IDVal = getTok().getString();
2119 Lex(); // always eat a token
2120 if (!IDVal.starts_with("."))
2121 return Error(IDLoc, "unexpected token at start of statement");
2122 } else if (parseIdentifier(IDVal, StartOfStatement)) {
2123 if (!TheCondState.Ignore) {
2124 Lex(); // always eat a token
2125 return Error(IDLoc, "unexpected token at start of statement");
2127 IDVal = "";
2130 // Handle conditional assembly here before checking for skipping. We
2131 // have to do this so that .endif isn't skipped in a ".if 0" block for
2132 // example.
2133 StringMap<DirectiveKind>::const_iterator DirKindIt =
2134 DirectiveKindMap.find(IDVal.lower());
2135 DirectiveKind DirKind = (DirKindIt == DirectiveKindMap.end())
2136 ? DK_NO_DIRECTIVE
2137 : DirKindIt->getValue();
2138 switch (DirKind) {
2139 default:
2140 break;
2141 case DK_IF:
2142 case DK_IFE:
2143 return parseDirectiveIf(IDLoc, DirKind);
2144 case DK_IFB:
2145 return parseDirectiveIfb(IDLoc, true);
2146 case DK_IFNB:
2147 return parseDirectiveIfb(IDLoc, false);
2148 case DK_IFDEF:
2149 return parseDirectiveIfdef(IDLoc, true);
2150 case DK_IFNDEF:
2151 return parseDirectiveIfdef(IDLoc, false);
2152 case DK_IFDIF:
2153 return parseDirectiveIfidn(IDLoc, /*ExpectEqual=*/false,
2154 /*CaseInsensitive=*/false);
2155 case DK_IFDIFI:
2156 return parseDirectiveIfidn(IDLoc, /*ExpectEqual=*/false,
2157 /*CaseInsensitive=*/true);
2158 case DK_IFIDN:
2159 return parseDirectiveIfidn(IDLoc, /*ExpectEqual=*/true,
2160 /*CaseInsensitive=*/false);
2161 case DK_IFIDNI:
2162 return parseDirectiveIfidn(IDLoc, /*ExpectEqual=*/true,
2163 /*CaseInsensitive=*/true);
2164 case DK_ELSEIF:
2165 case DK_ELSEIFE:
2166 return parseDirectiveElseIf(IDLoc, DirKind);
2167 case DK_ELSEIFB:
2168 return parseDirectiveElseIfb(IDLoc, true);
2169 case DK_ELSEIFNB:
2170 return parseDirectiveElseIfb(IDLoc, false);
2171 case DK_ELSEIFDEF:
2172 return parseDirectiveElseIfdef(IDLoc, true);
2173 case DK_ELSEIFNDEF:
2174 return parseDirectiveElseIfdef(IDLoc, false);
2175 case DK_ELSEIFDIF:
2176 return parseDirectiveElseIfidn(IDLoc, /*ExpectEqual=*/false,
2177 /*CaseInsensitive=*/false);
2178 case DK_ELSEIFDIFI:
2179 return parseDirectiveElseIfidn(IDLoc, /*ExpectEqual=*/false,
2180 /*CaseInsensitive=*/true);
2181 case DK_ELSEIFIDN:
2182 return parseDirectiveElseIfidn(IDLoc, /*ExpectEqual=*/true,
2183 /*CaseInsensitive=*/false);
2184 case DK_ELSEIFIDNI:
2185 return parseDirectiveElseIfidn(IDLoc, /*ExpectEqual=*/true,
2186 /*CaseInsensitive=*/true);
2187 case DK_ELSE:
2188 return parseDirectiveElse(IDLoc);
2189 case DK_ENDIF:
2190 return parseDirectiveEndIf(IDLoc);
2193 // Ignore the statement if in the middle of inactive conditional
2194 // (e.g. ".if 0").
2195 if (TheCondState.Ignore) {
2196 eatToEndOfStatement();
2197 return false;
2200 // FIXME: Recurse on local labels?
2202 // Check for a label.
2203 // ::= identifier ':'
2204 // ::= number ':'
2205 if (Lexer.is(AsmToken::Colon) && getTargetParser().isLabel(ID)) {
2206 if (checkForValidSection())
2207 return true;
2209 // identifier ':' -> Label.
2210 Lex();
2212 // Diagnose attempt to use '.' as a label.
2213 if (IDVal == ".")
2214 return Error(IDLoc, "invalid use of pseudo-symbol '.' as a label");
2216 // Diagnose attempt to use a variable as a label.
2218 // FIXME: Diagnostics. Note the location of the definition as a label.
2219 // FIXME: This doesn't diagnose assignment to a symbol which has been
2220 // implicitly marked as external.
2221 MCSymbol *Sym;
2222 if (ParsingMSInlineAsm && SI) {
2223 StringRef RewrittenLabel =
2224 SI->LookupInlineAsmLabel(IDVal, getSourceManager(), IDLoc, true);
2225 assert(!RewrittenLabel.empty() &&
2226 "We should have an internal name here.");
2227 Info.AsmRewrites->emplace_back(AOK_Label, IDLoc, IDVal.size(),
2228 RewrittenLabel);
2229 IDVal = RewrittenLabel;
2231 // Handle directional local labels
2232 if (IDVal == "@@") {
2233 Sym = Ctx.createDirectionalLocalSymbol(0);
2234 } else {
2235 Sym = getContext().getOrCreateSymbol(IDVal);
2238 // End of Labels should be treated as end of line for lexing
2239 // purposes but that information is not available to the Lexer who
2240 // does not understand Labels. This may cause us to see a Hash
2241 // here instead of a preprocessor line comment.
2242 if (getTok().is(AsmToken::Hash)) {
2243 std::string CommentStr = parseStringTo(AsmToken::EndOfStatement);
2244 Lexer.Lex();
2245 Lexer.UnLex(AsmToken(AsmToken::EndOfStatement, CommentStr));
2248 // Consume any end of statement token, if present, to avoid spurious
2249 // addBlankLine calls().
2250 if (getTok().is(AsmToken::EndOfStatement)) {
2251 Lex();
2254 getTargetParser().doBeforeLabelEmit(Sym, IDLoc);
2256 // Emit the label.
2257 if (!getTargetParser().isParsingMSInlineAsm())
2258 Out.emitLabel(Sym, IDLoc);
2260 // If we are generating dwarf for assembly source files then gather the
2261 // info to make a dwarf label entry for this label if needed.
2262 if (enabledGenDwarfForAssembly())
2263 MCGenDwarfLabelEntry::Make(Sym, &getStreamer(), getSourceManager(),
2264 IDLoc);
2266 getTargetParser().onLabelParsed(Sym);
2268 return false;
2271 // If macros are enabled, check to see if this is a macro instantiation.
2272 if (const MCAsmMacro *M = getContext().lookupMacro(IDVal.lower())) {
2273 return handleMacroEntry(M, IDLoc);
2276 // Otherwise, we have a normal instruction or directive.
2278 if (DirKind != DK_NO_DIRECTIVE) {
2279 // There are several entities interested in parsing directives:
2281 // 1. Asm parser extensions. For example, platform-specific parsers
2282 // (like the ELF parser) register themselves as extensions.
2283 // 2. The target-specific assembly parser. Some directives are target
2284 // specific or may potentially behave differently on certain targets.
2285 // 3. The generic directive parser implemented by this class. These are
2286 // all the directives that behave in a target and platform independent
2287 // manner, or at least have a default behavior that's shared between
2288 // all targets and platforms.
2290 getTargetParser().flushPendingInstructions(getStreamer());
2292 // Special-case handling of structure-end directives at higher priority,
2293 // since ENDS is overloaded as a segment-end directive.
2294 if (IDVal.equals_insensitive("ends") && StructInProgress.size() > 1 &&
2295 getTok().is(AsmToken::EndOfStatement)) {
2296 return parseDirectiveNestedEnds();
2299 // First, check the extension directive map to see if any extension has
2300 // registered itself to parse this directive.
2301 std::pair<MCAsmParserExtension *, DirectiveHandler> Handler =
2302 ExtensionDirectiveMap.lookup(IDVal.lower());
2303 if (Handler.first)
2304 return (*Handler.second)(Handler.first, IDVal, IDLoc);
2306 // Next, let the target-specific assembly parser try.
2307 if (ID.isNot(AsmToken::Identifier))
2308 return false;
2310 ParseStatus TPDirectiveReturn = getTargetParser().parseDirective(ID);
2311 assert(TPDirectiveReturn.isFailure() == hasPendingError() &&
2312 "Should only return Failure iff there was an error");
2313 if (TPDirectiveReturn.isFailure())
2314 return true;
2315 if (TPDirectiveReturn.isSuccess())
2316 return false;
2318 // Finally, if no one else is interested in this directive, it must be
2319 // generic and familiar to this class.
2320 switch (DirKind) {
2321 default:
2322 break;
2323 case DK_ASCII:
2324 return parseDirectiveAscii(IDVal, false);
2325 case DK_ASCIZ:
2326 case DK_STRING:
2327 return parseDirectiveAscii(IDVal, true);
2328 case DK_BYTE:
2329 case DK_SBYTE:
2330 case DK_DB:
2331 return parseDirectiveValue(IDVal, 1);
2332 case DK_WORD:
2333 case DK_SWORD:
2334 case DK_DW:
2335 return parseDirectiveValue(IDVal, 2);
2336 case DK_DWORD:
2337 case DK_SDWORD:
2338 case DK_DD:
2339 return parseDirectiveValue(IDVal, 4);
2340 case DK_FWORD:
2341 case DK_DF:
2342 return parseDirectiveValue(IDVal, 6);
2343 case DK_QWORD:
2344 case DK_SQWORD:
2345 case DK_DQ:
2346 return parseDirectiveValue(IDVal, 8);
2347 case DK_REAL4:
2348 return parseDirectiveRealValue(IDVal, APFloat::IEEEsingle(), 4);
2349 case DK_REAL8:
2350 return parseDirectiveRealValue(IDVal, APFloat::IEEEdouble(), 8);
2351 case DK_REAL10:
2352 return parseDirectiveRealValue(IDVal, APFloat::x87DoubleExtended(), 10);
2353 case DK_STRUCT:
2354 case DK_UNION:
2355 return parseDirectiveNestedStruct(IDVal, DirKind);
2356 case DK_ENDS:
2357 return parseDirectiveNestedEnds();
2358 case DK_ALIGN:
2359 return parseDirectiveAlign();
2360 case DK_EVEN:
2361 return parseDirectiveEven();
2362 case DK_ORG:
2363 return parseDirectiveOrg();
2364 case DK_EXTERN:
2365 return parseDirectiveExtern();
2366 case DK_PUBLIC:
2367 return parseDirectiveSymbolAttribute(MCSA_Global);
2368 case DK_COMM:
2369 return parseDirectiveComm(/*IsLocal=*/false);
2370 case DK_COMMENT:
2371 return parseDirectiveComment(IDLoc);
2372 case DK_INCLUDE:
2373 return parseDirectiveInclude();
2374 case DK_REPEAT:
2375 return parseDirectiveRepeat(IDLoc, IDVal);
2376 case DK_WHILE:
2377 return parseDirectiveWhile(IDLoc);
2378 case DK_FOR:
2379 return parseDirectiveFor(IDLoc, IDVal);
2380 case DK_FORC:
2381 return parseDirectiveForc(IDLoc, IDVal);
2382 case DK_FILE:
2383 return parseDirectiveFile(IDLoc);
2384 case DK_LINE:
2385 return parseDirectiveLine();
2386 case DK_LOC:
2387 return parseDirectiveLoc();
2388 case DK_STABS:
2389 return parseDirectiveStabs();
2390 case DK_CV_FILE:
2391 return parseDirectiveCVFile();
2392 case DK_CV_FUNC_ID:
2393 return parseDirectiveCVFuncId();
2394 case DK_CV_INLINE_SITE_ID:
2395 return parseDirectiveCVInlineSiteId();
2396 case DK_CV_LOC:
2397 return parseDirectiveCVLoc();
2398 case DK_CV_LINETABLE:
2399 return parseDirectiveCVLinetable();
2400 case DK_CV_INLINE_LINETABLE:
2401 return parseDirectiveCVInlineLinetable();
2402 case DK_CV_DEF_RANGE:
2403 return parseDirectiveCVDefRange();
2404 case DK_CV_STRING:
2405 return parseDirectiveCVString();
2406 case DK_CV_STRINGTABLE:
2407 return parseDirectiveCVStringTable();
2408 case DK_CV_FILECHECKSUMS:
2409 return parseDirectiveCVFileChecksums();
2410 case DK_CV_FILECHECKSUM_OFFSET:
2411 return parseDirectiveCVFileChecksumOffset();
2412 case DK_CV_FPO_DATA:
2413 return parseDirectiveCVFPOData();
2414 case DK_CFI_SECTIONS:
2415 return parseDirectiveCFISections();
2416 case DK_CFI_STARTPROC:
2417 return parseDirectiveCFIStartProc();
2418 case DK_CFI_ENDPROC:
2419 return parseDirectiveCFIEndProc();
2420 case DK_CFI_DEF_CFA:
2421 return parseDirectiveCFIDefCfa(IDLoc);
2422 case DK_CFI_DEF_CFA_OFFSET:
2423 return parseDirectiveCFIDefCfaOffset(IDLoc);
2424 case DK_CFI_ADJUST_CFA_OFFSET:
2425 return parseDirectiveCFIAdjustCfaOffset(IDLoc);
2426 case DK_CFI_DEF_CFA_REGISTER:
2427 return parseDirectiveCFIDefCfaRegister(IDLoc);
2428 case DK_CFI_OFFSET:
2429 return parseDirectiveCFIOffset(IDLoc);
2430 case DK_CFI_REL_OFFSET:
2431 return parseDirectiveCFIRelOffset(IDLoc);
2432 case DK_CFI_PERSONALITY:
2433 return parseDirectiveCFIPersonalityOrLsda(true);
2434 case DK_CFI_LSDA:
2435 return parseDirectiveCFIPersonalityOrLsda(false);
2436 case DK_CFI_REMEMBER_STATE:
2437 return parseDirectiveCFIRememberState(IDLoc);
2438 case DK_CFI_RESTORE_STATE:
2439 return parseDirectiveCFIRestoreState(IDLoc);
2440 case DK_CFI_SAME_VALUE:
2441 return parseDirectiveCFISameValue(IDLoc);
2442 case DK_CFI_RESTORE:
2443 return parseDirectiveCFIRestore(IDLoc);
2444 case DK_CFI_ESCAPE:
2445 return parseDirectiveCFIEscape(IDLoc);
2446 case DK_CFI_RETURN_COLUMN:
2447 return parseDirectiveCFIReturnColumn(IDLoc);
2448 case DK_CFI_SIGNAL_FRAME:
2449 return parseDirectiveCFISignalFrame();
2450 case DK_CFI_UNDEFINED:
2451 return parseDirectiveCFIUndefined(IDLoc);
2452 case DK_CFI_REGISTER:
2453 return parseDirectiveCFIRegister(IDLoc);
2454 case DK_CFI_WINDOW_SAVE:
2455 return parseDirectiveCFIWindowSave(IDLoc);
2456 case DK_EXITM:
2457 Info.ExitValue = "";
2458 return parseDirectiveExitMacro(IDLoc, IDVal, *Info.ExitValue);
2459 case DK_ENDM:
2460 Info.ExitValue = "";
2461 return parseDirectiveEndMacro(IDVal);
2462 case DK_PURGE:
2463 return parseDirectivePurgeMacro(IDLoc);
2464 case DK_END:
2465 return parseDirectiveEnd(IDLoc);
2466 case DK_ERR:
2467 return parseDirectiveError(IDLoc);
2468 case DK_ERRB:
2469 return parseDirectiveErrorIfb(IDLoc, true);
2470 case DK_ERRNB:
2471 return parseDirectiveErrorIfb(IDLoc, false);
2472 case DK_ERRDEF:
2473 return parseDirectiveErrorIfdef(IDLoc, true);
2474 case DK_ERRNDEF:
2475 return parseDirectiveErrorIfdef(IDLoc, false);
2476 case DK_ERRDIF:
2477 return parseDirectiveErrorIfidn(IDLoc, /*ExpectEqual=*/false,
2478 /*CaseInsensitive=*/false);
2479 case DK_ERRDIFI:
2480 return parseDirectiveErrorIfidn(IDLoc, /*ExpectEqual=*/false,
2481 /*CaseInsensitive=*/true);
2482 case DK_ERRIDN:
2483 return parseDirectiveErrorIfidn(IDLoc, /*ExpectEqual=*/true,
2484 /*CaseInsensitive=*/false);
2485 case DK_ERRIDNI:
2486 return parseDirectiveErrorIfidn(IDLoc, /*ExpectEqual=*/true,
2487 /*CaseInsensitive=*/true);
2488 case DK_ERRE:
2489 return parseDirectiveErrorIfe(IDLoc, true);
2490 case DK_ERRNZ:
2491 return parseDirectiveErrorIfe(IDLoc, false);
2492 case DK_RADIX:
2493 return parseDirectiveRadix(IDLoc);
2494 case DK_ECHO:
2495 return parseDirectiveEcho(IDLoc);
2498 return Error(IDLoc, "unknown directive");
2501 // We also check if this is allocating memory with user-defined type.
2502 auto IDIt = Structs.find(IDVal.lower());
2503 if (IDIt != Structs.end())
2504 return parseDirectiveStructValue(/*Structure=*/IDIt->getValue(), IDVal,
2505 IDLoc);
2507 // Non-conditional Microsoft directives sometimes follow their first argument.
2508 const AsmToken nextTok = getTok();
2509 const StringRef nextVal = nextTok.getString();
2510 const SMLoc nextLoc = nextTok.getLoc();
2512 const AsmToken afterNextTok = peekTok();
2514 // There are several entities interested in parsing infix directives:
2516 // 1. Asm parser extensions. For example, platform-specific parsers
2517 // (like the ELF parser) register themselves as extensions.
2518 // 2. The generic directive parser implemented by this class. These are
2519 // all the directives that behave in a target and platform independent
2520 // manner, or at least have a default behavior that's shared between
2521 // all targets and platforms.
2523 getTargetParser().flushPendingInstructions(getStreamer());
2525 // Special-case handling of structure-end directives at higher priority, since
2526 // ENDS is overloaded as a segment-end directive.
2527 if (nextVal.equals_insensitive("ends") && StructInProgress.size() == 1) {
2528 Lex();
2529 return parseDirectiveEnds(IDVal, IDLoc);
2532 // First, check the extension directive map to see if any extension has
2533 // registered itself to parse this directive.
2534 std::pair<MCAsmParserExtension *, DirectiveHandler> Handler =
2535 ExtensionDirectiveMap.lookup(nextVal.lower());
2536 if (Handler.first) {
2537 Lex();
2538 Lexer.UnLex(ID);
2539 return (*Handler.second)(Handler.first, nextVal, nextLoc);
2542 // If no one else is interested in this directive, it must be
2543 // generic and familiar to this class.
2544 DirKindIt = DirectiveKindMap.find(nextVal.lower());
2545 DirKind = (DirKindIt == DirectiveKindMap.end())
2546 ? DK_NO_DIRECTIVE
2547 : DirKindIt->getValue();
2548 switch (DirKind) {
2549 default:
2550 break;
2551 case DK_ASSIGN:
2552 case DK_EQU:
2553 case DK_TEXTEQU:
2554 Lex();
2555 return parseDirectiveEquate(nextVal, IDVal, DirKind, IDLoc);
2556 case DK_BYTE:
2557 if (afterNextTok.is(AsmToken::Identifier) &&
2558 afterNextTok.getString().equals_insensitive("ptr")) {
2559 // Size directive; part of an instruction.
2560 break;
2562 [[fallthrough]];
2563 case DK_SBYTE:
2564 case DK_DB:
2565 Lex();
2566 return parseDirectiveNamedValue(nextVal, 1, IDVal, IDLoc);
2567 case DK_WORD:
2568 if (afterNextTok.is(AsmToken::Identifier) &&
2569 afterNextTok.getString().equals_insensitive("ptr")) {
2570 // Size directive; part of an instruction.
2571 break;
2573 [[fallthrough]];
2574 case DK_SWORD:
2575 case DK_DW:
2576 Lex();
2577 return parseDirectiveNamedValue(nextVal, 2, IDVal, IDLoc);
2578 case DK_DWORD:
2579 if (afterNextTok.is(AsmToken::Identifier) &&
2580 afterNextTok.getString().equals_insensitive("ptr")) {
2581 // Size directive; part of an instruction.
2582 break;
2584 [[fallthrough]];
2585 case DK_SDWORD:
2586 case DK_DD:
2587 Lex();
2588 return parseDirectiveNamedValue(nextVal, 4, IDVal, IDLoc);
2589 case DK_FWORD:
2590 if (afterNextTok.is(AsmToken::Identifier) &&
2591 afterNextTok.getString().equals_insensitive("ptr")) {
2592 // Size directive; part of an instruction.
2593 break;
2595 [[fallthrough]];
2596 case DK_DF:
2597 Lex();
2598 return parseDirectiveNamedValue(nextVal, 6, IDVal, IDLoc);
2599 case DK_QWORD:
2600 if (afterNextTok.is(AsmToken::Identifier) &&
2601 afterNextTok.getString().equals_insensitive("ptr")) {
2602 // Size directive; part of an instruction.
2603 break;
2605 [[fallthrough]];
2606 case DK_SQWORD:
2607 case DK_DQ:
2608 Lex();
2609 return parseDirectiveNamedValue(nextVal, 8, IDVal, IDLoc);
2610 case DK_REAL4:
2611 Lex();
2612 return parseDirectiveNamedRealValue(nextVal, APFloat::IEEEsingle(), 4,
2613 IDVal, IDLoc);
2614 case DK_REAL8:
2615 Lex();
2616 return parseDirectiveNamedRealValue(nextVal, APFloat::IEEEdouble(), 8,
2617 IDVal, IDLoc);
2618 case DK_REAL10:
2619 Lex();
2620 return parseDirectiveNamedRealValue(nextVal, APFloat::x87DoubleExtended(),
2621 10, IDVal, IDLoc);
2622 case DK_STRUCT:
2623 case DK_UNION:
2624 Lex();
2625 return parseDirectiveStruct(nextVal, DirKind, IDVal, IDLoc);
2626 case DK_ENDS:
2627 Lex();
2628 return parseDirectiveEnds(IDVal, IDLoc);
2629 case DK_MACRO:
2630 Lex();
2631 return parseDirectiveMacro(IDVal, IDLoc);
2634 // Finally, we check if this is allocating a variable with user-defined type.
2635 auto NextIt = Structs.find(nextVal.lower());
2636 if (NextIt != Structs.end()) {
2637 Lex();
2638 return parseDirectiveNamedStructValue(/*Structure=*/NextIt->getValue(),
2639 nextVal, nextLoc, IDVal);
2642 // __asm _emit or __asm __emit
2643 if (ParsingMSInlineAsm && (IDVal == "_emit" || IDVal == "__emit" ||
2644 IDVal == "_EMIT" || IDVal == "__EMIT"))
2645 return parseDirectiveMSEmit(IDLoc, Info, IDVal.size());
2647 // __asm align
2648 if (ParsingMSInlineAsm && (IDVal == "align" || IDVal == "ALIGN"))
2649 return parseDirectiveMSAlign(IDLoc, Info);
2651 if (ParsingMSInlineAsm && (IDVal == "even" || IDVal == "EVEN"))
2652 Info.AsmRewrites->emplace_back(AOK_EVEN, IDLoc, 4);
2653 if (checkForValidSection())
2654 return true;
2656 // Canonicalize the opcode to lower case.
2657 std::string OpcodeStr = IDVal.lower();
2658 ParseInstructionInfo IInfo(Info.AsmRewrites);
2659 bool ParseHadError = getTargetParser().ParseInstruction(IInfo, OpcodeStr, ID,
2660 Info.ParsedOperands);
2661 Info.ParseError = ParseHadError;
2663 // Dump the parsed representation, if requested.
2664 if (getShowParsedOperands()) {
2665 SmallString<256> Str;
2666 raw_svector_ostream OS(Str);
2667 OS << "parsed instruction: [";
2668 for (unsigned i = 0; i != Info.ParsedOperands.size(); ++i) {
2669 if (i != 0)
2670 OS << ", ";
2671 Info.ParsedOperands[i]->print(OS);
2673 OS << "]";
2675 printMessage(IDLoc, SourceMgr::DK_Note, OS.str());
2678 // Fail even if ParseInstruction erroneously returns false.
2679 if (hasPendingError() || ParseHadError)
2680 return true;
2682 // If we are generating dwarf for the current section then generate a .loc
2683 // directive for the instruction.
2684 if (!ParseHadError && enabledGenDwarfForAssembly() &&
2685 getContext().getGenDwarfSectionSyms().count(
2686 getStreamer().getCurrentSectionOnly())) {
2687 unsigned Line;
2688 if (ActiveMacros.empty())
2689 Line = SrcMgr.FindLineNumber(IDLoc, CurBuffer);
2690 else
2691 Line = SrcMgr.FindLineNumber(ActiveMacros.front()->InstantiationLoc,
2692 ActiveMacros.front()->ExitBuffer);
2694 // If we previously parsed a cpp hash file line comment then make sure the
2695 // current Dwarf File is for the CppHashFilename if not then emit the
2696 // Dwarf File table for it and adjust the line number for the .loc.
2697 if (!CppHashInfo.Filename.empty()) {
2698 unsigned FileNumber = getStreamer().emitDwarfFileDirective(
2699 0, StringRef(), CppHashInfo.Filename);
2700 getContext().setGenDwarfFileNumber(FileNumber);
2702 unsigned CppHashLocLineNo =
2703 SrcMgr.FindLineNumber(CppHashInfo.Loc, CppHashInfo.Buf);
2704 Line = CppHashInfo.LineNumber - 1 + (Line - CppHashLocLineNo);
2707 getStreamer().emitDwarfLocDirective(
2708 getContext().getGenDwarfFileNumber(), Line, 0,
2709 DWARF2_LINE_DEFAULT_IS_STMT ? DWARF2_FLAG_IS_STMT : 0, 0, 0,
2710 StringRef());
2713 // If parsing succeeded, match the instruction.
2714 if (!ParseHadError) {
2715 uint64_t ErrorInfo;
2716 if (getTargetParser().MatchAndEmitInstruction(
2717 IDLoc, Info.Opcode, Info.ParsedOperands, Out, ErrorInfo,
2718 getTargetParser().isParsingMSInlineAsm()))
2719 return true;
2721 return false;
2724 // Parse and erase curly braces marking block start/end.
2725 bool MasmParser::parseCurlyBlockScope(
2726 SmallVectorImpl<AsmRewrite> &AsmStrRewrites) {
2727 // Identify curly brace marking block start/end.
2728 if (Lexer.isNot(AsmToken::LCurly) && Lexer.isNot(AsmToken::RCurly))
2729 return false;
2731 SMLoc StartLoc = Lexer.getLoc();
2732 Lex(); // Eat the brace.
2733 if (Lexer.is(AsmToken::EndOfStatement))
2734 Lex(); // Eat EndOfStatement following the brace.
2736 // Erase the block start/end brace from the output asm string.
2737 AsmStrRewrites.emplace_back(AOK_Skip, StartLoc, Lexer.getLoc().getPointer() -
2738 StartLoc.getPointer());
2739 return true;
2742 /// parseCppHashLineFilenameComment as this:
2743 /// ::= # number "filename"
2744 bool MasmParser::parseCppHashLineFilenameComment(SMLoc L) {
2745 Lex(); // Eat the hash token.
2746 // Lexer only ever emits HashDirective if it fully formed if it's
2747 // done the checking already so this is an internal error.
2748 assert(getTok().is(AsmToken::Integer) &&
2749 "Lexing Cpp line comment: Expected Integer");
2750 int64_t LineNumber = getTok().getIntVal();
2751 Lex();
2752 assert(getTok().is(AsmToken::String) &&
2753 "Lexing Cpp line comment: Expected String");
2754 StringRef Filename = getTok().getString();
2755 Lex();
2757 // Get rid of the enclosing quotes.
2758 Filename = Filename.substr(1, Filename.size() - 2);
2760 // Save the SMLoc, Filename and LineNumber for later use by diagnostics
2761 // and possibly DWARF file info.
2762 CppHashInfo.Loc = L;
2763 CppHashInfo.Filename = Filename;
2764 CppHashInfo.LineNumber = LineNumber;
2765 CppHashInfo.Buf = CurBuffer;
2766 if (FirstCppHashFilename.empty())
2767 FirstCppHashFilename = Filename;
2768 return false;
2771 /// will use the last parsed cpp hash line filename comment
2772 /// for the Filename and LineNo if any in the diagnostic.
2773 void MasmParser::DiagHandler(const SMDiagnostic &Diag, void *Context) {
2774 const MasmParser *Parser = static_cast<const MasmParser *>(Context);
2775 raw_ostream &OS = errs();
2777 const SourceMgr &DiagSrcMgr = *Diag.getSourceMgr();
2778 SMLoc DiagLoc = Diag.getLoc();
2779 unsigned DiagBuf = DiagSrcMgr.FindBufferContainingLoc(DiagLoc);
2780 unsigned CppHashBuf =
2781 Parser->SrcMgr.FindBufferContainingLoc(Parser->CppHashInfo.Loc);
2783 // Like SourceMgr::printMessage() we need to print the include stack if any
2784 // before printing the message.
2785 unsigned DiagCurBuffer = DiagSrcMgr.FindBufferContainingLoc(DiagLoc);
2786 if (!Parser->SavedDiagHandler && DiagCurBuffer &&
2787 DiagCurBuffer != DiagSrcMgr.getMainFileID()) {
2788 SMLoc ParentIncludeLoc = DiagSrcMgr.getParentIncludeLoc(DiagCurBuffer);
2789 DiagSrcMgr.PrintIncludeStack(ParentIncludeLoc, OS);
2792 // If we have not parsed a cpp hash line filename comment or the source
2793 // manager changed or buffer changed (like in a nested include) then just
2794 // print the normal diagnostic using its Filename and LineNo.
2795 if (!Parser->CppHashInfo.LineNumber || &DiagSrcMgr != &Parser->SrcMgr ||
2796 DiagBuf != CppHashBuf) {
2797 if (Parser->SavedDiagHandler)
2798 Parser->SavedDiagHandler(Diag, Parser->SavedDiagContext);
2799 else
2800 Diag.print(nullptr, OS);
2801 return;
2804 // Use the CppHashFilename and calculate a line number based on the
2805 // CppHashInfo.Loc and CppHashInfo.LineNumber relative to this Diag's SMLoc
2806 // for the diagnostic.
2807 const std::string &Filename = std::string(Parser->CppHashInfo.Filename);
2809 int DiagLocLineNo = DiagSrcMgr.FindLineNumber(DiagLoc, DiagBuf);
2810 int CppHashLocLineNo =
2811 Parser->SrcMgr.FindLineNumber(Parser->CppHashInfo.Loc, CppHashBuf);
2812 int LineNo =
2813 Parser->CppHashInfo.LineNumber - 1 + (DiagLocLineNo - CppHashLocLineNo);
2815 SMDiagnostic NewDiag(*Diag.getSourceMgr(), Diag.getLoc(), Filename, LineNo,
2816 Diag.getColumnNo(), Diag.getKind(), Diag.getMessage(),
2817 Diag.getLineContents(), Diag.getRanges());
2819 if (Parser->SavedDiagHandler)
2820 Parser->SavedDiagHandler(NewDiag, Parser->SavedDiagContext);
2821 else
2822 NewDiag.print(nullptr, OS);
2825 // This is similar to the IsIdentifierChar function in AsmLexer.cpp, but does
2826 // not accept '.'.
2827 static bool isMacroParameterChar(char C) {
2828 return isAlnum(C) || C == '_' || C == '$' || C == '@' || C == '?';
// Writes to OS a copy of the macro body Body in which each parameter name is
// replaced by the tokens of the corresponding argument in A, and each name
// listed in Locals is replaced by a generated symbol of the form "??" followed
// by the current LocalCounter value (format_hex_no_prefix, width 4, upper
// case). Parameter names are matched case-insensitively. Returns true (after
// reporting "Wrong number of arguments" at L) when the number of arguments
// differs from the number of parameters; returns false otherwise.
2831 bool MasmParser::expandMacro(raw_svector_ostream &OS, StringRef Body,
2832 ArrayRef<MCAsmMacroParameter> Parameters,
2833 ArrayRef<MCAsmMacroArgument> A,
2834 const std::vector<std::string> &Locals, SMLoc L) {
2835 unsigned NParameters = Parameters.size();
2836 if (NParameters != A.size())
2837 return Error(L, "Wrong number of arguments");
// Map every local name to a freshly generated symbol; LocalCounter advances
// once per local, and the scratch string Name is reused between iterations.
2838 StringMap<std::string> LocalSymbols;
2839 std::string Name;
2840 Name.reserve(6);
2841 for (StringRef Local : Locals) {
2842 raw_string_ostream LocalName(Name);
2843 LocalName << "??"
2844 << format_hex_no_prefix(LocalCounter++, 4, /*Upper=*/true);
2845 LocalSymbols.insert({Local, LocalName.str()});
2846 Name.clear();
// CurrentQuote holds the active quote character, if any. It is declared
// outside the loop below, so quotation state carries over from one
// substitution to the next.
2849 std::optional<char> CurrentQuote;
2850 while (!Body.empty()) {
2851 // Scan for the next substitution.
2852 std::size_t End = Body.size(), Pos = 0;
2853 std::size_t IdentifierPos = End;
2854 for (; Pos != End; ++Pos) {
2855 // Find the next possible macro parameter, including preceding a '&'
2856 // inside quotes.
2857 if (Body[Pos] == '&')
2858 break;
2859 if (isMacroParameterChar(Body[Pos])) {
2860 if (!CurrentQuote)
2861 break;
// Inside quotes: only remember where the current identifier run started.
2862 if (IdentifierPos == End)
2863 IdentifierPos = Pos;
2864 } else {
2865 IdentifierPos = End;
2868 // Track quotation status
2869 if (!CurrentQuote) {
2870 if (Body[Pos] == '\'' || Body[Pos] == '"')
2871 CurrentQuote = Body[Pos];
2872 } else if (Body[Pos] == CurrentQuote) {
2873 if (Pos + 1 != End && Body[Pos + 1] == CurrentQuote) {
2874 // Escaped quote, and quotes aren't identifier chars; skip
2875 ++Pos;
2876 continue;
2877 } else {
2878 CurrentQuote.reset();
2882 if (IdentifierPos != End) {
2883 // We've recognized an identifier before an apostrophe inside quotes;
2884 // check once to see if we can expand it.
2885 Pos = IdentifierPos;
2886 IdentifierPos = End;
2889 // Add the prefix.
2890 OS << Body.slice(0, Pos);
2892 // Check if we reached the end.
2893 if (Pos == End)
2894 break;
// Delimit the candidate name: step over one leading '&' (if present), then
// take the longest run of macro-parameter characters.
2896 unsigned I = Pos;
2897 bool InitialAmpersand = (Body[I] == '&');
2898 if (InitialAmpersand) {
2899 ++I;
2900 ++Pos;
2902 while (I < End && isMacroParameterChar(Body[I]))
2903 ++I;
2905 const char *Begin = Body.data() + Pos;
2906 StringRef Argument(Begin, I - Pos);
2907 const std::string ArgumentLower = Argument.lower();
2908 unsigned Index = 0;
// Case-insensitive search of the formal parameters; Index == NParameters
// afterwards means the name is not a parameter.
2910 for (; Index < NParameters; ++Index)
2911 if (Parameters[Index].Name.equals_insensitive(ArgumentLower))
2912 break;
2914 if (Index == NParameters) {
// Not a parameter: re-emit the '&' that was stepped over, then either the
// generated local symbol or the text unchanged.
2915 if (InitialAmpersand)
2916 OS << '&';
// NOTE(review): the lookup uses the lower-cased name while keys were inserted
// exactly as given in Locals; presumably Locals is already lower-case --
// confirm against the code that fills it.
2917 auto it = LocalSymbols.find(ArgumentLower);
2918 if (it != LocalSymbols.end())
2919 OS << it->second;
2920 else
2921 OS << Argument;
2922 Pos = I;
2923 } else {
2924 for (const AsmToken &Token : A[Index]) {
2925 // In MASM, you can write '%expr'.
2926 // The prefix '%' evaluates the expression 'expr'
2927 // and uses the result as a string (e.g. replace %(1+2) with the
2928 // string "3").
2929 // Here, we identify the integer token which is the result of the
2930 // absolute expression evaluation and replace it with its string
2931 // representation.
2932 if (Token.getString().front() == '%' && Token.is(AsmToken::Integer))
2933 // Emit an integer value to the buffer.
2934 OS << Token.getIntVal();
2935 else
2936 OS << Token.getString();
// Step over the parameter name and one directly following '&'; neither the
// leading nor the trailing '&' of a substituted parameter is copied to OS.
2939 Pos += Argument.size();
2940 if (Pos < End && Body[Pos] == '&') {
2941 ++Pos;
2944 // Update the scan point.
2945 Body = Body.substr(Pos);
2948 return false;
2951 static bool isOperator(AsmToken::TokenKind kind) {
2952 switch (kind) {
2953 default:
2954 return false;
2955 case AsmToken::Plus:
2956 case AsmToken::Minus:
2957 case AsmToken::Tilde:
2958 case AsmToken::Slash:
2959 case AsmToken::Star:
2960 case AsmToken::Dot:
2961 case AsmToken::Equal:
2962 case AsmToken::EqualEqual:
2963 case AsmToken::Pipe:
2964 case AsmToken::PipePipe:
2965 case AsmToken::Caret:
2966 case AsmToken::Amp:
2967 case AsmToken::AmpAmp:
2968 case AsmToken::Exclaim:
2969 case AsmToken::ExclaimEqual:
2970 case AsmToken::Less:
2971 case AsmToken::LessEqual:
2972 case AsmToken::LessLess:
2973 case AsmToken::LessGreater:
2974 case AsmToken::Greater:
2975 case AsmToken::GreaterEqual:
2976 case AsmToken::GreaterGreater:
2977 return true;
2981 namespace {
2983 class AsmLexerSkipSpaceRAII {
2984 public:
2985 AsmLexerSkipSpaceRAII(AsmLexer &Lexer, bool SkipSpace) : Lexer(Lexer) {
2986 Lexer.setSkipSpace(SkipSpace);
2989 ~AsmLexerSkipSpaceRAII() {
2990 Lexer.setSkipSpace(true);
2993 private:
2994 AsmLexer &Lexer;
2997 } // end anonymous namespace
// Parses a single macro argument into MA and returns true on error.
//
// MP is the formal parameter the argument binds to, or null when unknown.
// Three forms are handled:
//  - a vararg parameter takes everything up to EndTok, stored as String tokens;
//  - an angle-bracket string <...> becomes one String token holding the text
//    between the brackets;
//  - otherwise tokens are collected until a comma or delimiting space outside
//    parentheses, or until EndTok. The terminating comma/EndTok is left in the
//    lexer.
// If nothing was collected and MP is given, a required parameter is an error
// and any other parameter receives its default value.
2999 bool MasmParser::parseMacroArgument(const MCAsmMacroParameter *MP,
3000 MCAsmMacroArgument &MA,
3001 AsmToken::TokenKind EndTok) {
3002 if (MP && MP->Vararg) {
3003 if (Lexer.isNot(EndTok)) {
3004 SmallVector<StringRef, 1> Str = parseStringRefsTo(EndTok);
3005 for (StringRef S : Str) {
3006 MA.emplace_back(AsmToken::String, S);
3009 return false;
3012 SMLoc StrLoc = Lexer.getLoc(), EndLoc;
3013 if (Lexer.is(AsmToken::Less) && isAngleBracketString(StrLoc, EndLoc)) {
// The +1 / -1 adjustments exclude the bracket characters from the token text.
3014 const char *StrChar = StrLoc.getPointer() + 1;
3015 const char *EndChar = EndLoc.getPointer() - 1;
3016 jumpToLoc(EndLoc, CurBuffer, EndStatementAtEOFStack.back());
3017 /// Eat from '<' to '>'.
3018 Lex();
3019 MA.emplace_back(AsmToken::String, StringRef(StrChar, EndChar - StrChar));
3020 return false;
3023 unsigned ParenLevel = 0;
3025 // Darwin doesn't use spaces to delimit arguments.
3026 AsmLexerSkipSpaceRAII ScopedSkipSpace(Lexer, IsDarwin);
3028 bool SpaceEaten;
3030 while (true) {
3031 SpaceEaten = false;
// End of file or '=' can never be part of an argument.
3032 if (Lexer.is(AsmToken::Eof) || Lexer.is(AsmToken::Equal))
3033 return TokError("unexpected token");
// Delimiters are only recognized outside of parentheses.
3035 if (ParenLevel == 0) {
3036 if (Lexer.is(AsmToken::Comma))
3037 break;
3039 if (Lexer.is(AsmToken::Space)) {
3040 SpaceEaten = true;
3041 Lex(); // Eat spaces.
3044 // Spaces can delimit parameters, but could also be part of an expression.
3045 // If the token after a space is an operator, add the token and the next
3046 // one into this argument
3047 if (!IsDarwin) {
3048 if (isOperator(Lexer.getKind()) && Lexer.isNot(EndTok)) {
3049 MA.push_back(getTok());
3050 Lex();
3052 // Whitespace after an operator can be ignored.
3053 if (Lexer.is(AsmToken::Space))
3054 Lex();
3056 continue;
// A space that was not followed by an operator ends the argument.
3059 if (SpaceEaten)
3060 break;
3063 // handleMacroEntry relies on not advancing the lexer here
3064 // to be able to fill in the remaining default parameter values
3065 if (Lexer.is(EndTok) && (EndTok != AsmToken::RParen || ParenLevel == 0))
3066 break;
3068 // Adjust the current parentheses level.
3069 if (Lexer.is(AsmToken::LParen))
3070 ++ParenLevel;
3071 else if (Lexer.is(AsmToken::RParen) && ParenLevel)
3072 --ParenLevel;
3074 // Append the token to the current argument list.
3075 MA.push_back(getTok());
3076 Lex();
3079 if (ParenLevel != 0)
3080 return TokError("unbalanced parentheses in argument");
// Nothing was supplied: fall back to the parameter's default, or fail if the
// parameter is required.
3082 if (MA.empty() && MP) {
3083 if (MP->Required) {
3084 return TokError("missing value for required parameter '" + MP->Name +
3085 "'");
3086 } else {
3087 MA = MP->Value;
3090 return false;
3093 // Parse the macro instantiation arguments.
3094 bool MasmParser::parseMacroArguments(const MCAsmMacro *M,
3095 MCAsmMacroArguments &A,
3096 AsmToken::TokenKind EndTok) {
3097 const unsigned NParameters = M ? M->Parameters.size() : 0;
3098 bool NamedParametersFound = false;
3099 SmallVector<SMLoc, 4> FALocs;
3101 A.resize(NParameters);
3102 FALocs.resize(NParameters);
3104 // Parse two kinds of macro invocations:
3105 // - macros defined without any parameters accept an arbitrary number of them
3106 // - macros defined with parameters accept at most that many of them
3107 for (unsigned Parameter = 0; !NParameters || Parameter < NParameters;
3108 ++Parameter) {
3109 SMLoc IDLoc = Lexer.getLoc();
3110 MCAsmMacroParameter FA;
3112 if (Lexer.is(AsmToken::Identifier) && peekTok().is(AsmToken::Equal)) {
3113 if (parseIdentifier(FA.Name))
3114 return Error(IDLoc, "invalid argument identifier for formal argument");
3116 if (Lexer.isNot(AsmToken::Equal))
3117 return TokError("expected '=' after formal parameter identifier");
3119 Lex();
3121 NamedParametersFound = true;
3124 if (NamedParametersFound && FA.Name.empty())
3125 return Error(IDLoc, "cannot mix positional and keyword arguments");
3127 unsigned PI = Parameter;
3128 if (!FA.Name.empty()) {
3129 assert(M && "expected macro to be defined");
3130 unsigned FAI = 0;
3131 for (FAI = 0; FAI < NParameters; ++FAI)
3132 if (M->Parameters[FAI].Name == FA.Name)
3133 break;
3135 if (FAI >= NParameters) {
3136 return Error(IDLoc, "parameter named '" + FA.Name +
3137 "' does not exist for macro '" + M->Name + "'");
3139 PI = FAI;
3141 const MCAsmMacroParameter *MP = nullptr;
3142 if (M && PI < NParameters)
3143 MP = &M->Parameters[PI];
3145 SMLoc StrLoc = Lexer.getLoc();
3146 SMLoc EndLoc;
3147 if (Lexer.is(AsmToken::Percent)) {
3148 const MCExpr *AbsoluteExp;
3149 int64_t Value;
3150 /// Eat '%'.
3151 Lex();
3152 if (parseExpression(AbsoluteExp, EndLoc))
3153 return false;
3154 if (!AbsoluteExp->evaluateAsAbsolute(Value,
3155 getStreamer().getAssemblerPtr()))
3156 return Error(StrLoc, "expected absolute expression");
3157 const char *StrChar = StrLoc.getPointer();
3158 const char *EndChar = EndLoc.getPointer();
3159 AsmToken newToken(AsmToken::Integer,
3160 StringRef(StrChar, EndChar - StrChar), Value);
3161 FA.Value.push_back(newToken);
3162 } else if (parseMacroArgument(MP, FA.Value, EndTok)) {
3163 if (M)
3164 return addErrorSuffix(" in '" + M->Name + "' macro");
3165 else
3166 return true;
3169 if (!FA.Value.empty()) {
3170 if (A.size() <= PI)
3171 A.resize(PI + 1);
3172 A[PI] = FA.Value;
3174 if (FALocs.size() <= PI)
3175 FALocs.resize(PI + 1);
3177 FALocs[PI] = Lexer.getLoc();
3180 // At the end of the statement, fill in remaining arguments that have
3181 // default values. If there aren't any, then the next argument is
3182 // required but missing
3183 if (Lexer.is(EndTok)) {
3184 bool Failure = false;
3185 for (unsigned FAI = 0; FAI < NParameters; ++FAI) {
3186 if (A[FAI].empty()) {
3187 if (M->Parameters[FAI].Required) {
3188 Error(FALocs[FAI].isValid() ? FALocs[FAI] : Lexer.getLoc(),
3189 "missing value for required parameter "
3190 "'" +
3191 M->Parameters[FAI].Name + "' in macro '" + M->Name + "'");
3192 Failure = true;
3195 if (!M->Parameters[FAI].Value.empty())
3196 A[FAI] = M->Parameters[FAI].Value;
3199 return Failure;
3202 if (Lexer.is(AsmToken::Comma))
3203 Lex();
3206 return TokError("too many positional arguments");
3209 bool MasmParser::handleMacroEntry(const MCAsmMacro *M, SMLoc NameLoc,
3210 AsmToken::TokenKind ArgumentEndTok) {
3211 // Arbitrarily limit macro nesting depth (default matches 'as'). We can
3212 // eliminate this, although we should protect against infinite loops.
3213 unsigned MaxNestingDepth = AsmMacroMaxNestingDepth;
3214 if (ActiveMacros.size() == MaxNestingDepth) {
3215 std::ostringstream MaxNestingDepthError;
3216 MaxNestingDepthError << "macros cannot be nested more than "
3217 << MaxNestingDepth << " levels deep."
3218 << " Use -asm-macro-max-nesting-depth to increase "
3219 "this limit.";
3220 return TokError(MaxNestingDepthError.str());
3223 MCAsmMacroArguments A;
3224 if (parseMacroArguments(M, A, ArgumentEndTok))
3225 return true;
3227 // Macro instantiation is lexical, unfortunately. We construct a new buffer
3228 // to hold the macro body with substitutions.
3229 SmallString<256> Buf;
3230 StringRef Body = M->Body;
3231 raw_svector_ostream OS(Buf);
3233 if (expandMacro(OS, Body, M->Parameters, A, M->Locals, getTok().getLoc()))
3234 return true;
3236 // We include the endm in the buffer as our cue to exit the macro
3237 // instantiation.
3238 OS << "endm\n";
3240 std::unique_ptr<MemoryBuffer> Instantiation =
3241 MemoryBuffer::getMemBufferCopy(OS.str(), "<instantiation>");
3243 // Create the macro instantiation object and add to the current macro
3244 // instantiation stack.
3245 MacroInstantiation *MI = new MacroInstantiation{
3246 NameLoc, CurBuffer, getTok().getLoc(), TheCondStack.size()};
3247 ActiveMacros.push_back(MI);
3249 ++NumOfMacroInstantiations;
3251 // Jump to the macro instantiation and prime the lexer.
3252 CurBuffer = SrcMgr.AddNewSourceBuffer(std::move(Instantiation), SMLoc());
3253 Lexer.setBuffer(SrcMgr.getMemoryBuffer(CurBuffer)->getBuffer());
3254 EndStatementAtEOFStack.push_back(true);
3255 Lex();
3257 return false;
3260 void MasmParser::handleMacroExit() {
3261 // Jump to the token we should return to, and consume it.
3262 EndStatementAtEOFStack.pop_back();
3263 jumpToLoc(ActiveMacros.back()->ExitLoc, ActiveMacros.back()->ExitBuffer,
3264 EndStatementAtEOFStack.back());
3265 Lex();
3267 // Pop the instantiation entry.
3268 delete ActiveMacros.back();
3269 ActiveMacros.pop_back();
3272 bool MasmParser::handleMacroInvocation(const MCAsmMacro *M, SMLoc NameLoc) {
3273 if (!M->IsFunction)
3274 return Error(NameLoc, "cannot invoke macro procedure as function");
3276 if (parseToken(AsmToken::LParen, "invoking macro function '" + M->Name +
3277 "' requires arguments in parentheses") ||
3278 handleMacroEntry(M, NameLoc, AsmToken::RParen))
3279 return true;
3281 // Parse all statements in the macro, retrieving the exit value when it ends.
3282 std::string ExitValue;
3283 SmallVector<AsmRewrite, 4> AsmStrRewrites;
3284 while (Lexer.isNot(AsmToken::Eof)) {
3285 ParseStatementInfo Info(&AsmStrRewrites);
3286 bool Parsed = parseStatement(Info, nullptr);
3288 if (!Parsed && Info.ExitValue) {
3289 ExitValue = std::move(*Info.ExitValue);
3290 break;
3293 // If we have a Lexer Error we are on an Error Token. Load in Lexer Error
3294 // for printing ErrMsg via Lex() only if no (presumably better) parser error
3295 // exists.
3296 if (Parsed && !hasPendingError() && Lexer.getTok().is(AsmToken::Error)) {
3297 Lex();
3300 // parseStatement returned true so may need to emit an error.
3301 printPendingErrors();
3303 // Skipping to the next line if needed.
3304 if (Parsed && !getLexer().isAtStartOfStatement())
3305 eatToEndOfStatement();
3308 // Consume the right-parenthesis on the other side of the arguments.
3309 if (parseRParen())
3310 return true;
3312 // Exit values may require lexing, unfortunately. We construct a new buffer to
3313 // hold the exit value.
3314 std::unique_ptr<MemoryBuffer> MacroValue =
3315 MemoryBuffer::getMemBufferCopy(ExitValue, "<macro-value>");
3317 // Jump from this location to the instantiated exit value, and prime the
3318 // lexer.
3319 CurBuffer = SrcMgr.AddNewSourceBuffer(std::move(MacroValue), Lexer.getLoc());
3320 Lexer.setBuffer(SrcMgr.getMemoryBuffer(CurBuffer)->getBuffer(), nullptr,
3321 /*EndStatementAtEOF=*/false);
3322 EndStatementAtEOFStack.push_back(false);
3323 Lex();
3325 return false;
3328 /// parseIdentifier:
3329 /// ::= identifier
3330 /// ::= string
3331 bool MasmParser::parseIdentifier(StringRef &Res,
3332 IdentifierPositionKind Position) {
3333 // The assembler has relaxed rules for accepting identifiers, in particular we
3334 // allow things like '.globl $foo' and '.def @feat.00', which would normally
3335 // be separate tokens. At this level, we have already lexed so we cannot
3336 // (currently) handle this as a context dependent token, instead we detect
3337 // adjacent tokens and return the combined identifier.
3338 if (Lexer.is(AsmToken::Dollar) || Lexer.is(AsmToken::At)) {
3339 SMLoc PrefixLoc = getLexer().getLoc();
3341 // Consume the prefix character, and check for a following identifier.
3343 AsmToken nextTok = peekTok(false);
3345 if (nextTok.isNot(AsmToken::Identifier))
3346 return true;
3348 // We have a '$' or '@' followed by an identifier, make sure they are adjacent.
3349 if (PrefixLoc.getPointer() + 1 != nextTok.getLoc().getPointer())
3350 return true;
3352 // eat $ or @
3353 Lexer.Lex(); // Lexer's Lex guarantees consecutive token.
3354 // Construct the joined identifier and consume the token.
3355 Res =
3356 StringRef(PrefixLoc.getPointer(), getTok().getIdentifier().size() + 1);
3357 Lex(); // Parser Lex to maintain invariants.
3358 return false;
3361 if (Lexer.isNot(AsmToken::Identifier) && Lexer.isNot(AsmToken::String))
3362 return true;
3364 Res = getTok().getIdentifier();
3366 // Consume the identifier token - but if parsing certain directives, avoid
3367 // lexical expansion of the next token.
3368 ExpandKind ExpandNextToken = ExpandMacros;
3369 if (Position == StartOfStatement &&
3370 StringSwitch<bool>(Res)
3371 .CaseLower("echo", true)
3372 .CasesLower("ifdef", "ifndef", "elseifdef", "elseifndef", true)
3373 .Default(false)) {
3374 ExpandNextToken = DoNotExpandMacros;
3376 Lex(ExpandNextToken);
3378 return false;
3381 /// parseDirectiveEquate:
3382 /// ::= name "=" expression
3383 /// | name "equ" expression (not redefinable)
3384 /// | name "equ" text-list
3385 /// | name "textequ" text-list (redefinability unspecified)
3386 bool MasmParser::parseDirectiveEquate(StringRef IDVal, StringRef Name,
3387 DirectiveKind DirKind, SMLoc NameLoc) {
3388 auto BuiltinIt = BuiltinSymbolMap.find(Name.lower());
3389 if (BuiltinIt != BuiltinSymbolMap.end())
3390 return Error(NameLoc, "cannot redefine a built-in symbol");
3392 Variable &Var = Variables[Name.lower()];
3393 if (Var.Name.empty()) {
3394 Var.Name = Name;
3397 SMLoc StartLoc = Lexer.getLoc();
3398 if (DirKind == DK_EQU || DirKind == DK_TEXTEQU) {
3399 // "equ" and "textequ" both allow text expressions.
3400 std::string Value;
3401 std::string TextItem;
3402 if (!parseTextItem(TextItem)) {
3403 Value += TextItem;
3405 // Accept a text-list, not just one text-item.
3406 auto parseItem = [&]() -> bool {
3407 if (parseTextItem(TextItem))
3408 return TokError("expected text item");
3409 Value += TextItem;
3410 return false;
3412 if (parseOptionalToken(AsmToken::Comma) && parseMany(parseItem))
3413 return addErrorSuffix(" in '" + Twine(IDVal) + "' directive");
3415 if (!Var.IsText || Var.TextValue != Value) {
3416 switch (Var.Redefinable) {
3417 case Variable::NOT_REDEFINABLE:
3418 return Error(getTok().getLoc(), "invalid variable redefinition");
3419 case Variable::WARN_ON_REDEFINITION:
3420 if (Warning(NameLoc, "redefining '" + Name +
3421 "', already defined on the command line")) {
3422 return true;
3424 break;
3425 default:
3426 break;
3429 Var.IsText = true;
3430 Var.TextValue = Value;
3431 Var.Redefinable = Variable::REDEFINABLE;
3433 return false;
3436 if (DirKind == DK_TEXTEQU)
3437 return TokError("expected <text> in '" + Twine(IDVal) + "' directive");
3439 // Parse as expression assignment.
3440 const MCExpr *Expr;
3441 SMLoc EndLoc;
3442 if (parseExpression(Expr, EndLoc))
3443 return addErrorSuffix(" in '" + Twine(IDVal) + "' directive");
3444 StringRef ExprAsString = StringRef(
3445 StartLoc.getPointer(), EndLoc.getPointer() - StartLoc.getPointer());
3447 int64_t Value;
3448 if (!Expr->evaluateAsAbsolute(Value, getStreamer().getAssemblerPtr())) {
3449 if (DirKind == DK_ASSIGN)
3450 return Error(
3451 StartLoc,
3452 "expected absolute expression; not all symbols have known values",
3453 {StartLoc, EndLoc});
3455 // Not an absolute expression; define as a text replacement.
3456 if (!Var.IsText || Var.TextValue != ExprAsString) {
3457 switch (Var.Redefinable) {
3458 case Variable::NOT_REDEFINABLE:
3459 return Error(getTok().getLoc(), "invalid variable redefinition");
3460 case Variable::WARN_ON_REDEFINITION:
3461 if (Warning(NameLoc, "redefining '" + Name +
3462 "', already defined on the command line")) {
3463 return true;
3465 break;
3466 default:
3467 break;
3471 Var.IsText = true;
3472 Var.TextValue = ExprAsString.str();
3473 Var.Redefinable = Variable::REDEFINABLE;
3475 return false;
3478 MCSymbol *Sym = getContext().getOrCreateSymbol(Var.Name);
3480 const MCConstantExpr *PrevValue =
3481 Sym->isVariable() ? dyn_cast_or_null<MCConstantExpr>(
3482 Sym->getVariableValue(/*SetUsed=*/false))
3483 : nullptr;
3484 if (Var.IsText || !PrevValue || PrevValue->getValue() != Value) {
3485 switch (Var.Redefinable) {
3486 case Variable::NOT_REDEFINABLE:
3487 return Error(getTok().getLoc(), "invalid variable redefinition");
3488 case Variable::WARN_ON_REDEFINITION:
3489 if (Warning(NameLoc, "redefining '" + Name +
3490 "', already defined on the command line")) {
3491 return true;
3493 break;
3494 default:
3495 break;
3499 Var.IsText = false;
3500 Var.TextValue.clear();
3501 Var.Redefinable = (DirKind == DK_ASSIGN) ? Variable::REDEFINABLE
3502 : Variable::NOT_REDEFINABLE;
3504 Sym->setRedefinable(Var.Redefinable != Variable::NOT_REDEFINABLE);
3505 Sym->setVariableValue(Expr);
3506 Sym->setExternal(false);
3508 return false;
3511 bool MasmParser::parseEscapedString(std::string &Data) {
3512 if (check(getTok().isNot(AsmToken::String), "expected string"))
3513 return true;
3515 Data = "";
3516 char Quote = getTok().getString().front();
3517 StringRef Str = getTok().getStringContents();
3518 Data.reserve(Str.size());
3519 for (size_t i = 0, e = Str.size(); i != e; ++i) {
3520 Data.push_back(Str[i]);
3521 if (Str[i] == Quote) {
3522 // MASM treats doubled delimiting quotes as an escaped delimiting quote.
3523 // If we're escaping the string's trailing delimiter, we're definitely
3524 // missing a quotation mark.
3525 if (i + 1 == Str.size())
3526 return Error(getTok().getLoc(), "missing quotation mark in string");
3527 if (Str[i + 1] == Quote)
3528 ++i;
3532 Lex();
3533 return false;
3536 bool MasmParser::parseAngleBracketString(std::string &Data) {
3537 SMLoc EndLoc, StartLoc = getTok().getLoc();
3538 if (isAngleBracketString(StartLoc, EndLoc)) {
3539 const char *StartChar = StartLoc.getPointer() + 1;
3540 const char *EndChar = EndLoc.getPointer() - 1;
3541 jumpToLoc(EndLoc, CurBuffer, EndStatementAtEOFStack.back());
3542 // Eat from '<' to '>'.
3543 Lex();
3545 Data = angleBracketString(StringRef(StartChar, EndChar - StartChar));
3546 return false;
3548 return true;
3551 /// textItem ::= textLiteral | textMacroID | % constExpr
3552 bool MasmParser::parseTextItem(std::string &Data) {
3553 switch (getTok().getKind()) {
3554 default:
3555 return true;
3556 case AsmToken::Percent: {
3557 int64_t Res;
3558 if (parseToken(AsmToken::Percent) || parseAbsoluteExpression(Res))
3559 return true;
3560 Data = std::to_string(Res);
3561 return false;
3563 case AsmToken::Less:
3564 case AsmToken::LessEqual:
3565 case AsmToken::LessLess:
3566 case AsmToken::LessGreater:
3567 return parseAngleBracketString(Data);
3568 case AsmToken::Identifier: {
3569 // This must be a text macro; we need to expand it accordingly.
3570 StringRef ID;
3571 SMLoc StartLoc = getTok().getLoc();
3572 if (parseIdentifier(ID))
3573 return true;
3574 Data = ID.str();
3576 bool Expanded = false;
3577 while (true) {
3578 // Try to resolve as a built-in text macro
3579 auto BuiltinIt = BuiltinSymbolMap.find(ID.lower());
3580 if (BuiltinIt != BuiltinSymbolMap.end()) {
3581 std::optional<std::string> BuiltinText =
3582 evaluateBuiltinTextMacro(BuiltinIt->getValue(), StartLoc);
3583 if (!BuiltinText) {
3584 // Not a text macro; break without substituting
3585 break;
3587 Data = std::move(*BuiltinText);
3588 ID = StringRef(Data);
3589 Expanded = true;
3590 continue;
3593 // Try to resolve as a variable text macro
3594 auto VarIt = Variables.find(ID.lower());
3595 if (VarIt != Variables.end()) {
3596 const Variable &Var = VarIt->getValue();
3597 if (!Var.IsText) {
3598 // Not a text macro; break without substituting
3599 break;
3601 Data = Var.TextValue;
3602 ID = StringRef(Data);
3603 Expanded = true;
3604 continue;
3607 break;
3610 if (!Expanded) {
3611 // Not a text macro; not usable in TextItem context. Since we haven't used
3612 // the token, put it back for better error recovery.
3613 getLexer().UnLex(AsmToken(AsmToken::Identifier, ID));
3614 return true;
3616 return false;
3619 llvm_unreachable("unhandled token kind");
3622 /// parseDirectiveAscii:
3623 /// ::= ( .ascii | .asciz | .string ) [ "string" ( , "string" )* ]
3624 bool MasmParser::parseDirectiveAscii(StringRef IDVal, bool ZeroTerminated) {
3625 auto parseOp = [&]() -> bool {
3626 std::string Data;
3627 if (checkForValidSection() || parseEscapedString(Data))
3628 return true;
3629 getStreamer().emitBytes(Data);
3630 if (ZeroTerminated)
3631 getStreamer().emitBytes(StringRef("\0", 1));
3632 return false;
3635 if (parseMany(parseOp))
3636 return addErrorSuffix(" in '" + Twine(IDVal) + "' directive");
3637 return false;
3640 bool MasmParser::emitIntValue(const MCExpr *Value, unsigned Size) {
3641 // Special case constant expressions to match code generator.
3642 if (const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(Value)) {
3643 assert(Size <= 8 && "Invalid size");
3644 int64_t IntValue = MCE->getValue();
3645 if (!isUIntN(8 * Size, IntValue) && !isIntN(8 * Size, IntValue))
3646 return Error(MCE->getLoc(), "out of range literal value");
3647 getStreamer().emitIntValue(IntValue, Size);
3648 } else {
3649 const MCSymbolRefExpr *MSE = dyn_cast<MCSymbolRefExpr>(Value);
3650 if (MSE && MSE->getSymbol().getName() == "?") {
3651 // ? initializer; treat as 0.
3652 getStreamer().emitIntValue(0, Size);
3653 } else {
3654 getStreamer().emitValue(Value, Size, Value->getLoc());
3657 return false;
3660 bool MasmParser::parseScalarInitializer(unsigned Size,
3661 SmallVectorImpl<const MCExpr *> &Values,
3662 unsigned StringPadLength) {
3663 if (Size == 1 && getTok().is(AsmToken::String)) {
3664 std::string Value;
3665 if (parseEscapedString(Value))
3666 return true;
3667 // Treat each character as an initializer.
3668 for (const unsigned char CharVal : Value)
3669 Values.push_back(MCConstantExpr::create(CharVal, getContext()));
3671 // Pad the string with spaces to the specified length.
3672 for (size_t i = Value.size(); i < StringPadLength; ++i)
3673 Values.push_back(MCConstantExpr::create(' ', getContext()));
3674 } else {
3675 const MCExpr *Value;
3676 if (parseExpression(Value))
3677 return true;
3678 if (getTok().is(AsmToken::Identifier) &&
3679 getTok().getString().equals_insensitive("dup")) {
3680 Lex(); // Eat 'dup'.
3681 const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(Value);
3682 if (!MCE)
3683 return Error(Value->getLoc(),
3684 "cannot repeat value a non-constant number of times");
3685 const int64_t Repetitions = MCE->getValue();
3686 if (Repetitions < 0)
3687 return Error(Value->getLoc(),
3688 "cannot repeat value a negative number of times");
3690 SmallVector<const MCExpr *, 1> DuplicatedValues;
3691 if (parseToken(AsmToken::LParen,
3692 "parentheses required for 'dup' contents") ||
3693 parseScalarInstList(Size, DuplicatedValues) || parseRParen())
3694 return true;
3696 for (int i = 0; i < Repetitions; ++i)
3697 Values.append(DuplicatedValues.begin(), DuplicatedValues.end());
3698 } else {
3699 Values.push_back(Value);
3702 return false;
3705 bool MasmParser::parseScalarInstList(unsigned Size,
3706 SmallVectorImpl<const MCExpr *> &Values,
3707 const AsmToken::TokenKind EndToken) {
3708 while (getTok().isNot(EndToken) &&
3709 (EndToken != AsmToken::Greater ||
3710 getTok().isNot(AsmToken::GreaterGreater))) {
3711 parseScalarInitializer(Size, Values);
3713 // If we see a comma, continue, and allow line continuation.
3714 if (!parseOptionalToken(AsmToken::Comma))
3715 break;
3716 parseOptionalToken(AsmToken::EndOfStatement);
3718 return false;
3721 bool MasmParser::emitIntegralValues(unsigned Size, unsigned *Count) {
3722 SmallVector<const MCExpr *, 1> Values;
3723 if (checkForValidSection() || parseScalarInstList(Size, Values))
3724 return true;
3726 for (const auto *Value : Values) {
3727 emitIntValue(Value, Size);
3729 if (Count)
3730 *Count = Values.size();
3731 return false;
3734 // Add a field to the current structure.
3735 bool MasmParser::addIntegralField(StringRef Name, unsigned Size) {
3736 StructInfo &Struct = StructInProgress.back();
3737 FieldInfo &Field = Struct.addField(Name, FT_INTEGRAL, Size);
3738 IntFieldInfo &IntInfo = Field.Contents.IntInfo;
3740 Field.Type = Size;
3742 if (parseScalarInstList(Size, IntInfo.Values))
3743 return true;
3745 Field.SizeOf = Field.Type * IntInfo.Values.size();
3746 Field.LengthOf = IntInfo.Values.size();
3747 const unsigned FieldEnd = Field.Offset + Field.SizeOf;
3748 if (!Struct.IsUnion) {
3749 Struct.NextOffset = FieldEnd;
3751 Struct.Size = std::max(Struct.Size, FieldEnd);
3752 return false;
3755 /// parseDirectiveValue
3756 /// ::= (byte | word | ... ) [ expression (, expression)* ]
3757 bool MasmParser::parseDirectiveValue(StringRef IDVal, unsigned Size) {
3758 if (StructInProgress.empty()) {
3759 // Initialize data value.
3760 if (emitIntegralValues(Size))
3761 return addErrorSuffix(" in '" + Twine(IDVal) + "' directive");
3762 } else if (addIntegralField("", Size)) {
3763 return addErrorSuffix(" in '" + Twine(IDVal) + "' directive");
3766 return false;
3769 /// parseDirectiveNamedValue
3770 /// ::= name (byte | word | ... ) [ expression (, expression)* ]
3771 bool MasmParser::parseDirectiveNamedValue(StringRef TypeName, unsigned Size,
3772 StringRef Name, SMLoc NameLoc) {
3773 if (StructInProgress.empty()) {
3774 // Initialize named data value.
3775 MCSymbol *Sym = getContext().getOrCreateSymbol(Name);
3776 getStreamer().emitLabel(Sym);
3777 unsigned Count;
3778 if (emitIntegralValues(Size, &Count))
3779 return addErrorSuffix(" in '" + Twine(TypeName) + "' directive");
3781 AsmTypeInfo Type;
3782 Type.Name = TypeName;
3783 Type.Size = Size * Count;
3784 Type.ElementSize = Size;
3785 Type.Length = Count;
3786 KnownType[Name.lower()] = Type;
3787 } else if (addIntegralField(Name, Size)) {
3788 return addErrorSuffix(" in '" + Twine(TypeName) + "' directive");
3791 return false;
3794 static bool parseHexOcta(MasmParser &Asm, uint64_t &hi, uint64_t &lo) {
3795 if (Asm.getTok().isNot(AsmToken::Integer) &&
3796 Asm.getTok().isNot(AsmToken::BigNum))
3797 return Asm.TokError("unknown token in expression");
3798 SMLoc ExprLoc = Asm.getTok().getLoc();
3799 APInt IntValue = Asm.getTok().getAPIntVal();
3800 Asm.Lex();
3801 if (!IntValue.isIntN(128))
3802 return Asm.Error(ExprLoc, "out of range literal value");
3803 if (!IntValue.isIntN(64)) {
3804 hi = IntValue.getHiBits(IntValue.getBitWidth() - 64).getZExtValue();
3805 lo = IntValue.getLoBits(64).getZExtValue();
3806 } else {
3807 hi = 0;
3808 lo = IntValue.getZExtValue();
3810 return false;
3813 bool MasmParser::parseRealValue(const fltSemantics &Semantics, APInt &Res) {
3814 // We don't truly support arithmetic on floating point expressions, so we
3815 // have to manually parse unary prefixes.
3816 bool IsNeg = false;
3817 SMLoc SignLoc;
3818 if (getLexer().is(AsmToken::Minus)) {
3819 SignLoc = getLexer().getLoc();
3820 Lexer.Lex();
3821 IsNeg = true;
3822 } else if (getLexer().is(AsmToken::Plus)) {
3823 SignLoc = getLexer().getLoc();
3824 Lexer.Lex();
3827 if (Lexer.is(AsmToken::Error))
3828 return TokError(Lexer.getErr());
3829 if (Lexer.isNot(AsmToken::Integer) && Lexer.isNot(AsmToken::Real) &&
3830 Lexer.isNot(AsmToken::Identifier))
3831 return TokError("unexpected token in directive");
3833 // Convert to an APFloat.
3834 APFloat Value(Semantics);
3835 StringRef IDVal = getTok().getString();
3836 if (getLexer().is(AsmToken::Identifier)) {
3837 if (IDVal.equals_insensitive("infinity") || IDVal.equals_insensitive("inf"))
3838 Value = APFloat::getInf(Semantics);
3839 else if (IDVal.equals_insensitive("nan"))
3840 Value = APFloat::getNaN(Semantics, false, ~0);
3841 else if (IDVal.equals_insensitive("?"))
3842 Value = APFloat::getZero(Semantics);
3843 else
3844 return TokError("invalid floating point literal");
3845 } else if (IDVal.consume_back("r") || IDVal.consume_back("R")) {
3846 // MASM hexadecimal floating-point literal; no APFloat conversion needed.
3847 // To match ML64.exe, ignore the initial sign.
3848 unsigned SizeInBits = Value.getSizeInBits(Semantics);
3849 if (SizeInBits != (IDVal.size() << 2))
3850 return TokError("invalid floating point literal");
3852 // Consume the numeric token.
3853 Lex();
3855 Res = APInt(SizeInBits, IDVal, 16);
3856 if (SignLoc.isValid())
3857 return Warning(SignLoc, "MASM-style hex floats ignore explicit sign");
3858 return false;
3859 } else if (errorToBool(
3860 Value.convertFromString(IDVal, APFloat::rmNearestTiesToEven)
3861 .takeError())) {
3862 return TokError("invalid floating point literal");
3864 if (IsNeg)
3865 Value.changeSign();
3867 // Consume the numeric token.
3868 Lex();
3870 Res = Value.bitcastToAPInt();
3872 return false;
3875 bool MasmParser::parseRealInstList(const fltSemantics &Semantics,
3876 SmallVectorImpl<APInt> &ValuesAsInt,
3877 const AsmToken::TokenKind EndToken) {
3878 while (getTok().isNot(EndToken) ||
3879 (EndToken == AsmToken::Greater &&
3880 getTok().isNot(AsmToken::GreaterGreater))) {
3881 const AsmToken NextTok = peekTok();
3882 if (NextTok.is(AsmToken::Identifier) &&
3883 NextTok.getString().equals_insensitive("dup")) {
3884 const MCExpr *Value;
3885 if (parseExpression(Value) || parseToken(AsmToken::Identifier))
3886 return true;
3887 const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(Value);
3888 if (!MCE)
3889 return Error(Value->getLoc(),
3890 "cannot repeat value a non-constant number of times");
3891 const int64_t Repetitions = MCE->getValue();
3892 if (Repetitions < 0)
3893 return Error(Value->getLoc(),
3894 "cannot repeat value a negative number of times");
3896 SmallVector<APInt, 1> DuplicatedValues;
3897 if (parseToken(AsmToken::LParen,
3898 "parentheses required for 'dup' contents") ||
3899 parseRealInstList(Semantics, DuplicatedValues) || parseRParen())
3900 return true;
3902 for (int i = 0; i < Repetitions; ++i)
3903 ValuesAsInt.append(DuplicatedValues.begin(), DuplicatedValues.end());
3904 } else {
3905 APInt AsInt;
3906 if (parseRealValue(Semantics, AsInt))
3907 return true;
3908 ValuesAsInt.push_back(AsInt);
3911 // Continue if we see a comma. (Also, allow line continuation.)
3912 if (!parseOptionalToken(AsmToken::Comma))
3913 break;
3914 parseOptionalToken(AsmToken::EndOfStatement);
3917 return false;
3920 // Initialize real data values.
3921 bool MasmParser::emitRealValues(const fltSemantics &Semantics,
3922 unsigned *Count) {
3923 if (checkForValidSection())
3924 return true;
3926 SmallVector<APInt, 1> ValuesAsInt;
3927 if (parseRealInstList(Semantics, ValuesAsInt))
3928 return true;
3930 for (const APInt &AsInt : ValuesAsInt) {
3931 getStreamer().emitIntValue(AsInt);
3933 if (Count)
3934 *Count = ValuesAsInt.size();
3935 return false;
3938 // Add a real field to the current struct.
3939 bool MasmParser::addRealField(StringRef Name, const fltSemantics &Semantics,
3940 size_t Size) {
3941 StructInfo &Struct = StructInProgress.back();
3942 FieldInfo &Field = Struct.addField(Name, FT_REAL, Size);
3943 RealFieldInfo &RealInfo = Field.Contents.RealInfo;
3945 Field.SizeOf = 0;
3947 if (parseRealInstList(Semantics, RealInfo.AsIntValues))
3948 return true;
3950 Field.Type = RealInfo.AsIntValues.back().getBitWidth() / 8;
3951 Field.LengthOf = RealInfo.AsIntValues.size();
3952 Field.SizeOf = Field.Type * Field.LengthOf;
3954 const unsigned FieldEnd = Field.Offset + Field.SizeOf;
3955 if (!Struct.IsUnion) {
3956 Struct.NextOffset = FieldEnd;
3958 Struct.Size = std::max(Struct.Size, FieldEnd);
3959 return false;
3962 /// parseDirectiveRealValue
3963 /// ::= (real4 | real8 | real10) [ expression (, expression)* ]
3964 bool MasmParser::parseDirectiveRealValue(StringRef IDVal,
3965 const fltSemantics &Semantics,
3966 size_t Size) {
3967 if (StructInProgress.empty()) {
3968 // Initialize data value.
3969 if (emitRealValues(Semantics))
3970 return addErrorSuffix(" in '" + Twine(IDVal) + "' directive");
3971 } else if (addRealField("", Semantics, Size)) {
3972 return addErrorSuffix(" in '" + Twine(IDVal) + "' directive");
3974 return false;
3977 /// parseDirectiveNamedRealValue
3978 /// ::= name (real4 | real8 | real10) [ expression (, expression)* ]
3979 bool MasmParser::parseDirectiveNamedRealValue(StringRef TypeName,
3980 const fltSemantics &Semantics,
3981 unsigned Size, StringRef Name,
3982 SMLoc NameLoc) {
3983 if (StructInProgress.empty()) {
3984 // Initialize named data value.
3985 MCSymbol *Sym = getContext().getOrCreateSymbol(Name);
3986 getStreamer().emitLabel(Sym);
3987 unsigned Count;
3988 if (emitRealValues(Semantics, &Count))
3989 return addErrorSuffix(" in '" + TypeName + "' directive");
3991 AsmTypeInfo Type;
3992 Type.Name = TypeName;
3993 Type.Size = Size * Count;
3994 Type.ElementSize = Size;
3995 Type.Length = Count;
3996 KnownType[Name.lower()] = Type;
3997 } else if (addRealField(Name, Semantics, Size)) {
3998 return addErrorSuffix(" in '" + TypeName + "' directive");
4000 return false;
4003 bool MasmParser::parseOptionalAngleBracketOpen() {
4004 const AsmToken Tok = getTok();
4005 if (parseOptionalToken(AsmToken::LessLess)) {
4006 AngleBracketDepth++;
4007 Lexer.UnLex(AsmToken(AsmToken::Less, Tok.getString().substr(1)));
4008 return true;
4009 } else if (parseOptionalToken(AsmToken::LessGreater)) {
4010 AngleBracketDepth++;
4011 Lexer.UnLex(AsmToken(AsmToken::Greater, Tok.getString().substr(1)));
4012 return true;
4013 } else if (parseOptionalToken(AsmToken::Less)) {
4014 AngleBracketDepth++;
4015 return true;
4018 return false;
4021 bool MasmParser::parseAngleBracketClose(const Twine &Msg) {
4022 const AsmToken Tok = getTok();
4023 if (parseOptionalToken(AsmToken::GreaterGreater)) {
4024 Lexer.UnLex(AsmToken(AsmToken::Greater, Tok.getString().substr(1)));
4025 } else if (parseToken(AsmToken::Greater, Msg)) {
4026 return true;
4028 AngleBracketDepth--;
4029 return false;
4032 bool MasmParser::parseFieldInitializer(const FieldInfo &Field,
4033 const IntFieldInfo &Contents,
4034 FieldInitializer &Initializer) {
4035 SMLoc Loc = getTok().getLoc();
4037 SmallVector<const MCExpr *, 1> Values;
4038 if (parseOptionalToken(AsmToken::LCurly)) {
4039 if (Field.LengthOf == 1 && Field.Type > 1)
4040 return Error(Loc, "Cannot initialize scalar field with array value");
4041 if (parseScalarInstList(Field.Type, Values, AsmToken::RCurly) ||
4042 parseToken(AsmToken::RCurly))
4043 return true;
4044 } else if (parseOptionalAngleBracketOpen()) {
4045 if (Field.LengthOf == 1 && Field.Type > 1)
4046 return Error(Loc, "Cannot initialize scalar field with array value");
4047 if (parseScalarInstList(Field.Type, Values, AsmToken::Greater) ||
4048 parseAngleBracketClose())
4049 return true;
4050 } else if (Field.LengthOf > 1 && Field.Type > 1) {
4051 return Error(Loc, "Cannot initialize array field with scalar value");
4052 } else if (parseScalarInitializer(Field.Type, Values,
4053 /*StringPadLength=*/Field.LengthOf)) {
4054 return true;
4057 if (Values.size() > Field.LengthOf) {
4058 return Error(Loc, "Initializer too long for field; expected at most " +
4059 std::to_string(Field.LengthOf) + " elements, got " +
4060 std::to_string(Values.size()));
4062 // Default-initialize all remaining values.
4063 Values.append(Contents.Values.begin() + Values.size(), Contents.Values.end());
4065 Initializer = FieldInitializer(std::move(Values));
4066 return false;
4069 bool MasmParser::parseFieldInitializer(const FieldInfo &Field,
4070 const RealFieldInfo &Contents,
4071 FieldInitializer &Initializer) {
4072 const fltSemantics *Semantics;
4073 switch (Field.Type) {
4074 case 4:
4075 Semantics = &APFloat::IEEEsingle();
4076 break;
4077 case 8:
4078 Semantics = &APFloat::IEEEdouble();
4079 break;
4080 case 10:
4081 Semantics = &APFloat::x87DoubleExtended();
4082 break;
4083 default:
4084 llvm_unreachable("unknown real field type");
4087 SMLoc Loc = getTok().getLoc();
4089 SmallVector<APInt, 1> AsIntValues;
4090 if (parseOptionalToken(AsmToken::LCurly)) {
4091 if (Field.LengthOf == 1)
4092 return Error(Loc, "Cannot initialize scalar field with array value");
4093 if (parseRealInstList(*Semantics, AsIntValues, AsmToken::RCurly) ||
4094 parseToken(AsmToken::RCurly))
4095 return true;
4096 } else if (parseOptionalAngleBracketOpen()) {
4097 if (Field.LengthOf == 1)
4098 return Error(Loc, "Cannot initialize scalar field with array value");
4099 if (parseRealInstList(*Semantics, AsIntValues, AsmToken::Greater) ||
4100 parseAngleBracketClose())
4101 return true;
4102 } else if (Field.LengthOf > 1) {
4103 return Error(Loc, "Cannot initialize array field with scalar value");
4104 } else {
4105 AsIntValues.emplace_back();
4106 if (parseRealValue(*Semantics, AsIntValues.back()))
4107 return true;
4110 if (AsIntValues.size() > Field.LengthOf) {
4111 return Error(Loc, "Initializer too long for field; expected at most " +
4112 std::to_string(Field.LengthOf) + " elements, got " +
4113 std::to_string(AsIntValues.size()));
4115 // Default-initialize all remaining values.
4116 AsIntValues.append(Contents.AsIntValues.begin() + AsIntValues.size(),
4117 Contents.AsIntValues.end());
4119 Initializer = FieldInitializer(std::move(AsIntValues));
4120 return false;
4123 bool MasmParser::parseFieldInitializer(const FieldInfo &Field,
4124 const StructFieldInfo &Contents,
4125 FieldInitializer &Initializer) {
4126 SMLoc Loc = getTok().getLoc();
4128 std::vector<StructInitializer> Initializers;
4129 if (Field.LengthOf > 1) {
4130 if (parseOptionalToken(AsmToken::LCurly)) {
4131 if (parseStructInstList(Contents.Structure, Initializers,
4132 AsmToken::RCurly) ||
4133 parseToken(AsmToken::RCurly))
4134 return true;
4135 } else if (parseOptionalAngleBracketOpen()) {
4136 if (parseStructInstList(Contents.Structure, Initializers,
4137 AsmToken::Greater) ||
4138 parseAngleBracketClose())
4139 return true;
4140 } else {
4141 return Error(Loc, "Cannot initialize array field with scalar value");
4143 } else {
4144 Initializers.emplace_back();
4145 if (parseStructInitializer(Contents.Structure, Initializers.back()))
4146 return true;
4149 if (Initializers.size() > Field.LengthOf) {
4150 return Error(Loc, "Initializer too long for field; expected at most " +
4151 std::to_string(Field.LengthOf) + " elements, got " +
4152 std::to_string(Initializers.size()));
4154 // Default-initialize all remaining values.
4155 Initializers.insert(Initializers.end(),
4156 Contents.Initializers.begin() + Initializers.size(),
4157 Contents.Initializers.end());
4159 Initializer = FieldInitializer(std::move(Initializers), Contents.Structure);
4160 return false;
4163 bool MasmParser::parseFieldInitializer(const FieldInfo &Field,
4164 FieldInitializer &Initializer) {
4165 switch (Field.Contents.FT) {
4166 case FT_INTEGRAL:
4167 return parseFieldInitializer(Field, Field.Contents.IntInfo, Initializer);
4168 case FT_REAL:
4169 return parseFieldInitializer(Field, Field.Contents.RealInfo, Initializer);
4170 case FT_STRUCT:
4171 return parseFieldInitializer(Field, Field.Contents.StructInfo, Initializer);
4173 llvm_unreachable("Unhandled FieldType enum");
4176 bool MasmParser::parseStructInitializer(const StructInfo &Structure,
4177 StructInitializer &Initializer) {
4178 const AsmToken FirstToken = getTok();
4180 std::optional<AsmToken::TokenKind> EndToken;
4181 if (parseOptionalToken(AsmToken::LCurly)) {
4182 EndToken = AsmToken::RCurly;
4183 } else if (parseOptionalAngleBracketOpen()) {
4184 EndToken = AsmToken::Greater;
4185 AngleBracketDepth++;
4186 } else if (FirstToken.is(AsmToken::Identifier) &&
4187 FirstToken.getString() == "?") {
4188 // ? initializer; leave EndToken uninitialized to treat as empty.
4189 if (parseToken(AsmToken::Identifier))
4190 return true;
4191 } else {
4192 return Error(FirstToken.getLoc(), "Expected struct initializer");
4195 auto &FieldInitializers = Initializer.FieldInitializers;
4196 size_t FieldIndex = 0;
4197 if (EndToken) {
4198 // Initialize all fields with given initializers.
4199 while (getTok().isNot(*EndToken) && FieldIndex < Structure.Fields.size()) {
4200 const FieldInfo &Field = Structure.Fields[FieldIndex++];
4201 if (parseOptionalToken(AsmToken::Comma)) {
4202 // Empty initializer; use the default and continue. (Also, allow line
4203 // continuation.)
4204 FieldInitializers.push_back(Field.Contents);
4205 parseOptionalToken(AsmToken::EndOfStatement);
4206 continue;
4208 FieldInitializers.emplace_back(Field.Contents.FT);
4209 if (parseFieldInitializer(Field, FieldInitializers.back()))
4210 return true;
4212 // Continue if we see a comma. (Also, allow line continuation.)
4213 SMLoc CommaLoc = getTok().getLoc();
4214 if (!parseOptionalToken(AsmToken::Comma))
4215 break;
4216 if (FieldIndex == Structure.Fields.size())
4217 return Error(CommaLoc, "'" + Structure.Name +
4218 "' initializer initializes too many fields");
4219 parseOptionalToken(AsmToken::EndOfStatement);
4222 // Default-initialize all remaining fields.
4223 for (const FieldInfo &Field : llvm::drop_begin(Structure.Fields, FieldIndex))
4224 FieldInitializers.push_back(Field.Contents);
4226 if (EndToken) {
4227 if (*EndToken == AsmToken::Greater)
4228 return parseAngleBracketClose();
4230 return parseToken(*EndToken);
4233 return false;
4236 bool MasmParser::parseStructInstList(
4237 const StructInfo &Structure, std::vector<StructInitializer> &Initializers,
4238 const AsmToken::TokenKind EndToken) {
4239 while (getTok().isNot(EndToken) ||
4240 (EndToken == AsmToken::Greater &&
4241 getTok().isNot(AsmToken::GreaterGreater))) {
4242 const AsmToken NextTok = peekTok();
4243 if (NextTok.is(AsmToken::Identifier) &&
4244 NextTok.getString().equals_insensitive("dup")) {
4245 const MCExpr *Value;
4246 if (parseExpression(Value) || parseToken(AsmToken::Identifier))
4247 return true;
4248 const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(Value);
4249 if (!MCE)
4250 return Error(Value->getLoc(),
4251 "cannot repeat value a non-constant number of times");
4252 const int64_t Repetitions = MCE->getValue();
4253 if (Repetitions < 0)
4254 return Error(Value->getLoc(),
4255 "cannot repeat value a negative number of times");
4257 std::vector<StructInitializer> DuplicatedValues;
4258 if (parseToken(AsmToken::LParen,
4259 "parentheses required for 'dup' contents") ||
4260 parseStructInstList(Structure, DuplicatedValues) || parseRParen())
4261 return true;
4263 for (int i = 0; i < Repetitions; ++i)
4264 llvm::append_range(Initializers, DuplicatedValues);
4265 } else {
4266 Initializers.emplace_back();
4267 if (parseStructInitializer(Structure, Initializers.back()))
4268 return true;
4271 // Continue if we see a comma. (Also, allow line continuation.)
4272 if (!parseOptionalToken(AsmToken::Comma))
4273 break;
4274 parseOptionalToken(AsmToken::EndOfStatement);
4277 return false;
4280 bool MasmParser::emitFieldValue(const FieldInfo &Field,
4281 const IntFieldInfo &Contents) {
4282 // Default-initialize all values.
4283 for (const MCExpr *Value : Contents.Values) {
4284 if (emitIntValue(Value, Field.Type))
4285 return true;
4287 return false;
4290 bool MasmParser::emitFieldValue(const FieldInfo &Field,
4291 const RealFieldInfo &Contents) {
4292 for (const APInt &AsInt : Contents.AsIntValues) {
4293 getStreamer().emitIntValue(AsInt.getLimitedValue(),
4294 AsInt.getBitWidth() / 8);
4296 return false;
4299 bool MasmParser::emitFieldValue(const FieldInfo &Field,
4300 const StructFieldInfo &Contents) {
4301 for (const auto &Initializer : Contents.Initializers) {
4302 size_t Index = 0, Offset = 0;
4303 for (const auto &SubField : Contents.Structure.Fields) {
4304 getStreamer().emitZeros(SubField.Offset - Offset);
4305 Offset = SubField.Offset + SubField.SizeOf;
4306 emitFieldInitializer(SubField, Initializer.FieldInitializers[Index++]);
4309 return false;
4312 bool MasmParser::emitFieldValue(const FieldInfo &Field) {
4313 switch (Field.Contents.FT) {
4314 case FT_INTEGRAL:
4315 return emitFieldValue(Field, Field.Contents.IntInfo);
4316 case FT_REAL:
4317 return emitFieldValue(Field, Field.Contents.RealInfo);
4318 case FT_STRUCT:
4319 return emitFieldValue(Field, Field.Contents.StructInfo);
4321 llvm_unreachable("Unhandled FieldType enum");
4324 bool MasmParser::emitFieldInitializer(const FieldInfo &Field,
4325 const IntFieldInfo &Contents,
4326 const IntFieldInfo &Initializer) {
4327 for (const auto &Value : Initializer.Values) {
4328 if (emitIntValue(Value, Field.Type))
4329 return true;
4331 // Default-initialize all remaining values.
4332 for (const auto &Value :
4333 llvm::drop_begin(Contents.Values, Initializer.Values.size())) {
4334 if (emitIntValue(Value, Field.Type))
4335 return true;
4337 return false;
4340 bool MasmParser::emitFieldInitializer(const FieldInfo &Field,
4341 const RealFieldInfo &Contents,
4342 const RealFieldInfo &Initializer) {
4343 for (const auto &AsInt : Initializer.AsIntValues) {
4344 getStreamer().emitIntValue(AsInt.getLimitedValue(),
4345 AsInt.getBitWidth() / 8);
4347 // Default-initialize all remaining values.
4348 for (const auto &AsInt :
4349 llvm::drop_begin(Contents.AsIntValues, Initializer.AsIntValues.size())) {
4350 getStreamer().emitIntValue(AsInt.getLimitedValue(),
4351 AsInt.getBitWidth() / 8);
4353 return false;
4356 bool MasmParser::emitFieldInitializer(const FieldInfo &Field,
4357 const StructFieldInfo &Contents,
4358 const StructFieldInfo &Initializer) {
4359 for (const auto &Init : Initializer.Initializers) {
4360 if (emitStructInitializer(Contents.Structure, Init))
4361 return true;
4363 // Default-initialize all remaining values.
4364 for (const auto &Init : llvm::drop_begin(Contents.Initializers,
4365 Initializer.Initializers.size())) {
4366 if (emitStructInitializer(Contents.Structure, Init))
4367 return true;
4369 return false;
4372 bool MasmParser::emitFieldInitializer(const FieldInfo &Field,
4373 const FieldInitializer &Initializer) {
4374 switch (Field.Contents.FT) {
4375 case FT_INTEGRAL:
4376 return emitFieldInitializer(Field, Field.Contents.IntInfo,
4377 Initializer.IntInfo);
4378 case FT_REAL:
4379 return emitFieldInitializer(Field, Field.Contents.RealInfo,
4380 Initializer.RealInfo);
4381 case FT_STRUCT:
4382 return emitFieldInitializer(Field, Field.Contents.StructInfo,
4383 Initializer.StructInfo);
4385 llvm_unreachable("Unhandled FieldType enum");
4388 bool MasmParser::emitStructInitializer(const StructInfo &Structure,
4389 const StructInitializer &Initializer) {
4390 if (!Structure.Initializable)
4391 return Error(getLexer().getLoc(),
4392 "cannot initialize a value of type '" + Structure.Name +
4393 "'; 'org' was used in the type's declaration");
4394 size_t Index = 0, Offset = 0;
4395 for (const auto &Init : Initializer.FieldInitializers) {
4396 const auto &Field = Structure.Fields[Index++];
4397 getStreamer().emitZeros(Field.Offset - Offset);
4398 Offset = Field.Offset + Field.SizeOf;
4399 if (emitFieldInitializer(Field, Init))
4400 return true;
4402 // Default-initialize all remaining fields.
4403 for (const auto &Field : llvm::drop_begin(
4404 Structure.Fields, Initializer.FieldInitializers.size())) {
4405 getStreamer().emitZeros(Field.Offset - Offset);
4406 Offset = Field.Offset + Field.SizeOf;
4407 if (emitFieldValue(Field))
4408 return true;
4410 // Add final padding.
4411 if (Offset != Structure.Size)
4412 getStreamer().emitZeros(Structure.Size - Offset);
4413 return false;
4416 // Set data values from initializers.
4417 bool MasmParser::emitStructValues(const StructInfo &Structure,
4418 unsigned *Count) {
4419 std::vector<StructInitializer> Initializers;
4420 if (parseStructInstList(Structure, Initializers))
4421 return true;
4423 for (const auto &Initializer : Initializers) {
4424 if (emitStructInitializer(Structure, Initializer))
4425 return true;
4428 if (Count)
4429 *Count = Initializers.size();
4430 return false;
4433 // Declare a field in the current struct.
4434 bool MasmParser::addStructField(StringRef Name, const StructInfo &Structure) {
4435 StructInfo &OwningStruct = StructInProgress.back();
4436 FieldInfo &Field =
4437 OwningStruct.addField(Name, FT_STRUCT, Structure.AlignmentSize);
4438 StructFieldInfo &StructInfo = Field.Contents.StructInfo;
4440 StructInfo.Structure = Structure;
4441 Field.Type = Structure.Size;
4443 if (parseStructInstList(Structure, StructInfo.Initializers))
4444 return true;
4446 Field.LengthOf = StructInfo.Initializers.size();
4447 Field.SizeOf = Field.Type * Field.LengthOf;
4449 const unsigned FieldEnd = Field.Offset + Field.SizeOf;
4450 if (!OwningStruct.IsUnion) {
4451 OwningStruct.NextOffset = FieldEnd;
4453 OwningStruct.Size = std::max(OwningStruct.Size, FieldEnd);
4455 return false;
4458 /// parseDirectiveStructValue
4459 /// ::= struct-id (<struct-initializer> | {struct-initializer})
4460 /// [, (<struct-initializer> | {struct-initializer})]*
4461 bool MasmParser::parseDirectiveStructValue(const StructInfo &Structure,
4462 StringRef Directive, SMLoc DirLoc) {
4463 if (StructInProgress.empty()) {
4464 if (emitStructValues(Structure))
4465 return true;
4466 } else if (addStructField("", Structure)) {
4467 return addErrorSuffix(" in '" + Twine(Directive) + "' directive");
4470 return false;
4473 /// parseDirectiveNamedValue
4474 /// ::= name (byte | word | ... ) [ expression (, expression)* ]
4475 bool MasmParser::parseDirectiveNamedStructValue(const StructInfo &Structure,
4476 StringRef Directive,
4477 SMLoc DirLoc, StringRef Name) {
4478 if (StructInProgress.empty()) {
4479 // Initialize named data value.
4480 MCSymbol *Sym = getContext().getOrCreateSymbol(Name);
4481 getStreamer().emitLabel(Sym);
4482 unsigned Count;
4483 if (emitStructValues(Structure, &Count))
4484 return true;
4485 AsmTypeInfo Type;
4486 Type.Name = Structure.Name;
4487 Type.Size = Structure.Size * Count;
4488 Type.ElementSize = Structure.Size;
4489 Type.Length = Count;
4490 KnownType[Name.lower()] = Type;
4491 } else if (addStructField(Name, Structure)) {
4492 return addErrorSuffix(" in '" + Twine(Directive) + "' directive");
4495 return false;
4498 /// parseDirectiveStruct
4499 /// ::= <name> (STRUC | STRUCT | UNION) [fieldAlign] [, NONUNIQUE]
4500 /// (dataDir | generalDir | offsetDir | nestedStruct)+
4501 /// <name> ENDS
4502 ////// dataDir = data declaration
4503 ////// offsetDir = EVEN, ORG, ALIGN
4504 bool MasmParser::parseDirectiveStruct(StringRef Directive,
4505 DirectiveKind DirKind, StringRef Name,
4506 SMLoc NameLoc) {
4507 // We ignore NONUNIQUE; we do not support OPTION M510 or OPTION OLDSTRUCTS
4508 // anyway, so all field accesses must be qualified.
4509 AsmToken NextTok = getTok();
4510 int64_t AlignmentValue = 1;
4511 if (NextTok.isNot(AsmToken::Comma) &&
4512 NextTok.isNot(AsmToken::EndOfStatement) &&
4513 parseAbsoluteExpression(AlignmentValue)) {
4514 return addErrorSuffix(" in alignment value for '" + Twine(Directive) +
4515 "' directive");
4517 if (!isPowerOf2_64(AlignmentValue)) {
4518 return Error(NextTok.getLoc(), "alignment must be a power of two; was " +
4519 std::to_string(AlignmentValue));
4522 StringRef Qualifier;
4523 SMLoc QualifierLoc;
4524 if (parseOptionalToken(AsmToken::Comma)) {
4525 QualifierLoc = getTok().getLoc();
4526 if (parseIdentifier(Qualifier))
4527 return addErrorSuffix(" in '" + Twine(Directive) + "' directive");
4528 if (!Qualifier.equals_insensitive("nonunique"))
4529 return Error(QualifierLoc, "Unrecognized qualifier for '" +
4530 Twine(Directive) +
4531 "' directive; expected none or NONUNIQUE");
4534 if (parseEOL())
4535 return addErrorSuffix(" in '" + Twine(Directive) + "' directive");
4537 StructInProgress.emplace_back(Name, DirKind == DK_UNION, AlignmentValue);
4538 return false;
4541 /// parseDirectiveNestedStruct
4542 /// ::= (STRUC | STRUCT | UNION) [name]
4543 /// (dataDir | generalDir | offsetDir | nestedStruct)+
4544 /// ENDS
4545 bool MasmParser::parseDirectiveNestedStruct(StringRef Directive,
4546 DirectiveKind DirKind) {
4547 if (StructInProgress.empty())
4548 return TokError("missing name in top-level '" + Twine(Directive) +
4549 "' directive");
4551 StringRef Name;
4552 if (getTok().is(AsmToken::Identifier)) {
4553 Name = getTok().getIdentifier();
4554 parseToken(AsmToken::Identifier);
4556 if (parseEOL())
4557 return addErrorSuffix(" in '" + Twine(Directive) + "' directive");
4559 // Reserve space to ensure Alignment doesn't get invalidated when
4560 // StructInProgress grows.
4561 StructInProgress.reserve(StructInProgress.size() + 1);
4562 StructInProgress.emplace_back(Name, DirKind == DK_UNION,
4563 StructInProgress.back().Alignment);
4564 return false;
4567 bool MasmParser::parseDirectiveEnds(StringRef Name, SMLoc NameLoc) {
4568 if (StructInProgress.empty())
4569 return Error(NameLoc, "ENDS directive without matching STRUC/STRUCT/UNION");
4570 if (StructInProgress.size() > 1)
4571 return Error(NameLoc, "unexpected name in nested ENDS directive");
4572 if (StructInProgress.back().Name.compare_insensitive(Name))
4573 return Error(NameLoc, "mismatched name in ENDS directive; expected '" +
4574 StructInProgress.back().Name + "'");
4575 StructInfo Structure = StructInProgress.pop_back_val();
4576 // Pad to make the structure's size divisible by the smaller of its alignment
4577 // and the size of its largest field.
4578 Structure.Size = llvm::alignTo(
4579 Structure.Size, std::min(Structure.Alignment, Structure.AlignmentSize));
4580 Structs[Name.lower()] = Structure;
4582 if (parseEOL())
4583 return addErrorSuffix(" in ENDS directive");
4585 return false;
4588 bool MasmParser::parseDirectiveNestedEnds() {
4589 if (StructInProgress.empty())
4590 return TokError("ENDS directive without matching STRUC/STRUCT/UNION");
4591 if (StructInProgress.size() == 1)
4592 return TokError("missing name in top-level ENDS directive");
4594 if (parseEOL())
4595 return addErrorSuffix(" in nested ENDS directive");
4597 StructInfo Structure = StructInProgress.pop_back_val();
4598 // Pad to make the structure's size divisible by its alignment.
4599 Structure.Size = llvm::alignTo(Structure.Size, Structure.Alignment);
4601 StructInfo &ParentStruct = StructInProgress.back();
4602 if (Structure.Name.empty()) {
4603 // Anonymous substructures' fields are addressed as if they belong to the
4604 // parent structure - so we transfer them to the parent here.
4605 const size_t OldFields = ParentStruct.Fields.size();
4606 ParentStruct.Fields.insert(
4607 ParentStruct.Fields.end(),
4608 std::make_move_iterator(Structure.Fields.begin()),
4609 std::make_move_iterator(Structure.Fields.end()));
4610 for (const auto &FieldByName : Structure.FieldsByName) {
4611 ParentStruct.FieldsByName[FieldByName.getKey()] =
4612 FieldByName.getValue() + OldFields;
4615 unsigned FirstFieldOffset = 0;
4616 if (!Structure.Fields.empty() && !ParentStruct.IsUnion) {
4617 FirstFieldOffset = llvm::alignTo(
4618 ParentStruct.NextOffset,
4619 std::min(ParentStruct.Alignment, Structure.AlignmentSize));
4622 if (ParentStruct.IsUnion) {
4623 ParentStruct.Size = std::max(ParentStruct.Size, Structure.Size);
4624 } else {
4625 for (auto &Field : llvm::drop_begin(ParentStruct.Fields, OldFields))
4626 Field.Offset += FirstFieldOffset;
4628 const unsigned StructureEnd = FirstFieldOffset + Structure.Size;
4629 if (!ParentStruct.IsUnion) {
4630 ParentStruct.NextOffset = StructureEnd;
4632 ParentStruct.Size = std::max(ParentStruct.Size, StructureEnd);
4634 } else {
4635 FieldInfo &Field = ParentStruct.addField(Structure.Name, FT_STRUCT,
4636 Structure.AlignmentSize);
4637 StructFieldInfo &StructInfo = Field.Contents.StructInfo;
4638 Field.Type = Structure.Size;
4639 Field.LengthOf = 1;
4640 Field.SizeOf = Structure.Size;
4642 const unsigned StructureEnd = Field.Offset + Field.SizeOf;
4643 if (!ParentStruct.IsUnion) {
4644 ParentStruct.NextOffset = StructureEnd;
4646 ParentStruct.Size = std::max(ParentStruct.Size, StructureEnd);
4648 StructInfo.Structure = Structure;
4649 StructInfo.Initializers.emplace_back();
4650 auto &FieldInitializers = StructInfo.Initializers.back().FieldInitializers;
4651 for (const auto &SubField : Structure.Fields) {
4652 FieldInitializers.push_back(SubField.Contents);
4656 return false;
4659 /// parseDirectiveOrg
4660 /// ::= org expression
4661 bool MasmParser::parseDirectiveOrg() {
4662 const MCExpr *Offset;
4663 SMLoc OffsetLoc = Lexer.getLoc();
4664 if (checkForValidSection() || parseExpression(Offset))
4665 return true;
4666 if (parseEOL())
4667 return addErrorSuffix(" in 'org' directive");
4669 if (StructInProgress.empty()) {
4670 // Not in a struct; change the offset for the next instruction or data
4671 if (checkForValidSection())
4672 return addErrorSuffix(" in 'org' directive");
4674 getStreamer().emitValueToOffset(Offset, 0, OffsetLoc);
4675 } else {
4676 // Offset the next field of this struct
4677 StructInfo &Structure = StructInProgress.back();
4678 int64_t OffsetRes;
4679 if (!Offset->evaluateAsAbsolute(OffsetRes, getStreamer().getAssemblerPtr()))
4680 return Error(OffsetLoc,
4681 "expected absolute expression in 'org' directive");
4682 if (OffsetRes < 0)
4683 return Error(
4684 OffsetLoc,
4685 "expected non-negative value in struct's 'org' directive; was " +
4686 std::to_string(OffsetRes));
4687 Structure.NextOffset = static_cast<unsigned>(OffsetRes);
4689 // ORG-affected structures cannot be initialized
4690 Structure.Initializable = false;
4693 return false;
4696 bool MasmParser::emitAlignTo(int64_t Alignment) {
4697 if (StructInProgress.empty()) {
4698 // Not in a struct; align the next instruction or data
4699 if (checkForValidSection())
4700 return true;
4702 // Check whether we should use optimal code alignment for this align
4703 // directive.
4704 const MCSection *Section = getStreamer().getCurrentSectionOnly();
4705 assert(Section && "must have section to emit alignment");
4706 if (Section->useCodeAlign()) {
4707 getStreamer().emitCodeAlignment(Align(Alignment),
4708 &getTargetParser().getSTI(),
4709 /*MaxBytesToEmit=*/0);
4710 } else {
4711 // FIXME: Target specific behavior about how the "extra" bytes are filled.
4712 getStreamer().emitValueToAlignment(Align(Alignment), /*Value=*/0,
4713 /*ValueSize=*/1,
4714 /*MaxBytesToEmit=*/0);
4716 } else {
4717 // Align the next field of this struct
4718 StructInfo &Structure = StructInProgress.back();
4719 Structure.NextOffset = llvm::alignTo(Structure.NextOffset, Alignment);
4722 return false;
4725 /// parseDirectiveAlign
4726 /// ::= align expression
4727 bool MasmParser::parseDirectiveAlign() {
4728 SMLoc AlignmentLoc = getLexer().getLoc();
4729 int64_t Alignment;
4731 // Ignore empty 'align' directives.
4732 if (getTok().is(AsmToken::EndOfStatement)) {
4733 return Warning(AlignmentLoc,
4734 "align directive with no operand is ignored") &&
4735 parseEOL();
4737 if (parseAbsoluteExpression(Alignment) || parseEOL())
4738 return addErrorSuffix(" in align directive");
4740 // Always emit an alignment here even if we throw an error.
4741 bool ReturnVal = false;
4743 // Reject alignments that aren't either a power of two or zero, for ML.exe
4744 // compatibility. Alignment of zero is silently rounded up to one.
4745 if (Alignment == 0)
4746 Alignment = 1;
4747 if (!isPowerOf2_64(Alignment))
4748 ReturnVal |= Error(AlignmentLoc, "alignment must be a power of 2; was " +
4749 std::to_string(Alignment));
4751 if (emitAlignTo(Alignment))
4752 ReturnVal |= addErrorSuffix(" in align directive");
4754 return ReturnVal;
4757 /// parseDirectiveEven
4758 /// ::= even
4759 bool MasmParser::parseDirectiveEven() {
4760 if (parseEOL() || emitAlignTo(2))
4761 return addErrorSuffix(" in even directive");
4763 return false;
4766 /// parseDirectiveFile
4767 /// ::= .file filename
4768 /// ::= .file number [directory] filename [md5 checksum] [source source-text]
4769 bool MasmParser::parseDirectiveFile(SMLoc DirectiveLoc) {
4770 // FIXME: I'm not sure what this is.
4771 int64_t FileNumber = -1;
4772 if (getLexer().is(AsmToken::Integer)) {
4773 FileNumber = getTok().getIntVal();
4774 Lex();
4776 if (FileNumber < 0)
4777 return TokError("negative file number");
4780 std::string Path;
4782 // Usually the directory and filename together, otherwise just the directory.
4783 // Allow the strings to have escaped octal character sequence.
4784 if (check(getTok().isNot(AsmToken::String),
4785 "unexpected token in '.file' directive") ||
4786 parseEscapedString(Path))
4787 return true;
4789 StringRef Directory;
4790 StringRef Filename;
4791 std::string FilenameData;
4792 if (getLexer().is(AsmToken::String)) {
4793 if (check(FileNumber == -1,
4794 "explicit path specified, but no file number") ||
4795 parseEscapedString(FilenameData))
4796 return true;
4797 Filename = FilenameData;
4798 Directory = Path;
4799 } else {
4800 Filename = Path;
4803 uint64_t MD5Hi, MD5Lo;
4804 bool HasMD5 = false;
4806 std::optional<StringRef> Source;
4807 bool HasSource = false;
4808 std::string SourceString;
4810 while (!parseOptionalToken(AsmToken::EndOfStatement)) {
4811 StringRef Keyword;
4812 if (check(getTok().isNot(AsmToken::Identifier),
4813 "unexpected token in '.file' directive") ||
4814 parseIdentifier(Keyword))
4815 return true;
4816 if (Keyword == "md5") {
4817 HasMD5 = true;
4818 if (check(FileNumber == -1,
4819 "MD5 checksum specified, but no file number") ||
4820 parseHexOcta(*this, MD5Hi, MD5Lo))
4821 return true;
4822 } else if (Keyword == "source") {
4823 HasSource = true;
4824 if (check(FileNumber == -1,
4825 "source specified, but no file number") ||
4826 check(getTok().isNot(AsmToken::String),
4827 "unexpected token in '.file' directive") ||
4828 parseEscapedString(SourceString))
4829 return true;
4830 } else {
4831 return TokError("unexpected token in '.file' directive");
4835 if (FileNumber == -1) {
4836 // Ignore the directive if there is no number and the target doesn't support
4837 // numberless .file directives. This allows some portability of assembler
4838 // between different object file formats.
4839 if (getContext().getAsmInfo()->hasSingleParameterDotFile())
4840 getStreamer().emitFileDirective(Filename);
4841 } else {
4842 // In case there is a -g option as well as debug info from directive .file,
4843 // we turn off the -g option, directly use the existing debug info instead.
4844 // Throw away any implicit file table for the assembler source.
4845 if (Ctx.getGenDwarfForAssembly()) {
4846 Ctx.getMCDwarfLineTable(0).resetFileTable();
4847 Ctx.setGenDwarfForAssembly(false);
4850 std::optional<MD5::MD5Result> CKMem;
4851 if (HasMD5) {
4852 MD5::MD5Result Sum;
4853 for (unsigned i = 0; i != 8; ++i) {
4854 Sum[i] = uint8_t(MD5Hi >> ((7 - i) * 8));
4855 Sum[i + 8] = uint8_t(MD5Lo >> ((7 - i) * 8));
4857 CKMem = Sum;
4859 if (HasSource) {
4860 char *SourceBuf = static_cast<char *>(Ctx.allocate(SourceString.size()));
4861 memcpy(SourceBuf, SourceString.data(), SourceString.size());
4862 Source = StringRef(SourceBuf, SourceString.size());
4864 if (FileNumber == 0) {
4865 if (Ctx.getDwarfVersion() < 5)
4866 return Warning(DirectiveLoc, "file 0 not supported prior to DWARF-5");
4867 getStreamer().emitDwarfFile0Directive(Directory, Filename, CKMem, Source);
4868 } else {
4869 Expected<unsigned> FileNumOrErr = getStreamer().tryEmitDwarfFileDirective(
4870 FileNumber, Directory, Filename, CKMem, Source);
4871 if (!FileNumOrErr)
4872 return Error(DirectiveLoc, toString(FileNumOrErr.takeError()));
4874 // Alert the user if there are some .file directives with MD5 and some not.
4875 // But only do that once.
4876 if (!ReportedInconsistentMD5 && !Ctx.isDwarfMD5UsageConsistent(0)) {
4877 ReportedInconsistentMD5 = true;
4878 return Warning(DirectiveLoc, "inconsistent use of MD5 checksums");
4882 return false;
4885 /// parseDirectiveLine
4886 /// ::= .line [number]
4887 bool MasmParser::parseDirectiveLine() {
4888 int64_t LineNumber;
4889 if (getLexer().is(AsmToken::Integer)) {
4890 if (parseIntToken(LineNumber, "unexpected token in '.line' directive"))
4891 return true;
4892 (void)LineNumber;
4893 // FIXME: Do something with the .line.
4895 if (parseEOL())
4896 return true;
4898 return false;
4901 /// parseDirectiveLoc
4902 /// ::= .loc FileNumber [LineNumber] [ColumnPos] [basic_block] [prologue_end]
4903 /// [epilogue_begin] [is_stmt VALUE] [isa VALUE]
4904 /// The first number is a file number, must have been previously assigned with
4905 /// a .file directive, the second number is the line number and optionally the
4906 /// third number is a column position (zero if not specified). The remaining
4907 /// optional items are .loc sub-directives.
4908 bool MasmParser::parseDirectiveLoc() {
4909 int64_t FileNumber = 0, LineNumber = 0;
4910 SMLoc Loc = getTok().getLoc();
4911 if (parseIntToken(FileNumber, "unexpected token in '.loc' directive") ||
4912 check(FileNumber < 1 && Ctx.getDwarfVersion() < 5, Loc,
4913 "file number less than one in '.loc' directive") ||
4914 check(!getContext().isValidDwarfFileNumber(FileNumber), Loc,
4915 "unassigned file number in '.loc' directive"))
4916 return true;
4918 // optional
4919 if (getLexer().is(AsmToken::Integer)) {
4920 LineNumber = getTok().getIntVal();
4921 if (LineNumber < 0)
4922 return TokError("line number less than zero in '.loc' directive");
4923 Lex();
4926 int64_t ColumnPos = 0;
4927 if (getLexer().is(AsmToken::Integer)) {
4928 ColumnPos = getTok().getIntVal();
4929 if (ColumnPos < 0)
4930 return TokError("column position less than zero in '.loc' directive");
4931 Lex();
4934 auto PrevFlags = getContext().getCurrentDwarfLoc().getFlags();
4935 unsigned Flags = PrevFlags & DWARF2_FLAG_IS_STMT;
4936 unsigned Isa = 0;
4937 int64_t Discriminator = 0;
4939 auto parseLocOp = [&]() -> bool {
4940 StringRef Name;
4941 SMLoc Loc = getTok().getLoc();
4942 if (parseIdentifier(Name))
4943 return TokError("unexpected token in '.loc' directive");
4945 if (Name == "basic_block")
4946 Flags |= DWARF2_FLAG_BASIC_BLOCK;
4947 else if (Name == "prologue_end")
4948 Flags |= DWARF2_FLAG_PROLOGUE_END;
4949 else if (Name == "epilogue_begin")
4950 Flags |= DWARF2_FLAG_EPILOGUE_BEGIN;
4951 else if (Name == "is_stmt") {
4952 Loc = getTok().getLoc();
4953 const MCExpr *Value;
4954 if (parseExpression(Value))
4955 return true;
4956 // The expression must be the constant 0 or 1.
4957 if (const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(Value)) {
4958 int Value = MCE->getValue();
4959 if (Value == 0)
4960 Flags &= ~DWARF2_FLAG_IS_STMT;
4961 else if (Value == 1)
4962 Flags |= DWARF2_FLAG_IS_STMT;
4963 else
4964 return Error(Loc, "is_stmt value not 0 or 1");
4965 } else {
4966 return Error(Loc, "is_stmt value not the constant value of 0 or 1");
4968 } else if (Name == "isa") {
4969 Loc = getTok().getLoc();
4970 const MCExpr *Value;
4971 if (parseExpression(Value))
4972 return true;
4973 // The expression must be a constant greater or equal to 0.
4974 if (const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(Value)) {
4975 int Value = MCE->getValue();
4976 if (Value < 0)
4977 return Error(Loc, "isa number less than zero");
4978 Isa = Value;
4979 } else {
4980 return Error(Loc, "isa number not a constant value");
4982 } else if (Name == "discriminator") {
4983 if (parseAbsoluteExpression(Discriminator))
4984 return true;
4985 } else {
4986 return Error(Loc, "unknown sub-directive in '.loc' directive");
4988 return false;
4991 if (parseMany(parseLocOp, false /*hasComma*/))
4992 return true;
4994 getStreamer().emitDwarfLocDirective(FileNumber, LineNumber, ColumnPos, Flags,
4995 Isa, Discriminator, StringRef());
4997 return false;
5000 /// parseDirectiveStabs
5001 /// ::= .stabs string, number, number, number
5002 bool MasmParser::parseDirectiveStabs() {
5003 return TokError("unsupported directive '.stabs'");
5006 /// parseDirectiveCVFile
5007 /// ::= .cv_file number filename [checksum] [checksumkind]
5008 bool MasmParser::parseDirectiveCVFile() {
5009 SMLoc FileNumberLoc = getTok().getLoc();
5010 int64_t FileNumber;
5011 std::string Filename;
5012 std::string Checksum;
5013 int64_t ChecksumKind = 0;
5015 if (parseIntToken(FileNumber,
5016 "expected file number in '.cv_file' directive") ||
5017 check(FileNumber < 1, FileNumberLoc, "file number less than one") ||
5018 check(getTok().isNot(AsmToken::String),
5019 "unexpected token in '.cv_file' directive") ||
5020 parseEscapedString(Filename))
5021 return true;
5022 if (!parseOptionalToken(AsmToken::EndOfStatement)) {
5023 if (check(getTok().isNot(AsmToken::String),
5024 "unexpected token in '.cv_file' directive") ||
5025 parseEscapedString(Checksum) ||
5026 parseIntToken(ChecksumKind,
5027 "expected checksum kind in '.cv_file' directive") ||
5028 parseEOL())
5029 return true;
5032 Checksum = fromHex(Checksum);
5033 void *CKMem = Ctx.allocate(Checksum.size(), 1);
5034 memcpy(CKMem, Checksum.data(), Checksum.size());
5035 ArrayRef<uint8_t> ChecksumAsBytes(reinterpret_cast<const uint8_t *>(CKMem),
5036 Checksum.size());
5038 if (!getStreamer().emitCVFileDirective(FileNumber, Filename, ChecksumAsBytes,
5039 static_cast<uint8_t>(ChecksumKind)))
5040 return Error(FileNumberLoc, "file number already allocated");
5042 return false;
5045 bool MasmParser::parseCVFunctionId(int64_t &FunctionId,
5046 StringRef DirectiveName) {
5047 SMLoc Loc;
5048 return parseTokenLoc(Loc) ||
5049 parseIntToken(FunctionId, "expected function id in '" + DirectiveName +
5050 "' directive") ||
5051 check(FunctionId < 0 || FunctionId >= UINT_MAX, Loc,
5052 "expected function id within range [0, UINT_MAX)");
5055 bool MasmParser::parseCVFileId(int64_t &FileNumber, StringRef DirectiveName) {
5056 SMLoc Loc;
5057 return parseTokenLoc(Loc) ||
5058 parseIntToken(FileNumber, "expected integer in '" + DirectiveName +
5059 "' directive") ||
5060 check(FileNumber < 1, Loc, "file number less than one in '" +
5061 DirectiveName + "' directive") ||
5062 check(!getCVContext().isValidFileNumber(FileNumber), Loc,
5063 "unassigned file number in '" + DirectiveName + "' directive");
5066 /// parseDirectiveCVFuncId
5067 /// ::= .cv_func_id FunctionId
5069 /// Introduces a function ID that can be used with .cv_loc.
5070 bool MasmParser::parseDirectiveCVFuncId() {
5071 SMLoc FunctionIdLoc = getTok().getLoc();
5072 int64_t FunctionId;
5074 if (parseCVFunctionId(FunctionId, ".cv_func_id") || parseEOL())
5075 return true;
5077 if (!getStreamer().emitCVFuncIdDirective(FunctionId))
5078 return Error(FunctionIdLoc, "function id already allocated");
5080 return false;
5083 /// parseDirectiveCVInlineSiteId
5084 /// ::= .cv_inline_site_id FunctionId
5085 /// "within" IAFunc
5086 /// "inlined_at" IAFile IALine [IACol]
5088 /// Introduces a function ID that can be used with .cv_loc. Includes "inlined
5089 /// at" source location information for use in the line table of the caller,
5090 /// whether the caller is a real function or another inlined call site.
5091 bool MasmParser::parseDirectiveCVInlineSiteId() {
5092 SMLoc FunctionIdLoc = getTok().getLoc();
5093 int64_t FunctionId;
5094 int64_t IAFunc;
5095 int64_t IAFile;
5096 int64_t IALine;
5097 int64_t IACol = 0;
5099 // FunctionId
5100 if (parseCVFunctionId(FunctionId, ".cv_inline_site_id"))
5101 return true;
5103 // "within"
5104 if (check((getLexer().isNot(AsmToken::Identifier) ||
5105 getTok().getIdentifier() != "within"),
5106 "expected 'within' identifier in '.cv_inline_site_id' directive"))
5107 return true;
5108 Lex();
5110 // IAFunc
5111 if (parseCVFunctionId(IAFunc, ".cv_inline_site_id"))
5112 return true;
5114 // "inlined_at"
5115 if (check((getLexer().isNot(AsmToken::Identifier) ||
5116 getTok().getIdentifier() != "inlined_at"),
5117 "expected 'inlined_at' identifier in '.cv_inline_site_id' "
5118 "directive") )
5119 return true;
5120 Lex();
5122 // IAFile IALine
5123 if (parseCVFileId(IAFile, ".cv_inline_site_id") ||
5124 parseIntToken(IALine, "expected line number after 'inlined_at'"))
5125 return true;
5127 // [IACol]
5128 if (getLexer().is(AsmToken::Integer)) {
5129 IACol = getTok().getIntVal();
5130 Lex();
5133 if (parseEOL())
5134 return true;
5136 if (!getStreamer().emitCVInlineSiteIdDirective(FunctionId, IAFunc, IAFile,
5137 IALine, IACol, FunctionIdLoc))
5138 return Error(FunctionIdLoc, "function id already allocated");
5140 return false;
5143 /// parseDirectiveCVLoc
5144 /// ::= .cv_loc FunctionId FileNumber [LineNumber] [ColumnPos] [prologue_end]
5145 /// [is_stmt VALUE]
5146 /// The first number is a file number, must have been previously assigned with
5147 /// a .file directive, the second number is the line number and optionally the
5148 /// third number is a column position (zero if not specified). The remaining
5149 /// optional items are .loc sub-directives.
5150 bool MasmParser::parseDirectiveCVLoc() {
5151 SMLoc DirectiveLoc = getTok().getLoc();
5152 int64_t FunctionId, FileNumber;
5153 if (parseCVFunctionId(FunctionId, ".cv_loc") ||
5154 parseCVFileId(FileNumber, ".cv_loc"))
5155 return true;
5157 int64_t LineNumber = 0;
5158 if (getLexer().is(AsmToken::Integer)) {
5159 LineNumber = getTok().getIntVal();
5160 if (LineNumber < 0)
5161 return TokError("line number less than zero in '.cv_loc' directive");
5162 Lex();
5165 int64_t ColumnPos = 0;
5166 if (getLexer().is(AsmToken::Integer)) {
5167 ColumnPos = getTok().getIntVal();
5168 if (ColumnPos < 0)
5169 return TokError("column position less than zero in '.cv_loc' directive");
5170 Lex();
5173 bool PrologueEnd = false;
5174 uint64_t IsStmt = 0;
5176 auto parseOp = [&]() -> bool {
5177 StringRef Name;
5178 SMLoc Loc = getTok().getLoc();
5179 if (parseIdentifier(Name))
5180 return TokError("unexpected token in '.cv_loc' directive");
5181 if (Name == "prologue_end")
5182 PrologueEnd = true;
5183 else if (Name == "is_stmt") {
5184 Loc = getTok().getLoc();
5185 const MCExpr *Value;
5186 if (parseExpression(Value))
5187 return true;
5188 // The expression must be the constant 0 or 1.
5189 IsStmt = ~0ULL;
5190 if (const auto *MCE = dyn_cast<MCConstantExpr>(Value))
5191 IsStmt = MCE->getValue();
5193 if (IsStmt > 1)
5194 return Error(Loc, "is_stmt value not 0 or 1");
5195 } else {
5196 return Error(Loc, "unknown sub-directive in '.cv_loc' directive");
5198 return false;
5201 if (parseMany(parseOp, false /*hasComma*/))
5202 return true;
5204 getStreamer().emitCVLocDirective(FunctionId, FileNumber, LineNumber,
5205 ColumnPos, PrologueEnd, IsStmt, StringRef(),
5206 DirectiveLoc);
5207 return false;
5210 /// parseDirectiveCVLinetable
5211 /// ::= .cv_linetable FunctionId, FnStart, FnEnd
5212 bool MasmParser::parseDirectiveCVLinetable() {
5213 int64_t FunctionId;
5214 StringRef FnStartName, FnEndName;
5215 SMLoc Loc = getTok().getLoc();
5216 if (parseCVFunctionId(FunctionId, ".cv_linetable") ||
5217 parseToken(AsmToken::Comma,
5218 "unexpected token in '.cv_linetable' directive") ||
5219 parseTokenLoc(Loc) || check(parseIdentifier(FnStartName), Loc,
5220 "expected identifier in directive") ||
5221 parseToken(AsmToken::Comma,
5222 "unexpected token in '.cv_linetable' directive") ||
5223 parseTokenLoc(Loc) || check(parseIdentifier(FnEndName), Loc,
5224 "expected identifier in directive"))
5225 return true;
5227 MCSymbol *FnStartSym = getContext().getOrCreateSymbol(FnStartName);
5228 MCSymbol *FnEndSym = getContext().getOrCreateSymbol(FnEndName);
5230 getStreamer().emitCVLinetableDirective(FunctionId, FnStartSym, FnEndSym);
5231 return false;
5234 /// parseDirectiveCVInlineLinetable
5235 /// ::= .cv_inline_linetable PrimaryFunctionId FileId LineNum FnStart FnEnd
5236 bool MasmParser::parseDirectiveCVInlineLinetable() {
5237 int64_t PrimaryFunctionId, SourceFileId, SourceLineNum;
5238 StringRef FnStartName, FnEndName;
5239 SMLoc Loc = getTok().getLoc();
5240 if (parseCVFunctionId(PrimaryFunctionId, ".cv_inline_linetable") ||
5241 parseTokenLoc(Loc) ||
5242 parseIntToken(
5243 SourceFileId,
5244 "expected SourceField in '.cv_inline_linetable' directive") ||
5245 check(SourceFileId <= 0, Loc,
5246 "File id less than zero in '.cv_inline_linetable' directive") ||
5247 parseTokenLoc(Loc) ||
5248 parseIntToken(
5249 SourceLineNum,
5250 "expected SourceLineNum in '.cv_inline_linetable' directive") ||
5251 check(SourceLineNum < 0, Loc,
5252 "Line number less than zero in '.cv_inline_linetable' directive") ||
5253 parseTokenLoc(Loc) || check(parseIdentifier(FnStartName), Loc,
5254 "expected identifier in directive") ||
5255 parseTokenLoc(Loc) || check(parseIdentifier(FnEndName), Loc,
5256 "expected identifier in directive"))
5257 return true;
5259 if (parseEOL())
5260 return true;
5262 MCSymbol *FnStartSym = getContext().getOrCreateSymbol(FnStartName);
5263 MCSymbol *FnEndSym = getContext().getOrCreateSymbol(FnEndName);
5264 getStreamer().emitCVInlineLinetableDirective(PrimaryFunctionId, SourceFileId,
5265 SourceLineNum, FnStartSym,
5266 FnEndSym);
5267 return false;
5270 void MasmParser::initializeCVDefRangeTypeMap() {
5271 CVDefRangeTypeMap["reg"] = CVDR_DEFRANGE_REGISTER;
5272 CVDefRangeTypeMap["frame_ptr_rel"] = CVDR_DEFRANGE_FRAMEPOINTER_REL;
5273 CVDefRangeTypeMap["subfield_reg"] = CVDR_DEFRANGE_SUBFIELD_REGISTER;
5274 CVDefRangeTypeMap["reg_rel"] = CVDR_DEFRANGE_REGISTER_REL;
5277 /// parseDirectiveCVDefRange
5278 /// ::= .cv_def_range RangeStart RangeEnd (GapStart GapEnd)*, bytes*
5279 bool MasmParser::parseDirectiveCVDefRange() {
5280 SMLoc Loc;
5281 std::vector<std::pair<const MCSymbol *, const MCSymbol *>> Ranges;
5282 while (getLexer().is(AsmToken::Identifier)) {
5283 Loc = getLexer().getLoc();
5284 StringRef GapStartName;
5285 if (parseIdentifier(GapStartName))
5286 return Error(Loc, "expected identifier in directive");
5287 MCSymbol *GapStartSym = getContext().getOrCreateSymbol(GapStartName);
5289 Loc = getLexer().getLoc();
5290 StringRef GapEndName;
5291 if (parseIdentifier(GapEndName))
5292 return Error(Loc, "expected identifier in directive");
5293 MCSymbol *GapEndSym = getContext().getOrCreateSymbol(GapEndName);
5295 Ranges.push_back({GapStartSym, GapEndSym});
5298 StringRef CVDefRangeTypeStr;
5299 if (parseToken(
5300 AsmToken::Comma,
5301 "expected comma before def_range type in .cv_def_range directive") ||
5302 parseIdentifier(CVDefRangeTypeStr))
5303 return Error(Loc, "expected def_range type in directive");
5305 StringMap<CVDefRangeType>::const_iterator CVTypeIt =
5306 CVDefRangeTypeMap.find(CVDefRangeTypeStr);
5307 CVDefRangeType CVDRType = (CVTypeIt == CVDefRangeTypeMap.end())
5308 ? CVDR_DEFRANGE
5309 : CVTypeIt->getValue();
5310 switch (CVDRType) {
5311 case CVDR_DEFRANGE_REGISTER: {
5312 int64_t DRRegister;
5313 if (parseToken(AsmToken::Comma, "expected comma before register number in "
5314 ".cv_def_range directive") ||
5315 parseAbsoluteExpression(DRRegister))
5316 return Error(Loc, "expected register number");
5318 codeview::DefRangeRegisterHeader DRHdr;
5319 DRHdr.Register = DRRegister;
5320 DRHdr.MayHaveNoName = 0;
5321 getStreamer().emitCVDefRangeDirective(Ranges, DRHdr);
5322 break;
5324 case CVDR_DEFRANGE_FRAMEPOINTER_REL: {
5325 int64_t DROffset;
5326 if (parseToken(AsmToken::Comma,
5327 "expected comma before offset in .cv_def_range directive") ||
5328 parseAbsoluteExpression(DROffset))
5329 return Error(Loc, "expected offset value");
5331 codeview::DefRangeFramePointerRelHeader DRHdr;
5332 DRHdr.Offset = DROffset;
5333 getStreamer().emitCVDefRangeDirective(Ranges, DRHdr);
5334 break;
5336 case CVDR_DEFRANGE_SUBFIELD_REGISTER: {
5337 int64_t DRRegister;
5338 int64_t DROffsetInParent;
5339 if (parseToken(AsmToken::Comma, "expected comma before register number in "
5340 ".cv_def_range directive") ||
5341 parseAbsoluteExpression(DRRegister))
5342 return Error(Loc, "expected register number");
5343 if (parseToken(AsmToken::Comma,
5344 "expected comma before offset in .cv_def_range directive") ||
5345 parseAbsoluteExpression(DROffsetInParent))
5346 return Error(Loc, "expected offset value");
5348 codeview::DefRangeSubfieldRegisterHeader DRHdr;
5349 DRHdr.Register = DRRegister;
5350 DRHdr.MayHaveNoName = 0;
5351 DRHdr.OffsetInParent = DROffsetInParent;
5352 getStreamer().emitCVDefRangeDirective(Ranges, DRHdr);
5353 break;
5355 case CVDR_DEFRANGE_REGISTER_REL: {
5356 int64_t DRRegister;
5357 int64_t DRFlags;
5358 int64_t DRBasePointerOffset;
5359 if (parseToken(AsmToken::Comma, "expected comma before register number in "
5360 ".cv_def_range directive") ||
5361 parseAbsoluteExpression(DRRegister))
5362 return Error(Loc, "expected register value");
5363 if (parseToken(
5364 AsmToken::Comma,
5365 "expected comma before flag value in .cv_def_range directive") ||
5366 parseAbsoluteExpression(DRFlags))
5367 return Error(Loc, "expected flag value");
5368 if (parseToken(AsmToken::Comma, "expected comma before base pointer offset "
5369 "in .cv_def_range directive") ||
5370 parseAbsoluteExpression(DRBasePointerOffset))
5371 return Error(Loc, "expected base pointer offset value");
5373 codeview::DefRangeRegisterRelHeader DRHdr;
5374 DRHdr.Register = DRRegister;
5375 DRHdr.Flags = DRFlags;
5376 DRHdr.BasePointerOffset = DRBasePointerOffset;
5377 getStreamer().emitCVDefRangeDirective(Ranges, DRHdr);
5378 break;
5380 default:
5381 return Error(Loc, "unexpected def_range type in .cv_def_range directive");
5383 return true;
5386 /// parseDirectiveCVString
5387 /// ::= .cv_stringtable "string"
5388 bool MasmParser::parseDirectiveCVString() {
5389 std::string Data;
5390 if (checkForValidSection() || parseEscapedString(Data))
5391 return addErrorSuffix(" in '.cv_string' directive");
5393 // Put the string in the table and emit the offset.
5394 std::pair<StringRef, unsigned> Insertion =
5395 getCVContext().addToStringTable(Data);
5396 getStreamer().emitIntValue(Insertion.second, 4);
5397 return false;
5400 /// parseDirectiveCVStringTable
5401 /// ::= .cv_stringtable
5402 bool MasmParser::parseDirectiveCVStringTable() {
5403 getStreamer().emitCVStringTableDirective();
5404 return false;
5407 /// parseDirectiveCVFileChecksums
5408 /// ::= .cv_filechecksums
5409 bool MasmParser::parseDirectiveCVFileChecksums() {
5410 getStreamer().emitCVFileChecksumsDirective();
5411 return false;
5414 /// parseDirectiveCVFileChecksumOffset
5415 /// ::= .cv_filechecksumoffset fileno
5416 bool MasmParser::parseDirectiveCVFileChecksumOffset() {
5417 int64_t FileNo;
5418 if (parseIntToken(FileNo, "expected identifier in directive"))
5419 return true;
5420 if (parseEOL())
5421 return true;
5422 getStreamer().emitCVFileChecksumOffsetDirective(FileNo);
5423 return false;
5426 /// parseDirectiveCVFPOData
5427 /// ::= .cv_fpo_data procsym
5428 bool MasmParser::parseDirectiveCVFPOData() {
5429 SMLoc DirLoc = getLexer().getLoc();
5430 StringRef ProcName;
5431 if (parseIdentifier(ProcName))
5432 return TokError("expected symbol name");
5433 if (parseEOL("unexpected tokens"))
5434 return addErrorSuffix(" in '.cv_fpo_data' directive");
5435 MCSymbol *ProcSym = getContext().getOrCreateSymbol(ProcName);
5436 getStreamer().emitCVFPOData(ProcSym, DirLoc);
5437 return false;
5440 /// parseDirectiveCFISections
5441 /// ::= .cfi_sections section [, section]
5442 bool MasmParser::parseDirectiveCFISections() {
5443 StringRef Name;
5444 bool EH = false;
5445 bool Debug = false;
5447 if (parseIdentifier(Name))
5448 return TokError("Expected an identifier");
5450 if (Name == ".eh_frame")
5451 EH = true;
5452 else if (Name == ".debug_frame")
5453 Debug = true;
5455 if (getLexer().is(AsmToken::Comma)) {
5456 Lex();
5458 if (parseIdentifier(Name))
5459 return TokError("Expected an identifier");
5461 if (Name == ".eh_frame")
5462 EH = true;
5463 else if (Name == ".debug_frame")
5464 Debug = true;
5467 getStreamer().emitCFISections(EH, Debug);
5468 return false;
5471 /// parseDirectiveCFIStartProc
5472 /// ::= .cfi_startproc [simple]
5473 bool MasmParser::parseDirectiveCFIStartProc() {
5474 StringRef Simple;
5475 if (!parseOptionalToken(AsmToken::EndOfStatement)) {
5476 if (check(parseIdentifier(Simple) || Simple != "simple",
5477 "unexpected token") ||
5478 parseEOL())
5479 return addErrorSuffix(" in '.cfi_startproc' directive");
5482 // TODO(kristina): Deal with a corner case of incorrect diagnostic context
5483 // being produced if this directive is emitted as part of preprocessor macro
5484 // expansion which can *ONLY* happen if Clang's cc1as is the API consumer.
5485 // Tools like llvm-mc on the other hand are not affected by it, and report
5486 // correct context information.
5487 getStreamer().emitCFIStartProc(!Simple.empty(), Lexer.getLoc());
5488 return false;
5491 /// parseDirectiveCFIEndProc
5492 /// ::= .cfi_endproc
5493 bool MasmParser::parseDirectiveCFIEndProc() {
5494 getStreamer().emitCFIEndProc();
5495 return false;
5498 /// parse register name or number.
5499 bool MasmParser::parseRegisterOrRegisterNumber(int64_t &Register,
5500 SMLoc DirectiveLoc) {
5501 MCRegister RegNo;
5503 if (getLexer().isNot(AsmToken::Integer)) {
5504 if (getTargetParser().parseRegister(RegNo, DirectiveLoc, DirectiveLoc))
5505 return true;
5506 Register = getContext().getRegisterInfo()->getDwarfRegNum(RegNo, true);
5507 } else
5508 return parseAbsoluteExpression(Register);
5510 return false;
5513 /// parseDirectiveCFIDefCfa
5514 /// ::= .cfi_def_cfa register, offset
5515 bool MasmParser::parseDirectiveCFIDefCfa(SMLoc DirectiveLoc) {
5516 int64_t Register = 0, Offset = 0;
5517 if (parseRegisterOrRegisterNumber(Register, DirectiveLoc) ||
5518 parseToken(AsmToken::Comma, "unexpected token in directive") ||
5519 parseAbsoluteExpression(Offset))
5520 return true;
5522 getStreamer().emitCFIDefCfa(Register, Offset);
5523 return false;
5526 /// parseDirectiveCFIDefCfaOffset
5527 /// ::= .cfi_def_cfa_offset offset
5528 bool MasmParser::parseDirectiveCFIDefCfaOffset(SMLoc DirectiveLoc) {
5529 int64_t Offset = 0;
5530 if (parseAbsoluteExpression(Offset))
5531 return true;
5533 getStreamer().emitCFIDefCfaOffset(Offset, DirectiveLoc);
5534 return false;
5537 /// parseDirectiveCFIRegister
5538 /// ::= .cfi_register register, register
5539 bool MasmParser::parseDirectiveCFIRegister(SMLoc DirectiveLoc) {
5540 int64_t Register1 = 0, Register2 = 0;
5541 if (parseRegisterOrRegisterNumber(Register1, DirectiveLoc) ||
5542 parseToken(AsmToken::Comma, "unexpected token in directive") ||
5543 parseRegisterOrRegisterNumber(Register2, DirectiveLoc))
5544 return true;
5546 getStreamer().emitCFIRegister(Register1, Register2, DirectiveLoc);
5547 return false;
5550 /// parseDirectiveCFIWindowSave
5551 /// ::= .cfi_window_save
5552 bool MasmParser::parseDirectiveCFIWindowSave(SMLoc DirectiveLoc) {
5553 getStreamer().emitCFIWindowSave(DirectiveLoc);
5554 return false;
5557 /// parseDirectiveCFIAdjustCfaOffset
5558 /// ::= .cfi_adjust_cfa_offset adjustment
5559 bool MasmParser::parseDirectiveCFIAdjustCfaOffset(SMLoc DirectiveLoc) {
5560 int64_t Adjustment = 0;
5561 if (parseAbsoluteExpression(Adjustment))
5562 return true;
5564 getStreamer().emitCFIAdjustCfaOffset(Adjustment, DirectiveLoc);
5565 return false;
5568 /// parseDirectiveCFIDefCfaRegister
5569 /// ::= .cfi_def_cfa_register register
5570 bool MasmParser::parseDirectiveCFIDefCfaRegister(SMLoc DirectiveLoc) {
5571 int64_t Register = 0;
5572 if (parseRegisterOrRegisterNumber(Register, DirectiveLoc))
5573 return true;
5575 getStreamer().emitCFIDefCfaRegister(Register);
5576 return false;
5579 /// parseDirectiveCFIOffset
5580 /// ::= .cfi_offset register, offset
5581 bool MasmParser::parseDirectiveCFIOffset(SMLoc DirectiveLoc) {
5582 int64_t Register = 0;
5583 int64_t Offset = 0;
5585 if (parseRegisterOrRegisterNumber(Register, DirectiveLoc) ||
5586 parseToken(AsmToken::Comma, "unexpected token in directive") ||
5587 parseAbsoluteExpression(Offset))
5588 return true;
5590 getStreamer().emitCFIOffset(Register, Offset);
5591 return false;
5594 /// parseDirectiveCFIRelOffset
5595 /// ::= .cfi_rel_offset register, offset
5596 bool MasmParser::parseDirectiveCFIRelOffset(SMLoc DirectiveLoc) {
5597 int64_t Register = 0, Offset = 0;
5599 if (parseRegisterOrRegisterNumber(Register, DirectiveLoc) ||
5600 parseToken(AsmToken::Comma, "unexpected token in directive") ||
5601 parseAbsoluteExpression(Offset))
5602 return true;
5604 getStreamer().emitCFIRelOffset(Register, Offset, DirectiveLoc);
5605 return false;
5608 static bool isValidEncoding(int64_t Encoding) {
5609 if (Encoding & ~0xff)
5610 return false;
5612 if (Encoding == dwarf::DW_EH_PE_omit)
5613 return true;
5615 const unsigned Format = Encoding & 0xf;
5616 if (Format != dwarf::DW_EH_PE_absptr && Format != dwarf::DW_EH_PE_udata2 &&
5617 Format != dwarf::DW_EH_PE_udata4 && Format != dwarf::DW_EH_PE_udata8 &&
5618 Format != dwarf::DW_EH_PE_sdata2 && Format != dwarf::DW_EH_PE_sdata4 &&
5619 Format != dwarf::DW_EH_PE_sdata8 && Format != dwarf::DW_EH_PE_signed)
5620 return false;
5622 const unsigned Application = Encoding & 0x70;
5623 if (Application != dwarf::DW_EH_PE_absptr &&
5624 Application != dwarf::DW_EH_PE_pcrel)
5625 return false;
5627 return true;
5630 /// parseDirectiveCFIPersonalityOrLsda
5631 /// IsPersonality true for cfi_personality, false for cfi_lsda
5632 /// ::= .cfi_personality encoding, [symbol_name]
5633 /// ::= .cfi_lsda encoding, [symbol_name]
5634 bool MasmParser::parseDirectiveCFIPersonalityOrLsda(bool IsPersonality) {
5635 int64_t Encoding = 0;
5636 if (parseAbsoluteExpression(Encoding))
5637 return true;
5638 if (Encoding == dwarf::DW_EH_PE_omit)
5639 return false;
5641 StringRef Name;
5642 if (check(!isValidEncoding(Encoding), "unsupported encoding.") ||
5643 parseToken(AsmToken::Comma, "unexpected token in directive") ||
5644 check(parseIdentifier(Name), "expected identifier in directive"))
5645 return true;
5647 MCSymbol *Sym = getContext().getOrCreateSymbol(Name);
5649 if (IsPersonality)
5650 getStreamer().emitCFIPersonality(Sym, Encoding);
5651 else
5652 getStreamer().emitCFILsda(Sym, Encoding);
5653 return false;
5656 /// parseDirectiveCFIRememberState
5657 /// ::= .cfi_remember_state
5658 bool MasmParser::parseDirectiveCFIRememberState(SMLoc DirectiveLoc) {
5659 getStreamer().emitCFIRememberState(DirectiveLoc);
5660 return false;
5663 /// parseDirectiveCFIRestoreState
5664 /// ::= .cfi_remember_state
5665 bool MasmParser::parseDirectiveCFIRestoreState(SMLoc DirectiveLoc) {
5666 getStreamer().emitCFIRestoreState(DirectiveLoc);
5667 return false;
5670 /// parseDirectiveCFISameValue
5671 /// ::= .cfi_same_value register
5672 bool MasmParser::parseDirectiveCFISameValue(SMLoc DirectiveLoc) {
5673 int64_t Register = 0;
5675 if (parseRegisterOrRegisterNumber(Register, DirectiveLoc))
5676 return true;
5678 getStreamer().emitCFISameValue(Register, DirectiveLoc);
5679 return false;
5682 /// parseDirectiveCFIRestore
5683 /// ::= .cfi_restore register
5684 bool MasmParser::parseDirectiveCFIRestore(SMLoc DirectiveLoc) {
5685 int64_t Register = 0;
5686 if (parseRegisterOrRegisterNumber(Register, DirectiveLoc))
5687 return true;
5689 getStreamer().emitCFIRestore(Register);
5690 return false;
5693 /// parseDirectiveCFIEscape
5694 /// ::= .cfi_escape expression[,...]
5695 bool MasmParser::parseDirectiveCFIEscape(SMLoc DirectiveLoc) {
5696 std::string Values;
5697 int64_t CurrValue;
5698 if (parseAbsoluteExpression(CurrValue))
5699 return true;
5701 Values.push_back((uint8_t)CurrValue);
5703 while (getLexer().is(AsmToken::Comma)) {
5704 Lex();
5706 if (parseAbsoluteExpression(CurrValue))
5707 return true;
5709 Values.push_back((uint8_t)CurrValue);
5712 getStreamer().emitCFIEscape(Values, DirectiveLoc);
5713 return false;
5716 /// parseDirectiveCFIReturnColumn
5717 /// ::= .cfi_return_column register
5718 bool MasmParser::parseDirectiveCFIReturnColumn(SMLoc DirectiveLoc) {
5719 int64_t Register = 0;
5720 if (parseRegisterOrRegisterNumber(Register, DirectiveLoc))
5721 return true;
5722 getStreamer().emitCFIReturnColumn(Register);
5723 return false;
5726 /// parseDirectiveCFISignalFrame
5727 /// ::= .cfi_signal_frame
5728 bool MasmParser::parseDirectiveCFISignalFrame() {
5729 if (parseEOL())
5730 return true;
5732 getStreamer().emitCFISignalFrame();
5733 return false;
5736 /// parseDirectiveCFIUndefined
5737 /// ::= .cfi_undefined register
5738 bool MasmParser::parseDirectiveCFIUndefined(SMLoc DirectiveLoc) {
5739 int64_t Register = 0;
5741 if (parseRegisterOrRegisterNumber(Register, DirectiveLoc))
5742 return true;
5744 getStreamer().emitCFIUndefined(Register);
5745 return false;
5748 /// parseDirectiveMacro
5749 /// ::= name macro [parameters]
5750 /// ["LOCAL" identifiers]
5751 /// parameters ::= parameter [, parameter]*
5752 /// parameter ::= name ":" qualifier
5753 /// qualifier ::= "req" | "vararg" | "=" macro_argument
5754 bool MasmParser::parseDirectiveMacro(StringRef Name, SMLoc NameLoc) {
5755 MCAsmMacroParameters Parameters;
5756 while (getLexer().isNot(AsmToken::EndOfStatement)) {
5757 if (!Parameters.empty() && Parameters.back().Vararg)
5758 return Error(Lexer.getLoc(),
5759 "Vararg parameter '" + Parameters.back().Name +
5760 "' should be last in the list of parameters");
5762 MCAsmMacroParameter Parameter;
5763 if (parseIdentifier(Parameter.Name))
5764 return TokError("expected identifier in 'macro' directive");
5766 // Emit an error if two (or more) named parameters share the same name.
5767 for (const MCAsmMacroParameter& CurrParam : Parameters)
5768 if (CurrParam.Name.equals_insensitive(Parameter.Name))
5769 return TokError("macro '" + Name + "' has multiple parameters"
5770 " named '" + Parameter.Name + "'");
5772 if (Lexer.is(AsmToken::Colon)) {
5773 Lex(); // consume ':'
5775 if (parseOptionalToken(AsmToken::Equal)) {
5776 // Default value
5777 SMLoc ParamLoc;
5779 ParamLoc = Lexer.getLoc();
5780 if (parseMacroArgument(nullptr, Parameter.Value))
5781 return true;
5782 } else {
5783 SMLoc QualLoc;
5784 StringRef Qualifier;
5786 QualLoc = Lexer.getLoc();
5787 if (parseIdentifier(Qualifier))
5788 return Error(QualLoc, "missing parameter qualifier for "
5789 "'" +
5790 Parameter.Name + "' in macro '" + Name +
5791 "'");
5793 if (Qualifier.equals_insensitive("req"))
5794 Parameter.Required = true;
5795 else if (Qualifier.equals_insensitive("vararg"))
5796 Parameter.Vararg = true;
5797 else
5798 return Error(QualLoc,
5799 Qualifier + " is not a valid parameter qualifier for '" +
5800 Parameter.Name + "' in macro '" + Name + "'");
5804 Parameters.push_back(std::move(Parameter));
5806 if (getLexer().is(AsmToken::Comma))
5807 Lex();
5810 // Eat just the end of statement.
5811 Lexer.Lex();
5813 std::vector<std::string> Locals;
5814 if (getTok().is(AsmToken::Identifier) &&
5815 getTok().getIdentifier().equals_insensitive("local")) {
5816 Lex(); // Eat the LOCAL directive.
5818 StringRef ID;
5819 while (true) {
5820 if (parseIdentifier(ID))
5821 return true;
5822 Locals.push_back(ID.lower());
5824 // If we see a comma, continue (and allow line continuation).
5825 if (!parseOptionalToken(AsmToken::Comma))
5826 break;
5827 parseOptionalToken(AsmToken::EndOfStatement);
5831 // Consuming deferred text, so use Lexer.Lex to ignore Lexing Errors.
5832 AsmToken EndToken, StartToken = getTok();
5833 unsigned MacroDepth = 0;
5834 bool IsMacroFunction = false;
5835 // Lex the macro definition.
5836 while (true) {
5837 // Ignore Lexing errors in macros.
5838 while (Lexer.is(AsmToken::Error)) {
5839 Lexer.Lex();
5842 // Check whether we have reached the end of the file.
5843 if (getLexer().is(AsmToken::Eof))
5844 return Error(NameLoc, "no matching 'endm' in definition");
5846 // Otherwise, check whether we have reached the 'endm'... and determine if
5847 // this is a macro function.
5848 if (getLexer().is(AsmToken::Identifier)) {
5849 if (getTok().getIdentifier().equals_insensitive("endm")) {
5850 if (MacroDepth == 0) { // Outermost macro.
5851 EndToken = getTok();
5852 Lexer.Lex();
5853 if (getLexer().isNot(AsmToken::EndOfStatement))
5854 return TokError("unexpected token in '" + EndToken.getIdentifier() +
5855 "' directive");
5856 break;
5857 } else {
5858 // Otherwise we just found the end of an inner macro.
5859 --MacroDepth;
5861 } else if (getTok().getIdentifier().equals_insensitive("exitm")) {
5862 if (MacroDepth == 0 && peekTok().isNot(AsmToken::EndOfStatement)) {
5863 IsMacroFunction = true;
5865 } else if (isMacroLikeDirective()) {
5866 // We allow nested macros. Those aren't instantiated until the
5867 // outermost macro is expanded so just ignore them for now.
5868 ++MacroDepth;
5872 // Otherwise, scan til the end of the statement.
5873 eatToEndOfStatement();
5876 if (getContext().lookupMacro(Name.lower())) {
5877 return Error(NameLoc, "macro '" + Name + "' is already defined");
5880 const char *BodyStart = StartToken.getLoc().getPointer();
5881 const char *BodyEnd = EndToken.getLoc().getPointer();
5882 StringRef Body = StringRef(BodyStart, BodyEnd - BodyStart);
5883 MCAsmMacro Macro(Name, Body, std::move(Parameters), std::move(Locals),
5884 IsMacroFunction);
5885 DEBUG_WITH_TYPE("asm-macros", dbgs() << "Defining new macro:\n";
5886 Macro.dump());
5887 getContext().defineMacro(Name.lower(), std::move(Macro));
5888 return false;
5891 /// parseDirectiveExitMacro
5892 /// ::= "exitm" [textitem]
5893 bool MasmParser::parseDirectiveExitMacro(SMLoc DirectiveLoc,
5894 StringRef Directive,
5895 std::string &Value) {
5896 SMLoc EndLoc = getTok().getLoc();
5897 if (getTok().isNot(AsmToken::EndOfStatement) && parseTextItem(Value))
5898 return Error(EndLoc,
5899 "unable to parse text item in '" + Directive + "' directive");
5900 eatToEndOfStatement();
5902 if (!isInsideMacroInstantiation())
5903 return TokError("unexpected '" + Directive + "' in file, "
5904 "no current macro definition");
5906 // Exit all conditionals that are active in the current macro.
5907 while (TheCondStack.size() != ActiveMacros.back()->CondStackDepth) {
5908 TheCondState = TheCondStack.back();
5909 TheCondStack.pop_back();
5912 handleMacroExit();
5913 return false;
5916 /// parseDirectiveEndMacro
5917 /// ::= endm
5918 bool MasmParser::parseDirectiveEndMacro(StringRef Directive) {
5919 if (getLexer().isNot(AsmToken::EndOfStatement))
5920 return TokError("unexpected token in '" + Directive + "' directive");
5922 // If we are inside a macro instantiation, terminate the current
5923 // instantiation.
5924 if (isInsideMacroInstantiation()) {
5925 handleMacroExit();
5926 return false;
5929 // Otherwise, this .endmacro is a stray entry in the file; well formed
5930 // .endmacro directives are handled during the macro definition parsing.
5931 return TokError("unexpected '" + Directive + "' in file, "
5932 "no current macro definition");
5935 /// parseDirectivePurgeMacro
5936 /// ::= purge identifier ( , identifier )*
5937 bool MasmParser::parseDirectivePurgeMacro(SMLoc DirectiveLoc) {
5938 StringRef Name;
5939 while (true) {
5940 SMLoc NameLoc;
5941 if (parseTokenLoc(NameLoc) ||
5942 check(parseIdentifier(Name), NameLoc,
5943 "expected identifier in 'purge' directive"))
5944 return true;
5946 DEBUG_WITH_TYPE("asm-macros", dbgs()
5947 << "Un-defining macro: " << Name << "\n");
5948 if (!getContext().lookupMacro(Name.lower()))
5949 return Error(NameLoc, "macro '" + Name + "' is not defined");
5950 getContext().undefineMacro(Name.lower());
5952 if (!parseOptionalToken(AsmToken::Comma))
5953 break;
5954 parseOptionalToken(AsmToken::EndOfStatement);
5957 return false;
5960 bool MasmParser::parseDirectiveExtern() {
5961 // .extern is the default - but we still need to take any provided type info.
5962 auto parseOp = [&]() -> bool {
5963 StringRef Name;
5964 SMLoc NameLoc = getTok().getLoc();
5965 if (parseIdentifier(Name))
5966 return Error(NameLoc, "expected name");
5967 if (parseToken(AsmToken::Colon))
5968 return true;
5970 StringRef TypeName;
5971 SMLoc TypeLoc = getTok().getLoc();
5972 if (parseIdentifier(TypeName))
5973 return Error(TypeLoc, "expected type");
5974 if (!TypeName.equals_insensitive("proc")) {
5975 AsmTypeInfo Type;
5976 if (lookUpType(TypeName, Type))
5977 return Error(TypeLoc, "unrecognized type");
5978 KnownType[Name.lower()] = Type;
5981 MCSymbol *Sym = getContext().getOrCreateSymbol(Name);
5982 Sym->setExternal(true);
5983 getStreamer().emitSymbolAttribute(Sym, MCSA_Extern);
5985 return false;
5988 if (parseMany(parseOp))
5989 return addErrorSuffix(" in directive 'extern'");
5990 return false;
5993 /// parseDirectiveSymbolAttribute
5994 /// ::= { ".globl", ".weak", ... } [ identifier ( , identifier )* ]
5995 bool MasmParser::parseDirectiveSymbolAttribute(MCSymbolAttr Attr) {
5996 auto parseOp = [&]() -> bool {
5997 StringRef Name;
5998 SMLoc Loc = getTok().getLoc();
5999 if (parseIdentifier(Name))
6000 return Error(Loc, "expected identifier");
6001 MCSymbol *Sym = getContext().getOrCreateSymbol(Name);
6003 // Assembler local symbols don't make any sense here. Complain loudly.
6004 if (Sym->isTemporary())
6005 return Error(Loc, "non-local symbol required");
6007 if (!getStreamer().emitSymbolAttribute(Sym, Attr))
6008 return Error(Loc, "unable to emit symbol attribute");
6009 return false;
6012 if (parseMany(parseOp))
6013 return addErrorSuffix(" in directive");
6014 return false;
6017 /// parseDirectiveComm
6018 /// ::= ( .comm | .lcomm ) identifier , size_expression [ , align_expression ]
6019 bool MasmParser::parseDirectiveComm(bool IsLocal) {
6020 if (checkForValidSection())
6021 return true;
6023 SMLoc IDLoc = getLexer().getLoc();
6024 StringRef Name;
6025 if (parseIdentifier(Name))
6026 return TokError("expected identifier in directive");
6028 // Handle the identifier as the key symbol.
6029 MCSymbol *Sym = getContext().getOrCreateSymbol(Name);
6031 if (getLexer().isNot(AsmToken::Comma))
6032 return TokError("unexpected token in directive");
6033 Lex();
6035 int64_t Size;
6036 SMLoc SizeLoc = getLexer().getLoc();
6037 if (parseAbsoluteExpression(Size))
6038 return true;
6040 int64_t Pow2Alignment = 0;
6041 SMLoc Pow2AlignmentLoc;
6042 if (getLexer().is(AsmToken::Comma)) {
6043 Lex();
6044 Pow2AlignmentLoc = getLexer().getLoc();
6045 if (parseAbsoluteExpression(Pow2Alignment))
6046 return true;
6048 LCOMM::LCOMMType LCOMM = Lexer.getMAI().getLCOMMDirectiveAlignmentType();
6049 if (IsLocal && LCOMM == LCOMM::NoAlignment)
6050 return Error(Pow2AlignmentLoc, "alignment not supported on this target");
6052 // If this target takes alignments in bytes (not log) validate and convert.
6053 if ((!IsLocal && Lexer.getMAI().getCOMMDirectiveAlignmentIsInBytes()) ||
6054 (IsLocal && LCOMM == LCOMM::ByteAlignment)) {
6055 if (!isPowerOf2_64(Pow2Alignment))
6056 return Error(Pow2AlignmentLoc, "alignment must be a power of 2");
6057 Pow2Alignment = Log2_64(Pow2Alignment);
6061 if (parseEOL())
6062 return true;
6064 // NOTE: a size of zero for a .comm should create a undefined symbol
6065 // but a size of .lcomm creates a bss symbol of size zero.
6066 if (Size < 0)
6067 return Error(SizeLoc, "invalid '.comm' or '.lcomm' directive size, can't "
6068 "be less than zero");
6070 // NOTE: The alignment in the directive is a power of 2 value, the assembler
6071 // may internally end up wanting an alignment in bytes.
6072 // FIXME: Diagnose overflow.
6073 if (Pow2Alignment < 0)
6074 return Error(Pow2AlignmentLoc, "invalid '.comm' or '.lcomm' directive "
6075 "alignment, can't be less than zero");
6077 Sym->redefineIfPossible();
6078 if (!Sym->isUndefined())
6079 return Error(IDLoc, "invalid symbol redefinition");
6081 // Create the Symbol as a common or local common with Size and Pow2Alignment.
6082 if (IsLocal) {
6083 getStreamer().emitLocalCommonSymbol(Sym, Size,
6084 Align(1ULL << Pow2Alignment));
6085 return false;
6088 getStreamer().emitCommonSymbol(Sym, Size, Align(1ULL << Pow2Alignment));
6089 return false;
6092 /// parseDirectiveComment
6093 /// ::= comment delimiter [[text]]
6094 /// [[text]]
6095 /// [[text]] delimiter [[text]]
6096 bool MasmParser::parseDirectiveComment(SMLoc DirectiveLoc) {
6097 std::string FirstLine = parseStringTo(AsmToken::EndOfStatement);
6098 size_t DelimiterEnd = FirstLine.find_first_of("\b\t\v\f\r\x1A ");
6099 assert(DelimiterEnd != std::string::npos);
6100 StringRef Delimiter = StringRef(FirstLine).take_front(DelimiterEnd);
6101 if (Delimiter.empty())
6102 return Error(DirectiveLoc, "no delimiter in 'comment' directive");
6103 do {
6104 if (getTok().is(AsmToken::Eof))
6105 return Error(DirectiveLoc, "unmatched delimiter in 'comment' directive");
6106 Lex(); // eat end of statement
6107 } while (
6108 !StringRef(parseStringTo(AsmToken::EndOfStatement)).contains(Delimiter));
6109 return parseEOL();
6112 /// parseDirectiveInclude
6113 /// ::= include <filename>
6114 /// | include filename
6115 bool MasmParser::parseDirectiveInclude() {
6116 // Allow the strings to have escaped octal character sequence.
6117 std::string Filename;
6118 SMLoc IncludeLoc = getTok().getLoc();
6120 if (parseAngleBracketString(Filename))
6121 Filename = parseStringTo(AsmToken::EndOfStatement);
6122 if (check(Filename.empty(), "missing filename in 'include' directive") ||
6123 check(getTok().isNot(AsmToken::EndOfStatement),
6124 "unexpected token in 'include' directive") ||
6125 // Attempt to switch the lexer to the included file before consuming the
6126 // end of statement to avoid losing it when we switch.
6127 check(enterIncludeFile(Filename), IncludeLoc,
6128 "Could not find include file '" + Filename + "'"))
6129 return true;
6131 return false;
6134 /// parseDirectiveIf
6135 /// ::= .if{,eq,ge,gt,le,lt,ne} expression
6136 bool MasmParser::parseDirectiveIf(SMLoc DirectiveLoc, DirectiveKind DirKind) {
6137 TheCondStack.push_back(TheCondState);
6138 TheCondState.TheCond = AsmCond::IfCond;
6139 if (TheCondState.Ignore) {
6140 eatToEndOfStatement();
6141 } else {
6142 int64_t ExprValue;
6143 if (parseAbsoluteExpression(ExprValue) || parseEOL())
6144 return true;
6146 switch (DirKind) {
6147 default:
6148 llvm_unreachable("unsupported directive");
6149 case DK_IF:
6150 break;
6151 case DK_IFE:
6152 ExprValue = ExprValue == 0;
6153 break;
6156 TheCondState.CondMet = ExprValue;
6157 TheCondState.Ignore = !TheCondState.CondMet;
6160 return false;
6163 /// parseDirectiveIfb
6164 /// ::= .ifb textitem
6165 bool MasmParser::parseDirectiveIfb(SMLoc DirectiveLoc, bool ExpectBlank) {
6166 TheCondStack.push_back(TheCondState);
6167 TheCondState.TheCond = AsmCond::IfCond;
6169 if (TheCondState.Ignore) {
6170 eatToEndOfStatement();
6171 } else {
6172 std::string Str;
6173 if (parseTextItem(Str))
6174 return TokError("expected text item parameter for 'ifb' directive");
6176 if (parseEOL())
6177 return true;
6179 TheCondState.CondMet = ExpectBlank == Str.empty();
6180 TheCondState.Ignore = !TheCondState.CondMet;
6183 return false;
6186 /// parseDirectiveIfidn
6187 /// ::= ifidn textitem, textitem
6188 bool MasmParser::parseDirectiveIfidn(SMLoc DirectiveLoc, bool ExpectEqual,
6189 bool CaseInsensitive) {
6190 std::string String1, String2;
6192 if (parseTextItem(String1)) {
6193 if (ExpectEqual)
6194 return TokError("expected text item parameter for 'ifidn' directive");
6195 return TokError("expected text item parameter for 'ifdif' directive");
6198 if (Lexer.isNot(AsmToken::Comma)) {
6199 if (ExpectEqual)
6200 return TokError(
6201 "expected comma after first string for 'ifidn' directive");
6202 return TokError("expected comma after first string for 'ifdif' directive");
6204 Lex();
6206 if (parseTextItem(String2)) {
6207 if (ExpectEqual)
6208 return TokError("expected text item parameter for 'ifidn' directive");
6209 return TokError("expected text item parameter for 'ifdif' directive");
6212 TheCondStack.push_back(TheCondState);
6213 TheCondState.TheCond = AsmCond::IfCond;
6214 if (CaseInsensitive)
6215 TheCondState.CondMet =
6216 ExpectEqual == (StringRef(String1).equals_insensitive(String2));
6217 else
6218 TheCondState.CondMet = ExpectEqual == (String1 == String2);
6219 TheCondState.Ignore = !TheCondState.CondMet;
6221 return false;
6224 /// parseDirectiveIfdef
6225 /// ::= ifdef symbol
6226 /// | ifdef variable
6227 bool MasmParser::parseDirectiveIfdef(SMLoc DirectiveLoc, bool expect_defined) {
6228 TheCondStack.push_back(TheCondState);
6229 TheCondState.TheCond = AsmCond::IfCond;
6231 if (TheCondState.Ignore) {
6232 eatToEndOfStatement();
6233 } else {
6234 bool is_defined = false;
6235 MCRegister Reg;
6236 SMLoc StartLoc, EndLoc;
6237 is_defined =
6238 getTargetParser().tryParseRegister(Reg, StartLoc, EndLoc).isSuccess();
6239 if (!is_defined) {
6240 StringRef Name;
6241 if (check(parseIdentifier(Name), "expected identifier after 'ifdef'") ||
6242 parseEOL())
6243 return true;
6245 if (BuiltinSymbolMap.contains(Name.lower())) {
6246 is_defined = true;
6247 } else if (Variables.contains(Name.lower())) {
6248 is_defined = true;
6249 } else {
6250 MCSymbol *Sym = getContext().lookupSymbol(Name.lower());
6251 is_defined = (Sym && !Sym->isUndefined(false));
6255 TheCondState.CondMet = (is_defined == expect_defined);
6256 TheCondState.Ignore = !TheCondState.CondMet;
6259 return false;
6262 /// parseDirectiveElseIf
6263 /// ::= elseif expression
6264 bool MasmParser::parseDirectiveElseIf(SMLoc DirectiveLoc,
6265 DirectiveKind DirKind) {
6266 if (TheCondState.TheCond != AsmCond::IfCond &&
6267 TheCondState.TheCond != AsmCond::ElseIfCond)
6268 return Error(DirectiveLoc, "Encountered a .elseif that doesn't follow an"
6269 " .if or an .elseif");
6270 TheCondState.TheCond = AsmCond::ElseIfCond;
6272 bool LastIgnoreState = false;
6273 if (!TheCondStack.empty())
6274 LastIgnoreState = TheCondStack.back().Ignore;
6275 if (LastIgnoreState || TheCondState.CondMet) {
6276 TheCondState.Ignore = true;
6277 eatToEndOfStatement();
6278 } else {
6279 int64_t ExprValue;
6280 if (parseAbsoluteExpression(ExprValue))
6281 return true;
6283 if (parseEOL())
6284 return true;
6286 switch (DirKind) {
6287 default:
6288 llvm_unreachable("unsupported directive");
6289 case DK_ELSEIF:
6290 break;
6291 case DK_ELSEIFE:
6292 ExprValue = ExprValue == 0;
6293 break;
6296 TheCondState.CondMet = ExprValue;
6297 TheCondState.Ignore = !TheCondState.CondMet;
6300 return false;
6303 /// parseDirectiveElseIfb
6304 /// ::= elseifb textitem
6305 bool MasmParser::parseDirectiveElseIfb(SMLoc DirectiveLoc, bool ExpectBlank) {
6306 if (TheCondState.TheCond != AsmCond::IfCond &&
6307 TheCondState.TheCond != AsmCond::ElseIfCond)
6308 return Error(DirectiveLoc, "Encountered an elseif that doesn't follow an"
6309 " if or an elseif");
6310 TheCondState.TheCond = AsmCond::ElseIfCond;
6312 bool LastIgnoreState = false;
6313 if (!TheCondStack.empty())
6314 LastIgnoreState = TheCondStack.back().Ignore;
6315 if (LastIgnoreState || TheCondState.CondMet) {
6316 TheCondState.Ignore = true;
6317 eatToEndOfStatement();
6318 } else {
6319 std::string Str;
6320 if (parseTextItem(Str)) {
6321 if (ExpectBlank)
6322 return TokError("expected text item parameter for 'elseifb' directive");
6323 return TokError("expected text item parameter for 'elseifnb' directive");
6326 if (parseEOL())
6327 return true;
6329 TheCondState.CondMet = ExpectBlank == Str.empty();
6330 TheCondState.Ignore = !TheCondState.CondMet;
6333 return false;
6336 /// parseDirectiveElseIfdef
6337 /// ::= elseifdef symbol
6338 /// | elseifdef variable
6339 bool MasmParser::parseDirectiveElseIfdef(SMLoc DirectiveLoc,
6340 bool expect_defined) {
6341 if (TheCondState.TheCond != AsmCond::IfCond &&
6342 TheCondState.TheCond != AsmCond::ElseIfCond)
6343 return Error(DirectiveLoc, "Encountered an elseif that doesn't follow an"
6344 " if or an elseif");
6345 TheCondState.TheCond = AsmCond::ElseIfCond;
6347 bool LastIgnoreState = false;
6348 if (!TheCondStack.empty())
6349 LastIgnoreState = TheCondStack.back().Ignore;
6350 if (LastIgnoreState || TheCondState.CondMet) {
6351 TheCondState.Ignore = true;
6352 eatToEndOfStatement();
6353 } else {
6354 bool is_defined = false;
6355 MCRegister Reg;
6356 SMLoc StartLoc, EndLoc;
6357 is_defined =
6358 getTargetParser().tryParseRegister(Reg, StartLoc, EndLoc).isSuccess();
6359 if (!is_defined) {
6360 StringRef Name;
6361 if (check(parseIdentifier(Name),
6362 "expected identifier after 'elseifdef'") ||
6363 parseEOL())
6364 return true;
6366 if (BuiltinSymbolMap.contains(Name.lower())) {
6367 is_defined = true;
6368 } else if (Variables.contains(Name.lower())) {
6369 is_defined = true;
6370 } else {
6371 MCSymbol *Sym = getContext().lookupSymbol(Name);
6372 is_defined = (Sym && !Sym->isUndefined(false));
6376 TheCondState.CondMet = (is_defined == expect_defined);
6377 TheCondState.Ignore = !TheCondState.CondMet;
6380 return false;
6383 /// parseDirectiveElseIfidn
6384 /// ::= elseifidn textitem, textitem
6385 bool MasmParser::parseDirectiveElseIfidn(SMLoc DirectiveLoc, bool ExpectEqual,
6386 bool CaseInsensitive) {
6387 if (TheCondState.TheCond != AsmCond::IfCond &&
6388 TheCondState.TheCond != AsmCond::ElseIfCond)
6389 return Error(DirectiveLoc, "Encountered an elseif that doesn't follow an"
6390 " if or an elseif");
6391 TheCondState.TheCond = AsmCond::ElseIfCond;
6393 bool LastIgnoreState = false;
6394 if (!TheCondStack.empty())
6395 LastIgnoreState = TheCondStack.back().Ignore;
6396 if (LastIgnoreState || TheCondState.CondMet) {
6397 TheCondState.Ignore = true;
6398 eatToEndOfStatement();
6399 } else {
6400 std::string String1, String2;
6402 if (parseTextItem(String1)) {
6403 if (ExpectEqual)
6404 return TokError(
6405 "expected text item parameter for 'elseifidn' directive");
6406 return TokError("expected text item parameter for 'elseifdif' directive");
6409 if (Lexer.isNot(AsmToken::Comma)) {
6410 if (ExpectEqual)
6411 return TokError(
6412 "expected comma after first string for 'elseifidn' directive");
6413 return TokError(
6414 "expected comma after first string for 'elseifdif' directive");
6416 Lex();
6418 if (parseTextItem(String2)) {
6419 if (ExpectEqual)
6420 return TokError(
6421 "expected text item parameter for 'elseifidn' directive");
6422 return TokError("expected text item parameter for 'elseifdif' directive");
6425 if (CaseInsensitive)
6426 TheCondState.CondMet =
6427 ExpectEqual == (StringRef(String1).equals_insensitive(String2));
6428 else
6429 TheCondState.CondMet = ExpectEqual == (String1 == String2);
6430 TheCondState.Ignore = !TheCondState.CondMet;
6433 return false;
6436 /// parseDirectiveElse
6437 /// ::= else
6438 bool MasmParser::parseDirectiveElse(SMLoc DirectiveLoc) {
6439 if (parseEOL())
6440 return true;
6442 if (TheCondState.TheCond != AsmCond::IfCond &&
6443 TheCondState.TheCond != AsmCond::ElseIfCond)
6444 return Error(DirectiveLoc, "Encountered an else that doesn't follow an if"
6445 " or an elseif");
6446 TheCondState.TheCond = AsmCond::ElseCond;
6447 bool LastIgnoreState = false;
6448 if (!TheCondStack.empty())
6449 LastIgnoreState = TheCondStack.back().Ignore;
6450 if (LastIgnoreState || TheCondState.CondMet)
6451 TheCondState.Ignore = true;
6452 else
6453 TheCondState.Ignore = false;
6455 return false;
6458 /// parseDirectiveEnd
6459 /// ::= end
6460 bool MasmParser::parseDirectiveEnd(SMLoc DirectiveLoc) {
6461 if (parseEOL())
6462 return true;
6464 while (Lexer.isNot(AsmToken::Eof))
6465 Lexer.Lex();
6467 return false;
6470 /// parseDirectiveError
6471 /// ::= .err [message]
6472 bool MasmParser::parseDirectiveError(SMLoc DirectiveLoc) {
6473 if (!TheCondStack.empty()) {
6474 if (TheCondStack.back().Ignore) {
6475 eatToEndOfStatement();
6476 return false;
6480 std::string Message = ".err directive invoked in source file";
6481 if (Lexer.isNot(AsmToken::EndOfStatement))
6482 Message = parseStringTo(AsmToken::EndOfStatement);
6483 Lex();
6485 return Error(DirectiveLoc, Message);
6488 /// parseDirectiveErrorIfb
6489 /// ::= .errb textitem[, message]
6490 bool MasmParser::parseDirectiveErrorIfb(SMLoc DirectiveLoc, bool ExpectBlank) {
6491 if (!TheCondStack.empty()) {
6492 if (TheCondStack.back().Ignore) {
6493 eatToEndOfStatement();
6494 return false;
6498 std::string Text;
6499 if (parseTextItem(Text))
6500 return Error(getTok().getLoc(), "missing text item in '.errb' directive");
6502 std::string Message = ".errb directive invoked in source file";
6503 if (Lexer.isNot(AsmToken::EndOfStatement)) {
6504 if (parseToken(AsmToken::Comma))
6505 return addErrorSuffix(" in '.errb' directive");
6506 Message = parseStringTo(AsmToken::EndOfStatement);
6508 Lex();
6510 if (Text.empty() == ExpectBlank)
6511 return Error(DirectiveLoc, Message);
6512 return false;
6515 /// parseDirectiveErrorIfdef
6516 /// ::= .errdef name[, message]
6517 bool MasmParser::parseDirectiveErrorIfdef(SMLoc DirectiveLoc,
6518 bool ExpectDefined) {
6519 if (!TheCondStack.empty()) {
6520 if (TheCondStack.back().Ignore) {
6521 eatToEndOfStatement();
6522 return false;
6526 bool IsDefined = false;
6527 MCRegister Reg;
6528 SMLoc StartLoc, EndLoc;
6529 IsDefined =
6530 getTargetParser().tryParseRegister(Reg, StartLoc, EndLoc).isSuccess();
6531 if (!IsDefined) {
6532 StringRef Name;
6533 if (check(parseIdentifier(Name), "expected identifier after '.errdef'"))
6534 return true;
6536 if (BuiltinSymbolMap.contains(Name.lower())) {
6537 IsDefined = true;
6538 } else if (Variables.contains(Name.lower())) {
6539 IsDefined = true;
6540 } else {
6541 MCSymbol *Sym = getContext().lookupSymbol(Name);
6542 IsDefined = (Sym && !Sym->isUndefined(false));
6546 std::string Message = ".errdef directive invoked in source file";
6547 if (Lexer.isNot(AsmToken::EndOfStatement)) {
6548 if (parseToken(AsmToken::Comma))
6549 return addErrorSuffix(" in '.errdef' directive");
6550 Message = parseStringTo(AsmToken::EndOfStatement);
6552 Lex();
6554 if (IsDefined == ExpectDefined)
6555 return Error(DirectiveLoc, Message);
6556 return false;
6559 /// parseDirectiveErrorIfidn
6560 /// ::= .erridn textitem, textitem[, message]
6561 bool MasmParser::parseDirectiveErrorIfidn(SMLoc DirectiveLoc, bool ExpectEqual,
6562 bool CaseInsensitive) {
6563 if (!TheCondStack.empty()) {
6564 if (TheCondStack.back().Ignore) {
6565 eatToEndOfStatement();
6566 return false;
6570 std::string String1, String2;
6572 if (parseTextItem(String1)) {
6573 if (ExpectEqual)
6574 return TokError("expected string parameter for '.erridn' directive");
6575 return TokError("expected string parameter for '.errdif' directive");
6578 if (Lexer.isNot(AsmToken::Comma)) {
6579 if (ExpectEqual)
6580 return TokError(
6581 "expected comma after first string for '.erridn' directive");
6582 return TokError(
6583 "expected comma after first string for '.errdif' directive");
6585 Lex();
6587 if (parseTextItem(String2)) {
6588 if (ExpectEqual)
6589 return TokError("expected string parameter for '.erridn' directive");
6590 return TokError("expected string parameter for '.errdif' directive");
6593 std::string Message;
6594 if (ExpectEqual)
6595 Message = ".erridn directive invoked in source file";
6596 else
6597 Message = ".errdif directive invoked in source file";
6598 if (Lexer.isNot(AsmToken::EndOfStatement)) {
6599 if (parseToken(AsmToken::Comma))
6600 return addErrorSuffix(" in '.erridn' directive");
6601 Message = parseStringTo(AsmToken::EndOfStatement);
6603 Lex();
6605 if (CaseInsensitive)
6606 TheCondState.CondMet =
6607 ExpectEqual == (StringRef(String1).equals_insensitive(String2));
6608 else
6609 TheCondState.CondMet = ExpectEqual == (String1 == String2);
6610 TheCondState.Ignore = !TheCondState.CondMet;
6612 if ((CaseInsensitive &&
6613 ExpectEqual == StringRef(String1).equals_insensitive(String2)) ||
6614 (ExpectEqual == (String1 == String2)))
6615 return Error(DirectiveLoc, Message);
6616 return false;
6619 /// parseDirectiveErrorIfe
6620 /// ::= .erre expression[, message]
6621 bool MasmParser::parseDirectiveErrorIfe(SMLoc DirectiveLoc, bool ExpectZero) {
6622 if (!TheCondStack.empty()) {
6623 if (TheCondStack.back().Ignore) {
6624 eatToEndOfStatement();
6625 return false;
6629 int64_t ExprValue;
6630 if (parseAbsoluteExpression(ExprValue))
6631 return addErrorSuffix(" in '.erre' directive");
6633 std::string Message = ".erre directive invoked in source file";
6634 if (Lexer.isNot(AsmToken::EndOfStatement)) {
6635 if (parseToken(AsmToken::Comma))
6636 return addErrorSuffix(" in '.erre' directive");
6637 Message = parseStringTo(AsmToken::EndOfStatement);
6639 Lex();
6641 if ((ExprValue == 0) == ExpectZero)
6642 return Error(DirectiveLoc, Message);
6643 return false;
6646 /// parseDirectiveEndIf
6647 /// ::= .endif
6648 bool MasmParser::parseDirectiveEndIf(SMLoc DirectiveLoc) {
6649 if (parseEOL())
6650 return true;
6652 if ((TheCondState.TheCond == AsmCond::NoCond) || TheCondStack.empty())
6653 return Error(DirectiveLoc, "Encountered a .endif that doesn't follow "
6654 "an .if or .else");
6655 if (!TheCondStack.empty()) {
6656 TheCondState = TheCondStack.back();
6657 TheCondStack.pop_back();
6660 return false;
6663 void MasmParser::initializeDirectiveKindMap() {
6664 DirectiveKindMap["="] = DK_ASSIGN;
6665 DirectiveKindMap["equ"] = DK_EQU;
6666 DirectiveKindMap["textequ"] = DK_TEXTEQU;
6667 // DirectiveKindMap[".ascii"] = DK_ASCII;
6668 // DirectiveKindMap[".asciz"] = DK_ASCIZ;
6669 // DirectiveKindMap[".string"] = DK_STRING;
6670 DirectiveKindMap["byte"] = DK_BYTE;
6671 DirectiveKindMap["sbyte"] = DK_SBYTE;
6672 DirectiveKindMap["word"] = DK_WORD;
6673 DirectiveKindMap["sword"] = DK_SWORD;
6674 DirectiveKindMap["dword"] = DK_DWORD;
6675 DirectiveKindMap["sdword"] = DK_SDWORD;
6676 DirectiveKindMap["fword"] = DK_FWORD;
6677 DirectiveKindMap["qword"] = DK_QWORD;
6678 DirectiveKindMap["sqword"] = DK_SQWORD;
6679 DirectiveKindMap["real4"] = DK_REAL4;
6680 DirectiveKindMap["real8"] = DK_REAL8;
6681 DirectiveKindMap["real10"] = DK_REAL10;
6682 DirectiveKindMap["align"] = DK_ALIGN;
6683 DirectiveKindMap["even"] = DK_EVEN;
6684 DirectiveKindMap["org"] = DK_ORG;
6685 DirectiveKindMap["extern"] = DK_EXTERN;
6686 DirectiveKindMap["extrn"] = DK_EXTERN;
6687 DirectiveKindMap["public"] = DK_PUBLIC;
6688 // DirectiveKindMap[".comm"] = DK_COMM;
6689 DirectiveKindMap["comment"] = DK_COMMENT;
6690 DirectiveKindMap["include"] = DK_INCLUDE;
6691 DirectiveKindMap["repeat"] = DK_REPEAT;
6692 DirectiveKindMap["rept"] = DK_REPEAT;
6693 DirectiveKindMap["while"] = DK_WHILE;
6694 DirectiveKindMap["for"] = DK_FOR;
6695 DirectiveKindMap["irp"] = DK_FOR;
6696 DirectiveKindMap["forc"] = DK_FORC;
6697 DirectiveKindMap["irpc"] = DK_FORC;
6698 DirectiveKindMap["if"] = DK_IF;
6699 DirectiveKindMap["ife"] = DK_IFE;
6700 DirectiveKindMap["ifb"] = DK_IFB;
6701 DirectiveKindMap["ifnb"] = DK_IFNB;
6702 DirectiveKindMap["ifdef"] = DK_IFDEF;
6703 DirectiveKindMap["ifndef"] = DK_IFNDEF;
6704 DirectiveKindMap["ifdif"] = DK_IFDIF;
6705 DirectiveKindMap["ifdifi"] = DK_IFDIFI;
6706 DirectiveKindMap["ifidn"] = DK_IFIDN;
6707 DirectiveKindMap["ifidni"] = DK_IFIDNI;
6708 DirectiveKindMap["elseif"] = DK_ELSEIF;
6709 DirectiveKindMap["elseifdef"] = DK_ELSEIFDEF;
6710 DirectiveKindMap["elseifndef"] = DK_ELSEIFNDEF;
6711 DirectiveKindMap["elseifdif"] = DK_ELSEIFDIF;
6712 DirectiveKindMap["elseifidn"] = DK_ELSEIFIDN;
6713 DirectiveKindMap["else"] = DK_ELSE;
6714 DirectiveKindMap["end"] = DK_END;
6715 DirectiveKindMap["endif"] = DK_ENDIF;
6716 // DirectiveKindMap[".file"] = DK_FILE;
6717 // DirectiveKindMap[".line"] = DK_LINE;
6718 // DirectiveKindMap[".loc"] = DK_LOC;
6719 // DirectiveKindMap[".stabs"] = DK_STABS;
6720 // DirectiveKindMap[".cv_file"] = DK_CV_FILE;
6721 // DirectiveKindMap[".cv_func_id"] = DK_CV_FUNC_ID;
6722 // DirectiveKindMap[".cv_loc"] = DK_CV_LOC;
6723 // DirectiveKindMap[".cv_linetable"] = DK_CV_LINETABLE;
6724 // DirectiveKindMap[".cv_inline_linetable"] = DK_CV_INLINE_LINETABLE;
6725 // DirectiveKindMap[".cv_inline_site_id"] = DK_CV_INLINE_SITE_ID;
6726 // DirectiveKindMap[".cv_def_range"] = DK_CV_DEF_RANGE;
6727 // DirectiveKindMap[".cv_string"] = DK_CV_STRING;
6728 // DirectiveKindMap[".cv_stringtable"] = DK_CV_STRINGTABLE;
6729 // DirectiveKindMap[".cv_filechecksums"] = DK_CV_FILECHECKSUMS;
6730 // DirectiveKindMap[".cv_filechecksumoffset"] = DK_CV_FILECHECKSUM_OFFSET;
6731 // DirectiveKindMap[".cv_fpo_data"] = DK_CV_FPO_DATA;
6732 // DirectiveKindMap[".cfi_sections"] = DK_CFI_SECTIONS;
6733 // DirectiveKindMap[".cfi_startproc"] = DK_CFI_STARTPROC;
6734 // DirectiveKindMap[".cfi_endproc"] = DK_CFI_ENDPROC;
6735 // DirectiveKindMap[".cfi_def_cfa"] = DK_CFI_DEF_CFA;
6736 // DirectiveKindMap[".cfi_def_cfa_offset"] = DK_CFI_DEF_CFA_OFFSET;
6737 // DirectiveKindMap[".cfi_adjust_cfa_offset"] = DK_CFI_ADJUST_CFA_OFFSET;
6738 // DirectiveKindMap[".cfi_def_cfa_register"] = DK_CFI_DEF_CFA_REGISTER;
6739 // DirectiveKindMap[".cfi_offset"] = DK_CFI_OFFSET;
6740 // DirectiveKindMap[".cfi_rel_offset"] = DK_CFI_REL_OFFSET;
6741 // DirectiveKindMap[".cfi_personality"] = DK_CFI_PERSONALITY;
6742 // DirectiveKindMap[".cfi_lsda"] = DK_CFI_LSDA;
6743 // DirectiveKindMap[".cfi_remember_state"] = DK_CFI_REMEMBER_STATE;
6744 // DirectiveKindMap[".cfi_restore_state"] = DK_CFI_RESTORE_STATE;
6745 // DirectiveKindMap[".cfi_same_value"] = DK_CFI_SAME_VALUE;
6746 // DirectiveKindMap[".cfi_restore"] = DK_CFI_RESTORE;
6747 // DirectiveKindMap[".cfi_escape"] = DK_CFI_ESCAPE;
6748 // DirectiveKindMap[".cfi_return_column"] = DK_CFI_RETURN_COLUMN;
6749 // DirectiveKindMap[".cfi_signal_frame"] = DK_CFI_SIGNAL_FRAME;
6750 // DirectiveKindMap[".cfi_undefined"] = DK_CFI_UNDEFINED;
6751 // DirectiveKindMap[".cfi_register"] = DK_CFI_REGISTER;
6752 // DirectiveKindMap[".cfi_window_save"] = DK_CFI_WINDOW_SAVE;
6753 // DirectiveKindMap[".cfi_b_key_frame"] = DK_CFI_B_KEY_FRAME;
6754 DirectiveKindMap["macro"] = DK_MACRO;
6755 DirectiveKindMap["exitm"] = DK_EXITM;
6756 DirectiveKindMap["endm"] = DK_ENDM;
6757 DirectiveKindMap["purge"] = DK_PURGE;
6758 DirectiveKindMap[".err"] = DK_ERR;
6759 DirectiveKindMap[".errb"] = DK_ERRB;
6760 DirectiveKindMap[".errnb"] = DK_ERRNB;
6761 DirectiveKindMap[".errdef"] = DK_ERRDEF;
6762 DirectiveKindMap[".errndef"] = DK_ERRNDEF;
6763 DirectiveKindMap[".errdif"] = DK_ERRDIF;
6764 DirectiveKindMap[".errdifi"] = DK_ERRDIFI;
6765 DirectiveKindMap[".erridn"] = DK_ERRIDN;
6766 DirectiveKindMap[".erridni"] = DK_ERRIDNI;
6767 DirectiveKindMap[".erre"] = DK_ERRE;
6768 DirectiveKindMap[".errnz"] = DK_ERRNZ;
6769 DirectiveKindMap[".pushframe"] = DK_PUSHFRAME;
6770 DirectiveKindMap[".pushreg"] = DK_PUSHREG;
6771 DirectiveKindMap[".savereg"] = DK_SAVEREG;
6772 DirectiveKindMap[".savexmm128"] = DK_SAVEXMM128;
6773 DirectiveKindMap[".setframe"] = DK_SETFRAME;
6774 DirectiveKindMap[".radix"] = DK_RADIX;
6775 DirectiveKindMap["db"] = DK_DB;
6776 DirectiveKindMap["dd"] = DK_DD;
6777 DirectiveKindMap["df"] = DK_DF;
6778 DirectiveKindMap["dq"] = DK_DQ;
6779 DirectiveKindMap["dw"] = DK_DW;
6780 DirectiveKindMap["echo"] = DK_ECHO;
6781 DirectiveKindMap["struc"] = DK_STRUCT;
6782 DirectiveKindMap["struct"] = DK_STRUCT;
6783 DirectiveKindMap["union"] = DK_UNION;
6784 DirectiveKindMap["ends"] = DK_ENDS;
6787 bool MasmParser::isMacroLikeDirective() {
6788 if (getLexer().is(AsmToken::Identifier)) {
6789 bool IsMacroLike = StringSwitch<bool>(getTok().getIdentifier())
6790 .CasesLower("repeat", "rept", true)
6791 .CaseLower("while", true)
6792 .CasesLower("for", "irp", true)
6793 .CasesLower("forc", "irpc", true)
6794 .Default(false);
6795 if (IsMacroLike)
6796 return true;
6798 if (peekTok().is(AsmToken::Identifier) &&
6799 peekTok().getIdentifier().equals_insensitive("macro"))
6800 return true;
6802 return false;
6805 MCAsmMacro *MasmParser::parseMacroLikeBody(SMLoc DirectiveLoc) {
6806 AsmToken EndToken, StartToken = getTok();
6808 unsigned NestLevel = 0;
6809 while (true) {
6810 // Check whether we have reached the end of the file.
6811 if (getLexer().is(AsmToken::Eof)) {
6812 printError(DirectiveLoc, "no matching 'endm' in definition");
6813 return nullptr;
6816 if (isMacroLikeDirective())
6817 ++NestLevel;
6819 // Otherwise, check whether we have reached the endm.
6820 if (Lexer.is(AsmToken::Identifier) &&
6821 getTok().getIdentifier().equals_insensitive("endm")) {
6822 if (NestLevel == 0) {
6823 EndToken = getTok();
6824 Lex();
6825 if (Lexer.isNot(AsmToken::EndOfStatement)) {
6826 printError(getTok().getLoc(), "unexpected token in 'endm' directive");
6827 return nullptr;
6829 break;
6831 --NestLevel;
6834 // Otherwise, scan till the end of the statement.
6835 eatToEndOfStatement();
6838 const char *BodyStart = StartToken.getLoc().getPointer();
6839 const char *BodyEnd = EndToken.getLoc().getPointer();
6840 StringRef Body = StringRef(BodyStart, BodyEnd - BodyStart);
6842 // We Are Anonymous.
6843 MacroLikeBodies.emplace_back(StringRef(), Body, MCAsmMacroParameters());
6844 return &MacroLikeBodies.back();
6847 bool MasmParser::expandStatement(SMLoc Loc) {
6848 std::string Body = parseStringTo(AsmToken::EndOfStatement);
6849 SMLoc EndLoc = getTok().getLoc();
6851 MCAsmMacroParameters Parameters;
6852 MCAsmMacroArguments Arguments;
6854 StringMap<std::string> BuiltinValues;
6855 for (const auto &S : BuiltinSymbolMap) {
6856 const BuiltinSymbol &Sym = S.getValue();
6857 if (std::optional<std::string> Text = evaluateBuiltinTextMacro(Sym, Loc)) {
6858 BuiltinValues[S.getKey().lower()] = std::move(*Text);
6861 for (const auto &B : BuiltinValues) {
6862 MCAsmMacroParameter P;
6863 MCAsmMacroArgument A;
6864 P.Name = B.getKey();
6865 P.Required = true;
6866 A.push_back(AsmToken(AsmToken::String, B.getValue()));
6868 Parameters.push_back(std::move(P));
6869 Arguments.push_back(std::move(A));
6872 for (const auto &V : Variables) {
6873 const Variable &Var = V.getValue();
6874 if (Var.IsText) {
6875 MCAsmMacroParameter P;
6876 MCAsmMacroArgument A;
6877 P.Name = Var.Name;
6878 P.Required = true;
6879 A.push_back(AsmToken(AsmToken::String, Var.TextValue));
6881 Parameters.push_back(std::move(P));
6882 Arguments.push_back(std::move(A));
6885 MacroLikeBodies.emplace_back(StringRef(), Body, Parameters);
6886 MCAsmMacro M = MacroLikeBodies.back();
6888 // Expand the statement in a new buffer.
6889 SmallString<80> Buf;
6890 raw_svector_ostream OS(Buf);
6891 if (expandMacro(OS, M.Body, M.Parameters, Arguments, M.Locals, EndLoc))
6892 return true;
6893 std::unique_ptr<MemoryBuffer> Expansion =
6894 MemoryBuffer::getMemBufferCopy(OS.str(), "<expansion>");
6896 // Jump to the expanded statement and prime the lexer.
6897 CurBuffer = SrcMgr.AddNewSourceBuffer(std::move(Expansion), EndLoc);
6898 Lexer.setBuffer(SrcMgr.getMemoryBuffer(CurBuffer)->getBuffer());
6899 EndStatementAtEOFStack.push_back(false);
6900 Lex();
6901 return false;
6904 void MasmParser::instantiateMacroLikeBody(MCAsmMacro *M, SMLoc DirectiveLoc,
6905 raw_svector_ostream &OS) {
6906 instantiateMacroLikeBody(M, DirectiveLoc, /*ExitLoc=*/getTok().getLoc(), OS);
6908 void MasmParser::instantiateMacroLikeBody(MCAsmMacro *M, SMLoc DirectiveLoc,
6909 SMLoc ExitLoc,
6910 raw_svector_ostream &OS) {
6911 OS << "endm\n";
6913 std::unique_ptr<MemoryBuffer> Instantiation =
6914 MemoryBuffer::getMemBufferCopy(OS.str(), "<instantiation>");
6916 // Create the macro instantiation object and add to the current macro
6917 // instantiation stack.
6918 MacroInstantiation *MI = new MacroInstantiation{DirectiveLoc, CurBuffer,
6919 ExitLoc, TheCondStack.size()};
6920 ActiveMacros.push_back(MI);
6922 // Jump to the macro instantiation and prime the lexer.
6923 CurBuffer = SrcMgr.AddNewSourceBuffer(std::move(Instantiation), SMLoc());
6924 Lexer.setBuffer(SrcMgr.getMemoryBuffer(CurBuffer)->getBuffer());
6925 EndStatementAtEOFStack.push_back(true);
6926 Lex();
6929 /// parseDirectiveRepeat
6930 /// ::= ("repeat" | "rept") count
6931 /// body
6932 /// endm
6933 bool MasmParser::parseDirectiveRepeat(SMLoc DirectiveLoc, StringRef Dir) {
6934 const MCExpr *CountExpr;
6935 SMLoc CountLoc = getTok().getLoc();
6936 if (parseExpression(CountExpr))
6937 return true;
6939 int64_t Count;
6940 if (!CountExpr->evaluateAsAbsolute(Count, getStreamer().getAssemblerPtr())) {
6941 return Error(CountLoc, "unexpected token in '" + Dir + "' directive");
6944 if (check(Count < 0, CountLoc, "Count is negative") || parseEOL())
6945 return true;
6947 // Lex the repeat definition.
6948 MCAsmMacro *M = parseMacroLikeBody(DirectiveLoc);
6949 if (!M)
6950 return true;
6952 // Macro instantiation is lexical, unfortunately. We construct a new buffer
6953 // to hold the macro body with substitutions.
6954 SmallString<256> Buf;
6955 raw_svector_ostream OS(Buf);
6956 while (Count--) {
6957 if (expandMacro(OS, M->Body, std::nullopt, std::nullopt, M->Locals,
6958 getTok().getLoc()))
6959 return true;
6961 instantiateMacroLikeBody(M, DirectiveLoc, OS);
6963 return false;
6966 /// parseDirectiveWhile
6967 /// ::= "while" expression
6968 /// body
6969 /// endm
6970 bool MasmParser::parseDirectiveWhile(SMLoc DirectiveLoc) {
6971 const MCExpr *CondExpr;
6972 SMLoc CondLoc = getTok().getLoc();
6973 if (parseExpression(CondExpr))
6974 return true;
6976 // Lex the repeat definition.
6977 MCAsmMacro *M = parseMacroLikeBody(DirectiveLoc);
6978 if (!M)
6979 return true;
6981 // Macro instantiation is lexical, unfortunately. We construct a new buffer
6982 // to hold the macro body with substitutions.
6983 SmallString<256> Buf;
6984 raw_svector_ostream OS(Buf);
6985 int64_t Condition;
6986 if (!CondExpr->evaluateAsAbsolute(Condition, getStreamer().getAssemblerPtr()))
6987 return Error(CondLoc, "expected absolute expression in 'while' directive");
6988 if (Condition) {
6989 // Instantiate the macro, then resume at this directive to recheck the
6990 // condition.
6991 if (expandMacro(OS, M->Body, std::nullopt, std::nullopt, M->Locals,
6992 getTok().getLoc()))
6993 return true;
6994 instantiateMacroLikeBody(M, DirectiveLoc, /*ExitLoc=*/DirectiveLoc, OS);
6997 return false;
7000 /// parseDirectiveFor
7001 /// ::= ("for" | "irp") symbol [":" qualifier], <values>
7002 /// body
7003 /// endm
7004 bool MasmParser::parseDirectiveFor(SMLoc DirectiveLoc, StringRef Dir) {
7005 MCAsmMacroParameter Parameter;
7006 MCAsmMacroArguments A;
7007 if (check(parseIdentifier(Parameter.Name),
7008 "expected identifier in '" + Dir + "' directive"))
7009 return true;
7011 // Parse optional qualifier (default value, or "req")
7012 if (parseOptionalToken(AsmToken::Colon)) {
7013 if (parseOptionalToken(AsmToken::Equal)) {
7014 // Default value
7015 SMLoc ParamLoc;
7017 ParamLoc = Lexer.getLoc();
7018 if (parseMacroArgument(nullptr, Parameter.Value))
7019 return true;
7020 } else {
7021 SMLoc QualLoc;
7022 StringRef Qualifier;
7024 QualLoc = Lexer.getLoc();
7025 if (parseIdentifier(Qualifier))
7026 return Error(QualLoc, "missing parameter qualifier for "
7027 "'" +
7028 Parameter.Name + "' in '" + Dir +
7029 "' directive");
7031 if (Qualifier.equals_insensitive("req"))
7032 Parameter.Required = true;
7033 else
7034 return Error(QualLoc,
7035 Qualifier + " is not a valid parameter qualifier for '" +
7036 Parameter.Name + "' in '" + Dir + "' directive");
7040 if (parseToken(AsmToken::Comma,
7041 "expected comma in '" + Dir + "' directive") ||
7042 parseToken(AsmToken::Less,
7043 "values in '" + Dir +
7044 "' directive must be enclosed in angle brackets"))
7045 return true;
7047 while (true) {
7048 A.emplace_back();
7049 if (parseMacroArgument(&Parameter, A.back(), /*EndTok=*/AsmToken::Greater))
7050 return addErrorSuffix(" in arguments for '" + Dir + "' directive");
7052 // If we see a comma, continue, and allow line continuation.
7053 if (!parseOptionalToken(AsmToken::Comma))
7054 break;
7055 parseOptionalToken(AsmToken::EndOfStatement);
7058 if (parseToken(AsmToken::Greater,
7059 "values in '" + Dir +
7060 "' directive must be enclosed in angle brackets") ||
7061 parseEOL())
7062 return true;
7064 // Lex the for definition.
7065 MCAsmMacro *M = parseMacroLikeBody(DirectiveLoc);
7066 if (!M)
7067 return true;
7069 // Macro instantiation is lexical, unfortunately. We construct a new buffer
7070 // to hold the macro body with substitutions.
7071 SmallString<256> Buf;
7072 raw_svector_ostream OS(Buf);
7074 for (const MCAsmMacroArgument &Arg : A) {
7075 if (expandMacro(OS, M->Body, Parameter, Arg, M->Locals, getTok().getLoc()))
7076 return true;
7079 instantiateMacroLikeBody(M, DirectiveLoc, OS);
7081 return false;
7084 /// parseDirectiveForc
7085 /// ::= ("forc" | "irpc") symbol, <string>
7086 /// body
7087 /// endm
7088 bool MasmParser::parseDirectiveForc(SMLoc DirectiveLoc, StringRef Directive) {
7089 MCAsmMacroParameter Parameter;
7091 std::string Argument;
7092 if (check(parseIdentifier(Parameter.Name),
7093 "expected identifier in '" + Directive + "' directive") ||
7094 parseToken(AsmToken::Comma,
7095 "expected comma in '" + Directive + "' directive"))
7096 return true;
7097 if (parseAngleBracketString(Argument)) {
7098 // Match ml64.exe; treat all characters to end of statement as a string,
7099 // ignoring comment markers, then discard anything following a space (using
7100 // the C locale).
7101 Argument = parseStringTo(AsmToken::EndOfStatement);
7102 if (getTok().is(AsmToken::EndOfStatement))
7103 Argument += getTok().getString();
7104 size_t End = 0;
7105 for (; End < Argument.size(); ++End) {
7106 if (isSpace(Argument[End]))
7107 break;
7109 Argument.resize(End);
7111 if (parseEOL())
7112 return true;
7114 // Lex the irpc definition.
7115 MCAsmMacro *M = parseMacroLikeBody(DirectiveLoc);
7116 if (!M)
7117 return true;
7119 // Macro instantiation is lexical, unfortunately. We construct a new buffer
7120 // to hold the macro body with substitutions.
7121 SmallString<256> Buf;
7122 raw_svector_ostream OS(Buf);
7124 StringRef Values(Argument);
7125 for (std::size_t I = 0, End = Values.size(); I != End; ++I) {
7126 MCAsmMacroArgument Arg;
7127 Arg.emplace_back(AsmToken::Identifier, Values.slice(I, I + 1));
7129 if (expandMacro(OS, M->Body, Parameter, Arg, M->Locals, getTok().getLoc()))
7130 return true;
7133 instantiateMacroLikeBody(M, DirectiveLoc, OS);
7135 return false;
7138 bool MasmParser::parseDirectiveMSEmit(SMLoc IDLoc, ParseStatementInfo &Info,
7139 size_t Len) {
7140 const MCExpr *Value;
7141 SMLoc ExprLoc = getLexer().getLoc();
7142 if (parseExpression(Value))
7143 return true;
7144 const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(Value);
7145 if (!MCE)
7146 return Error(ExprLoc, "unexpected expression in _emit");
7147 uint64_t IntValue = MCE->getValue();
7148 if (!isUInt<8>(IntValue) && !isInt<8>(IntValue))
7149 return Error(ExprLoc, "literal value out of range for directive");
7151 Info.AsmRewrites->emplace_back(AOK_Emit, IDLoc, Len);
7152 return false;
7155 bool MasmParser::parseDirectiveMSAlign(SMLoc IDLoc, ParseStatementInfo &Info) {
7156 const MCExpr *Value;
7157 SMLoc ExprLoc = getLexer().getLoc();
7158 if (parseExpression(Value))
7159 return true;
7160 const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(Value);
7161 if (!MCE)
7162 return Error(ExprLoc, "unexpected expression in align");
7163 uint64_t IntValue = MCE->getValue();
7164 if (!isPowerOf2_64(IntValue))
7165 return Error(ExprLoc, "literal value not a power of two greater then zero");
7167 Info.AsmRewrites->emplace_back(AOK_Align, IDLoc, 5, Log2_64(IntValue));
7168 return false;
7171 bool MasmParser::parseDirectiveRadix(SMLoc DirectiveLoc) {
7172 const SMLoc Loc = getLexer().getLoc();
7173 std::string RadixStringRaw = parseStringTo(AsmToken::EndOfStatement);
7174 StringRef RadixString = StringRef(RadixStringRaw).trim();
7175 unsigned Radix;
7176 if (RadixString.getAsInteger(10, Radix)) {
7177 return Error(Loc,
7178 "radix must be a decimal number in the range 2 to 16; was " +
7179 RadixString);
7181 if (Radix < 2 || Radix > 16)
7182 return Error(Loc, "radix must be in the range 2 to 16; was " +
7183 std::to_string(Radix));
7184 getLexer().setMasmDefaultRadix(Radix);
7185 return false;
7188 /// parseDirectiveEcho
7189 /// ::= "echo" message
7190 bool MasmParser::parseDirectiveEcho(SMLoc DirectiveLoc) {
7191 std::string Message = parseStringTo(AsmToken::EndOfStatement);
7192 llvm::outs() << Message;
7193 if (!StringRef(Message).ends_with("\n"))
7194 llvm::outs() << '\n';
7195 return false;
7198 // We are comparing pointers, but the pointers are relative to a single string.
7199 // Thus, this should always be deterministic.
7200 static int rewritesSort(const AsmRewrite *AsmRewriteA,
7201 const AsmRewrite *AsmRewriteB) {
7202 if (AsmRewriteA->Loc.getPointer() < AsmRewriteB->Loc.getPointer())
7203 return -1;
7204 if (AsmRewriteB->Loc.getPointer() < AsmRewriteA->Loc.getPointer())
7205 return 1;
7207 // It's possible to have a SizeDirective, Imm/ImmPrefix and an Input/Output
7208 // rewrite to the same location. Make sure the SizeDirective rewrite is
7209 // performed first, then the Imm/ImmPrefix and finally the Input/Output. This
7210 // ensures the sort algorithm is stable.
7211 if (AsmRewritePrecedence[AsmRewriteA->Kind] >
7212 AsmRewritePrecedence[AsmRewriteB->Kind])
7213 return -1;
7215 if (AsmRewritePrecedence[AsmRewriteA->Kind] <
7216 AsmRewritePrecedence[AsmRewriteB->Kind])
7217 return 1;
7218 llvm_unreachable("Unstable rewrite sort.");
7221 bool MasmParser::defineMacro(StringRef Name, StringRef Value) {
7222 Variable &Var = Variables[Name.lower()];
7223 if (Var.Name.empty()) {
7224 Var.Name = Name;
7225 } else if (Var.Redefinable == Variable::NOT_REDEFINABLE) {
7226 return Error(SMLoc(), "invalid variable redefinition");
7227 } else if (Var.Redefinable == Variable::WARN_ON_REDEFINITION &&
7228 Warning(SMLoc(), "redefining '" + Name +
7229 "', already defined on the command line")) {
7230 return true;
7232 Var.Redefinable = Variable::WARN_ON_REDEFINITION;
7233 Var.IsText = true;
7234 Var.TextValue = Value.str();
7235 return false;
7238 bool MasmParser::lookUpField(StringRef Name, AsmFieldInfo &Info) const {
7239 const std::pair<StringRef, StringRef> BaseMember = Name.split('.');
7240 const StringRef Base = BaseMember.first, Member = BaseMember.second;
7241 return lookUpField(Base, Member, Info);
7244 bool MasmParser::lookUpField(StringRef Base, StringRef Member,
7245 AsmFieldInfo &Info) const {
7246 if (Base.empty())
7247 return true;
7249 AsmFieldInfo BaseInfo;
7250 if (Base.contains('.') && !lookUpField(Base, BaseInfo))
7251 Base = BaseInfo.Type.Name;
7253 auto StructIt = Structs.find(Base.lower());
7254 auto TypeIt = KnownType.find(Base.lower());
7255 if (TypeIt != KnownType.end()) {
7256 StructIt = Structs.find(TypeIt->second.Name.lower());
7258 if (StructIt != Structs.end())
7259 return lookUpField(StructIt->second, Member, Info);
7261 return true;
7264 bool MasmParser::lookUpField(const StructInfo &Structure, StringRef Member,
7265 AsmFieldInfo &Info) const {
7266 if (Member.empty()) {
7267 Info.Type.Name = Structure.Name;
7268 Info.Type.Size = Structure.Size;
7269 Info.Type.ElementSize = Structure.Size;
7270 Info.Type.Length = 1;
7271 return false;
7274 std::pair<StringRef, StringRef> Split = Member.split('.');
7275 const StringRef FieldName = Split.first, FieldMember = Split.second;
7277 auto StructIt = Structs.find(FieldName.lower());
7278 if (StructIt != Structs.end())
7279 return lookUpField(StructIt->second, FieldMember, Info);
7281 auto FieldIt = Structure.FieldsByName.find(FieldName.lower());
7282 if (FieldIt == Structure.FieldsByName.end())
7283 return true;
7285 const FieldInfo &Field = Structure.Fields[FieldIt->second];
7286 if (FieldMember.empty()) {
7287 Info.Offset += Field.Offset;
7288 Info.Type.Size = Field.SizeOf;
7289 Info.Type.ElementSize = Field.Type;
7290 Info.Type.Length = Field.LengthOf;
7291 if (Field.Contents.FT == FT_STRUCT)
7292 Info.Type.Name = Field.Contents.StructInfo.Structure.Name;
7293 else
7294 Info.Type.Name = "";
7295 return false;
7298 if (Field.Contents.FT != FT_STRUCT)
7299 return true;
7300 const StructFieldInfo &StructInfo = Field.Contents.StructInfo;
7302 if (lookUpField(StructInfo.Structure, FieldMember, Info))
7303 return true;
7305 Info.Offset += Field.Offset;
7306 return false;
7309 bool MasmParser::lookUpType(StringRef Name, AsmTypeInfo &Info) const {
7310 unsigned Size = StringSwitch<unsigned>(Name)
7311 .CasesLower("byte", "db", "sbyte", 1)
7312 .CasesLower("word", "dw", "sword", 2)
7313 .CasesLower("dword", "dd", "sdword", 4)
7314 .CasesLower("fword", "df", 6)
7315 .CasesLower("qword", "dq", "sqword", 8)
7316 .CaseLower("real4", 4)
7317 .CaseLower("real8", 8)
7318 .CaseLower("real10", 10)
7319 .Default(0);
7320 if (Size) {
7321 Info.Name = Name;
7322 Info.ElementSize = Size;
7323 Info.Length = 1;
7324 Info.Size = Size;
7325 return false;
7328 auto StructIt = Structs.find(Name.lower());
7329 if (StructIt != Structs.end()) {
7330 const StructInfo &Structure = StructIt->second;
7331 Info.Name = Name;
7332 Info.ElementSize = Structure.Size;
7333 Info.Length = 1;
7334 Info.Size = Structure.Size;
7335 return false;
7338 return true;
/// Parses the whole input as MS-style inline assembly and rewrites it into an
/// assembly string that uses numbered operand placeholders.
///
/// Statements are parsed one at a time until end of file. For every parsed
/// instruction, register operands that are explicit defs, together with the
/// instruction's implicit defs, are collected as clobbers; operands that carry
/// both a symbol name and a declaration become outputs or inputs. The rewrites
/// gathered while parsing are then sorted by source location and applied to
/// the main file's buffer to produce the final string.
///
/// \param[out] AsmString the rewritten assembly text.
/// \param[out] NumOutputs number of output operands collected.
/// \param[out] NumInputs number of input operands collected.
/// \param[out] OpDecls (declaration, needs-address-of) pairs, outputs first.
/// \param[out] Constraints constraint strings, in the same order as OpDecls.
/// \param[out] Clobbers printed names of the unique clobbered registers.
/// \param MII used to look up the MCInstrDesc of each parsed opcode.
/// \param IP used to print the clobbered register names.
/// \param SI forwarded to parseStatement.
/// \returns true if a statement failed to parse (after the pending errors
///          have been printed), false otherwise.
7341 bool MasmParser::parseMSInlineAsm(
7342 std::string &AsmString, unsigned &NumOutputs, unsigned &NumInputs,
7343 SmallVectorImpl<std::pair<void *, bool>> &OpDecls,
7344 SmallVectorImpl<std::string> &Constraints,
7345 SmallVectorImpl<std::string> &Clobbers, const MCInstrInfo *MII,
7346 const MCInstPrinter *IP, MCAsmParserSemaCallback &SI) {
7347 SmallVector<void *, 4> InputDecls;
7348 SmallVector<void *, 4> OutputDecls;
7349 SmallVector<bool, 4> InputDeclsAddressOf;
7350 SmallVector<bool, 4> OutputDeclsAddressOf;
7351 SmallVector<std::string, 4> InputConstraints;
7352 SmallVector<std::string, 4> OutputConstraints;
7353 SmallVector<unsigned, 4> ClobberRegs;
7355 SmallVector<AsmRewrite, 4> AsmStrRewrites;
7357 // Prime the lexer.
7358 Lex();
7360 // While we have input, parse each statement.
// InputIdx and OutputIdx are the next placeholder numbers handed out when the
// rewrites are applied further below.
7361 unsigned InputIdx = 0;
7362 unsigned OutputIdx = 0;
7363 while (getLexer().isNot(AsmToken::Eof)) {
7364 // Parse curly braces marking block start/end.
7365 if (parseCurlyBlockScope(AsmStrRewrites))
7366 continue;
7368 ParseStatementInfo Info(&AsmStrRewrites);
7369 bool StatementErr = parseStatement(Info, &SI);
7371 if (StatementErr || Info.ParseError) {
7372 // Emit pending errors if any exist.
7373 printPendingErrors();
7374 return true;
7377 // No pending error should exist here.
7378 assert(!hasPendingError() && "unexpected error from parseStatement");
// NOTE(review): an opcode of ~0U presumably means the statement did not
// produce an instruction; confirm against ParseStatementInfo.
7380 if (Info.Opcode == ~0U)
7381 continue;
7383 const MCInstrDesc &Desc = MII->get(Info.Opcode);
7385 // Build the list of clobbers, outputs and inputs.
// The scan starts at index 1. NOTE(review): operand 0 is presumably the
// mnemonic token; confirm against the target parser.
7386 for (unsigned i = 1, e = Info.ParsedOperands.size(); i != e; ++i) {
7387 MCParsedAsmOperand &Operand = *Info.ParsedOperands[i];
7389 // Register operand.
7390 if (Operand.isReg() && !Operand.needAddressOf() &&
7391 !getTargetParser().OmitRegisterFromClobberLists(Operand.getReg())) {
7392 unsigned NumDefs = Desc.getNumDefs();
7393 // Clobber.
7394 if (NumDefs && Operand.getMCOperandNum() < NumDefs)
7395 ClobberRegs.push_back(Operand.getReg());
7396 continue;
7399 // Expr/Input or Output.
7400 StringRef SymName = Operand.getSymName();
7401 if (SymName.empty())
7402 continue;
7404 void *OpDecl = Operand.getOpDecl();
7405 if (!OpDecl)
7406 continue;
7408 StringRef Constraint = Operand.getConstraint();
7409 if (Operand.isImm()) {
7410 // Offset as immediate.
7411 if (Operand.isOffsetOfLocal())
7412 Constraint = "r";
7413 else
7414 Constraint = "i";
// Only the first operand of an instruction that may store is treated as an
// output; everything else is an input.
7417 bool isOutput = (i == 1) && Desc.mayStore();
7418 SMLoc Start = SMLoc::getFromPointer(SymName.data());
7419 if (isOutput) {
// Outputs are placed before inputs in the merged operand list, so every
// output moves the number of the first input placeholder up by one.
7420 ++InputIdx;
7421 OutputDecls.push_back(OpDecl);
7422 OutputDeclsAddressOf.push_back(Operand.needAddressOf());
7423 OutputConstraints.push_back(("=" + Constraint).str());
7424 AsmStrRewrites.emplace_back(AOK_Output, Start, SymName.size());
7425 } else {
7426 InputDecls.push_back(OpDecl);
7427 InputDeclsAddressOf.push_back(Operand.needAddressOf());
7428 InputConstraints.push_back(Constraint.str());
7429 if (Desc.operands()[i - 1].isBranchTarget())
7430 AsmStrRewrites.emplace_back(AOK_CallInput, Start, SymName.size());
7431 else
7432 AsmStrRewrites.emplace_back(AOK_Input, Start, SymName.size());
7436 // Consider implicit defs to be clobbers. Think of cpuid and push.
7437 llvm::append_range(ClobberRegs, Desc.implicit_defs());
7440 // Set the number of Outputs and Inputs.
7441 NumOutputs = OutputDecls.size();
7442 NumInputs = InputDecls.size();
7444 // Set the unique clobbers.
7445 array_pod_sort(ClobberRegs.begin(), ClobberRegs.end());
7446 ClobberRegs.erase(std::unique(ClobberRegs.begin(), ClobberRegs.end()),
7447 ClobberRegs.end());
7448 Clobbers.assign(ClobberRegs.size(), std::string());
7449 for (unsigned I = 0, E = ClobberRegs.size(); I != E; ++I) {
7450 raw_string_ostream OS(Clobbers[I]);
7451 IP->printRegName(OS, ClobberRegs[I]);
7454 // Merge the various outputs and inputs. Output are expected first.
7455 if (NumOutputs || NumInputs) {
7456 unsigned NumExprs = NumOutputs + NumInputs;
7457 OpDecls.resize(NumExprs);
7458 Constraints.resize(NumExprs);
7459 for (unsigned i = 0; i < NumOutputs; ++i) {
7460 OpDecls[i] = std::make_pair(OutputDecls[i], OutputDeclsAddressOf[i]);
7461 Constraints[i] = OutputConstraints[i];
7463 for (unsigned i = 0, j = NumOutputs; i < NumInputs; ++i, ++j) {
7464 OpDecls[j] = std::make_pair(InputDecls[i], InputDeclsAddressOf[i]);
7465 Constraints[j] = InputConstraints[i];
7469 // Build the IR assembly string.
7470 std::string AsmStringIR;
7471 raw_string_ostream OS(AsmStringIR);
7472 StringRef ASMString =
7473 SrcMgr.getMemoryBuffer(SrcMgr.getMainFileID())->getBuffer();
// AsmStart tracks how much of the original text has been consumed; it moves
// forward past each rewrite as the sorted list is applied.
7474 const char *AsmStart = ASMString.begin();
7475 const char *AsmEnd = ASMString.end();
7476 array_pod_sort(AsmStrRewrites.begin(), AsmStrRewrites.end(), rewritesSort);
7477 for (auto it = AsmStrRewrites.begin(); it != AsmStrRewrites.end(); ++it) {
7478 const AsmRewrite &AR = *it;
7479 // Check if this has already been covered by another rewrite...
7480 if (AR.Done)
7481 continue;
7482 AsmRewriteKind Kind = AR.Kind;
7484 const char *Loc = AR.Loc.getPointer();
7485 assert(Loc >= AsmStart && "Expected Loc to be at or after Start!");
7487 // Emit everything up to the immediate/expression.
7488 if (unsigned Len = Loc - AsmStart)
7489 OS << StringRef(AsmStart, Len);
7491 // Skip the original expression.
7492 if (Kind == AOK_Skip) {
7493 AsmStart = Loc + AR.Len;
7494 continue;
7497 unsigned AdditionalSkip = 0;
7498 // Rewrite expressions in $N notation.
7499 switch (Kind) {
7500 default:
7501 break;
7502 case AOK_IntelExpr:
7503 assert(AR.IntelExp.isValid() && "cannot write invalid intel expression");
7504 if (AR.IntelExp.NeedBracs)
7505 OS << "[";
7506 if (AR.IntelExp.hasBaseReg())
7507 OS << AR.IntelExp.BaseReg;
7508 if (AR.IntelExp.hasIndexReg())
7509 OS << (AR.IntelExp.hasBaseReg() ? " + " : "")
7510 << AR.IntelExp.IndexReg;
7511 if (AR.IntelExp.Scale > 1)
7512 OS << " * $$" << AR.IntelExp.Scale;
7513 if (AR.IntelExp.hasOffset()) {
7514 if (AR.IntelExp.hasRegs())
7515 OS << " + ";
7516 // Fuse this rewrite with a rewrite of the offset name, if present.
7517 StringRef OffsetName = AR.IntelExp.OffsetName;
7518 SMLoc OffsetLoc = SMLoc::getFromPointer(AR.IntelExp.OffsetName.data());
7519 size_t OffsetLen = OffsetName.size();
// Search from the current rewrite onwards for an input rewrite that covers
// exactly the offset name; when found it is emitted here and marked Done so
// the outer loop skips it.
7520 auto rewrite_it = std::find_if(
7521 it, AsmStrRewrites.end(), [&](const AsmRewrite &FusingAR) {
7522 return FusingAR.Loc == OffsetLoc && FusingAR.Len == OffsetLen &&
7523 (FusingAR.Kind == AOK_Input ||
7524 FusingAR.Kind == AOK_CallInput);
7526 if (rewrite_it == AsmStrRewrites.end()) {
7527 OS << "offset " << OffsetName;
7528 } else if (rewrite_it->Kind == AOK_CallInput) {
7529 OS << "${" << InputIdx++ << ":P}";
7530 rewrite_it->Done = true;
7531 } else {
7532 OS << '$' << InputIdx++;
7533 rewrite_it->Done = true;
7536 if (AR.IntelExp.Imm || AR.IntelExp.emitImm())
7537 OS << (AR.IntelExp.emitImm() ? "$$" : " + $$") << AR.IntelExp.Imm;
7538 if (AR.IntelExp.NeedBracs)
7539 OS << "]";
7540 break;
7541 case AOK_Label:
7542 OS << Ctx.getAsmInfo()->getPrivateLabelPrefix() << AR.Label;
7543 break;
7544 case AOK_Input:
7545 OS << '$' << InputIdx++;
7546 break;
7547 case AOK_CallInput:
7548 OS << "${" << InputIdx++ << ":P}";
7549 break;
7550 case AOK_Output:
7551 OS << '$' << OutputIdx++;
7552 break;
7553 case AOK_SizeDirective:
7554 switch (AR.Val) {
7555 default: break;
7556 case 8: OS << "byte ptr "; break;
7557 case 16: OS << "word ptr "; break;
7558 case 32: OS << "dword ptr "; break;
7559 case 64: OS << "qword ptr "; break;
7560 case 80: OS << "xword ptr "; break;
7561 case 128: OS << "xmmword ptr "; break;
7562 case 256: OS << "ymmword ptr "; break;
7564 break;
7565 case AOK_Emit:
7566 OS << ".byte";
7567 break;
7568 case AOK_Align: {
7569 // MS alignment directives are measured in bytes. If the native assembler
7570 // measures alignment in bytes, we can pass it straight through.
7571 OS << ".align";
7572 if (getContext().getAsmInfo()->getAlignmentIsInBytes())
7573 break;
7575 // Alignment is in log2 form, so print that instead and skip the original
7576 // immediate.
7577 unsigned Val = AR.Val;
7578 OS << ' ' << Val;
7579 assert(Val < 10 && "Expected alignment less then 2^10.");
// NOTE(review): the skip of 2, 3 or 4 characters presumably covers a
// separating space plus the decimal digits of 2^Val in the source; confirm.
7580 AdditionalSkip = (Val < 4) ? 2 : Val < 7 ? 3 : 4;
7581 break;
7583 case AOK_EVEN:
7584 OS << ".even";
7585 break;
7586 case AOK_EndOfStatement:
7587 OS << "\n\t";
7588 break;
7591 // Skip the original expression.
7592 AsmStart = Loc + AR.Len + AdditionalSkip;
7595 // Emit the remainder of the asm string.
7596 if (AsmStart != AsmEnd)
7597 OS << StringRef(AsmStart, AsmEnd - AsmStart);
7599 AsmString = OS.str();
7600 return false;
7603 void MasmParser::initializeBuiltinSymbolMap() {
7604 // Numeric built-ins (supported in all versions)
7605 BuiltinSymbolMap["@version"] = BI_VERSION;
7606 BuiltinSymbolMap["@line"] = BI_LINE;
7608 // Text built-ins (supported in all versions)
7609 BuiltinSymbolMap["@date"] = BI_DATE;
7610 BuiltinSymbolMap["@time"] = BI_TIME;
7611 BuiltinSymbolMap["@filecur"] = BI_FILECUR;
7612 BuiltinSymbolMap["@filename"] = BI_FILENAME;
7613 BuiltinSymbolMap["@curseg"] = BI_CURSEG;
7615 // Some built-ins exist only for MASM32 (32-bit x86)
7616 if (getContext().getSubtargetInfo()->getTargetTriple().getArch() ==
7617 Triple::x86) {
7618 // Numeric built-ins
7619 // BuiltinSymbolMap["@cpu"] = BI_CPU;
7620 // BuiltinSymbolMap["@interface"] = BI_INTERFACE;
7621 // BuiltinSymbolMap["@wordsize"] = BI_WORDSIZE;
7622 // BuiltinSymbolMap["@codesize"] = BI_CODESIZE;
7623 // BuiltinSymbolMap["@datasize"] = BI_DATASIZE;
7624 // BuiltinSymbolMap["@model"] = BI_MODEL;
7626 // Text built-ins
7627 // BuiltinSymbolMap["@code"] = BI_CODE;
7628 // BuiltinSymbolMap["@data"] = BI_DATA;
7629 // BuiltinSymbolMap["@fardata?"] = BI_FARDATA;
7630 // BuiltinSymbolMap["@stack"] = BI_STACK;
7634 const MCExpr *MasmParser::evaluateBuiltinValue(BuiltinSymbol Symbol,
7635 SMLoc StartLoc) {
7636 switch (Symbol) {
7637 default:
7638 return nullptr;
7639 case BI_VERSION:
7640 // Match a recent version of ML.EXE.
7641 return MCConstantExpr::create(1427, getContext());
7642 case BI_LINE: {
7643 int64_t Line;
7644 if (ActiveMacros.empty())
7645 Line = SrcMgr.FindLineNumber(StartLoc, CurBuffer);
7646 else
7647 Line = SrcMgr.FindLineNumber(ActiveMacros.front()->InstantiationLoc,
7648 ActiveMacros.front()->ExitBuffer);
7649 return MCConstantExpr::create(Line, getContext());
7652 llvm_unreachable("unhandled built-in symbol");
7655 std::optional<std::string>
7656 MasmParser::evaluateBuiltinTextMacro(BuiltinSymbol Symbol, SMLoc StartLoc) {
7657 switch (Symbol) {
7658 default:
7659 return {};
7660 case BI_DATE: {
7661 // Current local date, formatted MM/DD/YY
7662 char TmpBuffer[sizeof("mm/dd/yy")];
7663 const size_t Len = strftime(TmpBuffer, sizeof(TmpBuffer), "%D", &TM);
7664 return std::string(TmpBuffer, Len);
7666 case BI_TIME: {
7667 // Current local time, formatted HH:MM:SS (24-hour clock)
7668 char TmpBuffer[sizeof("hh:mm:ss")];
7669 const size_t Len = strftime(TmpBuffer, sizeof(TmpBuffer), "%T", &TM);
7670 return std::string(TmpBuffer, Len);
7672 case BI_FILECUR:
7673 return SrcMgr
7674 .getMemoryBuffer(
7675 ActiveMacros.empty() ? CurBuffer : ActiveMacros.front()->ExitBuffer)
7676 ->getBufferIdentifier()
7677 .str();
7678 case BI_FILENAME:
7679 return sys::path::stem(SrcMgr.getMemoryBuffer(SrcMgr.getMainFileID())
7680 ->getBufferIdentifier())
7681 .upper();
7682 case BI_CURSEG:
7683 return getStreamer().getCurrentSectionOnly()->getName().str();
7685 llvm_unreachable("unhandled built-in symbol");
7688 /// Create an MCAsmParser instance.
7689 MCAsmParser *llvm::createMCMasmParser(SourceMgr &SM, MCContext &C,
7690 MCStreamer &Out, const MCAsmInfo &MAI,
7691 struct tm TM, unsigned CB) {
7692 return new MasmParser(SM, C, Out, MAI, TM, CB);