[ORC] Add std::tuple support to SimplePackedSerialization.
[llvm-project.git] / llvm / lib / MC / MCParser / MasmParser.cpp
blob7b4d6e529cc2c3c4efce05f62f41620658d7a8e0
1 //===- MasmParser.cpp - Parser for Assembly Files -------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This class implements the parser for assembly files.
11 //===----------------------------------------------------------------------===//
13 #include "llvm/ADT/APFloat.h"
14 #include "llvm/ADT/APInt.h"
15 #include "llvm/ADT/ArrayRef.h"
16 #include "llvm/ADT/DenseMap.h"
17 #include "llvm/ADT/None.h"
18 #include "llvm/ADT/Optional.h"
19 #include "llvm/ADT/STLExtras.h"
20 #include "llvm/ADT/SmallString.h"
21 #include "llvm/ADT/SmallVector.h"
22 #include "llvm/ADT/StringExtras.h"
23 #include "llvm/ADT/StringMap.h"
24 #include "llvm/ADT/StringRef.h"
25 #include "llvm/ADT/StringSwitch.h"
26 #include "llvm/ADT/Twine.h"
27 #include "llvm/BinaryFormat/Dwarf.h"
28 #include "llvm/DebugInfo/CodeView/SymbolRecord.h"
29 #include "llvm/MC/MCAsmInfo.h"
30 #include "llvm/MC/MCCodeView.h"
31 #include "llvm/MC/MCContext.h"
32 #include "llvm/MC/MCDirectives.h"
33 #include "llvm/MC/MCDwarf.h"
34 #include "llvm/MC/MCExpr.h"
35 #include "llvm/MC/MCInstPrinter.h"
36 #include "llvm/MC/MCInstrDesc.h"
37 #include "llvm/MC/MCInstrInfo.h"
38 #include "llvm/MC/MCObjectFileInfo.h"
39 #include "llvm/MC/MCParser/AsmCond.h"
40 #include "llvm/MC/MCParser/AsmLexer.h"
41 #include "llvm/MC/MCParser/MCAsmLexer.h"
42 #include "llvm/MC/MCParser/MCAsmParser.h"
43 #include "llvm/MC/MCParser/MCAsmParserExtension.h"
44 #include "llvm/MC/MCParser/MCAsmParserUtils.h"
45 #include "llvm/MC/MCParser/MCParsedAsmOperand.h"
46 #include "llvm/MC/MCParser/MCTargetAsmParser.h"
47 #include "llvm/MC/MCRegisterInfo.h"
48 #include "llvm/MC/MCSection.h"
49 #include "llvm/MC/MCStreamer.h"
50 #include "llvm/MC/MCSymbol.h"
51 #include "llvm/MC/MCTargetOptions.h"
52 #include "llvm/MC/MCValue.h"
53 #include "llvm/Support/Casting.h"
54 #include "llvm/Support/CommandLine.h"
55 #include "llvm/Support/ErrorHandling.h"
56 #include "llvm/Support/Format.h"
57 #include "llvm/Support/MD5.h"
58 #include "llvm/Support/MathExtras.h"
59 #include "llvm/Support/MemoryBuffer.h"
60 #include "llvm/Support/Path.h"
61 #include "llvm/Support/SMLoc.h"
62 #include "llvm/Support/SourceMgr.h"
63 #include "llvm/Support/raw_ostream.h"
64 #include <algorithm>
65 #include <cassert>
66 #include <cctype>
67 #include <climits>
68 #include <cstddef>
69 #include <cstdint>
70 #include <ctime>
71 #include <deque>
72 #include <memory>
73 #include <sstream>
74 #include <string>
75 #include <tuple>
76 #include <utility>
77 #include <vector>
79 using namespace llvm;
81 extern cl::opt<unsigned> AsmMacroMaxNestingDepth;
83 namespace {
85 /// Helper types for tracking macro definitions.
86 typedef std::vector<AsmToken> MCAsmMacroArgument;
87 typedef std::vector<MCAsmMacroArgument> MCAsmMacroArguments;
89 /// Helper class for storing information about an active macro instantiation.
90 struct MacroInstantiation {
91 /// The location of the instantiation.
92 SMLoc InstantiationLoc;
94 /// The buffer where parsing should resume upon instantiation completion.
95 unsigned ExitBuffer;
97 /// The location where parsing should resume upon instantiation completion.
98 SMLoc ExitLoc;
100 /// The depth of TheCondStack at the start of the instantiation.
101 size_t CondStackDepth;
104 struct ParseStatementInfo {
105 /// The parsed operands from the last parsed statement.
106 SmallVector<std::unique_ptr<MCParsedAsmOperand>, 8> ParsedOperands;
108 /// The opcode from the last parsed instruction.
109 unsigned Opcode = ~0U;
111 /// Was there an error parsing the inline assembly?
112 bool ParseError = false;
114 /// The value associated with a macro exit.
115 Optional<std::string> ExitValue;
117 SmallVectorImpl<AsmRewrite> *AsmRewrites = nullptr;
119 ParseStatementInfo() = delete;
120 ParseStatementInfo(SmallVectorImpl<AsmRewrite> *rewrites)
121 : AsmRewrites(rewrites) {}
/// Kind of initializer a struct field carries.
enum FieldType {
  /// Integer expression initializer, stored as an MCExpr.
  FT_INTEGRAL,
  /// Real-number initializer, stored as an APInt.
  FT_REAL,
  /// Struct initializer, stored recursively.
  FT_STRUCT
};
130 struct FieldInfo;
131 struct StructInfo {
132 StringRef Name;
133 bool IsUnion = false;
134 bool Initializable = true;
135 unsigned Alignment = 0;
136 unsigned AlignmentSize = 0;
137 unsigned NextOffset = 0;
138 unsigned Size = 0;
139 std::vector<FieldInfo> Fields;
140 StringMap<size_t> FieldsByName;
142 FieldInfo &addField(StringRef FieldName, FieldType FT,
143 unsigned FieldAlignmentSize);
145 StructInfo() = default;
147 StructInfo(StringRef StructName, bool Union, unsigned AlignmentValue)
148 : Name(StructName), IsUnion(Union), Alignment(AlignmentValue) {}
151 // FIXME: This should probably use a class hierarchy, raw pointers between the
152 // objects, and dynamic type resolution instead of a union. On the other hand,
153 // ownership then becomes much more complicated; the obvious thing would be to
154 // use BumpPtrAllocator, but the lack of a destructor makes that messy.
156 struct StructInitializer;
157 struct IntFieldInfo {
158 SmallVector<const MCExpr *, 1> Values;
160 IntFieldInfo() = default;
161 IntFieldInfo(const SmallVector<const MCExpr *, 1> &V) { Values = V; }
162 IntFieldInfo(SmallVector<const MCExpr *, 1> &&V) { Values = V; }
164 struct RealFieldInfo {
165 SmallVector<APInt, 1> AsIntValues;
167 RealFieldInfo() = default;
168 RealFieldInfo(const SmallVector<APInt, 1> &V) { AsIntValues = V; }
169 RealFieldInfo(SmallVector<APInt, 1> &&V) { AsIntValues = V; }
171 struct StructFieldInfo {
172 std::vector<StructInitializer> Initializers;
173 StructInfo Structure;
175 StructFieldInfo() = default;
176 StructFieldInfo(const std::vector<StructInitializer> &V, StructInfo S) {
177 Initializers = V;
178 Structure = S;
180 StructFieldInfo(std::vector<StructInitializer> &&V, StructInfo S) {
181 Initializers = V;
182 Structure = S;
186 class FieldInitializer {
187 public:
188 FieldType FT;
189 union {
190 IntFieldInfo IntInfo;
191 RealFieldInfo RealInfo;
192 StructFieldInfo StructInfo;
195 ~FieldInitializer() {
196 switch (FT) {
197 case FT_INTEGRAL:
198 IntInfo.~IntFieldInfo();
199 break;
200 case FT_REAL:
201 RealInfo.~RealFieldInfo();
202 break;
203 case FT_STRUCT:
204 StructInfo.~StructFieldInfo();
205 break;
209 FieldInitializer(FieldType FT) : FT(FT) {
210 switch (FT) {
211 case FT_INTEGRAL:
212 new (&IntInfo) IntFieldInfo();
213 break;
214 case FT_REAL:
215 new (&RealInfo) RealFieldInfo();
216 break;
217 case FT_STRUCT:
218 new (&StructInfo) StructFieldInfo();
219 break;
223 FieldInitializer(SmallVector<const MCExpr *, 1> &&Values) : FT(FT_INTEGRAL) {
224 new (&IntInfo) IntFieldInfo(Values);
227 FieldInitializer(SmallVector<APInt, 1> &&AsIntValues) : FT(FT_REAL) {
228 new (&RealInfo) RealFieldInfo(AsIntValues);
231 FieldInitializer(std::vector<StructInitializer> &&Initializers,
232 struct StructInfo Structure)
233 : FT(FT_STRUCT) {
234 new (&StructInfo) StructFieldInfo(Initializers, Structure);
237 FieldInitializer(const FieldInitializer &Initializer) : FT(Initializer.FT) {
238 switch (FT) {
239 case FT_INTEGRAL:
240 new (&IntInfo) IntFieldInfo(Initializer.IntInfo);
241 break;
242 case FT_REAL:
243 new (&RealInfo) RealFieldInfo(Initializer.RealInfo);
244 break;
245 case FT_STRUCT:
246 new (&StructInfo) StructFieldInfo(Initializer.StructInfo);
247 break;
251 FieldInitializer(FieldInitializer &&Initializer) : FT(Initializer.FT) {
252 switch (FT) {
253 case FT_INTEGRAL:
254 new (&IntInfo) IntFieldInfo(Initializer.IntInfo);
255 break;
256 case FT_REAL:
257 new (&RealInfo) RealFieldInfo(Initializer.RealInfo);
258 break;
259 case FT_STRUCT:
260 new (&StructInfo) StructFieldInfo(Initializer.StructInfo);
261 break;
265 FieldInitializer &operator=(const FieldInitializer &Initializer) {
266 if (FT != Initializer.FT) {
267 switch (FT) {
268 case FT_INTEGRAL:
269 IntInfo.~IntFieldInfo();
270 break;
271 case FT_REAL:
272 RealInfo.~RealFieldInfo();
273 break;
274 case FT_STRUCT:
275 StructInfo.~StructFieldInfo();
276 break;
279 FT = Initializer.FT;
280 switch (FT) {
281 case FT_INTEGRAL:
282 IntInfo = Initializer.IntInfo;
283 break;
284 case FT_REAL:
285 RealInfo = Initializer.RealInfo;
286 break;
287 case FT_STRUCT:
288 StructInfo = Initializer.StructInfo;
289 break;
291 return *this;
294 FieldInitializer &operator=(FieldInitializer &&Initializer) {
295 if (FT != Initializer.FT) {
296 switch (FT) {
297 case FT_INTEGRAL:
298 IntInfo.~IntFieldInfo();
299 break;
300 case FT_REAL:
301 RealInfo.~RealFieldInfo();
302 break;
303 case FT_STRUCT:
304 StructInfo.~StructFieldInfo();
305 break;
308 FT = Initializer.FT;
309 switch (FT) {
310 case FT_INTEGRAL:
311 IntInfo = Initializer.IntInfo;
312 break;
313 case FT_REAL:
314 RealInfo = Initializer.RealInfo;
315 break;
316 case FT_STRUCT:
317 StructInfo = Initializer.StructInfo;
318 break;
320 return *this;
324 struct StructInitializer {
325 std::vector<FieldInitializer> FieldInitializers;
328 struct FieldInfo {
329 // Offset of the field within the containing STRUCT.
330 unsigned Offset = 0;
332 // Total size of the field (= LengthOf * Type).
333 unsigned SizeOf = 0;
335 // Number of elements in the field (1 if scalar, >1 if an array).
336 unsigned LengthOf = 0;
338 // Size of a single entry in this field, in bytes ("type" in MASM standards).
339 unsigned Type = 0;
341 FieldInitializer Contents;
343 FieldInfo(FieldType FT) : Contents(FT) {}
346 FieldInfo &StructInfo::addField(StringRef FieldName, FieldType FT,
347 unsigned FieldAlignmentSize) {
348 if (!FieldName.empty())
349 FieldsByName[FieldName.lower()] = Fields.size();
350 Fields.emplace_back(FT);
351 FieldInfo &Field = Fields.back();
352 Field.Offset =
353 llvm::alignTo(NextOffset, std::min(Alignment, FieldAlignmentSize));
354 if (!IsUnion) {
355 NextOffset = std::max(NextOffset, Field.Offset);
357 AlignmentSize = std::max(AlignmentSize, FieldAlignmentSize);
358 return Field;
361 /// The concrete assembly parser instance.
362 // Note that this is a full MCAsmParser, not an MCAsmParserExtension!
363 // It's a peer of AsmParser, not of COFFAsmParser, WasmAsmParser, etc.
364 class MasmParser : public MCAsmParser {
365 private:
366 AsmLexer Lexer;
367 MCContext &Ctx;
368 MCStreamer &Out;
369 const MCAsmInfo &MAI;
370 SourceMgr &SrcMgr;
371 SourceMgr::DiagHandlerTy SavedDiagHandler;
372 void *SavedDiagContext;
373 std::unique_ptr<MCAsmParserExtension> PlatformParser;
375 /// This is the current buffer index we're lexing from as managed by the
376 /// SourceMgr object.
377 unsigned CurBuffer;
379 /// time of assembly
380 struct tm TM;
382 std::vector<bool> EndStatementAtEOFStack;
384 AsmCond TheCondState;
385 std::vector<AsmCond> TheCondStack;
387 /// maps directive names to handler methods in parser
388 /// extensions. Extensions register themselves in this map by calling
389 /// addDirectiveHandler.
390 StringMap<ExtensionDirectiveHandler> ExtensionDirectiveMap;
392 /// maps assembly-time variable names to variables.
393 struct Variable {
394 enum RedefinableKind { NOT_REDEFINABLE, WARN_ON_REDEFINITION, REDEFINABLE };
396 StringRef Name;
397 RedefinableKind Redefinable = REDEFINABLE;
398 bool IsText = false;
399 std::string TextValue;
401 StringMap<Variable> Variables;
403 /// Stack of active struct definitions.
404 SmallVector<StructInfo, 1> StructInProgress;
406 /// Maps struct tags to struct definitions.
407 StringMap<StructInfo> Structs;
409 /// Maps data location names to types.
410 StringMap<AsmTypeInfo> KnownType;
412 /// Stack of active macro instantiations.
413 std::vector<MacroInstantiation*> ActiveMacros;
415 /// List of bodies of anonymous macros.
416 std::deque<MCAsmMacro> MacroLikeBodies;
418 /// Keeps track of how many .macro's have been instantiated.
419 unsigned NumOfMacroInstantiations;
421 /// The values from the last parsed cpp hash file line comment if any.
422 struct CppHashInfoTy {
423 StringRef Filename;
424 int64_t LineNumber;
425 SMLoc Loc;
426 unsigned Buf;
427 CppHashInfoTy() : Filename(), LineNumber(0), Loc(), Buf(0) {}
429 CppHashInfoTy CppHashInfo;
431 /// The filename from the first cpp hash file line comment, if any.
432 StringRef FirstCppHashFilename;
434 /// List of forward directional labels for diagnosis at the end.
435 SmallVector<std::tuple<SMLoc, CppHashInfoTy, MCSymbol *>, 4> DirLabels;
437 /// AssemblerDialect. ~0U means unset value and use value provided by MAI.
438 /// Defaults to 1U, meaning Intel.
439 unsigned AssemblerDialect = 1U;
441 /// is Darwin compatibility enabled?
442 bool IsDarwin = false;
444 /// Are we parsing ms-style inline assembly?
445 bool ParsingMSInlineAsm = false;
447 /// Did we already inform the user about inconsistent MD5 usage?
448 bool ReportedInconsistentMD5 = false;
450 // Current <...> expression depth.
451 unsigned AngleBracketDepth = 0U;
453 // Number of locals defined.
454 uint16_t LocalCounter = 0;
456 public:
457 MasmParser(SourceMgr &SM, MCContext &Ctx, MCStreamer &Out,
458 const MCAsmInfo &MAI, struct tm TM, unsigned CB = 0);
459 MasmParser(const MasmParser &) = delete;
460 MasmParser &operator=(const MasmParser &) = delete;
461 ~MasmParser() override;
463 bool Run(bool NoInitialTextSection, bool NoFinalize = false) override;
465 void addDirectiveHandler(StringRef Directive,
466 ExtensionDirectiveHandler Handler) override {
467 ExtensionDirectiveMap[Directive] = Handler;
468 if (DirectiveKindMap.find(Directive) == DirectiveKindMap.end()) {
469 DirectiveKindMap[Directive] = DK_HANDLER_DIRECTIVE;
473 void addAliasForDirective(StringRef Directive, StringRef Alias) override {
474 DirectiveKindMap[Directive] = DirectiveKindMap[Alias];
477 /// @name MCAsmParser Interface
478 /// {
480 SourceMgr &getSourceManager() override { return SrcMgr; }
481 MCAsmLexer &getLexer() override { return Lexer; }
482 MCContext &getContext() override { return Ctx; }
483 MCStreamer &getStreamer() override { return Out; }
485 CodeViewContext &getCVContext() { return Ctx.getCVContext(); }
487 unsigned getAssemblerDialect() override {
488 if (AssemblerDialect == ~0U)
489 return MAI.getAssemblerDialect();
490 else
491 return AssemblerDialect;
493 void setAssemblerDialect(unsigned i) override {
494 AssemblerDialect = i;
497 void Note(SMLoc L, const Twine &Msg, SMRange Range = None) override;
498 bool Warning(SMLoc L, const Twine &Msg, SMRange Range = None) override;
499 bool printError(SMLoc L, const Twine &Msg, SMRange Range = None) override;
501 enum ExpandKind { ExpandMacros, DoNotExpandMacros };
502 const AsmToken &Lex(ExpandKind ExpandNextToken);
503 const AsmToken &Lex() override { return Lex(ExpandMacros); }
505 void setParsingMSInlineAsm(bool V) override {
506 ParsingMSInlineAsm = V;
507 // When parsing MS inline asm, we must lex 0b1101 and 0ABCH as binary and
508 // hex integer literals.
509 Lexer.setLexMasmIntegers(V);
511 bool isParsingMSInlineAsm() override { return ParsingMSInlineAsm; }
513 bool isParsingMasm() const override { return true; }
515 bool defineMacro(StringRef Name, StringRef Value) override;
517 bool lookUpField(StringRef Name, AsmFieldInfo &Info) const override;
518 bool lookUpField(StringRef Base, StringRef Member,
519 AsmFieldInfo &Info) const override;
521 bool lookUpType(StringRef Name, AsmTypeInfo &Info) const override;
523 bool parseMSInlineAsm(std::string &AsmString, unsigned &NumOutputs,
524 unsigned &NumInputs,
525 SmallVectorImpl<std::pair<void *, bool>> &OpDecls,
526 SmallVectorImpl<std::string> &Constraints,
527 SmallVectorImpl<std::string> &Clobbers,
528 const MCInstrInfo *MII, const MCInstPrinter *IP,
529 MCAsmParserSemaCallback &SI) override;
531 bool parseExpression(const MCExpr *&Res);
532 bool parseExpression(const MCExpr *&Res, SMLoc &EndLoc) override;
533 bool parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc,
534 AsmTypeInfo *TypeInfo) override;
535 bool parseParenExpression(const MCExpr *&Res, SMLoc &EndLoc) override;
536 bool parseParenExprOfDepth(unsigned ParenDepth, const MCExpr *&Res,
537 SMLoc &EndLoc) override;
538 bool parseAbsoluteExpression(int64_t &Res) override;
540 /// Parse a floating point expression using the float \p Semantics
541 /// and set \p Res to the value.
542 bool parseRealValue(const fltSemantics &Semantics, APInt &Res);
544 /// Parse an identifier or string (as a quoted identifier)
545 /// and set \p Res to the identifier contents.
546 enum IdentifierPositionKind { StandardPosition, StartOfStatement };
547 bool parseIdentifier(StringRef &Res, IdentifierPositionKind Position);
548 bool parseIdentifier(StringRef &Res) override {
549 return parseIdentifier(Res, StandardPosition);
551 void eatToEndOfStatement() override;
553 bool checkForValidSection() override;
555 /// }
557 private:
558 bool expandMacros();
559 const AsmToken peekTok(bool ShouldSkipSpace = true);
561 bool parseStatement(ParseStatementInfo &Info,
562 MCAsmParserSemaCallback *SI);
563 bool parseCurlyBlockScope(SmallVectorImpl<AsmRewrite>& AsmStrRewrites);
564 bool parseCppHashLineFilenameComment(SMLoc L);
566 bool expandMacro(raw_svector_ostream &OS, StringRef Body,
567 ArrayRef<MCAsmMacroParameter> Parameters,
568 ArrayRef<MCAsmMacroArgument> A,
569 const std::vector<std::string> &Locals, SMLoc L);
571 /// Are we inside a macro instantiation?
572 bool isInsideMacroInstantiation() {return !ActiveMacros.empty();}
574 /// Handle entry to macro instantiation.
576 /// \param M The macro.
577 /// \param NameLoc Instantiation location.
578 bool handleMacroEntry(
579 const MCAsmMacro *M, SMLoc NameLoc,
580 AsmToken::TokenKind ArgumentEndTok = AsmToken::EndOfStatement);
582 /// Handle invocation of macro function.
584 /// \param M The macro.
585 /// \param NameLoc Invocation location.
586 bool handleMacroInvocation(const MCAsmMacro *M, SMLoc NameLoc);
588 /// Handle exit from macro instantiation.
589 void handleMacroExit();
591 /// Extract AsmTokens for a macro argument.
592 bool
593 parseMacroArgument(const MCAsmMacroParameter *MP, MCAsmMacroArgument &MA,
594 AsmToken::TokenKind EndTok = AsmToken::EndOfStatement);
596 /// Parse all macro arguments for a given macro.
597 bool
598 parseMacroArguments(const MCAsmMacro *M, MCAsmMacroArguments &A,
599 AsmToken::TokenKind EndTok = AsmToken::EndOfStatement);
601 void printMacroInstantiations();
603 bool expandStatement(SMLoc Loc);
605 void printMessage(SMLoc Loc, SourceMgr::DiagKind Kind, const Twine &Msg,
606 SMRange Range = None) const {
607 ArrayRef<SMRange> Ranges(Range);
608 SrcMgr.PrintMessage(Loc, Kind, Msg, Ranges);
610 static void DiagHandler(const SMDiagnostic &Diag, void *Context);
612 bool lookUpField(const StructInfo &Structure, StringRef Member,
613 AsmFieldInfo &Info) const;
615 /// Should we emit DWARF describing this assembler source? (Returns false if
616 /// the source has .file directives, which means we don't want to generate
617 /// info describing the assembler source itself.)
618 bool enabledGenDwarfForAssembly();
620 /// Enter the specified file. This returns true on failure.
621 bool enterIncludeFile(const std::string &Filename);
623 /// Reset the current lexer position to that given by \p Loc. The
624 /// current token is not set; clients should ensure Lex() is called
625 /// subsequently.
627 /// \param InBuffer If not 0, should be the known buffer id that contains the
628 /// location.
629 void jumpToLoc(SMLoc Loc, unsigned InBuffer = 0,
630 bool EndStatementAtEOF = true);
632 /// Parse up to a token of kind \p EndTok and return the contents from the
633 /// current token up to (but not including) this token; the current token on
634 /// exit will be either this kind or EOF. Reads through instantiated macro
635 /// functions and text macros.
636 SmallVector<StringRef, 1> parseStringRefsTo(AsmToken::TokenKind EndTok);
637 std::string parseStringTo(AsmToken::TokenKind EndTok);
639 /// Parse up to the end of statement and return the contents from the current
640 /// token until the end of the statement; the current token on exit will be
641 /// either the EndOfStatement or EOF.
642 StringRef parseStringToEndOfStatement() override;
644 bool parseTextItem(std::string &Data);
646 unsigned getBinOpPrecedence(AsmToken::TokenKind K,
647 MCBinaryExpr::Opcode &Kind);
649 bool parseBinOpRHS(unsigned Precedence, const MCExpr *&Res, SMLoc &EndLoc);
650 bool parseParenExpr(const MCExpr *&Res, SMLoc &EndLoc);
651 bool parseBracketExpr(const MCExpr *&Res, SMLoc &EndLoc);
653 bool parseRegisterOrRegisterNumber(int64_t &Register, SMLoc DirectiveLoc);
655 bool parseCVFunctionId(int64_t &FunctionId, StringRef DirectiveName);
656 bool parseCVFileId(int64_t &FileId, StringRef DirectiveName);
// Generic (target and platform independent) directive parsing.
//
// Enumerator order determines the numeric values; keep it stable.
enum DirectiveKind {
  DK_NO_DIRECTIVE, // Placeholder
  DK_HANDLER_DIRECTIVE,

  // Equates.
  DK_ASSIGN,
  DK_EQU,
  DK_TEXTEQU,

  // String and integer data.
  DK_ASCII,
  DK_ASCIZ,
  DK_STRING,
  DK_BYTE,
  DK_SBYTE,
  DK_WORD,
  DK_SWORD,
  DK_DWORD,
  DK_SDWORD,
  DK_FWORD,
  DK_QWORD,
  DK_SQWORD,
  DK_DB,
  DK_DD,
  DK_DF,
  DK_DQ,
  DK_DW,

  // Real-number data.
  DK_REAL4,
  DK_REAL8,
  DK_REAL10,

  // Location, linkage and file-level directives.
  DK_ALIGN,
  DK_EVEN,
  DK_ORG,
  DK_ENDR,
  DK_EXTERN,
  DK_PUBLIC,
  DK_COMM,
  DK_COMMENT,
  DK_INCLUDE,

  // Macro-like loops.
  DK_REPEAT,
  DK_WHILE,
  DK_FOR,
  DK_FORC,

  // Conditional assembly.
  DK_IF,
  DK_IFE,
  DK_IFB,
  DK_IFNB,
  DK_IFDEF,
  DK_IFNDEF,
  DK_IFDIF,
  DK_IFDIFI,
  DK_IFIDN,
  DK_IFIDNI,
  DK_ELSEIF,
  DK_ELSEIFE,
  DK_ELSEIFB,
  DK_ELSEIFNB,
  DK_ELSEIFDEF,
  DK_ELSEIFNDEF,
  DK_ELSEIFDIF,
  DK_ELSEIFDIFI,
  DK_ELSEIFIDN,
  DK_ELSEIFIDNI,
  DK_ELSE,
  DK_ENDIF,

  // Source position directives.
  DK_FILE,
  DK_LINE,
  DK_LOC,
  DK_STABS,

  // CodeView directives.
  DK_CV_FILE,
  DK_CV_FUNC_ID,
  DK_CV_INLINE_SITE_ID,
  DK_CV_LOC,
  DK_CV_LINETABLE,
  DK_CV_INLINE_LINETABLE,
  DK_CV_DEF_RANGE,
  DK_CV_STRINGTABLE,
  DK_CV_STRING,
  DK_CV_FILECHECKSUMS,
  DK_CV_FILECHECKSUM_OFFSET,
  DK_CV_FPO_DATA,

  // CFI directives.
  DK_CFI_SECTIONS,
  DK_CFI_STARTPROC,
  DK_CFI_ENDPROC,
  DK_CFI_DEF_CFA,
  DK_CFI_DEF_CFA_OFFSET,
  DK_CFI_ADJUST_CFA_OFFSET,
  DK_CFI_DEF_CFA_REGISTER,
  DK_CFI_OFFSET,
  DK_CFI_REL_OFFSET,
  DK_CFI_PERSONALITY,
  DK_CFI_LSDA,
  DK_CFI_REMEMBER_STATE,
  DK_CFI_RESTORE_STATE,
  DK_CFI_SAME_VALUE,
  DK_CFI_RESTORE,
  DK_CFI_ESCAPE,
  DK_CFI_RETURN_COLUMN,
  DK_CFI_SIGNAL_FRAME,
  DK_CFI_UNDEFINED,
  DK_CFI_REGISTER,
  DK_CFI_WINDOW_SAVE,
  DK_CFI_B_KEY_FRAME,

  // Macro directives.
  DK_MACRO,
  DK_EXITM,
  DK_ENDM,
  DK_PURGE,

  // Error and echo directives.
  DK_ERR,
  DK_ERRB,
  DK_ERRNB,
  DK_ERRDEF,
  DK_ERRNDEF,
  DK_ERRDIF,
  DK_ERRDIFI,
  DK_ERRIDN,
  DK_ERRIDNI,
  DK_ERRE,
  DK_ERRNZ,
  DK_ECHO,

  // Struct and union definitions, end of input.
  DK_STRUCT,
  DK_UNION,
  DK_ENDS,
  DK_END,

  // Remaining directives.
  DK_PUSHFRAME,
  DK_PUSHREG,
  DK_SAVEREG,
  DK_SAVEXMM128,
  DK_SETFRAME,
  DK_RADIX,
};
786 /// Maps directive name --> DirectiveKind enum, for directives parsed by this
787 /// class.
788 StringMap<DirectiveKind> DirectiveKindMap;
790 bool isMacroLikeDirective();
// Kinds of Codeview def_range records recognised when parsing.
enum CVDefRangeType {
  // Placeholder; explicitly pinned to zero.
  CVDR_DEFRANGE = 0,
  CVDR_DEFRANGE_REGISTER,
  CVDR_DEFRANGE_FRAMEPOINTER_REL,
  CVDR_DEFRANGE_SUBFIELD_REGISTER,
  CVDR_DEFRANGE_REGISTER_REL
};
801 /// Maps Codeview def_range types --> CVDefRangeType enum, for Codeview
802 /// def_range types parsed by this class.
803 StringMap<CVDefRangeType> CVDefRangeTypeMap;
// Built-in symbols known to the parser.
//
// Enumerator order determines the numeric values; keep it stable.
enum BuiltinSymbol {
  BI_NO_SYMBOL, // Placeholder

  BI_DATE,
  BI_TIME,
  BI_VERSION,
  BI_FILECUR,
  BI_FILENAME,
  BI_LINE,

  BI_CURSEG,
  BI_CPU,
  BI_INTERFACE,
  BI_CODE,
  BI_DATA,
  BI_FARDATA,
  BI_WORDSIZE,
  BI_CODESIZE,
  BI_DATASIZE,
  BI_MODEL,
  BI_STACK,
};
827 /// Maps builtin name --> BuiltinSymbol enum, for builtins handled by this
828 /// class.
829 StringMap<BuiltinSymbol> BuiltinSymbolMap;
831 const MCExpr *evaluateBuiltinValue(BuiltinSymbol Symbol, SMLoc StartLoc);
833 llvm::Optional<std::string> evaluateBuiltinTextMacro(BuiltinSymbol Symbol,
834 SMLoc StartLoc);
836 // ".ascii", ".asciz", ".string"
837 bool parseDirectiveAscii(StringRef IDVal, bool ZeroTerminated);
839 // "byte", "word", ...
840 bool emitIntValue(const MCExpr *Value, unsigned Size);
841 bool parseScalarInitializer(unsigned Size,
842 SmallVectorImpl<const MCExpr *> &Values,
843 unsigned StringPadLength = 0);
844 bool parseScalarInstList(
845 unsigned Size, SmallVectorImpl<const MCExpr *> &Values,
846 const AsmToken::TokenKind EndToken = AsmToken::EndOfStatement);
847 bool emitIntegralValues(unsigned Size, unsigned *Count = nullptr);
848 bool addIntegralField(StringRef Name, unsigned Size);
849 bool parseDirectiveValue(StringRef IDVal, unsigned Size);
850 bool parseDirectiveNamedValue(StringRef TypeName, unsigned Size,
851 StringRef Name, SMLoc NameLoc);
853 // "real4", "real8", "real10"
854 bool emitRealValues(const fltSemantics &Semantics, unsigned *Count = nullptr);
855 bool addRealField(StringRef Name, const fltSemantics &Semantics, size_t Size);
856 bool parseDirectiveRealValue(StringRef IDVal, const fltSemantics &Semantics,
857 size_t Size);
858 bool parseRealInstList(
859 const fltSemantics &Semantics, SmallVectorImpl<APInt> &Values,
860 const AsmToken::TokenKind EndToken = AsmToken::EndOfStatement);
861 bool parseDirectiveNamedRealValue(StringRef TypeName,
862 const fltSemantics &Semantics,
863 unsigned Size, StringRef Name,
864 SMLoc NameLoc);
866 bool parseOptionalAngleBracketOpen();
867 bool parseAngleBracketClose(const Twine &Msg = "expected '>'");
869 bool parseFieldInitializer(const FieldInfo &Field,
870 FieldInitializer &Initializer);
871 bool parseFieldInitializer(const FieldInfo &Field,
872 const IntFieldInfo &Contents,
873 FieldInitializer &Initializer);
874 bool parseFieldInitializer(const FieldInfo &Field,
875 const RealFieldInfo &Contents,
876 FieldInitializer &Initializer);
877 bool parseFieldInitializer(const FieldInfo &Field,
878 const StructFieldInfo &Contents,
879 FieldInitializer &Initializer);
881 bool parseStructInitializer(const StructInfo &Structure,
882 StructInitializer &Initializer);
883 bool parseStructInstList(
884 const StructInfo &Structure, std::vector<StructInitializer> &Initializers,
885 const AsmToken::TokenKind EndToken = AsmToken::EndOfStatement);
887 bool emitFieldValue(const FieldInfo &Field);
888 bool emitFieldValue(const FieldInfo &Field, const IntFieldInfo &Contents);
889 bool emitFieldValue(const FieldInfo &Field, const RealFieldInfo &Contents);
890 bool emitFieldValue(const FieldInfo &Field, const StructFieldInfo &Contents);
892 bool emitFieldInitializer(const FieldInfo &Field,
893 const FieldInitializer &Initializer);
894 bool emitFieldInitializer(const FieldInfo &Field,
895 const IntFieldInfo &Contents,
896 const IntFieldInfo &Initializer);
897 bool emitFieldInitializer(const FieldInfo &Field,
898 const RealFieldInfo &Contents,
899 const RealFieldInfo &Initializer);
900 bool emitFieldInitializer(const FieldInfo &Field,
901 const StructFieldInfo &Contents,
902 const StructFieldInfo &Initializer);
904 bool emitStructInitializer(const StructInfo &Structure,
905 const StructInitializer &Initializer);
907 // User-defined types (structs, unions):
908 bool emitStructValues(const StructInfo &Structure, unsigned *Count = nullptr);
909 bool addStructField(StringRef Name, const StructInfo &Structure);
910 bool parseDirectiveStructValue(const StructInfo &Structure,
911 StringRef Directive, SMLoc DirLoc);
912 bool parseDirectiveNamedStructValue(const StructInfo &Structure,
913 StringRef Directive, SMLoc DirLoc,
914 StringRef Name);
916 // "=", "equ", "textequ"
917 bool parseDirectiveEquate(StringRef IDVal, StringRef Name,
918 DirectiveKind DirKind, SMLoc NameLoc);
920 bool parseDirectiveOrg(); // "org"
922 bool emitAlignTo(int64_t Alignment);
923 bool parseDirectiveAlign(); // "align"
924 bool parseDirectiveEven(); // "even"
926 // ".file", ".line", ".loc", ".stabs"
927 bool parseDirectiveFile(SMLoc DirectiveLoc);
928 bool parseDirectiveLine();
929 bool parseDirectiveLoc();
930 bool parseDirectiveStabs();
932 // ".cv_file", ".cv_func_id", ".cv_inline_site_id", ".cv_loc", ".cv_linetable",
933 // ".cv_inline_linetable", ".cv_def_range", ".cv_string"
934 bool parseDirectiveCVFile();
935 bool parseDirectiveCVFuncId();
936 bool parseDirectiveCVInlineSiteId();
937 bool parseDirectiveCVLoc();
938 bool parseDirectiveCVLinetable();
939 bool parseDirectiveCVInlineLinetable();
940 bool parseDirectiveCVDefRange();
941 bool parseDirectiveCVString();
942 bool parseDirectiveCVStringTable();
943 bool parseDirectiveCVFileChecksums();
944 bool parseDirectiveCVFileChecksumOffset();
945 bool parseDirectiveCVFPOData();
947 // .cfi directives
948 bool parseDirectiveCFIRegister(SMLoc DirectiveLoc);
949 bool parseDirectiveCFIWindowSave();
950 bool parseDirectiveCFISections();
951 bool parseDirectiveCFIStartProc();
952 bool parseDirectiveCFIEndProc();
953 bool parseDirectiveCFIDefCfaOffset();
954 bool parseDirectiveCFIDefCfa(SMLoc DirectiveLoc);
955 bool parseDirectiveCFIAdjustCfaOffset();
956 bool parseDirectiveCFIDefCfaRegister(SMLoc DirectiveLoc);
957 bool parseDirectiveCFIOffset(SMLoc DirectiveLoc);
958 bool parseDirectiveCFIRelOffset(SMLoc DirectiveLoc);
959 bool parseDirectiveCFIPersonalityOrLsda(bool IsPersonality);
960 bool parseDirectiveCFIRememberState();
961 bool parseDirectiveCFIRestoreState();
962 bool parseDirectiveCFISameValue(SMLoc DirectiveLoc);
963 bool parseDirectiveCFIRestore(SMLoc DirectiveLoc);
964 bool parseDirectiveCFIEscape();
965 bool parseDirectiveCFIReturnColumn(SMLoc DirectiveLoc);
966 bool parseDirectiveCFISignalFrame();
967 bool parseDirectiveCFIUndefined(SMLoc DirectiveLoc);
969 // macro directives
970 bool parseDirectivePurgeMacro(SMLoc DirectiveLoc);
971 bool parseDirectiveExitMacro(SMLoc DirectiveLoc, StringRef Directive,
972 std::string &Value);
973 bool parseDirectiveEndMacro(StringRef Directive);
974 bool parseDirectiveMacro(StringRef Name, SMLoc NameLoc);
976 bool parseDirectiveStruct(StringRef Directive, DirectiveKind DirKind,
977 StringRef Name, SMLoc NameLoc);
978 bool parseDirectiveNestedStruct(StringRef Directive, DirectiveKind DirKind);
979 bool parseDirectiveEnds(StringRef Name, SMLoc NameLoc);
980 bool parseDirectiveNestedEnds();
982 /// Parse a directive like ".globl" which accepts a single symbol (which
983 /// should be a label or an external).
984 bool parseDirectiveSymbolAttribute(MCSymbolAttr Attr);
986 bool parseDirectiveComm(bool IsLocal); // ".comm" and ".lcomm"
988 bool parseDirectiveComment(SMLoc DirectiveLoc); // "comment"
990 bool parseDirectiveInclude(); // "include"
992 // "if" or "ife"
993 bool parseDirectiveIf(SMLoc DirectiveLoc, DirectiveKind DirKind);
994 // "ifb" or "ifnb", depending on ExpectBlank.
995 bool parseDirectiveIfb(SMLoc DirectiveLoc, bool ExpectBlank);
996 // "ifidn", "ifdif", "ifidni", or "ifdifi", depending on ExpectEqual and
997 // CaseInsensitive.
998 bool parseDirectiveIfidn(SMLoc DirectiveLoc, bool ExpectEqual,
999 bool CaseInsensitive);
1000 // "ifdef" or "ifndef", depending on expect_defined
1001 bool parseDirectiveIfdef(SMLoc DirectiveLoc, bool expect_defined);
1002 // "elseif" or "elseife"
1003 bool parseDirectiveElseIf(SMLoc DirectiveLoc, DirectiveKind DirKind);
1004 // "elseifb" or "elseifnb", depending on ExpectBlank.
1005 bool parseDirectiveElseIfb(SMLoc DirectiveLoc, bool ExpectBlank);
1006 // "elseifdef" or "elseifndef", depending on expect_defined
1007 bool parseDirectiveElseIfdef(SMLoc DirectiveLoc, bool expect_defined);
1008 // "elseifidn", "elseifdif", "elseifidni", or "elseifdifi", depending on
1009 // ExpectEqual and CaseInsensitive.
1010 bool parseDirectiveElseIfidn(SMLoc DirectiveLoc, bool ExpectEqual,
1011 bool CaseInsensitive);
1012 bool parseDirectiveElse(SMLoc DirectiveLoc); // "else"
1013 bool parseDirectiveEndIf(SMLoc DirectiveLoc); // "endif"
1014 bool parseEscapedString(std::string &Data) override;
1015 bool parseAngleBracketString(std::string &Data) override;
1017 // Macro-like directives
1018 MCAsmMacro *parseMacroLikeBody(SMLoc DirectiveLoc);
1019 void instantiateMacroLikeBody(MCAsmMacro *M, SMLoc DirectiveLoc,
1020 raw_svector_ostream &OS);
1021 void instantiateMacroLikeBody(MCAsmMacro *M, SMLoc DirectiveLoc,
1022 SMLoc ExitLoc, raw_svector_ostream &OS);
1023 bool parseDirectiveRepeat(SMLoc DirectiveLoc, StringRef Directive);
1024 bool parseDirectiveFor(SMLoc DirectiveLoc, StringRef Directive);
1025 bool parseDirectiveForc(SMLoc DirectiveLoc, StringRef Directive);
1026 bool parseDirectiveWhile(SMLoc DirectiveLoc);
1028 // "_emit" or "__emit"
1029 bool parseDirectiveMSEmit(SMLoc DirectiveLoc, ParseStatementInfo &Info,
1030 size_t Len);
1032 // "align"
1033 bool parseDirectiveMSAlign(SMLoc DirectiveLoc, ParseStatementInfo &Info);
1035 // "end"
1036 bool parseDirectiveEnd(SMLoc DirectiveLoc);
1038 // ".err"
1039 bool parseDirectiveError(SMLoc DirectiveLoc);
1040 // ".errb" or ".errnb", depending on ExpectBlank.
1041 bool parseDirectiveErrorIfb(SMLoc DirectiveLoc, bool ExpectBlank);
1042 // ".errdef" or ".errndef", depending on ExpectDefined.
1043 bool parseDirectiveErrorIfdef(SMLoc DirectiveLoc, bool ExpectDefined);
1044 // ".erridn", ".errdif", ".erridni", or ".errdifi", depending on ExpectEqual
1045 // and CaseInsensitive.
1046 bool parseDirectiveErrorIfidn(SMLoc DirectiveLoc, bool ExpectEqual,
1047 bool CaseInsensitive);
1048 // ".erre" or ".errnz", depending on ExpectZero.
1049 bool parseDirectiveErrorIfe(SMLoc DirectiveLoc, bool ExpectZero);
1051 // ".radix"
1052 bool parseDirectiveRadix(SMLoc DirectiveLoc);
1054 // "echo"
1055 bool parseDirectiveEcho(SMLoc DirectiveLoc);
1057 void initializeDirectiveKindMap();
1058 void initializeCVDefRangeTypeMap();
1059 void initializeBuiltinSymbolMap();
1062 } // end anonymous namespace
1064 namespace llvm {
// Forward declaration of the factory whose result the MasmParser constructor
// hands to PlatformParser.reset(). The definition is not in this part of the
// file.
1066 extern MCAsmParserExtension *createCOFFMasmParser();
1068 } // end namespace llvm
// Symbolic name for the value 0. NOTE(review): no use of DEFAULT_ADDRSPACE is
// visible in this part of the file -- confirm it is still referenced.
1070 enum { DEFAULT_ADDRSPACE = 0 };
1072 MasmParser::MasmParser(SourceMgr &SM, MCContext &Ctx, MCStreamer &Out,
1073 const MCAsmInfo &MAI, struct tm TM, unsigned CB)
1074 : Lexer(MAI), Ctx(Ctx), Out(Out), MAI(MAI), SrcMgr(SM),
1075 CurBuffer(CB ? CB : SM.getMainFileID()), TM(TM) {
1076 HadError = false;
1077 // Save the old handler.
1078 SavedDiagHandler = SrcMgr.getDiagHandler();
1079 SavedDiagContext = SrcMgr.getDiagContext();
1080 // Set our own handler which calls the saved handler.
1081 SrcMgr.setDiagHandler(DiagHandler, this);
1082 Lexer.setBuffer(SrcMgr.getMemoryBuffer(CurBuffer)->getBuffer());
1083 EndStatementAtEOFStack.push_back(true);
1085 // Initialize the platform / file format parser.
1086 switch (Ctx.getObjectFileType()) {
1087 case MCContext::IsCOFF:
1088 PlatformParser.reset(createCOFFMasmParser());
1089 break;
1090 default:
1091 report_fatal_error("llvm-ml currently supports only COFF output.");
1092 break;
1095 initializeDirectiveKindMap();
1096 PlatformParser->Initialize(*this);
1097 initializeCVDefRangeTypeMap();
1098 initializeBuiltinSymbolMap();
1100 NumOfMacroInstantiations = 0;
1103 MasmParser::~MasmParser() {
1104 assert((HadError || ActiveMacros.empty()) &&
1105 "Unexpected active macro instantiation!");
1107 // Restore the saved diagnostics handler and context for use during
1108 // finalization.
1109 SrcMgr.setDiagHandler(SavedDiagHandler, SavedDiagContext);
1112 void MasmParser::printMacroInstantiations() {
1113 // Print the active macro instantiation stack.
1114 for (std::vector<MacroInstantiation *>::const_reverse_iterator
1115 it = ActiveMacros.rbegin(),
1116 ie = ActiveMacros.rend();
1117 it != ie; ++it)
1118 printMessage((*it)->InstantiationLoc, SourceMgr::DK_Note,
1119 "while in macro instantiation");
1122 void MasmParser::Note(SMLoc L, const Twine &Msg, SMRange Range) {
1123 printPendingErrors();
1124 printMessage(L, SourceMgr::DK_Note, Msg, Range);
1125 printMacroInstantiations();
1128 bool MasmParser::Warning(SMLoc L, const Twine &Msg, SMRange Range) {
1129 if (getTargetParser().getTargetOptions().MCNoWarn)
1130 return false;
1131 if (getTargetParser().getTargetOptions().MCFatalWarnings)
1132 return Error(L, Msg, Range);
1133 printMessage(L, SourceMgr::DK_Warning, Msg, Range);
1134 printMacroInstantiations();
1135 return false;
1138 bool MasmParser::printError(SMLoc L, const Twine &Msg, SMRange Range) {
1139 HadError = true;
1140 printMessage(L, SourceMgr::DK_Error, Msg, Range);
1141 printMacroInstantiations();
1142 return true;
1145 bool MasmParser::enterIncludeFile(const std::string &Filename) {
1146 std::string IncludedFile;
1147 unsigned NewBuf =
1148 SrcMgr.AddIncludeFile(Filename, Lexer.getLoc(), IncludedFile);
1149 if (!NewBuf)
1150 return true;
1152 CurBuffer = NewBuf;
1153 Lexer.setBuffer(SrcMgr.getMemoryBuffer(CurBuffer)->getBuffer());
1154 EndStatementAtEOFStack.push_back(true);
1155 return false;
1158 void MasmParser::jumpToLoc(SMLoc Loc, unsigned InBuffer,
1159 bool EndStatementAtEOF) {
1160 CurBuffer = InBuffer ? InBuffer : SrcMgr.FindBufferContainingLoc(Loc);
1161 Lexer.setBuffer(SrcMgr.getMemoryBuffer(CurBuffer)->getBuffer(),
1162 Loc.getPointer(), EndStatementAtEOF);
// Try to expand the identifier held by the current token.
//
// All lookups use the lower-cased identifier. Three sources are consulted:
//   1. a macro registered in the context whose IsFunction flag is set, when
//      the next token is '(' -- handled as a macro function invocation;
//   2. an entry in BuiltinSymbolMap -- evaluated via evaluateBuiltinTextMacro;
//   3. an entry in Variables whose IsText flag is set -- its TextValue is used.
//
// For (2) and (3) the text is copied into a new "<instantiation>" buffer and
// the lexer is switched to it.
//
// Returns true when no text value was obtained, and false after a macro
// function invocation was attempted or a text buffer was entered.
1165 bool MasmParser::expandMacros() {
// NOTE: Tok is a reference to the parser's current token and is still read
// (getLoc/getEndLoc) further down, so statement order here matters.
1166 const AsmToken &Tok = getTok();
1167 const std::string IDLower = Tok.getIdentifier().lower();
1169 const llvm::MCAsmMacro *M = getContext().lookupMacro(IDLower);
1170 if (M && M->IsFunction && peekTok().is(AsmToken::LParen)) {
1171 // This is a macro function invocation; expand it in place.
// The location and spelling are copied out before the lexer advances.
1172 const SMLoc MacroLoc = Tok.getLoc();
1173 const StringRef MacroId = Tok.getIdentifier();
1174 Lexer.Lex();
// When handleMacroInvocation returns true (presumably failure -- confirm), an
// Error token carrying the macro's spelling is pushed back and lexed again.
1175 if (handleMacroInvocation(M, MacroLoc)) {
1176 Lexer.UnLex(AsmToken(AsmToken::Error, MacroId));
1177 Lexer.Lex();
1179 return false;
// Not a macro function call: look for a text value, built-ins first.
1182 llvm::Optional<std::string> ExpandedValue;
1183 auto BuiltinIt = BuiltinSymbolMap.find(IDLower);
1184 if (BuiltinIt != BuiltinSymbolMap.end()) {
1185 ExpandedValue =
1186 evaluateBuiltinTextMacro(BuiltinIt->getValue(), Tok.getLoc());
1187 } else {
1188 auto VarIt = Variables.find(IDLower);
1189 if (VarIt != Variables.end() && VarIt->getValue().IsText) {
1190 ExpandedValue = VarIt->getValue().TextValue;
// Nothing to expand: report that to the caller.
1194 if (!ExpandedValue.hasValue())
1195 return true;
1196 std::unique_ptr<MemoryBuffer> Instantiation =
1197 MemoryBuffer::getMemBufferCopy(*ExpandedValue, "<instantiation>");
1199 // Jump to the macro instantiation and prime the lexer.
// The new buffer is registered with the end of the expanded token as its
// include location.
1200 CurBuffer =
1201 SrcMgr.AddNewSourceBuffer(std::move(Instantiation), Tok.getEndLoc());
1202 Lexer.setBuffer(SrcMgr.getMemoryBuffer(CurBuffer)->getBuffer(), nullptr,
1203 /*EndStatementAtEOF=*/false);
// Paired with the EndStatementAtEOF=false passed to setBuffer just above.
1204 EndStatementAtEOFStack.push_back(false);
1205 Lexer.Lex();
1206 return false;
// Advance the lexer by one token and return the new current token.
//
// Besides lexing, this function:
//   - reports the lexer's pending error if the current token is an Error;
//   - forwards a comment attached to an EndOfStatement token to the streamer
//     when MAI.preserveAsmComments() is set;
//   - when ExpandNextToken == ExpandMacros, repeatedly calls expandMacros()
//     while the new token is an identifier;
//   - skips Comment tokens and backslash + end-of-statement line continuations;
//   - at Eof, returns to the parent include location when there is one.
1209 const AsmToken &MasmParser::Lex(ExpandKind ExpandNextToken) {
1210 if (Lexer.getTok().is(AsmToken::Error))
1211 Error(Lexer.getErrLoc(), Lexer.getErr());
1213 // if it's a end of statement with a comment in it
1214 if (getTok().is(AsmToken::EndOfStatement)) {
1215 // if this is a line comment output it.
// The token text is treated as a comment when it is non-empty and does not
// start with '\n' or '\r'.
1216 if (!getTok().getString().empty() && getTok().getString().front() != '\n' &&
1217 getTok().getString().front() != '\r' && MAI.preserveAsmComments())
1218 Out.addExplicitComment(Twine(getTok().getString()));
1221 const AsmToken *tok = &Lexer.Lex();
// Sampled once, right after lexing the new token.
1222 bool StartOfStatement = Lexer.isAtStartOfStatement();
// Expansion loop: stops when expandMacros() returns true, or when the token is
// no longer an identifier.
1224 while (ExpandNextToken == ExpandMacros && tok->is(AsmToken::Identifier)) {
1225 if (StartOfStatement) {
1226 AsmToken NextTok;
1227 MutableArrayRef<AsmToken> Buf(NextTok);
1228 size_t ReadCount = Lexer.peekTokens(Buf);
1229 if (ReadCount && NextTok.is(AsmToken::Identifier) &&
1230 (NextTok.getString().equals_insensitive("equ") ||
1231 NextTok.getString().equals_insensitive("textequ"))) {
1232 // This looks like an EQU or TEXTEQU directive; don't expand the
1233 // identifier, allowing for redefinitions.
1234 break;
1237 if (expandMacros())
1238 break;
1241 // Parse comments here to be deferred until end of next statement.
1242 while (tok->is(AsmToken::Comment)) {
1243 if (MAI.preserveAsmComments())
1244 Out.addExplicitComment(Twine(tok->getString()));
1245 tok = &Lexer.Lex();
1248 // Recognize and bypass line continuations.
1249 while (tok->is(AsmToken::BackSlash) &&
1250 peekTok().is(AsmToken::EndOfStatement)) {
1251 // Eat both the backslash and the end of statement.
1252 Lexer.Lex();
1253 tok = &Lexer.Lex();
1256 if (tok->is(AsmToken::Eof)) {
1257 // If this is the end of an included file, pop the parent file off the
1258 // include stack.
1259 SMLoc ParentIncludeLoc = SrcMgr.getParentIncludeLoc(CurBuffer);
1260 if (ParentIncludeLoc != SMLoc()) {
1261 EndStatementAtEOFStack.pop_back();
1262 jumpToLoc(ParentIncludeLoc, 0, EndStatementAtEOFStack.back());
// Re-enters Lex() with no argument, i.e. with the default ExpandNextToken.
1263 return Lex();
// No parent include location: drop the last stack entry; the stack is expected
// to be empty afterwards.
1265 EndStatementAtEOFStack.pop_back();
1266 assert(EndStatementAtEOFStack.empty());
1269 return *tok;
// Return a copy of the token that follows the current one.
//
// ShouldSkipSpace is forwarded to Lexer.peekTokens(). When the lexer yields no
// token and the current buffer has a parent include location, the parser jumps
// back to that location and peeks again.
//
// NOTE(review): despite being a "peek", this pops EndStatementAtEOFStack and
// may call jumpToLoc(), so it can change parser state.
1272 const AsmToken MasmParser::peekTok(bool ShouldSkipSpace) {
1273 AsmToken Tok;
// Single-element buffer wrapping Tok; peekTokens writes the result into it.
1275 MutableArrayRef<AsmToken> Buf(Tok);
1276 size_t ReadCount = Lexer.peekTokens(Buf, ShouldSkipSpace);
1278 if (ReadCount == 0) {
1279 // If this is the end of an included file, pop the parent file off the
1280 // include stack.
1281 SMLoc ParentIncludeLoc = SrcMgr.getParentIncludeLoc(CurBuffer);
1282 if (ParentIncludeLoc != SMLoc()) {
1283 EndStatementAtEOFStack.pop_back();
1284 jumpToLoc(ParentIncludeLoc, 0, EndStatementAtEOFStack.back());
1285 return peekTok(ShouldSkipSpace);
1287 EndStatementAtEOFStack.pop_back();
1288 assert(EndStatementAtEOFStack.empty());
// NOTE(review): if control reaches this assert with ReadCount == 0 (no parent
// include location), it fails in assert-enabled builds -- confirm intended.
1291 assert(ReadCount == 1);
1292 return Tok;
1295 bool MasmParser::enabledGenDwarfForAssembly() {
1296 // Check whether the user specified -g.
1297 if (!getContext().getGenDwarfForAssembly())
1298 return false;
1299 // If we haven't encountered any .file directives (which would imply that
1300 // the assembler source was produced with debug info already) then emit one
1301 // describing the assembler source file itself.
1302 if (getContext().getGenDwarfFileNumber() == 0) {
1303 // Use the first #line directive for this, if any. It's preprocessed, so
1304 // there is no checksum, and of course no source directive.
1305 if (!FirstCppHashFilename.empty())
1306 getContext().setMCLineTableRootFile(/*CUID=*/0,
1307 getContext().getCompilationDir(),
1308 FirstCppHashFilename,
1309 /*Cksum=*/None, /*Source=*/None);
1310 const MCDwarfFile &RootFile =
1311 getContext().getMCDwarfLineTable(/*CUID=*/0).getRootFile();
1312 getContext().setGenDwarfFileNumber(getStreamer().emitDwarfFileDirective(
1313 /*CUID=*/0, getContext().getCompilationDir(), RootFile.Name,
1314 RootFile.Checksum, RootFile.Source));
1316 return true;
// Parse the whole input, statement by statement, and optionally finalize the
// output stream.
//
// NoInitialTextSection: when false, Out.InitSections(false) is called first.
// NoFinalize: when true, the undefined-symbol checks and Out.Finish() are
//   skipped.
//
// Returns true if this parser or the context recorded an error.
1319 bool MasmParser::Run(bool NoInitialTextSection, bool NoFinalize) {
1320 // Create the initial section, if requested.
1321 if (!NoInitialTextSection)
1322 Out.InitSections(false);
1324 // Prime the lexer.
1325 Lex();
1327 HadError = false;
// Snapshot of the conditional-assembly state, compared again after parsing to
// detect unbalanced conditionals.
1328 AsmCond StartingCondState = TheCondState;
1329 SmallVector<AsmRewrite, 4> AsmStrRewrites;
1331 // If we are generating dwarf for assembly source files save the initial text
1332 // section. (Don't use enabledGenDwarfForAssembly() here, as we aren't
1333 // emitting any actual debug info yet and haven't had a chance to parse any
1334 // embedded .file directives.)
1335 if (getContext().getGenDwarfForAssembly()) {
1336 MCSection *Sec = getStreamer().getCurrentSectionOnly();
1337 if (!Sec->getBeginSymbol()) {
1338 MCSymbol *SectionStartSym = getContext().createTempSymbol();
1339 getStreamer().emitLabel(SectionStartSym);
1340 Sec->setBeginSymbol(SectionStartSym);
1342 bool InsertResult = getContext().addGenDwarfSection(Sec);
1343 assert(InsertResult && ".text section should not have debug info yet");
// Keeps InsertResult "used" when the assert above compiles away.
1344 (void)InsertResult;
1347 getTargetParser().onBeginOfFile();
1349 // While we have input, parse each statement.
// The loop also continues at Eof while the current buffer still has a parent
// include location.
1350 while (Lexer.isNot(AsmToken::Eof) ||
1351 SrcMgr.getParentIncludeLoc(CurBuffer) != SMLoc()) {
1352 // Skip through the EOF at the end of an inclusion.
1353 if (Lexer.is(AsmToken::Eof))
1354 Lex();
1356 ParseStatementInfo Info(&AsmStrRewrites);
1357 bool Parsed = parseStatement(Info, nullptr);
1359 // If we have a Lexer Error we are on an Error Token. Load in Lexer Error
1360 // for printing ErrMsg via Lex() only if no (presumably better) parser error
1361 // exists.
1362 if (Parsed && !hasPendingError() && Lexer.getTok().is(AsmToken::Error)) {
1363 Lex();
1366 // parseStatement returned true so may need to emit an error.
1367 printPendingErrors();
1369 // Skipping to the next line if needed.
1370 if (Parsed && !getLexer().isAtStartOfStatement())
1371 eatToEndOfStatement();
1374 getTargetParser().onEndOfFile();
1375 printPendingErrors();
1377 // All errors should have been emitted.
1378 assert(!hasPendingError() && "unexpected error from parseStatement");
1380 getTargetParser().flushPendingInstructions(getStreamer());
// The conditional state must match the snapshot taken before parsing.
1382 if (TheCondState.TheCond != StartingCondState.TheCond ||
1383 TheCondState.Ignore != StartingCondState.Ignore)
1384 printError(getTok().getLoc(), "unmatched .ifs or .elses");
1385 // Check to see there are no empty DwarfFile slots.
// Only the first line table is inspected, and index 0 is exempt.
1386 const auto &LineTables = getContext().getMCDwarfLineTables();
1387 if (!LineTables.empty()) {
1388 unsigned Index = 0;
1389 for (const auto &File : LineTables.begin()->second.getMCDwarfFiles()) {
1390 if (File.Name.empty() && Index != 0)
1391 printError(getTok().getLoc(), "unassigned file number: " +
1392 Twine(Index) +
1393 " for .file directives");
1394 ++Index;
1398 // Check to see that all assembler local symbols were actually defined.
1399 // Targets that don't do subsections via symbols may not want this, though,
1400 // so conservatively exclude them. Only do this if we're finalizing, though,
1401 // as otherwise we won't necessarily have seen everything yet.
1402 if (!NoFinalize) {
1403 if (MAI.hasSubsectionsViaSymbols()) {
1404 for (const auto &TableEntry : getContext().getSymbols()) {
1405 MCSymbol *Sym = TableEntry.getValue();
1406 // Variable symbols may not be marked as defined, so check those
1407 // explicitly. If we know it's a variable, we have a definition for
1408 // the purposes of this check.
1409 if (Sym->isTemporary() && !Sym->isVariable() && !Sym->isDefined())
1410 // FIXME: We would really like to refer back to where the symbol was
1411 // first referenced for a source location. We need to add something
1412 // to track that. Currently, we just point to the end of the file.
1413 printError(getTok().getLoc(), "assembler local symbol '" +
1414 Sym->getName() + "' not defined");
1418 // Temporary symbols like the ones for directional jumps don't go in the
1419 // symbol table. They also need to be diagnosed in all (final) cases.
// Each DirLabels entry is (reference location, CppHashInfo at that point,
// symbol).
1420 for (std::tuple<SMLoc, CppHashInfoTy, MCSymbol *> &LocSym : DirLabels) {
1421 if (std::get<2>(LocSym)->isUndefined()) {
1422 // Reset the state of any "# line file" directives we've seen to the
1423 // context as it was at the diagnostic site.
1424 CppHashInfo = std::get<1>(LocSym);
1425 printError(std::get<0>(LocSym), "directional label undefined");
1430 // Finalize the output stream if there are no errors and if the client wants
1431 // us to.
1432 if (!HadError && !NoFinalize)
1433 Out.Finish(Lexer.getLoc());
1435 return HadError || getContext().hadError();
1438 bool MasmParser::checkForValidSection() {
1439 if (!ParsingMSInlineAsm && !getStreamer().getCurrentSectionOnly()) {
1440 Out.InitSections(false);
1441 return Error(getTok().getLoc(),
1442 "expected section directive before assembly directive");
1444 return false;
1447 /// Throw away the rest of the line for testing purposes.
1448 void MasmParser::eatToEndOfStatement() {
1449 while (Lexer.isNot(AsmToken::EndOfStatement)) {
1450 if (Lexer.is(AsmToken::Eof)) {
1451 SMLoc ParentIncludeLoc = SrcMgr.getParentIncludeLoc(CurBuffer);
1452 if (ParentIncludeLoc == SMLoc()) {
1453 break;
1456 EndStatementAtEOFStack.pop_back();
1457 jumpToLoc(ParentIncludeLoc, 0, EndStatementAtEOFStack.back());
1460 Lexer.Lex();
1463 // Eat EOL.
1464 if (Lexer.is(AsmToken::EndOfStatement))
1465 Lexer.Lex();
1468 SmallVector<StringRef, 1>
1469 MasmParser::parseStringRefsTo(AsmToken::TokenKind EndTok) {
1470 SmallVector<StringRef, 1> Refs;
1471 const char *Start = getTok().getLoc().getPointer();
1472 while (Lexer.isNot(EndTok)) {
1473 if (Lexer.is(AsmToken::Eof)) {
1474 SMLoc ParentIncludeLoc = SrcMgr.getParentIncludeLoc(CurBuffer);
1475 if (ParentIncludeLoc == SMLoc()) {
1476 break;
1478 Refs.emplace_back(Start, getTok().getLoc().getPointer() - Start);
1480 EndStatementAtEOFStack.pop_back();
1481 jumpToLoc(ParentIncludeLoc, 0, EndStatementAtEOFStack.back());
1482 Lexer.Lex();
1483 Start = getTok().getLoc().getPointer();
1484 } else {
1485 Lexer.Lex();
1488 Refs.emplace_back(Start, getTok().getLoc().getPointer() - Start);
1489 return Refs;
1492 std::string MasmParser::parseStringTo(AsmToken::TokenKind EndTok) {
1493 SmallVector<StringRef, 1> Refs = parseStringRefsTo(EndTok);
1494 std::string Str;
1495 for (StringRef S : Refs) {
1496 Str.append(S.str());
1498 return Str;
1501 StringRef MasmParser::parseStringToEndOfStatement() {
1502 const char *Start = getTok().getLoc().getPointer();
1504 while (Lexer.isNot(AsmToken::EndOfStatement) && Lexer.isNot(AsmToken::Eof))
1505 Lexer.Lex();
1507 const char *End = getTok().getLoc().getPointer();
1508 return StringRef(Start, End - Start);
1511 /// Parse a paren expression and return it.
1512 /// NOTE: This assumes the leading '(' has already been consumed.
1514 /// parenexpr ::= expr)
1516 bool MasmParser::parseParenExpr(const MCExpr *&Res, SMLoc &EndLoc) {
1517 if (parseExpression(Res))
1518 return true;
1519 if (Lexer.isNot(AsmToken::RParen))
1520 return TokError("expected ')' in parentheses expression");
1521 EndLoc = Lexer.getTok().getEndLoc();
1522 Lex();
1523 return false;
1526 /// Parse a bracket expression and return it.
1527 /// NOTE: This assumes the leading '[' has already been consumed.
1529 /// bracketexpr ::= expr]
1531 bool MasmParser::parseBracketExpr(const MCExpr *&Res, SMLoc &EndLoc) {
1532 if (parseExpression(Res))
1533 return true;
1534 EndLoc = getTok().getEndLoc();
1535 if (parseToken(AsmToken::RBrac, "expected ']' in brackets expression"))
1536 return true;
1537 return false;
1540 /// Parse a primary expression and return it.
1541 /// primaryexpr ::= (parenexpr
1542 /// primaryexpr ::= symbol
1543 /// primaryexpr ::= number
1544 /// primaryexpr ::= '.'
1545 /// primaryexpr ::= ~,+,-,'not' primaryexpr
1546 /// primaryexpr ::= string
1547 /// (a string is interpreted as a 64-bit number in big-endian base-256)
///
/// The switch below additionally handles a leading '!' (logical not), '['
/// (bracket expressions, if the platform parser supports them) and the
/// target-specific %-operators.
///
/// \p Res receives the parsed expression. \p EndLoc is written by most, but
/// not all, branches. \p TypeInfo may be null; when it is non-null and the
/// identifier branch reaches its end, it receives the type found for the
/// symbol or field. Returns true on error, false on success.
1548 bool MasmParser::parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc,
1549 AsmTypeInfo *TypeInfo) {
// Captured up front so later branches can refer to the first token after the
// lexer has advanced.
1550 SMLoc FirstTokenLoc = getLexer().getLoc();
1551 AsmToken::TokenKind FirstTokenKind = Lexer.getKind();
1552 switch (FirstTokenKind) {
1553 default:
1554 return TokError("unknown token in expression");
1555 // If we have an error assume that we've already handled it.
1556 case AsmToken::Error:
1557 return true;
1558 case AsmToken::Exclaim:
1559 Lex(); // Eat the operator.
1560 if (parsePrimaryExpr(Res, EndLoc, nullptr))
1561 return true;
1562 Res = MCUnaryExpr::createLNot(Res, getContext(), FirstTokenLoc);
1563 return false;
1564 case AsmToken::Dollar:
1565 case AsmToken::At:
1566 case AsmToken::Identifier: {
1567 StringRef Identifier;
1568 if (parseIdentifier(Identifier)) {
1569 // We may have failed but $ may be a valid token.
1570 if (getTok().is(AsmToken::Dollar)) {
1571 if (Lexer.getMAI().getDollarIsPC()) {
1572 Lex();
1573 // This is a '$' reference, which references the current PC. Emit a
1574 // temporary label to the streamer and refer to it.
1575 MCSymbol *Sym = Ctx.createTempSymbol();
1576 Out.emitLabel(Sym);
1577 Res = MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_None,
1578 getContext());
1579 EndLoc = FirstTokenLoc;
1580 return false;
1582 return Error(FirstTokenLoc, "invalid token in expression");
1585 // Parse named bitwise negation.
1586 if (Identifier.equals_insensitive("not")) {
1587 if (parsePrimaryExpr(Res, EndLoc, nullptr))
1588 return true;
1589 Res = MCUnaryExpr::createNot(Res, getContext(), FirstTokenLoc);
1590 return false;
1592 // Parse symbol variant.
1593 std::pair<StringRef, StringRef> Split;
1594 if (!MAI.useParensForSymbolVariant()) {
// NOTE(review): FirstTokenKind is Dollar, At or Identifier under these case
// labels, so this String comparison looks unreachable -- confirm.
1595 if (FirstTokenKind == AsmToken::String) {
1596 if (Lexer.is(AsmToken::At)) {
1597 Lex(); // eat @
1598 SMLoc AtLoc = getLexer().getLoc();
1599 StringRef VName;
1600 if (parseIdentifier(VName))
1601 return Error(AtLoc, "expected symbol variant after '@'");
1603 Split = std::make_pair(Identifier, VName);
1605 } else {
1606 Split = Identifier.split('@');
1608 } else if (Lexer.is(AsmToken::LParen)) {
1609 Lex(); // eat '('.
1610 StringRef VName;
// NOTE(review): the result of parseIdentifier is not checked here; VName stays
// empty if it fails.
1611 parseIdentifier(VName);
1612 // eat ')'.
1613 if (parseToken(AsmToken::RParen,
1614 "unexpected token in variant, expected ')'"))
1615 return true;
1616 Split = std::make_pair(Identifier, VName);
1619 EndLoc = SMLoc::getFromPointer(Identifier.end());
1621 // This is a symbol reference.
1622 StringRef SymbolName = Identifier;
1623 if (SymbolName.empty())
1624 return Error(getLexer().getLoc(), "expected a symbol reference");
1626 MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None;
1628 // Look up the symbol variant if used.
1629 if (!Split.second.empty()) {
1630 Variant = MCSymbolRefExpr::getVariantKindForName(Split.second);
1631 if (Variant != MCSymbolRefExpr::VK_Invalid) {
1632 SymbolName = Split.first;
1633 } else if (MAI.doesAllowAtInName() && !MAI.useParensForSymbolVariant()) {
1634 Variant = MCSymbolRefExpr::VK_None;
1635 } else {
1636 return Error(SMLoc::getFromPointer(Split.second.begin()),
1637 "invalid variant '" + Split.second + "'");
1641 // Find the field offset if used.
// The name is split at the first '.'; the part after it is treated as a field
// path.
1642 AsmFieldInfo Info;
1643 Split = SymbolName.split('.');
1644 if (Split.second.empty()) {
1645 } else {
1646 SymbolName = Split.first;
1647 if (lookUpField(SymbolName, Split.second, Info)) {
1648 std::pair<StringRef, StringRef> BaseMember = Split.second.split('.');
1649 StringRef Base = BaseMember.first, Member = BaseMember.second;
// NOTE(review): the result of this second lookUpField call is ignored.
1650 lookUpField(Base, Member, Info);
1651 } else if (Structs.count(SymbolName.lower())) {
1652 // This is actually a reference to a field offset.
1653 Res = MCConstantExpr::create(Info.Offset, getContext());
1654 return false;
1658 MCSymbol *Sym = getContext().getInlineAsmLabel(SymbolName);
1659 if (!Sym) {
1660 // If this is a built-in numeric value, treat it as a constant.
1661 auto BuiltinIt = BuiltinSymbolMap.find(SymbolName.lower());
1662 const BuiltinSymbol Symbol = (BuiltinIt == BuiltinSymbolMap.end())
1663 ? BI_NO_SYMBOL
1664 : BuiltinIt->getValue();
1665 if (Symbol != BI_NO_SYMBOL) {
1666 const MCExpr *Value = evaluateBuiltinValue(Symbol, FirstTokenLoc);
1667 if (Value) {
1668 Res = Value;
1669 return false;
1673 // Variables use case-insensitive symbol names; if this is a variable, we
1674 // find the symbol using its canonical name.
1675 auto VarIt = Variables.find(SymbolName.lower());
1676 if (VarIt != Variables.end())
1677 SymbolName = VarIt->second.Name;
1678 Sym = getContext().getOrCreateSymbol(SymbolName);
1681 // If this is an absolute variable reference, substitute it now to preserve
1682 // semantics in the face of reassignment.
1683 if (Sym->isVariable()) {
1684 auto V = Sym->getVariableValue(/*SetUsed=*/false);
1685 bool DoInline = isa<MCConstantExpr>(V) && !Variant;
1686 if (auto TV = dyn_cast<MCTargetExpr>(V))
1687 DoInline = TV->inlineAssignedExpr();
1688 if (DoInline) {
1689 if (Variant)
1690 return Error(EndLoc, "unexpected modifier on variable reference");
1691 Res = Sym->getVariableValue(/*SetUsed=*/false);
1692 return false;
1696 // Otherwise create a symbol ref.
1697 const MCExpr *SymRef =
1698 MCSymbolRefExpr::create(Sym, Variant, getContext(), FirstTokenLoc);
// A non-zero field offset is added to the symbol reference.
1699 if (Info.Offset) {
1700 Res = MCBinaryExpr::create(
1701 MCBinaryExpr::Add, SymRef,
1702 MCConstantExpr::create(Info.Offset, getContext()), getContext());
1703 } else {
1704 Res = SymRef;
// If the field lookup produced no type name, fall back to the KnownType table,
// keyed by the lower-cased full identifier.
1706 if (TypeInfo) {
1707 if (Info.Type.Name.empty()) {
1708 auto TypeIt = KnownType.find(Identifier.lower());
1709 if (TypeIt != KnownType.end()) {
1710 Info.Type = TypeIt->second;
1714 *TypeInfo = Info.Type;
1716 return false;
1718 case AsmToken::BigNum:
1719 return TokError("literal value out of range for directive");
1720 case AsmToken::Integer: {
1721 SMLoc Loc = getTok().getLoc();
1722 int64_t IntVal = getTok().getIntVal();
1723 Res = MCConstantExpr::create(IntVal, getContext());
1724 EndLoc = Lexer.getTok().getEndLoc();
1725 Lex(); // Eat token.
1726 // Look for 'b' or 'f' following an Integer as a directional label.
1727 if (Lexer.getKind() == AsmToken::Identifier) {
1728 StringRef IDVal = getTok().getString();
1729 // Look up the symbol variant if used.
1730 std::pair<StringRef, StringRef> Split = IDVal.split('@');
1731 MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None;
// The sizes differ exactly when the identifier contained an '@'.
1732 if (Split.first.size() != IDVal.size()) {
1733 Variant = MCSymbolRefExpr::getVariantKindForName(Split.second);
1734 if (Variant == MCSymbolRefExpr::VK_Invalid)
1735 return TokError("invalid variant '" + Split.second + "'");
1736 IDVal = Split.first;
1738 if (IDVal == "f" || IDVal == "b") {
1739 MCSymbol *Sym =
1740 Ctx.getDirectionalLocalSymbol(IntVal, IDVal == "b");
1741 Res = MCSymbolRefExpr::create(Sym, Variant, getContext());
1742 if (IDVal == "b" && Sym->isUndefined())
1743 return Error(Loc, "directional label undefined");
// Recorded so Run() can diagnose labels that are still undefined at the end.
1744 DirLabels.push_back(std::make_tuple(Loc, CppHashInfo, Sym));
1745 EndLoc = Lexer.getTok().getEndLoc();
1746 Lex(); // Eat identifier.
1749 return false;
1751 case AsmToken::String: {
1752 // MASM strings (used as constants) are interpreted as big-endian base-256.
// NOTE(review): no assignment to EndLoc is visible in this branch.
1753 SMLoc ValueLoc = getTok().getLoc();
1754 std::string Value;
1755 if (parseEscapedString(Value))
1756 return true;
// More than 8 characters would not fit in the 64-bit accumulator below.
1757 if (Value.size() > 8)
1758 return Error(ValueLoc, "literal value out of range");
1759 uint64_t IntValue = 0;
1760 for (const unsigned char CharVal : Value)
1761 IntValue = (IntValue << 8) | CharVal;
1762 Res = MCConstantExpr::create(IntValue, getContext());
1763 return false;
1765 case AsmToken::Real: {
// The literal is parsed as an IEEE double and its bit pattern is used as the
// integer constant.
1766 APFloat RealVal(APFloat::IEEEdouble(), getTok().getString());
1767 uint64_t IntVal = RealVal.bitcastToAPInt().getZExtValue();
1768 Res = MCConstantExpr::create(IntVal, getContext());
1769 EndLoc = Lexer.getTok().getEndLoc();
1770 Lex(); // Eat token.
1771 return false;
1773 case AsmToken::Dot: {
1774 // This is a '.' reference, which references the current PC. Emit a
1775 // temporary label to the streamer and refer to it.
1776 MCSymbol *Sym = Ctx.createTempSymbol();
1777 Out.emitLabel(Sym);
1778 Res = MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_None, getContext());
1779 EndLoc = Lexer.getTok().getEndLoc();
1780 Lex(); // Eat identifier.
1781 return false;
1783 case AsmToken::LParen:
1784 Lex(); // Eat the '('.
1785 return parseParenExpr(Res, EndLoc);
1786 case AsmToken::LBrac:
1787 if (!PlatformParser->HasBracketExpressions())
1788 return TokError("brackets expression not supported on this target");
1789 Lex(); // Eat the '['.
1790 return parseBracketExpr(Res, EndLoc);
1791 case AsmToken::Minus:
1792 Lex(); // Eat the operator.
1793 if (parsePrimaryExpr(Res, EndLoc, nullptr))
1794 return true;
1795 Res = MCUnaryExpr::createMinus(Res, getContext(), FirstTokenLoc);
1796 return false;
1797 case AsmToken::Plus:
1798 Lex(); // Eat the operator.
1799 if (parsePrimaryExpr(Res, EndLoc, nullptr))
1800 return true;
1801 Res = MCUnaryExpr::createPlus(Res, getContext(), FirstTokenLoc);
1802 return false;
1803 case AsmToken::Tilde:
1804 Lex(); // Eat the operator.
1805 if (parsePrimaryExpr(Res, EndLoc, nullptr))
1806 return true;
1807 Res = MCUnaryExpr::createNot(Res, getContext(), FirstTokenLoc);
1808 return false;
1809 // MIPS unary expression operators. The lexer won't generate these tokens if
1810 // MCAsmInfo::HasMipsExpressions is false for the target.
1811 case AsmToken::PercentCall16:
1812 case AsmToken::PercentCall_Hi:
1813 case AsmToken::PercentCall_Lo:
1814 case AsmToken::PercentDtprel_Hi:
1815 case AsmToken::PercentDtprel_Lo:
1816 case AsmToken::PercentGot:
1817 case AsmToken::PercentGot_Disp:
1818 case AsmToken::PercentGot_Hi:
1819 case AsmToken::PercentGot_Lo:
1820 case AsmToken::PercentGot_Ofst:
1821 case AsmToken::PercentGot_Page:
1822 case AsmToken::PercentGottprel:
1823 case AsmToken::PercentGp_Rel:
1824 case AsmToken::PercentHi:
1825 case AsmToken::PercentHigher:
1826 case AsmToken::PercentHighest:
1827 case AsmToken::PercentLo:
1828 case AsmToken::PercentNeg:
1829 case AsmToken::PercentPcrel_Hi:
1830 case AsmToken::PercentPcrel_Lo:
1831 case AsmToken::PercentTlsgd:
1832 case AsmToken::PercentTlsldm:
1833 case AsmToken::PercentTprel_Hi:
1834 case AsmToken::PercentTprel_Lo:
1835 Lex(); // Eat the operator.
1836 if (Lexer.isNot(AsmToken::LParen))
1837 return TokError("expected '(' after operator");
1838 Lex(); // Eat the operator.
1839 if (parseExpression(Res, EndLoc))
1840 return true;
1841 if (Lexer.isNot(AsmToken::RParen))
1842 return TokError("expected ')'");
1843 Lex(); // Eat the operator.
// A null result from the target hook is reported as failure.
1844 Res = getTargetParser().createTargetUnaryExpr(Res, FirstTokenKind, Ctx);
1845 return !Res;
1849 bool MasmParser::parseExpression(const MCExpr *&Res) {
1850 SMLoc EndLoc;
1851 return parseExpression(Res, EndLoc);
1854 /// This function checks if the next token is <string> type or arithmetic.
1855 /// string that begin with character '<' must end with character '>'.
1856 /// otherwise it is arithmetics.
1857 /// If the function returns a 'true' value,
1858 /// the End argument will be filled with the last location pointed to the '>'
1859 /// character.
1860 static bool isAngleBracketString(SMLoc &StrLoc, SMLoc &EndLoc) {
1861 assert((StrLoc.getPointer() != nullptr) &&
1862 "Argument to the function cannot be a NULL value");
1863 const char *CharPtr = StrLoc.getPointer();
1864 while ((*CharPtr != '>') && (*CharPtr != '\n') && (*CharPtr != '\r') &&
1865 (*CharPtr != '\0')) {
1866 if (*CharPtr == '!')
1867 CharPtr++;
1868 CharPtr++;
1870 if (*CharPtr == '>') {
1871 EndLoc = StrLoc.getFromPointer(CharPtr + 1);
1872 return true;
1874 return false;
1877 /// creating a string without the escape characters '!'.
1878 static std::string angleBracketString(StringRef BracketContents) {
1879 std::string Res;
1880 for (size_t Pos = 0; Pos < BracketContents.size(); Pos++) {
1881 if (BracketContents[Pos] == '!')
1882 Pos++;
1883 Res += BracketContents[Pos];
1885 return Res;
1888 /// Parse an expression and return it.
1890 /// expr ::= expr &&,|| expr -> lowest.
1891 /// expr ::= expr |,^,&,! expr
1892 /// expr ::= expr ==,!=,<>,<,<=,>,>= expr
1893 /// expr ::= expr <<,>> expr
1894 /// expr ::= expr +,- expr
1895 /// expr ::= expr *,/,% expr -> highest.
1896 /// expr ::= primaryexpr
1898 bool MasmParser::parseExpression(const MCExpr *&Res, SMLoc &EndLoc) {
1899 // Parse the expression.
1900 Res = nullptr;
1901 if (getTargetParser().parsePrimaryExpr(Res, EndLoc) ||
1902 parseBinOpRHS(1, Res, EndLoc))
1903 return true;
1905 // Try to constant fold it up front, if possible. Do not exploit
1906 // assembler here.
1907 int64_t Value;
1908 if (Res->evaluateAsAbsolute(Value))
1909 Res = MCConstantExpr::create(Value, getContext());
1911 return false;
1914 bool MasmParser::parseParenExpression(const MCExpr *&Res, SMLoc &EndLoc) {
1915 Res = nullptr;
1916 return parseParenExpr(Res, EndLoc) || parseBinOpRHS(1, Res, EndLoc);
1919 bool MasmParser::parseParenExprOfDepth(unsigned ParenDepth, const MCExpr *&Res,
1920 SMLoc &EndLoc) {
1921 if (parseParenExpr(Res, EndLoc))
1922 return true;
1924 for (; ParenDepth > 0; --ParenDepth) {
1925 if (parseBinOpRHS(1, Res, EndLoc))
1926 return true;
1928 // We don't Lex() the last RParen.
1929 // This is the same behavior as parseParenExpression().
1930 if (ParenDepth - 1 > 0) {
1931 EndLoc = getTok().getEndLoc();
1932 if (parseToken(AsmToken::RParen,
1933 "expected ')' in parentheses expression"))
1934 return true;
1937 return false;
1940 bool MasmParser::parseAbsoluteExpression(int64_t &Res) {
1941 const MCExpr *Expr;
1943 SMLoc StartLoc = Lexer.getLoc();
1944 if (parseExpression(Expr))
1945 return true;
1947 if (!Expr->evaluateAsAbsolute(Res, getStreamer().getAssemblerPtr()))
1948 return Error(StartLoc, "expected absolute expression");
1950 return false;
1953 static unsigned getGNUBinOpPrecedence(AsmToken::TokenKind K,
1954 MCBinaryExpr::Opcode &Kind,
1955 bool ShouldUseLogicalShr,
1956 bool EndExpressionAtGreater) {
1957 switch (K) {
1958 default:
1959 return 0; // not a binop.
1961 // Lowest Precedence: &&, ||
1962 case AsmToken::AmpAmp:
1963 Kind = MCBinaryExpr::LAnd;
1964 return 2;
1965 case AsmToken::PipePipe:
1966 Kind = MCBinaryExpr::LOr;
1967 return 1;
1969 // Low Precedence: ==, !=, <>, <, <=, >, >=
1970 case AsmToken::EqualEqual:
1971 Kind = MCBinaryExpr::EQ;
1972 return 3;
1973 case AsmToken::ExclaimEqual:
1974 case AsmToken::LessGreater:
1975 Kind = MCBinaryExpr::NE;
1976 return 3;
1977 case AsmToken::Less:
1978 Kind = MCBinaryExpr::LT;
1979 return 3;
1980 case AsmToken::LessEqual:
1981 Kind = MCBinaryExpr::LTE;
1982 return 3;
1983 case AsmToken::Greater:
1984 if (EndExpressionAtGreater)
1985 return 0;
1986 Kind = MCBinaryExpr::GT;
1987 return 3;
1988 case AsmToken::GreaterEqual:
1989 Kind = MCBinaryExpr::GTE;
1990 return 3;
1992 // Low Intermediate Precedence: +, -
1993 case AsmToken::Plus:
1994 Kind = MCBinaryExpr::Add;
1995 return 4;
1996 case AsmToken::Minus:
1997 Kind = MCBinaryExpr::Sub;
1998 return 4;
2000 // High Intermediate Precedence: |, &, ^
2001 case AsmToken::Pipe:
2002 Kind = MCBinaryExpr::Or;
2003 return 5;
2004 case AsmToken::Caret:
2005 Kind = MCBinaryExpr::Xor;
2006 return 5;
2007 case AsmToken::Amp:
2008 Kind = MCBinaryExpr::And;
2009 return 5;
2011 // Highest Precedence: *, /, %, <<, >>
2012 case AsmToken::Star:
2013 Kind = MCBinaryExpr::Mul;
2014 return 6;
2015 case AsmToken::Slash:
2016 Kind = MCBinaryExpr::Div;
2017 return 6;
2018 case AsmToken::Percent:
2019 Kind = MCBinaryExpr::Mod;
2020 return 6;
2021 case AsmToken::LessLess:
2022 Kind = MCBinaryExpr::Shl;
2023 return 6;
2024 case AsmToken::GreaterGreater:
2025 if (EndExpressionAtGreater)
2026 return 0;
2027 Kind = ShouldUseLogicalShr ? MCBinaryExpr::LShr : MCBinaryExpr::AShr;
2028 return 6;
2032 unsigned MasmParser::getBinOpPrecedence(AsmToken::TokenKind K,
2033 MCBinaryExpr::Opcode &Kind) {
2034 bool ShouldUseLogicalShr = MAI.shouldUseLogicalShr();
2035 return getGNUBinOpPrecedence(K, Kind, ShouldUseLogicalShr,
2036 AngleBracketDepth > 0);
2039 /// Parse all binary operators with precedence >= 'Precedence'.
2040 /// Res contains the LHS of the expression on input.
2041 bool MasmParser::parseBinOpRHS(unsigned Precedence, const MCExpr *&Res,
2042 SMLoc &EndLoc) {
2043 SMLoc StartLoc = Lexer.getLoc();
2044 while (true) {
2045 AsmToken::TokenKind TokKind = Lexer.getKind();
2046 if (Lexer.getKind() == AsmToken::Identifier) {
2047 TokKind = StringSwitch<AsmToken::TokenKind>(Lexer.getTok().getString())
2048 .CaseLower("and", AsmToken::Amp)
2049 .CaseLower("not", AsmToken::Exclaim)
2050 .CaseLower("or", AsmToken::Pipe)
2051 .CaseLower("eq", AsmToken::EqualEqual)
2052 .CaseLower("ne", AsmToken::ExclaimEqual)
2053 .CaseLower("lt", AsmToken::Less)
2054 .CaseLower("le", AsmToken::LessEqual)
2055 .CaseLower("gt", AsmToken::Greater)
2056 .CaseLower("ge", AsmToken::GreaterEqual)
2057 .Default(TokKind);
2059 MCBinaryExpr::Opcode Kind = MCBinaryExpr::Add;
2060 unsigned TokPrec = getBinOpPrecedence(TokKind, Kind);
2062 // If the next token is lower precedence than we are allowed to eat, return
2063 // successfully with what we ate already.
2064 if (TokPrec < Precedence)
2065 return false;
2067 Lex();
2069 // Eat the next primary expression.
2070 const MCExpr *RHS;
2071 if (getTargetParser().parsePrimaryExpr(RHS, EndLoc))
2072 return true;
2074 // If BinOp binds less tightly with RHS than the operator after RHS, let
2075 // the pending operator take RHS as its LHS.
2076 MCBinaryExpr::Opcode Dummy;
2077 unsigned NextTokPrec = getBinOpPrecedence(Lexer.getKind(), Dummy);
2078 if (TokPrec < NextTokPrec && parseBinOpRHS(TokPrec + 1, RHS, EndLoc))
2079 return true;
2081 // Merge LHS and RHS according to operator.
2082 Res = MCBinaryExpr::create(Kind, Res, RHS, getContext(), StartLoc);
2086 /// ParseStatement:
2087 /// ::= % statement
2088 /// ::= EndOfStatement
2089 /// ::= Label* Directive ...Operands... EndOfStatement
2090 /// ::= Label* Identifier OperandList* EndOfStatement
/// Parses a single statement starting at the current lexer position and
/// returns true when an error was reported (or a callee signalled failure),
/// false otherwise. The checks run in this order: blank statements, the '%'
/// expansion operator, conditional-assembly directives (handled even while
/// TheCondState.Ignore is set), labels, macro invocations, directives that
/// lead the statement, values of user-defined struct types, directives that
/// follow their first argument, and finally machine instructions.
///
/// \param Info per-statement state; receives the parsed operands, the parse
///        error flag, any inline-asm rewrites and the macro exit value.
/// \param SI optional callback used to rename labels while parsing MS inline
///        assembly.
2091 bool MasmParser::parseStatement(ParseStatementInfo &Info,
2092 MCAsmParserSemaCallback *SI) {
2093 assert(!hasPendingError() && "parseStatement started with pending error");
2094 // Eat initial spaces and comments.
2095 while (Lexer.is(AsmToken::Space))
2096 Lex();
2097 if (Lexer.is(AsmToken::EndOfStatement)) {
2098 // If this is a line comment we can drop it safely.
2099 if (getTok().getString().empty() || getTok().getString().front() == '\r' ||
2100 getTok().getString().front() == '\n')
2101 Out.AddBlankLine();
2102 Lex();
2103 return false;
2106 // If preceded by an expansion operator, first expand all text macros and
2107 // macro functions.
2108 if (getTok().is(AsmToken::Percent)) {
2109 SMLoc ExpansionLoc = getTok().getLoc();
2110 if (parseToken(AsmToken::Percent) || expandStatement(ExpansionLoc))
2111 return true;
2114 // Statements always start with an identifier, unless we're dealing with a
2115 // processor directive (.386, .686, etc.) that lexes as a real.
2116 AsmToken ID = getTok();
2117 SMLoc IDLoc = ID.getLoc();
2118 StringRef IDVal;
// -1 is the sentinel tested further down to tell an ordinary label from a
// directional local label introduced by an integer token.
2119 int64_t LocalLabelVal = -1;
2120 if (Lexer.is(AsmToken::HashDirective))
2121 return parseCppHashLineFilenameComment(IDLoc);
2122 // Allow an integer followed by a ':' as a directional local label.
2123 if (Lexer.is(AsmToken::Integer)) {
2124 LocalLabelVal = getTok().getIntVal();
2125 if (LocalLabelVal < 0) {
2126 if (!TheCondState.Ignore) {
2127 Lex(); // always eat a token
2128 return Error(IDLoc, "unexpected token at start of statement");
2130 IDVal = "";
2131 } else {
2132 IDVal = getTok().getString();
2133 Lex(); // Consume the integer token to be used as an identifier token.
2134 if (Lexer.getKind() != AsmToken::Colon) {
2135 if (!TheCondState.Ignore) {
2136 Lex(); // always eat a token
2137 return Error(IDLoc, "unexpected token at start of statement");
2141 } else if (Lexer.is(AsmToken::Dot)) {
2142 // Treat '.' as a valid identifier in this context.
2143 Lex();
2144 IDVal = ".";
2145 } else if (Lexer.is(AsmToken::LCurly)) {
2146 // Treat '{' as a valid identifier in this context.
2147 Lex();
2148 IDVal = "{";
2150 } else if (Lexer.is(AsmToken::RCurly)) {
2151 // Treat '}' as a valid identifier in this context.
2152 Lex();
2153 IDVal = "}";
2154 } else if (Lexer.is(AsmToken::Star) &&
2155 getTargetParser().starIsStartOfStatement()) {
2156 // Accept '*' as a valid start of statement.
2157 Lex();
2158 IDVal = "*";
2159 } else if (Lexer.is(AsmToken::Real)) {
2160 // Treat ".<number>" as a valid identifier in this context.
2161 IDVal = getTok().getString();
2162 Lex(); // always eat a token
2163 if (!IDVal.startswith("."))
2164 return Error(IDLoc, "unexpected token at start of statement");
2165 } else if (parseIdentifier(IDVal, StartOfStatement)) {
// No identifier could be parsed: report it, unless the statement sits in an
// inactive conditional region, in which case it is kept with an empty name.
2166 if (!TheCondState.Ignore) {
2167 Lex(); // always eat a token
2168 return Error(IDLoc, "unexpected token at start of statement");
2170 IDVal = "";
2173 // Handle conditional assembly here before checking for skipping. We
2174 // have to do this so that .endif isn't skipped in a ".if 0" block for
2175 // example.
// The directive table is keyed by the lower-cased identifier.
2176 StringMap<DirectiveKind>::const_iterator DirKindIt =
2177 DirectiveKindMap.find(IDVal.lower());
2178 DirectiveKind DirKind = (DirKindIt == DirectiveKindMap.end())
2179 ? DK_NO_DIRECTIVE
2180 : DirKindIt->getValue();
2181 switch (DirKind) {
2182 default:
2183 break;
2184 case DK_IF:
2185 case DK_IFE:
2186 return parseDirectiveIf(IDLoc, DirKind);
2187 case DK_IFB:
2188 return parseDirectiveIfb(IDLoc, true);
2189 case DK_IFNB:
2190 return parseDirectiveIfb(IDLoc, false);
2191 case DK_IFDEF:
2192 return parseDirectiveIfdef(IDLoc, true);
2193 case DK_IFNDEF:
2194 return parseDirectiveIfdef(IDLoc, false);
2195 case DK_IFDIF:
2196 return parseDirectiveIfidn(IDLoc, /*ExpectEqual=*/false,
2197 /*CaseInsensitive=*/false);
2198 case DK_IFDIFI:
2199 return parseDirectiveIfidn(IDLoc, /*ExpectEqual=*/false,
2200 /*CaseInsensitive=*/true);
2201 case DK_IFIDN:
2202 return parseDirectiveIfidn(IDLoc, /*ExpectEqual=*/true,
2203 /*CaseInsensitive=*/false);
2204 case DK_IFIDNI:
2205 return parseDirectiveIfidn(IDLoc, /*ExpectEqual=*/true,
2206 /*CaseInsensitive=*/true);
2207 case DK_ELSEIF:
2208 case DK_ELSEIFE:
2209 return parseDirectiveElseIf(IDLoc, DirKind);
2210 case DK_ELSEIFB:
2211 return parseDirectiveElseIfb(IDLoc, true);
2212 case DK_ELSEIFNB:
2213 return parseDirectiveElseIfb(IDLoc, false);
2214 case DK_ELSEIFDEF:
2215 return parseDirectiveElseIfdef(IDLoc, true);
2216 case DK_ELSEIFNDEF:
2217 return parseDirectiveElseIfdef(IDLoc, false);
2218 case DK_ELSEIFDIF:
2219 return parseDirectiveElseIfidn(IDLoc, /*ExpectEqual=*/false,
2220 /*CaseInsensitive=*/false);
2221 case DK_ELSEIFDIFI:
2222 return parseDirectiveElseIfidn(IDLoc, /*ExpectEqual=*/false,
2223 /*CaseInsensitive=*/true);
2224 case DK_ELSEIFIDN:
2225 return parseDirectiveElseIfidn(IDLoc, /*ExpectEqual=*/true,
2226 /*CaseInsensitive=*/false);
2227 case DK_ELSEIFIDNI:
2228 return parseDirectiveElseIfidn(IDLoc, /*ExpectEqual=*/true,
2229 /*CaseInsensitive=*/true);
2230 case DK_ELSE:
2231 return parseDirectiveElse(IDLoc);
2232 case DK_ENDIF:
2233 return parseDirectiveEndIf(IDLoc);
2236 // Ignore the statement if in the middle of inactive conditional
2237 // (e.g. ".if 0").
2238 if (TheCondState.Ignore) {
2239 eatToEndOfStatement();
2240 return false;
2243 // FIXME: Recurse on local labels?
2245 // See what kind of statement we have.
2246 switch (Lexer.getKind()) {
2247 case AsmToken::Colon: {
2248 if (!getTargetParser().isLabel(ID))
2249 break;
2250 if (checkForValidSection())
2251 return true;
2253 // identifier ':' -> Label.
2254 Lex();
2256 // Diagnose attempt to use '.' as a label.
2257 if (IDVal == ".")
2258 return Error(IDLoc, "invalid use of pseudo-symbol '.' as a label");
2260 // Diagnose attempt to use a variable as a label.
2262 // FIXME: Diagnostics. Note the location of the definition as a label.
2263 // FIXME: This doesn't diagnose assignment to a symbol which has been
2264 // implicitly marked as external.
// Ordinary labels get a named symbol (renamed through SI first when parsing
// MS inline asm); integer labels get a directional local symbol.
2265 MCSymbol *Sym;
2266 if (LocalLabelVal == -1) {
2267 if (ParsingMSInlineAsm && SI) {
2268 StringRef RewrittenLabel =
2269 SI->LookupInlineAsmLabel(IDVal, getSourceManager(), IDLoc, true);
2270 assert(!RewrittenLabel.empty() &&
2271 "We should have an internal name here.");
2272 Info.AsmRewrites->emplace_back(AOK_Label, IDLoc, IDVal.size(),
2273 RewrittenLabel);
2274 IDVal = RewrittenLabel;
2276 Sym = getContext().getOrCreateSymbol(IDVal);
2277 } else
2278 Sym = Ctx.createDirectionalLocalSymbol(LocalLabelVal);
2279 // End of Labels should be treated as end of line for lexing
2280 // purposes but that information is not available to the Lexer who
2281 // does not understand Labels. This may cause us to see a Hash
2282 // here instead of a preprocessor line comment.
2283 if (getTok().is(AsmToken::Hash)) {
2284 std::string CommentStr = parseStringTo(AsmToken::EndOfStatement);
2285 Lexer.Lex();
2286 Lexer.UnLex(AsmToken(AsmToken::EndOfStatement, CommentStr));
2289 // Consume any end of statement token, if present, to avoid spurious
2290 // AddBlankLine calls().
2291 if (getTok().is(AsmToken::EndOfStatement)) {
2292 Lex();
2295 getTargetParser().doBeforeLabelEmit(Sym);
2297 // Emit the label.
2298 if (!getTargetParser().isParsingMSInlineAsm())
2299 Out.emitLabel(Sym, IDLoc);
2301 // If we are generating dwarf for assembly source files then gather the
2302 // info to make a dwarf label entry for this label if needed.
2303 if (enabledGenDwarfForAssembly())
2304 MCGenDwarfLabelEntry::Make(Sym, &getStreamer(), getSourceManager(),
2305 IDLoc);
2307 getTargetParser().onLabelParsed(Sym);
2309 return false;
2312 default: // Normal instruction or directive.
2313 break;
2316 // If macros are enabled, check to see if this is a macro instantiation.
2317 if (const MCAsmMacro *M = getContext().lookupMacro(IDVal.lower())) {
2318 return handleMacroEntry(M, IDLoc);
2321 // Otherwise, we have a normal instruction or directive.
2323 if (DirKind != DK_NO_DIRECTIVE) {
2324 // There are several entities interested in parsing directives:
2326 // 1. Asm parser extensions. For example, platform-specific parsers
2327 // (like the ELF parser) register themselves as extensions.
2328 // 2. The target-specific assembly parser. Some directives are target
2329 // specific or may potentially behave differently on certain targets.
2330 // 3. The generic directive parser implemented by this class. These are
2331 // all the directives that behave in a target and platform independent
2332 // manner, or at least have a default behavior that's shared between
2333 // all targets and platforms.
2335 getTargetParser().flushPendingInstructions(getStreamer());
2337 // Special-case handling of structure-end directives at higher priority,
2338 // since ENDS is overloaded as a segment-end directive.
2339 if (IDVal.equals_insensitive("ends") && StructInProgress.size() > 1 &&
2340 getTok().is(AsmToken::EndOfStatement)) {
2341 return parseDirectiveNestedEnds();
2344 // First, check the extension directive map to see if any extension has
2345 // registered itself to parse this directive.
2346 std::pair<MCAsmParserExtension *, DirectiveHandler> Handler =
2347 ExtensionDirectiveMap.lookup(IDVal.lower());
2348 if (Handler.first)
2349 return (*Handler.second)(Handler.first, IDVal, IDLoc);
2351 // Next, let the target-specific assembly parser try.
// The token location is recorded first so that we can tell afterwards
// whether the target parser consumed any input.
2352 SMLoc StartTokLoc = getTok().getLoc();
2353 bool TPDirectiveReturn =
2354 ID.is(AsmToken::Identifier) && getTargetParser().ParseDirective(ID);
2356 if (hasPendingError())
2357 return true;
2358 // Currently the return value should be true if we are
2359 // uninterested but as this is at odds with the standard parsing
2360 // convention (return true = error) we have instances of a parsed
2361 // directive that fails returning true as an error. Catch these
2362 // cases as best as possible errors here.
2363 if (TPDirectiveReturn && StartTokLoc != getTok().getLoc())
2364 return true;
2365 // Return if we did some parsing or believe we succeeded.
2366 if (!TPDirectiveReturn || StartTokLoc != getTok().getLoc())
2367 return false;
2369 // Finally, if no one else is interested in this directive, it must be
2370 // generic and familiar to this class.
2371 switch (DirKind) {
2372 default:
2373 break;
2374 case DK_ASCII:
2375 return parseDirectiveAscii(IDVal, false);
2376 case DK_ASCIZ:
2377 case DK_STRING:
2378 return parseDirectiveAscii(IDVal, true);
2379 case DK_BYTE:
2380 case DK_SBYTE:
2381 case DK_DB:
2382 return parseDirectiveValue(IDVal, 1);
2383 case DK_WORD:
2384 case DK_SWORD:
2385 case DK_DW:
2386 return parseDirectiveValue(IDVal, 2);
2387 case DK_DWORD:
2388 case DK_SDWORD:
2389 case DK_DD:
2390 return parseDirectiveValue(IDVal, 4);
2391 case DK_FWORD:
2392 case DK_DF:
2393 return parseDirectiveValue(IDVal, 6);
2394 case DK_QWORD:
2395 case DK_SQWORD:
2396 case DK_DQ:
2397 return parseDirectiveValue(IDVal, 8);
2398 case DK_REAL4:
2399 return parseDirectiveRealValue(IDVal, APFloat::IEEEsingle(), 4);
2400 case DK_REAL8:
2401 return parseDirectiveRealValue(IDVal, APFloat::IEEEdouble(), 8);
2402 case DK_REAL10:
2403 return parseDirectiveRealValue(IDVal, APFloat::x87DoubleExtended(), 10);
2404 case DK_STRUCT:
2405 case DK_UNION:
2406 return parseDirectiveNestedStruct(IDVal, DirKind);
2407 case DK_ENDS:
2408 return parseDirectiveNestedEnds();
2409 case DK_ALIGN:
2410 return parseDirectiveAlign();
2411 case DK_EVEN:
2412 return parseDirectiveEven();
2413 case DK_ORG:
2414 return parseDirectiveOrg();
2415 case DK_EXTERN:
2416 eatToEndOfStatement(); // .extern is the default, ignore it.
2417 return false;
2418 case DK_PUBLIC:
2419 return parseDirectiveSymbolAttribute(MCSA_Global);
2420 case DK_COMM:
2421 return parseDirectiveComm(/*IsLocal=*/false);
2422 case DK_COMMENT:
2423 return parseDirectiveComment(IDLoc);
2424 case DK_INCLUDE:
2425 return parseDirectiveInclude();
2426 case DK_REPEAT:
2427 return parseDirectiveRepeat(IDLoc, IDVal);
2428 case DK_WHILE:
2429 return parseDirectiveWhile(IDLoc);
2430 case DK_FOR:
2431 return parseDirectiveFor(IDLoc, IDVal);
2432 case DK_FORC:
2433 return parseDirectiveForc(IDLoc, IDVal);
2434 case DK_FILE:
2435 return parseDirectiveFile(IDLoc);
2436 case DK_LINE:
2437 return parseDirectiveLine();
2438 case DK_LOC:
2439 return parseDirectiveLoc();
2440 case DK_STABS:
2441 return parseDirectiveStabs();
2442 case DK_CV_FILE:
2443 return parseDirectiveCVFile();
2444 case DK_CV_FUNC_ID:
2445 return parseDirectiveCVFuncId();
2446 case DK_CV_INLINE_SITE_ID:
2447 return parseDirectiveCVInlineSiteId();
2448 case DK_CV_LOC:
2449 return parseDirectiveCVLoc();
2450 case DK_CV_LINETABLE:
2451 return parseDirectiveCVLinetable();
2452 case DK_CV_INLINE_LINETABLE:
2453 return parseDirectiveCVInlineLinetable();
2454 case DK_CV_DEF_RANGE:
2455 return parseDirectiveCVDefRange();
2456 case DK_CV_STRING:
2457 return parseDirectiveCVString();
2458 case DK_CV_STRINGTABLE:
2459 return parseDirectiveCVStringTable();
2460 case DK_CV_FILECHECKSUMS:
2461 return parseDirectiveCVFileChecksums();
2462 case DK_CV_FILECHECKSUM_OFFSET:
2463 return parseDirectiveCVFileChecksumOffset();
2464 case DK_CV_FPO_DATA:
2465 return parseDirectiveCVFPOData();
2466 case DK_CFI_SECTIONS:
2467 return parseDirectiveCFISections();
2468 case DK_CFI_STARTPROC:
2469 return parseDirectiveCFIStartProc();
2470 case DK_CFI_ENDPROC:
2471 return parseDirectiveCFIEndProc();
2472 case DK_CFI_DEF_CFA:
2473 return parseDirectiveCFIDefCfa(IDLoc);
2474 case DK_CFI_DEF_CFA_OFFSET:
2475 return parseDirectiveCFIDefCfaOffset();
2476 case DK_CFI_ADJUST_CFA_OFFSET:
2477 return parseDirectiveCFIAdjustCfaOffset();
2478 case DK_CFI_DEF_CFA_REGISTER:
2479 return parseDirectiveCFIDefCfaRegister(IDLoc);
2480 case DK_CFI_OFFSET:
2481 return parseDirectiveCFIOffset(IDLoc);
2482 case DK_CFI_REL_OFFSET:
2483 return parseDirectiveCFIRelOffset(IDLoc);
2484 case DK_CFI_PERSONALITY:
2485 return parseDirectiveCFIPersonalityOrLsda(true);
2486 case DK_CFI_LSDA:
2487 return parseDirectiveCFIPersonalityOrLsda(false);
2488 case DK_CFI_REMEMBER_STATE:
2489 return parseDirectiveCFIRememberState();
2490 case DK_CFI_RESTORE_STATE:
2491 return parseDirectiveCFIRestoreState();
2492 case DK_CFI_SAME_VALUE:
2493 return parseDirectiveCFISameValue(IDLoc);
2494 case DK_CFI_RESTORE:
2495 return parseDirectiveCFIRestore(IDLoc);
2496 case DK_CFI_ESCAPE:
2497 return parseDirectiveCFIEscape();
2498 case DK_CFI_RETURN_COLUMN:
2499 return parseDirectiveCFIReturnColumn(IDLoc);
2500 case DK_CFI_SIGNAL_FRAME:
2501 return parseDirectiveCFISignalFrame();
2502 case DK_CFI_UNDEFINED:
2503 return parseDirectiveCFIUndefined(IDLoc);
2504 case DK_CFI_REGISTER:
2505 return parseDirectiveCFIRegister(IDLoc);
2506 case DK_CFI_WINDOW_SAVE:
2507 return parseDirectiveCFIWindowSave();
2508 case DK_EXITM:
2509 Info.ExitValue = "";
2510 return parseDirectiveExitMacro(IDLoc, IDVal, *Info.ExitValue);
2511 case DK_ENDM:
2512 Info.ExitValue = "";
2513 return parseDirectiveEndMacro(IDVal);
2514 case DK_PURGE:
2515 return parseDirectivePurgeMacro(IDLoc);
2516 case DK_END:
2517 return parseDirectiveEnd(IDLoc);
2518 case DK_ERR:
2519 return parseDirectiveError(IDLoc);
2520 case DK_ERRB:
2521 return parseDirectiveErrorIfb(IDLoc, true);
2522 case DK_ERRNB:
2523 return parseDirectiveErrorIfb(IDLoc, false);
2524 case DK_ERRDEF:
2525 return parseDirectiveErrorIfdef(IDLoc, true);
2526 case DK_ERRNDEF:
2527 return parseDirectiveErrorIfdef(IDLoc, false);
2528 case DK_ERRDIF:
2529 return parseDirectiveErrorIfidn(IDLoc, /*ExpectEqual=*/false,
2530 /*CaseInsensitive=*/false);
2531 case DK_ERRDIFI:
2532 return parseDirectiveErrorIfidn(IDLoc, /*ExpectEqual=*/false,
2533 /*CaseInsensitive=*/true);
2534 case DK_ERRIDN:
2535 return parseDirectiveErrorIfidn(IDLoc, /*ExpectEqual=*/true,
2536 /*CaseInsensitive=*/false);
2537 case DK_ERRIDNI:
2538 return parseDirectiveErrorIfidn(IDLoc, /*ExpectEqual=*/true,
2539 /*CaseInsensitive=*/true);
2540 case DK_ERRE:
2541 return parseDirectiveErrorIfe(IDLoc, true);
2542 case DK_ERRNZ:
2543 return parseDirectiveErrorIfe(IDLoc, false);
2544 case DK_RADIX:
2545 return parseDirectiveRadix(IDLoc);
2546 case DK_ECHO:
2547 return parseDirectiveEcho(IDLoc);
2550 return Error(IDLoc, "unknown directive");
2553 // We also check if this is allocating memory with user-defined type.
2554 auto IDIt = Structs.find(IDVal.lower());
2555 if (IDIt != Structs.end())
2556 return parseDirectiveStructValue(/*Structure=*/IDIt->getValue(), IDVal,
2557 IDLoc);
2559 // Non-conditional Microsoft directives sometimes follow their first argument.
// Snapshot the current token (the candidate infix directive) and peek at the
// one after it; neither is consumed yet.
2560 const AsmToken nextTok = getTok();
2561 const StringRef nextVal = nextTok.getString();
2562 const SMLoc nextLoc = nextTok.getLoc();
2564 const AsmToken afterNextTok = peekTok();
2566 // There are several entities interested in parsing infix directives:
2568 // 1. Asm parser extensions. For example, platform-specific parsers
2569 // (like the ELF parser) register themselves as extensions.
2570 // 2. The generic directive parser implemented by this class. These are
2571 // all the directives that behave in a target and platform independent
2572 // manner, or at least have a default behavior that's shared between
2573 // all targets and platforms.
2575 getTargetParser().flushPendingInstructions(getStreamer());
2577 // Special-case handling of structure-end directives at higher priority, since
2578 // ENDS is overloaded as a segment-end directive.
2579 if (nextVal.equals_insensitive("ends") && StructInProgress.size() == 1) {
2580 Lex();
2581 return parseDirectiveEnds(IDVal, IDLoc);
2584 // First, check the extension directive map to see if any extension has
2585 // registered itself to parse this directive.
2586 std::pair<MCAsmParserExtension *, DirectiveHandler> Handler =
2587 ExtensionDirectiveMap.lookup(nextVal.lower());
2588 if (Handler.first) {
// Consume the directive token, then push the leading identifier back so it
// is the next token the extension handler lexes.
2589 Lex();
2590 Lexer.UnLex(ID);
2591 return (*Handler.second)(Handler.first, nextVal, nextLoc);
2594 // If no one else is interested in this directive, it must be
2595 // generic and familiar to this class.
2596 DirKindIt = DirectiveKindMap.find(nextVal.lower());
2597 DirKind = (DirKindIt == DirectiveKindMap.end())
2598 ? DK_NO_DIRECTIVE
2599 : DirKindIt->getValue();
// Each handled case consumes the directive token with Lex() before handing
// over. The BYTE/WORD/DWORD/FWORD/QWORD keywords are left alone when the
// token after them is "ptr" (compared case-insensitively).
2600 switch (DirKind) {
2601 default:
2602 break;
2603 case DK_ASSIGN:
2604 case DK_EQU:
2605 case DK_TEXTEQU:
2606 Lex();
2607 return parseDirectiveEquate(nextVal, IDVal, DirKind, IDLoc);
2608 case DK_BYTE:
2609 if (afterNextTok.is(AsmToken::Identifier) &&
2610 afterNextTok.getString().equals_insensitive("ptr")) {
2611 // Size directive; part of an instruction.
2612 break;
2614 LLVM_FALLTHROUGH;
2615 case DK_SBYTE:
2616 case DK_DB:
2617 Lex();
2618 return parseDirectiveNamedValue(nextVal, 1, IDVal, IDLoc);
2619 case DK_WORD:
2620 if (afterNextTok.is(AsmToken::Identifier) &&
2621 afterNextTok.getString().equals_insensitive("ptr")) {
2622 // Size directive; part of an instruction.
2623 break;
2625 LLVM_FALLTHROUGH;
2626 case DK_SWORD:
2627 case DK_DW:
2628 Lex();
2629 return parseDirectiveNamedValue(nextVal, 2, IDVal, IDLoc);
2630 case DK_DWORD:
2631 if (afterNextTok.is(AsmToken::Identifier) &&
2632 afterNextTok.getString().equals_insensitive("ptr")) {
2633 // Size directive; part of an instruction.
2634 break;
2636 LLVM_FALLTHROUGH;
2637 case DK_SDWORD:
2638 case DK_DD:
2639 Lex();
2640 return parseDirectiveNamedValue(nextVal, 4, IDVal, IDLoc);
2641 case DK_FWORD:
2642 if (afterNextTok.is(AsmToken::Identifier) &&
2643 afterNextTok.getString().equals_insensitive("ptr")) {
2644 // Size directive; part of an instruction.
2645 break;
2647 LLVM_FALLTHROUGH;
2648 case DK_DF:
2649 Lex();
2650 return parseDirectiveNamedValue(nextVal, 6, IDVal, IDLoc);
2651 case DK_QWORD:
2652 if (afterNextTok.is(AsmToken::Identifier) &&
2653 afterNextTok.getString().equals_insensitive("ptr")) {
2654 // Size directive; part of an instruction.
2655 break;
2657 LLVM_FALLTHROUGH;
2658 case DK_SQWORD:
2659 case DK_DQ:
2660 Lex();
2661 return parseDirectiveNamedValue(nextVal, 8, IDVal, IDLoc);
2662 case DK_REAL4:
2663 Lex();
2664 return parseDirectiveNamedRealValue(nextVal, APFloat::IEEEsingle(), 4,
2665 IDVal, IDLoc);
2666 case DK_REAL8:
2667 Lex();
2668 return parseDirectiveNamedRealValue(nextVal, APFloat::IEEEdouble(), 8,
2669 IDVal, IDLoc);
2670 case DK_REAL10:
2671 Lex();
2672 return parseDirectiveNamedRealValue(nextVal, APFloat::x87DoubleExtended(),
2673 10, IDVal, IDLoc);
2674 case DK_STRUCT:
2675 case DK_UNION:
2676 Lex();
2677 return parseDirectiveStruct(nextVal, DirKind, IDVal, IDLoc);
2678 case DK_ENDS:
2679 Lex();
2680 return parseDirectiveEnds(IDVal, IDLoc);
2681 case DK_MACRO:
2682 Lex();
2683 return parseDirectiveMacro(IDVal, IDLoc);
2686 // Finally, we check if this is allocating a variable with user-defined type.
2687 auto NextIt = Structs.find(nextVal.lower());
2688 if (NextIt != Structs.end()) {
2689 Lex();
2690 return parseDirectiveNamedStructValue(/*Structure=*/NextIt->getValue(),
2691 nextVal, nextLoc, IDVal);
2694 // __asm _emit or __asm __emit
2695 if (ParsingMSInlineAsm && (IDVal == "_emit" || IDVal == "__emit" ||
2696 IDVal == "_EMIT" || IDVal == "__EMIT"))
2697 return parseDirectiveMSEmit(IDLoc, Info, IDVal.size());
2699 // __asm align
2700 if (ParsingMSInlineAsm && (IDVal == "align" || IDVal == "ALIGN"))
2701 return parseDirectiveMSAlign(IDLoc, Info);
// For "even" only a rewrite is recorded; control then continues below rather
// than returning.
2703 if (ParsingMSInlineAsm && (IDVal == "even" || IDVal == "EVEN"))
2704 Info.AsmRewrites->emplace_back(AOK_EVEN, IDLoc, 4);
2705 if (checkForValidSection())
2706 return true;
2708 // Canonicalize the opcode to lower case.
2709 std::string OpcodeStr = IDVal.lower();
2710 ParseInstructionInfo IInfo(Info.AsmRewrites);
2711 bool ParseHadError = getTargetParser().ParseInstruction(IInfo, OpcodeStr, ID,
2712 Info.ParsedOperands);
2713 Info.ParseError = ParseHadError;
2715 // Dump the parsed representation, if requested.
2716 if (getShowParsedOperands()) {
2717 SmallString<256> Str;
2718 raw_svector_ostream OS(Str);
2719 OS << "parsed instruction: [";
2720 for (unsigned i = 0; i != Info.ParsedOperands.size(); ++i) {
2721 if (i != 0)
2722 OS << ", ";
2723 Info.ParsedOperands[i]->print(OS);
2725 OS << "]";
2727 printMessage(IDLoc, SourceMgr::DK_Note, OS.str());
2730 // Fail even if ParseInstruction erroneously returns false.
2731 if (hasPendingError() || ParseHadError)
2732 return true;
// From here on ParseHadError is known to be false (see the early return
// above), so the !ParseHadError tests below always pass.
2734 // If we are generating dwarf for the current section then generate a .loc
2735 // directive for the instruction.
2736 if (!ParseHadError && enabledGenDwarfForAssembly() &&
2737 getContext().getGenDwarfSectionSyms().count(
2738 getStreamer().getCurrentSectionOnly())) {
2739 unsigned Line;
// Inside a macro expansion, the line is taken from the instantiation
// location of the first entry in ActiveMacros.
2740 if (ActiveMacros.empty())
2741 Line = SrcMgr.FindLineNumber(IDLoc, CurBuffer);
2742 else
2743 Line = SrcMgr.FindLineNumber(ActiveMacros.front()->InstantiationLoc,
2744 ActiveMacros.front()->ExitBuffer);
2746 // If we previously parsed a cpp hash file line comment then make sure the
2747 // current Dwarf File is for the CppHashFilename if not then emit the
2748 // Dwarf File table for it and adjust the line number for the .loc.
2749 if (!CppHashInfo.Filename.empty()) {
2750 unsigned FileNumber = getStreamer().emitDwarfFileDirective(
2751 0, StringRef(), CppHashInfo.Filename);
2752 getContext().setGenDwarfFileNumber(FileNumber);
2754 unsigned CppHashLocLineNo =
2755 SrcMgr.FindLineNumber(CppHashInfo.Loc, CppHashInfo.Buf);
// Rebase onto the numbering declared by the hash line comment: its line
// number, minus one, plus the distance from the comment to this statement.
2756 Line = CppHashInfo.LineNumber - 1 + (Line - CppHashLocLineNo);
2759 getStreamer().emitDwarfLocDirective(
2760 getContext().getGenDwarfFileNumber(), Line, 0,
2761 DWARF2_LINE_DEFAULT_IS_STMT ? DWARF2_FLAG_IS_STMT : 0, 0, 0,
2762 StringRef());
2765 // If parsing succeeded, match the instruction.
2766 if (!ParseHadError) {
2767 uint64_t ErrorInfo;
2768 if (getTargetParser().MatchAndEmitInstruction(
2769 IDLoc, Info.Opcode, Info.ParsedOperands, Out, ErrorInfo,
2770 getTargetParser().isParsingMSInlineAsm()))
2771 return true;
2773 return false;
2776 // Parse and erase curly braces marking block start/end.
2777 bool MasmParser::parseCurlyBlockScope(
2778 SmallVectorImpl<AsmRewrite> &AsmStrRewrites) {
2779 // Identify curly brace marking block start/end.
2780 if (Lexer.isNot(AsmToken::LCurly) && Lexer.isNot(AsmToken::RCurly))
2781 return false;
2783 SMLoc StartLoc = Lexer.getLoc();
2784 Lex(); // Eat the brace.
2785 if (Lexer.is(AsmToken::EndOfStatement))
2786 Lex(); // Eat EndOfStatement following the brace.
2788 // Erase the block start/end brace from the output asm string.
2789 AsmStrRewrites.emplace_back(AOK_Skip, StartLoc, Lexer.getLoc().getPointer() -
2790 StartLoc.getPointer());
2791 return true;
2794 /// parseCppHashLineFilenameComment as this:
2795 /// ::= # number "filename"
2796 bool MasmParser::parseCppHashLineFilenameComment(SMLoc L) {
2797 Lex(); // Eat the hash token.
2798 // Lexer only ever emits HashDirective if it fully formed if it's
2799 // done the checking already so this is an internal error.
2800 assert(getTok().is(AsmToken::Integer) &&
2801 "Lexing Cpp line comment: Expected Integer");
2802 int64_t LineNumber = getTok().getIntVal();
2803 Lex();
2804 assert(getTok().is(AsmToken::String) &&
2805 "Lexing Cpp line comment: Expected String");
2806 StringRef Filename = getTok().getString();
2807 Lex();
2809 // Get rid of the enclosing quotes.
2810 Filename = Filename.substr(1, Filename.size() - 2);
2812 // Save the SMLoc, Filename and LineNumber for later use by diagnostics
2813 // and possibly DWARF file info.
2814 CppHashInfo.Loc = L;
2815 CppHashInfo.Filename = Filename;
2816 CppHashInfo.LineNumber = LineNumber;
2817 CppHashInfo.Buf = CurBuffer;
2818 if (FirstCppHashFilename.empty())
2819 FirstCppHashFilename = Filename;
2820 return false;
2823 /// will use the last parsed cpp hash line filename comment
2824 /// for the Filename and LineNo if any in the diagnostic.
2825 void MasmParser::DiagHandler(const SMDiagnostic &Diag, void *Context) {
2826 const MasmParser *Parser = static_cast<const MasmParser *>(Context);
2827 raw_ostream &OS = errs();
2829 const SourceMgr &DiagSrcMgr = *Diag.getSourceMgr();
2830 SMLoc DiagLoc = Diag.getLoc();
2831 unsigned DiagBuf = DiagSrcMgr.FindBufferContainingLoc(DiagLoc);
2832 unsigned CppHashBuf =
2833 Parser->SrcMgr.FindBufferContainingLoc(Parser->CppHashInfo.Loc);
2835 // Like SourceMgr::printMessage() we need to print the include stack if any
2836 // before printing the message.
2837 unsigned DiagCurBuffer = DiagSrcMgr.FindBufferContainingLoc(DiagLoc);
2838 if (!Parser->SavedDiagHandler && DiagCurBuffer &&
2839 DiagCurBuffer != DiagSrcMgr.getMainFileID()) {
2840 SMLoc ParentIncludeLoc = DiagSrcMgr.getParentIncludeLoc(DiagCurBuffer);
2841 DiagSrcMgr.PrintIncludeStack(ParentIncludeLoc, OS);
2844 // If we have not parsed a cpp hash line filename comment or the source
2845 // manager changed or buffer changed (like in a nested include) then just
2846 // print the normal diagnostic using its Filename and LineNo.
2847 if (!Parser->CppHashInfo.LineNumber || &DiagSrcMgr != &Parser->SrcMgr ||
2848 DiagBuf != CppHashBuf) {
2849 if (Parser->SavedDiagHandler)
2850 Parser->SavedDiagHandler(Diag, Parser->SavedDiagContext);
2851 else
2852 Diag.print(nullptr, OS);
2853 return;
2856 // Use the CppHashFilename and calculate a line number based on the
2857 // CppHashInfo.Loc and CppHashInfo.LineNumber relative to this Diag's SMLoc
2858 // for the diagnostic.
2859 const std::string &Filename = std::string(Parser->CppHashInfo.Filename);
2861 int DiagLocLineNo = DiagSrcMgr.FindLineNumber(DiagLoc, DiagBuf);
2862 int CppHashLocLineNo =
2863 Parser->SrcMgr.FindLineNumber(Parser->CppHashInfo.Loc, CppHashBuf);
2864 int LineNo =
2865 Parser->CppHashInfo.LineNumber - 1 + (DiagLocLineNo - CppHashLocLineNo);
2867 SMDiagnostic NewDiag(*Diag.getSourceMgr(), Diag.getLoc(), Filename, LineNo,
2868 Diag.getColumnNo(), Diag.getKind(), Diag.getMessage(),
2869 Diag.getLineContents(), Diag.getRanges());
2871 if (Parser->SavedDiagHandler)
2872 Parser->SavedDiagHandler(NewDiag, Parser->SavedDiagContext);
2873 else
2874 NewDiag.print(nullptr, OS);
2877 // This is similar to the IsIdentifierChar function in AsmLexer.cpp, but does
2878 // not accept '.'.
2879 static bool isMacroParameterChar(char C) {
2880 return isAlnum(C) || C == '_' || C == '$' || C == '@' || C == '?';
2883 bool MasmParser::expandMacro(raw_svector_ostream &OS, StringRef Body,
2884 ArrayRef<MCAsmMacroParameter> Parameters,
2885 ArrayRef<MCAsmMacroArgument> A,
2886 const std::vector<std::string> &Locals, SMLoc L) {
2887 unsigned NParameters = Parameters.size();
2888 if (NParameters != A.size())
2889 return Error(L, "Wrong number of arguments");
2890 StringMap<std::string> LocalSymbols;
2891 std::string Name;
2892 Name.reserve(6);
2893 for (StringRef Local : Locals) {
2894 raw_string_ostream LocalName(Name);
2895 LocalName << "??"
2896 << format_hex_no_prefix(LocalCounter++, 4, /*Upper=*/true);
2897 LocalSymbols.insert({Local, LocalName.str()});
2898 Name.clear();
2901 Optional<char> CurrentQuote;
2902 while (!Body.empty()) {
2903 // Scan for the next substitution.
2904 std::size_t End = Body.size(), Pos = 0;
2905 std::size_t IdentifierPos = End;
2906 for (; Pos != End; ++Pos) {
2907 // Find the next possible macro parameter, including preceding a '&'
2908 // inside quotes.
2909 if (Body[Pos] == '&')
2910 break;
2911 if (isMacroParameterChar(Body[Pos])) {
2912 if (!CurrentQuote.hasValue())
2913 break;
2914 if (IdentifierPos == End)
2915 IdentifierPos = Pos;
2916 } else {
2917 IdentifierPos = End;
2920 // Track quotation status
2921 if (!CurrentQuote.hasValue()) {
2922 if (Body[Pos] == '\'' || Body[Pos] == '"')
2923 CurrentQuote = Body[Pos];
2924 } else if (Body[Pos] == CurrentQuote) {
2925 if (Pos + 1 != End && Body[Pos + 1] == CurrentQuote) {
2926 // Escaped quote, and quotes aren't identifier chars; skip
2927 ++Pos;
2928 continue;
2929 } else {
2930 CurrentQuote.reset();
2934 if (IdentifierPos != End) {
2935 // We've recognized an identifier before an apostrophe inside quotes;
2936 // check once to see if we can expand it.
2937 Pos = IdentifierPos;
2938 IdentifierPos = End;
2941 // Add the prefix.
2942 OS << Body.slice(0, Pos);
2944 // Check if we reached the end.
2945 if (Pos == End)
2946 break;
2948 unsigned I = Pos;
2949 bool InitialAmpersand = (Body[I] == '&');
2950 if (InitialAmpersand) {
2951 ++I;
2952 ++Pos;
2954 while (I < End && isMacroParameterChar(Body[I]))
2955 ++I;
2957 const char *Begin = Body.data() + Pos;
2958 StringRef Argument(Begin, I - Pos);
2959 const std::string ArgumentLower = Argument.lower();
2960 unsigned Index = 0;
2962 for (; Index < NParameters; ++Index)
2963 if (Parameters[Index].Name.equals_insensitive(ArgumentLower))
2964 break;
2966 if (Index == NParameters) {
2967 if (InitialAmpersand)
2968 OS << '&';
2969 auto it = LocalSymbols.find(ArgumentLower);
2970 if (it != LocalSymbols.end())
2971 OS << it->second;
2972 else
2973 OS << Argument;
2974 Pos = I;
2975 } else {
2976 for (const AsmToken &Token : A[Index]) {
2977 // In MASM, you can write '%expr'.
2978 // The prefix '%' evaluates the expression 'expr'
2979 // and uses the result as a string (e.g. replace %(1+2) with the
2980 // string "3").
2981 // Here, we identify the integer token which is the result of the
2982 // absolute expression evaluation and replace it with its string
2983 // representation.
2984 if (Token.getString().front() == '%' && Token.is(AsmToken::Integer))
2985 // Emit an integer value to the buffer.
2986 OS << Token.getIntVal();
2987 else
2988 OS << Token.getString();
2991 Pos += Argument.size();
2992 if (Pos < End && Body[Pos] == '&') {
2993 ++Pos;
2996 // Update the scan point.
2997 Body = Body.substr(Pos);
3000 return false;
3003 static bool isOperator(AsmToken::TokenKind kind) {
3004 switch (kind) {
3005 default:
3006 return false;
3007 case AsmToken::Plus:
3008 case AsmToken::Minus:
3009 case AsmToken::Tilde:
3010 case AsmToken::Slash:
3011 case AsmToken::Star:
3012 case AsmToken::Dot:
3013 case AsmToken::Equal:
3014 case AsmToken::EqualEqual:
3015 case AsmToken::Pipe:
3016 case AsmToken::PipePipe:
3017 case AsmToken::Caret:
3018 case AsmToken::Amp:
3019 case AsmToken::AmpAmp:
3020 case AsmToken::Exclaim:
3021 case AsmToken::ExclaimEqual:
3022 case AsmToken::Less:
3023 case AsmToken::LessEqual:
3024 case AsmToken::LessLess:
3025 case AsmToken::LessGreater:
3026 case AsmToken::Greater:
3027 case AsmToken::GreaterEqual:
3028 case AsmToken::GreaterGreater:
3029 return true;
3033 namespace {
3035 class AsmLexerSkipSpaceRAII {
3036 public:
3037 AsmLexerSkipSpaceRAII(AsmLexer &Lexer, bool SkipSpace) : Lexer(Lexer) {
3038 Lexer.setSkipSpace(SkipSpace);
3041 ~AsmLexerSkipSpaceRAII() {
3042 Lexer.setSkipSpace(true);
3045 private:
3046 AsmLexer &Lexer;
3049 } // end anonymous namespace
3051 bool MasmParser::parseMacroArgument(const MCAsmMacroParameter *MP,
3052 MCAsmMacroArgument &MA,
3053 AsmToken::TokenKind EndTok) {
3054 if (MP && MP->Vararg) {
3055 if (Lexer.isNot(EndTok)) {
3056 SmallVector<StringRef, 1> Str = parseStringRefsTo(EndTok);
3057 for (StringRef S : Str) {
3058 MA.emplace_back(AsmToken::String, S);
3061 return false;
3064 SMLoc StrLoc = Lexer.getLoc(), EndLoc;
3065 if (Lexer.is(AsmToken::Less) && isAngleBracketString(StrLoc, EndLoc)) {
3066 const char *StrChar = StrLoc.getPointer() + 1;
3067 const char *EndChar = EndLoc.getPointer() - 1;
3068 jumpToLoc(EndLoc, CurBuffer, EndStatementAtEOFStack.back());
3069 /// Eat from '<' to '>'.
3070 Lex();
3071 MA.emplace_back(AsmToken::String, StringRef(StrChar, EndChar - StrChar));
3072 return false;
3075 unsigned ParenLevel = 0;
3077 // Darwin doesn't use spaces to delmit arguments.
3078 AsmLexerSkipSpaceRAII ScopedSkipSpace(Lexer, IsDarwin);
3080 bool SpaceEaten;
3082 while (true) {
3083 SpaceEaten = false;
3084 if (Lexer.is(AsmToken::Eof) || Lexer.is(AsmToken::Equal))
3085 return TokError("unexpected token");
3087 if (ParenLevel == 0) {
3088 if (Lexer.is(AsmToken::Comma))
3089 break;
3091 if (Lexer.is(AsmToken::Space)) {
3092 SpaceEaten = true;
3093 Lex(); // Eat spaces.
3096 // Spaces can delimit parameters, but could also be part an expression.
3097 // If the token after a space is an operator, add the token and the next
3098 // one into this argument
3099 if (!IsDarwin) {
3100 if (isOperator(Lexer.getKind()) && Lexer.isNot(EndTok)) {
3101 MA.push_back(getTok());
3102 Lex();
3104 // Whitespace after an operator can be ignored.
3105 if (Lexer.is(AsmToken::Space))
3106 Lex();
3108 continue;
3111 if (SpaceEaten)
3112 break;
3115 // handleMacroEntry relies on not advancing the lexer here
3116 // to be able to fill in the remaining default parameter values
3117 if (Lexer.is(EndTok) && (EndTok != AsmToken::RParen || ParenLevel == 0))
3118 break;
3120 // Adjust the current parentheses level.
3121 if (Lexer.is(AsmToken::LParen))
3122 ++ParenLevel;
3123 else if (Lexer.is(AsmToken::RParen) && ParenLevel)
3124 --ParenLevel;
3126 // Append the token to the current argument list.
3127 MA.push_back(getTok());
3128 Lex();
3131 if (ParenLevel != 0)
3132 return TokError("unbalanced parentheses in argument");
3134 if (MA.empty() && MP) {
3135 if (MP->Required) {
3136 return TokError("missing value for required parameter '" + MP->Name +
3137 "'");
3138 } else {
3139 MA = MP->Value;
3142 return false;
3145 // Parse the macro instantiation arguments.
3146 bool MasmParser::parseMacroArguments(const MCAsmMacro *M,
3147 MCAsmMacroArguments &A,
3148 AsmToken::TokenKind EndTok) {
3149 const unsigned NParameters = M ? M->Parameters.size() : 0;
3150 bool NamedParametersFound = false;
3151 SmallVector<SMLoc, 4> FALocs;
3153 A.resize(NParameters);
3154 FALocs.resize(NParameters);
3156 // Parse two kinds of macro invocations:
3157 // - macros defined without any parameters accept an arbitrary number of them
3158 // - macros defined with parameters accept at most that many of them
3159 for (unsigned Parameter = 0; !NParameters || Parameter < NParameters;
3160 ++Parameter) {
3161 SMLoc IDLoc = Lexer.getLoc();
3162 MCAsmMacroParameter FA;
3164 if (Lexer.is(AsmToken::Identifier) && peekTok().is(AsmToken::Equal)) {
3165 if (parseIdentifier(FA.Name))
3166 return Error(IDLoc, "invalid argument identifier for formal argument");
3168 if (Lexer.isNot(AsmToken::Equal))
3169 return TokError("expected '=' after formal parameter identifier");
3171 Lex();
3173 NamedParametersFound = true;
3176 if (NamedParametersFound && FA.Name.empty())
3177 return Error(IDLoc, "cannot mix positional and keyword arguments");
3179 unsigned PI = Parameter;
3180 if (!FA.Name.empty()) {
3181 assert(M && "expected macro to be defined");
3182 unsigned FAI = 0;
3183 for (FAI = 0; FAI < NParameters; ++FAI)
3184 if (M->Parameters[FAI].Name == FA.Name)
3185 break;
3187 if (FAI >= NParameters) {
3188 return Error(IDLoc, "parameter named '" + FA.Name +
3189 "' does not exist for macro '" + M->Name + "'");
3191 PI = FAI;
3193 const MCAsmMacroParameter *MP = nullptr;
3194 if (M && PI < NParameters)
3195 MP = &M->Parameters[PI];
3197 SMLoc StrLoc = Lexer.getLoc();
3198 SMLoc EndLoc;
3199 if (Lexer.is(AsmToken::Percent)) {
3200 const MCExpr *AbsoluteExp;
3201 int64_t Value;
3202 /// Eat '%'.
3203 Lex();
3204 if (parseExpression(AbsoluteExp, EndLoc))
3205 return false;
3206 if (!AbsoluteExp->evaluateAsAbsolute(Value,
3207 getStreamer().getAssemblerPtr()))
3208 return Error(StrLoc, "expected absolute expression");
3209 const char *StrChar = StrLoc.getPointer();
3210 const char *EndChar = EndLoc.getPointer();
3211 AsmToken newToken(AsmToken::Integer,
3212 StringRef(StrChar, EndChar - StrChar), Value);
3213 FA.Value.push_back(newToken);
3214 } else if (parseMacroArgument(MP, FA.Value, EndTok)) {
3215 if (M)
3216 return addErrorSuffix(" in '" + M->Name + "' macro");
3217 else
3218 return true;
3221 if (!FA.Value.empty()) {
3222 if (A.size() <= PI)
3223 A.resize(PI + 1);
3224 A[PI] = FA.Value;
3226 if (FALocs.size() <= PI)
3227 FALocs.resize(PI + 1);
3229 FALocs[PI] = Lexer.getLoc();
3232 // At the end of the statement, fill in remaining arguments that have
3233 // default values. If there aren't any, then the next argument is
3234 // required but missing
3235 if (Lexer.is(EndTok)) {
3236 bool Failure = false;
3237 for (unsigned FAI = 0; FAI < NParameters; ++FAI) {
3238 if (A[FAI].empty()) {
3239 if (M->Parameters[FAI].Required) {
3240 Error(FALocs[FAI].isValid() ? FALocs[FAI] : Lexer.getLoc(),
3241 "missing value for required parameter "
3242 "'" +
3243 M->Parameters[FAI].Name + "' in macro '" + M->Name + "'");
3244 Failure = true;
3247 if (!M->Parameters[FAI].Value.empty())
3248 A[FAI] = M->Parameters[FAI].Value;
3251 return Failure;
3254 if (Lexer.is(AsmToken::Comma))
3255 Lex();
3258 return TokError("too many positional arguments");
3261 bool MasmParser::handleMacroEntry(const MCAsmMacro *M, SMLoc NameLoc,
3262 AsmToken::TokenKind ArgumentEndTok) {
3263 // Arbitrarily limit macro nesting depth (default matches 'as'). We can
3264 // eliminate this, although we should protect against infinite loops.
3265 unsigned MaxNestingDepth = AsmMacroMaxNestingDepth;
3266 if (ActiveMacros.size() == MaxNestingDepth) {
3267 std::ostringstream MaxNestingDepthError;
3268 MaxNestingDepthError << "macros cannot be nested more than "
3269 << MaxNestingDepth << " levels deep."
3270 << " Use -asm-macro-max-nesting-depth to increase "
3271 "this limit.";
3272 return TokError(MaxNestingDepthError.str());
3275 MCAsmMacroArguments A;
3276 if (parseMacroArguments(M, A, ArgumentEndTok))
3277 return true;
3279 // Macro instantiation is lexical, unfortunately. We construct a new buffer
3280 // to hold the macro body with substitutions.
3281 SmallString<256> Buf;
3282 StringRef Body = M->Body;
3283 raw_svector_ostream OS(Buf);
3285 if (expandMacro(OS, Body, M->Parameters, A, M->Locals, getTok().getLoc()))
3286 return true;
3288 // We include the endm in the buffer as our cue to exit the macro
3289 // instantiation.
3290 OS << "endm\n";
3292 std::unique_ptr<MemoryBuffer> Instantiation =
3293 MemoryBuffer::getMemBufferCopy(OS.str(), "<instantiation>");
3295 // Create the macro instantiation object and add to the current macro
3296 // instantiation stack.
3297 MacroInstantiation *MI = new MacroInstantiation{
3298 NameLoc, CurBuffer, getTok().getLoc(), TheCondStack.size()};
3299 ActiveMacros.push_back(MI);
3301 ++NumOfMacroInstantiations;
3303 // Jump to the macro instantiation and prime the lexer.
3304 CurBuffer = SrcMgr.AddNewSourceBuffer(std::move(Instantiation), SMLoc());
3305 Lexer.setBuffer(SrcMgr.getMemoryBuffer(CurBuffer)->getBuffer());
3306 EndStatementAtEOFStack.push_back(true);
3307 Lex();
3309 return false;
3312 void MasmParser::handleMacroExit() {
3313 // Jump to the token we should return to, and consume it.
3314 EndStatementAtEOFStack.pop_back();
3315 jumpToLoc(ActiveMacros.back()->ExitLoc, ActiveMacros.back()->ExitBuffer,
3316 EndStatementAtEOFStack.back());
3317 Lex();
3319 // Pop the instantiation entry.
3320 delete ActiveMacros.back();
3321 ActiveMacros.pop_back();
3324 bool MasmParser::handleMacroInvocation(const MCAsmMacro *M, SMLoc NameLoc) {
3325 if (!M->IsFunction)
3326 return Error(NameLoc, "cannot invoke macro procedure as function");
3328 if (parseToken(AsmToken::LParen, "invoking macro function '" + M->Name +
3329 "' requires arguments in parentheses") ||
3330 handleMacroEntry(M, NameLoc, AsmToken::RParen))
3331 return true;
3333 // Parse all statements in the macro, retrieving the exit value when it ends.
3334 std::string ExitValue;
3335 SmallVector<AsmRewrite, 4> AsmStrRewrites;
3336 while (Lexer.isNot(AsmToken::Eof)) {
3337 ParseStatementInfo Info(&AsmStrRewrites);
3338 bool Parsed = parseStatement(Info, nullptr);
3340 if (!Parsed && Info.ExitValue.hasValue()) {
3341 ExitValue = std::move(*Info.ExitValue);
3342 break;
3345 // If we have a Lexer Error we are on an Error Token. Load in Lexer Error
3346 // for printing ErrMsg via Lex() only if no (presumably better) parser error
3347 // exists.
3348 if (Parsed && !hasPendingError() && Lexer.getTok().is(AsmToken::Error)) {
3349 Lex();
3352 // parseStatement returned true so may need to emit an error.
3353 printPendingErrors();
3355 // Skipping to the next line if needed.
3356 if (Parsed && !getLexer().isAtStartOfStatement())
3357 eatToEndOfStatement();
3360 // Consume the right-parenthesis on the other side of the arguments.
3361 if (parseToken(AsmToken::RParen, "invoking macro function '" + M->Name +
3362 "' requires arguments in parentheses"))
3363 return true;
3365 // Exit values may require lexing, unfortunately. We construct a new buffer to
3366 // hold the exit value.
3367 std::unique_ptr<MemoryBuffer> MacroValue =
3368 MemoryBuffer::getMemBufferCopy(ExitValue, "<macro-value>");
3370 // Jump from this location to the instantiated exit value, and prime the
3371 // lexer.
3372 CurBuffer = SrcMgr.AddNewSourceBuffer(std::move(MacroValue), Lexer.getLoc());
3373 Lexer.setBuffer(SrcMgr.getMemoryBuffer(CurBuffer)->getBuffer(), nullptr,
3374 /*EndStatementAtEOF=*/false);
3375 EndStatementAtEOFStack.push_back(false);
3376 Lex();
3378 return false;
3381 /// parseIdentifier:
3382 /// ::= identifier
3383 /// ::= string
3384 bool MasmParser::parseIdentifier(StringRef &Res,
3385 IdentifierPositionKind Position) {
3386 // The assembler has relaxed rules for accepting identifiers, in particular we
3387 // allow things like '.globl $foo' and '.def @feat.00', which would normally
3388 // be separate tokens. At this level, we have already lexed so we cannot
3389 // (currently) handle this as a context dependent token, instead we detect
3390 // adjacent tokens and return the combined identifier.
3391 if (Lexer.is(AsmToken::Dollar) || Lexer.is(AsmToken::At)) {
3392 SMLoc PrefixLoc = getLexer().getLoc();
3394 // Consume the prefix character, and check for a following identifier.
3396 AsmToken nextTok = peekTok(false);
3398 if (nextTok.isNot(AsmToken::Identifier))
3399 return true;
3401 // We have a '$' or '@' followed by an identifier, make sure they are adjacent.
3402 if (PrefixLoc.getPointer() + 1 != nextTok.getLoc().getPointer())
3403 return true;
3405 // eat $ or @
3406 Lexer.Lex(); // Lexer's Lex guarantees consecutive token.
3407 // Construct the joined identifier and consume the token.
3408 Res =
3409 StringRef(PrefixLoc.getPointer(), getTok().getIdentifier().size() + 1);
3410 Lex(); // Parser Lex to maintain invariants.
3411 return false;
3414 if (Lexer.isNot(AsmToken::Identifier) && Lexer.isNot(AsmToken::String))
3415 return true;
3417 Res = getTok().getIdentifier();
3419 // Consume the identifier token - but if parsing certain directives, avoid
3420 // lexical expansion of the next token.
3421 ExpandKind ExpandNextToken = ExpandMacros;
3422 if (Position == StartOfStatement &&
3423 StringSwitch<bool>(Res)
3424 .CaseLower("echo", true)
3425 .CasesLower("ifdef", "ifndef", "elseifdef", "elseifndef", true)
3426 .Default(false)) {
3427 ExpandNextToken = DoNotExpandMacros;
3429 Lex(ExpandNextToken);
3431 return false;
3434 /// parseDirectiveEquate:
3435 /// ::= name "=" expression
3436 /// | name "equ" expression (not redefinable)
3437 /// | name "equ" text-list
3438 /// | name "textequ" text-list (redefinability unspecified)
3439 bool MasmParser::parseDirectiveEquate(StringRef IDVal, StringRef Name,
3440 DirectiveKind DirKind, SMLoc NameLoc) {
3441 auto BuiltinIt = BuiltinSymbolMap.find(Name.lower());
3442 if (BuiltinIt != BuiltinSymbolMap.end())
3443 return Error(NameLoc, "cannot redefine a built-in symbol");
3445 Variable &Var = Variables[Name.lower()];
3446 if (Var.Name.empty()) {
3447 Var.Name = Name;
3450 SMLoc StartLoc = Lexer.getLoc();
3451 if (DirKind == DK_EQU || DirKind == DK_TEXTEQU) {
3452 // "equ" and "textequ" both allow text expressions.
3453 std::string Value;
3454 std::string TextItem;
3455 if (!parseTextItem(TextItem)) {
3456 Value += TextItem;
3458 // Accept a text-list, not just one text-item.
3459 auto parseItem = [&]() -> bool {
3460 if (parseTextItem(TextItem))
3461 return TokError("expected text item");
3462 Value += TextItem;
3463 return false;
3465 if (parseOptionalToken(AsmToken::Comma) && parseMany(parseItem))
3466 return addErrorSuffix(" in '" + Twine(IDVal) + "' directive");
3468 if (!Var.IsText || Var.TextValue != Value) {
3469 switch (Var.Redefinable) {
3470 case Variable::NOT_REDEFINABLE:
3471 return Error(getTok().getLoc(), "invalid variable redefinition");
3472 case Variable::WARN_ON_REDEFINITION:
3473 if (Warning(NameLoc, "redefining '" + Name +
3474 "', already defined on the command line")) {
3475 return true;
3477 break;
3478 default:
3479 break;
3482 Var.IsText = true;
3483 Var.TextValue = Value;
3484 Var.Redefinable = Variable::REDEFINABLE;
3486 return false;
3489 if (DirKind == DK_TEXTEQU)
3490 return TokError("expected <text> in '" + Twine(IDVal) + "' directive");
3492 // Parse as expression assignment.
3493 const MCExpr *Expr;
3494 SMLoc EndLoc;
3495 if (parseExpression(Expr, EndLoc))
3496 return addErrorSuffix(" in '" + Twine(IDVal) + "' directive");
3497 StringRef ExprAsString = StringRef(
3498 StartLoc.getPointer(), EndLoc.getPointer() - StartLoc.getPointer());
3500 int64_t Value;
3501 if (!Expr->evaluateAsAbsolute(Value, getStreamer().getAssemblerPtr())) {
3502 if (DirKind == DK_ASSIGN)
3503 return Error(
3504 StartLoc,
3505 "expected absolute expression; not all symbols have known values",
3506 {StartLoc, EndLoc});
3508 // Not an absolute expression; define as a text replacement.
3509 if (!Var.IsText || Var.TextValue != ExprAsString) {
3510 switch (Var.Redefinable) {
3511 case Variable::NOT_REDEFINABLE:
3512 return Error(getTok().getLoc(), "invalid variable redefinition");
3513 case Variable::WARN_ON_REDEFINITION:
3514 if (Warning(NameLoc, "redefining '" + Name +
3515 "', already defined on the command line")) {
3516 return true;
3518 break;
3519 default:
3520 break;
3524 Var.IsText = true;
3525 Var.TextValue = ExprAsString.str();
3526 Var.Redefinable = Variable::REDEFINABLE;
3528 return false;
3531 MCSymbol *Sym = getContext().getOrCreateSymbol(Var.Name);
3533 const MCConstantExpr *PrevValue =
3534 Sym->isVariable() ? dyn_cast_or_null<MCConstantExpr>(
3535 Sym->getVariableValue(/*SetUsed=*/false))
3536 : nullptr;
3537 if (Var.IsText || !PrevValue || PrevValue->getValue() != Value) {
3538 switch (Var.Redefinable) {
3539 case Variable::NOT_REDEFINABLE:
3540 return Error(getTok().getLoc(), "invalid variable redefinition");
3541 case Variable::WARN_ON_REDEFINITION:
3542 if (Warning(NameLoc, "redefining '" + Name +
3543 "', already defined on the command line")) {
3544 return true;
3546 break;
3547 default:
3548 break;
3552 Var.IsText = false;
3553 Var.TextValue.clear();
3554 Var.Redefinable = (DirKind == DK_ASSIGN) ? Variable::REDEFINABLE
3555 : Variable::NOT_REDEFINABLE;
3557 Sym->setRedefinable(Var.Redefinable != Variable::NOT_REDEFINABLE);
3558 Sym->setVariableValue(Expr);
3559 Sym->setExternal(false);
3561 return false;
3564 bool MasmParser::parseEscapedString(std::string &Data) {
3565 if (check(getTok().isNot(AsmToken::String), "expected string"))
3566 return true;
3568 Data = "";
3569 char Quote = getTok().getString().front();
3570 StringRef Str = getTok().getStringContents();
3571 Data.reserve(Str.size());
3572 for (size_t i = 0, e = Str.size(); i != e; ++i) {
3573 Data.push_back(Str[i]);
3574 if (Str[i] == Quote) {
3575 // MASM treats doubled delimiting quotes as an escaped delimiting quote.
3576 // If we're escaping the string's trailing delimiter, we're definitely
3577 // missing a quotation mark.
3578 if (i + 1 == Str.size())
3579 return Error(getTok().getLoc(), "missing quotation mark in string");
3580 if (Str[i + 1] == Quote)
3581 ++i;
3585 Lex();
3586 return false;
3589 bool MasmParser::parseAngleBracketString(std::string &Data) {
3590 SMLoc EndLoc, StartLoc = getTok().getLoc();
3591 if (isAngleBracketString(StartLoc, EndLoc)) {
3592 const char *StartChar = StartLoc.getPointer() + 1;
3593 const char *EndChar = EndLoc.getPointer() - 1;
3594 jumpToLoc(EndLoc, CurBuffer, EndStatementAtEOFStack.back());
3595 // Eat from '<' to '>'.
3596 Lex();
3598 Data = angleBracketString(StringRef(StartChar, EndChar - StartChar));
3599 return false;
3601 return true;
3604 /// textItem ::= textLiteral | textMacroID | % constExpr
3605 bool MasmParser::parseTextItem(std::string &Data) {
3606 switch (getTok().getKind()) {
3607 default:
3608 return true;
3609 case AsmToken::Percent: {
3610 int64_t Res;
3611 if (parseToken(AsmToken::Percent) || parseAbsoluteExpression(Res))
3612 return true;
3613 Data = std::to_string(Res);
3614 return false;
3616 case AsmToken::Less:
3617 case AsmToken::LessEqual:
3618 case AsmToken::LessLess:
3619 case AsmToken::LessGreater:
3620 return parseAngleBracketString(Data);
3621 case AsmToken::Identifier: {
3622 // This must be a text macro; we need to expand it accordingly.
3623 StringRef ID;
3624 SMLoc StartLoc = getTok().getLoc();
3625 if (parseIdentifier(ID))
3626 return true;
3627 Data = ID.str();
3629 bool Expanded = false;
3630 while (true) {
3631 // Try to resolve as a built-in text macro
3632 auto BuiltinIt = BuiltinSymbolMap.find(ID.lower());
3633 if (BuiltinIt != BuiltinSymbolMap.end()) {
3634 llvm::Optional<std::string> BuiltinText =
3635 evaluateBuiltinTextMacro(BuiltinIt->getValue(), StartLoc);
3636 if (!BuiltinText.hasValue()) {
3637 // Not a text macro; break without substituting
3638 break;
3640 Data = std::move(*BuiltinText);
3641 ID = StringRef(Data);
3642 Expanded = true;
3643 continue;
3646 // Try to resolve as a variable text macro
3647 auto VarIt = Variables.find(ID.lower());
3648 if (VarIt != Variables.end()) {
3649 const Variable &Var = VarIt->getValue();
3650 if (!Var.IsText) {
3651 // Not a text macro; break without substituting
3652 break;
3654 Data = Var.TextValue;
3655 ID = StringRef(Data);
3656 Expanded = true;
3657 continue;
3660 break;
3663 if (!Expanded) {
3664 // Not a text macro; not usable in TextItem context. Since we haven't used
3665 // the token, put it back for better error recovery.
3666 getLexer().UnLex(AsmToken(AsmToken::Identifier, ID));
3667 return true;
3669 return false;
3672 llvm_unreachable("unhandled token kind");
3675 /// parseDirectiveAscii:
3676 /// ::= ( .ascii | .asciz | .string ) [ "string" ( , "string" )* ]
3677 bool MasmParser::parseDirectiveAscii(StringRef IDVal, bool ZeroTerminated) {
3678 auto parseOp = [&]() -> bool {
3679 std::string Data;
3680 if (checkForValidSection() || parseEscapedString(Data))
3681 return true;
3682 getStreamer().emitBytes(Data);
3683 if (ZeroTerminated)
3684 getStreamer().emitBytes(StringRef("\0", 1));
3685 return false;
3688 if (parseMany(parseOp))
3689 return addErrorSuffix(" in '" + Twine(IDVal) + "' directive");
3690 return false;
3693 bool MasmParser::emitIntValue(const MCExpr *Value, unsigned Size) {
3694 // Special case constant expressions to match code generator.
3695 if (const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(Value)) {
3696 assert(Size <= 8 && "Invalid size");
3697 int64_t IntValue = MCE->getValue();
3698 if (!isUIntN(8 * Size, IntValue) && !isIntN(8 * Size, IntValue))
3699 return Error(MCE->getLoc(), "out of range literal value");
3700 getStreamer().emitIntValue(IntValue, Size);
3701 } else {
3702 const MCSymbolRefExpr *MSE = dyn_cast<MCSymbolRefExpr>(Value);
3703 if (MSE && MSE->getSymbol().getName() == "?") {
3704 // ? initializer; treat as 0.
3705 getStreamer().emitIntValue(0, Size);
3706 } else {
3707 getStreamer().emitValue(Value, Size, Value->getLoc());
3710 return false;
3713 bool MasmParser::parseScalarInitializer(unsigned Size,
3714 SmallVectorImpl<const MCExpr *> &Values,
3715 unsigned StringPadLength) {
3716 if (Size == 1 && getTok().is(AsmToken::String)) {
3717 std::string Value;
3718 if (parseEscapedString(Value))
3719 return true;
3720 // Treat each character as an initializer.
3721 for (const unsigned char CharVal : Value)
3722 Values.push_back(MCConstantExpr::create(CharVal, getContext()));
3724 // Pad the string with spaces to the specified length.
3725 for (size_t i = Value.size(); i < StringPadLength; ++i)
3726 Values.push_back(MCConstantExpr::create(' ', getContext()));
3727 } else {
3728 const MCExpr *Value;
3729 if (parseExpression(Value))
3730 return true;
3731 if (getTok().is(AsmToken::Identifier) &&
3732 getTok().getString().equals_insensitive("dup")) {
3733 Lex(); // Eat 'dup'.
3734 const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(Value);
3735 if (!MCE)
3736 return Error(Value->getLoc(),
3737 "cannot repeat value a non-constant number of times");
3738 const int64_t Repetitions = MCE->getValue();
3739 if (Repetitions < 0)
3740 return Error(Value->getLoc(),
3741 "cannot repeat value a negative number of times");
3743 SmallVector<const MCExpr *, 1> DuplicatedValues;
3744 if (parseToken(AsmToken::LParen,
3745 "parentheses required for 'dup' contents") ||
3746 parseScalarInstList(Size, DuplicatedValues) ||
3747 parseToken(AsmToken::RParen, "unmatched parentheses"))
3748 return true;
3750 for (int i = 0; i < Repetitions; ++i)
3751 Values.append(DuplicatedValues.begin(), DuplicatedValues.end());
3752 } else {
3753 Values.push_back(Value);
3756 return false;
3759 bool MasmParser::parseScalarInstList(unsigned Size,
3760 SmallVectorImpl<const MCExpr *> &Values,
3761 const AsmToken::TokenKind EndToken) {
3762 while (getTok().isNot(EndToken) &&
3763 (EndToken != AsmToken::Greater ||
3764 getTok().isNot(AsmToken::GreaterGreater))) {
3765 parseScalarInitializer(Size, Values);
3767 // If we see a comma, continue, and allow line continuation.
3768 if (!parseOptionalToken(AsmToken::Comma))
3769 break;
3770 parseOptionalToken(AsmToken::EndOfStatement);
3772 return false;
3775 bool MasmParser::emitIntegralValues(unsigned Size, unsigned *Count) {
3776 SmallVector<const MCExpr *, 1> Values;
3777 if (checkForValidSection() || parseScalarInstList(Size, Values))
3778 return true;
3780 for (auto Value : Values) {
3781 emitIntValue(Value, Size);
3783 if (Count)
3784 *Count = Values.size();
3785 return false;
3788 // Add a field to the current structure.
3789 bool MasmParser::addIntegralField(StringRef Name, unsigned Size) {
3790 StructInfo &Struct = StructInProgress.back();
3791 FieldInfo &Field = Struct.addField(Name, FT_INTEGRAL, Size);
3792 IntFieldInfo &IntInfo = Field.Contents.IntInfo;
3794 Field.Type = Size;
3796 if (parseScalarInstList(Size, IntInfo.Values))
3797 return true;
3799 Field.SizeOf = Field.Type * IntInfo.Values.size();
3800 Field.LengthOf = IntInfo.Values.size();
3801 const unsigned FieldEnd = Field.Offset + Field.SizeOf;
3802 if (!Struct.IsUnion) {
3803 Struct.NextOffset = FieldEnd;
3805 Struct.Size = std::max(Struct.Size, FieldEnd);
3806 return false;
3809 /// parseDirectiveValue
3810 /// ::= (byte | word | ... ) [ expression (, expression)* ]
3811 bool MasmParser::parseDirectiveValue(StringRef IDVal, unsigned Size) {
3812 if (StructInProgress.empty()) {
3813 // Initialize data value.
3814 if (emitIntegralValues(Size))
3815 return addErrorSuffix(" in '" + Twine(IDVal) + "' directive");
3816 } else if (addIntegralField("", Size)) {
3817 return addErrorSuffix(" in '" + Twine(IDVal) + "' directive");
3820 return false;
3823 /// parseDirectiveNamedValue
3824 /// ::= name (byte | word | ... ) [ expression (, expression)* ]
3825 bool MasmParser::parseDirectiveNamedValue(StringRef TypeName, unsigned Size,
3826 StringRef Name, SMLoc NameLoc) {
3827 if (StructInProgress.empty()) {
3828 // Initialize named data value.
3829 MCSymbol *Sym = getContext().getOrCreateSymbol(Name);
3830 getStreamer().emitLabel(Sym);
3831 unsigned Count;
3832 if (emitIntegralValues(Size, &Count))
3833 return addErrorSuffix(" in '" + Twine(TypeName) + "' directive");
3835 AsmTypeInfo Type;
3836 Type.Name = TypeName;
3837 Type.Size = Size * Count;
3838 Type.ElementSize = Size;
3839 Type.Length = Count;
3840 KnownType[Name.lower()] = Type;
3841 } else if (addIntegralField(Name, Size)) {
3842 return addErrorSuffix(" in '" + Twine(TypeName) + "' directive");
3845 return false;
3848 static bool parseHexOcta(MasmParser &Asm, uint64_t &hi, uint64_t &lo) {
3849 if (Asm.getTok().isNot(AsmToken::Integer) &&
3850 Asm.getTok().isNot(AsmToken::BigNum))
3851 return Asm.TokError("unknown token in expression");
3852 SMLoc ExprLoc = Asm.getTok().getLoc();
3853 APInt IntValue = Asm.getTok().getAPIntVal();
3854 Asm.Lex();
3855 if (!IntValue.isIntN(128))
3856 return Asm.Error(ExprLoc, "out of range literal value");
3857 if (!IntValue.isIntN(64)) {
3858 hi = IntValue.getHiBits(IntValue.getBitWidth() - 64).getZExtValue();
3859 lo = IntValue.getLoBits(64).getZExtValue();
3860 } else {
3861 hi = 0;
3862 lo = IntValue.getZExtValue();
3864 return false;
3867 bool MasmParser::parseRealValue(const fltSemantics &Semantics, APInt &Res) {
3868 // We don't truly support arithmetic on floating point expressions, so we
3869 // have to manually parse unary prefixes.
3870 bool IsNeg = false;
3871 SMLoc SignLoc;
3872 if (getLexer().is(AsmToken::Minus)) {
3873 SignLoc = getLexer().getLoc();
3874 Lexer.Lex();
3875 IsNeg = true;
3876 } else if (getLexer().is(AsmToken::Plus)) {
3877 SignLoc = getLexer().getLoc();
3878 Lexer.Lex();
3881 if (Lexer.is(AsmToken::Error))
3882 return TokError(Lexer.getErr());
3883 if (Lexer.isNot(AsmToken::Integer) && Lexer.isNot(AsmToken::Real) &&
3884 Lexer.isNot(AsmToken::Identifier))
3885 return TokError("unexpected token in directive");
3887 // Convert to an APFloat.
3888 APFloat Value(Semantics);
3889 StringRef IDVal = getTok().getString();
3890 if (getLexer().is(AsmToken::Identifier)) {
3891 if (IDVal.equals_insensitive("infinity") || IDVal.equals_insensitive("inf"))
3892 Value = APFloat::getInf(Semantics);
3893 else if (IDVal.equals_insensitive("nan"))
3894 Value = APFloat::getNaN(Semantics, false, ~0);
3895 else if (IDVal.equals_insensitive("?"))
3896 Value = APFloat::getZero(Semantics);
3897 else
3898 return TokError("invalid floating point literal");
3899 } else if (IDVal.consume_back("r") || IDVal.consume_back("R")) {
3900 // MASM hexadecimal floating-point literal; no APFloat conversion needed.
3901 // To match ML64.exe, ignore the initial sign.
3902 unsigned SizeInBits = Value.getSizeInBits(Semantics);
3903 if (SizeInBits != (IDVal.size() << 2))
3904 return TokError("invalid floating point literal");
3906 // Consume the numeric token.
3907 Lex();
3909 Res = APInt(SizeInBits, IDVal, 16);
3910 if (SignLoc.isValid())
3911 return Warning(SignLoc, "MASM-style hex floats ignore explicit sign");
3912 return false;
3913 } else if (errorToBool(
3914 Value.convertFromString(IDVal, APFloat::rmNearestTiesToEven)
3915 .takeError())) {
3916 return TokError("invalid floating point literal");
3918 if (IsNeg)
3919 Value.changeSign();
3921 // Consume the numeric token.
3922 Lex();
3924 Res = Value.bitcastToAPInt();
3926 return false;
3929 bool MasmParser::parseRealInstList(const fltSemantics &Semantics,
3930 SmallVectorImpl<APInt> &ValuesAsInt,
3931 const AsmToken::TokenKind EndToken) {
3932 while (getTok().isNot(EndToken) ||
3933 (EndToken == AsmToken::Greater &&
3934 getTok().isNot(AsmToken::GreaterGreater))) {
3935 const AsmToken NextTok = peekTok();
3936 if (NextTok.is(AsmToken::Identifier) &&
3937 NextTok.getString().equals_insensitive("dup")) {
3938 const MCExpr *Value;
3939 if (parseExpression(Value) || parseToken(AsmToken::Identifier))
3940 return true;
3941 const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(Value);
3942 if (!MCE)
3943 return Error(Value->getLoc(),
3944 "cannot repeat value a non-constant number of times");
3945 const int64_t Repetitions = MCE->getValue();
3946 if (Repetitions < 0)
3947 return Error(Value->getLoc(),
3948 "cannot repeat value a negative number of times");
3950 SmallVector<APInt, 1> DuplicatedValues;
3951 if (parseToken(AsmToken::LParen,
3952 "parentheses required for 'dup' contents") ||
3953 parseRealInstList(Semantics, DuplicatedValues) ||
3954 parseToken(AsmToken::RParen, "unmatched parentheses"))
3955 return true;
3957 for (int i = 0; i < Repetitions; ++i)
3958 ValuesAsInt.append(DuplicatedValues.begin(), DuplicatedValues.end());
3959 } else {
3960 APInt AsInt;
3961 if (parseRealValue(Semantics, AsInt))
3962 return true;
3963 ValuesAsInt.push_back(AsInt);
3966 // Continue if we see a comma. (Also, allow line continuation.)
3967 if (!parseOptionalToken(AsmToken::Comma))
3968 break;
3969 parseOptionalToken(AsmToken::EndOfStatement);
3972 return false;
3975 // Initialize real data values.
3976 bool MasmParser::emitRealValues(const fltSemantics &Semantics,
3977 unsigned *Count) {
3978 if (checkForValidSection())
3979 return true;
3981 SmallVector<APInt, 1> ValuesAsInt;
3982 if (parseRealInstList(Semantics, ValuesAsInt))
3983 return true;
3985 for (const APInt &AsInt : ValuesAsInt) {
3986 getStreamer().emitIntValue(AsInt);
3988 if (Count)
3989 *Count = ValuesAsInt.size();
3990 return false;
3993 // Add a real field to the current struct.
3994 bool MasmParser::addRealField(StringRef Name, const fltSemantics &Semantics,
3995 size_t Size) {
3996 StructInfo &Struct = StructInProgress.back();
3997 FieldInfo &Field = Struct.addField(Name, FT_REAL, Size);
3998 RealFieldInfo &RealInfo = Field.Contents.RealInfo;
4000 Field.SizeOf = 0;
4002 if (parseRealInstList(Semantics, RealInfo.AsIntValues))
4003 return true;
4005 Field.Type = RealInfo.AsIntValues.back().getBitWidth() / 8;
4006 Field.LengthOf = RealInfo.AsIntValues.size();
4007 Field.SizeOf = Field.Type * Field.LengthOf;
4009 const unsigned FieldEnd = Field.Offset + Field.SizeOf;
4010 if (!Struct.IsUnion) {
4011 Struct.NextOffset = FieldEnd;
4013 Struct.Size = std::max(Struct.Size, FieldEnd);
4014 return false;
4017 /// parseDirectiveRealValue
4018 /// ::= (real4 | real8 | real10) [ expression (, expression)* ]
4019 bool MasmParser::parseDirectiveRealValue(StringRef IDVal,
4020 const fltSemantics &Semantics,
4021 size_t Size) {
4022 if (StructInProgress.empty()) {
4023 // Initialize data value.
4024 if (emitRealValues(Semantics))
4025 return addErrorSuffix(" in '" + Twine(IDVal) + "' directive");
4026 } else if (addRealField("", Semantics, Size)) {
4027 return addErrorSuffix(" in '" + Twine(IDVal) + "' directive");
4029 return false;
4032 /// parseDirectiveNamedRealValue
4033 /// ::= name (real4 | real8 | real10) [ expression (, expression)* ]
4034 bool MasmParser::parseDirectiveNamedRealValue(StringRef TypeName,
4035 const fltSemantics &Semantics,
4036 unsigned Size, StringRef Name,
4037 SMLoc NameLoc) {
4038 if (StructInProgress.empty()) {
4039 // Initialize named data value.
4040 MCSymbol *Sym = getContext().getOrCreateSymbol(Name);
4041 getStreamer().emitLabel(Sym);
4042 unsigned Count;
4043 if (emitRealValues(Semantics, &Count))
4044 return addErrorSuffix(" in '" + TypeName + "' directive");
4046 AsmTypeInfo Type;
4047 Type.Name = TypeName;
4048 Type.Size = Size * Count;
4049 Type.ElementSize = Size;
4050 Type.Length = Count;
4051 KnownType[Name.lower()] = Type;
4052 } else if (addRealField(Name, Semantics, Size)) {
4053 return addErrorSuffix(" in '" + TypeName + "' directive");
4055 return false;
4058 bool MasmParser::parseOptionalAngleBracketOpen() {
4059 const AsmToken Tok = getTok();
4060 if (parseOptionalToken(AsmToken::LessLess)) {
4061 AngleBracketDepth++;
4062 Lexer.UnLex(AsmToken(AsmToken::Less, Tok.getString().substr(1)));
4063 return true;
4064 } else if (parseOptionalToken(AsmToken::LessGreater)) {
4065 AngleBracketDepth++;
4066 Lexer.UnLex(AsmToken(AsmToken::Greater, Tok.getString().substr(1)));
4067 return true;
4068 } else if (parseOptionalToken(AsmToken::Less)) {
4069 AngleBracketDepth++;
4070 return true;
4073 return false;
4076 bool MasmParser::parseAngleBracketClose(const Twine &Msg) {
4077 const AsmToken Tok = getTok();
4078 if (parseOptionalToken(AsmToken::GreaterGreater)) {
4079 Lexer.UnLex(AsmToken(AsmToken::Greater, Tok.getString().substr(1)));
4080 } else if (parseToken(AsmToken::Greater, Msg)) {
4081 return true;
4083 AngleBracketDepth--;
4084 return false;
4087 bool MasmParser::parseFieldInitializer(const FieldInfo &Field,
4088 const IntFieldInfo &Contents,
4089 FieldInitializer &Initializer) {
4090 SMLoc Loc = getTok().getLoc();
4092 SmallVector<const MCExpr *, 1> Values;
4093 if (parseOptionalToken(AsmToken::LCurly)) {
4094 if (Field.LengthOf == 1 && Field.Type > 1)
4095 return Error(Loc, "Cannot initialize scalar field with array value");
4096 if (parseScalarInstList(Field.Type, Values, AsmToken::RCurly) ||
4097 parseToken(AsmToken::RCurly))
4098 return true;
4099 } else if (parseOptionalAngleBracketOpen()) {
4100 if (Field.LengthOf == 1 && Field.Type > 1)
4101 return Error(Loc, "Cannot initialize scalar field with array value");
4102 if (parseScalarInstList(Field.Type, Values, AsmToken::Greater) ||
4103 parseAngleBracketClose())
4104 return true;
4105 } else if (Field.LengthOf > 1 && Field.Type > 1) {
4106 return Error(Loc, "Cannot initialize array field with scalar value");
4107 } else if (parseScalarInitializer(Field.Type, Values,
4108 /*StringPadLength=*/Field.LengthOf)) {
4109 return true;
4112 if (Values.size() > Field.LengthOf) {
4113 return Error(Loc, "Initializer too long for field; expected at most " +
4114 std::to_string(Field.LengthOf) + " elements, got " +
4115 std::to_string(Values.size()));
4117 // Default-initialize all remaining values.
4118 Values.append(Contents.Values.begin() + Values.size(), Contents.Values.end());
4120 Initializer = FieldInitializer(std::move(Values));
4121 return false;
4124 bool MasmParser::parseFieldInitializer(const FieldInfo &Field,
4125 const RealFieldInfo &Contents,
4126 FieldInitializer &Initializer) {
4127 const fltSemantics *Semantics;
4128 switch (Field.Type) {
4129 case 4:
4130 Semantics = &APFloat::IEEEsingle();
4131 break;
4132 case 8:
4133 Semantics = &APFloat::IEEEdouble();
4134 break;
4135 case 10:
4136 Semantics = &APFloat::x87DoubleExtended();
4137 break;
4138 default:
4139 llvm_unreachable("unknown real field type");
4142 SMLoc Loc = getTok().getLoc();
4144 SmallVector<APInt, 1> AsIntValues;
4145 if (parseOptionalToken(AsmToken::LCurly)) {
4146 if (Field.LengthOf == 1)
4147 return Error(Loc, "Cannot initialize scalar field with array value");
4148 if (parseRealInstList(*Semantics, AsIntValues, AsmToken::RCurly) ||
4149 parseToken(AsmToken::RCurly))
4150 return true;
4151 } else if (parseOptionalAngleBracketOpen()) {
4152 if (Field.LengthOf == 1)
4153 return Error(Loc, "Cannot initialize scalar field with array value");
4154 if (parseRealInstList(*Semantics, AsIntValues, AsmToken::Greater) ||
4155 parseAngleBracketClose())
4156 return true;
4157 } else if (Field.LengthOf > 1) {
4158 return Error(Loc, "Cannot initialize array field with scalar value");
4159 } else {
4160 AsIntValues.emplace_back();
4161 if (parseRealValue(*Semantics, AsIntValues.back()))
4162 return true;
4165 if (AsIntValues.size() > Field.LengthOf) {
4166 return Error(Loc, "Initializer too long for field; expected at most " +
4167 std::to_string(Field.LengthOf) + " elements, got " +
4168 std::to_string(AsIntValues.size()));
4170 // Default-initialize all remaining values.
4171 AsIntValues.append(Contents.AsIntValues.begin() + AsIntValues.size(),
4172 Contents.AsIntValues.end());
4174 Initializer = FieldInitializer(std::move(AsIntValues));
4175 return false;
4178 bool MasmParser::parseFieldInitializer(const FieldInfo &Field,
4179 const StructFieldInfo &Contents,
4180 FieldInitializer &Initializer) {
4181 SMLoc Loc = getTok().getLoc();
4183 std::vector<StructInitializer> Initializers;
4184 if (Field.LengthOf > 1) {
4185 if (parseOptionalToken(AsmToken::LCurly)) {
4186 if (parseStructInstList(Contents.Structure, Initializers,
4187 AsmToken::RCurly) ||
4188 parseToken(AsmToken::RCurly))
4189 return true;
4190 } else if (parseOptionalAngleBracketOpen()) {
4191 if (parseStructInstList(Contents.Structure, Initializers,
4192 AsmToken::Greater) ||
4193 parseAngleBracketClose())
4194 return true;
4195 } else {
4196 return Error(Loc, "Cannot initialize array field with scalar value");
4198 } else {
4199 Initializers.emplace_back();
4200 if (parseStructInitializer(Contents.Structure, Initializers.back()))
4201 return true;
4204 if (Initializers.size() > Field.LengthOf) {
4205 return Error(Loc, "Initializer too long for field; expected at most " +
4206 std::to_string(Field.LengthOf) + " elements, got " +
4207 std::to_string(Initializers.size()));
4209 // Default-initialize all remaining values.
4210 Initializers.insert(Initializers.end(),
4211 Contents.Initializers.begin() + Initializers.size(),
4212 Contents.Initializers.end());
4214 Initializer = FieldInitializer(std::move(Initializers), Contents.Structure);
4215 return false;
4218 bool MasmParser::parseFieldInitializer(const FieldInfo &Field,
4219 FieldInitializer &Initializer) {
4220 switch (Field.Contents.FT) {
4221 case FT_INTEGRAL:
4222 return parseFieldInitializer(Field, Field.Contents.IntInfo, Initializer);
4223 case FT_REAL:
4224 return parseFieldInitializer(Field, Field.Contents.RealInfo, Initializer);
4225 case FT_STRUCT:
4226 return parseFieldInitializer(Field, Field.Contents.StructInfo, Initializer);
4228 llvm_unreachable("Unhandled FieldType enum");
4231 bool MasmParser::parseStructInitializer(const StructInfo &Structure,
4232 StructInitializer &Initializer) {
4233 const AsmToken FirstToken = getTok();
4235 Optional<AsmToken::TokenKind> EndToken;
4236 if (parseOptionalToken(AsmToken::LCurly)) {
4237 EndToken = AsmToken::RCurly;
4238 } else if (parseOptionalAngleBracketOpen()) {
4239 EndToken = AsmToken::Greater;
4240 AngleBracketDepth++;
4241 } else if (FirstToken.is(AsmToken::Identifier) &&
4242 FirstToken.getString() == "?") {
4243 // ? initializer; leave EndToken uninitialized to treat as empty.
4244 if (parseToken(AsmToken::Identifier))
4245 return true;
4246 } else {
4247 return Error(FirstToken.getLoc(), "Expected struct initializer");
4250 auto &FieldInitializers = Initializer.FieldInitializers;
4251 size_t FieldIndex = 0;
4252 if (EndToken.hasValue()) {
4253 // Initialize all fields with given initializers.
4254 while (getTok().isNot(EndToken.getValue()) &&
4255 FieldIndex < Structure.Fields.size()) {
4256 const FieldInfo &Field = Structure.Fields[FieldIndex++];
4257 if (parseOptionalToken(AsmToken::Comma)) {
4258 // Empty initializer; use the default and continue. (Also, allow line
4259 // continuation.)
4260 FieldInitializers.push_back(Field.Contents);
4261 parseOptionalToken(AsmToken::EndOfStatement);
4262 continue;
4264 FieldInitializers.emplace_back(Field.Contents.FT);
4265 if (parseFieldInitializer(Field, FieldInitializers.back()))
4266 return true;
4268 // Continue if we see a comma. (Also, allow line continuation.)
4269 SMLoc CommaLoc = getTok().getLoc();
4270 if (!parseOptionalToken(AsmToken::Comma))
4271 break;
4272 if (FieldIndex == Structure.Fields.size())
4273 return Error(CommaLoc, "'" + Structure.Name +
4274 "' initializer initializes too many fields");
4275 parseOptionalToken(AsmToken::EndOfStatement);
4278 // Default-initialize all remaining fields.
4279 for (auto It = Structure.Fields.begin() + FieldIndex;
4280 It != Structure.Fields.end(); ++It) {
4281 const FieldInfo &Field = *It;
4282 FieldInitializers.push_back(Field.Contents);
4285 if (EndToken.hasValue()) {
4286 if (EndToken.getValue() == AsmToken::Greater)
4287 return parseAngleBracketClose();
4289 return parseToken(EndToken.getValue());
4292 return false;
4295 bool MasmParser::parseStructInstList(
4296 const StructInfo &Structure, std::vector<StructInitializer> &Initializers,
4297 const AsmToken::TokenKind EndToken) {
4298 while (getTok().isNot(EndToken) ||
4299 (EndToken == AsmToken::Greater &&
4300 getTok().isNot(AsmToken::GreaterGreater))) {
4301 const AsmToken NextTok = peekTok();
4302 if (NextTok.is(AsmToken::Identifier) &&
4303 NextTok.getString().equals_insensitive("dup")) {
4304 const MCExpr *Value;
4305 if (parseExpression(Value) || parseToken(AsmToken::Identifier))
4306 return true;
4307 const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(Value);
4308 if (!MCE)
4309 return Error(Value->getLoc(),
4310 "cannot repeat value a non-constant number of times");
4311 const int64_t Repetitions = MCE->getValue();
4312 if (Repetitions < 0)
4313 return Error(Value->getLoc(),
4314 "cannot repeat value a negative number of times");
4316 std::vector<StructInitializer> DuplicatedValues;
4317 if (parseToken(AsmToken::LParen,
4318 "parentheses required for 'dup' contents") ||
4319 parseStructInstList(Structure, DuplicatedValues) ||
4320 parseToken(AsmToken::RParen, "unmatched parentheses"))
4321 return true;
4323 for (int i = 0; i < Repetitions; ++i)
4324 llvm::append_range(Initializers, DuplicatedValues);
4325 } else {
4326 Initializers.emplace_back();
4327 if (parseStructInitializer(Structure, Initializers.back()))
4328 return true;
4331 // Continue if we see a comma. (Also, allow line continuation.)
4332 if (!parseOptionalToken(AsmToken::Comma))
4333 break;
4334 parseOptionalToken(AsmToken::EndOfStatement);
4337 return false;
4340 bool MasmParser::emitFieldValue(const FieldInfo &Field,
4341 const IntFieldInfo &Contents) {
4342 // Default-initialize all values.
4343 for (const MCExpr *Value : Contents.Values) {
4344 if (emitIntValue(Value, Field.Type))
4345 return true;
4347 return false;
4350 bool MasmParser::emitFieldValue(const FieldInfo &Field,
4351 const RealFieldInfo &Contents) {
4352 for (const APInt &AsInt : Contents.AsIntValues) {
4353 getStreamer().emitIntValue(AsInt.getLimitedValue(),
4354 AsInt.getBitWidth() / 8);
4356 return false;
4359 bool MasmParser::emitFieldValue(const FieldInfo &Field,
4360 const StructFieldInfo &Contents) {
4361 for (const auto &Initializer : Contents.Initializers) {
4362 size_t Index = 0, Offset = 0;
4363 for (const auto &SubField : Contents.Structure.Fields) {
4364 getStreamer().emitZeros(SubField.Offset - Offset);
4365 Offset = SubField.Offset + SubField.SizeOf;
4366 emitFieldInitializer(SubField, Initializer.FieldInitializers[Index++]);
4369 return false;
4372 bool MasmParser::emitFieldValue(const FieldInfo &Field) {
4373 switch (Field.Contents.FT) {
4374 case FT_INTEGRAL:
4375 return emitFieldValue(Field, Field.Contents.IntInfo);
4376 case FT_REAL:
4377 return emitFieldValue(Field, Field.Contents.RealInfo);
4378 case FT_STRUCT:
4379 return emitFieldValue(Field, Field.Contents.StructInfo);
4381 llvm_unreachable("Unhandled FieldType enum");
4384 bool MasmParser::emitFieldInitializer(const FieldInfo &Field,
4385 const IntFieldInfo &Contents,
4386 const IntFieldInfo &Initializer) {
4387 for (const auto &Value : Initializer.Values) {
4388 if (emitIntValue(Value, Field.Type))
4389 return true;
4391 // Default-initialize all remaining values.
4392 for (auto it = Contents.Values.begin() + Initializer.Values.size();
4393 it != Contents.Values.end(); ++it) {
4394 const auto &Value = *it;
4395 if (emitIntValue(Value, Field.Type))
4396 return true;
4398 return false;
4401 bool MasmParser::emitFieldInitializer(const FieldInfo &Field,
4402 const RealFieldInfo &Contents,
4403 const RealFieldInfo &Initializer) {
4404 for (const auto &AsInt : Initializer.AsIntValues) {
4405 getStreamer().emitIntValue(AsInt.getLimitedValue(),
4406 AsInt.getBitWidth() / 8);
4408 // Default-initialize all remaining values.
4409 for (auto It = Contents.AsIntValues.begin() + Initializer.AsIntValues.size();
4410 It != Contents.AsIntValues.end(); ++It) {
4411 const auto &AsInt = *It;
4412 getStreamer().emitIntValue(AsInt.getLimitedValue(),
4413 AsInt.getBitWidth() / 8);
4415 return false;
4418 bool MasmParser::emitFieldInitializer(const FieldInfo &Field,
4419 const StructFieldInfo &Contents,
4420 const StructFieldInfo &Initializer) {
4421 for (const auto &Init : Initializer.Initializers) {
4422 if (emitStructInitializer(Contents.Structure, Init))
4423 return true;
4425 // Default-initialize all remaining values.
4426 for (auto It =
4427 Contents.Initializers.begin() + Initializer.Initializers.size();
4428 It != Contents.Initializers.end(); ++It) {
4429 const auto &Init = *It;
4430 if (emitStructInitializer(Contents.Structure, Init))
4431 return true;
4433 return false;
4436 bool MasmParser::emitFieldInitializer(const FieldInfo &Field,
4437 const FieldInitializer &Initializer) {
4438 switch (Field.Contents.FT) {
4439 case FT_INTEGRAL:
4440 return emitFieldInitializer(Field, Field.Contents.IntInfo,
4441 Initializer.IntInfo);
4442 case FT_REAL:
4443 return emitFieldInitializer(Field, Field.Contents.RealInfo,
4444 Initializer.RealInfo);
4445 case FT_STRUCT:
4446 return emitFieldInitializer(Field, Field.Contents.StructInfo,
4447 Initializer.StructInfo);
4449 llvm_unreachable("Unhandled FieldType enum");
4452 bool MasmParser::emitStructInitializer(const StructInfo &Structure,
4453 const StructInitializer &Initializer) {
4454 if (!Structure.Initializable)
4455 return Error(getLexer().getLoc(),
4456 "cannot initialize a value of type '" + Structure.Name +
4457 "'; 'org' was used in the type's declaration");
4458 size_t Index = 0, Offset = 0;
4459 for (const auto &Init : Initializer.FieldInitializers) {
4460 const auto &Field = Structure.Fields[Index++];
4461 getStreamer().emitZeros(Field.Offset - Offset);
4462 Offset = Field.Offset + Field.SizeOf;
4463 if (emitFieldInitializer(Field, Init))
4464 return true;
4466 // Default-initialize all remaining fields.
4467 for (auto It =
4468 Structure.Fields.begin() + Initializer.FieldInitializers.size();
4469 It != Structure.Fields.end(); ++It) {
4470 const auto &Field = *It;
4471 getStreamer().emitZeros(Field.Offset - Offset);
4472 Offset = Field.Offset + Field.SizeOf;
4473 if (emitFieldValue(Field))
4474 return true;
4476 // Add final padding.
4477 if (Offset != Structure.Size)
4478 getStreamer().emitZeros(Structure.Size - Offset);
4479 return false;
4482 // Set data values from initializers.
4483 bool MasmParser::emitStructValues(const StructInfo &Structure,
4484 unsigned *Count) {
4485 std::vector<StructInitializer> Initializers;
4486 if (parseStructInstList(Structure, Initializers))
4487 return true;
4489 for (const auto &Initializer : Initializers) {
4490 if (emitStructInitializer(Structure, Initializer))
4491 return true;
4494 if (Count)
4495 *Count = Initializers.size();
4496 return false;
4499 // Declare a field in the current struct.
4500 bool MasmParser::addStructField(StringRef Name, const StructInfo &Structure) {
4501 StructInfo &OwningStruct = StructInProgress.back();
4502 FieldInfo &Field =
4503 OwningStruct.addField(Name, FT_STRUCT, Structure.AlignmentSize);
4504 StructFieldInfo &StructInfo = Field.Contents.StructInfo;
4506 StructInfo.Structure = Structure;
4507 Field.Type = Structure.Size;
4509 if (parseStructInstList(Structure, StructInfo.Initializers))
4510 return true;
4512 Field.LengthOf = StructInfo.Initializers.size();
4513 Field.SizeOf = Field.Type * Field.LengthOf;
4515 const unsigned FieldEnd = Field.Offset + Field.SizeOf;
4516 if (!OwningStruct.IsUnion) {
4517 OwningStruct.NextOffset = FieldEnd;
4519 OwningStruct.Size = std::max(OwningStruct.Size, FieldEnd);
4521 return false;
4524 /// parseDirectiveStructValue
4525 /// ::= struct-id (<struct-initializer> | {struct-initializer})
4526 /// [, (<struct-initializer> | {struct-initializer})]*
4527 bool MasmParser::parseDirectiveStructValue(const StructInfo &Structure,
4528 StringRef Directive, SMLoc DirLoc) {
4529 if (StructInProgress.empty()) {
4530 if (emitStructValues(Structure))
4531 return true;
4532 } else if (addStructField("", Structure)) {
4533 return addErrorSuffix(" in '" + Twine(Directive) + "' directive");
4536 return false;
4539 /// parseDirectiveNamedValue
4540 /// ::= name (byte | word | ... ) [ expression (, expression)* ]
4541 bool MasmParser::parseDirectiveNamedStructValue(const StructInfo &Structure,
4542 StringRef Directive,
4543 SMLoc DirLoc, StringRef Name) {
4544 if (StructInProgress.empty()) {
4545 // Initialize named data value.
4546 MCSymbol *Sym = getContext().getOrCreateSymbol(Name);
4547 getStreamer().emitLabel(Sym);
4548 unsigned Count;
4549 if (emitStructValues(Structure, &Count))
4550 return true;
4551 AsmTypeInfo Type;
4552 Type.Name = Structure.Name;
4553 Type.Size = Structure.Size * Count;
4554 Type.ElementSize = Structure.Size;
4555 Type.Length = Count;
4556 KnownType[Name.lower()] = Type;
4557 } else if (addStructField(Name, Structure)) {
4558 return addErrorSuffix(" in '" + Twine(Directive) + "' directive");
4561 return false;
4564 /// parseDirectiveStruct
4565 /// ::= <name> (STRUC | STRUCT | UNION) [fieldAlign] [, NONUNIQUE]
4566 /// (dataDir | generalDir | offsetDir | nestedStruct)+
4567 /// <name> ENDS
4568 ////// dataDir = data declaration
4569 ////// offsetDir = EVEN, ORG, ALIGN
4570 bool MasmParser::parseDirectiveStruct(StringRef Directive,
4571 DirectiveKind DirKind, StringRef Name,
4572 SMLoc NameLoc) {
4573 // We ignore NONUNIQUE; we do not support OPTION M510 or OPTION OLDSTRUCTS
4574 // anyway, so all field accesses must be qualified.
4575 AsmToken NextTok = getTok();
4576 int64_t AlignmentValue = 1;
4577 if (NextTok.isNot(AsmToken::Comma) &&
4578 NextTok.isNot(AsmToken::EndOfStatement) &&
4579 parseAbsoluteExpression(AlignmentValue)) {
4580 return addErrorSuffix(" in alignment value for '" + Twine(Directive) +
4581 "' directive");
4583 if (!isPowerOf2_64(AlignmentValue)) {
4584 return Error(NextTok.getLoc(), "alignment must be a power of two; was " +
4585 std::to_string(AlignmentValue));
4588 StringRef Qualifier;
4589 SMLoc QualifierLoc;
4590 if (parseOptionalToken(AsmToken::Comma)) {
4591 QualifierLoc = getTok().getLoc();
4592 if (parseIdentifier(Qualifier))
4593 return addErrorSuffix(" in '" + Twine(Directive) + "' directive");
4594 if (!Qualifier.equals_insensitive("nonunique"))
4595 return Error(QualifierLoc, "Unrecognized qualifier for '" +
4596 Twine(Directive) +
4597 "' directive; expected none or NONUNIQUE");
4600 if (parseToken(AsmToken::EndOfStatement))
4601 return addErrorSuffix(" in '" + Twine(Directive) + "' directive");
4603 StructInProgress.emplace_back(Name, DirKind == DK_UNION, AlignmentValue);
4604 return false;
4607 /// parseDirectiveNestedStruct
4608 /// ::= (STRUC | STRUCT | UNION) [name]
4609 /// (dataDir | generalDir | offsetDir | nestedStruct)+
4610 /// ENDS
4611 bool MasmParser::parseDirectiveNestedStruct(StringRef Directive,
4612 DirectiveKind DirKind) {
4613 if (StructInProgress.empty())
4614 return TokError("missing name in top-level '" + Twine(Directive) +
4615 "' directive");
4617 StringRef Name;
4618 if (getTok().is(AsmToken::Identifier)) {
4619 Name = getTok().getIdentifier();
4620 parseToken(AsmToken::Identifier);
4622 if (parseToken(AsmToken::EndOfStatement))
4623 return addErrorSuffix(" in '" + Twine(Directive) + "' directive");
4625 // Reserve space to ensure Alignment doesn't get invalidated when
4626 // StructInProgress grows.
4627 StructInProgress.reserve(StructInProgress.size() + 1);
4628 StructInProgress.emplace_back(Name, DirKind == DK_UNION,
4629 StructInProgress.back().Alignment);
4630 return false;
4633 bool MasmParser::parseDirectiveEnds(StringRef Name, SMLoc NameLoc) {
4634 if (StructInProgress.empty())
4635 return Error(NameLoc, "ENDS directive without matching STRUC/STRUCT/UNION");
4636 if (StructInProgress.size() > 1)
4637 return Error(NameLoc, "unexpected name in nested ENDS directive");
4638 if (StructInProgress.back().Name.compare_insensitive(Name))
4639 return Error(NameLoc, "mismatched name in ENDS directive; expected '" +
4640 StructInProgress.back().Name + "'");
4641 StructInfo Structure = StructInProgress.pop_back_val();
4642 // Pad to make the structure's size divisible by the smaller of its alignment
4643 // and the size of its largest field.
4644 Structure.Size = llvm::alignTo(
4645 Structure.Size, std::min(Structure.Alignment, Structure.AlignmentSize));
4646 Structs[Name.lower()] = Structure;
4648 if (parseToken(AsmToken::EndOfStatement))
4649 return addErrorSuffix(" in ENDS directive");
4651 return false;
4654 bool MasmParser::parseDirectiveNestedEnds() {
4655 if (StructInProgress.empty())
4656 return TokError("ENDS directive without matching STRUC/STRUCT/UNION");
4657 if (StructInProgress.size() == 1)
4658 return TokError("missing name in top-level ENDS directive");
4660 if (parseToken(AsmToken::EndOfStatement))
4661 return addErrorSuffix(" in nested ENDS directive");
4663 StructInfo Structure = StructInProgress.pop_back_val();
4664 // Pad to make the structure's size divisible by its alignment.
4665 Structure.Size = llvm::alignTo(Structure.Size, Structure.Alignment);
4667 StructInfo &ParentStruct = StructInProgress.back();
4668 if (Structure.Name.empty()) {
4669 // Anonymous substructures' fields are addressed as if they belong to the
4670 // parent structure - so we transfer them to the parent here.
4671 const size_t OldFields = ParentStruct.Fields.size();
4672 ParentStruct.Fields.insert(
4673 ParentStruct.Fields.end(),
4674 std::make_move_iterator(Structure.Fields.begin()),
4675 std::make_move_iterator(Structure.Fields.end()));
4676 for (const auto &FieldByName : Structure.FieldsByName) {
4677 ParentStruct.FieldsByName[FieldByName.getKey()] =
4678 FieldByName.getValue() + OldFields;
4681 unsigned FirstFieldOffset = 0;
4682 if (!Structure.Fields.empty() && !ParentStruct.IsUnion) {
4683 FirstFieldOffset = llvm::alignTo(
4684 ParentStruct.NextOffset,
4685 std::min(ParentStruct.Alignment, Structure.AlignmentSize));
4688 if (ParentStruct.IsUnion) {
4689 ParentStruct.Size = std::max(ParentStruct.Size, Structure.Size);
4690 } else {
4691 for (auto FieldIter = ParentStruct.Fields.begin() + OldFields;
4692 FieldIter != ParentStruct.Fields.end(); ++FieldIter) {
4693 FieldIter->Offset += FirstFieldOffset;
4696 const unsigned StructureEnd = FirstFieldOffset + Structure.Size;
4697 if (!ParentStruct.IsUnion) {
4698 ParentStruct.NextOffset = StructureEnd;
4700 ParentStruct.Size = std::max(ParentStruct.Size, StructureEnd);
4702 } else {
4703 FieldInfo &Field = ParentStruct.addField(Structure.Name, FT_STRUCT,
4704 Structure.AlignmentSize);
4705 StructFieldInfo &StructInfo = Field.Contents.StructInfo;
4706 Field.Type = Structure.Size;
4707 Field.LengthOf = 1;
4708 Field.SizeOf = Structure.Size;
4710 const unsigned StructureEnd = Field.Offset + Field.SizeOf;
4711 if (!ParentStruct.IsUnion) {
4712 ParentStruct.NextOffset = StructureEnd;
4714 ParentStruct.Size = std::max(ParentStruct.Size, StructureEnd);
4716 StructInfo.Structure = Structure;
4717 StructInfo.Initializers.emplace_back();
4718 auto &FieldInitializers = StructInfo.Initializers.back().FieldInitializers;
4719 for (const auto &SubField : Structure.Fields) {
4720 FieldInitializers.push_back(SubField.Contents);
4724 return false;
4727 /// parseDirectiveOrg
4728 /// ::= org expression
4729 bool MasmParser::parseDirectiveOrg() {
4730 const MCExpr *Offset;
4731 SMLoc OffsetLoc = Lexer.getLoc();
4732 if (checkForValidSection() || parseExpression(Offset))
4733 return true;
4734 if (parseToken(AsmToken::EndOfStatement))
4735 return addErrorSuffix(" in 'org' directive");
4737 if (StructInProgress.empty()) {
4738 // Not in a struct; change the offset for the next instruction or data
4739 if (checkForValidSection())
4740 return addErrorSuffix(" in 'org' directive");
4742 getStreamer().emitValueToOffset(Offset, 0, OffsetLoc);
4743 } else {
4744 // Offset the next field of this struct
4745 StructInfo &Structure = StructInProgress.back();
4746 int64_t OffsetRes;
4747 if (!Offset->evaluateAsAbsolute(OffsetRes, getStreamer().getAssemblerPtr()))
4748 return Error(OffsetLoc,
4749 "expected absolute expression in 'org' directive");
4750 if (OffsetRes < 0)
4751 return Error(
4752 OffsetLoc,
4753 "expected non-negative value in struct's 'org' directive; was " +
4754 std::to_string(OffsetRes));
4755 Structure.NextOffset = static_cast<unsigned>(OffsetRes);
4757 // ORG-affected structures cannot be initialized
4758 Structure.Initializable = false;
4761 return false;
4764 bool MasmParser::emitAlignTo(int64_t Alignment) {
4765 if (StructInProgress.empty()) {
4766 // Not in a struct; align the next instruction or data
4767 if (checkForValidSection())
4768 return true;
4770 // Check whether we should use optimal code alignment for this align
4771 // directive.
4772 const MCSection *Section = getStreamer().getCurrentSectionOnly();
4773 assert(Section && "must have section to emit alignment");
4774 if (Section->UseCodeAlign()) {
4775 getStreamer().emitCodeAlignment(Alignment, /*MaxBytesToEmit=*/0);
4776 } else {
4777 // FIXME: Target specific behavior about how the "extra" bytes are filled.
4778 getStreamer().emitValueToAlignment(Alignment, /*Value=*/0,
4779 /*ValueSize=*/1,
4780 /*MaxBytesToEmit=*/0);
4782 } else {
4783 // Align the next field of this struct
4784 StructInfo &Structure = StructInProgress.back();
4785 Structure.NextOffset = llvm::alignTo(Structure.NextOffset, Alignment);
4788 return false;
4791 /// parseDirectiveAlign
4792 /// ::= align expression
4793 bool MasmParser::parseDirectiveAlign() {
4794 SMLoc AlignmentLoc = getLexer().getLoc();
4795 int64_t Alignment;
4797 // Ignore empty 'align' directives.
4798 if (getTok().is(AsmToken::EndOfStatement)) {
4799 return Warning(AlignmentLoc,
4800 "align directive with no operand is ignored") &&
4801 parseToken(AsmToken::EndOfStatement);
4803 if (parseAbsoluteExpression(Alignment) ||
4804 parseToken(AsmToken::EndOfStatement))
4805 return addErrorSuffix(" in align directive");
4807 // Always emit an alignment here even if we throw an error.
4808 bool ReturnVal = false;
4810 // Reject alignments that aren't either a power of two or zero, for ML.exe
4811 // compatibility. Alignment of zero is silently rounded up to one.
4812 if (Alignment == 0)
4813 Alignment = 1;
4814 if (!isPowerOf2_64(Alignment))
4815 ReturnVal |= Error(AlignmentLoc, "alignment must be a power of 2; was " +
4816 std::to_string(Alignment));
4818 if (emitAlignTo(Alignment))
4819 ReturnVal |= addErrorSuffix(" in align directive");
4821 return ReturnVal;
4824 /// parseDirectiveEven
4825 /// ::= even
4826 bool MasmParser::parseDirectiveEven() {
4827 if (parseToken(AsmToken::EndOfStatement) || emitAlignTo(2))
4828 return addErrorSuffix(" in even directive");
4830 return false;
4833 /// parseDirectiveFile
4834 /// ::= .file filename
4835 /// ::= .file number [directory] filename [md5 checksum] [source source-text]
4836 bool MasmParser::parseDirectiveFile(SMLoc DirectiveLoc) {
4837 // FIXME: I'm not sure what this is.
4838 int64_t FileNumber = -1;
4839 if (getLexer().is(AsmToken::Integer)) {
4840 FileNumber = getTok().getIntVal();
4841 Lex();
4843 if (FileNumber < 0)
4844 return TokError("negative file number");
4847 std::string Path;
4849 // Usually the directory and filename together, otherwise just the directory.
4850 // Allow the strings to have escaped octal character sequence.
4851 if (check(getTok().isNot(AsmToken::String),
4852 "unexpected token in '.file' directive") ||
4853 parseEscapedString(Path))
4854 return true;
4856 StringRef Directory;
4857 StringRef Filename;
4858 std::string FilenameData;
4859 if (getLexer().is(AsmToken::String)) {
4860 if (check(FileNumber == -1,
4861 "explicit path specified, but no file number") ||
4862 parseEscapedString(FilenameData))
4863 return true;
4864 Filename = FilenameData;
4865 Directory = Path;
4866 } else {
4867 Filename = Path;
4870 uint64_t MD5Hi, MD5Lo;
4871 bool HasMD5 = false;
4873 Optional<StringRef> Source;
4874 bool HasSource = false;
4875 std::string SourceString;
4877 while (!parseOptionalToken(AsmToken::EndOfStatement)) {
4878 StringRef Keyword;
4879 if (check(getTok().isNot(AsmToken::Identifier),
4880 "unexpected token in '.file' directive") ||
4881 parseIdentifier(Keyword))
4882 return true;
4883 if (Keyword == "md5") {
4884 HasMD5 = true;
4885 if (check(FileNumber == -1,
4886 "MD5 checksum specified, but no file number") ||
4887 parseHexOcta(*this, MD5Hi, MD5Lo))
4888 return true;
4889 } else if (Keyword == "source") {
4890 HasSource = true;
4891 if (check(FileNumber == -1,
4892 "source specified, but no file number") ||
4893 check(getTok().isNot(AsmToken::String),
4894 "unexpected token in '.file' directive") ||
4895 parseEscapedString(SourceString))
4896 return true;
4897 } else {
4898 return TokError("unexpected token in '.file' directive");
4902 if (FileNumber == -1) {
4903 // Ignore the directive if there is no number and the target doesn't support
4904 // numberless .file directives. This allows some portability of assembler
4905 // between different object file formats.
4906 if (getContext().getAsmInfo()->hasSingleParameterDotFile())
4907 getStreamer().emitFileDirective(Filename);
4908 } else {
4909 // In case there is a -g option as well as debug info from directive .file,
4910 // we turn off the -g option, directly use the existing debug info instead.
4911 // Throw away any implicit file table for the assembler source.
4912 if (Ctx.getGenDwarfForAssembly()) {
4913 Ctx.getMCDwarfLineTable(0).resetFileTable();
4914 Ctx.setGenDwarfForAssembly(false);
4917 Optional<MD5::MD5Result> CKMem;
4918 if (HasMD5) {
4919 MD5::MD5Result Sum;
4920 for (unsigned i = 0; i != 8; ++i) {
4921 Sum.Bytes[i] = uint8_t(MD5Hi >> ((7 - i) * 8));
4922 Sum.Bytes[i + 8] = uint8_t(MD5Lo >> ((7 - i) * 8));
4924 CKMem = Sum;
4926 if (HasSource) {
4927 char *SourceBuf = static_cast<char *>(Ctx.allocate(SourceString.size()));
4928 memcpy(SourceBuf, SourceString.data(), SourceString.size());
4929 Source = StringRef(SourceBuf, SourceString.size());
4931 if (FileNumber == 0) {
4932 if (Ctx.getDwarfVersion() < 5)
4933 return Warning(DirectiveLoc, "file 0 not supported prior to DWARF-5");
4934 getStreamer().emitDwarfFile0Directive(Directory, Filename, CKMem, Source);
4935 } else {
4936 Expected<unsigned> FileNumOrErr = getStreamer().tryEmitDwarfFileDirective(
4937 FileNumber, Directory, Filename, CKMem, Source);
4938 if (!FileNumOrErr)
4939 return Error(DirectiveLoc, toString(FileNumOrErr.takeError()));
4941 // Alert the user if there are some .file directives with MD5 and some not.
4942 // But only do that once.
4943 if (!ReportedInconsistentMD5 && !Ctx.isDwarfMD5UsageConsistent(0)) {
4944 ReportedInconsistentMD5 = true;
4945 return Warning(DirectiveLoc, "inconsistent use of MD5 checksums");
4949 return false;
4952 /// parseDirectiveLine
4953 /// ::= .line [number]
4954 bool MasmParser::parseDirectiveLine() {
4955 int64_t LineNumber;
4956 if (getLexer().is(AsmToken::Integer)) {
4957 if (parseIntToken(LineNumber, "unexpected token in '.line' directive"))
4958 return true;
4959 (void)LineNumber;
4960 // FIXME: Do something with the .line.
4962 if (parseToken(AsmToken::EndOfStatement,
4963 "unexpected token in '.line' directive"))
4964 return true;
4966 return false;
4969 /// parseDirectiveLoc
4970 /// ::= .loc FileNumber [LineNumber] [ColumnPos] [basic_block] [prologue_end]
4971 /// [epilogue_begin] [is_stmt VALUE] [isa VALUE]
4972 /// The first number is a file number, must have been previously assigned with
4973 /// a .file directive, the second number is the line number and optionally the
4974 /// third number is a column position (zero if not specified). The remaining
4975 /// optional items are .loc sub-directives.
4976 bool MasmParser::parseDirectiveLoc() {
4977 int64_t FileNumber = 0, LineNumber = 0;
4978 SMLoc Loc = getTok().getLoc();
4979 if (parseIntToken(FileNumber, "unexpected token in '.loc' directive") ||
4980 check(FileNumber < 1 && Ctx.getDwarfVersion() < 5, Loc,
4981 "file number less than one in '.loc' directive") ||
4982 check(!getContext().isValidDwarfFileNumber(FileNumber), Loc,
4983 "unassigned file number in '.loc' directive"))
4984 return true;
4986 // optional
4987 if (getLexer().is(AsmToken::Integer)) {
4988 LineNumber = getTok().getIntVal();
4989 if (LineNumber < 0)
4990 return TokError("line number less than zero in '.loc' directive");
4991 Lex();
4994 int64_t ColumnPos = 0;
4995 if (getLexer().is(AsmToken::Integer)) {
4996 ColumnPos = getTok().getIntVal();
4997 if (ColumnPos < 0)
4998 return TokError("column position less than zero in '.loc' directive");
4999 Lex();
5002 auto PrevFlags = getContext().getCurrentDwarfLoc().getFlags();
5003 unsigned Flags = PrevFlags & DWARF2_FLAG_IS_STMT;
5004 unsigned Isa = 0;
5005 int64_t Discriminator = 0;
5007 auto parseLocOp = [&]() -> bool {
5008 StringRef Name;
5009 SMLoc Loc = getTok().getLoc();
5010 if (parseIdentifier(Name))
5011 return TokError("unexpected token in '.loc' directive");
5013 if (Name == "basic_block")
5014 Flags |= DWARF2_FLAG_BASIC_BLOCK;
5015 else if (Name == "prologue_end")
5016 Flags |= DWARF2_FLAG_PROLOGUE_END;
5017 else if (Name == "epilogue_begin")
5018 Flags |= DWARF2_FLAG_EPILOGUE_BEGIN;
5019 else if (Name == "is_stmt") {
5020 Loc = getTok().getLoc();
5021 const MCExpr *Value;
5022 if (parseExpression(Value))
5023 return true;
5024 // The expression must be the constant 0 or 1.
5025 if (const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(Value)) {
5026 int Value = MCE->getValue();
5027 if (Value == 0)
5028 Flags &= ~DWARF2_FLAG_IS_STMT;
5029 else if (Value == 1)
5030 Flags |= DWARF2_FLAG_IS_STMT;
5031 else
5032 return Error(Loc, "is_stmt value not 0 or 1");
5033 } else {
5034 return Error(Loc, "is_stmt value not the constant value of 0 or 1");
5036 } else if (Name == "isa") {
5037 Loc = getTok().getLoc();
5038 const MCExpr *Value;
5039 if (parseExpression(Value))
5040 return true;
5041 // The expression must be a constant greater or equal to 0.
5042 if (const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(Value)) {
5043 int Value = MCE->getValue();
5044 if (Value < 0)
5045 return Error(Loc, "isa number less than zero");
5046 Isa = Value;
5047 } else {
5048 return Error(Loc, "isa number not a constant value");
5050 } else if (Name == "discriminator") {
5051 if (parseAbsoluteExpression(Discriminator))
5052 return true;
5053 } else {
5054 return Error(Loc, "unknown sub-directive in '.loc' directive");
5056 return false;
5059 if (parseMany(parseLocOp, false /*hasComma*/))
5060 return true;
5062 getStreamer().emitDwarfLocDirective(FileNumber, LineNumber, ColumnPos, Flags,
5063 Isa, Discriminator, StringRef());
5065 return false;
5068 /// parseDirectiveStabs
5069 /// ::= .stabs string, number, number, number
5070 bool MasmParser::parseDirectiveStabs() {
5071 return TokError("unsupported directive '.stabs'");
5074 /// parseDirectiveCVFile
5075 /// ::= .cv_file number filename [checksum] [checksumkind]
5076 bool MasmParser::parseDirectiveCVFile() {
5077 SMLoc FileNumberLoc = getTok().getLoc();
5078 int64_t FileNumber;
5079 std::string Filename;
5080 std::string Checksum;
5081 int64_t ChecksumKind = 0;
5083 if (parseIntToken(FileNumber,
5084 "expected file number in '.cv_file' directive") ||
5085 check(FileNumber < 1, FileNumberLoc, "file number less than one") ||
5086 check(getTok().isNot(AsmToken::String),
5087 "unexpected token in '.cv_file' directive") ||
5088 parseEscapedString(Filename))
5089 return true;
5090 if (!parseOptionalToken(AsmToken::EndOfStatement)) {
5091 if (check(getTok().isNot(AsmToken::String),
5092 "unexpected token in '.cv_file' directive") ||
5093 parseEscapedString(Checksum) ||
5094 parseIntToken(ChecksumKind,
5095 "expected checksum kind in '.cv_file' directive") ||
5096 parseToken(AsmToken::EndOfStatement,
5097 "unexpected token in '.cv_file' directive"))
5098 return true;
5101 Checksum = fromHex(Checksum);
5102 void *CKMem = Ctx.allocate(Checksum.size(), 1);
5103 memcpy(CKMem, Checksum.data(), Checksum.size());
5104 ArrayRef<uint8_t> ChecksumAsBytes(reinterpret_cast<const uint8_t *>(CKMem),
5105 Checksum.size());
5107 if (!getStreamer().EmitCVFileDirective(FileNumber, Filename, ChecksumAsBytes,
5108 static_cast<uint8_t>(ChecksumKind)))
5109 return Error(FileNumberLoc, "file number already allocated");
5111 return false;
5114 bool MasmParser::parseCVFunctionId(int64_t &FunctionId,
5115 StringRef DirectiveName) {
5116 SMLoc Loc;
5117 return parseTokenLoc(Loc) ||
5118 parseIntToken(FunctionId, "expected function id in '" + DirectiveName +
5119 "' directive") ||
5120 check(FunctionId < 0 || FunctionId >= UINT_MAX, Loc,
5121 "expected function id within range [0, UINT_MAX)");
5124 bool MasmParser::parseCVFileId(int64_t &FileNumber, StringRef DirectiveName) {
5125 SMLoc Loc;
5126 return parseTokenLoc(Loc) ||
5127 parseIntToken(FileNumber, "expected integer in '" + DirectiveName +
5128 "' directive") ||
5129 check(FileNumber < 1, Loc, "file number less than one in '" +
5130 DirectiveName + "' directive") ||
5131 check(!getCVContext().isValidFileNumber(FileNumber), Loc,
5132 "unassigned file number in '" + DirectiveName + "' directive");
5135 /// parseDirectiveCVFuncId
5136 /// ::= .cv_func_id FunctionId
5138 /// Introduces a function ID that can be used with .cv_loc.
5139 bool MasmParser::parseDirectiveCVFuncId() {
5140 SMLoc FunctionIdLoc = getTok().getLoc();
5141 int64_t FunctionId;
5143 if (parseCVFunctionId(FunctionId, ".cv_func_id") ||
5144 parseToken(AsmToken::EndOfStatement,
5145 "unexpected token in '.cv_func_id' directive"))
5146 return true;
5148 if (!getStreamer().EmitCVFuncIdDirective(FunctionId))
5149 return Error(FunctionIdLoc, "function id already allocated");
5151 return false;
5154 /// parseDirectiveCVInlineSiteId
5155 /// ::= .cv_inline_site_id FunctionId
5156 /// "within" IAFunc
5157 /// "inlined_at" IAFile IALine [IACol]
5159 /// Introduces a function ID that can be used with .cv_loc. Includes "inlined
5160 /// at" source location information for use in the line table of the caller,
5161 /// whether the caller is a real function or another inlined call site.
5162 bool MasmParser::parseDirectiveCVInlineSiteId() {
5163 SMLoc FunctionIdLoc = getTok().getLoc();
5164 int64_t FunctionId;
5165 int64_t IAFunc;
5166 int64_t IAFile;
5167 int64_t IALine;
5168 int64_t IACol = 0;
5170 // FunctionId
5171 if (parseCVFunctionId(FunctionId, ".cv_inline_site_id"))
5172 return true;
5174 // "within"
5175 if (check((getLexer().isNot(AsmToken::Identifier) ||
5176 getTok().getIdentifier() != "within"),
5177 "expected 'within' identifier in '.cv_inline_site_id' directive"))
5178 return true;
5179 Lex();
5181 // IAFunc
5182 if (parseCVFunctionId(IAFunc, ".cv_inline_site_id"))
5183 return true;
5185 // "inlined_at"
5186 if (check((getLexer().isNot(AsmToken::Identifier) ||
5187 getTok().getIdentifier() != "inlined_at"),
5188 "expected 'inlined_at' identifier in '.cv_inline_site_id' "
5189 "directive") )
5190 return true;
5191 Lex();
5193 // IAFile IALine
5194 if (parseCVFileId(IAFile, ".cv_inline_site_id") ||
5195 parseIntToken(IALine, "expected line number after 'inlined_at'"))
5196 return true;
5198 // [IACol]
5199 if (getLexer().is(AsmToken::Integer)) {
5200 IACol = getTok().getIntVal();
5201 Lex();
5204 if (parseToken(AsmToken::EndOfStatement,
5205 "unexpected token in '.cv_inline_site_id' directive"))
5206 return true;
5208 if (!getStreamer().EmitCVInlineSiteIdDirective(FunctionId, IAFunc, IAFile,
5209 IALine, IACol, FunctionIdLoc))
5210 return Error(FunctionIdLoc, "function id already allocated");
5212 return false;
5215 /// parseDirectiveCVLoc
5216 /// ::= .cv_loc FunctionId FileNumber [LineNumber] [ColumnPos] [prologue_end]
5217 /// [is_stmt VALUE]
5218 /// The first number is a file number, must have been previously assigned with
5219 /// a .file directive, the second number is the line number and optionally the
5220 /// third number is a column position (zero if not specified). The remaining
5221 /// optional items are .loc sub-directives.
5222 bool MasmParser::parseDirectiveCVLoc() {
5223 SMLoc DirectiveLoc = getTok().getLoc();
5224 int64_t FunctionId, FileNumber;
5225 if (parseCVFunctionId(FunctionId, ".cv_loc") ||
5226 parseCVFileId(FileNumber, ".cv_loc"))
5227 return true;
5229 int64_t LineNumber = 0;
5230 if (getLexer().is(AsmToken::Integer)) {
5231 LineNumber = getTok().getIntVal();
5232 if (LineNumber < 0)
5233 return TokError("line number less than zero in '.cv_loc' directive");
5234 Lex();
5237 int64_t ColumnPos = 0;
5238 if (getLexer().is(AsmToken::Integer)) {
5239 ColumnPos = getTok().getIntVal();
5240 if (ColumnPos < 0)
5241 return TokError("column position less than zero in '.cv_loc' directive");
5242 Lex();
5245 bool PrologueEnd = false;
5246 uint64_t IsStmt = 0;
5248 auto parseOp = [&]() -> bool {
5249 StringRef Name;
5250 SMLoc Loc = getTok().getLoc();
5251 if (parseIdentifier(Name))
5252 return TokError("unexpected token in '.cv_loc' directive");
5253 if (Name == "prologue_end")
5254 PrologueEnd = true;
5255 else if (Name == "is_stmt") {
5256 Loc = getTok().getLoc();
5257 const MCExpr *Value;
5258 if (parseExpression(Value))
5259 return true;
5260 // The expression must be the constant 0 or 1.
5261 IsStmt = ~0ULL;
5262 if (const auto *MCE = dyn_cast<MCConstantExpr>(Value))
5263 IsStmt = MCE->getValue();
5265 if (IsStmt > 1)
5266 return Error(Loc, "is_stmt value not 0 or 1");
5267 } else {
5268 return Error(Loc, "unknown sub-directive in '.cv_loc' directive");
5270 return false;
5273 if (parseMany(parseOp, false /*hasComma*/))
5274 return true;
5276 getStreamer().emitCVLocDirective(FunctionId, FileNumber, LineNumber,
5277 ColumnPos, PrologueEnd, IsStmt, StringRef(),
5278 DirectiveLoc);
5279 return false;
5282 /// parseDirectiveCVLinetable
5283 /// ::= .cv_linetable FunctionId, FnStart, FnEnd
5284 bool MasmParser::parseDirectiveCVLinetable() {
5285 int64_t FunctionId;
5286 StringRef FnStartName, FnEndName;
5287 SMLoc Loc = getTok().getLoc();
5288 if (parseCVFunctionId(FunctionId, ".cv_linetable") ||
5289 parseToken(AsmToken::Comma,
5290 "unexpected token in '.cv_linetable' directive") ||
5291 parseTokenLoc(Loc) || check(parseIdentifier(FnStartName), Loc,
5292 "expected identifier in directive") ||
5293 parseToken(AsmToken::Comma,
5294 "unexpected token in '.cv_linetable' directive") ||
5295 parseTokenLoc(Loc) || check(parseIdentifier(FnEndName), Loc,
5296 "expected identifier in directive"))
5297 return true;
5299 MCSymbol *FnStartSym = getContext().getOrCreateSymbol(FnStartName);
5300 MCSymbol *FnEndSym = getContext().getOrCreateSymbol(FnEndName);
5302 getStreamer().emitCVLinetableDirective(FunctionId, FnStartSym, FnEndSym);
5303 return false;
5306 /// parseDirectiveCVInlineLinetable
5307 /// ::= .cv_inline_linetable PrimaryFunctionId FileId LineNum FnStart FnEnd
5308 bool MasmParser::parseDirectiveCVInlineLinetable() {
5309 int64_t PrimaryFunctionId, SourceFileId, SourceLineNum;
5310 StringRef FnStartName, FnEndName;
5311 SMLoc Loc = getTok().getLoc();
5312 if (parseCVFunctionId(PrimaryFunctionId, ".cv_inline_linetable") ||
5313 parseTokenLoc(Loc) ||
5314 parseIntToken(
5315 SourceFileId,
5316 "expected SourceField in '.cv_inline_linetable' directive") ||
5317 check(SourceFileId <= 0, Loc,
5318 "File id less than zero in '.cv_inline_linetable' directive") ||
5319 parseTokenLoc(Loc) ||
5320 parseIntToken(
5321 SourceLineNum,
5322 "expected SourceLineNum in '.cv_inline_linetable' directive") ||
5323 check(SourceLineNum < 0, Loc,
5324 "Line number less than zero in '.cv_inline_linetable' directive") ||
5325 parseTokenLoc(Loc) || check(parseIdentifier(FnStartName), Loc,
5326 "expected identifier in directive") ||
5327 parseTokenLoc(Loc) || check(parseIdentifier(FnEndName), Loc,
5328 "expected identifier in directive"))
5329 return true;
5331 if (parseToken(AsmToken::EndOfStatement, "Expected End of Statement"))
5332 return true;
5334 MCSymbol *FnStartSym = getContext().getOrCreateSymbol(FnStartName);
5335 MCSymbol *FnEndSym = getContext().getOrCreateSymbol(FnEndName);
5336 getStreamer().emitCVInlineLinetableDirective(PrimaryFunctionId, SourceFileId,
5337 SourceLineNum, FnStartSym,
5338 FnEndSym);
5339 return false;
5342 void MasmParser::initializeCVDefRangeTypeMap() {
5343 CVDefRangeTypeMap["reg"] = CVDR_DEFRANGE_REGISTER;
5344 CVDefRangeTypeMap["frame_ptr_rel"] = CVDR_DEFRANGE_FRAMEPOINTER_REL;
5345 CVDefRangeTypeMap["subfield_reg"] = CVDR_DEFRANGE_SUBFIELD_REGISTER;
5346 CVDefRangeTypeMap["reg_rel"] = CVDR_DEFRANGE_REGISTER_REL;
5349 /// parseDirectiveCVDefRange
5350 /// ::= .cv_def_range RangeStart RangeEnd (GapStart GapEnd)*, bytes*
5351 bool MasmParser::parseDirectiveCVDefRange() {
5352 SMLoc Loc;
5353 std::vector<std::pair<const MCSymbol *, const MCSymbol *>> Ranges;
5354 while (getLexer().is(AsmToken::Identifier)) {
5355 Loc = getLexer().getLoc();
5356 StringRef GapStartName;
5357 if (parseIdentifier(GapStartName))
5358 return Error(Loc, "expected identifier in directive");
5359 MCSymbol *GapStartSym = getContext().getOrCreateSymbol(GapStartName);
5361 Loc = getLexer().getLoc();
5362 StringRef GapEndName;
5363 if (parseIdentifier(GapEndName))
5364 return Error(Loc, "expected identifier in directive");
5365 MCSymbol *GapEndSym = getContext().getOrCreateSymbol(GapEndName);
5367 Ranges.push_back({GapStartSym, GapEndSym});
5370 StringRef CVDefRangeTypeStr;
5371 if (parseToken(
5372 AsmToken::Comma,
5373 "expected comma before def_range type in .cv_def_range directive") ||
5374 parseIdentifier(CVDefRangeTypeStr))
5375 return Error(Loc, "expected def_range type in directive");
5377 StringMap<CVDefRangeType>::const_iterator CVTypeIt =
5378 CVDefRangeTypeMap.find(CVDefRangeTypeStr);
5379 CVDefRangeType CVDRType = (CVTypeIt == CVDefRangeTypeMap.end())
5380 ? CVDR_DEFRANGE
5381 : CVTypeIt->getValue();
5382 switch (CVDRType) {
5383 case CVDR_DEFRANGE_REGISTER: {
5384 int64_t DRRegister;
5385 if (parseToken(AsmToken::Comma, "expected comma before register number in "
5386 ".cv_def_range directive") ||
5387 parseAbsoluteExpression(DRRegister))
5388 return Error(Loc, "expected register number");
5390 codeview::DefRangeRegisterHeader DRHdr;
5391 DRHdr.Register = DRRegister;
5392 DRHdr.MayHaveNoName = 0;
5393 getStreamer().emitCVDefRangeDirective(Ranges, DRHdr);
5394 break;
5396 case CVDR_DEFRANGE_FRAMEPOINTER_REL: {
5397 int64_t DROffset;
5398 if (parseToken(AsmToken::Comma,
5399 "expected comma before offset in .cv_def_range directive") ||
5400 parseAbsoluteExpression(DROffset))
5401 return Error(Loc, "expected offset value");
5403 codeview::DefRangeFramePointerRelHeader DRHdr;
5404 DRHdr.Offset = DROffset;
5405 getStreamer().emitCVDefRangeDirective(Ranges, DRHdr);
5406 break;
5408 case CVDR_DEFRANGE_SUBFIELD_REGISTER: {
5409 int64_t DRRegister;
5410 int64_t DROffsetInParent;
5411 if (parseToken(AsmToken::Comma, "expected comma before register number in "
5412 ".cv_def_range directive") ||
5413 parseAbsoluteExpression(DRRegister))
5414 return Error(Loc, "expected register number");
5415 if (parseToken(AsmToken::Comma,
5416 "expected comma before offset in .cv_def_range directive") ||
5417 parseAbsoluteExpression(DROffsetInParent))
5418 return Error(Loc, "expected offset value");
5420 codeview::DefRangeSubfieldRegisterHeader DRHdr;
5421 DRHdr.Register = DRRegister;
5422 DRHdr.MayHaveNoName = 0;
5423 DRHdr.OffsetInParent = DROffsetInParent;
5424 getStreamer().emitCVDefRangeDirective(Ranges, DRHdr);
5425 break;
5427 case CVDR_DEFRANGE_REGISTER_REL: {
5428 int64_t DRRegister;
5429 int64_t DRFlags;
5430 int64_t DRBasePointerOffset;
5431 if (parseToken(AsmToken::Comma, "expected comma before register number in "
5432 ".cv_def_range directive") ||
5433 parseAbsoluteExpression(DRRegister))
5434 return Error(Loc, "expected register value");
5435 if (parseToken(
5436 AsmToken::Comma,
5437 "expected comma before flag value in .cv_def_range directive") ||
5438 parseAbsoluteExpression(DRFlags))
5439 return Error(Loc, "expected flag value");
5440 if (parseToken(AsmToken::Comma, "expected comma before base pointer offset "
5441 "in .cv_def_range directive") ||
5442 parseAbsoluteExpression(DRBasePointerOffset))
5443 return Error(Loc, "expected base pointer offset value");
5445 codeview::DefRangeRegisterRelHeader DRHdr;
5446 DRHdr.Register = DRRegister;
5447 DRHdr.Flags = DRFlags;
5448 DRHdr.BasePointerOffset = DRBasePointerOffset;
5449 getStreamer().emitCVDefRangeDirective(Ranges, DRHdr);
5450 break;
5452 default:
5453 return Error(Loc, "unexpected def_range type in .cv_def_range directive");
5455 return true;
5458 /// parseDirectiveCVString
5459 /// ::= .cv_stringtable "string"
5460 bool MasmParser::parseDirectiveCVString() {
5461 std::string Data;
5462 if (checkForValidSection() || parseEscapedString(Data))
5463 return addErrorSuffix(" in '.cv_string' directive");
5465 // Put the string in the table and emit the offset.
5466 std::pair<StringRef, unsigned> Insertion =
5467 getCVContext().addToStringTable(Data);
5468 getStreamer().emitIntValue(Insertion.second, 4);
5469 return false;
5472 /// parseDirectiveCVStringTable
5473 /// ::= .cv_stringtable
5474 bool MasmParser::parseDirectiveCVStringTable() {
5475 getStreamer().emitCVStringTableDirective();
5476 return false;
5479 /// parseDirectiveCVFileChecksums
5480 /// ::= .cv_filechecksums
5481 bool MasmParser::parseDirectiveCVFileChecksums() {
5482 getStreamer().emitCVFileChecksumsDirective();
5483 return false;
5486 /// parseDirectiveCVFileChecksumOffset
5487 /// ::= .cv_filechecksumoffset fileno
5488 bool MasmParser::parseDirectiveCVFileChecksumOffset() {
5489 int64_t FileNo;
5490 if (parseIntToken(FileNo, "expected identifier in directive"))
5491 return true;
5492 if (parseToken(AsmToken::EndOfStatement, "Expected End of Statement"))
5493 return true;
5494 getStreamer().emitCVFileChecksumOffsetDirective(FileNo);
5495 return false;
5498 /// parseDirectiveCVFPOData
5499 /// ::= .cv_fpo_data procsym
5500 bool MasmParser::parseDirectiveCVFPOData() {
5501 SMLoc DirLoc = getLexer().getLoc();
5502 StringRef ProcName;
5503 if (parseIdentifier(ProcName))
5504 return TokError("expected symbol name");
5505 if (parseEOL("unexpected tokens"))
5506 return addErrorSuffix(" in '.cv_fpo_data' directive");
5507 MCSymbol *ProcSym = getContext().getOrCreateSymbol(ProcName);
5508 getStreamer().EmitCVFPOData(ProcSym, DirLoc);
5509 return false;
5512 /// parseDirectiveCFISections
5513 /// ::= .cfi_sections section [, section]
5514 bool MasmParser::parseDirectiveCFISections() {
5515 StringRef Name;
5516 bool EH = false;
5517 bool Debug = false;
5519 if (parseIdentifier(Name))
5520 return TokError("Expected an identifier");
5522 if (Name == ".eh_frame")
5523 EH = true;
5524 else if (Name == ".debug_frame")
5525 Debug = true;
5527 if (getLexer().is(AsmToken::Comma)) {
5528 Lex();
5530 if (parseIdentifier(Name))
5531 return TokError("Expected an identifier");
5533 if (Name == ".eh_frame")
5534 EH = true;
5535 else if (Name == ".debug_frame")
5536 Debug = true;
5539 getStreamer().emitCFISections(EH, Debug);
5540 return false;
5543 /// parseDirectiveCFIStartProc
5544 /// ::= .cfi_startproc [simple]
5545 bool MasmParser::parseDirectiveCFIStartProc() {
5546 StringRef Simple;
5547 if (!parseOptionalToken(AsmToken::EndOfStatement)) {
5548 if (check(parseIdentifier(Simple) || Simple != "simple",
5549 "unexpected token") ||
5550 parseToken(AsmToken::EndOfStatement))
5551 return addErrorSuffix(" in '.cfi_startproc' directive");
5554 // TODO(kristina): Deal with a corner case of incorrect diagnostic context
5555 // being produced if this directive is emitted as part of preprocessor macro
5556 // expansion which can *ONLY* happen if Clang's cc1as is the API consumer.
5557 // Tools like llvm-mc on the other hand are not affected by it, and report
5558 // correct context information.
5559 getStreamer().emitCFIStartProc(!Simple.empty(), Lexer.getLoc());
5560 return false;
5563 /// parseDirectiveCFIEndProc
5564 /// ::= .cfi_endproc
5565 bool MasmParser::parseDirectiveCFIEndProc() {
5566 getStreamer().emitCFIEndProc();
5567 return false;
5570 /// parse register name or number.
5571 bool MasmParser::parseRegisterOrRegisterNumber(int64_t &Register,
5572 SMLoc DirectiveLoc) {
5573 unsigned RegNo;
5575 if (getLexer().isNot(AsmToken::Integer)) {
5576 if (getTargetParser().ParseRegister(RegNo, DirectiveLoc, DirectiveLoc))
5577 return true;
5578 Register = getContext().getRegisterInfo()->getDwarfRegNum(RegNo, true);
5579 } else
5580 return parseAbsoluteExpression(Register);
5582 return false;
5585 /// parseDirectiveCFIDefCfa
5586 /// ::= .cfi_def_cfa register, offset
5587 bool MasmParser::parseDirectiveCFIDefCfa(SMLoc DirectiveLoc) {
5588 int64_t Register = 0, Offset = 0;
5589 if (parseRegisterOrRegisterNumber(Register, DirectiveLoc) ||
5590 parseToken(AsmToken::Comma, "unexpected token in directive") ||
5591 parseAbsoluteExpression(Offset))
5592 return true;
5594 getStreamer().emitCFIDefCfa(Register, Offset);
5595 return false;
5598 /// parseDirectiveCFIDefCfaOffset
5599 /// ::= .cfi_def_cfa_offset offset
5600 bool MasmParser::parseDirectiveCFIDefCfaOffset() {
5601 int64_t Offset = 0;
5602 if (parseAbsoluteExpression(Offset))
5603 return true;
5605 getStreamer().emitCFIDefCfaOffset(Offset);
5606 return false;
5609 /// parseDirectiveCFIRegister
5610 /// ::= .cfi_register register, register
5611 bool MasmParser::parseDirectiveCFIRegister(SMLoc DirectiveLoc) {
5612 int64_t Register1 = 0, Register2 = 0;
5613 if (parseRegisterOrRegisterNumber(Register1, DirectiveLoc) ||
5614 parseToken(AsmToken::Comma, "unexpected token in directive") ||
5615 parseRegisterOrRegisterNumber(Register2, DirectiveLoc))
5616 return true;
5618 getStreamer().emitCFIRegister(Register1, Register2);
5619 return false;
5622 /// parseDirectiveCFIWindowSave
5623 /// ::= .cfi_window_save
5624 bool MasmParser::parseDirectiveCFIWindowSave() {
5625 getStreamer().emitCFIWindowSave();
5626 return false;
5629 /// parseDirectiveCFIAdjustCfaOffset
5630 /// ::= .cfi_adjust_cfa_offset adjustment
5631 bool MasmParser::parseDirectiveCFIAdjustCfaOffset() {
5632 int64_t Adjustment = 0;
5633 if (parseAbsoluteExpression(Adjustment))
5634 return true;
5636 getStreamer().emitCFIAdjustCfaOffset(Adjustment);
5637 return false;
5640 /// parseDirectiveCFIDefCfaRegister
5641 /// ::= .cfi_def_cfa_register register
5642 bool MasmParser::parseDirectiveCFIDefCfaRegister(SMLoc DirectiveLoc) {
5643 int64_t Register = 0;
5644 if (parseRegisterOrRegisterNumber(Register, DirectiveLoc))
5645 return true;
5647 getStreamer().emitCFIDefCfaRegister(Register);
5648 return false;
5651 /// parseDirectiveCFIOffset
5652 /// ::= .cfi_offset register, offset
5653 bool MasmParser::parseDirectiveCFIOffset(SMLoc DirectiveLoc) {
5654 int64_t Register = 0;
5655 int64_t Offset = 0;
5657 if (parseRegisterOrRegisterNumber(Register, DirectiveLoc) ||
5658 parseToken(AsmToken::Comma, "unexpected token in directive") ||
5659 parseAbsoluteExpression(Offset))
5660 return true;
5662 getStreamer().emitCFIOffset(Register, Offset);
5663 return false;
5666 /// parseDirectiveCFIRelOffset
5667 /// ::= .cfi_rel_offset register, offset
5668 bool MasmParser::parseDirectiveCFIRelOffset(SMLoc DirectiveLoc) {
5669 int64_t Register = 0, Offset = 0;
5671 if (parseRegisterOrRegisterNumber(Register, DirectiveLoc) ||
5672 parseToken(AsmToken::Comma, "unexpected token in directive") ||
5673 parseAbsoluteExpression(Offset))
5674 return true;
5676 getStreamer().emitCFIRelOffset(Register, Offset);
5677 return false;
5680 static bool isValidEncoding(int64_t Encoding) {
5681 if (Encoding & ~0xff)
5682 return false;
5684 if (Encoding == dwarf::DW_EH_PE_omit)
5685 return true;
5687 const unsigned Format = Encoding & 0xf;
5688 if (Format != dwarf::DW_EH_PE_absptr && Format != dwarf::DW_EH_PE_udata2 &&
5689 Format != dwarf::DW_EH_PE_udata4 && Format != dwarf::DW_EH_PE_udata8 &&
5690 Format != dwarf::DW_EH_PE_sdata2 && Format != dwarf::DW_EH_PE_sdata4 &&
5691 Format != dwarf::DW_EH_PE_sdata8 && Format != dwarf::DW_EH_PE_signed)
5692 return false;
5694 const unsigned Application = Encoding & 0x70;
5695 if (Application != dwarf::DW_EH_PE_absptr &&
5696 Application != dwarf::DW_EH_PE_pcrel)
5697 return false;
5699 return true;
5702 /// parseDirectiveCFIPersonalityOrLsda
5703 /// IsPersonality true for cfi_personality, false for cfi_lsda
5704 /// ::= .cfi_personality encoding, [symbol_name]
5705 /// ::= .cfi_lsda encoding, [symbol_name]
5706 bool MasmParser::parseDirectiveCFIPersonalityOrLsda(bool IsPersonality) {
5707 int64_t Encoding = 0;
5708 if (parseAbsoluteExpression(Encoding))
5709 return true;
5710 if (Encoding == dwarf::DW_EH_PE_omit)
5711 return false;
5713 StringRef Name;
5714 if (check(!isValidEncoding(Encoding), "unsupported encoding.") ||
5715 parseToken(AsmToken::Comma, "unexpected token in directive") ||
5716 check(parseIdentifier(Name), "expected identifier in directive"))
5717 return true;
5719 MCSymbol *Sym = getContext().getOrCreateSymbol(Name);
5721 if (IsPersonality)
5722 getStreamer().emitCFIPersonality(Sym, Encoding);
5723 else
5724 getStreamer().emitCFILsda(Sym, Encoding);
5725 return false;
5728 /// parseDirectiveCFIRememberState
5729 /// ::= .cfi_remember_state
5730 bool MasmParser::parseDirectiveCFIRememberState() {
5731 getStreamer().emitCFIRememberState();
5732 return false;
5735 /// parseDirectiveCFIRestoreState
5736 /// ::= .cfi_remember_state
5737 bool MasmParser::parseDirectiveCFIRestoreState() {
5738 getStreamer().emitCFIRestoreState();
5739 return false;
5742 /// parseDirectiveCFISameValue
5743 /// ::= .cfi_same_value register
5744 bool MasmParser::parseDirectiveCFISameValue(SMLoc DirectiveLoc) {
5745 int64_t Register = 0;
5747 if (parseRegisterOrRegisterNumber(Register, DirectiveLoc))
5748 return true;
5750 getStreamer().emitCFISameValue(Register);
5751 return false;
5754 /// parseDirectiveCFIRestore
5755 /// ::= .cfi_restore register
5756 bool MasmParser::parseDirectiveCFIRestore(SMLoc DirectiveLoc) {
5757 int64_t Register = 0;
5758 if (parseRegisterOrRegisterNumber(Register, DirectiveLoc))
5759 return true;
5761 getStreamer().emitCFIRestore(Register);
5762 return false;
5765 /// parseDirectiveCFIEscape
5766 /// ::= .cfi_escape expression[,...]
5767 bool MasmParser::parseDirectiveCFIEscape() {
5768 std::string Values;
5769 int64_t CurrValue;
5770 if (parseAbsoluteExpression(CurrValue))
5771 return true;
5773 Values.push_back((uint8_t)CurrValue);
5775 while (getLexer().is(AsmToken::Comma)) {
5776 Lex();
5778 if (parseAbsoluteExpression(CurrValue))
5779 return true;
5781 Values.push_back((uint8_t)CurrValue);
5784 getStreamer().emitCFIEscape(Values);
5785 return false;
5788 /// parseDirectiveCFIReturnColumn
5789 /// ::= .cfi_return_column register
5790 bool MasmParser::parseDirectiveCFIReturnColumn(SMLoc DirectiveLoc) {
5791 int64_t Register = 0;
5792 if (parseRegisterOrRegisterNumber(Register, DirectiveLoc))
5793 return true;
5794 getStreamer().emitCFIReturnColumn(Register);
5795 return false;
5798 /// parseDirectiveCFISignalFrame
5799 /// ::= .cfi_signal_frame
5800 bool MasmParser::parseDirectiveCFISignalFrame() {
5801 if (parseToken(AsmToken::EndOfStatement,
5802 "unexpected token in '.cfi_signal_frame'"))
5803 return true;
5805 getStreamer().emitCFISignalFrame();
5806 return false;
5809 /// parseDirectiveCFIUndefined
5810 /// ::= .cfi_undefined register
5811 bool MasmParser::parseDirectiveCFIUndefined(SMLoc DirectiveLoc) {
5812 int64_t Register = 0;
5814 if (parseRegisterOrRegisterNumber(Register, DirectiveLoc))
5815 return true;
5817 getStreamer().emitCFIUndefined(Register);
5818 return false;
5821 /// parseDirectiveMacro
5822 /// ::= name macro [parameters]
5823 /// ["LOCAL" identifiers]
5824 /// parameters ::= parameter [, parameter]*
5825 /// parameter ::= name ":" qualifier
5826 /// qualifier ::= "req" | "vararg" | "=" macro_argument
5827 bool MasmParser::parseDirectiveMacro(StringRef Name, SMLoc NameLoc) {
5828 MCAsmMacroParameters Parameters;
5829 while (getLexer().isNot(AsmToken::EndOfStatement)) {
5830 if (!Parameters.empty() && Parameters.back().Vararg)
5831 return Error(Lexer.getLoc(),
5832 "Vararg parameter '" + Parameters.back().Name +
5833 "' should be last in the list of parameters");
5835 MCAsmMacroParameter Parameter;
5836 if (parseIdentifier(Parameter.Name))
5837 return TokError("expected identifier in 'macro' directive");
5839 // Emit an error if two (or more) named parameters share the same name.
5840 for (const MCAsmMacroParameter& CurrParam : Parameters)
5841 if (CurrParam.Name.equals_insensitive(Parameter.Name))
5842 return TokError("macro '" + Name + "' has multiple parameters"
5843 " named '" + Parameter.Name + "'");
5845 if (Lexer.is(AsmToken::Colon)) {
5846 Lex(); // consume ':'
5848 if (parseOptionalToken(AsmToken::Equal)) {
5849 // Default value
5850 SMLoc ParamLoc;
5852 ParamLoc = Lexer.getLoc();
5853 if (parseMacroArgument(nullptr, Parameter.Value))
5854 return true;
5855 } else {
5856 SMLoc QualLoc;
5857 StringRef Qualifier;
5859 QualLoc = Lexer.getLoc();
5860 if (parseIdentifier(Qualifier))
5861 return Error(QualLoc, "missing parameter qualifier for "
5862 "'" +
5863 Parameter.Name + "' in macro '" + Name +
5864 "'");
5866 if (Qualifier.equals_insensitive("req"))
5867 Parameter.Required = true;
5868 else if (Qualifier.equals_insensitive("vararg"))
5869 Parameter.Vararg = true;
5870 else
5871 return Error(QualLoc,
5872 Qualifier + " is not a valid parameter qualifier for '" +
5873 Parameter.Name + "' in macro '" + Name + "'");
5877 Parameters.push_back(std::move(Parameter));
5879 if (getLexer().is(AsmToken::Comma))
5880 Lex();
5883 // Eat just the end of statement.
5884 Lexer.Lex();
5886 std::vector<std::string> Locals;
5887 if (getTok().is(AsmToken::Identifier) &&
5888 getTok().getIdentifier().equals_insensitive("local")) {
5889 Lex(); // Eat the LOCAL directive.
5891 StringRef ID;
5892 while (true) {
5893 if (parseIdentifier(ID))
5894 return true;
5895 Locals.push_back(ID.lower());
5897 // If we see a comma, continue (and allow line continuation).
5898 if (!parseOptionalToken(AsmToken::Comma))
5899 break;
5900 parseOptionalToken(AsmToken::EndOfStatement);
5904 // Consuming deferred text, so use Lexer.Lex to ignore Lexing Errors.
5905 AsmToken EndToken, StartToken = getTok();
5906 unsigned MacroDepth = 0;
5907 bool IsMacroFunction = false;
5908 // Lex the macro definition.
5909 while (true) {
5910 // Ignore Lexing errors in macros.
5911 while (Lexer.is(AsmToken::Error)) {
5912 Lexer.Lex();
5915 // Check whether we have reached the end of the file.
5916 if (getLexer().is(AsmToken::Eof))
5917 return Error(NameLoc, "no matching 'endm' in definition");
5919 // Otherwise, check whether we have reached the 'endm'... and determine if
5920 // this is a macro function.
5921 if (getLexer().is(AsmToken::Identifier)) {
5922 if (getTok().getIdentifier().equals_insensitive("endm")) {
5923 if (MacroDepth == 0) { // Outermost macro.
5924 EndToken = getTok();
5925 Lexer.Lex();
5926 if (getLexer().isNot(AsmToken::EndOfStatement))
5927 return TokError("unexpected token in '" + EndToken.getIdentifier() +
5928 "' directive");
5929 break;
5930 } else {
5931 // Otherwise we just found the end of an inner macro.
5932 --MacroDepth;
5934 } else if (getTok().getIdentifier().equals_insensitive("exitm")) {
5935 if (MacroDepth == 0 && peekTok().isNot(AsmToken::EndOfStatement)) {
5936 IsMacroFunction = true;
5938 } else if (isMacroLikeDirective()) {
5939 // We allow nested macros. Those aren't instantiated until the
5940 // outermost macro is expanded so just ignore them for now.
5941 ++MacroDepth;
5945 // Otherwise, scan til the end of the statement.
5946 eatToEndOfStatement();
5949 if (getContext().lookupMacro(Name.lower())) {
5950 return Error(NameLoc, "macro '" + Name + "' is already defined");
5953 const char *BodyStart = StartToken.getLoc().getPointer();
5954 const char *BodyEnd = EndToken.getLoc().getPointer();
5955 StringRef Body = StringRef(BodyStart, BodyEnd - BodyStart);
5956 MCAsmMacro Macro(Name, Body, std::move(Parameters), std::move(Locals),
5957 IsMacroFunction);
5958 DEBUG_WITH_TYPE("asm-macros", dbgs() << "Defining new macro:\n";
5959 Macro.dump());
5960 getContext().defineMacro(Name.lower(), std::move(Macro));
5961 return false;
5964 /// parseDirectiveExitMacro
5965 /// ::= "exitm" [textitem]
5966 bool MasmParser::parseDirectiveExitMacro(SMLoc DirectiveLoc,
5967 StringRef Directive,
5968 std::string &Value) {
5969 SMLoc EndLoc = getTok().getLoc();
5970 if (getTok().isNot(AsmToken::EndOfStatement) && parseTextItem(Value))
5971 return Error(EndLoc,
5972 "unable to parse text item in '" + Directive + "' directive");
5973 eatToEndOfStatement();
5975 if (!isInsideMacroInstantiation())
5976 return TokError("unexpected '" + Directive + "' in file, "
5977 "no current macro definition");
5979 // Exit all conditionals that are active in the current macro.
5980 while (TheCondStack.size() != ActiveMacros.back()->CondStackDepth) {
5981 TheCondState = TheCondStack.back();
5982 TheCondStack.pop_back();
5985 handleMacroExit();
5986 return false;
5989 /// parseDirectiveEndMacro
5990 /// ::= endm
5991 bool MasmParser::parseDirectiveEndMacro(StringRef Directive) {
5992 if (getLexer().isNot(AsmToken::EndOfStatement))
5993 return TokError("unexpected token in '" + Directive + "' directive");
5995 // If we are inside a macro instantiation, terminate the current
5996 // instantiation.
5997 if (isInsideMacroInstantiation()) {
5998 handleMacroExit();
5999 return false;
6002 // Otherwise, this .endmacro is a stray entry in the file; well formed
6003 // .endmacro directives are handled during the macro definition parsing.
6004 return TokError("unexpected '" + Directive + "' in file, "
6005 "no current macro definition");
6008 /// parseDirectivePurgeMacro
6009 /// ::= purge identifier ( , identifier )*
6010 bool MasmParser::parseDirectivePurgeMacro(SMLoc DirectiveLoc) {
6011 StringRef Name;
6012 while (true) {
6013 SMLoc NameLoc;
6014 if (parseTokenLoc(NameLoc) ||
6015 check(parseIdentifier(Name), NameLoc,
6016 "expected identifier in 'purge' directive"))
6017 return true;
6019 DEBUG_WITH_TYPE("asm-macros", dbgs()
6020 << "Un-defining macro: " << Name << "\n");
6021 if (!getContext().lookupMacro(Name.lower()))
6022 return Error(NameLoc, "macro '" + Name + "' is not defined");
6023 getContext().undefineMacro(Name.lower());
6025 if (!parseOptionalToken(AsmToken::Comma))
6026 break;
6027 parseOptionalToken(AsmToken::EndOfStatement);
6030 return false;
6033 /// parseDirectiveSymbolAttribute
6034 /// ::= { ".globl", ".weak", ... } [ identifier ( , identifier )* ]
6035 bool MasmParser::parseDirectiveSymbolAttribute(MCSymbolAttr Attr) {
6036 auto parseOp = [&]() -> bool {
6037 StringRef Name;
6038 SMLoc Loc = getTok().getLoc();
6039 if (parseIdentifier(Name))
6040 return Error(Loc, "expected identifier");
6041 MCSymbol *Sym = getContext().getOrCreateSymbol(Name);
6043 // Assembler local symbols don't make any sense here. Complain loudly.
6044 if (Sym->isTemporary())
6045 return Error(Loc, "non-local symbol required");
6047 if (!getStreamer().emitSymbolAttribute(Sym, Attr))
6048 return Error(Loc, "unable to emit symbol attribute");
6049 return false;
6052 if (parseMany(parseOp))
6053 return addErrorSuffix(" in directive");
6054 return false;
6057 /// parseDirectiveComm
6058 /// ::= ( .comm | .lcomm ) identifier , size_expression [ , align_expression ]
6059 bool MasmParser::parseDirectiveComm(bool IsLocal) {
6060 if (checkForValidSection())
6061 return true;
6063 SMLoc IDLoc = getLexer().getLoc();
6064 StringRef Name;
6065 if (parseIdentifier(Name))
6066 return TokError("expected identifier in directive");
6068 // Handle the identifier as the key symbol.
6069 MCSymbol *Sym = getContext().getOrCreateSymbol(Name);
6071 if (getLexer().isNot(AsmToken::Comma))
6072 return TokError("unexpected token in directive");
6073 Lex();
6075 int64_t Size;
6076 SMLoc SizeLoc = getLexer().getLoc();
6077 if (parseAbsoluteExpression(Size))
6078 return true;
6080 int64_t Pow2Alignment = 0;
6081 SMLoc Pow2AlignmentLoc;
6082 if (getLexer().is(AsmToken::Comma)) {
6083 Lex();
6084 Pow2AlignmentLoc = getLexer().getLoc();
6085 if (parseAbsoluteExpression(Pow2Alignment))
6086 return true;
6088 LCOMM::LCOMMType LCOMM = Lexer.getMAI().getLCOMMDirectiveAlignmentType();
6089 if (IsLocal && LCOMM == LCOMM::NoAlignment)
6090 return Error(Pow2AlignmentLoc, "alignment not supported on this target");
6092 // If this target takes alignments in bytes (not log) validate and convert.
6093 if ((!IsLocal && Lexer.getMAI().getCOMMDirectiveAlignmentIsInBytes()) ||
6094 (IsLocal && LCOMM == LCOMM::ByteAlignment)) {
6095 if (!isPowerOf2_64(Pow2Alignment))
6096 return Error(Pow2AlignmentLoc, "alignment must be a power of 2");
6097 Pow2Alignment = Log2_64(Pow2Alignment);
6101 if (parseToken(AsmToken::EndOfStatement,
6102 "unexpected token in '.comm' or '.lcomm' directive"))
6103 return true;
6105 // NOTE: a size of zero for a .comm should create a undefined symbol
6106 // but a size of .lcomm creates a bss symbol of size zero.
6107 if (Size < 0)
6108 return Error(SizeLoc, "invalid '.comm' or '.lcomm' directive size, can't "
6109 "be less than zero");
6111 // NOTE: The alignment in the directive is a power of 2 value, the assembler
6112 // may internally end up wanting an alignment in bytes.
6113 // FIXME: Diagnose overflow.
6114 if (Pow2Alignment < 0)
6115 return Error(Pow2AlignmentLoc, "invalid '.comm' or '.lcomm' directive "
6116 "alignment, can't be less than zero");
6118 Sym->redefineIfPossible();
6119 if (!Sym->isUndefined())
6120 return Error(IDLoc, "invalid symbol redefinition");
6122 // Create the Symbol as a common or local common with Size and Pow2Alignment.
6123 if (IsLocal) {
6124 getStreamer().emitLocalCommonSymbol(Sym, Size, 1 << Pow2Alignment);
6125 return false;
6128 getStreamer().emitCommonSymbol(Sym, Size, 1 << Pow2Alignment);
6129 return false;
6132 /// parseDirectiveComment
6133 /// ::= comment delimiter [[text]]
6134 /// [[text]]
6135 /// [[text]] delimiter [[text]]
6136 bool MasmParser::parseDirectiveComment(SMLoc DirectiveLoc) {
6137 std::string FirstLine = parseStringTo(AsmToken::EndOfStatement);
6138 size_t DelimiterEnd = FirstLine.find_first_of("\b\t\v\f\r\x1A ");
6139 StringRef Delimiter = StringRef(FirstLine).take_front(DelimiterEnd);
6140 if (Delimiter.empty())
6141 return Error(DirectiveLoc, "no delimiter in 'comment' directive");
6142 do {
6143 if (getTok().is(AsmToken::Eof))
6144 return Error(DirectiveLoc, "unmatched delimiter in 'comment' directive");
6145 Lex(); // eat end of statement
6146 } while (
6147 !StringRef(parseStringTo(AsmToken::EndOfStatement)).contains(Delimiter));
6148 return parseToken(AsmToken::EndOfStatement,
6149 "unexpected token in 'comment' directive");
6152 /// parseDirectiveInclude
6153 /// ::= include <filename>
6154 /// | include filename
6155 bool MasmParser::parseDirectiveInclude() {
6156 // Allow the strings to have escaped octal character sequence.
6157 std::string Filename;
6158 SMLoc IncludeLoc = getTok().getLoc();
6160 if (parseAngleBracketString(Filename))
6161 Filename = parseStringTo(AsmToken::EndOfStatement);
6162 if (check(Filename.empty(), "missing filename in 'include' directive") ||
6163 check(getTok().isNot(AsmToken::EndOfStatement),
6164 "unexpected token in 'include' directive") ||
6165 // Attempt to switch the lexer to the included file before consuming the
6166 // end of statement to avoid losing it when we switch.
6167 check(enterIncludeFile(Filename), IncludeLoc,
6168 "Could not find include file '" + Filename + "'"))
6169 return true;
6171 return false;
6174 /// parseDirectiveIf
6175 /// ::= .if{,eq,ge,gt,le,lt,ne} expression
6176 bool MasmParser::parseDirectiveIf(SMLoc DirectiveLoc, DirectiveKind DirKind) {
6177 TheCondStack.push_back(TheCondState);
6178 TheCondState.TheCond = AsmCond::IfCond;
6179 if (TheCondState.Ignore) {
6180 eatToEndOfStatement();
6181 } else {
6182 int64_t ExprValue;
6183 if (parseAbsoluteExpression(ExprValue) ||
6184 parseToken(AsmToken::EndOfStatement,
6185 "unexpected token in '.if' directive"))
6186 return true;
6188 switch (DirKind) {
6189 default:
6190 llvm_unreachable("unsupported directive");
6191 case DK_IF:
6192 break;
6193 case DK_IFE:
6194 ExprValue = ExprValue == 0;
6195 break;
6198 TheCondState.CondMet = ExprValue;
6199 TheCondState.Ignore = !TheCondState.CondMet;
6202 return false;
6205 /// parseDirectiveIfb
6206 /// ::= .ifb textitem
6207 bool MasmParser::parseDirectiveIfb(SMLoc DirectiveLoc, bool ExpectBlank) {
6208 TheCondStack.push_back(TheCondState);
6209 TheCondState.TheCond = AsmCond::IfCond;
6211 if (TheCondState.Ignore) {
6212 eatToEndOfStatement();
6213 } else {
6214 std::string Str;
6215 if (parseTextItem(Str))
6216 return TokError("expected text item parameter for 'ifb' directive");
6218 if (parseToken(AsmToken::EndOfStatement,
6219 "unexpected token in 'ifb' directive"))
6220 return true;
6222 TheCondState.CondMet = ExpectBlank == Str.empty();
6223 TheCondState.Ignore = !TheCondState.CondMet;
6226 return false;
6229 /// parseDirectiveIfidn
6230 /// ::= ifidn textitem, textitem
6231 bool MasmParser::parseDirectiveIfidn(SMLoc DirectiveLoc, bool ExpectEqual,
6232 bool CaseInsensitive) {
6233 std::string String1, String2;
6235 if (parseTextItem(String1)) {
6236 if (ExpectEqual)
6237 return TokError("expected text item parameter for 'ifidn' directive");
6238 return TokError("expected text item parameter for 'ifdif' directive");
6241 if (Lexer.isNot(AsmToken::Comma)) {
6242 if (ExpectEqual)
6243 return TokError(
6244 "expected comma after first string for 'ifidn' directive");
6245 return TokError("expected comma after first string for 'ifdif' directive");
6247 Lex();
6249 if (parseTextItem(String2)) {
6250 if (ExpectEqual)
6251 return TokError("expected text item parameter for 'ifidn' directive");
6252 return TokError("expected text item parameter for 'ifdif' directive");
6255 TheCondStack.push_back(TheCondState);
6256 TheCondState.TheCond = AsmCond::IfCond;
6257 if (CaseInsensitive)
6258 TheCondState.CondMet =
6259 ExpectEqual == (StringRef(String1).equals_insensitive(String2));
6260 else
6261 TheCondState.CondMet = ExpectEqual == (String1 == String2);
6262 TheCondState.Ignore = !TheCondState.CondMet;
6264 return false;
6267 /// parseDirectiveIfdef
6268 /// ::= ifdef symbol
6269 /// | ifdef variable
6270 bool MasmParser::parseDirectiveIfdef(SMLoc DirectiveLoc, bool expect_defined) {
6271 TheCondStack.push_back(TheCondState);
6272 TheCondState.TheCond = AsmCond::IfCond;
6274 if (TheCondState.Ignore) {
6275 eatToEndOfStatement();
6276 } else {
6277 bool is_defined = false;
6278 unsigned RegNo;
6279 SMLoc StartLoc, EndLoc;
6280 is_defined = (getTargetParser().tryParseRegister(
6281 RegNo, StartLoc, EndLoc) == MatchOperand_Success);
6282 if (!is_defined) {
6283 StringRef Name;
6284 if (check(parseIdentifier(Name), "expected identifier after 'ifdef'") ||
6285 parseToken(AsmToken::EndOfStatement, "unexpected token in 'ifdef'"))
6286 return true;
6288 if (BuiltinSymbolMap.find(Name.lower()) != BuiltinSymbolMap.end()) {
6289 is_defined = true;
6290 } else if (Variables.find(Name.lower()) != Variables.end()) {
6291 is_defined = true;
6292 } else {
6293 MCSymbol *Sym = getContext().lookupSymbol(Name.lower());
6294 is_defined = (Sym && !Sym->isUndefined(false));
6298 TheCondState.CondMet = (is_defined == expect_defined);
6299 TheCondState.Ignore = !TheCondState.CondMet;
6302 return false;
6305 /// parseDirectiveElseIf
6306 /// ::= elseif expression
6307 bool MasmParser::parseDirectiveElseIf(SMLoc DirectiveLoc,
6308 DirectiveKind DirKind) {
6309 if (TheCondState.TheCond != AsmCond::IfCond &&
6310 TheCondState.TheCond != AsmCond::ElseIfCond)
6311 return Error(DirectiveLoc, "Encountered a .elseif that doesn't follow an"
6312 " .if or an .elseif");
6313 TheCondState.TheCond = AsmCond::ElseIfCond;
6315 bool LastIgnoreState = false;
6316 if (!TheCondStack.empty())
6317 LastIgnoreState = TheCondStack.back().Ignore;
6318 if (LastIgnoreState || TheCondState.CondMet) {
6319 TheCondState.Ignore = true;
6320 eatToEndOfStatement();
6321 } else {
6322 int64_t ExprValue;
6323 if (parseAbsoluteExpression(ExprValue))
6324 return true;
6326 if (parseToken(AsmToken::EndOfStatement,
6327 "unexpected token in '.elseif' directive"))
6328 return true;
6330 switch (DirKind) {
6331 default:
6332 llvm_unreachable("unsupported directive");
6333 case DK_ELSEIF:
6334 break;
6335 case DK_ELSEIFE:
6336 ExprValue = ExprValue == 0;
6337 break;
6340 TheCondState.CondMet = ExprValue;
6341 TheCondState.Ignore = !TheCondState.CondMet;
6344 return false;
6347 /// parseDirectiveElseIfb
6348 /// ::= elseifb textitem
6349 bool MasmParser::parseDirectiveElseIfb(SMLoc DirectiveLoc, bool ExpectBlank) {
6350 if (TheCondState.TheCond != AsmCond::IfCond &&
6351 TheCondState.TheCond != AsmCond::ElseIfCond)
6352 return Error(DirectiveLoc, "Encountered an elseif that doesn't follow an"
6353 " if or an elseif");
6354 TheCondState.TheCond = AsmCond::ElseIfCond;
6356 bool LastIgnoreState = false;
6357 if (!TheCondStack.empty())
6358 LastIgnoreState = TheCondStack.back().Ignore;
6359 if (LastIgnoreState || TheCondState.CondMet) {
6360 TheCondState.Ignore = true;
6361 eatToEndOfStatement();
6362 } else {
6363 std::string Str;
6364 if (parseTextItem(Str)) {
6365 if (ExpectBlank)
6366 return TokError("expected text item parameter for 'elseifb' directive");
6367 return TokError("expected text item parameter for 'elseifnb' directive");
6370 if (parseToken(AsmToken::EndOfStatement,
6371 "unexpected token in 'elseifb' directive"))
6372 return true;
6374 TheCondState.CondMet = ExpectBlank == Str.empty();
6375 TheCondState.Ignore = !TheCondState.CondMet;
6378 return false;
6381 /// parseDirectiveElseIfdef
6382 /// ::= elseifdef symbol
6383 /// | elseifdef variable
6384 bool MasmParser::parseDirectiveElseIfdef(SMLoc DirectiveLoc,
6385 bool expect_defined) {
6386 if (TheCondState.TheCond != AsmCond::IfCond &&
6387 TheCondState.TheCond != AsmCond::ElseIfCond)
6388 return Error(DirectiveLoc, "Encountered an elseif that doesn't follow an"
6389 " if or an elseif");
6390 TheCondState.TheCond = AsmCond::ElseIfCond;
6392 bool LastIgnoreState = false;
6393 if (!TheCondStack.empty())
6394 LastIgnoreState = TheCondStack.back().Ignore;
6395 if (LastIgnoreState || TheCondState.CondMet) {
6396 TheCondState.Ignore = true;
6397 eatToEndOfStatement();
6398 } else {
6399 bool is_defined = false;
6400 unsigned RegNo;
6401 SMLoc StartLoc, EndLoc;
6402 is_defined = (getTargetParser().tryParseRegister(RegNo, StartLoc, EndLoc) ==
6403 MatchOperand_Success);
6404 if (!is_defined) {
6405 StringRef Name;
6406 if (check(parseIdentifier(Name),
6407 "expected identifier after 'elseifdef'") ||
6408 parseToken(AsmToken::EndOfStatement,
6409 "unexpected token in 'elseifdef'"))
6410 return true;
6412 if (BuiltinSymbolMap.find(Name.lower()) != BuiltinSymbolMap.end()) {
6413 is_defined = true;
6414 } else if (Variables.find(Name.lower()) != Variables.end()) {
6415 is_defined = true;
6416 } else {
6417 MCSymbol *Sym = getContext().lookupSymbol(Name);
6418 is_defined = (Sym && !Sym->isUndefined(false));
6422 TheCondState.CondMet = (is_defined == expect_defined);
6423 TheCondState.Ignore = !TheCondState.CondMet;
6426 return false;
6429 /// parseDirectiveElseIfidn
6430 /// ::= elseifidn textitem, textitem
6431 bool MasmParser::parseDirectiveElseIfidn(SMLoc DirectiveLoc, bool ExpectEqual,
6432 bool CaseInsensitive) {
6433 if (TheCondState.TheCond != AsmCond::IfCond &&
6434 TheCondState.TheCond != AsmCond::ElseIfCond)
6435 return Error(DirectiveLoc, "Encountered an elseif that doesn't follow an"
6436 " if or an elseif");
6437 TheCondState.TheCond = AsmCond::ElseIfCond;
6439 bool LastIgnoreState = false;
6440 if (!TheCondStack.empty())
6441 LastIgnoreState = TheCondStack.back().Ignore;
6442 if (LastIgnoreState || TheCondState.CondMet) {
6443 TheCondState.Ignore = true;
6444 eatToEndOfStatement();
6445 } else {
6446 std::string String1, String2;
6448 if (parseTextItem(String1)) {
6449 if (ExpectEqual)
6450 return TokError(
6451 "expected text item parameter for 'elseifidn' directive");
6452 return TokError("expected text item parameter for 'elseifdif' directive");
6455 if (Lexer.isNot(AsmToken::Comma)) {
6456 if (ExpectEqual)
6457 return TokError(
6458 "expected comma after first string for 'elseifidn' directive");
6459 return TokError(
6460 "expected comma after first string for 'elseifdif' directive");
6462 Lex();
6464 if (parseTextItem(String2)) {
6465 if (ExpectEqual)
6466 return TokError(
6467 "expected text item parameter for 'elseifidn' directive");
6468 return TokError("expected text item parameter for 'elseifdif' directive");
6471 if (CaseInsensitive)
6472 TheCondState.CondMet =
6473 ExpectEqual == (StringRef(String1).equals_insensitive(String2));
6474 else
6475 TheCondState.CondMet = ExpectEqual == (String1 == String2);
6476 TheCondState.Ignore = !TheCondState.CondMet;
6479 return false;
6482 /// parseDirectiveElse
6483 /// ::= else
6484 bool MasmParser::parseDirectiveElse(SMLoc DirectiveLoc) {
6485 if (parseToken(AsmToken::EndOfStatement,
6486 "unexpected token in 'else' directive"))
6487 return true;
6489 if (TheCondState.TheCond != AsmCond::IfCond &&
6490 TheCondState.TheCond != AsmCond::ElseIfCond)
6491 return Error(DirectiveLoc, "Encountered an else that doesn't follow an if"
6492 " or an elseif");
6493 TheCondState.TheCond = AsmCond::ElseCond;
6494 bool LastIgnoreState = false;
6495 if (!TheCondStack.empty())
6496 LastIgnoreState = TheCondStack.back().Ignore;
6497 if (LastIgnoreState || TheCondState.CondMet)
6498 TheCondState.Ignore = true;
6499 else
6500 TheCondState.Ignore = false;
6502 return false;
6505 /// parseDirectiveEnd
6506 /// ::= end
6507 bool MasmParser::parseDirectiveEnd(SMLoc DirectiveLoc) {
6508 if (parseToken(AsmToken::EndOfStatement,
6509 "unexpected token in 'end' directive"))
6510 return true;
6512 while (Lexer.isNot(AsmToken::Eof))
6513 Lexer.Lex();
6515 return false;
6518 /// parseDirectiveError
6519 /// ::= .err [message]
6520 bool MasmParser::parseDirectiveError(SMLoc DirectiveLoc) {
6521 if (!TheCondStack.empty()) {
6522 if (TheCondStack.back().Ignore) {
6523 eatToEndOfStatement();
6524 return false;
6528 std::string Message = ".err directive invoked in source file";
6529 if (Lexer.isNot(AsmToken::EndOfStatement))
6530 Message = parseStringTo(AsmToken::EndOfStatement);
6531 Lex();
6533 return Error(DirectiveLoc, Message);
6536 /// parseDirectiveErrorIfb
6537 /// ::= .errb textitem[, message]
6538 bool MasmParser::parseDirectiveErrorIfb(SMLoc DirectiveLoc, bool ExpectBlank) {
6539 if (!TheCondStack.empty()) {
6540 if (TheCondStack.back().Ignore) {
6541 eatToEndOfStatement();
6542 return false;
6546 std::string Text;
6547 if (parseTextItem(Text))
6548 return Error(getTok().getLoc(), "missing text item in '.errb' directive");
6550 std::string Message = ".errb directive invoked in source file";
6551 if (Lexer.isNot(AsmToken::EndOfStatement)) {
6552 if (parseToken(AsmToken::Comma))
6553 return addErrorSuffix(" in '.errb' directive");
6554 Message = parseStringTo(AsmToken::EndOfStatement);
6556 Lex();
6558 if (Text.empty() == ExpectBlank)
6559 return Error(DirectiveLoc, Message);
6560 return false;
6563 /// parseDirectiveErrorIfdef
6564 /// ::= .errdef name[, message]
6565 bool MasmParser::parseDirectiveErrorIfdef(SMLoc DirectiveLoc,
6566 bool ExpectDefined) {
6567 if (!TheCondStack.empty()) {
6568 if (TheCondStack.back().Ignore) {
6569 eatToEndOfStatement();
6570 return false;
6574 bool IsDefined = false;
6575 unsigned RegNo;
6576 SMLoc StartLoc, EndLoc;
6577 IsDefined = (getTargetParser().tryParseRegister(RegNo, StartLoc, EndLoc) ==
6578 MatchOperand_Success);
6579 if (!IsDefined) {
6580 StringRef Name;
6581 if (check(parseIdentifier(Name), "expected identifier after '.errdef'"))
6582 return true;
6584 if (BuiltinSymbolMap.find(Name.lower()) != BuiltinSymbolMap.end()) {
6585 IsDefined = true;
6586 } else if (Variables.find(Name.lower()) != Variables.end()) {
6587 IsDefined = true;
6588 } else {
6589 MCSymbol *Sym = getContext().lookupSymbol(Name);
6590 IsDefined = (Sym && !Sym->isUndefined(false));
6594 std::string Message = ".errdef directive invoked in source file";
6595 if (Lexer.isNot(AsmToken::EndOfStatement)) {
6596 if (parseToken(AsmToken::Comma))
6597 return addErrorSuffix(" in '.errdef' directive");
6598 Message = parseStringTo(AsmToken::EndOfStatement);
6600 Lex();
6602 if (IsDefined == ExpectDefined)
6603 return Error(DirectiveLoc, Message);
6604 return false;
6607 /// parseDirectiveErrorIfidn
6608 /// ::= .erridn textitem, textitem[, message]
6609 bool MasmParser::parseDirectiveErrorIfidn(SMLoc DirectiveLoc, bool ExpectEqual,
6610 bool CaseInsensitive) {
6611 if (!TheCondStack.empty()) {
6612 if (TheCondStack.back().Ignore) {
6613 eatToEndOfStatement();
6614 return false;
6618 std::string String1, String2;
6620 if (parseTextItem(String1)) {
6621 if (ExpectEqual)
6622 return TokError("expected string parameter for '.erridn' directive");
6623 return TokError("expected string parameter for '.errdif' directive");
6626 if (Lexer.isNot(AsmToken::Comma)) {
6627 if (ExpectEqual)
6628 return TokError(
6629 "expected comma after first string for '.erridn' directive");
6630 return TokError(
6631 "expected comma after first string for '.errdif' directive");
6633 Lex();
6635 if (parseTextItem(String2)) {
6636 if (ExpectEqual)
6637 return TokError("expected string parameter for '.erridn' directive");
6638 return TokError("expected string parameter for '.errdif' directive");
6641 std::string Message;
6642 if (ExpectEqual)
6643 Message = ".erridn directive invoked in source file";
6644 else
6645 Message = ".errdif directive invoked in source file";
6646 if (Lexer.isNot(AsmToken::EndOfStatement)) {
6647 if (parseToken(AsmToken::Comma))
6648 return addErrorSuffix(" in '.erridn' directive");
6649 Message = parseStringTo(AsmToken::EndOfStatement);
6651 Lex();
6653 if (CaseInsensitive)
6654 TheCondState.CondMet =
6655 ExpectEqual == (StringRef(String1).equals_insensitive(String2));
6656 else
6657 TheCondState.CondMet = ExpectEqual == (String1 == String2);
6658 TheCondState.Ignore = !TheCondState.CondMet;
6660 if ((CaseInsensitive &&
6661 ExpectEqual == StringRef(String1).equals_insensitive(String2)) ||
6662 (ExpectEqual == (String1 == String2)))
6663 return Error(DirectiveLoc, Message);
6664 return false;
6667 /// parseDirectiveErrorIfe
6668 /// ::= .erre expression[, message]
6669 bool MasmParser::parseDirectiveErrorIfe(SMLoc DirectiveLoc, bool ExpectZero) {
6670 if (!TheCondStack.empty()) {
6671 if (TheCondStack.back().Ignore) {
6672 eatToEndOfStatement();
6673 return false;
6677 int64_t ExprValue;
6678 if (parseAbsoluteExpression(ExprValue))
6679 return addErrorSuffix(" in '.erre' directive");
6681 std::string Message = ".erre directive invoked in source file";
6682 if (Lexer.isNot(AsmToken::EndOfStatement)) {
6683 if (parseToken(AsmToken::Comma))
6684 return addErrorSuffix(" in '.erre' directive");
6685 Message = parseStringTo(AsmToken::EndOfStatement);
6687 Lex();
6689 if ((ExprValue == 0) == ExpectZero)
6690 return Error(DirectiveLoc, Message);
6691 return false;
6694 /// parseDirectiveEndIf
6695 /// ::= .endif
6696 bool MasmParser::parseDirectiveEndIf(SMLoc DirectiveLoc) {
6697 if (parseToken(AsmToken::EndOfStatement,
6698 "unexpected token in '.endif' directive"))
6699 return true;
6701 if ((TheCondState.TheCond == AsmCond::NoCond) || TheCondStack.empty())
6702 return Error(DirectiveLoc, "Encountered a .endif that doesn't follow "
6703 "an .if or .else");
6704 if (!TheCondStack.empty()) {
6705 TheCondState = TheCondStack.back();
6706 TheCondStack.pop_back();
6709 return false;
6712 void MasmParser::initializeDirectiveKindMap() {
6713 DirectiveKindMap["="] = DK_ASSIGN;
6714 DirectiveKindMap["equ"] = DK_EQU;
6715 DirectiveKindMap["textequ"] = DK_TEXTEQU;
6716 // DirectiveKindMap[".ascii"] = DK_ASCII;
6717 // DirectiveKindMap[".asciz"] = DK_ASCIZ;
6718 // DirectiveKindMap[".string"] = DK_STRING;
6719 DirectiveKindMap["byte"] = DK_BYTE;
6720 DirectiveKindMap["sbyte"] = DK_SBYTE;
6721 DirectiveKindMap["word"] = DK_WORD;
6722 DirectiveKindMap["sword"] = DK_SWORD;
6723 DirectiveKindMap["dword"] = DK_DWORD;
6724 DirectiveKindMap["sdword"] = DK_SDWORD;
6725 DirectiveKindMap["fword"] = DK_FWORD;
6726 DirectiveKindMap["qword"] = DK_QWORD;
6727 DirectiveKindMap["sqword"] = DK_SQWORD;
6728 DirectiveKindMap["real4"] = DK_REAL4;
6729 DirectiveKindMap["real8"] = DK_REAL8;
6730 DirectiveKindMap["real10"] = DK_REAL10;
6731 DirectiveKindMap["align"] = DK_ALIGN;
6732 DirectiveKindMap["even"] = DK_EVEN;
6733 DirectiveKindMap["org"] = DK_ORG;
6734 DirectiveKindMap["extern"] = DK_EXTERN;
6735 DirectiveKindMap["public"] = DK_PUBLIC;
6736 // DirectiveKindMap[".comm"] = DK_COMM;
6737 DirectiveKindMap["comment"] = DK_COMMENT;
6738 DirectiveKindMap["include"] = DK_INCLUDE;
6739 DirectiveKindMap["repeat"] = DK_REPEAT;
6740 DirectiveKindMap["rept"] = DK_REPEAT;
6741 DirectiveKindMap["while"] = DK_WHILE;
6742 DirectiveKindMap["for"] = DK_FOR;
6743 DirectiveKindMap["irp"] = DK_FOR;
6744 DirectiveKindMap["forc"] = DK_FORC;
6745 DirectiveKindMap["irpc"] = DK_FORC;
6746 DirectiveKindMap["if"] = DK_IF;
6747 DirectiveKindMap["ife"] = DK_IFE;
6748 DirectiveKindMap["ifb"] = DK_IFB;
6749 DirectiveKindMap["ifnb"] = DK_IFNB;
6750 DirectiveKindMap["ifdef"] = DK_IFDEF;
6751 DirectiveKindMap["ifndef"] = DK_IFNDEF;
6752 DirectiveKindMap["ifdif"] = DK_IFDIF;
6753 DirectiveKindMap["ifdifi"] = DK_IFDIFI;
6754 DirectiveKindMap["ifidn"] = DK_IFIDN;
6755 DirectiveKindMap["ifidni"] = DK_IFIDNI;
6756 DirectiveKindMap["elseif"] = DK_ELSEIF;
6757 DirectiveKindMap["elseifdef"] = DK_ELSEIFDEF;
6758 DirectiveKindMap["elseifndef"] = DK_ELSEIFNDEF;
6759 DirectiveKindMap["elseifdif"] = DK_ELSEIFDIF;
6760 DirectiveKindMap["elseifidn"] = DK_ELSEIFIDN;
6761 DirectiveKindMap["else"] = DK_ELSE;
6762 DirectiveKindMap["end"] = DK_END;
6763 DirectiveKindMap["endif"] = DK_ENDIF;
6764 // DirectiveKindMap[".file"] = DK_FILE;
6765 // DirectiveKindMap[".line"] = DK_LINE;
6766 // DirectiveKindMap[".loc"] = DK_LOC;
6767 // DirectiveKindMap[".stabs"] = DK_STABS;
6768 // DirectiveKindMap[".cv_file"] = DK_CV_FILE;
6769 // DirectiveKindMap[".cv_func_id"] = DK_CV_FUNC_ID;
6770 // DirectiveKindMap[".cv_loc"] = DK_CV_LOC;
6771 // DirectiveKindMap[".cv_linetable"] = DK_CV_LINETABLE;
6772 // DirectiveKindMap[".cv_inline_linetable"] = DK_CV_INLINE_LINETABLE;
6773 // DirectiveKindMap[".cv_inline_site_id"] = DK_CV_INLINE_SITE_ID;
6774 // DirectiveKindMap[".cv_def_range"] = DK_CV_DEF_RANGE;
6775 // DirectiveKindMap[".cv_string"] = DK_CV_STRING;
6776 // DirectiveKindMap[".cv_stringtable"] = DK_CV_STRINGTABLE;
6777 // DirectiveKindMap[".cv_filechecksums"] = DK_CV_FILECHECKSUMS;
6778 // DirectiveKindMap[".cv_filechecksumoffset"] = DK_CV_FILECHECKSUM_OFFSET;
6779 // DirectiveKindMap[".cv_fpo_data"] = DK_CV_FPO_DATA;
6780 // DirectiveKindMap[".cfi_sections"] = DK_CFI_SECTIONS;
6781 // DirectiveKindMap[".cfi_startproc"] = DK_CFI_STARTPROC;
6782 // DirectiveKindMap[".cfi_endproc"] = DK_CFI_ENDPROC;
6783 // DirectiveKindMap[".cfi_def_cfa"] = DK_CFI_DEF_CFA;
6784 // DirectiveKindMap[".cfi_def_cfa_offset"] = DK_CFI_DEF_CFA_OFFSET;
6785 // DirectiveKindMap[".cfi_adjust_cfa_offset"] = DK_CFI_ADJUST_CFA_OFFSET;
6786 // DirectiveKindMap[".cfi_def_cfa_register"] = DK_CFI_DEF_CFA_REGISTER;
6787 // DirectiveKindMap[".cfi_offset"] = DK_CFI_OFFSET;
6788 // DirectiveKindMap[".cfi_rel_offset"] = DK_CFI_REL_OFFSET;
6789 // DirectiveKindMap[".cfi_personality"] = DK_CFI_PERSONALITY;
6790 // DirectiveKindMap[".cfi_lsda"] = DK_CFI_LSDA;
6791 // DirectiveKindMap[".cfi_remember_state"] = DK_CFI_REMEMBER_STATE;
6792 // DirectiveKindMap[".cfi_restore_state"] = DK_CFI_RESTORE_STATE;
6793 // DirectiveKindMap[".cfi_same_value"] = DK_CFI_SAME_VALUE;
6794 // DirectiveKindMap[".cfi_restore"] = DK_CFI_RESTORE;
6795 // DirectiveKindMap[".cfi_escape"] = DK_CFI_ESCAPE;
6796 // DirectiveKindMap[".cfi_return_column"] = DK_CFI_RETURN_COLUMN;
6797 // DirectiveKindMap[".cfi_signal_frame"] = DK_CFI_SIGNAL_FRAME;
6798 // DirectiveKindMap[".cfi_undefined"] = DK_CFI_UNDEFINED;
6799 // DirectiveKindMap[".cfi_register"] = DK_CFI_REGISTER;
6800 // DirectiveKindMap[".cfi_window_save"] = DK_CFI_WINDOW_SAVE;
6801 // DirectiveKindMap[".cfi_b_key_frame"] = DK_CFI_B_KEY_FRAME;
6802 DirectiveKindMap["macro"] = DK_MACRO;
6803 DirectiveKindMap["exitm"] = DK_EXITM;
6804 DirectiveKindMap["endm"] = DK_ENDM;
6805 DirectiveKindMap["purge"] = DK_PURGE;
6806 DirectiveKindMap[".err"] = DK_ERR;
6807 DirectiveKindMap[".errb"] = DK_ERRB;
6808 DirectiveKindMap[".errnb"] = DK_ERRNB;
6809 DirectiveKindMap[".errdef"] = DK_ERRDEF;
6810 DirectiveKindMap[".errndef"] = DK_ERRNDEF;
6811 DirectiveKindMap[".errdif"] = DK_ERRDIF;
6812 DirectiveKindMap[".errdifi"] = DK_ERRDIFI;
6813 DirectiveKindMap[".erridn"] = DK_ERRIDN;
6814 DirectiveKindMap[".erridni"] = DK_ERRIDNI;
6815 DirectiveKindMap[".erre"] = DK_ERRE;
6816 DirectiveKindMap[".errnz"] = DK_ERRNZ;
6817 DirectiveKindMap[".pushframe"] = DK_PUSHFRAME;
6818 DirectiveKindMap[".pushreg"] = DK_PUSHREG;
6819 DirectiveKindMap[".savereg"] = DK_SAVEREG;
6820 DirectiveKindMap[".savexmm128"] = DK_SAVEXMM128;
6821 DirectiveKindMap[".setframe"] = DK_SETFRAME;
6822 DirectiveKindMap[".radix"] = DK_RADIX;
6823 DirectiveKindMap["db"] = DK_DB;
6824 DirectiveKindMap["dd"] = DK_DD;
6825 DirectiveKindMap["df"] = DK_DF;
6826 DirectiveKindMap["dq"] = DK_DQ;
6827 DirectiveKindMap["dw"] = DK_DW;
6828 DirectiveKindMap["echo"] = DK_ECHO;
6829 DirectiveKindMap["struc"] = DK_STRUCT;
6830 DirectiveKindMap["struct"] = DK_STRUCT;
6831 DirectiveKindMap["union"] = DK_UNION;
6832 DirectiveKindMap["ends"] = DK_ENDS;
6835 bool MasmParser::isMacroLikeDirective() {
6836 if (getLexer().is(AsmToken::Identifier)) {
6837 bool IsMacroLike = StringSwitch<bool>(getTok().getIdentifier())
6838 .CasesLower("repeat", "rept", true)
6839 .CaseLower("while", true)
6840 .CasesLower("for", "irp", true)
6841 .CasesLower("forc", "irpc", true)
6842 .Default(false);
6843 if (IsMacroLike)
6844 return true;
6846 if (peekTok().is(AsmToken::Identifier) &&
6847 peekTok().getIdentifier().equals_insensitive("macro"))
6848 return true;
6850 return false;
6853 MCAsmMacro *MasmParser::parseMacroLikeBody(SMLoc DirectiveLoc) {
6854 AsmToken EndToken, StartToken = getTok();
6856 unsigned NestLevel = 0;
6857 while (true) {
6858 // Check whether we have reached the end of the file.
6859 if (getLexer().is(AsmToken::Eof)) {
6860 printError(DirectiveLoc, "no matching 'endm' in definition");
6861 return nullptr;
6864 if (isMacroLikeDirective())
6865 ++NestLevel;
6867 // Otherwise, check whether we have reached the endm.
6868 if (Lexer.is(AsmToken::Identifier) &&
6869 getTok().getIdentifier().equals_insensitive("endm")) {
6870 if (NestLevel == 0) {
6871 EndToken = getTok();
6872 Lex();
6873 if (Lexer.isNot(AsmToken::EndOfStatement)) {
6874 printError(getTok().getLoc(), "unexpected token in 'endm' directive");
6875 return nullptr;
6877 break;
6879 --NestLevel;
6882 // Otherwise, scan till the end of the statement.
6883 eatToEndOfStatement();
6886 const char *BodyStart = StartToken.getLoc().getPointer();
6887 const char *BodyEnd = EndToken.getLoc().getPointer();
6888 StringRef Body = StringRef(BodyStart, BodyEnd - BodyStart);
6890 // We Are Anonymous.
6891 MacroLikeBodies.emplace_back(StringRef(), Body, MCAsmMacroParameters());
6892 return &MacroLikeBodies.back();
6895 bool MasmParser::expandStatement(SMLoc Loc) {
6896 std::string Body = parseStringTo(AsmToken::EndOfStatement);
6897 SMLoc EndLoc = getTok().getLoc();
6899 MCAsmMacroParameters Parameters;
6900 MCAsmMacroArguments Arguments;
6902 StringMap<std::string> BuiltinValues;
6903 for (const auto &S : BuiltinSymbolMap) {
6904 const BuiltinSymbol &Sym = S.getValue();
6905 if (llvm::Optional<std::string> Text = evaluateBuiltinTextMacro(Sym, Loc)) {
6906 BuiltinValues[S.getKey().lower()] = std::move(*Text);
6909 for (const auto &B : BuiltinValues) {
6910 MCAsmMacroParameter P;
6911 MCAsmMacroArgument A;
6912 P.Name = B.getKey();
6913 P.Required = true;
6914 A.push_back(AsmToken(AsmToken::String, B.getValue()));
6916 Parameters.push_back(std::move(P));
6917 Arguments.push_back(std::move(A));
6920 for (const auto &V : Variables) {
6921 const Variable &Var = V.getValue();
6922 if (Var.IsText) {
6923 MCAsmMacroParameter P;
6924 MCAsmMacroArgument A;
6925 P.Name = Var.Name;
6926 P.Required = true;
6927 A.push_back(AsmToken(AsmToken::String, Var.TextValue));
6929 Parameters.push_back(std::move(P));
6930 Arguments.push_back(std::move(A));
6933 MacroLikeBodies.emplace_back(StringRef(), Body, Parameters);
6934 MCAsmMacro M = MacroLikeBodies.back();
6936 // Expand the statement in a new buffer.
6937 SmallString<80> Buf;
6938 raw_svector_ostream OS(Buf);
6939 if (expandMacro(OS, M.Body, M.Parameters, Arguments, M.Locals, EndLoc))
6940 return true;
6941 std::unique_ptr<MemoryBuffer> Expansion =
6942 MemoryBuffer::getMemBufferCopy(OS.str(), "<expansion>");
6944 // Jump to the expanded statement and prime the lexer.
6945 CurBuffer = SrcMgr.AddNewSourceBuffer(std::move(Expansion), EndLoc);
6946 Lexer.setBuffer(SrcMgr.getMemoryBuffer(CurBuffer)->getBuffer());
6947 EndStatementAtEOFStack.push_back(false);
6948 Lex();
6949 return false;
6952 void MasmParser::instantiateMacroLikeBody(MCAsmMacro *M, SMLoc DirectiveLoc,
6953 raw_svector_ostream &OS) {
6954 instantiateMacroLikeBody(M, DirectiveLoc, /*ExitLoc=*/getTok().getLoc(), OS);
6956 void MasmParser::instantiateMacroLikeBody(MCAsmMacro *M, SMLoc DirectiveLoc,
6957 SMLoc ExitLoc,
6958 raw_svector_ostream &OS) {
6959 OS << "endm\n";
6961 std::unique_ptr<MemoryBuffer> Instantiation =
6962 MemoryBuffer::getMemBufferCopy(OS.str(), "<instantiation>");
6964 // Create the macro instantiation object and add to the current macro
6965 // instantiation stack.
6966 MacroInstantiation *MI = new MacroInstantiation{DirectiveLoc, CurBuffer,
6967 ExitLoc, TheCondStack.size()};
6968 ActiveMacros.push_back(MI);
6970 // Jump to the macro instantiation and prime the lexer.
6971 CurBuffer = SrcMgr.AddNewSourceBuffer(std::move(Instantiation), SMLoc());
6972 Lexer.setBuffer(SrcMgr.getMemoryBuffer(CurBuffer)->getBuffer());
6973 EndStatementAtEOFStack.push_back(true);
6974 Lex();
6977 /// parseDirectiveRepeat
6978 /// ::= ("repeat" | "rept") count
6979 /// body
6980 /// endm
6981 bool MasmParser::parseDirectiveRepeat(SMLoc DirectiveLoc, StringRef Dir) {
6982 const MCExpr *CountExpr;
6983 SMLoc CountLoc = getTok().getLoc();
6984 if (parseExpression(CountExpr))
6985 return true;
6987 int64_t Count;
6988 if (!CountExpr->evaluateAsAbsolute(Count, getStreamer().getAssemblerPtr())) {
6989 return Error(CountLoc, "unexpected token in '" + Dir + "' directive");
6992 if (check(Count < 0, CountLoc, "Count is negative") ||
6993 parseToken(AsmToken::EndOfStatement,
6994 "unexpected token in '" + Dir + "' directive"))
6995 return true;
6997 // Lex the repeat definition.
6998 MCAsmMacro *M = parseMacroLikeBody(DirectiveLoc);
6999 if (!M)
7000 return true;
7002 // Macro instantiation is lexical, unfortunately. We construct a new buffer
7003 // to hold the macro body with substitutions.
7004 SmallString<256> Buf;
7005 raw_svector_ostream OS(Buf);
7006 while (Count--) {
7007 if (expandMacro(OS, M->Body, None, None, M->Locals, getTok().getLoc()))
7008 return true;
7010 instantiateMacroLikeBody(M, DirectiveLoc, OS);
7012 return false;
7015 /// parseDirectiveWhile
7016 /// ::= "while" expression
7017 /// body
7018 /// endm
7019 bool MasmParser::parseDirectiveWhile(SMLoc DirectiveLoc) {
7020 const MCExpr *CondExpr;
7021 SMLoc CondLoc = getTok().getLoc();
7022 if (parseExpression(CondExpr))
7023 return true;
7025 // Lex the repeat definition.
7026 MCAsmMacro *M = parseMacroLikeBody(DirectiveLoc);
7027 if (!M)
7028 return true;
7030 // Macro instantiation is lexical, unfortunately. We construct a new buffer
7031 // to hold the macro body with substitutions.
7032 SmallString<256> Buf;
7033 raw_svector_ostream OS(Buf);
7034 int64_t Condition;
7035 if (!CondExpr->evaluateAsAbsolute(Condition, getStreamer().getAssemblerPtr()))
7036 return Error(CondLoc, "expected absolute expression in 'while' directive");
7037 if (Condition) {
7038 // Instantiate the macro, then resume at this directive to recheck the
7039 // condition.
7040 if (expandMacro(OS, M->Body, None, None, M->Locals, getTok().getLoc()))
7041 return true;
7042 instantiateMacroLikeBody(M, DirectiveLoc, /*ExitLoc=*/DirectiveLoc, OS);
7045 return false;
7048 /// parseDirectiveFor
7049 /// ::= ("for" | "irp") symbol [":" qualifier], <values>
7050 /// body
7051 /// endm
7052 bool MasmParser::parseDirectiveFor(SMLoc DirectiveLoc, StringRef Dir) {
7053 MCAsmMacroParameter Parameter;
7054 MCAsmMacroArguments A;
7055 if (check(parseIdentifier(Parameter.Name),
7056 "expected identifier in '" + Dir + "' directive"))
7057 return true;
7059 // Parse optional qualifier (default value, or "req")
7060 if (parseOptionalToken(AsmToken::Colon)) {
7061 if (parseOptionalToken(AsmToken::Equal)) {
7062 // Default value
7063 SMLoc ParamLoc;
7065 ParamLoc = Lexer.getLoc();
7066 if (parseMacroArgument(nullptr, Parameter.Value))
7067 return true;
7068 } else {
7069 SMLoc QualLoc;
7070 StringRef Qualifier;
7072 QualLoc = Lexer.getLoc();
7073 if (parseIdentifier(Qualifier))
7074 return Error(QualLoc, "missing parameter qualifier for "
7075 "'" +
7076 Parameter.Name + "' in '" + Dir +
7077 "' directive");
7079 if (Qualifier.equals_insensitive("req"))
7080 Parameter.Required = true;
7081 else
7082 return Error(QualLoc,
7083 Qualifier + " is not a valid parameter qualifier for '" +
7084 Parameter.Name + "' in '" + Dir + "' directive");
7088 if (parseToken(AsmToken::Comma,
7089 "expected comma in '" + Dir + "' directive") ||
7090 parseToken(AsmToken::Less,
7091 "values in '" + Dir +
7092 "' directive must be enclosed in angle brackets"))
7093 return true;
7095 while (true) {
7096 A.emplace_back();
7097 if (parseMacroArgument(&Parameter, A.back(), /*EndTok=*/AsmToken::Greater))
7098 return addErrorSuffix(" in arguments for '" + Dir + "' directive");
7100 // If we see a comma, continue, and allow line continuation.
7101 if (!parseOptionalToken(AsmToken::Comma))
7102 break;
7103 parseOptionalToken(AsmToken::EndOfStatement);
7106 if (parseToken(AsmToken::Greater,
7107 "values in '" + Dir +
7108 "' directive must be enclosed in angle brackets") ||
7109 parseToken(AsmToken::EndOfStatement, "expected End of Statement"))
7110 return true;
7112 // Lex the for definition.
7113 MCAsmMacro *M = parseMacroLikeBody(DirectiveLoc);
7114 if (!M)
7115 return true;
7117 // Macro instantiation is lexical, unfortunately. We construct a new buffer
7118 // to hold the macro body with substitutions.
7119 SmallString<256> Buf;
7120 raw_svector_ostream OS(Buf);
7122 for (const MCAsmMacroArgument &Arg : A) {
7123 if (expandMacro(OS, M->Body, Parameter, Arg, M->Locals, getTok().getLoc()))
7124 return true;
7127 instantiateMacroLikeBody(M, DirectiveLoc, OS);
7129 return false;
7132 /// parseDirectiveForc
7133 /// ::= ("forc" | "irpc") symbol, <string>
7134 /// body
7135 /// endm
7136 bool MasmParser::parseDirectiveForc(SMLoc DirectiveLoc, StringRef Directive) {
7137 MCAsmMacroParameter Parameter;
7139 std::string Argument;
7140 if (check(parseIdentifier(Parameter.Name),
7141 "expected identifier in '" + Directive + "' directive") ||
7142 parseToken(AsmToken::Comma,
7143 "expected comma in '" + Directive + "' directive"))
7144 return true;
7145 if (parseAngleBracketString(Argument)) {
7146 // Match ml64.exe; treat all characters to end of statement as a string,
7147 // ignoring comment markers, then discard anything following a space (using
7148 // the C locale).
7149 Argument = parseStringTo(AsmToken::EndOfStatement);
7150 if (getTok().is(AsmToken::EndOfStatement))
7151 Argument += getTok().getString();
7152 size_t End = 0;
7153 for (; End < Argument.size(); ++End) {
7154 if (isSpace(Argument[End]))
7155 break;
7157 Argument.resize(End);
7159 if (parseToken(AsmToken::EndOfStatement, "expected end of statement"))
7160 return true;
7162 // Lex the irpc definition.
7163 MCAsmMacro *M = parseMacroLikeBody(DirectiveLoc);
7164 if (!M)
7165 return true;
7167 // Macro instantiation is lexical, unfortunately. We construct a new buffer
7168 // to hold the macro body with substitutions.
7169 SmallString<256> Buf;
7170 raw_svector_ostream OS(Buf);
7172 StringRef Values(Argument);
7173 for (std::size_t I = 0, End = Values.size(); I != End; ++I) {
7174 MCAsmMacroArgument Arg;
7175 Arg.emplace_back(AsmToken::Identifier, Values.slice(I, I + 1));
7177 if (expandMacro(OS, M->Body, Parameter, Arg, M->Locals, getTok().getLoc()))
7178 return true;
7181 instantiateMacroLikeBody(M, DirectiveLoc, OS);
7183 return false;
7186 bool MasmParser::parseDirectiveMSEmit(SMLoc IDLoc, ParseStatementInfo &Info,
7187 size_t Len) {
7188 const MCExpr *Value;
7189 SMLoc ExprLoc = getLexer().getLoc();
7190 if (parseExpression(Value))
7191 return true;
7192 const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(Value);
7193 if (!MCE)
7194 return Error(ExprLoc, "unexpected expression in _emit");
7195 uint64_t IntValue = MCE->getValue();
7196 if (!isUInt<8>(IntValue) && !isInt<8>(IntValue))
7197 return Error(ExprLoc, "literal value out of range for directive");
7199 Info.AsmRewrites->emplace_back(AOK_Emit, IDLoc, Len);
7200 return false;
7203 bool MasmParser::parseDirectiveMSAlign(SMLoc IDLoc, ParseStatementInfo &Info) {
7204 const MCExpr *Value;
7205 SMLoc ExprLoc = getLexer().getLoc();
7206 if (parseExpression(Value))
7207 return true;
7208 const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(Value);
7209 if (!MCE)
7210 return Error(ExprLoc, "unexpected expression in align");
7211 uint64_t IntValue = MCE->getValue();
7212 if (!isPowerOf2_64(IntValue))
7213 return Error(ExprLoc, "literal value not a power of two greater then zero");
7215 Info.AsmRewrites->emplace_back(AOK_Align, IDLoc, 5, Log2_64(IntValue));
7216 return false;
7219 bool MasmParser::parseDirectiveRadix(SMLoc DirectiveLoc) {
7220 const SMLoc Loc = getLexer().getLoc();
7221 std::string RadixStringRaw = parseStringTo(AsmToken::EndOfStatement);
7222 StringRef RadixString = StringRef(RadixStringRaw).trim();
7223 unsigned Radix;
7224 if (RadixString.getAsInteger(10, Radix)) {
7225 return Error(Loc,
7226 "radix must be a decimal number in the range 2 to 16; was " +
7227 RadixString);
7229 if (Radix < 2 || Radix > 16)
7230 return Error(Loc, "radix must be in the range 2 to 16; was " +
7231 std::to_string(Radix));
7232 getLexer().setMasmDefaultRadix(Radix);
7233 return false;
7236 /// parseDirectiveEcho
7237 /// ::= "echo" message
7238 bool MasmParser::parseDirectiveEcho(SMLoc DirectiveLoc) {
7239 std::string Message = parseStringTo(AsmToken::EndOfStatement);
7240 llvm::outs() << Message;
7241 if (!StringRef(Message).endswith("\n"))
7242 llvm::outs() << '\n';
7243 return false;
7246 // We are comparing pointers, but the pointers are relative to a single string.
7247 // Thus, this should always be deterministic.
7248 static int rewritesSort(const AsmRewrite *AsmRewriteA,
7249 const AsmRewrite *AsmRewriteB) {
7250 if (AsmRewriteA->Loc.getPointer() < AsmRewriteB->Loc.getPointer())
7251 return -1;
7252 if (AsmRewriteB->Loc.getPointer() < AsmRewriteA->Loc.getPointer())
7253 return 1;
7255 // It's possible to have a SizeDirective, Imm/ImmPrefix and an Input/Output
7256 // rewrite to the same location. Make sure the SizeDirective rewrite is
7257 // performed first, then the Imm/ImmPrefix and finally the Input/Output. This
7258 // ensures the sort algorithm is stable.
7259 if (AsmRewritePrecedence[AsmRewriteA->Kind] >
7260 AsmRewritePrecedence[AsmRewriteB->Kind])
7261 return -1;
7263 if (AsmRewritePrecedence[AsmRewriteA->Kind] <
7264 AsmRewritePrecedence[AsmRewriteB->Kind])
7265 return 1;
7266 llvm_unreachable("Unstable rewrite sort.");
7269 bool MasmParser::defineMacro(StringRef Name, StringRef Value) {
7270 Variable &Var = Variables[Name.lower()];
7271 if (Var.Name.empty()) {
7272 Var.Name = Name;
7273 } else if (Var.Redefinable == Variable::NOT_REDEFINABLE) {
7274 return Error(SMLoc(), "invalid variable redefinition");
7275 } else if (Var.Redefinable == Variable::WARN_ON_REDEFINITION &&
7276 Warning(SMLoc(), "redefining '" + Name +
7277 "', already defined on the command line")) {
7278 return true;
7280 Var.Redefinable = Variable::WARN_ON_REDEFINITION;
7281 Var.IsText = true;
7282 Var.TextValue = Value.str();
7283 return false;
7286 bool MasmParser::lookUpField(StringRef Name, AsmFieldInfo &Info) const {
7287 const std::pair<StringRef, StringRef> BaseMember = Name.split('.');
7288 const StringRef Base = BaseMember.first, Member = BaseMember.second;
7289 return lookUpField(Base, Member, Info);
7292 bool MasmParser::lookUpField(StringRef Base, StringRef Member,
7293 AsmFieldInfo &Info) const {
7294 if (Base.empty())
7295 return true;
7297 AsmFieldInfo BaseInfo;
7298 if (Base.contains('.') && !lookUpField(Base, BaseInfo))
7299 Base = BaseInfo.Type.Name;
7301 auto StructIt = Structs.find(Base.lower());
7302 auto TypeIt = KnownType.find(Base.lower());
7303 if (TypeIt != KnownType.end()) {
7304 StructIt = Structs.find(TypeIt->second.Name.lower());
7306 if (StructIt != Structs.end())
7307 return lookUpField(StructIt->second, Member, Info);
7309 return true;
7312 bool MasmParser::lookUpField(const StructInfo &Structure, StringRef Member,
7313 AsmFieldInfo &Info) const {
7314 if (Member.empty()) {
7315 Info.Type.Name = Structure.Name;
7316 Info.Type.Size = Structure.Size;
7317 Info.Type.ElementSize = Structure.Size;
7318 Info.Type.Length = 1;
7319 return false;
7322 std::pair<StringRef, StringRef> Split = Member.split('.');
7323 const StringRef FieldName = Split.first, FieldMember = Split.second;
7325 auto StructIt = Structs.find(FieldName.lower());
7326 if (StructIt != Structs.end())
7327 return lookUpField(StructIt->second, FieldMember, Info);
7329 auto FieldIt = Structure.FieldsByName.find(FieldName.lower());
7330 if (FieldIt == Structure.FieldsByName.end())
7331 return true;
7333 const FieldInfo &Field = Structure.Fields[FieldIt->second];
7334 if (FieldMember.empty()) {
7335 Info.Offset += Field.Offset;
7336 Info.Type.Size = Field.SizeOf;
7337 Info.Type.ElementSize = Field.Type;
7338 Info.Type.Length = Field.LengthOf;
7339 if (Field.Contents.FT == FT_STRUCT)
7340 Info.Type.Name = Field.Contents.StructInfo.Structure.Name;
7341 else
7342 Info.Type.Name = "";
7343 return false;
7346 if (Field.Contents.FT != FT_STRUCT)
7347 return true;
7348 const StructFieldInfo &StructInfo = Field.Contents.StructInfo;
7350 if (lookUpField(StructInfo.Structure, FieldMember, Info))
7351 return true;
7353 Info.Offset += Field.Offset;
7354 return false;
7357 bool MasmParser::lookUpType(StringRef Name, AsmTypeInfo &Info) const {
7358 unsigned Size = StringSwitch<unsigned>(Name)
7359 .CasesLower("byte", "db", "sbyte", 1)
7360 .CasesLower("word", "dw", "sword", 2)
7361 .CasesLower("dword", "dd", "sdword", 4)
7362 .CasesLower("fword", "df", 6)
7363 .CasesLower("qword", "dq", "sqword", 8)
7364 .CaseLower("real4", 4)
7365 .CaseLower("real8", 8)
7366 .CaseLower("real10", 10)
7367 .Default(0);
7368 if (Size) {
7369 Info.Name = Name;
7370 Info.ElementSize = Size;
7371 Info.Length = 1;
7372 Info.Size = Size;
7373 return false;
7376 auto StructIt = Structs.find(Name.lower());
7377 if (StructIt != Structs.end()) {
7378 const StructInfo &Structure = StructIt->second;
7379 Info.Name = Name;
7380 Info.ElementSize = Structure.Size;
7381 Info.Length = 1;
7382 Info.Size = Structure.Size;
7383 return false;
7386 return true;
/// Parses a block of MS-style inline assembly and rewrites it into the string
/// form stored in AsmString, with operands replaced by $N placeholders.
///
/// Outputs written by this function:
///  - AsmString:   the rewritten assembly text.
///  - NumOutputs / NumInputs: number of output and input operands collected.
///  - OpDecls / Constraints:  one entry per operand, outputs first, then
///                            inputs; each OpDecl is paired with its
///                            "needs address-of" flag.
///  - Clobbers:    sorted, de-duplicated names of clobbered registers,
///                 printed through IP.
///
/// Returns true if a statement fails to parse (pending errors are printed
/// first), false on success.
7389 bool MasmParser::parseMSInlineAsm(
7390 std::string &AsmString, unsigned &NumOutputs, unsigned &NumInputs,
7391 SmallVectorImpl<std::pair<void *, bool>> &OpDecls,
7392 SmallVectorImpl<std::string> &Constraints,
7393 SmallVectorImpl<std::string> &Clobbers, const MCInstrInfo *MII,
7394 const MCInstPrinter *IP, MCAsmParserSemaCallback &SI) {
7395 SmallVector<void *, 4> InputDecls;
7396 SmallVector<void *, 4> OutputDecls;
7397 SmallVector<bool, 4> InputDeclsAddressOf;
7398 SmallVector<bool, 4> OutputDeclsAddressOf;
7399 SmallVector<std::string, 4> InputConstraints;
7400 SmallVector<std::string, 4> OutputConstraints;
7401 SmallVector<unsigned, 4> ClobberRegs;
7403 SmallVector<AsmRewrite, 4> AsmStrRewrites;
7405 // Prime the lexer.
7406 Lex();
7408 // While we have input, parse each statement.
// InputIdx / OutputIdx are the placeholder numbers handed out when the
// rewrites are emitted further down.
7409 unsigned InputIdx = 0;
7410 unsigned OutputIdx = 0;
7411 while (getLexer().isNot(AsmToken::Eof)) {
7412 // Parse curly braces marking block start/end.
7413 if (parseCurlyBlockScope(AsmStrRewrites))
7414 continue;
7416 ParseStatementInfo Info(&AsmStrRewrites);
7417 bool StatementErr = parseStatement(Info, &SI);
7419 if (StatementErr || Info.ParseError) {
7420 // Emit pending errors if any exist.
7421 printPendingErrors();
7422 return true;
7425 // No pending error should exist here.
7426 assert(!hasPendingError() && "unexpected error from parseStatement");
// Statements that did not set an opcode have no operands to collect.
7428 if (Info.Opcode == ~0U)
7429 continue;
7431 const MCInstrDesc &Desc = MII->get(Info.Opcode);
7433 // Build the list of clobbers, outputs and inputs.
// Operand index 0 is skipped — presumably the mnemonic token; TODO confirm.
7434 for (unsigned i = 1, e = Info.ParsedOperands.size(); i != e; ++i) {
7435 MCParsedAsmOperand &Operand = *Info.ParsedOperands[i];
7437 // Register operand.
7438 if (Operand.isReg() && !Operand.needAddressOf() &&
7439 !getTargetParser().OmitRegisterFromClobberLists(Operand.getReg())) {
7440 unsigned NumDefs = Desc.getNumDefs();
7441 // Clobber.
7442 if (NumDefs && Operand.getMCOperandNum() < NumDefs)
7443 ClobberRegs.push_back(Operand.getReg());
7444 continue;
7447 // Expr/Input or Output.
7448 StringRef SymName = Operand.getSymName();
7449 if (SymName.empty())
7450 continue;
7452 void *OpDecl = Operand.getOpDecl();
7453 if (!OpDecl)
7454 continue;
7456 StringRef Constraint = Operand.getConstraint();
7457 if (Operand.isImm()) {
7458 // Offset as immediate.
7459 if (Operand.isOffsetOfLocal())
7460 Constraint = "r";
7461 else
7462 Constraint = "i";
// Only the first operand of an instruction that may store is treated as an
// output; every other symbolic operand is an input.
7465 bool isOutput = (i == 1) && Desc.mayStore();
7466 SMLoc Start = SMLoc::getFromPointer(SymName.data());
7467 if (isOutput) {
// Each output bumps InputIdx, so input placeholders emitted below are
// numbered after all outputs, matching the outputs-first order of OpDecls.
7468 ++InputIdx;
7469 OutputDecls.push_back(OpDecl);
7470 OutputDeclsAddressOf.push_back(Operand.needAddressOf());
7471 OutputConstraints.push_back(("=" + Constraint).str());
7472 AsmStrRewrites.emplace_back(AOK_Output, Start, SymName.size());
7473 } else {
7474 InputDecls.push_back(OpDecl);
7475 InputDeclsAddressOf.push_back(Operand.needAddressOf());
7476 InputConstraints.push_back(Constraint.str());
7477 if (Desc.OpInfo[i - 1].isBranchTarget())
7478 AsmStrRewrites.emplace_back(AOK_CallInput, Start, SymName.size());
7479 else
7480 AsmStrRewrites.emplace_back(AOK_Input, Start, SymName.size());
7484 // Consider implicit defs to be clobbers. Think of cpuid and push.
7485 ArrayRef<MCPhysReg> ImpDefs(Desc.getImplicitDefs(),
7486 Desc.getNumImplicitDefs());
7487 llvm::append_range(ClobberRegs, ImpDefs);
7490 // Set the number of Outputs and Inputs.
7491 NumOutputs = OutputDecls.size();
7492 NumInputs = InputDecls.size();
7494 // Set the unique clobbers.
7495 array_pod_sort(ClobberRegs.begin(), ClobberRegs.end());
7496 ClobberRegs.erase(std::unique(ClobberRegs.begin(), ClobberRegs.end()),
7497 ClobberRegs.end());
7498 Clobbers.assign(ClobberRegs.size(), std::string());
7499 for (unsigned I = 0, E = ClobberRegs.size(); I != E; ++I) {
7500 raw_string_ostream OS(Clobbers[I]);
7501 IP->printRegName(OS, ClobberRegs[I]);
7504 // Merge the various outputs and inputs. Output are expected first.
7505 if (NumOutputs || NumInputs) {
7506 unsigned NumExprs = NumOutputs + NumInputs;
7507 OpDecls.resize(NumExprs);
7508 Constraints.resize(NumExprs);
7509 for (unsigned i = 0; i < NumOutputs; ++i) {
7510 OpDecls[i] = std::make_pair(OutputDecls[i], OutputDeclsAddressOf[i]);
7511 Constraints[i] = OutputConstraints[i];
7513 for (unsigned i = 0, j = NumOutputs; i < NumInputs; ++i, ++j) {
7514 OpDecls[j] = std::make_pair(InputDecls[i], InputDeclsAddressOf[i]);
7515 Constraints[j] = InputConstraints[i];
7519 // Build the IR assembly string.
// The rewrites are sorted by source position, then the main file buffer is
// copied to OS piece by piece with each rewritten span replaced.
7520 std::string AsmStringIR;
7521 raw_string_ostream OS(AsmStringIR);
7522 StringRef ASMString =
7523 SrcMgr.getMemoryBuffer(SrcMgr.getMainFileID())->getBuffer();
7524 const char *AsmStart = ASMString.begin();
7525 const char *AsmEnd = ASMString.end();
7526 array_pod_sort(AsmStrRewrites.begin(), AsmStrRewrites.end(), rewritesSort);
7527 for (auto it = AsmStrRewrites.begin(); it != AsmStrRewrites.end(); ++it) {
7528 const AsmRewrite &AR = *it;
7529 // Check if this has already been covered by another rewrite...
7530 if (AR.Done)
7531 continue;
7532 AsmRewriteKind Kind = AR.Kind;
7534 const char *Loc = AR.Loc.getPointer();
7535 assert(Loc >= AsmStart && "Expected Loc to be at or after Start!");
7537 // Emit everything up to the immediate/expression.
7538 if (unsigned Len = Loc - AsmStart)
7539 OS << StringRef(AsmStart, Len);
7541 // Skip the original expression.
7542 if (Kind == AOK_Skip) {
7543 AsmStart = Loc + AR.Len;
7544 continue;
// Extra characters of the original text to drop after this rewrite; only the
// AOK_Align case below sets it.
7547 unsigned AdditionalSkip = 0;
7548 // Rewrite expressions in $N notation.
7549 switch (Kind) {
7550 default:
7551 break;
7552 case AOK_IntelExpr:
7553 assert(AR.IntelExp.isValid() && "cannot write invalid intel expression");
7554 if (AR.IntelExp.NeedBracs)
7555 OS << "[";
7556 if (AR.IntelExp.hasBaseReg())
7557 OS << AR.IntelExp.BaseReg;
7558 if (AR.IntelExp.hasIndexReg())
7559 OS << (AR.IntelExp.hasBaseReg() ? " + " : "")
7560 << AR.IntelExp.IndexReg;
7561 if (AR.IntelExp.Scale > 1)
7562 OS << " * $$" << AR.IntelExp.Scale;
7563 if (AR.IntelExp.hasOffset()) {
7564 if (AR.IntelExp.hasRegs())
7565 OS << " + ";
7566 // Fuse this rewrite with a rewrite of the offset name, if present.
7567 StringRef OffsetName = AR.IntelExp.OffsetName;
7568 SMLoc OffsetLoc = SMLoc::getFromPointer(AR.IntelExp.OffsetName.data());
7569 size_t OffsetLen = OffsetName.size();
7570 auto rewrite_it = std::find_if(
7571 it, AsmStrRewrites.end(), [&](const AsmRewrite &FusingAR) {
7572 return FusingAR.Loc == OffsetLoc && FusingAR.Len == OffsetLen &&
7573 (FusingAR.Kind == AOK_Input ||
7574 FusingAR.Kind == AOK_CallInput);
// A fused rewrite is marked Done so the outer loop skips it when it is
// reached later.
7576 if (rewrite_it == AsmStrRewrites.end()) {
7577 OS << "offset " << OffsetName;
7578 } else if (rewrite_it->Kind == AOK_CallInput) {
7579 OS << "${" << InputIdx++ << ":P}";
7580 rewrite_it->Done = true;
7581 } else {
7582 OS << '$' << InputIdx++;
7583 rewrite_it->Done = true;
7586 if (AR.IntelExp.Imm || AR.IntelExp.emitImm())
7587 OS << (AR.IntelExp.emitImm() ? "$$" : " + $$") << AR.IntelExp.Imm;
7588 if (AR.IntelExp.NeedBracs)
7589 OS << "]";
7590 break;
7591 case AOK_Label:
7592 OS << Ctx.getAsmInfo()->getPrivateLabelPrefix() << AR.Label;
7593 break;
7594 case AOK_Input:
7595 OS << '$' << InputIdx++;
7596 break;
7597 case AOK_CallInput:
7598 OS << "${" << InputIdx++ << ":P}";
7599 break;
7600 case AOK_Output:
7601 OS << '$' << OutputIdx++;
7602 break;
7603 case AOK_SizeDirective:
7604 switch (AR.Val) {
7605 default: break;
7606 case 8: OS << "byte ptr "; break;
7607 case 16: OS << "word ptr "; break;
7608 case 32: OS << "dword ptr "; break;
7609 case 64: OS << "qword ptr "; break;
7610 case 80: OS << "xword ptr "; break;
7611 case 128: OS << "xmmword ptr "; break;
7612 case 256: OS << "ymmword ptr "; break;
7614 break;
7615 case AOK_Emit:
7616 OS << ".byte";
7617 break;
7618 case AOK_Align: {
7619 // MS alignment directives are measured in bytes. If the native assembler
7620 // measures alignment in bytes, we can pass it straight through.
7621 OS << ".align";
7622 if (getContext().getAsmInfo()->getAlignmentIsInBytes())
7623 break;
7625 // Alignment is in log2 form, so print that instead and skip the original
7626 // immediate.
7627 unsigned Val = AR.Val;
7628 OS << ' ' << Val;
7629 assert(Val < 10 && "Expected alignment less then 2^10.");
// NOTE(review): the skip looks like one separator plus the decimal digit
// count of 2^Val (1, 2 or 3 digits) — confirm.
7630 AdditionalSkip = (Val < 4) ? 2 : Val < 7 ? 3 : 4;
7631 break;
7633 case AOK_EVEN:
7634 OS << ".even";
7635 break;
7636 case AOK_EndOfStatement:
7637 OS << "\n\t";
7638 break;
7641 // Skip the original expression.
7642 AsmStart = Loc + AR.Len + AdditionalSkip;
7645 // Emit the remainder of the asm string.
7646 if (AsmStart != AsmEnd)
7647 OS << StringRef(AsmStart, AsmEnd - AsmStart);
7649 AsmString = OS.str();
7650 return false;
7653 void MasmParser::initializeBuiltinSymbolMap() {
7654 // Numeric built-ins (supported in all versions)
7655 BuiltinSymbolMap["@version"] = BI_VERSION;
7656 BuiltinSymbolMap["@line"] = BI_LINE;
7658 // Text built-ins (supported in all versions)
7659 BuiltinSymbolMap["@date"] = BI_DATE;
7660 BuiltinSymbolMap["@time"] = BI_TIME;
7661 BuiltinSymbolMap["@filecur"] = BI_FILECUR;
7662 BuiltinSymbolMap["@filename"] = BI_FILENAME;
7663 BuiltinSymbolMap["@curseg"] = BI_CURSEG;
7665 // Some built-ins exist only for MASM32 (32-bit x86)
7666 if (getContext().getSubtargetInfo()->getTargetTriple().getArch() ==
7667 Triple::x86) {
7668 // Numeric built-ins
7669 // BuiltinSymbolMap["@cpu"] = BI_CPU;
7670 // BuiltinSymbolMap["@interface"] = BI_INTERFACE;
7671 // BuiltinSymbolMap["@wordsize"] = BI_WORDSIZE;
7672 // BuiltinSymbolMap["@codesize"] = BI_CODESIZE;
7673 // BuiltinSymbolMap["@datasize"] = BI_DATASIZE;
7674 // BuiltinSymbolMap["@model"] = BI_MODEL;
7676 // Text built-ins
7677 // BuiltinSymbolMap["@code"] = BI_CODE;
7678 // BuiltinSymbolMap["@data"] = BI_DATA;
7679 // BuiltinSymbolMap["@fardata?"] = BI_FARDATA;
7680 // BuiltinSymbolMap["@stack"] = BI_STACK;
7684 const MCExpr *MasmParser::evaluateBuiltinValue(BuiltinSymbol Symbol,
7685 SMLoc StartLoc) {
7686 switch (Symbol) {
7687 default:
7688 return nullptr;
7689 case BI_VERSION:
7690 // Match a recent version of ML.EXE.
7691 return MCConstantExpr::create(1427, getContext());
7692 case BI_LINE: {
7693 int64_t Line;
7694 if (ActiveMacros.empty())
7695 Line = SrcMgr.FindLineNumber(StartLoc, CurBuffer);
7696 else
7697 Line = SrcMgr.FindLineNumber(ActiveMacros.front()->InstantiationLoc,
7698 ActiveMacros.front()->ExitBuffer);
7699 return MCConstantExpr::create(Line, getContext());
7702 llvm_unreachable("unhandled built-in symbol");
7705 llvm::Optional<std::string>
7706 MasmParser::evaluateBuiltinTextMacro(BuiltinSymbol Symbol, SMLoc StartLoc) {
7707 switch (Symbol) {
7708 default:
7709 return {};
7710 case BI_DATE: {
7711 // Current local date, formatted MM/DD/YY
7712 char TmpBuffer[sizeof("mm/dd/yy")];
7713 const size_t Len = strftime(TmpBuffer, sizeof(TmpBuffer), "%D", &TM);
7714 return std::string(TmpBuffer, Len);
7716 case BI_TIME: {
7717 // Current local time, formatted HH:MM:SS (24-hour clock)
7718 char TmpBuffer[sizeof("hh:mm:ss")];
7719 const size_t Len = strftime(TmpBuffer, sizeof(TmpBuffer), "%T", &TM);
7720 return std::string(TmpBuffer, Len);
7722 case BI_FILECUR:
7723 return SrcMgr
7724 .getMemoryBuffer(
7725 ActiveMacros.empty() ? CurBuffer : ActiveMacros.front()->ExitBuffer)
7726 ->getBufferIdentifier()
7727 .str();
7728 case BI_FILENAME:
7729 return sys::path::stem(SrcMgr.getMemoryBuffer(SrcMgr.getMainFileID())
7730 ->getBufferIdentifier())
7731 .upper();
7732 case BI_CURSEG:
7733 return getStreamer().getCurrentSectionOnly()->getName().str();
7735 llvm_unreachable("unhandled built-in symbol");
7738 /// Create an MCAsmParser instance.
/// Forwards every argument unchanged to the MasmParser constructor and
/// returns the newly allocated parser as a raw pointer.
/// NOTE(review): the caller presumably takes ownership of the returned
/// object — confirm against the call sites.
7739 MCAsmParser *llvm::createMCMasmParser(SourceMgr &SM, MCContext &C,
7740 MCStreamer &Out, const MCAsmInfo &MAI,
7741 struct tm TM, unsigned CB) {
7742 return new MasmParser(SM, C, Out, MAI, TM, CB);