Recommit [NFC] Better encapsulation of llvm::Optional Storage
[llvm-complete.git] / include / llvm / MC / MCParser / MCTargetAsmParser.h
blobc5683b74c4cefa14c98b855569d1b0ff80e3c49d
1 //===- llvm/MC/MCTargetAsmParser.h - Target Assembly Parser -----*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
9 #ifndef LLVM_MC_MCPARSER_MCTARGETASMPARSER_H
10 #define LLVM_MC_MCPARSER_MCTARGETASMPARSER_H
12 #include "llvm/ADT/StringRef.h"
13 #include "llvm/MC/MCExpr.h"
14 #include "llvm/MC/MCInstrInfo.h"
15 #include "llvm/MC/MCParser/MCAsmLexer.h"
16 #include "llvm/MC/MCParser/MCParsedAsmOperand.h"
17 #include "llvm/MC/MCParser/MCAsmParserExtension.h"
18 #include "llvm/MC/MCTargetOptions.h"
19 #include "llvm/Support/SMLoc.h"
20 #include <cstdint>
21 #include <memory>
23 namespace llvm {
// Forward declarations of types that this header refers to.
class MCInst;
class MCParsedAsmOperand;
class MCStreamer;
class MCSubtargetInfo;
template <typename T> class SmallVectorImpl;

/// List of parsed operands; each element is held through a std::unique_ptr.
using OperandVector = SmallVectorImpl<std::unique_ptr<MCParsedAsmOperand>>;
/// Kind tag stored in an AsmRewrite. The enumerators are numbered
/// consecutively from zero; their order must stay in sync with
/// AsmRewritePrecedence, which lists one entry per enumerator.
enum AsmRewriteKind {
  AOK_Align,          // Rewrite align as .align.
  AOK_EVEN,           // Rewrite even as .even.
  AOK_Emit,           // Rewrite _emit as .byte.
  AOK_Input,          // Rewrite in terms of $N.
  AOK_Output,         // Rewrite in terms of $N.
  AOK_SizeDirective,  // Add a sizing directive (e.g., dword ptr).
  AOK_Label,          // Rewrite local labels.
  AOK_EndOfStatement, // Add EndOfStatement (e.g., "\n\t").
  AOK_Skip,           // Skip emission (e.g., offset/type operators).
  AOK_IntelExpr       // SizeDirective SymDisp [BaseReg + IndexReg * Scale + ImmDisp]
};
/// Precedence value of each rewrite kind. Entries are listed in the same
/// order as the AsmRewriteKind enumerators, so the table is indexed by the
/// enumerator's value.
const char AsmRewritePrecedence [] = {
  2, // AOK_Align
  2, // AOK_EVEN
  2, // AOK_Emit
  3, // AOK_Input
  3, // AOK_Output
  5, // AOK_SizeDirective
  1, // AOK_Label
  5, // AOK_EndOfStatement
  2, // AOK_Skip
  2  // AOK_IntelExpr
};
59 // Represnt the various parts which makes up an intel expression,
60 // used for emitting compound intel expressions
61 struct IntelExpr {
62 bool NeedBracs;
63 int64_t Imm;
64 StringRef BaseReg;
65 StringRef IndexReg;
66 unsigned Scale;
68 IntelExpr(bool needBracs = false) : NeedBracs(needBracs), Imm(0),
69 BaseReg(StringRef()), IndexReg(StringRef()),
70 Scale(1) {}
71 // Compund immediate expression
72 IntelExpr(int64_t imm, bool needBracs) : IntelExpr(needBracs) {
73 Imm = imm;
75 // [Reg + ImmediateExpression]
76 // We don't bother to emit an immediate expression evaluated to zero
77 IntelExpr(StringRef reg, int64_t imm = 0, unsigned scale = 0,
78 bool needBracs = true) :
79 IntelExpr(imm, needBracs) {
80 IndexReg = reg;
81 if (scale)
82 Scale = scale;
84 // [BaseReg + IndexReg * ScaleExpression + ImmediateExpression]
85 IntelExpr(StringRef baseReg, StringRef indexReg, unsigned scale = 0,
86 int64_t imm = 0, bool needBracs = true) :
87 IntelExpr(indexReg, imm, scale, needBracs) {
88 BaseReg = baseReg;
90 bool hasBaseReg() const {
91 return BaseReg.size();
93 bool hasIndexReg() const {
94 return IndexReg.size();
96 bool hasRegs() const {
97 return hasBaseReg() || hasIndexReg();
99 bool isValid() const {
100 return (Scale == 1) ||
101 (hasIndexReg() && (Scale == 2 || Scale == 4 || Scale == 8));
105 struct AsmRewrite {
106 AsmRewriteKind Kind;
107 SMLoc Loc;
108 unsigned Len;
109 int64_t Val;
110 StringRef Label;
111 IntelExpr IntelExp;
113 public:
114 AsmRewrite(AsmRewriteKind kind, SMLoc loc, unsigned len = 0, int64_t val = 0)
115 : Kind(kind), Loc(loc), Len(len), Val(val) {}
116 AsmRewrite(AsmRewriteKind kind, SMLoc loc, unsigned len, StringRef label)
117 : AsmRewrite(kind, loc, len) { Label = label; }
118 AsmRewrite(SMLoc loc, unsigned len, IntelExpr exp)
119 : AsmRewrite(AOK_IntelExpr, loc, len) { IntelExp = exp; }
122 struct ParseInstructionInfo {
123 SmallVectorImpl<AsmRewrite> *AsmRewrites = nullptr;
125 ParseInstructionInfo() = default;
126 ParseInstructionInfo(SmallVectorImpl<AsmRewrite> *rewrites)
127 : AsmRewrites(rewrites) {}
/// Outcome of trying to match a single operand.
enum OperandMatchResultTy {
  MatchOperand_Success,  // operand matched successfully
  MatchOperand_NoMatch,  // operand did not match
  MatchOperand_ParseFail // operand matched but had errors
};
/// The three outcomes a DiagnosticPredicate can carry.
enum class DiagnosticPredicateTy {
  Match,
  NearMatch,
  NoMatch,
};
142 // When an operand is parsed, the assembler will try to iterate through a set of
143 // possible operand classes that the operand might match and call the
144 // corresponding PredicateMethod to determine that.
146 // If there are two AsmOperands that would give a specific diagnostic if there
147 // is no match, there is currently no mechanism to distinguish which operand is
148 // a closer match. The DiagnosticPredicate distinguishes between 'completely
149 // no match' and 'near match', so the assembler can decide whether to give a
150 // specific diagnostic, or use 'InvalidOperand' and continue to find a
151 // 'better matching' diagnostic.
153 // For example:
154 // opcode opnd0, onpd1, opnd2
156 // where:
157 // opnd2 could be an 'immediate of range [-8, 7]'
158 // opnd2 could be a 'register + shift/extend'.
160 // If opnd2 is a valid register, but with a wrong shift/extend suffix, it makes
161 // little sense to give a diagnostic that the operand should be an immediate
162 // in range [-8, 7].
164 // This is a light-weight alternative to the 'NearMissInfo' approach
165 // below which collects *all* possible diagnostics. This alternative
166 // is optional and fully backward compatible with existing
167 // PredicateMethods that return a 'bool' (match or no match).
168 struct DiagnosticPredicate {
169 DiagnosticPredicateTy Type;
171 explicit DiagnosticPredicate(bool Match)
172 : Type(Match ? DiagnosticPredicateTy::Match
173 : DiagnosticPredicateTy::NearMatch) {}
174 DiagnosticPredicate(DiagnosticPredicateTy T) : Type(T) {}
175 DiagnosticPredicate(const DiagnosticPredicate &) = default;
177 operator bool() const { return Type == DiagnosticPredicateTy::Match; }
178 bool isMatch() const { return Type == DiagnosticPredicateTy::Match; }
179 bool isNearMatch() const { return Type == DiagnosticPredicateTy::NearMatch; }
180 bool isNoMatch() const { return Type == DiagnosticPredicateTy::NoMatch; }
// When matching of an assembly instruction fails, there may be multiple
// encodings that are close to being a match. It's often ambiguous which one
// the programmer intended to use, so we want to report an error which mentions
// each of these "near-miss" encodings. This struct contains information about
// one such encoding, and why it did not match the parsed instruction.
class NearMissInfo {
public:
  enum NearMissKind {
    NoNearMiss,
    NearMissOperand,
    NearMissFeature,
    NearMissPredicate,
    NearMissTooFewOperands,
  };

  // The encoding is valid for the parsed assembly string. This is only used
  // internally to the table-generated assembly matcher.
  static NearMissInfo getSuccess() { return NearMissInfo(); }

  // The instruction encoding is not valid because it requires some target
  // features that are not currently enabled. MissingFeatures has a bit set for
  // each feature that the encoding needs but which is not enabled.
  static NearMissInfo getMissedFeature(uint64_t MissingFeatures) {
    NearMissInfo Result;
    Result.Kind = NearMissFeature;
    Result.Features = MissingFeatures;
    return Result;
  }

  // The instruction encoding is not valid because the target-specific
  // predicate function returned an error code. FailureCode is the
  // target-specific error code returned by the predicate.
  static NearMissInfo getMissedPredicate(unsigned FailureCode) {
    NearMissInfo Result;
    Result.Kind = NearMissPredicate;
    Result.PredicateError = FailureCode;
    return Result;
  }

  // The instruction encoding is not valid because one (and only one) parsed
  // operand is not of the correct type. OperandError is the error code
  // relating to the operand class expected by the encoding. OperandClass is
  // the type of the expected operand. Opcode is the opcode of the encoding.
  // OperandIndex is the index into the parsed operand list.
  static NearMissInfo getMissedOperand(unsigned OperandError,
                                       unsigned OperandClass, unsigned Opcode,
                                       unsigned OperandIndex) {
    NearMissInfo Result;
    Result.Kind = NearMissOperand;
    Result.MissedOperand.Error = OperandError;
    Result.MissedOperand.Class = OperandClass;
    Result.MissedOperand.Opcode = Opcode;
    Result.MissedOperand.Index = OperandIndex;
    return Result;
  }

  // The instruction encoding is not valid because it expects more operands
  // than were parsed. OperandClass is the class of the expected operand that
  // was not provided. Opcode is the instruction encoding.
  static NearMissInfo getTooFewOperands(unsigned OperandClass,
                                        unsigned Opcode) {
    NearMissInfo Result;
    Result.Kind = NearMissTooFewOperands;
    Result.TooFewOperands.Class = OperandClass;
    Result.TooFewOperands.Opcode = Opcode;
    return Result;
  }

  // True for every kind except NoNearMiss, i.e. false means "success".
  operator bool() const { return Kind != NoNearMiss; }

  NearMissKind getKind() const { return Kind; }

  // Feature flags required by the instruction, that the current target does
  // not have.
  uint64_t getFeatures() const {
    assert(Kind == NearMissFeature && "not a missing-feature near-miss");
    return Features;
  }

  // Error code returned by the target predicate when validating this
  // instruction encoding.
  unsigned getPredicateError() const {
    assert(Kind == NearMissPredicate && "not a predicate near-miss");
    return PredicateError;
  }

  // MatchClassKind of the operand that we expected to see.
  unsigned getOperandClass() const {
    assert((Kind == NearMissOperand || Kind == NearMissTooFewOperands) &&
           "near-miss kind carries no operand class");
    // Valid for both kinds: Class sits in the prefix shared by the two structs.
    return MissedOperand.Class;
  }

  // Opcode of the encoding we were trying to match.
  unsigned getOpcode() const {
    assert((Kind == NearMissOperand || Kind == NearMissTooFewOperands) &&
           "near-miss kind carries no opcode");
    // Valid for both kinds: Opcode sits in the prefix shared by the two
    // structs.
    return MissedOperand.Opcode;
  }

  // Error code returned when validating the operand.
  unsigned getOperandError() const {
    assert(Kind == NearMissOperand && "not an operand near-miss");
    return MissedOperand.Error;
  }

  // Index of the actual operand we were trying to match in the list of parsed
  // operands.
  unsigned getOperandIndex() const {
    assert(Kind == NearMissOperand && "not an operand near-miss");
    return MissedOperand.Index;
  }

private:
  NearMissKind Kind;

  // These two structs share a common prefix, so we can safely rely on the fact
  // that they overlap in the union.
  struct MissedOpInfo {
    unsigned Class;
    unsigned Opcode;
    unsigned Error;
    unsigned Index;
  };

  struct TooFewOperandsInfo {
    unsigned Class;
    unsigned Opcode;
  };

  // Payload; which member is meaningful is determined by Kind.
  union {
    uint64_t Features;
    unsigned PredicateError;
    MissedOpInfo MissedOperand;
    TooFewOperandsInfo TooFewOperands;
  };

  // Private: instances are created through the static factories above.
  NearMissInfo() : Kind(NoNearMiss) {}
};
316 /// MCTargetAsmParser - Generic interface to target specific assembly parsers.
317 class MCTargetAsmParser : public MCAsmParserExtension {
318 public:
319 enum MatchResultTy {
320 Match_InvalidOperand,
321 Match_InvalidTiedOperand,
322 Match_MissingFeature,
323 Match_MnemonicFail,
324 Match_Success,
325 Match_NearMisses,
326 FIRST_TARGET_MATCH_RESULT_TY
329 protected: // Can only create subclasses.
330 MCTargetAsmParser(MCTargetOptions const &, const MCSubtargetInfo &STI,
331 const MCInstrInfo &MII);
333 /// Create a copy of STI and return a non-const reference to it.
334 MCSubtargetInfo &copySTI();
336 /// AvailableFeatures - The current set of available features.
337 uint64_t AvailableFeatures = 0;
339 /// ParsingInlineAsm - Are we parsing ms-style inline assembly?
340 bool ParsingInlineAsm = false;
342 /// SemaCallback - The Sema callback implementation. Must be set when parsing
343 /// ms-style inline assembly.
344 MCAsmParserSemaCallback *SemaCallback;
346 /// Set of options which affects instrumentation of inline assembly.
347 MCTargetOptions MCOptions;
349 /// Current STI.
350 const MCSubtargetInfo *STI;
352 const MCInstrInfo &MII;
354 public:
355 MCTargetAsmParser(const MCTargetAsmParser &) = delete;
356 MCTargetAsmParser &operator=(const MCTargetAsmParser &) = delete;
358 ~MCTargetAsmParser() override;
360 const MCSubtargetInfo &getSTI() const;
362 uint64_t getAvailableFeatures() const { return AvailableFeatures; }
363 void setAvailableFeatures(uint64_t Value) { AvailableFeatures = Value; }
365 bool isParsingInlineAsm () { return ParsingInlineAsm; }
366 void setParsingInlineAsm (bool Value) { ParsingInlineAsm = Value; }
368 MCTargetOptions getTargetOptions() const { return MCOptions; }
370 void setSemaCallback(MCAsmParserSemaCallback *Callback) {
371 SemaCallback = Callback;
374 // Target-specific parsing of expression.
375 virtual bool parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) {
376 return getParser().parsePrimaryExpr(Res, EndLoc);
379 virtual bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
380 SMLoc &EndLoc) = 0;
382 /// Sets frame register corresponding to the current MachineFunction.
383 virtual void SetFrameRegister(unsigned RegNo) {}
385 /// ParseInstruction - Parse one assembly instruction.
387 /// The parser is positioned following the instruction name. The target
388 /// specific instruction parser should parse the entire instruction and
389 /// construct the appropriate MCInst, or emit an error. On success, the entire
390 /// line should be parsed up to and including the end-of-statement token. On
391 /// failure, the parser is not required to read to the end of the line.
393 /// \param Name - The instruction name.
394 /// \param NameLoc - The source location of the name.
395 /// \param Operands [out] - The list of parsed operands, this returns
396 /// ownership of them to the caller.
397 /// \return True on failure.
398 virtual bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
399 SMLoc NameLoc, OperandVector &Operands) = 0;
400 virtual bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
401 AsmToken Token, OperandVector &Operands) {
402 return ParseInstruction(Info, Name, Token.getLoc(), Operands);
405 /// ParseDirective - Parse a target specific assembler directive
407 /// The parser is positioned following the directive name. The target
408 /// specific directive parser should parse the entire directive doing or
409 /// recording any target specific work, or return true and do nothing if the
410 /// directive is not target specific. If the directive is specific for
411 /// the target, the entire line is parsed up to and including the
412 /// end-of-statement token and false is returned.
414 /// \param DirectiveID - the identifier token of the directive.
415 virtual bool ParseDirective(AsmToken DirectiveID) = 0;
417 /// MatchAndEmitInstruction - Recognize a series of operands of a parsed
418 /// instruction as an actual MCInst and emit it to the specified MCStreamer.
419 /// This returns false on success and returns true on failure to match.
421 /// On failure, the target parser is responsible for emitting a diagnostic
422 /// explaining the match failure.
423 virtual bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
424 OperandVector &Operands, MCStreamer &Out,
425 uint64_t &ErrorInfo,
426 bool MatchingInlineAsm) = 0;
428 /// Allows targets to let registers opt out of clobber lists.
429 virtual bool OmitRegisterFromClobberLists(unsigned RegNo) { return false; }
431 /// Allow a target to add special case operand matching for things that
432 /// tblgen doesn't/can't handle effectively. For example, literal
433 /// immediates on ARM. TableGen expects a token operand, but the parser
434 /// will recognize them as immediates.
435 virtual unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
436 unsigned Kind) {
437 return Match_InvalidOperand;
440 /// Validate the instruction match against any complex target predicates
441 /// before rendering any operands to it.
442 virtual unsigned
443 checkEarlyTargetMatchPredicate(MCInst &Inst, const OperandVector &Operands) {
444 return Match_Success;
447 /// checkTargetMatchPredicate - Validate the instruction match against
448 /// any complex target predicates not expressible via match classes.
449 virtual unsigned checkTargetMatchPredicate(MCInst &Inst) {
450 return Match_Success;
453 virtual void convertToMapAndConstraints(unsigned Kind,
454 const OperandVector &Operands) = 0;
456 /// Returns whether two registers are equal and is used by the tied-operands
457 /// checks in the AsmMatcher. This method can be overridden allow e.g. a
458 /// sub- or super-register as the tied operand.
459 virtual bool regsEqual(const MCParsedAsmOperand &Op1,
460 const MCParsedAsmOperand &Op2) const {
461 assert(Op1.isReg() && Op2.isReg() && "Operands not all regs");
462 return Op1.getReg() == Op2.getReg();
465 // Return whether this parser uses assignment statements with equals tokens
466 virtual bool equalIsAsmAssignment() { return true; };
467 // Return whether this start of statement identifier is a label
468 virtual bool isLabel(AsmToken &Token) { return true; };
469 // Return whether this parser accept star as start of statement
470 virtual bool starIsStartOfStatement() { return false; };
472 virtual const MCExpr *applyModifierToExpr(const MCExpr *E,
473 MCSymbolRefExpr::VariantKind,
474 MCContext &Ctx) {
475 return nullptr;
478 // For actions that have to be performed before a label is emitted
479 virtual void doBeforeLabelEmit(MCSymbol *Symbol) {}
481 virtual void onLabelParsed(MCSymbol *Symbol) {}
483 /// Ensure that all previously parsed instructions have been emitted to the
484 /// output streamer, if the target does not emit them immediately.
485 virtual void flushPendingInstructions(MCStreamer &Out) {}
487 virtual const MCExpr *createTargetUnaryExpr(const MCExpr *E,
488 AsmToken::TokenKind OperatorToken,
489 MCContext &Ctx) {
490 return nullptr;
493 // For any checks or cleanups at the end of parsing.
494 virtual void onEndOfFile() {}
497 } // end namespace llvm
499 #endif // LLVM_MC_MCPARSER_MCTARGETASMPARSER_H