1 //===- llvm/MC/MCTargetAsmParser.h - Target Assembly Parser -----*- C++ -*-===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 #ifndef LLVM_MC_MCPARSER_MCTARGETASMPARSER_H
10 #define LLVM_MC_MCPARSER_MCTARGETASMPARSER_H
12 #include "llvm/ADT/StringRef.h"
13 #include "llvm/MC/MCExpr.h"
14 #include "llvm/MC/MCInstrInfo.h"
15 #include "llvm/MC/MCParser/MCAsmLexer.h"
16 #include "llvm/MC/MCParser/MCParsedAsmOperand.h"
17 #include "llvm/MC/MCParser/MCAsmParserExtension.h"
18 #include "llvm/MC/MCTargetOptions.h"
19 #include "llvm/Support/SMLoc.h"
26 class MCParsedAsmOperand
;
28 class MCSubtargetInfo
;
29 template <typename T
> class SmallVectorImpl
;
31 using OperandVector
= SmallVectorImpl
<std::unique_ptr
<MCParsedAsmOperand
>>;
34 AOK_Align
, // Rewrite align as .align.
35 AOK_EVEN
, // Rewrite even as .even.
36 AOK_Emit
, // Rewrite _emit as .byte.
37 AOK_Input
, // Rewrite in terms of $N.
38 AOK_Output
, // Rewrite in terms of $N.
39 AOK_SizeDirective
, // Add a sizing directive (e.g., dword ptr).
40 AOK_Label
, // Rewrite local labels.
41 AOK_EndOfStatement
, // Add EndOfStatement (e.g., "\n\t").
42 AOK_Skip
, // Skip emission (e.g., offset/type operators).
43 AOK_IntelExpr
// SizeDirective SymDisp [BaseReg + IndexReg * Scale + ImmDisp]
46 const char AsmRewritePrecedence
[] = {
52 5, // AOK_SizeDirective
54 5, // AOK_EndOfStatement
59 // Represent the various parts which make up an Intel expression,
60 // used for emitting compound Intel expressions.
68 IntelExpr(bool needBracs
= false) : NeedBracs(needBracs
), Imm(0),
69 BaseReg(StringRef()), IndexReg(StringRef()),
71 // Compound immediate expression
72 IntelExpr(int64_t imm
, bool needBracs
) : IntelExpr(needBracs
) {
75 // [Reg + ImmediateExpression]
76 // We don't bother to emit an immediate expression evaluated to zero
77 IntelExpr(StringRef reg
, int64_t imm
= 0, unsigned scale
= 0,
78 bool needBracs
= true) :
79 IntelExpr(imm
, needBracs
) {
84 // [BaseReg + IndexReg * ScaleExpression + ImmediateExpression]
85 IntelExpr(StringRef baseReg
, StringRef indexReg
, unsigned scale
= 0,
86 int64_t imm
= 0, bool needBracs
= true) :
87 IntelExpr(indexReg
, imm
, scale
, needBracs
) {
90 bool hasBaseReg() const {
91 return BaseReg
.size();
93 bool hasIndexReg() const {
94 return IndexReg
.size();
96 bool hasRegs() const {
97 return hasBaseReg() || hasIndexReg();
99 bool isValid() const {
100 return (Scale
== 1) ||
101 (hasIndexReg() && (Scale
== 2 || Scale
== 4 || Scale
== 8));
114 AsmRewrite(AsmRewriteKind kind
, SMLoc loc
, unsigned len
= 0, int64_t val
= 0)
115 : Kind(kind
), Loc(loc
), Len(len
), Val(val
) {}
116 AsmRewrite(AsmRewriteKind kind
, SMLoc loc
, unsigned len
, StringRef label
)
117 : AsmRewrite(kind
, loc
, len
) { Label
= label
; }
118 AsmRewrite(SMLoc loc
, unsigned len
, IntelExpr exp
)
119 : AsmRewrite(AOK_IntelExpr
, loc
, len
) { IntelExp
= exp
; }
122 struct ParseInstructionInfo
{
/// Pointer to a vector of AsmRewrite entries; null unless one is supplied
/// through the constructor that takes a rewrites pointer.
123 SmallVectorImpl
<AsmRewrite
> *AsmRewrites
= nullptr;
125 ParseInstructionInfo() = default;
126 ParseInstructionInfo(SmallVectorImpl
<AsmRewrite
> *rewrites
)
127 : AsmRewrites(rewrites
) {}
130 enum OperandMatchResultTy
{
131 MatchOperand_Success
, // operand matched successfully
132 MatchOperand_NoMatch
, // operand did not match
133 MatchOperand_ParseFail
// operand matched but had errors
136 enum class DiagnosticPredicateTy
{
142 // When an operand is parsed, the assembler will try to iterate through a set of
143 // possible operand classes that the operand might match and call the
144 // corresponding PredicateMethod to determine that.
146 // If there are two AsmOperands that would give a specific diagnostic if there
147 // is no match, there is currently no mechanism to distinguish which operand is
148 // a closer match. The DiagnosticPredicate distinguishes between 'completely
149 // no match' and 'near match', so the assembler can decide whether to give a
150 // specific diagnostic, or use 'InvalidOperand' and continue to find a
151 // 'better matching' diagnostic.
154 // opcode opnd0, opnd1, opnd2
157 // opnd2 could be an 'immediate of range [-8, 7]'
158 // opnd2 could be a 'register + shift/extend'.
160 // If opnd2 is a valid register, but with a wrong shift/extend suffix, it makes
161 // little sense to give a diagnostic that the operand should be an immediate
164 // This is a light-weight alternative to the 'NearMissInfo' approach
165 // below which collects *all* possible diagnostics. This alternative
166 // is optional and fully backward compatible with existing
167 // PredicateMethods that return a 'bool' (match or no match).
168 struct DiagnosticPredicate
{
169 DiagnosticPredicateTy Type
;
171 explicit DiagnosticPredicate(bool Match
)
172 : Type(Match
? DiagnosticPredicateTy::Match
173 : DiagnosticPredicateTy::NearMatch
) {}
174 DiagnosticPredicate(DiagnosticPredicateTy T
) : Type(T
) {}
175 DiagnosticPredicate(const DiagnosticPredicate
&) = default;
177 operator bool() const { return Type
== DiagnosticPredicateTy::Match
; }
178 bool isMatch() const { return Type
== DiagnosticPredicateTy::Match
; }
179 bool isNearMatch() const { return Type
== DiagnosticPredicateTy::NearMatch
; }
180 bool isNoMatch() const { return Type
== DiagnosticPredicateTy::NoMatch
; }
183 // When matching of an assembly instruction fails, there may be multiple
184 // encodings that are close to being a match. It's often ambiguous which one
185 // the programmer intended to use, so we want to report an error which mentions
186 // each of these "near-miss" encodings. This struct contains information about
187 // one such encoding, and why it did not match the parsed instruction.
195 NearMissTooFewOperands
,
198 // The encoding is valid for the parsed assembly string. This is only used
199 // internally to the table-generated assembly matcher.
200 static NearMissInfo
getSuccess() { return NearMissInfo(); }
202 // The instruction encoding is not valid because it requires some target
203 // features that are not currently enabled. MissingFeatures has a bit set for
204 // each feature that the encoding needs but which is not enabled.
205 static NearMissInfo
getMissedFeature(uint64_t MissingFeatures
) {
207 Result
.Kind
= NearMissFeature
;
208 Result
.Features
= MissingFeatures
;
212 // The instruction encoding is not valid because the target-specific
213 // predicate function returned an error code. FailureCode is the
214 // target-specific error code returned by the predicate.
215 static NearMissInfo
getMissedPredicate(unsigned FailureCode
) {
217 Result
.Kind
= NearMissPredicate
;
218 Result
.PredicateError
= FailureCode
;
222 // The instruction encoding is not valid because one (and only one) parsed
223 // operand is not of the correct type. OperandError is the error code
224 // relating to the operand class expected by the encoding. OperandClass is
225 // the type of the expected operand. Opcode is the opcode of the encoding.
226 // OperandIndex is the index into the parsed operand list.
227 static NearMissInfo
getMissedOperand(unsigned OperandError
,
228 unsigned OperandClass
, unsigned Opcode
,
229 unsigned OperandIndex
) {
231 Result
.Kind
= NearMissOperand
;
232 Result
.MissedOperand
.Error
= OperandError
;
233 Result
.MissedOperand
.Class
= OperandClass
;
234 Result
.MissedOperand
.Opcode
= Opcode
;
235 Result
.MissedOperand
.Index
= OperandIndex
;
239 // The instruction encoding is not valid because it expects more operands
240 // than were parsed. OperandClass is the class of the expected operand that
241 // was not provided. Opcode is the instruction encoding.
242 static NearMissInfo
getTooFewOperands(unsigned OperandClass
,
245 Result
.Kind
= NearMissTooFewOperands
;
246 Result
.TooFewOperands
.Class
= OperandClass
;
247 Result
.TooFewOperands
.Opcode
= Opcode
;
251 operator bool() const { return Kind
!= NoNearMiss
; }
253 NearMissKind
getKind() const { return Kind
; }
255 // Feature flags required by the instruction, that the current target does
257 uint64_t getFeatures() const {
258 assert(Kind
== NearMissFeature
);
261 // Error code returned by the target predicate when validating this
262 // instruction encoding.
263 unsigned getPredicateError() const {
264 assert(Kind
== NearMissPredicate
);
265 return PredicateError
;
267 // MatchClassKind of the operand that we expected to see.
268 unsigned getOperandClass() const {
269 assert(Kind
== NearMissOperand
|| Kind
== NearMissTooFewOperands
);
270 return MissedOperand
.Class
;
272 // Opcode of the encoding we were trying to match.
273 unsigned getOpcode() const {
274 assert(Kind
== NearMissOperand
|| Kind
== NearMissTooFewOperands
);
275 return MissedOperand
.Opcode
;
277 // Error code returned when validating the operand.
278 unsigned getOperandError() const {
279 assert(Kind
== NearMissOperand
);
280 return MissedOperand
.Error
;
282 // Index of the actual operand we were trying to match in the list of parsed
284 unsigned getOperandIndex() const {
285 assert(Kind
== NearMissOperand
);
286 return MissedOperand
.Index
;
292 // These two structs share a common prefix, so we can safely rely on the fact
293 // that they overlap in the union.
294 struct MissedOpInfo
{
301 struct TooFewOperandsInfo
{
308 unsigned PredicateError
;
309 MissedOpInfo MissedOperand
;
310 TooFewOperandsInfo TooFewOperands
;
313 NearMissInfo() : Kind(NoNearMiss
) {}
316 /// MCTargetAsmParser - Generic interface to target specific assembly parsers.
317 class MCTargetAsmParser
: public MCAsmParserExtension
{
320 Match_InvalidOperand
,
321 Match_InvalidTiedOperand
,
322 Match_MissingFeature
,
326 FIRST_TARGET_MATCH_RESULT_TY
329 protected: // Can only create subclasses.
330 MCTargetAsmParser(MCTargetOptions
const &, const MCSubtargetInfo
&STI
,
331 const MCInstrInfo
&MII
);
333 /// Create a copy of STI and return a non-const reference to it.
334 MCSubtargetInfo
©STI();
336 /// AvailableFeatures - The current set of available features.
337 uint64_t AvailableFeatures
= 0;
339 /// ParsingInlineAsm - Are we parsing ms-style inline assembly?
340 bool ParsingInlineAsm
= false;
342 /// SemaCallback - The Sema callback implementation. Must be set when parsing
343 /// ms-style inline assembly.
344 MCAsmParserSemaCallback
*SemaCallback
;
346 /// Set of options which affect instrumentation of inline assembly.
347 MCTargetOptions MCOptions
;
350 const MCSubtargetInfo
*STI
;
352 const MCInstrInfo
&MII
;
355 MCTargetAsmParser(const MCTargetAsmParser
&) = delete;
356 MCTargetAsmParser
&operator=(const MCTargetAsmParser
&) = delete;
358 ~MCTargetAsmParser() override
;
360 const MCSubtargetInfo
&getSTI() const;
362 uint64_t getAvailableFeatures() const { return AvailableFeatures
; }
363 void setAvailableFeatures(uint64_t Value
) { AvailableFeatures
= Value
; }
365 bool isParsingInlineAsm () { return ParsingInlineAsm
; }
366 void setParsingInlineAsm (bool Value
) { ParsingInlineAsm
= Value
; }
368 MCTargetOptions
getTargetOptions() const { return MCOptions
; }
370 void setSemaCallback(MCAsmParserSemaCallback
*Callback
) {
371 SemaCallback
= Callback
;
374 // Target-specific parsing of expression.
375 virtual bool parsePrimaryExpr(const MCExpr
*&Res
, SMLoc
&EndLoc
) {
376 return getParser().parsePrimaryExpr(Res
, EndLoc
);
379 virtual bool ParseRegister(unsigned &RegNo
, SMLoc
&StartLoc
,
382 /// Sets frame register corresponding to the current MachineFunction.
383 virtual void SetFrameRegister(unsigned RegNo
) {}
385 /// ParseInstruction - Parse one assembly instruction.
387 /// The parser is positioned following the instruction name. The target
388 /// specific instruction parser should parse the entire instruction and
389 /// construct the appropriate MCInst, or emit an error. On success, the entire
390 /// line should be parsed up to and including the end-of-statement token. On
391 /// failure, the parser is not required to read to the end of the line.
393 /// \param Name - The instruction name.
394 /// \param NameLoc - The source location of the name.
395 /// \param Operands [out] - The list of parsed operands, this returns
396 /// ownership of them to the caller.
397 /// \return True on failure.
398 virtual bool ParseInstruction(ParseInstructionInfo
&Info
, StringRef Name
,
399 SMLoc NameLoc
, OperandVector
&Operands
) = 0;
400 virtual bool ParseInstruction(ParseInstructionInfo
&Info
, StringRef Name
,
401 AsmToken Token
, OperandVector
&Operands
) {
402 return ParseInstruction(Info
, Name
, Token
.getLoc(), Operands
);
405 /// ParseDirective - Parse a target specific assembler directive
407 /// The parser is positioned following the directive name. The target
408 /// specific directive parser should parse the entire directive doing or
409 /// recording any target specific work, or return true and do nothing if the
410 /// directive is not target specific. If the directive is specific for
411 /// the target, the entire line is parsed up to and including the
412 /// end-of-statement token and false is returned.
414 /// \param DirectiveID - the identifier token of the directive.
415 virtual bool ParseDirective(AsmToken DirectiveID
) = 0;
417 /// MatchAndEmitInstruction - Recognize a series of operands of a parsed
418 /// instruction as an actual MCInst and emit it to the specified MCStreamer.
419 /// This returns false on success and returns true on failure to match.
421 /// On failure, the target parser is responsible for emitting a diagnostic
422 /// explaining the match failure.
423 virtual bool MatchAndEmitInstruction(SMLoc IDLoc
, unsigned &Opcode
,
424 OperandVector
&Operands
, MCStreamer
&Out
,
426 bool MatchingInlineAsm
) = 0;
428 /// Allows targets to let registers opt out of clobber lists.
429 virtual bool OmitRegisterFromClobberLists(unsigned RegNo
) { return false; }
431 /// Allow a target to add special case operand matching for things that
432 /// tblgen doesn't/can't handle effectively. For example, literal
433 /// immediates on ARM. TableGen expects a token operand, but the parser
434 /// will recognize them as immediates.
435 virtual unsigned validateTargetOperandClass(MCParsedAsmOperand
&Op
,
437 return Match_InvalidOperand
;
440 /// Validate the instruction match against any complex target predicates
441 /// before rendering any operands to it.
443 checkEarlyTargetMatchPredicate(MCInst
&Inst
, const OperandVector
&Operands
) {
444 return Match_Success
;
447 /// checkTargetMatchPredicate - Validate the instruction match against
448 /// any complex target predicates not expressible via match classes.
449 virtual unsigned checkTargetMatchPredicate(MCInst
&Inst
) {
450 return Match_Success
;
453 virtual void convertToMapAndConstraints(unsigned Kind
,
454 const OperandVector
&Operands
) = 0;
456 /// Returns whether two registers are equal; used by the tied-operands
457 /// checks in the AsmMatcher. This method can be overridden to allow e.g. a
458 /// sub- or super-register as the tied operand.
459 virtual bool regsEqual(const MCParsedAsmOperand
&Op1
,
460 const MCParsedAsmOperand
&Op2
) const {
461 assert(Op1
.isReg() && Op2
.isReg() && "Operands not all regs");
462 return Op1
.getReg() == Op2
.getReg();
465 // Return whether this parser uses assignment statements with equals tokens
466 virtual bool equalIsAsmAssignment() { return true; };
467 // Return whether this start of statement identifier is a label
468 virtual bool isLabel(AsmToken
&Token
) { return true; };
469 // Return whether this parser accept star as start of statement
470 virtual bool starIsStartOfStatement() { return false; };
472 virtual const MCExpr
*applyModifierToExpr(const MCExpr
*E
,
473 MCSymbolRefExpr::VariantKind
,
478 // For actions that have to be performed before a label is emitted
479 virtual void doBeforeLabelEmit(MCSymbol
*Symbol
) {}
481 virtual void onLabelParsed(MCSymbol
*Symbol
) {}
483 /// Ensure that all previously parsed instructions have been emitted to the
484 /// output streamer, if the target does not emit them immediately.
485 virtual void flushPendingInstructions(MCStreamer
&Out
) {}
487 virtual const MCExpr
*createTargetUnaryExpr(const MCExpr
*E
,
488 AsmToken::TokenKind OperatorToken
,
493 // For any checks or cleanups at the end of parsing.
494 virtual void onEndOfFile() {}
497 } // end namespace llvm
499 #endif // LLVM_MC_MCPARSER_MCTARGETASMPARSER_H