1 //===- llvm/MC/MCTargetAsmParser.h - Target Assembly Parser -----*- C++ -*-===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 #ifndef LLVM_MC_MCPARSER_MCTARGETASMPARSER_H
10 #define LLVM_MC_MCPARSER_MCTARGETASMPARSER_H
12 #include "llvm/ADT/StringRef.h"
13 #include "llvm/MC/MCExpr.h"
14 #include "llvm/MC/MCInstrInfo.h"
15 #include "llvm/MC/MCParser/MCAsmLexer.h"
16 #include "llvm/MC/MCParser/MCParsedAsmOperand.h"
17 #include "llvm/MC/MCParser/MCAsmParserExtension.h"
18 #include "llvm/MC/MCTargetOptions.h"
19 #include "llvm/MC/SubtargetFeature.h"
20 #include "llvm/Support/SMLoc.h"
27 class MCParsedAsmOperand
;
29 class MCSubtargetInfo
;
30 template <typename T
> class SmallVectorImpl
;
// Alias for the operand list type: a SmallVectorImpl whose elements are
// std::unique_ptr<MCParsedAsmOperand>, i.e. each element owns one operand.
32 using OperandVector
= SmallVectorImpl
<std::unique_ptr
<MCParsedAsmOperand
>>;
35 AOK_Align
, // Rewrite align as .align.
36 AOK_EVEN
, // Rewrite even as .even.
37 AOK_Emit
, // Rewrite _emit as .byte.
38 AOK_Input
, // Rewrite in terms of $N.
39 AOK_Output
, // Rewrite in terms of $N.
40 AOK_SizeDirective
, // Add a sizing directive (e.g., dword ptr).
41 AOK_Label
, // Rewrite local labels.
42 AOK_EndOfStatement
, // Add EndOfStatement (e.g., "\n\t").
43 AOK_Skip
, // Skip emission (e.g., offset/type operators).
44 AOK_IntelExpr
// SizeDirective SymDisp [BaseReg + IndexReg * Scale + ImmDisp]
47 const char AsmRewritePrecedence
[] = {
53 5, // AOK_SizeDirective
55 5, // AOK_EndOfStatement
60 // Represent the various parts which make up an Intel expression,
61 // used for emitting compound Intel expressions
69 IntelExpr(bool needBracs
= false) : NeedBracs(needBracs
), Imm(0),
70 BaseReg(StringRef()), IndexReg(StringRef()),
72 // Compound immediate expression
73 IntelExpr(int64_t imm
, bool needBracs
) : IntelExpr(needBracs
) {
76 // [Reg + ImmediateExpression]
77 // We don't bother to emit an immediate expression evaluated to zero
78 IntelExpr(StringRef reg
, int64_t imm
= 0, unsigned scale
= 0,
79 bool needBracs
= true) :
80 IntelExpr(imm
, needBracs
) {
85 // [BaseReg + IndexReg * ScaleExpression + ImmediateExpression]
86 IntelExpr(StringRef baseReg
, StringRef indexReg
, unsigned scale
= 0,
87 int64_t imm
= 0, bool needBracs
= true) :
88 IntelExpr(indexReg
, imm
, scale
, needBracs
) {
91 bool hasBaseReg() const {
92 return BaseReg
.size();
94 bool hasIndexReg() const {
95 return IndexReg
.size();
97 bool hasRegs() const {
98 return hasBaseReg() || hasIndexReg();
100 bool isValid() const {
101 return (Scale
== 1) ||
102 (hasIndexReg() && (Scale
== 2 || Scale
== 4 || Scale
== 8));
// General constructor: stores the rewrite kind, the source location, the
// length and an integer value. Both len and val default to 0.
115 AsmRewrite(AsmRewriteKind kind
, SMLoc loc
, unsigned len
= 0, int64_t val
= 0)
116 : Kind(kind
), Loc(loc
), Len(len
), Val(val
) {}
// Label constructor: delegates to the general constructor with kind, loc and
// len (so Val takes its default of 0), then assigns label to Label.
117 AsmRewrite(AsmRewriteKind kind
, SMLoc loc
, unsigned len
, StringRef label
)
118 : AsmRewrite(kind
, loc
, len
) { Label
= label
; }
// Intel-expression constructor: the kind is always AOK_IntelExpr. Delegates to
// the general constructor with loc and len, then assigns exp to IntelExp.
119 AsmRewrite(SMLoc loc
, unsigned len
, IntelExpr exp
)
120 : AsmRewrite(AOK_IntelExpr
, loc
, len
) { IntelExp
= exp
; }
123 struct ParseInstructionInfo
{
124 SmallVectorImpl
<AsmRewrite
> *AsmRewrites
= nullptr;
126 ParseInstructionInfo() = default;
// Stores the caller-supplied pointer to the rewrite list in AsmRewrites.
// The constructor is not `explicit`, so a SmallVectorImpl<AsmRewrite>* converts
// implicitly to ParseInstructionInfo.
127 ParseInstructionInfo(SmallVectorImpl
<AsmRewrite
> *rewrites
)
128 : AsmRewrites(rewrites
) {}
131 enum OperandMatchResultTy
{
132 MatchOperand_Success
, // operand matched successfully
133 MatchOperand_NoMatch
, // operand did not match
134 MatchOperand_ParseFail
// operand matched but had errors
137 enum class DiagnosticPredicateTy
{
143 // When an operand is parsed, the assembler will try to iterate through a set of
144 // possible operand classes that the operand might match and call the
145 // corresponding PredicateMethod to determine that.
147 // If there are two AsmOperands that would give a specific diagnostic if there
148 // is no match, there is currently no mechanism to distinguish which operand is
149 // a closer match. The DiagnosticPredicate distinguishes between 'completely
150 // no match' and 'near match', so the assembler can decide whether to give a
151 // specific diagnostic, or use 'InvalidOperand' and continue to find a
152 // 'better matching' diagnostic.
155 // opcode opnd0, opnd1, opnd2
158 // opnd2 could be an 'immediate of range [-8, 7]'
159 // opnd2 could be a 'register + shift/extend'.
161 // If opnd2 is a valid register, but with a wrong shift/extend suffix, it makes
162 // little sense to give a diagnostic that the operand should be an immediate
165 // This is a light-weight alternative to the 'NearMissInfo' approach
166 // below which collects *all* possible diagnostics. This alternative
167 // is optional and fully backward compatible with existing
168 // PredicateMethods that return a 'bool' (match or no match).
169 struct DiagnosticPredicate
{
170 DiagnosticPredicateTy Type
;
// Builds a predicate result from a plain bool. true becomes Match; note that
// false becomes NearMatch (not NoMatch).
172 explicit DiagnosticPredicate(bool Match
)
173 : Type(Match
? DiagnosticPredicateTy::Match
174 : DiagnosticPredicateTy::NearMatch
) {}
175 DiagnosticPredicate(DiagnosticPredicateTy T
) : Type(T
) {}
176 DiagnosticPredicate(const DiagnosticPredicate
&) = default;
// Implicit conversion to bool: true only when Type is Match; both NearMatch
// and NoMatch convert to false.
178 operator bool() const { return Type
== DiagnosticPredicateTy::Match
; }
// True when Type is DiagnosticPredicateTy::Match.
179 bool isMatch() const { return Type
== DiagnosticPredicateTy::Match
; }
// True when Type is DiagnosticPredicateTy::NearMatch.
180 bool isNearMatch() const { return Type
== DiagnosticPredicateTy::NearMatch
; }
// True when Type is DiagnosticPredicateTy::NoMatch.
181 bool isNoMatch() const { return Type
== DiagnosticPredicateTy::NoMatch
; }
184 // When matching of an assembly instruction fails, there may be multiple
185 // encodings that are close to being a match. It's often ambiguous which one
186 // the programmer intended to use, so we want to report an error which mentions
187 // each of these "near-miss" encodings. This struct contains information about
188 // one such encoding, and why it did not match the parsed instruction.
196 NearMissTooFewOperands
,
199 // The encoding is valid for the parsed assembly string. This is only used
200 // internally to the table-generated assembly matcher.
// Returns a default-constructed NearMissInfo.
201 static NearMissInfo
getSuccess() { return NearMissInfo(); }
203 // The instruction encoding is not valid because it requires some target
204 // features that are not currently enabled. MissingFeatures has a bit set for
205 // each feature that the encoding needs but which is not enabled.
206 static NearMissInfo
getMissedFeature(const FeatureBitset
&MissingFeatures
) {
208 Result
.Kind
= NearMissFeature
;
209 Result
.Features
= MissingFeatures
;
213 // The instruction encoding is not valid because the target-specific
214 // predicate function returned an error code. FailureCode is the
215 // target-specific error code returned by the predicate.
216 static NearMissInfo
getMissedPredicate(unsigned FailureCode
) {
218 Result
.Kind
= NearMissPredicate
;
219 Result
.PredicateError
= FailureCode
;
223 // The instruction encoding is not valid because one (and only one) parsed
224 // operand is not of the correct type. OperandError is the error code
225 // relating to the operand class expected by the encoding. OperandClass is
226 // the type of the expected operand. Opcode is the opcode of the encoding.
227 // OperandIndex is the index into the parsed operand list.
228 static NearMissInfo
getMissedOperand(unsigned OperandError
,
229 unsigned OperandClass
, unsigned Opcode
,
230 unsigned OperandIndex
) {
232 Result
.Kind
= NearMissOperand
;
233 Result
.MissedOperand
.Error
= OperandError
;
234 Result
.MissedOperand
.Class
= OperandClass
;
235 Result
.MissedOperand
.Opcode
= Opcode
;
236 Result
.MissedOperand
.Index
= OperandIndex
;
240 // The instruction encoding is not valid because it expects more operands
241 // than were parsed. OperandClass is the class of the expected operand that
242 // was not provided. Opcode is the instruction encoding.
243 static NearMissInfo
getTooFewOperands(unsigned OperandClass
,
246 Result
.Kind
= NearMissTooFewOperands
;
247 Result
.TooFewOperands
.Class
= OperandClass
;
248 Result
.TooFewOperands
.Opcode
= Opcode
;
// Implicit conversion to bool: true when Kind is anything other than
// NoNearMiss, false when Kind is NoNearMiss.
252 operator bool() const { return Kind
!= NoNearMiss
; }
// Returns the stored Kind.
254 NearMissKind
getKind() const { return Kind
; }
256 // Feature flags required by the instruction, that the current target does
258 const FeatureBitset
& getFeatures() const {
259 assert(Kind
== NearMissFeature
);
262 // Error code returned by the target predicate when validating this
263 // instruction encoding.
264 unsigned getPredicateError() const {
265 assert(Kind
== NearMissPredicate
);
266 return PredicateError
;
268 // MatchClassKind of the operand that we expected to see.
269 unsigned getOperandClass() const {
270 assert(Kind
== NearMissOperand
|| Kind
== NearMissTooFewOperands
);
271 return MissedOperand
.Class
;
273 // Opcode of the encoding we were trying to match.
274 unsigned getOpcode() const {
275 assert(Kind
== NearMissOperand
|| Kind
== NearMissTooFewOperands
);
276 return MissedOperand
.Opcode
;
278 // Error code returned when validating the operand.
279 unsigned getOperandError() const {
280 assert(Kind
== NearMissOperand
);
281 return MissedOperand
.Error
;
283 // Index of the actual operand we were trying to match in the list of parsed
285 unsigned getOperandIndex() const {
286 assert(Kind
== NearMissOperand
);
287 return MissedOperand
.Index
;
293 // These two structs share a common prefix, so we can safely rely on the fact
294 // that they overlap in the union.
295 struct MissedOpInfo
{
302 struct TooFewOperandsInfo
{
308 FeatureBitset Features
;
309 unsigned PredicateError
;
310 MissedOpInfo MissedOperand
;
311 TooFewOperandsInfo TooFewOperands
;
// Default constructor: sets Kind to NoNearMiss and initializes nothing else
// explicitly.
314 NearMissInfo() : Kind(NoNearMiss
) {}
317 /// MCTargetAsmParser - Generic interface to target specific assembly parsers.
318 class MCTargetAsmParser
: public MCAsmParserExtension
{
321 Match_InvalidOperand
,
322 Match_InvalidTiedOperand
,
323 Match_MissingFeature
,
327 FIRST_TARGET_MATCH_RESULT_TY
330 protected: // Can only create subclasses.
331 MCTargetAsmParser(MCTargetOptions
const &, const MCSubtargetInfo
&STI
,
332 const MCInstrInfo
&MII
);
334 /// Create a copy of STI and return a non-const reference to it.
335 MCSubtargetInfo
©STI();
337 /// AvailableFeatures - The current set of available features.
338 FeatureBitset AvailableFeatures
;
340 /// ParsingInlineAsm - Are we parsing ms-style inline assembly?
341 bool ParsingInlineAsm
= false;
343 /// SemaCallback - The Sema callback implementation. Must be set when parsing
344 /// ms-style inline assembly.
345 MCAsmParserSemaCallback
*SemaCallback
;
347 /// Set of options which affect instrumentation of inline assembly.
348 MCTargetOptions MCOptions
;
351 const MCSubtargetInfo
*STI
;
353 const MCInstrInfo
&MII
;
356 MCTargetAsmParser(const MCTargetAsmParser
&) = delete;
357 MCTargetAsmParser
&operator=(const MCTargetAsmParser
&) = delete;
359 ~MCTargetAsmParser() override
;
361 const MCSubtargetInfo
&getSTI() const;
363 const FeatureBitset
& getAvailableFeatures() const {
364 return AvailableFeatures
;
366 void setAvailableFeatures(const FeatureBitset
& Value
) {
367 AvailableFeatures
= Value
;
370 bool isParsingInlineAsm () { return ParsingInlineAsm
; }
// Stores Value in the ParsingInlineAsm flag.
371 void setParsingInlineAsm (bool Value
) { ParsingInlineAsm
= Value
; }
// Returns the MCOptions member. The return type is MCTargetOptions by value,
// not a reference, so each call yields a copy.
373 MCTargetOptions
getTargetOptions() const { return MCOptions
; }
375 void setSemaCallback(MCAsmParserSemaCallback
*Callback
) {
376 SemaCallback
= Callback
;
379 // Target-specific parsing of expression.
380 virtual bool parsePrimaryExpr(const MCExpr
*&Res
, SMLoc
&EndLoc
) {
381 return getParser().parsePrimaryExpr(Res
, EndLoc
);
384 virtual bool ParseRegister(unsigned &RegNo
, SMLoc
&StartLoc
,
387 /// ParseInstruction - Parse one assembly instruction.
389 /// The parser is positioned following the instruction name. The target
390 /// specific instruction parser should parse the entire instruction and
391 /// construct the appropriate MCInst, or emit an error. On success, the entire
392 /// line should be parsed up to and including the end-of-statement token. On
393 /// failure, the parser is not required to read to the end of the line.
395 /// \param Name - The instruction name.
396 /// \param NameLoc - The source location of the name.
397 /// \param Operands [out] - The list of parsed operands, this returns
398 /// ownership of them to the caller.
399 /// \return True on failure.
400 virtual bool ParseInstruction(ParseInstructionInfo
&Info
, StringRef Name
,
401 SMLoc NameLoc
, OperandVector
&Operands
) = 0;
402 virtual bool ParseInstruction(ParseInstructionInfo
&Info
, StringRef Name
,
403 AsmToken Token
, OperandVector
&Operands
) {
404 return ParseInstruction(Info
, Name
, Token
.getLoc(), Operands
);
407 /// ParseDirective - Parse a target specific assembler directive
409 /// The parser is positioned following the directive name. The target
410 /// specific directive parser should parse the entire directive doing or
411 /// recording any target specific work, or return true and do nothing if the
412 /// directive is not target specific. If the directive is specific for
413 /// the target, the entire line is parsed up to and including the
414 /// end-of-statement token and false is returned.
416 /// \param DirectiveID - the identifier token of the directive.
417 virtual bool ParseDirective(AsmToken DirectiveID
) = 0;
419 /// MatchAndEmitInstruction - Recognize a series of operands of a parsed
420 /// instruction as an actual MCInst and emit it to the specified MCStreamer.
421 /// This returns false on success and returns true on failure to match.
423 /// On failure, the target parser is responsible for emitting a diagnostic
424 /// explaining the match failure.
425 virtual bool MatchAndEmitInstruction(SMLoc IDLoc
, unsigned &Opcode
,
426 OperandVector
&Operands
, MCStreamer
&Out
,
428 bool MatchingInlineAsm
) = 0;
430 /// Allows targets to let registers opt out of clobber lists.
/// The default implementation ignores RegNo and returns false.
431 virtual bool OmitRegisterFromClobberLists(unsigned RegNo
) { return false; }
433 /// Allow a target to add special case operand matching for things that
434 /// tblgen doesn't/can't handle effectively. For example, literal
435 /// immediates on ARM. TableGen expects a token operand, but the parser
436 /// will recognize them as immediates.
437 virtual unsigned validateTargetOperandClass(MCParsedAsmOperand
&Op
,
439 return Match_InvalidOperand
;
442 /// Validate the instruction match against any complex target predicates
443 /// before rendering any operands to it.
445 checkEarlyTargetMatchPredicate(MCInst
&Inst
, const OperandVector
&Operands
) {
446 return Match_Success
;
449 /// checkTargetMatchPredicate - Validate the instruction match against
450 /// any complex target predicates not expressible via match classes.
451 virtual unsigned checkTargetMatchPredicate(MCInst
&Inst
) {
452 return Match_Success
;
455 virtual void convertToMapAndConstraints(unsigned Kind
,
456 const OperandVector
&Operands
) = 0;
458 /// Returns whether two registers are equal and is used by the tied-operands
459 /// checks in the AsmMatcher. This method can be overridden to allow e.g. a
460 /// sub- or super-register as the tied operand.
461 virtual bool regsEqual(const MCParsedAsmOperand
&Op1
,
462 const MCParsedAsmOperand
&Op2
) const {
463 assert(Op1
.isReg() && Op2
.isReg() && "Operands not all regs");
464 return Op1
.getReg() == Op2
.getReg();
467 // Return whether this parser uses assignment statements with equals tokens
468 virtual bool equalIsAsmAssignment() { return true; };
469 // Return whether this start of statement identifier is a label
470 virtual bool isLabel(AsmToken
&Token
) { return true; };
471 // Return whether this parser accept star as start of statement
472 virtual bool starIsStartOfStatement() { return false; };
474 virtual const MCExpr
*applyModifierToExpr(const MCExpr
*E
,
475 MCSymbolRefExpr::VariantKind
,
480 // For actions that have to be performed before a label is emitted
// The default implementation is empty.
481 virtual void doBeforeLabelEmit(MCSymbol
*Symbol
) {}
// Overridable hook that receives an MCSymbol; the default implementation is
// empty. NOTE(review): presumably invoked once a label has been parsed;
// confirm against the caller.
483 virtual void onLabelParsed(MCSymbol
*Symbol
) {}
485 /// Ensure that all previously parsed instructions have been emitted to the
486 /// output streamer, if the target does not emit them immediately.
/// The default implementation is empty.
487 virtual void flushPendingInstructions(MCStreamer
&Out
) {}
489 virtual const MCExpr
*createTargetUnaryExpr(const MCExpr
*E
,
490 AsmToken::TokenKind OperatorToken
,
495 // For any checks or cleanups at the end of parsing.
// The default implementation is empty.
496 virtual void onEndOfFile() {}
499 } // end namespace llvm
501 #endif // LLVM_MC_MCPARSER_MCTARGETASMPARSER_H