1 //===-- X86AsmParser.cpp - Parse X86 assembly to MCInst instructions ------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 #include "MCTargetDesc/X86BaseInfo.h"
10 #include "MCTargetDesc/X86EncodingOptimization.h"
11 #include "MCTargetDesc/X86IntelInstPrinter.h"
12 #include "MCTargetDesc/X86MCExpr.h"
13 #include "MCTargetDesc/X86MCTargetDesc.h"
14 #include "MCTargetDesc/X86TargetStreamer.h"
15 #include "TargetInfo/X86TargetInfo.h"
16 #include "X86Operand.h"
17 #include "llvm/ADT/STLExtras.h"
18 #include "llvm/ADT/SmallString.h"
19 #include "llvm/ADT/SmallVector.h"
20 #include "llvm/ADT/StringSwitch.h"
21 #include "llvm/ADT/Twine.h"
22 #include "llvm/MC/MCContext.h"
23 #include "llvm/MC/MCExpr.h"
24 #include "llvm/MC/MCInst.h"
25 #include "llvm/MC/MCInstrInfo.h"
26 #include "llvm/MC/MCParser/MCAsmLexer.h"
27 #include "llvm/MC/MCParser/MCAsmParser.h"
28 #include "llvm/MC/MCParser/MCParsedAsmOperand.h"
29 #include "llvm/MC/MCParser/MCTargetAsmParser.h"
30 #include "llvm/MC/MCRegisterInfo.h"
31 #include "llvm/MC/MCSection.h"
32 #include "llvm/MC/MCStreamer.h"
33 #include "llvm/MC/MCSubtargetInfo.h"
34 #include "llvm/MC/MCSymbol.h"
35 #include "llvm/MC/TargetRegistry.h"
36 #include "llvm/Support/CommandLine.h"
37 #include "llvm/Support/Compiler.h"
38 #include "llvm/Support/SourceMgr.h"
39 #include "llvm/Support/raw_ostream.h"
45 static cl::opt
<bool> LVIInlineAsmHardening(
46 "x86-experimental-lvi-inline-asm-hardening",
47 cl::desc("Harden inline assembly code that may be vulnerable to Load Value"
48 " Injection (LVI). This feature is experimental."), cl::Hidden
);
50 static bool checkScale(unsigned Scale
, StringRef
&ErrMsg
) {
51 if (Scale
!= 1 && Scale
!= 2 && Scale
!= 4 && Scale
!= 8) {
52 ErrMsg
= "scale factor in address must be 1, 2, 4 or 8";
60 // Including the generated SSE2AVX compression tables.
61 #define GET_X86_SSE2AVX_TABLE
62 #include "X86GenInstrMapping.inc"
64 static const char OpPrecedence
[] = {
89 class X86AsmParser
: public MCTargetAsmParser
{
90 ParseInstructionInfo
*InstInfo
;
92 unsigned ForcedDataPrefix
= 0;
104 OpcodePrefix ForcedOpcodePrefix
= OpcodePrefix_Default
;
107 DispEncoding_Default
,
112 DispEncoding ForcedDispEncoding
= DispEncoding_Default
;
114 // Does this instruction use an APX extended register?
115 bool UseApxExtendedReg
= false;
116 // Is this instruction explicitly required not to update flags?
117 bool ForcedNoFlag
= false;
120 SMLoc
consumeToken() {
121 MCAsmParser
&Parser
= getParser();
122 SMLoc Result
= Parser
.getTok().getLoc();
127 X86TargetStreamer
&getTargetStreamer() {
128 assert(getParser().getStreamer().getTargetStreamer() &&
129 "do not have a target streamer");
130 MCTargetStreamer
&TS
= *getParser().getStreamer().getTargetStreamer();
131 return static_cast<X86TargetStreamer
&>(TS
);
134 unsigned MatchInstruction(const OperandVector
&Operands
, MCInst
&Inst
,
135 uint64_t &ErrorInfo
, FeatureBitset
&MissingFeatures
,
136 bool matchingInlineAsm
, unsigned VariantID
= 0) {
137 // In Code16GCC mode, match as 32-bit.
139 SwitchMode(X86::Is32Bit
);
140 unsigned rv
= MatchInstructionImpl(Operands
, Inst
, ErrorInfo
,
141 MissingFeatures
, matchingInlineAsm
,
144 SwitchMode(X86::Is16Bit
);
148 enum InfixCalculatorTok
{
173 enum IntelOperatorKind
{
180 enum MasmOperatorKind
{
187 class InfixCalculator
{
188 typedef std::pair
< InfixCalculatorTok
, int64_t > ICToken
;
189 SmallVector
<InfixCalculatorTok
, 4> InfixOperatorStack
;
190 SmallVector
<ICToken
, 4> PostfixStack
;
192 bool isUnaryOperator(InfixCalculatorTok Op
) const {
193 return Op
== IC_NEG
|| Op
== IC_NOT
;
197 int64_t popOperand() {
198 assert (!PostfixStack
.empty() && "Poped an empty stack!");
199 ICToken Op
= PostfixStack
.pop_back_val();
200 if (!(Op
.first
== IC_IMM
|| Op
.first
== IC_REGISTER
))
201 return -1; // The invalid Scale value will be caught later by checkScale
204 void pushOperand(InfixCalculatorTok Op
, int64_t Val
= 0) {
205 assert ((Op
== IC_IMM
|| Op
== IC_REGISTER
) &&
206 "Unexpected operand!");
207 PostfixStack
.push_back(std::make_pair(Op
, Val
));
210 void popOperator() { InfixOperatorStack
.pop_back(); }
211 void pushOperator(InfixCalculatorTok Op
) {
212 // Push the new operator if the stack is empty.
213 if (InfixOperatorStack
.empty()) {
214 InfixOperatorStack
.push_back(Op
);
218 // Push the new operator if it has a higher precedence than the operator
219 // on the top of the stack or the operator on the top of the stack is a
221 unsigned Idx
= InfixOperatorStack
.size() - 1;
222 InfixCalculatorTok StackOp
= InfixOperatorStack
[Idx
];
223 if (OpPrecedence
[Op
] > OpPrecedence
[StackOp
] || StackOp
== IC_LPAREN
) {
224 InfixOperatorStack
.push_back(Op
);
228 // The operator on the top of the stack has higher precedence than the
230 unsigned ParenCount
= 0;
232 // Nothing to process.
233 if (InfixOperatorStack
.empty())
236 Idx
= InfixOperatorStack
.size() - 1;
237 StackOp
= InfixOperatorStack
[Idx
];
238 if (!(OpPrecedence
[StackOp
] >= OpPrecedence
[Op
] || ParenCount
))
241 // If we have an even parentheses count and we see a left parentheses,
242 // then stop processing.
243 if (!ParenCount
&& StackOp
== IC_LPAREN
)
246 if (StackOp
== IC_RPAREN
) {
248 InfixOperatorStack
.pop_back();
249 } else if (StackOp
== IC_LPAREN
) {
251 InfixOperatorStack
.pop_back();
253 InfixOperatorStack
.pop_back();
254 PostfixStack
.push_back(std::make_pair(StackOp
, 0));
257 // Push the new operator.
258 InfixOperatorStack
.push_back(Op
);
262 // Push any remaining operators onto the postfix stack.
263 while (!InfixOperatorStack
.empty()) {
264 InfixCalculatorTok StackOp
= InfixOperatorStack
.pop_back_val();
265 if (StackOp
!= IC_LPAREN
&& StackOp
!= IC_RPAREN
)
266 PostfixStack
.push_back(std::make_pair(StackOp
, 0));
269 if (PostfixStack
.empty())
272 SmallVector
<ICToken
, 16> OperandStack
;
273 for (const ICToken
&Op
: PostfixStack
) {
274 if (Op
.first
== IC_IMM
|| Op
.first
== IC_REGISTER
) {
275 OperandStack
.push_back(Op
);
276 } else if (isUnaryOperator(Op
.first
)) {
277 assert (OperandStack
.size() > 0 && "Too few operands.");
278 ICToken Operand
= OperandStack
.pop_back_val();
279 assert (Operand
.first
== IC_IMM
&&
280 "Unary operation with a register!");
283 report_fatal_error("Unexpected operator!");
286 OperandStack
.push_back(std::make_pair(IC_IMM
, -Operand
.second
));
289 OperandStack
.push_back(std::make_pair(IC_IMM
, ~Operand
.second
));
293 assert (OperandStack
.size() > 1 && "Too few operands.");
295 ICToken Op2
= OperandStack
.pop_back_val();
296 ICToken Op1
= OperandStack
.pop_back_val();
299 report_fatal_error("Unexpected operator!");
302 Val
= Op1
.second
+ Op2
.second
;
303 OperandStack
.push_back(std::make_pair(IC_IMM
, Val
));
306 Val
= Op1
.second
- Op2
.second
;
307 OperandStack
.push_back(std::make_pair(IC_IMM
, Val
));
310 assert (Op1
.first
== IC_IMM
&& Op2
.first
== IC_IMM
&&
311 "Multiply operation with an immediate and a register!");
312 Val
= Op1
.second
* Op2
.second
;
313 OperandStack
.push_back(std::make_pair(IC_IMM
, Val
));
316 assert (Op1
.first
== IC_IMM
&& Op2
.first
== IC_IMM
&&
317 "Divide operation with an immediate and a register!");
318 assert (Op2
.second
!= 0 && "Division by zero!");
319 Val
= Op1
.second
/ Op2
.second
;
320 OperandStack
.push_back(std::make_pair(IC_IMM
, Val
));
323 assert (Op1
.first
== IC_IMM
&& Op2
.first
== IC_IMM
&&
324 "Modulo operation with an immediate and a register!");
325 Val
= Op1
.second
% Op2
.second
;
326 OperandStack
.push_back(std::make_pair(IC_IMM
, Val
));
329 assert (Op1
.first
== IC_IMM
&& Op2
.first
== IC_IMM
&&
330 "Or operation with an immediate and a register!");
331 Val
= Op1
.second
| Op2
.second
;
332 OperandStack
.push_back(std::make_pair(IC_IMM
, Val
));
335 assert(Op1
.first
== IC_IMM
&& Op2
.first
== IC_IMM
&&
336 "Xor operation with an immediate and a register!");
337 Val
= Op1
.second
^ Op2
.second
;
338 OperandStack
.push_back(std::make_pair(IC_IMM
, Val
));
341 assert (Op1
.first
== IC_IMM
&& Op2
.first
== IC_IMM
&&
342 "And operation with an immediate and a register!");
343 Val
= Op1
.second
& Op2
.second
;
344 OperandStack
.push_back(std::make_pair(IC_IMM
, Val
));
347 assert (Op1
.first
== IC_IMM
&& Op2
.first
== IC_IMM
&&
348 "Left shift operation with an immediate and a register!");
349 Val
= Op1
.second
<< Op2
.second
;
350 OperandStack
.push_back(std::make_pair(IC_IMM
, Val
));
353 assert (Op1
.first
== IC_IMM
&& Op2
.first
== IC_IMM
&&
354 "Right shift operation with an immediate and a register!");
355 Val
= Op1
.second
>> Op2
.second
;
356 OperandStack
.push_back(std::make_pair(IC_IMM
, Val
));
359 assert(Op1
.first
== IC_IMM
&& Op2
.first
== IC_IMM
&&
360 "Equals operation with an immediate and a register!");
361 Val
= (Op1
.second
== Op2
.second
) ? -1 : 0;
362 OperandStack
.push_back(std::make_pair(IC_IMM
, Val
));
365 assert(Op1
.first
== IC_IMM
&& Op2
.first
== IC_IMM
&&
366 "Not-equals operation with an immediate and a register!");
367 Val
= (Op1
.second
!= Op2
.second
) ? -1 : 0;
368 OperandStack
.push_back(std::make_pair(IC_IMM
, Val
));
371 assert(Op1
.first
== IC_IMM
&& Op2
.first
== IC_IMM
&&
372 "Less-than operation with an immediate and a register!");
373 Val
= (Op1
.second
< Op2
.second
) ? -1 : 0;
374 OperandStack
.push_back(std::make_pair(IC_IMM
, Val
));
377 assert(Op1
.first
== IC_IMM
&& Op2
.first
== IC_IMM
&&
378 "Less-than-or-equal operation with an immediate and a "
380 Val
= (Op1
.second
<= Op2
.second
) ? -1 : 0;
381 OperandStack
.push_back(std::make_pair(IC_IMM
, Val
));
384 assert(Op1
.first
== IC_IMM
&& Op2
.first
== IC_IMM
&&
385 "Greater-than operation with an immediate and a register!");
386 Val
= (Op1
.second
> Op2
.second
) ? -1 : 0;
387 OperandStack
.push_back(std::make_pair(IC_IMM
, Val
));
390 assert(Op1
.first
== IC_IMM
&& Op2
.first
== IC_IMM
&&
391 "Greater-than-or-equal operation with an immediate and a "
393 Val
= (Op1
.second
>= Op2
.second
) ? -1 : 0;
394 OperandStack
.push_back(std::make_pair(IC_IMM
, Val
));
399 assert (OperandStack
.size() == 1 && "Expected a single result.");
400 return OperandStack
.pop_back_val().second
;
404 enum IntelExprState
{
434 class IntelExprStateMachine
{
435 IntelExprState State
= IES_INIT
, PrevState
= IES_ERROR
;
436 MCRegister BaseReg
, IndexReg
, TmpReg
;
439 const MCExpr
*Sym
= nullptr;
442 InlineAsmIdentifierInfo Info
;
444 bool MemExpr
= false;
445 bool BracketUsed
= false;
446 bool OffsetOperator
= false;
447 bool AttachToOperandIdx
= false;
449 SMLoc OffsetOperatorLoc
;
452 bool setSymRef(const MCExpr
*Val
, StringRef ID
, StringRef
&ErrMsg
) {
454 ErrMsg
= "cannot use more than one symbol in memory operand";
463 IntelExprStateMachine() = default;
465 void addImm(int64_t imm
) { Imm
+= imm
; }
466 short getBracCount() const { return BracCount
; }
467 bool isMemExpr() const { return MemExpr
; }
468 bool isBracketUsed() const { return BracketUsed
; }
469 bool isOffsetOperator() const { return OffsetOperator
; }
470 SMLoc
getOffsetLoc() const { return OffsetOperatorLoc
; }
471 MCRegister
getBaseReg() const { return BaseReg
; }
472 MCRegister
getIndexReg() const { return IndexReg
; }
473 unsigned getScale() const { return Scale
; }
474 const MCExpr
*getSym() const { return Sym
; }
475 StringRef
getSymName() const { return SymName
; }
476 StringRef
getType() const { return CurType
.Name
; }
477 unsigned getSize() const { return CurType
.Size
; }
478 unsigned getElementSize() const { return CurType
.ElementSize
; }
479 unsigned getLength() const { return CurType
.Length
; }
480 int64_t getImm() { return Imm
+ IC
.execute(); }
481 bool isValidEndState() const {
482 return State
== IES_RBRAC
|| State
== IES_RPAREN
||
483 State
== IES_INTEGER
|| State
== IES_REGISTER
||
487 // Is the Intel expression appended after an operand index?
488 // [OperandIdx][Intel Expression]
489 // This is necessary for checking whether it is an independent
490 // Intel expression in the back end when parsing inline asm.
491 void setAppendAfterOperand() { AttachToOperandIdx
= true; }
493 bool isPIC() const { return IsPIC
; }
494 void setPIC() { IsPIC
= true; }
496 bool hadError() const { return State
== IES_ERROR
; }
497 const InlineAsmIdentifierInfo
&getIdentifierInfo() const { return Info
; }
499 bool regsUseUpError(StringRef
&ErrMsg
) {
500 // This case mostly happens in inline asm, e.g. Arr[BaseReg + IndexReg]:
501 // an additional register cannot be introduced in inline asm in the PIC model.
502 if (IsPIC
&& AttachToOperandIdx
)
503 ErrMsg
= "Don't use 2 or more regs for mem offset in PIC model!";
505 ErrMsg
= "BaseReg/IndexReg already set!";
510 IntelExprState CurrState
= State
;
519 IC
.pushOperator(IC_OR
);
522 PrevState
= CurrState
;
525 IntelExprState CurrState
= State
;
534 IC
.pushOperator(IC_XOR
);
537 PrevState
= CurrState
;
540 IntelExprState CurrState
= State
;
549 IC
.pushOperator(IC_AND
);
552 PrevState
= CurrState
;
555 IntelExprState CurrState
= State
;
564 IC
.pushOperator(IC_EQ
);
567 PrevState
= CurrState
;
570 IntelExprState CurrState
= State
;
579 IC
.pushOperator(IC_NE
);
582 PrevState
= CurrState
;
585 IntelExprState CurrState
= State
;
594 IC
.pushOperator(IC_LT
);
597 PrevState
= CurrState
;
600 IntelExprState CurrState
= State
;
609 IC
.pushOperator(IC_LE
);
612 PrevState
= CurrState
;
615 IntelExprState CurrState
= State
;
624 IC
.pushOperator(IC_GT
);
627 PrevState
= CurrState
;
630 IntelExprState CurrState
= State
;
639 IC
.pushOperator(IC_GE
);
642 PrevState
= CurrState
;
645 IntelExprState CurrState
= State
;
654 IC
.pushOperator(IC_LSHIFT
);
657 PrevState
= CurrState
;
660 IntelExprState CurrState
= State
;
669 IC
.pushOperator(IC_RSHIFT
);
672 PrevState
= CurrState
;
674 bool onPlus(StringRef
&ErrMsg
) {
675 IntelExprState CurrState
= State
;
685 IC
.pushOperator(IC_PLUS
);
686 if (CurrState
== IES_REGISTER
&& PrevState
!= IES_MULTIPLY
) {
687 // If we already have a BaseReg, then assume this is the IndexReg with
688 // no explicit scale.
693 return regsUseUpError(ErrMsg
);
700 PrevState
= CurrState
;
703 bool onMinus(StringRef
&ErrMsg
) {
704 IntelExprState CurrState
= State
;
734 // push minus operator if it is not a negate operator
735 if (CurrState
== IES_REGISTER
|| CurrState
== IES_RPAREN
||
736 CurrState
== IES_INTEGER
|| CurrState
== IES_RBRAC
||
737 CurrState
== IES_OFFSET
)
738 IC
.pushOperator(IC_MINUS
);
739 else if (PrevState
== IES_REGISTER
&& CurrState
== IES_MULTIPLY
) {
740 // We have negate operator for Scale: it's illegal
741 ErrMsg
= "Scale can't be negative";
744 IC
.pushOperator(IC_NEG
);
745 if (CurrState
== IES_REGISTER
&& PrevState
!= IES_MULTIPLY
) {
746 // If we already have a BaseReg, then assume this is the IndexReg with
747 // no explicit scale.
752 return regsUseUpError(ErrMsg
);
759 PrevState
= CurrState
;
763 IntelExprState CurrState
= State
;
789 IC
.pushOperator(IC_NOT
);
792 PrevState
= CurrState
;
794 bool onRegister(MCRegister Reg
, StringRef
&ErrMsg
) {
795 IntelExprState CurrState
= State
;
803 State
= IES_REGISTER
;
805 IC
.pushOperand(IC_REGISTER
);
808 // Index Register - Scale * Register
809 if (PrevState
== IES_INTEGER
) {
811 return regsUseUpError(ErrMsg
);
812 State
= IES_REGISTER
;
814 // Get the scale and replace the 'Scale * Register' with '0'.
815 Scale
= IC
.popOperand();
816 if (checkScale(Scale
, ErrMsg
))
818 IC
.pushOperand(IC_IMM
);
825 PrevState
= CurrState
;
828 bool onIdentifierExpr(const MCExpr
*SymRef
, StringRef SymRefName
,
829 const InlineAsmIdentifierInfo
&IDInfo
,
830 const AsmTypeInfo
&Type
, bool ParsingMSInlineAsm
,
832 // InlineAsm: Treat an enum value as an integer
833 if (ParsingMSInlineAsm
)
834 if (IDInfo
.isKind(InlineAsmIdentifierInfo::IK_EnumVal
))
835 return onInteger(IDInfo
.Enum
.EnumVal
, ErrMsg
);
836 // Treat a symbolic constant like an integer
837 if (auto *CE
= dyn_cast
<MCConstantExpr
>(SymRef
))
838 return onInteger(CE
->getValue(), ErrMsg
);
851 if (setSymRef(SymRef
, SymRefName
, ErrMsg
))
855 IC
.pushOperand(IC_IMM
);
856 if (ParsingMSInlineAsm
)
863 bool onInteger(int64_t TmpInt
, StringRef
&ErrMsg
) {
864 IntelExprState CurrState
= State
;
890 if (PrevState
== IES_REGISTER
&& CurrState
== IES_MULTIPLY
) {
891 // Index Register - Register * Scale
893 return regsUseUpError(ErrMsg
);
896 if (checkScale(Scale
, ErrMsg
))
898 // Get the scale and replace the 'Register * Scale' with '0'.
901 IC
.pushOperand(IC_IMM
, TmpInt
);
905 PrevState
= CurrState
;
917 State
= IES_MULTIPLY
;
918 IC
.pushOperator(IC_MULTIPLY
);
931 IC
.pushOperator(IC_DIVIDE
);
944 IC
.pushOperator(IC_MOD
);
960 IC
.pushOperator(IC_PLUS
);
962 CurType
.Size
= CurType
.ElementSize
;
966 assert(!BracCount
&& "BracCount should be zero on parsing's start");
975 bool onRBrac(StringRef
&ErrMsg
) {
976 IntelExprState CurrState
= State
;
985 if (BracCount
-- != 1) {
986 ErrMsg
= "unexpected bracket encountered";
990 if (CurrState
== IES_REGISTER
&& PrevState
!= IES_MULTIPLY
) {
991 // If we already have a BaseReg, then assume this is the IndexReg with
992 // no explicit scale.
997 return regsUseUpError(ErrMsg
);
1004 PrevState
= CurrState
;
1008 IntelExprState CurrState
= State
;
1034 IC
.pushOperator(IC_LPAREN
);
1037 PrevState
= CurrState
;
1051 IC
.pushOperator(IC_RPAREN
);
1055 bool onOffset(const MCExpr
*Val
, SMLoc OffsetLoc
, StringRef ID
,
1056 const InlineAsmIdentifierInfo
&IDInfo
,
1057 bool ParsingMSInlineAsm
, StringRef
&ErrMsg
) {
1061 ErrMsg
= "unexpected offset operator expression";
1066 if (setSymRef(Val
, ID
, ErrMsg
))
1068 OffsetOperator
= true;
1069 OffsetOperatorLoc
= OffsetLoc
;
1071 // As we cannot yet resolve the actual value (offset), we retain
1072 // the requested semantics by pushing a '0' to the operands stack
1073 IC
.pushOperand(IC_IMM
);
1074 if (ParsingMSInlineAsm
) {
1081 void onCast(AsmTypeInfo Info
) {
1093 void setTypeInfo(AsmTypeInfo Type
) { CurType
= Type
; }
1096 bool Error(SMLoc L
, const Twine
&Msg
, SMRange Range
= std::nullopt
,
1097 bool MatchingInlineAsm
= false) {
1098 MCAsmParser
&Parser
= getParser();
1099 if (MatchingInlineAsm
) {
1100 if (!getLexer().isAtStartOfStatement())
1101 Parser
.eatToEndOfStatement();
1104 return Parser
.Error(L
, Msg
, Range
);
1107 bool MatchRegisterByName(MCRegister
&RegNo
, StringRef RegName
, SMLoc StartLoc
,
1109 bool ParseRegister(MCRegister
&RegNo
, SMLoc
&StartLoc
, SMLoc
&EndLoc
,
1110 bool RestoreOnFailure
);
1112 std::unique_ptr
<X86Operand
> DefaultMemSIOperand(SMLoc Loc
);
1113 std::unique_ptr
<X86Operand
> DefaultMemDIOperand(SMLoc Loc
);
1114 bool IsSIReg(MCRegister Reg
);
1115 MCRegister
GetSIDIForRegClass(unsigned RegClassID
, bool IsSIReg
);
1117 AddDefaultSrcDestOperands(OperandVector
&Operands
,
1118 std::unique_ptr
<llvm::MCParsedAsmOperand
> &&Src
,
1119 std::unique_ptr
<llvm::MCParsedAsmOperand
> &&Dst
);
1120 bool VerifyAndAdjustOperands(OperandVector
&OrigOperands
,
1121 OperandVector
&FinalOperands
);
1122 bool parseOperand(OperandVector
&Operands
, StringRef Name
);
1123 bool parseATTOperand(OperandVector
&Operands
);
1124 bool parseIntelOperand(OperandVector
&Operands
, StringRef Name
);
1125 bool ParseIntelOffsetOperator(const MCExpr
*&Val
, StringRef
&ID
,
1126 InlineAsmIdentifierInfo
&Info
, SMLoc
&End
);
1127 bool ParseIntelDotOperator(IntelExprStateMachine
&SM
, SMLoc
&End
);
1128 unsigned IdentifyIntelInlineAsmOperator(StringRef Name
);
1129 unsigned ParseIntelInlineAsmOperator(unsigned OpKind
);
1130 unsigned IdentifyMasmOperator(StringRef Name
);
1131 bool ParseMasmOperator(unsigned OpKind
, int64_t &Val
);
1132 bool ParseRoundingModeOp(SMLoc Start
, OperandVector
&Operands
);
1133 bool parseCFlagsOp(OperandVector
&Operands
);
1134 bool ParseIntelNamedOperator(StringRef Name
, IntelExprStateMachine
&SM
,
1135 bool &ParseError
, SMLoc
&End
);
1136 bool ParseMasmNamedOperator(StringRef Name
, IntelExprStateMachine
&SM
,
1137 bool &ParseError
, SMLoc
&End
);
1138 void RewriteIntelExpression(IntelExprStateMachine
&SM
, SMLoc Start
,
1140 bool ParseIntelExpression(IntelExprStateMachine
&SM
, SMLoc
&End
);
1141 bool ParseIntelInlineAsmIdentifier(const MCExpr
*&Val
, StringRef
&Identifier
,
1142 InlineAsmIdentifierInfo
&Info
,
1143 bool IsUnevaluatedOperand
, SMLoc
&End
,
1144 bool IsParsingOffsetOperator
= false);
1145 void tryParseOperandIdx(AsmToken::TokenKind PrevTK
,
1146 IntelExprStateMachine
&SM
);
1148 bool ParseMemOperand(MCRegister SegReg
, const MCExpr
*Disp
, SMLoc StartLoc
,
1149 SMLoc EndLoc
, OperandVector
&Operands
);
1151 X86::CondCode
ParseConditionCode(StringRef CCode
);
1153 bool ParseIntelMemoryOperandSize(unsigned &Size
);
1154 bool CreateMemForMSInlineAsm(MCRegister SegReg
, const MCExpr
*Disp
,
1155 MCRegister BaseReg
, MCRegister IndexReg
,
1156 unsigned Scale
, bool NonAbsMem
, SMLoc Start
,
1157 SMLoc End
, unsigned Size
, StringRef Identifier
,
1158 const InlineAsmIdentifierInfo
&Info
,
1159 OperandVector
&Operands
);
1161 bool parseDirectiveArch();
1162 bool parseDirectiveNops(SMLoc L
);
1163 bool parseDirectiveEven(SMLoc L
);
1164 bool ParseDirectiveCode(StringRef IDVal
, SMLoc L
);
1166 /// CodeView FPO data directives.
1167 bool parseDirectiveFPOProc(SMLoc L
);
1168 bool parseDirectiveFPOSetFrame(SMLoc L
);
1169 bool parseDirectiveFPOPushReg(SMLoc L
);
1170 bool parseDirectiveFPOStackAlloc(SMLoc L
);
1171 bool parseDirectiveFPOStackAlign(SMLoc L
);
1172 bool parseDirectiveFPOEndPrologue(SMLoc L
);
1173 bool parseDirectiveFPOEndProc(SMLoc L
);
1176 bool parseSEHRegisterNumber(unsigned RegClassID
, MCRegister
&RegNo
);
1177 bool parseDirectiveSEHPushReg(SMLoc
);
1178 bool parseDirectiveSEHSetFrame(SMLoc
);
1179 bool parseDirectiveSEHSaveReg(SMLoc
);
1180 bool parseDirectiveSEHSaveXMM(SMLoc
);
1181 bool parseDirectiveSEHPushFrame(SMLoc
);
1183 unsigned checkTargetMatchPredicate(MCInst
&Inst
) override
;
1185 bool validateInstruction(MCInst
&Inst
, const OperandVector
&Ops
);
1186 bool processInstruction(MCInst
&Inst
, const OperandVector
&Ops
);
1188 // Load Value Injection (LVI) Mitigations for machine code
1189 void emitWarningForSpecialLVIInstruction(SMLoc Loc
);
1190 void applyLVICFIMitigation(MCInst
&Inst
, MCStreamer
&Out
);
1191 void applyLVILoadHardeningMitigation(MCInst
&Inst
, MCStreamer
&Out
);
1193 /// Wrapper around MCStreamer::emitInstruction(). Possibly adds
1194 /// instrumentation around Inst.
1195 void emitInstruction(MCInst
&Inst
, OperandVector
&Operands
, MCStreamer
&Out
);
1197 bool matchAndEmitInstruction(SMLoc IDLoc
, unsigned &Opcode
,
1198 OperandVector
&Operands
, MCStreamer
&Out
,
1199 uint64_t &ErrorInfo
,
1200 bool MatchingInlineAsm
) override
;
1202 void MatchFPUWaitAlias(SMLoc IDLoc
, X86Operand
&Op
, OperandVector
&Operands
,
1203 MCStreamer
&Out
, bool MatchingInlineAsm
);
1205 bool ErrorMissingFeature(SMLoc IDLoc
, const FeatureBitset
&MissingFeatures
,
1206 bool MatchingInlineAsm
);
1208 bool matchAndEmitATTInstruction(SMLoc IDLoc
, unsigned &Opcode
, MCInst
&Inst
,
1209 OperandVector
&Operands
, MCStreamer
&Out
,
1210 uint64_t &ErrorInfo
, bool MatchingInlineAsm
);
1212 bool matchAndEmitIntelInstruction(SMLoc IDLoc
, unsigned &Opcode
, MCInst
&Inst
,
1213 OperandVector
&Operands
, MCStreamer
&Out
,
1214 uint64_t &ErrorInfo
,
1215 bool MatchingInlineAsm
);
1217 bool omitRegisterFromClobberLists(MCRegister Reg
) override
;
1219 /// Parses AVX512-specific operand primitives: masked registers ({%k<NUM>}, {z})
1220 /// and memory broadcasting ({1to<NUM>}) primitives, updating the Operands vector if required.
1221 /// Returns false if no parsing errors occurred, true otherwise.
1222 bool HandleAVX512Operand(OperandVector
&Operands
);
1224 bool ParseZ(std::unique_ptr
<X86Operand
> &Z
, const SMLoc
&StartLoc
);
1226 bool is64BitMode() const {
1227 // FIXME: Can tablegen auto-generate this?
1228 return getSTI().hasFeature(X86::Is64Bit
);
1230 bool is32BitMode() const {
1231 // FIXME: Can tablegen auto-generate this?
1232 return getSTI().hasFeature(X86::Is32Bit
);
1234 bool is16BitMode() const {
1235 // FIXME: Can tablegen auto-generate this?
1236 return getSTI().hasFeature(X86::Is16Bit
);
1238 void SwitchMode(unsigned mode
) {
1239 MCSubtargetInfo
&STI
= copySTI();
1240 FeatureBitset
AllModes({X86::Is64Bit
, X86::Is32Bit
, X86::Is16Bit
});
1241 FeatureBitset OldMode
= STI
.getFeatureBits() & AllModes
;
1242 FeatureBitset FB
= ComputeAvailableFeatures(
1243 STI
.ToggleFeature(OldMode
.flip(mode
)));
1244 setAvailableFeatures(FB
);
1246 assert(FeatureBitset({mode
}) == (STI
.getFeatureBits() & AllModes
));
1249 unsigned getPointerWidth() {
1250 if (is16BitMode()) return 16;
1251 if (is32BitMode()) return 32;
1252 if (is64BitMode()) return 64;
1253 llvm_unreachable("invalid mode");
1256 bool isParsingIntelSyntax() {
1257 return getParser().getAssemblerDialect();
1260 /// @name Auto-generated Matcher Functions
1263 #define GET_ASSEMBLER_HEADER
1264 #include "X86GenAsmMatcher.inc"
1269 enum X86MatchResultTy
{
1270 Match_Unsupported
= FIRST_TARGET_MATCH_RESULT_TY
,
1271 #define GET_OPERAND_DIAGNOSTIC_TYPES
1272 #include "X86GenAsmMatcher.inc"
1275 X86AsmParser(const MCSubtargetInfo
&sti
, MCAsmParser
&Parser
,
1276 const MCInstrInfo
&mii
, const MCTargetOptions
&Options
)
1277 : MCTargetAsmParser(Options
, sti
, mii
), InstInfo(nullptr),
1280 Parser
.addAliasForDirective(".word", ".2byte");
1282 // Initialize the set of available features.
1283 setAvailableFeatures(ComputeAvailableFeatures(getSTI().getFeatureBits()));
1286 bool parseRegister(MCRegister
&Reg
, SMLoc
&StartLoc
, SMLoc
&EndLoc
) override
;
1287 ParseStatus
tryParseRegister(MCRegister
&Reg
, SMLoc
&StartLoc
,
1288 SMLoc
&EndLoc
) override
;
1290 bool parsePrimaryExpr(const MCExpr
*&Res
, SMLoc
&EndLoc
) override
;
1292 bool parseInstruction(ParseInstructionInfo
&Info
, StringRef Name
,
1293 SMLoc NameLoc
, OperandVector
&Operands
) override
;
1295 bool ParseDirective(AsmToken DirectiveID
) override
;
1297 } // end anonymous namespace
1299 #define GET_REGISTER_MATCHER
1300 #define GET_SUBTARGET_FEATURE_NAME
1301 #include "X86GenAsmMatcher.inc"
1303 static bool CheckBaseRegAndIndexRegAndScale(MCRegister BaseReg
,
1304 MCRegister IndexReg
, unsigned Scale
,
1306 StringRef
&ErrMsg
) {
1307 // If we have both a base register and an index register make sure they are
1308 // both 64-bit or 32-bit registers.
1309 // To support VSIB, IndexReg can be 128-bit or 256-bit registers.
1312 !(BaseReg
== X86::RIP
|| BaseReg
== X86::EIP
||
1313 X86MCRegisterClasses
[X86::GR16RegClassID
].contains(BaseReg
) ||
1314 X86MCRegisterClasses
[X86::GR32RegClassID
].contains(BaseReg
) ||
1315 X86MCRegisterClasses
[X86::GR64RegClassID
].contains(BaseReg
))) {
1316 ErrMsg
= "invalid base+index expression";
1321 !(IndexReg
== X86::EIZ
|| IndexReg
== X86::RIZ
||
1322 X86MCRegisterClasses
[X86::GR16RegClassID
].contains(IndexReg
) ||
1323 X86MCRegisterClasses
[X86::GR32RegClassID
].contains(IndexReg
) ||
1324 X86MCRegisterClasses
[X86::GR64RegClassID
].contains(IndexReg
) ||
1325 X86MCRegisterClasses
[X86::VR128XRegClassID
].contains(IndexReg
) ||
1326 X86MCRegisterClasses
[X86::VR256XRegClassID
].contains(IndexReg
) ||
1327 X86MCRegisterClasses
[X86::VR512RegClassID
].contains(IndexReg
))) {
1328 ErrMsg
= "invalid base+index expression";
1332 if (((BaseReg
== X86::RIP
|| BaseReg
== X86::EIP
) && IndexReg
) ||
1333 IndexReg
== X86::EIP
|| IndexReg
== X86::RIP
|| IndexReg
== X86::ESP
||
1334 IndexReg
== X86::RSP
) {
1335 ErrMsg
= "invalid base+index expression";
1339 // Check for use of invalid 16-bit registers. Only BX/BP/SI/DI are allowed,
1340 // and then only in non-64-bit modes.
1341 if (X86MCRegisterClasses
[X86::GR16RegClassID
].contains(BaseReg
) &&
1342 (Is64BitMode
|| (BaseReg
!= X86::BX
&& BaseReg
!= X86::BP
&&
1343 BaseReg
!= X86::SI
&& BaseReg
!= X86::DI
))) {
1344 ErrMsg
= "invalid 16-bit base register";
1349 X86MCRegisterClasses
[X86::GR16RegClassID
].contains(IndexReg
)) {
1350 ErrMsg
= "16-bit memory operand may not include only index register";
1354 if (BaseReg
&& IndexReg
) {
1355 if (X86MCRegisterClasses
[X86::GR64RegClassID
].contains(BaseReg
) &&
1356 (X86MCRegisterClasses
[X86::GR16RegClassID
].contains(IndexReg
) ||
1357 X86MCRegisterClasses
[X86::GR32RegClassID
].contains(IndexReg
) ||
1358 IndexReg
== X86::EIZ
)) {
1359 ErrMsg
= "base register is 64-bit, but index register is not";
1362 if (X86MCRegisterClasses
[X86::GR32RegClassID
].contains(BaseReg
) &&
1363 (X86MCRegisterClasses
[X86::GR16RegClassID
].contains(IndexReg
) ||
1364 X86MCRegisterClasses
[X86::GR64RegClassID
].contains(IndexReg
) ||
1365 IndexReg
== X86::RIZ
)) {
1366 ErrMsg
= "base register is 32-bit, but index register is not";
1369 if (X86MCRegisterClasses
[X86::GR16RegClassID
].contains(BaseReg
)) {
1370 if (X86MCRegisterClasses
[X86::GR32RegClassID
].contains(IndexReg
) ||
1371 X86MCRegisterClasses
[X86::GR64RegClassID
].contains(IndexReg
)) {
1372 ErrMsg
= "base register is 16-bit, but index register is not";
1375 if ((BaseReg
!= X86::BX
&& BaseReg
!= X86::BP
) ||
1376 (IndexReg
!= X86::SI
&& IndexReg
!= X86::DI
)) {
1377 ErrMsg
= "invalid 16-bit base/index register combination";
1383 // RIP/EIP-relative addressing is only supported in 64-bit mode.
1384 if (!Is64BitMode
&& (BaseReg
== X86::RIP
|| BaseReg
== X86::EIP
)) {
1385 ErrMsg
= "IP-relative addressing requires 64-bit mode";
1389 return checkScale(Scale
, ErrMsg
);
1392 bool X86AsmParser::MatchRegisterByName(MCRegister
&RegNo
, StringRef RegName
,
1393 SMLoc StartLoc
, SMLoc EndLoc
) {
1394 // If we encounter a %, ignore it. This code handles registers with and
1395 // without the prefix, unprefixed registers can occur in cfi directives.
1396 RegName
.consume_front("%");
1398 RegNo
= MatchRegisterName(RegName
);
1400 // If the match failed, try the register name as lowercase.
1402 RegNo
= MatchRegisterName(RegName
.lower());
1404 // The "flags" and "mxcsr" registers cannot be referenced directly.
1405 // Treat it as an identifier instead.
1406 if (isParsingMSInlineAsm() && isParsingIntelSyntax() &&
1407 (RegNo
== X86::EFLAGS
|| RegNo
== X86::MXCSR
))
1408 RegNo
= MCRegister();
1410 if (!is64BitMode()) {
1411 // FIXME: This should be done using Requires<Not64BitMode> and
1412 // Requires<In64BitMode> so "eiz" usage in 64-bit instructions can be also
1414 if (RegNo
== X86::RIZ
|| RegNo
== X86::RIP
||
1415 X86MCRegisterClasses
[X86::GR64RegClassID
].contains(RegNo
) ||
1416 X86II::isX86_64NonExtLowByteReg(RegNo
) ||
1417 X86II::isX86_64ExtendedReg(RegNo
)) {
1418 return Error(StartLoc
,
1419 "register %" + RegName
+ " is only available in 64-bit mode",
1420 SMRange(StartLoc
, EndLoc
));
1424 if (X86II::isApxExtendedReg(RegNo
))
1425 UseApxExtendedReg
= true;
1427 // If this is "db[0-15]", match it as an alias
1429 if (!RegNo
&& RegName
.starts_with("db")) {
1430 if (RegName
.size() == 3) {
1431 switch (RegName
[2]) {
1463 } else if (RegName
.size() == 4 && RegName
[2] == '1') {
1464 switch (RegName
[3]) {
1488 if (isParsingIntelSyntax())
1490 return Error(StartLoc
, "invalid register name", SMRange(StartLoc
, EndLoc
));
1495 bool X86AsmParser::ParseRegister(MCRegister
&RegNo
, SMLoc
&StartLoc
,
1496 SMLoc
&EndLoc
, bool RestoreOnFailure
) {
1497 MCAsmParser
&Parser
= getParser();
1498 MCAsmLexer
&Lexer
= getLexer();
1499 RegNo
= MCRegister();
1501 SmallVector
<AsmToken
, 5> Tokens
;
1502 auto OnFailure
= [RestoreOnFailure
, &Lexer
, &Tokens
]() {
1503 if (RestoreOnFailure
) {
1504 while (!Tokens
.empty()) {
1505 Lexer
.UnLex(Tokens
.pop_back_val());
1510 const AsmToken
&PercentTok
= Parser
.getTok();
1511 StartLoc
= PercentTok
.getLoc();
1513 // If we encounter a %, ignore it. This code handles registers with and
1514 // without the prefix, unprefixed registers can occur in cfi directives.
1515 if (!isParsingIntelSyntax() && PercentTok
.is(AsmToken::Percent
)) {
1516 Tokens
.push_back(PercentTok
);
1517 Parser
.Lex(); // Eat percent token.
1520 const AsmToken
&Tok
= Parser
.getTok();
1521 EndLoc
= Tok
.getEndLoc();
1523 if (Tok
.isNot(AsmToken::Identifier
)) {
1525 if (isParsingIntelSyntax()) return true;
1526 return Error(StartLoc
, "invalid register name",
1527 SMRange(StartLoc
, EndLoc
));
1530 if (MatchRegisterByName(RegNo
, Tok
.getString(), StartLoc
, EndLoc
)) {
1535 // Parse "%st" as "%st(0)" and "%st(1)", which is multiple tokens.
1536 if (RegNo
== X86::ST0
) {
1537 Tokens
.push_back(Tok
);
1538 Parser
.Lex(); // Eat 'st'
1540 // Check to see if we have '(4)' after %st.
1541 if (Lexer
.isNot(AsmToken::LParen
))
1544 Tokens
.push_back(Parser
.getTok());
1547 const AsmToken
&IntTok
= Parser
.getTok();
1548 if (IntTok
.isNot(AsmToken::Integer
)) {
1550 return Error(IntTok
.getLoc(), "expected stack index");
1552 switch (IntTok
.getIntVal()) {
1553 case 0: RegNo
= X86::ST0
; break;
1554 case 1: RegNo
= X86::ST1
; break;
1555 case 2: RegNo
= X86::ST2
; break;
1556 case 3: RegNo
= X86::ST3
; break;
1557 case 4: RegNo
= X86::ST4
; break;
1558 case 5: RegNo
= X86::ST5
; break;
1559 case 6: RegNo
= X86::ST6
; break;
1560 case 7: RegNo
= X86::ST7
; break;
1563 return Error(IntTok
.getLoc(), "invalid stack index");
1567 Tokens
.push_back(IntTok
);
1569 if (Lexer
.isNot(AsmToken::RParen
)) {
1571 return Error(Parser
.getTok().getLoc(), "expected ')'");
1574 EndLoc
= Parser
.getTok().getEndLoc();
1575 Parser
.Lex(); // Eat ')'
1579 EndLoc
= Parser
.getTok().getEndLoc();
1583 if (isParsingIntelSyntax()) return true;
1584 return Error(StartLoc
, "invalid register name",
1585 SMRange(StartLoc
, EndLoc
));
1588 Parser
.Lex(); // Eat identifier token.
/// Parse a register at the current position.
///
/// Thin wrapper that forwards to ParseRegister() with RestoreOnFailure
/// disabled, so tokens consumed by a failed attempt are not pushed back.
///
/// \param Reg      receives the matched register.
/// \param StartLoc receives the location where the register starts.
/// \param EndLoc   receives the location where the register ends.
/// \returns the result of ParseRegister().
bool X86AsmParser::parseRegister(MCRegister &Reg, SMLoc &StartLoc,
                                 SMLoc &EndLoc) {
  return ParseRegister(Reg, StartLoc, EndLoc, /*RestoreOnFailure=*/false);
}
1597 ParseStatus
X86AsmParser::tryParseRegister(MCRegister
&Reg
, SMLoc
&StartLoc
,
1599 bool Result
= ParseRegister(Reg
, StartLoc
, EndLoc
, /*RestoreOnFailure=*/true);
1600 bool PendingErrors
= getParser().hasPendingError();
1601 getParser().clearPendingErrors();
1603 return ParseStatus::Failure
;
1605 return ParseStatus::NoMatch
;
1606 return ParseStatus::Success
;
1609 std::unique_ptr
<X86Operand
> X86AsmParser::DefaultMemSIOperand(SMLoc Loc
) {
1610 bool Parse32
= is32BitMode() || Code16GCC
;
1611 MCRegister Basereg
=
1612 is64BitMode() ? X86::RSI
: (Parse32
? X86::ESI
: X86::SI
);
1613 const MCExpr
*Disp
= MCConstantExpr::create(0, getContext());
1614 return X86Operand::CreateMem(getPointerWidth(), /*SegReg=*/0, Disp
,
1615 /*BaseReg=*/Basereg
, /*IndexReg=*/0, /*Scale=*/1,
1619 std::unique_ptr
<X86Operand
> X86AsmParser::DefaultMemDIOperand(SMLoc Loc
) {
1620 bool Parse32
= is32BitMode() || Code16GCC
;
1621 MCRegister Basereg
=
1622 is64BitMode() ? X86::RDI
: (Parse32
? X86::EDI
: X86::DI
);
1623 const MCExpr
*Disp
= MCConstantExpr::create(0, getContext());
1624 return X86Operand::CreateMem(getPointerWidth(), /*SegReg=*/0, Disp
,
1625 /*BaseReg=*/Basereg
, /*IndexReg=*/0, /*Scale=*/1,
1629 bool X86AsmParser::IsSIReg(MCRegister Reg
) {
1631 default: llvm_unreachable("Only (R|E)SI and (R|E)DI are expected!");
1643 MCRegister
X86AsmParser::GetSIDIForRegClass(unsigned RegClassID
, bool IsSIReg
) {
1644 switch (RegClassID
) {
1645 default: llvm_unreachable("Unexpected register class");
1646 case X86::GR64RegClassID
:
1647 return IsSIReg
? X86::RSI
: X86::RDI
;
1648 case X86::GR32RegClassID
:
1649 return IsSIReg
? X86::ESI
: X86::EDI
;
1650 case X86::GR16RegClassID
:
1651 return IsSIReg
? X86::SI
: X86::DI
;
1655 void X86AsmParser::AddDefaultSrcDestOperands(
1656 OperandVector
& Operands
, std::unique_ptr
<llvm::MCParsedAsmOperand
> &&Src
,
1657 std::unique_ptr
<llvm::MCParsedAsmOperand
> &&Dst
) {
1658 if (isParsingIntelSyntax()) {
1659 Operands
.push_back(std::move(Dst
));
1660 Operands
.push_back(std::move(Src
));
1663 Operands
.push_back(std::move(Src
));
1664 Operands
.push_back(std::move(Dst
));
1668 bool X86AsmParser::VerifyAndAdjustOperands(OperandVector
&OrigOperands
,
1669 OperandVector
&FinalOperands
) {
1671 if (OrigOperands
.size() > 1) {
1672 // Check if sizes match, OrigOperands also contains the instruction name
1673 assert(OrigOperands
.size() == FinalOperands
.size() + 1 &&
1674 "Operand size mismatch");
1676 SmallVector
<std::pair
<SMLoc
, std::string
>, 2> Warnings
;
1677 // Verify types match
1678 int RegClassID
= -1;
1679 for (unsigned int i
= 0; i
< FinalOperands
.size(); ++i
) {
1680 X86Operand
&OrigOp
= static_cast<X86Operand
&>(*OrigOperands
[i
+ 1]);
1681 X86Operand
&FinalOp
= static_cast<X86Operand
&>(*FinalOperands
[i
]);
1683 if (FinalOp
.isReg() &&
1684 (!OrigOp
.isReg() || FinalOp
.getReg() != OrigOp
.getReg()))
1685 // Return false and let a normal complaint about bogus operands happen
1688 if (FinalOp
.isMem()) {
1690 if (!OrigOp
.isMem())
1691 // Return false and let a normal complaint about bogus operands happen
1694 MCRegister OrigReg
= OrigOp
.Mem
.BaseReg
;
1695 MCRegister FinalReg
= FinalOp
.Mem
.BaseReg
;
// If we've already encountered a register class, make sure all register
// bases are of the same register class
1699 if (RegClassID
!= -1 &&
1700 !X86MCRegisterClasses
[RegClassID
].contains(OrigReg
)) {
1701 return Error(OrigOp
.getStartLoc(),
1702 "mismatching source and destination index registers");
1705 if (X86MCRegisterClasses
[X86::GR64RegClassID
].contains(OrigReg
))
1706 RegClassID
= X86::GR64RegClassID
;
1707 else if (X86MCRegisterClasses
[X86::GR32RegClassID
].contains(OrigReg
))
1708 RegClassID
= X86::GR32RegClassID
;
1709 else if (X86MCRegisterClasses
[X86::GR16RegClassID
].contains(OrigReg
))
1710 RegClassID
= X86::GR16RegClassID
;
1712 // Unexpected register class type
1713 // Return false and let a normal complaint about bogus operands happen
1716 bool IsSI
= IsSIReg(FinalReg
);
1717 FinalReg
= GetSIDIForRegClass(RegClassID
, IsSI
);
1719 if (FinalReg
!= OrigReg
) {
1720 std::string RegName
= IsSI
? "ES:(R|E)SI" : "ES:(R|E)DI";
1721 Warnings
.push_back(std::make_pair(
1722 OrigOp
.getStartLoc(),
1723 "memory operand is only for determining the size, " + RegName
+
1724 " will be used for the location"));
1727 FinalOp
.Mem
.Size
= OrigOp
.Mem
.Size
;
1728 FinalOp
.Mem
.SegReg
= OrigOp
.Mem
.SegReg
;
1729 FinalOp
.Mem
.BaseReg
= FinalReg
;
1733 // Produce warnings only if all the operands passed the adjustment - prevent
1734 // legal cases like "movsd (%rax), %xmm0" mistakenly produce warnings
1735 for (auto &WarningMsg
: Warnings
) {
1736 Warning(WarningMsg
.first
, WarningMsg
.second
);
1739 // Remove old operands
1740 for (unsigned int i
= 0; i
< FinalOperands
.size(); ++i
)
1741 OrigOperands
.pop_back();
1743 // OrigOperands.append(FinalOperands.begin(), FinalOperands.end());
1744 for (auto &Op
: FinalOperands
)
1745 OrigOperands
.push_back(std::move(Op
));
1750 bool X86AsmParser::parseOperand(OperandVector
&Operands
, StringRef Name
) {
1751 if (isParsingIntelSyntax())
1752 return parseIntelOperand(Operands
, Name
);
1754 return parseATTOperand(Operands
);
1757 bool X86AsmParser::CreateMemForMSInlineAsm(
1758 MCRegister SegReg
, const MCExpr
*Disp
, MCRegister BaseReg
,
1759 MCRegister IndexReg
, unsigned Scale
, bool NonAbsMem
, SMLoc Start
, SMLoc End
,
1760 unsigned Size
, StringRef Identifier
, const InlineAsmIdentifierInfo
&Info
,
1761 OperandVector
&Operands
) {
1762 // If we found a decl other than a VarDecl, then assume it is a FuncDecl or
1763 // some other label reference.
1764 if (Info
.isKind(InlineAsmIdentifierInfo::IK_Label
)) {
1765 // Create an absolute memory reference in order to match against
1766 // instructions taking a PC relative operand.
1767 Operands
.push_back(X86Operand::CreateMem(getPointerWidth(), Disp
, Start
,
1768 End
, Size
, Identifier
,
1772 // We either have a direct symbol reference, or an offset from a symbol. The
1773 // parser always puts the symbol on the LHS, so look there for size
1774 // calculation purposes.
1775 unsigned FrontendSize
= 0;
1776 void *Decl
= nullptr;
1777 bool IsGlobalLV
= false;
1778 if (Info
.isKind(InlineAsmIdentifierInfo::IK_Var
)) {
1779 // Size is in terms of bits in this context.
1780 FrontendSize
= Info
.Var
.Type
* 8;
1781 Decl
= Info
.Var
.Decl
;
1782 IsGlobalLV
= Info
.Var
.IsGlobalLV
;
// It is widely common for MS InlineAsm to use a global variable and one/two
// registers in a memory expression, even though it is inaccessible via rip/eip.
1787 if (BaseReg
|| IndexReg
) {
1788 Operands
.push_back(X86Operand::CreateMem(getPointerWidth(), Disp
, Start
,
1789 End
, Size
, Identifier
, Decl
, 0,
1790 BaseReg
&& IndexReg
));
1794 BaseReg
= 1; // Make isAbsMem() false
1796 Operands
.push_back(X86Operand::CreateMem(
1797 getPointerWidth(), SegReg
, Disp
, BaseReg
, IndexReg
, Scale
, Start
, End
,
1799 /*DefaultBaseReg=*/X86::RIP
, Identifier
, Decl
, FrontendSize
));
// Some binary bitwise operators have a named synonym.
// Query a candidate string for being such a named operator
// and, if so, invoke the appropriate handler.
1806 bool X86AsmParser::ParseIntelNamedOperator(StringRef Name
,
1807 IntelExprStateMachine
&SM
,
1808 bool &ParseError
, SMLoc
&End
) {
1809 // A named operator should be either lower or upper case, but not a mix...
1810 // except in MASM, which uses full case-insensitivity.
1811 if (Name
!= Name
.lower() && Name
!= Name
.upper() &&
1812 !getParser().isParsingMasm())
1814 if (Name
.equals_insensitive("not")) {
1816 } else if (Name
.equals_insensitive("or")) {
1818 } else if (Name
.equals_insensitive("shl")) {
1820 } else if (Name
.equals_insensitive("shr")) {
1822 } else if (Name
.equals_insensitive("xor")) {
1824 } else if (Name
.equals_insensitive("and")) {
1826 } else if (Name
.equals_insensitive("mod")) {
1828 } else if (Name
.equals_insensitive("offset")) {
1829 SMLoc OffsetLoc
= getTok().getLoc();
1830 const MCExpr
*Val
= nullptr;
1832 InlineAsmIdentifierInfo Info
;
1833 ParseError
= ParseIntelOffsetOperator(Val
, ID
, Info
, End
);
1838 SM
.onOffset(Val
, OffsetLoc
, ID
, Info
, isParsingMSInlineAsm(), ErrMsg
);
1840 return Error(SMLoc::getFromPointer(Name
.data()), ErrMsg
);
1844 if (!Name
.equals_insensitive("offset"))
1845 End
= consumeToken();
1848 bool X86AsmParser::ParseMasmNamedOperator(StringRef Name
,
1849 IntelExprStateMachine
&SM
,
1850 bool &ParseError
, SMLoc
&End
) {
1851 if (Name
.equals_insensitive("eq")) {
1853 } else if (Name
.equals_insensitive("ne")) {
1855 } else if (Name
.equals_insensitive("lt")) {
1857 } else if (Name
.equals_insensitive("le")) {
1859 } else if (Name
.equals_insensitive("gt")) {
1861 } else if (Name
.equals_insensitive("ge")) {
1866 End
= consumeToken();
1870 // Check if current intel expression append after an operand.
1871 // Like: [Operand][Intel Expression]
1872 void X86AsmParser::tryParseOperandIdx(AsmToken::TokenKind PrevTK
,
1873 IntelExprStateMachine
&SM
) {
1874 if (PrevTK
!= AsmToken::RBrac
)
1877 SM
.setAppendAfterOperand();
1880 bool X86AsmParser::ParseIntelExpression(IntelExprStateMachine
&SM
, SMLoc
&End
) {
1881 MCAsmParser
&Parser
= getParser();
1884 AsmToken::TokenKind PrevTK
= AsmToken::Error
;
1886 if (getContext().getObjectFileInfo()->isPositionIndependent())
1891 // Get a fresh reference on each loop iteration in case the previous
1892 // iteration moved the token storage during UnLex().
1893 const AsmToken
&Tok
= Parser
.getTok();
1895 bool UpdateLocLex
= true;
1896 AsmToken::TokenKind TK
= getLexer().getKind();
1900 if ((Done
= SM
.isValidEndState()))
1902 return Error(Tok
.getLoc(), "unknown token in expression");
1903 case AsmToken::Error
:
1904 return Error(getLexer().getErrLoc(), getLexer().getErr());
1906 case AsmToken::Real
:
1907 // DotOperator: [ebx].0
1908 UpdateLocLex
= false;
1909 if (ParseIntelDotOperator(SM
, End
))
1913 if (!Parser
.isParsingMasm()) {
1914 if ((Done
= SM
.isValidEndState()))
1916 return Error(Tok
.getLoc(), "unknown token in expression");
1918 // MASM allows spaces around the dot operator (e.g., "var . x")
1920 UpdateLocLex
= false;
1921 if (ParseIntelDotOperator(SM
, End
))
1924 case AsmToken::Dollar
:
1925 if (!Parser
.isParsingMasm()) {
1926 if ((Done
= SM
.isValidEndState()))
1928 return Error(Tok
.getLoc(), "unknown token in expression");
1931 case AsmToken::String
: {
1932 if (Parser
.isParsingMasm()) {
1933 // MASM parsers handle strings in expressions as constants.
1934 SMLoc ValueLoc
= Tok
.getLoc();
1937 if (Parser
.parsePrimaryExpr(Val
, End
, nullptr))
1939 UpdateLocLex
= false;
1940 if (!Val
->evaluateAsAbsolute(Res
, getStreamer().getAssemblerPtr()))
1941 return Error(ValueLoc
, "expected absolute value");
1942 if (SM
.onInteger(Res
, ErrMsg
))
1943 return Error(ValueLoc
, ErrMsg
);
1949 case AsmToken::Identifier
: {
1950 SMLoc IdentLoc
= Tok
.getLoc();
1951 StringRef Identifier
= Tok
.getString();
1952 UpdateLocLex
= false;
1953 if (Parser
.isParsingMasm()) {
1954 size_t DotOffset
= Identifier
.find_first_of('.');
1955 if (DotOffset
!= StringRef::npos
) {
1957 StringRef LHS
= Identifier
.slice(0, DotOffset
);
1958 StringRef Dot
= Identifier
.substr(DotOffset
, 1);
1959 StringRef RHS
= Identifier
.substr(DotOffset
+ 1);
1961 getLexer().UnLex(AsmToken(AsmToken::Identifier
, RHS
));
1963 getLexer().UnLex(AsmToken(AsmToken::Dot
, Dot
));
1965 getLexer().UnLex(AsmToken(AsmToken::Identifier
, LHS
));
1970 // (MASM only) <TYPE> PTR operator
1971 if (Parser
.isParsingMasm()) {
1972 const AsmToken
&NextTok
= getLexer().peekTok();
1973 if (NextTok
.is(AsmToken::Identifier
) &&
1974 NextTok
.getIdentifier().equals_insensitive("ptr")) {
1976 if (Parser
.lookUpType(Identifier
, Info
))
1977 return Error(Tok
.getLoc(), "unknown type");
1979 // Eat type and PTR.
1981 End
= consumeToken();
1985 // Register, or (MASM only) <register>.<field>
1987 if (Tok
.is(AsmToken::Identifier
)) {
1988 if (!ParseRegister(Reg
, IdentLoc
, End
, /*RestoreOnFailure=*/true)) {
1989 if (SM
.onRegister(Reg
, ErrMsg
))
1990 return Error(IdentLoc
, ErrMsg
);
1993 if (Parser
.isParsingMasm()) {
1994 const std::pair
<StringRef
, StringRef
> IDField
=
1995 Tok
.getString().split('.');
1996 const StringRef ID
= IDField
.first
, Field
= IDField
.second
;
1997 SMLoc IDEndLoc
= SMLoc::getFromPointer(ID
.data() + ID
.size());
1998 if (!Field
.empty() &&
1999 !MatchRegisterByName(Reg
, ID
, IdentLoc
, IDEndLoc
)) {
2000 if (SM
.onRegister(Reg
, ErrMsg
))
2001 return Error(IdentLoc
, ErrMsg
);
2004 SMLoc FieldStartLoc
= SMLoc::getFromPointer(Field
.data());
2005 if (Parser
.lookUpField(Field
, Info
))
2006 return Error(FieldStartLoc
, "unknown offset");
2007 else if (SM
.onPlus(ErrMsg
))
2008 return Error(getTok().getLoc(), ErrMsg
);
2009 else if (SM
.onInteger(Info
.Offset
, ErrMsg
))
2010 return Error(IdentLoc
, ErrMsg
);
2011 SM
.setTypeInfo(Info
.Type
);
2013 End
= consumeToken();
// Operator synonyms ("not", "or" etc.)
2019 bool ParseError
= false;
2020 if (ParseIntelNamedOperator(Identifier
, SM
, ParseError
, End
)) {
2025 if (Parser
.isParsingMasm() &&
2026 ParseMasmNamedOperator(Identifier
, SM
, ParseError
, End
)) {
2031 // Symbol reference, when parsing assembly content
2032 InlineAsmIdentifierInfo Info
;
2033 AsmFieldInfo FieldInfo
;
2035 if (isParsingMSInlineAsm() || Parser
.isParsingMasm()) {
2036 // MS Dot Operator expression
2037 if (Identifier
.count('.') &&
2038 (PrevTK
== AsmToken::RBrac
|| PrevTK
== AsmToken::RParen
)) {
2039 if (ParseIntelDotOperator(SM
, End
))
2044 if (isParsingMSInlineAsm()) {
2045 // MS InlineAsm operators (TYPE/LENGTH/SIZE)
2046 if (unsigned OpKind
= IdentifyIntelInlineAsmOperator(Identifier
)) {
2047 if (int64_t Val
= ParseIntelInlineAsmOperator(OpKind
)) {
2048 if (SM
.onInteger(Val
, ErrMsg
))
2049 return Error(IdentLoc
, ErrMsg
);
2055 // MS InlineAsm identifier
2056 // Call parseIdentifier() to combine @ with the identifier behind it.
2057 if (TK
== AsmToken::At
&& Parser
.parseIdentifier(Identifier
))
2058 return Error(IdentLoc
, "expected identifier");
2059 if (ParseIntelInlineAsmIdentifier(Val
, Identifier
, Info
, false, End
))
2061 else if (SM
.onIdentifierExpr(Val
, Identifier
, Info
, FieldInfo
.Type
,
2063 return Error(IdentLoc
, ErrMsg
);
2066 if (Parser
.isParsingMasm()) {
2067 if (unsigned OpKind
= IdentifyMasmOperator(Identifier
)) {
2069 if (ParseMasmOperator(OpKind
, Val
))
2071 if (SM
.onInteger(Val
, ErrMsg
))
2072 return Error(IdentLoc
, ErrMsg
);
2075 if (!getParser().lookUpType(Identifier
, FieldInfo
.Type
)) {
2076 // Field offset immediate; <TYPE>.<field specification>
2078 bool EndDot
= parseOptionalToken(AsmToken::Dot
);
2079 while (EndDot
|| (getTok().is(AsmToken::Identifier
) &&
2080 getTok().getString().starts_with("."))) {
2081 getParser().parseIdentifier(Identifier
);
2083 Identifier
.consume_front(".");
2084 EndDot
= Identifier
.consume_back(".");
2085 if (getParser().lookUpField(FieldInfo
.Type
.Name
, Identifier
,
2088 SMLoc::getFromPointer(Identifier
.data() + Identifier
.size());
2089 return Error(IdentLoc
, "Unable to lookup field reference!",
2090 SMRange(IdentLoc
, IDEnd
));
2093 EndDot
= parseOptionalToken(AsmToken::Dot
);
2095 if (SM
.onInteger(FieldInfo
.Offset
, ErrMsg
))
2096 return Error(IdentLoc
, ErrMsg
);
2100 if (getParser().parsePrimaryExpr(Val
, End
, &FieldInfo
.Type
)) {
2101 return Error(Tok
.getLoc(), "Unexpected identifier!");
2102 } else if (SM
.onIdentifierExpr(Val
, Identifier
, Info
, FieldInfo
.Type
,
2104 return Error(IdentLoc
, ErrMsg
);
2108 case AsmToken::Integer
: {
2109 // Look for 'b' or 'f' following an Integer as a directional label
2110 SMLoc Loc
= getTok().getLoc();
2111 int64_t IntVal
= getTok().getIntVal();
2112 End
= consumeToken();
2113 UpdateLocLex
= false;
2114 if (getLexer().getKind() == AsmToken::Identifier
) {
2115 StringRef IDVal
= getTok().getString();
2116 if (IDVal
== "f" || IDVal
== "b") {
2118 getContext().getDirectionalLocalSymbol(IntVal
, IDVal
== "b");
2119 MCSymbolRefExpr::VariantKind Variant
= MCSymbolRefExpr::VK_None
;
2121 MCSymbolRefExpr::create(Sym
, Variant
, getContext());
2122 if (IDVal
== "b" && Sym
->isUndefined())
2123 return Error(Loc
, "invalid reference to undefined symbol");
2124 StringRef Identifier
= Sym
->getName();
2125 InlineAsmIdentifierInfo Info
;
2127 if (SM
.onIdentifierExpr(Val
, Identifier
, Info
, Type
,
2128 isParsingMSInlineAsm(), ErrMsg
))
2129 return Error(Loc
, ErrMsg
);
2130 End
= consumeToken();
2132 if (SM
.onInteger(IntVal
, ErrMsg
))
2133 return Error(Loc
, ErrMsg
);
2136 if (SM
.onInteger(IntVal
, ErrMsg
))
2137 return Error(Loc
, ErrMsg
);
2141 case AsmToken::Plus
:
2142 if (SM
.onPlus(ErrMsg
))
2143 return Error(getTok().getLoc(), ErrMsg
);
2145 case AsmToken::Minus
:
2146 if (SM
.onMinus(ErrMsg
))
2147 return Error(getTok().getLoc(), ErrMsg
);
2149 case AsmToken::Tilde
: SM
.onNot(); break;
2150 case AsmToken::Star
: SM
.onStar(); break;
2151 case AsmToken::Slash
: SM
.onDivide(); break;
2152 case AsmToken::Percent
: SM
.onMod(); break;
2153 case AsmToken::Pipe
: SM
.onOr(); break;
2154 case AsmToken::Caret
: SM
.onXor(); break;
2155 case AsmToken::Amp
: SM
.onAnd(); break;
2156 case AsmToken::LessLess
:
2157 SM
.onLShift(); break;
2158 case AsmToken::GreaterGreater
:
2159 SM
.onRShift(); break;
2160 case AsmToken::LBrac
:
2162 return Error(Tok
.getLoc(), "unexpected bracket encountered");
2163 tryParseOperandIdx(PrevTK
, SM
);
2165 case AsmToken::RBrac
:
2166 if (SM
.onRBrac(ErrMsg
)) {
2167 return Error(Tok
.getLoc(), ErrMsg
);
2170 case AsmToken::LParen
: SM
.onLParen(); break;
2171 case AsmToken::RParen
: SM
.onRParen(); break;
2174 return Error(Tok
.getLoc(), "unknown token in expression");
2176 if (!Done
&& UpdateLocLex
)
2177 End
= consumeToken();
2184 void X86AsmParser::RewriteIntelExpression(IntelExprStateMachine
&SM
,
2185 SMLoc Start
, SMLoc End
) {
2187 unsigned ExprLen
= End
.getPointer() - Start
.getPointer();
2188 // Skip everything before a symbol displacement (if we have one)
2189 if (SM
.getSym() && !SM
.isOffsetOperator()) {
2190 StringRef SymName
= SM
.getSymName();
2191 if (unsigned Len
= SymName
.data() - Start
.getPointer())
2192 InstInfo
->AsmRewrites
->emplace_back(AOK_Skip
, Start
, Len
);
2193 Loc
= SMLoc::getFromPointer(SymName
.data() + SymName
.size());
2194 ExprLen
= End
.getPointer() - (SymName
.data() + SymName
.size());
// If we have only a symbol then there's no need for a complex rewrite;
// simply skip everything after it
2197 if (!(SM
.getBaseReg() || SM
.getIndexReg() || SM
.getImm())) {
2199 InstInfo
->AsmRewrites
->emplace_back(AOK_Skip
, Loc
, ExprLen
);
2203 // Build an Intel Expression rewrite
2204 StringRef BaseRegStr
;
2205 StringRef IndexRegStr
;
2206 StringRef OffsetNameStr
;
2207 if (SM
.getBaseReg())
2208 BaseRegStr
= X86IntelInstPrinter::getRegisterName(SM
.getBaseReg());
2209 if (SM
.getIndexReg())
2210 IndexRegStr
= X86IntelInstPrinter::getRegisterName(SM
.getIndexReg());
2211 if (SM
.isOffsetOperator())
2212 OffsetNameStr
= SM
.getSymName();
2214 IntelExpr
Expr(BaseRegStr
, IndexRegStr
, SM
.getScale(), OffsetNameStr
,
2215 SM
.getImm(), SM
.isMemExpr());
2216 InstInfo
->AsmRewrites
->emplace_back(Loc
, ExprLen
, Expr
);
2219 // Inline assembly may use variable names with namespace alias qualifiers.
2220 bool X86AsmParser::ParseIntelInlineAsmIdentifier(
2221 const MCExpr
*&Val
, StringRef
&Identifier
, InlineAsmIdentifierInfo
&Info
,
2222 bool IsUnevaluatedOperand
, SMLoc
&End
, bool IsParsingOffsetOperator
) {
2223 MCAsmParser
&Parser
= getParser();
2224 assert(isParsingMSInlineAsm() && "Expected to be parsing inline assembly.");
2227 StringRef
LineBuf(Identifier
.data());
2228 SemaCallback
->LookupInlineAsmIdentifier(LineBuf
, Info
, IsUnevaluatedOperand
);
2230 const AsmToken
&Tok
= Parser
.getTok();
2231 SMLoc Loc
= Tok
.getLoc();
2233 // Advance the token stream until the end of the current token is
2234 // after the end of what the frontend claimed.
2235 const char *EndPtr
= Tok
.getLoc().getPointer() + LineBuf
.size();
2237 End
= Tok
.getEndLoc();
2239 } while (End
.getPointer() < EndPtr
);
2240 Identifier
= LineBuf
;
2242 // The frontend should end parsing on an assembler token boundary, unless it
2244 assert((End
.getPointer() == EndPtr
||
2245 Info
.isKind(InlineAsmIdentifierInfo::IK_Invalid
)) &&
2246 "frontend claimed part of a token?");
2248 // If the identifier lookup was unsuccessful, assume that we are dealing with
2250 if (Info
.isKind(InlineAsmIdentifierInfo::IK_Invalid
)) {
2251 StringRef InternalName
=
2252 SemaCallback
->LookupInlineAsmLabel(Identifier
, getSourceManager(),
2254 assert(InternalName
.size() && "We should have an internal name here.");
2255 // Push a rewrite for replacing the identifier name with the internal name,
2256 // unless we are parsing the operand of an offset operator
2257 if (!IsParsingOffsetOperator
)
2258 InstInfo
->AsmRewrites
->emplace_back(AOK_Label
, Loc
, Identifier
.size(),
2261 Identifier
= InternalName
;
2262 } else if (Info
.isKind(InlineAsmIdentifierInfo::IK_EnumVal
))
2264 // Create the symbol reference.
2265 MCSymbol
*Sym
= getContext().getOrCreateSymbol(Identifier
);
2266 MCSymbolRefExpr::VariantKind Variant
= MCSymbolRefExpr::VK_None
;
2267 Val
= MCSymbolRefExpr::create(Sym
, Variant
, getParser().getContext());
2271 //ParseRoundingModeOp - Parse AVX-512 rounding mode operand
2272 bool X86AsmParser::ParseRoundingModeOp(SMLoc Start
, OperandVector
&Operands
) {
2273 MCAsmParser
&Parser
= getParser();
2274 const AsmToken
&Tok
= Parser
.getTok();
2275 // Eat "{" and mark the current place.
2276 const SMLoc consumedToken
= consumeToken();
2277 if (Tok
.isNot(AsmToken::Identifier
))
2278 return Error(Tok
.getLoc(), "Expected an identifier after {");
2279 if (Tok
.getIdentifier().starts_with("r")) {
2280 int rndMode
= StringSwitch
<int>(Tok
.getIdentifier())
2281 .Case("rn", X86::STATIC_ROUNDING::TO_NEAREST_INT
)
2282 .Case("rd", X86::STATIC_ROUNDING::TO_NEG_INF
)
2283 .Case("ru", X86::STATIC_ROUNDING::TO_POS_INF
)
2284 .Case("rz", X86::STATIC_ROUNDING::TO_ZERO
)
2287 return Error(Tok
.getLoc(), "Invalid rounding mode.");
2288 Parser
.Lex(); // Eat "r*" of r*-sae
2289 if (!getLexer().is(AsmToken::Minus
))
2290 return Error(Tok
.getLoc(), "Expected - at this point");
2291 Parser
.Lex(); // Eat "-"
2292 Parser
.Lex(); // Eat the sae
2293 if (!getLexer().is(AsmToken::RCurly
))
2294 return Error(Tok
.getLoc(), "Expected } at this point");
2295 SMLoc End
= Tok
.getEndLoc();
2296 Parser
.Lex(); // Eat "}"
2297 const MCExpr
*RndModeOp
=
2298 MCConstantExpr::create(rndMode
, Parser
.getContext());
2299 Operands
.push_back(X86Operand::CreateImm(RndModeOp
, Start
, End
));
2302 if (Tok
.getIdentifier() == "sae") {
2303 Parser
.Lex(); // Eat the sae
2304 if (!getLexer().is(AsmToken::RCurly
))
2305 return Error(Tok
.getLoc(), "Expected } at this point");
2306 Parser
.Lex(); // Eat "}"
2307 Operands
.push_back(X86Operand::CreateToken("{sae}", consumedToken
));
2310 return Error(Tok
.getLoc(), "unknown token in expression");
/// Parse conditional flags for CCMP/CTEST, e.g {dfv=of,sf,zf,cf} right after
2315 bool X86AsmParser::parseCFlagsOp(OperandVector
&Operands
) {
2316 MCAsmParser
&Parser
= getParser();
2317 AsmToken Tok
= Parser
.getTok();
2318 const SMLoc Start
= Tok
.getLoc();
2319 if (!Tok
.is(AsmToken::LCurly
))
2320 return Error(Tok
.getLoc(), "Expected { at this point");
2321 Parser
.Lex(); // Eat "{"
2322 Tok
= Parser
.getTok();
2323 if (Tok
.getIdentifier().lower() != "dfv")
2324 return Error(Tok
.getLoc(), "Expected dfv at this point");
2325 Parser
.Lex(); // Eat "dfv"
2326 Tok
= Parser
.getTok();
2327 if (!Tok
.is(AsmToken::Equal
))
2328 return Error(Tok
.getLoc(), "Expected = at this point");
2329 Parser
.Lex(); // Eat "="
2331 Tok
= Parser
.getTok();
2333 if (Tok
.is(AsmToken::RCurly
)) {
2334 End
= Tok
.getEndLoc();
2335 Operands
.push_back(X86Operand::CreateImm(
2336 MCConstantExpr::create(0, Parser
.getContext()), Start
, End
));
2337 Parser
.Lex(); // Eat "}"
2340 unsigned CFlags
= 0;
2341 for (unsigned I
= 0; I
< 4; ++I
) {
2342 Tok
= Parser
.getTok();
2343 unsigned CFlag
= StringSwitch
<unsigned>(Tok
.getIdentifier().lower())
2350 return Error(Tok
.getLoc(), "Invalid conditional flags");
2353 return Error(Tok
.getLoc(), "Duplicated conditional flag");
2356 Parser
.Lex(); // Eat one conditional flag
2357 Tok
= Parser
.getTok();
2358 if (Tok
.is(AsmToken::RCurly
)) {
2359 End
= Tok
.getEndLoc();
2360 Operands
.push_back(X86Operand::CreateImm(
2361 MCConstantExpr::create(CFlags
, Parser
.getContext()), Start
, End
));
2362 Parser
.Lex(); // Eat "}"
2364 } else if (I
== 3) {
2365 return Error(Tok
.getLoc(), "Expected } at this point");
2366 } else if (Tok
.isNot(AsmToken::Comma
)) {
2367 return Error(Tok
.getLoc(), "Expected } or , at this point");
2369 Parser
.Lex(); // Eat ","
2371 llvm_unreachable("Unexpected control flow");
2374 /// Parse the '.' operator.
2375 bool X86AsmParser::ParseIntelDotOperator(IntelExprStateMachine
&SM
,
2377 const AsmToken
&Tok
= getTok();
2380 // Drop the optional '.'.
2381 StringRef DotDispStr
= Tok
.getString();
2382 DotDispStr
.consume_front(".");
2383 StringRef TrailingDot
;
2385 // .Imm gets lexed as a real.
2386 if (Tok
.is(AsmToken::Real
)) {
2388 if (DotDispStr
.getAsInteger(10, DotDisp
))
2389 return Error(Tok
.getLoc(), "Unexpected offset");
2390 Info
.Offset
= DotDisp
.getZExtValue();
2391 } else if ((isParsingMSInlineAsm() || getParser().isParsingMasm()) &&
2392 Tok
.is(AsmToken::Identifier
)) {
2393 if (DotDispStr
.ends_with(".")) {
2394 TrailingDot
= DotDispStr
.substr(DotDispStr
.size() - 1);
2395 DotDispStr
= DotDispStr
.drop_back(1);
2397 const std::pair
<StringRef
, StringRef
> BaseMember
= DotDispStr
.split('.');
2398 const StringRef Base
= BaseMember
.first
, Member
= BaseMember
.second
;
2399 if (getParser().lookUpField(SM
.getType(), DotDispStr
, Info
) &&
2400 getParser().lookUpField(SM
.getSymName(), DotDispStr
, Info
) &&
2401 getParser().lookUpField(DotDispStr
, Info
) &&
2403 SemaCallback
->LookupInlineAsmField(Base
, Member
, Info
.Offset
)))
2404 return Error(Tok
.getLoc(), "Unable to lookup field reference!");
2406 return Error(Tok
.getLoc(), "Unexpected token type!");
2409 // Eat the DotExpression and update End
2410 End
= SMLoc::getFromPointer(DotDispStr
.data());
2411 const char *DotExprEndLoc
= DotDispStr
.data() + DotDispStr
.size();
2412 while (Tok
.getLoc().getPointer() < DotExprEndLoc
)
2414 if (!TrailingDot
.empty())
2415 getLexer().UnLex(AsmToken(AsmToken::Dot
, TrailingDot
));
2416 SM
.addImm(Info
.Offset
);
2417 SM
.setTypeInfo(Info
.Type
);
2421 /// Parse the 'offset' operator.
2422 /// This operator is used to specify the location of a given operand
2423 bool X86AsmParser::ParseIntelOffsetOperator(const MCExpr
*&Val
, StringRef
&ID
,
2424 InlineAsmIdentifierInfo
&Info
,
2426 // Eat offset, mark start of identifier.
2427 SMLoc Start
= Lex().getLoc();
2428 ID
= getTok().getString();
2429 if (!isParsingMSInlineAsm()) {
2430 if ((getTok().isNot(AsmToken::Identifier
) &&
2431 getTok().isNot(AsmToken::String
)) ||
2432 getParser().parsePrimaryExpr(Val
, End
, nullptr))
2433 return Error(Start
, "unexpected token!");
2434 } else if (ParseIntelInlineAsmIdentifier(Val
, ID
, Info
, false, End
, true)) {
2435 return Error(Start
, "unable to lookup expression");
2436 } else if (Info
.isKind(InlineAsmIdentifierInfo::IK_EnumVal
)) {
2437 return Error(Start
, "offset operator cannot yet handle constants");
2442 // Query a candidate string for being an Intel assembly operator
2443 // Report back its kind, or IOK_INVALID if does not evaluated as a known one
2444 unsigned X86AsmParser::IdentifyIntelInlineAsmOperator(StringRef Name
) {
2445 return StringSwitch
<unsigned>(Name
)
2446 .Cases("TYPE","type",IOK_TYPE
)
2447 .Cases("SIZE","size",IOK_SIZE
)
2448 .Cases("LENGTH","length",IOK_LENGTH
)
2449 .Default(IOK_INVALID
);
2452 /// Parse the 'LENGTH', 'TYPE' and 'SIZE' operators. The LENGTH operator
2453 /// returns the number of elements in an array. It returns the value 1 for
2454 /// non-array variables. The SIZE operator returns the size of a C or C++
2455 /// variable. A variable's size is the product of its LENGTH and TYPE. The
2456 /// TYPE operator returns the size of a C or C++ type or variable. If the
2457 /// variable is an array, TYPE returns the size of a single element.
2458 unsigned X86AsmParser::ParseIntelInlineAsmOperator(unsigned OpKind
) {
2459 MCAsmParser
&Parser
= getParser();
2460 const AsmToken
&Tok
= Parser
.getTok();
2461 Parser
.Lex(); // Eat operator.
2463 const MCExpr
*Val
= nullptr;
2464 InlineAsmIdentifierInfo Info
;
2465 SMLoc Start
= Tok
.getLoc(), End
;
2466 StringRef Identifier
= Tok
.getString();
2467 if (ParseIntelInlineAsmIdentifier(Val
, Identifier
, Info
,
2468 /*IsUnevaluatedOperand=*/true, End
))
2471 if (!Info
.isKind(InlineAsmIdentifierInfo::IK_Var
)) {
2472 Error(Start
, "unable to lookup expression");
2478 default: llvm_unreachable("Unexpected operand kind!");
2479 case IOK_LENGTH
: CVal
= Info
.Var
.Length
; break;
2480 case IOK_SIZE
: CVal
= Info
.Var
.Size
; break;
2481 case IOK_TYPE
: CVal
= Info
.Var
.Type
; break;
2487 // Query a candidate string for being an Intel assembly operator
2488 // Report back its kind, or IOK_INVALID if does not evaluated as a known one
2489 unsigned X86AsmParser::IdentifyMasmOperator(StringRef Name
) {
2490 return StringSwitch
<unsigned>(Name
.lower())
2491 .Case("type", MOK_TYPE
)
2492 .Cases("size", "sizeof", MOK_SIZEOF
)
2493 .Cases("length", "lengthof", MOK_LENGTHOF
)
2494 .Default(MOK_INVALID
);
2497 /// Parse the 'LENGTHOF', 'SIZEOF', and 'TYPE' operators. The LENGTHOF operator
2498 /// returns the number of elements in an array. It returns the value 1 for
2499 /// non-array variables. The SIZEOF operator returns the size of a type or
2500 /// variable in bytes. A variable's size is the product of its LENGTH and TYPE.
2501 /// The TYPE operator returns the size of a variable. If the variable is an
2502 /// array, TYPE returns the size of a single element.
2503 bool X86AsmParser::ParseMasmOperator(unsigned OpKind
, int64_t &Val
) {
2504 MCAsmParser
&Parser
= getParser();
2505 SMLoc OpLoc
= Parser
.getTok().getLoc();
2506 Parser
.Lex(); // Eat operator.
2509 if (OpKind
== MOK_SIZEOF
|| OpKind
== MOK_TYPE
) {
2510 // Check for SIZEOF(<type>) and TYPE(<type>).
2511 bool InParens
= Parser
.getTok().is(AsmToken::LParen
);
2512 const AsmToken
&IDTok
= InParens
? getLexer().peekTok() : Parser
.getTok();
2514 if (IDTok
.is(AsmToken::Identifier
) &&
2515 !Parser
.lookUpType(IDTok
.getIdentifier(), Type
)) {
2520 parseToken(AsmToken::LParen
);
2521 parseToken(AsmToken::Identifier
);
2523 parseToken(AsmToken::RParen
);
2528 IntelExprStateMachine SM
;
2529 SMLoc End
, Start
= Parser
.getTok().getLoc();
2530 if (ParseIntelExpression(SM
, End
))
2535 llvm_unreachable("Unexpected operand kind!");
2540 Val
= SM
.getLength();
2543 Val
= SM
.getElementSize();
2548 return Error(OpLoc
, "expression has unknown type", SMRange(Start
, End
));
2554 bool X86AsmParser::ParseIntelMemoryOperandSize(unsigned &Size
) {
2555 Size
= StringSwitch
<unsigned>(getTok().getString())
2556 .Cases("BYTE", "byte", 8)
2557 .Cases("WORD", "word", 16)
2558 .Cases("DWORD", "dword", 32)
2559 .Cases("FLOAT", "float", 32)
2560 .Cases("LONG", "long", 32)
2561 .Cases("FWORD", "fword", 48)
2562 .Cases("DOUBLE", "double", 64)
2563 .Cases("QWORD", "qword", 64)
2564 .Cases("MMWORD","mmword", 64)
2565 .Cases("XWORD", "xword", 80)
2566 .Cases("TBYTE", "tbyte", 80)
2567 .Cases("XMMWORD", "xmmword", 128)
2568 .Cases("YMMWORD", "ymmword", 256)
2569 .Cases("ZMMWORD", "zmmword", 512)
2572 const AsmToken
&Tok
= Lex(); // Eat operand size (e.g., byte, word).
2573 if (!(Tok
.getString() == "PTR" || Tok
.getString() == "ptr"))
2574 return Error(Tok
.getLoc(), "Expected 'PTR' or 'ptr' token!");
2580 bool X86AsmParser::parseIntelOperand(OperandVector
&Operands
, StringRef Name
) {
2581 MCAsmParser
&Parser
= getParser();
2582 const AsmToken
&Tok
= Parser
.getTok();
2585 // Parse optional Size directive.
2587 if (ParseIntelMemoryOperandSize(Size
))
2589 bool PtrInOperand
= bool(Size
);
2591 Start
= Tok
.getLoc();
2593 // Rounding mode operand.
2594 if (getLexer().is(AsmToken::LCurly
))
2595 return ParseRoundingModeOp(Start
, Operands
);
2597 // Register operand.
2599 if (Tok
.is(AsmToken::Identifier
) && !parseRegister(RegNo
, Start
, End
)) {
2600 if (RegNo
== X86::RIP
)
2601 return Error(Start
, "rip can only be used as a base register");
2602 // A Register followed by ':' is considered a segment override
2603 if (Tok
.isNot(AsmToken::Colon
)) {
2605 return Error(Start
, "expected memory operand after 'ptr', "
2606 "found register operand instead");
2607 Operands
.push_back(X86Operand::CreateReg(RegNo
, Start
, End
));
2610 // An alleged segment override. check if we have a valid segment register
2611 if (!X86MCRegisterClasses
[X86::SEGMENT_REGRegClassID
].contains(RegNo
))
2612 return Error(Start
, "invalid segment register");
2613 // Eat ':' and update Start location
2614 Start
= Lex().getLoc();
2617 // Immediates and Memory
2618 IntelExprStateMachine SM
;
2619 if (ParseIntelExpression(SM
, End
))
2622 if (isParsingMSInlineAsm())
2623 RewriteIntelExpression(SM
, Start
, Tok
.getLoc());
2625 int64_t Imm
= SM
.getImm();
2626 const MCExpr
*Disp
= SM
.getSym();
2627 const MCExpr
*ImmDisp
= MCConstantExpr::create(Imm
, getContext());
2629 Disp
= MCBinaryExpr::createAdd(Disp
, ImmDisp
, getContext());
2633 // RegNo != 0 specifies a valid segment register,
2634 // and we are parsing a segment override
2635 if (!SM
.isMemExpr() && !RegNo
) {
2636 if (isParsingMSInlineAsm() && SM
.isOffsetOperator()) {
2637 const InlineAsmIdentifierInfo
&Info
= SM
.getIdentifierInfo();
2638 if (Info
.isKind(InlineAsmIdentifierInfo::IK_Var
)) {
2639 // Disp includes the address of a variable; make sure this is recorded
2640 // for later handling.
2641 Operands
.push_back(X86Operand::CreateImm(Disp
, Start
, End
,
2642 SM
.getSymName(), Info
.Var
.Decl
,
2643 Info
.Var
.IsGlobalLV
));
2648 Operands
.push_back(X86Operand::CreateImm(Disp
, Start
, End
));
2653 MCRegister BaseReg
= SM
.getBaseReg();
2654 MCRegister IndexReg
= SM
.getIndexReg();
2655 if (IndexReg
&& BaseReg
== X86::RIP
)
2656 BaseReg
= MCRegister();
2657 unsigned Scale
= SM
.getScale();
2659 Size
= SM
.getElementSize() << 3;
2661 if (Scale
== 0 && BaseReg
!= X86::ESP
&& BaseReg
!= X86::RSP
&&
2662 (IndexReg
== X86::ESP
|| IndexReg
== X86::RSP
))
2663 std::swap(BaseReg
, IndexReg
);
2665 // If BaseReg is a vector register and IndexReg is not, swap them unless
2666 // Scale was specified in which case it would be an error.
2668 !(X86MCRegisterClasses
[X86::VR128XRegClassID
].contains(IndexReg
) ||
2669 X86MCRegisterClasses
[X86::VR256XRegClassID
].contains(IndexReg
) ||
2670 X86MCRegisterClasses
[X86::VR512RegClassID
].contains(IndexReg
)) &&
2671 (X86MCRegisterClasses
[X86::VR128XRegClassID
].contains(BaseReg
) ||
2672 X86MCRegisterClasses
[X86::VR256XRegClassID
].contains(BaseReg
) ||
2673 X86MCRegisterClasses
[X86::VR512RegClassID
].contains(BaseReg
)))
2674 std::swap(BaseReg
, IndexReg
);
2677 X86MCRegisterClasses
[X86::GR16RegClassID
].contains(IndexReg
))
2678 return Error(Start
, "16-bit addresses cannot have a scale");
2680 // If there was no explicit scale specified, change it to 1.
2684 // If this is a 16-bit addressing mode with the base and index in the wrong
2685 // order, swap them so CheckBaseRegAndIndexRegAndScale doesn't fail. It is
2686 // shared with att syntax where order matters.
2687 if ((BaseReg
== X86::SI
|| BaseReg
== X86::DI
) &&
2688 (IndexReg
== X86::BX
|| IndexReg
== X86::BP
))
2689 std::swap(BaseReg
, IndexReg
);
2691 if ((BaseReg
|| IndexReg
) &&
2692 CheckBaseRegAndIndexRegAndScale(BaseReg
, IndexReg
, Scale
, is64BitMode(),
2694 return Error(Start
, ErrMsg
);
2695 bool IsUnconditionalBranch
=
2696 Name
.equals_insensitive("jmp") || Name
.equals_insensitive("call");
2697 if (isParsingMSInlineAsm())
2698 return CreateMemForMSInlineAsm(RegNo
, Disp
, BaseReg
, IndexReg
, Scale
,
2699 IsUnconditionalBranch
&& is64BitMode(),
2700 Start
, End
, Size
, SM
.getSymName(),
2701 SM
.getIdentifierInfo(), Operands
);
2703 // When parsing x64 MS-style assembly, all non-absolute references to a named
2704 // variable default to RIP-relative.
2705 MCRegister DefaultBaseReg
;
2706 bool MaybeDirectBranchDest
= true;
2708 if (Parser
.isParsingMasm()) {
2709 if (is64BitMode() &&
2710 ((PtrInOperand
&& !IndexReg
) || SM
.getElementSize() > 0)) {
2711 DefaultBaseReg
= X86::RIP
;
2713 if (IsUnconditionalBranch
) {
2715 MaybeDirectBranchDest
= false;
2717 DefaultBaseReg
= X86::RIP
;
2718 } else if (!BaseReg
&& !IndexReg
&& Disp
&&
2719 Disp
->getKind() == MCExpr::SymbolRef
) {
2720 if (is64BitMode()) {
2721 if (SM
.getSize() == 8) {
2722 MaybeDirectBranchDest
= false;
2723 DefaultBaseReg
= X86::RIP
;
2726 if (SM
.getSize() == 4 || SM
.getSize() == 2)
2727 MaybeDirectBranchDest
= false;
2731 } else if (IsUnconditionalBranch
) {
2732 // Treat `call [offset fn_ref]` (or `jmp`) syntax as an error.
2733 if (!PtrInOperand
&& SM
.isOffsetOperator())
2735 Start
, "`OFFSET` operator cannot be used in an unconditional branch");
2736 if (PtrInOperand
|| SM
.isBracketUsed())
2737 MaybeDirectBranchDest
= false;
2740 if ((BaseReg
|| IndexReg
|| RegNo
|| DefaultBaseReg
))
2741 Operands
.push_back(X86Operand::CreateMem(
2742 getPointerWidth(), RegNo
, Disp
, BaseReg
, IndexReg
, Scale
, Start
, End
,
2743 Size
, DefaultBaseReg
, /*SymName=*/StringRef(), /*OpDecl=*/nullptr,
2744 /*FrontendSize=*/0, /*UseUpRegs=*/false, MaybeDirectBranchDest
));
2746 Operands
.push_back(X86Operand::CreateMem(
2747 getPointerWidth(), Disp
, Start
, End
, Size
, /*SymName=*/StringRef(),
2748 /*OpDecl=*/nullptr, /*FrontendSize=*/0, /*UseUpRegs=*/false,
2749 MaybeDirectBranchDest
));
2753 bool X86AsmParser::parseATTOperand(OperandVector
&Operands
) {
2754 MCAsmParser
&Parser
= getParser();
2755 switch (getLexer().getKind()) {
2756 case AsmToken::Dollar
: {
2757 // $42 or $ID -> immediate.
2758 SMLoc Start
= Parser
.getTok().getLoc(), End
;
2761 // This is an immediate, so we should not parse a register. Do a precheck
2762 // for '%' to supercede intra-register parse errors.
2763 SMLoc L
= Parser
.getTok().getLoc();
2764 if (check(getLexer().is(AsmToken::Percent
), L
,
2765 "expected immediate expression") ||
2766 getParser().parseExpression(Val
, End
) ||
2767 check(isa
<X86MCExpr
>(Val
), L
, "expected immediate expression"))
2769 Operands
.push_back(X86Operand::CreateImm(Val
, Start
, End
));
2772 case AsmToken::LCurly
: {
2773 SMLoc Start
= Parser
.getTok().getLoc();
2774 return ParseRoundingModeOp(Start
, Operands
);
2777 // This a memory operand or a register. We have some parsing complications
2778 // as a '(' may be part of an immediate expression or the addressing mode
2779 // block. This is complicated by the fact that an assembler-level variable
2780 // may refer either to a register or an immediate expression.
2782 SMLoc Loc
= Parser
.getTok().getLoc(), EndLoc
;
2783 const MCExpr
*Expr
= nullptr;
2785 if (getLexer().isNot(AsmToken::LParen
)) {
2786 // No '(' so this is either a displacement expression or a register.
2787 if (Parser
.parseExpression(Expr
, EndLoc
))
2789 if (auto *RE
= dyn_cast
<X86MCExpr
>(Expr
)) {
2790 // Segment Register. Reset Expr and copy value to register.
2794 // Check the register.
2795 if (Reg
== X86::EIZ
|| Reg
== X86::RIZ
)
2797 Loc
, "%eiz and %riz can only be used as index registers",
2798 SMRange(Loc
, EndLoc
));
2799 if (Reg
== X86::RIP
)
2800 return Error(Loc
, "%rip can only be used as a base register",
2801 SMRange(Loc
, EndLoc
));
2802 // Return register that are not segment prefixes immediately.
2803 if (!Parser
.parseOptionalToken(AsmToken::Colon
)) {
2804 Operands
.push_back(X86Operand::CreateReg(Reg
, Loc
, EndLoc
));
2807 if (!X86MCRegisterClasses
[X86::SEGMENT_REGRegClassID
].contains(Reg
))
2808 return Error(Loc
, "invalid segment register");
2809 // Accept a '*' absolute memory reference after the segment. Place it
2810 // before the full memory operand.
2811 if (getLexer().is(AsmToken::Star
))
2812 Operands
.push_back(X86Operand::CreateToken("*", consumeToken()));
2815 // This is a Memory operand.
2816 return ParseMemOperand(Reg
, Expr
, Loc
, EndLoc
, Operands
);
2821 // X86::COND_INVALID if not a recognized condition code or alternate mnemonic,
2822 // otherwise the EFLAGS Condition Code enumerator.
2823 X86::CondCode
X86AsmParser::ParseConditionCode(StringRef CC
) {
2824 return StringSwitch
<X86::CondCode
>(CC
)
2825 .Case("o", X86::COND_O
) // Overflow
2826 .Case("no", X86::COND_NO
) // No Overflow
2827 .Cases("b", "nae", X86::COND_B
) // Below/Neither Above nor Equal
2828 .Cases("ae", "nb", X86::COND_AE
) // Above or Equal/Not Below
2829 .Cases("e", "z", X86::COND_E
) // Equal/Zero
2830 .Cases("ne", "nz", X86::COND_NE
) // Not Equal/Not Zero
2831 .Cases("be", "na", X86::COND_BE
) // Below or Equal/Not Above
2832 .Cases("a", "nbe", X86::COND_A
) // Above/Neither Below nor Equal
2833 .Case("s", X86::COND_S
) // Sign
2834 .Case("ns", X86::COND_NS
) // No Sign
2835 .Cases("p", "pe", X86::COND_P
) // Parity/Parity Even
2836 .Cases("np", "po", X86::COND_NP
) // No Parity/Parity Odd
2837 .Cases("l", "nge", X86::COND_L
) // Less/Neither Greater nor Equal
2838 .Cases("ge", "nl", X86::COND_GE
) // Greater or Equal/Not Less
2839 .Cases("le", "ng", X86::COND_LE
) // Less or Equal/Not Greater
2840 .Cases("g", "nle", X86::COND_G
) // Greater/Neither Less nor Equal
2841 .Default(X86::COND_INVALID
);
2844 // true on failure, false otherwise
2845 // If no {z} mark was found - Parser doesn't advance
2846 bool X86AsmParser::ParseZ(std::unique_ptr
<X86Operand
> &Z
,
2847 const SMLoc
&StartLoc
) {
2848 MCAsmParser
&Parser
= getParser();
2849 // Assuming we are just pass the '{' mark, quering the next token
2850 // Searched for {z}, but none was found. Return false, as no parsing error was
2852 if (!(getLexer().is(AsmToken::Identifier
) &&
2853 (getLexer().getTok().getIdentifier() == "z")))
2855 Parser
.Lex(); // Eat z
2856 // Query and eat the '}' mark
2857 if (!getLexer().is(AsmToken::RCurly
))
2858 return Error(getLexer().getLoc(), "Expected } at this point");
2859 Parser
.Lex(); // Eat '}'
2860 // Assign Z with the {z} mark operand
2861 Z
= X86Operand::CreateToken("{z}", StartLoc
);
2865 // true on failure, false otherwise
2866 bool X86AsmParser::HandleAVX512Operand(OperandVector
&Operands
) {
2867 MCAsmParser
&Parser
= getParser();
2868 if (getLexer().is(AsmToken::LCurly
)) {
2869 // Eat "{" and mark the current place.
2870 const SMLoc consumedToken
= consumeToken();
2871 // Distinguish {1to<NUM>} from {%k<NUM>}.
2872 if(getLexer().is(AsmToken::Integer
)) {
2873 // Parse memory broadcasting ({1to<NUM>}).
2874 if (getLexer().getTok().getIntVal() != 1)
2875 return TokError("Expected 1to<NUM> at this point");
2876 StringRef Prefix
= getLexer().getTok().getString();
2877 Parser
.Lex(); // Eat first token of 1to8
2878 if (!getLexer().is(AsmToken::Identifier
))
2879 return TokError("Expected 1to<NUM> at this point");
2880 // Recognize only reasonable suffixes.
2881 SmallVector
<char, 5> BroadcastVector
;
2882 StringRef BroadcastString
= (Prefix
+ getLexer().getTok().getIdentifier())
2883 .toStringRef(BroadcastVector
);
2884 if (!BroadcastString
.starts_with("1to"))
2885 return TokError("Expected 1to<NUM> at this point");
2886 const char *BroadcastPrimitive
=
2887 StringSwitch
<const char *>(BroadcastString
)
2888 .Case("1to2", "{1to2}")
2889 .Case("1to4", "{1to4}")
2890 .Case("1to8", "{1to8}")
2891 .Case("1to16", "{1to16}")
2892 .Case("1to32", "{1to32}")
2894 if (!BroadcastPrimitive
)
2895 return TokError("Invalid memory broadcast primitive.");
2896 Parser
.Lex(); // Eat trailing token of 1toN
2897 if (!getLexer().is(AsmToken::RCurly
))
2898 return TokError("Expected } at this point");
2899 Parser
.Lex(); // Eat "}"
2900 Operands
.push_back(X86Operand::CreateToken(BroadcastPrimitive
,
2902 // No AVX512 specific primitives can pass
2903 // after memory broadcasting, so return.
2906 // Parse either {k}{z}, {z}{k}, {k} or {z}
2907 // last one have no meaning, but GCC accepts it
2908 // Currently, we're just pass a '{' mark
2909 std::unique_ptr
<X86Operand
> Z
;
2910 if (ParseZ(Z
, consumedToken
))
2912 // Reaching here means that parsing of the allegadly '{z}' mark yielded
2914 // Query for the need of further parsing for a {%k<NUM>} mark
2915 if (!Z
|| getLexer().is(AsmToken::LCurly
)) {
2916 SMLoc StartLoc
= Z
? consumeToken() : consumedToken
;
2917 // Parse an op-mask register mark ({%k<NUM>}), which is now to be
2921 if (!parseRegister(RegNo
, RegLoc
, StartLoc
) &&
2922 X86MCRegisterClasses
[X86::VK1RegClassID
].contains(RegNo
)) {
2923 if (RegNo
== X86::K0
)
2924 return Error(RegLoc
, "Register k0 can't be used as write mask");
2925 if (!getLexer().is(AsmToken::RCurly
))
2926 return Error(getLexer().getLoc(), "Expected } at this point");
2927 Operands
.push_back(X86Operand::CreateToken("{", StartLoc
));
2929 X86Operand::CreateReg(RegNo
, StartLoc
, StartLoc
));
2930 Operands
.push_back(X86Operand::CreateToken("}", consumeToken()));
2932 return Error(getLexer().getLoc(),
2933 "Expected an op-mask register at this point");
2934 // {%k<NUM>} mark is found, inquire for {z}
2935 if (getLexer().is(AsmToken::LCurly
) && !Z
) {
2936 // Have we've found a parsing error, or found no (expected) {z} mark
2937 // - report an error
2938 if (ParseZ(Z
, consumeToken()) || !Z
)
2939 return Error(getLexer().getLoc(),
2940 "Expected a {z} mark at this point");
2943 // '{z}' on its own is meaningless, hence should be ignored.
2944 // on the contrary - have it been accompanied by a K register,
2947 Operands
.push_back(std::move(Z
));
2954 /// ParseMemOperand: 'seg : disp(basereg, indexreg, scale)'. The '%ds:' prefix
2955 /// has already been parsed if present. disp may be provided as well.
2956 bool X86AsmParser::ParseMemOperand(MCRegister SegReg
, const MCExpr
*Disp
,
2957 SMLoc StartLoc
, SMLoc EndLoc
,
2958 OperandVector
&Operands
) {
2959 MCAsmParser
&Parser
= getParser();
2961 // Based on the initial passed values, we may be in any of these cases, we are
2962 // in one of these cases (with current position (*)):
2964 // 1. seg : * disp (base-index-scale-expr)
2965 // 2. seg : *(disp) (base-index-scale-expr)
2966 // 3. seg : *(base-index-scale-expr)
2967 // 4. disp *(base-index-scale-expr)
2968 // 5. *(disp) (base-index-scale-expr)
2969 // 6. *(base-index-scale-expr)
2973 // If we do not have an displacement yet, check if we're in cases 4 or 6 by
2974 // checking if the first object after the parenthesis is a register (or an
2975 // identifier referring to a register) and parse the displacement or default
2976 // to 0 as appropriate.
2977 auto isAtMemOperand
= [this]() {
2978 if (this->getLexer().isNot(AsmToken::LParen
))
2982 auto TokCount
= this->getLexer().peekTokens(Buf
, true);
2985 switch (Buf
[0].getKind()) {
2986 case AsmToken::Percent
:
2987 case AsmToken::Comma
:
2989 // These lower cases are doing a peekIdentifier.
2991 case AsmToken::Dollar
:
2992 if ((TokCount
> 1) &&
2993 (Buf
[1].is(AsmToken::Identifier
) || Buf
[1].is(AsmToken::String
)) &&
2994 (Buf
[0].getLoc().getPointer() + 1 == Buf
[1].getLoc().getPointer()))
2995 Id
= StringRef(Buf
[0].getLoc().getPointer(),
2996 Buf
[1].getIdentifier().size() + 1);
2998 case AsmToken::Identifier
:
2999 case AsmToken::String
:
3000 Id
= Buf
[0].getIdentifier();
3005 // We have an ID. Check if it is bound to a register.
3007 MCSymbol
*Sym
= this->getContext().getOrCreateSymbol(Id
);
3008 if (Sym
->isVariable()) {
3009 auto V
= Sym
->getVariableValue(/*SetUsed*/ false);
3010 return isa
<X86MCExpr
>(V
);
3017 // Parse immediate if we're not at a mem operand yet.
3018 if (!isAtMemOperand()) {
3019 if (Parser
.parseTokenLoc(Loc
) || Parser
.parseExpression(Disp
, EndLoc
))
3021 assert(!isa
<X86MCExpr
>(Disp
) && "Expected non-register here.");
3023 // Disp is implicitly zero if we haven't parsed it yet.
3024 Disp
= MCConstantExpr::create(0, Parser
.getContext());
3028 // We are now either at the end of the operand or at the '(' at the start of a
3029 // base-index-scale-expr.
3031 if (!parseOptionalToken(AsmToken::LParen
)) {
3034 X86Operand::CreateMem(getPointerWidth(), Disp
, StartLoc
, EndLoc
));
3036 Operands
.push_back(X86Operand::CreateMem(getPointerWidth(), SegReg
, Disp
,
3037 0, 0, 1, StartLoc
, EndLoc
));
3041 // If we reached here, then eat the '(' and Process
3042 // the rest of the memory operand.
3043 MCRegister BaseReg
, IndexReg
;
3045 SMLoc BaseLoc
= getLexer().getLoc();
3049 // Parse BaseReg if one is provided.
3050 if (getLexer().isNot(AsmToken::Comma
) && getLexer().isNot(AsmToken::RParen
)) {
3051 if (Parser
.parseExpression(E
, EndLoc
) ||
3052 check(!isa
<X86MCExpr
>(E
), BaseLoc
, "expected register here"))
3055 // Check the register.
3056 BaseReg
= cast
<X86MCExpr
>(E
)->getReg();
3057 if (BaseReg
== X86::EIZ
|| BaseReg
== X86::RIZ
)
3058 return Error(BaseLoc
, "eiz and riz can only be used as index registers",
3059 SMRange(BaseLoc
, EndLoc
));
3062 if (parseOptionalToken(AsmToken::Comma
)) {
3063 // Following the comma we should have either an index register, or a scale
3064 // value. We don't support the later form, but we want to parse it
3067 // Even though it would be completely consistent to support syntax like
3068 // "1(%eax,,1)", the assembler doesn't. Use "eiz" or "riz" for this.
3069 if (getLexer().isNot(AsmToken::RParen
)) {
3070 if (Parser
.parseTokenLoc(Loc
) || Parser
.parseExpression(E
, EndLoc
))
3073 if (!isa
<X86MCExpr
>(E
)) {
3074 // We've parsed an unexpected Scale Value instead of an index
3075 // register. Interpret it as an absolute.
3077 if (!E
->evaluateAsAbsolute(ScaleVal
, getStreamer().getAssemblerPtr()))
3078 return Error(Loc
, "expected absolute expression");
3080 Warning(Loc
, "scale factor without index register is ignored");
3082 } else { // IndexReg Found.
3083 IndexReg
= cast
<X86MCExpr
>(E
)->getReg();
3085 if (BaseReg
== X86::RIP
)
3087 "%rip as base register can not have an index register");
3088 if (IndexReg
== X86::RIP
)
3089 return Error(Loc
, "%rip is not allowed as an index register");
3091 if (parseOptionalToken(AsmToken::Comma
)) {
3092 // Parse the scale amount:
3093 // ::= ',' [scale-expression]
3095 // A scale amount without an index is ignored.
3096 if (getLexer().isNot(AsmToken::RParen
)) {
3098 if (Parser
.parseTokenLoc(Loc
) ||
3099 Parser
.parseAbsoluteExpression(ScaleVal
))
3100 return Error(Loc
, "expected scale expression");
3101 Scale
= (unsigned)ScaleVal
;
3102 // Validate the scale amount.
3103 if (X86MCRegisterClasses
[X86::GR16RegClassID
].contains(BaseReg
) &&
3105 return Error(Loc
, "scale factor in 16-bit address must be 1");
3106 if (checkScale(Scale
, ErrMsg
))
3107 return Error(Loc
, ErrMsg
);
3114 // Ok, we've eaten the memory operand, verify we have a ')' and eat it too.
3115 if (parseToken(AsmToken::RParen
, "unexpected token in memory operand"))
3118 // This is to support otherwise illegal operand (%dx) found in various
3119 // unofficial manuals examples (e.g. "out[s]?[bwl]? %al, (%dx)") and must now
3120 // be supported. Mark such DX variants separately fix only in special cases.
3121 if (BaseReg
== X86::DX
&& !IndexReg
&& Scale
== 1 && !SegReg
&&
3122 isa
<MCConstantExpr
>(Disp
) &&
3123 cast
<MCConstantExpr
>(Disp
)->getValue() == 0) {
3124 Operands
.push_back(X86Operand::CreateDXReg(BaseLoc
, BaseLoc
));
3128 if (CheckBaseRegAndIndexRegAndScale(BaseReg
, IndexReg
, Scale
, is64BitMode(),
3130 return Error(BaseLoc
, ErrMsg
);
3132 // If the displacement is a constant, check overflows. For 64-bit addressing,
3133 // gas requires isInt<32> and otherwise reports an error. For others, gas
3134 // reports a warning and allows a wider range. E.g. gas allows
3135 // [-0xffffffff,0xffffffff] for 32-bit addressing (e.g. Linux kernel uses
3136 // `leal -__PAGE_OFFSET(%ecx),%esp` where __PAGE_OFFSET is 0xc0000000).
3137 if (BaseReg
|| IndexReg
) {
3138 if (auto CE
= dyn_cast
<MCConstantExpr
>(Disp
)) {
3139 auto Imm
= CE
->getValue();
3140 bool Is64
= X86MCRegisterClasses
[X86::GR64RegClassID
].contains(BaseReg
) ||
3141 X86MCRegisterClasses
[X86::GR64RegClassID
].contains(IndexReg
);
3142 bool Is16
= X86MCRegisterClasses
[X86::GR16RegClassID
].contains(BaseReg
);
3144 if (!isInt
<32>(Imm
))
3145 return Error(BaseLoc
, "displacement " + Twine(Imm
) +
3146 " is not within [-2147483648, 2147483647]");
3148 if (!isUInt
<32>(Imm
< 0 ? -uint64_t(Imm
) : uint64_t(Imm
))) {
3149 Warning(BaseLoc
, "displacement " + Twine(Imm
) +
3150 " shortened to 32-bit signed " +
3151 Twine(static_cast<int32_t>(Imm
)));
3153 } else if (!isUInt
<16>(Imm
< 0 ? -uint64_t(Imm
) : uint64_t(Imm
))) {
3154 Warning(BaseLoc
, "displacement " + Twine(Imm
) +
3155 " shortened to 16-bit signed " +
3156 Twine(static_cast<int16_t>(Imm
)));
3161 if (SegReg
|| BaseReg
|| IndexReg
)
3162 Operands
.push_back(X86Operand::CreateMem(getPointerWidth(), SegReg
, Disp
,
3163 BaseReg
, IndexReg
, Scale
, StartLoc
,
3167 X86Operand::CreateMem(getPointerWidth(), Disp
, StartLoc
, EndLoc
));
3171 // Parse either a standard primary expression or a register.
3172 bool X86AsmParser::parsePrimaryExpr(const MCExpr
*&Res
, SMLoc
&EndLoc
) {
3173 MCAsmParser
&Parser
= getParser();
3174 // See if this is a register first.
3175 if (getTok().is(AsmToken::Percent
) ||
3176 (isParsingIntelSyntax() && getTok().is(AsmToken::Identifier
) &&
3177 MatchRegisterName(Parser
.getTok().getString()))) {
3178 SMLoc StartLoc
= Parser
.getTok().getLoc();
3180 if (parseRegister(RegNo
, StartLoc
, EndLoc
))
3182 Res
= X86MCExpr::create(RegNo
, Parser
.getContext());
3185 return Parser
.parsePrimaryExpr(Res
, EndLoc
, nullptr);
3188 bool X86AsmParser::parseInstruction(ParseInstructionInfo
&Info
, StringRef Name
,
3189 SMLoc NameLoc
, OperandVector
&Operands
) {
3190 MCAsmParser
&Parser
= getParser();
3193 // Reset the forced VEX encoding.
3194 ForcedOpcodePrefix
= OpcodePrefix_Default
;
3195 ForcedDispEncoding
= DispEncoding_Default
;
3196 UseApxExtendedReg
= false;
3197 ForcedNoFlag
= false;
3199 // Parse pseudo prefixes.
3202 if (getLexer().isNot(AsmToken::Identifier
))
3203 return Error(Parser
.getTok().getLoc(), "Unexpected token after '{'");
3204 std::string Prefix
= Parser
.getTok().getString().lower();
3205 Parser
.Lex(); // Eat identifier.
3206 if (getLexer().isNot(AsmToken::RCurly
))
3207 return Error(Parser
.getTok().getLoc(), "Expected '}'");
3208 Parser
.Lex(); // Eat curly.
3210 if (Prefix
== "rex")
3211 ForcedOpcodePrefix
= OpcodePrefix_REX
;
3212 else if (Prefix
== "rex2")
3213 ForcedOpcodePrefix
= OpcodePrefix_REX2
;
3214 else if (Prefix
== "vex")
3215 ForcedOpcodePrefix
= OpcodePrefix_VEX
;
3216 else if (Prefix
== "vex2")
3217 ForcedOpcodePrefix
= OpcodePrefix_VEX2
;
3218 else if (Prefix
== "vex3")
3219 ForcedOpcodePrefix
= OpcodePrefix_VEX3
;
3220 else if (Prefix
== "evex")
3221 ForcedOpcodePrefix
= OpcodePrefix_EVEX
;
3222 else if (Prefix
== "disp8")
3223 ForcedDispEncoding
= DispEncoding_Disp8
;
3224 else if (Prefix
== "disp32")
3225 ForcedDispEncoding
= DispEncoding_Disp32
;
3226 else if (Prefix
== "nf")
3227 ForcedNoFlag
= true;
3229 return Error(NameLoc
, "unknown prefix");
3231 NameLoc
= Parser
.getTok().getLoc();
3232 if (getLexer().is(AsmToken::LCurly
)) {
3236 if (getLexer().isNot(AsmToken::Identifier
))
3237 return Error(Parser
.getTok().getLoc(), "Expected identifier");
3238 // FIXME: The mnemonic won't match correctly if its not in lower case.
3239 Name
= Parser
.getTok().getString();
3244 // Parse MASM style pseudo prefixes.
3245 if (isParsingMSInlineAsm()) {
3246 if (Name
.equals_insensitive("vex"))
3247 ForcedOpcodePrefix
= OpcodePrefix_VEX
;
3248 else if (Name
.equals_insensitive("vex2"))
3249 ForcedOpcodePrefix
= OpcodePrefix_VEX2
;
3250 else if (Name
.equals_insensitive("vex3"))
3251 ForcedOpcodePrefix
= OpcodePrefix_VEX3
;
3252 else if (Name
.equals_insensitive("evex"))
3253 ForcedOpcodePrefix
= OpcodePrefix_EVEX
;
3255 if (ForcedOpcodePrefix
!= OpcodePrefix_Default
) {
3256 if (getLexer().isNot(AsmToken::Identifier
))
3257 return Error(Parser
.getTok().getLoc(), "Expected identifier");
3258 // FIXME: The mnemonic won't match correctly if its not in lower case.
3259 Name
= Parser
.getTok().getString();
3260 NameLoc
= Parser
.getTok().getLoc();
3267 // Support the suffix syntax for overriding displacement size as well.
3268 if (Name
.consume_back(".d32")) {
3269 ForcedDispEncoding
= DispEncoding_Disp32
;
3270 } else if (Name
.consume_back(".d8")) {
3271 ForcedDispEncoding
= DispEncoding_Disp8
;
3274 StringRef PatchedName
= Name
;
3276 // Hack to skip "short" following Jcc.
3277 if (isParsingIntelSyntax() &&
3278 (PatchedName
== "jmp" || PatchedName
== "jc" || PatchedName
== "jnc" ||
3279 PatchedName
== "jcxz" || PatchedName
== "jecxz" ||
3280 (PatchedName
.starts_with("j") &&
3281 ParseConditionCode(PatchedName
.substr(1)) != X86::COND_INVALID
))) {
3282 StringRef NextTok
= Parser
.getTok().getString();
3283 if (Parser
.isParsingMasm() ? NextTok
.equals_insensitive("short")
3284 : NextTok
== "short") {
3286 NameLoc
.getFromPointer(NameLoc
.getPointer() + Name
.size());
3287 // Eat the short keyword.
3289 // MS and GAS ignore the short keyword; they both determine the jmp type
3290 // based on the distance of the label. (NASM does emit different code with
3291 // and without "short," though.)
3292 InstInfo
->AsmRewrites
->emplace_back(AOK_Skip
, NameEndLoc
,
3293 NextTok
.size() + 1);
3297 // FIXME: Hack to recognize setneb as setne.
3298 if (PatchedName
.starts_with("set") && PatchedName
.ends_with("b") &&
3299 PatchedName
!= "setzub" && PatchedName
!= "setzunb" &&
3300 PatchedName
!= "setb" && PatchedName
!= "setnb")
3301 PatchedName
= PatchedName
.substr(0, Name
.size()-1);
3303 unsigned ComparisonPredicate
= ~0U;
3305 // FIXME: Hack to recognize cmp<comparison code>{sh,ss,sd,ph,ps,pd}.
3306 if ((PatchedName
.starts_with("cmp") || PatchedName
.starts_with("vcmp")) &&
3307 (PatchedName
.ends_with("ss") || PatchedName
.ends_with("sd") ||
3308 PatchedName
.ends_with("sh") || PatchedName
.ends_with("ph") ||
3309 PatchedName
.ends_with("pbf16") || PatchedName
.ends_with("ps") ||
3310 PatchedName
.ends_with("pd"))) {
3311 bool IsVCMP
= PatchedName
[0] == 'v';
3312 unsigned CCIdx
= IsVCMP
? 4 : 3;
3313 unsigned suffixLength
= PatchedName
.ends_with("pbf16") ? 5 : 2;
3314 unsigned CC
= StringSwitch
<unsigned>(
3315 PatchedName
.slice(CCIdx
, PatchedName
.size() - suffixLength
))
3317 .Case("eq_oq", 0x00)
3319 .Case("lt_os", 0x01)
3321 .Case("le_os", 0x02)
3322 .Case("unord", 0x03)
3323 .Case("unord_q", 0x03)
3325 .Case("neq_uq", 0x04)
3327 .Case("nlt_us", 0x05)
3329 .Case("nle_us", 0x06)
3331 .Case("ord_q", 0x07)
3332 /* AVX only from here */
3333 .Case("eq_uq", 0x08)
3335 .Case("nge_us", 0x09)
3337 .Case("ngt_us", 0x0A)
3338 .Case("false", 0x0B)
3339 .Case("false_oq", 0x0B)
3340 .Case("neq_oq", 0x0C)
3342 .Case("ge_os", 0x0D)
3344 .Case("gt_os", 0x0E)
3346 .Case("true_uq", 0x0F)
3347 .Case("eq_os", 0x10)
3348 .Case("lt_oq", 0x11)
3349 .Case("le_oq", 0x12)
3350 .Case("unord_s", 0x13)
3351 .Case("neq_us", 0x14)
3352 .Case("nlt_uq", 0x15)
3353 .Case("nle_uq", 0x16)
3354 .Case("ord_s", 0x17)
3355 .Case("eq_us", 0x18)
3356 .Case("nge_uq", 0x19)
3357 .Case("ngt_uq", 0x1A)
3358 .Case("false_os", 0x1B)
3359 .Case("neq_os", 0x1C)
3360 .Case("ge_oq", 0x1D)
3361 .Case("gt_oq", 0x1E)
3362 .Case("true_us", 0x1F)
3364 if (CC
!= ~0U && (IsVCMP
|| CC
< 8) &&
3365 (IsVCMP
|| PatchedName
.back() != 'h')) {
3366 if (PatchedName
.ends_with("ss"))
3367 PatchedName
= IsVCMP
? "vcmpss" : "cmpss";
3368 else if (PatchedName
.ends_with("sd"))
3369 PatchedName
= IsVCMP
? "vcmpsd" : "cmpsd";
3370 else if (PatchedName
.ends_with("ps"))
3371 PatchedName
= IsVCMP
? "vcmpps" : "cmpps";
3372 else if (PatchedName
.ends_with("pd"))
3373 PatchedName
= IsVCMP
? "vcmppd" : "cmppd";
3374 else if (PatchedName
.ends_with("sh"))
3375 PatchedName
= "vcmpsh";
3376 else if (PatchedName
.ends_with("ph"))
3377 PatchedName
= "vcmpph";
3378 else if (PatchedName
.ends_with("pbf16"))
3379 PatchedName
= "vcmppbf16";
3381 llvm_unreachable("Unexpected suffix!");
3383 ComparisonPredicate
= CC
;
3387 // FIXME: Hack to recognize vpcmp<comparison code>{ub,uw,ud,uq,b,w,d,q}.
3388 if (PatchedName
.starts_with("vpcmp") &&
3389 (PatchedName
.back() == 'b' || PatchedName
.back() == 'w' ||
3390 PatchedName
.back() == 'd' || PatchedName
.back() == 'q')) {
3391 unsigned SuffixSize
= PatchedName
.drop_back().back() == 'u' ? 2 : 1;
3392 unsigned CC
= StringSwitch
<unsigned>(
3393 PatchedName
.slice(5, PatchedName
.size() - SuffixSize
))
3394 .Case("eq", 0x0) // Only allowed on unsigned. Checked below.
3397 //.Case("false", 0x3) // Not a documented alias.
3401 //.Case("true", 0x7) // Not a documented alias.
3403 if (CC
!= ~0U && (CC
!= 0 || SuffixSize
== 2)) {
3404 switch (PatchedName
.back()) {
3405 default: llvm_unreachable("Unexpected character!");
3406 case 'b': PatchedName
= SuffixSize
== 2 ? "vpcmpub" : "vpcmpb"; break;
3407 case 'w': PatchedName
= SuffixSize
== 2 ? "vpcmpuw" : "vpcmpw"; break;
3408 case 'd': PatchedName
= SuffixSize
== 2 ? "vpcmpud" : "vpcmpd"; break;
3409 case 'q': PatchedName
= SuffixSize
== 2 ? "vpcmpuq" : "vpcmpq"; break;
3411 // Set up the immediate to push into the operands later.
3412 ComparisonPredicate
= CC
;
3416 // FIXME: Hack to recognize vpcom<comparison code>{ub,uw,ud,uq,b,w,d,q}.
3417 if (PatchedName
.starts_with("vpcom") &&
3418 (PatchedName
.back() == 'b' || PatchedName
.back() == 'w' ||
3419 PatchedName
.back() == 'd' || PatchedName
.back() == 'q')) {
3420 unsigned SuffixSize
= PatchedName
.drop_back().back() == 'u' ? 2 : 1;
3421 unsigned CC
= StringSwitch
<unsigned>(
3422 PatchedName
.slice(5, PatchedName
.size() - SuffixSize
))
3433 switch (PatchedName
.back()) {
3434 default: llvm_unreachable("Unexpected character!");
3435 case 'b': PatchedName
= SuffixSize
== 2 ? "vpcomub" : "vpcomb"; break;
3436 case 'w': PatchedName
= SuffixSize
== 2 ? "vpcomuw" : "vpcomw"; break;
3437 case 'd': PatchedName
= SuffixSize
== 2 ? "vpcomud" : "vpcomd"; break;
3438 case 'q': PatchedName
= SuffixSize
== 2 ? "vpcomuq" : "vpcomq"; break;
3440 // Set up the immediate to push into the operands later.
3441 ComparisonPredicate
= CC
;
3445 // Determine whether this is an instruction prefix.
3447 // Enhance prefixes integrity robustness. for example, following forms
3448 // are currently tolerated:
3449 // repz repnz <insn> ; GAS errors for the use of two similar prefixes
3450 // lock addq %rax, %rbx ; Destination operand must be of memory type
3451 // xacquire <insn> ; xacquire must be accompanied by 'lock'
3453 StringSwitch
<bool>(Name
)
3454 .Cases("cs", "ds", "es", "fs", "gs", "ss", true)
3455 .Cases("rex64", "data32", "data16", "addr32", "addr16", true)
3456 .Cases("xacquire", "xrelease", true)
3457 .Cases("acquire", "release", isParsingIntelSyntax())
3460 auto isLockRepeatNtPrefix
= [](StringRef N
) {
3461 return StringSwitch
<bool>(N
)
3462 .Cases("lock", "rep", "repe", "repz", "repne", "repnz", "notrack", true)
3466 bool CurlyAsEndOfStatement
= false;
3468 unsigned Flags
= X86::IP_NO_PREFIX
;
3469 while (isLockRepeatNtPrefix(Name
.lower())) {
3471 StringSwitch
<unsigned>(Name
)
3472 .Cases("lock", "lock", X86::IP_HAS_LOCK
)
3473 .Cases("rep", "repe", "repz", X86::IP_HAS_REPEAT
)
3474 .Cases("repne", "repnz", X86::IP_HAS_REPEAT_NE
)
3475 .Cases("notrack", "notrack", X86::IP_HAS_NOTRACK
)
3476 .Default(X86::IP_NO_PREFIX
); // Invalid prefix (impossible)
3478 if (getLexer().is(AsmToken::EndOfStatement
)) {
3479 // We don't have real instr with the given prefix
3480 // let's use the prefix as the instr.
3481 // TODO: there could be several prefixes one after another
3482 Flags
= X86::IP_NO_PREFIX
;
3485 // FIXME: The mnemonic won't match correctly if its not in lower case.
3486 Name
= Parser
.getTok().getString();
3487 Parser
.Lex(); // eat the prefix
3488 // Hack: we could have something like "rep # some comment" or
3489 // "lock; cmpxchg16b $1" or "lock\0A\09incl" or "lock/incl"
3490 while (Name
.starts_with(";") || Name
.starts_with("\n") ||
3491 Name
.starts_with("#") || Name
.starts_with("\t") ||
3492 Name
.starts_with("/")) {
3493 // FIXME: The mnemonic won't match correctly if its not in lower case.
3494 Name
= Parser
.getTok().getString();
3495 Parser
.Lex(); // go to next prefix or instr
3502 // Hacks to handle 'data16' and 'data32'
3503 if (PatchedName
== "data16" && is16BitMode()) {
3504 return Error(NameLoc
, "redundant data16 prefix");
3506 if (PatchedName
== "data32") {
3508 return Error(NameLoc
, "redundant data32 prefix");
3510 return Error(NameLoc
, "'data32' is not supported in 64-bit mode");
3511 // Hack to 'data16' for the table lookup.
3512 PatchedName
= "data16";
3514 if (getLexer().isNot(AsmToken::EndOfStatement
)) {
3515 StringRef Next
= Parser
.getTok().getString();
3517 // data32 effectively changes the instruction suffix.
3519 if (Next
== "callw")
3521 if (Next
== "ljmpw")
3526 ForcedDataPrefix
= X86::Is32Bit
;
3531 Operands
.push_back(X86Operand::CreateToken(PatchedName
, NameLoc
));
3533 // Push the immediate if we extracted one from the mnemonic.
3534 if (ComparisonPredicate
!= ~0U && !isParsingIntelSyntax()) {
3535 const MCExpr
*ImmOp
= MCConstantExpr::create(ComparisonPredicate
,
3536 getParser().getContext());
3537 Operands
.push_back(X86Operand::CreateImm(ImmOp
, NameLoc
, NameLoc
));
3540 // Parse conditional flags after mnemonic.
3541 if ((Name
.starts_with("ccmp") || Name
.starts_with("ctest")) &&
3542 parseCFlagsOp(Operands
))
3545 // This does the actual operand parsing. Don't parse any more if we have a
3546 // prefix juxtaposed with an operation like "lock incl 4(%rax)", because we
3547 // just want to parse the "lock" as the first instruction and the "incl" as
3549 if (getLexer().isNot(AsmToken::EndOfStatement
) && !IsPrefix
) {
3550 // Parse '*' modifier.
3551 if (getLexer().is(AsmToken::Star
))
3552 Operands
.push_back(X86Operand::CreateToken("*", consumeToken()));
3554 // Read the operands.
3556 if (parseOperand(Operands
, Name
))
3558 if (HandleAVX512Operand(Operands
))
3561 // check for comma and eat it
3562 if (getLexer().is(AsmToken::Comma
))
3568 // In MS inline asm curly braces mark the beginning/end of a block,
3569 // therefore they should be interpreted as end of statement
3570 CurlyAsEndOfStatement
=
3571 isParsingIntelSyntax() && isParsingMSInlineAsm() &&
3572 (getLexer().is(AsmToken::LCurly
) || getLexer().is(AsmToken::RCurly
));
3573 if (getLexer().isNot(AsmToken::EndOfStatement
) && !CurlyAsEndOfStatement
)
3574 return TokError("unexpected token in argument list");
3577 // Push the immediate if we extracted one from the mnemonic.
3578 if (ComparisonPredicate
!= ~0U && isParsingIntelSyntax()) {
3579 const MCExpr
*ImmOp
= MCConstantExpr::create(ComparisonPredicate
,
3580 getParser().getContext());
3581 Operands
.push_back(X86Operand::CreateImm(ImmOp
, NameLoc
, NameLoc
));
3584 // Consume the EndOfStatement or the prefix separator Slash
3585 if (getLexer().is(AsmToken::EndOfStatement
) ||
3586 (IsPrefix
&& getLexer().is(AsmToken::Slash
)))
3588 else if (CurlyAsEndOfStatement
)
3589 // Add an actual EndOfStatement before the curly brace
3590 Info
.AsmRewrites
->emplace_back(AOK_EndOfStatement
,
3591 getLexer().getTok().getLoc(), 0);
3593 // This is for gas compatibility and cannot be done in td.
3594 // Adding "p" for some floating point with no argument.
3595 // For example: fsub --> fsubp
3597 Name
== "fsub" || Name
== "fdiv" || Name
== "fsubr" || Name
== "fdivr";
3598 if (IsFp
&& Operands
.size() == 1) {
3599 const char *Repl
= StringSwitch
<const char *>(Name
)
3600 .Case("fsub", "fsubp")
3601 .Case("fdiv", "fdivp")
3602 .Case("fsubr", "fsubrp")
3603 .Case("fdivr", "fdivrp");
3604 static_cast<X86Operand
&>(*Operands
[0]).setTokenValue(Repl
);
3607 if ((Name
== "mov" || Name
== "movw" || Name
== "movl") &&
3608 (Operands
.size() == 3)) {
3609 X86Operand
&Op1
= (X86Operand
&)*Operands
[1];
3610 X86Operand
&Op2
= (X86Operand
&)*Operands
[2];
3611 SMLoc Loc
= Op1
.getEndLoc();
3612 // Moving a 32 or 16 bit value into a segment register has the same
3613 // behavior. Modify such instructions to always take shorter form.
3614 if (Op1
.isReg() && Op2
.isReg() &&
3615 X86MCRegisterClasses
[X86::SEGMENT_REGRegClassID
].contains(
3617 (X86MCRegisterClasses
[X86::GR16RegClassID
].contains(Op1
.getReg()) ||
3618 X86MCRegisterClasses
[X86::GR32RegClassID
].contains(Op1
.getReg()))) {
3619 // Change instruction name to match new instruction.
3620 if (Name
!= "mov" && Name
[3] == (is16BitMode() ? 'l' : 'w')) {
3621 Name
= is16BitMode() ? "movw" : "movl";
3622 Operands
[0] = X86Operand::CreateToken(Name
, NameLoc
);
3624 // Select the correct equivalent 16-/32-bit source register.
3626 getX86SubSuperRegister(Op1
.getReg(), is16BitMode() ? 16 : 32);
3627 Operands
[1] = X86Operand::CreateReg(Reg
, Loc
, Loc
);
3631 // This is a terrible hack to handle "out[s]?[bwl]? %al, (%dx)" ->
3632 // "outb %al, %dx". Out doesn't take a memory form, but this is a widely
3633 // documented form in various unofficial manuals, so a lot of code uses it.
3634 if ((Name
== "outb" || Name
== "outsb" || Name
== "outw" || Name
== "outsw" ||
3635 Name
== "outl" || Name
== "outsl" || Name
== "out" || Name
== "outs") &&
3636 Operands
.size() == 3) {
3637 X86Operand
&Op
= (X86Operand
&)*Operands
.back();
3639 Operands
.back() = X86Operand::CreateReg(X86::DX
, Op
.getStartLoc(),
3642 // Same hack for "in[s]?[bwl]? (%dx), %al" -> "inb %dx, %al".
3643 if ((Name
== "inb" || Name
== "insb" || Name
== "inw" || Name
== "insw" ||
3644 Name
== "inl" || Name
== "insl" || Name
== "in" || Name
== "ins") &&
3645 Operands
.size() == 3) {
3646 X86Operand
&Op
= (X86Operand
&)*Operands
[1];
3648 Operands
[1] = X86Operand::CreateReg(X86::DX
, Op
.getStartLoc(),
3652 SmallVector
<std::unique_ptr
<MCParsedAsmOperand
>, 2> TmpOperands
;
3653 bool HadVerifyError
= false;
3655 // Append default arguments to "ins[bwld]"
3656 if (Name
.starts_with("ins") &&
3657 (Operands
.size() == 1 || Operands
.size() == 3) &&
3658 (Name
== "insb" || Name
== "insw" || Name
== "insl" || Name
== "insd" ||
3661 AddDefaultSrcDestOperands(TmpOperands
,
3662 X86Operand::CreateReg(X86::DX
, NameLoc
, NameLoc
),
3663 DefaultMemDIOperand(NameLoc
));
3664 HadVerifyError
= VerifyAndAdjustOperands(Operands
, TmpOperands
);
3667 // Append default arguments to "outs[bwld]"
3668 if (Name
.starts_with("outs") &&
3669 (Operands
.size() == 1 || Operands
.size() == 3) &&
3670 (Name
== "outsb" || Name
== "outsw" || Name
== "outsl" ||
3671 Name
== "outsd" || Name
== "outs")) {
3672 AddDefaultSrcDestOperands(TmpOperands
, DefaultMemSIOperand(NameLoc
),
3673 X86Operand::CreateReg(X86::DX
, NameLoc
, NameLoc
));
3674 HadVerifyError
= VerifyAndAdjustOperands(Operands
, TmpOperands
);
3677 // Transform "lods[bwlq]" into "lods[bwlq] ($SIREG)" for appropriate
3678 // values of $SIREG according to the mode. It would be nice if this
3679 // could be achieved with InstAlias in the tables.
3680 if (Name
.starts_with("lods") &&
3681 (Operands
.size() == 1 || Operands
.size() == 2) &&
3682 (Name
== "lods" || Name
== "lodsb" || Name
== "lodsw" ||
3683 Name
== "lodsl" || Name
== "lodsd" || Name
== "lodsq")) {
3684 TmpOperands
.push_back(DefaultMemSIOperand(NameLoc
));
3685 HadVerifyError
= VerifyAndAdjustOperands(Operands
, TmpOperands
);
3688 // Transform "stos[bwlq]" into "stos[bwlq] ($DIREG)" for appropriate
3689 // values of $DIREG according to the mode. It would be nice if this
3690 // could be achieved with InstAlias in the tables.
3691 if (Name
.starts_with("stos") &&
3692 (Operands
.size() == 1 || Operands
.size() == 2) &&
3693 (Name
== "stos" || Name
== "stosb" || Name
== "stosw" ||
3694 Name
== "stosl" || Name
== "stosd" || Name
== "stosq")) {
3695 TmpOperands
.push_back(DefaultMemDIOperand(NameLoc
));
3696 HadVerifyError
= VerifyAndAdjustOperands(Operands
, TmpOperands
);
3699 // Transform "scas[bwlq]" into "scas[bwlq] ($DIREG)" for appropriate
3700 // values of $DIREG according to the mode. It would be nice if this
3701 // could be achieved with InstAlias in the tables.
3702 if (Name
.starts_with("scas") &&
3703 (Operands
.size() == 1 || Operands
.size() == 2) &&
3704 (Name
== "scas" || Name
== "scasb" || Name
== "scasw" ||
3705 Name
== "scasl" || Name
== "scasd" || Name
== "scasq")) {
3706 TmpOperands
.push_back(DefaultMemDIOperand(NameLoc
));
3707 HadVerifyError
= VerifyAndAdjustOperands(Operands
, TmpOperands
);
3710 // Add default SI and DI operands to "cmps[bwlq]".
3711 if (Name
.starts_with("cmps") &&
3712 (Operands
.size() == 1 || Operands
.size() == 3) &&
3713 (Name
== "cmps" || Name
== "cmpsb" || Name
== "cmpsw" ||
3714 Name
== "cmpsl" || Name
== "cmpsd" || Name
== "cmpsq")) {
3715 AddDefaultSrcDestOperands(TmpOperands
, DefaultMemDIOperand(NameLoc
),
3716 DefaultMemSIOperand(NameLoc
));
3717 HadVerifyError
= VerifyAndAdjustOperands(Operands
, TmpOperands
);
3720 // Add default SI and DI operands to "movs[bwlq]".
3721 if (((Name
.starts_with("movs") &&
3722 (Name
== "movs" || Name
== "movsb" || Name
== "movsw" ||
3723 Name
== "movsl" || Name
== "movsd" || Name
== "movsq")) ||
3724 (Name
.starts_with("smov") &&
3725 (Name
== "smov" || Name
== "smovb" || Name
== "smovw" ||
3726 Name
== "smovl" || Name
== "smovd" || Name
== "smovq"))) &&
3727 (Operands
.size() == 1 || Operands
.size() == 3)) {
3728 if (Name
== "movsd" && Operands
.size() == 1 && !isParsingIntelSyntax())
3729 Operands
.back() = X86Operand::CreateToken("movsl", NameLoc
);
3730 AddDefaultSrcDestOperands(TmpOperands
, DefaultMemSIOperand(NameLoc
),
3731 DefaultMemDIOperand(NameLoc
));
3732 HadVerifyError
= VerifyAndAdjustOperands(Operands
, TmpOperands
);
3735 // Check if we encountered an error for one of the string instructions
3736 if (HadVerifyError
) {
3737 return HadVerifyError
;
3740 // Transforms "xlat mem8" into "xlatb"
3741 if ((Name
== "xlat" || Name
== "xlatb") && Operands
.size() == 2) {
3742 X86Operand
&Op1
= static_cast<X86Operand
&>(*Operands
[1]);
3744 Warning(Op1
.getStartLoc(), "memory operand is only for determining the "
3745 "size, (R|E)BX will be used for the location");
3746 Operands
.pop_back();
3747 static_cast<X86Operand
&>(*Operands
[0]).setTokenValue("xlatb");
3752 Operands
.push_back(X86Operand::CreatePrefix(Flags
, NameLoc
, NameLoc
));
3756 static bool convertSSEToAVX(MCInst
&Inst
) {
3757 ArrayRef
<X86TableEntry
> Table
{X86SSE2AVXTable
};
3758 unsigned Opcode
= Inst
.getOpcode();
3759 const auto I
= llvm::lower_bound(Table
, Opcode
);
3760 if (I
== Table
.end() || I
->OldOpc
!= Opcode
)
3763 Inst
.setOpcode(I
->NewOpc
);
3764 // AVX variant of BLENDVPD/BLENDVPS/PBLENDVB instructions has more
3765 // operand compare to SSE variant, which is added below
3766 if (X86::isBLENDVPD(Opcode
) || X86::isBLENDVPS(Opcode
) ||
3767 X86::isPBLENDVB(Opcode
))
3768 Inst
.addOperand(Inst
.getOperand(2));
3773 bool X86AsmParser::processInstruction(MCInst
&Inst
, const OperandVector
&Ops
) {
3774 if (MCOptions
.X86Sse2Avx
&& convertSSEToAVX(Inst
))
3777 if (ForcedOpcodePrefix
!= OpcodePrefix_VEX3
&&
3778 X86::optimizeInstFromVEX3ToVEX2(Inst
, MII
.get(Inst
.getOpcode())))
3781 if (X86::optimizeShiftRotateWithImmediateOne(Inst
))
3784 auto replaceWithCCMPCTEST
= [&](unsigned Opcode
) -> bool {
3785 if (ForcedOpcodePrefix
== OpcodePrefix_EVEX
) {
3786 Inst
.setFlags(~(X86::IP_USE_EVEX
)&Inst
.getFlags());
3787 Inst
.setOpcode(Opcode
);
3788 Inst
.addOperand(MCOperand::createImm(0));
3789 Inst
.addOperand(MCOperand::createImm(10));
3795 switch (Inst
.getOpcode()) {
3796 default: return false;
3798 // {disp32} forces a larger displacement as if the instruction was relaxed.
3799 // NOTE: 16-bit mode uses 16-bit displacement even though it says {disp32}.
3800 // This matches GNU assembler.
3801 if (ForcedDispEncoding
== DispEncoding_Disp32
) {
3802 Inst
.setOpcode(is16BitMode() ? X86::JMP_2
: X86::JMP_4
);
3808 // {disp32} forces a larger displacement as if the instruction was relaxed.
3809 // NOTE: 16-bit mode uses 16-bit displacement even though it says {disp32}.
3810 // This matches GNU assembler.
3811 if (ForcedDispEncoding
== DispEncoding_Disp32
) {
3812 Inst
.setOpcode(is16BitMode() ? X86::JCC_2
: X86::JCC_4
);
3818 // Transforms "int $3" into "int3" as a size optimization.
3819 // We can't write this as an InstAlias.
3820 if (!Inst
.getOperand(0).isImm() || Inst
.getOperand(0).getImm() != 3)
3823 Inst
.setOpcode(X86::INT3
);
3826 // `{evex} cmp <>, <>` is alias of `ccmpt {dfv=} <>, <>`, and
3827 // `{evex} test <>, <>` is alias of `ctest {dfv=} <>, <>`
3828 #define FROM_TO(FROM, TO) \
3830 return replaceWithCCMPCTEST(X86::TO);
3831 FROM_TO(CMP64rr
, CCMP64rr
)
3832 FROM_TO(CMP64mi32
, CCMP64mi32
)
3833 FROM_TO(CMP64mi8
, CCMP64mi8
)
3834 FROM_TO(CMP64mr
, CCMP64mr
)
3835 FROM_TO(CMP64ri32
, CCMP64ri32
)
3836 FROM_TO(CMP64ri8
, CCMP64ri8
)
3837 FROM_TO(CMP64rm
, CCMP64rm
)
3839 FROM_TO(CMP32rr
, CCMP32rr
)
3840 FROM_TO(CMP32mi
, CCMP32mi
)
3841 FROM_TO(CMP32mi8
, CCMP32mi8
)
3842 FROM_TO(CMP32mr
, CCMP32mr
)
3843 FROM_TO(CMP32ri
, CCMP32ri
)
3844 FROM_TO(CMP32ri8
, CCMP32ri8
)
3845 FROM_TO(CMP32rm
, CCMP32rm
)
3847 FROM_TO(CMP16rr
, CCMP16rr
)
3848 FROM_TO(CMP16mi
, CCMP16mi
)
3849 FROM_TO(CMP16mi8
, CCMP16mi8
)
3850 FROM_TO(CMP16mr
, CCMP16mr
)
3851 FROM_TO(CMP16ri
, CCMP16ri
)
3852 FROM_TO(CMP16ri8
, CCMP16ri8
)
3853 FROM_TO(CMP16rm
, CCMP16rm
)
3855 FROM_TO(CMP8rr
, CCMP8rr
)
3856 FROM_TO(CMP8mi
, CCMP8mi
)
3857 FROM_TO(CMP8mr
, CCMP8mr
)
3858 FROM_TO(CMP8ri
, CCMP8ri
)
3859 FROM_TO(CMP8rm
, CCMP8rm
)
3861 FROM_TO(TEST64rr
, CTEST64rr
)
3862 FROM_TO(TEST64mi32
, CTEST64mi32
)
3863 FROM_TO(TEST64mr
, CTEST64mr
)
3864 FROM_TO(TEST64ri32
, CTEST64ri32
)
3866 FROM_TO(TEST32rr
, CTEST32rr
)
3867 FROM_TO(TEST32mi
, CTEST32mi
)
3868 FROM_TO(TEST32mr
, CTEST32mr
)
3869 FROM_TO(TEST32ri
, CTEST32ri
)
3871 FROM_TO(TEST16rr
, CTEST16rr
)
3872 FROM_TO(TEST16mi
, CTEST16mi
)
3873 FROM_TO(TEST16mr
, CTEST16mr
)
3874 FROM_TO(TEST16ri
, CTEST16ri
)
3876 FROM_TO(TEST8rr
, CTEST8rr
)
3877 FROM_TO(TEST8mi
, CTEST8mi
)
3878 FROM_TO(TEST8mr
, CTEST8mr
)
3879 FROM_TO(TEST8ri
, CTEST8ri
)
3884 bool X86AsmParser::validateInstruction(MCInst
&Inst
, const OperandVector
&Ops
) {
3885 using namespace X86
;
3886 const MCRegisterInfo
*MRI
= getContext().getRegisterInfo();
3887 unsigned Opcode
= Inst
.getOpcode();
3888 uint64_t TSFlags
= MII
.get(Opcode
).TSFlags
;
3889 if (isVFCMADDCPH(Opcode
) || isVFCMADDCSH(Opcode
) || isVFMADDCPH(Opcode
) ||
3890 isVFMADDCSH(Opcode
)) {
3891 MCRegister Dest
= Inst
.getOperand(0).getReg();
3892 for (unsigned i
= 2; i
< Inst
.getNumOperands(); i
++)
3893 if (Inst
.getOperand(i
).isReg() && Dest
== Inst
.getOperand(i
).getReg())
3894 return Warning(Ops
[0]->getStartLoc(), "Destination register should be "
3895 "distinct from source registers");
3896 } else if (isVFCMULCPH(Opcode
) || isVFCMULCSH(Opcode
) || isVFMULCPH(Opcode
) ||
3897 isVFMULCSH(Opcode
)) {
3898 MCRegister Dest
= Inst
.getOperand(0).getReg();
3899 // The mask variants have different operand list. Scan from the third
3900 // operand to avoid emitting incorrect warning.
3901 // VFMULCPHZrr Dest, Src1, Src2
3902 // VFMULCPHZrrk Dest, Dest, Mask, Src1, Src2
3903 // VFMULCPHZrrkz Dest, Mask, Src1, Src2
3904 for (unsigned i
= ((TSFlags
& X86II::EVEX_K
) ? 2 : 1);
3905 i
< Inst
.getNumOperands(); i
++)
3906 if (Inst
.getOperand(i
).isReg() && Dest
== Inst
.getOperand(i
).getReg())
3907 return Warning(Ops
[0]->getStartLoc(), "Destination register should be "
3908 "distinct from source registers");
3909 } else if (isV4FMADDPS(Opcode
) || isV4FMADDSS(Opcode
) ||
3910 isV4FNMADDPS(Opcode
) || isV4FNMADDSS(Opcode
) ||
3911 isVP4DPWSSDS(Opcode
) || isVP4DPWSSD(Opcode
)) {
3913 Inst
.getOperand(Inst
.getNumOperands() - X86::AddrNumOperands
- 1)
3915 unsigned Src2Enc
= MRI
->getEncodingValue(Src2
);
3916 if (Src2Enc
% 4 != 0) {
3917 StringRef RegName
= X86IntelInstPrinter::getRegisterName(Src2
);
3918 unsigned GroupStart
= (Src2Enc
/ 4) * 4;
3919 unsigned GroupEnd
= GroupStart
+ 3;
3920 return Warning(Ops
[0]->getStartLoc(),
3921 "source register '" + RegName
+ "' implicitly denotes '" +
3922 RegName
.take_front(3) + Twine(GroupStart
) + "' to '" +
3923 RegName
.take_front(3) + Twine(GroupEnd
) +
3926 } else if (isVGATHERDPD(Opcode
) || isVGATHERDPS(Opcode
) ||
3927 isVGATHERQPD(Opcode
) || isVGATHERQPS(Opcode
) ||
3928 isVPGATHERDD(Opcode
) || isVPGATHERDQ(Opcode
) ||
3929 isVPGATHERQD(Opcode
) || isVPGATHERQQ(Opcode
)) {
3930 bool HasEVEX
= (TSFlags
& X86II::EncodingMask
) == X86II::EVEX
;
3932 unsigned Dest
= MRI
->getEncodingValue(Inst
.getOperand(0).getReg());
3933 unsigned Index
= MRI
->getEncodingValue(
3934 Inst
.getOperand(4 + X86::AddrIndexReg
).getReg());
3936 return Warning(Ops
[0]->getStartLoc(), "index and destination registers "
3937 "should be distinct");
3939 unsigned Dest
= MRI
->getEncodingValue(Inst
.getOperand(0).getReg());
3940 unsigned Mask
= MRI
->getEncodingValue(Inst
.getOperand(1).getReg());
3941 unsigned Index
= MRI
->getEncodingValue(
3942 Inst
.getOperand(3 + X86::AddrIndexReg
).getReg());
3943 if (Dest
== Mask
|| Dest
== Index
|| Mask
== Index
)
3944 return Warning(Ops
[0]->getStartLoc(), "mask, index, and destination "
3945 "registers should be distinct");
3947 } else if (isTCMMIMFP16PS(Opcode
) || isTCMMRLFP16PS(Opcode
) ||
3948 isTDPBF16PS(Opcode
) || isTDPFP16PS(Opcode
) || isTDPBSSD(Opcode
) ||
3949 isTDPBSUD(Opcode
) || isTDPBUSD(Opcode
) || isTDPBUUD(Opcode
)) {
3950 MCRegister SrcDest
= Inst
.getOperand(0).getReg();
3951 MCRegister Src1
= Inst
.getOperand(2).getReg();
3952 MCRegister Src2
= Inst
.getOperand(3).getReg();
3953 if (SrcDest
== Src1
|| SrcDest
== Src2
|| Src1
== Src2
)
3954 return Error(Ops
[0]->getStartLoc(), "all tmm registers must be distinct");
3957 // Check that we aren't mixing AH/BH/CH/DH with REX prefix. We only need to
3958 // check this with the legacy encoding, VEX/EVEX/XOP don't use REX.
3959 if ((TSFlags
& X86II::EncodingMask
) == 0) {
3961 bool UsesRex
= TSFlags
& X86II::REX_W
;
3962 unsigned NumOps
= Inst
.getNumOperands();
3963 for (unsigned i
= 0; i
!= NumOps
; ++i
) {
3964 const MCOperand
&MO
= Inst
.getOperand(i
);
3967 MCRegister Reg
= MO
.getReg();
3968 if (Reg
== X86::AH
|| Reg
== X86::BH
|| Reg
== X86::CH
|| Reg
== X86::DH
)
3970 if (X86II::isX86_64NonExtLowByteReg(Reg
) ||
3971 X86II::isX86_64ExtendedReg(Reg
))
3975 if (UsesRex
&& HReg
) {
3976 StringRef RegName
= X86IntelInstPrinter::getRegisterName(HReg
);
3977 return Error(Ops
[0]->getStartLoc(),
3978 "can't encode '" + RegName
+ "' in an instruction requiring "
3983 if ((Opcode
== X86::PREFETCHIT0
|| Opcode
== X86::PREFETCHIT1
)) {
3984 const MCOperand
&MO
= Inst
.getOperand(X86::AddrBaseReg
);
3985 if (!MO
.isReg() || MO
.getReg() != X86::RIP
)
3987 Ops
[0]->getStartLoc(),
3988 Twine((Inst
.getOpcode() == X86::PREFETCHIT0
? "'prefetchit0'"
3989 : "'prefetchit1'")) +
3990 " only supports RIP-relative address");
3995 void X86AsmParser::emitWarningForSpecialLVIInstruction(SMLoc Loc
) {
3996 Warning(Loc
, "Instruction may be vulnerable to LVI and "
3997 "requires manual mitigation");
3998 Note(SMLoc(), "See https://software.intel.com/"
3999 "security-software-guidance/insights/"
4000 "deep-dive-load-value-injection#specialinstructions"
4001 " for more information");
4004 /// RET instructions and also instructions that indirect calls/jumps from memory
4005 /// combine a load and a branch within a single instruction. To mitigate these
4006 /// instructions against LVI, they must be decomposed into separate load and
4007 /// branch instructions, with an LFENCE in between. For more details, see:
4008 /// - X86LoadValueInjectionRetHardening.cpp
4009 /// - X86LoadValueInjectionIndirectThunks.cpp
4010 /// - https://software.intel.com/security-software-guidance/insights/deep-dive-load-value-injection
4012 /// Returns `true` if a mitigation was applied or warning was emitted.
4013 void X86AsmParser::applyLVICFIMitigation(MCInst
&Inst
, MCStreamer
&Out
) {
4014 // Information on control-flow instructions that require manual mitigation can
4016 // https://software.intel.com/security-software-guidance/insights/deep-dive-load-value-injection#specialinstructions
4017 switch (Inst
.getOpcode()) {
4024 MCInst ShlInst
, FenceInst
;
4025 bool Parse32
= is32BitMode() || Code16GCC
;
4026 MCRegister Basereg
=
4027 is64BitMode() ? X86::RSP
: (Parse32
? X86::ESP
: X86::SP
);
4028 const MCExpr
*Disp
= MCConstantExpr::create(0, getContext());
4029 auto ShlMemOp
= X86Operand::CreateMem(getPointerWidth(), /*SegReg=*/0, Disp
,
4030 /*BaseReg=*/Basereg
, /*IndexReg=*/0,
4031 /*Scale=*/1, SMLoc
{}, SMLoc
{}, 0);
4032 ShlInst
.setOpcode(X86::SHL64mi
);
4033 ShlMemOp
->addMemOperands(ShlInst
, 5);
4034 ShlInst
.addOperand(MCOperand::createImm(0));
4035 FenceInst
.setOpcode(X86::LFENCE
);
4036 Out
.emitInstruction(ShlInst
, getSTI());
4037 Out
.emitInstruction(FenceInst
, getSTI());
4046 emitWarningForSpecialLVIInstruction(Inst
.getLoc());
4051 /// To mitigate LVI, every instruction that performs a load can be followed by
4052 /// an LFENCE instruction to squash any potential mis-speculation. There are
4053 /// some instructions that require additional considerations, and may requre
4054 /// manual mitigation. For more details, see:
4055 /// https://software.intel.com/security-software-guidance/insights/deep-dive-load-value-injection
4057 /// Returns `true` if a mitigation was applied or warning was emitted.
4058 void X86AsmParser::applyLVILoadHardeningMitigation(MCInst
&Inst
,
4060 auto Opcode
= Inst
.getOpcode();
4061 auto Flags
= Inst
.getFlags();
4062 if ((Flags
& X86::IP_HAS_REPEAT
) || (Flags
& X86::IP_HAS_REPEAT_NE
)) {
4063 // Information on REP string instructions that require manual mitigation can
4065 // https://software.intel.com/security-software-guidance/insights/deep-dive-load-value-injection#specialinstructions
4075 emitWarningForSpecialLVIInstruction(Inst
.getLoc());
4078 } else if (Opcode
== X86::REP_PREFIX
|| Opcode
== X86::REPNE_PREFIX
) {
4079 // If a REP instruction is found on its own line, it may or may not be
4080 // followed by a vulnerable instruction. Emit a warning just in case.
4081 emitWarningForSpecialLVIInstruction(Inst
.getLoc());
4085 const MCInstrDesc
&MCID
= MII
.get(Inst
.getOpcode());
4087 // Can't mitigate after terminators or calls. A control flow change may have
4088 // already occurred.
4089 if (MCID
.isTerminator() || MCID
.isCall())
4092 // LFENCE has the mayLoad property, don't double fence.
4093 if (MCID
.mayLoad() && Inst
.getOpcode() != X86::LFENCE
) {
4095 FenceInst
.setOpcode(X86::LFENCE
);
4096 Out
.emitInstruction(FenceInst
, getSTI());
4100 void X86AsmParser::emitInstruction(MCInst
&Inst
, OperandVector
&Operands
,
4102 if (LVIInlineAsmHardening
&&
4103 getSTI().hasFeature(X86::FeatureLVIControlFlowIntegrity
))
4104 applyLVICFIMitigation(Inst
, Out
);
4106 Out
.emitInstruction(Inst
, getSTI());
4108 if (LVIInlineAsmHardening
&&
4109 getSTI().hasFeature(X86::FeatureLVILoadHardening
))
4110 applyLVILoadHardeningMitigation(Inst
, Out
);
4113 static unsigned getPrefixes(OperandVector
&Operands
) {
4114 unsigned Result
= 0;
4115 X86Operand
&Prefix
= static_cast<X86Operand
&>(*Operands
.back());
4116 if (Prefix
.isPrefix()) {
4117 Result
= Prefix
.getPrefix();
4118 Operands
.pop_back();
4123 bool X86AsmParser::matchAndEmitInstruction(SMLoc IDLoc
, unsigned &Opcode
,
4124 OperandVector
&Operands
,
4125 MCStreamer
&Out
, uint64_t &ErrorInfo
,
4126 bool MatchingInlineAsm
) {
4127 assert(!Operands
.empty() && "Unexpect empty operand list!");
4128 assert((*Operands
[0]).isToken() && "Leading operand should always be a mnemonic!");
4130 // First, handle aliases that expand to multiple instructions.
4131 MatchFPUWaitAlias(IDLoc
, static_cast<X86Operand
&>(*Operands
[0]), Operands
,
4132 Out
, MatchingInlineAsm
);
4133 unsigned Prefixes
= getPrefixes(Operands
);
4137 // If REX/REX2/VEX/EVEX encoding is forced, we need to pass the USE_* flag to
4138 // the encoder and printer.
4139 if (ForcedOpcodePrefix
== OpcodePrefix_REX
)
4140 Prefixes
|= X86::IP_USE_REX
;
4141 else if (ForcedOpcodePrefix
== OpcodePrefix_REX2
)
4142 Prefixes
|= X86::IP_USE_REX2
;
4143 else if (ForcedOpcodePrefix
== OpcodePrefix_VEX
)
4144 Prefixes
|= X86::IP_USE_VEX
;
4145 else if (ForcedOpcodePrefix
== OpcodePrefix_VEX2
)
4146 Prefixes
|= X86::IP_USE_VEX2
;
4147 else if (ForcedOpcodePrefix
== OpcodePrefix_VEX3
)
4148 Prefixes
|= X86::IP_USE_VEX3
;
4149 else if (ForcedOpcodePrefix
== OpcodePrefix_EVEX
)
4150 Prefixes
|= X86::IP_USE_EVEX
;
4152 // Set encoded flags for {disp8} and {disp32}.
4153 if (ForcedDispEncoding
== DispEncoding_Disp8
)
4154 Prefixes
|= X86::IP_USE_DISP8
;
4155 else if (ForcedDispEncoding
== DispEncoding_Disp32
)
4156 Prefixes
|= X86::IP_USE_DISP32
;
4159 Inst
.setFlags(Prefixes
);
4161 return isParsingIntelSyntax()
4162 ? matchAndEmitIntelInstruction(IDLoc
, Opcode
, Inst
, Operands
, Out
,
4163 ErrorInfo
, MatchingInlineAsm
)
4164 : matchAndEmitATTInstruction(IDLoc
, Opcode
, Inst
, Operands
, Out
,
4165 ErrorInfo
, MatchingInlineAsm
);
4168 void X86AsmParser::MatchFPUWaitAlias(SMLoc IDLoc
, X86Operand
&Op
,
4169 OperandVector
&Operands
, MCStreamer
&Out
,
4170 bool MatchingInlineAsm
) {
4171 // FIXME: This should be replaced with a real .td file alias mechanism.
4172 // Also, MatchInstructionImpl should actually *do* the EmitInstruction
4174 const char *Repl
= StringSwitch
<const char *>(Op
.getToken())
4175 .Case("finit", "fninit")
4176 .Case("fsave", "fnsave")
4177 .Case("fstcw", "fnstcw")
4178 .Case("fstcww", "fnstcw")
4179 .Case("fstenv", "fnstenv")
4180 .Case("fstsw", "fnstsw")
4181 .Case("fstsww", "fnstsw")
4182 .Case("fclex", "fnclex")
4186 Inst
.setOpcode(X86::WAIT
);
4188 if (!MatchingInlineAsm
)
4189 emitInstruction(Inst
, Operands
, Out
);
4190 Operands
[0] = X86Operand::CreateToken(Repl
, IDLoc
);
4194 bool X86AsmParser::ErrorMissingFeature(SMLoc IDLoc
,
4195 const FeatureBitset
&MissingFeatures
,
4196 bool MatchingInlineAsm
) {
4197 assert(MissingFeatures
.any() && "Unknown missing feature!");
4198 SmallString
<126> Msg
;
4199 raw_svector_ostream
OS(Msg
);
4200 OS
<< "instruction requires:";
4201 for (unsigned i
= 0, e
= MissingFeatures
.size(); i
!= e
; ++i
) {
4202 if (MissingFeatures
[i
])
4203 OS
<< ' ' << getSubtargetFeatureName(i
);
4205 return Error(IDLoc
, OS
.str(), SMRange(), MatchingInlineAsm
);
4208 unsigned X86AsmParser::checkTargetMatchPredicate(MCInst
&Inst
) {
4209 unsigned Opc
= Inst
.getOpcode();
4210 const MCInstrDesc
&MCID
= MII
.get(Opc
);
4211 uint64_t TSFlags
= MCID
.TSFlags
;
4213 if (UseApxExtendedReg
&& !X86II::canUseApxExtendedReg(MCID
))
4214 return Match_Unsupported
;
4215 if (ForcedNoFlag
== !(TSFlags
& X86II::EVEX_NF
) && !X86::isCFCMOVCC(Opc
))
4216 return Match_Unsupported
;
4218 switch (ForcedOpcodePrefix
) {
4219 case OpcodePrefix_Default
:
4221 case OpcodePrefix_REX
:
4222 case OpcodePrefix_REX2
:
4223 if (TSFlags
& X86II::EncodingMask
)
4224 return Match_Unsupported
;
4226 case OpcodePrefix_VEX
:
4227 case OpcodePrefix_VEX2
:
4228 case OpcodePrefix_VEX3
:
4229 if ((TSFlags
& X86II::EncodingMask
) != X86II::VEX
)
4230 return Match_Unsupported
;
4232 case OpcodePrefix_EVEX
:
4233 if (is64BitMode() && (TSFlags
& X86II::EncodingMask
) != X86II::EVEX
&&
4234 !X86::isCMP(Opc
) && !X86::isTEST(Opc
))
4235 return Match_Unsupported
;
4236 if (!is64BitMode() && (TSFlags
& X86II::EncodingMask
) != X86II::EVEX
)
4237 return Match_Unsupported
;
4241 if ((TSFlags
& X86II::ExplicitOpPrefixMask
) == X86II::ExplicitVEXPrefix
&&
4242 (ForcedOpcodePrefix
!= OpcodePrefix_VEX
&&
4243 ForcedOpcodePrefix
!= OpcodePrefix_VEX2
&&
4244 ForcedOpcodePrefix
!= OpcodePrefix_VEX3
))
4245 return Match_Unsupported
;
4247 return Match_Success
;
4250 bool X86AsmParser::matchAndEmitATTInstruction(
4251 SMLoc IDLoc
, unsigned &Opcode
, MCInst
&Inst
, OperandVector
&Operands
,
4252 MCStreamer
&Out
, uint64_t &ErrorInfo
, bool MatchingInlineAsm
) {
4253 X86Operand
&Op
= static_cast<X86Operand
&>(*Operands
[0]);
4254 SMRange EmptyRange
= std::nullopt
;
4255 // In 16-bit mode, if data32 is specified, temporarily switch to 32-bit mode
4256 // when matching the instruction.
4257 if (ForcedDataPrefix
== X86::Is32Bit
)
4258 SwitchMode(X86::Is32Bit
);
4259 // First, try a direct match.
4260 FeatureBitset MissingFeatures
;
4261 unsigned OriginalError
= MatchInstruction(Operands
, Inst
, ErrorInfo
,
4262 MissingFeatures
, MatchingInlineAsm
,
4263 isParsingIntelSyntax());
4264 if (ForcedDataPrefix
== X86::Is32Bit
) {
4265 SwitchMode(X86::Is16Bit
);
4266 ForcedDataPrefix
= 0;
4268 switch (OriginalError
) {
4269 default: llvm_unreachable("Unexpected match result!");
4271 if (!MatchingInlineAsm
&& validateInstruction(Inst
, Operands
))
4273 // Some instructions need post-processing to, for example, tweak which
4274 // encoding is selected. Loop on it while changes happen so the
4275 // individual transformations can chain off each other.
4276 if (!MatchingInlineAsm
)
4277 while (processInstruction(Inst
, Operands
))
4281 if (!MatchingInlineAsm
)
4282 emitInstruction(Inst
, Operands
, Out
);
4283 Opcode
= Inst
.getOpcode();
4285 case Match_InvalidImmUnsignedi4
: {
4286 SMLoc ErrorLoc
= ((X86Operand
&)*Operands
[ErrorInfo
]).getStartLoc();
4287 if (ErrorLoc
== SMLoc())
4289 return Error(ErrorLoc
, "immediate must be an integer in range [0, 15]",
4290 EmptyRange
, MatchingInlineAsm
);
4292 case Match_MissingFeature
:
4293 return ErrorMissingFeature(IDLoc
, MissingFeatures
, MatchingInlineAsm
);
4294 case Match_InvalidOperand
:
4295 case Match_MnemonicFail
:
4296 case Match_Unsupported
:
4299 if (Op
.getToken().empty()) {
4300 Error(IDLoc
, "instruction must have size higher than 0", EmptyRange
,
4305 // FIXME: Ideally, we would only attempt suffix matches for things which are
4306 // valid prefixes, and we could just infer the right unambiguous
4307 // type. However, that requires substantially more matcher support than the
4310 // Change the operand to point to a temporary token.
4311 StringRef Base
= Op
.getToken();
4312 SmallString
<16> Tmp
;
4315 Op
.setTokenValue(Tmp
);
4317 // If this instruction starts with an 'f', then it is a floating point stack
4318 // instruction. These come in up to three forms for 32-bit, 64-bit, and
4319 // 80-bit floating point, which use the suffixes s,l,t respectively.
4321 // Otherwise, we assume that this may be an integer instruction, which comes
4322 // in 8/16/32/64-bit forms using the b,w,l,q suffixes respectively.
4323 const char *Suffixes
= Base
[0] != 'f' ? "bwlq" : "slt\0";
4324 // MemSize corresponding to Suffixes. { 8, 16, 32, 64 } { 32, 64, 80, 0 }
4325 const char *MemSize
= Base
[0] != 'f' ? "\x08\x10\x20\x40" : "\x20\x40\x50\0";
4327 // Check for the various suffix matches.
4328 uint64_t ErrorInfoIgnore
;
4329 FeatureBitset ErrorInfoMissingFeatures
; // Init suppresses compiler warnings.
4332 // Some instruction like VPMULDQ is NOT the variant of VPMULD but a new one.
4333 // So we should make sure the suffix matcher only works for memory variant
4334 // that has the same size with the suffix.
4335 // FIXME: This flag is a workaround for legacy instructions that didn't
4336 // declare non suffix variant assembly.
4337 bool HasVectorReg
= false;
4338 X86Operand
*MemOp
= nullptr;
4339 for (const auto &Op
: Operands
) {
4340 X86Operand
*X86Op
= static_cast<X86Operand
*>(Op
.get());
4341 if (X86Op
->isVectorReg())
4342 HasVectorReg
= true;
4343 else if (X86Op
->isMem()) {
4345 assert(MemOp
->Mem
.Size
== 0 && "Memory size always 0 under ATT syntax");
4346 // Have we found an unqualified memory operand,
4347 // break. IA allows only one memory operand.
4352 for (unsigned I
= 0, E
= std::size(Match
); I
!= E
; ++I
) {
4353 Tmp
.back() = Suffixes
[I
];
4354 if (MemOp
&& HasVectorReg
)
4355 MemOp
->Mem
.Size
= MemSize
[I
];
4356 Match
[I
] = Match_MnemonicFail
;
4357 if (MemOp
|| !HasVectorReg
) {
4359 MatchInstruction(Operands
, Inst
, ErrorInfoIgnore
, MissingFeatures
,
4360 MatchingInlineAsm
, isParsingIntelSyntax());
4361 // If this returned as a missing feature failure, remember that.
4362 if (Match
[I
] == Match_MissingFeature
)
4363 ErrorInfoMissingFeatures
= MissingFeatures
;
4367 // Restore the old token.
4368 Op
.setTokenValue(Base
);
4370 // If exactly one matched, then we treat that as a successful match (and the
4371 // instruction will already have been filled in correctly, since the failing
4372 // matches won't have modified it).
4373 unsigned NumSuccessfulMatches
= llvm::count(Match
, Match_Success
);
4374 if (NumSuccessfulMatches
== 1) {
4375 if (!MatchingInlineAsm
&& validateInstruction(Inst
, Operands
))
4377 // Some instructions need post-processing to, for example, tweak which
4378 // encoding is selected. Loop on it while changes happen so the
4379 // individual transformations can chain off each other.
4380 if (!MatchingInlineAsm
)
4381 while (processInstruction(Inst
, Operands
))
4385 if (!MatchingInlineAsm
)
4386 emitInstruction(Inst
, Operands
, Out
);
4387 Opcode
= Inst
.getOpcode();
4391 // Otherwise, the match failed, try to produce a decent error message.
4393 // If we had multiple suffix matches, then identify this as an ambiguous
4395 if (NumSuccessfulMatches
> 1) {
4397 unsigned NumMatches
= 0;
4398 for (unsigned I
= 0, E
= std::size(Match
); I
!= E
; ++I
)
4399 if (Match
[I
] == Match_Success
)
4400 MatchChars
[NumMatches
++] = Suffixes
[I
];
4402 SmallString
<126> Msg
;
4403 raw_svector_ostream
OS(Msg
);
4404 OS
<< "ambiguous instructions require an explicit suffix (could be ";
4405 for (unsigned i
= 0; i
!= NumMatches
; ++i
) {
4408 if (i
+ 1 == NumMatches
)
4410 OS
<< "'" << Base
<< MatchChars
[i
] << "'";
4413 Error(IDLoc
, OS
.str(), EmptyRange
, MatchingInlineAsm
);
4417 // Okay, we know that none of the variants matched successfully.
4419 // If all of the instructions reported an invalid mnemonic, then the original
4420 // mnemonic was invalid.
4421 if (llvm::count(Match
, Match_MnemonicFail
) == 4) {
4422 if (OriginalError
== Match_MnemonicFail
)
4423 return Error(IDLoc
, "invalid instruction mnemonic '" + Base
+ "'",
4424 Op
.getLocRange(), MatchingInlineAsm
);
4426 if (OriginalError
== Match_Unsupported
)
4427 return Error(IDLoc
, "unsupported instruction", EmptyRange
,
4430 assert(OriginalError
== Match_InvalidOperand
&& "Unexpected error");
4431 // Recover location info for the operand if we know which was the problem.
4432 if (ErrorInfo
!= ~0ULL) {
4433 if (ErrorInfo
>= Operands
.size())
4434 return Error(IDLoc
, "too few operands for instruction", EmptyRange
,
4437 X86Operand
&Operand
= (X86Operand
&)*Operands
[ErrorInfo
];
4438 if (Operand
.getStartLoc().isValid()) {
4439 SMRange OperandRange
= Operand
.getLocRange();
4440 return Error(Operand
.getStartLoc(), "invalid operand for instruction",
4441 OperandRange
, MatchingInlineAsm
);
4445 return Error(IDLoc
, "invalid operand for instruction", EmptyRange
,
4449 // If one instruction matched as unsupported, report this as unsupported.
4450 if (llvm::count(Match
, Match_Unsupported
) == 1) {
4451 return Error(IDLoc
, "unsupported instruction", EmptyRange
,
4455 // If one instruction matched with a missing feature, report this as a
4457 if (llvm::count(Match
, Match_MissingFeature
) == 1) {
4458 ErrorInfo
= Match_MissingFeature
;
4459 return ErrorMissingFeature(IDLoc
, ErrorInfoMissingFeatures
,
4463 // If one instruction matched with an invalid operand, report this as an
4465 if (llvm::count(Match
, Match_InvalidOperand
) == 1) {
4466 return Error(IDLoc
, "invalid operand for instruction", EmptyRange
,
4470 // If all of these were an outright failure, report it in a useless way.
4471 Error(IDLoc
, "unknown use of instruction mnemonic without a size suffix",
4472 EmptyRange
, MatchingInlineAsm
);
4476 bool X86AsmParser::matchAndEmitIntelInstruction(
4477 SMLoc IDLoc
, unsigned &Opcode
, MCInst
&Inst
, OperandVector
&Operands
,
4478 MCStreamer
&Out
, uint64_t &ErrorInfo
, bool MatchingInlineAsm
) {
4479 X86Operand
&Op
= static_cast<X86Operand
&>(*Operands
[0]);
4480 SMRange EmptyRange
= std::nullopt
;
4481 // Find one unsized memory operand, if present.
4482 X86Operand
*UnsizedMemOp
= nullptr;
4483 for (const auto &Op
: Operands
) {
4484 X86Operand
*X86Op
= static_cast<X86Operand
*>(Op
.get());
4485 if (X86Op
->isMemUnsized()) {
4486 UnsizedMemOp
= X86Op
;
4487 // Have we found an unqualified memory operand,
4488 // break. IA allows only one memory operand.
4493 // Allow some instructions to have implicitly pointer-sized operands. This is
4494 // compatible with gas.
4495 StringRef Mnemonic
= (static_cast<X86Operand
&>(*Operands
[0])).getToken();
4497 static const char *const PtrSizedInstrs
[] = {"call", "jmp", "push", "pop"};
4498 for (const char *Instr
: PtrSizedInstrs
) {
4499 if (Mnemonic
== Instr
) {
4500 UnsizedMemOp
->Mem
.Size
= getPointerWidth();
4506 SmallVector
<unsigned, 8> Match
;
4507 FeatureBitset ErrorInfoMissingFeatures
;
4508 FeatureBitset MissingFeatures
;
4509 StringRef Base
= (static_cast<X86Operand
&>(*Operands
[0])).getToken();
4511 // If unsized push has immediate operand we should default the default pointer
4512 // size for the size.
4513 if (Mnemonic
== "push" && Operands
.size() == 2) {
4514 auto *X86Op
= static_cast<X86Operand
*>(Operands
[1].get());
4515 if (X86Op
->isImm()) {
4516 // If it's not a constant fall through and let remainder take care of it.
4517 const auto *CE
= dyn_cast
<MCConstantExpr
>(X86Op
->getImm());
4518 unsigned Size
= getPointerWidth();
4520 (isIntN(Size
, CE
->getValue()) || isUIntN(Size
, CE
->getValue()))) {
4521 SmallString
<16> Tmp
;
4523 Tmp
+= (is64BitMode())
4525 : (is32BitMode()) ? "l" : (is16BitMode()) ? "w" : " ";
4526 Op
.setTokenValue(Tmp
);
4527 // Do match in ATT mode to allow explicit suffix usage.
4528 Match
.push_back(MatchInstruction(Operands
, Inst
, ErrorInfo
,
4529 MissingFeatures
, MatchingInlineAsm
,
4530 false /*isParsingIntelSyntax()*/));
4531 Op
.setTokenValue(Base
);
4536 // If an unsized memory operand is present, try to match with each memory
4537 // operand size. In Intel assembly, the size is not part of the instruction
4539 if (UnsizedMemOp
&& UnsizedMemOp
->isMemUnsized()) {
4540 static const unsigned MopSizes
[] = {8, 16, 32, 64, 80, 128, 256, 512};
4541 for (unsigned Size
: MopSizes
) {
4542 UnsizedMemOp
->Mem
.Size
= Size
;
4543 uint64_t ErrorInfoIgnore
;
4544 unsigned LastOpcode
= Inst
.getOpcode();
4545 unsigned M
= MatchInstruction(Operands
, Inst
, ErrorInfoIgnore
,
4546 MissingFeatures
, MatchingInlineAsm
,
4547 isParsingIntelSyntax());
4548 if (Match
.empty() || LastOpcode
!= Inst
.getOpcode())
4551 // If this returned as a missing feature failure, remember that.
4552 if (Match
.back() == Match_MissingFeature
)
4553 ErrorInfoMissingFeatures
= MissingFeatures
;
4556 // Restore the size of the unsized memory operand if we modified it.
4557 UnsizedMemOp
->Mem
.Size
= 0;
4560 // If we haven't matched anything yet, this is not a basic integer or FPU
4561 // operation. There shouldn't be any ambiguity in our mnemonic table, so try
4562 // matching with the unsized operand.
4563 if (Match
.empty()) {
4564 Match
.push_back(MatchInstruction(
4565 Operands
, Inst
, ErrorInfo
, MissingFeatures
, MatchingInlineAsm
,
4566 isParsingIntelSyntax()));
4567 // If this returned as a missing feature failure, remember that.
4568 if (Match
.back() == Match_MissingFeature
)
4569 ErrorInfoMissingFeatures
= MissingFeatures
;
4572 // Restore the size of the unsized memory operand if we modified it.
4574 UnsizedMemOp
->Mem
.Size
= 0;
4576 // If it's a bad mnemonic, all results will be the same.
4577 if (Match
.back() == Match_MnemonicFail
) {
4578 return Error(IDLoc
, "invalid instruction mnemonic '" + Mnemonic
+ "'",
4579 Op
.getLocRange(), MatchingInlineAsm
);
4582 unsigned NumSuccessfulMatches
= llvm::count(Match
, Match_Success
);
4584 // If matching was ambiguous and we had size information from the frontend,
4585 // try again with that. This handles cases like "movxz eax, m8/m16".
4586 if (UnsizedMemOp
&& NumSuccessfulMatches
> 1 &&
4587 UnsizedMemOp
->getMemFrontendSize()) {
4588 UnsizedMemOp
->Mem
.Size
= UnsizedMemOp
->getMemFrontendSize();
4589 unsigned M
= MatchInstruction(
4590 Operands
, Inst
, ErrorInfo
, MissingFeatures
, MatchingInlineAsm
,
4591 isParsingIntelSyntax());
4592 if (M
== Match_Success
)
4593 NumSuccessfulMatches
= 1;
4595 // Add a rewrite that encodes the size information we used from the
4597 InstInfo
->AsmRewrites
->emplace_back(
4598 AOK_SizeDirective
, UnsizedMemOp
->getStartLoc(),
4599 /*Len=*/0, UnsizedMemOp
->getMemFrontendSize());
4602 // If exactly one matched, then we treat that as a successful match (and the
4603 // instruction will already have been filled in correctly, since the failing
4604 // matches won't have modified it).
4605 if (NumSuccessfulMatches
== 1) {
4606 if (!MatchingInlineAsm
&& validateInstruction(Inst
, Operands
))
4608 // Some instructions need post-processing to, for example, tweak which
4609 // encoding is selected. Loop on it while changes happen so the individual
4610 // transformations can chain off each other.
4611 if (!MatchingInlineAsm
)
4612 while (processInstruction(Inst
, Operands
))
4615 if (!MatchingInlineAsm
)
4616 emitInstruction(Inst
, Operands
, Out
);
4617 Opcode
= Inst
.getOpcode();
4619 } else if (NumSuccessfulMatches
> 1) {
4620 assert(UnsizedMemOp
&&
4621 "multiple matches only possible with unsized memory operands");
4622 return Error(UnsizedMemOp
->getStartLoc(),
4623 "ambiguous operand size for instruction '" + Mnemonic
+ "\'",
4624 UnsizedMemOp
->getLocRange());
4627 // If one instruction matched as unsupported, report this as unsupported.
4628 if (llvm::count(Match
, Match_Unsupported
) == 1) {
4629 return Error(IDLoc
, "unsupported instruction", EmptyRange
,
4633 // If one instruction matched with a missing feature, report this as a
4635 if (llvm::count(Match
, Match_MissingFeature
) == 1) {
4636 ErrorInfo
= Match_MissingFeature
;
4637 return ErrorMissingFeature(IDLoc
, ErrorInfoMissingFeatures
,
4641 // If one instruction matched with an invalid operand, report this as an
4643 if (llvm::count(Match
, Match_InvalidOperand
) == 1) {
4644 return Error(IDLoc
, "invalid operand for instruction", EmptyRange
,
4648 if (llvm::count(Match
, Match_InvalidImmUnsignedi4
) == 1) {
4649 SMLoc ErrorLoc
= ((X86Operand
&)*Operands
[ErrorInfo
]).getStartLoc();
4650 if (ErrorLoc
== SMLoc())
4652 return Error(ErrorLoc
, "immediate must be an integer in range [0, 15]",
4653 EmptyRange
, MatchingInlineAsm
);
4656 // If all of these were an outright failure, report it in a useless way.
4657 return Error(IDLoc
, "unknown instruction mnemonic", EmptyRange
,
4661 bool X86AsmParser::omitRegisterFromClobberLists(MCRegister Reg
) {
4662 return X86MCRegisterClasses
[X86::SEGMENT_REGRegClassID
].contains(Reg
);
4665 bool X86AsmParser::ParseDirective(AsmToken DirectiveID
) {
4666 MCAsmParser
&Parser
= getParser();
4667 StringRef IDVal
= DirectiveID
.getIdentifier();
4668 if (IDVal
.starts_with(".arch"))
4669 return parseDirectiveArch();
4670 if (IDVal
.starts_with(".code"))
4671 return ParseDirectiveCode(IDVal
, DirectiveID
.getLoc());
4672 else if (IDVal
.starts_with(".att_syntax")) {
4673 if (getLexer().isNot(AsmToken::EndOfStatement
)) {
4674 if (Parser
.getTok().getString() == "prefix")
4676 else if (Parser
.getTok().getString() == "noprefix")
4677 return Error(DirectiveID
.getLoc(), "'.att_syntax noprefix' is not "
4678 "supported: registers must have a "
4679 "'%' prefix in .att_syntax");
4681 getParser().setAssemblerDialect(0);
4683 } else if (IDVal
.starts_with(".intel_syntax")) {
4684 getParser().setAssemblerDialect(1);
4685 if (getLexer().isNot(AsmToken::EndOfStatement
)) {
4686 if (Parser
.getTok().getString() == "noprefix")
4688 else if (Parser
.getTok().getString() == "prefix")
4689 return Error(DirectiveID
.getLoc(), "'.intel_syntax prefix' is not "
4690 "supported: registers must not have "
4691 "a '%' prefix in .intel_syntax");
4694 } else if (IDVal
== ".nops")
4695 return parseDirectiveNops(DirectiveID
.getLoc());
4696 else if (IDVal
== ".even")
4697 return parseDirectiveEven(DirectiveID
.getLoc());
4698 else if (IDVal
== ".cv_fpo_proc")
4699 return parseDirectiveFPOProc(DirectiveID
.getLoc());
4700 else if (IDVal
== ".cv_fpo_setframe")
4701 return parseDirectiveFPOSetFrame(DirectiveID
.getLoc());
4702 else if (IDVal
== ".cv_fpo_pushreg")
4703 return parseDirectiveFPOPushReg(DirectiveID
.getLoc());
4704 else if (IDVal
== ".cv_fpo_stackalloc")
4705 return parseDirectiveFPOStackAlloc(DirectiveID
.getLoc());
4706 else if (IDVal
== ".cv_fpo_stackalign")
4707 return parseDirectiveFPOStackAlign(DirectiveID
.getLoc());
4708 else if (IDVal
== ".cv_fpo_endprologue")
4709 return parseDirectiveFPOEndPrologue(DirectiveID
.getLoc());
4710 else if (IDVal
== ".cv_fpo_endproc")
4711 return parseDirectiveFPOEndProc(DirectiveID
.getLoc());
4712 else if (IDVal
== ".seh_pushreg" ||
4713 (Parser
.isParsingMasm() && IDVal
.equals_insensitive(".pushreg")))
4714 return parseDirectiveSEHPushReg(DirectiveID
.getLoc());
4715 else if (IDVal
== ".seh_setframe" ||
4716 (Parser
.isParsingMasm() && IDVal
.equals_insensitive(".setframe")))
4717 return parseDirectiveSEHSetFrame(DirectiveID
.getLoc());
4718 else if (IDVal
== ".seh_savereg" ||
4719 (Parser
.isParsingMasm() && IDVal
.equals_insensitive(".savereg")))
4720 return parseDirectiveSEHSaveReg(DirectiveID
.getLoc());
4721 else if (IDVal
== ".seh_savexmm" ||
4722 (Parser
.isParsingMasm() && IDVal
.equals_insensitive(".savexmm128")))
4723 return parseDirectiveSEHSaveXMM(DirectiveID
.getLoc());
4724 else if (IDVal
== ".seh_pushframe" ||
4725 (Parser
.isParsingMasm() && IDVal
.equals_insensitive(".pushframe")))
4726 return parseDirectiveSEHPushFrame(DirectiveID
.getLoc());
4731 bool X86AsmParser::parseDirectiveArch() {
4732 // Ignore .arch for now.
4733 getParser().parseStringToEndOfStatement();
4737 /// parseDirectiveNops
4738 /// ::= .nops size[, control]
4739 bool X86AsmParser::parseDirectiveNops(SMLoc L
) {
4740 int64_t NumBytes
= 0, Control
= 0;
4741 SMLoc NumBytesLoc
, ControlLoc
;
4742 const MCSubtargetInfo
& STI
= getSTI();
4743 NumBytesLoc
= getTok().getLoc();
4744 if (getParser().checkForValidSection() ||
4745 getParser().parseAbsoluteExpression(NumBytes
))
4748 if (parseOptionalToken(AsmToken::Comma
)) {
4749 ControlLoc
= getTok().getLoc();
4750 if (getParser().parseAbsoluteExpression(Control
))
4753 if (getParser().parseEOL())
4756 if (NumBytes
<= 0) {
4757 Error(NumBytesLoc
, "'.nops' directive with non-positive size");
4762 Error(ControlLoc
, "'.nops' directive with negative NOP size");
4767 getParser().getStreamer().emitNops(NumBytes
, Control
, L
, STI
);
4772 /// parseDirectiveEven
4774 bool X86AsmParser::parseDirectiveEven(SMLoc L
) {
4778 const MCSection
*Section
= getStreamer().getCurrentSectionOnly();
4780 getStreamer().initSections(false, getSTI());
4781 Section
= getStreamer().getCurrentSectionOnly();
4783 if (Section
->useCodeAlign())
4784 getStreamer().emitCodeAlignment(Align(2), &getSTI(), 0);
4786 getStreamer().emitValueToAlignment(Align(2), 0, 1, 0);
4790 /// ParseDirectiveCode
4791 /// ::= .code16 | .code32 | .code64
4792 bool X86AsmParser::ParseDirectiveCode(StringRef IDVal
, SMLoc L
) {
4793 MCAsmParser
&Parser
= getParser();
4795 if (IDVal
== ".code16") {
4797 if (!is16BitMode()) {
4798 SwitchMode(X86::Is16Bit
);
4799 getParser().getStreamer().emitAssemblerFlag(MCAF_Code16
);
4801 } else if (IDVal
== ".code16gcc") {
4802 // .code16gcc parses as if in 32-bit mode, but emits code in 16-bit mode.
4805 if (!is16BitMode()) {
4806 SwitchMode(X86::Is16Bit
);
4807 getParser().getStreamer().emitAssemblerFlag(MCAF_Code16
);
4809 } else if (IDVal
== ".code32") {
4811 if (!is32BitMode()) {
4812 SwitchMode(X86::Is32Bit
);
4813 getParser().getStreamer().emitAssemblerFlag(MCAF_Code32
);
4815 } else if (IDVal
== ".code64") {
4817 if (!is64BitMode()) {
4818 SwitchMode(X86::Is64Bit
);
4819 getParser().getStreamer().emitAssemblerFlag(MCAF_Code64
);
4822 Error(L
, "unknown directive " + IDVal
);
4830 bool X86AsmParser::parseDirectiveFPOProc(SMLoc L
) {
4831 MCAsmParser
&Parser
= getParser();
4834 if (Parser
.parseIdentifier(ProcName
))
4835 return Parser
.TokError("expected symbol name");
4836 if (Parser
.parseIntToken(ParamsSize
, "expected parameter byte count"))
4838 if (!isUIntN(32, ParamsSize
))
4839 return Parser
.TokError("parameters size out of range");
4842 MCSymbol
*ProcSym
= getContext().getOrCreateSymbol(ProcName
);
4843 return getTargetStreamer().emitFPOProc(ProcSym
, ParamsSize
, L
);
4846 // .cv_fpo_setframe ebp
4847 bool X86AsmParser::parseDirectiveFPOSetFrame(SMLoc L
) {
4850 if (parseRegister(Reg
, DummyLoc
, DummyLoc
) || parseEOL())
4852 return getTargetStreamer().emitFPOSetFrame(Reg
, L
);
4855 // .cv_fpo_pushreg ebx
4856 bool X86AsmParser::parseDirectiveFPOPushReg(SMLoc L
) {
4859 if (parseRegister(Reg
, DummyLoc
, DummyLoc
) || parseEOL())
4861 return getTargetStreamer().emitFPOPushReg(Reg
, L
);
4864 // .cv_fpo_stackalloc 20
4865 bool X86AsmParser::parseDirectiveFPOStackAlloc(SMLoc L
) {
4866 MCAsmParser
&Parser
= getParser();
4868 if (Parser
.parseIntToken(Offset
, "expected offset") || parseEOL())
4870 return getTargetStreamer().emitFPOStackAlloc(Offset
, L
);
4873 // .cv_fpo_stackalign 8
4874 bool X86AsmParser::parseDirectiveFPOStackAlign(SMLoc L
) {
4875 MCAsmParser
&Parser
= getParser();
4877 if (Parser
.parseIntToken(Offset
, "expected offset") || parseEOL())
4879 return getTargetStreamer().emitFPOStackAlign(Offset
, L
);
4882 // .cv_fpo_endprologue
4883 bool X86AsmParser::parseDirectiveFPOEndPrologue(SMLoc L
) {
4884 MCAsmParser
&Parser
= getParser();
4885 if (Parser
.parseEOL())
4887 return getTargetStreamer().emitFPOEndPrologue(L
);
4891 bool X86AsmParser::parseDirectiveFPOEndProc(SMLoc L
) {
4892 MCAsmParser
&Parser
= getParser();
4893 if (Parser
.parseEOL())
4895 return getTargetStreamer().emitFPOEndProc(L
);
4898 bool X86AsmParser::parseSEHRegisterNumber(unsigned RegClassID
,
4899 MCRegister
&RegNo
) {
4900 SMLoc startLoc
= getLexer().getLoc();
4901 const MCRegisterInfo
*MRI
= getContext().getRegisterInfo();
4903 // Try parsing the argument as a register first.
4904 if (getLexer().getTok().isNot(AsmToken::Integer
)) {
4906 if (parseRegister(RegNo
, startLoc
, endLoc
))
4909 if (!X86MCRegisterClasses
[RegClassID
].contains(RegNo
)) {
4910 return Error(startLoc
,
4911 "register is not supported for use with this directive");
4914 // Otherwise, an integer number matching the encoding of the desired
4915 // register may appear.
4917 if (getParser().parseAbsoluteExpression(EncodedReg
))
4920 // The SEH register number is the same as the encoding register number. Map
4921 // from the encoding back to the LLVM register number.
4922 RegNo
= MCRegister();
4923 for (MCPhysReg Reg
: X86MCRegisterClasses
[RegClassID
]) {
4924 if (MRI
->getEncodingValue(Reg
) == EncodedReg
) {
4930 return Error(startLoc
,
4931 "incorrect register number for use with this directive");
4938 bool X86AsmParser::parseDirectiveSEHPushReg(SMLoc Loc
) {
4940 if (parseSEHRegisterNumber(X86::GR64RegClassID
, Reg
))
4943 if (getLexer().isNot(AsmToken::EndOfStatement
))
4944 return TokError("expected end of directive");
4947 getStreamer().emitWinCFIPushReg(Reg
, Loc
);
4951 bool X86AsmParser::parseDirectiveSEHSetFrame(SMLoc Loc
) {
4954 if (parseSEHRegisterNumber(X86::GR64RegClassID
, Reg
))
4956 if (getLexer().isNot(AsmToken::Comma
))
4957 return TokError("you must specify a stack pointer offset");
4960 if (getParser().parseAbsoluteExpression(Off
))
4963 if (getLexer().isNot(AsmToken::EndOfStatement
))
4964 return TokError("expected end of directive");
4967 getStreamer().emitWinCFISetFrame(Reg
, Off
, Loc
);
4971 bool X86AsmParser::parseDirectiveSEHSaveReg(SMLoc Loc
) {
4974 if (parseSEHRegisterNumber(X86::GR64RegClassID
, Reg
))
4976 if (getLexer().isNot(AsmToken::Comma
))
4977 return TokError("you must specify an offset on the stack");
4980 if (getParser().parseAbsoluteExpression(Off
))
4983 if (getLexer().isNot(AsmToken::EndOfStatement
))
4984 return TokError("expected end of directive");
4987 getStreamer().emitWinCFISaveReg(Reg
, Off
, Loc
);
4991 bool X86AsmParser::parseDirectiveSEHSaveXMM(SMLoc Loc
) {
4994 if (parseSEHRegisterNumber(X86::VR128XRegClassID
, Reg
))
4996 if (getLexer().isNot(AsmToken::Comma
))
4997 return TokError("you must specify an offset on the stack");
5000 if (getParser().parseAbsoluteExpression(Off
))
5003 if (getLexer().isNot(AsmToken::EndOfStatement
))
5004 return TokError("expected end of directive");
5007 getStreamer().emitWinCFISaveXMM(Reg
, Off
, Loc
);
5011 bool X86AsmParser::parseDirectiveSEHPushFrame(SMLoc Loc
) {
5014 if (getLexer().is(AsmToken::At
)) {
5015 SMLoc startLoc
= getLexer().getLoc();
5017 if (!getParser().parseIdentifier(CodeID
)) {
5018 if (CodeID
!= "code")
5019 return Error(startLoc
, "expected @code");
5024 if (getLexer().isNot(AsmToken::EndOfStatement
))
5025 return TokError("expected end of directive");
5028 getStreamer().emitWinCFIPushFrame(Code
, Loc
);
5032 // Force static initialization.
5033 extern "C" LLVM_C_ABI
void LLVMInitializeX86AsmParser() {
5034 RegisterMCAsmParser
<X86AsmParser
> X(getTheX86_32Target());
5035 RegisterMCAsmParser
<X86AsmParser
> Y(getTheX86_64Target());
5038 #define GET_MATCHER_IMPLEMENTATION
5039 #include "X86GenAsmMatcher.inc"