1 //===-- X86AsmParser.cpp - Parse X86 assembly to MCInst instructions ------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 #include "MCTargetDesc/X86BaseInfo.h"
10 #include "MCTargetDesc/X86EncodingOptimization.h"
11 #include "MCTargetDesc/X86IntelInstPrinter.h"
12 #include "MCTargetDesc/X86MCExpr.h"
13 #include "MCTargetDesc/X86MCTargetDesc.h"
14 #include "MCTargetDesc/X86TargetStreamer.h"
15 #include "TargetInfo/X86TargetInfo.h"
16 #include "X86AsmParserCommon.h"
17 #include "X86Operand.h"
18 #include "llvm/ADT/STLExtras.h"
19 #include "llvm/ADT/SmallString.h"
20 #include "llvm/ADT/SmallVector.h"
21 #include "llvm/ADT/StringSwitch.h"
22 #include "llvm/ADT/Twine.h"
23 #include "llvm/MC/MCContext.h"
24 #include "llvm/MC/MCExpr.h"
25 #include "llvm/MC/MCInst.h"
26 #include "llvm/MC/MCInstrInfo.h"
27 #include "llvm/MC/MCParser/MCAsmLexer.h"
28 #include "llvm/MC/MCParser/MCAsmParser.h"
29 #include "llvm/MC/MCParser/MCParsedAsmOperand.h"
30 #include "llvm/MC/MCParser/MCTargetAsmParser.h"
31 #include "llvm/MC/MCRegisterInfo.h"
32 #include "llvm/MC/MCSection.h"
33 #include "llvm/MC/MCStreamer.h"
34 #include "llvm/MC/MCSubtargetInfo.h"
35 #include "llvm/MC/MCSymbol.h"
36 #include "llvm/MC/TargetRegistry.h"
37 #include "llvm/Support/CommandLine.h"
38 #include "llvm/Support/Compiler.h"
39 #include "llvm/Support/SourceMgr.h"
40 #include "llvm/Support/raw_ostream.h"
// File-local, hidden (cl::Hidden) boolean command-line option named
// "x86-experimental-lvi-inline-asm-hardening". No explicit initial value is
// supplied in this definition; the description string below is the text
// shown to users.
46 static cl::opt
<bool> LVIInlineAsmHardening(
47 "x86-experimental-lvi-inline-asm-hardening",
48 cl::desc("Harden inline assembly code that may be vulnerable to Load Value"
49 " Injection (LVI). This feature is experimental."), cl::Hidden
);
51 static bool checkScale(unsigned Scale
, StringRef
&ErrMsg
) {
52 if (Scale
!= 1 && Scale
!= 2 && Scale
!= 4 && Scale
!= 8) {
53 ErrMsg
= "scale factor in address must be 1, 2, 4 or 8";
61 // Including the generated SSE2AVX compression tables.
62 #define GET_X86_SSE2AVX_TABLE
63 #include "X86GenInstrMapping.inc"
65 static const char OpPrecedence
[] = {
90 class X86AsmParser
: public MCTargetAsmParser
{
91 ParseInstructionInfo
*InstInfo
;
93 unsigned ForcedDataPrefix
= 0;
105 OpcodePrefix ForcedOpcodePrefix
= OpcodePrefix_Default
;
108 DispEncoding_Default
,
113 DispEncoding ForcedDispEncoding
= DispEncoding_Default
;
115 // Does this instruction use apx extended register?
116 bool UseApxExtendedReg
= false;
117 // Is this instruction explicitly required not to update flags?
118 bool ForcedNoFlag
= false;
121 SMLoc
consumeToken() {
122 MCAsmParser
&Parser
= getParser();
123 SMLoc Result
= Parser
.getTok().getLoc();
128 X86TargetStreamer
&getTargetStreamer() {
129 assert(getParser().getStreamer().getTargetStreamer() &&
130 "do not have a target streamer");
131 MCTargetStreamer
&TS
= *getParser().getStreamer().getTargetStreamer();
132 return static_cast<X86TargetStreamer
&>(TS
);
135 unsigned MatchInstruction(const OperandVector
&Operands
, MCInst
&Inst
,
136 uint64_t &ErrorInfo
, FeatureBitset
&MissingFeatures
,
137 bool matchingInlineAsm
, unsigned VariantID
= 0) {
138 // In Code16GCC mode, match as 32-bit.
140 SwitchMode(X86::Is32Bit
);
141 unsigned rv
= MatchInstructionImpl(Operands
, Inst
, ErrorInfo
,
142 MissingFeatures
, matchingInlineAsm
,
145 SwitchMode(X86::Is16Bit
);
149 enum InfixCalculatorTok
{
174 enum IntelOperatorKind
{
181 enum MasmOperatorKind
{
188 class InfixCalculator
{
189 typedef std::pair
< InfixCalculatorTok
, int64_t > ICToken
;
190 SmallVector
<InfixCalculatorTok
, 4> InfixOperatorStack
;
191 SmallVector
<ICToken
, 4> PostfixStack
;
193 bool isUnaryOperator(InfixCalculatorTok Op
) const {
194 return Op
== IC_NEG
|| Op
== IC_NOT
;
198 int64_t popOperand() {
199 assert (!PostfixStack
.empty() && "Poped an empty stack!");
200 ICToken Op
= PostfixStack
.pop_back_val();
201 if (!(Op
.first
== IC_IMM
|| Op
.first
== IC_REGISTER
))
202 return -1; // The invalid Scale value will be caught later by checkScale
205 void pushOperand(InfixCalculatorTok Op
, int64_t Val
= 0) {
206 assert ((Op
== IC_IMM
|| Op
== IC_REGISTER
) &&
207 "Unexpected operand!");
208 PostfixStack
.push_back(std::make_pair(Op
, Val
));
// Discards the operator currently on top of InfixOperatorStack. No emptiness
// check is made here, so the stack is expected to be non-empty on entry.
211 void popOperator() { InfixOperatorStack
.pop_back(); }
212 void pushOperator(InfixCalculatorTok Op
) {
213 // Push the new operator if the stack is empty.
214 if (InfixOperatorStack
.empty()) {
215 InfixOperatorStack
.push_back(Op
);
219 // Push the new operator if it has a higher precedence than the operator
220 // on the top of the stack or the operator on the top of the stack is a
222 unsigned Idx
= InfixOperatorStack
.size() - 1;
223 InfixCalculatorTok StackOp
= InfixOperatorStack
[Idx
];
224 if (OpPrecedence
[Op
] > OpPrecedence
[StackOp
] || StackOp
== IC_LPAREN
) {
225 InfixOperatorStack
.push_back(Op
);
229 // The operator on the top of the stack has higher precedence than the
231 unsigned ParenCount
= 0;
233 // Nothing to process.
234 if (InfixOperatorStack
.empty())
237 Idx
= InfixOperatorStack
.size() - 1;
238 StackOp
= InfixOperatorStack
[Idx
];
239 if (!(OpPrecedence
[StackOp
] >= OpPrecedence
[Op
] || ParenCount
))
242 // If we have an even parentheses count and we see a left parentheses,
243 // then stop processing.
244 if (!ParenCount
&& StackOp
== IC_LPAREN
)
247 if (StackOp
== IC_RPAREN
) {
249 InfixOperatorStack
.pop_back();
250 } else if (StackOp
== IC_LPAREN
) {
252 InfixOperatorStack
.pop_back();
254 InfixOperatorStack
.pop_back();
255 PostfixStack
.push_back(std::make_pair(StackOp
, 0));
258 // Push the new operator.
259 InfixOperatorStack
.push_back(Op
);
263 // Push any remaining operators onto the postfix stack.
264 while (!InfixOperatorStack
.empty()) {
265 InfixCalculatorTok StackOp
= InfixOperatorStack
.pop_back_val();
266 if (StackOp
!= IC_LPAREN
&& StackOp
!= IC_RPAREN
)
267 PostfixStack
.push_back(std::make_pair(StackOp
, 0));
270 if (PostfixStack
.empty())
273 SmallVector
<ICToken
, 16> OperandStack
;
274 for (const ICToken
&Op
: PostfixStack
) {
275 if (Op
.first
== IC_IMM
|| Op
.first
== IC_REGISTER
) {
276 OperandStack
.push_back(Op
);
277 } else if (isUnaryOperator(Op
.first
)) {
278 assert (OperandStack
.size() > 0 && "Too few operands.");
279 ICToken Operand
= OperandStack
.pop_back_val();
280 assert (Operand
.first
== IC_IMM
&&
281 "Unary operation with a register!");
284 report_fatal_error("Unexpected operator!");
287 OperandStack
.push_back(std::make_pair(IC_IMM
, -Operand
.second
));
290 OperandStack
.push_back(std::make_pair(IC_IMM
, ~Operand
.second
));
294 assert (OperandStack
.size() > 1 && "Too few operands.");
296 ICToken Op2
= OperandStack
.pop_back_val();
297 ICToken Op1
= OperandStack
.pop_back_val();
300 report_fatal_error("Unexpected operator!");
303 Val
= Op1
.second
+ Op2
.second
;
304 OperandStack
.push_back(std::make_pair(IC_IMM
, Val
));
307 Val
= Op1
.second
- Op2
.second
;
308 OperandStack
.push_back(std::make_pair(IC_IMM
, Val
));
311 assert (Op1
.first
== IC_IMM
&& Op2
.first
== IC_IMM
&&
312 "Multiply operation with an immediate and a register!");
313 Val
= Op1
.second
* Op2
.second
;
314 OperandStack
.push_back(std::make_pair(IC_IMM
, Val
));
317 assert (Op1
.first
== IC_IMM
&& Op2
.first
== IC_IMM
&&
318 "Divide operation with an immediate and a register!");
319 assert (Op2
.second
!= 0 && "Division by zero!");
320 Val
= Op1
.second
/ Op2
.second
;
321 OperandStack
.push_back(std::make_pair(IC_IMM
, Val
));
324 assert (Op1
.first
== IC_IMM
&& Op2
.first
== IC_IMM
&&
325 "Modulo operation with an immediate and a register!");
326 Val
= Op1
.second
% Op2
.second
;
327 OperandStack
.push_back(std::make_pair(IC_IMM
, Val
));
330 assert (Op1
.first
== IC_IMM
&& Op2
.first
== IC_IMM
&&
331 "Or operation with an immediate and a register!");
332 Val
= Op1
.second
| Op2
.second
;
333 OperandStack
.push_back(std::make_pair(IC_IMM
, Val
));
336 assert(Op1
.first
== IC_IMM
&& Op2
.first
== IC_IMM
&&
337 "Xor operation with an immediate and a register!");
338 Val
= Op1
.second
^ Op2
.second
;
339 OperandStack
.push_back(std::make_pair(IC_IMM
, Val
));
342 assert (Op1
.first
== IC_IMM
&& Op2
.first
== IC_IMM
&&
343 "And operation with an immediate and a register!");
344 Val
= Op1
.second
& Op2
.second
;
345 OperandStack
.push_back(std::make_pair(IC_IMM
, Val
));
348 assert (Op1
.first
== IC_IMM
&& Op2
.first
== IC_IMM
&&
349 "Left shift operation with an immediate and a register!");
350 Val
= Op1
.second
<< Op2
.second
;
351 OperandStack
.push_back(std::make_pair(IC_IMM
, Val
));
354 assert (Op1
.first
== IC_IMM
&& Op2
.first
== IC_IMM
&&
355 "Right shift operation with an immediate and a register!");
356 Val
= Op1
.second
>> Op2
.second
;
357 OperandStack
.push_back(std::make_pair(IC_IMM
, Val
));
360 assert(Op1
.first
== IC_IMM
&& Op2
.first
== IC_IMM
&&
361 "Equals operation with an immediate and a register!");
362 Val
= (Op1
.second
== Op2
.second
) ? -1 : 0;
363 OperandStack
.push_back(std::make_pair(IC_IMM
, Val
));
366 assert(Op1
.first
== IC_IMM
&& Op2
.first
== IC_IMM
&&
367 "Not-equals operation with an immediate and a register!");
368 Val
= (Op1
.second
!= Op2
.second
) ? -1 : 0;
369 OperandStack
.push_back(std::make_pair(IC_IMM
, Val
));
372 assert(Op1
.first
== IC_IMM
&& Op2
.first
== IC_IMM
&&
373 "Less-than operation with an immediate and a register!");
374 Val
= (Op1
.second
< Op2
.second
) ? -1 : 0;
375 OperandStack
.push_back(std::make_pair(IC_IMM
, Val
));
378 assert(Op1
.first
== IC_IMM
&& Op2
.first
== IC_IMM
&&
379 "Less-than-or-equal operation with an immediate and a "
381 Val
= (Op1
.second
<= Op2
.second
) ? -1 : 0;
382 OperandStack
.push_back(std::make_pair(IC_IMM
, Val
));
385 assert(Op1
.first
== IC_IMM
&& Op2
.first
== IC_IMM
&&
386 "Greater-than operation with an immediate and a register!");
387 Val
= (Op1
.second
> Op2
.second
) ? -1 : 0;
388 OperandStack
.push_back(std::make_pair(IC_IMM
, Val
));
391 assert(Op1
.first
== IC_IMM
&& Op2
.first
== IC_IMM
&&
392 "Greater-than-or-equal operation with an immediate and a "
394 Val
= (Op1
.second
>= Op2
.second
) ? -1 : 0;
395 OperandStack
.push_back(std::make_pair(IC_IMM
, Val
));
400 assert (OperandStack
.size() == 1 && "Expected a single result.");
401 return OperandStack
.pop_back_val().second
;
405 enum IntelExprState
{
435 class IntelExprStateMachine
{
436 IntelExprState State
= IES_INIT
, PrevState
= IES_ERROR
;
437 unsigned BaseReg
= 0, IndexReg
= 0, TmpReg
= 0, Scale
= 0;
439 const MCExpr
*Sym
= nullptr;
442 InlineAsmIdentifierInfo Info
;
444 bool MemExpr
= false;
445 bool BracketUsed
= false;
446 bool OffsetOperator
= false;
447 bool AttachToOperandIdx
= false;
449 SMLoc OffsetOperatorLoc
;
452 bool setSymRef(const MCExpr
*Val
, StringRef ID
, StringRef
&ErrMsg
) {
454 ErrMsg
= "cannot use more than one symbol in memory operand";
463 IntelExprStateMachine() = default;
// Adds imm to the running immediate total held in Imm.
465 void addImm(int64_t imm
) { Imm
+= imm
; }
// Returns the current value of the bracket counter BracCount.
466 short getBracCount() const { return BracCount
; }
// Returns the MemExpr flag (initialised to false).
467 bool isMemExpr() const { return MemExpr
; }
// Returns the BracketUsed flag (initialised to false).
468 bool isBracketUsed() const { return BracketUsed
; }
// Returns true once onOffset() has recorded an offset operator.
469 bool isOffsetOperator() const { return OffsetOperator
; }
// Returns the location stored by onOffset() for the offset operator.
470 SMLoc
getOffsetLoc() const { return OffsetOperatorLoc
; }
// Returns the recorded base register; the member is initialised to 0.
471 unsigned getBaseReg() const { return BaseReg
; }
// Returns the recorded index register; the member is initialised to 0.
472 unsigned getIndexReg() const { return IndexReg
; }
// Returns the recorded scale; the member is initialised to 0.
473 unsigned getScale() const { return Scale
; }
// Returns the stored symbol expression Sym; the member is initialised to
// nullptr, so the result may be null.
474 const MCExpr
*getSym() const { return Sym
; }
// Returns the stored symbol name SymName.
475 StringRef
getSymName() const { return SymName
; }
// The four accessors below expose individual fields of the current type
// record CurType.

// Returns CurType.Name.
476 StringRef
getType() const { return CurType
.Name
; }
// Returns CurType.Size.
477 unsigned getSize() const { return CurType
.Size
; }
// Returns CurType.ElementSize.
478 unsigned getElementSize() const { return CurType
.ElementSize
; }
// Returns CurType.Length.
479 unsigned getLength() const { return CurType
.Length
; }
// Returns the accumulated immediate Imm plus the value produced by
// IC.execute().
// NOTE(review): this accessor is non-const, presumably because
// IC.execute() is not a const member — confirm before calling it repeatedly.
480 int64_t getImm() { return Imm
+ IC
.execute(); }
481 bool isValidEndState() const {
482 return State
== IES_RBRAC
|| State
== IES_RPAREN
||
483 State
== IES_INTEGER
|| State
== IES_REGISTER
||
487 // Marks the Intel expression as being appended after an operand index:
488 //   [OperandIdx][Intel Expression]
489 // This is necessary for checking, at the back end, whether it is an
490 // independent Intel expression when parsing inline asm.
491 void setAppendAfterOperand() { AttachToOperandIdx
= true; }
// Returns the IsPIC flag.
493 bool isPIC() const { return IsPIC
; }
// Sets the IsPIC flag to true.
494 void setPIC() { IsPIC
= true; }
// Returns true when the state machine's current State is IES_ERROR.
496 bool hadError() const { return State
== IES_ERROR
; }
// Returns a read-only reference to the stored identifier info (Info).
497 const InlineAsmIdentifierInfo
&getIdentifierInfo() const { return Info
; }
499 bool regsUseUpError(StringRef
&ErrMsg
) {
500 // This case mostly happens in inline asm, e.g. Arr[BaseReg + IndexReg]
501 // cannot introduce an additional register in inline asm in the PIC model.
502 if (IsPIC
&& AttachToOperandIdx
)
503 ErrMsg
= "Don't use 2 or more regs for mem offset in PIC model!";
505 ErrMsg
= "BaseReg/IndexReg already set!";
510 IntelExprState CurrState
= State
;
519 IC
.pushOperator(IC_OR
);
522 PrevState
= CurrState
;
525 IntelExprState CurrState
= State
;
534 IC
.pushOperator(IC_XOR
);
537 PrevState
= CurrState
;
540 IntelExprState CurrState
= State
;
549 IC
.pushOperator(IC_AND
);
552 PrevState
= CurrState
;
555 IntelExprState CurrState
= State
;
564 IC
.pushOperator(IC_EQ
);
567 PrevState
= CurrState
;
570 IntelExprState CurrState
= State
;
579 IC
.pushOperator(IC_NE
);
582 PrevState
= CurrState
;
585 IntelExprState CurrState
= State
;
594 IC
.pushOperator(IC_LT
);
597 PrevState
= CurrState
;
600 IntelExprState CurrState
= State
;
609 IC
.pushOperator(IC_LE
);
612 PrevState
= CurrState
;
615 IntelExprState CurrState
= State
;
624 IC
.pushOperator(IC_GT
);
627 PrevState
= CurrState
;
630 IntelExprState CurrState
= State
;
639 IC
.pushOperator(IC_GE
);
642 PrevState
= CurrState
;
645 IntelExprState CurrState
= State
;
654 IC
.pushOperator(IC_LSHIFT
);
657 PrevState
= CurrState
;
660 IntelExprState CurrState
= State
;
669 IC
.pushOperator(IC_RSHIFT
);
672 PrevState
= CurrState
;
674 bool onPlus(StringRef
&ErrMsg
) {
675 IntelExprState CurrState
= State
;
685 IC
.pushOperator(IC_PLUS
);
686 if (CurrState
== IES_REGISTER
&& PrevState
!= IES_MULTIPLY
) {
687 // If we already have a BaseReg, then assume this is the IndexReg with
688 // no explicit scale.
693 return regsUseUpError(ErrMsg
);
700 PrevState
= CurrState
;
703 bool onMinus(StringRef
&ErrMsg
) {
704 IntelExprState CurrState
= State
;
734 // push minus operator if it is not a negate operator
735 if (CurrState
== IES_REGISTER
|| CurrState
== IES_RPAREN
||
736 CurrState
== IES_INTEGER
|| CurrState
== IES_RBRAC
||
737 CurrState
== IES_OFFSET
)
738 IC
.pushOperator(IC_MINUS
);
739 else if (PrevState
== IES_REGISTER
&& CurrState
== IES_MULTIPLY
) {
740 // We have negate operator for Scale: it's illegal
741 ErrMsg
= "Scale can't be negative";
744 IC
.pushOperator(IC_NEG
);
745 if (CurrState
== IES_REGISTER
&& PrevState
!= IES_MULTIPLY
) {
746 // If we already have a BaseReg, then assume this is the IndexReg with
747 // no explicit scale.
752 return regsUseUpError(ErrMsg
);
759 PrevState
= CurrState
;
763 IntelExprState CurrState
= State
;
789 IC
.pushOperator(IC_NOT
);
792 PrevState
= CurrState
;
794 bool onRegister(unsigned Reg
, StringRef
&ErrMsg
) {
795 IntelExprState CurrState
= State
;
803 State
= IES_REGISTER
;
805 IC
.pushOperand(IC_REGISTER
);
808 // Index Register - Scale * Register
809 if (PrevState
== IES_INTEGER
) {
811 return regsUseUpError(ErrMsg
);
812 State
= IES_REGISTER
;
814 // Get the scale and replace the 'Scale * Register' with '0'.
815 Scale
= IC
.popOperand();
816 if (checkScale(Scale
, ErrMsg
))
818 IC
.pushOperand(IC_IMM
);
825 PrevState
= CurrState
;
828 bool onIdentifierExpr(const MCExpr
*SymRef
, StringRef SymRefName
,
829 const InlineAsmIdentifierInfo
&IDInfo
,
830 const AsmTypeInfo
&Type
, bool ParsingMSInlineAsm
,
832 // InlineAsm: Treat an enum value as an integer
833 if (ParsingMSInlineAsm
)
834 if (IDInfo
.isKind(InlineAsmIdentifierInfo::IK_EnumVal
))
835 return onInteger(IDInfo
.Enum
.EnumVal
, ErrMsg
);
836 // Treat a symbolic constant like an integer
837 if (auto *CE
= dyn_cast
<MCConstantExpr
>(SymRef
))
838 return onInteger(CE
->getValue(), ErrMsg
);
851 if (setSymRef(SymRef
, SymRefName
, ErrMsg
))
855 IC
.pushOperand(IC_IMM
);
856 if (ParsingMSInlineAsm
)
863 bool onInteger(int64_t TmpInt
, StringRef
&ErrMsg
) {
864 IntelExprState CurrState
= State
;
890 if (PrevState
== IES_REGISTER
&& CurrState
== IES_MULTIPLY
) {
891 // Index Register - Register * Scale
893 return regsUseUpError(ErrMsg
);
896 if (checkScale(Scale
, ErrMsg
))
898 // Get the scale and replace the 'Register * Scale' with '0'.
901 IC
.pushOperand(IC_IMM
, TmpInt
);
905 PrevState
= CurrState
;
917 State
= IES_MULTIPLY
;
918 IC
.pushOperator(IC_MULTIPLY
);
931 IC
.pushOperator(IC_DIVIDE
);
944 IC
.pushOperator(IC_MOD
);
960 IC
.pushOperator(IC_PLUS
);
962 CurType
.Size
= CurType
.ElementSize
;
966 assert(!BracCount
&& "BracCount should be zero on parsing's start");
975 bool onRBrac(StringRef
&ErrMsg
) {
976 IntelExprState CurrState
= State
;
985 if (BracCount
-- != 1) {
986 ErrMsg
= "unexpected bracket encountered";
990 if (CurrState
== IES_REGISTER
&& PrevState
!= IES_MULTIPLY
) {
991 // If we already have a BaseReg, then assume this is the IndexReg with
992 // no explicit scale.
997 return regsUseUpError(ErrMsg
);
1004 PrevState
= CurrState
;
1008 IntelExprState CurrState
= State
;
1034 IC
.pushOperator(IC_LPAREN
);
1037 PrevState
= CurrState
;
1051 IC
.pushOperator(IC_RPAREN
);
1055 bool onOffset(const MCExpr
*Val
, SMLoc OffsetLoc
, StringRef ID
,
1056 const InlineAsmIdentifierInfo
&IDInfo
,
1057 bool ParsingMSInlineAsm
, StringRef
&ErrMsg
) {
1061 ErrMsg
= "unexpected offset operator expression";
1066 if (setSymRef(Val
, ID
, ErrMsg
))
1068 OffsetOperator
= true;
1069 OffsetOperatorLoc
= OffsetLoc
;
1071 // As we cannot yet resolve the actual value (offset), we retain
1072 // the requested semantics by pushing a '0' to the operands stack
1073 IC
.pushOperand(IC_IMM
);
1074 if (ParsingMSInlineAsm
) {
1081 void onCast(AsmTypeInfo Info
) {
// Overwrites the current type record CurType with Type (taken by value).
1093 void setTypeInfo(AsmTypeInfo Type
) { CurType
= Type
; }
1096 bool Error(SMLoc L
, const Twine
&Msg
, SMRange Range
= std::nullopt
,
1097 bool MatchingInlineAsm
= false) {
1098 MCAsmParser
&Parser
= getParser();
1099 if (MatchingInlineAsm
) {
1100 if (!getLexer().isAtStartOfStatement())
1101 Parser
.eatToEndOfStatement();
1104 return Parser
.Error(L
, Msg
, Range
);
1107 bool MatchRegisterByName(MCRegister
&RegNo
, StringRef RegName
, SMLoc StartLoc
,
1109 bool ParseRegister(MCRegister
&RegNo
, SMLoc
&StartLoc
, SMLoc
&EndLoc
,
1110 bool RestoreOnFailure
);
1112 std::unique_ptr
<X86Operand
> DefaultMemSIOperand(SMLoc Loc
);
1113 std::unique_ptr
<X86Operand
> DefaultMemDIOperand(SMLoc Loc
);
1114 bool IsSIReg(unsigned Reg
);
1115 unsigned GetSIDIForRegClass(unsigned RegClassID
, unsigned Reg
, bool IsSIReg
);
1117 AddDefaultSrcDestOperands(OperandVector
&Operands
,
1118 std::unique_ptr
<llvm::MCParsedAsmOperand
> &&Src
,
1119 std::unique_ptr
<llvm::MCParsedAsmOperand
> &&Dst
);
1120 bool VerifyAndAdjustOperands(OperandVector
&OrigOperands
,
1121 OperandVector
&FinalOperands
);
1122 bool parseOperand(OperandVector
&Operands
, StringRef Name
);
1123 bool parseATTOperand(OperandVector
&Operands
);
1124 bool parseIntelOperand(OperandVector
&Operands
, StringRef Name
);
1125 bool ParseIntelOffsetOperator(const MCExpr
*&Val
, StringRef
&ID
,
1126 InlineAsmIdentifierInfo
&Info
, SMLoc
&End
);
1127 bool ParseIntelDotOperator(IntelExprStateMachine
&SM
, SMLoc
&End
);
1128 unsigned IdentifyIntelInlineAsmOperator(StringRef Name
);
1129 unsigned ParseIntelInlineAsmOperator(unsigned OpKind
);
1130 unsigned IdentifyMasmOperator(StringRef Name
);
1131 bool ParseMasmOperator(unsigned OpKind
, int64_t &Val
);
1132 bool ParseRoundingModeOp(SMLoc Start
, OperandVector
&Operands
);
1133 bool parseCFlagsOp(OperandVector
&Operands
);
1134 bool ParseIntelNamedOperator(StringRef Name
, IntelExprStateMachine
&SM
,
1135 bool &ParseError
, SMLoc
&End
);
1136 bool ParseMasmNamedOperator(StringRef Name
, IntelExprStateMachine
&SM
,
1137 bool &ParseError
, SMLoc
&End
);
1138 void RewriteIntelExpression(IntelExprStateMachine
&SM
, SMLoc Start
,
1140 bool ParseIntelExpression(IntelExprStateMachine
&SM
, SMLoc
&End
);
1141 bool ParseIntelInlineAsmIdentifier(const MCExpr
*&Val
, StringRef
&Identifier
,
1142 InlineAsmIdentifierInfo
&Info
,
1143 bool IsUnevaluatedOperand
, SMLoc
&End
,
1144 bool IsParsingOffsetOperator
= false);
1145 void tryParseOperandIdx(AsmToken::TokenKind PrevTK
,
1146 IntelExprStateMachine
&SM
);
1148 bool ParseMemOperand(unsigned SegReg
, const MCExpr
*Disp
, SMLoc StartLoc
,
1149 SMLoc EndLoc
, OperandVector
&Operands
);
1151 X86::CondCode
ParseConditionCode(StringRef CCode
);
1153 bool ParseIntelMemoryOperandSize(unsigned &Size
);
1154 bool CreateMemForMSInlineAsm(unsigned SegReg
, const MCExpr
*Disp
,
1155 unsigned BaseReg
, unsigned IndexReg
,
1156 unsigned Scale
, bool NonAbsMem
, SMLoc Start
,
1157 SMLoc End
, unsigned Size
, StringRef Identifier
,
1158 const InlineAsmIdentifierInfo
&Info
,
1159 OperandVector
&Operands
);
1161 bool parseDirectiveArch();
1162 bool parseDirectiveNops(SMLoc L
);
1163 bool parseDirectiveEven(SMLoc L
);
1164 bool ParseDirectiveCode(StringRef IDVal
, SMLoc L
);
1166 /// CodeView FPO data directives.
1167 bool parseDirectiveFPOProc(SMLoc L
);
1168 bool parseDirectiveFPOSetFrame(SMLoc L
);
1169 bool parseDirectiveFPOPushReg(SMLoc L
);
1170 bool parseDirectiveFPOStackAlloc(SMLoc L
);
1171 bool parseDirectiveFPOStackAlign(SMLoc L
);
1172 bool parseDirectiveFPOEndPrologue(SMLoc L
);
1173 bool parseDirectiveFPOEndProc(SMLoc L
);
1176 bool parseSEHRegisterNumber(unsigned RegClassID
, MCRegister
&RegNo
);
1177 bool parseDirectiveSEHPushReg(SMLoc
);
1178 bool parseDirectiveSEHSetFrame(SMLoc
);
1179 bool parseDirectiveSEHSaveReg(SMLoc
);
1180 bool parseDirectiveSEHSaveXMM(SMLoc
);
1181 bool parseDirectiveSEHPushFrame(SMLoc
);
1183 unsigned checkTargetMatchPredicate(MCInst
&Inst
) override
;
1185 bool validateInstruction(MCInst
&Inst
, const OperandVector
&Ops
);
1186 bool processInstruction(MCInst
&Inst
, const OperandVector
&Ops
);
1188 // Load Value Injection (LVI) Mitigations for machine code
1189 void emitWarningForSpecialLVIInstruction(SMLoc Loc
);
1190 void applyLVICFIMitigation(MCInst
&Inst
, MCStreamer
&Out
);
1191 void applyLVILoadHardeningMitigation(MCInst
&Inst
, MCStreamer
&Out
);
1193 /// Wrapper around MCStreamer::emitInstruction(). Possibly adds
1194 /// instrumentation around Inst.
1195 void emitInstruction(MCInst
&Inst
, OperandVector
&Operands
, MCStreamer
&Out
);
1197 bool MatchAndEmitInstruction(SMLoc IDLoc
, unsigned &Opcode
,
1198 OperandVector
&Operands
, MCStreamer
&Out
,
1199 uint64_t &ErrorInfo
,
1200 bool MatchingInlineAsm
) override
;
1202 void MatchFPUWaitAlias(SMLoc IDLoc
, X86Operand
&Op
, OperandVector
&Operands
,
1203 MCStreamer
&Out
, bool MatchingInlineAsm
);
1205 bool ErrorMissingFeature(SMLoc IDLoc
, const FeatureBitset
&MissingFeatures
,
1206 bool MatchingInlineAsm
);
1208 bool matchAndEmitATTInstruction(SMLoc IDLoc
, unsigned &Opcode
, MCInst
&Inst
,
1209 OperandVector
&Operands
, MCStreamer
&Out
,
1210 uint64_t &ErrorInfo
, bool MatchingInlineAsm
);
1212 bool matchAndEmitIntelInstruction(SMLoc IDLoc
, unsigned &Opcode
, MCInst
&Inst
,
1213 OperandVector
&Operands
, MCStreamer
&Out
,
1214 uint64_t &ErrorInfo
,
1215 bool MatchingInlineAsm
);
1217 bool OmitRegisterFromClobberLists(unsigned RegNo
) override
;
1219 /// Parses AVX512 specific operand primitives: masked registers ({%k<NUM>}, {z})
1220 /// and memory broadcasting ({1to<NUM>}) primitives, updating Operands vector if required.
1221 /// return false if no parsing errors occurred, true otherwise.
1222 bool HandleAVX512Operand(OperandVector
&Operands
);
1224 bool ParseZ(std::unique_ptr
<X86Operand
> &Z
, const SMLoc
&StartLoc
);
1226 bool is64BitMode() const {
1227 // FIXME: Can tablegen auto-generate this?
1228 return getSTI().hasFeature(X86::Is64Bit
);
1230 bool is32BitMode() const {
1231 // FIXME: Can tablegen auto-generate this?
1232 return getSTI().hasFeature(X86::Is32Bit
);
1234 bool is16BitMode() const {
1235 // FIXME: Can tablegen auto-generate this?
1236 return getSTI().hasFeature(X86::Is16Bit
);
1238 void SwitchMode(unsigned mode
) {
1239 MCSubtargetInfo
&STI
= copySTI();
1240 FeatureBitset
AllModes({X86::Is64Bit
, X86::Is32Bit
, X86::Is16Bit
});
1241 FeatureBitset OldMode
= STI
.getFeatureBits() & AllModes
;
1242 FeatureBitset FB
= ComputeAvailableFeatures(
1243 STI
.ToggleFeature(OldMode
.flip(mode
)));
1244 setAvailableFeatures(FB
);
1246 assert(FeatureBitset({mode
}) == (STI
.getFeatureBits() & AllModes
));
1249 unsigned getPointerWidth() {
1250 if (is16BitMode()) return 16;
1251 if (is32BitMode()) return 32;
1252 if (is64BitMode()) return 64;
1253 llvm_unreachable("invalid mode");
1256 bool isParsingIntelSyntax() {
1257 return getParser().getAssemblerDialect();
1260 /// @name Auto-generated Matcher Functions
1263 #define GET_ASSEMBLER_HEADER
1264 #include "X86GenAsmMatcher.inc"
1269 enum X86MatchResultTy
{
1270 Match_Unsupported
= FIRST_TARGET_MATCH_RESULT_TY
,
1271 #define GET_OPERAND_DIAGNOSTIC_TYPES
1272 #include "X86GenAsmMatcher.inc"
1275 X86AsmParser(const MCSubtargetInfo
&sti
, MCAsmParser
&Parser
,
1276 const MCInstrInfo
&mii
, const MCTargetOptions
&Options
)
1277 : MCTargetAsmParser(Options
, sti
, mii
), InstInfo(nullptr),
1280 Parser
.addAliasForDirective(".word", ".2byte");
1282 // Initialize the set of available features.
1283 setAvailableFeatures(ComputeAvailableFeatures(getSTI().getFeatureBits()));
1286 bool parseRegister(MCRegister
&Reg
, SMLoc
&StartLoc
, SMLoc
&EndLoc
) override
;
1287 ParseStatus
tryParseRegister(MCRegister
&Reg
, SMLoc
&StartLoc
,
1288 SMLoc
&EndLoc
) override
;
1290 bool parsePrimaryExpr(const MCExpr
*&Res
, SMLoc
&EndLoc
) override
;
1292 bool ParseInstruction(ParseInstructionInfo
&Info
, StringRef Name
,
1293 SMLoc NameLoc
, OperandVector
&Operands
) override
;
1295 bool ParseDirective(AsmToken DirectiveID
) override
;
1297 } // end anonymous namespace
1299 #define GET_REGISTER_MATCHER
1300 #define GET_SUBTARGET_FEATURE_NAME
1301 #include "X86GenAsmMatcher.inc"
1303 static bool CheckBaseRegAndIndexRegAndScale(unsigned BaseReg
, unsigned IndexReg
,
1304 unsigned Scale
, bool Is64BitMode
,
1305 StringRef
&ErrMsg
) {
1306 // If we have both a base register and an index register make sure they are
1307 // both 64-bit or 32-bit registers.
1308 // To support VSIB, IndexReg can be 128-bit or 256-bit registers.
1311 !(BaseReg
== X86::RIP
|| BaseReg
== X86::EIP
||
1312 X86MCRegisterClasses
[X86::GR16RegClassID
].contains(BaseReg
) ||
1313 X86MCRegisterClasses
[X86::GR32RegClassID
].contains(BaseReg
) ||
1314 X86MCRegisterClasses
[X86::GR64RegClassID
].contains(BaseReg
))) {
1315 ErrMsg
= "invalid base+index expression";
1319 if (IndexReg
!= 0 &&
1320 !(IndexReg
== X86::EIZ
|| IndexReg
== X86::RIZ
||
1321 X86MCRegisterClasses
[X86::GR16RegClassID
].contains(IndexReg
) ||
1322 X86MCRegisterClasses
[X86::GR32RegClassID
].contains(IndexReg
) ||
1323 X86MCRegisterClasses
[X86::GR64RegClassID
].contains(IndexReg
) ||
1324 X86MCRegisterClasses
[X86::VR128XRegClassID
].contains(IndexReg
) ||
1325 X86MCRegisterClasses
[X86::VR256XRegClassID
].contains(IndexReg
) ||
1326 X86MCRegisterClasses
[X86::VR512RegClassID
].contains(IndexReg
))) {
1327 ErrMsg
= "invalid base+index expression";
1331 if (((BaseReg
== X86::RIP
|| BaseReg
== X86::EIP
) && IndexReg
!= 0) ||
1332 IndexReg
== X86::EIP
|| IndexReg
== X86::RIP
||
1333 IndexReg
== X86::ESP
|| IndexReg
== X86::RSP
) {
1334 ErrMsg
= "invalid base+index expression";
1338 // Check for use of invalid 16-bit registers. Only BX/BP/SI/DI are allowed,
1339 // and then only in non-64-bit modes.
1340 if (X86MCRegisterClasses
[X86::GR16RegClassID
].contains(BaseReg
) &&
1341 (Is64BitMode
|| (BaseReg
!= X86::BX
&& BaseReg
!= X86::BP
&&
1342 BaseReg
!= X86::SI
&& BaseReg
!= X86::DI
))) {
1343 ErrMsg
= "invalid 16-bit base register";
1348 X86MCRegisterClasses
[X86::GR16RegClassID
].contains(IndexReg
)) {
1349 ErrMsg
= "16-bit memory operand may not include only index register";
1353 if (BaseReg
!= 0 && IndexReg
!= 0) {
1354 if (X86MCRegisterClasses
[X86::GR64RegClassID
].contains(BaseReg
) &&
1355 (X86MCRegisterClasses
[X86::GR16RegClassID
].contains(IndexReg
) ||
1356 X86MCRegisterClasses
[X86::GR32RegClassID
].contains(IndexReg
) ||
1357 IndexReg
== X86::EIZ
)) {
1358 ErrMsg
= "base register is 64-bit, but index register is not";
1361 if (X86MCRegisterClasses
[X86::GR32RegClassID
].contains(BaseReg
) &&
1362 (X86MCRegisterClasses
[X86::GR16RegClassID
].contains(IndexReg
) ||
1363 X86MCRegisterClasses
[X86::GR64RegClassID
].contains(IndexReg
) ||
1364 IndexReg
== X86::RIZ
)) {
1365 ErrMsg
= "base register is 32-bit, but index register is not";
1368 if (X86MCRegisterClasses
[X86::GR16RegClassID
].contains(BaseReg
)) {
1369 if (X86MCRegisterClasses
[X86::GR32RegClassID
].contains(IndexReg
) ||
1370 X86MCRegisterClasses
[X86::GR64RegClassID
].contains(IndexReg
)) {
1371 ErrMsg
= "base register is 16-bit, but index register is not";
1374 if ((BaseReg
!= X86::BX
&& BaseReg
!= X86::BP
) ||
1375 (IndexReg
!= X86::SI
&& IndexReg
!= X86::DI
)) {
1376 ErrMsg
= "invalid 16-bit base/index register combination";
1382 // RIP/EIP-relative addressing is only supported in 64-bit mode.
1383 if (!Is64BitMode
&& BaseReg
!= 0 &&
1384 (BaseReg
== X86::RIP
|| BaseReg
== X86::EIP
)) {
1385 ErrMsg
= "IP-relative addressing requires 64-bit mode";
1389 return checkScale(Scale
, ErrMsg
);
1392 bool X86AsmParser::MatchRegisterByName(MCRegister
&RegNo
, StringRef RegName
,
1393 SMLoc StartLoc
, SMLoc EndLoc
) {
1394 // If we encounter a %, ignore it. This code handles registers with and
1395 // without the prefix, unprefixed registers can occur in cfi directives.
1396 RegName
.consume_front("%");
1398 RegNo
= MatchRegisterName(RegName
);
1400 // If the match failed, try the register name as lowercase.
1402 RegNo
= MatchRegisterName(RegName
.lower());
1404 // The "flags" and "mxcsr" registers cannot be referenced directly.
1405 // Treat it as an identifier instead.
1406 if (isParsingMSInlineAsm() && isParsingIntelSyntax() &&
1407 (RegNo
== X86::EFLAGS
|| RegNo
== X86::MXCSR
))
1410 if (!is64BitMode()) {
1411 // FIXME: This should be done using Requires<Not64BitMode> and
1412 // Requires<In64BitMode> so "eiz" usage in 64-bit instructions can be also
1414 if (RegNo
== X86::RIZ
|| RegNo
== X86::RIP
||
1415 X86MCRegisterClasses
[X86::GR64RegClassID
].contains(RegNo
) ||
1416 X86II::isX86_64NonExtLowByteReg(RegNo
) ||
1417 X86II::isX86_64ExtendedReg(RegNo
)) {
1418 return Error(StartLoc
,
1419 "register %" + RegName
+ " is only available in 64-bit mode",
1420 SMRange(StartLoc
, EndLoc
));
1424 if (X86II::isApxExtendedReg(RegNo
))
1425 UseApxExtendedReg
= true;
1427 // If this is "db[0-15]", match it as an alias
1429 if (RegNo
== 0 && RegName
.starts_with("db")) {
1430 if (RegName
.size() == 3) {
1431 switch (RegName
[2]) {
1463 } else if (RegName
.size() == 4 && RegName
[2] == '1') {
1464 switch (RegName
[3]) {
1488 if (isParsingIntelSyntax())
1490 return Error(StartLoc
, "invalid register name", SMRange(StartLoc
, EndLoc
));
// Parse a register reference starting at the current token.
//
// On entry StartLoc is set to the location of the current token. In AT&T
// syntax an optional leading '%' is consumed first. The following identifier
// is resolved through MatchRegisterByName(). The x87 form "st(N)" is handled
// specially: after "st" an optional "(N)" with N in 0..7 selects ST0..ST7.
//
// Every token that is consumed is also pushed onto Tokens; the OnFailure
// lambda un-lexes them in reverse order when RestoreOnFailure is true.
//
// NOTE(review): the embedded line numbers are non-contiguous, so several
// original lines (closing braces, OnFailure() calls, some returns) are not
// visible in this listing. The return convention for success is presumably
// `false` (the visible failure paths return true / Error(...)) - confirm.
1495 bool X86AsmParser::ParseRegister(MCRegister
&RegNo
, SMLoc
&StartLoc
,
1496 SMLoc
&EndLoc
, bool RestoreOnFailure
) {
1497 MCAsmParser
&Parser
= getParser();
1498 MCAsmLexer
&Lexer
= getLexer();
// Tokens consumed so far, kept so they can be pushed back on failure.
1501 SmallVector
<AsmToken
, 5> Tokens
;
1502 auto OnFailure
= [RestoreOnFailure
, &Lexer
, &Tokens
]() {
1503 if (RestoreOnFailure
) {
1504 while (!Tokens
.empty()) {
1505 Lexer
.UnLex(Tokens
.pop_back_val());
1510 const AsmToken
&PercentTok
= Parser
.getTok();
1511 StartLoc
= PercentTok
.getLoc();
1513 // If we encounter a %, ignore it. This code handles registers with and
1514 // without the prefix, unprefixed registers can occur in cfi directives.
1515 if (!isParsingIntelSyntax() && PercentTok
.is(AsmToken::Percent
)) {
1516 Tokens
.push_back(PercentTok
);
1517 Parser
.Lex(); // Eat percent token.
1520 const AsmToken
&Tok
= Parser
.getTok();
1521 EndLoc
= Tok
.getEndLoc();
// A register name must be an identifier. In Intel syntax the failure is
// reported by returning true without emitting a diagnostic.
1523 if (Tok
.isNot(AsmToken::Identifier
)) {
1525 if (isParsingIntelSyntax()) return true;
1526 return Error(StartLoc
, "invalid register name",
1527 SMRange(StartLoc
, EndLoc
));
1530 if (MatchRegisterByName(RegNo
, Tok
.getString(), StartLoc
, EndLoc
)) {
1535 // Parse "%st" as "%st(0)" and "%st(1)", which is multiple tokens.
1536 if (RegNo
== X86::ST0
) {
1537 Tokens
.push_back(Tok
);
1538 Parser
.Lex(); // Eat 'st'
1540 // Check to see if we have '(4)' after %st.
1541 if (Lexer
.isNot(AsmToken::LParen
))
1544 Tokens
.push_back(Parser
.getTok());
1547 const AsmToken
&IntTok
= Parser
.getTok();
1548 if (IntTok
.isNot(AsmToken::Integer
)) {
1550 return Error(IntTok
.getLoc(), "expected stack index");
// Map the stack index 0..7 onto ST0..ST7; other values are diagnosed below.
1552 switch (IntTok
.getIntVal()) {
1553 case 0: RegNo
= X86::ST0
; break;
1554 case 1: RegNo
= X86::ST1
; break;
1555 case 2: RegNo
= X86::ST2
; break;
1556 case 3: RegNo
= X86::ST3
; break;
1557 case 4: RegNo
= X86::ST4
; break;
1558 case 5: RegNo
= X86::ST5
; break;
1559 case 6: RegNo
= X86::ST6
; break;
1560 case 7: RegNo
= X86::ST7
; break;
1563 return Error(IntTok
.getLoc(), "invalid stack index");
1567 Tokens
.push_back(IntTok
);
1569 if (Lexer
.isNot(AsmToken::RParen
)) {
1571 return Error(Parser
.getTok().getLoc(), "expected ')'");
1574 EndLoc
= Parser
.getTok().getEndLoc();
1575 Parser
.Lex(); // Eat ')'
1579 EndLoc
= Parser
.getTok().getEndLoc();
1583 if (isParsingIntelSyntax()) return true;
1584 return Error(StartLoc
, "invalid register name",
1585 SMRange(StartLoc
, EndLoc
));
1588 Parser
.Lex(); // Eat identifier token.
// Thin wrapper: forwards to ParseRegister() with RestoreOnFailure=false, so
// consumed tokens are not pushed back when parsing fails.
// NOTE(review): the line declaring the EndLoc parameter is not visible in
// this listing.
1592 bool X86AsmParser::parseRegister(MCRegister
&Reg
, SMLoc
&StartLoc
,
1594 return ParseRegister(Reg
, StartLoc
, EndLoc
, /*RestoreOnFailure=*/false);
// Non-committing variant of register parsing: calls ParseRegister() with
// RestoreOnFailure=true, remembers whether the parser had a pending error,
// clears the pending errors, and reports the outcome as a ParseStatus
// (Failure, NoMatch or Success).
// NOTE(review): the conditions selecting between the three return values are
// on lines not visible here; presumably Failure when an error was pending,
// NoMatch when Result is true, Success otherwise - confirm.
1597 ParseStatus
X86AsmParser::tryParseRegister(MCRegister
&Reg
, SMLoc
&StartLoc
,
1599 bool Result
= ParseRegister(Reg
, StartLoc
, EndLoc
, /*RestoreOnFailure=*/true);
1600 bool PendingErrors
= getParser().hasPendingError();
1601 getParser().clearPendingErrors();
1603 return ParseStatus::Failure
;
1605 return ParseStatus::NoMatch
;
1606 return ParseStatus::Success
;
// Build the implicit source memory operand based on the SI register family:
// zero displacement, no segment register, no index register, scale 1.
// The base register is RSI in 64-bit mode, ESI in 32-bit mode or when
// Code16GCC is set, and SI otherwise.
// NOTE(review): the trailing CreateMem arguments (presumably the locations
// derived from Loc) are on lines not visible here.
1609 std::unique_ptr
<X86Operand
> X86AsmParser::DefaultMemSIOperand(SMLoc Loc
) {
1610 bool Parse32
= is32BitMode() || Code16GCC
;
1611 unsigned Basereg
= is64BitMode() ? X86::RSI
: (Parse32
? X86::ESI
: X86::SI
);
1612 const MCExpr
*Disp
= MCConstantExpr::create(0, getContext());
1613 return X86Operand::CreateMem(getPointerWidth(), /*SegReg=*/0, Disp
,
1614 /*BaseReg=*/Basereg
, /*IndexReg=*/0, /*Scale=*/1,
// Build the implicit destination memory operand based on the DI register
// family: zero displacement, no segment register, no index register, scale 1.
// The base register is RDI in 64-bit mode, EDI in 32-bit mode or when
// Code16GCC is set, and DI otherwise.
// NOTE(review): the trailing CreateMem arguments (presumably the locations
// derived from Loc) are on lines not visible here.
1618 std::unique_ptr
<X86Operand
> X86AsmParser::DefaultMemDIOperand(SMLoc Loc
) {
1619 bool Parse32
= is32BitMode() || Code16GCC
;
1620 unsigned Basereg
= is64BitMode() ? X86::RDI
: (Parse32
? X86::EDI
: X86::DI
);
1621 const MCExpr
*Disp
= MCConstantExpr::create(0, getContext());
1622 return X86Operand::CreateMem(getPointerWidth(), /*SegReg=*/0, Disp
,
1623 /*BaseReg=*/Basereg
, /*IndexReg=*/0, /*Scale=*/1,
// Classify Reg as belonging to the SI or DI register family. Any register
// outside (R|E)SI / (R|E)DI hits llvm_unreachable.
// NOTE(review): the switch cases and their return values are on lines not
// visible here; presumably true for the SI family and false for the DI
// family - confirm.
1627 bool X86AsmParser::IsSIReg(unsigned Reg
) {
1629 default: llvm_unreachable("Only (R|E)SI and (R|E)DI are expected!");
// Return the SI- or DI-family register that belongs to the given register
// class: GR64 -> RSI/RDI, GR32 -> ESI/EDI, GR16 -> SI/DI. Any other class ID
// hits llvm_unreachable.
// NOTE(review): `IsSIReg` below is presumably a bool parameter declared on a
// line not visible here (it shadows the member function of the same name);
// the visible code does not read `Reg` - confirm against the full source.
1641 unsigned X86AsmParser::GetSIDIForRegClass(unsigned RegClassID
, unsigned Reg
,
1643 switch (RegClassID
) {
1644 default: llvm_unreachable("Unexpected register class");
1645 case X86::GR64RegClassID
:
1646 return IsSIReg
? X86::RSI
: X86::RDI
;
1647 case X86::GR32RegClassID
:
1648 return IsSIReg
? X86::ESI
: X86::EDI
;
1649 case X86::GR16RegClassID
:
1650 return IsSIReg
? X86::SI
: X86::DI
;
// Append the implicit source and destination operands to Operands, taking
// ownership of both. Intel syntax lists the destination first (Dst, Src);
// the second pair of push_backs uses the opposite order (Src, Dst).
// NOTE(review): the `else` separating the two orderings is on a line not
// visible here - presumably the second ordering is the AT&T path.
1654 void X86AsmParser::AddDefaultSrcDestOperands(
1655 OperandVector
& Operands
, std::unique_ptr
<llvm::MCParsedAsmOperand
> &&Src
,
1656 std::unique_ptr
<llvm::MCParsedAsmOperand
> &&Dst
) {
1657 if (isParsingIntelSyntax()) {
1658 Operands
.push_back(std::move(Dst
));
1659 Operands
.push_back(std::move(Src
));
1662 Operands
.push_back(std::move(Src
));
1663 Operands
.push_back(std::move(Dst
));
// Reconcile the operands the user wrote (OrigOperands, whose element 0 is the
// instruction name) with the default operands the parser synthesized
// (FinalOperands).
//
// For each pair:
//  * a register FinalOp requires OrigOp to be the same register;
//  * a memory FinalOp requires OrigOp to be memory; the original base
//    register determines a register class (GR64/GR32/GR16) that every memory
//    operand must share, otherwise "mismatching source and destination index
//    registers" is reported;
//  * FinalOp's base register is replaced by the SI/DI register of that class,
//    and its Size and SegReg are copied from OrigOp. If the chosen register
//    differs from what the user wrote, a warning is queued.
//
// Warnings are emitted only after all operands were processed; then the
// user-written operands are popped from OrigOperands and replaced by the
// adjusted FinalOperands.
//
// NOTE(review): the return statements are on lines not visible here; the
// comments below indicate that `false` means "no error reported".
1667 bool X86AsmParser::VerifyAndAdjustOperands(OperandVector
&OrigOperands
,
1668 OperandVector
&FinalOperands
) {
1670 if (OrigOperands
.size() > 1) {
1671 // Check if sizes match, OrigOperands also contains the instruction name
1672 assert(OrigOperands
.size() == FinalOperands
.size() + 1 &&
1673 "Operand size mismatch");
// Warnings are buffered so that none is shown if a later operand fails.
1675 SmallVector
<std::pair
<SMLoc
, std::string
>, 2> Warnings
;
1676 // Verify types match
// -1 means no register class has been seen yet.
1677 int RegClassID
= -1;
1678 for (unsigned int i
= 0; i
< FinalOperands
.size(); ++i
) {
// OrigOperands is offset by one because element 0 is the instruction name.
1679 X86Operand
&OrigOp
= static_cast<X86Operand
&>(*OrigOperands
[i
+ 1]);
1680 X86Operand
&FinalOp
= static_cast<X86Operand
&>(*FinalOperands
[i
]);
1682 if (FinalOp
.isReg() &&
1683 (!OrigOp
.isReg() || FinalOp
.getReg() != OrigOp
.getReg()))
1684 // Return false and let a normal complaint about bogus operands happen
1687 if (FinalOp
.isMem()) {
1689 if (!OrigOp
.isMem())
1690 // Return false and let a normal complaint about bogus operands happen
1693 unsigned OrigReg
= OrigOp
.Mem
.BaseReg
;
1694 unsigned FinalReg
= FinalOp
.Mem
.BaseReg
;
1696 // If we've already encountered a register class, make sure all register
1697 // bases are of the same register class
1698 if (RegClassID
!= -1 &&
1699 !X86MCRegisterClasses
[RegClassID
].contains(OrigReg
)) {
1700 return Error(OrigOp
.getStartLoc(),
1701 "mismatching source and destination index registers");
1704 if (X86MCRegisterClasses
[X86::GR64RegClassID
].contains(OrigReg
))
1705 RegClassID
= X86::GR64RegClassID
;
1706 else if (X86MCRegisterClasses
[X86::GR32RegClassID
].contains(OrigReg
))
1707 RegClassID
= X86::GR32RegClassID
;
1708 else if (X86MCRegisterClasses
[X86::GR16RegClassID
].contains(OrigReg
))
1709 RegClassID
= X86::GR16RegClassID
;
1711 // Unexpected register class type
1712 // Return false and let a normal complaint about bogus operands happen
// Pick the SI/DI register whose width matches the user-written base.
1715 bool IsSI
= IsSIReg(FinalReg
);
1716 FinalReg
= GetSIDIForRegClass(RegClassID
, FinalReg
, IsSI
);
1718 if (FinalReg
!= OrigReg
) {
1719 std::string RegName
= IsSI
? "ES:(R|E)SI" : "ES:(R|E)DI";
1720 Warnings
.push_back(std::make_pair(
1721 OrigOp
.getStartLoc(),
1722 "memory operand is only for determining the size, " + RegName
+
1723 " will be used for the location"));
1726 FinalOp
.Mem
.Size
= OrigOp
.Mem
.Size
;
1727 FinalOp
.Mem
.SegReg
= OrigOp
.Mem
.SegReg
;
1728 FinalOp
.Mem
.BaseReg
= FinalReg
;
1732 // Produce warnings only if all the operands passed the adjustment - prevent
1733 // legal cases like "movsd (%rax), %xmm0" mistakenly produce warnings
1734 for (auto &WarningMsg
: Warnings
) {
1735 Warning(WarningMsg
.first
, WarningMsg
.second
);
1738 // Remove old operands
1739 for (unsigned int i
= 0; i
< FinalOperands
.size(); ++i
)
1740 OrigOperands
.pop_back();
1742 // OrigOperands.append(FinalOperands.begin(), FinalOperands.end());
1743 for (auto &Op
: FinalOperands
)
1744 OrigOperands
.push_back(std::move(Op
));
// Parse one operand, dispatching on the active syntax: Intel syntax goes to
// parseIntelOperand() (which also receives the mnemonic Name), everything
// else goes to parseATTOperand().
1749 bool X86AsmParser::parseOperand(OperandVector
&Operands
, StringRef Name
) {
1750 if (isParsingIntelSyntax())
1751 return parseIntelOperand(Operands
, Name
);
1753 return parseATTOperand(Operands
);
// Create the memory operand for an identifier reference inside MS-style
// inline assembly and append it to Operands.
//
//  * Label identifiers (IK_Label) become a displacement-only memory operand.
//  * Variable identifiers (IK_Var) contribute the frontend's size (converted
//    from bytes to bits), declaration pointer and global-lvalue flag.
//  * Otherwise a full memory operand is built from SegReg/Disp/BaseReg/
//    IndexReg/Scale with X86::RIP as the default base register.
//
// NOTE(review): several conditions and the return statements are on lines
// not visible here (e.g. what guards the `BaseReg || IndexReg` block and the
// `BaseReg = 1` assignment - presumably IsGlobalLV and NonAbsMem). Confirm
// against the full source before relying on the exact control flow.
1756 bool X86AsmParser::CreateMemForMSInlineAsm(unsigned SegReg
, const MCExpr
*Disp
,
1757 unsigned BaseReg
, unsigned IndexReg
,
1758 unsigned Scale
, bool NonAbsMem
,
1759 SMLoc Start
, SMLoc End
,
1760 unsigned Size
, StringRef Identifier
,
1761 const InlineAsmIdentifierInfo
&Info
,
1762 OperandVector
&Operands
) {
1763 // If we found a decl other than a VarDecl, then assume it is a FuncDecl or
1764 // some other label reference.
1765 if (Info
.isKind(InlineAsmIdentifierInfo::IK_Label
)) {
1766 // Create an absolute memory reference in order to match against
1767 // instructions taking a PC relative operand.
1768 Operands
.push_back(X86Operand::CreateMem(getPointerWidth(), Disp
, Start
,
1769 End
, Size
, Identifier
,
1773 // We either have a direct symbol reference, or an offset from a symbol. The
1774 // parser always puts the symbol on the LHS, so look there for size
1775 // calculation purposes.
1776 unsigned FrontendSize
= 0;
1777 void *Decl
= nullptr;
1778 bool IsGlobalLV
= false;
1779 if (Info
.isKind(InlineAsmIdentifierInfo::IK_Var
)) {
1780 // Size is in terms of bits in this context.
1781 FrontendSize
= Info
.Var
.Type
* 8;
1782 Decl
= Info
.Var
.Decl
;
1783 IsGlobalLV
= Info
.Var
.IsGlobalLV
;
1785 // It is widely common for MS InlineAsm to use a global variable and one/two
1786 // registers in a memory expression, and though inaccessible via rip/eip.
1788 if (BaseReg
|| IndexReg
) {
1789 Operands
.push_back(X86Operand::CreateMem(getPointerWidth(), Disp
, Start
,
1790 End
, Size
, Identifier
, Decl
, 0,
1791 BaseReg
&& IndexReg
));
1795 BaseReg
= 1; // Make isAbsMem() false
1797 Operands
.push_back(X86Operand::CreateMem(
1798 getPointerWidth(), SegReg
, Disp
, BaseReg
, IndexReg
, Scale
, Start
, End
,
1800 /*DefaultBaseReg=*/X86::RIP
, Identifier
, Decl
, FrontendSize
));
1804 // Some binary bitwise operators have named synonyms ("not", "or", ...).
1805 // Query a candidate string for being such a named operator
1806 // and if so - invoke the appropriate handler
//
// Recognized names (compared case-insensitively): not, or, shl, shr, xor,
// and, mod, offset. For "offset" the operand is parsed through
// ParseIntelOffsetOperator() and fed to SM.onOffset(); ParseError receives
// that parse's result. For every other recognized name the operator token is
// consumed and End is updated.
// NOTE(review): the state-machine call made for each operator and the
// return statements are on lines not visible here.
1807 bool X86AsmParser::ParseIntelNamedOperator(StringRef Name
,
1808 IntelExprStateMachine
&SM
,
1809 bool &ParseError
, SMLoc
&End
) {
1810 // A named operator should be either lower or upper case, but not a mix...
1811 // except in MASM, which uses full case-insensitivity.
1812 if (Name
!= Name
.lower() && Name
!= Name
.upper() &&
1813 !getParser().isParsingMasm())
1815 if (Name
.equals_insensitive("not")) {
1817 } else if (Name
.equals_insensitive("or")) {
1819 } else if (Name
.equals_insensitive("shl")) {
1821 } else if (Name
.equals_insensitive("shr")) {
1823 } else if (Name
.equals_insensitive("xor")) {
1825 } else if (Name
.equals_insensitive("and")) {
1827 } else if (Name
.equals_insensitive("mod")) {
1829 } else if (Name
.equals_insensitive("offset")) {
1830 SMLoc OffsetLoc
= getTok().getLoc();
1831 const MCExpr
*Val
= nullptr;
1833 InlineAsmIdentifierInfo Info
;
1834 ParseError
= ParseIntelOffsetOperator(Val
, ID
, Info
, End
);
1839 SM
.onOffset(Val
, OffsetLoc
, ID
, Info
, isParsingMSInlineAsm(), ErrMsg
);
1841 return Error(SMLoc::getFromPointer(Name
.data()), ErrMsg
);
// "offset" already consumed its tokens inside ParseIntelOffsetOperator().
1845 if (!Name
.equals_insensitive("offset"))
1846 End
= consumeToken();
// MASM-only named comparison operators. Name is compared case-insensitively
// against eq, ne, lt, le, gt and ge; when one matches, the operator token is
// consumed and End is updated.
// NOTE(review): the state-machine call made for each operator, the handling
// of unrecognized names, and the return statements are on lines not visible
// here.
1849 bool X86AsmParser::ParseMasmNamedOperator(StringRef Name
,
1850 IntelExprStateMachine
&SM
,
1851 bool &ParseError
, SMLoc
&End
) {
1852 if (Name
.equals_insensitive("eq")) {
1854 } else if (Name
.equals_insensitive("ne")) {
1856 } else if (Name
.equals_insensitive("lt")) {
1858 } else if (Name
.equals_insensitive("le")) {
1860 } else if (Name
.equals_insensitive("gt")) {
1862 } else if (Name
.equals_insensitive("ge")) {
1867 End
= consumeToken();
1871 // Check if the current Intel expression is appended after an operand.
1872 // Like: [Operand][Intel Expression]
// PrevTK is the kind of the previously processed token; the state machine is
// marked via setAppendAfterOperand().
// NOTE(review): the statement guarded by the `PrevTK != RBrac` test is on a
// line not visible here - presumably an early return, so the mark is only
// set when the previous token was ']'.
1873 void X86AsmParser::tryParseOperandIdx(AsmToken::TokenKind PrevTK
,
1874 IntelExprStateMachine
&SM
) {
1875 if (PrevTK
!= AsmToken::RBrac
)
1878 SM
.setAppendAfterOperand();
// Drive the Intel-syntax expression state machine SM over the token stream.
//
// Each iteration looks at the current token kind and feeds the matching
// event into SM (onInteger, onRegister, onPlus, onLBrac, ...). Identifiers
// get the most involved treatment: MASM "a.b" splitting, the MASM
// "<TYPE> PTR" operator, registers (optionally "<register>.<field>" in
// MASM), named operators, MS inline-asm operators/identifiers, MASM
// operators and type/field lookups, and finally a generic primary
// expression. End is updated as tokens are consumed. Errors are reported
// through Error(...).
//
// NOTE(review): the loop header, the `switch (TK)` line, many `break`s, local
// declarations (Done, ErrMsg, Val, Res, Reg, Info, ...) and the final return
// are on lines not visible in this listing.
1881 bool X86AsmParser::ParseIntelExpression(IntelExprStateMachine
&SM
, SMLoc
&End
) {
1882 MCAsmParser
&Parser
= getParser();
// Kind of the token handled by the previous iteration (Error = none yet).
1885 AsmToken::TokenKind PrevTK
= AsmToken::Error
;
1887 if (getContext().getObjectFileInfo()->isPositionIndependent())
1892 // Get a fresh reference on each loop iteration in case the previous
1893 // iteration moved the token storage during UnLex().
1894 const AsmToken
&Tok
= Parser
.getTok();
// Cleared by cases that consume their own tokens and set End themselves.
1896 bool UpdateLocLex
= true;
1897 AsmToken::TokenKind TK
= getLexer().getKind();
1901 if ((Done
= SM
.isValidEndState()))
1903 return Error(Tok
.getLoc(), "unknown token in expression");
1904 case AsmToken::Error
:
1905 return Error(getLexer().getErrLoc(), getLexer().getErr());
1907 case AsmToken::Real
:
1908 // DotOperator: [ebx].0
1909 UpdateLocLex
= false;
1910 if (ParseIntelDotOperator(SM
, End
))
1914 if (!Parser
.isParsingMasm()) {
1915 if ((Done
= SM
.isValidEndState()))
1917 return Error(Tok
.getLoc(), "unknown token in expression");
1919 // MASM allows spaces around the dot operator (e.g., "var . x")
1921 UpdateLocLex
= false;
1922 if (ParseIntelDotOperator(SM
, End
))
1925 case AsmToken::Dollar
:
1926 if (!Parser
.isParsingMasm()) {
1927 if ((Done
= SM
.isValidEndState()))
1929 return Error(Tok
.getLoc(), "unknown token in expression");
1932 case AsmToken::String
: {
1933 if (Parser
.isParsingMasm()) {
1934 // MASM parsers handle strings in expressions as constants.
1935 SMLoc ValueLoc
= Tok
.getLoc();
1938 if (Parser
.parsePrimaryExpr(Val
, End
, nullptr))
1940 UpdateLocLex
= false;
1941 if (!Val
->evaluateAsAbsolute(Res
, getStreamer().getAssemblerPtr()))
1942 return Error(ValueLoc
, "expected absolute value");
1943 if (SM
.onInteger(Res
, ErrMsg
))
1944 return Error(ValueLoc
, ErrMsg
);
1950 case AsmToken::Identifier
: {
1951 SMLoc IdentLoc
= Tok
.getLoc();
1952 StringRef Identifier
= Tok
.getString();
1953 UpdateLocLex
= false;
// MASM: split "lhs.rhs" into three tokens (identifier, dot, identifier) and
// push them back in reverse so they are re-lexed in source order.
1954 if (Parser
.isParsingMasm()) {
1955 size_t DotOffset
= Identifier
.find_first_of('.');
1956 if (DotOffset
!= StringRef::npos
) {
1958 StringRef LHS
= Identifier
.slice(0, DotOffset
);
1959 StringRef Dot
= Identifier
.slice(DotOffset
, DotOffset
+ 1);
1960 StringRef RHS
= Identifier
.slice(DotOffset
+ 1, StringRef::npos
);
1962 getLexer().UnLex(AsmToken(AsmToken::Identifier
, RHS
));
1964 getLexer().UnLex(AsmToken(AsmToken::Dot
, Dot
));
1966 getLexer().UnLex(AsmToken(AsmToken::Identifier
, LHS
));
1971 // (MASM only) <TYPE> PTR operator
1972 if (Parser
.isParsingMasm()) {
1973 const AsmToken
&NextTok
= getLexer().peekTok();
1974 if (NextTok
.is(AsmToken::Identifier
) &&
1975 NextTok
.getIdentifier().equals_insensitive("ptr")) {
1977 if (Parser
.lookUpType(Identifier
, Info
))
1978 return Error(Tok
.getLoc(), "unknown type");
1980 // Eat type and PTR.
1982 End
= consumeToken();
1986 // Register, or (MASM only) <register>.<field>
1988 if (Tok
.is(AsmToken::Identifier
)) {
1989 if (!ParseRegister(Reg
, IdentLoc
, End
, /*RestoreOnFailure=*/true)) {
1990 if (SM
.onRegister(Reg
, ErrMsg
))
1991 return Error(IdentLoc
, ErrMsg
);
1994 if (Parser
.isParsingMasm()) {
1995 const std::pair
<StringRef
, StringRef
> IDField
=
1996 Tok
.getString().split('.');
1997 const StringRef ID
= IDField
.first
, Field
= IDField
.second
;
1998 SMLoc IDEndLoc
= SMLoc::getFromPointer(ID
.data() + ID
.size());
1999 if (!Field
.empty() &&
2000 !MatchRegisterByName(Reg
, ID
, IdentLoc
, IDEndLoc
)) {
2001 if (SM
.onRegister(Reg
, ErrMsg
))
2002 return Error(IdentLoc
, ErrMsg
);
// <register>.<field>: add the field's offset to the register operand.
2005 SMLoc FieldStartLoc
= SMLoc::getFromPointer(Field
.data());
2006 if (Parser
.lookUpField(Field
, Info
))
2007 return Error(FieldStartLoc
, "unknown offset");
2008 else if (SM
.onPlus(ErrMsg
))
2009 return Error(getTok().getLoc(), ErrMsg
);
2010 else if (SM
.onInteger(Info
.Offset
, ErrMsg
))
2011 return Error(IdentLoc
, ErrMsg
);
2012 SM
.setTypeInfo(Info
.Type
);
2014 End
= consumeToken();
2019 // Operator synonyms ("not", "or" etc.)
2020 bool ParseError
= false;
2021 if (ParseIntelNamedOperator(Identifier
, SM
, ParseError
, End
)) {
2026 if (Parser
.isParsingMasm() &&
2027 ParseMasmNamedOperator(Identifier
, SM
, ParseError
, End
)) {
2032 // Symbol reference, when parsing assembly content
2033 InlineAsmIdentifierInfo Info
;
2034 AsmFieldInfo FieldInfo
;
2036 if (isParsingMSInlineAsm() || Parser
.isParsingMasm()) {
2037 // MS Dot Operator expression
2038 if (Identifier
.count('.') &&
2039 (PrevTK
== AsmToken::RBrac
|| PrevTK
== AsmToken::RParen
)) {
2040 if (ParseIntelDotOperator(SM
, End
))
2045 if (isParsingMSInlineAsm()) {
2046 // MS InlineAsm operators (TYPE/LENGTH/SIZE)
2047 if (unsigned OpKind
= IdentifyIntelInlineAsmOperator(Identifier
)) {
2048 if (int64_t Val
= ParseIntelInlineAsmOperator(OpKind
)) {
2049 if (SM
.onInteger(Val
, ErrMsg
))
2050 return Error(IdentLoc
, ErrMsg
);
2056 // MS InlineAsm identifier
2057 // Call parseIdentifier() to combine @ with the identifier behind it.
2058 if (TK
== AsmToken::At
&& Parser
.parseIdentifier(Identifier
))
2059 return Error(IdentLoc
, "expected identifier");
2060 if (ParseIntelInlineAsmIdentifier(Val
, Identifier
, Info
, false, End
))
2062 else if (SM
.onIdentifierExpr(Val
, Identifier
, Info
, FieldInfo
.Type
,
2064 return Error(IdentLoc
, ErrMsg
);
2067 if (Parser
.isParsingMasm()) {
2068 if (unsigned OpKind
= IdentifyMasmOperator(Identifier
)) {
2070 if (ParseMasmOperator(OpKind
, Val
))
2072 if (SM
.onInteger(Val
, ErrMsg
))
2073 return Error(IdentLoc
, ErrMsg
);
2076 if (!getParser().lookUpType(Identifier
, FieldInfo
.Type
)) {
2077 // Field offset immediate; <TYPE>.<field specification>
2079 bool EndDot
= parseOptionalToken(AsmToken::Dot
);
2080 while (EndDot
|| (getTok().is(AsmToken::Identifier
) &&
2081 getTok().getString().starts_with("."))) {
2082 getParser().parseIdentifier(Identifier
);
2084 Identifier
.consume_front(".");
2085 EndDot
= Identifier
.consume_back(".");
2086 if (getParser().lookUpField(FieldInfo
.Type
.Name
, Identifier
,
2089 SMLoc::getFromPointer(Identifier
.data() + Identifier
.size());
2090 return Error(IdentLoc
, "Unable to lookup field reference!",
2091 SMRange(IdentLoc
, IDEnd
));
2094 EndDot
= parseOptionalToken(AsmToken::Dot
);
2096 if (SM
.onInteger(FieldInfo
.Offset
, ErrMsg
))
2097 return Error(IdentLoc
, ErrMsg
);
2101 if (getParser().parsePrimaryExpr(Val
, End
, &FieldInfo
.Type
)) {
2102 return Error(Tok
.getLoc(), "Unexpected identifier!");
2103 } else if (SM
.onIdentifierExpr(Val
, Identifier
, Info
, FieldInfo
.Type
,
2105 return Error(IdentLoc
, ErrMsg
);
2109 case AsmToken::Integer
: {
2110 // Look for 'b' or 'f' following an Integer as a directional label
2111 SMLoc Loc
= getTok().getLoc();
2112 int64_t IntVal
= getTok().getIntVal();
2113 End
= consumeToken();
2114 UpdateLocLex
= false;
2115 if (getLexer().getKind() == AsmToken::Identifier
) {
2116 StringRef IDVal
= getTok().getString();
2117 if (IDVal
== "f" || IDVal
== "b") {
2119 getContext().getDirectionalLocalSymbol(IntVal
, IDVal
== "b");
2120 MCSymbolRefExpr::VariantKind Variant
= MCSymbolRefExpr::VK_None
;
2122 MCSymbolRefExpr::create(Sym
, Variant
, getContext());
// A backward reference ("Nb") must name a label that is already defined.
2123 if (IDVal
== "b" && Sym
->isUndefined())
2124 return Error(Loc
, "invalid reference to undefined symbol");
2125 StringRef Identifier
= Sym
->getName();
2126 InlineAsmIdentifierInfo Info
;
2128 if (SM
.onIdentifierExpr(Val
, Identifier
, Info
, Type
,
2129 isParsingMSInlineAsm(), ErrMsg
))
2130 return Error(Loc
, ErrMsg
);
2131 End
= consumeToken();
2133 if (SM
.onInteger(IntVal
, ErrMsg
))
2134 return Error(Loc
, ErrMsg
);
2137 if (SM
.onInteger(IntVal
, ErrMsg
))
2138 return Error(Loc
, ErrMsg
);
2142 case AsmToken::Plus
:
2143 if (SM
.onPlus(ErrMsg
))
2144 return Error(getTok().getLoc(), ErrMsg
);
2146 case AsmToken::Minus
:
2147 if (SM
.onMinus(ErrMsg
))
2148 return Error(getTok().getLoc(), ErrMsg
);
2150 case AsmToken::Tilde
: SM
.onNot(); break;
2151 case AsmToken::Star
: SM
.onStar(); break;
2152 case AsmToken::Slash
: SM
.onDivide(); break;
2153 case AsmToken::Percent
: SM
.onMod(); break;
2154 case AsmToken::Pipe
: SM
.onOr(); break;
2155 case AsmToken::Caret
: SM
.onXor(); break;
2156 case AsmToken::Amp
: SM
.onAnd(); break;
2157 case AsmToken::LessLess
:
2158 SM
.onLShift(); break;
2159 case AsmToken::GreaterGreater
:
2160 SM
.onRShift(); break;
2161 case AsmToken::LBrac
:
2163 return Error(Tok
.getLoc(), "unexpected bracket encountered");
2164 tryParseOperandIdx(PrevTK
, SM
);
2166 case AsmToken::RBrac
:
2167 if (SM
.onRBrac(ErrMsg
)) {
2168 return Error(Tok
.getLoc(), ErrMsg
);
2171 case AsmToken::LParen
: SM
.onLParen(); break;
2172 case AsmToken::RParen
: SM
.onRParen(); break;
2175 return Error(Tok
.getLoc(), "unknown token in expression");
// Default token consumption for cases that did not advance the lexer.
2177 if (!Done
&& UpdateLocLex
)
2178 End
= consumeToken();
// Record the source rewrites needed to turn the Intel expression spanning
// [Start, End) into its canonical form in InstInfo->AsmRewrites.
//
//  * If SM holds a symbol and this is not an offset operator, the text before
//    the symbol name is skipped (AOK_Skip) and the rewrite range is moved to
//    begin right after the symbol name. If the expression has no base
//    register, index register or immediate, the remainder is skipped too.
//  * Otherwise an IntelExpr describing base/index registers, scale, offset
//    symbol name, immediate and mem-ness is recorded for the range.
//
// NOTE(review): the declaration of `Loc` and the early `return` after the
// symbol-only skip are on lines not visible here.
2185 void X86AsmParser::RewriteIntelExpression(IntelExprStateMachine
&SM
,
2186 SMLoc Start
, SMLoc End
) {
2188 unsigned ExprLen
= End
.getPointer() - Start
.getPointer();
2189 // Skip everything before a symbol displacement (if we have one)
2190 if (SM
.getSym() && !SM
.isOffsetOperator()) {
2191 StringRef SymName
= SM
.getSymName();
2192 if (unsigned Len
= SymName
.data() - Start
.getPointer())
2193 InstInfo
->AsmRewrites
->emplace_back(AOK_Skip
, Start
, Len
);
2194 Loc
= SMLoc::getFromPointer(SymName
.data() + SymName
.size());
2195 ExprLen
= End
.getPointer() - (SymName
.data() + SymName
.size());
2196 // If we have only a symbol then there's no need for complex rewrite,
2197 // simply skip everything after it
2198 if (!(SM
.getBaseReg() || SM
.getIndexReg() || SM
.getImm())) {
2200 InstInfo
->AsmRewrites
->emplace_back(AOK_Skip
, Loc
, ExprLen
);
2204 // Build an Intel Expression rewrite
2205 StringRef BaseRegStr
;
2206 StringRef IndexRegStr
;
2207 StringRef OffsetNameStr
;
2208 if (SM
.getBaseReg())
2209 BaseRegStr
= X86IntelInstPrinter::getRegisterName(SM
.getBaseReg());
2210 if (SM
.getIndexReg())
2211 IndexRegStr
= X86IntelInstPrinter::getRegisterName(SM
.getIndexReg());
2212 if (SM
.isOffsetOperator())
2213 OffsetNameStr
= SM
.getSymName();
2215 IntelExpr
Expr(BaseRegStr
, IndexRegStr
, SM
.getScale(), OffsetNameStr
,
2216 SM
.getImm(), SM
.isMemExpr());
2217 InstInfo
->AsmRewrites
->emplace_back(Loc
, ExprLen
, Expr
);
2220 // Inline assembly may use variable names with namespace alias qualifiers.
//
// Resolve an identifier in MS inline assembly by asking the frontend
// (SemaCallback->LookupInlineAsmIdentifier). The frontend may claim more
// than one assembler token (LineBuf is updated by the callback); tokens are
// then consumed until End reaches the end of the claimed text, and
// Identifier is replaced by the claimed text.
//
// If the lookup yields IK_Invalid the identifier is treated as a label: its
// internal name is obtained from LookupInlineAsmLabel and, unless the operand
// of an offset operator is being parsed, an AOK_Label rewrite is recorded.
// Finally a symbol reference expression for Identifier is stored in Val.
//
// NOTE(review): the token-advancing loop body, the IK_EnumVal branch body
// and the return statements are on lines not visible here.
2221 bool X86AsmParser::ParseIntelInlineAsmIdentifier(
2222 const MCExpr
*&Val
, StringRef
&Identifier
, InlineAsmIdentifierInfo
&Info
,
2223 bool IsUnevaluatedOperand
, SMLoc
&End
, bool IsParsingOffsetOperator
) {
2224 MCAsmParser
&Parser
= getParser();
2225 assert(isParsingMSInlineAsm() && "Expected to be parsing inline assembly.");
// LineBuf starts at the identifier and extends to the end of the underlying
// buffer; the callback receives it by name and decides how much it claims.
2228 StringRef
LineBuf(Identifier
.data());
2229 SemaCallback
->LookupInlineAsmIdentifier(LineBuf
, Info
, IsUnevaluatedOperand
);
2231 const AsmToken
&Tok
= Parser
.getTok();
2232 SMLoc Loc
= Tok
.getLoc();
2234 // Advance the token stream until the end of the current token is
2235 // after the end of what the frontend claimed.
2236 const char *EndPtr
= Tok
.getLoc().getPointer() + LineBuf
.size();
2238 End
= Tok
.getEndLoc();
2240 } while (End
.getPointer() < EndPtr
);
2241 Identifier
= LineBuf
;
2243 // The frontend should end parsing on an assembler token boundary, unless it
2245 assert((End
.getPointer() == EndPtr
||
2246 Info
.isKind(InlineAsmIdentifierInfo::IK_Invalid
)) &&
2247 "frontend claimed part of a token?");
2249 // If the identifier lookup was unsuccessful, assume that we are dealing with
2251 if (Info
.isKind(InlineAsmIdentifierInfo::IK_Invalid
)) {
2252 StringRef InternalName
=
2253 SemaCallback
->LookupInlineAsmLabel(Identifier
, getSourceManager(),
2255 assert(InternalName
.size() && "We should have an internal name here.");
2256 // Push a rewrite for replacing the identifier name with the internal name,
2257 // unless we are parsing the operand of an offset operator
2258 if (!IsParsingOffsetOperator
)
2259 InstInfo
->AsmRewrites
->emplace_back(AOK_Label
, Loc
, Identifier
.size(),
2262 Identifier
= InternalName
;
2263 } else if (Info
.isKind(InlineAsmIdentifierInfo::IK_EnumVal
))
2265 // Create the symbol reference.
2266 MCSymbol
*Sym
= getContext().getOrCreateSymbol(Identifier
);
2267 MCSymbolRefExpr::VariantKind Variant
= MCSymbolRefExpr::VK_None
;
2268 Val
= MCSymbolRefExpr::create(Sym
, Variant
, getParser().getContext());
2272 //ParseRoundingModeOp - Parse AVX-512 rounding mode operand
//
// Accepted forms, starting at the '{' token:
//   {rn-sae} {rd-sae} {ru-sae} {rz-sae}  -> immediate operand holding the
//                                           X86::STATIC_ROUNDING value
//   {sae}                                -> token operand "{sae}"
// Anything else is diagnosed.
//
// NOTE(review): in the rounding-mode form the token after '-' is consumed
// without a visible check that it is actually "sae" - worth confirming
// against the full source. The StringSwitch default, the check that rejects
// an unknown mode, and the return statements are on lines not visible here.
2273 bool X86AsmParser::ParseRoundingModeOp(SMLoc Start
, OperandVector
&Operands
) {
2274 MCAsmParser
&Parser
= getParser();
// Tok is a reference to the parser's current token, so it tracks each Lex().
2275 const AsmToken
&Tok
= Parser
.getTok();
2276 // Eat "{" and mark the current place.
2277 const SMLoc consumedToken
= consumeToken();
2278 if (Tok
.isNot(AsmToken::Identifier
))
2279 return Error(Tok
.getLoc(), "Expected an identifier after {");
2280 if (Tok
.getIdentifier().starts_with("r")) {
2281 int rndMode
= StringSwitch
<int>(Tok
.getIdentifier())
2282 .Case("rn", X86::STATIC_ROUNDING::TO_NEAREST_INT
)
2283 .Case("rd", X86::STATIC_ROUNDING::TO_NEG_INF
)
2284 .Case("ru", X86::STATIC_ROUNDING::TO_POS_INF
)
2285 .Case("rz", X86::STATIC_ROUNDING::TO_ZERO
)
2288 return Error(Tok
.getLoc(), "Invalid rounding mode.");
2289 Parser
.Lex(); // Eat "r*" of r*-sae
2290 if (!getLexer().is(AsmToken::Minus
))
2291 return Error(Tok
.getLoc(), "Expected - at this point");
2292 Parser
.Lex(); // Eat "-"
2293 Parser
.Lex(); // Eat the sae
2294 if (!getLexer().is(AsmToken::RCurly
))
2295 return Error(Tok
.getLoc(), "Expected } at this point");
2296 SMLoc End
= Tok
.getEndLoc();
2297 Parser
.Lex(); // Eat "}"
2298 const MCExpr
*RndModeOp
=
2299 MCConstantExpr::create(rndMode
, Parser
.getContext());
2300 Operands
.push_back(X86Operand::CreateImm(RndModeOp
, Start
, End
));
2303 if (Tok
.getIdentifier() == "sae") {
2304 Parser
.Lex(); // Eat the sae
2305 if (!getLexer().is(AsmToken::RCurly
))
2306 return Error(Tok
.getLoc(), "Expected } at this point");
2307 Parser
.Lex(); // Eat "}"
2308 Operands
.push_back(X86Operand::CreateToken("{sae}", consumedToken
));
2311 return Error(Tok
.getLoc(), "unknown token in expression");
2314 /// Parse conditional flags for CCMP/CTEST, e.g {dfv=of,sf,zf,cf} right after
///
/// Grammar handled here: '{' "dfv" '=' [ flag { ',' flag } ] '}' with at most
/// four flags ("dfv" and the flag names are compared after lower-casing).
/// An empty list yields an immediate operand 0; otherwise the accumulated
/// CFlags value is pushed as an immediate operand spanning '{' .. '}'.
///
/// NOTE(review): the StringSwitch cases mapping flag names to bits, the
/// duplicate check, the accumulation into CFlags, the declaration of `End`
/// and the return statements are on lines not visible here.
2316 bool X86AsmParser::parseCFlagsOp(OperandVector
&Operands
) {
2317 MCAsmParser
&Parser
= getParser();
// Tok is a copy here (not a reference), so it is refreshed after each Lex().
2318 AsmToken Tok
= Parser
.getTok();
2319 const SMLoc Start
= Tok
.getLoc();
2320 if (!Tok
.is(AsmToken::LCurly
))
2321 return Error(Tok
.getLoc(), "Expected { at this point");
2322 Parser
.Lex(); // Eat "{"
2323 Tok
= Parser
.getTok();
2324 if (Tok
.getIdentifier().lower() != "dfv")
2325 return Error(Tok
.getLoc(), "Expected dfv at this point");
2326 Parser
.Lex(); // Eat "dfv"
2327 Tok
= Parser
.getTok();
2328 if (!Tok
.is(AsmToken::Equal
))
2329 return Error(Tok
.getLoc(), "Expected = at this point");
2330 Parser
.Lex(); // Eat "="
2332 Tok
= Parser
.getTok();
// "{dfv=}" - no flags at all: emit 0.
2334 if (Tok
.is(AsmToken::RCurly
)) {
2335 End
= Tok
.getEndLoc();
2336 Operands
.push_back(X86Operand::CreateImm(
2337 MCConstantExpr::create(0, Parser
.getContext()), Start
, End
));
2338 Parser
.Lex(); // Eat "}"
2341 unsigned CFlags
= 0;
// At most four flags may be listed.
2342 for (unsigned I
= 0; I
< 4; ++I
) {
2343 Tok
= Parser
.getTok();
2344 unsigned CFlag
= StringSwitch
<unsigned>(Tok
.getIdentifier().lower())
2351 return Error(Tok
.getLoc(), "Invalid conditional flags");
2354 return Error(Tok
.getLoc(), "Duplicated conditional flag");
2357 Parser
.Lex(); // Eat one conditional flag
2358 Tok
= Parser
.getTok();
2359 if (Tok
.is(AsmToken::RCurly
)) {
2360 End
= Tok
.getEndLoc();
2361 Operands
.push_back(X86Operand::CreateImm(
2362 MCConstantExpr::create(CFlags
, Parser
.getContext()), Start
, End
));
2363 Parser
.Lex(); // Eat "}"
// After the fourth flag only '}' is acceptable.
2365 } else if (I
== 3) {
2366 return Error(Tok
.getLoc(), "Expected } at this point");
2367 } else if (Tok
.isNot(AsmToken::Comma
)) {
2368 return Error(Tok
.getLoc(), "Expected } or , at this point");
2370 Parser
.Lex(); // Eat ","
2372 llvm_unreachable("Unexpected control flow");
2375 /// Parse the '.' operator.
///
/// Two token shapes are accepted:
///  * a Real token (".Imm" is lexed as a real): the digits after the dot are
///    parsed as a base-10 integer and used as the offset;
///  * an Identifier token, only in MS inline asm or MASM mode: the text is
///    looked up as a field reference through several lookUpField() overloads
///    and, as visible below, SemaCallback->LookupInlineAsmField(). A trailing
///    '.' is split off and pushed back as a separate Dot token.
/// On success the offset is added to SM as an immediate and the type info is
/// recorded. End is set from the start of the dot-expression text.
///
/// NOTE(review): the declarations of `Info` and `DotDisp`, part of the lookup
/// condition, the body of the token-eating loop and the return statements
/// are on lines not visible here.
2376 bool X86AsmParser::ParseIntelDotOperator(IntelExprStateMachine
&SM
,
2378 const AsmToken
&Tok
= getTok();
2381 // Drop the optional '.'.
2382 StringRef DotDispStr
= Tok
.getString();
2383 DotDispStr
.consume_front(".");
2384 StringRef TrailingDot
;
2386 // .Imm gets lexed as a real.
2387 if (Tok
.is(AsmToken::Real
)) {
2389 if (DotDispStr
.getAsInteger(10, DotDisp
))
2390 return Error(Tok
.getLoc(), "Unexpected offset");
2391 Info
.Offset
= DotDisp
.getZExtValue();
2392 } else if ((isParsingMSInlineAsm() || getParser().isParsingMasm()) &&
2393 Tok
.is(AsmToken::Identifier
)) {
2394 if (DotDispStr
.ends_with(".")) {
2395 TrailingDot
= DotDispStr
.substr(DotDispStr
.size() - 1);
2396 DotDispStr
= DotDispStr
.drop_back(1);
2398 const std::pair
<StringRef
, StringRef
> BaseMember
= DotDispStr
.split('.');
2399 const StringRef Base
= BaseMember
.first
, Member
= BaseMember
.second
;
// Each lookup is tried in turn; the error is reported only if all of them
// fail (the calls are chained with &&).
2400 if (getParser().lookUpField(SM
.getType(), DotDispStr
, Info
) &&
2401 getParser().lookUpField(SM
.getSymName(), DotDispStr
, Info
) &&
2402 getParser().lookUpField(DotDispStr
, Info
) &&
2404 SemaCallback
->LookupInlineAsmField(Base
, Member
, Info
.Offset
)))
2405 return Error(Tok
.getLoc(), "Unable to lookup field reference!");
2407 return Error(Tok
.getLoc(), "Unexpected token type!");
2410 // Eat the DotExpression and update End
2411 End
= SMLoc::getFromPointer(DotDispStr
.data());
2412 const char *DotExprEndLoc
= DotDispStr
.data() + DotDispStr
.size();
2413 while (Tok
.getLoc().getPointer() < DotExprEndLoc
)
2415 if (!TrailingDot
.empty())
2416 getLexer().UnLex(AsmToken(AsmToken::Dot
, TrailingDot
));
2417 SM
.addImm(Info
.Offset
);
2418 SM
.setTypeInfo(Info
.Type
);
2422 /// Parse the 'offset' operator.
2423 /// This operator is used to specify the location of a given operand
///
/// The 'offset' token is consumed first and ID is set to the text of the
/// token that follows it.
///  * Outside MS inline asm the operand must be an Identifier or String
///    token and is parsed with parsePrimaryExpr() into Val.
///  * In MS inline asm the operand is resolved through
///    ParseIntelInlineAsmIdentifier() (with IsParsingOffsetOperator=true);
///    enum constants are rejected.
///
/// NOTE(review): the line declaring the End parameter and the final return
/// are on lines not visible here.
2424 bool X86AsmParser::ParseIntelOffsetOperator(const MCExpr
*&Val
, StringRef
&ID
,
2425 InlineAsmIdentifierInfo
&Info
,
2427 // Eat offset, mark start of identifier.
2428 SMLoc Start
= Lex().getLoc();
2429 ID
= getTok().getString();
2430 if (!isParsingMSInlineAsm()) {
2431 if ((getTok().isNot(AsmToken::Identifier
) &&
2432 getTok().isNot(AsmToken::String
)) ||
2433 getParser().parsePrimaryExpr(Val
, End
, nullptr))
2434 return Error(Start
, "unexpected token!");
2435 } else if (ParseIntelInlineAsmIdentifier(Val
, ID
, Info
, false, End
, true)) {
2436 return Error(Start
, "unable to lookup expression");
2437 } else if (Info
.isKind(InlineAsmIdentifierInfo::IK_EnumVal
)) {
2438 return Error(Start
, "offset operator cannot yet handle constants");
2443 // Query a candidate string for being an Intel assembly operator.
2444 // Report back its kind, or IOK_INVALID if it does not match a known one.
// Only the all-uppercase and all-lowercase spellings of TYPE, SIZE and
// LENGTH are recognized (the match is case-sensitive).
2445 unsigned X86AsmParser::IdentifyIntelInlineAsmOperator(StringRef Name
) {
2446 return StringSwitch
<unsigned>(Name
)
2447 .Cases("TYPE","type",IOK_TYPE
)
2448 .Cases("SIZE","size",IOK_SIZE
)
2449 .Cases("LENGTH","length",IOK_LENGTH
)
2450 .Default(IOK_INVALID
);
2453 /// Parse the 'LENGTH', 'TYPE' and 'SIZE' operators. The LENGTH operator
2454 /// returns the number of elements in an array. It returns the value 1 for
2455 /// non-array variables. The SIZE operator returns the size of a C or C++
2456 /// variable. A variable's size is the product of its LENGTH and TYPE. The
2457 /// TYPE operator returns the size of a C or C++ type or variable. If the
2458 /// variable is an array, TYPE returns the size of a single element.
///
/// The operator token is consumed, then the operand identifier is resolved
/// through ParseIntelInlineAsmIdentifier() as an unevaluated operand. The
/// operand must be a variable (IK_Var); the result is taken from
/// Info.Var.Length / Size / Type according to OpKind.
///
/// NOTE(review): the declaration of CVal, the `switch` header, the rewrite
/// bookkeeping (if any) and the return statements are on lines not visible
/// here. A caller in this file treats a zero return as "nothing to add".
2459 unsigned X86AsmParser::ParseIntelInlineAsmOperator(unsigned OpKind
) {
2460 MCAsmParser
&Parser
= getParser();
2461 const AsmToken
&Tok
= Parser
.getTok();
2462 Parser
.Lex(); // Eat operator.
2464 const MCExpr
*Val
= nullptr;
2465 InlineAsmIdentifierInfo Info
;
2466 SMLoc Start
= Tok
.getLoc(), End
;
2467 StringRef Identifier
= Tok
.getString();
2468 if (ParseIntelInlineAsmIdentifier(Val
, Identifier
, Info
,
2469 /*IsUnevaluatedOperand=*/true, End
))
2472 if (!Info
.isKind(InlineAsmIdentifierInfo::IK_Var
)) {
2473 Error(Start
, "unable to lookup expression");
2479 default: llvm_unreachable("Unexpected operand kind!");
2480 case IOK_LENGTH
: CVal
= Info
.Var
.Length
; break;
2481 case IOK_SIZE
: CVal
= Info
.Var
.Size
; break;
2482 case IOK_TYPE
: CVal
= Info
.Var
.Type
; break;
2488 // Query a candidate string for being a MASM operator.
2489 // Report back its kind, or MOK_INVALID if it does not match a known one.
// The name is lower-cased first, so the match is case-insensitive:
// "type" -> MOK_TYPE, "size"/"sizeof" -> MOK_SIZEOF,
// "length"/"lengthof" -> MOK_LENGTHOF.
2490 unsigned X86AsmParser::IdentifyMasmOperator(StringRef Name
) {
2491 return StringSwitch
<unsigned>(Name
.lower())
2492 .Case("type", MOK_TYPE
)
2493 .Cases("size", "sizeof", MOK_SIZEOF
)
2494 .Cases("length", "lengthof", MOK_LENGTHOF
)
2495 .Default(MOK_INVALID
);
2498 /// Parse the 'LENGTHOF', 'SIZEOF', and 'TYPE' operators. The LENGTHOF operator
2499 /// returns the number of elements in an array. It returns the value 1 for
2500 /// non-array variables. The SIZEOF operator returns the size of a type or
2501 /// variable in bytes. A variable's size is the product of its LENGTH and TYPE.
2502 /// The TYPE operator returns the size of a variable. If the variable is an
2503 /// array, TYPE returns the size of a single element.
///
/// The operator token is consumed. For SIZEOF and TYPE the operand is first
/// checked for being a known type name, optionally wrapped in parentheses
/// (looked up via Parser.lookUpType). Otherwise the operand is parsed as an
/// Intel expression and the result is read from the state machine
/// (getLength / getElementSize are visible below). The value is returned
/// through Val.
///
/// NOTE(review): the declaration of `Type`, the assignment of Val in the
/// type-name path, the `switch` over OpKind, the condition leading to
/// "expression has unknown type" and the return statements are on lines not
/// visible here.
2504 bool X86AsmParser::ParseMasmOperator(unsigned OpKind
, int64_t &Val
) {
2505 MCAsmParser
&Parser
= getParser();
2506 SMLoc OpLoc
= Parser
.getTok().getLoc();
2507 Parser
.Lex(); // Eat operator.
2510 if (OpKind
== MOK_SIZEOF
|| OpKind
== MOK_TYPE
) {
2511 // Check for SIZEOF(<type>) and TYPE(<type>).
2512 bool InParens
= Parser
.getTok().is(AsmToken::LParen
);
// With parentheses the candidate type name is the token after '('.
2513 const AsmToken
&IDTok
= InParens
? getLexer().peekTok() : Parser
.getTok();
2515 if (IDTok
.is(AsmToken::Identifier
) &&
2516 !Parser
.lookUpType(IDTok
.getIdentifier(), Type
)) {
2521 parseToken(AsmToken::LParen
);
2522 parseToken(AsmToken::Identifier
);
2524 parseToken(AsmToken::RParen
);
2529 IntelExprStateMachine SM
;
2530 SMLoc End
, Start
= Parser
.getTok().getLoc();
2531 if (ParseIntelExpression(SM
, End
))
2536 llvm_unreachable("Unexpected operand kind!");
2541 Val
= SM
.getLength();
2544 Val
= SM
.getElementSize();
2549 return Error(OpLoc
, "expression has unknown type", SMRange(Start
, End
));
2555 bool X86AsmParser::ParseIntelMemoryOperandSize(unsigned &Size
) {
// Maps the spelling of the current token to an operand size and stores it in
// Size. Only the all-upper-case and all-lower-case spellings are listed and
// the token string is compared unmodified, so mixed-case spellings (e.g.
// "Byte") do not match any entry. The table values look like bit widths
// (BYTE -> 8 ... ZMMWORD -> 512).
// NOTE(review): the .Default(...) arm, the condition guarding the PTR check
// below and the final return are not visible in this excerpt.
2556 Size
= StringSwitch
<unsigned>(getTok().getString())
2557 .Cases("BYTE", "byte", 8)
2558 .Cases("WORD", "word", 16)
2559 .Cases("DWORD", "dword", 32)
2560 .Cases("FLOAT", "float", 32)
2561 .Cases("LONG", "long", 32)
2562 .Cases("FWORD", "fword", 48)
2563 .Cases("DOUBLE", "double", 64)
2564 .Cases("QWORD", "qword", 64)
2565 .Cases("MMWORD","mmword", 64)
2566 .Cases("XWORD", "xword", 80)
2567 .Cases("TBYTE", "tbyte", 80)
2568 .Cases("XMMWORD", "xmmword", 128)
2569 .Cases("YMMWORD", "ymmword", 256)
2570 .Cases("ZMMWORD", "zmmword", 512)
// Lex() yields the token following the size keyword; it must be spelled
// exactly "PTR" or "ptr", otherwise an error is reported at that token.
2573 const AsmToken
&Tok
= Lex(); // Eat operand size (e.g., byte, word).
2574 if (!(Tok
.getString() == "PTR" || Tok
.getString() == "ptr"))
2575 return Error(Tok
.getLoc(), "Expected 'PTR' or 'ptr' token!");
2581 bool X86AsmParser::parseIntelOperand(OperandVector
&Operands
, StringRef Name
) {
// Parses one Intel-syntax operand and appends the resulting X86Operand(s) to
// Operands. Name is the mnemonic; it is only used below to detect "jmp" and
// "call" (case-insensitively). The visible paths are, in order: optional size
// directive, rounding-mode operand, plain register or segment override, then
// a general Intel expression that becomes either an immediate or a memory
// operand.
// NOTE(review): a number of statements (declarations of Size/Start/End/RegNo/
// ErrMsg, several conditions and returns) are not visible in this excerpt;
// comments below only describe what the visible lines show.
2582 MCAsmParser
&Parser
= getParser();
2583 const AsmToken
&Tok
= Parser
.getTok();
2586 // Parse optional Size directive.
2588 if (ParseIntelMemoryOperandSize(Size
))
// A non-zero Size means a size directive was present in the operand.
2590 bool PtrInOperand
= bool(Size
);
2592 Start
= Tok
.getLoc();
2594 // Rounding mode operand.
2595 if (getLexer().is(AsmToken::LCurly
))
2596 return ParseRoundingModeOp(Start
, Operands
);
2598 // Register operand.
// parseRegister() returning false means a register was parsed into RegNo.
2600 if (Tok
.is(AsmToken::Identifier
) && !parseRegister(RegNo
, Start
, End
)) {
2601 if (RegNo
== X86::RIP
)
2602 return Error(Start
, "rip can only be used as a base register");
2603 // A Register followed by ':' is considered a segment override
2604 if (Tok
.isNot(AsmToken::Colon
)) {
// NOTE(review): the condition guarding this error is not visible here; from
// the message it presumably fires when a size directive preceded the register.
2606 return Error(Start
, "expected memory operand after 'ptr', "
2607 "found register operand instead");
2608 Operands
.push_back(X86Operand::CreateReg(RegNo
, Start
, End
));
2611 // An alleged segment override. Check whether we have a valid segment register.
2612 if (!X86MCRegisterClasses
[X86::SEGMENT_REGRegClassID
].contains(RegNo
))
2613 return Error(Start
, "invalid segment register");
2614 // Eat ':' and update Start location
2615 Start
= Lex().getLoc();
2618 // Immediates and Memory
2619 IntelExprStateMachine SM
;
2620 if (ParseIntelExpression(SM
, End
))
2623 if (isParsingMSInlineAsm())
2624 RewriteIntelExpression(SM
, Start
, Tok
.getLoc());
// Combine the symbolic part and the constant part of the expression into a
// single displacement. NOTE(review): the conditions selecting between the
// symbol, the constant and their sum are not visible in this excerpt.
2626 int64_t Imm
= SM
.getImm();
2627 const MCExpr
*Disp
= SM
.getSym();
2628 const MCExpr
*ImmDisp
= MCConstantExpr::create(Imm
, getContext());
2630 Disp
= MCBinaryExpr::createAdd(Disp
, ImmDisp
, getContext());
2634 // RegNo != 0 specifies a valid segment register,
2635 // and we are parsing a segment override
// Not a memory expression and no segment override: emit an immediate.
2636 if (!SM
.isMemExpr() && !RegNo
) {
2637 if (isParsingMSInlineAsm() && SM
.isOffsetOperator()) {
2638 const InlineAsmIdentifierInfo
&Info
= SM
.getIdentifierInfo();
2639 if (Info
.isKind(InlineAsmIdentifierInfo::IK_Var
)) {
2640 // Disp includes the address of a variable; make sure this is recorded
2641 // for later handling.
2642 Operands
.push_back(X86Operand::CreateImm(Disp
, Start
, End
,
2643 SM
.getSymName(), Info
.Var
.Decl
,
2644 Info
.Var
.IsGlobalLV
));
2649 Operands
.push_back(X86Operand::CreateImm(Disp
, Start
, End
));
// From here on the operand is a memory reference.
2654 unsigned BaseReg
= SM
.getBaseReg();
2655 unsigned IndexReg
= SM
.getIndexReg();
2656 if (IndexReg
&& BaseReg
== X86::RIP
)
2658 unsigned Scale
= SM
.getScale();
// "<< 3" multiplies the element size by 8; presumably this converts a byte
// count to the bit widths used by ParseIntelMemoryOperandSize -- TODO confirm.
// The condition guarding this assignment is not visible in this excerpt.
2660 Size
= SM
.getElementSize() << 3;
// With no explicit scale, ESP/RSP given as the index is swapped into the base
// position (unless the base already is ESP/RSP).
2662 if (Scale
== 0 && BaseReg
!= X86::ESP
&& BaseReg
!= X86::RSP
&&
2663 (IndexReg
== X86::ESP
|| IndexReg
== X86::RSP
))
2664 std::swap(BaseReg
, IndexReg
);
2666 // If BaseReg is a vector register and IndexReg is not, swap them unless
2667 // Scale was specified in which case it would be an error.
2669 !(X86MCRegisterClasses
[X86::VR128XRegClassID
].contains(IndexReg
) ||
2670 X86MCRegisterClasses
[X86::VR256XRegClassID
].contains(IndexReg
) ||
2671 X86MCRegisterClasses
[X86::VR512RegClassID
].contains(IndexReg
)) &&
2672 (X86MCRegisterClasses
[X86::VR128XRegClassID
].contains(BaseReg
) ||
2673 X86MCRegisterClasses
[X86::VR256XRegClassID
].contains(BaseReg
) ||
2674 X86MCRegisterClasses
[X86::VR512RegClassID
].contains(BaseReg
)))
2675 std::swap(BaseReg
, IndexReg
);
2678 X86MCRegisterClasses
[X86::GR16RegClassID
].contains(IndexReg
))
2679 return Error(Start
, "16-bit addresses cannot have a scale");
2681 // If there was no explicit scale specified, change it to 1.
2685 // If this is a 16-bit addressing mode with the base and index in the wrong
2686 // order, swap them so CheckBaseRegAndIndexRegAndScale doesn't fail. It is
2687 // shared with att syntax where order matters.
2688 if ((BaseReg
== X86::SI
|| BaseReg
== X86::DI
) &&
2689 (IndexReg
== X86::BX
|| IndexReg
== X86::BP
))
2690 std::swap(BaseReg
, IndexReg
);
// Register validation is skipped for displacement-only operands.
2692 if ((BaseReg
|| IndexReg
) &&
2693 CheckBaseRegAndIndexRegAndScale(BaseReg
, IndexReg
, Scale
, is64BitMode(),
2695 return Error(Start
, ErrMsg
);
2696 bool IsUnconditionalBranch
=
2697 Name
.equals_insensitive("jmp") || Name
.equals_insensitive("call");
// MS inline asm builds its memory operand through a dedicated helper.
2698 if (isParsingMSInlineAsm())
2699 return CreateMemForMSInlineAsm(RegNo
, Disp
, BaseReg
, IndexReg
, Scale
,
2700 IsUnconditionalBranch
&& is64BitMode(),
2701 Start
, End
, Size
, SM
.getSymName(),
2702 SM
.getIdentifierInfo(), Operands
);
2704 // When parsing x64 MS-style assembly, all non-absolute references to a named
2705 // variable default to RIP-relative.
2706 unsigned DefaultBaseReg
= X86::NoRegister
;
2707 bool MaybeDirectBranchDest
= true;
2709 if (Parser
.isParsingMasm()) {
2710 if (is64BitMode() &&
2711 ((PtrInOperand
&& !IndexReg
) || SM
.getElementSize() > 0)) {
2712 DefaultBaseReg
= X86::RIP
;
2714 if (IsUnconditionalBranch
) {
2716 MaybeDirectBranchDest
= false;
2718 DefaultBaseReg
= X86::RIP
;
2719 } else if (!BaseReg
&& !IndexReg
&& Disp
&&
2720 Disp
->getKind() == MCExpr::SymbolRef
) {
2721 if (is64BitMode()) {
2722 if (SM
.getSize() == 8) {
2723 MaybeDirectBranchDest
= false;
2724 DefaultBaseReg
= X86::RIP
;
2727 if (SM
.getSize() == 4 || SM
.getSize() == 2)
2728 MaybeDirectBranchDest
= false;
2732 } else if (IsUnconditionalBranch
) {
2733 // Treat `call [offset fn_ref]` (or `jmp`) syntax as an error.
2734 if (!PtrInOperand
&& SM
.isOffsetOperator())
2736 Start
, "`OFFSET` operator cannot be used in an unconditional branch");
// An explicit size directive or brackets mark the operand as not being a
// direct branch destination.
2737 if (PtrInOperand
|| SM
.isBracketUsed())
2738 MaybeDirectBranchDest
= false;
// Use the full (segment/base/index/scale) memory-operand form when any
// register component or a default base register is present; otherwise use
// the displacement-only form.
2741 if ((BaseReg
|| IndexReg
|| RegNo
|| DefaultBaseReg
!= X86::NoRegister
))
2742 Operands
.push_back(X86Operand::CreateMem(
2743 getPointerWidth(), RegNo
, Disp
, BaseReg
, IndexReg
, Scale
, Start
, End
,
2744 Size
, DefaultBaseReg
, /*SymName=*/StringRef(), /*OpDecl=*/nullptr,
2745 /*FrontendSize=*/0, /*UseUpRegs=*/false, MaybeDirectBranchDest
));
2747 Operands
.push_back(X86Operand::CreateMem(
2748 getPointerWidth(), Disp
, Start
, End
, Size
, /*SymName=*/StringRef(),
2749 /*OpDecl=*/nullptr, /*FrontendSize=*/0, /*UseUpRegs=*/false,
2750 MaybeDirectBranchDest
));
2754 bool X86AsmParser::parseATTOperand(OperandVector
&Operands
) {
// Parses one AT&T-syntax operand and appends it to Operands. Dispatches on the
// kind of the current token: '$' starts an immediate, '{' a rounding-mode
// operand, and anything else is a register or memory operand.
// NOTE(review): some statements (the default label, declarations of Val/Reg,
// a few returns) are not visible in this excerpt.
2755 MCAsmParser
&Parser
= getParser();
2756 switch (getLexer().getKind()) {
2757 case AsmToken::Dollar
: {
2758 // $42 or $ID -> immediate.
2759 SMLoc Start
= Parser
.getTok().getLoc(), End
;
2762 // This is an immediate, so we should not parse a register. Do a precheck
2763 // for '%' to supersede intra-register parse errors.
2764 SMLoc L
= Parser
.getTok().getLoc();
// Reject both a literal '%' and an expression that evaluates to a register
// (X86MCExpr) with the same diagnostic.
2765 if (check(getLexer().is(AsmToken::Percent
), L
,
2766 "expected immediate expression") ||
2767 getParser().parseExpression(Val
, End
) ||
2768 check(isa
<X86MCExpr
>(Val
), L
, "expected immediate expression"))
2770 Operands
.push_back(X86Operand::CreateImm(Val
, Start
, End
));
2773 case AsmToken::LCurly
: {
2774 SMLoc Start
= Parser
.getTok().getLoc();
2775 return ParseRoundingModeOp(Start
, Operands
);
2778 // This is a memory operand or a register. We have some parsing complications
2779 // as a '(' may be part of an immediate expression or the addressing mode
2780 // block. This is complicated by the fact that an assembler-level variable
2781 // may refer either to a register or an immediate expression.
2783 SMLoc Loc
= Parser
.getTok().getLoc(), EndLoc
;
2784 const MCExpr
*Expr
= nullptr;
2786 if (getLexer().isNot(AsmToken::LParen
)) {
2787 // No '(' so this is either a displacement expression or a register.
2788 if (Parser
.parseExpression(Expr
, EndLoc
))
2790 if (auto *RE
= dyn_cast
<X86MCExpr
>(Expr
)) {
2791 // Segment Register. Reset Expr and copy value to register.
2793 Reg
= RE
->getRegNo();
2795 // Check the register.
2796 if (Reg
== X86::EIZ
|| Reg
== X86::RIZ
)
2798 Loc
, "%eiz and %riz can only be used as index registers",
2799 SMRange(Loc
, EndLoc
));
2800 if (Reg
== X86::RIP
)
2801 return Error(Loc
, "%rip can only be used as a base register",
2802 SMRange(Loc
, EndLoc
));
2803 // Return registers that are not segment prefixes immediately.
2804 if (!Parser
.parseOptionalToken(AsmToken::Colon
)) {
2805 Operands
.push_back(X86Operand::CreateReg(Reg
, Loc
, EndLoc
));
// A ':' followed the register, so it must be a segment register.
2808 if (!X86MCRegisterClasses
[X86::SEGMENT_REGRegClassID
].contains(Reg
))
2809 return Error(Loc
, "invalid segment register");
2810 // Accept a '*' absolute memory reference after the segment. Place it
2811 // before the full memory operand.
2812 if (getLexer().is(AsmToken::Star
))
2813 Operands
.push_back(X86Operand::CreateToken("*", consumeToken()));
2816 // This is a Memory operand.
2817 return ParseMemOperand(Reg
, Expr
, Loc
, EndLoc
, Operands
);
2822 // Returns X86::COND_INVALID if CC is not a recognized condition code or
2823 // alternate mnemonic, otherwise the EFLAGS Condition Code enumerator.
// CC is compared unmodified, so only the lower-case spellings listed below
// match. Synonymous mnemonics (e.g. "b"/"nae") map to the same enumerator.
2824 X86::CondCode
X86AsmParser::ParseConditionCode(StringRef CC
) {
2825 return StringSwitch
<X86::CondCode
>(CC
)
2826 .Case("o", X86::COND_O
) // Overflow
2827 .Case("no", X86::COND_NO
) // No Overflow
2828 .Cases("b", "nae", X86::COND_B
) // Below/Neither Above nor Equal
2829 .Cases("ae", "nb", X86::COND_AE
) // Above or Equal/Not Below
2830 .Cases("e", "z", X86::COND_E
) // Equal/Zero
2831 .Cases("ne", "nz", X86::COND_NE
) // Not Equal/Not Zero
2832 .Cases("be", "na", X86::COND_BE
) // Below or Equal/Not Above
2833 .Cases("a", "nbe", X86::COND_A
) // Above/Neither Below nor Equal
2834 .Case("s", X86::COND_S
) // Sign
2835 .Case("ns", X86::COND_NS
) // No Sign
2836 .Cases("p", "pe", X86::COND_P
) // Parity/Parity Even
2837 .Cases("np", "po", X86::COND_NP
) // No Parity/Parity Odd
2838 .Cases("l", "nge", X86::COND_L
) // Less/Neither Greater nor Equal
2839 .Cases("ge", "nl", X86::COND_GE
) // Greater or Equal/Not Less
2840 .Cases("le", "ng", X86::COND_LE
) // Less or Equal/Not Greater
2841 .Cases("g", "nle", X86::COND_G
) // Greater/Neither Less nor Equal
2842 .Default(X86::COND_INVALID
);
2845 // Returns true on failure, false otherwise.
2846 // If no {z} mark was found, the parser doesn't advance and Z is left as is.
// On success Z is set to a "{z}" token operand located at StartLoc.
// NOTE(review): the early-exit and final return statements are not visible in
// this excerpt.
2847 bool X86AsmParser::ParseZ(std::unique_ptr
<X86Operand
> &Z
,
2848 const SMLoc
&StartLoc
) {
2849 MCAsmParser
&Parser
= getParser();
2850 // Assuming we are just past the '{' mark, query the next token.
2851 // Searched for {z}, but none was found. Return false, as no parsing error was
// The identifier must be spelled exactly "z" (lower case).
2853 if (!(getLexer().is(AsmToken::Identifier
) &&
2854 (getLexer().getTok().getIdentifier() == "z")))
2856 Parser
.Lex(); // Eat z
2857 // Query and eat the '}' mark
2858 if (!getLexer().is(AsmToken::RCurly
))
2859 return Error(getLexer().getLoc(), "Expected } at this point");
2860 Parser
.Lex(); // Eat '}'
2861 // Assign Z with the {z} mark operand
2862 Z
= X86Operand::CreateToken("{z}", StartLoc
);
2866 // Returns true on failure, false otherwise.
// Handles the AVX-512 decorations that may follow an operand and appends the
// corresponding tokens/registers to Operands: a memory broadcast
// ({1to<NUM>}), an op-mask register ({%k<NUM>}) and/or a {z} mark.
// NOTE(review): a few statements (the .Default arm of the broadcast switch,
// declarations of RegNo/RegLoc, some returns) are not visible in this excerpt.
2867 bool X86AsmParser::HandleAVX512Operand(OperandVector
&Operands
) {
2868 MCAsmParser
&Parser
= getParser();
2869 if (getLexer().is(AsmToken::LCurly
)) {
2870 // Eat "{" and mark the current place.
2871 const SMLoc consumedToken
= consumeToken();
2872 // Distinguish {1to<NUM>} from {%k<NUM>}.
2873 if(getLexer().is(AsmToken::Integer
)) {
2874 // Parse memory broadcasting ({1to<NUM>}).
2875 if (getLexer().getTok().getIntVal() != 1)
2876 return TokError("Expected 1to<NUM> at this point");
// The lexer delivers "1toN" as an integer token followed by an identifier
// token; the two spellings are concatenated below before matching.
2877 StringRef Prefix
= getLexer().getTok().getString();
2878 Parser
.Lex(); // Eat first token of 1to8
2879 if (!getLexer().is(AsmToken::Identifier
))
2880 return TokError("Expected 1to<NUM> at this point");
2881 // Recognize only reasonable suffixes.
2882 SmallVector
<char, 5> BroadcastVector
;
2883 StringRef BroadcastString
= (Prefix
+ getLexer().getTok().getIdentifier())
2884 .toStringRef(BroadcastVector
);
2885 if (!BroadcastString
.starts_with("1to"))
2886 return TokError("Expected 1to<NUM> at this point");
2887 const char *BroadcastPrimitive
=
2888 StringSwitch
<const char *>(BroadcastString
)
2889 .Case("1to2", "{1to2}")
2890 .Case("1to4", "{1to4}")
2891 .Case("1to8", "{1to8}")
2892 .Case("1to16", "{1to16}")
2893 .Case("1to32", "{1to32}")
2895 if (!BroadcastPrimitive
)
2896 return TokError("Invalid memory broadcast primitive.");
2897 Parser
.Lex(); // Eat trailing token of 1toN
2898 if (!getLexer().is(AsmToken::RCurly
))
2899 return TokError("Expected } at this point");
2900 Parser
.Lex(); // Eat "}"
2901 Operands
.push_back(X86Operand::CreateToken(BroadcastPrimitive
,
2903 // No AVX512 specific primitives can pass
2904 // after memory broadcasting, so return.
2907 // Parse either {k}{z}, {z}{k}, {k} or {z}.
2908 // The last one has no meaning, but GCC accepts it.
2909 // Currently, we're just past a '{' mark.
2910 std::unique_ptr
<X86Operand
> Z
;
2911 if (ParseZ(Z
, consumedToken
))
2913 // Reaching here means that parsing of the allegedly '{z}' mark yielded
2915 // Query for the need of further parsing for a {%k<NUM>} mark
2916 if (!Z
|| getLexer().is(AsmToken::LCurly
)) {
// If {z} was already consumed, a new '{' has to be eaten here; otherwise the
// '{' eaten at the top is the start of the op-mask mark.
2917 SMLoc StartLoc
= Z
? consumeToken() : consumedToken
;
2918 // Parse an op-mask register mark ({%k<NUM>}), which is now to be
// NOTE(review): StartLoc is passed as parseRegister's third argument. Other
// calls in this file pass an end location there (parseRegister(RegNo, Start,
// End)); if it is an out-parameter, StartLoc no longer holds the '{' location
// when it is used for the tokens below -- confirm this is intended.
2922 if (!parseRegister(RegNo
, RegLoc
, StartLoc
) &&
2923 X86MCRegisterClasses
[X86::VK1RegClassID
].contains(RegNo
)) {
2924 if (RegNo
== X86::K0
)
2925 return Error(RegLoc
, "Register k0 can't be used as write mask");
2926 if (!getLexer().is(AsmToken::RCurly
))
2927 return Error(getLexer().getLoc(), "Expected } at this point");
2928 Operands
.push_back(X86Operand::CreateToken("{", StartLoc
));
2930 X86Operand::CreateReg(RegNo
, StartLoc
, StartLoc
));
2931 Operands
.push_back(X86Operand::CreateToken("}", consumeToken()));
2933 return Error(getLexer().getLoc(),
2934 "Expected an op-mask register at this point");
2935 // {%k<NUM>} mark is found, inquire for {z}
2936 if (getLexer().is(AsmToken::LCurly
) && !Z
) {
2937 // If we've found a parsing error, or found no (expected) {z} mark
2938 // - report an error
2939 if (ParseZ(Z
, consumeToken()) || !Z
)
2940 return Error(getLexer().getLoc(),
2941 "Expected a {z} mark at this point");
2944 // '{z}' on its own is meaningless, hence should be ignored.
2945 // On the contrary - had it been accompanied by a K register,
2948 Operands
.push_back(std::move(Z
));
2955 /// ParseMemOperand: 'seg : disp(basereg, indexreg, scale)'. The '%ds:' prefix
2956 /// has already been parsed if present. disp may be provided as well.
///
/// \param SegReg segment register already parsed by the caller, or 0.
/// \param Disp displacement already parsed by the caller, or null.
/// \param StartLoc,EndLoc source range of the operand parsed so far.
/// \param Operands receives the resulting operand.
/// NOTE(review): several statements (declarations of Buf/Id/Loc/E/ScaleVal/
/// ErrMsg, some conditions and returns) are not visible in this excerpt;
/// comments below only describe what the visible lines show.
2957 bool X86AsmParser::ParseMemOperand(unsigned SegReg
, const MCExpr
*Disp
,
2958 SMLoc StartLoc
, SMLoc EndLoc
,
2959 OperandVector
&Operands
) {
2960 MCAsmParser
&Parser
= getParser();
2962 // Based on the initial passed values, we are
2963 // in one of these cases (with current position (*)):
2965 // 1. seg : * disp (base-index-scale-expr)
2966 // 2. seg : *(disp) (base-index-scale-expr)
2967 // 3. seg : *(base-index-scale-expr)
2968 // 4. disp *(base-index-scale-expr)
2969 // 5. *(disp) (base-index-scale-expr)
2970 // 6. *(base-index-scale-expr)
2974 // If we do not have a displacement yet, check if we're in cases 4 or 6 by
2975 // checking if the first object after the parenthesis is a register (or an
2976 // identifier referring to a register) and parse the displacement or default
2977 // to 0 as appropriate.
2978 auto isAtMemOperand
= [this]() {
2979 if (this->getLexer().isNot(AsmToken::LParen
))
// Look ahead past the '(' without consuming anything.
2983 auto TokCount
= this->getLexer().peekTokens(Buf
, true);
2986 switch (Buf
[0].getKind()) {
2987 case AsmToken::Percent
:
2988 case AsmToken::Comma
:
2990 // These lower cases are doing a peekIdentifier.
2992 case AsmToken::Dollar
:
// A '$' immediately followed (no gap between the source pointers) by an
// identifier or string is treated as one identifier that starts at the '$'.
2993 if ((TokCount
> 1) &&
2994 (Buf
[1].is(AsmToken::Identifier
) || Buf
[1].is(AsmToken::String
)) &&
2995 (Buf
[0].getLoc().getPointer() + 1 == Buf
[1].getLoc().getPointer()))
2996 Id
= StringRef(Buf
[0].getLoc().getPointer(),
2997 Buf
[1].getIdentifier().size() + 1);
2999 case AsmToken::Identifier
:
3000 case AsmToken::String
:
3001 Id
= Buf
[0].getIdentifier();
3006 // We have an ID. Check if it is bound to a register.
3008 MCSymbol
*Sym
= this->getContext().getOrCreateSymbol(Id
);
3009 if (Sym
->isVariable()) {
// Inspect the variable's value without marking the symbol as used.
3010 auto V
= Sym
->getVariableValue(/*SetUsed*/ false);
3011 return isa
<X86MCExpr
>(V
);
3018 // Parse immediate if we're not at a mem operand yet.
3019 if (!isAtMemOperand()) {
3020 if (Parser
.parseTokenLoc(Loc
) || Parser
.parseExpression(Disp
, EndLoc
))
3022 assert(!isa
<X86MCExpr
>(Disp
) && "Expected non-register here.");
3024 // Disp is implicitly zero if we haven't parsed it yet.
3025 Disp
= MCConstantExpr::create(0, Parser
.getContext());
3029 // We are now either at the end of the operand or at the '(' at the start of a
3030 // base-index-scale-expr.
// No '(' follows: the operand is complete and has no base/index registers.
3032 if (!parseOptionalToken(AsmToken::LParen
)) {
3035 X86Operand::CreateMem(getPointerWidth(), Disp
, StartLoc
, EndLoc
));
3037 Operands
.push_back(X86Operand::CreateMem(getPointerWidth(), SegReg
, Disp
,
3038 0, 0, 1, StartLoc
, EndLoc
));
3042 // If we reached here, then eat the '(' and process
3043 // the rest of the memory operand.
3044 unsigned BaseReg
= 0, IndexReg
= 0, Scale
= 1;
3045 SMLoc BaseLoc
= getLexer().getLoc();
3049 // Parse BaseReg if one is provided.
3050 if (getLexer().isNot(AsmToken::Comma
) && getLexer().isNot(AsmToken::RParen
)) {
3051 if (Parser
.parseExpression(E
, EndLoc
) ||
3052 check(!isa
<X86MCExpr
>(E
), BaseLoc
, "expected register here"))
3055 // Check the register.
3056 BaseReg
= cast
<X86MCExpr
>(E
)->getRegNo();
3057 if (BaseReg
== X86::EIZ
|| BaseReg
== X86::RIZ
)
3058 return Error(BaseLoc
, "eiz and riz can only be used as index registers",
3059 SMRange(BaseLoc
, EndLoc
));
3062 if (parseOptionalToken(AsmToken::Comma
)) {
3063 // Following the comma we should have either an index register, or a scale
3064 // value. We don't support the latter form, but we want to parse it
3067 // Even though it would be completely consistent to support syntax like
3068 // "1(%eax,,1)", the assembler doesn't. Use "eiz" or "riz" for this.
3069 if (getLexer().isNot(AsmToken::RParen
)) {
3070 if (Parser
.parseTokenLoc(Loc
) || Parser
.parseExpression(E
, EndLoc
))
3073 if (!isa
<X86MCExpr
>(E
)) {
3074 // We've parsed an unexpected Scale Value instead of an index
3075 // register. Interpret it as an absolute.
3077 if (!E
->evaluateAsAbsolute(ScaleVal
, getStreamer().getAssemblerPtr()))
3078 return Error(Loc
, "expected absolute expression");
3080 Warning(Loc
, "scale factor without index register is ignored");
3082 } else { // IndexReg Found.
3083 IndexReg
= cast
<X86MCExpr
>(E
)->getRegNo();
3085 if (BaseReg
== X86::RIP
)
3087 "%rip as base register can not have an index register");
3088 if (IndexReg
== X86::RIP
)
3089 return Error(Loc
, "%rip is not allowed as an index register");
3091 if (parseOptionalToken(AsmToken::Comma
)) {
3092 // Parse the scale amount:
3093 // ::= ',' [scale-expression]
3095 // A scale amount without an index is ignored.
3096 if (getLexer().isNot(AsmToken::RParen
)) {
3098 if (Parser
.parseTokenLoc(Loc
) ||
3099 Parser
.parseAbsoluteExpression(ScaleVal
))
3100 return Error(Loc
, "expected scale expression");
3101 Scale
= (unsigned)ScaleVal
;
3102 // Validate the scale amount.
3103 if (X86MCRegisterClasses
[X86::GR16RegClassID
].contains(BaseReg
) &&
3105 return Error(Loc
, "scale factor in 16-bit address must be 1");
3106 if (checkScale(Scale
, ErrMsg
))
3107 return Error(Loc
, ErrMsg
);
3114 // Ok, we've eaten the memory operand, verify we have a ')' and eat it too.
3115 if (parseToken(AsmToken::RParen
, "unexpected token in memory operand"))
3118 // This is to support the otherwise illegal operand (%dx) found in various
3119 // unofficial manuals' examples (e.g. "out[s]?[bwl]? %al, (%dx)"), which must
3120 // now be supported. Mark such DX variants separately; fix only in special cases.
// Only a bare "(%dx)" qualifies: no index, scale 1, no segment, zero constant
// displacement.
3121 if (BaseReg
== X86::DX
&& IndexReg
== 0 && Scale
== 1 && SegReg
== 0 &&
3122 isa
<MCConstantExpr
>(Disp
) &&
3123 cast
<MCConstantExpr
>(Disp
)->getValue() == 0) {
3124 Operands
.push_back(X86Operand::CreateDXReg(BaseLoc
, BaseLoc
));
3128 if (CheckBaseRegAndIndexRegAndScale(BaseReg
, IndexReg
, Scale
, is64BitMode(),
3130 return Error(BaseLoc
, ErrMsg
);
3132 // If the displacement is a constant, check overflows. For 64-bit addressing,
3133 // gas requires isInt<32> and otherwise reports an error. For others, gas
3134 // reports a warning and allows a wider range. E.g. gas allows
3135 // [-0xffffffff,0xffffffff] for 32-bit addressing (e.g. Linux kernel uses
3136 // `leal -__PAGE_OFFSET(%ecx),%esp` where __PAGE_OFFSET is 0xc0000000).
3137 if (BaseReg
|| IndexReg
) {
3138 if (auto CE
= dyn_cast
<MCConstantExpr
>(Disp
)) {
3139 auto Imm
= CE
->getValue();
// 64-bit addressing if either register is a 64-bit GPR; 16-bit addressing is
// decided from the base register only.
3140 bool Is64
= X86MCRegisterClasses
[X86::GR64RegClassID
].contains(BaseReg
) ||
3141 X86MCRegisterClasses
[X86::GR64RegClassID
].contains(IndexReg
);
3142 bool Is16
= X86MCRegisterClasses
[X86::GR16RegClassID
].contains(BaseReg
);
3144 if (!isInt
<32>(Imm
))
3145 return Error(BaseLoc
, "displacement " + Twine(Imm
) +
3146 " is not within [-2147483648, 2147483647]");
// The magnitude is computed in unsigned arithmetic (-uint64_t(Imm)) so that
// negating the most negative value does not overflow a signed type.
3148 if (!isUInt
<32>(Imm
< 0 ? -uint64_t(Imm
) : uint64_t(Imm
))) {
3149 Warning(BaseLoc
, "displacement " + Twine(Imm
) +
3150 " shortened to 32-bit signed " +
3151 Twine(static_cast<int32_t>(Imm
)));
3153 } else if (!isUInt
<16>(Imm
< 0 ? -uint64_t(Imm
) : uint64_t(Imm
))) {
3154 Warning(BaseLoc
, "displacement " + Twine(Imm
) +
3155 " shortened to 16-bit signed " +
3156 Twine(static_cast<int16_t>(Imm
)));
// Use the full memory-operand form when any register component is present,
// otherwise the displacement-only form.
3161 if (SegReg
|| BaseReg
|| IndexReg
)
3162 Operands
.push_back(X86Operand::CreateMem(getPointerWidth(), SegReg
, Disp
,
3163 BaseReg
, IndexReg
, Scale
, StartLoc
,
3167 X86Operand::CreateMem(getPointerWidth(), Disp
, StartLoc
, EndLoc
));
3171 // Parse either a standard primary expression or a register.
// A register is wrapped in an X86MCExpr and stored in Res; anything else is
// delegated to the generic parser's parsePrimaryExpr. EndLoc receives the end
// location of whatever was parsed.
// NOTE(review): RegNo's declaration and the returns inside the register
// branch are not visible in this excerpt.
3172 bool X86AsmParser::parsePrimaryExpr(const MCExpr
*&Res
, SMLoc
&EndLoc
) {
3173 MCAsmParser
&Parser
= getParser();
3174 // See if this is a register first.
// A '%' always starts a register; a bare identifier is only tried as a
// register when parsing Intel syntax and MatchRegisterName recognizes it.
3175 if (getTok().is(AsmToken::Percent
) ||
3176 (isParsingIntelSyntax() && getTok().is(AsmToken::Identifier
) &&
3177 MatchRegisterName(Parser
.getTok().getString()))) {
3178 SMLoc StartLoc
= Parser
.getTok().getLoc();
3180 if (parseRegister(RegNo
, StartLoc
, EndLoc
))
3182 Res
= X86MCExpr::create(RegNo
, Parser
.getContext());
3185 return Parser
.parsePrimaryExpr(Res
, EndLoc
, nullptr);
3188 bool X86AsmParser::ParseInstruction(ParseInstructionInfo
&Info
, StringRef Name
,
3189 SMLoc NameLoc
, OperandVector
&Operands
) {
3190 MCAsmParser
&Parser
= getParser();
3193 // Reset the forced VEX encoding.
3194 ForcedOpcodePrefix
= OpcodePrefix_Default
;
3195 ForcedDispEncoding
= DispEncoding_Default
;
3196 UseApxExtendedReg
= false;
3197 ForcedNoFlag
= false;
3199 // Parse pseudo prefixes.
3202 if (getLexer().isNot(AsmToken::Identifier
))
3203 return Error(Parser
.getTok().getLoc(), "Unexpected token after '{'");
3204 std::string Prefix
= Parser
.getTok().getString().lower();
3205 Parser
.Lex(); // Eat identifier.
3206 if (getLexer().isNot(AsmToken::RCurly
))
3207 return Error(Parser
.getTok().getLoc(), "Expected '}'");
3208 Parser
.Lex(); // Eat curly.
3210 if (Prefix
== "rex")
3211 ForcedOpcodePrefix
= OpcodePrefix_REX
;
3212 else if (Prefix
== "rex2")
3213 ForcedOpcodePrefix
= OpcodePrefix_REX2
;
3214 else if (Prefix
== "vex")
3215 ForcedOpcodePrefix
= OpcodePrefix_VEX
;
3216 else if (Prefix
== "vex2")
3217 ForcedOpcodePrefix
= OpcodePrefix_VEX2
;
3218 else if (Prefix
== "vex3")
3219 ForcedOpcodePrefix
= OpcodePrefix_VEX3
;
3220 else if (Prefix
== "evex")
3221 ForcedOpcodePrefix
= OpcodePrefix_EVEX
;
3222 else if (Prefix
== "disp8")
3223 ForcedDispEncoding
= DispEncoding_Disp8
;
3224 else if (Prefix
== "disp32")
3225 ForcedDispEncoding
= DispEncoding_Disp32
;
3226 else if (Prefix
== "nf")
3227 ForcedNoFlag
= true;
3229 return Error(NameLoc
, "unknown prefix");
3231 NameLoc
= Parser
.getTok().getLoc();
3232 if (getLexer().is(AsmToken::LCurly
)) {
3236 if (getLexer().isNot(AsmToken::Identifier
))
3237 return Error(Parser
.getTok().getLoc(), "Expected identifier");
3238 // FIXME: The mnemonic won't match correctly if its not in lower case.
3239 Name
= Parser
.getTok().getString();
3244 // Parse MASM style pseudo prefixes.
3245 if (isParsingMSInlineAsm()) {
3246 if (Name
.equals_insensitive("vex"))
3247 ForcedOpcodePrefix
= OpcodePrefix_VEX
;
3248 else if (Name
.equals_insensitive("vex2"))
3249 ForcedOpcodePrefix
= OpcodePrefix_VEX2
;
3250 else if (Name
.equals_insensitive("vex3"))
3251 ForcedOpcodePrefix
= OpcodePrefix_VEX3
;
3252 else if (Name
.equals_insensitive("evex"))
3253 ForcedOpcodePrefix
= OpcodePrefix_EVEX
;
3255 if (ForcedOpcodePrefix
!= OpcodePrefix_Default
) {
3256 if (getLexer().isNot(AsmToken::Identifier
))
3257 return Error(Parser
.getTok().getLoc(), "Expected identifier");
3258 // FIXME: The mnemonic won't match correctly if its not in lower case.
3259 Name
= Parser
.getTok().getString();
3260 NameLoc
= Parser
.getTok().getLoc();
3267 // Support the suffix syntax for overriding displacement size as well.
3268 if (Name
.consume_back(".d32")) {
3269 ForcedDispEncoding
= DispEncoding_Disp32
;
3270 } else if (Name
.consume_back(".d8")) {
3271 ForcedDispEncoding
= DispEncoding_Disp8
;
3274 StringRef PatchedName
= Name
;
3276 // Hack to skip "short" following Jcc.
3277 if (isParsingIntelSyntax() &&
3278 (PatchedName
== "jmp" || PatchedName
== "jc" || PatchedName
== "jnc" ||
3279 PatchedName
== "jcxz" || PatchedName
== "jecxz" ||
3280 (PatchedName
.starts_with("j") &&
3281 ParseConditionCode(PatchedName
.substr(1)) != X86::COND_INVALID
))) {
3282 StringRef NextTok
= Parser
.getTok().getString();
3283 if (Parser
.isParsingMasm() ? NextTok
.equals_insensitive("short")
3284 : NextTok
== "short") {
3286 NameLoc
.getFromPointer(NameLoc
.getPointer() + Name
.size());
3287 // Eat the short keyword.
3289 // MS and GAS ignore the short keyword; they both determine the jmp type
3290 // based on the distance of the label. (NASM does emit different code with
3291 // and without "short," though.)
3292 InstInfo
->AsmRewrites
->emplace_back(AOK_Skip
, NameEndLoc
,
3293 NextTok
.size() + 1);
3297 // FIXME: Hack to recognize setneb as setne.
3298 if (PatchedName
.starts_with("set") && PatchedName
.ends_with("b") &&
3299 PatchedName
!= "setzub" && PatchedName
!= "setzunb" &&
3300 PatchedName
!= "setb" && PatchedName
!= "setnb")
3301 PatchedName
= PatchedName
.substr(0, Name
.size()-1);
3303 unsigned ComparisonPredicate
= ~0U;
3305 // FIXME: Hack to recognize cmp<comparison code>{sh,ss,sd,ph,ps,pd}.
3306 if ((PatchedName
.starts_with("cmp") || PatchedName
.starts_with("vcmp")) &&
3307 (PatchedName
.ends_with("ss") || PatchedName
.ends_with("sd") ||
3308 PatchedName
.ends_with("sh") || PatchedName
.ends_with("ph") ||
3309 PatchedName
.ends_with("ps") || PatchedName
.ends_with("pd"))) {
3310 bool IsVCMP
= PatchedName
[0] == 'v';
3311 unsigned CCIdx
= IsVCMP
? 4 : 3;
3312 unsigned CC
= StringSwitch
<unsigned>(
3313 PatchedName
.slice(CCIdx
, PatchedName
.size() - 2))
3315 .Case("eq_oq", 0x00)
3317 .Case("lt_os", 0x01)
3319 .Case("le_os", 0x02)
3320 .Case("unord", 0x03)
3321 .Case("unord_q", 0x03)
3323 .Case("neq_uq", 0x04)
3325 .Case("nlt_us", 0x05)
3327 .Case("nle_us", 0x06)
3329 .Case("ord_q", 0x07)
3330 /* AVX only from here */
3331 .Case("eq_uq", 0x08)
3333 .Case("nge_us", 0x09)
3335 .Case("ngt_us", 0x0A)
3336 .Case("false", 0x0B)
3337 .Case("false_oq", 0x0B)
3338 .Case("neq_oq", 0x0C)
3340 .Case("ge_os", 0x0D)
3342 .Case("gt_os", 0x0E)
3344 .Case("true_uq", 0x0F)
3345 .Case("eq_os", 0x10)
3346 .Case("lt_oq", 0x11)
3347 .Case("le_oq", 0x12)
3348 .Case("unord_s", 0x13)
3349 .Case("neq_us", 0x14)
3350 .Case("nlt_uq", 0x15)
3351 .Case("nle_uq", 0x16)
3352 .Case("ord_s", 0x17)
3353 .Case("eq_us", 0x18)
3354 .Case("nge_uq", 0x19)
3355 .Case("ngt_uq", 0x1A)
3356 .Case("false_os", 0x1B)
3357 .Case("neq_os", 0x1C)
3358 .Case("ge_oq", 0x1D)
3359 .Case("gt_oq", 0x1E)
3360 .Case("true_us", 0x1F)
3362 if (CC
!= ~0U && (IsVCMP
|| CC
< 8) &&
3363 (IsVCMP
|| PatchedName
.back() != 'h')) {
3364 if (PatchedName
.ends_with("ss"))
3365 PatchedName
= IsVCMP
? "vcmpss" : "cmpss";
3366 else if (PatchedName
.ends_with("sd"))
3367 PatchedName
= IsVCMP
? "vcmpsd" : "cmpsd";
3368 else if (PatchedName
.ends_with("ps"))
3369 PatchedName
= IsVCMP
? "vcmpps" : "cmpps";
3370 else if (PatchedName
.ends_with("pd"))
3371 PatchedName
= IsVCMP
? "vcmppd" : "cmppd";
3372 else if (PatchedName
.ends_with("sh"))
3373 PatchedName
= "vcmpsh";
3374 else if (PatchedName
.ends_with("ph"))
3375 PatchedName
= "vcmpph";
3377 llvm_unreachable("Unexpected suffix!");
3379 ComparisonPredicate
= CC
;
3383 // FIXME: Hack to recognize vpcmp<comparison code>{ub,uw,ud,uq,b,w,d,q}.
3384 if (PatchedName
.starts_with("vpcmp") &&
3385 (PatchedName
.back() == 'b' || PatchedName
.back() == 'w' ||
3386 PatchedName
.back() == 'd' || PatchedName
.back() == 'q')) {
3387 unsigned SuffixSize
= PatchedName
.drop_back().back() == 'u' ? 2 : 1;
3388 unsigned CC
= StringSwitch
<unsigned>(
3389 PatchedName
.slice(5, PatchedName
.size() - SuffixSize
))
3390 .Case("eq", 0x0) // Only allowed on unsigned. Checked below.
3393 //.Case("false", 0x3) // Not a documented alias.
3397 //.Case("true", 0x7) // Not a documented alias.
3399 if (CC
!= ~0U && (CC
!= 0 || SuffixSize
== 2)) {
3400 switch (PatchedName
.back()) {
3401 default: llvm_unreachable("Unexpected character!");
3402 case 'b': PatchedName
= SuffixSize
== 2 ? "vpcmpub" : "vpcmpb"; break;
3403 case 'w': PatchedName
= SuffixSize
== 2 ? "vpcmpuw" : "vpcmpw"; break;
3404 case 'd': PatchedName
= SuffixSize
== 2 ? "vpcmpud" : "vpcmpd"; break;
3405 case 'q': PatchedName
= SuffixSize
== 2 ? "vpcmpuq" : "vpcmpq"; break;
3407 // Set up the immediate to push into the operands later.
3408 ComparisonPredicate
= CC
;
3412 // FIXME: Hack to recognize vpcom<comparison code>{ub,uw,ud,uq,b,w,d,q}.
3413 if (PatchedName
.starts_with("vpcom") &&
3414 (PatchedName
.back() == 'b' || PatchedName
.back() == 'w' ||
3415 PatchedName
.back() == 'd' || PatchedName
.back() == 'q')) {
3416 unsigned SuffixSize
= PatchedName
.drop_back().back() == 'u' ? 2 : 1;
3417 unsigned CC
= StringSwitch
<unsigned>(
3418 PatchedName
.slice(5, PatchedName
.size() - SuffixSize
))
3429 switch (PatchedName
.back()) {
3430 default: llvm_unreachable("Unexpected character!");
3431 case 'b': PatchedName
= SuffixSize
== 2 ? "vpcomub" : "vpcomb"; break;
3432 case 'w': PatchedName
= SuffixSize
== 2 ? "vpcomuw" : "vpcomw"; break;
3433 case 'd': PatchedName
= SuffixSize
== 2 ? "vpcomud" : "vpcomd"; break;
3434 case 'q': PatchedName
= SuffixSize
== 2 ? "vpcomuq" : "vpcomq"; break;
3436 // Set up the immediate to push into the operands later.
3437 ComparisonPredicate
= CC
;
3441 // Determine whether this is an instruction prefix.
3443 // Enhance prefixes integrity robustness. for example, following forms
3444 // are currently tolerated:
3445 // repz repnz <insn> ; GAS errors for the use of two similar prefixes
3446 // lock addq %rax, %rbx ; Destination operand must be of memory type
3447 // xacquire <insn> ; xacquire must be accompanied by 'lock'
3449 StringSwitch
<bool>(Name
)
3450 .Cases("cs", "ds", "es", "fs", "gs", "ss", true)
3451 .Cases("rex64", "data32", "data16", "addr32", "addr16", true)
3452 .Cases("xacquire", "xrelease", true)
3453 .Cases("acquire", "release", isParsingIntelSyntax())
3456 auto isLockRepeatNtPrefix
= [](StringRef N
) {
3457 return StringSwitch
<bool>(N
)
3458 .Cases("lock", "rep", "repe", "repz", "repne", "repnz", "notrack", true)
3462 bool CurlyAsEndOfStatement
= false;
3464 unsigned Flags
= X86::IP_NO_PREFIX
;
3465 while (isLockRepeatNtPrefix(Name
.lower())) {
3467 StringSwitch
<unsigned>(Name
)
3468 .Cases("lock", "lock", X86::IP_HAS_LOCK
)
3469 .Cases("rep", "repe", "repz", X86::IP_HAS_REPEAT
)
3470 .Cases("repne", "repnz", X86::IP_HAS_REPEAT_NE
)
3471 .Cases("notrack", "notrack", X86::IP_HAS_NOTRACK
)
3472 .Default(X86::IP_NO_PREFIX
); // Invalid prefix (impossible)
3474 if (getLexer().is(AsmToken::EndOfStatement
)) {
3475 // We don't have real instr with the given prefix
3476 // let's use the prefix as the instr.
3477 // TODO: there could be several prefixes one after another
3478 Flags
= X86::IP_NO_PREFIX
;
3481 // FIXME: The mnemonic won't match correctly if its not in lower case.
3482 Name
= Parser
.getTok().getString();
3483 Parser
.Lex(); // eat the prefix
3484 // Hack: we could have something like "rep # some comment" or
3485 // "lock; cmpxchg16b $1" or "lock\0A\09incl" or "lock/incl"
3486 while (Name
.starts_with(";") || Name
.starts_with("\n") ||
3487 Name
.starts_with("#") || Name
.starts_with("\t") ||
3488 Name
.starts_with("/")) {
3489 // FIXME: The mnemonic won't match correctly if its not in lower case.
3490 Name
= Parser
.getTok().getString();
3491 Parser
.Lex(); // go to next prefix or instr
3498 // Hacks to handle 'data16' and 'data32'
3499 if (PatchedName
== "data16" && is16BitMode()) {
3500 return Error(NameLoc
, "redundant data16 prefix");
3502 if (PatchedName
== "data32") {
3504 return Error(NameLoc
, "redundant data32 prefix");
3506 return Error(NameLoc
, "'data32' is not supported in 64-bit mode");
3507 // Hack to 'data16' for the table lookup.
3508 PatchedName
= "data16";
3510 if (getLexer().isNot(AsmToken::EndOfStatement
)) {
3511 StringRef Next
= Parser
.getTok().getString();
3513 // data32 effectively changes the instruction suffix.
3515 if (Next
== "callw")
3517 if (Next
== "ljmpw")
3522 ForcedDataPrefix
= X86::Is32Bit
;
3527 Operands
.push_back(X86Operand::CreateToken(PatchedName
, NameLoc
));
3529 // Push the immediate if we extracted one from the mnemonic.
3530 if (ComparisonPredicate
!= ~0U && !isParsingIntelSyntax()) {
3531 const MCExpr
*ImmOp
= MCConstantExpr::create(ComparisonPredicate
,
3532 getParser().getContext());
3533 Operands
.push_back(X86Operand::CreateImm(ImmOp
, NameLoc
, NameLoc
));
3536 // Parse conditional flags after mnemonic.
3537 if ((Name
.starts_with("ccmp") || Name
.starts_with("ctest")) &&
3538 parseCFlagsOp(Operands
))
3541 // This does the actual operand parsing. Don't parse any more if we have a
3542 // prefix juxtaposed with an operation like "lock incl 4(%rax)", because we
3543 // just want to parse the "lock" as the first instruction and the "incl" as
3545 if (getLexer().isNot(AsmToken::EndOfStatement
) && !IsPrefix
) {
3546 // Parse '*' modifier.
3547 if (getLexer().is(AsmToken::Star
))
3548 Operands
.push_back(X86Operand::CreateToken("*", consumeToken()));
3550 // Read the operands.
3552 if (parseOperand(Operands
, Name
))
3554 if (HandleAVX512Operand(Operands
))
3557 // check for comma and eat it
3558 if (getLexer().is(AsmToken::Comma
))
3564 // In MS inline asm curly braces mark the beginning/end of a block,
3565 // therefore they should be interpreted as end of statement
3566 CurlyAsEndOfStatement
=
3567 isParsingIntelSyntax() && isParsingMSInlineAsm() &&
3568 (getLexer().is(AsmToken::LCurly
) || getLexer().is(AsmToken::RCurly
));
3569 if (getLexer().isNot(AsmToken::EndOfStatement
) && !CurlyAsEndOfStatement
)
3570 return TokError("unexpected token in argument list");
3573 // Push the immediate if we extracted one from the mnemonic.
3574 if (ComparisonPredicate
!= ~0U && isParsingIntelSyntax()) {
3575 const MCExpr
*ImmOp
= MCConstantExpr::create(ComparisonPredicate
,
3576 getParser().getContext());
3577 Operands
.push_back(X86Operand::CreateImm(ImmOp
, NameLoc
, NameLoc
));
3580 // Consume the EndOfStatement or the prefix separator Slash
3581 if (getLexer().is(AsmToken::EndOfStatement
) ||
3582 (IsPrefix
&& getLexer().is(AsmToken::Slash
)))
3584 else if (CurlyAsEndOfStatement
)
3585 // Add an actual EndOfStatement before the curly brace
3586 Info
.AsmRewrites
->emplace_back(AOK_EndOfStatement
,
3587 getLexer().getTok().getLoc(), 0);
3589 // This is for gas compatibility and cannot be done in td.
3590 // Adding "p" for some floating point with no argument.
3591 // For example: fsub --> fsubp
3593 Name
== "fsub" || Name
== "fdiv" || Name
== "fsubr" || Name
== "fdivr";
3594 if (IsFp
&& Operands
.size() == 1) {
3595 const char *Repl
= StringSwitch
<const char *>(Name
)
3596 .Case("fsub", "fsubp")
3597 .Case("fdiv", "fdivp")
3598 .Case("fsubr", "fsubrp")
3599 .Case("fdivr", "fdivrp");
3600 static_cast<X86Operand
&>(*Operands
[0]).setTokenValue(Repl
);
3603 if ((Name
== "mov" || Name
== "movw" || Name
== "movl") &&
3604 (Operands
.size() == 3)) {
3605 X86Operand
&Op1
= (X86Operand
&)*Operands
[1];
3606 X86Operand
&Op2
= (X86Operand
&)*Operands
[2];
3607 SMLoc Loc
= Op1
.getEndLoc();
3608 // Moving a 32 or 16 bit value into a segment register has the same
3609 // behavior. Modify such instructions to always take shorter form.
3610 if (Op1
.isReg() && Op2
.isReg() &&
3611 X86MCRegisterClasses
[X86::SEGMENT_REGRegClassID
].contains(
3613 (X86MCRegisterClasses
[X86::GR16RegClassID
].contains(Op1
.getReg()) ||
3614 X86MCRegisterClasses
[X86::GR32RegClassID
].contains(Op1
.getReg()))) {
3615 // Change instruction name to match new instruction.
3616 if (Name
!= "mov" && Name
[3] == (is16BitMode() ? 'l' : 'w')) {
3617 Name
= is16BitMode() ? "movw" : "movl";
3618 Operands
[0] = X86Operand::CreateToken(Name
, NameLoc
);
3620 // Select the correct equivalent 16-/32-bit source register.
3622 getX86SubSuperRegister(Op1
.getReg(), is16BitMode() ? 16 : 32);
3623 Operands
[1] = X86Operand::CreateReg(Reg
, Loc
, Loc
);
3627 // This is a terrible hack to handle "out[s]?[bwl]? %al, (%dx)" ->
3628 // "outb %al, %dx". Out doesn't take a memory form, but this is a widely
3629 // documented form in various unofficial manuals, so a lot of code uses it.
3630 if ((Name
== "outb" || Name
== "outsb" || Name
== "outw" || Name
== "outsw" ||
3631 Name
== "outl" || Name
== "outsl" || Name
== "out" || Name
== "outs") &&
3632 Operands
.size() == 3) {
3633 X86Operand
&Op
= (X86Operand
&)*Operands
.back();
3635 Operands
.back() = X86Operand::CreateReg(X86::DX
, Op
.getStartLoc(),
3638 // Same hack for "in[s]?[bwl]? (%dx), %al" -> "inb %dx, %al".
3639 if ((Name
== "inb" || Name
== "insb" || Name
== "inw" || Name
== "insw" ||
3640 Name
== "inl" || Name
== "insl" || Name
== "in" || Name
== "ins") &&
3641 Operands
.size() == 3) {
3642 X86Operand
&Op
= (X86Operand
&)*Operands
[1];
3644 Operands
[1] = X86Operand::CreateReg(X86::DX
, Op
.getStartLoc(),
3648 SmallVector
<std::unique_ptr
<MCParsedAsmOperand
>, 2> TmpOperands
;
3649 bool HadVerifyError
= false;
3651 // Append default arguments to "ins[bwld]"
3652 if (Name
.starts_with("ins") &&
3653 (Operands
.size() == 1 || Operands
.size() == 3) &&
3654 (Name
== "insb" || Name
== "insw" || Name
== "insl" || Name
== "insd" ||
3657 AddDefaultSrcDestOperands(TmpOperands
,
3658 X86Operand::CreateReg(X86::DX
, NameLoc
, NameLoc
),
3659 DefaultMemDIOperand(NameLoc
));
3660 HadVerifyError
= VerifyAndAdjustOperands(Operands
, TmpOperands
);
3663 // Append default arguments to "outs[bwld]"
3664 if (Name
.starts_with("outs") &&
3665 (Operands
.size() == 1 || Operands
.size() == 3) &&
3666 (Name
== "outsb" || Name
== "outsw" || Name
== "outsl" ||
3667 Name
== "outsd" || Name
== "outs")) {
3668 AddDefaultSrcDestOperands(TmpOperands
, DefaultMemSIOperand(NameLoc
),
3669 X86Operand::CreateReg(X86::DX
, NameLoc
, NameLoc
));
3670 HadVerifyError
= VerifyAndAdjustOperands(Operands
, TmpOperands
);
3673 // Transform "lods[bwlq]" into "lods[bwlq] ($SIREG)" for appropriate
3674 // values of $SIREG according to the mode. It would be nice if this
3675 // could be achieved with InstAlias in the tables.
3676 if (Name
.starts_with("lods") &&
3677 (Operands
.size() == 1 || Operands
.size() == 2) &&
3678 (Name
== "lods" || Name
== "lodsb" || Name
== "lodsw" ||
3679 Name
== "lodsl" || Name
== "lodsd" || Name
== "lodsq")) {
3680 TmpOperands
.push_back(DefaultMemSIOperand(NameLoc
));
3681 HadVerifyError
= VerifyAndAdjustOperands(Operands
, TmpOperands
);
3684 // Transform "stos[bwlq]" into "stos[bwlq] ($DIREG)" for appropriate
3685 // values of $DIREG according to the mode. It would be nice if this
3686 // could be achieved with InstAlias in the tables.
3687 if (Name
.starts_with("stos") &&
3688 (Operands
.size() == 1 || Operands
.size() == 2) &&
3689 (Name
== "stos" || Name
== "stosb" || Name
== "stosw" ||
3690 Name
== "stosl" || Name
== "stosd" || Name
== "stosq")) {
3691 TmpOperands
.push_back(DefaultMemDIOperand(NameLoc
));
3692 HadVerifyError
= VerifyAndAdjustOperands(Operands
, TmpOperands
);
3695 // Transform "scas[bwlq]" into "scas[bwlq] ($DIREG)" for appropriate
3696 // values of $DIREG according to the mode. It would be nice if this
3697 // could be achieved with InstAlias in the tables.
3698 if (Name
.starts_with("scas") &&
3699 (Operands
.size() == 1 || Operands
.size() == 2) &&
3700 (Name
== "scas" || Name
== "scasb" || Name
== "scasw" ||
3701 Name
== "scasl" || Name
== "scasd" || Name
== "scasq")) {
3702 TmpOperands
.push_back(DefaultMemDIOperand(NameLoc
));
3703 HadVerifyError
= VerifyAndAdjustOperands(Operands
, TmpOperands
);
3706 // Add default SI and DI operands to "cmps[bwlq]".
3707 if (Name
.starts_with("cmps") &&
3708 (Operands
.size() == 1 || Operands
.size() == 3) &&
3709 (Name
== "cmps" || Name
== "cmpsb" || Name
== "cmpsw" ||
3710 Name
== "cmpsl" || Name
== "cmpsd" || Name
== "cmpsq")) {
3711 AddDefaultSrcDestOperands(TmpOperands
, DefaultMemDIOperand(NameLoc
),
3712 DefaultMemSIOperand(NameLoc
));
3713 HadVerifyError
= VerifyAndAdjustOperands(Operands
, TmpOperands
);
3716 // Add default SI and DI operands to "movs[bwlq]".
3717 if (((Name
.starts_with("movs") &&
3718 (Name
== "movs" || Name
== "movsb" || Name
== "movsw" ||
3719 Name
== "movsl" || Name
== "movsd" || Name
== "movsq")) ||
3720 (Name
.starts_with("smov") &&
3721 (Name
== "smov" || Name
== "smovb" || Name
== "smovw" ||
3722 Name
== "smovl" || Name
== "smovd" || Name
== "smovq"))) &&
3723 (Operands
.size() == 1 || Operands
.size() == 3)) {
3724 if (Name
== "movsd" && Operands
.size() == 1 && !isParsingIntelSyntax())
3725 Operands
.back() = X86Operand::CreateToken("movsl", NameLoc
);
3726 AddDefaultSrcDestOperands(TmpOperands
, DefaultMemSIOperand(NameLoc
),
3727 DefaultMemDIOperand(NameLoc
));
3728 HadVerifyError
= VerifyAndAdjustOperands(Operands
, TmpOperands
);
3731 // Check if we encountered an error for one of the string instructions
3732 if (HadVerifyError
) {
3733 return HadVerifyError
;
3736 // Transforms "xlat mem8" into "xlatb"
3737 if ((Name
== "xlat" || Name
== "xlatb") && Operands
.size() == 2) {
3738 X86Operand
&Op1
= static_cast<X86Operand
&>(*Operands
[1]);
3740 Warning(Op1
.getStartLoc(), "memory operand is only for determining the "
3741 "size, (R|E)BX will be used for the location");
3742 Operands
.pop_back();
3743 static_cast<X86Operand
&>(*Operands
[0]).setTokenValue("xlatb");
3748 Operands
.push_back(X86Operand::CreatePrefix(Flags
, NameLoc
, NameLoc
));
3752 static bool convertSSEToAVX(MCInst
&Inst
) {
3753 ArrayRef
<X86TableEntry
> Table
{X86SSE2AVXTable
};
3754 unsigned Opcode
= Inst
.getOpcode();
3755 const auto I
= llvm::lower_bound(Table
, Opcode
);
3756 if (I
== Table
.end() || I
->OldOpc
!= Opcode
)
3759 Inst
.setOpcode(I
->NewOpc
);
3760 // AVX variant of BLENDVPD/BLENDVPS/PBLENDVB instructions has more
3761 // operand compare to SSE variant, which is added below
3762 if (X86::isBLENDVPD(Opcode
) || X86::isBLENDVPS(Opcode
) ||
3763 X86::isPBLENDVB(Opcode
))
3764 Inst
.addOperand(Inst
.getOperand(2));
/// Applies the post-match rewrites visible below to \p Inst: the SSE-to-AVX
/// conversion (when MCOptions.X86Sse2Avx is set), the VEX3-to-VEX2
/// optimization (unless a VEX3 prefix was forced), the shift/rotate-by-one
/// optimization, and a per-opcode switch that selects the 2- or 4-byte jump
/// opcodes under {disp32} and turns "int $3" into INT3.
///
/// \p Ops is not referenced by the visible code. Opcodes that the switch does
/// not handle yield false.
/// NOTE(review): callers loop on the result "while changes happen", so a true
/// result presumably means \p Inst was modified -- confirm.
3769 bool X86AsmParser::processInstruction(MCInst
&Inst
, const OperandVector
&Ops
) {
// Generic rewrites are tried first, before the opcode-specific switch.
3770 if (MCOptions
.X86Sse2Avx
&& convertSSEToAVX(Inst
))
3773 if (ForcedOpcodePrefix
!= OpcodePrefix_VEX3
&&
3774 X86::optimizeInstFromVEX3ToVEX2(Inst
, MII
.get(Inst
.getOpcode())))
3777 if (X86::optimizeShiftRotateWithImmediateOne(Inst
))
3780 switch (Inst
.getOpcode()) {
3781 default: return false;
3783 // {disp32} forces a larger displacement as if the instruction was relaxed.
3784 // NOTE: 16-bit mode uses 16-bit displacement even though it says {disp32}.
3785 // This matches GNU assembler.
3786 if (ForcedDispEncoding
== DispEncoding_Disp32
) {
3787 Inst
.setOpcode(is16BitMode() ? X86::JMP_2
: X86::JMP_4
);
3793 // {disp32} forces a larger displacement as if the instruction was relaxed.
3794 // NOTE: 16-bit mode uses 16-bit displacement even though it says {disp32}.
3795 // This matches GNU assembler.
3796 if (ForcedDispEncoding
== DispEncoding_Disp32
) {
3797 Inst
.setOpcode(is16BitMode() ? X86::JCC_2
: X86::JCC_4
);
3803 // Transforms "int $3" into "int3" as a size optimization.
3804 // We can't write this as an InstAlias.
// Only an immediate operand equal to 3 qualifies for the INT3 rewrite.
3805 if (!Inst
.getOperand(0).isImm() || Inst
.getOperand(0).getImm() != 3)
3808 Inst
.setOpcode(X86::INT3
);
/// Checks the matched instruction \p Inst for operand combinations that the
/// code below diagnoses. All diagnostics are reported at the start location
/// of Ops[0]:
///  - VF[C]MADDC{PH,SH} and VF[C]MULC{PH,SH}: the destination register also
///    appears as a later register operand (warning).
///  - V4F[N]MADD{PS,SS}, VP4DPWSSD[S]: the source register's encoding is not
///    a multiple of 4; the warning names the register group it implies.
///  - Gather instructions: destination, index (and mask) register encodings
///    overlap (warning).
///  - The AMX TCMM*/TDP* opcodes listed: operands 0, 2 and 3 must all be
///    different registers (error).
///  - Legacy-encoded instructions: AH/BH/CH/DH combined with something that
///    requires a REX prefix (error).
///  - PREFETCHIT0/PREFETCHIT1: the base register is not RIP.
///
/// NOTE(review): callers test the result directly before continuing with the
/// instruction; a true result presumably means a diagnostic was emitted --
/// confirm against Warning()/Error().
3814 bool X86AsmParser::validateInstruction(MCInst
&Inst
, const OperandVector
&Ops
) {
3815 using namespace X86
;
3816 const MCRegisterInfo
*MRI
= getContext().getRegisterInfo();
3817 unsigned Opcode
= Inst
.getOpcode();
3818 uint64_t TSFlags
= MII
.get(Opcode
).TSFlags
;
3819 if (isVFCMADDCPH(Opcode
) || isVFCMADDCSH(Opcode
) || isVFMADDCPH(Opcode
) ||
3820 isVFMADDCSH(Opcode
)) {
3821 unsigned Dest
= Inst
.getOperand(0).getReg();
// Operands from index 2 onwards are compared against the destination.
3822 for (unsigned i
= 2; i
< Inst
.getNumOperands(); i
++)
3823 if (Inst
.getOperand(i
).isReg() && Dest
== Inst
.getOperand(i
).getReg())
3824 return Warning(Ops
[0]->getStartLoc(), "Destination register should be "
3825 "distinct from source registers");
3826 } else if (isVFCMULCPH(Opcode
) || isVFCMULCSH(Opcode
) || isVFMULCPH(Opcode
) ||
3827 isVFMULCSH(Opcode
)) {
3828 unsigned Dest
= Inst
.getOperand(0).getReg();
3829 // The mask variants have different operand list. Scan from the third
3830 // operand to avoid emitting incorrect warning.
3831 // VFMULCPHZrr Dest, Src1, Src2
3832 // VFMULCPHZrrk Dest, Dest, Mask, Src1, Src2
3833 // VFMULCPHZrrkz Dest, Mask, Src1, Src2
3834 for (unsigned i
= ((TSFlags
& X86II::EVEX_K
) ? 2 : 1);
3835 i
< Inst
.getNumOperands(); i
++)
3836 if (Inst
.getOperand(i
).isReg() && Dest
== Inst
.getOperand(i
).getReg())
3837 return Warning(Ops
[0]->getStartLoc(), "Destination register should be "
3838 "distinct from source registers");
3839 } else if (isV4FMADDPS(Opcode
) || isV4FNMADDPS(Opcode
) ||
3840 isV4FNMADDPS(Opcode
) || isV4FNMADDSS(Opcode
) ||
3841 isVP4DPWSSDS(Opcode
) || isVP4DPWSSD(Opcode
)) {
// Src2 is the register operand located just before the trailing
// X86::AddrNumOperands operands.
3842 unsigned Src2
= Inst
.getOperand(Inst
.getNumOperands() -
3843 X86::AddrNumOperands
- 1).getReg();
3844 unsigned Src2Enc
= MRI
->getEncodingValue(Src2
);
3845 if (Src2Enc
% 4 != 0) {
3846 StringRef RegName
= X86IntelInstPrinter::getRegisterName(Src2
);
// Round the encoding down to a multiple of 4 to name the 4-register group.
3847 unsigned GroupStart
= (Src2Enc
/ 4) * 4;
3848 unsigned GroupEnd
= GroupStart
+ 3;
3849 return Warning(Ops
[0]->getStartLoc(),
3850 "source register '" + RegName
+ "' implicitly denotes '" +
3851 RegName
.take_front(3) + Twine(GroupStart
) + "' to '" +
3852 RegName
.take_front(3) + Twine(GroupEnd
) +
3855 } else if (isVGATHERDPD(Opcode
) || isVGATHERDPS(Opcode
) ||
3856 isVGATHERQPD(Opcode
) || isVGATHERQPS(Opcode
) ||
3857 isVPGATHERDD(Opcode
) || isVPGATHERDQ(Opcode
) ||
3858 isVPGATHERQD(Opcode
) || isVPGATHERQQ(Opcode
)) {
// Two operand layouts follow: one reads the index register at operand
// 4 + AddrIndexReg, the other reads a mask at operand 1 and the index at
// 3 + AddrIndexReg. NOTE(review): the selection presumably depends on
// HasEVEX -- confirm.
3859 bool HasEVEX
= (TSFlags
& X86II::EncodingMask
) == X86II::EVEX
;
3861 unsigned Dest
= MRI
->getEncodingValue(Inst
.getOperand(0).getReg());
3862 unsigned Index
= MRI
->getEncodingValue(
3863 Inst
.getOperand(4 + X86::AddrIndexReg
).getReg());
3865 return Warning(Ops
[0]->getStartLoc(), "index and destination registers "
3866 "should be distinct");
3868 unsigned Dest
= MRI
->getEncodingValue(Inst
.getOperand(0).getReg());
3869 unsigned Mask
= MRI
->getEncodingValue(Inst
.getOperand(1).getReg());
3870 unsigned Index
= MRI
->getEncodingValue(
3871 Inst
.getOperand(3 + X86::AddrIndexReg
).getReg());
3872 if (Dest
== Mask
|| Dest
== Index
|| Mask
== Index
)
3873 return Warning(Ops
[0]->getStartLoc(), "mask, index, and destination "
3874 "registers should be distinct");
3876 } else if (isTCMMIMFP16PS(Opcode
) || isTCMMRLFP16PS(Opcode
) ||
3877 isTDPBF16PS(Opcode
) || isTDPFP16PS(Opcode
) || isTDPBSSD(Opcode
) ||
3878 isTDPBSUD(Opcode
) || isTDPBUSD(Opcode
) || isTDPBUUD(Opcode
)) {
// Operand 1 is not read here; only operands 0, 2 and 3 are compared.
3879 unsigned SrcDest
= Inst
.getOperand(0).getReg();
3880 unsigned Src1
= Inst
.getOperand(2).getReg();
3881 unsigned Src2
= Inst
.getOperand(3).getReg();
3882 if (SrcDest
== Src1
|| SrcDest
== Src2
|| Src1
== Src2
)
3883 return Error(Ops
[0]->getStartLoc(), "all tmm registers must be distinct");
3886 // Check that we aren't mixing AH/BH/CH/DH with REX prefix. We only need to
3887 // check this with the legacy encoding, VEX/EVEX/XOP don't use REX.
3888 if ((TSFlags
& X86II::EncodingMask
) == 0) {
3889 MCPhysReg HReg
= X86::NoRegister
;
// REX_W in TSFlags already implies a REX prefix, before any operand is
// inspected.
3890 bool UsesRex
= TSFlags
& X86II::REX_W
;
3891 unsigned NumOps
= Inst
.getNumOperands();
3892 for (unsigned i
= 0; i
!= NumOps
; ++i
) {
3893 const MCOperand
&MO
= Inst
.getOperand(i
);
3896 unsigned Reg
= MO
.getReg();
3897 if (Reg
== X86::AH
|| Reg
== X86::BH
|| Reg
== X86::CH
|| Reg
== X86::DH
)
3899 if (X86II::isX86_64NonExtLowByteReg(Reg
) ||
3900 X86II::isX86_64ExtendedReg(Reg
))
3904 if (UsesRex
&& HReg
!= X86::NoRegister
) {
3905 StringRef RegName
= X86IntelInstPrinter::getRegisterName(HReg
);
3906 return Error(Ops
[0]->getStartLoc(),
3907 "can't encode '" + RegName
+ "' in an instruction requiring "
3912 if ((Opcode
== X86::PREFETCHIT0
|| Opcode
== X86::PREFETCHIT1
)) {
// The operand at index X86::AddrBaseReg must be the RIP register.
3913 const MCOperand
&MO
= Inst
.getOperand(X86::AddrBaseReg
);
3914 if (!MO
.isReg() || MO
.getReg() != X86::RIP
)
3916 Ops
[0]->getStartLoc(),
3917 Twine((Inst
.getOpcode() == X86::PREFETCHIT0
? "'prefetchit0'"
3918 : "'prefetchit1'")) +
3919 " only supports RIP-relative address");
3924 void X86AsmParser::emitWarningForSpecialLVIInstruction(SMLoc Loc
) {
3925 Warning(Loc
, "Instruction may be vulnerable to LVI and "
3926 "requires manual mitigation");
3927 Note(SMLoc(), "See https://software.intel.com/"
3928 "security-software-guidance/insights/"
3929 "deep-dive-load-value-injection#specialinstructions"
3930 " for more information");
3933 /// RET instructions and also instructions that perform indirect calls/jumps from
3934 /// memory combine a load and a branch within a single instruction. To mitigate these
3935 /// instructions against LVI, they must be decomposed into separate load and
3936 /// branch instructions, with an LFENCE in between. For more details, see:
3937 /// - X86LoadValueInjectionRetHardening.cpp
3938 /// - X86LoadValueInjectionIndirectThunks.cpp
3939 /// - https://software.intel.com/security-software-guidance/insights/deep-dive-load-value-injection
3941 /// Emits any mitigation instructions to \p Out or a warning; returns nothing.
3942 void X86AsmParser::applyLVICFIMitigation(MCInst
&Inst
, MCStreamer
&Out
) {
3943 // Information on control-flow instructions that require manual mitigation can
3945 // https://software.intel.com/security-software-guidance/insights/deep-dive-load-value-injection#specialinstructions
3946 switch (Inst
.getOpcode()) {
3953 MCInst ShlInst
, FenceInst
;
3954 bool Parse32
= is32BitMode() || Code16GCC
;
// Stack pointer register for the current mode: RSP, ESP or SP.
3956 is64BitMode() ? X86::RSP
: (Parse32
? X86::ESP
: X86::SP
);
3957 const MCExpr
*Disp
= MCConstantExpr::create(0, getContext());
// Memory operand based at Basereg with displacement 0, no segment or index
// register, and scale 1.
3958 auto ShlMemOp
= X86Operand::CreateMem(getPointerWidth(), /*SegReg=*/0, Disp
,
3959 /*BaseReg=*/Basereg
, /*IndexReg=*/0,
3960 /*Scale=*/1, SMLoc
{}, SMLoc
{}, 0);
// Build SHL64mi on that memory operand with an immediate of 0, followed by
// an LFENCE, and emit both to Out.
// NOTE(review): SHL64mi is used even when Basereg is ESP/SP -- confirm this
// path is only expected in 64-bit mode.
3961 ShlInst
.setOpcode(X86::SHL64mi
);
3962 ShlMemOp
->addMemOperands(ShlInst
, 5);
3963 ShlInst
.addOperand(MCOperand::createImm(0));
3964 FenceInst
.setOpcode(X86::LFENCE
);
3965 Out
.emitInstruction(ShlInst
, getSTI());
3966 Out
.emitInstruction(FenceInst
, getSTI());
// Opcodes reaching this arm only get the manual-mitigation warning.
3975 emitWarningForSpecialLVIInstruction(Inst
.getLoc());
3980 /// To mitigate LVI, every instruction that performs a load can be followed by
3981 /// an LFENCE instruction to squash any potential mis-speculation. There are
3982 /// some instructions that require additional considerations, and may require
3983 /// manual mitigation. For more details, see:
3984 /// https://software.intel.com/security-software-guidance/insights/deep-dive-load-value-injection
3986 /// Emits an LFENCE to \p Out or a warning where applicable; returns nothing.
3987 void X86AsmParser::applyLVILoadHardeningMitigation(MCInst
&Inst
,
3989 auto Opcode
= Inst
.getOpcode();
3990 auto Flags
= Inst
.getFlags();
// Instructions carrying a REP/REPNE prefix flag are checked first.
3991 if ((Flags
& X86::IP_HAS_REPEAT
) || (Flags
& X86::IP_HAS_REPEAT_NE
)) {
3992 // Information on REP string instructions that require manual mitigation can
3994 // https://software.intel.com/security-software-guidance/insights/deep-dive-load-value-injection#specialinstructions
4004 emitWarningForSpecialLVIInstruction(Inst
.getLoc());
4007 } else if (Opcode
== X86::REP_PREFIX
|| Opcode
== X86::REPNE_PREFIX
) {
4008 // If a REP instruction is found on its own line, it may or may not be
4009 // followed by a vulnerable instruction. Emit a warning just in case.
4010 emitWarningForSpecialLVIInstruction(Inst
.getLoc());
4014 const MCInstrDesc
&MCID
= MII
.get(Inst
.getOpcode());
4016 // Can't mitigate after terminators or calls. A control flow change may have
4017 // already occurred.
4018 if (MCID
.isTerminator() || MCID
.isCall())
4021 // LFENCE has the mayLoad property, don't double fence.
4022 if (MCID
.mayLoad() && Inst
.getOpcode() != X86::LFENCE
) {
// Emit an LFENCE for the load-performing instruction.
4024 FenceInst
.setOpcode(X86::LFENCE
);
4025 Out
.emitInstruction(FenceInst
, getSTI());
4029 void X86AsmParser::emitInstruction(MCInst
&Inst
, OperandVector
&Operands
,
4031 if (LVIInlineAsmHardening
&&
4032 getSTI().hasFeature(X86::FeatureLVIControlFlowIntegrity
))
4033 applyLVICFIMitigation(Inst
, Out
);
4035 Out
.emitInstruction(Inst
, getSTI());
4037 if (LVIInlineAsmHardening
&&
4038 getSTI().hasFeature(X86::FeatureLVILoadHardening
))
4039 applyLVILoadHardeningMitigation(Inst
, Out
);
4042 static unsigned getPrefixes(OperandVector
&Operands
) {
4043 unsigned Result
= 0;
4044 X86Operand
&Prefix
= static_cast<X86Operand
&>(*Operands
.back());
4045 if (Prefix
.isPrefix()) {
4046 Result
= Prefix
.getPrefix();
4047 Operands
.pop_back();
4052 bool X86AsmParser::MatchAndEmitInstruction(SMLoc IDLoc
, unsigned &Opcode
,
4053 OperandVector
&Operands
,
4054 MCStreamer
&Out
, uint64_t &ErrorInfo
,
4055 bool MatchingInlineAsm
) {
4056 assert(!Operands
.empty() && "Unexpect empty operand list!");
4057 assert((*Operands
[0]).isToken() && "Leading operand should always be a mnemonic!");
4059 // First, handle aliases that expand to multiple instructions.
4060 MatchFPUWaitAlias(IDLoc
, static_cast<X86Operand
&>(*Operands
[0]), Operands
,
4061 Out
, MatchingInlineAsm
);
4062 unsigned Prefixes
= getPrefixes(Operands
);
4066 // If REX/REX2/VEX/EVEX encoding is forced, we need to pass the USE_* flag to
4067 // the encoder and printer.
4068 if (ForcedOpcodePrefix
== OpcodePrefix_REX
)
4069 Prefixes
|= X86::IP_USE_REX
;
4070 else if (ForcedOpcodePrefix
== OpcodePrefix_REX2
)
4071 Prefixes
|= X86::IP_USE_REX2
;
4072 else if (ForcedOpcodePrefix
== OpcodePrefix_VEX
)
4073 Prefixes
|= X86::IP_USE_VEX
;
4074 else if (ForcedOpcodePrefix
== OpcodePrefix_VEX2
)
4075 Prefixes
|= X86::IP_USE_VEX2
;
4076 else if (ForcedOpcodePrefix
== OpcodePrefix_VEX3
)
4077 Prefixes
|= X86::IP_USE_VEX3
;
4078 else if (ForcedOpcodePrefix
== OpcodePrefix_EVEX
)
4079 Prefixes
|= X86::IP_USE_EVEX
;
4081 // Set encoded flags for {disp8} and {disp32}.
4082 if (ForcedDispEncoding
== DispEncoding_Disp8
)
4083 Prefixes
|= X86::IP_USE_DISP8
;
4084 else if (ForcedDispEncoding
== DispEncoding_Disp32
)
4085 Prefixes
|= X86::IP_USE_DISP32
;
4088 Inst
.setFlags(Prefixes
);
4090 return isParsingIntelSyntax()
4091 ? matchAndEmitIntelInstruction(IDLoc
, Opcode
, Inst
, Operands
, Out
,
4092 ErrorInfo
, MatchingInlineAsm
)
4093 : matchAndEmitATTInstruction(IDLoc
, Opcode
, Inst
, Operands
, Out
,
4094 ErrorInfo
, MatchingInlineAsm
);
/// Handles the x87 mnemonics listed below (finit, fsave, fstcw, fstenv,
/// fstsw, fclex and the "w"-suffixed fstcww/fstsww): an instruction with
/// opcode X86::WAIT is built and, unless \p MatchingInlineAsm is set, emitted
/// to \p Out; the mnemonic token in Operands[0] is then replaced by the
/// mapped "fn..." form located at \p IDLoc.
/// NOTE(review): this presumably happens only when \p Op's token is one of
/// the listed aliases -- confirm.
4097 void X86AsmParser::MatchFPUWaitAlias(SMLoc IDLoc
, X86Operand
&Op
,
4098 OperandVector
&Operands
, MCStreamer
&Out
,
4099 bool MatchingInlineAsm
) {
4100 // FIXME: This should be replaced with a real .td file alias mechanism.
4101 // Also, MatchInstructionImpl should actually *do* the EmitInstruction
// Map each mnemonic to its replacement mnemonic.
4103 const char *Repl
= StringSwitch
<const char *>(Op
.getToken())
4104 .Case("finit", "fninit")
4105 .Case("fsave", "fnsave")
4106 .Case("fstcw", "fnstcw")
4107 .Case("fstcww", "fnstcw")
4108 .Case("fstenv", "fnstenv")
4109 .Case("fstsw", "fnstsw")
4110 .Case("fstsww", "fnstsw")
4111 .Case("fclex", "fnclex")
4115 Inst
.setOpcode(X86::WAIT
);
// The WAIT instruction is not emitted while matching inline asm.
4117 if (!MatchingInlineAsm
)
4118 emitInstruction(Inst
, Operands
, Out
);
// Swap the mnemonic token for the replacement mnemonic.
4119 Operands
[0] = X86Operand::CreateToken(Repl
, IDLoc
);
4123 bool X86AsmParser::ErrorMissingFeature(SMLoc IDLoc
,
4124 const FeatureBitset
&MissingFeatures
,
4125 bool MatchingInlineAsm
) {
4126 assert(MissingFeatures
.any() && "Unknown missing feature!");
4127 SmallString
<126> Msg
;
4128 raw_svector_ostream
OS(Msg
);
4129 OS
<< "instruction requires:";
4130 for (unsigned i
= 0, e
= MissingFeatures
.size(); i
!= e
; ++i
) {
4131 if (MissingFeatures
[i
])
4132 OS
<< ' ' << getSubtargetFeatureName(i
);
4134 return Error(IDLoc
, OS
.str(), SMRange(), MatchingInlineAsm
);
4137 unsigned X86AsmParser::checkTargetMatchPredicate(MCInst
&Inst
) {
4138 unsigned Opc
= Inst
.getOpcode();
4139 const MCInstrDesc
&MCID
= MII
.get(Opc
);
4140 uint64_t TSFlags
= MCID
.TSFlags
;
4142 if (UseApxExtendedReg
&& !X86II::canUseApxExtendedReg(MCID
))
4143 return Match_Unsupported
;
4144 if (ForcedNoFlag
== !(TSFlags
& X86II::EVEX_NF
) && !X86::isCFCMOVCC(Opc
))
4145 return Match_Unsupported
;
4147 switch (ForcedOpcodePrefix
) {
4148 case OpcodePrefix_Default
:
4150 case OpcodePrefix_REX
:
4151 case OpcodePrefix_REX2
:
4152 if (TSFlags
& X86II::EncodingMask
)
4153 return Match_Unsupported
;
4155 case OpcodePrefix_VEX
:
4156 case OpcodePrefix_VEX2
:
4157 case OpcodePrefix_VEX3
:
4158 if ((TSFlags
& X86II::EncodingMask
) != X86II::VEX
)
4159 return Match_Unsupported
;
4161 case OpcodePrefix_EVEX
:
4162 if ((TSFlags
& X86II::EncodingMask
) != X86II::EVEX
)
4163 return Match_Unsupported
;
4167 if ((TSFlags
& X86II::ExplicitOpPrefixMask
) == X86II::ExplicitVEXPrefix
&&
4168 (ForcedOpcodePrefix
!= OpcodePrefix_VEX
&&
4169 ForcedOpcodePrefix
!= OpcodePrefix_VEX2
&&
4170 ForcedOpcodePrefix
!= OpcodePrefix_VEX3
))
4171 return Match_Unsupported
;
4173 return Match_Success
;
4176 bool X86AsmParser::matchAndEmitATTInstruction(
4177 SMLoc IDLoc
, unsigned &Opcode
, MCInst
&Inst
, OperandVector
&Operands
,
4178 MCStreamer
&Out
, uint64_t &ErrorInfo
, bool MatchingInlineAsm
) {
4179 X86Operand
&Op
= static_cast<X86Operand
&>(*Operands
[0]);
4180 SMRange EmptyRange
= std::nullopt
;
4181 // In 16-bit mode, if data32 is specified, temporarily switch to 32-bit mode
4182 // when matching the instruction.
4183 if (ForcedDataPrefix
== X86::Is32Bit
)
4184 SwitchMode(X86::Is32Bit
);
4185 // First, try a direct match.
4186 FeatureBitset MissingFeatures
;
4187 unsigned OriginalError
= MatchInstruction(Operands
, Inst
, ErrorInfo
,
4188 MissingFeatures
, MatchingInlineAsm
,
4189 isParsingIntelSyntax());
4190 if (ForcedDataPrefix
== X86::Is32Bit
) {
4191 SwitchMode(X86::Is16Bit
);
4192 ForcedDataPrefix
= 0;
4194 switch (OriginalError
) {
4195 default: llvm_unreachable("Unexpected match result!");
4197 if (!MatchingInlineAsm
&& validateInstruction(Inst
, Operands
))
4199 // Some instructions need post-processing to, for example, tweak which
4200 // encoding is selected. Loop on it while changes happen so the
4201 // individual transformations can chain off each other.
4202 if (!MatchingInlineAsm
)
4203 while (processInstruction(Inst
, Operands
))
4207 if (!MatchingInlineAsm
)
4208 emitInstruction(Inst
, Operands
, Out
);
4209 Opcode
= Inst
.getOpcode();
4211 case Match_InvalidImmUnsignedi4
: {
4212 SMLoc ErrorLoc
= ((X86Operand
&)*Operands
[ErrorInfo
]).getStartLoc();
4213 if (ErrorLoc
== SMLoc())
4215 return Error(ErrorLoc
, "immediate must be an integer in range [0, 15]",
4216 EmptyRange
, MatchingInlineAsm
);
4218 case Match_MissingFeature
:
4219 return ErrorMissingFeature(IDLoc
, MissingFeatures
, MatchingInlineAsm
);
4220 case Match_InvalidOperand
:
4221 case Match_MnemonicFail
:
4222 case Match_Unsupported
:
4225 if (Op
.getToken().empty()) {
4226 Error(IDLoc
, "instruction must have size higher than 0", EmptyRange
,
4231 // FIXME: Ideally, we would only attempt suffix matches for things which are
4232 // valid prefixes, and we could just infer the right unambiguous
4233 // type. However, that requires substantially more matcher support than the
4236 // Change the operand to point to a temporary token.
4237 StringRef Base
= Op
.getToken();
4238 SmallString
<16> Tmp
;
4241 Op
.setTokenValue(Tmp
);
4243 // If this instruction starts with an 'f', then it is a floating point stack
4244 // instruction. These come in up to three forms for 32-bit, 64-bit, and
4245 // 80-bit floating point, which use the suffixes s,l,t respectively.
4247 // Otherwise, we assume that this may be an integer instruction, which comes
4248 // in 8/16/32/64-bit forms using the b,w,l,q suffixes respectively.
4249 const char *Suffixes
= Base
[0] != 'f' ? "bwlq" : "slt\0";
4250 // MemSize corresponding to Suffixes. { 8, 16, 32, 64 } { 32, 64, 80, 0 }
4251 const char *MemSize
= Base
[0] != 'f' ? "\x08\x10\x20\x40" : "\x20\x40\x50\0";
4253 // Check for the various suffix matches.
4254 uint64_t ErrorInfoIgnore
;
4255 FeatureBitset ErrorInfoMissingFeatures
; // Init suppresses compiler warnings.
4258 // Some instruction like VPMULDQ is NOT the variant of VPMULD but a new one.
4259 // So we should make sure the suffix matcher only works for memory variant
4260 // that has the same size with the suffix.
4261 // FIXME: This flag is a workaround for legacy instructions that didn't
4262 // declare non suffix variant assembly.
4263 bool HasVectorReg
= false;
4264 X86Operand
*MemOp
= nullptr;
4265 for (const auto &Op
: Operands
) {
4266 X86Operand
*X86Op
= static_cast<X86Operand
*>(Op
.get());
4267 if (X86Op
->isVectorReg())
4268 HasVectorReg
= true;
4269 else if (X86Op
->isMem()) {
4271 assert(MemOp
->Mem
.Size
== 0 && "Memory size always 0 under ATT syntax");
4272 // Have we found an unqualified memory operand,
4273 // break. IA allows only one memory operand.
4278 for (unsigned I
= 0, E
= std::size(Match
); I
!= E
; ++I
) {
4279 Tmp
.back() = Suffixes
[I
];
4280 if (MemOp
&& HasVectorReg
)
4281 MemOp
->Mem
.Size
= MemSize
[I
];
4282 Match
[I
] = Match_MnemonicFail
;
4283 if (MemOp
|| !HasVectorReg
) {
4285 MatchInstruction(Operands
, Inst
, ErrorInfoIgnore
, MissingFeatures
,
4286 MatchingInlineAsm
, isParsingIntelSyntax());
4287 // If this returned as a missing feature failure, remember that.
4288 if (Match
[I
] == Match_MissingFeature
)
4289 ErrorInfoMissingFeatures
= MissingFeatures
;
4293 // Restore the old token.
4294 Op
.setTokenValue(Base
);
4296 // If exactly one matched, then we treat that as a successful match (and the
4297 // instruction will already have been filled in correctly, since the failing
4298 // matches won't have modified it).
4299 unsigned NumSuccessfulMatches
= llvm::count(Match
, Match_Success
);
4300 if (NumSuccessfulMatches
== 1) {
4301 if (!MatchingInlineAsm
&& validateInstruction(Inst
, Operands
))
4303 // Some instructions need post-processing to, for example, tweak which
4304 // encoding is selected. Loop on it while changes happen so the
4305 // individual transformations can chain off each other.
4306 if (!MatchingInlineAsm
)
4307 while (processInstruction(Inst
, Operands
))
4311 if (!MatchingInlineAsm
)
4312 emitInstruction(Inst
, Operands
, Out
);
4313 Opcode
= Inst
.getOpcode();
4317 // Otherwise, the match failed, try to produce a decent error message.
4319 // If we had multiple suffix matches, then identify this as an ambiguous
4321 if (NumSuccessfulMatches
> 1) {
4323 unsigned NumMatches
= 0;
4324 for (unsigned I
= 0, E
= std::size(Match
); I
!= E
; ++I
)
4325 if (Match
[I
] == Match_Success
)
4326 MatchChars
[NumMatches
++] = Suffixes
[I
];
4328 SmallString
<126> Msg
;
4329 raw_svector_ostream
OS(Msg
);
4330 OS
<< "ambiguous instructions require an explicit suffix (could be ";
4331 for (unsigned i
= 0; i
!= NumMatches
; ++i
) {
4334 if (i
+ 1 == NumMatches
)
4336 OS
<< "'" << Base
<< MatchChars
[i
] << "'";
4339 Error(IDLoc
, OS
.str(), EmptyRange
, MatchingInlineAsm
);
4343 // Okay, we know that none of the variants matched successfully.
4345 // If all of the instructions reported an invalid mnemonic, then the original
4346 // mnemonic was invalid.
4347 if (llvm::count(Match
, Match_MnemonicFail
) == 4) {
4348 if (OriginalError
== Match_MnemonicFail
)
4349 return Error(IDLoc
, "invalid instruction mnemonic '" + Base
+ "'",
4350 Op
.getLocRange(), MatchingInlineAsm
);
4352 if (OriginalError
== Match_Unsupported
)
4353 return Error(IDLoc
, "unsupported instruction", EmptyRange
,
4356 assert(OriginalError
== Match_InvalidOperand
&& "Unexpected error");
4357 // Recover location info for the operand if we know which was the problem.
4358 if (ErrorInfo
!= ~0ULL) {
4359 if (ErrorInfo
>= Operands
.size())
4360 return Error(IDLoc
, "too few operands for instruction", EmptyRange
,
4363 X86Operand
&Operand
= (X86Operand
&)*Operands
[ErrorInfo
];
4364 if (Operand
.getStartLoc().isValid()) {
4365 SMRange OperandRange
= Operand
.getLocRange();
4366 return Error(Operand
.getStartLoc(), "invalid operand for instruction",
4367 OperandRange
, MatchingInlineAsm
);
4371 return Error(IDLoc
, "invalid operand for instruction", EmptyRange
,
4375 // If one instruction matched as unsupported, report this as unsupported.
4376 if (llvm::count(Match
, Match_Unsupported
) == 1) {
4377 return Error(IDLoc
, "unsupported instruction", EmptyRange
,
4381 // If one instruction matched with a missing feature, report this as a
4383 if (llvm::count(Match
, Match_MissingFeature
) == 1) {
4384 ErrorInfo
= Match_MissingFeature
;
4385 return ErrorMissingFeature(IDLoc
, ErrorInfoMissingFeatures
,
4389 // If one instruction matched with an invalid operand, report this as an
4391 if (llvm::count(Match
, Match_InvalidOperand
) == 1) {
4392 return Error(IDLoc
, "invalid operand for instruction", EmptyRange
,
4396 // If all of these were an outright failure, report it in a useless way.
4397 Error(IDLoc
, "unknown use of instruction mnemonic without a size suffix",
4398 EmptyRange
, MatchingInlineAsm
);
4402 bool X86AsmParser::matchAndEmitIntelInstruction(
4403 SMLoc IDLoc
, unsigned &Opcode
, MCInst
&Inst
, OperandVector
&Operands
,
4404 MCStreamer
&Out
, uint64_t &ErrorInfo
, bool MatchingInlineAsm
) {
4405 X86Operand
&Op
= static_cast<X86Operand
&>(*Operands
[0]);
4406 SMRange EmptyRange
= std::nullopt
;
4407 // Find one unsized memory operand, if present.
4408 X86Operand
*UnsizedMemOp
= nullptr;
4409 for (const auto &Op
: Operands
) {
4410 X86Operand
*X86Op
= static_cast<X86Operand
*>(Op
.get());
4411 if (X86Op
->isMemUnsized()) {
4412 UnsizedMemOp
= X86Op
;
4413 // Have we found an unqualified memory operand,
4414 // break. IA allows only one memory operand.
4419 // Allow some instructions to have implicitly pointer-sized operands. This is
4420 // compatible with gas.
4421 StringRef Mnemonic
= (static_cast<X86Operand
&>(*Operands
[0])).getToken();
4423 static const char *const PtrSizedInstrs
[] = {"call", "jmp", "push"};
4424 for (const char *Instr
: PtrSizedInstrs
) {
4425 if (Mnemonic
== Instr
) {
4426 UnsizedMemOp
->Mem
.Size
= getPointerWidth();
4432 SmallVector
<unsigned, 8> Match
;
4433 FeatureBitset ErrorInfoMissingFeatures
;
4434 FeatureBitset MissingFeatures
;
4435 StringRef Base
= (static_cast<X86Operand
&>(*Operands
[0])).getToken();
4437 // If unsized push has immediate operand we should default the default pointer
4438 // size for the size.
4439 if (Mnemonic
== "push" && Operands
.size() == 2) {
4440 auto *X86Op
= static_cast<X86Operand
*>(Operands
[1].get());
4441 if (X86Op
->isImm()) {
4442 // If it's not a constant fall through and let remainder take care of it.
4443 const auto *CE
= dyn_cast
<MCConstantExpr
>(X86Op
->getImm());
4444 unsigned Size
= getPointerWidth();
4446 (isIntN(Size
, CE
->getValue()) || isUIntN(Size
, CE
->getValue()))) {
4447 SmallString
<16> Tmp
;
4449 Tmp
+= (is64BitMode())
4451 : (is32BitMode()) ? "l" : (is16BitMode()) ? "w" : " ";
4452 Op
.setTokenValue(Tmp
);
4453 // Do match in ATT mode to allow explicit suffix usage.
4454 Match
.push_back(MatchInstruction(Operands
, Inst
, ErrorInfo
,
4455 MissingFeatures
, MatchingInlineAsm
,
4456 false /*isParsingIntelSyntax()*/));
4457 Op
.setTokenValue(Base
);
4462 // If an unsized memory operand is present, try to match with each memory
4463 // operand size. In Intel assembly, the size is not part of the instruction
4465 if (UnsizedMemOp
&& UnsizedMemOp
->isMemUnsized()) {
4466 static const unsigned MopSizes
[] = {8, 16, 32, 64, 80, 128, 256, 512};
4467 for (unsigned Size
: MopSizes
) {
4468 UnsizedMemOp
->Mem
.Size
= Size
;
4469 uint64_t ErrorInfoIgnore
;
4470 unsigned LastOpcode
= Inst
.getOpcode();
4471 unsigned M
= MatchInstruction(Operands
, Inst
, ErrorInfoIgnore
,
4472 MissingFeatures
, MatchingInlineAsm
,
4473 isParsingIntelSyntax());
4474 if (Match
.empty() || LastOpcode
!= Inst
.getOpcode())
4477 // If this returned as a missing feature failure, remember that.
4478 if (Match
.back() == Match_MissingFeature
)
4479 ErrorInfoMissingFeatures
= MissingFeatures
;
4482 // Restore the size of the unsized memory operand if we modified it.
4483 UnsizedMemOp
->Mem
.Size
= 0;
4486 // If we haven't matched anything yet, this is not a basic integer or FPU
4487 // operation. There shouldn't be any ambiguity in our mnemonic table, so try
4488 // matching with the unsized operand.
4489 if (Match
.empty()) {
4490 Match
.push_back(MatchInstruction(
4491 Operands
, Inst
, ErrorInfo
, MissingFeatures
, MatchingInlineAsm
,
4492 isParsingIntelSyntax()));
4493 // If this returned as a missing feature failure, remember that.
4494 if (Match
.back() == Match_MissingFeature
)
4495 ErrorInfoMissingFeatures
= MissingFeatures
;
4498 // Restore the size of the unsized memory operand if we modified it.
4500 UnsizedMemOp
->Mem
.Size
= 0;
4502 // If it's a bad mnemonic, all results will be the same.
4503 if (Match
.back() == Match_MnemonicFail
) {
4504 return Error(IDLoc
, "invalid instruction mnemonic '" + Mnemonic
+ "'",
4505 Op
.getLocRange(), MatchingInlineAsm
);
4508 unsigned NumSuccessfulMatches
= llvm::count(Match
, Match_Success
);
4510 // If matching was ambiguous and we had size information from the frontend,
4511 // try again with that. This handles cases like "movxz eax, m8/m16".
4512 if (UnsizedMemOp
&& NumSuccessfulMatches
> 1 &&
4513 UnsizedMemOp
->getMemFrontendSize()) {
4514 UnsizedMemOp
->Mem
.Size
= UnsizedMemOp
->getMemFrontendSize();
4515 unsigned M
= MatchInstruction(
4516 Operands
, Inst
, ErrorInfo
, MissingFeatures
, MatchingInlineAsm
,
4517 isParsingIntelSyntax());
4518 if (M
== Match_Success
)
4519 NumSuccessfulMatches
= 1;
4521 // Add a rewrite that encodes the size information we used from the
4523 InstInfo
->AsmRewrites
->emplace_back(
4524 AOK_SizeDirective
, UnsizedMemOp
->getStartLoc(),
4525 /*Len=*/0, UnsizedMemOp
->getMemFrontendSize());
4528 // If exactly one matched, then we treat that as a successful match (and the
4529 // instruction will already have been filled in correctly, since the failing
4530 // matches won't have modified it).
4531 if (NumSuccessfulMatches
== 1) {
4532 if (!MatchingInlineAsm
&& validateInstruction(Inst
, Operands
))
4534 // Some instructions need post-processing to, for example, tweak which
4535 // encoding is selected. Loop on it while changes happen so the individual
4536 // transformations can chain off each other.
4537 if (!MatchingInlineAsm
)
4538 while (processInstruction(Inst
, Operands
))
4541 if (!MatchingInlineAsm
)
4542 emitInstruction(Inst
, Operands
, Out
);
4543 Opcode
= Inst
.getOpcode();
4545 } else if (NumSuccessfulMatches
> 1) {
4546 assert(UnsizedMemOp
&&
4547 "multiple matches only possible with unsized memory operands");
4548 return Error(UnsizedMemOp
->getStartLoc(),
4549 "ambiguous operand size for instruction '" + Mnemonic
+ "\'",
4550 UnsizedMemOp
->getLocRange());
4553 // If one instruction matched as unsupported, report this as unsupported.
4554 if (llvm::count(Match
, Match_Unsupported
) == 1) {
4555 return Error(IDLoc
, "unsupported instruction", EmptyRange
,
4559 // If one instruction matched with a missing feature, report this as a
4561 if (llvm::count(Match
, Match_MissingFeature
) == 1) {
4562 ErrorInfo
= Match_MissingFeature
;
4563 return ErrorMissingFeature(IDLoc
, ErrorInfoMissingFeatures
,
4567 // If one instruction matched with an invalid operand, report this as an
4569 if (llvm::count(Match
, Match_InvalidOperand
) == 1) {
4570 return Error(IDLoc
, "invalid operand for instruction", EmptyRange
,
4574 if (llvm::count(Match
, Match_InvalidImmUnsignedi4
) == 1) {
4575 SMLoc ErrorLoc
= ((X86Operand
&)*Operands
[ErrorInfo
]).getStartLoc();
4576 if (ErrorLoc
== SMLoc())
4578 return Error(ErrorLoc
, "immediate must be an integer in range [0, 15]",
4579 EmptyRange
, MatchingInlineAsm
);
4582 // If all of these were an outright failure, report it in a useless way.
4583 return Error(IDLoc
, "unknown instruction mnemonic", EmptyRange
,
4587 bool X86AsmParser::OmitRegisterFromClobberLists(unsigned RegNo
) {
4588 return X86MCRegisterClasses
[X86::SEGMENT_REGRegClassID
].contains(RegNo
);
4591 bool X86AsmParser::ParseDirective(AsmToken DirectiveID
) {
4592 MCAsmParser
&Parser
= getParser();
4593 StringRef IDVal
= DirectiveID
.getIdentifier();
4594 if (IDVal
.starts_with(".arch"))
4595 return parseDirectiveArch();
4596 if (IDVal
.starts_with(".code"))
4597 return ParseDirectiveCode(IDVal
, DirectiveID
.getLoc());
4598 else if (IDVal
.starts_with(".att_syntax")) {
4599 if (getLexer().isNot(AsmToken::EndOfStatement
)) {
4600 if (Parser
.getTok().getString() == "prefix")
4602 else if (Parser
.getTok().getString() == "noprefix")
4603 return Error(DirectiveID
.getLoc(), "'.att_syntax noprefix' is not "
4604 "supported: registers must have a "
4605 "'%' prefix in .att_syntax");
4607 getParser().setAssemblerDialect(0);
4609 } else if (IDVal
.starts_with(".intel_syntax")) {
4610 getParser().setAssemblerDialect(1);
4611 if (getLexer().isNot(AsmToken::EndOfStatement
)) {
4612 if (Parser
.getTok().getString() == "noprefix")
4614 else if (Parser
.getTok().getString() == "prefix")
4615 return Error(DirectiveID
.getLoc(), "'.intel_syntax prefix' is not "
4616 "supported: registers must not have "
4617 "a '%' prefix in .intel_syntax");
4620 } else if (IDVal
== ".nops")
4621 return parseDirectiveNops(DirectiveID
.getLoc());
4622 else if (IDVal
== ".even")
4623 return parseDirectiveEven(DirectiveID
.getLoc());
4624 else if (IDVal
== ".cv_fpo_proc")
4625 return parseDirectiveFPOProc(DirectiveID
.getLoc());
4626 else if (IDVal
== ".cv_fpo_setframe")
4627 return parseDirectiveFPOSetFrame(DirectiveID
.getLoc());
4628 else if (IDVal
== ".cv_fpo_pushreg")
4629 return parseDirectiveFPOPushReg(DirectiveID
.getLoc());
4630 else if (IDVal
== ".cv_fpo_stackalloc")
4631 return parseDirectiveFPOStackAlloc(DirectiveID
.getLoc());
4632 else if (IDVal
== ".cv_fpo_stackalign")
4633 return parseDirectiveFPOStackAlign(DirectiveID
.getLoc());
4634 else if (IDVal
== ".cv_fpo_endprologue")
4635 return parseDirectiveFPOEndPrologue(DirectiveID
.getLoc());
4636 else if (IDVal
== ".cv_fpo_endproc")
4637 return parseDirectiveFPOEndProc(DirectiveID
.getLoc());
4638 else if (IDVal
== ".seh_pushreg" ||
4639 (Parser
.isParsingMasm() && IDVal
.equals_insensitive(".pushreg")))
4640 return parseDirectiveSEHPushReg(DirectiveID
.getLoc());
4641 else if (IDVal
== ".seh_setframe" ||
4642 (Parser
.isParsingMasm() && IDVal
.equals_insensitive(".setframe")))
4643 return parseDirectiveSEHSetFrame(DirectiveID
.getLoc());
4644 else if (IDVal
== ".seh_savereg" ||
4645 (Parser
.isParsingMasm() && IDVal
.equals_insensitive(".savereg")))
4646 return parseDirectiveSEHSaveReg(DirectiveID
.getLoc());
4647 else if (IDVal
== ".seh_savexmm" ||
4648 (Parser
.isParsingMasm() && IDVal
.equals_insensitive(".savexmm128")))
4649 return parseDirectiveSEHSaveXMM(DirectiveID
.getLoc());
4650 else if (IDVal
== ".seh_pushframe" ||
4651 (Parser
.isParsingMasm() && IDVal
.equals_insensitive(".pushframe")))
4652 return parseDirectiveSEHPushFrame(DirectiveID
.getLoc());
4657 bool X86AsmParser::parseDirectiveArch() {
4658 // Ignore .arch for now.
4659 getParser().parseStringToEndOfStatement();
4663 /// parseDirectiveNops
4664 /// ::= .nops size[, control]
4665 bool X86AsmParser::parseDirectiveNops(SMLoc L
) {
4666 int64_t NumBytes
= 0, Control
= 0;
4667 SMLoc NumBytesLoc
, ControlLoc
;
4668 const MCSubtargetInfo
& STI
= getSTI();
4669 NumBytesLoc
= getTok().getLoc();
4670 if (getParser().checkForValidSection() ||
4671 getParser().parseAbsoluteExpression(NumBytes
))
4674 if (parseOptionalToken(AsmToken::Comma
)) {
4675 ControlLoc
= getTok().getLoc();
4676 if (getParser().parseAbsoluteExpression(Control
))
4679 if (getParser().parseEOL())
4682 if (NumBytes
<= 0) {
4683 Error(NumBytesLoc
, "'.nops' directive with non-positive size");
4688 Error(ControlLoc
, "'.nops' directive with negative NOP size");
4693 getParser().getStreamer().emitNops(NumBytes
, Control
, L
, STI
);
4698 /// parseDirectiveEven
4700 bool X86AsmParser::parseDirectiveEven(SMLoc L
) {
4704 const MCSection
*Section
= getStreamer().getCurrentSectionOnly();
4706 getStreamer().initSections(false, getSTI());
4707 Section
= getStreamer().getCurrentSectionOnly();
4709 if (Section
->useCodeAlign())
4710 getStreamer().emitCodeAlignment(Align(2), &getSTI(), 0);
4712 getStreamer().emitValueToAlignment(Align(2), 0, 1, 0);
4716 /// ParseDirectiveCode
4717 /// ::= .code16 | .code32 | .code64
4718 bool X86AsmParser::ParseDirectiveCode(StringRef IDVal
, SMLoc L
) {
4719 MCAsmParser
&Parser
= getParser();
4721 if (IDVal
== ".code16") {
4723 if (!is16BitMode()) {
4724 SwitchMode(X86::Is16Bit
);
4725 getParser().getStreamer().emitAssemblerFlag(MCAF_Code16
);
4727 } else if (IDVal
== ".code16gcc") {
4728 // .code16gcc parses as if in 32-bit mode, but emits code in 16-bit mode.
4731 if (!is16BitMode()) {
4732 SwitchMode(X86::Is16Bit
);
4733 getParser().getStreamer().emitAssemblerFlag(MCAF_Code16
);
4735 } else if (IDVal
== ".code32") {
4737 if (!is32BitMode()) {
4738 SwitchMode(X86::Is32Bit
);
4739 getParser().getStreamer().emitAssemblerFlag(MCAF_Code32
);
4741 } else if (IDVal
== ".code64") {
4743 if (!is64BitMode()) {
4744 SwitchMode(X86::Is64Bit
);
4745 getParser().getStreamer().emitAssemblerFlag(MCAF_Code64
);
4748 Error(L
, "unknown directive " + IDVal
);
4756 bool X86AsmParser::parseDirectiveFPOProc(SMLoc L
) {
4757 MCAsmParser
&Parser
= getParser();
4760 if (Parser
.parseIdentifier(ProcName
))
4761 return Parser
.TokError("expected symbol name");
4762 if (Parser
.parseIntToken(ParamsSize
, "expected parameter byte count"))
4764 if (!isUIntN(32, ParamsSize
))
4765 return Parser
.TokError("parameters size out of range");
4768 MCSymbol
*ProcSym
= getContext().getOrCreateSymbol(ProcName
);
4769 return getTargetStreamer().emitFPOProc(ProcSym
, ParamsSize
, L
);
4772 // .cv_fpo_setframe ebp
4773 bool X86AsmParser::parseDirectiveFPOSetFrame(SMLoc L
) {
4776 if (parseRegister(Reg
, DummyLoc
, DummyLoc
) || parseEOL())
4778 return getTargetStreamer().emitFPOSetFrame(Reg
, L
);
4781 // .cv_fpo_pushreg ebx
4782 bool X86AsmParser::parseDirectiveFPOPushReg(SMLoc L
) {
4785 if (parseRegister(Reg
, DummyLoc
, DummyLoc
) || parseEOL())
4787 return getTargetStreamer().emitFPOPushReg(Reg
, L
);
4790 // .cv_fpo_stackalloc 20
4791 bool X86AsmParser::parseDirectiveFPOStackAlloc(SMLoc L
) {
4792 MCAsmParser
&Parser
= getParser();
4794 if (Parser
.parseIntToken(Offset
, "expected offset") || parseEOL())
4796 return getTargetStreamer().emitFPOStackAlloc(Offset
, L
);
4799 // .cv_fpo_stackalign 8
4800 bool X86AsmParser::parseDirectiveFPOStackAlign(SMLoc L
) {
4801 MCAsmParser
&Parser
= getParser();
4803 if (Parser
.parseIntToken(Offset
, "expected offset") || parseEOL())
4805 return getTargetStreamer().emitFPOStackAlign(Offset
, L
);
4808 // .cv_fpo_endprologue
4809 bool X86AsmParser::parseDirectiveFPOEndPrologue(SMLoc L
) {
4810 MCAsmParser
&Parser
= getParser();
4811 if (Parser
.parseEOL())
4813 return getTargetStreamer().emitFPOEndPrologue(L
);
4817 bool X86AsmParser::parseDirectiveFPOEndProc(SMLoc L
) {
4818 MCAsmParser
&Parser
= getParser();
4819 if (Parser
.parseEOL())
4821 return getTargetStreamer().emitFPOEndProc(L
);
4824 bool X86AsmParser::parseSEHRegisterNumber(unsigned RegClassID
,
4825 MCRegister
&RegNo
) {
4826 SMLoc startLoc
= getLexer().getLoc();
4827 const MCRegisterInfo
*MRI
= getContext().getRegisterInfo();
4829 // Try parsing the argument as a register first.
4830 if (getLexer().getTok().isNot(AsmToken::Integer
)) {
4832 if (parseRegister(RegNo
, startLoc
, endLoc
))
4835 if (!X86MCRegisterClasses
[RegClassID
].contains(RegNo
)) {
4836 return Error(startLoc
,
4837 "register is not supported for use with this directive");
4840 // Otherwise, an integer number matching the encoding of the desired
4841 // register may appear.
4843 if (getParser().parseAbsoluteExpression(EncodedReg
))
4846 // The SEH register number is the same as the encoding register number. Map
4847 // from the encoding back to the LLVM register number.
4849 for (MCPhysReg Reg
: X86MCRegisterClasses
[RegClassID
]) {
4850 if (MRI
->getEncodingValue(Reg
) == EncodedReg
) {
4856 return Error(startLoc
,
4857 "incorrect register number for use with this directive");
4864 bool X86AsmParser::parseDirectiveSEHPushReg(SMLoc Loc
) {
4866 if (parseSEHRegisterNumber(X86::GR64RegClassID
, Reg
))
4869 if (getLexer().isNot(AsmToken::EndOfStatement
))
4870 return TokError("expected end of directive");
4873 getStreamer().emitWinCFIPushReg(Reg
, Loc
);
4877 bool X86AsmParser::parseDirectiveSEHSetFrame(SMLoc Loc
) {
4880 if (parseSEHRegisterNumber(X86::GR64RegClassID
, Reg
))
4882 if (getLexer().isNot(AsmToken::Comma
))
4883 return TokError("you must specify a stack pointer offset");
4886 if (getParser().parseAbsoluteExpression(Off
))
4889 if (getLexer().isNot(AsmToken::EndOfStatement
))
4890 return TokError("expected end of directive");
4893 getStreamer().emitWinCFISetFrame(Reg
, Off
, Loc
);
4897 bool X86AsmParser::parseDirectiveSEHSaveReg(SMLoc Loc
) {
4900 if (parseSEHRegisterNumber(X86::GR64RegClassID
, Reg
))
4902 if (getLexer().isNot(AsmToken::Comma
))
4903 return TokError("you must specify an offset on the stack");
4906 if (getParser().parseAbsoluteExpression(Off
))
4909 if (getLexer().isNot(AsmToken::EndOfStatement
))
4910 return TokError("expected end of directive");
4913 getStreamer().emitWinCFISaveReg(Reg
, Off
, Loc
);
4917 bool X86AsmParser::parseDirectiveSEHSaveXMM(SMLoc Loc
) {
4920 if (parseSEHRegisterNumber(X86::VR128XRegClassID
, Reg
))
4922 if (getLexer().isNot(AsmToken::Comma
))
4923 return TokError("you must specify an offset on the stack");
4926 if (getParser().parseAbsoluteExpression(Off
))
4929 if (getLexer().isNot(AsmToken::EndOfStatement
))
4930 return TokError("expected end of directive");
4933 getStreamer().emitWinCFISaveXMM(Reg
, Off
, Loc
);
4937 bool X86AsmParser::parseDirectiveSEHPushFrame(SMLoc Loc
) {
4940 if (getLexer().is(AsmToken::At
)) {
4941 SMLoc startLoc
= getLexer().getLoc();
4943 if (!getParser().parseIdentifier(CodeID
)) {
4944 if (CodeID
!= "code")
4945 return Error(startLoc
, "expected @code");
4950 if (getLexer().isNot(AsmToken::EndOfStatement
))
4951 return TokError("expected end of directive");
4954 getStreamer().emitWinCFIPushFrame(Code
, Loc
);
4958 // Force static initialization.
4959 extern "C" LLVM_EXTERNAL_VISIBILITY
void LLVMInitializeX86AsmParser() {
4960 RegisterMCAsmParser
<X86AsmParser
> X(getTheX86_32Target());
4961 RegisterMCAsmParser
<X86AsmParser
> Y(getTheX86_64Target());
4964 #define GET_MATCHER_IMPLEMENTATION
4965 #include "X86GenAsmMatcher.inc"