1 //===-- X86AsmParser.cpp - Parse X86 assembly to MCInst instructions ------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 #include "MCTargetDesc/X86BaseInfo.h"
10 #include "MCTargetDesc/X86IntelInstPrinter.h"
11 #include "MCTargetDesc/X86MCExpr.h"
12 #include "MCTargetDesc/X86TargetStreamer.h"
13 #include "TargetInfo/X86TargetInfo.h"
14 #include "X86AsmParserCommon.h"
15 #include "X86Operand.h"
16 #include "llvm/ADT/STLExtras.h"
17 #include "llvm/ADT/SmallString.h"
18 #include "llvm/ADT/SmallVector.h"
19 #include "llvm/ADT/StringSwitch.h"
20 #include "llvm/ADT/Twine.h"
21 #include "llvm/MC/MCContext.h"
22 #include "llvm/MC/MCExpr.h"
23 #include "llvm/MC/MCInst.h"
24 #include "llvm/MC/MCInstrInfo.h"
25 #include "llvm/MC/MCParser/MCAsmLexer.h"
26 #include "llvm/MC/MCParser/MCAsmParser.h"
27 #include "llvm/MC/MCParser/MCParsedAsmOperand.h"
28 #include "llvm/MC/MCParser/MCTargetAsmParser.h"
29 #include "llvm/MC/MCRegisterInfo.h"
30 #include "llvm/MC/MCSection.h"
31 #include "llvm/MC/MCStreamer.h"
32 #include "llvm/MC/MCSubtargetInfo.h"
33 #include "llvm/MC/MCSymbol.h"
34 #include "llvm/Support/CommandLine.h"
35 #include "llvm/Support/Compiler.h"
36 #include "llvm/Support/SourceMgr.h"
37 #include "llvm/Support/TargetRegistry.h"
38 #include "llvm/Support/raw_ostream.h"
// Command-line switch enabling the experimental Load Value Injection (LVI)
// hardening transformations for inline-assembly code. Hidden because it is
// experimental; off by default (no cl::init is given, so the option
// default-constructs to false).
static cl::opt<bool> LVIInlineAsmHardening(
    "x86-experimental-lvi-inline-asm-hardening",
    cl::desc("Harden inline assembly code that may be vulnerable to Load Value"
             " Injection (LVI). This feature is experimental."),
    cl::Hidden);
// Validates a memory-operand scale factor. x86 addressing only permits
// scales of 1, 2, 4, or 8; on any other value this sets ErrMsg.
// NOTE(review): the tail of this function (its return statements) is not
// visible in this extract; callers such as
// CheckBaseRegAndIndexRegAndScale do `return checkScale(Scale, ErrMsg);`,
// which suggests it returns true on error — confirm against the full file.
static bool checkScale(unsigned Scale, StringRef &ErrMsg) {
  if (Scale != 1 && Scale != 2 && Scale != 4 && Scale != 8) {
    // Report the invalid scale; the caller surfaces ErrMsg as a diagnostic.
    ErrMsg = "scale factor in address must be 1, 2, 4 or 8";
59 static const char OpPrecedence
[] = {
84 class X86AsmParser
: public MCTargetAsmParser
{
85 ParseInstructionInfo
*InstInfo
;
87 unsigned ForcedDataPrefix
= 0;
97 VEXEncoding ForcedVEXEncoding
= VEXEncoding_Default
;
100 DispEncoding_Default
,
105 DispEncoding ForcedDispEncoding
= DispEncoding_Default
;
108 SMLoc
consumeToken() {
109 MCAsmParser
&Parser
= getParser();
110 SMLoc Result
= Parser
.getTok().getLoc();
// Returns the X86-specific target streamer attached to the output streamer.
// Asserts (debug builds only) that a target streamer is actually present
// before performing the downcast.
X86TargetStreamer &getTargetStreamer() {
  assert(getParser().getStreamer().getTargetStreamer() &&
         "do not have a target streamer");
  MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
  // static_cast is unchecked — presumably this parser is only ever paired
  // with an X86TargetStreamer; confirm against parser construction.
  return static_cast<X86TargetStreamer &>(TS);
122 unsigned MatchInstruction(const OperandVector
&Operands
, MCInst
&Inst
,
123 uint64_t &ErrorInfo
, FeatureBitset
&MissingFeatures
,
124 bool matchingInlineAsm
, unsigned VariantID
= 0) {
125 // In Code16GCC mode, match as 32-bit.
127 SwitchMode(X86::Mode32Bit
);
128 unsigned rv
= MatchInstructionImpl(Operands
, Inst
, ErrorInfo
,
129 MissingFeatures
, matchingInlineAsm
,
132 SwitchMode(X86::Mode16Bit
);
136 enum InfixCalculatorTok
{
161 enum IntelOperatorKind
{
168 enum MasmOperatorKind
{
175 class InfixCalculator
{
176 typedef std::pair
< InfixCalculatorTok
, int64_t > ICToken
;
177 SmallVector
<InfixCalculatorTok
, 4> InfixOperatorStack
;
178 SmallVector
<ICToken
, 4> PostfixStack
;
180 bool isUnaryOperator(InfixCalculatorTok Op
) const {
181 return Op
== IC_NEG
|| Op
== IC_NOT
;
185 int64_t popOperand() {
186 assert (!PostfixStack
.empty() && "Poped an empty stack!");
187 ICToken Op
= PostfixStack
.pop_back_val();
188 if (!(Op
.first
== IC_IMM
|| Op
.first
== IC_REGISTER
))
189 return -1; // The invalid Scale value will be caught later by checkScale
192 void pushOperand(InfixCalculatorTok Op
, int64_t Val
= 0) {
193 assert ((Op
== IC_IMM
|| Op
== IC_REGISTER
) &&
194 "Unexpected operand!");
195 PostfixStack
.push_back(std::make_pair(Op
, Val
));
// Discards the operator on top of the infix operator stack.
// Precondition (unchecked): the stack is non-empty.
void popOperator() { InfixOperatorStack.pop_back(); }
199 void pushOperator(InfixCalculatorTok Op
) {
200 // Push the new operator if the stack is empty.
201 if (InfixOperatorStack
.empty()) {
202 InfixOperatorStack
.push_back(Op
);
206 // Push the new operator if it has a higher precedence than the operator
207 // on the top of the stack or the operator on the top of the stack is a
209 unsigned Idx
= InfixOperatorStack
.size() - 1;
210 InfixCalculatorTok StackOp
= InfixOperatorStack
[Idx
];
211 if (OpPrecedence
[Op
] > OpPrecedence
[StackOp
] || StackOp
== IC_LPAREN
) {
212 InfixOperatorStack
.push_back(Op
);
216 // The operator on the top of the stack has higher precedence than the
218 unsigned ParenCount
= 0;
220 // Nothing to process.
221 if (InfixOperatorStack
.empty())
224 Idx
= InfixOperatorStack
.size() - 1;
225 StackOp
= InfixOperatorStack
[Idx
];
226 if (!(OpPrecedence
[StackOp
] >= OpPrecedence
[Op
] || ParenCount
))
229 // If we have an even parentheses count and we see a left parentheses,
230 // then stop processing.
231 if (!ParenCount
&& StackOp
== IC_LPAREN
)
234 if (StackOp
== IC_RPAREN
) {
236 InfixOperatorStack
.pop_back();
237 } else if (StackOp
== IC_LPAREN
) {
239 InfixOperatorStack
.pop_back();
241 InfixOperatorStack
.pop_back();
242 PostfixStack
.push_back(std::make_pair(StackOp
, 0));
245 // Push the new operator.
246 InfixOperatorStack
.push_back(Op
);
250 // Push any remaining operators onto the postfix stack.
251 while (!InfixOperatorStack
.empty()) {
252 InfixCalculatorTok StackOp
= InfixOperatorStack
.pop_back_val();
253 if (StackOp
!= IC_LPAREN
&& StackOp
!= IC_RPAREN
)
254 PostfixStack
.push_back(std::make_pair(StackOp
, 0));
257 if (PostfixStack
.empty())
260 SmallVector
<ICToken
, 16> OperandStack
;
261 for (unsigned i
= 0, e
= PostfixStack
.size(); i
!= e
; ++i
) {
262 ICToken Op
= PostfixStack
[i
];
263 if (Op
.first
== IC_IMM
|| Op
.first
== IC_REGISTER
) {
264 OperandStack
.push_back(Op
);
265 } else if (isUnaryOperator(Op
.first
)) {
266 assert (OperandStack
.size() > 0 && "Too few operands.");
267 ICToken Operand
= OperandStack
.pop_back_val();
268 assert (Operand
.first
== IC_IMM
&&
269 "Unary operation with a register!");
272 report_fatal_error("Unexpected operator!");
275 OperandStack
.push_back(std::make_pair(IC_IMM
, -Operand
.second
));
278 OperandStack
.push_back(std::make_pair(IC_IMM
, ~Operand
.second
));
282 assert (OperandStack
.size() > 1 && "Too few operands.");
284 ICToken Op2
= OperandStack
.pop_back_val();
285 ICToken Op1
= OperandStack
.pop_back_val();
288 report_fatal_error("Unexpected operator!");
291 Val
= Op1
.second
+ Op2
.second
;
292 OperandStack
.push_back(std::make_pair(IC_IMM
, Val
));
295 Val
= Op1
.second
- Op2
.second
;
296 OperandStack
.push_back(std::make_pair(IC_IMM
, Val
));
299 assert (Op1
.first
== IC_IMM
&& Op2
.first
== IC_IMM
&&
300 "Multiply operation with an immediate and a register!");
301 Val
= Op1
.second
* Op2
.second
;
302 OperandStack
.push_back(std::make_pair(IC_IMM
, Val
));
305 assert (Op1
.first
== IC_IMM
&& Op2
.first
== IC_IMM
&&
306 "Divide operation with an immediate and a register!");
307 assert (Op2
.second
!= 0 && "Division by zero!");
308 Val
= Op1
.second
/ Op2
.second
;
309 OperandStack
.push_back(std::make_pair(IC_IMM
, Val
));
312 assert (Op1
.first
== IC_IMM
&& Op2
.first
== IC_IMM
&&
313 "Modulo operation with an immediate and a register!");
314 Val
= Op1
.second
% Op2
.second
;
315 OperandStack
.push_back(std::make_pair(IC_IMM
, Val
));
318 assert (Op1
.first
== IC_IMM
&& Op2
.first
== IC_IMM
&&
319 "Or operation with an immediate and a register!");
320 Val
= Op1
.second
| Op2
.second
;
321 OperandStack
.push_back(std::make_pair(IC_IMM
, Val
));
324 assert(Op1
.first
== IC_IMM
&& Op2
.first
== IC_IMM
&&
325 "Xor operation with an immediate and a register!");
326 Val
= Op1
.second
^ Op2
.second
;
327 OperandStack
.push_back(std::make_pair(IC_IMM
, Val
));
330 assert (Op1
.first
== IC_IMM
&& Op2
.first
== IC_IMM
&&
331 "And operation with an immediate and a register!");
332 Val
= Op1
.second
& Op2
.second
;
333 OperandStack
.push_back(std::make_pair(IC_IMM
, Val
));
336 assert (Op1
.first
== IC_IMM
&& Op2
.first
== IC_IMM
&&
337 "Left shift operation with an immediate and a register!");
338 Val
= Op1
.second
<< Op2
.second
;
339 OperandStack
.push_back(std::make_pair(IC_IMM
, Val
));
342 assert (Op1
.first
== IC_IMM
&& Op2
.first
== IC_IMM
&&
343 "Right shift operation with an immediate and a register!");
344 Val
= Op1
.second
>> Op2
.second
;
345 OperandStack
.push_back(std::make_pair(IC_IMM
, Val
));
348 assert(Op1
.first
== IC_IMM
&& Op2
.first
== IC_IMM
&&
349 "Equals operation with an immediate and a register!");
350 Val
= (Op1
.second
== Op2
.second
) ? -1 : 0;
351 OperandStack
.push_back(std::make_pair(IC_IMM
, Val
));
354 assert(Op1
.first
== IC_IMM
&& Op2
.first
== IC_IMM
&&
355 "Not-equals operation with an immediate and a register!");
356 Val
= (Op1
.second
!= Op2
.second
) ? -1 : 0;
357 OperandStack
.push_back(std::make_pair(IC_IMM
, Val
));
360 assert(Op1
.first
== IC_IMM
&& Op2
.first
== IC_IMM
&&
361 "Less-than operation with an immediate and a register!");
362 Val
= (Op1
.second
< Op2
.second
) ? -1 : 0;
363 OperandStack
.push_back(std::make_pair(IC_IMM
, Val
));
366 assert(Op1
.first
== IC_IMM
&& Op2
.first
== IC_IMM
&&
367 "Less-than-or-equal operation with an immediate and a "
369 Val
= (Op1
.second
<= Op2
.second
) ? -1 : 0;
370 OperandStack
.push_back(std::make_pair(IC_IMM
, Val
));
373 assert(Op1
.first
== IC_IMM
&& Op2
.first
== IC_IMM
&&
374 "Greater-than operation with an immediate and a register!");
375 Val
= (Op1
.second
> Op2
.second
) ? -1 : 0;
376 OperandStack
.push_back(std::make_pair(IC_IMM
, Val
));
379 assert(Op1
.first
== IC_IMM
&& Op2
.first
== IC_IMM
&&
380 "Greater-than-or-equal operation with an immediate and a "
382 Val
= (Op1
.second
>= Op2
.second
) ? -1 : 0;
383 OperandStack
.push_back(std::make_pair(IC_IMM
, Val
));
388 assert (OperandStack
.size() == 1 && "Expected a single result.");
389 return OperandStack
.pop_back_val().second
;
393 enum IntelExprState
{
424 class IntelExprStateMachine
{
425 IntelExprState State
, PrevState
;
426 unsigned BaseReg
, IndexReg
, TmpReg
, Scale
;
431 InlineAsmIdentifierInfo Info
;
435 SMLoc OffsetOperatorLoc
;
438 bool setSymRef(const MCExpr
*Val
, StringRef ID
, StringRef
&ErrMsg
) {
440 ErrMsg
= "cannot use more than one symbol in memory operand";
449 IntelExprStateMachine()
450 : State(IES_INIT
), PrevState(IES_ERROR
), BaseReg(0), IndexReg(0),
451 TmpReg(0), Scale(0), Imm(0), Sym(nullptr), BracCount(0),
452 MemExpr(false), OffsetOperator(false) {}
// Accumulates a displacement into the running immediate term.
void addImm(int64_t imm) { Imm += imm; }
// Current bracket-nesting depth of the '[' ... ']' expression being parsed.
short getBracCount() const { return BracCount; }
// True if the expression has been classified as a memory operand.
bool isMemExpr() const { return MemExpr; }
// True if the expression came from an OFFSET operator.
bool isOffsetOperator() const { return OffsetOperator; }
// Source location of the OFFSET operator (valid when isOffsetOperator()).
SMLoc getOffsetLoc() const { return OffsetOperatorLoc; }
// Components of the parsed memory reference: base, index, and scale.
unsigned getBaseReg() const { return BaseReg; }
unsigned getIndexReg() const { return IndexReg; }
unsigned getScale() const { return Scale; }
// Symbol referenced by the expression, or null if none was seen.
const MCExpr *getSym() const { return Sym; }
StringRef getSymName() const { return SymName; }
// Type information recorded for the operand (name, sizes in bits/elements).
StringRef getType() const { return CurType.Name; }
unsigned getSize() const { return CurType.Size; }
unsigned getElementSize() const { return CurType.ElementSize; }
unsigned getLength() const { return CurType.Length; }
// Final immediate value: accumulated Imm plus the result of evaluating the
// postfix calculator. Non-const because IC.execute() consumes its stacks.
int64_t getImm() { return Imm + IC.execute(); }
469 bool isValidEndState() const {
470 return State
== IES_RBRAC
|| State
== IES_INTEGER
;
// True if the state machine entered the error state at any point.
bool hadError() const { return State == IES_ERROR; }
// MS inline-asm identifier info captured for the last symbol reference.
const InlineAsmIdentifierInfo &getIdentifierInfo() const { return Info; }
476 IntelExprState CurrState
= State
;
485 IC
.pushOperator(IC_OR
);
488 PrevState
= CurrState
;
491 IntelExprState CurrState
= State
;
500 IC
.pushOperator(IC_XOR
);
503 PrevState
= CurrState
;
506 IntelExprState CurrState
= State
;
515 IC
.pushOperator(IC_AND
);
518 PrevState
= CurrState
;
521 IntelExprState CurrState
= State
;
530 IC
.pushOperator(IC_EQ
);
533 PrevState
= CurrState
;
536 IntelExprState CurrState
= State
;
545 IC
.pushOperator(IC_NE
);
548 PrevState
= CurrState
;
551 IntelExprState CurrState
= State
;
560 IC
.pushOperator(IC_LT
);
563 PrevState
= CurrState
;
566 IntelExprState CurrState
= State
;
575 IC
.pushOperator(IC_LE
);
578 PrevState
= CurrState
;
581 IntelExprState CurrState
= State
;
590 IC
.pushOperator(IC_GT
);
593 PrevState
= CurrState
;
596 IntelExprState CurrState
= State
;
605 IC
.pushOperator(IC_GE
);
608 PrevState
= CurrState
;
611 IntelExprState CurrState
= State
;
620 IC
.pushOperator(IC_LSHIFT
);
623 PrevState
= CurrState
;
626 IntelExprState CurrState
= State
;
635 IC
.pushOperator(IC_RSHIFT
);
638 PrevState
= CurrState
;
640 bool onPlus(StringRef
&ErrMsg
) {
641 IntelExprState CurrState
= State
;
651 IC
.pushOperator(IC_PLUS
);
652 if (CurrState
== IES_REGISTER
&& PrevState
!= IES_MULTIPLY
) {
653 // If we already have a BaseReg, then assume this is the IndexReg with
654 // no explicit scale.
659 ErrMsg
= "BaseReg/IndexReg already set!";
668 PrevState
= CurrState
;
671 bool onMinus(StringRef
&ErrMsg
) {
672 IntelExprState CurrState
= State
;
702 // push minus operator if it is not a negate operator
703 if (CurrState
== IES_REGISTER
|| CurrState
== IES_RPAREN
||
704 CurrState
== IES_INTEGER
|| CurrState
== IES_RBRAC
||
705 CurrState
== IES_OFFSET
)
706 IC
.pushOperator(IC_MINUS
);
707 else if (PrevState
== IES_REGISTER
&& CurrState
== IES_MULTIPLY
) {
708 // We have negate operator for Scale: it's illegal
709 ErrMsg
= "Scale can't be negative";
712 IC
.pushOperator(IC_NEG
);
713 if (CurrState
== IES_REGISTER
&& PrevState
!= IES_MULTIPLY
) {
714 // If we already have a BaseReg, then assume this is the IndexReg with
715 // no explicit scale.
720 ErrMsg
= "BaseReg/IndexReg already set!";
729 PrevState
= CurrState
;
733 IntelExprState CurrState
= State
;
759 IC
.pushOperator(IC_NOT
);
762 PrevState
= CurrState
;
764 bool onRegister(unsigned Reg
, StringRef
&ErrMsg
) {
765 IntelExprState CurrState
= State
;
773 State
= IES_REGISTER
;
775 IC
.pushOperand(IC_REGISTER
);
778 // Index Register - Scale * Register
779 if (PrevState
== IES_INTEGER
) {
781 ErrMsg
= "BaseReg/IndexReg already set!";
784 State
= IES_REGISTER
;
786 // Get the scale and replace the 'Scale * Register' with '0'.
787 Scale
= IC
.popOperand();
788 if (checkScale(Scale
, ErrMsg
))
790 IC
.pushOperand(IC_IMM
);
797 PrevState
= CurrState
;
800 bool onIdentifierExpr(const MCExpr
*SymRef
, StringRef SymRefName
,
801 const InlineAsmIdentifierInfo
&IDInfo
,
802 const AsmTypeInfo
&Type
, bool ParsingMSInlineAsm
,
804 // InlineAsm: Treat an enum value as an integer
805 if (ParsingMSInlineAsm
)
806 if (IDInfo
.isKind(InlineAsmIdentifierInfo::IK_EnumVal
))
807 return onInteger(IDInfo
.Enum
.EnumVal
, ErrMsg
);
808 // Treat a symbolic constant like an integer
809 if (auto *CE
= dyn_cast
<MCConstantExpr
>(SymRef
))
810 return onInteger(CE
->getValue(), ErrMsg
);
823 if (setSymRef(SymRef
, SymRefName
, ErrMsg
))
827 IC
.pushOperand(IC_IMM
);
828 if (ParsingMSInlineAsm
)
835 bool onInteger(int64_t TmpInt
, StringRef
&ErrMsg
) {
836 IntelExprState CurrState
= State
;
862 if (PrevState
== IES_REGISTER
&& CurrState
== IES_MULTIPLY
) {
863 // Index Register - Register * Scale
865 ErrMsg
= "BaseReg/IndexReg already set!";
870 if (checkScale(Scale
, ErrMsg
))
872 // Get the scale and replace the 'Register * Scale' with '0'.
875 IC
.pushOperand(IC_IMM
, TmpInt
);
879 PrevState
= CurrState
;
891 State
= IES_MULTIPLY
;
892 IC
.pushOperator(IC_MULTIPLY
);
905 IC
.pushOperator(IC_DIVIDE
);
918 IC
.pushOperator(IC_MOD
);
934 IC
.pushOperator(IC_PLUS
);
936 CurType
.Size
= CurType
.ElementSize
;
940 assert(!BracCount
&& "BracCount should be zero on parsing's start");
949 IntelExprState CurrState
= State
;
958 if (BracCount
-- != 1)
961 if (CurrState
== IES_REGISTER
&& PrevState
!= IES_MULTIPLY
) {
962 // If we already have a BaseReg, then assume this is the IndexReg with
963 // no explicit scale.
967 assert (!IndexReg
&& "BaseReg/IndexReg already set!");
974 PrevState
= CurrState
;
978 IntelExprState CurrState
= State
;
1004 IC
.pushOperator(IC_LPAREN
);
1007 PrevState
= CurrState
;
1021 IC
.pushOperator(IC_RPAREN
);
1025 bool onOffset(const MCExpr
*Val
, SMLoc OffsetLoc
, StringRef ID
,
1026 const InlineAsmIdentifierInfo
&IDInfo
,
1027 bool ParsingMSInlineAsm
, StringRef
&ErrMsg
) {
1031 ErrMsg
= "unexpected offset operator expression";
1036 if (setSymRef(Val
, ID
, ErrMsg
))
1038 OffsetOperator
= true;
1039 OffsetOperatorLoc
= OffsetLoc
;
1041 // As we cannot yet resolve the actual value (offset), we retain
1042 // the requested semantics by pushing a '0' to the operands stack
1043 IC
.pushOperand(IC_IMM
);
1044 if (ParsingMSInlineAsm
) {
1051 void onCast(AsmTypeInfo Info
) {
// Records the operand's type information (replaces any previous value).
void setTypeInfo(AsmTypeInfo Type) { CurType = Type; }
1066 bool Error(SMLoc L
, const Twine
&Msg
, SMRange Range
= None
,
1067 bool MatchingInlineAsm
= false) {
1068 MCAsmParser
&Parser
= getParser();
1069 if (MatchingInlineAsm
) {
1070 if (!getLexer().isAtStartOfStatement())
1071 Parser
.eatToEndOfStatement();
1074 return Parser
.Error(L
, Msg
, Range
);
1077 bool MatchRegisterByName(unsigned &RegNo
, StringRef RegName
, SMLoc StartLoc
,
1079 bool ParseRegister(unsigned &RegNo
, SMLoc
&StartLoc
, SMLoc
&EndLoc
,
1080 bool RestoreOnFailure
);
1082 std::unique_ptr
<X86Operand
> DefaultMemSIOperand(SMLoc Loc
);
1083 std::unique_ptr
<X86Operand
> DefaultMemDIOperand(SMLoc Loc
);
1084 bool IsSIReg(unsigned Reg
);
1085 unsigned GetSIDIForRegClass(unsigned RegClassID
, unsigned Reg
, bool IsSIReg
);
1087 AddDefaultSrcDestOperands(OperandVector
&Operands
,
1088 std::unique_ptr
<llvm::MCParsedAsmOperand
> &&Src
,
1089 std::unique_ptr
<llvm::MCParsedAsmOperand
> &&Dst
);
1090 bool VerifyAndAdjustOperands(OperandVector
&OrigOperands
,
1091 OperandVector
&FinalOperands
);
1092 bool ParseOperand(OperandVector
&Operands
);
1093 bool ParseATTOperand(OperandVector
&Operands
);
1094 bool ParseIntelOperand(OperandVector
&Operands
);
1095 bool ParseIntelOffsetOperator(const MCExpr
*&Val
, StringRef
&ID
,
1096 InlineAsmIdentifierInfo
&Info
, SMLoc
&End
);
1097 bool ParseIntelDotOperator(IntelExprStateMachine
&SM
, SMLoc
&End
);
1098 unsigned IdentifyIntelInlineAsmOperator(StringRef Name
);
1099 unsigned ParseIntelInlineAsmOperator(unsigned OpKind
);
1100 unsigned IdentifyMasmOperator(StringRef Name
);
1101 bool ParseMasmOperator(unsigned OpKind
, int64_t &Val
);
1102 bool ParseRoundingModeOp(SMLoc Start
, OperandVector
&Operands
);
1103 bool ParseIntelNamedOperator(StringRef Name
, IntelExprStateMachine
&SM
,
1104 bool &ParseError
, SMLoc
&End
);
1105 bool ParseMasmNamedOperator(StringRef Name
, IntelExprStateMachine
&SM
,
1106 bool &ParseError
, SMLoc
&End
);
1107 void RewriteIntelExpression(IntelExprStateMachine
&SM
, SMLoc Start
,
1109 bool ParseIntelExpression(IntelExprStateMachine
&SM
, SMLoc
&End
);
1110 bool ParseIntelInlineAsmIdentifier(const MCExpr
*&Val
, StringRef
&Identifier
,
1111 InlineAsmIdentifierInfo
&Info
,
1112 bool IsUnevaluatedOperand
, SMLoc
&End
,
1113 bool IsParsingOffsetOperator
= false);
1115 bool ParseMemOperand(unsigned SegReg
, const MCExpr
*Disp
, SMLoc StartLoc
,
1116 SMLoc EndLoc
, OperandVector
&Operands
);
1118 X86::CondCode
ParseConditionCode(StringRef CCode
);
1120 bool ParseIntelMemoryOperandSize(unsigned &Size
);
1121 bool CreateMemForMSInlineAsm(unsigned SegReg
, const MCExpr
*Disp
,
1122 unsigned BaseReg
, unsigned IndexReg
,
1123 unsigned Scale
, SMLoc Start
, SMLoc End
,
1124 unsigned Size
, StringRef Identifier
,
1125 const InlineAsmIdentifierInfo
&Info
,
1126 OperandVector
&Operands
);
1128 bool parseDirectiveArch();
1129 bool parseDirectiveNops(SMLoc L
);
1130 bool parseDirectiveEven(SMLoc L
);
1131 bool ParseDirectiveCode(StringRef IDVal
, SMLoc L
);
1133 /// CodeView FPO data directives.
1134 bool parseDirectiveFPOProc(SMLoc L
);
1135 bool parseDirectiveFPOSetFrame(SMLoc L
);
1136 bool parseDirectiveFPOPushReg(SMLoc L
);
1137 bool parseDirectiveFPOStackAlloc(SMLoc L
);
1138 bool parseDirectiveFPOStackAlign(SMLoc L
);
1139 bool parseDirectiveFPOEndPrologue(SMLoc L
);
1140 bool parseDirectiveFPOEndProc(SMLoc L
);
1143 bool parseSEHRegisterNumber(unsigned RegClassID
, unsigned &RegNo
);
1144 bool parseDirectiveSEHPushReg(SMLoc
);
1145 bool parseDirectiveSEHSetFrame(SMLoc
);
1146 bool parseDirectiveSEHSaveReg(SMLoc
);
1147 bool parseDirectiveSEHSaveXMM(SMLoc
);
1148 bool parseDirectiveSEHPushFrame(SMLoc
);
1150 unsigned checkTargetMatchPredicate(MCInst
&Inst
) override
;
1152 bool validateInstruction(MCInst
&Inst
, const OperandVector
&Ops
);
1153 bool processInstruction(MCInst
&Inst
, const OperandVector
&Ops
);
1155 // Load Value Injection (LVI) Mitigations for machine code
1156 void emitWarningForSpecialLVIInstruction(SMLoc Loc
);
1157 void applyLVICFIMitigation(MCInst
&Inst
, MCStreamer
&Out
);
1158 void applyLVILoadHardeningMitigation(MCInst
&Inst
, MCStreamer
&Out
);
1160 /// Wrapper around MCStreamer::emitInstruction(). Possibly adds
1161 /// instrumentation around Inst.
1162 void emitInstruction(MCInst
&Inst
, OperandVector
&Operands
, MCStreamer
&Out
);
1164 bool MatchAndEmitInstruction(SMLoc IDLoc
, unsigned &Opcode
,
1165 OperandVector
&Operands
, MCStreamer
&Out
,
1166 uint64_t &ErrorInfo
,
1167 bool MatchingInlineAsm
) override
;
1169 void MatchFPUWaitAlias(SMLoc IDLoc
, X86Operand
&Op
, OperandVector
&Operands
,
1170 MCStreamer
&Out
, bool MatchingInlineAsm
);
1172 bool ErrorMissingFeature(SMLoc IDLoc
, const FeatureBitset
&MissingFeatures
,
1173 bool MatchingInlineAsm
);
1175 bool MatchAndEmitATTInstruction(SMLoc IDLoc
, unsigned &Opcode
,
1176 OperandVector
&Operands
, MCStreamer
&Out
,
1177 uint64_t &ErrorInfo
,
1178 bool MatchingInlineAsm
);
1180 bool MatchAndEmitIntelInstruction(SMLoc IDLoc
, unsigned &Opcode
,
1181 OperandVector
&Operands
, MCStreamer
&Out
,
1182 uint64_t &ErrorInfo
,
1183 bool MatchingInlineAsm
);
1185 bool OmitRegisterFromClobberLists(unsigned RegNo
) override
;
1187 /// Parses AVX512 specific operand primitives: masked registers ({%k<NUM>}, {z})
1188 /// and memory broadcasting ({1to<NUM>}) primitives, updating Operands vector if required.
1189 /// return false if no parsing errors occurred, true otherwise.
1190 bool HandleAVX512Operand(OperandVector
&Operands
);
1192 bool ParseZ(std::unique_ptr
<X86Operand
> &Z
, const SMLoc
&StartLoc
);
1194 bool is64BitMode() const {
1195 // FIXME: Can tablegen auto-generate this?
1196 return getSTI().getFeatureBits()[X86::Mode64Bit
];
1198 bool is32BitMode() const {
1199 // FIXME: Can tablegen auto-generate this?
1200 return getSTI().getFeatureBits()[X86::Mode32Bit
];
1202 bool is16BitMode() const {
1203 // FIXME: Can tablegen auto-generate this?
1204 return getSTI().getFeatureBits()[X86::Mode16Bit
];
1206 void SwitchMode(unsigned mode
) {
1207 MCSubtargetInfo
&STI
= copySTI();
1208 FeatureBitset
AllModes({X86::Mode64Bit
, X86::Mode32Bit
, X86::Mode16Bit
});
1209 FeatureBitset OldMode
= STI
.getFeatureBits() & AllModes
;
1210 FeatureBitset FB
= ComputeAvailableFeatures(
1211 STI
.ToggleFeature(OldMode
.flip(mode
)));
1212 setAvailableFeatures(FB
);
1214 assert(FeatureBitset({mode
}) == (STI
.getFeatureBits() & AllModes
));
1217 unsigned getPointerWidth() {
1218 if (is16BitMode()) return 16;
1219 if (is32BitMode()) return 32;
1220 if (is64BitMode()) return 64;
1221 llvm_unreachable("invalid mode");
1224 bool isParsingIntelSyntax() {
1225 return getParser().getAssemblerDialect();
1228 /// @name Auto-generated Matcher Functions
1231 #define GET_ASSEMBLER_HEADER
1232 #include "X86GenAsmMatcher.inc"
1237 enum X86MatchResultTy
{
1238 Match_Unsupported
= FIRST_TARGET_MATCH_RESULT_TY
,
1239 #define GET_OPERAND_DIAGNOSTIC_TYPES
1240 #include "X86GenAsmMatcher.inc"
1243 X86AsmParser(const MCSubtargetInfo
&sti
, MCAsmParser
&Parser
,
1244 const MCInstrInfo
&mii
, const MCTargetOptions
&Options
)
1245 : MCTargetAsmParser(Options
, sti
, mii
), InstInfo(nullptr),
1248 Parser
.addAliasForDirective(".word", ".2byte");
1250 // Initialize the set of available features.
1251 setAvailableFeatures(ComputeAvailableFeatures(getSTI().getFeatureBits()));
1254 bool ParseRegister(unsigned &RegNo
, SMLoc
&StartLoc
, SMLoc
&EndLoc
) override
;
1255 OperandMatchResultTy
tryParseRegister(unsigned &RegNo
, SMLoc
&StartLoc
,
1256 SMLoc
&EndLoc
) override
;
1258 bool parsePrimaryExpr(const MCExpr
*&Res
, SMLoc
&EndLoc
) override
;
1260 bool ParseInstruction(ParseInstructionInfo
&Info
, StringRef Name
,
1261 SMLoc NameLoc
, OperandVector
&Operands
) override
;
1263 bool ParseDirective(AsmToken DirectiveID
) override
;
1265 } // end anonymous namespace
1267 /// @name Auto-generated Match Functions
1270 static unsigned MatchRegisterName(StringRef Name
);
1274 static bool CheckBaseRegAndIndexRegAndScale(unsigned BaseReg
, unsigned IndexReg
,
1275 unsigned Scale
, bool Is64BitMode
,
1276 StringRef
&ErrMsg
) {
1277 // If we have both a base register and an index register make sure they are
1278 // both 64-bit or 32-bit registers.
1279 // To support VSIB, IndexReg can be 128-bit or 256-bit registers.
1282 !(BaseReg
== X86::RIP
|| BaseReg
== X86::EIP
||
1283 X86MCRegisterClasses
[X86::GR16RegClassID
].contains(BaseReg
) ||
1284 X86MCRegisterClasses
[X86::GR32RegClassID
].contains(BaseReg
) ||
1285 X86MCRegisterClasses
[X86::GR64RegClassID
].contains(BaseReg
))) {
1286 ErrMsg
= "invalid base+index expression";
1290 if (IndexReg
!= 0 &&
1291 !(IndexReg
== X86::EIZ
|| IndexReg
== X86::RIZ
||
1292 X86MCRegisterClasses
[X86::GR16RegClassID
].contains(IndexReg
) ||
1293 X86MCRegisterClasses
[X86::GR32RegClassID
].contains(IndexReg
) ||
1294 X86MCRegisterClasses
[X86::GR64RegClassID
].contains(IndexReg
) ||
1295 X86MCRegisterClasses
[X86::VR128XRegClassID
].contains(IndexReg
) ||
1296 X86MCRegisterClasses
[X86::VR256XRegClassID
].contains(IndexReg
) ||
1297 X86MCRegisterClasses
[X86::VR512RegClassID
].contains(IndexReg
))) {
1298 ErrMsg
= "invalid base+index expression";
1302 if (((BaseReg
== X86::RIP
|| BaseReg
== X86::EIP
) && IndexReg
!= 0) ||
1303 IndexReg
== X86::EIP
|| IndexReg
== X86::RIP
||
1304 IndexReg
== X86::ESP
|| IndexReg
== X86::RSP
) {
1305 ErrMsg
= "invalid base+index expression";
1309 // Check for use of invalid 16-bit registers. Only BX/BP/SI/DI are allowed,
1310 // and then only in non-64-bit modes.
1311 if (X86MCRegisterClasses
[X86::GR16RegClassID
].contains(BaseReg
) &&
1312 (Is64BitMode
|| (BaseReg
!= X86::BX
&& BaseReg
!= X86::BP
&&
1313 BaseReg
!= X86::SI
&& BaseReg
!= X86::DI
))) {
1314 ErrMsg
= "invalid 16-bit base register";
1319 X86MCRegisterClasses
[X86::GR16RegClassID
].contains(IndexReg
)) {
1320 ErrMsg
= "16-bit memory operand may not include only index register";
1324 if (BaseReg
!= 0 && IndexReg
!= 0) {
1325 if (X86MCRegisterClasses
[X86::GR64RegClassID
].contains(BaseReg
) &&
1326 (X86MCRegisterClasses
[X86::GR16RegClassID
].contains(IndexReg
) ||
1327 X86MCRegisterClasses
[X86::GR32RegClassID
].contains(IndexReg
) ||
1328 IndexReg
== X86::EIZ
)) {
1329 ErrMsg
= "base register is 64-bit, but index register is not";
1332 if (X86MCRegisterClasses
[X86::GR32RegClassID
].contains(BaseReg
) &&
1333 (X86MCRegisterClasses
[X86::GR16RegClassID
].contains(IndexReg
) ||
1334 X86MCRegisterClasses
[X86::GR64RegClassID
].contains(IndexReg
) ||
1335 IndexReg
== X86::RIZ
)) {
1336 ErrMsg
= "base register is 32-bit, but index register is not";
1339 if (X86MCRegisterClasses
[X86::GR16RegClassID
].contains(BaseReg
)) {
1340 if (X86MCRegisterClasses
[X86::GR32RegClassID
].contains(IndexReg
) ||
1341 X86MCRegisterClasses
[X86::GR64RegClassID
].contains(IndexReg
)) {
1342 ErrMsg
= "base register is 16-bit, but index register is not";
1345 if ((BaseReg
!= X86::BX
&& BaseReg
!= X86::BP
) ||
1346 (IndexReg
!= X86::SI
&& IndexReg
!= X86::DI
)) {
1347 ErrMsg
= "invalid 16-bit base/index register combination";
1353 // RIP/EIP-relative addressing is only supported in 64-bit mode.
1354 if (!Is64BitMode
&& BaseReg
!= 0 &&
1355 (BaseReg
== X86::RIP
|| BaseReg
== X86::EIP
)) {
1356 ErrMsg
= "IP-relative addressing requires 64-bit mode";
1360 return checkScale(Scale
, ErrMsg
);
1363 bool X86AsmParser::MatchRegisterByName(unsigned &RegNo
, StringRef RegName
,
1364 SMLoc StartLoc
, SMLoc EndLoc
) {
1365 // If we encounter a %, ignore it. This code handles registers with and
1366 // without the prefix, unprefixed registers can occur in cfi directives.
1367 RegName
.consume_front("%");
1369 RegNo
= MatchRegisterName(RegName
);
1371 // If the match failed, try the register name as lowercase.
1373 RegNo
= MatchRegisterName(RegName
.lower());
1375 // The "flags" and "mxcsr" registers cannot be referenced directly.
1376 // Treat it as an identifier instead.
1377 if (isParsingMSInlineAsm() && isParsingIntelSyntax() &&
1378 (RegNo
== X86::EFLAGS
|| RegNo
== X86::MXCSR
))
1381 if (!is64BitMode()) {
1382 // FIXME: This should be done using Requires<Not64BitMode> and
1383 // Requires<In64BitMode> so "eiz" usage in 64-bit instructions can be also
1385 if (RegNo
== X86::RIZ
|| RegNo
== X86::RIP
||
1386 X86MCRegisterClasses
[X86::GR64RegClassID
].contains(RegNo
) ||
1387 X86II::isX86_64NonExtLowByteReg(RegNo
) ||
1388 X86II::isX86_64ExtendedReg(RegNo
)) {
1389 return Error(StartLoc
,
1390 "register %" + RegName
+ " is only available in 64-bit mode",
1391 SMRange(StartLoc
, EndLoc
));
1395 // If this is "db[0-15]", match it as an alias
1397 if (RegNo
== 0 && RegName
.startswith("db")) {
1398 if (RegName
.size() == 3) {
1399 switch (RegName
[2]) {
1431 } else if (RegName
.size() == 4 && RegName
[2] == '1') {
1432 switch (RegName
[3]) {
1456 if (isParsingIntelSyntax())
1458 return Error(StartLoc
, "invalid register name", SMRange(StartLoc
, EndLoc
));
1463 bool X86AsmParser::ParseRegister(unsigned &RegNo
, SMLoc
&StartLoc
,
1464 SMLoc
&EndLoc
, bool RestoreOnFailure
) {
1465 MCAsmParser
&Parser
= getParser();
1466 MCAsmLexer
&Lexer
= getLexer();
1469 SmallVector
<AsmToken
, 5> Tokens
;
1470 auto OnFailure
= [RestoreOnFailure
, &Lexer
, &Tokens
]() {
1471 if (RestoreOnFailure
) {
1472 while (!Tokens
.empty()) {
1473 Lexer
.UnLex(Tokens
.pop_back_val());
1478 const AsmToken
&PercentTok
= Parser
.getTok();
1479 StartLoc
= PercentTok
.getLoc();
1481 // If we encounter a %, ignore it. This code handles registers with and
1482 // without the prefix, unprefixed registers can occur in cfi directives.
1483 if (!isParsingIntelSyntax() && PercentTok
.is(AsmToken::Percent
)) {
1484 Tokens
.push_back(PercentTok
);
1485 Parser
.Lex(); // Eat percent token.
1488 const AsmToken
&Tok
= Parser
.getTok();
1489 EndLoc
= Tok
.getEndLoc();
1491 if (Tok
.isNot(AsmToken::Identifier
)) {
1493 if (isParsingIntelSyntax()) return true;
1494 return Error(StartLoc
, "invalid register name",
1495 SMRange(StartLoc
, EndLoc
));
1498 if (MatchRegisterByName(RegNo
, Tok
.getString(), StartLoc
, EndLoc
)) {
1503 // Parse "%st" as "%st(0)" and "%st(1)", which is multiple tokens.
1504 if (RegNo
== X86::ST0
) {
1505 Tokens
.push_back(Tok
);
1506 Parser
.Lex(); // Eat 'st'
1508 // Check to see if we have '(4)' after %st.
1509 if (Lexer
.isNot(AsmToken::LParen
))
1512 Tokens
.push_back(Parser
.getTok());
1515 const AsmToken
&IntTok
= Parser
.getTok();
1516 if (IntTok
.isNot(AsmToken::Integer
)) {
1518 return Error(IntTok
.getLoc(), "expected stack index");
1520 switch (IntTok
.getIntVal()) {
1521 case 0: RegNo
= X86::ST0
; break;
1522 case 1: RegNo
= X86::ST1
; break;
1523 case 2: RegNo
= X86::ST2
; break;
1524 case 3: RegNo
= X86::ST3
; break;
1525 case 4: RegNo
= X86::ST4
; break;
1526 case 5: RegNo
= X86::ST5
; break;
1527 case 6: RegNo
= X86::ST6
; break;
1528 case 7: RegNo
= X86::ST7
; break;
1531 return Error(IntTok
.getLoc(), "invalid stack index");
1535 Tokens
.push_back(IntTok
);
1537 if (Lexer
.isNot(AsmToken::RParen
)) {
1539 return Error(Parser
.getTok().getLoc(), "expected ')'");
1542 EndLoc
= Parser
.getTok().getEndLoc();
1543 Parser
.Lex(); // Eat ')'
1547 EndLoc
= Parser
.getTok().getEndLoc();
1551 if (isParsingIntelSyntax()) return true;
1552 return Error(StartLoc
, "invalid register name",
1553 SMRange(StartLoc
, EndLoc
));
1556 Parser
.Lex(); // Eat identifier token.
1560 bool X86AsmParser::ParseRegister(unsigned &RegNo
, SMLoc
&StartLoc
,
1562 return ParseRegister(RegNo
, StartLoc
, EndLoc
, /*RestoreOnFailure=*/false);
1565 OperandMatchResultTy
X86AsmParser::tryParseRegister(unsigned &RegNo
,
1569 ParseRegister(RegNo
, StartLoc
, EndLoc
, /*RestoreOnFailure=*/true);
1570 bool PendingErrors
= getParser().hasPendingError();
1571 getParser().clearPendingErrors();
1573 return MatchOperand_ParseFail
;
1575 return MatchOperand_NoMatch
;
1576 return MatchOperand_Success
;
1579 std::unique_ptr
<X86Operand
> X86AsmParser::DefaultMemSIOperand(SMLoc Loc
) {
1580 bool Parse32
= is32BitMode() || Code16GCC
;
1581 unsigned Basereg
= is64BitMode() ? X86::RSI
: (Parse32
? X86::ESI
: X86::SI
);
1582 const MCExpr
*Disp
= MCConstantExpr::create(0, getContext());
1583 return X86Operand::CreateMem(getPointerWidth(), /*SegReg=*/0, Disp
,
1584 /*BaseReg=*/Basereg
, /*IndexReg=*/0, /*Scale=*/1,
1588 std::unique_ptr
<X86Operand
> X86AsmParser::DefaultMemDIOperand(SMLoc Loc
) {
1589 bool Parse32
= is32BitMode() || Code16GCC
;
1590 unsigned Basereg
= is64BitMode() ? X86::RDI
: (Parse32
? X86::EDI
: X86::DI
);
1591 const MCExpr
*Disp
= MCConstantExpr::create(0, getContext());
1592 return X86Operand::CreateMem(getPointerWidth(), /*SegReg=*/0, Disp
,
1593 /*BaseReg=*/Basereg
, /*IndexReg=*/0, /*Scale=*/1,
1597 bool X86AsmParser::IsSIReg(unsigned Reg
) {
1599 default: llvm_unreachable("Only (R|E)SI and (R|E)DI are expected!");
1611 unsigned X86AsmParser::GetSIDIForRegClass(unsigned RegClassID
, unsigned Reg
,
1613 switch (RegClassID
) {
1614 default: llvm_unreachable("Unexpected register class");
1615 case X86::GR64RegClassID
:
1616 return IsSIReg
? X86::RSI
: X86::RDI
;
1617 case X86::GR32RegClassID
:
1618 return IsSIReg
? X86::ESI
: X86::EDI
;
1619 case X86::GR16RegClassID
:
1620 return IsSIReg
? X86::SI
: X86::DI
;
1624 void X86AsmParser::AddDefaultSrcDestOperands(
1625 OperandVector
& Operands
, std::unique_ptr
<llvm::MCParsedAsmOperand
> &&Src
,
1626 std::unique_ptr
<llvm::MCParsedAsmOperand
> &&Dst
) {
1627 if (isParsingIntelSyntax()) {
1628 Operands
.push_back(std::move(Dst
));
1629 Operands
.push_back(std::move(Src
));
1632 Operands
.push_back(std::move(Src
));
1633 Operands
.push_back(std::move(Dst
));
1637 bool X86AsmParser::VerifyAndAdjustOperands(OperandVector
&OrigOperands
,
1638 OperandVector
&FinalOperands
) {
1640 if (OrigOperands
.size() > 1) {
1641 // Check if sizes match, OrigOperands also contains the instruction name
1642 assert(OrigOperands
.size() == FinalOperands
.size() + 1 &&
1643 "Operand size mismatch");
1645 SmallVector
<std::pair
<SMLoc
, std::string
>, 2> Warnings
;
1646 // Verify types match
1647 int RegClassID
= -1;
1648 for (unsigned int i
= 0; i
< FinalOperands
.size(); ++i
) {
1649 X86Operand
&OrigOp
= static_cast<X86Operand
&>(*OrigOperands
[i
+ 1]);
1650 X86Operand
&FinalOp
= static_cast<X86Operand
&>(*FinalOperands
[i
]);
1652 if (FinalOp
.isReg() &&
1653 (!OrigOp
.isReg() || FinalOp
.getReg() != OrigOp
.getReg()))
1654 // Return false and let a normal complaint about bogus operands happen
1657 if (FinalOp
.isMem()) {
1659 if (!OrigOp
.isMem())
1660 // Return false and let a normal complaint about bogus operands happen
1663 unsigned OrigReg
= OrigOp
.Mem
.BaseReg
;
1664 unsigned FinalReg
= FinalOp
.Mem
.BaseReg
;
1666 // If we've already encounterd a register class, make sure all register
1667 // bases are of the same register class
1668 if (RegClassID
!= -1 &&
1669 !X86MCRegisterClasses
[RegClassID
].contains(OrigReg
)) {
1670 return Error(OrigOp
.getStartLoc(),
1671 "mismatching source and destination index registers");
1674 if (X86MCRegisterClasses
[X86::GR64RegClassID
].contains(OrigReg
))
1675 RegClassID
= X86::GR64RegClassID
;
1676 else if (X86MCRegisterClasses
[X86::GR32RegClassID
].contains(OrigReg
))
1677 RegClassID
= X86::GR32RegClassID
;
1678 else if (X86MCRegisterClasses
[X86::GR16RegClassID
].contains(OrigReg
))
1679 RegClassID
= X86::GR16RegClassID
;
1681 // Unexpected register class type
1682 // Return false and let a normal complaint about bogus operands happen
1685 bool IsSI
= IsSIReg(FinalReg
);
1686 FinalReg
= GetSIDIForRegClass(RegClassID
, FinalReg
, IsSI
);
1688 if (FinalReg
!= OrigReg
) {
1689 std::string RegName
= IsSI
? "ES:(R|E)SI" : "ES:(R|E)DI";
1690 Warnings
.push_back(std::make_pair(
1691 OrigOp
.getStartLoc(),
1692 "memory operand is only for determining the size, " + RegName
+
1693 " will be used for the location"));
1696 FinalOp
.Mem
.Size
= OrigOp
.Mem
.Size
;
1697 FinalOp
.Mem
.SegReg
= OrigOp
.Mem
.SegReg
;
1698 FinalOp
.Mem
.BaseReg
= FinalReg
;
1702 // Produce warnings only if all the operands passed the adjustment - prevent
1703 // legal cases like "movsd (%rax), %xmm0" mistakenly produce warnings
1704 for (auto &WarningMsg
: Warnings
) {
1705 Warning(WarningMsg
.first
, WarningMsg
.second
);
1708 // Remove old operands
1709 for (unsigned int i
= 0; i
< FinalOperands
.size(); ++i
)
1710 OrigOperands
.pop_back();
1712 // OrigOperands.append(FinalOperands.begin(), FinalOperands.end());
1713 for (unsigned int i
= 0; i
< FinalOperands
.size(); ++i
)
1714 OrigOperands
.push_back(std::move(FinalOperands
[i
]));
1719 bool X86AsmParser::ParseOperand(OperandVector
&Operands
) {
1720 if (isParsingIntelSyntax())
1721 return ParseIntelOperand(Operands
);
1723 return ParseATTOperand(Operands
);
1726 bool X86AsmParser::CreateMemForMSInlineAsm(
1727 unsigned SegReg
, const MCExpr
*Disp
, unsigned BaseReg
, unsigned IndexReg
,
1728 unsigned Scale
, SMLoc Start
, SMLoc End
, unsigned Size
, StringRef Identifier
,
1729 const InlineAsmIdentifierInfo
&Info
, OperandVector
&Operands
) {
1730 // If we found a decl other than a VarDecl, then assume it is a FuncDecl or
1731 // some other label reference.
1732 if (Info
.isKind(InlineAsmIdentifierInfo::IK_Label
)) {
1733 // Insert an explicit size if the user didn't have one.
1735 Size
= getPointerWidth();
1736 InstInfo
->AsmRewrites
->emplace_back(AOK_SizeDirective
, Start
,
1739 // Create an absolute memory reference in order to match against
1740 // instructions taking a PC relative operand.
1741 Operands
.push_back(X86Operand::CreateMem(getPointerWidth(), Disp
, Start
,
1742 End
, Size
, Identifier
,
1746 // We either have a direct symbol reference, or an offset from a symbol. The
1747 // parser always puts the symbol on the LHS, so look there for size
1748 // calculation purposes.
1749 unsigned FrontendSize
= 0;
1750 void *Decl
= nullptr;
1751 bool IsGlobalLV
= false;
1752 if (Info
.isKind(InlineAsmIdentifierInfo::IK_Var
)) {
1753 // Size is in terms of bits in this context.
1754 FrontendSize
= Info
.Var
.Type
* 8;
1755 Decl
= Info
.Var
.Decl
;
1756 IsGlobalLV
= Info
.Var
.IsGlobalLV
;
1758 // It is widely common for MS InlineAsm to use a global variable and one/two
1759 // registers in a mmory expression, and though unaccessible via rip/eip.
1760 if (IsGlobalLV
&& (BaseReg
|| IndexReg
)) {
1762 X86Operand::CreateMem(getPointerWidth(), Disp
, Start
, End
));
1765 // Otherwise, we set the base register to a non-zero value
1766 // if we don't know the actual value at this time. This is necessary to
1767 // get the matching correct in some cases.
1768 BaseReg
= BaseReg
? BaseReg
: 1;
1769 Operands
.push_back(X86Operand::CreateMem(
1770 getPointerWidth(), SegReg
, Disp
, BaseReg
, IndexReg
, Scale
, Start
, End
,
1772 /*DefaultBaseReg=*/X86::RIP
, Identifier
, Decl
, FrontendSize
));
1776 // Some binary bitwise operators have a named synonymous
1777 // Query a candidate string for being such a named operator
1778 // and if so - invoke the appropriate handler
1779 bool X86AsmParser::ParseIntelNamedOperator(StringRef Name
,
1780 IntelExprStateMachine
&SM
,
1781 bool &ParseError
, SMLoc
&End
) {
1782 // A named operator should be either lower or upper case, but not a mix...
1783 // except in MASM, which uses full case-insensitivity.
1784 if (Name
.compare(Name
.lower()) && Name
.compare(Name
.upper()) &&
1785 !getParser().isParsingMasm())
1787 if (Name
.equals_insensitive("not")) {
1789 } else if (Name
.equals_insensitive("or")) {
1791 } else if (Name
.equals_insensitive("shl")) {
1793 } else if (Name
.equals_insensitive("shr")) {
1795 } else if (Name
.equals_insensitive("xor")) {
1797 } else if (Name
.equals_insensitive("and")) {
1799 } else if (Name
.equals_insensitive("mod")) {
1801 } else if (Name
.equals_insensitive("offset")) {
1802 SMLoc OffsetLoc
= getTok().getLoc();
1803 const MCExpr
*Val
= nullptr;
1805 InlineAsmIdentifierInfo Info
;
1806 ParseError
= ParseIntelOffsetOperator(Val
, ID
, Info
, End
);
1811 SM
.onOffset(Val
, OffsetLoc
, ID
, Info
, isParsingMSInlineAsm(), ErrMsg
);
1813 return Error(SMLoc::getFromPointer(Name
.data()), ErrMsg
);
1817 if (!Name
.equals_insensitive("offset"))
1818 End
= consumeToken();
1821 bool X86AsmParser::ParseMasmNamedOperator(StringRef Name
,
1822 IntelExprStateMachine
&SM
,
1823 bool &ParseError
, SMLoc
&End
) {
1824 if (Name
.equals_insensitive("eq")) {
1826 } else if (Name
.equals_insensitive("ne")) {
1828 } else if (Name
.equals_insensitive("lt")) {
1830 } else if (Name
.equals_insensitive("le")) {
1832 } else if (Name
.equals_insensitive("gt")) {
1834 } else if (Name
.equals_insensitive("ge")) {
1839 End
= consumeToken();
1843 bool X86AsmParser::ParseIntelExpression(IntelExprStateMachine
&SM
, SMLoc
&End
) {
1844 MCAsmParser
&Parser
= getParser();
1847 AsmToken::TokenKind PrevTK
= AsmToken::Error
;
1850 // Get a fresh reference on each loop iteration in case the previous
1851 // iteration moved the token storage during UnLex().
1852 const AsmToken
&Tok
= Parser
.getTok();
1854 bool UpdateLocLex
= true;
1855 AsmToken::TokenKind TK
= getLexer().getKind();
1859 if ((Done
= SM
.isValidEndState()))
1861 return Error(Tok
.getLoc(), "unknown token in expression");
1862 case AsmToken::Error
:
1863 return Error(getLexer().getErrLoc(), getLexer().getErr());
1865 case AsmToken::EndOfStatement
:
1868 case AsmToken::Real
:
1869 // DotOperator: [ebx].0
1870 UpdateLocLex
= false;
1871 if (ParseIntelDotOperator(SM
, End
))
1875 if (!Parser
.isParsingMasm()) {
1876 if ((Done
= SM
.isValidEndState()))
1878 return Error(Tok
.getLoc(), "unknown token in expression");
1880 // MASM allows spaces around the dot operator (e.g., "var . x")
1882 UpdateLocLex
= false;
1883 if (ParseIntelDotOperator(SM
, End
))
1886 case AsmToken::Dollar
:
1887 if (!Parser
.isParsingMasm()) {
1888 if ((Done
= SM
.isValidEndState()))
1890 return Error(Tok
.getLoc(), "unknown token in expression");
1893 case AsmToken::String
: {
1894 if (Parser
.isParsingMasm()) {
1895 // MASM parsers handle strings in expressions as constants.
1896 SMLoc ValueLoc
= Tok
.getLoc();
1899 if (Parser
.parsePrimaryExpr(Val
, End
, nullptr))
1901 UpdateLocLex
= false;
1902 if (!Val
->evaluateAsAbsolute(Res
, getStreamer().getAssemblerPtr()))
1903 return Error(ValueLoc
, "expected absolute value");
1904 if (SM
.onInteger(Res
, ErrMsg
))
1905 return Error(ValueLoc
, ErrMsg
);
1911 case AsmToken::Identifier
: {
1912 SMLoc IdentLoc
= Tok
.getLoc();
1913 StringRef Identifier
= Tok
.getString();
1914 UpdateLocLex
= false;
1915 if (Parser
.isParsingMasm()) {
1916 size_t DotOffset
= Identifier
.find_first_of('.');
1917 if (DotOffset
!= StringRef::npos
) {
1919 StringRef LHS
= Identifier
.slice(0, DotOffset
);
1920 StringRef Dot
= Identifier
.slice(DotOffset
, DotOffset
+ 1);
1921 StringRef RHS
= Identifier
.slice(DotOffset
+ 1, StringRef::npos
);
1923 getLexer().UnLex(AsmToken(AsmToken::Identifier
, RHS
));
1925 getLexer().UnLex(AsmToken(AsmToken::Dot
, Dot
));
1927 getLexer().UnLex(AsmToken(AsmToken::Identifier
, LHS
));
1932 // (MASM only) <TYPE> PTR operator
1933 if (Parser
.isParsingMasm()) {
1934 const AsmToken
&NextTok
= getLexer().peekTok();
1935 if (NextTok
.is(AsmToken::Identifier
) &&
1936 NextTok
.getIdentifier().equals_insensitive("ptr")) {
1938 if (Parser
.lookUpType(Identifier
, Info
))
1939 return Error(Tok
.getLoc(), "unknown type");
1941 // Eat type and PTR.
1943 End
= consumeToken();
1947 // Register, or (MASM only) <register>.<field>
1949 if (Tok
.is(AsmToken::Identifier
)) {
1950 if (!ParseRegister(Reg
, IdentLoc
, End
, /*RestoreOnFailure=*/true)) {
1951 if (SM
.onRegister(Reg
, ErrMsg
))
1952 return Error(IdentLoc
, ErrMsg
);
1955 if (Parser
.isParsingMasm()) {
1956 const std::pair
<StringRef
, StringRef
> IDField
=
1957 Tok
.getString().split('.');
1958 const StringRef ID
= IDField
.first
, Field
= IDField
.second
;
1959 SMLoc IDEndLoc
= SMLoc::getFromPointer(ID
.data() + ID
.size());
1960 if (!Field
.empty() &&
1961 !MatchRegisterByName(Reg
, ID
, IdentLoc
, IDEndLoc
)) {
1962 if (SM
.onRegister(Reg
, ErrMsg
))
1963 return Error(IdentLoc
, ErrMsg
);
1966 SMLoc FieldStartLoc
= SMLoc::getFromPointer(Field
.data());
1967 if (Parser
.lookUpField(Field
, Info
))
1968 return Error(FieldStartLoc
, "unknown offset");
1969 else if (SM
.onPlus(ErrMsg
))
1970 return Error(getTok().getLoc(), ErrMsg
);
1971 else if (SM
.onInteger(Info
.Offset
, ErrMsg
))
1972 return Error(IdentLoc
, ErrMsg
);
1973 SM
.setTypeInfo(Info
.Type
);
1975 End
= consumeToken();
1980 // Operator synonymous ("not", "or" etc.)
1981 bool ParseError
= false;
1982 if (ParseIntelNamedOperator(Identifier
, SM
, ParseError
, End
)) {
1987 if (Parser
.isParsingMasm() &&
1988 ParseMasmNamedOperator(Identifier
, SM
, ParseError
, End
)) {
1993 // Symbol reference, when parsing assembly content
1994 InlineAsmIdentifierInfo Info
;
1995 AsmFieldInfo FieldInfo
;
1997 if (isParsingMSInlineAsm() || Parser
.isParsingMasm()) {
1998 // MS Dot Operator expression
1999 if (Identifier
.count('.') &&
2000 (PrevTK
== AsmToken::RBrac
|| PrevTK
== AsmToken::RParen
)) {
2001 if (ParseIntelDotOperator(SM
, End
))
2006 if (isParsingMSInlineAsm()) {
2007 // MS InlineAsm operators (TYPE/LENGTH/SIZE)
2008 if (unsigned OpKind
= IdentifyIntelInlineAsmOperator(Identifier
)) {
2009 if (int64_t Val
= ParseIntelInlineAsmOperator(OpKind
)) {
2010 if (SM
.onInteger(Val
, ErrMsg
))
2011 return Error(IdentLoc
, ErrMsg
);
2017 // MS InlineAsm identifier
2018 // Call parseIdentifier() to combine @ with the identifier behind it.
2019 if (TK
== AsmToken::At
&& Parser
.parseIdentifier(Identifier
))
2020 return Error(IdentLoc
, "expected identifier");
2021 if (ParseIntelInlineAsmIdentifier(Val
, Identifier
, Info
, false, End
))
2023 else if (SM
.onIdentifierExpr(Val
, Identifier
, Info
, FieldInfo
.Type
,
2025 return Error(IdentLoc
, ErrMsg
);
2028 if (Parser
.isParsingMasm()) {
2029 if (unsigned OpKind
= IdentifyMasmOperator(Identifier
)) {
2031 if (ParseMasmOperator(OpKind
, Val
))
2033 if (SM
.onInteger(Val
, ErrMsg
))
2034 return Error(IdentLoc
, ErrMsg
);
2037 if (!getParser().lookUpType(Identifier
, FieldInfo
.Type
)) {
2038 // Field offset immediate; <TYPE>.<field specification>
2040 bool EndDot
= parseOptionalToken(AsmToken::Dot
);
2041 while (EndDot
|| (getTok().is(AsmToken::Identifier
) &&
2042 getTok().getString().startswith("."))) {
2043 getParser().parseIdentifier(Identifier
);
2045 Identifier
.consume_front(".");
2046 EndDot
= Identifier
.consume_back(".");
2047 if (getParser().lookUpField(FieldInfo
.Type
.Name
, Identifier
,
2050 SMLoc::getFromPointer(Identifier
.data() + Identifier
.size());
2051 return Error(IdentLoc
, "Unable to lookup field reference!",
2052 SMRange(IdentLoc
, IDEnd
));
2055 EndDot
= parseOptionalToken(AsmToken::Dot
);
2057 if (SM
.onInteger(FieldInfo
.Offset
, ErrMsg
))
2058 return Error(IdentLoc
, ErrMsg
);
2062 if (getParser().parsePrimaryExpr(Val
, End
, &FieldInfo
.Type
)) {
2063 return Error(Tok
.getLoc(), "Unexpected identifier!");
2064 } else if (SM
.onIdentifierExpr(Val
, Identifier
, Info
, FieldInfo
.Type
,
2066 return Error(IdentLoc
, ErrMsg
);
2070 case AsmToken::Integer
: {
2071 // Look for 'b' or 'f' following an Integer as a directional label
2072 SMLoc Loc
= getTok().getLoc();
2073 int64_t IntVal
= getTok().getIntVal();
2074 End
= consumeToken();
2075 UpdateLocLex
= false;
2076 if (getLexer().getKind() == AsmToken::Identifier
) {
2077 StringRef IDVal
= getTok().getString();
2078 if (IDVal
== "f" || IDVal
== "b") {
2080 getContext().getDirectionalLocalSymbol(IntVal
, IDVal
== "b");
2081 MCSymbolRefExpr::VariantKind Variant
= MCSymbolRefExpr::VK_None
;
2083 MCSymbolRefExpr::create(Sym
, Variant
, getContext());
2084 if (IDVal
== "b" && Sym
->isUndefined())
2085 return Error(Loc
, "invalid reference to undefined symbol");
2086 StringRef Identifier
= Sym
->getName();
2087 InlineAsmIdentifierInfo Info
;
2089 if (SM
.onIdentifierExpr(Val
, Identifier
, Info
, Type
,
2090 isParsingMSInlineAsm(), ErrMsg
))
2091 return Error(Loc
, ErrMsg
);
2092 End
= consumeToken();
2094 if (SM
.onInteger(IntVal
, ErrMsg
))
2095 return Error(Loc
, ErrMsg
);
2098 if (SM
.onInteger(IntVal
, ErrMsg
))
2099 return Error(Loc
, ErrMsg
);
2103 case AsmToken::Plus
:
2104 if (SM
.onPlus(ErrMsg
))
2105 return Error(getTok().getLoc(), ErrMsg
);
2107 case AsmToken::Minus
:
2108 if (SM
.onMinus(ErrMsg
))
2109 return Error(getTok().getLoc(), ErrMsg
);
2111 case AsmToken::Tilde
: SM
.onNot(); break;
2112 case AsmToken::Star
: SM
.onStar(); break;
2113 case AsmToken::Slash
: SM
.onDivide(); break;
2114 case AsmToken::Percent
: SM
.onMod(); break;
2115 case AsmToken::Pipe
: SM
.onOr(); break;
2116 case AsmToken::Caret
: SM
.onXor(); break;
2117 case AsmToken::Amp
: SM
.onAnd(); break;
2118 case AsmToken::LessLess
:
2119 SM
.onLShift(); break;
2120 case AsmToken::GreaterGreater
:
2121 SM
.onRShift(); break;
2122 case AsmToken::LBrac
:
2124 return Error(Tok
.getLoc(), "unexpected bracket encountered");
2126 case AsmToken::RBrac
:
2128 return Error(Tok
.getLoc(), "unexpected bracket encountered");
2130 case AsmToken::LParen
: SM
.onLParen(); break;
2131 case AsmToken::RParen
: SM
.onRParen(); break;
2134 return Error(Tok
.getLoc(), "unknown token in expression");
2136 if (!Done
&& UpdateLocLex
)
2137 End
= consumeToken();
2144 void X86AsmParser::RewriteIntelExpression(IntelExprStateMachine
&SM
,
2145 SMLoc Start
, SMLoc End
) {
2147 unsigned ExprLen
= End
.getPointer() - Start
.getPointer();
2148 // Skip everything before a symbol displacement (if we have one)
2149 if (SM
.getSym() && !SM
.isOffsetOperator()) {
2150 StringRef SymName
= SM
.getSymName();
2151 if (unsigned Len
= SymName
.data() - Start
.getPointer())
2152 InstInfo
->AsmRewrites
->emplace_back(AOK_Skip
, Start
, Len
);
2153 Loc
= SMLoc::getFromPointer(SymName
.data() + SymName
.size());
2154 ExprLen
= End
.getPointer() - (SymName
.data() + SymName
.size());
2155 // If we have only a symbol than there's no need for complex rewrite,
2156 // simply skip everything after it
2157 if (!(SM
.getBaseReg() || SM
.getIndexReg() || SM
.getImm())) {
2159 InstInfo
->AsmRewrites
->emplace_back(AOK_Skip
, Loc
, ExprLen
);
2163 // Build an Intel Expression rewrite
2164 StringRef BaseRegStr
;
2165 StringRef IndexRegStr
;
2166 StringRef OffsetNameStr
;
2167 if (SM
.getBaseReg())
2168 BaseRegStr
= X86IntelInstPrinter::getRegisterName(SM
.getBaseReg());
2169 if (SM
.getIndexReg())
2170 IndexRegStr
= X86IntelInstPrinter::getRegisterName(SM
.getIndexReg());
2171 if (SM
.isOffsetOperator())
2172 OffsetNameStr
= SM
.getSymName();
2174 IntelExpr
Expr(BaseRegStr
, IndexRegStr
, SM
.getScale(), OffsetNameStr
,
2175 SM
.getImm(), SM
.isMemExpr());
2176 InstInfo
->AsmRewrites
->emplace_back(Loc
, ExprLen
, Expr
);
2179 // Inline assembly may use variable names with namespace alias qualifiers.
2180 bool X86AsmParser::ParseIntelInlineAsmIdentifier(
2181 const MCExpr
*&Val
, StringRef
&Identifier
, InlineAsmIdentifierInfo
&Info
,
2182 bool IsUnevaluatedOperand
, SMLoc
&End
, bool IsParsingOffsetOperator
) {
2183 MCAsmParser
&Parser
= getParser();
2184 assert(isParsingMSInlineAsm() && "Expected to be parsing inline assembly.");
2187 StringRef
LineBuf(Identifier
.data());
2188 SemaCallback
->LookupInlineAsmIdentifier(LineBuf
, Info
, IsUnevaluatedOperand
);
2190 const AsmToken
&Tok
= Parser
.getTok();
2191 SMLoc Loc
= Tok
.getLoc();
2193 // Advance the token stream until the end of the current token is
2194 // after the end of what the frontend claimed.
2195 const char *EndPtr
= Tok
.getLoc().getPointer() + LineBuf
.size();
2197 End
= Tok
.getEndLoc();
2199 } while (End
.getPointer() < EndPtr
);
2200 Identifier
= LineBuf
;
2202 // The frontend should end parsing on an assembler token boundary, unless it
2204 assert((End
.getPointer() == EndPtr
||
2205 Info
.isKind(InlineAsmIdentifierInfo::IK_Invalid
)) &&
2206 "frontend claimed part of a token?");
2208 // If the identifier lookup was unsuccessful, assume that we are dealing with
2210 if (Info
.isKind(InlineAsmIdentifierInfo::IK_Invalid
)) {
2211 StringRef InternalName
=
2212 SemaCallback
->LookupInlineAsmLabel(Identifier
, getSourceManager(),
2214 assert(InternalName
.size() && "We should have an internal name here.");
2215 // Push a rewrite for replacing the identifier name with the internal name,
2216 // unless we are parsing the operand of an offset operator
2217 if (!IsParsingOffsetOperator
)
2218 InstInfo
->AsmRewrites
->emplace_back(AOK_Label
, Loc
, Identifier
.size(),
2221 Identifier
= InternalName
;
2222 } else if (Info
.isKind(InlineAsmIdentifierInfo::IK_EnumVal
))
2224 // Create the symbol reference.
2225 MCSymbol
*Sym
= getContext().getOrCreateSymbol(Identifier
);
2226 MCSymbolRefExpr::VariantKind Variant
= MCSymbolRefExpr::VK_None
;
2227 Val
= MCSymbolRefExpr::create(Sym
, Variant
, getParser().getContext());
2231 //ParseRoundingModeOp - Parse AVX-512 rounding mode operand
2232 bool X86AsmParser::ParseRoundingModeOp(SMLoc Start
, OperandVector
&Operands
) {
2233 MCAsmParser
&Parser
= getParser();
2234 const AsmToken
&Tok
= Parser
.getTok();
2235 // Eat "{" and mark the current place.
2236 const SMLoc consumedToken
= consumeToken();
2237 if (Tok
.isNot(AsmToken::Identifier
))
2238 return Error(Tok
.getLoc(), "Expected an identifier after {");
2239 if (Tok
.getIdentifier().startswith("r")){
2240 int rndMode
= StringSwitch
<int>(Tok
.getIdentifier())
2241 .Case("rn", X86::STATIC_ROUNDING::TO_NEAREST_INT
)
2242 .Case("rd", X86::STATIC_ROUNDING::TO_NEG_INF
)
2243 .Case("ru", X86::STATIC_ROUNDING::TO_POS_INF
)
2244 .Case("rz", X86::STATIC_ROUNDING::TO_ZERO
)
2247 return Error(Tok
.getLoc(), "Invalid rounding mode.");
2248 Parser
.Lex(); // Eat "r*" of r*-sae
2249 if (!getLexer().is(AsmToken::Minus
))
2250 return Error(Tok
.getLoc(), "Expected - at this point");
2251 Parser
.Lex(); // Eat "-"
2252 Parser
.Lex(); // Eat the sae
2253 if (!getLexer().is(AsmToken::RCurly
))
2254 return Error(Tok
.getLoc(), "Expected } at this point");
2255 SMLoc End
= Tok
.getEndLoc();
2256 Parser
.Lex(); // Eat "}"
2257 const MCExpr
*RndModeOp
=
2258 MCConstantExpr::create(rndMode
, Parser
.getContext());
2259 Operands
.push_back(X86Operand::CreateImm(RndModeOp
, Start
, End
));
2262 if(Tok
.getIdentifier().equals("sae")){
2263 Parser
.Lex(); // Eat the sae
2264 if (!getLexer().is(AsmToken::RCurly
))
2265 return Error(Tok
.getLoc(), "Expected } at this point");
2266 Parser
.Lex(); // Eat "}"
2267 Operands
.push_back(X86Operand::CreateToken("{sae}", consumedToken
));
2270 return Error(Tok
.getLoc(), "unknown token in expression");
2273 /// Parse the '.' operator.
2274 bool X86AsmParser::ParseIntelDotOperator(IntelExprStateMachine
&SM
,
2276 const AsmToken
&Tok
= getTok();
2279 // Drop the optional '.'.
2280 StringRef DotDispStr
= Tok
.getString();
2281 if (DotDispStr
.startswith("."))
2282 DotDispStr
= DotDispStr
.drop_front(1);
2283 StringRef TrailingDot
;
2285 // .Imm gets lexed as a real.
2286 if (Tok
.is(AsmToken::Real
)) {
2288 DotDispStr
.getAsInteger(10, DotDisp
);
2289 Info
.Offset
= DotDisp
.getZExtValue();
2290 } else if ((isParsingMSInlineAsm() || getParser().isParsingMasm()) &&
2291 Tok
.is(AsmToken::Identifier
)) {
2292 if (DotDispStr
.endswith(".")) {
2293 TrailingDot
= DotDispStr
.substr(DotDispStr
.size() - 1);
2294 DotDispStr
= DotDispStr
.drop_back(1);
2296 const std::pair
<StringRef
, StringRef
> BaseMember
= DotDispStr
.split('.');
2297 const StringRef Base
= BaseMember
.first
, Member
= BaseMember
.second
;
2298 if (getParser().lookUpField(SM
.getType(), DotDispStr
, Info
) &&
2299 getParser().lookUpField(SM
.getSymName(), DotDispStr
, Info
) &&
2300 getParser().lookUpField(DotDispStr
, Info
) &&
2302 SemaCallback
->LookupInlineAsmField(Base
, Member
, Info
.Offset
)))
2303 return Error(Tok
.getLoc(), "Unable to lookup field reference!");
2305 return Error(Tok
.getLoc(), "Unexpected token type!");
2308 // Eat the DotExpression and update End
2309 End
= SMLoc::getFromPointer(DotDispStr
.data());
2310 const char *DotExprEndLoc
= DotDispStr
.data() + DotDispStr
.size();
2311 while (Tok
.getLoc().getPointer() < DotExprEndLoc
)
2313 if (!TrailingDot
.empty())
2314 getLexer().UnLex(AsmToken(AsmToken::Dot
, TrailingDot
));
2315 SM
.addImm(Info
.Offset
);
2316 SM
.setTypeInfo(Info
.Type
);
2320 /// Parse the 'offset' operator.
2321 /// This operator is used to specify the location of a given operand
2322 bool X86AsmParser::ParseIntelOffsetOperator(const MCExpr
*&Val
, StringRef
&ID
,
2323 InlineAsmIdentifierInfo
&Info
,
2325 // Eat offset, mark start of identifier.
2326 SMLoc Start
= Lex().getLoc();
2327 ID
= getTok().getString();
2328 if (!isParsingMSInlineAsm()) {
2329 if ((getTok().isNot(AsmToken::Identifier
) &&
2330 getTok().isNot(AsmToken::String
)) ||
2331 getParser().parsePrimaryExpr(Val
, End
, nullptr))
2332 return Error(Start
, "unexpected token!");
2333 } else if (ParseIntelInlineAsmIdentifier(Val
, ID
, Info
, false, End
, true)) {
2334 return Error(Start
, "unable to lookup expression");
2335 } else if (Info
.isKind(InlineAsmIdentifierInfo::IK_EnumVal
)) {
2336 return Error(Start
, "offset operator cannot yet handle constants");
2341 // Query a candidate string for being an Intel assembly operator
2342 // Report back its kind, or IOK_INVALID if does not evaluated as a known one
2343 unsigned X86AsmParser::IdentifyIntelInlineAsmOperator(StringRef Name
) {
2344 return StringSwitch
<unsigned>(Name
)
2345 .Cases("TYPE","type",IOK_TYPE
)
2346 .Cases("SIZE","size",IOK_SIZE
)
2347 .Cases("LENGTH","length",IOK_LENGTH
)
2348 .Default(IOK_INVALID
);
2351 /// Parse the 'LENGTH', 'TYPE' and 'SIZE' operators. The LENGTH operator
2352 /// returns the number of elements in an array. It returns the value 1 for
2353 /// non-array variables. The SIZE operator returns the size of a C or C++
2354 /// variable. A variable's size is the product of its LENGTH and TYPE. The
2355 /// TYPE operator returns the size of a C or C++ type or variable. If the
2356 /// variable is an array, TYPE returns the size of a single element.
2357 unsigned X86AsmParser::ParseIntelInlineAsmOperator(unsigned OpKind
) {
2358 MCAsmParser
&Parser
= getParser();
2359 const AsmToken
&Tok
= Parser
.getTok();
2360 Parser
.Lex(); // Eat operator.
2362 const MCExpr
*Val
= nullptr;
2363 InlineAsmIdentifierInfo Info
;
2364 SMLoc Start
= Tok
.getLoc(), End
;
2365 StringRef Identifier
= Tok
.getString();
2366 if (ParseIntelInlineAsmIdentifier(Val
, Identifier
, Info
,
2367 /*IsUnevaluatedOperand=*/true, End
))
2370 if (!Info
.isKind(InlineAsmIdentifierInfo::IK_Var
)) {
2371 Error(Start
, "unable to lookup expression");
2377 default: llvm_unreachable("Unexpected operand kind!");
2378 case IOK_LENGTH
: CVal
= Info
.Var
.Length
; break;
2379 case IOK_SIZE
: CVal
= Info
.Var
.Size
; break;
2380 case IOK_TYPE
: CVal
= Info
.Var
.Type
; break;
2386 // Query a candidate string for being an Intel assembly operator
2387 // Report back its kind, or IOK_INVALID if does not evaluated as a known one
2388 unsigned X86AsmParser::IdentifyMasmOperator(StringRef Name
) {
2389 return StringSwitch
<unsigned>(Name
.lower())
2390 .Case("type", MOK_TYPE
)
2391 .Cases("size", "sizeof", MOK_SIZEOF
)
2392 .Cases("length", "lengthof", MOK_LENGTHOF
)
2393 .Default(MOK_INVALID
);
2396 /// Parse the 'LENGTHOF', 'SIZEOF', and 'TYPE' operators. The LENGTHOF operator
2397 /// returns the number of elements in an array. It returns the value 1 for
2398 /// non-array variables. The SIZEOF operator returns the size of a type or
2399 /// variable in bytes. A variable's size is the product of its LENGTH and TYPE.
2400 /// The TYPE operator returns the size of a variable. If the variable is an
2401 /// array, TYPE returns the size of a single element.
2402 bool X86AsmParser::ParseMasmOperator(unsigned OpKind
, int64_t &Val
) {
2403 MCAsmParser
&Parser
= getParser();
2404 SMLoc OpLoc
= Parser
.getTok().getLoc();
2405 Parser
.Lex(); // Eat operator.
2408 if (OpKind
== MOK_SIZEOF
|| OpKind
== MOK_TYPE
) {
2409 // Check for SIZEOF(<type>) and TYPE(<type>).
2410 bool InParens
= Parser
.getTok().is(AsmToken::LParen
);
2411 const AsmToken
&IDTok
= InParens
? getLexer().peekTok() : Parser
.getTok();
2413 if (IDTok
.is(AsmToken::Identifier
) &&
2414 !Parser
.lookUpType(IDTok
.getIdentifier(), Type
)) {
2419 parseToken(AsmToken::LParen
);
2420 parseToken(AsmToken::Identifier
);
2422 parseToken(AsmToken::RParen
);
2427 IntelExprStateMachine SM
;
2428 SMLoc End
, Start
= Parser
.getTok().getLoc();
2429 if (ParseIntelExpression(SM
, End
))
2434 llvm_unreachable("Unexpected operand kind!");
2439 Val
= SM
.getLength();
2442 Val
= SM
.getElementSize();
2447 return Error(OpLoc
, "expression has unknown type", SMRange(Start
, End
));
2453 bool X86AsmParser::ParseIntelMemoryOperandSize(unsigned &Size
) {
2454 Size
= StringSwitch
<unsigned>(getTok().getString())
2455 .Cases("BYTE", "byte", 8)
2456 .Cases("WORD", "word", 16)
2457 .Cases("DWORD", "dword", 32)
2458 .Cases("FLOAT", "float", 32)
2459 .Cases("LONG", "long", 32)
2460 .Cases("FWORD", "fword", 48)
2461 .Cases("DOUBLE", "double", 64)
2462 .Cases("QWORD", "qword", 64)
2463 .Cases("MMWORD","mmword", 64)
2464 .Cases("XWORD", "xword", 80)
2465 .Cases("TBYTE", "tbyte", 80)
2466 .Cases("XMMWORD", "xmmword", 128)
2467 .Cases("YMMWORD", "ymmword", 256)
2468 .Cases("ZMMWORD", "zmmword", 512)
2471 const AsmToken
&Tok
= Lex(); // Eat operand size (e.g., byte, word).
2472 if (!(Tok
.getString().equals("PTR") || Tok
.getString().equals("ptr")))
2473 return Error(Tok
.getLoc(), "Expected 'PTR' or 'ptr' token!");
2479 bool X86AsmParser::ParseIntelOperand(OperandVector
&Operands
) {
2480 MCAsmParser
&Parser
= getParser();
2481 const AsmToken
&Tok
= Parser
.getTok();
2484 // Parse optional Size directive.
2486 if (ParseIntelMemoryOperandSize(Size
))
2488 bool PtrInOperand
= bool(Size
);
2490 Start
= Tok
.getLoc();
2492 // Rounding mode operand.
2493 if (getLexer().is(AsmToken::LCurly
))
2494 return ParseRoundingModeOp(Start
, Operands
);
2496 // Register operand.
2498 if (Tok
.is(AsmToken::Identifier
) && !ParseRegister(RegNo
, Start
, End
)) {
2499 if (RegNo
== X86::RIP
)
2500 return Error(Start
, "rip can only be used as a base register");
2501 // A Register followed by ':' is considered a segment override
2502 if (Tok
.isNot(AsmToken::Colon
)) {
2504 return Error(Start
, "expected memory operand after 'ptr', "
2505 "found register operand instead");
2506 Operands
.push_back(X86Operand::CreateReg(RegNo
, Start
, End
));
2509 // An alleged segment override. check if we have a valid segment register
2510 if (!X86MCRegisterClasses
[X86::SEGMENT_REGRegClassID
].contains(RegNo
))
2511 return Error(Start
, "invalid segment register");
2512 // Eat ':' and update Start location
2513 Start
= Lex().getLoc();
2516 // Immediates and Memory
2517 IntelExprStateMachine SM
;
2518 if (ParseIntelExpression(SM
, End
))
2521 if (isParsingMSInlineAsm())
2522 RewriteIntelExpression(SM
, Start
, Tok
.getLoc());
2524 int64_t Imm
= SM
.getImm();
2525 const MCExpr
*Disp
= SM
.getSym();
2526 const MCExpr
*ImmDisp
= MCConstantExpr::create(Imm
, getContext());
2528 Disp
= MCBinaryExpr::createAdd(Disp
, ImmDisp
, getContext());
2532 // RegNo != 0 specifies a valid segment register,
2533 // and we are parsing a segment override
2534 if (!SM
.isMemExpr() && !RegNo
) {
2535 if (isParsingMSInlineAsm() && SM
.isOffsetOperator()) {
2536 const InlineAsmIdentifierInfo
&Info
= SM
.getIdentifierInfo();
2537 if (Info
.isKind(InlineAsmIdentifierInfo::IK_Var
)) {
2538 // Disp includes the address of a variable; make sure this is recorded
2539 // for later handling.
2540 Operands
.push_back(X86Operand::CreateImm(Disp
, Start
, End
,
2541 SM
.getSymName(), Info
.Var
.Decl
,
2542 Info
.Var
.IsGlobalLV
));
2547 Operands
.push_back(X86Operand::CreateImm(Disp
, Start
, End
));
2552 unsigned BaseReg
= SM
.getBaseReg();
2553 unsigned IndexReg
= SM
.getIndexReg();
2554 unsigned Scale
= SM
.getScale();
2556 Size
= SM
.getElementSize() << 3;
2558 if (Scale
== 0 && BaseReg
!= X86::ESP
&& BaseReg
!= X86::RSP
&&
2559 (IndexReg
== X86::ESP
|| IndexReg
== X86::RSP
))
2560 std::swap(BaseReg
, IndexReg
);
2562 // If BaseReg is a vector register and IndexReg is not, swap them unless
2563 // Scale was specified in which case it would be an error.
2565 !(X86MCRegisterClasses
[X86::VR128XRegClassID
].contains(IndexReg
) ||
2566 X86MCRegisterClasses
[X86::VR256XRegClassID
].contains(IndexReg
) ||
2567 X86MCRegisterClasses
[X86::VR512RegClassID
].contains(IndexReg
)) &&
2568 (X86MCRegisterClasses
[X86::VR128XRegClassID
].contains(BaseReg
) ||
2569 X86MCRegisterClasses
[X86::VR256XRegClassID
].contains(BaseReg
) ||
2570 X86MCRegisterClasses
[X86::VR512RegClassID
].contains(BaseReg
)))
2571 std::swap(BaseReg
, IndexReg
);
2574 X86MCRegisterClasses
[X86::GR16RegClassID
].contains(IndexReg
))
2575 return Error(Start
, "16-bit addresses cannot have a scale");
2577 // If there was no explicit scale specified, change it to 1.
2581 // If this is a 16-bit addressing mode with the base and index in the wrong
2582 // order, swap them so CheckBaseRegAndIndexRegAndScale doesn't fail. It is
2583 // shared with att syntax where order matters.
2584 if ((BaseReg
== X86::SI
|| BaseReg
== X86::DI
) &&
2585 (IndexReg
== X86::BX
|| IndexReg
== X86::BP
))
2586 std::swap(BaseReg
, IndexReg
);
2588 if ((BaseReg
|| IndexReg
) &&
2589 CheckBaseRegAndIndexRegAndScale(BaseReg
, IndexReg
, Scale
, is64BitMode(),
2591 return Error(Start
, ErrMsg
);
2592 if (isParsingMSInlineAsm())
2593 return CreateMemForMSInlineAsm(RegNo
, Disp
, BaseReg
, IndexReg
, Scale
, Start
,
2594 End
, Size
, SM
.getSymName(),
2595 SM
.getIdentifierInfo(), Operands
);
2597 // When parsing x64 MS-style assembly, all non-absolute references to a named
2598 // variable default to RIP-relative.
2599 if (Parser
.isParsingMasm() && is64BitMode() && SM
.getElementSize() > 0) {
2600 Operands
.push_back(X86Operand::CreateMem(getPointerWidth(), RegNo
, Disp
,
2601 BaseReg
, IndexReg
, Scale
, Start
,
2603 /*DefaultBaseReg=*/X86::RIP
));
2607 if ((BaseReg
|| IndexReg
|| RegNo
))
2608 Operands
.push_back(X86Operand::CreateMem(getPointerWidth(), RegNo
, Disp
,
2609 BaseReg
, IndexReg
, Scale
, Start
,
2613 X86Operand::CreateMem(getPointerWidth(), Disp
, Start
, End
, Size
));
2617 bool X86AsmParser::ParseATTOperand(OperandVector
&Operands
) {
2618 MCAsmParser
&Parser
= getParser();
2619 switch (getLexer().getKind()) {
2620 case AsmToken::Dollar
: {
2621 // $42 or $ID -> immediate.
2622 SMLoc Start
= Parser
.getTok().getLoc(), End
;
2625 // This is an immediate, so we should not parse a register. Do a precheck
2626 // for '%' to supercede intra-register parse errors.
2627 SMLoc L
= Parser
.getTok().getLoc();
2628 if (check(getLexer().is(AsmToken::Percent
), L
,
2629 "expected immediate expression") ||
2630 getParser().parseExpression(Val
, End
) ||
2631 check(isa
<X86MCExpr
>(Val
), L
, "expected immediate expression"))
2633 Operands
.push_back(X86Operand::CreateImm(Val
, Start
, End
));
2636 case AsmToken::LCurly
: {
2637 SMLoc Start
= Parser
.getTok().getLoc();
2638 return ParseRoundingModeOp(Start
, Operands
);
2641 // This a memory operand or a register. We have some parsing complications
2642 // as a '(' may be part of an immediate expression or the addressing mode
2643 // block. This is complicated by the fact that an assembler-level variable
2644 // may refer either to a register or an immediate expression.
2646 SMLoc Loc
= Parser
.getTok().getLoc(), EndLoc
;
2647 const MCExpr
*Expr
= nullptr;
2649 if (getLexer().isNot(AsmToken::LParen
)) {
2650 // No '(' so this is either a displacement expression or a register.
2651 if (Parser
.parseExpression(Expr
, EndLoc
))
2653 if (auto *RE
= dyn_cast
<X86MCExpr
>(Expr
)) {
2654 // Segment Register. Reset Expr and copy value to register.
2656 Reg
= RE
->getRegNo();
2658 // Sanity check register.
2659 if (Reg
== X86::EIZ
|| Reg
== X86::RIZ
)
2661 Loc
, "%eiz and %riz can only be used as index registers",
2662 SMRange(Loc
, EndLoc
));
2663 if (Reg
== X86::RIP
)
2664 return Error(Loc
, "%rip can only be used as a base register",
2665 SMRange(Loc
, EndLoc
));
2666 // Return register that are not segment prefixes immediately.
2667 if (!Parser
.parseOptionalToken(AsmToken::Colon
)) {
2668 Operands
.push_back(X86Operand::CreateReg(Reg
, Loc
, EndLoc
));
2671 if (!X86MCRegisterClasses
[X86::SEGMENT_REGRegClassID
].contains(Reg
))
2672 return Error(Loc
, "invalid segment register");
2673 // Accept a '*' absolute memory reference after the segment. Place it
2674 // before the full memory operand.
2675 if (getLexer().is(AsmToken::Star
))
2676 Operands
.push_back(X86Operand::CreateToken("*", consumeToken()));
2679 // This is a Memory operand.
2680 return ParseMemOperand(Reg
, Expr
, Loc
, EndLoc
, Operands
);
2685 // X86::COND_INVALID if not a recognized condition code or alternate mnemonic,
2686 // otherwise the EFLAGS Condition Code enumerator.
2687 X86::CondCode
X86AsmParser::ParseConditionCode(StringRef CC
) {
2688 return StringSwitch
<X86::CondCode
>(CC
)
2689 .Case("o", X86::COND_O
) // Overflow
2690 .Case("no", X86::COND_NO
) // No Overflow
2691 .Cases("b", "nae", X86::COND_B
) // Below/Neither Above nor Equal
2692 .Cases("ae", "nb", X86::COND_AE
) // Above or Equal/Not Below
2693 .Cases("e", "z", X86::COND_E
) // Equal/Zero
2694 .Cases("ne", "nz", X86::COND_NE
) // Not Equal/Not Zero
2695 .Cases("be", "na", X86::COND_BE
) // Below or Equal/Not Above
2696 .Cases("a", "nbe", X86::COND_A
) // Above/Neither Below nor Equal
2697 .Case("s", X86::COND_S
) // Sign
2698 .Case("ns", X86::COND_NS
) // No Sign
2699 .Cases("p", "pe", X86::COND_P
) // Parity/Parity Even
2700 .Cases("np", "po", X86::COND_NP
) // No Parity/Parity Odd
2701 .Cases("l", "nge", X86::COND_L
) // Less/Neither Greater nor Equal
2702 .Cases("ge", "nl", X86::COND_GE
) // Greater or Equal/Not Less
2703 .Cases("le", "ng", X86::COND_LE
) // Less or Equal/Not Greater
2704 .Cases("g", "nle", X86::COND_G
) // Greater/Neither Less nor Equal
2705 .Default(X86::COND_INVALID
);
2708 // true on failure, false otherwise
2709 // If no {z} mark was found - Parser doesn't advance
2710 bool X86AsmParser::ParseZ(std::unique_ptr
<X86Operand
> &Z
,
2711 const SMLoc
&StartLoc
) {
2712 MCAsmParser
&Parser
= getParser();
2713 // Assuming we are just pass the '{' mark, quering the next token
2714 // Searched for {z}, but none was found. Return false, as no parsing error was
2716 if (!(getLexer().is(AsmToken::Identifier
) &&
2717 (getLexer().getTok().getIdentifier() == "z")))
2719 Parser
.Lex(); // Eat z
2720 // Query and eat the '}' mark
2721 if (!getLexer().is(AsmToken::RCurly
))
2722 return Error(getLexer().getLoc(), "Expected } at this point");
2723 Parser
.Lex(); // Eat '}'
2724 // Assign Z with the {z} mark opernad
2725 Z
= X86Operand::CreateToken("{z}", StartLoc
);
2729 // true on failure, false otherwise
2730 bool X86AsmParser::HandleAVX512Operand(OperandVector
&Operands
) {
2731 MCAsmParser
&Parser
= getParser();
2732 if (getLexer().is(AsmToken::LCurly
)) {
2733 // Eat "{" and mark the current place.
2734 const SMLoc consumedToken
= consumeToken();
2735 // Distinguish {1to<NUM>} from {%k<NUM>}.
2736 if(getLexer().is(AsmToken::Integer
)) {
2737 // Parse memory broadcasting ({1to<NUM>}).
2738 if (getLexer().getTok().getIntVal() != 1)
2739 return TokError("Expected 1to<NUM> at this point");
2740 StringRef Prefix
= getLexer().getTok().getString();
2741 Parser
.Lex(); // Eat first token of 1to8
2742 if (!getLexer().is(AsmToken::Identifier
))
2743 return TokError("Expected 1to<NUM> at this point");
2744 // Recognize only reasonable suffixes.
2745 SmallVector
<char, 5> BroadcastVector
;
2746 StringRef BroadcastString
= (Prefix
+ getLexer().getTok().getIdentifier())
2747 .toStringRef(BroadcastVector
);
2748 if (!BroadcastString
.startswith("1to"))
2749 return TokError("Expected 1to<NUM> at this point");
2750 const char *BroadcastPrimitive
=
2751 StringSwitch
<const char *>(BroadcastString
)
2752 .Case("1to2", "{1to2}")
2753 .Case("1to4", "{1to4}")
2754 .Case("1to8", "{1to8}")
2755 .Case("1to16", "{1to16}")
2756 .Case("1to32", "{1to32}")
2758 if (!BroadcastPrimitive
)
2759 return TokError("Invalid memory broadcast primitive.");
2760 Parser
.Lex(); // Eat trailing token of 1toN
2761 if (!getLexer().is(AsmToken::RCurly
))
2762 return TokError("Expected } at this point");
2763 Parser
.Lex(); // Eat "}"
2764 Operands
.push_back(X86Operand::CreateToken(BroadcastPrimitive
,
2766 // No AVX512 specific primitives can pass
2767 // after memory broadcasting, so return.
2770 // Parse either {k}{z}, {z}{k}, {k} or {z}
2771 // last one have no meaning, but GCC accepts it
2772 // Currently, we're just pass a '{' mark
2773 std::unique_ptr
<X86Operand
> Z
;
2774 if (ParseZ(Z
, consumedToken
))
2776 // Reaching here means that parsing of the allegadly '{z}' mark yielded
2778 // Query for the need of further parsing for a {%k<NUM>} mark
2779 if (!Z
|| getLexer().is(AsmToken::LCurly
)) {
2780 SMLoc StartLoc
= Z
? consumeToken() : consumedToken
;
2781 // Parse an op-mask register mark ({%k<NUM>}), which is now to be
2785 if (!ParseRegister(RegNo
, RegLoc
, StartLoc
) &&
2786 X86MCRegisterClasses
[X86::VK1RegClassID
].contains(RegNo
)) {
2787 if (RegNo
== X86::K0
)
2788 return Error(RegLoc
, "Register k0 can't be used as write mask");
2789 if (!getLexer().is(AsmToken::RCurly
))
2790 return Error(getLexer().getLoc(), "Expected } at this point");
2791 Operands
.push_back(X86Operand::CreateToken("{", StartLoc
));
2793 X86Operand::CreateReg(RegNo
, StartLoc
, StartLoc
));
2794 Operands
.push_back(X86Operand::CreateToken("}", consumeToken()));
2796 return Error(getLexer().getLoc(),
2797 "Expected an op-mask register at this point");
2798 // {%k<NUM>} mark is found, inquire for {z}
2799 if (getLexer().is(AsmToken::LCurly
) && !Z
) {
2800 // Have we've found a parsing error, or found no (expected) {z} mark
2801 // - report an error
2802 if (ParseZ(Z
, consumeToken()) || !Z
)
2803 return Error(getLexer().getLoc(),
2804 "Expected a {z} mark at this point");
2807 // '{z}' on its own is meaningless, hence should be ignored.
2808 // on the contrary - have it been accompanied by a K register,
2811 Operands
.push_back(std::move(Z
));
2818 /// ParseMemOperand: 'seg : disp(basereg, indexreg, scale)'. The '%ds:' prefix
2819 /// has already been parsed if present. disp may be provided as well.
2820 bool X86AsmParser::ParseMemOperand(unsigned SegReg
, const MCExpr
*Disp
,
2821 SMLoc StartLoc
, SMLoc EndLoc
,
2822 OperandVector
&Operands
) {
2823 MCAsmParser
&Parser
= getParser();
2825 // Based on the initial passed values, we may be in any of these cases, we are
2826 // in one of these cases (with current position (*)):
2828 // 1. seg : * disp (base-index-scale-expr)
2829 // 2. seg : *(disp) (base-index-scale-expr)
2830 // 3. seg : *(base-index-scale-expr)
2831 // 4. disp *(base-index-scale-expr)
2832 // 5. *(disp) (base-index-scale-expr)
2833 // 6. *(base-index-scale-expr)
2837 // If we do not have an displacement yet, check if we're in cases 4 or 6 by
2838 // checking if the first object after the parenthesis is a register (or an
2839 // identifier referring to a register) and parse the displacement or default
2840 // to 0 as appropriate.
2841 auto isAtMemOperand
= [this]() {
2842 if (this->getLexer().isNot(AsmToken::LParen
))
2846 auto TokCount
= this->getLexer().peekTokens(Buf
, true);
2849 switch (Buf
[0].getKind()) {
2850 case AsmToken::Percent
:
2851 case AsmToken::Comma
:
2853 // These lower cases are doing a peekIdentifier.
2855 case AsmToken::Dollar
:
2856 if ((TokCount
> 1) &&
2857 (Buf
[1].is(AsmToken::Identifier
) || Buf
[1].is(AsmToken::String
)) &&
2858 (Buf
[0].getLoc().getPointer() + 1 == Buf
[1].getLoc().getPointer()))
2859 Id
= StringRef(Buf
[0].getLoc().getPointer(),
2860 Buf
[1].getIdentifier().size() + 1);
2862 case AsmToken::Identifier
:
2863 case AsmToken::String
:
2864 Id
= Buf
[0].getIdentifier();
2869 // We have an ID. Check if it is bound to a register.
2871 MCSymbol
*Sym
= this->getContext().getOrCreateSymbol(Id
);
2872 if (Sym
->isVariable()) {
2873 auto V
= Sym
->getVariableValue(/*SetUsed*/ false);
2874 return isa
<X86MCExpr
>(V
);
2881 // Parse immediate if we're not at a mem operand yet.
2882 if (!isAtMemOperand()) {
2883 if (Parser
.parseTokenLoc(Loc
) || Parser
.parseExpression(Disp
, EndLoc
))
2885 assert(!isa
<X86MCExpr
>(Disp
) && "Expected non-register here.");
2887 // Disp is implicitly zero if we haven't parsed it yet.
2888 Disp
= MCConstantExpr::create(0, Parser
.getContext());
2892 // We are now either at the end of the operand or at the '(' at the start of a
2893 // base-index-scale-expr.
2895 if (!parseOptionalToken(AsmToken::LParen
)) {
2898 X86Operand::CreateMem(getPointerWidth(), Disp
, StartLoc
, EndLoc
));
2900 Operands
.push_back(X86Operand::CreateMem(getPointerWidth(), SegReg
, Disp
,
2901 0, 0, 1, StartLoc
, EndLoc
));
2905 // If we reached here, then eat the '(' and Process
2906 // the rest of the memory operand.
2907 unsigned BaseReg
= 0, IndexReg
= 0, Scale
= 1;
2908 SMLoc BaseLoc
= getLexer().getLoc();
2912 // Parse BaseReg if one is provided.
2913 if (getLexer().isNot(AsmToken::Comma
) && getLexer().isNot(AsmToken::RParen
)) {
2914 if (Parser
.parseExpression(E
, EndLoc
) ||
2915 check(!isa
<X86MCExpr
>(E
), BaseLoc
, "expected register here"))
2918 // Sanity check register.
2919 BaseReg
= cast
<X86MCExpr
>(E
)->getRegNo();
2920 if (BaseReg
== X86::EIZ
|| BaseReg
== X86::RIZ
)
2921 return Error(BaseLoc
, "eiz and riz can only be used as index registers",
2922 SMRange(BaseLoc
, EndLoc
));
2925 if (parseOptionalToken(AsmToken::Comma
)) {
2926 // Following the comma we should have either an index register, or a scale
2927 // value. We don't support the later form, but we want to parse it
2930 // Even though it would be completely consistent to support syntax like
2931 // "1(%eax,,1)", the assembler doesn't. Use "eiz" or "riz" for this.
2932 if (getLexer().isNot(AsmToken::RParen
)) {
2933 if (Parser
.parseTokenLoc(Loc
) || Parser
.parseExpression(E
, EndLoc
))
2936 if (!isa
<X86MCExpr
>(E
)) {
2937 // We've parsed an unexpected Scale Value instead of an index
2938 // register. Interpret it as an absolute.
2940 if (!E
->evaluateAsAbsolute(ScaleVal
, getStreamer().getAssemblerPtr()))
2941 return Error(Loc
, "expected absolute expression");
2943 Warning(Loc
, "scale factor without index register is ignored");
2945 } else { // IndexReg Found.
2946 IndexReg
= cast
<X86MCExpr
>(E
)->getRegNo();
2948 if (BaseReg
== X86::RIP
)
2950 "%rip as base register can not have an index register");
2951 if (IndexReg
== X86::RIP
)
2952 return Error(Loc
, "%rip is not allowed as an index register");
2954 if (parseOptionalToken(AsmToken::Comma
)) {
2955 // Parse the scale amount:
2956 // ::= ',' [scale-expression]
2958 // A scale amount without an index is ignored.
2959 if (getLexer().isNot(AsmToken::RParen
)) {
2961 if (Parser
.parseTokenLoc(Loc
) ||
2962 Parser
.parseAbsoluteExpression(ScaleVal
))
2963 return Error(Loc
, "expected scale expression");
2964 Scale
= (unsigned)ScaleVal
;
2965 // Validate the scale amount.
2966 if (X86MCRegisterClasses
[X86::GR16RegClassID
].contains(BaseReg
) &&
2968 return Error(Loc
, "scale factor in 16-bit address must be 1");
2969 if (checkScale(Scale
, ErrMsg
))
2970 return Error(Loc
, ErrMsg
);
2977 // Ok, we've eaten the memory operand, verify we have a ')' and eat it too.
2978 if (parseToken(AsmToken::RParen
, "unexpected token in memory operand"))
2981 // This is to support otherwise illegal operand (%dx) found in various
2982 // unofficial manuals examples (e.g. "out[s]?[bwl]? %al, (%dx)") and must now
2983 // be supported. Mark such DX variants separately fix only in special cases.
2984 if (BaseReg
== X86::DX
&& IndexReg
== 0 && Scale
== 1 && SegReg
== 0 &&
2985 isa
<MCConstantExpr
>(Disp
) &&
2986 cast
<MCConstantExpr
>(Disp
)->getValue() == 0) {
2987 Operands
.push_back(X86Operand::CreateDXReg(BaseLoc
, BaseLoc
));
2991 if (CheckBaseRegAndIndexRegAndScale(BaseReg
, IndexReg
, Scale
, is64BitMode(),
2993 return Error(BaseLoc
, ErrMsg
);
2995 if (SegReg
|| BaseReg
|| IndexReg
)
2996 Operands
.push_back(X86Operand::CreateMem(getPointerWidth(), SegReg
, Disp
,
2997 BaseReg
, IndexReg
, Scale
, StartLoc
,
3001 X86Operand::CreateMem(getPointerWidth(), Disp
, StartLoc
, EndLoc
));
3005 // Parse either a standard primary expression or a register.
3006 bool X86AsmParser::parsePrimaryExpr(const MCExpr
*&Res
, SMLoc
&EndLoc
) {
3007 MCAsmParser
&Parser
= getParser();
3008 // See if this is a register first.
3009 if (getTok().is(AsmToken::Percent
) ||
3010 (isParsingIntelSyntax() && getTok().is(AsmToken::Identifier
) &&
3011 MatchRegisterName(Parser
.getTok().getString()))) {
3012 SMLoc StartLoc
= Parser
.getTok().getLoc();
3014 if (ParseRegister(RegNo
, StartLoc
, EndLoc
))
3016 Res
= X86MCExpr::create(RegNo
, Parser
.getContext());
3019 return Parser
.parsePrimaryExpr(Res
, EndLoc
, nullptr);
3022 bool X86AsmParser::ParseInstruction(ParseInstructionInfo
&Info
, StringRef Name
,
3023 SMLoc NameLoc
, OperandVector
&Operands
) {
3024 MCAsmParser
&Parser
= getParser();
3027 // Reset the forced VEX encoding.
3028 ForcedVEXEncoding
= VEXEncoding_Default
;
3029 ForcedDispEncoding
= DispEncoding_Default
;
3031 // Parse pseudo prefixes.
3034 if (getLexer().isNot(AsmToken::Identifier
))
3035 return Error(Parser
.getTok().getLoc(), "Unexpected token after '{'");
3036 std::string Prefix
= Parser
.getTok().getString().lower();
3037 Parser
.Lex(); // Eat identifier.
3038 if (getLexer().isNot(AsmToken::RCurly
))
3039 return Error(Parser
.getTok().getLoc(), "Expected '}'");
3040 Parser
.Lex(); // Eat curly.
3042 if (Prefix
== "vex")
3043 ForcedVEXEncoding
= VEXEncoding_VEX
;
3044 else if (Prefix
== "vex2")
3045 ForcedVEXEncoding
= VEXEncoding_VEX2
;
3046 else if (Prefix
== "vex3")
3047 ForcedVEXEncoding
= VEXEncoding_VEX3
;
3048 else if (Prefix
== "evex")
3049 ForcedVEXEncoding
= VEXEncoding_EVEX
;
3050 else if (Prefix
== "disp8")
3051 ForcedDispEncoding
= DispEncoding_Disp8
;
3052 else if (Prefix
== "disp32")
3053 ForcedDispEncoding
= DispEncoding_Disp32
;
3055 return Error(NameLoc
, "unknown prefix");
3057 NameLoc
= Parser
.getTok().getLoc();
3058 if (getLexer().is(AsmToken::LCurly
)) {
3062 if (getLexer().isNot(AsmToken::Identifier
))
3063 return Error(Parser
.getTok().getLoc(), "Expected identifier");
3064 // FIXME: The mnemonic won't match correctly if its not in lower case.
3065 Name
= Parser
.getTok().getString();
3070 // Parse MASM style pseudo prefixes.
3071 if (isParsingMSInlineAsm()) {
3072 if (Name
.equals_insensitive("vex"))
3073 ForcedVEXEncoding
= VEXEncoding_VEX
;
3074 else if (Name
.equals_insensitive("vex2"))
3075 ForcedVEXEncoding
= VEXEncoding_VEX2
;
3076 else if (Name
.equals_insensitive("vex3"))
3077 ForcedVEXEncoding
= VEXEncoding_VEX3
;
3078 else if (Name
.equals_insensitive("evex"))
3079 ForcedVEXEncoding
= VEXEncoding_EVEX
;
3081 if (ForcedVEXEncoding
!= VEXEncoding_Default
) {
3082 if (getLexer().isNot(AsmToken::Identifier
))
3083 return Error(Parser
.getTok().getLoc(), "Expected identifier");
3084 // FIXME: The mnemonic won't match correctly if its not in lower case.
3085 Name
= Parser
.getTok().getString();
3086 NameLoc
= Parser
.getTok().getLoc();
3093 // Support the suffix syntax for overriding displacement size as well.
3094 if (Name
.consume_back(".d32")) {
3095 ForcedDispEncoding
= DispEncoding_Disp32
;
3096 } else if (Name
.consume_back(".d8")) {
3097 ForcedDispEncoding
= DispEncoding_Disp8
;
3100 StringRef PatchedName
= Name
;
3102 // Hack to skip "short" following Jcc.
3103 if (isParsingIntelSyntax() &&
3104 (PatchedName
== "jmp" || PatchedName
== "jc" || PatchedName
== "jnc" ||
3105 PatchedName
== "jcxz" || PatchedName
== "jecxz" ||
3106 (PatchedName
.startswith("j") &&
3107 ParseConditionCode(PatchedName
.substr(1)) != X86::COND_INVALID
))) {
3108 StringRef NextTok
= Parser
.getTok().getString();
3109 if (Parser
.isParsingMasm() ? NextTok
.equals_insensitive("short")
3110 : NextTok
== "short") {
3112 NameLoc
.getFromPointer(NameLoc
.getPointer() + Name
.size());
3113 // Eat the short keyword.
3115 // MS and GAS ignore the short keyword; they both determine the jmp type
3116 // based on the distance of the label. (NASM does emit different code with
3117 // and without "short," though.)
3118 InstInfo
->AsmRewrites
->emplace_back(AOK_Skip
, NameEndLoc
,
3119 NextTok
.size() + 1);
3123 // FIXME: Hack to recognize setneb as setne.
3124 if (PatchedName
.startswith("set") && PatchedName
.endswith("b") &&
3125 PatchedName
!= "setb" && PatchedName
!= "setnb")
3126 PatchedName
= PatchedName
.substr(0, Name
.size()-1);
3128 unsigned ComparisonPredicate
= ~0U;
3130 // FIXME: Hack to recognize cmp<comparison code>{sh,ss,sd,ph,ps,pd}.
3131 if ((PatchedName
.startswith("cmp") || PatchedName
.startswith("vcmp")) &&
3132 (PatchedName
.endswith("ss") || PatchedName
.endswith("sd") ||
3133 PatchedName
.endswith("sh") || PatchedName
.endswith("ph") ||
3134 PatchedName
.endswith("ps") || PatchedName
.endswith("pd"))) {
3135 bool IsVCMP
= PatchedName
[0] == 'v';
3136 unsigned CCIdx
= IsVCMP
? 4 : 3;
3137 unsigned CC
= StringSwitch
<unsigned>(
3138 PatchedName
.slice(CCIdx
, PatchedName
.size() - 2))
3140 .Case("eq_oq", 0x00)
3142 .Case("lt_os", 0x01)
3144 .Case("le_os", 0x02)
3145 .Case("unord", 0x03)
3146 .Case("unord_q", 0x03)
3148 .Case("neq_uq", 0x04)
3150 .Case("nlt_us", 0x05)
3152 .Case("nle_us", 0x06)
3154 .Case("ord_q", 0x07)
3155 /* AVX only from here */
3156 .Case("eq_uq", 0x08)
3158 .Case("nge_us", 0x09)
3160 .Case("ngt_us", 0x0A)
3161 .Case("false", 0x0B)
3162 .Case("false_oq", 0x0B)
3163 .Case("neq_oq", 0x0C)
3165 .Case("ge_os", 0x0D)
3167 .Case("gt_os", 0x0E)
3169 .Case("true_uq", 0x0F)
3170 .Case("eq_os", 0x10)
3171 .Case("lt_oq", 0x11)
3172 .Case("le_oq", 0x12)
3173 .Case("unord_s", 0x13)
3174 .Case("neq_us", 0x14)
3175 .Case("nlt_uq", 0x15)
3176 .Case("nle_uq", 0x16)
3177 .Case("ord_s", 0x17)
3178 .Case("eq_us", 0x18)
3179 .Case("nge_uq", 0x19)
3180 .Case("ngt_uq", 0x1A)
3181 .Case("false_os", 0x1B)
3182 .Case("neq_os", 0x1C)
3183 .Case("ge_oq", 0x1D)
3184 .Case("gt_oq", 0x1E)
3185 .Case("true_us", 0x1F)
3187 if (CC
!= ~0U && (IsVCMP
|| CC
< 8) &&
3188 (IsVCMP
|| PatchedName
.back() != 'h')) {
3189 if (PatchedName
.endswith("ss"))
3190 PatchedName
= IsVCMP
? "vcmpss" : "cmpss";
3191 else if (PatchedName
.endswith("sd"))
3192 PatchedName
= IsVCMP
? "vcmpsd" : "cmpsd";
3193 else if (PatchedName
.endswith("ps"))
3194 PatchedName
= IsVCMP
? "vcmpps" : "cmpps";
3195 else if (PatchedName
.endswith("pd"))
3196 PatchedName
= IsVCMP
? "vcmppd" : "cmppd";
3197 else if (PatchedName
.endswith("sh"))
3198 PatchedName
= "vcmpsh";
3199 else if (PatchedName
.endswith("ph"))
3200 PatchedName
= "vcmpph";
3202 llvm_unreachable("Unexpected suffix!");
3204 ComparisonPredicate
= CC
;
3208 // FIXME: Hack to recognize vpcmp<comparison code>{ub,uw,ud,uq,b,w,d,q}.
3209 if (PatchedName
.startswith("vpcmp") &&
3210 (PatchedName
.back() == 'b' || PatchedName
.back() == 'w' ||
3211 PatchedName
.back() == 'd' || PatchedName
.back() == 'q')) {
3212 unsigned SuffixSize
= PatchedName
.drop_back().back() == 'u' ? 2 : 1;
3213 unsigned CC
= StringSwitch
<unsigned>(
3214 PatchedName
.slice(5, PatchedName
.size() - SuffixSize
))
3215 .Case("eq", 0x0) // Only allowed on unsigned. Checked below.
3218 //.Case("false", 0x3) // Not a documented alias.
3222 //.Case("true", 0x7) // Not a documented alias.
3224 if (CC
!= ~0U && (CC
!= 0 || SuffixSize
== 2)) {
3225 switch (PatchedName
.back()) {
3226 default: llvm_unreachable("Unexpected character!");
3227 case 'b': PatchedName
= SuffixSize
== 2 ? "vpcmpub" : "vpcmpb"; break;
3228 case 'w': PatchedName
= SuffixSize
== 2 ? "vpcmpuw" : "vpcmpw"; break;
3229 case 'd': PatchedName
= SuffixSize
== 2 ? "vpcmpud" : "vpcmpd"; break;
3230 case 'q': PatchedName
= SuffixSize
== 2 ? "vpcmpuq" : "vpcmpq"; break;
3232 // Set up the immediate to push into the operands later.
3233 ComparisonPredicate
= CC
;
3237 // FIXME: Hack to recognize vpcom<comparison code>{ub,uw,ud,uq,b,w,d,q}.
3238 if (PatchedName
.startswith("vpcom") &&
3239 (PatchedName
.back() == 'b' || PatchedName
.back() == 'w' ||
3240 PatchedName
.back() == 'd' || PatchedName
.back() == 'q')) {
3241 unsigned SuffixSize
= PatchedName
.drop_back().back() == 'u' ? 2 : 1;
3242 unsigned CC
= StringSwitch
<unsigned>(
3243 PatchedName
.slice(5, PatchedName
.size() - SuffixSize
))
3254 switch (PatchedName
.back()) {
3255 default: llvm_unreachable("Unexpected character!");
3256 case 'b': PatchedName
= SuffixSize
== 2 ? "vpcomub" : "vpcomb"; break;
3257 case 'w': PatchedName
= SuffixSize
== 2 ? "vpcomuw" : "vpcomw"; break;
3258 case 'd': PatchedName
= SuffixSize
== 2 ? "vpcomud" : "vpcomd"; break;
3259 case 'q': PatchedName
= SuffixSize
== 2 ? "vpcomuq" : "vpcomq"; break;
3261 // Set up the immediate to push into the operands later.
3262 ComparisonPredicate
= CC
;
3267 // Determine whether this is an instruction prefix.
3269 // Enhance prefixes integrity robustness. for example, following forms
3270 // are currently tolerated:
3271 // repz repnz <insn> ; GAS errors for the use of two similar prefixes
3272 // lock addq %rax, %rbx ; Destination operand must be of memory type
3273 // xacquire <insn> ; xacquire must be accompanied by 'lock'
3275 StringSwitch
<bool>(Name
)
3276 .Cases("cs", "ds", "es", "fs", "gs", "ss", true)
3277 .Cases("rex64", "data32", "data16", "addr32", "addr16", true)
3278 .Cases("xacquire", "xrelease", true)
3279 .Cases("acquire", "release", isParsingIntelSyntax())
3282 auto isLockRepeatNtPrefix
= [](StringRef N
) {
3283 return StringSwitch
<bool>(N
)
3284 .Cases("lock", "rep", "repe", "repz", "repne", "repnz", "notrack", true)
3288 bool CurlyAsEndOfStatement
= false;
3290 unsigned Flags
= X86::IP_NO_PREFIX
;
3291 while (isLockRepeatNtPrefix(Name
.lower())) {
3293 StringSwitch
<unsigned>(Name
)
3294 .Cases("lock", "lock", X86::IP_HAS_LOCK
)
3295 .Cases("rep", "repe", "repz", X86::IP_HAS_REPEAT
)
3296 .Cases("repne", "repnz", X86::IP_HAS_REPEAT_NE
)
3297 .Cases("notrack", "notrack", X86::IP_HAS_NOTRACK
)
3298 .Default(X86::IP_NO_PREFIX
); // Invalid prefix (impossible)
3300 if (getLexer().is(AsmToken::EndOfStatement
)) {
3301 // We don't have real instr with the given prefix
3302 // let's use the prefix as the instr.
3303 // TODO: there could be several prefixes one after another
3304 Flags
= X86::IP_NO_PREFIX
;
3307 // FIXME: The mnemonic won't match correctly if its not in lower case.
3308 Name
= Parser
.getTok().getString();
3309 Parser
.Lex(); // eat the prefix
3310 // Hack: we could have something like "rep # some comment" or
3311 // "lock; cmpxchg16b $1" or "lock\0A\09incl" or "lock/incl"
3312 while (Name
.startswith(";") || Name
.startswith("\n") ||
3313 Name
.startswith("#") || Name
.startswith("\t") ||
3314 Name
.startswith("/")) {
3315 // FIXME: The mnemonic won't match correctly if its not in lower case.
3316 Name
= Parser
.getTok().getString();
3317 Parser
.Lex(); // go to next prefix or instr
3324 // Hacks to handle 'data16' and 'data32'
3325 if (PatchedName
== "data16" && is16BitMode()) {
3326 return Error(NameLoc
, "redundant data16 prefix");
3328 if (PatchedName
== "data32") {
3330 return Error(NameLoc
, "redundant data32 prefix");
3332 return Error(NameLoc
, "'data32' is not supported in 64-bit mode");
3333 // Hack to 'data16' for the table lookup.
3334 PatchedName
= "data16";
3336 if (getLexer().isNot(AsmToken::EndOfStatement
)) {
3337 StringRef Next
= Parser
.getTok().getString();
3339 // data32 effectively changes the instruction suffix.
3341 if (Next
== "callw")
3343 if (Next
== "ljmpw")
3348 ForcedDataPrefix
= X86::Mode32Bit
;
3353 Operands
.push_back(X86Operand::CreateToken(PatchedName
, NameLoc
));
3355 // Push the immediate if we extracted one from the mnemonic.
3356 if (ComparisonPredicate
!= ~0U && !isParsingIntelSyntax()) {
3357 const MCExpr
*ImmOp
= MCConstantExpr::create(ComparisonPredicate
,
3358 getParser().getContext());
3359 Operands
.push_back(X86Operand::CreateImm(ImmOp
, NameLoc
, NameLoc
));
3362 // This does the actual operand parsing. Don't parse any more if we have a
3363 // prefix juxtaposed with an operation like "lock incl 4(%rax)", because we
3364 // just want to parse the "lock" as the first instruction and the "incl" as
3366 if (getLexer().isNot(AsmToken::EndOfStatement
) && !IsPrefix
) {
3367 // Parse '*' modifier.
3368 if (getLexer().is(AsmToken::Star
))
3369 Operands
.push_back(X86Operand::CreateToken("*", consumeToken()));
3371 // Read the operands.
3373 if (ParseOperand(Operands
))
3375 if (HandleAVX512Operand(Operands
))
3378 // check for comma and eat it
3379 if (getLexer().is(AsmToken::Comma
))
3385 // In MS inline asm curly braces mark the beginning/end of a block,
3386 // therefore they should be interepreted as end of statement
3387 CurlyAsEndOfStatement
=
3388 isParsingIntelSyntax() && isParsingMSInlineAsm() &&
3389 (getLexer().is(AsmToken::LCurly
) || getLexer().is(AsmToken::RCurly
));
3390 if (getLexer().isNot(AsmToken::EndOfStatement
) && !CurlyAsEndOfStatement
)
3391 return TokError("unexpected token in argument list");
3394 // Push the immediate if we extracted one from the mnemonic.
3395 if (ComparisonPredicate
!= ~0U && isParsingIntelSyntax()) {
3396 const MCExpr
*ImmOp
= MCConstantExpr::create(ComparisonPredicate
,
3397 getParser().getContext());
3398 Operands
.push_back(X86Operand::CreateImm(ImmOp
, NameLoc
, NameLoc
));
3401 // Consume the EndOfStatement or the prefix separator Slash
3402 if (getLexer().is(AsmToken::EndOfStatement
) ||
3403 (IsPrefix
&& getLexer().is(AsmToken::Slash
)))
3405 else if (CurlyAsEndOfStatement
)
3406 // Add an actual EndOfStatement before the curly brace
3407 Info
.AsmRewrites
->emplace_back(AOK_EndOfStatement
,
3408 getLexer().getTok().getLoc(), 0);
3410 // This is for gas compatibility and cannot be done in td.
3411 // Adding "p" for some floating point with no argument.
3412 // For example: fsub --> fsubp
3414 Name
== "fsub" || Name
== "fdiv" || Name
== "fsubr" || Name
== "fdivr";
3415 if (IsFp
&& Operands
.size() == 1) {
3416 const char *Repl
= StringSwitch
<const char *>(Name
)
3417 .Case("fsub", "fsubp")
3418 .Case("fdiv", "fdivp")
3419 .Case("fsubr", "fsubrp")
3420 .Case("fdivr", "fdivrp");
3421 static_cast<X86Operand
&>(*Operands
[0]).setTokenValue(Repl
);
3424 if ((Name
== "mov" || Name
== "movw" || Name
== "movl") &&
3425 (Operands
.size() == 3)) {
3426 X86Operand
&Op1
= (X86Operand
&)*Operands
[1];
3427 X86Operand
&Op2
= (X86Operand
&)*Operands
[2];
3428 SMLoc Loc
= Op1
.getEndLoc();
3429 // Moving a 32 or 16 bit value into a segment register has the same
3430 // behavior. Modify such instructions to always take shorter form.
3431 if (Op1
.isReg() && Op2
.isReg() &&
3432 X86MCRegisterClasses
[X86::SEGMENT_REGRegClassID
].contains(
3434 (X86MCRegisterClasses
[X86::GR16RegClassID
].contains(Op1
.getReg()) ||
3435 X86MCRegisterClasses
[X86::GR32RegClassID
].contains(Op1
.getReg()))) {
3436 // Change instruction name to match new instruction.
3437 if (Name
!= "mov" && Name
[3] == (is16BitMode() ? 'l' : 'w')) {
3438 Name
= is16BitMode() ? "movw" : "movl";
3439 Operands
[0] = X86Operand::CreateToken(Name
, NameLoc
);
3441 // Select the correct equivalent 16-/32-bit source register.
3443 getX86SubSuperRegisterOrZero(Op1
.getReg(), is16BitMode() ? 16 : 32);
3444 Operands
[1] = X86Operand::CreateReg(Reg
, Loc
, Loc
);
3448 // This is a terrible hack to handle "out[s]?[bwl]? %al, (%dx)" ->
3449 // "outb %al, %dx". Out doesn't take a memory form, but this is a widely
3450 // documented form in various unofficial manuals, so a lot of code uses it.
3451 if ((Name
== "outb" || Name
== "outsb" || Name
== "outw" || Name
== "outsw" ||
3452 Name
== "outl" || Name
== "outsl" || Name
== "out" || Name
== "outs") &&
3453 Operands
.size() == 3) {
3454 X86Operand
&Op
= (X86Operand
&)*Operands
.back();
3456 Operands
.back() = X86Operand::CreateReg(X86::DX
, Op
.getStartLoc(),
3459 // Same hack for "in[s]?[bwl]? (%dx), %al" -> "inb %dx, %al".
3460 if ((Name
== "inb" || Name
== "insb" || Name
== "inw" || Name
== "insw" ||
3461 Name
== "inl" || Name
== "insl" || Name
== "in" || Name
== "ins") &&
3462 Operands
.size() == 3) {
3463 X86Operand
&Op
= (X86Operand
&)*Operands
[1];
3465 Operands
[1] = X86Operand::CreateReg(X86::DX
, Op
.getStartLoc(),
3469 SmallVector
<std::unique_ptr
<MCParsedAsmOperand
>, 2> TmpOperands
;
3470 bool HadVerifyError
= false;
3472 // Append default arguments to "ins[bwld]"
3473 if (Name
.startswith("ins") &&
3474 (Operands
.size() == 1 || Operands
.size() == 3) &&
3475 (Name
== "insb" || Name
== "insw" || Name
== "insl" || Name
== "insd" ||
3478 AddDefaultSrcDestOperands(TmpOperands
,
3479 X86Operand::CreateReg(X86::DX
, NameLoc
, NameLoc
),
3480 DefaultMemDIOperand(NameLoc
));
3481 HadVerifyError
= VerifyAndAdjustOperands(Operands
, TmpOperands
);
3484 // Append default arguments to "outs[bwld]"
3485 if (Name
.startswith("outs") &&
3486 (Operands
.size() == 1 || Operands
.size() == 3) &&
3487 (Name
== "outsb" || Name
== "outsw" || Name
== "outsl" ||
3488 Name
== "outsd" || Name
== "outs")) {
3489 AddDefaultSrcDestOperands(TmpOperands
, DefaultMemSIOperand(NameLoc
),
3490 X86Operand::CreateReg(X86::DX
, NameLoc
, NameLoc
));
3491 HadVerifyError
= VerifyAndAdjustOperands(Operands
, TmpOperands
);
3494 // Transform "lods[bwlq]" into "lods[bwlq] ($SIREG)" for appropriate
3495 // values of $SIREG according to the mode. It would be nice if this
3496 // could be achieved with InstAlias in the tables.
3497 if (Name
.startswith("lods") &&
3498 (Operands
.size() == 1 || Operands
.size() == 2) &&
3499 (Name
== "lods" || Name
== "lodsb" || Name
== "lodsw" ||
3500 Name
== "lodsl" || Name
== "lodsd" || Name
== "lodsq")) {
3501 TmpOperands
.push_back(DefaultMemSIOperand(NameLoc
));
3502 HadVerifyError
= VerifyAndAdjustOperands(Operands
, TmpOperands
);
3505 // Transform "stos[bwlq]" into "stos[bwlq] ($DIREG)" for appropriate
3506 // values of $DIREG according to the mode. It would be nice if this
3507 // could be achieved with InstAlias in the tables.
3508 if (Name
.startswith("stos") &&
3509 (Operands
.size() == 1 || Operands
.size() == 2) &&
3510 (Name
== "stos" || Name
== "stosb" || Name
== "stosw" ||
3511 Name
== "stosl" || Name
== "stosd" || Name
== "stosq")) {
3512 TmpOperands
.push_back(DefaultMemDIOperand(NameLoc
));
3513 HadVerifyError
= VerifyAndAdjustOperands(Operands
, TmpOperands
);
3516 // Transform "scas[bwlq]" into "scas[bwlq] ($DIREG)" for appropriate
3517 // values of $DIREG according to the mode. It would be nice if this
3518 // could be achieved with InstAlias in the tables.
3519 if (Name
.startswith("scas") &&
3520 (Operands
.size() == 1 || Operands
.size() == 2) &&
3521 (Name
== "scas" || Name
== "scasb" || Name
== "scasw" ||
3522 Name
== "scasl" || Name
== "scasd" || Name
== "scasq")) {
3523 TmpOperands
.push_back(DefaultMemDIOperand(NameLoc
));
3524 HadVerifyError
= VerifyAndAdjustOperands(Operands
, TmpOperands
);
3527 // Add default SI and DI operands to "cmps[bwlq]".
3528 if (Name
.startswith("cmps") &&
3529 (Operands
.size() == 1 || Operands
.size() == 3) &&
3530 (Name
== "cmps" || Name
== "cmpsb" || Name
== "cmpsw" ||
3531 Name
== "cmpsl" || Name
== "cmpsd" || Name
== "cmpsq")) {
3532 AddDefaultSrcDestOperands(TmpOperands
, DefaultMemDIOperand(NameLoc
),
3533 DefaultMemSIOperand(NameLoc
));
3534 HadVerifyError
= VerifyAndAdjustOperands(Operands
, TmpOperands
);
3537 // Add default SI and DI operands to "movs[bwlq]".
3538 if (((Name
.startswith("movs") &&
3539 (Name
== "movs" || Name
== "movsb" || Name
== "movsw" ||
3540 Name
== "movsl" || Name
== "movsd" || Name
== "movsq")) ||
3541 (Name
.startswith("smov") &&
3542 (Name
== "smov" || Name
== "smovb" || Name
== "smovw" ||
3543 Name
== "smovl" || Name
== "smovd" || Name
== "smovq"))) &&
3544 (Operands
.size() == 1 || Operands
.size() == 3)) {
3545 if (Name
== "movsd" && Operands
.size() == 1 && !isParsingIntelSyntax())
3546 Operands
.back() = X86Operand::CreateToken("movsl", NameLoc
);
3547 AddDefaultSrcDestOperands(TmpOperands
, DefaultMemSIOperand(NameLoc
),
3548 DefaultMemDIOperand(NameLoc
));
3549 HadVerifyError
= VerifyAndAdjustOperands(Operands
, TmpOperands
);
3552 // Check if we encountered an error for one the string insturctions
3553 if (HadVerifyError
) {
3554 return HadVerifyError
;
3557 // Transforms "xlat mem8" into "xlatb"
3558 if ((Name
== "xlat" || Name
== "xlatb") && Operands
.size() == 2) {
3559 X86Operand
&Op1
= static_cast<X86Operand
&>(*Operands
[1]);
3561 Warning(Op1
.getStartLoc(), "memory operand is only for determining the "
3562 "size, (R|E)BX will be used for the location");
3563 Operands
.pop_back();
3564 static_cast<X86Operand
&>(*Operands
[0]).setTokenValue("xlatb");
3569 Operands
.push_back(X86Operand::CreatePrefix(Flags
, NameLoc
, NameLoc
));
3573 bool X86AsmParser::processInstruction(MCInst
&Inst
, const OperandVector
&Ops
) {
3574 const MCRegisterInfo
*MRI
= getContext().getRegisterInfo();
3576 switch (Inst
.getOpcode()) {
3577 default: return false;
3579 // {disp32} forces a larger displacement as if the instruction was relaxed.
3580 // NOTE: 16-bit mode uses 16-bit displacement even though it says {disp32}.
3581 // This matches GNU assembler.
3582 if (ForcedDispEncoding
== DispEncoding_Disp32
) {
3583 Inst
.setOpcode(is16BitMode() ? X86::JMP_2
: X86::JMP_4
);
3589 // {disp32} forces a larger displacement as if the instruction was relaxed.
3590 // NOTE: 16-bit mode uses 16-bit displacement even though it says {disp32}.
3591 // This matches GNU assembler.
3592 if (ForcedDispEncoding
== DispEncoding_Disp32
) {
3593 Inst
.setOpcode(is16BitMode() ? X86::JCC_2
: X86::JCC_4
);
3598 case X86::VMOVZPQILo2PQIrr
:
3599 case X86::VMOVAPDrr
:
3600 case X86::VMOVAPDYrr
:
3601 case X86::VMOVAPSrr
:
3602 case X86::VMOVAPSYrr
:
3603 case X86::VMOVDQArr
:
3604 case X86::VMOVDQAYrr
:
3605 case X86::VMOVDQUrr
:
3606 case X86::VMOVDQUYrr
:
3607 case X86::VMOVUPDrr
:
3608 case X86::VMOVUPDYrr
:
3609 case X86::VMOVUPSrr
:
3610 case X86::VMOVUPSYrr
: {
3611 // We can get a smaller encoding by using VEX.R instead of VEX.B if one of
3612 // the registers is extended, but other isn't.
3613 if (ForcedVEXEncoding
== VEXEncoding_VEX3
||
3614 MRI
->getEncodingValue(Inst
.getOperand(0).getReg()) >= 8 ||
3615 MRI
->getEncodingValue(Inst
.getOperand(1).getReg()) < 8)
3619 switch (Inst
.getOpcode()) {
3620 default: llvm_unreachable("Invalid opcode");
3621 case X86::VMOVZPQILo2PQIrr
: NewOpc
= X86::VMOVPQI2QIrr
; break;
3622 case X86::VMOVAPDrr
: NewOpc
= X86::VMOVAPDrr_REV
; break;
3623 case X86::VMOVAPDYrr
: NewOpc
= X86::VMOVAPDYrr_REV
; break;
3624 case X86::VMOVAPSrr
: NewOpc
= X86::VMOVAPSrr_REV
; break;
3625 case X86::VMOVAPSYrr
: NewOpc
= X86::VMOVAPSYrr_REV
; break;
3626 case X86::VMOVDQArr
: NewOpc
= X86::VMOVDQArr_REV
; break;
3627 case X86::VMOVDQAYrr
: NewOpc
= X86::VMOVDQAYrr_REV
; break;
3628 case X86::VMOVDQUrr
: NewOpc
= X86::VMOVDQUrr_REV
; break;
3629 case X86::VMOVDQUYrr
: NewOpc
= X86::VMOVDQUYrr_REV
; break;
3630 case X86::VMOVUPDrr
: NewOpc
= X86::VMOVUPDrr_REV
; break;
3631 case X86::VMOVUPDYrr
: NewOpc
= X86::VMOVUPDYrr_REV
; break;
3632 case X86::VMOVUPSrr
: NewOpc
= X86::VMOVUPSrr_REV
; break;
3633 case X86::VMOVUPSYrr
: NewOpc
= X86::VMOVUPSYrr_REV
; break;
3635 Inst
.setOpcode(NewOpc
);
3639 case X86::VMOVSSrr
: {
3640 // We can get a smaller encoding by using VEX.R instead of VEX.B if one of
3641 // the registers is extended, but other isn't.
3642 if (ForcedVEXEncoding
== VEXEncoding_VEX3
||
3643 MRI
->getEncodingValue(Inst
.getOperand(0).getReg()) >= 8 ||
3644 MRI
->getEncodingValue(Inst
.getOperand(2).getReg()) < 8)
3648 switch (Inst
.getOpcode()) {
3649 default: llvm_unreachable("Invalid opcode");
3650 case X86::VMOVSDrr
: NewOpc
= X86::VMOVSDrr_REV
; break;
3651 case X86::VMOVSSrr
: NewOpc
= X86::VMOVSSrr_REV
; break;
3653 Inst
.setOpcode(NewOpc
);
3656 case X86::RCR8ri
: case X86::RCR16ri
: case X86::RCR32ri
: case X86::RCR64ri
:
3657 case X86::RCL8ri
: case X86::RCL16ri
: case X86::RCL32ri
: case X86::RCL64ri
:
3658 case X86::ROR8ri
: case X86::ROR16ri
: case X86::ROR32ri
: case X86::ROR64ri
:
3659 case X86::ROL8ri
: case X86::ROL16ri
: case X86::ROL32ri
: case X86::ROL64ri
:
3660 case X86::SAR8ri
: case X86::SAR16ri
: case X86::SAR32ri
: case X86::SAR64ri
:
3661 case X86::SHR8ri
: case X86::SHR16ri
: case X86::SHR32ri
: case X86::SHR64ri
:
3662 case X86::SHL8ri
: case X86::SHL16ri
: case X86::SHL32ri
: case X86::SHL64ri
: {
3663 // Optimize s{hr,ar,hl} $1, <op> to "shift <op>". Similar for rotate.
3664 // FIXME: It would be great if we could just do this with an InstAlias.
3665 if (!Inst
.getOperand(2).isImm() || Inst
.getOperand(2).getImm() != 1)
3669 switch (Inst
.getOpcode()) {
3670 default: llvm_unreachable("Invalid opcode");
3671 case X86::RCR8ri
: NewOpc
= X86::RCR8r1
; break;
3672 case X86::RCR16ri
: NewOpc
= X86::RCR16r1
; break;
3673 case X86::RCR32ri
: NewOpc
= X86::RCR32r1
; break;
3674 case X86::RCR64ri
: NewOpc
= X86::RCR64r1
; break;
3675 case X86::RCL8ri
: NewOpc
= X86::RCL8r1
; break;
3676 case X86::RCL16ri
: NewOpc
= X86::RCL16r1
; break;
3677 case X86::RCL32ri
: NewOpc
= X86::RCL32r1
; break;
3678 case X86::RCL64ri
: NewOpc
= X86::RCL64r1
; break;
3679 case X86::ROR8ri
: NewOpc
= X86::ROR8r1
; break;
3680 case X86::ROR16ri
: NewOpc
= X86::ROR16r1
; break;
3681 case X86::ROR32ri
: NewOpc
= X86::ROR32r1
; break;
3682 case X86::ROR64ri
: NewOpc
= X86::ROR64r1
; break;
3683 case X86::ROL8ri
: NewOpc
= X86::ROL8r1
; break;
3684 case X86::ROL16ri
: NewOpc
= X86::ROL16r1
; break;
3685 case X86::ROL32ri
: NewOpc
= X86::ROL32r1
; break;
3686 case X86::ROL64ri
: NewOpc
= X86::ROL64r1
; break;
3687 case X86::SAR8ri
: NewOpc
= X86::SAR8r1
; break;
3688 case X86::SAR16ri
: NewOpc
= X86::SAR16r1
; break;
3689 case X86::SAR32ri
: NewOpc
= X86::SAR32r1
; break;
3690 case X86::SAR64ri
: NewOpc
= X86::SAR64r1
; break;
3691 case X86::SHR8ri
: NewOpc
= X86::SHR8r1
; break;
3692 case X86::SHR16ri
: NewOpc
= X86::SHR16r1
; break;
3693 case X86::SHR32ri
: NewOpc
= X86::SHR32r1
; break;
3694 case X86::SHR64ri
: NewOpc
= X86::SHR64r1
; break;
3695 case X86::SHL8ri
: NewOpc
= X86::SHL8r1
; break;
3696 case X86::SHL16ri
: NewOpc
= X86::SHL16r1
; break;
3697 case X86::SHL32ri
: NewOpc
= X86::SHL32r1
; break;
3698 case X86::SHL64ri
: NewOpc
= X86::SHL64r1
; break;
3702 TmpInst
.setOpcode(NewOpc
);
3703 TmpInst
.addOperand(Inst
.getOperand(0));
3704 TmpInst
.addOperand(Inst
.getOperand(1));
3708 case X86::RCR8mi
: case X86::RCR16mi
: case X86::RCR32mi
: case X86::RCR64mi
:
3709 case X86::RCL8mi
: case X86::RCL16mi
: case X86::RCL32mi
: case X86::RCL64mi
:
3710 case X86::ROR8mi
: case X86::ROR16mi
: case X86::ROR32mi
: case X86::ROR64mi
:
3711 case X86::ROL8mi
: case X86::ROL16mi
: case X86::ROL32mi
: case X86::ROL64mi
:
3712 case X86::SAR8mi
: case X86::SAR16mi
: case X86::SAR32mi
: case X86::SAR64mi
:
3713 case X86::SHR8mi
: case X86::SHR16mi
: case X86::SHR32mi
: case X86::SHR64mi
:
3714 case X86::SHL8mi
: case X86::SHL16mi
: case X86::SHL32mi
: case X86::SHL64mi
: {
3715 // Optimize s{hr,ar,hl} $1, <op> to "shift <op>". Similar for rotate.
3716 // FIXME: It would be great if we could just do this with an InstAlias.
3717 if (!Inst
.getOperand(X86::AddrNumOperands
).isImm() ||
3718 Inst
.getOperand(X86::AddrNumOperands
).getImm() != 1)
3722 switch (Inst
.getOpcode()) {
3723 default: llvm_unreachable("Invalid opcode");
3724 case X86::RCR8mi
: NewOpc
= X86::RCR8m1
; break;
3725 case X86::RCR16mi
: NewOpc
= X86::RCR16m1
; break;
3726 case X86::RCR32mi
: NewOpc
= X86::RCR32m1
; break;
3727 case X86::RCR64mi
: NewOpc
= X86::RCR64m1
; break;
3728 case X86::RCL8mi
: NewOpc
= X86::RCL8m1
; break;
3729 case X86::RCL16mi
: NewOpc
= X86::RCL16m1
; break;
3730 case X86::RCL32mi
: NewOpc
= X86::RCL32m1
; break;
3731 case X86::RCL64mi
: NewOpc
= X86::RCL64m1
; break;
3732 case X86::ROR8mi
: NewOpc
= X86::ROR8m1
; break;
3733 case X86::ROR16mi
: NewOpc
= X86::ROR16m1
; break;
3734 case X86::ROR32mi
: NewOpc
= X86::ROR32m1
; break;
3735 case X86::ROR64mi
: NewOpc
= X86::ROR64m1
; break;
3736 case X86::ROL8mi
: NewOpc
= X86::ROL8m1
; break;
3737 case X86::ROL16mi
: NewOpc
= X86::ROL16m1
; break;
3738 case X86::ROL32mi
: NewOpc
= X86::ROL32m1
; break;
3739 case X86::ROL64mi
: NewOpc
= X86::ROL64m1
; break;
3740 case X86::SAR8mi
: NewOpc
= X86::SAR8m1
; break;
3741 case X86::SAR16mi
: NewOpc
= X86::SAR16m1
; break;
3742 case X86::SAR32mi
: NewOpc
= X86::SAR32m1
; break;
3743 case X86::SAR64mi
: NewOpc
= X86::SAR64m1
; break;
3744 case X86::SHR8mi
: NewOpc
= X86::SHR8m1
; break;
3745 case X86::SHR16mi
: NewOpc
= X86::SHR16m1
; break;
3746 case X86::SHR32mi
: NewOpc
= X86::SHR32m1
; break;
3747 case X86::SHR64mi
: NewOpc
= X86::SHR64m1
; break;
3748 case X86::SHL8mi
: NewOpc
= X86::SHL8m1
; break;
3749 case X86::SHL16mi
: NewOpc
= X86::SHL16m1
; break;
3750 case X86::SHL32mi
: NewOpc
= X86::SHL32m1
; break;
3751 case X86::SHL64mi
: NewOpc
= X86::SHL64m1
; break;
3755 TmpInst
.setOpcode(NewOpc
);
3756 for (int i
= 0; i
!= X86::AddrNumOperands
; ++i
)
3757 TmpInst
.addOperand(Inst
.getOperand(i
));
3762 // Transforms "int $3" into "int3" as a size optimization. We can't write an
3763 // instalias with an immediate operand yet.
3764 if (!Inst
.getOperand(0).isImm() || Inst
.getOperand(0).getImm() != 3)
3768 TmpInst
.setOpcode(X86::INT3
);
3775 bool X86AsmParser::validateInstruction(MCInst
&Inst
, const OperandVector
&Ops
) {
3776 const MCRegisterInfo
*MRI
= getContext().getRegisterInfo();
3778 switch (Inst
.getOpcode()) {
3779 case X86::VGATHERDPDYrm
:
3780 case X86::VGATHERDPDrm
:
3781 case X86::VGATHERDPSYrm
:
3782 case X86::VGATHERDPSrm
:
3783 case X86::VGATHERQPDYrm
:
3784 case X86::VGATHERQPDrm
:
3785 case X86::VGATHERQPSYrm
:
3786 case X86::VGATHERQPSrm
:
3787 case X86::VPGATHERDDYrm
:
3788 case X86::VPGATHERDDrm
:
3789 case X86::VPGATHERDQYrm
:
3790 case X86::VPGATHERDQrm
:
3791 case X86::VPGATHERQDYrm
:
3792 case X86::VPGATHERQDrm
:
3793 case X86::VPGATHERQQYrm
:
3794 case X86::VPGATHERQQrm
: {
3795 unsigned Dest
= MRI
->getEncodingValue(Inst
.getOperand(0).getReg());
3796 unsigned Mask
= MRI
->getEncodingValue(Inst
.getOperand(1).getReg());
3798 MRI
->getEncodingValue(Inst
.getOperand(3 + X86::AddrIndexReg
).getReg());
3799 if (Dest
== Mask
|| Dest
== Index
|| Mask
== Index
)
3800 return Warning(Ops
[0]->getStartLoc(), "mask, index, and destination "
3801 "registers should be distinct");
3804 case X86::VGATHERDPDZ128rm
:
3805 case X86::VGATHERDPDZ256rm
:
3806 case X86::VGATHERDPDZrm
:
3807 case X86::VGATHERDPSZ128rm
:
3808 case X86::VGATHERDPSZ256rm
:
3809 case X86::VGATHERDPSZrm
:
3810 case X86::VGATHERQPDZ128rm
:
3811 case X86::VGATHERQPDZ256rm
:
3812 case X86::VGATHERQPDZrm
:
3813 case X86::VGATHERQPSZ128rm
:
3814 case X86::VGATHERQPSZ256rm
:
3815 case X86::VGATHERQPSZrm
:
3816 case X86::VPGATHERDDZ128rm
:
3817 case X86::VPGATHERDDZ256rm
:
3818 case X86::VPGATHERDDZrm
:
3819 case X86::VPGATHERDQZ128rm
:
3820 case X86::VPGATHERDQZ256rm
:
3821 case X86::VPGATHERDQZrm
:
3822 case X86::VPGATHERQDZ128rm
:
3823 case X86::VPGATHERQDZ256rm
:
3824 case X86::VPGATHERQDZrm
:
3825 case X86::VPGATHERQQZ128rm
:
3826 case X86::VPGATHERQQZ256rm
:
3827 case X86::VPGATHERQQZrm
: {
3828 unsigned Dest
= MRI
->getEncodingValue(Inst
.getOperand(0).getReg());
3830 MRI
->getEncodingValue(Inst
.getOperand(4 + X86::AddrIndexReg
).getReg());
3832 return Warning(Ops
[0]->getStartLoc(), "index and destination registers "
3833 "should be distinct");
3836 case X86::V4FMADDPSrm
:
3837 case X86::V4FMADDPSrmk
:
3838 case X86::V4FMADDPSrmkz
:
3839 case X86::V4FMADDSSrm
:
3840 case X86::V4FMADDSSrmk
:
3841 case X86::V4FMADDSSrmkz
:
3842 case X86::V4FNMADDPSrm
:
3843 case X86::V4FNMADDPSrmk
:
3844 case X86::V4FNMADDPSrmkz
:
3845 case X86::V4FNMADDSSrm
:
3846 case X86::V4FNMADDSSrmk
:
3847 case X86::V4FNMADDSSrmkz
:
3848 case X86::VP4DPWSSDSrm
:
3849 case X86::VP4DPWSSDSrmk
:
3850 case X86::VP4DPWSSDSrmkz
:
3851 case X86::VP4DPWSSDrm
:
3852 case X86::VP4DPWSSDrmk
:
3853 case X86::VP4DPWSSDrmkz
: {
3854 unsigned Src2
= Inst
.getOperand(Inst
.getNumOperands() -
3855 X86::AddrNumOperands
- 1).getReg();
3856 unsigned Src2Enc
= MRI
->getEncodingValue(Src2
);
3857 if (Src2Enc
% 4 != 0) {
3858 StringRef RegName
= X86IntelInstPrinter::getRegisterName(Src2
);
3859 unsigned GroupStart
= (Src2Enc
/ 4) * 4;
3860 unsigned GroupEnd
= GroupStart
+ 3;
3861 return Warning(Ops
[0]->getStartLoc(),
3862 "source register '" + RegName
+ "' implicitly denotes '" +
3863 RegName
.take_front(3) + Twine(GroupStart
) + "' to '" +
3864 RegName
.take_front(3) + Twine(GroupEnd
) +
3871 const MCInstrDesc
&MCID
= MII
.get(Inst
.getOpcode());
3872 // Check that we aren't mixing AH/BH/CH/DH with REX prefix. We only need to
3873 // check this with the legacy encoding, VEX/EVEX/XOP don't use REX.
3874 if ((MCID
.TSFlags
& X86II::EncodingMask
) == 0) {
3875 MCPhysReg HReg
= X86::NoRegister
;
3876 bool UsesRex
= MCID
.TSFlags
& X86II::REX_W
;
3877 unsigned NumOps
= Inst
.getNumOperands();
3878 for (unsigned i
= 0; i
!= NumOps
; ++i
) {
3879 const MCOperand
&MO
= Inst
.getOperand(i
);
3882 unsigned Reg
= MO
.getReg();
3883 if (Reg
== X86::AH
|| Reg
== X86::BH
|| Reg
== X86::CH
|| Reg
== X86::DH
)
3885 if (X86II::isX86_64NonExtLowByteReg(Reg
) ||
3886 X86II::isX86_64ExtendedReg(Reg
))
3890 if (UsesRex
&& HReg
!= X86::NoRegister
) {
3891 StringRef RegName
= X86IntelInstPrinter::getRegisterName(HReg
);
3892 return Error(Ops
[0]->getStartLoc(),
3893 "can't encode '" + RegName
+ "' in an instruction requiring "
3901 static const char *getSubtargetFeatureName(uint64_t Val
);
3903 void X86AsmParser::emitWarningForSpecialLVIInstruction(SMLoc Loc
) {
3904 Warning(Loc
, "Instruction may be vulnerable to LVI and "
3905 "requires manual mitigation");
3906 Note(SMLoc(), "See https://software.intel.com/"
3907 "security-software-guidance/insights/"
3908 "deep-dive-load-value-injection#specialinstructions"
3909 " for more information");
3912 /// RET instructions and also instructions that indirect calls/jumps from memory
3913 /// combine a load and a branch within a single instruction. To mitigate these
3914 /// instructions against LVI, they must be decomposed into separate load and
3915 /// branch instructions, with an LFENCE in between. For more details, see:
3916 /// - X86LoadValueInjectionRetHardening.cpp
3917 /// - X86LoadValueInjectionIndirectThunks.cpp
3918 /// - https://software.intel.com/security-software-guidance/insights/deep-dive-load-value-injection
3920 /// Returns `true` if a mitigation was applied or warning was emitted.
3921 void X86AsmParser::applyLVICFIMitigation(MCInst
&Inst
, MCStreamer
&Out
) {
3922 // Information on control-flow instructions that require manual mitigation can
3924 // https://software.intel.com/security-software-guidance/insights/deep-dive-load-value-injection#specialinstructions
3925 switch (Inst
.getOpcode()) {
3932 MCInst ShlInst
, FenceInst
;
3933 bool Parse32
= is32BitMode() || Code16GCC
;
3935 is64BitMode() ? X86::RSP
: (Parse32
? X86::ESP
: X86::SP
);
3936 const MCExpr
*Disp
= MCConstantExpr::create(0, getContext());
3937 auto ShlMemOp
= X86Operand::CreateMem(getPointerWidth(), /*SegReg=*/0, Disp
,
3938 /*BaseReg=*/Basereg
, /*IndexReg=*/0,
3939 /*Scale=*/1, SMLoc
{}, SMLoc
{}, 0);
3940 ShlInst
.setOpcode(X86::SHL64mi
);
3941 ShlMemOp
->addMemOperands(ShlInst
, 5);
3942 ShlInst
.addOperand(MCOperand::createImm(0));
3943 FenceInst
.setOpcode(X86::LFENCE
);
3944 Out
.emitInstruction(ShlInst
, getSTI());
3945 Out
.emitInstruction(FenceInst
, getSTI());
3954 emitWarningForSpecialLVIInstruction(Inst
.getLoc());
3959 /// To mitigate LVI, every instruction that performs a load can be followed by
3960 /// an LFENCE instruction to squash any potential mis-speculation. There are
3961 /// some instructions that require additional considerations, and may requre
3962 /// manual mitigation. For more details, see:
3963 /// https://software.intel.com/security-software-guidance/insights/deep-dive-load-value-injection
3965 /// Returns `true` if a mitigation was applied or warning was emitted.
3966 void X86AsmParser::applyLVILoadHardeningMitigation(MCInst
&Inst
,
3968 auto Opcode
= Inst
.getOpcode();
3969 auto Flags
= Inst
.getFlags();
3970 if ((Flags
& X86::IP_HAS_REPEAT
) || (Flags
& X86::IP_HAS_REPEAT_NE
)) {
3971 // Information on REP string instructions that require manual mitigation can
3973 // https://software.intel.com/security-software-guidance/insights/deep-dive-load-value-injection#specialinstructions
3983 emitWarningForSpecialLVIInstruction(Inst
.getLoc());
3986 } else if (Opcode
== X86::REP_PREFIX
|| Opcode
== X86::REPNE_PREFIX
) {
3987 // If a REP instruction is found on its own line, it may or may not be
3988 // followed by a vulnerable instruction. Emit a warning just in case.
3989 emitWarningForSpecialLVIInstruction(Inst
.getLoc());
3993 const MCInstrDesc
&MCID
= MII
.get(Inst
.getOpcode());
3995 // Can't mitigate after terminators or calls. A control flow change may have
3996 // already occurred.
3997 if (MCID
.isTerminator() || MCID
.isCall())
4000 // LFENCE has the mayLoad property, don't double fence.
4001 if (MCID
.mayLoad() && Inst
.getOpcode() != X86::LFENCE
) {
4003 FenceInst
.setOpcode(X86::LFENCE
);
4004 Out
.emitInstruction(FenceInst
, getSTI());
4008 void X86AsmParser::emitInstruction(MCInst
&Inst
, OperandVector
&Operands
,
4010 if (LVIInlineAsmHardening
&&
4011 getSTI().getFeatureBits()[X86::FeatureLVIControlFlowIntegrity
])
4012 applyLVICFIMitigation(Inst
, Out
);
4014 Out
.emitInstruction(Inst
, getSTI());
4016 if (LVIInlineAsmHardening
&&
4017 getSTI().getFeatureBits()[X86::FeatureLVILoadHardening
])
4018 applyLVILoadHardeningMitigation(Inst
, Out
);
4021 bool X86AsmParser::MatchAndEmitInstruction(SMLoc IDLoc
, unsigned &Opcode
,
4022 OperandVector
&Operands
,
4023 MCStreamer
&Out
, uint64_t &ErrorInfo
,
4024 bool MatchingInlineAsm
) {
4025 if (isParsingIntelSyntax())
4026 return MatchAndEmitIntelInstruction(IDLoc
, Opcode
, Operands
, Out
, ErrorInfo
,
4028 return MatchAndEmitATTInstruction(IDLoc
, Opcode
, Operands
, Out
, ErrorInfo
,
4032 void X86AsmParser::MatchFPUWaitAlias(SMLoc IDLoc
, X86Operand
&Op
,
4033 OperandVector
&Operands
, MCStreamer
&Out
,
4034 bool MatchingInlineAsm
) {
4035 // FIXME: This should be replaced with a real .td file alias mechanism.
4036 // Also, MatchInstructionImpl should actually *do* the EmitInstruction
4038 const char *Repl
= StringSwitch
<const char *>(Op
.getToken())
4039 .Case("finit", "fninit")
4040 .Case("fsave", "fnsave")
4041 .Case("fstcw", "fnstcw")
4042 .Case("fstcww", "fnstcw")
4043 .Case("fstenv", "fnstenv")
4044 .Case("fstsw", "fnstsw")
4045 .Case("fstsww", "fnstsw")
4046 .Case("fclex", "fnclex")
4050 Inst
.setOpcode(X86::WAIT
);
4052 if (!MatchingInlineAsm
)
4053 emitInstruction(Inst
, Operands
, Out
);
4054 Operands
[0] = X86Operand::CreateToken(Repl
, IDLoc
);
4058 bool X86AsmParser::ErrorMissingFeature(SMLoc IDLoc
,
4059 const FeatureBitset
&MissingFeatures
,
4060 bool MatchingInlineAsm
) {
4061 assert(MissingFeatures
.any() && "Unknown missing feature!");
4062 SmallString
<126> Msg
;
4063 raw_svector_ostream
OS(Msg
);
4064 OS
<< "instruction requires:";
4065 for (unsigned i
= 0, e
= MissingFeatures
.size(); i
!= e
; ++i
) {
4066 if (MissingFeatures
[i
])
4067 OS
<< ' ' << getSubtargetFeatureName(i
);
4069 return Error(IDLoc
, OS
.str(), SMRange(), MatchingInlineAsm
);
4072 static unsigned getPrefixes(OperandVector
&Operands
) {
4073 unsigned Result
= 0;
4074 X86Operand
&Prefix
= static_cast<X86Operand
&>(*Operands
.back());
4075 if (Prefix
.isPrefix()) {
4076 Result
= Prefix
.getPrefix();
4077 Operands
.pop_back();
4082 unsigned X86AsmParser::checkTargetMatchPredicate(MCInst
&Inst
) {
4083 unsigned Opc
= Inst
.getOpcode();
4084 const MCInstrDesc
&MCID
= MII
.get(Opc
);
4086 if (ForcedVEXEncoding
== VEXEncoding_EVEX
&&
4087 (MCID
.TSFlags
& X86II::EncodingMask
) != X86II::EVEX
)
4088 return Match_Unsupported
;
4090 if ((ForcedVEXEncoding
== VEXEncoding_VEX
||
4091 ForcedVEXEncoding
== VEXEncoding_VEX2
||
4092 ForcedVEXEncoding
== VEXEncoding_VEX3
) &&
4093 (MCID
.TSFlags
& X86II::EncodingMask
) != X86II::VEX
)
4094 return Match_Unsupported
;
4096 // These instructions are only available with {vex}, {vex2} or {vex3} prefix
4097 if (MCID
.TSFlags
& X86II::ExplicitVEXPrefix
&&
4098 (ForcedVEXEncoding
!= VEXEncoding_VEX
&&
4099 ForcedVEXEncoding
!= VEXEncoding_VEX2
&&
4100 ForcedVEXEncoding
!= VEXEncoding_VEX3
))
4101 return Match_Unsupported
;
4103 // These instructions match ambiguously with their VEX encoded counterparts
4104 // and appear first in the matching table. Reject them unless we're forcing
4106 // FIXME: We really need a way to break the ambiguity.
4108 case X86::VCVTSD2SIZrm_Int
:
4109 case X86::VCVTSD2SI64Zrm_Int
:
4110 case X86::VCVTSS2SIZrm_Int
:
4111 case X86::VCVTSS2SI64Zrm_Int
:
4112 case X86::VCVTTSD2SIZrm
: case X86::VCVTTSD2SIZrm_Int
:
4113 case X86::VCVTTSD2SI64Zrm
: case X86::VCVTTSD2SI64Zrm_Int
:
4114 case X86::VCVTTSS2SIZrm
: case X86::VCVTTSS2SIZrm_Int
:
4115 case X86::VCVTTSS2SI64Zrm
: case X86::VCVTTSS2SI64Zrm_Int
:
4116 if (ForcedVEXEncoding
!= VEXEncoding_EVEX
)
4117 return Match_Unsupported
;
4121 return Match_Success
;
4124 bool X86AsmParser::MatchAndEmitATTInstruction(SMLoc IDLoc
, unsigned &Opcode
,
4125 OperandVector
&Operands
,
4127 uint64_t &ErrorInfo
,
4128 bool MatchingInlineAsm
) {
4129 assert(!Operands
.empty() && "Unexpect empty operand list!");
4130 assert((*Operands
[0]).isToken() && "Leading operand should always be a mnemonic!");
4131 SMRange EmptyRange
= None
;
4133 // First, handle aliases that expand to multiple instructions.
4134 MatchFPUWaitAlias(IDLoc
, static_cast<X86Operand
&>(*Operands
[0]), Operands
,
4135 Out
, MatchingInlineAsm
);
4136 X86Operand
&Op
= static_cast<X86Operand
&>(*Operands
[0]);
4137 unsigned Prefixes
= getPrefixes(Operands
);
4141 // If VEX/EVEX encoding is forced, we need to pass the USE_* flag to the
4142 // encoder and printer.
4143 if (ForcedVEXEncoding
== VEXEncoding_VEX
)
4144 Prefixes
|= X86::IP_USE_VEX
;
4145 else if (ForcedVEXEncoding
== VEXEncoding_VEX2
)
4146 Prefixes
|= X86::IP_USE_VEX2
;
4147 else if (ForcedVEXEncoding
== VEXEncoding_VEX3
)
4148 Prefixes
|= X86::IP_USE_VEX3
;
4149 else if (ForcedVEXEncoding
== VEXEncoding_EVEX
)
4150 Prefixes
|= X86::IP_USE_EVEX
;
4152 // Set encoded flags for {disp8} and {disp32}.
4153 if (ForcedDispEncoding
== DispEncoding_Disp8
)
4154 Prefixes
|= X86::IP_USE_DISP8
;
4155 else if (ForcedDispEncoding
== DispEncoding_Disp32
)
4156 Prefixes
|= X86::IP_USE_DISP32
;
4159 Inst
.setFlags(Prefixes
);
4161 // In 16-bit mode, if data32 is specified, temporarily switch to 32-bit mode
4162 // when matching the instruction.
4163 if (ForcedDataPrefix
== X86::Mode32Bit
)
4164 SwitchMode(X86::Mode32Bit
);
4165 // First, try a direct match.
4166 FeatureBitset MissingFeatures
;
4167 unsigned OriginalError
= MatchInstruction(Operands
, Inst
, ErrorInfo
,
4168 MissingFeatures
, MatchingInlineAsm
,
4169 isParsingIntelSyntax());
4170 if (ForcedDataPrefix
== X86::Mode32Bit
) {
4171 SwitchMode(X86::Mode16Bit
);
4172 ForcedDataPrefix
= 0;
4174 switch (OriginalError
) {
4175 default: llvm_unreachable("Unexpected match result!");
4177 if (!MatchingInlineAsm
&& validateInstruction(Inst
, Operands
))
4179 // Some instructions need post-processing to, for example, tweak which
4180 // encoding is selected. Loop on it while changes happen so the
4181 // individual transformations can chain off each other.
4182 if (!MatchingInlineAsm
)
4183 while (processInstruction(Inst
, Operands
))
4187 if (!MatchingInlineAsm
)
4188 emitInstruction(Inst
, Operands
, Out
);
4189 Opcode
= Inst
.getOpcode();
4191 case Match_InvalidImmUnsignedi4
: {
4192 SMLoc ErrorLoc
= ((X86Operand
&)*Operands
[ErrorInfo
]).getStartLoc();
4193 if (ErrorLoc
== SMLoc())
4195 return Error(ErrorLoc
, "immediate must be an integer in range [0, 15]",
4196 EmptyRange
, MatchingInlineAsm
);
4198 case Match_MissingFeature
:
4199 return ErrorMissingFeature(IDLoc
, MissingFeatures
, MatchingInlineAsm
);
4200 case Match_InvalidOperand
:
4201 case Match_MnemonicFail
:
4202 case Match_Unsupported
:
4205 if (Op
.getToken().empty()) {
4206 Error(IDLoc
, "instruction must have size higher than 0", EmptyRange
,
4211 // FIXME: Ideally, we would only attempt suffix matches for things which are
4212 // valid prefixes, and we could just infer the right unambiguous
4213 // type. However, that requires substantially more matcher support than the
4216 // Change the operand to point to a temporary token.
4217 StringRef Base
= Op
.getToken();
4218 SmallString
<16> Tmp
;
4221 Op
.setTokenValue(Tmp
);
4223 // If this instruction starts with an 'f', then it is a floating point stack
4224 // instruction. These come in up to three forms for 32-bit, 64-bit, and
4225 // 80-bit floating point, which use the suffixes s,l,t respectively.
4227 // Otherwise, we assume that this may be an integer instruction, which comes
4228 // in 8/16/32/64-bit forms using the b,w,l,q suffixes respectively.
4229 const char *Suffixes
= Base
[0] != 'f' ? "bwlq" : "slt\0";
4230 // MemSize corresponding to Suffixes. { 8, 16, 32, 64 } { 32, 64, 80, 0 }
4231 const char *MemSize
= Base
[0] != 'f' ? "\x08\x10\x20\x40" : "\x20\x40\x50\0";
4233 // Check for the various suffix matches.
4234 uint64_t ErrorInfoIgnore
;
4235 FeatureBitset ErrorInfoMissingFeatures
; // Init suppresses compiler warnings.
4238 // Some instruction like VPMULDQ is NOT the variant of VPMULD but a new one.
4239 // So we should make sure the suffix matcher only works for memory variant
4240 // that has the same size with the suffix.
4241 // FIXME: This flag is a workaround for legacy instructions that didn't
4242 // declare non suffix variant assembly.
4243 bool HasVectorReg
= false;
4244 X86Operand
*MemOp
= nullptr;
4245 for (const auto &Op
: Operands
) {
4246 X86Operand
*X86Op
= static_cast<X86Operand
*>(Op
.get());
4247 if (X86Op
->isVectorReg())
4248 HasVectorReg
= true;
4249 else if (X86Op
->isMem()) {
4251 assert(MemOp
->Mem
.Size
== 0 && "Memory size always 0 under ATT syntax");
4252 // Have we found an unqualified memory operand,
4253 // break. IA allows only one memory operand.
4258 for (unsigned I
= 0, E
= array_lengthof(Match
); I
!= E
; ++I
) {
4259 Tmp
.back() = Suffixes
[I
];
4260 if (MemOp
&& HasVectorReg
)
4261 MemOp
->Mem
.Size
= MemSize
[I
];
4262 Match
[I
] = Match_MnemonicFail
;
4263 if (MemOp
|| !HasVectorReg
) {
4265 MatchInstruction(Operands
, Inst
, ErrorInfoIgnore
, MissingFeatures
,
4266 MatchingInlineAsm
, isParsingIntelSyntax());
4267 // If this returned as a missing feature failure, remember that.
4268 if (Match
[I
] == Match_MissingFeature
)
4269 ErrorInfoMissingFeatures
= MissingFeatures
;
4273 // Restore the old token.
4274 Op
.setTokenValue(Base
);
4276 // If exactly one matched, then we treat that as a successful match (and the
4277 // instruction will already have been filled in correctly, since the failing
4278 // matches won't have modified it).
4279 unsigned NumSuccessfulMatches
=
4280 std::count(std::begin(Match
), std::end(Match
), Match_Success
);
4281 if (NumSuccessfulMatches
== 1) {
4282 if (!MatchingInlineAsm
&& validateInstruction(Inst
, Operands
))
4284 // Some instructions need post-processing to, for example, tweak which
4285 // encoding is selected. Loop on it while changes happen so the
4286 // individual transformations can chain off each other.
4287 if (!MatchingInlineAsm
)
4288 while (processInstruction(Inst
, Operands
))
4292 if (!MatchingInlineAsm
)
4293 emitInstruction(Inst
, Operands
, Out
);
4294 Opcode
= Inst
.getOpcode();
4298 // Otherwise, the match failed, try to produce a decent error message.
4300 // If we had multiple suffix matches, then identify this as an ambiguous
4302 if (NumSuccessfulMatches
> 1) {
4304 unsigned NumMatches
= 0;
4305 for (unsigned I
= 0, E
= array_lengthof(Match
); I
!= E
; ++I
)
4306 if (Match
[I
] == Match_Success
)
4307 MatchChars
[NumMatches
++] = Suffixes
[I
];
4309 SmallString
<126> Msg
;
4310 raw_svector_ostream
OS(Msg
);
4311 OS
<< "ambiguous instructions require an explicit suffix (could be ";
4312 for (unsigned i
= 0; i
!= NumMatches
; ++i
) {
4315 if (i
+ 1 == NumMatches
)
4317 OS
<< "'" << Base
<< MatchChars
[i
] << "'";
4320 Error(IDLoc
, OS
.str(), EmptyRange
, MatchingInlineAsm
);
4324 // Okay, we know that none of the variants matched successfully.
4326 // If all of the instructions reported an invalid mnemonic, then the original
4327 // mnemonic was invalid.
4328 if (std::count(std::begin(Match
), std::end(Match
), Match_MnemonicFail
) == 4) {
4329 if (OriginalError
== Match_MnemonicFail
)
4330 return Error(IDLoc
, "invalid instruction mnemonic '" + Base
+ "'",
4331 Op
.getLocRange(), MatchingInlineAsm
);
4333 if (OriginalError
== Match_Unsupported
)
4334 return Error(IDLoc
, "unsupported instruction", EmptyRange
,
4337 assert(OriginalError
== Match_InvalidOperand
&& "Unexpected error");
4338 // Recover location info for the operand if we know which was the problem.
4339 if (ErrorInfo
!= ~0ULL) {
4340 if (ErrorInfo
>= Operands
.size())
4341 return Error(IDLoc
, "too few operands for instruction", EmptyRange
,
4344 X86Operand
&Operand
= (X86Operand
&)*Operands
[ErrorInfo
];
4345 if (Operand
.getStartLoc().isValid()) {
4346 SMRange OperandRange
= Operand
.getLocRange();
4347 return Error(Operand
.getStartLoc(), "invalid operand for instruction",
4348 OperandRange
, MatchingInlineAsm
);
4352 return Error(IDLoc
, "invalid operand for instruction", EmptyRange
,
4356 // If one instruction matched as unsupported, report this as unsupported.
4357 if (std::count(std::begin(Match
), std::end(Match
),
4358 Match_Unsupported
) == 1) {
4359 return Error(IDLoc
, "unsupported instruction", EmptyRange
,
4363 // If one instruction matched with a missing feature, report this as a
4365 if (std::count(std::begin(Match
), std::end(Match
),
4366 Match_MissingFeature
) == 1) {
4367 ErrorInfo
= Match_MissingFeature
;
4368 return ErrorMissingFeature(IDLoc
, ErrorInfoMissingFeatures
,
4372 // If one instruction matched with an invalid operand, report this as an
4374 if (std::count(std::begin(Match
), std::end(Match
),
4375 Match_InvalidOperand
) == 1) {
4376 return Error(IDLoc
, "invalid operand for instruction", EmptyRange
,
4380 // If all of these were an outright failure, report it in a useless way.
4381 Error(IDLoc
, "unknown use of instruction mnemonic without a size suffix",
4382 EmptyRange
, MatchingInlineAsm
);
4386 bool X86AsmParser::MatchAndEmitIntelInstruction(SMLoc IDLoc
, unsigned &Opcode
,
4387 OperandVector
&Operands
,
4389 uint64_t &ErrorInfo
,
4390 bool MatchingInlineAsm
) {
4391 assert(!Operands
.empty() && "Unexpect empty operand list!");
4392 assert((*Operands
[0]).isToken() && "Leading operand should always be a mnemonic!");
4393 StringRef Mnemonic
= (static_cast<X86Operand
&>(*Operands
[0])).getToken();
4394 SMRange EmptyRange
= None
;
4395 StringRef Base
= (static_cast<X86Operand
&>(*Operands
[0])).getToken();
4396 unsigned Prefixes
= getPrefixes(Operands
);
4398 // First, handle aliases that expand to multiple instructions.
4399 MatchFPUWaitAlias(IDLoc
, static_cast<X86Operand
&>(*Operands
[0]), Operands
, Out
, MatchingInlineAsm
);
4400 X86Operand
&Op
= static_cast<X86Operand
&>(*Operands
[0]);
4404 // If VEX/EVEX encoding is forced, we need to pass the USE_* flag to the
4405 // encoder and printer.
4406 if (ForcedVEXEncoding
== VEXEncoding_VEX
)
4407 Prefixes
|= X86::IP_USE_VEX
;
4408 else if (ForcedVEXEncoding
== VEXEncoding_VEX2
)
4409 Prefixes
|= X86::IP_USE_VEX2
;
4410 else if (ForcedVEXEncoding
== VEXEncoding_VEX3
)
4411 Prefixes
|= X86::IP_USE_VEX3
;
4412 else if (ForcedVEXEncoding
== VEXEncoding_EVEX
)
4413 Prefixes
|= X86::IP_USE_EVEX
;
4415 // Set encoded flags for {disp8} and {disp32}.
4416 if (ForcedDispEncoding
== DispEncoding_Disp8
)
4417 Prefixes
|= X86::IP_USE_DISP8
;
4418 else if (ForcedDispEncoding
== DispEncoding_Disp32
)
4419 Prefixes
|= X86::IP_USE_DISP32
;
4422 Inst
.setFlags(Prefixes
);
4424 // Find one unsized memory operand, if present.
4425 X86Operand
*UnsizedMemOp
= nullptr;
4426 for (const auto &Op
: Operands
) {
4427 X86Operand
*X86Op
= static_cast<X86Operand
*>(Op
.get());
4428 if (X86Op
->isMemUnsized()) {
4429 UnsizedMemOp
= X86Op
;
4430 // Have we found an unqualified memory operand,
4431 // break. IA allows only one memory operand.
4436 // Allow some instructions to have implicitly pointer-sized operands. This is
4437 // compatible with gas.
4439 static const char *const PtrSizedInstrs
[] = {"call", "jmp", "push"};
4440 for (const char *Instr
: PtrSizedInstrs
) {
4441 if (Mnemonic
== Instr
) {
4442 UnsizedMemOp
->Mem
.Size
= getPointerWidth();
4448 SmallVector
<unsigned, 8> Match
;
4449 FeatureBitset ErrorInfoMissingFeatures
;
4450 FeatureBitset MissingFeatures
;
4452 // If unsized push has immediate operand we should default the default pointer
4453 // size for the size.
4454 if (Mnemonic
== "push" && Operands
.size() == 2) {
4455 auto *X86Op
= static_cast<X86Operand
*>(Operands
[1].get());
4456 if (X86Op
->isImm()) {
4457 // If it's not a constant fall through and let remainder take care of it.
4458 const auto *CE
= dyn_cast
<MCConstantExpr
>(X86Op
->getImm());
4459 unsigned Size
= getPointerWidth();
4461 (isIntN(Size
, CE
->getValue()) || isUIntN(Size
, CE
->getValue()))) {
4462 SmallString
<16> Tmp
;
4464 Tmp
+= (is64BitMode())
4466 : (is32BitMode()) ? "l" : (is16BitMode()) ? "w" : " ";
4467 Op
.setTokenValue(Tmp
);
4468 // Do match in ATT mode to allow explicit suffix usage.
4469 Match
.push_back(MatchInstruction(Operands
, Inst
, ErrorInfo
,
4470 MissingFeatures
, MatchingInlineAsm
,
4471 false /*isParsingIntelSyntax()*/));
4472 Op
.setTokenValue(Base
);
4477 // If an unsized memory operand is present, try to match with each memory
4478 // operand size. In Intel assembly, the size is not part of the instruction
4480 if (UnsizedMemOp
&& UnsizedMemOp
->isMemUnsized()) {
4481 static const unsigned MopSizes
[] = {8, 16, 32, 64, 80, 128, 256, 512};
4482 for (unsigned Size
: MopSizes
) {
4483 UnsizedMemOp
->Mem
.Size
= Size
;
4484 uint64_t ErrorInfoIgnore
;
4485 unsigned LastOpcode
= Inst
.getOpcode();
4486 unsigned M
= MatchInstruction(Operands
, Inst
, ErrorInfoIgnore
,
4487 MissingFeatures
, MatchingInlineAsm
,
4488 isParsingIntelSyntax());
4489 if (Match
.empty() || LastOpcode
!= Inst
.getOpcode())
4492 // If this returned as a missing feature failure, remember that.
4493 if (Match
.back() == Match_MissingFeature
)
4494 ErrorInfoMissingFeatures
= MissingFeatures
;
4497 // Restore the size of the unsized memory operand if we modified it.
4498 UnsizedMemOp
->Mem
.Size
= 0;
4501 // If we haven't matched anything yet, this is not a basic integer or FPU
4502 // operation. There shouldn't be any ambiguity in our mnemonic table, so try
4503 // matching with the unsized operand.
4504 if (Match
.empty()) {
4505 Match
.push_back(MatchInstruction(
4506 Operands
, Inst
, ErrorInfo
, MissingFeatures
, MatchingInlineAsm
,
4507 isParsingIntelSyntax()));
4508 // If this returned as a missing feature failure, remember that.
4509 if (Match
.back() == Match_MissingFeature
)
4510 ErrorInfoMissingFeatures
= MissingFeatures
;
4513 // Restore the size of the unsized memory operand if we modified it.
4515 UnsizedMemOp
->Mem
.Size
= 0;
4517 // If it's a bad mnemonic, all results will be the same.
4518 if (Match
.back() == Match_MnemonicFail
) {
4519 return Error(IDLoc
, "invalid instruction mnemonic '" + Mnemonic
+ "'",
4520 Op
.getLocRange(), MatchingInlineAsm
);
4523 unsigned NumSuccessfulMatches
=
4524 std::count(std::begin(Match
), std::end(Match
), Match_Success
);
4526 // If matching was ambiguous and we had size information from the frontend,
4527 // try again with that. This handles cases like "movxz eax, m8/m16".
4528 if (UnsizedMemOp
&& NumSuccessfulMatches
> 1 &&
4529 UnsizedMemOp
->getMemFrontendSize()) {
4530 UnsizedMemOp
->Mem
.Size
= UnsizedMemOp
->getMemFrontendSize();
4531 unsigned M
= MatchInstruction(
4532 Operands
, Inst
, ErrorInfo
, MissingFeatures
, MatchingInlineAsm
,
4533 isParsingIntelSyntax());
4534 if (M
== Match_Success
)
4535 NumSuccessfulMatches
= 1;
4537 // Add a rewrite that encodes the size information we used from the
4539 InstInfo
->AsmRewrites
->emplace_back(
4540 AOK_SizeDirective
, UnsizedMemOp
->getStartLoc(),
4541 /*Len=*/0, UnsizedMemOp
->getMemFrontendSize());
4544 // If exactly one matched, then we treat that as a successful match (and the
4545 // instruction will already have been filled in correctly, since the failing
4546 // matches won't have modified it).
4547 if (NumSuccessfulMatches
== 1) {
4548 if (!MatchingInlineAsm
&& validateInstruction(Inst
, Operands
))
4550 // Some instructions need post-processing to, for example, tweak which
4551 // encoding is selected. Loop on it while changes happen so the individual
4552 // transformations can chain off each other.
4553 if (!MatchingInlineAsm
)
4554 while (processInstruction(Inst
, Operands
))
4557 if (!MatchingInlineAsm
)
4558 emitInstruction(Inst
, Operands
, Out
);
4559 Opcode
= Inst
.getOpcode();
4561 } else if (NumSuccessfulMatches
> 1) {
4562 assert(UnsizedMemOp
&&
4563 "multiple matches only possible with unsized memory operands");
4564 return Error(UnsizedMemOp
->getStartLoc(),
4565 "ambiguous operand size for instruction '" + Mnemonic
+ "\'",
4566 UnsizedMemOp
->getLocRange());
4569 // If one instruction matched as unsupported, report this as unsupported.
4570 if (std::count(std::begin(Match
), std::end(Match
),
4571 Match_Unsupported
) == 1) {
4572 return Error(IDLoc
, "unsupported instruction", EmptyRange
,
4576 // If one instruction matched with a missing feature, report this as a
4578 if (std::count(std::begin(Match
), std::end(Match
),
4579 Match_MissingFeature
) == 1) {
4580 ErrorInfo
= Match_MissingFeature
;
4581 return ErrorMissingFeature(IDLoc
, ErrorInfoMissingFeatures
,
4585 // If one instruction matched with an invalid operand, report this as an
4587 if (std::count(std::begin(Match
), std::end(Match
),
4588 Match_InvalidOperand
) == 1) {
4589 return Error(IDLoc
, "invalid operand for instruction", EmptyRange
,
4593 if (std::count(std::begin(Match
), std::end(Match
),
4594 Match_InvalidImmUnsignedi4
) == 1) {
4595 SMLoc ErrorLoc
= ((X86Operand
&)*Operands
[ErrorInfo
]).getStartLoc();
4596 if (ErrorLoc
== SMLoc())
4598 return Error(ErrorLoc
, "immediate must be an integer in range [0, 15]",
4599 EmptyRange
, MatchingInlineAsm
);
4602 // If all of these were an outright failure, report it in a useless way.
4603 return Error(IDLoc
, "unknown instruction mnemonic", EmptyRange
,
4607 bool X86AsmParser::OmitRegisterFromClobberLists(unsigned RegNo
) {
4608 return X86MCRegisterClasses
[X86::SEGMENT_REGRegClassID
].contains(RegNo
);
4611 bool X86AsmParser::ParseDirective(AsmToken DirectiveID
) {
4612 MCAsmParser
&Parser
= getParser();
4613 StringRef IDVal
= DirectiveID
.getIdentifier();
4614 if (IDVal
.startswith(".arch"))
4615 return parseDirectiveArch();
4616 if (IDVal
.startswith(".code"))
4617 return ParseDirectiveCode(IDVal
, DirectiveID
.getLoc());
4618 else if (IDVal
.startswith(".att_syntax")) {
4619 if (getLexer().isNot(AsmToken::EndOfStatement
)) {
4620 if (Parser
.getTok().getString() == "prefix")
4622 else if (Parser
.getTok().getString() == "noprefix")
4623 return Error(DirectiveID
.getLoc(), "'.att_syntax noprefix' is not "
4624 "supported: registers must have a "
4625 "'%' prefix in .att_syntax");
4627 getParser().setAssemblerDialect(0);
4629 } else if (IDVal
.startswith(".intel_syntax")) {
4630 getParser().setAssemblerDialect(1);
4631 if (getLexer().isNot(AsmToken::EndOfStatement
)) {
4632 if (Parser
.getTok().getString() == "noprefix")
4634 else if (Parser
.getTok().getString() == "prefix")
4635 return Error(DirectiveID
.getLoc(), "'.intel_syntax prefix' is not "
4636 "supported: registers must not have "
4637 "a '%' prefix in .intel_syntax");
4640 } else if (IDVal
== ".nops")
4641 return parseDirectiveNops(DirectiveID
.getLoc());
4642 else if (IDVal
== ".even")
4643 return parseDirectiveEven(DirectiveID
.getLoc());
4644 else if (IDVal
== ".cv_fpo_proc")
4645 return parseDirectiveFPOProc(DirectiveID
.getLoc());
4646 else if (IDVal
== ".cv_fpo_setframe")
4647 return parseDirectiveFPOSetFrame(DirectiveID
.getLoc());
4648 else if (IDVal
== ".cv_fpo_pushreg")
4649 return parseDirectiveFPOPushReg(DirectiveID
.getLoc());
4650 else if (IDVal
== ".cv_fpo_stackalloc")
4651 return parseDirectiveFPOStackAlloc(DirectiveID
.getLoc());
4652 else if (IDVal
== ".cv_fpo_stackalign")
4653 return parseDirectiveFPOStackAlign(DirectiveID
.getLoc());
4654 else if (IDVal
== ".cv_fpo_endprologue")
4655 return parseDirectiveFPOEndPrologue(DirectiveID
.getLoc());
4656 else if (IDVal
== ".cv_fpo_endproc")
4657 return parseDirectiveFPOEndProc(DirectiveID
.getLoc());
4658 else if (IDVal
== ".seh_pushreg" ||
4659 (Parser
.isParsingMasm() && IDVal
.equals_insensitive(".pushreg")))
4660 return parseDirectiveSEHPushReg(DirectiveID
.getLoc());
4661 else if (IDVal
== ".seh_setframe" ||
4662 (Parser
.isParsingMasm() && IDVal
.equals_insensitive(".setframe")))
4663 return parseDirectiveSEHSetFrame(DirectiveID
.getLoc());
4664 else if (IDVal
== ".seh_savereg" ||
4665 (Parser
.isParsingMasm() && IDVal
.equals_insensitive(".savereg")))
4666 return parseDirectiveSEHSaveReg(DirectiveID
.getLoc());
4667 else if (IDVal
== ".seh_savexmm" ||
4668 (Parser
.isParsingMasm() && IDVal
.equals_insensitive(".savexmm128")))
4669 return parseDirectiveSEHSaveXMM(DirectiveID
.getLoc());
4670 else if (IDVal
== ".seh_pushframe" ||
4671 (Parser
.isParsingMasm() && IDVal
.equals_insensitive(".pushframe")))
4672 return parseDirectiveSEHPushFrame(DirectiveID
.getLoc());
4677 bool X86AsmParser::parseDirectiveArch() {
4678 // Ignore .arch for now.
4679 getParser().parseStringToEndOfStatement();
4683 /// parseDirectiveNops
4684 /// ::= .nops size[, control]
4685 bool X86AsmParser::parseDirectiveNops(SMLoc L
) {
4686 int64_t NumBytes
= 0, Control
= 0;
4687 SMLoc NumBytesLoc
, ControlLoc
;
4688 const MCSubtargetInfo STI
= getSTI();
4689 NumBytesLoc
= getTok().getLoc();
4690 if (getParser().checkForValidSection() ||
4691 getParser().parseAbsoluteExpression(NumBytes
))
4694 if (parseOptionalToken(AsmToken::Comma
)) {
4695 ControlLoc
= getTok().getLoc();
4696 if (getParser().parseAbsoluteExpression(Control
))
4699 if (getParser().parseToken(AsmToken::EndOfStatement
,
4700 "unexpected token in '.nops' directive"))
4703 if (NumBytes
<= 0) {
4704 Error(NumBytesLoc
, "'.nops' directive with non-positive size");
4709 Error(ControlLoc
, "'.nops' directive with negative NOP size");
4714 getParser().getStreamer().emitNops(NumBytes
, Control
, L
);
4719 /// parseDirectiveEven
4721 bool X86AsmParser::parseDirectiveEven(SMLoc L
) {
4722 if (parseToken(AsmToken::EndOfStatement
, "unexpected token in directive"))
4725 const MCSection
*Section
= getStreamer().getCurrentSectionOnly();
4727 getStreamer().InitSections(false);
4728 Section
= getStreamer().getCurrentSectionOnly();
4730 if (Section
->UseCodeAlign())
4731 getStreamer().emitCodeAlignment(2, 0);
4733 getStreamer().emitValueToAlignment(2, 0, 1, 0);
4737 /// ParseDirectiveCode
4738 /// ::= .code16 | .code32 | .code64
4739 bool X86AsmParser::ParseDirectiveCode(StringRef IDVal
, SMLoc L
) {
4740 MCAsmParser
&Parser
= getParser();
4742 if (IDVal
== ".code16") {
4744 if (!is16BitMode()) {
4745 SwitchMode(X86::Mode16Bit
);
4746 getParser().getStreamer().emitAssemblerFlag(MCAF_Code16
);
4748 } else if (IDVal
== ".code16gcc") {
4749 // .code16gcc parses as if in 32-bit mode, but emits code in 16-bit mode.
4752 if (!is16BitMode()) {
4753 SwitchMode(X86::Mode16Bit
);
4754 getParser().getStreamer().emitAssemblerFlag(MCAF_Code16
);
4756 } else if (IDVal
== ".code32") {
4758 if (!is32BitMode()) {
4759 SwitchMode(X86::Mode32Bit
);
4760 getParser().getStreamer().emitAssemblerFlag(MCAF_Code32
);
4762 } else if (IDVal
== ".code64") {
4764 if (!is64BitMode()) {
4765 SwitchMode(X86::Mode64Bit
);
4766 getParser().getStreamer().emitAssemblerFlag(MCAF_Code64
);
4769 Error(L
, "unknown directive " + IDVal
);
4777 bool X86AsmParser::parseDirectiveFPOProc(SMLoc L
) {
4778 MCAsmParser
&Parser
= getParser();
4781 if (Parser
.parseIdentifier(ProcName
))
4782 return Parser
.TokError("expected symbol name");
4783 if (Parser
.parseIntToken(ParamsSize
, "expected parameter byte count"))
4785 if (!isUIntN(32, ParamsSize
))
4786 return Parser
.TokError("parameters size out of range");
4789 MCSymbol
*ProcSym
= getContext().getOrCreateSymbol(ProcName
);
4790 return getTargetStreamer().emitFPOProc(ProcSym
, ParamsSize
, L
);
4793 // .cv_fpo_setframe ebp
4794 bool X86AsmParser::parseDirectiveFPOSetFrame(SMLoc L
) {
4797 if (ParseRegister(Reg
, DummyLoc
, DummyLoc
) || parseEOL())
4799 return getTargetStreamer().emitFPOSetFrame(Reg
, L
);
4802 // .cv_fpo_pushreg ebx
4803 bool X86AsmParser::parseDirectiveFPOPushReg(SMLoc L
) {
4806 if (ParseRegister(Reg
, DummyLoc
, DummyLoc
) || parseEOL())
4808 return getTargetStreamer().emitFPOPushReg(Reg
, L
);
4811 // .cv_fpo_stackalloc 20
4812 bool X86AsmParser::parseDirectiveFPOStackAlloc(SMLoc L
) {
4813 MCAsmParser
&Parser
= getParser();
4815 if (Parser
.parseIntToken(Offset
, "expected offset") || parseEOL())
4817 return getTargetStreamer().emitFPOStackAlloc(Offset
, L
);
4820 // .cv_fpo_stackalign 8
4821 bool X86AsmParser::parseDirectiveFPOStackAlign(SMLoc L
) {
4822 MCAsmParser
&Parser
= getParser();
4824 if (Parser
.parseIntToken(Offset
, "expected offset") || parseEOL())
4826 return getTargetStreamer().emitFPOStackAlign(Offset
, L
);
4829 // .cv_fpo_endprologue
4830 bool X86AsmParser::parseDirectiveFPOEndPrologue(SMLoc L
) {
4831 MCAsmParser
&Parser
= getParser();
4832 if (Parser
.parseEOL())
4834 return getTargetStreamer().emitFPOEndPrologue(L
);
4838 bool X86AsmParser::parseDirectiveFPOEndProc(SMLoc L
) {
4839 MCAsmParser
&Parser
= getParser();
4840 if (Parser
.parseEOL())
4842 return getTargetStreamer().emitFPOEndProc(L
);
4845 bool X86AsmParser::parseSEHRegisterNumber(unsigned RegClassID
,
4847 SMLoc startLoc
= getLexer().getLoc();
4848 const MCRegisterInfo
*MRI
= getContext().getRegisterInfo();
4850 // Try parsing the argument as a register first.
4851 if (getLexer().getTok().isNot(AsmToken::Integer
)) {
4853 if (ParseRegister(RegNo
, startLoc
, endLoc
))
4856 if (!X86MCRegisterClasses
[RegClassID
].contains(RegNo
)) {
4857 return Error(startLoc
,
4858 "register is not supported for use with this directive");
4861 // Otherwise, an integer number matching the encoding of the desired
4862 // register may appear.
4864 if (getParser().parseAbsoluteExpression(EncodedReg
))
4867 // The SEH register number is the same as the encoding register number. Map
4868 // from the encoding back to the LLVM register number.
4870 for (MCPhysReg Reg
: X86MCRegisterClasses
[RegClassID
]) {
4871 if (MRI
->getEncodingValue(Reg
) == EncodedReg
) {
4877 return Error(startLoc
,
4878 "incorrect register number for use with this directive");
4885 bool X86AsmParser::parseDirectiveSEHPushReg(SMLoc Loc
) {
4887 if (parseSEHRegisterNumber(X86::GR64RegClassID
, Reg
))
4890 if (getLexer().isNot(AsmToken::EndOfStatement
))
4891 return TokError("unexpected token in directive");
4894 getStreamer().EmitWinCFIPushReg(Reg
, Loc
);
4898 bool X86AsmParser::parseDirectiveSEHSetFrame(SMLoc Loc
) {
4901 if (parseSEHRegisterNumber(X86::GR64RegClassID
, Reg
))
4903 if (getLexer().isNot(AsmToken::Comma
))
4904 return TokError("you must specify a stack pointer offset");
4907 if (getParser().parseAbsoluteExpression(Off
))
4910 if (getLexer().isNot(AsmToken::EndOfStatement
))
4911 return TokError("unexpected token in directive");
4914 getStreamer().EmitWinCFISetFrame(Reg
, Off
, Loc
);
4918 bool X86AsmParser::parseDirectiveSEHSaveReg(SMLoc Loc
) {
4921 if (parseSEHRegisterNumber(X86::GR64RegClassID
, Reg
))
4923 if (getLexer().isNot(AsmToken::Comma
))
4924 return TokError("you must specify an offset on the stack");
4927 if (getParser().parseAbsoluteExpression(Off
))
4930 if (getLexer().isNot(AsmToken::EndOfStatement
))
4931 return TokError("unexpected token in directive");
4934 getStreamer().EmitWinCFISaveReg(Reg
, Off
, Loc
);
4938 bool X86AsmParser::parseDirectiveSEHSaveXMM(SMLoc Loc
) {
4941 if (parseSEHRegisterNumber(X86::VR128XRegClassID
, Reg
))
4943 if (getLexer().isNot(AsmToken::Comma
))
4944 return TokError("you must specify an offset on the stack");
4947 if (getParser().parseAbsoluteExpression(Off
))
4950 if (getLexer().isNot(AsmToken::EndOfStatement
))
4951 return TokError("unexpected token in directive");
4954 getStreamer().EmitWinCFISaveXMM(Reg
, Off
, Loc
);
4958 bool X86AsmParser::parseDirectiveSEHPushFrame(SMLoc Loc
) {
4961 if (getLexer().is(AsmToken::At
)) {
4962 SMLoc startLoc
= getLexer().getLoc();
4964 if (!getParser().parseIdentifier(CodeID
)) {
4965 if (CodeID
!= "code")
4966 return Error(startLoc
, "expected @code");
4971 if (getLexer().isNot(AsmToken::EndOfStatement
))
4972 return TokError("unexpected token in directive");
4975 getStreamer().EmitWinCFIPushFrame(Code
, Loc
);
4979 // Force static initialization.
4980 extern "C" LLVM_EXTERNAL_VISIBILITY
void LLVMInitializeX86AsmParser() {
4981 RegisterMCAsmParser
<X86AsmParser
> X(getTheX86_32Target());
4982 RegisterMCAsmParser
<X86AsmParser
> Y(getTheX86_64Target());
4985 #define GET_REGISTER_MATCHER
4986 #define GET_MATCHER_IMPLEMENTATION
4987 #define GET_SUBTARGET_FEATURE_NAME
4988 #include "X86GenAsmMatcher.inc"