[test] Pre-commit llvm.experimental.memset.pattern tests prior to MemoryLocation...
[llvm-project.git] / llvm / lib / Target / X86 / AsmParser / X86AsmParser.cpp
blob41a646621c7ead60de805f9e1615297eec364bf1
1 //===-- X86AsmParser.cpp - Parse X86 assembly to MCInst instructions ------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
9 #include "MCTargetDesc/X86BaseInfo.h"
10 #include "MCTargetDesc/X86EncodingOptimization.h"
11 #include "MCTargetDesc/X86IntelInstPrinter.h"
12 #include "MCTargetDesc/X86MCExpr.h"
13 #include "MCTargetDesc/X86MCTargetDesc.h"
14 #include "MCTargetDesc/X86TargetStreamer.h"
15 #include "TargetInfo/X86TargetInfo.h"
16 #include "X86Operand.h"
17 #include "llvm/ADT/STLExtras.h"
18 #include "llvm/ADT/SmallString.h"
19 #include "llvm/ADT/SmallVector.h"
20 #include "llvm/ADT/StringSwitch.h"
21 #include "llvm/ADT/Twine.h"
22 #include "llvm/MC/MCContext.h"
23 #include "llvm/MC/MCExpr.h"
24 #include "llvm/MC/MCInst.h"
25 #include "llvm/MC/MCInstrInfo.h"
26 #include "llvm/MC/MCParser/MCAsmLexer.h"
27 #include "llvm/MC/MCParser/MCAsmParser.h"
28 #include "llvm/MC/MCParser/MCParsedAsmOperand.h"
29 #include "llvm/MC/MCParser/MCTargetAsmParser.h"
30 #include "llvm/MC/MCRegisterInfo.h"
31 #include "llvm/MC/MCSection.h"
32 #include "llvm/MC/MCStreamer.h"
33 #include "llvm/MC/MCSubtargetInfo.h"
34 #include "llvm/MC/MCSymbol.h"
35 #include "llvm/MC/TargetRegistry.h"
36 #include "llvm/Support/CommandLine.h"
37 #include "llvm/Support/Compiler.h"
38 #include "llvm/Support/SourceMgr.h"
39 #include "llvm/Support/raw_ostream.h"
40 #include <algorithm>
41 #include <memory>
43 using namespace llvm;
45 static cl::opt<bool> LVIInlineAsmHardening(
46 "x86-experimental-lvi-inline-asm-hardening",
47 cl::desc("Harden inline assembly code that may be vulnerable to Load Value"
48 " Injection (LVI). This feature is experimental."), cl::Hidden);
50 static bool checkScale(unsigned Scale, StringRef &ErrMsg) {
51 if (Scale != 1 && Scale != 2 && Scale != 4 && Scale != 8) {
52 ErrMsg = "scale factor in address must be 1, 2, 4 or 8";
53 return true;
55 return false;
58 namespace {
60 // Including the generated SSE2AVX compression tables.
61 #define GET_X86_SSE2AVX_TABLE
62 #include "X86GenInstrMapping.inc"
// Operator precedence table, indexed by InfixCalculatorTok value (the entry
// order must therefore mirror that enum). A larger number binds tighter.
static const char OpPrecedence[] = {
    /* [ 0] IC_OR       */ 0,
    /* [ 1] IC_XOR      */ 1,
    /* [ 2] IC_AND      */ 2,
    /* [ 3] IC_LSHIFT   */ 4,
    /* [ 4] IC_RSHIFT   */ 4,
    /* [ 5] IC_PLUS     */ 5,
    /* [ 6] IC_MINUS    */ 5,
    /* [ 7] IC_MULTIPLY */ 6,
    /* [ 8] IC_DIVIDE   */ 6,
    /* [ 9] IC_MOD      */ 6,
    /* [10] IC_NOT      */ 7,
    /* [11] IC_NEG      */ 8,
    /* [12] IC_RPAREN   */ 9,
    /* [13] IC_LPAREN   */ 10,
    /* [14] IC_IMM      */ 0,
    /* [15] IC_REGISTER */ 0,
    /* [16] IC_EQ       */ 3,
    /* [17] IC_NE       */ 3,
    /* [18] IC_LT       */ 3,
    /* [19] IC_LE       */ 3,
    /* [20] IC_GT       */ 3,
    /* [21] IC_GE       */ 3,
};
89 class X86AsmParser : public MCTargetAsmParser {
90 ParseInstructionInfo *InstInfo;
91 bool Code16GCC;
92 unsigned ForcedDataPrefix = 0;
// Opcode-prefix encodings that can be forced for an instruction;
// OpcodePrefix_Default means nothing is forced.
enum OpcodePrefix {
  OpcodePrefix_Default = 0,
  OpcodePrefix_REX = 1,
  OpcodePrefix_REX2 = 2,
  OpcodePrefix_VEX = 3,
  OpcodePrefix_VEX2 = 4,
  OpcodePrefix_VEX3 = 5,
  OpcodePrefix_EVEX = 6,
};
104 OpcodePrefix ForcedOpcodePrefix = OpcodePrefix_Default;
// Displacement encodings that can be forced for an instruction;
// DispEncoding_Default means nothing is forced.
enum DispEncoding {
  DispEncoding_Default = 0,
  DispEncoding_Disp8 = 1,
  DispEncoding_Disp32 = 2,
};
112 DispEncoding ForcedDispEncoding = DispEncoding_Default;
114 // Does this instruction use apx extended register?
115 bool UseApxExtendedReg = false;
116 // Is this instruction explicitly required not to update flags?
117 bool ForcedNoFlag = false;
119 private:
120 SMLoc consumeToken() {
121 MCAsmParser &Parser = getParser();
122 SMLoc Result = Parser.getTok().getLoc();
123 Parser.Lex();
124 return Result;
127 X86TargetStreamer &getTargetStreamer() {
128 assert(getParser().getStreamer().getTargetStreamer() &&
129 "do not have a target streamer");
130 MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
131 return static_cast<X86TargetStreamer &>(TS);
134 unsigned MatchInstruction(const OperandVector &Operands, MCInst &Inst,
135 uint64_t &ErrorInfo, FeatureBitset &MissingFeatures,
136 bool matchingInlineAsm, unsigned VariantID = 0) {
137 // In Code16GCC mode, match as 32-bit.
138 if (Code16GCC)
139 SwitchMode(X86::Is32Bit);
140 unsigned rv = MatchInstructionImpl(Operands, Inst, ErrorInfo,
141 MissingFeatures, matchingInlineAsm,
142 VariantID);
143 if (Code16GCC)
144 SwitchMode(X86::Is16Bit);
145 return rv;
// Token kinds understood by InfixCalculator. The numeric value of each
// enumerator is used as an index into OpPrecedence, so the two must stay in
// the same order.
enum InfixCalculatorTok {
  IC_OR = 0,
  IC_XOR = 1,
  IC_AND = 2,
  IC_LSHIFT = 3,
  IC_RSHIFT = 4,
  IC_PLUS = 5,
  IC_MINUS = 6,
  IC_MULTIPLY = 7,
  IC_DIVIDE = 8,
  IC_MOD = 9,
  IC_NOT = 10,
  IC_NEG = 11,
  IC_RPAREN = 12,
  IC_LPAREN = 13,
  IC_IMM = 14,
  IC_REGISTER = 15,
  IC_EQ = 16,
  IC_NE = 17,
  IC_LT = 18,
  IC_LE = 19,
  IC_GT = 20,
  IC_GE = 21
};
// Operator kinds for Intel-style inline asm; IOK_INVALID (0) is the
// "no operator" value.
enum IntelOperatorKind {
  IOK_INVALID = 0,
  IOK_LENGTH = 1,
  IOK_SIZE = 2,
  IOK_TYPE = 3,
};
// Operator kinds for MASM; MOK_INVALID (0) is the "no operator" value.
enum MasmOperatorKind {
  MOK_INVALID = 0,
  MOK_LENGTHOF = 1,
  MOK_SIZEOF = 2,
  MOK_TYPE = 3,
};
187 class InfixCalculator {
188 typedef std::pair< InfixCalculatorTok, int64_t > ICToken;
189 SmallVector<InfixCalculatorTok, 4> InfixOperatorStack;
190 SmallVector<ICToken, 4> PostfixStack;
192 bool isUnaryOperator(InfixCalculatorTok Op) const {
193 return Op == IC_NEG || Op == IC_NOT;
196 public:
197 int64_t popOperand() {
198 assert (!PostfixStack.empty() && "Poped an empty stack!");
199 ICToken Op = PostfixStack.pop_back_val();
200 if (!(Op.first == IC_IMM || Op.first == IC_REGISTER))
201 return -1; // The invalid Scale value will be caught later by checkScale
202 return Op.second;
204 void pushOperand(InfixCalculatorTok Op, int64_t Val = 0) {
205 assert ((Op == IC_IMM || Op == IC_REGISTER) &&
206 "Unexpected operand!");
207 PostfixStack.push_back(std::make_pair(Op, Val));
210 void popOperator() { InfixOperatorStack.pop_back(); }
211 void pushOperator(InfixCalculatorTok Op) {
212 // Push the new operator if the stack is empty.
213 if (InfixOperatorStack.empty()) {
214 InfixOperatorStack.push_back(Op);
215 return;
218 // Push the new operator if it has a higher precedence than the operator
219 // on the top of the stack or the operator on the top of the stack is a
220 // left parentheses.
221 unsigned Idx = InfixOperatorStack.size() - 1;
222 InfixCalculatorTok StackOp = InfixOperatorStack[Idx];
223 if (OpPrecedence[Op] > OpPrecedence[StackOp] || StackOp == IC_LPAREN) {
224 InfixOperatorStack.push_back(Op);
225 return;
228 // The operator on the top of the stack has higher precedence than the
229 // new operator.
230 unsigned ParenCount = 0;
231 while (true) {
232 // Nothing to process.
233 if (InfixOperatorStack.empty())
234 break;
236 Idx = InfixOperatorStack.size() - 1;
237 StackOp = InfixOperatorStack[Idx];
238 if (!(OpPrecedence[StackOp] >= OpPrecedence[Op] || ParenCount))
239 break;
241 // If we have an even parentheses count and we see a left parentheses,
242 // then stop processing.
243 if (!ParenCount && StackOp == IC_LPAREN)
244 break;
246 if (StackOp == IC_RPAREN) {
247 ++ParenCount;
248 InfixOperatorStack.pop_back();
249 } else if (StackOp == IC_LPAREN) {
250 --ParenCount;
251 InfixOperatorStack.pop_back();
252 } else {
253 InfixOperatorStack.pop_back();
254 PostfixStack.push_back(std::make_pair(StackOp, 0));
257 // Push the new operator.
258 InfixOperatorStack.push_back(Op);
261 int64_t execute() {
262 // Push any remaining operators onto the postfix stack.
263 while (!InfixOperatorStack.empty()) {
264 InfixCalculatorTok StackOp = InfixOperatorStack.pop_back_val();
265 if (StackOp != IC_LPAREN && StackOp != IC_RPAREN)
266 PostfixStack.push_back(std::make_pair(StackOp, 0));
269 if (PostfixStack.empty())
270 return 0;
272 SmallVector<ICToken, 16> OperandStack;
273 for (const ICToken &Op : PostfixStack) {
274 if (Op.first == IC_IMM || Op.first == IC_REGISTER) {
275 OperandStack.push_back(Op);
276 } else if (isUnaryOperator(Op.first)) {
277 assert (OperandStack.size() > 0 && "Too few operands.");
278 ICToken Operand = OperandStack.pop_back_val();
279 assert (Operand.first == IC_IMM &&
280 "Unary operation with a register!");
281 switch (Op.first) {
282 default:
283 report_fatal_error("Unexpected operator!");
284 break;
285 case IC_NEG:
286 OperandStack.push_back(std::make_pair(IC_IMM, -Operand.second));
287 break;
288 case IC_NOT:
289 OperandStack.push_back(std::make_pair(IC_IMM, ~Operand.second));
290 break;
292 } else {
293 assert (OperandStack.size() > 1 && "Too few operands.");
294 int64_t Val;
295 ICToken Op2 = OperandStack.pop_back_val();
296 ICToken Op1 = OperandStack.pop_back_val();
297 switch (Op.first) {
298 default:
299 report_fatal_error("Unexpected operator!");
300 break;
301 case IC_PLUS:
302 Val = Op1.second + Op2.second;
303 OperandStack.push_back(std::make_pair(IC_IMM, Val));
304 break;
305 case IC_MINUS:
306 Val = Op1.second - Op2.second;
307 OperandStack.push_back(std::make_pair(IC_IMM, Val));
308 break;
309 case IC_MULTIPLY:
310 assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
311 "Multiply operation with an immediate and a register!");
312 Val = Op1.second * Op2.second;
313 OperandStack.push_back(std::make_pair(IC_IMM, Val));
314 break;
315 case IC_DIVIDE:
316 assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
317 "Divide operation with an immediate and a register!");
318 assert (Op2.second != 0 && "Division by zero!");
319 Val = Op1.second / Op2.second;
320 OperandStack.push_back(std::make_pair(IC_IMM, Val));
321 break;
322 case IC_MOD:
323 assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
324 "Modulo operation with an immediate and a register!");
325 Val = Op1.second % Op2.second;
326 OperandStack.push_back(std::make_pair(IC_IMM, Val));
327 break;
328 case IC_OR:
329 assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
330 "Or operation with an immediate and a register!");
331 Val = Op1.second | Op2.second;
332 OperandStack.push_back(std::make_pair(IC_IMM, Val));
333 break;
334 case IC_XOR:
335 assert(Op1.first == IC_IMM && Op2.first == IC_IMM &&
336 "Xor operation with an immediate and a register!");
337 Val = Op1.second ^ Op2.second;
338 OperandStack.push_back(std::make_pair(IC_IMM, Val));
339 break;
340 case IC_AND:
341 assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
342 "And operation with an immediate and a register!");
343 Val = Op1.second & Op2.second;
344 OperandStack.push_back(std::make_pair(IC_IMM, Val));
345 break;
346 case IC_LSHIFT:
347 assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
348 "Left shift operation with an immediate and a register!");
349 Val = Op1.second << Op2.second;
350 OperandStack.push_back(std::make_pair(IC_IMM, Val));
351 break;
352 case IC_RSHIFT:
353 assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
354 "Right shift operation with an immediate and a register!");
355 Val = Op1.second >> Op2.second;
356 OperandStack.push_back(std::make_pair(IC_IMM, Val));
357 break;
358 case IC_EQ:
359 assert(Op1.first == IC_IMM && Op2.first == IC_IMM &&
360 "Equals operation with an immediate and a register!");
361 Val = (Op1.second == Op2.second) ? -1 : 0;
362 OperandStack.push_back(std::make_pair(IC_IMM, Val));
363 break;
364 case IC_NE:
365 assert(Op1.first == IC_IMM && Op2.first == IC_IMM &&
366 "Not-equals operation with an immediate and a register!");
367 Val = (Op1.second != Op2.second) ? -1 : 0;
368 OperandStack.push_back(std::make_pair(IC_IMM, Val));
369 break;
370 case IC_LT:
371 assert(Op1.first == IC_IMM && Op2.first == IC_IMM &&
372 "Less-than operation with an immediate and a register!");
373 Val = (Op1.second < Op2.second) ? -1 : 0;
374 OperandStack.push_back(std::make_pair(IC_IMM, Val));
375 break;
376 case IC_LE:
377 assert(Op1.first == IC_IMM && Op2.first == IC_IMM &&
378 "Less-than-or-equal operation with an immediate and a "
379 "register!");
380 Val = (Op1.second <= Op2.second) ? -1 : 0;
381 OperandStack.push_back(std::make_pair(IC_IMM, Val));
382 break;
383 case IC_GT:
384 assert(Op1.first == IC_IMM && Op2.first == IC_IMM &&
385 "Greater-than operation with an immediate and a register!");
386 Val = (Op1.second > Op2.second) ? -1 : 0;
387 OperandStack.push_back(std::make_pair(IC_IMM, Val));
388 break;
389 case IC_GE:
390 assert(Op1.first == IC_IMM && Op2.first == IC_IMM &&
391 "Greater-than-or-equal operation with an immediate and a "
392 "register!");
393 Val = (Op1.second >= Op2.second) ? -1 : 0;
394 OperandStack.push_back(std::make_pair(IC_IMM, Val));
395 break;
399 assert (OperandStack.size() == 1 && "Expected a single result.");
400 return OperandStack.pop_back_val().second;
// States of IntelExprStateMachine. Each value names the kind of token that
// was consumed last; IES_INIT is the start state and IES_ERROR marks a
// rejected token sequence.
enum IntelExprState {
  IES_INIT = 0,
  // Bitwise operators.
  IES_OR,
  IES_XOR,
  IES_AND,
  // Comparison operators.
  IES_EQ,
  IES_NE,
  IES_LT,
  IES_LE,
  IES_GT,
  IES_GE,
  // Shifts.
  IES_LSHIFT,
  IES_RSHIFT,
  // Additive operators.
  IES_PLUS,
  IES_MINUS,
  IES_OFFSET,
  IES_CAST,
  IES_NOT,
  // Multiplicative operators.
  IES_MULTIPLY,
  IES_DIVIDE,
  IES_MOD,
  // Grouping.
  IES_LBRAC,
  IES_RBRAC,
  IES_LPAREN,
  IES_RPAREN,
  // Operands.
  IES_REGISTER,
  IES_INTEGER,
  IES_ERROR
};
434 class IntelExprStateMachine {
435 IntelExprState State = IES_INIT, PrevState = IES_ERROR;
436 MCRegister BaseReg, IndexReg, TmpReg;
437 unsigned Scale = 0;
438 int64_t Imm = 0;
439 const MCExpr *Sym = nullptr;
440 StringRef SymName;
441 InfixCalculator IC;
442 InlineAsmIdentifierInfo Info;
443 short BracCount = 0;
444 bool MemExpr = false;
445 bool BracketUsed = false;
446 bool OffsetOperator = false;
447 bool AttachToOperandIdx = false;
448 bool IsPIC = false;
449 SMLoc OffsetOperatorLoc;
450 AsmTypeInfo CurType;
452 bool setSymRef(const MCExpr *Val, StringRef ID, StringRef &ErrMsg) {
453 if (Sym) {
454 ErrMsg = "cannot use more than one symbol in memory operand";
455 return true;
457 Sym = Val;
458 SymName = ID;
459 return false;
462 public:
463 IntelExprStateMachine() = default;
465 void addImm(int64_t imm) { Imm += imm; }
466 short getBracCount() const { return BracCount; }
467 bool isMemExpr() const { return MemExpr; }
468 bool isBracketUsed() const { return BracketUsed; }
469 bool isOffsetOperator() const { return OffsetOperator; }
470 SMLoc getOffsetLoc() const { return OffsetOperatorLoc; }
471 MCRegister getBaseReg() const { return BaseReg; }
472 MCRegister getIndexReg() const { return IndexReg; }
473 unsigned getScale() const { return Scale; }
474 const MCExpr *getSym() const { return Sym; }
475 StringRef getSymName() const { return SymName; }
476 StringRef getType() const { return CurType.Name; }
477 unsigned getSize() const { return CurType.Size; }
478 unsigned getElementSize() const { return CurType.ElementSize; }
479 unsigned getLength() const { return CurType.Length; }
480 int64_t getImm() { return Imm + IC.execute(); }
481 bool isValidEndState() const {
482 return State == IES_RBRAC || State == IES_RPAREN ||
483 State == IES_INTEGER || State == IES_REGISTER ||
484 State == IES_OFFSET;
487 // Is the intel expression appended after an operand index.
488 // [OperandIdx][Intel Expression]
489 // This is neccessary for checking if it is an independent
490 // intel expression at back end when parse inline asm.
491 void setAppendAfterOperand() { AttachToOperandIdx = true; }
493 bool isPIC() const { return IsPIC; }
494 void setPIC() { IsPIC = true; }
496 bool hadError() const { return State == IES_ERROR; }
497 const InlineAsmIdentifierInfo &getIdentifierInfo() const { return Info; }
499 bool regsUseUpError(StringRef &ErrMsg) {
500 // This case mostly happen in inline asm, e.g. Arr[BaseReg + IndexReg]
501 // can not intruduce additional register in inline asm in PIC model.
502 if (IsPIC && AttachToOperandIdx)
503 ErrMsg = "Don't use 2 or more regs for mem offset in PIC model!";
504 else
505 ErrMsg = "BaseReg/IndexReg already set!";
506 return true;
509 void onOr() {
510 IntelExprState CurrState = State;
511 switch (State) {
512 default:
513 State = IES_ERROR;
514 break;
515 case IES_INTEGER:
516 case IES_RPAREN:
517 case IES_REGISTER:
518 State = IES_OR;
519 IC.pushOperator(IC_OR);
520 break;
522 PrevState = CurrState;
524 void onXor() {
525 IntelExprState CurrState = State;
526 switch (State) {
527 default:
528 State = IES_ERROR;
529 break;
530 case IES_INTEGER:
531 case IES_RPAREN:
532 case IES_REGISTER:
533 State = IES_XOR;
534 IC.pushOperator(IC_XOR);
535 break;
537 PrevState = CurrState;
539 void onAnd() {
540 IntelExprState CurrState = State;
541 switch (State) {
542 default:
543 State = IES_ERROR;
544 break;
545 case IES_INTEGER:
546 case IES_RPAREN:
547 case IES_REGISTER:
548 State = IES_AND;
549 IC.pushOperator(IC_AND);
550 break;
552 PrevState = CurrState;
554 void onEq() {
555 IntelExprState CurrState = State;
556 switch (State) {
557 default:
558 State = IES_ERROR;
559 break;
560 case IES_INTEGER:
561 case IES_RPAREN:
562 case IES_REGISTER:
563 State = IES_EQ;
564 IC.pushOperator(IC_EQ);
565 break;
567 PrevState = CurrState;
569 void onNE() {
570 IntelExprState CurrState = State;
571 switch (State) {
572 default:
573 State = IES_ERROR;
574 break;
575 case IES_INTEGER:
576 case IES_RPAREN:
577 case IES_REGISTER:
578 State = IES_NE;
579 IC.pushOperator(IC_NE);
580 break;
582 PrevState = CurrState;
584 void onLT() {
585 IntelExprState CurrState = State;
586 switch (State) {
587 default:
588 State = IES_ERROR;
589 break;
590 case IES_INTEGER:
591 case IES_RPAREN:
592 case IES_REGISTER:
593 State = IES_LT;
594 IC.pushOperator(IC_LT);
595 break;
597 PrevState = CurrState;
599 void onLE() {
600 IntelExprState CurrState = State;
601 switch (State) {
602 default:
603 State = IES_ERROR;
604 break;
605 case IES_INTEGER:
606 case IES_RPAREN:
607 case IES_REGISTER:
608 State = IES_LE;
609 IC.pushOperator(IC_LE);
610 break;
612 PrevState = CurrState;
614 void onGT() {
615 IntelExprState CurrState = State;
616 switch (State) {
617 default:
618 State = IES_ERROR;
619 break;
620 case IES_INTEGER:
621 case IES_RPAREN:
622 case IES_REGISTER:
623 State = IES_GT;
624 IC.pushOperator(IC_GT);
625 break;
627 PrevState = CurrState;
629 void onGE() {
630 IntelExprState CurrState = State;
631 switch (State) {
632 default:
633 State = IES_ERROR;
634 break;
635 case IES_INTEGER:
636 case IES_RPAREN:
637 case IES_REGISTER:
638 State = IES_GE;
639 IC.pushOperator(IC_GE);
640 break;
642 PrevState = CurrState;
644 void onLShift() {
645 IntelExprState CurrState = State;
646 switch (State) {
647 default:
648 State = IES_ERROR;
649 break;
650 case IES_INTEGER:
651 case IES_RPAREN:
652 case IES_REGISTER:
653 State = IES_LSHIFT;
654 IC.pushOperator(IC_LSHIFT);
655 break;
657 PrevState = CurrState;
659 void onRShift() {
660 IntelExprState CurrState = State;
661 switch (State) {
662 default:
663 State = IES_ERROR;
664 break;
665 case IES_INTEGER:
666 case IES_RPAREN:
667 case IES_REGISTER:
668 State = IES_RSHIFT;
669 IC.pushOperator(IC_RSHIFT);
670 break;
672 PrevState = CurrState;
674 bool onPlus(StringRef &ErrMsg) {
675 IntelExprState CurrState = State;
676 switch (State) {
677 default:
678 State = IES_ERROR;
679 break;
680 case IES_INTEGER:
681 case IES_RPAREN:
682 case IES_REGISTER:
683 case IES_OFFSET:
684 State = IES_PLUS;
685 IC.pushOperator(IC_PLUS);
686 if (CurrState == IES_REGISTER && PrevState != IES_MULTIPLY) {
687 // If we already have a BaseReg, then assume this is the IndexReg with
688 // no explicit scale.
689 if (!BaseReg) {
690 BaseReg = TmpReg;
691 } else {
692 if (IndexReg)
693 return regsUseUpError(ErrMsg);
694 IndexReg = TmpReg;
695 Scale = 0;
698 break;
700 PrevState = CurrState;
701 return false;
703 bool onMinus(StringRef &ErrMsg) {
704 IntelExprState CurrState = State;
705 switch (State) {
706 default:
707 State = IES_ERROR;
708 break;
709 case IES_OR:
710 case IES_XOR:
711 case IES_AND:
712 case IES_EQ:
713 case IES_NE:
714 case IES_LT:
715 case IES_LE:
716 case IES_GT:
717 case IES_GE:
718 case IES_LSHIFT:
719 case IES_RSHIFT:
720 case IES_PLUS:
721 case IES_NOT:
722 case IES_MULTIPLY:
723 case IES_DIVIDE:
724 case IES_MOD:
725 case IES_LPAREN:
726 case IES_RPAREN:
727 case IES_LBRAC:
728 case IES_RBRAC:
729 case IES_INTEGER:
730 case IES_REGISTER:
731 case IES_INIT:
732 case IES_OFFSET:
733 State = IES_MINUS;
734 // push minus operator if it is not a negate operator
735 if (CurrState == IES_REGISTER || CurrState == IES_RPAREN ||
736 CurrState == IES_INTEGER || CurrState == IES_RBRAC ||
737 CurrState == IES_OFFSET)
738 IC.pushOperator(IC_MINUS);
739 else if (PrevState == IES_REGISTER && CurrState == IES_MULTIPLY) {
740 // We have negate operator for Scale: it's illegal
741 ErrMsg = "Scale can't be negative";
742 return true;
743 } else
744 IC.pushOperator(IC_NEG);
745 if (CurrState == IES_REGISTER && PrevState != IES_MULTIPLY) {
746 // If we already have a BaseReg, then assume this is the IndexReg with
747 // no explicit scale.
748 if (!BaseReg) {
749 BaseReg = TmpReg;
750 } else {
751 if (IndexReg)
752 return regsUseUpError(ErrMsg);
753 IndexReg = TmpReg;
754 Scale = 0;
757 break;
759 PrevState = CurrState;
760 return false;
762 void onNot() {
763 IntelExprState CurrState = State;
764 switch (State) {
765 default:
766 State = IES_ERROR;
767 break;
768 case IES_OR:
769 case IES_XOR:
770 case IES_AND:
771 case IES_EQ:
772 case IES_NE:
773 case IES_LT:
774 case IES_LE:
775 case IES_GT:
776 case IES_GE:
777 case IES_LSHIFT:
778 case IES_RSHIFT:
779 case IES_PLUS:
780 case IES_MINUS:
781 case IES_NOT:
782 case IES_MULTIPLY:
783 case IES_DIVIDE:
784 case IES_MOD:
785 case IES_LPAREN:
786 case IES_LBRAC:
787 case IES_INIT:
788 State = IES_NOT;
789 IC.pushOperator(IC_NOT);
790 break;
792 PrevState = CurrState;
794 bool onRegister(MCRegister Reg, StringRef &ErrMsg) {
795 IntelExprState CurrState = State;
796 switch (State) {
797 default:
798 State = IES_ERROR;
799 break;
800 case IES_PLUS:
801 case IES_LPAREN:
802 case IES_LBRAC:
803 State = IES_REGISTER;
804 TmpReg = Reg;
805 IC.pushOperand(IC_REGISTER);
806 break;
807 case IES_MULTIPLY:
808 // Index Register - Scale * Register
809 if (PrevState == IES_INTEGER) {
810 if (IndexReg)
811 return regsUseUpError(ErrMsg);
812 State = IES_REGISTER;
813 IndexReg = Reg;
814 // Get the scale and replace the 'Scale * Register' with '0'.
815 Scale = IC.popOperand();
816 if (checkScale(Scale, ErrMsg))
817 return true;
818 IC.pushOperand(IC_IMM);
819 IC.popOperator();
820 } else {
821 State = IES_ERROR;
823 break;
825 PrevState = CurrState;
826 return false;
828 bool onIdentifierExpr(const MCExpr *SymRef, StringRef SymRefName,
829 const InlineAsmIdentifierInfo &IDInfo,
830 const AsmTypeInfo &Type, bool ParsingMSInlineAsm,
831 StringRef &ErrMsg) {
832 // InlineAsm: Treat an enum value as an integer
833 if (ParsingMSInlineAsm)
834 if (IDInfo.isKind(InlineAsmIdentifierInfo::IK_EnumVal))
835 return onInteger(IDInfo.Enum.EnumVal, ErrMsg);
836 // Treat a symbolic constant like an integer
837 if (auto *CE = dyn_cast<MCConstantExpr>(SymRef))
838 return onInteger(CE->getValue(), ErrMsg);
839 PrevState = State;
840 switch (State) {
841 default:
842 State = IES_ERROR;
843 break;
844 case IES_CAST:
845 case IES_PLUS:
846 case IES_MINUS:
847 case IES_NOT:
848 case IES_INIT:
849 case IES_LBRAC:
850 case IES_LPAREN:
851 if (setSymRef(SymRef, SymRefName, ErrMsg))
852 return true;
853 MemExpr = true;
854 State = IES_INTEGER;
855 IC.pushOperand(IC_IMM);
856 if (ParsingMSInlineAsm)
857 Info = IDInfo;
858 setTypeInfo(Type);
859 break;
861 return false;
863 bool onInteger(int64_t TmpInt, StringRef &ErrMsg) {
864 IntelExprState CurrState = State;
865 switch (State) {
866 default:
867 State = IES_ERROR;
868 break;
869 case IES_PLUS:
870 case IES_MINUS:
871 case IES_NOT:
872 case IES_OR:
873 case IES_XOR:
874 case IES_AND:
875 case IES_EQ:
876 case IES_NE:
877 case IES_LT:
878 case IES_LE:
879 case IES_GT:
880 case IES_GE:
881 case IES_LSHIFT:
882 case IES_RSHIFT:
883 case IES_DIVIDE:
884 case IES_MOD:
885 case IES_MULTIPLY:
886 case IES_LPAREN:
887 case IES_INIT:
888 case IES_LBRAC:
889 State = IES_INTEGER;
890 if (PrevState == IES_REGISTER && CurrState == IES_MULTIPLY) {
891 // Index Register - Register * Scale
892 if (IndexReg)
893 return regsUseUpError(ErrMsg);
894 IndexReg = TmpReg;
895 Scale = TmpInt;
896 if (checkScale(Scale, ErrMsg))
897 return true;
898 // Get the scale and replace the 'Register * Scale' with '0'.
899 IC.popOperator();
900 } else {
901 IC.pushOperand(IC_IMM, TmpInt);
903 break;
905 PrevState = CurrState;
906 return false;
908 void onStar() {
909 PrevState = State;
910 switch (State) {
911 default:
912 State = IES_ERROR;
913 break;
914 case IES_INTEGER:
915 case IES_REGISTER:
916 case IES_RPAREN:
917 State = IES_MULTIPLY;
918 IC.pushOperator(IC_MULTIPLY);
919 break;
922 void onDivide() {
923 PrevState = State;
924 switch (State) {
925 default:
926 State = IES_ERROR;
927 break;
928 case IES_INTEGER:
929 case IES_RPAREN:
930 State = IES_DIVIDE;
931 IC.pushOperator(IC_DIVIDE);
932 break;
935 void onMod() {
936 PrevState = State;
937 switch (State) {
938 default:
939 State = IES_ERROR;
940 break;
941 case IES_INTEGER:
942 case IES_RPAREN:
943 State = IES_MOD;
944 IC.pushOperator(IC_MOD);
945 break;
948 bool onLBrac() {
949 if (BracCount)
950 return true;
951 PrevState = State;
952 switch (State) {
953 default:
954 State = IES_ERROR;
955 break;
956 case IES_RBRAC:
957 case IES_INTEGER:
958 case IES_RPAREN:
959 State = IES_PLUS;
960 IC.pushOperator(IC_PLUS);
961 CurType.Length = 1;
962 CurType.Size = CurType.ElementSize;
963 break;
964 case IES_INIT:
965 case IES_CAST:
966 assert(!BracCount && "BracCount should be zero on parsing's start");
967 State = IES_LBRAC;
968 break;
970 MemExpr = true;
971 BracketUsed = true;
972 BracCount++;
973 return false;
975 bool onRBrac(StringRef &ErrMsg) {
976 IntelExprState CurrState = State;
977 switch (State) {
978 default:
979 State = IES_ERROR;
980 break;
981 case IES_INTEGER:
982 case IES_OFFSET:
983 case IES_REGISTER:
984 case IES_RPAREN:
985 if (BracCount-- != 1) {
986 ErrMsg = "unexpected bracket encountered";
987 return true;
989 State = IES_RBRAC;
990 if (CurrState == IES_REGISTER && PrevState != IES_MULTIPLY) {
991 // If we already have a BaseReg, then assume this is the IndexReg with
992 // no explicit scale.
993 if (!BaseReg) {
994 BaseReg = TmpReg;
995 } else {
996 if (IndexReg)
997 return regsUseUpError(ErrMsg);
998 IndexReg = TmpReg;
999 Scale = 0;
1002 break;
1004 PrevState = CurrState;
1005 return false;
1007 void onLParen() {
1008 IntelExprState CurrState = State;
1009 switch (State) {
1010 default:
1011 State = IES_ERROR;
1012 break;
1013 case IES_PLUS:
1014 case IES_MINUS:
1015 case IES_NOT:
1016 case IES_OR:
1017 case IES_XOR:
1018 case IES_AND:
1019 case IES_EQ:
1020 case IES_NE:
1021 case IES_LT:
1022 case IES_LE:
1023 case IES_GT:
1024 case IES_GE:
1025 case IES_LSHIFT:
1026 case IES_RSHIFT:
1027 case IES_MULTIPLY:
1028 case IES_DIVIDE:
1029 case IES_MOD:
1030 case IES_LPAREN:
1031 case IES_INIT:
1032 case IES_LBRAC:
1033 State = IES_LPAREN;
1034 IC.pushOperator(IC_LPAREN);
1035 break;
1037 PrevState = CurrState;
1039 void onRParen() {
1040 PrevState = State;
1041 switch (State) {
1042 default:
1043 State = IES_ERROR;
1044 break;
1045 case IES_INTEGER:
1046 case IES_OFFSET:
1047 case IES_REGISTER:
1048 case IES_RBRAC:
1049 case IES_RPAREN:
1050 State = IES_RPAREN;
1051 IC.pushOperator(IC_RPAREN);
1052 break;
1055 bool onOffset(const MCExpr *Val, SMLoc OffsetLoc, StringRef ID,
1056 const InlineAsmIdentifierInfo &IDInfo,
1057 bool ParsingMSInlineAsm, StringRef &ErrMsg) {
1058 PrevState = State;
1059 switch (State) {
1060 default:
1061 ErrMsg = "unexpected offset operator expression";
1062 return true;
1063 case IES_PLUS:
1064 case IES_INIT:
1065 case IES_LBRAC:
1066 if (setSymRef(Val, ID, ErrMsg))
1067 return true;
1068 OffsetOperator = true;
1069 OffsetOperatorLoc = OffsetLoc;
1070 State = IES_OFFSET;
1071 // As we cannot yet resolve the actual value (offset), we retain
1072 // the requested semantics by pushing a '0' to the operands stack
1073 IC.pushOperand(IC_IMM);
1074 if (ParsingMSInlineAsm) {
1075 Info = IDInfo;
1077 break;
1079 return false;
1081 void onCast(AsmTypeInfo Info) {
1082 PrevState = State;
1083 switch (State) {
1084 default:
1085 State = IES_ERROR;
1086 break;
1087 case IES_LPAREN:
1088 setTypeInfo(Info);
1089 State = IES_CAST;
1090 break;
1093 void setTypeInfo(AsmTypeInfo Type) { CurType = Type; }
1096 bool Error(SMLoc L, const Twine &Msg, SMRange Range = std::nullopt,
1097 bool MatchingInlineAsm = false) {
1098 MCAsmParser &Parser = getParser();
1099 if (MatchingInlineAsm) {
1100 if (!getLexer().isAtStartOfStatement())
1101 Parser.eatToEndOfStatement();
1102 return false;
1104 return Parser.Error(L, Msg, Range);
1107 bool MatchRegisterByName(MCRegister &RegNo, StringRef RegName, SMLoc StartLoc,
1108 SMLoc EndLoc);
1109 bool ParseRegister(MCRegister &RegNo, SMLoc &StartLoc, SMLoc &EndLoc,
1110 bool RestoreOnFailure);
1112 std::unique_ptr<X86Operand> DefaultMemSIOperand(SMLoc Loc);
1113 std::unique_ptr<X86Operand> DefaultMemDIOperand(SMLoc Loc);
1114 bool IsSIReg(MCRegister Reg);
1115 MCRegister GetSIDIForRegClass(unsigned RegClassID, bool IsSIReg);
1116 void
1117 AddDefaultSrcDestOperands(OperandVector &Operands,
1118 std::unique_ptr<llvm::MCParsedAsmOperand> &&Src,
1119 std::unique_ptr<llvm::MCParsedAsmOperand> &&Dst);
1120 bool VerifyAndAdjustOperands(OperandVector &OrigOperands,
1121 OperandVector &FinalOperands);
1122 bool parseOperand(OperandVector &Operands, StringRef Name);
1123 bool parseATTOperand(OperandVector &Operands);
1124 bool parseIntelOperand(OperandVector &Operands, StringRef Name);
1125 bool ParseIntelOffsetOperator(const MCExpr *&Val, StringRef &ID,
1126 InlineAsmIdentifierInfo &Info, SMLoc &End);
1127 bool ParseIntelDotOperator(IntelExprStateMachine &SM, SMLoc &End);
1128 unsigned IdentifyIntelInlineAsmOperator(StringRef Name);
1129 unsigned ParseIntelInlineAsmOperator(unsigned OpKind);
1130 unsigned IdentifyMasmOperator(StringRef Name);
1131 bool ParseMasmOperator(unsigned OpKind, int64_t &Val);
1132 bool ParseRoundingModeOp(SMLoc Start, OperandVector &Operands);
1133 bool parseCFlagsOp(OperandVector &Operands);
1134 bool ParseIntelNamedOperator(StringRef Name, IntelExprStateMachine &SM,
1135 bool &ParseError, SMLoc &End);
// Recognizes the MASM comparison operator names (eq/ne/lt/le/gt/ge) and
// forwards them to SM; returns false when Name is not one of them.
1136 bool ParseMasmNamedOperator(StringRef Name, IntelExprStateMachine &SM,
1137 bool &ParseError, SMLoc &End);
// Records inline-asm rewrites for the Intel expression between Start and End.
1138 void RewriteIntelExpression(IntelExprStateMachine &SM, SMLoc Start,
1139 SMLoc End);
// Feeds tokens to SM until it reaches a valid end state; returns true on error.
1140 bool ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End);
1141 bool ParseIntelInlineAsmIdentifier(const MCExpr *&Val, StringRef &Identifier,
1142 InlineAsmIdentifierInfo &Info,
1143 bool IsUnevaluatedOperand, SMLoc &End,
1144 bool IsParsingOffsetOperator = false);
// Marks SM as "appended after operand" when the previous token was ']'.
1145 void tryParseOperandIdx(AsmToken::TokenKind PrevTK,
1146 IntelExprStateMachine &SM);
1148 bool ParseMemOperand(MCRegister SegReg, const MCExpr *Disp, SMLoc StartLoc,
1149 SMLoc EndLoc, OperandVector &Operands);
1151 X86::CondCode ParseConditionCode(StringRef CCode);
1153 bool ParseIntelMemoryOperandSize(unsigned &Size);
// Appends the memory operand for an MS inline-asm identifier reference to
// Operands.
1154 bool CreateMemForMSInlineAsm(MCRegister SegReg, const MCExpr *Disp,
1155 MCRegister BaseReg, MCRegister IndexReg,
1156 unsigned Scale, bool NonAbsMem, SMLoc Start,
1157 SMLoc End, unsigned Size, StringRef Identifier,
1158 const InlineAsmIdentifierInfo &Info,
1159 OperandVector &Operands);
// Directive handlers.
// NOTE(review): presumably dispatched from ParseDirective() -- confirm there.
1161 bool parseDirectiveArch();
1162 bool parseDirectiveNops(SMLoc L);
1163 bool parseDirectiveEven(SMLoc L);
1164 bool ParseDirectiveCode(StringRef IDVal, SMLoc L);
1166 /// CodeView FPO data directives.
1167 bool parseDirectiveFPOProc(SMLoc L);
1168 bool parseDirectiveFPOSetFrame(SMLoc L);
1169 bool parseDirectiveFPOPushReg(SMLoc L);
1170 bool parseDirectiveFPOStackAlloc(SMLoc L);
1171 bool parseDirectiveFPOStackAlign(SMLoc L);
1172 bool parseDirectiveFPOEndPrologue(SMLoc L);
1173 bool parseDirectiveFPOEndProc(SMLoc L);
1175 /// SEH directives.
1176 bool parseSEHRegisterNumber(unsigned RegClassID, MCRegister &RegNo);
1177 bool parseDirectiveSEHPushReg(SMLoc);
1178 bool parseDirectiveSEHSetFrame(SMLoc);
1179 bool parseDirectiveSEHSaveReg(SMLoc);
1180 bool parseDirectiveSEHSaveXMM(SMLoc);
1181 bool parseDirectiveSEHPushFrame(SMLoc);
1183 unsigned checkTargetMatchPredicate(MCInst &Inst) override;
1185 bool validateInstruction(MCInst &Inst, const OperandVector &Ops);
1186 bool processInstruction(MCInst &Inst, const OperandVector &Ops);
1188 // Load Value Injection (LVI) Mitigations for machine code
1189 void emitWarningForSpecialLVIInstruction(SMLoc Loc);
1190 void applyLVICFIMitigation(MCInst &Inst, MCStreamer &Out);
1191 void applyLVILoadHardeningMitigation(MCInst &Inst, MCStreamer &Out);
1193 /// Wrapper around MCStreamer::emitInstruction(). Possibly adds
1194 /// instrumentation around Inst.
1195 void emitInstruction(MCInst &Inst, OperandVector &Operands, MCStreamer &Out);
1197 bool matchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
1198 OperandVector &Operands, MCStreamer &Out,
1199 uint64_t &ErrorInfo,
1200 bool MatchingInlineAsm) override;
1202 void MatchFPUWaitAlias(SMLoc IDLoc, X86Operand &Op, OperandVector &Operands,
1203 MCStreamer &Out, bool MatchingInlineAsm);
1205 bool ErrorMissingFeature(SMLoc IDLoc, const FeatureBitset &MissingFeatures,
1206 bool MatchingInlineAsm);
// Syntax-specific matchers.
// NOTE(review): presumably selected by matchAndEmitInstruction() according to
// the active dialect -- confirm at the definition.
1208 bool matchAndEmitATTInstruction(SMLoc IDLoc, unsigned &Opcode, MCInst &Inst,
1209 OperandVector &Operands, MCStreamer &Out,
1210 uint64_t &ErrorInfo, bool MatchingInlineAsm);
1212 bool matchAndEmitIntelInstruction(SMLoc IDLoc, unsigned &Opcode, MCInst &Inst,
1213 OperandVector &Operands, MCStreamer &Out,
1214 uint64_t &ErrorInfo,
1215 bool MatchingInlineAsm);
1217 bool omitRegisterFromClobberLists(MCRegister Reg) override;
1219 /// Parses AVX512 specific operand primitives: masked registers ({%k<NUM>}, {z})
1220 /// and memory broadcasting ({1to<NUM>}) primitives, updating Operands vector if required.
1221 /// return false if no parsing errors occurred, true otherwise.
1222 bool HandleAVX512Operand(OperandVector &Operands);
1224 bool ParseZ(std::unique_ptr<X86Operand> &Z, const SMLoc &StartLoc);
1226 bool is64BitMode() const {
1227 // FIXME: Can tablegen auto-generate this?
1228 return getSTI().hasFeature(X86::Is64Bit);
1230 bool is32BitMode() const {
1231 // FIXME: Can tablegen auto-generate this?
1232 return getSTI().hasFeature(X86::Is32Bit);
1234 bool is16BitMode() const {
1235 // FIXME: Can tablegen auto-generate this?
1236 return getSTI().hasFeature(X86::Is16Bit);
1238 void SwitchMode(unsigned mode) {
1239 MCSubtargetInfo &STI = copySTI();
1240 FeatureBitset AllModes({X86::Is64Bit, X86::Is32Bit, X86::Is16Bit});
1241 FeatureBitset OldMode = STI.getFeatureBits() & AllModes;
1242 FeatureBitset FB = ComputeAvailableFeatures(
1243 STI.ToggleFeature(OldMode.flip(mode)));
1244 setAvailableFeatures(FB);
1246 assert(FeatureBitset({mode}) == (STI.getFeatureBits() & AllModes));
1249 unsigned getPointerWidth() {
1250 if (is16BitMode()) return 16;
1251 if (is32BitMode()) return 32;
1252 if (is64BitMode()) return 64;
1253 llvm_unreachable("invalid mode");
1256 bool isParsingIntelSyntax() {
1257 return getParser().getAssemblerDialect();
1260 /// @name Auto-generated Matcher Functions
1261 /// {
1263 #define GET_ASSEMBLER_HEADER
1264 #include "X86GenAsmMatcher.inc"
1266 /// }
1268 public:
// X86-specific match result codes. Numbering starts at
// FIRST_TARGET_MATCH_RESULT_TY; the remaining enumerators are supplied by the
// GET_OPERAND_DIAGNOSTIC_TYPES section of the generated matcher include.
1269 enum X86MatchResultTy {
1270 Match_Unsupported = FIRST_TARGET_MATCH_RESULT_TY,
1271 #define GET_OPERAND_DIAGNOSTIC_TYPES
1272 #include "X86GenAsmMatcher.inc"
1275 X86AsmParser(const MCSubtargetInfo &sti, MCAsmParser &Parser,
1276 const MCInstrInfo &mii, const MCTargetOptions &Options)
1277 : MCTargetAsmParser(Options, sti, mii), InstInfo(nullptr),
1278 Code16GCC(false) {
1280 Parser.addAliasForDirective(".word", ".2byte");
1282 // Initialize the set of available features.
1283 setAvailableFeatures(ComputeAvailableFeatures(getSTI().getFeatureBits()));
// Parses a register without restoring lexer state on failure.
1286 bool parseRegister(MCRegister &Reg, SMLoc &StartLoc, SMLoc &EndLoc) override;
// Like parseRegister(), but restores consumed tokens on failure and reports
// the outcome as a ParseStatus (pending errors are cleared).
1287 ParseStatus tryParseRegister(MCRegister &Reg, SMLoc &StartLoc,
1288 SMLoc &EndLoc) override;
1290 bool parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) override;
1292 bool parseInstruction(ParseInstructionInfo &Info, StringRef Name,
1293 SMLoc NameLoc, OperandVector &Operands) override;
1295 bool ParseDirective(AsmToken DirectiveID) override;
1297 } // end anonymous namespace
1299 #define GET_REGISTER_MATCHER
1300 #define GET_SUBTARGET_FEATURE_NAME
1301 #include "X86GenAsmMatcher.inc"
1303 static bool CheckBaseRegAndIndexRegAndScale(MCRegister BaseReg,
1304 MCRegister IndexReg, unsigned Scale,
1305 bool Is64BitMode,
1306 StringRef &ErrMsg) {
1307 // If we have both a base register and an index register make sure they are
1308 // both 64-bit or 32-bit registers.
1309 // To support VSIB, IndexReg can be 128-bit or 256-bit registers.
1311 if (BaseReg &&
1312 !(BaseReg == X86::RIP || BaseReg == X86::EIP ||
1313 X86MCRegisterClasses[X86::GR16RegClassID].contains(BaseReg) ||
1314 X86MCRegisterClasses[X86::GR32RegClassID].contains(BaseReg) ||
1315 X86MCRegisterClasses[X86::GR64RegClassID].contains(BaseReg))) {
1316 ErrMsg = "invalid base+index expression";
1317 return true;
1320 if (IndexReg &&
1321 !(IndexReg == X86::EIZ || IndexReg == X86::RIZ ||
1322 X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg) ||
1323 X86MCRegisterClasses[X86::GR32RegClassID].contains(IndexReg) ||
1324 X86MCRegisterClasses[X86::GR64RegClassID].contains(IndexReg) ||
1325 X86MCRegisterClasses[X86::VR128XRegClassID].contains(IndexReg) ||
1326 X86MCRegisterClasses[X86::VR256XRegClassID].contains(IndexReg) ||
1327 X86MCRegisterClasses[X86::VR512RegClassID].contains(IndexReg))) {
1328 ErrMsg = "invalid base+index expression";
1329 return true;
1332 if (((BaseReg == X86::RIP || BaseReg == X86::EIP) && IndexReg) ||
1333 IndexReg == X86::EIP || IndexReg == X86::RIP || IndexReg == X86::ESP ||
1334 IndexReg == X86::RSP) {
1335 ErrMsg = "invalid base+index expression";
1336 return true;
1339 // Check for use of invalid 16-bit registers. Only BX/BP/SI/DI are allowed,
1340 // and then only in non-64-bit modes.
1341 if (X86MCRegisterClasses[X86::GR16RegClassID].contains(BaseReg) &&
1342 (Is64BitMode || (BaseReg != X86::BX && BaseReg != X86::BP &&
1343 BaseReg != X86::SI && BaseReg != X86::DI))) {
1344 ErrMsg = "invalid 16-bit base register";
1345 return true;
1348 if (!BaseReg &&
1349 X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg)) {
1350 ErrMsg = "16-bit memory operand may not include only index register";
1351 return true;
1354 if (BaseReg && IndexReg) {
1355 if (X86MCRegisterClasses[X86::GR64RegClassID].contains(BaseReg) &&
1356 (X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg) ||
1357 X86MCRegisterClasses[X86::GR32RegClassID].contains(IndexReg) ||
1358 IndexReg == X86::EIZ)) {
1359 ErrMsg = "base register is 64-bit, but index register is not";
1360 return true;
1362 if (X86MCRegisterClasses[X86::GR32RegClassID].contains(BaseReg) &&
1363 (X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg) ||
1364 X86MCRegisterClasses[X86::GR64RegClassID].contains(IndexReg) ||
1365 IndexReg == X86::RIZ)) {
1366 ErrMsg = "base register is 32-bit, but index register is not";
1367 return true;
1369 if (X86MCRegisterClasses[X86::GR16RegClassID].contains(BaseReg)) {
1370 if (X86MCRegisterClasses[X86::GR32RegClassID].contains(IndexReg) ||
1371 X86MCRegisterClasses[X86::GR64RegClassID].contains(IndexReg)) {
1372 ErrMsg = "base register is 16-bit, but index register is not";
1373 return true;
1375 if ((BaseReg != X86::BX && BaseReg != X86::BP) ||
1376 (IndexReg != X86::SI && IndexReg != X86::DI)) {
1377 ErrMsg = "invalid 16-bit base/index register combination";
1378 return true;
1383 // RIP/EIP-relative addressing is only supported in 64-bit mode.
1384 if (!Is64BitMode && (BaseReg == X86::RIP || BaseReg == X86::EIP)) {
1385 ErrMsg = "IP-relative addressing requires 64-bit mode";
1386 return true;
1389 return checkScale(Scale, ErrMsg);
1392 bool X86AsmParser::MatchRegisterByName(MCRegister &RegNo, StringRef RegName,
1393 SMLoc StartLoc, SMLoc EndLoc) {
1394 // If we encounter a %, ignore it. This code handles registers with and
1395 // without the prefix, unprefixed registers can occur in cfi directives.
1396 RegName.consume_front("%");
1398 RegNo = MatchRegisterName(RegName);
1400 // If the match failed, try the register name as lowercase.
1401 if (!RegNo)
1402 RegNo = MatchRegisterName(RegName.lower());
1404 // The "flags" and "mxcsr" registers cannot be referenced directly.
1405 // Treat it as an identifier instead.
1406 if (isParsingMSInlineAsm() && isParsingIntelSyntax() &&
1407 (RegNo == X86::EFLAGS || RegNo == X86::MXCSR))
1408 RegNo = MCRegister();
1410 if (!is64BitMode()) {
1411 // FIXME: This should be done using Requires<Not64BitMode> and
1412 // Requires<In64BitMode> so "eiz" usage in 64-bit instructions can be also
1413 // checked.
1414 if (RegNo == X86::RIZ || RegNo == X86::RIP ||
1415 X86MCRegisterClasses[X86::GR64RegClassID].contains(RegNo) ||
1416 X86II::isX86_64NonExtLowByteReg(RegNo) ||
1417 X86II::isX86_64ExtendedReg(RegNo)) {
1418 return Error(StartLoc,
1419 "register %" + RegName + " is only available in 64-bit mode",
1420 SMRange(StartLoc, EndLoc));
1424 if (X86II::isApxExtendedReg(RegNo))
1425 UseApxExtendedReg = true;
1427 // If this is "db[0-15]", match it as an alias
1428 // for dr[0-15].
1429 if (!RegNo && RegName.starts_with("db")) {
1430 if (RegName.size() == 3) {
1431 switch (RegName[2]) {
1432 case '0':
1433 RegNo = X86::DR0;
1434 break;
1435 case '1':
1436 RegNo = X86::DR1;
1437 break;
1438 case '2':
1439 RegNo = X86::DR2;
1440 break;
1441 case '3':
1442 RegNo = X86::DR3;
1443 break;
1444 case '4':
1445 RegNo = X86::DR4;
1446 break;
1447 case '5':
1448 RegNo = X86::DR5;
1449 break;
1450 case '6':
1451 RegNo = X86::DR6;
1452 break;
1453 case '7':
1454 RegNo = X86::DR7;
1455 break;
1456 case '8':
1457 RegNo = X86::DR8;
1458 break;
1459 case '9':
1460 RegNo = X86::DR9;
1461 break;
1463 } else if (RegName.size() == 4 && RegName[2] == '1') {
1464 switch (RegName[3]) {
1465 case '0':
1466 RegNo = X86::DR10;
1467 break;
1468 case '1':
1469 RegNo = X86::DR11;
1470 break;
1471 case '2':
1472 RegNo = X86::DR12;
1473 break;
1474 case '3':
1475 RegNo = X86::DR13;
1476 break;
1477 case '4':
1478 RegNo = X86::DR14;
1479 break;
1480 case '5':
1481 RegNo = X86::DR15;
1482 break;
1487 if (!RegNo) {
1488 if (isParsingIntelSyntax())
1489 return true;
1490 return Error(StartLoc, "invalid register name", SMRange(StartLoc, EndLoc));
1492 return false;
1495 bool X86AsmParser::ParseRegister(MCRegister &RegNo, SMLoc &StartLoc,
1496 SMLoc &EndLoc, bool RestoreOnFailure) {
1497 MCAsmParser &Parser = getParser();
1498 MCAsmLexer &Lexer = getLexer();
1499 RegNo = MCRegister();
1501 SmallVector<AsmToken, 5> Tokens;
1502 auto OnFailure = [RestoreOnFailure, &Lexer, &Tokens]() {
1503 if (RestoreOnFailure) {
1504 while (!Tokens.empty()) {
1505 Lexer.UnLex(Tokens.pop_back_val());
1510 const AsmToken &PercentTok = Parser.getTok();
1511 StartLoc = PercentTok.getLoc();
1513 // If we encounter a %, ignore it. This code handles registers with and
1514 // without the prefix, unprefixed registers can occur in cfi directives.
1515 if (!isParsingIntelSyntax() && PercentTok.is(AsmToken::Percent)) {
1516 Tokens.push_back(PercentTok);
1517 Parser.Lex(); // Eat percent token.
1520 const AsmToken &Tok = Parser.getTok();
1521 EndLoc = Tok.getEndLoc();
1523 if (Tok.isNot(AsmToken::Identifier)) {
1524 OnFailure();
1525 if (isParsingIntelSyntax()) return true;
1526 return Error(StartLoc, "invalid register name",
1527 SMRange(StartLoc, EndLoc));
1530 if (MatchRegisterByName(RegNo, Tok.getString(), StartLoc, EndLoc)) {
1531 OnFailure();
1532 return true;
1535 // Parse "%st" as "%st(0)" and "%st(1)", which is multiple tokens.
1536 if (RegNo == X86::ST0) {
1537 Tokens.push_back(Tok);
1538 Parser.Lex(); // Eat 'st'
1540 // Check to see if we have '(4)' after %st.
1541 if (Lexer.isNot(AsmToken::LParen))
1542 return false;
1543 // Lex the paren.
1544 Tokens.push_back(Parser.getTok());
1545 Parser.Lex();
1547 const AsmToken &IntTok = Parser.getTok();
1548 if (IntTok.isNot(AsmToken::Integer)) {
1549 OnFailure();
1550 return Error(IntTok.getLoc(), "expected stack index");
1552 switch (IntTok.getIntVal()) {
1553 case 0: RegNo = X86::ST0; break;
1554 case 1: RegNo = X86::ST1; break;
1555 case 2: RegNo = X86::ST2; break;
1556 case 3: RegNo = X86::ST3; break;
1557 case 4: RegNo = X86::ST4; break;
1558 case 5: RegNo = X86::ST5; break;
1559 case 6: RegNo = X86::ST6; break;
1560 case 7: RegNo = X86::ST7; break;
1561 default:
1562 OnFailure();
1563 return Error(IntTok.getLoc(), "invalid stack index");
1566 // Lex IntTok
1567 Tokens.push_back(IntTok);
1568 Parser.Lex();
1569 if (Lexer.isNot(AsmToken::RParen)) {
1570 OnFailure();
1571 return Error(Parser.getTok().getLoc(), "expected ')'");
1574 EndLoc = Parser.getTok().getEndLoc();
1575 Parser.Lex(); // Eat ')'
1576 return false;
1579 EndLoc = Parser.getTok().getEndLoc();
1581 if (!RegNo) {
1582 OnFailure();
1583 if (isParsingIntelSyntax()) return true;
1584 return Error(StartLoc, "invalid register name",
1585 SMRange(StartLoc, EndLoc));
1588 Parser.Lex(); // Eat identifier token.
1589 return false;
1592 bool X86AsmParser::parseRegister(MCRegister &Reg, SMLoc &StartLoc,
1593 SMLoc &EndLoc) {
1594 return ParseRegister(Reg, StartLoc, EndLoc, /*RestoreOnFailure=*/false);
1597 ParseStatus X86AsmParser::tryParseRegister(MCRegister &Reg, SMLoc &StartLoc,
1598 SMLoc &EndLoc) {
1599 bool Result = ParseRegister(Reg, StartLoc, EndLoc, /*RestoreOnFailure=*/true);
1600 bool PendingErrors = getParser().hasPendingError();
1601 getParser().clearPendingErrors();
1602 if (PendingErrors)
1603 return ParseStatus::Failure;
1604 if (Result)
1605 return ParseStatus::NoMatch;
1606 return ParseStatus::Success;
1609 std::unique_ptr<X86Operand> X86AsmParser::DefaultMemSIOperand(SMLoc Loc) {
1610 bool Parse32 = is32BitMode() || Code16GCC;
1611 MCRegister Basereg =
1612 is64BitMode() ? X86::RSI : (Parse32 ? X86::ESI : X86::SI);
1613 const MCExpr *Disp = MCConstantExpr::create(0, getContext());
1614 return X86Operand::CreateMem(getPointerWidth(), /*SegReg=*/0, Disp,
1615 /*BaseReg=*/Basereg, /*IndexReg=*/0, /*Scale=*/1,
1616 Loc, Loc, 0);
1619 std::unique_ptr<X86Operand> X86AsmParser::DefaultMemDIOperand(SMLoc Loc) {
1620 bool Parse32 = is32BitMode() || Code16GCC;
1621 MCRegister Basereg =
1622 is64BitMode() ? X86::RDI : (Parse32 ? X86::EDI : X86::DI);
1623 const MCExpr *Disp = MCConstantExpr::create(0, getContext());
1624 return X86Operand::CreateMem(getPointerWidth(), /*SegReg=*/0, Disp,
1625 /*BaseReg=*/Basereg, /*IndexReg=*/0, /*Scale=*/1,
1626 Loc, Loc, 0);
1629 bool X86AsmParser::IsSIReg(MCRegister Reg) {
1630 switch (Reg.id()) {
1631 default: llvm_unreachable("Only (R|E)SI and (R|E)DI are expected!");
1632 case X86::RSI:
1633 case X86::ESI:
1634 case X86::SI:
1635 return true;
1636 case X86::RDI:
1637 case X86::EDI:
1638 case X86::DI:
1639 return false;
1643 MCRegister X86AsmParser::GetSIDIForRegClass(unsigned RegClassID, bool IsSIReg) {
1644 switch (RegClassID) {
1645 default: llvm_unreachable("Unexpected register class");
1646 case X86::GR64RegClassID:
1647 return IsSIReg ? X86::RSI : X86::RDI;
1648 case X86::GR32RegClassID:
1649 return IsSIReg ? X86::ESI : X86::EDI;
1650 case X86::GR16RegClassID:
1651 return IsSIReg ? X86::SI : X86::DI;
1655 void X86AsmParser::AddDefaultSrcDestOperands(
1656 OperandVector& Operands, std::unique_ptr<llvm::MCParsedAsmOperand> &&Src,
1657 std::unique_ptr<llvm::MCParsedAsmOperand> &&Dst) {
1658 if (isParsingIntelSyntax()) {
1659 Operands.push_back(std::move(Dst));
1660 Operands.push_back(std::move(Src));
1662 else {
1663 Operands.push_back(std::move(Src));
1664 Operands.push_back(std::move(Dst));
1668 bool X86AsmParser::VerifyAndAdjustOperands(OperandVector &OrigOperands,
1669 OperandVector &FinalOperands) {
1671 if (OrigOperands.size() > 1) {
1672 // Check if sizes match, OrigOperands also contains the instruction name
1673 assert(OrigOperands.size() == FinalOperands.size() + 1 &&
1674 "Operand size mismatch");
1676 SmallVector<std::pair<SMLoc, std::string>, 2> Warnings;
1677 // Verify types match
1678 int RegClassID = -1;
1679 for (unsigned int i = 0; i < FinalOperands.size(); ++i) {
1680 X86Operand &OrigOp = static_cast<X86Operand &>(*OrigOperands[i + 1]);
1681 X86Operand &FinalOp = static_cast<X86Operand &>(*FinalOperands[i]);
1683 if (FinalOp.isReg() &&
1684 (!OrigOp.isReg() || FinalOp.getReg() != OrigOp.getReg()))
1685 // Return false and let a normal complaint about bogus operands happen
1686 return false;
1688 if (FinalOp.isMem()) {
1690 if (!OrigOp.isMem())
1691 // Return false and let a normal complaint about bogus operands happen
1692 return false;
1694 MCRegister OrigReg = OrigOp.Mem.BaseReg;
1695 MCRegister FinalReg = FinalOp.Mem.BaseReg;
1697 // If we've already encounterd a register class, make sure all register
1698 // bases are of the same register class
1699 if (RegClassID != -1 &&
1700 !X86MCRegisterClasses[RegClassID].contains(OrigReg)) {
1701 return Error(OrigOp.getStartLoc(),
1702 "mismatching source and destination index registers");
1705 if (X86MCRegisterClasses[X86::GR64RegClassID].contains(OrigReg))
1706 RegClassID = X86::GR64RegClassID;
1707 else if (X86MCRegisterClasses[X86::GR32RegClassID].contains(OrigReg))
1708 RegClassID = X86::GR32RegClassID;
1709 else if (X86MCRegisterClasses[X86::GR16RegClassID].contains(OrigReg))
1710 RegClassID = X86::GR16RegClassID;
1711 else
1712 // Unexpected register class type
1713 // Return false and let a normal complaint about bogus operands happen
1714 return false;
1716 bool IsSI = IsSIReg(FinalReg);
1717 FinalReg = GetSIDIForRegClass(RegClassID, IsSI);
1719 if (FinalReg != OrigReg) {
1720 std::string RegName = IsSI ? "ES:(R|E)SI" : "ES:(R|E)DI";
1721 Warnings.push_back(std::make_pair(
1722 OrigOp.getStartLoc(),
1723 "memory operand is only for determining the size, " + RegName +
1724 " will be used for the location"));
1727 FinalOp.Mem.Size = OrigOp.Mem.Size;
1728 FinalOp.Mem.SegReg = OrigOp.Mem.SegReg;
1729 FinalOp.Mem.BaseReg = FinalReg;
1733 // Produce warnings only if all the operands passed the adjustment - prevent
1734 // legal cases like "movsd (%rax), %xmm0" mistakenly produce warnings
1735 for (auto &WarningMsg : Warnings) {
1736 Warning(WarningMsg.first, WarningMsg.second);
1739 // Remove old operands
1740 for (unsigned int i = 0; i < FinalOperands.size(); ++i)
1741 OrigOperands.pop_back();
1743 // OrigOperands.append(FinalOperands.begin(), FinalOperands.end());
1744 for (auto &Op : FinalOperands)
1745 OrigOperands.push_back(std::move(Op));
1747 return false;
1750 bool X86AsmParser::parseOperand(OperandVector &Operands, StringRef Name) {
1751 if (isParsingIntelSyntax())
1752 return parseIntelOperand(Operands, Name);
1754 return parseATTOperand(Operands);
1757 bool X86AsmParser::CreateMemForMSInlineAsm(
1758 MCRegister SegReg, const MCExpr *Disp, MCRegister BaseReg,
1759 MCRegister IndexReg, unsigned Scale, bool NonAbsMem, SMLoc Start, SMLoc End,
1760 unsigned Size, StringRef Identifier, const InlineAsmIdentifierInfo &Info,
1761 OperandVector &Operands) {
1762 // If we found a decl other than a VarDecl, then assume it is a FuncDecl or
1763 // some other label reference.
1764 if (Info.isKind(InlineAsmIdentifierInfo::IK_Label)) {
1765 // Create an absolute memory reference in order to match against
1766 // instructions taking a PC relative operand.
1767 Operands.push_back(X86Operand::CreateMem(getPointerWidth(), Disp, Start,
1768 End, Size, Identifier,
1769 Info.Label.Decl));
1770 return false;
1772 // We either have a direct symbol reference, or an offset from a symbol. The
1773 // parser always puts the symbol on the LHS, so look there for size
1774 // calculation purposes.
1775 unsigned FrontendSize = 0;
1776 void *Decl = nullptr;
1777 bool IsGlobalLV = false;
1778 if (Info.isKind(InlineAsmIdentifierInfo::IK_Var)) {
1779 // Size is in terms of bits in this context.
1780 FrontendSize = Info.Var.Type * 8;
1781 Decl = Info.Var.Decl;
1782 IsGlobalLV = Info.Var.IsGlobalLV;
1784 // It is widely common for MS InlineAsm to use a global variable and one/two
1785 // registers in a mmory expression, and though unaccessible via rip/eip.
1786 if (IsGlobalLV) {
1787 if (BaseReg || IndexReg) {
1788 Operands.push_back(X86Operand::CreateMem(getPointerWidth(), Disp, Start,
1789 End, Size, Identifier, Decl, 0,
1790 BaseReg && IndexReg));
1791 return false;
1793 if (NonAbsMem)
1794 BaseReg = 1; // Make isAbsMem() false
1796 Operands.push_back(X86Operand::CreateMem(
1797 getPointerWidth(), SegReg, Disp, BaseReg, IndexReg, Scale, Start, End,
1798 Size,
1799 /*DefaultBaseReg=*/X86::RIP, Identifier, Decl, FrontendSize));
1800 return false;
1803 // Some binary bitwise operators have a named synonymous
1804 // Query a candidate string for being such a named operator
1805 // and if so - invoke the appropriate handler
1806 bool X86AsmParser::ParseIntelNamedOperator(StringRef Name,
1807 IntelExprStateMachine &SM,
1808 bool &ParseError, SMLoc &End) {
1809 // A named operator should be either lower or upper case, but not a mix...
1810 // except in MASM, which uses full case-insensitivity.
1811 if (Name != Name.lower() && Name != Name.upper() &&
1812 !getParser().isParsingMasm())
1813 return false;
1814 if (Name.equals_insensitive("not")) {
1815 SM.onNot();
1816 } else if (Name.equals_insensitive("or")) {
1817 SM.onOr();
1818 } else if (Name.equals_insensitive("shl")) {
1819 SM.onLShift();
1820 } else if (Name.equals_insensitive("shr")) {
1821 SM.onRShift();
1822 } else if (Name.equals_insensitive("xor")) {
1823 SM.onXor();
1824 } else if (Name.equals_insensitive("and")) {
1825 SM.onAnd();
1826 } else if (Name.equals_insensitive("mod")) {
1827 SM.onMod();
1828 } else if (Name.equals_insensitive("offset")) {
1829 SMLoc OffsetLoc = getTok().getLoc();
1830 const MCExpr *Val = nullptr;
1831 StringRef ID;
1832 InlineAsmIdentifierInfo Info;
1833 ParseError = ParseIntelOffsetOperator(Val, ID, Info, End);
1834 if (ParseError)
1835 return true;
1836 StringRef ErrMsg;
1837 ParseError =
1838 SM.onOffset(Val, OffsetLoc, ID, Info, isParsingMSInlineAsm(), ErrMsg);
1839 if (ParseError)
1840 return Error(SMLoc::getFromPointer(Name.data()), ErrMsg);
1841 } else {
1842 return false;
1844 if (!Name.equals_insensitive("offset"))
1845 End = consumeToken();
1846 return true;
1848 bool X86AsmParser::ParseMasmNamedOperator(StringRef Name,
1849 IntelExprStateMachine &SM,
1850 bool &ParseError, SMLoc &End) {
1851 if (Name.equals_insensitive("eq")) {
1852 SM.onEq();
1853 } else if (Name.equals_insensitive("ne")) {
1854 SM.onNE();
1855 } else if (Name.equals_insensitive("lt")) {
1856 SM.onLT();
1857 } else if (Name.equals_insensitive("le")) {
1858 SM.onLE();
1859 } else if (Name.equals_insensitive("gt")) {
1860 SM.onGT();
1861 } else if (Name.equals_insensitive("ge")) {
1862 SM.onGE();
1863 } else {
1864 return false;
1866 End = consumeToken();
1867 return true;
1870 // Check if current intel expression append after an operand.
1871 // Like: [Operand][Intel Expression]
1872 void X86AsmParser::tryParseOperandIdx(AsmToken::TokenKind PrevTK,
1873 IntelExprStateMachine &SM) {
1874 if (PrevTK != AsmToken::RBrac)
1875 return;
1877 SM.setAppendAfterOperand();
// Parses an Intel-syntax expression by feeding the token stream to the state
// machine SM, one token (or token group) per loop iteration, until SM reports
// a valid end state on a token that cannot continue the expression.
//
// SM  - expression state machine that accumulates the parsed expression.
// End - updated with the result of consumeToken() / the sub-parsers as tokens
//       are consumed.
// Returns true on error, false on success.
1880 bool X86AsmParser::ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End) {
1881 MCAsmParser &Parser = getParser();
1882 StringRef ErrMsg;
// Kind of the token handled by the previous iteration; used to recognise
// constructs that directly follow ']' or ')'.
1884 AsmToken::TokenKind PrevTK = AsmToken::Error;
// Let the state machine know when the object file is position independent.
1886 if (getContext().getObjectFileInfo()->isPositionIndependent())
1887 SM.setPIC();
1889 bool Done = false;
1890 while (!Done) {
1891 // Get a fresh reference on each loop iteration in case the previous
1892 // iteration moved the token storage during UnLex().
1893 const AsmToken &Tok = Parser.getTok();
// When true, the current token is consumed after the switch and End is
// updated; cases that advance the lexer themselves clear it.
1895 bool UpdateLocLex = true;
1896 AsmToken::TokenKind TK = getLexer().getKind();
1898 switch (TK) {
// Any token without a dedicated case ends the expression if the state machine
// is in a valid end state, and is an error otherwise.
1899 default:
1900 if ((Done = SM.isValidEndState()))
1901 break;
1902 return Error(Tok.getLoc(), "unknown token in expression");
// Propagate the lexer's own error.
1903 case AsmToken::Error:
1904 return Error(getLexer().getErrLoc(), getLexer().getErr());
1905 break;
1906 case AsmToken::Real:
1907 // DotOperator: [ebx].0
1908 UpdateLocLex = false;
1909 if (ParseIntelDotOperator(SM, End))
1910 return true;
1911 break;
// Outside MASM a stand-alone '.' is treated like any other unknown token.
1912 case AsmToken::Dot:
1913 if (!Parser.isParsingMasm()) {
1914 if ((Done = SM.isValidEndState()))
1915 break;
1916 return Error(Tok.getLoc(), "unknown token in expression");
1918 // MASM allows spaces around the dot operator (e.g., "var . x")
1919 Lex();
1920 UpdateLocLex = false;
1921 if (ParseIntelDotOperator(SM, End))
1922 return true;
1923 break;
// '$' is only meaningful for MASM, where it falls through to the string
// handling below; otherwise it is treated like an unknown token.
1924 case AsmToken::Dollar:
1925 if (!Parser.isParsingMasm()) {
1926 if ((Done = SM.isValidEndState()))
1927 break;
1928 return Error(Tok.getLoc(), "unknown token in expression");
1930 [[fallthrough]];
1931 case AsmToken::String: {
1932 if (Parser.isParsingMasm()) {
1933 // MASM parsers handle strings in expressions as constants.
1934 SMLoc ValueLoc = Tok.getLoc();
1935 int64_t Res;
1936 const MCExpr *Val;
1937 if (Parser.parsePrimaryExpr(Val, End, nullptr))
1938 return true;
1939 UpdateLocLex = false;
// The primary expression must fold to an absolute value, which is then fed to
// the state machine as an integer.
1940 if (!Val->evaluateAsAbsolute(Res, getStreamer().getAssemblerPtr()))
1941 return Error(ValueLoc, "expected absolute value");
1942 if (SM.onInteger(Res, ErrMsg))
1943 return Error(ValueLoc, ErrMsg);
1944 break;
// Non-MASM strings are handled by the identifier case.
1946 [[fallthrough]];
1948 case AsmToken::At:
1949 case AsmToken::Identifier: {
1950 SMLoc IdentLoc = Tok.getLoc();
1951 StringRef Identifier = Tok.getString();
1952 UpdateLocLex = false;
// MASM: split a dotted identifier into LHS, '.', RHS tokens. They are pushed
// back in reverse order so that the next iteration sees LHS first.
1953 if (Parser.isParsingMasm()) {
1954 size_t DotOffset = Identifier.find_first_of('.');
1955 if (DotOffset != StringRef::npos) {
1956 consumeToken();
1957 StringRef LHS = Identifier.slice(0, DotOffset);
1958 StringRef Dot = Identifier.substr(DotOffset, 1);
1959 StringRef RHS = Identifier.substr(DotOffset + 1);
1960 if (!RHS.empty()) {
1961 getLexer().UnLex(AsmToken(AsmToken::Identifier, RHS));
1963 getLexer().UnLex(AsmToken(AsmToken::Dot, Dot));
1964 if (!LHS.empty()) {
1965 getLexer().UnLex(AsmToken(AsmToken::Identifier, LHS));
1967 break;
1970 // (MASM only) <TYPE> PTR operator
1971 if (Parser.isParsingMasm()) {
1972 const AsmToken &NextTok = getLexer().peekTok();
1973 if (NextTok.is(AsmToken::Identifier) &&
1974 NextTok.getIdentifier().equals_insensitive("ptr")) {
1975 AsmTypeInfo Info;
1976 if (Parser.lookUpType(Identifier, Info))
1977 return Error(Tok.getLoc(), "unknown type");
1978 SM.onCast(Info);
1979 // Eat type and PTR.
1980 consumeToken();
1981 End = consumeToken();
1982 break;
1985 // Register, or (MASM only) <register>.<field>
1986 MCRegister Reg;
1987 if (Tok.is(AsmToken::Identifier)) {
// ParseRegister restores the lexer on failure, so the identifier can still be
// tried as something else below.
1988 if (!ParseRegister(Reg, IdentLoc, End, /*RestoreOnFailure=*/true)) {
1989 if (SM.onRegister(Reg, ErrMsg))
1990 return Error(IdentLoc, ErrMsg);
1991 break;
1993 if (Parser.isParsingMasm()) {
1994 const std::pair<StringRef, StringRef> IDField =
1995 Tok.getString().split('.');
1996 const StringRef ID = IDField.first, Field = IDField.second;
1997 SMLoc IDEndLoc = SMLoc::getFromPointer(ID.data() + ID.size());
1998 if (!Field.empty() &&
1999 !MatchRegisterByName(Reg, ID, IdentLoc, IDEndLoc)) {
2000 if (SM.onRegister(Reg, ErrMsg))
2001 return Error(IdentLoc, ErrMsg);
// <register>.<field> is fed to the state machine as register + field offset.
2003 AsmFieldInfo Info;
2004 SMLoc FieldStartLoc = SMLoc::getFromPointer(Field.data());
2005 if (Parser.lookUpField(Field, Info))
2006 return Error(FieldStartLoc, "unknown offset");
2007 else if (SM.onPlus(ErrMsg))
2008 return Error(getTok().getLoc(), ErrMsg);
2009 else if (SM.onInteger(Info.Offset, ErrMsg))
2010 return Error(IdentLoc, ErrMsg);
2011 SM.setTypeInfo(Info.Type);
2013 End = consumeToken();
2014 break;
2018 // Operator synonymous ("not", "or" etc.)
2019 bool ParseError = false;
2020 if (ParseIntelNamedOperator(Identifier, SM, ParseError, End)) {
2021 if (ParseError)
2022 return true;
2023 break;
// MASM additionally accepts the comparison operators (eq, ne, lt, le, gt, ge).
2025 if (Parser.isParsingMasm() &&
2026 ParseMasmNamedOperator(Identifier, SM, ParseError, End)) {
2027 if (ParseError)
2028 return true;
2029 break;
2031 // Symbol reference, when parsing assembly content
2032 InlineAsmIdentifierInfo Info;
2033 AsmFieldInfo FieldInfo;
2034 const MCExpr *Val;
2035 if (isParsingMSInlineAsm() || Parser.isParsingMasm()) {
2036 // MS Dot Operator expression
2037 if (Identifier.count('.') &&
2038 (PrevTK == AsmToken::RBrac || PrevTK == AsmToken::RParen)) {
2039 if (ParseIntelDotOperator(SM, End))
2040 return true;
2041 break;
2044 if (isParsingMSInlineAsm()) {
2045 // MS InlineAsm operators (TYPE/LENGTH/SIZE)
2046 if (unsigned OpKind = IdentifyIntelInlineAsmOperator(Identifier)) {
// A zero result from ParseIntelInlineAsmOperator is treated as failure here.
2047 if (int64_t Val = ParseIntelInlineAsmOperator(OpKind)) {
2048 if (SM.onInteger(Val, ErrMsg))
2049 return Error(IdentLoc, ErrMsg);
2050 } else {
2051 return true;
2053 break;
2055 // MS InlineAsm identifier
2056 // Call parseIdentifier() to combine @ with the identifier behind it.
2057 if (TK == AsmToken::At && Parser.parseIdentifier(Identifier))
2058 return Error(IdentLoc, "expected identifier");
2059 if (ParseIntelInlineAsmIdentifier(Val, Identifier, Info, false, End))
2060 return true;
2061 else if (SM.onIdentifierExpr(Val, Identifier, Info, FieldInfo.Type,
2062 true, ErrMsg))
2063 return Error(IdentLoc, ErrMsg);
2064 break;
2066 if (Parser.isParsingMasm()) {
// MASM operators identified by IdentifyMasmOperator() evaluate to an integer.
2067 if (unsigned OpKind = IdentifyMasmOperator(Identifier)) {
2068 int64_t Val;
2069 if (ParseMasmOperator(OpKind, Val))
2070 return true;
2071 if (SM.onInteger(Val, ErrMsg))
2072 return Error(IdentLoc, ErrMsg);
2073 break;
2075 if (!getParser().lookUpType(Identifier, FieldInfo.Type)) {
2076 // Field offset immediate; <TYPE>.<field specification>
2077 Lex(); // eat type
// Walk the chain of ".field" components; the resulting FieldInfo.Offset is
// fed to the state machine as an integer after the loop.
2078 bool EndDot = parseOptionalToken(AsmToken::Dot);
2079 while (EndDot || (getTok().is(AsmToken::Identifier) &&
2080 getTok().getString().starts_with("."))) {
2081 getParser().parseIdentifier(Identifier);
2082 if (!EndDot)
2083 Identifier.consume_front(".");
2084 EndDot = Identifier.consume_back(".");
2085 if (getParser().lookUpField(FieldInfo.Type.Name, Identifier,
2086 FieldInfo)) {
2087 SMLoc IDEnd =
2088 SMLoc::getFromPointer(Identifier.data() + Identifier.size());
2089 return Error(IdentLoc, "Unable to lookup field reference!",
2090 SMRange(IdentLoc, IDEnd));
2092 if (!EndDot)
2093 EndDot = parseOptionalToken(AsmToken::Dot);
2095 if (SM.onInteger(FieldInfo.Offset, ErrMsg))
2096 return Error(IdentLoc, ErrMsg);
2097 break;
// Fallback: let the generic parser handle the identifier as a primary
// expression.
2100 if (getParser().parsePrimaryExpr(Val, End, &FieldInfo.Type)) {
2101 return Error(Tok.getLoc(), "Unexpected identifier!");
2102 } else if (SM.onIdentifierExpr(Val, Identifier, Info, FieldInfo.Type,
2103 false, ErrMsg)) {
2104 return Error(IdentLoc, ErrMsg);
2106 break;
2108 case AsmToken::Integer: {
2109 // Look for 'b' or 'f' following an Integer as a directional label
2110 SMLoc Loc = getTok().getLoc();
2111 int64_t IntVal = getTok().getIntVal();
2112 End = consumeToken();
2113 UpdateLocLex = false;
2114 if (getLexer().getKind() == AsmToken::Identifier) {
2115 StringRef IDVal = getTok().getString();
2116 if (IDVal == "f" || IDVal == "b") {
2117 MCSymbol *Sym =
2118 getContext().getDirectionalLocalSymbol(IntVal, IDVal == "b");
2119 MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None;
2120 const MCExpr *Val =
2121 MCSymbolRefExpr::create(Sym, Variant, getContext());
// A backward reference ("b") must name a label that is already defined.
2122 if (IDVal == "b" && Sym->isUndefined())
2123 return Error(Loc, "invalid reference to undefined symbol");
2124 StringRef Identifier = Sym->getName();
2125 InlineAsmIdentifierInfo Info;
2126 AsmTypeInfo Type;
2127 if (SM.onIdentifierExpr(Val, Identifier, Info, Type,
2128 isParsingMSInlineAsm(), ErrMsg))
2129 return Error(Loc, ErrMsg);
2130 End = consumeToken();
2131 } else {
2132 if (SM.onInteger(IntVal, ErrMsg))
2133 return Error(Loc, ErrMsg);
2135 } else {
2136 if (SM.onInteger(IntVal, ErrMsg))
2137 return Error(Loc, ErrMsg);
2139 break;
// Operators and brackets: forward to the state machine; the token itself is
// consumed after the switch.
2141 case AsmToken::Plus:
2142 if (SM.onPlus(ErrMsg))
2143 return Error(getTok().getLoc(), ErrMsg);
2144 break;
2145 case AsmToken::Minus:
2146 if (SM.onMinus(ErrMsg))
2147 return Error(getTok().getLoc(), ErrMsg);
2148 break;
2149 case AsmToken::Tilde: SM.onNot(); break;
2150 case AsmToken::Star: SM.onStar(); break;
2151 case AsmToken::Slash: SM.onDivide(); break;
2152 case AsmToken::Percent: SM.onMod(); break;
2153 case AsmToken::Pipe: SM.onOr(); break;
2154 case AsmToken::Caret: SM.onXor(); break;
2155 case AsmToken::Amp: SM.onAnd(); break;
2156 case AsmToken::LessLess:
2157 SM.onLShift(); break;
2158 case AsmToken::GreaterGreater:
2159 SM.onRShift(); break;
2160 case AsmToken::LBrac:
2161 if (SM.onLBrac())
2162 return Error(Tok.getLoc(), "unexpected bracket encountered");
// A '[' directly after ']' marks an expression appended to an operand.
2163 tryParseOperandIdx(PrevTK, SM);
2164 break;
2165 case AsmToken::RBrac:
2166 if (SM.onRBrac(ErrMsg)) {
2167 return Error(Tok.getLoc(), ErrMsg);
2169 break;
2170 case AsmToken::LParen: SM.onLParen(); break;
2171 case AsmToken::RParen: SM.onRParen(); break;
// Handlers that return nothing report failure through SM.hadError().
2173 if (SM.hadError())
2174 return Error(Tok.getLoc(), "unknown token in expression");
// Consume the current token unless the expression ended or a handler already
// advanced the lexer.
2176 if (!Done && UpdateLocLex)
2177 End = consumeToken();
2179 PrevTK = TK;
2181 return false;
2184 void X86AsmParser::RewriteIntelExpression(IntelExprStateMachine &SM,
2185 SMLoc Start, SMLoc End) {
2186 SMLoc Loc = Start;
2187 unsigned ExprLen = End.getPointer() - Start.getPointer();
2188 // Skip everything before a symbol displacement (if we have one)
2189 if (SM.getSym() && !SM.isOffsetOperator()) {
2190 StringRef SymName = SM.getSymName();
2191 if (unsigned Len = SymName.data() - Start.getPointer())
2192 InstInfo->AsmRewrites->emplace_back(AOK_Skip, Start, Len);
2193 Loc = SMLoc::getFromPointer(SymName.data() + SymName.size());
2194 ExprLen = End.getPointer() - (SymName.data() + SymName.size());
2195 // If we have only a symbol than there's no need for complex rewrite,
2196 // simply skip everything after it
2197 if (!(SM.getBaseReg() || SM.getIndexReg() || SM.getImm())) {
2198 if (ExprLen)
2199 InstInfo->AsmRewrites->emplace_back(AOK_Skip, Loc, ExprLen);
2200 return;
2203 // Build an Intel Expression rewrite
2204 StringRef BaseRegStr;
2205 StringRef IndexRegStr;
2206 StringRef OffsetNameStr;
2207 if (SM.getBaseReg())
2208 BaseRegStr = X86IntelInstPrinter::getRegisterName(SM.getBaseReg());
2209 if (SM.getIndexReg())
2210 IndexRegStr = X86IntelInstPrinter::getRegisterName(SM.getIndexReg());
2211 if (SM.isOffsetOperator())
2212 OffsetNameStr = SM.getSymName();
2213 // Emit it
2214 IntelExpr Expr(BaseRegStr, IndexRegStr, SM.getScale(), OffsetNameStr,
2215 SM.getImm(), SM.isMemExpr());
2216 InstInfo->AsmRewrites->emplace_back(Loc, ExprLen, Expr);
2219 // Inline assembly may use variable names with namespace alias qualifiers.
2220 bool X86AsmParser::ParseIntelInlineAsmIdentifier(
2221 const MCExpr *&Val, StringRef &Identifier, InlineAsmIdentifierInfo &Info,
2222 bool IsUnevaluatedOperand, SMLoc &End, bool IsParsingOffsetOperator) {
2223 MCAsmParser &Parser = getParser();
2224 assert(isParsingMSInlineAsm() && "Expected to be parsing inline assembly.");
2225 Val = nullptr;
2227 StringRef LineBuf(Identifier.data());
2228 SemaCallback->LookupInlineAsmIdentifier(LineBuf, Info, IsUnevaluatedOperand);
2230 const AsmToken &Tok = Parser.getTok();
2231 SMLoc Loc = Tok.getLoc();
2233 // Advance the token stream until the end of the current token is
2234 // after the end of what the frontend claimed.
2235 const char *EndPtr = Tok.getLoc().getPointer() + LineBuf.size();
2236 do {
2237 End = Tok.getEndLoc();
2238 getLexer().Lex();
2239 } while (End.getPointer() < EndPtr);
2240 Identifier = LineBuf;
2242 // The frontend should end parsing on an assembler token boundary, unless it
2243 // failed parsing.
2244 assert((End.getPointer() == EndPtr ||
2245 Info.isKind(InlineAsmIdentifierInfo::IK_Invalid)) &&
2246 "frontend claimed part of a token?");
2248 // If the identifier lookup was unsuccessful, assume that we are dealing with
2249 // a label.
2250 if (Info.isKind(InlineAsmIdentifierInfo::IK_Invalid)) {
2251 StringRef InternalName =
2252 SemaCallback->LookupInlineAsmLabel(Identifier, getSourceManager(),
2253 Loc, false);
2254 assert(InternalName.size() && "We should have an internal name here.");
2255 // Push a rewrite for replacing the identifier name with the internal name,
2256 // unless we are parsing the operand of an offset operator
2257 if (!IsParsingOffsetOperator)
2258 InstInfo->AsmRewrites->emplace_back(AOK_Label, Loc, Identifier.size(),
2259 InternalName);
2260 else
2261 Identifier = InternalName;
2262 } else if (Info.isKind(InlineAsmIdentifierInfo::IK_EnumVal))
2263 return false;
2264 // Create the symbol reference.
2265 MCSymbol *Sym = getContext().getOrCreateSymbol(Identifier);
2266 MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None;
2267 Val = MCSymbolRefExpr::create(Sym, Variant, getParser().getContext());
2268 return false;
2271 //ParseRoundingModeOp - Parse AVX-512 rounding mode operand
2272 bool X86AsmParser::ParseRoundingModeOp(SMLoc Start, OperandVector &Operands) {
2273 MCAsmParser &Parser = getParser();
2274 const AsmToken &Tok = Parser.getTok();
2275 // Eat "{" and mark the current place.
2276 const SMLoc consumedToken = consumeToken();
2277 if (Tok.isNot(AsmToken::Identifier))
2278 return Error(Tok.getLoc(), "Expected an identifier after {");
2279 if (Tok.getIdentifier().starts_with("r")) {
2280 int rndMode = StringSwitch<int>(Tok.getIdentifier())
2281 .Case("rn", X86::STATIC_ROUNDING::TO_NEAREST_INT)
2282 .Case("rd", X86::STATIC_ROUNDING::TO_NEG_INF)
2283 .Case("ru", X86::STATIC_ROUNDING::TO_POS_INF)
2284 .Case("rz", X86::STATIC_ROUNDING::TO_ZERO)
2285 .Default(-1);
2286 if (-1 == rndMode)
2287 return Error(Tok.getLoc(), "Invalid rounding mode.");
2288 Parser.Lex(); // Eat "r*" of r*-sae
2289 if (!getLexer().is(AsmToken::Minus))
2290 return Error(Tok.getLoc(), "Expected - at this point");
2291 Parser.Lex(); // Eat "-"
2292 Parser.Lex(); // Eat the sae
2293 if (!getLexer().is(AsmToken::RCurly))
2294 return Error(Tok.getLoc(), "Expected } at this point");
2295 SMLoc End = Tok.getEndLoc();
2296 Parser.Lex(); // Eat "}"
2297 const MCExpr *RndModeOp =
2298 MCConstantExpr::create(rndMode, Parser.getContext());
2299 Operands.push_back(X86Operand::CreateImm(RndModeOp, Start, End));
2300 return false;
2302 if (Tok.getIdentifier() == "sae") {
2303 Parser.Lex(); // Eat the sae
2304 if (!getLexer().is(AsmToken::RCurly))
2305 return Error(Tok.getLoc(), "Expected } at this point");
2306 Parser.Lex(); // Eat "}"
2307 Operands.push_back(X86Operand::CreateToken("{sae}", consumedToken));
2308 return false;
2310 return Error(Tok.getLoc(), "unknown token in expression");
2313 /// Parse condtional flags for CCMP/CTEST, e.g {dfv=of,sf,zf,cf} right after
2314 /// mnemonic.
2315 bool X86AsmParser::parseCFlagsOp(OperandVector &Operands) {
2316 MCAsmParser &Parser = getParser();
2317 AsmToken Tok = Parser.getTok();
2318 const SMLoc Start = Tok.getLoc();
2319 if (!Tok.is(AsmToken::LCurly))
2320 return Error(Tok.getLoc(), "Expected { at this point");
2321 Parser.Lex(); // Eat "{"
2322 Tok = Parser.getTok();
2323 if (Tok.getIdentifier().lower() != "dfv")
2324 return Error(Tok.getLoc(), "Expected dfv at this point");
2325 Parser.Lex(); // Eat "dfv"
2326 Tok = Parser.getTok();
2327 if (!Tok.is(AsmToken::Equal))
2328 return Error(Tok.getLoc(), "Expected = at this point");
2329 Parser.Lex(); // Eat "="
2331 Tok = Parser.getTok();
2332 SMLoc End;
2333 if (Tok.is(AsmToken::RCurly)) {
2334 End = Tok.getEndLoc();
2335 Operands.push_back(X86Operand::CreateImm(
2336 MCConstantExpr::create(0, Parser.getContext()), Start, End));
2337 Parser.Lex(); // Eat "}"
2338 return false;
2340 unsigned CFlags = 0;
2341 for (unsigned I = 0; I < 4; ++I) {
2342 Tok = Parser.getTok();
2343 unsigned CFlag = StringSwitch<unsigned>(Tok.getIdentifier().lower())
2344 .Case("of", 0x8)
2345 .Case("sf", 0x4)
2346 .Case("zf", 0x2)
2347 .Case("cf", 0x1)
2348 .Default(~0U);
2349 if (CFlag == ~0U)
2350 return Error(Tok.getLoc(), "Invalid conditional flags");
2352 if (CFlags & CFlag)
2353 return Error(Tok.getLoc(), "Duplicated conditional flag");
2354 CFlags |= CFlag;
2356 Parser.Lex(); // Eat one conditional flag
2357 Tok = Parser.getTok();
2358 if (Tok.is(AsmToken::RCurly)) {
2359 End = Tok.getEndLoc();
2360 Operands.push_back(X86Operand::CreateImm(
2361 MCConstantExpr::create(CFlags, Parser.getContext()), Start, End));
2362 Parser.Lex(); // Eat "}"
2363 return false;
2364 } else if (I == 3) {
2365 return Error(Tok.getLoc(), "Expected } at this point");
2366 } else if (Tok.isNot(AsmToken::Comma)) {
2367 return Error(Tok.getLoc(), "Expected } or , at this point");
2369 Parser.Lex(); // Eat ","
2371 llvm_unreachable("Unexpected control flow");
2374 /// Parse the '.' operator.
2375 bool X86AsmParser::ParseIntelDotOperator(IntelExprStateMachine &SM,
2376 SMLoc &End) {
2377 const AsmToken &Tok = getTok();
2378 AsmFieldInfo Info;
2380 // Drop the optional '.'.
2381 StringRef DotDispStr = Tok.getString();
2382 DotDispStr.consume_front(".");
2383 StringRef TrailingDot;
2385 // .Imm gets lexed as a real.
2386 if (Tok.is(AsmToken::Real)) {
2387 APInt DotDisp;
2388 if (DotDispStr.getAsInteger(10, DotDisp))
2389 return Error(Tok.getLoc(), "Unexpected offset");
2390 Info.Offset = DotDisp.getZExtValue();
2391 } else if ((isParsingMSInlineAsm() || getParser().isParsingMasm()) &&
2392 Tok.is(AsmToken::Identifier)) {
2393 if (DotDispStr.ends_with(".")) {
2394 TrailingDot = DotDispStr.substr(DotDispStr.size() - 1);
2395 DotDispStr = DotDispStr.drop_back(1);
2397 const std::pair<StringRef, StringRef> BaseMember = DotDispStr.split('.');
2398 const StringRef Base = BaseMember.first, Member = BaseMember.second;
2399 if (getParser().lookUpField(SM.getType(), DotDispStr, Info) &&
2400 getParser().lookUpField(SM.getSymName(), DotDispStr, Info) &&
2401 getParser().lookUpField(DotDispStr, Info) &&
2402 (!SemaCallback ||
2403 SemaCallback->LookupInlineAsmField(Base, Member, Info.Offset)))
2404 return Error(Tok.getLoc(), "Unable to lookup field reference!");
2405 } else {
2406 return Error(Tok.getLoc(), "Unexpected token type!");
2409 // Eat the DotExpression and update End
2410 End = SMLoc::getFromPointer(DotDispStr.data());
2411 const char *DotExprEndLoc = DotDispStr.data() + DotDispStr.size();
2412 while (Tok.getLoc().getPointer() < DotExprEndLoc)
2413 Lex();
2414 if (!TrailingDot.empty())
2415 getLexer().UnLex(AsmToken(AsmToken::Dot, TrailingDot));
2416 SM.addImm(Info.Offset);
2417 SM.setTypeInfo(Info.Type);
2418 return false;
2421 /// Parse the 'offset' operator.
2422 /// This operator is used to specify the location of a given operand
2423 bool X86AsmParser::ParseIntelOffsetOperator(const MCExpr *&Val, StringRef &ID,
2424 InlineAsmIdentifierInfo &Info,
2425 SMLoc &End) {
2426 // Eat offset, mark start of identifier.
2427 SMLoc Start = Lex().getLoc();
2428 ID = getTok().getString();
2429 if (!isParsingMSInlineAsm()) {
2430 if ((getTok().isNot(AsmToken::Identifier) &&
2431 getTok().isNot(AsmToken::String)) ||
2432 getParser().parsePrimaryExpr(Val, End, nullptr))
2433 return Error(Start, "unexpected token!");
2434 } else if (ParseIntelInlineAsmIdentifier(Val, ID, Info, false, End, true)) {
2435 return Error(Start, "unable to lookup expression");
2436 } else if (Info.isKind(InlineAsmIdentifierInfo::IK_EnumVal)) {
2437 return Error(Start, "offset operator cannot yet handle constants");
2439 return false;
2442 // Query a candidate string for being an Intel assembly operator
2443 // Report back its kind, or IOK_INVALID if does not evaluated as a known one
2444 unsigned X86AsmParser::IdentifyIntelInlineAsmOperator(StringRef Name) {
2445 return StringSwitch<unsigned>(Name)
2446 .Cases("TYPE","type",IOK_TYPE)
2447 .Cases("SIZE","size",IOK_SIZE)
2448 .Cases("LENGTH","length",IOK_LENGTH)
2449 .Default(IOK_INVALID);
2452 /// Parse the 'LENGTH', 'TYPE' and 'SIZE' operators. The LENGTH operator
2453 /// returns the number of elements in an array. It returns the value 1 for
2454 /// non-array variables. The SIZE operator returns the size of a C or C++
2455 /// variable. A variable's size is the product of its LENGTH and TYPE. The
2456 /// TYPE operator returns the size of a C or C++ type or variable. If the
2457 /// variable is an array, TYPE returns the size of a single element.
2458 unsigned X86AsmParser::ParseIntelInlineAsmOperator(unsigned OpKind) {
2459 MCAsmParser &Parser = getParser();
2460 const AsmToken &Tok = Parser.getTok();
2461 Parser.Lex(); // Eat operator.
2463 const MCExpr *Val = nullptr;
2464 InlineAsmIdentifierInfo Info;
2465 SMLoc Start = Tok.getLoc(), End;
2466 StringRef Identifier = Tok.getString();
2467 if (ParseIntelInlineAsmIdentifier(Val, Identifier, Info,
2468 /*IsUnevaluatedOperand=*/true, End))
2469 return 0;
2471 if (!Info.isKind(InlineAsmIdentifierInfo::IK_Var)) {
2472 Error(Start, "unable to lookup expression");
2473 return 0;
2476 unsigned CVal = 0;
2477 switch(OpKind) {
2478 default: llvm_unreachable("Unexpected operand kind!");
2479 case IOK_LENGTH: CVal = Info.Var.Length; break;
2480 case IOK_SIZE: CVal = Info.Var.Size; break;
2481 case IOK_TYPE: CVal = Info.Var.Type; break;
2484 return CVal;
2487 // Query a candidate string for being an Intel assembly operator
2488 // Report back its kind, or IOK_INVALID if does not evaluated as a known one
2489 unsigned X86AsmParser::IdentifyMasmOperator(StringRef Name) {
2490 return StringSwitch<unsigned>(Name.lower())
2491 .Case("type", MOK_TYPE)
2492 .Cases("size", "sizeof", MOK_SIZEOF)
2493 .Cases("length", "lengthof", MOK_LENGTHOF)
2494 .Default(MOK_INVALID);
2497 /// Parse the 'LENGTHOF', 'SIZEOF', and 'TYPE' operators. The LENGTHOF operator
2498 /// returns the number of elements in an array. It returns the value 1 for
2499 /// non-array variables. The SIZEOF operator returns the size of a type or
2500 /// variable in bytes. A variable's size is the product of its LENGTH and TYPE.
2501 /// The TYPE operator returns the size of a variable. If the variable is an
2502 /// array, TYPE returns the size of a single element.
2503 bool X86AsmParser::ParseMasmOperator(unsigned OpKind, int64_t &Val) {
2504 MCAsmParser &Parser = getParser();
2505 SMLoc OpLoc = Parser.getTok().getLoc();
2506 Parser.Lex(); // Eat operator.
2508 Val = 0;
2509 if (OpKind == MOK_SIZEOF || OpKind == MOK_TYPE) {
2510 // Check for SIZEOF(<type>) and TYPE(<type>).
2511 bool InParens = Parser.getTok().is(AsmToken::LParen);
2512 const AsmToken &IDTok = InParens ? getLexer().peekTok() : Parser.getTok();
2513 AsmTypeInfo Type;
2514 if (IDTok.is(AsmToken::Identifier) &&
2515 !Parser.lookUpType(IDTok.getIdentifier(), Type)) {
2516 Val = Type.Size;
2518 // Eat tokens.
2519 if (InParens)
2520 parseToken(AsmToken::LParen);
2521 parseToken(AsmToken::Identifier);
2522 if (InParens)
2523 parseToken(AsmToken::RParen);
2527 if (!Val) {
2528 IntelExprStateMachine SM;
2529 SMLoc End, Start = Parser.getTok().getLoc();
2530 if (ParseIntelExpression(SM, End))
2531 return true;
2533 switch (OpKind) {
2534 default:
2535 llvm_unreachable("Unexpected operand kind!");
2536 case MOK_SIZEOF:
2537 Val = SM.getSize();
2538 break;
2539 case MOK_LENGTHOF:
2540 Val = SM.getLength();
2541 break;
2542 case MOK_TYPE:
2543 Val = SM.getElementSize();
2544 break;
2547 if (!Val)
2548 return Error(OpLoc, "expression has unknown type", SMRange(Start, End));
2551 return false;
2554 bool X86AsmParser::ParseIntelMemoryOperandSize(unsigned &Size) {
2555 Size = StringSwitch<unsigned>(getTok().getString())
2556 .Cases("BYTE", "byte", 8)
2557 .Cases("WORD", "word", 16)
2558 .Cases("DWORD", "dword", 32)
2559 .Cases("FLOAT", "float", 32)
2560 .Cases("LONG", "long", 32)
2561 .Cases("FWORD", "fword", 48)
2562 .Cases("DOUBLE", "double", 64)
2563 .Cases("QWORD", "qword", 64)
2564 .Cases("MMWORD","mmword", 64)
2565 .Cases("XWORD", "xword", 80)
2566 .Cases("TBYTE", "tbyte", 80)
2567 .Cases("XMMWORD", "xmmword", 128)
2568 .Cases("YMMWORD", "ymmword", 256)
2569 .Cases("ZMMWORD", "zmmword", 512)
2570 .Default(0);
2571 if (Size) {
2572 const AsmToken &Tok = Lex(); // Eat operand size (e.g., byte, word).
2573 if (!(Tok.getString() == "PTR" || Tok.getString() == "ptr"))
2574 return Error(Tok.getLoc(), "Expected 'PTR' or 'ptr' token!");
2575 Lex(); // Eat ptr.
2577 return false;
2580 bool X86AsmParser::parseIntelOperand(OperandVector &Operands, StringRef Name) {
2581 MCAsmParser &Parser = getParser();
2582 const AsmToken &Tok = Parser.getTok();
2583 SMLoc Start, End;
2585 // Parse optional Size directive.
2586 unsigned Size;
2587 if (ParseIntelMemoryOperandSize(Size))
2588 return true;
2589 bool PtrInOperand = bool(Size);
2591 Start = Tok.getLoc();
2593 // Rounding mode operand.
2594 if (getLexer().is(AsmToken::LCurly))
2595 return ParseRoundingModeOp(Start, Operands);
2597 // Register operand.
2598 MCRegister RegNo;
2599 if (Tok.is(AsmToken::Identifier) && !parseRegister(RegNo, Start, End)) {
2600 if (RegNo == X86::RIP)
2601 return Error(Start, "rip can only be used as a base register");
2602 // A Register followed by ':' is considered a segment override
2603 if (Tok.isNot(AsmToken::Colon)) {
2604 if (PtrInOperand)
2605 return Error(Start, "expected memory operand after 'ptr', "
2606 "found register operand instead");
2607 Operands.push_back(X86Operand::CreateReg(RegNo, Start, End));
2608 return false;
2610 // An alleged segment override. check if we have a valid segment register
2611 if (!X86MCRegisterClasses[X86::SEGMENT_REGRegClassID].contains(RegNo))
2612 return Error(Start, "invalid segment register");
2613 // Eat ':' and update Start location
2614 Start = Lex().getLoc();
2617 // Immediates and Memory
2618 IntelExprStateMachine SM;
2619 if (ParseIntelExpression(SM, End))
2620 return true;
2622 if (isParsingMSInlineAsm())
2623 RewriteIntelExpression(SM, Start, Tok.getLoc());
2625 int64_t Imm = SM.getImm();
2626 const MCExpr *Disp = SM.getSym();
2627 const MCExpr *ImmDisp = MCConstantExpr::create(Imm, getContext());
2628 if (Disp && Imm)
2629 Disp = MCBinaryExpr::createAdd(Disp, ImmDisp, getContext());
2630 if (!Disp)
2631 Disp = ImmDisp;
2633 // RegNo != 0 specifies a valid segment register,
2634 // and we are parsing a segment override
2635 if (!SM.isMemExpr() && !RegNo) {
2636 if (isParsingMSInlineAsm() && SM.isOffsetOperator()) {
2637 const InlineAsmIdentifierInfo &Info = SM.getIdentifierInfo();
2638 if (Info.isKind(InlineAsmIdentifierInfo::IK_Var)) {
2639 // Disp includes the address of a variable; make sure this is recorded
2640 // for later handling.
2641 Operands.push_back(X86Operand::CreateImm(Disp, Start, End,
2642 SM.getSymName(), Info.Var.Decl,
2643 Info.Var.IsGlobalLV));
2644 return false;
2648 Operands.push_back(X86Operand::CreateImm(Disp, Start, End));
2649 return false;
2652 StringRef ErrMsg;
2653 MCRegister BaseReg = SM.getBaseReg();
2654 MCRegister IndexReg = SM.getIndexReg();
2655 if (IndexReg && BaseReg == X86::RIP)
2656 BaseReg = MCRegister();
2657 unsigned Scale = SM.getScale();
2658 if (!PtrInOperand)
2659 Size = SM.getElementSize() << 3;
2661 if (Scale == 0 && BaseReg != X86::ESP && BaseReg != X86::RSP &&
2662 (IndexReg == X86::ESP || IndexReg == X86::RSP))
2663 std::swap(BaseReg, IndexReg);
2665 // If BaseReg is a vector register and IndexReg is not, swap them unless
2666 // Scale was specified in which case it would be an error.
2667 if (Scale == 0 &&
2668 !(X86MCRegisterClasses[X86::VR128XRegClassID].contains(IndexReg) ||
2669 X86MCRegisterClasses[X86::VR256XRegClassID].contains(IndexReg) ||
2670 X86MCRegisterClasses[X86::VR512RegClassID].contains(IndexReg)) &&
2671 (X86MCRegisterClasses[X86::VR128XRegClassID].contains(BaseReg) ||
2672 X86MCRegisterClasses[X86::VR256XRegClassID].contains(BaseReg) ||
2673 X86MCRegisterClasses[X86::VR512RegClassID].contains(BaseReg)))
2674 std::swap(BaseReg, IndexReg);
2676 if (Scale != 0 &&
2677 X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg))
2678 return Error(Start, "16-bit addresses cannot have a scale");
2680 // If there was no explicit scale specified, change it to 1.
2681 if (Scale == 0)
2682 Scale = 1;
2684 // If this is a 16-bit addressing mode with the base and index in the wrong
2685 // order, swap them so CheckBaseRegAndIndexRegAndScale doesn't fail. It is
2686 // shared with att syntax where order matters.
2687 if ((BaseReg == X86::SI || BaseReg == X86::DI) &&
2688 (IndexReg == X86::BX || IndexReg == X86::BP))
2689 std::swap(BaseReg, IndexReg);
2691 if ((BaseReg || IndexReg) &&
2692 CheckBaseRegAndIndexRegAndScale(BaseReg, IndexReg, Scale, is64BitMode(),
2693 ErrMsg))
2694 return Error(Start, ErrMsg);
2695 bool IsUnconditionalBranch =
2696 Name.equals_insensitive("jmp") || Name.equals_insensitive("call");
2697 if (isParsingMSInlineAsm())
2698 return CreateMemForMSInlineAsm(RegNo, Disp, BaseReg, IndexReg, Scale,
2699 IsUnconditionalBranch && is64BitMode(),
2700 Start, End, Size, SM.getSymName(),
2701 SM.getIdentifierInfo(), Operands);
2703 // When parsing x64 MS-style assembly, all non-absolute references to a named
2704 // variable default to RIP-relative.
2705 MCRegister DefaultBaseReg;
2706 bool MaybeDirectBranchDest = true;
2708 if (Parser.isParsingMasm()) {
2709 if (is64BitMode() &&
2710 ((PtrInOperand && !IndexReg) || SM.getElementSize() > 0)) {
2711 DefaultBaseReg = X86::RIP;
2713 if (IsUnconditionalBranch) {
2714 if (PtrInOperand) {
2715 MaybeDirectBranchDest = false;
2716 if (is64BitMode())
2717 DefaultBaseReg = X86::RIP;
2718 } else if (!BaseReg && !IndexReg && Disp &&
2719 Disp->getKind() == MCExpr::SymbolRef) {
2720 if (is64BitMode()) {
2721 if (SM.getSize() == 8) {
2722 MaybeDirectBranchDest = false;
2723 DefaultBaseReg = X86::RIP;
2725 } else {
2726 if (SM.getSize() == 4 || SM.getSize() == 2)
2727 MaybeDirectBranchDest = false;
2731 } else if (IsUnconditionalBranch) {
2732 // Treat `call [offset fn_ref]` (or `jmp`) syntax as an error.
2733 if (!PtrInOperand && SM.isOffsetOperator())
2734 return Error(
2735 Start, "`OFFSET` operator cannot be used in an unconditional branch");
2736 if (PtrInOperand || SM.isBracketUsed())
2737 MaybeDirectBranchDest = false;
2740 if ((BaseReg || IndexReg || RegNo || DefaultBaseReg))
2741 Operands.push_back(X86Operand::CreateMem(
2742 getPointerWidth(), RegNo, Disp, BaseReg, IndexReg, Scale, Start, End,
2743 Size, DefaultBaseReg, /*SymName=*/StringRef(), /*OpDecl=*/nullptr,
2744 /*FrontendSize=*/0, /*UseUpRegs=*/false, MaybeDirectBranchDest));
2745 else
2746 Operands.push_back(X86Operand::CreateMem(
2747 getPointerWidth(), Disp, Start, End, Size, /*SymName=*/StringRef(),
2748 /*OpDecl=*/nullptr, /*FrontendSize=*/0, /*UseUpRegs=*/false,
2749 MaybeDirectBranchDest));
2750 return false;
2753 bool X86AsmParser::parseATTOperand(OperandVector &Operands) {
2754 MCAsmParser &Parser = getParser();
2755 switch (getLexer().getKind()) {
2756 case AsmToken::Dollar: {
2757 // $42 or $ID -> immediate.
2758 SMLoc Start = Parser.getTok().getLoc(), End;
2759 Parser.Lex();
2760 const MCExpr *Val;
2761 // This is an immediate, so we should not parse a register. Do a precheck
2762 // for '%' to supercede intra-register parse errors.
2763 SMLoc L = Parser.getTok().getLoc();
2764 if (check(getLexer().is(AsmToken::Percent), L,
2765 "expected immediate expression") ||
2766 getParser().parseExpression(Val, End) ||
2767 check(isa<X86MCExpr>(Val), L, "expected immediate expression"))
2768 return true;
2769 Operands.push_back(X86Operand::CreateImm(Val, Start, End));
2770 return false;
2772 case AsmToken::LCurly: {
2773 SMLoc Start = Parser.getTok().getLoc();
2774 return ParseRoundingModeOp(Start, Operands);
2776 default: {
2777 // This a memory operand or a register. We have some parsing complications
2778 // as a '(' may be part of an immediate expression or the addressing mode
2779 // block. This is complicated by the fact that an assembler-level variable
2780 // may refer either to a register or an immediate expression.
2782 SMLoc Loc = Parser.getTok().getLoc(), EndLoc;
2783 const MCExpr *Expr = nullptr;
2784 MCRegister Reg;
2785 if (getLexer().isNot(AsmToken::LParen)) {
2786 // No '(' so this is either a displacement expression or a register.
2787 if (Parser.parseExpression(Expr, EndLoc))
2788 return true;
2789 if (auto *RE = dyn_cast<X86MCExpr>(Expr)) {
2790 // Segment Register. Reset Expr and copy value to register.
2791 Expr = nullptr;
2792 Reg = RE->getReg();
2794 // Check the register.
2795 if (Reg == X86::EIZ || Reg == X86::RIZ)
2796 return Error(
2797 Loc, "%eiz and %riz can only be used as index registers",
2798 SMRange(Loc, EndLoc));
2799 if (Reg == X86::RIP)
2800 return Error(Loc, "%rip can only be used as a base register",
2801 SMRange(Loc, EndLoc));
2802 // Return register that are not segment prefixes immediately.
2803 if (!Parser.parseOptionalToken(AsmToken::Colon)) {
2804 Operands.push_back(X86Operand::CreateReg(Reg, Loc, EndLoc));
2805 return false;
2807 if (!X86MCRegisterClasses[X86::SEGMENT_REGRegClassID].contains(Reg))
2808 return Error(Loc, "invalid segment register");
2809 // Accept a '*' absolute memory reference after the segment. Place it
2810 // before the full memory operand.
2811 if (getLexer().is(AsmToken::Star))
2812 Operands.push_back(X86Operand::CreateToken("*", consumeToken()));
2815 // This is a Memory operand.
2816 return ParseMemOperand(Reg, Expr, Loc, EndLoc, Operands);
2821 // X86::COND_INVALID if not a recognized condition code or alternate mnemonic,
2822 // otherwise the EFLAGS Condition Code enumerator.
2823 X86::CondCode X86AsmParser::ParseConditionCode(StringRef CC) {
2824 return StringSwitch<X86::CondCode>(CC)
2825 .Case("o", X86::COND_O) // Overflow
2826 .Case("no", X86::COND_NO) // No Overflow
2827 .Cases("b", "nae", X86::COND_B) // Below/Neither Above nor Equal
2828 .Cases("ae", "nb", X86::COND_AE) // Above or Equal/Not Below
2829 .Cases("e", "z", X86::COND_E) // Equal/Zero
2830 .Cases("ne", "nz", X86::COND_NE) // Not Equal/Not Zero
2831 .Cases("be", "na", X86::COND_BE) // Below or Equal/Not Above
2832 .Cases("a", "nbe", X86::COND_A) // Above/Neither Below nor Equal
2833 .Case("s", X86::COND_S) // Sign
2834 .Case("ns", X86::COND_NS) // No Sign
2835 .Cases("p", "pe", X86::COND_P) // Parity/Parity Even
2836 .Cases("np", "po", X86::COND_NP) // No Parity/Parity Odd
2837 .Cases("l", "nge", X86::COND_L) // Less/Neither Greater nor Equal
2838 .Cases("ge", "nl", X86::COND_GE) // Greater or Equal/Not Less
2839 .Cases("le", "ng", X86::COND_LE) // Less or Equal/Not Greater
2840 .Cases("g", "nle", X86::COND_G) // Greater/Neither Less nor Equal
2841 .Default(X86::COND_INVALID);
2844 // true on failure, false otherwise
2845 // If no {z} mark was found - Parser doesn't advance
2846 bool X86AsmParser::ParseZ(std::unique_ptr<X86Operand> &Z,
2847 const SMLoc &StartLoc) {
2848 MCAsmParser &Parser = getParser();
2849 // Assuming we are just pass the '{' mark, quering the next token
2850 // Searched for {z}, but none was found. Return false, as no parsing error was
2851 // encountered
2852 if (!(getLexer().is(AsmToken::Identifier) &&
2853 (getLexer().getTok().getIdentifier() == "z")))
2854 return false;
2855 Parser.Lex(); // Eat z
2856 // Query and eat the '}' mark
2857 if (!getLexer().is(AsmToken::RCurly))
2858 return Error(getLexer().getLoc(), "Expected } at this point");
2859 Parser.Lex(); // Eat '}'
2860 // Assign Z with the {z} mark operand
2861 Z = X86Operand::CreateToken("{z}", StartLoc);
2862 return false;
2865 // true on failure, false otherwise
2866 bool X86AsmParser::HandleAVX512Operand(OperandVector &Operands) {
2867 MCAsmParser &Parser = getParser();
2868 if (getLexer().is(AsmToken::LCurly)) {
2869 // Eat "{" and mark the current place.
2870 const SMLoc consumedToken = consumeToken();
2871 // Distinguish {1to<NUM>} from {%k<NUM>}.
2872 if(getLexer().is(AsmToken::Integer)) {
2873 // Parse memory broadcasting ({1to<NUM>}).
2874 if (getLexer().getTok().getIntVal() != 1)
2875 return TokError("Expected 1to<NUM> at this point");
2876 StringRef Prefix = getLexer().getTok().getString();
2877 Parser.Lex(); // Eat first token of 1to8
2878 if (!getLexer().is(AsmToken::Identifier))
2879 return TokError("Expected 1to<NUM> at this point");
2880 // Recognize only reasonable suffixes.
2881 SmallVector<char, 5> BroadcastVector;
2882 StringRef BroadcastString = (Prefix + getLexer().getTok().getIdentifier())
2883 .toStringRef(BroadcastVector);
2884 if (!BroadcastString.starts_with("1to"))
2885 return TokError("Expected 1to<NUM> at this point");
2886 const char *BroadcastPrimitive =
2887 StringSwitch<const char *>(BroadcastString)
2888 .Case("1to2", "{1to2}")
2889 .Case("1to4", "{1to4}")
2890 .Case("1to8", "{1to8}")
2891 .Case("1to16", "{1to16}")
2892 .Case("1to32", "{1to32}")
2893 .Default(nullptr);
2894 if (!BroadcastPrimitive)
2895 return TokError("Invalid memory broadcast primitive.");
2896 Parser.Lex(); // Eat trailing token of 1toN
2897 if (!getLexer().is(AsmToken::RCurly))
2898 return TokError("Expected } at this point");
2899 Parser.Lex(); // Eat "}"
2900 Operands.push_back(X86Operand::CreateToken(BroadcastPrimitive,
2901 consumedToken));
2902 // No AVX512 specific primitives can pass
2903 // after memory broadcasting, so return.
2904 return false;
2905 } else {
2906 // Parse either {k}{z}, {z}{k}, {k} or {z}
2907 // last one have no meaning, but GCC accepts it
2908 // Currently, we're just pass a '{' mark
2909 std::unique_ptr<X86Operand> Z;
2910 if (ParseZ(Z, consumedToken))
2911 return true;
2912 // Reaching here means that parsing of the allegadly '{z}' mark yielded
2913 // no errors.
2914 // Query for the need of further parsing for a {%k<NUM>} mark
2915 if (!Z || getLexer().is(AsmToken::LCurly)) {
2916 SMLoc StartLoc = Z ? consumeToken() : consumedToken;
2917 // Parse an op-mask register mark ({%k<NUM>}), which is now to be
2918 // expected
2919 MCRegister RegNo;
2920 SMLoc RegLoc;
2921 if (!parseRegister(RegNo, RegLoc, StartLoc) &&
2922 X86MCRegisterClasses[X86::VK1RegClassID].contains(RegNo)) {
2923 if (RegNo == X86::K0)
2924 return Error(RegLoc, "Register k0 can't be used as write mask");
2925 if (!getLexer().is(AsmToken::RCurly))
2926 return Error(getLexer().getLoc(), "Expected } at this point");
2927 Operands.push_back(X86Operand::CreateToken("{", StartLoc));
2928 Operands.push_back(
2929 X86Operand::CreateReg(RegNo, StartLoc, StartLoc));
2930 Operands.push_back(X86Operand::CreateToken("}", consumeToken()));
2931 } else
2932 return Error(getLexer().getLoc(),
2933 "Expected an op-mask register at this point");
2934 // {%k<NUM>} mark is found, inquire for {z}
2935 if (getLexer().is(AsmToken::LCurly) && !Z) {
2936 // Have we've found a parsing error, or found no (expected) {z} mark
2937 // - report an error
2938 if (ParseZ(Z, consumeToken()) || !Z)
2939 return Error(getLexer().getLoc(),
2940 "Expected a {z} mark at this point");
2943 // '{z}' on its own is meaningless, hence should be ignored.
2944 // on the contrary - have it been accompanied by a K register,
2945 // allow it.
2946 if (Z)
2947 Operands.push_back(std::move(Z));
2951 return false;
2954 /// ParseMemOperand: 'seg : disp(basereg, indexreg, scale)'. The '%ds:' prefix
2955 /// has already been parsed if present. disp may be provided as well.
2956 bool X86AsmParser::ParseMemOperand(MCRegister SegReg, const MCExpr *Disp,
2957 SMLoc StartLoc, SMLoc EndLoc,
2958 OperandVector &Operands) {
2959 MCAsmParser &Parser = getParser();
2960 SMLoc Loc;
2961 // Based on the initial passed values, we may be in any of these cases, we are
2962 // in one of these cases (with current position (*)):
2964 // 1. seg : * disp (base-index-scale-expr)
2965 // 2. seg : *(disp) (base-index-scale-expr)
2966 // 3. seg : *(base-index-scale-expr)
2967 // 4. disp *(base-index-scale-expr)
2968 // 5. *(disp) (base-index-scale-expr)
2969 // 6. *(base-index-scale-expr)
2970 // 7. disp *
2971 // 8. *(disp)
2973 // If we do not have a displacement yet, check if we're in cases 4 or 6 by
2974 // checking if the first object after the parenthesis is a register (or an
2975 // identifier referring to a register) and parse the displacement or default
2976 // to 0 as appropriate.
2977 auto isAtMemOperand = [this]() {
2978 if (this->getLexer().isNot(AsmToken::LParen))
2979 return false;
2980 AsmToken Buf[2];
2981 StringRef Id;
2982 auto TokCount = this->getLexer().peekTokens(Buf, true);
2983 if (TokCount == 0)
2984 return false;
2985 switch (Buf[0].getKind()) {
2986 case AsmToken::Percent:
2987 case AsmToken::Comma:
2988 return true;
2989 // These lower cases are doing a peekIdentifier.
2990 case AsmToken::At:
2991 case AsmToken::Dollar:
2992 if ((TokCount > 1) &&
2993 (Buf[1].is(AsmToken::Identifier) || Buf[1].is(AsmToken::String)) &&
2994 (Buf[0].getLoc().getPointer() + 1 == Buf[1].getLoc().getPointer()))
2995 Id = StringRef(Buf[0].getLoc().getPointer(),
2996 Buf[1].getIdentifier().size() + 1);
2997 break;
2998 case AsmToken::Identifier:
2999 case AsmToken::String:
3000 Id = Buf[0].getIdentifier();
3001 break;
3002 default:
3003 return false;
3005 // We have an ID. Check if it is bound to a register.
3006 if (!Id.empty()) {
3007 MCSymbol *Sym = this->getContext().getOrCreateSymbol(Id);
3008 if (Sym->isVariable()) {
3009 auto V = Sym->getVariableValue(/*SetUsed*/ false);
3010 return isa<X86MCExpr>(V);
3013 return false;
3016 if (!Disp) {
3017 // Parse immediate if we're not at a mem operand yet.
3018 if (!isAtMemOperand()) {
3019 if (Parser.parseTokenLoc(Loc) || Parser.parseExpression(Disp, EndLoc))
3020 return true;
3021 assert(!isa<X86MCExpr>(Disp) && "Expected non-register here.");
3022 } else {
3023 // Disp is implicitly zero if we haven't parsed it yet.
3024 Disp = MCConstantExpr::create(0, Parser.getContext());
3028 // We are now either at the end of the operand or at the '(' at the start of a
3029 // base-index-scale-expr.
3031 if (!parseOptionalToken(AsmToken::LParen)) {
3032 if (!SegReg)
3033 Operands.push_back(
3034 X86Operand::CreateMem(getPointerWidth(), Disp, StartLoc, EndLoc));
3035 else
3036 Operands.push_back(X86Operand::CreateMem(getPointerWidth(), SegReg, Disp,
3037 0, 0, 1, StartLoc, EndLoc));
3038 return false;
3041 // If we reached here, then eat the '(' and Process
3042 // the rest of the memory operand.
3043 MCRegister BaseReg, IndexReg;
3044 unsigned Scale = 1;
3045 SMLoc BaseLoc = getLexer().getLoc();
3046 const MCExpr *E;
3047 StringRef ErrMsg;
3049 // Parse BaseReg if one is provided.
3050 if (getLexer().isNot(AsmToken::Comma) && getLexer().isNot(AsmToken::RParen)) {
3051 if (Parser.parseExpression(E, EndLoc) ||
3052 check(!isa<X86MCExpr>(E), BaseLoc, "expected register here"))
3053 return true;
3055 // Check the register.
3056 BaseReg = cast<X86MCExpr>(E)->getReg();
3057 if (BaseReg == X86::EIZ || BaseReg == X86::RIZ)
3058 return Error(BaseLoc, "eiz and riz can only be used as index registers",
3059 SMRange(BaseLoc, EndLoc));
3062 if (parseOptionalToken(AsmToken::Comma)) {
3063 // Following the comma we should have either an index register, or a scale
3064 // value. We don't support the later form, but we want to parse it
3065 // correctly.
3067 // Even though it would be completely consistent to support syntax like
3068 // "1(%eax,,1)", the assembler doesn't. Use "eiz" or "riz" for this.
3069 if (getLexer().isNot(AsmToken::RParen)) {
3070 if (Parser.parseTokenLoc(Loc) || Parser.parseExpression(E, EndLoc))
3071 return true;
3073 if (!isa<X86MCExpr>(E)) {
3074 // We've parsed an unexpected Scale Value instead of an index
3075 // register. Interpret it as an absolute.
3076 int64_t ScaleVal;
3077 if (!E->evaluateAsAbsolute(ScaleVal, getStreamer().getAssemblerPtr()))
3078 return Error(Loc, "expected absolute expression");
3079 if (ScaleVal != 1)
3080 Warning(Loc, "scale factor without index register is ignored");
3081 Scale = 1;
3082 } else { // IndexReg Found.
3083 IndexReg = cast<X86MCExpr>(E)->getReg();
3085 if (BaseReg == X86::RIP)
3086 return Error(Loc,
3087 "%rip as base register can not have an index register");
3088 if (IndexReg == X86::RIP)
3089 return Error(Loc, "%rip is not allowed as an index register");
3091 if (parseOptionalToken(AsmToken::Comma)) {
3092 // Parse the scale amount:
3093 // ::= ',' [scale-expression]
3095 // A scale amount without an index is ignored.
3096 if (getLexer().isNot(AsmToken::RParen)) {
3097 int64_t ScaleVal;
3098 if (Parser.parseTokenLoc(Loc) ||
3099 Parser.parseAbsoluteExpression(ScaleVal))
3100 return Error(Loc, "expected scale expression");
3101 Scale = (unsigned)ScaleVal;
3102 // Validate the scale amount.
3103 if (X86MCRegisterClasses[X86::GR16RegClassID].contains(BaseReg) &&
3104 Scale != 1)
3105 return Error(Loc, "scale factor in 16-bit address must be 1");
3106 if (checkScale(Scale, ErrMsg))
3107 return Error(Loc, ErrMsg);
3114 // Ok, we've eaten the memory operand, verify we have a ')' and eat it too.
3115 if (parseToken(AsmToken::RParen, "unexpected token in memory operand"))
3116 return true;
3118 // This is to support otherwise illegal operand (%dx) found in various
3119 // unofficial manuals examples (e.g. "out[s]?[bwl]? %al, (%dx)") and must now
3120 // be supported. Mark such DX variants separately fix only in special cases.
3121 if (BaseReg == X86::DX && !IndexReg && Scale == 1 && !SegReg &&
3122 isa<MCConstantExpr>(Disp) &&
3123 cast<MCConstantExpr>(Disp)->getValue() == 0) {
3124 Operands.push_back(X86Operand::CreateDXReg(BaseLoc, BaseLoc));
3125 return false;
3128 if (CheckBaseRegAndIndexRegAndScale(BaseReg, IndexReg, Scale, is64BitMode(),
3129 ErrMsg))
3130 return Error(BaseLoc, ErrMsg);
3132 // If the displacement is a constant, check overflows. For 64-bit addressing,
3133 // gas requires isInt<32> and otherwise reports an error. For others, gas
3134 // reports a warning and allows a wider range. E.g. gas allows
3135 // [-0xffffffff,0xffffffff] for 32-bit addressing (e.g. Linux kernel uses
3136 // `leal -__PAGE_OFFSET(%ecx),%esp` where __PAGE_OFFSET is 0xc0000000).
3137 if (BaseReg || IndexReg) {
3138 if (auto CE = dyn_cast<MCConstantExpr>(Disp)) {
3139 auto Imm = CE->getValue();
3140 bool Is64 = X86MCRegisterClasses[X86::GR64RegClassID].contains(BaseReg) ||
3141 X86MCRegisterClasses[X86::GR64RegClassID].contains(IndexReg);
3142 bool Is16 = X86MCRegisterClasses[X86::GR16RegClassID].contains(BaseReg);
3143 if (Is64) {
3144 if (!isInt<32>(Imm))
3145 return Error(BaseLoc, "displacement " + Twine(Imm) +
3146 " is not within [-2147483648, 2147483647]");
3147 } else if (!Is16) {
3148 if (!isUInt<32>(Imm < 0 ? -uint64_t(Imm) : uint64_t(Imm))) {
3149 Warning(BaseLoc, "displacement " + Twine(Imm) +
3150 " shortened to 32-bit signed " +
3151 Twine(static_cast<int32_t>(Imm)));
3153 } else if (!isUInt<16>(Imm < 0 ? -uint64_t(Imm) : uint64_t(Imm))) {
3154 Warning(BaseLoc, "displacement " + Twine(Imm) +
3155 " shortened to 16-bit signed " +
3156 Twine(static_cast<int16_t>(Imm)));
3161 if (SegReg || BaseReg || IndexReg)
3162 Operands.push_back(X86Operand::CreateMem(getPointerWidth(), SegReg, Disp,
3163 BaseReg, IndexReg, Scale, StartLoc,
3164 EndLoc));
3165 else
3166 Operands.push_back(
3167 X86Operand::CreateMem(getPointerWidth(), Disp, StartLoc, EndLoc));
3168 return false;
3171 // Parse either a standard primary expression or a register.
3172 bool X86AsmParser::parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) {
3173 MCAsmParser &Parser = getParser();
3174 // See if this is a register first.
3175 if (getTok().is(AsmToken::Percent) ||
3176 (isParsingIntelSyntax() && getTok().is(AsmToken::Identifier) &&
3177 MatchRegisterName(Parser.getTok().getString()))) {
3178 SMLoc StartLoc = Parser.getTok().getLoc();
3179 MCRegister RegNo;
3180 if (parseRegister(RegNo, StartLoc, EndLoc))
3181 return true;
3182 Res = X86MCExpr::create(RegNo, Parser.getContext());
3183 return false;
3185 return Parser.parsePrimaryExpr(Res, EndLoc, nullptr);
3188 bool X86AsmParser::parseInstruction(ParseInstructionInfo &Info, StringRef Name,
3189 SMLoc NameLoc, OperandVector &Operands) {
3190 MCAsmParser &Parser = getParser();
3191 InstInfo = &Info;
3193 // Reset the forced VEX encoding.
3194 ForcedOpcodePrefix = OpcodePrefix_Default;
3195 ForcedDispEncoding = DispEncoding_Default;
3196 UseApxExtendedReg = false;
3197 ForcedNoFlag = false;
3199 // Parse pseudo prefixes.
3200 while (true) {
3201 if (Name == "{") {
3202 if (getLexer().isNot(AsmToken::Identifier))
3203 return Error(Parser.getTok().getLoc(), "Unexpected token after '{'");
3204 std::string Prefix = Parser.getTok().getString().lower();
3205 Parser.Lex(); // Eat identifier.
3206 if (getLexer().isNot(AsmToken::RCurly))
3207 return Error(Parser.getTok().getLoc(), "Expected '}'");
3208 Parser.Lex(); // Eat curly.
3210 if (Prefix == "rex")
3211 ForcedOpcodePrefix = OpcodePrefix_REX;
3212 else if (Prefix == "rex2")
3213 ForcedOpcodePrefix = OpcodePrefix_REX2;
3214 else if (Prefix == "vex")
3215 ForcedOpcodePrefix = OpcodePrefix_VEX;
3216 else if (Prefix == "vex2")
3217 ForcedOpcodePrefix = OpcodePrefix_VEX2;
3218 else if (Prefix == "vex3")
3219 ForcedOpcodePrefix = OpcodePrefix_VEX3;
3220 else if (Prefix == "evex")
3221 ForcedOpcodePrefix = OpcodePrefix_EVEX;
3222 else if (Prefix == "disp8")
3223 ForcedDispEncoding = DispEncoding_Disp8;
3224 else if (Prefix == "disp32")
3225 ForcedDispEncoding = DispEncoding_Disp32;
3226 else if (Prefix == "nf")
3227 ForcedNoFlag = true;
3228 else
3229 return Error(NameLoc, "unknown prefix");
3231 NameLoc = Parser.getTok().getLoc();
3232 if (getLexer().is(AsmToken::LCurly)) {
3233 Parser.Lex();
3234 Name = "{";
3235 } else {
3236 if (getLexer().isNot(AsmToken::Identifier))
3237 return Error(Parser.getTok().getLoc(), "Expected identifier");
3238 // FIXME: The mnemonic won't match correctly if its not in lower case.
3239 Name = Parser.getTok().getString();
3240 Parser.Lex();
3242 continue;
3244 // Parse MASM style pseudo prefixes.
3245 if (isParsingMSInlineAsm()) {
3246 if (Name.equals_insensitive("vex"))
3247 ForcedOpcodePrefix = OpcodePrefix_VEX;
3248 else if (Name.equals_insensitive("vex2"))
3249 ForcedOpcodePrefix = OpcodePrefix_VEX2;
3250 else if (Name.equals_insensitive("vex3"))
3251 ForcedOpcodePrefix = OpcodePrefix_VEX3;
3252 else if (Name.equals_insensitive("evex"))
3253 ForcedOpcodePrefix = OpcodePrefix_EVEX;
3255 if (ForcedOpcodePrefix != OpcodePrefix_Default) {
3256 if (getLexer().isNot(AsmToken::Identifier))
3257 return Error(Parser.getTok().getLoc(), "Expected identifier");
3258 // FIXME: The mnemonic won't match correctly if its not in lower case.
3259 Name = Parser.getTok().getString();
3260 NameLoc = Parser.getTok().getLoc();
3261 Parser.Lex();
3264 break;
3267 // Support the suffix syntax for overriding displacement size as well.
3268 if (Name.consume_back(".d32")) {
3269 ForcedDispEncoding = DispEncoding_Disp32;
3270 } else if (Name.consume_back(".d8")) {
3271 ForcedDispEncoding = DispEncoding_Disp8;
3274 StringRef PatchedName = Name;
3276 // Hack to skip "short" following Jcc.
3277 if (isParsingIntelSyntax() &&
3278 (PatchedName == "jmp" || PatchedName == "jc" || PatchedName == "jnc" ||
3279 PatchedName == "jcxz" || PatchedName == "jecxz" ||
3280 (PatchedName.starts_with("j") &&
3281 ParseConditionCode(PatchedName.substr(1)) != X86::COND_INVALID))) {
3282 StringRef NextTok = Parser.getTok().getString();
3283 if (Parser.isParsingMasm() ? NextTok.equals_insensitive("short")
3284 : NextTok == "short") {
3285 SMLoc NameEndLoc =
3286 NameLoc.getFromPointer(NameLoc.getPointer() + Name.size());
3287 // Eat the short keyword.
3288 Parser.Lex();
3289 // MS and GAS ignore the short keyword; they both determine the jmp type
3290 // based on the distance of the label. (NASM does emit different code with
3291 // and without "short," though.)
3292 InstInfo->AsmRewrites->emplace_back(AOK_Skip, NameEndLoc,
3293 NextTok.size() + 1);
3297 // FIXME: Hack to recognize setneb as setne.
3298 if (PatchedName.starts_with("set") && PatchedName.ends_with("b") &&
3299 PatchedName != "setzub" && PatchedName != "setzunb" &&
3300 PatchedName != "setb" && PatchedName != "setnb")
3301 PatchedName = PatchedName.substr(0, Name.size()-1);
3303 unsigned ComparisonPredicate = ~0U;
3305 // FIXME: Hack to recognize cmp<comparison code>{sh,ss,sd,ph,ps,pd}.
3306 if ((PatchedName.starts_with("cmp") || PatchedName.starts_with("vcmp")) &&
3307 (PatchedName.ends_with("ss") || PatchedName.ends_with("sd") ||
3308 PatchedName.ends_with("sh") || PatchedName.ends_with("ph") ||
3309 PatchedName.ends_with("pbf16") || PatchedName.ends_with("ps") ||
3310 PatchedName.ends_with("pd"))) {
3311 bool IsVCMP = PatchedName[0] == 'v';
3312 unsigned CCIdx = IsVCMP ? 4 : 3;
3313 unsigned suffixLength = PatchedName.ends_with("pbf16") ? 5 : 2;
3314 unsigned CC = StringSwitch<unsigned>(
3315 PatchedName.slice(CCIdx, PatchedName.size() - suffixLength))
3316 .Case("eq", 0x00)
3317 .Case("eq_oq", 0x00)
3318 .Case("lt", 0x01)
3319 .Case("lt_os", 0x01)
3320 .Case("le", 0x02)
3321 .Case("le_os", 0x02)
3322 .Case("unord", 0x03)
3323 .Case("unord_q", 0x03)
3324 .Case("neq", 0x04)
3325 .Case("neq_uq", 0x04)
3326 .Case("nlt", 0x05)
3327 .Case("nlt_us", 0x05)
3328 .Case("nle", 0x06)
3329 .Case("nle_us", 0x06)
3330 .Case("ord", 0x07)
3331 .Case("ord_q", 0x07)
3332 /* AVX only from here */
3333 .Case("eq_uq", 0x08)
3334 .Case("nge", 0x09)
3335 .Case("nge_us", 0x09)
3336 .Case("ngt", 0x0A)
3337 .Case("ngt_us", 0x0A)
3338 .Case("false", 0x0B)
3339 .Case("false_oq", 0x0B)
3340 .Case("neq_oq", 0x0C)
3341 .Case("ge", 0x0D)
3342 .Case("ge_os", 0x0D)
3343 .Case("gt", 0x0E)
3344 .Case("gt_os", 0x0E)
3345 .Case("true", 0x0F)
3346 .Case("true_uq", 0x0F)
3347 .Case("eq_os", 0x10)
3348 .Case("lt_oq", 0x11)
3349 .Case("le_oq", 0x12)
3350 .Case("unord_s", 0x13)
3351 .Case("neq_us", 0x14)
3352 .Case("nlt_uq", 0x15)
3353 .Case("nle_uq", 0x16)
3354 .Case("ord_s", 0x17)
3355 .Case("eq_us", 0x18)
3356 .Case("nge_uq", 0x19)
3357 .Case("ngt_uq", 0x1A)
3358 .Case("false_os", 0x1B)
3359 .Case("neq_os", 0x1C)
3360 .Case("ge_oq", 0x1D)
3361 .Case("gt_oq", 0x1E)
3362 .Case("true_us", 0x1F)
3363 .Default(~0U);
3364 if (CC != ~0U && (IsVCMP || CC < 8) &&
3365 (IsVCMP || PatchedName.back() != 'h')) {
3366 if (PatchedName.ends_with("ss"))
3367 PatchedName = IsVCMP ? "vcmpss" : "cmpss";
3368 else if (PatchedName.ends_with("sd"))
3369 PatchedName = IsVCMP ? "vcmpsd" : "cmpsd";
3370 else if (PatchedName.ends_with("ps"))
3371 PatchedName = IsVCMP ? "vcmpps" : "cmpps";
3372 else if (PatchedName.ends_with("pd"))
3373 PatchedName = IsVCMP ? "vcmppd" : "cmppd";
3374 else if (PatchedName.ends_with("sh"))
3375 PatchedName = "vcmpsh";
3376 else if (PatchedName.ends_with("ph"))
3377 PatchedName = "vcmpph";
3378 else if (PatchedName.ends_with("pbf16"))
3379 PatchedName = "vcmppbf16";
3380 else
3381 llvm_unreachable("Unexpected suffix!");
3383 ComparisonPredicate = CC;
3387 // FIXME: Hack to recognize vpcmp<comparison code>{ub,uw,ud,uq,b,w,d,q}.
3388 if (PatchedName.starts_with("vpcmp") &&
3389 (PatchedName.back() == 'b' || PatchedName.back() == 'w' ||
3390 PatchedName.back() == 'd' || PatchedName.back() == 'q')) {
3391 unsigned SuffixSize = PatchedName.drop_back().back() == 'u' ? 2 : 1;
3392 unsigned CC = StringSwitch<unsigned>(
3393 PatchedName.slice(5, PatchedName.size() - SuffixSize))
3394 .Case("eq", 0x0) // Only allowed on unsigned. Checked below.
3395 .Case("lt", 0x1)
3396 .Case("le", 0x2)
3397 //.Case("false", 0x3) // Not a documented alias.
3398 .Case("neq", 0x4)
3399 .Case("nlt", 0x5)
3400 .Case("nle", 0x6)
3401 //.Case("true", 0x7) // Not a documented alias.
3402 .Default(~0U);
3403 if (CC != ~0U && (CC != 0 || SuffixSize == 2)) {
3404 switch (PatchedName.back()) {
3405 default: llvm_unreachable("Unexpected character!");
3406 case 'b': PatchedName = SuffixSize == 2 ? "vpcmpub" : "vpcmpb"; break;
3407 case 'w': PatchedName = SuffixSize == 2 ? "vpcmpuw" : "vpcmpw"; break;
3408 case 'd': PatchedName = SuffixSize == 2 ? "vpcmpud" : "vpcmpd"; break;
3409 case 'q': PatchedName = SuffixSize == 2 ? "vpcmpuq" : "vpcmpq"; break;
3411 // Set up the immediate to push into the operands later.
3412 ComparisonPredicate = CC;
3416 // FIXME: Hack to recognize vpcom<comparison code>{ub,uw,ud,uq,b,w,d,q}.
3417 if (PatchedName.starts_with("vpcom") &&
3418 (PatchedName.back() == 'b' || PatchedName.back() == 'w' ||
3419 PatchedName.back() == 'd' || PatchedName.back() == 'q')) {
3420 unsigned SuffixSize = PatchedName.drop_back().back() == 'u' ? 2 : 1;
3421 unsigned CC = StringSwitch<unsigned>(
3422 PatchedName.slice(5, PatchedName.size() - SuffixSize))
3423 .Case("lt", 0x0)
3424 .Case("le", 0x1)
3425 .Case("gt", 0x2)
3426 .Case("ge", 0x3)
3427 .Case("eq", 0x4)
3428 .Case("neq", 0x5)
3429 .Case("false", 0x6)
3430 .Case("true", 0x7)
3431 .Default(~0U);
3432 if (CC != ~0U) {
3433 switch (PatchedName.back()) {
3434 default: llvm_unreachable("Unexpected character!");
3435 case 'b': PatchedName = SuffixSize == 2 ? "vpcomub" : "vpcomb"; break;
3436 case 'w': PatchedName = SuffixSize == 2 ? "vpcomuw" : "vpcomw"; break;
3437 case 'd': PatchedName = SuffixSize == 2 ? "vpcomud" : "vpcomd"; break;
3438 case 'q': PatchedName = SuffixSize == 2 ? "vpcomuq" : "vpcomq"; break;
3440 // Set up the immediate to push into the operands later.
3441 ComparisonPredicate = CC;
3445 // Determine whether this is an instruction prefix.
3446 // FIXME:
3447 // Enhance prefixes integrity robustness. for example, following forms
3448 // are currently tolerated:
3449 // repz repnz <insn> ; GAS errors for the use of two similar prefixes
3450 // lock addq %rax, %rbx ; Destination operand must be of memory type
3451 // xacquire <insn> ; xacquire must be accompanied by 'lock'
3452 bool IsPrefix =
3453 StringSwitch<bool>(Name)
3454 .Cases("cs", "ds", "es", "fs", "gs", "ss", true)
3455 .Cases("rex64", "data32", "data16", "addr32", "addr16", true)
3456 .Cases("xacquire", "xrelease", true)
3457 .Cases("acquire", "release", isParsingIntelSyntax())
3458 .Default(false);
3460 auto isLockRepeatNtPrefix = [](StringRef N) {
3461 return StringSwitch<bool>(N)
3462 .Cases("lock", "rep", "repe", "repz", "repne", "repnz", "notrack", true)
3463 .Default(false);
3466 bool CurlyAsEndOfStatement = false;
3468 unsigned Flags = X86::IP_NO_PREFIX;
3469 while (isLockRepeatNtPrefix(Name.lower())) {
3470 unsigned Prefix =
3471 StringSwitch<unsigned>(Name)
3472 .Cases("lock", "lock", X86::IP_HAS_LOCK)
3473 .Cases("rep", "repe", "repz", X86::IP_HAS_REPEAT)
3474 .Cases("repne", "repnz", X86::IP_HAS_REPEAT_NE)
3475 .Cases("notrack", "notrack", X86::IP_HAS_NOTRACK)
3476 .Default(X86::IP_NO_PREFIX); // Invalid prefix (impossible)
3477 Flags |= Prefix;
3478 if (getLexer().is(AsmToken::EndOfStatement)) {
3479 // We don't have real instr with the given prefix
3480 // let's use the prefix as the instr.
3481 // TODO: there could be several prefixes one after another
3482 Flags = X86::IP_NO_PREFIX;
3483 break;
3485 // FIXME: The mnemonic won't match correctly if its not in lower case.
3486 Name = Parser.getTok().getString();
3487 Parser.Lex(); // eat the prefix
3488 // Hack: we could have something like "rep # some comment" or
3489 // "lock; cmpxchg16b $1" or "lock\0A\09incl" or "lock/incl"
3490 while (Name.starts_with(";") || Name.starts_with("\n") ||
3491 Name.starts_with("#") || Name.starts_with("\t") ||
3492 Name.starts_with("/")) {
3493 // FIXME: The mnemonic won't match correctly if its not in lower case.
3494 Name = Parser.getTok().getString();
3495 Parser.Lex(); // go to next prefix or instr
3499 if (Flags)
3500 PatchedName = Name;
3502 // Hacks to handle 'data16' and 'data32'
3503 if (PatchedName == "data16" && is16BitMode()) {
3504 return Error(NameLoc, "redundant data16 prefix");
3506 if (PatchedName == "data32") {
3507 if (is32BitMode())
3508 return Error(NameLoc, "redundant data32 prefix");
3509 if (is64BitMode())
3510 return Error(NameLoc, "'data32' is not supported in 64-bit mode");
3511 // Hack to 'data16' for the table lookup.
3512 PatchedName = "data16";
3514 if (getLexer().isNot(AsmToken::EndOfStatement)) {
3515 StringRef Next = Parser.getTok().getString();
3516 getLexer().Lex();
3517 // data32 effectively changes the instruction suffix.
3518 // TODO Generalize.
3519 if (Next == "callw")
3520 Next = "calll";
3521 if (Next == "ljmpw")
3522 Next = "ljmpl";
3524 Name = Next;
3525 PatchedName = Name;
3526 ForcedDataPrefix = X86::Is32Bit;
3527 IsPrefix = false;
3531 Operands.push_back(X86Operand::CreateToken(PatchedName, NameLoc));
3533 // Push the immediate if we extracted one from the mnemonic.
3534 if (ComparisonPredicate != ~0U && !isParsingIntelSyntax()) {
3535 const MCExpr *ImmOp = MCConstantExpr::create(ComparisonPredicate,
3536 getParser().getContext());
3537 Operands.push_back(X86Operand::CreateImm(ImmOp, NameLoc, NameLoc));
3540 // Parse condtional flags after mnemonic.
3541 if ((Name.starts_with("ccmp") || Name.starts_with("ctest")) &&
3542 parseCFlagsOp(Operands))
3543 return true;
3545 // This does the actual operand parsing. Don't parse any more if we have a
3546 // prefix juxtaposed with an operation like "lock incl 4(%rax)", because we
3547 // just want to parse the "lock" as the first instruction and the "incl" as
3548 // the next one.
3549 if (getLexer().isNot(AsmToken::EndOfStatement) && !IsPrefix) {
3550 // Parse '*' modifier.
3551 if (getLexer().is(AsmToken::Star))
3552 Operands.push_back(X86Operand::CreateToken("*", consumeToken()));
3554 // Read the operands.
3555 while (true) {
3556 if (parseOperand(Operands, Name))
3557 return true;
3558 if (HandleAVX512Operand(Operands))
3559 return true;
3561 // check for comma and eat it
3562 if (getLexer().is(AsmToken::Comma))
3563 Parser.Lex();
3564 else
3565 break;
3568 // In MS inline asm curly braces mark the beginning/end of a block,
3569 // therefore they should be interepreted as end of statement
3570 CurlyAsEndOfStatement =
3571 isParsingIntelSyntax() && isParsingMSInlineAsm() &&
3572 (getLexer().is(AsmToken::LCurly) || getLexer().is(AsmToken::RCurly));
3573 if (getLexer().isNot(AsmToken::EndOfStatement) && !CurlyAsEndOfStatement)
3574 return TokError("unexpected token in argument list");
3577 // Push the immediate if we extracted one from the mnemonic.
3578 if (ComparisonPredicate != ~0U && isParsingIntelSyntax()) {
3579 const MCExpr *ImmOp = MCConstantExpr::create(ComparisonPredicate,
3580 getParser().getContext());
3581 Operands.push_back(X86Operand::CreateImm(ImmOp, NameLoc, NameLoc));
3584 // Consume the EndOfStatement or the prefix separator Slash
3585 if (getLexer().is(AsmToken::EndOfStatement) ||
3586 (IsPrefix && getLexer().is(AsmToken::Slash)))
3587 Parser.Lex();
3588 else if (CurlyAsEndOfStatement)
3589 // Add an actual EndOfStatement before the curly brace
3590 Info.AsmRewrites->emplace_back(AOK_EndOfStatement,
3591 getLexer().getTok().getLoc(), 0);
3593 // This is for gas compatibility and cannot be done in td.
3594 // Adding "p" for some floating point with no argument.
3595 // For example: fsub --> fsubp
3596 bool IsFp =
3597 Name == "fsub" || Name == "fdiv" || Name == "fsubr" || Name == "fdivr";
3598 if (IsFp && Operands.size() == 1) {
3599 const char *Repl = StringSwitch<const char *>(Name)
3600 .Case("fsub", "fsubp")
3601 .Case("fdiv", "fdivp")
3602 .Case("fsubr", "fsubrp")
3603 .Case("fdivr", "fdivrp");
3604 static_cast<X86Operand &>(*Operands[0]).setTokenValue(Repl);
3607 if ((Name == "mov" || Name == "movw" || Name == "movl") &&
3608 (Operands.size() == 3)) {
3609 X86Operand &Op1 = (X86Operand &)*Operands[1];
3610 X86Operand &Op2 = (X86Operand &)*Operands[2];
3611 SMLoc Loc = Op1.getEndLoc();
3612 // Moving a 32 or 16 bit value into a segment register has the same
3613 // behavior. Modify such instructions to always take shorter form.
3614 if (Op1.isReg() && Op2.isReg() &&
3615 X86MCRegisterClasses[X86::SEGMENT_REGRegClassID].contains(
3616 Op2.getReg()) &&
3617 (X86MCRegisterClasses[X86::GR16RegClassID].contains(Op1.getReg()) ||
3618 X86MCRegisterClasses[X86::GR32RegClassID].contains(Op1.getReg()))) {
3619 // Change instruction name to match new instruction.
3620 if (Name != "mov" && Name[3] == (is16BitMode() ? 'l' : 'w')) {
3621 Name = is16BitMode() ? "movw" : "movl";
3622 Operands[0] = X86Operand::CreateToken(Name, NameLoc);
3624 // Select the correct equivalent 16-/32-bit source register.
3625 MCRegister Reg =
3626 getX86SubSuperRegister(Op1.getReg(), is16BitMode() ? 16 : 32);
3627 Operands[1] = X86Operand::CreateReg(Reg, Loc, Loc);
3631 // This is a terrible hack to handle "out[s]?[bwl]? %al, (%dx)" ->
3632 // "outb %al, %dx". Out doesn't take a memory form, but this is a widely
3633 // documented form in various unofficial manuals, so a lot of code uses it.
3634 if ((Name == "outb" || Name == "outsb" || Name == "outw" || Name == "outsw" ||
3635 Name == "outl" || Name == "outsl" || Name == "out" || Name == "outs") &&
3636 Operands.size() == 3) {
3637 X86Operand &Op = (X86Operand &)*Operands.back();
3638 if (Op.isDXReg())
3639 Operands.back() = X86Operand::CreateReg(X86::DX, Op.getStartLoc(),
3640 Op.getEndLoc());
3642 // Same hack for "in[s]?[bwl]? (%dx), %al" -> "inb %dx, %al".
3643 if ((Name == "inb" || Name == "insb" || Name == "inw" || Name == "insw" ||
3644 Name == "inl" || Name == "insl" || Name == "in" || Name == "ins") &&
3645 Operands.size() == 3) {
3646 X86Operand &Op = (X86Operand &)*Operands[1];
3647 if (Op.isDXReg())
3648 Operands[1] = X86Operand::CreateReg(X86::DX, Op.getStartLoc(),
3649 Op.getEndLoc());
3652 SmallVector<std::unique_ptr<MCParsedAsmOperand>, 2> TmpOperands;
3653 bool HadVerifyError = false;
3655 // Append default arguments to "ins[bwld]"
3656 if (Name.starts_with("ins") &&
3657 (Operands.size() == 1 || Operands.size() == 3) &&
3658 (Name == "insb" || Name == "insw" || Name == "insl" || Name == "insd" ||
3659 Name == "ins")) {
3661 AddDefaultSrcDestOperands(TmpOperands,
3662 X86Operand::CreateReg(X86::DX, NameLoc, NameLoc),
3663 DefaultMemDIOperand(NameLoc));
3664 HadVerifyError = VerifyAndAdjustOperands(Operands, TmpOperands);
3667 // Append default arguments to "outs[bwld]"
3668 if (Name.starts_with("outs") &&
3669 (Operands.size() == 1 || Operands.size() == 3) &&
3670 (Name == "outsb" || Name == "outsw" || Name == "outsl" ||
3671 Name == "outsd" || Name == "outs")) {
3672 AddDefaultSrcDestOperands(TmpOperands, DefaultMemSIOperand(NameLoc),
3673 X86Operand::CreateReg(X86::DX, NameLoc, NameLoc));
3674 HadVerifyError = VerifyAndAdjustOperands(Operands, TmpOperands);
3677 // Transform "lods[bwlq]" into "lods[bwlq] ($SIREG)" for appropriate
3678 // values of $SIREG according to the mode. It would be nice if this
3679 // could be achieved with InstAlias in the tables.
3680 if (Name.starts_with("lods") &&
3681 (Operands.size() == 1 || Operands.size() == 2) &&
3682 (Name == "lods" || Name == "lodsb" || Name == "lodsw" ||
3683 Name == "lodsl" || Name == "lodsd" || Name == "lodsq")) {
3684 TmpOperands.push_back(DefaultMemSIOperand(NameLoc));
3685 HadVerifyError = VerifyAndAdjustOperands(Operands, TmpOperands);
3688 // Transform "stos[bwlq]" into "stos[bwlq] ($DIREG)" for appropriate
3689 // values of $DIREG according to the mode. It would be nice if this
3690 // could be achieved with InstAlias in the tables.
3691 if (Name.starts_with("stos") &&
3692 (Operands.size() == 1 || Operands.size() == 2) &&
3693 (Name == "stos" || Name == "stosb" || Name == "stosw" ||
3694 Name == "stosl" || Name == "stosd" || Name == "stosq")) {
3695 TmpOperands.push_back(DefaultMemDIOperand(NameLoc));
3696 HadVerifyError = VerifyAndAdjustOperands(Operands, TmpOperands);
3699 // Transform "scas[bwlq]" into "scas[bwlq] ($DIREG)" for appropriate
3700 // values of $DIREG according to the mode. It would be nice if this
3701 // could be achieved with InstAlias in the tables.
3702 if (Name.starts_with("scas") &&
3703 (Operands.size() == 1 || Operands.size() == 2) &&
3704 (Name == "scas" || Name == "scasb" || Name == "scasw" ||
3705 Name == "scasl" || Name == "scasd" || Name == "scasq")) {
3706 TmpOperands.push_back(DefaultMemDIOperand(NameLoc));
3707 HadVerifyError = VerifyAndAdjustOperands(Operands, TmpOperands);
3710 // Add default SI and DI operands to "cmps[bwlq]".
3711 if (Name.starts_with("cmps") &&
3712 (Operands.size() == 1 || Operands.size() == 3) &&
3713 (Name == "cmps" || Name == "cmpsb" || Name == "cmpsw" ||
3714 Name == "cmpsl" || Name == "cmpsd" || Name == "cmpsq")) {
3715 AddDefaultSrcDestOperands(TmpOperands, DefaultMemDIOperand(NameLoc),
3716 DefaultMemSIOperand(NameLoc));
3717 HadVerifyError = VerifyAndAdjustOperands(Operands, TmpOperands);
3720 // Add default SI and DI operands to "movs[bwlq]".
3721 if (((Name.starts_with("movs") &&
3722 (Name == "movs" || Name == "movsb" || Name == "movsw" ||
3723 Name == "movsl" || Name == "movsd" || Name == "movsq")) ||
3724 (Name.starts_with("smov") &&
3725 (Name == "smov" || Name == "smovb" || Name == "smovw" ||
3726 Name == "smovl" || Name == "smovd" || Name == "smovq"))) &&
3727 (Operands.size() == 1 || Operands.size() == 3)) {
3728 if (Name == "movsd" && Operands.size() == 1 && !isParsingIntelSyntax())
3729 Operands.back() = X86Operand::CreateToken("movsl", NameLoc);
3730 AddDefaultSrcDestOperands(TmpOperands, DefaultMemSIOperand(NameLoc),
3731 DefaultMemDIOperand(NameLoc));
3732 HadVerifyError = VerifyAndAdjustOperands(Operands, TmpOperands);
3735 // Check if we encountered an error for one the string insturctions
3736 if (HadVerifyError) {
3737 return HadVerifyError;
3740 // Transforms "xlat mem8" into "xlatb"
3741 if ((Name == "xlat" || Name == "xlatb") && Operands.size() == 2) {
3742 X86Operand &Op1 = static_cast<X86Operand &>(*Operands[1]);
3743 if (Op1.isMem8()) {
3744 Warning(Op1.getStartLoc(), "memory operand is only for determining the "
3745 "size, (R|E)BX will be used for the location");
3746 Operands.pop_back();
3747 static_cast<X86Operand &>(*Operands[0]).setTokenValue("xlatb");
3751 if (Flags)
3752 Operands.push_back(X86Operand::CreatePrefix(Flags, NameLoc, NameLoc));
3753 return false;
3756 static bool convertSSEToAVX(MCInst &Inst) {
3757 ArrayRef<X86TableEntry> Table{X86SSE2AVXTable};
3758 unsigned Opcode = Inst.getOpcode();
3759 const auto I = llvm::lower_bound(Table, Opcode);
3760 if (I == Table.end() || I->OldOpc != Opcode)
3761 return false;
3763 Inst.setOpcode(I->NewOpc);
3764 // AVX variant of BLENDVPD/BLENDVPS/PBLENDVB instructions has more
3765 // operand compare to SSE variant, which is added below
3766 if (X86::isBLENDVPD(Opcode) || X86::isBLENDVPS(Opcode) ||
3767 X86::isPBLENDVB(Opcode))
3768 Inst.addOperand(Inst.getOperand(2));
3770 return true;
3773 bool X86AsmParser::processInstruction(MCInst &Inst, const OperandVector &Ops) {
3774 if (MCOptions.X86Sse2Avx && convertSSEToAVX(Inst))
3775 return true;
3777 if (ForcedOpcodePrefix != OpcodePrefix_VEX3 &&
3778 X86::optimizeInstFromVEX3ToVEX2(Inst, MII.get(Inst.getOpcode())))
3779 return true;
3781 if (X86::optimizeShiftRotateWithImmediateOne(Inst))
3782 return true;
3784 auto replaceWithCCMPCTEST = [&](unsigned Opcode) -> bool {
3785 if (ForcedOpcodePrefix == OpcodePrefix_EVEX) {
3786 Inst.setFlags(~(X86::IP_USE_EVEX)&Inst.getFlags());
3787 Inst.setOpcode(Opcode);
3788 Inst.addOperand(MCOperand::createImm(0));
3789 Inst.addOperand(MCOperand::createImm(10));
3790 return true;
3792 return false;
3795 switch (Inst.getOpcode()) {
3796 default: return false;
3797 case X86::JMP_1:
3798 // {disp32} forces a larger displacement as if the instruction was relaxed.
3799 // NOTE: 16-bit mode uses 16-bit displacement even though it says {disp32}.
3800 // This matches GNU assembler.
3801 if (ForcedDispEncoding == DispEncoding_Disp32) {
3802 Inst.setOpcode(is16BitMode() ? X86::JMP_2 : X86::JMP_4);
3803 return true;
3806 return false;
3807 case X86::JCC_1:
3808 // {disp32} forces a larger displacement as if the instruction was relaxed.
3809 // NOTE: 16-bit mode uses 16-bit displacement even though it says {disp32}.
3810 // This matches GNU assembler.
3811 if (ForcedDispEncoding == DispEncoding_Disp32) {
3812 Inst.setOpcode(is16BitMode() ? X86::JCC_2 : X86::JCC_4);
3813 return true;
3816 return false;
3817 case X86::INT: {
3818 // Transforms "int $3" into "int3" as a size optimization.
3819 // We can't write this as an InstAlias.
3820 if (!Inst.getOperand(0).isImm() || Inst.getOperand(0).getImm() != 3)
3821 return false;
3822 Inst.clear();
3823 Inst.setOpcode(X86::INT3);
3824 return true;
3826 // `{evex} cmp <>, <>` is alias of `ccmpt {dfv=} <>, <>`, and
3827 // `{evex} test <>, <>` is alias of `ctest {dfv=} <>, <>`
3828 #define FROM_TO(FROM, TO) \
3829 case X86::FROM: \
3830 return replaceWithCCMPCTEST(X86::TO);
3831 FROM_TO(CMP64rr, CCMP64rr)
3832 FROM_TO(CMP64mi32, CCMP64mi32)
3833 FROM_TO(CMP64mi8, CCMP64mi8)
3834 FROM_TO(CMP64mr, CCMP64mr)
3835 FROM_TO(CMP64ri32, CCMP64ri32)
3836 FROM_TO(CMP64ri8, CCMP64ri8)
3837 FROM_TO(CMP64rm, CCMP64rm)
3839 FROM_TO(CMP32rr, CCMP32rr)
3840 FROM_TO(CMP32mi, CCMP32mi)
3841 FROM_TO(CMP32mi8, CCMP32mi8)
3842 FROM_TO(CMP32mr, CCMP32mr)
3843 FROM_TO(CMP32ri, CCMP32ri)
3844 FROM_TO(CMP32ri8, CCMP32ri8)
3845 FROM_TO(CMP32rm, CCMP32rm)
3847 FROM_TO(CMP16rr, CCMP16rr)
3848 FROM_TO(CMP16mi, CCMP16mi)
3849 FROM_TO(CMP16mi8, CCMP16mi8)
3850 FROM_TO(CMP16mr, CCMP16mr)
3851 FROM_TO(CMP16ri, CCMP16ri)
3852 FROM_TO(CMP16ri8, CCMP16ri8)
3853 FROM_TO(CMP16rm, CCMP16rm)
3855 FROM_TO(CMP8rr, CCMP8rr)
3856 FROM_TO(CMP8mi, CCMP8mi)
3857 FROM_TO(CMP8mr, CCMP8mr)
3858 FROM_TO(CMP8ri, CCMP8ri)
3859 FROM_TO(CMP8rm, CCMP8rm)
3861 FROM_TO(TEST64rr, CTEST64rr)
3862 FROM_TO(TEST64mi32, CTEST64mi32)
3863 FROM_TO(TEST64mr, CTEST64mr)
3864 FROM_TO(TEST64ri32, CTEST64ri32)
3866 FROM_TO(TEST32rr, CTEST32rr)
3867 FROM_TO(TEST32mi, CTEST32mi)
3868 FROM_TO(TEST32mr, CTEST32mr)
3869 FROM_TO(TEST32ri, CTEST32ri)
3871 FROM_TO(TEST16rr, CTEST16rr)
3872 FROM_TO(TEST16mi, CTEST16mi)
3873 FROM_TO(TEST16mr, CTEST16mr)
3874 FROM_TO(TEST16ri, CTEST16ri)
3876 FROM_TO(TEST8rr, CTEST8rr)
3877 FROM_TO(TEST8mi, CTEST8mi)
3878 FROM_TO(TEST8mr, CTEST8mr)
3879 FROM_TO(TEST8ri, CTEST8ri)
3880 #undef FROM_TO
3884 bool X86AsmParser::validateInstruction(MCInst &Inst, const OperandVector &Ops) {
3885 using namespace X86;
3886 const MCRegisterInfo *MRI = getContext().getRegisterInfo();
3887 unsigned Opcode = Inst.getOpcode();
3888 uint64_t TSFlags = MII.get(Opcode).TSFlags;
3889 if (isVFCMADDCPH(Opcode) || isVFCMADDCSH(Opcode) || isVFMADDCPH(Opcode) ||
3890 isVFMADDCSH(Opcode)) {
3891 MCRegister Dest = Inst.getOperand(0).getReg();
3892 for (unsigned i = 2; i < Inst.getNumOperands(); i++)
3893 if (Inst.getOperand(i).isReg() && Dest == Inst.getOperand(i).getReg())
3894 return Warning(Ops[0]->getStartLoc(), "Destination register should be "
3895 "distinct from source registers");
3896 } else if (isVFCMULCPH(Opcode) || isVFCMULCSH(Opcode) || isVFMULCPH(Opcode) ||
3897 isVFMULCSH(Opcode)) {
3898 MCRegister Dest = Inst.getOperand(0).getReg();
3899 // The mask variants have different operand list. Scan from the third
3900 // operand to avoid emitting incorrect warning.
3901 // VFMULCPHZrr Dest, Src1, Src2
3902 // VFMULCPHZrrk Dest, Dest, Mask, Src1, Src2
3903 // VFMULCPHZrrkz Dest, Mask, Src1, Src2
3904 for (unsigned i = ((TSFlags & X86II::EVEX_K) ? 2 : 1);
3905 i < Inst.getNumOperands(); i++)
3906 if (Inst.getOperand(i).isReg() && Dest == Inst.getOperand(i).getReg())
3907 return Warning(Ops[0]->getStartLoc(), "Destination register should be "
3908 "distinct from source registers");
3909 } else if (isV4FMADDPS(Opcode) || isV4FMADDSS(Opcode) ||
3910 isV4FNMADDPS(Opcode) || isV4FNMADDSS(Opcode) ||
3911 isVP4DPWSSDS(Opcode) || isVP4DPWSSD(Opcode)) {
3912 MCRegister Src2 =
3913 Inst.getOperand(Inst.getNumOperands() - X86::AddrNumOperands - 1)
3914 .getReg();
3915 unsigned Src2Enc = MRI->getEncodingValue(Src2);
3916 if (Src2Enc % 4 != 0) {
3917 StringRef RegName = X86IntelInstPrinter::getRegisterName(Src2);
3918 unsigned GroupStart = (Src2Enc / 4) * 4;
3919 unsigned GroupEnd = GroupStart + 3;
3920 return Warning(Ops[0]->getStartLoc(),
3921 "source register '" + RegName + "' implicitly denotes '" +
3922 RegName.take_front(3) + Twine(GroupStart) + "' to '" +
3923 RegName.take_front(3) + Twine(GroupEnd) +
3924 "' source group");
3926 } else if (isVGATHERDPD(Opcode) || isVGATHERDPS(Opcode) ||
3927 isVGATHERQPD(Opcode) || isVGATHERQPS(Opcode) ||
3928 isVPGATHERDD(Opcode) || isVPGATHERDQ(Opcode) ||
3929 isVPGATHERQD(Opcode) || isVPGATHERQQ(Opcode)) {
3930 bool HasEVEX = (TSFlags & X86II::EncodingMask) == X86II::EVEX;
3931 if (HasEVEX) {
3932 unsigned Dest = MRI->getEncodingValue(Inst.getOperand(0).getReg());
3933 unsigned Index = MRI->getEncodingValue(
3934 Inst.getOperand(4 + X86::AddrIndexReg).getReg());
3935 if (Dest == Index)
3936 return Warning(Ops[0]->getStartLoc(), "index and destination registers "
3937 "should be distinct");
3938 } else {
3939 unsigned Dest = MRI->getEncodingValue(Inst.getOperand(0).getReg());
3940 unsigned Mask = MRI->getEncodingValue(Inst.getOperand(1).getReg());
3941 unsigned Index = MRI->getEncodingValue(
3942 Inst.getOperand(3 + X86::AddrIndexReg).getReg());
3943 if (Dest == Mask || Dest == Index || Mask == Index)
3944 return Warning(Ops[0]->getStartLoc(), "mask, index, and destination "
3945 "registers should be distinct");
3947 } else if (isTCMMIMFP16PS(Opcode) || isTCMMRLFP16PS(Opcode) ||
3948 isTDPBF16PS(Opcode) || isTDPFP16PS(Opcode) || isTDPBSSD(Opcode) ||
3949 isTDPBSUD(Opcode) || isTDPBUSD(Opcode) || isTDPBUUD(Opcode)) {
3950 MCRegister SrcDest = Inst.getOperand(0).getReg();
3951 MCRegister Src1 = Inst.getOperand(2).getReg();
3952 MCRegister Src2 = Inst.getOperand(3).getReg();
3953 if (SrcDest == Src1 || SrcDest == Src2 || Src1 == Src2)
3954 return Error(Ops[0]->getStartLoc(), "all tmm registers must be distinct");
3957 // Check that we aren't mixing AH/BH/CH/DH with REX prefix. We only need to
3958 // check this with the legacy encoding, VEX/EVEX/XOP don't use REX.
3959 if ((TSFlags & X86II::EncodingMask) == 0) {
3960 MCRegister HReg;
3961 bool UsesRex = TSFlags & X86II::REX_W;
3962 unsigned NumOps = Inst.getNumOperands();
3963 for (unsigned i = 0; i != NumOps; ++i) {
3964 const MCOperand &MO = Inst.getOperand(i);
3965 if (!MO.isReg())
3966 continue;
3967 MCRegister Reg = MO.getReg();
3968 if (Reg == X86::AH || Reg == X86::BH || Reg == X86::CH || Reg == X86::DH)
3969 HReg = Reg;
3970 if (X86II::isX86_64NonExtLowByteReg(Reg) ||
3971 X86II::isX86_64ExtendedReg(Reg))
3972 UsesRex = true;
3975 if (UsesRex && HReg) {
3976 StringRef RegName = X86IntelInstPrinter::getRegisterName(HReg);
3977 return Error(Ops[0]->getStartLoc(),
3978 "can't encode '" + RegName + "' in an instruction requiring "
3979 "REX prefix");
3983 if ((Opcode == X86::PREFETCHIT0 || Opcode == X86::PREFETCHIT1)) {
3984 const MCOperand &MO = Inst.getOperand(X86::AddrBaseReg);
3985 if (!MO.isReg() || MO.getReg() != X86::RIP)
3986 return Warning(
3987 Ops[0]->getStartLoc(),
3988 Twine((Inst.getOpcode() == X86::PREFETCHIT0 ? "'prefetchit0'"
3989 : "'prefetchit1'")) +
3990 " only supports RIP-relative address");
3992 return false;
3995 void X86AsmParser::emitWarningForSpecialLVIInstruction(SMLoc Loc) {
3996 Warning(Loc, "Instruction may be vulnerable to LVI and "
3997 "requires manual mitigation");
3998 Note(SMLoc(), "See https://software.intel.com/"
3999 "security-software-guidance/insights/"
4000 "deep-dive-load-value-injection#specialinstructions"
4001 " for more information");
4004 /// RET instructions and also instructions that indirect calls/jumps from memory
4005 /// combine a load and a branch within a single instruction. To mitigate these
4006 /// instructions against LVI, they must be decomposed into separate load and
4007 /// branch instructions, with an LFENCE in between. For more details, see:
4008 /// - X86LoadValueInjectionRetHardening.cpp
4009 /// - X86LoadValueInjectionIndirectThunks.cpp
4010 /// - https://software.intel.com/security-software-guidance/insights/deep-dive-load-value-injection
4012 /// Returns `true` if a mitigation was applied or warning was emitted.
4013 void X86AsmParser::applyLVICFIMitigation(MCInst &Inst, MCStreamer &Out) {
4014 // Information on control-flow instructions that require manual mitigation can
4015 // be found here:
4016 // https://software.intel.com/security-software-guidance/insights/deep-dive-load-value-injection#specialinstructions
4017 switch (Inst.getOpcode()) {
4018 case X86::RET16:
4019 case X86::RET32:
4020 case X86::RET64:
4021 case X86::RETI16:
4022 case X86::RETI32:
4023 case X86::RETI64: {
4024 MCInst ShlInst, FenceInst;
4025 bool Parse32 = is32BitMode() || Code16GCC;
4026 MCRegister Basereg =
4027 is64BitMode() ? X86::RSP : (Parse32 ? X86::ESP : X86::SP);
4028 const MCExpr *Disp = MCConstantExpr::create(0, getContext());
4029 auto ShlMemOp = X86Operand::CreateMem(getPointerWidth(), /*SegReg=*/0, Disp,
4030 /*BaseReg=*/Basereg, /*IndexReg=*/0,
4031 /*Scale=*/1, SMLoc{}, SMLoc{}, 0);
4032 ShlInst.setOpcode(X86::SHL64mi);
4033 ShlMemOp->addMemOperands(ShlInst, 5);
4034 ShlInst.addOperand(MCOperand::createImm(0));
4035 FenceInst.setOpcode(X86::LFENCE);
4036 Out.emitInstruction(ShlInst, getSTI());
4037 Out.emitInstruction(FenceInst, getSTI());
4038 return;
4040 case X86::JMP16m:
4041 case X86::JMP32m:
4042 case X86::JMP64m:
4043 case X86::CALL16m:
4044 case X86::CALL32m:
4045 case X86::CALL64m:
4046 emitWarningForSpecialLVIInstruction(Inst.getLoc());
4047 return;
4051 /// To mitigate LVI, every instruction that performs a load can be followed by
4052 /// an LFENCE instruction to squash any potential mis-speculation. There are
4053 /// some instructions that require additional considerations, and may requre
4054 /// manual mitigation. For more details, see:
4055 /// https://software.intel.com/security-software-guidance/insights/deep-dive-load-value-injection
4057 /// Returns `true` if a mitigation was applied or warning was emitted.
4058 void X86AsmParser::applyLVILoadHardeningMitigation(MCInst &Inst,
4059 MCStreamer &Out) {
4060 auto Opcode = Inst.getOpcode();
4061 auto Flags = Inst.getFlags();
4062 if ((Flags & X86::IP_HAS_REPEAT) || (Flags & X86::IP_HAS_REPEAT_NE)) {
4063 // Information on REP string instructions that require manual mitigation can
4064 // be found here:
4065 // https://software.intel.com/security-software-guidance/insights/deep-dive-load-value-injection#specialinstructions
4066 switch (Opcode) {
4067 case X86::CMPSB:
4068 case X86::CMPSW:
4069 case X86::CMPSL:
4070 case X86::CMPSQ:
4071 case X86::SCASB:
4072 case X86::SCASW:
4073 case X86::SCASL:
4074 case X86::SCASQ:
4075 emitWarningForSpecialLVIInstruction(Inst.getLoc());
4076 return;
4078 } else if (Opcode == X86::REP_PREFIX || Opcode == X86::REPNE_PREFIX) {
4079 // If a REP instruction is found on its own line, it may or may not be
4080 // followed by a vulnerable instruction. Emit a warning just in case.
4081 emitWarningForSpecialLVIInstruction(Inst.getLoc());
4082 return;
4085 const MCInstrDesc &MCID = MII.get(Inst.getOpcode());
4087 // Can't mitigate after terminators or calls. A control flow change may have
4088 // already occurred.
4089 if (MCID.isTerminator() || MCID.isCall())
4090 return;
4092 // LFENCE has the mayLoad property, don't double fence.
4093 if (MCID.mayLoad() && Inst.getOpcode() != X86::LFENCE) {
4094 MCInst FenceInst;
4095 FenceInst.setOpcode(X86::LFENCE);
4096 Out.emitInstruction(FenceInst, getSTI());
4100 void X86AsmParser::emitInstruction(MCInst &Inst, OperandVector &Operands,
4101 MCStreamer &Out) {
4102 if (LVIInlineAsmHardening &&
4103 getSTI().hasFeature(X86::FeatureLVIControlFlowIntegrity))
4104 applyLVICFIMitigation(Inst, Out);
4106 Out.emitInstruction(Inst, getSTI());
4108 if (LVIInlineAsmHardening &&
4109 getSTI().hasFeature(X86::FeatureLVILoadHardening))
4110 applyLVILoadHardeningMitigation(Inst, Out);
4113 static unsigned getPrefixes(OperandVector &Operands) {
4114 unsigned Result = 0;
4115 X86Operand &Prefix = static_cast<X86Operand &>(*Operands.back());
4116 if (Prefix.isPrefix()) {
4117 Result = Prefix.getPrefix();
4118 Operands.pop_back();
4120 return Result;
4123 bool X86AsmParser::matchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
4124 OperandVector &Operands,
4125 MCStreamer &Out, uint64_t &ErrorInfo,
4126 bool MatchingInlineAsm) {
4127 assert(!Operands.empty() && "Unexpect empty operand list!");
4128 assert((*Operands[0]).isToken() && "Leading operand should always be a mnemonic!");
4130 // First, handle aliases that expand to multiple instructions.
4131 MatchFPUWaitAlias(IDLoc, static_cast<X86Operand &>(*Operands[0]), Operands,
4132 Out, MatchingInlineAsm);
4133 unsigned Prefixes = getPrefixes(Operands);
4135 MCInst Inst;
4137 // If REX/REX2/VEX/EVEX encoding is forced, we need to pass the USE_* flag to
4138 // the encoder and printer.
4139 if (ForcedOpcodePrefix == OpcodePrefix_REX)
4140 Prefixes |= X86::IP_USE_REX;
4141 else if (ForcedOpcodePrefix == OpcodePrefix_REX2)
4142 Prefixes |= X86::IP_USE_REX2;
4143 else if (ForcedOpcodePrefix == OpcodePrefix_VEX)
4144 Prefixes |= X86::IP_USE_VEX;
4145 else if (ForcedOpcodePrefix == OpcodePrefix_VEX2)
4146 Prefixes |= X86::IP_USE_VEX2;
4147 else if (ForcedOpcodePrefix == OpcodePrefix_VEX3)
4148 Prefixes |= X86::IP_USE_VEX3;
4149 else if (ForcedOpcodePrefix == OpcodePrefix_EVEX)
4150 Prefixes |= X86::IP_USE_EVEX;
4152 // Set encoded flags for {disp8} and {disp32}.
4153 if (ForcedDispEncoding == DispEncoding_Disp8)
4154 Prefixes |= X86::IP_USE_DISP8;
4155 else if (ForcedDispEncoding == DispEncoding_Disp32)
4156 Prefixes |= X86::IP_USE_DISP32;
4158 if (Prefixes)
4159 Inst.setFlags(Prefixes);
4161 return isParsingIntelSyntax()
4162 ? matchAndEmitIntelInstruction(IDLoc, Opcode, Inst, Operands, Out,
4163 ErrorInfo, MatchingInlineAsm)
4164 : matchAndEmitATTInstruction(IDLoc, Opcode, Inst, Operands, Out,
4165 ErrorInfo, MatchingInlineAsm);
4168 void X86AsmParser::MatchFPUWaitAlias(SMLoc IDLoc, X86Operand &Op,
4169 OperandVector &Operands, MCStreamer &Out,
4170 bool MatchingInlineAsm) {
4171 // FIXME: This should be replaced with a real .td file alias mechanism.
4172 // Also, MatchInstructionImpl should actually *do* the EmitInstruction
4173 // call.
4174 const char *Repl = StringSwitch<const char *>(Op.getToken())
4175 .Case("finit", "fninit")
4176 .Case("fsave", "fnsave")
4177 .Case("fstcw", "fnstcw")
4178 .Case("fstcww", "fnstcw")
4179 .Case("fstenv", "fnstenv")
4180 .Case("fstsw", "fnstsw")
4181 .Case("fstsww", "fnstsw")
4182 .Case("fclex", "fnclex")
4183 .Default(nullptr);
4184 if (Repl) {
4185 MCInst Inst;
4186 Inst.setOpcode(X86::WAIT);
4187 Inst.setLoc(IDLoc);
4188 if (!MatchingInlineAsm)
4189 emitInstruction(Inst, Operands, Out);
4190 Operands[0] = X86Operand::CreateToken(Repl, IDLoc);
4194 bool X86AsmParser::ErrorMissingFeature(SMLoc IDLoc,
4195 const FeatureBitset &MissingFeatures,
4196 bool MatchingInlineAsm) {
4197 assert(MissingFeatures.any() && "Unknown missing feature!");
4198 SmallString<126> Msg;
4199 raw_svector_ostream OS(Msg);
4200 OS << "instruction requires:";
4201 for (unsigned i = 0, e = MissingFeatures.size(); i != e; ++i) {
4202 if (MissingFeatures[i])
4203 OS << ' ' << getSubtargetFeatureName(i);
4205 return Error(IDLoc, OS.str(), SMRange(), MatchingInlineAsm);
4208 unsigned X86AsmParser::checkTargetMatchPredicate(MCInst &Inst) {
4209 unsigned Opc = Inst.getOpcode();
4210 const MCInstrDesc &MCID = MII.get(Opc);
4211 uint64_t TSFlags = MCID.TSFlags;
4213 if (UseApxExtendedReg && !X86II::canUseApxExtendedReg(MCID))
4214 return Match_Unsupported;
4215 if (ForcedNoFlag == !(TSFlags & X86II::EVEX_NF) && !X86::isCFCMOVCC(Opc))
4216 return Match_Unsupported;
4218 switch (ForcedOpcodePrefix) {
4219 case OpcodePrefix_Default:
4220 break;
4221 case OpcodePrefix_REX:
4222 case OpcodePrefix_REX2:
4223 if (TSFlags & X86II::EncodingMask)
4224 return Match_Unsupported;
4225 break;
4226 case OpcodePrefix_VEX:
4227 case OpcodePrefix_VEX2:
4228 case OpcodePrefix_VEX3:
4229 if ((TSFlags & X86II::EncodingMask) != X86II::VEX)
4230 return Match_Unsupported;
4231 break;
4232 case OpcodePrefix_EVEX:
4233 if (is64BitMode() && (TSFlags & X86II::EncodingMask) != X86II::EVEX &&
4234 !X86::isCMP(Opc) && !X86::isTEST(Opc))
4235 return Match_Unsupported;
4236 if (!is64BitMode() && (TSFlags & X86II::EncodingMask) != X86II::EVEX)
4237 return Match_Unsupported;
4238 break;
4241 if ((TSFlags & X86II::ExplicitOpPrefixMask) == X86II::ExplicitVEXPrefix &&
4242 (ForcedOpcodePrefix != OpcodePrefix_VEX &&
4243 ForcedOpcodePrefix != OpcodePrefix_VEX2 &&
4244 ForcedOpcodePrefix != OpcodePrefix_VEX3))
4245 return Match_Unsupported;
4247 return Match_Success;
/// Matches \p Operands against the instruction tables for AT&T syntax and, on
/// success, validates, post-processes and emits the resulting instruction.
///
/// A direct match on the mnemonic as written is tried first. If that does not
/// succeed (and is not an immediate-range or missing-feature failure, which
/// are reported right away), the mnemonic is retried with each candidate size
/// suffix appended ("bwlq", or "slt" for mnemonics starting with 'f').
/// Exactly one successful suffix match is accepted; several are reported as
/// ambiguous; none falls through to the error reporting at the end.
///
/// \param IDLoc             location used for the instruction and diagnostics.
/// \param Opcode            set to the matched opcode on success.
/// \param Inst              instruction object filled in by the matcher.
/// \param Operands          parsed operands; Operands[0] is the mnemonic token.
/// \param Out               streamer the instruction is emitted to.
/// \param ErrorInfo         matcher error detail (operand index on operand
///                          failures); also written on missing-feature errors.
/// \param MatchingInlineAsm when set, validation, post-processing and emission
///                          are skipped; the flag is forwarded to Error().
/// \returns false on a successful match; otherwise true or the result of the
///          Error() call that reported the failure.
4250 bool X86AsmParser::matchAndEmitATTInstruction(
4251 SMLoc IDLoc, unsigned &Opcode, MCInst &Inst, OperandVector &Operands,
4252 MCStreamer &Out, uint64_t &ErrorInfo, bool MatchingInlineAsm) {
4253 X86Operand &Op = static_cast<X86Operand &>(*Operands[0]);
4254 SMRange EmptyRange = std::nullopt;
4255 // In 16-bit mode, if data32 is specified, temporarily switch to 32-bit mode
4256 // when matching the instruction.
4257 if (ForcedDataPrefix == X86::Is32Bit)
4258 SwitchMode(X86::Is32Bit);
4259 // First, try a direct match.
4260 FeatureBitset MissingFeatures;
4261 unsigned OriginalError = MatchInstruction(Operands, Inst, ErrorInfo,
4262 MissingFeatures, MatchingInlineAsm,
4263 isParsingIntelSyntax());
// Undo the temporary mode switch and consume the forced data prefix.
4264 if (ForcedDataPrefix == X86::Is32Bit) {
4265 SwitchMode(X86::Is16Bit);
4266 ForcedDataPrefix = 0;
// Dispatch on the result of the direct (unsuffixed) match.
4268 switch (OriginalError) {
4269 default: llvm_unreachable("Unexpected match result!");
4270 case Match_Success:
4271 if (!MatchingInlineAsm && validateInstruction(Inst, Operands))
4272 return true;
4273 // Some instructions need post-processing to, for example, tweak which
4274 // encoding is selected. Loop on it while changes happen so the
4275 // individual transformations can chain off each other.
4276 if (!MatchingInlineAsm)
4277 while (processInstruction(Inst, Operands))
4280 Inst.setLoc(IDLoc);
4281 if (!MatchingInlineAsm)
4282 emitInstruction(Inst, Operands, Out);
4283 Opcode = Inst.getOpcode();
4284 return false;
4285 case Match_InvalidImmUnsignedi4: {
// Point at the offending operand when it has a location, else at the
// instruction itself.
4286 SMLoc ErrorLoc = ((X86Operand &)*Operands[ErrorInfo]).getStartLoc();
4287 if (ErrorLoc == SMLoc())
4288 ErrorLoc = IDLoc;
4289 return Error(ErrorLoc, "immediate must be an integer in range [0, 15]",
4290 EmptyRange, MatchingInlineAsm);
4292 case Match_MissingFeature:
4293 return ErrorMissingFeature(IDLoc, MissingFeatures, MatchingInlineAsm);
// The remaining failures fall through to the suffix-guessing logic below.
4294 case Match_InvalidOperand:
4295 case Match_MnemonicFail:
4296 case Match_Unsupported:
4297 break;
4299 if (Op.getToken().empty()) {
4300 Error(IDLoc, "instruction must have size higher than 0", EmptyRange,
4301 MatchingInlineAsm);
4302 return true;
4305 // FIXME: Ideally, we would only attempt suffix matches for things which are
4306 // valid prefixes, and we could just infer the right unambiguous
4307 // type. However, that requires substantially more matcher support than the
4308 // following hack.
4310 // Change the operand to point to a temporary token.
4311 StringRef Base = Op.getToken();
4312 SmallString<16> Tmp;
4313 Tmp += Base;
// Placeholder character; overwritten with each candidate suffix in the loop
// further down.
4314 Tmp += ' ';
4315 Op.setTokenValue(Tmp);
4317 // If this instruction starts with an 'f', then it is a floating point stack
4318 // instruction. These come in up to three forms for 32-bit, 64-bit, and
4319 // 80-bit floating point, which use the suffixes s,l,t respectively.
4321 // Otherwise, we assume that this may be an integer instruction, which comes
4322 // in 8/16/32/64-bit forms using the b,w,l,q suffixes respectively.
4323 const char *Suffixes = Base[0] != 'f' ? "bwlq" : "slt\0";
4324 // MemSize corresponding to Suffixes. { 8, 16, 32, 64 } { 32, 64, 80, 0 }
4325 const char *MemSize = Base[0] != 'f' ? "\x08\x10\x20\x40" : "\x20\x40\x50\0";
4327 // Check for the various suffix matches.
4328 uint64_t ErrorInfoIgnore;
4329 FeatureBitset ErrorInfoMissingFeatures; // Init suppresses compiler warnings.
// One match result per candidate suffix.
4330 unsigned Match[4];
4332 // Some instruction like VPMULDQ is NOT the variant of VPMULD but a new one.
4333 // So we should make sure the suffix matcher only works for memory variant
4334 // that has the same size with the suffix.
4335 // FIXME: This flag is a workaround for legacy instructions that didn't
4336 // declare non suffix variant assembly.
4337 bool HasVectorReg = false;
4338 X86Operand *MemOp = nullptr;
4339 for (const auto &Op : Operands) {
4340 X86Operand *X86Op = static_cast<X86Operand *>(Op.get());
4341 if (X86Op->isVectorReg())
4342 HasVectorReg = true;
4343 else if (X86Op->isMem()) {
4344 MemOp = X86Op;
4345 assert(MemOp->Mem.Size == 0 && "Memory size always 0 under ATT syntax");
4346 // Have we found an unqualified memory operand,
4347 // break. IA allows only one memory operand.
4348 break;
// Try each candidate suffix. A match is only attempted when there is a
// memory operand or no vector register is involved; otherwise the slot is
// recorded as a mnemonic failure.
4352 for (unsigned I = 0, E = std::size(Match); I != E; ++I) {
4353 Tmp.back() = Suffixes[I];
4354 if (MemOp && HasVectorReg)
4355 MemOp->Mem.Size = MemSize[I];
4356 Match[I] = Match_MnemonicFail;
4357 if (MemOp || !HasVectorReg) {
4358 Match[I] =
4359 MatchInstruction(Operands, Inst, ErrorInfoIgnore, MissingFeatures,
4360 MatchingInlineAsm, isParsingIntelSyntax());
4361 // If this returned as a missing feature failure, remember that.
4362 if (Match[I] == Match_MissingFeature)
4363 ErrorInfoMissingFeatures = MissingFeatures;
4367 // Restore the old token.
4368 Op.setTokenValue(Base);
4370 // If exactly one matched, then we treat that as a successful match (and the
4371 // instruction will already have been filled in correctly, since the failing
4372 // matches won't have modified it).
4373 unsigned NumSuccessfulMatches = llvm::count(Match, Match_Success);
4374 if (NumSuccessfulMatches == 1) {
4375 if (!MatchingInlineAsm && validateInstruction(Inst, Operands))
4376 return true;
4377 // Some instructions need post-processing to, for example, tweak which
4378 // encoding is selected. Loop on it while changes happen so the
4379 // individual transformations can chain off each other.
4380 if (!MatchingInlineAsm)
4381 while (processInstruction(Inst, Operands))
4384 Inst.setLoc(IDLoc);
4385 if (!MatchingInlineAsm)
4386 emitInstruction(Inst, Operands, Out);
4387 Opcode = Inst.getOpcode();
4388 return false;
4391 // Otherwise, the match failed, try to produce a decent error message.
4393 // If we had multiple suffix matches, then identify this as an ambiguous
4394 // match.
4395 if (NumSuccessfulMatches > 1) {
// Collect the suffix letters that matched, for the diagnostic text.
4396 char MatchChars[4];
4397 unsigned NumMatches = 0;
4398 for (unsigned I = 0, E = std::size(Match); I != E; ++I)
4399 if (Match[I] == Match_Success)
4400 MatchChars[NumMatches++] = Suffixes[I];
4402 SmallString<126> Msg;
4403 raw_svector_ostream OS(Msg);
4404 OS << "ambiguous instructions require an explicit suffix (could be ";
4405 for (unsigned i = 0; i != NumMatches; ++i) {
4406 if (i != 0)
4407 OS << ", ";
4408 if (i + 1 == NumMatches)
4409 OS << "or ";
4410 OS << "'" << Base << MatchChars[i] << "'";
4412 OS << ")";
4413 Error(IDLoc, OS.str(), EmptyRange, MatchingInlineAsm);
4414 return true;
4417 // Okay, we know that none of the variants matched successfully.
4419 // If all of the instructions reported an invalid mnemonic, then the original
4420 // mnemonic was invalid.
// (4 is the number of entries in Match.)
4421 if (llvm::count(Match, Match_MnemonicFail) == 4) {
4422 if (OriginalError == Match_MnemonicFail)
4423 return Error(IDLoc, "invalid instruction mnemonic '" + Base + "'",
4424 Op.getLocRange(), MatchingInlineAsm);
4426 if (OriginalError == Match_Unsupported)
4427 return Error(IDLoc, "unsupported instruction", EmptyRange,
4428 MatchingInlineAsm);
4430 assert(OriginalError == Match_InvalidOperand && "Unexpected error");
4431 // Recover location info for the operand if we know which was the problem.
4432 if (ErrorInfo != ~0ULL) {
4433 if (ErrorInfo >= Operands.size())
4434 return Error(IDLoc, "too few operands for instruction", EmptyRange,
4435 MatchingInlineAsm);
4437 X86Operand &Operand = (X86Operand &)*Operands[ErrorInfo];
4438 if (Operand.getStartLoc().isValid()) {
4439 SMRange OperandRange = Operand.getLocRange();
4440 return Error(Operand.getStartLoc(), "invalid operand for instruction",
4441 OperandRange, MatchingInlineAsm);
4445 return Error(IDLoc, "invalid operand for instruction", EmptyRange,
4446 MatchingInlineAsm);
4449 // If one instruction matched as unsupported, report this as unsupported.
4450 if (llvm::count(Match, Match_Unsupported) == 1) {
4451 return Error(IDLoc, "unsupported instruction", EmptyRange,
4452 MatchingInlineAsm);
4455 // If one instruction matched with a missing feature, report this as a
4456 // missing feature.
4457 if (llvm::count(Match, Match_MissingFeature) == 1) {
4458 ErrorInfo = Match_MissingFeature;
4459 return ErrorMissingFeature(IDLoc, ErrorInfoMissingFeatures,
4460 MatchingInlineAsm);
4463 // If one instruction matched with an invalid operand, report this as an
4464 // operand failure.
4465 if (llvm::count(Match, Match_InvalidOperand) == 1) {
4466 return Error(IDLoc, "invalid operand for instruction", EmptyRange,
4467 MatchingInlineAsm);
4470 // If all of these were an outright failure, report it in a useless way.
4471 Error(IDLoc, "unknown use of instruction mnemonic without a size suffix",
4472 EmptyRange, MatchingInlineAsm);
4473 return true;
/// Matches \p Operands against the instruction tables for Intel syntax and,
/// on success, validates, post-processes and emits the resulting instruction.
///
/// Intel syntax carries no size suffix on the mnemonic, so when an unsized
/// memory operand is present the match is retried with each candidate memory
/// size and the distinct results are collected in Match. Exactly one
/// successful match is accepted; several are reported as an ambiguous operand
/// size unless the frontend-provided size resolves them.
///
/// \param IDLoc             location used for the instruction and diagnostics.
/// \param Opcode            set to the matched opcode on success.
/// \param Inst              instruction object filled in by the matcher.
/// \param Operands          parsed operands; Operands[0] is the mnemonic token.
/// \param Out               streamer the instruction is emitted to.
/// \param ErrorInfo         matcher error detail; also written on
///                          missing-feature errors.
/// \param MatchingInlineAsm when set, validation, post-processing and emission
///                          are skipped; the flag is forwarded to most Error()
///                          calls.
/// \returns false on a successful match; otherwise true or the result of the
///          Error() call that reported the failure.
4476 bool X86AsmParser::matchAndEmitIntelInstruction(
4477 SMLoc IDLoc, unsigned &Opcode, MCInst &Inst, OperandVector &Operands,
4478 MCStreamer &Out, uint64_t &ErrorInfo, bool MatchingInlineAsm) {
4479 X86Operand &Op = static_cast<X86Operand &>(*Operands[0]);
4480 SMRange EmptyRange = std::nullopt;
4481 // Find one unsized memory operand, if present.
4482 X86Operand *UnsizedMemOp = nullptr;
4483 for (const auto &Op : Operands) {
4484 X86Operand *X86Op = static_cast<X86Operand *>(Op.get());
4485 if (X86Op->isMemUnsized()) {
4486 UnsizedMemOp = X86Op;
4487 // Have we found an unqualified memory operand,
4488 // break. IA allows only one memory operand.
4489 break;
4493 // Allow some instructions to have implicitly pointer-sized operands. This is
4494 // compatible with gas.
4495 StringRef Mnemonic = (static_cast<X86Operand &>(*Operands[0])).getToken();
4496 if (UnsizedMemOp) {
4497 static const char *const PtrSizedInstrs[] = {"call", "jmp", "push", "pop"};
4498 for (const char *Instr : PtrSizedInstrs) {
4499 if (Mnemonic == Instr) {
4500 UnsizedMemOp->Mem.Size = getPointerWidth();
4501 break;
// Match collects one result per distinct match attempt made below.
4506 SmallVector<unsigned, 8> Match;
4507 FeatureBitset ErrorInfoMissingFeatures;
4508 FeatureBitset MissingFeatures;
4509 StringRef Base = (static_cast<X86Operand &>(*Operands[0])).getToken();
4511 // If an unsized push has an immediate operand, default its size to the
4512 // pointer size.
4513 if (Mnemonic == "push" && Operands.size() == 2) {
4514 auto *X86Op = static_cast<X86Operand *>(Operands[1].get());
4515 if (X86Op->isImm()) {
4516 // If it's not a constant fall through and let remainder take care of it.
4517 const auto *CE = dyn_cast<MCConstantExpr>(X86Op->getImm());
4518 unsigned Size = getPointerWidth();
4519 if (CE &&
4520 (isIntN(Size, CE->getValue()) || isUIntN(Size, CE->getValue()))) {
// Temporarily append the suffix for the current mode to the mnemonic.
4521 SmallString<16> Tmp;
4522 Tmp += Base;
4523 Tmp += (is64BitMode())
4524 ? "q"
4525 : (is32BitMode()) ? "l" : (is16BitMode()) ? "w" : " ";
4526 Op.setTokenValue(Tmp);
4527 // Do match in ATT mode to allow explicit suffix usage.
4528 Match.push_back(MatchInstruction(Operands, Inst, ErrorInfo,
4529 MissingFeatures, MatchingInlineAsm,
4530 false /*isParsingIntelSyntax()*/));
4531 Op.setTokenValue(Base);
4536 // If an unsized memory operand is present, try to match with each memory
4537 // operand size. In Intel assembly, the size is not part of the instruction
4538 // mnemonic.
4539 if (UnsizedMemOp && UnsizedMemOp->isMemUnsized()) {
4540 static const unsigned MopSizes[] = {8, 16, 32, 64, 80, 128, 256, 512};
4541 for (unsigned Size : MopSizes) {
4542 UnsizedMemOp->Mem.Size = Size;
4543 uint64_t ErrorInfoIgnore;
4544 unsigned LastOpcode = Inst.getOpcode();
4545 unsigned M = MatchInstruction(Operands, Inst, ErrorInfoIgnore,
4546 MissingFeatures, MatchingInlineAsm,
4547 isParsingIntelSyntax());
// Record the result only for the first attempt or when this attempt changed
// the opcode held in Inst.
4548 if (Match.empty() || LastOpcode != Inst.getOpcode())
4549 Match.push_back(M);
4551 // If this returned as a missing feature failure, remember that.
4552 if (Match.back() == Match_MissingFeature)
4553 ErrorInfoMissingFeatures = MissingFeatures;
4556 // Restore the size of the unsized memory operand if we modified it.
4557 UnsizedMemOp->Mem.Size = 0;
4560 // If we haven't matched anything yet, this is not a basic integer or FPU
4561 // operation. There shouldn't be any ambiguity in our mnemonic table, so try
4562 // matching with the unsized operand.
4563 if (Match.empty()) {
4564 Match.push_back(MatchInstruction(
4565 Operands, Inst, ErrorInfo, MissingFeatures, MatchingInlineAsm,
4566 isParsingIntelSyntax()));
4567 // If this returned as a missing feature failure, remember that.
4568 if (Match.back() == Match_MissingFeature)
4569 ErrorInfoMissingFeatures = MissingFeatures;
4572 // Restore the size of the unsized memory operand if we modified it.
4573 if (UnsizedMemOp)
4574 UnsizedMemOp->Mem.Size = 0;
4576 // If it's a bad mnemonic, all results will be the same.
4577 if (Match.back() == Match_MnemonicFail) {
4578 return Error(IDLoc, "invalid instruction mnemonic '" + Mnemonic + "'",
4579 Op.getLocRange(), MatchingInlineAsm);
4582 unsigned NumSuccessfulMatches = llvm::count(Match, Match_Success);
4584 // If matching was ambiguous and we had size information from the frontend,
4585 // try again with that. This handles cases like "movzx eax, m8/m16".
4586 if (UnsizedMemOp && NumSuccessfulMatches > 1 &&
4587 UnsizedMemOp->getMemFrontendSize()) {
4588 UnsizedMemOp->Mem.Size = UnsizedMemOp->getMemFrontendSize();
4589 unsigned M = MatchInstruction(
4590 Operands, Inst, ErrorInfo, MissingFeatures, MatchingInlineAsm,
4591 isParsingIntelSyntax());
4592 if (M == Match_Success)
4593 NumSuccessfulMatches = 1;
4595 // Add a rewrite that encodes the size information we used from the
4596 // frontend.
4597 InstInfo->AsmRewrites->emplace_back(
4598 AOK_SizeDirective, UnsizedMemOp->getStartLoc(),
4599 /*Len=*/0, UnsizedMemOp->getMemFrontendSize());
4602 // If exactly one matched, then we treat that as a successful match (and the
4603 // instruction will already have been filled in correctly, since the failing
4604 // matches won't have modified it).
4605 if (NumSuccessfulMatches == 1) {
4606 if (!MatchingInlineAsm && validateInstruction(Inst, Operands))
4607 return true;
4608 // Some instructions need post-processing to, for example, tweak which
4609 // encoding is selected. Loop on it while changes happen so the individual
4610 // transformations can chain off each other.
4611 if (!MatchingInlineAsm)
4612 while (processInstruction(Inst, Operands))
4614 Inst.setLoc(IDLoc);
4615 if (!MatchingInlineAsm)
4616 emitInstruction(Inst, Operands, Out);
4617 Opcode = Inst.getOpcode();
4618 return false;
4619 } else if (NumSuccessfulMatches > 1) {
4620 assert(UnsizedMemOp &&
4621 "multiple matches only possible with unsized memory operands");
// NOTE(review): unlike the other Error() calls in this function, this one
// does not pass MatchingInlineAsm -- confirm whether that is intentional.
4622 return Error(UnsizedMemOp->getStartLoc(),
4623 "ambiguous operand size for instruction '" + Mnemonic + "\'",
4624 UnsizedMemOp->getLocRange());
4627 // If one instruction matched as unsupported, report this as unsupported.
4628 if (llvm::count(Match, Match_Unsupported) == 1) {
4629 return Error(IDLoc, "unsupported instruction", EmptyRange,
4630 MatchingInlineAsm);
4633 // If one instruction matched with a missing feature, report this as a
4634 // missing feature.
4635 if (llvm::count(Match, Match_MissingFeature) == 1) {
4636 ErrorInfo = Match_MissingFeature;
4637 return ErrorMissingFeature(IDLoc, ErrorInfoMissingFeatures,
4638 MatchingInlineAsm);
4641 // If one instruction matched with an invalid operand, report this as an
4642 // operand failure.
4643 if (llvm::count(Match, Match_InvalidOperand) == 1) {
4644 return Error(IDLoc, "invalid operand for instruction", EmptyRange,
4645 MatchingInlineAsm);
// If one attempt failed on an out-of-range 4-bit immediate, report it at the
// offending operand when that operand has a location, else at the
// instruction.
4648 if (llvm::count(Match, Match_InvalidImmUnsignedi4) == 1) {
4649 SMLoc ErrorLoc = ((X86Operand &)*Operands[ErrorInfo]).getStartLoc();
4650 if (ErrorLoc == SMLoc())
4651 ErrorLoc = IDLoc;
4652 return Error(ErrorLoc, "immediate must be an integer in range [0, 15]",
4653 EmptyRange, MatchingInlineAsm);
4656 // If all of these were an outright failure, report it in a useless way.
4657 return Error(IDLoc, "unknown instruction mnemonic", EmptyRange,
4658 MatchingInlineAsm);
4661 bool X86AsmParser::omitRegisterFromClobberLists(MCRegister Reg) {
4662 return X86MCRegisterClasses[X86::SEGMENT_REGRegClassID].contains(Reg);
4665 bool X86AsmParser::ParseDirective(AsmToken DirectiveID) {
4666 MCAsmParser &Parser = getParser();
4667 StringRef IDVal = DirectiveID.getIdentifier();
4668 if (IDVal.starts_with(".arch"))
4669 return parseDirectiveArch();
4670 if (IDVal.starts_with(".code"))
4671 return ParseDirectiveCode(IDVal, DirectiveID.getLoc());
4672 else if (IDVal.starts_with(".att_syntax")) {
4673 if (getLexer().isNot(AsmToken::EndOfStatement)) {
4674 if (Parser.getTok().getString() == "prefix")
4675 Parser.Lex();
4676 else if (Parser.getTok().getString() == "noprefix")
4677 return Error(DirectiveID.getLoc(), "'.att_syntax noprefix' is not "
4678 "supported: registers must have a "
4679 "'%' prefix in .att_syntax");
4681 getParser().setAssemblerDialect(0);
4682 return false;
4683 } else if (IDVal.starts_with(".intel_syntax")) {
4684 getParser().setAssemblerDialect(1);
4685 if (getLexer().isNot(AsmToken::EndOfStatement)) {
4686 if (Parser.getTok().getString() == "noprefix")
4687 Parser.Lex();
4688 else if (Parser.getTok().getString() == "prefix")
4689 return Error(DirectiveID.getLoc(), "'.intel_syntax prefix' is not "
4690 "supported: registers must not have "
4691 "a '%' prefix in .intel_syntax");
4693 return false;
4694 } else if (IDVal == ".nops")
4695 return parseDirectiveNops(DirectiveID.getLoc());
4696 else if (IDVal == ".even")
4697 return parseDirectiveEven(DirectiveID.getLoc());
4698 else if (IDVal == ".cv_fpo_proc")
4699 return parseDirectiveFPOProc(DirectiveID.getLoc());
4700 else if (IDVal == ".cv_fpo_setframe")
4701 return parseDirectiveFPOSetFrame(DirectiveID.getLoc());
4702 else if (IDVal == ".cv_fpo_pushreg")
4703 return parseDirectiveFPOPushReg(DirectiveID.getLoc());
4704 else if (IDVal == ".cv_fpo_stackalloc")
4705 return parseDirectiveFPOStackAlloc(DirectiveID.getLoc());
4706 else if (IDVal == ".cv_fpo_stackalign")
4707 return parseDirectiveFPOStackAlign(DirectiveID.getLoc());
4708 else if (IDVal == ".cv_fpo_endprologue")
4709 return parseDirectiveFPOEndPrologue(DirectiveID.getLoc());
4710 else if (IDVal == ".cv_fpo_endproc")
4711 return parseDirectiveFPOEndProc(DirectiveID.getLoc());
4712 else if (IDVal == ".seh_pushreg" ||
4713 (Parser.isParsingMasm() && IDVal.equals_insensitive(".pushreg")))
4714 return parseDirectiveSEHPushReg(DirectiveID.getLoc());
4715 else if (IDVal == ".seh_setframe" ||
4716 (Parser.isParsingMasm() && IDVal.equals_insensitive(".setframe")))
4717 return parseDirectiveSEHSetFrame(DirectiveID.getLoc());
4718 else if (IDVal == ".seh_savereg" ||
4719 (Parser.isParsingMasm() && IDVal.equals_insensitive(".savereg")))
4720 return parseDirectiveSEHSaveReg(DirectiveID.getLoc());
4721 else if (IDVal == ".seh_savexmm" ||
4722 (Parser.isParsingMasm() && IDVal.equals_insensitive(".savexmm128")))
4723 return parseDirectiveSEHSaveXMM(DirectiveID.getLoc());
4724 else if (IDVal == ".seh_pushframe" ||
4725 (Parser.isParsingMasm() && IDVal.equals_insensitive(".pushframe")))
4726 return parseDirectiveSEHPushFrame(DirectiveID.getLoc());
4728 return true;
4731 bool X86AsmParser::parseDirectiveArch() {
4732 // Ignore .arch for now.
4733 getParser().parseStringToEndOfStatement();
4734 return false;
4737 /// parseDirectiveNops
4738 /// ::= .nops size[, control]
4739 bool X86AsmParser::parseDirectiveNops(SMLoc L) {
4740 int64_t NumBytes = 0, Control = 0;
4741 SMLoc NumBytesLoc, ControlLoc;
4742 const MCSubtargetInfo& STI = getSTI();
4743 NumBytesLoc = getTok().getLoc();
4744 if (getParser().checkForValidSection() ||
4745 getParser().parseAbsoluteExpression(NumBytes))
4746 return true;
4748 if (parseOptionalToken(AsmToken::Comma)) {
4749 ControlLoc = getTok().getLoc();
4750 if (getParser().parseAbsoluteExpression(Control))
4751 return true;
4753 if (getParser().parseEOL())
4754 return true;
4756 if (NumBytes <= 0) {
4757 Error(NumBytesLoc, "'.nops' directive with non-positive size");
4758 return false;
4761 if (Control < 0) {
4762 Error(ControlLoc, "'.nops' directive with negative NOP size");
4763 return false;
4766 /// Emit nops
4767 getParser().getStreamer().emitNops(NumBytes, Control, L, STI);
4769 return false;
4772 /// parseDirectiveEven
4773 /// ::= .even
4774 bool X86AsmParser::parseDirectiveEven(SMLoc L) {
4775 if (parseEOL())
4776 return false;
4778 const MCSection *Section = getStreamer().getCurrentSectionOnly();
4779 if (!Section) {
4780 getStreamer().initSections(false, getSTI());
4781 Section = getStreamer().getCurrentSectionOnly();
4783 if (Section->useCodeAlign())
4784 getStreamer().emitCodeAlignment(Align(2), &getSTI(), 0);
4785 else
4786 getStreamer().emitValueToAlignment(Align(2), 0, 1, 0);
4787 return false;
4790 /// ParseDirectiveCode
4791 /// ::= .code16 | .code32 | .code64
4792 bool X86AsmParser::ParseDirectiveCode(StringRef IDVal, SMLoc L) {
4793 MCAsmParser &Parser = getParser();
4794 Code16GCC = false;
4795 if (IDVal == ".code16") {
4796 Parser.Lex();
4797 if (!is16BitMode()) {
4798 SwitchMode(X86::Is16Bit);
4799 getParser().getStreamer().emitAssemblerFlag(MCAF_Code16);
4801 } else if (IDVal == ".code16gcc") {
4802 // .code16gcc parses as if in 32-bit mode, but emits code in 16-bit mode.
4803 Parser.Lex();
4804 Code16GCC = true;
4805 if (!is16BitMode()) {
4806 SwitchMode(X86::Is16Bit);
4807 getParser().getStreamer().emitAssemblerFlag(MCAF_Code16);
4809 } else if (IDVal == ".code32") {
4810 Parser.Lex();
4811 if (!is32BitMode()) {
4812 SwitchMode(X86::Is32Bit);
4813 getParser().getStreamer().emitAssemblerFlag(MCAF_Code32);
4815 } else if (IDVal == ".code64") {
4816 Parser.Lex();
4817 if (!is64BitMode()) {
4818 SwitchMode(X86::Is64Bit);
4819 getParser().getStreamer().emitAssemblerFlag(MCAF_Code64);
4821 } else {
4822 Error(L, "unknown directive " + IDVal);
4823 return false;
4826 return false;
4829 // .cv_fpo_proc foo
4830 bool X86AsmParser::parseDirectiveFPOProc(SMLoc L) {
4831 MCAsmParser &Parser = getParser();
4832 StringRef ProcName;
4833 int64_t ParamsSize;
4834 if (Parser.parseIdentifier(ProcName))
4835 return Parser.TokError("expected symbol name");
4836 if (Parser.parseIntToken(ParamsSize, "expected parameter byte count"))
4837 return true;
4838 if (!isUIntN(32, ParamsSize))
4839 return Parser.TokError("parameters size out of range");
4840 if (parseEOL())
4841 return true;
4842 MCSymbol *ProcSym = getContext().getOrCreateSymbol(ProcName);
4843 return getTargetStreamer().emitFPOProc(ProcSym, ParamsSize, L);
4846 // .cv_fpo_setframe ebp
4847 bool X86AsmParser::parseDirectiveFPOSetFrame(SMLoc L) {
4848 MCRegister Reg;
4849 SMLoc DummyLoc;
4850 if (parseRegister(Reg, DummyLoc, DummyLoc) || parseEOL())
4851 return true;
4852 return getTargetStreamer().emitFPOSetFrame(Reg, L);
4855 // .cv_fpo_pushreg ebx
4856 bool X86AsmParser::parseDirectiveFPOPushReg(SMLoc L) {
4857 MCRegister Reg;
4858 SMLoc DummyLoc;
4859 if (parseRegister(Reg, DummyLoc, DummyLoc) || parseEOL())
4860 return true;
4861 return getTargetStreamer().emitFPOPushReg(Reg, L);
4864 // .cv_fpo_stackalloc 20
4865 bool X86AsmParser::parseDirectiveFPOStackAlloc(SMLoc L) {
4866 MCAsmParser &Parser = getParser();
4867 int64_t Offset;
4868 if (Parser.parseIntToken(Offset, "expected offset") || parseEOL())
4869 return true;
4870 return getTargetStreamer().emitFPOStackAlloc(Offset, L);
4873 // .cv_fpo_stackalign 8
4874 bool X86AsmParser::parseDirectiveFPOStackAlign(SMLoc L) {
4875 MCAsmParser &Parser = getParser();
4876 int64_t Offset;
4877 if (Parser.parseIntToken(Offset, "expected offset") || parseEOL())
4878 return true;
4879 return getTargetStreamer().emitFPOStackAlign(Offset, L);
4882 // .cv_fpo_endprologue
4883 bool X86AsmParser::parseDirectiveFPOEndPrologue(SMLoc L) {
4884 MCAsmParser &Parser = getParser();
4885 if (Parser.parseEOL())
4886 return true;
4887 return getTargetStreamer().emitFPOEndPrologue(L);
4890 // .cv_fpo_endproc
4891 bool X86AsmParser::parseDirectiveFPOEndProc(SMLoc L) {
4892 MCAsmParser &Parser = getParser();
4893 if (Parser.parseEOL())
4894 return true;
4895 return getTargetStreamer().emitFPOEndProc(L);
4898 bool X86AsmParser::parseSEHRegisterNumber(unsigned RegClassID,
4899 MCRegister &RegNo) {
4900 SMLoc startLoc = getLexer().getLoc();
4901 const MCRegisterInfo *MRI = getContext().getRegisterInfo();
4903 // Try parsing the argument as a register first.
4904 if (getLexer().getTok().isNot(AsmToken::Integer)) {
4905 SMLoc endLoc;
4906 if (parseRegister(RegNo, startLoc, endLoc))
4907 return true;
4909 if (!X86MCRegisterClasses[RegClassID].contains(RegNo)) {
4910 return Error(startLoc,
4911 "register is not supported for use with this directive");
4913 } else {
4914 // Otherwise, an integer number matching the encoding of the desired
4915 // register may appear.
4916 int64_t EncodedReg;
4917 if (getParser().parseAbsoluteExpression(EncodedReg))
4918 return true;
4920 // The SEH register number is the same as the encoding register number. Map
4921 // from the encoding back to the LLVM register number.
4922 RegNo = MCRegister();
4923 for (MCPhysReg Reg : X86MCRegisterClasses[RegClassID]) {
4924 if (MRI->getEncodingValue(Reg) == EncodedReg) {
4925 RegNo = Reg;
4926 break;
4929 if (!RegNo) {
4930 return Error(startLoc,
4931 "incorrect register number for use with this directive");
4935 return false;
4938 bool X86AsmParser::parseDirectiveSEHPushReg(SMLoc Loc) {
4939 MCRegister Reg;
4940 if (parseSEHRegisterNumber(X86::GR64RegClassID, Reg))
4941 return true;
4943 if (getLexer().isNot(AsmToken::EndOfStatement))
4944 return TokError("expected end of directive");
4946 getParser().Lex();
4947 getStreamer().emitWinCFIPushReg(Reg, Loc);
4948 return false;
4951 bool X86AsmParser::parseDirectiveSEHSetFrame(SMLoc Loc) {
4952 MCRegister Reg;
4953 int64_t Off;
4954 if (parseSEHRegisterNumber(X86::GR64RegClassID, Reg))
4955 return true;
4956 if (getLexer().isNot(AsmToken::Comma))
4957 return TokError("you must specify a stack pointer offset");
4959 getParser().Lex();
4960 if (getParser().parseAbsoluteExpression(Off))
4961 return true;
4963 if (getLexer().isNot(AsmToken::EndOfStatement))
4964 return TokError("expected end of directive");
4966 getParser().Lex();
4967 getStreamer().emitWinCFISetFrame(Reg, Off, Loc);
4968 return false;
4971 bool X86AsmParser::parseDirectiveSEHSaveReg(SMLoc Loc) {
4972 MCRegister Reg;
4973 int64_t Off;
4974 if (parseSEHRegisterNumber(X86::GR64RegClassID, Reg))
4975 return true;
4976 if (getLexer().isNot(AsmToken::Comma))
4977 return TokError("you must specify an offset on the stack");
4979 getParser().Lex();
4980 if (getParser().parseAbsoluteExpression(Off))
4981 return true;
4983 if (getLexer().isNot(AsmToken::EndOfStatement))
4984 return TokError("expected end of directive");
4986 getParser().Lex();
4987 getStreamer().emitWinCFISaveReg(Reg, Off, Loc);
4988 return false;
4991 bool X86AsmParser::parseDirectiveSEHSaveXMM(SMLoc Loc) {
4992 MCRegister Reg;
4993 int64_t Off;
4994 if (parseSEHRegisterNumber(X86::VR128XRegClassID, Reg))
4995 return true;
4996 if (getLexer().isNot(AsmToken::Comma))
4997 return TokError("you must specify an offset on the stack");
4999 getParser().Lex();
5000 if (getParser().parseAbsoluteExpression(Off))
5001 return true;
5003 if (getLexer().isNot(AsmToken::EndOfStatement))
5004 return TokError("expected end of directive");
5006 getParser().Lex();
5007 getStreamer().emitWinCFISaveXMM(Reg, Off, Loc);
5008 return false;
5011 bool X86AsmParser::parseDirectiveSEHPushFrame(SMLoc Loc) {
5012 bool Code = false;
5013 StringRef CodeID;
5014 if (getLexer().is(AsmToken::At)) {
5015 SMLoc startLoc = getLexer().getLoc();
5016 getParser().Lex();
5017 if (!getParser().parseIdentifier(CodeID)) {
5018 if (CodeID != "code")
5019 return Error(startLoc, "expected @code");
5020 Code = true;
5024 if (getLexer().isNot(AsmToken::EndOfStatement))
5025 return TokError("expected end of directive");
5027 getParser().Lex();
5028 getStreamer().emitWinCFIPushFrame(Code, Loc);
5029 return false;
5032 // Force static initialization.
5033 extern "C" LLVM_C_ABI void LLVMInitializeX86AsmParser() {
5034 RegisterMCAsmParser<X86AsmParser> X(getTheX86_32Target());
5035 RegisterMCAsmParser<X86AsmParser> Y(getTheX86_64Target());
5038 #define GET_MATCHER_IMPLEMENTATION
5039 #include "X86GenAsmMatcher.inc"