[InstCombine] Signed saturation patterns
[llvm-complete.git] / lib / Target / X86 / AsmParser / X86AsmParser.cpp
blob25be79ec2b1edf377f753620aa2b0a50a5a4ae3f
1 //===-- X86AsmParser.cpp - Parse X86 assembly to MCInst instructions ------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
9 #include "MCTargetDesc/X86BaseInfo.h"
10 #include "MCTargetDesc/X86IntelInstPrinter.h"
11 #include "MCTargetDesc/X86MCExpr.h"
12 #include "MCTargetDesc/X86TargetStreamer.h"
13 #include "TargetInfo/X86TargetInfo.h"
14 #include "X86AsmParserCommon.h"
15 #include "X86Operand.h"
16 #include "llvm/ADT/STLExtras.h"
17 #include "llvm/ADT/SmallString.h"
18 #include "llvm/ADT/SmallVector.h"
19 #include "llvm/ADT/StringSwitch.h"
20 #include "llvm/ADT/Twine.h"
21 #include "llvm/MC/MCContext.h"
22 #include "llvm/MC/MCExpr.h"
23 #include "llvm/MC/MCInst.h"
24 #include "llvm/MC/MCInstrInfo.h"
25 #include "llvm/MC/MCParser/MCAsmLexer.h"
26 #include "llvm/MC/MCParser/MCAsmParser.h"
27 #include "llvm/MC/MCParser/MCParsedAsmOperand.h"
28 #include "llvm/MC/MCParser/MCTargetAsmParser.h"
29 #include "llvm/MC/MCRegisterInfo.h"
30 #include "llvm/MC/MCSection.h"
31 #include "llvm/MC/MCStreamer.h"
32 #include "llvm/MC/MCSubtargetInfo.h"
33 #include "llvm/MC/MCSymbol.h"
34 #include "llvm/Support/SourceMgr.h"
35 #include "llvm/Support/TargetRegistry.h"
36 #include "llvm/Support/raw_ostream.h"
37 #include <algorithm>
38 #include <memory>
40 using namespace llvm;
42 static bool checkScale(unsigned Scale, StringRef &ErrMsg) {
43 if (Scale != 1 && Scale != 2 && Scale != 4 && Scale != 8) {
44 ErrMsg = "scale factor in address must be 1, 2, 4 or 8";
45 return true;
47 return false;
50 namespace {
/// Precedence of each infix-calculator token. Each entry is labelled with the
/// InfixCalculatorTok enumerator it describes; pushOperator() indexes the
/// table with such tokens and treats a larger number as binding more tightly.
static const char OpPrecedence[] = {
    0, // IC_OR
    1, // IC_XOR
    2, // IC_AND
    3, // IC_LSHIFT
    3, // IC_RSHIFT
    4, // IC_PLUS
    4, // IC_MINUS
    5, // IC_MULTIPLY
    5, // IC_DIVIDE
    5, // IC_MOD
    6, // IC_NOT
    7, // IC_NEG
    8, // IC_RPAREN
    9, // IC_LPAREN
    0, // IC_IMM
    0  // IC_REGISTER
};
71 class X86AsmParser : public MCTargetAsmParser {
72 ParseInstructionInfo *InstInfo;
73 bool Code16GCC;
75 enum VEXEncoding {
76 VEXEncoding_Default,
77 VEXEncoding_VEX2,
78 VEXEncoding_VEX3,
79 VEXEncoding_EVEX,
82 VEXEncoding ForcedVEXEncoding = VEXEncoding_Default;
84 private:
85 SMLoc consumeToken() {
86 MCAsmParser &Parser = getParser();
87 SMLoc Result = Parser.getTok().getLoc();
88 Parser.Lex();
89 return Result;
92 X86TargetStreamer &getTargetStreamer() {
93 assert(getParser().getStreamer().getTargetStreamer() &&
94 "do not have a target streamer");
95 MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
96 return static_cast<X86TargetStreamer &>(TS);
99 unsigned MatchInstruction(const OperandVector &Operands, MCInst &Inst,
100 uint64_t &ErrorInfo, FeatureBitset &MissingFeatures,
101 bool matchingInlineAsm, unsigned VariantID = 0) {
102 // In Code16GCC mode, match as 32-bit.
103 if (Code16GCC)
104 SwitchMode(X86::Mode32Bit);
105 unsigned rv = MatchInstructionImpl(Operands, Inst, ErrorInfo,
106 MissingFeatures, matchingInlineAsm,
107 VariantID);
108 if (Code16GCC)
109 SwitchMode(X86::Mode16Bit);
110 return rv;
/// Tokens understood by InfixCalculator: operators first, then the two operand
/// kinds. The numeric values are spelled out because the calculator uses a
/// token as an index into the OpPrecedence table.
enum InfixCalculatorTok {
  IC_OR = 0,
  IC_XOR = 1,
  IC_AND = 2,
  IC_LSHIFT = 3,
  IC_RSHIFT = 4,
  IC_PLUS = 5,
  IC_MINUS = 6,
  IC_MULTIPLY = 7,
  IC_DIVIDE = 8,
  IC_MOD = 9,
  IC_NOT = 10,
  IC_NEG = 11,
  IC_RPAREN = 12,
  IC_LPAREN = 13,
  IC_IMM = 14,
  IC_REGISTER = 15
};
/// Kinds of Intel-syntax operators; IOK_INVALID (zero) marks "not an operator".
enum IntelOperatorKind {
  IOK_INVALID = 0,
  IOK_LENGTH = 1,
  IOK_SIZE = 2,
  IOK_TYPE = 3,
  IOK_OFFSET = 4
};
140 class InfixCalculator {
141 typedef std::pair< InfixCalculatorTok, int64_t > ICToken;
142 SmallVector<InfixCalculatorTok, 4> InfixOperatorStack;
143 SmallVector<ICToken, 4> PostfixStack;
145 bool isUnaryOperator(const InfixCalculatorTok Op) {
146 return Op == IC_NEG || Op == IC_NOT;
149 public:
150 int64_t popOperand() {
151 assert (!PostfixStack.empty() && "Poped an empty stack!");
152 ICToken Op = PostfixStack.pop_back_val();
153 if (!(Op.first == IC_IMM || Op.first == IC_REGISTER))
154 return -1; // The invalid Scale value will be caught later by checkScale
155 return Op.second;
157 void pushOperand(InfixCalculatorTok Op, int64_t Val = 0) {
158 assert ((Op == IC_IMM || Op == IC_REGISTER) &&
159 "Unexpected operand!");
160 PostfixStack.push_back(std::make_pair(Op, Val));
163 void popOperator() { InfixOperatorStack.pop_back(); }
164 void pushOperator(InfixCalculatorTok Op) {
165 // Push the new operator if the stack is empty.
166 if (InfixOperatorStack.empty()) {
167 InfixOperatorStack.push_back(Op);
168 return;
171 // Push the new operator if it has a higher precedence than the operator
172 // on the top of the stack or the operator on the top of the stack is a
173 // left parentheses.
174 unsigned Idx = InfixOperatorStack.size() - 1;
175 InfixCalculatorTok StackOp = InfixOperatorStack[Idx];
176 if (OpPrecedence[Op] > OpPrecedence[StackOp] || StackOp == IC_LPAREN) {
177 InfixOperatorStack.push_back(Op);
178 return;
181 // The operator on the top of the stack has higher precedence than the
182 // new operator.
183 unsigned ParenCount = 0;
184 while (1) {
185 // Nothing to process.
186 if (InfixOperatorStack.empty())
187 break;
189 Idx = InfixOperatorStack.size() - 1;
190 StackOp = InfixOperatorStack[Idx];
191 if (!(OpPrecedence[StackOp] >= OpPrecedence[Op] || ParenCount))
192 break;
194 // If we have an even parentheses count and we see a left parentheses,
195 // then stop processing.
196 if (!ParenCount && StackOp == IC_LPAREN)
197 break;
199 if (StackOp == IC_RPAREN) {
200 ++ParenCount;
201 InfixOperatorStack.pop_back();
202 } else if (StackOp == IC_LPAREN) {
203 --ParenCount;
204 InfixOperatorStack.pop_back();
205 } else {
206 InfixOperatorStack.pop_back();
207 PostfixStack.push_back(std::make_pair(StackOp, 0));
210 // Push the new operator.
211 InfixOperatorStack.push_back(Op);
214 int64_t execute() {
215 // Push any remaining operators onto the postfix stack.
216 while (!InfixOperatorStack.empty()) {
217 InfixCalculatorTok StackOp = InfixOperatorStack.pop_back_val();
218 if (StackOp != IC_LPAREN && StackOp != IC_RPAREN)
219 PostfixStack.push_back(std::make_pair(StackOp, 0));
222 if (PostfixStack.empty())
223 return 0;
225 SmallVector<ICToken, 16> OperandStack;
226 for (unsigned i = 0, e = PostfixStack.size(); i != e; ++i) {
227 ICToken Op = PostfixStack[i];
228 if (Op.first == IC_IMM || Op.first == IC_REGISTER) {
229 OperandStack.push_back(Op);
230 } else if (isUnaryOperator(Op.first)) {
231 assert (OperandStack.size() > 0 && "Too few operands.");
232 ICToken Operand = OperandStack.pop_back_val();
233 assert (Operand.first == IC_IMM &&
234 "Unary operation with a register!");
235 switch (Op.first) {
236 default:
237 report_fatal_error("Unexpected operator!");
238 break;
239 case IC_NEG:
240 OperandStack.push_back(std::make_pair(IC_IMM, -Operand.second));
241 break;
242 case IC_NOT:
243 OperandStack.push_back(std::make_pair(IC_IMM, ~Operand.second));
244 break;
246 } else {
247 assert (OperandStack.size() > 1 && "Too few operands.");
248 int64_t Val;
249 ICToken Op2 = OperandStack.pop_back_val();
250 ICToken Op1 = OperandStack.pop_back_val();
251 switch (Op.first) {
252 default:
253 report_fatal_error("Unexpected operator!");
254 break;
255 case IC_PLUS:
256 Val = Op1.second + Op2.second;
257 OperandStack.push_back(std::make_pair(IC_IMM, Val));
258 break;
259 case IC_MINUS:
260 Val = Op1.second - Op2.second;
261 OperandStack.push_back(std::make_pair(IC_IMM, Val));
262 break;
263 case IC_MULTIPLY:
264 assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
265 "Multiply operation with an immediate and a register!");
266 Val = Op1.second * Op2.second;
267 OperandStack.push_back(std::make_pair(IC_IMM, Val));
268 break;
269 case IC_DIVIDE:
270 assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
271 "Divide operation with an immediate and a register!");
272 assert (Op2.second != 0 && "Division by zero!");
273 Val = Op1.second / Op2.second;
274 OperandStack.push_back(std::make_pair(IC_IMM, Val));
275 break;
276 case IC_MOD:
277 assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
278 "Modulo operation with an immediate and a register!");
279 Val = Op1.second % Op2.second;
280 OperandStack.push_back(std::make_pair(IC_IMM, Val));
281 break;
282 case IC_OR:
283 assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
284 "Or operation with an immediate and a register!");
285 Val = Op1.second | Op2.second;
286 OperandStack.push_back(std::make_pair(IC_IMM, Val));
287 break;
288 case IC_XOR:
289 assert(Op1.first == IC_IMM && Op2.first == IC_IMM &&
290 "Xor operation with an immediate and a register!");
291 Val = Op1.second ^ Op2.second;
292 OperandStack.push_back(std::make_pair(IC_IMM, Val));
293 break;
294 case IC_AND:
295 assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
296 "And operation with an immediate and a register!");
297 Val = Op1.second & Op2.second;
298 OperandStack.push_back(std::make_pair(IC_IMM, Val));
299 break;
300 case IC_LSHIFT:
301 assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
302 "Left shift operation with an immediate and a register!");
303 Val = Op1.second << Op2.second;
304 OperandStack.push_back(std::make_pair(IC_IMM, Val));
305 break;
306 case IC_RSHIFT:
307 assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
308 "Right shift operation with an immediate and a register!");
309 Val = Op1.second >> Op2.second;
310 OperandStack.push_back(std::make_pair(IC_IMM, Val));
311 break;
315 assert (OperandStack.size() == 1 && "Expected a single result.");
316 return OperandStack.pop_back_val().second;
/// States of IntelExprStateMachine. Each state names the kind of token that
/// was accepted most recently; IES_ERROR is entered on an unexpected token.
enum IntelExprState {
  // Nothing consumed yet.
  IES_INIT,

  // Operators.
  IES_OR,
  IES_XOR,
  IES_AND,
  IES_LSHIFT,
  IES_RSHIFT,
  IES_PLUS,
  IES_MINUS,
  IES_NOT,
  IES_MULTIPLY,
  IES_DIVIDE,
  IES_MOD,

  // Brackets and parentheses.
  IES_LBRAC,
  IES_RBRAC,
  IES_LPAREN,
  IES_RPAREN,

  // Operands.
  IES_REGISTER,
  IES_INTEGER,
  IES_IDENTIFIER,

  // An unexpected token was seen.
  IES_ERROR
};
343 class IntelExprStateMachine {
344 IntelExprState State, PrevState;
345 unsigned BaseReg, IndexReg, TmpReg, Scale;
346 int64_t Imm;
347 const MCExpr *Sym;
348 StringRef SymName;
349 InfixCalculator IC;
350 InlineAsmIdentifierInfo Info;
351 short BracCount;
352 bool MemExpr;
354 public:
355 IntelExprStateMachine()
356 : State(IES_INIT), PrevState(IES_ERROR), BaseReg(0), IndexReg(0),
357 TmpReg(0), Scale(0), Imm(0), Sym(nullptr), BracCount(0),
358 MemExpr(false) {}
360 void addImm(int64_t imm) { Imm += imm; }
361 short getBracCount() { return BracCount; }
362 bool isMemExpr() { return MemExpr; }
363 unsigned getBaseReg() { return BaseReg; }
364 unsigned getIndexReg() { return IndexReg; }
365 unsigned getScale() { return Scale; }
366 const MCExpr *getSym() { return Sym; }
367 StringRef getSymName() { return SymName; }
368 int64_t getImm() { return Imm + IC.execute(); }
369 bool isValidEndState() {
370 return State == IES_RBRAC || State == IES_INTEGER;
372 bool hadError() { return State == IES_ERROR; }
373 InlineAsmIdentifierInfo &getIdentifierInfo() { return Info; }
375 void onOr() {
376 IntelExprState CurrState = State;
377 switch (State) {
378 default:
379 State = IES_ERROR;
380 break;
381 case IES_INTEGER:
382 case IES_RPAREN:
383 case IES_REGISTER:
384 State = IES_OR;
385 IC.pushOperator(IC_OR);
386 break;
388 PrevState = CurrState;
390 void onXor() {
391 IntelExprState CurrState = State;
392 switch (State) {
393 default:
394 State = IES_ERROR;
395 break;
396 case IES_INTEGER:
397 case IES_RPAREN:
398 case IES_REGISTER:
399 State = IES_XOR;
400 IC.pushOperator(IC_XOR);
401 break;
403 PrevState = CurrState;
405 void onAnd() {
406 IntelExprState CurrState = State;
407 switch (State) {
408 default:
409 State = IES_ERROR;
410 break;
411 case IES_INTEGER:
412 case IES_RPAREN:
413 case IES_REGISTER:
414 State = IES_AND;
415 IC.pushOperator(IC_AND);
416 break;
418 PrevState = CurrState;
420 void onLShift() {
421 IntelExprState CurrState = State;
422 switch (State) {
423 default:
424 State = IES_ERROR;
425 break;
426 case IES_INTEGER:
427 case IES_RPAREN:
428 case IES_REGISTER:
429 State = IES_LSHIFT;
430 IC.pushOperator(IC_LSHIFT);
431 break;
433 PrevState = CurrState;
435 void onRShift() {
436 IntelExprState CurrState = State;
437 switch (State) {
438 default:
439 State = IES_ERROR;
440 break;
441 case IES_INTEGER:
442 case IES_RPAREN:
443 case IES_REGISTER:
444 State = IES_RSHIFT;
445 IC.pushOperator(IC_RSHIFT);
446 break;
448 PrevState = CurrState;
450 bool onPlus(StringRef &ErrMsg) {
451 IntelExprState CurrState = State;
452 switch (State) {
453 default:
454 State = IES_ERROR;
455 break;
456 case IES_INTEGER:
457 case IES_RPAREN:
458 case IES_REGISTER:
459 State = IES_PLUS;
460 IC.pushOperator(IC_PLUS);
461 if (CurrState == IES_REGISTER && PrevState != IES_MULTIPLY) {
462 // If we already have a BaseReg, then assume this is the IndexReg with
463 // no explicit scale.
464 if (!BaseReg) {
465 BaseReg = TmpReg;
466 } else {
467 if (IndexReg) {
468 ErrMsg = "BaseReg/IndexReg already set!";
469 return true;
471 IndexReg = TmpReg;
472 Scale = 0;
475 break;
477 PrevState = CurrState;
478 return false;
480 bool onMinus(StringRef &ErrMsg) {
481 IntelExprState CurrState = State;
482 switch (State) {
483 default:
484 State = IES_ERROR;
485 break;
486 case IES_OR:
487 case IES_XOR:
488 case IES_AND:
489 case IES_LSHIFT:
490 case IES_RSHIFT:
491 case IES_PLUS:
492 case IES_NOT:
493 case IES_MULTIPLY:
494 case IES_DIVIDE:
495 case IES_MOD:
496 case IES_LPAREN:
497 case IES_RPAREN:
498 case IES_LBRAC:
499 case IES_RBRAC:
500 case IES_INTEGER:
501 case IES_REGISTER:
502 case IES_INIT:
503 State = IES_MINUS;
504 // push minus operator if it is not a negate operator
505 if (CurrState == IES_REGISTER || CurrState == IES_RPAREN ||
506 CurrState == IES_INTEGER || CurrState == IES_RBRAC)
507 IC.pushOperator(IC_MINUS);
508 else if (PrevState == IES_REGISTER && CurrState == IES_MULTIPLY) {
509 // We have negate operator for Scale: it's illegal
510 ErrMsg = "Scale can't be negative";
511 return true;
512 } else
513 IC.pushOperator(IC_NEG);
514 if (CurrState == IES_REGISTER && PrevState != IES_MULTIPLY) {
515 // If we already have a BaseReg, then assume this is the IndexReg with
516 // no explicit scale.
517 if (!BaseReg) {
518 BaseReg = TmpReg;
519 } else {
520 if (IndexReg) {
521 ErrMsg = "BaseReg/IndexReg already set!";
522 return true;
524 IndexReg = TmpReg;
525 Scale = 0;
528 break;
530 PrevState = CurrState;
531 return false;
533 void onNot() {
534 IntelExprState CurrState = State;
535 switch (State) {
536 default:
537 State = IES_ERROR;
538 break;
539 case IES_OR:
540 case IES_XOR:
541 case IES_AND:
542 case IES_LSHIFT:
543 case IES_RSHIFT:
544 case IES_PLUS:
545 case IES_MINUS:
546 case IES_NOT:
547 case IES_MULTIPLY:
548 case IES_DIVIDE:
549 case IES_MOD:
550 case IES_LPAREN:
551 case IES_LBRAC:
552 case IES_INIT:
553 State = IES_NOT;
554 IC.pushOperator(IC_NOT);
555 break;
557 PrevState = CurrState;
560 bool onRegister(unsigned Reg, StringRef &ErrMsg) {
561 IntelExprState CurrState = State;
562 switch (State) {
563 default:
564 State = IES_ERROR;
565 break;
566 case IES_PLUS:
567 case IES_LPAREN:
568 case IES_LBRAC:
569 State = IES_REGISTER;
570 TmpReg = Reg;
571 IC.pushOperand(IC_REGISTER);
572 break;
573 case IES_MULTIPLY:
574 // Index Register - Scale * Register
575 if (PrevState == IES_INTEGER) {
576 if (IndexReg) {
577 ErrMsg = "BaseReg/IndexReg already set!";
578 return true;
580 State = IES_REGISTER;
581 IndexReg = Reg;
582 // Get the scale and replace the 'Scale * Register' with '0'.
583 Scale = IC.popOperand();
584 if (checkScale(Scale, ErrMsg))
585 return true;
586 IC.pushOperand(IC_IMM);
587 IC.popOperator();
588 } else {
589 State = IES_ERROR;
591 break;
593 PrevState = CurrState;
594 return false;
596 bool onIdentifierExpr(const MCExpr *SymRef, StringRef SymRefName,
597 const InlineAsmIdentifierInfo &IDInfo,
598 bool ParsingInlineAsm, StringRef &ErrMsg) {
599 // InlineAsm: Treat an enum value as an integer
600 if (ParsingInlineAsm)
601 if (IDInfo.isKind(InlineAsmIdentifierInfo::IK_EnumVal))
602 return onInteger(IDInfo.Enum.EnumVal, ErrMsg);
603 // Treat a symbolic constant like an integer
604 if (auto *CE = dyn_cast<MCConstantExpr>(SymRef))
605 return onInteger(CE->getValue(), ErrMsg);
606 PrevState = State;
607 bool HasSymbol = Sym != nullptr;
608 switch (State) {
609 default:
610 State = IES_ERROR;
611 break;
612 case IES_PLUS:
613 case IES_MINUS:
614 case IES_NOT:
615 case IES_INIT:
616 case IES_LBRAC:
617 MemExpr = true;
618 State = IES_INTEGER;
619 Sym = SymRef;
620 SymName = SymRefName;
621 IC.pushOperand(IC_IMM);
622 if (ParsingInlineAsm)
623 Info = IDInfo;
624 break;
626 if (HasSymbol)
627 ErrMsg = "cannot use more than one symbol in memory operand";
628 return HasSymbol;
630 bool onInteger(int64_t TmpInt, StringRef &ErrMsg) {
631 IntelExprState CurrState = State;
632 switch (State) {
633 default:
634 State = IES_ERROR;
635 break;
636 case IES_PLUS:
637 case IES_MINUS:
638 case IES_NOT:
639 case IES_OR:
640 case IES_XOR:
641 case IES_AND:
642 case IES_LSHIFT:
643 case IES_RSHIFT:
644 case IES_DIVIDE:
645 case IES_MOD:
646 case IES_MULTIPLY:
647 case IES_LPAREN:
648 case IES_INIT:
649 case IES_LBRAC:
650 State = IES_INTEGER;
651 if (PrevState == IES_REGISTER && CurrState == IES_MULTIPLY) {
652 // Index Register - Register * Scale
653 if (IndexReg) {
654 ErrMsg = "BaseReg/IndexReg already set!";
655 return true;
657 IndexReg = TmpReg;
658 Scale = TmpInt;
659 if (checkScale(Scale, ErrMsg))
660 return true;
661 // Get the scale and replace the 'Register * Scale' with '0'.
662 IC.popOperator();
663 } else {
664 IC.pushOperand(IC_IMM, TmpInt);
666 break;
668 PrevState = CurrState;
669 return false;
671 void onStar() {
672 PrevState = State;
673 switch (State) {
674 default:
675 State = IES_ERROR;
676 break;
677 case IES_INTEGER:
678 case IES_REGISTER:
679 case IES_RPAREN:
680 State = IES_MULTIPLY;
681 IC.pushOperator(IC_MULTIPLY);
682 break;
685 void onDivide() {
686 PrevState = State;
687 switch (State) {
688 default:
689 State = IES_ERROR;
690 break;
691 case IES_INTEGER:
692 case IES_RPAREN:
693 State = IES_DIVIDE;
694 IC.pushOperator(IC_DIVIDE);
695 break;
698 void onMod() {
699 PrevState = State;
700 switch (State) {
701 default:
702 State = IES_ERROR;
703 break;
704 case IES_INTEGER:
705 case IES_RPAREN:
706 State = IES_MOD;
707 IC.pushOperator(IC_MOD);
708 break;
711 bool onLBrac() {
712 if (BracCount)
713 return true;
714 PrevState = State;
715 switch (State) {
716 default:
717 State = IES_ERROR;
718 break;
719 case IES_RBRAC:
720 case IES_INTEGER:
721 case IES_RPAREN:
722 State = IES_PLUS;
723 IC.pushOperator(IC_PLUS);
724 break;
725 case IES_INIT:
726 assert(!BracCount && "BracCount should be zero on parsing's start");
727 State = IES_LBRAC;
728 break;
730 MemExpr = true;
731 BracCount++;
732 return false;
734 bool onRBrac() {
735 IntelExprState CurrState = State;
736 switch (State) {
737 default:
738 State = IES_ERROR;
739 break;
740 case IES_INTEGER:
741 case IES_REGISTER:
742 case IES_RPAREN:
743 if (BracCount-- != 1)
744 return true;
745 State = IES_RBRAC;
746 if (CurrState == IES_REGISTER && PrevState != IES_MULTIPLY) {
747 // If we already have a BaseReg, then assume this is the IndexReg with
748 // no explicit scale.
749 if (!BaseReg) {
750 BaseReg = TmpReg;
751 } else {
752 assert (!IndexReg && "BaseReg/IndexReg already set!");
753 IndexReg = TmpReg;
754 Scale = 0;
757 break;
759 PrevState = CurrState;
760 return false;
762 void onLParen() {
763 IntelExprState CurrState = State;
764 switch (State) {
765 default:
766 State = IES_ERROR;
767 break;
768 case IES_PLUS:
769 case IES_MINUS:
770 case IES_NOT:
771 case IES_OR:
772 case IES_XOR:
773 case IES_AND:
774 case IES_LSHIFT:
775 case IES_RSHIFT:
776 case IES_MULTIPLY:
777 case IES_DIVIDE:
778 case IES_MOD:
779 case IES_LPAREN:
780 case IES_INIT:
781 case IES_LBRAC:
782 State = IES_LPAREN;
783 IC.pushOperator(IC_LPAREN);
784 break;
786 PrevState = CurrState;
788 void onRParen() {
789 PrevState = State;
790 switch (State) {
791 default:
792 State = IES_ERROR;
793 break;
794 case IES_INTEGER:
795 case IES_REGISTER:
796 case IES_RPAREN:
797 State = IES_RPAREN;
798 IC.pushOperator(IC_RPAREN);
799 break;
804 bool Error(SMLoc L, const Twine &Msg, SMRange Range = None,
805 bool MatchingInlineAsm = false) {
806 MCAsmParser &Parser = getParser();
807 if (MatchingInlineAsm) {
808 if (!getLexer().isAtStartOfStatement())
809 Parser.eatToEndOfStatement();
810 return false;
812 return Parser.Error(L, Msg, Range);
815 std::nullptr_t ErrorOperand(SMLoc Loc, StringRef Msg, SMRange R = SMRange()) {
816 Error(Loc, Msg, R);
817 return nullptr;
820 std::unique_ptr<X86Operand> DefaultMemSIOperand(SMLoc Loc);
821 std::unique_ptr<X86Operand> DefaultMemDIOperand(SMLoc Loc);
822 bool IsSIReg(unsigned Reg);
823 unsigned GetSIDIForRegClass(unsigned RegClassID, unsigned Reg, bool IsSIReg);
824 void
825 AddDefaultSrcDestOperands(OperandVector &Operands,
826 std::unique_ptr<llvm::MCParsedAsmOperand> &&Src,
827 std::unique_ptr<llvm::MCParsedAsmOperand> &&Dst);
828 bool VerifyAndAdjustOperands(OperandVector &OrigOperands,
829 OperandVector &FinalOperands);
830 std::unique_ptr<X86Operand> ParseOperand();
831 std::unique_ptr<X86Operand> ParseATTOperand();
832 std::unique_ptr<X86Operand> ParseIntelOperand();
833 std::unique_ptr<X86Operand> ParseIntelOffsetOfOperator();
834 bool ParseIntelDotOperator(IntelExprStateMachine &SM, SMLoc &End);
835 unsigned IdentifyIntelInlineAsmOperator(StringRef Name);
836 unsigned ParseIntelInlineAsmOperator(unsigned OpKind);
837 std::unique_ptr<X86Operand> ParseRoundingModeOp(SMLoc Start);
838 bool ParseIntelNamedOperator(StringRef Name, IntelExprStateMachine &SM);
839 void RewriteIntelExpression(IntelExprStateMachine &SM, SMLoc Start,
840 SMLoc End);
841 bool ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End);
842 bool ParseIntelInlineAsmIdentifier(const MCExpr *&Val, StringRef &Identifier,
843 InlineAsmIdentifierInfo &Info,
844 bool IsUnevaluatedOperand, SMLoc &End);
846 std::unique_ptr<X86Operand> ParseMemOperand(unsigned SegReg,
847 const MCExpr *&Disp,
848 const SMLoc &StartLoc,
849 SMLoc &EndLoc);
851 X86::CondCode ParseConditionCode(StringRef CCode);
853 bool ParseIntelMemoryOperandSize(unsigned &Size);
854 std::unique_ptr<X86Operand>
855 CreateMemForInlineAsm(unsigned SegReg, const MCExpr *Disp, unsigned BaseReg,
856 unsigned IndexReg, unsigned Scale, SMLoc Start,
857 SMLoc End, unsigned Size, StringRef Identifier,
858 const InlineAsmIdentifierInfo &Info);
860 bool parseDirectiveEven(SMLoc L);
861 bool ParseDirectiveCode(StringRef IDVal, SMLoc L);
863 /// CodeView FPO data directives.
864 bool parseDirectiveFPOProc(SMLoc L);
865 bool parseDirectiveFPOSetFrame(SMLoc L);
866 bool parseDirectiveFPOPushReg(SMLoc L);
867 bool parseDirectiveFPOStackAlloc(SMLoc L);
868 bool parseDirectiveFPOStackAlign(SMLoc L);
869 bool parseDirectiveFPOEndPrologue(SMLoc L);
870 bool parseDirectiveFPOEndProc(SMLoc L);
871 bool parseDirectiveFPOData(SMLoc L);
873 /// SEH directives.
874 bool parseSEHRegisterNumber(unsigned RegClassID, unsigned &RegNo);
875 bool parseDirectiveSEHPushReg(SMLoc);
876 bool parseDirectiveSEHSetFrame(SMLoc);
877 bool parseDirectiveSEHSaveReg(SMLoc);
878 bool parseDirectiveSEHSaveXMM(SMLoc);
879 bool parseDirectiveSEHPushFrame(SMLoc);
881 unsigned checkTargetMatchPredicate(MCInst &Inst) override;
883 bool validateInstruction(MCInst &Inst, const OperandVector &Ops);
884 bool processInstruction(MCInst &Inst, const OperandVector &Ops);
886 /// Wrapper around MCStreamer::EmitInstruction(). Possibly adds
887 /// instrumentation around Inst.
888 void EmitInstruction(MCInst &Inst, OperandVector &Operands, MCStreamer &Out);
890 bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
891 OperandVector &Operands, MCStreamer &Out,
892 uint64_t &ErrorInfo,
893 bool MatchingInlineAsm) override;
895 void MatchFPUWaitAlias(SMLoc IDLoc, X86Operand &Op, OperandVector &Operands,
896 MCStreamer &Out, bool MatchingInlineAsm);
898 bool ErrorMissingFeature(SMLoc IDLoc, const FeatureBitset &MissingFeatures,
899 bool MatchingInlineAsm);
901 bool MatchAndEmitATTInstruction(SMLoc IDLoc, unsigned &Opcode,
902 OperandVector &Operands, MCStreamer &Out,
903 uint64_t &ErrorInfo,
904 bool MatchingInlineAsm);
906 bool MatchAndEmitIntelInstruction(SMLoc IDLoc, unsigned &Opcode,
907 OperandVector &Operands, MCStreamer &Out,
908 uint64_t &ErrorInfo,
909 bool MatchingInlineAsm);
911 bool OmitRegisterFromClobberLists(unsigned RegNo) override;
913 /// Parses AVX512 specific operand primitives: masked registers ({%k<NUM>}, {z})
914 /// and memory broadcasting ({1to<NUM>}) primitives, updating Operands vector if required.
915 /// return false if no parsing errors occurred, true otherwise.
916 bool HandleAVX512Operand(OperandVector &Operands,
917 const MCParsedAsmOperand &Op);
919 bool ParseZ(std::unique_ptr<X86Operand> &Z, const SMLoc &StartLoc);
921 bool is64BitMode() const {
922 // FIXME: Can tablegen auto-generate this?
923 return getSTI().getFeatureBits()[X86::Mode64Bit];
925 bool is32BitMode() const {
926 // FIXME: Can tablegen auto-generate this?
927 return getSTI().getFeatureBits()[X86::Mode32Bit];
929 bool is16BitMode() const {
930 // FIXME: Can tablegen auto-generate this?
931 return getSTI().getFeatureBits()[X86::Mode16Bit];
933 void SwitchMode(unsigned mode) {
934 MCSubtargetInfo &STI = copySTI();
935 FeatureBitset AllModes({X86::Mode64Bit, X86::Mode32Bit, X86::Mode16Bit});
936 FeatureBitset OldMode = STI.getFeatureBits() & AllModes;
937 FeatureBitset FB = ComputeAvailableFeatures(
938 STI.ToggleFeature(OldMode.flip(mode)));
939 setAvailableFeatures(FB);
941 assert(FeatureBitset({mode}) == (STI.getFeatureBits() & AllModes));
944 unsigned getPointerWidth() {
945 if (is16BitMode()) return 16;
946 if (is32BitMode()) return 32;
947 if (is64BitMode()) return 64;
948 llvm_unreachable("invalid mode");
951 bool isParsingIntelSyntax() {
952 return getParser().getAssemblerDialect();
955 /// @name Auto-generated Matcher Functions
956 /// {
958 #define GET_ASSEMBLER_HEADER
959 #include "X86GenAsmMatcher.inc"
961 /// }
963 public:
964 enum X86MatchResultTy {
965 Match_Unsupported = FIRST_TARGET_MATCH_RESULT_TY,
966 #define GET_OPERAND_DIAGNOSTIC_TYPES
967 #include "X86GenAsmMatcher.inc"
970 X86AsmParser(const MCSubtargetInfo &sti, MCAsmParser &Parser,
971 const MCInstrInfo &mii, const MCTargetOptions &Options)
972 : MCTargetAsmParser(Options, sti, mii), InstInfo(nullptr),
973 Code16GCC(false) {
975 Parser.addAliasForDirective(".word", ".2byte");
977 // Initialize the set of available features.
978 setAvailableFeatures(ComputeAvailableFeatures(getSTI().getFeatureBits()));
981 bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
983 bool parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) override;
985 bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
986 SMLoc NameLoc, OperandVector &Operands) override;
988 bool ParseDirective(AsmToken DirectiveID) override;
990 } // end anonymous namespace
992 /// @name Auto-generated Match Functions
993 /// {
995 static unsigned MatchRegisterName(StringRef Name);
997 /// }
999 static bool CheckBaseRegAndIndexRegAndScale(unsigned BaseReg, unsigned IndexReg,
1000 unsigned Scale, bool Is64BitMode,
1001 StringRef &ErrMsg) {
1002 // If we have both a base register and an index register make sure they are
1003 // both 64-bit or 32-bit registers.
1004 // To support VSIB, IndexReg can be 128-bit or 256-bit registers.
1006 if (BaseReg != 0 &&
1007 !(BaseReg == X86::RIP || BaseReg == X86::EIP ||
1008 X86MCRegisterClasses[X86::GR16RegClassID].contains(BaseReg) ||
1009 X86MCRegisterClasses[X86::GR32RegClassID].contains(BaseReg) ||
1010 X86MCRegisterClasses[X86::GR64RegClassID].contains(BaseReg))) {
1011 ErrMsg = "invalid base+index expression";
1012 return true;
1015 if (IndexReg != 0 &&
1016 !(IndexReg == X86::EIZ || IndexReg == X86::RIZ ||
1017 X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg) ||
1018 X86MCRegisterClasses[X86::GR32RegClassID].contains(IndexReg) ||
1019 X86MCRegisterClasses[X86::GR64RegClassID].contains(IndexReg) ||
1020 X86MCRegisterClasses[X86::VR128XRegClassID].contains(IndexReg) ||
1021 X86MCRegisterClasses[X86::VR256XRegClassID].contains(IndexReg) ||
1022 X86MCRegisterClasses[X86::VR512RegClassID].contains(IndexReg))) {
1023 ErrMsg = "invalid base+index expression";
1024 return true;
1027 if (((BaseReg == X86::RIP || BaseReg == X86::EIP) && IndexReg != 0) ||
1028 IndexReg == X86::EIP || IndexReg == X86::RIP ||
1029 IndexReg == X86::ESP || IndexReg == X86::RSP) {
1030 ErrMsg = "invalid base+index expression";
1031 return true;
1034 // Check for use of invalid 16-bit registers. Only BX/BP/SI/DI are allowed,
1035 // and then only in non-64-bit modes.
1036 if (X86MCRegisterClasses[X86::GR16RegClassID].contains(BaseReg) &&
1037 (Is64BitMode || (BaseReg != X86::BX && BaseReg != X86::BP &&
1038 BaseReg != X86::SI && BaseReg != X86::DI))) {
1039 ErrMsg = "invalid 16-bit base register";
1040 return true;
1043 if (BaseReg == 0 &&
1044 X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg)) {
1045 ErrMsg = "16-bit memory operand may not include only index register";
1046 return true;
1049 if (BaseReg != 0 && IndexReg != 0) {
1050 if (X86MCRegisterClasses[X86::GR64RegClassID].contains(BaseReg) &&
1051 (X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg) ||
1052 X86MCRegisterClasses[X86::GR32RegClassID].contains(IndexReg) ||
1053 IndexReg == X86::EIZ)) {
1054 ErrMsg = "base register is 64-bit, but index register is not";
1055 return true;
1057 if (X86MCRegisterClasses[X86::GR32RegClassID].contains(BaseReg) &&
1058 (X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg) ||
1059 X86MCRegisterClasses[X86::GR64RegClassID].contains(IndexReg) ||
1060 IndexReg == X86::RIZ)) {
1061 ErrMsg = "base register is 32-bit, but index register is not";
1062 return true;
1064 if (X86MCRegisterClasses[X86::GR16RegClassID].contains(BaseReg)) {
1065 if (X86MCRegisterClasses[X86::GR32RegClassID].contains(IndexReg) ||
1066 X86MCRegisterClasses[X86::GR64RegClassID].contains(IndexReg)) {
1067 ErrMsg = "base register is 16-bit, but index register is not";
1068 return true;
1070 if ((BaseReg != X86::BX && BaseReg != X86::BP) ||
1071 (IndexReg != X86::SI && IndexReg != X86::DI)) {
1072 ErrMsg = "invalid 16-bit base/index register combination";
1073 return true;
1078 // RIP/EIP-relative addressing is only supported in 64-bit mode.
1079 if (!Is64BitMode && BaseReg != 0 &&
1080 (BaseReg == X86::RIP || BaseReg == X86::EIP)) {
1081 ErrMsg = "IP-relative addressing requires 64-bit mode";
1082 return true;
1085 return checkScale(Scale, ErrMsg);
1088 bool X86AsmParser::ParseRegister(unsigned &RegNo,
1089 SMLoc &StartLoc, SMLoc &EndLoc) {
1090 MCAsmParser &Parser = getParser();
1091 RegNo = 0;
1092 const AsmToken &PercentTok = Parser.getTok();
1093 StartLoc = PercentTok.getLoc();
1095 // If we encounter a %, ignore it. This code handles registers with and
1096 // without the prefix, unprefixed registers can occur in cfi directives.
1097 if (!isParsingIntelSyntax() && PercentTok.is(AsmToken::Percent))
1098 Parser.Lex(); // Eat percent token.
1100 const AsmToken &Tok = Parser.getTok();
1101 EndLoc = Tok.getEndLoc();
1103 if (Tok.isNot(AsmToken::Identifier)) {
1104 if (isParsingIntelSyntax()) return true;
1105 return Error(StartLoc, "invalid register name",
1106 SMRange(StartLoc, EndLoc));
1109 RegNo = MatchRegisterName(Tok.getString());
1111 // If the match failed, try the register name as lowercase.
1112 if (RegNo == 0)
1113 RegNo = MatchRegisterName(Tok.getString().lower());
1115 // The "flags" register cannot be referenced directly.
1116 // Treat it as an identifier instead.
1117 if (isParsingInlineAsm() && isParsingIntelSyntax() && RegNo == X86::EFLAGS)
1118 RegNo = 0;
1120 if (!is64BitMode()) {
1121 // FIXME: This should be done using Requires<Not64BitMode> and
1122 // Requires<In64BitMode> so "eiz" usage in 64-bit instructions can be also
1123 // checked.
1124 // FIXME: Check AH, CH, DH, BH cannot be used in an instruction requiring a
1125 // REX prefix.
1126 if (RegNo == X86::RIZ || RegNo == X86::RIP ||
1127 X86MCRegisterClasses[X86::GR64RegClassID].contains(RegNo) ||
1128 X86II::isX86_64NonExtLowByteReg(RegNo) ||
1129 X86II::isX86_64ExtendedReg(RegNo)) {
1130 StringRef RegName = Tok.getString();
1131 Parser.Lex(); // Eat register name.
1132 return Error(StartLoc,
1133 "register %" + RegName + " is only available in 64-bit mode",
1134 SMRange(StartLoc, EndLoc));
1138 // Parse "%st" as "%st(0)" and "%st(1)", which is multiple tokens.
1139 if (RegNo == X86::ST0) {
1140 Parser.Lex(); // Eat 'st'
1142 // Check to see if we have '(4)' after %st.
1143 if (getLexer().isNot(AsmToken::LParen))
1144 return false;
1145 // Lex the paren.
1146 getParser().Lex();
1148 const AsmToken &IntTok = Parser.getTok();
1149 if (IntTok.isNot(AsmToken::Integer))
1150 return Error(IntTok.getLoc(), "expected stack index");
1151 switch (IntTok.getIntVal()) {
1152 case 0: RegNo = X86::ST0; break;
1153 case 1: RegNo = X86::ST1; break;
1154 case 2: RegNo = X86::ST2; break;
1155 case 3: RegNo = X86::ST3; break;
1156 case 4: RegNo = X86::ST4; break;
1157 case 5: RegNo = X86::ST5; break;
1158 case 6: RegNo = X86::ST6; break;
1159 case 7: RegNo = X86::ST7; break;
1160 default: return Error(IntTok.getLoc(), "invalid stack index");
1163 if (getParser().Lex().isNot(AsmToken::RParen))
1164 return Error(Parser.getTok().getLoc(), "expected ')'");
1166 EndLoc = Parser.getTok().getEndLoc();
1167 Parser.Lex(); // Eat ')'
1168 return false;
1171 EndLoc = Parser.getTok().getEndLoc();
1173 // If this is "db[0-15]", match it as an alias
1174 // for dr[0-15].
1175 if (RegNo == 0 && Tok.getString().startswith("db")) {
1176 if (Tok.getString().size() == 3) {
1177 switch (Tok.getString()[2]) {
1178 case '0': RegNo = X86::DR0; break;
1179 case '1': RegNo = X86::DR1; break;
1180 case '2': RegNo = X86::DR2; break;
1181 case '3': RegNo = X86::DR3; break;
1182 case '4': RegNo = X86::DR4; break;
1183 case '5': RegNo = X86::DR5; break;
1184 case '6': RegNo = X86::DR6; break;
1185 case '7': RegNo = X86::DR7; break;
1186 case '8': RegNo = X86::DR8; break;
1187 case '9': RegNo = X86::DR9; break;
1189 } else if (Tok.getString().size() == 4 && Tok.getString()[2] == '1') {
1190 switch (Tok.getString()[3]) {
1191 case '0': RegNo = X86::DR10; break;
1192 case '1': RegNo = X86::DR11; break;
1193 case '2': RegNo = X86::DR12; break;
1194 case '3': RegNo = X86::DR13; break;
1195 case '4': RegNo = X86::DR14; break;
1196 case '5': RegNo = X86::DR15; break;
1200 if (RegNo != 0) {
1201 EndLoc = Parser.getTok().getEndLoc();
1202 Parser.Lex(); // Eat it.
1203 return false;
1207 if (RegNo == 0) {
1208 if (isParsingIntelSyntax()) return true;
1209 return Error(StartLoc, "invalid register name",
1210 SMRange(StartLoc, EndLoc));
1213 Parser.Lex(); // Eat identifier token.
1214 return false;
1217 std::unique_ptr<X86Operand> X86AsmParser::DefaultMemSIOperand(SMLoc Loc) {
1218 bool Parse32 = is32BitMode() || Code16GCC;
1219 unsigned Basereg = is64BitMode() ? X86::RSI : (Parse32 ? X86::ESI : X86::SI);
1220 const MCExpr *Disp = MCConstantExpr::create(0, getContext());
1221 return X86Operand::CreateMem(getPointerWidth(), /*SegReg=*/0, Disp,
1222 /*BaseReg=*/Basereg, /*IndexReg=*/0, /*Scale=*/1,
1223 Loc, Loc, 0);
1226 std::unique_ptr<X86Operand> X86AsmParser::DefaultMemDIOperand(SMLoc Loc) {
1227 bool Parse32 = is32BitMode() || Code16GCC;
1228 unsigned Basereg = is64BitMode() ? X86::RDI : (Parse32 ? X86::EDI : X86::DI);
1229 const MCExpr *Disp = MCConstantExpr::create(0, getContext());
1230 return X86Operand::CreateMem(getPointerWidth(), /*SegReg=*/0, Disp,
1231 /*BaseReg=*/Basereg, /*IndexReg=*/0, /*Scale=*/1,
1232 Loc, Loc, 0);
1235 bool X86AsmParser::IsSIReg(unsigned Reg) {
1236 switch (Reg) {
1237 default: llvm_unreachable("Only (R|E)SI and (R|E)DI are expected!");
1238 case X86::RSI:
1239 case X86::ESI:
1240 case X86::SI:
1241 return true;
1242 case X86::RDI:
1243 case X86::EDI:
1244 case X86::DI:
1245 return false;
1249 unsigned X86AsmParser::GetSIDIForRegClass(unsigned RegClassID, unsigned Reg,
1250 bool IsSIReg) {
1251 switch (RegClassID) {
1252 default: llvm_unreachable("Unexpected register class");
1253 case X86::GR64RegClassID:
1254 return IsSIReg ? X86::RSI : X86::RDI;
1255 case X86::GR32RegClassID:
1256 return IsSIReg ? X86::ESI : X86::EDI;
1257 case X86::GR16RegClassID:
1258 return IsSIReg ? X86::SI : X86::DI;
1262 void X86AsmParser::AddDefaultSrcDestOperands(
1263 OperandVector& Operands, std::unique_ptr<llvm::MCParsedAsmOperand> &&Src,
1264 std::unique_ptr<llvm::MCParsedAsmOperand> &&Dst) {
1265 if (isParsingIntelSyntax()) {
1266 Operands.push_back(std::move(Dst));
1267 Operands.push_back(std::move(Src));
1269 else {
1270 Operands.push_back(std::move(Src));
1271 Operands.push_back(std::move(Dst));
1275 bool X86AsmParser::VerifyAndAdjustOperands(OperandVector &OrigOperands,
1276 OperandVector &FinalOperands) {
1278 if (OrigOperands.size() > 1) {
1279 // Check if sizes match, OrigOperands also contains the instruction name
1280 assert(OrigOperands.size() == FinalOperands.size() + 1 &&
1281 "Operand size mismatch");
1283 SmallVector<std::pair<SMLoc, std::string>, 2> Warnings;
1284 // Verify types match
1285 int RegClassID = -1;
1286 for (unsigned int i = 0; i < FinalOperands.size(); ++i) {
1287 X86Operand &OrigOp = static_cast<X86Operand &>(*OrigOperands[i + 1]);
1288 X86Operand &FinalOp = static_cast<X86Operand &>(*FinalOperands[i]);
1290 if (FinalOp.isReg() &&
1291 (!OrigOp.isReg() || FinalOp.getReg() != OrigOp.getReg()))
1292 // Return false and let a normal complaint about bogus operands happen
1293 return false;
1295 if (FinalOp.isMem()) {
1297 if (!OrigOp.isMem())
1298 // Return false and let a normal complaint about bogus operands happen
1299 return false;
1301 unsigned OrigReg = OrigOp.Mem.BaseReg;
1302 unsigned FinalReg = FinalOp.Mem.BaseReg;
1304 // If we've already encounterd a register class, make sure all register
1305 // bases are of the same register class
1306 if (RegClassID != -1 &&
1307 !X86MCRegisterClasses[RegClassID].contains(OrigReg)) {
1308 return Error(OrigOp.getStartLoc(),
1309 "mismatching source and destination index registers");
1312 if (X86MCRegisterClasses[X86::GR64RegClassID].contains(OrigReg))
1313 RegClassID = X86::GR64RegClassID;
1314 else if (X86MCRegisterClasses[X86::GR32RegClassID].contains(OrigReg))
1315 RegClassID = X86::GR32RegClassID;
1316 else if (X86MCRegisterClasses[X86::GR16RegClassID].contains(OrigReg))
1317 RegClassID = X86::GR16RegClassID;
1318 else
1319 // Unexpected register class type
1320 // Return false and let a normal complaint about bogus operands happen
1321 return false;
1323 bool IsSI = IsSIReg(FinalReg);
1324 FinalReg = GetSIDIForRegClass(RegClassID, FinalReg, IsSI);
1326 if (FinalReg != OrigReg) {
1327 std::string RegName = IsSI ? "ES:(R|E)SI" : "ES:(R|E)DI";
1328 Warnings.push_back(std::make_pair(
1329 OrigOp.getStartLoc(),
1330 "memory operand is only for determining the size, " + RegName +
1331 " will be used for the location"));
1334 FinalOp.Mem.Size = OrigOp.Mem.Size;
1335 FinalOp.Mem.SegReg = OrigOp.Mem.SegReg;
1336 FinalOp.Mem.BaseReg = FinalReg;
1340 // Produce warnings only if all the operands passed the adjustment - prevent
1341 // legal cases like "movsd (%rax), %xmm0" mistakenly produce warnings
1342 for (auto &WarningMsg : Warnings) {
1343 Warning(WarningMsg.first, WarningMsg.second);
1346 // Remove old operands
1347 for (unsigned int i = 0; i < FinalOperands.size(); ++i)
1348 OrigOperands.pop_back();
1350 // OrigOperands.append(FinalOperands.begin(), FinalOperands.end());
1351 for (unsigned int i = 0; i < FinalOperands.size(); ++i)
1352 OrigOperands.push_back(std::move(FinalOperands[i]));
1354 return false;
1357 std::unique_ptr<X86Operand> X86AsmParser::ParseOperand() {
1358 if (isParsingIntelSyntax())
1359 return ParseIntelOperand();
1360 return ParseATTOperand();
1363 std::unique_ptr<X86Operand> X86AsmParser::CreateMemForInlineAsm(
1364 unsigned SegReg, const MCExpr *Disp, unsigned BaseReg, unsigned IndexReg,
1365 unsigned Scale, SMLoc Start, SMLoc End, unsigned Size, StringRef Identifier,
1366 const InlineAsmIdentifierInfo &Info) {
1367 // If we found a decl other than a VarDecl, then assume it is a FuncDecl or
1368 // some other label reference.
1369 if (Info.isKind(InlineAsmIdentifierInfo::IK_Label)) {
1370 // Insert an explicit size if the user didn't have one.
1371 if (!Size) {
1372 Size = getPointerWidth();
1373 InstInfo->AsmRewrites->emplace_back(AOK_SizeDirective, Start,
1374 /*Len=*/0, Size);
1376 // Create an absolute memory reference in order to match against
1377 // instructions taking a PC relative operand.
1378 return X86Operand::CreateMem(getPointerWidth(), Disp, Start, End, Size,
1379 Identifier, Info.Label.Decl);
1381 // We either have a direct symbol reference, or an offset from a symbol. The
1382 // parser always puts the symbol on the LHS, so look there for size
1383 // calculation purposes.
1384 unsigned FrontendSize = 0;
1385 void *Decl = nullptr;
1386 bool IsGlobalLV = false;
1387 if (Info.isKind(InlineAsmIdentifierInfo::IK_Var)) {
1388 // Size is in terms of bits in this context.
1389 FrontendSize = Info.Var.Type * 8;
1390 Decl = Info.Var.Decl;
1391 IsGlobalLV = Info.Var.IsGlobalLV;
1393 // It is widely common for MS InlineAsm to use a global variable and one/two
1394 // registers in a mmory expression, and though unaccessible via rip/eip.
1395 if (IsGlobalLV && (BaseReg || IndexReg)) {
1396 return X86Operand::CreateMem(getPointerWidth(), Disp, Start, End);
1397 // Otherwise, we set the base register to a non-zero value
1398 // if we don't know the actual value at this time. This is necessary to
1399 // get the matching correct in some cases.
1400 } else {
1401 BaseReg = BaseReg ? BaseReg : 1;
1402 return X86Operand::CreateMem(getPointerWidth(), SegReg, Disp, BaseReg,
1403 IndexReg, Scale, Start, End, Size, Identifier,
1404 Decl, FrontendSize);
1408 // Some binary bitwise operators have a named synonymous
1409 // Query a candidate string for being such a named operator
1410 // and if so - invoke the appropriate handler
1411 bool X86AsmParser::ParseIntelNamedOperator(StringRef Name, IntelExprStateMachine &SM) {
1412 // A named operator should be either lower or upper case, but not a mix
1413 if (Name.compare(Name.lower()) && Name.compare(Name.upper()))
1414 return false;
1415 if (Name.equals_lower("not"))
1416 SM.onNot();
1417 else if (Name.equals_lower("or"))
1418 SM.onOr();
1419 else if (Name.equals_lower("shl"))
1420 SM.onLShift();
1421 else if (Name.equals_lower("shr"))
1422 SM.onRShift();
1423 else if (Name.equals_lower("xor"))
1424 SM.onXor();
1425 else if (Name.equals_lower("and"))
1426 SM.onAnd();
1427 else if (Name.equals_lower("mod"))
1428 SM.onMod();
1429 else
1430 return false;
1431 return true;
1434 bool X86AsmParser::ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End) {
1435 MCAsmParser &Parser = getParser();
1436 const AsmToken &Tok = Parser.getTok();
1437 StringRef ErrMsg;
1439 AsmToken::TokenKind PrevTK = AsmToken::Error;
1440 bool Done = false;
1441 while (!Done) {
1442 bool UpdateLocLex = true;
1443 AsmToken::TokenKind TK = getLexer().getKind();
1445 switch (TK) {
1446 default:
1447 if ((Done = SM.isValidEndState()))
1448 break;
1449 return Error(Tok.getLoc(), "unknown token in expression");
1450 case AsmToken::EndOfStatement:
1451 Done = true;
1452 break;
1453 case AsmToken::Real:
1454 // DotOperator: [ebx].0
1455 UpdateLocLex = false;
1456 if (ParseIntelDotOperator(SM, End))
1457 return true;
1458 break;
1459 case AsmToken::At:
1460 case AsmToken::String:
1461 case AsmToken::Identifier: {
1462 SMLoc IdentLoc = Tok.getLoc();
1463 StringRef Identifier = Tok.getString();
1464 UpdateLocLex = false;
1465 // Register
1466 unsigned Reg;
1467 if (Tok.is(AsmToken::Identifier) && !ParseRegister(Reg, IdentLoc, End)) {
1468 if (SM.onRegister(Reg, ErrMsg))
1469 return Error(Tok.getLoc(), ErrMsg);
1470 break;
1472 // Operator synonymous ("not", "or" etc.)
1473 if ((UpdateLocLex = ParseIntelNamedOperator(Identifier, SM)))
1474 break;
1475 // Symbol reference, when parsing assembly content
1476 InlineAsmIdentifierInfo Info;
1477 const MCExpr *Val;
1478 if (!isParsingInlineAsm()) {
1479 if (getParser().parsePrimaryExpr(Val, End)) {
1480 return Error(Tok.getLoc(), "Unexpected identifier!");
1481 } else if (SM.onIdentifierExpr(Val, Identifier, Info, false, ErrMsg)) {
1482 return Error(IdentLoc, ErrMsg);
1483 } else
1484 break;
1486 // MS InlineAsm operators (TYPE/LENGTH/SIZE)
1487 if (unsigned OpKind = IdentifyIntelInlineAsmOperator(Identifier)) {
1488 if (OpKind == IOK_OFFSET)
1489 return Error(IdentLoc, "Dealing OFFSET operator as part of"
1490 "a compound immediate expression is yet to be supported");
1491 if (int64_t Val = ParseIntelInlineAsmOperator(OpKind)) {
1492 if (SM.onInteger(Val, ErrMsg))
1493 return Error(IdentLoc, ErrMsg);
1494 } else
1495 return true;
1496 break;
1498 // MS Dot Operator expression
1499 if (Identifier.count('.') && PrevTK == AsmToken::RBrac) {
1500 if (ParseIntelDotOperator(SM, End))
1501 return true;
1502 break;
1504 // MS InlineAsm identifier
1505 // Call parseIdentifier() to combine @ with the identifier behind it.
1506 if (TK == AsmToken::At && Parser.parseIdentifier(Identifier))
1507 return Error(IdentLoc, "expected identifier");
1508 if (ParseIntelInlineAsmIdentifier(Val, Identifier, Info, false, End))
1509 return true;
1510 else if (SM.onIdentifierExpr(Val, Identifier, Info, true, ErrMsg))
1511 return Error(IdentLoc, ErrMsg);
1512 break;
1514 case AsmToken::Integer: {
1515 // Look for 'b' or 'f' following an Integer as a directional label
1516 SMLoc Loc = getTok().getLoc();
1517 int64_t IntVal = getTok().getIntVal();
1518 End = consumeToken();
1519 UpdateLocLex = false;
1520 if (getLexer().getKind() == AsmToken::Identifier) {
1521 StringRef IDVal = getTok().getString();
1522 if (IDVal == "f" || IDVal == "b") {
1523 MCSymbol *Sym =
1524 getContext().getDirectionalLocalSymbol(IntVal, IDVal == "b");
1525 MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None;
1526 const MCExpr *Val =
1527 MCSymbolRefExpr::create(Sym, Variant, getContext());
1528 if (IDVal == "b" && Sym->isUndefined())
1529 return Error(Loc, "invalid reference to undefined symbol");
1530 StringRef Identifier = Sym->getName();
1531 InlineAsmIdentifierInfo Info;
1532 if (SM.onIdentifierExpr(Val, Identifier, Info,
1533 isParsingInlineAsm(), ErrMsg))
1534 return Error(Loc, ErrMsg);
1535 End = consumeToken();
1536 } else {
1537 if (SM.onInteger(IntVal, ErrMsg))
1538 return Error(Loc, ErrMsg);
1540 } else {
1541 if (SM.onInteger(IntVal, ErrMsg))
1542 return Error(Loc, ErrMsg);
1544 break;
1546 case AsmToken::Plus:
1547 if (SM.onPlus(ErrMsg))
1548 return Error(getTok().getLoc(), ErrMsg);
1549 break;
1550 case AsmToken::Minus:
1551 if (SM.onMinus(ErrMsg))
1552 return Error(getTok().getLoc(), ErrMsg);
1553 break;
1554 case AsmToken::Tilde: SM.onNot(); break;
1555 case AsmToken::Star: SM.onStar(); break;
1556 case AsmToken::Slash: SM.onDivide(); break;
1557 case AsmToken::Percent: SM.onMod(); break;
1558 case AsmToken::Pipe: SM.onOr(); break;
1559 case AsmToken::Caret: SM.onXor(); break;
1560 case AsmToken::Amp: SM.onAnd(); break;
1561 case AsmToken::LessLess:
1562 SM.onLShift(); break;
1563 case AsmToken::GreaterGreater:
1564 SM.onRShift(); break;
1565 case AsmToken::LBrac:
1566 if (SM.onLBrac())
1567 return Error(Tok.getLoc(), "unexpected bracket encountered");
1568 break;
1569 case AsmToken::RBrac:
1570 if (SM.onRBrac())
1571 return Error(Tok.getLoc(), "unexpected bracket encountered");
1572 break;
1573 case AsmToken::LParen: SM.onLParen(); break;
1574 case AsmToken::RParen: SM.onRParen(); break;
1576 if (SM.hadError())
1577 return Error(Tok.getLoc(), "unknown token in expression");
1579 if (!Done && UpdateLocLex)
1580 End = consumeToken();
1582 PrevTK = TK;
1584 return false;
1587 void X86AsmParser::RewriteIntelExpression(IntelExprStateMachine &SM,
1588 SMLoc Start, SMLoc End) {
1589 SMLoc Loc = Start;
1590 unsigned ExprLen = End.getPointer() - Start.getPointer();
1591 // Skip everything before a symbol displacement (if we have one)
1592 if (SM.getSym()) {
1593 StringRef SymName = SM.getSymName();
1594 if (unsigned Len = SymName.data() - Start.getPointer())
1595 InstInfo->AsmRewrites->emplace_back(AOK_Skip, Start, Len);
1596 Loc = SMLoc::getFromPointer(SymName.data() + SymName.size());
1597 ExprLen = End.getPointer() - (SymName.data() + SymName.size());
1598 // If we have only a symbol than there's no need for complex rewrite,
1599 // simply skip everything after it
1600 if (!(SM.getBaseReg() || SM.getIndexReg() || SM.getImm())) {
1601 if (ExprLen)
1602 InstInfo->AsmRewrites->emplace_back(AOK_Skip, Loc, ExprLen);
1603 return;
1606 // Build an Intel Expression rewrite
1607 StringRef BaseRegStr;
1608 StringRef IndexRegStr;
1609 if (SM.getBaseReg())
1610 BaseRegStr = X86IntelInstPrinter::getRegisterName(SM.getBaseReg());
1611 if (SM.getIndexReg())
1612 IndexRegStr = X86IntelInstPrinter::getRegisterName(SM.getIndexReg());
1613 // Emit it
1614 IntelExpr Expr(BaseRegStr, IndexRegStr, SM.getScale(), SM.getImm(), SM.isMemExpr());
1615 InstInfo->AsmRewrites->emplace_back(Loc, ExprLen, Expr);
1618 // Inline assembly may use variable names with namespace alias qualifiers.
1619 bool X86AsmParser::ParseIntelInlineAsmIdentifier(const MCExpr *&Val,
1620 StringRef &Identifier,
1621 InlineAsmIdentifierInfo &Info,
1622 bool IsUnevaluatedOperand,
1623 SMLoc &End) {
1624 MCAsmParser &Parser = getParser();
1625 assert(isParsingInlineAsm() && "Expected to be parsing inline assembly.");
1626 Val = nullptr;
1628 StringRef LineBuf(Identifier.data());
1629 SemaCallback->LookupInlineAsmIdentifier(LineBuf, Info, IsUnevaluatedOperand);
1631 const AsmToken &Tok = Parser.getTok();
1632 SMLoc Loc = Tok.getLoc();
1634 // Advance the token stream until the end of the current token is
1635 // after the end of what the frontend claimed.
1636 const char *EndPtr = Tok.getLoc().getPointer() + LineBuf.size();
1637 do {
1638 End = Tok.getEndLoc();
1639 getLexer().Lex();
1640 } while (End.getPointer() < EndPtr);
1641 Identifier = LineBuf;
1643 // The frontend should end parsing on an assembler token boundary, unless it
1644 // failed parsing.
1645 assert((End.getPointer() == EndPtr ||
1646 Info.isKind(InlineAsmIdentifierInfo::IK_Invalid)) &&
1647 "frontend claimed part of a token?");
1649 // If the identifier lookup was unsuccessful, assume that we are dealing with
1650 // a label.
1651 if (Info.isKind(InlineAsmIdentifierInfo::IK_Invalid)) {
1652 StringRef InternalName =
1653 SemaCallback->LookupInlineAsmLabel(Identifier, getSourceManager(),
1654 Loc, false);
1655 assert(InternalName.size() && "We should have an internal name here.");
1656 // Push a rewrite for replacing the identifier name with the internal name.
1657 InstInfo->AsmRewrites->emplace_back(AOK_Label, Loc, Identifier.size(),
1658 InternalName);
1659 } else if (Info.isKind(InlineAsmIdentifierInfo::IK_EnumVal))
1660 return false;
1661 // Create the symbol reference.
1662 MCSymbol *Sym = getContext().getOrCreateSymbol(Identifier);
1663 MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None;
1664 Val = MCSymbolRefExpr::create(Sym, Variant, getParser().getContext());
1665 return false;
1668 //ParseRoundingModeOp - Parse AVX-512 rounding mode operand
1669 std::unique_ptr<X86Operand>
1670 X86AsmParser::ParseRoundingModeOp(SMLoc Start) {
1671 MCAsmParser &Parser = getParser();
1672 const AsmToken &Tok = Parser.getTok();
1673 // Eat "{" and mark the current place.
1674 const SMLoc consumedToken = consumeToken();
1675 if (Tok.isNot(AsmToken::Identifier))
1676 return ErrorOperand(Tok.getLoc(), "Expected an identifier after {");
1677 if (Tok.getIdentifier().startswith("r")){
1678 int rndMode = StringSwitch<int>(Tok.getIdentifier())
1679 .Case("rn", X86::STATIC_ROUNDING::TO_NEAREST_INT)
1680 .Case("rd", X86::STATIC_ROUNDING::TO_NEG_INF)
1681 .Case("ru", X86::STATIC_ROUNDING::TO_POS_INF)
1682 .Case("rz", X86::STATIC_ROUNDING::TO_ZERO)
1683 .Default(-1);
1684 if (-1 == rndMode)
1685 return ErrorOperand(Tok.getLoc(), "Invalid rounding mode.");
1686 Parser.Lex(); // Eat "r*" of r*-sae
1687 if (!getLexer().is(AsmToken::Minus))
1688 return ErrorOperand(Tok.getLoc(), "Expected - at this point");
1689 Parser.Lex(); // Eat "-"
1690 Parser.Lex(); // Eat the sae
1691 if (!getLexer().is(AsmToken::RCurly))
1692 return ErrorOperand(Tok.getLoc(), "Expected } at this point");
1693 SMLoc End = Tok.getEndLoc();
1694 Parser.Lex(); // Eat "}"
1695 const MCExpr *RndModeOp =
1696 MCConstantExpr::create(rndMode, Parser.getContext());
1697 return X86Operand::CreateImm(RndModeOp, Start, End);
1699 if(Tok.getIdentifier().equals("sae")){
1700 Parser.Lex(); // Eat the sae
1701 if (!getLexer().is(AsmToken::RCurly))
1702 return ErrorOperand(Tok.getLoc(), "Expected } at this point");
1703 Parser.Lex(); // Eat "}"
1704 return X86Operand::CreateToken("{sae}", consumedToken);
1706 return ErrorOperand(Tok.getLoc(), "unknown token in expression");
1709 /// Parse the '.' operator.
1710 bool X86AsmParser::ParseIntelDotOperator(IntelExprStateMachine &SM, SMLoc &End) {
1711 const AsmToken &Tok = getTok();
1712 unsigned Offset;
1714 // Drop the optional '.'.
1715 StringRef DotDispStr = Tok.getString();
1716 if (DotDispStr.startswith("."))
1717 DotDispStr = DotDispStr.drop_front(1);
1719 // .Imm gets lexed as a real.
1720 if (Tok.is(AsmToken::Real)) {
1721 APInt DotDisp;
1722 DotDispStr.getAsInteger(10, DotDisp);
1723 Offset = DotDisp.getZExtValue();
1724 } else if (isParsingInlineAsm() && Tok.is(AsmToken::Identifier)) {
1725 std::pair<StringRef, StringRef> BaseMember = DotDispStr.split('.');
1726 if (SemaCallback->LookupInlineAsmField(BaseMember.first, BaseMember.second,
1727 Offset))
1728 return Error(Tok.getLoc(), "Unable to lookup field reference!");
1729 } else
1730 return Error(Tok.getLoc(), "Unexpected token type!");
1732 // Eat the DotExpression and update End
1733 End = SMLoc::getFromPointer(DotDispStr.data());
1734 const char *DotExprEndLoc = DotDispStr.data() + DotDispStr.size();
1735 while (Tok.getLoc().getPointer() < DotExprEndLoc)
1736 Lex();
1737 SM.addImm(Offset);
1738 return false;
1741 /// Parse the 'offset' operator. This operator is used to specify the
1742 /// location rather then the content of a variable.
1743 std::unique_ptr<X86Operand> X86AsmParser::ParseIntelOffsetOfOperator() {
1744 MCAsmParser &Parser = getParser();
1745 const AsmToken &Tok = Parser.getTok();
1746 SMLoc OffsetOfLoc = Tok.getLoc();
1747 Parser.Lex(); // Eat offset.
1749 const MCExpr *Val;
1750 InlineAsmIdentifierInfo Info;
1751 SMLoc Start = Tok.getLoc(), End;
1752 StringRef Identifier = Tok.getString();
1753 if (ParseIntelInlineAsmIdentifier(Val, Identifier, Info,
1754 /*Unevaluated=*/false, End))
1755 return nullptr;
1757 void *Decl = nullptr;
1758 // FIXME: MS evaluates "offset <Constant>" to the underlying integral
1759 if (Info.isKind(InlineAsmIdentifierInfo::IK_EnumVal))
1760 return ErrorOperand(Start, "offset operator cannot yet handle constants");
1761 else if (Info.isKind(InlineAsmIdentifierInfo::IK_Var))
1762 Decl = Info.Var.Decl;
1763 // Don't emit the offset operator.
1764 InstInfo->AsmRewrites->emplace_back(AOK_Skip, OffsetOfLoc, 7);
1766 // The offset operator will have an 'r' constraint, thus we need to create
1767 // register operand to ensure proper matching. Just pick a GPR based on
1768 // the size of a pointer.
1769 bool Parse32 = is32BitMode() || Code16GCC;
1770 unsigned RegNo = is64BitMode() ? X86::RBX : (Parse32 ? X86::EBX : X86::BX);
1772 return X86Operand::CreateReg(RegNo, Start, End, /*GetAddress=*/true,
1773 OffsetOfLoc, Identifier, Decl);
1776 // Query a candidate string for being an Intel assembly operator
1777 // Report back its kind, or IOK_INVALID if does not evaluated as a known one
1778 unsigned X86AsmParser::IdentifyIntelInlineAsmOperator(StringRef Name) {
1779 return StringSwitch<unsigned>(Name)
1780 .Cases("TYPE","type",IOK_TYPE)
1781 .Cases("SIZE","size",IOK_SIZE)
1782 .Cases("LENGTH","length",IOK_LENGTH)
1783 .Cases("OFFSET","offset",IOK_OFFSET)
1784 .Default(IOK_INVALID);
1787 /// Parse the 'LENGTH', 'TYPE' and 'SIZE' operators. The LENGTH operator
1788 /// returns the number of elements in an array. It returns the value 1 for
1789 /// non-array variables. The SIZE operator returns the size of a C or C++
1790 /// variable. A variable's size is the product of its LENGTH and TYPE. The
1791 /// TYPE operator returns the size of a C or C++ type or variable. If the
1792 /// variable is an array, TYPE returns the size of a single element.
1793 unsigned X86AsmParser::ParseIntelInlineAsmOperator(unsigned OpKind) {
1794 MCAsmParser &Parser = getParser();
1795 const AsmToken &Tok = Parser.getTok();
1796 Parser.Lex(); // Eat operator.
1798 const MCExpr *Val = nullptr;
1799 InlineAsmIdentifierInfo Info;
1800 SMLoc Start = Tok.getLoc(), End;
1801 StringRef Identifier = Tok.getString();
1802 if (ParseIntelInlineAsmIdentifier(Val, Identifier, Info,
1803 /*Unevaluated=*/true, End))
1804 return 0;
1806 if (!Info.isKind(InlineAsmIdentifierInfo::IK_Var)) {
1807 Error(Start, "unable to lookup expression");
1808 return 0;
1811 unsigned CVal = 0;
1812 switch(OpKind) {
1813 default: llvm_unreachable("Unexpected operand kind!");
1814 case IOK_LENGTH: CVal = Info.Var.Length; break;
1815 case IOK_SIZE: CVal = Info.Var.Size; break;
1816 case IOK_TYPE: CVal = Info.Var.Type; break;
1819 return CVal;
1822 bool X86AsmParser::ParseIntelMemoryOperandSize(unsigned &Size) {
1823 Size = StringSwitch<unsigned>(getTok().getString())
1824 .Cases("BYTE", "byte", 8)
1825 .Cases("WORD", "word", 16)
1826 .Cases("DWORD", "dword", 32)
1827 .Cases("FLOAT", "float", 32)
1828 .Cases("LONG", "long", 32)
1829 .Cases("FWORD", "fword", 48)
1830 .Cases("DOUBLE", "double", 64)
1831 .Cases("QWORD", "qword", 64)
1832 .Cases("MMWORD","mmword", 64)
1833 .Cases("XWORD", "xword", 80)
1834 .Cases("TBYTE", "tbyte", 80)
1835 .Cases("XMMWORD", "xmmword", 128)
1836 .Cases("YMMWORD", "ymmword", 256)
1837 .Cases("ZMMWORD", "zmmword", 512)
1838 .Default(0);
1839 if (Size) {
1840 const AsmToken &Tok = Lex(); // Eat operand size (e.g., byte, word).
1841 if (!(Tok.getString().equals("PTR") || Tok.getString().equals("ptr")))
1842 return Error(Tok.getLoc(), "Expected 'PTR' or 'ptr' token!");
1843 Lex(); // Eat ptr.
1845 return false;
1848 std::unique_ptr<X86Operand> X86AsmParser::ParseIntelOperand() {
1849 MCAsmParser &Parser = getParser();
1850 const AsmToken &Tok = Parser.getTok();
1851 SMLoc Start, End;
1853 // FIXME: Offset operator
1854 // Should be handled as part of immediate expression, as other operators
1855 // Currently, only supported as a stand-alone operand
1856 if (isParsingInlineAsm())
1857 if (IdentifyIntelInlineAsmOperator(Tok.getString()) == IOK_OFFSET)
1858 return ParseIntelOffsetOfOperator();
1860 // Parse optional Size directive.
1861 unsigned Size;
1862 if (ParseIntelMemoryOperandSize(Size))
1863 return nullptr;
1864 bool PtrInOperand = bool(Size);
1866 Start = Tok.getLoc();
1868 // Rounding mode operand.
1869 if (getLexer().is(AsmToken::LCurly))
1870 return ParseRoundingModeOp(Start);
1872 // Register operand.
1873 unsigned RegNo = 0;
1874 if (Tok.is(AsmToken::Identifier) && !ParseRegister(RegNo, Start, End)) {
1875 if (RegNo == X86::RIP)
1876 return ErrorOperand(Start, "rip can only be used as a base register");
1877 // A Register followed by ':' is considered a segment override
1878 if (Tok.isNot(AsmToken::Colon))
1879 return !PtrInOperand ? X86Operand::CreateReg(RegNo, Start, End) :
1880 ErrorOperand(Start, "expected memory operand after 'ptr', "
1881 "found register operand instead");
1882 // An alleged segment override. check if we have a valid segment register
1883 if (!X86MCRegisterClasses[X86::SEGMENT_REGRegClassID].contains(RegNo))
1884 return ErrorOperand(Start, "invalid segment register");
1885 // Eat ':' and update Start location
1886 Start = Lex().getLoc();
1889 // Immediates and Memory
1890 IntelExprStateMachine SM;
1891 if (ParseIntelExpression(SM, End))
1892 return nullptr;
1894 if (isParsingInlineAsm())
1895 RewriteIntelExpression(SM, Start, Tok.getLoc());
1897 int64_t Imm = SM.getImm();
1898 const MCExpr *Disp = SM.getSym();
1899 const MCExpr *ImmDisp = MCConstantExpr::create(Imm, getContext());
1900 if (Disp && Imm)
1901 Disp = MCBinaryExpr::createAdd(Disp, ImmDisp, getContext());
1902 if (!Disp)
1903 Disp = ImmDisp;
1905 // RegNo != 0 specifies a valid segment register,
1906 // and we are parsing a segment override
1907 if (!SM.isMemExpr() && !RegNo)
1908 return X86Operand::CreateImm(Disp, Start, End);
1910 StringRef ErrMsg;
1911 unsigned BaseReg = SM.getBaseReg();
1912 unsigned IndexReg = SM.getIndexReg();
1913 unsigned Scale = SM.getScale();
1915 if (Scale == 0 && BaseReg != X86::ESP && BaseReg != X86::RSP &&
1916 (IndexReg == X86::ESP || IndexReg == X86::RSP))
1917 std::swap(BaseReg, IndexReg);
1919 // If BaseReg is a vector register and IndexReg is not, swap them unless
1920 // Scale was specified in which case it would be an error.
1921 if (Scale == 0 &&
1922 !(X86MCRegisterClasses[X86::VR128XRegClassID].contains(IndexReg) ||
1923 X86MCRegisterClasses[X86::VR256XRegClassID].contains(IndexReg) ||
1924 X86MCRegisterClasses[X86::VR512RegClassID].contains(IndexReg)) &&
1925 (X86MCRegisterClasses[X86::VR128XRegClassID].contains(BaseReg) ||
1926 X86MCRegisterClasses[X86::VR256XRegClassID].contains(BaseReg) ||
1927 X86MCRegisterClasses[X86::VR512RegClassID].contains(BaseReg)))
1928 std::swap(BaseReg, IndexReg);
1930 if (Scale != 0 &&
1931 X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg))
1932 return ErrorOperand(Start, "16-bit addresses cannot have a scale");
1934 // If there was no explicit scale specified, change it to 1.
1935 if (Scale == 0)
1936 Scale = 1;
1938 // If this is a 16-bit addressing mode with the base and index in the wrong
1939 // order, swap them so CheckBaseRegAndIndexRegAndScale doesn't fail. It is
1940 // shared with att syntax where order matters.
1941 if ((BaseReg == X86::SI || BaseReg == X86::DI) &&
1942 (IndexReg == X86::BX || IndexReg == X86::BP))
1943 std::swap(BaseReg, IndexReg);
1945 if ((BaseReg || IndexReg) &&
1946 CheckBaseRegAndIndexRegAndScale(BaseReg, IndexReg, Scale, is64BitMode(),
1947 ErrMsg))
1948 return ErrorOperand(Start, ErrMsg);
1949 if (isParsingInlineAsm())
1950 return CreateMemForInlineAsm(RegNo, Disp, BaseReg, IndexReg,
1951 Scale, Start, End, Size, SM.getSymName(),
1952 SM.getIdentifierInfo());
1953 if (!(BaseReg || IndexReg || RegNo))
1954 return X86Operand::CreateMem(getPointerWidth(), Disp, Start, End, Size);
1955 return X86Operand::CreateMem(getPointerWidth(), RegNo, Disp,
1956 BaseReg, IndexReg, Scale, Start, End, Size);
1959 std::unique_ptr<X86Operand> X86AsmParser::ParseATTOperand() {
1960 MCAsmParser &Parser = getParser();
1961 switch (getLexer().getKind()) {
1962 case AsmToken::Dollar: {
1963 // $42 or $ID -> immediate.
1964 SMLoc Start = Parser.getTok().getLoc(), End;
1965 Parser.Lex();
1966 const MCExpr *Val;
1967 // This is an immediate, so we should not parse a register. Do a precheck
1968 // for '%' to supercede intra-register parse errors.
1969 SMLoc L = Parser.getTok().getLoc();
1970 if (check(getLexer().is(AsmToken::Percent), L,
1971 "expected immediate expression") ||
1972 getParser().parseExpression(Val, End) ||
1973 check(isa<X86MCExpr>(Val), L, "expected immediate expression"))
1974 return nullptr;
1975 return X86Operand::CreateImm(Val, Start, End);
1977 case AsmToken::LCurly: {
1978 SMLoc Start = Parser.getTok().getLoc();
1979 return ParseRoundingModeOp(Start);
1981 default: {
1982 // This a memory operand or a register. We have some parsing complications
1983 // as a '(' may be part of an immediate expression or the addressing mode
1984 // block. This is complicated by the fact that an assembler-level variable
1985 // may refer either to a register or an immediate expression.
1987 SMLoc Loc = Parser.getTok().getLoc(), EndLoc;
1988 const MCExpr *Expr = nullptr;
1989 unsigned Reg = 0;
1990 if (getLexer().isNot(AsmToken::LParen)) {
1991 // No '(' so this is either a displacement expression or a register.
1992 if (Parser.parseExpression(Expr, EndLoc))
1993 return nullptr;
1994 if (auto *RE = dyn_cast<X86MCExpr>(Expr)) {
1995 // Segment Register. Reset Expr and copy value to register.
1996 Expr = nullptr;
1997 Reg = RE->getRegNo();
1999 // Sanity check register.
2000 if (Reg == X86::EIZ || Reg == X86::RIZ)
2001 return ErrorOperand(
2002 Loc, "%eiz and %riz can only be used as index registers",
2003 SMRange(Loc, EndLoc));
2004 if (Reg == X86::RIP)
2005 return ErrorOperand(Loc, "%rip can only be used as a base register",
2006 SMRange(Loc, EndLoc));
2007 // Return register that are not segment prefixes immediately.
2008 if (!Parser.parseOptionalToken(AsmToken::Colon))
2009 return X86Operand::CreateReg(Reg, Loc, EndLoc);
2010 if (!X86MCRegisterClasses[X86::SEGMENT_REGRegClassID].contains(Reg))
2011 return ErrorOperand(Loc, "invalid segment register");
2014 // This is a Memory operand.
2015 return ParseMemOperand(Reg, Expr, Loc, EndLoc);
2020 // X86::COND_INVALID if not a recognized condition code or alternate mnemonic,
2021 // otherwise the EFLAGS Condition Code enumerator.
2022 X86::CondCode X86AsmParser::ParseConditionCode(StringRef CC) {
2023 return StringSwitch<X86::CondCode>(CC)
2024 .Case("o", X86::COND_O) // Overflow
2025 .Case("no", X86::COND_NO) // No Overflow
2026 .Cases("b", "nae", X86::COND_B) // Below/Neither Above nor Equal
2027 .Cases("ae", "nb", X86::COND_AE) // Above or Equal/Not Below
2028 .Cases("e", "z", X86::COND_E) // Equal/Zero
2029 .Cases("ne", "nz", X86::COND_NE) // Not Equal/Not Zero
2030 .Cases("be", "na", X86::COND_BE) // Below or Equal/Not Above
2031 .Cases("a", "nbe", X86::COND_A) // Above/Neither Below nor Equal
2032 .Case("s", X86::COND_S) // Sign
2033 .Case("ns", X86::COND_NS) // No Sign
2034 .Cases("p", "pe", X86::COND_P) // Parity/Parity Even
2035 .Cases("np", "po", X86::COND_NP) // No Parity/Parity Odd
2036 .Cases("l", "nge", X86::COND_L) // Less/Neither Greater nor Equal
2037 .Cases("ge", "nl", X86::COND_GE) // Greater or Equal/Not Less
2038 .Cases("le", "ng", X86::COND_LE) // Less or Equal/Not Greater
2039 .Cases("g", "nle", X86::COND_G) // Greater/Neither Less nor Equal
2040 .Default(X86::COND_INVALID);
2043 // true on failure, false otherwise
2044 // If no {z} mark was found - Parser doesn't advance
2045 bool X86AsmParser::ParseZ(std::unique_ptr<X86Operand> &Z,
2046 const SMLoc &StartLoc) {
2047 MCAsmParser &Parser = getParser();
2048 // Assuming we are just pass the '{' mark, quering the next token
2049 // Searched for {z}, but none was found. Return false, as no parsing error was
2050 // encountered
2051 if (!(getLexer().is(AsmToken::Identifier) &&
2052 (getLexer().getTok().getIdentifier() == "z")))
2053 return false;
2054 Parser.Lex(); // Eat z
2055 // Query and eat the '}' mark
2056 if (!getLexer().is(AsmToken::RCurly))
2057 return Error(getLexer().getLoc(), "Expected } at this point");
2058 Parser.Lex(); // Eat '}'
2059 // Assign Z with the {z} mark opernad
2060 Z = X86Operand::CreateToken("{z}", StartLoc);
2061 return false;
2064 // true on failure, false otherwise
2065 bool X86AsmParser::HandleAVX512Operand(OperandVector &Operands,
2066 const MCParsedAsmOperand &Op) {
2067 MCAsmParser &Parser = getParser();
2068 if (getLexer().is(AsmToken::LCurly)) {
2069 // Eat "{" and mark the current place.
2070 const SMLoc consumedToken = consumeToken();
2071 // Distinguish {1to<NUM>} from {%k<NUM>}.
2072 if(getLexer().is(AsmToken::Integer)) {
2073 // Parse memory broadcasting ({1to<NUM>}).
2074 if (getLexer().getTok().getIntVal() != 1)
2075 return TokError("Expected 1to<NUM> at this point");
2076 Parser.Lex(); // Eat "1" of 1to8
2077 if (!getLexer().is(AsmToken::Identifier) ||
2078 !getLexer().getTok().getIdentifier().startswith("to"))
2079 return TokError("Expected 1to<NUM> at this point");
2080 // Recognize only reasonable suffixes.
2081 const char *BroadcastPrimitive =
2082 StringSwitch<const char*>(getLexer().getTok().getIdentifier())
2083 .Case("to2", "{1to2}")
2084 .Case("to4", "{1to4}")
2085 .Case("to8", "{1to8}")
2086 .Case("to16", "{1to16}")
2087 .Default(nullptr);
2088 if (!BroadcastPrimitive)
2089 return TokError("Invalid memory broadcast primitive.");
2090 Parser.Lex(); // Eat "toN" of 1toN
2091 if (!getLexer().is(AsmToken::RCurly))
2092 return TokError("Expected } at this point");
2093 Parser.Lex(); // Eat "}"
2094 Operands.push_back(X86Operand::CreateToken(BroadcastPrimitive,
2095 consumedToken));
2096 // No AVX512 specific primitives can pass
2097 // after memory broadcasting, so return.
2098 return false;
2099 } else {
2100 // Parse either {k}{z}, {z}{k}, {k} or {z}
2101 // last one have no meaning, but GCC accepts it
2102 // Currently, we're just pass a '{' mark
2103 std::unique_ptr<X86Operand> Z;
2104 if (ParseZ(Z, consumedToken))
2105 return true;
2106 // Reaching here means that parsing of the allegadly '{z}' mark yielded
2107 // no errors.
2108 // Query for the need of further parsing for a {%k<NUM>} mark
2109 if (!Z || getLexer().is(AsmToken::LCurly)) {
2110 SMLoc StartLoc = Z ? consumeToken() : consumedToken;
2111 // Parse an op-mask register mark ({%k<NUM>}), which is now to be
2112 // expected
2113 unsigned RegNo;
2114 SMLoc RegLoc;
2115 if (!ParseRegister(RegNo, RegLoc, StartLoc) &&
2116 X86MCRegisterClasses[X86::VK1RegClassID].contains(RegNo)) {
2117 if (RegNo == X86::K0)
2118 return Error(RegLoc, "Register k0 can't be used as write mask");
2119 if (!getLexer().is(AsmToken::RCurly))
2120 return Error(getLexer().getLoc(), "Expected } at this point");
2121 Operands.push_back(X86Operand::CreateToken("{", StartLoc));
2122 Operands.push_back(
2123 X86Operand::CreateReg(RegNo, StartLoc, StartLoc));
2124 Operands.push_back(X86Operand::CreateToken("}", consumeToken()));
2125 } else
2126 return Error(getLexer().getLoc(),
2127 "Expected an op-mask register at this point");
2128 // {%k<NUM>} mark is found, inquire for {z}
2129 if (getLexer().is(AsmToken::LCurly) && !Z) {
2130 // Have we've found a parsing error, or found no (expected) {z} mark
2131 // - report an error
2132 if (ParseZ(Z, consumeToken()) || !Z)
2133 return Error(getLexer().getLoc(),
2134 "Expected a {z} mark at this point");
2137 // '{z}' on its own is meaningless, hence should be ignored.
2138 // on the contrary - have it been accompanied by a K register,
2139 // allow it.
2140 if (Z)
2141 Operands.push_back(std::move(Z));
2145 return false;
2148 /// ParseMemOperand: 'seg : disp(basereg, indexreg, scale)'. The '%ds:' prefix
2149 /// has already been parsed if present. disp may be provided as well.
2150 std::unique_ptr<X86Operand> X86AsmParser::ParseMemOperand(unsigned SegReg,
2151 const MCExpr *&Disp,
2152 const SMLoc &StartLoc,
2153 SMLoc &EndLoc) {
2154 MCAsmParser &Parser = getParser();
2155 SMLoc Loc;
2156 // Based on the initial passed values, we may be in any of these cases, we are
2157 // in one of these cases (with current position (*)):
2159 // 1. seg : * disp (base-index-scale-expr)
2160 // 2. seg : *(disp) (base-index-scale-expr)
2161 // 3. seg : *(base-index-scale-expr)
2162 // 4. disp *(base-index-scale-expr)
2163 // 5. *(disp) (base-index-scale-expr)
2164 // 6. *(base-index-scale-expr)
2165 // 7. disp *
2166 // 8. *(disp)
2168 // If we do not have an displacement yet, check if we're in cases 4 or 6 by
2169 // checking if the first object after the parenthesis is a register (or an
2170 // identifier referring to a register) and parse the displacement or default
2171 // to 0 as appropriate.
2172 auto isAtMemOperand = [this]() {
2173 if (this->getLexer().isNot(AsmToken::LParen))
2174 return false;
2175 AsmToken Buf[2];
2176 StringRef Id;
2177 auto TokCount = this->getLexer().peekTokens(Buf, true);
2178 if (TokCount == 0)
2179 return false;
2180 switch (Buf[0].getKind()) {
2181 case AsmToken::Percent:
2182 case AsmToken::Comma:
2183 return true;
2184 // These lower cases are doing a peekIdentifier.
2185 case AsmToken::At:
2186 case AsmToken::Dollar:
2187 if ((TokCount > 1) &&
2188 (Buf[1].is(AsmToken::Identifier) || Buf[1].is(AsmToken::String)) &&
2189 (Buf[0].getLoc().getPointer() + 1 == Buf[1].getLoc().getPointer()))
2190 Id = StringRef(Buf[0].getLoc().getPointer(),
2191 Buf[1].getIdentifier().size() + 1);
2192 break;
2193 case AsmToken::Identifier:
2194 case AsmToken::String:
2195 Id = Buf[0].getIdentifier();
2196 break;
2197 default:
2198 return false;
2200 // We have an ID. Check if it is bound to a register.
2201 if (!Id.empty()) {
2202 MCSymbol *Sym = this->getContext().getOrCreateSymbol(Id);
2203 if (Sym->isVariable()) {
2204 auto V = Sym->getVariableValue(/*SetUsed*/ false);
2205 return isa<X86MCExpr>(V);
2208 return false;
2211 if (!Disp) {
2212 // Parse immediate if we're not at a mem operand yet.
2213 if (!isAtMemOperand()) {
2214 if (Parser.parseTokenLoc(Loc) || Parser.parseExpression(Disp, EndLoc))
2215 return nullptr;
2216 assert(!isa<X86MCExpr>(Disp) && "Expected non-register here.");
2217 } else {
2218 // Disp is implicitly zero if we haven't parsed it yet.
2219 Disp = MCConstantExpr::create(0, Parser.getContext());
2223 // We are now either at the end of the operand or at the '(' at the start of a
2224 // base-index-scale-expr.
2226 if (!parseOptionalToken(AsmToken::LParen)) {
2227 if (SegReg == 0)
2228 return X86Operand::CreateMem(getPointerWidth(), Disp, StartLoc, EndLoc);
2229 return X86Operand::CreateMem(getPointerWidth(), SegReg, Disp, 0, 0, 1,
2230 StartLoc, EndLoc);
2233 // If we reached here, then eat the '(' and Process
2234 // the rest of the memory operand.
2235 unsigned BaseReg = 0, IndexReg = 0, Scale = 1;
2236 SMLoc BaseLoc = getLexer().getLoc();
2237 const MCExpr *E;
2238 StringRef ErrMsg;
2240 // Parse BaseReg if one is provided.
2241 if (getLexer().isNot(AsmToken::Comma) && getLexer().isNot(AsmToken::RParen)) {
2242 if (Parser.parseExpression(E, EndLoc) ||
2243 check(!isa<X86MCExpr>(E), BaseLoc, "expected register here"))
2244 return nullptr;
2246 // Sanity check register.
2247 BaseReg = cast<X86MCExpr>(E)->getRegNo();
2248 if (BaseReg == X86::EIZ || BaseReg == X86::RIZ)
2249 return ErrorOperand(BaseLoc,
2250 "eiz and riz can only be used as index registers",
2251 SMRange(BaseLoc, EndLoc));
2254 if (parseOptionalToken(AsmToken::Comma)) {
2255 // Following the comma we should have either an index register, or a scale
2256 // value. We don't support the later form, but we want to parse it
2257 // correctly.
2259 // Even though it would be completely consistent to support syntax like
2260 // "1(%eax,,1)", the assembler doesn't. Use "eiz" or "riz" for this.
2261 if (getLexer().isNot(AsmToken::RParen)) {
2262 if (Parser.parseTokenLoc(Loc) || Parser.parseExpression(E, EndLoc))
2263 return nullptr;
2265 if (!isa<X86MCExpr>(E)) {
2266 // We've parsed an unexpected Scale Value instead of an index
2267 // register. Interpret it as an absolute.
2268 int64_t ScaleVal;
2269 if (!E->evaluateAsAbsolute(ScaleVal, getStreamer().getAssemblerPtr()))
2270 return ErrorOperand(Loc, "expected absolute expression");
2271 if (ScaleVal != 1)
2272 Warning(Loc, "scale factor without index register is ignored");
2273 Scale = 1;
2274 } else { // IndexReg Found.
2275 IndexReg = cast<X86MCExpr>(E)->getRegNo();
2277 if (BaseReg == X86::RIP)
2278 return ErrorOperand(
2279 Loc, "%rip as base register can not have an index register");
2280 if (IndexReg == X86::RIP)
2281 return ErrorOperand(Loc, "%rip is not allowed as an index register");
2283 if (parseOptionalToken(AsmToken::Comma)) {
2284 // Parse the scale amount:
2285 // ::= ',' [scale-expression]
2287 // A scale amount without an index is ignored.
2288 if (getLexer().isNot(AsmToken::RParen)) {
2289 int64_t ScaleVal;
2290 if (Parser.parseTokenLoc(Loc) ||
2291 Parser.parseAbsoluteExpression(ScaleVal))
2292 return ErrorOperand(Loc, "expected scale expression");
2293 Scale = (unsigned)ScaleVal;
2294 // Validate the scale amount.
2295 if (X86MCRegisterClasses[X86::GR16RegClassID].contains(BaseReg) &&
2296 Scale != 1)
2297 return ErrorOperand(Loc,
2298 "scale factor in 16-bit address must be 1");
2299 if (checkScale(Scale, ErrMsg))
2300 return ErrorOperand(Loc, ErrMsg);
2307 // Ok, we've eaten the memory operand, verify we have a ')' and eat it too.
2308 if (parseToken(AsmToken::RParen, "unexpected token in memory operand"))
2309 return nullptr;
2311 // This is to support otherwise illegal operand (%dx) found in various
2312 // unofficial manuals examples (e.g. "out[s]?[bwl]? %al, (%dx)") and must now
2313 // be supported. Mark such DX variants separately fix only in special cases.
2314 if (BaseReg == X86::DX && IndexReg == 0 && Scale == 1 && SegReg == 0 &&
2315 isa<MCConstantExpr>(Disp) && cast<MCConstantExpr>(Disp)->getValue() == 0)
2316 return X86Operand::CreateDXReg(BaseLoc, BaseLoc);
2318 if (CheckBaseRegAndIndexRegAndScale(BaseReg, IndexReg, Scale, is64BitMode(),
2319 ErrMsg))
2320 return ErrorOperand(BaseLoc, ErrMsg);
2322 if (SegReg || BaseReg || IndexReg)
2323 return X86Operand::CreateMem(getPointerWidth(), SegReg, Disp, BaseReg,
2324 IndexReg, Scale, StartLoc, EndLoc);
2325 return X86Operand::CreateMem(getPointerWidth(), Disp, StartLoc, EndLoc);
2328 // Parse either a standard primary expression or a register.
2329 bool X86AsmParser::parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) {
2330 MCAsmParser &Parser = getParser();
2331 // See if this is a register first.
2332 if (getTok().is(AsmToken::Percent) ||
2333 (isParsingIntelSyntax() && getTok().is(AsmToken::Identifier) &&
2334 MatchRegisterName(Parser.getTok().getString()))) {
2335 SMLoc StartLoc = Parser.getTok().getLoc();
2336 unsigned RegNo;
2337 if (ParseRegister(RegNo, StartLoc, EndLoc))
2338 return true;
2339 Res = X86MCExpr::create(RegNo, Parser.getContext());
2340 return false;
2342 return Parser.parsePrimaryExpr(Res, EndLoc);
2345 bool X86AsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
2346 SMLoc NameLoc, OperandVector &Operands) {
2347 MCAsmParser &Parser = getParser();
2348 InstInfo = &Info;
2350 // Reset the forced VEX encoding.
2351 ForcedVEXEncoding = VEXEncoding_Default;
2353 // Parse pseudo prefixes.
2354 while (1) {
2355 if (Name == "{") {
2356 if (getLexer().isNot(AsmToken::Identifier))
2357 return Error(Parser.getTok().getLoc(), "Unexpected token after '{'");
2358 std::string Prefix = Parser.getTok().getString().lower();
2359 Parser.Lex(); // Eat identifier.
2360 if (getLexer().isNot(AsmToken::RCurly))
2361 return Error(Parser.getTok().getLoc(), "Expected '}'");
2362 Parser.Lex(); // Eat curly.
2364 if (Prefix == "vex2")
2365 ForcedVEXEncoding = VEXEncoding_VEX2;
2366 else if (Prefix == "vex3")
2367 ForcedVEXEncoding = VEXEncoding_VEX3;
2368 else if (Prefix == "evex")
2369 ForcedVEXEncoding = VEXEncoding_EVEX;
2370 else
2371 return Error(NameLoc, "unknown prefix");
2373 NameLoc = Parser.getTok().getLoc();
2374 if (getLexer().is(AsmToken::LCurly)) {
2375 Parser.Lex();
2376 Name = "{";
2377 } else {
2378 if (getLexer().isNot(AsmToken::Identifier))
2379 return Error(Parser.getTok().getLoc(), "Expected identifier");
2380 // FIXME: The mnemonic won't match correctly if its not in lower case.
2381 Name = Parser.getTok().getString();
2382 Parser.Lex();
2384 continue;
2387 break;
2390 StringRef PatchedName = Name;
2392 // Hack to skip "short" following Jcc.
2393 if (isParsingIntelSyntax() &&
2394 (PatchedName == "jmp" || PatchedName == "jc" || PatchedName == "jnc" ||
2395 PatchedName == "jcxz" || PatchedName == "jexcz" ||
2396 (PatchedName.startswith("j") &&
2397 ParseConditionCode(PatchedName.substr(1)) != X86::COND_INVALID))) {
2398 StringRef NextTok = Parser.getTok().getString();
2399 if (NextTok == "short") {
2400 SMLoc NameEndLoc =
2401 NameLoc.getFromPointer(NameLoc.getPointer() + Name.size());
2402 // Eat the short keyword.
2403 Parser.Lex();
2404 // MS and GAS ignore the short keyword; they both determine the jmp type
2405 // based on the distance of the label. (NASM does emit different code with
2406 // and without "short," though.)
2407 InstInfo->AsmRewrites->emplace_back(AOK_Skip, NameEndLoc,
2408 NextTok.size() + 1);
2412 // FIXME: Hack to recognize setneb as setne.
2413 if (PatchedName.startswith("set") && PatchedName.endswith("b") &&
2414 PatchedName != "setb" && PatchedName != "setnb")
2415 PatchedName = PatchedName.substr(0, Name.size()-1);
2417 unsigned ComparisonPredicate = ~0U;
2419 // FIXME: Hack to recognize cmp<comparison code>{ss,sd,ps,pd}.
2420 if ((PatchedName.startswith("cmp") || PatchedName.startswith("vcmp")) &&
2421 (PatchedName.endswith("ss") || PatchedName.endswith("sd") ||
2422 PatchedName.endswith("ps") || PatchedName.endswith("pd"))) {
2423 bool IsVCMP = PatchedName[0] == 'v';
2424 unsigned CCIdx = IsVCMP ? 4 : 3;
2425 unsigned CC = StringSwitch<unsigned>(
2426 PatchedName.slice(CCIdx, PatchedName.size() - 2))
2427 .Case("eq", 0x00)
2428 .Case("eq_oq", 0x00)
2429 .Case("lt", 0x01)
2430 .Case("lt_os", 0x01)
2431 .Case("le", 0x02)
2432 .Case("le_os", 0x02)
2433 .Case("unord", 0x03)
2434 .Case("unord_q", 0x03)
2435 .Case("neq", 0x04)
2436 .Case("neq_uq", 0x04)
2437 .Case("nlt", 0x05)
2438 .Case("nlt_us", 0x05)
2439 .Case("nle", 0x06)
2440 .Case("nle_us", 0x06)
2441 .Case("ord", 0x07)
2442 .Case("ord_q", 0x07)
2443 /* AVX only from here */
2444 .Case("eq_uq", 0x08)
2445 .Case("nge", 0x09)
2446 .Case("nge_us", 0x09)
2447 .Case("ngt", 0x0A)
2448 .Case("ngt_us", 0x0A)
2449 .Case("false", 0x0B)
2450 .Case("false_oq", 0x0B)
2451 .Case("neq_oq", 0x0C)
2452 .Case("ge", 0x0D)
2453 .Case("ge_os", 0x0D)
2454 .Case("gt", 0x0E)
2455 .Case("gt_os", 0x0E)
2456 .Case("true", 0x0F)
2457 .Case("true_uq", 0x0F)
2458 .Case("eq_os", 0x10)
2459 .Case("lt_oq", 0x11)
2460 .Case("le_oq", 0x12)
2461 .Case("unord_s", 0x13)
2462 .Case("neq_us", 0x14)
2463 .Case("nlt_uq", 0x15)
2464 .Case("nle_uq", 0x16)
2465 .Case("ord_s", 0x17)
2466 .Case("eq_us", 0x18)
2467 .Case("nge_uq", 0x19)
2468 .Case("ngt_uq", 0x1A)
2469 .Case("false_os", 0x1B)
2470 .Case("neq_os", 0x1C)
2471 .Case("ge_oq", 0x1D)
2472 .Case("gt_oq", 0x1E)
2473 .Case("true_us", 0x1F)
2474 .Default(~0U);
2475 if (CC != ~0U && (IsVCMP || CC < 8)) {
2476 if (PatchedName.endswith("ss"))
2477 PatchedName = IsVCMP ? "vcmpss" : "cmpss";
2478 else if (PatchedName.endswith("sd"))
2479 PatchedName = IsVCMP ? "vcmpsd" : "cmpsd";
2480 else if (PatchedName.endswith("ps"))
2481 PatchedName = IsVCMP ? "vcmpps" : "cmpps";
2482 else if (PatchedName.endswith("pd"))
2483 PatchedName = IsVCMP ? "vcmppd" : "cmppd";
2484 else
2485 llvm_unreachable("Unexpected suffix!");
2487 ComparisonPredicate = CC;
2491 // FIXME: Hack to recognize vpcmp<comparison code>{ub,uw,ud,uq,b,w,d,q}.
2492 if (PatchedName.startswith("vpcmp") &&
2493 (PatchedName.back() == 'b' || PatchedName.back() == 'w' ||
2494 PatchedName.back() == 'd' || PatchedName.back() == 'q')) {
2495 unsigned SuffixSize = PatchedName.drop_back().back() == 'u' ? 2 : 1;
2496 unsigned CC = StringSwitch<unsigned>(
2497 PatchedName.slice(5, PatchedName.size() - SuffixSize))
2498 .Case("eq", 0x0) // Only allowed on unsigned. Checked below.
2499 .Case("lt", 0x1)
2500 .Case("le", 0x2)
2501 //.Case("false", 0x3) // Not a documented alias.
2502 .Case("neq", 0x4)
2503 .Case("nlt", 0x5)
2504 .Case("nle", 0x6)
2505 //.Case("true", 0x7) // Not a documented alias.
2506 .Default(~0U);
2507 if (CC != ~0U && (CC != 0 || SuffixSize == 2)) {
2508 switch (PatchedName.back()) {
2509 default: llvm_unreachable("Unexpected character!");
2510 case 'b': PatchedName = SuffixSize == 2 ? "vpcmpub" : "vpcmpb"; break;
2511 case 'w': PatchedName = SuffixSize == 2 ? "vpcmpuw" : "vpcmpw"; break;
2512 case 'd': PatchedName = SuffixSize == 2 ? "vpcmpud" : "vpcmpd"; break;
2513 case 'q': PatchedName = SuffixSize == 2 ? "vpcmpuq" : "vpcmpq"; break;
2515 // Set up the immediate to push into the operands later.
2516 ComparisonPredicate = CC;
2520 // FIXME: Hack to recognize vpcom<comparison code>{ub,uw,ud,uq,b,w,d,q}.
2521 if (PatchedName.startswith("vpcom") &&
2522 (PatchedName.back() == 'b' || PatchedName.back() == 'w' ||
2523 PatchedName.back() == 'd' || PatchedName.back() == 'q')) {
2524 unsigned SuffixSize = PatchedName.drop_back().back() == 'u' ? 2 : 1;
2525 unsigned CC = StringSwitch<unsigned>(
2526 PatchedName.slice(5, PatchedName.size() - SuffixSize))
2527 .Case("lt", 0x0)
2528 .Case("le", 0x1)
2529 .Case("gt", 0x2)
2530 .Case("ge", 0x3)
2531 .Case("eq", 0x4)
2532 .Case("neq", 0x5)
2533 .Case("false", 0x6)
2534 .Case("true", 0x7)
2535 .Default(~0U);
2536 if (CC != ~0U) {
2537 switch (PatchedName.back()) {
2538 default: llvm_unreachable("Unexpected character!");
2539 case 'b': PatchedName = SuffixSize == 2 ? "vpcomub" : "vpcomb"; break;
2540 case 'w': PatchedName = SuffixSize == 2 ? "vpcomuw" : "vpcomw"; break;
2541 case 'd': PatchedName = SuffixSize == 2 ? "vpcomud" : "vpcomd"; break;
2542 case 'q': PatchedName = SuffixSize == 2 ? "vpcomuq" : "vpcomq"; break;
2544 // Set up the immediate to push into the operands later.
2545 ComparisonPredicate = CC;
2550 // Determine whether this is an instruction prefix.
2551 // FIXME:
2552 // Enhance prefixes integrity robustness. for example, following forms
2553 // are currently tolerated:
2554 // repz repnz <insn> ; GAS errors for the use of two similar prefixes
2555 // lock addq %rax, %rbx ; Destination operand must be of memory type
2556 // xacquire <insn> ; xacquire must be accompanied by 'lock'
2557 bool isPrefix = StringSwitch<bool>(Name)
2558 .Cases("rex64", "data32", "data16", true)
2559 .Cases("xacquire", "xrelease", true)
2560 .Cases("acquire", "release", isParsingIntelSyntax())
2561 .Default(false);
2563 auto isLockRepeatNtPrefix = [](StringRef N) {
2564 return StringSwitch<bool>(N)
2565 .Cases("lock", "rep", "repe", "repz", "repne", "repnz", "notrack", true)
2566 .Default(false);
2569 bool CurlyAsEndOfStatement = false;
2571 unsigned Flags = X86::IP_NO_PREFIX;
2572 while (isLockRepeatNtPrefix(Name.lower())) {
2573 unsigned Prefix =
2574 StringSwitch<unsigned>(Name)
2575 .Cases("lock", "lock", X86::IP_HAS_LOCK)
2576 .Cases("rep", "repe", "repz", X86::IP_HAS_REPEAT)
2577 .Cases("repne", "repnz", X86::IP_HAS_REPEAT_NE)
2578 .Cases("notrack", "notrack", X86::IP_HAS_NOTRACK)
2579 .Default(X86::IP_NO_PREFIX); // Invalid prefix (impossible)
2580 Flags |= Prefix;
2581 if (getLexer().is(AsmToken::EndOfStatement)) {
2582 // We don't have real instr with the given prefix
2583 // let's use the prefix as the instr.
2584 // TODO: there could be several prefixes one after another
2585 Flags = X86::IP_NO_PREFIX;
2586 break;
2588 // FIXME: The mnemonic won't match correctly if its not in lower case.
2589 Name = Parser.getTok().getString();
2590 Parser.Lex(); // eat the prefix
2591 // Hack: we could have something like "rep # some comment" or
2592 // "lock; cmpxchg16b $1" or "lock\0A\09incl" or "lock/incl"
2593 while (Name.startswith(";") || Name.startswith("\n") ||
2594 Name.startswith("#") || Name.startswith("\t") ||
2595 Name.startswith("/")) {
2596 // FIXME: The mnemonic won't match correctly if its not in lower case.
2597 Name = Parser.getTok().getString();
2598 Parser.Lex(); // go to next prefix or instr
2602 if (Flags)
2603 PatchedName = Name;
2605 // Hacks to handle 'data16' and 'data32'
2606 if (PatchedName == "data16" && is16BitMode()) {
2607 return Error(NameLoc, "redundant data16 prefix");
2609 if (PatchedName == "data32") {
2610 if (is32BitMode())
2611 return Error(NameLoc, "redundant data32 prefix");
2612 if (is64BitMode())
2613 return Error(NameLoc, "'data32' is not supported in 64-bit mode");
2614 // Hack to 'data16' for the table lookup.
2615 PatchedName = "data16";
2618 Operands.push_back(X86Operand::CreateToken(PatchedName, NameLoc));
2620 // Push the immediate if we extracted one from the mnemonic.
2621 if (ComparisonPredicate != ~0U && !isParsingIntelSyntax()) {
2622 const MCExpr *ImmOp = MCConstantExpr::create(ComparisonPredicate,
2623 getParser().getContext());
2624 Operands.push_back(X86Operand::CreateImm(ImmOp, NameLoc, NameLoc));
2627 // This does the actual operand parsing. Don't parse any more if we have a
2628 // prefix juxtaposed with an operation like "lock incl 4(%rax)", because we
2629 // just want to parse the "lock" as the first instruction and the "incl" as
2630 // the next one.
2631 if (getLexer().isNot(AsmToken::EndOfStatement) && !isPrefix) {
2632 // Parse '*' modifier.
2633 if (getLexer().is(AsmToken::Star))
2634 Operands.push_back(X86Operand::CreateToken("*", consumeToken()));
2636 // Read the operands.
2637 while(1) {
2638 if (std::unique_ptr<X86Operand> Op = ParseOperand()) {
2639 Operands.push_back(std::move(Op));
2640 if (HandleAVX512Operand(Operands, *Operands.back()))
2641 return true;
2642 } else {
2643 return true;
2645 // check for comma and eat it
2646 if (getLexer().is(AsmToken::Comma))
2647 Parser.Lex();
2648 else
2649 break;
2652 // In MS inline asm curly braces mark the beginning/end of a block,
2653 // therefore they should be interepreted as end of statement
2654 CurlyAsEndOfStatement =
2655 isParsingIntelSyntax() && isParsingInlineAsm() &&
2656 (getLexer().is(AsmToken::LCurly) || getLexer().is(AsmToken::RCurly));
2657 if (getLexer().isNot(AsmToken::EndOfStatement) && !CurlyAsEndOfStatement)
2658 return TokError("unexpected token in argument list");
2661 // Push the immediate if we extracted one from the mnemonic.
2662 if (ComparisonPredicate != ~0U && isParsingIntelSyntax()) {
2663 const MCExpr *ImmOp = MCConstantExpr::create(ComparisonPredicate,
2664 getParser().getContext());
2665 Operands.push_back(X86Operand::CreateImm(ImmOp, NameLoc, NameLoc));
2668 // Consume the EndOfStatement or the prefix separator Slash
2669 if (getLexer().is(AsmToken::EndOfStatement) ||
2670 (isPrefix && getLexer().is(AsmToken::Slash)))
2671 Parser.Lex();
2672 else if (CurlyAsEndOfStatement)
2673 // Add an actual EndOfStatement before the curly brace
2674 Info.AsmRewrites->emplace_back(AOK_EndOfStatement,
2675 getLexer().getTok().getLoc(), 0);
2677 // This is for gas compatibility and cannot be done in td.
2678 // Adding "p" for some floating point with no argument.
2679 // For example: fsub --> fsubp
2680 bool IsFp =
2681 Name == "fsub" || Name == "fdiv" || Name == "fsubr" || Name == "fdivr";
2682 if (IsFp && Operands.size() == 1) {
2683 const char *Repl = StringSwitch<const char *>(Name)
2684 .Case("fsub", "fsubp")
2685 .Case("fdiv", "fdivp")
2686 .Case("fsubr", "fsubrp")
2687 .Case("fdivr", "fdivrp");
2688 static_cast<X86Operand &>(*Operands[0]).setTokenValue(Repl);
2691 if ((Name == "mov" || Name == "movw" || Name == "movl") &&
2692 (Operands.size() == 3)) {
2693 X86Operand &Op1 = (X86Operand &)*Operands[1];
2694 X86Operand &Op2 = (X86Operand &)*Operands[2];
2695 SMLoc Loc = Op1.getEndLoc();
2696 // Moving a 32 or 16 bit value into a segment register has the same
2697 // behavior. Modify such instructions to always take shorter form.
2698 if (Op1.isReg() && Op2.isReg() &&
2699 X86MCRegisterClasses[X86::SEGMENT_REGRegClassID].contains(
2700 Op2.getReg()) &&
2701 (X86MCRegisterClasses[X86::GR16RegClassID].contains(Op1.getReg()) ||
2702 X86MCRegisterClasses[X86::GR32RegClassID].contains(Op1.getReg()))) {
2703 // Change instruction name to match new instruction.
2704 if (Name != "mov" && Name[3] == (is16BitMode() ? 'l' : 'w')) {
2705 Name = is16BitMode() ? "movw" : "movl";
2706 Operands[0] = X86Operand::CreateToken(Name, NameLoc);
2708 // Select the correct equivalent 16-/32-bit source register.
2709 unsigned Reg =
2710 getX86SubSuperRegisterOrZero(Op1.getReg(), is16BitMode() ? 16 : 32);
2711 Operands[1] = X86Operand::CreateReg(Reg, Loc, Loc);
2715 // This is a terrible hack to handle "out[s]?[bwl]? %al, (%dx)" ->
2716 // "outb %al, %dx". Out doesn't take a memory form, but this is a widely
2717 // documented form in various unofficial manuals, so a lot of code uses it.
2718 if ((Name == "outb" || Name == "outsb" || Name == "outw" || Name == "outsw" ||
2719 Name == "outl" || Name == "outsl" || Name == "out" || Name == "outs") &&
2720 Operands.size() == 3) {
2721 X86Operand &Op = (X86Operand &)*Operands.back();
2722 if (Op.isDXReg())
2723 Operands.back() = X86Operand::CreateReg(X86::DX, Op.getStartLoc(),
2724 Op.getEndLoc());
2726 // Same hack for "in[s]?[bwl]? (%dx), %al" -> "inb %dx, %al".
2727 if ((Name == "inb" || Name == "insb" || Name == "inw" || Name == "insw" ||
2728 Name == "inl" || Name == "insl" || Name == "in" || Name == "ins") &&
2729 Operands.size() == 3) {
2730 X86Operand &Op = (X86Operand &)*Operands[1];
2731 if (Op.isDXReg())
2732 Operands[1] = X86Operand::CreateReg(X86::DX, Op.getStartLoc(),
2733 Op.getEndLoc());
2736 SmallVector<std::unique_ptr<MCParsedAsmOperand>, 2> TmpOperands;
2737 bool HadVerifyError = false;
2739 // Append default arguments to "ins[bwld]"
2740 if (Name.startswith("ins") &&
2741 (Operands.size() == 1 || Operands.size() == 3) &&
2742 (Name == "insb" || Name == "insw" || Name == "insl" || Name == "insd" ||
2743 Name == "ins")) {
2745 AddDefaultSrcDestOperands(TmpOperands,
2746 X86Operand::CreateReg(X86::DX, NameLoc, NameLoc),
2747 DefaultMemDIOperand(NameLoc));
2748 HadVerifyError = VerifyAndAdjustOperands(Operands, TmpOperands);
2751 // Append default arguments to "outs[bwld]"
2752 if (Name.startswith("outs") &&
2753 (Operands.size() == 1 || Operands.size() == 3) &&
2754 (Name == "outsb" || Name == "outsw" || Name == "outsl" ||
2755 Name == "outsd" || Name == "outs")) {
2756 AddDefaultSrcDestOperands(TmpOperands, DefaultMemSIOperand(NameLoc),
2757 X86Operand::CreateReg(X86::DX, NameLoc, NameLoc));
2758 HadVerifyError = VerifyAndAdjustOperands(Operands, TmpOperands);
2761 // Transform "lods[bwlq]" into "lods[bwlq] ($SIREG)" for appropriate
2762 // values of $SIREG according to the mode. It would be nice if this
2763 // could be achieved with InstAlias in the tables.
2764 if (Name.startswith("lods") &&
2765 (Operands.size() == 1 || Operands.size() == 2) &&
2766 (Name == "lods" || Name == "lodsb" || Name == "lodsw" ||
2767 Name == "lodsl" || Name == "lodsd" || Name == "lodsq")) {
2768 TmpOperands.push_back(DefaultMemSIOperand(NameLoc));
2769 HadVerifyError = VerifyAndAdjustOperands(Operands, TmpOperands);
2772 // Transform "stos[bwlq]" into "stos[bwlq] ($DIREG)" for appropriate
2773 // values of $DIREG according to the mode. It would be nice if this
2774 // could be achieved with InstAlias in the tables.
2775 if (Name.startswith("stos") &&
2776 (Operands.size() == 1 || Operands.size() == 2) &&
2777 (Name == "stos" || Name == "stosb" || Name == "stosw" ||
2778 Name == "stosl" || Name == "stosd" || Name == "stosq")) {
2779 TmpOperands.push_back(DefaultMemDIOperand(NameLoc));
2780 HadVerifyError = VerifyAndAdjustOperands(Operands, TmpOperands);
2783 // Transform "scas[bwlq]" into "scas[bwlq] ($DIREG)" for appropriate
2784 // values of $DIREG according to the mode. It would be nice if this
2785 // could be achieved with InstAlias in the tables.
2786 if (Name.startswith("scas") &&
2787 (Operands.size() == 1 || Operands.size() == 2) &&
2788 (Name == "scas" || Name == "scasb" || Name == "scasw" ||
2789 Name == "scasl" || Name == "scasd" || Name == "scasq")) {
2790 TmpOperands.push_back(DefaultMemDIOperand(NameLoc));
2791 HadVerifyError = VerifyAndAdjustOperands(Operands, TmpOperands);
2794 // Add default SI and DI operands to "cmps[bwlq]".
2795 if (Name.startswith("cmps") &&
2796 (Operands.size() == 1 || Operands.size() == 3) &&
2797 (Name == "cmps" || Name == "cmpsb" || Name == "cmpsw" ||
2798 Name == "cmpsl" || Name == "cmpsd" || Name == "cmpsq")) {
2799 AddDefaultSrcDestOperands(TmpOperands, DefaultMemDIOperand(NameLoc),
2800 DefaultMemSIOperand(NameLoc));
2801 HadVerifyError = VerifyAndAdjustOperands(Operands, TmpOperands);
2804 // Add default SI and DI operands to "movs[bwlq]".
2805 if (((Name.startswith("movs") &&
2806 (Name == "movs" || Name == "movsb" || Name == "movsw" ||
2807 Name == "movsl" || Name == "movsd" || Name == "movsq")) ||
2808 (Name.startswith("smov") &&
2809 (Name == "smov" || Name == "smovb" || Name == "smovw" ||
2810 Name == "smovl" || Name == "smovd" || Name == "smovq"))) &&
2811 (Operands.size() == 1 || Operands.size() == 3)) {
2812 if (Name == "movsd" && Operands.size() == 1 && !isParsingIntelSyntax())
2813 Operands.back() = X86Operand::CreateToken("movsl", NameLoc);
2814 AddDefaultSrcDestOperands(TmpOperands, DefaultMemSIOperand(NameLoc),
2815 DefaultMemDIOperand(NameLoc));
2816 HadVerifyError = VerifyAndAdjustOperands(Operands, TmpOperands);
2819 // Check if we encountered an error for one the string insturctions
2820 if (HadVerifyError) {
2821 return HadVerifyError;
2824 // FIXME: Hack to handle recognize s{hr,ar,hl} $1, <op>. Canonicalize to
2825 // "shift <op>".
2826 if ((Name.startswith("shr") || Name.startswith("sar") ||
2827 Name.startswith("shl") || Name.startswith("sal") ||
2828 Name.startswith("rcl") || Name.startswith("rcr") ||
2829 Name.startswith("rol") || Name.startswith("ror")) &&
2830 Operands.size() == 3) {
2831 if (isParsingIntelSyntax()) {
2832 // Intel syntax
2833 X86Operand &Op1 = static_cast<X86Operand &>(*Operands[2]);
2834 if (Op1.isImm() && isa<MCConstantExpr>(Op1.getImm()) &&
2835 cast<MCConstantExpr>(Op1.getImm())->getValue() == 1)
2836 Operands.pop_back();
2837 } else {
2838 X86Operand &Op1 = static_cast<X86Operand &>(*Operands[1]);
2839 if (Op1.isImm() && isa<MCConstantExpr>(Op1.getImm()) &&
2840 cast<MCConstantExpr>(Op1.getImm())->getValue() == 1)
2841 Operands.erase(Operands.begin() + 1);
2845 // Transforms "int $3" into "int3" as a size optimization. We can't write an
2846 // instalias with an immediate operand yet.
2847 if (Name == "int" && Operands.size() == 2) {
2848 X86Operand &Op1 = static_cast<X86Operand &>(*Operands[1]);
2849 if (Op1.isImm())
2850 if (auto *CE = dyn_cast<MCConstantExpr>(Op1.getImm()))
2851 if (CE->getValue() == 3) {
2852 Operands.erase(Operands.begin() + 1);
2853 static_cast<X86Operand &>(*Operands[0]).setTokenValue("int3");
2857 // Transforms "xlat mem8" into "xlatb"
2858 if ((Name == "xlat" || Name == "xlatb") && Operands.size() == 2) {
2859 X86Operand &Op1 = static_cast<X86Operand &>(*Operands[1]);
2860 if (Op1.isMem8()) {
2861 Warning(Op1.getStartLoc(), "memory operand is only for determining the "
2862 "size, (R|E)BX will be used for the location");
2863 Operands.pop_back();
2864 static_cast<X86Operand &>(*Operands[0]).setTokenValue("xlatb");
2868 if (Flags)
2869 Operands.push_back(X86Operand::CreatePrefix(Flags, NameLoc, NameLoc));
2870 return false;
2873 bool X86AsmParser::processInstruction(MCInst &Inst, const OperandVector &Ops) {
2874 const MCRegisterInfo *MRI = getContext().getRegisterInfo();
2876 switch (Inst.getOpcode()) {
2877 default: return false;
2878 case X86::VMOVZPQILo2PQIrr:
2879 case X86::VMOVAPDrr:
2880 case X86::VMOVAPDYrr:
2881 case X86::VMOVAPSrr:
2882 case X86::VMOVAPSYrr:
2883 case X86::VMOVDQArr:
2884 case X86::VMOVDQAYrr:
2885 case X86::VMOVDQUrr:
2886 case X86::VMOVDQUYrr:
2887 case X86::VMOVUPDrr:
2888 case X86::VMOVUPDYrr:
2889 case X86::VMOVUPSrr:
2890 case X86::VMOVUPSYrr: {
2891 // We can get a smaller encoding by using VEX.R instead of VEX.B if one of
2892 // the registers is extended, but other isn't.
2893 if (ForcedVEXEncoding == VEXEncoding_VEX3 ||
2894 MRI->getEncodingValue(Inst.getOperand(0).getReg()) >= 8 ||
2895 MRI->getEncodingValue(Inst.getOperand(1).getReg()) < 8)
2896 return false;
2898 unsigned NewOpc;
2899 switch (Inst.getOpcode()) {
2900 default: llvm_unreachable("Invalid opcode");
2901 case X86::VMOVZPQILo2PQIrr: NewOpc = X86::VMOVPQI2QIrr; break;
2902 case X86::VMOVAPDrr: NewOpc = X86::VMOVAPDrr_REV; break;
2903 case X86::VMOVAPDYrr: NewOpc = X86::VMOVAPDYrr_REV; break;
2904 case X86::VMOVAPSrr: NewOpc = X86::VMOVAPSrr_REV; break;
2905 case X86::VMOVAPSYrr: NewOpc = X86::VMOVAPSYrr_REV; break;
2906 case X86::VMOVDQArr: NewOpc = X86::VMOVDQArr_REV; break;
2907 case X86::VMOVDQAYrr: NewOpc = X86::VMOVDQAYrr_REV; break;
2908 case X86::VMOVDQUrr: NewOpc = X86::VMOVDQUrr_REV; break;
2909 case X86::VMOVDQUYrr: NewOpc = X86::VMOVDQUYrr_REV; break;
2910 case X86::VMOVUPDrr: NewOpc = X86::VMOVUPDrr_REV; break;
2911 case X86::VMOVUPDYrr: NewOpc = X86::VMOVUPDYrr_REV; break;
2912 case X86::VMOVUPSrr: NewOpc = X86::VMOVUPSrr_REV; break;
2913 case X86::VMOVUPSYrr: NewOpc = X86::VMOVUPSYrr_REV; break;
2915 Inst.setOpcode(NewOpc);
2916 return true;
2918 case X86::VMOVSDrr:
2919 case X86::VMOVSSrr: {
2920 // We can get a smaller encoding by using VEX.R instead of VEX.B if one of
2921 // the registers is extended, but other isn't.
2922 if (ForcedVEXEncoding == VEXEncoding_VEX3 ||
2923 MRI->getEncodingValue(Inst.getOperand(0).getReg()) >= 8 ||
2924 MRI->getEncodingValue(Inst.getOperand(2).getReg()) < 8)
2925 return false;
2927 unsigned NewOpc;
2928 switch (Inst.getOpcode()) {
2929 default: llvm_unreachable("Invalid opcode");
2930 case X86::VMOVSDrr: NewOpc = X86::VMOVSDrr_REV; break;
2931 case X86::VMOVSSrr: NewOpc = X86::VMOVSSrr_REV; break;
2933 Inst.setOpcode(NewOpc);
2934 return true;
2939 bool X86AsmParser::validateInstruction(MCInst &Inst, const OperandVector &Ops) {
2940 const MCRegisterInfo *MRI = getContext().getRegisterInfo();
2942 switch (Inst.getOpcode()) {
2943 case X86::VGATHERDPDYrm:
2944 case X86::VGATHERDPDrm:
2945 case X86::VGATHERDPSYrm:
2946 case X86::VGATHERDPSrm:
2947 case X86::VGATHERQPDYrm:
2948 case X86::VGATHERQPDrm:
2949 case X86::VGATHERQPSYrm:
2950 case X86::VGATHERQPSrm:
2951 case X86::VPGATHERDDYrm:
2952 case X86::VPGATHERDDrm:
2953 case X86::VPGATHERDQYrm:
2954 case X86::VPGATHERDQrm:
2955 case X86::VPGATHERQDYrm:
2956 case X86::VPGATHERQDrm:
2957 case X86::VPGATHERQQYrm:
2958 case X86::VPGATHERQQrm: {
2959 unsigned Dest = MRI->getEncodingValue(Inst.getOperand(0).getReg());
2960 unsigned Mask = MRI->getEncodingValue(Inst.getOperand(1).getReg());
2961 unsigned Index =
2962 MRI->getEncodingValue(Inst.getOperand(3 + X86::AddrIndexReg).getReg());
2963 if (Dest == Mask || Dest == Index || Mask == Index)
2964 return Warning(Ops[0]->getStartLoc(), "mask, index, and destination "
2965 "registers should be distinct");
2966 break;
2968 case X86::VGATHERDPDZ128rm:
2969 case X86::VGATHERDPDZ256rm:
2970 case X86::VGATHERDPDZrm:
2971 case X86::VGATHERDPSZ128rm:
2972 case X86::VGATHERDPSZ256rm:
2973 case X86::VGATHERDPSZrm:
2974 case X86::VGATHERQPDZ128rm:
2975 case X86::VGATHERQPDZ256rm:
2976 case X86::VGATHERQPDZrm:
2977 case X86::VGATHERQPSZ128rm:
2978 case X86::VGATHERQPSZ256rm:
2979 case X86::VGATHERQPSZrm:
2980 case X86::VPGATHERDDZ128rm:
2981 case X86::VPGATHERDDZ256rm:
2982 case X86::VPGATHERDDZrm:
2983 case X86::VPGATHERDQZ128rm:
2984 case X86::VPGATHERDQZ256rm:
2985 case X86::VPGATHERDQZrm:
2986 case X86::VPGATHERQDZ128rm:
2987 case X86::VPGATHERQDZ256rm:
2988 case X86::VPGATHERQDZrm:
2989 case X86::VPGATHERQQZ128rm:
2990 case X86::VPGATHERQQZ256rm:
2991 case X86::VPGATHERQQZrm: {
2992 unsigned Dest = MRI->getEncodingValue(Inst.getOperand(0).getReg());
2993 unsigned Index =
2994 MRI->getEncodingValue(Inst.getOperand(4 + X86::AddrIndexReg).getReg());
2995 if (Dest == Index)
2996 return Warning(Ops[0]->getStartLoc(), "index and destination registers "
2997 "should be distinct");
2998 break;
3000 case X86::V4FMADDPSrm:
3001 case X86::V4FMADDPSrmk:
3002 case X86::V4FMADDPSrmkz:
3003 case X86::V4FMADDSSrm:
3004 case X86::V4FMADDSSrmk:
3005 case X86::V4FMADDSSrmkz:
3006 case X86::V4FNMADDPSrm:
3007 case X86::V4FNMADDPSrmk:
3008 case X86::V4FNMADDPSrmkz:
3009 case X86::V4FNMADDSSrm:
3010 case X86::V4FNMADDSSrmk:
3011 case X86::V4FNMADDSSrmkz:
3012 case X86::VP4DPWSSDSrm:
3013 case X86::VP4DPWSSDSrmk:
3014 case X86::VP4DPWSSDSrmkz:
3015 case X86::VP4DPWSSDrm:
3016 case X86::VP4DPWSSDrmk:
3017 case X86::VP4DPWSSDrmkz: {
3018 unsigned Src2 = Inst.getOperand(Inst.getNumOperands() -
3019 X86::AddrNumOperands - 1).getReg();
3020 unsigned Src2Enc = MRI->getEncodingValue(Src2);
3021 if (Src2Enc % 4 != 0) {
3022 StringRef RegName = X86IntelInstPrinter::getRegisterName(Src2);
3023 unsigned GroupStart = (Src2Enc / 4) * 4;
3024 unsigned GroupEnd = GroupStart + 3;
3025 return Warning(Ops[0]->getStartLoc(),
3026 "source register '" + RegName + "' implicitly denotes '" +
3027 RegName.take_front(3) + Twine(GroupStart) + "' to '" +
3028 RegName.take_front(3) + Twine(GroupEnd) +
3029 "' source group");
3031 break;
3035 return false;
3038 static const char *getSubtargetFeatureName(uint64_t Val);
3040 void X86AsmParser::EmitInstruction(MCInst &Inst, OperandVector &Operands,
3041 MCStreamer &Out) {
3042 Out.EmitInstruction(Inst, getSTI());
3045 bool X86AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
3046 OperandVector &Operands,
3047 MCStreamer &Out, uint64_t &ErrorInfo,
3048 bool MatchingInlineAsm) {
3049 if (isParsingIntelSyntax())
3050 return MatchAndEmitIntelInstruction(IDLoc, Opcode, Operands, Out, ErrorInfo,
3051 MatchingInlineAsm);
3052 return MatchAndEmitATTInstruction(IDLoc, Opcode, Operands, Out, ErrorInfo,
3053 MatchingInlineAsm);
3056 void X86AsmParser::MatchFPUWaitAlias(SMLoc IDLoc, X86Operand &Op,
3057 OperandVector &Operands, MCStreamer &Out,
3058 bool MatchingInlineAsm) {
3059 // FIXME: This should be replaced with a real .td file alias mechanism.
3060 // Also, MatchInstructionImpl should actually *do* the EmitInstruction
3061 // call.
3062 const char *Repl = StringSwitch<const char *>(Op.getToken())
3063 .Case("finit", "fninit")
3064 .Case("fsave", "fnsave")
3065 .Case("fstcw", "fnstcw")
3066 .Case("fstcww", "fnstcw")
3067 .Case("fstenv", "fnstenv")
3068 .Case("fstsw", "fnstsw")
3069 .Case("fstsww", "fnstsw")
3070 .Case("fclex", "fnclex")
3071 .Default(nullptr);
3072 if (Repl) {
3073 MCInst Inst;
3074 Inst.setOpcode(X86::WAIT);
3075 Inst.setLoc(IDLoc);
3076 if (!MatchingInlineAsm)
3077 EmitInstruction(Inst, Operands, Out);
3078 Operands[0] = X86Operand::CreateToken(Repl, IDLoc);
3082 bool X86AsmParser::ErrorMissingFeature(SMLoc IDLoc,
3083 const FeatureBitset &MissingFeatures,
3084 bool MatchingInlineAsm) {
3085 assert(MissingFeatures.any() && "Unknown missing feature!");
3086 SmallString<126> Msg;
3087 raw_svector_ostream OS(Msg);
3088 OS << "instruction requires:";
3089 for (unsigned i = 0, e = MissingFeatures.size(); i != e; ++i) {
3090 if (MissingFeatures[i])
3091 OS << ' ' << getSubtargetFeatureName(i);
3093 return Error(IDLoc, OS.str(), SMRange(), MatchingInlineAsm);
3096 static unsigned getPrefixes(OperandVector &Operands) {
3097 unsigned Result = 0;
3098 X86Operand &Prefix = static_cast<X86Operand &>(*Operands.back());
3099 if (Prefix.isPrefix()) {
3100 Result = Prefix.getPrefix();
3101 Operands.pop_back();
3103 return Result;
3106 unsigned X86AsmParser::checkTargetMatchPredicate(MCInst &Inst) {
3107 unsigned Opc = Inst.getOpcode();
3108 const MCInstrDesc &MCID = MII.get(Opc);
3110 if (ForcedVEXEncoding == VEXEncoding_EVEX &&
3111 (MCID.TSFlags & X86II::EncodingMask) != X86II::EVEX)
3112 return Match_Unsupported;
3114 if ((ForcedVEXEncoding == VEXEncoding_VEX2 ||
3115 ForcedVEXEncoding == VEXEncoding_VEX3) &&
3116 (MCID.TSFlags & X86II::EncodingMask) != X86II::VEX)
3117 return Match_Unsupported;
3119 // These instructions match ambiguously with their VEX encoded counterparts
3120 // and appear first in the matching table. Reject them unless we're forcing
3121 // EVEX encoding.
3122 // FIXME: We really need a way to break the ambiguity.
3123 switch (Opc) {
3124 case X86::VCVTSD2SIZrm_Int:
3125 case X86::VCVTSD2SI64Zrm_Int:
3126 case X86::VCVTSS2SIZrm_Int:
3127 case X86::VCVTSS2SI64Zrm_Int:
3128 case X86::VCVTTSD2SIZrm: case X86::VCVTTSD2SIZrm_Int:
3129 case X86::VCVTTSD2SI64Zrm: case X86::VCVTTSD2SI64Zrm_Int:
3130 case X86::VCVTTSS2SIZrm: case X86::VCVTTSS2SIZrm_Int:
3131 case X86::VCVTTSS2SI64Zrm: case X86::VCVTTSS2SI64Zrm_Int:
3132 if (ForcedVEXEncoding != VEXEncoding_EVEX)
3133 return Match_Unsupported;
3136 return Match_Success;
3139 bool X86AsmParser::MatchAndEmitATTInstruction(SMLoc IDLoc, unsigned &Opcode,
3140 OperandVector &Operands,
3141 MCStreamer &Out,
3142 uint64_t &ErrorInfo,
3143 bool MatchingInlineAsm) {
3144 assert(!Operands.empty() && "Unexpect empty operand list!");
3145 assert((*Operands[0]).isToken() && "Leading operand should always be a mnemonic!");
3146 SMRange EmptyRange = None;
3148 // First, handle aliases that expand to multiple instructions.
3149 MatchFPUWaitAlias(IDLoc, static_cast<X86Operand &>(*Operands[0]), Operands,
3150 Out, MatchingInlineAsm);
3151 X86Operand &Op = static_cast<X86Operand &>(*Operands[0]);
3152 unsigned Prefixes = getPrefixes(Operands);
3154 MCInst Inst;
3156 // If VEX3 encoding is forced, we need to pass the USE_VEX3 flag to the
3157 // encoder.
3158 if (ForcedVEXEncoding == VEXEncoding_VEX3)
3159 Prefixes |= X86::IP_USE_VEX3;
3161 if (Prefixes)
3162 Inst.setFlags(Prefixes);
3164 // First, try a direct match.
3165 FeatureBitset MissingFeatures;
3166 unsigned OriginalError = MatchInstruction(Operands, Inst, ErrorInfo,
3167 MissingFeatures, MatchingInlineAsm,
3168 isParsingIntelSyntax());
3169 switch (OriginalError) {
3170 default: llvm_unreachable("Unexpected match result!");
3171 case Match_Success:
3172 if (!MatchingInlineAsm && validateInstruction(Inst, Operands))
3173 return true;
3174 // Some instructions need post-processing to, for example, tweak which
3175 // encoding is selected. Loop on it while changes happen so the
3176 // individual transformations can chain off each other.
3177 if (!MatchingInlineAsm)
3178 while (processInstruction(Inst, Operands))
3181 Inst.setLoc(IDLoc);
3182 if (!MatchingInlineAsm)
3183 EmitInstruction(Inst, Operands, Out);
3184 Opcode = Inst.getOpcode();
3185 return false;
3186 case Match_InvalidImmUnsignedi4: {
3187 SMLoc ErrorLoc = ((X86Operand &)*Operands[ErrorInfo]).getStartLoc();
3188 if (ErrorLoc == SMLoc())
3189 ErrorLoc = IDLoc;
3190 return Error(ErrorLoc, "immediate must be an integer in range [0, 15]",
3191 EmptyRange, MatchingInlineAsm);
3193 case Match_MissingFeature:
3194 return ErrorMissingFeature(IDLoc, MissingFeatures, MatchingInlineAsm);
3195 case Match_InvalidOperand:
3196 case Match_MnemonicFail:
3197 case Match_Unsupported:
3198 break;
3200 if (Op.getToken().empty()) {
3201 Error(IDLoc, "instruction must have size higher than 0", EmptyRange,
3202 MatchingInlineAsm);
3203 return true;
3206 // FIXME: Ideally, we would only attempt suffix matches for things which are
3207 // valid prefixes, and we could just infer the right unambiguous
3208 // type. However, that requires substantially more matcher support than the
3209 // following hack.
3211 // Change the operand to point to a temporary token.
3212 StringRef Base = Op.getToken();
3213 SmallString<16> Tmp;
3214 Tmp += Base;
3215 Tmp += ' ';
3216 Op.setTokenValue(Tmp);
3218 // If this instruction starts with an 'f', then it is a floating point stack
3219 // instruction. These come in up to three forms for 32-bit, 64-bit, and
3220 // 80-bit floating point, which use the suffixes s,l,t respectively.
3222 // Otherwise, we assume that this may be an integer instruction, which comes
3223 // in 8/16/32/64-bit forms using the b,w,l,q suffixes respectively.
3224 const char *Suffixes = Base[0] != 'f' ? "bwlq" : "slt\0";
3226 // Check for the various suffix matches.
3227 uint64_t ErrorInfoIgnore;
3228 FeatureBitset ErrorInfoMissingFeatures; // Init suppresses compiler warnings.
3229 unsigned Match[4];
3231 for (unsigned I = 0, E = array_lengthof(Match); I != E; ++I) {
3232 Tmp.back() = Suffixes[I];
3233 Match[I] = MatchInstruction(Operands, Inst, ErrorInfoIgnore,
3234 MissingFeatures, MatchingInlineAsm,
3235 isParsingIntelSyntax());
3236 // If this returned as a missing feature failure, remember that.
3237 if (Match[I] == Match_MissingFeature)
3238 ErrorInfoMissingFeatures = MissingFeatures;
3241 // Restore the old token.
3242 Op.setTokenValue(Base);
3244 // If exactly one matched, then we treat that as a successful match (and the
3245 // instruction will already have been filled in correctly, since the failing
3246 // matches won't have modified it).
3247 unsigned NumSuccessfulMatches =
3248 std::count(std::begin(Match), std::end(Match), Match_Success);
3249 if (NumSuccessfulMatches == 1) {
3250 Inst.setLoc(IDLoc);
3251 if (!MatchingInlineAsm)
3252 EmitInstruction(Inst, Operands, Out);
3253 Opcode = Inst.getOpcode();
3254 return false;
3257 // Otherwise, the match failed, try to produce a decent error message.
3259 // If we had multiple suffix matches, then identify this as an ambiguous
3260 // match.
3261 if (NumSuccessfulMatches > 1) {
3262 char MatchChars[4];
3263 unsigned NumMatches = 0;
3264 for (unsigned I = 0, E = array_lengthof(Match); I != E; ++I)
3265 if (Match[I] == Match_Success)
3266 MatchChars[NumMatches++] = Suffixes[I];
3268 SmallString<126> Msg;
3269 raw_svector_ostream OS(Msg);
3270 OS << "ambiguous instructions require an explicit suffix (could be ";
3271 for (unsigned i = 0; i != NumMatches; ++i) {
3272 if (i != 0)
3273 OS << ", ";
3274 if (i + 1 == NumMatches)
3275 OS << "or ";
3276 OS << "'" << Base << MatchChars[i] << "'";
3278 OS << ")";
3279 Error(IDLoc, OS.str(), EmptyRange, MatchingInlineAsm);
3280 return true;
3283 // Okay, we know that none of the variants matched successfully.
3285 // If all of the instructions reported an invalid mnemonic, then the original
3286 // mnemonic was invalid.
3287 if (std::count(std::begin(Match), std::end(Match), Match_MnemonicFail) == 4) {
3288 if (OriginalError == Match_MnemonicFail)
3289 return Error(IDLoc, "invalid instruction mnemonic '" + Base + "'",
3290 Op.getLocRange(), MatchingInlineAsm);
3292 if (OriginalError == Match_Unsupported)
3293 return Error(IDLoc, "unsupported instruction", EmptyRange,
3294 MatchingInlineAsm);
3296 assert(OriginalError == Match_InvalidOperand && "Unexpected error");
3297 // Recover location info for the operand if we know which was the problem.
3298 if (ErrorInfo != ~0ULL) {
3299 if (ErrorInfo >= Operands.size())
3300 return Error(IDLoc, "too few operands for instruction", EmptyRange,
3301 MatchingInlineAsm);
3303 X86Operand &Operand = (X86Operand &)*Operands[ErrorInfo];
3304 if (Operand.getStartLoc().isValid()) {
3305 SMRange OperandRange = Operand.getLocRange();
3306 return Error(Operand.getStartLoc(), "invalid operand for instruction",
3307 OperandRange, MatchingInlineAsm);
3311 return Error(IDLoc, "invalid operand for instruction", EmptyRange,
3312 MatchingInlineAsm);
3315 // If one instruction matched as unsupported, report this as unsupported.
3316 if (std::count(std::begin(Match), std::end(Match),
3317 Match_Unsupported) == 1) {
3318 return Error(IDLoc, "unsupported instruction", EmptyRange,
3319 MatchingInlineAsm);
3322 // If one instruction matched with a missing feature, report this as a
3323 // missing feature.
3324 if (std::count(std::begin(Match), std::end(Match),
3325 Match_MissingFeature) == 1) {
3326 ErrorInfo = Match_MissingFeature;
3327 return ErrorMissingFeature(IDLoc, ErrorInfoMissingFeatures,
3328 MatchingInlineAsm);
3331 // If one instruction matched with an invalid operand, report this as an
3332 // operand failure.
3333 if (std::count(std::begin(Match), std::end(Match),
3334 Match_InvalidOperand) == 1) {
3335 return Error(IDLoc, "invalid operand for instruction", EmptyRange,
3336 MatchingInlineAsm);
3339 // If all of these were an outright failure, report it in a useless way.
3340 Error(IDLoc, "unknown use of instruction mnemonic without a size suffix",
3341 EmptyRange, MatchingInlineAsm);
3342 return true;
3345 bool X86AsmParser::MatchAndEmitIntelInstruction(SMLoc IDLoc, unsigned &Opcode,
3346 OperandVector &Operands,
3347 MCStreamer &Out,
3348 uint64_t &ErrorInfo,
3349 bool MatchingInlineAsm) {
3350 assert(!Operands.empty() && "Unexpect empty operand list!");
3351 assert((*Operands[0]).isToken() && "Leading operand should always be a mnemonic!");
3352 StringRef Mnemonic = (static_cast<X86Operand &>(*Operands[0])).getToken();
3353 SMRange EmptyRange = None;
3354 StringRef Base = (static_cast<X86Operand &>(*Operands[0])).getToken();
3355 unsigned Prefixes = getPrefixes(Operands);
3357 // First, handle aliases that expand to multiple instructions.
3358 MatchFPUWaitAlias(IDLoc, static_cast<X86Operand &>(*Operands[0]), Operands, Out, MatchingInlineAsm);
3359 X86Operand &Op = static_cast<X86Operand &>(*Operands[0]);
3361 MCInst Inst;
3363 // If VEX3 encoding is forced, we need to pass the USE_VEX3 flag to the
3364 // encoder.
3365 if (ForcedVEXEncoding == VEXEncoding_VEX3)
3366 Prefixes |= X86::IP_USE_VEX3;
3368 if (Prefixes)
3369 Inst.setFlags(Prefixes);
3371 // Find one unsized memory operand, if present.
3372 X86Operand *UnsizedMemOp = nullptr;
3373 for (const auto &Op : Operands) {
3374 X86Operand *X86Op = static_cast<X86Operand *>(Op.get());
3375 if (X86Op->isMemUnsized()) {
3376 UnsizedMemOp = X86Op;
3377 // Have we found an unqualified memory operand,
3378 // break. IA allows only one memory operand.
3379 break;
3383 // Allow some instructions to have implicitly pointer-sized operands. This is
3384 // compatible with gas.
3385 if (UnsizedMemOp) {
3386 static const char *const PtrSizedInstrs[] = {"call", "jmp", "push"};
3387 for (const char *Instr : PtrSizedInstrs) {
3388 if (Mnemonic == Instr) {
3389 UnsizedMemOp->Mem.Size = getPointerWidth();
3390 break;
3395 SmallVector<unsigned, 8> Match;
3396 FeatureBitset ErrorInfoMissingFeatures;
3397 FeatureBitset MissingFeatures;
3399 // If unsized push has immediate operand we should default the default pointer
3400 // size for the size.
3401 if (Mnemonic == "push" && Operands.size() == 2) {
3402 auto *X86Op = static_cast<X86Operand *>(Operands[1].get());
3403 if (X86Op->isImm()) {
3404 // If it's not a constant fall through and let remainder take care of it.
3405 const auto *CE = dyn_cast<MCConstantExpr>(X86Op->getImm());
3406 unsigned Size = getPointerWidth();
3407 if (CE &&
3408 (isIntN(Size, CE->getValue()) || isUIntN(Size, CE->getValue()))) {
3409 SmallString<16> Tmp;
3410 Tmp += Base;
3411 Tmp += (is64BitMode())
3412 ? "q"
3413 : (is32BitMode()) ? "l" : (is16BitMode()) ? "w" : " ";
3414 Op.setTokenValue(Tmp);
3415 // Do match in ATT mode to allow explicit suffix usage.
3416 Match.push_back(MatchInstruction(Operands, Inst, ErrorInfo,
3417 MissingFeatures, MatchingInlineAsm,
3418 false /*isParsingIntelSyntax()*/));
3419 Op.setTokenValue(Base);
3424 // If an unsized memory operand is present, try to match with each memory
3425 // operand size. In Intel assembly, the size is not part of the instruction
3426 // mnemonic.
3427 if (UnsizedMemOp && UnsizedMemOp->isMemUnsized()) {
3428 static const unsigned MopSizes[] = {8, 16, 32, 64, 80, 128, 256, 512};
3429 for (unsigned Size : MopSizes) {
3430 UnsizedMemOp->Mem.Size = Size;
3431 uint64_t ErrorInfoIgnore;
3432 unsigned LastOpcode = Inst.getOpcode();
3433 unsigned M = MatchInstruction(Operands, Inst, ErrorInfoIgnore,
3434 MissingFeatures, MatchingInlineAsm,
3435 isParsingIntelSyntax());
3436 if (Match.empty() || LastOpcode != Inst.getOpcode())
3437 Match.push_back(M);
3439 // If this returned as a missing feature failure, remember that.
3440 if (Match.back() == Match_MissingFeature)
3441 ErrorInfoMissingFeatures = MissingFeatures;
3444 // Restore the size of the unsized memory operand if we modified it.
3445 UnsizedMemOp->Mem.Size = 0;
3448 // If we haven't matched anything yet, this is not a basic integer or FPU
3449 // operation. There shouldn't be any ambiguity in our mnemonic table, so try
3450 // matching with the unsized operand.
3451 if (Match.empty()) {
3452 Match.push_back(MatchInstruction(
3453 Operands, Inst, ErrorInfo, MissingFeatures, MatchingInlineAsm,
3454 isParsingIntelSyntax()));
3455 // If this returned as a missing feature failure, remember that.
3456 if (Match.back() == Match_MissingFeature)
3457 ErrorInfoMissingFeatures = MissingFeatures;
3460 // Restore the size of the unsized memory operand if we modified it.
3461 if (UnsizedMemOp)
3462 UnsizedMemOp->Mem.Size = 0;
3464 // If it's a bad mnemonic, all results will be the same.
3465 if (Match.back() == Match_MnemonicFail) {
3466 return Error(IDLoc, "invalid instruction mnemonic '" + Mnemonic + "'",
3467 Op.getLocRange(), MatchingInlineAsm);
3470 unsigned NumSuccessfulMatches =
3471 std::count(std::begin(Match), std::end(Match), Match_Success);
3473 // If matching was ambiguous and we had size information from the frontend,
3474 // try again with that. This handles cases like "movxz eax, m8/m16".
3475 if (UnsizedMemOp && NumSuccessfulMatches > 1 &&
3476 UnsizedMemOp->getMemFrontendSize()) {
3477 UnsizedMemOp->Mem.Size = UnsizedMemOp->getMemFrontendSize();
3478 unsigned M = MatchInstruction(
3479 Operands, Inst, ErrorInfo, MissingFeatures, MatchingInlineAsm,
3480 isParsingIntelSyntax());
3481 if (M == Match_Success)
3482 NumSuccessfulMatches = 1;
3484 // Add a rewrite that encodes the size information we used from the
3485 // frontend.
3486 InstInfo->AsmRewrites->emplace_back(
3487 AOK_SizeDirective, UnsizedMemOp->getStartLoc(),
3488 /*Len=*/0, UnsizedMemOp->getMemFrontendSize());
3491 // If exactly one matched, then we treat that as a successful match (and the
3492 // instruction will already have been filled in correctly, since the failing
3493 // matches won't have modified it).
3494 if (NumSuccessfulMatches == 1) {
3495 if (!MatchingInlineAsm && validateInstruction(Inst, Operands))
3496 return true;
3497 // Some instructions need post-processing to, for example, tweak which
3498 // encoding is selected. Loop on it while changes happen so the individual
3499 // transformations can chain off each other.
3500 if (!MatchingInlineAsm)
3501 while (processInstruction(Inst, Operands))
3503 Inst.setLoc(IDLoc);
3504 if (!MatchingInlineAsm)
3505 EmitInstruction(Inst, Operands, Out);
3506 Opcode = Inst.getOpcode();
3507 return false;
3508 } else if (NumSuccessfulMatches > 1) {
3509 assert(UnsizedMemOp &&
3510 "multiple matches only possible with unsized memory operands");
3511 return Error(UnsizedMemOp->getStartLoc(),
3512 "ambiguous operand size for instruction '" + Mnemonic + "\'",
3513 UnsizedMemOp->getLocRange());
3516 // If one instruction matched as unsupported, report this as unsupported.
3517 if (std::count(std::begin(Match), std::end(Match),
3518 Match_Unsupported) == 1) {
3519 return Error(IDLoc, "unsupported instruction", EmptyRange,
3520 MatchingInlineAsm);
3523 // If one instruction matched with a missing feature, report this as a
3524 // missing feature.
3525 if (std::count(std::begin(Match), std::end(Match),
3526 Match_MissingFeature) == 1) {
3527 ErrorInfo = Match_MissingFeature;
3528 return ErrorMissingFeature(IDLoc, ErrorInfoMissingFeatures,
3529 MatchingInlineAsm);
3532 // If one instruction matched with an invalid operand, report this as an
3533 // operand failure.
3534 if (std::count(std::begin(Match), std::end(Match),
3535 Match_InvalidOperand) == 1) {
3536 return Error(IDLoc, "invalid operand for instruction", EmptyRange,
3537 MatchingInlineAsm);
3540 if (std::count(std::begin(Match), std::end(Match),
3541 Match_InvalidImmUnsignedi4) == 1) {
3542 SMLoc ErrorLoc = ((X86Operand &)*Operands[ErrorInfo]).getStartLoc();
3543 if (ErrorLoc == SMLoc())
3544 ErrorLoc = IDLoc;
3545 return Error(ErrorLoc, "immediate must be an integer in range [0, 15]",
3546 EmptyRange, MatchingInlineAsm);
3549 // If all of these were an outright failure, report it in a useless way.
3550 return Error(IDLoc, "unknown instruction mnemonic", EmptyRange,
3551 MatchingInlineAsm);
3554 bool X86AsmParser::OmitRegisterFromClobberLists(unsigned RegNo) {
3555 return X86MCRegisterClasses[X86::SEGMENT_REGRegClassID].contains(RegNo);
3558 bool X86AsmParser::ParseDirective(AsmToken DirectiveID) {
3559 MCAsmParser &Parser = getParser();
3560 StringRef IDVal = DirectiveID.getIdentifier();
3561 if (IDVal.startswith(".code"))
3562 return ParseDirectiveCode(IDVal, DirectiveID.getLoc());
3563 else if (IDVal.startswith(".att_syntax")) {
3564 if (getLexer().isNot(AsmToken::EndOfStatement)) {
3565 if (Parser.getTok().getString() == "prefix")
3566 Parser.Lex();
3567 else if (Parser.getTok().getString() == "noprefix")
3568 return Error(DirectiveID.getLoc(), "'.att_syntax noprefix' is not "
3569 "supported: registers must have a "
3570 "'%' prefix in .att_syntax");
3572 getParser().setAssemblerDialect(0);
3573 return false;
3574 } else if (IDVal.startswith(".intel_syntax")) {
3575 getParser().setAssemblerDialect(1);
3576 if (getLexer().isNot(AsmToken::EndOfStatement)) {
3577 if (Parser.getTok().getString() == "noprefix")
3578 Parser.Lex();
3579 else if (Parser.getTok().getString() == "prefix")
3580 return Error(DirectiveID.getLoc(), "'.intel_syntax prefix' is not "
3581 "supported: registers must not have "
3582 "a '%' prefix in .intel_syntax");
3584 return false;
3585 } else if (IDVal == ".even")
3586 return parseDirectiveEven(DirectiveID.getLoc());
3587 else if (IDVal == ".cv_fpo_proc")
3588 return parseDirectiveFPOProc(DirectiveID.getLoc());
3589 else if (IDVal == ".cv_fpo_setframe")
3590 return parseDirectiveFPOSetFrame(DirectiveID.getLoc());
3591 else if (IDVal == ".cv_fpo_pushreg")
3592 return parseDirectiveFPOPushReg(DirectiveID.getLoc());
3593 else if (IDVal == ".cv_fpo_stackalloc")
3594 return parseDirectiveFPOStackAlloc(DirectiveID.getLoc());
3595 else if (IDVal == ".cv_fpo_stackalign")
3596 return parseDirectiveFPOStackAlign(DirectiveID.getLoc());
3597 else if (IDVal == ".cv_fpo_endprologue")
3598 return parseDirectiveFPOEndPrologue(DirectiveID.getLoc());
3599 else if (IDVal == ".cv_fpo_endproc")
3600 return parseDirectiveFPOEndProc(DirectiveID.getLoc());
3601 else if (IDVal == ".seh_pushreg")
3602 return parseDirectiveSEHPushReg(DirectiveID.getLoc());
3603 else if (IDVal == ".seh_setframe")
3604 return parseDirectiveSEHSetFrame(DirectiveID.getLoc());
3605 else if (IDVal == ".seh_savereg")
3606 return parseDirectiveSEHSaveReg(DirectiveID.getLoc());
3607 else if (IDVal == ".seh_savexmm")
3608 return parseDirectiveSEHSaveXMM(DirectiveID.getLoc());
3609 else if (IDVal == ".seh_pushframe")
3610 return parseDirectiveSEHPushFrame(DirectiveID.getLoc());
3612 return true;
3615 /// parseDirectiveEven
3616 /// ::= .even
3617 bool X86AsmParser::parseDirectiveEven(SMLoc L) {
3618 if (parseToken(AsmToken::EndOfStatement, "unexpected token in directive"))
3619 return false;
3621 const MCSection *Section = getStreamer().getCurrentSectionOnly();
3622 if (!Section) {
3623 getStreamer().InitSections(false);
3624 Section = getStreamer().getCurrentSectionOnly();
3626 if (Section->UseCodeAlign())
3627 getStreamer().EmitCodeAlignment(2, 0);
3628 else
3629 getStreamer().EmitValueToAlignment(2, 0, 1, 0);
3630 return false;
3633 /// ParseDirectiveCode
3634 /// ::= .code16 | .code32 | .code64
3635 bool X86AsmParser::ParseDirectiveCode(StringRef IDVal, SMLoc L) {
3636 MCAsmParser &Parser = getParser();
3637 Code16GCC = false;
3638 if (IDVal == ".code16") {
3639 Parser.Lex();
3640 if (!is16BitMode()) {
3641 SwitchMode(X86::Mode16Bit);
3642 getParser().getStreamer().EmitAssemblerFlag(MCAF_Code16);
3644 } else if (IDVal == ".code16gcc") {
3645 // .code16gcc parses as if in 32-bit mode, but emits code in 16-bit mode.
3646 Parser.Lex();
3647 Code16GCC = true;
3648 if (!is16BitMode()) {
3649 SwitchMode(X86::Mode16Bit);
3650 getParser().getStreamer().EmitAssemblerFlag(MCAF_Code16);
3652 } else if (IDVal == ".code32") {
3653 Parser.Lex();
3654 if (!is32BitMode()) {
3655 SwitchMode(X86::Mode32Bit);
3656 getParser().getStreamer().EmitAssemblerFlag(MCAF_Code32);
3658 } else if (IDVal == ".code64") {
3659 Parser.Lex();
3660 if (!is64BitMode()) {
3661 SwitchMode(X86::Mode64Bit);
3662 getParser().getStreamer().EmitAssemblerFlag(MCAF_Code64);
3664 } else {
3665 Error(L, "unknown directive " + IDVal);
3666 return false;
3669 return false;
3672 // .cv_fpo_proc foo
3673 bool X86AsmParser::parseDirectiveFPOProc(SMLoc L) {
3674 MCAsmParser &Parser = getParser();
3675 StringRef ProcName;
3676 int64_t ParamsSize;
3677 if (Parser.parseIdentifier(ProcName))
3678 return Parser.TokError("expected symbol name");
3679 if (Parser.parseIntToken(ParamsSize, "expected parameter byte count"))
3680 return true;
3681 if (!isUIntN(32, ParamsSize))
3682 return Parser.TokError("parameters size out of range");
3683 if (Parser.parseEOL("unexpected tokens"))
3684 return addErrorSuffix(" in '.cv_fpo_proc' directive");
3685 MCSymbol *ProcSym = getContext().getOrCreateSymbol(ProcName);
3686 return getTargetStreamer().emitFPOProc(ProcSym, ParamsSize, L);
3689 // .cv_fpo_setframe ebp
3690 bool X86AsmParser::parseDirectiveFPOSetFrame(SMLoc L) {
3691 MCAsmParser &Parser = getParser();
3692 unsigned Reg;
3693 SMLoc DummyLoc;
3694 if (ParseRegister(Reg, DummyLoc, DummyLoc) ||
3695 Parser.parseEOL("unexpected tokens"))
3696 return addErrorSuffix(" in '.cv_fpo_setframe' directive");
3697 return getTargetStreamer().emitFPOSetFrame(Reg, L);
3700 // .cv_fpo_pushreg ebx
3701 bool X86AsmParser::parseDirectiveFPOPushReg(SMLoc L) {
3702 MCAsmParser &Parser = getParser();
3703 unsigned Reg;
3704 SMLoc DummyLoc;
3705 if (ParseRegister(Reg, DummyLoc, DummyLoc) ||
3706 Parser.parseEOL("unexpected tokens"))
3707 return addErrorSuffix(" in '.cv_fpo_pushreg' directive");
3708 return getTargetStreamer().emitFPOPushReg(Reg, L);
3711 // .cv_fpo_stackalloc 20
3712 bool X86AsmParser::parseDirectiveFPOStackAlloc(SMLoc L) {
3713 MCAsmParser &Parser = getParser();
3714 int64_t Offset;
3715 if (Parser.parseIntToken(Offset, "expected offset") ||
3716 Parser.parseEOL("unexpected tokens"))
3717 return addErrorSuffix(" in '.cv_fpo_stackalloc' directive");
3718 return getTargetStreamer().emitFPOStackAlloc(Offset, L);
3721 // .cv_fpo_stackalign 8
3722 bool X86AsmParser::parseDirectiveFPOStackAlign(SMLoc L) {
3723 MCAsmParser &Parser = getParser();
3724 int64_t Offset;
3725 if (Parser.parseIntToken(Offset, "expected offset") ||
3726 Parser.parseEOL("unexpected tokens"))
3727 return addErrorSuffix(" in '.cv_fpo_stackalign' directive");
3728 return getTargetStreamer().emitFPOStackAlign(Offset, L);
3731 // .cv_fpo_endprologue
3732 bool X86AsmParser::parseDirectiveFPOEndPrologue(SMLoc L) {
3733 MCAsmParser &Parser = getParser();
3734 if (Parser.parseEOL("unexpected tokens"))
3735 return addErrorSuffix(" in '.cv_fpo_endprologue' directive");
3736 return getTargetStreamer().emitFPOEndPrologue(L);
3739 // .cv_fpo_endproc
3740 bool X86AsmParser::parseDirectiveFPOEndProc(SMLoc L) {
3741 MCAsmParser &Parser = getParser();
3742 if (Parser.parseEOL("unexpected tokens"))
3743 return addErrorSuffix(" in '.cv_fpo_endproc' directive");
3744 return getTargetStreamer().emitFPOEndProc(L);
3747 bool X86AsmParser::parseSEHRegisterNumber(unsigned RegClassID,
3748 unsigned &RegNo) {
3749 SMLoc startLoc = getLexer().getLoc();
3750 const MCRegisterInfo *MRI = getContext().getRegisterInfo();
3752 // Try parsing the argument as a register first.
3753 if (getLexer().getTok().isNot(AsmToken::Integer)) {
3754 SMLoc endLoc;
3755 if (ParseRegister(RegNo, startLoc, endLoc))
3756 return true;
3758 if (!X86MCRegisterClasses[RegClassID].contains(RegNo)) {
3759 return Error(startLoc,
3760 "register is not supported for use with this directive");
3762 } else {
3763 // Otherwise, an integer number matching the encoding of the desired
3764 // register may appear.
3765 int64_t EncodedReg;
3766 if (getParser().parseAbsoluteExpression(EncodedReg))
3767 return true;
3769 // The SEH register number is the same as the encoding register number. Map
3770 // from the encoding back to the LLVM register number.
3771 RegNo = 0;
3772 for (MCPhysReg Reg : X86MCRegisterClasses[RegClassID]) {
3773 if (MRI->getEncodingValue(Reg) == EncodedReg) {
3774 RegNo = Reg;
3775 break;
3778 if (RegNo == 0) {
3779 return Error(startLoc,
3780 "incorrect register number for use with this directive");
3784 return false;
3787 bool X86AsmParser::parseDirectiveSEHPushReg(SMLoc Loc) {
3788 unsigned Reg = 0;
3789 if (parseSEHRegisterNumber(X86::GR64RegClassID, Reg))
3790 return true;
3792 if (getLexer().isNot(AsmToken::EndOfStatement))
3793 return TokError("unexpected token in directive");
3795 getParser().Lex();
3796 getStreamer().EmitWinCFIPushReg(Reg, Loc);
3797 return false;
3800 bool X86AsmParser::parseDirectiveSEHSetFrame(SMLoc Loc) {
3801 unsigned Reg = 0;
3802 int64_t Off;
3803 if (parseSEHRegisterNumber(X86::GR64RegClassID, Reg))
3804 return true;
3805 if (getLexer().isNot(AsmToken::Comma))
3806 return TokError("you must specify a stack pointer offset");
3808 getParser().Lex();
3809 if (getParser().parseAbsoluteExpression(Off))
3810 return true;
3812 if (getLexer().isNot(AsmToken::EndOfStatement))
3813 return TokError("unexpected token in directive");
3815 getParser().Lex();
3816 getStreamer().EmitWinCFISetFrame(Reg, Off, Loc);
3817 return false;
3820 bool X86AsmParser::parseDirectiveSEHSaveReg(SMLoc Loc) {
3821 unsigned Reg = 0;
3822 int64_t Off;
3823 if (parseSEHRegisterNumber(X86::GR64RegClassID, Reg))
3824 return true;
3825 if (getLexer().isNot(AsmToken::Comma))
3826 return TokError("you must specify an offset on the stack");
3828 getParser().Lex();
3829 if (getParser().parseAbsoluteExpression(Off))
3830 return true;
3832 if (getLexer().isNot(AsmToken::EndOfStatement))
3833 return TokError("unexpected token in directive");
3835 getParser().Lex();
3836 getStreamer().EmitWinCFISaveReg(Reg, Off, Loc);
3837 return false;
3840 bool X86AsmParser::parseDirectiveSEHSaveXMM(SMLoc Loc) {
3841 unsigned Reg = 0;
3842 int64_t Off;
3843 if (parseSEHRegisterNumber(X86::VR128XRegClassID, Reg))
3844 return true;
3845 if (getLexer().isNot(AsmToken::Comma))
3846 return TokError("you must specify an offset on the stack");
3848 getParser().Lex();
3849 if (getParser().parseAbsoluteExpression(Off))
3850 return true;
3852 if (getLexer().isNot(AsmToken::EndOfStatement))
3853 return TokError("unexpected token in directive");
3855 getParser().Lex();
3856 getStreamer().EmitWinCFISaveXMM(Reg, Off, Loc);
3857 return false;
3860 bool X86AsmParser::parseDirectiveSEHPushFrame(SMLoc Loc) {
3861 bool Code = false;
3862 StringRef CodeID;
3863 if (getLexer().is(AsmToken::At)) {
3864 SMLoc startLoc = getLexer().getLoc();
3865 getParser().Lex();
3866 if (!getParser().parseIdentifier(CodeID)) {
3867 if (CodeID != "code")
3868 return Error(startLoc, "expected @code");
3869 Code = true;
3873 if (getLexer().isNot(AsmToken::EndOfStatement))
3874 return TokError("unexpected token in directive");
3876 getParser().Lex();
3877 getStreamer().EmitWinCFIPushFrame(Code, Loc);
3878 return false;
3881 // Force static initialization.
3882 extern "C" void LLVMInitializeX86AsmParser() {
3883 RegisterMCAsmParser<X86AsmParser> X(getTheX86_32Target());
3884 RegisterMCAsmParser<X86AsmParser> Y(getTheX86_64Target());
3887 #define GET_REGISTER_MATCHER
3888 #define GET_MATCHER_IMPLEMENTATION
3889 #define GET_SUBTARGET_FEATURE_NAME
3890 #include "X86GenAsmMatcher.inc"