1 //===-- X86AsmParser.cpp - Parse X86 assembly to MCInst instructions ------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 #include "llvm/Target/TargetAsmParser.h"
12 #include "X86Subtarget.h"
13 #include "llvm/Target/TargetRegistry.h"
14 #include "llvm/Target/TargetAsmParser.h"
15 #include "llvm/MC/MCStreamer.h"
16 #include "llvm/MC/MCExpr.h"
17 #include "llvm/MC/MCInst.h"
18 #include "llvm/MC/MCParser/MCAsmLexer.h"
19 #include "llvm/MC/MCParser/MCAsmParser.h"
20 #include "llvm/MC/MCParser/MCParsedAsmOperand.h"
21 #include "llvm/ADT/SmallString.h"
22 #include "llvm/ADT/SmallVector.h"
23 #include "llvm/ADT/StringExtras.h"
24 #include "llvm/ADT/StringSwitch.h"
25 #include "llvm/ADT/Twine.h"
26 #include "llvm/Support/SourceMgr.h"
27 #include "llvm/Support/raw_ostream.h"
33 class X86ATTAsmParser
: public TargetAsmParser
{
41 MCAsmParser
&getParser() const { return Parser
; }
43 MCAsmLexer
&getLexer() const { return Parser
.getLexer(); }
45 bool Error(SMLoc L
, const Twine
&Msg
) { return Parser
.Error(L
, Msg
); }
47 X86Operand
*ParseOperand();
48 X86Operand
*ParseMemOperand(unsigned SegReg
, SMLoc StartLoc
);
50 bool ParseDirectiveWord(unsigned Size
, SMLoc L
);
52 bool MatchAndEmitInstruction(SMLoc IDLoc
,
53 SmallVectorImpl
<MCParsedAsmOperand
*> &Operands
,
56 /// isSrcOp - Returns true if operand is either (%rsi) or %ds:(%rsi)
57 /// in 64bit mode or (%esi) or %ds:(%esi) in 32bit mode.
58 bool isSrcOp(X86Operand
&Op
);
60 /// isDstOp - Returns true if operand is either %es:(%rdi) in 64bit mode
61 /// or %es:(%edi) in 32bit mode.
62 bool isDstOp(X86Operand
&Op
);
64 /// @name Auto-generated Matcher Functions
67 #define GET_ASSEMBLER_HEADER
68 #include "X86GenAsmMatcher.inc"
73 X86ATTAsmParser(const Target
&T
, MCAsmParser
&parser
, TargetMachine
&TM
)
74 : TargetAsmParser(T
), Parser(parser
), TM(TM
) {
76 // Initialize the set of available features.
77 setAvailableFeatures(ComputeAvailableFeatures(
78 &TM
.getSubtarget
<X86Subtarget
>()));
80 virtual bool ParseRegister(unsigned &RegNo
, SMLoc
&StartLoc
, SMLoc
&EndLoc
);
82 virtual bool ParseInstruction(StringRef Name
, SMLoc NameLoc
,
83 SmallVectorImpl
<MCParsedAsmOperand
*> &Operands
);
85 virtual bool ParseDirective(AsmToken DirectiveID
);
/// X86_32ATTAsmParser - X86ATTAsmParser subclass that is registered for
/// TheX86_32Target. The constructor forwards the target, the generic
/// MCAsmParser and the TargetMachine unchanged to the X86ATTAsmParser base.
88 class X86_32ATTAsmParser
: public X86ATTAsmParser
{
90 X86_32ATTAsmParser(const Target
&T
, MCAsmParser
&Parser
, TargetMachine
&TM
)
91 : X86ATTAsmParser(T
, Parser
, TM
) {
/// X86_64ATTAsmParser - X86ATTAsmParser subclass that is registered for
/// TheX86_64Target. The constructor forwards the target, the generic
/// MCAsmParser and the TargetMachine unchanged to the X86ATTAsmParser base.
96 class X86_64ATTAsmParser
: public X86ATTAsmParser
{
98 X86_64ATTAsmParser(const Target
&T
, MCAsmParser
&Parser
, TargetMachine
&TM
)
99 : X86ATTAsmParser(T
, Parser
, TM
) {
104 } // end anonymous namespace
106 /// @name Auto-generated Match Functions
109 static unsigned MatchRegisterName(StringRef Name
);
115 /// X86Operand - Instances of this class represent a parsed X86 machine instruction operand.
117 struct X86Operand
: public MCParsedAsmOperand
{
125 SMLoc StartLoc
, EndLoc
;
150 X86Operand(KindTy K
, SMLoc Start
, SMLoc End
)
151 : Kind(K
), StartLoc(Start
), EndLoc(End
) {}
153 /// getStartLoc - Get the location of the first token of this operand.
154 SMLoc
getStartLoc() const { return StartLoc
; }
155 /// getEndLoc - Get the location of the last token of this operand.
156 SMLoc
getEndLoc() const { return EndLoc
; }
158 virtual void dump(raw_ostream
&OS
) const {}
160 StringRef
getToken() const {
161 assert(Kind
== Token
&& "Invalid access!");
162 return StringRef(Tok
.Data
, Tok
.Length
);
164 void setTokenValue(StringRef Value
) {
165 assert(Kind
== Token
&& "Invalid access!");
166 Tok
.Data
= Value
.data();
167 Tok
.Length
= Value
.size();
170 unsigned getReg() const {
171 assert(Kind
== Register
&& "Invalid access!");
175 const MCExpr
*getImm() const {
176 assert(Kind
== Immediate
&& "Invalid access!");
180 const MCExpr
*getMemDisp() const {
181 assert(Kind
== Memory
&& "Invalid access!");
184 unsigned getMemSegReg() const {
185 assert(Kind
== Memory
&& "Invalid access!");
188 unsigned getMemBaseReg() const {
189 assert(Kind
== Memory
&& "Invalid access!");
192 unsigned getMemIndexReg() const {
193 assert(Kind
== Memory
&& "Invalid access!");
196 unsigned getMemScale() const {
197 assert(Kind
== Memory
&& "Invalid access!");
201 bool isToken() const {return Kind
== Token
; }
203 bool isImm() const { return Kind
== Immediate
; }
205 bool isImmSExti16i8() const {
209 // If this isn't a constant expr, just assume it fits and let relaxation
211 const MCConstantExpr
*CE
= dyn_cast
<MCConstantExpr
>(getImm());
215 // Otherwise, check the value is in a range that makes sense for this
217 uint64_t Value
= CE
->getValue();
218 return (( Value
<= 0x000000000000007FULL
)||
219 (0x000000000000FF80ULL
<= Value
&& Value
<= 0x000000000000FFFFULL
)||
220 (0xFFFFFFFFFFFFFF80ULL
<= Value
&& Value
<= 0xFFFFFFFFFFFFFFFFULL
));
222 bool isImmSExti32i8() const {
226 // If this isn't a constant expr, just assume it fits and let relaxation
228 const MCConstantExpr
*CE
= dyn_cast
<MCConstantExpr
>(getImm());
232 // Otherwise, check the value is in a range that makes sense for this
234 uint64_t Value
= CE
->getValue();
235 return (( Value
<= 0x000000000000007FULL
)||
236 (0x00000000FFFFFF80ULL
<= Value
&& Value
<= 0x00000000FFFFFFFFULL
)||
237 (0xFFFFFFFFFFFFFF80ULL
<= Value
&& Value
<= 0xFFFFFFFFFFFFFFFFULL
));
239 bool isImmSExti64i8() const {
243 // If this isn't a constant expr, just assume it fits and let relaxation
245 const MCConstantExpr
*CE
= dyn_cast
<MCConstantExpr
>(getImm());
249 // Otherwise, check the value is in a range that makes sense for this
251 uint64_t Value
= CE
->getValue();
252 return (( Value
<= 0x000000000000007FULL
)||
253 (0xFFFFFFFFFFFFFF80ULL
<= Value
&& Value
<= 0xFFFFFFFFFFFFFFFFULL
));
255 bool isImmSExti64i32() const {
259 // If this isn't a constant expr, just assume it fits and let relaxation
261 const MCConstantExpr
*CE
= dyn_cast
<MCConstantExpr
>(getImm());
265 // Otherwise, check the value is in a range that makes sense for this
267 uint64_t Value
= CE
->getValue();
268 return (( Value
<= 0x000000007FFFFFFFULL
)||
269 (0xFFFFFFFF80000000ULL
<= Value
&& Value
<= 0xFFFFFFFFFFFFFFFFULL
));
272 bool isMem() const { return Kind
== Memory
; }
274 bool isAbsMem() const {
275 return Kind
== Memory
&& !getMemSegReg() && !getMemBaseReg() &&
276 !getMemIndexReg() && getMemScale() == 1;
279 bool isReg() const { return Kind
== Register
; }
281 void addExpr(MCInst
&Inst
, const MCExpr
*Expr
) const {
282 // Add as immediates when possible.
283 if (const MCConstantExpr
*CE
= dyn_cast
<MCConstantExpr
>(Expr
))
284 Inst
.addOperand(MCOperand::CreateImm(CE
->getValue()));
286 Inst
.addOperand(MCOperand::CreateExpr(Expr
));
289 void addRegOperands(MCInst
&Inst
, unsigned N
) const {
290 assert(N
== 1 && "Invalid number of operands!");
291 Inst
.addOperand(MCOperand::CreateReg(getReg()));
294 void addImmOperands(MCInst
&Inst
, unsigned N
) const {
295 assert(N
== 1 && "Invalid number of operands!");
296 addExpr(Inst
, getImm());
299 void addMemOperands(MCInst
&Inst
, unsigned N
) const {
300 assert((N
== 5) && "Invalid number of operands!");
301 Inst
.addOperand(MCOperand::CreateReg(getMemBaseReg()));
302 Inst
.addOperand(MCOperand::CreateImm(getMemScale()));
303 Inst
.addOperand(MCOperand::CreateReg(getMemIndexReg()));
304 addExpr(Inst
, getMemDisp());
305 Inst
.addOperand(MCOperand::CreateReg(getMemSegReg()));
308 void addAbsMemOperands(MCInst
&Inst
, unsigned N
) const {
309 assert((N
== 1) && "Invalid number of operands!");
310 Inst
.addOperand(MCOperand::CreateExpr(getMemDisp()));
313 static X86Operand
*CreateToken(StringRef Str
, SMLoc Loc
) {
314 X86Operand
*Res
= new X86Operand(Token
, Loc
, Loc
);
315 Res
->Tok
.Data
= Str
.data();
316 Res
->Tok
.Length
= Str
.size();
320 static X86Operand
*CreateReg(unsigned RegNo
, SMLoc StartLoc
, SMLoc EndLoc
) {
321 X86Operand
*Res
= new X86Operand(Register
, StartLoc
, EndLoc
);
322 Res
->Reg
.RegNo
= RegNo
;
326 static X86Operand
*CreateImm(const MCExpr
*Val
, SMLoc StartLoc
, SMLoc EndLoc
){
327 X86Operand
*Res
= new X86Operand(Immediate
, StartLoc
, EndLoc
);
332 /// Create an absolute memory operand.
333 static X86Operand
*CreateMem(const MCExpr
*Disp
, SMLoc StartLoc
,
335 X86Operand
*Res
= new X86Operand(Memory
, StartLoc
, EndLoc
);
337 Res
->Mem
.Disp
= Disp
;
338 Res
->Mem
.BaseReg
= 0;
339 Res
->Mem
.IndexReg
= 0;
344 /// Create a generalized memory operand.
345 static X86Operand
*CreateMem(unsigned SegReg
, const MCExpr
*Disp
,
346 unsigned BaseReg
, unsigned IndexReg
,
347 unsigned Scale
, SMLoc StartLoc
, SMLoc EndLoc
) {
348 // We should never just have a displacement, that should be parsed as an
349 // absolute memory operand.
350 assert((SegReg
|| BaseReg
|| IndexReg
) && "Invalid memory operand!");
352 // The scale should always be one of {1,2,4,8}.
353 assert(((Scale
== 1 || Scale
== 2 || Scale
== 4 || Scale
== 8)) &&
355 X86Operand
*Res
= new X86Operand(Memory
, StartLoc
, EndLoc
);
356 Res
->Mem
.SegReg
= SegReg
;
357 Res
->Mem
.Disp
= Disp
;
358 Res
->Mem
.BaseReg
= BaseReg
;
359 Res
->Mem
.IndexReg
= IndexReg
;
360 Res
->Mem
.Scale
= Scale
;
365 } // end anonymous namespace.
/// isSrcOp - Returns true if Op is the implicit string-instruction source
/// operand: a memory operand whose segment register is either unset (0) or
/// DS, whose displacement is the constant 0, whose base register is RSI when
/// Is64Bit is set and ESI otherwise, and which has no index register.
367 bool X86ATTAsmParser::isSrcOp(X86Operand
&Op
) {
// Pick the base register to compare against according to the parser mode.
368 unsigned basereg
= Is64Bit
? X86::RSI
: X86::ESI
;
// isa<> guards the cast<> below, so a non-constant displacement simply
// makes the predicate false.
370 return (Op
.isMem() &&
371 (Op
.Mem
.SegReg
== 0 || Op
.Mem
.SegReg
== X86::DS
) &&
372 isa
<MCConstantExpr
>(Op
.Mem
.Disp
) &&
373 cast
<MCConstantExpr
>(Op
.Mem
.Disp
)->getValue() == 0 &&
374 Op
.Mem
.BaseReg
== basereg
&& Op
.Mem
.IndexReg
== 0);
/// isDstOp - Returns true if Op is the implicit string-instruction
/// destination operand: a memory operand whose segment register is ES (an
/// unset segment is not accepted here), whose displacement is the constant 0,
/// whose base register is RDI when Is64Bit is set and EDI otherwise, and which
/// has no index register.
377 bool X86ATTAsmParser::isDstOp(X86Operand
&Op
) {
// Pick the base register to compare against according to the parser mode.
378 unsigned basereg
= Is64Bit
? X86::RDI
: X86::EDI
;
// isa<> guards the cast<> below, so a non-constant displacement simply
// makes the predicate false.
380 return Op
.isMem() && Op
.Mem
.SegReg
== X86::ES
&&
381 isa
<MCConstantExpr
>(Op
.Mem
.Disp
) &&
382 cast
<MCConstantExpr
>(Op
.Mem
.Disp
)->getValue() == 0 &&
383 Op
.Mem
.BaseReg
== basereg
&& Op
.Mem
.IndexReg
== 0;
386 bool X86ATTAsmParser::ParseRegister(unsigned &RegNo
,
387 SMLoc
&StartLoc
, SMLoc
&EndLoc
) {
389 const AsmToken
&TokPercent
= Parser
.getTok();
390 assert(TokPercent
.is(AsmToken::Percent
) && "Invalid token kind!");
391 StartLoc
= TokPercent
.getLoc();
392 Parser
.Lex(); // Eat percent token.
394 const AsmToken
&Tok
= Parser
.getTok();
395 if (Tok
.isNot(AsmToken::Identifier
))
396 return Error(Tok
.getLoc(), "invalid register name");
398 // FIXME: Validate register for the current architecture; we have to do
399 // validation later, so maybe there is no need for this here.
400 RegNo
= MatchRegisterName(Tok
.getString());
402 // If the match failed, try the register name as lowercase.
404 RegNo
= MatchRegisterName(LowercaseString(Tok
.getString()));
406 // FIXME: This should be done using Requires<In32BitMode> and
407 // Requires<In64BitMode> so "eiz" usage in 64-bit instructions
408 // can be also checked.
409 if (RegNo
== X86::RIZ
&& !Is64Bit
)
410 return Error(Tok
.getLoc(), "riz register in 64-bit mode only");
412 // Parse "%st" as "%st(0)" and "%st(1)", which is multiple tokens.
413 if (RegNo
== 0 && (Tok
.getString() == "st" || Tok
.getString() == "ST")) {
415 EndLoc
= Tok
.getLoc();
416 Parser
.Lex(); // Eat 'st'
418 // Check to see if we have '(4)' after %st.
419 if (getLexer().isNot(AsmToken::LParen
))
424 const AsmToken
&IntTok
= Parser
.getTok();
425 if (IntTok
.isNot(AsmToken::Integer
))
426 return Error(IntTok
.getLoc(), "expected stack index");
427 switch (IntTok
.getIntVal()) {
428 case 0: RegNo
= X86::ST0
; break;
429 case 1: RegNo
= X86::ST1
; break;
430 case 2: RegNo
= X86::ST2
; break;
431 case 3: RegNo
= X86::ST3
; break;
432 case 4: RegNo
= X86::ST4
; break;
433 case 5: RegNo
= X86::ST5
; break;
434 case 6: RegNo
= X86::ST6
; break;
435 case 7: RegNo
= X86::ST7
; break;
436 default: return Error(IntTok
.getLoc(), "invalid stack index");
439 if (getParser().Lex().isNot(AsmToken::RParen
))
440 return Error(Parser
.getTok().getLoc(), "expected ')'");
442 EndLoc
= Tok
.getLoc();
443 Parser
.Lex(); // Eat ')'
447 // If this is "db[0-7]", match it as an alias for the DR0-DR7 registers.
449 if (RegNo
== 0 && Tok
.getString().size() == 3 &&
450 Tok
.getString().startswith("db")) {
451 switch (Tok
.getString()[2]) {
452 case '0': RegNo
= X86::DR0
; break;
453 case '1': RegNo
= X86::DR1
; break;
454 case '2': RegNo
= X86::DR2
; break;
455 case '3': RegNo
= X86::DR3
; break;
456 case '4': RegNo
= X86::DR4
; break;
457 case '5': RegNo
= X86::DR5
; break;
458 case '6': RegNo
= X86::DR6
; break;
459 case '7': RegNo
= X86::DR7
; break;
463 EndLoc
= Tok
.getLoc();
464 Parser
.Lex(); // Eat it.
470 return Error(Tok
.getLoc(), "invalid register name");
472 EndLoc
= Tok
.getLoc();
473 Parser
.Lex(); // Eat identifier token.
/// ParseOperand - Parse a single operand, dispatching on the kind of the
/// current lexer token. The cases visible here are:
///   - '%': read a register with ParseRegister (returning 0 if that fails),
///     report an error if it is eiz/riz, then either return a register operand
///     or, when a ':' follows, eat the colon and parse a memory operand that
///     uses the register as its segment;
///   - '$': parse an expression and return it as an immediate operand;
///   - one path that parses a memory operand with no segment register.
/// NOTE(review): some statements of this function are not visible in this
/// listing (e.g. the case label of the memory-operand path and the failure
/// returns), so the description covers only what is shown.
477 X86Operand
*X86ATTAsmParser::ParseOperand() {
478 switch (getLexer().getKind()) {
480 // Parse a memory operand with no segment register.
481 return ParseMemOperand(0, Parser
.getTok().getLoc());
482 case AsmToken::Percent
: {
483 // Read the register.
486 if (ParseRegister(RegNo
, Start
, End
)) return 0;
487 if (RegNo
== X86::EIZ
|| RegNo
== X86::RIZ
) {
488 Error(Start
, "eiz and riz can only be used as index registers");
492 // If this is a segment register followed by a ':', then this is the start
493 // of a memory reference, otherwise this is a normal register reference.
494 if (getLexer().isNot(AsmToken::Colon
))
495 return X86Operand::CreateReg(RegNo
, Start
, End
);
498 getParser().Lex(); // Eat the colon.
499 return ParseMemOperand(RegNo
, Start
);
501 case AsmToken::Dollar
: {
// Start records the location of the current token; End is filled in by
// ParseExpression.
503 SMLoc Start
= Parser
.getTok().getLoc(), End
;
506 if (getParser().ParseExpression(Val
, End
))
508 return X86Operand::CreateImm(Val
, Start
, End
);
513 /// ParseMemOperand: segment: disp(basereg, indexreg, scale). The '%ds:' prefix
514 /// has already been parsed if present.
515 X86Operand
*X86ATTAsmParser::ParseMemOperand(unsigned SegReg
, SMLoc MemStart
) {
517 // We have to disambiguate a parenthesized expression "(4+5)" from the start
518 // of a memory operand with a missing displacement "(%ebx)" or "(,%eax)". The
519 // only way to do this without lookahead is to eat the '(' and see what is after it.
521 const MCExpr
*Disp
= MCConstantExpr::Create(0, getParser().getContext());
522 if (getLexer().isNot(AsmToken::LParen
)) {
524 if (getParser().ParseExpression(Disp
, ExprEnd
)) return 0;
526 // After parsing the base expression we could either have a parenthesized
527 // memory address or not. If not, return now. If so, eat the (.
528 if (getLexer().isNot(AsmToken::LParen
)) {
529 // Unless we have a segment register, treat this as an immediate.
531 return X86Operand::CreateMem(Disp
, MemStart
, ExprEnd
);
532 return X86Operand::CreateMem(SegReg
, Disp
, 0, 0, 1, MemStart
, ExprEnd
);
538 // Okay, we have a '('. We don't know if this is an expression or not,
539 // so we have to eat the ( to see beyond it.
540 SMLoc LParenLoc
= Parser
.getTok().getLoc();
541 Parser
.Lex(); // Eat the '('.
543 if (getLexer().is(AsmToken::Percent
) || getLexer().is(AsmToken::Comma
)) {
544 // Nothing to do here, fall into the code below with the '(' part of the
545 // memory operand consumed.
549 // It must be a parenthesized expression, parse it now.
550 if (getParser().ParseParenExpression(Disp
, ExprEnd
))
553 // After parsing the base expression we could either have a parenthesized
554 // memory address or not. If not, return now. If so, eat the (.
555 if (getLexer().isNot(AsmToken::LParen
)) {
556 // Unless we have a segment register, treat this as an immediate.
558 return X86Operand::CreateMem(Disp
, LParenLoc
, ExprEnd
);
559 return X86Operand::CreateMem(SegReg
, Disp
, 0, 0, 1, MemStart
, ExprEnd
);
567 // If we reached here, then we just ate the ( of the memory operand. Process
568 // the rest of the memory operand.
569 unsigned BaseReg
= 0, IndexReg
= 0, Scale
= 1;
571 if (getLexer().is(AsmToken::Percent
)) {
573 if (ParseRegister(BaseReg
, L
, L
)) return 0;
574 if (BaseReg
== X86::EIZ
|| BaseReg
== X86::RIZ
) {
575 Error(L
, "eiz and riz can only be used as index registers");
580 if (getLexer().is(AsmToken::Comma
)) {
581 Parser
.Lex(); // Eat the comma.
583 // Following the comma we should have either an index register, or a scale
584 // value. We don't support the latter form, but we want to parse it correctly.
587 // Note that even though it would be completely consistent to support syntax
588 // like "1(%eax,,1)", the assembler doesn't. Use "eiz" or "riz" for this.
589 if (getLexer().is(AsmToken::Percent
)) {
591 if (ParseRegister(IndexReg
, L
, L
)) return 0;
593 if (getLexer().isNot(AsmToken::RParen
)) {
594 // Parse the scale amount:
595 // ::= ',' [scale-expression]
596 if (getLexer().isNot(AsmToken::Comma
)) {
597 Error(Parser
.getTok().getLoc(),
598 "expected comma in scale expression");
601 Parser
.Lex(); // Eat the comma.
603 if (getLexer().isNot(AsmToken::RParen
)) {
604 SMLoc Loc
= Parser
.getTok().getLoc();
607 if (getParser().ParseAbsoluteExpression(ScaleVal
))
610 // Validate the scale amount.
611 if (ScaleVal
!= 1 && ScaleVal
!= 2 && ScaleVal
!= 4 && ScaleVal
!= 8){
612 Error(Loc
, "scale factor in address must be 1, 2, 4 or 8");
615 Scale
= (unsigned)ScaleVal
;
618 } else if (getLexer().isNot(AsmToken::RParen
)) {
619 // A scale amount without an index is ignored.
621 SMLoc Loc
= Parser
.getTok().getLoc();
624 if (getParser().ParseAbsoluteExpression(Value
))
628 Warning(Loc
, "scale factor without index register is ignored");
633 // Ok, we've eaten the memory operand, verify we have a ')' and eat it too.
634 if (getLexer().isNot(AsmToken::RParen
)) {
635 Error(Parser
.getTok().getLoc(), "unexpected token in memory operand");
638 SMLoc MemEnd
= Parser
.getTok().getLoc();
639 Parser
.Lex(); // Eat the ')'.
641 return X86Operand::CreateMem(SegReg
, Disp
, BaseReg
, IndexReg
, Scale
,
645 bool X86ATTAsmParser::
646 ParseInstruction(StringRef Name
, SMLoc NameLoc
,
647 SmallVectorImpl
<MCParsedAsmOperand
*> &Operands
) {
648 StringRef PatchedName
= Name
;
650 // FIXME: Hack to recognize setneb as setne.
651 if (PatchedName
.startswith("set") && PatchedName
.endswith("b") &&
652 PatchedName
!= "setb" && PatchedName
!= "setnb")
653 PatchedName
= PatchedName
.substr(0, Name
.size()-1);
655 // FIXME: Hack to recognize cmp<comparison code>{ss,sd,ps,pd}.
656 const MCExpr
*ExtraImmOp
= 0;
657 if ((PatchedName
.startswith("cmp") || PatchedName
.startswith("vcmp")) &&
658 (PatchedName
.endswith("ss") || PatchedName
.endswith("sd") ||
659 PatchedName
.endswith("ps") || PatchedName
.endswith("pd"))) {
660 bool IsVCMP
= PatchedName
.startswith("vcmp");
661 unsigned SSECCIdx
= IsVCMP
? 4 : 3;
662 unsigned SSEComparisonCode
= StringSwitch
<unsigned>(
663 PatchedName
.slice(SSECCIdx
, PatchedName
.size() - 2))
676 .Case("neq_oq", 0x0C)
683 .Case("unord_s", 0x13)
684 .Case("neq_us", 0x14)
685 .Case("nlt_uq", 0x15)
686 .Case("nle_uq", 0x16)
689 .Case("nge_uq", 0x19)
690 .Case("ngt_uq", 0x1A)
691 .Case("false_os", 0x1B)
692 .Case("neq_os", 0x1C)
695 .Case("true_us", 0x1F)
697 if (SSEComparisonCode
!= ~0U) {
698 ExtraImmOp
= MCConstantExpr::Create(SSEComparisonCode
,
699 getParser().getContext());
700 if (PatchedName
.endswith("ss")) {
701 PatchedName
= IsVCMP
? "vcmpss" : "cmpss";
702 } else if (PatchedName
.endswith("sd")) {
703 PatchedName
= IsVCMP
? "vcmpsd" : "cmpsd";
704 } else if (PatchedName
.endswith("ps")) {
705 PatchedName
= IsVCMP
? "vcmpps" : "cmpps";
707 assert(PatchedName
.endswith("pd") && "Unexpected mnemonic!");
708 PatchedName
= IsVCMP
? "vcmppd" : "cmppd";
713 // FIXME: Hack to recognize vpclmul<src1_quadword, src2_quadword>dq
714 if (PatchedName
.startswith("vpclmul")) {
715 unsigned CLMULQuadWordSelect
= StringSwitch
<unsigned>(
716 PatchedName
.slice(7, PatchedName
.size() - 2))
717 .Case("lqlq", 0x00) // src1[63:0], src2[63:0]
718 .Case("hqlq", 0x01) // src1[127:64], src2[63:0]
719 .Case("lqhq", 0x10) // src1[63:0], src2[127:64]
720 .Case("hqhq", 0x11) // src1[127:64], src2[127:64]
722 if (CLMULQuadWordSelect
!= ~0U) {
723 ExtraImmOp
= MCConstantExpr::Create(CLMULQuadWordSelect
,
724 getParser().getContext());
725 assert(PatchedName
.endswith("dq") && "Unexpected mnemonic!");
726 PatchedName
= "vpclmulqdq";
730 Operands
.push_back(X86Operand::CreateToken(PatchedName
, NameLoc
));
733 Operands
.push_back(X86Operand::CreateImm(ExtraImmOp
, NameLoc
, NameLoc
));
736 // Determine whether this is an instruction prefix.
738 Name
== "lock" || Name
== "rep" ||
739 Name
== "repe" || Name
== "repz" ||
740 Name
== "repne" || Name
== "repnz" ||
741 Name
== "rex64" || Name
== "data16";
744 // This does the actual operand parsing. Don't parse any more if we have a
745 // prefix juxtaposed with an operation like "lock incl 4(%rax)", because we
746 // just want to parse the "lock" as the first instruction and the "incl" as the next one.
748 if (getLexer().isNot(AsmToken::EndOfStatement
) && !isPrefix
) {
750 // Parse '*' modifier.
751 if (getLexer().is(AsmToken::Star
)) {
752 SMLoc Loc
= Parser
.getTok().getLoc();
753 Operands
.push_back(X86Operand::CreateToken("*", Loc
));
754 Parser
.Lex(); // Eat the star.
757 // Read the first operand.
758 if (X86Operand
*Op
= ParseOperand())
759 Operands
.push_back(Op
);
761 Parser
.EatToEndOfStatement();
765 while (getLexer().is(AsmToken::Comma
)) {
766 Parser
.Lex(); // Eat the comma.
768 // Parse and remember the operand.
769 if (X86Operand
*Op
= ParseOperand())
770 Operands
.push_back(Op
);
772 Parser
.EatToEndOfStatement();
777 if (getLexer().isNot(AsmToken::EndOfStatement
)) {
778 SMLoc Loc
= getLexer().getLoc();
779 Parser
.EatToEndOfStatement();
780 return Error(Loc
, "unexpected token in argument list");
784 if (getLexer().is(AsmToken::EndOfStatement
))
785 Parser
.Lex(); // Consume the EndOfStatement
786 else if (isPrefix
&& getLexer().is(AsmToken::Slash
))
787 Parser
.Lex(); // Consume the prefix separator Slash
789 // This is a terrible hack to handle "out[bwl]? %al, (%dx)" ->
790 // "outb %al, %dx". Out doesn't take a memory form, but this is a widely
791 // documented form in various unofficial manuals, so a lot of code uses it.
792 if ((Name
== "outb" || Name
== "outw" || Name
== "outl" || Name
== "out") &&
793 Operands
.size() == 3) {
794 X86Operand
&Op
= *(X86Operand
*)Operands
.back();
795 if (Op
.isMem() && Op
.Mem
.SegReg
== 0 &&
796 isa
<MCConstantExpr
>(Op
.Mem
.Disp
) &&
797 cast
<MCConstantExpr
>(Op
.Mem
.Disp
)->getValue() == 0 &&
798 Op
.Mem
.BaseReg
== MatchRegisterName("dx") && Op
.Mem
.IndexReg
== 0) {
799 SMLoc Loc
= Op
.getEndLoc();
800 Operands
.back() = X86Operand::CreateReg(Op
.Mem
.BaseReg
, Loc
, Loc
);
804 // Same hack for "in[bwl]? (%dx), %al" -> "inb %dx, %al".
805 if ((Name
== "inb" || Name
== "inw" || Name
== "inl" || Name
== "in") &&
806 Operands
.size() == 3) {
807 X86Operand
&Op
= *(X86Operand
*)Operands
.begin()[1];
808 if (Op
.isMem() && Op
.Mem
.SegReg
== 0 &&
809 isa
<MCConstantExpr
>(Op
.Mem
.Disp
) &&
810 cast
<MCConstantExpr
>(Op
.Mem
.Disp
)->getValue() == 0 &&
811 Op
.Mem
.BaseReg
== MatchRegisterName("dx") && Op
.Mem
.IndexReg
== 0) {
812 SMLoc Loc
= Op
.getEndLoc();
813 Operands
.begin()[1] = X86Operand::CreateReg(Op
.Mem
.BaseReg
, Loc
, Loc
);
817 // Transform "ins[bwl] %dx, %es:(%edi)" into "ins[bwl]"
818 if (Name
.startswith("ins") && Operands
.size() == 3 &&
819 (Name
== "insb" || Name
== "insw" || Name
== "insl")) {
820 X86Operand
&Op
= *(X86Operand
*)Operands
.begin()[1];
821 X86Operand
&Op2
= *(X86Operand
*)Operands
.begin()[2];
822 if (Op
.isReg() && Op
.getReg() == X86::DX
&& isDstOp(Op2
)) {
830 // Transform "outs[bwl] %ds:(%esi), %dx" into "outs[bwl]"
831 if (Name
.startswith("outs") && Operands
.size() == 3 &&
832 (Name
== "outsb" || Name
== "outsw" || Name
== "outsl")) {
833 X86Operand
&Op
= *(X86Operand
*)Operands
.begin()[1];
834 X86Operand
&Op2
= *(X86Operand
*)Operands
.begin()[2];
835 if (isSrcOp(Op
) && Op2
.isReg() && Op2
.getReg() == X86::DX
) {
843 // Transform "movs[bwl] %ds:(%esi), %es:(%edi)" into "movs[bwl]"
844 if (Name
.startswith("movs") && Operands
.size() == 3 &&
845 (Name
== "movsb" || Name
== "movsw" || Name
== "movsl" ||
846 (Is64Bit
&& Name
== "movsq"))) {
847 X86Operand
&Op
= *(X86Operand
*)Operands
.begin()[1];
848 X86Operand
&Op2
= *(X86Operand
*)Operands
.begin()[2];
849 if (isSrcOp(Op
) && isDstOp(Op2
)) {
856 // Transform "lods[bwl] %ds:(%esi),{%al,%ax,%eax,%rax}" into "lods[bwl]"
857 if (Name
.startswith("lods") && Operands
.size() == 3 &&
858 (Name
== "lods" || Name
== "lodsb" || Name
== "lodsw" ||
859 Name
== "lodsl" || (Is64Bit
&& Name
== "lodsq"))) {
860 X86Operand
*Op1
= static_cast<X86Operand
*>(Operands
[1]);
861 X86Operand
*Op2
= static_cast<X86Operand
*>(Operands
[2]);
862 if (isSrcOp(*Op1
) && Op2
->isReg()) {
864 unsigned reg
= Op2
->getReg();
865 bool isLods
= Name
== "lods";
866 if (reg
== X86::AL
&& (isLods
|| Name
== "lodsb"))
868 else if (reg
== X86::AX
&& (isLods
|| Name
== "lodsw"))
870 else if (reg
== X86::EAX
&& (isLods
|| Name
== "lodsl"))
872 else if (reg
== X86::RAX
&& (isLods
|| Name
== "lodsq"))
882 static_cast<X86Operand
*>(Operands
[0])->setTokenValue(ins
);
886 // Transform "stos[bwl] {%al,%ax,%eax,%rax},%es:(%edi)" into "stos[bwl]"
887 if (Name
.startswith("stos") && Operands
.size() == 3 &&
888 (Name
== "stos" || Name
== "stosb" || Name
== "stosw" ||
889 Name
== "stosl" || (Is64Bit
&& Name
== "stosq"))) {
890 X86Operand
*Op1
= static_cast<X86Operand
*>(Operands
[1]);
891 X86Operand
*Op2
= static_cast<X86Operand
*>(Operands
[2]);
892 if (isDstOp(*Op2
) && Op1
->isReg()) {
894 unsigned reg
= Op1
->getReg();
895 bool isStos
= Name
== "stos";
896 if (reg
== X86::AL
&& (isStos
|| Name
== "stosb"))
898 else if (reg
== X86::AX
&& (isStos
|| Name
== "stosw"))
900 else if (reg
== X86::EAX
&& (isStos
|| Name
== "stosl"))
902 else if (reg
== X86::RAX
&& (isStos
|| Name
== "stosq"))
912 static_cast<X86Operand
*>(Operands
[0])->setTokenValue(ins
);
917 // FIXME: Hack to recognize s{hr,ar,hl} $1, <op>. Canonicalize to <shift> <op> by dropping the $1.
919 if ((Name
.startswith("shr") || Name
.startswith("sar") ||
920 Name
.startswith("shl") || Name
.startswith("sal") ||
921 Name
.startswith("rcl") || Name
.startswith("rcr") ||
922 Name
.startswith("rol") || Name
.startswith("ror")) &&
923 Operands
.size() == 3) {
924 X86Operand
*Op1
= static_cast<X86Operand
*>(Operands
[1]);
925 if (Op1
->isImm() && isa
<MCConstantExpr
>(Op1
->getImm()) &&
926 cast
<MCConstantExpr
>(Op1
->getImm())->getValue() == 1) {
928 Operands
.erase(Operands
.begin() + 1);
932 // Transforms "int $3" into "int3" as a size optimization. We can't write an
933 // instalias with an immediate operand yet.
934 if (Name
== "int" && Operands
.size() == 2) {
935 X86Operand
*Op1
= static_cast<X86Operand
*>(Operands
[1]);
936 if (Op1
->isImm() && isa
<MCConstantExpr
>(Op1
->getImm()) &&
937 cast
<MCConstantExpr
>(Op1
->getImm())->getValue() == 3) {
939 Operands
.erase(Operands
.begin() + 1);
940 static_cast<X86Operand
*>(Operands
[0])->setTokenValue("int3");
947 bool X86ATTAsmParser::
948 MatchAndEmitInstruction(SMLoc IDLoc
,
949 SmallVectorImpl
<MCParsedAsmOperand
*> &Operands
,
951 assert(!Operands
.empty() && "Unexpect empty operand list!");
952 X86Operand
*Op
= static_cast<X86Operand
*>(Operands
[0]);
953 assert(Op
->isToken() && "Leading operand should always be a mnemonic!");
955 // First, handle aliases that expand to multiple instructions.
956 // FIXME: This should be replaced with a real .td file alias mechanism.
957 // Also, MatchInstructionImpl should actually *do* the EmitInstruction call.
959 if (Op
->getToken() == "fstsw" || Op
->getToken() == "fstcw" ||
960 Op
->getToken() == "fstsww" || Op
->getToken() == "fstcww" ||
961 Op
->getToken() == "finit" || Op
->getToken() == "fsave" ||
962 Op
->getToken() == "fstenv" || Op
->getToken() == "fclex") {
964 Inst
.setOpcode(X86::WAIT
);
965 Out
.EmitInstruction(Inst
);
968 StringSwitch
<const char*>(Op
->getToken())
969 .Case("finit", "fninit")
970 .Case("fsave", "fnsave")
971 .Case("fstcw", "fnstcw")
972 .Case("fstcww", "fnstcw")
973 .Case("fstenv", "fnstenv")
974 .Case("fstsw", "fnstsw")
975 .Case("fstsww", "fnstsw")
976 .Case("fclex", "fnclex")
978 assert(Repl
&& "Unknown wait-prefixed instruction");
980 Operands
[0] = X86Operand::CreateToken(Repl
, IDLoc
);
983 bool WasOriginallyInvalidOperand
= false;
984 unsigned OrigErrorInfo
;
987 // First, try a direct match.
988 switch (MatchInstructionImpl(Operands
, Inst
, OrigErrorInfo
)) {
990 Out
.EmitInstruction(Inst
);
992 case Match_MissingFeature
:
993 Error(IDLoc
, "instruction requires a CPU feature not currently enabled");
995 case Match_ConversionFail
:
996 return Error(IDLoc
, "unable to convert operands to instruction");
997 case Match_InvalidOperand
:
998 WasOriginallyInvalidOperand
= true;
1000 case Match_MnemonicFail
:
1004 // FIXME: Ideally, we would only attempt suffix matches for things which are
1005 // valid prefixes, and we could just infer the right unambiguous
1006 // type. However, that requires substantially more matcher support than the
1009 // Change the operand to point to a temporary token.
1010 StringRef Base
= Op
->getToken();
1011 SmallString
<16> Tmp
;
1014 Op
->setTokenValue(Tmp
.str());
1016 // If this instruction starts with an 'f', then it is a floating point stack
1017 // instruction. These come in up to three forms for 32-bit, 64-bit, and
1018 // 80-bit floating point, which use the suffixes s,l,t respectively.
1020 // Otherwise, we assume that this may be an integer instruction, which comes
1021 // in 8/16/32/64-bit forms using the b,w,l,q suffixes respectively.
1022 const char *Suffixes
= Base
[0] != 'f' ? "bwlq" : "slt\0";
1024 // Check for the various suffix matches.
1025 Tmp
[Base
.size()] = Suffixes
[0];
1026 unsigned ErrorInfoIgnore
;
1027 MatchResultTy Match1
, Match2
, Match3
, Match4
;
1029 Match1
= MatchInstructionImpl(Operands
, Inst
, ErrorInfoIgnore
);
1030 Tmp
[Base
.size()] = Suffixes
[1];
1031 Match2
= MatchInstructionImpl(Operands
, Inst
, ErrorInfoIgnore
);
1032 Tmp
[Base
.size()] = Suffixes
[2];
1033 Match3
= MatchInstructionImpl(Operands
, Inst
, ErrorInfoIgnore
);
1034 Tmp
[Base
.size()] = Suffixes
[3];
1035 Match4
= MatchInstructionImpl(Operands
, Inst
, ErrorInfoIgnore
);
1037 // Restore the old token.
1038 Op
->setTokenValue(Base
);
1040 // If exactly one matched, then we treat that as a successful match (and the
1041 // instruction will already have been filled in correctly, since the failing
1042 // matches won't have modified it).
1043 unsigned NumSuccessfulMatches
=
1044 (Match1
== Match_Success
) + (Match2
== Match_Success
) +
1045 (Match3
== Match_Success
) + (Match4
== Match_Success
);
1046 if (NumSuccessfulMatches
== 1) {
1047 Out
.EmitInstruction(Inst
);
1051 // Otherwise, the match failed, try to produce a decent error message.
1053 // If we had multiple suffix matches, then identify this as an ambiguous
1055 if (NumSuccessfulMatches
> 1) {
1057 unsigned NumMatches
= 0;
1058 if (Match1
== Match_Success
) MatchChars
[NumMatches
++] = Suffixes
[0];
1059 if (Match2
== Match_Success
) MatchChars
[NumMatches
++] = Suffixes
[1];
1060 if (Match3
== Match_Success
) MatchChars
[NumMatches
++] = Suffixes
[2];
1061 if (Match4
== Match_Success
) MatchChars
[NumMatches
++] = Suffixes
[3];
1063 SmallString
<126> Msg
;
1064 raw_svector_ostream
OS(Msg
);
1065 OS
<< "ambiguous instructions require an explicit suffix (could be ";
1066 for (unsigned i
= 0; i
!= NumMatches
; ++i
) {
1069 if (i
+ 1 == NumMatches
)
1071 OS
<< "'" << Base
<< MatchChars
[i
] << "'";
1074 Error(IDLoc
, OS
.str());
1078 // Okay, we know that none of the variants matched successfully.
1080 // If all of the instructions reported an invalid mnemonic, then the original
1081 // mnemonic was invalid.
1082 if ((Match1
== Match_MnemonicFail
) && (Match2
== Match_MnemonicFail
) &&
1083 (Match3
== Match_MnemonicFail
) && (Match4
== Match_MnemonicFail
)) {
1084 if (!WasOriginallyInvalidOperand
) {
1085 Error(IDLoc
, "invalid instruction mnemonic '" + Base
+ "'");
1089 // Recover location info for the operand if we know which was the problem.
1090 SMLoc ErrorLoc
= IDLoc
;
1091 if (OrigErrorInfo
!= ~0U) {
1092 if (OrigErrorInfo
>= Operands
.size())
1093 return Error(IDLoc
, "too few operands for instruction");
1095 ErrorLoc
= ((X86Operand
*)Operands
[OrigErrorInfo
])->getStartLoc();
1096 if (ErrorLoc
== SMLoc()) ErrorLoc
= IDLoc
;
1099 return Error(ErrorLoc
, "invalid operand for instruction");
1102 // If one instruction matched with a missing feature, report this as a
1104 if ((Match1
== Match_MissingFeature
) + (Match2
== Match_MissingFeature
) +
1105 (Match3
== Match_MissingFeature
) + (Match4
== Match_MissingFeature
) == 1){
1106 Error(IDLoc
, "instruction requires a CPU feature not currently enabled");
1110 // If one instruction matched with an invalid operand, report this as an
1112 if ((Match1
== Match_InvalidOperand
) + (Match2
== Match_InvalidOperand
) +
1113 (Match3
== Match_InvalidOperand
) + (Match4
== Match_InvalidOperand
) == 1){
1114 Error(IDLoc
, "invalid operand for instruction");
1118 // If all of these were an outright failure, report it in a useless way.
1119 // FIXME: We should give nicer diagnostics about the exact failure.
1120 Error(IDLoc
, "unknown use of instruction mnemonic without a size suffix");
/// ParseDirective - Handle a target-specific assembler directive identified
/// by DirectiveID. The only directive handled in the visible code is ".word",
/// which is forwarded to ParseDirectiveWord with a size of 2 and the
/// directive's source location.
/// NOTE(review): the fall-through return for unrecognized directives is not
/// visible in this listing.
1125 bool X86ATTAsmParser::ParseDirective(AsmToken DirectiveID
) {
1126 StringRef IDVal
= DirectiveID
.getIdentifier();
1127 if (IDVal
== ".word")
1128 return ParseDirectiveWord(2, DirectiveID
.getLoc());
1132 /// ParseDirectiveWord
1133 /// ::= .word [ expression (, expression)* ]
///
/// If the statement is not already at its end, expressions are parsed with
/// the generic parser and each one is emitted through the streamer's
/// EmitValue using \p Size and address space 0. A token that is neither the
/// end of the statement nor a comma is reported at \p L as
/// "unexpected token in directive".
/// NOTE(review): the loop construct and the return statements around these
/// calls are not visible in this listing.
1134 bool X86ATTAsmParser::ParseDirectiveWord(unsigned Size
, SMLoc L
) {
1135 if (getLexer().isNot(AsmToken::EndOfStatement
)) {
1137 const MCExpr
*Value
;
1138 if (getParser().ParseExpression(Value
))
1141 getParser().getStreamer().EmitValue(Value
, Size
, 0 /*addrspace*/);
1143 if (getLexer().is(AsmToken::EndOfStatement
))
1146 // FIXME: Improve diagnostic.
1147 if (getLexer().isNot(AsmToken::Comma
))
1148 return Error(L
, "unexpected token in directive");
// Declared here so that the parser initializer below can call it.
1160 extern "C" void LLVMInitializeX86AsmLexer();
1162 // Force static initialization.
// Registers X86_32ATTAsmParser for TheX86_32Target and X86_64ATTAsmParser for
// TheX86_64Target, then calls LLVMInitializeX86AsmLexer().
1163 extern "C" void LLVMInitializeX86AsmParser() {
1164 RegisterAsmParser
<X86_32ATTAsmParser
> X(TheX86_32Target
);
1165 RegisterAsmParser
<X86_64ATTAsmParser
> Y(TheX86_64Target
);
1166 LLVMInitializeX86AsmLexer();
1169 #define GET_REGISTER_MATCHER
1170 #define GET_MATCHER_IMPLEMENTATION
1171 #include "X86GenAsmMatcher.inc"