1 //===-- X86AsmParser.cpp - Parse X86 assembly to MCInst instructions ------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 #include "llvm/Target/TargetAsmParser.h"
12 #include "X86Subtarget.h"
13 #include "llvm/Target/TargetRegistry.h"
14 #include "llvm/Target/TargetAsmParser.h"
15 #include "llvm/MC/MCStreamer.h"
16 #include "llvm/MC/MCExpr.h"
17 #include "llvm/MC/MCInst.h"
18 #include "llvm/MC/MCParser/MCAsmLexer.h"
19 #include "llvm/MC/MCParser/MCAsmParser.h"
20 #include "llvm/MC/MCParser/MCParsedAsmOperand.h"
21 #include "llvm/ADT/SmallString.h"
22 #include "llvm/ADT/SmallVector.h"
23 #include "llvm/ADT/StringExtras.h"
24 #include "llvm/ADT/StringSwitch.h"
25 #include "llvm/ADT/Twine.h"
26 #include "llvm/Support/SourceMgr.h"
27 #include "llvm/Support/raw_ostream.h"
33 class X86ATTAsmParser
: public TargetAsmParser
{
41 MCAsmParser
&getParser() const { return Parser
; }
43 MCAsmLexer
&getLexer() const { return Parser
.getLexer(); }
45 bool Error(SMLoc L
, const Twine
&Msg
) { return Parser
.Error(L
, Msg
); }
47 bool ParseRegister(unsigned &RegNo
, SMLoc
&StartLoc
, SMLoc
&EndLoc
);
49 X86Operand
*ParseOperand();
50 X86Operand
*ParseMemOperand(unsigned SegReg
, SMLoc StartLoc
);
52 bool ParseDirectiveWord(unsigned Size
, SMLoc L
);
54 bool MatchAndEmitInstruction(SMLoc IDLoc
,
55 SmallVectorImpl
<MCParsedAsmOperand
*> &Operands
,
58 /// @name Auto-generated Matcher Functions
61 #define GET_ASSEMBLER_HEADER
62 #include "X86GenAsmMatcher.inc"
67 X86ATTAsmParser(const Target
&T
, MCAsmParser
&parser
, TargetMachine
&TM
)
68 : TargetAsmParser(T
), Parser(parser
), TM(TM
) {
70 // Initialize the set of available features.
71 setAvailableFeatures(ComputeAvailableFeatures(
72 &TM
.getSubtarget
<X86Subtarget
>()));
75 virtual bool ParseInstruction(StringRef Name
, SMLoc NameLoc
,
76 SmallVectorImpl
<MCParsedAsmOperand
*> &Operands
);
78 virtual bool ParseDirective(AsmToken DirectiveID
);
81 class X86_32ATTAsmParser
: public X86ATTAsmParser
{
83 X86_32ATTAsmParser(const Target
&T
, MCAsmParser
&Parser
, TargetMachine
&TM
)
84 : X86ATTAsmParser(T
, Parser
, TM
) {
89 class X86_64ATTAsmParser
: public X86ATTAsmParser
{
91 X86_64ATTAsmParser(const Target
&T
, MCAsmParser
&Parser
, TargetMachine
&TM
)
92 : X86ATTAsmParser(T
, Parser
, TM
) {
97 } // end anonymous namespace
99 /// @name Auto-generated Match Functions
102 static unsigned MatchRegisterName(StringRef Name
);
108 /// X86Operand - Instances of this class represent a parsed operand of an X86
/// machine instruction.
110 struct X86Operand
: public MCParsedAsmOperand
{
118 SMLoc StartLoc
, EndLoc
;
143 X86Operand(KindTy K
, SMLoc Start
, SMLoc End
)
144 : Kind(K
), StartLoc(Start
), EndLoc(End
) {}
146 /// getStartLoc - Get the location of the first token of this operand.
147 SMLoc
getStartLoc() const { return StartLoc
; }
148 /// getEndLoc - Get the location of the last token of this operand.
149 SMLoc
getEndLoc() const { return EndLoc
; }
151 virtual void dump(raw_ostream
&OS
) const {}
153 StringRef
getToken() const {
154 assert(Kind
== Token
&& "Invalid access!");
155 return StringRef(Tok
.Data
, Tok
.Length
);
157 void setTokenValue(StringRef Value
) {
158 assert(Kind
== Token
&& "Invalid access!");
159 Tok
.Data
= Value
.data();
160 Tok
.Length
= Value
.size();
163 unsigned getReg() const {
164 assert(Kind
== Register
&& "Invalid access!");
168 const MCExpr
*getImm() const {
169 assert(Kind
== Immediate
&& "Invalid access!");
173 const MCExpr
*getMemDisp() const {
174 assert(Kind
== Memory
&& "Invalid access!");
177 unsigned getMemSegReg() const {
178 assert(Kind
== Memory
&& "Invalid access!");
181 unsigned getMemBaseReg() const {
182 assert(Kind
== Memory
&& "Invalid access!");
185 unsigned getMemIndexReg() const {
186 assert(Kind
== Memory
&& "Invalid access!");
189 unsigned getMemScale() const {
190 assert(Kind
== Memory
&& "Invalid access!");
194 bool isToken() const {return Kind
== Token
; }
196 bool isImm() const { return Kind
== Immediate
; }
/// isImmSExti16i8 - Range test for an immediate that, going by the name, is an
/// 8-bit value sign-extended to 16 bits.
///
/// For a constant expression the value is read as an unsigned 64-bit quantity
/// and accepted when it lies in one of:
///   - [0x00, 0x7F]                              (non-negative 8-bit values)
///   - [0xFF80, 0xFFFF]                          (-128..-1 as a 16-bit pattern)
///   - [0xFFFFFFFFFFFFFF80, 0xFFFFFFFFFFFFFFFF]  (-128..-1 as a 64-bit pattern)
///
/// NOTE(review): the guard statements that belong before and after the
/// dyn_cast (non-immediate operand, null CE) are not visible in this excerpt;
/// confirm they are present before relying on CE being non-null.
198 bool isImmSExti16i8() const {
202 // If this isn't a constant expr, just assume it fits and let relaxation
// handle it.
204 const MCConstantExpr
*CE
= dyn_cast
<MCConstantExpr
>(getImm());
208 // Otherwise, check the value is in a range that makes sense for this
// extension.
210 uint64_t Value
= CE
->getValue();
211 return (( Value
<= 0x000000000000007FULL
)||
212 (0x000000000000FF80ULL
<= Value
&& Value
<= 0x000000000000FFFFULL
)||
213 (0xFFFFFFFFFFFFFF80ULL
<= Value
&& Value
<= 0xFFFFFFFFFFFFFFFFULL
));
/// isImmSExti32i8 - Range test for an immediate that, going by the name, is an
/// 8-bit value sign-extended to 32 bits.
///
/// For a constant expression the value is read as an unsigned 64-bit quantity
/// and accepted when it lies in one of:
///   - [0x00, 0x7F]                              (non-negative 8-bit values)
///   - [0xFFFFFF80, 0xFFFFFFFF]                  (-128..-1 as a 32-bit pattern)
///   - [0xFFFFFFFFFFFFFF80, 0xFFFFFFFFFFFFFFFF]  (-128..-1 as a 64-bit pattern)
///
/// NOTE(review): the guard statements that belong before and after the
/// dyn_cast (non-immediate operand, null CE) are not visible in this excerpt;
/// confirm they are present before relying on CE being non-null.
215 bool isImmSExti32i8() const {
219 // If this isn't a constant expr, just assume it fits and let relaxation
// handle it.
221 const MCConstantExpr
*CE
= dyn_cast
<MCConstantExpr
>(getImm());
225 // Otherwise, check the value is in a range that makes sense for this
// extension.
227 uint64_t Value
= CE
->getValue();
228 return (( Value
<= 0x000000000000007FULL
)||
229 (0x00000000FFFFFF80ULL
<= Value
&& Value
<= 0x00000000FFFFFFFFULL
)||
230 (0xFFFFFFFFFFFFFF80ULL
<= Value
&& Value
<= 0xFFFFFFFFFFFFFFFFULL
));
/// isImmSExti64i8 - Range test for an immediate that, going by the name, is an
/// 8-bit value sign-extended to 64 bits.
///
/// For a constant expression the value is read as an unsigned 64-bit quantity
/// and accepted when it lies in one of:
///   - [0x00, 0x7F]                              (non-negative 8-bit values)
///   - [0xFFFFFFFFFFFFFF80, 0xFFFFFFFFFFFFFFFF]  (-128..-1 as a 64-bit pattern)
///
/// NOTE(review): the guard statements that belong before and after the
/// dyn_cast (non-immediate operand, null CE) are not visible in this excerpt;
/// confirm they are present before relying on CE being non-null.
232 bool isImmSExti64i8() const {
236 // If this isn't a constant expr, just assume it fits and let relaxation
// handle it.
238 const MCConstantExpr
*CE
= dyn_cast
<MCConstantExpr
>(getImm());
242 // Otherwise, check the value is in a range that makes sense for this
// extension.
244 uint64_t Value
= CE
->getValue();
245 return (( Value
<= 0x000000000000007FULL
)||
246 (0xFFFFFFFFFFFFFF80ULL
<= Value
&& Value
<= 0xFFFFFFFFFFFFFFFFULL
));
/// isImmSExti64i32 - Range test for an immediate that, going by the name, is a
/// 32-bit value sign-extended to 64 bits.
///
/// For a constant expression the value is read as an unsigned 64-bit quantity
/// and accepted when it lies in one of:
///   - [0x00000000, 0x7FFFFFFF]                  (non-negative 32-bit values)
///   - [0xFFFFFFFF80000000, 0xFFFFFFFFFFFFFFFF]  (negative 32-bit values as a
///                                                64-bit pattern)
///
/// NOTE(review): the guard statements that belong before and after the
/// dyn_cast (non-immediate operand, null CE) are not visible in this excerpt;
/// confirm they are present before relying on CE being non-null.
248 bool isImmSExti64i32() const {
252 // If this isn't a constant expr, just assume it fits and let relaxation
// handle it.
254 const MCConstantExpr
*CE
= dyn_cast
<MCConstantExpr
>(getImm());
258 // Otherwise, check the value is in a range that makes sense for this
// extension.
260 uint64_t Value
= CE
->getValue();
261 return (( Value
<= 0x000000007FFFFFFFULL
)||
262 (0xFFFFFFFF80000000ULL
<= Value
&& Value
<= 0xFFFFFFFFFFFFFFFFULL
));
265 bool isMem() const { return Kind
== Memory
; }
/// isAbsMem - True when this is a memory operand that carries no segment, base
/// or index register and whose scale is 1, i.e. only the displacement is
/// meaningful. The Kind test comes first so the getMem* accessors (which
/// assert on Kind == Memory) are only reached for memory operands.
267 bool isAbsMem() const {
268 return Kind
== Memory
&& !getMemSegReg() && !getMemBaseReg() &&
269 !getMemIndexReg() && getMemScale() == 1;
272 bool isReg() const { return Kind
== Register
; }
/// addExpr - Append \p Expr to \p Inst as an operand. A constant expression is
/// added as a plain immediate holding its value; the other visible call adds
/// the expression itself via MCOperand::CreateExpr.
///
/// NOTE(review): the `else` that should separate the two addOperand calls is
/// not visible in this excerpt; confirm they are mutually exclusive.
274 void addExpr(MCInst
&Inst
, const MCExpr
*Expr
) const {
275 // Add as immediates when possible.
276 if (const MCConstantExpr
*CE
= dyn_cast
<MCConstantExpr
>(Expr
))
277 Inst
.addOperand(MCOperand::CreateImm(CE
->getValue()));
279 Inst
.addOperand(MCOperand::CreateExpr(Expr
));
/// addRegOperands - Append this operand's register to \p Inst as a single
/// register operand. \p N is the operand count requested by the caller and
/// must be 1.
282 void addRegOperands(MCInst
&Inst
, unsigned N
) const {
283 assert(N
== 1 && "Invalid number of operands!");
284 Inst
.addOperand(MCOperand::CreateReg(getReg()));
/// addImmOperands - Append this operand's immediate expression to \p Inst by
/// delegating to addExpr. \p N is the operand count requested by the caller
/// and must be 1.
287 void addImmOperands(MCInst
&Inst
, unsigned N
) const {
288 assert(N
== 1 && "Invalid number of operands!");
289 addExpr(Inst
, getImm());
/// addMemOperands - Append this memory operand to \p Inst as five operands,
/// in this order: base register, scale (immediate), index register,
/// displacement (through addExpr), segment register. \p N must be 5.
292 void addMemOperands(MCInst
&Inst
, unsigned N
) const {
293 assert((N
== 5) && "Invalid number of operands!");
294 Inst
.addOperand(MCOperand::CreateReg(getMemBaseReg()));
295 Inst
.addOperand(MCOperand::CreateImm(getMemScale()));
296 Inst
.addOperand(MCOperand::CreateReg(getMemIndexReg()));
297 addExpr(Inst
, getMemDisp());
298 Inst
.addOperand(MCOperand::CreateReg(getMemSegReg()));
/// addAbsMemOperands - Append only the displacement of this memory operand to
/// \p Inst, as a single expression operand. \p N must be 1.
///
/// Unlike addMemOperands, this calls MCOperand::CreateExpr directly rather
/// than addExpr, so a constant displacement is not turned into an immediate
/// operand here.
301 void addAbsMemOperands(MCInst
&Inst
, unsigned N
) const {
302 assert((N
== 1) && "Invalid number of operands!");
303 Inst
.addOperand(MCOperand::CreateExpr(getMemDisp()));
306 static X86Operand
*CreateToken(StringRef Str
, SMLoc Loc
) {
307 X86Operand
*Res
= new X86Operand(Token
, Loc
, Loc
);
308 Res
->Tok
.Data
= Str
.data();
309 Res
->Tok
.Length
= Str
.size();
313 static X86Operand
*CreateReg(unsigned RegNo
, SMLoc StartLoc
, SMLoc EndLoc
) {
314 X86Operand
*Res
= new X86Operand(Register
, StartLoc
, EndLoc
);
315 Res
->Reg
.RegNo
= RegNo
;
319 static X86Operand
*CreateImm(const MCExpr
*Val
, SMLoc StartLoc
, SMLoc EndLoc
){
320 X86Operand
*Res
= new X86Operand(Immediate
, StartLoc
, EndLoc
);
325 /// Create an absolute memory operand.
326 static X86Operand
*CreateMem(const MCExpr
*Disp
, SMLoc StartLoc
,
328 X86Operand
*Res
= new X86Operand(Memory
, StartLoc
, EndLoc
);
330 Res
->Mem
.Disp
= Disp
;
331 Res
->Mem
.BaseReg
= 0;
332 Res
->Mem
.IndexReg
= 0;
337 /// Create a generalized memory operand.
///
/// Allocates a new X86Operand of kind Memory spanning \p StartLoc..\p EndLoc
/// and stores \p SegReg, \p Disp, \p BaseReg, \p IndexReg and \p Scale in its
/// Mem fields. At least one of the three registers must be non-zero, and
/// \p Scale must be 1, 2, 4 or 8 (both are asserted).
///
/// NOTE(review): the operand is created with a raw `new`; who deletes it is
/// not visible in this excerpt. The assert message line and the return
/// statement are also not visible here.
338 static X86Operand
*CreateMem(unsigned SegReg
, const MCExpr
*Disp
,
339 unsigned BaseReg
, unsigned IndexReg
,
340 unsigned Scale
, SMLoc StartLoc
, SMLoc EndLoc
) {
341 // We should never have just a displacement; that should be parsed as an
342 // absolute memory operand.
343 assert((SegReg
|| BaseReg
|| IndexReg
) && "Invalid memory operand!");
345 // The scale should always be one of {1,2,4,8}.
346 assert(((Scale
== 1 || Scale
== 2 || Scale
== 4 || Scale
== 8)) &&
348 X86Operand
*Res
= new X86Operand(Memory
, StartLoc
, EndLoc
);
349 Res
->Mem
.SegReg
= SegReg
;
350 Res
->Mem
.Disp
= Disp
;
351 Res
->Mem
.BaseReg
= BaseReg
;
352 Res
->Mem
.IndexReg
= IndexReg
;
353 Res
->Mem
.Scale
= Scale
;
358 } // end anonymous namespace.
361 bool X86ATTAsmParser::ParseRegister(unsigned &RegNo
,
362 SMLoc
&StartLoc
, SMLoc
&EndLoc
) {
364 const AsmToken
&TokPercent
= Parser
.getTok();
365 assert(TokPercent
.is(AsmToken::Percent
) && "Invalid token kind!");
366 StartLoc
= TokPercent
.getLoc();
367 Parser
.Lex(); // Eat percent token.
369 const AsmToken
&Tok
= Parser
.getTok();
370 if (Tok
.isNot(AsmToken::Identifier
))
371 return Error(Tok
.getLoc(), "invalid register name");
373 // FIXME: Validate register for the current architecture; we have to do
374 // validation later, so maybe there is no need for this here.
375 RegNo
= MatchRegisterName(Tok
.getString());
377 // If the match failed, try the register name as lowercase.
379 RegNo
= MatchRegisterName(LowercaseString(Tok
.getString()));
381 // FIXME: This should be done using Requires<In32BitMode> and
382 // Requires<In64BitMode> so "eiz" usage in 64-bit instructions
383 // can be also checked.
384 if (RegNo
== X86::RIZ
&& !Is64Bit
)
385 return Error(Tok
.getLoc(), "riz register in 64-bit mode only");
387 // Parse "%st" as "%st(0)", and also parse forms such as "%st(1)", which span
// multiple tokens.
388 if (RegNo
== 0 && (Tok
.getString() == "st" || Tok
.getString() == "ST")) {
390 EndLoc
= Tok
.getLoc();
391 Parser
.Lex(); // Eat 'st'
393 // Check to see if we have '(4)' after %st.
394 if (getLexer().isNot(AsmToken::LParen
))
399 const AsmToken
&IntTok
= Parser
.getTok();
400 if (IntTok
.isNot(AsmToken::Integer
))
401 return Error(IntTok
.getLoc(), "expected stack index");
402 switch (IntTok
.getIntVal()) {
403 case 0: RegNo
= X86::ST0
; break;
404 case 1: RegNo
= X86::ST1
; break;
405 case 2: RegNo
= X86::ST2
; break;
406 case 3: RegNo
= X86::ST3
; break;
407 case 4: RegNo
= X86::ST4
; break;
408 case 5: RegNo
= X86::ST5
; break;
409 case 6: RegNo
= X86::ST6
; break;
410 case 7: RegNo
= X86::ST7
; break;
411 default: return Error(IntTok
.getLoc(), "invalid stack index");
414 if (getParser().Lex().isNot(AsmToken::RParen
))
415 return Error(Parser
.getTok().getLoc(), "expected ')'");
417 EndLoc
= Tok
.getLoc();
418 Parser
.Lex(); // Eat ')'
422 // If this is "db[0-7]", match it as an alias for the corresponding dr[0-7].
424 if (RegNo
== 0 && Tok
.getString().size() == 3 &&
425 Tok
.getString().startswith("db")) {
426 switch (Tok
.getString()[2]) {
427 case '0': RegNo
= X86::DR0
; break;
428 case '1': RegNo
= X86::DR1
; break;
429 case '2': RegNo
= X86::DR2
; break;
430 case '3': RegNo
= X86::DR3
; break;
431 case '4': RegNo
= X86::DR4
; break;
432 case '5': RegNo
= X86::DR5
; break;
433 case '6': RegNo
= X86::DR6
; break;
434 case '7': RegNo
= X86::DR7
; break;
438 EndLoc
= Tok
.getLoc();
439 Parser
.Lex(); // Eat it.
445 return Error(Tok
.getLoc(), "invalid register name");
447 EndLoc
= Tok
.getLoc();
448 Parser
.Lex(); // Eat identifier token.
452 X86Operand
*X86ATTAsmParser::ParseOperand() {
453 switch (getLexer().getKind()) {
455 // Parse a memory operand with no segment register.
456 return ParseMemOperand(0, Parser
.getTok().getLoc());
457 case AsmToken::Percent
: {
458 // Read the register.
461 if (ParseRegister(RegNo
, Start
, End
)) return 0;
462 if (RegNo
== X86::EIZ
|| RegNo
== X86::RIZ
) {
463 Error(Start
, "eiz and riz can only be used as index registers");
467 // If this is a segment register followed by a ':', then this is the start
468 // of a memory reference, otherwise this is a normal register reference.
469 if (getLexer().isNot(AsmToken::Colon
))
470 return X86Operand::CreateReg(RegNo
, Start
, End
);
473 getParser().Lex(); // Eat the colon.
474 return ParseMemOperand(RegNo
, Start
);
476 case AsmToken::Dollar
: {
478 SMLoc Start
= Parser
.getTok().getLoc(), End
;
481 if (getParser().ParseExpression(Val
, End
))
483 return X86Operand::CreateImm(Val
, Start
, End
);
488 /// ParseMemOperand: segment: disp(basereg, indexreg, scale). The '%ds:' prefix
489 /// has already been parsed if present.
490 X86Operand
*X86ATTAsmParser::ParseMemOperand(unsigned SegReg
, SMLoc MemStart
) {
492 // We have to disambiguate a parenthesized expression "(4+5)" from the start
493 // of a memory operand with a missing displacement "(%ebx)" or "(,%eax)". The
494 // only way to do this without lookahead is to eat the '(' and see what follows.
496 const MCExpr
*Disp
= MCConstantExpr::Create(0, getParser().getContext());
497 if (getLexer().isNot(AsmToken::LParen
)) {
499 if (getParser().ParseExpression(Disp
, ExprEnd
)) return 0;
501 // After parsing the base expression we could either have a parenthesized
502 // memory address or not. If not, return now. If so, eat the (.
503 if (getLexer().isNot(AsmToken::LParen
)) {
504 // Unless we have a segment register, treat this as an immediate.
506 return X86Operand::CreateMem(Disp
, MemStart
, ExprEnd
);
507 return X86Operand::CreateMem(SegReg
, Disp
, 0, 0, 1, MemStart
, ExprEnd
);
513 // Okay, we have a '('. We don't know whether this is an expression or not,
514 // so we have to eat the '(' to see beyond it.
515 SMLoc LParenLoc
= Parser
.getTok().getLoc();
516 Parser
.Lex(); // Eat the '('.
518 if (getLexer().is(AsmToken::Percent
) || getLexer().is(AsmToken::Comma
)) {
519 // Nothing to do here, fall into the code below with the '(' part of the
520 // memory operand consumed.
524 // It must be a parenthesized expression; parse it now.
525 if (getParser().ParseParenExpression(Disp
, ExprEnd
))
528 // After parsing the base expression we could either have a parenthesized
529 // memory address or not. If not, return now. If so, eat the (.
530 if (getLexer().isNot(AsmToken::LParen
)) {
531 // Unless we have a segment register, treat this as an immediate.
533 return X86Operand::CreateMem(Disp
, LParenLoc
, ExprEnd
);
534 return X86Operand::CreateMem(SegReg
, Disp
, 0, 0, 1, MemStart
, ExprEnd
);
542 // If we reached here, then we just ate the ( of the memory operand. Process
543 // the rest of the memory operand.
544 unsigned BaseReg
= 0, IndexReg
= 0, Scale
= 1;
546 if (getLexer().is(AsmToken::Percent
)) {
548 if (ParseRegister(BaseReg
, L
, L
)) return 0;
549 if (BaseReg
== X86::EIZ
|| BaseReg
== X86::RIZ
) {
550 Error(L
, "eiz and riz can only be used as index registers");
555 if (getLexer().is(AsmToken::Comma
)) {
556 Parser
.Lex(); // Eat the comma.
558 // Following the comma we should have either an index register, or a scale
559 // value. We don't support the latter form, but we want to parse it
562 // Note that even though it would be completely consistent to support syntax
563 // like "1(%eax,,1)", the assembler doesn't. Use "eiz" or "riz" for this.
564 if (getLexer().is(AsmToken::Percent
)) {
566 if (ParseRegister(IndexReg
, L
, L
)) return 0;
568 if (getLexer().isNot(AsmToken::RParen
)) {
569 // Parse the scale amount:
570 // ::= ',' [scale-expression]
571 if (getLexer().isNot(AsmToken::Comma
)) {
572 Error(Parser
.getTok().getLoc(),
573 "expected comma in scale expression");
576 Parser
.Lex(); // Eat the comma.
578 if (getLexer().isNot(AsmToken::RParen
)) {
579 SMLoc Loc
= Parser
.getTok().getLoc();
582 if (getParser().ParseAbsoluteExpression(ScaleVal
))
585 // Validate the scale amount.
586 if (ScaleVal
!= 1 && ScaleVal
!= 2 && ScaleVal
!= 4 && ScaleVal
!= 8){
587 Error(Loc
, "scale factor in address must be 1, 2, 4 or 8");
590 Scale
= (unsigned)ScaleVal
;
593 } else if (getLexer().isNot(AsmToken::RParen
)) {
594 // A scale amount without an index is ignored.
596 SMLoc Loc
= Parser
.getTok().getLoc();
599 if (getParser().ParseAbsoluteExpression(Value
))
603 Warning(Loc
, "scale factor without index register is ignored");
608 // Ok, we've eaten the memory operand, verify we have a ')' and eat it too.
609 if (getLexer().isNot(AsmToken::RParen
)) {
610 Error(Parser
.getTok().getLoc(), "unexpected token in memory operand");
613 SMLoc MemEnd
= Parser
.getTok().getLoc();
614 Parser
.Lex(); // Eat the ')'.
616 return X86Operand::CreateMem(SegReg
, Disp
, BaseReg
, IndexReg
, Scale
,
620 bool X86ATTAsmParser::
621 ParseInstruction(StringRef Name
, SMLoc NameLoc
,
622 SmallVectorImpl
<MCParsedAsmOperand
*> &Operands
) {
623 StringRef PatchedName
= Name
;
625 // FIXME: Hack to recognize cmp<comparison code>{ss,sd,ps,pd}.
626 const MCExpr
*ExtraImmOp
= 0;
627 if ((PatchedName
.startswith("cmp") || PatchedName
.startswith("vcmp")) &&
628 (PatchedName
.endswith("ss") || PatchedName
.endswith("sd") ||
629 PatchedName
.endswith("ps") || PatchedName
.endswith("pd"))) {
630 bool IsVCMP
= PatchedName
.startswith("vcmp");
631 unsigned SSECCIdx
= IsVCMP
? 4 : 3;
632 unsigned SSEComparisonCode
= StringSwitch
<unsigned>(
633 PatchedName
.slice(SSECCIdx
, PatchedName
.size() - 2))
646 .Case("neq_oq", 0x0C)
653 .Case("unord_s", 0x13)
654 .Case("neq_us", 0x14)
655 .Case("nlt_uq", 0x15)
656 .Case("nle_uq", 0x16)
659 .Case("nge_uq", 0x19)
660 .Case("ngt_uq", 0x1A)
661 .Case("false_os", 0x1B)
662 .Case("neq_os", 0x1C)
665 .Case("true_us", 0x1F)
667 if (SSEComparisonCode
!= ~0U) {
668 ExtraImmOp
= MCConstantExpr::Create(SSEComparisonCode
,
669 getParser().getContext());
670 if (PatchedName
.endswith("ss")) {
671 PatchedName
= IsVCMP
? "vcmpss" : "cmpss";
672 } else if (PatchedName
.endswith("sd")) {
673 PatchedName
= IsVCMP
? "vcmpsd" : "cmpsd";
674 } else if (PatchedName
.endswith("ps")) {
675 PatchedName
= IsVCMP
? "vcmpps" : "cmpps";
677 assert(PatchedName
.endswith("pd") && "Unexpected mnemonic!");
678 PatchedName
= IsVCMP
? "vcmppd" : "cmppd";
683 // FIXME: Hack to recognize vpclmul<src1_quadword, src2_quadword>dq
684 if (PatchedName
.startswith("vpclmul")) {
685 unsigned CLMULQuadWordSelect
= StringSwitch
<unsigned>(
686 PatchedName
.slice(7, PatchedName
.size() - 2))
687 .Case("lqlq", 0x00) // src1[63:0], src2[63:0]
688 .Case("hqlq", 0x01) // src1[127:64], src2[63:0]
689 .Case("lqhq", 0x10) // src1[63:0], src2[127:64]
690 .Case("hqhq", 0x11) // src1[127:64], src2[127:64]
692 if (CLMULQuadWordSelect
!= ~0U) {
693 ExtraImmOp
= MCConstantExpr::Create(CLMULQuadWordSelect
,
694 getParser().getContext());
695 assert(PatchedName
.endswith("dq") && "Unexpected mnemonic!");
696 PatchedName
= "vpclmulqdq";
700 Operands
.push_back(X86Operand::CreateToken(PatchedName
, NameLoc
));
703 Operands
.push_back(X86Operand::CreateImm(ExtraImmOp
, NameLoc
, NameLoc
));
706 // Determine whether this is an instruction prefix.
708 Name
== "lock" || Name
== "rep" ||
709 Name
== "repe" || Name
== "repz" ||
710 Name
== "repne" || Name
== "repnz";
713 // This does the actual operand parsing. Don't parse any more if we have a
714 // prefix juxtaposed with an operation like "lock incl 4(%rax)", because we
715 // just want to parse the "lock" as the first instruction and the "incl" as
// the next instruction.
717 if (getLexer().isNot(AsmToken::EndOfStatement
) && !isPrefix
) {
719 // Parse '*' modifier.
720 if (getLexer().is(AsmToken::Star
)) {
721 SMLoc Loc
= Parser
.getTok().getLoc();
722 Operands
.push_back(X86Operand::CreateToken("*", Loc
));
723 Parser
.Lex(); // Eat the star.
726 // Read the first operand.
727 if (X86Operand
*Op
= ParseOperand())
728 Operands
.push_back(Op
);
730 Parser
.EatToEndOfStatement();
734 while (getLexer().is(AsmToken::Comma
)) {
735 Parser
.Lex(); // Eat the comma.
737 // Parse and remember the operand.
738 if (X86Operand
*Op
= ParseOperand())
739 Operands
.push_back(Op
);
741 Parser
.EatToEndOfStatement();
746 if (getLexer().isNot(AsmToken::EndOfStatement
)) {
747 Parser
.EatToEndOfStatement();
748 return TokError("unexpected token in argument list");
752 if (getLexer().is(AsmToken::EndOfStatement
))
753 Parser
.Lex(); // Consume the EndOfStatement
755 // Hack to allow 'movq <largeimm>, <reg>' as an alias for movabsq.
756 if ((Name
== "movq" || Name
== "mov") && Operands
.size() == 3 &&
757 static_cast<X86Operand
*>(Operands
[2])->isReg() &&
758 static_cast<X86Operand
*>(Operands
[1])->isImm() &&
759 !static_cast<X86Operand
*>(Operands
[1])->isImmSExti64i32()) {
761 Operands
[0] = X86Operand::CreateToken("movabsq", NameLoc
);
764 // FIXME: Hack to recognize "s{hr,ar,hl} $1, <op>". Canonicalize to the
// one-operand form "s{hr,ar,hl} <op>".
766 if ((Name
.startswith("shr") || Name
.startswith("sar") ||
767 Name
.startswith("shl") || Name
.startswith("sal")) &&
768 Operands
.size() == 3) {
769 X86Operand
*Op1
= static_cast<X86Operand
*>(Operands
[1]);
770 if (Op1
->isImm() && isa
<MCConstantExpr
>(Op1
->getImm()) &&
771 cast
<MCConstantExpr
>(Op1
->getImm())->getValue() == 1) {
773 Operands
.erase(Operands
.begin() + 1);
777 // FIXME: Hack to recognize "rc[lr] <op>" -> "rc[lr] $1, <op>".
778 if ((Name
.startswith("rcl") || Name
.startswith("rcr")) &&
779 Operands
.size() == 2) {
780 const MCExpr
*One
= MCConstantExpr::Create(1, getParser().getContext());
781 Operands
.push_back(X86Operand::CreateImm(One
, NameLoc
, NameLoc
));
782 std::swap(Operands
[1], Operands
[2]);
785 // FIXME: Hack to recognize "sh[lr]d op,op" -> "sh[lr]d $1, op,op".
786 if ((Name
.startswith("shld") || Name
.startswith("shrd")) &&
787 Operands
.size() == 3) {
788 const MCExpr
*One
= MCConstantExpr::Create(1, getParser().getContext());
789 Operands
.insert(Operands
.begin()+1,
790 X86Operand::CreateImm(One
, NameLoc
, NameLoc
));
794 // FIXME: Hack to recognize "in[bwl] <op>". Canonicalize it to
// "in[bwl] <op>, %al/%ax/%eax".
796 if ((Name
== "inb" || Name
== "inw" || Name
== "inl") &&
797 Operands
.size() == 2) {
800 Reg
= MatchRegisterName("al");
801 else if (Name
[2] == 'w')
802 Reg
= MatchRegisterName("ax");
804 Reg
= MatchRegisterName("eax");
805 SMLoc Loc
= Operands
.back()->getEndLoc();
806 Operands
.push_back(X86Operand::CreateReg(Reg
, Loc
, Loc
));
809 // FIXME: Hack to recognize "out[bwl] <op>". Canonicalize it to
// "out[bwl] %al/%ax/%eax, <op>".
811 if ((Name
== "outb" || Name
== "outw" || Name
== "outl") &&
812 Operands
.size() == 2) {
815 Reg
= MatchRegisterName("al");
816 else if (Name
[3] == 'w')
817 Reg
= MatchRegisterName("ax");
819 Reg
= MatchRegisterName("eax");
820 SMLoc Loc
= Operands
.back()->getEndLoc();
821 Operands
.push_back(X86Operand::CreateReg(Reg
, Loc
, Loc
));
822 std::swap(Operands
[1], Operands
[2]);
825 // FIXME: Hack to handle "out[bwl]? %al, (%dx)" -> "outb %al, %dx".
826 if ((Name
== "outb" || Name
== "outw" || Name
== "outl" || Name
== "out") &&
827 Operands
.size() == 3) {
828 X86Operand
&Op
= *(X86Operand
*)Operands
.back();
829 if (Op
.isMem() && Op
.Mem
.SegReg
== 0 &&
830 isa
<MCConstantExpr
>(Op
.Mem
.Disp
) &&
831 cast
<MCConstantExpr
>(Op
.Mem
.Disp
)->getValue() == 0 &&
832 Op
.Mem
.BaseReg
== MatchRegisterName("dx") && Op
.Mem
.IndexReg
== 0) {
833 SMLoc Loc
= Op
.getEndLoc();
834 Operands
.back() = X86Operand::CreateReg(Op
.Mem
.BaseReg
, Loc
, Loc
);
839 // FIXME: Hack to handle "f{mul*,add*,sub*,div*} $op, st(0)" the same as
840 // "f{mul*,add*,sub*,div*} $op"
841 if ((Name
.startswith("fmul") || Name
.startswith("fadd") ||
842 Name
.startswith("fsub") || Name
.startswith("fdiv")) &&
843 Operands
.size() == 3 &&
844 static_cast<X86Operand
*>(Operands
[2])->isReg() &&
845 static_cast<X86Operand
*>(Operands
[2])->getReg() == X86::ST0
) {
847 Operands
.erase(Operands
.begin() + 2);
850 // FIXME: Hack to handle "f{mulp,addp} st(0), $op" the same as
851 // "f{mulp,addp} $op", since they commute. We also allow fdivrp/fsubrp even
852 // though they don't commute, solely because gas does support this.
853 if ((Name
=="fmulp" || Name
=="faddp" || Name
=="fsubrp" || Name
=="fdivrp") &&
854 Operands
.size() == 3 &&
855 static_cast<X86Operand
*>(Operands
[1])->isReg() &&
856 static_cast<X86Operand
*>(Operands
[1])->getReg() == X86::ST0
) {
858 Operands
.erase(Operands
.begin() + 1);
861 // FIXME: Hack to handle "imul <imm>, B", which is an alias for
// "imul <imm>, B, B".
863 if (Name
.startswith("imul") && Operands
.size() == 3 &&
864 static_cast<X86Operand
*>(Operands
[1])->isImm() &&
865 static_cast<X86Operand
*>(Operands
.back())->isReg()) {
866 X86Operand
*Op
= static_cast<X86Operand
*>(Operands
.back());
867 Operands
.push_back(X86Operand::CreateReg(Op
->getReg(), Op
->getStartLoc(),
871 // 'sldt <mem>' can be encoded with either sldtw or sldtq with the same
872 // effect (both store to a 16-bit mem). Force to sldtw to avoid ambiguity
873 // errors, since its encoding is the most compact.
874 if (Name
== "sldt" && Operands
.size() == 2 &&
875 static_cast<X86Operand
*>(Operands
[1])->isMem()) {
877 Operands
[0] = X86Operand::CreateToken("sldtw", NameLoc
);
880 // The assembler accepts "xchgX <reg>, <mem>" and "xchgX <mem>, <reg>" as
881 // synonyms. Our tables only have the "<reg>, <mem>" form, so if we see the
882 // other operand order, swap them.
883 if (Name
== "xchgb" || Name
== "xchgw" || Name
== "xchgl" || Name
== "xchgq"||
885 if (Operands
.size() == 3 &&
886 static_cast<X86Operand
*>(Operands
[1])->isMem() &&
887 static_cast<X86Operand
*>(Operands
[2])->isReg()) {
888 std::swap(Operands
[1], Operands
[2]);
891 // The assembler accepts "testX <reg>, <mem>" and "testX <mem>, <reg>" as
892 // synonyms. Our tables only have the "<mem>, <reg>" form, so if we see the
893 // other operand order, swap them.
894 if (Name
== "testb" || Name
== "testw" || Name
== "testl" || Name
== "testq"||
896 if (Operands
.size() == 3 &&
897 static_cast<X86Operand
*>(Operands
[1])->isReg() &&
898 static_cast<X86Operand
*>(Operands
[2])->isMem()) {
899 std::swap(Operands
[1], Operands
[2]);
902 // The assembler accepts these instructions with no operand as a synonym for
903 // an instruction acting on st(1). e.g. "fxch" -> "fxch %st(1)".
904 if ((Name
== "fxch" || Name
== "fucom" || Name
== "fucomp" ||
905 Name
== "faddp" || Name
== "fsubp" || Name
== "fsubrp" ||
906 Name
== "fmulp" || Name
== "fdivp" || Name
== "fdivrp") &&
907 Operands
.size() == 1) {
908 Operands
.push_back(X86Operand::CreateReg(MatchRegisterName("st(1)"),
912 // The assembler accepts these instructions with too few operands as a synonym
913 // for taking %st(1),%st(0) or X, %st(0).
914 if ((Name
== "fcomi" || Name
== "fucomi" || Name
== "fucompi" ||
915 Name
== "fcompi" ) &&
916 Operands
.size() < 3) {
917 if (Operands
.size() == 1)
918 Operands
.push_back(X86Operand::CreateReg(MatchRegisterName("st(1)"),
920 Operands
.push_back(X86Operand::CreateReg(MatchRegisterName("st(0)"),
924 // The assembler accepts various amounts of brokenness for fnstsw.
925 if (Name
== "fnstsw" || Name
== "fnstsww") {
926 if (Operands
.size() == 2 &&
927 static_cast<X86Operand
*>(Operands
[1])->isReg()) {
928 // "fnstsw al" and "fnstsw eax" -> "fnstsw"
929 unsigned Reg
= static_cast<X86Operand
*>(Operands
[1])->Reg
.RegNo
;
930 if (Reg
== MatchRegisterName("eax") ||
931 Reg
== MatchRegisterName("al")) {
937 // "fnstsw" -> "fnstsw %ax"
938 if (Operands
.size() == 1)
939 Operands
.push_back(X86Operand::CreateReg(MatchRegisterName("ax"),
943 // jmp $42,$5 -> ljmp, similarly for call.
944 if ((Name
.startswith("call") || Name
.startswith("jmp")) &&
945 Operands
.size() == 3 &&
946 static_cast<X86Operand
*>(Operands
[1])->isImm() &&
947 static_cast<X86Operand
*>(Operands
[2])->isImm()) {
948 const char *NewOpName
= StringSwitch
<const char *>(Name
)
950 .Case("jmpw", "ljmpw")
951 .Case("jmpl", "ljmpl")
952 .Case("jmpq", "ljmpq")
953 .Case("call", "lcall")
954 .Case("callw", "lcallw")
955 .Case("calll", "lcalll")
956 .Case("callq", "lcallq")
960 Operands
[0] = X86Operand::CreateToken(NewOpName
, NameLoc
);
965 // lcall and ljmp -> lcalll and ljmpl
966 if ((Name
== "lcall" || Name
== "ljmp") && Operands
.size() == 3) {
968 Operands
[0] = X86Operand::CreateToken(Name
== "lcall" ? "lcalll" : "ljmpl",
972 // movsd -> movsl (when no operands are specified).
973 if (Name
== "movsd" && Operands
.size() == 1) {
975 Operands
[0] = X86Operand::CreateToken("movsl", NameLoc
);
978 // fstp <mem> -> fstps <mem>. Without this, we'll default to fstpl due to
980 if (Name
== "fstp" && Operands
.size() == 2 &&
981 static_cast<X86Operand
*>(Operands
[1])->isMem()) {
983 Operands
[0] = X86Operand::CreateToken("fstps", NameLoc
);
987 // "clr <reg>" -> "xor <reg>, <reg>".
988 if ((Name
== "clrb" || Name
== "clrw" || Name
== "clrl" || Name
== "clrq" ||
989 Name
== "clr") && Operands
.size() == 2 &&
990 static_cast<X86Operand
*>(Operands
[1])->isReg()) {
991 unsigned RegNo
= static_cast<X86Operand
*>(Operands
[1])->getReg();
992 Operands
.push_back(X86Operand::CreateReg(RegNo
, NameLoc
, NameLoc
));
994 Operands
[0] = X86Operand::CreateToken("xor", NameLoc
);
997 // FIXME: Hack to recognize "aa[dm]" -> "aa[dm] $0xA".
998 if ((Name
.startswith("aad") || Name
.startswith("aam")) &&
999 Operands
.size() == 1) {
1000 const MCExpr
*A
= MCConstantExpr::Create(0xA, getParser().getContext());
1001 Operands
.push_back(X86Operand::CreateImm(A
, NameLoc
, NameLoc
));
1007 bool X86ATTAsmParser::
1008 MatchAndEmitInstruction(SMLoc IDLoc
,
1009 SmallVectorImpl
<MCParsedAsmOperand
*> &Operands
,
1011 assert(!Operands
.empty() && "Unexpect empty operand list!");
1012 X86Operand
*Op
= static_cast<X86Operand
*>(Operands
[0]);
1013 assert(Op
->isToken() && "Leading operand should always be a mnemonic!");
1015 // First, handle aliases that expand to multiple instructions.
1016 // FIXME: This should be replaced with a real .td file alias mechanism.
1017 if (Op
->getToken() == "fstsw" || Op
->getToken() == "fstcw" ||
1018 Op
->getToken() == "fstsww" || Op
->getToken() == "fstcww" ||
1019 Op
->getToken() == "finit" || Op
->getToken() == "fsave" ||
1020 Op
->getToken() == "fstenv" || Op
->getToken() == "fclex") {
1022 Inst
.setOpcode(X86::WAIT
);
1023 Out
.EmitInstruction(Inst
);
1026 StringSwitch
<const char*>(Op
->getToken())
1027 .Case("finit", "fninit")
1028 .Case("fsave", "fnsave")
1029 .Case("fstcw", "fnstcw")
1030 .Case("fstcww", "fnstcw")
1031 .Case("fstenv", "fnstenv")
1032 .Case("fstsw", "fnstsw")
1033 .Case("fstsww", "fnstsw")
1034 .Case("fclex", "fnclex")
1036 assert(Repl
&& "Unknown wait-prefixed instruction");
1038 Operands
[0] = X86Operand::CreateToken(Repl
, IDLoc
);
1041 bool WasOriginallyInvalidOperand
= false;
1042 unsigned OrigErrorInfo
;
1045 // First, try a direct match.
1046 switch (MatchInstructionImpl(Operands
, Inst
, OrigErrorInfo
)) {
1048 Out
.EmitInstruction(Inst
);
1050 case Match_MissingFeature
:
1051 Error(IDLoc
, "instruction requires a CPU feature not currently enabled");
1053 case Match_InvalidOperand
:
1054 WasOriginallyInvalidOperand
= true;
1056 case Match_MnemonicFail
:
1060 // FIXME: Ideally, we would only attempt suffix matches for things which are
1061 // valid prefixes, and we could just infer the right unambiguous
1062 // type. However, that requires substantially more matcher support than the
1065 // Change the operand to point to a temporary token.
1066 StringRef Base
= Op
->getToken();
1067 SmallString
<16> Tmp
;
1070 Op
->setTokenValue(Tmp
.str());
1072 // Check for the various suffix matches.
1073 Tmp
[Base
.size()] = 'b';
1074 unsigned BErrorInfo
, WErrorInfo
, LErrorInfo
, QErrorInfo
;
1075 MatchResultTy MatchB
= MatchInstructionImpl(Operands
, Inst
, BErrorInfo
);
1076 Tmp
[Base
.size()] = 'w';
1077 MatchResultTy MatchW
= MatchInstructionImpl(Operands
, Inst
, WErrorInfo
);
1078 Tmp
[Base
.size()] = 'l';
1079 MatchResultTy MatchL
= MatchInstructionImpl(Operands
, Inst
, LErrorInfo
);
1080 Tmp
[Base
.size()] = 'q';
1081 MatchResultTy MatchQ
= MatchInstructionImpl(Operands
, Inst
, QErrorInfo
);
1083 // Restore the old token.
1084 Op
->setTokenValue(Base
);
1086 // If exactly one matched, then we treat that as a successful match (and the
1087 // instruction will already have been filled in correctly, since the failing
1088 // matches won't have modified it).
1089 unsigned NumSuccessfulMatches
=
1090 (MatchB
== Match_Success
) + (MatchW
== Match_Success
) +
1091 (MatchL
== Match_Success
) + (MatchQ
== Match_Success
);
1092 if (NumSuccessfulMatches
== 1) {
1093 Out
.EmitInstruction(Inst
);
1097 // Otherwise, the match failed, try to produce a decent error message.
1099 // If we had multiple suffix matches, then identify this as an ambiguous
1101 if (NumSuccessfulMatches
> 1) {
1103 unsigned NumMatches
= 0;
1104 if (MatchB
== Match_Success
)
1105 MatchChars
[NumMatches
++] = 'b';
1106 if (MatchW
== Match_Success
)
1107 MatchChars
[NumMatches
++] = 'w';
1108 if (MatchL
== Match_Success
)
1109 MatchChars
[NumMatches
++] = 'l';
1110 if (MatchQ
== Match_Success
)
1111 MatchChars
[NumMatches
++] = 'q';
1113 SmallString
<126> Msg
;
1114 raw_svector_ostream
OS(Msg
);
1115 OS
<< "ambiguous instructions require an explicit suffix (could be ";
1116 for (unsigned i
= 0; i
!= NumMatches
; ++i
) {
1119 if (i
+ 1 == NumMatches
)
1121 OS
<< "'" << Base
<< MatchChars
[i
] << "'";
1124 Error(IDLoc
, OS
.str());
1128 // Okay, we know that none of the variants matched successfully.
1130 // If all of the instructions reported an invalid mnemonic, then the original
1131 // mnemonic was invalid.
1132 if ((MatchB
== Match_MnemonicFail
) && (MatchW
== Match_MnemonicFail
) &&
1133 (MatchL
== Match_MnemonicFail
) && (MatchQ
== Match_MnemonicFail
)) {
1134 if (!WasOriginallyInvalidOperand
) {
1135 Error(IDLoc
, "invalid instruction mnemonic '" + Base
+ "'");
1139 // Recover location info for the operand if we know which was the problem.
1140 SMLoc ErrorLoc
= IDLoc
;
1141 if (OrigErrorInfo
!= ~0U) {
1142 if (OrigErrorInfo
>= Operands
.size())
1143 return Error(IDLoc
, "too few operands for instruction");
1145 ErrorLoc
= ((X86Operand
*)Operands
[OrigErrorInfo
])->getStartLoc();
1146 if (ErrorLoc
== SMLoc()) ErrorLoc
= IDLoc
;
1149 return Error(ErrorLoc
, "invalid operand for instruction");
1152 // If one instruction matched with a missing feature, report this as a
1154 if ((MatchB
== Match_MissingFeature
) + (MatchW
== Match_MissingFeature
) +
1155 (MatchL
== Match_MissingFeature
) + (MatchQ
== Match_MissingFeature
) == 1){
1156 Error(IDLoc
, "instruction requires a CPU feature not currently enabled");
1160 // If one instruction matched with an invalid operand, report this as an
1162 if ((MatchB
== Match_InvalidOperand
) + (MatchW
== Match_InvalidOperand
) +
1163 (MatchL
== Match_InvalidOperand
) + (MatchQ
== Match_InvalidOperand
) == 1){
1164 Error(IDLoc
, "invalid operand for instruction");
1168 // If all of these were an outright failure, report it in a useless way.
1169 // FIXME: We should give nicer diagnostics about the exact failure.
1170 Error(IDLoc
, "unknown use of instruction mnemonic without a size suffix");
// ParseDirective - Inspect the identifier text of the directive token and
// dispatch the directives this parser recognizes. In the visible code the only
// name tested is ".word", which is forwarded to ParseDirectiveWord with a size
// argument of 2 and the directive token's source location.
// NOTE(review): the result returned for any other directive is not visible in
// this excerpt -- confirm against the complete source.
1175 bool X86ATTAsmParser::ParseDirective(AsmToken DirectiveID
) {
1176 StringRef IDVal
= DirectiveID
.getIdentifier();
// ".word" is handled by ParseDirectiveWord; its result is returned directly.
1177 if (IDVal
== ".word")
1178 return ParseDirectiveWord(2, DirectiveID
.getLoc());
1182 /// ParseDirectiveWord
1183 /// ::= .word [ expression (, expression)* ]
// Size is passed through unchanged to the streamer's EmitValue for each parsed
// expression; L is the location used when reporting the
// "unexpected token in directive" error.
// NOTE(review): the looping construct, the result on a failed expression parse,
// and the consumption of the separating comma are not visible in this excerpt
// -- confirm against the complete source.
1184 bool X86ATTAsmParser::ParseDirectiveWord(unsigned Size
, SMLoc L
) {
// Only look for operands when the statement has not already ended.
1185 if (getLexer().isNot(AsmToken::EndOfStatement
)) {
// Value receives the expression produced by the generic parser.
1187 const MCExpr
*Value
;
1188 if (getParser().ParseExpression(Value
))
// Hand the parsed expression to the streamer with the requested Size and
// address space 0.
1191 getParser().getStreamer().EmitValue(Value
, Size
, 0 /*addrspace*/);
// Check whether the end of the statement has been reached after this operand.
1193 if (getLexer().is(AsmToken::EndOfStatement
))
1196 // FIXME: Improve diagnostic.
// Anything other than a comma at this point is reported as an error at L.
1197 if (getLexer().isNot(AsmToken::Comma
))
1198 return Error(L
, "unexpected token in directive");
// Forward declaration of the lexer initializer that LLVMInitializeX86AsmParser
// calls below; its definition is not part of this excerpt.
1210 extern "C" void LLVMInitializeX86AsmLexer();
1212 // Force static initialization.
// LLVMInitializeX86AsmParser - C-linkage entry point that constructs one
// RegisterAsmParser object per target (X86_32ATTAsmParser for TheX86_32Target,
// X86_64ATTAsmParser for TheX86_64Target) and then invokes
// LLVMInitializeX86AsmLexer().
1213 extern "C" void LLVMInitializeX86AsmParser() {
1214 RegisterAsmParser
<X86_32ATTAsmParser
> X(TheX86_32Target
);
1215 RegisterAsmParser
<X86_64ATTAsmParser
> Y(TheX86_64Target
);
// The lexer is initialized from the same entry point as the parser.
1216 LLVMInitializeX86AsmLexer();
1219 #define GET_REGISTER_MATCHER
1220 #define GET_MATCHER_IMPLEMENTATION
1221 #include "X86GenAsmMatcher.inc"