1 //===-- X86AsmParser.cpp - Parse X86 assembly to MCInst instructions ------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 #include "llvm/Target/TargetAsmParser.h"
12 #include "X86Subtarget.h"
13 #include "llvm/Target/TargetRegistry.h"
14 #include "llvm/Target/TargetAsmParser.h"
15 #include "llvm/MC/MCStreamer.h"
16 #include "llvm/MC/MCExpr.h"
17 #include "llvm/MC/MCInst.h"
18 #include "llvm/MC/MCSubtargetInfo.h"
19 #include "llvm/MC/MCParser/MCAsmLexer.h"
20 #include "llvm/MC/MCParser/MCAsmParser.h"
21 #include "llvm/MC/MCParser/MCParsedAsmOperand.h"
22 #include "llvm/ADT/OwningPtr.h"
23 #include "llvm/ADT/SmallString.h"
24 #include "llvm/ADT/SmallVector.h"
25 #include "llvm/ADT/StringExtras.h"
26 #include "llvm/ADT/StringSwitch.h"
27 #include "llvm/ADT/Twine.h"
28 #include "llvm/Support/SourceMgr.h"
29 #include "llvm/Support/raw_ostream.h"
31 #define GET_SUBTARGETINFO_ENUM
32 #include "X86GenSubtargetInfo.inc"
// X86ATTAsmParser - TargetAsmParser implementation for X86 assembly. It wraps
// the generic MCAsmParser (Parser) and the subtarget description (STI), and
// declares the operand/instruction/directive parsing hooks defined later in
// this file.
39 class X86ATTAsmParser
: public TargetAsmParser
{
// Accessors forwarding to the generic parser and its lexer.
44 MCAsmParser
&getParser() const { return Parser
; }
46 MCAsmLexer
&getLexer() const { return Parser
.getLexer(); }
// Report diagnostic Msg at location L through the generic parser.
48 bool Error(SMLoc L
, const Twine
&Msg
) { return Parser
.Error(L
, Msg
); }
// Operand parsers; ParseMemOperand receives the already-parsed segment
// register (0 when there is none) and the operand's start location.
50 X86Operand
*ParseOperand();
51 X86Operand
*ParseMemOperand(unsigned SegReg
, SMLoc StartLoc
);
// Handler for the ".word" directive; Size is the byte size of each value.
53 bool ParseDirectiveWord(unsigned Size
, SMLoc L
);
55 bool MatchAndEmitInstruction(SMLoc IDLoc
,
56 SmallVectorImpl
<MCParsedAsmOperand
*> &Operands
,
59 /// isSrcOp - Returns true if operand is either (%rsi) or %ds:(%rsi)
60 /// in 64bit mode or (%esi) or %ds:(%esi) in 32bit mode.
61 bool isSrcOp(X86Operand
&Op
);
63 /// isDstOp - Returns true if operand is either %es:(%rdi) in 64bit mode
64 /// or %es:(%edi) in 32bit mode.
65 bool isDstOp(X86Operand
&Op
);
// True when the subtarget feature bits include X86::Mode64Bit.
68 // FIXME: Can tablegen auto-generate this?
69 return (STI
.getFeatureBits() & X86::Mode64Bit
) != 0;
72 /// @name Auto-generated Matcher Functions
75 #define GET_ASSEMBLER_HEADER
76 #include "X86GenAsmMatcher.inc"
// Constructor: stores the subtarget info and generic parser references.
81 X86ATTAsmParser(MCSubtargetInfo
&sti
, MCAsmParser
&parser
)
82 : TargetAsmParser(), STI(sti
), Parser(parser
) {
84 // Initialize the set of available features.
85 setAvailableFeatures(ComputeAvailableFeatures(STI
.getFeatureBits()));
// Virtual parsing entry points; their definitions appear later in this file.
87 virtual bool ParseRegister(unsigned &RegNo
, SMLoc
&StartLoc
, SMLoc
&EndLoc
);
89 virtual bool ParseInstruction(StringRef Name
, SMLoc NameLoc
,
90 SmallVectorImpl
<MCParsedAsmOperand
*> &Operands
);
92 virtual bool ParseDirective(AsmToken DirectiveID
);
94 } // end anonymous namespace
96 /// @name Auto-generated Match Functions
99 static unsigned MatchRegisterName(StringRef Name
);
105 /// X86Operand - Instances of this class represent a parsed X86 machine
/// operand. The Kind field selects one of Token, Register, Immediate or
/// Memory; each accessor below asserts that the operand has the matching kind.
107 struct X86Operand
: public MCParsedAsmOperand
{
115 SMLoc StartLoc
, EndLoc
;
// Construct an operand of kind K spanning [Start, End]; the kind-specific
// payload is filled in afterwards by the static Create* factories.
140 X86Operand(KindTy K
, SMLoc Start
, SMLoc End
)
141 : Kind(K
), StartLoc(Start
), EndLoc(End
) {}
143 /// getStartLoc - Get the location of the first token of this operand.
144 SMLoc
getStartLoc() const { return StartLoc
; }
145 /// getEndLoc - Get the location of the last token of this operand.
146 SMLoc
getEndLoc() const { return EndLoc
; }
// dump - Intentionally empty in this implementation.
148 virtual void dump(raw_ostream
&OS
) const {}
// getToken - Return the token text as a StringRef over Tok.Data/Tok.Length.
150 StringRef
getToken() const {
151 assert(Kind
== Token
&& "Invalid access!");
152 return StringRef(Tok
.Data
, Tok
.Length
);
// setTokenValue - Point the token at Value. Only the data pointer and the
// length are stored; the characters themselves are not copied here.
154 void setTokenValue(StringRef Value
) {
155 assert(Kind
== Token
&& "Invalid access!");
156 Tok
.Data
= Value
.data();
157 Tok
.Length
= Value
.size();
// Kind-checked accessors for the register, immediate and memory payloads.
160 unsigned getReg() const {
161 assert(Kind
== Register
&& "Invalid access!");
165 const MCExpr
*getImm() const {
166 assert(Kind
== Immediate
&& "Invalid access!");
170 const MCExpr
*getMemDisp() const {
171 assert(Kind
== Memory
&& "Invalid access!");
174 unsigned getMemSegReg() const {
175 assert(Kind
== Memory
&& "Invalid access!");
178 unsigned getMemBaseReg() const {
179 assert(Kind
== Memory
&& "Invalid access!");
182 unsigned getMemIndexReg() const {
183 assert(Kind
== Memory
&& "Invalid access!");
186 unsigned getMemScale() const {
187 assert(Kind
== Memory
&& "Invalid access!");
// Kind predicates.
191 bool isToken() const {return Kind
== Token
; }
193 bool isImm() const { return Kind
== Immediate
; }
// isImmSExti16i8 - For a constant immediate, accept values in
// [0, 0x7F], [0xFF80, 0xFFFF] or [0xFFFFFFFFFFFFFF80, 0xFFFFFFFFFFFFFFFF]
// (the value is compared as an unsigned 64-bit quantity).
195 bool isImmSExti16i8() const {
199 // If this isn't a constant expr, just assume it fits and let relaxation
201 const MCConstantExpr
*CE
= dyn_cast
<MCConstantExpr
>(getImm());
205 // Otherwise, check the value is in a range that makes sense for this
207 uint64_t Value
= CE
->getValue();
208 return (( Value
<= 0x000000000000007FULL
)||
209 (0x000000000000FF80ULL
<= Value
&& Value
<= 0x000000000000FFFFULL
)||
210 (0xFFFFFFFFFFFFFF80ULL
<= Value
&& Value
<= 0xFFFFFFFFFFFFFFFFULL
));
// isImmSExti32i8 - Same check with the middle range widened to
// [0xFFFFFF80, 0xFFFFFFFF].
212 bool isImmSExti32i8() const {
216 // If this isn't a constant expr, just assume it fits and let relaxation
218 const MCConstantExpr
*CE
= dyn_cast
<MCConstantExpr
>(getImm());
222 // Otherwise, check the value is in a range that makes sense for this
224 uint64_t Value
= CE
->getValue();
225 return (( Value
<= 0x000000000000007FULL
)||
226 (0x00000000FFFFFF80ULL
<= Value
&& Value
<= 0x00000000FFFFFFFFULL
)||
227 (0xFFFFFFFFFFFFFF80ULL
<= Value
&& Value
<= 0xFFFFFFFFFFFFFFFFULL
));
// isImmSExti64i8 - Accepts [0, 0x7F] or
// [0xFFFFFFFFFFFFFF80, 0xFFFFFFFFFFFFFFFF] only.
229 bool isImmSExti64i8() const {
233 // If this isn't a constant expr, just assume it fits and let relaxation
235 const MCConstantExpr
*CE
= dyn_cast
<MCConstantExpr
>(getImm());
239 // Otherwise, check the value is in a range that makes sense for this
241 uint64_t Value
= CE
->getValue();
242 return (( Value
<= 0x000000000000007FULL
)||
243 (0xFFFFFFFFFFFFFF80ULL
<= Value
&& Value
<= 0xFFFFFFFFFFFFFFFFULL
));
// isImmSExti64i32 - Accepts [0, 0x7FFFFFFF] or
// [0xFFFFFFFF80000000, 0xFFFFFFFFFFFFFFFF].
245 bool isImmSExti64i32() const {
249 // If this isn't a constant expr, just assume it fits and let relaxation
251 const MCConstantExpr
*CE
= dyn_cast
<MCConstantExpr
>(getImm());
255 // Otherwise, check the value is in a range that makes sense for this
257 uint64_t Value
= CE
->getValue();
258 return (( Value
<= 0x000000007FFFFFFFULL
)||
259 (0xFFFFFFFF80000000ULL
<= Value
&& Value
<= 0xFFFFFFFFFFFFFFFFULL
));
262 bool isMem() const { return Kind
== Memory
; }
// isAbsMem - A memory operand with no segment, base or index register and a
// scale of 1, i.e. only a displacement.
264 bool isAbsMem() const {
265 return Kind
== Memory
&& !getMemSegReg() && !getMemBaseReg() &&
266 !getMemIndexReg() && getMemScale() == 1;
269 bool isReg() const { return Kind
== Register
; }
// addExpr - Append Expr to Inst, using an immediate operand for constant
// expressions and an expression operand in the other visible path.
271 void addExpr(MCInst
&Inst
, const MCExpr
*Expr
) const {
272 // Add as immediates when possible.
273 if (const MCConstantExpr
*CE
= dyn_cast
<MCConstantExpr
>(Expr
))
274 Inst
.addOperand(MCOperand::CreateImm(CE
->getValue()));
276 Inst
.addOperand(MCOperand::CreateExpr(Expr
));
// add*Operands - Append this operand to Inst; N is the operand count the
// caller expects and is only checked by the asserts.
279 void addRegOperands(MCInst
&Inst
, unsigned N
) const {
280 assert(N
== 1 && "Invalid number of operands!");
281 Inst
.addOperand(MCOperand::CreateReg(getReg()));
284 void addImmOperands(MCInst
&Inst
, unsigned N
) const {
285 assert(N
== 1 && "Invalid number of operands!");
286 addExpr(Inst
, getImm());
// Memory operands are emitted in the order: base register, scale, index
// register, displacement, segment register.
289 void addMemOperands(MCInst
&Inst
, unsigned N
) const {
290 assert((N
== 5) && "Invalid number of operands!");
291 Inst
.addOperand(MCOperand::CreateReg(getMemBaseReg()));
292 Inst
.addOperand(MCOperand::CreateImm(getMemScale()));
293 Inst
.addOperand(MCOperand::CreateReg(getMemIndexReg()));
294 addExpr(Inst
, getMemDisp());
295 Inst
.addOperand(MCOperand::CreateReg(getMemSegReg()));
// Unlike addMemOperands, the displacement is always added as an expression
// operand here (addExpr is not used).
298 void addAbsMemOperands(MCInst
&Inst
, unsigned N
) const {
299 assert((N
== 1) && "Invalid number of operands!");
300 Inst
.addOperand(MCOperand::CreateExpr(getMemDisp()));
// CreateToken - Heap-allocate a Token operand located at Loc. Only the
// pointer and length of Str are stored; the text is not copied here.
303 static X86Operand
*CreateToken(StringRef Str
, SMLoc Loc
) {
304 X86Operand
*Res
= new X86Operand(Token
, Loc
, Loc
);
305 Res
->Tok
.Data
= Str
.data();
306 Res
->Tok
.Length
= Str
.size();
// CreateReg - Heap-allocate a Register operand for RegNo.
310 static X86Operand
*CreateReg(unsigned RegNo
, SMLoc StartLoc
, SMLoc EndLoc
) {
311 X86Operand
*Res
= new X86Operand(Register
, StartLoc
, EndLoc
);
312 Res
->Reg
.RegNo
= RegNo
;
// CreateImm - Heap-allocate an Immediate operand.
316 static X86Operand
*CreateImm(const MCExpr
*Val
, SMLoc StartLoc
, SMLoc EndLoc
){
317 X86Operand
*Res
= new X86Operand(Immediate
, StartLoc
, EndLoc
);
322 /// Create an absolute memory operand.
323 static X86Operand
*CreateMem(const MCExpr
*Disp
, SMLoc StartLoc
,
325 X86Operand
*Res
= new X86Operand(Memory
, StartLoc
, EndLoc
);
327 Res
->Mem
.Disp
= Disp
;
328 Res
->Mem
.BaseReg
= 0;
329 Res
->Mem
.IndexReg
= 0;
334 /// Create a generalized memory operand.
335 static X86Operand
*CreateMem(unsigned SegReg
, const MCExpr
*Disp
,
336 unsigned BaseReg
, unsigned IndexReg
,
337 unsigned Scale
, SMLoc StartLoc
, SMLoc EndLoc
) {
338 // We should never just have a displacement, that should be parsed as an
339 // absolute memory operand.
340 assert((SegReg
|| BaseReg
|| IndexReg
) && "Invalid memory operand!");
342 // The scale should always be one of {1,2,4,8}.
343 assert(((Scale
== 1 || Scale
== 2 || Scale
== 4 || Scale
== 8)) &&
345 X86Operand
*Res
= new X86Operand(Memory
, StartLoc
, EndLoc
);
346 Res
->Mem
.SegReg
= SegReg
;
347 Res
->Mem
.Disp
= Disp
;
348 Res
->Mem
.BaseReg
= BaseReg
;
349 Res
->Mem
.IndexReg
= IndexReg
;
350 Res
->Mem
.Scale
= Scale
;
355 } // end anonymous namespace.
/// isSrcOp - Return true when Op is a memory operand whose segment register
/// is either absent (0) or DS, whose displacement is the constant 0, whose
/// base register is RSI in 64-bit mode or ESI otherwise, and which has no
/// index register.
357 bool X86ATTAsmParser::isSrcOp(X86Operand
&Op
) {
// The expected base register depends on the current mode.
358 unsigned basereg
= is64Bit() ? X86::RSI
: X86::ESI
;
// isa<> guards the cast<> below, so non-constant displacements are rejected
// rather than asserting.
360 return (Op
.isMem() &&
361 (Op
.Mem
.SegReg
== 0 || Op
.Mem
.SegReg
== X86::DS
) &&
362 isa
<MCConstantExpr
>(Op
.Mem
.Disp
) &&
363 cast
<MCConstantExpr
>(Op
.Mem
.Disp
)->getValue() == 0 &&
364 Op
.Mem
.BaseReg
== basereg
&& Op
.Mem
.IndexReg
== 0);
/// isDstOp - Return true when Op is a memory operand with an explicit ES
/// segment register, a constant-0 displacement, base register RDI in 64-bit
/// mode or EDI otherwise, and no index register. Unlike isSrcOp, a missing
/// segment register is not accepted here.
367 bool X86ATTAsmParser::isDstOp(X86Operand
&Op
) {
// The expected base register depends on the current mode.
368 unsigned basereg
= is64Bit() ? X86::RDI
: X86::EDI
;
// isa<> guards the cast<> below, so non-constant displacements are rejected
// rather than asserting.
370 return Op
.isMem() && Op
.Mem
.SegReg
== X86::ES
&&
371 isa
<MCConstantExpr
>(Op
.Mem
.Disp
) &&
372 cast
<MCConstantExpr
>(Op
.Mem
.Disp
)->getValue() == 0 &&
373 Op
.Mem
.BaseReg
== basereg
&& Op
.Mem
.IndexReg
== 0;
/// ParseRegister - Parse a '%'-prefixed register reference starting at the
/// current token (which must be the '%').
///
/// \param RegNo    receives the result of MatchRegisterName (retried with the
///                 lowercased spelling), or an ST0-ST7 / DR0-DR7 value for
///                 the "%st(N)" and "%db[0-7]" spellings.
/// \param StartLoc set to the location of the '%' token.
/// \param EndLoc   set to the location of the final token of the register.
///
/// Malformed input is reported through Error(), whose result is returned.
376 bool X86ATTAsmParser::ParseRegister(unsigned &RegNo
,
377 SMLoc
&StartLoc
, SMLoc
&EndLoc
) {
379 const AsmToken
&TokPercent
= Parser
.getTok();
380 assert(TokPercent
.is(AsmToken::Percent
) && "Invalid token kind!");
381 StartLoc
= TokPercent
.getLoc();
382 Parser
.Lex(); // Eat percent token.
384 const AsmToken
&Tok
= Parser
.getTok();
385 if (Tok
.isNot(AsmToken::Identifier
))
386 return Error(Tok
.getLoc(), "invalid register name");
388 // FIXME: Validate register for the current architecture; we have to do
389 // validation later, so maybe there is no need for this here.
390 RegNo
= MatchRegisterName(Tok
.getString());
392 // If the match failed, try the register name as lowercase.
394 RegNo
= MatchRegisterName(LowercaseString(Tok
.getString()));
396 // FIXME: This should be done using Requires<In32BitMode> and
397 // Requires<In64BitMode> so "eiz" usage in 64-bit instructions
398 // can be also checked.
399 if (RegNo
== X86::RIZ
&& !is64Bit())
400 return Error(Tok
.getLoc(), "riz register in 64-bit mode only");
402 // Parse "%st" as "%st(0)" and "%st(1)", which is multiple tokens.
403 if (RegNo
== 0 && (Tok
.getString() == "st" || Tok
.getString() == "ST")) {
405 EndLoc
= Tok
.getLoc();
406 Parser
.Lex(); // Eat 'st'
408 // Check to see if we have '(4)' after %st.
409 if (getLexer().isNot(AsmToken::LParen
))
// The stack index must be an integer literal in the range 0-7.
414 const AsmToken
&IntTok
= Parser
.getTok();
415 if (IntTok
.isNot(AsmToken::Integer
))
416 return Error(IntTok
.getLoc(), "expected stack index");
417 switch (IntTok
.getIntVal()) {
418 case 0: RegNo
= X86::ST0
; break;
419 case 1: RegNo
= X86::ST1
; break;
420 case 2: RegNo
= X86::ST2
; break;
421 case 3: RegNo
= X86::ST3
; break;
422 case 4: RegNo
= X86::ST4
; break;
423 case 5: RegNo
= X86::ST5
; break;
424 case 6: RegNo
= X86::ST6
; break;
425 case 7: RegNo
= X86::ST7
; break;
426 default: return Error(IntTok
.getLoc(), "invalid stack index");
// Lex() advances past the index; the token after it must be ')'.
429 if (getParser().Lex().isNot(AsmToken::RParen
))
430 return Error(Parser
.getTok().getLoc(), "expected ')'");
432 EndLoc
= Tok
.getLoc();
433 Parser
.Lex(); // Eat ')'
437 // If this is "db[0-7]", match it as an alias
439 if (RegNo
== 0 && Tok
.getString().size() == 3 &&
440 Tok
.getString().startswith("db")) {
// The third character selects the matching DR register.
441 switch (Tok
.getString()[2]) {
442 case '0': RegNo
= X86::DR0
; break;
443 case '1': RegNo
= X86::DR1
; break;
444 case '2': RegNo
= X86::DR2
; break;
445 case '3': RegNo
= X86::DR3
; break;
446 case '4': RegNo
= X86::DR4
; break;
447 case '5': RegNo
= X86::DR5
; break;
448 case '6': RegNo
= X86::DR6
; break;
449 case '7': RegNo
= X86::DR7
; break;
453 EndLoc
= Tok
.getLoc();
454 Parser
.Lex(); // Eat it.
460 return Error(Tok
.getLoc(), "invalid register name");
462 EndLoc
= Tok
.getLoc();
463 Parser
.Lex(); // Eat identifier token.
/// ParseOperand - Parse a single operand, dispatching on the kind of the
/// current token:
///   - '%' starts a register; if the register is followed by ':' it is taken
///     as a segment prefix and the rest is parsed as a memory operand.
///   - '$' starts an immediate expression.
///   - the first visible branch parses a memory operand with no segment
///     register.
/// Returns 0 when register parsing fails.
467 X86Operand
*X86ATTAsmParser::ParseOperand() {
468 switch (getLexer().getKind()) {
470 // Parse a memory operand with no segment register.
471 return ParseMemOperand(0, Parser
.getTok().getLoc());
472 case AsmToken::Percent
: {
473 // Read the register.
476 if (ParseRegister(RegNo
, Start
, End
)) return 0;
// The pseudo registers EIZ/RIZ are diagnosed when used as a plain operand.
477 if (RegNo
== X86::EIZ
|| RegNo
== X86::RIZ
) {
478 Error(Start
, "eiz and riz can only be used as index registers");
482 // If this is a segment register followed by a ':', then this is the start
483 // of a memory reference, otherwise this is a normal register reference.
484 if (getLexer().isNot(AsmToken::Colon
))
485 return X86Operand::CreateReg(RegNo
, Start
, End
);
488 getParser().Lex(); // Eat the colon.
489 return ParseMemOperand(RegNo
, Start
);
491 case AsmToken::Dollar
: {
// Start is the location of the current token; End is filled in by
// ParseExpression.
493 SMLoc Start
= Parser
.getTok().getLoc(), End
;
496 if (getParser().ParseExpression(Val
, End
))
498 return X86Operand::CreateImm(Val
, Start
, End
);
503 /// ParseMemOperand: segment: disp(basereg, indexreg, scale). The '%ds:' prefix
504 /// has already been parsed if present.
///
/// \param SegReg   segment register already consumed by the caller, or 0.
/// \param MemStart location where the operand started.
///
/// Returns 0 when a register or displacement expression fails to parse (see
/// the "return 0" paths below); diagnostics are emitted through Error().
505 X86Operand
*X86ATTAsmParser::ParseMemOperand(unsigned SegReg
, SMLoc MemStart
) {
507 // We have to disambiguate a parenthesized expression "(4+5)" from the start
508 // of a memory operand with a missing displacement "(%ebx)" or "(,%eax)". The
509 // only way to do this without lookahead is to eat the '(' and see what is
// The displacement defaults to the constant 0 until one is parsed.
511 const MCExpr
*Disp
= MCConstantExpr::Create(0, getParser().getContext());
512 if (getLexer().isNot(AsmToken::LParen
)) {
514 if (getParser().ParseExpression(Disp
, ExprEnd
)) return 0;
516 // After parsing the base expression we could either have a parenthesized
517 // memory address or not. If not, return now. If so, eat the (.
518 if (getLexer().isNot(AsmToken::LParen
)) {
519 // Unless we have a segment register, treat this as an immediate.
521 return X86Operand::CreateMem(Disp
, MemStart
, ExprEnd
);
522 return X86Operand::CreateMem(SegReg
, Disp
, 0, 0, 1, MemStart
, ExprEnd
);
528 // Okay, we have a '('. We don't know if this is an expression or not,
529 // so we have to eat the ( to see beyond it.
530 SMLoc LParenLoc
= Parser
.getTok().getLoc();
531 Parser
.Lex(); // Eat the '('.
533 if (getLexer().is(AsmToken::Percent
) || getLexer().is(AsmToken::Comma
)) {
534 // Nothing to do here, fall into the code below with the '(' part of the
535 // memory operand consumed.
539 // It must be a parenthesized expression, parse it now.
540 if (getParser().ParseParenExpression(Disp
, ExprEnd
))
543 // After parsing the base expression we could either have a parenthesized
544 // memory address or not. If not, return now. If so, eat the (.
545 if (getLexer().isNot(AsmToken::LParen
)) {
546 // Unless we have a segment register, treat this as an immediate.
// NOTE(review): this path uses LParenLoc as the start location, whereas the
// segment-register path below uses MemStart.
548 return X86Operand::CreateMem(Disp
, LParenLoc
, ExprEnd
);
549 return X86Operand::CreateMem(SegReg
, Disp
, 0, 0, 1, MemStart
, ExprEnd
);
557 // If we reached here, then we just ate the ( of the memory operand. Process
558 // the rest of the memory operand.
559 unsigned BaseReg
= 0, IndexReg
= 0, Scale
= 1;
561 if (getLexer().is(AsmToken::Percent
)) {
563 if (ParseRegister(BaseReg
, L
, L
)) return 0;
564 if (BaseReg
== X86::EIZ
|| BaseReg
== X86::RIZ
) {
565 Error(L
, "eiz and riz can only be used as index registers");
570 if (getLexer().is(AsmToken::Comma
)) {
571 Parser
.Lex(); // Eat the comma.
573 // Following the comma we should have either an index register, or a scale
574 // value. We don't support the latter form, but we want to parse it
577 // Note that even though it would be completely consistent to support syntax
578 // like "1(%eax,,1)", the assembler doesn't. Use "eiz" or "riz" for this.
579 if (getLexer().is(AsmToken::Percent
)) {
581 if (ParseRegister(IndexReg
, L
, L
)) return 0;
583 if (getLexer().isNot(AsmToken::RParen
)) {
584 // Parse the scale amount:
585 // ::= ',' [scale-expression]
586 if (getLexer().isNot(AsmToken::Comma
)) {
587 Error(Parser
.getTok().getLoc(),
588 "expected comma in scale expression");
591 Parser
.Lex(); // Eat the comma.
593 if (getLexer().isNot(AsmToken::RParen
)) {
594 SMLoc Loc
= Parser
.getTok().getLoc();
597 if (getParser().ParseAbsoluteExpression(ScaleVal
))
600 // Validate the scale amount.
601 if (ScaleVal
!= 1 && ScaleVal
!= 2 && ScaleVal
!= 4 && ScaleVal
!= 8){
602 Error(Loc
, "scale factor in address must be 1, 2, 4 or 8");
605 Scale
= (unsigned)ScaleVal
;
608 } else if (getLexer().isNot(AsmToken::RParen
)) {
609 // A scale amount without an index is ignored.
611 SMLoc Loc
= Parser
.getTok().getLoc();
614 if (getParser().ParseAbsoluteExpression(Value
))
618 Warning(Loc
, "scale factor without index register is ignored");
623 // Ok, we've eaten the memory operand, verify we have a ')' and eat it too.
624 if (getLexer().isNot(AsmToken::RParen
)) {
625 Error(Parser
.getTok().getLoc(), "unexpected token in memory operand");
628 SMLoc MemEnd
= Parser
.getTok().getLoc();
629 Parser
.Lex(); // Eat the ')'.
631 return X86Operand::CreateMem(SegReg
, Disp
, BaseReg
, IndexReg
, Scale
,
/// ParseInstruction - Build the operand list for one instruction statement.
///
/// \param Name     the mnemonic as written in the source.
/// \param NameLoc  location of the mnemonic.
/// \param Operands output list; receives the (possibly patched) mnemonic
///                 token, an immediate built from ExtraImmOp for the
///                 cmp<cc>{ss,sd,ps,pd} forms, and then the parsed operands.
///
/// After parsing, a series of mnemonic-specific rewrites is applied to
/// Operands (in/out with "(%dx)", string instructions with explicit operands,
/// shifts/rotates by $1, and "int $3").
635 bool X86ATTAsmParser::
636 ParseInstruction(StringRef Name
, SMLoc NameLoc
,
637 SmallVectorImpl
<MCParsedAsmOperand
*> &Operands
) {
638 StringRef PatchedName
= Name
;
640 // FIXME: Hack to recognize setneb as setne.
// Drops the trailing 'b' from set<cc>b, except for "setb" and "setnb" where
// the 'b' is part of the condition code.
641 if (PatchedName
.startswith("set") && PatchedName
.endswith("b") &&
642 PatchedName
!= "setb" && PatchedName
!= "setnb")
643 PatchedName
= PatchedName
.substr(0, Name
.size()-1);
645 // FIXME: Hack to recognize cmp<comparison code>{ss,sd,ps,pd}.
646 const MCExpr
*ExtraImmOp
= 0;
647 if ((PatchedName
.startswith("cmp") || PatchedName
.startswith("vcmp")) &&
648 (PatchedName
.endswith("ss") || PatchedName
.endswith("sd") ||
649 PatchedName
.endswith("ps") || PatchedName
.endswith("pd"))) {
650 bool IsVCMP
= PatchedName
.startswith("vcmp");
// Index of the first character after the "cmp"/"vcmp" prefix.
651 unsigned SSECCIdx
= IsVCMP
? 4 : 3;
// Look up the text between the prefix and the two-character type suffix.
652 unsigned SSEComparisonCode
= StringSwitch
<unsigned>(
653 PatchedName
.slice(SSECCIdx
, PatchedName
.size() - 2))
666 .Case("neq_oq", 0x0C)
673 .Case("unord_s", 0x13)
674 .Case("neq_us", 0x14)
675 .Case("nlt_uq", 0x15)
676 .Case("nle_uq", 0x16)
679 .Case("nge_uq", 0x19)
680 .Case("ngt_uq", 0x1A)
681 .Case("false_os", 0x1B)
682 .Case("neq_os", 0x1C)
685 .Case("true_us", 0x1F)
// ~0U marks "no comparison code recognized".
687 if (SSEComparisonCode
!= ~0U) {
688 ExtraImmOp
= MCConstantExpr::Create(SSEComparisonCode
,
689 getParser().getContext());
// Replace the mnemonic by the plain compare instruction of the same type.
690 if (PatchedName
.endswith("ss")) {
691 PatchedName
= IsVCMP
? "vcmpss" : "cmpss";
692 } else if (PatchedName
.endswith("sd")) {
693 PatchedName
= IsVCMP
? "vcmpsd" : "cmpsd";
694 } else if (PatchedName
.endswith("ps")) {
695 PatchedName
= IsVCMP
? "vcmpps" : "cmpps";
697 assert(PatchedName
.endswith("pd") && "Unexpected mnemonic!");
698 PatchedName
= IsVCMP
? "vcmppd" : "cmppd";
703 Operands
.push_back(X86Operand::CreateToken(PatchedName
, NameLoc
));
706 Operands
.push_back(X86Operand::CreateImm(ExtraImmOp
, NameLoc
, NameLoc
));
709 // Determine whether this is an instruction prefix.
711 Name
== "lock" || Name
== "rep" ||
712 Name
== "repe" || Name
== "repz" ||
713 Name
== "repne" || Name
== "repnz" ||
714 Name
== "rex64" || Name
== "data16";
717 // This does the actual operand parsing. Don't parse any more if we have a
718 // prefix juxtaposed with an operation like "lock incl 4(%rax)", because we
719 // just want to parse the "lock" as the first instruction and the "incl" as
721 if (getLexer().isNot(AsmToken::EndOfStatement
) && !isPrefix
) {
723 // Parse '*' modifier.
724 if (getLexer().is(AsmToken::Star
)) {
725 SMLoc Loc
= Parser
.getTok().getLoc();
726 Operands
.push_back(X86Operand::CreateToken("*", Loc
));
727 Parser
.Lex(); // Eat the star.
730 // Read the first operand.
731 if (X86Operand
*Op
= ParseOperand())
732 Operands
.push_back(Op
);
734 Parser
.EatToEndOfStatement();
738 while (getLexer().is(AsmToken::Comma
)) {
739 Parser
.Lex(); // Eat the comma.
741 // Parse and remember the operand.
742 if (X86Operand
*Op
= ParseOperand())
743 Operands
.push_back(Op
);
745 Parser
.EatToEndOfStatement();
750 if (getLexer().isNot(AsmToken::EndOfStatement
)) {
751 SMLoc Loc
= getLexer().getLoc();
752 Parser
.EatToEndOfStatement();
753 return Error(Loc
, "unexpected token in argument list");
757 if (getLexer().is(AsmToken::EndOfStatement
))
758 Parser
.Lex(); // Consume the EndOfStatement
759 else if (isPrefix
&& getLexer().is(AsmToken::Slash
))
760 Parser
.Lex(); // Consume the prefix separator Slash
762 // This is a terrible hack to handle "out[bwl]? %al, (%dx)" ->
763 // "outb %al, %dx". Out doesn't take a memory form, but this is a widely
764 // documented form in various unofficial manuals, so a lot of code uses it.
765 if ((Name
== "outb" || Name
== "outw" || Name
== "outl" || Name
== "out") &&
766 Operands
.size() == 3) {
767 X86Operand
&Op
= *(X86Operand
*)Operands
.back();
768 if (Op
.isMem() && Op
.Mem
.SegReg
== 0 &&
769 isa
<MCConstantExpr
>(Op
.Mem
.Disp
) &&
770 cast
<MCConstantExpr
>(Op
.Mem
.Disp
)->getValue() == 0 &&
771 Op
.Mem
.BaseReg
== MatchRegisterName("dx") && Op
.Mem
.IndexReg
== 0) {
// Replace the "(%dx)" memory operand by a plain register operand.
772 SMLoc Loc
= Op
.getEndLoc();
773 Operands
.back() = X86Operand::CreateReg(Op
.Mem
.BaseReg
, Loc
, Loc
);
777 // Same hack for "in[bwl]? (%dx), %al" -> "inb %dx, %al".
778 if ((Name
== "inb" || Name
== "inw" || Name
== "inl" || Name
== "in") &&
779 Operands
.size() == 3) {
780 X86Operand
&Op
= *(X86Operand
*)Operands
.begin()[1];
781 if (Op
.isMem() && Op
.Mem
.SegReg
== 0 &&
782 isa
<MCConstantExpr
>(Op
.Mem
.Disp
) &&
783 cast
<MCConstantExpr
>(Op
.Mem
.Disp
)->getValue() == 0 &&
784 Op
.Mem
.BaseReg
== MatchRegisterName("dx") && Op
.Mem
.IndexReg
== 0) {
785 SMLoc Loc
= Op
.getEndLoc();
786 Operands
.begin()[1] = X86Operand::CreateReg(Op
.Mem
.BaseReg
, Loc
, Loc
);
790 // Transform "ins[bwl] %dx, %es:(%edi)" into "ins[bwl]"
791 if (Name
.startswith("ins") && Operands
.size() == 3 &&
792 (Name
== "insb" || Name
== "insw" || Name
== "insl")) {
793 X86Operand
&Op
= *(X86Operand
*)Operands
.begin()[1];
794 X86Operand
&Op2
= *(X86Operand
*)Operands
.begin()[2];
795 if (Op
.isReg() && Op
.getReg() == X86::DX
&& isDstOp(Op2
)) {
803 // Transform "outs[bwl] %ds:(%esi), %dx" into "out[bwl]"
804 if (Name
.startswith("outs") && Operands
.size() == 3 &&
805 (Name
== "outsb" || Name
== "outsw" || Name
== "outsl")) {
806 X86Operand
&Op
= *(X86Operand
*)Operands
.begin()[1];
807 X86Operand
&Op2
= *(X86Operand
*)Operands
.begin()[2];
808 if (isSrcOp(Op
) && Op2
.isReg() && Op2
.getReg() == X86::DX
) {
816 // Transform "movs[bwl] %ds:(%esi), %es:(%edi)" into "movs[bwl]"
// "movsq" is only considered in 64-bit mode.
817 if (Name
.startswith("movs") && Operands
.size() == 3 &&
818 (Name
== "movsb" || Name
== "movsw" || Name
== "movsl" ||
819 (is64Bit() && Name
== "movsq"))) {
820 X86Operand
&Op
= *(X86Operand
*)Operands
.begin()[1];
821 X86Operand
&Op2
= *(X86Operand
*)Operands
.begin()[2];
822 if (isSrcOp(Op
) && isDstOp(Op2
)) {
829 // Transform "lods[bwl] %ds:(%esi),{%al,%ax,%eax,%rax}" into "lods[bwl]"
830 if (Name
.startswith("lods") && Operands
.size() == 3 &&
831 (Name
== "lods" || Name
== "lodsb" || Name
== "lodsw" ||
832 Name
== "lodsl" || (is64Bit() && Name
== "lodsq"))) {
833 X86Operand
*Op1
= static_cast<X86Operand
*>(Operands
[1]);
834 X86Operand
*Op2
= static_cast<X86Operand
*>(Operands
[2]);
835 if (isSrcOp(*Op1
) && Op2
->isReg()) {
// The accumulator register must agree with the mnemonic's size suffix; the
// unsuffixed "lods" accepts any of the four registers.
837 unsigned reg
= Op2
->getReg();
838 bool isLods
= Name
== "lods";
839 if (reg
== X86::AL
&& (isLods
|| Name
== "lodsb"))
841 else if (reg
== X86::AX
&& (isLods
|| Name
== "lodsw"))
843 else if (reg
== X86::EAX
&& (isLods
|| Name
== "lodsl"))
845 else if (reg
== X86::RAX
&& (isLods
|| Name
== "lodsq"))
855 static_cast<X86Operand
*>(Operands
[0])->setTokenValue(ins
);
859 // Transform "stos[bwl] {%al,%ax,%eax,%rax},%es:(%edi)" into "stos[bwl]"
860 if (Name
.startswith("stos") && Operands
.size() == 3 &&
861 (Name
== "stos" || Name
== "stosb" || Name
== "stosw" ||
862 Name
== "stosl" || (is64Bit() && Name
== "stosq"))) {
863 X86Operand
*Op1
= static_cast<X86Operand
*>(Operands
[1]);
864 X86Operand
*Op2
= static_cast<X86Operand
*>(Operands
[2]);
865 if (isDstOp(*Op2
) && Op1
->isReg()) {
// Same register/suffix agreement check as for lods above.
867 unsigned reg
= Op1
->getReg();
868 bool isStos
= Name
== "stos";
869 if (reg
== X86::AL
&& (isStos
|| Name
== "stosb"))
871 else if (reg
== X86::AX
&& (isStos
|| Name
== "stosw"))
873 else if (reg
== X86::EAX
&& (isStos
|| Name
== "stosl"))
875 else if (reg
== X86::RAX
&& (isStos
|| Name
== "stosq"))
885 static_cast<X86Operand
*>(Operands
[0])->setTokenValue(ins
);
890 // FIXME: Hack to recognize s{hr,ar,hl} $1, <op>. Canonicalize to
// The check below also covers the sal/rcl/rcr/rol/ror mnemonic prefixes.
892 if ((Name
.startswith("shr") || Name
.startswith("sar") ||
893 Name
.startswith("shl") || Name
.startswith("sal") ||
894 Name
.startswith("rcl") || Name
.startswith("rcr") ||
895 Name
.startswith("rol") || Name
.startswith("ror")) &&
896 Operands
.size() == 3) {
897 X86Operand
*Op1
= static_cast<X86Operand
*>(Operands
[1]);
898 if (Op1
->isImm() && isa
<MCConstantExpr
>(Op1
->getImm()) &&
899 cast
<MCConstantExpr
>(Op1
->getImm())->getValue() == 1) {
// Remove the constant-1 immediate from the operand list.
901 Operands
.erase(Operands
.begin() + 1);
905 // Transforms "int $3" into "int3" as a size optimization. We can't write an
906 // instalias with an immediate operand yet.
907 if (Name
== "int" && Operands
.size() == 2) {
908 X86Operand
*Op1
= static_cast<X86Operand
*>(Operands
[1]);
909 if (Op1
->isImm() && isa
<MCConstantExpr
>(Op1
->getImm()) &&
910 cast
<MCConstantExpr
>(Op1
->getImm())->getValue() == 3) {
912 Operands
.erase(Operands
.begin() + 1);
913 static_cast<X86Operand
*>(Operands
[0])->setTokenValue("int3");
/// MatchAndEmitInstruction - Match the parsed Operands (Operands[0] is the
/// mnemonic token) against the generated matcher and emit the instruction.
///
/// Visible steps:
///   1. Wait-prefixed x87 aliases (fstsw, fstcw, finit, fsave, fstenv, fclex
///      and the "ww" spellings) first emit X86::WAIT and then continue with
///      the corresponding "fn*" mnemonic.
///   2. A direct match is attempted.
///   3. On failure, the mnemonic is retried with each of four size suffixes;
///      exactly one successful suffix match is emitted, several are reported
///      as ambiguous, and otherwise a diagnostic is chosen from the individual
///      match results.
920 bool X86ATTAsmParser::
921 MatchAndEmitInstruction(SMLoc IDLoc
,
922 SmallVectorImpl
<MCParsedAsmOperand
*> &Operands
,
924 assert(!Operands
.empty() && "Unexpect empty operand list!");
925 X86Operand
*Op
= static_cast<X86Operand
*>(Operands
[0]);
926 assert(Op
->isToken() && "Leading operand should always be a mnemonic!");
928 // First, handle aliases that expand to multiple instructions.
929 // FIXME: This should be replaced with a real .td file alias mechanism.
930 // Also, MatchInstructionImpl should actually *do* the EmitInstruction
932 if (Op
->getToken() == "fstsw" || Op
->getToken() == "fstcw" ||
933 Op
->getToken() == "fstsww" || Op
->getToken() == "fstcww" ||
934 Op
->getToken() == "finit" || Op
->getToken() == "fsave" ||
935 Op
->getToken() == "fstenv" || Op
->getToken() == "fclex") {
937 Inst
.setOpcode(X86::WAIT
);
938 Out
.EmitInstruction(Inst
);
// Map the alias to its non-waiting replacement mnemonic.
941 StringSwitch
<const char*>(Op
->getToken())
942 .Case("finit", "fninit")
943 .Case("fsave", "fnsave")
944 .Case("fstcw", "fnstcw")
945 .Case("fstcww", "fnstcw")
946 .Case("fstenv", "fnstenv")
947 .Case("fstsw", "fnstsw")
948 .Case("fstsww", "fnstsw")
949 .Case("fclex", "fnclex")
951 assert(Repl
&& "Unknown wait-prefixed instruction");
953 Operands
[0] = X86Operand::CreateToken(Repl
, IDLoc
);
956 bool WasOriginallyInvalidOperand
= false;
957 unsigned OrigErrorInfo
;
960 // First, try a direct match.
961 switch (MatchInstructionImpl(Operands
, Inst
, OrigErrorInfo
)) {
963 Out
.EmitInstruction(Inst
);
965 case Match_MissingFeature
:
966 Error(IDLoc
, "instruction requires a CPU feature not currently enabled");
968 case Match_ConversionFail
:
969 return Error(IDLoc
, "unable to convert operands to instruction");
970 case Match_InvalidOperand
:
// Remembered so the operand diagnostic can be preferred further below.
971 WasOriginallyInvalidOperand
= true;
973 case Match_MnemonicFail
:
977 // FIXME: Ideally, we would only attempt suffix matches for things which are
978 // valid prefixes, and we could just infer the right unambiguous
979 // type. However, that requires substantially more matcher support than the
982 // Change the operand to point to a temporary token.
983 StringRef Base
= Op
->getToken();
987 Op
->setTokenValue(Tmp
.str());
989 // If this instruction starts with an 'f', then it is a floating point stack
990 // instruction. These come in up to three forms for 32-bit, 64-bit, and
991 // 80-bit floating point, which use the suffixes s,l,t respectively.
993 // Otherwise, we assume that this may be an integer instruction, which comes
994 // in 8/16/32/64-bit forms using the b,w,l,q suffixes respectively.
// The 'f' table carries an explicit NUL as its fourth entry so that
// Suffixes[3] is always a valid read.
995 const char *Suffixes
= Base
[0] != 'f' ? "bwlq" : "slt\0";
997 // Check for the various suffix matches.
// Each attempt overwrites the character at index Base.size() of Tmp and
// re-runs the matcher on the same operand list.
998 Tmp
[Base
.size()] = Suffixes
[0];
999 unsigned ErrorInfoIgnore
;
1000 MatchResultTy Match1
, Match2
, Match3
, Match4
;
1002 Match1
= MatchInstructionImpl(Operands
, Inst
, ErrorInfoIgnore
);
1003 Tmp
[Base
.size()] = Suffixes
[1];
1004 Match2
= MatchInstructionImpl(Operands
, Inst
, ErrorInfoIgnore
);
1005 Tmp
[Base
.size()] = Suffixes
[2];
1006 Match3
= MatchInstructionImpl(Operands
, Inst
, ErrorInfoIgnore
);
1007 Tmp
[Base
.size()] = Suffixes
[3];
1008 Match4
= MatchInstructionImpl(Operands
, Inst
, ErrorInfoIgnore
);
1010 // Restore the old token.
1011 Op
->setTokenValue(Base
);
1013 // If exactly one matched, then we treat that as a successful match (and the
1014 // instruction will already have been filled in correctly, since the failing
1015 // matches won't have modified it).
1016 unsigned NumSuccessfulMatches
=
1017 (Match1
== Match_Success
) + (Match2
== Match_Success
) +
1018 (Match3
== Match_Success
) + (Match4
== Match_Success
);
1019 if (NumSuccessfulMatches
== 1) {
1020 Out
.EmitInstruction(Inst
);
1024 // Otherwise, the match failed, try to produce a decent error message.
1026 // If we had multiple suffix matches, then identify this as an ambiguous
1028 if (NumSuccessfulMatches
> 1) {
// Collect the suffix characters that matched, in suffix order.
1030 unsigned NumMatches
= 0;
1031 if (Match1
== Match_Success
) MatchChars
[NumMatches
++] = Suffixes
[0];
1032 if (Match2
== Match_Success
) MatchChars
[NumMatches
++] = Suffixes
[1];
1033 if (Match3
== Match_Success
) MatchChars
[NumMatches
++] = Suffixes
[2];
1034 if (Match4
== Match_Success
) MatchChars
[NumMatches
++] = Suffixes
[3];
1036 SmallString
<126> Msg
;
1037 raw_svector_ostream
OS(Msg
);
1038 OS
<< "ambiguous instructions require an explicit suffix (could be ";
1039 for (unsigned i
= 0; i
!= NumMatches
; ++i
) {
1042 if (i
+ 1 == NumMatches
)
1044 OS
<< "'" << Base
<< MatchChars
[i
] << "'";
1047 Error(IDLoc
, OS
.str());
1051 // Okay, we know that none of the variants matched successfully.
1053 // If all of the instructions reported an invalid mnemonic, then the original
1054 // mnemonic was invalid.
1055 if ((Match1
== Match_MnemonicFail
) && (Match2
== Match_MnemonicFail
) &&
1056 (Match3
== Match_MnemonicFail
) && (Match4
== Match_MnemonicFail
)) {
1057 if (!WasOriginallyInvalidOperand
) {
1058 Error(IDLoc
, "invalid instruction mnemonic '" + Base
+ "'");
1062 // Recover location info for the operand if we know which was the problem.
1063 SMLoc ErrorLoc
= IDLoc
;
// ~0U means the matcher did not identify a specific operand.
1064 if (OrigErrorInfo
!= ~0U) {
1065 if (OrigErrorInfo
>= Operands
.size())
1066 return Error(IDLoc
, "too few operands for instruction");
1068 ErrorLoc
= ((X86Operand
*)Operands
[OrigErrorInfo
])->getStartLoc();
// Fall back to the mnemonic location when the operand has no location.
1069 if (ErrorLoc
== SMLoc()) ErrorLoc
= IDLoc
;
1072 return Error(ErrorLoc
, "invalid operand for instruction");
1075 // If one instruction matched with a missing feature, report this as a
1077 if ((Match1
== Match_MissingFeature
) + (Match2
== Match_MissingFeature
) +
1078 (Match3
== Match_MissingFeature
) + (Match4
== Match_MissingFeature
) == 1){
1079 Error(IDLoc
, "instruction requires a CPU feature not currently enabled");
1083 // If one instruction matched with an invalid operand, report this as an
1085 if ((Match1
== Match_InvalidOperand
) + (Match2
== Match_InvalidOperand
) +
1086 (Match3
== Match_InvalidOperand
) + (Match4
== Match_InvalidOperand
) == 1){
1087 Error(IDLoc
, "invalid operand for instruction");
1091 // If all of these were an outright failure, report it in a useless way.
1092 // FIXME: We should give nicer diagnostics about the exact failure.
1093 Error(IDLoc
, "unknown use of instruction mnemonic without a size suffix");
/// ParseDirective - Target-specific directive hook. The only directive
/// handled in the visible code is ".word", which is forwarded to
/// ParseDirectiveWord with a value size of 2 bytes.
1098 bool X86ATTAsmParser::ParseDirective(AsmToken DirectiveID
) {
1099 StringRef IDVal
= DirectiveID
.getIdentifier();
1100 if (IDVal
== ".word")
1101 return ParseDirectiveWord(2, DirectiveID
.getLoc());
1105 /// ParseDirectiveWord
1106 /// ::= .word [ expression (, expression)* ]
///
/// Each parsed expression is emitted to the streamer as a value of Size
/// bytes. L is the directive's location and is used for the "unexpected
/// token" diagnostic.
1107 bool X86ATTAsmParser::ParseDirectiveWord(unsigned Size
, SMLoc L
) {
// An empty operand list (immediate end of statement) emits nothing.
1108 if (getLexer().isNot(AsmToken::EndOfStatement
)) {
1110 const MCExpr
*Value
;
1111 if (getParser().ParseExpression(Value
))
1114 getParser().getStreamer().EmitValue(Value
, Size
, 0 /*addrspace*/);
1116 if (getLexer().is(AsmToken::EndOfStatement
))
1119 // FIXME: Improve diagnostic.
// Anything other than ',' between expressions is an error.
1120 if (getLexer().isNot(AsmToken::Comma
))
1121 return Error(L
, "unexpected token in directive");
1133 extern "C" void LLVMInitializeX86AsmLexer();
1135 // Force static initialization.
// Registers X86ATTAsmParser as the assembly parser for both the 32-bit and
// 64-bit X86 targets, then initializes the X86 assembly lexer.
1136 extern "C" void LLVMInitializeX86AsmParser() {
1137 RegisterAsmParser
<X86ATTAsmParser
> X(TheX86_32Target
);
1138 RegisterAsmParser
<X86ATTAsmParser
> Y(TheX86_64Target
);
1139 LLVMInitializeX86AsmLexer();
1142 #define GET_REGISTER_MATCHER
1143 #define GET_MATCHER_IMPLEMENTATION
1144 #include "X86GenAsmMatcher.inc"