1 //===-- X86AsmParser.cpp - Parse X86 assembly to MCInst instructions ------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
11 #include "llvm/ADT/SmallVector.h"
12 #include "llvm/ADT/Twine.h"
13 #include "llvm/MC/MCAsmLexer.h"
14 #include "llvm/MC/MCAsmParser.h"
15 #include "llvm/MC/MCStreamer.h"
16 #include "llvm/MC/MCExpr.h"
17 #include "llvm/MC/MCInst.h"
18 #include "llvm/Support/SourceMgr.h"
19 #include "llvm/Target/TargetRegistry.h"
20 #include "llvm/Target/TargetAsmParser.h"
/// X86ATTAsmParser - X86 target assembly parser. Derives from TargetAsmParser
/// and forwards lexing and diagnostics to the MCAsmParser handed to its
/// constructor.
26 class X86ATTAsmParser
: public TargetAsmParser
{
// Accessor for the generic assembly parser stored in Parser.
30 MCAsmParser
&getParser() const { return Parser
; }
// Accessor for the lexer, obtained from the generic parser.
32 MCAsmLexer
&getLexer() const { return Parser
.getLexer(); }
// Forward a warning with message Msg at location L to the generic parser.
34 void Warning(SMLoc L
, const Twine
&Msg
) { Parser
.Warning(L
, Msg
); }
// Forward an error with message Msg at location L to the generic parser and
// return whatever it returns.
36 bool Error(SMLoc L
, const Twine
&Msg
) { return Parser
.Error(L
, Msg
); }
// Operand parsing helpers; each fills in Op.
38 bool ParseRegister(X86Operand
&Op
);
40 bool ParseOperand(X86Operand
&Op
);
42 bool ParseMemOperand(X86Operand
&Op
);
// Handler for the ".word" directive; Size is the byte size passed on to the
// streamer for each value, L the directive location used for diagnostics.
44 bool ParseDirectiveWord(unsigned Size
, SMLoc L
);
46 /// @name Auto-generated Match Functions
49 bool MatchInstruction(SmallVectorImpl
<X86Operand
> &Operands
,
52 /// MatchRegisterName - Match the given string to a register name, or 0 if
53 /// there is no match.
54 unsigned MatchRegisterName(const StringRef
&Name
);
// Construct a parser for target T that wraps the generic parser _Parser.
59 X86ATTAsmParser(const Target
&T
, MCAsmParser
&_Parser
)
60 : TargetAsmParser(T
), Parser(_Parser
) {}
// Virtual entry points for parsing one instruction (mnemonic Name, result in
// Inst) and one directive.
// NOTE(review): presumably these override TargetAsmParser hooks - confirm.
62 virtual bool ParseInstruction(const StringRef
&Name
, MCInst
&Inst
);
64 virtual bool ParseDirective(AsmToken DirectiveID
);
67 } // end anonymous namespace
72 /// X86Operand - Instances of this class represent a parsed X86 machine
/// getToken - Return the token text as a StringRef built from Tok.Data and
/// Tok.Length. Only valid when the operand kind is Token (asserted).
105 StringRef
getToken() const {
106 assert(Kind
== Token
&& "Invalid access!");
107 return StringRef(Tok
.Data
, Tok
.Length
);
/// getReg - Accessor for the register of this operand. Only valid when the
/// operand kind is Register (asserted).
110 unsigned getReg() const {
111 assert(Kind
== Register
&& "Invalid access!");
/// getImm - Accessor for the immediate expression of this operand. Only valid
/// when the operand kind is Immediate (asserted).
115 const MCExpr
*getImm() const {
116 assert(Kind
== Immediate
&& "Invalid access!");
/// getMemDisp - Accessor for the displacement expression of a memory operand.
/// Only valid when the operand kind is Memory (asserted).
120 const MCExpr
*getMemDisp() const {
121 assert(Kind
== Memory
&& "Invalid access!");
/// getMemSegReg - Accessor for the segment register of a memory operand.
/// Only valid when the operand kind is Memory (asserted).
124 unsigned getMemSegReg() const {
125 assert(Kind
== Memory
&& "Invalid access!");
/// getMemBaseReg - Accessor for the base register of a memory operand.
/// Only valid when the operand kind is Memory (asserted).
128 unsigned getMemBaseReg() const {
129 assert(Kind
== Memory
&& "Invalid access!");
/// getMemIndexReg - Accessor for the index register of a memory operand.
/// Only valid when the operand kind is Memory (asserted).
132 unsigned getMemIndexReg() const {
133 assert(Kind
== Memory
&& "Invalid access!");
/// getMemScale - Accessor for the scale factor of a memory operand.
/// Only valid when the operand kind is Memory (asserted).
136 unsigned getMemScale() const {
137 assert(Kind
== Memory
&& "Invalid access!");
/// isToken - True when this operand's kind is Token.
141 bool isToken() const {return Kind
== Token
; }
/// isImm - True when this operand's kind is Immediate.
143 bool isImm() const { return Kind
== Immediate
; }
/// isImmSExt8 - Predicate for immediates usable where a sign-extended 8-bit
/// immediate is expected.
145 bool isImmSExt8() const {
146 // Accept immediates which fit in 8 bits when sign extended, and
147 // non-absolute immediates.
// Constant expressions are range-checked below.
151 if (const MCConstantExpr
*CE
= dyn_cast
<MCConstantExpr
>(getImm())) {
152 int64_t Value
= CE
->getValue();
// Round-trip through int8_t: equal only when Value is unchanged by
// truncation to 8 bits followed by sign extension back to 64 bits.
153 return Value
== (int64_t) (int8_t) Value
;
/// isMem - True when this operand's kind is Memory.
159 bool isMem() const { return Kind
== Memory
; }
/// isReg - True when this operand's kind is Register.
161 bool isReg() const { return Kind
== Register
; }
/// addRegOperands - Append this operand's register to Inst as one register
/// MCOperand. N is the number of MCInst operands requested and must be 1.
163 void addRegOperands(MCInst
&Inst
, unsigned N
) const {
164 assert(N
== 1 && "Invalid number of operands!");
165 Inst
.addOperand(MCOperand::CreateReg(getReg()));
/// addImmOperands - Append this operand's immediate expression to Inst as one
/// expression MCOperand. N is the number of MCInst operands requested and
/// must be 1.
168 void addImmOperands(MCInst
&Inst
, unsigned N
) const {
169 assert(N
== 1 && "Invalid number of operands!");
170 Inst
.addOperand(MCOperand::CreateExpr(getImm()));
/// addImmSExt8Operands - Append this operand's immediate expression to Inst
/// as one expression MCOperand; currently identical to addImmOperands.
/// N must be 1.
173 void addImmSExt8Operands(MCInst
&Inst
, unsigned N
) const {
174 // FIXME: Support user customization of the render method.
175 assert(N
== 1 && "Invalid number of operands!");
176 Inst
.addOperand(MCOperand::CreateExpr(getImm()));
/// addMemOperands - Append the components of this memory operand to Inst in
/// the order: base register, scale, index register, displacement, and then
/// the segment register. N must be 4 or 5.
/// NOTE(review): the segment-register append under the FIXME is presumably
/// done only for the 5-operand form - confirm against the guarding condition.
179 void addMemOperands(MCInst
&Inst
, unsigned N
) const {
180 assert((N
== 4 || N
== 5) && "Invalid number of operands!");
182 Inst
.addOperand(MCOperand::CreateReg(getMemBaseReg()));
183 Inst
.addOperand(MCOperand::CreateImm(getMemScale()));
184 Inst
.addOperand(MCOperand::CreateReg(getMemIndexReg()));
185 Inst
.addOperand(MCOperand::CreateExpr(getMemDisp()));
187 // FIXME: What a hack.
189 Inst
.addOperand(MCOperand::CreateReg(getMemSegReg()));
/// CreateToken - Build an operand whose Tok fields point at the characters of
/// Str (Data) and record its length. The characters are referenced, not
/// copied, by the assignments below.
192 static X86Operand
CreateToken(StringRef Str
) {
195 Res
.Tok
.Data
= Str
.data();
196 Res
.Tok
.Length
= Str
.size();
/// CreateReg - Build an operand whose Reg.RegNo field is set to RegNo.
200 static X86Operand
CreateReg(unsigned RegNo
) {
203 Res
.Reg
.RegNo
= RegNo
;
/// CreateImm - Build an operand of kind Immediate for the expression Val.
207 static X86Operand
CreateImm(const MCExpr
*Val
) {
209 Res
.Kind
= Immediate
;
/// CreateMem - Build a memory operand from a segment register, displacement
/// expression, base register, index register and scale. Asserts that at
/// least one of the three registers is non-zero and that the scale is 1, 2,
/// 4 or 8, then records the components in Res.Mem.
214 static X86Operand
CreateMem(unsigned SegReg
, const MCExpr
*Disp
,
215 unsigned BaseReg
, unsigned IndexReg
,
217 // We should never just have a displacement, that would be an immediate.
218 assert((SegReg
|| BaseReg
|| IndexReg
) && "Invalid memory operand!");
220 // The scale should always be one of {1,2,4,8}.
221 assert(((Scale
== 1 || Scale
== 2 || Scale
== 4 || Scale
== 8)) &&
225 Res
.Mem
.SegReg
= SegReg
;
227 Res
.Mem
.BaseReg
= BaseReg
;
228 Res
.Mem
.IndexReg
= IndexReg
;
229 Res
.Mem
.Scale
= Scale
;
234 } // end anonymous namespace.
/// ParseRegister - Parse a register reference of the form '%' identifier and
/// store the resulting register operand in Op. The current token must be the
/// '%' (asserted). The identifier is looked up with MatchRegisterName; an
/// "invalid register name" error is reported at the identifier's location
/// through Error() for names that do not match.
237 bool X86ATTAsmParser::ParseRegister(X86Operand
&Op
) {
238 const AsmToken
&TokPercent
= getLexer().getTok();
239 (void)TokPercent
; // Avoid warning when assertions are disabled.
240 assert(TokPercent
.is(AsmToken::Percent
) && "Invalid token kind!");
241 getLexer().Lex(); // Eat percent token.
243 const AsmToken
&Tok
= getLexer().getTok();
// Check the token that follows the '%' (Tok). The previous code asserted on
// TokPercent, which only tested the right token if getTok() happens to
// return a reference to the lexer's single current-token object.
244 assert(Tok
.is(AsmToken::Identifier
) && "Invalid token kind!");
246 // FIXME: Validate register for the current architecture; we have to do
247 // validation later, so maybe there is no need for this here.
250 RegNo
= MatchRegisterName(Tok
.getString());
252 return Error(Tok
.getLoc(), "invalid register name");
254 Op
= X86Operand::CreateReg(RegNo
);
255 getLexer().Lex(); // Eat identifier token.
/// ParseOperand - Parse a single instruction operand into Op, dispatching on
/// the kind of the current token: '%' starts a register (ParseRegister), '$'
/// starts an immediate whose value is parsed as an expression and wrapped
/// with CreateImm, and the remaining path hands off to ParseMemOperand.
/// NOTE(review): the ParseMemOperand return is presumably the default case -
/// confirm against the case label.
260 bool X86ATTAsmParser::ParseOperand(X86Operand
&Op
) {
261 switch (getLexer().getKind()) {
263 return ParseMemOperand(Op
);
264 case AsmToken::Percent
:
265 // FIXME: if a segment register, this could either be just the seg reg, or
266 // the start of a memory operand.
267 return ParseRegister(Op
);
268 case AsmToken::Dollar
: {
272 if (getParser().ParseExpression(Val
))
274 Op
= X86Operand::CreateImm(Val
);
280 /// ParseMemOperand: segment: disp(basereg, indexreg, scale)
///
/// Parses the displacement (defaulting to the constant 0), then an optional
/// parenthesized "(base, index, scale)" part, and stores the result in Op.
/// When no parenthesized part follows the displacement, Op becomes either a
/// memory operand with only a segment register or a plain immediate. The
/// scale must evaluate to 1, 2, 4 or 8, and a scale without an index register
/// is rejected. Expression-parse failures propagate as a true return.
281 bool X86ATTAsmParser::ParseMemOperand(X86Operand
&Op
) {
282 // FIXME: If SegReg ':' (e.g. %gs:), eat and remember.
285 // We have to disambiguate a parenthesized expression "(4+5)" from the start
286 // of a memory operand with a missing displacement "(%ebx)" or "(,%eax)". The
287 // only way to do this without lookahead is to eat the ( and see what is after it.
289 const MCExpr
*Disp
= MCConstantExpr::Create(0, getParser().getContext());
290 if (getLexer().isNot(AsmToken::LParen
)) {
291 if (getParser().ParseExpression(Disp
)) return true;
293 // After parsing the base expression we could either have a parenthesized
294 // memory address or not. If not, return now. If so, eat the (.
295 if (getLexer().isNot(AsmToken::LParen
)) {
296 // Unless we have a segment register, treat this as an immediate.
298 Op
= X86Operand::CreateMem(SegReg
, Disp
, 0, 0, 1);
300 Op
= X86Operand::CreateImm(Disp
);
307 // Okay, we have a '('. We don't know if this is an expression or not, so
308 // we have to eat the ( to see beyond it.
309 getLexer().Lex(); // Eat the '('.
311 if (getLexer().is(AsmToken::Percent
) || getLexer().is(AsmToken::Comma
)) {
312 // Nothing to do here, fall into the code below with the '(' part of the
313 // memory operand consumed.
315 // It must be a parenthesized expression, parse it now.
316 if (getParser().ParseParenExpression(Disp
))
319 // After parsing the base expression we could either have a parenthesized
320 // memory address or not. If not, return now. If so, eat the (.
321 if (getLexer().isNot(AsmToken::LParen
)) {
322 // Unless we have a segment register, treat this as an immediate.
324 Op
= X86Operand::CreateMem(SegReg
, Disp
, 0, 0, 1);
326 Op
= X86Operand::CreateImm(Disp
);
335 // If we reached here, then we just ate the ( of the memory operand. Process
336 // the rest of the memory operand.
337 unsigned BaseReg
= 0, IndexReg
= 0, Scale
= 1;
// Optional base register; Op is used as scratch storage for the parsed
// register and is overwritten with the final memory operand at the end.
339 if (getLexer().is(AsmToken::Percent
)) {
340 if (ParseRegister(Op
))
342 BaseReg
= Op
.getReg();
345 if (getLexer().is(AsmToken::Comma
)) {
346 getLexer().Lex(); // Eat the comma.
348 // Following the comma we should have either an index register, or a scale
349 // value. We don't support the latter form, but we want to parse it correctly.
352 // Note that even though it would be completely consistent to support syntax
353 // like "1(%eax,,1)", the assembler doesn't.
354 if (getLexer().is(AsmToken::Percent
)) {
355 if (ParseRegister(Op
))
357 IndexReg
= Op
.getReg();
359 if (getLexer().isNot(AsmToken::RParen
)) {
360 // Parse the scale amount:
361 // ::= ',' [scale-expression]
362 if (getLexer().isNot(AsmToken::Comma
))
364 getLexer().Lex(); // Eat the comma.
366 if (getLexer().isNot(AsmToken::RParen
)) {
367 SMLoc Loc
= getLexer().getTok().getLoc();
370 if (getParser().ParseAbsoluteExpression(ScaleVal
))
373 // Validate the scale amount.
374 if (ScaleVal
!= 1 && ScaleVal
!= 2 && ScaleVal
!= 4 && ScaleVal
!= 8)
375 return Error(Loc
, "scale factor in address must be 1, 2, 4 or 8");
376 Scale
= (unsigned)ScaleVal
;
379 } else if (getLexer().isNot(AsmToken::RParen
)) {
380 // Otherwise we have the unsupported form of a scale amount without an index register.
382 SMLoc Loc
= getLexer().getTok().getLoc();
// Parse the value anyway so the diagnostic below points at the scale.
385 if (getParser().ParseAbsoluteExpression(Value
))
388 return Error(Loc
, "cannot have scale factor without index register");
392 // Ok, we've eaten the memory operand, verify we have a ')' and eat it too.
393 if (getLexer().isNot(AsmToken::RParen
))
394 return Error(getLexer().getTok().getLoc(),
395 "unexpected token in memory operand");
396 getLexer().Lex(); // Eat the ')'.
398 Op
= X86Operand::CreateMem(SegReg
, Disp
, BaseReg
, IndexReg
, Scale
);
/// ParseInstruction - Parse the operands of the instruction whose mnemonic is
/// Name and match the result into Inst. The operand list starts with the
/// mnemonic as a Token operand, optionally followed by a "*" token, then the
/// comma-separated operands parsed with ParseOperand. The list is handed to
/// MatchInstruction; "unrecognized instruction" is reported at the location
/// recorded before operand parsing began.
/// NOTE(review): presumably the error is only reached when matching fails -
/// confirm against the return taken after MatchInstruction.
402 bool X86ATTAsmParser::ParseInstruction(const StringRef
&Name
, MCInst
&Inst
) {
403 SmallVector
<X86Operand
, 8> Operands
;
// The mnemonic itself is operand 0 of the list given to the matcher.
405 Operands
.push_back(X86Operand::CreateToken(Name
));
// Remember where the operands start for the diagnostic at the end.
407 SMLoc Loc
= getLexer().getTok().getLoc();
408 if (getLexer().isNot(AsmToken::EndOfStatement
)) {
410 // Parse '*' modifier.
411 if (getLexer().is(AsmToken::Star
)) {
412 getLexer().Lex(); // Eat the star.
413 Operands
.push_back(X86Operand::CreateToken("*"));
416 // Read the first operand.
// A placeholder is appended first and ParseOperand fills it in place.
417 Operands
.push_back(X86Operand());
418 if (ParseOperand(Operands
.back()))
421 while (getLexer().is(AsmToken::Comma
)) {
422 getLexer().Lex(); // Eat the comma.
424 // Parse and remember the operand.
425 Operands
.push_back(X86Operand());
426 if (ParseOperand(Operands
.back()))
431 if (!MatchInstruction(Operands
, Inst
))
434 // FIXME: We should give nicer diagnostics about the exact failure.
436 Error(Loc
, "unrecognized instruction");
/// ParseDirective - Dispatch on the identifier of DirectiveID. ".word" is
/// handled by ParseDirectiveWord with a size of 2 and the directive's
/// location.
440 bool X86ATTAsmParser::ParseDirective(AsmToken DirectiveID
) {
441 StringRef IDVal
= DirectiveID
.getIdentifier();
442 if (IDVal
== ".word")
443 return ParseDirectiveWord(2, DirectiveID
.getLoc());
447 /// ParseDirectiveWord
448 /// ::= .word [ expression (, expression)* ]
///
/// Each parsed expression is emitted to the streamer with EmitValue using
/// Size. After a value, any token other than end-of-statement or a comma is
/// reported as "unexpected token in directive" at location L.
449 bool X86ATTAsmParser::ParseDirectiveWord(unsigned Size
, SMLoc L
) {
// An empty operand list is accepted; nothing is emitted in that case.
450 if (getLexer().isNot(AsmToken::EndOfStatement
)) {
453 if (getParser().ParseExpression(Value
))
456 getParser().getStreamer().EmitValue(Value
, Size
);
458 if (getLexer().is(AsmToken::EndOfStatement
))
461 // FIXME: Improve diagnostic.
462 if (getLexer().isNot(AsmToken::Comma
))
463 return Error(L
, "unexpected token in directive");
472 // Force static initialization.
// C-linkage entry point that registers X86ATTAsmParser as the assembly parser
// for both TheX86_32Target and TheX86_64Target by constructing one
// RegisterAsmParser object per target.
473 extern "C" void LLVMInitializeX86AsmParser() {
474 RegisterAsmParser
<X86ATTAsmParser
> X(TheX86_32Target
);
475 RegisterAsmParser
<X86ATTAsmParser
> Y(TheX86_64Target
);
478 #include "X86GenAsmMatcher.inc"