1 //===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
10 #include "AMDGPU.h"
11 #include "AMDKernelCodeT.h"
12 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
13 #include "MCTargetDesc/AMDGPUTargetStreamer.h"
14 #include "SIDefines.h"
15 #include "SIInstrInfo.h"
16 #include "Utils/AMDGPUAsmUtils.h"
17 #include "Utils/AMDGPUBaseInfo.h"
18 #include "Utils/AMDKernelCodeTUtils.h"
19 #include "llvm/ADT/APFloat.h"
20 #include "llvm/ADT/APInt.h"
21 #include "llvm/ADT/ArrayRef.h"
22 #include "llvm/ADT/STLExtras.h"
23 #include "llvm/ADT/SmallBitVector.h"
24 #include "llvm/ADT/SmallString.h"
25 #include "llvm/ADT/StringRef.h"
26 #include "llvm/ADT/StringSwitch.h"
27 #include "llvm/ADT/Twine.h"
28 #include "llvm/BinaryFormat/ELF.h"
29 #include "llvm/MC/MCAsmInfo.h"
30 #include "llvm/MC/MCContext.h"
31 #include "llvm/MC/MCExpr.h"
32 #include "llvm/MC/MCInst.h"
33 #include "llvm/MC/MCInstrDesc.h"
34 #include "llvm/MC/MCInstrInfo.h"
35 #include "llvm/MC/MCParser/MCAsmLexer.h"
36 #include "llvm/MC/MCParser/MCAsmParser.h"
37 #include "llvm/MC/MCParser/MCAsmParserExtension.h"
38 #include "llvm/MC/MCParser/MCParsedAsmOperand.h"
39 #include "llvm/MC/MCParser/MCTargetAsmParser.h"
40 #include "llvm/MC/MCRegisterInfo.h"
41 #include "llvm/MC/MCStreamer.h"
42 #include "llvm/MC/MCSubtargetInfo.h"
43 #include "llvm/MC/MCSymbol.h"
44 #include "llvm/Support/AMDGPUMetadata.h"
45 #include "llvm/Support/AMDHSAKernelDescriptor.h"
46 #include "llvm/Support/Casting.h"
47 #include "llvm/Support/Compiler.h"
48 #include "llvm/Support/ErrorHandling.h"
49 #include "llvm/Support/MachineValueType.h"
50 #include "llvm/Support/MathExtras.h"
51 #include "llvm/Support/SMLoc.h"
52 #include "llvm/Support/TargetRegistry.h"
53 #include "llvm/Support/raw_ostream.h"
54 #include <algorithm>
55 #include <cassert>
56 #include <cstdint>
57 #include <cstring>
58 #include <iterator>
59 #include <map>
60 #include <memory>
61 #include <string>
63 using namespace llvm;
64 using namespace llvm::AMDGPU;
65 using namespace llvm::amdhsa;
67 namespace {
69 class AMDGPUAsmParser;
71 enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_TTMP, IS_SPECIAL };
73 //===----------------------------------------------------------------------===//
74 // Operand
75 //===----------------------------------------------------------------------===//
77 class AMDGPUOperand : public MCParsedAsmOperand {
78 enum KindTy {
79 Token,
80 Immediate,
81 Register,
82 Expression
83 } Kind;
85 SMLoc StartLoc, EndLoc;
86 const AMDGPUAsmParser *AsmParser;
88 public:
89 AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_)
90 : MCParsedAsmOperand(), Kind(Kind_), AsmParser(AsmParser_) {}
92 using Ptr = std::unique_ptr<AMDGPUOperand>;
94 struct Modifiers {
95 bool Abs = false;
96 bool Neg = false;
97 bool Sext = false;
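// Illustrative mapping from assembler syntax to these flags (hedged summary):
// "-v0" sets Neg, "|v0|" or "abs(v0)" sets Abs, and "sext(v0)" sets Sext.
// FP and integer modifiers are mutually exclusive (see the assert below).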
99 bool hasFPModifiers() const { return Abs || Neg; }
100 bool hasIntModifiers() const { return Sext; }
101 bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); }
103 int64_t getFPModifiersOperand() const {
104 int64_t Operand = 0;
105 Operand |= Abs ? SISrcMods::ABS : 0;
106 Operand |= Neg ? SISrcMods::NEG : 0;
107 return Operand;
110 int64_t getIntModifiersOperand() const {
111 int64_t Operand = 0;
112 Operand |= Sext ? SISrcMods::SEXT : 0;
113 return Operand;
116 int64_t getModifiersOperand() const {
117 assert(!(hasFPModifiers() && hasIntModifiers())
118 && "fp and int modifiers should not be used simultaneously");
119 if (hasFPModifiers()) {
120 return getFPModifiersOperand();
121 } else if (hasIntModifiers()) {
122 return getIntModifiersOperand();
123 } else {
124 return 0;
128 friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
131 enum ImmTy {
132 ImmTyNone,
133 ImmTyGDS,
134 ImmTyLDS,
135 ImmTyOffen,
136 ImmTyIdxen,
137 ImmTyAddr64,
138 ImmTyOffset,
139 ImmTyInstOffset,
140 ImmTyOffset0,
141 ImmTyOffset1,
142 ImmTyGLC,
143 ImmTySLC,
144 ImmTyTFE,
145 ImmTyD16,
146 ImmTyClampSI,
147 ImmTyOModSI,
148 ImmTyDppCtrl,
149 ImmTyDppRowMask,
150 ImmTyDppBankMask,
151 ImmTyDppBoundCtrl,
152 ImmTySdwaDstSel,
153 ImmTySdwaSrc0Sel,
154 ImmTySdwaSrc1Sel,
155 ImmTySdwaDstUnused,
156 ImmTyDMask,
157 ImmTyUNorm,
158 ImmTyDA,
159 ImmTyR128,
160 ImmTyLWE,
161 ImmTyExpTgt,
162 ImmTyExpCompr,
163 ImmTyExpVM,
164 ImmTyDFMT,
165 ImmTyNFMT,
166 ImmTyHwreg,
167 ImmTyOff,
168 ImmTySendMsg,
169 ImmTyInterpSlot,
170 ImmTyInterpAttr,
171 ImmTyAttrChan,
172 ImmTyOpSel,
173 ImmTyOpSelHi,
174 ImmTyNegLo,
175 ImmTyNegHi,
176 ImmTySwizzle,
177 ImmTyHigh
180 struct TokOp {
181 const char *Data;
182 unsigned Length;
185 struct ImmOp {
186 int64_t Val;
187 ImmTy Type;
188 bool IsFPImm;
189 Modifiers Mods;
192 struct RegOp {
193 unsigned RegNo;
194 bool IsForcedVOP3;
195 Modifiers Mods;
198 union {
199 TokOp Tok;
200 ImmOp Imm;
201 RegOp Reg;
202 const MCExpr *Expr;
205 bool isToken() const override {
206 if (Kind == Token)
207 return true;
209 if (Kind != Expression || !Expr)
210 return false;
212 // When parsing operands, we can't always tell if something was meant to be
213 // a token, like 'gds', or an expression that references a global variable.
214 // In this case, we assume the string is an expression, and if we need to
215 // interpret it as a token, then we treat the symbol name as the token.
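// Illustrative case: in "ds_add_u32 v0, v1 gds", the trailing "gds" is lexed
// as an identifier and reaches us as an MCSymbolRefExpr; this predicate lets
// it be re-interpreted as the token "gds".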
216 return isa<MCSymbolRefExpr>(Expr);
219 bool isImm() const override {
220 return Kind == Immediate;
223 bool isInlinableImm(MVT type) const;
224 bool isLiteralImm(MVT type) const;
226 bool isRegKind() const {
227 return Kind == Register;
230 bool isReg() const override {
231 return isRegKind() && !hasModifiers();
234 bool isRegOrImmWithInputMods(MVT type) const {
235 return isRegKind() || isInlinableImm(type);
238 bool isRegOrImmWithInt16InputMods() const {
239 return isRegOrImmWithInputMods(MVT::i16);
242 bool isRegOrImmWithInt32InputMods() const {
243 return isRegOrImmWithInputMods(MVT::i32);
246 bool isRegOrImmWithInt64InputMods() const {
247 return isRegOrImmWithInputMods(MVT::i64);
250 bool isRegOrImmWithFP16InputMods() const {
251 return isRegOrImmWithInputMods(MVT::f16);
254 bool isRegOrImmWithFP32InputMods() const {
255 return isRegOrImmWithInputMods(MVT::f32);
258 bool isRegOrImmWithFP64InputMods() const {
259 return isRegOrImmWithInputMods(MVT::f64);
262 bool isVReg() const {
263 return isRegClass(AMDGPU::VGPR_32RegClassID) ||
264 isRegClass(AMDGPU::VReg_64RegClassID) ||
265 isRegClass(AMDGPU::VReg_96RegClassID) ||
266 isRegClass(AMDGPU::VReg_128RegClassID) ||
267 isRegClass(AMDGPU::VReg_256RegClassID) ||
268 isRegClass(AMDGPU::VReg_512RegClassID);
271 bool isVReg32OrOff() const {
272 return isOff() || isRegClass(AMDGPU::VGPR_32RegClassID);
275 bool isSDWAOperand(MVT type) const;
276 bool isSDWAFP16Operand() const;
277 bool isSDWAFP32Operand() const;
278 bool isSDWAInt16Operand() const;
279 bool isSDWAInt32Operand() const;
281 bool isImmTy(ImmTy ImmT) const {
282 return isImm() && Imm.Type == ImmT;
285 bool isImmModifier() const {
286 return isImm() && Imm.Type != ImmTyNone;
289 bool isClampSI() const { return isImmTy(ImmTyClampSI); }
290 bool isOModSI() const { return isImmTy(ImmTyOModSI); }
291 bool isDMask() const { return isImmTy(ImmTyDMask); }
292 bool isUNorm() const { return isImmTy(ImmTyUNorm); }
293 bool isDA() const { return isImmTy(ImmTyDA); }
294 bool isR128() const { return isImmTy(ImmTyR128); }
295 bool isLWE() const { return isImmTy(ImmTyLWE); }
296 bool isOff() const { return isImmTy(ImmTyOff); }
297 bool isExpTgt() const { return isImmTy(ImmTyExpTgt); }
298 bool isExpVM() const { return isImmTy(ImmTyExpVM); }
299 bool isExpCompr() const { return isImmTy(ImmTyExpCompr); }
300 bool isOffen() const { return isImmTy(ImmTyOffen); }
301 bool isIdxen() const { return isImmTy(ImmTyIdxen); }
302 bool isAddr64() const { return isImmTy(ImmTyAddr64); }
303 bool isOffset() const { return isImmTy(ImmTyOffset) && isUInt<16>(getImm()); }
304 bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<16>(getImm()); }
305 bool isOffset1() const { return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); }
307 bool isOffsetU12() const { return (isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset)) && isUInt<12>(getImm()); }
308 bool isOffsetS13() const { return (isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset)) && isInt<13>(getImm()); }
309 bool isGDS() const { return isImmTy(ImmTyGDS); }
310 bool isLDS() const { return isImmTy(ImmTyLDS); }
311 bool isGLC() const { return isImmTy(ImmTyGLC); }
312 bool isSLC() const { return isImmTy(ImmTySLC); }
313 bool isTFE() const { return isImmTy(ImmTyTFE); }
314 bool isD16() const { return isImmTy(ImmTyD16); }
315 bool isDFMT() const { return isImmTy(ImmTyDFMT) && isUInt<8>(getImm()); }
316 bool isNFMT() const { return isImmTy(ImmTyNFMT) && isUInt<8>(getImm()); }
317 bool isBankMask() const { return isImmTy(ImmTyDppBankMask); }
318 bool isRowMask() const { return isImmTy(ImmTyDppRowMask); }
319 bool isBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); }
320 bool isSDWADstSel() const { return isImmTy(ImmTySdwaDstSel); }
321 bool isSDWASrc0Sel() const { return isImmTy(ImmTySdwaSrc0Sel); }
322 bool isSDWASrc1Sel() const { return isImmTy(ImmTySdwaSrc1Sel); }
323 bool isSDWADstUnused() const { return isImmTy(ImmTySdwaDstUnused); }
324 bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); }
325 bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); }
326 bool isAttrChan() const { return isImmTy(ImmTyAttrChan); }
327 bool isOpSel() const { return isImmTy(ImmTyOpSel); }
328 bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); }
329 bool isNegLo() const { return isImmTy(ImmTyNegLo); }
330 bool isNegHi() const { return isImmTy(ImmTyNegHi); }
331 bool isHigh() const { return isImmTy(ImmTyHigh); }
333 bool isMod() const {
334 return isClampSI() || isOModSI();
337 bool isRegOrImm() const {
338 return isReg() || isImm();
341 bool isRegClass(unsigned RCID) const;
343 bool isRegOrInlineNoMods(unsigned RCID, MVT type) const {
344 return (isRegClass(RCID) || isInlinableImm(type)) && !hasModifiers();
347 bool isSCSrcB16() const {
348 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16);
351 bool isSCSrcV2B16() const {
352 return isSCSrcB16();
355 bool isSCSrcB32() const {
356 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32);
359 bool isSCSrcB64() const {
360 return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64);
363 bool isSCSrcF16() const {
364 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16);
367 bool isSCSrcV2F16() const {
368 return isSCSrcF16();
371 bool isSCSrcF32() const {
372 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32);
375 bool isSCSrcF64() const {
376 return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64);
379 bool isSSrcB32() const {
380 return isSCSrcB32() || isLiteralImm(MVT::i32) || isExpr();
383 bool isSSrcB16() const {
384 return isSCSrcB16() || isLiteralImm(MVT::i16);
387 bool isSSrcV2B16() const {
388 llvm_unreachable("cannot happen");
389 return isSSrcB16();
392 bool isSSrcB64() const {
393 // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits.
394 // See isVSrc64().
395 return isSCSrcB64() || isLiteralImm(MVT::i64);
398 bool isSSrcF32() const {
399 return isSCSrcB32() || isLiteralImm(MVT::f32) || isExpr();
402 bool isSSrcF64() const {
403 return isSCSrcB64() || isLiteralImm(MVT::f64);
406 bool isSSrcF16() const {
407 return isSCSrcB16() || isLiteralImm(MVT::f16);
410 bool isSSrcV2F16() const {
411 llvm_unreachable("cannot happen");
412 return isSSrcF16();
415 bool isVCSrcB32() const {
416 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32);
419 bool isVCSrcB64() const {
420 return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64);
423 bool isVCSrcB16() const {
424 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16);
427 bool isVCSrcV2B16() const {
428 return isVCSrcB16();
431 bool isVCSrcF32() const {
432 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32);
435 bool isVCSrcF64() const {
436 return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64);
439 bool isVCSrcF16() const {
440 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16);
443 bool isVCSrcV2F16() const {
444 return isVCSrcF16();
447 bool isVSrcB32() const {
448 return isVCSrcF32() || isLiteralImm(MVT::i32) || isExpr();
451 bool isVSrcB64() const {
452 return isVCSrcF64() || isLiteralImm(MVT::i64);
455 bool isVSrcB16() const {
456 return isVCSrcF16() || isLiteralImm(MVT::i16);
459 bool isVSrcV2B16() const {
460 llvm_unreachable("cannot happen");
461 return isVSrcB16();
464 bool isVSrcF32() const {
465 return isVCSrcF32() || isLiteralImm(MVT::f32) || isExpr();
468 bool isVSrcF64() const {
469 return isVCSrcF64() || isLiteralImm(MVT::f64);
472 bool isVSrcF16() const {
473 return isVCSrcF16() || isLiteralImm(MVT::f16);
476 bool isVSrcV2F16() const {
477 llvm_unreachable("cannot happen");
478 return isVSrcF16();
481 bool isKImmFP32() const {
482 return isLiteralImm(MVT::f32);
485 bool isKImmFP16() const {
486 return isLiteralImm(MVT::f16);
489 bool isMem() const override {
490 return false;
493 bool isExpr() const {
494 return Kind == Expression;
497 bool isSoppBrTarget() const {
498 return isExpr() || isImm();
501 bool isSWaitCnt() const;
502 bool isHwreg() const;
503 bool isSendMsg() const;
504 bool isSwizzle() const;
505 bool isSMRDOffset8() const;
506 bool isSMRDOffset20() const;
507 bool isSMRDLiteralOffset() const;
508 bool isDPPCtrl() const;
509 bool isGPRIdxMode() const;
510 bool isS16Imm() const;
511 bool isU16Imm() const;
513 StringRef getExpressionAsToken() const {
514 assert(isExpr());
515 const MCSymbolRefExpr *S = cast<MCSymbolRefExpr>(Expr);
516 return S->getSymbol().getName();
519 StringRef getToken() const {
520 assert(isToken());
522 if (Kind == Expression)
523 return getExpressionAsToken();
525 return StringRef(Tok.Data, Tok.Length);
528 int64_t getImm() const {
529 assert(isImm());
530 return Imm.Val;
533 ImmTy getImmTy() const {
534 assert(isImm());
535 return Imm.Type;
538 unsigned getReg() const override {
539 return Reg.RegNo;
542 SMLoc getStartLoc() const override {
543 return StartLoc;
546 SMLoc getEndLoc() const override {
547 return EndLoc;
550 SMRange getLocRange() const {
551 return SMRange(StartLoc, EndLoc);
554 Modifiers getModifiers() const {
555 assert(isRegKind() || isImmTy(ImmTyNone));
556 return isRegKind() ? Reg.Mods : Imm.Mods;
559 void setModifiers(Modifiers Mods) {
560 assert(isRegKind() || isImmTy(ImmTyNone));
561 if (isRegKind())
562 Reg.Mods = Mods;
563 else
564 Imm.Mods = Mods;
567 bool hasModifiers() const {
568 return getModifiers().hasModifiers();
571 bool hasFPModifiers() const {
572 return getModifiers().hasFPModifiers();
575 bool hasIntModifiers() const {
576 return getModifiers().hasIntModifiers();
579 uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const;
581 void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const;
583 void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const;
585 template <unsigned Bitwidth>
586 void addKImmFPOperands(MCInst &Inst, unsigned N) const;
588 void addKImmFP16Operands(MCInst &Inst, unsigned N) const {
589 addKImmFPOperands<16>(Inst, N);
592 void addKImmFP32Operands(MCInst &Inst, unsigned N) const {
593 addKImmFPOperands<32>(Inst, N);
596 void addRegOperands(MCInst &Inst, unsigned N) const;
598 void addRegOrImmOperands(MCInst &Inst, unsigned N) const {
599 if (isRegKind())
600 addRegOperands(Inst, N);
601 else if (isExpr())
602 Inst.addOperand(MCOperand::createExpr(Expr));
603 else
604 addImmOperands(Inst, N);
607 void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const {
608 Modifiers Mods = getModifiers();
609 Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
610 if (isRegKind()) {
611 addRegOperands(Inst, N);
612 } else {
613 addImmOperands(Inst, N, false);
617 void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
618 assert(!hasIntModifiers());
619 addRegOrImmWithInputModsOperands(Inst, N);
622 void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
623 assert(!hasFPModifiers());
624 addRegOrImmWithInputModsOperands(Inst, N);
627 void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const {
628 Modifiers Mods = getModifiers();
629 Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
630 assert(isRegKind());
631 addRegOperands(Inst, N);
634 void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
635 assert(!hasIntModifiers());
636 addRegWithInputModsOperands(Inst, N);
639 void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
640 assert(!hasFPModifiers());
641 addRegWithInputModsOperands(Inst, N);
644 void addSoppBrTargetOperands(MCInst &Inst, unsigned N) const {
645 if (isImm())
646 addImmOperands(Inst, N);
647 else {
648 assert(isExpr());
649 Inst.addOperand(MCOperand::createExpr(Expr));
653 static void printImmTy(raw_ostream& OS, ImmTy Type) {
654 switch (Type) {
655 case ImmTyNone: OS << "None"; break;
656 case ImmTyGDS: OS << "GDS"; break;
657 case ImmTyLDS: OS << "LDS"; break;
658 case ImmTyOffen: OS << "Offen"; break;
659 case ImmTyIdxen: OS << "Idxen"; break;
660 case ImmTyAddr64: OS << "Addr64"; break;
661 case ImmTyOffset: OS << "Offset"; break;
662 case ImmTyInstOffset: OS << "InstOffset"; break;
663 case ImmTyOffset0: OS << "Offset0"; break;
664 case ImmTyOffset1: OS << "Offset1"; break;
665 case ImmTyGLC: OS << "GLC"; break;
666 case ImmTySLC: OS << "SLC"; break;
667 case ImmTyTFE: OS << "TFE"; break;
668 case ImmTyD16: OS << "D16"; break;
669 case ImmTyDFMT: OS << "DFMT"; break;
670 case ImmTyNFMT: OS << "NFMT"; break;
671 case ImmTyClampSI: OS << "ClampSI"; break;
672 case ImmTyOModSI: OS << "OModSI"; break;
673 case ImmTyDppCtrl: OS << "DppCtrl"; break;
674 case ImmTyDppRowMask: OS << "DppRowMask"; break;
675 case ImmTyDppBankMask: OS << "DppBankMask"; break;
676 case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break;
677 case ImmTySdwaDstSel: OS << "SdwaDstSel"; break;
678 case ImmTySdwaSrc0Sel: OS << "SdwaSrc0Sel"; break;
679 case ImmTySdwaSrc1Sel: OS << "SdwaSrc1Sel"; break;
680 case ImmTySdwaDstUnused: OS << "SdwaDstUnused"; break;
681 case ImmTyDMask: OS << "DMask"; break;
682 case ImmTyUNorm: OS << "UNorm"; break;
683 case ImmTyDA: OS << "DA"; break;
684 case ImmTyR128: OS << "R128"; break;
685 case ImmTyLWE: OS << "LWE"; break;
686 case ImmTyOff: OS << "Off"; break;
687 case ImmTyExpTgt: OS << "ExpTgt"; break;
688 case ImmTyExpCompr: OS << "ExpCompr"; break;
689 case ImmTyExpVM: OS << "ExpVM"; break;
690 case ImmTyHwreg: OS << "Hwreg"; break;
691 case ImmTySendMsg: OS << "SendMsg"; break;
692 case ImmTyInterpSlot: OS << "InterpSlot"; break;
693 case ImmTyInterpAttr: OS << "InterpAttr"; break;
694 case ImmTyAttrChan: OS << "AttrChan"; break;
695 case ImmTyOpSel: OS << "OpSel"; break;
696 case ImmTyOpSelHi: OS << "OpSelHi"; break;
697 case ImmTyNegLo: OS << "NegLo"; break;
698 case ImmTyNegHi: OS << "NegHi"; break;
699 case ImmTySwizzle: OS << "Swizzle"; break;
700 case ImmTyHigh: OS << "High"; break;
704 void print(raw_ostream &OS) const override {
705 switch (Kind) {
706 case Register:
707 OS << "<register " << getReg() << " mods: " << Reg.Mods << '>';
708 break;
709 case Immediate:
710 OS << '<' << getImm();
711 if (getImmTy() != ImmTyNone) {
712 OS << " type: "; printImmTy(OS, getImmTy());
714 OS << " mods: " << Imm.Mods << '>';
715 break;
716 case Token:
717 OS << '\'' << getToken() << '\'';
718 break;
719 case Expression:
720 OS << "<expr " << *Expr << '>';
721 break;
725 static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser,
726 int64_t Val, SMLoc Loc,
727 ImmTy Type = ImmTyNone,
728 bool IsFPImm = false) {
729 auto Op = llvm::make_unique<AMDGPUOperand>(Immediate, AsmParser);
730 Op->Imm.Val = Val;
731 Op->Imm.IsFPImm = IsFPImm;
732 Op->Imm.Type = Type;
733 Op->Imm.Mods = Modifiers();
734 Op->StartLoc = Loc;
735 Op->EndLoc = Loc;
736 return Op;
739 static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser,
740 StringRef Str, SMLoc Loc,
741 bool HasExplicitEncodingSize = true) {
742 auto Res = llvm::make_unique<AMDGPUOperand>(Token, AsmParser);
743 Res->Tok.Data = Str.data();
744 Res->Tok.Length = Str.size();
745 Res->StartLoc = Loc;
746 Res->EndLoc = Loc;
747 return Res;
750 static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser,
751 unsigned RegNo, SMLoc S,
752 SMLoc E,
753 bool ForceVOP3) {
754 auto Op = llvm::make_unique<AMDGPUOperand>(Register, AsmParser);
755 Op->Reg.RegNo = RegNo;
756 Op->Reg.Mods = Modifiers();
757 Op->Reg.IsForcedVOP3 = ForceVOP3;
758 Op->StartLoc = S;
759 Op->EndLoc = E;
760 return Op;
763 static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser,
764 const class MCExpr *Expr, SMLoc S) {
765 auto Op = llvm::make_unique<AMDGPUOperand>(Expression, AsmParser);
766 Op->Expr = Expr;
767 Op->StartLoc = S;
768 Op->EndLoc = S;
769 return Op;
773 raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) {
774 OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext;
775 return OS;
778 //===----------------------------------------------------------------------===//
779 // AsmParser
780 //===----------------------------------------------------------------------===//
782 // Holds info related to the current kernel, e.g. the count of SGPRs used.
783 // A kernel scope begins at an .amdgpu_hsa_kernel directive and ends at the
784 // next .amdgpu_hsa_kernel directive or at EOF.
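// Illustrative example: after
//   .amdgpu_hsa_kernel foo
//   v_mov_b32 v3, s5
// the predefined symbols .kernel.vgpr_count and .kernel.sgpr_count evaluate
// to 4 and 6 respectively (highest register index used, plus one).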
785 class KernelScopeInfo {
786 int SgprIndexUnusedMin = -1;
787 int VgprIndexUnusedMin = -1;
788 MCContext *Ctx = nullptr;
790 void usesSgprAt(int i) {
791 if (i >= SgprIndexUnusedMin) {
792 SgprIndexUnusedMin = ++i;
793 if (Ctx) {
794 MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count"));
795 Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx));
800 void usesVgprAt(int i) {
801 if (i >= VgprIndexUnusedMin) {
802 VgprIndexUnusedMin = ++i;
803 if (Ctx) {
804 MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
805 Sym->setVariableValue(MCConstantExpr::create(VgprIndexUnusedMin, *Ctx));
810 public:
811 KernelScopeInfo() = default;
813 void initialize(MCContext &Context) {
814 Ctx = &Context;
815 usesSgprAt(SgprIndexUnusedMin = -1);
816 usesVgprAt(VgprIndexUnusedMin = -1);
819 void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex, unsigned RegWidth) {
820 switch (RegKind) {
821 case IS_SGPR: usesSgprAt(DwordRegIndex + RegWidth - 1); break;
822 case IS_VGPR: usesVgprAt(DwordRegIndex + RegWidth - 1); break;
823 default: break;
828 class AMDGPUAsmParser : public MCTargetAsmParser {
829 MCAsmParser &Parser;
831 // Number of extra operands parsed after the first optional operand.
832 // This may be necessary to skip hardcoded mandatory operands.
833 static const unsigned MAX_OPR_LOOKAHEAD = 8;
835 unsigned ForcedEncodingSize = 0;
836 bool ForcedDPP = false;
837 bool ForcedSDWA = false;
838 KernelScopeInfo KernelScope;
840 /// @name Auto-generated Match Functions
841 /// {
843 #define GET_ASSEMBLER_HEADER
844 #include "AMDGPUGenAsmMatcher.inc"
846 /// }
848 private:
849 bool ParseAsAbsoluteExpression(uint32_t &Ret);
850 bool OutOfRangeError(SMRange Range);
851 /// Calculate VGPR/SGPR blocks required for given target, reserved
852 /// registers, and user-specified NextFreeXGPR values.
854 /// \param Features [in] Target features, used for bug corrections.
855 /// \param VCCUsed [in] Whether VCC special SGPR is reserved.
856 /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved.
857 /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved.
858 /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one.
859 /// \param VGPRRange [in] Token range, used for VGPR diagnostics.
860 /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one.
861 /// \param SGPRRange [in] Token range, used for SGPR diagnostics.
862 /// \param VGPRBlocks [out] Result VGPR block count.
863 /// \param SGPRBlocks [out] Result SGPR block count.
864 bool calculateGPRBlocks(const FeatureBitset &Features, bool VCCUsed,
865 bool FlatScrUsed, bool XNACKUsed,
866 unsigned NextFreeVGPR, SMRange VGPRRange,
867 unsigned NextFreeSGPR, SMRange SGPRRange,
868 unsigned &VGPRBlocks, unsigned &SGPRBlocks);
869 bool ParseDirectiveAMDGCNTarget();
870 bool ParseDirectiveAMDHSAKernel();
871 bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor);
872 bool ParseDirectiveHSACodeObjectVersion();
873 bool ParseDirectiveHSACodeObjectISA();
874 bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header);
875 bool ParseDirectiveAMDKernelCodeT();
876 bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo) const;
877 bool ParseDirectiveAMDGPUHsaKernel();
879 bool ParseDirectiveISAVersion();
880 bool ParseDirectiveHSAMetadata();
881 bool ParseDirectivePALMetadata();
883 bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth,
884 RegisterKind RegKind, unsigned Reg1,
885 unsigned RegNum);
886 bool ParseAMDGPURegister(RegisterKind& RegKind, unsigned& Reg,
887 unsigned& RegNum, unsigned& RegWidth,
888 unsigned *DwordRegIndex);
889 Optional<StringRef> getGprCountSymbolName(RegisterKind RegKind);
890 void initializeGprCountSymbol(RegisterKind RegKind);
891 bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex,
892 unsigned RegWidth);
893 void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands,
894 bool IsAtomic, bool IsAtomicReturn, bool IsLds = false);
895 void cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
896 bool IsGdsHardcoded);
898 public:
899 enum AMDGPUMatchResultTy {
900 Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY
903 using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;
905 AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
906 const MCInstrInfo &MII,
907 const MCTargetOptions &Options)
908 : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) {
909 MCAsmParserExtension::Initialize(Parser);
911 if (getFeatureBits().none()) {
912 // Set default features.
913 copySTI().ToggleFeature("SOUTHERN_ISLANDS");
916 setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));
919 // TODO: make those pre-defined variables read-only.
920 // Currently there is no suitable machinery in the core llvm-mc for this.
921 // MCSymbol::isRedefinable is intended for another purpose, and
922 // AsmParser::parseDirectiveSet() cannot be specialized for a specific target.
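// These symbols let assembly sources test the target, e.g. (illustrative):
//   .if .option.machine_version_major >= 8
//   ...
//   .endif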
923 AMDGPU::IsaInfo::IsaVersion ISA =
924 AMDGPU::IsaInfo::getIsaVersion(getFeatureBits());
925 MCContext &Ctx = getContext();
926 if (ISA.Major >= 6 && AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
927 MCSymbol *Sym =
928 Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_number"));
929 Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
930 } else {
931 MCSymbol *Sym =
932 Ctx.getOrCreateSymbol(Twine(".option.machine_version_major"));
933 Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
934 Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_minor"));
935 Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
936 Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping"));
937 Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
939 if (ISA.Major >= 6 && AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
940 initializeGprCountSymbol(IS_VGPR);
941 initializeGprCountSymbol(IS_SGPR);
942 } else
943 KernelScope.initialize(getContext());
947 bool hasXNACK() const {
948 return AMDGPU::hasXNACK(getSTI());
951 bool hasMIMG_R128() const {
952 return AMDGPU::hasMIMG_R128(getSTI());
955 bool hasPackedD16() const {
956 return AMDGPU::hasPackedD16(getSTI());
959 bool isSI() const {
960 return AMDGPU::isSI(getSTI());
963 bool isCI() const {
964 return AMDGPU::isCI(getSTI());
967 bool isVI() const {
968 return AMDGPU::isVI(getSTI());
971 bool isGFX9() const {
972 return AMDGPU::isGFX9(getSTI());
975 bool hasInv2PiInlineImm() const {
976 return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
979 bool hasFlatOffsets() const {
980 return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets];
983 bool hasSGPR102_SGPR103() const {
984 return !isVI();
987 bool hasIntClamp() const {
988 return getFeatureBits()[AMDGPU::FeatureIntClamp];
991 AMDGPUTargetStreamer &getTargetStreamer() {
992 MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
993 return static_cast<AMDGPUTargetStreamer &>(TS);
996 const MCRegisterInfo *getMRI() const {
997 // We need this const_cast because for some reason getContext() is not const
998 // in MCAsmParser.
999 return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo();
1002 const MCInstrInfo *getMII() const {
1003 return &MII;
1006 const FeatureBitset &getFeatureBits() const {
1007 return getSTI().getFeatureBits();
1010 void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; }
1011 void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; }
1012 void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; }
1014 unsigned getForcedEncodingSize() const { return ForcedEncodingSize; }
1015 bool isForcedVOP3() const { return ForcedEncodingSize == 64; }
1016 bool isForcedDPP() const { return ForcedDPP; }
1017 bool isForcedSDWA() const { return ForcedSDWA; }
1018 ArrayRef<unsigned> getMatchedVariants() const;
1020 std::unique_ptr<AMDGPUOperand> parseRegister();
1021 bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
1022 unsigned checkTargetMatchPredicate(MCInst &Inst) override;
1023 unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
1024 unsigned Kind) override;
1025 bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
1026 OperandVector &Operands, MCStreamer &Out,
1027 uint64_t &ErrorInfo,
1028 bool MatchingInlineAsm) override;
1029 bool ParseDirective(AsmToken DirectiveID) override;
1030 OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Mnemonic);
1031 StringRef parseMnemonicSuffix(StringRef Name);
1032 bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
1033 SMLoc NameLoc, OperandVector &Operands) override;
1034 //bool ProcessInstruction(MCInst &Inst);
1036 OperandMatchResultTy parseIntWithPrefix(const char *Prefix, int64_t &Int);
1038 OperandMatchResultTy
1039 parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
1040 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1041 bool (*ConvertResult)(int64_t &) = nullptr);
1043 OperandMatchResultTy parseOperandArrayWithPrefix(
1044 const char *Prefix,
1045 OperandVector &Operands,
1046 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1047 bool (*ConvertResult)(int64_t&) = nullptr);
1049 OperandMatchResultTy
1050 parseNamedBit(const char *Name, OperandVector &Operands,
1051 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone);
1052 OperandMatchResultTy parseStringWithPrefix(StringRef Prefix,
1053 StringRef &Value);
1055 bool parseAbsoluteExpr(int64_t &Val, bool AbsMod = false);
1056 OperandMatchResultTy parseImm(OperandVector &Operands, bool AbsMod = false);
1057 OperandMatchResultTy parseReg(OperandVector &Operands);
1058 OperandMatchResultTy parseRegOrImm(OperandVector &Operands, bool AbsMod = false);
1059 OperandMatchResultTy parseRegOrImmWithFPInputMods(OperandVector &Operands, bool AllowImm = true);
1060 OperandMatchResultTy parseRegOrImmWithIntInputMods(OperandVector &Operands, bool AllowImm = true);
1061 OperandMatchResultTy parseRegWithFPInputMods(OperandVector &Operands);
1062 OperandMatchResultTy parseRegWithIntInputMods(OperandVector &Operands);
1063 OperandMatchResultTy parseVReg32OrOff(OperandVector &Operands);
1065 void cvtDSOffset01(MCInst &Inst, const OperandVector &Operands);
1066 void cvtDS(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, false); }
1067 void cvtDSGds(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, true); }
1068 void cvtExp(MCInst &Inst, const OperandVector &Operands);
1070 bool parseCnt(int64_t &IntVal);
1071 OperandMatchResultTy parseSWaitCntOps(OperandVector &Operands);
1072 OperandMatchResultTy parseHwreg(OperandVector &Operands);
1074 private:
1075 struct OperandInfoTy {
1076 int64_t Id;
1077 bool IsSymbolic = false;
1079 OperandInfoTy(int64_t Id_) : Id(Id_) {}
1082 bool parseSendMsgConstruct(OperandInfoTy &Msg, OperandInfoTy &Operation, int64_t &StreamId);
1083 bool parseHwregConstruct(OperandInfoTy &HwReg, int64_t &Offset, int64_t &Width);
1085 void errorExpTgt();
1086 OperandMatchResultTy parseExpTgtImpl(StringRef Str, uint8_t &Val);
1088 bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc);
1089 bool validateConstantBusLimitations(const MCInst &Inst);
1090 bool validateEarlyClobberLimitations(const MCInst &Inst);
1091 bool validateIntClampSupported(const MCInst &Inst);
1092 bool validateMIMGAtomicDMask(const MCInst &Inst);
1093 bool validateMIMGGatherDMask(const MCInst &Inst);
1094 bool validateMIMGDataSize(const MCInst &Inst);
1095 bool validateMIMGR128(const MCInst &Inst);
1096 bool validateMIMGD16(const MCInst &Inst);
1097 bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
1098 bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
1099 unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const;
1101 bool trySkipId(const StringRef Id);
1102 bool trySkipToken(const AsmToken::TokenKind Kind);
1103 bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg);
1104 bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string");
1105 bool parseExpr(int64_t &Imm);
1107 public:
1108 OperandMatchResultTy parseOptionalOperand(OperandVector &Operands);
1109 OperandMatchResultTy parseOptionalOpr(OperandVector &Operands);
1111 OperandMatchResultTy parseExpTgt(OperandVector &Operands);
1112 OperandMatchResultTy parseSendMsgOp(OperandVector &Operands);
1113 OperandMatchResultTy parseInterpSlot(OperandVector &Operands);
1114 OperandMatchResultTy parseInterpAttr(OperandVector &Operands);
1115 OperandMatchResultTy parseSOppBrTarget(OperandVector &Operands);
1117 bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
1118 const unsigned MinVal,
1119 const unsigned MaxVal,
1120 const StringRef ErrMsg);
1121 OperandMatchResultTy parseSwizzleOp(OperandVector &Operands);
1122 bool parseSwizzleOffset(int64_t &Imm);
1123 bool parseSwizzleMacro(int64_t &Imm);
1124 bool parseSwizzleQuadPerm(int64_t &Imm);
1125 bool parseSwizzleBitmaskPerm(int64_t &Imm);
1126 bool parseSwizzleBroadcast(int64_t &Imm);
1127 bool parseSwizzleSwap(int64_t &Imm);
1128 bool parseSwizzleReverse(int64_t &Imm);
1130 void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false); }
1131 void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, false); }
1132 void cvtMubufAtomicReturn(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, true); }
1133 void cvtMubufLds(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false, true); }
1134 void cvtMtbuf(MCInst &Inst, const OperandVector &Operands);
1136 AMDGPUOperand::Ptr defaultGLC() const;
1137 AMDGPUOperand::Ptr defaultSLC() const;
1139 AMDGPUOperand::Ptr defaultSMRDOffset8() const;
1140 AMDGPUOperand::Ptr defaultSMRDOffset20() const;
1141 AMDGPUOperand::Ptr defaultSMRDLiteralOffset() const;
1142 AMDGPUOperand::Ptr defaultOffsetU12() const;
1143 AMDGPUOperand::Ptr defaultOffsetS13() const;
1145 OperandMatchResultTy parseOModOperand(OperandVector &Operands);
1147 void cvtVOP3(MCInst &Inst, const OperandVector &Operands,
1148 OptionalImmIndexMap &OptionalIdx);
1149 void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands);
1150 void cvtVOP3(MCInst &Inst, const OperandVector &Operands);
1151 void cvtVOP3P(MCInst &Inst, const OperandVector &Operands);
1153 void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands);
1155 void cvtMIMG(MCInst &Inst, const OperandVector &Operands,
1156 bool IsAtomic = false);
1157 void cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands);
1159 OperandMatchResultTy parseDPPCtrl(OperandVector &Operands);
1160 AMDGPUOperand::Ptr defaultRowMask() const;
1161 AMDGPUOperand::Ptr defaultBankMask() const;
1162 AMDGPUOperand::Ptr defaultBoundCtrl() const;
1163 void cvtDPP(MCInst &Inst, const OperandVector &Operands);
1165 OperandMatchResultTy parseSDWASel(OperandVector &Operands, StringRef Prefix,
1166 AMDGPUOperand::ImmTy Type);
1167 OperandMatchResultTy parseSDWADstUnused(OperandVector &Operands);
1168 void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands);
1169 void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands);
1170 void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands);
1171 void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands);
1172 void cvtSDWA(MCInst &Inst, const OperandVector &Operands,
1173 uint64_t BasicInstType, bool skipVcc = false);
1176 struct OptionalOperand {
1177 const char *Name;
1178 AMDGPUOperand::ImmTy Type;
1179 bool IsBit;
1180 bool (*ConvertResult)(int64_t&);
1183 } // end anonymous namespace
1185 // May be called with integer type with equivalent bitwidth.
1186 static const fltSemantics *getFltSemantics(unsigned Size) {
1187 switch (Size) {
1188 case 4:
1189 return &APFloat::IEEEsingle();
1190 case 8:
1191 return &APFloat::IEEEdouble();
1192 case 2:
1193 return &APFloat::IEEEhalf();
1194 default:
1195 llvm_unreachable("unsupported fp type");
1199 static const fltSemantics *getFltSemantics(MVT VT) {
1200 return getFltSemantics(VT.getSizeInBits() / 8);
1203 static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
1204 switch (OperandType) {
1205 case AMDGPU::OPERAND_REG_IMM_INT32:
1206 case AMDGPU::OPERAND_REG_IMM_FP32:
1207 case AMDGPU::OPERAND_REG_INLINE_C_INT32:
1208 case AMDGPU::OPERAND_REG_INLINE_C_FP32:
1209 return &APFloat::IEEEsingle();
1210 case AMDGPU::OPERAND_REG_IMM_INT64:
1211 case AMDGPU::OPERAND_REG_IMM_FP64:
1212 case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1213 case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1214 return &APFloat::IEEEdouble();
1215 case AMDGPU::OPERAND_REG_IMM_INT16:
1216 case AMDGPU::OPERAND_REG_IMM_FP16:
1217 case AMDGPU::OPERAND_REG_INLINE_C_INT16:
1218 case AMDGPU::OPERAND_REG_INLINE_C_FP16:
1219 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
1220 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
1221 return &APFloat::IEEEhalf();
1222 default:
1223 llvm_unreachable("unsupported fp type");
1227 //===----------------------------------------------------------------------===//
1228 // Operand
1229 //===----------------------------------------------------------------------===//
1231 static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) {
1232 bool Lost;
1234 // Convert the literal to the target type's floating-point semantics.
1235 APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT),
1236 APFloat::rmNearestTiesToEven,
1237 &Lost);
1238 // We allow precision loss but not overflow or underflow.
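// Illustrative: 1.0 converts to f16 exactly, 3.14159 merely loses precision
// and is still accepted, while 1.0e10 overflows f16 and is rejected.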
1239 if (Status != APFloat::opOK &&
1240 Lost &&
1241 ((Status & APFloat::opOverflow) != 0 ||
1242 (Status & APFloat::opUnderflow) != 0)) {
1243 return false;
1246 return true;
1249 bool AMDGPUOperand::isInlinableImm(MVT type) const {
1250 if (!isImmTy(ImmTyNone)) {
1251 // Only plain immediates are inlinable (e.g. "clamp" attribute is not)
1252 return false;
1254 // TODO: We should avoid using host floating point here. It would be better
1255 // to check the float bit values, which is what a few other places do.
1256 // We've had bot failures before due to weird NaN support on MIPS hosts.
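// For reference (hedged summary of the usual GFX inline-constant encoding):
// integers -16..64, 0.0, +-0.5, +-1.0, +-2.0, +-4.0, and 1/(2*pi) when
// FeatureInv2PiInlineImm is available; everything else requires a literal.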
1258 APInt Literal(64, Imm.Val);
1260 if (Imm.IsFPImm) { // We got fp literal token
1261 if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
1262 return AMDGPU::isInlinableLiteral64(Imm.Val,
1263 AsmParser->hasInv2PiInlineImm());
1266 APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
1267 if (!canLosslesslyConvertToFPType(FPLiteral, type))
1268 return false;
1270 if (type.getScalarSizeInBits() == 16) {
1271 return AMDGPU::isInlinableLiteral16(
1272 static_cast<int16_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
1273 AsmParser->hasInv2PiInlineImm());
1276 // Check if single precision literal is inlinable
1277 return AMDGPU::isInlinableLiteral32(
1278 static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
1279 AsmParser->hasInv2PiInlineImm());
1282 // We got int literal token.
1283 if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
1284 return AMDGPU::isInlinableLiteral64(Imm.Val,
1285 AsmParser->hasInv2PiInlineImm());
1288 if (type.getScalarSizeInBits() == 16) {
1289 return AMDGPU::isInlinableLiteral16(
1290 static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()),
1291 AsmParser->hasInv2PiInlineImm());
1294 return AMDGPU::isInlinableLiteral32(
1295 static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()),
1296 AsmParser->hasInv2PiInlineImm());
1299 bool AMDGPUOperand::isLiteralImm(MVT type) const {
1300 // Check that this immediate can be added as literal
1301 if (!isImmTy(ImmTyNone)) {
1302 return false;
1305 if (!Imm.IsFPImm) {
1306 // We got int literal token.
1308 if (type == MVT::f64 && hasFPModifiers()) {
1309 // FP modifiers cannot be applied to integer literals while preserving the
1310 // same semantics for VOP1/2/C and VOP3, because of integer truncation.
1311 // To avoid ambiguity, reject these cases.
1312 return false;
1315 unsigned Size = type.getSizeInBits();
1316 if (Size == 64)
1317 Size = 32;
1319 // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP
1320 // types.
1321 return isUIntN(Size, Imm.Val) || isIntN(Size, Imm.Val);
1324 // We got fp literal token
1325 if (type == MVT::f64) { // Expected 64-bit fp operand
1326 // The low 32 bits of such a literal are set to zero, but we accept these literals.
1327 return true;
1330 if (type == MVT::i64) { // Expected 64-bit int operand
1331 // We don't allow fp literals in 64-bit integer instructions. It is
1332 // unclear how we should encode them.
1333 return false;
1336 APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
1337 return canLosslesslyConvertToFPType(FPLiteral, type);
1340 bool AMDGPUOperand::isRegClass(unsigned RCID) const {
1341 return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg());
1344 bool AMDGPUOperand::isSDWAOperand(MVT type) const {
1345 if (AsmParser->isVI())
1346 return isVReg();
1347 else if (AsmParser->isGFX9())
1348 return isRegKind() || isInlinableImm(type);
1349 else
1350 return false;
1353 bool AMDGPUOperand::isSDWAFP16Operand() const {
1354 return isSDWAOperand(MVT::f16);
1357 bool AMDGPUOperand::isSDWAFP32Operand() const {
1358 return isSDWAOperand(MVT::f32);
1361 bool AMDGPUOperand::isSDWAInt16Operand() const {
1362 return isSDWAOperand(MVT::i16);
1365 bool AMDGPUOperand::isSDWAInt32Operand() const {
1366 return isSDWAOperand(MVT::i32);
1369 uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const
1371 assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
1372 assert(Size == 2 || Size == 4 || Size == 8);
1374 const uint64_t FpSignMask = (1ULL << (Size * 8 - 1));
1376 if (Imm.Mods.Abs) {
1377 Val &= ~FpSignMask;
1379 if (Imm.Mods.Neg) {
1380 Val ^= FpSignMask;
1383 return Val;
1386 void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const {
1387 if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()),
1388 Inst.getNumOperands())) {
1389 addLiteralImmOperand(Inst, Imm.Val,
1390 ApplyModifiers &
1391 isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
1392 } else {
1393 assert(!isImmTy(ImmTyNone) || !hasModifiers());
1394 Inst.addOperand(MCOperand::createImm(Imm.Val));
1398 void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const {
1399 const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode());
1400 auto OpNum = Inst.getNumOperands();
1401 // Check that this operand accepts literals
1402 assert(AMDGPU::isSISrcOperand(InstDesc, OpNum));
1404 if (ApplyModifiers) {
1405 assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum));
1406 const unsigned Size = Imm.IsFPImm ? sizeof(double) : getOperandSize(InstDesc, OpNum);
1407 Val = applyInputFPModifiers(Val, Size);
1410 APInt Literal(64, Val);
1411 uint8_t OpTy = InstDesc.OpInfo[OpNum].OperandType;
1413 if (Imm.IsFPImm) { // We got fp literal token
1414 switch (OpTy) {
1415 case AMDGPU::OPERAND_REG_IMM_INT64:
1416 case AMDGPU::OPERAND_REG_IMM_FP64:
1417 case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1418 case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1419 if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(),
1420 AsmParser->hasInv2PiInlineImm())) {
1421 Inst.addOperand(MCOperand::createImm(Literal.getZExtValue()));
1422 return;
1425 // Non-inlineable
1426 if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) { // Expected 64-bit fp operand
1427 // For fp operands we check if low 32 bits are zeros
1428 if (Literal.getLoBits(32) != 0) {
1429 const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(),
1430 "Can't encode literal as exact 64-bit floating-point operand. "
1431 "Low 32-bits will be set to zero");
1434 Inst.addOperand(MCOperand::createImm(Literal.lshr(32).getZExtValue()));
1435 return;
1438 // We don't allow fp literals in 64-bit integer instructions. It is
1439 // unclear how we should encode them. This case should be checked earlier
1440 // in predicate methods (isLiteralImm())
1441 llvm_unreachable("fp literal in 64-bit integer instruction.");
1443 case AMDGPU::OPERAND_REG_IMM_INT32:
1444 case AMDGPU::OPERAND_REG_IMM_FP32:
1445 case AMDGPU::OPERAND_REG_INLINE_C_INT32:
1446 case AMDGPU::OPERAND_REG_INLINE_C_FP32:
1447 case AMDGPU::OPERAND_REG_IMM_INT16:
1448 case AMDGPU::OPERAND_REG_IMM_FP16:
1449 case AMDGPU::OPERAND_REG_INLINE_C_INT16:
1450 case AMDGPU::OPERAND_REG_INLINE_C_FP16:
1451 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
1452 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: {
1453 bool lost;
1454 APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
1455 // Convert literal to single precision
1456 FPLiteral.convert(*getOpFltSemantics(OpTy),
1457 APFloat::rmNearestTiesToEven, &lost);
1458 // We allow precision lost but not overflow or underflow. This should be
1459 // checked earlier in isLiteralImm()
1461 uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
1462 if (OpTy == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 ||
1463 OpTy == AMDGPU::OPERAND_REG_INLINE_C_V2FP16) {
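// Packed 16-bit operands replicate the converted value into both halves of
// the 32-bit encoding, e.g. (illustrative) 1.0 -> 0x3C00 -> 0x3C003C00.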
1464 ImmVal |= (ImmVal << 16);
1467 Inst.addOperand(MCOperand::createImm(ImmVal));
1468 return;
1470 default:
1471 llvm_unreachable("invalid operand size");
1474 return;
1477 // We got int literal token.
1478 // Only sign extend inline immediates.
1479 // FIXME: No errors on truncation
1480 switch (OpTy) {
1481 case AMDGPU::OPERAND_REG_IMM_INT32:
1482 case AMDGPU::OPERAND_REG_IMM_FP32:
1483 case AMDGPU::OPERAND_REG_INLINE_C_INT32:
1484 case AMDGPU::OPERAND_REG_INLINE_C_FP32:
1485 if (isInt<32>(Val) &&
1486 AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val),
1487 AsmParser->hasInv2PiInlineImm())) {
1488 Inst.addOperand(MCOperand::createImm(Val));
1489 return;
1492 Inst.addOperand(MCOperand::createImm(Val & 0xffffffff));
1493 return;
1495 case AMDGPU::OPERAND_REG_IMM_INT64:
1496 case AMDGPU::OPERAND_REG_IMM_FP64:
1497 case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1498 case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1499 if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) {
1500 Inst.addOperand(MCOperand::createImm(Val));
1501 return;
1504 Inst.addOperand(MCOperand::createImm(Lo_32(Val)));
1505 return;
1507 case AMDGPU::OPERAND_REG_IMM_INT16:
1508 case AMDGPU::OPERAND_REG_IMM_FP16:
1509 case AMDGPU::OPERAND_REG_INLINE_C_INT16:
1510 case AMDGPU::OPERAND_REG_INLINE_C_FP16:
1511 if (isInt<16>(Val) &&
1512 AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
1513 AsmParser->hasInv2PiInlineImm())) {
1514 Inst.addOperand(MCOperand::createImm(Val));
1515 return;
1518 Inst.addOperand(MCOperand::createImm(Val & 0xffff));
1519 return;
1521 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
1522 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: {
1523 auto LiteralVal = static_cast<uint16_t>(Literal.getLoBits(16).getZExtValue());
1524 assert(AMDGPU::isInlinableLiteral16(LiteralVal,
1525 AsmParser->hasInv2PiInlineImm()));
1527 uint32_t ImmVal = static_cast<uint32_t>(LiteralVal) << 16 |
1528 static_cast<uint32_t>(LiteralVal);
1529 Inst.addOperand(MCOperand::createImm(ImmVal));
1530 return;
1532 default:
1533 llvm_unreachable("invalid operand size");
1537 template <unsigned Bitwidth>
1538 void AMDGPUOperand::addKImmFPOperands(MCInst &Inst, unsigned N) const {
1539 APInt Literal(64, Imm.Val);
1541 if (!Imm.IsFPImm) {
1542 // We got int literal token.
1543 Inst.addOperand(MCOperand::createImm(Literal.getLoBits(Bitwidth).getZExtValue()));
1544 return;
1547 bool Lost;
1548 APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
1549 FPLiteral.convert(*getFltSemantics(Bitwidth / 8),
1550 APFloat::rmNearestTiesToEven, &Lost);
1551 Inst.addOperand(MCOperand::createImm(FPLiteral.bitcastToAPInt().getZExtValue()));
1554 void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const {
1555 Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI())));
1558 //===----------------------------------------------------------------------===//
1559 // AsmParser
1560 //===----------------------------------------------------------------------===//
1562 static int getRegClass(RegisterKind Is, unsigned RegWidth) {
1563 if (Is == IS_VGPR) {
1564 switch (RegWidth) {
1565 default: return -1;
1566 case 1: return AMDGPU::VGPR_32RegClassID;
1567 case 2: return AMDGPU::VReg_64RegClassID;
1568 case 3: return AMDGPU::VReg_96RegClassID;
1569 case 4: return AMDGPU::VReg_128RegClassID;
1570 case 8: return AMDGPU::VReg_256RegClassID;
1571 case 16: return AMDGPU::VReg_512RegClassID;
1573 } else if (Is == IS_TTMP) {
1574 switch (RegWidth) {
1575 default: return -1;
1576 case 1: return AMDGPU::TTMP_32RegClassID;
1577 case 2: return AMDGPU::TTMP_64RegClassID;
1578 case 4: return AMDGPU::TTMP_128RegClassID;
1579 case 8: return AMDGPU::TTMP_256RegClassID;
1580 case 16: return AMDGPU::TTMP_512RegClassID;
1582 } else if (Is == IS_SGPR) {
1583 switch (RegWidth) {
1584 default: return -1;
1585 case 1: return AMDGPU::SGPR_32RegClassID;
1586 case 2: return AMDGPU::SGPR_64RegClassID;
1587 case 4: return AMDGPU::SGPR_128RegClassID;
1588 case 8: return AMDGPU::SGPR_256RegClassID;
1589 case 16: return AMDGPU::SGPR_512RegClassID;
1592 return -1;
1595 static unsigned getSpecialRegForName(StringRef RegName) {
1596 return StringSwitch<unsigned>(RegName)
1597 .Case("exec", AMDGPU::EXEC)
1598 .Case("vcc", AMDGPU::VCC)
1599 .Case("flat_scratch", AMDGPU::FLAT_SCR)
1600 .Case("xnack_mask", AMDGPU::XNACK_MASK)
1601 .Case("m0", AMDGPU::M0)
1602 .Case("scc", AMDGPU::SCC)
1603 .Case("tba", AMDGPU::TBA)
1604 .Case("tma", AMDGPU::TMA)
1605 .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO)
1606 .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI)
1607 .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO)
1608 .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI)
1609 .Case("vcc_lo", AMDGPU::VCC_LO)
1610 .Case("vcc_hi", AMDGPU::VCC_HI)
1611 .Case("exec_lo", AMDGPU::EXEC_LO)
1612 .Case("exec_hi", AMDGPU::EXEC_HI)
1613 .Case("tma_lo", AMDGPU::TMA_LO)
1614 .Case("tma_hi", AMDGPU::TMA_HI)
1615 .Case("tba_lo", AMDGPU::TBA_LO)
1616 .Case("tba_hi", AMDGPU::TBA_HI)
1617 .Default(0);
1620 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
1621 SMLoc &EndLoc) {
1622 auto R = parseRegister();
1623 if (!R) return true;
1624 assert(R->isReg());
1625 RegNo = R->getReg();
1626 StartLoc = R->getStartLoc();
1627 EndLoc = R->getEndLoc();
1628 return false;
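// AddNextRegisterToList grows a register list such as "[s0, s1, s2, s3]" one
// element at a time: special register halves like exec_lo/exec_hi merge into
// their 64-bit alias (EXEC), while VGPR/SGPR/TTMP entries must be consecutive.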
1631 bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth,
1632 RegisterKind RegKind, unsigned Reg1,
1633 unsigned RegNum) {
1634 switch (RegKind) {
1635 case IS_SPECIAL:
1636 if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) {
1637 Reg = AMDGPU::EXEC;
1638 RegWidth = 2;
1639 return true;
1641 if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) {
1642 Reg = AMDGPU::FLAT_SCR;
1643 RegWidth = 2;
1644 return true;
1646 if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) {
1647 Reg = AMDGPU::XNACK_MASK;
1648 RegWidth = 2;
1649 return true;
1651 if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) {
1652 Reg = AMDGPU::VCC;
1653 RegWidth = 2;
1654 return true;
1656 if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) {
1657 Reg = AMDGPU::TBA;
1658 RegWidth = 2;
1659 return true;
1661 if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) {
1662 Reg = AMDGPU::TMA;
1663 RegWidth = 2;
1664 return true;
1666 return false;
1667 case IS_VGPR:
1668 case IS_SGPR:
1669 case IS_TTMP:
1670 if (Reg1 != Reg + RegWidth) {
1671 return false;
1673 RegWidth++;
1674 return true;
1675 default:
1676 llvm_unreachable("unexpected register kind");
1680 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
1681 unsigned &RegNum, unsigned &RegWidth,
1682 unsigned *DwordRegIndex) {
1683 if (DwordRegIndex) { *DwordRegIndex = 0; }
1684 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
1685 if (getLexer().is(AsmToken::Identifier)) {
1686 StringRef RegName = Parser.getTok().getString();
1687 if ((Reg = getSpecialRegForName(RegName))) {
1688 Parser.Lex();
1689 RegKind = IS_SPECIAL;
1690 } else {
1691 unsigned RegNumIndex = 0;
1692 if (RegName[0] == 'v') {
1693 RegNumIndex = 1;
1694 RegKind = IS_VGPR;
1695 } else if (RegName[0] == 's') {
1696 RegNumIndex = 1;
1697 RegKind = IS_SGPR;
1698 } else if (RegName.startswith("ttmp")) {
1699 RegNumIndex = strlen("ttmp");
1700 RegKind = IS_TTMP;
1701 } else {
1702 return false;
1704 if (RegName.size() > RegNumIndex) {
1705 // Single 32-bit register: vXX.
1706 if (RegName.substr(RegNumIndex).getAsInteger(10, RegNum))
1707 return false;
1708 Parser.Lex();
1709 RegWidth = 1;
1710 } else {
1711 // Range of registers: v[XX:YY]. ":YY" is optional.
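// e.g. "s[2:5]" yields RegNum = 2 and RegWidth = 4, while "v[7]" is
// equivalent to plain "v7".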
1712 Parser.Lex();
1713 int64_t RegLo, RegHi;
1714 if (getLexer().isNot(AsmToken::LBrac))
1715 return false;
1716 Parser.Lex();
1718 if (getParser().parseAbsoluteExpression(RegLo))
1719 return false;
1721 const bool isRBrace = getLexer().is(AsmToken::RBrac);
1722 if (!isRBrace && getLexer().isNot(AsmToken::Colon))
1723 return false;
1724 Parser.Lex();
1726 if (isRBrace) {
1727 RegHi = RegLo;
1728 } else {
1729 if (getParser().parseAbsoluteExpression(RegHi))
1730 return false;
1732 if (getLexer().isNot(AsmToken::RBrac))
1733 return false;
1734 Parser.Lex();
1736 RegNum = (unsigned) RegLo;
1737 RegWidth = (RegHi - RegLo) + 1;
1740 } else if (getLexer().is(AsmToken::LBrac)) {
1741 // List of consecutive registers: [s0,s1,s2,s3]
1742 Parser.Lex();
1743 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, nullptr))
1744 return false;
1745 if (RegWidth != 1)
1746 return false;
1747 RegisterKind RegKind1;
1748 unsigned Reg1, RegNum1, RegWidth1;
1749 do {
1750 if (getLexer().is(AsmToken::Comma)) {
1751 Parser.Lex();
1752 } else if (getLexer().is(AsmToken::RBrac)) {
1753 Parser.Lex();
1754 break;
1755 } else if (ParseAMDGPURegister(RegKind1, Reg1, RegNum1, RegWidth1, nullptr)) {
1756 if (RegWidth1 != 1) {
1757 return false;
1759 if (RegKind1 != RegKind) {
1760 return false;
1762 if (!AddNextRegisterToList(Reg, RegWidth, RegKind1, Reg1, RegNum1)) {
1763 return false;
1765 } else {
1766 return false;
1768 } while (true);
1769 } else {
1770 return false;
1772 switch (RegKind) {
1773 case IS_SPECIAL:
1774 RegNum = 0;
1775 RegWidth = 1;
1776 break;
1777 case IS_VGPR:
1778 case IS_SGPR:
1779 case IS_TTMP:
1781 unsigned Size = 1;
1782 if (RegKind == IS_SGPR || RegKind == IS_TTMP) {
1783 // SGPR and TTMP registers must be aligned. Max required alignment is 4 dwords.
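// e.g. "s[1:2]" is rejected because a 64-bit SGPR range must start at an
// even register index, whereas "s[2:3]" is accepted.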
1784 Size = std::min(RegWidth, 4u);
1786 if (RegNum % Size != 0)
1787 return false;
1788 if (DwordRegIndex) { *DwordRegIndex = RegNum; }
1789 RegNum = RegNum / Size;
1790 int RCID = getRegClass(RegKind, RegWidth);
1791 if (RCID == -1)
1792 return false;
1793 const MCRegisterClass RC = TRI->getRegClass(RCID);
1794 if (RegNum >= RC.getNumRegs())
1795 return false;
1796 Reg = RC.getRegister(RegNum);
1797 break;
1800 default:
1801 llvm_unreachable("unexpected register kind");
1804 if (!subtargetHasRegister(*TRI, Reg))
1805 return false;
1806 return true;
1809 Optional<StringRef>
1810 AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) {
1811 switch (RegKind) {
1812 case IS_VGPR:
1813 return StringRef(".amdgcn.next_free_vgpr");
1814 case IS_SGPR:
1815 return StringRef(".amdgcn.next_free_sgpr");
1816 default:
1817 return None;
1821 void AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) {
1822 auto SymbolName = getGprCountSymbolName(RegKind);
1823 assert(SymbolName && "initializing invalid register kind");
1824 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
1825 Sym->setVariableValue(MCConstantExpr::create(0, getContext()));
1828 bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind,
1829 unsigned DwordRegIndex,
1830 unsigned RegWidth) {
1831 // Symbols are only defined for GCN targets
1832 if (AMDGPU::IsaInfo::getIsaVersion(getFeatureBits()).Major < 6)
1833 return true;
1835 auto SymbolName = getGprCountSymbolName(RegKind);
1836 if (!SymbolName)
1837 return true;
1838 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
1840 int64_t NewMax = DwordRegIndex + RegWidth - 1;
1841 int64_t OldCount;
1843 if (!Sym->isVariable())
1844 return !Error(getParser().getTok().getLoc(),
1845 ".amdgcn.next_free_{v,s}gpr symbols must be variable");
1846 if (!Sym->getVariableValue(false)->evaluateAsAbsolute(OldCount))
1847 return !Error(
1848 getParser().getTok().getLoc(),
1849 ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions");
1851 if (OldCount <= NewMax)
1852 Sym->setVariableValue(MCConstantExpr::create(NewMax + 1, getContext()));
1854 return true;
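// Illustrative note (not part of the original source): for example, parsing
// v[6:7] yields DwordRegIndex = 6 and RegWidth = 2, so NewMax = 7 and
// .amdgcn.next_free_vgpr is raised to at least 8.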
1857 std::unique_ptr<AMDGPUOperand> AMDGPUAsmParser::parseRegister() {
1858 const auto &Tok = Parser.getTok();
1859 SMLoc StartLoc = Tok.getLoc();
1860 SMLoc EndLoc = Tok.getEndLoc();
1861 RegisterKind RegKind;
1862 unsigned Reg, RegNum, RegWidth, DwordRegIndex;
1864 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, &DwordRegIndex)) {
1865 return nullptr;
1867 if (AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
1868 if (!updateGprCountSymbols(RegKind, DwordRegIndex, RegWidth))
1869 return nullptr;
1870 } else
1871 KernelScope.usesRegister(RegKind, DwordRegIndex, RegWidth);
1872 return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc, false);
1875 bool
1876 AMDGPUAsmParser::parseAbsoluteExpr(int64_t &Val, bool AbsMod) {
1877 if (AbsMod && getLexer().peekTok().is(AsmToken::Pipe) &&
1878 (getLexer().getKind() == AsmToken::Integer ||
1879 getLexer().getKind() == AsmToken::Real)) {
1880 // This is a workaround for handling operands like these:
1881 // |1.0|
1882 // |-1|
1883 // This syntax is not compatible with the syntax of standard
1884 // MC expressions (due to the trailing '|').
1886 SMLoc EndLoc;
1887 const MCExpr *Expr;
1889 if (getParser().parsePrimaryExpr(Expr, EndLoc)) {
1890 return true;
1893 return !Expr->evaluateAsAbsolute(Val);
1896 return getParser().parseAbsoluteExpression(Val);
1899 OperandMatchResultTy
1900 AMDGPUAsmParser::parseImm(OperandVector &Operands, bool AbsMod) {
1901 // TODO: add syntactic sugar for 1/(2*PI)
1902 bool Minus = false;
1903 if (getLexer().getKind() == AsmToken::Minus) {
1904 const AsmToken NextToken = getLexer().peekTok();
1905 if (!NextToken.is(AsmToken::Integer) &&
1906 !NextToken.is(AsmToken::Real)) {
1907 return MatchOperand_NoMatch;
1909 Minus = true;
1910 Parser.Lex();
1913 SMLoc S = Parser.getTok().getLoc();
1914 switch(getLexer().getKind()) {
1915 case AsmToken::Integer: {
1916 int64_t IntVal;
1917 if (parseAbsoluteExpr(IntVal, AbsMod))
1918 return MatchOperand_ParseFail;
1919 if (Minus)
1920 IntVal *= -1;
1921 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
1922 return MatchOperand_Success;
1924 case AsmToken::Real: {
1925 int64_t IntVal;
1926 if (parseAbsoluteExpr(IntVal, AbsMod))
1927 return MatchOperand_ParseFail;
1929 APFloat F(BitsToDouble(IntVal));
1930 if (Minus)
1931 F.changeSign();
1932 Operands.push_back(
1933 AMDGPUOperand::CreateImm(this, F.bitcastToAPInt().getZExtValue(), S,
1934 AMDGPUOperand::ImmTyNone, true));
1935 return MatchOperand_Success;
1937 default:
1938 return MatchOperand_NoMatch;
1942 OperandMatchResultTy
1943 AMDGPUAsmParser::parseReg(OperandVector &Operands) {
1944 if (auto R = parseRegister()) {
1945 assert(R->isReg());
1946 R->Reg.IsForcedVOP3 = isForcedVOP3();
1947 Operands.push_back(std::move(R));
1948 return MatchOperand_Success;
1950 return MatchOperand_NoMatch;
1953 OperandMatchResultTy
1954 AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands, bool AbsMod) {
1955 auto res = parseImm(Operands, AbsMod);
1956 if (res != MatchOperand_NoMatch) {
1957 return res;
1960 return parseReg(Operands);
1963 OperandMatchResultTy
1964 AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands,
1965 bool AllowImm) {
1966 bool Negate = false, Negate2 = false, Abs = false, Abs2 = false;
1968 if (getLexer().getKind()== AsmToken::Minus) {
1969 const AsmToken NextToken = getLexer().peekTok();
1971 // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead.
1972 if (NextToken.is(AsmToken::Minus)) {
1973 Error(Parser.getTok().getLoc(), "invalid syntax, expected 'neg' modifier");
1974 return MatchOperand_ParseFail;
1977 // '-' followed by an integer literal N should be interpreted as integer
1978 // negation rather than a floating-point NEG modifier applied to N.
1979 // Besides being counter-intuitive, such use of a floating-point NEG modifier
1980 // results in different meanings for integer literals used with VOP1/2/C
1981 // and VOP3, for example:
1982 // v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF
1983 // v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001
1984 // Negative fp literals should be handled likewise for uniformity.
1985 if (!NextToken.is(AsmToken::Integer) && !NextToken.is(AsmToken::Real)) {
1986 Parser.Lex();
1987 Negate = true;
1991 if (getLexer().getKind() == AsmToken::Identifier &&
1992 Parser.getTok().getString() == "neg") {
1993 if (Negate) {
1994 Error(Parser.getTok().getLoc(), "expected register or immediate");
1995 return MatchOperand_ParseFail;
1997 Parser.Lex();
1998 Negate2 = true;
1999 if (getLexer().isNot(AsmToken::LParen)) {
2000 Error(Parser.getTok().getLoc(), "expected left paren after neg");
2001 return MatchOperand_ParseFail;
2003 Parser.Lex();
2006 if (getLexer().getKind() == AsmToken::Identifier &&
2007 Parser.getTok().getString() == "abs") {
2008 Parser.Lex();
2009 Abs2 = true;
2010 if (getLexer().isNot(AsmToken::LParen)) {
2011 Error(Parser.getTok().getLoc(), "expected left paren after abs");
2012 return MatchOperand_ParseFail;
2014 Parser.Lex();
2017 if (getLexer().getKind() == AsmToken::Pipe) {
2018 if (Abs2) {
2019 Error(Parser.getTok().getLoc(), "expected register or immediate");
2020 return MatchOperand_ParseFail;
2022 Parser.Lex();
2023 Abs = true;
2026 OperandMatchResultTy Res;
2027 if (AllowImm) {
2028 Res = parseRegOrImm(Operands, Abs);
2029 } else {
2030 Res = parseReg(Operands);
2032 if (Res != MatchOperand_Success) {
2033 return Res;
2036 AMDGPUOperand::Modifiers Mods;
2037 if (Abs) {
2038 if (getLexer().getKind() != AsmToken::Pipe) {
2039 Error(Parser.getTok().getLoc(), "expected vertical bar");
2040 return MatchOperand_ParseFail;
2042 Parser.Lex();
2043 Mods.Abs = true;
2045 if (Abs2) {
2046 if (getLexer().isNot(AsmToken::RParen)) {
2047 Error(Parser.getTok().getLoc(), "expected closing parentheses");
2048 return MatchOperand_ParseFail;
2050 Parser.Lex();
2051 Mods.Abs = true;
2054 if (Negate) {
2055 Mods.Neg = true;
2056 } else if (Negate2) {
2057 if (getLexer().isNot(AsmToken::RParen)) {
2058 Error(Parser.getTok().getLoc(), "expected closing parentheses");
2059 return MatchOperand_ParseFail;
2061 Parser.Lex();
2062 Mods.Neg = true;
2065 if (Mods.hasFPModifiers()) {
2066 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
2067 Op.setModifiers(Mods);
2069 return MatchOperand_Success;
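// Illustrative note (not part of the original source): operand forms accepted
// by this parser include, for example:
//   -v0        - NEG modifier (or integer negation for literals, see above)
//   |v1|       - ABS modifier written with vertical bars
//   abs(v2)    - ABS modifier written in the named form
//   neg(|v3|)  - combined NEG and ABS modifiers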
2072 OperandMatchResultTy
2073 AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands,
2074 bool AllowImm) {
2075 bool Sext = false;
2077 if (getLexer().getKind() == AsmToken::Identifier &&
2078 Parser.getTok().getString() == "sext") {
2079 Parser.Lex();
2080 Sext = true;
2081 if (getLexer().isNot(AsmToken::LParen)) {
2082 Error(Parser.getTok().getLoc(), "expected left paren after sext");
2083 return MatchOperand_ParseFail;
2085 Parser.Lex();
2088 OperandMatchResultTy Res;
2089 if (AllowImm) {
2090 Res = parseRegOrImm(Operands);
2091 } else {
2092 Res = parseReg(Operands);
2094 if (Res != MatchOperand_Success) {
2095 return Res;
2098 AMDGPUOperand::Modifiers Mods;
2099 if (Sext) {
2100 if (getLexer().isNot(AsmToken::RParen)) {
2101 Error(Parser.getTok().getLoc(), "expected closing parentheses");
2102 return MatchOperand_ParseFail;
2104 Parser.Lex();
2105 Mods.Sext = true;
2108 if (Mods.hasIntModifiers()) {
2109 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
2110 Op.setModifiers(Mods);
2113 return MatchOperand_Success;
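// Illustrative note (not part of the original source): the integer input
// modifier has a single named form, e.g. sext(v0).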
2116 OperandMatchResultTy
2117 AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) {
2118 return parseRegOrImmWithFPInputMods(Operands, false);
2121 OperandMatchResultTy
2122 AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) {
2123 return parseRegOrImmWithIntInputMods(Operands, false);
2126 OperandMatchResultTy AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) {
2127 std::unique_ptr<AMDGPUOperand> Reg = parseRegister();
2128 if (Reg) {
2129 Operands.push_back(std::move(Reg));
2130 return MatchOperand_Success;
2133 const AsmToken &Tok = Parser.getTok();
2134 if (Tok.getString() == "off") {
2135 Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Tok.getLoc(),
2136 AMDGPUOperand::ImmTyOff, false));
2137 Parser.Lex();
2138 return MatchOperand_Success;
2141 return MatchOperand_NoMatch;
2144 unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
2145 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
2147 if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) ||
2148 (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) ||
2149 (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) ||
2150 (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) )
2151 return Match_InvalidOperand;
2153 if ((TSFlags & SIInstrFlags::VOP3) &&
2154 (TSFlags & SIInstrFlags::VOPAsmPrefer32Bit) &&
2155 getForcedEncodingSize() != 64)
2156 return Match_PreferE32;
2158 if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
2159 Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
2160 // v_mac_f32/16 allow only dst_sel == DWORD.
2161 auto OpNum =
2162 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel);
2163 const auto &Op = Inst.getOperand(OpNum);
2164 if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) {
2165 return Match_InvalidOperand;
2169 if ((TSFlags & SIInstrFlags::FLAT) && !hasFlatOffsets()) {
2170 // FIXME: Produces an error without reporting the correct column.
2171 auto OpNum =
2172 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::offset);
2173 const auto &Op = Inst.getOperand(OpNum);
2174 if (Op.getImm() != 0)
2175 return Match_InvalidOperand;
2178 return Match_Success;
2181 // What asm variants we should check
2182 ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const {
2183 if (getForcedEncodingSize() == 32) {
2184 static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT};
2185 return makeArrayRef(Variants);
2188 if (isForcedVOP3()) {
2189 static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3};
2190 return makeArrayRef(Variants);
2193 if (isForcedSDWA()) {
2194 static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA,
2195 AMDGPUAsmVariants::SDWA9};
2196 return makeArrayRef(Variants);
2199 if (isForcedDPP()) {
2200 static const unsigned Variants[] = {AMDGPUAsmVariants::DPP};
2201 return makeArrayRef(Variants);
2204 static const unsigned Variants[] = {
2205 AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3,
2206 AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9, AMDGPUAsmVariants::DPP
2209 return makeArrayRef(Variants);
2212 unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const {
2213 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
2214 const unsigned Num = Desc.getNumImplicitUses();
2215 for (unsigned i = 0; i < Num; ++i) {
2216 unsigned Reg = Desc.ImplicitUses[i];
2217 switch (Reg) {
2218 case AMDGPU::FLAT_SCR:
2219 case AMDGPU::VCC:
2220 case AMDGPU::M0:
2221 return Reg;
2222 default:
2223 break;
2226 return AMDGPU::NoRegister;
2229 // NB: This code is correct only when used to check constant
2230 // bus limitations because GFX7 has no f16 inline constants.
2231 // Note that there are no cases when a GFX7 opcode violates
2232 // constant bus limitations due to the use of an f16 constant.
2233 bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst,
2234 unsigned OpIdx) const {
2235 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
2237 if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) {
2238 return false;
2241 const MCOperand &MO = Inst.getOperand(OpIdx);
2243 int64_t Val = MO.getImm();
2244 auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx);
2246 switch (OpSize) { // expected operand size
2247 case 8:
2248 return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm());
2249 case 4:
2250 return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm());
2251 case 2: {
2252 const unsigned OperandType = Desc.OpInfo[OpIdx].OperandType;
2253 if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 ||
2254 OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16) {
2255 return AMDGPU::isInlinableLiteralV216(Val, hasInv2PiInlineImm());
2256 } else {
2257 return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm());
2260 default:
2261 llvm_unreachable("invalid operand size");
2265 bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) {
2266 const MCOperand &MO = Inst.getOperand(OpIdx);
2267 if (MO.isImm()) {
2268 return !isInlineConstant(Inst, OpIdx);
2270 return !MO.isReg() ||
2271 isSGPR(mc2PseudoReg(MO.getReg()), getContext().getRegisterInfo());
2274 bool AMDGPUAsmParser::validateConstantBusLimitations(const MCInst &Inst) {
2275 const unsigned Opcode = Inst.getOpcode();
2276 const MCInstrDesc &Desc = MII.get(Opcode);
2277 unsigned ConstantBusUseCount = 0;
2279 if (Desc.TSFlags &
2280 (SIInstrFlags::VOPC |
2281 SIInstrFlags::VOP1 | SIInstrFlags::VOP2 |
2282 SIInstrFlags::VOP3 | SIInstrFlags::VOP3P |
2283 SIInstrFlags::SDWA)) {
2284 // Check special imm operands (used by madmk, etc)
2285 if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1) {
2286 ++ConstantBusUseCount;
2289 unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst);
2290 if (SGPRUsed != AMDGPU::NoRegister) {
2291 ++ConstantBusUseCount;
2294 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
2295 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
2296 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
2298 const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };
2300 for (int OpIdx : OpIndices) {
2301 if (OpIdx == -1) break;
2303 const MCOperand &MO = Inst.getOperand(OpIdx);
2304 if (usesConstantBus(Inst, OpIdx)) {
2305 if (MO.isReg()) {
2306 const unsigned Reg = mc2PseudoReg(MO.getReg());
2307 // Pairs of registers with a partial intersection like these
2308 // s0, s[0:1]
2309 // flat_scratch_lo, flat_scratch
2310 // flat_scratch_lo, flat_scratch_hi
2311 // are theoretically valid but they are disabled anyway.
2312 // Note that this code mimics SIInstrInfo::verifyInstruction
2313 if (Reg != SGPRUsed) {
2314 ++ConstantBusUseCount;
2316 SGPRUsed = Reg;
2317 } else { // Expression or a literal
2318 ++ConstantBusUseCount;
2324 return ConstantBusUseCount <= 1;
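// Illustrative note (not part of the original source): under this rule an
// instruction such as v_add_f32_e64 v0, s0, s1 would be rejected (two
// distinct SGPRs on the constant bus), while v_add_f32_e64 v0, s0, s0 or
// v_add_f32_e64 v0, s0, v1 would pass (at most one constant bus read).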
2327 bool AMDGPUAsmParser::validateEarlyClobberLimitations(const MCInst &Inst) {
2328 const unsigned Opcode = Inst.getOpcode();
2329 const MCInstrDesc &Desc = MII.get(Opcode);
2331 const int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst);
2332 if (DstIdx == -1 ||
2333 Desc.getOperandConstraint(DstIdx, MCOI::EARLY_CLOBBER) == -1) {
2334 return true;
2337 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2339 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
2340 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
2341 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
2343 assert(DstIdx != -1);
2344 const MCOperand &Dst = Inst.getOperand(DstIdx);
2345 assert(Dst.isReg());
2346 const unsigned DstReg = mc2PseudoReg(Dst.getReg());
2348 const int SrcIndices[] = { Src0Idx, Src1Idx, Src2Idx };
2350 for (int SrcIdx : SrcIndices) {
2351 if (SrcIdx == -1) break;
2352 const MCOperand &Src = Inst.getOperand(SrcIdx);
2353 if (Src.isReg()) {
2354 const unsigned SrcReg = mc2PseudoReg(Src.getReg());
2355 if (isRegIntersect(DstReg, SrcReg, TRI)) {
2356 return false;
2361 return true;
2364 bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) {
2366 const unsigned Opc = Inst.getOpcode();
2367 const MCInstrDesc &Desc = MII.get(Opc);
2369 if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) {
2370 int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp);
2371 assert(ClampIdx != -1);
2372 return Inst.getOperand(ClampIdx).getImm() == 0;
2375 return true;
2378 bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst) {
2380 const unsigned Opc = Inst.getOpcode();
2381 const MCInstrDesc &Desc = MII.get(Opc);
2383 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
2384 return true;
2386 int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata);
2387 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
2388 int TFEIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe);
2390 assert(VDataIdx != -1);
2391 assert(DMaskIdx != -1);
2392 assert(TFEIdx != -1);
2394 unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx);
2395 unsigned TFESize = Inst.getOperand(TFEIdx).getImm()? 1 : 0;
2396 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
2397 if (DMask == 0)
2398 DMask = 1;
2400 unsigned DataSize =
2401 (Desc.TSFlags & SIInstrFlags::Gather4) ? 4 : countPopulation(DMask);
2402 if (hasPackedD16()) {
2403 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
2404 if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm())
2405 DataSize = (DataSize + 1) / 2;
2408 return (VDataSize / 4) == DataSize + TFESize;
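// Illustrative note (not part of the original source): for a non-gather load
// with dmask = 0b0111 the expected data size is 3 dwords, 4 with tfe set,
// and (3 + 1) / 2 = 2 dwords when a packed d16 form is used, so vdata must
// be a 2-, 3- or 4-register operand accordingly.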
2411 bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) {
2413 const unsigned Opc = Inst.getOpcode();
2414 const MCInstrDesc &Desc = MII.get(Opc);
2416 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
2417 return true;
2418 if (!Desc.mayLoad() || !Desc.mayStore())
2419 return true; // Not atomic
2421 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
2422 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
2424 // This is an incomplete check because image_atomic_cmpswap
2425 // may only use 0x3 and 0xf while other atomic operations
2426 // may use 0x1 and 0x3. However, these limitations are
2427 // verified when we check that dmask matches dst size.
2428 return DMask == 0x1 || DMask == 0x3 || DMask == 0xf;
2431 bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) {
2433 const unsigned Opc = Inst.getOpcode();
2434 const MCInstrDesc &Desc = MII.get(Opc);
2436 if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0)
2437 return true;
2439 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
2440 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
2442 // GATHER4 instructions use dmask in a different fashion compared to
2443 // other MIMG instructions. The only useful DMASK values are
2444 // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns
2445 // (red,red,red,red) etc.) The ISA document doesn't mention
2446 // this.
2447 return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8;
2450 bool AMDGPUAsmParser::validateMIMGR128(const MCInst &Inst) {
2452 const unsigned Opc = Inst.getOpcode();
2453 const MCInstrDesc &Desc = MII.get(Opc);
2455 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
2456 return true;
2458 int Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::r128);
2459 assert(Idx != -1);
2461 bool R128 = (Inst.getOperand(Idx).getImm() != 0);
2463 return !R128 || hasMIMG_R128();
2466 bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) {
2468 const unsigned Opc = Inst.getOpcode();
2469 const MCInstrDesc &Desc = MII.get(Opc);
2471 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
2472 return true;
2474 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
2475 if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) {
2476 if (isCI() || isSI())
2477 return false;
2480 return true;
2483 bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst,
2484 const SMLoc &IDLoc) {
2485 if (!validateConstantBusLimitations(Inst)) {
2486 Error(IDLoc,
2487 "invalid operand (violates constant bus restrictions)");
2488 return false;
2490 if (!validateEarlyClobberLimitations(Inst)) {
2491 Error(IDLoc,
2492 "destination must be different than all sources");
2493 return false;
2495 if (!validateIntClampSupported(Inst)) {
2496 Error(IDLoc,
2497 "integer clamping is not supported on this GPU");
2498 return false;
2500 if (!validateMIMGR128(Inst)) {
2501 Error(IDLoc,
2502 "r128 modifier is not supported on this GPU");
2503 return false;
2505 // For MUBUF/MTBUF d16 is a part of opcode, so there is nothing to validate.
2506 if (!validateMIMGD16(Inst)) {
2507 Error(IDLoc,
2508 "d16 modifier is not supported on this GPU");
2509 return false;
2511 if (!validateMIMGDataSize(Inst)) {
2512 Error(IDLoc,
2513 "image data size does not match dmask and tfe");
2514 return false;
2516 if (!validateMIMGAtomicDMask(Inst)) {
2517 Error(IDLoc,
2518 "invalid atomic image dmask");
2519 return false;
2521 if (!validateMIMGGatherDMask(Inst)) {
2522 Error(IDLoc,
2523 "invalid image_gather dmask: only one bit must be set");
2524 return false;
2527 return true;
2530 static std::string AMDGPUMnemonicSpellCheck(StringRef S, uint64_t FBS,
2531 unsigned VariantID = 0);
2533 bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
2534 OperandVector &Operands,
2535 MCStreamer &Out,
2536 uint64_t &ErrorInfo,
2537 bool MatchingInlineAsm) {
2538 MCInst Inst;
2539 unsigned Result = Match_Success;
2540 for (auto Variant : getMatchedVariants()) {
2541 uint64_t EI;
2542 auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm,
2543 Variant);
2544 // We order match statuses from least to most specific, and use the most
2545 // specific status as the result:
2546 // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature < Match_PreferE32
2547 if ((R == Match_Success) ||
2548 (R == Match_PreferE32) ||
2549 (R == Match_MissingFeature && Result != Match_PreferE32) ||
2550 (R == Match_InvalidOperand && Result != Match_MissingFeature
2551 && Result != Match_PreferE32) ||
2552 (R == Match_MnemonicFail && Result != Match_InvalidOperand
2553 && Result != Match_MissingFeature
2554 && Result != Match_PreferE32)) {
2555 Result = R;
2556 ErrorInfo = EI;
2558 if (R == Match_Success)
2559 break;
2562 switch (Result) {
2563 default: break;
2564 case Match_Success:
2565 if (!validateInstruction(Inst, IDLoc)) {
2566 return true;
2568 Inst.setLoc(IDLoc);
2569 Out.EmitInstruction(Inst, getSTI());
2570 return false;
2572 case Match_MissingFeature:
2573 return Error(IDLoc, "instruction not supported on this GPU");
2575 case Match_MnemonicFail: {
2576 uint64_t FBS = ComputeAvailableFeatures(getSTI().getFeatureBits());
2577 std::string Suggestion = AMDGPUMnemonicSpellCheck(
2578 ((AMDGPUOperand &)*Operands[0]).getToken(), FBS);
2579 return Error(IDLoc, "invalid instruction" + Suggestion,
2580 ((AMDGPUOperand &)*Operands[0]).getLocRange());
2583 case Match_InvalidOperand: {
2584 SMLoc ErrorLoc = IDLoc;
2585 if (ErrorInfo != ~0ULL) {
2586 if (ErrorInfo >= Operands.size()) {
2587 return Error(IDLoc, "too few operands for instruction");
2589 ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc();
2590 if (ErrorLoc == SMLoc())
2591 ErrorLoc = IDLoc;
2593 return Error(ErrorLoc, "invalid operand for instruction");
2596 case Match_PreferE32:
2597 return Error(IDLoc, "internal error: instruction without _e64 suffix "
2598 "should be encoded as e32");
2600 llvm_unreachable("Implement any new match types added!");
2603 bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) {
2604 int64_t Tmp = -1;
2605 if (getLexer().isNot(AsmToken::Integer) && getLexer().isNot(AsmToken::Identifier)) {
2606 return true;
2608 if (getParser().parseAbsoluteExpression(Tmp)) {
2609 return true;
2611 Ret = static_cast<uint32_t>(Tmp);
2612 return false;
2615 bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major,
2616 uint32_t &Minor) {
2617 if (ParseAsAbsoluteExpression(Major))
2618 return TokError("invalid major version");
2620 if (getLexer().isNot(AsmToken::Comma))
2621 return TokError("minor version number required, comma expected");
2622 Lex();
2624 if (ParseAsAbsoluteExpression(Minor))
2625 return TokError("invalid minor version");
2627 return false;
2630 bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() {
2631 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
2632 return TokError("directive only supported for amdgcn architecture");
2634 std::string Target;
2636 SMLoc TargetStart = getTok().getLoc();
2637 if (getParser().parseEscapedString(Target))
2638 return true;
2639 SMRange TargetRange = SMRange(TargetStart, getTok().getLoc());
2641 std::string ExpectedTarget;
2642 raw_string_ostream ExpectedTargetOS(ExpectedTarget);
2643 IsaInfo::streamIsaVersion(&getSTI(), ExpectedTargetOS);
2645 if (Target != ExpectedTargetOS.str())
2646 return getParser().Error(TargetRange.Start, "target must match options",
2647 TargetRange);
2649 getTargetStreamer().EmitDirectiveAMDGCNTarget(Target);
2650 return false;
2653 bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) {
2654 return getParser().Error(Range.Start, "value out of range", Range);
2657 bool AMDGPUAsmParser::calculateGPRBlocks(
2658 const FeatureBitset &Features, bool VCCUsed, bool FlatScrUsed,
2659 bool XNACKUsed, unsigned NextFreeVGPR, SMRange VGPRRange,
2660 unsigned NextFreeSGPR, SMRange SGPRRange, unsigned &VGPRBlocks,
2661 unsigned &SGPRBlocks) {
2662 // TODO(scott.linder): These calculations are duplicated from
2663 // AMDGPUAsmPrinter::getSIProgramInfo and could be unified.
2664 IsaInfo::IsaVersion Version = IsaInfo::getIsaVersion(Features);
2666 unsigned NumVGPRs = NextFreeVGPR;
2667 unsigned NumSGPRs = NextFreeSGPR;
2668 unsigned MaxAddressableNumSGPRs = IsaInfo::getAddressableNumSGPRs(Features);
2670 if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) &&
2671 NumSGPRs > MaxAddressableNumSGPRs)
2672 return OutOfRangeError(SGPRRange);
2674 NumSGPRs +=
2675 IsaInfo::getNumExtraSGPRs(Features, VCCUsed, FlatScrUsed, XNACKUsed);
2677 if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) &&
2678 NumSGPRs > MaxAddressableNumSGPRs)
2679 return OutOfRangeError(SGPRRange);
2681 if (Features.test(FeatureSGPRInitBug))
2682 NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG;
2684 VGPRBlocks = IsaInfo::getNumVGPRBlocks(Features, NumVGPRs);
2685 SGPRBlocks = IsaInfo::getNumSGPRBlocks(Features, NumSGPRs);
2687 return false;
2690 bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
2691 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
2692 return TokError("directive only supported for amdgcn architecture");
2694 if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA)
2695 return TokError("directive only supported for amdhsa OS");
2697 StringRef KernelName;
2698 if (getParser().parseIdentifier(KernelName))
2699 return true;
2701 kernel_descriptor_t KD = getDefaultAmdhsaKernelDescriptor();
2703 StringSet<> Seen;
2705 IsaInfo::IsaVersion IVersion =
2706 IsaInfo::getIsaVersion(getSTI().getFeatureBits());
2708 SMRange VGPRRange;
2709 uint64_t NextFreeVGPR = 0;
2710 SMRange SGPRRange;
2711 uint64_t NextFreeSGPR = 0;
2712 unsigned UserSGPRCount = 0;
2713 bool ReserveVCC = true;
2714 bool ReserveFlatScr = true;
2715 bool ReserveXNACK = hasXNACK();
2717 while (true) {
2718 while (getLexer().is(AsmToken::EndOfStatement))
2719 Lex();
2721 if (getLexer().isNot(AsmToken::Identifier))
2722 return TokError("expected .amdhsa_ directive or .end_amdhsa_kernel");
2724 StringRef ID = getTok().getIdentifier();
2725 SMRange IDRange = getTok().getLocRange();
2726 Lex();
2728 if (ID == ".end_amdhsa_kernel")
2729 break;
2731 if (Seen.find(ID) != Seen.end())
2732 return TokError(".amdhsa_ directives cannot be repeated");
2733 Seen.insert(ID);
2735 SMLoc ValStart = getTok().getLoc();
2736 int64_t IVal;
2737 if (getParser().parseAbsoluteExpression(IVal))
2738 return true;
2739 SMLoc ValEnd = getTok().getLoc();
2740 SMRange ValRange = SMRange(ValStart, ValEnd);
2742 if (IVal < 0)
2743 return OutOfRangeError(ValRange);
2745 uint64_t Val = IVal;
2747 #define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE) \
2748 if (!isUInt<ENTRY##_WIDTH>(VALUE)) \
2749 return OutOfRangeError(RANGE); \
2750 AMDHSA_BITS_SET(FIELD, ENTRY, VALUE);
2752 if (ID == ".amdhsa_group_segment_fixed_size") {
2753 if (!isUInt<sizeof(KD.group_segment_fixed_size) * CHAR_BIT>(Val))
2754 return OutOfRangeError(ValRange);
2755 KD.group_segment_fixed_size = Val;
2756 } else if (ID == ".amdhsa_private_segment_fixed_size") {
2757 if (!isUInt<sizeof(KD.private_segment_fixed_size) * CHAR_BIT>(Val))
2758 return OutOfRangeError(ValRange);
2759 KD.private_segment_fixed_size = Val;
2760 } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") {
2761 PARSE_BITS_ENTRY(KD.kernel_code_properties,
2762 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER,
2763 Val, ValRange);
2764 UserSGPRCount++;
2765 } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") {
2766 PARSE_BITS_ENTRY(KD.kernel_code_properties,
2767 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, Val,
2768 ValRange);
2769 UserSGPRCount++;
2770 } else if (ID == ".amdhsa_user_sgpr_queue_ptr") {
2771 PARSE_BITS_ENTRY(KD.kernel_code_properties,
2772 KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, Val,
2773 ValRange);
2774 UserSGPRCount++;
2775 } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") {
2776 PARSE_BITS_ENTRY(KD.kernel_code_properties,
2777 KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR,
2778 Val, ValRange);
2779 UserSGPRCount++;
2780 } else if (ID == ".amdhsa_user_sgpr_dispatch_id") {
2781 PARSE_BITS_ENTRY(KD.kernel_code_properties,
2782 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, Val,
2783 ValRange);
2784 UserSGPRCount++;
2785 } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") {
2786 PARSE_BITS_ENTRY(KD.kernel_code_properties,
2787 KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, Val,
2788 ValRange);
2789 UserSGPRCount++;
2790 } else if (ID == ".amdhsa_user_sgpr_private_segment_size") {
2791 PARSE_BITS_ENTRY(KD.kernel_code_properties,
2792 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE,
2793 Val, ValRange);
2794 UserSGPRCount++;
2795 } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") {
2796 PARSE_BITS_ENTRY(
2797 KD.compute_pgm_rsrc2,
2798 COMPUTE_PGM_RSRC2_ENABLE_SGPR_PRIVATE_SEGMENT_WAVEFRONT_OFFSET, Val,
2799 ValRange);
2800 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") {
2801 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
2802 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, Val,
2803 ValRange);
2804 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") {
2805 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
2806 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, Val,
2807 ValRange);
2808 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") {
2809 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
2810 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, Val,
2811 ValRange);
2812 } else if (ID == ".amdhsa_system_sgpr_workgroup_info") {
2813 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
2814 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, Val,
2815 ValRange);
2816 } else if (ID == ".amdhsa_system_vgpr_workitem_id") {
2817 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
2818 COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, Val,
2819 ValRange);
2820 } else if (ID == ".amdhsa_next_free_vgpr") {
2821 VGPRRange = ValRange;
2822 NextFreeVGPR = Val;
2823 } else if (ID == ".amdhsa_next_free_sgpr") {
2824 SGPRRange = ValRange;
2825 NextFreeSGPR = Val;
2826 } else if (ID == ".amdhsa_reserve_vcc") {
2827 if (!isUInt<1>(Val))
2828 return OutOfRangeError(ValRange);
2829 ReserveVCC = Val;
2830 } else if (ID == ".amdhsa_reserve_flat_scratch") {
2831 if (IVersion.Major < 7)
2832 return getParser().Error(IDRange.Start, "directive requires gfx7+",
2833 IDRange);
2834 if (!isUInt<1>(Val))
2835 return OutOfRangeError(ValRange);
2836 ReserveFlatScr = Val;
2837 } else if (ID == ".amdhsa_reserve_xnack_mask") {
2838 if (IVersion.Major < 8)
2839 return getParser().Error(IDRange.Start, "directive requires gfx8+",
2840 IDRange);
2841 if (!isUInt<1>(Val))
2842 return OutOfRangeError(ValRange);
2843 ReserveXNACK = Val;
2844 } else if (ID == ".amdhsa_float_round_mode_32") {
2845 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
2846 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, Val, ValRange);
2847 } else if (ID == ".amdhsa_float_round_mode_16_64") {
2848 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
2849 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, Val, ValRange);
2850 } else if (ID == ".amdhsa_float_denorm_mode_32") {
2851 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
2852 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, Val, ValRange);
2853 } else if (ID == ".amdhsa_float_denorm_mode_16_64") {
2854 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
2855 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, Val,
2856 ValRange);
2857 } else if (ID == ".amdhsa_dx10_clamp") {
2858 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
2859 COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP, Val, ValRange);
2860 } else if (ID == ".amdhsa_ieee_mode") {
2861 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE,
2862 Val, ValRange);
2863 } else if (ID == ".amdhsa_fp16_overflow") {
2864 if (IVersion.Major < 9)
2865 return getParser().Error(IDRange.Start, "directive requires gfx9+",
2866 IDRange);
2867 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FP16_OVFL, Val,
2868 ValRange);
2869 } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") {
2870 PARSE_BITS_ENTRY(
2871 KD.compute_pgm_rsrc2,
2872 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, Val,
2873 ValRange);
2874 } else if (ID == ".amdhsa_exception_fp_denorm_src") {
2875 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
2876 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE,
2877 Val, ValRange);
2878 } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") {
2879 PARSE_BITS_ENTRY(
2880 KD.compute_pgm_rsrc2,
2881 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, Val,
2882 ValRange);
2883 } else if (ID == ".amdhsa_exception_fp_ieee_overflow") {
2884 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
2885 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW,
2886 Val, ValRange);
2887 } else if (ID == ".amdhsa_exception_fp_ieee_underflow") {
2888 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
2889 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW,
2890 Val, ValRange);
2891 } else if (ID == ".amdhsa_exception_fp_ieee_inexact") {
2892 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
2893 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT,
2894 Val, ValRange);
2895 } else if (ID == ".amdhsa_exception_int_div_zero") {
2896 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
2897 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO,
2898 Val, ValRange);
2899 } else {
2900 return getParser().Error(IDRange.Start,
2901 "unknown .amdhsa_kernel directive", IDRange);
2904 #undef PARSE_BITS_ENTRY
2907 if (Seen.find(".amdhsa_next_free_vgpr") == Seen.end())
2908 return TokError(".amdhsa_next_free_vgpr directive is required");
2910 if (Seen.find(".amdhsa_next_free_sgpr") == Seen.end())
2911 return TokError(".amdhsa_next_free_sgpr directive is required");
2913 unsigned VGPRBlocks;
2914 unsigned SGPRBlocks;
2915 if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr,
2916 ReserveXNACK, NextFreeVGPR, VGPRRange, NextFreeSGPR,
2917 SGPRRange, VGPRBlocks, SGPRBlocks))
2918 return true;
2920 if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>(
2921 VGPRBlocks))
2922 return OutOfRangeError(VGPRRange);
2923 AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
2924 COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, VGPRBlocks);
2926 if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>(
2927 SGPRBlocks))
2928 return OutOfRangeError(SGPRRange);
2929 AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
2930 COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT,
2931 SGPRBlocks);
2933 if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount))
2934 return TokError("too many user SGPRs enabled");
2935 AMDHSA_BITS_SET(KD.compute_pgm_rsrc2, COMPUTE_PGM_RSRC2_USER_SGPR_COUNT,
2936 UserSGPRCount);
2938 getTargetStreamer().EmitAmdhsaKernelDescriptor(
2939 getSTI(), KernelName, KD, NextFreeVGPR, NextFreeSGPR, ReserveVCC,
2940 ReserveFlatScr, ReserveXNACK);
2941 return false;
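// Illustrative note (not part of the original source): a minimal use of this
// directive might look like
//   .amdhsa_kernel my_kernel
//     .amdhsa_next_free_vgpr 8
//     .amdhsa_next_free_sgpr 16
//   .end_amdhsa_kernel
// where the two .amdhsa_next_free_* directives are mandatory (see the checks
// above) and every other .amdhsa_ field keeps the default from
// getDefaultAmdhsaKernelDescriptor().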
2944 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectVersion() {
2945 uint32_t Major;
2946 uint32_t Minor;
2948 if (ParseDirectiveMajorMinor(Major, Minor))
2949 return true;
2951 getTargetStreamer().EmitDirectiveHSACodeObjectVersion(Major, Minor);
2952 return false;
2955 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() {
2956 uint32_t Major;
2957 uint32_t Minor;
2958 uint32_t Stepping;
2959 StringRef VendorName;
2960 StringRef ArchName;
2962 // If this directive has no arguments, then use the ISA version for the
2963 // targeted GPU.
2964 if (getLexer().is(AsmToken::EndOfStatement)) {
2965 AMDGPU::IsaInfo::IsaVersion ISA =
2966 AMDGPU::IsaInfo::getIsaVersion(getFeatureBits());
2967 getTargetStreamer().EmitDirectiveHSACodeObjectISA(ISA.Major, ISA.Minor,
2968 ISA.Stepping,
2969 "AMD", "AMDGPU");
2970 return false;
2973 if (ParseDirectiveMajorMinor(Major, Minor))
2974 return true;
2976 if (getLexer().isNot(AsmToken::Comma))
2977 return TokError("stepping version number required, comma expected");
2978 Lex();
2980 if (ParseAsAbsoluteExpression(Stepping))
2981 return TokError("invalid stepping version");
2983 if (getLexer().isNot(AsmToken::Comma))
2984 return TokError("vendor name required, comma expected");
2985 Lex();
2987 if (getLexer().isNot(AsmToken::String))
2988 return TokError("invalid vendor name");
2990 VendorName = getLexer().getTok().getStringContents();
2991 Lex();
2993 if (getLexer().isNot(AsmToken::Comma))
2994 return TokError("arch name required, comma expected");
2995 Lex();
2997 if (getLexer().isNot(AsmToken::String))
2998 return TokError("invalid arch name");
3000 ArchName = getLexer().getTok().getStringContents();
3001 Lex();
3003 getTargetStreamer().EmitDirectiveHSACodeObjectISA(Major, Minor, Stepping,
3004 VendorName, ArchName);
3005 return false;
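// Illustrative note (not part of the original source): this directive is
// either written without arguments, in which case the target's own ISA
// version is emitted, or spelled out in full, e.g.
//   .hsa_code_object_isa 8,0,3,"AMD","AMDGPU"
// with major, minor, stepping, vendor and arch in that order.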
3008 bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID,
3009 amd_kernel_code_t &Header) {
3010 // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing
3011 // assembly for backwards compatibility.
3012 if (ID == "max_scratch_backing_memory_byte_size") {
3013 Parser.eatToEndOfStatement();
3014 return false;
3017 SmallString<40> ErrStr;
3018 raw_svector_ostream Err(ErrStr);
3019 if (!parseAmdKernelCodeField(ID, getParser(), Header, Err)) {
3020 return TokError(Err.str());
3022 Lex();
3023 return false;
3026 bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() {
3027 amd_kernel_code_t Header;
3028 AMDGPU::initDefaultAMDKernelCodeT(Header, getFeatureBits());
3030 while (true) {
3031 // Lex EndOfStatement. This is in a while loop, because lexing a comment
3032 // will set the current token to EndOfStatement.
3033 while(getLexer().is(AsmToken::EndOfStatement))
3034 Lex();
3036 if (getLexer().isNot(AsmToken::Identifier))
3037 return TokError("expected value identifier or .end_amd_kernel_code_t");
3039 StringRef ID = getLexer().getTok().getIdentifier();
3040 Lex();
3042 if (ID == ".end_amd_kernel_code_t")
3043 break;
3045 if (ParseAMDKernelCodeTValue(ID, Header))
3046 return true;
3049 getTargetStreamer().EmitAMDKernelCodeT(Header);
3051 return false;
3054 bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() {
3055 if (getLexer().isNot(AsmToken::Identifier))
3056 return TokError("expected symbol name");
3058 StringRef KernelName = Parser.getTok().getString();
3060 getTargetStreamer().EmitAMDGPUSymbolType(KernelName,
3061 ELF::STT_AMDGPU_HSA_KERNEL);
3062 Lex();
3063 if (!AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI()))
3064 KernelScope.initialize(getContext());
3065 return false;
3068 bool AMDGPUAsmParser::ParseDirectiveISAVersion() {
3069 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) {
3070 return Error(getParser().getTok().getLoc(),
3071 ".amd_amdgpu_isa directive is not available on non-amdgcn "
3072 "architectures");
3075 auto ISAVersionStringFromASM = getLexer().getTok().getStringContents();
3077 std::string ISAVersionStringFromSTI;
3078 raw_string_ostream ISAVersionStreamFromSTI(ISAVersionStringFromSTI);
3079 IsaInfo::streamIsaVersion(&getSTI(), ISAVersionStreamFromSTI);
3081 if (ISAVersionStringFromASM != ISAVersionStreamFromSTI.str()) {
3082 return Error(getParser().getTok().getLoc(),
3083 ".amd_amdgpu_isa directive does not match triple and/or mcpu "
3084 "arguments specified through the command line");
3087 getTargetStreamer().EmitISAVersion(ISAVersionStreamFromSTI.str());
3088 Lex();
3090 return false;
3093 bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() {
3094 if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) {
3095 return Error(getParser().getTok().getLoc(),
3096 (Twine(HSAMD::AssemblerDirectiveBegin) + Twine(" directive is "
3097 "not available on non-amdhsa OSes")).str());
3100 std::string HSAMetadataString;
3101 raw_string_ostream YamlStream(HSAMetadataString);
3103 getLexer().setSkipSpace(false);
3105 bool FoundEnd = false;
3106 while (!getLexer().is(AsmToken::Eof)) {
3107 while (getLexer().is(AsmToken::Space)) {
3108 YamlStream << getLexer().getTok().getString();
3109 Lex();
3112 if (getLexer().is(AsmToken::Identifier)) {
3113 StringRef ID = getLexer().getTok().getIdentifier();
3114 if (ID == AMDGPU::HSAMD::AssemblerDirectiveEnd) {
3115 Lex();
3116 FoundEnd = true;
3117 break;
3121 YamlStream << Parser.parseStringToEndOfStatement()
3122 << getContext().getAsmInfo()->getSeparatorString();
3124 Parser.eatToEndOfStatement();
3127 getLexer().setSkipSpace(true);
3129 if (getLexer().is(AsmToken::Eof) && !FoundEnd) {
3130 return TokError(Twine("expected directive ") +
3131 Twine(HSAMD::AssemblerDirectiveEnd) + Twine(" not found"));
3134 YamlStream.flush();
3136 if (!getTargetStreamer().EmitHSAMetadata(HSAMetadataString))
3137 return Error(getParser().getTok().getLoc(), "invalid HSA metadata");
3139 return false;
3142 bool AMDGPUAsmParser::ParseDirectivePALMetadata() {
3143 if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) {
3144 return Error(getParser().getTok().getLoc(),
3145 (Twine(PALMD::AssemblerDirective) + Twine(" directive is "
3146 "not available on non-amdpal OSes")).str());
3149 PALMD::Metadata PALMetadata;
3150 for (;;) {
3151 uint32_t Value;
3152 if (ParseAsAbsoluteExpression(Value)) {
3153 return TokError(Twine("invalid value in ") +
3154 Twine(PALMD::AssemblerDirective));
3156 PALMetadata.push_back(Value);
3157 if (getLexer().isNot(AsmToken::Comma))
3158 break;
3159 Lex();
3161 getTargetStreamer().EmitPALMetadata(PALMetadata);
3162 return false;
3165 bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
3166 StringRef IDVal = DirectiveID.getString();
3168 if (AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
3169 if (IDVal == ".amdgcn_target")
3170 return ParseDirectiveAMDGCNTarget();
3172 if (IDVal == ".amdhsa_kernel")
3173 return ParseDirectiveAMDHSAKernel();
3174 } else {
3175 if (IDVal == ".hsa_code_object_version")
3176 return ParseDirectiveHSACodeObjectVersion();
3178 if (IDVal == ".hsa_code_object_isa")
3179 return ParseDirectiveHSACodeObjectISA();
3181 if (IDVal == ".amd_kernel_code_t")
3182 return ParseDirectiveAMDKernelCodeT();
3184 if (IDVal == ".amdgpu_hsa_kernel")
3185 return ParseDirectiveAMDGPUHsaKernel();
3187 if (IDVal == ".amd_amdgpu_isa")
3188 return ParseDirectiveISAVersion();
3191 if (IDVal == AMDGPU::HSAMD::AssemblerDirectiveBegin)
3192 return ParseDirectiveHSAMetadata();
3194 if (IDVal == PALMD::AssemblerDirective)
3195 return ParseDirectivePALMetadata();
3197 return true;
3200 bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI,
3201 unsigned RegNo) const {
3203 for (MCRegAliasIterator R(AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15, &MRI, true);
3204 R.isValid(); ++R) {
3205 if (*R == RegNo)
3206 return isGFX9();
3209 switch (RegNo) {
3210 case AMDGPU::TBA:
3211 case AMDGPU::TBA_LO:
3212 case AMDGPU::TBA_HI:
3213 case AMDGPU::TMA:
3214 case AMDGPU::TMA_LO:
3215 case AMDGPU::TMA_HI:
3216 return !isGFX9();
3217 case AMDGPU::XNACK_MASK:
3218 case AMDGPU::XNACK_MASK_LO:
3219 case AMDGPU::XNACK_MASK_HI:
3220 return !isCI() && !isSI() && hasXNACK();
3221 default:
3222 break;
3225 if (isCI())
3226 return true;
3228 if (isSI()) {
3229 // No flat_scr
3230 switch (RegNo) {
3231 case AMDGPU::FLAT_SCR:
3232 case AMDGPU::FLAT_SCR_LO:
3233 case AMDGPU::FLAT_SCR_HI:
3234 return false;
3235 default:
3236 return true;
3240 // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that
3241 // SI/CI have.
3242 for (MCRegAliasIterator R(AMDGPU::SGPR102_SGPR103, &MRI, true);
3243 R.isValid(); ++R) {
3244 if (*R == RegNo)
3245 return false;
3248 return true;
3251 OperandMatchResultTy
3252 AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic) {
3253 // Try to parse with a custom parser
3254 OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic);
3256 // If we successfully parsed the operand or if there was an error parsing,
3257 // we are done.
3259 // If we are parsing after we reach EndOfStatement then this means we
3260 // are appending default values to the Operands list. This is only done
3261 // by a custom parser, so we shouldn't continue on to the generic parsing.
3262 if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail ||
3263 getLexer().is(AsmToken::EndOfStatement))
3264 return ResTy;
3266 ResTy = parseRegOrImm(Operands);
3268 if (ResTy == MatchOperand_Success)
3269 return ResTy;
3271 const auto &Tok = Parser.getTok();
3272 SMLoc S = Tok.getLoc();
3274 const MCExpr *Expr = nullptr;
3275 if (!Parser.parseExpression(Expr)) {
3276 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
3277 return MatchOperand_Success;
3280 // Possibly this is an instruction flag like 'gds'.
3281 if (Tok.getKind() == AsmToken::Identifier) {
3282 Operands.push_back(AMDGPUOperand::CreateToken(this, Tok.getString(), S));
3283 Parser.Lex();
3284 return MatchOperand_Success;
3287 return MatchOperand_NoMatch;
3290 StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) {
3291 // Clear any forced encodings from the previous instruction.
3292 setForcedEncodingSize(0);
3293 setForcedDPP(false);
3294 setForcedSDWA(false);
3296 if (Name.endswith("_e64")) {
3297 setForcedEncodingSize(64);
3298 return Name.substr(0, Name.size() - 4);
3299 } else if (Name.endswith("_e32")) {
3300 setForcedEncodingSize(32);
3301 return Name.substr(0, Name.size() - 4);
3302 } else if (Name.endswith("_dpp")) {
3303 setForcedDPP(true);
3304 return Name.substr(0, Name.size() - 4);
3305 } else if (Name.endswith("_sdwa")) {
3306 setForcedSDWA(true);
3307 return Name.substr(0, Name.size() - 5);
3309 return Name;
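// Illustrative note (not part of the original source): for example,
// "v_add_f32_e64" forces the 64-bit (VOP3) encoding and "v_add_f32_e32"
// forces the 32-bit encoding, while the "_dpp" and "_sdwa" suffixes force
// the DPP and SDWA variants; the suffix is stripped before matching.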
3312 bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info,
3313 StringRef Name,
3314 SMLoc NameLoc, OperandVector &Operands) {
3315 // Add the instruction mnemonic
3316 Name = parseMnemonicSuffix(Name);
3317 Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc));
3319 while (!getLexer().is(AsmToken::EndOfStatement)) {
3320 OperandMatchResultTy Res = parseOperand(Operands, Name);
3322 // Eat the comma or space if there is one.
3323 if (getLexer().is(AsmToken::Comma))
3324 Parser.Lex();
3326 switch (Res) {
3327 case MatchOperand_Success: break;
3328 case MatchOperand_ParseFail:
3329 Error(getLexer().getLoc(), "failed parsing operand.");
3330 while (!getLexer().is(AsmToken::EndOfStatement)) {
3331 Parser.Lex();
3333 return true;
3334 case MatchOperand_NoMatch:
3335 Error(getLexer().getLoc(), "not a valid operand.");
3336 while (!getLexer().is(AsmToken::EndOfStatement)) {
3337 Parser.Lex();
3339 return true;
3343 return false;
3346 //===----------------------------------------------------------------------===//
3347 // Utility functions
3348 //===----------------------------------------------------------------------===//
3350 OperandMatchResultTy
3351 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, int64_t &Int) {
3352 switch(getLexer().getKind()) {
3353 default: return MatchOperand_NoMatch;
3354 case AsmToken::Identifier: {
3355 StringRef Name = Parser.getTok().getString();
3356 if (!Name.equals(Prefix)) {
3357 return MatchOperand_NoMatch;
3360 Parser.Lex();
3361 if (getLexer().isNot(AsmToken::Colon))
3362 return MatchOperand_ParseFail;
3364 Parser.Lex();
3366 bool IsMinus = false;
3367 if (getLexer().getKind() == AsmToken::Minus) {
3368 Parser.Lex();
3369 IsMinus = true;
3372 if (getLexer().isNot(AsmToken::Integer))
3373 return MatchOperand_ParseFail;
3375 if (getParser().parseAbsoluteExpression(Int))
3376 return MatchOperand_ParseFail;
3378 if (IsMinus)
3379 Int = -Int;
3380 break;
3383 return MatchOperand_Success;
3386 OperandMatchResultTy
3387 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
3388 AMDGPUOperand::ImmTy ImmTy,
3389 bool (*ConvertResult)(int64_t&)) {
3390 SMLoc S = Parser.getTok().getLoc();
3391 int64_t Value = 0;
3393 OperandMatchResultTy Res = parseIntWithPrefix(Prefix, Value);
3394 if (Res != MatchOperand_Success)
3395 return Res;
3397 if (ConvertResult && !ConvertResult(Value)) {
3398 return MatchOperand_ParseFail;
3401 Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy));
3402 return MatchOperand_Success;
3405 OperandMatchResultTy AMDGPUAsmParser::parseOperandArrayWithPrefix(
3406 const char *Prefix,
3407 OperandVector &Operands,
3408 AMDGPUOperand::ImmTy ImmTy,
3409 bool (*ConvertResult)(int64_t&)) {
3410 StringRef Name = Parser.getTok().getString();
3411 if (!Name.equals(Prefix))
3412 return MatchOperand_NoMatch;
3414 Parser.Lex();
3415 if (getLexer().isNot(AsmToken::Colon))
3416 return MatchOperand_ParseFail;
3418 Parser.Lex();
3419 if (getLexer().isNot(AsmToken::LBrac))
3420 return MatchOperand_ParseFail;
3421 Parser.Lex();
3423 unsigned Val = 0;
3424 SMLoc S = Parser.getTok().getLoc();
3426 // FIXME: How to verify the number of elements matches the number of src
3427 // operands?
3428 for (int I = 0; I < 4; ++I) {
3429 if (I != 0) {
3430 if (getLexer().is(AsmToken::RBrac))
3431 break;
3433 if (getLexer().isNot(AsmToken::Comma))
3434 return MatchOperand_ParseFail;
3435 Parser.Lex();
3438 if (getLexer().isNot(AsmToken::Integer))
3439 return MatchOperand_ParseFail;
3441 int64_t Op;
3442 if (getParser().parseAbsoluteExpression(Op))
3443 return MatchOperand_ParseFail;
3445 if (Op != 0 && Op != 1)
3446 return MatchOperand_ParseFail;
3447 Val |= (Op << I);
3450 Parser.Lex();
3451 Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy));
3452 return MatchOperand_Success;
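// Illustrative note (not part of the original source): this parser accepts
// operands of the form prefix:[b0,b1,...] where each element is 0 or 1 and
// at most four elements are read; e.g. a hypothetical "op_sel:[0,1]" would
// yield Val = 0b10.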
3455 OperandMatchResultTy
3456 AMDGPUAsmParser::parseNamedBit(const char *Name, OperandVector &Operands,
3457 AMDGPUOperand::ImmTy ImmTy) {
3458 int64_t Bit = 0;
3459 SMLoc S = Parser.getTok().getLoc();
3461 // If we are at the end of the statement, this is a default argument, so
3462 // use the default value.
3463 if (getLexer().isNot(AsmToken::EndOfStatement)) {
3464 switch(getLexer().getKind()) {
3465 case AsmToken::Identifier: {
3466 StringRef Tok = Parser.getTok().getString();
3467 if (Tok == Name) {
3468 Bit = 1;
3469 Parser.Lex();
3470 } else if (Tok.startswith("no") && Tok.endswith(Name)) {
3471 Bit = 0;
3472 Parser.Lex();
3473 } else {
3474 return MatchOperand_NoMatch;
3476 break;
3478 default:
3479 return MatchOperand_NoMatch;
3483 Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy));
3484 return MatchOperand_Success;
3487 static void addOptionalImmOperand(
3488 MCInst& Inst, const OperandVector& Operands,
3489 AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx,
3490 AMDGPUOperand::ImmTy ImmT,
3491 int64_t Default = 0) {
3492 auto i = OptionalIdx.find(ImmT);
3493 if (i != OptionalIdx.end()) {
3494 unsigned Idx = i->second;
3495 ((AMDGPUOperand &)*Operands[Idx]).addImmOperands(Inst, 1);
3496 } else {
3497 Inst.addOperand(MCOperand::createImm(Default));
3501 OperandMatchResultTy
3502 AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix, StringRef &Value) {
3503 if (getLexer().isNot(AsmToken::Identifier)) {
3504 return MatchOperand_NoMatch;
3506 StringRef Tok = Parser.getTok().getString();
3507 if (Tok != Prefix) {
3508 return MatchOperand_NoMatch;
3511 Parser.Lex();
3512 if (getLexer().isNot(AsmToken::Colon)) {
3513 return MatchOperand_ParseFail;
3516 Parser.Lex();
3517 if (getLexer().isNot(AsmToken::Identifier)) {
3518 return MatchOperand_ParseFail;
3521 Value = Parser.getTok().getString();
3522 return MatchOperand_Success;
3525 //===----------------------------------------------------------------------===//
3526 // ds
3527 //===----------------------------------------------------------------------===//
3529 void AMDGPUAsmParser::cvtDSOffset01(MCInst &Inst,
3530 const OperandVector &Operands) {
3531 OptionalImmIndexMap OptionalIdx;
3533 for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
3534 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
3536 // Add the register arguments
3537 if (Op.isReg()) {
3538 Op.addRegOperands(Inst, 1);
3539 continue;
3542 // Handle optional arguments
3543 OptionalIdx[Op.getImmTy()] = i;
3546 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset0);
3547 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset1);
3548 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);
3550 Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
3553 void AMDGPUAsmParser::cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
3554 bool IsGdsHardcoded) {
3555 OptionalImmIndexMap OptionalIdx;
3557 for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
3558 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
3560 // Add the register arguments
3561 if (Op.isReg()) {
3562 Op.addRegOperands(Inst, 1);
3563 continue;
3566 if (Op.isToken() && Op.getToken() == "gds") {
3567 IsGdsHardcoded = true;
3568 continue;
3571 // Handle optional arguments
3572 OptionalIdx[Op.getImmTy()] = i;
3575 AMDGPUOperand::ImmTy OffsetType =
3576 (Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_si ||
3577 Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_vi) ? AMDGPUOperand::ImmTySwizzle :
3578 AMDGPUOperand::ImmTyOffset;
3580 addOptionalImmOperand(Inst, Operands, OptionalIdx, OffsetType);
3582 if (!IsGdsHardcoded) {
3583 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);
3585 Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
3588 void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) {
3589 OptionalImmIndexMap OptionalIdx;
3591 unsigned OperandIdx[4];
3592 unsigned EnMask = 0;
3593 int SrcIdx = 0;
3595 for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
3596 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
3598 // Add the register arguments
3599 if (Op.isReg()) {
3600 assert(SrcIdx < 4);
3601 OperandIdx[SrcIdx] = Inst.size();
3602 Op.addRegOperands(Inst, 1);
3603 ++SrcIdx;
3604 continue;
3607 if (Op.isOff()) {
3608 assert(SrcIdx < 4);
3609 OperandIdx[SrcIdx] = Inst.size();
3610 Inst.addOperand(MCOperand::createReg(AMDGPU::NoRegister));
3611 ++SrcIdx;
3612 continue;
3615 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) {
3616 Op.addImmOperands(Inst, 1);
3617 continue;
3620 if (Op.isToken() && Op.getToken() == "done")
3621 continue;
3623 // Handle optional arguments
3624 OptionalIdx[Op.getImmTy()] = i;
3627 assert(SrcIdx == 4);
3629 bool Compr = false;
3630 if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) {
3631 Compr = true;
3632 Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]);
3633 Inst.getOperand(OperandIdx[2]).setReg(AMDGPU::NoRegister);
3634 Inst.getOperand(OperandIdx[3]).setReg(AMDGPU::NoRegister);
3637 for (auto i = 0; i < SrcIdx; ++i) {
3638 if (Inst.getOperand(OperandIdx[i]).getReg() != AMDGPU::NoRegister) {
3639 EnMask |= Compr? (0x3 << i * 2) : (0x1 << i);
3643 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM);
3644 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr);
3646 Inst.addOperand(MCOperand::createImm(EnMask));
3649 //===----------------------------------------------------------------------===//
3650 // s_waitcnt
3651 //===----------------------------------------------------------------------===//
3653 static bool
3654 encodeCnt(
3655 const AMDGPU::IsaInfo::IsaVersion ISA,
3656 int64_t &IntVal,
3657 int64_t CntVal,
3658 bool Saturate,
3659 unsigned (*encode)(const IsaInfo::IsaVersion &Version, unsigned, unsigned),
3660 unsigned (*decode)(const IsaInfo::IsaVersion &Version, unsigned))
3662 bool Failed = false;
3664 IntVal = encode(ISA, IntVal, CntVal);
3665 if (CntVal != decode(ISA, IntVal)) {
3666 if (Saturate) {
3667 IntVal = encode(ISA, IntVal, -1);
3668 } else {
3669 Failed = true;
3672 return Failed;
3675 bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) {
3676 StringRef CntName = Parser.getTok().getString();
3677 int64_t CntVal;
3679 Parser.Lex();
3680 if (getLexer().isNot(AsmToken::LParen))
3681 return true;
3683 Parser.Lex();
3684 if (getLexer().isNot(AsmToken::Integer))
3685 return true;
3687 SMLoc ValLoc = Parser.getTok().getLoc();
3688 if (getParser().parseAbsoluteExpression(CntVal))
3689 return true;
3691 AMDGPU::IsaInfo::IsaVersion ISA =
3692 AMDGPU::IsaInfo::getIsaVersion(getFeatureBits());
3694 bool Failed = true;
3695 bool Sat = CntName.endswith("_sat");
3697 if (CntName == "vmcnt" || CntName == "vmcnt_sat") {
3698 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt);
3699 } else if (CntName == "expcnt" || CntName == "expcnt_sat") {
3700 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt);
3701 } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") {
3702 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt);
3705 if (Failed) {
3706     Error(ValLoc, "too large a value for " + CntName);
3707 return true;
3710 if (getLexer().isNot(AsmToken::RParen)) {
3711 return true;
3714 Parser.Lex();
3715 if (getLexer().is(AsmToken::Amp) || getLexer().is(AsmToken::Comma)) {
3716 const AsmToken NextToken = getLexer().peekTok();
3717 if (NextToken.is(AsmToken::Identifier)) {
3718 Parser.Lex();
3722 return false;
3725 OperandMatchResultTy
3726 AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) {
3727 AMDGPU::IsaInfo::IsaVersion ISA =
3728 AMDGPU::IsaInfo::getIsaVersion(getFeatureBits());
3729 int64_t Waitcnt = getWaitcntBitMask(ISA);
3730 SMLoc S = Parser.getTok().getLoc();
3732 switch(getLexer().getKind()) {
3733 default: return MatchOperand_ParseFail;
3734 case AsmToken::Integer:
3735 // The operand can be an integer value.
3736 if (getParser().parseAbsoluteExpression(Waitcnt))
3737 return MatchOperand_ParseFail;
3738 break;
3740 case AsmToken::Identifier:
3741 do {
3742 if (parseCnt(Waitcnt))
3743 return MatchOperand_ParseFail;
3744 } while(getLexer().isNot(AsmToken::EndOfStatement));
3745 break;
3747 Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S));
3748 return MatchOperand_Success;
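// parseHwregConstruct handles the symbolic form of the s_getreg/s_setreg
// operand, roughly:
//   hwreg(<name or id> [, <bit offset>, <bit width>])
// where <name> is one of the IdSymbolic[] entries; the offset defaults to
// OFFSET_DEFAULT_ and the width defaults to WIDTH_M1_DEFAULT_ + 1.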
3751 bool AMDGPUAsmParser::parseHwregConstruct(OperandInfoTy &HwReg, int64_t &Offset,
3752 int64_t &Width) {
3753 using namespace llvm::AMDGPU::Hwreg;
3755 if (Parser.getTok().getString() != "hwreg")
3756 return true;
3757 Parser.Lex();
3759 if (getLexer().isNot(AsmToken::LParen))
3760 return true;
3761 Parser.Lex();
3763 if (getLexer().is(AsmToken::Identifier)) {
3764 HwReg.IsSymbolic = true;
3765 HwReg.Id = ID_UNKNOWN_;
3766 const StringRef tok = Parser.getTok().getString();
3767 int Last = ID_SYMBOLIC_LAST_;
3768 if (isSI() || isCI() || isVI())
3769 Last = ID_SYMBOLIC_FIRST_GFX9_;
3770 for (int i = ID_SYMBOLIC_FIRST_; i < Last; ++i) {
3771 if (tok == IdSymbolic[i]) {
3772 HwReg.Id = i;
3773 break;
3776 Parser.Lex();
3777 } else {
3778 HwReg.IsSymbolic = false;
3779 if (getLexer().isNot(AsmToken::Integer))
3780 return true;
3781 if (getParser().parseAbsoluteExpression(HwReg.Id))
3782 return true;
3785 if (getLexer().is(AsmToken::RParen)) {
3786 Parser.Lex();
3787 return false;
3790 // optional params
3791 if (getLexer().isNot(AsmToken::Comma))
3792 return true;
3793 Parser.Lex();
3795 if (getLexer().isNot(AsmToken::Integer))
3796 return true;
3797 if (getParser().parseAbsoluteExpression(Offset))
3798 return true;
3800 if (getLexer().isNot(AsmToken::Comma))
3801 return true;
3802 Parser.Lex();
3804 if (getLexer().isNot(AsmToken::Integer))
3805 return true;
3806 if (getParser().parseAbsoluteExpression(Width))
3807 return true;
3809 if (getLexer().isNot(AsmToken::RParen))
3810 return true;
3811 Parser.Lex();
3813 return false;
3816 OperandMatchResultTy AMDGPUAsmParser::parseHwreg(OperandVector &Operands) {
3817 using namespace llvm::AMDGPU::Hwreg;
3819 int64_t Imm16Val = 0;
3820 SMLoc S = Parser.getTok().getLoc();
3822 switch(getLexer().getKind()) {
3823 default: return MatchOperand_NoMatch;
3824 case AsmToken::Integer:
3825 // The operand can be an integer value.
3826 if (getParser().parseAbsoluteExpression(Imm16Val))
3827 return MatchOperand_NoMatch;
3828 if (Imm16Val < 0 || !isUInt<16>(Imm16Val)) {
3829 Error(S, "invalid immediate: only 16-bit values are legal");
3830       // Do not return an error code; create an imm operand anyway and proceed
3831       // to the next operand, if any. That avoids unnecessary error messages.
3833 break;
3835 case AsmToken::Identifier: {
3836 OperandInfoTy HwReg(ID_UNKNOWN_);
3837 int64_t Offset = OFFSET_DEFAULT_;
3838 int64_t Width = WIDTH_M1_DEFAULT_ + 1;
3839 if (parseHwregConstruct(HwReg, Offset, Width))
3840 return MatchOperand_ParseFail;
3841 if (HwReg.Id < 0 || !isUInt<ID_WIDTH_>(HwReg.Id)) {
3842 if (HwReg.IsSymbolic)
3843 Error(S, "invalid symbolic name of hardware register");
3844 else
3845 Error(S, "invalid code of hardware register: only 6-bit values are legal");
3847 if (Offset < 0 || !isUInt<OFFSET_WIDTH_>(Offset))
3848 Error(S, "invalid bit offset: only 5-bit values are legal");
3849 if ((Width-1) < 0 || !isUInt<WIDTH_M1_WIDTH_>(Width-1))
3850 Error(S, "invalid bitfield width: only values from 1 to 32 are legal");
3851 Imm16Val = (HwReg.Id << ID_SHIFT_) | (Offset << OFFSET_SHIFT_) | ((Width-1) << WIDTH_M1_SHIFT_);
3853 break;
3855 Operands.push_back(AMDGPUOperand::CreateImm(this, Imm16Val, S, AMDGPUOperand::ImmTyHwreg));
3856 return MatchOperand_Success;
3859 bool AMDGPUOperand::isSWaitCnt() const {
3860 return isImm();
3863 bool AMDGPUOperand::isHwreg() const {
3864 return isImmTy(ImmTyHwreg);
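// parseSendMsgConstruct handles the symbolic s_sendmsg operand, roughly:
//   sendmsg(<msg> [, <operation> [, <stream id>]])
// The operation is only expected for GS, GS_DONE and SYSMSG messages, and the
// stream id only for GS/GS_DONE operations other than GS_OP_NOP.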
3867 bool AMDGPUAsmParser::parseSendMsgConstruct(OperandInfoTy &Msg, OperandInfoTy &Operation, int64_t &StreamId) {
3868 using namespace llvm::AMDGPU::SendMsg;
3870 if (Parser.getTok().getString() != "sendmsg")
3871 return true;
3872 Parser.Lex();
3874 if (getLexer().isNot(AsmToken::LParen))
3875 return true;
3876 Parser.Lex();
3878 if (getLexer().is(AsmToken::Identifier)) {
3879 Msg.IsSymbolic = true;
3880 Msg.Id = ID_UNKNOWN_;
3881 const std::string tok = Parser.getTok().getString();
3882 for (int i = ID_GAPS_FIRST_; i < ID_GAPS_LAST_; ++i) {
3883 switch(i) {
3884 default: continue; // Omit gaps.
3885 case ID_INTERRUPT: case ID_GS: case ID_GS_DONE: case ID_SYSMSG: break;
3887 if (tok == IdSymbolic[i]) {
3888 Msg.Id = i;
3889 break;
3892 Parser.Lex();
3893 } else {
3894 Msg.IsSymbolic = false;
3895 if (getLexer().isNot(AsmToken::Integer))
3896 return true;
3897 if (getParser().parseAbsoluteExpression(Msg.Id))
3898 return true;
3899 if (getLexer().is(AsmToken::Integer))
3900 if (getParser().parseAbsoluteExpression(Msg.Id))
3901 Msg.Id = ID_UNKNOWN_;
3903 if (Msg.Id == ID_UNKNOWN_) // Don't know how to parse the rest.
3904 return false;
3906 if (!(Msg.Id == ID_GS || Msg.Id == ID_GS_DONE || Msg.Id == ID_SYSMSG)) {
3907 if (getLexer().isNot(AsmToken::RParen))
3908 return true;
3909 Parser.Lex();
3910 return false;
3913 if (getLexer().isNot(AsmToken::Comma))
3914 return true;
3915 Parser.Lex();
3917 assert(Msg.Id == ID_GS || Msg.Id == ID_GS_DONE || Msg.Id == ID_SYSMSG);
3918 Operation.Id = ID_UNKNOWN_;
3919 if (getLexer().is(AsmToken::Identifier)) {
3920 Operation.IsSymbolic = true;
3921 const char* const *S = (Msg.Id == ID_SYSMSG) ? OpSysSymbolic : OpGsSymbolic;
3922 const int F = (Msg.Id == ID_SYSMSG) ? OP_SYS_FIRST_ : OP_GS_FIRST_;
3923 const int L = (Msg.Id == ID_SYSMSG) ? OP_SYS_LAST_ : OP_GS_LAST_;
3924 const StringRef Tok = Parser.getTok().getString();
3925 for (int i = F; i < L; ++i) {
3926 if (Tok == S[i]) {
3927 Operation.Id = i;
3928 break;
3931 Parser.Lex();
3932 } else {
3933 Operation.IsSymbolic = false;
3934 if (getLexer().isNot(AsmToken::Integer))
3935 return true;
3936 if (getParser().parseAbsoluteExpression(Operation.Id))
3937 return true;
3940 if ((Msg.Id == ID_GS || Msg.Id == ID_GS_DONE) && Operation.Id != OP_GS_NOP) {
3941 // Stream id is optional.
3942 if (getLexer().is(AsmToken::RParen)) {
3943 Parser.Lex();
3944 return false;
3947 if (getLexer().isNot(AsmToken::Comma))
3948 return true;
3949 Parser.Lex();
3951 if (getLexer().isNot(AsmToken::Integer))
3952 return true;
3953 if (getParser().parseAbsoluteExpression(StreamId))
3954 return true;
3957 if (getLexer().isNot(AsmToken::RParen))
3958 return true;
3959 Parser.Lex();
3960 return false;
3963 OperandMatchResultTy AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) {
3964 if (getLexer().getKind() != AsmToken::Identifier)
3965 return MatchOperand_NoMatch;
3967 StringRef Str = Parser.getTok().getString();
3968 int Slot = StringSwitch<int>(Str)
3969 .Case("p10", 0)
3970 .Case("p20", 1)
3971 .Case("p0", 2)
3972 .Default(-1);
3974 SMLoc S = Parser.getTok().getLoc();
3975 if (Slot == -1)
3976 return MatchOperand_ParseFail;
3978 Parser.Lex();
3979 Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S,
3980 AMDGPUOperand::ImmTyInterpSlot));
3981 return MatchOperand_Success;
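// Interpolation attributes are written as "attr<N>.<chan>", e.g. attr0.x;
// the channel suffix maps to 0..3 and the attribute number must fit in 6 bits.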
3984 OperandMatchResultTy AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) {
3985 if (getLexer().getKind() != AsmToken::Identifier)
3986 return MatchOperand_NoMatch;
3988 StringRef Str = Parser.getTok().getString();
3989 if (!Str.startswith("attr"))
3990 return MatchOperand_NoMatch;
3992 StringRef Chan = Str.take_back(2);
3993 int AttrChan = StringSwitch<int>(Chan)
3994 .Case(".x", 0)
3995 .Case(".y", 1)
3996 .Case(".z", 2)
3997 .Case(".w", 3)
3998 .Default(-1);
3999 if (AttrChan == -1)
4000 return MatchOperand_ParseFail;
4002 Str = Str.drop_back(2).drop_front(4);
4004 uint8_t Attr;
4005 if (Str.getAsInteger(10, Attr))
4006 return MatchOperand_ParseFail;
4008 SMLoc S = Parser.getTok().getLoc();
4009 Parser.Lex();
4010 if (Attr > 63) {
4011 Error(S, "out of bounds attr");
4012 return MatchOperand_Success;
4015 SMLoc SChan = SMLoc::getFromPointer(Chan.data());
4017 Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S,
4018 AMDGPUOperand::ImmTyInterpAttr));
4019 Operands.push_back(AMDGPUOperand::CreateImm(this, AttrChan, SChan,
4020 AMDGPUOperand::ImmTyAttrChan));
4021 return MatchOperand_Success;
4024 void AMDGPUAsmParser::errorExpTgt() {
4025 Error(Parser.getTok().getLoc(), "invalid exp target");
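// Export targets accepted below and the values they encode to:
//   mrt0..mrt7 -> 0..7, mrtz -> 8, null -> 9, pos0..pos3 -> 12..15,
//   param0..param31 -> 32..63. "invalid_target_<N>" is parsed but flagged
//   with an error.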
4028 OperandMatchResultTy AMDGPUAsmParser::parseExpTgtImpl(StringRef Str,
4029 uint8_t &Val) {
4030 if (Str == "null") {
4031 Val = 9;
4032 return MatchOperand_Success;
4035 if (Str.startswith("mrt")) {
4036 Str = Str.drop_front(3);
4037 if (Str == "z") { // == mrtz
4038 Val = 8;
4039 return MatchOperand_Success;
4042 if (Str.getAsInteger(10, Val))
4043 return MatchOperand_ParseFail;
4045 if (Val > 7)
4046 errorExpTgt();
4048 return MatchOperand_Success;
4051 if (Str.startswith("pos")) {
4052 Str = Str.drop_front(3);
4053 if (Str.getAsInteger(10, Val))
4054 return MatchOperand_ParseFail;
4056 if (Val > 3)
4057 errorExpTgt();
4059 Val += 12;
4060 return MatchOperand_Success;
4063 if (Str.startswith("param")) {
4064 Str = Str.drop_front(5);
4065 if (Str.getAsInteger(10, Val))
4066 return MatchOperand_ParseFail;
4068 if (Val >= 32)
4069 errorExpTgt();
4071 Val += 32;
4072 return MatchOperand_Success;
4075 if (Str.startswith("invalid_target_")) {
4076 Str = Str.drop_front(15);
4077 if (Str.getAsInteger(10, Val))
4078 return MatchOperand_ParseFail;
4080 errorExpTgt();
4081 return MatchOperand_Success;
4084 return MatchOperand_NoMatch;
4087 OperandMatchResultTy AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) {
4088 uint8_t Val;
4089 StringRef Str = Parser.getTok().getString();
4091 auto Res = parseExpTgtImpl(Str, Val);
4092 if (Res != MatchOperand_Success)
4093 return Res;
4095 SMLoc S = Parser.getTok().getLoc();
4096 Parser.Lex();
4098 Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S,
4099 AMDGPUOperand::ImmTyExpTgt));
4100 return MatchOperand_Success;
4103 OperandMatchResultTy
4104 AMDGPUAsmParser::parseSendMsgOp(OperandVector &Operands) {
4105 using namespace llvm::AMDGPU::SendMsg;
4107 int64_t Imm16Val = 0;
4108 SMLoc S = Parser.getTok().getLoc();
4110 switch(getLexer().getKind()) {
4111 default:
4112 return MatchOperand_NoMatch;
4113 case AsmToken::Integer:
4114 // The operand can be an integer value.
4115 if (getParser().parseAbsoluteExpression(Imm16Val))
4116 return MatchOperand_NoMatch;
4117 if (Imm16Val < 0 || !isUInt<16>(Imm16Val)) {
4118 Error(S, "invalid immediate: only 16-bit values are legal");
4119     // Do not return an error code; create an imm operand anyway and proceed
4120     // to the next operand, if any. That avoids unnecessary error messages.
4122 break;
4123 case AsmToken::Identifier: {
4124 OperandInfoTy Msg(ID_UNKNOWN_);
4125 OperandInfoTy Operation(OP_UNKNOWN_);
4126 int64_t StreamId = STREAM_ID_DEFAULT_;
4127 if (parseSendMsgConstruct(Msg, Operation, StreamId))
4128 return MatchOperand_ParseFail;
4129 do {
4130 // Validate and encode message ID.
4131 if (! ((ID_INTERRUPT <= Msg.Id && Msg.Id <= ID_GS_DONE)
4132 || Msg.Id == ID_SYSMSG)) {
4133 if (Msg.IsSymbolic)
4134 Error(S, "invalid/unsupported symbolic name of message");
4135 else
4136 Error(S, "invalid/unsupported code of message");
4137 break;
4139 Imm16Val = (Msg.Id << ID_SHIFT_);
4140 // Validate and encode operation ID.
4141 if (Msg.Id == ID_GS || Msg.Id == ID_GS_DONE) {
4142 if (! (OP_GS_FIRST_ <= Operation.Id && Operation.Id < OP_GS_LAST_)) {
4143 if (Operation.IsSymbolic)
4144 Error(S, "invalid symbolic name of GS_OP");
4145 else
4146 Error(S, "invalid code of GS_OP: only 2-bit values are legal");
4147 break;
4149 if (Operation.Id == OP_GS_NOP
4150 && Msg.Id != ID_GS_DONE) {
4151 Error(S, "invalid GS_OP: NOP is for GS_DONE only");
4152 break;
4154 Imm16Val |= (Operation.Id << OP_SHIFT_);
4156 if (Msg.Id == ID_SYSMSG) {
4157 if (! (OP_SYS_FIRST_ <= Operation.Id && Operation.Id < OP_SYS_LAST_)) {
4158 if (Operation.IsSymbolic)
4159 Error(S, "invalid/unsupported symbolic name of SYSMSG_OP");
4160 else
4161 Error(S, "invalid/unsupported code of SYSMSG_OP");
4162 break;
4164 Imm16Val |= (Operation.Id << OP_SHIFT_);
4166 // Validate and encode stream ID.
4167 if ((Msg.Id == ID_GS || Msg.Id == ID_GS_DONE) && Operation.Id != OP_GS_NOP) {
4168 if (! (STREAM_ID_FIRST_ <= StreamId && StreamId < STREAM_ID_LAST_)) {
4169 Error(S, "invalid stream id: only 2-bit values are legal");
4170 break;
4172 Imm16Val |= (StreamId << STREAM_ID_SHIFT_);
4174 } while (false);
4176 break;
4178 Operands.push_back(AMDGPUOperand::CreateImm(this, Imm16Val, S, AMDGPUOperand::ImmTySendMsg));
4179 return MatchOperand_Success;
4182 bool AMDGPUOperand::isSendMsg() const {
4183 return isImmTy(ImmTySendMsg);
4186 //===----------------------------------------------------------------------===//
4187 // parser helpers
4188 //===----------------------------------------------------------------------===//
4190 bool
4191 AMDGPUAsmParser::trySkipId(const StringRef Id) {
4192 if (getLexer().getKind() == AsmToken::Identifier &&
4193 Parser.getTok().getString() == Id) {
4194 Parser.Lex();
4195 return true;
4197 return false;
4200 bool
4201 AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) {
4202 if (getLexer().getKind() == Kind) {
4203 Parser.Lex();
4204 return true;
4206 return false;
4209 bool
4210 AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind,
4211 const StringRef ErrMsg) {
4212 if (!trySkipToken(Kind)) {
4213 Error(Parser.getTok().getLoc(), ErrMsg);
4214 return false;
4216 return true;
4219 bool
4220 AMDGPUAsmParser::parseExpr(int64_t &Imm) {
4221 return !getParser().parseAbsoluteExpression(Imm);
4224 bool
4225 AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) {
4226 SMLoc S = Parser.getTok().getLoc();
4227 if (getLexer().getKind() == AsmToken::String) {
4228 Val = Parser.getTok().getStringContents();
4229 Parser.Lex();
4230 return true;
4231 } else {
4232 Error(S, ErrMsg);
4233 return false;
4237 //===----------------------------------------------------------------------===//
4238 // swizzle
4239 //===----------------------------------------------------------------------===//
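// The ds_swizzle_b32 offset can be given either as a plain 16-bit immediate or
// via the swizzle() macro parsed below. QUAD_PERM gets its own encoding, while
// BROADCAST, SWAP and REVERSE are all lowered onto the BITMASK_PERM encoding
// produced by encodeBitmaskPerm().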
4241 LLVM_READNONE
4242 static unsigned
4243 encodeBitmaskPerm(const unsigned AndMask,
4244 const unsigned OrMask,
4245 const unsigned XorMask) {
4246 using namespace llvm::AMDGPU::Swizzle;
4248 return BITMASK_PERM_ENC |
4249 (AndMask << BITMASK_AND_SHIFT) |
4250 (OrMask << BITMASK_OR_SHIFT) |
4251 (XorMask << BITMASK_XOR_SHIFT);
4254 bool
4255 AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
4256 const unsigned MinVal,
4257 const unsigned MaxVal,
4258 const StringRef ErrMsg) {
4259 for (unsigned i = 0; i < OpNum; ++i) {
4260 if (!skipToken(AsmToken::Comma, "expected a comma")){
4261 return false;
4263 SMLoc ExprLoc = Parser.getTok().getLoc();
4264 if (!parseExpr(Op[i])) {
4265 return false;
4267 if (Op[i] < MinVal || Op[i] > MaxVal) {
4268 Error(ExprLoc, ErrMsg);
4269 return false;
4273 return true;
4276 bool
4277 AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm) {
4278 using namespace llvm::AMDGPU::Swizzle;
4280 int64_t Lane[LANE_NUM];
4281 if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX,
4282 "expected a 2-bit lane id")) {
4283 Imm = QUAD_PERM_ENC;
4284 for (auto i = 0; i < LANE_NUM; ++i) {
4285 Imm |= Lane[i] << (LANE_SHIFT * i);
4287 return true;
4289 return false;
4292 bool
4293 AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) {
4294 using namespace llvm::AMDGPU::Swizzle;
4296 SMLoc S = Parser.getTok().getLoc();
4297 int64_t GroupSize;
4298 int64_t LaneIdx;
4300 if (!parseSwizzleOperands(1, &GroupSize,
4301 2, 32,
4302 "group size must be in the interval [2,32]")) {
4303 return false;
4305 if (!isPowerOf2_64(GroupSize)) {
4306 Error(S, "group size must be a power of two");
4307 return false;
4309 if (parseSwizzleOperands(1, &LaneIdx,
4310 0, GroupSize - 1,
4311 "lane id must be in the interval [0,group size - 1]")) {
4312 Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0);
4313 return true;
4315 return false;
4318 bool
4319 AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) {
4320 using namespace llvm::AMDGPU::Swizzle;
4322 SMLoc S = Parser.getTok().getLoc();
4323 int64_t GroupSize;
4325 if (!parseSwizzleOperands(1, &GroupSize,
4326 2, 32, "group size must be in the interval [2,32]")) {
4327 return false;
4329 if (!isPowerOf2_64(GroupSize)) {
4330 Error(S, "group size must be a power of two");
4331 return false;
4334 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize - 1);
4335 return true;
4338 bool
4339 AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) {
4340 using namespace llvm::AMDGPU::Swizzle;
4342 SMLoc S = Parser.getTok().getLoc();
4343 int64_t GroupSize;
4345 if (!parseSwizzleOperands(1, &GroupSize,
4346 1, 16, "group size must be in the interval [1,16]")) {
4347 return false;
4349 if (!isPowerOf2_64(GroupSize)) {
4350 Error(S, "group size must be a power of two");
4351 return false;
4354 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize);
4355 return true;
4358 bool
4359 AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) {
4360 using namespace llvm::AMDGPU::Swizzle;
4362 if (!skipToken(AsmToken::Comma, "expected a comma")) {
4363 return false;
4366 StringRef Ctl;
4367 SMLoc StrLoc = Parser.getTok().getLoc();
4368 if (!parseString(Ctl)) {
4369 return false;
4371 if (Ctl.size() != BITMASK_WIDTH) {
4372 Error(StrLoc, "expected a 5-character mask");
4373 return false;
4376 unsigned AndMask = 0;
4377 unsigned OrMask = 0;
4378 unsigned XorMask = 0;
4380 for (size_t i = 0; i < Ctl.size(); ++i) {
4381 unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i);
4382 switch(Ctl[i]) {
4383 default:
4384 Error(StrLoc, "invalid mask");
4385 return false;
4386 case '0':
4387 break;
4388 case '1':
4389 OrMask |= Mask;
4390 break;
4391 case 'p':
4392 AndMask |= Mask;
4393 break;
4394 case 'i':
4395 AndMask |= Mask;
4396 XorMask |= Mask;
4397 break;
4401 Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask);
4402 return true;
4405 bool
4406 AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) {
4408 SMLoc OffsetLoc = Parser.getTok().getLoc();
4410 if (!parseExpr(Imm)) {
4411 return false;
4413 if (!isUInt<16>(Imm)) {
4414 Error(OffsetLoc, "expected a 16-bit offset");
4415 return false;
4417 return true;
4420 bool
4421 AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) {
4422 using namespace llvm::AMDGPU::Swizzle;
4424   if (skipToken(AsmToken::LParen, "expected a left parenthesis")) {
4426 SMLoc ModeLoc = Parser.getTok().getLoc();
4427 bool Ok = false;
4429 if (trySkipId(IdSymbolic[ID_QUAD_PERM])) {
4430 Ok = parseSwizzleQuadPerm(Imm);
4431 } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) {
4432 Ok = parseSwizzleBitmaskPerm(Imm);
4433 } else if (trySkipId(IdSymbolic[ID_BROADCAST])) {
4434 Ok = parseSwizzleBroadcast(Imm);
4435 } else if (trySkipId(IdSymbolic[ID_SWAP])) {
4436 Ok = parseSwizzleSwap(Imm);
4437 } else if (trySkipId(IdSymbolic[ID_REVERSE])) {
4438 Ok = parseSwizzleReverse(Imm);
4439 } else {
4440 Error(ModeLoc, "expected a swizzle mode");
4443     return Ok && skipToken(AsmToken::RParen, "expected a closing parenthesis");
4446 return false;
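// parseSwizzleOp handles the optional "offset:" operand of ds_swizzle_b32.
// It accepts either a raw 16-bit offset, e.g. offset:0x8000, or one of the
// macro forms, e.g. offset:swizzle(SWAP, 16) (mode names come from the
// Swizzle::IdSymbolic table).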
4449 OperandMatchResultTy
4450 AMDGPUAsmParser::parseSwizzleOp(OperandVector &Operands) {
4451 SMLoc S = Parser.getTok().getLoc();
4452 int64_t Imm = 0;
4454 if (trySkipId("offset")) {
4456 bool Ok = false;
4457 if (skipToken(AsmToken::Colon, "expected a colon")) {
4458 if (trySkipId("swizzle")) {
4459 Ok = parseSwizzleMacro(Imm);
4460 } else {
4461 Ok = parseSwizzleOffset(Imm);
4465 Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle));
4467 return Ok? MatchOperand_Success : MatchOperand_ParseFail;
4468 } else {
4469 // Swizzle "offset" operand is optional.
4470 // If it is omitted, try parsing other optional operands.
4471 return parseOptionalOpr(Operands);
4475 bool
4476 AMDGPUOperand::isSwizzle() const {
4477 return isImmTy(ImmTySwizzle);
4480 //===----------------------------------------------------------------------===//
4481 // sopp branch targets
4482 //===----------------------------------------------------------------------===//
4484 OperandMatchResultTy
4485 AMDGPUAsmParser::parseSOppBrTarget(OperandVector &Operands) {
4486 SMLoc S = Parser.getTok().getLoc();
4488 switch (getLexer().getKind()) {
4489 default: return MatchOperand_ParseFail;
4490 case AsmToken::Integer: {
4491 int64_t Imm;
4492 if (getParser().parseAbsoluteExpression(Imm))
4493 return MatchOperand_ParseFail;
4494 Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S));
4495 return MatchOperand_Success;
4498 case AsmToken::Identifier:
4499 Operands.push_back(AMDGPUOperand::CreateExpr(this,
4500 MCSymbolRefExpr::create(getContext().getOrCreateSymbol(
4501 Parser.getTok().getString()), getContext()), S));
4502 Parser.Lex();
4503 return MatchOperand_Success;
4507 //===----------------------------------------------------------------------===//
4508 // mubuf
4509 //===----------------------------------------------------------------------===//
4511 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultGLC() const {
4512 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyGLC);
4515 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSLC() const {
4516 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTySLC);
4519 void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst,
4520 const OperandVector &Operands,
4521 bool IsAtomic,
4522 bool IsAtomicReturn,
4523 bool IsLds) {
4524 bool IsLdsOpcode = IsLds;
4525 bool HasLdsModifier = false;
4526 OptionalImmIndexMap OptionalIdx;
4527 assert(IsAtomicReturn ? IsAtomic : true);
4529 for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
4530 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4532 // Add the register arguments
4533 if (Op.isReg()) {
4534 Op.addRegOperands(Inst, 1);
4535 continue;
4538 // Handle the case where soffset is an immediate
4539 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
4540 Op.addImmOperands(Inst, 1);
4541 continue;
4544 HasLdsModifier = Op.isLDS();
4546 // Handle tokens like 'offen' which are sometimes hard-coded into the
4547 // asm string. There are no MCInst operands for these.
4548 if (Op.isToken()) {
4549 continue;
4551 assert(Op.isImm());
4553 // Handle optional arguments
4554 OptionalIdx[Op.getImmTy()] = i;
4557   // This is a workaround for an llvm quirk which may result in an
4558   // incorrect instruction selection. Lds and non-lds versions of
4559   // MUBUF instructions are identical except that lds versions
4560   // have a mandatory 'lds' modifier. However, this modifier follows
4561   // the optional modifiers, and the llvm asm matcher regards this 'lds'
4562   // modifier as an optional one. As a result, an lds version
4563   // of the opcode may be selected even if it has no 'lds' modifier.
4564 if (IsLdsOpcode && !HasLdsModifier) {
4565 int NoLdsOpcode = AMDGPU::getMUBUFNoLdsInst(Inst.getOpcode());
4566 if (NoLdsOpcode != -1) { // Got lds version - correct it.
4567 Inst.setOpcode(NoLdsOpcode);
4568 IsLdsOpcode = false;
4572 // Copy $vdata_in operand and insert as $vdata for MUBUF_Atomic RTN insns.
4573 if (IsAtomicReturn) {
4574 MCInst::iterator I = Inst.begin(); // $vdata_in is always at the beginning.
4575 Inst.insert(I, *I);
4578 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset);
4579 if (!IsAtomic) { // glc is hard-coded.
4580 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
4582 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
4584 if (!IsLdsOpcode) { // tfe is not legal with lds opcodes
4585 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
4589 void AMDGPUAsmParser::cvtMtbuf(MCInst &Inst, const OperandVector &Operands) {
4590 OptionalImmIndexMap OptionalIdx;
4592 for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
4593 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4595 // Add the register arguments
4596 if (Op.isReg()) {
4597 Op.addRegOperands(Inst, 1);
4598 continue;
4601 // Handle the case where soffset is an immediate
4602 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
4603 Op.addImmOperands(Inst, 1);
4604 continue;
4607 // Handle tokens like 'offen' which are sometimes hard-coded into the
4608 // asm string. There are no MCInst operands for these.
4609 if (Op.isToken()) {
4610 continue;
4612 assert(Op.isImm());
4614 // Handle optional arguments
4615 OptionalIdx[Op.getImmTy()] = i;
4618 addOptionalImmOperand(Inst, Operands, OptionalIdx,
4619 AMDGPUOperand::ImmTyOffset);
4620 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDFMT);
4621 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyNFMT);
4622 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
4623 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
4624 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
4627 //===----------------------------------------------------------------------===//
4628 // mimg
4629 //===----------------------------------------------------------------------===//
4631 void AMDGPUAsmParser::cvtMIMG(MCInst &Inst, const OperandVector &Operands,
4632 bool IsAtomic) {
4633 unsigned I = 1;
4634 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
4635 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
4636 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
4639 if (IsAtomic) {
4640 // Add src, same as dst
4641 assert(Desc.getNumDefs() == 1);
4642 ((AMDGPUOperand &)*Operands[I - 1]).addRegOperands(Inst, 1);
4645 OptionalImmIndexMap OptionalIdx;
4647 for (unsigned E = Operands.size(); I != E; ++I) {
4648 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
4650 // Add the register arguments
4651 if (Op.isReg()) {
4652 Op.addRegOperands(Inst, 1);
4653 } else if (Op.isImmModifier()) {
4654 OptionalIdx[Op.getImmTy()] = I;
4655 } else {
4656 llvm_unreachable("unexpected operand type");
4660 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDMask);
4661 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyUNorm);
4662 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
4663 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
4664 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyR128);
4665 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
4666 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyLWE);
4667 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDA);
4668 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyD16);
4671 void AMDGPUAsmParser::cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands) {
4672 cvtMIMG(Inst, Operands, true);
4675 //===----------------------------------------------------------------------===//
4676 // smrd
4677 //===----------------------------------------------------------------------===//
4679 bool AMDGPUOperand::isSMRDOffset8() const {
4680 return isImm() && isUInt<8>(getImm());
4683 bool AMDGPUOperand::isSMRDOffset20() const {
4684 return isImm() && isUInt<20>(getImm());
4687 bool AMDGPUOperand::isSMRDLiteralOffset() const {
4688 // 32-bit literals are only supported on CI and we only want to use them
4689 // when the offset is > 8-bits.
4690 return isImm() && !isUInt<8>(getImm()) && isUInt<32>(getImm());
4693 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset8() const {
4694 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
4697 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset20() const {
4698 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
4701 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDLiteralOffset() const {
4702 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
4705 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultOffsetU12() const {
4706 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
4709 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultOffsetS13() const {
4710 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
4713 //===----------------------------------------------------------------------===//
4714 // vop3
4715 //===----------------------------------------------------------------------===//
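// The two converters below map the parsed "mul:" / "div:" values onto the
// 2-bit hardware OMOD field: mul:1 -> 0, mul:2 -> 1, mul:4 -> 2, div:2 -> 3
// (div:1 also encodes as 0).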
4717 static bool ConvertOmodMul(int64_t &Mul) {
4718 if (Mul != 1 && Mul != 2 && Mul != 4)
4719 return false;
4721 Mul >>= 1;
4722 return true;
4725 static bool ConvertOmodDiv(int64_t &Div) {
4726 if (Div == 1) {
4727 Div = 0;
4728 return true;
4731 if (Div == 2) {
4732 Div = 3;
4733 return true;
4736 return false;
4739 static bool ConvertBoundCtrl(int64_t &BoundCtrl) {
4740 if (BoundCtrl == 0) {
4741 BoundCtrl = 1;
4742 return true;
4745 if (BoundCtrl == -1) {
4746 BoundCtrl = 0;
4747 return true;
4750 return false;
4753 // Note: the order in this table matches the order of operands in AsmString.
4754 static const OptionalOperand AMDGPUOptionalOperandTable[] = {
4755 {"offen", AMDGPUOperand::ImmTyOffen, true, nullptr},
4756 {"idxen", AMDGPUOperand::ImmTyIdxen, true, nullptr},
4757 {"addr64", AMDGPUOperand::ImmTyAddr64, true, nullptr},
4758 {"offset0", AMDGPUOperand::ImmTyOffset0, false, nullptr},
4759 {"offset1", AMDGPUOperand::ImmTyOffset1, false, nullptr},
4760 {"gds", AMDGPUOperand::ImmTyGDS, true, nullptr},
4761 {"lds", AMDGPUOperand::ImmTyLDS, true, nullptr},
4762 {"offset", AMDGPUOperand::ImmTyOffset, false, nullptr},
4763 {"inst_offset", AMDGPUOperand::ImmTyInstOffset, false, nullptr},
4764 {"dfmt", AMDGPUOperand::ImmTyDFMT, false, nullptr},
4765 {"nfmt", AMDGPUOperand::ImmTyNFMT, false, nullptr},
4766 {"glc", AMDGPUOperand::ImmTyGLC, true, nullptr},
4767 {"slc", AMDGPUOperand::ImmTySLC, true, nullptr},
4768 {"tfe", AMDGPUOperand::ImmTyTFE, true, nullptr},
4769 {"d16", AMDGPUOperand::ImmTyD16, true, nullptr},
4770 {"high", AMDGPUOperand::ImmTyHigh, true, nullptr},
4771 {"clamp", AMDGPUOperand::ImmTyClampSI, true, nullptr},
4772 {"omod", AMDGPUOperand::ImmTyOModSI, false, ConvertOmodMul},
4773 {"unorm", AMDGPUOperand::ImmTyUNorm, true, nullptr},
4774 {"da", AMDGPUOperand::ImmTyDA, true, nullptr},
4775 {"r128", AMDGPUOperand::ImmTyR128, true, nullptr},
4776 {"lwe", AMDGPUOperand::ImmTyLWE, true, nullptr},
4777 {"d16", AMDGPUOperand::ImmTyD16, true, nullptr},
4778 {"dmask", AMDGPUOperand::ImmTyDMask, false, nullptr},
4779 {"row_mask", AMDGPUOperand::ImmTyDppRowMask, false, nullptr},
4780 {"bank_mask", AMDGPUOperand::ImmTyDppBankMask, false, nullptr},
4781 {"bound_ctrl", AMDGPUOperand::ImmTyDppBoundCtrl, false, ConvertBoundCtrl},
4782 {"dst_sel", AMDGPUOperand::ImmTySdwaDstSel, false, nullptr},
4783 {"src0_sel", AMDGPUOperand::ImmTySdwaSrc0Sel, false, nullptr},
4784 {"src1_sel", AMDGPUOperand::ImmTySdwaSrc1Sel, false, nullptr},
4785 {"dst_unused", AMDGPUOperand::ImmTySdwaDstUnused, false, nullptr},
4786 {"compr", AMDGPUOperand::ImmTyExpCompr, true, nullptr },
4787 {"vm", AMDGPUOperand::ImmTyExpVM, true, nullptr},
4788 {"op_sel", AMDGPUOperand::ImmTyOpSel, false, nullptr},
4789 {"op_sel_hi", AMDGPUOperand::ImmTyOpSelHi, false, nullptr},
4790 {"neg_lo", AMDGPUOperand::ImmTyNegLo, false, nullptr},
4791 {"neg_hi", AMDGPUOperand::ImmTyNegHi, false, nullptr}
4794 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOperand(OperandVector &Operands) {
4795 unsigned size = Operands.size();
4796 assert(size > 0);
4798 OperandMatchResultTy res = parseOptionalOpr(Operands);
4800   // This is a hack to enable hardcoded mandatory operands which follow
4801   // optional operands.
4803   // The current design assumes that all operands after the first optional operand
4804   // are also optional. However, the implementation of some instructions violates
4805   // this rule (see e.g. flat/global atomics, which have hardcoded 'glc' operands).
4807   // To alleviate this problem, we have to (implicitly) parse extra operands
4808   // to make sure the autogenerated parser of custom operands never hits a
4809   // hardcoded mandatory operand.
4811 if (size == 1 || ((AMDGPUOperand &)*Operands[size - 1]).isRegKind()) {
4813 // We have parsed the first optional operand.
4814 // Parse as many operands as necessary to skip all mandatory operands.
4816 for (unsigned i = 0; i < MAX_OPR_LOOKAHEAD; ++i) {
4817 if (res != MatchOperand_Success ||
4818 getLexer().is(AsmToken::EndOfStatement)) break;
4819 if (getLexer().is(AsmToken::Comma)) Parser.Lex();
4820 res = parseOptionalOpr(Operands);
4824 return res;
4827 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOpr(OperandVector &Operands) {
4828 OperandMatchResultTy res;
4829 for (const OptionalOperand &Op : AMDGPUOptionalOperandTable) {
4830 // try to parse any optional operand here
4831 if (Op.IsBit) {
4832 res = parseNamedBit(Op.Name, Operands, Op.Type);
4833 } else if (Op.Type == AMDGPUOperand::ImmTyOModSI) {
4834 res = parseOModOperand(Operands);
4835 } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstSel ||
4836 Op.Type == AMDGPUOperand::ImmTySdwaSrc0Sel ||
4837 Op.Type == AMDGPUOperand::ImmTySdwaSrc1Sel) {
4838 res = parseSDWASel(Operands, Op.Name, Op.Type);
4839 } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstUnused) {
4840 res = parseSDWADstUnused(Operands);
4841 } else if (Op.Type == AMDGPUOperand::ImmTyOpSel ||
4842 Op.Type == AMDGPUOperand::ImmTyOpSelHi ||
4843 Op.Type == AMDGPUOperand::ImmTyNegLo ||
4844 Op.Type == AMDGPUOperand::ImmTyNegHi) {
4845 res = parseOperandArrayWithPrefix(Op.Name, Operands, Op.Type,
4846 Op.ConvertResult);
4847 } else {
4848 res = parseIntWithPrefix(Op.Name, Operands, Op.Type, Op.ConvertResult);
4850 if (res != MatchOperand_NoMatch) {
4851 return res;
4854 return MatchOperand_NoMatch;
4857 OperandMatchResultTy AMDGPUAsmParser::parseOModOperand(OperandVector &Operands) {
4858 StringRef Name = Parser.getTok().getString();
4859 if (Name == "mul") {
4860 return parseIntWithPrefix("mul", Operands,
4861 AMDGPUOperand::ImmTyOModSI, ConvertOmodMul);
4864 if (Name == "div") {
4865 return parseIntWithPrefix("div", Operands,
4866 AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv);
4869 return MatchOperand_NoMatch;
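// For VOP3 opcodes with a destination op_sel bit, the bit that follows the
// last source in the parsed op_sel mask is folded into src0_modifiers as
// DST_OP_SEL.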
4872 void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands) {
4873 cvtVOP3P(Inst, Operands);
4875 int Opc = Inst.getOpcode();
4877 int SrcNum;
4878 const int Ops[] = { AMDGPU::OpName::src0,
4879 AMDGPU::OpName::src1,
4880 AMDGPU::OpName::src2 };
4881 for (SrcNum = 0;
4882 SrcNum < 3 && AMDGPU::getNamedOperandIdx(Opc, Ops[SrcNum]) != -1;
4883 ++SrcNum);
4884 assert(SrcNum > 0);
4886 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
4887 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
4889 if ((OpSel & (1 << SrcNum)) != 0) {
4890 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers);
4891 uint32_t ModVal = Inst.getOperand(ModIdx).getImm();
4892 Inst.getOperand(ModIdx).setImm(ModVal | SISrcMods::DST_OP_SEL);
4896 static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) {
4897   // 1. This operand is an input-modifiers operand
4898   return Desc.OpInfo[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS
4899   // 2. This is not the last operand
4900       && Desc.NumOperands > (OpNum + 1)
4901   // 3. The next operand is a register class
4902       && Desc.OpInfo[OpNum + 1].RegClass != -1
4903   // 4. The next register is not tied to any other operand
4904 && Desc.getOperandConstraint(OpNum + 1, MCOI::OperandConstraint::TIED_TO) == -1;
4907 void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands)
4909 OptionalImmIndexMap OptionalIdx;
4910 unsigned Opc = Inst.getOpcode();
4912 unsigned I = 1;
4913 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
4914 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
4915 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
4918 for (unsigned E = Operands.size(); I != E; ++I) {
4919 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
4920 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
4921 Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
4922 } else if (Op.isInterpSlot() ||
4923 Op.isInterpAttr() ||
4924 Op.isAttrChan()) {
4925 Inst.addOperand(MCOperand::createImm(Op.Imm.Val));
4926 } else if (Op.isImmModifier()) {
4927 OptionalIdx[Op.getImmTy()] = I;
4928 } else {
4929 llvm_unreachable("unhandled operand type");
4933 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::high) != -1) {
4934 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyHigh);
4937 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
4938 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
4941 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
4942 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
4946 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands,
4947 OptionalImmIndexMap &OptionalIdx) {
4948 unsigned Opc = Inst.getOpcode();
4950 unsigned I = 1;
4951 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
4952 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
4953 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
4956 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1) {
4957 // This instruction has src modifiers
4958 for (unsigned E = Operands.size(); I != E; ++I) {
4959 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
4960 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
4961 Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
4962 } else if (Op.isImmModifier()) {
4963 OptionalIdx[Op.getImmTy()] = I;
4964 } else if (Op.isRegOrImm()) {
4965 Op.addRegOrImmOperands(Inst, 1);
4966 } else {
4967 llvm_unreachable("unhandled operand type");
4970 } else {
4971 // No src modifiers
4972 for (unsigned E = Operands.size(); I != E; ++I) {
4973 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
4974 if (Op.isMod()) {
4975 OptionalIdx[Op.getImmTy()] = I;
4976 } else {
4977 Op.addRegOrImmOperands(Inst, 1);
4982 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
4983 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
4986 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
4987 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
4990   // Special case for v_mac_{f16, f32} and v_fmac_f32 (gfx906):
4991   // they have a src2 register operand that is tied to the dst operand;
4992   // we don't allow modifiers for this operand in the assembler, so src2_modifiers
4993   // should be 0.
4994 if (Opc == AMDGPU::V_MAC_F32_e64_si ||
4995 Opc == AMDGPU::V_MAC_F32_e64_vi ||
4996 Opc == AMDGPU::V_MAC_F16_e64_vi ||
4997 Opc == AMDGPU::V_FMAC_F32_e64_vi) {
4998 auto it = Inst.begin();
4999 std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers));
5000 it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2
5001 ++it;
5002 Inst.insert(it, Inst.getOperand(0)); // src2 = dst
5006 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) {
5007 OptionalImmIndexMap OptionalIdx;
5008 cvtVOP3(Inst, Operands, OptionalIdx);
5011 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst,
5012 const OperandVector &Operands) {
5013 OptionalImmIndexMap OptIdx;
5014 const int Opc = Inst.getOpcode();
5015 const MCInstrDesc &Desc = MII.get(Opc);
5017 const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0;
5019 cvtVOP3(Inst, Operands, OptIdx);
5021 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in) != -1) {
5022 assert(!IsPacked);
5023 Inst.addOperand(Inst.getOperand(0));
5026   // FIXME: This is messy. Parse the modifiers as if it were a normal VOP3
5027   // instruction, and then figure out where to actually put the modifiers.
5029 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel);
5031 int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
5032 if (OpSelHiIdx != -1) {
5033 int DefaultVal = IsPacked ? -1 : 0;
5034 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi,
5035 DefaultVal);
5038 int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo);
5039 if (NegLoIdx != -1) {
5040 assert(IsPacked);
5041 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo);
5042 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi);
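// Distribute the per-source op_sel / op_sel_hi / neg_lo / neg_hi bits parsed
// above into the corresponding srcN_modifiers operands below.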
5045 const int Ops[] = { AMDGPU::OpName::src0,
5046 AMDGPU::OpName::src1,
5047 AMDGPU::OpName::src2 };
5048 const int ModOps[] = { AMDGPU::OpName::src0_modifiers,
5049 AMDGPU::OpName::src1_modifiers,
5050 AMDGPU::OpName::src2_modifiers };
5052 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
5054 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
5055 unsigned OpSelHi = 0;
5056 unsigned NegLo = 0;
5057 unsigned NegHi = 0;
5059 if (OpSelHiIdx != -1) {
5060 OpSelHi = Inst.getOperand(OpSelHiIdx).getImm();
5063 if (NegLoIdx != -1) {
5064 int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi);
5065 NegLo = Inst.getOperand(NegLoIdx).getImm();
5066 NegHi = Inst.getOperand(NegHiIdx).getImm();
5069 for (int J = 0; J < 3; ++J) {
5070 int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]);
5071 if (OpIdx == -1)
5072 break;
5074 uint32_t ModVal = 0;
5076 if ((OpSel & (1 << J)) != 0)
5077 ModVal |= SISrcMods::OP_SEL_0;
5079 if ((OpSelHi & (1 << J)) != 0)
5080 ModVal |= SISrcMods::OP_SEL_1;
5082 if ((NegLo & (1 << J)) != 0)
5083 ModVal |= SISrcMods::NEG;
5085 if ((NegHi & (1 << J)) != 0)
5086 ModVal |= SISrcMods::NEG_HI;
5088 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
5090 Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal);
5094 //===----------------------------------------------------------------------===//
5095 // dpp
5096 //===----------------------------------------------------------------------===//
5098 bool AMDGPUOperand::isDPPCtrl() const {
5099 using namespace AMDGPU::DPP;
5101 bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm());
5102 if (result) {
5103 int64_t Imm = getImm();
5104 return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) ||
5105 (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) ||
5106 (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) ||
5107 (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) ||
5108 (Imm == DppCtrl::WAVE_SHL1) ||
5109 (Imm == DppCtrl::WAVE_ROL1) ||
5110 (Imm == DppCtrl::WAVE_SHR1) ||
5111 (Imm == DppCtrl::WAVE_ROR1) ||
5112 (Imm == DppCtrl::ROW_MIRROR) ||
5113 (Imm == DppCtrl::ROW_HALF_MIRROR) ||
5114 (Imm == DppCtrl::BCAST15) ||
5115 (Imm == DppCtrl::BCAST31);
5117 return false;
5120 bool AMDGPUOperand::isGPRIdxMode() const {
5121 return isImm() && isUInt<4>(getImm());
5124 bool AMDGPUOperand::isS16Imm() const {
5125 return isImm() && (isInt<16>(getImm()) || isUInt<16>(getImm()));
5128 bool AMDGPUOperand::isU16Imm() const {
5129 return isImm() && isUInt<16>(getImm());
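// dpp_ctrl forms recognized below: quad_perm:[a,b,c,d], row_shl:N, row_shr:N,
// row_ror:N (N in 1..15), wave_shl:1, wave_rol:1, wave_shr:1, wave_ror:1,
// row_mirror, row_half_mirror, and row_bcast:15 / row_bcast:31.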
5132 OperandMatchResultTy
5133 AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) {
5134 using namespace AMDGPU::DPP;
5136 SMLoc S = Parser.getTok().getLoc();
5137 StringRef Prefix;
5138 int64_t Int;
5140 if (getLexer().getKind() == AsmToken::Identifier) {
5141 Prefix = Parser.getTok().getString();
5142 } else {
5143 return MatchOperand_NoMatch;
5146 if (Prefix == "row_mirror") {
5147 Int = DppCtrl::ROW_MIRROR;
5148 Parser.Lex();
5149 } else if (Prefix == "row_half_mirror") {
5150 Int = DppCtrl::ROW_HALF_MIRROR;
5151 Parser.Lex();
5152 } else {
5153 // Check to prevent parseDPPCtrlOps from eating invalid tokens
5154 if (Prefix != "quad_perm"
5155 && Prefix != "row_shl"
5156 && Prefix != "row_shr"
5157 && Prefix != "row_ror"
5158 && Prefix != "wave_shl"
5159 && Prefix != "wave_rol"
5160 && Prefix != "wave_shr"
5161 && Prefix != "wave_ror"
5162 && Prefix != "row_bcast") {
5163 return MatchOperand_NoMatch;
5166 Parser.Lex();
5167 if (getLexer().isNot(AsmToken::Colon))
5168 return MatchOperand_ParseFail;
5170 if (Prefix == "quad_perm") {
5171 // quad_perm:[%d,%d,%d,%d]
5172 Parser.Lex();
5173 if (getLexer().isNot(AsmToken::LBrac))
5174 return MatchOperand_ParseFail;
5175 Parser.Lex();
5177 if (getParser().parseAbsoluteExpression(Int) || !(0 <= Int && Int <=3))
5178 return MatchOperand_ParseFail;
5180 for (int i = 0; i < 3; ++i) {
5181 if (getLexer().isNot(AsmToken::Comma))
5182 return MatchOperand_ParseFail;
5183 Parser.Lex();
5185 int64_t Temp;
5186 if (getParser().parseAbsoluteExpression(Temp) || !(0 <= Temp && Temp <=3))
5187 return MatchOperand_ParseFail;
5188 const int shift = i*2 + 2;
5189 Int += (Temp << shift);
5192 if (getLexer().isNot(AsmToken::RBrac))
5193 return MatchOperand_ParseFail;
5194 Parser.Lex();
5195 } else {
5196 // sel:%d
5197 Parser.Lex();
5198 if (getParser().parseAbsoluteExpression(Int))
5199 return MatchOperand_ParseFail;
5201 if (Prefix == "row_shl" && 1 <= Int && Int <= 15) {
5202 Int |= DppCtrl::ROW_SHL0;
5203 } else if (Prefix == "row_shr" && 1 <= Int && Int <= 15) {
5204 Int |= DppCtrl::ROW_SHR0;
5205 } else if (Prefix == "row_ror" && 1 <= Int && Int <= 15) {
5206 Int |= DppCtrl::ROW_ROR0;
5207 } else if (Prefix == "wave_shl" && 1 == Int) {
5208 Int = DppCtrl::WAVE_SHL1;
5209 } else if (Prefix == "wave_rol" && 1 == Int) {
5210 Int = DppCtrl::WAVE_ROL1;
5211 } else if (Prefix == "wave_shr" && 1 == Int) {
5212 Int = DppCtrl::WAVE_SHR1;
5213 } else if (Prefix == "wave_ror" && 1 == Int) {
5214 Int = DppCtrl::WAVE_ROR1;
5215 } else if (Prefix == "row_bcast") {
5216 if (Int == 15) {
5217 Int = DppCtrl::BCAST15;
5218 } else if (Int == 31) {
5219 Int = DppCtrl::BCAST31;
5220 } else {
5221 return MatchOperand_ParseFail;
5223 } else {
5224 return MatchOperand_ParseFail;
5229 Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTyDppCtrl));
5230 return MatchOperand_Success;
5233 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultRowMask() const {
5234 return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppRowMask);
5237 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBankMask() const {
5238 return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppBankMask);
5241 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBoundCtrl() const {
5242 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppBoundCtrl);
5245 void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands) {
5246 OptionalImmIndexMap OptionalIdx;
5248 unsigned I = 1;
5249 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
5250 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
5251 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
5254 // All DPP instructions with at least one source operand have a fake "old"
5255 // source at the beginning that's tied to the dst operand. Handle it here.
5256 if (Desc.getNumOperands() >= 2)
5257 Inst.addOperand(Inst.getOperand(0));
5259 for (unsigned E = Operands.size(); I != E; ++I) {
5260 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
5261 // Add the register arguments
5262 if (Op.isReg() && Op.Reg.RegNo == AMDGPU::VCC) {
5263       // VOP2b (v_add_u32, v_sub_u32 ...) dpp forms use the "vcc" token.
5264       // Skip it.
5265       continue;
5266     } else if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
5267 Op.addRegWithFPInputModsOperands(Inst, 2);
5268 } else if (Op.isDPPCtrl()) {
5269 Op.addImmOperands(Inst, 1);
5270 } else if (Op.isImm()) {
5271 // Handle optional arguments
5272 OptionalIdx[Op.getImmTy()] = I;
5273 } else {
5274 llvm_unreachable("Invalid operand type");
5278 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf);
5279 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf);
5280 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl);
5283 //===----------------------------------------------------------------------===//
5284 // sdwa
5285 //===----------------------------------------------------------------------===//
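// SDWA selector operands are written as <prefix>:<value>, e.g. dst_sel:WORD_1
// or src0_sel:BYTE_3, with the accepted values listed in the StringSwitch
// below; dst_unused takes UNUSED_PAD, UNUSED_SEXT or UNUSED_PRESERVE.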
5287 OperandMatchResultTy
5288 AMDGPUAsmParser::parseSDWASel(OperandVector &Operands, StringRef Prefix,
5289 AMDGPUOperand::ImmTy Type) {
5290 using namespace llvm::AMDGPU::SDWA;
5292 SMLoc S = Parser.getTok().getLoc();
5293 StringRef Value;
5294 OperandMatchResultTy res;
5296 res = parseStringWithPrefix(Prefix, Value);
5297 if (res != MatchOperand_Success) {
5298 return res;
5301 int64_t Int;
5302 Int = StringSwitch<int64_t>(Value)
5303 .Case("BYTE_0", SdwaSel::BYTE_0)
5304 .Case("BYTE_1", SdwaSel::BYTE_1)
5305 .Case("BYTE_2", SdwaSel::BYTE_2)
5306 .Case("BYTE_3", SdwaSel::BYTE_3)
5307 .Case("WORD_0", SdwaSel::WORD_0)
5308 .Case("WORD_1", SdwaSel::WORD_1)
5309 .Case("DWORD", SdwaSel::DWORD)
5310 .Default(0xffffffff);
5311 Parser.Lex(); // eat last token
5313 if (Int == 0xffffffff) {
5314 return MatchOperand_ParseFail;
5317 Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, Type));
5318 return MatchOperand_Success;
5321 OperandMatchResultTy
5322 AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) {
5323 using namespace llvm::AMDGPU::SDWA;
5325 SMLoc S = Parser.getTok().getLoc();
5326 StringRef Value;
5327 OperandMatchResultTy res;
5329 res = parseStringWithPrefix("dst_unused", Value);
5330 if (res != MatchOperand_Success) {
5331 return res;
5334 int64_t Int;
5335 Int = StringSwitch<int64_t>(Value)
5336 .Case("UNUSED_PAD", DstUnused::UNUSED_PAD)
5337 .Case("UNUSED_SEXT", DstUnused::UNUSED_SEXT)
5338 .Case("UNUSED_PRESERVE", DstUnused::UNUSED_PRESERVE)
5339 .Default(0xffffffff);
5340 Parser.Lex(); // eat last token
5342 if (Int == 0xffffffff) {
5343 return MatchOperand_ParseFail;
5346 Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTySdwaDstUnused));
5347 return MatchOperand_Success;
5350 void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) {
5351 cvtSDWA(Inst, Operands, SIInstrFlags::VOP1);
5354 void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) {
5355 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2);
5358 void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) {
5359 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true);
5362 void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) {
5363 cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, isVI());
5366 void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands,
5367 uint64_t BasicInstType, bool skipVcc) {
5368 using namespace llvm::AMDGPU::SDWA;
5370 OptionalImmIndexMap OptionalIdx;
5371 bool skippedVcc = false;
5373 unsigned I = 1;
5374 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
5375 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
5376 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
5379 for (unsigned E = Operands.size(); I != E; ++I) {
5380 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
5381 if (skipVcc && !skippedVcc && Op.isReg() && Op.Reg.RegNo == AMDGPU::VCC) {
5382       // VOP2b (v_add_u32, v_sub_u32 ...) sdwa forms use the "vcc" token as dst.
5383       // Skip it if it is the 2nd operand (e.g. v_add_i32_sdwa v1, vcc, v2, v3)
5384       // or the 4th operand (v_addc_u32_sdwa v1, vcc, v2, v3, vcc).
5385       // Skip VCC only if we didn't skip it on the previous iteration.
5386 if (BasicInstType == SIInstrFlags::VOP2 &&
5387 (Inst.getNumOperands() == 1 || Inst.getNumOperands() == 5)) {
5388 skippedVcc = true;
5389 continue;
5390 } else if (BasicInstType == SIInstrFlags::VOPC &&
5391 Inst.getNumOperands() == 0) {
5392 skippedVcc = true;
5393 continue;
5396 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
5397 Op.addRegOrImmWithInputModsOperands(Inst, 2);
5398 } else if (Op.isImm()) {
5399 // Handle optional arguments
5400 OptionalIdx[Op.getImmTy()] = I;
5401 } else {
5402 llvm_unreachable("Invalid operand type");
5404 skippedVcc = false;
5407 if (Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx9 &&
5408 Inst.getOpcode() != AMDGPU::V_NOP_sdwa_vi) {
5409     // v_nop_sdwa_vi/gfx9 has no optional sdwa arguments
5410 switch (BasicInstType) {
5411 case SIInstrFlags::VOP1:
5412 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
5413 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
5414 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
5416 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
5417 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
5418 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
5419 break;
5421 case SIInstrFlags::VOP2:
5422 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
5423 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
5424 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
5426 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
5427 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
5428 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
5429 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
5430 break;
5432 case SIInstrFlags::VOPC:
5433 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
5434 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
5435 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
5436 break;
5438 default:
5439 llvm_unreachable("Invalid instruction type. Only VOP1, VOP2 and VOPC allowed");
5443   // Special case for v_mac_{f16, f32}:
5444   // they have a src2 register operand that is tied to the dst operand.
5445 if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
5446 Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
5447 auto it = Inst.begin();
5448 std::advance(
5449 it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2));
5450 Inst.insert(it, Inst.getOperand(0)); // src2 = dst
5454 /// Force static initialization.
5455 extern "C" void LLVMInitializeAMDGPUAsmParser() {
5456 RegisterMCAsmParser<AMDGPUAsmParser> A(getTheAMDGPUTarget());
5457 RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget());
5460 #define GET_REGISTER_MATCHER
5461 #define GET_MATCHER_IMPLEMENTATION
5462 #define GET_MNEMONIC_SPELL_CHECKER
5463 #include "AMDGPUGenAsmMatcher.inc"
5465 // This function should be defined after the auto-generated include so that the
5466 // MatchClassKind enum is defined.
5467 unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op,
5468 unsigned Kind) {
5469   // Tokens like "glc" would be parsed as immediate operands in ParseOperand().
5470   // But MatchInstructionImpl() expects to see a token and fails to validate the
5471   // operand. This method checks whether we were given an immediate operand but
5472   // expected the corresponding token.
5473 AMDGPUOperand &Operand = (AMDGPUOperand&)Op;
5474 switch (Kind) {
5475 case MCK_addr64:
5476 return Operand.isAddr64() ? Match_Success : Match_InvalidOperand;
5477 case MCK_gds:
5478 return Operand.isGDS() ? Match_Success : Match_InvalidOperand;
5479 case MCK_lds:
5480 return Operand.isLDS() ? Match_Success : Match_InvalidOperand;
5481 case MCK_glc:
5482 return Operand.isGLC() ? Match_Success : Match_InvalidOperand;
5483 case MCK_idxen:
5484 return Operand.isIdxen() ? Match_Success : Match_InvalidOperand;
5485 case MCK_offen:
5486 return Operand.isOffen() ? Match_Success : Match_InvalidOperand;
5487 case MCK_SSrcB32:
5488 // When operands have expression values, they will return true for isToken,
5489 // because it is not possible to distinguish between a token and an
5490 // expression at parse time. MatchInstructionImpl() will always try to
5491 // match an operand as a token, when isToken returns true, and when the
5492 // name of the expression is not a valid token, the match will fail,
5493 // so we need to handle it here.
5494 return Operand.isSSrcB32() ? Match_Success : Match_InvalidOperand;
5495 case MCK_SSrcF32:
5496 return Operand.isSSrcF32() ? Match_Success : Match_InvalidOperand;
5497 case MCK_SoppBrTarget:
5498 return Operand.isSoppBrTarget() ? Match_Success : Match_InvalidOperand;
5499 case MCK_VReg32OrOff:
5500 return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand;
5501 case MCK_InterpSlot:
5502 return Operand.isInterpSlot() ? Match_Success : Match_InvalidOperand;
5503 case MCK_Attr:
5504 return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand;
5505 case MCK_AttrChan:
5506 return Operand.isAttrChan() ? Match_Success : Match_InvalidOperand;
5507 default:
5508 return Match_InvalidOperand;