[AMDGPU][AsmParser][NFC] Translate parsed MIMG instructions to MCInsts automatically.
llvm-project.git: llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
1 //===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
9 #include "AMDKernelCodeT.h"
10 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
11 #include "MCTargetDesc/AMDGPUTargetStreamer.h"
12 #include "SIDefines.h"
13 #include "SIInstrInfo.h"
14 #include "SIRegisterInfo.h"
15 #include "TargetInfo/AMDGPUTargetInfo.h"
16 #include "Utils/AMDGPUAsmUtils.h"
17 #include "Utils/AMDGPUBaseInfo.h"
18 #include "Utils/AMDKernelCodeTUtils.h"
19 #include "llvm/ADT/APFloat.h"
20 #include "llvm/ADT/SmallBitVector.h"
21 #include "llvm/ADT/StringSet.h"
22 #include "llvm/ADT/Twine.h"
23 #include "llvm/BinaryFormat/ELF.h"
24 #include "llvm/CodeGen/MachineValueType.h"
25 #include "llvm/MC/MCAsmInfo.h"
26 #include "llvm/MC/MCContext.h"
27 #include "llvm/MC/MCExpr.h"
28 #include "llvm/MC/MCInst.h"
29 #include "llvm/MC/MCInstrDesc.h"
30 #include "llvm/MC/MCParser/MCAsmLexer.h"
31 #include "llvm/MC/MCParser/MCAsmParser.h"
32 #include "llvm/MC/MCParser/MCParsedAsmOperand.h"
33 #include "llvm/MC/MCParser/MCTargetAsmParser.h"
34 #include "llvm/MC/MCSymbol.h"
35 #include "llvm/MC/TargetRegistry.h"
36 #include "llvm/Support/AMDGPUMetadata.h"
37 #include "llvm/Support/AMDHSAKernelDescriptor.h"
38 #include "llvm/Support/Casting.h"
39 #include "llvm/Support/MathExtras.h"
40 #include "llvm/TargetParser/TargetParser.h"
41 #include <optional>
43 using namespace llvm;
44 using namespace llvm::AMDGPU;
45 using namespace llvm::amdhsa;
47 namespace {
49 class AMDGPUAsmParser;
51 enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_AGPR, IS_TTMP, IS_SPECIAL };
53 //===----------------------------------------------------------------------===//
54 // Operand
55 //===----------------------------------------------------------------------===//
57 class AMDGPUOperand : public MCParsedAsmOperand {
58 enum KindTy {
59 Token,
60 Immediate,
61 Register,
62 Expression
63 } Kind;
65 SMLoc StartLoc, EndLoc;
66 const AMDGPUAsmParser *AsmParser;
68 public:
69 AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_)
70 : Kind(Kind_), AsmParser(AsmParser_) {}
72 using Ptr = std::unique_ptr<AMDGPUOperand>;
74 struct Modifiers {
75 bool Abs = false;
76 bool Neg = false;
77 bool Sext = false;
79 bool hasFPModifiers() const { return Abs || Neg; }
80 bool hasIntModifiers() const { return Sext; }
81 bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); }
83 int64_t getFPModifiersOperand() const {
84 int64_t Operand = 0;
85 Operand |= Abs ? SISrcMods::ABS : 0u;
86 Operand |= Neg ? SISrcMods::NEG : 0u;
87 return Operand;
90 int64_t getIntModifiersOperand() const {
91 int64_t Operand = 0;
92 Operand |= Sext ? SISrcMods::SEXT : 0u;
93 return Operand;
96 int64_t getModifiersOperand() const {
97 assert(!(hasFPModifiers() && hasIntModifiers())
98 && "fp and int modifiers should not be used simultaneously");
99 if (hasFPModifiers()) {
100 return getFPModifiersOperand();
101 } else if (hasIntModifiers()) {
102 return getIntModifiersOperand();
103 } else {
104 return 0;
108 friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
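// Illustrative examples: an operand written as "-|v0|" sets both Neg and Abs,
// so getModifiersOperand() yields SISrcMods::NEG | SISrcMods::ABS, while
// "sext(v0)" sets only Sext and is encoded via getIntModifiersOperand().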
111 enum ImmTy {
112 ImmTyNone,
113 ImmTyGDS,
114 ImmTyLDS,
115 ImmTyOffen,
116 ImmTyIdxen,
117 ImmTyAddr64,
118 ImmTyOffset,
119 ImmTyInstOffset,
120 ImmTyOffset0,
121 ImmTyOffset1,
122 ImmTySMEMOffsetMod,
123 ImmTyCPol,
124 ImmTyTFE,
125 ImmTyD16,
126 ImmTyClampSI,
127 ImmTyOModSI,
128 ImmTySDWADstSel,
129 ImmTySDWASrc0Sel,
130 ImmTySDWASrc1Sel,
131 ImmTySDWADstUnused,
132 ImmTyDMask,
133 ImmTyDim,
134 ImmTyUNorm,
135 ImmTyDA,
136 ImmTyR128A16,
137 ImmTyA16,
138 ImmTyLWE,
139 ImmTyExpTgt,
140 ImmTyExpCompr,
141 ImmTyExpVM,
142 ImmTyFORMAT,
143 ImmTyHwreg,
144 ImmTyOff,
145 ImmTySendMsg,
146 ImmTyInterpSlot,
147 ImmTyInterpAttr,
148 ImmTyInterpAttrChan,
149 ImmTyOpSel,
150 ImmTyOpSelHi,
151 ImmTyNegLo,
152 ImmTyNegHi,
153 ImmTyDPP8,
154 ImmTyDppCtrl,
155 ImmTyDppRowMask,
156 ImmTyDppBankMask,
157 ImmTyDppBoundCtrl,
158 ImmTyDppFI,
159 ImmTySwizzle,
160 ImmTyGprIdxMode,
161 ImmTyHigh,
162 ImmTyBLGP,
163 ImmTyCBSZ,
164 ImmTyABID,
165 ImmTyEndpgm,
166 ImmTyWaitVDST,
167 ImmTyWaitEXP,
170 // Immediate operand kind.
171 // It helps to identify the location of an offending operand after an error.
172 // Note that regular literals and mandatory literals (KImm) must be handled
173 // differently. When looking for an offending operand, we should usually
174 // ignore mandatory literals because they are part of the instruction and
175 // cannot be changed. Report the location of mandatory literals only for VOPD,
176 // when both OpX and OpY have a KImm and there are no other literals.
177 enum ImmKindTy {
178 ImmKindTyNone,
179 ImmKindTyLiteral,
180 ImmKindTyMandatoryLiteral,
181 ImmKindTyConst,
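// For example, the trailing 32-bit constant of v_fmamk_f32 is a mandatory
// literal (ImmKindTyMandatoryLiteral), whereas a non-inlinable constant used as
// a regular source operand is ImmKindTyLiteral.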
184 private:
185 struct TokOp {
186 const char *Data;
187 unsigned Length;
190 struct ImmOp {
191 int64_t Val;
192 ImmTy Type;
193 bool IsFPImm;
194 mutable ImmKindTy Kind;
195 Modifiers Mods;
198 struct RegOp {
199 unsigned RegNo;
200 Modifiers Mods;
203 union {
204 TokOp Tok;
205 ImmOp Imm;
206 RegOp Reg;
207 const MCExpr *Expr;
210 public:
211 bool isToken() const override { return Kind == Token; }
213 bool isSymbolRefExpr() const {
214 return isExpr() && Expr && isa<MCSymbolRefExpr>(Expr);
217 bool isImm() const override {
218 return Kind == Immediate;
221 void setImmKindNone() const {
222 assert(isImm());
223 Imm.Kind = ImmKindTyNone;
226 void setImmKindLiteral() const {
227 assert(isImm());
228 Imm.Kind = ImmKindTyLiteral;
231 void setImmKindMandatoryLiteral() const {
232 assert(isImm());
233 Imm.Kind = ImmKindTyMandatoryLiteral;
236 void setImmKindConst() const {
237 assert(isImm());
238 Imm.Kind = ImmKindTyConst;
241 bool IsImmKindLiteral() const {
242 return isImm() && Imm.Kind == ImmKindTyLiteral;
245 bool IsImmKindMandatoryLiteral() const {
246 return isImm() && Imm.Kind == ImmKindTyMandatoryLiteral;
249 bool isImmKindConst() const {
250 return isImm() && Imm.Kind == ImmKindTyConst;
253 bool isInlinableImm(MVT type) const;
254 bool isLiteralImm(MVT type) const;
256 bool isRegKind() const {
257 return Kind == Register;
260 bool isReg() const override {
261 return isRegKind() && !hasModifiers();
264 bool isRegOrInline(unsigned RCID, MVT type) const {
265 return isRegClass(RCID) || isInlinableImm(type);
268 bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const {
269 return isRegOrInline(RCID, type) || isLiteralImm(type);
272 bool isRegOrImmWithInt16InputMods() const {
273 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16);
276 bool isRegOrImmWithInt32InputMods() const {
277 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32);
280 bool isRegOrInlineImmWithInt16InputMods() const {
281 return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::i16);
284 bool isRegOrInlineImmWithInt32InputMods() const {
285 return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::i32);
288 bool isRegOrImmWithInt64InputMods() const {
289 return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64);
292 bool isRegOrImmWithFP16InputMods() const {
293 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16);
296 bool isRegOrImmWithFP32InputMods() const {
297 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32);
300 bool isRegOrImmWithFP64InputMods() const {
301 return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64);
304 bool isRegOrInlineImmWithFP16InputMods() const {
305 return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::f16);
308 bool isRegOrInlineImmWithFP32InputMods() const {
309 return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::f32);
313 bool isVReg() const {
314 return isRegClass(AMDGPU::VGPR_32RegClassID) ||
315 isRegClass(AMDGPU::VReg_64RegClassID) ||
316 isRegClass(AMDGPU::VReg_96RegClassID) ||
317 isRegClass(AMDGPU::VReg_128RegClassID) ||
318 isRegClass(AMDGPU::VReg_160RegClassID) ||
319 isRegClass(AMDGPU::VReg_192RegClassID) ||
320 isRegClass(AMDGPU::VReg_256RegClassID) ||
321 isRegClass(AMDGPU::VReg_512RegClassID) ||
322 isRegClass(AMDGPU::VReg_1024RegClassID);
325 bool isVReg32() const {
326 return isRegClass(AMDGPU::VGPR_32RegClassID);
329 bool isVReg32OrOff() const {
330 return isOff() || isVReg32();
333 bool isNull() const {
334 return isRegKind() && getReg() == AMDGPU::SGPR_NULL;
337 bool isVRegWithInputMods() const;
338 bool isT16VRegWithInputMods() const;
340 bool isSDWAOperand(MVT type) const;
341 bool isSDWAFP16Operand() const;
342 bool isSDWAFP32Operand() const;
343 bool isSDWAInt16Operand() const;
344 bool isSDWAInt32Operand() const;
346 bool isImmTy(ImmTy ImmT) const {
347 return isImm() && Imm.Type == ImmT;
350 bool isImmLiteral() const { return isImmTy(ImmTyNone); }
352 bool isImmModifier() const {
353 return isImm() && Imm.Type != ImmTyNone;
356 bool isClampSI() const { return isImmTy(ImmTyClampSI); }
357 bool isOModSI() const { return isImmTy(ImmTyOModSI); }
358 bool isDMask() const { return isImmTy(ImmTyDMask); }
359 bool isDim() const { return isImmTy(ImmTyDim); }
360 bool isUNorm() const { return isImmTy(ImmTyUNorm); }
361 bool isDA() const { return isImmTy(ImmTyDA); }
362 bool isR128A16() const { return isImmTy(ImmTyR128A16); }
363 bool isA16() const { return isImmTy(ImmTyA16); }
364 bool isLWE() const { return isImmTy(ImmTyLWE); }
365 bool isOff() const { return isImmTy(ImmTyOff); }
366 bool isExpTgt() const { return isImmTy(ImmTyExpTgt); }
367 bool isExpVM() const { return isImmTy(ImmTyExpVM); }
368 bool isExpCompr() const { return isImmTy(ImmTyExpCompr); }
369 bool isOffen() const { return isImmTy(ImmTyOffen); }
370 bool isIdxen() const { return isImmTy(ImmTyIdxen); }
371 bool isAddr64() const { return isImmTy(ImmTyAddr64); }
372 bool isOffset() const { return isImmTy(ImmTyOffset) && isUInt<16>(getImm()); }
373 bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<8>(getImm()); }
374 bool isOffset1() const { return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); }
375 bool isSMEMOffsetMod() const { return isImmTy(ImmTySMEMOffsetMod); }
376 bool isFlatOffset() const { return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); }
377 bool isGDS() const { return isImmTy(ImmTyGDS); }
378 bool isLDS() const { return isImmTy(ImmTyLDS); }
379 bool isCPol() const { return isImmTy(ImmTyCPol); }
380 bool isTFE() const { return isImmTy(ImmTyTFE); }
381 bool isD16() const { return isImmTy(ImmTyD16); }
382 bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<7>(getImm()); }
383 bool isDppBankMask() const { return isImmTy(ImmTyDppBankMask); }
384 bool isDppRowMask() const { return isImmTy(ImmTyDppRowMask); }
385 bool isDppBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); }
386 bool isDppFI() const { return isImmTy(ImmTyDppFI); }
387 bool isSDWADstSel() const { return isImmTy(ImmTySDWADstSel); }
388 bool isSDWASrc0Sel() const { return isImmTy(ImmTySDWASrc0Sel); }
389 bool isSDWASrc1Sel() const { return isImmTy(ImmTySDWASrc1Sel); }
390 bool isSDWADstUnused() const { return isImmTy(ImmTySDWADstUnused); }
391 bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); }
392 bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); }
393 bool isInterpAttrChan() const { return isImmTy(ImmTyInterpAttrChan); }
394 bool isOpSel() const { return isImmTy(ImmTyOpSel); }
395 bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); }
396 bool isNegLo() const { return isImmTy(ImmTyNegLo); }
397 bool isNegHi() const { return isImmTy(ImmTyNegHi); }
398 bool isHigh() const { return isImmTy(ImmTyHigh); }
400 bool isRegOrImm() const {
401 return isReg() || isImm();
404 bool isRegClass(unsigned RCID) const;
406 bool isInlineValue() const;
408 bool isRegOrInlineNoMods(unsigned RCID, MVT type) const {
409 return isRegOrInline(RCID, type) && !hasModifiers();
412 bool isSCSrcB16() const {
413 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16);
416 bool isSCSrcV2B16() const {
417 return isSCSrcB16();
420 bool isSCSrcB32() const {
421 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32);
424 bool isSCSrcB64() const {
425 return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64);
428 bool isBoolReg() const;
430 bool isSCSrcF16() const {
431 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16);
434 bool isSCSrcV2F16() const {
435 return isSCSrcF16();
438 bool isSCSrcF32() const {
439 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32);
442 bool isSCSrcF64() const {
443 return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64);
446 bool isSSrcB32() const {
447 return isSCSrcB32() || isLiteralImm(MVT::i32) || isExpr();
450 bool isSSrcB16() const {
451 return isSCSrcB16() || isLiteralImm(MVT::i16);
454 bool isSSrcV2B16() const {
455 llvm_unreachable("cannot happen");
456 return isSSrcB16();
459 bool isSSrcB64() const {
460 // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits.
461 // See isVSrc64().
462 return isSCSrcB64() || isLiteralImm(MVT::i64);
465 bool isSSrcF32() const {
466 return isSCSrcB32() || isLiteralImm(MVT::f32) || isExpr();
469 bool isSSrcF64() const {
470 return isSCSrcB64() || isLiteralImm(MVT::f64);
473 bool isSSrcF16() const {
474 return isSCSrcB16() || isLiteralImm(MVT::f16);
477 bool isSSrcV2F16() const {
478 llvm_unreachable("cannot happen");
479 return isSSrcF16();
482 bool isSSrcV2FP32() const {
483 llvm_unreachable("cannot happen");
484 return isSSrcF32();
487 bool isSCSrcV2FP32() const {
488 llvm_unreachable("cannot happen");
489 return isSCSrcF32();
492 bool isSSrcV2INT32() const {
493 llvm_unreachable("cannot happen");
494 return isSSrcB32();
497 bool isSCSrcV2INT32() const {
498 llvm_unreachable("cannot happen");
499 return isSCSrcB32();
502 bool isSSrcOrLdsB32() const {
503 return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) ||
504 isLiteralImm(MVT::i32) || isExpr();
507 bool isVCSrcB32() const {
508 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32);
511 bool isVCSrcB64() const {
512 return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64);
515 bool isVCSrcTB16_Lo128() const {
516 return isRegOrInlineNoMods(AMDGPU::VS_32_Lo128RegClassID, MVT::i16);
519 bool isVCSrcB16() const {
520 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16);
523 bool isVCSrcV2B16() const {
524 return isVCSrcB16();
527 bool isVCSrcF32() const {
528 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32);
531 bool isVCSrcF64() const {
532 return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64);
535 bool isVCSrcTF16_Lo128() const {
536 return isRegOrInlineNoMods(AMDGPU::VS_32_Lo128RegClassID, MVT::f16);
539 bool isVCSrcF16() const {
540 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16);
543 bool isVCSrcV2F16() const {
544 return isVCSrcF16();
547 bool isVSrcB32() const {
548 return isVCSrcF32() || isLiteralImm(MVT::i32) || isExpr();
551 bool isVSrcB64() const {
552 return isVCSrcF64() || isLiteralImm(MVT::i64);
555 bool isVSrcTB16_Lo128() const {
556 return isVCSrcTB16_Lo128() || isLiteralImm(MVT::i16);
559 bool isVSrcB16() const {
560 return isVCSrcB16() || isLiteralImm(MVT::i16);
563 bool isVSrcV2B16() const {
564 return isVSrcB16() || isLiteralImm(MVT::v2i16);
567 bool isVCSrcV2FP32() const {
568 return isVCSrcF64();
571 bool isVSrcV2FP32() const {
572 return isVSrcF64() || isLiteralImm(MVT::v2f32);
575 bool isVCSrcV2INT32() const {
576 return isVCSrcB64();
579 bool isVSrcV2INT32() const {
580 return isVSrcB64() || isLiteralImm(MVT::v2i32);
583 bool isVSrcF32() const {
584 return isVCSrcF32() || isLiteralImm(MVT::f32) || isExpr();
587 bool isVSrcF64() const {
588 return isVCSrcF64() || isLiteralImm(MVT::f64);
591 bool isVSrcTF16_Lo128() const {
592 return isVCSrcTF16_Lo128() || isLiteralImm(MVT::f16);
595 bool isVSrcF16() const {
596 return isVCSrcF16() || isLiteralImm(MVT::f16);
599 bool isVSrcV2F16() const {
600 return isVSrcF16() || isLiteralImm(MVT::v2f16);
603 bool isVISrcB32() const {
604 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i32);
607 bool isVISrcB16() const {
608 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i16);
611 bool isVISrcV2B16() const {
612 return isVISrcB16();
615 bool isVISrcF32() const {
616 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f32);
619 bool isVISrcF16() const {
620 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f16);
623 bool isVISrcV2F16() const {
624 return isVISrcF16() || isVISrcB32();
627 bool isVISrc_64B64() const {
628 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i64);
631 bool isVISrc_64F64() const {
632 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f64);
635 bool isVISrc_64V2FP32() const {
636 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f32);
639 bool isVISrc_64V2INT32() const {
640 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i32);
643 bool isVISrc_256B64() const {
644 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i64);
647 bool isVISrc_256F64() const {
648 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f64);
651 bool isVISrc_128B16() const {
652 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i16);
655 bool isVISrc_128V2B16() const {
656 return isVISrc_128B16();
659 bool isVISrc_128B32() const {
660 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i32);
663 bool isVISrc_128F32() const {
664 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f32);
667 bool isVISrc_256V2FP32() const {
668 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f32);
671 bool isVISrc_256V2INT32() const {
672 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i32);
675 bool isVISrc_512B32() const {
676 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i32);
679 bool isVISrc_512B16() const {
680 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i16);
683 bool isVISrc_512V2B16() const {
684 return isVISrc_512B16();
687 bool isVISrc_512F32() const {
688 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f32);
691 bool isVISrc_512F16() const {
692 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f16);
695 bool isVISrc_512V2F16() const {
696 return isVISrc_512F16() || isVISrc_512B32();
699 bool isVISrc_1024B32() const {
700 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i32);
703 bool isVISrc_1024B16() const {
704 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i16);
707 bool isVISrc_1024V2B16() const {
708 return isVISrc_1024B16();
711 bool isVISrc_1024F32() const {
712 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f32);
715 bool isVISrc_1024F16() const {
716 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f16);
719 bool isVISrc_1024V2F16() const {
720 return isVISrc_1024F16() || isVISrc_1024B32();
723 bool isAISrcB32() const {
724 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i32);
727 bool isAISrcB16() const {
728 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i16);
731 bool isAISrcV2B16() const {
732 return isAISrcB16();
735 bool isAISrcF32() const {
736 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f32);
739 bool isAISrcF16() const {
740 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f16);
743 bool isAISrcV2F16() const {
744 return isAISrcF16() || isAISrcB32();
747 bool isAISrc_64B64() const {
748 return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::i64);
751 bool isAISrc_64F64() const {
752 return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::f64);
755 bool isAISrc_128B32() const {
756 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i32);
759 bool isAISrc_128B16() const {
760 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i16);
763 bool isAISrc_128V2B16() const {
764 return isAISrc_128B16();
767 bool isAISrc_128F32() const {
768 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f32);
771 bool isAISrc_128F16() const {
772 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f16);
775 bool isAISrc_128V2F16() const {
776 return isAISrc_128F16() || isAISrc_128B32();
779 bool isVISrc_128F16() const {
780 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f16);
783 bool isVISrc_128V2F16() const {
784 return isVISrc_128F16() || isVISrc_128B32();
787 bool isAISrc_256B64() const {
788 return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::i64);
791 bool isAISrc_256F64() const {
792 return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::f64);
795 bool isAISrc_512B32() const {
796 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i32);
799 bool isAISrc_512B16() const {
800 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i16);
803 bool isAISrc_512V2B16() const {
804 return isAISrc_512B16();
807 bool isAISrc_512F32() const {
808 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f32);
811 bool isAISrc_512F16() const {
812 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f16);
815 bool isAISrc_512V2F16() const {
816 return isAISrc_512F16() || isAISrc_512B32();
819 bool isAISrc_1024B32() const {
820 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i32);
823 bool isAISrc_1024B16() const {
824 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i16);
827 bool isAISrc_1024V2B16() const {
828 return isAISrc_1024B16();
831 bool isAISrc_1024F32() const {
832 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f32);
835 bool isAISrc_1024F16() const {
836 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f16);
839 bool isAISrc_1024V2F16() const {
840 return isAISrc_1024F16() || isAISrc_1024B32();
843 bool isKImmFP32() const {
844 return isLiteralImm(MVT::f32);
847 bool isKImmFP16() const {
848 return isLiteralImm(MVT::f16);
851 bool isMem() const override {
852 return false;
855 bool isExpr() const {
856 return Kind == Expression;
859 bool isSOPPBrTarget() const { return isExpr() || isImm(); }
861 bool isSWaitCnt() const;
862 bool isDepCtr() const;
863 bool isSDelayALU() const;
864 bool isHwreg() const;
865 bool isSendMsg() const;
866 bool isSwizzle() const;
867 bool isSMRDOffset8() const;
868 bool isSMEMOffset() const;
869 bool isSMRDLiteralOffset() const;
870 bool isDPP8() const;
871 bool isDPPCtrl() const;
872 bool isBLGP() const;
873 bool isCBSZ() const;
874 bool isABID() const;
875 bool isGPRIdxMode() const;
876 bool isS16Imm() const;
877 bool isU16Imm() const;
878 bool isEndpgm() const;
879 bool isWaitVDST() const;
880 bool isWaitEXP() const;
882 StringRef getToken() const {
883 assert(isToken());
884 return StringRef(Tok.Data, Tok.Length);
887 int64_t getImm() const {
888 assert(isImm());
889 return Imm.Val;
892 void setImm(int64_t Val) {
893 assert(isImm());
894 Imm.Val = Val;
897 ImmTy getImmTy() const {
898 assert(isImm());
899 return Imm.Type;
902 unsigned getReg() const override {
903 assert(isRegKind());
904 return Reg.RegNo;
907 SMLoc getStartLoc() const override {
908 return StartLoc;
911 SMLoc getEndLoc() const override {
912 return EndLoc;
915 SMRange getLocRange() const {
916 return SMRange(StartLoc, EndLoc);
919 Modifiers getModifiers() const {
920 assert(isRegKind() || isImmTy(ImmTyNone));
921 return isRegKind() ? Reg.Mods : Imm.Mods;
924 void setModifiers(Modifiers Mods) {
925 assert(isRegKind() || isImmTy(ImmTyNone));
926 if (isRegKind())
927 Reg.Mods = Mods;
928 else
929 Imm.Mods = Mods;
932 bool hasModifiers() const {
933 return getModifiers().hasModifiers();
936 bool hasFPModifiers() const {
937 return getModifiers().hasFPModifiers();
940 bool hasIntModifiers() const {
941 return getModifiers().hasIntModifiers();
944 uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const;
946 void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const;
948 void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const;
950 void addRegOperands(MCInst &Inst, unsigned N) const;
952 void addRegOrImmOperands(MCInst &Inst, unsigned N) const {
953 if (isRegKind())
954 addRegOperands(Inst, N);
955 else
956 addImmOperands(Inst, N);
959 void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const {
960 Modifiers Mods = getModifiers();
961 Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
962 if (isRegKind()) {
963 addRegOperands(Inst, N);
964 } else {
965 addImmOperands(Inst, N, false);
969 void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
970 assert(!hasIntModifiers());
971 addRegOrImmWithInputModsOperands(Inst, N);
974 void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
975 assert(!hasFPModifiers());
976 addRegOrImmWithInputModsOperands(Inst, N);
979 void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const {
980 Modifiers Mods = getModifiers();
981 Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
982 assert(isRegKind());
983 addRegOperands(Inst, N);
986 void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
987 assert(!hasIntModifiers());
988 addRegWithInputModsOperands(Inst, N);
991 void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
992 assert(!hasFPModifiers());
993 addRegWithInputModsOperands(Inst, N);
996 static void printImmTy(raw_ostream& OS, ImmTy Type) {
997 switch (Type) {
998 case ImmTyNone: OS << "None"; break;
999 case ImmTyGDS: OS << "GDS"; break;
1000 case ImmTyLDS: OS << "LDS"; break;
1001 case ImmTyOffen: OS << "Offen"; break;
1002 case ImmTyIdxen: OS << "Idxen"; break;
1003 case ImmTyAddr64: OS << "Addr64"; break;
1004 case ImmTyOffset: OS << "Offset"; break;
1005 case ImmTyInstOffset: OS << "InstOffset"; break;
1006 case ImmTyOffset0: OS << "Offset0"; break;
1007 case ImmTyOffset1: OS << "Offset1"; break;
1008 case ImmTySMEMOffsetMod: OS << "SMEMOffsetMod"; break;
1009 case ImmTyCPol: OS << "CPol"; break;
1010 case ImmTyTFE: OS << "TFE"; break;
1011 case ImmTyD16: OS << "D16"; break;
1012 case ImmTyFORMAT: OS << "FORMAT"; break;
1013 case ImmTyClampSI: OS << "ClampSI"; break;
1014 case ImmTyOModSI: OS << "OModSI"; break;
1015 case ImmTyDPP8: OS << "DPP8"; break;
1016 case ImmTyDppCtrl: OS << "DppCtrl"; break;
1017 case ImmTyDppRowMask: OS << "DppRowMask"; break;
1018 case ImmTyDppBankMask: OS << "DppBankMask"; break;
1019 case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break;
1020 case ImmTyDppFI: OS << "DppFI"; break;
1021 case ImmTySDWADstSel: OS << "SDWADstSel"; break;
1022 case ImmTySDWASrc0Sel: OS << "SDWASrc0Sel"; break;
1023 case ImmTySDWASrc1Sel: OS << "SDWASrc1Sel"; break;
1024 case ImmTySDWADstUnused: OS << "SDWADstUnused"; break;
1025 case ImmTyDMask: OS << "DMask"; break;
1026 case ImmTyDim: OS << "Dim"; break;
1027 case ImmTyUNorm: OS << "UNorm"; break;
1028 case ImmTyDA: OS << "DA"; break;
1029 case ImmTyR128A16: OS << "R128A16"; break;
1030 case ImmTyA16: OS << "A16"; break;
1031 case ImmTyLWE: OS << "LWE"; break;
1032 case ImmTyOff: OS << "Off"; break;
1033 case ImmTyExpTgt: OS << "ExpTgt"; break;
1034 case ImmTyExpCompr: OS << "ExpCompr"; break;
1035 case ImmTyExpVM: OS << "ExpVM"; break;
1036 case ImmTyHwreg: OS << "Hwreg"; break;
1037 case ImmTySendMsg: OS << "SendMsg"; break;
1038 case ImmTyInterpSlot: OS << "InterpSlot"; break;
1039 case ImmTyInterpAttr: OS << "InterpAttr"; break;
1040 case ImmTyInterpAttrChan: OS << "InterpAttrChan"; break;
1041 case ImmTyOpSel: OS << "OpSel"; break;
1042 case ImmTyOpSelHi: OS << "OpSelHi"; break;
1043 case ImmTyNegLo: OS << "NegLo"; break;
1044 case ImmTyNegHi: OS << "NegHi"; break;
1045 case ImmTySwizzle: OS << "Swizzle"; break;
1046 case ImmTyGprIdxMode: OS << "GprIdxMode"; break;
1047 case ImmTyHigh: OS << "High"; break;
1048 case ImmTyBLGP: OS << "BLGP"; break;
1049 case ImmTyCBSZ: OS << "CBSZ"; break;
1050 case ImmTyABID: OS << "ABID"; break;
1051 case ImmTyEndpgm: OS << "Endpgm"; break;
1052 case ImmTyWaitVDST: OS << "WaitVDST"; break;
1053 case ImmTyWaitEXP: OS << "WaitEXP"; break;
1057 void print(raw_ostream &OS) const override {
1058 switch (Kind) {
1059 case Register:
1060 OS << "<register " << getReg() << " mods: " << Reg.Mods << '>';
1061 break;
1062 case Immediate:
1063 OS << '<' << getImm();
1064 if (getImmTy() != ImmTyNone) {
1065 OS << " type: "; printImmTy(OS, getImmTy());
1067 OS << " mods: " << Imm.Mods << '>';
1068 break;
1069 case Token:
1070 OS << '\'' << getToken() << '\'';
1071 break;
1072 case Expression:
1073 OS << "<expr " << *Expr << '>';
1074 break;
1078 static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser,
1079 int64_t Val, SMLoc Loc,
1080 ImmTy Type = ImmTyNone,
1081 bool IsFPImm = false) {
1082 auto Op = std::make_unique<AMDGPUOperand>(Immediate, AsmParser);
1083 Op->Imm.Val = Val;
1084 Op->Imm.IsFPImm = IsFPImm;
1085 Op->Imm.Kind = ImmKindTyNone;
1086 Op->Imm.Type = Type;
1087 Op->Imm.Mods = Modifiers();
1088 Op->StartLoc = Loc;
1089 Op->EndLoc = Loc;
1090 return Op;
1093 static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser,
1094 StringRef Str, SMLoc Loc,
1095 bool HasExplicitEncodingSize = true) {
1096 auto Res = std::make_unique<AMDGPUOperand>(Token, AsmParser);
1097 Res->Tok.Data = Str.data();
1098 Res->Tok.Length = Str.size();
1099 Res->StartLoc = Loc;
1100 Res->EndLoc = Loc;
1101 return Res;
1104 static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser,
1105 unsigned RegNo, SMLoc S,
1106 SMLoc E) {
1107 auto Op = std::make_unique<AMDGPUOperand>(Register, AsmParser);
1108 Op->Reg.RegNo = RegNo;
1109 Op->Reg.Mods = Modifiers();
1110 Op->StartLoc = S;
1111 Op->EndLoc = E;
1112 return Op;
1115 static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser,
1116 const class MCExpr *Expr, SMLoc S) {
1117 auto Op = std::make_unique<AMDGPUOperand>(Expression, AsmParser);
1118 Op->Expr = Expr;
1119 Op->StartLoc = S;
1120 Op->EndLoc = S;
1121 return Op;
1125 raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) {
1126 OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext;
1127 return OS;
1130 //===----------------------------------------------------------------------===//
1131 // AsmParser
1132 //===----------------------------------------------------------------------===//
1134 // Holds info related to the current kernel, e.g. count of SGPRs used.
1135 // Kernel scope begins at the .amdgpu_hsa_kernel directive and ends at the
1136 // next .amdgpu_hsa_kernel directive or at EOF.
1137 class KernelScopeInfo {
1138 int SgprIndexUnusedMin = -1;
1139 int VgprIndexUnusedMin = -1;
1140 int AgprIndexUnusedMin = -1;
1141 MCContext *Ctx = nullptr;
1142 MCSubtargetInfo const *MSTI = nullptr;
1144 void usesSgprAt(int i) {
1145 if (i >= SgprIndexUnusedMin) {
1146 SgprIndexUnusedMin = ++i;
1147 if (Ctx) {
1148 MCSymbol* const Sym =
1149 Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count"));
1150 Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx));
1155 void usesVgprAt(int i) {
1156 if (i >= VgprIndexUnusedMin) {
1157 VgprIndexUnusedMin = ++i;
1158 if (Ctx) {
1159 MCSymbol* const Sym =
1160 Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
1161 int totalVGPR = getTotalNumVGPRs(isGFX90A(*MSTI), AgprIndexUnusedMin,
1162 VgprIndexUnusedMin);
1163 Sym->setVariableValue(MCConstantExpr::create(totalVGPR, *Ctx));
1168 void usesAgprAt(int i) {
1169 // The instruction will be rejected in AMDGPUAsmParser::MatchAndEmitInstruction anyway.
1170 if (!hasMAIInsts(*MSTI))
1171 return;
1173 if (i >= AgprIndexUnusedMin) {
1174 AgprIndexUnusedMin = ++i;
1175 if (Ctx) {
1176 MCSymbol* const Sym =
1177 Ctx->getOrCreateSymbol(Twine(".kernel.agpr_count"));
1178 Sym->setVariableValue(MCConstantExpr::create(AgprIndexUnusedMin, *Ctx));
1180 // Also update vgpr_count (dependent on agpr_count for gfx908/gfx90a)
1181 MCSymbol* const vSym =
1182 Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
1183 int totalVGPR = getTotalNumVGPRs(isGFX90A(*MSTI), AgprIndexUnusedMin,
1184 VgprIndexUnusedMin);
1185 vSym->setVariableValue(MCConstantExpr::create(totalVGPR, *Ctx));
1190 public:
1191 KernelScopeInfo() = default;
1193 void initialize(MCContext &Context) {
1194 Ctx = &Context;
1195 MSTI = Ctx->getSubtargetInfo();
1197 usesSgprAt(SgprIndexUnusedMin = -1);
1198 usesVgprAt(VgprIndexUnusedMin = -1);
1199 if (hasMAIInsts(*MSTI)) {
1200 usesAgprAt(AgprIndexUnusedMin = -1);
1204 void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex,
1205 unsigned RegWidth) {
1206 switch (RegKind) {
1207 case IS_SGPR:
1208 usesSgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
1209 break;
1210 case IS_AGPR:
1211 usesAgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
1212 break;
1213 case IS_VGPR:
1214 usesVgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
1215 break;
1216 default:
1217 break;
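// usesRegister example: a reference to v[4:7] (RegWidth == 128) calls
// usesVgprAt(7), marking VGPRs up to index 7 as used, so .kernel.vgpr_count
// becomes at least 8.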
1222 class AMDGPUAsmParser : public MCTargetAsmParser {
1223 MCAsmParser &Parser;
1225 unsigned ForcedEncodingSize = 0;
1226 bool ForcedDPP = false;
1227 bool ForcedSDWA = false;
1228 KernelScopeInfo KernelScope;
1230 /// @name Auto-generated Match Functions
1231 /// {
1233 #define GET_ASSEMBLER_HEADER
1234 #include "AMDGPUGenAsmMatcher.inc"
1236 /// }
1238 private:
1239 bool ParseAsAbsoluteExpression(uint32_t &Ret);
1240 bool OutOfRangeError(SMRange Range);
1241 /// Calculate VGPR/SGPR blocks required for given target, reserved
1242 /// registers, and user-specified NextFreeXGPR values.
1244 /// \param Features [in] Target features, used for bug corrections.
1245 /// \param VCCUsed [in] Whether VCC special SGPR is reserved.
1246 /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved.
1247 /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved.
1248 /// \param EnableWavefrontSize32 [in] Value of ENABLE_WAVEFRONT_SIZE32 kernel
1249 /// descriptor field, if valid.
1250 /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one.
1251 /// \param VGPRRange [in] Token range, used for VGPR diagnostics.
1252 /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one.
1253 /// \param SGPRRange [in] Token range, used for SGPR diagnostics.
1254 /// \param VGPRBlocks [out] Result VGPR block count.
1255 /// \param SGPRBlocks [out] Result SGPR block count.
1256 bool calculateGPRBlocks(const FeatureBitset &Features, bool VCCUsed,
1257 bool FlatScrUsed, bool XNACKUsed,
1258 std::optional<bool> EnableWavefrontSize32,
1259 unsigned NextFreeVGPR, SMRange VGPRRange,
1260 unsigned NextFreeSGPR, SMRange SGPRRange,
1261 unsigned &VGPRBlocks, unsigned &SGPRBlocks);
1262 bool ParseDirectiveAMDGCNTarget();
1263 bool ParseDirectiveAMDHSAKernel();
1264 bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor);
1265 bool ParseDirectiveHSACodeObjectVersion();
1266 bool ParseDirectiveHSACodeObjectISA();
1267 bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header);
1268 bool ParseDirectiveAMDKernelCodeT();
1269 // TODO: Possibly make subtargetHasRegister const.
1270 bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo);
1271 bool ParseDirectiveAMDGPUHsaKernel();
1273 bool ParseDirectiveISAVersion();
1274 bool ParseDirectiveHSAMetadata();
1275 bool ParseDirectivePALMetadataBegin();
1276 bool ParseDirectivePALMetadata();
1277 bool ParseDirectiveAMDGPULDS();
1279 /// Common code to parse out a block of text (typically YAML) between start and
1280 /// end directives.
1281 bool ParseToEndDirective(const char *AssemblerDirectiveBegin,
1282 const char *AssemblerDirectiveEnd,
1283 std::string &CollectString);
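// Used, for example, by ParseDirectiveHSAMetadata and
// ParseDirectivePALMetadataBegin, which collect a metadata blob between
// matching begin/end directives.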
1285 bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth,
1286 RegisterKind RegKind, unsigned Reg1, SMLoc Loc);
1287 bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
1288 unsigned &RegNum, unsigned &RegWidth,
1289 bool RestoreOnFailure = false);
1290 bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
1291 unsigned &RegNum, unsigned &RegWidth,
1292 SmallVectorImpl<AsmToken> &Tokens);
1293 unsigned ParseRegularReg(RegisterKind &RegKind, unsigned &RegNum,
1294 unsigned &RegWidth,
1295 SmallVectorImpl<AsmToken> &Tokens);
1296 unsigned ParseSpecialReg(RegisterKind &RegKind, unsigned &RegNum,
1297 unsigned &RegWidth,
1298 SmallVectorImpl<AsmToken> &Tokens);
1299 unsigned ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
1300 unsigned &RegWidth, SmallVectorImpl<AsmToken> &Tokens);
1301 bool ParseRegRange(unsigned& Num, unsigned& Width);
1302 unsigned getRegularReg(RegisterKind RegKind,
1303 unsigned RegNum,
1304 unsigned RegWidth,
1305 SMLoc Loc);
1307 bool isRegister();
1308 bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const;
1309 std::optional<StringRef> getGprCountSymbolName(RegisterKind RegKind);
1310 void initializeGprCountSymbol(RegisterKind RegKind);
1311 bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex,
1312 unsigned RegWidth);
1313 void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands,
1314 bool IsAtomic);
1315 void cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
1316 bool IsGdsHardcoded);
1318 public:
1319 enum AMDGPUMatchResultTy {
1320 Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY
1322 enum OperandMode {
1323 OperandMode_Default,
1324 OperandMode_NSA,
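// OperandMode_NSA is used when parsing MIMG non-sequential-address (NSA) forms,
// where the address is written as a list of individual VGPRs rather than a
// single contiguous register tuple.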
1327 using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;
1329 AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
1330 const MCInstrInfo &MII,
1331 const MCTargetOptions &Options)
1332 : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) {
1333 MCAsmParserExtension::Initialize(Parser);
1335 if (getFeatureBits().none()) {
1336 // Set default features.
1337 copySTI().ToggleFeature("southern-islands");
1340 setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));
1343 // TODO: make these pre-defined variables read-only.
1344 // Currently there is no suitable machinery in core llvm-mc for this.
1345 // MCSymbol::isRedefinable is intended for another purpose, and
1346 // AsmParser::parseDirectiveSet() cannot be specialized for a specific target.
1347 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
1348 MCContext &Ctx = getContext();
1349 if (ISA.Major >= 6 && isHsaAbiVersion3AndAbove(&getSTI())) {
1350 MCSymbol *Sym =
1351 Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_number"));
1352 Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
1353 Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_minor"));
1354 Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
1355 Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_stepping"));
1356 Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
1357 } else {
1358 MCSymbol *Sym =
1359 Ctx.getOrCreateSymbol(Twine(".option.machine_version_major"));
1360 Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
1361 Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_minor"));
1362 Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
1363 Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping"));
1364 Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
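// With -mcpu=gfx900, for instance, Major/Minor/Stepping are 9/0/0, so the
// version symbols above evaluate to those constants.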
1366 if (ISA.Major >= 6 && isHsaAbiVersion3AndAbove(&getSTI())) {
1367 initializeGprCountSymbol(IS_VGPR);
1368 initializeGprCountSymbol(IS_SGPR);
1369 } else
1370 KernelScope.initialize(getContext());
1374 bool hasMIMG_R128() const {
1375 return AMDGPU::hasMIMG_R128(getSTI());
1378 bool hasPackedD16() const {
1379 return AMDGPU::hasPackedD16(getSTI());
1382 bool hasA16() const { return AMDGPU::hasA16(getSTI()); }
1384 bool hasG16() const { return AMDGPU::hasG16(getSTI()); }
1386 bool isSI() const {
1387 return AMDGPU::isSI(getSTI());
1390 bool isCI() const {
1391 return AMDGPU::isCI(getSTI());
1394 bool isVI() const {
1395 return AMDGPU::isVI(getSTI());
1398 bool isGFX9() const {
1399 return AMDGPU::isGFX9(getSTI());
1402 // TODO: isGFX90A is also true for GFX940. We need to clean this up.
1403 bool isGFX90A() const {
1404 return AMDGPU::isGFX90A(getSTI());
1407 bool isGFX940() const {
1408 return AMDGPU::isGFX940(getSTI());
1411 bool isGFX9Plus() const {
1412 return AMDGPU::isGFX9Plus(getSTI());
1415 bool isGFX10() const {
1416 return AMDGPU::isGFX10(getSTI());
1419 bool isGFX10Plus() const { return AMDGPU::isGFX10Plus(getSTI()); }
1421 bool isGFX11() const {
1422 return AMDGPU::isGFX11(getSTI());
1425 bool isGFX11Plus() const {
1426 return AMDGPU::isGFX11Plus(getSTI());
1429 bool isGFX10_AEncoding() const { return AMDGPU::isGFX10_AEncoding(getSTI()); }
1431 bool isGFX10_BEncoding() const {
1432 return AMDGPU::isGFX10_BEncoding(getSTI());
1435 bool hasInv2PiInlineImm() const {
1436 return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
1439 bool hasFlatOffsets() const {
1440 return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets];
1443 bool hasArchitectedFlatScratch() const {
1444 return getFeatureBits()[AMDGPU::FeatureArchitectedFlatScratch];
1447 bool hasSGPR102_SGPR103() const {
1448 return !isVI() && !isGFX9();
1451 bool hasSGPR104_SGPR105() const { return isGFX10Plus(); }
1453 bool hasIntClamp() const {
1454 return getFeatureBits()[AMDGPU::FeatureIntClamp];
1457 bool hasPartialNSAEncoding() const {
1458 return getFeatureBits()[AMDGPU::FeaturePartialNSAEncoding];
1461 unsigned getNSAMaxSize() const {
1462 return AMDGPU::getNSAMaxSize(getSTI());
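// For MIMG NSA encodings, getNSAMaxSize() bounds how many individual address
// VGPRs can be encoded; on targets with only partial NSA support, remaining
// addresses are expected in a contiguous VGPR range (see OperandMode_NSA above).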
1465 AMDGPUTargetStreamer &getTargetStreamer() {
1466 MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
1467 return static_cast<AMDGPUTargetStreamer &>(TS);
1470 const MCRegisterInfo *getMRI() const {
1471 // We need this const_cast because for some reason getContext() is not const
1472 // in MCAsmParser.
1473 return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo();
1476 const MCInstrInfo *getMII() const {
1477 return &MII;
1480 const FeatureBitset &getFeatureBits() const {
1481 return getSTI().getFeatureBits();
1484 void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; }
1485 void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; }
1486 void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; }
1488 unsigned getForcedEncodingSize() const { return ForcedEncodingSize; }
1489 bool isForcedVOP3() const { return ForcedEncodingSize == 64; }
1490 bool isForcedDPP() const { return ForcedDPP; }
1491 bool isForcedSDWA() const { return ForcedSDWA; }
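// These are typically set from a mnemonic suffix (see parseMnemonicSuffix):
// e.g. "_e64" forces the 64-bit VOP3 encoding (isForcedVOP3()), while "_dpp"
// and "_sdwa" force the DPP and SDWA variants.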
1492 ArrayRef<unsigned> getMatchedVariants() const;
1493 StringRef getMatchedVariantName() const;
1495 std::unique_ptr<AMDGPUOperand> parseRegister(bool RestoreOnFailure = false);
1496 bool ParseRegister(MCRegister &RegNo, SMLoc &StartLoc, SMLoc &EndLoc,
1497 bool RestoreOnFailure);
1498 bool parseRegister(MCRegister &RegNo, SMLoc &StartLoc,
1499 SMLoc &EndLoc) override;
1500 OperandMatchResultTy tryParseRegister(MCRegister &RegNo, SMLoc &StartLoc,
1501 SMLoc &EndLoc) override;
1502 unsigned checkTargetMatchPredicate(MCInst &Inst) override;
1503 unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
1504 unsigned Kind) override;
1505 bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
1506 OperandVector &Operands, MCStreamer &Out,
1507 uint64_t &ErrorInfo,
1508 bool MatchingInlineAsm) override;
1509 bool ParseDirective(AsmToken DirectiveID) override;
1510 ParseStatus parseOperand(OperandVector &Operands, StringRef Mnemonic,
1511 OperandMode Mode = OperandMode_Default);
1512 StringRef parseMnemonicSuffix(StringRef Name);
1513 bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
1514 SMLoc NameLoc, OperandVector &Operands) override;
1515 //bool ProcessInstruction(MCInst &Inst);
1517 ParseStatus parseTokenOp(StringRef Name, OperandVector &Operands);
1519 ParseStatus parseIntWithPrefix(const char *Prefix, int64_t &Int);
1521 ParseStatus
1522 parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
1523 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1524 std::function<bool(int64_t &)> ConvertResult = nullptr);
1526 ParseStatus parseOperandArrayWithPrefix(
1527 const char *Prefix, OperandVector &Operands,
1528 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1529 bool (*ConvertResult)(int64_t &) = nullptr);
1531 ParseStatus
1532 parseNamedBit(StringRef Name, OperandVector &Operands,
1533 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone);
1534 unsigned getCPolKind(StringRef Id, StringRef Mnemo, bool &Disabling) const;
1535 ParseStatus parseCPol(OperandVector &Operands);
1536 ParseStatus parseStringWithPrefix(StringRef Prefix, StringRef &Value,
1537 SMLoc &StringLoc);
1539 bool isModifier();
1540 bool isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1541 bool isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1542 bool isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1543 bool isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const;
1544 bool parseSP3NegModifier();
1545 ParseStatus parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false);
1546 ParseStatus parseReg(OperandVector &Operands);
1547 ParseStatus parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false);
1548 ParseStatus parseRegOrImmWithFPInputMods(OperandVector &Operands,
1549 bool AllowImm = true);
1550 ParseStatus parseRegOrImmWithIntInputMods(OperandVector &Operands,
1551 bool AllowImm = true);
1552 ParseStatus parseRegWithFPInputMods(OperandVector &Operands);
1553 ParseStatus parseRegWithIntInputMods(OperandVector &Operands);
1554 ParseStatus parseVReg32OrOff(OperandVector &Operands);
1555 ParseStatus parseDfmtNfmt(int64_t &Format);
1556 ParseStatus parseUfmt(int64_t &Format);
1557 ParseStatus parseSymbolicSplitFormat(StringRef FormatStr, SMLoc Loc,
1558 int64_t &Format);
1559 ParseStatus parseSymbolicUnifiedFormat(StringRef FormatStr, SMLoc Loc,
1560 int64_t &Format);
1561 ParseStatus parseFORMAT(OperandVector &Operands);
1562 ParseStatus parseSymbolicOrNumericFormat(int64_t &Format);
1563 ParseStatus parseNumericFormat(int64_t &Format);
1564 ParseStatus parseFlatOffset(OperandVector &Operands);
1565 ParseStatus parseR128A16(OperandVector &Operands);
1566 ParseStatus parseBLGP(OperandVector &Operands);
1567 bool tryParseFmt(const char *Pref, int64_t MaxVal, int64_t &Val);
1568 bool matchDfmtNfmt(int64_t &Dfmt, int64_t &Nfmt, StringRef FormatStr, SMLoc Loc);
1570 void cvtDSOffset01(MCInst &Inst, const OperandVector &Operands);
1571 void cvtDS(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, false); }
1572 void cvtDSGds(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, true); }
1573 void cvtExp(MCInst &Inst, const OperandVector &Operands);
1575 bool parseCnt(int64_t &IntVal);
1576 ParseStatus parseSWaitCnt(OperandVector &Operands);
1578 bool parseDepCtr(int64_t &IntVal, unsigned &Mask);
1579 void depCtrError(SMLoc Loc, int ErrorId, StringRef DepCtrName);
1580 ParseStatus parseDepCtr(OperandVector &Operands);
1582 bool parseDelay(int64_t &Delay);
1583 ParseStatus parseSDelayALU(OperandVector &Operands);
1585 ParseStatus parseHwreg(OperandVector &Operands);
1587 private:
1588 struct OperandInfoTy {
1589 SMLoc Loc;
1590 int64_t Id;
1591 bool IsSymbolic = false;
1592 bool IsDefined = false;
1594 OperandInfoTy(int64_t Id_) : Id(Id_) {}
1597 bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &Op, OperandInfoTy &Stream);
1598 bool validateSendMsg(const OperandInfoTy &Msg,
1599 const OperandInfoTy &Op,
1600 const OperandInfoTy &Stream);
1602 bool parseHwregBody(OperandInfoTy &HwReg,
1603 OperandInfoTy &Offset,
1604 OperandInfoTy &Width);
1605 bool validateHwreg(const OperandInfoTy &HwReg,
1606 const OperandInfoTy &Offset,
1607 const OperandInfoTy &Width);
1609 SMLoc getFlatOffsetLoc(const OperandVector &Operands) const;
1610 SMLoc getSMEMOffsetLoc(const OperandVector &Operands) const;
1611 SMLoc getBLGPLoc(const OperandVector &Operands) const;
1613 SMLoc getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test,
1614 const OperandVector &Operands) const;
1615 SMLoc getImmLoc(AMDGPUOperand::ImmTy Type, const OperandVector &Operands) const;
1616 SMLoc getRegLoc(unsigned Reg, const OperandVector &Operands) const;
1617 SMLoc getLitLoc(const OperandVector &Operands,
1618 bool SearchMandatoryLiterals = false) const;
1619 SMLoc getMandatoryLitLoc(const OperandVector &Operands) const;
1620 SMLoc getConstLoc(const OperandVector &Operands) const;
1621 SMLoc getInstLoc(const OperandVector &Operands) const;
1623 bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc, const OperandVector &Operands);
1624 bool validateFlatOffset(const MCInst &Inst, const OperandVector &Operands);
1625 bool validateSMEMOffset(const MCInst &Inst, const OperandVector &Operands);
1626 bool validateSOPLiteral(const MCInst &Inst) const;
1627 bool validateConstantBusLimitations(const MCInst &Inst, const OperandVector &Operands);
1628 bool validateVOPDRegBankConstraints(const MCInst &Inst,
1629 const OperandVector &Operands);
1630 bool validateIntClampSupported(const MCInst &Inst);
1631 bool validateMIMGAtomicDMask(const MCInst &Inst);
1632 bool validateMIMGGatherDMask(const MCInst &Inst);
1633 bool validateMovrels(const MCInst &Inst, const OperandVector &Operands);
1634 bool validateMIMGDataSize(const MCInst &Inst, const SMLoc &IDLoc);
1635 bool validateMIMGAddrSize(const MCInst &Inst, const SMLoc &IDLoc);
1636 bool validateMIMGD16(const MCInst &Inst);
1637 bool validateMIMGMSAA(const MCInst &Inst);
1638 bool validateOpSel(const MCInst &Inst);
1639 bool validateDPP(const MCInst &Inst, const OperandVector &Operands);
1640 bool validateVccOperand(unsigned Reg) const;
1641 bool validateVOPLiteral(const MCInst &Inst, const OperandVector &Operands);
1642 bool validateMAIAccWrite(const MCInst &Inst, const OperandVector &Operands);
1643 bool validateMAISrc2(const MCInst &Inst, const OperandVector &Operands);
1644 bool validateMFMA(const MCInst &Inst, const OperandVector &Operands);
1645 bool validateAGPRLdSt(const MCInst &Inst) const;
1646 bool validateVGPRAlign(const MCInst &Inst) const;
1647 bool validateBLGP(const MCInst &Inst, const OperandVector &Operands);
1648 bool validateGWS(const MCInst &Inst, const OperandVector &Operands);
1649 bool validateDivScale(const MCInst &Inst);
1650 bool validateWaitCnt(const MCInst &Inst, const OperandVector &Operands);
1651 bool validateCoherencyBits(const MCInst &Inst, const OperandVector &Operands,
1652 const SMLoc &IDLoc);
1653 bool validateExeczVcczOperands(const OperandVector &Operands);
1654 bool validateTFE(const MCInst &Inst, const OperandVector &Operands);
1655 std::optional<StringRef> validateLdsDirect(const MCInst &Inst);
1656 unsigned getConstantBusLimit(unsigned Opcode) const;
1657 bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
1658 bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
1659 unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const;
1661 bool isSupportedMnemo(StringRef Mnemo,
1662 const FeatureBitset &FBS);
1663 bool isSupportedMnemo(StringRef Mnemo,
1664 const FeatureBitset &FBS,
1665 ArrayRef<unsigned> Variants);
1666 bool checkUnsupportedInstruction(StringRef Name, const SMLoc &IDLoc);
1668 bool isId(const StringRef Id) const;
1669 bool isId(const AsmToken &Token, const StringRef Id) const;
1670 bool isToken(const AsmToken::TokenKind Kind) const;
1671 StringRef getId() const;
1672 bool trySkipId(const StringRef Id);
1673 bool trySkipId(const StringRef Pref, const StringRef Id);
1674 bool trySkipId(const StringRef Id, const AsmToken::TokenKind Kind);
1675 bool trySkipToken(const AsmToken::TokenKind Kind);
1676 bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg);
1677 bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string");
1678 bool parseId(StringRef &Val, const StringRef ErrMsg = "");
1680 void peekTokens(MutableArrayRef<AsmToken> Tokens);
1681 AsmToken::TokenKind getTokenKind() const;
1682 bool parseExpr(int64_t &Imm, StringRef Expected = "");
1683 bool parseExpr(OperandVector &Operands);
1684 StringRef getTokenStr() const;
1685 AsmToken peekToken(bool ShouldSkipSpace = true);
1686 AsmToken getToken() const;
1687 SMLoc getLoc() const;
1688 void lex();
1690 public:
1691 void onBeginOfFile() override;
1693 ParseStatus parseCustomOperand(OperandVector &Operands, unsigned MCK);
1695 ParseStatus parseExpTgt(OperandVector &Operands);
1696 ParseStatus parseSendMsg(OperandVector &Operands);
1697 ParseStatus parseInterpSlot(OperandVector &Operands);
1698 ParseStatus parseInterpAttr(OperandVector &Operands);
1699 ParseStatus parseSOPPBrTarget(OperandVector &Operands);
1700 ParseStatus parseBoolReg(OperandVector &Operands);
1702 bool parseSwizzleOperand(int64_t &Op,
1703 const unsigned MinVal,
1704 const unsigned MaxVal,
1705 const StringRef ErrMsg,
1706 SMLoc &Loc);
1707 bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
1708 const unsigned MinVal,
1709 const unsigned MaxVal,
1710 const StringRef ErrMsg);
1711 ParseStatus parseSwizzle(OperandVector &Operands);
1712 bool parseSwizzleOffset(int64_t &Imm);
1713 bool parseSwizzleMacro(int64_t &Imm);
1714 bool parseSwizzleQuadPerm(int64_t &Imm);
1715 bool parseSwizzleBitmaskPerm(int64_t &Imm);
1716 bool parseSwizzleBroadcast(int64_t &Imm);
1717 bool parseSwizzleSwap(int64_t &Imm);
1718 bool parseSwizzleReverse(int64_t &Imm);
1720 ParseStatus parseGPRIdxMode(OperandVector &Operands);
1721 int64_t parseGPRIdxMacro();
1723 void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false); }
1724 void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true); }
1726 ParseStatus parseOModSI(OperandVector &Operands);
1728 void cvtVOP3(MCInst &Inst, const OperandVector &Operands,
1729 OptionalImmIndexMap &OptionalIdx);
1730 void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands);
1731 void cvtVOP3(MCInst &Inst, const OperandVector &Operands);
1732 void cvtVOP3P(MCInst &Inst, const OperandVector &Operands);
1733 void cvtVOPD(MCInst &Inst, const OperandVector &Operands);
1734 void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands,
1735 OptionalImmIndexMap &OptionalIdx);
1736 void cvtVOP3P(MCInst &Inst, const OperandVector &Operands,
1737 OptionalImmIndexMap &OptionalIdx);
1739 void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands);
1740 void cvtVINTERP(MCInst &Inst, const OperandVector &Operands);
1741 void cvtSMEMAtomic(MCInst &Inst, const OperandVector &Operands);
1743 bool parseDimId(unsigned &Encoding);
1744 ParseStatus parseDim(OperandVector &Operands);
1745 bool convertDppBoundCtrl(int64_t &BoundCtrl);
1746 ParseStatus parseDPP8(OperandVector &Operands);
1747 ParseStatus parseDPPCtrl(OperandVector &Operands);
1748 bool isSupportedDPPCtrl(StringRef Ctrl, const OperandVector &Operands);
1749 int64_t parseDPPCtrlSel(StringRef Ctrl);
1750 int64_t parseDPPCtrlPerm();
1751 void cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8 = false);
1752 void cvtDPP8(MCInst &Inst, const OperandVector &Operands) {
1753 cvtDPP(Inst, Operands, true);
1755 void cvtVOP3DPP(MCInst &Inst, const OperandVector &Operands,
1756 bool IsDPP8 = false);
1757 void cvtVOP3DPP8(MCInst &Inst, const OperandVector &Operands) {
1758 cvtVOP3DPP(Inst, Operands, true);
1761 ParseStatus parseSDWASel(OperandVector &Operands, StringRef Prefix,
1762 AMDGPUOperand::ImmTy Type);
1763 ParseStatus parseSDWADstUnused(OperandVector &Operands);
1764 void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands);
1765 void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands);
1766 void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands);
1767 void cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands);
1768 void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands);
1769 void cvtSDWA(MCInst &Inst, const OperandVector &Operands,
1770 uint64_t BasicInstType,
1771 bool SkipDstVcc = false,
1772 bool SkipSrcVcc = false);
1774 ParseStatus parseEndpgm(OperandVector &Operands);
1776 ParseStatus parseVOPD(OperandVector &Operands);
1779 } // end anonymous namespace
1781 // May be called with an integer type of equivalent bit width.
1782 static const fltSemantics *getFltSemantics(unsigned Size) {
1783 switch (Size) {
1784 case 4:
1785 return &APFloat::IEEEsingle();
1786 case 8:
1787 return &APFloat::IEEEdouble();
1788 case 2:
1789 return &APFloat::IEEEhalf();
1790 default:
1791 llvm_unreachable("unsupported fp type");
1795 static const fltSemantics *getFltSemantics(MVT VT) {
1796 return getFltSemantics(VT.getSizeInBits() / 8);
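// For example, both MVT::f16 and MVT::i16 map to APFloat::IEEEhalf(); see the
// note above about integer types of equivalent bit width.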
1799 static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
1800 switch (OperandType) {
1801 case AMDGPU::OPERAND_REG_IMM_INT32:
1802 case AMDGPU::OPERAND_REG_IMM_FP32:
1803 case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED:
1804 case AMDGPU::OPERAND_REG_INLINE_C_INT32:
1805 case AMDGPU::OPERAND_REG_INLINE_C_FP32:
1806 case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
1807 case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
1808 case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
1809 case AMDGPU::OPERAND_REG_IMM_V2FP32:
1810 case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
1811 case AMDGPU::OPERAND_REG_IMM_V2INT32:
1812 case AMDGPU::OPERAND_KIMM32:
1813 return &APFloat::IEEEsingle();
1814 case AMDGPU::OPERAND_REG_IMM_INT64:
1815 case AMDGPU::OPERAND_REG_IMM_FP64:
1816 case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1817 case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1818 case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
1819 return &APFloat::IEEEdouble();
1820 case AMDGPU::OPERAND_REG_IMM_INT16:
1821 case AMDGPU::OPERAND_REG_IMM_FP16:
1822 case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
1823 case AMDGPU::OPERAND_REG_INLINE_C_INT16:
1824 case AMDGPU::OPERAND_REG_INLINE_C_FP16:
1825 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
1826 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
1827 case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
1828 case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
1829 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
1830 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
1831 case AMDGPU::OPERAND_REG_IMM_V2INT16:
1832 case AMDGPU::OPERAND_REG_IMM_V2FP16:
1833 case AMDGPU::OPERAND_KIMM16:
1834 return &APFloat::IEEEhalf();
1835 default:
1836 llvm_unreachable("unsupported fp type");
1840 //===----------------------------------------------------------------------===//
1841 // Operand
1842 //===----------------------------------------------------------------------===//
1844 static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) {
1845 bool Lost;
1847 // Convert literal to single precision
1848 APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT),
1849 APFloat::rmNearestTiesToEven,
1850 &Lost);
1851   // We allow precision loss but not overflow or underflow
1852 if (Status != APFloat::opOK &&
1853 Lost &&
1854 ((Status & APFloat::opOverflow) != 0 ||
1855 (Status & APFloat::opUnderflow) != 0)) {
1856 return false;
1859 return true;
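// Check whether Val can be truncated to Size bits without losing
// information, i.e. it is representable as either an unsigned or a signed
// Size-bit integer.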
1862 static bool isSafeTruncation(int64_t Val, unsigned Size) {
1863 return isUIntN(Size, Val) || isIntN(Size, Val);
1866 static bool isInlineableLiteralOp16(int64_t Val, MVT VT, bool HasInv2Pi) {
1867 if (VT.getScalarType() == MVT::i16) {
1868 // FP immediate values are broken.
1869 return isInlinableIntLiteral(Val);
1872 // f16/v2f16 operands work correctly for all values.
1873 return AMDGPU::isInlinableLiteral16(Val, HasInv2Pi);
1876 bool AMDGPUOperand::isInlinableImm(MVT type) const {
1878 // This is a hack to enable named inline values like
1879 // shared_base with both 32-bit and 64-bit operands.
1880 // Note that these values are defined as
1881 // 32-bit operands only.
1882 if (isInlineValue()) {
1883 return true;
1886 if (!isImmTy(ImmTyNone)) {
1887 // Only plain immediates are inlinable (e.g. "clamp" attribute is not)
1888 return false;
1890 // TODO: We should avoid using host float here. It would be better to
1891 // check the float bit values which is what a few other places do.
1892 // We've had bot failures before due to weird NaN support on mips hosts.
1894 APInt Literal(64, Imm.Val);
1896 if (Imm.IsFPImm) { // We got fp literal token
1897 if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
1898 return AMDGPU::isInlinableLiteral64(Imm.Val,
1899 AsmParser->hasInv2PiInlineImm());
1902 APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
1903 if (!canLosslesslyConvertToFPType(FPLiteral, type))
1904 return false;
1906 if (type.getScalarSizeInBits() == 16) {
1907 return isInlineableLiteralOp16(
1908 static_cast<int16_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
1909 type, AsmParser->hasInv2PiInlineImm());
1912 // Check if single precision literal is inlinable
1913 return AMDGPU::isInlinableLiteral32(
1914 static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
1915 AsmParser->hasInv2PiInlineImm());
1918 // We got int literal token.
1919 if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
1920 return AMDGPU::isInlinableLiteral64(Imm.Val,
1921 AsmParser->hasInv2PiInlineImm());
1924 if (!isSafeTruncation(Imm.Val, type.getScalarSizeInBits())) {
1925 return false;
1928 if (type.getScalarSizeInBits() == 16) {
1929 return isInlineableLiteralOp16(
1930 static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()),
1931 type, AsmParser->hasInv2PiInlineImm());
1934 return AMDGPU::isInlinableLiteral32(
1935 static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()),
1936 AsmParser->hasInv2PiInlineImm());
1939 bool AMDGPUOperand::isLiteralImm(MVT type) const {
1940 // Check that this immediate can be added as literal
1941 if (!isImmTy(ImmTyNone)) {
1942 return false;
1945 if (!Imm.IsFPImm) {
1946 // We got int literal token.
1948 if (type == MVT::f64 && hasFPModifiers()) {
1949 // Cannot apply fp modifiers to int literals preserving the same semantics
1950 // for VOP1/2/C and VOP3 because of integer truncation. To avoid ambiguity,
1951 // disable these cases.
1952 return false;
1955 unsigned Size = type.getSizeInBits();
1956 if (Size == 64)
1957 Size = 32;
1959 // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP
1960 // types.
1961 return isSafeTruncation(Imm.Val, Size);
1964 // We got fp literal token
1965 if (type == MVT::f64) { // Expected 64-bit fp operand
1966     // We would set the low 32 bits of the literal to zeroes, but we accept such literals
1967 return true;
1970 if (type == MVT::i64) { // Expected 64-bit int operand
1971 // We don't allow fp literals in 64-bit integer instructions. It is
1972 // unclear how we should encode them.
1973 return false;
1976 // We allow fp literals with f16x2 operands assuming that the specified
1977 // literal goes into the lower half and the upper half is zero. We also
1978 // require that the literal may be losslessly converted to f16.
1979 MVT ExpectedType = (type == MVT::v2f16)? MVT::f16 :
1980 (type == MVT::v2i16)? MVT::i16 :
1981 (type == MVT::v2f32)? MVT::f32 : type;
1983 APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
1984 return canLosslesslyConvertToFPType(FPLiteral, ExpectedType);
1987 bool AMDGPUOperand::isRegClass(unsigned RCID) const {
1988 return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg());
1991 bool AMDGPUOperand::isVRegWithInputMods() const {
1992 return isRegClass(AMDGPU::VGPR_32RegClassID) ||
1993 // GFX90A allows DPP on 64-bit operands.
1994 (isRegClass(AMDGPU::VReg_64RegClassID) &&
1995 AsmParser->getFeatureBits()[AMDGPU::Feature64BitDPP]);
1998 bool AMDGPUOperand::isT16VRegWithInputMods() const {
1999 return isRegClass(AMDGPU::VGPR_32_Lo128RegClassID);
2002 bool AMDGPUOperand::isSDWAOperand(MVT type) const {
2003 if (AsmParser->isVI())
2004 return isVReg32();
2005 else if (AsmParser->isGFX9Plus())
2006 return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type);
2007 else
2008 return false;
2011 bool AMDGPUOperand::isSDWAFP16Operand() const {
2012 return isSDWAOperand(MVT::f16);
2015 bool AMDGPUOperand::isSDWAFP32Operand() const {
2016 return isSDWAOperand(MVT::f32);
2019 bool AMDGPUOperand::isSDWAInt16Operand() const {
2020 return isSDWAOperand(MVT::i16);
2023 bool AMDGPUOperand::isSDWAInt32Operand() const {
2024 return isSDWAOperand(MVT::i32);
2027 bool AMDGPUOperand::isBoolReg() const {
2028 auto FB = AsmParser->getFeatureBits();
2029 return isReg() && ((FB[AMDGPU::FeatureWavefrontSize64] && isSCSrcB64()) ||
2030 (FB[AMDGPU::FeatureWavefrontSize32] && isSCSrcB32()));
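// Apply parsed abs/neg source modifiers directly to an FP literal of the
// given size in bytes by clearing or flipping its sign bit.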
2033 uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const
2035 assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
2036 assert(Size == 2 || Size == 4 || Size == 8);
2038 const uint64_t FpSignMask = (1ULL << (Size * 8 - 1));
2040 if (Imm.Mods.Abs) {
2041 Val &= ~FpSignMask;
2043 if (Imm.Mods.Neg) {
2044 Val ^= FpSignMask;
2047 return Val;
2050 void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const {
2051 if (isExpr()) {
2052 Inst.addOperand(MCOperand::createExpr(Expr));
2053 return;
2056 if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()),
2057 Inst.getNumOperands())) {
2058 addLiteralImmOperand(Inst, Imm.Val,
2059 ApplyModifiers &
2060 isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
2061 } else {
2062 assert(!isImmTy(ImmTyNone) || !hasModifiers());
2063 Inst.addOperand(MCOperand::createImm(Imm.Val));
2064 setImmKindNone();
2068 void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const {
2069 const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode());
2070 auto OpNum = Inst.getNumOperands();
2071 // Check that this operand accepts literals
2072 assert(AMDGPU::isSISrcOperand(InstDesc, OpNum));
2074 if (ApplyModifiers) {
2075 assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum));
2076 const unsigned Size = Imm.IsFPImm ? sizeof(double) : getOperandSize(InstDesc, OpNum);
2077 Val = applyInputFPModifiers(Val, Size);
2080 APInt Literal(64, Val);
2081 uint8_t OpTy = InstDesc.operands()[OpNum].OperandType;
2083 if (Imm.IsFPImm) { // We got fp literal token
2084 switch (OpTy) {
2085 case AMDGPU::OPERAND_REG_IMM_INT64:
2086 case AMDGPU::OPERAND_REG_IMM_FP64:
2087 case AMDGPU::OPERAND_REG_INLINE_C_INT64:
2088 case AMDGPU::OPERAND_REG_INLINE_C_FP64:
2089 case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
2090 if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(),
2091 AsmParser->hasInv2PiInlineImm())) {
2092 Inst.addOperand(MCOperand::createImm(Literal.getZExtValue()));
2093 setImmKindConst();
2094 return;
2097 // Non-inlineable
2098 if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) { // Expected 64-bit fp operand
2099 // For fp operands we check if low 32 bits are zeros
2100 if (Literal.getLoBits(32) != 0) {
2101 const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(),
2102 "Can't encode literal as exact 64-bit floating-point operand. "
2103 "Low 32-bits will be set to zero");
2106 Inst.addOperand(MCOperand::createImm(Literal.lshr(32).getZExtValue()));
2107 setImmKindLiteral();
2108 return;
2111 // We don't allow fp literals in 64-bit integer instructions. It is
2112 // unclear how we should encode them. This case should be checked earlier
2113 // in predicate methods (isLiteralImm())
2114 llvm_unreachable("fp literal in 64-bit integer instruction.");
2116 case AMDGPU::OPERAND_REG_IMM_INT32:
2117 case AMDGPU::OPERAND_REG_IMM_FP32:
2118 case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED:
2119 case AMDGPU::OPERAND_REG_INLINE_C_INT32:
2120 case AMDGPU::OPERAND_REG_INLINE_C_FP32:
2121 case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
2122 case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
2123 case AMDGPU::OPERAND_REG_IMM_INT16:
2124 case AMDGPU::OPERAND_REG_IMM_FP16:
2125 case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
2126 case AMDGPU::OPERAND_REG_INLINE_C_INT16:
2127 case AMDGPU::OPERAND_REG_INLINE_C_FP16:
2128 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
2129 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
2130 case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
2131 case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
2132 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
2133 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
2134 case AMDGPU::OPERAND_REG_IMM_V2INT16:
2135 case AMDGPU::OPERAND_REG_IMM_V2FP16:
2136 case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
2137 case AMDGPU::OPERAND_REG_IMM_V2FP32:
2138 case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
2139 case AMDGPU::OPERAND_REG_IMM_V2INT32:
2140 case AMDGPU::OPERAND_KIMM32:
2141 case AMDGPU::OPERAND_KIMM16: {
2142 bool lost;
2143 APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
2144 // Convert literal to single precision
2145 FPLiteral.convert(*getOpFltSemantics(OpTy),
2146 APFloat::rmNearestTiesToEven, &lost);
2147     // We allow precision loss but not overflow or underflow. This should be
2148 // checked earlier in isLiteralImm()
2150 uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
2151 Inst.addOperand(MCOperand::createImm(ImmVal));
2152 if (OpTy == AMDGPU::OPERAND_KIMM32 || OpTy == AMDGPU::OPERAND_KIMM16) {
2153 setImmKindMandatoryLiteral();
2154 } else {
2155 setImmKindLiteral();
2157 return;
2159 default:
2160 llvm_unreachable("invalid operand size");
2163 return;
2166 // We got int literal token.
2167 // Only sign extend inline immediates.
2168 switch (OpTy) {
2169 case AMDGPU::OPERAND_REG_IMM_INT32:
2170 case AMDGPU::OPERAND_REG_IMM_FP32:
2171 case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED:
2172 case AMDGPU::OPERAND_REG_INLINE_C_INT32:
2173 case AMDGPU::OPERAND_REG_INLINE_C_FP32:
2174 case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
2175 case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
2176 case AMDGPU::OPERAND_REG_IMM_V2INT16:
2177 case AMDGPU::OPERAND_REG_IMM_V2FP16:
2178 case AMDGPU::OPERAND_REG_IMM_V2FP32:
2179 case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
2180 case AMDGPU::OPERAND_REG_IMM_V2INT32:
2181 case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
2182 if (isSafeTruncation(Val, 32) &&
2183 AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val),
2184 AsmParser->hasInv2PiInlineImm())) {
2185 Inst.addOperand(MCOperand::createImm(Val));
2186 setImmKindConst();
2187 return;
2190 Inst.addOperand(MCOperand::createImm(Val & 0xffffffff));
2191 setImmKindLiteral();
2192 return;
2194 case AMDGPU::OPERAND_REG_IMM_INT64:
2195 case AMDGPU::OPERAND_REG_IMM_FP64:
2196 case AMDGPU::OPERAND_REG_INLINE_C_INT64:
2197 case AMDGPU::OPERAND_REG_INLINE_C_FP64:
2198 case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
2199 if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) {
2200 Inst.addOperand(MCOperand::createImm(Val));
2201 setImmKindConst();
2202 return;
2205 Inst.addOperand(MCOperand::createImm(Lo_32(Val)));
2206 setImmKindLiteral();
2207 return;
2209 case AMDGPU::OPERAND_REG_IMM_INT16:
2210 case AMDGPU::OPERAND_REG_IMM_FP16:
2211 case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
2212 case AMDGPU::OPERAND_REG_INLINE_C_INT16:
2213 case AMDGPU::OPERAND_REG_INLINE_C_FP16:
2214 case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
2215 case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
2216 if (isSafeTruncation(Val, 16) &&
2217 AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
2218 AsmParser->hasInv2PiInlineImm())) {
2219 Inst.addOperand(MCOperand::createImm(Val));
2220 setImmKindConst();
2221 return;
2224 Inst.addOperand(MCOperand::createImm(Val & 0xffff));
2225 setImmKindLiteral();
2226 return;
2228 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
2229 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
2230 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
2231 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: {
2232 assert(isSafeTruncation(Val, 16));
2233 assert(AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
2234 AsmParser->hasInv2PiInlineImm()));
2236 Inst.addOperand(MCOperand::createImm(Val));
2237 return;
2239 case AMDGPU::OPERAND_KIMM32:
2240 Inst.addOperand(MCOperand::createImm(Literal.getLoBits(32).getZExtValue()));
2241 setImmKindMandatoryLiteral();
2242 return;
2243 case AMDGPU::OPERAND_KIMM16:
2244 Inst.addOperand(MCOperand::createImm(Literal.getLoBits(16).getZExtValue()));
2245 setImmKindMandatoryLiteral();
2246 return;
2247 default:
2248 llvm_unreachable("invalid operand size");
2252 void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const {
2253 Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI())));
2256 bool AMDGPUOperand::isInlineValue() const {
2257 return isRegKind() && ::isInlineValue(getReg());
2260 //===----------------------------------------------------------------------===//
2261 // AsmParser
2262 //===----------------------------------------------------------------------===//
2264 static int getRegClass(RegisterKind Is, unsigned RegWidth) {
2265 if (Is == IS_VGPR) {
2266 switch (RegWidth) {
2267 default: return -1;
2268 case 32:
2269 return AMDGPU::VGPR_32RegClassID;
2270 case 64:
2271 return AMDGPU::VReg_64RegClassID;
2272 case 96:
2273 return AMDGPU::VReg_96RegClassID;
2274 case 128:
2275 return AMDGPU::VReg_128RegClassID;
2276 case 160:
2277 return AMDGPU::VReg_160RegClassID;
2278 case 192:
2279 return AMDGPU::VReg_192RegClassID;
2280 case 224:
2281 return AMDGPU::VReg_224RegClassID;
2282 case 256:
2283 return AMDGPU::VReg_256RegClassID;
2284 case 288:
2285 return AMDGPU::VReg_288RegClassID;
2286 case 320:
2287 return AMDGPU::VReg_320RegClassID;
2288 case 352:
2289 return AMDGPU::VReg_352RegClassID;
2290 case 384:
2291 return AMDGPU::VReg_384RegClassID;
2292 case 512:
2293 return AMDGPU::VReg_512RegClassID;
2294 case 1024:
2295 return AMDGPU::VReg_1024RegClassID;
2297 } else if (Is == IS_TTMP) {
2298 switch (RegWidth) {
2299 default: return -1;
2300 case 32:
2301 return AMDGPU::TTMP_32RegClassID;
2302 case 64:
2303 return AMDGPU::TTMP_64RegClassID;
2304 case 128:
2305 return AMDGPU::TTMP_128RegClassID;
2306 case 256:
2307 return AMDGPU::TTMP_256RegClassID;
2308 case 512:
2309 return AMDGPU::TTMP_512RegClassID;
2311 } else if (Is == IS_SGPR) {
2312 switch (RegWidth) {
2313 default: return -1;
2314 case 32:
2315 return AMDGPU::SGPR_32RegClassID;
2316 case 64:
2317 return AMDGPU::SGPR_64RegClassID;
2318 case 96:
2319 return AMDGPU::SGPR_96RegClassID;
2320 case 128:
2321 return AMDGPU::SGPR_128RegClassID;
2322 case 160:
2323 return AMDGPU::SGPR_160RegClassID;
2324 case 192:
2325 return AMDGPU::SGPR_192RegClassID;
2326 case 224:
2327 return AMDGPU::SGPR_224RegClassID;
2328 case 256:
2329 return AMDGPU::SGPR_256RegClassID;
2330 case 288:
2331 return AMDGPU::SGPR_288RegClassID;
2332 case 320:
2333 return AMDGPU::SGPR_320RegClassID;
2334 case 352:
2335 return AMDGPU::SGPR_352RegClassID;
2336 case 384:
2337 return AMDGPU::SGPR_384RegClassID;
2338 case 512:
2339 return AMDGPU::SGPR_512RegClassID;
2341 } else if (Is == IS_AGPR) {
2342 switch (RegWidth) {
2343 default: return -1;
2344 case 32:
2345 return AMDGPU::AGPR_32RegClassID;
2346 case 64:
2347 return AMDGPU::AReg_64RegClassID;
2348 case 96:
2349 return AMDGPU::AReg_96RegClassID;
2350 case 128:
2351 return AMDGPU::AReg_128RegClassID;
2352 case 160:
2353 return AMDGPU::AReg_160RegClassID;
2354 case 192:
2355 return AMDGPU::AReg_192RegClassID;
2356 case 224:
2357 return AMDGPU::AReg_224RegClassID;
2358 case 256:
2359 return AMDGPU::AReg_256RegClassID;
2360 case 288:
2361 return AMDGPU::AReg_288RegClassID;
2362 case 320:
2363 return AMDGPU::AReg_320RegClassID;
2364 case 352:
2365 return AMDGPU::AReg_352RegClassID;
2366 case 384:
2367 return AMDGPU::AReg_384RegClassID;
2368 case 512:
2369 return AMDGPU::AReg_512RegClassID;
2370 case 1024:
2371 return AMDGPU::AReg_1024RegClassID;
2374 return -1;
2377 static unsigned getSpecialRegForName(StringRef RegName) {
2378 return StringSwitch<unsigned>(RegName)
2379 .Case("exec", AMDGPU::EXEC)
2380 .Case("vcc", AMDGPU::VCC)
2381 .Case("flat_scratch", AMDGPU::FLAT_SCR)
2382 .Case("xnack_mask", AMDGPU::XNACK_MASK)
2383 .Case("shared_base", AMDGPU::SRC_SHARED_BASE)
2384 .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE)
2385 .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT)
2386 .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT)
2387 .Case("private_base", AMDGPU::SRC_PRIVATE_BASE)
2388 .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE)
2389 .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
2390 .Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
2391 .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
2392 .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
2393 .Case("lds_direct", AMDGPU::LDS_DIRECT)
2394 .Case("src_lds_direct", AMDGPU::LDS_DIRECT)
2395 .Case("m0", AMDGPU::M0)
2396 .Case("vccz", AMDGPU::SRC_VCCZ)
2397 .Case("src_vccz", AMDGPU::SRC_VCCZ)
2398 .Case("execz", AMDGPU::SRC_EXECZ)
2399 .Case("src_execz", AMDGPU::SRC_EXECZ)
2400 .Case("scc", AMDGPU::SRC_SCC)
2401 .Case("src_scc", AMDGPU::SRC_SCC)
2402 .Case("tba", AMDGPU::TBA)
2403 .Case("tma", AMDGPU::TMA)
2404 .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO)
2405 .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI)
2406 .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO)
2407 .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI)
2408 .Case("vcc_lo", AMDGPU::VCC_LO)
2409 .Case("vcc_hi", AMDGPU::VCC_HI)
2410 .Case("exec_lo", AMDGPU::EXEC_LO)
2411 .Case("exec_hi", AMDGPU::EXEC_HI)
2412 .Case("tma_lo", AMDGPU::TMA_LO)
2413 .Case("tma_hi", AMDGPU::TMA_HI)
2414 .Case("tba_lo", AMDGPU::TBA_LO)
2415 .Case("tba_hi", AMDGPU::TBA_HI)
2416 .Case("pc", AMDGPU::PC_REG)
2417 .Case("null", AMDGPU::SGPR_NULL)
2418 .Default(AMDGPU::NoRegister);
2421 bool AMDGPUAsmParser::ParseRegister(MCRegister &RegNo, SMLoc &StartLoc,
2422 SMLoc &EndLoc, bool RestoreOnFailure) {
2423 auto R = parseRegister();
2424 if (!R) return true;
2425 assert(R->isReg());
2426 RegNo = R->getReg();
2427 StartLoc = R->getStartLoc();
2428 EndLoc = R->getEndLoc();
2429 return false;
2432 bool AMDGPUAsmParser::parseRegister(MCRegister &RegNo, SMLoc &StartLoc,
2433 SMLoc &EndLoc) {
2434 return ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/false);
2437 OperandMatchResultTy AMDGPUAsmParser::tryParseRegister(MCRegister &RegNo,
2438 SMLoc &StartLoc,
2439 SMLoc &EndLoc) {
2440 bool Result =
2441 ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/true);
2442 bool PendingErrors = getParser().hasPendingError();
2443 getParser().clearPendingErrors();
2444 if (PendingErrors)
2445 return MatchOperand_ParseFail;
2446 if (Result)
2447 return MatchOperand_NoMatch;
2448 return MatchOperand_Success;
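// Append the next register to a register list being parsed. Known lo/hi
// pairs of special registers are merged (e.g. exec_lo followed by exec_hi
// yields exec); regular registers must have consecutive indices and extend
// the list width by 32 bits.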
2451 bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth,
2452 RegisterKind RegKind, unsigned Reg1,
2453 SMLoc Loc) {
2454 switch (RegKind) {
2455 case IS_SPECIAL:
2456 if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) {
2457 Reg = AMDGPU::EXEC;
2458 RegWidth = 64;
2459 return true;
2461 if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) {
2462 Reg = AMDGPU::FLAT_SCR;
2463 RegWidth = 64;
2464 return true;
2466 if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) {
2467 Reg = AMDGPU::XNACK_MASK;
2468 RegWidth = 64;
2469 return true;
2471 if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) {
2472 Reg = AMDGPU::VCC;
2473 RegWidth = 64;
2474 return true;
2476 if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) {
2477 Reg = AMDGPU::TBA;
2478 RegWidth = 64;
2479 return true;
2481 if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) {
2482 Reg = AMDGPU::TMA;
2483 RegWidth = 64;
2484 return true;
2486 Error(Loc, "register does not fit in the list");
2487 return false;
2488 case IS_VGPR:
2489 case IS_SGPR:
2490 case IS_AGPR:
2491 case IS_TTMP:
2492 if (Reg1 != Reg + RegWidth / 32) {
2493 Error(Loc, "registers in a list must have consecutive indices");
2494 return false;
2496 RegWidth += 32;
2497 return true;
2498 default:
2499 llvm_unreachable("unexpected register kind");
2503 struct RegInfo {
2504 StringLiteral Name;
2505 RegisterKind Kind;
2508 static constexpr RegInfo RegularRegisters[] = {
2509 {{"v"}, IS_VGPR},
2510 {{"s"}, IS_SGPR},
2511 {{"ttmp"}, IS_TTMP},
2512 {{"acc"}, IS_AGPR},
2513 {{"a"}, IS_AGPR},
2516 static bool isRegularReg(RegisterKind Kind) {
2517 return Kind == IS_VGPR ||
2518 Kind == IS_SGPR ||
2519 Kind == IS_TTMP ||
2520 Kind == IS_AGPR;
2523 static const RegInfo* getRegularRegInfo(StringRef Str) {
2524 for (const RegInfo &Reg : RegularRegisters)
2525 if (Str.startswith(Reg.Name))
2526 return &Reg;
2527 return nullptr;
2530 static bool getRegNum(StringRef Str, unsigned& Num) {
2531 return !Str.getAsInteger(10, Num);
2534 bool
2535 AMDGPUAsmParser::isRegister(const AsmToken &Token,
2536 const AsmToken &NextToken) const {
2538 // A list of consecutive registers: [s0,s1,s2,s3]
2539 if (Token.is(AsmToken::LBrac))
2540 return true;
2542 if (!Token.is(AsmToken::Identifier))
2543 return false;
2545 // A single register like s0 or a range of registers like s[0:1]
2547 StringRef Str = Token.getString();
2548 const RegInfo *Reg = getRegularRegInfo(Str);
2549 if (Reg) {
2550 StringRef RegName = Reg->Name;
2551 StringRef RegSuffix = Str.substr(RegName.size());
2552 if (!RegSuffix.empty()) {
2553 unsigned Num;
2554 // A single register with an index: rXX
2555 if (getRegNum(RegSuffix, Num))
2556 return true;
2557 } else {
2558 // A range of registers: r[XX:YY].
2559 if (NextToken.is(AsmToken::LBrac))
2560 return true;
2564 return getSpecialRegForName(Str) != AMDGPU::NoRegister;
2567 bool
2568 AMDGPUAsmParser::isRegister()
2570 return isRegister(getToken(), peekToken());
2573 unsigned
2574 AMDGPUAsmParser::getRegularReg(RegisterKind RegKind,
2575 unsigned RegNum,
2576 unsigned RegWidth,
2577 SMLoc Loc) {
2579 assert(isRegularReg(RegKind));
2581 unsigned AlignSize = 1;
2582 if (RegKind == IS_SGPR || RegKind == IS_TTMP) {
2583 // SGPR and TTMP registers must be aligned.
2584 // Max required alignment is 4 dwords.
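    // E.g. a 64-bit range must start at an even index (s[2:3] is valid,
    // s[1:2] is not), and 128-bit or wider ranges must start at a multiple of 4.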
2585 AlignSize = std::min(RegWidth / 32, 4u);
2588 if (RegNum % AlignSize != 0) {
2589 Error(Loc, "invalid register alignment");
2590 return AMDGPU::NoRegister;
2593 unsigned RegIdx = RegNum / AlignSize;
2594 int RCID = getRegClass(RegKind, RegWidth);
2595 if (RCID == -1) {
2596 Error(Loc, "invalid or unsupported register size");
2597 return AMDGPU::NoRegister;
2600 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2601 const MCRegisterClass RC = TRI->getRegClass(RCID);
2602 if (RegIdx >= RC.getNumRegs()) {
2603 Error(Loc, "register index is out of range");
2604 return AMDGPU::NoRegister;
2607 return RC.getRegister(RegIdx);
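// Parse a bracketed register index range, either "[XX]" or "[XX:YY]".
// On success, Num holds the first index and RegWidth the total width in
// bits (32 per register).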
2610 bool AMDGPUAsmParser::ParseRegRange(unsigned &Num, unsigned &RegWidth) {
2611 int64_t RegLo, RegHi;
2612 if (!skipToken(AsmToken::LBrac, "missing register index"))
2613 return false;
2615 SMLoc FirstIdxLoc = getLoc();
2616 SMLoc SecondIdxLoc;
2618 if (!parseExpr(RegLo))
2619 return false;
2621 if (trySkipToken(AsmToken::Colon)) {
2622 SecondIdxLoc = getLoc();
2623 if (!parseExpr(RegHi))
2624 return false;
2625 } else {
2626 RegHi = RegLo;
2629 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
2630 return false;
2632 if (!isUInt<32>(RegLo)) {
2633 Error(FirstIdxLoc, "invalid register index");
2634 return false;
2637 if (!isUInt<32>(RegHi)) {
2638 Error(SecondIdxLoc, "invalid register index");
2639 return false;
2642 if (RegLo > RegHi) {
2643 Error(FirstIdxLoc, "first register index should not exceed second index");
2644 return false;
2647 Num = static_cast<unsigned>(RegLo);
2648 RegWidth = 32 * ((RegHi - RegLo) + 1);
2649 return true;
2652 unsigned AMDGPUAsmParser::ParseSpecialReg(RegisterKind &RegKind,
2653 unsigned &RegNum, unsigned &RegWidth,
2654 SmallVectorImpl<AsmToken> &Tokens) {
2655 assert(isToken(AsmToken::Identifier));
2656 unsigned Reg = getSpecialRegForName(getTokenStr());
2657 if (Reg) {
2658 RegNum = 0;
2659 RegWidth = 32;
2660 RegKind = IS_SPECIAL;
2661 Tokens.push_back(getToken());
2662 lex(); // skip register name
2664 return Reg;
2667 unsigned AMDGPUAsmParser::ParseRegularReg(RegisterKind &RegKind,
2668 unsigned &RegNum, unsigned &RegWidth,
2669 SmallVectorImpl<AsmToken> &Tokens) {
2670 assert(isToken(AsmToken::Identifier));
2671 StringRef RegName = getTokenStr();
2672 auto Loc = getLoc();
2674 const RegInfo *RI = getRegularRegInfo(RegName);
2675 if (!RI) {
2676 Error(Loc, "invalid register name");
2677 return AMDGPU::NoRegister;
2680 Tokens.push_back(getToken());
2681 lex(); // skip register name
2683 RegKind = RI->Kind;
2684 StringRef RegSuffix = RegName.substr(RI->Name.size());
2685 if (!RegSuffix.empty()) {
2686 // Single 32-bit register: vXX.
2687 if (!getRegNum(RegSuffix, RegNum)) {
2688 Error(Loc, "invalid register index");
2689 return AMDGPU::NoRegister;
2691 RegWidth = 32;
2692 } else {
2693 // Range of registers: v[XX:YY]. ":YY" is optional.
2694 if (!ParseRegRange(RegNum, RegWidth))
2695 return AMDGPU::NoRegister;
2698 return getRegularReg(RegKind, RegNum, RegWidth, Loc);
2701 unsigned AMDGPUAsmParser::ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
2702 unsigned &RegWidth,
2703 SmallVectorImpl<AsmToken> &Tokens) {
2704 unsigned Reg = AMDGPU::NoRegister;
2705 auto ListLoc = getLoc();
2707 if (!skipToken(AsmToken::LBrac,
2708 "expected a register or a list of registers")) {
2709 return AMDGPU::NoRegister;
2712 // List of consecutive registers, e.g.: [s0,s1,s2,s3]
2714 auto Loc = getLoc();
2715 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth))
2716 return AMDGPU::NoRegister;
2717 if (RegWidth != 32) {
2718 Error(Loc, "expected a single 32-bit register");
2719 return AMDGPU::NoRegister;
2722 for (; trySkipToken(AsmToken::Comma); ) {
2723 RegisterKind NextRegKind;
2724 unsigned NextReg, NextRegNum, NextRegWidth;
2725 Loc = getLoc();
2727 if (!ParseAMDGPURegister(NextRegKind, NextReg,
2728 NextRegNum, NextRegWidth,
2729 Tokens)) {
2730 return AMDGPU::NoRegister;
2732 if (NextRegWidth != 32) {
2733 Error(Loc, "expected a single 32-bit register");
2734 return AMDGPU::NoRegister;
2736 if (NextRegKind != RegKind) {
2737 Error(Loc, "registers in a list must be of the same kind");
2738 return AMDGPU::NoRegister;
2740 if (!AddNextRegisterToList(Reg, RegWidth, RegKind, NextReg, Loc))
2741 return AMDGPU::NoRegister;
2744 if (!skipToken(AsmToken::RBrac,
2745 "expected a comma or a closing square bracket")) {
2746 return AMDGPU::NoRegister;
2749 if (isRegularReg(RegKind))
2750 Reg = getRegularReg(RegKind, RegNum, RegWidth, ListLoc);
2752 return Reg;
2755 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
2756 unsigned &RegNum, unsigned &RegWidth,
2757 SmallVectorImpl<AsmToken> &Tokens) {
2758 auto Loc = getLoc();
2759 Reg = AMDGPU::NoRegister;
2761 if (isToken(AsmToken::Identifier)) {
2762 Reg = ParseSpecialReg(RegKind, RegNum, RegWidth, Tokens);
2763 if (Reg == AMDGPU::NoRegister)
2764 Reg = ParseRegularReg(RegKind, RegNum, RegWidth, Tokens);
2765 } else {
2766 Reg = ParseRegList(RegKind, RegNum, RegWidth, Tokens);
2769 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2770 if (Reg == AMDGPU::NoRegister) {
2771 assert(Parser.hasPendingError());
2772 return false;
2775 if (!subtargetHasRegister(*TRI, Reg)) {
2776 if (Reg == AMDGPU::SGPR_NULL) {
2777 Error(Loc, "'null' operand is not supported on this GPU");
2778 } else {
2779 Error(Loc, "register not available on this GPU");
2781 return false;
2784 return true;
2787 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
2788 unsigned &RegNum, unsigned &RegWidth,
2789 bool RestoreOnFailure /*=false*/) {
2790 Reg = AMDGPU::NoRegister;
2792 SmallVector<AsmToken, 1> Tokens;
2793 if (ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, Tokens)) {
2794 if (RestoreOnFailure) {
2795 while (!Tokens.empty()) {
2796 getLexer().UnLex(Tokens.pop_back_val());
2799 return true;
2801 return false;
2804 std::optional<StringRef>
2805 AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) {
2806 switch (RegKind) {
2807 case IS_VGPR:
2808 return StringRef(".amdgcn.next_free_vgpr");
2809 case IS_SGPR:
2810 return StringRef(".amdgcn.next_free_sgpr");
2811 default:
2812 return std::nullopt;
2816 void AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) {
2817 auto SymbolName = getGprCountSymbolName(RegKind);
2818 assert(SymbolName && "initializing invalid register kind");
2819 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
2820 Sym->setVariableValue(MCConstantExpr::create(0, getContext()));
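// Track the highest register index used so far by raising the
// .amdgcn.next_free_{v,s}gpr symbol. E.g. parsing v[8:9] bumps
// .amdgcn.next_free_vgpr to at least 10.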
2823 bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind,
2824 unsigned DwordRegIndex,
2825 unsigned RegWidth) {
2826 // Symbols are only defined for GCN targets
2827 if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6)
2828 return true;
2830 auto SymbolName = getGprCountSymbolName(RegKind);
2831 if (!SymbolName)
2832 return true;
2833 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
2835 int64_t NewMax = DwordRegIndex + divideCeil(RegWidth, 32) - 1;
2836 int64_t OldCount;
2838 if (!Sym->isVariable())
2839 return !Error(getLoc(),
2840 ".amdgcn.next_free_{v,s}gpr symbols must be variable");
2841 if (!Sym->getVariableValue(false)->evaluateAsAbsolute(OldCount))
2842 return !Error(
2843 getLoc(),
2844 ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions");
2846 if (OldCount <= NewMax)
2847 Sym->setVariableValue(MCConstantExpr::create(NewMax + 1, getContext()));
2849 return true;
2852 std::unique_ptr<AMDGPUOperand>
2853 AMDGPUAsmParser::parseRegister(bool RestoreOnFailure) {
2854 const auto &Tok = getToken();
2855 SMLoc StartLoc = Tok.getLoc();
2856 SMLoc EndLoc = Tok.getEndLoc();
2857 RegisterKind RegKind;
2858 unsigned Reg, RegNum, RegWidth;
2860 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) {
2861 return nullptr;
2863 if (isHsaAbiVersion3AndAbove(&getSTI())) {
2864 if (!updateGprCountSymbols(RegKind, RegNum, RegWidth))
2865 return nullptr;
2866 } else
2867 KernelScope.usesRegister(RegKind, RegNum, RegWidth);
2868 return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc);
2871 ParseStatus AMDGPUAsmParser::parseImm(OperandVector &Operands,
2872 bool HasSP3AbsModifier) {
2873 // TODO: add syntactic sugar for 1/(2*PI)
2875 if (isRegister())
2876 return ParseStatus::NoMatch;
2877 assert(!isModifier());
2879 const auto& Tok = getToken();
2880 const auto& NextTok = peekToken();
2881 bool IsReal = Tok.is(AsmToken::Real);
2882 SMLoc S = getLoc();
2883 bool Negate = false;
2885 if (!IsReal && Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Real)) {
2886 lex();
2887 IsReal = true;
2888 Negate = true;
2891 if (IsReal) {
2892 // Floating-point expressions are not supported.
2893 // Can only allow floating-point literals with an
2894 // optional sign.
2896 StringRef Num = getTokenStr();
2897 lex();
2899 APFloat RealVal(APFloat::IEEEdouble());
2900 auto roundMode = APFloat::rmNearestTiesToEven;
2901 if (errorToBool(RealVal.convertFromString(Num, roundMode).takeError()))
2902 return ParseStatus::Failure;
2903 if (Negate)
2904 RealVal.changeSign();
2906 Operands.push_back(
2907 AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S,
2908 AMDGPUOperand::ImmTyNone, true));
2910 return ParseStatus::Success;
2912 } else {
2913 int64_t IntVal;
2914 const MCExpr *Expr;
2915 SMLoc S = getLoc();
2917 if (HasSP3AbsModifier) {
2918 // This is a workaround for handling expressions
2919 // as arguments of SP3 'abs' modifier, for example:
2920 // |1.0|
2921 // |-1|
2922 // |1+x|
2923       // This syntax is not compatible with the syntax of standard
2924       // MC expressions (due to the trailing '|').
2925 SMLoc EndLoc;
2926 if (getParser().parsePrimaryExpr(Expr, EndLoc, nullptr))
2927 return ParseStatus::Failure;
2928 } else {
2929 if (Parser.parseExpression(Expr))
2930 return ParseStatus::Failure;
2933 if (Expr->evaluateAsAbsolute(IntVal)) {
2934 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
2935 } else {
2936 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
2939 return ParseStatus::Success;
2942 return ParseStatus::NoMatch;
2945 ParseStatus AMDGPUAsmParser::parseReg(OperandVector &Operands) {
2946 if (!isRegister())
2947 return ParseStatus::NoMatch;
2949 if (auto R = parseRegister()) {
2950 assert(R->isReg());
2951 Operands.push_back(std::move(R));
2952 return ParseStatus::Success;
2954 return ParseStatus::Failure;
2957 ParseStatus AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands,
2958 bool HasSP3AbsMod) {
2959 ParseStatus Res = parseReg(Operands);
2960 if (!Res.isNoMatch())
2961 return Res;
2962 if (isModifier())
2963 return ParseStatus::NoMatch;
2964 return parseImm(Operands, HasSP3AbsMod);
2967 bool
2968 AMDGPUAsmParser::isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2969 if (Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::LParen)) {
2970 const auto &str = Token.getString();
2971 return str == "abs" || str == "neg" || str == "sext";
2973 return false;
2976 bool
2977 AMDGPUAsmParser::isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const {
2978 return Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::Colon);
2981 bool
2982 AMDGPUAsmParser::isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2983 return isNamedOperandModifier(Token, NextToken) || Token.is(AsmToken::Pipe);
2986 bool
2987 AMDGPUAsmParser::isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2988 return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken);
2991 // Check if this is an operand modifier or an opcode modifier
2992 // which may look like an expression but is not. We should
2993 // avoid parsing these modifiers as expressions. Currently
2994 // recognized sequences are:
2995 // |...|
2996 // abs(...)
2997 // neg(...)
2998 // sext(...)
2999 // -reg
3000 // -|...|
3001 // -abs(...)
3002 // name:...
3004 bool
3005 AMDGPUAsmParser::isModifier() {
3007 AsmToken Tok = getToken();
3008 AsmToken NextToken[2];
3009 peekTokens(NextToken);
3011 return isOperandModifier(Tok, NextToken[0]) ||
3012 (Tok.is(AsmToken::Minus) && isRegOrOperandModifier(NextToken[0], NextToken[1])) ||
3013 isOpcodeModifierWithVal(Tok, NextToken[0]);
3016 // Check if the current token is an SP3 'neg' modifier.
3017 // Currently this modifier is allowed in the following context:
3019 // 1. Before a register, e.g. "-v0", "-v[...]" or "-[v0,v1]".
3020 // 2. Before an 'abs' modifier: -abs(...)
3021 // 3. Before an SP3 'abs' modifier: -|...|
3023 // In all other cases "-" is handled as a part
3024 // of an expression that follows the sign.
3026 // Note: When "-" is followed by an integer literal N,
3027 // this is interpreted as integer negation rather
3028 // than a floating-point NEG modifier applied to N.
3029 // Besides being counter-intuitive, such use of the floating-point
3030 // NEG modifier would have resulted in different meanings
3031 // of integer literals used with VOP1/2/C and VOP3,
3032 // for example:
3033 // v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF
3034 // v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001
3035 // Negative fp literals with preceding "-" are
3036 // handled likewise for uniformity
3038 bool
3039 AMDGPUAsmParser::parseSP3NegModifier() {
3041 AsmToken NextToken[2];
3042 peekTokens(NextToken);
3044 if (isToken(AsmToken::Minus) &&
3045 (isRegister(NextToken[0], NextToken[1]) ||
3046 NextToken[0].is(AsmToken::Pipe) ||
3047 isId(NextToken[0], "abs"))) {
3048 lex();
3049 return true;
3052 return false;
3055 ParseStatus
3056 AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands,
3057 bool AllowImm) {
3058 bool Neg, SP3Neg;
3059 bool Abs, SP3Abs;
3060 SMLoc Loc;
3062 // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead.
3063 if (isToken(AsmToken::Minus) && peekToken().is(AsmToken::Minus))
3064 return Error(getLoc(), "invalid syntax, expected 'neg' modifier");
3066 SP3Neg = parseSP3NegModifier();
3068 Loc = getLoc();
3069 Neg = trySkipId("neg");
3070 if (Neg && SP3Neg)
3071 return Error(Loc, "expected register or immediate");
3072 if (Neg && !skipToken(AsmToken::LParen, "expected left paren after neg"))
3073 return ParseStatus::Failure;
3075 Abs = trySkipId("abs");
3076 if (Abs && !skipToken(AsmToken::LParen, "expected left paren after abs"))
3077 return ParseStatus::Failure;
3079 Loc = getLoc();
3080 SP3Abs = trySkipToken(AsmToken::Pipe);
3081 if (Abs && SP3Abs)
3082 return Error(Loc, "expected register or immediate");
3084 ParseStatus Res;
3085 if (AllowImm) {
3086 Res = parseRegOrImm(Operands, SP3Abs);
3087 } else {
3088 Res = parseReg(Operands);
3090 if (!Res.isSuccess())
3091 return (SP3Neg || Neg || SP3Abs || Abs) ? ParseStatus::Failure : Res;
3093 if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar"))
3094 return ParseStatus::Failure;
3095 if (Abs && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3096 return ParseStatus::Failure;
3097 if (Neg && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3098 return ParseStatus::Failure;
3100 AMDGPUOperand::Modifiers Mods;
3101 Mods.Abs = Abs || SP3Abs;
3102 Mods.Neg = Neg || SP3Neg;
3104 if (Mods.hasFPModifiers()) {
3105 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3106 if (Op.isExpr())
3107 return Error(Op.getStartLoc(), "expected an absolute expression");
3108 Op.setModifiers(Mods);
3110 return ParseStatus::Success;
3113 ParseStatus
3114 AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands,
3115 bool AllowImm) {
3116 bool Sext = trySkipId("sext");
3117 if (Sext && !skipToken(AsmToken::LParen, "expected left paren after sext"))
3118 return ParseStatus::Failure;
3120 ParseStatus Res;
3121 if (AllowImm) {
3122 Res = parseRegOrImm(Operands);
3123 } else {
3124 Res = parseReg(Operands);
3126 if (!Res.isSuccess())
3127 return Sext ? ParseStatus::Failure : Res;
3129 if (Sext && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3130 return ParseStatus::Failure;
3132 AMDGPUOperand::Modifiers Mods;
3133 Mods.Sext = Sext;
3135 if (Mods.hasIntModifiers()) {
3136 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3137 if (Op.isExpr())
3138 return Error(Op.getStartLoc(), "expected an absolute expression");
3139 Op.setModifiers(Mods);
3142 return ParseStatus::Success;
3145 ParseStatus AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) {
3146 return parseRegOrImmWithFPInputMods(Operands, false);
3149 ParseStatus AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) {
3150 return parseRegOrImmWithIntInputMods(Operands, false);
3153 ParseStatus AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) {
3154 auto Loc = getLoc();
3155 if (trySkipId("off")) {
3156 Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc,
3157 AMDGPUOperand::ImmTyOff, false));
3158 return ParseStatus::Success;
3161 if (!isRegister())
3162 return ParseStatus::NoMatch;
3164 std::unique_ptr<AMDGPUOperand> Reg = parseRegister();
3165 if (Reg) {
3166 Operands.push_back(std::move(Reg));
3167 return ParseStatus::Success;
3170 return ParseStatus::Failure;
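// Reject candidate opcodes whose encoding does not match an explicitly
// forced suffix (_e32/_e64/_dpp/_sdwa), and prefer the 32-bit encoding for
// opcodes marked VOPAsmPrefer32Bit unless a 64-bit encoding was forced.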
3173 unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
3174 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
3176 if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) ||
3177 (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) ||
3178 (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) ||
3179 (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) )
3180 return Match_InvalidOperand;
3182 if ((TSFlags & SIInstrFlags::VOP3) &&
3183 (TSFlags & SIInstrFlags::VOPAsmPrefer32Bit) &&
3184 getForcedEncodingSize() != 64)
3185 return Match_PreferE32;
3187 if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
3188 Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
3189 // v_mac_f32/16 allow only dst_sel == DWORD;
3190 auto OpNum =
3191 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel);
3192 const auto &Op = Inst.getOperand(OpNum);
3193 if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) {
3194 return Match_InvalidOperand;
3198 return Match_Success;
3201 static ArrayRef<unsigned> getAllVariants() {
3202 static const unsigned Variants[] = {
3203 AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3,
3204 AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9,
3205 AMDGPUAsmVariants::DPP, AMDGPUAsmVariants::VOP3_DPP
3208 return ArrayRef(Variants);
3211 // What asm variants we should check
3212 ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const {
3213 if (isForcedDPP() && isForcedVOP3()) {
3214 static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3_DPP};
3215 return ArrayRef(Variants);
3217 if (getForcedEncodingSize() == 32) {
3218 static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT};
3219 return ArrayRef(Variants);
3222 if (isForcedVOP3()) {
3223 static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3};
3224 return ArrayRef(Variants);
3227 if (isForcedSDWA()) {
3228 static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA,
3229 AMDGPUAsmVariants::SDWA9};
3230 return ArrayRef(Variants);
3233 if (isForcedDPP()) {
3234 static const unsigned Variants[] = {AMDGPUAsmVariants::DPP};
3235 return ArrayRef(Variants);
3238 return getAllVariants();
3241 StringRef AMDGPUAsmParser::getMatchedVariantName() const {
3242 if (isForcedDPP() && isForcedVOP3())
3243 return "e64_dpp";
3245 if (getForcedEncodingSize() == 32)
3246 return "e32";
3248 if (isForcedVOP3())
3249 return "e64";
3251 if (isForcedSDWA())
3252 return "sdwa";
3254 if (isForcedDPP())
3255 return "dpp";
3257 return "";
3260 unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const {
3261 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
3262 for (MCPhysReg Reg : Desc.implicit_uses()) {
3263 switch (Reg) {
3264 case AMDGPU::FLAT_SCR:
3265 case AMDGPU::VCC:
3266 case AMDGPU::VCC_LO:
3267 case AMDGPU::VCC_HI:
3268 case AMDGPU::M0:
3269 return Reg;
3270 default:
3271 break;
3274 return AMDGPU::NoRegister;
3277 // NB: This code is correct only when used to check constant
3278 // bus limitations because GFX7 supports no f16 inline constants.
3279 // Note that there are no cases when a GFX7 opcode violates
3280 // constant bus limitations due to the use of an f16 constant.
3281 bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst,
3282 unsigned OpIdx) const {
3283 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
3285 if (!AMDGPU::isSISrcOperand(Desc, OpIdx) ||
3286 AMDGPU::isKImmOperand(Desc, OpIdx)) {
3287 return false;
3290 const MCOperand &MO = Inst.getOperand(OpIdx);
3292 int64_t Val = MO.getImm();
3293 auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx);
3295 switch (OpSize) { // expected operand size
3296 case 8:
3297 return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm());
3298 case 4:
3299 return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm());
3300 case 2: {
3301 const unsigned OperandType = Desc.operands()[OpIdx].OperandType;
3302 if (OperandType == AMDGPU::OPERAND_REG_IMM_INT16 ||
3303 OperandType == AMDGPU::OPERAND_REG_INLINE_C_INT16 ||
3304 OperandType == AMDGPU::OPERAND_REG_INLINE_AC_INT16)
3305 return AMDGPU::isInlinableIntLiteral(Val);
3307 if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 ||
3308 OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2INT16 ||
3309 OperandType == AMDGPU::OPERAND_REG_IMM_V2INT16)
3310 return AMDGPU::isInlinableIntLiteralV216(Val);
3312 if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16 ||
3313 OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2FP16 ||
3314 OperandType == AMDGPU::OPERAND_REG_IMM_V2FP16)
3315 return AMDGPU::isInlinableLiteralV216(Val, hasInv2PiInlineImm());
3317 return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm());
3319 default:
3320 llvm_unreachable("invalid operand size");
3324 unsigned AMDGPUAsmParser::getConstantBusLimit(unsigned Opcode) const {
3325 if (!isGFX10Plus())
3326 return 1;
3328 switch (Opcode) {
3329 // 64-bit shift instructions can use only one scalar value input
3330 case AMDGPU::V_LSHLREV_B64_e64:
3331 case AMDGPU::V_LSHLREV_B64_gfx10:
3332 case AMDGPU::V_LSHLREV_B64_e64_gfx11:
3333 case AMDGPU::V_LSHRREV_B64_e64:
3334 case AMDGPU::V_LSHRREV_B64_gfx10:
3335 case AMDGPU::V_LSHRREV_B64_e64_gfx11:
3336 case AMDGPU::V_ASHRREV_I64_e64:
3337 case AMDGPU::V_ASHRREV_I64_gfx10:
3338 case AMDGPU::V_ASHRREV_I64_e64_gfx11:
3339 case AMDGPU::V_LSHL_B64_e64:
3340 case AMDGPU::V_LSHR_B64_e64:
3341 case AMDGPU::V_ASHR_I64_e64:
3342 return 1;
3343 default:
3344 return 2;
3348 constexpr unsigned MAX_SRC_OPERANDS_NUM = 6;
3349 using OperandIndices = SmallVector<int16_t, MAX_SRC_OPERANDS_NUM>;
3351 // Get regular operand indices in the same order as specified
3352 // in the instruction (but append mandatory literals to the end).
3353 static OperandIndices getSrcOperandIndices(unsigned Opcode,
3354 bool AddMandatoryLiterals = false) {
3356 int16_t ImmIdx =
3357 AddMandatoryLiterals ? getNamedOperandIdx(Opcode, OpName::imm) : -1;
3359 if (isVOPD(Opcode)) {
3360 int16_t ImmDeferredIdx =
3361 AddMandatoryLiterals ? getNamedOperandIdx(Opcode, OpName::immDeferred)
3362 : -1;
3364 return {getNamedOperandIdx(Opcode, OpName::src0X),
3365 getNamedOperandIdx(Opcode, OpName::vsrc1X),
3366 getNamedOperandIdx(Opcode, OpName::src0Y),
3367 getNamedOperandIdx(Opcode, OpName::vsrc1Y),
3368 ImmDeferredIdx,
3369 ImmIdx};
3372 return {getNamedOperandIdx(Opcode, OpName::src0),
3373 getNamedOperandIdx(Opcode, OpName::src1),
3374 getNamedOperandIdx(Opcode, OpName::src2), ImmIdx};
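// An operand reads the constant bus if it is a non-inline immediate, an
// SGPR other than null, or an expression.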
3377 bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) {
3378 const MCOperand &MO = Inst.getOperand(OpIdx);
3379 if (MO.isImm()) {
3380 return !isInlineConstant(Inst, OpIdx);
3381 } else if (MO.isReg()) {
3382 auto Reg = MO.getReg();
3383 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3384 auto PReg = mc2PseudoReg(Reg);
3385 return isSGPR(PReg, TRI) && PReg != SGPR_NULL;
3386 } else {
3387 return true;
3391 bool AMDGPUAsmParser::validateConstantBusLimitations(
3392 const MCInst &Inst, const OperandVector &Operands) {
3393 const unsigned Opcode = Inst.getOpcode();
3394 const MCInstrDesc &Desc = MII.get(Opcode);
3395 unsigned LastSGPR = AMDGPU::NoRegister;
3396 unsigned ConstantBusUseCount = 0;
3397 unsigned NumLiterals = 0;
3398 unsigned LiteralSize;
3400 if (!(Desc.TSFlags &
3401 (SIInstrFlags::VOPC | SIInstrFlags::VOP1 | SIInstrFlags::VOP2 |
3402 SIInstrFlags::VOP3 | SIInstrFlags::VOP3P | SIInstrFlags::SDWA)) &&
3403 !isVOPD(Opcode))
3404 return true;
3406 // Check special imm operands (used by madmk, etc)
3407 if (AMDGPU::hasNamedOperand(Opcode, AMDGPU::OpName::imm)) {
3408 ++NumLiterals;
3409 LiteralSize = 4;
3412 SmallDenseSet<unsigned> SGPRsUsed;
3413 unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst);
3414 if (SGPRUsed != AMDGPU::NoRegister) {
3415 SGPRsUsed.insert(SGPRUsed);
3416 ++ConstantBusUseCount;
3419 OperandIndices OpIndices = getSrcOperandIndices(Opcode);
3421 for (int OpIdx : OpIndices) {
3422 if (OpIdx == -1)
3423 continue;
3425 const MCOperand &MO = Inst.getOperand(OpIdx);
3426 if (usesConstantBus(Inst, OpIdx)) {
3427 if (MO.isReg()) {
3428 LastSGPR = mc2PseudoReg(MO.getReg());
3429         // Pairs of registers with partial intersections like these
3430 // s0, s[0:1]
3431 // flat_scratch_lo, flat_scratch
3432 // flat_scratch_lo, flat_scratch_hi
3433 // are theoretically valid but they are disabled anyway.
3434 // Note that this code mimics SIInstrInfo::verifyInstruction
3435 if (SGPRsUsed.insert(LastSGPR).second) {
3436 ++ConstantBusUseCount;
3438 } else { // Expression or a literal
3440 if (Desc.operands()[OpIdx].OperandType == MCOI::OPERAND_IMMEDIATE)
3441 continue; // special operand like VINTERP attr_chan
3443 // An instruction may use only one literal.
3444         // This has been validated in a previous step.
3445 // See validateVOPLiteral.
3446 // This literal may be used as more than one operand.
3447 // If all these operands are of the same size,
3448 // this literal counts as one scalar value.
3449 // Otherwise it counts as 2 scalar values.
3450 // See "GFX10 Shader Programming", section 3.6.2.3.
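        // E.g. if the same literal value feeds both a 32-bit and a 64-bit
        // source operand, it counts as two scalar values; if all of its uses
        // have the same size, it counts as one.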
3452 unsigned Size = AMDGPU::getOperandSize(Desc, OpIdx);
3453 if (Size < 4)
3454 Size = 4;
3456 if (NumLiterals == 0) {
3457 NumLiterals = 1;
3458 LiteralSize = Size;
3459 } else if (LiteralSize != Size) {
3460 NumLiterals = 2;
3465 ConstantBusUseCount += NumLiterals;
3467 if (ConstantBusUseCount <= getConstantBusLimit(Opcode))
3468 return true;
3470 SMLoc LitLoc = getLitLoc(Operands);
3471 SMLoc RegLoc = getRegLoc(LastSGPR, Operands);
3472 SMLoc Loc = (LitLoc.getPointer() < RegLoc.getPointer()) ? RegLoc : LitLoc;
3473 Error(Loc, "invalid operand (violates constant bus restrictions)");
3474 return false;
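// VOPD: corresponding VGPR source operands of the X and Y components must
// come from different register banks, and their dst registers must have
// different parity (one even, one odd).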
3477 bool AMDGPUAsmParser::validateVOPDRegBankConstraints(
3478 const MCInst &Inst, const OperandVector &Operands) {
3480 const unsigned Opcode = Inst.getOpcode();
3481 if (!isVOPD(Opcode))
3482 return true;
3484 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3486 auto getVRegIdx = [&](unsigned, unsigned OperandIdx) {
3487 const MCOperand &Opr = Inst.getOperand(OperandIdx);
3488 return (Opr.isReg() && !isSGPR(mc2PseudoReg(Opr.getReg()), TRI))
3489 ? Opr.getReg()
3490 : MCRegister::NoRegister;
3493 const auto &InstInfo = getVOPDInstInfo(Opcode, &MII);
3494 auto InvalidCompOprIdx = InstInfo.getInvalidCompOperandIndex(getVRegIdx);
3495 if (!InvalidCompOprIdx)
3496 return true;
3498 auto CompOprIdx = *InvalidCompOprIdx;
3499 auto ParsedIdx =
3500 std::max(InstInfo[VOPD::X].getIndexInParsedOperands(CompOprIdx),
3501 InstInfo[VOPD::Y].getIndexInParsedOperands(CompOprIdx));
3502 assert(ParsedIdx > 0 && ParsedIdx < Operands.size());
3504 auto Loc = ((AMDGPUOperand &)*Operands[ParsedIdx]).getStartLoc();
3505 if (CompOprIdx == VOPD::Component::DST) {
3506 Error(Loc, "one dst register must be even and the other odd");
3507 } else {
3508 auto CompSrcIdx = CompOprIdx - VOPD::Component::DST_NUM;
3509 Error(Loc, Twine("src") + Twine(CompSrcIdx) +
3510 " operands must use different VGPR banks");
3513 return false;
3516 bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) {
3518 const unsigned Opc = Inst.getOpcode();
3519 const MCInstrDesc &Desc = MII.get(Opc);
3521 if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) {
3522 int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp);
3523 assert(ClampIdx != -1);
3524 return Inst.getOperand(ClampIdx).getImm() == 0;
3527 return true;
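// Check that the vdata operand of a MIMG instruction is wide enough for
// the enabled dmask channels, plus one extra dword when tfe is set; packed
// d16 halves the required width.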
3530 bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst,
3531 const SMLoc &IDLoc) {
3533 const unsigned Opc = Inst.getOpcode();
3534 const MCInstrDesc &Desc = MII.get(Opc);
3536 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3537 return true;
3539 int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata);
3540 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3541 int TFEIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe);
3543 assert(VDataIdx != -1);
3545 if ((DMaskIdx == -1 || TFEIdx == -1) && isGFX10_AEncoding()) // intersect_ray
3546 return true;
3548 unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx);
3549 unsigned TFESize = (TFEIdx != -1 && Inst.getOperand(TFEIdx).getImm()) ? 1 : 0;
3550 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3551 if (DMask == 0)
3552 DMask = 1;
3554 bool IsPackedD16 = false;
3555 unsigned DataSize =
3556 (Desc.TSFlags & SIInstrFlags::Gather4) ? 4 : llvm::popcount(DMask);
3557 if (hasPackedD16()) {
3558 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
3559 IsPackedD16 = D16Idx >= 0;
3560 if (IsPackedD16 && Inst.getOperand(D16Idx).getImm())
3561 DataSize = (DataSize + 1) / 2;
3564 if ((VDataSize / 4) == DataSize + TFESize)
3565 return true;
3567 StringRef Modifiers;
3568 if (isGFX90A())
3569 Modifiers = IsPackedD16 ? "dmask and d16" : "dmask";
3570 else
3571 Modifiers = IsPackedD16 ? "dmask, d16 and tfe" : "dmask and tfe";
3573 Error(IDLoc, Twine("image data size does not match ") + Modifiers);
3574 return false;
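// Check that the combined width of the vaddr operands matches the address
// size implied by dim, a16 and g16 (GFX10+ only). For NSA encodings each
// vaddr operand is counted individually.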
3577 bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst,
3578 const SMLoc &IDLoc) {
3579 const unsigned Opc = Inst.getOpcode();
3580 const MCInstrDesc &Desc = MII.get(Opc);
3582 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0 || !isGFX10Plus())
3583 return true;
3585 const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
3587 const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
3588 AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
3589 int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0);
3590 int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::srsrc);
3591 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
3592 int A16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::a16);
3594 assert(VAddr0Idx != -1);
3595 assert(SrsrcIdx != -1);
3596 assert(SrsrcIdx > VAddr0Idx);
3598 bool IsA16 = Inst.getOperand(A16Idx).getImm();
3599 if (BaseOpcode->BVH) {
3600 if (IsA16 == BaseOpcode->A16)
3601 return true;
3602 Error(IDLoc, "image address size does not match a16");
3603 return false;
3606 unsigned Dim = Inst.getOperand(DimIdx).getImm();
3607 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
3608 bool IsNSA = SrsrcIdx - VAddr0Idx > 1;
3609 unsigned ActualAddrSize =
3610 IsNSA ? SrsrcIdx - VAddr0Idx
3611 : AMDGPU::getRegOperandSize(getMRI(), Desc, VAddr0Idx) / 4;
3613 unsigned ExpectedAddrSize =
3614 AMDGPU::getAddrSizeMIMGOp(BaseOpcode, DimInfo, IsA16, hasG16());
3616 if (IsNSA) {
3617 if (hasPartialNSAEncoding() && ExpectedAddrSize > getNSAMaxSize()) {
3618 int VAddrLastIdx = SrsrcIdx - 1;
3619 unsigned VAddrLastSize =
3620 AMDGPU::getRegOperandSize(getMRI(), Desc, VAddrLastIdx) / 4;
3622 ActualAddrSize = VAddrLastIdx - VAddr0Idx + VAddrLastSize;
3624 } else {
3625 if (ExpectedAddrSize > 12)
3626 ExpectedAddrSize = 16;
3628     // Allow an oversized 8-VGPR vaddr when only 5/6/7 VGPRs are required.
3629 // This provides backward compatibility for assembly created
3630 // before 160b/192b/224b types were directly supported.
3631 if (ActualAddrSize == 8 && (ExpectedAddrSize >= 5 && ExpectedAddrSize <= 7))
3632 return true;
3635 if (ActualAddrSize == ExpectedAddrSize)
3636 return true;
3638 Error(IDLoc, "image address size does not match dim and a16");
3639 return false;
3642 bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) {
3644 const unsigned Opc = Inst.getOpcode();
3645 const MCInstrDesc &Desc = MII.get(Opc);
3647 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3648 return true;
3649 if (!Desc.mayLoad() || !Desc.mayStore())
3650 return true; // Not atomic
3652 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3653 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3655 // This is an incomplete check because image_atomic_cmpswap
3656 // may only use 0x3 and 0xf while other atomic operations
3657 // may use 0x1 and 0x3. However these limitations are
3658 // verified when we check that dmask matches dst size.
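// Illustrative (derived from the comment above; exact syntax may vary):
//   image_atomic_add     ... dmask:0x1   ; 32-bit atomic
//   image_atomic_cmpswap ... dmask:0x3   ; 32-bit cmpswap (data + cmp)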
3659 return DMask == 0x1 || DMask == 0x3 || DMask == 0xf;
3662 bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) {
3664 const unsigned Opc = Inst.getOpcode();
3665 const MCInstrDesc &Desc = MII.get(Opc);
3667 if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0)
3668 return true;
3670 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3671 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3673 // GATHER4 instructions use dmask in a different fashion compared to
3674 // other MIMG instructions. The only useful DMASK values are
3675 // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns
3676 // (red,red,red,red) etc.) The ISA document doesn't mention
3677 // this.
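// Illustrative (syntax may vary): image_gather4 with dmask:0x2 gathers the
// green channel; a multi-bit value such as dmask:0x3 is rejected here.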
3678 return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8;
3681 bool AMDGPUAsmParser::validateMIMGMSAA(const MCInst &Inst) {
3682 const unsigned Opc = Inst.getOpcode();
3683 const MCInstrDesc &Desc = MII.get(Opc);
3685 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3686 return true;
3688 const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
3689 const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
3690 AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
3692 if (!BaseOpcode->MSAA)
3693 return true;
3695 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
3696 assert(DimIdx != -1);
3698 unsigned Dim = Inst.getOperand(DimIdx).getImm();
3699 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
3701 return DimInfo->MSAA;
3704 static bool IsMovrelsSDWAOpcode(const unsigned Opcode)
3706 switch (Opcode) {
3707 case AMDGPU::V_MOVRELS_B32_sdwa_gfx10:
3708 case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10:
3709 case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10:
3710 return true;
3711 default:
3712 return false;
3716 // movrels* opcodes should only allow VGPRs as src0.
3717 // This is specified in .td description for vop1/vop3,
3718 // but sdwa is handled differently. See isSDWAOperand.
3719 bool AMDGPUAsmParser::validateMovrels(const MCInst &Inst,
3720 const OperandVector &Operands) {
3722 const unsigned Opc = Inst.getOpcode();
3723 const MCInstrDesc &Desc = MII.get(Opc);
3725 if ((Desc.TSFlags & SIInstrFlags::SDWA) == 0 || !IsMovrelsSDWAOpcode(Opc))
3726 return true;
3728 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
3729 assert(Src0Idx != -1);
3731 SMLoc ErrLoc;
3732 const MCOperand &Src0 = Inst.getOperand(Src0Idx);
3733 if (Src0.isReg()) {
3734 auto Reg = mc2PseudoReg(Src0.getReg());
3735 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3736 if (!isSGPR(Reg, TRI))
3737 return true;
3738 ErrLoc = getRegLoc(Reg, Operands);
3739 } else {
3740 ErrLoc = getConstLoc(Operands);
3743 Error(ErrLoc, "source operand must be a VGPR");
3744 return false;
3747 bool AMDGPUAsmParser::validateMAIAccWrite(const MCInst &Inst,
3748 const OperandVector &Operands) {
3750 const unsigned Opc = Inst.getOpcode();
3752 if (Opc != AMDGPU::V_ACCVGPR_WRITE_B32_vi)
3753 return true;
3755 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
3756 assert(Src0Idx != -1);
3758 const MCOperand &Src0 = Inst.getOperand(Src0Idx);
3759 if (!Src0.isReg())
3760 return true;
3762 auto Reg = mc2PseudoReg(Src0.getReg());
3763 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3764 if (!isGFX90A() && isSGPR(Reg, TRI)) {
3765 Error(getRegLoc(Reg, Operands),
3766 "source operand must be either a VGPR or an inline constant");
3767 return false;
3770 return true;
3773 bool AMDGPUAsmParser::validateMAISrc2(const MCInst &Inst,
3774 const OperandVector &Operands) {
3775 unsigned Opcode = Inst.getOpcode();
3776 const MCInstrDesc &Desc = MII.get(Opcode);
3778 if (!(Desc.TSFlags & SIInstrFlags::IsMAI) ||
3779 !getFeatureBits()[FeatureMFMAInlineLiteralBug])
3780 return true;
3782 const int Src2Idx = getNamedOperandIdx(Opcode, OpName::src2);
3783 if (Src2Idx == -1)
3784 return true;
3786 if (Inst.getOperand(Src2Idx).isImm() && isInlineConstant(Inst, Src2Idx)) {
3787 Error(getConstLoc(Operands),
3788 "inline constants are not allowed for this operand");
3789 return false;
3792 return true;
3795 bool AMDGPUAsmParser::validateMFMA(const MCInst &Inst,
3796 const OperandVector &Operands) {
3797 const unsigned Opc = Inst.getOpcode();
3798 const MCInstrDesc &Desc = MII.get(Opc);
3800 if ((Desc.TSFlags & SIInstrFlags::IsMAI) == 0)
3801 return true;
3803 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2);
3804 if (Src2Idx == -1)
3805 return true;
3807 const MCOperand &Src2 = Inst.getOperand(Src2Idx);
3808 if (!Src2.isReg())
3809 return true;
3811 MCRegister Src2Reg = Src2.getReg();
3812 MCRegister DstReg = Inst.getOperand(0).getReg();
3813 if (Src2Reg == DstReg)
3814 return true;
3816 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3817 if (TRI->getRegClass(Desc.operands()[0].RegClass).getSizeInBits() <= 128)
3818 return true;
3820 if (TRI->regsOverlap(Src2Reg, DstReg)) {
3821 Error(getRegLoc(mc2PseudoReg(Src2Reg), Operands),
3822 "source 2 operand must not partially overlap with dst");
3823 return false;
3826 return true;
3829 bool AMDGPUAsmParser::validateDivScale(const MCInst &Inst) {
3830 switch (Inst.getOpcode()) {
3831 default:
3832 return true;
3833 case V_DIV_SCALE_F32_gfx6_gfx7:
3834 case V_DIV_SCALE_F32_vi:
3835 case V_DIV_SCALE_F32_gfx10:
3836 case V_DIV_SCALE_F64_gfx6_gfx7:
3837 case V_DIV_SCALE_F64_vi:
3838 case V_DIV_SCALE_F64_gfx10:
3839 break;
3842 // TODO: Check that src0 = src1 or src2.
3844 for (auto Name : {AMDGPU::OpName::src0_modifiers,
3845                     AMDGPU::OpName::src1_modifiers,
3846 AMDGPU::OpName::src2_modifiers}) {
3847 if (Inst.getOperand(AMDGPU::getNamedOperandIdx(Inst.getOpcode(), Name))
3848 .getImm() &
3849 SISrcMods::ABS) {
3850 return false;
3854 return true;
3857 bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) {
3859 const unsigned Opc = Inst.getOpcode();
3860 const MCInstrDesc &Desc = MII.get(Opc);
3862 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3863 return true;
3865 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
3866 if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) {
3867 if (isCI() || isSI())
3868 return false;
3871 return true;
3874 static bool IsRevOpcode(const unsigned Opcode)
3876 switch (Opcode) {
3877 case AMDGPU::V_SUBREV_F32_e32:
3878 case AMDGPU::V_SUBREV_F32_e64:
3879 case AMDGPU::V_SUBREV_F32_e32_gfx10:
3880 case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7:
3881 case AMDGPU::V_SUBREV_F32_e32_vi:
3882 case AMDGPU::V_SUBREV_F32_e64_gfx10:
3883 case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7:
3884 case AMDGPU::V_SUBREV_F32_e64_vi:
3886 case AMDGPU::V_SUBREV_CO_U32_e32:
3887 case AMDGPU::V_SUBREV_CO_U32_e64:
3888 case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7:
3889 case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7:
3891 case AMDGPU::V_SUBBREV_U32_e32:
3892 case AMDGPU::V_SUBBREV_U32_e64:
3893 case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7:
3894 case AMDGPU::V_SUBBREV_U32_e32_vi:
3895 case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7:
3896 case AMDGPU::V_SUBBREV_U32_e64_vi:
3898 case AMDGPU::V_SUBREV_U32_e32:
3899 case AMDGPU::V_SUBREV_U32_e64:
3900 case AMDGPU::V_SUBREV_U32_e32_gfx9:
3901 case AMDGPU::V_SUBREV_U32_e32_vi:
3902 case AMDGPU::V_SUBREV_U32_e64_gfx9:
3903 case AMDGPU::V_SUBREV_U32_e64_vi:
3905 case AMDGPU::V_SUBREV_F16_e32:
3906 case AMDGPU::V_SUBREV_F16_e64:
3907 case AMDGPU::V_SUBREV_F16_e32_gfx10:
3908 case AMDGPU::V_SUBREV_F16_e32_vi:
3909 case AMDGPU::V_SUBREV_F16_e64_gfx10:
3910 case AMDGPU::V_SUBREV_F16_e64_vi:
3912 case AMDGPU::V_SUBREV_U16_e32:
3913 case AMDGPU::V_SUBREV_U16_e64:
3914 case AMDGPU::V_SUBREV_U16_e32_vi:
3915 case AMDGPU::V_SUBREV_U16_e64_vi:
3917 case AMDGPU::V_SUBREV_CO_U32_e32_gfx9:
3918 case AMDGPU::V_SUBREV_CO_U32_e64_gfx10:
3919 case AMDGPU::V_SUBREV_CO_U32_e64_gfx9:
3921 case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9:
3922 case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9:
3924 case AMDGPU::V_SUBREV_NC_U32_e32_gfx10:
3925 case AMDGPU::V_SUBREV_NC_U32_e64_gfx10:
3927 case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10:
3928 case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10:
3930 case AMDGPU::V_LSHRREV_B32_e32:
3931 case AMDGPU::V_LSHRREV_B32_e64:
3932 case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7:
3933 case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7:
3934 case AMDGPU::V_LSHRREV_B32_e32_vi:
3935 case AMDGPU::V_LSHRREV_B32_e64_vi:
3936 case AMDGPU::V_LSHRREV_B32_e32_gfx10:
3937 case AMDGPU::V_LSHRREV_B32_e64_gfx10:
3939 case AMDGPU::V_ASHRREV_I32_e32:
3940 case AMDGPU::V_ASHRREV_I32_e64:
3941 case AMDGPU::V_ASHRREV_I32_e32_gfx10:
3942 case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7:
3943 case AMDGPU::V_ASHRREV_I32_e32_vi:
3944 case AMDGPU::V_ASHRREV_I32_e64_gfx10:
3945 case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7:
3946 case AMDGPU::V_ASHRREV_I32_e64_vi:
3948 case AMDGPU::V_LSHLREV_B32_e32:
3949 case AMDGPU::V_LSHLREV_B32_e64:
3950 case AMDGPU::V_LSHLREV_B32_e32_gfx10:
3951 case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7:
3952 case AMDGPU::V_LSHLREV_B32_e32_vi:
3953 case AMDGPU::V_LSHLREV_B32_e64_gfx10:
3954 case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7:
3955 case AMDGPU::V_LSHLREV_B32_e64_vi:
3957 case AMDGPU::V_LSHLREV_B16_e32:
3958 case AMDGPU::V_LSHLREV_B16_e64:
3959 case AMDGPU::V_LSHLREV_B16_e32_vi:
3960 case AMDGPU::V_LSHLREV_B16_e64_vi:
3961 case AMDGPU::V_LSHLREV_B16_gfx10:
3963 case AMDGPU::V_LSHRREV_B16_e32:
3964 case AMDGPU::V_LSHRREV_B16_e64:
3965 case AMDGPU::V_LSHRREV_B16_e32_vi:
3966 case AMDGPU::V_LSHRREV_B16_e64_vi:
3967 case AMDGPU::V_LSHRREV_B16_gfx10:
3969 case AMDGPU::V_ASHRREV_I16_e32:
3970 case AMDGPU::V_ASHRREV_I16_e64:
3971 case AMDGPU::V_ASHRREV_I16_e32_vi:
3972 case AMDGPU::V_ASHRREV_I16_e64_vi:
3973 case AMDGPU::V_ASHRREV_I16_gfx10:
3975 case AMDGPU::V_LSHLREV_B64_e64:
3976 case AMDGPU::V_LSHLREV_B64_gfx10:
3977 case AMDGPU::V_LSHLREV_B64_vi:
3979 case AMDGPU::V_LSHRREV_B64_e64:
3980 case AMDGPU::V_LSHRREV_B64_gfx10:
3981 case AMDGPU::V_LSHRREV_B64_vi:
3983 case AMDGPU::V_ASHRREV_I64_e64:
3984 case AMDGPU::V_ASHRREV_I64_gfx10:
3985 case AMDGPU::V_ASHRREV_I64_vi:
3987 case AMDGPU::V_PK_LSHLREV_B16:
3988 case AMDGPU::V_PK_LSHLREV_B16_gfx10:
3989 case AMDGPU::V_PK_LSHLREV_B16_vi:
3991 case AMDGPU::V_PK_LSHRREV_B16:
3992 case AMDGPU::V_PK_LSHRREV_B16_gfx10:
3993 case AMDGPU::V_PK_LSHRREV_B16_vi:
3994 case AMDGPU::V_PK_ASHRREV_I16:
3995 case AMDGPU::V_PK_ASHRREV_I16_gfx10:
3996 case AMDGPU::V_PK_ASHRREV_I16_vi:
3997 return true;
3998 default:
3999 return false;
4003 std::optional<StringRef>
4004 AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) {
4006 using namespace SIInstrFlags;
4007 const unsigned Opcode = Inst.getOpcode();
4008 const MCInstrDesc &Desc = MII.get(Opcode);
4010 // lds_direct register is defined so that it can be used
4011 // with 9-bit operands only. Ignore encodings which do not accept these.
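// Illustrative (exact mnemonics may vary): 'v_mov_b32 v0, lds_direct' is the
// intended use; lds_direct appearing as src1/src2, or in an SDWA or *rev
// opcode, is rejected below.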
4012 const auto Enc = VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA;
4013 if ((Desc.TSFlags & Enc) == 0)
4014 return std::nullopt;
4016 for (auto SrcName : {OpName::src0, OpName::src1, OpName::src2}) {
4017 auto SrcIdx = getNamedOperandIdx(Opcode, SrcName);
4018 if (SrcIdx == -1)
4019 break;
4020 const auto &Src = Inst.getOperand(SrcIdx);
4021 if (Src.isReg() && Src.getReg() == LDS_DIRECT) {
4023 if (isGFX90A() || isGFX11Plus())
4024 return StringRef("lds_direct is not supported on this GPU");
4026 if (IsRevOpcode(Opcode) || (Desc.TSFlags & SIInstrFlags::SDWA))
4027 return StringRef("lds_direct cannot be used with this instruction");
4029 if (SrcName != OpName::src0)
4030 return StringRef("lds_direct may be used as src0 only");
4034 return std::nullopt;
4037 SMLoc AMDGPUAsmParser::getFlatOffsetLoc(const OperandVector &Operands) const {
4038 for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
4039 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4040 if (Op.isFlatOffset())
4041 return Op.getStartLoc();
4043 return getLoc();
4046 bool AMDGPUAsmParser::validateFlatOffset(const MCInst &Inst,
4047 const OperandVector &Operands) {
4048 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4049 if ((TSFlags & SIInstrFlags::FLAT) == 0)
4050 return true;
4052 auto Opcode = Inst.getOpcode();
4053 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
4054 assert(OpNum != -1);
4056 const auto &Op = Inst.getOperand(OpNum);
4057 if (!hasFlatOffsets() && Op.getImm() != 0) {
4058 Error(getFlatOffsetLoc(Operands),
4059 "flat offset modifier is not supported on this GPU");
4060 return false;
4063 // For FLAT segment the offset must be positive;
4064 // MSB is ignored and forced to zero.
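// Illustrative (bit width depends on the subtarget): with 12 offset bits,
// global/scratch accept a signed offset in [-2048, 2047], while plain flat
// accepts an unsigned offset in [0, 2047].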
4065 unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI());
4066 bool AllowNegative =
4067 TSFlags & (SIInstrFlags::FlatGlobal | SIInstrFlags::FlatScratch);
4068 if (!isIntN(OffsetSize, Op.getImm()) || (!AllowNegative && Op.getImm() < 0)) {
4069 Error(getFlatOffsetLoc(Operands),
4070 Twine("expected a ") +
4071 (AllowNegative ? Twine(OffsetSize) + "-bit signed offset"
4072 : Twine(OffsetSize - 1) + "-bit unsigned offset"));
4073 return false;
4076 return true;
4079 SMLoc AMDGPUAsmParser::getSMEMOffsetLoc(const OperandVector &Operands) const {
4080 // Start with second operand because SMEM Offset cannot be dst or src0.
4081 for (unsigned i = 2, e = Operands.size(); i != e; ++i) {
4082 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4083 if (Op.isSMEMOffset() || Op.isSMEMOffsetMod())
4084 return Op.getStartLoc();
4086 return getLoc();
4089 bool AMDGPUAsmParser::validateSMEMOffset(const MCInst &Inst,
4090 const OperandVector &Operands) {
4091 if (isCI() || isSI())
4092 return true;
4094 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4095 if ((TSFlags & SIInstrFlags::SMRD) == 0)
4096 return true;
4098 auto Opcode = Inst.getOpcode();
4099 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
4100 if (OpNum == -1)
4101 return true;
4103 const auto &Op = Inst.getOperand(OpNum);
4104 if (!Op.isImm())
4105 return true;
4107 uint64_t Offset = Op.getImm();
4108 bool IsBuffer = AMDGPU::getSMEMIsBuffer(Opcode);
4109 if (AMDGPU::isLegalSMRDEncodedUnsignedOffset(getSTI(), Offset) ||
4110 AMDGPU::isLegalSMRDEncodedSignedOffset(getSTI(), Offset, IsBuffer))
4111 return true;
4113 Error(getSMEMOffsetLoc(Operands),
4114 (isVI() || IsBuffer) ? "expected a 20-bit unsigned offset" :
4115 "expected a 21-bit signed offset");
4117 return false;
4120 bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const {
4121 unsigned Opcode = Inst.getOpcode();
4122 const MCInstrDesc &Desc = MII.get(Opcode);
4123 if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC)))
4124 return true;
4126 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
4127 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
4129 const int OpIndices[] = { Src0Idx, Src1Idx };
4131 unsigned NumExprs = 0;
4132 unsigned NumLiterals = 0;
4133 uint32_t LiteralValue;
4135 for (int OpIdx : OpIndices) {
4136 if (OpIdx == -1) break;
4138 const MCOperand &MO = Inst.getOperand(OpIdx);
4139 // Exclude special imm operands (like that used by s_set_gpr_idx_on)
4140 if (AMDGPU::isSISrcOperand(Desc, OpIdx)) {
4141 if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
4142 uint32_t Value = static_cast<uint32_t>(MO.getImm());
4143 if (NumLiterals == 0 || LiteralValue != Value) {
4144 LiteralValue = Value;
4145 ++NumLiterals;
4147 } else if (MO.isExpr()) {
4148 ++NumExprs;
4153 return NumLiterals + NumExprs <= 1;
4156 bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) {
4157 const unsigned Opc = Inst.getOpcode();
4158 if (isPermlane16(Opc)) {
4159 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
4160 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
4162 if (OpSel & ~3)
4163 return false;
4166 uint64_t TSFlags = MII.get(Opc).TSFlags;
4168 if (isGFX940() && (TSFlags & SIInstrFlags::IsDOT)) {
4169 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
4170 if (OpSelIdx != -1) {
4171 if (Inst.getOperand(OpSelIdx).getImm() != 0)
4172 return false;
4174 int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
4175 if (OpSelHiIdx != -1) {
4176 if (Inst.getOperand(OpSelHiIdx).getImm() != -1)
4177 return false;
4181 // op_sel[0:1] must be 0 for v_dot2_bf16_bf16 and v_dot2_f16_f16 (VOP3 Dot).
4182 if (isGFX11Plus() && (TSFlags & SIInstrFlags::IsDOT) &&
4183 (TSFlags & SIInstrFlags::VOP3) && !(TSFlags & SIInstrFlags::VOP3P)) {
4184 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
4185 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
4186 if (OpSel & 3)
4187 return false;
4190 return true;
4193 bool AMDGPUAsmParser::validateDPP(const MCInst &Inst,
4194 const OperandVector &Operands) {
4195 const unsigned Opc = Inst.getOpcode();
4196 int DppCtrlIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dpp_ctrl);
4197 if (DppCtrlIdx < 0)
4198 return true;
4199 unsigned DppCtrl = Inst.getOperand(DppCtrlIdx).getImm();
4201 if (!AMDGPU::isLegal64BitDPPControl(DppCtrl)) {
4202 // DPP64 is supported for row_newbcast only.
4203 int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
4204 if (Src0Idx >= 0 &&
4205 getMRI()->getSubReg(Inst.getOperand(Src0Idx).getReg(), AMDGPU::sub1)) {
4206 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyDppCtrl, Operands);
4207 Error(S, "64 bit dpp only supports row_newbcast");
4208 return false;
4212 return true;
4215 // Check if VCC register matches wavefront size
4216 bool AMDGPUAsmParser::validateVccOperand(unsigned Reg) const {
4217 auto FB = getFeatureBits();
4218 return (FB[AMDGPU::FeatureWavefrontSize64] && Reg == AMDGPU::VCC) ||
4219 (FB[AMDGPU::FeatureWavefrontSize32] && Reg == AMDGPU::VCC_LO);
4222 // One unique literal can be used. VOP3 literal is only allowed in GFX10+
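// Illustrative (assumed GFX10+ syntax): 'v_add3_u32 v0, 0x1234, 0x1234, v1'
// reuses a single literal and is accepted; two distinct literals are rejected.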
4223 bool AMDGPUAsmParser::validateVOPLiteral(const MCInst &Inst,
4224 const OperandVector &Operands) {
4225 unsigned Opcode = Inst.getOpcode();
4226 const MCInstrDesc &Desc = MII.get(Opcode);
4227 bool HasMandatoryLiteral = getNamedOperandIdx(Opcode, OpName::imm) != -1;
4228 if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P)) &&
4229 !HasMandatoryLiteral && !isVOPD(Opcode))
4230 return true;
4232 OperandIndices OpIndices = getSrcOperandIndices(Opcode, HasMandatoryLiteral);
4234 unsigned NumExprs = 0;
4235 unsigned NumLiterals = 0;
4236 uint32_t LiteralValue;
4238 for (int OpIdx : OpIndices) {
4239 if (OpIdx == -1)
4240 continue;
4242 const MCOperand &MO = Inst.getOperand(OpIdx);
4243 if (!MO.isImm() && !MO.isExpr())
4244 continue;
4245 if (!isSISrcOperand(Desc, OpIdx))
4246 continue;
4248 if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
4249 uint32_t Value = static_cast<uint32_t>(MO.getImm());
4250 if (NumLiterals == 0 || LiteralValue != Value) {
4251 LiteralValue = Value;
4252 ++NumLiterals;
4254 } else if (MO.isExpr()) {
4255 ++NumExprs;
4258 NumLiterals += NumExprs;
4260 if (!NumLiterals)
4261 return true;
4263 if (!HasMandatoryLiteral && !getFeatureBits()[FeatureVOP3Literal]) {
4264 Error(getLitLoc(Operands), "literal operands are not supported");
4265 return false;
4268 if (NumLiterals > 1) {
4269 Error(getLitLoc(Operands, true), "only one unique literal operand is allowed");
4270 return false;
4273 return true;
4276 // Returns -1 if not a register, 0 if VGPR and 1 if AGPR.
4277 static int IsAGPROperand(const MCInst &Inst, uint16_t NameIdx,
4278 const MCRegisterInfo *MRI) {
4279 int OpIdx = AMDGPU::getNamedOperandIdx(Inst.getOpcode(), NameIdx);
4280 if (OpIdx < 0)
4281 return -1;
4283 const MCOperand &Op = Inst.getOperand(OpIdx);
4284 if (!Op.isReg())
4285 return -1;
4287 unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
4288 auto Reg = Sub ? Sub : Op.getReg();
4289 const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID);
4290 return AGPR32.contains(Reg) ? 1 : 0;
4293 bool AMDGPUAsmParser::validateAGPRLdSt(const MCInst &Inst) const {
4294 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4295 if ((TSFlags & (SIInstrFlags::FLAT | SIInstrFlags::MUBUF |
4296 SIInstrFlags::MTBUF | SIInstrFlags::MIMG |
4297 SIInstrFlags::DS)) == 0)
4298 return true;
4300 uint16_t DataNameIdx = (TSFlags & SIInstrFlags::DS) ? AMDGPU::OpName::data0
4301 : AMDGPU::OpName::vdata;
4303 const MCRegisterInfo *MRI = getMRI();
4304 int DstAreg = IsAGPROperand(Inst, AMDGPU::OpName::vdst, MRI);
4305 int DataAreg = IsAGPROperand(Inst, DataNameIdx, MRI);
4307 if ((TSFlags & SIInstrFlags::DS) && DataAreg >= 0) {
4308 int Data2Areg = IsAGPROperand(Inst, AMDGPU::OpName::data1, MRI);
4309 if (Data2Areg >= 0 && Data2Areg != DataAreg)
4310 return false;
4313 auto FB = getFeatureBits();
4314 if (FB[AMDGPU::FeatureGFX90AInsts]) {
4315 if (DataAreg < 0 || DstAreg < 0)
4316 return true;
4317 return DstAreg == DataAreg;
4320 return DstAreg < 1 && DataAreg < 1;
4323 bool AMDGPUAsmParser::validateVGPRAlign(const MCInst &Inst) const {
4324 auto FB = getFeatureBits();
4325 if (!FB[AMDGPU::FeatureGFX90AInsts])
4326 return true;
4328 const MCRegisterInfo *MRI = getMRI();
4329 const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID);
4330 const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID);
4331 for (unsigned I = 0, E = Inst.getNumOperands(); I != E; ++I) {
4332 const MCOperand &Op = Inst.getOperand(I);
4333 if (!Op.isReg())
4334 continue;
4336 unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
4337 if (!Sub)
4338 continue;
4340 if (VGPR32.contains(Sub) && ((Sub - AMDGPU::VGPR0) & 1))
4341 return false;
4342 if (AGPR32.contains(Sub) && ((Sub - AMDGPU::AGPR0) & 1))
4343 return false;
4346 return true;
4349 SMLoc AMDGPUAsmParser::getBLGPLoc(const OperandVector &Operands) const {
4350 for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
4351 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4352 if (Op.isBLGP())
4353 return Op.getStartLoc();
4355 return SMLoc();
4358 bool AMDGPUAsmParser::validateBLGP(const MCInst &Inst,
4359 const OperandVector &Operands) {
4360 unsigned Opc = Inst.getOpcode();
4361 int BlgpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::blgp);
4362 if (BlgpIdx == -1)
4363 return true;
4364 SMLoc BLGPLoc = getBLGPLoc(Operands);
4365 if (!BLGPLoc.isValid())
4366 return true;
4367 bool IsNeg = StringRef(BLGPLoc.getPointer()).startswith("neg:");
4368 auto FB = getFeatureBits();
4369 bool UsesNeg = false;
4370 if (FB[AMDGPU::FeatureGFX940Insts]) {
4371 switch (Opc) {
4372 case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_acd:
4373 case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_vcd:
4374 case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_acd:
4375 case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_vcd:
4376 UsesNeg = true;
4380 if (IsNeg == UsesNeg)
4381 return true;
4383 Error(BLGPLoc,
4384 UsesNeg ? "invalid modifier: blgp is not supported"
4385 : "invalid modifier: neg is not supported");
4387 return false;
4390 bool AMDGPUAsmParser::validateWaitCnt(const MCInst &Inst,
4391 const OperandVector &Operands) {
4392 if (!isGFX11Plus())
4393 return true;
4395 unsigned Opc = Inst.getOpcode();
4396 if (Opc != AMDGPU::S_WAITCNT_EXPCNT_gfx11 &&
4397 Opc != AMDGPU::S_WAITCNT_LGKMCNT_gfx11 &&
4398 Opc != AMDGPU::S_WAITCNT_VMCNT_gfx11 &&
4399 Opc != AMDGPU::S_WAITCNT_VSCNT_gfx11)
4400 return true;
4402 int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::sdst);
4403 assert(Src0Idx >= 0 && Inst.getOperand(Src0Idx).isReg());
4404 auto Reg = mc2PseudoReg(Inst.getOperand(Src0Idx).getReg());
4405 if (Reg == AMDGPU::SGPR_NULL)
4406 return true;
4408 SMLoc RegLoc = getRegLoc(Reg, Operands);
4409 Error(RegLoc, "src0 must be null");
4410 return false;
4413 // gfx90a has an undocumented limitation:
4414 // DS_GWS opcodes must use even aligned registers.
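// Illustrative (assumed syntax): on gfx90a 'ds_gws_init v2 gds' is accepted,
// while 'ds_gws_init v1 gds' is rejected because v1 is odd-aligned.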
4415 bool AMDGPUAsmParser::validateGWS(const MCInst &Inst,
4416 const OperandVector &Operands) {
4417 if (!getFeatureBits()[AMDGPU::FeatureGFX90AInsts])
4418 return true;
4420 int Opc = Inst.getOpcode();
4421 if (Opc != AMDGPU::DS_GWS_INIT_vi && Opc != AMDGPU::DS_GWS_BARRIER_vi &&
4422 Opc != AMDGPU::DS_GWS_SEMA_BR_vi)
4423 return true;
4425 const MCRegisterInfo *MRI = getMRI();
4426 const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID);
4427 int Data0Pos =
4428 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::data0);
4429 assert(Data0Pos != -1);
4430 auto Reg = Inst.getOperand(Data0Pos).getReg();
4431 auto RegIdx = Reg - (VGPR32.contains(Reg) ? AMDGPU::VGPR0 : AMDGPU::AGPR0);
4432 if (RegIdx & 1) {
4433 SMLoc RegLoc = getRegLoc(Reg, Operands);
4434 Error(RegLoc, "vgpr must be even aligned");
4435 return false;
4438 return true;
4441 bool AMDGPUAsmParser::validateCoherencyBits(const MCInst &Inst,
4442 const OperandVector &Operands,
4443 const SMLoc &IDLoc) {
4444 int CPolPos = AMDGPU::getNamedOperandIdx(Inst.getOpcode(),
4445 AMDGPU::OpName::cpol);
4446 if (CPolPos == -1)
4447 return true;
4449 unsigned CPol = Inst.getOperand(CPolPos).getImm();
4451 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4452 if (TSFlags & SIInstrFlags::SMRD) {
4453 if (CPol && (isSI() || isCI())) {
4454 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
4455 Error(S, "cache policy is not supported for SMRD instructions");
4456 return false;
4458 if (CPol & ~(AMDGPU::CPol::GLC | AMDGPU::CPol::DLC)) {
4459 Error(IDLoc, "invalid cache policy for SMEM instruction");
4460 return false;
4464 if (isGFX90A() && !isGFX940() && (CPol & CPol::SCC)) {
4465 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
4466 StringRef CStr(S.getPointer());
4467 S = SMLoc::getFromPointer(&CStr.data()[CStr.find("scc")]);
4468 Error(S, "scc is not supported on this GPU");
4469 return false;
4472 if (!(TSFlags & (SIInstrFlags::IsAtomicNoRet | SIInstrFlags::IsAtomicRet)))
4473 return true;
4475 if (TSFlags & SIInstrFlags::IsAtomicRet) {
4476 if (!(TSFlags & SIInstrFlags::MIMG) && !(CPol & CPol::GLC)) {
4477 Error(IDLoc, isGFX940() ? "instruction must use sc0"
4478 : "instruction must use glc");
4479 return false;
4481 } else {
4482 if (CPol & CPol::GLC) {
4483 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
4484 StringRef CStr(S.getPointer());
4485 S = SMLoc::getFromPointer(
4486 &CStr.data()[CStr.find(isGFX940() ? "sc0" : "glc")]);
4487 Error(S, isGFX940() ? "instruction must not use sc0"
4488 : "instruction must not use glc");
4489 return false;
4493 return true;
4496 bool AMDGPUAsmParser::validateExeczVcczOperands(const OperandVector &Operands) {
4497 if (!isGFX11Plus())
4498 return true;
4499 for (auto &Operand : Operands) {
4500 if (!Operand->isReg())
4501 continue;
4502 unsigned Reg = Operand->getReg();
4503 if (Reg == SRC_EXECZ || Reg == SRC_VCCZ) {
4504 Error(getRegLoc(Reg, Operands),
4505 "execz and vccz are not supported on this GPU");
4506 return false;
4509 return true;
4512 bool AMDGPUAsmParser::validateTFE(const MCInst &Inst,
4513 const OperandVector &Operands) {
4514 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
4515 if (Desc.mayStore() &&
4516 (Desc.TSFlags & (SIInstrFlags::MUBUF | SIInstrFlags::MTBUF))) {
4517 SMLoc Loc = getImmLoc(AMDGPUOperand::ImmTyTFE, Operands);
4518 if (Loc != getInstLoc(Operands)) {
4519 Error(Loc, "TFE modifier has no meaning for store instructions");
4520 return false;
4524 return true;
4527 bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst,
4528 const SMLoc &IDLoc,
4529 const OperandVector &Operands) {
4530 if (auto ErrMsg = validateLdsDirect(Inst)) {
4531 Error(getRegLoc(LDS_DIRECT, Operands), *ErrMsg);
4532 return false;
4534 if (!validateSOPLiteral(Inst)) {
4535 Error(getLitLoc(Operands),
4536 "only one unique literal operand is allowed");
4537 return false;
4539 if (!validateVOPLiteral(Inst, Operands)) {
4540 return false;
4542 if (!validateConstantBusLimitations(Inst, Operands)) {
4543 return false;
4545 if (!validateVOPDRegBankConstraints(Inst, Operands)) {
4546 return false;
4548 if (!validateIntClampSupported(Inst)) {
4549 Error(getImmLoc(AMDGPUOperand::ImmTyClampSI, Operands),
4550 "integer clamping is not supported on this GPU");
4551 return false;
4553 if (!validateOpSel(Inst)) {
4554 Error(getImmLoc(AMDGPUOperand::ImmTyOpSel, Operands),
4555 "invalid op_sel operand");
4556 return false;
4558 if (!validateDPP(Inst, Operands)) {
4559 return false;
4561 // For MUBUF/MTBUF d16 is a part of opcode, so there is nothing to validate.
4562 if (!validateMIMGD16(Inst)) {
4563 Error(getImmLoc(AMDGPUOperand::ImmTyD16, Operands),
4564 "d16 modifier is not supported on this GPU");
4565 return false;
4567 if (!validateMIMGMSAA(Inst)) {
4568 Error(getImmLoc(AMDGPUOperand::ImmTyDim, Operands),
4569 "invalid dim; must be MSAA type");
4570 return false;
4572 if (!validateMIMGDataSize(Inst, IDLoc)) {
4573 return false;
4575 if (!validateMIMGAddrSize(Inst, IDLoc))
4576 return false;
4577 if (!validateMIMGAtomicDMask(Inst)) {
4578 Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands),
4579 "invalid atomic image dmask");
4580 return false;
4582 if (!validateMIMGGatherDMask(Inst)) {
4583 Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands),
4584 "invalid image_gather dmask: only one bit must be set");
4585 return false;
4587 if (!validateMovrels(Inst, Operands)) {
4588 return false;
4590 if (!validateFlatOffset(Inst, Operands)) {
4591 return false;
4593 if (!validateSMEMOffset(Inst, Operands)) {
4594 return false;
4596 if (!validateMAIAccWrite(Inst, Operands)) {
4597 return false;
4599 if (!validateMAISrc2(Inst, Operands)) {
4600 return false;
4602 if (!validateMFMA(Inst, Operands)) {
4603 return false;
4605 if (!validateCoherencyBits(Inst, Operands, IDLoc)) {
4606 return false;
4609 if (!validateAGPRLdSt(Inst)) {
4610 Error(IDLoc, getFeatureBits()[AMDGPU::FeatureGFX90AInsts]
4611 ? "invalid register class: data and dst should be all VGPR or AGPR"
4612 : "invalid register class: agpr loads and stores not supported on this GPU"
4614 return false;
4616 if (!validateVGPRAlign(Inst)) {
4617 Error(IDLoc,
4618 "invalid register class: vgpr tuples must be 64 bit aligned");
4619 return false;
4621 if (!validateGWS(Inst, Operands)) {
4622 return false;
4625 if (!validateBLGP(Inst, Operands)) {
4626 return false;
4629 if (!validateDivScale(Inst)) {
4630 Error(IDLoc, "ABS not allowed in VOP3B instructions");
4631 return false;
4633 if (!validateWaitCnt(Inst, Operands)) {
4634 return false;
4636 if (!validateExeczVcczOperands(Operands)) {
4637 return false;
4639 if (!validateTFE(Inst, Operands)) {
4640 return false;
4643 return true;
4646 static std::string AMDGPUMnemonicSpellCheck(StringRef S,
4647 const FeatureBitset &FBS,
4648 unsigned VariantID = 0);
4650 static bool AMDGPUCheckMnemonic(StringRef Mnemonic,
4651 const FeatureBitset &AvailableFeatures,
4652 unsigned VariantID);
4654 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
4655 const FeatureBitset &FBS) {
4656 return isSupportedMnemo(Mnemo, FBS, getAllVariants());
4659 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
4660 const FeatureBitset &FBS,
4661 ArrayRef<unsigned> Variants) {
4662 for (auto Variant : Variants) {
4663 if (AMDGPUCheckMnemonic(Mnemo, FBS, Variant))
4664 return true;
4667 return false;
4670 bool AMDGPUAsmParser::checkUnsupportedInstruction(StringRef Mnemo,
4671 const SMLoc &IDLoc) {
4672 FeatureBitset FBS = ComputeAvailableFeatures(getFeatureBits());
4674 // Check if requested instruction variant is supported.
4675 if (isSupportedMnemo(Mnemo, FBS, getMatchedVariants()))
4676 return false;
4678 // This instruction is not supported.
4679 // Clear any other pending errors because they are no longer relevant.
4680 getParser().clearPendingErrors();
4682 // Requested instruction variant is not supported.
4683 // Check if any other variants are supported.
4684 StringRef VariantName = getMatchedVariantName();
4685 if (!VariantName.empty() && isSupportedMnemo(Mnemo, FBS)) {
4686 return Error(IDLoc,
4687 Twine(VariantName,
4688 " variant of this instruction is not supported"));
4691 // Check if this instruction may be used with a different wavesize.
4692 if (isGFX10Plus() && getFeatureBits()[AMDGPU::FeatureWavefrontSize64] &&
4693 !getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) {
4695 FeatureBitset FeaturesWS32 = getFeatureBits();
4696 FeaturesWS32.flip(AMDGPU::FeatureWavefrontSize64)
4697 .flip(AMDGPU::FeatureWavefrontSize32);
4698 FeatureBitset AvailableFeaturesWS32 =
4699 ComputeAvailableFeatures(FeaturesWS32);
4701 if (isSupportedMnemo(Mnemo, AvailableFeaturesWS32, getMatchedVariants()))
4702 return Error(IDLoc, "instruction requires wavesize=32");
4705 // Finally check if this instruction is supported on any other GPU.
4706 if (isSupportedMnemo(Mnemo, FeatureBitset().set())) {
4707 return Error(IDLoc, "instruction not supported on this GPU");
4710 // Instruction not supported on any GPU. Probably a typo.
4711 std::string Suggestion = AMDGPUMnemonicSpellCheck(Mnemo, FBS);
4712 return Error(IDLoc, "invalid instruction" + Suggestion);
4715 static bool isInvalidVOPDY(const OperandVector &Operands,
4716 uint64_t InvalidOprIdx) {
4717 assert(InvalidOprIdx < Operands.size());
4718 const auto &Op = ((AMDGPUOperand &)*Operands[InvalidOprIdx]);
4719 if (Op.isToken() && InvalidOprIdx > 1) {
4720 const auto &PrevOp = ((AMDGPUOperand &)*Operands[InvalidOprIdx - 1]);
4721 return PrevOp.isToken() && PrevOp.getToken() == "::";
4723 return false;
4726 bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
4727 OperandVector &Operands,
4728 MCStreamer &Out,
4729 uint64_t &ErrorInfo,
4730 bool MatchingInlineAsm) {
4731 MCInst Inst;
4732 unsigned Result = Match_Success;
4733 for (auto Variant : getMatchedVariants()) {
4734 uint64_t EI;
4735 auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm,
4736 Variant);
4737 // We order match statuses from least to most specific. We use the most
4738 // specific status as the result:
4739 // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature < Match_PreferE32
4740 if ((R == Match_Success) ||
4741 (R == Match_PreferE32) ||
4742 (R == Match_MissingFeature && Result != Match_PreferE32) ||
4743 (R == Match_InvalidOperand && Result != Match_MissingFeature
4744 && Result != Match_PreferE32) ||
4745 (R == Match_MnemonicFail && Result != Match_InvalidOperand
4746 && Result != Match_MissingFeature
4747 && Result != Match_PreferE32)) {
4748 Result = R;
4749 ErrorInfo = EI;
4751 if (R == Match_Success)
4752 break;
4755 if (Result == Match_Success) {
4756 if (!validateInstruction(Inst, IDLoc, Operands)) {
4757 return true;
4759 Inst.setLoc(IDLoc);
4760 Out.emitInstruction(Inst, getSTI());
4761 return false;
4764 StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken();
4765 if (checkUnsupportedInstruction(Mnemo, IDLoc)) {
4766 return true;
4769 switch (Result) {
4770 default: break;
4771 case Match_MissingFeature:
4772 // It has been verified that the specified instruction
4773 // mnemonic is valid. A match was found but it requires
4774 // features which are not supported on this GPU.
4775 return Error(IDLoc, "operands are not valid for this GPU or mode");
4777 case Match_InvalidOperand: {
4778 SMLoc ErrorLoc = IDLoc;
4779 if (ErrorInfo != ~0ULL) {
4780 if (ErrorInfo >= Operands.size()) {
4781 return Error(IDLoc, "too few operands for instruction");
4783 ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc();
4784 if (ErrorLoc == SMLoc())
4785 ErrorLoc = IDLoc;
4787 if (isInvalidVOPDY(Operands, ErrorInfo))
4788 return Error(ErrorLoc, "invalid VOPDY instruction");
4790 return Error(ErrorLoc, "invalid operand for instruction");
4793 case Match_PreferE32:
4794 return Error(IDLoc, "internal error: instruction without _e64 suffix "
4795 "should be encoded as e32");
4796 case Match_MnemonicFail:
4797 llvm_unreachable("Invalid instructions should have been handled already");
4799 llvm_unreachable("Implement any new match types added!");
4802 bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) {
4803 int64_t Tmp = -1;
4804 if (!isToken(AsmToken::Integer) && !isToken(AsmToken::Identifier)) {
4805 return true;
4807 if (getParser().parseAbsoluteExpression(Tmp)) {
4808 return true;
4810 Ret = static_cast<uint32_t>(Tmp);
4811 return false;
4814 bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major,
4815 uint32_t &Minor) {
4816 if (ParseAsAbsoluteExpression(Major))
4817 return TokError("invalid major version");
4819 if (!trySkipToken(AsmToken::Comma))
4820 return TokError("minor version number required, comma expected");
4822 if (ParseAsAbsoluteExpression(Minor))
4823 return TokError("invalid minor version");
4825 return false;
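// Illustrative directive usage (assumed target id):
//   .amdgcn_target "amdgcn-amd-amdhsa--gfx90a"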
4828 bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() {
4829 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
4830 return TokError("directive only supported for amdgcn architecture");
4832 std::string TargetIDDirective;
4833 SMLoc TargetStart = getTok().getLoc();
4834 if (getParser().parseEscapedString(TargetIDDirective))
4835 return true;
4837 SMRange TargetRange = SMRange(TargetStart, getTok().getLoc());
4838 if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective)
4839 return getParser().Error(TargetRange.Start,
4840 (Twine(".amdgcn_target directive's target id ") +
4841 Twine(TargetIDDirective) +
4842 Twine(" does not match the specified target id ") +
4843 Twine(getTargetStreamer().getTargetID()->toString())).str());
4845 return false;
4848 bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) {
4849 return Error(Range.Start, "value out of range", Range);
4852 bool AMDGPUAsmParser::calculateGPRBlocks(
4853 const FeatureBitset &Features, bool VCCUsed, bool FlatScrUsed,
4854 bool XNACKUsed, std::optional<bool> EnableWavefrontSize32,
4855 unsigned NextFreeVGPR, SMRange VGPRRange, unsigned NextFreeSGPR,
4856 SMRange SGPRRange, unsigned &VGPRBlocks, unsigned &SGPRBlocks) {
4857 // TODO(scott.linder): These calculations are duplicated from
4858 // AMDGPUAsmPrinter::getSIProgramInfo and could be unified.
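// Illustrative (granule size depends on the subtarget): with a 4-VGPR
// allocation granule, 9 VGPRs round up to 12, i.e. 3 granules, which the
// IsaInfo helpers below return in granulated (count-minus-one) form.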
4859 IsaVersion Version = getIsaVersion(getSTI().getCPU());
4861 unsigned NumVGPRs = NextFreeVGPR;
4862 unsigned NumSGPRs = NextFreeSGPR;
4864 if (Version.Major >= 10)
4865 NumSGPRs = 0;
4866 else {
4867 unsigned MaxAddressableNumSGPRs =
4868 IsaInfo::getAddressableNumSGPRs(&getSTI());
4870 if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) &&
4871 NumSGPRs > MaxAddressableNumSGPRs)
4872 return OutOfRangeError(SGPRRange);
4874 NumSGPRs +=
4875 IsaInfo::getNumExtraSGPRs(&getSTI(), VCCUsed, FlatScrUsed, XNACKUsed);
4877 if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) &&
4878 NumSGPRs > MaxAddressableNumSGPRs)
4879 return OutOfRangeError(SGPRRange);
4881 if (Features.test(FeatureSGPRInitBug))
4882 NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG;
4885 VGPRBlocks =
4886 IsaInfo::getNumVGPRBlocks(&getSTI(), NumVGPRs, EnableWavefrontSize32);
4887 SGPRBlocks = IsaInfo::getNumSGPRBlocks(&getSTI(), NumSGPRs);
4889 return false;
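// Illustrative kernel descriptor block (minimal, assumed values):
//   .amdhsa_kernel my_kernel
//     .amdhsa_next_free_vgpr 8
//     .amdhsa_next_free_sgpr 16
//   .end_amdhsa_kernel
// .amdhsa_next_free_vgpr and .amdhsa_next_free_sgpr are mandatory; see the
// checks after the parsing loop below.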
4892 bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
4893 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
4894 return TokError("directive only supported for amdgcn architecture");
4896 if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA)
4897 return TokError("directive only supported for amdhsa OS");
4899 StringRef KernelName;
4900 if (getParser().parseIdentifier(KernelName))
4901 return true;
4903 kernel_descriptor_t KD = getDefaultAmdhsaKernelDescriptor(&getSTI());
4905 StringSet<> Seen;
4907 IsaVersion IVersion = getIsaVersion(getSTI().getCPU());
4909 SMRange VGPRRange;
4910 uint64_t NextFreeVGPR = 0;
4911 uint64_t AccumOffset = 0;
4912 uint64_t SharedVGPRCount = 0;
4913 SMRange SGPRRange;
4914 uint64_t NextFreeSGPR = 0;
4916 // Count the number of user SGPRs implied from the enabled feature bits.
4917 unsigned ImpliedUserSGPRCount = 0;
4919 // Track if the asm explicitly contains the directive for the user SGPR
4920 // count.
4921 std::optional<unsigned> ExplicitUserSGPRCount;
4922 bool ReserveVCC = true;
4923 bool ReserveFlatScr = true;
4924 std::optional<bool> EnableWavefrontSize32;
4926 while (true) {
4927 while (trySkipToken(AsmToken::EndOfStatement));
4929 StringRef ID;
4930 SMRange IDRange = getTok().getLocRange();
4931 if (!parseId(ID, "expected .amdhsa_ directive or .end_amdhsa_kernel"))
4932 return true;
4934 if (ID == ".end_amdhsa_kernel")
4935 break;
4937 if (!Seen.insert(ID).second)
4938 return TokError(".amdhsa_ directives cannot be repeated");
4940 SMLoc ValStart = getLoc();
4941 int64_t IVal;
4942 if (getParser().parseAbsoluteExpression(IVal))
4943 return true;
4944 SMLoc ValEnd = getLoc();
4945 SMRange ValRange = SMRange(ValStart, ValEnd);
4947 if (IVal < 0)
4948 return OutOfRangeError(ValRange);
4950 uint64_t Val = IVal;
4952 #define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE) \
4953 if (!isUInt<ENTRY##_WIDTH>(VALUE)) \
4954 return OutOfRangeError(RANGE); \
4955 AMDHSA_BITS_SET(FIELD, ENTRY, VALUE);
4957 if (ID == ".amdhsa_group_segment_fixed_size") {
4958 if (!isUInt<sizeof(KD.group_segment_fixed_size) * CHAR_BIT>(Val))
4959 return OutOfRangeError(ValRange);
4960 KD.group_segment_fixed_size = Val;
4961 } else if (ID == ".amdhsa_private_segment_fixed_size") {
4962 if (!isUInt<sizeof(KD.private_segment_fixed_size) * CHAR_BIT>(Val))
4963 return OutOfRangeError(ValRange);
4964 KD.private_segment_fixed_size = Val;
4965 } else if (ID == ".amdhsa_kernarg_size") {
4966 if (!isUInt<sizeof(KD.kernarg_size) * CHAR_BIT>(Val))
4967 return OutOfRangeError(ValRange);
4968 KD.kernarg_size = Val;
4969 } else if (ID == ".amdhsa_user_sgpr_count") {
4970 ExplicitUserSGPRCount = Val;
4971 } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") {
4972 if (hasArchitectedFlatScratch())
4973 return Error(IDRange.Start,
4974 "directive is not supported with architected flat scratch",
4975 IDRange);
4976 PARSE_BITS_ENTRY(KD.kernel_code_properties,
4977 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER,
4978 Val, ValRange);
4979 if (Val)
4980 ImpliedUserSGPRCount += 4;
4981 } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") {
4982 PARSE_BITS_ENTRY(KD.kernel_code_properties,
4983 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, Val,
4984 ValRange);
4985 if (Val)
4986 ImpliedUserSGPRCount += 2;
4987 } else if (ID == ".amdhsa_user_sgpr_queue_ptr") {
4988 PARSE_BITS_ENTRY(KD.kernel_code_properties,
4989 KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, Val,
4990 ValRange);
4991 if (Val)
4992 ImpliedUserSGPRCount += 2;
4993 } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") {
4994 PARSE_BITS_ENTRY(KD.kernel_code_properties,
4995 KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR,
4996 Val, ValRange);
4997 if (Val)
4998 ImpliedUserSGPRCount += 2;
4999 } else if (ID == ".amdhsa_user_sgpr_dispatch_id") {
5000 PARSE_BITS_ENTRY(KD.kernel_code_properties,
5001 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, Val,
5002 ValRange);
5003 if (Val)
5004 ImpliedUserSGPRCount += 2;
5005 } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") {
5006 if (hasArchitectedFlatScratch())
5007 return Error(IDRange.Start,
5008 "directive is not supported with architected flat scratch",
5009 IDRange);
5010 PARSE_BITS_ENTRY(KD.kernel_code_properties,
5011 KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, Val,
5012 ValRange);
5013 if (Val)
5014 ImpliedUserSGPRCount += 2;
5015 } else if (ID == ".amdhsa_user_sgpr_private_segment_size") {
5016 PARSE_BITS_ENTRY(KD.kernel_code_properties,
5017 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE,
5018 Val, ValRange);
5019 if (Val)
5020 ImpliedUserSGPRCount += 1;
5021 } else if (ID == ".amdhsa_wavefront_size32") {
5022 if (IVersion.Major < 10)
5023 return Error(IDRange.Start, "directive requires gfx10+", IDRange);
5024 EnableWavefrontSize32 = Val;
5025 PARSE_BITS_ENTRY(KD.kernel_code_properties,
5026 KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32,
5027 Val, ValRange);
5028 } else if (ID == ".amdhsa_uses_dynamic_stack") {
5029 PARSE_BITS_ENTRY(KD.kernel_code_properties,
5030 KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK, Val, ValRange);
5031 } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") {
5032 if (hasArchitectedFlatScratch())
5033 return Error(IDRange.Start,
5034 "directive is not supported with architected flat scratch",
5035 IDRange);
5036 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5037 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, Val, ValRange);
5038 } else if (ID == ".amdhsa_enable_private_segment") {
5039 if (!hasArchitectedFlatScratch())
5040 return Error(
5041 IDRange.Start,
5042 "directive is not supported without architected flat scratch",
5043 IDRange);
5044 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5045 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, Val, ValRange);
5046 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") {
5047 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5048 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, Val,
5049 ValRange);
5050 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") {
5051 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5052 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, Val,
5053 ValRange);
5054 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") {
5055 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5056 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, Val,
5057 ValRange);
5058 } else if (ID == ".amdhsa_system_sgpr_workgroup_info") {
5059 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5060 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, Val,
5061 ValRange);
5062 } else if (ID == ".amdhsa_system_vgpr_workitem_id") {
5063 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5064 COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, Val,
5065 ValRange);
5066 } else if (ID == ".amdhsa_next_free_vgpr") {
5067 VGPRRange = ValRange;
5068 NextFreeVGPR = Val;
5069 } else if (ID == ".amdhsa_next_free_sgpr") {
5070 SGPRRange = ValRange;
5071 NextFreeSGPR = Val;
5072 } else if (ID == ".amdhsa_accum_offset") {
5073 if (!isGFX90A())
5074 return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
5075 AccumOffset = Val;
5076 } else if (ID == ".amdhsa_reserve_vcc") {
5077 if (!isUInt<1>(Val))
5078 return OutOfRangeError(ValRange);
5079 ReserveVCC = Val;
5080 } else if (ID == ".amdhsa_reserve_flat_scratch") {
5081 if (IVersion.Major < 7)
5082 return Error(IDRange.Start, "directive requires gfx7+", IDRange);
5083 if (hasArchitectedFlatScratch())
5084 return Error(IDRange.Start,
5085 "directive is not supported with architected flat scratch",
5086 IDRange);
5087 if (!isUInt<1>(Val))
5088 return OutOfRangeError(ValRange);
5089 ReserveFlatScr = Val;
5090 } else if (ID == ".amdhsa_reserve_xnack_mask") {
5091 if (IVersion.Major < 8)
5092 return Error(IDRange.Start, "directive requires gfx8+", IDRange);
5093 if (!isUInt<1>(Val))
5094 return OutOfRangeError(ValRange);
5095 if (Val != getTargetStreamer().getTargetID()->isXnackOnOrAny())
5096 return getParser().Error(IDRange.Start, ".amdhsa_reserve_xnack_mask does not match target id",
5097 IDRange);
5098 } else if (ID == ".amdhsa_float_round_mode_32") {
5099 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
5100 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, Val, ValRange);
5101 } else if (ID == ".amdhsa_float_round_mode_16_64") {
5102 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
5103 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, Val, ValRange);
5104 } else if (ID == ".amdhsa_float_denorm_mode_32") {
5105 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
5106 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, Val, ValRange);
5107 } else if (ID == ".amdhsa_float_denorm_mode_16_64") {
5108 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
5109 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, Val,
5110 ValRange);
5111 } else if (ID == ".amdhsa_dx10_clamp") {
5112 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
5113 COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP, Val, ValRange);
5114 } else if (ID == ".amdhsa_ieee_mode") {
5115 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE,
5116 Val, ValRange);
5117 } else if (ID == ".amdhsa_fp16_overflow") {
5118 if (IVersion.Major < 9)
5119 return Error(IDRange.Start, "directive requires gfx9+", IDRange);
5120 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FP16_OVFL, Val,
5121 ValRange);
5122 } else if (ID == ".amdhsa_tg_split") {
5123 if (!isGFX90A())
5124 return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
5125 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT, Val,
5126 ValRange);
5127 } else if (ID == ".amdhsa_workgroup_processor_mode") {
5128 if (IVersion.Major < 10)
5129 return Error(IDRange.Start, "directive requires gfx10+", IDRange);
5130 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_WGP_MODE, Val,
5131 ValRange);
5132 } else if (ID == ".amdhsa_memory_ordered") {
5133 if (IVersion.Major < 10)
5134 return Error(IDRange.Start, "directive requires gfx10+", IDRange);
5135 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_MEM_ORDERED, Val,
5136 ValRange);
5137 } else if (ID == ".amdhsa_forward_progress") {
5138 if (IVersion.Major < 10)
5139 return Error(IDRange.Start, "directive requires gfx10+", IDRange);
5140 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FWD_PROGRESS, Val,
5141 ValRange);
5142 } else if (ID == ".amdhsa_shared_vgpr_count") {
5143 if (IVersion.Major < 10)
5144 return Error(IDRange.Start, "directive requires gfx10+", IDRange);
5145 SharedVGPRCount = Val;
5146 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3,
5147 COMPUTE_PGM_RSRC3_GFX10_PLUS_SHARED_VGPR_COUNT, Val,
5148 ValRange);
5149 } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") {
5150 PARSE_BITS_ENTRY(
5151 KD.compute_pgm_rsrc2,
5152 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, Val,
5153 ValRange);
5154 } else if (ID == ".amdhsa_exception_fp_denorm_src") {
5155 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5156 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE,
5157 Val, ValRange);
5158 } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") {
5159 PARSE_BITS_ENTRY(
5160 KD.compute_pgm_rsrc2,
5161 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, Val,
5162 ValRange);
5163 } else if (ID == ".amdhsa_exception_fp_ieee_overflow") {
5164 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5165 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW,
5166 Val, ValRange);
5167 } else if (ID == ".amdhsa_exception_fp_ieee_underflow") {
5168 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5169 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW,
5170 Val, ValRange);
5171 } else if (ID == ".amdhsa_exception_fp_ieee_inexact") {
5172 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5173 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT,
5174 Val, ValRange);
5175 } else if (ID == ".amdhsa_exception_int_div_zero") {
5176 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5177 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO,
5178 Val, ValRange);
5179 } else {
5180 return Error(IDRange.Start, "unknown .amdhsa_kernel directive", IDRange);
5183 #undef PARSE_BITS_ENTRY
5186 if (!Seen.contains(".amdhsa_next_free_vgpr"))
5187 return TokError(".amdhsa_next_free_vgpr directive is required");
5189 if (!Seen.contains(".amdhsa_next_free_sgpr"))
5190 return TokError(".amdhsa_next_free_sgpr directive is required");
5192 unsigned VGPRBlocks;
5193 unsigned SGPRBlocks;
5194 if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr,
5195 getTargetStreamer().getTargetID()->isXnackOnOrAny(),
5196 EnableWavefrontSize32, NextFreeVGPR,
5197 VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks,
5198 SGPRBlocks))
5199 return true;
5201 if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>(
5202 VGPRBlocks))
5203 return OutOfRangeError(VGPRRange);
5204 AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
5205 COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, VGPRBlocks);
5207 if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>(
5208 SGPRBlocks))
5209 return OutOfRangeError(SGPRRange);
5210 AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
5211 COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT,
5212 SGPRBlocks);
5214 if (ExplicitUserSGPRCount && ImpliedUserSGPRCount > *ExplicitUserSGPRCount)
5215 return TokError("amdgpu_user_sgpr_count smaller than than implied by "
5216 "enabled user SGPRs");
5218 unsigned UserSGPRCount =
5219 ExplicitUserSGPRCount ? *ExplicitUserSGPRCount : ImpliedUserSGPRCount;
5221 if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount))
5222 return TokError("too many user SGPRs enabled");
5223 AMDHSA_BITS_SET(KD.compute_pgm_rsrc2, COMPUTE_PGM_RSRC2_USER_SGPR_COUNT,
5224 UserSGPRCount);
5226 if (isGFX90A()) {
5227 if (!Seen.contains(".amdhsa_accum_offset"))
5228 return TokError(".amdhsa_accum_offset directive is required");
5229 if (AccumOffset < 4 || AccumOffset > 256 || (AccumOffset & 3))
5230 return TokError("accum_offset should be in range [4..256] in "
5231 "increments of 4");
5232 if (AccumOffset > alignTo(std::max((uint64_t)1, NextFreeVGPR), 4))
5233 return TokError("accum_offset exceeds total VGPR allocation");
5234 AMDHSA_BITS_SET(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET,
5235 (AccumOffset / 4 - 1));
5238 if (IVersion.Major >= 10) {
5239     // SharedVGPRCount < 16 checked by PARSE_BITS_ENTRY
5240 if (SharedVGPRCount && EnableWavefrontSize32 && *EnableWavefrontSize32) {
5241 return TokError("shared_vgpr_count directive not valid on "
5242 "wavefront size 32");
5244 if (SharedVGPRCount * 2 + VGPRBlocks > 63) {
5245 return TokError("shared_vgpr_count*2 + "
5246 "compute_pgm_rsrc1.GRANULATED_WORKITEM_VGPR_COUNT cannot "
5247 "exceed 63\n");
5251 getTargetStreamer().EmitAmdhsaKernelDescriptor(
5252 getSTI(), KernelName, KD, NextFreeVGPR, NextFreeSGPR, ReserveVCC,
5253 ReserveFlatScr, AMDGPU::getAmdhsaCodeObjectVersion());
5254 return false;
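// Illustrative usage (assumed version numbers):
//   .hsa_code_object_version 2,1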
5257 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectVersion() {
5258 uint32_t Major;
5259 uint32_t Minor;
5261 if (ParseDirectiveMajorMinor(Major, Minor))
5262 return true;
5264 getTargetStreamer().EmitDirectiveHSACodeObjectVersion(Major, Minor);
5265 return false;
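// Illustrative usage (assumed ISA and vendor values):
//   .hsa_code_object_isa 7,0,0,"AMD","AMDGPU"
// With no arguments the directive defaults to the targeted GPU's ISA version.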
5268 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() {
5269 uint32_t Major;
5270 uint32_t Minor;
5271 uint32_t Stepping;
5272 StringRef VendorName;
5273 StringRef ArchName;
5275 // If this directive has no arguments, then use the ISA version for the
5276 // targeted GPU.
5277 if (isToken(AsmToken::EndOfStatement)) {
5278 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
5279 getTargetStreamer().EmitDirectiveHSACodeObjectISAV2(ISA.Major, ISA.Minor,
5280 ISA.Stepping,
5281 "AMD", "AMDGPU");
5282 return false;
5285 if (ParseDirectiveMajorMinor(Major, Minor))
5286 return true;
5288 if (!trySkipToken(AsmToken::Comma))
5289 return TokError("stepping version number required, comma expected");
5291 if (ParseAsAbsoluteExpression(Stepping))
5292 return TokError("invalid stepping version");
5294 if (!trySkipToken(AsmToken::Comma))
5295 return TokError("vendor name required, comma expected");
5297 if (!parseString(VendorName, "invalid vendor name"))
5298 return true;
5300 if (!trySkipToken(AsmToken::Comma))
5301 return TokError("arch name required, comma expected");
5303 if (!parseString(ArchName, "invalid arch name"))
5304 return true;
5306 getTargetStreamer().EmitDirectiveHSACodeObjectISAV2(Major, Minor, Stepping,
5307 VendorName, ArchName);
5308 return false;
5311 bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID,
5312 amd_kernel_code_t &Header) {
5313 // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing
5314 // assembly for backwards compatibility.
5315 if (ID == "max_scratch_backing_memory_byte_size") {
5316 Parser.eatToEndOfStatement();
5317 return false;
5320 SmallString<40> ErrStr;
5321 raw_svector_ostream Err(ErrStr);
5322 if (!parseAmdKernelCodeField(ID, getParser(), Header, Err)) {
5323 return TokError(Err.str());
5325 Lex();
5327 if (ID == "enable_wavefront_size32") {
5328 if (Header.code_properties & AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) {
5329 if (!isGFX10Plus())
5330 return TokError("enable_wavefront_size32=1 is only allowed on GFX10+");
5331 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
5332 return TokError("enable_wavefront_size32=1 requires +WavefrontSize32");
5333 } else {
5334 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
5335 return TokError("enable_wavefront_size32=0 requires +WavefrontSize64");
5339 if (ID == "wavefront_size") {
5340 if (Header.wavefront_size == 5) {
5341 if (!isGFX10Plus())
5342 return TokError("wavefront_size=5 is only allowed on GFX10+");
5343 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
5344 return TokError("wavefront_size=5 requires +WavefrontSize32");
5345 } else if (Header.wavefront_size == 6) {
5346 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
5347 return TokError("wavefront_size=6 requires +WavefrontSize64");
5351 if (ID == "enable_wgp_mode") {
5352 if (G_00B848_WGP_MODE(Header.compute_pgm_resource_registers) &&
5353 !isGFX10Plus())
5354 return TokError("enable_wgp_mode=1 is only allowed on GFX10+");
5357 if (ID == "enable_mem_ordered") {
5358 if (G_00B848_MEM_ORDERED(Header.compute_pgm_resource_registers) &&
5359 !isGFX10Plus())
5360 return TokError("enable_mem_ordered=1 is only allowed on GFX10+");
5363 if (ID == "enable_fwd_progress") {
5364 if (G_00B848_FWD_PROGRESS(Header.compute_pgm_resource_registers) &&
5365 !isGFX10Plus())
5366 return TokError("enable_fwd_progress=1 is only allowed on GFX10+");
5369 return false;
5372 bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() {
5373 amd_kernel_code_t Header;
5374 AMDGPU::initDefaultAMDKernelCodeT(Header, &getSTI());
5376 while (true) {
5377 // Lex EndOfStatement. This is in a while loop, because lexing a comment
5378 // will set the current token to EndOfStatement.
5379 while(trySkipToken(AsmToken::EndOfStatement));
5381 StringRef ID;
5382 if (!parseId(ID, "expected value identifier or .end_amd_kernel_code_t"))
5383 return true;
5385 if (ID == ".end_amd_kernel_code_t")
5386 break;
5388 if (ParseAMDKernelCodeTValue(ID, Header))
5389 return true;
5392 getTargetStreamer().EmitAMDKernelCodeT(Header);
5394 return false;
5397 bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() {
5398 StringRef KernelName;
5399 if (!parseId(KernelName, "expected symbol name"))
5400 return true;
5402 getTargetStreamer().EmitAMDGPUSymbolType(KernelName,
5403 ELF::STT_AMDGPU_HSA_KERNEL);
5405 KernelScope.initialize(getContext());
5406 return false;
5409 bool AMDGPUAsmParser::ParseDirectiveISAVersion() {
5410 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) {
5411 return Error(getLoc(),
5412 ".amd_amdgpu_isa directive is not available on non-amdgcn "
5413 "architectures");
5416 auto TargetIDDirective = getLexer().getTok().getStringContents();
5417 if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective)
5418 return Error(getParser().getTok().getLoc(), "target id must match options");
5420 getTargetStreamer().EmitISAVersion();
5421 Lex();
5423 return false;
5426 bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() {
5427 const char *AssemblerDirectiveBegin;
5428 const char *AssemblerDirectiveEnd;
5429 std::tie(AssemblerDirectiveBegin, AssemblerDirectiveEnd) =
5430 isHsaAbiVersion3AndAbove(&getSTI())
5431 ? std::pair(HSAMD::V3::AssemblerDirectiveBegin,
5432 HSAMD::V3::AssemblerDirectiveEnd)
5433 : std::pair(HSAMD::AssemblerDirectiveBegin,
5434 HSAMD::AssemblerDirectiveEnd);
5436 if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) {
5437 return Error(getLoc(),
5438 (Twine(AssemblerDirectiveBegin) + Twine(" directive is "
5439 "not available on non-amdhsa OSes")).str());
5442 std::string HSAMetadataString;
5443 if (ParseToEndDirective(AssemblerDirectiveBegin, AssemblerDirectiveEnd,
5444 HSAMetadataString))
5445 return true;
5447 if (isHsaAbiVersion3AndAbove(&getSTI())) {
5448 if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString))
5449 return Error(getLoc(), "invalid HSA metadata");
5450 } else {
5451 if (!getTargetStreamer().EmitHSAMetadataV2(HSAMetadataString))
5452 return Error(getLoc(), "invalid HSA metadata");
5455 return false;
5458 /// Common code to parse out a block of text (typically YAML) between start and
5459 /// end directives.
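///
/// For example (the V3 directive names shown are an assumption here):
///   .amdgpu_metadata
///     <YAML document>
///   .end_amdgpu_metadata
/// Everything between the begin and end directives is collected verbatim
/// into CollectString.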
5460 bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin,
5461 const char *AssemblerDirectiveEnd,
5462 std::string &CollectString) {
5464 raw_string_ostream CollectStream(CollectString);
5466 getLexer().setSkipSpace(false);
5468 bool FoundEnd = false;
5469 while (!isToken(AsmToken::Eof)) {
5470 while (isToken(AsmToken::Space)) {
5471 CollectStream << getTokenStr();
5472 Lex();
5475 if (trySkipId(AssemblerDirectiveEnd)) {
5476 FoundEnd = true;
5477 break;
5480 CollectStream << Parser.parseStringToEndOfStatement()
5481 << getContext().getAsmInfo()->getSeparatorString();
5483 Parser.eatToEndOfStatement();
5486 getLexer().setSkipSpace(true);
5488 if (isToken(AsmToken::Eof) && !FoundEnd) {
5489 return TokError(Twine("expected directive ") +
5490 Twine(AssemblerDirectiveEnd) + Twine(" not found"));
5493 CollectStream.flush();
5494 return false;
5497 /// Parse the assembler directive for new MsgPack-format PAL metadata.
5498 bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() {
5499 std::string String;
5500 if (ParseToEndDirective(AMDGPU::PALMD::AssemblerDirectiveBegin,
5501 AMDGPU::PALMD::AssemblerDirectiveEnd, String))
5502 return true;
5504 auto PALMetadata = getTargetStreamer().getPALMetadata();
5505 if (!PALMetadata->setFromString(String))
5506 return Error(getLoc(), "invalid PAL metadata");
5507 return false;
5510 /// Parse the assembler directive for old linear-format PAL metadata.
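/// A sketch of the expected form (the directive spelling is taken from
/// PALMD::AssemblerDirective and the register/value numbers are illustrative):
///   .amd_amdgpu_pal_metadata 0x2c0a, 0x0, 0x2c0b, 0x42000000
/// Each key/value pair is forwarded to PALMetadata->setRegister().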
5511 bool AMDGPUAsmParser::ParseDirectivePALMetadata() {
5512 if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) {
5513 return Error(getLoc(),
5514 (Twine(PALMD::AssemblerDirective) + Twine(" directive is "
5515 "not available on non-amdpal OSes")).str());
5518 auto PALMetadata = getTargetStreamer().getPALMetadata();
5519 PALMetadata->setLegacy();
5520 for (;;) {
5521 uint32_t Key, Value;
5522 if (ParseAsAbsoluteExpression(Key)) {
5523 return TokError(Twine("invalid value in ") +
5524 Twine(PALMD::AssemblerDirective));
5526 if (!trySkipToken(AsmToken::Comma)) {
5527 return TokError(Twine("expected an even number of values in ") +
5528 Twine(PALMD::AssemblerDirective));
5530 if (ParseAsAbsoluteExpression(Value)) {
5531 return TokError(Twine("invalid value in ") +
5532 Twine(PALMD::AssemblerDirective));
5534 PALMetadata->setRegister(Key, Value);
5535 if (!trySkipToken(AsmToken::Comma))
5536 break;
5538 return false;
5541 /// ParseDirectiveAMDGPULDS
5542 /// ::= .amdgpu_lds identifier ',' size_expression [',' align_expression]
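///
/// For example (illustrative values):
///   .amdgpu_lds lds_buffer, 4096, 16
/// reserves 4096 bytes of LDS for lds_buffer with 16-byte alignment; the
/// alignment defaults to 4 when omitted and must be a power of two.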
5543 bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() {
5544 if (getParser().checkForValidSection())
5545 return true;
5547 StringRef Name;
5548 SMLoc NameLoc = getLoc();
5549 if (getParser().parseIdentifier(Name))
5550 return TokError("expected identifier in directive");
5552 MCSymbol *Symbol = getContext().getOrCreateSymbol(Name);
5553 if (getParser().parseComma())
5554 return true;
5556 unsigned LocalMemorySize = AMDGPU::IsaInfo::getLocalMemorySize(&getSTI());
5558 int64_t Size;
5559 SMLoc SizeLoc = getLoc();
5560 if (getParser().parseAbsoluteExpression(Size))
5561 return true;
5562 if (Size < 0)
5563 return Error(SizeLoc, "size must be non-negative");
5564 if (Size > LocalMemorySize)
5565 return Error(SizeLoc, "size is too large");
5567 int64_t Alignment = 4;
5568 if (trySkipToken(AsmToken::Comma)) {
5569 SMLoc AlignLoc = getLoc();
5570 if (getParser().parseAbsoluteExpression(Alignment))
5571 return true;
5572 if (Alignment < 0 || !isPowerOf2_64(Alignment))
5573 return Error(AlignLoc, "alignment must be a power of two");
5575 // Alignment larger than the size of LDS is possible in theory, as long
5576     // as the linker manages to place the symbol at address 0, but we do want
5577 // to make sure the alignment fits nicely into a 32-bit integer.
5578 if (Alignment >= 1u << 31)
5579 return Error(AlignLoc, "alignment is too large");
5582 if (parseEOL())
5583 return true;
5585 Symbol->redefineIfPossible();
5586 if (!Symbol->isUndefined())
5587 return Error(NameLoc, "invalid symbol redefinition");
5589 getTargetStreamer().emitAMDGPULDS(Symbol, Size, Align(Alignment));
5590 return false;
5593 bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
5594 StringRef IDVal = DirectiveID.getString();
5596 if (isHsaAbiVersion3AndAbove(&getSTI())) {
5597 if (IDVal == ".amdhsa_kernel")
5598 return ParseDirectiveAMDHSAKernel();
5600 // TODO: Restructure/combine with PAL metadata directive.
5601 if (IDVal == AMDGPU::HSAMD::V3::AssemblerDirectiveBegin)
5602 return ParseDirectiveHSAMetadata();
5603 } else {
5604 if (IDVal == ".hsa_code_object_version")
5605 return ParseDirectiveHSACodeObjectVersion();
5607 if (IDVal == ".hsa_code_object_isa")
5608 return ParseDirectiveHSACodeObjectISA();
5610 if (IDVal == ".amd_kernel_code_t")
5611 return ParseDirectiveAMDKernelCodeT();
5613 if (IDVal == ".amdgpu_hsa_kernel")
5614 return ParseDirectiveAMDGPUHsaKernel();
5616 if (IDVal == ".amd_amdgpu_isa")
5617 return ParseDirectiveISAVersion();
5619 if (IDVal == AMDGPU::HSAMD::AssemblerDirectiveBegin)
5620 return ParseDirectiveHSAMetadata();
5623 if (IDVal == ".amdgcn_target")
5624 return ParseDirectiveAMDGCNTarget();
5626 if (IDVal == ".amdgpu_lds")
5627 return ParseDirectiveAMDGPULDS();
5629 if (IDVal == PALMD::AssemblerDirectiveBegin)
5630 return ParseDirectivePALMetadataBegin();
5632 if (IDVal == PALMD::AssemblerDirective)
5633 return ParseDirectivePALMetadata();
5635 return true;
5638 bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI,
5639 unsigned RegNo) {
5641 if (MRI.regsOverlap(AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15, RegNo))
5642 return isGFX9Plus();
5644   // GFX10+ has 2 more SGPRs: 104 and 105.
5645 if (MRI.regsOverlap(AMDGPU::SGPR104_SGPR105, RegNo))
5646 return hasSGPR104_SGPR105();
5648 switch (RegNo) {
5649 case AMDGPU::SRC_SHARED_BASE_LO:
5650 case AMDGPU::SRC_SHARED_BASE:
5651 case AMDGPU::SRC_SHARED_LIMIT_LO:
5652 case AMDGPU::SRC_SHARED_LIMIT:
5653 case AMDGPU::SRC_PRIVATE_BASE_LO:
5654 case AMDGPU::SRC_PRIVATE_BASE:
5655 case AMDGPU::SRC_PRIVATE_LIMIT_LO:
5656 case AMDGPU::SRC_PRIVATE_LIMIT:
5657 return isGFX9Plus();
5658 case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
5659 return isGFX9Plus() && !isGFX11Plus();
5660 case AMDGPU::TBA:
5661 case AMDGPU::TBA_LO:
5662 case AMDGPU::TBA_HI:
5663 case AMDGPU::TMA:
5664 case AMDGPU::TMA_LO:
5665 case AMDGPU::TMA_HI:
5666 return !isGFX9Plus();
5667 case AMDGPU::XNACK_MASK:
5668 case AMDGPU::XNACK_MASK_LO:
5669 case AMDGPU::XNACK_MASK_HI:
5670 return (isVI() || isGFX9()) && getTargetStreamer().getTargetID()->isXnackSupported();
5671 case AMDGPU::SGPR_NULL:
5672 return isGFX10Plus();
5673 default:
5674 break;
5677 if (isCI())
5678 return true;
5680 if (isSI() || isGFX10Plus()) {
5681 // No flat_scr on SI.
5682 // On GFX10Plus flat scratch is not a valid register operand and can only be
5683 // accessed with s_setreg/s_getreg.
5684 switch (RegNo) {
5685 case AMDGPU::FLAT_SCR:
5686 case AMDGPU::FLAT_SCR_LO:
5687 case AMDGPU::FLAT_SCR_HI:
5688 return false;
5689 default:
5690 return true;
5694 // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that
5695 // SI/CI have.
5696 if (MRI.regsOverlap(AMDGPU::SGPR102_SGPR103, RegNo))
5697 return hasSGPR102_SGPR103();
5699 return true;
5702 ParseStatus AMDGPUAsmParser::parseOperand(OperandVector &Operands,
5703 StringRef Mnemonic,
5704 OperandMode Mode) {
5705 ParseStatus Res = parseVOPD(Operands);
5706 if (Res.isSuccess() || Res.isFailure() || isToken(AsmToken::EndOfStatement))
5707 return Res;
5709 // Try to parse with a custom parser
5710 Res = MatchOperandParserImpl(Operands, Mnemonic);
5712   // If we successfully parsed the operand or if there was an error parsing,
5713 // we are done.
5715 // If we are parsing after we reach EndOfStatement then this means we
5716 // are appending default values to the Operands list. This is only done
5717 // by custom parser, so we shouldn't continue on to the generic parsing.
5718 if (Res.isSuccess() || Res.isFailure() || isToken(AsmToken::EndOfStatement))
5719 return Res;
5721 SMLoc RBraceLoc;
5722 SMLoc LBraceLoc = getLoc();
5723 if (Mode == OperandMode_NSA && trySkipToken(AsmToken::LBrac)) {
5724 unsigned Prefix = Operands.size();
5726 for (;;) {
5727 auto Loc = getLoc();
5728 Res = parseReg(Operands);
5729 if (Res.isNoMatch())
5730 Error(Loc, "expected a register");
5731 if (!Res.isSuccess())
5732 return ParseStatus::Failure;
5734 RBraceLoc = getLoc();
5735 if (trySkipToken(AsmToken::RBrac))
5736 break;
5738 if (!skipToken(AsmToken::Comma,
5739 "expected a comma or a closing square bracket"))
5740 return ParseStatus::Failure;
5743 if (Operands.size() - Prefix > 1) {
5744 Operands.insert(Operands.begin() + Prefix,
5745 AMDGPUOperand::CreateToken(this, "[", LBraceLoc));
5746 Operands.push_back(AMDGPUOperand::CreateToken(this, "]", RBraceLoc));
5749 return ParseStatus::Success;
5752 return parseRegOrImm(Operands);
5755 StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) {
5756 // Clear any forced encodings from the previous instruction.
5757 setForcedEncodingSize(0);
5758 setForcedDPP(false);
5759 setForcedSDWA(false);
5761 if (Name.endswith("_e64_dpp")) {
5762 setForcedDPP(true);
5763 setForcedEncodingSize(64);
5764 return Name.substr(0, Name.size() - 8);
5765 } else if (Name.endswith("_e64")) {
5766 setForcedEncodingSize(64);
5767 return Name.substr(0, Name.size() - 4);
5768 } else if (Name.endswith("_e32")) {
5769 setForcedEncodingSize(32);
5770 return Name.substr(0, Name.size() - 4);
5771 } else if (Name.endswith("_dpp")) {
5772 setForcedDPP(true);
5773 return Name.substr(0, Name.size() - 4);
5774 } else if (Name.endswith("_sdwa")) {
5775 setForcedSDWA(true);
5776 return Name.substr(0, Name.size() - 5);
5778 return Name;
5781 static void applyMnemonicAliases(StringRef &Mnemonic,
5782 const FeatureBitset &Features,
5783 unsigned VariantID);
5785 bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info,
5786 StringRef Name,
5787 SMLoc NameLoc, OperandVector &Operands) {
5788 // Add the instruction mnemonic
5789 Name = parseMnemonicSuffix(Name);
5791 // If the target architecture uses MnemonicAlias, call it here to parse
5792 // operands correctly.
5793 applyMnemonicAliases(Name, getAvailableFeatures(), 0);
5795 Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc));
5797 bool IsMIMG = Name.startswith("image_");
5799 while (!trySkipToken(AsmToken::EndOfStatement)) {
5800 OperandMode Mode = OperandMode_Default;
5801 if (IsMIMG && isGFX10Plus() && Operands.size() == 2)
5802 Mode = OperandMode_NSA;
5803 ParseStatus Res = parseOperand(Operands, Name, Mode);
5805 if (!Res.isSuccess()) {
5806 checkUnsupportedInstruction(Name, NameLoc);
5807 if (!Parser.hasPendingError()) {
5808 // FIXME: use real operand location rather than the current location.
5809 StringRef Msg = Res.isFailure() ? "failed parsing operand."
5810 : "not a valid operand.";
5811 Error(getLoc(), Msg);
5813 while (!trySkipToken(AsmToken::EndOfStatement)) {
5814 lex();
5816 return true;
5819 // Eat the comma or space if there is one.
5820 trySkipToken(AsmToken::Comma);
5823 return false;
5826 //===----------------------------------------------------------------------===//
5827 // Utility functions
5828 //===----------------------------------------------------------------------===//
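// The helpers below handle the common "prefixed" operand forms, for example
// an integer with a prefix such as "offset:16" (parseIntWithPrefix), a small
// 0/1 array such as "neg:[0,1,1]" (parseOperandArrayWithPrefix), and named
// bits such as "r128" / "nor128" (parseNamedBit). The concrete values are
// illustrative only.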
5830 ParseStatus AMDGPUAsmParser::parseTokenOp(StringRef Name,
5831 OperandVector &Operands) {
5832 SMLoc S = getLoc();
5833 if (!trySkipId(Name))
5834 return ParseStatus::NoMatch;
5836 Operands.push_back(AMDGPUOperand::CreateToken(this, Name, S));
5837 return ParseStatus::Success;
5840 ParseStatus AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix,
5841 int64_t &IntVal) {
5843 if (!trySkipId(Prefix, AsmToken::Colon))
5844 return ParseStatus::NoMatch;
5846 return parseExpr(IntVal) ? ParseStatus::Success : ParseStatus::Failure;
5849 ParseStatus AMDGPUAsmParser::parseIntWithPrefix(
5850 const char *Prefix, OperandVector &Operands, AMDGPUOperand::ImmTy ImmTy,
5851 std::function<bool(int64_t &)> ConvertResult) {
5852 SMLoc S = getLoc();
5853 int64_t Value = 0;
5855 ParseStatus Res = parseIntWithPrefix(Prefix, Value);
5856 if (!Res.isSuccess())
5857 return Res;
5859 if (ConvertResult && !ConvertResult(Value)) {
5860 Error(S, "invalid " + StringRef(Prefix) + " value.");
5863 Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy));
5864 return ParseStatus::Success;
5867 ParseStatus AMDGPUAsmParser::parseOperandArrayWithPrefix(
5868 const char *Prefix, OperandVector &Operands, AMDGPUOperand::ImmTy ImmTy,
5869 bool (*ConvertResult)(int64_t &)) {
5870 SMLoc S = getLoc();
5871 if (!trySkipId(Prefix, AsmToken::Colon))
5872 return ParseStatus::NoMatch;
5874 if (!skipToken(AsmToken::LBrac, "expected a left square bracket"))
5875 return ParseStatus::Failure;
5877 unsigned Val = 0;
5878 const unsigned MaxSize = 4;
5880 // FIXME: How to verify the number of elements matches the number of src
5881 // operands?
5882 for (int I = 0; ; ++I) {
5883 int64_t Op;
5884 SMLoc Loc = getLoc();
5885 if (!parseExpr(Op))
5886 return ParseStatus::Failure;
5888 if (Op != 0 && Op != 1)
5889 return Error(Loc, "invalid " + StringRef(Prefix) + " value.");
5891 Val |= (Op << I);
5893 if (trySkipToken(AsmToken::RBrac))
5894 break;
5896 if (I + 1 == MaxSize)
5897 return Error(getLoc(), "expected a closing square bracket");
5899 if (!skipToken(AsmToken::Comma, "expected a comma"))
5900 return ParseStatus::Failure;
5903 Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy));
5904 return ParseStatus::Success;
5907 ParseStatus AMDGPUAsmParser::parseNamedBit(StringRef Name,
5908 OperandVector &Operands,
5909 AMDGPUOperand::ImmTy ImmTy) {
5910 int64_t Bit;
5911 SMLoc S = getLoc();
5913 if (trySkipId(Name)) {
5914 Bit = 1;
5915 } else if (trySkipId("no", Name)) {
5916 Bit = 0;
5917 } else {
5918 return ParseStatus::NoMatch;
5921 if (Name == "r128" && !hasMIMG_R128())
5922 return Error(S, "r128 modifier is not supported on this GPU");
5923 if (Name == "a16" && !hasA16())
5924 return Error(S, "a16 modifier is not supported on this GPU");
5926 if (isGFX9() && ImmTy == AMDGPUOperand::ImmTyA16)
5927 ImmTy = AMDGPUOperand::ImmTyR128A16;
5929 Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy));
5930 return ParseStatus::Success;
5933 unsigned AMDGPUAsmParser::getCPolKind(StringRef Id, StringRef Mnemo,
5934 bool &Disabling) const {
5935 Disabling = Id.consume_front("no");
5937 if (isGFX940() && !Mnemo.startswith("s_")) {
5938 return StringSwitch<unsigned>(Id)
5939 .Case("nt", AMDGPU::CPol::NT)
5940 .Case("sc0", AMDGPU::CPol::SC0)
5941 .Case("sc1", AMDGPU::CPol::SC1)
5942 .Default(0);
5945 return StringSwitch<unsigned>(Id)
5946 .Case("dlc", AMDGPU::CPol::DLC)
5947 .Case("glc", AMDGPU::CPol::GLC)
5948 .Case("scc", AMDGPU::CPol::SCC)
5949 .Case("slc", AMDGPU::CPol::SLC)
5950 .Default(0);
5953 ParseStatus AMDGPUAsmParser::parseCPol(OperandVector &Operands) {
5954 StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken();
5955 SMLoc OpLoc = getLoc();
5956 unsigned Enabled = 0, Seen = 0;
5957 for (;;) {
5958 SMLoc S = getLoc();
5959 bool Disabling;
5960 unsigned CPol = getCPolKind(getId(), Mnemo, Disabling);
5961 if (!CPol)
5962 break;
5964 lex();
5966 if (!isGFX10Plus() && CPol == AMDGPU::CPol::DLC)
5967 return Error(S, "dlc modifier is not supported on this GPU");
5969 if (!isGFX90A() && CPol == AMDGPU::CPol::SCC)
5970 return Error(S, "scc modifier is not supported on this GPU");
5972 if (Seen & CPol)
5973 return Error(S, "duplicate cache policy modifier");
5975 if (!Disabling)
5976 Enabled |= CPol;
5978 Seen |= CPol;
5981 if (!Seen)
5982 return ParseStatus::NoMatch;
5984 Operands.push_back(
5985 AMDGPUOperand::CreateImm(this, Enabled, OpLoc, AMDGPUOperand::ImmTyCPol));
5986 return ParseStatus::Success;
5989 static void addOptionalImmOperand(
5990 MCInst& Inst, const OperandVector& Operands,
5991 AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx,
5992 AMDGPUOperand::ImmTy ImmT,
5993 int64_t Default = 0) {
5994 auto i = OptionalIdx.find(ImmT);
5995 if (i != OptionalIdx.end()) {
5996 unsigned Idx = i->second;
5997 ((AMDGPUOperand &)*Operands[Idx]).addImmOperands(Inst, 1);
5998 } else {
5999 Inst.addOperand(MCOperand::createImm(Default));
6003 ParseStatus AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix,
6004 StringRef &Value,
6005 SMLoc &StringLoc) {
6006 if (!trySkipId(Prefix, AsmToken::Colon))
6007 return ParseStatus::NoMatch;
6009 StringLoc = getLoc();
6010 return parseId(Value, "expected an identifier") ? ParseStatus::Success
6011 : ParseStatus::Failure;
6014 //===----------------------------------------------------------------------===//
6015 // MTBUF format
6016 //===----------------------------------------------------------------------===//
6018 bool AMDGPUAsmParser::tryParseFmt(const char *Pref,
6019 int64_t MaxVal,
6020 int64_t &Fmt) {
6021 int64_t Val;
6022 SMLoc Loc = getLoc();
6024 auto Res = parseIntWithPrefix(Pref, Val);
6025 if (Res.isFailure())
6026 return false;
6027 if (Res.isNoMatch())
6028 return true;
6030 if (Val < 0 || Val > MaxVal) {
6031 Error(Loc, Twine("out of range ", StringRef(Pref)));
6032 return false;
6035 Fmt = Val;
6036 return true;
6039 // dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their
6040 // values to live in a joint format operand in the MCInst encoding.
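// For example (illustrative values), "dfmt:5, nfmt:2" and "nfmt:2 dfmt:5" are
// both accepted and yield the same joint format value; either field may be
// omitted, in which case its default is used.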
6041 ParseStatus AMDGPUAsmParser::parseDfmtNfmt(int64_t &Format) {
6042 using namespace llvm::AMDGPU::MTBUFFormat;
6044 int64_t Dfmt = DFMT_UNDEF;
6045 int64_t Nfmt = NFMT_UNDEF;
6047 // dfmt and nfmt can appear in either order, and each is optional.
6048 for (int I = 0; I < 2; ++I) {
6049 if (Dfmt == DFMT_UNDEF && !tryParseFmt("dfmt", DFMT_MAX, Dfmt))
6050 return ParseStatus::Failure;
6052 if (Nfmt == NFMT_UNDEF && !tryParseFmt("nfmt", NFMT_MAX, Nfmt))
6053 return ParseStatus::Failure;
6055 // Skip optional comma between dfmt/nfmt
6056 // but guard against 2 commas following each other.
6057 if ((Dfmt == DFMT_UNDEF) != (Nfmt == NFMT_UNDEF) &&
6058 !peekToken().is(AsmToken::Comma)) {
6059 trySkipToken(AsmToken::Comma);
6063 if (Dfmt == DFMT_UNDEF && Nfmt == NFMT_UNDEF)
6064 return ParseStatus::NoMatch;
6066 Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt;
6067 Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt;
6069 Format = encodeDfmtNfmt(Dfmt, Nfmt);
6070 return ParseStatus::Success;
6073 ParseStatus AMDGPUAsmParser::parseUfmt(int64_t &Format) {
6074 using namespace llvm::AMDGPU::MTBUFFormat;
6076 int64_t Fmt = UFMT_UNDEF;
6078 if (!tryParseFmt("format", UFMT_MAX, Fmt))
6079 return ParseStatus::Failure;
6081 if (Fmt == UFMT_UNDEF)
6082 return ParseStatus::NoMatch;
6084 Format = Fmt;
6085 return ParseStatus::Success;
6088 bool AMDGPUAsmParser::matchDfmtNfmt(int64_t &Dfmt,
6089 int64_t &Nfmt,
6090 StringRef FormatStr,
6091 SMLoc Loc) {
6092 using namespace llvm::AMDGPU::MTBUFFormat;
6093 int64_t Format;
6095 Format = getDfmt(FormatStr);
6096 if (Format != DFMT_UNDEF) {
6097 Dfmt = Format;
6098 return true;
6101 Format = getNfmt(FormatStr, getSTI());
6102 if (Format != NFMT_UNDEF) {
6103 Nfmt = Format;
6104 return true;
6107 Error(Loc, "unsupported format");
6108 return false;
6111 ParseStatus AMDGPUAsmParser::parseSymbolicSplitFormat(StringRef FormatStr,
6112 SMLoc FormatLoc,
6113 int64_t &Format) {
6114 using namespace llvm::AMDGPU::MTBUFFormat;
6116 int64_t Dfmt = DFMT_UNDEF;
6117 int64_t Nfmt = NFMT_UNDEF;
6118 if (!matchDfmtNfmt(Dfmt, Nfmt, FormatStr, FormatLoc))
6119 return ParseStatus::Failure;
6121 if (trySkipToken(AsmToken::Comma)) {
6122 StringRef Str;
6123 SMLoc Loc = getLoc();
6124 if (!parseId(Str, "expected a format string") ||
6125 !matchDfmtNfmt(Dfmt, Nfmt, Str, Loc))
6126 return ParseStatus::Failure;
6127 if (Dfmt == DFMT_UNDEF)
6128 return Error(Loc, "duplicate numeric format");
6129 if (Nfmt == NFMT_UNDEF)
6130 return Error(Loc, "duplicate data format");
6133 Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt;
6134 Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt;
6136 if (isGFX10Plus()) {
6137 auto Ufmt = convertDfmtNfmt2Ufmt(Dfmt, Nfmt, getSTI());
6138 if (Ufmt == UFMT_UNDEF)
6139 return Error(FormatLoc, "unsupported format");
6140 Format = Ufmt;
6141 } else {
6142 Format = encodeDfmtNfmt(Dfmt, Nfmt);
6145 return ParseStatus::Success;
6148 ParseStatus AMDGPUAsmParser::parseSymbolicUnifiedFormat(StringRef FormatStr,
6149 SMLoc Loc,
6150 int64_t &Format) {
6151 using namespace llvm::AMDGPU::MTBUFFormat;
6153 auto Id = getUnifiedFormat(FormatStr, getSTI());
6154 if (Id == UFMT_UNDEF)
6155 return ParseStatus::NoMatch;
6157 if (!isGFX10Plus())
6158 return Error(Loc, "unified format is not supported on this GPU");
6160 Format = Id;
6161 return ParseStatus::Success;
6164 ParseStatus AMDGPUAsmParser::parseNumericFormat(int64_t &Format) {
6165 using namespace llvm::AMDGPU::MTBUFFormat;
6166 SMLoc Loc = getLoc();
6168 if (!parseExpr(Format))
6169 return ParseStatus::Failure;
6170 if (!isValidFormatEncoding(Format, getSTI()))
6171 return Error(Loc, "out of range format");
6173 return ParseStatus::Success;
6176 ParseStatus AMDGPUAsmParser::parseSymbolicOrNumericFormat(int64_t &Format) {
6177 using namespace llvm::AMDGPU::MTBUFFormat;
6179 if (!trySkipId("format", AsmToken::Colon))
6180 return ParseStatus::NoMatch;
6182 if (trySkipToken(AsmToken::LBrac)) {
6183 StringRef FormatStr;
6184 SMLoc Loc = getLoc();
6185 if (!parseId(FormatStr, "expected a format string"))
6186 return ParseStatus::Failure;
6188 auto Res = parseSymbolicUnifiedFormat(FormatStr, Loc, Format);
6189 if (Res.isNoMatch())
6190 Res = parseSymbolicSplitFormat(FormatStr, Loc, Format);
6191 if (!Res.isSuccess())
6192 return Res;
6194 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
6195 return ParseStatus::Failure;
6197 return ParseStatus::Success;
6200 return parseNumericFormat(Format);
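// Illustrative forms accepted above: a numeric format such as "format:22", or
// a symbolic one such as "format:[BUF_FMT_32_FLOAT]" on GFX10+ (unified) and
// "format:[BUF_DATA_FORMAT_32, BUF_NUM_FORMAT_FLOAT]" on earlier targets
// (split). The symbolic names shown are assumptions; the accepted names come
// from getUnifiedFormat()/getDfmt()/getNfmt().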
6203 ParseStatus AMDGPUAsmParser::parseFORMAT(OperandVector &Operands) {
6204 using namespace llvm::AMDGPU::MTBUFFormat;
6206 int64_t Format = getDefaultFormatEncoding(getSTI());
6207 ParseStatus Res;
6208 SMLoc Loc = getLoc();
6210 // Parse legacy format syntax.
6211 Res = isGFX10Plus() ? parseUfmt(Format) : parseDfmtNfmt(Format);
6212 if (Res.isFailure())
6213 return Res;
6215 bool FormatFound = Res.isSuccess();
6217 Operands.push_back(
6218 AMDGPUOperand::CreateImm(this, Format, Loc, AMDGPUOperand::ImmTyFORMAT));
6220 if (FormatFound)
6221 trySkipToken(AsmToken::Comma);
6223 if (isToken(AsmToken::EndOfStatement)) {
6224 // We are expecting an soffset operand,
6225     // but let the matcher handle the error.
6226 return ParseStatus::Success;
6229 // Parse soffset.
6230 Res = parseRegOrImm(Operands);
6231 if (!Res.isSuccess())
6232 return Res;
6234 trySkipToken(AsmToken::Comma);
6236 if (!FormatFound) {
6237 Res = parseSymbolicOrNumericFormat(Format);
6238 if (Res.isFailure())
6239 return Res;
6240 if (Res.isSuccess()) {
6241 auto Size = Operands.size();
6242 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands[Size - 2]);
6243 assert(Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyFORMAT);
6244 Op.setImm(Format);
6246 return ParseStatus::Success;
6249 if (isId("format") && peekToken().is(AsmToken::Colon))
6250 return Error(getLoc(), "duplicate format");
6251 return ParseStatus::Success;
6254 ParseStatus AMDGPUAsmParser::parseFlatOffset(OperandVector &Operands) {
6255 ParseStatus Res =
6256 parseIntWithPrefix("offset", Operands, AMDGPUOperand::ImmTyOffset);
6257 if (Res.isNoMatch()) {
6258 Res = parseIntWithPrefix("inst_offset", Operands,
6259 AMDGPUOperand::ImmTyInstOffset);
6261 return Res;
6264 ParseStatus AMDGPUAsmParser::parseR128A16(OperandVector &Operands) {
6265 ParseStatus Res =
6266 parseNamedBit("r128", Operands, AMDGPUOperand::ImmTyR128A16);
6267 if (Res.isNoMatch())
6268 Res = parseNamedBit("a16", Operands, AMDGPUOperand::ImmTyA16);
6269 return Res;
6272 ParseStatus AMDGPUAsmParser::parseBLGP(OperandVector &Operands) {
6273 ParseStatus Res =
6274 parseIntWithPrefix("blgp", Operands, AMDGPUOperand::ImmTyBLGP);
6275 if (Res.isNoMatch()) {
6276 Res =
6277 parseOperandArrayWithPrefix("neg", Operands, AMDGPUOperand::ImmTyBLGP);
6279 return Res;
6282 //===----------------------------------------------------------------------===//
6283 // ds
6284 //===----------------------------------------------------------------------===//
6286 void AMDGPUAsmParser::cvtDSOffset01(MCInst &Inst,
6287 const OperandVector &Operands) {
6288 OptionalImmIndexMap OptionalIdx;
6290 for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
6291 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
6293 // Add the register arguments
6294 if (Op.isReg()) {
6295 Op.addRegOperands(Inst, 1);
6296 continue;
6299 // Handle optional arguments
6300 OptionalIdx[Op.getImmTy()] = i;
6303 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset0);
6304 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset1);
6305 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);
6307 Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
6310 void AMDGPUAsmParser::cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
6311 bool IsGdsHardcoded) {
6312 OptionalImmIndexMap OptionalIdx;
6313 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
6314 AMDGPUOperand::ImmTy OffsetType = AMDGPUOperand::ImmTyOffset;
6316 for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
6317 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
6319 auto TiedTo =
6320 Desc.getOperandConstraint(Inst.getNumOperands(), MCOI::TIED_TO);
6322 if (TiedTo != -1) {
6323 assert((unsigned)TiedTo < Inst.getNumOperands());
6324 Inst.addOperand(Inst.getOperand(TiedTo));
6327 // Add the register arguments
6328 if (Op.isReg()) {
6329 Op.addRegOperands(Inst, 1);
6330 continue;
6333 if (Op.isToken() && Op.getToken() == "gds") {
6334 IsGdsHardcoded = true;
6335 continue;
6338 // Handle optional arguments
6339 OptionalIdx[Op.getImmTy()] = i;
6341 if (Op.getImmTy() == AMDGPUOperand::ImmTySwizzle)
6342 OffsetType = AMDGPUOperand::ImmTySwizzle;
6345 addOptionalImmOperand(Inst, Operands, OptionalIdx, OffsetType);
6347 if (!IsGdsHardcoded) {
6348 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);
6350 Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
6353 void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) {
6354 OptionalImmIndexMap OptionalIdx;
6356 unsigned OperandIdx[4];
6357 unsigned EnMask = 0;
6358 int SrcIdx = 0;
6360 for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
6361 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
6363 // Add the register arguments
6364 if (Op.isReg()) {
6365 assert(SrcIdx < 4);
6366 OperandIdx[SrcIdx] = Inst.size();
6367 Op.addRegOperands(Inst, 1);
6368 ++SrcIdx;
6369 continue;
6372 if (Op.isOff()) {
6373 assert(SrcIdx < 4);
6374 OperandIdx[SrcIdx] = Inst.size();
6375 Inst.addOperand(MCOperand::createReg(AMDGPU::NoRegister));
6376 ++SrcIdx;
6377 continue;
6380 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) {
6381 Op.addImmOperands(Inst, 1);
6382 continue;
6385 if (Op.isToken() && (Op.getToken() == "done" || Op.getToken() == "row_en"))
6386 continue;
6388 // Handle optional arguments
6389 OptionalIdx[Op.getImmTy()] = i;
6392 assert(SrcIdx == 4);
6394 bool Compr = false;
6395 if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) {
6396 Compr = true;
6397 Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]);
6398 Inst.getOperand(OperandIdx[2]).setReg(AMDGPU::NoRegister);
6399 Inst.getOperand(OperandIdx[3]).setReg(AMDGPU::NoRegister);
6402 for (auto i = 0; i < SrcIdx; ++i) {
6403 if (Inst.getOperand(OperandIdx[i]).getReg() != AMDGPU::NoRegister) {
6404 EnMask |= Compr? (0x3 << i * 2) : (0x1 << i);
6408 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM);
6409 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr);
6411 Inst.addOperand(MCOperand::createImm(EnMask));
6414 //===----------------------------------------------------------------------===//
6415 // s_waitcnt
6416 //===----------------------------------------------------------------------===//
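// parseSWaitCnt() accepts either a raw immediate or a list of named counters
// joined by '&' or ',', for example (illustrative):
//   s_waitcnt vmcnt(0) & expcnt(0), lgkmcnt(0)
// A "_sat" suffix (e.g. vmcnt_sat) clamps an out-of-range value instead of
// reporting an error.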
6418 static bool
6419 encodeCnt(
6420 const AMDGPU::IsaVersion ISA,
6421 int64_t &IntVal,
6422 int64_t CntVal,
6423 bool Saturate,
6424 unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned),
6425 unsigned (*decode)(const IsaVersion &Version, unsigned))
6427 bool Failed = false;
6429 IntVal = encode(ISA, IntVal, CntVal);
6430 if (CntVal != decode(ISA, IntVal)) {
6431 if (Saturate) {
6432 IntVal = encode(ISA, IntVal, -1);
6433 } else {
6434 Failed = true;
6437 return Failed;
6440 bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) {
6442 SMLoc CntLoc = getLoc();
6443 StringRef CntName = getTokenStr();
6445 if (!skipToken(AsmToken::Identifier, "expected a counter name") ||
6446 !skipToken(AsmToken::LParen, "expected a left parenthesis"))
6447 return false;
6449 int64_t CntVal;
6450 SMLoc ValLoc = getLoc();
6451 if (!parseExpr(CntVal))
6452 return false;
6454 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
6456 bool Failed = true;
6457 bool Sat = CntName.endswith("_sat");
6459 if (CntName == "vmcnt" || CntName == "vmcnt_sat") {
6460 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt);
6461 } else if (CntName == "expcnt" || CntName == "expcnt_sat") {
6462 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt);
6463 } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") {
6464 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt);
6465 } else {
6466 Error(CntLoc, "invalid counter name " + CntName);
6467 return false;
6470 if (Failed) {
6471 Error(ValLoc, "too large value for " + CntName);
6472 return false;
6475 if (!skipToken(AsmToken::RParen, "expected a closing parenthesis"))
6476 return false;
6478 if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) {
6479 if (isToken(AsmToken::EndOfStatement)) {
6480 Error(getLoc(), "expected a counter name");
6481 return false;
6485 return true;
6488 ParseStatus AMDGPUAsmParser::parseSWaitCnt(OperandVector &Operands) {
6489 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
6490 int64_t Waitcnt = getWaitcntBitMask(ISA);
6491 SMLoc S = getLoc();
6493 if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
6494 while (!isToken(AsmToken::EndOfStatement)) {
6495 if (!parseCnt(Waitcnt))
6496 return ParseStatus::Failure;
6498 } else {
6499 if (!parseExpr(Waitcnt))
6500 return ParseStatus::Failure;
6503 Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S));
6504 return ParseStatus::Success;
6507 bool AMDGPUAsmParser::parseDelay(int64_t &Delay) {
6508 SMLoc FieldLoc = getLoc();
6509 StringRef FieldName = getTokenStr();
6510 if (!skipToken(AsmToken::Identifier, "expected a field name") ||
6511 !skipToken(AsmToken::LParen, "expected a left parenthesis"))
6512 return false;
6514 SMLoc ValueLoc = getLoc();
6515 StringRef ValueName = getTokenStr();
6516 if (!skipToken(AsmToken::Identifier, "expected a value name") ||
6517 !skipToken(AsmToken::RParen, "expected a right parenthesis"))
6518 return false;
6520 unsigned Shift;
6521 if (FieldName == "instid0") {
6522 Shift = 0;
6523 } else if (FieldName == "instskip") {
6524 Shift = 4;
6525 } else if (FieldName == "instid1") {
6526 Shift = 7;
6527 } else {
6528 Error(FieldLoc, "invalid field name " + FieldName);
6529 return false;
6532 int Value;
6533 if (Shift == 4) {
6534 // Parse values for instskip.
6535 Value = StringSwitch<int>(ValueName)
6536 .Case("SAME", 0)
6537 .Case("NEXT", 1)
6538 .Case("SKIP_1", 2)
6539 .Case("SKIP_2", 3)
6540 .Case("SKIP_3", 4)
6541 .Case("SKIP_4", 5)
6542 .Default(-1);
6543 } else {
6544 // Parse values for instid0 and instid1.
6545 Value = StringSwitch<int>(ValueName)
6546 .Case("NO_DEP", 0)
6547 .Case("VALU_DEP_1", 1)
6548 .Case("VALU_DEP_2", 2)
6549 .Case("VALU_DEP_3", 3)
6550 .Case("VALU_DEP_4", 4)
6551 .Case("TRANS32_DEP_1", 5)
6552 .Case("TRANS32_DEP_2", 6)
6553 .Case("TRANS32_DEP_3", 7)
6554 .Case("FMA_ACCUM_CYCLE_1", 8)
6555 .Case("SALU_CYCLE_1", 9)
6556 .Case("SALU_CYCLE_2", 10)
6557 .Case("SALU_CYCLE_3", 11)
6558 .Default(-1);
6560 if (Value < 0) {
6561 Error(ValueLoc, "invalid value name " + ValueName);
6562 return false;
6565 Delay |= Value << Shift;
6566 return true;
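// Illustrative operand accepted by parseDelay()/parseSDelayALU():
//   s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
// Fields are separated by '|'; a plain integer expression is also accepted.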
6569 ParseStatus AMDGPUAsmParser::parseSDelayALU(OperandVector &Operands) {
6570 int64_t Delay = 0;
6571 SMLoc S = getLoc();
6573 if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
6574 do {
6575 if (!parseDelay(Delay))
6576 return ParseStatus::Failure;
6577 } while (trySkipToken(AsmToken::Pipe));
6578 } else {
6579 if (!parseExpr(Delay))
6580 return ParseStatus::Failure;
6583 Operands.push_back(AMDGPUOperand::CreateImm(this, Delay, S));
6584 return ParseStatus::Success;
6587 bool
6588 AMDGPUOperand::isSWaitCnt() const {
6589 return isImm();
6592 bool AMDGPUOperand::isSDelayALU() const { return isImm(); }
6594 //===----------------------------------------------------------------------===//
6595 // DepCtr
6596 //===----------------------------------------------------------------------===//
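// Parses a dependency counter operand (as used by s_waitcnt_depctr; the
// instruction and counter names below are assumptions for the example):
//   s_waitcnt_depctr depctr_va_vdst(0) & depctr_vm_vsrc(0)
// Counter names are validated by encodeDepCtr(); a plain integer expression
// is also accepted.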
6598 void AMDGPUAsmParser::depCtrError(SMLoc Loc, int ErrorId,
6599 StringRef DepCtrName) {
6600 switch (ErrorId) {
6601 case OPR_ID_UNKNOWN:
6602 Error(Loc, Twine("invalid counter name ", DepCtrName));
6603 return;
6604 case OPR_ID_UNSUPPORTED:
6605 Error(Loc, Twine(DepCtrName, " is not supported on this GPU"));
6606 return;
6607 case OPR_ID_DUPLICATE:
6608 Error(Loc, Twine("duplicate counter name ", DepCtrName));
6609 return;
6610 case OPR_VAL_INVALID:
6611 Error(Loc, Twine("invalid value for ", DepCtrName));
6612 return;
6613 default:
6614 assert(false);
6618 bool AMDGPUAsmParser::parseDepCtr(int64_t &DepCtr, unsigned &UsedOprMask) {
6620 using namespace llvm::AMDGPU::DepCtr;
6622 SMLoc DepCtrLoc = getLoc();
6623 StringRef DepCtrName = getTokenStr();
6625 if (!skipToken(AsmToken::Identifier, "expected a counter name") ||
6626 !skipToken(AsmToken::LParen, "expected a left parenthesis"))
6627 return false;
6629 int64_t ExprVal;
6630 if (!parseExpr(ExprVal))
6631 return false;
6633 unsigned PrevOprMask = UsedOprMask;
6634 int CntVal = encodeDepCtr(DepCtrName, ExprVal, UsedOprMask, getSTI());
6636 if (CntVal < 0) {
6637 depCtrError(DepCtrLoc, CntVal, DepCtrName);
6638 return false;
6641 if (!skipToken(AsmToken::RParen, "expected a closing parenthesis"))
6642 return false;
6644 if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) {
6645 if (isToken(AsmToken::EndOfStatement)) {
6646 Error(getLoc(), "expected a counter name");
6647 return false;
6651 unsigned CntValMask = PrevOprMask ^ UsedOprMask;
6652 DepCtr = (DepCtr & ~CntValMask) | CntVal;
6653 return true;
6656 ParseStatus AMDGPUAsmParser::parseDepCtr(OperandVector &Operands) {
6657 using namespace llvm::AMDGPU::DepCtr;
6659 int64_t DepCtr = getDefaultDepCtrEncoding(getSTI());
6660 SMLoc Loc = getLoc();
6662 if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
6663 unsigned UsedOprMask = 0;
6664 while (!isToken(AsmToken::EndOfStatement)) {
6665 if (!parseDepCtr(DepCtr, UsedOprMask))
6666 return ParseStatus::Failure;
6668 } else {
6669 if (!parseExpr(DepCtr))
6670 return ParseStatus::Failure;
6673 Operands.push_back(AMDGPUOperand::CreateImm(this, DepCtr, Loc));
6674 return ParseStatus::Success;
6677 bool AMDGPUOperand::isDepCtr() const { return isS16Imm(); }
6679 //===----------------------------------------------------------------------===//
6680 // hwreg
6681 //===----------------------------------------------------------------------===//
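// Parses hwreg(<name or code> [, <bit offset>, <bit width>]) as used by
// s_getreg_b32/s_setreg_b32; for example (the register name is an assumption):
//   s_getreg_b32 s0, hwreg(HW_REG_TRAPSTS, 0, 32)
// A bare 16-bit immediate is also accepted in place of the hwreg() macro.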
6683 bool
6684 AMDGPUAsmParser::parseHwregBody(OperandInfoTy &HwReg,
6685 OperandInfoTy &Offset,
6686 OperandInfoTy &Width) {
6687 using namespace llvm::AMDGPU::Hwreg;
6689 // The register may be specified by name or using a numeric code
6690 HwReg.Loc = getLoc();
6691 if (isToken(AsmToken::Identifier) &&
6692 (HwReg.Id = getHwregId(getTokenStr(), getSTI())) != OPR_ID_UNKNOWN) {
6693 HwReg.IsSymbolic = true;
6694 lex(); // skip register name
6695 } else if (!parseExpr(HwReg.Id, "a register name")) {
6696 return false;
6699 if (trySkipToken(AsmToken::RParen))
6700 return true;
6702 // parse optional params
6703 if (!skipToken(AsmToken::Comma, "expected a comma or a closing parenthesis"))
6704 return false;
6706 Offset.Loc = getLoc();
6707 if (!parseExpr(Offset.Id))
6708 return false;
6710 if (!skipToken(AsmToken::Comma, "expected a comma"))
6711 return false;
6713 Width.Loc = getLoc();
6714 return parseExpr(Width.Id) &&
6715 skipToken(AsmToken::RParen, "expected a closing parenthesis");
6718 bool
6719 AMDGPUAsmParser::validateHwreg(const OperandInfoTy &HwReg,
6720 const OperandInfoTy &Offset,
6721 const OperandInfoTy &Width) {
6723 using namespace llvm::AMDGPU::Hwreg;
6725 if (HwReg.IsSymbolic) {
6726 if (HwReg.Id == OPR_ID_UNSUPPORTED) {
6727 Error(HwReg.Loc,
6728 "specified hardware register is not supported on this GPU");
6729 return false;
6731 } else {
6732 if (!isValidHwreg(HwReg.Id)) {
6733 Error(HwReg.Loc,
6734 "invalid code of hardware register: only 6-bit values are legal");
6735 return false;
6738 if (!isValidHwregOffset(Offset.Id)) {
6739 Error(Offset.Loc, "invalid bit offset: only 5-bit values are legal");
6740 return false;
6742 if (!isValidHwregWidth(Width.Id)) {
6743 Error(Width.Loc,
6744 "invalid bitfield width: only values from 1 to 32 are legal");
6745 return false;
6747 return true;
6750 ParseStatus AMDGPUAsmParser::parseHwreg(OperandVector &Operands) {
6751 using namespace llvm::AMDGPU::Hwreg;
6753 int64_t ImmVal = 0;
6754 SMLoc Loc = getLoc();
6756 if (trySkipId("hwreg", AsmToken::LParen)) {
6757 OperandInfoTy HwReg(OPR_ID_UNKNOWN);
6758 OperandInfoTy Offset(OFFSET_DEFAULT_);
6759 OperandInfoTy Width(WIDTH_DEFAULT_);
6760 if (parseHwregBody(HwReg, Offset, Width) &&
6761 validateHwreg(HwReg, Offset, Width)) {
6762 ImmVal = encodeHwreg(HwReg.Id, Offset.Id, Width.Id);
6763 } else {
6764 return ParseStatus::Failure;
6766 } else if (parseExpr(ImmVal, "a hwreg macro")) {
6767 if (ImmVal < 0 || !isUInt<16>(ImmVal))
6768 return Error(Loc, "invalid immediate: only 16-bit values are legal");
6769 } else {
6770 return ParseStatus::Failure;
6773 Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTyHwreg));
6774 return ParseStatus::Success;
6777 bool AMDGPUOperand::isHwreg() const {
6778 return isImmTy(ImmTyHwreg);
6781 //===----------------------------------------------------------------------===//
6782 // sendmsg
6783 //===----------------------------------------------------------------------===//
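// Parses sendmsg(<msg> [, <operation> [, <stream>]]) for s_sendmsg; for
// example (the message and operation names are assumptions):
//   s_sendmsg sendmsg(MSG_GS, GS_OP_EMIT, 0)
// A bare 16-bit immediate is also accepted in place of the sendmsg() macro.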
6785 bool
6786 AMDGPUAsmParser::parseSendMsgBody(OperandInfoTy &Msg,
6787 OperandInfoTy &Op,
6788 OperandInfoTy &Stream) {
6789 using namespace llvm::AMDGPU::SendMsg;
6791 Msg.Loc = getLoc();
6792 if (isToken(AsmToken::Identifier) &&
6793 (Msg.Id = getMsgId(getTokenStr(), getSTI())) != OPR_ID_UNKNOWN) {
6794 Msg.IsSymbolic = true;
6795 lex(); // skip message name
6796 } else if (!parseExpr(Msg.Id, "a message name")) {
6797 return false;
6800 if (trySkipToken(AsmToken::Comma)) {
6801 Op.IsDefined = true;
6802 Op.Loc = getLoc();
6803 if (isToken(AsmToken::Identifier) &&
6804 (Op.Id = getMsgOpId(Msg.Id, getTokenStr())) >= 0) {
6805 lex(); // skip operation name
6806 } else if (!parseExpr(Op.Id, "an operation name")) {
6807 return false;
6810 if (trySkipToken(AsmToken::Comma)) {
6811 Stream.IsDefined = true;
6812 Stream.Loc = getLoc();
6813 if (!parseExpr(Stream.Id))
6814 return false;
6818 return skipToken(AsmToken::RParen, "expected a closing parenthesis");
6821 bool
6822 AMDGPUAsmParser::validateSendMsg(const OperandInfoTy &Msg,
6823 const OperandInfoTy &Op,
6824 const OperandInfoTy &Stream) {
6825 using namespace llvm::AMDGPU::SendMsg;
6827 // Validation strictness depends on whether message is specified
6828 // in a symbolic or in a numeric form. In the latter case
6829   // only whether the value can be encoded is checked.
6830 bool Strict = Msg.IsSymbolic;
6832 if (Strict) {
6833 if (Msg.Id == OPR_ID_UNSUPPORTED) {
6834 Error(Msg.Loc, "specified message id is not supported on this GPU");
6835 return false;
6837 } else {
6838 if (!isValidMsgId(Msg.Id, getSTI())) {
6839 Error(Msg.Loc, "invalid message id");
6840 return false;
6843 if (Strict && (msgRequiresOp(Msg.Id, getSTI()) != Op.IsDefined)) {
6844 if (Op.IsDefined) {
6845 Error(Op.Loc, "message does not support operations");
6846 } else {
6847 Error(Msg.Loc, "missing message operation");
6849 return false;
6851 if (!isValidMsgOp(Msg.Id, Op.Id, getSTI(), Strict)) {
6852 Error(Op.Loc, "invalid operation id");
6853 return false;
6855 if (Strict && !msgSupportsStream(Msg.Id, Op.Id, getSTI()) &&
6856 Stream.IsDefined) {
6857 Error(Stream.Loc, "message operation does not support streams");
6858 return false;
6860 if (!isValidMsgStream(Msg.Id, Op.Id, Stream.Id, getSTI(), Strict)) {
6861 Error(Stream.Loc, "invalid message stream id");
6862 return false;
6864 return true;
6867 ParseStatus AMDGPUAsmParser::parseSendMsg(OperandVector &Operands) {
6868 using namespace llvm::AMDGPU::SendMsg;
6870 int64_t ImmVal = 0;
6871 SMLoc Loc = getLoc();
6873 if (trySkipId("sendmsg", AsmToken::LParen)) {
6874 OperandInfoTy Msg(OPR_ID_UNKNOWN);
6875 OperandInfoTy Op(OP_NONE_);
6876 OperandInfoTy Stream(STREAM_ID_NONE_);
6877 if (parseSendMsgBody(Msg, Op, Stream) &&
6878 validateSendMsg(Msg, Op, Stream)) {
6879 ImmVal = encodeMsg(Msg.Id, Op.Id, Stream.Id);
6880 } else {
6881 return ParseStatus::Failure;
6883 } else if (parseExpr(ImmVal, "a sendmsg macro")) {
6884 if (ImmVal < 0 || !isUInt<16>(ImmVal))
6885 return Error(Loc, "invalid immediate: only 16-bit values are legal");
6886 } else {
6887 return ParseStatus::Failure;
6890 Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTySendMsg));
6891 return ParseStatus::Success;
6894 bool AMDGPUOperand::isSendMsg() const {
6895 return isImmTy(ImmTySendMsg);
6898 //===----------------------------------------------------------------------===//
6899 // v_interp
6900 //===----------------------------------------------------------------------===//
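// Parses the interpolation slot (p10, p20 or p0) and attribute operands of
// the v_interp_* instructions, for example (illustrative):
//   v_interp_p1_f32 v0, v1, attr4.y
// The attribute channel suffix must be one of .x, .y, .z or .w.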
6902 ParseStatus AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) {
6903 StringRef Str;
6904 SMLoc S = getLoc();
6906 if (!parseId(Str))
6907 return ParseStatus::NoMatch;
6909 int Slot = StringSwitch<int>(Str)
6910 .Case("p10", 0)
6911 .Case("p20", 1)
6912 .Case("p0", 2)
6913 .Default(-1);
6915 if (Slot == -1)
6916 return Error(S, "invalid interpolation slot");
6918 Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S,
6919 AMDGPUOperand::ImmTyInterpSlot));
6920 return ParseStatus::Success;
6923 ParseStatus AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) {
6924 StringRef Str;
6925 SMLoc S = getLoc();
6927 if (!parseId(Str))
6928 return ParseStatus::NoMatch;
6930 if (!Str.startswith("attr"))
6931 return Error(S, "invalid interpolation attribute");
6933 StringRef Chan = Str.take_back(2);
6934 int AttrChan = StringSwitch<int>(Chan)
6935 .Case(".x", 0)
6936 .Case(".y", 1)
6937 .Case(".z", 2)
6938 .Case(".w", 3)
6939 .Default(-1);
6940 if (AttrChan == -1)
6941 return Error(S, "invalid or missing interpolation attribute channel");
6943 Str = Str.drop_back(2).drop_front(4);
6945 uint8_t Attr;
6946 if (Str.getAsInteger(10, Attr))
6947 return Error(S, "invalid or missing interpolation attribute number");
6949 if (Attr > 32)
6950 return Error(S, "out of bounds interpolation attribute number");
6952 SMLoc SChan = SMLoc::getFromPointer(Chan.data());
6954 Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S,
6955 AMDGPUOperand::ImmTyInterpAttr));
6956 Operands.push_back(AMDGPUOperand::CreateImm(
6957 this, AttrChan, SChan, AMDGPUOperand::ImmTyInterpAttrChan));
6958 return ParseStatus::Success;
6961 //===----------------------------------------------------------------------===//
6962 // exp
6963 //===----------------------------------------------------------------------===//
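// Parses the target operand of export instructions, for example
// (illustrative):
//   exp mrt0 v0, v1, v2, v3 done
// Target names such as mrt0 or pos0 are resolved by getTgtId() and then
// checked against the subtarget.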
6965 ParseStatus AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) {
6966 using namespace llvm::AMDGPU::Exp;
6968 StringRef Str;
6969 SMLoc S = getLoc();
6971 if (!parseId(Str))
6972 return ParseStatus::NoMatch;
6974 unsigned Id = getTgtId(Str);
6975 if (Id == ET_INVALID || !isSupportedTgtId(Id, getSTI()))
6976 return Error(S, (Id == ET_INVALID)
6977 ? "invalid exp target"
6978 : "exp target is not supported on this GPU");
6980 Operands.push_back(AMDGPUOperand::CreateImm(this, Id, S,
6981 AMDGPUOperand::ImmTyExpTgt));
6982 return ParseStatus::Success;
6985 //===----------------------------------------------------------------------===//
6986 // parser helpers
6987 //===----------------------------------------------------------------------===//
6989 bool
6990 AMDGPUAsmParser::isId(const AsmToken &Token, const StringRef Id) const {
6991 return Token.is(AsmToken::Identifier) && Token.getString() == Id;
6994 bool
6995 AMDGPUAsmParser::isId(const StringRef Id) const {
6996 return isId(getToken(), Id);
6999 bool
7000 AMDGPUAsmParser::isToken(const AsmToken::TokenKind Kind) const {
7001 return getTokenKind() == Kind;
7004 StringRef AMDGPUAsmParser::getId() const {
7005 return isToken(AsmToken::Identifier) ? getTokenStr() : StringRef();
7008 bool
7009 AMDGPUAsmParser::trySkipId(const StringRef Id) {
7010 if (isId(Id)) {
7011 lex();
7012 return true;
7014 return false;
7017 bool
7018 AMDGPUAsmParser::trySkipId(const StringRef Pref, const StringRef Id) {
7019 if (isToken(AsmToken::Identifier)) {
7020 StringRef Tok = getTokenStr();
7021 if (Tok.startswith(Pref) && Tok.drop_front(Pref.size()) == Id) {
7022 lex();
7023 return true;
7026 return false;
7029 bool
7030 AMDGPUAsmParser::trySkipId(const StringRef Id, const AsmToken::TokenKind Kind) {
7031 if (isId(Id) && peekToken().is(Kind)) {
7032 lex();
7033 lex();
7034 return true;
7036 return false;
7039 bool
7040 AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) {
7041 if (isToken(Kind)) {
7042 lex();
7043 return true;
7045 return false;
7048 bool
7049 AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind,
7050 const StringRef ErrMsg) {
7051 if (!trySkipToken(Kind)) {
7052 Error(getLoc(), ErrMsg);
7053 return false;
7055 return true;
7058 bool
7059 AMDGPUAsmParser::parseExpr(int64_t &Imm, StringRef Expected) {
7060 SMLoc S = getLoc();
7062 const MCExpr *Expr;
7063 if (Parser.parseExpression(Expr))
7064 return false;
7066 if (Expr->evaluateAsAbsolute(Imm))
7067 return true;
7069 if (Expected.empty()) {
7070 Error(S, "expected absolute expression");
7071 } else {
7072 Error(S, Twine("expected ", Expected) +
7073 Twine(" or an absolute expression"));
7075 return false;
7078 bool
7079 AMDGPUAsmParser::parseExpr(OperandVector &Operands) {
7080 SMLoc S = getLoc();
7082 const MCExpr *Expr;
7083 if (Parser.parseExpression(Expr))
7084 return false;
7086 int64_t IntVal;
7087 if (Expr->evaluateAsAbsolute(IntVal)) {
7088 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
7089 } else {
7090 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
7092 return true;
7095 bool
7096 AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) {
7097 if (isToken(AsmToken::String)) {
7098 Val = getToken().getStringContents();
7099 lex();
7100 return true;
7101 } else {
7102 Error(getLoc(), ErrMsg);
7103 return false;
7107 bool
7108 AMDGPUAsmParser::parseId(StringRef &Val, const StringRef ErrMsg) {
7109 if (isToken(AsmToken::Identifier)) {
7110 Val = getTokenStr();
7111 lex();
7112 return true;
7113 } else {
7114 if (!ErrMsg.empty())
7115 Error(getLoc(), ErrMsg);
7116 return false;
7120 AsmToken
7121 AMDGPUAsmParser::getToken() const {
7122 return Parser.getTok();
7125 AsmToken AMDGPUAsmParser::peekToken(bool ShouldSkipSpace) {
7126 return isToken(AsmToken::EndOfStatement)
7127 ? getToken()
7128 : getLexer().peekTok(ShouldSkipSpace);
7131 void
7132 AMDGPUAsmParser::peekTokens(MutableArrayRef<AsmToken> Tokens) {
7133 auto TokCount = getLexer().peekTokens(Tokens);
7135 for (auto Idx = TokCount; Idx < Tokens.size(); ++Idx)
7136 Tokens[Idx] = AsmToken(AsmToken::Error, "");
7139 AsmToken::TokenKind
7140 AMDGPUAsmParser::getTokenKind() const {
7141 return getLexer().getKind();
7144 SMLoc
7145 AMDGPUAsmParser::getLoc() const {
7146 return getToken().getLoc();
7149 StringRef
7150 AMDGPUAsmParser::getTokenStr() const {
7151 return getToken().getString();
7154 void
7155 AMDGPUAsmParser::lex() {
7156 Parser.Lex();
7159 SMLoc AMDGPUAsmParser::getInstLoc(const OperandVector &Operands) const {
7160 return ((AMDGPUOperand &)*Operands[0]).getStartLoc();
7163 SMLoc
7164 AMDGPUAsmParser::getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test,
7165 const OperandVector &Operands) const {
7166 for (unsigned i = Operands.size() - 1; i > 0; --i) {
7167 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7168 if (Test(Op))
7169 return Op.getStartLoc();
7171 return getInstLoc(Operands);
7174 SMLoc
7175 AMDGPUAsmParser::getImmLoc(AMDGPUOperand::ImmTy Type,
7176 const OperandVector &Operands) const {
7177 auto Test = [=](const AMDGPUOperand& Op) { return Op.isImmTy(Type); };
7178 return getOperandLoc(Test, Operands);
7181 SMLoc
7182 AMDGPUAsmParser::getRegLoc(unsigned Reg,
7183 const OperandVector &Operands) const {
7184 auto Test = [=](const AMDGPUOperand& Op) {
7185 return Op.isRegKind() && Op.getReg() == Reg;
7187 return getOperandLoc(Test, Operands);
7190 SMLoc AMDGPUAsmParser::getLitLoc(const OperandVector &Operands,
7191 bool SearchMandatoryLiterals) const {
7192 auto Test = [](const AMDGPUOperand& Op) {
7193 return Op.IsImmKindLiteral() || Op.isExpr();
7195 SMLoc Loc = getOperandLoc(Test, Operands);
7196 if (SearchMandatoryLiterals && Loc == getInstLoc(Operands))
7197 Loc = getMandatoryLitLoc(Operands);
7198 return Loc;
7201 SMLoc AMDGPUAsmParser::getMandatoryLitLoc(const OperandVector &Operands) const {
7202 auto Test = [](const AMDGPUOperand &Op) {
7203 return Op.IsImmKindMandatoryLiteral();
7205 return getOperandLoc(Test, Operands);
7208 SMLoc
7209 AMDGPUAsmParser::getConstLoc(const OperandVector &Operands) const {
7210 auto Test = [](const AMDGPUOperand& Op) {
7211 return Op.isImmKindConst();
7213 return getOperandLoc(Test, Operands);
7216 //===----------------------------------------------------------------------===//
7217 // swizzle
7218 //===----------------------------------------------------------------------===//
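// Parses the "offset:" operand of ds_swizzle_b32, either as a raw 16-bit
// value or via the swizzle() macro, for example (illustrative):
//   ds_swizzle_b32 v0, v1 offset:swizzle(QUAD_PERM, 0, 1, 2, 3)
//   ds_swizzle_b32 v0, v1 offset:swizzle(BITMASK_PERM, "01pi0")
// The mode names come from the Swizzle::IdSymbolic table.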
7220 LLVM_READNONE
7221 static unsigned
7222 encodeBitmaskPerm(const unsigned AndMask,
7223 const unsigned OrMask,
7224 const unsigned XorMask) {
7225 using namespace llvm::AMDGPU::Swizzle;
7227 return BITMASK_PERM_ENC |
7228 (AndMask << BITMASK_AND_SHIFT) |
7229 (OrMask << BITMASK_OR_SHIFT) |
7230 (XorMask << BITMASK_XOR_SHIFT);
7233 bool
7234 AMDGPUAsmParser::parseSwizzleOperand(int64_t &Op,
7235 const unsigned MinVal,
7236 const unsigned MaxVal,
7237 const StringRef ErrMsg,
7238 SMLoc &Loc) {
7239 if (!skipToken(AsmToken::Comma, "expected a comma")) {
7240 return false;
7242 Loc = getLoc();
7243 if (!parseExpr(Op)) {
7244 return false;
7246 if (Op < MinVal || Op > MaxVal) {
7247 Error(Loc, ErrMsg);
7248 return false;
7251 return true;
7254 bool
7255 AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
7256 const unsigned MinVal,
7257 const unsigned MaxVal,
7258 const StringRef ErrMsg) {
7259 SMLoc Loc;
7260 for (unsigned i = 0; i < OpNum; ++i) {
7261 if (!parseSwizzleOperand(Op[i], MinVal, MaxVal, ErrMsg, Loc))
7262 return false;
7265 return true;
7268 bool
7269 AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm) {
7270 using namespace llvm::AMDGPU::Swizzle;
7272 int64_t Lane[LANE_NUM];
7273 if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX,
7274 "expected a 2-bit lane id")) {
7275 Imm = QUAD_PERM_ENC;
7276 for (unsigned I = 0; I < LANE_NUM; ++I) {
7277 Imm |= Lane[I] << (LANE_SHIFT * I);
7279 return true;
7281 return false;
7284 bool
7285 AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) {
7286 using namespace llvm::AMDGPU::Swizzle;
7288 SMLoc Loc;
7289 int64_t GroupSize;
7290 int64_t LaneIdx;
7292 if (!parseSwizzleOperand(GroupSize,
7293 2, 32,
7294 "group size must be in the interval [2,32]",
7295 Loc)) {
7296 return false;
7298 if (!isPowerOf2_64(GroupSize)) {
7299 Error(Loc, "group size must be a power of two");
7300 return false;
7302 if (parseSwizzleOperand(LaneIdx,
7303 0, GroupSize - 1,
7304 "lane id must be in the interval [0,group size - 1]",
7305 Loc)) {
7306 Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0);
7307 return true;
7309 return false;
7312 bool
7313 AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) {
7314 using namespace llvm::AMDGPU::Swizzle;
7316 SMLoc Loc;
7317 int64_t GroupSize;
7319 if (!parseSwizzleOperand(GroupSize,
7320 2, 32,
7321 "group size must be in the interval [2,32]",
7322 Loc)) {
7323 return false;
7325 if (!isPowerOf2_64(GroupSize)) {
7326 Error(Loc, "group size must be a power of two");
7327 return false;
7330 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize - 1);
7331 return true;
7334 bool
7335 AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) {
7336 using namespace llvm::AMDGPU::Swizzle;
7338 SMLoc Loc;
7339 int64_t GroupSize;
7341 if (!parseSwizzleOperand(GroupSize,
7342 1, 16,
7343 "group size must be in the interval [1,16]",
7344 Loc)) {
7345 return false;
7347 if (!isPowerOf2_64(GroupSize)) {
7348 Error(Loc, "group size must be a power of two");
7349 return false;
7352 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize);
7353 return true;
7356 bool
7357 AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) {
7358 using namespace llvm::AMDGPU::Swizzle;
7360 if (!skipToken(AsmToken::Comma, "expected a comma")) {
7361 return false;
7364 StringRef Ctl;
7365 SMLoc StrLoc = getLoc();
7366 if (!parseString(Ctl)) {
7367 return false;
7369 if (Ctl.size() != BITMASK_WIDTH) {
7370 Error(StrLoc, "expected a 5-character mask");
7371 return false;
7374 unsigned AndMask = 0;
7375 unsigned OrMask = 0;
7376 unsigned XorMask = 0;
7378 for (size_t i = 0; i < Ctl.size(); ++i) {
7379 unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i);
7380 switch(Ctl[i]) {
7381 default:
7382 Error(StrLoc, "invalid mask");
7383 return false;
7384 case '0':
7385 break;
7386 case '1':
7387 OrMask |= Mask;
7388 break;
7389 case 'p':
7390 AndMask |= Mask;
7391 break;
7392 case 'i':
7393 AndMask |= Mask;
7394 XorMask |= Mask;
7395 break;
7399 Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask);
7400 return true;
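// Worked example of the control-string decoding above: "pppii" (read from the
// most significant mask bit to the least) gives
//   AndMask == 0b11111, OrMask == 0b00000, XorMask == 0b00011
// i.e. preserve all five lane-id bits and invert the low two, reversing lanes
// within each group of four.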
7403 bool
7404 AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) {
7406 SMLoc OffsetLoc = getLoc();
7408 if (!parseExpr(Imm, "a swizzle macro")) {
7409 return false;
7411 if (!isUInt<16>(Imm)) {
7412 Error(OffsetLoc, "expected a 16-bit offset");
7413 return false;
7415 return true;
7418 bool
7419 AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) {
7420 using namespace llvm::AMDGPU::Swizzle;
7422 if (skipToken(AsmToken::LParen, "expected a left parenthesis")) {
7424 SMLoc ModeLoc = getLoc();
7425 bool Ok = false;
7427 if (trySkipId(IdSymbolic[ID_QUAD_PERM])) {
7428 Ok = parseSwizzleQuadPerm(Imm);
7429 } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) {
7430 Ok = parseSwizzleBitmaskPerm(Imm);
7431 } else if (trySkipId(IdSymbolic[ID_BROADCAST])) {
7432 Ok = parseSwizzleBroadcast(Imm);
7433 } else if (trySkipId(IdSymbolic[ID_SWAP])) {
7434 Ok = parseSwizzleSwap(Imm);
7435 } else if (trySkipId(IdSymbolic[ID_REVERSE])) {
7436 Ok = parseSwizzleReverse(Imm);
7437 } else {
7438 Error(ModeLoc, "expected a swizzle mode");
7441 return Ok && skipToken(AsmToken::RParen, "expected a closing parenthesis");
7444 return false;
7447 ParseStatus AMDGPUAsmParser::parseSwizzle(OperandVector &Operands) {
7448 SMLoc S = getLoc();
7449 int64_t Imm = 0;
7451 if (trySkipId("offset")) {
7453 bool Ok = false;
7454 if (skipToken(AsmToken::Colon, "expected a colon")) {
7455 if (trySkipId("swizzle")) {
7456 Ok = parseSwizzleMacro(Imm);
7457 } else {
7458 Ok = parseSwizzleOffset(Imm);
7462 Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle));
7464 return Ok ? ParseStatus::Success : ParseStatus::Failure;
7466 return ParseStatus::NoMatch;
7469 bool
7470 AMDGPUOperand::isSwizzle() const {
7471 return isImmTy(ImmTySwizzle);
7474 //===----------------------------------------------------------------------===//
7475 // VGPR Index Mode
7476 //===----------------------------------------------------------------------===//
7478 int64_t AMDGPUAsmParser::parseGPRIdxMacro() {
7480 using namespace llvm::AMDGPU::VGPRIndexMode;
7482 if (trySkipToken(AsmToken::RParen)) {
7483 return OFF;
7486 int64_t Imm = 0;
7488 while (true) {
7489 unsigned Mode = 0;
7490 SMLoc S = getLoc();
7492 for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) {
7493 if (trySkipId(IdSymbolic[ModeId])) {
7494 Mode = 1 << ModeId;
7495 break;
7499 if (Mode == 0) {
7500 Error(S, (Imm == 0)?
7501 "expected a VGPR index mode or a closing parenthesis" :
7502 "expected a VGPR index mode");
7503 return UNDEF;
7506 if (Imm & Mode) {
7507 Error(S, "duplicate VGPR index mode");
7508 return UNDEF;
7510 Imm |= Mode;
7512 if (trySkipToken(AsmToken::RParen))
7513 break;
7514 if (!skipToken(AsmToken::Comma,
7515 "expected a comma or a closing parenthesis"))
7516 return UNDEF;
7519 return Imm;
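// Example of the macro form handled above (mode names are taken from
// VGPRIndexMode::IdSymbolic): "gpr_idx(SRC0,DST)" ORs one bit per listed mode,
// giving Imm == (1 << ID_SRC0) | (1 << ID_DST), while an empty "gpr_idx()"
// yields OFF.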
7522 ParseStatus AMDGPUAsmParser::parseGPRIdxMode(OperandVector &Operands) {
7524 using namespace llvm::AMDGPU::VGPRIndexMode;
7526 int64_t Imm = 0;
7527 SMLoc S = getLoc();
7529 if (trySkipId("gpr_idx", AsmToken::LParen)) {
7530 Imm = parseGPRIdxMacro();
7531 if (Imm == UNDEF)
7532 return ParseStatus::Failure;
7533 } else {
7534 if (getParser().parseAbsoluteExpression(Imm))
7535 return ParseStatus::Failure;
7536 if (Imm < 0 || !isUInt<4>(Imm))
7537 return Error(S, "invalid immediate: only 4-bit values are legal");
7540 Operands.push_back(
7541 AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyGprIdxMode));
7542 return ParseStatus::Success;
7545 bool AMDGPUOperand::isGPRIdxMode() const {
7546 return isImmTy(ImmTyGprIdxMode);
7549 //===----------------------------------------------------------------------===//
7550 // sopp branch targets
7551 //===----------------------------------------------------------------------===//
7553 ParseStatus AMDGPUAsmParser::parseSOPPBrTarget(OperandVector &Operands) {
7555 // Make sure we are not parsing something
7556 // that looks like a label or an expression but is not.
7557 // This will improve error messages.
7558 if (isRegister() || isModifier())
7559 return ParseStatus::NoMatch;
7561 if (!parseExpr(Operands))
7562 return ParseStatus::Failure;
7564 AMDGPUOperand &Opr = ((AMDGPUOperand &)*Operands[Operands.size() - 1]);
7565 assert(Opr.isImm() || Opr.isExpr());
7566 SMLoc Loc = Opr.getStartLoc();
7568 // Currently we do not support arbitrary expressions as branch targets.
7569 // Only labels and absolute expressions are accepted.
7570 if (Opr.isExpr() && !Opr.isSymbolRefExpr()) {
7571 Error(Loc, "expected an absolute expression or a label");
7572 } else if (Opr.isImm() && !Opr.isS16Imm()) {
7573 Error(Loc, "expected a 16-bit signed jump offset");
7576 return ParseStatus::Success;
7579 //===----------------------------------------------------------------------===//
7580 // Boolean holding registers
7581 //===----------------------------------------------------------------------===//
7583 ParseStatus AMDGPUAsmParser::parseBoolReg(OperandVector &Operands) {
7584 return parseReg(Operands);
7587 //===----------------------------------------------------------------------===//
7588 // mubuf
7589 //===----------------------------------------------------------------------===//
7591 void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst,
7592 const OperandVector &Operands,
7593 bool IsAtomic) {
7594 OptionalImmIndexMap OptionalIdx;
7595 unsigned FirstOperandIdx = 1;
7596 bool IsAtomicReturn = false;
7598 if (IsAtomic) {
7599 for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) {
7600 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7601 if (!Op.isCPol())
7602 continue;
7603 IsAtomicReturn = Op.getImm() & AMDGPU::CPol::GLC;
7604 break;
7607 if (!IsAtomicReturn) {
7608 int NewOpc = AMDGPU::getAtomicNoRetOp(Inst.getOpcode());
7609 if (NewOpc != -1)
7610 Inst.setOpcode(NewOpc);
7613 IsAtomicReturn = MII.get(Inst.getOpcode()).TSFlags &
7614 SIInstrFlags::IsAtomicRet;
7617 for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) {
7618 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7620 // Add the register arguments
7621 if (Op.isReg()) {
7622 Op.addRegOperands(Inst, 1);
7623 // Insert a tied src for atomic return dst.
7624 // This cannot be postponed as subsequent calls to
7625 // addImmOperands rely on the correct number of MC operands.
7626 if (IsAtomicReturn && i == FirstOperandIdx)
7627 Op.addRegOperands(Inst, 1);
7628 continue;
7631 // Handle the case where soffset is an immediate
7632 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
7633 Op.addImmOperands(Inst, 1);
7634 continue;
7637 // Handle tokens like 'offen' which are sometimes hard-coded into the
7638 // asm string. There are no MCInst operands for these.
7639 if (Op.isToken()) {
7640 continue;
7642 assert(Op.isImm());
7644 // Handle optional arguments
7645 OptionalIdx[Op.getImmTy()] = i;
7648 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset);
7649 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0);
7652 //===----------------------------------------------------------------------===//
7653 // SMEM
7654 //===----------------------------------------------------------------------===//
7656 void AMDGPUAsmParser::cvtSMEMAtomic(MCInst &Inst, const OperandVector &Operands) {
7657 OptionalImmIndexMap OptionalIdx;
7658 bool IsAtomicReturn = false;
7660 for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
7661 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7662 if (!Op.isCPol())
7663 continue;
7664 IsAtomicReturn = Op.getImm() & AMDGPU::CPol::GLC;
7665 break;
7668 if (!IsAtomicReturn) {
7669 int NewOpc = AMDGPU::getAtomicNoRetOp(Inst.getOpcode());
7670 if (NewOpc != -1)
7671 Inst.setOpcode(NewOpc);
7674 IsAtomicReturn = MII.get(Inst.getOpcode()).TSFlags &
7675 SIInstrFlags::IsAtomicRet;
7677 for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
7678 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7680 // Add the register arguments
7681 if (Op.isReg()) {
7682 Op.addRegOperands(Inst, 1);
7683 if (IsAtomicReturn && i == 1)
7684 Op.addRegOperands(Inst, 1);
7685 continue;
7688 // Handle the case where soffset is an immediate
7689 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
7690 Op.addImmOperands(Inst, 1);
7691 continue;
7694 // Handle tokens like 'offen' which are sometimes hard-coded into the
7695 // asm string. There are no MCInst operands for these.
7696 if (Op.isToken()) {
7697 continue;
7699 assert(Op.isImm());
7701 // Handle optional arguments
7702 OptionalIdx[Op.getImmTy()] = i;
7705 if ((int)Inst.getNumOperands() <=
7706 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::offset))
7707 addOptionalImmOperand(Inst, Operands, OptionalIdx,
7708 AMDGPUOperand::ImmTySMEMOffsetMod);
7709 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0);
7712 //===----------------------------------------------------------------------===//
7713 // smrd
7714 //===----------------------------------------------------------------------===//
7716 bool AMDGPUOperand::isSMRDOffset8() const {
7717 return isImmLiteral() && isUInt<8>(getImm());
7720 bool AMDGPUOperand::isSMEMOffset() const {
7721 // Offset range is checked later by validator.
7722 return isImmLiteral();
7725 bool AMDGPUOperand::isSMRDLiteralOffset() const {
7726 // 32-bit literals are only supported on CI, and we only want to use them
7727 // when the offset does not fit in 8 bits.
7728 return isImmLiteral() && !isUInt<8>(getImm()) && isUInt<32>(getImm());
7731 //===----------------------------------------------------------------------===//
7732 // vop3
7733 //===----------------------------------------------------------------------===//
7735 static bool ConvertOmodMul(int64_t &Mul) {
7736 if (Mul != 1 && Mul != 2 && Mul != 4)
7737 return false;
7739 Mul >>= 1;
7740 return true;
7743 static bool ConvertOmodDiv(int64_t &Div) {
7744 if (Div == 1) {
7745 Div = 0;
7746 return true;
7749 if (Div == 2) {
7750 Div = 3;
7751 return true;
7754 return false;
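// Taken together, the two converters above produce the usual omod field
// encoding: mul:2 -> 1, mul:4 -> 2, div:2 -> 3, and mul:1 / div:1 -> 0
// (no output modifier).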
7757 // For pre-gfx11 targets, both bound_ctrl:0 and bound_ctrl:1 are encoded as 1.
7758 // This is intentional and ensures compatibility with sp3.
7759 // See bug 35397 for details.
7760 bool AMDGPUAsmParser::convertDppBoundCtrl(int64_t &BoundCtrl) {
7761 if (BoundCtrl == 0 || BoundCtrl == 1) {
7762 if (!isGFX11Plus())
7763 BoundCtrl = 1;
7764 return true;
7766 return false;
7769 void AMDGPUAsmParser::onBeginOfFile() {
7770 if (!getParser().getStreamer().getTargetStreamer() ||
7771 getSTI().getTargetTriple().getArch() == Triple::r600)
7772 return;
7774 if (!getTargetStreamer().getTargetID())
7775 getTargetStreamer().initializeTargetID(getSTI(), getSTI().getFeatureString(),
7776 // TODO: Should we check the code object version from the directive?
7777 AMDGPU::getAmdhsaCodeObjectVersion());
7779 if (isHsaAbiVersion3AndAbove(&getSTI()))
7780 getTargetStreamer().EmitDirectiveAMDGCNTarget();
7783 ParseStatus AMDGPUAsmParser::parseOModSI(OperandVector &Operands) {
7784 StringRef Name = getTokenStr();
7785 if (Name == "mul") {
7786 return parseIntWithPrefix("mul", Operands,
7787 AMDGPUOperand::ImmTyOModSI, ConvertOmodMul);
7790 if (Name == "div") {
7791 return parseIntWithPrefix("div", Operands,
7792 AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv);
7795 return ParseStatus::NoMatch;
7798 // Determines which bit DST_OP_SEL occupies in the op_sel operand according to
7799 // the number of src operands present, then copies that bit into src0_modifiers.
7800 void cvtVOP3DstOpSelOnly(MCInst &Inst) {
7801 int Opc = Inst.getOpcode();
7802 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
7803 if (OpSelIdx == -1)
7804 return;
7806 int SrcNum;
7807 const int Ops[] = { AMDGPU::OpName::src0,
7808 AMDGPU::OpName::src1,
7809 AMDGPU::OpName::src2 };
7810 for (SrcNum = 0; SrcNum < 3 && AMDGPU::hasNamedOperand(Opc, Ops[SrcNum]);
7811 ++SrcNum)
7813 assert(SrcNum > 0);
7815 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
7817 if ((OpSel & (1 << SrcNum)) != 0) {
7818 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers);
7819 uint32_t ModVal = Inst.getOperand(ModIdx).getImm();
7820 Inst.getOperand(ModIdx).setImm(ModVal | SISrcMods::DST_OP_SEL);
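// Sketch of the effect for a two-source opcode (e.g. the VOP3 form of
// v_add_f16): SrcNum == 2, so op_sel:[0,0,1] sets bit 2 of the op_sel
// operand, and the check above mirrors it into src0_modifiers as DST_OP_SEL.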
7824 void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst,
7825 const OperandVector &Operands) {
7826 cvtVOP3P(Inst, Operands);
7827 cvtVOP3DstOpSelOnly(Inst);
7830 void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands,
7831 OptionalImmIndexMap &OptionalIdx) {
7832 cvtVOP3P(Inst, Operands, OptionalIdx);
7833 cvtVOP3DstOpSelOnly(Inst);
7836 static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) {
7837 return
7838 // 1. This operand is input modifiers
7839 Desc.operands()[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS
7840 // 2. This is not last operand
7841 && Desc.NumOperands > (OpNum + 1)
7842 // 3. Next operand is register class
7843 && Desc.operands()[OpNum + 1].RegClass != -1
7844 // 4. Next register is not tied to any other operand
7845 && Desc.getOperandConstraint(OpNum + 1,
7846 MCOI::OperandConstraint::TIED_TO) == -1;
7849 void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands)
7851 OptionalImmIndexMap OptionalIdx;
7852 unsigned Opc = Inst.getOpcode();
7854 unsigned I = 1;
7855 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
7856 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
7857 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
7860 for (unsigned E = Operands.size(); I != E; ++I) {
7861 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
7862 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
7863 Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
7864 } else if (Op.isInterpSlot() || Op.isInterpAttr() ||
7865 Op.isInterpAttrChan()) {
7866 Inst.addOperand(MCOperand::createImm(Op.getImm()));
7867 } else if (Op.isImmModifier()) {
7868 OptionalIdx[Op.getImmTy()] = I;
7869 } else {
7870 llvm_unreachable("unhandled operand type");
7874 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::high))
7875 addOptionalImmOperand(Inst, Operands, OptionalIdx,
7876 AMDGPUOperand::ImmTyHigh);
7878 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp))
7879 addOptionalImmOperand(Inst, Operands, OptionalIdx,
7880 AMDGPUOperand::ImmTyClampSI);
7882 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::omod))
7883 addOptionalImmOperand(Inst, Operands, OptionalIdx,
7884 AMDGPUOperand::ImmTyOModSI);
7887 void AMDGPUAsmParser::cvtVINTERP(MCInst &Inst, const OperandVector &Operands)
7889 OptionalImmIndexMap OptionalIdx;
7890 unsigned Opc = Inst.getOpcode();
7892 unsigned I = 1;
7893 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
7894 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
7895 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
7898 for (unsigned E = Operands.size(); I != E; ++I) {
7899 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
7900 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
7901 Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
7902 } else if (Op.isImmModifier()) {
7903 OptionalIdx[Op.getImmTy()] = I;
7904 } else {
7905 llvm_unreachable("unhandled operand type");
7909 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
7911 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
7912 if (OpSelIdx != -1)
7913 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOpSel);
7915 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyWaitEXP);
7917 if (OpSelIdx == -1)
7918 return;
7920 const int Ops[] = { AMDGPU::OpName::src0,
7921 AMDGPU::OpName::src1,
7922 AMDGPU::OpName::src2 };
7923 const int ModOps[] = { AMDGPU::OpName::src0_modifiers,
7924 AMDGPU::OpName::src1_modifiers,
7925 AMDGPU::OpName::src2_modifiers };
7927 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
7929 for (int J = 0; J < 3; ++J) {
7930 int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]);
7931 if (OpIdx == -1)
7932 break;
7934 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
7935 uint32_t ModVal = Inst.getOperand(ModIdx).getImm();
7937 if ((OpSel & (1 << J)) != 0)
7938 ModVal |= SISrcMods::OP_SEL_0;
7939 if (ModOps[J] == AMDGPU::OpName::src0_modifiers &&
7940 (OpSel & (1 << 3)) != 0)
7941 ModVal |= SISrcMods::DST_OP_SEL;
7943 Inst.getOperand(ModIdx).setImm(ModVal);
7947 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands,
7948 OptionalImmIndexMap &OptionalIdx) {
7949 unsigned Opc = Inst.getOpcode();
7951 unsigned I = 1;
7952 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
7953 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
7954 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
7957 for (unsigned E = Operands.size(); I != E; ++I) {
7958 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
7959 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
7960 Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
7961 } else if (Op.isImmModifier()) {
7962 OptionalIdx[Op.getImmTy()] = I;
7963 } else if (Op.isRegOrImm()) {
7964 Op.addRegOrImmOperands(Inst, 1);
7965 } else {
7966 llvm_unreachable("unhandled operand type");
7970 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp))
7971 addOptionalImmOperand(Inst, Operands, OptionalIdx,
7972 AMDGPUOperand::ImmTyClampSI);
7974 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::omod))
7975 addOptionalImmOperand(Inst, Operands, OptionalIdx,
7976 AMDGPUOperand::ImmTyOModSI);
7978 // Special case v_mac_{f16, f32} and v_fmac_{f16, f32} (gfx906/gfx10+):
7979 // they have a src2 register operand that is tied to the dst operand.
7980 // The assembler does not allow modifiers for this operand, so src2_modifiers
7981 // must be 0.
7982 if (isMAC(Opc)) {
7983 auto it = Inst.begin();
7984 std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers));
7985 it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2
7986 ++it;
7987 // Copy the operand to ensure it's not invalidated when Inst grows.
7988 Inst.insert(it, MCOperand(Inst.getOperand(0))); // src2 = dst
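// E.g. for the VOP3 form of v_mac_f32 ("v_mac_f32_e64 v0, v1, v2") the code
// above inserts an implicit src2_modifiers of 0 followed by a copy of the dst
// operand (v0) as the tied src2; only src0/src1 keep user-visible modifiers.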
7992 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) {
7993 OptionalImmIndexMap OptionalIdx;
7994 cvtVOP3(Inst, Operands, OptionalIdx);
7997 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands,
7998 OptionalImmIndexMap &OptIdx) {
7999 const int Opc = Inst.getOpcode();
8000 const MCInstrDesc &Desc = MII.get(Opc);
8002 const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0;
8004 if (Opc == AMDGPU::V_CVT_SR_BF8_F32_vi ||
8005 Opc == AMDGPU::V_CVT_SR_FP8_F32_vi) {
8006 Inst.addOperand(MCOperand::createImm(0)); // Placeholder for src2_mods
8007 Inst.addOperand(Inst.getOperand(0));
8010 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::vdst_in)) {
8011 assert(!IsPacked);
8012 Inst.addOperand(Inst.getOperand(0));
8015 // FIXME: This is messy. Parse the modifiers as if this were a normal VOP3
8016 // instruction, and then figure out where to actually put the modifiers.
8018 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
8019 if (OpSelIdx != -1) {
8020 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel);
8023 int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
8024 if (OpSelHiIdx != -1) {
8025 int DefaultVal = IsPacked ? -1 : 0;
8026 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi,
8027 DefaultVal);
8030 int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo);
8031 if (NegLoIdx != -1) {
8032 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo);
8033 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi);
8036 const int Ops[] = { AMDGPU::OpName::src0,
8037 AMDGPU::OpName::src1,
8038 AMDGPU::OpName::src2 };
8039 const int ModOps[] = { AMDGPU::OpName::src0_modifiers,
8040 AMDGPU::OpName::src1_modifiers,
8041 AMDGPU::OpName::src2_modifiers };
8043 unsigned OpSel = 0;
8044 unsigned OpSelHi = 0;
8045 unsigned NegLo = 0;
8046 unsigned NegHi = 0;
8048 if (OpSelIdx != -1)
8049 OpSel = Inst.getOperand(OpSelIdx).getImm();
8051 if (OpSelHiIdx != -1)
8052 OpSelHi = Inst.getOperand(OpSelHiIdx).getImm();
8054 if (NegLoIdx != -1) {
8055 int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi);
8056 NegLo = Inst.getOperand(NegLoIdx).getImm();
8057 NegHi = Inst.getOperand(NegHiIdx).getImm();
8060 for (int J = 0; J < 3; ++J) {
8061 int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]);
8062 if (OpIdx == -1)
8063 break;
8065 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
8067 if (ModIdx == -1)
8068 continue;
8070 uint32_t ModVal = 0;
8072 if ((OpSel & (1 << J)) != 0)
8073 ModVal |= SISrcMods::OP_SEL_0;
8075 if ((OpSelHi & (1 << J)) != 0)
8076 ModVal |= SISrcMods::OP_SEL_1;
8078 if ((NegLo & (1 << J)) != 0)
8079 ModVal |= SISrcMods::NEG;
8081 if ((NegHi & (1 << J)) != 0)
8082 ModVal |= SISrcMods::NEG_HI;
8084 Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal);
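// Net effect of the folding loop above, as a sketch: for a packed opcode
// parsed with op_sel:[1,0,0] and neg_lo:[0,1,0], src0_modifiers gains
// OP_SEL_0 and src1_modifiers gains NEG, while the separate op_sel / neg_lo
// immediate operands keep their parsed values.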
8088 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands) {
8089 OptionalImmIndexMap OptIdx;
8090 cvtVOP3(Inst, Operands, OptIdx);
8091 cvtVOP3P(Inst, Operands, OptIdx);
8094 //===----------------------------------------------------------------------===//
8095 // VOPD
8096 //===----------------------------------------------------------------------===//
8098 ParseStatus AMDGPUAsmParser::parseVOPD(OperandVector &Operands) {
8099 if (!hasVOPD(getSTI()))
8100 return ParseStatus::NoMatch;
8102 if (isToken(AsmToken::Colon) && peekToken(false).is(AsmToken::Colon)) {
8103 SMLoc S = getLoc();
8104 lex();
8105 lex();
8106 Operands.push_back(AMDGPUOperand::CreateToken(this, "::", S));
8107 SMLoc OpYLoc = getLoc();
8108 StringRef OpYName;
8109 if (isToken(AsmToken::Identifier) && !Parser.parseIdentifier(OpYName)) {
8110 Operands.push_back(AMDGPUOperand::CreateToken(this, OpYName, OpYLoc));
8111 return ParseStatus::Success;
8113 return Error(OpYLoc, "expected a VOPDY instruction after ::");
8115 return ParseStatus::NoMatch;
8118 // Create VOPD MCInst operands using parsed assembler operands.
8119 void AMDGPUAsmParser::cvtVOPD(MCInst &Inst, const OperandVector &Operands) {
8120 auto addOp = [&](uint16_t ParsedOprIdx) {
8121 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[ParsedOprIdx]);
8122 if (Op.isReg()) {
8123 Op.addRegOperands(Inst, 1);
8124 return;
8126 if (Op.isImm()) {
8127 Op.addImmOperands(Inst, 1);
8128 return;
8130 llvm_unreachable("Unhandled operand type in cvtVOPD");
8133 const auto &InstInfo = getVOPDInstInfo(Inst.getOpcode(), &MII);
8135 // MCInst operands are ordered as follows:
8136 // dstX, dstY, src0X [, other OpX operands], src0Y [, other OpY operands]
8138 for (auto CompIdx : VOPD::COMPONENTS) {
8139 addOp(InstInfo[CompIdx].getIndexOfDstInParsedOperands());
8142 for (auto CompIdx : VOPD::COMPONENTS) {
8143 const auto &CInfo = InstInfo[CompIdx];
8144 auto CompSrcOperandsNum = InstInfo[CompIdx].getCompParsedSrcOperandsNum();
8145 for (unsigned CompSrcIdx = 0; CompSrcIdx < CompSrcOperandsNum; ++CompSrcIdx)
8146 addOp(CInfo.getIndexOfSrcInParsedOperands(CompSrcIdx));
8147 if (CInfo.hasSrc2Acc())
8148 addOp(CInfo.getIndexOfDstInParsedOperands());
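// Example of the resulting operand order, assuming a dual-issue pair such as
// "v_dual_mov_b32 v0, v2 :: v_dual_add_f32 v1, v3, v4": the MCInst operands
// become v0, v1, v2, v3, v4 (both destinations first, then the OpX sources,
// then the OpY sources).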
8152 //===----------------------------------------------------------------------===//
8153 // dpp
8154 //===----------------------------------------------------------------------===//
8156 bool AMDGPUOperand::isDPP8() const {
8157 return isImmTy(ImmTyDPP8);
8160 bool AMDGPUOperand::isDPPCtrl() const {
8161 using namespace AMDGPU::DPP;
8163 bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm());
8164 if (result) {
8165 int64_t Imm = getImm();
8166 return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) ||
8167 (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) ||
8168 (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) ||
8169 (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) ||
8170 (Imm == DppCtrl::WAVE_SHL1) ||
8171 (Imm == DppCtrl::WAVE_ROL1) ||
8172 (Imm == DppCtrl::WAVE_SHR1) ||
8173 (Imm == DppCtrl::WAVE_ROR1) ||
8174 (Imm == DppCtrl::ROW_MIRROR) ||
8175 (Imm == DppCtrl::ROW_HALF_MIRROR) ||
8176 (Imm == DppCtrl::BCAST15) ||
8177 (Imm == DppCtrl::BCAST31) ||
8178 (Imm >= DppCtrl::ROW_SHARE_FIRST && Imm <= DppCtrl::ROW_SHARE_LAST) ||
8179 (Imm >= DppCtrl::ROW_XMASK_FIRST && Imm <= DppCtrl::ROW_XMASK_LAST);
8181 return false;
8184 //===----------------------------------------------------------------------===//
8185 // mAI
8186 //===----------------------------------------------------------------------===//
8188 bool AMDGPUOperand::isBLGP() const {
8189 return isImm() && getImmTy() == ImmTyBLGP && isUInt<3>(getImm());
8192 bool AMDGPUOperand::isCBSZ() const {
8193 return isImm() && getImmTy() == ImmTyCBSZ && isUInt<3>(getImm());
8196 bool AMDGPUOperand::isABID() const {
8197 return isImm() && getImmTy() == ImmTyABID && isUInt<4>(getImm());
8200 bool AMDGPUOperand::isS16Imm() const {
8201 return isImmLiteral() && (isInt<16>(getImm()) || isUInt<16>(getImm()));
8204 bool AMDGPUOperand::isU16Imm() const {
8205 return isImmLiteral() && isUInt<16>(getImm());
8208 //===----------------------------------------------------------------------===//
8209 // dim
8210 //===----------------------------------------------------------------------===//
8212 bool AMDGPUAsmParser::parseDimId(unsigned &Encoding) {
8213 // We want to allow "dim:1D" etc.,
8214 // but the initial 1 is tokenized as an integer.
8215 std::string Token;
8216 if (isToken(AsmToken::Integer)) {
8217 SMLoc Loc = getToken().getEndLoc();
8218 Token = std::string(getTokenStr());
8219 lex();
8220 if (getLoc() != Loc)
8221 return false;
8224 StringRef Suffix;
8225 if (!parseId(Suffix))
8226 return false;
8227 Token += Suffix;
8229 StringRef DimId = Token;
8230 if (DimId.startswith("SQ_RSRC_IMG_"))
8231 DimId = DimId.drop_front(12);
8233 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByAsmSuffix(DimId);
8234 if (!DimInfo)
8235 return false;
8237 Encoding = DimInfo->Encoding;
8238 return true;
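// E.g. "dim:2D" arrives as the integer token "2" immediately followed by the
// identifier "D"; the two are re-joined above and looked up the same way as
// the long form "dim:SQ_RSRC_IMG_2D".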
8241 ParseStatus AMDGPUAsmParser::parseDim(OperandVector &Operands) {
8242 if (!isGFX10Plus())
8243 return ParseStatus::NoMatch;
8245 SMLoc S = getLoc();
8247 if (!trySkipId("dim", AsmToken::Colon))
8248 return ParseStatus::NoMatch;
8250 unsigned Encoding;
8251 SMLoc Loc = getLoc();
8252 if (!parseDimId(Encoding))
8253 return Error(Loc, "invalid dim value");
8255 Operands.push_back(AMDGPUOperand::CreateImm(this, Encoding, S,
8256 AMDGPUOperand::ImmTyDim));
8257 return ParseStatus::Success;
8260 //===----------------------------------------------------------------------===//
8261 // dpp
8262 //===----------------------------------------------------------------------===//
8264 ParseStatus AMDGPUAsmParser::parseDPP8(OperandVector &Operands) {
8265 SMLoc S = getLoc();
8267 if (!isGFX10Plus() || !trySkipId("dpp8", AsmToken::Colon))
8268 return ParseStatus::NoMatch;
8270 // dpp8:[%d,%d,%d,%d,%d,%d,%d,%d]
8272 int64_t Sels[8];
8274 if (!skipToken(AsmToken::LBrac, "expected an opening square bracket"))
8275 return ParseStatus::Failure;
8277 for (size_t i = 0; i < 8; ++i) {
8278 if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma"))
8279 return ParseStatus::Failure;
8281 SMLoc Loc = getLoc();
8282 if (getParser().parseAbsoluteExpression(Sels[i]))
8283 return ParseStatus::Failure;
8284 if (Sels[i] < 0 || Sels[i] > 7)
8285 return Error(Loc, "expected a 3-bit value");
8288 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
8289 return ParseStatus::Failure;
8291 unsigned DPP8 = 0;
8292 for (size_t i = 0; i < 8; ++i)
8293 DPP8 |= (Sels[i] << (i * 3));
8295 Operands.push_back(AMDGPUOperand::CreateImm(this, DPP8, S, AMDGPUOperand::ImmTyDPP8));
8296 return ParseStatus::Success;
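// The packing above gives each lane selector 3 bits, with lane 0 in the
// lowest bits, so e.g. dpp8:[0,1,2,3,4,5,6,7] (every lane selects itself)
// encodes as octal 076543210 when read from lane 7 down to lane 0.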
8299 bool
8300 AMDGPUAsmParser::isSupportedDPPCtrl(StringRef Ctrl,
8301 const OperandVector &Operands) {
8302 if (Ctrl == "row_newbcast")
8303 return isGFX90A();
8305 if (Ctrl == "row_share" ||
8306 Ctrl == "row_xmask")
8307 return isGFX10Plus();
8309 if (Ctrl == "wave_shl" ||
8310 Ctrl == "wave_shr" ||
8311 Ctrl == "wave_rol" ||
8312 Ctrl == "wave_ror" ||
8313 Ctrl == "row_bcast")
8314 return isVI() || isGFX9();
8316 return Ctrl == "row_mirror" ||
8317 Ctrl == "row_half_mirror" ||
8318 Ctrl == "quad_perm" ||
8319 Ctrl == "row_shl" ||
8320 Ctrl == "row_shr" ||
8321 Ctrl == "row_ror";
8324 int64_t
8325 AMDGPUAsmParser::parseDPPCtrlPerm() {
8326 // quad_perm:[%d,%d,%d,%d]
8328 if (!skipToken(AsmToken::LBrac, "expected an opening square bracket"))
8329 return -1;
8331 int64_t Val = 0;
8332 for (int i = 0; i < 4; ++i) {
8333 if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma"))
8334 return -1;
8336 int64_t Temp;
8337 SMLoc Loc = getLoc();
8338 if (getParser().parseAbsoluteExpression(Temp))
8339 return -1;
8340 if (Temp < 0 || Temp > 3) {
8341 Error(Loc, "expected a 2-bit value");
8342 return -1;
8345 Val += (Temp << i * 2);
8348 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
8349 return -1;
8351 return Val;
8354 int64_t
8355 AMDGPUAsmParser::parseDPPCtrlSel(StringRef Ctrl) {
8356 using namespace AMDGPU::DPP;
8358 // sel:%d
8360 int64_t Val;
8361 SMLoc Loc = getLoc();
8363 if (getParser().parseAbsoluteExpression(Val))
8364 return -1;
8366 struct DppCtrlCheck {
8367 int64_t Ctrl;
8368 int Lo;
8369 int Hi;
8372 DppCtrlCheck Check = StringSwitch<DppCtrlCheck>(Ctrl)
8373 .Case("wave_shl", {DppCtrl::WAVE_SHL1, 1, 1})
8374 .Case("wave_rol", {DppCtrl::WAVE_ROL1, 1, 1})
8375 .Case("wave_shr", {DppCtrl::WAVE_SHR1, 1, 1})
8376 .Case("wave_ror", {DppCtrl::WAVE_ROR1, 1, 1})
8377 .Case("row_shl", {DppCtrl::ROW_SHL0, 1, 15})
8378 .Case("row_shr", {DppCtrl::ROW_SHR0, 1, 15})
8379 .Case("row_ror", {DppCtrl::ROW_ROR0, 1, 15})
8380 .Case("row_share", {DppCtrl::ROW_SHARE_FIRST, 0, 15})
8381 .Case("row_xmask", {DppCtrl::ROW_XMASK_FIRST, 0, 15})
8382 .Case("row_newbcast", {DppCtrl::ROW_NEWBCAST_FIRST, 0, 15})
8383 .Default({-1, 0, 0});
8385 bool Valid;
8386 if (Check.Ctrl == -1) {
8387 Valid = (Ctrl == "row_bcast" && (Val == 15 || Val == 31));
8388 Val = (Val == 15)? DppCtrl::BCAST15 : DppCtrl::BCAST31;
8389 } else {
8390 Valid = Check.Lo <= Val && Val <= Check.Hi;
8391 Val = (Check.Lo == Check.Hi) ? Check.Ctrl : (Check.Ctrl | Val);
8394 if (!Valid) {
8395 Error(Loc, Twine("invalid ", Ctrl) + Twine(" value"));
8396 return -1;
8399 return Val;
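// Sketch of the resulting encodings: for the wave_* controls Lo == Hi, so the
// value collapses to the fixed control code (e.g. "wave_shl:1" -> WAVE_SHL1),
// while for the row_* controls the parsed value is OR'ed onto the base
// (e.g. "row_shl:1" -> ROW_SHL0 | 1).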
8402 ParseStatus AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) {
8403 using namespace AMDGPU::DPP;
8405 if (!isToken(AsmToken::Identifier) ||
8406 !isSupportedDPPCtrl(getTokenStr(), Operands))
8407 return ParseStatus::NoMatch;
8409 SMLoc S = getLoc();
8410 int64_t Val = -1;
8411 StringRef Ctrl;
8413 parseId(Ctrl);
8415 if (Ctrl == "row_mirror") {
8416 Val = DppCtrl::ROW_MIRROR;
8417 } else if (Ctrl == "row_half_mirror") {
8418 Val = DppCtrl::ROW_HALF_MIRROR;
8419 } else {
8420 if (skipToken(AsmToken::Colon, "expected a colon")) {
8421 if (Ctrl == "quad_perm") {
8422 Val = parseDPPCtrlPerm();
8423 } else {
8424 Val = parseDPPCtrlSel(Ctrl);
8429 if (Val == -1)
8430 return ParseStatus::Failure;
8432 Operands.push_back(
8433 AMDGPUOperand::CreateImm(this, Val, S, AMDGPUOperand::ImmTyDppCtrl));
8434 return ParseStatus::Success;
8437 void AMDGPUAsmParser::cvtVOP3DPP(MCInst &Inst, const OperandVector &Operands,
8438 bool IsDPP8) {
8439 OptionalImmIndexMap OptionalIdx;
8440 unsigned Opc = Inst.getOpcode();
8441 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
8443 // MAC instructions are special because they have an 'old'
8444 // operand which is not tied to dst (but is assumed to be).
8445 // They also have a dummy, unused src2_modifiers operand.
8446 int OldIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::old);
8447 int Src2ModIdx =
8448 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers);
8449 bool IsMAC = OldIdx != -1 && Src2ModIdx != -1 &&
8450 Desc.getOperandConstraint(OldIdx, MCOI::TIED_TO) == -1;
8452 unsigned I = 1;
8453 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
8454 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
8457 int Fi = 0;
8458 for (unsigned E = Operands.size(); I != E; ++I) {
8460 if (IsMAC) {
8461 int NumOperands = Inst.getNumOperands();
8462 if (OldIdx == NumOperands) {
8463 // Handle old operand
8464 constexpr int DST_IDX = 0;
8465 Inst.addOperand(Inst.getOperand(DST_IDX));
8466 } else if (Src2ModIdx == NumOperands) {
8467 // Add unused dummy src2_modifiers
8468 Inst.addOperand(MCOperand::createImm(0));
8472 auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(),
8473 MCOI::TIED_TO);
8474 if (TiedTo != -1) {
8475 assert((unsigned)TiedTo < Inst.getNumOperands());
8476 // handle tied old or src2 for MAC instructions
8477 Inst.addOperand(Inst.getOperand(TiedTo));
8479 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
8480 // Add the register arguments
8481 if (IsDPP8 && Op.isDppFI()) {
8482 Fi = Op.getImm();
8483 } else if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
8484 Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
8485 } else if (Op.isReg()) {
8486 Op.addRegOperands(Inst, 1);
8487 } else if (Op.isImm() &&
8488 Desc.operands()[Inst.getNumOperands()].RegClass != -1) {
8489 assert(!Op.isImmKindLiteral() && "Cannot use literal with DPP");
8490 Op.addImmOperands(Inst, 1);
8491 } else if (Op.isImm()) {
8492 OptionalIdx[Op.getImmTy()] = I;
8493 } else {
8494 llvm_unreachable("unhandled operand type");
8497 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp))
8498 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
8500 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::omod))
8501 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
8503 if (Desc.TSFlags & SIInstrFlags::VOP3P)
8504 cvtVOP3P(Inst, Operands, OptionalIdx);
8505 else if (Desc.TSFlags & SIInstrFlags::VOP3)
8506 cvtVOP3OpSel(Inst, Operands, OptionalIdx);
8507 else if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::op_sel)) {
8508 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOpSel);
8511 if (IsDPP8) {
8512 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDPP8);
8513 using namespace llvm::AMDGPU::DPP;
8514 Inst.addOperand(MCOperand::createImm(Fi? DPP8_FI_1 : DPP8_FI_0));
8515 } else {
8516 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppCtrl, 0xe4);
8517 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf);
8518 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf);
8519 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl);
8521 if (AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::fi))
8522 addOptionalImmOperand(Inst, Operands, OptionalIdx,
8523 AMDGPUOperand::ImmTyDppFI);
8527 void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8) {
8528 OptionalImmIndexMap OptionalIdx;
8530 unsigned I = 1;
8531 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
8532 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
8533 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
8536 int Fi = 0;
8537 for (unsigned E = Operands.size(); I != E; ++I) {
8538 auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(),
8539 MCOI::TIED_TO);
8540 if (TiedTo != -1) {
8541 assert((unsigned)TiedTo < Inst.getNumOperands());
8542 // handle tied old or src2 for MAC instructions
8543 Inst.addOperand(Inst.getOperand(TiedTo));
8545 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
8546 // Add the register arguments
8547 if (Op.isReg() && validateVccOperand(Op.getReg())) {
8548 // VOP2b (v_add_u32, v_sub_u32, ...) dpp uses the "vcc" token.
8549 // Skip it.
8550 continue;
8553 if (IsDPP8) {
8554 if (Op.isDPP8()) {
8555 Op.addImmOperands(Inst, 1);
8556 } else if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
8557 Op.addRegWithFPInputModsOperands(Inst, 2);
8558 } else if (Op.isDppFI()) {
8559 Fi = Op.getImm();
8560 } else if (Op.isReg()) {
8561 Op.addRegOperands(Inst, 1);
8562 } else {
8563 llvm_unreachable("Invalid operand type");
8565 } else {
8566 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
8567 Op.addRegWithFPInputModsOperands(Inst, 2);
8568 } else if (Op.isReg()) {
8569 Op.addRegOperands(Inst, 1);
8570 } else if (Op.isDPPCtrl()) {
8571 Op.addImmOperands(Inst, 1);
8572 } else if (Op.isImm()) {
8573 // Handle optional arguments
8574 OptionalIdx[Op.getImmTy()] = I;
8575 } else {
8576 llvm_unreachable("Invalid operand type");
8581 if (IsDPP8) {
8582 using namespace llvm::AMDGPU::DPP;
8583 Inst.addOperand(MCOperand::createImm(Fi? DPP8_FI_1 : DPP8_FI_0));
8584 } else {
8585 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf);
8586 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf);
8587 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl);
8588 if (AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::fi)) {
8589 addOptionalImmOperand(Inst, Operands, OptionalIdx,
8590 AMDGPUOperand::ImmTyDppFI);
8595 //===----------------------------------------------------------------------===//
8596 // sdwa
8597 //===----------------------------------------------------------------------===//
8599 ParseStatus AMDGPUAsmParser::parseSDWASel(OperandVector &Operands,
8600 StringRef Prefix,
8601 AMDGPUOperand::ImmTy Type) {
8602 using namespace llvm::AMDGPU::SDWA;
8604 SMLoc S = getLoc();
8605 StringRef Value;
8607 SMLoc StringLoc;
8608 ParseStatus Res = parseStringWithPrefix(Prefix, Value, StringLoc);
8609 if (!Res.isSuccess())
8610 return Res;
8612 int64_t Int;
8613 Int = StringSwitch<int64_t>(Value)
8614 .Case("BYTE_0", SdwaSel::BYTE_0)
8615 .Case("BYTE_1", SdwaSel::BYTE_1)
8616 .Case("BYTE_2", SdwaSel::BYTE_2)
8617 .Case("BYTE_3", SdwaSel::BYTE_3)
8618 .Case("WORD_0", SdwaSel::WORD_0)
8619 .Case("WORD_1", SdwaSel::WORD_1)
8620 .Case("DWORD", SdwaSel::DWORD)
8621 .Default(0xffffffff);
8623 if (Int == 0xffffffff)
8624 return Error(StringLoc, "invalid " + Twine(Prefix) + " value");
8626 Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, Type));
8627 return ParseStatus::Success;
8630 ParseStatus AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) {
8631 using namespace llvm::AMDGPU::SDWA;
8633 SMLoc S = getLoc();
8634 StringRef Value;
8636 SMLoc StringLoc;
8637 ParseStatus Res = parseStringWithPrefix("dst_unused", Value, StringLoc);
8638 if (!Res.isSuccess())
8639 return Res;
8641 int64_t Int;
8642 Int = StringSwitch<int64_t>(Value)
8643 .Case("UNUSED_PAD", DstUnused::UNUSED_PAD)
8644 .Case("UNUSED_SEXT", DstUnused::UNUSED_SEXT)
8645 .Case("UNUSED_PRESERVE", DstUnused::UNUSED_PRESERVE)
8646 .Default(0xffffffff);
8648 if (Int == 0xffffffff)
8649 return Error(StringLoc, "invalid dst_unused value");
8651 Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTySDWADstUnused));
8652 return ParseStatus::Success;
8655 void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) {
8656 cvtSDWA(Inst, Operands, SIInstrFlags::VOP1);
8659 void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) {
8660 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2);
8663 void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) {
8664 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true, true);
8667 void AMDGPUAsmParser::cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands) {
8668 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, false, true);
8671 void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) {
8672 cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, isVI());
8675 void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands,
8676 uint64_t BasicInstType,
8677 bool SkipDstVcc,
8678 bool SkipSrcVcc) {
8679 using namespace llvm::AMDGPU::SDWA;
8681 OptionalImmIndexMap OptionalIdx;
8682 bool SkipVcc = SkipDstVcc || SkipSrcVcc;
8683 bool SkippedVcc = false;
8685 unsigned I = 1;
8686 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
8687 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
8688 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
8691 for (unsigned E = Operands.size(); I != E; ++I) {
8692 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
8693 if (SkipVcc && !SkippedVcc && Op.isReg() &&
8694 (Op.getReg() == AMDGPU::VCC || Op.getReg() == AMDGPU::VCC_LO)) {
8695 // VOP2b (v_add_u32, v_sub_u32, ...) sdwa uses the "vcc" token as dst.
8696 // Skip it if it's 2nd (e.g. v_add_i32_sdwa v1, vcc, v2, v3)
8697 // or 4th (v_addc_u32_sdwa v1, vcc, v2, v3, vcc) operand.
8698 // Skip VCC only if we didn't skip it on previous iteration.
8699 // Note that src0 and src1 occupy 2 slots each because of modifiers.
8700 if (BasicInstType == SIInstrFlags::VOP2 &&
8701 ((SkipDstVcc && Inst.getNumOperands() == 1) ||
8702 (SkipSrcVcc && Inst.getNumOperands() == 5))) {
8703 SkippedVcc = true;
8704 continue;
8705 } else if (BasicInstType == SIInstrFlags::VOPC &&
8706 Inst.getNumOperands() == 0) {
8707 SkippedVcc = true;
8708 continue;
8711 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
8712 Op.addRegOrImmWithInputModsOperands(Inst, 2);
8713 } else if (Op.isImm()) {
8714 // Handle optional arguments
8715 OptionalIdx[Op.getImmTy()] = I;
8716 } else {
8717 llvm_unreachable("Invalid operand type");
8719 SkippedVcc = false;
8722 const unsigned Opc = Inst.getOpcode();
8723 if (Opc != AMDGPU::V_NOP_sdwa_gfx10 && Opc != AMDGPU::V_NOP_sdwa_gfx9 &&
8724 Opc != AMDGPU::V_NOP_sdwa_vi) {
8725 // V_NOP_sdwa_{vi,gfx9,gfx10} has no optional sdwa arguments
8726 switch (BasicInstType) {
8727 case SIInstrFlags::VOP1:
8728 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp))
8729 addOptionalImmOperand(Inst, Operands, OptionalIdx,
8730 AMDGPUOperand::ImmTyClampSI, 0);
8732 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::omod))
8733 addOptionalImmOperand(Inst, Operands, OptionalIdx,
8734 AMDGPUOperand::ImmTyOModSI, 0);
8736 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::dst_sel))
8737 addOptionalImmOperand(Inst, Operands, OptionalIdx,
8738 AMDGPUOperand::ImmTySDWADstSel, SdwaSel::DWORD);
8740 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::dst_unused))
8741 addOptionalImmOperand(Inst, Operands, OptionalIdx,
8742 AMDGPUOperand::ImmTySDWADstUnused,
8743 DstUnused::UNUSED_PRESERVE);
8745 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc0Sel, SdwaSel::DWORD);
8746 break;
8748 case SIInstrFlags::VOP2:
8749 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
8751 if (AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::omod))
8752 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
8754 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWADstSel, SdwaSel::DWORD);
8755 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWADstUnused, DstUnused::UNUSED_PRESERVE);
8756 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc0Sel, SdwaSel::DWORD);
8757 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc1Sel, SdwaSel::DWORD);
8758 break;
8760 case SIInstrFlags::VOPC:
8761 if (AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::clamp))
8762 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
8763 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc0Sel, SdwaSel::DWORD);
8764 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc1Sel, SdwaSel::DWORD);
8765 break;
8767 default:
8768 llvm_unreachable("Invalid instruction type. Only VOP1, VOP2 and VOPC allowed");
8772 // Special case v_mac_{f16, f32}:
8773 // it has a src2 register operand that is tied to the dst operand.
8774 if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
8775 Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
8776 auto it = Inst.begin();
8777 std::advance(
8778 it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2));
8779 Inst.insert(it, Inst.getOperand(0)); // src2 = dst
8783 /// Force static initialization.
8784 extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUAsmParser() {
8785 RegisterMCAsmParser<AMDGPUAsmParser> A(getTheR600Target());
8786 RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget());
8789 #define GET_REGISTER_MATCHER
8790 #define GET_MATCHER_IMPLEMENTATION
8791 #define GET_MNEMONIC_SPELL_CHECKER
8792 #define GET_MNEMONIC_CHECKER
8793 #include "AMDGPUGenAsmMatcher.inc"
8795 ParseStatus AMDGPUAsmParser::parseCustomOperand(OperandVector &Operands,
8796 unsigned MCK) {
8797 switch (MCK) {
8798 case MCK_addr64:
8799 return parseTokenOp("addr64", Operands);
8800 case MCK_done:
8801 return parseTokenOp("done", Operands);
8802 case MCK_idxen:
8803 return parseTokenOp("idxen", Operands);
8804 case MCK_lds:
8805 return parseTokenOp("lds", Operands);
8806 case MCK_offen:
8807 return parseTokenOp("offen", Operands);
8808 case MCK_off:
8809 return parseTokenOp("off", Operands);
8810 case MCK_row_95_en:
8811 return parseTokenOp("row_en", Operands);
8812 case MCK_gds:
8813 return parseNamedBit("gds", Operands, AMDGPUOperand::ImmTyGDS);
8814 case MCK_tfe:
8815 return parseNamedBit("tfe", Operands, AMDGPUOperand::ImmTyTFE);
8817 return tryCustomParseOperand(Operands, MCK);
8820 // This function should be defined after the auto-generated include so that
8821 // the MatchClassKind enum is defined.
8822 unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op,
8823 unsigned Kind) {
8824 // Tokens like "glc" would be parsed as immediate operands in ParseOperand().
8825 // But MatchInstructionImpl() expects a token and fails to validate the
8826 // operand. This method checks whether we were given an immediate operand
8827 // where the matcher expects the corresponding token.
8828 AMDGPUOperand &Operand = (AMDGPUOperand&)Op;
8829 switch (Kind) {
8830 case MCK_addr64:
8831 return Operand.isAddr64() ? Match_Success : Match_InvalidOperand;
8832 case MCK_gds:
8833 return Operand.isGDS() ? Match_Success : Match_InvalidOperand;
8834 case MCK_lds:
8835 return Operand.isLDS() ? Match_Success : Match_InvalidOperand;
8836 case MCK_idxen:
8837 return Operand.isIdxen() ? Match_Success : Match_InvalidOperand;
8838 case MCK_offen:
8839 return Operand.isOffen() ? Match_Success : Match_InvalidOperand;
8840 case MCK_tfe:
8841 return Operand.isTFE() ? Match_Success : Match_InvalidOperand;
8842 case MCK_SSrcB32:
8843 // When operands have expression values, they return true for isToken,
8844 // because it is not possible to distinguish between a token and an
8845 // expression at parse time. MatchInstructionImpl() always tries to
8846 // match an operand as a token when isToken returns true; if the
8847 // name of the expression is not a valid token, the match fails,
8848 // so we need to handle that case here.
8849 return Operand.isSSrcB32() ? Match_Success : Match_InvalidOperand;
8850 case MCK_SSrcF32:
8851 return Operand.isSSrcF32() ? Match_Success : Match_InvalidOperand;
8852 case MCK_SOPPBrTarget:
8853 return Operand.isSOPPBrTarget() ? Match_Success : Match_InvalidOperand;
8854 case MCK_VReg32OrOff:
8855 return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand;
8856 case MCK_InterpSlot:
8857 return Operand.isInterpSlot() ? Match_Success : Match_InvalidOperand;
8858 case MCK_InterpAttr:
8859 return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand;
8860 case MCK_InterpAttrChan:
8861 return Operand.isInterpAttrChan() ? Match_Success : Match_InvalidOperand;
8862 case MCK_SReg_64:
8863 case MCK_SReg_64_XEXEC:
8864 // Null is defined as a 32-bit register but
8865 // it should also be enabled with 64-bit operands.
8866 // The following code enables it for SReg_64 operands
8867 // used as source and destination. Remaining source
8868 // operands are handled in isInlinableImm.
8869 return Operand.isNull() ? Match_Success : Match_InvalidOperand;
8870 default:
8871 return Match_InvalidOperand;
8875 //===----------------------------------------------------------------------===//
8876 // endpgm
8877 //===----------------------------------------------------------------------===//
8879 ParseStatus AMDGPUAsmParser::parseEndpgm(OperandVector &Operands) {
8880 SMLoc S = getLoc();
8881 int64_t Imm = 0;
8883 if (!parseExpr(Imm)) {
8884 // The operand is optional, if not present default to 0
8885 Imm = 0;
8888 if (!isUInt<16>(Imm))
8889 return Error(S, "expected a 16-bit value");
8891 Operands.push_back(
8892 AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyEndpgm));
8893 return ParseStatus::Success;
8896 bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmTyEndpgm); }
8898 //===----------------------------------------------------------------------===//
8899 // LDSDIR
8900 //===----------------------------------------------------------------------===//
8902 bool AMDGPUOperand::isWaitVDST() const {
8903 return isImmTy(ImmTyWaitVDST) && isUInt<4>(getImm());
8906 //===----------------------------------------------------------------------===//
8907 // VINTERP
8908 //===----------------------------------------------------------------------===//
8910 bool AMDGPUOperand::isWaitEXP() const {
8911 return isImmTy(ImmTyWaitEXP) && isUInt<3>(getImm());