[AMDGPU] Check for immediate SrcC in mfma in AsmParser
[llvm-core.git] / lib / Target / AMDGPU / AsmParser / AMDGPUAsmParser.cpp
blob bf90e36eec6e8c8186f091131ae2d1364e784d8b
1 //===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
9 #include "AMDGPU.h"
10 #include "AMDKernelCodeT.h"
11 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
12 #include "MCTargetDesc/AMDGPUTargetStreamer.h"
13 #include "SIDefines.h"
14 #include "SIInstrInfo.h"
15 #include "TargetInfo/AMDGPUTargetInfo.h"
16 #include "Utils/AMDGPUAsmUtils.h"
17 #include "Utils/AMDGPUBaseInfo.h"
18 #include "Utils/AMDKernelCodeTUtils.h"
19 #include "llvm/ADT/APFloat.h"
20 #include "llvm/ADT/APInt.h"
21 #include "llvm/ADT/ArrayRef.h"
22 #include "llvm/ADT/STLExtras.h"
23 #include "llvm/ADT/SmallBitVector.h"
24 #include "llvm/ADT/SmallString.h"
25 #include "llvm/ADT/StringRef.h"
26 #include "llvm/ADT/StringSwitch.h"
27 #include "llvm/ADT/Twine.h"
28 #include "llvm/BinaryFormat/ELF.h"
29 #include "llvm/MC/MCAsmInfo.h"
30 #include "llvm/MC/MCContext.h"
31 #include "llvm/MC/MCExpr.h"
32 #include "llvm/MC/MCInst.h"
33 #include "llvm/MC/MCInstrDesc.h"
34 #include "llvm/MC/MCInstrInfo.h"
35 #include "llvm/MC/MCParser/MCAsmLexer.h"
36 #include "llvm/MC/MCParser/MCAsmParser.h"
37 #include "llvm/MC/MCParser/MCAsmParserExtension.h"
38 #include "llvm/MC/MCParser/MCParsedAsmOperand.h"
39 #include "llvm/MC/MCParser/MCTargetAsmParser.h"
40 #include "llvm/MC/MCRegisterInfo.h"
41 #include "llvm/MC/MCStreamer.h"
42 #include "llvm/MC/MCSubtargetInfo.h"
43 #include "llvm/MC/MCSymbol.h"
44 #include "llvm/Support/AMDGPUMetadata.h"
45 #include "llvm/Support/AMDHSAKernelDescriptor.h"
46 #include "llvm/Support/Casting.h"
47 #include "llvm/Support/Compiler.h"
48 #include "llvm/Support/ErrorHandling.h"
49 #include "llvm/Support/MachineValueType.h"
50 #include "llvm/Support/MathExtras.h"
51 #include "llvm/Support/SMLoc.h"
52 #include "llvm/Support/TargetParser.h"
53 #include "llvm/Support/TargetRegistry.h"
54 #include "llvm/Support/raw_ostream.h"
55 #include <algorithm>
56 #include <cassert>
57 #include <cstdint>
58 #include <cstring>
59 #include <iterator>
60 #include <map>
61 #include <memory>
62 #include <string>
64 using namespace llvm;
65 using namespace llvm::AMDGPU;
66 using namespace llvm::amdhsa;
68 namespace {
70 class AMDGPUAsmParser;
72 enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_AGPR, IS_TTMP, IS_SPECIAL };
74 //===----------------------------------------------------------------------===//
75 // Operand
76 //===----------------------------------------------------------------------===//
78 class AMDGPUOperand : public MCParsedAsmOperand {
79 enum KindTy {
80 Token,
81 Immediate,
82 Register,
83 Expression
84 } Kind;
86 SMLoc StartLoc, EndLoc;
87 const AMDGPUAsmParser *AsmParser;
89 public:
90 AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_)
91 : MCParsedAsmOperand(), Kind(Kind_), AsmParser(AsmParser_) {}
93 using Ptr = std::unique_ptr<AMDGPUOperand>;
95 struct Modifiers {
96 bool Abs = false;
97 bool Neg = false;
98 bool Sext = false;
100 bool hasFPModifiers() const { return Abs || Neg; }
101 bool hasIntModifiers() const { return Sext; }
102 bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); }
104 int64_t getFPModifiersOperand() const {
105 int64_t Operand = 0;
106 Operand |= Abs ? SISrcMods::ABS : 0u;
107 Operand |= Neg ? SISrcMods::NEG : 0u;
108 return Operand;
111 int64_t getIntModifiersOperand() const {
112 int64_t Operand = 0;
113 Operand |= Sext ? SISrcMods::SEXT : 0u;
114 return Operand;
117 int64_t getModifiersOperand() const {
118 assert(!(hasFPModifiers() && hasIntModifiers())
119 && "fp and int modifiers should not be used simultaneously");
120 if (hasFPModifiers()) {
121 return getFPModifiersOperand();
122 } else if (hasIntModifiers()) {
123 return getIntModifiersOperand();
124 } else {
125 return 0;
129 friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
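  // ImmTy (below) distinguishes the many named immediate-like operands the
  // parser recognizes (offsets, cache-policy bits, DPP/SDWA controls, MAI
  // fields, etc.); plain numeric immediates use ImmTyNone.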
132 enum ImmTy {
133 ImmTyNone,
134 ImmTyGDS,
135 ImmTyLDS,
136 ImmTyOffen,
137 ImmTyIdxen,
138 ImmTyAddr64,
139 ImmTyOffset,
140 ImmTyInstOffset,
141 ImmTyOffset0,
142 ImmTyOffset1,
143 ImmTyDLC,
144 ImmTyGLC,
145 ImmTySLC,
146 ImmTyTFE,
147 ImmTyD16,
148 ImmTyClampSI,
149 ImmTyOModSI,
150 ImmTyDPP8,
151 ImmTyDppCtrl,
152 ImmTyDppRowMask,
153 ImmTyDppBankMask,
154 ImmTyDppBoundCtrl,
155 ImmTyDppFi,
156 ImmTySdwaDstSel,
157 ImmTySdwaSrc0Sel,
158 ImmTySdwaSrc1Sel,
159 ImmTySdwaDstUnused,
160 ImmTyDMask,
161 ImmTyDim,
162 ImmTyUNorm,
163 ImmTyDA,
164 ImmTyR128A16,
165 ImmTyLWE,
166 ImmTyExpTgt,
167 ImmTyExpCompr,
168 ImmTyExpVM,
169 ImmTyFORMAT,
170 ImmTyHwreg,
171 ImmTyOff,
172 ImmTySendMsg,
173 ImmTyInterpSlot,
174 ImmTyInterpAttr,
175 ImmTyAttrChan,
176 ImmTyOpSel,
177 ImmTyOpSelHi,
178 ImmTyNegLo,
179 ImmTyNegHi,
180 ImmTySwizzle,
181 ImmTyGprIdxMode,
182 ImmTyHigh,
183 ImmTyBLGP,
184 ImmTyCBSZ,
185 ImmTyABID,
186 ImmTyEndpgm,
189 private:
190 struct TokOp {
191 const char *Data;
192 unsigned Length;
195 struct ImmOp {
196 int64_t Val;
197 ImmTy Type;
198 bool IsFPImm;
199 Modifiers Mods;
202 struct RegOp {
203 unsigned RegNo;
204 Modifiers Mods;
207 union {
208 TokOp Tok;
209 ImmOp Imm;
210 RegOp Reg;
211 const MCExpr *Expr;
214 public:
215 bool isToken() const override {
216 if (Kind == Token)
217 return true;
219 // When parsing operands, we can't always tell if something was meant to be
220 // a token, like 'gds', or an expression that references a global variable.
221 // In this case, we assume the string is an expression, and if we need to
 222     // interpret it as a token, then we treat the symbol name as the token.
223 return isSymbolRefExpr();
226 bool isSymbolRefExpr() const {
227 return isExpr() && Expr && isa<MCSymbolRefExpr>(Expr);
230 bool isImm() const override {
231 return Kind == Immediate;
234 bool isInlinableImm(MVT type) const;
235 bool isLiteralImm(MVT type) const;
237 bool isRegKind() const {
238 return Kind == Register;
241 bool isReg() const override {
242 return isRegKind() && !hasModifiers();
245 bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const {
246 return isRegClass(RCID) || isInlinableImm(type) || isLiteralImm(type);
249 bool isRegOrImmWithInt16InputMods() const {
250 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16);
253 bool isRegOrImmWithInt32InputMods() const {
254 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32);
257 bool isRegOrImmWithInt64InputMods() const {
258 return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64);
261 bool isRegOrImmWithFP16InputMods() const {
262 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16);
265 bool isRegOrImmWithFP32InputMods() const {
266 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32);
269 bool isRegOrImmWithFP64InputMods() const {
270 return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64);
273 bool isVReg() const {
274 return isRegClass(AMDGPU::VGPR_32RegClassID) ||
275 isRegClass(AMDGPU::VReg_64RegClassID) ||
276 isRegClass(AMDGPU::VReg_96RegClassID) ||
277 isRegClass(AMDGPU::VReg_128RegClassID) ||
278 isRegClass(AMDGPU::VReg_160RegClassID) ||
279 isRegClass(AMDGPU::VReg_256RegClassID) ||
280 isRegClass(AMDGPU::VReg_512RegClassID) ||
281 isRegClass(AMDGPU::VReg_1024RegClassID);
284 bool isVReg32() const {
285 return isRegClass(AMDGPU::VGPR_32RegClassID);
288 bool isVReg32OrOff() const {
289 return isOff() || isVReg32();
292 bool isSDWAOperand(MVT type) const;
293 bool isSDWAFP16Operand() const;
294 bool isSDWAFP32Operand() const;
295 bool isSDWAInt16Operand() const;
296 bool isSDWAInt32Operand() const;
298 bool isImmTy(ImmTy ImmT) const {
299 return isImm() && Imm.Type == ImmT;
302 bool isImmModifier() const {
303 return isImm() && Imm.Type != ImmTyNone;
306 bool isClampSI() const { return isImmTy(ImmTyClampSI); }
307 bool isOModSI() const { return isImmTy(ImmTyOModSI); }
308 bool isDMask() const { return isImmTy(ImmTyDMask); }
309 bool isDim() const { return isImmTy(ImmTyDim); }
310 bool isUNorm() const { return isImmTy(ImmTyUNorm); }
311 bool isDA() const { return isImmTy(ImmTyDA); }
312 bool isR128A16() const { return isImmTy(ImmTyR128A16); }
313 bool isLWE() const { return isImmTy(ImmTyLWE); }
314 bool isOff() const { return isImmTy(ImmTyOff); }
315 bool isExpTgt() const { return isImmTy(ImmTyExpTgt); }
316 bool isExpVM() const { return isImmTy(ImmTyExpVM); }
317 bool isExpCompr() const { return isImmTy(ImmTyExpCompr); }
318 bool isOffen() const { return isImmTy(ImmTyOffen); }
319 bool isIdxen() const { return isImmTy(ImmTyIdxen); }
320 bool isAddr64() const { return isImmTy(ImmTyAddr64); }
321 bool isOffset() const { return isImmTy(ImmTyOffset) && isUInt<16>(getImm()); }
322 bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<8>(getImm()); }
323 bool isOffset1() const { return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); }
325 bool isFlatOffset() const { return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); }
326 bool isGDS() const { return isImmTy(ImmTyGDS); }
327 bool isLDS() const { return isImmTy(ImmTyLDS); }
328 bool isDLC() const { return isImmTy(ImmTyDLC); }
329 bool isGLC() const { return isImmTy(ImmTyGLC); }
330 bool isSLC() const { return isImmTy(ImmTySLC); }
331 bool isTFE() const { return isImmTy(ImmTyTFE); }
332 bool isD16() const { return isImmTy(ImmTyD16); }
333 bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<8>(getImm()); }
334 bool isBankMask() const { return isImmTy(ImmTyDppBankMask); }
335 bool isRowMask() const { return isImmTy(ImmTyDppRowMask); }
336 bool isBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); }
337 bool isFI() const { return isImmTy(ImmTyDppFi); }
338 bool isSDWADstSel() const { return isImmTy(ImmTySdwaDstSel); }
339 bool isSDWASrc0Sel() const { return isImmTy(ImmTySdwaSrc0Sel); }
340 bool isSDWASrc1Sel() const { return isImmTy(ImmTySdwaSrc1Sel); }
341 bool isSDWADstUnused() const { return isImmTy(ImmTySdwaDstUnused); }
342 bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); }
343 bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); }
344 bool isAttrChan() const { return isImmTy(ImmTyAttrChan); }
345 bool isOpSel() const { return isImmTy(ImmTyOpSel); }
346 bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); }
347 bool isNegLo() const { return isImmTy(ImmTyNegLo); }
348 bool isNegHi() const { return isImmTy(ImmTyNegHi); }
349 bool isHigh() const { return isImmTy(ImmTyHigh); }
351 bool isMod() const {
352 return isClampSI() || isOModSI();
355 bool isRegOrImm() const {
356 return isReg() || isImm();
359 bool isRegClass(unsigned RCID) const;
361 bool isInlineValue() const;
363 bool isRegOrInlineNoMods(unsigned RCID, MVT type) const {
364 return (isRegClass(RCID) || isInlinableImm(type)) && !hasModifiers();
367 bool isSCSrcB16() const {
368 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16);
371 bool isSCSrcV2B16() const {
372 return isSCSrcB16();
375 bool isSCSrcB32() const {
376 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32);
379 bool isSCSrcB64() const {
380 return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64);
383 bool isBoolReg() const;
385 bool isSCSrcF16() const {
386 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16);
389 bool isSCSrcV2F16() const {
390 return isSCSrcF16();
393 bool isSCSrcF32() const {
394 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32);
397 bool isSCSrcF64() const {
398 return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64);
401 bool isSSrcB32() const {
402 return isSCSrcB32() || isLiteralImm(MVT::i32) || isExpr();
405 bool isSSrcB16() const {
406 return isSCSrcB16() || isLiteralImm(MVT::i16);
409 bool isSSrcV2B16() const {
410 llvm_unreachable("cannot happen");
411 return isSSrcB16();
414 bool isSSrcB64() const {
415 // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits.
416 // See isVSrc64().
417 return isSCSrcB64() || isLiteralImm(MVT::i64);
420 bool isSSrcF32() const {
421 return isSCSrcB32() || isLiteralImm(MVT::f32) || isExpr();
424 bool isSSrcF64() const {
425 return isSCSrcB64() || isLiteralImm(MVT::f64);
428 bool isSSrcF16() const {
429 return isSCSrcB16() || isLiteralImm(MVT::f16);
432 bool isSSrcV2F16() const {
433 llvm_unreachable("cannot happen");
434 return isSSrcF16();
437 bool isSSrcOrLdsB32() const {
438 return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) ||
439 isLiteralImm(MVT::i32) || isExpr();
442 bool isVCSrcB32() const {
443 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32);
446 bool isVCSrcB64() const {
447 return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64);
450 bool isVCSrcB16() const {
451 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16);
454 bool isVCSrcV2B16() const {
455 return isVCSrcB16();
458 bool isVCSrcF32() const {
459 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32);
462 bool isVCSrcF64() const {
463 return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64);
466 bool isVCSrcF16() const {
467 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16);
470 bool isVCSrcV2F16() const {
471 return isVCSrcF16();
474 bool isVSrcB32() const {
475 return isVCSrcF32() || isLiteralImm(MVT::i32) || isExpr();
478 bool isVSrcB64() const {
479 return isVCSrcF64() || isLiteralImm(MVT::i64);
482 bool isVSrcB16() const {
483 return isVCSrcF16() || isLiteralImm(MVT::i16);
486 bool isVSrcV2B16() const {
487 return isVSrcB16() || isLiteralImm(MVT::v2i16);
490 bool isVSrcF32() const {
491 return isVCSrcF32() || isLiteralImm(MVT::f32) || isExpr();
494 bool isVSrcF64() const {
495 return isVCSrcF64() || isLiteralImm(MVT::f64);
498 bool isVSrcF16() const {
499 return isVCSrcF16() || isLiteralImm(MVT::f16);
502 bool isVSrcV2F16() const {
503 return isVSrcF16() || isLiteralImm(MVT::v2f16);
506 bool isVISrcB32() const {
507 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i32);
510 bool isVISrcB16() const {
511 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i16);
514 bool isVISrcV2B16() const {
515 return isVISrcB16();
518 bool isVISrcF32() const {
519 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f32);
522 bool isVISrcF16() const {
523 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f16);
526 bool isVISrcV2F16() const {
527 return isVISrcF16() || isVISrcB32();
530 bool isAISrcB32() const {
531 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i32);
534 bool isAISrcB16() const {
535 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i16);
538 bool isAISrcV2B16() const {
539 return isAISrcB16();
542 bool isAISrcF32() const {
543 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f32);
546 bool isAISrcF16() const {
547 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f16);
550 bool isAISrcV2F16() const {
551 return isAISrcF16() || isAISrcB32();
554 bool isAISrc_128B32() const {
555 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i32);
558 bool isAISrc_128B16() const {
559 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i16);
562 bool isAISrc_128V2B16() const {
563 return isAISrc_128B16();
566 bool isAISrc_128F32() const {
567 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f32);
570 bool isAISrc_128F16() const {
571 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f16);
574 bool isAISrc_128V2F16() const {
575 return isAISrc_128F16() || isAISrc_128B32();
578 bool isAISrc_512B32() const {
579 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i32);
582 bool isAISrc_512B16() const {
583 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i16);
586 bool isAISrc_512V2B16() const {
587 return isAISrc_512B16();
590 bool isAISrc_512F32() const {
591 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f32);
594 bool isAISrc_512F16() const {
595 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f16);
598 bool isAISrc_512V2F16() const {
599 return isAISrc_512F16() || isAISrc_512B32();
602 bool isAISrc_1024B32() const {
603 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i32);
606 bool isAISrc_1024B16() const {
607 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i16);
610 bool isAISrc_1024V2B16() const {
611 return isAISrc_1024B16();
614 bool isAISrc_1024F32() const {
615 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f32);
618 bool isAISrc_1024F16() const {
619 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f16);
622 bool isAISrc_1024V2F16() const {
623 return isAISrc_1024F16() || isAISrc_1024B32();
626 bool isKImmFP32() const {
627 return isLiteralImm(MVT::f32);
630 bool isKImmFP16() const {
631 return isLiteralImm(MVT::f16);
634 bool isMem() const override {
635 return false;
638 bool isExpr() const {
639 return Kind == Expression;
642 bool isSoppBrTarget() const {
643 return isExpr() || isImm();
646 bool isSWaitCnt() const;
647 bool isHwreg() const;
648 bool isSendMsg() const;
649 bool isSwizzle() const;
650 bool isSMRDOffset8() const;
651 bool isSMRDOffset20() const;
652 bool isSMRDLiteralOffset() const;
653 bool isDPP8() const;
654 bool isDPPCtrl() const;
655 bool isBLGP() const;
656 bool isCBSZ() const;
657 bool isABID() const;
658 bool isGPRIdxMode() const;
659 bool isS16Imm() const;
660 bool isU16Imm() const;
661 bool isEndpgm() const;
663 StringRef getExpressionAsToken() const {
664 assert(isExpr());
665 const MCSymbolRefExpr *S = cast<MCSymbolRefExpr>(Expr);
666 return S->getSymbol().getName();
669 StringRef getToken() const {
670 assert(isToken());
672 if (Kind == Expression)
673 return getExpressionAsToken();
675 return StringRef(Tok.Data, Tok.Length);
678 int64_t getImm() const {
679 assert(isImm());
680 return Imm.Val;
683 ImmTy getImmTy() const {
684 assert(isImm());
685 return Imm.Type;
688 unsigned getReg() const override {
689 assert(isRegKind());
690 return Reg.RegNo;
693 SMLoc getStartLoc() const override {
694 return StartLoc;
697 SMLoc getEndLoc() const override {
698 return EndLoc;
701 SMRange getLocRange() const {
702 return SMRange(StartLoc, EndLoc);
705 Modifiers getModifiers() const {
706 assert(isRegKind() || isImmTy(ImmTyNone));
707 return isRegKind() ? Reg.Mods : Imm.Mods;
710 void setModifiers(Modifiers Mods) {
711 assert(isRegKind() || isImmTy(ImmTyNone));
712 if (isRegKind())
713 Reg.Mods = Mods;
714 else
715 Imm.Mods = Mods;
718 bool hasModifiers() const {
719 return getModifiers().hasModifiers();
722 bool hasFPModifiers() const {
723 return getModifiers().hasFPModifiers();
726 bool hasIntModifiers() const {
727 return getModifiers().hasIntModifiers();
730 uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const;
732 void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const;
734 void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const;
736 template <unsigned Bitwidth>
737 void addKImmFPOperands(MCInst &Inst, unsigned N) const;
739 void addKImmFP16Operands(MCInst &Inst, unsigned N) const {
740 addKImmFPOperands<16>(Inst, N);
743 void addKImmFP32Operands(MCInst &Inst, unsigned N) const {
744 addKImmFPOperands<32>(Inst, N);
747 void addRegOperands(MCInst &Inst, unsigned N) const;
749 void addBoolRegOperands(MCInst &Inst, unsigned N) const {
750 addRegOperands(Inst, N);
753 void addRegOrImmOperands(MCInst &Inst, unsigned N) const {
754 if (isRegKind())
755 addRegOperands(Inst, N);
756 else if (isExpr())
757 Inst.addOperand(MCOperand::createExpr(Expr));
758 else
759 addImmOperands(Inst, N);
762 void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const {
763 Modifiers Mods = getModifiers();
764 Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
765 if (isRegKind()) {
766 addRegOperands(Inst, N);
767 } else {
768 addImmOperands(Inst, N, false);
772 void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
773 assert(!hasIntModifiers());
774 addRegOrImmWithInputModsOperands(Inst, N);
777 void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
778 assert(!hasFPModifiers());
779 addRegOrImmWithInputModsOperands(Inst, N);
782 void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const {
783 Modifiers Mods = getModifiers();
784 Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
785 assert(isRegKind());
786 addRegOperands(Inst, N);
789 void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
790 assert(!hasIntModifiers());
791 addRegWithInputModsOperands(Inst, N);
794 void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
795 assert(!hasFPModifiers());
796 addRegWithInputModsOperands(Inst, N);
799 void addSoppBrTargetOperands(MCInst &Inst, unsigned N) const {
800 if (isImm())
801 addImmOperands(Inst, N);
802 else {
803 assert(isExpr());
804 Inst.addOperand(MCOperand::createExpr(Expr));
808 static void printImmTy(raw_ostream& OS, ImmTy Type) {
809 switch (Type) {
810 case ImmTyNone: OS << "None"; break;
811 case ImmTyGDS: OS << "GDS"; break;
812 case ImmTyLDS: OS << "LDS"; break;
813 case ImmTyOffen: OS << "Offen"; break;
814 case ImmTyIdxen: OS << "Idxen"; break;
815 case ImmTyAddr64: OS << "Addr64"; break;
816 case ImmTyOffset: OS << "Offset"; break;
817 case ImmTyInstOffset: OS << "InstOffset"; break;
818 case ImmTyOffset0: OS << "Offset0"; break;
819 case ImmTyOffset1: OS << "Offset1"; break;
820 case ImmTyDLC: OS << "DLC"; break;
821 case ImmTyGLC: OS << "GLC"; break;
822 case ImmTySLC: OS << "SLC"; break;
823 case ImmTyTFE: OS << "TFE"; break;
824 case ImmTyD16: OS << "D16"; break;
825 case ImmTyFORMAT: OS << "FORMAT"; break;
826 case ImmTyClampSI: OS << "ClampSI"; break;
827 case ImmTyOModSI: OS << "OModSI"; break;
828 case ImmTyDPP8: OS << "DPP8"; break;
829 case ImmTyDppCtrl: OS << "DppCtrl"; break;
830 case ImmTyDppRowMask: OS << "DppRowMask"; break;
831 case ImmTyDppBankMask: OS << "DppBankMask"; break;
832 case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break;
833 case ImmTyDppFi: OS << "FI"; break;
834 case ImmTySdwaDstSel: OS << "SdwaDstSel"; break;
835 case ImmTySdwaSrc0Sel: OS << "SdwaSrc0Sel"; break;
836 case ImmTySdwaSrc1Sel: OS << "SdwaSrc1Sel"; break;
837 case ImmTySdwaDstUnused: OS << "SdwaDstUnused"; break;
838 case ImmTyDMask: OS << "DMask"; break;
839 case ImmTyDim: OS << "Dim"; break;
840 case ImmTyUNorm: OS << "UNorm"; break;
841 case ImmTyDA: OS << "DA"; break;
842 case ImmTyR128A16: OS << "R128A16"; break;
843 case ImmTyLWE: OS << "LWE"; break;
844 case ImmTyOff: OS << "Off"; break;
845 case ImmTyExpTgt: OS << "ExpTgt"; break;
846 case ImmTyExpCompr: OS << "ExpCompr"; break;
847 case ImmTyExpVM: OS << "ExpVM"; break;
848 case ImmTyHwreg: OS << "Hwreg"; break;
849 case ImmTySendMsg: OS << "SendMsg"; break;
850 case ImmTyInterpSlot: OS << "InterpSlot"; break;
851 case ImmTyInterpAttr: OS << "InterpAttr"; break;
852 case ImmTyAttrChan: OS << "AttrChan"; break;
853 case ImmTyOpSel: OS << "OpSel"; break;
854 case ImmTyOpSelHi: OS << "OpSelHi"; break;
855 case ImmTyNegLo: OS << "NegLo"; break;
856 case ImmTyNegHi: OS << "NegHi"; break;
857 case ImmTySwizzle: OS << "Swizzle"; break;
858 case ImmTyGprIdxMode: OS << "GprIdxMode"; break;
859 case ImmTyHigh: OS << "High"; break;
860 case ImmTyBLGP: OS << "BLGP"; break;
861 case ImmTyCBSZ: OS << "CBSZ"; break;
862 case ImmTyABID: OS << "ABID"; break;
863 case ImmTyEndpgm: OS << "Endpgm"; break;
867 void print(raw_ostream &OS) const override {
868 switch (Kind) {
869 case Register:
870 OS << "<register " << getReg() << " mods: " << Reg.Mods << '>';
871 break;
872 case Immediate:
873 OS << '<' << getImm();
874 if (getImmTy() != ImmTyNone) {
875 OS << " type: "; printImmTy(OS, getImmTy());
877 OS << " mods: " << Imm.Mods << '>';
878 break;
879 case Token:
880 OS << '\'' << getToken() << '\'';
881 break;
882 case Expression:
883 OS << "<expr " << *Expr << '>';
884 break;
888 static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser,
889 int64_t Val, SMLoc Loc,
890 ImmTy Type = ImmTyNone,
891 bool IsFPImm = false) {
892 auto Op = std::make_unique<AMDGPUOperand>(Immediate, AsmParser);
893 Op->Imm.Val = Val;
894 Op->Imm.IsFPImm = IsFPImm;
895 Op->Imm.Type = Type;
896 Op->Imm.Mods = Modifiers();
897 Op->StartLoc = Loc;
898 Op->EndLoc = Loc;
899 return Op;
902 static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser,
903 StringRef Str, SMLoc Loc,
904 bool HasExplicitEncodingSize = true) {
905 auto Res = std::make_unique<AMDGPUOperand>(Token, AsmParser);
906 Res->Tok.Data = Str.data();
907 Res->Tok.Length = Str.size();
908 Res->StartLoc = Loc;
909 Res->EndLoc = Loc;
910 return Res;
913 static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser,
914 unsigned RegNo, SMLoc S,
915 SMLoc E) {
916 auto Op = std::make_unique<AMDGPUOperand>(Register, AsmParser);
917 Op->Reg.RegNo = RegNo;
918 Op->Reg.Mods = Modifiers();
919 Op->StartLoc = S;
920 Op->EndLoc = E;
921 return Op;
924 static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser,
925 const class MCExpr *Expr, SMLoc S) {
926 auto Op = std::make_unique<AMDGPUOperand>(Expression, AsmParser);
927 Op->Expr = Expr;
928 Op->StartLoc = S;
929 Op->EndLoc = S;
930 return Op;
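  // Typical use elsewhere in this file (a sketch, not a new API): the parse*
  // routines build operands through these factories, e.g.
  //   Operands.push_back(
  //       AMDGPUOperand::CreateImm(this, Val, S, AMDGPUOperand::ImmTyOffset));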
934 raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) {
935 OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext;
936 return OS;
939 //===----------------------------------------------------------------------===//
940 // AsmParser
941 //===----------------------------------------------------------------------===//
943 // Holds info related to the current kernel, e.g. count of SGPRs used.
944 // Kernel scope begins at .amdgpu_hsa_kernel directive, ends at next
945 // .amdgpu_hsa_kernel or at EOF.
946 class KernelScopeInfo {
947 int SgprIndexUnusedMin = -1;
948 int VgprIndexUnusedMin = -1;
949 MCContext *Ctx = nullptr;
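  // usesSgprAt/usesVgprAt raise the first-unused-index watermark and publish
  // it through the .kernel.sgpr_count / .kernel.vgpr_count symbols.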
951 void usesSgprAt(int i) {
952 if (i >= SgprIndexUnusedMin) {
953 SgprIndexUnusedMin = ++i;
954 if (Ctx) {
955 MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count"));
956 Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx));
961 void usesVgprAt(int i) {
962 if (i >= VgprIndexUnusedMin) {
963 VgprIndexUnusedMin = ++i;
964 if (Ctx) {
965 MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
966 Sym->setVariableValue(MCConstantExpr::create(VgprIndexUnusedMin, *Ctx));
971 public:
972 KernelScopeInfo() = default;
974 void initialize(MCContext &Context) {
975 Ctx = &Context;
976 usesSgprAt(SgprIndexUnusedMin = -1);
977 usesVgprAt(VgprIndexUnusedMin = -1);
980 void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex, unsigned RegWidth) {
981 switch (RegKind) {
982 case IS_SGPR: usesSgprAt(DwordRegIndex + RegWidth - 1); break;
983 case IS_AGPR: // fall through
984 case IS_VGPR: usesVgprAt(DwordRegIndex + RegWidth - 1); break;
985 default: break;
990 class AMDGPUAsmParser : public MCTargetAsmParser {
991 MCAsmParser &Parser;
993 // Number of extra operands parsed after the first optional operand.
994 // This may be necessary to skip hardcoded mandatory operands.
995 static const unsigned MAX_OPR_LOOKAHEAD = 8;
997 unsigned ForcedEncodingSize = 0;
998 bool ForcedDPP = false;
999 bool ForcedSDWA = false;
1000 KernelScopeInfo KernelScope;
1002 /// @name Auto-generated Match Functions
1003 /// {
1005 #define GET_ASSEMBLER_HEADER
1006 #include "AMDGPUGenAsmMatcher.inc"
1008 /// }
1010 private:
1011 bool ParseAsAbsoluteExpression(uint32_t &Ret);
1012 bool OutOfRangeError(SMRange Range);
1013 /// Calculate VGPR/SGPR blocks required for given target, reserved
1014 /// registers, and user-specified NextFreeXGPR values.
1016 /// \param Features [in] Target features, used for bug corrections.
1017 /// \param VCCUsed [in] Whether VCC special SGPR is reserved.
1018 /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved.
1019 /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved.
1020 /// \param EnableWavefrontSize32 [in] Value of ENABLE_WAVEFRONT_SIZE32 kernel
1021 /// descriptor field, if valid.
1022 /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one.
1023 /// \param VGPRRange [in] Token range, used for VGPR diagnostics.
1024 /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one.
1025 /// \param SGPRRange [in] Token range, used for SGPR diagnostics.
1026 /// \param VGPRBlocks [out] Result VGPR block count.
1027 /// \param SGPRBlocks [out] Result SGPR block count.
1028 bool calculateGPRBlocks(const FeatureBitset &Features, bool VCCUsed,
1029 bool FlatScrUsed, bool XNACKUsed,
1030 Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR,
1031 SMRange VGPRRange, unsigned NextFreeSGPR,
1032 SMRange SGPRRange, unsigned &VGPRBlocks,
1033 unsigned &SGPRBlocks);
1034 bool ParseDirectiveAMDGCNTarget();
1035 bool ParseDirectiveAMDHSAKernel();
1036 bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor);
1037 bool ParseDirectiveHSACodeObjectVersion();
1038 bool ParseDirectiveHSACodeObjectISA();
1039 bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header);
1040 bool ParseDirectiveAMDKernelCodeT();
1041 bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo) const;
1042 bool ParseDirectiveAMDGPUHsaKernel();
1044 bool ParseDirectiveISAVersion();
1045 bool ParseDirectiveHSAMetadata();
1046 bool ParseDirectivePALMetadataBegin();
1047 bool ParseDirectivePALMetadata();
1048 bool ParseDirectiveAMDGPULDS();
1050 /// Common code to parse out a block of text (typically YAML) between start and
1051 /// end directives.
1052 bool ParseToEndDirective(const char *AssemblerDirectiveBegin,
1053 const char *AssemblerDirectiveEnd,
1054 std::string &CollectString);
1056 bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth,
1057 RegisterKind RegKind, unsigned Reg1,
1058 unsigned RegNum);
1059 bool ParseAMDGPURegister(RegisterKind& RegKind, unsigned& Reg,
1060 unsigned& RegNum, unsigned& RegWidth,
1061 unsigned *DwordRegIndex);
1062 bool isRegister();
1063 bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const;
1064 Optional<StringRef> getGprCountSymbolName(RegisterKind RegKind);
1065 void initializeGprCountSymbol(RegisterKind RegKind);
1066 bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex,
1067 unsigned RegWidth);
1068 void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands,
1069 bool IsAtomic, bool IsAtomicReturn, bool IsLds = false);
1070 void cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
1071 bool IsGdsHardcoded);
1073 public:
1074 enum AMDGPUMatchResultTy {
1075 Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY
1077 enum OperandMode {
1078 OperandMode_Default,
1079 OperandMode_NSA,
1082 using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;
1084 AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
1085 const MCInstrInfo &MII,
1086 const MCTargetOptions &Options)
1087 : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) {
1088 MCAsmParserExtension::Initialize(Parser);
1090 if (getFeatureBits().none()) {
1091 // Set default features.
1092 copySTI().ToggleFeature("southern-islands");
1095 setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));
1098 // TODO: make those pre-defined variables read-only.
1099   // Currently there is no suitable machinery in the core llvm-mc for this.
1100 // MCSymbol::isRedefinable is intended for another purpose, and
1101 // AsmParser::parseDirectiveSet() cannot be specialized for specific target.
1102 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
1103 MCContext &Ctx = getContext();
1104 if (ISA.Major >= 6 && AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
1105 MCSymbol *Sym =
1106 Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_number"));
1107 Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
1108 Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_minor"));
1109 Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
1110 Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_stepping"));
1111 Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
1112 } else {
1113 MCSymbol *Sym =
1114 Ctx.getOrCreateSymbol(Twine(".option.machine_version_major"));
1115 Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
1116 Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_minor"));
1117 Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
1118 Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping"));
1119 Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
1121 if (ISA.Major >= 6 && AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
1122 initializeGprCountSymbol(IS_VGPR);
1123 initializeGprCountSymbol(IS_SGPR);
1124 } else
1125 KernelScope.initialize(getContext());
1129 bool hasXNACK() const {
1130 return AMDGPU::hasXNACK(getSTI());
1133 bool hasMIMG_R128() const {
1134 return AMDGPU::hasMIMG_R128(getSTI());
1137 bool hasPackedD16() const {
1138 return AMDGPU::hasPackedD16(getSTI());
1141 bool isSI() const {
1142 return AMDGPU::isSI(getSTI());
1145 bool isCI() const {
1146 return AMDGPU::isCI(getSTI());
1149 bool isVI() const {
1150 return AMDGPU::isVI(getSTI());
1153 bool isGFX9() const {
1154 return AMDGPU::isGFX9(getSTI());
1157 bool isGFX10() const {
1158 return AMDGPU::isGFX10(getSTI());
1161 bool hasInv2PiInlineImm() const {
1162 return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
1165 bool hasFlatOffsets() const {
1166 return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets];
1169 bool hasSGPR102_SGPR103() const {
1170 return !isVI() && !isGFX9();
1173 bool hasSGPR104_SGPR105() const {
1174 return isGFX10();
1177 bool hasIntClamp() const {
1178 return getFeatureBits()[AMDGPU::FeatureIntClamp];
1181 AMDGPUTargetStreamer &getTargetStreamer() {
1182 MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
1183 return static_cast<AMDGPUTargetStreamer &>(TS);
1186 const MCRegisterInfo *getMRI() const {
1187 // We need this const_cast because for some reason getContext() is not const
1188 // in MCAsmParser.
1189 return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo();
1192 const MCInstrInfo *getMII() const {
1193 return &MII;
1196 const FeatureBitset &getFeatureBits() const {
1197 return getSTI().getFeatureBits();
1200 void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; }
1201 void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; }
1202 void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; }
1204 unsigned getForcedEncodingSize() const { return ForcedEncodingSize; }
1205 bool isForcedVOP3() const { return ForcedEncodingSize == 64; }
1206 bool isForcedDPP() const { return ForcedDPP; }
1207 bool isForcedSDWA() const { return ForcedSDWA; }
1208 ArrayRef<unsigned> getMatchedVariants() const;
1210 std::unique_ptr<AMDGPUOperand> parseRegister();
1211 bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
1212 unsigned checkTargetMatchPredicate(MCInst &Inst) override;
1213 unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
1214 unsigned Kind) override;
1215 bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
1216 OperandVector &Operands, MCStreamer &Out,
1217 uint64_t &ErrorInfo,
1218 bool MatchingInlineAsm) override;
1219 bool ParseDirective(AsmToken DirectiveID) override;
1220 OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Mnemonic,
1221 OperandMode Mode = OperandMode_Default);
1222 StringRef parseMnemonicSuffix(StringRef Name);
1223 bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
1224 SMLoc NameLoc, OperandVector &Operands) override;
1225 //bool ProcessInstruction(MCInst &Inst);
1227 OperandMatchResultTy parseIntWithPrefix(const char *Prefix, int64_t &Int);
1229 OperandMatchResultTy
1230 parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
1231 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1232 bool (*ConvertResult)(int64_t &) = nullptr);
1234 OperandMatchResultTy
1235 parseOperandArrayWithPrefix(const char *Prefix,
1236 OperandVector &Operands,
1237 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1238 bool (*ConvertResult)(int64_t&) = nullptr);
1240 OperandMatchResultTy
1241 parseNamedBit(const char *Name, OperandVector &Operands,
1242 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone);
1243 OperandMatchResultTy parseStringWithPrefix(StringRef Prefix,
1244 StringRef &Value);
1246 bool isModifier();
1247 bool isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1248 bool isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1249 bool isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1250 bool isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const;
1251 bool parseSP3NegModifier();
1252 OperandMatchResultTy parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false);
1253 OperandMatchResultTy parseReg(OperandVector &Operands);
1254 OperandMatchResultTy parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false);
1255 OperandMatchResultTy parseRegOrImmWithFPInputMods(OperandVector &Operands, bool AllowImm = true);
1256 OperandMatchResultTy parseRegOrImmWithIntInputMods(OperandVector &Operands, bool AllowImm = true);
1257 OperandMatchResultTy parseRegWithFPInputMods(OperandVector &Operands);
1258 OperandMatchResultTy parseRegWithIntInputMods(OperandVector &Operands);
1259 OperandMatchResultTy parseVReg32OrOff(OperandVector &Operands);
1260 OperandMatchResultTy parseDfmtNfmt(OperandVector &Operands);
1262 void cvtDSOffset01(MCInst &Inst, const OperandVector &Operands);
1263 void cvtDS(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, false); }
1264 void cvtDSGds(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, true); }
1265 void cvtExp(MCInst &Inst, const OperandVector &Operands);
1267 bool parseCnt(int64_t &IntVal);
1268 OperandMatchResultTy parseSWaitCntOps(OperandVector &Operands);
1269 OperandMatchResultTy parseHwreg(OperandVector &Operands);
1271 private:
1272 struct OperandInfoTy {
1273 int64_t Id;
1274 bool IsSymbolic = false;
1275 bool IsDefined = false;
1277 OperandInfoTy(int64_t Id_) : Id(Id_) {}
1280 bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &Op, OperandInfoTy &Stream);
1281 bool validateSendMsg(const OperandInfoTy &Msg,
1282 const OperandInfoTy &Op,
1283 const OperandInfoTy &Stream,
1284 const SMLoc Loc);
1286 bool parseHwregBody(OperandInfoTy &HwReg, int64_t &Offset, int64_t &Width);
1287 bool validateHwreg(const OperandInfoTy &HwReg,
1288 const int64_t Offset,
1289 const int64_t Width,
1290 const SMLoc Loc);
1292 void errorExpTgt();
1293 OperandMatchResultTy parseExpTgtImpl(StringRef Str, uint8_t &Val);
1294 SMLoc getFlatOffsetLoc(const OperandVector &Operands) const;
1296 bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc, const OperandVector &Operands);
1297 bool validateFlatOffset(const MCInst &Inst, const OperandVector &Operands);
1298 bool validateSOPLiteral(const MCInst &Inst) const;
1299 bool validateConstantBusLimitations(const MCInst &Inst);
1300 bool validateEarlyClobberLimitations(const MCInst &Inst);
1301 bool validateIntClampSupported(const MCInst &Inst);
1302 bool validateMIMGAtomicDMask(const MCInst &Inst);
1303 bool validateMIMGGatherDMask(const MCInst &Inst);
1304 bool validateMIMGDataSize(const MCInst &Inst);
1305 bool validateMIMGAddrSize(const MCInst &Inst);
1306 bool validateMIMGD16(const MCInst &Inst);
1307 bool validateMIMGDim(const MCInst &Inst);
1308 bool validateLdsDirect(const MCInst &Inst);
1309 bool validateOpSel(const MCInst &Inst);
1310 bool validateVccOperand(unsigned Reg) const;
1311 bool validateVOP3Literal(const MCInst &Inst) const;
1312 bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
1313 bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
1314 unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const;
1316 bool isId(const StringRef Id) const;
1317 bool isId(const AsmToken &Token, const StringRef Id) const;
1318 bool isToken(const AsmToken::TokenKind Kind) const;
1319 bool trySkipId(const StringRef Id);
1320 bool trySkipId(const StringRef Id, const AsmToken::TokenKind Kind);
1321 bool trySkipToken(const AsmToken::TokenKind Kind);
1322 bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg);
1323 bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string");
1324 void peekTokens(MutableArrayRef<AsmToken> Tokens);
1325 AsmToken::TokenKind getTokenKind() const;
1326 bool parseExpr(int64_t &Imm);
1327 bool parseExpr(OperandVector &Operands);
1328 StringRef getTokenStr() const;
1329 AsmToken peekToken();
1330 AsmToken getToken() const;
1331 SMLoc getLoc() const;
1332 void lex();
1334 public:
1335 OperandMatchResultTy parseOptionalOperand(OperandVector &Operands);
1336 OperandMatchResultTy parseOptionalOpr(OperandVector &Operands);
1338 OperandMatchResultTy parseExpTgt(OperandVector &Operands);
1339 OperandMatchResultTy parseSendMsgOp(OperandVector &Operands);
1340 OperandMatchResultTy parseInterpSlot(OperandVector &Operands);
1341 OperandMatchResultTy parseInterpAttr(OperandVector &Operands);
1342 OperandMatchResultTy parseSOppBrTarget(OperandVector &Operands);
1343 OperandMatchResultTy parseBoolReg(OperandVector &Operands);
1345 bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
1346 const unsigned MinVal,
1347 const unsigned MaxVal,
1348 const StringRef ErrMsg);
1349 OperandMatchResultTy parseSwizzleOp(OperandVector &Operands);
1350 bool parseSwizzleOffset(int64_t &Imm);
1351 bool parseSwizzleMacro(int64_t &Imm);
1352 bool parseSwizzleQuadPerm(int64_t &Imm);
1353 bool parseSwizzleBitmaskPerm(int64_t &Imm);
1354 bool parseSwizzleBroadcast(int64_t &Imm);
1355 bool parseSwizzleSwap(int64_t &Imm);
1356 bool parseSwizzleReverse(int64_t &Imm);
1358 OperandMatchResultTy parseGPRIdxMode(OperandVector &Operands);
1359 int64_t parseGPRIdxMacro();
1361 void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false); }
1362 void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, false); }
1363 void cvtMubufAtomicReturn(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, true); }
1364 void cvtMubufLds(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false, true); }
1365 void cvtMtbuf(MCInst &Inst, const OperandVector &Operands);
1367 AMDGPUOperand::Ptr defaultDLC() const;
1368 AMDGPUOperand::Ptr defaultGLC() const;
1369 AMDGPUOperand::Ptr defaultSLC() const;
1371 AMDGPUOperand::Ptr defaultSMRDOffset8() const;
1372 AMDGPUOperand::Ptr defaultSMRDOffset20() const;
1373 AMDGPUOperand::Ptr defaultSMRDLiteralOffset() const;
1374 AMDGPUOperand::Ptr defaultFlatOffset() const;
1376 OperandMatchResultTy parseOModOperand(OperandVector &Operands);
1378 void cvtVOP3(MCInst &Inst, const OperandVector &Operands,
1379 OptionalImmIndexMap &OptionalIdx);
1380 void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands);
1381 void cvtVOP3(MCInst &Inst, const OperandVector &Operands);
1382 void cvtVOP3P(MCInst &Inst, const OperandVector &Operands);
1384 void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands);
1386 void cvtMIMG(MCInst &Inst, const OperandVector &Operands,
1387 bool IsAtomic = false);
1388 void cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands);
1390 OperandMatchResultTy parseDim(OperandVector &Operands);
1391 OperandMatchResultTy parseDPP8(OperandVector &Operands);
1392 OperandMatchResultTy parseDPPCtrl(OperandVector &Operands);
1393 AMDGPUOperand::Ptr defaultRowMask() const;
1394 AMDGPUOperand::Ptr defaultBankMask() const;
1395 AMDGPUOperand::Ptr defaultBoundCtrl() const;
1396 AMDGPUOperand::Ptr defaultFI() const;
1397 void cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8 = false);
1398 void cvtDPP8(MCInst &Inst, const OperandVector &Operands) { cvtDPP(Inst, Operands, true); }
1400 OperandMatchResultTy parseSDWASel(OperandVector &Operands, StringRef Prefix,
1401 AMDGPUOperand::ImmTy Type);
1402 OperandMatchResultTy parseSDWADstUnused(OperandVector &Operands);
1403 void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands);
1404 void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands);
1405 void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands);
1406 void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands);
1407 void cvtSDWA(MCInst &Inst, const OperandVector &Operands,
1408 uint64_t BasicInstType, bool skipVcc = false);
1410 AMDGPUOperand::Ptr defaultBLGP() const;
1411 AMDGPUOperand::Ptr defaultCBSZ() const;
1412 AMDGPUOperand::Ptr defaultABID() const;
1414 OperandMatchResultTy parseEndpgmOp(OperandVector &Operands);
1415 AMDGPUOperand::Ptr defaultEndpgmImmOperands() const;
1418 struct OptionalOperand {
1419 const char *Name;
1420 AMDGPUOperand::ImmTy Type;
1421 bool IsBit;
1422 bool (*ConvertResult)(int64_t&);
1425 } // end anonymous namespace
1427 // May be called with an integer type of equivalent bitwidth.
1428 static const fltSemantics *getFltSemantics(unsigned Size) {
1429 switch (Size) {
1430 case 4:
1431 return &APFloat::IEEEsingle();
1432 case 8:
1433 return &APFloat::IEEEdouble();
1434 case 2:
1435 return &APFloat::IEEEhalf();
1436 default:
1437 llvm_unreachable("unsupported fp type");
1441 static const fltSemantics *getFltSemantics(MVT VT) {
1442 return getFltSemantics(VT.getSizeInBits() / 8);
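// Pick the fltSemantics implied by an operand's OperandType (taken from the
// instruction description); used when converting literal tokens below.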
1445 static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
1446 switch (OperandType) {
1447 case AMDGPU::OPERAND_REG_IMM_INT32:
1448 case AMDGPU::OPERAND_REG_IMM_FP32:
1449 case AMDGPU::OPERAND_REG_INLINE_C_INT32:
1450 case AMDGPU::OPERAND_REG_INLINE_C_FP32:
1451 case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
1452 case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
1453 return &APFloat::IEEEsingle();
1454 case AMDGPU::OPERAND_REG_IMM_INT64:
1455 case AMDGPU::OPERAND_REG_IMM_FP64:
1456 case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1457 case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1458 return &APFloat::IEEEdouble();
1459 case AMDGPU::OPERAND_REG_IMM_INT16:
1460 case AMDGPU::OPERAND_REG_IMM_FP16:
1461 case AMDGPU::OPERAND_REG_INLINE_C_INT16:
1462 case AMDGPU::OPERAND_REG_INLINE_C_FP16:
1463 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
1464 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
1465 case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
1466 case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
1467 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
1468 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
1469 case AMDGPU::OPERAND_REG_IMM_V2INT16:
1470 case AMDGPU::OPERAND_REG_IMM_V2FP16:
1471 return &APFloat::IEEEhalf();
1472 default:
1473 llvm_unreachable("unsupported fp type");
1477 //===----------------------------------------------------------------------===//
1478 // Operand
1479 //===----------------------------------------------------------------------===//
1481 static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) {
1482 bool Lost;
1484   // Convert the literal to the target operand's floating-point semantics
1485 APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT),
1486 APFloat::rmNearestTiesToEven,
1487 &Lost);
1488   // We allow precision loss but not overflow or underflow
1489 if (Status != APFloat::opOK &&
1490 Lost &&
1491 ((Status & APFloat::opOverflow) != 0 ||
1492 (Status & APFloat::opUnderflow) != 0)) {
1493 return false;
1496 return true;
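// A value can be truncated to Size bits without losing information iff it
// fits as either an unsigned or a signed Size-bit integer.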
1499 static bool isSafeTruncation(int64_t Val, unsigned Size) {
1500 return isUIntN(Size, Val) || isIntN(Size, Val);
1503 bool AMDGPUOperand::isInlinableImm(MVT type) const {
1505 // This is a hack to enable named inline values like
1506 // shared_base with both 32-bit and 64-bit operands.
1507 // Note that these values are defined as
1508 // 32-bit operands only.
1509 if (isInlineValue()) {
1510 return true;
1513 if (!isImmTy(ImmTyNone)) {
1514 // Only plain immediates are inlinable (e.g. "clamp" attribute is not)
1515 return false;
1517 // TODO: We should avoid using host float here. It would be better to
1518 // check the float bit values which is what a few other places do.
1519 // We've had bot failures before due to weird NaN support on mips hosts.
1521 APInt Literal(64, Imm.Val);
1523 if (Imm.IsFPImm) { // We got fp literal token
1524 if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
1525 return AMDGPU::isInlinableLiteral64(Imm.Val,
1526 AsmParser->hasInv2PiInlineImm());
1529 APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
1530 if (!canLosslesslyConvertToFPType(FPLiteral, type))
1531 return false;
1533 if (type.getScalarSizeInBits() == 16) {
1534 return AMDGPU::isInlinableLiteral16(
1535 static_cast<int16_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
1536 AsmParser->hasInv2PiInlineImm());
1539 // Check if single precision literal is inlinable
1540 return AMDGPU::isInlinableLiteral32(
1541 static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
1542 AsmParser->hasInv2PiInlineImm());
1545 // We got int literal token.
1546 if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
1547 return AMDGPU::isInlinableLiteral64(Imm.Val,
1548 AsmParser->hasInv2PiInlineImm());
1551 if (!isSafeTruncation(Imm.Val, type.getScalarSizeInBits())) {
1552 return false;
1555 if (type.getScalarSizeInBits() == 16) {
1556 return AMDGPU::isInlinableLiteral16(
1557 static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()),
1558 AsmParser->hasInv2PiInlineImm());
1561 return AMDGPU::isInlinableLiteral32(
1562 static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()),
1563 AsmParser->hasInv2PiInlineImm());
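// For reference (a sketch of the inline-constant rules; see AMDGPUBaseInfo):
// integers -16..64 and +-0.5, +-1.0, +-2.0, +-4.0 (plus 1/(2*pi) on targets
// with FeatureInv2PiInlineImm) encode as inline constants.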
1566 bool AMDGPUOperand::isLiteralImm(MVT type) const {
1567 // Check that this immediate can be added as literal
1568 if (!isImmTy(ImmTyNone)) {
1569 return false;
1572 if (!Imm.IsFPImm) {
1573 // We got int literal token.
1575 if (type == MVT::f64 && hasFPModifiers()) {
1576 // Cannot apply fp modifiers to int literals preserving the same semantics
1577 // for VOP1/2/C and VOP3 because of integer truncation. To avoid ambiguity,
1578 // disable these cases.
1579 return false;
1582 unsigned Size = type.getSizeInBits();
1583 if (Size == 64)
1584 Size = 32;
1586 // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP
1587 // types.
1588 return isSafeTruncation(Imm.Val, Size);
1591 // We got fp literal token
1592 if (type == MVT::f64) { // Expected 64-bit fp operand
1593     // The low 32 bits of the literal would be set to zeroes, but we accept such literals
1594 return true;
1597 if (type == MVT::i64) { // Expected 64-bit int operand
1598 // We don't allow fp literals in 64-bit integer instructions. It is
1599 // unclear how we should encode them.
1600 return false;
1603 // We allow fp literals with f16x2 operands assuming that the specified
1604 // literal goes into the lower half and the upper half is zero. We also
1605   // require that the literal can be losslessly converted to f16.
1606 MVT ExpectedType = (type == MVT::v2f16)? MVT::f16 :
1607 (type == MVT::v2i16)? MVT::i16 : type;
1609 APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
1610 return canLosslesslyConvertToFPType(FPLiteral, ExpectedType);
1613 bool AMDGPUOperand::isRegClass(unsigned RCID) const {
1614 return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg());
1617 bool AMDGPUOperand::isSDWAOperand(MVT type) const {
1618 if (AsmParser->isVI())
1619 return isVReg32();
1620 else if (AsmParser->isGFX9() || AsmParser->isGFX10())
1621 return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type);
1622 else
1623 return false;
1626 bool AMDGPUOperand::isSDWAFP16Operand() const {
1627 return isSDWAOperand(MVT::f16);
1630 bool AMDGPUOperand::isSDWAFP32Operand() const {
1631 return isSDWAOperand(MVT::f32);
1634 bool AMDGPUOperand::isSDWAInt16Operand() const {
1635 return isSDWAOperand(MVT::i16);
1638 bool AMDGPUOperand::isSDWAInt32Operand() const {
1639 return isSDWAOperand(MVT::i32);
1642 bool AMDGPUOperand::isBoolReg() const {
1643 return (AsmParser->getFeatureBits()[AMDGPU::FeatureWavefrontSize64] && isSCSrcB64()) ||
1644 (AsmParser->getFeatureBits()[AMDGPU::FeatureWavefrontSize32] && isSCSrcB32());
1647 uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const
1649 assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
1650 assert(Size == 2 || Size == 4 || Size == 8);
1652 const uint64_t FpSignMask = (1ULL << (Size * 8 - 1));
1654 if (Imm.Mods.Abs) {
1655 Val &= ~FpSignMask;
1657 if (Imm.Mods.Neg) {
1658 Val ^= FpSignMask;
1661 return Val;
1664 void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const {
1665 if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()),
1666 Inst.getNumOperands())) {
1667 addLiteralImmOperand(Inst, Imm.Val,
1668 ApplyModifiers &
1669 isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
1670 } else {
1671 assert(!isImmTy(ImmTyNone) || !hasModifiers());
1672 Inst.addOperand(MCOperand::createImm(Imm.Val));
1676 void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const {
1677 const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode());
1678 auto OpNum = Inst.getNumOperands();
1679 // Check that this operand accepts literals
1680 assert(AMDGPU::isSISrcOperand(InstDesc, OpNum));
1682 if (ApplyModifiers) {
1683 assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum));
1684 const unsigned Size = Imm.IsFPImm ? sizeof(double) : getOperandSize(InstDesc, OpNum);
1685 Val = applyInputFPModifiers(Val, Size);
1688 APInt Literal(64, Val);
1689 uint8_t OpTy = InstDesc.OpInfo[OpNum].OperandType;
1691 if (Imm.IsFPImm) { // We got fp literal token
1692 switch (OpTy) {
1693 case AMDGPU::OPERAND_REG_IMM_INT64:
1694 case AMDGPU::OPERAND_REG_IMM_FP64:
1695 case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1696 case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1697 if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(),
1698 AsmParser->hasInv2PiInlineImm())) {
1699 Inst.addOperand(MCOperand::createImm(Literal.getZExtValue()));
1700 return;
1703 // Non-inlineable
1704 if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) { // Expected 64-bit fp operand
1705 // For fp operands we check if low 32 bits are zeros
1706 if (Literal.getLoBits(32) != 0) {
1707 const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(),
1708 "Can't encode literal as exact 64-bit floating-point operand. "
1709 "Low 32-bits will be set to zero");
1712 Inst.addOperand(MCOperand::createImm(Literal.lshr(32).getZExtValue()));
1713 return;
1716 // We don't allow fp literals in 64-bit integer instructions. It is
1717 // unclear how we should encode them. This case should be checked earlier
1718 // in predicate methods (isLiteralImm())
1719 llvm_unreachable("fp literal in 64-bit integer instruction.");
1721 case AMDGPU::OPERAND_REG_IMM_INT32:
1722 case AMDGPU::OPERAND_REG_IMM_FP32:
1723 case AMDGPU::OPERAND_REG_INLINE_C_INT32:
1724 case AMDGPU::OPERAND_REG_INLINE_C_FP32:
1725 case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
1726 case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
1727 case AMDGPU::OPERAND_REG_IMM_INT16:
1728 case AMDGPU::OPERAND_REG_IMM_FP16:
1729 case AMDGPU::OPERAND_REG_INLINE_C_INT16:
1730 case AMDGPU::OPERAND_REG_INLINE_C_FP16:
1731 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
1732 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
1733 case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
1734 case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
1735 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
1736 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
1737 case AMDGPU::OPERAND_REG_IMM_V2INT16:
1738 case AMDGPU::OPERAND_REG_IMM_V2FP16: {
1739 bool lost;
1740 APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
1741     // Convert the literal to the operand type's floating-point semantics
1742 FPLiteral.convert(*getOpFltSemantics(OpTy),
1743 APFloat::rmNearestTiesToEven, &lost);
1744     // We allow precision loss but not overflow or underflow. This should be
1745 // checked earlier in isLiteralImm()
1747 uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
1748 Inst.addOperand(MCOperand::createImm(ImmVal));
1749 return;
1751 default:
1752 llvm_unreachable("invalid operand size");
1755 return;
1758 // We got int literal token.
1759 // Only sign extend inline immediates.
1760 switch (OpTy) {
1761 case AMDGPU::OPERAND_REG_IMM_INT32:
1762 case AMDGPU::OPERAND_REG_IMM_FP32:
1763 case AMDGPU::OPERAND_REG_INLINE_C_INT32:
1764 case AMDGPU::OPERAND_REG_INLINE_C_FP32:
1765 case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
1766 case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
1767 case AMDGPU::OPERAND_REG_IMM_V2INT16:
1768 case AMDGPU::OPERAND_REG_IMM_V2FP16:
1769 if (isSafeTruncation(Val, 32) &&
1770 AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val),
1771 AsmParser->hasInv2PiInlineImm())) {
1772 Inst.addOperand(MCOperand::createImm(Val));
1773 return;
1776 Inst.addOperand(MCOperand::createImm(Val & 0xffffffff));
1777 return;
1779 case AMDGPU::OPERAND_REG_IMM_INT64:
1780 case AMDGPU::OPERAND_REG_IMM_FP64:
1781 case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1782 case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1783 if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) {
1784 Inst.addOperand(MCOperand::createImm(Val));
1785 return;
1788 Inst.addOperand(MCOperand::createImm(Lo_32(Val)));
1789 return;
1791 case AMDGPU::OPERAND_REG_IMM_INT16:
1792 case AMDGPU::OPERAND_REG_IMM_FP16:
1793 case AMDGPU::OPERAND_REG_INLINE_C_INT16:
1794 case AMDGPU::OPERAND_REG_INLINE_C_FP16:
1795 case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
1796 case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
1797 if (isSafeTruncation(Val, 16) &&
1798 AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
1799 AsmParser->hasInv2PiInlineImm())) {
1800 Inst.addOperand(MCOperand::createImm(Val));
1801 return;
1804 Inst.addOperand(MCOperand::createImm(Val & 0xffff));
1805 return;
1807 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
1808 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
1809 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
1810 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: {
1811 assert(isSafeTruncation(Val, 16));
1812 assert(AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
1813 AsmParser->hasInv2PiInlineImm()));
1815 Inst.addOperand(MCOperand::createImm(Val));
1816 return;
1818 default:
1819 llvm_unreachable("invalid operand size");
1823 template <unsigned Bitwidth>
1824 void AMDGPUOperand::addKImmFPOperands(MCInst &Inst, unsigned N) const {
1825 APInt Literal(64, Imm.Val);
1827 if (!Imm.IsFPImm) {
1828 // We got int literal token.
1829 Inst.addOperand(MCOperand::createImm(Literal.getLoBits(Bitwidth).getZExtValue()));
1830 return;
1833 bool Lost;
1834 APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
1835 FPLiteral.convert(*getFltSemantics(Bitwidth / 8),
1836 APFloat::rmNearestTiesToEven, &Lost);
1837 Inst.addOperand(MCOperand::createImm(FPLiteral.bitcastToAPInt().getZExtValue()));
1840 void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const {
1841 Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI())));
1844 static bool isInlineValue(unsigned Reg) {
1845 switch (Reg) {
1846 case AMDGPU::SRC_SHARED_BASE:
1847 case AMDGPU::SRC_SHARED_LIMIT:
1848 case AMDGPU::SRC_PRIVATE_BASE:
1849 case AMDGPU::SRC_PRIVATE_LIMIT:
1850 case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
1851 return true;
1852 case AMDGPU::SRC_VCCZ:
1853 case AMDGPU::SRC_EXECZ:
1854 case AMDGPU::SRC_SCC:
1855 return true;
1856 default:
1857 return false;
1861 bool AMDGPUOperand::isInlineValue() const {
1862 return isRegKind() && ::isInlineValue(getReg());
1865 //===----------------------------------------------------------------------===//
1866 // AsmParser
1867 //===----------------------------------------------------------------------===//
1869 static int getRegClass(RegisterKind Is, unsigned RegWidth) {
1870 if (Is == IS_VGPR) {
1871 switch (RegWidth) {
1872 default: return -1;
1873 case 1: return AMDGPU::VGPR_32RegClassID;
1874 case 2: return AMDGPU::VReg_64RegClassID;
1875 case 3: return AMDGPU::VReg_96RegClassID;
1876 case 4: return AMDGPU::VReg_128RegClassID;
1877 case 5: return AMDGPU::VReg_160RegClassID;
1878 case 8: return AMDGPU::VReg_256RegClassID;
1879 case 16: return AMDGPU::VReg_512RegClassID;
1880 case 32: return AMDGPU::VReg_1024RegClassID;
1882 } else if (Is == IS_TTMP) {
1883 switch (RegWidth) {
1884 default: return -1;
1885 case 1: return AMDGPU::TTMP_32RegClassID;
1886 case 2: return AMDGPU::TTMP_64RegClassID;
1887 case 4: return AMDGPU::TTMP_128RegClassID;
1888 case 8: return AMDGPU::TTMP_256RegClassID;
1889 case 16: return AMDGPU::TTMP_512RegClassID;
1891 } else if (Is == IS_SGPR) {
1892 switch (RegWidth) {
1893 default: return -1;
1894 case 1: return AMDGPU::SGPR_32RegClassID;
1895 case 2: return AMDGPU::SGPR_64RegClassID;
1896 case 4: return AMDGPU::SGPR_128RegClassID;
1897 case 8: return AMDGPU::SGPR_256RegClassID;
1898 case 16: return AMDGPU::SGPR_512RegClassID;
1900 } else if (Is == IS_AGPR) {
1901 switch (RegWidth) {
1902 default: return -1;
1903 case 1: return AMDGPU::AGPR_32RegClassID;
1904 case 2: return AMDGPU::AReg_64RegClassID;
1905 case 4: return AMDGPU::AReg_128RegClassID;
1906 case 16: return AMDGPU::AReg_512RegClassID;
1907 case 32: return AMDGPU::AReg_1024RegClassID;
1910 return -1;
1913 static unsigned getSpecialRegForName(StringRef RegName) {
1914 return StringSwitch<unsigned>(RegName)
1915 .Case("exec", AMDGPU::EXEC)
1916 .Case("vcc", AMDGPU::VCC)
1917 .Case("flat_scratch", AMDGPU::FLAT_SCR)
1918 .Case("xnack_mask", AMDGPU::XNACK_MASK)
1919 .Case("shared_base", AMDGPU::SRC_SHARED_BASE)
1920 .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE)
1921 .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT)
1922 .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT)
1923 .Case("private_base", AMDGPU::SRC_PRIVATE_BASE)
1924 .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE)
1925 .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
1926 .Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
1927 .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
1928 .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
1929 .Case("lds_direct", AMDGPU::LDS_DIRECT)
1930 .Case("src_lds_direct", AMDGPU::LDS_DIRECT)
1931 .Case("m0", AMDGPU::M0)
1932 .Case("vccz", AMDGPU::SRC_VCCZ)
1933 .Case("src_vccz", AMDGPU::SRC_VCCZ)
1934 .Case("execz", AMDGPU::SRC_EXECZ)
1935 .Case("src_execz", AMDGPU::SRC_EXECZ)
1936 .Case("scc", AMDGPU::SRC_SCC)
1937 .Case("src_scc", AMDGPU::SRC_SCC)
1938 .Case("tba", AMDGPU::TBA)
1939 .Case("tma", AMDGPU::TMA)
1940 .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO)
1941 .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI)
1942 .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO)
1943 .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI)
1944 .Case("vcc_lo", AMDGPU::VCC_LO)
1945 .Case("vcc_hi", AMDGPU::VCC_HI)
1946 .Case("exec_lo", AMDGPU::EXEC_LO)
1947 .Case("exec_hi", AMDGPU::EXEC_HI)
1948 .Case("tma_lo", AMDGPU::TMA_LO)
1949 .Case("tma_hi", AMDGPU::TMA_HI)
1950 .Case("tba_lo", AMDGPU::TBA_LO)
1951 .Case("tba_hi", AMDGPU::TBA_HI)
1952 .Case("null", AMDGPU::SGPR_NULL)
1953 .Default(0);
1956 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
1957 SMLoc &EndLoc) {
1958 auto R = parseRegister();
1959 if (!R) return true;
1960 assert(R->isReg());
1961 RegNo = R->getReg();
1962 StartLoc = R->getStartLoc();
1963 EndLoc = R->getEndLoc();
1964 return false;
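// Extend the register span being parsed with the next register Reg1.
// Known lo/hi pairs of special registers (e.g. vcc_lo, vcc_hi) merge into
// their 64-bit counterparts; for VGPR/SGPR/AGPR/TTMP spans, Reg1 must be
// the next consecutive register, which grows RegWidth by one.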
1967 bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth,
1968 RegisterKind RegKind, unsigned Reg1,
1969 unsigned RegNum) {
1970 switch (RegKind) {
1971 case IS_SPECIAL:
1972 if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) {
1973 Reg = AMDGPU::EXEC;
1974 RegWidth = 2;
1975 return true;
1977 if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) {
1978 Reg = AMDGPU::FLAT_SCR;
1979 RegWidth = 2;
1980 return true;
1982 if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) {
1983 Reg = AMDGPU::XNACK_MASK;
1984 RegWidth = 2;
1985 return true;
1987 if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) {
1988 Reg = AMDGPU::VCC;
1989 RegWidth = 2;
1990 return true;
1992 if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) {
1993 Reg = AMDGPU::TBA;
1994 RegWidth = 2;
1995 return true;
1997 if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) {
1998 Reg = AMDGPU::TMA;
1999 RegWidth = 2;
2000 return true;
2002 return false;
2003 case IS_VGPR:
2004 case IS_SGPR:
2005 case IS_AGPR:
2006 case IS_TTMP:
2007 if (Reg1 != Reg + RegWidth) {
2008 return false;
2010 RegWidth++;
2011 return true;
2012 default:
2013 llvm_unreachable("unexpected register kind");
2017 static const StringRef Registers[] = {
2018 { "v" },
2019 { "s" },
2020 { "ttmp" },
2021 { "acc" },
2022 { "a" },
2025 bool
2026 AMDGPUAsmParser::isRegister(const AsmToken &Token,
2027 const AsmToken &NextToken) const {
2029 // A list of consecutive registers: [s0,s1,s2,s3]
2030 if (Token.is(AsmToken::LBrac))
2031 return true;
2033 if (!Token.is(AsmToken::Identifier))
2034 return false;
2036 // A single register like s0 or a range of registers like s[0:1]
2038 StringRef RegName = Token.getString();
2040 for (StringRef Reg : Registers) {
2041 if (RegName.startswith(Reg)) {
2042 if (Reg.size() < RegName.size()) {
2043 unsigned RegNum;
2044 // A single register with an index: rXX
2045 if (!RegName.substr(Reg.size()).getAsInteger(10, RegNum))
2046 return true;
2047 } else {
2048 // A range of registers: r[XX:YY].
2049 if (NextToken.is(AsmToken::LBrac))
2050 return true;
2055 return getSpecialRegForName(RegName);
2058 bool
2059 AMDGPUAsmParser::isRegister()
2061 return isRegister(getToken(), peekToken());
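// Parse a register operand into RegKind/Reg/RegNum/RegWidth. Examples of
// the syntax handled below:
//   special names:    vcc, exec, flat_scratch, m0
//   single registers: v0, s7, a3, ttmp2
//   register ranges:  v[0:3], s[4:11] (the ":YY" part is optional)
//   register lists:   [s0,s1,s2,s3]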
2064 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
2065 unsigned &RegNum, unsigned &RegWidth,
2066 unsigned *DwordRegIndex) {
2067 if (DwordRegIndex) { *DwordRegIndex = 0; }
2068 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2069 if (getLexer().is(AsmToken::Identifier)) {
2070 StringRef RegName = Parser.getTok().getString();
2071 if ((Reg = getSpecialRegForName(RegName))) {
2072 Parser.Lex();
2073 RegKind = IS_SPECIAL;
2074 } else {
2075 unsigned RegNumIndex = 0;
2076 if (RegName[0] == 'v') {
2077 RegNumIndex = 1;
2078 RegKind = IS_VGPR;
2079 } else if (RegName[0] == 's') {
2080 RegNumIndex = 1;
2081 RegKind = IS_SGPR;
2082 } else if (RegName[0] == 'a') {
2083 RegNumIndex = RegName.startswith("acc") ? 3 : 1;
2084 RegKind = IS_AGPR;
2085 } else if (RegName.startswith("ttmp")) {
2086 RegNumIndex = strlen("ttmp");
2087 RegKind = IS_TTMP;
2088 } else {
2089 return false;
2091 if (RegName.size() > RegNumIndex) {
2092 // Single 32-bit register: vXX.
2093 if (RegName.substr(RegNumIndex).getAsInteger(10, RegNum))
2094 return false;
2095 Parser.Lex();
2096 RegWidth = 1;
2097 } else {
2098 // Range of registers: v[XX:YY]. ":YY" is optional.
2099 Parser.Lex();
2100 int64_t RegLo, RegHi;
2101 if (getLexer().isNot(AsmToken::LBrac))
2102 return false;
2103 Parser.Lex();
2105 if (getParser().parseAbsoluteExpression(RegLo))
2106 return false;
2108 const bool isRBrace = getLexer().is(AsmToken::RBrac);
2109 if (!isRBrace && getLexer().isNot(AsmToken::Colon))
2110 return false;
2111 Parser.Lex();
2113 if (isRBrace) {
2114 RegHi = RegLo;
2115 } else {
2116 if (getParser().parseAbsoluteExpression(RegHi))
2117 return false;
2119 if (getLexer().isNot(AsmToken::RBrac))
2120 return false;
2121 Parser.Lex();
2123 RegNum = (unsigned) RegLo;
2124 RegWidth = (RegHi - RegLo) + 1;
2127 } else if (getLexer().is(AsmToken::LBrac)) {
2128 // List of consecutive registers: [s0,s1,s2,s3]
2129 Parser.Lex();
2130 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, nullptr))
2131 return false;
2132 if (RegWidth != 1)
2133 return false;
2134 RegisterKind RegKind1;
2135 unsigned Reg1, RegNum1, RegWidth1;
2136 do {
2137 if (getLexer().is(AsmToken::Comma)) {
2138 Parser.Lex();
2139 } else if (getLexer().is(AsmToken::RBrac)) {
2140 Parser.Lex();
2141 break;
2142 } else if (ParseAMDGPURegister(RegKind1, Reg1, RegNum1, RegWidth1, nullptr)) {
2143 if (RegWidth1 != 1) {
2144 return false;
2146 if (RegKind1 != RegKind) {
2147 return false;
2149 if (!AddNextRegisterToList(Reg, RegWidth, RegKind1, Reg1, RegNum1)) {
2150 return false;
2152 } else {
2153 return false;
2155 } while (true);
2156 } else {
2157 return false;
2159 switch (RegKind) {
2160 case IS_SPECIAL:
2161 RegNum = 0;
2162 RegWidth = 1;
2163 break;
2164 case IS_VGPR:
2165 case IS_SGPR:
2166 case IS_AGPR:
2167 case IS_TTMP:
2169 unsigned Size = 1;
2170 if (RegKind == IS_SGPR || RegKind == IS_TTMP) {
2171 // SGPR and TTMP registers must be aligned. Max required alignment is 4 dwords.
2172 Size = std::min(RegWidth, 4u);
2174 if (RegNum % Size != 0)
2175 return false;
2176 if (DwordRegIndex) { *DwordRegIndex = RegNum; }
2177 RegNum = RegNum / Size;
2178 int RCID = getRegClass(RegKind, RegWidth);
2179 if (RCID == -1)
2180 return false;
2181 const MCRegisterClass RC = TRI->getRegClass(RCID);
2182 if (RegNum >= RC.getNumRegs())
2183 return false;
2184 Reg = RC.getRegister(RegNum);
2185 break;
2188 default:
2189 llvm_unreachable("unexpected register kind");
2192 if (!subtargetHasRegister(*TRI, Reg))
2193 return false;
2194 return true;
2197 Optional<StringRef>
2198 AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) {
2199 switch (RegKind) {
2200 case IS_VGPR:
2201 return StringRef(".amdgcn.next_free_vgpr");
2202 case IS_SGPR:
2203 return StringRef(".amdgcn.next_free_sgpr");
2204 default:
2205 return None;
2209 void AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) {
2210 auto SymbolName = getGprCountSymbolName(RegKind);
2211 assert(SymbolName && "initializing invalid register kind");
2212 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
2213 Sym->setVariableValue(MCConstantExpr::create(0, getContext()));
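// Bump the .amdgcn.next_free_{v,s}gpr symbol so it stays one past the
// highest dword register index referenced so far; e.g. a use of s[4:7]
// raises .amdgcn.next_free_sgpr to at least 8.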
2216 bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind,
2217 unsigned DwordRegIndex,
2218 unsigned RegWidth) {
2219 // Symbols are only defined for GCN targets
2220 if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6)
2221 return true;
2223 auto SymbolName = getGprCountSymbolName(RegKind);
2224 if (!SymbolName)
2225 return true;
2226 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
2228 int64_t NewMax = DwordRegIndex + RegWidth - 1;
2229 int64_t OldCount;
2231 if (!Sym->isVariable())
2232 return !Error(getParser().getTok().getLoc(),
2233 ".amdgcn.next_free_{v,s}gpr symbols must be variable");
2234 if (!Sym->getVariableValue(false)->evaluateAsAbsolute(OldCount))
2235 return !Error(
2236 getParser().getTok().getLoc(),
2237 ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions");
2239 if (OldCount <= NewMax)
2240 Sym->setVariableValue(MCConstantExpr::create(NewMax + 1, getContext()));
2242 return true;
2245 std::unique_ptr<AMDGPUOperand> AMDGPUAsmParser::parseRegister() {
2246 const auto &Tok = Parser.getTok();
2247 SMLoc StartLoc = Tok.getLoc();
2248 SMLoc EndLoc = Tok.getEndLoc();
2249 RegisterKind RegKind;
2250 unsigned Reg, RegNum, RegWidth, DwordRegIndex;
2252 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, &DwordRegIndex)) {
2253 //FIXME: improve error messages (bug 41303).
2254 Error(StartLoc, "not a valid operand.");
2255 return nullptr;
2257 if (AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
2258 if (!updateGprCountSymbols(RegKind, DwordRegIndex, RegWidth))
2259 return nullptr;
2260 } else
2261 KernelScope.usesRegister(RegKind, DwordRegIndex, RegWidth);
2262 return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc);
2265 OperandMatchResultTy
2266 AMDGPUAsmParser::parseImm(OperandVector &Operands, bool HasSP3AbsModifier) {
2267 // TODO: add syntactic sugar for 1/(2*PI)
2269 assert(!isRegister());
2270 assert(!isModifier());
2272 const auto& Tok = getToken();
2273 const auto& NextTok = peekToken();
2274 bool IsReal = Tok.is(AsmToken::Real);
2275 SMLoc S = getLoc();
2276 bool Negate = false;
2278 if (!IsReal && Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Real)) {
2279 lex();
2280 IsReal = true;
2281 Negate = true;
2284 if (IsReal) {
2285 // Floating-point expressions are not supported.
2286 // Only floating-point literals with an optional
2287 // sign are accepted.
2289 StringRef Num = getTokenStr();
2290 lex();
2292 APFloat RealVal(APFloat::IEEEdouble());
2293 auto roundMode = APFloat::rmNearestTiesToEven;
2294 if (RealVal.convertFromString(Num, roundMode) == APFloat::opInvalidOp) {
2295 return MatchOperand_ParseFail;
2297 if (Negate)
2298 RealVal.changeSign();
2300 Operands.push_back(
2301 AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S,
2302 AMDGPUOperand::ImmTyNone, true));
2304 return MatchOperand_Success;
2306 } else {
2307 int64_t IntVal;
2308 const MCExpr *Expr;
2309 SMLoc S = getLoc();
2311 if (HasSP3AbsModifier) {
2312 // This is a workaround for handling expressions
2313 // as arguments of SP3 'abs' modifier, for example:
2314 // |1.0|
2315 // |-1|
2316 // |1+x|
2317 // This syntax is not compatible with syntax of standard
2318 // MC expressions (due to the trailing '|').
2319 SMLoc EndLoc;
2320 if (getParser().parsePrimaryExpr(Expr, EndLoc))
2321 return MatchOperand_ParseFail;
2322 } else {
2323 if (Parser.parseExpression(Expr))
2324 return MatchOperand_ParseFail;
2327 if (Expr->evaluateAsAbsolute(IntVal)) {
2328 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
2329 } else {
2330 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
2333 return MatchOperand_Success;
2336 return MatchOperand_NoMatch;
2339 OperandMatchResultTy
2340 AMDGPUAsmParser::parseReg(OperandVector &Operands) {
2341 if (!isRegister())
2342 return MatchOperand_NoMatch;
2344 if (auto R = parseRegister()) {
2345 assert(R->isReg());
2346 Operands.push_back(std::move(R));
2347 return MatchOperand_Success;
2349 return MatchOperand_ParseFail;
2352 OperandMatchResultTy
2353 AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod) {
2354 auto res = parseReg(Operands);
2355 if (res != MatchOperand_NoMatch) {
2356 return res;
2357 } else if (isModifier()) {
2358 return MatchOperand_NoMatch;
2359 } else {
2360 return parseImm(Operands, HasSP3AbsMod);
2364 bool
2365 AMDGPUAsmParser::isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2366 if (Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::LParen)) {
2367 const auto &str = Token.getString();
2368 return str == "abs" || str == "neg" || str == "sext";
2370 return false;
2373 bool
2374 AMDGPUAsmParser::isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const {
2375 return Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::Colon);
2378 bool
2379 AMDGPUAsmParser::isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2380 return isNamedOperandModifier(Token, NextToken) || Token.is(AsmToken::Pipe);
2383 bool
2384 AMDGPUAsmParser::isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2385 return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken);
2388 // Check if this is an operand modifier or an opcode modifier
2389 // which may look like an expression but is not. We should
2390 // avoid parsing these modifiers as expressions. Currently
2391 // recognized sequences are:
2392 // |...|
2393 // abs(...)
2394 // neg(...)
2395 // sext(...)
2396 // -reg
2397 // -|...|
2398 // -abs(...)
2399 // name:...
2400 // Note that simple opcode modifiers like 'gds' may be parsed as
2401 // expressions; this is a special case. See getExpressionAsToken.
2403 bool
2404 AMDGPUAsmParser::isModifier() {
2406 AsmToken Tok = getToken();
2407 AsmToken NextToken[2];
2408 peekTokens(NextToken);
2410 return isOperandModifier(Tok, NextToken[0]) ||
2411 (Tok.is(AsmToken::Minus) && isRegOrOperandModifier(NextToken[0], NextToken[1])) ||
2412 isOpcodeModifierWithVal(Tok, NextToken[0]);
2415 // Check if the current token is an SP3 'neg' modifier.
2416 // Currently this modifier is allowed in the following contexts:
2418 // 1. Before a register, e.g. "-v0", "-v[...]" or "-[v0,v1]".
2419 // 2. Before an 'abs' modifier: -abs(...)
2420 // 3. Before an SP3 'abs' modifier: -|...|
2422 // In all other cases "-" is handled as a part
2423 // of an expression that follows the sign.
2425 // Note: When "-" is followed by an integer literal,
2426 // it is interpreted as integer negation rather than
2427 // a floating-point NEG modifier applied to the literal.
2428 // Besides being counter-intuitive, such use of the floating-point
2429 // NEG modifier would have resulted in different meanings
2430 // of integer literals used with VOP1/2/C and VOP3,
2431 // for example:
2432 // v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF
2433 // v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001
2434 // Negative fp literals with a preceding "-" are
2435 // handled likewise, for uniformity.
2437 bool
2438 AMDGPUAsmParser::parseSP3NegModifier() {
2440 AsmToken NextToken[2];
2441 peekTokens(NextToken);
2443 if (isToken(AsmToken::Minus) &&
2444 (isRegister(NextToken[0], NextToken[1]) ||
2445 NextToken[0].is(AsmToken::Pipe) ||
2446 isId(NextToken[0], "abs"))) {
2447 lex();
2448 return true;
2451 return false;
2454 OperandMatchResultTy
2455 AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands,
2456 bool AllowImm) {
2457 bool Neg, SP3Neg;
2458 bool Abs, SP3Abs;
2459 SMLoc Loc;
2461 // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead.
2462 if (isToken(AsmToken::Minus) && peekToken().is(AsmToken::Minus)) {
2463 Error(getLoc(), "invalid syntax, expected 'neg' modifier");
2464 return MatchOperand_ParseFail;
2467 SP3Neg = parseSP3NegModifier();
2469 Loc = getLoc();
2470 Neg = trySkipId("neg");
2471 if (Neg && SP3Neg) {
2472 Error(Loc, "expected register or immediate");
2473 return MatchOperand_ParseFail;
2475 if (Neg && !skipToken(AsmToken::LParen, "expected left paren after neg"))
2476 return MatchOperand_ParseFail;
2478 Abs = trySkipId("abs");
2479 if (Abs && !skipToken(AsmToken::LParen, "expected left paren after abs"))
2480 return MatchOperand_ParseFail;
2482 Loc = getLoc();
2483 SP3Abs = trySkipToken(AsmToken::Pipe);
2484 if (Abs && SP3Abs) {
2485 Error(Loc, "expected register or immediate");
2486 return MatchOperand_ParseFail;
2489 OperandMatchResultTy Res;
2490 if (AllowImm) {
2491 Res = parseRegOrImm(Operands, SP3Abs);
2492 } else {
2493 Res = parseReg(Operands);
2495 if (Res != MatchOperand_Success) {
2496 return (SP3Neg || Neg || SP3Abs || Abs)? MatchOperand_ParseFail : Res;
2499 if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar"))
2500 return MatchOperand_ParseFail;
2501 if (Abs && !skipToken(AsmToken::RParen, "expected closing parentheses"))
2502 return MatchOperand_ParseFail;
2503 if (Neg && !skipToken(AsmToken::RParen, "expected closing parentheses"))
2504 return MatchOperand_ParseFail;
2506 AMDGPUOperand::Modifiers Mods;
2507 Mods.Abs = Abs || SP3Abs;
2508 Mods.Neg = Neg || SP3Neg;
2510 if (Mods.hasFPModifiers()) {
2511 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
2512 if (Op.isExpr()) {
2513 Error(Op.getStartLoc(), "expected an absolute expression");
2514 return MatchOperand_ParseFail;
2516 Op.setModifiers(Mods);
2518 return MatchOperand_Success;
2521 OperandMatchResultTy
2522 AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands,
2523 bool AllowImm) {
2524 bool Sext = trySkipId("sext");
2525 if (Sext && !skipToken(AsmToken::LParen, "expected left paren after sext"))
2526 return MatchOperand_ParseFail;
2528 OperandMatchResultTy Res;
2529 if (AllowImm) {
2530 Res = parseRegOrImm(Operands);
2531 } else {
2532 Res = parseReg(Operands);
2534 if (Res != MatchOperand_Success) {
2535 return Sext? MatchOperand_ParseFail : Res;
2538 if (Sext && !skipToken(AsmToken::RParen, "expected closing parentheses"))
2539 return MatchOperand_ParseFail;
2541 AMDGPUOperand::Modifiers Mods;
2542 Mods.Sext = Sext;
2544 if (Mods.hasIntModifiers()) {
2545 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
2546 if (Op.isExpr()) {
2547 Error(Op.getStartLoc(), "expected an absolute expression");
2548 return MatchOperand_ParseFail;
2550 Op.setModifiers(Mods);
2553 return MatchOperand_Success;
2556 OperandMatchResultTy
2557 AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) {
2558 return parseRegOrImmWithFPInputMods(Operands, false);
2561 OperandMatchResultTy
2562 AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) {
2563 return parseRegOrImmWithIntInputMods(Operands, false);
2566 OperandMatchResultTy AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) {
2567 auto Loc = getLoc();
2568 if (trySkipId("off")) {
2569 Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc,
2570 AMDGPUOperand::ImmTyOff, false));
2571 return MatchOperand_Success;
2574 if (!isRegister())
2575 return MatchOperand_NoMatch;
2577 std::unique_ptr<AMDGPUOperand> Reg = parseRegister();
2578 if (Reg) {
2579 Operands.push_back(std::move(Reg));
2580 return MatchOperand_Success;
2583 return MatchOperand_ParseFail;
2587 unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
2588 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
2590 if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) ||
2591 (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) ||
2592 (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) ||
2593 (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) )
2594 return Match_InvalidOperand;
2596 if ((TSFlags & SIInstrFlags::VOP3) &&
2597 (TSFlags & SIInstrFlags::VOPAsmPrefer32Bit) &&
2598 getForcedEncodingSize() != 64)
2599 return Match_PreferE32;
2601 if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
2602 Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
2603 // v_mac_f32/16 allow only dst_sel == DWORD;
2604 auto OpNum =
2605 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel);
2606 const auto &Op = Inst.getOperand(OpNum);
2607 if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) {
2608 return Match_InvalidOperand;
2612 return Match_Success;
2615 // What asm variants we should check
2616 ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const {
2617 if (getForcedEncodingSize() == 32) {
2618 static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT};
2619 return makeArrayRef(Variants);
2622 if (isForcedVOP3()) {
2623 static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3};
2624 return makeArrayRef(Variants);
2627 if (isForcedSDWA()) {
2628 static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA,
2629 AMDGPUAsmVariants::SDWA9};
2630 return makeArrayRef(Variants);
2633 if (isForcedDPP()) {
2634 static const unsigned Variants[] = {AMDGPUAsmVariants::DPP};
2635 return makeArrayRef(Variants);
2638 static const unsigned Variants[] = {
2639 AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3,
2640 AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9, AMDGPUAsmVariants::DPP
2643 return makeArrayRef(Variants);
2646 unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const {
2647 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
2648 const unsigned Num = Desc.getNumImplicitUses();
2649 for (unsigned i = 0; i < Num; ++i) {
2650 unsigned Reg = Desc.ImplicitUses[i];
2651 switch (Reg) {
2652 case AMDGPU::FLAT_SCR:
2653 case AMDGPU::VCC:
2654 case AMDGPU::VCC_LO:
2655 case AMDGPU::VCC_HI:
2656 case AMDGPU::M0:
2657 case AMDGPU::SGPR_NULL:
2658 return Reg;
2659 default:
2660 break;
2663 return AMDGPU::NoRegister;
2666 // NB: This code is correct only when used to check constant
2667 // bus limitations because GFX7 supports no f16 inline constants.
2668 // Note that there are no cases when a GFX7 opcode violates
2669 // constant bus limitations due to the use of an f16 constant.
2670 bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst,
2671 unsigned OpIdx) const {
2672 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
2674 if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) {
2675 return false;
2678 const MCOperand &MO = Inst.getOperand(OpIdx);
2680 int64_t Val = MO.getImm();
2681 auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx);
2683 switch (OpSize) { // expected operand size
2684 case 8:
2685 return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm());
2686 case 4:
2687 return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm());
2688 case 2: {
2689 const unsigned OperandType = Desc.OpInfo[OpIdx].OperandType;
2690 if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 ||
2691 OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16 ||
2692 OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2INT16 ||
2693 OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2FP16 ||
2694 OperandType == AMDGPU::OPERAND_REG_IMM_V2INT16 ||
2695 OperandType == AMDGPU::OPERAND_REG_IMM_V2FP16) {
2696 return AMDGPU::isInlinableLiteralV216(Val, hasInv2PiInlineImm());
2697 } else {
2698 return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm());
2701 default:
2702 llvm_unreachable("invalid operand size");
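// An operand occupies the constant bus if it is a literal (an immediate
// that cannot be encoded as an inline constant), an SGPR, or an expression.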
2706 bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) {
2707 const MCOperand &MO = Inst.getOperand(OpIdx);
2708 if (MO.isImm()) {
2709 return !isInlineConstant(Inst, OpIdx);
2711 return !MO.isReg() ||
2712 isSGPR(mc2PseudoReg(MO.getReg()), getContext().getRegisterInfo());
2715 bool AMDGPUAsmParser::validateConstantBusLimitations(const MCInst &Inst) {
2716 const unsigned Opcode = Inst.getOpcode();
2717 const MCInstrDesc &Desc = MII.get(Opcode);
2718 unsigned ConstantBusUseCount = 0;
2719 unsigned NumLiterals = 0;
2720 unsigned LiteralSize;
2722 if (Desc.TSFlags &
2723 (SIInstrFlags::VOPC |
2724 SIInstrFlags::VOP1 | SIInstrFlags::VOP2 |
2725 SIInstrFlags::VOP3 | SIInstrFlags::VOP3P |
2726 SIInstrFlags::SDWA)) {
2727 // Check special imm operands (used by madmk, etc)
2728 if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1) {
2729 ++ConstantBusUseCount;
2732 SmallDenseSet<unsigned> SGPRsUsed;
2733 unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst);
2734 if (SGPRUsed != AMDGPU::NoRegister) {
2735 SGPRsUsed.insert(SGPRUsed);
2736 ++ConstantBusUseCount;
2739 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
2740 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
2741 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
2743 const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };
2745 for (int OpIdx : OpIndices) {
2746 if (OpIdx == -1) break;
2748 const MCOperand &MO = Inst.getOperand(OpIdx);
2749 if (usesConstantBus(Inst, OpIdx)) {
2750 if (MO.isReg()) {
2751 const unsigned Reg = mc2PseudoReg(MO.getReg());
2752 // Pairs of registers with a partial intersection like these:
2753 // s0, s[0:1]
2754 // flat_scratch_lo, flat_scratch
2755 // flat_scratch_lo, flat_scratch_hi
2756 // are theoretically valid but they are disabled anyway.
2757 // Note that this code mimics SIInstrInfo::verifyInstruction
2758 if (!SGPRsUsed.count(Reg)) {
2759 SGPRsUsed.insert(Reg);
2760 ++ConstantBusUseCount;
2762 } else { // Expression or a literal
2764 if (Desc.OpInfo[OpIdx].OperandType == MCOI::OPERAND_IMMEDIATE)
2765 continue; // special operand like VINTERP attr_chan
2767 // An instruction may use only one literal.
2768 // This has been validated in a previous step.
2769 // See validateVOP3Literal.
2770 // This literal may be used in more than one operand.
2771 // If all these operands are of the same size,
2772 // this literal counts as one scalar value.
2773 // Otherwise it counts as 2 scalar values.
2774 // See "GFX10 Shader Programming", section 3.6.2.3.
2776 unsigned Size = AMDGPU::getOperandSize(Desc, OpIdx);
2777 if (Size < 4) Size = 4;
2779 if (NumLiterals == 0) {
2780 NumLiterals = 1;
2781 LiteralSize = Size;
2782 } else if (LiteralSize != Size) {
2783 NumLiterals = 2;
2789 ConstantBusUseCount += NumLiterals;
2791 if (isGFX10())
2792 return ConstantBusUseCount <= 2;
2794 return ConstantBusUseCount <= 1;
2797 bool AMDGPUAsmParser::validateEarlyClobberLimitations(const MCInst &Inst) {
2798 const unsigned Opcode = Inst.getOpcode();
2799 const MCInstrDesc &Desc = MII.get(Opcode);
2801 const int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst);
2802 if (DstIdx == -1 ||
2803 Desc.getOperandConstraint(DstIdx, MCOI::EARLY_CLOBBER) == -1) {
2804 return true;
2807 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2809 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
2810 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
2811 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
2813 assert(DstIdx != -1);
2814 const MCOperand &Dst = Inst.getOperand(DstIdx);
2815 assert(Dst.isReg());
2816 const unsigned DstReg = mc2PseudoReg(Dst.getReg());
2818 const int SrcIndices[] = { Src0Idx, Src1Idx, Src2Idx };
2820 for (int SrcIdx : SrcIndices) {
2821 if (SrcIdx == -1) break;
2822 const MCOperand &Src = Inst.getOperand(SrcIdx);
2823 if (Src.isReg()) {
2824 const unsigned SrcReg = mc2PseudoReg(Src.getReg());
2825 if (isRegIntersect(DstReg, SrcReg, TRI)) {
2826 return false;
2831 return true;
2834 bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) {
2836 const unsigned Opc = Inst.getOpcode();
2837 const MCInstrDesc &Desc = MII.get(Opc);
2839 if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) {
2840 int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp);
2841 assert(ClampIdx != -1);
2842 return Inst.getOperand(ClampIdx).getImm() == 0;
2845 return true;
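// Check that the MIMG vdata register size matches the number of enabled
// dmask channels (or 4 for gather4), halved (rounding up) for packed d16
// data and extended by one dword when tfe is set. For example, dmask=0x7
// with tfe requires a 4-dword vdata register.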
2848 bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst) {
2850 const unsigned Opc = Inst.getOpcode();
2851 const MCInstrDesc &Desc = MII.get(Opc);
2853 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
2854 return true;
2856 int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata);
2857 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
2858 int TFEIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe);
2860 assert(VDataIdx != -1);
2861 assert(DMaskIdx != -1);
2862 assert(TFEIdx != -1);
2864 unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx);
2865 unsigned TFESize = Inst.getOperand(TFEIdx).getImm()? 1 : 0;
2866 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
2867 if (DMask == 0)
2868 DMask = 1;
2870 unsigned DataSize =
2871 (Desc.TSFlags & SIInstrFlags::Gather4) ? 4 : countPopulation(DMask);
2872 if (hasPackedD16()) {
2873 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
2874 if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm())
2875 DataSize = (DataSize + 1) / 2;
2878 return (VDataSize / 4) == DataSize + TFESize;
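// GFX10 only: check that the size of the image address operand matches what
// the dim, gradient, coordinate and lod/clamp/mip arguments require. For
// non-NSA encodings the required size is rounded up to 8 or 16 dwords.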
2881 bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst) {
2882 const unsigned Opc = Inst.getOpcode();
2883 const MCInstrDesc &Desc = MII.get(Opc);
2885 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0 || !isGFX10())
2886 return true;
2888 const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
2889 const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
2890 AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
2891 int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0);
2892 int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::srsrc);
2893 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
2895 assert(VAddr0Idx != -1);
2896 assert(SrsrcIdx != -1);
2897 assert(DimIdx != -1);
2898 assert(SrsrcIdx > VAddr0Idx);
2900 unsigned Dim = Inst.getOperand(DimIdx).getImm();
2901 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
2902 bool IsNSA = SrsrcIdx - VAddr0Idx > 1;
2903 unsigned VAddrSize =
2904 IsNSA ? SrsrcIdx - VAddr0Idx
2905 : AMDGPU::getRegOperandSize(getMRI(), Desc, VAddr0Idx) / 4;
2907 unsigned AddrSize = BaseOpcode->NumExtraArgs +
2908 (BaseOpcode->Gradients ? DimInfo->NumGradients : 0) +
2909 (BaseOpcode->Coordinates ? DimInfo->NumCoords : 0) +
2910 (BaseOpcode->LodOrClampOrMip ? 1 : 0);
2911 if (!IsNSA) {
2912 if (AddrSize > 8)
2913 AddrSize = 16;
2914 else if (AddrSize > 4)
2915 AddrSize = 8;
2918 return VAddrSize == AddrSize;
2921 bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) {
2923 const unsigned Opc = Inst.getOpcode();
2924 const MCInstrDesc &Desc = MII.get(Opc);
2926 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
2927 return true;
2928 if (!Desc.mayLoad() || !Desc.mayStore())
2929 return true; // Not atomic
2931 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
2932 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
2934 // This is an incomplete check because image_atomic_cmpswap
2935 // may only use 0x3 and 0xf while other atomic operations
2936 // may use 0x1 and 0x3. However these limitations are
2937 // verified when we check that dmask matches dst size.
2938 return DMask == 0x1 || DMask == 0x3 || DMask == 0xf;
2941 bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) {
2943 const unsigned Opc = Inst.getOpcode();
2944 const MCInstrDesc &Desc = MII.get(Opc);
2946 if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0)
2947 return true;
2949 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
2950 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
2952 // GATHER4 instructions use dmask in a different fashion compared to
2953 // other MIMG instructions. The only useful DMASK values are
2954 // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns
2955 // (red,red,red,red) etc.) The ISA document doesn't mention
2956 // this.
2957 return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8;
2960 bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) {
2962 const unsigned Opc = Inst.getOpcode();
2963 const MCInstrDesc &Desc = MII.get(Opc);
2965 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
2966 return true;
2968 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
2969 if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) {
2970 if (isCI() || isSI())
2971 return false;
2974 return true;
2977 bool AMDGPUAsmParser::validateMIMGDim(const MCInst &Inst) {
2978 const unsigned Opc = Inst.getOpcode();
2979 const MCInstrDesc &Desc = MII.get(Opc);
2981 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
2982 return true;
2984 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
2985 if (DimIdx < 0)
2986 return true;
2988 long Imm = Inst.getOperand(DimIdx).getImm();
2989 if (Imm < 0 || Imm >= 8)
2990 return false;
2992 return true;
2995 static bool IsRevOpcode(const unsigned Opcode)
2997 switch (Opcode) {
2998 case AMDGPU::V_SUBREV_F32_e32:
2999 case AMDGPU::V_SUBREV_F32_e64:
3000 case AMDGPU::V_SUBREV_F32_e32_gfx10:
3001 case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7:
3002 case AMDGPU::V_SUBREV_F32_e32_vi:
3003 case AMDGPU::V_SUBREV_F32_e64_gfx10:
3004 case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7:
3005 case AMDGPU::V_SUBREV_F32_e64_vi:
3007 case AMDGPU::V_SUBREV_I32_e32:
3008 case AMDGPU::V_SUBREV_I32_e64:
3009 case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7:
3010 case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7:
3012 case AMDGPU::V_SUBBREV_U32_e32:
3013 case AMDGPU::V_SUBBREV_U32_e64:
3014 case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7:
3015 case AMDGPU::V_SUBBREV_U32_e32_vi:
3016 case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7:
3017 case AMDGPU::V_SUBBREV_U32_e64_vi:
3019 case AMDGPU::V_SUBREV_U32_e32:
3020 case AMDGPU::V_SUBREV_U32_e64:
3021 case AMDGPU::V_SUBREV_U32_e32_gfx9:
3022 case AMDGPU::V_SUBREV_U32_e32_vi:
3023 case AMDGPU::V_SUBREV_U32_e64_gfx9:
3024 case AMDGPU::V_SUBREV_U32_e64_vi:
3026 case AMDGPU::V_SUBREV_F16_e32:
3027 case AMDGPU::V_SUBREV_F16_e64:
3028 case AMDGPU::V_SUBREV_F16_e32_gfx10:
3029 case AMDGPU::V_SUBREV_F16_e32_vi:
3030 case AMDGPU::V_SUBREV_F16_e64_gfx10:
3031 case AMDGPU::V_SUBREV_F16_e64_vi:
3033 case AMDGPU::V_SUBREV_U16_e32:
3034 case AMDGPU::V_SUBREV_U16_e64:
3035 case AMDGPU::V_SUBREV_U16_e32_vi:
3036 case AMDGPU::V_SUBREV_U16_e64_vi:
3038 case AMDGPU::V_SUBREV_CO_U32_e32_gfx9:
3039 case AMDGPU::V_SUBREV_CO_U32_e64_gfx10:
3040 case AMDGPU::V_SUBREV_CO_U32_e64_gfx9:
3042 case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9:
3043 case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9:
3045 case AMDGPU::V_SUBREV_NC_U32_e32_gfx10:
3046 case AMDGPU::V_SUBREV_NC_U32_e64_gfx10:
3048 case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10:
3049 case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10:
3051 case AMDGPU::V_LSHRREV_B32_e32:
3052 case AMDGPU::V_LSHRREV_B32_e64:
3053 case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7:
3054 case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7:
3055 case AMDGPU::V_LSHRREV_B32_e32_vi:
3056 case AMDGPU::V_LSHRREV_B32_e64_vi:
3057 case AMDGPU::V_LSHRREV_B32_e32_gfx10:
3058 case AMDGPU::V_LSHRREV_B32_e64_gfx10:
3060 case AMDGPU::V_ASHRREV_I32_e32:
3061 case AMDGPU::V_ASHRREV_I32_e64:
3062 case AMDGPU::V_ASHRREV_I32_e32_gfx10:
3063 case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7:
3064 case AMDGPU::V_ASHRREV_I32_e32_vi:
3065 case AMDGPU::V_ASHRREV_I32_e64_gfx10:
3066 case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7:
3067 case AMDGPU::V_ASHRREV_I32_e64_vi:
3069 case AMDGPU::V_LSHLREV_B32_e32:
3070 case AMDGPU::V_LSHLREV_B32_e64:
3071 case AMDGPU::V_LSHLREV_B32_e32_gfx10:
3072 case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7:
3073 case AMDGPU::V_LSHLREV_B32_e32_vi:
3074 case AMDGPU::V_LSHLREV_B32_e64_gfx10:
3075 case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7:
3076 case AMDGPU::V_LSHLREV_B32_e64_vi:
3078 case AMDGPU::V_LSHLREV_B16_e32:
3079 case AMDGPU::V_LSHLREV_B16_e64:
3080 case AMDGPU::V_LSHLREV_B16_e32_vi:
3081 case AMDGPU::V_LSHLREV_B16_e64_vi:
3082 case AMDGPU::V_LSHLREV_B16_gfx10:
3084 case AMDGPU::V_LSHRREV_B16_e32:
3085 case AMDGPU::V_LSHRREV_B16_e64:
3086 case AMDGPU::V_LSHRREV_B16_e32_vi:
3087 case AMDGPU::V_LSHRREV_B16_e64_vi:
3088 case AMDGPU::V_LSHRREV_B16_gfx10:
3090 case AMDGPU::V_ASHRREV_I16_e32:
3091 case AMDGPU::V_ASHRREV_I16_e64:
3092 case AMDGPU::V_ASHRREV_I16_e32_vi:
3093 case AMDGPU::V_ASHRREV_I16_e64_vi:
3094 case AMDGPU::V_ASHRREV_I16_gfx10:
3096 case AMDGPU::V_LSHLREV_B64:
3097 case AMDGPU::V_LSHLREV_B64_gfx10:
3098 case AMDGPU::V_LSHLREV_B64_vi:
3100 case AMDGPU::V_LSHRREV_B64:
3101 case AMDGPU::V_LSHRREV_B64_gfx10:
3102 case AMDGPU::V_LSHRREV_B64_vi:
3104 case AMDGPU::V_ASHRREV_I64:
3105 case AMDGPU::V_ASHRREV_I64_gfx10:
3106 case AMDGPU::V_ASHRREV_I64_vi:
3108 case AMDGPU::V_PK_LSHLREV_B16:
3109 case AMDGPU::V_PK_LSHLREV_B16_gfx10:
3110 case AMDGPU::V_PK_LSHLREV_B16_vi:
3112 case AMDGPU::V_PK_LSHRREV_B16:
3113 case AMDGPU::V_PK_LSHRREV_B16_gfx10:
3114 case AMDGPU::V_PK_LSHRREV_B16_vi:
3115 case AMDGPU::V_PK_ASHRREV_I16:
3116 case AMDGPU::V_PK_ASHRREV_I16_gfx10:
3117 case AMDGPU::V_PK_ASHRREV_I16_vi:
3118 return true;
3119 default:
3120 return false;
3124 bool AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) {
3126 using namespace SIInstrFlags;
3127 const unsigned Opcode = Inst.getOpcode();
3128 const MCInstrDesc &Desc = MII.get(Opcode);
3130 // The lds_direct register is defined so that it can be used only
3131 // with 9-bit source operands. Ignore encodings that do not accept these.
3132 if ((Desc.TSFlags & (VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA)) == 0)
3133 return true;
3135 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3136 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3137 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
3139 const int SrcIndices[] = { Src1Idx, Src2Idx };
3141 // lds_direct cannot be specified as either src1 or src2.
3142 for (int SrcIdx : SrcIndices) {
3143 if (SrcIdx == -1) break;
3144 const MCOperand &Src = Inst.getOperand(SrcIdx);
3145 if (Src.isReg() && Src.getReg() == LDS_DIRECT) {
3146 return false;
3150 if (Src0Idx == -1)
3151 return true;
3153 const MCOperand &Src = Inst.getOperand(Src0Idx);
3154 if (!Src.isReg() || Src.getReg() != LDS_DIRECT)
3155 return true;
3157 // lds_direct is specified as src0. Check additional limitations.
3158 return (Desc.TSFlags & SIInstrFlags::SDWA) == 0 && !IsRevOpcode(Opcode);
3161 SMLoc AMDGPUAsmParser::getFlatOffsetLoc(const OperandVector &Operands) const {
3162 for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
3163 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
3164 if (Op.isFlatOffset())
3165 return Op.getStartLoc();
3167 return getLoc();
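// Validate the offset modifier of FLAT instructions. The offset field is
// 13 bits wide on GFX9 and 12 bits on GFX10: signed for global/scratch
// addressing, and unsigned (one bit narrower) for plain FLAT addressing.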
3170 bool AMDGPUAsmParser::validateFlatOffset(const MCInst &Inst,
3171 const OperandVector &Operands) {
3172 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
3173 if ((TSFlags & SIInstrFlags::FLAT) == 0)
3174 return true;
3176 auto Opcode = Inst.getOpcode();
3177 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
3178 assert(OpNum != -1);
3180 const auto &Op = Inst.getOperand(OpNum);
3181 if (!hasFlatOffsets() && Op.getImm() != 0) {
3182 Error(getFlatOffsetLoc(Operands),
3183 "flat offset modifier is not supported on this GPU");
3184 return false;
3187 // Address offset is 12-bit signed for GFX10, 13-bit for GFX9.
3188 // For FLAT segment the offset must be positive;
3189 // MSB is ignored and forced to zero.
3190 unsigned OffsetSize = isGFX9() ? 13 : 12;
3191 if (TSFlags & SIInstrFlags::IsNonFlatSeg) {
3192 if (!isIntN(OffsetSize, Op.getImm())) {
3193 Error(getFlatOffsetLoc(Operands),
3194 isGFX9() ? "expected a 13-bit signed offset" :
3195 "expected a 12-bit signed offset");
3196 return false;
3198 } else {
3199 if (!isUIntN(OffsetSize - 1, Op.getImm())) {
3200 Error(getFlatOffsetLoc(Operands),
3201 isGFX9() ? "expected a 12-bit unsigned offset" :
3202 "expected an 11-bit unsigned offset");
3203 return false;
3207 return true;
3210 bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const {
3211 unsigned Opcode = Inst.getOpcode();
3212 const MCInstrDesc &Desc = MII.get(Opcode);
3213 if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC)))
3214 return true;
3216 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3217 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3219 const int OpIndices[] = { Src0Idx, Src1Idx };
3221 unsigned NumLiterals = 0;
3222 uint32_t LiteralValue;
3224 for (int OpIdx : OpIndices) {
3225 if (OpIdx == -1) break;
3227 const MCOperand &MO = Inst.getOperand(OpIdx);
3228 if (MO.isImm() &&
3229 // Exclude special imm operands (like that used by s_set_gpr_idx_on)
3230 AMDGPU::isSISrcOperand(Desc, OpIdx) &&
3231 !isInlineConstant(Inst, OpIdx)) {
3232 uint32_t Value = static_cast<uint32_t>(MO.getImm());
3233 if (NumLiterals == 0 || LiteralValue != Value) {
3234 LiteralValue = Value;
3235 ++NumLiterals;
3240 return NumLiterals <= 1;
3243 bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) {
3244 const unsigned Opc = Inst.getOpcode();
3245 if (Opc == AMDGPU::V_PERMLANE16_B32_gfx10 ||
3246 Opc == AMDGPU::V_PERMLANEX16_B32_gfx10) {
3247 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
3248 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
3250 if (OpSel & ~3)
3251 return false;
3253 return true;
3256 // Check if VCC register matches wavefront size
3257 bool AMDGPUAsmParser::validateVccOperand(unsigned Reg) const {
3258 auto FB = getFeatureBits();
3259 return (FB[AMDGPU::FeatureWavefrontSize64] && Reg == AMDGPU::VCC) ||
3260 (FB[AMDGPU::FeatureWavefrontSize32] && Reg == AMDGPU::VCC_LO);
3263 // VOP3 literal is only allowed in GFX10+ and only one can be used
3264 bool AMDGPUAsmParser::validateVOP3Literal(const MCInst &Inst) const {
3265 unsigned Opcode = Inst.getOpcode();
3266 const MCInstrDesc &Desc = MII.get(Opcode);
3267 if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P)))
3268 return true;
3270 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3271 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3272 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
3274 const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };
3276 unsigned NumLiterals = 0;
3277 uint32_t LiteralValue;
3279 for (int OpIdx : OpIndices) {
3280 if (OpIdx == -1) break;
3282 const MCOperand &MO = Inst.getOperand(OpIdx);
3283 if (!MO.isImm() || !AMDGPU::isSISrcOperand(Desc, OpIdx))
3284 continue;
3286 if (OpIdx == Src2Idx && (Desc.TSFlags & SIInstrFlags::IsMAI) &&
3287 getFeatureBits()[AMDGPU::FeatureMFMAInlineLiteralBug])
3288 return false;
3290 if (!isInlineConstant(Inst, OpIdx)) {
3291 uint32_t Value = static_cast<uint32_t>(MO.getImm());
3292 if (NumLiterals == 0 || LiteralValue != Value) {
3293 LiteralValue = Value;
3294 ++NumLiterals;
3299 return !NumLiterals ||
3300 (NumLiterals == 1 && getFeatureBits()[AMDGPU::FeatureVOP3Literal]);
3303 bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst,
3304 const SMLoc &IDLoc,
3305 const OperandVector &Operands) {
3306 if (!validateLdsDirect(Inst)) {
3307 Error(IDLoc,
3308 "invalid use of lds_direct");
3309 return false;
3311 if (!validateSOPLiteral(Inst)) {
3312 Error(IDLoc,
3313 "only one literal operand is allowed");
3314 return false;
3316 if (!validateVOP3Literal(Inst)) {
3317 Error(IDLoc,
3318 "invalid literal operand");
3319 return false;
3321 if (!validateConstantBusLimitations(Inst)) {
3322 Error(IDLoc,
3323 "invalid operand (violates constant bus restrictions)");
3324 return false;
3326 if (!validateEarlyClobberLimitations(Inst)) {
3327 Error(IDLoc,
3328 "destination must be different than all sources");
3329 return false;
3331 if (!validateIntClampSupported(Inst)) {
3332 Error(IDLoc,
3333 "integer clamping is not supported on this GPU");
3334 return false;
3336 if (!validateOpSel(Inst)) {
3337 Error(IDLoc,
3338 "invalid op_sel operand");
3339 return false;
3341 // For MUBUF/MTBUF d16 is a part of opcode, so there is nothing to validate.
3342 if (!validateMIMGD16(Inst)) {
3343 Error(IDLoc,
3344 "d16 modifier is not supported on this GPU");
3345 return false;
3347 if (!validateMIMGDim(Inst)) {
3348 Error(IDLoc, "dim modifier is required on this GPU");
3349 return false;
3351 if (!validateMIMGDataSize(Inst)) {
3352 Error(IDLoc,
3353 "image data size does not match dmask and tfe");
3354 return false;
3356 if (!validateMIMGAddrSize(Inst)) {
3357 Error(IDLoc,
3358 "image address size does not match dim and a16");
3359 return false;
3361 if (!validateMIMGAtomicDMask(Inst)) {
3362 Error(IDLoc,
3363 "invalid atomic image dmask");
3364 return false;
3366 if (!validateMIMGGatherDMask(Inst)) {
3367 Error(IDLoc,
3368 "invalid image_gather dmask: only one bit must be set");
3369 return false;
3371 if (!validateFlatOffset(Inst, Operands)) {
3372 return false;
3375 return true;
3378 static std::string AMDGPUMnemonicSpellCheck(StringRef S,
3379 const FeatureBitset &FBS,
3380 unsigned VariantID = 0);
3382 bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
3383 OperandVector &Operands,
3384 MCStreamer &Out,
3385 uint64_t &ErrorInfo,
3386 bool MatchingInlineAsm) {
3387 MCInst Inst;
3388 unsigned Result = Match_Success;
3389 for (auto Variant : getMatchedVariants()) {
3390 uint64_t EI;
3391 auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm,
3392 Variant);
3393 // We order match statuses from least to most specific and use the most
3394 // specific status as the result:
3395 // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature < Match_PreferE32
3396 if ((R == Match_Success) ||
3397 (R == Match_PreferE32) ||
3398 (R == Match_MissingFeature && Result != Match_PreferE32) ||
3399 (R == Match_InvalidOperand && Result != Match_MissingFeature
3400 && Result != Match_PreferE32) ||
3401 (R == Match_MnemonicFail && Result != Match_InvalidOperand
3402 && Result != Match_MissingFeature
3403 && Result != Match_PreferE32)) {
3404 Result = R;
3405 ErrorInfo = EI;
3407 if (R == Match_Success)
3408 break;
3411 switch (Result) {
3412 default: break;
3413 case Match_Success:
3414 if (!validateInstruction(Inst, IDLoc, Operands)) {
3415 return true;
3417 Inst.setLoc(IDLoc);
3418 Out.EmitInstruction(Inst, getSTI());
3419 return false;
3421 case Match_MissingFeature:
3422 return Error(IDLoc, "instruction not supported on this GPU");
3424 case Match_MnemonicFail: {
3425 FeatureBitset FBS = ComputeAvailableFeatures(getSTI().getFeatureBits());
3426 std::string Suggestion = AMDGPUMnemonicSpellCheck(
3427 ((AMDGPUOperand &)*Operands[0]).getToken(), FBS);
3428 return Error(IDLoc, "invalid instruction" + Suggestion,
3429 ((AMDGPUOperand &)*Operands[0]).getLocRange());
3432 case Match_InvalidOperand: {
3433 SMLoc ErrorLoc = IDLoc;
3434 if (ErrorInfo != ~0ULL) {
3435 if (ErrorInfo >= Operands.size()) {
3436 return Error(IDLoc, "too few operands for instruction");
3438 ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc();
3439 if (ErrorLoc == SMLoc())
3440 ErrorLoc = IDLoc;
3442 return Error(ErrorLoc, "invalid operand for instruction");
3445 case Match_PreferE32:
3446 return Error(IDLoc, "internal error: instruction without _e64 suffix "
3447 "should be encoded as e32");
3449 llvm_unreachable("Implement any new match types added!");
3452 bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) {
3453 int64_t Tmp = -1;
3454 if (getLexer().isNot(AsmToken::Integer) && getLexer().isNot(AsmToken::Identifier)) {
3455 return true;
3457 if (getParser().parseAbsoluteExpression(Tmp)) {
3458 return true;
3460 Ret = static_cast<uint32_t>(Tmp);
3461 return false;
3464 bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major,
3465 uint32_t &Minor) {
3466 if (ParseAsAbsoluteExpression(Major))
3467 return TokError("invalid major version");
3469 if (getLexer().isNot(AsmToken::Comma))
3470 return TokError("minor version number required, comma expected");
3471 Lex();
3473 if (ParseAsAbsoluteExpression(Minor))
3474 return TokError("invalid minor version");
3476 return false;
3479 bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() {
3480 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
3481 return TokError("directive only supported for amdgcn architecture");
3483 std::string Target;
3485 SMLoc TargetStart = getTok().getLoc();
3486 if (getParser().parseEscapedString(Target))
3487 return true;
3488 SMRange TargetRange = SMRange(TargetStart, getTok().getLoc());
3490 std::string ExpectedTarget;
3491 raw_string_ostream ExpectedTargetOS(ExpectedTarget);
3492 IsaInfo::streamIsaVersion(&getSTI(), ExpectedTargetOS);
3494 if (Target != ExpectedTargetOS.str())
3495 return getParser().Error(TargetRange.Start, "target must match options",
3496 TargetRange);
3498 getTargetStreamer().EmitDirectiveAMDGCNTarget(Target);
3499 return false;
3502 bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) {
3503 return getParser().Error(Range.Start, "value out of range", Range);
3506 bool AMDGPUAsmParser::calculateGPRBlocks(
3507 const FeatureBitset &Features, bool VCCUsed, bool FlatScrUsed,
3508 bool XNACKUsed, Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR,
3509 SMRange VGPRRange, unsigned NextFreeSGPR, SMRange SGPRRange,
3510 unsigned &VGPRBlocks, unsigned &SGPRBlocks) {
3511 // TODO(scott.linder): These calculations are duplicated from
3512 // AMDGPUAsmPrinter::getSIProgramInfo and could be unified.
3513 IsaVersion Version = getIsaVersion(getSTI().getCPU());
3515 unsigned NumVGPRs = NextFreeVGPR;
3516 unsigned NumSGPRs = NextFreeSGPR;
3518 if (Version.Major >= 10)
3519 NumSGPRs = 0;
3520 else {
3521 unsigned MaxAddressableNumSGPRs =
3522 IsaInfo::getAddressableNumSGPRs(&getSTI());
3524 if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) &&
3525 NumSGPRs > MaxAddressableNumSGPRs)
3526 return OutOfRangeError(SGPRRange);
3528 NumSGPRs +=
3529 IsaInfo::getNumExtraSGPRs(&getSTI(), VCCUsed, FlatScrUsed, XNACKUsed);
3531 if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) &&
3532 NumSGPRs > MaxAddressableNumSGPRs)
3533 return OutOfRangeError(SGPRRange);
3535 if (Features.test(FeatureSGPRInitBug))
3536 NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG;
3539 VGPRBlocks =
3540 IsaInfo::getNumVGPRBlocks(&getSTI(), NumVGPRs, EnableWavefrontSize32);
3541 SGPRBlocks = IsaInfo::getNumSGPRBlocks(&getSTI(), NumSGPRs);
3543 return false;
3546 bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
3547 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
3548 return TokError("directive only supported for amdgcn architecture");
3550 if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA)
3551 return TokError("directive only supported for amdhsa OS");
3553 StringRef KernelName;
3554 if (getParser().parseIdentifier(KernelName))
3555 return true;
3557 kernel_descriptor_t KD = getDefaultAmdhsaKernelDescriptor(&getSTI());
3559 StringSet<> Seen;
3561 IsaVersion IVersion = getIsaVersion(getSTI().getCPU());
3563 SMRange VGPRRange;
3564 uint64_t NextFreeVGPR = 0;
3565 SMRange SGPRRange;
3566 uint64_t NextFreeSGPR = 0;
3567 unsigned UserSGPRCount = 0;
3568 bool ReserveVCC = true;
3569 bool ReserveFlatScr = true;
3570 bool ReserveXNACK = hasXNACK();
3571 Optional<bool> EnableWavefrontSize32;
3573 while (true) {
3574 while (getLexer().is(AsmToken::EndOfStatement))
3575 Lex();
3577 if (getLexer().isNot(AsmToken::Identifier))
3578 return TokError("expected .amdhsa_ directive or .end_amdhsa_kernel");
3580 StringRef ID = getTok().getIdentifier();
3581 SMRange IDRange = getTok().getLocRange();
3582 Lex();
3584 if (ID == ".end_amdhsa_kernel")
3585 break;
3587 if (Seen.find(ID) != Seen.end())
3588 return TokError(".amdhsa_ directives cannot be repeated");
3589 Seen.insert(ID);
3591 SMLoc ValStart = getTok().getLoc();
3592 int64_t IVal;
3593 if (getParser().parseAbsoluteExpression(IVal))
3594 return true;
3595 SMLoc ValEnd = getTok().getLoc();
3596 SMRange ValRange = SMRange(ValStart, ValEnd);
3598 if (IVal < 0)
3599 return OutOfRangeError(ValRange);
3601 uint64_t Val = IVal;
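// Store VALUE into the ENTRY bitfield of FIELD, reporting an out-of-range
// error if VALUE does not fit in the field's width.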
3603 #define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE) \
3604 if (!isUInt<ENTRY##_WIDTH>(VALUE)) \
3605 return OutOfRangeError(RANGE); \
3606 AMDHSA_BITS_SET(FIELD, ENTRY, VALUE);
3608 if (ID == ".amdhsa_group_segment_fixed_size") {
3609 if (!isUInt<sizeof(KD.group_segment_fixed_size) * CHAR_BIT>(Val))
3610 return OutOfRangeError(ValRange);
3611 KD.group_segment_fixed_size = Val;
3612 } else if (ID == ".amdhsa_private_segment_fixed_size") {
3613 if (!isUInt<sizeof(KD.private_segment_fixed_size) * CHAR_BIT>(Val))
3614 return OutOfRangeError(ValRange);
3615 KD.private_segment_fixed_size = Val;
3616 } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") {
3617 PARSE_BITS_ENTRY(KD.kernel_code_properties,
3618 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER,
3619 Val, ValRange);
3620 UserSGPRCount += 4;
3621 } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") {
3622 PARSE_BITS_ENTRY(KD.kernel_code_properties,
3623 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, Val,
3624 ValRange);
3625 UserSGPRCount += 2;
3626 } else if (ID == ".amdhsa_user_sgpr_queue_ptr") {
3627 PARSE_BITS_ENTRY(KD.kernel_code_properties,
3628 KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, Val,
3629 ValRange);
3630 UserSGPRCount += 2;
3631 } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") {
3632 PARSE_BITS_ENTRY(KD.kernel_code_properties,
3633 KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR,
3634 Val, ValRange);
3635 UserSGPRCount += 2;
3636 } else if (ID == ".amdhsa_user_sgpr_dispatch_id") {
3637 PARSE_BITS_ENTRY(KD.kernel_code_properties,
3638 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, Val,
3639 ValRange);
3640 UserSGPRCount += 2;
3641 } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") {
3642 PARSE_BITS_ENTRY(KD.kernel_code_properties,
3643 KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, Val,
3644 ValRange);
3645 UserSGPRCount += 2;
3646 } else if (ID == ".amdhsa_user_sgpr_private_segment_size") {
3647 PARSE_BITS_ENTRY(KD.kernel_code_properties,
3648 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE,
3649 Val, ValRange);
3650 UserSGPRCount += 1;
3651 } else if (ID == ".amdhsa_wavefront_size32") {
3652 if (IVersion.Major < 10)
3653 return getParser().Error(IDRange.Start, "directive requires gfx10+",
3654 IDRange);
3655 EnableWavefrontSize32 = Val;
3656 PARSE_BITS_ENTRY(KD.kernel_code_properties,
3657 KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32,
3658 Val, ValRange);
3659 } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") {
3660 PARSE_BITS_ENTRY(
3661 KD.compute_pgm_rsrc2,
3662 COMPUTE_PGM_RSRC2_ENABLE_SGPR_PRIVATE_SEGMENT_WAVEFRONT_OFFSET, Val,
3663 ValRange);
3664 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") {
3665 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3666 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, Val,
3667 ValRange);
3668 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") {
3669 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3670 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, Val,
3671 ValRange);
3672 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") {
3673 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3674 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, Val,
3675 ValRange);
3676 } else if (ID == ".amdhsa_system_sgpr_workgroup_info") {
3677 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3678 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, Val,
3679 ValRange);
3680 } else if (ID == ".amdhsa_system_vgpr_workitem_id") {
3681 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3682 COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, Val,
3683 ValRange);
3684 } else if (ID == ".amdhsa_next_free_vgpr") {
3685 VGPRRange = ValRange;
3686 NextFreeVGPR = Val;
3687 } else if (ID == ".amdhsa_next_free_sgpr") {
3688 SGPRRange = ValRange;
3689 NextFreeSGPR = Val;
3690 } else if (ID == ".amdhsa_reserve_vcc") {
3691 if (!isUInt<1>(Val))
3692 return OutOfRangeError(ValRange);
3693 ReserveVCC = Val;
3694 } else if (ID == ".amdhsa_reserve_flat_scratch") {
3695 if (IVersion.Major < 7)
3696 return getParser().Error(IDRange.Start, "directive requires gfx7+",
3697 IDRange);
3698 if (!isUInt<1>(Val))
3699 return OutOfRangeError(ValRange);
3700 ReserveFlatScr = Val;
3701 } else if (ID == ".amdhsa_reserve_xnack_mask") {
3702 if (IVersion.Major < 8)
3703 return getParser().Error(IDRange.Start, "directive requires gfx8+",
3704 IDRange);
3705 if (!isUInt<1>(Val))
3706 return OutOfRangeError(ValRange);
3707 ReserveXNACK = Val;
3708 } else if (ID == ".amdhsa_float_round_mode_32") {
3709 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
3710 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, Val, ValRange);
3711 } else if (ID == ".amdhsa_float_round_mode_16_64") {
3712 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
3713 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, Val, ValRange);
3714 } else if (ID == ".amdhsa_float_denorm_mode_32") {
3715 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
3716 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, Val, ValRange);
3717 } else if (ID == ".amdhsa_float_denorm_mode_16_64") {
3718 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
3719 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, Val,
3720 ValRange);
3721 } else if (ID == ".amdhsa_dx10_clamp") {
3722 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
3723 COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP, Val, ValRange);
3724 } else if (ID == ".amdhsa_ieee_mode") {
3725 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE,
3726 Val, ValRange);
3727 } else if (ID == ".amdhsa_fp16_overflow") {
3728 if (IVersion.Major < 9)
3729 return getParser().Error(IDRange.Start, "directive requires gfx9+",
3730 IDRange);
3731 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FP16_OVFL, Val,
3732 ValRange);
3733 } else if (ID == ".amdhsa_workgroup_processor_mode") {
3734 if (IVersion.Major < 10)
3735 return getParser().Error(IDRange.Start, "directive requires gfx10+",
3736 IDRange);
3737 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_WGP_MODE, Val,
3738 ValRange);
3739 } else if (ID == ".amdhsa_memory_ordered") {
3740 if (IVersion.Major < 10)
3741 return getParser().Error(IDRange.Start, "directive requires gfx10+",
3742 IDRange);
3743 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_MEM_ORDERED, Val,
3744 ValRange);
3745 } else if (ID == ".amdhsa_forward_progress") {
3746 if (IVersion.Major < 10)
3747 return getParser().Error(IDRange.Start, "directive requires gfx10+",
3748 IDRange);
3749 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FWD_PROGRESS, Val,
3750 ValRange);
3751 } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") {
3752 PARSE_BITS_ENTRY(
3753 KD.compute_pgm_rsrc2,
3754 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, Val,
3755 ValRange);
3756 } else if (ID == ".amdhsa_exception_fp_denorm_src") {
3757 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3758 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE,
3759 Val, ValRange);
3760 } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") {
3761 PARSE_BITS_ENTRY(
3762 KD.compute_pgm_rsrc2,
3763 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, Val,
3764 ValRange);
3765 } else if (ID == ".amdhsa_exception_fp_ieee_overflow") {
3766 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3767 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW,
3768 Val, ValRange);
3769 } else if (ID == ".amdhsa_exception_fp_ieee_underflow") {
3770 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3771 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW,
3772 Val, ValRange);
3773 } else if (ID == ".amdhsa_exception_fp_ieee_inexact") {
3774 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3775 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT,
3776 Val, ValRange);
3777 } else if (ID == ".amdhsa_exception_int_div_zero") {
3778 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3779 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO,
3780 Val, ValRange);
3781 } else {
3782 return getParser().Error(IDRange.Start,
3783 "unknown .amdhsa_kernel directive", IDRange);
3786 #undef PARSE_BITS_ENTRY
3789 if (Seen.find(".amdhsa_next_free_vgpr") == Seen.end())
3790 return TokError(".amdhsa_next_free_vgpr directive is required");
3792 if (Seen.find(".amdhsa_next_free_sgpr") == Seen.end())
3793 return TokError(".amdhsa_next_free_sgpr directive is required");
3795 unsigned VGPRBlocks;
3796 unsigned SGPRBlocks;
3797 if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr,
3798 ReserveXNACK, EnableWavefrontSize32, NextFreeVGPR,
3799 VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks,
3800 SGPRBlocks))
3801 return true;
3803 if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>(
3804 VGPRBlocks))
3805 return OutOfRangeError(VGPRRange);
3806 AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
3807 COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, VGPRBlocks);
3809 if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>(
3810 SGPRBlocks))
3811 return OutOfRangeError(SGPRRange);
3812 AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
3813 COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT,
3814 SGPRBlocks);
3816 if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount))
3817 return TokError("too many user SGPRs enabled");
3818 AMDHSA_BITS_SET(KD.compute_pgm_rsrc2, COMPUTE_PGM_RSRC2_USER_SGPR_COUNT,
3819 UserSGPRCount);
3821 getTargetStreamer().EmitAmdhsaKernelDescriptor(
3822 getSTI(), KernelName, KD, NextFreeVGPR, NextFreeSGPR, ReserveVCC,
3823 ReserveFlatScr, ReserveXNACK);
3824 return false;
3827 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectVersion() {
3828 uint32_t Major;
3829 uint32_t Minor;
3831 if (ParseDirectiveMajorMinor(Major, Minor))
3832 return true;
3834 getTargetStreamer().EmitDirectiveHSACodeObjectVersion(Major, Minor);
3835 return false;
3838 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() {
3839 uint32_t Major;
3840 uint32_t Minor;
3841 uint32_t Stepping;
3842 StringRef VendorName;
3843 StringRef ArchName;
3845 // If this directive has no arguments, then use the ISA version for the
3846 // targeted GPU.
3847 if (getLexer().is(AsmToken::EndOfStatement)) {
3848 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
3849 getTargetStreamer().EmitDirectiveHSACodeObjectISA(ISA.Major, ISA.Minor,
3850 ISA.Stepping,
3851 "AMD", "AMDGPU");
3852 return false;
3855 if (ParseDirectiveMajorMinor(Major, Minor))
3856 return true;
3858 if (getLexer().isNot(AsmToken::Comma))
3859 return TokError("stepping version number required, comma expected");
3860 Lex();
3862 if (ParseAsAbsoluteExpression(Stepping))
3863 return TokError("invalid stepping version");
3865 if (getLexer().isNot(AsmToken::Comma))
3866 return TokError("vendor name required, comma expected");
3867 Lex();
3869 if (getLexer().isNot(AsmToken::String))
3870 return TokError("invalid vendor name");
3872 VendorName = getLexer().getTok().getStringContents();
3873 Lex();
3875 if (getLexer().isNot(AsmToken::Comma))
3876 return TokError("arch name required, comma expected");
3877 Lex();
3879 if (getLexer().isNot(AsmToken::String))
3880 return TokError("invalid arch name");
3882 ArchName = getLexer().getTok().getStringContents();
3883 Lex();
3885 getTargetStreamer().EmitDirectiveHSACodeObjectISA(Major, Minor, Stepping,
3886 VendorName, ArchName);
3887 return false;
3890 bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID,
3891 amd_kernel_code_t &Header) {
3892 // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing
3893 // assembly for backwards compatibility.
3894 if (ID == "max_scratch_backing_memory_byte_size") {
3895 Parser.eatToEndOfStatement();
3896 return false;
3899 SmallString<40> ErrStr;
3900 raw_svector_ostream Err(ErrStr);
3901 if (!parseAmdKernelCodeField(ID, getParser(), Header, Err)) {
3902 return TokError(Err.str());
3904 Lex();
3906 if (ID == "enable_wavefront_size32") {
3907 if (Header.code_properties & AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) {
3908 if (!isGFX10())
3909 return TokError("enable_wavefront_size32=1 is only allowed on GFX10+");
3910 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
3911 return TokError("enable_wavefront_size32=1 requires +WavefrontSize32");
3912 } else {
3913 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
3914 return TokError("enable_wavefront_size32=0 requires +WavefrontSize64");
3918 if (ID == "wavefront_size") {
3919 if (Header.wavefront_size == 5) {
3920 if (!isGFX10())
3921 return TokError("wavefront_size=5 is only allowed on GFX10+");
3922 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
3923 return TokError("wavefront_size=5 requires +WavefrontSize32");
3924 } else if (Header.wavefront_size == 6) {
3925 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
3926 return TokError("wavefront_size=6 requires +WavefrontSize64");
3930 if (ID == "enable_wgp_mode") {
3931 if (G_00B848_WGP_MODE(Header.compute_pgm_resource_registers) && !isGFX10())
3932 return TokError("enable_wgp_mode=1 is only allowed on GFX10+");
3935 if (ID == "enable_mem_ordered") {
3936 if (G_00B848_MEM_ORDERED(Header.compute_pgm_resource_registers) && !isGFX10())
3937 return TokError("enable_mem_ordered=1 is only allowed on GFX10+");
3940 if (ID == "enable_fwd_progress") {
3941 if (G_00B848_FWD_PROGRESS(Header.compute_pgm_resource_registers) && !isGFX10())
3942 return TokError("enable_fwd_progress=1 is only allowed on GFX10+");
3945 return false;
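// Illustrative sketch of the legacy block parsed by ParseDirectiveAMDKernelCodeT
// below (the field shown and its value are hypothetical):
//   .amd_kernel_code_t
//     wavefront_size = 6
//   .end_amd_kernel_code_t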
3948 bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() {
3949 amd_kernel_code_t Header;
3950 AMDGPU::initDefaultAMDKernelCodeT(Header, &getSTI());
3952 while (true) {
3953 // Lex EndOfStatement. This is in a while loop, because lexing a comment
3954 // will set the current token to EndOfStatement.
3955 while(getLexer().is(AsmToken::EndOfStatement))
3956 Lex();
3958 if (getLexer().isNot(AsmToken::Identifier))
3959 return TokError("expected value identifier or .end_amd_kernel_code_t");
3961 StringRef ID = getLexer().getTok().getIdentifier();
3962 Lex();
3964 if (ID == ".end_amd_kernel_code_t")
3965 break;
3967 if (ParseAMDKernelCodeTValue(ID, Header))
3968 return true;
3971 getTargetStreamer().EmitAMDKernelCodeT(Header);
3973 return false;
3976 bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() {
3977 if (getLexer().isNot(AsmToken::Identifier))
3978 return TokError("expected symbol name");
3980 StringRef KernelName = Parser.getTok().getString();
3982 getTargetStreamer().EmitAMDGPUSymbolType(KernelName,
3983 ELF::STT_AMDGPU_HSA_KERNEL);
3984 Lex();
3985 if (!AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI()))
3986 KernelScope.initialize(getContext());
3987 return false;
3990 bool AMDGPUAsmParser::ParseDirectiveISAVersion() {
3991 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) {
3992 return Error(getParser().getTok().getLoc(),
3993 ".amd_amdgpu_isa directive is not available on non-amdgcn "
3994 "architectures");
3997 auto ISAVersionStringFromASM = getLexer().getTok().getStringContents();
3999 std::string ISAVersionStringFromSTI;
4000 raw_string_ostream ISAVersionStreamFromSTI(ISAVersionStringFromSTI);
4001 IsaInfo::streamIsaVersion(&getSTI(), ISAVersionStreamFromSTI);
4003 if (ISAVersionStringFromASM != ISAVersionStreamFromSTI.str()) {
4004 return Error(getParser().getTok().getLoc(),
4005 ".amd_amdgpu_isa directive does not match triple and/or mcpu "
4006 "arguments specified through the command line");
4009 getTargetStreamer().EmitISAVersion(ISAVersionStreamFromSTI.str());
4010 Lex();
4012 return false;
4015 bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() {
4016 const char *AssemblerDirectiveBegin;
4017 const char *AssemblerDirectiveEnd;
4018 std::tie(AssemblerDirectiveBegin, AssemblerDirectiveEnd) =
4019 AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())
4020 ? std::make_tuple(HSAMD::V3::AssemblerDirectiveBegin,
4021 HSAMD::V3::AssemblerDirectiveEnd)
4022 : std::make_tuple(HSAMD::AssemblerDirectiveBegin,
4023 HSAMD::AssemblerDirectiveEnd);
4025 if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) {
4026 return Error(getParser().getTok().getLoc(),
4027 (Twine(AssemblerDirectiveBegin) + Twine(" directive is "
4028 "not available on non-amdhsa OSes")).str());
4031 std::string HSAMetadataString;
4032 if (ParseToEndDirective(AssemblerDirectiveBegin, AssemblerDirectiveEnd,
4033 HSAMetadataString))
4034 return true;
4036 if (IsaInfo::hasCodeObjectV3(&getSTI())) {
4037 if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString))
4038 return Error(getParser().getTok().getLoc(), "invalid HSA metadata");
4039 } else {
4040 if (!getTargetStreamer().EmitHSAMetadataV2(HSAMetadataString))
4041 return Error(getParser().getTok().getLoc(), "invalid HSA metadata");
4044 return false;
4047 /// Common code to parse out a block of text (typically YAML) between start and
4048 /// end directives.
4049 bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin,
4050 const char *AssemblerDirectiveEnd,
4051 std::string &CollectString) {
4053 raw_string_ostream CollectStream(CollectString);
4055 getLexer().setSkipSpace(false);
4057 bool FoundEnd = false;
4058 while (!getLexer().is(AsmToken::Eof)) {
4059 while (getLexer().is(AsmToken::Space)) {
4060 CollectStream << getLexer().getTok().getString();
4061 Lex();
4064 if (getLexer().is(AsmToken::Identifier)) {
4065 StringRef ID = getLexer().getTok().getIdentifier();
4066 if (ID == AssemblerDirectiveEnd) {
4067 Lex();
4068 FoundEnd = true;
4069 break;
4073 CollectStream << Parser.parseStringToEndOfStatement()
4074 << getContext().getAsmInfo()->getSeparatorString();
4076 Parser.eatToEndOfStatement();
4079 getLexer().setSkipSpace(true);
4081 if (getLexer().is(AsmToken::Eof) && !FoundEnd) {
4082 return TokError(Twine("expected directive ") +
4083 Twine(AssemblerDirectiveEnd) + Twine(" not found"));
4086 CollectStream.flush();
4087 return false;
4090 /// Parse the assembler directive for new MsgPack-format PAL metadata.
4091 bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() {
4092 std::string String;
4093 if (ParseToEndDirective(AMDGPU::PALMD::AssemblerDirectiveBegin,
4094 AMDGPU::PALMD::AssemblerDirectiveEnd, String))
4095 return true;
4097 auto PALMetadata = getTargetStreamer().getPALMetadata();
4098 if (!PALMetadata->setFromString(String))
4099 return Error(getParser().getTok().getLoc(), "invalid PAL metadata");
4100 return false;
4103 /// Parse the assembler directive for old linear-format PAL metadata.
4104 bool AMDGPUAsmParser::ParseDirectivePALMetadata() {
4105 if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) {
4106 return Error(getParser().getTok().getLoc(),
4107 (Twine(PALMD::AssemblerDirective) + Twine(" directive is "
4108 "not available on non-amdpal OSes")).str());
4111 auto PALMetadata = getTargetStreamer().getPALMetadata();
4112 PALMetadata->setLegacy();
4113 for (;;) {
4114 uint32_t Key, Value;
4115 if (ParseAsAbsoluteExpression(Key)) {
4116 return TokError(Twine("invalid value in ") +
4117 Twine(PALMD::AssemblerDirective));
4119 if (getLexer().isNot(AsmToken::Comma)) {
4120 return TokError(Twine("expected an even number of values in ") +
4121 Twine(PALMD::AssemblerDirective));
4123 Lex();
4124 if (ParseAsAbsoluteExpression(Value)) {
4125 return TokError(Twine("invalid value in ") +
4126 Twine(PALMD::AssemblerDirective));
4128 PALMetadata->setRegister(Key, Value);
4129 if (getLexer().isNot(AsmToken::Comma))
4130 break;
4131 Lex();
4133 return false;
4136 /// ParseDirectiveAMDGPULDS
4137 /// ::= .amdgpu_lds identifier ',' size_expression [',' align_expression]
4138 bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() {
4139 if (getParser().checkForValidSection())
4140 return true;
4142 StringRef Name;
4143 SMLoc NameLoc = getLexer().getLoc();
4144 if (getParser().parseIdentifier(Name))
4145 return TokError("expected identifier in directive");
4147 MCSymbol *Symbol = getContext().getOrCreateSymbol(Name);
4148 if (parseToken(AsmToken::Comma, "expected ','"))
4149 return true;
4151 unsigned LocalMemorySize = AMDGPU::IsaInfo::getLocalMemorySize(&getSTI());
4153 int64_t Size;
4154 SMLoc SizeLoc = getLexer().getLoc();
4155 if (getParser().parseAbsoluteExpression(Size))
4156 return true;
4157 if (Size < 0)
4158 return Error(SizeLoc, "size must be non-negative");
4159 if (Size > LocalMemorySize)
4160 return Error(SizeLoc, "size is too large");
4162 int64_t Align = 4;
4163 if (getLexer().is(AsmToken::Comma)) {
4164 Lex();
4165 SMLoc AlignLoc = getLexer().getLoc();
4166 if (getParser().parseAbsoluteExpression(Align))
4167 return true;
4168 if (Align < 0 || !isPowerOf2_64(Align))
4169 return Error(AlignLoc, "alignment must be a power of two");
4171 // Alignment larger than the size of LDS is possible in theory, as long
4172 // as the linker manages to place the symbol at address 0, but we do want
4173 // to make sure the alignment fits nicely into a 32-bit integer.
4174 if (Align >= 1u << 31)
4175 return Error(AlignLoc, "alignment is too large");
4178 if (parseToken(AsmToken::EndOfStatement,
4179 "unexpected token in '.amdgpu_lds' directive"))
4180 return true;
4182 Symbol->redefineIfPossible();
4183 if (!Symbol->isUndefined())
4184 return Error(NameLoc, "invalid symbol redefinition");
4186 getTargetStreamer().emitAMDGPULDS(Symbol, Size, Align);
4187 return false;
4190 bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
4191 StringRef IDVal = DirectiveID.getString();
4193 if (AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
4194 if (IDVal == ".amdgcn_target")
4195 return ParseDirectiveAMDGCNTarget();
4197 if (IDVal == ".amdhsa_kernel")
4198 return ParseDirectiveAMDHSAKernel();
4200 // TODO: Restructure/combine with PAL metadata directive.
4201 if (IDVal == AMDGPU::HSAMD::V3::AssemblerDirectiveBegin)
4202 return ParseDirectiveHSAMetadata();
4203 } else {
4204 if (IDVal == ".hsa_code_object_version")
4205 return ParseDirectiveHSACodeObjectVersion();
4207 if (IDVal == ".hsa_code_object_isa")
4208 return ParseDirectiveHSACodeObjectISA();
4210 if (IDVal == ".amd_kernel_code_t")
4211 return ParseDirectiveAMDKernelCodeT();
4213 if (IDVal == ".amdgpu_hsa_kernel")
4214 return ParseDirectiveAMDGPUHsaKernel();
4216 if (IDVal == ".amd_amdgpu_isa")
4217 return ParseDirectiveISAVersion();
4219 if (IDVal == AMDGPU::HSAMD::AssemblerDirectiveBegin)
4220 return ParseDirectiveHSAMetadata();
4223 if (IDVal == ".amdgpu_lds")
4224 return ParseDirectiveAMDGPULDS();
4226 if (IDVal == PALMD::AssemblerDirectiveBegin)
4227 return ParseDirectivePALMetadataBegin();
4229 if (IDVal == PALMD::AssemblerDirective)
4230 return ParseDirectivePALMetadata();
4232 return true;
4235 bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI,
4236 unsigned RegNo) const {
4238 for (MCRegAliasIterator R(AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15, &MRI, true);
4239 R.isValid(); ++R) {
4240 if (*R == RegNo)
4241 return isGFX9() || isGFX10();
4245 // GFX10 has 2 more SGPRs, 104 and 105.
4245 for (MCRegAliasIterator R(AMDGPU::SGPR104_SGPR105, &MRI, true);
4246 R.isValid(); ++R) {
4247 if (*R == RegNo)
4248 return hasSGPR104_SGPR105();
4251 switch (RegNo) {
4252 case AMDGPU::SRC_SHARED_BASE:
4253 case AMDGPU::SRC_SHARED_LIMIT:
4254 case AMDGPU::SRC_PRIVATE_BASE:
4255 case AMDGPU::SRC_PRIVATE_LIMIT:
4256 case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
4257 return !isCI() && !isSI() && !isVI();
4258 case AMDGPU::TBA:
4259 case AMDGPU::TBA_LO:
4260 case AMDGPU::TBA_HI:
4261 case AMDGPU::TMA:
4262 case AMDGPU::TMA_LO:
4263 case AMDGPU::TMA_HI:
4264 return !isGFX9() && !isGFX10();
4265 case AMDGPU::XNACK_MASK:
4266 case AMDGPU::XNACK_MASK_LO:
4267 case AMDGPU::XNACK_MASK_HI:
4268 return !isCI() && !isSI() && !isGFX10() && hasXNACK();
4269 case AMDGPU::SGPR_NULL:
4270 return isGFX10();
4271 default:
4272 break;
4275 if (isCI())
4276 return true;
4278 if (isSI() || isGFX10()) {
4279 // No flat_scr on SI.
4280 // On GFX10 flat scratch is not a valid register operand and can only be
4281 // accessed with s_setreg/s_getreg.
4282 switch (RegNo) {
4283 case AMDGPU::FLAT_SCR:
4284 case AMDGPU::FLAT_SCR_LO:
4285 case AMDGPU::FLAT_SCR_HI:
4286 return false;
4287 default:
4288 return true;
4292 // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that
4293 // SI/CI have.
4294 for (MCRegAliasIterator R(AMDGPU::SGPR102_SGPR103, &MRI, true);
4295 R.isValid(); ++R) {
4296 if (*R == RegNo)
4297 return hasSGPR102_SGPR103();
4300 return true;
4303 OperandMatchResultTy
4304 AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic,
4305 OperandMode Mode) {
4306 // Try to parse with a custom parser
4307 OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic);
4309 // If we successfully parsed the operand or if there was an error parsing,
4310 // we are done.
4312 // If we are parsing after we reach EndOfStatement then this means we
4313 // are appending default values to the Operands list. This is only done
4314 // by the custom parser, so we shouldn't continue on to the generic parsing.
4315 if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail ||
4316 getLexer().is(AsmToken::EndOfStatement))
4317 return ResTy;
4319 if (Mode == OperandMode_NSA && getLexer().is(AsmToken::LBrac)) {
4320 unsigned Prefix = Operands.size();
4321 SMLoc LBraceLoc = getTok().getLoc();
4322 Parser.Lex(); // eat the '['
4324 for (;;) {
4325 ResTy = parseReg(Operands);
4326 if (ResTy != MatchOperand_Success)
4327 return ResTy;
4329 if (getLexer().is(AsmToken::RBrac))
4330 break;
4332 if (getLexer().isNot(AsmToken::Comma))
4333 return MatchOperand_ParseFail;
4334 Parser.Lex();
4337 if (Operands.size() - Prefix > 1) {
4338 Operands.insert(Operands.begin() + Prefix,
4339 AMDGPUOperand::CreateToken(this, "[", LBraceLoc));
4340 Operands.push_back(AMDGPUOperand::CreateToken(this, "]",
4341 getTok().getLoc()));
4344 Parser.Lex(); // eat the ']'
4345 return MatchOperand_Success;
4348 return parseRegOrImm(Operands);
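// Mnemonic suffixes force a particular encoding; for example (illustrative),
// "v_add_f32_e64" selects the 64-bit VOP3 form and "v_mov_b32_sdwa" selects
// the SDWA form. The suffix is stripped before matching.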
4351 StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) {
4352 // Clear any forced encodings from the previous instruction.
4353 setForcedEncodingSize(0);
4354 setForcedDPP(false);
4355 setForcedSDWA(false);
4357 if (Name.endswith("_e64")) {
4358 setForcedEncodingSize(64);
4359 return Name.substr(0, Name.size() - 4);
4360 } else if (Name.endswith("_e32")) {
4361 setForcedEncodingSize(32);
4362 return Name.substr(0, Name.size() - 4);
4363 } else if (Name.endswith("_dpp")) {
4364 setForcedDPP(true);
4365 return Name.substr(0, Name.size() - 4);
4366 } else if (Name.endswith("_sdwa")) {
4367 setForcedSDWA(true);
4368 return Name.substr(0, Name.size() - 5);
4370 return Name;
4373 bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info,
4374 StringRef Name,
4375 SMLoc NameLoc, OperandVector &Operands) {
4376 // Add the instruction mnemonic
4377 Name = parseMnemonicSuffix(Name);
4378 Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc));
4380 bool IsMIMG = Name.startswith("image_");
4382 while (!getLexer().is(AsmToken::EndOfStatement)) {
4383 OperandMode Mode = OperandMode_Default;
4384 if (IsMIMG && isGFX10() && Operands.size() == 2)
4385 Mode = OperandMode_NSA;
4386 OperandMatchResultTy Res = parseOperand(Operands, Name, Mode);
4388 // Eat the comma or space if there is one.
4389 if (getLexer().is(AsmToken::Comma))
4390 Parser.Lex();
4392 switch (Res) {
4393 case MatchOperand_Success: break;
4394 case MatchOperand_ParseFail:
4395 // FIXME: use real operand location rather than the current location.
4396 Error(getLexer().getLoc(), "failed parsing operand.");
4397 while (!getLexer().is(AsmToken::EndOfStatement)) {
4398 Parser.Lex();
4400 return true;
4401 case MatchOperand_NoMatch:
4402 // FIXME: use real operand location rather than the current location.
4403 Error(getLexer().getLoc(), "not a valid operand.");
4404 while (!getLexer().is(AsmToken::EndOfStatement)) {
4405 Parser.Lex();
4407 return true;
4411 return false;
4414 //===----------------------------------------------------------------------===//
4415 // Utility functions
4416 //===----------------------------------------------------------------------===//
4418 OperandMatchResultTy
4419 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, int64_t &IntVal) {
4421 if (!trySkipId(Prefix, AsmToken::Colon))
4422 return MatchOperand_NoMatch;
4424 return parseExpr(IntVal) ? MatchOperand_Success : MatchOperand_ParseFail;
4427 OperandMatchResultTy
4428 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
4429 AMDGPUOperand::ImmTy ImmTy,
4430 bool (*ConvertResult)(int64_t&)) {
4431 SMLoc S = getLoc();
4432 int64_t Value = 0;
4434 OperandMatchResultTy Res = parseIntWithPrefix(Prefix, Value);
4435 if (Res != MatchOperand_Success)
4436 return Res;
4438 if (ConvertResult && !ConvertResult(Value)) {
4439 Error(S, "invalid " + StringRef(Prefix) + " value.");
4442 Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy));
4443 return MatchOperand_Success;
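// Parses a prefixed bit array of up to four 0/1 elements, e.g. (illustrative)
// "op_sel:[0,0,1,1]", and packs the bits into a single immediate.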
4446 OperandMatchResultTy
4447 AMDGPUAsmParser::parseOperandArrayWithPrefix(const char *Prefix,
4448 OperandVector &Operands,
4449 AMDGPUOperand::ImmTy ImmTy,
4450 bool (*ConvertResult)(int64_t&)) {
4451 SMLoc S = getLoc();
4452 if (!trySkipId(Prefix, AsmToken::Colon))
4453 return MatchOperand_NoMatch;
4455 if (!skipToken(AsmToken::LBrac, "expected a left square bracket"))
4456 return MatchOperand_ParseFail;
4458 unsigned Val = 0;
4459 const unsigned MaxSize = 4;
4461 // FIXME: How to verify the number of elements matches the number of src
4462 // operands?
4463 for (int I = 0; ; ++I) {
4464 int64_t Op;
4465 SMLoc Loc = getLoc();
4466 if (!parseExpr(Op))
4467 return MatchOperand_ParseFail;
4469 if (Op != 0 && Op != 1) {
4470 Error(Loc, "invalid " + StringRef(Prefix) + " value.");
4471 return MatchOperand_ParseFail;
4474 Val |= (Op << I);
4476 if (trySkipToken(AsmToken::RBrac))
4477 break;
4479 if (I + 1 == MaxSize) {
4480 Error(getLoc(), "expected a closing square bracket");
4481 return MatchOperand_ParseFail;
4484 if (!skipToken(AsmToken::Comma, "expected a comma"))
4485 return MatchOperand_ParseFail;
4488 Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy));
4489 return MatchOperand_Success;
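// Named bits are written as the bare name to set the bit and as "no" + name to
// clear it, e.g. (illustrative) "glc" vs. "noglc"; omitting the token leaves
// the default value of 0.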
4492 OperandMatchResultTy
4493 AMDGPUAsmParser::parseNamedBit(const char *Name, OperandVector &Operands,
4494 AMDGPUOperand::ImmTy ImmTy) {
4495 int64_t Bit = 0;
4496 SMLoc S = Parser.getTok().getLoc();
4498 // We are at the end of the statement, and this is a default argument, so
4499 // use a default value.
4500 if (getLexer().isNot(AsmToken::EndOfStatement)) {
4501 switch(getLexer().getKind()) {
4502 case AsmToken::Identifier: {
4503 StringRef Tok = Parser.getTok().getString();
4504 if (Tok == Name) {
4505 if (Tok == "r128" && isGFX9())
4506 Error(S, "r128 modifier is not supported on this GPU");
4507 if (Tok == "a16" && !isGFX9() && !isGFX10())
4508 Error(S, "a16 modifier is not supported on this GPU");
4509 Bit = 1;
4510 Parser.Lex();
4511 } else if (Tok.startswith("no") && Tok.endswith(Name)) {
4512 Bit = 0;
4513 Parser.Lex();
4514 } else {
4515 return MatchOperand_NoMatch;
4517 break;
4519 default:
4520 return MatchOperand_NoMatch;
4524 if (!isGFX10() && ImmTy == AMDGPUOperand::ImmTyDLC)
4525 return MatchOperand_ParseFail;
4527 Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy));
4528 return MatchOperand_Success;
4531 static void addOptionalImmOperand(
4532 MCInst& Inst, const OperandVector& Operands,
4533 AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx,
4534 AMDGPUOperand::ImmTy ImmT,
4535 int64_t Default = 0) {
4536 auto i = OptionalIdx.find(ImmT);
4537 if (i != OptionalIdx.end()) {
4538 unsigned Idx = i->second;
4539 ((AMDGPUOperand &)*Operands[Idx]).addImmOperands(Inst, 1);
4540 } else {
4541 Inst.addOperand(MCOperand::createImm(Default));
4545 OperandMatchResultTy
4546 AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix, StringRef &Value) {
4547 if (getLexer().isNot(AsmToken::Identifier)) {
4548 return MatchOperand_NoMatch;
4550 StringRef Tok = Parser.getTok().getString();
4551 if (Tok != Prefix) {
4552 return MatchOperand_NoMatch;
4555 Parser.Lex();
4556 if (getLexer().isNot(AsmToken::Colon)) {
4557 return MatchOperand_ParseFail;
4560 Parser.Lex();
4561 if (getLexer().isNot(AsmToken::Identifier)) {
4562 return MatchOperand_ParseFail;
4565 Value = Parser.getTok().getString();
4566 return MatchOperand_Success;
4569 // dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their
4570 // values to live in a joint format operand in the MCInst encoding.
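// For example (illustrative), "dfmt:14, nfmt:2" and "nfmt:2, dfmt:14" both
// produce the same joint immediate, Dfmt | (Nfmt << 4).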
4571 OperandMatchResultTy
4572 AMDGPUAsmParser::parseDfmtNfmt(OperandVector &Operands) {
4573 SMLoc S = Parser.getTok().getLoc();
4574 int64_t Dfmt = 0, Nfmt = 0;
4575 // dfmt and nfmt can appear in either order, and each is optional.
4576 bool GotDfmt = false, GotNfmt = false;
4577 while (!GotDfmt || !GotNfmt) {
4578 if (!GotDfmt) {
4579 auto Res = parseIntWithPrefix("dfmt", Dfmt);
4580 if (Res != MatchOperand_NoMatch) {
4581 if (Res != MatchOperand_Success)
4582 return Res;
4583 if (Dfmt >= 16) {
4584 Error(Parser.getTok().getLoc(), "out of range dfmt");
4585 return MatchOperand_ParseFail;
4587 GotDfmt = true;
4588 Parser.Lex();
4589 continue;
4592 if (!GotNfmt) {
4593 auto Res = parseIntWithPrefix("nfmt", Nfmt);
4594 if (Res != MatchOperand_NoMatch) {
4595 if (Res != MatchOperand_Success)
4596 return Res;
4597 if (Nfmt >= 8) {
4598 Error(Parser.getTok().getLoc(), "out of range nfmt");
4599 return MatchOperand_ParseFail;
4601 GotNfmt = true;
4602 Parser.Lex();
4603 continue;
4606 break;
4608 if (!GotDfmt && !GotNfmt)
4609 return MatchOperand_NoMatch;
4610 auto Format = Dfmt | Nfmt << 4;
4611 Operands.push_back(
4612 AMDGPUOperand::CreateImm(this, Format, S, AMDGPUOperand::ImmTyFORMAT));
4613 return MatchOperand_Success;
4616 //===----------------------------------------------------------------------===//
4617 // ds
4618 //===----------------------------------------------------------------------===//
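// The converters below build DS instructions such as (illustrative):
//   ds_write2_b32 v1, v2, v3 offset0:4 offset1:8 gds
// where offset0/offset1/gds are the optional operands handled here.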
4620 void AMDGPUAsmParser::cvtDSOffset01(MCInst &Inst,
4621 const OperandVector &Operands) {
4622 OptionalImmIndexMap OptionalIdx;
4624 for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
4625 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4627 // Add the register arguments
4628 if (Op.isReg()) {
4629 Op.addRegOperands(Inst, 1);
4630 continue;
4633 // Handle optional arguments
4634 OptionalIdx[Op.getImmTy()] = i;
4637 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset0);
4638 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset1);
4639 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);
4641 Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
4644 void AMDGPUAsmParser::cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
4645 bool IsGdsHardcoded) {
4646 OptionalImmIndexMap OptionalIdx;
4648 for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
4649 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4651 // Add the register arguments
4652 if (Op.isReg()) {
4653 Op.addRegOperands(Inst, 1);
4654 continue;
4657 if (Op.isToken() && Op.getToken() == "gds") {
4658 IsGdsHardcoded = true;
4659 continue;
4662 // Handle optional arguments
4663 OptionalIdx[Op.getImmTy()] = i;
4666 AMDGPUOperand::ImmTy OffsetType =
4667 (Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx10 ||
4668 Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx6_gfx7 ||
4669 Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_vi) ? AMDGPUOperand::ImmTySwizzle :
4670 AMDGPUOperand::ImmTyOffset;
4672 addOptionalImmOperand(Inst, Operands, OptionalIdx, OffsetType);
4674 if (!IsGdsHardcoded) {
4675 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);
4677 Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
4680 void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) {
4681 OptionalImmIndexMap OptionalIdx;
4683 unsigned OperandIdx[4];
4684 unsigned EnMask = 0;
4685 int SrcIdx = 0;
4687 for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
4688 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4690 // Add the register arguments
4691 if (Op.isReg()) {
4692 assert(SrcIdx < 4);
4693 OperandIdx[SrcIdx] = Inst.size();
4694 Op.addRegOperands(Inst, 1);
4695 ++SrcIdx;
4696 continue;
4699 if (Op.isOff()) {
4700 assert(SrcIdx < 4);
4701 OperandIdx[SrcIdx] = Inst.size();
4702 Inst.addOperand(MCOperand::createReg(AMDGPU::NoRegister));
4703 ++SrcIdx;
4704 continue;
4707 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) {
4708 Op.addImmOperands(Inst, 1);
4709 continue;
4712 if (Op.isToken() && Op.getToken() == "done")
4713 continue;
4715 // Handle optional arguments
4716 OptionalIdx[Op.getImmTy()] = i;
4719 assert(SrcIdx == 4);
4721 bool Compr = false;
4722 if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) {
4723 Compr = true;
4724 Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]);
4725 Inst.getOperand(OperandIdx[2]).setReg(AMDGPU::NoRegister);
4726 Inst.getOperand(OperandIdx[3]).setReg(AMDGPU::NoRegister);
4729 for (auto i = 0; i < SrcIdx; ++i) {
4730 if (Inst.getOperand(OperandIdx[i]).getReg() != AMDGPU::NoRegister) {
4731 EnMask |= Compr? (0x3 << i * 2) : (0x1 << i);
4735 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM);
4736 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr);
4738 Inst.addOperand(MCOperand::createImm(EnMask));
4741 //===----------------------------------------------------------------------===//
4742 // s_waitcnt
4743 //===----------------------------------------------------------------------===//
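// The s_waitcnt operand may be a raw immediate or a list of named counters,
// e.g. (illustrative): s_waitcnt vmcnt(0) & expcnt(0) & lgkmcnt(0)
// The "_sat" suffix clamps an over-large count instead of reporting an error.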
4745 static bool
4746 encodeCnt(
4747 const AMDGPU::IsaVersion ISA,
4748 int64_t &IntVal,
4749 int64_t CntVal,
4750 bool Saturate,
4751 unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned),
4752 unsigned (*decode)(const IsaVersion &Version, unsigned))
4754 bool Failed = false;
4756 IntVal = encode(ISA, IntVal, CntVal);
4757 if (CntVal != decode(ISA, IntVal)) {
4758 if (Saturate) {
4759 IntVal = encode(ISA, IntVal, -1);
4760 } else {
4761 Failed = true;
4764 return Failed;
4767 bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) {
4769 SMLoc CntLoc = getLoc();
4770 StringRef CntName = getTokenStr();
4772 if (!skipToken(AsmToken::Identifier, "expected a counter name") ||
4773 !skipToken(AsmToken::LParen, "expected a left parenthesis"))
4774 return false;
4776 int64_t CntVal;
4777 SMLoc ValLoc = getLoc();
4778 if (!parseExpr(CntVal))
4779 return false;
4781 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
4783 bool Failed = true;
4784 bool Sat = CntName.endswith("_sat");
4786 if (CntName == "vmcnt" || CntName == "vmcnt_sat") {
4787 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt);
4788 } else if (CntName == "expcnt" || CntName == "expcnt_sat") {
4789 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt);
4790 } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") {
4791 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt);
4792 } else {
4793 Error(CntLoc, "invalid counter name " + CntName);
4794 return false;
4797 if (Failed) {
4798 Error(ValLoc, "too large value for " + CntName);
4799 return false;
4802 if (!skipToken(AsmToken::RParen, "expected a closing parenthesis"))
4803 return false;
4805 if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) {
4806 if (isToken(AsmToken::EndOfStatement)) {
4807 Error(getLoc(), "expected a counter name");
4808 return false;
4812 return true;
4815 OperandMatchResultTy
4816 AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) {
4817 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
4818 int64_t Waitcnt = getWaitcntBitMask(ISA);
4819 SMLoc S = getLoc();
4821 // If the parse failed, do not return an error code
4822 // to avoid excessive error messages.
4823 if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
4824 while (parseCnt(Waitcnt) && !isToken(AsmToken::EndOfStatement));
4825 } else {
4826 parseExpr(Waitcnt);
4829 Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S));
4830 return MatchOperand_Success;
4833 bool
4834 AMDGPUOperand::isSWaitCnt() const {
4835 return isImm();
4838 //===----------------------------------------------------------------------===//
4839 // hwreg
4840 //===----------------------------------------------------------------------===//
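// Accepted operand forms (illustrative):
//   hwreg(HW_REG_MODE)        - symbolic register name, default offset/width
//   hwreg(1, 0, 32)           - numeric id, bit offset, bit width
//   257                       - raw 16-bit immediate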
4842 bool
4843 AMDGPUAsmParser::parseHwregBody(OperandInfoTy &HwReg,
4844 int64_t &Offset,
4845 int64_t &Width) {
4846 using namespace llvm::AMDGPU::Hwreg;
4848 // The register may be specified by name or using a numeric code
4849 if (isToken(AsmToken::Identifier) &&
4850 (HwReg.Id = getHwregId(getTokenStr())) >= 0) {
4851 HwReg.IsSymbolic = true;
4852 lex(); // skip message name
4853 } else if (!parseExpr(HwReg.Id)) {
4854 return false;
4857 if (trySkipToken(AsmToken::RParen))
4858 return true;
4860 // parse optional params
4861 return
4862 skipToken(AsmToken::Comma, "expected a comma or a closing parenthesis") &&
4863 parseExpr(Offset) &&
4864 skipToken(AsmToken::Comma, "expected a comma") &&
4865 parseExpr(Width) &&
4866 skipToken(AsmToken::RParen, "expected a closing parenthesis");
4869 bool
4870 AMDGPUAsmParser::validateHwreg(const OperandInfoTy &HwReg,
4871 const int64_t Offset,
4872 const int64_t Width,
4873 const SMLoc Loc) {
4875 using namespace llvm::AMDGPU::Hwreg;
4877 if (HwReg.IsSymbolic && !isValidHwreg(HwReg.Id, getSTI())) {
4878 Error(Loc, "specified hardware register is not supported on this GPU");
4879 return false;
4880 } else if (!isValidHwreg(HwReg.Id)) {
4881 Error(Loc, "invalid code of hardware register: only 6-bit values are legal");
4882 return false;
4883 } else if (!isValidHwregOffset(Offset)) {
4884 Error(Loc, "invalid bit offset: only 5-bit values are legal");
4885 return false;
4886 } else if (!isValidHwregWidth(Width)) {
4887 Error(Loc, "invalid bitfield width: only values from 1 to 32 are legal");
4888 return false;
4890 return true;
4893 OperandMatchResultTy
4894 AMDGPUAsmParser::parseHwreg(OperandVector &Operands) {
4895 using namespace llvm::AMDGPU::Hwreg;
4897 int64_t ImmVal = 0;
4898 SMLoc Loc = getLoc();
4900 // If the parse failed, do not return an error code
4901 // to avoid excessive error messages.
4902 if (trySkipId("hwreg", AsmToken::LParen)) {
4903 OperandInfoTy HwReg(ID_UNKNOWN_);
4904 int64_t Offset = OFFSET_DEFAULT_;
4905 int64_t Width = WIDTH_DEFAULT_;
4906 if (parseHwregBody(HwReg, Offset, Width) &&
4907 validateHwreg(HwReg, Offset, Width, Loc)) {
4908 ImmVal = encodeHwreg(HwReg.Id, Offset, Width);
4910 } else if (parseExpr(ImmVal)) {
4911 if (ImmVal < 0 || !isUInt<16>(ImmVal))
4912 Error(Loc, "invalid immediate: only 16-bit values are legal");
4915 Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTyHwreg));
4916 return MatchOperand_Success;
4919 bool AMDGPUOperand::isHwreg() const {
4920 return isImmTy(ImmTyHwreg);
4923 //===----------------------------------------------------------------------===//
4924 // sendmsg
4925 //===----------------------------------------------------------------------===//
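// Accepted operand forms (illustrative):
//   sendmsg(MSG_INTERRUPT)
//   sendmsg(MSG_GS, GS_OP_EMIT, 0)   - message, operation, stream id
//   3                                - raw 16-bit immediate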
4927 bool
4928 AMDGPUAsmParser::parseSendMsgBody(OperandInfoTy &Msg,
4929 OperandInfoTy &Op,
4930 OperandInfoTy &Stream) {
4931 using namespace llvm::AMDGPU::SendMsg;
4933 if (isToken(AsmToken::Identifier) && (Msg.Id = getMsgId(getTokenStr())) >= 0) {
4934 Msg.IsSymbolic = true;
4935 lex(); // skip message name
4936 } else if (!parseExpr(Msg.Id)) {
4937 return false;
4940 if (trySkipToken(AsmToken::Comma)) {
4941 Op.IsDefined = true;
4942 if (isToken(AsmToken::Identifier) &&
4943 (Op.Id = getMsgOpId(Msg.Id, getTokenStr())) >= 0) {
4944 lex(); // skip operation name
4945 } else if (!parseExpr(Op.Id)) {
4946 return false;
4949 if (trySkipToken(AsmToken::Comma)) {
4950 Stream.IsDefined = true;
4951 if (!parseExpr(Stream.Id))
4952 return false;
4956 return skipToken(AsmToken::RParen, "expected a closing parenthesis");
4959 bool
4960 AMDGPUAsmParser::validateSendMsg(const OperandInfoTy &Msg,
4961 const OperandInfoTy &Op,
4962 const OperandInfoTy &Stream,
4963 const SMLoc S) {
4964 using namespace llvm::AMDGPU::SendMsg;
4966 // Validation strictness depends on whether the message is specified
4967 // in a symbolic or in a numeric form. In the latter case
4968 // only the encoding possibility is checked.
4969 bool Strict = Msg.IsSymbolic;
4971 if (!isValidMsgId(Msg.Id, getSTI(), Strict)) {
4972 Error(S, "invalid message id");
4973 return false;
4974 } else if (Strict && (msgRequiresOp(Msg.Id) != Op.IsDefined)) {
4975 Error(S, Op.IsDefined ?
4976 "message does not support operations" :
4977 "missing message operation");
4978 return false;
4979 } else if (!isValidMsgOp(Msg.Id, Op.Id, Strict)) {
4980 Error(S, "invalid operation id");
4981 return false;
4982 } else if (Strict && !msgSupportsStream(Msg.Id, Op.Id) && Stream.IsDefined) {
4983 Error(S, "message operation does not support streams");
4984 return false;
4985 } else if (!isValidMsgStream(Msg.Id, Op.Id, Stream.Id, Strict)) {
4986 Error(S, "invalid message stream id");
4987 return false;
4989 return true;
4992 OperandMatchResultTy
4993 AMDGPUAsmParser::parseSendMsgOp(OperandVector &Operands) {
4994 using namespace llvm::AMDGPU::SendMsg;
4996 int64_t ImmVal = 0;
4997 SMLoc Loc = getLoc();
4999 // If the parse failed, do not return an error code
5000 // to avoid excessive error messages.
5001 if (trySkipId("sendmsg", AsmToken::LParen)) {
5002 OperandInfoTy Msg(ID_UNKNOWN_);
5003 OperandInfoTy Op(OP_NONE_);
5004 OperandInfoTy Stream(STREAM_ID_NONE_);
5005 if (parseSendMsgBody(Msg, Op, Stream) &&
5006 validateSendMsg(Msg, Op, Stream, Loc)) {
5007 ImmVal = encodeMsg(Msg.Id, Op.Id, Stream.Id);
5009 } else if (parseExpr(ImmVal)) {
5010 if (ImmVal < 0 || !isUInt<16>(ImmVal))
5011 Error(Loc, "invalid immediate: only 16-bit values are legal");
5014 Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTySendMsg));
5015 return MatchOperand_Success;
5018 bool AMDGPUOperand::isSendMsg() const {
5019 return isImmTy(ImmTySendMsg);
5022 //===----------------------------------------------------------------------===//
5023 // v_interp
5024 //===----------------------------------------------------------------------===//
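// Interpolation operands parsed here, e.g. (illustrative):
//   v_interp_p1_f32 v0, v1, attr0.x
// Slot names map p10/p20/p0 to 0/1/2; channels .x/.y/.z/.w map to 0-3.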
5026 OperandMatchResultTy AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) {
5027 if (getLexer().getKind() != AsmToken::Identifier)
5028 return MatchOperand_NoMatch;
5030 StringRef Str = Parser.getTok().getString();
5031 int Slot = StringSwitch<int>(Str)
5032 .Case("p10", 0)
5033 .Case("p20", 1)
5034 .Case("p0", 2)
5035 .Default(-1);
5037 SMLoc S = Parser.getTok().getLoc();
5038 if (Slot == -1)
5039 return MatchOperand_ParseFail;
5041 Parser.Lex();
5042 Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S,
5043 AMDGPUOperand::ImmTyInterpSlot));
5044 return MatchOperand_Success;
5047 OperandMatchResultTy AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) {
5048 if (getLexer().getKind() != AsmToken::Identifier)
5049 return MatchOperand_NoMatch;
5051 StringRef Str = Parser.getTok().getString();
5052 if (!Str.startswith("attr"))
5053 return MatchOperand_NoMatch;
5055 StringRef Chan = Str.take_back(2);
5056 int AttrChan = StringSwitch<int>(Chan)
5057 .Case(".x", 0)
5058 .Case(".y", 1)
5059 .Case(".z", 2)
5060 .Case(".w", 3)
5061 .Default(-1);
5062 if (AttrChan == -1)
5063 return MatchOperand_ParseFail;
5065 Str = Str.drop_back(2).drop_front(4);
5067 uint8_t Attr;
5068 if (Str.getAsInteger(10, Attr))
5069 return MatchOperand_ParseFail;
5071 SMLoc S = Parser.getTok().getLoc();
5072 Parser.Lex();
5073 if (Attr > 63) {
5074 Error(S, "out of bounds attr");
5075 return MatchOperand_Success;
5078 SMLoc SChan = SMLoc::getFromPointer(Chan.data());
5080 Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S,
5081 AMDGPUOperand::ImmTyInterpAttr));
5082 Operands.push_back(AMDGPUOperand::CreateImm(this, AttrChan, SChan,
5083 AMDGPUOperand::ImmTyAttrChan));
5084 return MatchOperand_Success;
5087 //===----------------------------------------------------------------------===//
5088 // exp
5089 //===----------------------------------------------------------------------===//
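// Export target names are translated to hardware target indices below, e.g.
// mrt0-mrt7 -> 0-7, mrtz -> 8, null -> 9, pos0-pos4 -> 12-16 (pos4 on GFX10
// only), prim -> 20 (GFX10 only), param0-param31 -> 32-63.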
5091 void AMDGPUAsmParser::errorExpTgt() {
5092 Error(Parser.getTok().getLoc(), "invalid exp target");
5095 OperandMatchResultTy AMDGPUAsmParser::parseExpTgtImpl(StringRef Str,
5096 uint8_t &Val) {
5097 if (Str == "null") {
5098 Val = 9;
5099 return MatchOperand_Success;
5102 if (Str.startswith("mrt")) {
5103 Str = Str.drop_front(3);
5104 if (Str == "z") { // == mrtz
5105 Val = 8;
5106 return MatchOperand_Success;
5109 if (Str.getAsInteger(10, Val))
5110 return MatchOperand_ParseFail;
5112 if (Val > 7)
5113 errorExpTgt();
5115 return MatchOperand_Success;
5118 if (Str.startswith("pos")) {
5119 Str = Str.drop_front(3);
5120 if (Str.getAsInteger(10, Val))
5121 return MatchOperand_ParseFail;
5123 if (Val > 4 || (Val == 4 && !isGFX10()))
5124 errorExpTgt();
5126 Val += 12;
5127 return MatchOperand_Success;
5130 if (isGFX10() && Str == "prim") {
5131 Val = 20;
5132 return MatchOperand_Success;
5135 if (Str.startswith("param")) {
5136 Str = Str.drop_front(5);
5137 if (Str.getAsInteger(10, Val))
5138 return MatchOperand_ParseFail;
5140 if (Val >= 32)
5141 errorExpTgt();
5143 Val += 32;
5144 return MatchOperand_Success;
5147 if (Str.startswith("invalid_target_")) {
5148 Str = Str.drop_front(15);
5149 if (Str.getAsInteger(10, Val))
5150 return MatchOperand_ParseFail;
5152 errorExpTgt();
5153 return MatchOperand_Success;
5156 return MatchOperand_NoMatch;
5159 OperandMatchResultTy AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) {
5160 uint8_t Val;
5161 StringRef Str = Parser.getTok().getString();
5163 auto Res = parseExpTgtImpl(Str, Val);
5164 if (Res != MatchOperand_Success)
5165 return Res;
5167 SMLoc S = Parser.getTok().getLoc();
5168 Parser.Lex();
5170 Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S,
5171 AMDGPUOperand::ImmTyExpTgt));
5172 return MatchOperand_Success;
5175 //===----------------------------------------------------------------------===//
5176 // parser helpers
5177 //===----------------------------------------------------------------------===//
5179 bool
5180 AMDGPUAsmParser::isId(const AsmToken &Token, const StringRef Id) const {
5181 return Token.is(AsmToken::Identifier) && Token.getString() == Id;
5184 bool
5185 AMDGPUAsmParser::isId(const StringRef Id) const {
5186 return isId(getToken(), Id);
5189 bool
5190 AMDGPUAsmParser::isToken(const AsmToken::TokenKind Kind) const {
5191 return getTokenKind() == Kind;
5194 bool
5195 AMDGPUAsmParser::trySkipId(const StringRef Id) {
5196 if (isId(Id)) {
5197 lex();
5198 return true;
5200 return false;
5203 bool
5204 AMDGPUAsmParser::trySkipId(const StringRef Id, const AsmToken::TokenKind Kind) {
5205 if (isId(Id) && peekToken().is(Kind)) {
5206 lex();
5207 lex();
5208 return true;
5210 return false;
5213 bool
5214 AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) {
5215 if (isToken(Kind)) {
5216 lex();
5217 return true;
5219 return false;
5222 bool
5223 AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind,
5224 const StringRef ErrMsg) {
5225 if (!trySkipToken(Kind)) {
5226 Error(getLoc(), ErrMsg);
5227 return false;
5229 return true;
5232 bool
5233 AMDGPUAsmParser::parseExpr(int64_t &Imm) {
5234 return !getParser().parseAbsoluteExpression(Imm);
5237 bool
5238 AMDGPUAsmParser::parseExpr(OperandVector &Operands) {
5239 SMLoc S = getLoc();
5241 const MCExpr *Expr;
5242 if (Parser.parseExpression(Expr))
5243 return false;
5245 int64_t IntVal;
5246 if (Expr->evaluateAsAbsolute(IntVal)) {
5247 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
5248 } else {
5249 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
5251 return true;
5254 bool
5255 AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) {
5256 if (isToken(AsmToken::String)) {
5257 Val = getToken().getStringContents();
5258 lex();
5259 return true;
5260 } else {
5261 Error(getLoc(), ErrMsg);
5262 return false;
5266 AsmToken
5267 AMDGPUAsmParser::getToken() const {
5268 return Parser.getTok();
5271 AsmToken
5272 AMDGPUAsmParser::peekToken() {
5273 return getLexer().peekTok();
5276 void
5277 AMDGPUAsmParser::peekTokens(MutableArrayRef<AsmToken> Tokens) {
5278 auto TokCount = getLexer().peekTokens(Tokens);
5280 for (auto Idx = TokCount; Idx < Tokens.size(); ++Idx)
5281 Tokens[Idx] = AsmToken(AsmToken::Error, "");
5284 AsmToken::TokenKind
5285 AMDGPUAsmParser::getTokenKind() const {
5286 return getLexer().getKind();
5289 SMLoc
5290 AMDGPUAsmParser::getLoc() const {
5291 return getToken().getLoc();
5294 StringRef
5295 AMDGPUAsmParser::getTokenStr() const {
5296 return getToken().getString();
5299 void
5300 AMDGPUAsmParser::lex() {
5301 Parser.Lex();
5304 //===----------------------------------------------------------------------===//
5305 // swizzle
5306 //===----------------------------------------------------------------------===//
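// The swizzle "offset" operand is either a raw 16-bit value or a macro, e.g.
// (illustrative):
//   ds_swizzle_b32 v0, v1 offset:swizzle(QUAD_PERM, 0, 1, 2, 3)
//   ds_swizzle_b32 v0, v1 offset:0x8001
// Modes other than QUAD_PERM are encoded through encodeBitmaskPerm below.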
5308 LLVM_READNONE
5309 static unsigned
5310 encodeBitmaskPerm(const unsigned AndMask,
5311 const unsigned OrMask,
5312 const unsigned XorMask) {
5313 using namespace llvm::AMDGPU::Swizzle;
5315 return BITMASK_PERM_ENC |
5316 (AndMask << BITMASK_AND_SHIFT) |
5317 (OrMask << BITMASK_OR_SHIFT) |
5318 (XorMask << BITMASK_XOR_SHIFT);
5321 bool
5322 AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
5323 const unsigned MinVal,
5324 const unsigned MaxVal,
5325 const StringRef ErrMsg) {
5326 for (unsigned i = 0; i < OpNum; ++i) {
5327 if (!skipToken(AsmToken::Comma, "expected a comma")){
5328 return false;
5330 SMLoc ExprLoc = Parser.getTok().getLoc();
5331 if (!parseExpr(Op[i])) {
5332 return false;
5334 if (Op[i] < MinVal || Op[i] > MaxVal) {
5335 Error(ExprLoc, ErrMsg);
5336 return false;
5340 return true;
5343 bool
5344 AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm) {
5345 using namespace llvm::AMDGPU::Swizzle;
5347 int64_t Lane[LANE_NUM];
5348 if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX,
5349 "expected a 2-bit lane id")) {
5350 Imm = QUAD_PERM_ENC;
5351 for (unsigned I = 0; I < LANE_NUM; ++I) {
5352 Imm |= Lane[I] << (LANE_SHIFT * I);
5354 return true;
5356 return false;
5359 bool
5360 AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) {
5361 using namespace llvm::AMDGPU::Swizzle;
5363 SMLoc S = Parser.getTok().getLoc();
5364 int64_t GroupSize;
5365 int64_t LaneIdx;
5367 if (!parseSwizzleOperands(1, &GroupSize,
5368 2, 32,
5369 "group size must be in the interval [2,32]")) {
5370 return false;
5372 if (!isPowerOf2_64(GroupSize)) {
5373 Error(S, "group size must be a power of two");
5374 return false;
5376 if (parseSwizzleOperands(1, &LaneIdx,
5377 0, GroupSize - 1,
5378 "lane id must be in the interval [0,group size - 1]")) {
5379 Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0);
5380 return true;
5382 return false;
5385 bool
5386 AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) {
5387 using namespace llvm::AMDGPU::Swizzle;
5389 SMLoc S = Parser.getTok().getLoc();
5390 int64_t GroupSize;
5392 if (!parseSwizzleOperands(1, &GroupSize,
5393 2, 32, "group size must be in the interval [2,32]")) {
5394 return false;
5396 if (!isPowerOf2_64(GroupSize)) {
5397 Error(S, "group size must be a power of two");
5398 return false;
5401 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize - 1);
5402 return true;
5405 bool
5406 AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) {
5407 using namespace llvm::AMDGPU::Swizzle;
5409 SMLoc S = Parser.getTok().getLoc();
5410 int64_t GroupSize;
5412 if (!parseSwizzleOperands(1, &GroupSize,
5413 1, 16, "group size must be in the interval [1,16]")) {
5414 return false;
5416 if (!isPowerOf2_64(GroupSize)) {
5417 Error(S, "group size must be a power of two");
5418 return false;
5421 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize);
5422 return true;
5425 bool
5426 AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) {
5427 using namespace llvm::AMDGPU::Swizzle;
5429 if (!skipToken(AsmToken::Comma, "expected a comma")) {
5430 return false;
5433 StringRef Ctl;
5434 SMLoc StrLoc = Parser.getTok().getLoc();
5435 if (!parseString(Ctl)) {
5436 return false;
5438 if (Ctl.size() != BITMASK_WIDTH) {
5439 Error(StrLoc, "expected a 5-character mask");
5440 return false;
5443 unsigned AndMask = 0;
5444 unsigned OrMask = 0;
5445 unsigned XorMask = 0;
5447 for (size_t i = 0; i < Ctl.size(); ++i) {
5448 unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i);
5449 switch(Ctl[i]) {
5450 default:
5451 Error(StrLoc, "invalid mask");
5452 return false;
5453 case '0':
5454 break;
5455 case '1':
5456 OrMask |= Mask;
5457 break;
5458 case 'p':
5459 AndMask |= Mask;
5460 break;
5461 case 'i':
5462 AndMask |= Mask;
5463 XorMask |= Mask;
5464 break;
5468 Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask);
5469 return true;
5472 bool
5473 AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) {
5475 SMLoc OffsetLoc = Parser.getTok().getLoc();
5477 if (!parseExpr(Imm)) {
5478 return false;
5480 if (!isUInt<16>(Imm)) {
5481 Error(OffsetLoc, "expected a 16-bit offset");
5482 return false;
5484 return true;
5487 bool
5488 AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) {
5489 using namespace llvm::AMDGPU::Swizzle;
5491 if (skipToken(AsmToken::LParen, "expected a left parenthesis")) {
5493 SMLoc ModeLoc = Parser.getTok().getLoc();
5494 bool Ok = false;
5496 if (trySkipId(IdSymbolic[ID_QUAD_PERM])) {
5497 Ok = parseSwizzleQuadPerm(Imm);
5498 } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) {
5499 Ok = parseSwizzleBitmaskPerm(Imm);
5500 } else if (trySkipId(IdSymbolic[ID_BROADCAST])) {
5501 Ok = parseSwizzleBroadcast(Imm);
5502 } else if (trySkipId(IdSymbolic[ID_SWAP])) {
5503 Ok = parseSwizzleSwap(Imm);
5504 } else if (trySkipId(IdSymbolic[ID_REVERSE])) {
5505 Ok = parseSwizzleReverse(Imm);
5506 } else {
5507 Error(ModeLoc, "expected a swizzle mode");
5510 return Ok && skipToken(AsmToken::RParen, "expected a closing parenthesis");
5513 return false;
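// A rough summary of the swizzle offset syntax handled below (illustrative):
//   offset:0xFFFF                          - raw 16-bit swizzle offset
//   offset:swizzle(QUAD_PERM, 0, 1, 2, 3)  - 2-bit lane selects within groups of 4
//   offset:swizzle(BITMASK_PERM, "01pi0")  - per-bit and/or/xor of the lane id
//   offset:swizzle(BROADCAST, 2, 1)        - broadcast lane 1 within groups of 2
//   offset:swizzle(SWAP, 2)                - swap neighbouring groups of 2 lanes
//   offset:swizzle(REVERSE, 4)             - reverse lanes within groups of 4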
5516 OperandMatchResultTy
5517 AMDGPUAsmParser::parseSwizzleOp(OperandVector &Operands) {
5518 SMLoc S = Parser.getTok().getLoc();
5519 int64_t Imm = 0;
5521 if (trySkipId("offset")) {
5523 bool Ok = false;
5524 if (skipToken(AsmToken::Colon, "expected a colon")) {
5525 if (trySkipId("swizzle")) {
5526 Ok = parseSwizzleMacro(Imm);
5527 } else {
5528 Ok = parseSwizzleOffset(Imm);
5532 Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle));
5534 return Ok? MatchOperand_Success : MatchOperand_ParseFail;
5535 } else {
5536 // Swizzle "offset" operand is optional.
5537 // If it is omitted, try parsing other optional operands.
5538 return parseOptionalOpr(Operands);
5542 bool
5543 AMDGPUOperand::isSwizzle() const {
5544 return isImmTy(ImmTySwizzle);
5547 //===----------------------------------------------------------------------===//
5548 // VGPR Index Mode
5549 //===----------------------------------------------------------------------===//
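// Forms accepted by parseGPRIdxMode below (illustrative): either a symbolic
// list such as gpr_idx(SRC0,SRC1,SRC2,DST), or a plain 4-bit immediate giving
// the same mode mask directly.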
5551 int64_t AMDGPUAsmParser::parseGPRIdxMacro() {
5553 using namespace llvm::AMDGPU::VGPRIndexMode;
5555 if (trySkipToken(AsmToken::RParen)) {
5556 return OFF;
5559 int64_t Imm = 0;
5561 while (true) {
5562 unsigned Mode = 0;
5563 SMLoc S = Parser.getTok().getLoc();
5565 for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) {
5566 if (trySkipId(IdSymbolic[ModeId])) {
5567 Mode = 1 << ModeId;
5568 break;
5572 if (Mode == 0) {
5573 Error(S, (Imm == 0)?
5574 "expected a VGPR index mode or a closing parenthesis" :
5575 "expected a VGPR index mode");
5576 break;
5579 if (Imm & Mode) {
5580 Error(S, "duplicate VGPR index mode");
5581 break;
5583 Imm |= Mode;
5585 if (trySkipToken(AsmToken::RParen))
5586 break;
5587 if (!skipToken(AsmToken::Comma,
5588 "expected a comma or a closing parenthesis"))
5589 break;
5592 return Imm;
5595 OperandMatchResultTy
5596 AMDGPUAsmParser::parseGPRIdxMode(OperandVector &Operands) {
5598 int64_t Imm = 0;
5599 SMLoc S = Parser.getTok().getLoc();
5601 if (getLexer().getKind() == AsmToken::Identifier &&
5602 Parser.getTok().getString() == "gpr_idx" &&
5603 getLexer().peekTok().is(AsmToken::LParen)) {
5605 Parser.Lex();
5606 Parser.Lex();
5608 // If parsing failed, trigger an error but do not return an error code,
5609 // to avoid excessive error messages.
5610 Imm = parseGPRIdxMacro();
5612 } else {
5613 if (getParser().parseAbsoluteExpression(Imm))
5614 return MatchOperand_NoMatch;
5615 if (Imm < 0 || !isUInt<4>(Imm)) {
5616 Error(S, "invalid immediate: only 4-bit values are legal");
5620 Operands.push_back(
5621 AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyGprIdxMode));
5622 return MatchOperand_Success;
5625 bool AMDGPUOperand::isGPRIdxMode() const {
5626 return isImmTy(ImmTyGprIdxMode);
5629 //===----------------------------------------------------------------------===//
5630 // sopp branch targets
5631 //===----------------------------------------------------------------------===//
5633 OperandMatchResultTy
5634 AMDGPUAsmParser::parseSOppBrTarget(OperandVector &Operands) {
5636 // Make sure we are not parsing something
5637 // that looks like a label or an expression but is not.
5638 // This will improve error messages.
5639 if (isRegister() || isModifier())
5640 return MatchOperand_NoMatch;
5642 if (parseExpr(Operands)) {
5644 AMDGPUOperand &Opr = ((AMDGPUOperand &)*Operands[Operands.size() - 1]);
5645 assert(Opr.isImm() || Opr.isExpr());
5646 SMLoc Loc = Opr.getStartLoc();
5648 // Currently we do not support arbitrary expressions as branch targets.
5649 // Only labels and absolute expressions are accepted.
5650 if (Opr.isExpr() && !Opr.isSymbolRefExpr()) {
5651 Error(Loc, "expected an absolute expression or a label");
5652 } else if (Opr.isImm() && !Opr.isS16Imm()) {
5653 Error(Loc, "expected a 16-bit signed jump offset");
5657 return MatchOperand_Success; // avoid excessive error messages
5660 //===----------------------------------------------------------------------===//
5661 // Boolean holding registers
5662 //===----------------------------------------------------------------------===//
5664 OperandMatchResultTy
5665 AMDGPUAsmParser::parseBoolReg(OperandVector &Operands) {
5666 return parseReg(Operands);
5669 //===----------------------------------------------------------------------===//
5670 // mubuf
5671 //===----------------------------------------------------------------------===//
5673 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultDLC() const {
5674 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDLC);
5677 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultGLC() const {
5678 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyGLC);
5681 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSLC() const {
5682 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTySLC);
5685 void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst,
5686 const OperandVector &Operands,
5687 bool IsAtomic,
5688 bool IsAtomicReturn,
5689 bool IsLds) {
5690 bool IsLdsOpcode = IsLds;
5691 bool HasLdsModifier = false;
5692 OptionalImmIndexMap OptionalIdx;
5693 assert(IsAtomicReturn ? IsAtomic : true);
5694 unsigned FirstOperandIdx = 1;
5696 for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) {
5697 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
5699 // Add the register arguments
5700 if (Op.isReg()) {
5701 Op.addRegOperands(Inst, 1);
5702 // Insert a tied src for atomic return dst.
5703 // This cannot be postponed as subsequent calls to
5704 // addImmOperands rely on the correct number of MC operands.
5705 if (IsAtomicReturn && i == FirstOperandIdx)
5706 Op.addRegOperands(Inst, 1);
5707 continue;
5710 // Handle the case where soffset is an immediate
5711 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
5712 Op.addImmOperands(Inst, 1);
5713 continue;
5716 HasLdsModifier |= Op.isLDS();
5718 // Handle tokens like 'offen' which are sometimes hard-coded into the
5719 // asm string. There are no MCInst operands for these.
5720 if (Op.isToken()) {
5721 continue;
5723 assert(Op.isImm());
5725 // Handle optional arguments
5726 OptionalIdx[Op.getImmTy()] = i;
5729 // This is a workaround for an llvm quirk which may result in an
5730 // incorrect instruction selection. Lds and non-lds versions of
5731 // MUBUF instructions are identical except that lds versions
5732 // have a mandatory 'lds' modifier. However, this modifier follows
5733 // optional modifiers, and the llvm asm matcher regards this 'lds'
5734 // modifier as an optional one. As a result, an lds version
5735 // of the opcode may be selected even if it has no 'lds' modifier.
5736 if (IsLdsOpcode && !HasLdsModifier) {
5737 int NoLdsOpcode = AMDGPU::getMUBUFNoLdsInst(Inst.getOpcode());
5738 if (NoLdsOpcode != -1) { // Got lds version - correct it.
5739 Inst.setOpcode(NoLdsOpcode);
5740 IsLdsOpcode = false;
5744 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset);
5745 if (!IsAtomic) { // glc is hard-coded.
5746 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
5748 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
5750 if (!IsLdsOpcode) { // tfe is not legal with lds opcodes
5751 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
5754 if (isGFX10())
5755 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC);
5758 void AMDGPUAsmParser::cvtMtbuf(MCInst &Inst, const OperandVector &Operands) {
5759 OptionalImmIndexMap OptionalIdx;
5761 for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
5762 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
5764 // Add the register arguments
5765 if (Op.isReg()) {
5766 Op.addRegOperands(Inst, 1);
5767 continue;
5770 // Handle the case where soffset is an immediate
5771 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
5772 Op.addImmOperands(Inst, 1);
5773 continue;
5776 // Handle tokens like 'offen' which are sometimes hard-coded into the
5777 // asm string. There are no MCInst operands for these.
5778 if (Op.isToken()) {
5779 continue;
5781 assert(Op.isImm());
5783 // Handle optional arguments
5784 OptionalIdx[Op.getImmTy()] = i;
5787 addOptionalImmOperand(Inst, Operands, OptionalIdx,
5788 AMDGPUOperand::ImmTyOffset);
5789 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyFORMAT);
5790 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
5791 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
5792 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
5794 if (isGFX10())
5795 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC);
5798 //===----------------------------------------------------------------------===//
5799 // mimg
5800 //===----------------------------------------------------------------------===//
5802 void AMDGPUAsmParser::cvtMIMG(MCInst &Inst, const OperandVector &Operands,
5803 bool IsAtomic) {
5804 unsigned I = 1;
5805 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
5806 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
5807 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
5810 if (IsAtomic) {
5811 // Add src, same as dst
5812 assert(Desc.getNumDefs() == 1);
5813 ((AMDGPUOperand &)*Operands[I - 1]).addRegOperands(Inst, 1);
5816 OptionalImmIndexMap OptionalIdx;
5818 for (unsigned E = Operands.size(); I != E; ++I) {
5819 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
5821 // Add the register arguments
5822 if (Op.isReg()) {
5823 Op.addRegOperands(Inst, 1);
5824 } else if (Op.isImmModifier()) {
5825 OptionalIdx[Op.getImmTy()] = I;
5826 } else if (!Op.isToken()) {
5827 llvm_unreachable("unexpected operand type");
5831 bool IsGFX10 = isGFX10();
5833 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDMask);
5834 if (IsGFX10)
5835 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDim, -1);
5836 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyUNorm);
5837 if (IsGFX10)
5838 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC);
5839 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
5840 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
5841 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyR128A16);
5842 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
5843 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyLWE);
5844 if (!IsGFX10)
5845 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDA);
5846 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyD16);
5849 void AMDGPUAsmParser::cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands) {
5850 cvtMIMG(Inst, Operands, true);
5853 //===----------------------------------------------------------------------===//
5854 // smrd
5855 //===----------------------------------------------------------------------===//
5857 bool AMDGPUOperand::isSMRDOffset8() const {
5858 return isImm() && isUInt<8>(getImm());
5861 bool AMDGPUOperand::isSMRDOffset20() const {
5862 return isImm() && isUInt<20>(getImm());
5865 bool AMDGPUOperand::isSMRDLiteralOffset() const {
5866 // 32-bit literals are only supported on CI and we only want to use them
5867 // when the offset does not fit in 8 bits.
5868 return isImm() && !isUInt<8>(getImm()) && isUInt<32>(getImm());
5871 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset8() const {
5872 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
5875 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset20() const {
5876 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
5879 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDLiteralOffset() const {
5880 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
5883 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFlatOffset() const {
5884 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
5887 //===----------------------------------------------------------------------===//
5888 // vop3
5889 //===----------------------------------------------------------------------===//
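// The hardware omod field encodes an output modifier: 0 = none, 1 = *2,
// 2 = *4, 3 = /2. The helpers below map the values written after 'mul:' and
// 'div:' in the assembly onto that encoding (e.g. mul:4 -> 2, div:2 -> 3).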
5891 static bool ConvertOmodMul(int64_t &Mul) {
5892 if (Mul != 1 && Mul != 2 && Mul != 4)
5893 return false;
5895 Mul >>= 1;
5896 return true;
5899 static bool ConvertOmodDiv(int64_t &Div) {
5900 if (Div == 1) {
5901 Div = 0;
5902 return true;
5905 if (Div == 2) {
5906 Div = 3;
5907 return true;
5910 return false;
5913 static bool ConvertBoundCtrl(int64_t &BoundCtrl) {
5914 if (BoundCtrl == 0) {
5915 BoundCtrl = 1;
5916 return true;
5919 if (BoundCtrl == -1) {
5920 BoundCtrl = 0;
5921 return true;
5924 return false;
5927 // Note: the order in this table matches the order of operands in AsmString.
5928 static const OptionalOperand AMDGPUOptionalOperandTable[] = {
5929 {"offen", AMDGPUOperand::ImmTyOffen, true, nullptr},
5930 {"idxen", AMDGPUOperand::ImmTyIdxen, true, nullptr},
5931 {"addr64", AMDGPUOperand::ImmTyAddr64, true, nullptr},
5932 {"offset0", AMDGPUOperand::ImmTyOffset0, false, nullptr},
5933 {"offset1", AMDGPUOperand::ImmTyOffset1, false, nullptr},
5934 {"gds", AMDGPUOperand::ImmTyGDS, true, nullptr},
5935 {"lds", AMDGPUOperand::ImmTyLDS, true, nullptr},
5936 {"offset", AMDGPUOperand::ImmTyOffset, false, nullptr},
5937 {"inst_offset", AMDGPUOperand::ImmTyInstOffset, false, nullptr},
5938 {"dlc", AMDGPUOperand::ImmTyDLC, true, nullptr},
5939 {"format", AMDGPUOperand::ImmTyFORMAT, false, nullptr},
5940 {"glc", AMDGPUOperand::ImmTyGLC, true, nullptr},
5941 {"slc", AMDGPUOperand::ImmTySLC, true, nullptr},
5942 {"tfe", AMDGPUOperand::ImmTyTFE, true, nullptr},
5943 {"d16", AMDGPUOperand::ImmTyD16, true, nullptr},
5944 {"high", AMDGPUOperand::ImmTyHigh, true, nullptr},
5945 {"clamp", AMDGPUOperand::ImmTyClampSI, true, nullptr},
5946 {"omod", AMDGPUOperand::ImmTyOModSI, false, ConvertOmodMul},
5947 {"unorm", AMDGPUOperand::ImmTyUNorm, true, nullptr},
5948 {"da", AMDGPUOperand::ImmTyDA, true, nullptr},
5949 {"r128", AMDGPUOperand::ImmTyR128A16, true, nullptr},
5950 {"a16", AMDGPUOperand::ImmTyR128A16, true, nullptr},
5951 {"lwe", AMDGPUOperand::ImmTyLWE, true, nullptr},
5952 {"d16", AMDGPUOperand::ImmTyD16, true, nullptr},
5953 {"dmask", AMDGPUOperand::ImmTyDMask, false, nullptr},
5954 {"dim", AMDGPUOperand::ImmTyDim, false, nullptr},
5955 {"row_mask", AMDGPUOperand::ImmTyDppRowMask, false, nullptr},
5956 {"bank_mask", AMDGPUOperand::ImmTyDppBankMask, false, nullptr},
5957 {"bound_ctrl", AMDGPUOperand::ImmTyDppBoundCtrl, false, ConvertBoundCtrl},
5958 {"fi", AMDGPUOperand::ImmTyDppFi, false, nullptr},
5959 {"dst_sel", AMDGPUOperand::ImmTySdwaDstSel, false, nullptr},
5960 {"src0_sel", AMDGPUOperand::ImmTySdwaSrc0Sel, false, nullptr},
5961 {"src1_sel", AMDGPUOperand::ImmTySdwaSrc1Sel, false, nullptr},
5962 {"dst_unused", AMDGPUOperand::ImmTySdwaDstUnused, false, nullptr},
5963 {"compr", AMDGPUOperand::ImmTyExpCompr, true, nullptr },
5964 {"vm", AMDGPUOperand::ImmTyExpVM, true, nullptr},
5965 {"op_sel", AMDGPUOperand::ImmTyOpSel, false, nullptr},
5966 {"op_sel_hi", AMDGPUOperand::ImmTyOpSelHi, false, nullptr},
5967 {"neg_lo", AMDGPUOperand::ImmTyNegLo, false, nullptr},
5968 {"neg_hi", AMDGPUOperand::ImmTyNegHi, false, nullptr},
5969 {"blgp", AMDGPUOperand::ImmTyBLGP, false, nullptr},
5970 {"cbsz", AMDGPUOperand::ImmTyCBSZ, false, nullptr},
5971 {"abid", AMDGPUOperand::ImmTyABID, false, nullptr}
5974 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOperand(OperandVector &Operands) {
5975 unsigned size = Operands.size();
5976 assert(size > 0);
5978 OperandMatchResultTy res = parseOptionalOpr(Operands);
5980 // This is a hack to enable hardcoded mandatory operands which follow
5981 // optional operands.
5983 // The current design assumes that all operands after the first optional operand
5984 // are also optional. However, the implementation of some instructions violates
5985 // this rule (see e.g. flat/global atomics, which have hardcoded 'glc' operands).
5987 // To alleviate this problem, we have to (implicitly) parse extra operands to
5988 // make sure the autogenerated parser of custom operands never hits a hardcoded
5989 // mandatory operand.
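// (An illustrative case: returning flat/global atomics such as
//  "flat_atomic_swap v0, v[1:2], v2 glc", where the trailing 'glc' is part of
//  the asm string rather than a parsed optional operand.)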
5991 if (size == 1 || ((AMDGPUOperand &)*Operands[size - 1]).isRegKind()) {
5993 // We have parsed the first optional operand.
5994 // Parse as many operands as necessary to skip all mandatory operands.
5996 for (unsigned i = 0; i < MAX_OPR_LOOKAHEAD; ++i) {
5997 if (res != MatchOperand_Success ||
5998 getLexer().is(AsmToken::EndOfStatement)) break;
5999 if (getLexer().is(AsmToken::Comma)) Parser.Lex();
6000 res = parseOptionalOpr(Operands);
6004 return res;
6007 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOpr(OperandVector &Operands) {
6008 OperandMatchResultTy res;
6009 for (const OptionalOperand &Op : AMDGPUOptionalOperandTable) {
6010 // try to parse any optional operand here
6011 if (Op.IsBit) {
6012 res = parseNamedBit(Op.Name, Operands, Op.Type);
6013 } else if (Op.Type == AMDGPUOperand::ImmTyOModSI) {
6014 res = parseOModOperand(Operands);
6015 } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstSel ||
6016 Op.Type == AMDGPUOperand::ImmTySdwaSrc0Sel ||
6017 Op.Type == AMDGPUOperand::ImmTySdwaSrc1Sel) {
6018 res = parseSDWASel(Operands, Op.Name, Op.Type);
6019 } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstUnused) {
6020 res = parseSDWADstUnused(Operands);
6021 } else if (Op.Type == AMDGPUOperand::ImmTyOpSel ||
6022 Op.Type == AMDGPUOperand::ImmTyOpSelHi ||
6023 Op.Type == AMDGPUOperand::ImmTyNegLo ||
6024 Op.Type == AMDGPUOperand::ImmTyNegHi) {
6025 res = parseOperandArrayWithPrefix(Op.Name, Operands, Op.Type,
6026 Op.ConvertResult);
6027 } else if (Op.Type == AMDGPUOperand::ImmTyDim) {
6028 res = parseDim(Operands);
6029 } else if (Op.Type == AMDGPUOperand::ImmTyFORMAT && !isGFX10()) {
6030 res = parseDfmtNfmt(Operands);
6031 } else {
6032 res = parseIntWithPrefix(Op.Name, Operands, Op.Type, Op.ConvertResult);
6034 if (res != MatchOperand_NoMatch) {
6035 return res;
6038 return MatchOperand_NoMatch;
6041 OperandMatchResultTy AMDGPUAsmParser::parseOModOperand(OperandVector &Operands) {
6042 StringRef Name = Parser.getTok().getString();
6043 if (Name == "mul") {
6044 return parseIntWithPrefix("mul", Operands,
6045 AMDGPUOperand::ImmTyOModSI, ConvertOmodMul);
6048 if (Name == "div") {
6049 return parseIntWithPrefix("div", Operands,
6050 AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv);
6053 return MatchOperand_NoMatch;
6056 void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands) {
6057 cvtVOP3P(Inst, Operands);
6059 int Opc = Inst.getOpcode();
6061 int SrcNum;
6062 const int Ops[] = { AMDGPU::OpName::src0,
6063 AMDGPU::OpName::src1,
6064 AMDGPU::OpName::src2 };
6065 for (SrcNum = 0;
6066 SrcNum < 3 && AMDGPU::getNamedOperandIdx(Opc, Ops[SrcNum]) != -1;
6067 ++SrcNum);
6068 assert(SrcNum > 0);
6070 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
6071 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
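// The op_sel bit for the destination is bit SrcNum of the op_sel operand;
// it has no modifiers operand of its own, so it is carried in src0_modifiers
// as SISrcMods::DST_OP_SEL.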
6073 if ((OpSel & (1 << SrcNum)) != 0) {
6074 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers);
6075 uint32_t ModVal = Inst.getOperand(ModIdx).getImm();
6076 Inst.getOperand(ModIdx).setImm(ModVal | SISrcMods::DST_OP_SEL);
6080 static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) {
6081 // 1. This operand is an input-modifiers operand
6082 return Desc.OpInfo[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS
6083 // 2. This is not the last operand
6084 && Desc.NumOperands > (OpNum + 1)
6085 // 3. The next operand has a register class
6086 && Desc.OpInfo[OpNum + 1].RegClass != -1
6087 // 4. The next operand is not tied to any other operand
6088 && Desc.getOperandConstraint(OpNum + 1, MCOI::OperandConstraint::TIED_TO) == -1;
6091 void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands)
6093 OptionalImmIndexMap OptionalIdx;
6094 unsigned Opc = Inst.getOpcode();
6096 unsigned I = 1;
6097 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
6098 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
6099 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
6102 for (unsigned E = Operands.size(); I != E; ++I) {
6103 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
6104 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
6105 Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
6106 } else if (Op.isInterpSlot() ||
6107 Op.isInterpAttr() ||
6108 Op.isAttrChan()) {
6109 Inst.addOperand(MCOperand::createImm(Op.getImm()));
6110 } else if (Op.isImmModifier()) {
6111 OptionalIdx[Op.getImmTy()] = I;
6112 } else {
6113 llvm_unreachable("unhandled operand type");
6117 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::high) != -1) {
6118 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyHigh);
6121 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
6122 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
6125 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
6126 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
6130 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands,
6131 OptionalImmIndexMap &OptionalIdx) {
6132 unsigned Opc = Inst.getOpcode();
6134 unsigned I = 1;
6135 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
6136 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
6137 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
6140 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1) {
6141 // This instruction has src modifiers
6142 for (unsigned E = Operands.size(); I != E; ++I) {
6143 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
6144 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
6145 Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
6146 } else if (Op.isImmModifier()) {
6147 OptionalIdx[Op.getImmTy()] = I;
6148 } else if (Op.isRegOrImm()) {
6149 Op.addRegOrImmOperands(Inst, 1);
6150 } else {
6151 llvm_unreachable("unhandled operand type");
6154 } else {
6155 // No src modifiers
6156 for (unsigned E = Operands.size(); I != E; ++I) {
6157 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
6158 if (Op.isMod()) {
6159 OptionalIdx[Op.getImmTy()] = I;
6160 } else {
6161 Op.addRegOrImmOperands(Inst, 1);
6166 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
6167 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
6170 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
6171 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
6174 // Special case v_mac_{f16, f32} and v_fmac_{f16, f32} (gfx906/gfx10+):
6175 // they have a src2 register operand that is tied to the dst operand.
6176 // We don't allow modifiers for this operand in the assembler, so
6177 // src2_modifiers should be 0.
6178 if (Opc == AMDGPU::V_MAC_F32_e64_gfx6_gfx7 ||
6179 Opc == AMDGPU::V_MAC_F32_e64_gfx10 ||
6180 Opc == AMDGPU::V_MAC_F32_e64_vi ||
6181 Opc == AMDGPU::V_MAC_F16_e64_vi ||
6182 Opc == AMDGPU::V_FMAC_F32_e64_gfx10 ||
6183 Opc == AMDGPU::V_FMAC_F32_e64_vi ||
6184 Opc == AMDGPU::V_FMAC_F16_e64_gfx10) {
6185 auto it = Inst.begin();
6186 std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers));
6187 it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2
6188 ++it;
6189 Inst.insert(it, Inst.getOperand(0)); // src2 = dst
6193 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) {
6194 OptionalImmIndexMap OptionalIdx;
6195 cvtVOP3(Inst, Operands, OptionalIdx);
6198 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst,
6199 const OperandVector &Operands) {
6200 OptionalImmIndexMap OptIdx;
6201 const int Opc = Inst.getOpcode();
6202 const MCInstrDesc &Desc = MII.get(Opc);
6204 const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0;
6206 cvtVOP3(Inst, Operands, OptIdx);
6208 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in) != -1) {
6209 assert(!IsPacked);
6210 Inst.addOperand(Inst.getOperand(0));
6213 // FIXME: This is messy. Parse the modifiers as if this were a normal VOP3
6214 // instruction, and then figure out where the modifiers actually belong.
6216 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel);
6218 int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
6219 if (OpSelHiIdx != -1) {
6220 int DefaultVal = IsPacked ? -1 : 0;
6221 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi,
6222 DefaultVal);
6225 int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo);
6226 if (NegLoIdx != -1) {
6227 assert(IsPacked);
6228 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo);
6229 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi);
6232 const int Ops[] = { AMDGPU::OpName::src0,
6233 AMDGPU::OpName::src1,
6234 AMDGPU::OpName::src2 };
6235 const int ModOps[] = { AMDGPU::OpName::src0_modifiers,
6236 AMDGPU::OpName::src1_modifiers,
6237 AMDGPU::OpName::src2_modifiers };
6239 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
6241 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
6242 unsigned OpSelHi = 0;
6243 unsigned NegLo = 0;
6244 unsigned NegHi = 0;
6246 if (OpSelHiIdx != -1) {
6247 OpSelHi = Inst.getOperand(OpSelHiIdx).getImm();
6250 if (NegLoIdx != -1) {
6251 int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi);
6252 NegLo = Inst.getOperand(NegLoIdx).getImm();
6253 NegHi = Inst.getOperand(NegHiIdx).getImm();
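// Distribute bit J of the packed op_sel/op_sel_hi/neg_lo/neg_hi values into
// the per-source modifiers operand of source J.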
6256 for (int J = 0; J < 3; ++J) {
6257 int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]);
6258 if (OpIdx == -1)
6259 break;
6261 uint32_t ModVal = 0;
6263 if ((OpSel & (1 << J)) != 0)
6264 ModVal |= SISrcMods::OP_SEL_0;
6266 if ((OpSelHi & (1 << J)) != 0)
6267 ModVal |= SISrcMods::OP_SEL_1;
6269 if ((NegLo & (1 << J)) != 0)
6270 ModVal |= SISrcMods::NEG;
6272 if ((NegHi & (1 << J)) != 0)
6273 ModVal |= SISrcMods::NEG_HI;
6275 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
6277 Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal);
6281 //===----------------------------------------------------------------------===//
6282 // dpp
6283 //===----------------------------------------------------------------------===//
6285 bool AMDGPUOperand::isDPP8() const {
6286 return isImmTy(ImmTyDPP8);
6289 bool AMDGPUOperand::isDPPCtrl() const {
6290 using namespace AMDGPU::DPP;
6292 bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm());
6293 if (result) {
6294 int64_t Imm = getImm();
6295 return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) ||
6296 (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) ||
6297 (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) ||
6298 (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) ||
6299 (Imm == DppCtrl::WAVE_SHL1) ||
6300 (Imm == DppCtrl::WAVE_ROL1) ||
6301 (Imm == DppCtrl::WAVE_SHR1) ||
6302 (Imm == DppCtrl::WAVE_ROR1) ||
6303 (Imm == DppCtrl::ROW_MIRROR) ||
6304 (Imm == DppCtrl::ROW_HALF_MIRROR) ||
6305 (Imm == DppCtrl::BCAST15) ||
6306 (Imm == DppCtrl::BCAST31) ||
6307 (Imm >= DppCtrl::ROW_SHARE_FIRST && Imm <= DppCtrl::ROW_SHARE_LAST) ||
6308 (Imm >= DppCtrl::ROW_XMASK_FIRST && Imm <= DppCtrl::ROW_XMASK_LAST);
6310 return false;
6313 //===----------------------------------------------------------------------===//
6314 // mAI
6315 //===----------------------------------------------------------------------===//
6317 bool AMDGPUOperand::isBLGP() const {
6318 return isImm() && getImmTy() == ImmTyBLGP && isUInt<3>(getImm());
6321 bool AMDGPUOperand::isCBSZ() const {
6322 return isImm() && getImmTy() == ImmTyCBSZ && isUInt<3>(getImm());
6325 bool AMDGPUOperand::isABID() const {
6326 return isImm() && getImmTy() == ImmTyABID && isUInt<4>(getImm());
6329 bool AMDGPUOperand::isS16Imm() const {
6330 return isImm() && (isInt<16>(getImm()) || isUInt<16>(getImm()));
6333 bool AMDGPUOperand::isU16Imm() const {
6334 return isImm() && isUInt<16>(getImm());
6337 OperandMatchResultTy AMDGPUAsmParser::parseDim(OperandVector &Operands) {
6338 if (!isGFX10())
6339 return MatchOperand_NoMatch;
6341 SMLoc S = Parser.getTok().getLoc();
6343 if (getLexer().isNot(AsmToken::Identifier))
6344 return MatchOperand_NoMatch;
6345 if (getLexer().getTok().getString() != "dim")
6346 return MatchOperand_NoMatch;
6348 Parser.Lex();
6349 if (getLexer().isNot(AsmToken::Colon))
6350 return MatchOperand_ParseFail;
6352 Parser.Lex();
6354 // We want to allow "dim:1D" etc., but the initial 1 is tokenized as an
6355 // integer.
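// E.g. "dim:2D" lexes as the integer 2 followed by the identifier D, and the
// two pieces are glued back together below. The "SQ_RSRC_IMG_" spelling
// (e.g. "dim:SQ_RSRC_IMG_2D_ARRAY") is also accepted and has its prefix
// stripped.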
6356 std::string Token;
6357 if (getLexer().is(AsmToken::Integer)) {
6358 SMLoc Loc = getLexer().getTok().getEndLoc();
6359 Token = getLexer().getTok().getString();
6360 Parser.Lex();
6361 if (getLexer().getTok().getLoc() != Loc)
6362 return MatchOperand_ParseFail;
6364 if (getLexer().isNot(AsmToken::Identifier))
6365 return MatchOperand_ParseFail;
6366 Token += getLexer().getTok().getString();
6368 StringRef DimId = Token;
6369 if (DimId.startswith("SQ_RSRC_IMG_"))
6370 DimId = DimId.substr(12);
6372 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByAsmSuffix(DimId);
6373 if (!DimInfo)
6374 return MatchOperand_ParseFail;
6376 Parser.Lex();
6378 Operands.push_back(AMDGPUOperand::CreateImm(this, DimInfo->Encoding, S,
6379 AMDGPUOperand::ImmTyDim));
6380 return MatchOperand_Success;
6383 OperandMatchResultTy AMDGPUAsmParser::parseDPP8(OperandVector &Operands) {
6384 SMLoc S = Parser.getTok().getLoc();
6385 StringRef Prefix;
6387 if (getLexer().getKind() == AsmToken::Identifier) {
6388 Prefix = Parser.getTok().getString();
6389 } else {
6390 return MatchOperand_NoMatch;
6393 if (Prefix != "dpp8")
6394 return parseDPPCtrl(Operands);
6395 if (!isGFX10())
6396 return MatchOperand_NoMatch;
6398 // dpp8:[%d,%d,%d,%d,%d,%d,%d,%d]
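// Each selector is packed into 3 bits, selector i at bits [3*i+2 : 3*i] of
// the immediate, so the identity dpp8:[0,1,2,3,4,5,6,7] encodes as 0xFAC688.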
6400 int64_t Sels[8];
6402 Parser.Lex();
6403 if (getLexer().isNot(AsmToken::Colon))
6404 return MatchOperand_ParseFail;
6406 Parser.Lex();
6407 if (getLexer().isNot(AsmToken::LBrac))
6408 return MatchOperand_ParseFail;
6410 Parser.Lex();
6411 if (getParser().parseAbsoluteExpression(Sels[0]))
6412 return MatchOperand_ParseFail;
6413 if (0 > Sels[0] || 7 < Sels[0])
6414 return MatchOperand_ParseFail;
6416 for (size_t i = 1; i < 8; ++i) {
6417 if (getLexer().isNot(AsmToken::Comma))
6418 return MatchOperand_ParseFail;
6420 Parser.Lex();
6421 if (getParser().parseAbsoluteExpression(Sels[i]))
6422 return MatchOperand_ParseFail;
6423 if (0 > Sels[i] || 7 < Sels[i])
6424 return MatchOperand_ParseFail;
6427 if (getLexer().isNot(AsmToken::RBrac))
6428 return MatchOperand_ParseFail;
6429 Parser.Lex();
6431 unsigned DPP8 = 0;
6432 for (size_t i = 0; i < 8; ++i)
6433 DPP8 |= (Sels[i] << (i * 3));
6435 Operands.push_back(AMDGPUOperand::CreateImm(this, DPP8, S, AMDGPUOperand::ImmTyDPP8));
6436 return MatchOperand_Success;
6439 OperandMatchResultTy
6440 AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) {
6441 using namespace AMDGPU::DPP;
6443 SMLoc S = Parser.getTok().getLoc();
6444 StringRef Prefix;
6445 int64_t Int;
6447 if (getLexer().getKind() == AsmToken::Identifier) {
6448 Prefix = Parser.getTok().getString();
6449 } else {
6450 return MatchOperand_NoMatch;
6453 if (Prefix == "row_mirror") {
6454 Int = DppCtrl::ROW_MIRROR;
6455 Parser.Lex();
6456 } else if (Prefix == "row_half_mirror") {
6457 Int = DppCtrl::ROW_HALF_MIRROR;
6458 Parser.Lex();
6459 } else {
6460 // Check to prevent parseDPPCtrlOps from eating invalid tokens
6461 if (Prefix != "quad_perm"
6462 && Prefix != "row_shl"
6463 && Prefix != "row_shr"
6464 && Prefix != "row_ror"
6465 && Prefix != "wave_shl"
6466 && Prefix != "wave_rol"
6467 && Prefix != "wave_shr"
6468 && Prefix != "wave_ror"
6469 && Prefix != "row_bcast"
6470 && Prefix != "row_share"
6471 && Prefix != "row_xmask") {
6472 return MatchOperand_NoMatch;
6475 if (!isGFX10() && (Prefix == "row_share" || Prefix == "row_xmask"))
6476 return MatchOperand_NoMatch;
6478 if (!isVI() && !isGFX9() &&
6479 (Prefix == "wave_shl" || Prefix == "wave_shr" ||
6480 Prefix == "wave_rol" || Prefix == "wave_ror" ||
6481 Prefix == "row_bcast"))
6482 return MatchOperand_NoMatch;
6484 Parser.Lex();
6485 if (getLexer().isNot(AsmToken::Colon))
6486 return MatchOperand_ParseFail;
6488 if (Prefix == "quad_perm") {
6489 // quad_perm:[%d,%d,%d,%d]
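// Each lane select is packed into 2 bits, select i at bits [2*i+1 : 2*i],
// so the identity quad_perm:[0,1,2,3] encodes as 0xE4.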
6490 Parser.Lex();
6491 if (getLexer().isNot(AsmToken::LBrac))
6492 return MatchOperand_ParseFail;
6493 Parser.Lex();
6495 if (getParser().parseAbsoluteExpression(Int) || !(0 <= Int && Int <=3))
6496 return MatchOperand_ParseFail;
6498 for (int i = 0; i < 3; ++i) {
6499 if (getLexer().isNot(AsmToken::Comma))
6500 return MatchOperand_ParseFail;
6501 Parser.Lex();
6503 int64_t Temp;
6504 if (getParser().parseAbsoluteExpression(Temp) || !(0 <= Temp && Temp <=3))
6505 return MatchOperand_ParseFail;
6506 const int shift = i*2 + 2;
6507 Int += (Temp << shift);
6510 if (getLexer().isNot(AsmToken::RBrac))
6511 return MatchOperand_ParseFail;
6512 Parser.Lex();
6513 } else {
6514 // sel:%d
6515 Parser.Lex();
6516 if (getParser().parseAbsoluteExpression(Int))
6517 return MatchOperand_ParseFail;
6519 if (Prefix == "row_shl" && 1 <= Int && Int <= 15) {
6520 Int |= DppCtrl::ROW_SHL0;
6521 } else if (Prefix == "row_shr" && 1 <= Int && Int <= 15) {
6522 Int |= DppCtrl::ROW_SHR0;
6523 } else if (Prefix == "row_ror" && 1 <= Int && Int <= 15) {
6524 Int |= DppCtrl::ROW_ROR0;
6525 } else if (Prefix == "wave_shl" && 1 == Int) {
6526 Int = DppCtrl::WAVE_SHL1;
6527 } else if (Prefix == "wave_rol" && 1 == Int) {
6528 Int = DppCtrl::WAVE_ROL1;
6529 } else if (Prefix == "wave_shr" && 1 == Int) {
6530 Int = DppCtrl::WAVE_SHR1;
6531 } else if (Prefix == "wave_ror" && 1 == Int) {
6532 Int = DppCtrl::WAVE_ROR1;
6533 } else if (Prefix == "row_bcast") {
6534 if (Int == 15) {
6535 Int = DppCtrl::BCAST15;
6536 } else if (Int == 31) {
6537 Int = DppCtrl::BCAST31;
6538 } else {
6539 return MatchOperand_ParseFail;
6541 } else if (Prefix == "row_share" && 0 <= Int && Int <= 15) {
6542 Int |= DppCtrl::ROW_SHARE_FIRST;
6543 } else if (Prefix == "row_xmask" && 0 <= Int && Int <= 15) {
6544 Int |= DppCtrl::ROW_XMASK_FIRST;
6545 } else {
6546 return MatchOperand_ParseFail;
6551 Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTyDppCtrl));
6552 return MatchOperand_Success;
6555 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultRowMask() const {
6556 return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppRowMask);
6559 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultEndpgmImmOperands() const {
6560 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyEndpgm);
6563 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBankMask() const {
6564 return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppBankMask);
6567 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBoundCtrl() const {
6568 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppBoundCtrl);
6571 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFI() const {
6572 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppFi);
6575 void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8) {
6576 OptionalImmIndexMap OptionalIdx;
6578 unsigned I = 1;
6579 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
6580 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
6581 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
6584 int Fi = 0;
6585 for (unsigned E = Operands.size(); I != E; ++I) {
6586 auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(),
6587 MCOI::TIED_TO);
6588 if (TiedTo != -1) {
6589 assert((unsigned)TiedTo < Inst.getNumOperands());
6590 // Handle the tied 'old' or src2 operand for MAC instructions.
6591 Inst.addOperand(Inst.getOperand(TiedTo));
6593 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
6594 // Add the register arguments
6595 if (Op.isReg() && validateVccOperand(Op.getReg())) {
6596 // VOP2b (v_add_u32, v_sub_u32, ...) dpp uses the "vcc" token.
6597 // Skip it.
6598 continue;
6601 if (IsDPP8) {
6602 if (Op.isDPP8()) {
6603 Op.addImmOperands(Inst, 1);
6604 } else if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
6605 Op.addRegWithFPInputModsOperands(Inst, 2);
6606 } else if (Op.isFI()) {
6607 Fi = Op.getImm();
6608 } else if (Op.isReg()) {
6609 Op.addRegOperands(Inst, 1);
6610 } else {
6611 llvm_unreachable("Invalid operand type");
6613 } else {
6614 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
6615 Op.addRegWithFPInputModsOperands(Inst, 2);
6616 } else if (Op.isDPPCtrl()) {
6617 Op.addImmOperands(Inst, 1);
6618 } else if (Op.isImm()) {
6619 // Handle optional arguments
6620 OptionalIdx[Op.getImmTy()] = I;
6621 } else {
6622 llvm_unreachable("Invalid operand type");
6627 if (IsDPP8) {
6628 using namespace llvm::AMDGPU::DPP;
6629 Inst.addOperand(MCOperand::createImm(Fi? DPP8_FI_1 : DPP8_FI_0));
6630 } else {
6631 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf);
6632 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf);
6633 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl);
6634 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::fi) != -1) {
6635 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppFi);
6640 //===----------------------------------------------------------------------===//
6641 // sdwa
6642 //===----------------------------------------------------------------------===//
6644 OperandMatchResultTy
6645 AMDGPUAsmParser::parseSDWASel(OperandVector &Operands, StringRef Prefix,
6646 AMDGPUOperand::ImmTy Type) {
6647 using namespace llvm::AMDGPU::SDWA;
6649 SMLoc S = Parser.getTok().getLoc();
6650 StringRef Value;
6651 OperandMatchResultTy res;
6653 res = parseStringWithPrefix(Prefix, Value);
6654 if (res != MatchOperand_Success) {
6655 return res;
6658 int64_t Int;
6659 Int = StringSwitch<int64_t>(Value)
6660 .Case("BYTE_0", SdwaSel::BYTE_0)
6661 .Case("BYTE_1", SdwaSel::BYTE_1)
6662 .Case("BYTE_2", SdwaSel::BYTE_2)
6663 .Case("BYTE_3", SdwaSel::BYTE_3)
6664 .Case("WORD_0", SdwaSel::WORD_0)
6665 .Case("WORD_1", SdwaSel::WORD_1)
6666 .Case("DWORD", SdwaSel::DWORD)
6667 .Default(0xffffffff);
6668 Parser.Lex(); // eat last token
6670 if (Int == 0xffffffff) {
6671 return MatchOperand_ParseFail;
6674 Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, Type));
6675 return MatchOperand_Success;
6678 OperandMatchResultTy
6679 AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) {
6680 using namespace llvm::AMDGPU::SDWA;
6682 SMLoc S = Parser.getTok().getLoc();
6683 StringRef Value;
6684 OperandMatchResultTy res;
6686 res = parseStringWithPrefix("dst_unused", Value);
6687 if (res != MatchOperand_Success) {
6688 return res;
6691 int64_t Int;
6692 Int = StringSwitch<int64_t>(Value)
6693 .Case("UNUSED_PAD", DstUnused::UNUSED_PAD)
6694 .Case("UNUSED_SEXT", DstUnused::UNUSED_SEXT)
6695 .Case("UNUSED_PRESERVE", DstUnused::UNUSED_PRESERVE)
6696 .Default(0xffffffff);
6697 Parser.Lex(); // eat last token
6699 if (Int == 0xffffffff) {
6700 return MatchOperand_ParseFail;
6703 Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTySdwaDstUnused));
6704 return MatchOperand_Success;
6707 void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) {
6708 cvtSDWA(Inst, Operands, SIInstrFlags::VOP1);
6711 void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) {
6712 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2);
6715 void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) {
6716 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true);
6719 void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) {
6720 cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, isVI());
6723 void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands,
6724 uint64_t BasicInstType, bool skipVcc) {
6725 using namespace llvm::AMDGPU::SDWA;
6727 OptionalImmIndexMap OptionalIdx;
6728 bool skippedVcc = false;
6730 unsigned I = 1;
6731 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
6732 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
6733 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
6736 for (unsigned E = Operands.size(); I != E; ++I) {
6737 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
6738 if (skipVcc && !skippedVcc && Op.isReg() &&
6739 (Op.getReg() == AMDGPU::VCC || Op.getReg() == AMDGPU::VCC_LO)) {
6740 // VOP2b (v_add_u32, v_sub_u32, ...) sdwa uses the "vcc" token as dst.
6741 // Skip it if it is the 2nd operand (e.g. v_add_i32_sdwa v1, vcc, v2, v3)
6742 // or the 4th operand (v_addc_u32_sdwa v1, vcc, v2, v3, vcc).
6743 // Skip VCC only if we did not skip it on the previous iteration.
6744 if (BasicInstType == SIInstrFlags::VOP2 &&
6745 (Inst.getNumOperands() == 1 || Inst.getNumOperands() == 5)) {
6746 skippedVcc = true;
6747 continue;
6748 } else if (BasicInstType == SIInstrFlags::VOPC &&
6749 Inst.getNumOperands() == 0) {
6750 skippedVcc = true;
6751 continue;
6754 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
6755 Op.addRegOrImmWithInputModsOperands(Inst, 2);
6756 } else if (Op.isImm()) {
6757 // Handle optional arguments
6758 OptionalIdx[Op.getImmTy()] = I;
6759 } else {
6760 llvm_unreachable("Invalid operand type");
6762 skippedVcc = false;
6765 if (Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx10 &&
6766 Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx9 &&
6767 Inst.getOpcode() != AMDGPU::V_NOP_sdwa_vi) {
6768 // v_nop_sdwa (vi/gfx9/gfx10) has no optional sdwa arguments
6769 switch (BasicInstType) {
6770 case SIInstrFlags::VOP1:
6771 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
6772 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
6773 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
6775 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
6776 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
6777 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
6778 break;
6780 case SIInstrFlags::VOP2:
6781 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
6782 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
6783 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
6785 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
6786 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
6787 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
6788 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
6789 break;
6791 case SIInstrFlags::VOPC:
6792 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::clamp) != -1)
6793 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
6794 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
6795 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
6796 break;
6798 default:
6799 llvm_unreachable("Invalid instruction type. Only VOP1, VOP2 and VOPC allowed");
6803 // Special case for v_mac_{f16, f32}:
6804 // it has a src2 register operand that is tied to the dst operand.
6805 if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
6806 Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
6807 auto it = Inst.begin();
6808 std::advance(
6809 it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2));
6810 Inst.insert(it, Inst.getOperand(0)); // src2 = dst
6814 //===----------------------------------------------------------------------===//
6815 // mAI
6816 //===----------------------------------------------------------------------===//
6818 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBLGP() const {
6819 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyBLGP);
6822 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCBSZ() const {
6823 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCBSZ);
6826 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultABID() const {
6827 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyABID);
6830 /// Force static initialization.
6831 extern "C" void LLVMInitializeAMDGPUAsmParser() {
6832 RegisterMCAsmParser<AMDGPUAsmParser> A(getTheAMDGPUTarget());
6833 RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget());
6836 #define GET_REGISTER_MATCHER
6837 #define GET_MATCHER_IMPLEMENTATION
6838 #define GET_MNEMONIC_SPELL_CHECKER
6839 #include "AMDGPUGenAsmMatcher.inc"
6841 // This function should be defined after the auto-generated include so that
6842 // the MatchClassKind enum is defined.
6843 unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op,
6844 unsigned Kind) {
6845 // Tokens like "glc" would be parsed as immediate operands in ParseOperand().
6846 // But MatchInstructionImpl() expects to see a token and fails to validate the
6847 // operand. This method checks whether we were given an immediate operand but
6848 // are expected to provide the corresponding token.
6849 AMDGPUOperand &Operand = (AMDGPUOperand&)Op;
6850 switch (Kind) {
6851 case MCK_addr64:
6852 return Operand.isAddr64() ? Match_Success : Match_InvalidOperand;
6853 case MCK_gds:
6854 return Operand.isGDS() ? Match_Success : Match_InvalidOperand;
6855 case MCK_lds:
6856 return Operand.isLDS() ? Match_Success : Match_InvalidOperand;
6857 case MCK_glc:
6858 return Operand.isGLC() ? Match_Success : Match_InvalidOperand;
6859 case MCK_idxen:
6860 return Operand.isIdxen() ? Match_Success : Match_InvalidOperand;
6861 case MCK_offen:
6862 return Operand.isOffen() ? Match_Success : Match_InvalidOperand;
6863 case MCK_SSrcB32:
6864 // When operands have expression values, they will return true for isToken,
6865 // because it is not possible to distinguish between a token and an
6866 // expression at parse time. MatchInstructionImpl() will always try to
6867 // match an operand as a token when isToken returns true, and when the
6868 // name of the expression is not a valid token, the match will fail,
6869 // so we need to handle it here.
6870 return Operand.isSSrcB32() ? Match_Success : Match_InvalidOperand;
6871 case MCK_SSrcF32:
6872 return Operand.isSSrcF32() ? Match_Success : Match_InvalidOperand;
6873 case MCK_SoppBrTarget:
6874 return Operand.isSoppBrTarget() ? Match_Success : Match_InvalidOperand;
6875 case MCK_VReg32OrOff:
6876 return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand;
6877 case MCK_InterpSlot:
6878 return Operand.isInterpSlot() ? Match_Success : Match_InvalidOperand;
6879 case MCK_Attr:
6880 return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand;
6881 case MCK_AttrChan:
6882 return Operand.isAttrChan() ? Match_Success : Match_InvalidOperand;
6883 default:
6884 return Match_InvalidOperand;
6888 //===----------------------------------------------------------------------===//
6889 // endpgm
6890 //===----------------------------------------------------------------------===//
6892 OperandMatchResultTy AMDGPUAsmParser::parseEndpgmOp(OperandVector &Operands) {
6893 SMLoc S = Parser.getTok().getLoc();
6894 int64_t Imm = 0;
6896 if (!parseExpr(Imm)) {
6897 // The operand is optional, if not present default to 0
6898 Imm = 0;
6901 if (!isUInt<16>(Imm)) {
6902 Error(S, "expected a 16-bit value");
6903 return MatchOperand_ParseFail;
6906 Operands.push_back(
6907 AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyEndpgm));
6908 return MatchOperand_Success;
6911 bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmTyEndpgm); }