1 //===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
9 #include "AMDGPU.h"
10 #include "AMDKernelCodeT.h"
11 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
12 #include "MCTargetDesc/AMDGPUTargetStreamer.h"
13 #include "SIDefines.h"
14 #include "SIInstrInfo.h"
15 #include "TargetInfo/AMDGPUTargetInfo.h"
16 #include "Utils/AMDGPUAsmUtils.h"
17 #include "Utils/AMDGPUBaseInfo.h"
18 #include "Utils/AMDKernelCodeTUtils.h"
19 #include "llvm/ADT/APFloat.h"
20 #include "llvm/ADT/APInt.h"
21 #include "llvm/ADT/ArrayRef.h"
22 #include "llvm/ADT/STLExtras.h"
23 #include "llvm/ADT/SmallBitVector.h"
24 #include "llvm/ADT/SmallString.h"
25 #include "llvm/ADT/StringRef.h"
26 #include "llvm/ADT/StringSwitch.h"
27 #include "llvm/ADT/Twine.h"
28 #include "llvm/BinaryFormat/ELF.h"
29 #include "llvm/MC/MCAsmInfo.h"
30 #include "llvm/MC/MCContext.h"
31 #include "llvm/MC/MCExpr.h"
32 #include "llvm/MC/MCInst.h"
33 #include "llvm/MC/MCInstrDesc.h"
34 #include "llvm/MC/MCInstrInfo.h"
35 #include "llvm/MC/MCParser/MCAsmLexer.h"
36 #include "llvm/MC/MCParser/MCAsmParser.h"
37 #include "llvm/MC/MCParser/MCAsmParserExtension.h"
38 #include "llvm/MC/MCParser/MCParsedAsmOperand.h"
39 #include "llvm/MC/MCParser/MCTargetAsmParser.h"
40 #include "llvm/MC/MCRegisterInfo.h"
41 #include "llvm/MC/MCStreamer.h"
42 #include "llvm/MC/MCSubtargetInfo.h"
43 #include "llvm/MC/MCSymbol.h"
44 #include "llvm/Support/AMDGPUMetadata.h"
45 #include "llvm/Support/AMDHSAKernelDescriptor.h"
46 #include "llvm/Support/Casting.h"
47 #include "llvm/Support/Compiler.h"
48 #include "llvm/Support/ErrorHandling.h"
49 #include "llvm/Support/MachineValueType.h"
50 #include "llvm/Support/MathExtras.h"
51 #include "llvm/Support/SMLoc.h"
52 #include "llvm/Support/TargetParser.h"
53 #include "llvm/Support/TargetRegistry.h"
54 #include "llvm/Support/raw_ostream.h"
55 #include <algorithm>
56 #include <cassert>
57 #include <cstdint>
58 #include <cstring>
59 #include <iterator>
60 #include <map>
61 #include <memory>
62 #include <string>
64 using namespace llvm;
65 using namespace llvm::AMDGPU;
66 using namespace llvm::amdhsa;
68 namespace {
70 class AMDGPUAsmParser;
72 enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_AGPR, IS_TTMP, IS_SPECIAL };
74 //===----------------------------------------------------------------------===//
75 // Operand
76 //===----------------------------------------------------------------------===//
78 class AMDGPUOperand : public MCParsedAsmOperand {
79 enum KindTy {
80 Token,
81 Immediate,
82 Register,
83 Expression
84 } Kind;
86 SMLoc StartLoc, EndLoc;
87 const AMDGPUAsmParser *AsmParser;
89 public:
90 AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_)
91 : MCParsedAsmOperand(), Kind(Kind_), AsmParser(AsmParser_) {}
93 using Ptr = std::unique_ptr<AMDGPUOperand>;
95 struct Modifiers {
96 bool Abs = false;
97 bool Neg = false;
98 bool Sext = false;
100 bool hasFPModifiers() const { return Abs || Neg; }
101 bool hasIntModifiers() const { return Sext; }
102 bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); }
104 int64_t getFPModifiersOperand() const {
105 int64_t Operand = 0;
106 Operand |= Abs ? SISrcMods::ABS : 0u;
107 Operand |= Neg ? SISrcMods::NEG : 0u;
108 return Operand;
111 int64_t getIntModifiersOperand() const {
112 int64_t Operand = 0;
113 Operand |= Sext ? SISrcMods::SEXT : 0u;
114 return Operand;
117 int64_t getModifiersOperand() const {
118 assert(!(hasFPModifiers() && hasIntModifiers())
119 && "fp and int modifiers should not be used simultaneously");
120 if (hasFPModifiers()) {
121 return getFPModifiersOperand();
122 } else if (hasIntModifiers()) {
123 return getIntModifiersOperand();
124 } else {
125 return 0;
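// For example, an operand written with SP3-style modifiers such as "-|v0|"
// sets both Neg and Abs, so getModifiersOperand() returns
// SISrcMods::ABS | SISrcMods::NEG, while "sext(v0)" yields SISrcMods::SEXT.
// FP and integer modifiers are mutually exclusive, as asserted above.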
129 friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
132 enum ImmTy {
133 ImmTyNone,
134 ImmTyGDS,
135 ImmTyLDS,
136 ImmTyOffen,
137 ImmTyIdxen,
138 ImmTyAddr64,
139 ImmTyOffset,
140 ImmTyInstOffset,
141 ImmTyOffset0,
142 ImmTyOffset1,
143 ImmTyDLC,
144 ImmTyGLC,
145 ImmTySLC,
146 ImmTyTFE,
147 ImmTyD16,
148 ImmTyClampSI,
149 ImmTyOModSI,
150 ImmTyDPP8,
151 ImmTyDppCtrl,
152 ImmTyDppRowMask,
153 ImmTyDppBankMask,
154 ImmTyDppBoundCtrl,
155 ImmTyDppFi,
156 ImmTySdwaDstSel,
157 ImmTySdwaSrc0Sel,
158 ImmTySdwaSrc1Sel,
159 ImmTySdwaDstUnused,
160 ImmTyDMask,
161 ImmTyDim,
162 ImmTyUNorm,
163 ImmTyDA,
164 ImmTyR128A16,
165 ImmTyLWE,
166 ImmTyExpTgt,
167 ImmTyExpCompr,
168 ImmTyExpVM,
169 ImmTyFORMAT,
170 ImmTyHwreg,
171 ImmTyOff,
172 ImmTySendMsg,
173 ImmTyInterpSlot,
174 ImmTyInterpAttr,
175 ImmTyAttrChan,
176 ImmTyOpSel,
177 ImmTyOpSelHi,
178 ImmTyNegLo,
179 ImmTyNegHi,
180 ImmTySwizzle,
181 ImmTyGprIdxMode,
182 ImmTyHigh,
183 ImmTyBLGP,
184 ImmTyCBSZ,
185 ImmTyABID,
186 ImmTyEndpgm,
189 private:
190 struct TokOp {
191 const char *Data;
192 unsigned Length;
195 struct ImmOp {
196 int64_t Val;
197 ImmTy Type;
198 bool IsFPImm;
199 Modifiers Mods;
202 struct RegOp {
203 unsigned RegNo;
204 Modifiers Mods;
207 union {
208 TokOp Tok;
209 ImmOp Imm;
210 RegOp Reg;
211 const MCExpr *Expr;
214 public:
215 bool isToken() const override {
216 if (Kind == Token)
217 return true;
219 // When parsing operands, we can't always tell if something was meant to be
220 // a token, like 'gds', or an expression that references a global variable.
221 // In this case, we assume the string is an expression, and if we need to
222 // interpret it as a token, then we treat the symbol name as the token.
223 return isSymbolRefExpr();
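// For instance, in something like "ds_read_b32 v0, v1 gds" the trailing
// "gds" may have been parsed as an expression referring to a symbol named
// "gds"; isToken() (and getToken() below) then falls back to using that
// symbol's name as the token.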
226 bool isSymbolRefExpr() const {
227 return isExpr() && Expr && isa<MCSymbolRefExpr>(Expr);
230 bool isImm() const override {
231 return Kind == Immediate;
234 bool isInlinableImm(MVT type) const;
235 bool isLiteralImm(MVT type) const;
237 bool isRegKind() const {
238 return Kind == Register;
241 bool isReg() const override {
242 return isRegKind() && !hasModifiers();
245 bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const {
246 return isRegClass(RCID) || isInlinableImm(type) || isLiteralImm(type);
249 bool isRegOrImmWithInt16InputMods() const {
250 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16);
253 bool isRegOrImmWithInt32InputMods() const {
254 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32);
257 bool isRegOrImmWithInt64InputMods() const {
258 return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64);
261 bool isRegOrImmWithFP16InputMods() const {
262 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16);
265 bool isRegOrImmWithFP32InputMods() const {
266 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32);
269 bool isRegOrImmWithFP64InputMods() const {
270 return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64);
273 bool isVReg() const {
274 return isRegClass(AMDGPU::VGPR_32RegClassID) ||
275 isRegClass(AMDGPU::VReg_64RegClassID) ||
276 isRegClass(AMDGPU::VReg_96RegClassID) ||
277 isRegClass(AMDGPU::VReg_128RegClassID) ||
278 isRegClass(AMDGPU::VReg_160RegClassID) ||
279 isRegClass(AMDGPU::VReg_256RegClassID) ||
280 isRegClass(AMDGPU::VReg_512RegClassID) ||
281 isRegClass(AMDGPU::VReg_1024RegClassID);
284 bool isVReg32() const {
285 return isRegClass(AMDGPU::VGPR_32RegClassID);
288 bool isVReg32OrOff() const {
289 return isOff() || isVReg32();
292 bool isSDWAOperand(MVT type) const;
293 bool isSDWAFP16Operand() const;
294 bool isSDWAFP32Operand() const;
295 bool isSDWAInt16Operand() const;
296 bool isSDWAInt32Operand() const;
298 bool isImmTy(ImmTy ImmT) const {
299 return isImm() && Imm.Type == ImmT;
302 bool isImmModifier() const {
303 return isImm() && Imm.Type != ImmTyNone;
306 bool isClampSI() const { return isImmTy(ImmTyClampSI); }
307 bool isOModSI() const { return isImmTy(ImmTyOModSI); }
308 bool isDMask() const { return isImmTy(ImmTyDMask); }
309 bool isDim() const { return isImmTy(ImmTyDim); }
310 bool isUNorm() const { return isImmTy(ImmTyUNorm); }
311 bool isDA() const { return isImmTy(ImmTyDA); }
312 bool isR128A16() const { return isImmTy(ImmTyR128A16); }
313 bool isLWE() const { return isImmTy(ImmTyLWE); }
314 bool isOff() const { return isImmTy(ImmTyOff); }
315 bool isExpTgt() const { return isImmTy(ImmTyExpTgt); }
316 bool isExpVM() const { return isImmTy(ImmTyExpVM); }
317 bool isExpCompr() const { return isImmTy(ImmTyExpCompr); }
318 bool isOffen() const { return isImmTy(ImmTyOffen); }
319 bool isIdxen() const { return isImmTy(ImmTyIdxen); }
320 bool isAddr64() const { return isImmTy(ImmTyAddr64); }
321 bool isOffset() const { return isImmTy(ImmTyOffset) && isUInt<16>(getImm()); }
322 bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<8>(getImm()); }
323 bool isOffset1() const { return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); }
325 bool isFlatOffset() const { return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); }
326 bool isGDS() const { return isImmTy(ImmTyGDS); }
327 bool isLDS() const { return isImmTy(ImmTyLDS); }
328 bool isDLC() const { return isImmTy(ImmTyDLC); }
329 bool isGLC() const { return isImmTy(ImmTyGLC); }
330 bool isSLC() const { return isImmTy(ImmTySLC); }
331 bool isTFE() const { return isImmTy(ImmTyTFE); }
332 bool isD16() const { return isImmTy(ImmTyD16); }
333 bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<8>(getImm()); }
334 bool isBankMask() const { return isImmTy(ImmTyDppBankMask); }
335 bool isRowMask() const { return isImmTy(ImmTyDppRowMask); }
336 bool isBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); }
337 bool isFI() const { return isImmTy(ImmTyDppFi); }
338 bool isSDWADstSel() const { return isImmTy(ImmTySdwaDstSel); }
339 bool isSDWASrc0Sel() const { return isImmTy(ImmTySdwaSrc0Sel); }
340 bool isSDWASrc1Sel() const { return isImmTy(ImmTySdwaSrc1Sel); }
341 bool isSDWADstUnused() const { return isImmTy(ImmTySdwaDstUnused); }
342 bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); }
343 bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); }
344 bool isAttrChan() const { return isImmTy(ImmTyAttrChan); }
345 bool isOpSel() const { return isImmTy(ImmTyOpSel); }
346 bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); }
347 bool isNegLo() const { return isImmTy(ImmTyNegLo); }
348 bool isNegHi() const { return isImmTy(ImmTyNegHi); }
349 bool isHigh() const { return isImmTy(ImmTyHigh); }
351 bool isMod() const {
352 return isClampSI() || isOModSI();
355 bool isRegOrImm() const {
356 return isReg() || isImm();
359 bool isRegClass(unsigned RCID) const;
361 bool isInlineValue() const;
363 bool isRegOrInlineNoMods(unsigned RCID, MVT type) const {
364 return (isRegClass(RCID) || isInlinableImm(type)) && !hasModifiers();
367 bool isSCSrcB16() const {
368 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16);
371 bool isSCSrcV2B16() const {
372 return isSCSrcB16();
375 bool isSCSrcB32() const {
376 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32);
379 bool isSCSrcB64() const {
380 return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64);
383 bool isBoolReg() const;
385 bool isSCSrcF16() const {
386 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16);
389 bool isSCSrcV2F16() const {
390 return isSCSrcF16();
393 bool isSCSrcF32() const {
394 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32);
397 bool isSCSrcF64() const {
398 return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64);
401 bool isSSrcB32() const {
402 return isSCSrcB32() || isLiteralImm(MVT::i32) || isExpr();
405 bool isSSrcB16() const {
406 return isSCSrcB16() || isLiteralImm(MVT::i16);
409 bool isSSrcV2B16() const {
410 llvm_unreachable("cannot happen");
411 return isSSrcB16();
414 bool isSSrcB64() const {
415 // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits.
416 // See isVSrc64().
417 return isSCSrcB64() || isLiteralImm(MVT::i64);
420 bool isSSrcF32() const {
421 return isSCSrcB32() || isLiteralImm(MVT::f32) || isExpr();
424 bool isSSrcF64() const {
425 return isSCSrcB64() || isLiteralImm(MVT::f64);
428 bool isSSrcF16() const {
429 return isSCSrcB16() || isLiteralImm(MVT::f16);
432 bool isSSrcV2F16() const {
433 llvm_unreachable("cannot happen");
434 return isSSrcF16();
437 bool isSSrcOrLdsB32() const {
438 return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) ||
439 isLiteralImm(MVT::i32) || isExpr();
442 bool isVCSrcB32() const {
443 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32);
446 bool isVCSrcB64() const {
447 return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64);
450 bool isVCSrcB16() const {
451 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16);
454 bool isVCSrcV2B16() const {
455 return isVCSrcB16();
458 bool isVCSrcF32() const {
459 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32);
462 bool isVCSrcF64() const {
463 return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64);
466 bool isVCSrcF16() const {
467 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16);
470 bool isVCSrcV2F16() const {
471 return isVCSrcF16();
474 bool isVSrcB32() const {
475 return isVCSrcF32() || isLiteralImm(MVT::i32) || isExpr();
478 bool isVSrcB64() const {
479 return isVCSrcF64() || isLiteralImm(MVT::i64);
482 bool isVSrcB16() const {
483 return isVCSrcF16() || isLiteralImm(MVT::i16);
486 bool isVSrcV2B16() const {
487 return isVSrcB16() || isLiteralImm(MVT::v2i16);
490 bool isVSrcF32() const {
491 return isVCSrcF32() || isLiteralImm(MVT::f32) || isExpr();
494 bool isVSrcF64() const {
495 return isVCSrcF64() || isLiteralImm(MVT::f64);
498 bool isVSrcF16() const {
499 return isVCSrcF16() || isLiteralImm(MVT::f16);
502 bool isVSrcV2F16() const {
503 return isVSrcF16() || isLiteralImm(MVT::v2f16);
506 bool isVISrcB32() const {
507 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i32);
510 bool isVISrcB16() const {
511 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i16);
514 bool isVISrcV2B16() const {
515 return isVISrcB16();
518 bool isVISrcF32() const {
519 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f32);
522 bool isVISrcF16() const {
523 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f16);
526 bool isVISrcV2F16() const {
527 return isVISrcF16() || isVISrcB32();
530 bool isAISrcB32() const {
531 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i32);
534 bool isAISrcB16() const {
535 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i16);
538 bool isAISrcV2B16() const {
539 return isAISrcB16();
542 bool isAISrcF32() const {
543 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f32);
546 bool isAISrcF16() const {
547 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f16);
550 bool isAISrcV2F16() const {
551 return isAISrcF16() || isAISrcB32();
554 bool isAISrc_128B32() const {
555 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i32);
558 bool isAISrc_128B16() const {
559 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i16);
562 bool isAISrc_128V2B16() const {
563 return isAISrc_128B16();
566 bool isAISrc_128F32() const {
567 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f32);
570 bool isAISrc_128F16() const {
571 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f16);
574 bool isAISrc_128V2F16() const {
575 return isAISrc_128F16() || isAISrc_128B32();
578 bool isAISrc_512B32() const {
579 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i32);
582 bool isAISrc_512B16() const {
583 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i16);
586 bool isAISrc_512V2B16() const {
587 return isAISrc_512B16();
590 bool isAISrc_512F32() const {
591 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f32);
594 bool isAISrc_512F16() const {
595 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f16);
598 bool isAISrc_512V2F16() const {
599 return isAISrc_512F16() || isAISrc_512B32();
602 bool isAISrc_1024B32() const {
603 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i32);
606 bool isAISrc_1024B16() const {
607 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i16);
610 bool isAISrc_1024V2B16() const {
611 return isAISrc_1024B16();
614 bool isAISrc_1024F32() const {
615 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f32);
618 bool isAISrc_1024F16() const {
619 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f16);
622 bool isAISrc_1024V2F16() const {
623 return isAISrc_1024F16() || isAISrc_1024B32();
626 bool isKImmFP32() const {
627 return isLiteralImm(MVT::f32);
630 bool isKImmFP16() const {
631 return isLiteralImm(MVT::f16);
634 bool isMem() const override {
635 return false;
638 bool isExpr() const {
639 return Kind == Expression;
642 bool isSoppBrTarget() const {
643 return isExpr() || isImm();
646 bool isSWaitCnt() const;
647 bool isHwreg() const;
648 bool isSendMsg() const;
649 bool isSwizzle() const;
650 bool isSMRDOffset8() const;
651 bool isSMRDOffset20() const;
652 bool isSMRDLiteralOffset() const;
653 bool isDPP8() const;
654 bool isDPPCtrl() const;
655 bool isBLGP() const;
656 bool isCBSZ() const;
657 bool isABID() const;
658 bool isGPRIdxMode() const;
659 bool isS16Imm() const;
660 bool isU16Imm() const;
661 bool isEndpgm() const;
663 StringRef getExpressionAsToken() const {
664 assert(isExpr());
665 const MCSymbolRefExpr *S = cast<MCSymbolRefExpr>(Expr);
666 return S->getSymbol().getName();
669 StringRef getToken() const {
670 assert(isToken());
672 if (Kind == Expression)
673 return getExpressionAsToken();
675 return StringRef(Tok.Data, Tok.Length);
678 int64_t getImm() const {
679 assert(isImm());
680 return Imm.Val;
683 ImmTy getImmTy() const {
684 assert(isImm());
685 return Imm.Type;
688 unsigned getReg() const override {
689 assert(isRegKind());
690 return Reg.RegNo;
693 SMLoc getStartLoc() const override {
694 return StartLoc;
697 SMLoc getEndLoc() const override {
698 return EndLoc;
701 SMRange getLocRange() const {
702 return SMRange(StartLoc, EndLoc);
705 Modifiers getModifiers() const {
706 assert(isRegKind() || isImmTy(ImmTyNone));
707 return isRegKind() ? Reg.Mods : Imm.Mods;
710 void setModifiers(Modifiers Mods) {
711 assert(isRegKind() || isImmTy(ImmTyNone));
712 if (isRegKind())
713 Reg.Mods = Mods;
714 else
715 Imm.Mods = Mods;
718 bool hasModifiers() const {
719 return getModifiers().hasModifiers();
722 bool hasFPModifiers() const {
723 return getModifiers().hasFPModifiers();
726 bool hasIntModifiers() const {
727 return getModifiers().hasIntModifiers();
730 uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const;
732 void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const;
734 void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const;
736 template <unsigned Bitwidth>
737 void addKImmFPOperands(MCInst &Inst, unsigned N) const;
739 void addKImmFP16Operands(MCInst &Inst, unsigned N) const {
740 addKImmFPOperands<16>(Inst, N);
743 void addKImmFP32Operands(MCInst &Inst, unsigned N) const {
744 addKImmFPOperands<32>(Inst, N);
747 void addRegOperands(MCInst &Inst, unsigned N) const;
749 void addBoolRegOperands(MCInst &Inst, unsigned N) const {
750 addRegOperands(Inst, N);
753 void addRegOrImmOperands(MCInst &Inst, unsigned N) const {
754 if (isRegKind())
755 addRegOperands(Inst, N);
756 else if (isExpr())
757 Inst.addOperand(MCOperand::createExpr(Expr));
758 else
759 addImmOperands(Inst, N);
762 void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const {
763 Modifiers Mods = getModifiers();
764 Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
765 if (isRegKind()) {
766 addRegOperands(Inst, N);
767 } else {
768 addImmOperands(Inst, N, false);
772 void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
773 assert(!hasIntModifiers());
774 addRegOrImmWithInputModsOperands(Inst, N);
777 void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
778 assert(!hasFPModifiers());
779 addRegOrImmWithInputModsOperands(Inst, N);
782 void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const {
783 Modifiers Mods = getModifiers();
784 Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
785 assert(isRegKind());
786 addRegOperands(Inst, N);
789 void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
790 assert(!hasIntModifiers());
791 addRegWithInputModsOperands(Inst, N);
794 void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
795 assert(!hasFPModifiers());
796 addRegWithInputModsOperands(Inst, N);
799 void addSoppBrTargetOperands(MCInst &Inst, unsigned N) const {
800 if (isImm())
801 addImmOperands(Inst, N);
802 else {
803 assert(isExpr());
804 Inst.addOperand(MCOperand::createExpr(Expr));
808 static void printImmTy(raw_ostream& OS, ImmTy Type) {
809 switch (Type) {
810 case ImmTyNone: OS << "None"; break;
811 case ImmTyGDS: OS << "GDS"; break;
812 case ImmTyLDS: OS << "LDS"; break;
813 case ImmTyOffen: OS << "Offen"; break;
814 case ImmTyIdxen: OS << "Idxen"; break;
815 case ImmTyAddr64: OS << "Addr64"; break;
816 case ImmTyOffset: OS << "Offset"; break;
817 case ImmTyInstOffset: OS << "InstOffset"; break;
818 case ImmTyOffset0: OS << "Offset0"; break;
819 case ImmTyOffset1: OS << "Offset1"; break;
820 case ImmTyDLC: OS << "DLC"; break;
821 case ImmTyGLC: OS << "GLC"; break;
822 case ImmTySLC: OS << "SLC"; break;
823 case ImmTyTFE: OS << "TFE"; break;
824 case ImmTyD16: OS << "D16"; break;
825 case ImmTyFORMAT: OS << "FORMAT"; break;
826 case ImmTyClampSI: OS << "ClampSI"; break;
827 case ImmTyOModSI: OS << "OModSI"; break;
828 case ImmTyDPP8: OS << "DPP8"; break;
829 case ImmTyDppCtrl: OS << "DppCtrl"; break;
830 case ImmTyDppRowMask: OS << "DppRowMask"; break;
831 case ImmTyDppBankMask: OS << "DppBankMask"; break;
832 case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break;
833 case ImmTyDppFi: OS << "FI"; break;
834 case ImmTySdwaDstSel: OS << "SdwaDstSel"; break;
835 case ImmTySdwaSrc0Sel: OS << "SdwaSrc0Sel"; break;
836 case ImmTySdwaSrc1Sel: OS << "SdwaSrc1Sel"; break;
837 case ImmTySdwaDstUnused: OS << "SdwaDstUnused"; break;
838 case ImmTyDMask: OS << "DMask"; break;
839 case ImmTyDim: OS << "Dim"; break;
840 case ImmTyUNorm: OS << "UNorm"; break;
841 case ImmTyDA: OS << "DA"; break;
842 case ImmTyR128A16: OS << "R128A16"; break;
843 case ImmTyLWE: OS << "LWE"; break;
844 case ImmTyOff: OS << "Off"; break;
845 case ImmTyExpTgt: OS << "ExpTgt"; break;
846 case ImmTyExpCompr: OS << "ExpCompr"; break;
847 case ImmTyExpVM: OS << "ExpVM"; break;
848 case ImmTyHwreg: OS << "Hwreg"; break;
849 case ImmTySendMsg: OS << "SendMsg"; break;
850 case ImmTyInterpSlot: OS << "InterpSlot"; break;
851 case ImmTyInterpAttr: OS << "InterpAttr"; break;
852 case ImmTyAttrChan: OS << "AttrChan"; break;
853 case ImmTyOpSel: OS << "OpSel"; break;
854 case ImmTyOpSelHi: OS << "OpSelHi"; break;
855 case ImmTyNegLo: OS << "NegLo"; break;
856 case ImmTyNegHi: OS << "NegHi"; break;
857 case ImmTySwizzle: OS << "Swizzle"; break;
858 case ImmTyGprIdxMode: OS << "GprIdxMode"; break;
859 case ImmTyHigh: OS << "High"; break;
860 case ImmTyBLGP: OS << "BLGP"; break;
861 case ImmTyCBSZ: OS << "CBSZ"; break;
862 case ImmTyABID: OS << "ABID"; break;
863 case ImmTyEndpgm: OS << "Endpgm"; break;
867 void print(raw_ostream &OS) const override {
868 switch (Kind) {
869 case Register:
870 OS << "<register " << getReg() << " mods: " << Reg.Mods << '>';
871 break;
872 case Immediate:
873 OS << '<' << getImm();
874 if (getImmTy() != ImmTyNone) {
875 OS << " type: "; printImmTy(OS, getImmTy());
877 OS << " mods: " << Imm.Mods << '>';
878 break;
879 case Token:
880 OS << '\'' << getToken() << '\'';
881 break;
882 case Expression:
883 OS << "<expr " << *Expr << '>';
884 break;
888 static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser,
889 int64_t Val, SMLoc Loc,
890 ImmTy Type = ImmTyNone,
891 bool IsFPImm = false) {
892 auto Op = std::make_unique<AMDGPUOperand>(Immediate, AsmParser);
893 Op->Imm.Val = Val;
894 Op->Imm.IsFPImm = IsFPImm;
895 Op->Imm.Type = Type;
896 Op->Imm.Mods = Modifiers();
897 Op->StartLoc = Loc;
898 Op->EndLoc = Loc;
899 return Op;
902 static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser,
903 StringRef Str, SMLoc Loc,
904 bool HasExplicitEncodingSize = true) {
905 auto Res = std::make_unique<AMDGPUOperand>(Token, AsmParser);
906 Res->Tok.Data = Str.data();
907 Res->Tok.Length = Str.size();
908 Res->StartLoc = Loc;
909 Res->EndLoc = Loc;
910 return Res;
913 static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser,
914 unsigned RegNo, SMLoc S,
915 SMLoc E) {
916 auto Op = std::make_unique<AMDGPUOperand>(Register, AsmParser);
917 Op->Reg.RegNo = RegNo;
918 Op->Reg.Mods = Modifiers();
919 Op->StartLoc = S;
920 Op->EndLoc = E;
921 return Op;
924 static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser,
925 const class MCExpr *Expr, SMLoc S) {
926 auto Op = std::make_unique<AMDGPUOperand>(Expression, AsmParser);
927 Op->Expr = Expr;
928 Op->StartLoc = S;
929 Op->EndLoc = S;
930 return Op;
934 raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) {
935 OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext;
936 return OS;
939 //===----------------------------------------------------------------------===//
940 // AsmParser
941 //===----------------------------------------------------------------------===//
943 // Holds info related to the current kernel, e.g. count of SGPRs used.
944 // Kernel scope begins at the .amdgpu_hsa_kernel directive and ends at the
945 // next .amdgpu_hsa_kernel directive or at EOF.
946 class KernelScopeInfo {
947 int SgprIndexUnusedMin = -1;
948 int VgprIndexUnusedMin = -1;
949 MCContext *Ctx = nullptr;
951 void usesSgprAt(int i) {
952 if (i >= SgprIndexUnusedMin) {
953 SgprIndexUnusedMin = ++i;
954 if (Ctx) {
955 MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count"));
956 Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx));
961 void usesVgprAt(int i) {
962 if (i >= VgprIndexUnusedMin) {
963 VgprIndexUnusedMin = ++i;
964 if (Ctx) {
965 MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
966 Sym->setVariableValue(MCConstantExpr::create(VgprIndexUnusedMin, *Ctx));
971 public:
972 KernelScopeInfo() = default;
974 void initialize(MCContext &Context) {
975 Ctx = &Context;
976 usesSgprAt(SgprIndexUnusedMin = -1);
977 usesVgprAt(VgprIndexUnusedMin = -1);
980 void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex, unsigned RegWidth) {
981 switch (RegKind) {
982 case IS_SGPR: usesSgprAt(DwordRegIndex + RegWidth - 1); break;
983 case IS_AGPR: // fall through
984 case IS_VGPR: usesVgprAt(DwordRegIndex + RegWidth - 1); break;
985 default: break;
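// Worked example: a reference to s[10:11] reaches here with RegKind IS_SGPR,
// DwordRegIndex 10 and RegWidth 2, so usesSgprAt(11) runs and bumps
// SgprIndexUnusedMin (and the .kernel.sgpr_count symbol) to 12. AGPR uses
// are folded into the VGPR count via the fall-through above.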
990 class AMDGPUAsmParser : public MCTargetAsmParser {
991 MCAsmParser &Parser;
993 // Number of extra operands parsed after the first optional operand.
994 // This may be necessary to skip hardcoded mandatory operands.
995 static const unsigned MAX_OPR_LOOKAHEAD = 8;
997 unsigned ForcedEncodingSize = 0;
998 bool ForcedDPP = false;
999 bool ForcedSDWA = false;
1000 KernelScopeInfo KernelScope;
1002 /// @name Auto-generated Match Functions
1003 /// {
1005 #define GET_ASSEMBLER_HEADER
1006 #include "AMDGPUGenAsmMatcher.inc"
1008 /// }
1010 private:
1011 bool ParseAsAbsoluteExpression(uint32_t &Ret);
1012 bool OutOfRangeError(SMRange Range);
1013 /// Calculate VGPR/SGPR blocks required for given target, reserved
1014 /// registers, and user-specified NextFreeXGPR values.
1016 /// \param Features [in] Target features, used for bug corrections.
1017 /// \param VCCUsed [in] Whether VCC special SGPR is reserved.
1018 /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved.
1019 /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved.
1020 /// \param EnableWavefrontSize32 [in] Value of ENABLE_WAVEFRONT_SIZE32 kernel
1021 /// descriptor field, if valid.
1022 /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one.
1023 /// \param VGPRRange [in] Token range, used for VGPR diagnostics.
1024 /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one.
1025 /// \param SGPRRange [in] Token range, used for SGPR diagnostics.
1026 /// \param VGPRBlocks [out] Result VGPR block count.
1027 /// \param SGPRBlocks [out] Result SGPR block count.
1028 bool calculateGPRBlocks(const FeatureBitset &Features, bool VCCUsed,
1029 bool FlatScrUsed, bool XNACKUsed,
1030 Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR,
1031 SMRange VGPRRange, unsigned NextFreeSGPR,
1032 SMRange SGPRRange, unsigned &VGPRBlocks,
1033 unsigned &SGPRBlocks);
1034 bool ParseDirectiveAMDGCNTarget();
1035 bool ParseDirectiveAMDHSAKernel();
1036 bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor);
1037 bool ParseDirectiveHSACodeObjectVersion();
1038 bool ParseDirectiveHSACodeObjectISA();
1039 bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header);
1040 bool ParseDirectiveAMDKernelCodeT();
1041 bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo) const;
1042 bool ParseDirectiveAMDGPUHsaKernel();
1044 bool ParseDirectiveISAVersion();
1045 bool ParseDirectiveHSAMetadata();
1046 bool ParseDirectivePALMetadataBegin();
1047 bool ParseDirectivePALMetadata();
1048 bool ParseDirectiveAMDGPULDS();
1050 /// Common code to parse out a block of text (typically YAML) between start and
1051 /// end directives.
1052 bool ParseToEndDirective(const char *AssemblerDirectiveBegin,
1053 const char *AssemblerDirectiveEnd,
1054 std::string &CollectString);
1056 bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth,
1057 RegisterKind RegKind, unsigned Reg1,
1058 unsigned RegNum);
1059 bool ParseAMDGPURegister(RegisterKind& RegKind, unsigned& Reg,
1060 unsigned& RegNum, unsigned& RegWidth,
1061 unsigned *DwordRegIndex);
1062 bool isRegister();
1063 bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const;
1064 Optional<StringRef> getGprCountSymbolName(RegisterKind RegKind);
1065 void initializeGprCountSymbol(RegisterKind RegKind);
1066 bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex,
1067 unsigned RegWidth);
1068 void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands,
1069 bool IsAtomic, bool IsAtomicReturn, bool IsLds = false);
1070 void cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
1071 bool IsGdsHardcoded);
1073 public:
1074 enum AMDGPUMatchResultTy {
1075 Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY
1077 enum OperandMode {
1078 OperandMode_Default,
1079 OperandMode_NSA,
1082 using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;
1084 AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
1085 const MCInstrInfo &MII,
1086 const MCTargetOptions &Options)
1087 : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) {
1088 MCAsmParserExtension::Initialize(Parser);
1090 if (getFeatureBits().none()) {
1091 // Set default features.
1092 copySTI().ToggleFeature("southern-islands");
1095 setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));
1098 // TODO: make these pre-defined variables read-only.
1099 // Currently there is no suitable machinery in core llvm-mc for this.
1100 // MCSymbol::isRedefinable is intended for another purpose, and
1101 // AsmParser::parseDirectiveSet() cannot be specialized for a specific target.
1102 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
1103 MCContext &Ctx = getContext();
1104 if (ISA.Major >= 6 && AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
1105 MCSymbol *Sym =
1106 Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_number"));
1107 Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
1108 Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_minor"));
1109 Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
1110 Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_stepping"));
1111 Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
1112 } else {
1113 MCSymbol *Sym =
1114 Ctx.getOrCreateSymbol(Twine(".option.machine_version_major"));
1115 Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
1116 Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_minor"));
1117 Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
1118 Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping"));
1119 Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
1121 if (ISA.Major >= 6 && AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
1122 initializeGprCountSymbol(IS_VGPR);
1123 initializeGprCountSymbol(IS_SGPR);
1124 } else
1125 KernelScope.initialize(getContext());
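// As an example, when assembling for gfx900 on the code-object-v3 path above,
// these symbols would read .amdgcn.gfx_generation_number = 9,
// .amdgcn.gfx_generation_minor = 0 and .amdgcn.gfx_generation_stepping = 0.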
1129 bool hasXNACK() const {
1130 return AMDGPU::hasXNACK(getSTI());
1133 bool hasMIMG_R128() const {
1134 return AMDGPU::hasMIMG_R128(getSTI());
1137 bool hasPackedD16() const {
1138 return AMDGPU::hasPackedD16(getSTI());
1141 bool isSI() const {
1142 return AMDGPU::isSI(getSTI());
1145 bool isCI() const {
1146 return AMDGPU::isCI(getSTI());
1149 bool isVI() const {
1150 return AMDGPU::isVI(getSTI());
1153 bool isGFX9() const {
1154 return AMDGPU::isGFX9(getSTI());
1157 bool isGFX10() const {
1158 return AMDGPU::isGFX10(getSTI());
1161 bool hasInv2PiInlineImm() const {
1162 return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
1165 bool hasFlatOffsets() const {
1166 return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets];
1169 bool hasSGPR102_SGPR103() const {
1170 return !isVI() && !isGFX9();
1173 bool hasSGPR104_SGPR105() const {
1174 return isGFX10();
1177 bool hasIntClamp() const {
1178 return getFeatureBits()[AMDGPU::FeatureIntClamp];
1181 AMDGPUTargetStreamer &getTargetStreamer() {
1182 MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
1183 return static_cast<AMDGPUTargetStreamer &>(TS);
1186 const MCRegisterInfo *getMRI() const {
1187 // We need this const_cast because for some reason getContext() is not const
1188 // in MCAsmParser.
1189 return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo();
1192 const MCInstrInfo *getMII() const {
1193 return &MII;
1196 const FeatureBitset &getFeatureBits() const {
1197 return getSTI().getFeatureBits();
1200 void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; }
1201 void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; }
1202 void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; }
1204 unsigned getForcedEncodingSize() const { return ForcedEncodingSize; }
1205 bool isForcedVOP3() const { return ForcedEncodingSize == 64; }
1206 bool isForcedDPP() const { return ForcedDPP; }
1207 bool isForcedSDWA() const { return ForcedSDWA; }
1208 ArrayRef<unsigned> getMatchedVariants() const;
1210 std::unique_ptr<AMDGPUOperand> parseRegister();
1211 bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
1212 unsigned checkTargetMatchPredicate(MCInst &Inst) override;
1213 unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
1214 unsigned Kind) override;
1215 bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
1216 OperandVector &Operands, MCStreamer &Out,
1217 uint64_t &ErrorInfo,
1218 bool MatchingInlineAsm) override;
1219 bool ParseDirective(AsmToken DirectiveID) override;
1220 OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Mnemonic,
1221 OperandMode Mode = OperandMode_Default);
1222 StringRef parseMnemonicSuffix(StringRef Name);
1223 bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
1224 SMLoc NameLoc, OperandVector &Operands) override;
1225 //bool ProcessInstruction(MCInst &Inst);
1227 OperandMatchResultTy parseIntWithPrefix(const char *Prefix, int64_t &Int);
1229 OperandMatchResultTy
1230 parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
1231 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1232 bool (*ConvertResult)(int64_t &) = nullptr);
1234 OperandMatchResultTy
1235 parseOperandArrayWithPrefix(const char *Prefix,
1236 OperandVector &Operands,
1237 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1238 bool (*ConvertResult)(int64_t&) = nullptr);
1240 OperandMatchResultTy
1241 parseNamedBit(const char *Name, OperandVector &Operands,
1242 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone);
1243 OperandMatchResultTy parseStringWithPrefix(StringRef Prefix,
1244 StringRef &Value);
1246 bool isModifier();
1247 bool isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1248 bool isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1249 bool isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1250 bool isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const;
1251 bool parseSP3NegModifier();
1252 OperandMatchResultTy parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false);
1253 OperandMatchResultTy parseReg(OperandVector &Operands);
1254 OperandMatchResultTy parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false);
1255 OperandMatchResultTy parseRegOrImmWithFPInputMods(OperandVector &Operands, bool AllowImm = true);
1256 OperandMatchResultTy parseRegOrImmWithIntInputMods(OperandVector &Operands, bool AllowImm = true);
1257 OperandMatchResultTy parseRegWithFPInputMods(OperandVector &Operands);
1258 OperandMatchResultTy parseRegWithIntInputMods(OperandVector &Operands);
1259 OperandMatchResultTy parseVReg32OrOff(OperandVector &Operands);
1260 OperandMatchResultTy parseDfmtNfmt(OperandVector &Operands);
1262 void cvtDSOffset01(MCInst &Inst, const OperandVector &Operands);
1263 void cvtDS(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, false); }
1264 void cvtDSGds(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, true); }
1265 void cvtExp(MCInst &Inst, const OperandVector &Operands);
1267 bool parseCnt(int64_t &IntVal);
1268 OperandMatchResultTy parseSWaitCntOps(OperandVector &Operands);
1269 OperandMatchResultTy parseHwreg(OperandVector &Operands);
1271 private:
1272 struct OperandInfoTy {
1273 int64_t Id;
1274 bool IsSymbolic = false;
1275 bool IsDefined = false;
1277 OperandInfoTy(int64_t Id_) : Id(Id_) {}
1280 bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &Op, OperandInfoTy &Stream);
1281 bool validateSendMsg(const OperandInfoTy &Msg,
1282 const OperandInfoTy &Op,
1283 const OperandInfoTy &Stream,
1284 const SMLoc Loc);
1286 bool parseHwregBody(OperandInfoTy &HwReg, int64_t &Offset, int64_t &Width);
1287 bool validateHwreg(const OperandInfoTy &HwReg,
1288 const int64_t Offset,
1289 const int64_t Width,
1290 const SMLoc Loc);
1292 void errorExpTgt();
1293 OperandMatchResultTy parseExpTgtImpl(StringRef Str, uint8_t &Val);
1294 SMLoc getFlatOffsetLoc(const OperandVector &Operands) const;
1296 bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc, const OperandVector &Operands);
1297 bool validateFlatOffset(const MCInst &Inst, const OperandVector &Operands);
1298 bool validateSOPLiteral(const MCInst &Inst) const;
1299 bool validateConstantBusLimitations(const MCInst &Inst);
1300 bool validateEarlyClobberLimitations(const MCInst &Inst);
1301 bool validateIntClampSupported(const MCInst &Inst);
1302 bool validateMIMGAtomicDMask(const MCInst &Inst);
1303 bool validateMIMGGatherDMask(const MCInst &Inst);
1304 bool validateMIMGDataSize(const MCInst &Inst);
1305 bool validateMIMGAddrSize(const MCInst &Inst);
1306 bool validateMIMGD16(const MCInst &Inst);
1307 bool validateMIMGDim(const MCInst &Inst);
1308 bool validateLdsDirect(const MCInst &Inst);
1309 bool validateOpSel(const MCInst &Inst);
1310 bool validateVccOperand(unsigned Reg) const;
1311 bool validateVOP3Literal(const MCInst &Inst) const;
1312 unsigned getConstantBusLimit(unsigned Opcode) const;
1313 bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
1314 bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
1315 unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const;
1317 bool isId(const StringRef Id) const;
1318 bool isId(const AsmToken &Token, const StringRef Id) const;
1319 bool isToken(const AsmToken::TokenKind Kind) const;
1320 bool trySkipId(const StringRef Id);
1321 bool trySkipId(const StringRef Id, const AsmToken::TokenKind Kind);
1322 bool trySkipToken(const AsmToken::TokenKind Kind);
1323 bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg);
1324 bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string");
1325 void peekTokens(MutableArrayRef<AsmToken> Tokens);
1326 AsmToken::TokenKind getTokenKind() const;
1327 bool parseExpr(int64_t &Imm);
1328 bool parseExpr(OperandVector &Operands);
1329 StringRef getTokenStr() const;
1330 AsmToken peekToken();
1331 AsmToken getToken() const;
1332 SMLoc getLoc() const;
1333 void lex();
1335 public:
1336 OperandMatchResultTy parseOptionalOperand(OperandVector &Operands);
1337 OperandMatchResultTy parseOptionalOpr(OperandVector &Operands);
1339 OperandMatchResultTy parseExpTgt(OperandVector &Operands);
1340 OperandMatchResultTy parseSendMsgOp(OperandVector &Operands);
1341 OperandMatchResultTy parseInterpSlot(OperandVector &Operands);
1342 OperandMatchResultTy parseInterpAttr(OperandVector &Operands);
1343 OperandMatchResultTy parseSOppBrTarget(OperandVector &Operands);
1344 OperandMatchResultTy parseBoolReg(OperandVector &Operands);
1346 bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
1347 const unsigned MinVal,
1348 const unsigned MaxVal,
1349 const StringRef ErrMsg);
1350 OperandMatchResultTy parseSwizzleOp(OperandVector &Operands);
1351 bool parseSwizzleOffset(int64_t &Imm);
1352 bool parseSwizzleMacro(int64_t &Imm);
1353 bool parseSwizzleQuadPerm(int64_t &Imm);
1354 bool parseSwizzleBitmaskPerm(int64_t &Imm);
1355 bool parseSwizzleBroadcast(int64_t &Imm);
1356 bool parseSwizzleSwap(int64_t &Imm);
1357 bool parseSwizzleReverse(int64_t &Imm);
1359 OperandMatchResultTy parseGPRIdxMode(OperandVector &Operands);
1360 int64_t parseGPRIdxMacro();
1362 void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false); }
1363 void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, false); }
1364 void cvtMubufAtomicReturn(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, true); }
1365 void cvtMubufLds(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false, true); }
1366 void cvtMtbuf(MCInst &Inst, const OperandVector &Operands);
1368 AMDGPUOperand::Ptr defaultDLC() const;
1369 AMDGPUOperand::Ptr defaultGLC() const;
1370 AMDGPUOperand::Ptr defaultSLC() const;
1372 AMDGPUOperand::Ptr defaultSMRDOffset8() const;
1373 AMDGPUOperand::Ptr defaultSMRDOffset20() const;
1374 AMDGPUOperand::Ptr defaultSMRDLiteralOffset() const;
1375 AMDGPUOperand::Ptr defaultFlatOffset() const;
1377 OperandMatchResultTy parseOModOperand(OperandVector &Operands);
1379 void cvtVOP3(MCInst &Inst, const OperandVector &Operands,
1380 OptionalImmIndexMap &OptionalIdx);
1381 void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands);
1382 void cvtVOP3(MCInst &Inst, const OperandVector &Operands);
1383 void cvtVOP3P(MCInst &Inst, const OperandVector &Operands);
1385 void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands);
1387 void cvtMIMG(MCInst &Inst, const OperandVector &Operands,
1388 bool IsAtomic = false);
1389 void cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands);
1391 OperandMatchResultTy parseDim(OperandVector &Operands);
1392 OperandMatchResultTy parseDPP8(OperandVector &Operands);
1393 OperandMatchResultTy parseDPPCtrl(OperandVector &Operands);
1394 AMDGPUOperand::Ptr defaultRowMask() const;
1395 AMDGPUOperand::Ptr defaultBankMask() const;
1396 AMDGPUOperand::Ptr defaultBoundCtrl() const;
1397 AMDGPUOperand::Ptr defaultFI() const;
1398 void cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8 = false);
1399 void cvtDPP8(MCInst &Inst, const OperandVector &Operands) { cvtDPP(Inst, Operands, true); }
1401 OperandMatchResultTy parseSDWASel(OperandVector &Operands, StringRef Prefix,
1402 AMDGPUOperand::ImmTy Type);
1403 OperandMatchResultTy parseSDWADstUnused(OperandVector &Operands);
1404 void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands);
1405 void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands);
1406 void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands);
1407 void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands);
1408 void cvtSDWA(MCInst &Inst, const OperandVector &Operands,
1409 uint64_t BasicInstType, bool skipVcc = false);
1411 AMDGPUOperand::Ptr defaultBLGP() const;
1412 AMDGPUOperand::Ptr defaultCBSZ() const;
1413 AMDGPUOperand::Ptr defaultABID() const;
1415 OperandMatchResultTy parseEndpgmOp(OperandVector &Operands);
1416 AMDGPUOperand::Ptr defaultEndpgmImmOperands() const;
1419 struct OptionalOperand {
1420 const char *Name;
1421 AMDGPUOperand::ImmTy Type;
1422 bool IsBit;
1423 bool (*ConvertResult)(int64_t&);
1426 } // end anonymous namespace
1428 // May be called with an integer type of equivalent bitwidth.
1429 static const fltSemantics *getFltSemantics(unsigned Size) {
1430 switch (Size) {
1431 case 4:
1432 return &APFloat::IEEEsingle();
1433 case 8:
1434 return &APFloat::IEEEdouble();
1435 case 2:
1436 return &APFloat::IEEEhalf();
1437 default:
1438 llvm_unreachable("unsupported fp type");
1442 static const fltSemantics *getFltSemantics(MVT VT) {
1443 return getFltSemantics(VT.getSizeInBits() / 8);
1446 static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
1447 switch (OperandType) {
1448 case AMDGPU::OPERAND_REG_IMM_INT32:
1449 case AMDGPU::OPERAND_REG_IMM_FP32:
1450 case AMDGPU::OPERAND_REG_INLINE_C_INT32:
1451 case AMDGPU::OPERAND_REG_INLINE_C_FP32:
1452 case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
1453 case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
1454 return &APFloat::IEEEsingle();
1455 case AMDGPU::OPERAND_REG_IMM_INT64:
1456 case AMDGPU::OPERAND_REG_IMM_FP64:
1457 case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1458 case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1459 return &APFloat::IEEEdouble();
1460 case AMDGPU::OPERAND_REG_IMM_INT16:
1461 case AMDGPU::OPERAND_REG_IMM_FP16:
1462 case AMDGPU::OPERAND_REG_INLINE_C_INT16:
1463 case AMDGPU::OPERAND_REG_INLINE_C_FP16:
1464 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
1465 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
1466 case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
1467 case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
1468 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
1469 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
1470 case AMDGPU::OPERAND_REG_IMM_V2INT16:
1471 case AMDGPU::OPERAND_REG_IMM_V2FP16:
1472 return &APFloat::IEEEhalf();
1473 default:
1474 llvm_unreachable("unsupported fp type");
1478 //===----------------------------------------------------------------------===//
1479 // Operand
1480 //===----------------------------------------------------------------------===//
1482 static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) {
1483 bool Lost;
1485 // Convert the literal to the operand's floating-point semantics.
1486 APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT),
1487 APFloat::rmNearestTiesToEven,
1488 &Lost);
1489 // We allow precision loss but not overflow or underflow
1490 if (Status != APFloat::opOK &&
1491 Lost &&
1492 ((Status & APFloat::opOverflow) != 0 ||
1493 (Status & APFloat::opUnderflow) != 0)) {
1494 return false;
1497 return true;
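// In practice this accepts literals such as 0.1 for f16 operands (only
// precision is lost) but rejects values like 1.0e10 for f16, where the
// conversion overflows.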
1500 static bool isSafeTruncation(int64_t Val, unsigned Size) {
1501 return isUIntN(Size, Val) || isIntN(Size, Val);
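// E.g. with Size == 16, both 0xFFFF (fits as unsigned) and -1 (fits as
// signed) are safe truncations, while 0x1FFFF is not.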
1504 bool AMDGPUOperand::isInlinableImm(MVT type) const {
1506 // This is a hack to enable named inline values like
1507 // shared_base with both 32-bit and 64-bit operands.
1508 // Note that these values are defined as
1509 // 32-bit operands only.
1510 if (isInlineValue()) {
1511 return true;
1514 if (!isImmTy(ImmTyNone)) {
1515 // Only plain immediates are inlinable (e.g. "clamp" attribute is not)
1516 return false;
1518 // TODO: We should avoid using host float here. It would be better to
1519 // check the float bit values, which is what a few other places do.
1520 // We've had bot failures before due to weird NaN support on mips hosts.
1522 APInt Literal(64, Imm.Val);
1524 if (Imm.IsFPImm) { // We got fp literal token
1525 if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
1526 return AMDGPU::isInlinableLiteral64(Imm.Val,
1527 AsmParser->hasInv2PiInlineImm());
1530 APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
1531 if (!canLosslesslyConvertToFPType(FPLiteral, type))
1532 return false;
1534 if (type.getScalarSizeInBits() == 16) {
1535 return AMDGPU::isInlinableLiteral16(
1536 static_cast<int16_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
1537 AsmParser->hasInv2PiInlineImm());
1540 // Check if single precision literal is inlinable
1541 return AMDGPU::isInlinableLiteral32(
1542 static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
1543 AsmParser->hasInv2PiInlineImm());
1546 // We got int literal token.
1547 if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
1548 return AMDGPU::isInlinableLiteral64(Imm.Val,
1549 AsmParser->hasInv2PiInlineImm());
1552 if (!isSafeTruncation(Imm.Val, type.getScalarSizeInBits())) {
1553 return false;
1556 if (type.getScalarSizeInBits() == 16) {
1557 return AMDGPU::isInlinableLiteral16(
1558 static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()),
1559 AsmParser->hasInv2PiInlineImm());
1562 return AMDGPU::isInlinableLiteral32(
1563 static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()),
1564 AsmParser->hasInv2PiInlineImm());
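// Note: for the 32-bit and 16-bit cases this boils down to the hardware's
// inline-constant set, roughly the integers -16..64 plus a few float
// constants (0.0, +/-0.5, +/-1.0, +/-2.0, +/-4.0, and 1/(2*pi) when
// hasInv2PiInlineImm() is true).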
1567 bool AMDGPUOperand::isLiteralImm(MVT type) const {
1568 // Check that this immediate can be added as literal
1569 if (!isImmTy(ImmTyNone)) {
1570 return false;
1573 if (!Imm.IsFPImm) {
1574 // We got int literal token.
1576 if (type == MVT::f64 && hasFPModifiers()) {
1577 // Cannot apply fp modifiers to int literals while preserving the same
1578 // semantics for VOP1/2/C and VOP3, because of integer truncation. To avoid
1579 // ambiguity, disable these cases.
1580 return false;
1583 unsigned Size = type.getSizeInBits();
1584 if (Size == 64)
1585 Size = 32;
1587 // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP
1588 // types.
1589 return isSafeTruncation(Imm.Val, Size);
1592 // We got fp literal token
1593 if (type == MVT::f64) { // Expected 64-bit fp operand
1594 // We would set the low 64 bits of the literal to zeroes, but we accept these literals
1595 return true;
1598 if (type == MVT::i64) { // Expected 64-bit int operand
1599 // We don't allow fp literals in 64-bit integer instructions. It is
1600 // unclear how we should encode them.
1601 return false;
1604 // We allow fp literals with f16x2 operands assuming that the specified
1605 // literal goes into the lower half and the upper half is zero. We also
1606 // require that the literal can be losslessly converted to f16.
1607 MVT ExpectedType = (type == MVT::v2f16)? MVT::f16 :
1608 (type == MVT::v2i16)? MVT::i16 : type;
1610 APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
1611 return canLosslesslyConvertToFPType(FPLiteral, ExpectedType);
1614 bool AMDGPUOperand::isRegClass(unsigned RCID) const {
1615 return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg());
1618 bool AMDGPUOperand::isSDWAOperand(MVT type) const {
1619 if (AsmParser->isVI())
1620 return isVReg32();
1621 else if (AsmParser->isGFX9() || AsmParser->isGFX10())
1622 return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type);
1623 else
1624 return false;
1627 bool AMDGPUOperand::isSDWAFP16Operand() const {
1628 return isSDWAOperand(MVT::f16);
1631 bool AMDGPUOperand::isSDWAFP32Operand() const {
1632 return isSDWAOperand(MVT::f32);
1635 bool AMDGPUOperand::isSDWAInt16Operand() const {
1636 return isSDWAOperand(MVT::i16);
1639 bool AMDGPUOperand::isSDWAInt32Operand() const {
1640 return isSDWAOperand(MVT::i32);
1643 bool AMDGPUOperand::isBoolReg() const {
1644 return (AsmParser->getFeatureBits()[AMDGPU::FeatureWavefrontSize64] && isSCSrcB64()) ||
1645 (AsmParser->getFeatureBits()[AMDGPU::FeatureWavefrontSize32] && isSCSrcB32());
1648 uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const
1650 assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
1651 assert(Size == 2 || Size == 4 || Size == 8);
1653 const uint64_t FpSignMask = (1ULL << (Size * 8 - 1));
1655 if (Imm.Mods.Abs) {
1656 Val &= ~FpSignMask;
1658 if (Imm.Mods.Neg) {
1659 Val ^= FpSignMask;
1662 return Val;
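// Worked example for a 4-byte operand: FpSignMask is 0x80000000, so "abs"
// clears the sign bit and "neg" flips it; applying neg to the f32 literal
// 1.0 (0x3F800000) produces 0xBF800000, i.e. -1.0.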
1665 void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const {
1666 if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()),
1667 Inst.getNumOperands())) {
1668 addLiteralImmOperand(Inst, Imm.Val,
1669 ApplyModifiers &
1670 isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
1671 } else {
1672 assert(!isImmTy(ImmTyNone) || !hasModifiers());
1673 Inst.addOperand(MCOperand::createImm(Imm.Val));
1677 void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const {
1678 const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode());
1679 auto OpNum = Inst.getNumOperands();
1680 // Check that this operand accepts literals
1681 assert(AMDGPU::isSISrcOperand(InstDesc, OpNum));
1683 if (ApplyModifiers) {
1684 assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum));
1685 const unsigned Size = Imm.IsFPImm ? sizeof(double) : getOperandSize(InstDesc, OpNum);
1686 Val = applyInputFPModifiers(Val, Size);
1689 APInt Literal(64, Val);
1690 uint8_t OpTy = InstDesc.OpInfo[OpNum].OperandType;
1692 if (Imm.IsFPImm) { // We got fp literal token
1693 switch (OpTy) {
1694 case AMDGPU::OPERAND_REG_IMM_INT64:
1695 case AMDGPU::OPERAND_REG_IMM_FP64:
1696 case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1697 case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1698 if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(),
1699 AsmParser->hasInv2PiInlineImm())) {
1700 Inst.addOperand(MCOperand::createImm(Literal.getZExtValue()));
1701 return;
1704 // Non-inlineable
1705 if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) { // Expected 64-bit fp operand
1706 // For fp operands we check if low 32 bits are zeros
1707 if (Literal.getLoBits(32) != 0) {
1708 const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(),
1709 "Can't encode literal as exact 64-bit floating-point operand. "
1710 "Low 32-bits will be set to zero");
1713 Inst.addOperand(MCOperand::createImm(Literal.lshr(32).getZExtValue()));
1714 return;
1717 // We don't allow fp literals in 64-bit integer instructions. It is
1718 // unclear how we should encode them. This case should be checked earlier
1719 // in predicate methods (isLiteralImm())
1720 llvm_unreachable("fp literal in 64-bit integer instruction.");
1722 case AMDGPU::OPERAND_REG_IMM_INT32:
1723 case AMDGPU::OPERAND_REG_IMM_FP32:
1724 case AMDGPU::OPERAND_REG_INLINE_C_INT32:
1725 case AMDGPU::OPERAND_REG_INLINE_C_FP32:
1726 case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
1727 case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
1728 case AMDGPU::OPERAND_REG_IMM_INT16:
1729 case AMDGPU::OPERAND_REG_IMM_FP16:
1730 case AMDGPU::OPERAND_REG_INLINE_C_INT16:
1731 case AMDGPU::OPERAND_REG_INLINE_C_FP16:
1732 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
1733 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
1734 case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
1735 case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
1736 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
1737 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
1738 case AMDGPU::OPERAND_REG_IMM_V2INT16:
1739 case AMDGPU::OPERAND_REG_IMM_V2FP16: {
1740 bool lost;
1741 APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
1742 // Convert the literal to the operand's floating-point semantics.
1743 FPLiteral.convert(*getOpFltSemantics(OpTy),
1744 APFloat::rmNearestTiesToEven, &lost);
1745 // We allow precision loss but not overflow or underflow. This should be
1746 // checked earlier in isLiteralImm()
1748 uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
1749 Inst.addOperand(MCOperand::createImm(ImmVal));
1750 return;
1752 default:
1753 llvm_unreachable("invalid operand size");
1756 return;
1759 // We got int literal token.
1760 // Only sign extend inline immediates.
1761 switch (OpTy) {
1762 case AMDGPU::OPERAND_REG_IMM_INT32:
1763 case AMDGPU::OPERAND_REG_IMM_FP32:
1764 case AMDGPU::OPERAND_REG_INLINE_C_INT32:
1765 case AMDGPU::OPERAND_REG_INLINE_C_FP32:
1766 case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
1767 case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
1768 case AMDGPU::OPERAND_REG_IMM_V2INT16:
1769 case AMDGPU::OPERAND_REG_IMM_V2FP16:
1770 if (isSafeTruncation(Val, 32) &&
1771 AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val),
1772 AsmParser->hasInv2PiInlineImm())) {
1773 Inst.addOperand(MCOperand::createImm(Val));
1774 return;
1777 Inst.addOperand(MCOperand::createImm(Val & 0xffffffff));
1778 return;
1780 case AMDGPU::OPERAND_REG_IMM_INT64:
1781 case AMDGPU::OPERAND_REG_IMM_FP64:
1782 case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1783 case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1784 if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) {
1785 Inst.addOperand(MCOperand::createImm(Val));
1786 return;
1789 Inst.addOperand(MCOperand::createImm(Lo_32(Val)));
1790 return;
1792 case AMDGPU::OPERAND_REG_IMM_INT16:
1793 case AMDGPU::OPERAND_REG_IMM_FP16:
1794 case AMDGPU::OPERAND_REG_INLINE_C_INT16:
1795 case AMDGPU::OPERAND_REG_INLINE_C_FP16:
1796 case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
1797 case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
1798 if (isSafeTruncation(Val, 16) &&
1799 AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
1800 AsmParser->hasInv2PiInlineImm())) {
1801 Inst.addOperand(MCOperand::createImm(Val));
1802 return;
1805 Inst.addOperand(MCOperand::createImm(Val & 0xffff));
1806 return;
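// Example (added for clarity): for a 16-bit operand, Val = -1 is an inline
// constant and is emitted as-is, while Val = 0x1234 is not inlinable and is
// emitted as a literal masked to the low 16 bits above.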
1808 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
1809 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
1810 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
1811 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: {
1812 assert(isSafeTruncation(Val, 16));
1813 assert(AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
1814 AsmParser->hasInv2PiInlineImm()));
1816 Inst.addOperand(MCOperand::createImm(Val));
1817 return;
1819 default:
1820 llvm_unreachable("invalid operand size");
1824 template <unsigned Bitwidth>
1825 void AMDGPUOperand::addKImmFPOperands(MCInst &Inst, unsigned N) const {
1826 APInt Literal(64, Imm.Val);
1828 if (!Imm.IsFPImm) {
1829 // We got int literal token.
1830 Inst.addOperand(MCOperand::createImm(Literal.getLoBits(Bitwidth).getZExtValue()));
1831 return;
1834 bool Lost;
1835 APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
1836 FPLiteral.convert(*getFltSemantics(Bitwidth / 8),
1837 APFloat::rmNearestTiesToEven, &Lost);
1838 Inst.addOperand(MCOperand::createImm(FPLiteral.bitcastToAPInt().getZExtValue()));
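// Illustrative note (added, not from the original source): KImm operands carry
// the extra constant of instructions such as v_madmk_f32/v_madak_f32 and their
// f16 variants. An integer token is simply truncated to Bitwidth, while an fp
// token (parsed as a double) is rounded to the target format first; e.g. with
// Bitwidth == 16 the literal 1.0 becomes the IEEE half bit pattern 0x3C00.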
1841 void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const {
1842 Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI())));
1845 static bool isInlineValue(unsigned Reg) {
1846 switch (Reg) {
1847 case AMDGPU::SRC_SHARED_BASE:
1848 case AMDGPU::SRC_SHARED_LIMIT:
1849 case AMDGPU::SRC_PRIVATE_BASE:
1850 case AMDGPU::SRC_PRIVATE_LIMIT:
1851 case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
1852 return true;
1853 case AMDGPU::SRC_VCCZ:
1854 case AMDGPU::SRC_EXECZ:
1855 case AMDGPU::SRC_SCC:
1856 return true;
1857 case AMDGPU::SGPR_NULL:
1858 return true;
1859 default:
1860 return false;
1864 bool AMDGPUOperand::isInlineValue() const {
1865 return isRegKind() && ::isInlineValue(getReg());
1868 //===----------------------------------------------------------------------===//
1869 // AsmParser
1870 //===----------------------------------------------------------------------===//
1872 static int getRegClass(RegisterKind Is, unsigned RegWidth) {
1873 if (Is == IS_VGPR) {
1874 switch (RegWidth) {
1875 default: return -1;
1876 case 1: return AMDGPU::VGPR_32RegClassID;
1877 case 2: return AMDGPU::VReg_64RegClassID;
1878 case 3: return AMDGPU::VReg_96RegClassID;
1879 case 4: return AMDGPU::VReg_128RegClassID;
1880 case 5: return AMDGPU::VReg_160RegClassID;
1881 case 8: return AMDGPU::VReg_256RegClassID;
1882 case 16: return AMDGPU::VReg_512RegClassID;
1883 case 32: return AMDGPU::VReg_1024RegClassID;
1885 } else if (Is == IS_TTMP) {
1886 switch (RegWidth) {
1887 default: return -1;
1888 case 1: return AMDGPU::TTMP_32RegClassID;
1889 case 2: return AMDGPU::TTMP_64RegClassID;
1890 case 4: return AMDGPU::TTMP_128RegClassID;
1891 case 8: return AMDGPU::TTMP_256RegClassID;
1892 case 16: return AMDGPU::TTMP_512RegClassID;
1894 } else if (Is == IS_SGPR) {
1895 switch (RegWidth) {
1896 default: return -1;
1897 case 1: return AMDGPU::SGPR_32RegClassID;
1898 case 2: return AMDGPU::SGPR_64RegClassID;
1899 case 4: return AMDGPU::SGPR_128RegClassID;
1900 case 8: return AMDGPU::SGPR_256RegClassID;
1901 case 16: return AMDGPU::SGPR_512RegClassID;
1903 } else if (Is == IS_AGPR) {
1904 switch (RegWidth) {
1905 default: return -1;
1906 case 1: return AMDGPU::AGPR_32RegClassID;
1907 case 2: return AMDGPU::AReg_64RegClassID;
1908 case 4: return AMDGPU::AReg_128RegClassID;
1909 case 16: return AMDGPU::AReg_512RegClassID;
1910 case 32: return AMDGPU::AReg_1024RegClassID;
1913 return -1;
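// Example (added for clarity): RegWidth counts 32-bit dwords, so v[0:3] has
// RegWidth == 4 and maps to VReg_128RegClassID, while a single v5 has
// RegWidth == 1 and maps to VGPR_32RegClassID. Widths with no matching class
// (e.g. 6) return -1 and are rejected by the caller.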
1916 static unsigned getSpecialRegForName(StringRef RegName) {
1917 return StringSwitch<unsigned>(RegName)
1918 .Case("exec", AMDGPU::EXEC)
1919 .Case("vcc", AMDGPU::VCC)
1920 .Case("flat_scratch", AMDGPU::FLAT_SCR)
1921 .Case("xnack_mask", AMDGPU::XNACK_MASK)
1922 .Case("shared_base", AMDGPU::SRC_SHARED_BASE)
1923 .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE)
1924 .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT)
1925 .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT)
1926 .Case("private_base", AMDGPU::SRC_PRIVATE_BASE)
1927 .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE)
1928 .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
1929 .Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
1930 .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
1931 .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
1932 .Case("lds_direct", AMDGPU::LDS_DIRECT)
1933 .Case("src_lds_direct", AMDGPU::LDS_DIRECT)
1934 .Case("m0", AMDGPU::M0)
1935 .Case("vccz", AMDGPU::SRC_VCCZ)
1936 .Case("src_vccz", AMDGPU::SRC_VCCZ)
1937 .Case("execz", AMDGPU::SRC_EXECZ)
1938 .Case("src_execz", AMDGPU::SRC_EXECZ)
1939 .Case("scc", AMDGPU::SRC_SCC)
1940 .Case("src_scc", AMDGPU::SRC_SCC)
1941 .Case("tba", AMDGPU::TBA)
1942 .Case("tma", AMDGPU::TMA)
1943 .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO)
1944 .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI)
1945 .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO)
1946 .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI)
1947 .Case("vcc_lo", AMDGPU::VCC_LO)
1948 .Case("vcc_hi", AMDGPU::VCC_HI)
1949 .Case("exec_lo", AMDGPU::EXEC_LO)
1950 .Case("exec_hi", AMDGPU::EXEC_HI)
1951 .Case("tma_lo", AMDGPU::TMA_LO)
1952 .Case("tma_hi", AMDGPU::TMA_HI)
1953 .Case("tba_lo", AMDGPU::TBA_LO)
1954 .Case("tba_hi", AMDGPU::TBA_HI)
1955 .Case("null", AMDGPU::SGPR_NULL)
1956 .Default(0);
1959 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
1960 SMLoc &EndLoc) {
1961 auto R = parseRegister();
1962 if (!R) return true;
1963 assert(R->isReg());
1964 RegNo = R->getReg();
1965 StartLoc = R->getStartLoc();
1966 EndLoc = R->getEndLoc();
1967 return false;
1970 bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth,
1971 RegisterKind RegKind, unsigned Reg1,
1972 unsigned RegNum) {
1973 switch (RegKind) {
1974 case IS_SPECIAL:
1975 if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) {
1976 Reg = AMDGPU::EXEC;
1977 RegWidth = 2;
1978 return true;
1980 if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) {
1981 Reg = AMDGPU::FLAT_SCR;
1982 RegWidth = 2;
1983 return true;
1985 if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) {
1986 Reg = AMDGPU::XNACK_MASK;
1987 RegWidth = 2;
1988 return true;
1990 if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) {
1991 Reg = AMDGPU::VCC;
1992 RegWidth = 2;
1993 return true;
1995 if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) {
1996 Reg = AMDGPU::TBA;
1997 RegWidth = 2;
1998 return true;
2000 if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) {
2001 Reg = AMDGPU::TMA;
2002 RegWidth = 2;
2003 return true;
2005 return false;
2006 case IS_VGPR:
2007 case IS_SGPR:
2008 case IS_AGPR:
2009 case IS_TTMP:
2010 if (Reg1 != Reg + RegWidth) {
2011 return false;
2013 RegWidth++;
2014 return true;
2015 default:
2016 llvm_unreachable("unexpected register kind");
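// Example (added for clarity): for a register list such as [s0,s1,s2] each new
// element must equal Reg + RegWidth, so the width grows 1 -> 2 -> 3. For
// special registers only the known lo/hi pairs combine, e.g. [exec_lo,exec_hi]
// becomes exec with RegWidth == 2; any other combination fails.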
2020 static constexpr StringLiteral Registers[] = {"v", "s", "ttmp", "acc", "a"};
2022 bool
2023 AMDGPUAsmParser::isRegister(const AsmToken &Token,
2024 const AsmToken &NextToken) const {
2026 // A list of consecutive registers: [s0,s1,s2,s3]
2027 if (Token.is(AsmToken::LBrac))
2028 return true;
2030 if (!Token.is(AsmToken::Identifier))
2031 return false;
2033 // A single register like s0 or a range of registers like s[0:1]
2035 StringRef RegName = Token.getString();
2037 for (StringRef Reg : Registers) {
2038 if (RegName.startswith(Reg)) {
2039 if (Reg.size() < RegName.size()) {
2040 unsigned RegNum;
2041 // A single register with an index: rXX
2042 if (!RegName.substr(Reg.size()).getAsInteger(10, RegNum))
2043 return true;
2044 } else {
2045 // A range of registers: r[XX:YY].
2046 if (NextToken.is(AsmToken::LBrac))
2047 return true;
2052 return getSpecialRegForName(RegName);
2055 bool
2056 AMDGPUAsmParser::isRegister()
2058 return isRegister(getToken(), peekToken());
2061 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
2062 unsigned &RegNum, unsigned &RegWidth,
2063 unsigned *DwordRegIndex) {
2064 if (DwordRegIndex) { *DwordRegIndex = 0; }
2065 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2066 if (getLexer().is(AsmToken::Identifier)) {
2067 StringRef RegName = Parser.getTok().getString();
2068 if ((Reg = getSpecialRegForName(RegName))) {
2069 Parser.Lex();
2070 RegKind = IS_SPECIAL;
2071 } else {
2072 unsigned RegNumIndex = 0;
2073 if (RegName[0] == 'v') {
2074 RegNumIndex = 1;
2075 RegKind = IS_VGPR;
2076 } else if (RegName[0] == 's') {
2077 RegNumIndex = 1;
2078 RegKind = IS_SGPR;
2079 } else if (RegName[0] == 'a') {
2080 RegNumIndex = RegName.startswith("acc") ? 3 : 1;
2081 RegKind = IS_AGPR;
2082 } else if (RegName.startswith("ttmp")) {
2083 RegNumIndex = strlen("ttmp");
2084 RegKind = IS_TTMP;
2085 } else {
2086 return false;
2088 if (RegName.size() > RegNumIndex) {
2089 // Single 32-bit register: vXX.
2090 if (RegName.substr(RegNumIndex).getAsInteger(10, RegNum))
2091 return false;
2092 Parser.Lex();
2093 RegWidth = 1;
2094 } else {
2095 // Range of registers: v[XX:YY]. ":YY" is optional.
2096 Parser.Lex();
2097 int64_t RegLo, RegHi;
2098 if (getLexer().isNot(AsmToken::LBrac))
2099 return false;
2100 Parser.Lex();
2102 if (getParser().parseAbsoluteExpression(RegLo))
2103 return false;
2105 const bool isRBrace = getLexer().is(AsmToken::RBrac);
2106 if (!isRBrace && getLexer().isNot(AsmToken::Colon))
2107 return false;
2108 Parser.Lex();
2110 if (isRBrace) {
2111 RegHi = RegLo;
2112 } else {
2113 if (getParser().parseAbsoluteExpression(RegHi))
2114 return false;
2116 if (getLexer().isNot(AsmToken::RBrac))
2117 return false;
2118 Parser.Lex();
2120 RegNum = (unsigned) RegLo;
2121 RegWidth = (RegHi - RegLo) + 1;
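// Example (added for clarity): "v[4:7]" parses as RegLo = 4 and RegHi = 7,
// giving RegNum = 4 and RegWidth = 4; the single-index form "v[5]" takes the
// isRBrace path above, so RegHi == RegLo and RegWidth == 1.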
2124 } else if (getLexer().is(AsmToken::LBrac)) {
2125 // List of consecutive registers: [s0,s1,s2,s3]
2126 Parser.Lex();
2127 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, nullptr))
2128 return false;
2129 if (RegWidth != 1)
2130 return false;
2131 RegisterKind RegKind1;
2132 unsigned Reg1, RegNum1, RegWidth1;
2133 do {
2134 if (getLexer().is(AsmToken::Comma)) {
2135 Parser.Lex();
2136 } else if (getLexer().is(AsmToken::RBrac)) {
2137 Parser.Lex();
2138 break;
2139 } else if (ParseAMDGPURegister(RegKind1, Reg1, RegNum1, RegWidth1, nullptr)) {
2140 if (RegWidth1 != 1) {
2141 return false;
2143 if (RegKind1 != RegKind) {
2144 return false;
2146 if (!AddNextRegisterToList(Reg, RegWidth, RegKind1, Reg1, RegNum1)) {
2147 return false;
2149 } else {
2150 return false;
2152 } while (true);
2153 } else {
2154 return false;
2156 switch (RegKind) {
2157 case IS_SPECIAL:
2158 RegNum = 0;
2159 RegWidth = 1;
2160 break;
2161 case IS_VGPR:
2162 case IS_SGPR:
2163 case IS_AGPR:
2164 case IS_TTMP:
2166 unsigned Size = 1;
2167 if (RegKind == IS_SGPR || RegKind == IS_TTMP) {
2168 // SGPR and TTMP registers must be aligned. Max required alignment is 4 dwords.
2169 Size = std::min(RegWidth, 4u);
2171 if (RegNum % Size != 0)
2172 return false;
2173 if (DwordRegIndex) { *DwordRegIndex = RegNum; }
2174 RegNum = RegNum / Size;
2175 int RCID = getRegClass(RegKind, RegWidth);
2176 if (RCID == -1)
2177 return false;
2178 const MCRegisterClass RC = TRI->getRegClass(RCID);
2179 if (RegNum >= RC.getNumRegs())
2180 return false;
2181 Reg = RC.getRegister(RegNum);
2182 break;
2185 default:
2186 llvm_unreachable("unexpected register kind");
2189 if (!subtargetHasRegister(*TRI, Reg))
2190 return false;
2191 return true;
2194 Optional<StringRef>
2195 AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) {
2196 switch (RegKind) {
2197 case IS_VGPR:
2198 return StringRef(".amdgcn.next_free_vgpr");
2199 case IS_SGPR:
2200 return StringRef(".amdgcn.next_free_sgpr");
2201 default:
2202 return None;
2206 void AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) {
2207 auto SymbolName = getGprCountSymbolName(RegKind);
2208 assert(SymbolName && "initializing invalid register kind");
2209 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
2210 Sym->setVariableValue(MCConstantExpr::create(0, getContext()));
2213 bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind,
2214 unsigned DwordRegIndex,
2215 unsigned RegWidth) {
2216 // Symbols are only defined for GCN targets
2217 if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6)
2218 return true;
2220 auto SymbolName = getGprCountSymbolName(RegKind);
2221 if (!SymbolName)
2222 return true;
2223 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
2225 int64_t NewMax = DwordRegIndex + RegWidth - 1;
2226 int64_t OldCount;
2228 if (!Sym->isVariable())
2229 return !Error(getParser().getTok().getLoc(),
2230 ".amdgcn.next_free_{v,s}gpr symbols must be variable");
2231 if (!Sym->getVariableValue(false)->evaluateAsAbsolute(OldCount))
2232 return !Error(
2233 getParser().getTok().getLoc(),
2234 ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions");
2236 if (OldCount <= NewMax)
2237 Sym->setVariableValue(MCConstantExpr::create(NewMax + 1, getContext()));
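// Example (added for clarity): a reference to v[6:7] yields DwordRegIndex = 6
// and RegWidth = 2, so NewMax = 7 and .amdgcn.next_free_vgpr is raised to at
// least 8.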
2239 return true;
2242 std::unique_ptr<AMDGPUOperand> AMDGPUAsmParser::parseRegister() {
2243 const auto &Tok = Parser.getTok();
2244 SMLoc StartLoc = Tok.getLoc();
2245 SMLoc EndLoc = Tok.getEndLoc();
2246 RegisterKind RegKind;
2247 unsigned Reg, RegNum, RegWidth, DwordRegIndex;
2249 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, &DwordRegIndex)) {
2250 //FIXME: improve error messages (bug 41303).
2251 Error(StartLoc, "not a valid operand.");
2252 return nullptr;
2254 if (AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
2255 if (!updateGprCountSymbols(RegKind, DwordRegIndex, RegWidth))
2256 return nullptr;
2257 } else
2258 KernelScope.usesRegister(RegKind, DwordRegIndex, RegWidth);
2259 return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc);
2262 OperandMatchResultTy
2263 AMDGPUAsmParser::parseImm(OperandVector &Operands, bool HasSP3AbsModifier) {
2264 // TODO: add syntactic sugar for 1/(2*PI)
2266 assert(!isRegister());
2267 assert(!isModifier());
2269 const auto& Tok = getToken();
2270 const auto& NextTok = peekToken();
2271 bool IsReal = Tok.is(AsmToken::Real);
2272 SMLoc S = getLoc();
2273 bool Negate = false;
2275 if (!IsReal && Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Real)) {
2276 lex();
2277 IsReal = true;
2278 Negate = true;
2281 if (IsReal) {
2282 // Floating-point expressions are not supported.
2283 // Can only allow floating-point literals with an
2284 // optional sign.
2286 StringRef Num = getTokenStr();
2287 lex();
2289 APFloat RealVal(APFloat::IEEEdouble());
2290 auto roundMode = APFloat::rmNearestTiesToEven;
2291 if (RealVal.convertFromString(Num, roundMode) == APFloat::opInvalidOp) {
2292 return MatchOperand_ParseFail;
2294 if (Negate)
2295 RealVal.changeSign();
2297 Operands.push_back(
2298 AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S,
2299 AMDGPUOperand::ImmTyNone, true));
2301 return MatchOperand_Success;
2303 } else {
2304 int64_t IntVal;
2305 const MCExpr *Expr;
2306 SMLoc S = getLoc();
2308 if (HasSP3AbsModifier) {
2309 // This is a workaround for handling expressions
2310 // as arguments of SP3 'abs' modifier, for example:
2311 // |1.0|
2312 // |-1|
2313 // |1+x|
2314 // This syntax is not compatible with syntax of standard
2315 // MC expressions (due to the trailing '|').
2316 SMLoc EndLoc;
2317 if (getParser().parsePrimaryExpr(Expr, EndLoc))
2318 return MatchOperand_ParseFail;
2319 } else {
2320 if (Parser.parseExpression(Expr))
2321 return MatchOperand_ParseFail;
2324 if (Expr->evaluateAsAbsolute(IntVal)) {
2325 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
2326 } else {
2327 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
2330 return MatchOperand_Success;
2333 return MatchOperand_NoMatch;
2336 OperandMatchResultTy
2337 AMDGPUAsmParser::parseReg(OperandVector &Operands) {
2338 if (!isRegister())
2339 return MatchOperand_NoMatch;
2341 if (auto R = parseRegister()) {
2342 assert(R->isReg());
2343 Operands.push_back(std::move(R));
2344 return MatchOperand_Success;
2346 return MatchOperand_ParseFail;
2349 OperandMatchResultTy
2350 AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod) {
2351 auto res = parseReg(Operands);
2352 if (res != MatchOperand_NoMatch) {
2353 return res;
2354 } else if (isModifier()) {
2355 return MatchOperand_NoMatch;
2356 } else {
2357 return parseImm(Operands, HasSP3AbsMod);
2361 bool
2362 AMDGPUAsmParser::isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2363 if (Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::LParen)) {
2364 const auto &str = Token.getString();
2365 return str == "abs" || str == "neg" || str == "sext";
2367 return false;
2370 bool
2371 AMDGPUAsmParser::isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const {
2372 return Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::Colon);
2375 bool
2376 AMDGPUAsmParser::isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2377 return isNamedOperandModifier(Token, NextToken) || Token.is(AsmToken::Pipe);
2380 bool
2381 AMDGPUAsmParser::isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2382 return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken);
2385 // Check if this is an operand modifier or an opcode modifier
2387 // which may look like an expression but is not. We should
2387 // avoid parsing these modifiers as expressions. Currently
2388 // recognized sequences are:
2389 // |...|
2390 // abs(...)
2391 // neg(...)
2392 // sext(...)
2393 // -reg
2394 // -|...|
2395 // -abs(...)
2396 // name:...
2397 // Note that simple opcode modifiers like 'gds' may be parsed as
2398 // expressions; this is a special case. See getExpressionAsToken.
2400 bool
2401 AMDGPUAsmParser::isModifier() {
2403 AsmToken Tok = getToken();
2404 AsmToken NextToken[2];
2405 peekTokens(NextToken);
2407 return isOperandModifier(Tok, NextToken[0]) ||
2408 (Tok.is(AsmToken::Minus) && isRegOrOperandModifier(NextToken[0], NextToken[1])) ||
2409 isOpcodeModifierWithVal(Tok, NextToken[0]);
2412 // Check if the current token is an SP3 'neg' modifier.
2413 // Currently this modifier is allowed in the following context:
2415 // 1. Before a register, e.g. "-v0", "-v[...]" or "-[v0,v1]".
2416 // 2. Before an 'abs' modifier: -abs(...)
2417 // 3. Before an SP3 'abs' modifier: -|...|
2419 // In all other cases "-" is handled as a part
2420 // of an expression that follows the sign.
2422 // Note: When "-" is followed by an integer literal,
2423 // this is interpreted as integer negation rather
2424 // than a floating-point NEG modifier applied to the literal.
2425 // Besides being counter-intuitive, such use of the floating-point
2426 // NEG modifier would result in different meanings for integer
2427 // literals used with VOP1/2/C and VOP3,
2428 // for example:
2429 // v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF
2430 // v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001
2431 // Negative fp literals with a preceding "-" are
2432 // handled likewise for uniformity.
2434 bool
2435 AMDGPUAsmParser::parseSP3NegModifier() {
2437 AsmToken NextToken[2];
2438 peekTokens(NextToken);
2440 if (isToken(AsmToken::Minus) &&
2441 (isRegister(NextToken[0], NextToken[1]) ||
2442 NextToken[0].is(AsmToken::Pipe) ||
2443 isId(NextToken[0], "abs"))) {
2444 lex();
2445 return true;
2448 return false;
2451 OperandMatchResultTy
2452 AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands,
2453 bool AllowImm) {
2454 bool Neg, SP3Neg;
2455 bool Abs, SP3Abs;
2456 SMLoc Loc;
2458 // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead.
2459 if (isToken(AsmToken::Minus) && peekToken().is(AsmToken::Minus)) {
2460 Error(getLoc(), "invalid syntax, expected 'neg' modifier");
2461 return MatchOperand_ParseFail;
2464 SP3Neg = parseSP3NegModifier();
2466 Loc = getLoc();
2467 Neg = trySkipId("neg");
2468 if (Neg && SP3Neg) {
2469 Error(Loc, "expected register or immediate");
2470 return MatchOperand_ParseFail;
2472 if (Neg && !skipToken(AsmToken::LParen, "expected left paren after neg"))
2473 return MatchOperand_ParseFail;
2475 Abs = trySkipId("abs");
2476 if (Abs && !skipToken(AsmToken::LParen, "expected left paren after abs"))
2477 return MatchOperand_ParseFail;
2479 Loc = getLoc();
2480 SP3Abs = trySkipToken(AsmToken::Pipe);
2481 if (Abs && SP3Abs) {
2482 Error(Loc, "expected register or immediate");
2483 return MatchOperand_ParseFail;
2486 OperandMatchResultTy Res;
2487 if (AllowImm) {
2488 Res = parseRegOrImm(Operands, SP3Abs);
2489 } else {
2490 Res = parseReg(Operands);
2492 if (Res != MatchOperand_Success) {
2493 return (SP3Neg || Neg || SP3Abs || Abs)? MatchOperand_ParseFail : Res;
2496 if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar"))
2497 return MatchOperand_ParseFail;
2498 if (Abs && !skipToken(AsmToken::RParen, "expected closing parentheses"))
2499 return MatchOperand_ParseFail;
2500 if (Neg && !skipToken(AsmToken::RParen, "expected closing parentheses"))
2501 return MatchOperand_ParseFail;
2503 AMDGPUOperand::Modifiers Mods;
2504 Mods.Abs = Abs || SP3Abs;
2505 Mods.Neg = Neg || SP3Neg;
2507 if (Mods.hasFPModifiers()) {
2508 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
2509 if (Op.isExpr()) {
2510 Error(Op.getStartLoc(), "expected an absolute expression");
2511 return MatchOperand_ParseFail;
2513 Op.setModifiers(Mods);
2515 return MatchOperand_Success;
2518 OperandMatchResultTy
2519 AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands,
2520 bool AllowImm) {
2521 bool Sext = trySkipId("sext");
2522 if (Sext && !skipToken(AsmToken::LParen, "expected left paren after sext"))
2523 return MatchOperand_ParseFail;
2525 OperandMatchResultTy Res;
2526 if (AllowImm) {
2527 Res = parseRegOrImm(Operands);
2528 } else {
2529 Res = parseReg(Operands);
2531 if (Res != MatchOperand_Success) {
2532 return Sext? MatchOperand_ParseFail : Res;
2535 if (Sext && !skipToken(AsmToken::RParen, "expected closing parentheses"))
2536 return MatchOperand_ParseFail;
2538 AMDGPUOperand::Modifiers Mods;
2539 Mods.Sext = Sext;
2541 if (Mods.hasIntModifiers()) {
2542 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
2543 if (Op.isExpr()) {
2544 Error(Op.getStartLoc(), "expected an absolute expression");
2545 return MatchOperand_ParseFail;
2547 Op.setModifiers(Mods);
2550 return MatchOperand_Success;
2553 OperandMatchResultTy
2554 AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) {
2555 return parseRegOrImmWithFPInputMods(Operands, false);
2558 OperandMatchResultTy
2559 AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) {
2560 return parseRegOrImmWithIntInputMods(Operands, false);
2563 OperandMatchResultTy AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) {
2564 auto Loc = getLoc();
2565 if (trySkipId("off")) {
2566 Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc,
2567 AMDGPUOperand::ImmTyOff, false));
2568 return MatchOperand_Success;
2571 if (!isRegister())
2572 return MatchOperand_NoMatch;
2574 std::unique_ptr<AMDGPUOperand> Reg = parseRegister();
2575 if (Reg) {
2576 Operands.push_back(std::move(Reg));
2577 return MatchOperand_Success;
2580 return MatchOperand_ParseFail;
2584 unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
2585 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
2587 if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) ||
2588 (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) ||
2589 (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) ||
2590 (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) )
2591 return Match_InvalidOperand;
2593 if ((TSFlags & SIInstrFlags::VOP3) &&
2594 (TSFlags & SIInstrFlags::VOPAsmPrefer32Bit) &&
2595 getForcedEncodingSize() != 64)
2596 return Match_PreferE32;
2598 if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
2599 Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
2600 // v_mac_f32/16 allow only dst_sel == DWORD.
2601 auto OpNum =
2602 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel);
2603 const auto &Op = Inst.getOperand(OpNum);
2604 if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) {
2605 return Match_InvalidOperand;
2609 return Match_Success;
2612 // What asm variants we should check
2613 ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const {
2614 if (getForcedEncodingSize() == 32) {
2615 static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT};
2616 return makeArrayRef(Variants);
2619 if (isForcedVOP3()) {
2620 static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3};
2621 return makeArrayRef(Variants);
2624 if (isForcedSDWA()) {
2625 static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA,
2626 AMDGPUAsmVariants::SDWA9};
2627 return makeArrayRef(Variants);
2630 if (isForcedDPP()) {
2631 static const unsigned Variants[] = {AMDGPUAsmVariants::DPP};
2632 return makeArrayRef(Variants);
2635 static const unsigned Variants[] = {
2636 AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3,
2637 AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9, AMDGPUAsmVariants::DPP
2640 return makeArrayRef(Variants);
2643 unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const {
2644 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
2645 const unsigned Num = Desc.getNumImplicitUses();
2646 for (unsigned i = 0; i < Num; ++i) {
2647 unsigned Reg = Desc.ImplicitUses[i];
2648 switch (Reg) {
2649 case AMDGPU::FLAT_SCR:
2650 case AMDGPU::VCC:
2651 case AMDGPU::VCC_LO:
2652 case AMDGPU::VCC_HI:
2653 case AMDGPU::M0:
2654 return Reg;
2655 default:
2656 break;
2659 return AMDGPU::NoRegister;
2662 // NB: This code is correct only when used to check constant
2663 // bus limitations because GFX7 supports no f16 inline constants.
2664 // Note that there are no cases when a GFX7 opcode violates
2665 // constant bus limitations due to the use of an f16 constant.
2666 bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst,
2667 unsigned OpIdx) const {
2668 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
2670 if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) {
2671 return false;
2674 const MCOperand &MO = Inst.getOperand(OpIdx);
2676 int64_t Val = MO.getImm();
2677 auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx);
2679 switch (OpSize) { // expected operand size
2680 case 8:
2681 return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm());
2682 case 4:
2683 return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm());
2684 case 2: {
2685 const unsigned OperandType = Desc.OpInfo[OpIdx].OperandType;
2686 if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 ||
2687 OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16 ||
2688 OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2INT16 ||
2689 OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2FP16 ||
2690 OperandType == AMDGPU::OPERAND_REG_IMM_V2INT16 ||
2691 OperandType == AMDGPU::OPERAND_REG_IMM_V2FP16) {
2692 return AMDGPU::isInlinableLiteralV216(Val, hasInv2PiInlineImm());
2693 } else {
2694 return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm());
2697 default:
2698 llvm_unreachable("invalid operand size");
2702 unsigned AMDGPUAsmParser::getConstantBusLimit(unsigned Opcode) const {
2703 if (!isGFX10())
2704 return 1;
2706 switch (Opcode) {
2707 // 64-bit shift instructions can use only one scalar value input
2708 case AMDGPU::V_LSHLREV_B64:
2709 case AMDGPU::V_LSHLREV_B64_gfx10:
2710 case AMDGPU::V_LSHL_B64:
2711 case AMDGPU::V_LSHRREV_B64:
2712 case AMDGPU::V_LSHRREV_B64_gfx10:
2713 case AMDGPU::V_LSHR_B64:
2714 case AMDGPU::V_ASHRREV_I64:
2715 case AMDGPU::V_ASHRREV_I64_gfx10:
2716 case AMDGPU::V_ASHR_I64:
2717 return 1;
2718 default:
2719 return 2;
2723 bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) {
2724 const MCOperand &MO = Inst.getOperand(OpIdx);
2725 if (MO.isImm()) {
2726 return !isInlineConstant(Inst, OpIdx);
2727 } else if (MO.isReg()) {
2728 auto Reg = MO.getReg();
2729 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2730 return isSGPR(mc2PseudoReg(Reg), TRI) && Reg != SGPR_NULL;
2731 } else {
2732 return true;
2736 bool AMDGPUAsmParser::validateConstantBusLimitations(const MCInst &Inst) {
2737 const unsigned Opcode = Inst.getOpcode();
2738 const MCInstrDesc &Desc = MII.get(Opcode);
2739 unsigned ConstantBusUseCount = 0;
2740 unsigned NumLiterals = 0;
2741 unsigned LiteralSize;
2743 if (Desc.TSFlags &
2744 (SIInstrFlags::VOPC |
2745 SIInstrFlags::VOP1 | SIInstrFlags::VOP2 |
2746 SIInstrFlags::VOP3 | SIInstrFlags::VOP3P |
2747 SIInstrFlags::SDWA)) {
2748 // Check special imm operands (used by madmk, etc)
2749 if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1) {
2750 ++ConstantBusUseCount;
2753 SmallDenseSet<unsigned> SGPRsUsed;
2754 unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst);
2755 if (SGPRUsed != AMDGPU::NoRegister) {
2756 SGPRsUsed.insert(SGPRUsed);
2757 ++ConstantBusUseCount;
2760 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
2761 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
2762 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
2764 const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };
2766 for (int OpIdx : OpIndices) {
2767 if (OpIdx == -1) break;
2769 const MCOperand &MO = Inst.getOperand(OpIdx);
2770 if (usesConstantBus(Inst, OpIdx)) {
2771 if (MO.isReg()) {
2772 const unsigned Reg = mc2PseudoReg(MO.getReg());
2773 // Pairs of registers with a partial intersection like these
2774 // s0, s[0:1]
2775 // flat_scratch_lo, flat_scratch
2776 // flat_scratch_lo, flat_scratch_hi
2777 // are theoretically valid but they are disabled anyway.
2778 // Note that this code mimics SIInstrInfo::verifyInstruction
2779 if (!SGPRsUsed.count(Reg)) {
2780 SGPRsUsed.insert(Reg);
2781 ++ConstantBusUseCount;
2783 } else { // Expression or a literal
2785 if (Desc.OpInfo[OpIdx].OperandType == MCOI::OPERAND_IMMEDIATE)
2786 continue; // special operand like VINTERP attr_chan
2788 // An instruction may use only one literal.
2789 // This has been validated in the previous step.
2790 // See validateVOP3Literal.
2791 // This literal may be used as more than one operand.
2792 // If all these operands are of the same size,
2793 // this literal counts as one scalar value.
2794 // Otherwise it counts as 2 scalar values.
2795 // See "GFX10 Shader Programming", section 3.6.2.3.
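// Worked example (added for clarity): a literal feeding two 32-bit source
// operands is counted once, but a literal feeding a 32-bit and a 64-bit
// operand has mismatched sizes and counts as two scalar values below.
// Sub-dword operands are rounded up to 4 bytes before the comparison.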
2797 unsigned Size = AMDGPU::getOperandSize(Desc, OpIdx);
2798 if (Size < 4) Size = 4;
2800 if (NumLiterals == 0) {
2801 NumLiterals = 1;
2802 LiteralSize = Size;
2803 } else if (LiteralSize != Size) {
2804 NumLiterals = 2;
2810 ConstantBusUseCount += NumLiterals;
2812 return ConstantBusUseCount <= getConstantBusLimit(Opcode);
2815 bool AMDGPUAsmParser::validateEarlyClobberLimitations(const MCInst &Inst) {
2816 const unsigned Opcode = Inst.getOpcode();
2817 const MCInstrDesc &Desc = MII.get(Opcode);
2819 const int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst);
2820 if (DstIdx == -1 ||
2821 Desc.getOperandConstraint(DstIdx, MCOI::EARLY_CLOBBER) == -1) {
2822 return true;
2825 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2827 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
2828 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
2829 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
2831 assert(DstIdx != -1);
2832 const MCOperand &Dst = Inst.getOperand(DstIdx);
2833 assert(Dst.isReg());
2834 const unsigned DstReg = mc2PseudoReg(Dst.getReg());
2836 const int SrcIndices[] = { Src0Idx, Src1Idx, Src2Idx };
2838 for (int SrcIdx : SrcIndices) {
2839 if (SrcIdx == -1) break;
2840 const MCOperand &Src = Inst.getOperand(SrcIdx);
2841 if (Src.isReg()) {
2842 const unsigned SrcReg = mc2PseudoReg(Src.getReg());
2843 if (isRegIntersect(DstReg, SrcReg, TRI)) {
2844 return false;
2849 return true;
2852 bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) {
2854 const unsigned Opc = Inst.getOpcode();
2855 const MCInstrDesc &Desc = MII.get(Opc);
2857 if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) {
2858 int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp);
2859 assert(ClampIdx != -1);
2860 return Inst.getOperand(ClampIdx).getImm() == 0;
2863 return true;
2866 bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst) {
2868 const unsigned Opc = Inst.getOpcode();
2869 const MCInstrDesc &Desc = MII.get(Opc);
2871 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
2872 return true;
2874 int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata);
2875 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
2876 int TFEIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe);
2878 assert(VDataIdx != -1);
2879 assert(DMaskIdx != -1);
2880 assert(TFEIdx != -1);
2882 unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx);
2883 unsigned TFESize = Inst.getOperand(TFEIdx).getImm()? 1 : 0;
2884 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
2885 if (DMask == 0)
2886 DMask = 1;
2888 unsigned DataSize =
2889 (Desc.TSFlags & SIInstrFlags::Gather4) ? 4 : countPopulation(DMask);
2890 if (hasPackedD16()) {
2891 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
2892 if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm())
2893 DataSize = (DataSize + 1) / 2;
2896 return (VDataSize / 4) == DataSize + TFESize;
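// Example (added for clarity): dmask = 0x7 requires 3 dwords of vdata (always
// 4 for gather4), plus one extra dword when tfe is set; with packed d16 the
// component count is halved, rounding up.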
2899 bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst) {
2900 const unsigned Opc = Inst.getOpcode();
2901 const MCInstrDesc &Desc = MII.get(Opc);
2903 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0 || !isGFX10())
2904 return true;
2906 const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
2907 const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
2908 AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
2909 int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0);
2910 int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::srsrc);
2911 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
2913 assert(VAddr0Idx != -1);
2914 assert(SrsrcIdx != -1);
2915 assert(DimIdx != -1);
2916 assert(SrsrcIdx > VAddr0Idx);
2918 unsigned Dim = Inst.getOperand(DimIdx).getImm();
2919 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
2920 bool IsNSA = SrsrcIdx - VAddr0Idx > 1;
2921 unsigned VAddrSize =
2922 IsNSA ? SrsrcIdx - VAddr0Idx
2923 : AMDGPU::getRegOperandSize(getMRI(), Desc, VAddr0Idx) / 4;
2925 unsigned AddrSize = BaseOpcode->NumExtraArgs +
2926 (BaseOpcode->Gradients ? DimInfo->NumGradients : 0) +
2927 (BaseOpcode->Coordinates ? DimInfo->NumCoords : 0) +
2928 (BaseOpcode->LodOrClampOrMip ? 1 : 0);
2929 if (!IsNSA) {
2930 if (AddrSize > 8)
2931 AddrSize = 16;
2932 else if (AddrSize > 4)
2933 AddrSize = 8;
2936 return VAddrSize == AddrSize;
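// Example (added for clarity): in NSA form each address component occupies its
// own VGPR operand, so VAddrSize is simply the operand count; in non-NSA form
// the components live in one register tuple, and AddrSize is rounded up above
// (values over 4 become 8, values over 8 become 16) before the comparison.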
2939 bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) {
2941 const unsigned Opc = Inst.getOpcode();
2942 const MCInstrDesc &Desc = MII.get(Opc);
2944 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
2945 return true;
2946 if (!Desc.mayLoad() || !Desc.mayStore())
2947 return true; // Not atomic
2949 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
2950 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
2952 // This is an incomplete check because image_atomic_cmpswap
2953 // may only use 0x3 and 0xf while other atomic operations
2954 // may use 0x1 and 0x3. However, these limitations are
2955 // verified when we check that dmask matches dst size.
2956 return DMask == 0x1 || DMask == 0x3 || DMask == 0xf;
2959 bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) {
2961 const unsigned Opc = Inst.getOpcode();
2962 const MCInstrDesc &Desc = MII.get(Opc);
2964 if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0)
2965 return true;
2967 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
2968 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
2970 // GATHER4 instructions use dmask in a different fashion compared to
2971 // other MIMG instructions. The only useful DMASK values are
2972 // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns
2973 // (red,red,red,red) etc.) The ISA document doesn't mention
2974 // this.
2975 return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8;
2978 bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) {
2980 const unsigned Opc = Inst.getOpcode();
2981 const MCInstrDesc &Desc = MII.get(Opc);
2983 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
2984 return true;
2986 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
2987 if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) {
2988 if (isCI() || isSI())
2989 return false;
2992 return true;
2995 bool AMDGPUAsmParser::validateMIMGDim(const MCInst &Inst) {
2996 const unsigned Opc = Inst.getOpcode();
2997 const MCInstrDesc &Desc = MII.get(Opc);
2999 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3000 return true;
3002 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
3003 if (DimIdx < 0)
3004 return true;
3006 long Imm = Inst.getOperand(DimIdx).getImm();
3007 if (Imm < 0 || Imm >= 8)
3008 return false;
3010 return true;
3013 static bool IsRevOpcode(const unsigned Opcode)
3015 switch (Opcode) {
3016 case AMDGPU::V_SUBREV_F32_e32:
3017 case AMDGPU::V_SUBREV_F32_e64:
3018 case AMDGPU::V_SUBREV_F32_e32_gfx10:
3019 case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7:
3020 case AMDGPU::V_SUBREV_F32_e32_vi:
3021 case AMDGPU::V_SUBREV_F32_e64_gfx10:
3022 case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7:
3023 case AMDGPU::V_SUBREV_F32_e64_vi:
3025 case AMDGPU::V_SUBREV_I32_e32:
3026 case AMDGPU::V_SUBREV_I32_e64:
3027 case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7:
3028 case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7:
3030 case AMDGPU::V_SUBBREV_U32_e32:
3031 case AMDGPU::V_SUBBREV_U32_e64:
3032 case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7:
3033 case AMDGPU::V_SUBBREV_U32_e32_vi:
3034 case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7:
3035 case AMDGPU::V_SUBBREV_U32_e64_vi:
3037 case AMDGPU::V_SUBREV_U32_e32:
3038 case AMDGPU::V_SUBREV_U32_e64:
3039 case AMDGPU::V_SUBREV_U32_e32_gfx9:
3040 case AMDGPU::V_SUBREV_U32_e32_vi:
3041 case AMDGPU::V_SUBREV_U32_e64_gfx9:
3042 case AMDGPU::V_SUBREV_U32_e64_vi:
3044 case AMDGPU::V_SUBREV_F16_e32:
3045 case AMDGPU::V_SUBREV_F16_e64:
3046 case AMDGPU::V_SUBREV_F16_e32_gfx10:
3047 case AMDGPU::V_SUBREV_F16_e32_vi:
3048 case AMDGPU::V_SUBREV_F16_e64_gfx10:
3049 case AMDGPU::V_SUBREV_F16_e64_vi:
3051 case AMDGPU::V_SUBREV_U16_e32:
3052 case AMDGPU::V_SUBREV_U16_e64:
3053 case AMDGPU::V_SUBREV_U16_e32_vi:
3054 case AMDGPU::V_SUBREV_U16_e64_vi:
3056 case AMDGPU::V_SUBREV_CO_U32_e32_gfx9:
3057 case AMDGPU::V_SUBREV_CO_U32_e64_gfx10:
3058 case AMDGPU::V_SUBREV_CO_U32_e64_gfx9:
3060 case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9:
3061 case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9:
3063 case AMDGPU::V_SUBREV_NC_U32_e32_gfx10:
3064 case AMDGPU::V_SUBREV_NC_U32_e64_gfx10:
3066 case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10:
3067 case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10:
3069 case AMDGPU::V_LSHRREV_B32_e32:
3070 case AMDGPU::V_LSHRREV_B32_e64:
3071 case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7:
3072 case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7:
3073 case AMDGPU::V_LSHRREV_B32_e32_vi:
3074 case AMDGPU::V_LSHRREV_B32_e64_vi:
3075 case AMDGPU::V_LSHRREV_B32_e32_gfx10:
3076 case AMDGPU::V_LSHRREV_B32_e64_gfx10:
3078 case AMDGPU::V_ASHRREV_I32_e32:
3079 case AMDGPU::V_ASHRREV_I32_e64:
3080 case AMDGPU::V_ASHRREV_I32_e32_gfx10:
3081 case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7:
3082 case AMDGPU::V_ASHRREV_I32_e32_vi:
3083 case AMDGPU::V_ASHRREV_I32_e64_gfx10:
3084 case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7:
3085 case AMDGPU::V_ASHRREV_I32_e64_vi:
3087 case AMDGPU::V_LSHLREV_B32_e32:
3088 case AMDGPU::V_LSHLREV_B32_e64:
3089 case AMDGPU::V_LSHLREV_B32_e32_gfx10:
3090 case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7:
3091 case AMDGPU::V_LSHLREV_B32_e32_vi:
3092 case AMDGPU::V_LSHLREV_B32_e64_gfx10:
3093 case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7:
3094 case AMDGPU::V_LSHLREV_B32_e64_vi:
3096 case AMDGPU::V_LSHLREV_B16_e32:
3097 case AMDGPU::V_LSHLREV_B16_e64:
3098 case AMDGPU::V_LSHLREV_B16_e32_vi:
3099 case AMDGPU::V_LSHLREV_B16_e64_vi:
3100 case AMDGPU::V_LSHLREV_B16_gfx10:
3102 case AMDGPU::V_LSHRREV_B16_e32:
3103 case AMDGPU::V_LSHRREV_B16_e64:
3104 case AMDGPU::V_LSHRREV_B16_e32_vi:
3105 case AMDGPU::V_LSHRREV_B16_e64_vi:
3106 case AMDGPU::V_LSHRREV_B16_gfx10:
3108 case AMDGPU::V_ASHRREV_I16_e32:
3109 case AMDGPU::V_ASHRREV_I16_e64:
3110 case AMDGPU::V_ASHRREV_I16_e32_vi:
3111 case AMDGPU::V_ASHRREV_I16_e64_vi:
3112 case AMDGPU::V_ASHRREV_I16_gfx10:
3114 case AMDGPU::V_LSHLREV_B64:
3115 case AMDGPU::V_LSHLREV_B64_gfx10:
3116 case AMDGPU::V_LSHLREV_B64_vi:
3118 case AMDGPU::V_LSHRREV_B64:
3119 case AMDGPU::V_LSHRREV_B64_gfx10:
3120 case AMDGPU::V_LSHRREV_B64_vi:
3122 case AMDGPU::V_ASHRREV_I64:
3123 case AMDGPU::V_ASHRREV_I64_gfx10:
3124 case AMDGPU::V_ASHRREV_I64_vi:
3126 case AMDGPU::V_PK_LSHLREV_B16:
3127 case AMDGPU::V_PK_LSHLREV_B16_gfx10:
3128 case AMDGPU::V_PK_LSHLREV_B16_vi:
3130 case AMDGPU::V_PK_LSHRREV_B16:
3131 case AMDGPU::V_PK_LSHRREV_B16_gfx10:
3132 case AMDGPU::V_PK_LSHRREV_B16_vi:
3133 case AMDGPU::V_PK_ASHRREV_I16:
3134 case AMDGPU::V_PK_ASHRREV_I16_gfx10:
3135 case AMDGPU::V_PK_ASHRREV_I16_vi:
3136 return true;
3137 default:
3138 return false;
3142 bool AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) {
3144 using namespace SIInstrFlags;
3145 const unsigned Opcode = Inst.getOpcode();
3146 const MCInstrDesc &Desc = MII.get(Opcode);
3148 // The lds_direct register is defined so that it can be used
3149 // with 9-bit operands only. Ignore encodings which do not accept these.
3150 if ((Desc.TSFlags & (VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA)) == 0)
3151 return true;
3153 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3154 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3155 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
3157 const int SrcIndices[] = { Src1Idx, Src2Idx };
3159 // lds_direct cannot be specified as either src1 or src2.
3160 for (int SrcIdx : SrcIndices) {
3161 if (SrcIdx == -1) break;
3162 const MCOperand &Src = Inst.getOperand(SrcIdx);
3163 if (Src.isReg() && Src.getReg() == LDS_DIRECT) {
3164 return false;
3168 if (Src0Idx == -1)
3169 return true;
3171 const MCOperand &Src = Inst.getOperand(Src0Idx);
3172 if (!Src.isReg() || Src.getReg() != LDS_DIRECT)
3173 return true;
3175 // lds_direct is specified as src0. Check additional limitations.
3176 return (Desc.TSFlags & SIInstrFlags::SDWA) == 0 && !IsRevOpcode(Opcode);
3179 SMLoc AMDGPUAsmParser::getFlatOffsetLoc(const OperandVector &Operands) const {
3180 for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
3181 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
3182 if (Op.isFlatOffset())
3183 return Op.getStartLoc();
3185 return getLoc();
3188 bool AMDGPUAsmParser::validateFlatOffset(const MCInst &Inst,
3189 const OperandVector &Operands) {
3190 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
3191 if ((TSFlags & SIInstrFlags::FLAT) == 0)
3192 return true;
3194 auto Opcode = Inst.getOpcode();
3195 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
3196 assert(OpNum != -1);
3198 const auto &Op = Inst.getOperand(OpNum);
3199 if (!hasFlatOffsets() && Op.getImm() != 0) {
3200 Error(getFlatOffsetLoc(Operands),
3201 "flat offset modifier is not supported on this GPU");
3202 return false;
3205 // Address offset is 12-bit signed for GFX10, 13-bit for GFX9.
3206 // For FLAT segment the offset must be positive;
3207 // MSB is ignored and forced to zero.
3208 unsigned OffsetSize = isGFX9() ? 13 : 12;
3209 if (TSFlags & SIInstrFlags::IsNonFlatSeg) {
3210 if (!isIntN(OffsetSize, Op.getImm())) {
3211 Error(getFlatOffsetLoc(Operands),
3212 isGFX9() ? "expected a 13-bit signed offset" :
3213 "expected a 12-bit signed offset");
3214 return false;
3216 } else {
3217 if (!isUIntN(OffsetSize - 1, Op.getImm())) {
3218 Error(getFlatOffsetLoc(Operands),
3219 isGFX9() ? "expected a 12-bit unsigned offset" :
3220 "expected an 11-bit unsigned offset");
3221 return false;
3225 return true;
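// Summary of the ranges checked above (added for clarity): on GFX9 a
// non-flat-segment (global/scratch) offset is 13-bit signed (-4096..4095) and
// a flat-segment offset is 12-bit unsigned (0..4095); on GFX10 these shrink to
// 12-bit signed and 11-bit unsigned respectively.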
3228 bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const {
3229 unsigned Opcode = Inst.getOpcode();
3230 const MCInstrDesc &Desc = MII.get(Opcode);
3231 if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC)))
3232 return true;
3234 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3235 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3237 const int OpIndices[] = { Src0Idx, Src1Idx };
3239 unsigned NumLiterals = 0;
3240 uint32_t LiteralValue;
3242 for (int OpIdx : OpIndices) {
3243 if (OpIdx == -1) break;
3245 const MCOperand &MO = Inst.getOperand(OpIdx);
3246 if (MO.isImm() &&
3247 // Exclude special imm operands (like that used by s_set_gpr_idx_on)
3248 AMDGPU::isSISrcOperand(Desc, OpIdx) &&
3249 !isInlineConstant(Inst, OpIdx)) {
3250 uint32_t Value = static_cast<uint32_t>(MO.getImm());
3251 if (NumLiterals == 0 || LiteralValue != Value) {
3252 LiteralValue = Value;
3253 ++NumLiterals;
3258 return NumLiterals <= 1;
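// Example (added for clarity): an SOP2/SOPC instruction may reference a
// literal from both source operands only if both uses carry the same value,
// e.g. "s_add_u32 s0, 0x11223344, 0x11223344"; two distinct literal values
// are rejected here.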
3261 bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) {
3262 const unsigned Opc = Inst.getOpcode();
3263 if (Opc == AMDGPU::V_PERMLANE16_B32_gfx10 ||
3264 Opc == AMDGPU::V_PERMLANEX16_B32_gfx10) {
3265 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
3266 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
3268 if (OpSel & ~3)
3269 return false;
3271 return true;
3274 // Check if VCC register matches wavefront size
3275 bool AMDGPUAsmParser::validateVccOperand(unsigned Reg) const {
3276 auto FB = getFeatureBits();
3277 return (FB[AMDGPU::FeatureWavefrontSize64] && Reg == AMDGPU::VCC) ||
3278 (FB[AMDGPU::FeatureWavefrontSize32] && Reg == AMDGPU::VCC_LO);
3281 // A VOP3 literal is only allowed on GFX10+, and only one can be used
3282 bool AMDGPUAsmParser::validateVOP3Literal(const MCInst &Inst) const {
3283 unsigned Opcode = Inst.getOpcode();
3284 const MCInstrDesc &Desc = MII.get(Opcode);
3285 if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P)))
3286 return true;
3288 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3289 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3290 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
3292 const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };
3294 unsigned NumLiterals = 0;
3295 uint32_t LiteralValue;
3297 for (int OpIdx : OpIndices) {
3298 if (OpIdx == -1) break;
3300 const MCOperand &MO = Inst.getOperand(OpIdx);
3301 if (!MO.isImm() || !AMDGPU::isSISrcOperand(Desc, OpIdx))
3302 continue;
3304 if (OpIdx == Src2Idx && (Desc.TSFlags & SIInstrFlags::IsMAI) &&
3305 getFeatureBits()[AMDGPU::FeatureMFMAInlineLiteralBug])
3306 return false;
3308 if (!isInlineConstant(Inst, OpIdx)) {
3309 uint32_t Value = static_cast<uint32_t>(MO.getImm());
3310 if (NumLiterals == 0 || LiteralValue != Value) {
3311 LiteralValue = Value;
3312 ++NumLiterals;
3317 return !NumLiterals ||
3318 (NumLiterals == 1 && getFeatureBits()[AMDGPU::FeatureVOP3Literal]);
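// Summary (added for clarity): VOP3/VOP3P encodings accept at most one unique
// 32-bit literal, and only on subtargets with FeatureVOP3Literal; an MFMA src2
// literal is additionally rejected on targets with the inline literal bug.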
3321 bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst,
3322 const SMLoc &IDLoc,
3323 const OperandVector &Operands) {
3324 if (!validateLdsDirect(Inst)) {
3325 Error(IDLoc,
3326 "invalid use of lds_direct");
3327 return false;
3329 if (!validateSOPLiteral(Inst)) {
3330 Error(IDLoc,
3331 "only one literal operand is allowed");
3332 return false;
3334 if (!validateVOP3Literal(Inst)) {
3335 Error(IDLoc,
3336 "invalid literal operand");
3337 return false;
3339 if (!validateConstantBusLimitations(Inst)) {
3340 Error(IDLoc,
3341 "invalid operand (violates constant bus restrictions)");
3342 return false;
3344 if (!validateEarlyClobberLimitations(Inst)) {
3345 Error(IDLoc,
3346 "destination must be different than all sources");
3347 return false;
3349 if (!validateIntClampSupported(Inst)) {
3350 Error(IDLoc,
3351 "integer clamping is not supported on this GPU");
3352 return false;
3354 if (!validateOpSel(Inst)) {
3355 Error(IDLoc,
3356 "invalid op_sel operand");
3357 return false;
3359 // For MUBUF/MTBUF d16 is part of the opcode, so there is nothing to validate.
3360 if (!validateMIMGD16(Inst)) {
3361 Error(IDLoc,
3362 "d16 modifier is not supported on this GPU");
3363 return false;
3365 if (!validateMIMGDim(Inst)) {
3366 Error(IDLoc, "dim modifier is required on this GPU");
3367 return false;
3369 if (!validateMIMGDataSize(Inst)) {
3370 Error(IDLoc,
3371 "image data size does not match dmask and tfe");
3372 return false;
3374 if (!validateMIMGAddrSize(Inst)) {
3375 Error(IDLoc,
3376 "image address size does not match dim and a16");
3377 return false;
3379 if (!validateMIMGAtomicDMask(Inst)) {
3380 Error(IDLoc,
3381 "invalid atomic image dmask");
3382 return false;
3384 if (!validateMIMGGatherDMask(Inst)) {
3385 Error(IDLoc,
3386 "invalid image_gather dmask: only one bit must be set");
3387 return false;
3389 if (!validateFlatOffset(Inst, Operands)) {
3390 return false;
3393 return true;
3396 static std::string AMDGPUMnemonicSpellCheck(StringRef S,
3397 const FeatureBitset &FBS,
3398 unsigned VariantID = 0);
3400 bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
3401 OperandVector &Operands,
3402 MCStreamer &Out,
3403 uint64_t &ErrorInfo,
3404 bool MatchingInlineAsm) {
3405 MCInst Inst;
3406 unsigned Result = Match_Success;
3407 for (auto Variant : getMatchedVariants()) {
3408 uint64_t EI;
3409 auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm,
3410 Variant);
3411 // We order match statuses from least to most specific, and use the most
3412 // specific status as the result:
3413 // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature < Match_PreferE32
3414 if ((R == Match_Success) ||
3415 (R == Match_PreferE32) ||
3416 (R == Match_MissingFeature && Result != Match_PreferE32) ||
3417 (R == Match_InvalidOperand && Result != Match_MissingFeature
3418 && Result != Match_PreferE32) ||
3419 (R == Match_MnemonicFail && Result != Match_InvalidOperand
3420 && Result != Match_MissingFeature
3421 && Result != Match_PreferE32)) {
3422 Result = R;
3423 ErrorInfo = EI;
3425 if (R == Match_Success)
3426 break;
3429 switch (Result) {
3430 default: break;
3431 case Match_Success:
3432 if (!validateInstruction(Inst, IDLoc, Operands)) {
3433 return true;
3435 Inst.setLoc(IDLoc);
3436 Out.EmitInstruction(Inst, getSTI());
3437 return false;
3439 case Match_MissingFeature:
3440 return Error(IDLoc, "instruction not supported on this GPU");
3442 case Match_MnemonicFail: {
3443 FeatureBitset FBS = ComputeAvailableFeatures(getSTI().getFeatureBits());
3444 std::string Suggestion = AMDGPUMnemonicSpellCheck(
3445 ((AMDGPUOperand &)*Operands[0]).getToken(), FBS);
3446 return Error(IDLoc, "invalid instruction" + Suggestion,
3447 ((AMDGPUOperand &)*Operands[0]).getLocRange());
3450 case Match_InvalidOperand: {
3451 SMLoc ErrorLoc = IDLoc;
3452 if (ErrorInfo != ~0ULL) {
3453 if (ErrorInfo >= Operands.size()) {
3454 return Error(IDLoc, "too few operands for instruction");
3456 ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc();
3457 if (ErrorLoc == SMLoc())
3458 ErrorLoc = IDLoc;
3460 return Error(ErrorLoc, "invalid operand for instruction");
3463 case Match_PreferE32:
3464 return Error(IDLoc, "internal error: instruction without _e64 suffix "
3465 "should be encoded as e32");
3467 llvm_unreachable("Implement any new match types added!");
3470 bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) {
3471 int64_t Tmp = -1;
3472 if (getLexer().isNot(AsmToken::Integer) && getLexer().isNot(AsmToken::Identifier)) {
3473 return true;
3475 if (getParser().parseAbsoluteExpression(Tmp)) {
3476 return true;
3478 Ret = static_cast<uint32_t>(Tmp);
3479 return false;
3482 bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major,
3483 uint32_t &Minor) {
3484 if (ParseAsAbsoluteExpression(Major))
3485 return TokError("invalid major version");
3487 if (getLexer().isNot(AsmToken::Comma))
3488 return TokError("minor version number required, comma expected");
3489 Lex();
3491 if (ParseAsAbsoluteExpression(Minor))
3492 return TokError("invalid minor version");
3494 return false;
3497 bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() {
3498 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
3499 return TokError("directive only supported for amdgcn architecture");
3501 std::string Target;
3503 SMLoc TargetStart = getTok().getLoc();
3504 if (getParser().parseEscapedString(Target))
3505 return true;
3506 SMRange TargetRange = SMRange(TargetStart, getTok().getLoc());
3508 std::string ExpectedTarget;
3509 raw_string_ostream ExpectedTargetOS(ExpectedTarget);
3510 IsaInfo::streamIsaVersion(&getSTI(), ExpectedTargetOS);
3512 if (Target != ExpectedTargetOS.str())
3513 return getParser().Error(TargetRange.Start, "target must match options",
3514 TargetRange);
3516 getTargetStreamer().EmitDirectiveAMDGCNTarget(Target);
3517 return false;
3520 bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) {
3521 return getParser().Error(Range.Start, "value out of range", Range);
3524 bool AMDGPUAsmParser::calculateGPRBlocks(
3525 const FeatureBitset &Features, bool VCCUsed, bool FlatScrUsed,
3526 bool XNACKUsed, Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR,
3527 SMRange VGPRRange, unsigned NextFreeSGPR, SMRange SGPRRange,
3528 unsigned &VGPRBlocks, unsigned &SGPRBlocks) {
3529 // TODO(scott.linder): These calculations are duplicated from
3530 // AMDGPUAsmPrinter::getSIProgramInfo and could be unified.
3531 IsaVersion Version = getIsaVersion(getSTI().getCPU());
3533 unsigned NumVGPRs = NextFreeVGPR;
3534 unsigned NumSGPRs = NextFreeSGPR;
3536 if (Version.Major >= 10)
3537 NumSGPRs = 0;
3538 else {
3539 unsigned MaxAddressableNumSGPRs =
3540 IsaInfo::getAddressableNumSGPRs(&getSTI());
3542 if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) &&
3543 NumSGPRs > MaxAddressableNumSGPRs)
3544 return OutOfRangeError(SGPRRange);
3546 NumSGPRs +=
3547 IsaInfo::getNumExtraSGPRs(&getSTI(), VCCUsed, FlatScrUsed, XNACKUsed);
3549 if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) &&
3550 NumSGPRs > MaxAddressableNumSGPRs)
3551 return OutOfRangeError(SGPRRange);
3553 if (Features.test(FeatureSGPRInitBug))
3554 NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG;
3557 VGPRBlocks =
3558 IsaInfo::getNumVGPRBlocks(&getSTI(), NumVGPRs, EnableWavefrontSize32);
3559 SGPRBlocks = IsaInfo::getNumSGPRBlocks(&getSTI(), NumSGPRs);
3561 return false;
3564 bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
3565 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
3566 return TokError("directive only supported for amdgcn architecture");
3568 if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA)
3569 return TokError("directive only supported for amdhsa OS");
3571 StringRef KernelName;
3572 if (getParser().parseIdentifier(KernelName))
3573 return true;
3575 kernel_descriptor_t KD = getDefaultAmdhsaKernelDescriptor(&getSTI());
3577 StringSet<> Seen;
3579 IsaVersion IVersion = getIsaVersion(getSTI().getCPU());
3581 SMRange VGPRRange;
3582 uint64_t NextFreeVGPR = 0;
3583 SMRange SGPRRange;
3584 uint64_t NextFreeSGPR = 0;
3585 unsigned UserSGPRCount = 0;
3586 bool ReserveVCC = true;
3587 bool ReserveFlatScr = true;
3588 bool ReserveXNACK = hasXNACK();
3589 Optional<bool> EnableWavefrontSize32;
3591 while (true) {
3592 while (getLexer().is(AsmToken::EndOfStatement))
3593 Lex();
3595 if (getLexer().isNot(AsmToken::Identifier))
3596 return TokError("expected .amdhsa_ directive or .end_amdhsa_kernel");
3598 StringRef ID = getTok().getIdentifier();
3599 SMRange IDRange = getTok().getLocRange();
3600 Lex();
3602 if (ID == ".end_amdhsa_kernel")
3603 break;
3605 if (Seen.find(ID) != Seen.end())
3606 return TokError(".amdhsa_ directives cannot be repeated");
3607 Seen.insert(ID);
3609 SMLoc ValStart = getTok().getLoc();
3610 int64_t IVal;
3611 if (getParser().parseAbsoluteExpression(IVal))
3612 return true;
3613 SMLoc ValEnd = getTok().getLoc();
3614 SMRange ValRange = SMRange(ValStart, ValEnd);
3616 if (IVal < 0)
3617 return OutOfRangeError(ValRange);
3619 uint64_t Val = IVal;
3621 #define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE) \
3622 if (!isUInt<ENTRY##_WIDTH>(VALUE)) \
3623 return OutOfRangeError(RANGE); \
3624 AMDHSA_BITS_SET(FIELD, ENTRY, VALUE);
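// Illustratively, PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
// COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, Val, ValRange) first checks
// that Val fits in that field's bit width and then ORs it into the selected
// descriptor word.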
3626 if (ID == ".amdhsa_group_segment_fixed_size") {
3627 if (!isUInt<sizeof(KD.group_segment_fixed_size) * CHAR_BIT>(Val))
3628 return OutOfRangeError(ValRange);
3629 KD.group_segment_fixed_size = Val;
3630 } else if (ID == ".amdhsa_private_segment_fixed_size") {
3631 if (!isUInt<sizeof(KD.private_segment_fixed_size) * CHAR_BIT>(Val))
3632 return OutOfRangeError(ValRange);
3633 KD.private_segment_fixed_size = Val;
3634 } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") {
3635 PARSE_BITS_ENTRY(KD.kernel_code_properties,
3636 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER,
3637 Val, ValRange);
3638 if (Val)
3639 UserSGPRCount += 4;
3640 } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") {
3641 PARSE_BITS_ENTRY(KD.kernel_code_properties,
3642 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, Val,
3643 ValRange);
3644 if (Val)
3645 UserSGPRCount += 2;
3646 } else if (ID == ".amdhsa_user_sgpr_queue_ptr") {
3647 PARSE_BITS_ENTRY(KD.kernel_code_properties,
3648 KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, Val,
3649 ValRange);
3650 if (Val)
3651 UserSGPRCount += 2;
3652 } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") {
3653 PARSE_BITS_ENTRY(KD.kernel_code_properties,
3654 KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR,
3655 Val, ValRange);
3656 if (Val)
3657 UserSGPRCount += 2;
3658 } else if (ID == ".amdhsa_user_sgpr_dispatch_id") {
3659 PARSE_BITS_ENTRY(KD.kernel_code_properties,
3660 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, Val,
3661 ValRange);
3662 if (Val)
3663 UserSGPRCount += 2;
3664 } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") {
3665 PARSE_BITS_ENTRY(KD.kernel_code_properties,
3666 KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, Val,
3667 ValRange);
3668 if (Val)
3669 UserSGPRCount += 2;
3670 } else if (ID == ".amdhsa_user_sgpr_private_segment_size") {
3671 PARSE_BITS_ENTRY(KD.kernel_code_properties,
3672 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE,
3673 Val, ValRange);
3674 if (Val)
3675 UserSGPRCount += 1;
3676 } else if (ID == ".amdhsa_wavefront_size32") {
3677 if (IVersion.Major < 10)
3678 return getParser().Error(IDRange.Start, "directive requires gfx10+",
3679 IDRange);
3680 EnableWavefrontSize32 = Val;
3681 PARSE_BITS_ENTRY(KD.kernel_code_properties,
3682 KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32,
3683 Val, ValRange);
3684 } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") {
3685 PARSE_BITS_ENTRY(
3686 KD.compute_pgm_rsrc2,
3687 COMPUTE_PGM_RSRC2_ENABLE_SGPR_PRIVATE_SEGMENT_WAVEFRONT_OFFSET, Val,
3688 ValRange);
3689 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") {
3690 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3691 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, Val,
3692 ValRange);
3693 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") {
3694 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3695 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, Val,
3696 ValRange);
3697 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") {
3698 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3699 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, Val,
3700 ValRange);
3701 } else if (ID == ".amdhsa_system_sgpr_workgroup_info") {
3702 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3703 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, Val,
3704 ValRange);
3705 } else if (ID == ".amdhsa_system_vgpr_workitem_id") {
3706 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3707 COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, Val,
3708 ValRange);
3709 } else if (ID == ".amdhsa_next_free_vgpr") {
3710 VGPRRange = ValRange;
3711 NextFreeVGPR = Val;
3712 } else if (ID == ".amdhsa_next_free_sgpr") {
3713 SGPRRange = ValRange;
3714 NextFreeSGPR = Val;
3715 } else if (ID == ".amdhsa_reserve_vcc") {
3716 if (!isUInt<1>(Val))
3717 return OutOfRangeError(ValRange);
3718 ReserveVCC = Val;
3719 } else if (ID == ".amdhsa_reserve_flat_scratch") {
3720 if (IVersion.Major < 7)
3721 return getParser().Error(IDRange.Start, "directive requires gfx7+",
3722 IDRange);
3723 if (!isUInt<1>(Val))
3724 return OutOfRangeError(ValRange);
3725 ReserveFlatScr = Val;
3726 } else if (ID == ".amdhsa_reserve_xnack_mask") {
3727 if (IVersion.Major < 8)
3728 return getParser().Error(IDRange.Start, "directive requires gfx8+",
3729 IDRange);
3730 if (!isUInt<1>(Val))
3731 return OutOfRangeError(ValRange);
3732 ReserveXNACK = Val;
3733 } else if (ID == ".amdhsa_float_round_mode_32") {
3734 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
3735 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, Val, ValRange);
3736 } else if (ID == ".amdhsa_float_round_mode_16_64") {
3737 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
3738 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, Val, ValRange);
3739 } else if (ID == ".amdhsa_float_denorm_mode_32") {
3740 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
3741 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, Val, ValRange);
3742 } else if (ID == ".amdhsa_float_denorm_mode_16_64") {
3743 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
3744 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, Val,
3745 ValRange);
3746 } else if (ID == ".amdhsa_dx10_clamp") {
3747 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
3748 COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP, Val, ValRange);
3749 } else if (ID == ".amdhsa_ieee_mode") {
3750 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE,
3751 Val, ValRange);
3752 } else if (ID == ".amdhsa_fp16_overflow") {
3753 if (IVersion.Major < 9)
3754 return getParser().Error(IDRange.Start, "directive requires gfx9+",
3755 IDRange);
3756 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FP16_OVFL, Val,
3757 ValRange);
3758 } else if (ID == ".amdhsa_workgroup_processor_mode") {
3759 if (IVersion.Major < 10)
3760 return getParser().Error(IDRange.Start, "directive requires gfx10+",
3761 IDRange);
3762 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_WGP_MODE, Val,
3763 ValRange);
3764 } else if (ID == ".amdhsa_memory_ordered") {
3765 if (IVersion.Major < 10)
3766 return getParser().Error(IDRange.Start, "directive requires gfx10+",
3767 IDRange);
3768 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_MEM_ORDERED, Val,
3769 ValRange);
3770 } else if (ID == ".amdhsa_forward_progress") {
3771 if (IVersion.Major < 10)
3772 return getParser().Error(IDRange.Start, "directive requires gfx10+",
3773 IDRange);
3774 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FWD_PROGRESS, Val,
3775 ValRange);
3776 } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") {
3777 PARSE_BITS_ENTRY(
3778 KD.compute_pgm_rsrc2,
3779 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, Val,
3780 ValRange);
3781 } else if (ID == ".amdhsa_exception_fp_denorm_src") {
3782 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3783 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE,
3784 Val, ValRange);
3785 } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") {
3786 PARSE_BITS_ENTRY(
3787 KD.compute_pgm_rsrc2,
3788 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, Val,
3789 ValRange);
3790 } else if (ID == ".amdhsa_exception_fp_ieee_overflow") {
3791 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3792 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW,
3793 Val, ValRange);
3794 } else if (ID == ".amdhsa_exception_fp_ieee_underflow") {
3795 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3796 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW,
3797 Val, ValRange);
3798 } else if (ID == ".amdhsa_exception_fp_ieee_inexact") {
3799 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3800 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT,
3801 Val, ValRange);
3802 } else if (ID == ".amdhsa_exception_int_div_zero") {
3803 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3804 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO,
3805 Val, ValRange);
3806 } else {
3807 return getParser().Error(IDRange.Start,
3808 "unknown .amdhsa_kernel directive", IDRange);
3811 #undef PARSE_BITS_ENTRY
3814 if (Seen.find(".amdhsa_next_free_vgpr") == Seen.end())
3815 return TokError(".amdhsa_next_free_vgpr directive is required");
3817 if (Seen.find(".amdhsa_next_free_sgpr") == Seen.end())
3818 return TokError(".amdhsa_next_free_sgpr directive is required");
3820 unsigned VGPRBlocks;
3821 unsigned SGPRBlocks;
3822 if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr,
3823 ReserveXNACK, EnableWavefrontSize32, NextFreeVGPR,
3824 VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks,
3825 SGPRBlocks))
3826 return true;
3828 if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>(
3829 VGPRBlocks))
3830 return OutOfRangeError(VGPRRange);
3831 AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
3832 COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, VGPRBlocks);
3834 if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>(
3835 SGPRBlocks))
3836 return OutOfRangeError(SGPRRange);
3837 AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
3838 COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT,
3839 SGPRBlocks);
3841 if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount))
3842 return TokError("too many user SGPRs enabled");
3843 AMDHSA_BITS_SET(KD.compute_pgm_rsrc2, COMPUTE_PGM_RSRC2_USER_SGPR_COUNT,
3844 UserSGPRCount);
3846 getTargetStreamer().EmitAmdhsaKernelDescriptor(
3847 getSTI(), KernelName, KD, NextFreeVGPR, NextFreeSGPR, ReserveVCC,
3848 ReserveFlatScr, ReserveXNACK);
3849 return false;
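// Accepts the legacy (pre code-object-v3) version directive, e.g. (values are
// illustrative):
//
//   .hsa_code_object_version 2,1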
3852 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectVersion() {
3853 uint32_t Major;
3854 uint32_t Minor;
3856 if (ParseDirectiveMajorMinor(Major, Minor))
3857 return true;
3859 getTargetStreamer().EmitDirectiveHSACodeObjectVersion(Major, Minor);
3860 return false;
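// With no arguments the directive defaults to the ISA of the targeted GPU; an
// explicit form looks like this (values are illustrative):
//
//   .hsa_code_object_isa 8,0,3,"AMD","AMDGPU"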
3863 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() {
3864 uint32_t Major;
3865 uint32_t Minor;
3866 uint32_t Stepping;
3867 StringRef VendorName;
3868 StringRef ArchName;
3870 // If this directive has no arguments, then use the ISA version for the
3871 // targeted GPU.
3872 if (getLexer().is(AsmToken::EndOfStatement)) {
3873 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
3874 getTargetStreamer().EmitDirectiveHSACodeObjectISA(ISA.Major, ISA.Minor,
3875 ISA.Stepping,
3876 "AMD", "AMDGPU");
3877 return false;
3880 if (ParseDirectiveMajorMinor(Major, Minor))
3881 return true;
3883 if (getLexer().isNot(AsmToken::Comma))
3884 return TokError("stepping version number required, comma expected");
3885 Lex();
3887 if (ParseAsAbsoluteExpression(Stepping))
3888 return TokError("invalid stepping version");
3890 if (getLexer().isNot(AsmToken::Comma))
3891 return TokError("vendor name required, comma expected");
3892 Lex();
3894 if (getLexer().isNot(AsmToken::String))
3895 return TokError("invalid vendor name");
3897 VendorName = getLexer().getTok().getStringContents();
3898 Lex();
3900 if (getLexer().isNot(AsmToken::Comma))
3901 return TokError("arch name required, comma expected");
3902 Lex();
3904 if (getLexer().isNot(AsmToken::String))
3905 return TokError("invalid arch name");
3907 ArchName = getLexer().getTok().getStringContents();
3908 Lex();
3910 getTargetStreamer().EmitDirectiveHSACodeObjectISA(Major, Minor, Stepping,
3911 VendorName, ArchName);
3912 return false;
3915 bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID,
3916 amd_kernel_code_t &Header) {
3917 // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing
3918 // assembly for backwards compatibility.
3919 if (ID == "max_scratch_backing_memory_byte_size") {
3920 Parser.eatToEndOfStatement();
3921 return false;
3924 SmallString<40> ErrStr;
3925 raw_svector_ostream Err(ErrStr);
3926 if (!parseAmdKernelCodeField(ID, getParser(), Header, Err)) {
3927 return TokError(Err.str());
3929 Lex();
3931 if (ID == "enable_wavefront_size32") {
3932 if (Header.code_properties & AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) {
3933 if (!isGFX10())
3934 return TokError("enable_wavefront_size32=1 is only allowed on GFX10+");
3935 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
3936 return TokError("enable_wavefront_size32=1 requires +WavefrontSize32");
3937 } else {
3938 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
3939 return TokError("enable_wavefront_size32=0 requires +WavefrontSize64");
3943 if (ID == "wavefront_size") {
3944 if (Header.wavefront_size == 5) {
3945 if (!isGFX10())
3946 return TokError("wavefront_size=5 is only allowed on GFX10+");
3947 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
3948 return TokError("wavefront_size=5 requires +WavefrontSize32");
3949 } else if (Header.wavefront_size == 6) {
3950 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
3951 return TokError("wavefront_size=6 requires +WavefrontSize64");
3955 if (ID == "enable_wgp_mode") {
3956 if (G_00B848_WGP_MODE(Header.compute_pgm_resource_registers) && !isGFX10())
3957 return TokError("enable_wgp_mode=1 is only allowed on GFX10+");
3960 if (ID == "enable_mem_ordered") {
3961 if (G_00B848_MEM_ORDERED(Header.compute_pgm_resource_registers) && !isGFX10())
3962 return TokError("enable_mem_ordered=1 is only allowed on GFX10+");
3965 if (ID == "enable_fwd_progress") {
3966 if (G_00B848_FWD_PROGRESS(Header.compute_pgm_resource_registers) && !isGFX10())
3967 return TokError("enable_fwd_progress=1 is only allowed on GFX10+");
3970 return false;
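// A minimal sketch of the block this routine parses (field names and values
// are illustrative; any amd_kernel_code_t field may appear, one per line):
//
//   .amd_kernel_code_t
//     enable_sgpr_kernarg_segment_ptr = 1
//     wavefront_size = 6
//   .end_amd_kernel_code_t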
3973 bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() {
3974 amd_kernel_code_t Header;
3975 AMDGPU::initDefaultAMDKernelCodeT(Header, &getSTI());
3977 while (true) {
3978 // Lex EndOfStatement. This is in a while loop, because lexing a comment
3979 // will set the current token to EndOfStatement.
3980 while(getLexer().is(AsmToken::EndOfStatement))
3981 Lex();
3983 if (getLexer().isNot(AsmToken::Identifier))
3984 return TokError("expected value identifier or .end_amd_kernel_code_t");
3986 StringRef ID = getLexer().getTok().getIdentifier();
3987 Lex();
3989 if (ID == ".end_amd_kernel_code_t")
3990 break;
3992 if (ParseAMDKernelCodeTValue(ID, Header))
3993 return true;
3996 getTargetStreamer().EmitAMDKernelCodeT(Header);
3998 return false;
4001 bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() {
4002 if (getLexer().isNot(AsmToken::Identifier))
4003 return TokError("expected symbol name");
4005 StringRef KernelName = Parser.getTok().getString();
4007 getTargetStreamer().EmitAMDGPUSymbolType(KernelName,
4008 ELF::STT_AMDGPU_HSA_KERNEL);
4009 Lex();
4010 if (!AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI()))
4011 KernelScope.initialize(getContext());
4012 return false;
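// Sketch of the directive handled below (the quoted string is illustrative
// and must match the ISA string derived from the triple and -mcpu):
//
//   .amd_amdgpu_isa "amdgcn-amd-amdhsa--gfx906"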
4015 bool AMDGPUAsmParser::ParseDirectiveISAVersion() {
4016 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) {
4017 return Error(getParser().getTok().getLoc(),
4018 ".amd_amdgpu_isa directive is not available on non-amdgcn "
4019 "architectures");
4022 auto ISAVersionStringFromASM = getLexer().getTok().getStringContents();
4024 std::string ISAVersionStringFromSTI;
4025 raw_string_ostream ISAVersionStreamFromSTI(ISAVersionStringFromSTI);
4026 IsaInfo::streamIsaVersion(&getSTI(), ISAVersionStreamFromSTI);
4028 if (ISAVersionStringFromASM != ISAVersionStreamFromSTI.str()) {
4029 return Error(getParser().getTok().getLoc(),
4030 ".amd_amdgpu_isa directive does not match triple and/or mcpu "
4031 "arguments specified through the command line");
4034 getTargetStreamer().EmitISAVersion(ISAVersionStreamFromSTI.str());
4035 Lex();
4037 return false;
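// The metadata is a block of YAML text bracketed by begin/end directives;
// which pair is accepted depends on the code object version (the names are
// taken from HSAMD below), e.g. .amdgpu_metadata ... .end_amdgpu_metadata for
// code object v3.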
4040 bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() {
4041 const char *AssemblerDirectiveBegin;
4042 const char *AssemblerDirectiveEnd;
4043 std::tie(AssemblerDirectiveBegin, AssemblerDirectiveEnd) =
4044 AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())
4045 ? std::make_tuple(HSAMD::V3::AssemblerDirectiveBegin,
4046 HSAMD::V3::AssemblerDirectiveEnd)
4047 : std::make_tuple(HSAMD::AssemblerDirectiveBegin,
4048 HSAMD::AssemblerDirectiveEnd);
4050 if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) {
4051 return Error(getParser().getTok().getLoc(),
4052 (Twine(AssemblerDirectiveBegin) + Twine(" directive is "
4053 "not available on non-amdhsa OSes")).str());
4056 std::string HSAMetadataString;
4057 if (ParseToEndDirective(AssemblerDirectiveBegin, AssemblerDirectiveEnd,
4058 HSAMetadataString))
4059 return true;
4061 if (IsaInfo::hasCodeObjectV3(&getSTI())) {
4062 if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString))
4063 return Error(getParser().getTok().getLoc(), "invalid HSA metadata");
4064 } else {
4065 if (!getTargetStreamer().EmitHSAMetadataV2(HSAMetadataString))
4066 return Error(getParser().getTok().getLoc(), "invalid HSA metadata");
4069 return false;
4072 /// Common code to parse out a block of text (typically YAML) between start and
4073 /// end directives.
4074 bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin,
4075 const char *AssemblerDirectiveEnd,
4076 std::string &CollectString) {
4078 raw_string_ostream CollectStream(CollectString);
4080 getLexer().setSkipSpace(false);
4082 bool FoundEnd = false;
4083 while (!getLexer().is(AsmToken::Eof)) {
4084 while (getLexer().is(AsmToken::Space)) {
4085 CollectStream << getLexer().getTok().getString();
4086 Lex();
4089 if (getLexer().is(AsmToken::Identifier)) {
4090 StringRef ID = getLexer().getTok().getIdentifier();
4091 if (ID == AssemblerDirectiveEnd) {
4092 Lex();
4093 FoundEnd = true;
4094 break;
4098 CollectStream << Parser.parseStringToEndOfStatement()
4099 << getContext().getAsmInfo()->getSeparatorString();
4101 Parser.eatToEndOfStatement();
4104 getLexer().setSkipSpace(true);
4106 if (getLexer().is(AsmToken::Eof) && !FoundEnd) {
4107 return TokError(Twine("expected directive ") +
4108 Twine(AssemblerDirectiveEnd) + Twine(" not found"));
4111 CollectStream.flush();
4112 return false;
4115 /// Parse the assembler directive for new MsgPack-format PAL metadata.
4116 bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() {
4117 std::string String;
4118 if (ParseToEndDirective(AMDGPU::PALMD::AssemblerDirectiveBegin,
4119 AMDGPU::PALMD::AssemblerDirectiveEnd, String))
4120 return true;
4122 auto PALMetadata = getTargetStreamer().getPALMetadata();
4123 if (!PALMetadata->setFromString(String))
4124 return Error(getParser().getTok().getLoc(), "invalid PAL metadata");
4125 return false;
4128 /// Parse the assembler directive for old linear-format PAL metadata.
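/// The legacy form is a comma-separated list of register/value pairs, e.g.
/// (the directive name is defined by PALMD::AssemblerDirective; the numbers
/// here are illustrative):
///
///   .amd_amdgpu_pal_metadata 0x2c0a,0x0,0x2c0b,0x42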
4129 bool AMDGPUAsmParser::ParseDirectivePALMetadata() {
4130 if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) {
4131 return Error(getParser().getTok().getLoc(),
4132 (Twine(PALMD::AssemblerDirective) + Twine(" directive is "
4133 "not available on non-amdpal OSes")).str());
4136 auto PALMetadata = getTargetStreamer().getPALMetadata();
4137 PALMetadata->setLegacy();
4138 for (;;) {
4139 uint32_t Key, Value;
4140 if (ParseAsAbsoluteExpression(Key)) {
4141 return TokError(Twine("invalid value in ") +
4142 Twine(PALMD::AssemblerDirective));
4144 if (getLexer().isNot(AsmToken::Comma)) {
4145 return TokError(Twine("expected an even number of values in ") +
4146 Twine(PALMD::AssemblerDirective));
4148 Lex();
4149 if (ParseAsAbsoluteExpression(Value)) {
4150 return TokError(Twine("invalid value in ") +
4151 Twine(PALMD::AssemblerDirective));
4153 PALMetadata->setRegister(Key, Value);
4154 if (getLexer().isNot(AsmToken::Comma))
4155 break;
4156 Lex();
4158 return false;
4161 /// ParseDirectiveAMDGPULDS
4162 /// ::= .amdgpu_lds identifier ',' size_expression [',' align_expression]
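/// For example (illustrative name and values): .amdgpu_lds lds_buf, 4096, 16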
4163 bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() {
4164 if (getParser().checkForValidSection())
4165 return true;
4167 StringRef Name;
4168 SMLoc NameLoc = getLexer().getLoc();
4169 if (getParser().parseIdentifier(Name))
4170 return TokError("expected identifier in directive");
4172 MCSymbol *Symbol = getContext().getOrCreateSymbol(Name);
4173 if (parseToken(AsmToken::Comma, "expected ','"))
4174 return true;
4176 unsigned LocalMemorySize = AMDGPU::IsaInfo::getLocalMemorySize(&getSTI());
4178 int64_t Size;
4179 SMLoc SizeLoc = getLexer().getLoc();
4180 if (getParser().parseAbsoluteExpression(Size))
4181 return true;
4182 if (Size < 0)
4183 return Error(SizeLoc, "size must be non-negative");
4184 if (Size > LocalMemorySize)
4185 return Error(SizeLoc, "size is too large");
4187 int64_t Align = 4;
4188 if (getLexer().is(AsmToken::Comma)) {
4189 Lex();
4190 SMLoc AlignLoc = getLexer().getLoc();
4191 if (getParser().parseAbsoluteExpression(Align))
4192 return true;
4193 if (Align < 0 || !isPowerOf2_64(Align))
4194 return Error(AlignLoc, "alignment must be a power of two");
4196 // Alignment larger than the size of LDS is possible in theory, as long
4197 // as the linker manages to place the symbol at address 0, but we do want
4198 // to make sure the alignment fits nicely into a 32-bit integer.
4199 if (Align >= 1u << 31)
4200 return Error(AlignLoc, "alignment is too large");
4203 if (parseToken(AsmToken::EndOfStatement,
4204 "unexpected token in '.amdgpu_lds' directive"))
4205 return true;
4207 Symbol->redefineIfPossible();
4208 if (!Symbol->isUndefined())
4209 return Error(NameLoc, "invalid symbol redefinition");
4211 getTargetStreamer().emitAMDGPULDS(Symbol, Size, Align);
4212 return false;
4215 bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
4216 StringRef IDVal = DirectiveID.getString();
4218 if (AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
4219 if (IDVal == ".amdgcn_target")
4220 return ParseDirectiveAMDGCNTarget();
4222 if (IDVal == ".amdhsa_kernel")
4223 return ParseDirectiveAMDHSAKernel();
4225 // TODO: Restructure/combine with PAL metadata directive.
4226 if (IDVal == AMDGPU::HSAMD::V3::AssemblerDirectiveBegin)
4227 return ParseDirectiveHSAMetadata();
4228 } else {
4229 if (IDVal == ".hsa_code_object_version")
4230 return ParseDirectiveHSACodeObjectVersion();
4232 if (IDVal == ".hsa_code_object_isa")
4233 return ParseDirectiveHSACodeObjectISA();
4235 if (IDVal == ".amd_kernel_code_t")
4236 return ParseDirectiveAMDKernelCodeT();
4238 if (IDVal == ".amdgpu_hsa_kernel")
4239 return ParseDirectiveAMDGPUHsaKernel();
4241 if (IDVal == ".amd_amdgpu_isa")
4242 return ParseDirectiveISAVersion();
4244 if (IDVal == AMDGPU::HSAMD::AssemblerDirectiveBegin)
4245 return ParseDirectiveHSAMetadata();
4248 if (IDVal == ".amdgpu_lds")
4249 return ParseDirectiveAMDGPULDS();
4251 if (IDVal == PALMD::AssemblerDirectiveBegin)
4252 return ParseDirectivePALMetadataBegin();
4254 if (IDVal == PALMD::AssemblerDirective)
4255 return ParseDirectivePALMetadata();
4257 return true;
4260 bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI,
4261 unsigned RegNo) const {
4263 for (MCRegAliasIterator R(AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15, &MRI, true);
4264 R.isValid(); ++R) {
4265 if (*R == RegNo)
4266 return isGFX9() || isGFX10();
4269 // GFX10 has 2 more SGPRs: 104 and 105.
4270 for (MCRegAliasIterator R(AMDGPU::SGPR104_SGPR105, &MRI, true);
4271 R.isValid(); ++R) {
4272 if (*R == RegNo)
4273 return hasSGPR104_SGPR105();
4276 switch (RegNo) {
4277 case AMDGPU::SRC_SHARED_BASE:
4278 case AMDGPU::SRC_SHARED_LIMIT:
4279 case AMDGPU::SRC_PRIVATE_BASE:
4280 case AMDGPU::SRC_PRIVATE_LIMIT:
4281 case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
4282 return !isCI() && !isSI() && !isVI();
4283 case AMDGPU::TBA:
4284 case AMDGPU::TBA_LO:
4285 case AMDGPU::TBA_HI:
4286 case AMDGPU::TMA:
4287 case AMDGPU::TMA_LO:
4288 case AMDGPU::TMA_HI:
4289 return !isGFX9() && !isGFX10();
4290 case AMDGPU::XNACK_MASK:
4291 case AMDGPU::XNACK_MASK_LO:
4292 case AMDGPU::XNACK_MASK_HI:
4293 return !isCI() && !isSI() && !isGFX10() && hasXNACK();
4294 case AMDGPU::SGPR_NULL:
4295 return isGFX10();
4296 default:
4297 break;
4300 if (isCI())
4301 return true;
4303 if (isSI() || isGFX10()) {
4304 // No flat_scr on SI.
4305 // On GFX10 flat scratch is not a valid register operand and can only be
4306 // accessed with s_setreg/s_getreg.
4307 switch (RegNo) {
4308 case AMDGPU::FLAT_SCR:
4309 case AMDGPU::FLAT_SCR_LO:
4310 case AMDGPU::FLAT_SCR_HI:
4311 return false;
4312 default:
4313 return true;
4317 // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that
4318 // SI/CI have.
4319 for (MCRegAliasIterator R(AMDGPU::SGPR102_SGPR103, &MRI, true);
4320 R.isValid(); ++R) {
4321 if (*R == RegNo)
4322 return hasSGPR102_SGPR103();
4325 return true;
4328 OperandMatchResultTy
4329 AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic,
4330 OperandMode Mode) {
4331 // Try to parse with a custom parser
4332 OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic);
4334 // If we successfully parsed the operand or if there was an error parsing,
4335 // we are done.
4337 // If we are parsing after we reach EndOfStatement then this means we
4338 // are appending default values to the Operands list. This is only done
4339 // by custom parsers, so we shouldn't continue on to the generic parsing.
4340 if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail ||
4341 getLexer().is(AsmToken::EndOfStatement))
4342 return ResTy;
4344 if (Mode == OperandMode_NSA && getLexer().is(AsmToken::LBrac)) {
4345 unsigned Prefix = Operands.size();
4346 SMLoc LBraceLoc = getTok().getLoc();
4347 Parser.Lex(); // eat the '['
4349 for (;;) {
4350 ResTy = parseReg(Operands);
4351 if (ResTy != MatchOperand_Success)
4352 return ResTy;
4354 if (getLexer().is(AsmToken::RBrac))
4355 break;
4357 if (getLexer().isNot(AsmToken::Comma))
4358 return MatchOperand_ParseFail;
4359 Parser.Lex();
4362 if (Operands.size() - Prefix > 1) {
4363 Operands.insert(Operands.begin() + Prefix,
4364 AMDGPUOperand::CreateToken(this, "[", LBraceLoc));
4365 Operands.push_back(AMDGPUOperand::CreateToken(this, "]",
4366 getTok().getLoc()));
4369 Parser.Lex(); // eat the ']'
4370 return MatchOperand_Success;
4373 return parseRegOrImm(Operands);
4376 StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) {
4377 // Clear any forced encodings from the previous instruction.
4378 setForcedEncodingSize(0);
4379 setForcedDPP(false);
4380 setForcedSDWA(false);
4382 if (Name.endswith("_e64")) {
4383 setForcedEncodingSize(64);
4384 return Name.substr(0, Name.size() - 4);
4385 } else if (Name.endswith("_e32")) {
4386 setForcedEncodingSize(32);
4387 return Name.substr(0, Name.size() - 4);
4388 } else if (Name.endswith("_dpp")) {
4389 setForcedDPP(true);
4390 return Name.substr(0, Name.size() - 4);
4391 } else if (Name.endswith("_sdwa")) {
4392 setForcedSDWA(true);
4393 return Name.substr(0, Name.size() - 5);
4395 return Name;
4398 bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info,
4399 StringRef Name,
4400 SMLoc NameLoc, OperandVector &Operands) {
4401 // Add the instruction mnemonic
4402 Name = parseMnemonicSuffix(Name);
4403 Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc));
4405 bool IsMIMG = Name.startswith("image_");
4407 while (!getLexer().is(AsmToken::EndOfStatement)) {
4408 OperandMode Mode = OperandMode_Default;
4409 if (IsMIMG && isGFX10() && Operands.size() == 2)
4410 Mode = OperandMode_NSA;
4411 OperandMatchResultTy Res = parseOperand(Operands, Name, Mode);
4413 // Eat the comma or space if there is one.
4414 if (getLexer().is(AsmToken::Comma))
4415 Parser.Lex();
4417 switch (Res) {
4418 case MatchOperand_Success: break;
4419 case MatchOperand_ParseFail:
4420 // FIXME: use real operand location rather than the current location.
4421 Error(getLexer().getLoc(), "failed parsing operand.");
4422 while (!getLexer().is(AsmToken::EndOfStatement)) {
4423 Parser.Lex();
4425 return true;
4426 case MatchOperand_NoMatch:
4427 // FIXME: use real operand location rather than the current location.
4428 Error(getLexer().getLoc(), "not a valid operand.");
4429 while (!getLexer().is(AsmToken::EndOfStatement)) {
4430 Parser.Lex();
4432 return true;
4436 return false;
4439 //===----------------------------------------------------------------------===//
4440 // Utility functions
4441 //===----------------------------------------------------------------------===//
4443 OperandMatchResultTy
4444 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, int64_t &IntVal) {
4446 if (!trySkipId(Prefix, AsmToken::Colon))
4447 return MatchOperand_NoMatch;
4449 return parseExpr(IntVal) ? MatchOperand_Success : MatchOperand_ParseFail;
4452 OperandMatchResultTy
4453 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
4454 AMDGPUOperand::ImmTy ImmTy,
4455 bool (*ConvertResult)(int64_t&)) {
4456 SMLoc S = getLoc();
4457 int64_t Value = 0;
4459 OperandMatchResultTy Res = parseIntWithPrefix(Prefix, Value);
4460 if (Res != MatchOperand_Success)
4461 return Res;
4463 if (ConvertResult && !ConvertResult(Value)) {
4464 Error(S, "invalid " + StringRef(Prefix) + " value.");
4467 Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy));
4468 return MatchOperand_Success;
4471 OperandMatchResultTy
4472 AMDGPUAsmParser::parseOperandArrayWithPrefix(const char *Prefix,
4473 OperandVector &Operands,
4474 AMDGPUOperand::ImmTy ImmTy,
4475 bool (*ConvertResult)(int64_t&)) {
4476 SMLoc S = getLoc();
4477 if (!trySkipId(Prefix, AsmToken::Colon))
4478 return MatchOperand_NoMatch;
4480 if (!skipToken(AsmToken::LBrac, "expected a left square bracket"))
4481 return MatchOperand_ParseFail;
4483 unsigned Val = 0;
4484 const unsigned MaxSize = 4;
4486 // FIXME: How to verify the number of elements matches the number of src
4487 // operands?
4488 for (int I = 0; ; ++I) {
4489 int64_t Op;
4490 SMLoc Loc = getLoc();
4491 if (!parseExpr(Op))
4492 return MatchOperand_ParseFail;
4494 if (Op != 0 && Op != 1) {
4495 Error(Loc, "invalid " + StringRef(Prefix) + " value.");
4496 return MatchOperand_ParseFail;
4499 Val |= (Op << I);
4501 if (trySkipToken(AsmToken::RBrac))
4502 break;
4504 if (I + 1 == MaxSize) {
4505 Error(getLoc(), "expected a closing square bracket");
4506 return MatchOperand_ParseFail;
4509 if (!skipToken(AsmToken::Comma, "expected a comma"))
4510 return MatchOperand_ParseFail;
4513 Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy));
4514 return MatchOperand_Success;
4517 OperandMatchResultTy
4518 AMDGPUAsmParser::parseNamedBit(const char *Name, OperandVector &Operands,
4519 AMDGPUOperand::ImmTy ImmTy) {
4520 int64_t Bit = 0;
4521 SMLoc S = Parser.getTok().getLoc();
4523 // If we are at the end of the statement, this is a default argument, so
4524 // use a default value.
4525 if (getLexer().isNot(AsmToken::EndOfStatement)) {
4526 switch(getLexer().getKind()) {
4527 case AsmToken::Identifier: {
4528 StringRef Tok = Parser.getTok().getString();
4529 if (Tok == Name) {
4530 if (Tok == "r128" && isGFX9())
4531 Error(S, "r128 modifier is not supported on this GPU");
4532 if (Tok == "a16" && !isGFX9() && !isGFX10())
4533 Error(S, "a16 modifier is not supported on this GPU");
4534 Bit = 1;
4535 Parser.Lex();
4536 } else if (Tok.startswith("no") && Tok.endswith(Name)) {
4537 Bit = 0;
4538 Parser.Lex();
4539 } else {
4540 return MatchOperand_NoMatch;
4542 break;
4544 default:
4545 return MatchOperand_NoMatch;
4549 if (!isGFX10() && ImmTy == AMDGPUOperand::ImmTyDLC)
4550 return MatchOperand_ParseFail;
4552 Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy));
4553 return MatchOperand_Success;
4556 static void addOptionalImmOperand(
4557 MCInst& Inst, const OperandVector& Operands,
4558 AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx,
4559 AMDGPUOperand::ImmTy ImmT,
4560 int64_t Default = 0) {
4561 auto i = OptionalIdx.find(ImmT);
4562 if (i != OptionalIdx.end()) {
4563 unsigned Idx = i->second;
4564 ((AMDGPUOperand &)*Operands[Idx]).addImmOperands(Inst, 1);
4565 } else {
4566 Inst.addOperand(MCOperand::createImm(Default));
4570 OperandMatchResultTy
4571 AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix, StringRef &Value) {
4572 if (getLexer().isNot(AsmToken::Identifier)) {
4573 return MatchOperand_NoMatch;
4575 StringRef Tok = Parser.getTok().getString();
4576 if (Tok != Prefix) {
4577 return MatchOperand_NoMatch;
4580 Parser.Lex();
4581 if (getLexer().isNot(AsmToken::Colon)) {
4582 return MatchOperand_ParseFail;
4585 Parser.Lex();
4586 if (getLexer().isNot(AsmToken::Identifier)) {
4587 return MatchOperand_ParseFail;
4590 Value = Parser.getTok().getString();
4591 return MatchOperand_Success;
4594 // dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their
4595 // values to live in a joint format operand in the MCInst encoding.
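// For example (illustrative values), dfmt:15 nfmt:2 may appear in either
// order and is packed as Dfmt | (Nfmt << 4) = 15 | (2 << 4) = 0x2f.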
4596 OperandMatchResultTy
4597 AMDGPUAsmParser::parseDfmtNfmt(OperandVector &Operands) {
4598 SMLoc S = Parser.getTok().getLoc();
4599 int64_t Dfmt = 0, Nfmt = 0;
4600 // dfmt and nfmt can appear in either order, and each is optional.
4601 bool GotDfmt = false, GotNfmt = false;
4602 while (!GotDfmt || !GotNfmt) {
4603 if (!GotDfmt) {
4604 auto Res = parseIntWithPrefix("dfmt", Dfmt);
4605 if (Res != MatchOperand_NoMatch) {
4606 if (Res != MatchOperand_Success)
4607 return Res;
4608 if (Dfmt >= 16) {
4609 Error(Parser.getTok().getLoc(), "out of range dfmt");
4610 return MatchOperand_ParseFail;
4612 GotDfmt = true;
4613 Parser.Lex();
4614 continue;
4617 if (!GotNfmt) {
4618 auto Res = parseIntWithPrefix("nfmt", Nfmt);
4619 if (Res != MatchOperand_NoMatch) {
4620 if (Res != MatchOperand_Success)
4621 return Res;
4622 if (Nfmt >= 8) {
4623 Error(Parser.getTok().getLoc(), "out of range nfmt");
4624 return MatchOperand_ParseFail;
4626 GotNfmt = true;
4627 Parser.Lex();
4628 continue;
4631 break;
4633 if (!GotDfmt && !GotNfmt)
4634 return MatchOperand_NoMatch;
4635 auto Format = Dfmt | Nfmt << 4;
4636 Operands.push_back(
4637 AMDGPUOperand::CreateImm(this, Format, S, AMDGPUOperand::ImmTyFORMAT));
4638 return MatchOperand_Success;
4641 //===----------------------------------------------------------------------===//
4642 // ds
4643 //===----------------------------------------------------------------------===//
4645 void AMDGPUAsmParser::cvtDSOffset01(MCInst &Inst,
4646 const OperandVector &Operands) {
4647 OptionalImmIndexMap OptionalIdx;
4649 for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
4650 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4652 // Add the register arguments
4653 if (Op.isReg()) {
4654 Op.addRegOperands(Inst, 1);
4655 continue;
4658 // Handle optional arguments
4659 OptionalIdx[Op.getImmTy()] = i;
4662 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset0);
4663 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset1);
4664 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);
4666 Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
4669 void AMDGPUAsmParser::cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
4670 bool IsGdsHardcoded) {
4671 OptionalImmIndexMap OptionalIdx;
4673 for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
4674 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4676 // Add the register arguments
4677 if (Op.isReg()) {
4678 Op.addRegOperands(Inst, 1);
4679 continue;
4682 if (Op.isToken() && Op.getToken() == "gds") {
4683 IsGdsHardcoded = true;
4684 continue;
4687 // Handle optional arguments
4688 OptionalIdx[Op.getImmTy()] = i;
4691 AMDGPUOperand::ImmTy OffsetType =
4692 (Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx10 ||
4693 Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx6_gfx7 ||
4694 Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_vi) ? AMDGPUOperand::ImmTySwizzle :
4695 AMDGPUOperand::ImmTyOffset;
4697 addOptionalImmOperand(Inst, Operands, OptionalIdx, OffsetType);
4699 if (!IsGdsHardcoded) {
4700 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);
4702 Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
4705 void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) {
4706 OptionalImmIndexMap OptionalIdx;
4708 unsigned OperandIdx[4];
4709 unsigned EnMask = 0;
4710 int SrcIdx = 0;
4712 for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
4713 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4715 // Add the register arguments
4716 if (Op.isReg()) {
4717 assert(SrcIdx < 4);
4718 OperandIdx[SrcIdx] = Inst.size();
4719 Op.addRegOperands(Inst, 1);
4720 ++SrcIdx;
4721 continue;
4724 if (Op.isOff()) {
4725 assert(SrcIdx < 4);
4726 OperandIdx[SrcIdx] = Inst.size();
4727 Inst.addOperand(MCOperand::createReg(AMDGPU::NoRegister));
4728 ++SrcIdx;
4729 continue;
4732 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) {
4733 Op.addImmOperands(Inst, 1);
4734 continue;
4737 if (Op.isToken() && Op.getToken() == "done")
4738 continue;
4740 // Handle optional arguments
4741 OptionalIdx[Op.getImmTy()] = i;
4744 assert(SrcIdx == 4);
4746 bool Compr = false;
4747 if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) {
4748 Compr = true;
4749 Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]);
4750 Inst.getOperand(OperandIdx[2]).setReg(AMDGPU::NoRegister);
4751 Inst.getOperand(OperandIdx[3]).setReg(AMDGPU::NoRegister);
4754 for (auto i = 0; i < SrcIdx; ++i) {
4755 if (Inst.getOperand(OperandIdx[i]).getReg() != AMDGPU::NoRegister) {
4756 EnMask |= Compr? (0x3 << i * 2) : (0x1 << i);
4760 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM);
4761 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr);
4763 Inst.addOperand(MCOperand::createImm(EnMask));
4766 //===----------------------------------------------------------------------===//
4767 // s_waitcnt
4768 //===----------------------------------------------------------------------===//
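// Sketch of the operand forms accepted below (counter values are
// illustrative): either a raw immediate, e.g. "s_waitcnt 0", or named
// counters that may be joined with '&' or ',':
//
//   s_waitcnt vmcnt(0) & expcnt(0) & lgkmcnt(0)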
4770 static bool
4771 encodeCnt(
4772 const AMDGPU::IsaVersion ISA,
4773 int64_t &IntVal,
4774 int64_t CntVal,
4775 bool Saturate,
4776 unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned),
4777 unsigned (*decode)(const IsaVersion &Version, unsigned))
4779 bool Failed = false;
4781 IntVal = encode(ISA, IntVal, CntVal);
4782 if (CntVal != decode(ISA, IntVal)) {
4783 if (Saturate) {
4784 IntVal = encode(ISA, IntVal, -1);
4785 } else {
4786 Failed = true;
4789 return Failed;
4792 bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) {
4794 SMLoc CntLoc = getLoc();
4795 StringRef CntName = getTokenStr();
4797 if (!skipToken(AsmToken::Identifier, "expected a counter name") ||
4798 !skipToken(AsmToken::LParen, "expected a left parenthesis"))
4799 return false;
4801 int64_t CntVal;
4802 SMLoc ValLoc = getLoc();
4803 if (!parseExpr(CntVal))
4804 return false;
4806 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
4808 bool Failed = true;
4809 bool Sat = CntName.endswith("_sat");
4811 if (CntName == "vmcnt" || CntName == "vmcnt_sat") {
4812 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt);
4813 } else if (CntName == "expcnt" || CntName == "expcnt_sat") {
4814 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt);
4815 } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") {
4816 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt);
4817 } else {
4818 Error(CntLoc, "invalid counter name " + CntName);
4819 return false;
4822 if (Failed) {
4823 Error(ValLoc, "too large value for " + CntName);
4824 return false;
4827 if (!skipToken(AsmToken::RParen, "expected a closing parenthesis"))
4828 return false;
4830 if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) {
4831 if (isToken(AsmToken::EndOfStatement)) {
4832 Error(getLoc(), "expected a counter name");
4833 return false;
4837 return true;
4840 OperandMatchResultTy
4841 AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) {
4842 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
4843 int64_t Waitcnt = getWaitcntBitMask(ISA);
4844 SMLoc S = getLoc();
4846 // If parse failed, do not return error code
4847 // to avoid excessive error messages.
4848 if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
4849 while (parseCnt(Waitcnt) && !isToken(AsmToken::EndOfStatement));
4850 } else {
4851 parseExpr(Waitcnt);
4854 Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S));
4855 return MatchOperand_Success;
4858 bool
4859 AMDGPUOperand::isSWaitCnt() const {
4860 return isImm();
4863 //===----------------------------------------------------------------------===//
4864 // hwreg
4865 //===----------------------------------------------------------------------===//
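// Sketch of the operand form accepted below (the register name, offset and
// width are illustrative; symbolic names come from AMDGPUAsmUtils):
//
//   s_getreg_b32 s0, hwreg(HW_REG_MODE, 0, 32)
//
// A bare 16-bit immediate is also accepted in place of the hwreg(...) macro.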
4867 bool
4868 AMDGPUAsmParser::parseHwregBody(OperandInfoTy &HwReg,
4869 int64_t &Offset,
4870 int64_t &Width) {
4871 using namespace llvm::AMDGPU::Hwreg;
4873 // The register may be specified by name or using a numeric code
4874 if (isToken(AsmToken::Identifier) &&
4875 (HwReg.Id = getHwregId(getTokenStr())) >= 0) {
4876 HwReg.IsSymbolic = true;
4877 lex(); // skip register name
4878 } else if (!parseExpr(HwReg.Id)) {
4879 return false;
4882 if (trySkipToken(AsmToken::RParen))
4883 return true;
4885 // parse optional params
4886 return
4887 skipToken(AsmToken::Comma, "expected a comma or a closing parenthesis") &&
4888 parseExpr(Offset) &&
4889 skipToken(AsmToken::Comma, "expected a comma") &&
4890 parseExpr(Width) &&
4891 skipToken(AsmToken::RParen, "expected a closing parenthesis");
4894 bool
4895 AMDGPUAsmParser::validateHwreg(const OperandInfoTy &HwReg,
4896 const int64_t Offset,
4897 const int64_t Width,
4898 const SMLoc Loc) {
4900 using namespace llvm::AMDGPU::Hwreg;
4902 if (HwReg.IsSymbolic && !isValidHwreg(HwReg.Id, getSTI())) {
4903 Error(Loc, "specified hardware register is not supported on this GPU");
4904 return false;
4905 } else if (!isValidHwreg(HwReg.Id)) {
4906 Error(Loc, "invalid code of hardware register: only 6-bit values are legal");
4907 return false;
4908 } else if (!isValidHwregOffset(Offset)) {
4909 Error(Loc, "invalid bit offset: only 5-bit values are legal");
4910 return false;
4911 } else if (!isValidHwregWidth(Width)) {
4912 Error(Loc, "invalid bitfield width: only values from 1 to 32 are legal");
4913 return false;
4915 return true;
4918 OperandMatchResultTy
4919 AMDGPUAsmParser::parseHwreg(OperandVector &Operands) {
4920 using namespace llvm::AMDGPU::Hwreg;
4922 int64_t ImmVal = 0;
4923 SMLoc Loc = getLoc();
4925 // If parse failed, do not return error code
4926 // to avoid excessive error messages.
4927 if (trySkipId("hwreg", AsmToken::LParen)) {
4928 OperandInfoTy HwReg(ID_UNKNOWN_);
4929 int64_t Offset = OFFSET_DEFAULT_;
4930 int64_t Width = WIDTH_DEFAULT_;
4931 if (parseHwregBody(HwReg, Offset, Width) &&
4932 validateHwreg(HwReg, Offset, Width, Loc)) {
4933 ImmVal = encodeHwreg(HwReg.Id, Offset, Width);
4935 } else if (parseExpr(ImmVal)) {
4936 if (ImmVal < 0 || !isUInt<16>(ImmVal))
4937 Error(Loc, "invalid immediate: only 16-bit values are legal");
4940 Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTyHwreg));
4941 return MatchOperand_Success;
4944 bool AMDGPUOperand::isHwreg() const {
4945 return isImmTy(ImmTyHwreg);
4948 //===----------------------------------------------------------------------===//
4949 // sendmsg
4950 //===----------------------------------------------------------------------===//
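// Sketch of the operand form accepted below (message, operation and stream id
// are illustrative; symbolic names come from AMDGPUAsmUtils):
//
//   s_sendmsg sendmsg(MSG_GS, GS_OP_EMIT, 0)
//
// A bare 16-bit immediate is also accepted in place of the sendmsg(...) macro.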
4952 bool
4953 AMDGPUAsmParser::parseSendMsgBody(OperandInfoTy &Msg,
4954 OperandInfoTy &Op,
4955 OperandInfoTy &Stream) {
4956 using namespace llvm::AMDGPU::SendMsg;
4958 if (isToken(AsmToken::Identifier) && (Msg.Id = getMsgId(getTokenStr())) >= 0) {
4959 Msg.IsSymbolic = true;
4960 lex(); // skip message name
4961 } else if (!parseExpr(Msg.Id)) {
4962 return false;
4965 if (trySkipToken(AsmToken::Comma)) {
4966 Op.IsDefined = true;
4967 if (isToken(AsmToken::Identifier) &&
4968 (Op.Id = getMsgOpId(Msg.Id, getTokenStr())) >= 0) {
4969 lex(); // skip operation name
4970 } else if (!parseExpr(Op.Id)) {
4971 return false;
4974 if (trySkipToken(AsmToken::Comma)) {
4975 Stream.IsDefined = true;
4976 if (!parseExpr(Stream.Id))
4977 return false;
4981 return skipToken(AsmToken::RParen, "expected a closing parenthesis");
4984 bool
4985 AMDGPUAsmParser::validateSendMsg(const OperandInfoTy &Msg,
4986 const OperandInfoTy &Op,
4987 const OperandInfoTy &Stream,
4988 const SMLoc S) {
4989 using namespace llvm::AMDGPU::SendMsg;
4991 // Validation strictness depends on whether the message is specified
4992 // in a symbolic or in a numeric form. In the latter case
4993 // only the encoding possibility is checked.
4994 bool Strict = Msg.IsSymbolic;
4996 if (!isValidMsgId(Msg.Id, getSTI(), Strict)) {
4997 Error(S, "invalid message id");
4998 return false;
4999 } else if (Strict && (msgRequiresOp(Msg.Id) != Op.IsDefined)) {
5000 Error(S, Op.IsDefined ?
5001 "message does not support operations" :
5002 "missing message operation");
5003 return false;
5004 } else if (!isValidMsgOp(Msg.Id, Op.Id, Strict)) {
5005 Error(S, "invalid operation id");
5006 return false;
5007 } else if (Strict && !msgSupportsStream(Msg.Id, Op.Id) && Stream.IsDefined) {
5008 Error(S, "message operation does not support streams");
5009 return false;
5010 } else if (!isValidMsgStream(Msg.Id, Op.Id, Stream.Id, Strict)) {
5011 Error(S, "invalid message stream id");
5012 return false;
5014 return true;
5017 OperandMatchResultTy
5018 AMDGPUAsmParser::parseSendMsgOp(OperandVector &Operands) {
5019 using namespace llvm::AMDGPU::SendMsg;
5021 int64_t ImmVal = 0;
5022 SMLoc Loc = getLoc();
5024 // If parse failed, do not return error code
5025 // to avoid excessive error messages.
5026 if (trySkipId("sendmsg", AsmToken::LParen)) {
5027 OperandInfoTy Msg(ID_UNKNOWN_);
5028 OperandInfoTy Op(OP_NONE_);
5029 OperandInfoTy Stream(STREAM_ID_NONE_);
5030 if (parseSendMsgBody(Msg, Op, Stream) &&
5031 validateSendMsg(Msg, Op, Stream, Loc)) {
5032 ImmVal = encodeMsg(Msg.Id, Op.Id, Stream.Id);
5034 } else if (parseExpr(ImmVal)) {
5035 if (ImmVal < 0 || !isUInt<16>(ImmVal))
5036 Error(Loc, "invalid immediate: only 16-bit values are legal");
5039 Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTySendMsg));
5040 return MatchOperand_Success;
5043 bool AMDGPUOperand::isSendMsg() const {
5044 return isImmTy(ImmTySendMsg);
5047 //===----------------------------------------------------------------------===//
5048 // v_interp
5049 //===----------------------------------------------------------------------===//
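// Sketch of the operands parsed below (illustrative): the slot is one of p10,
// p20 or p0, and the attribute carries a channel suffix, e.g.
//
//   v_interp_p1_f32 v0, v1, attr0.x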
5051 OperandMatchResultTy AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) {
5052 if (getLexer().getKind() != AsmToken::Identifier)
5053 return MatchOperand_NoMatch;
5055 StringRef Str = Parser.getTok().getString();
5056 int Slot = StringSwitch<int>(Str)
5057 .Case("p10", 0)
5058 .Case("p20", 1)
5059 .Case("p0", 2)
5060 .Default(-1);
5062 SMLoc S = Parser.getTok().getLoc();
5063 if (Slot == -1)
5064 return MatchOperand_ParseFail;
5066 Parser.Lex();
5067 Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S,
5068 AMDGPUOperand::ImmTyInterpSlot));
5069 return MatchOperand_Success;
5072 OperandMatchResultTy AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) {
5073 if (getLexer().getKind() != AsmToken::Identifier)
5074 return MatchOperand_NoMatch;
5076 StringRef Str = Parser.getTok().getString();
5077 if (!Str.startswith("attr"))
5078 return MatchOperand_NoMatch;
5080 StringRef Chan = Str.take_back(2);
5081 int AttrChan = StringSwitch<int>(Chan)
5082 .Case(".x", 0)
5083 .Case(".y", 1)
5084 .Case(".z", 2)
5085 .Case(".w", 3)
5086 .Default(-1);
5087 if (AttrChan == -1)
5088 return MatchOperand_ParseFail;
5090 Str = Str.drop_back(2).drop_front(4);
5092 uint8_t Attr;
5093 if (Str.getAsInteger(10, Attr))
5094 return MatchOperand_ParseFail;
5096 SMLoc S = Parser.getTok().getLoc();
5097 Parser.Lex();
5098 if (Attr > 63) {
5099 Error(S, "out of bounds attr");
5100 return MatchOperand_Success;
5103 SMLoc SChan = SMLoc::getFromPointer(Chan.data());
5105 Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S,
5106 AMDGPUOperand::ImmTyInterpAttr));
5107 Operands.push_back(AMDGPUOperand::CreateImm(this, AttrChan, SChan,
5108 AMDGPUOperand::ImmTyAttrChan));
5109 return MatchOperand_Success;
5112 //===----------------------------------------------------------------------===//
5113 // exp
5114 //===----------------------------------------------------------------------===//
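// Sketch of an export target accepted below (illustrative): mrt0..mrt7, mrtz,
// null, pos0..pos3 (pos4 and prim on GFX10), or param0..param31, e.g.
//
//   exp mrt0 v0, v1, v2, v3 done vm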
5116 void AMDGPUAsmParser::errorExpTgt() {
5117 Error(Parser.getTok().getLoc(), "invalid exp target");
5120 OperandMatchResultTy AMDGPUAsmParser::parseExpTgtImpl(StringRef Str,
5121 uint8_t &Val) {
5122 if (Str == "null") {
5123 Val = 9;
5124 return MatchOperand_Success;
5127 if (Str.startswith("mrt")) {
5128 Str = Str.drop_front(3);
5129 if (Str == "z") { // == mrtz
5130 Val = 8;
5131 return MatchOperand_Success;
5134 if (Str.getAsInteger(10, Val))
5135 return MatchOperand_ParseFail;
5137 if (Val > 7)
5138 errorExpTgt();
5140 return MatchOperand_Success;
5143 if (Str.startswith("pos")) {
5144 Str = Str.drop_front(3);
5145 if (Str.getAsInteger(10, Val))
5146 return MatchOperand_ParseFail;
5148 if (Val > 4 || (Val == 4 && !isGFX10()))
5149 errorExpTgt();
5151 Val += 12;
5152 return MatchOperand_Success;
5155 if (isGFX10() && Str == "prim") {
5156 Val = 20;
5157 return MatchOperand_Success;
5160 if (Str.startswith("param")) {
5161 Str = Str.drop_front(5);
5162 if (Str.getAsInteger(10, Val))
5163 return MatchOperand_ParseFail;
5165 if (Val >= 32)
5166 errorExpTgt();
5168 Val += 32;
5169 return MatchOperand_Success;
5172 if (Str.startswith("invalid_target_")) {
5173 Str = Str.drop_front(15);
5174 if (Str.getAsInteger(10, Val))
5175 return MatchOperand_ParseFail;
5177 errorExpTgt();
5178 return MatchOperand_Success;
5181 return MatchOperand_NoMatch;
5184 OperandMatchResultTy AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) {
5185 uint8_t Val;
5186 StringRef Str = Parser.getTok().getString();
5188 auto Res = parseExpTgtImpl(Str, Val);
5189 if (Res != MatchOperand_Success)
5190 return Res;
5192 SMLoc S = Parser.getTok().getLoc();
5193 Parser.Lex();
5195 Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S,
5196 AMDGPUOperand::ImmTyExpTgt));
5197 return MatchOperand_Success;
5200 //===----------------------------------------------------------------------===//
5201 // parser helpers
5202 //===----------------------------------------------------------------------===//
5204 bool
5205 AMDGPUAsmParser::isId(const AsmToken &Token, const StringRef Id) const {
5206 return Token.is(AsmToken::Identifier) && Token.getString() == Id;
5209 bool
5210 AMDGPUAsmParser::isId(const StringRef Id) const {
5211 return isId(getToken(), Id);
5214 bool
5215 AMDGPUAsmParser::isToken(const AsmToken::TokenKind Kind) const {
5216 return getTokenKind() == Kind;
5219 bool
5220 AMDGPUAsmParser::trySkipId(const StringRef Id) {
5221 if (isId(Id)) {
5222 lex();
5223 return true;
5225 return false;
5228 bool
5229 AMDGPUAsmParser::trySkipId(const StringRef Id, const AsmToken::TokenKind Kind) {
5230 if (isId(Id) && peekToken().is(Kind)) {
5231 lex();
5232 lex();
5233 return true;
5235 return false;
5238 bool
5239 AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) {
5240 if (isToken(Kind)) {
5241 lex();
5242 return true;
5244 return false;
5247 bool
5248 AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind,
5249 const StringRef ErrMsg) {
5250 if (!trySkipToken(Kind)) {
5251 Error(getLoc(), ErrMsg);
5252 return false;
5254 return true;
5257 bool
5258 AMDGPUAsmParser::parseExpr(int64_t &Imm) {
5259 return !getParser().parseAbsoluteExpression(Imm);
5262 bool
5263 AMDGPUAsmParser::parseExpr(OperandVector &Operands) {
5264 SMLoc S = getLoc();
5266 const MCExpr *Expr;
5267 if (Parser.parseExpression(Expr))
5268 return false;
5270 int64_t IntVal;
5271 if (Expr->evaluateAsAbsolute(IntVal)) {
5272 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
5273 } else {
5274 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
5276 return true;
5279 bool
5280 AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) {
5281 if (isToken(AsmToken::String)) {
5282 Val = getToken().getStringContents();
5283 lex();
5284 return true;
5285 } else {
5286 Error(getLoc(), ErrMsg);
5287 return false;
5291 AsmToken
5292 AMDGPUAsmParser::getToken() const {
5293 return Parser.getTok();
5296 AsmToken
5297 AMDGPUAsmParser::peekToken() {
5298 return getLexer().peekTok();
5301 void
5302 AMDGPUAsmParser::peekTokens(MutableArrayRef<AsmToken> Tokens) {
5303 auto TokCount = getLexer().peekTokens(Tokens);
5305 for (auto Idx = TokCount; Idx < Tokens.size(); ++Idx)
5306 Tokens[Idx] = AsmToken(AsmToken::Error, "");
5309 AsmToken::TokenKind
5310 AMDGPUAsmParser::getTokenKind() const {
5311 return getLexer().getKind();
5314 SMLoc
5315 AMDGPUAsmParser::getLoc() const {
5316 return getToken().getLoc();
5319 StringRef
5320 AMDGPUAsmParser::getTokenStr() const {
5321 return getToken().getString();
5324 void
5325 AMDGPUAsmParser::lex() {
5326 Parser.Lex();
5329 //===----------------------------------------------------------------------===//
5330 // swizzle
5331 //===----------------------------------------------------------------------===//
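// Sketch of the swizzle macro forms parsed below (lane ids, group sizes and
// the bitmask string are illustrative; mode names come from IdSymbolic):
//
//   ds_swizzle_b32 v0, v1 offset:swizzle(QUAD_PERM, 0, 1, 2, 3)
//   ds_swizzle_b32 v0, v1 offset:swizzle(BITMASK_PERM, "01pi0")
//
// A plain 16-bit offset, e.g. offset:0x8055, is also accepted.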
5333 LLVM_READNONE
5334 static unsigned
5335 encodeBitmaskPerm(const unsigned AndMask,
5336 const unsigned OrMask,
5337 const unsigned XorMask) {
5338 using namespace llvm::AMDGPU::Swizzle;
5340 return BITMASK_PERM_ENC |
5341 (AndMask << BITMASK_AND_SHIFT) |
5342 (OrMask << BITMASK_OR_SHIFT) |
5343 (XorMask << BITMASK_XOR_SHIFT);
5346 bool
5347 AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
5348 const unsigned MinVal,
5349 const unsigned MaxVal,
5350 const StringRef ErrMsg) {
5351 for (unsigned i = 0; i < OpNum; ++i) {
5352 if (!skipToken(AsmToken::Comma, "expected a comma")){
5353 return false;
5355 SMLoc ExprLoc = Parser.getTok().getLoc();
5356 if (!parseExpr(Op[i])) {
5357 return false;
5359 if (Op[i] < MinVal || Op[i] > MaxVal) {
5360 Error(ExprLoc, ErrMsg);
5361 return false;
5365 return true;
5368 bool
5369 AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm) {
5370 using namespace llvm::AMDGPU::Swizzle;
5372 int64_t Lane[LANE_NUM];
5373 if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX,
5374 "expected a 2-bit lane id")) {
5375 Imm = QUAD_PERM_ENC;
5376 for (unsigned I = 0; I < LANE_NUM; ++I) {
5377 Imm |= Lane[I] << (LANE_SHIFT * I);
5379 return true;
5381 return false;
5384 bool
5385 AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) {
5386 using namespace llvm::AMDGPU::Swizzle;
5388 SMLoc S = Parser.getTok().getLoc();
5389 int64_t GroupSize;
5390 int64_t LaneIdx;
5392 if (!parseSwizzleOperands(1, &GroupSize,
5393 2, 32,
5394 "group size must be in the interval [2,32]")) {
5395 return false;
5397 if (!isPowerOf2_64(GroupSize)) {
5398 Error(S, "group size must be a power of two");
5399 return false;
5401 if (parseSwizzleOperands(1, &LaneIdx,
5402 0, GroupSize - 1,
5403 "lane id must be in the interval [0,group size - 1]")) {
5404 Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0);
5405 return true;
5407 return false;
5410 bool
5411 AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) {
5412 using namespace llvm::AMDGPU::Swizzle;
5414 SMLoc S = Parser.getTok().getLoc();
5415 int64_t GroupSize;
5417 if (!parseSwizzleOperands(1, &GroupSize,
5418 2, 32, "group size must be in the interval [2,32]")) {
5419 return false;
5421 if (!isPowerOf2_64(GroupSize)) {
5422 Error(S, "group size must be a power of two");
5423 return false;
5426 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize - 1);
5427 return true;
5430 bool
5431 AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) {
5432 using namespace llvm::AMDGPU::Swizzle;
5434 SMLoc S = Parser.getTok().getLoc();
5435 int64_t GroupSize;
5437 if (!parseSwizzleOperands(1, &GroupSize,
5438 1, 16, "group size must be in the interval [1,16]")) {
5439 return false;
5441 if (!isPowerOf2_64(GroupSize)) {
5442 Error(S, "group size must be a power of two");
5443 return false;
5446 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize);
5447 return true;
5450 bool
5451 AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) {
5452 using namespace llvm::AMDGPU::Swizzle;
5454 if (!skipToken(AsmToken::Comma, "expected a comma")) {
5455 return false;
5458 StringRef Ctl;
5459 SMLoc StrLoc = Parser.getTok().getLoc();
5460 if (!parseString(Ctl)) {
5461 return false;
5463 if (Ctl.size() != BITMASK_WIDTH) {
5464 Error(StrLoc, "expected a 5-character mask");
5465 return false;
5468 unsigned AndMask = 0;
5469 unsigned OrMask = 0;
5470 unsigned XorMask = 0;
5472 for (size_t i = 0; i < Ctl.size(); ++i) {
5473 unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i);
5474 switch(Ctl[i]) {
5475 default:
5476 Error(StrLoc, "invalid mask");
5477 return false;
5478 case '0':
5479 break;
5480 case '1':
5481 OrMask |= Mask;
5482 break;
5483 case 'p':
5484 AndMask |= Mask;
5485 break;
5486 case 'i':
5487 AndMask |= Mask;
5488 XorMask |= Mask;
5489 break;
5493 Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask);
5494 return true;
5497 bool
5498 AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) {
5500 SMLoc OffsetLoc = Parser.getTok().getLoc();
5502 if (!parseExpr(Imm)) {
5503 return false;
5505 if (!isUInt<16>(Imm)) {
5506 Error(OffsetLoc, "expected a 16-bit offset");
5507 return false;
5509 return true;
5512 bool
5513 AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) {
5514 using namespace llvm::AMDGPU::Swizzle;
5516 if (skipToken(AsmToken::LParen, "expected a left parenthesis")) {
5518 SMLoc ModeLoc = Parser.getTok().getLoc();
5519 bool Ok = false;
5521 if (trySkipId(IdSymbolic[ID_QUAD_PERM])) {
5522 Ok = parseSwizzleQuadPerm(Imm);
5523 } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) {
5524 Ok = parseSwizzleBitmaskPerm(Imm);
5525 } else if (trySkipId(IdSymbolic[ID_BROADCAST])) {
5526 Ok = parseSwizzleBroadcast(Imm);
5527 } else if (trySkipId(IdSymbolic[ID_SWAP])) {
5528 Ok = parseSwizzleSwap(Imm);
5529 } else if (trySkipId(IdSymbolic[ID_REVERSE])) {
5530 Ok = parseSwizzleReverse(Imm);
5531 } else {
5532 Error(ModeLoc, "expected a swizzle mode");
5535 return Ok && skipToken(AsmToken::RParen, "expected a closing parenthesis");
5538 return false;
5541 OperandMatchResultTy
5542 AMDGPUAsmParser::parseSwizzleOp(OperandVector &Operands) {
5543 SMLoc S = Parser.getTok().getLoc();
5544 int64_t Imm = 0;
5546 if (trySkipId("offset")) {
5548 bool Ok = false;
5549 if (skipToken(AsmToken::Colon, "expected a colon")) {
5550 if (trySkipId("swizzle")) {
5551 Ok = parseSwizzleMacro(Imm);
5552 } else {
5553 Ok = parseSwizzleOffset(Imm);
5557 Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle));
5559 return Ok? MatchOperand_Success : MatchOperand_ParseFail;
5560 } else {
5561 // Swizzle "offset" operand is optional.
5562 // If it is omitted, try parsing other optional operands.
5563 return parseOptionalOpr(Operands);
5567 bool
5568 AMDGPUOperand::isSwizzle() const {
5569 return isImmTy(ImmTySwizzle);
5572 //===----------------------------------------------------------------------===//
5573 // VGPR Index Mode
5574 //===----------------------------------------------------------------------===//
5576 int64_t AMDGPUAsmParser::parseGPRIdxMacro() {
5578 using namespace llvm::AMDGPU::VGPRIndexMode;
5580 if (trySkipToken(AsmToken::RParen)) {
5581 return OFF;
5584 int64_t Imm = 0;
5586 while (true) {
5587 unsigned Mode = 0;
5588 SMLoc S = Parser.getTok().getLoc();
5590 for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) {
5591 if (trySkipId(IdSymbolic[ModeId])) {
5592 Mode = 1 << ModeId;
5593 break;
5597 if (Mode == 0) {
5598 Error(S, (Imm == 0)?
5599 "expected a VGPR index mode or a closing parenthesis" :
5600 "expected a VGPR index mode");
5601 break;
5604 if (Imm & Mode) {
5605 Error(S, "duplicate VGPR index mode");
5606 break;
5608 Imm |= Mode;
5610 if (trySkipToken(AsmToken::RParen))
5611 break;
5612 if (!skipToken(AsmToken::Comma,
5613 "expected a comma or a closing parenthesis"))
5614 break;
5617 return Imm;
5620 OperandMatchResultTy
5621 AMDGPUAsmParser::parseGPRIdxMode(OperandVector &Operands) {
5623 int64_t Imm = 0;
5624 SMLoc S = Parser.getTok().getLoc();
5626 if (getLexer().getKind() == AsmToken::Identifier &&
5627 Parser.getTok().getString() == "gpr_idx" &&
5628 getLexer().peekTok().is(AsmToken::LParen)) {
5630 Parser.Lex();
5631 Parser.Lex();
5633 // If parse failed, trigger an error but do not return error code
5634 // to avoid excessive error messages.
5635 Imm = parseGPRIdxMacro();
5637 } else {
5638 if (getParser().parseAbsoluteExpression(Imm))
5639 return MatchOperand_NoMatch;
5640 if (Imm < 0 || !isUInt<4>(Imm)) {
5641 Error(S, "invalid immediate: only 4-bit values are legal");
5645 Operands.push_back(
5646 AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyGprIdxMode));
5647 return MatchOperand_Success;
5650 bool AMDGPUOperand::isGPRIdxMode() const {
5651 return isImmTy(ImmTyGprIdxMode);
5654 //===----------------------------------------------------------------------===//
5655 // sopp branch targets
5656 //===----------------------------------------------------------------------===//
5658 OperandMatchResultTy
5659 AMDGPUAsmParser::parseSOppBrTarget(OperandVector &Operands) {
5661 // Make sure we are not parsing something
5662 // that looks like a label or an expression but is not.
5663 // This will improve error messages.
5664 if (isRegister() || isModifier())
5665 return MatchOperand_NoMatch;
5667 if (parseExpr(Operands)) {
5669 AMDGPUOperand &Opr = ((AMDGPUOperand &)*Operands[Operands.size() - 1]);
5670 assert(Opr.isImm() || Opr.isExpr());
5671 SMLoc Loc = Opr.getStartLoc();
5673 // Currently we do not support arbitrary expressions as branch targets.
5674 // Only labels and absolute expressions are accepted.
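// For example (illustrative): "s_branch loop_end" and "s_branch 0x10" are
// accepted, while a composite expression such as "s_branch loop_end+4" is
// rejected below.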
5675 if (Opr.isExpr() && !Opr.isSymbolRefExpr()) {
5676 Error(Loc, "expected an absolute expression or a label");
5677 } else if (Opr.isImm() && !Opr.isS16Imm()) {
5678 Error(Loc, "expected a 16-bit signed jump offset");
5682 return MatchOperand_Success; // avoid excessive error messages
5685 //===----------------------------------------------------------------------===//
5686 // Boolean holding registers
5687 //===----------------------------------------------------------------------===//
5689 OperandMatchResultTy
5690 AMDGPUAsmParser::parseBoolReg(OperandVector &Operands) {
5691 return parseReg(Operands);
5694 //===----------------------------------------------------------------------===//
5695 // mubuf
5696 //===----------------------------------------------------------------------===//
5698 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultDLC() const {
5699 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDLC);
5702 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultGLC() const {
5703 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyGLC);
5706 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSLC() const {
5707 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTySLC);
5710 void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst,
5711 const OperandVector &Operands,
5712 bool IsAtomic,
5713 bool IsAtomicReturn,
5714 bool IsLds) {
5715 bool IsLdsOpcode = IsLds;
5716 bool HasLdsModifier = false;
5717 OptionalImmIndexMap OptionalIdx;
5718 assert(IsAtomicReturn ? IsAtomic : true);
5719 unsigned FirstOperandIdx = 1;
5721 for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) {
5722 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
5724 // Add the register arguments
5725 if (Op.isReg()) {
5726 Op.addRegOperands(Inst, 1);
5727 // Insert a tied src for atomic return dst.
5728 // This cannot be postponed as subsequent calls to
5729 // addImmOperands rely on correct number of MC operands.
5730 if (IsAtomicReturn && i == FirstOperandIdx)
5731 Op.addRegOperands(Inst, 1);
5732 continue;
5735 // Handle the case where soffset is an immediate
5736 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
5737 Op.addImmOperands(Inst, 1);
5738 continue;
5741 HasLdsModifier |= Op.isLDS();
5743 // Handle tokens like 'offen' which are sometimes hard-coded into the
5744 // asm string. There are no MCInst operands for these.
5745 if (Op.isToken()) {
5746 continue;
5748 assert(Op.isImm());
5750 // Handle optional arguments
5751 OptionalIdx[Op.getImmTy()] = i;
5754 // This is a workaround for an llvm quirk which may result in an
5755 // incorrect instruction selection. Lds and non-lds versions of
5756 // MUBUF instructions are identical except that lds versions
5757 // have a mandatory 'lds' modifier. However, this modifier follows
5758 // optional modifiers and the llvm asm matcher regards this 'lds'
5759 // modifier as an optional one. As a result, an lds version
5760 // of the opcode may be selected even if it has no 'lds' modifier.
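// Illustrative consequence (assumption): an instruction written without
// 'lds', e.g. "buffer_load_dword v1, off, s[4:7], s0", may be matched to the
// _lds opcode variant; the check below switches it back to the non-lds opcode.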
5761 if (IsLdsOpcode && !HasLdsModifier) {
5762 int NoLdsOpcode = AMDGPU::getMUBUFNoLdsInst(Inst.getOpcode());
5763 if (NoLdsOpcode != -1) { // Got lds version - correct it.
5764 Inst.setOpcode(NoLdsOpcode);
5765 IsLdsOpcode = false;
5769 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset);
5770 if (!IsAtomic) { // glc is hard-coded.
5771 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
5773 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
5775 if (!IsLdsOpcode) { // tfe is not legal with lds opcodes
5776 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
5779 if (isGFX10())
5780 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC);
5783 void AMDGPUAsmParser::cvtMtbuf(MCInst &Inst, const OperandVector &Operands) {
5784 OptionalImmIndexMap OptionalIdx;
5786 for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
5787 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
5789 // Add the register arguments
5790 if (Op.isReg()) {
5791 Op.addRegOperands(Inst, 1);
5792 continue;
5795 // Handle the case where soffset is an immediate
5796 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
5797 Op.addImmOperands(Inst, 1);
5798 continue;
5801 // Handle tokens like 'offen' which are sometimes hard-coded into the
5802 // asm string. There are no MCInst operands for these.
5803 if (Op.isToken()) {
5804 continue;
5806 assert(Op.isImm());
5808 // Handle optional arguments
5809 OptionalIdx[Op.getImmTy()] = i;
5812 addOptionalImmOperand(Inst, Operands, OptionalIdx,
5813 AMDGPUOperand::ImmTyOffset);
5814 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyFORMAT);
5815 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
5816 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
5817 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
5819 if (isGFX10())
5820 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC);
5823 //===----------------------------------------------------------------------===//
5824 // mimg
5825 //===----------------------------------------------------------------------===//
5827 void AMDGPUAsmParser::cvtMIMG(MCInst &Inst, const OperandVector &Operands,
5828 bool IsAtomic) {
5829 unsigned I = 1;
5830 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
5831 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
5832 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
5835 if (IsAtomic) {
5836 // Add src, same as dst
5837 assert(Desc.getNumDefs() == 1);
5838 ((AMDGPUOperand &)*Operands[I - 1]).addRegOperands(Inst, 1);
5841 OptionalImmIndexMap OptionalIdx;
5843 for (unsigned E = Operands.size(); I != E; ++I) {
5844 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
5846 // Add the register arguments
5847 if (Op.isReg()) {
5848 Op.addRegOperands(Inst, 1);
5849 } else if (Op.isImmModifier()) {
5850 OptionalIdx[Op.getImmTy()] = I;
5851 } else if (!Op.isToken()) {
5852 llvm_unreachable("unexpected operand type");
5856 bool IsGFX10 = isGFX10();
5858 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDMask);
5859 if (IsGFX10)
5860 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDim, -1);
5861 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyUNorm);
5862 if (IsGFX10)
5863 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC);
5864 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
5865 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
5866 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyR128A16);
5867 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
5868 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyLWE);
5869 if (!IsGFX10)
5870 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDA);
5871 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyD16);
5874 void AMDGPUAsmParser::cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands) {
5875 cvtMIMG(Inst, Operands, true);
5878 //===----------------------------------------------------------------------===//
5879 // smrd
5880 //===----------------------------------------------------------------------===//
5882 bool AMDGPUOperand::isSMRDOffset8() const {
5883 return isImm() && isUInt<8>(getImm());
5886 bool AMDGPUOperand::isSMRDOffset20() const {
5887 return isImm() && isUInt<20>(getImm());
5890 bool AMDGPUOperand::isSMRDLiteralOffset() const {
5891 // 32-bit literals are only supported on CI and we only want to use them
5892 // when the offset does not fit in 8 bits.
5893 return isImm() && !isUInt<8>(getImm()) && isUInt<32>(getImm());
5896 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset8() const {
5897 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
5900 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset20() const {
5901 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
5904 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDLiteralOffset() const {
5905 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
5908 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFlatOffset() const {
5909 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
5912 //===----------------------------------------------------------------------===//
5913 // vop3
5914 //===----------------------------------------------------------------------===//
5916 static bool ConvertOmodMul(int64_t &Mul) {
5917 if (Mul != 1 && Mul != 2 && Mul != 4)
5918 return false;
5920 Mul >>= 1;
5921 return true;
5924 static bool ConvertOmodDiv(int64_t &Div) {
5925 if (Div == 1) {
5926 Div = 0;
5927 return true;
5930 if (Div == 2) {
5931 Div = 3;
5932 return true;
5935 return false;
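// Taken together (descriptive note): 'mul:1|2|4' and 'div:1|2' are folded
// into the hardware OMOD field values 0 (none), 1 (*2), 2 (*4) and 3 (*0.5).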
5938 static bool ConvertBoundCtrl(int64_t &BoundCtrl) {
5939 if (BoundCtrl == 0) {
5940 BoundCtrl = 1;
5941 return true;
5944 if (BoundCtrl == -1) {
5945 BoundCtrl = 0;
5946 return true;
5949 return false;
5952 // Note: the order in this table matches the order of operands in AsmString.
5953 static const OptionalOperand AMDGPUOptionalOperandTable[] = {
5954 {"offen", AMDGPUOperand::ImmTyOffen, true, nullptr},
5955 {"idxen", AMDGPUOperand::ImmTyIdxen, true, nullptr},
5956 {"addr64", AMDGPUOperand::ImmTyAddr64, true, nullptr},
5957 {"offset0", AMDGPUOperand::ImmTyOffset0, false, nullptr},
5958 {"offset1", AMDGPUOperand::ImmTyOffset1, false, nullptr},
5959 {"gds", AMDGPUOperand::ImmTyGDS, true, nullptr},
5960 {"lds", AMDGPUOperand::ImmTyLDS, true, nullptr},
5961 {"offset", AMDGPUOperand::ImmTyOffset, false, nullptr},
5962 {"inst_offset", AMDGPUOperand::ImmTyInstOffset, false, nullptr},
5963 {"dlc", AMDGPUOperand::ImmTyDLC, true, nullptr},
5964 {"format", AMDGPUOperand::ImmTyFORMAT, false, nullptr},
5965 {"glc", AMDGPUOperand::ImmTyGLC, true, nullptr},
5966 {"slc", AMDGPUOperand::ImmTySLC, true, nullptr},
5967 {"tfe", AMDGPUOperand::ImmTyTFE, true, nullptr},
5968 {"d16", AMDGPUOperand::ImmTyD16, true, nullptr},
5969 {"high", AMDGPUOperand::ImmTyHigh, true, nullptr},
5970 {"clamp", AMDGPUOperand::ImmTyClampSI, true, nullptr},
5971 {"omod", AMDGPUOperand::ImmTyOModSI, false, ConvertOmodMul},
5972 {"unorm", AMDGPUOperand::ImmTyUNorm, true, nullptr},
5973 {"da", AMDGPUOperand::ImmTyDA, true, nullptr},
5974 {"r128", AMDGPUOperand::ImmTyR128A16, true, nullptr},
5975 {"a16", AMDGPUOperand::ImmTyR128A16, true, nullptr},
5976 {"lwe", AMDGPUOperand::ImmTyLWE, true, nullptr},
5977 {"d16", AMDGPUOperand::ImmTyD16, true, nullptr},
5978 {"dmask", AMDGPUOperand::ImmTyDMask, false, nullptr},
5979 {"dim", AMDGPUOperand::ImmTyDim, false, nullptr},
5980 {"row_mask", AMDGPUOperand::ImmTyDppRowMask, false, nullptr},
5981 {"bank_mask", AMDGPUOperand::ImmTyDppBankMask, false, nullptr},
5982 {"bound_ctrl", AMDGPUOperand::ImmTyDppBoundCtrl, false, ConvertBoundCtrl},
5983 {"fi", AMDGPUOperand::ImmTyDppFi, false, nullptr},
5984 {"dst_sel", AMDGPUOperand::ImmTySdwaDstSel, false, nullptr},
5985 {"src0_sel", AMDGPUOperand::ImmTySdwaSrc0Sel, false, nullptr},
5986 {"src1_sel", AMDGPUOperand::ImmTySdwaSrc1Sel, false, nullptr},
5987 {"dst_unused", AMDGPUOperand::ImmTySdwaDstUnused, false, nullptr},
5988 {"compr", AMDGPUOperand::ImmTyExpCompr, true, nullptr },
5989 {"vm", AMDGPUOperand::ImmTyExpVM, true, nullptr},
5990 {"op_sel", AMDGPUOperand::ImmTyOpSel, false, nullptr},
5991 {"op_sel_hi", AMDGPUOperand::ImmTyOpSelHi, false, nullptr},
5992 {"neg_lo", AMDGPUOperand::ImmTyNegLo, false, nullptr},
5993 {"neg_hi", AMDGPUOperand::ImmTyNegHi, false, nullptr},
5994 {"blgp", AMDGPUOperand::ImmTyBLGP, false, nullptr},
5995 {"cbsz", AMDGPUOperand::ImmTyCBSZ, false, nullptr},
5996 {"abid", AMDGPUOperand::ImmTyABID, false, nullptr}
5999 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOperand(OperandVector &Operands) {
6000 unsigned size = Operands.size();
6001 assert(size > 0);
6003 OperandMatchResultTy res = parseOptionalOpr(Operands);
6005 // This is a hack to enable hardcoded mandatory operands which follow
6006 // optional operands.
6008 // The current design assumes that all operands after the first optional operand
6009 // are also optional. However, the implementation of some instructions violates
6010 // this rule (see e.g. flat/global atomics which have hardcoded 'glc' operands).
6012 // To alleviate this problem, we have to (implicitly) parse extra operands
6013 // to make sure the autogenerated parser of custom operands never hits a
6014 // hardcoded mandatory operand.
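// Illustrative case (assumption): for a flat/global atomic with return,
// e.g. "global_atomic_add v0, v[1:2], v3, off glc", the trailing 'glc' is
// hardcoded in the AsmString and follows the optional address operand, so
// extra lookahead parsing is needed here.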
6016 if (size == 1 || ((AMDGPUOperand &)*Operands[size - 1]).isRegKind()) {
6018 // We have parsed the first optional operand.
6019 // Parse as many operands as necessary to skip all mandatory operands.
6021 for (unsigned i = 0; i < MAX_OPR_LOOKAHEAD; ++i) {
6022 if (res != MatchOperand_Success ||
6023 getLexer().is(AsmToken::EndOfStatement)) break;
6024 if (getLexer().is(AsmToken::Comma)) Parser.Lex();
6025 res = parseOptionalOpr(Operands);
6029 return res;
6032 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOpr(OperandVector &Operands) {
6033 OperandMatchResultTy res;
6034 for (const OptionalOperand &Op : AMDGPUOptionalOperandTable) {
6035 // try to parse any optional operand here
6036 if (Op.IsBit) {
6037 res = parseNamedBit(Op.Name, Operands, Op.Type);
6038 } else if (Op.Type == AMDGPUOperand::ImmTyOModSI) {
6039 res = parseOModOperand(Operands);
6040 } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstSel ||
6041 Op.Type == AMDGPUOperand::ImmTySdwaSrc0Sel ||
6042 Op.Type == AMDGPUOperand::ImmTySdwaSrc1Sel) {
6043 res = parseSDWASel(Operands, Op.Name, Op.Type);
6044 } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstUnused) {
6045 res = parseSDWADstUnused(Operands);
6046 } else if (Op.Type == AMDGPUOperand::ImmTyOpSel ||
6047 Op.Type == AMDGPUOperand::ImmTyOpSelHi ||
6048 Op.Type == AMDGPUOperand::ImmTyNegLo ||
6049 Op.Type == AMDGPUOperand::ImmTyNegHi) {
6050 res = parseOperandArrayWithPrefix(Op.Name, Operands, Op.Type,
6051 Op.ConvertResult);
6052 } else if (Op.Type == AMDGPUOperand::ImmTyDim) {
6053 res = parseDim(Operands);
6054 } else if (Op.Type == AMDGPUOperand::ImmTyFORMAT && !isGFX10()) {
6055 res = parseDfmtNfmt(Operands);
6056 } else {
6057 res = parseIntWithPrefix(Op.Name, Operands, Op.Type, Op.ConvertResult);
6059 if (res != MatchOperand_NoMatch) {
6060 return res;
6063 return MatchOperand_NoMatch;
6066 OperandMatchResultTy AMDGPUAsmParser::parseOModOperand(OperandVector &Operands) {
6067 StringRef Name = Parser.getTok().getString();
6068 if (Name == "mul") {
6069 return parseIntWithPrefix("mul", Operands,
6070 AMDGPUOperand::ImmTyOModSI, ConvertOmodMul);
6073 if (Name == "div") {
6074 return parseIntWithPrefix("div", Operands,
6075 AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv);
6078 return MatchOperand_NoMatch;
6081 void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands) {
6082 cvtVOP3P(Inst, Operands);
6084 int Opc = Inst.getOpcode();
6086 int SrcNum;
6087 const int Ops[] = { AMDGPU::OpName::src0,
6088 AMDGPU::OpName::src1,
6089 AMDGPU::OpName::src2 };
6090 for (SrcNum = 0;
6091 SrcNum < 3 && AMDGPU::getNamedOperandIdx(Opc, Ops[SrcNum]) != -1;
6092 ++SrcNum);
6093 assert(SrcNum > 0);
6095 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
6096 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
6098 if ((OpSel & (1 << SrcNum)) != 0) {
6099 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers);
6100 uint32_t ModVal = Inst.getOperand(ModIdx).getImm();
6101 Inst.getOperand(ModIdx).setImm(ModVal | SISrcMods::DST_OP_SEL);
6105 static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) {
6106 // 1. This operand is an input-modifiers operand
6107 return Desc.OpInfo[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS
6108 // 2. This is not the last operand
6109 && Desc.NumOperands > (OpNum + 1)
6110 // 3. The next operand has a register class
6111 && Desc.OpInfo[OpNum + 1].RegClass != -1
6112 // 4. The next operand is not tied to any other operand
6113 && Desc.getOperandConstraint(OpNum + 1, MCOI::OperandConstraint::TIED_TO) == -1;
6116 void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands)
6118 OptionalImmIndexMap OptionalIdx;
6119 unsigned Opc = Inst.getOpcode();
6121 unsigned I = 1;
6122 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
6123 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
6124 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
6127 for (unsigned E = Operands.size(); I != E; ++I) {
6128 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
6129 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
6130 Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
6131 } else if (Op.isInterpSlot() ||
6132 Op.isInterpAttr() ||
6133 Op.isAttrChan()) {
6134 Inst.addOperand(MCOperand::createImm(Op.getImm()));
6135 } else if (Op.isImmModifier()) {
6136 OptionalIdx[Op.getImmTy()] = I;
6137 } else {
6138 llvm_unreachable("unhandled operand type");
6142 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::high) != -1) {
6143 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyHigh);
6146 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
6147 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
6150 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
6151 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
6155 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands,
6156 OptionalImmIndexMap &OptionalIdx) {
6157 unsigned Opc = Inst.getOpcode();
6159 unsigned I = 1;
6160 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
6161 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
6162 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
6165 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1) {
6166 // This instruction has src modifiers
6167 for (unsigned E = Operands.size(); I != E; ++I) {
6168 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
6169 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
6170 Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
6171 } else if (Op.isImmModifier()) {
6172 OptionalIdx[Op.getImmTy()] = I;
6173 } else if (Op.isRegOrImm()) {
6174 Op.addRegOrImmOperands(Inst, 1);
6175 } else {
6176 llvm_unreachable("unhandled operand type");
6179 } else {
6180 // No src modifiers
6181 for (unsigned E = Operands.size(); I != E; ++I) {
6182 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
6183 if (Op.isMod()) {
6184 OptionalIdx[Op.getImmTy()] = I;
6185 } else {
6186 Op.addRegOrImmOperands(Inst, 1);
6191 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
6192 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
6195 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
6196 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
6199 // Special case v_mac_{f16, f32} and v_fmac_{f16, f32} (gfx906/gfx10+):
6200 // they have a src2 register operand that is tied to the dst operand.
6201 // We don't allow modifiers for this operand in the assembler, so
6202 // src2_modifiers should be 0.
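// In other words (descriptive note): the code below inserts an explicit
// src2_modifiers immediate of 0 followed by a copy of the dst register as the
// tied src2 operand.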
6203 if (Opc == AMDGPU::V_MAC_F32_e64_gfx6_gfx7 ||
6204 Opc == AMDGPU::V_MAC_F32_e64_gfx10 ||
6205 Opc == AMDGPU::V_MAC_F32_e64_vi ||
6206 Opc == AMDGPU::V_MAC_F16_e64_vi ||
6207 Opc == AMDGPU::V_FMAC_F32_e64_gfx10 ||
6208 Opc == AMDGPU::V_FMAC_F32_e64_vi ||
6209 Opc == AMDGPU::V_FMAC_F16_e64_gfx10) {
6210 auto it = Inst.begin();
6211 std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers));
6212 it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2
6213 ++it;
6214 Inst.insert(it, Inst.getOperand(0)); // src2 = dst
6218 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) {
6219 OptionalImmIndexMap OptionalIdx;
6220 cvtVOP3(Inst, Operands, OptionalIdx);
6223 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst,
6224 const OperandVector &Operands) {
6225 OptionalImmIndexMap OptIdx;
6226 const int Opc = Inst.getOpcode();
6227 const MCInstrDesc &Desc = MII.get(Opc);
6229 const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0;
6231 cvtVOP3(Inst, Operands, OptIdx);
6233 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in) != -1) {
6234 assert(!IsPacked);
6235 Inst.addOperand(Inst.getOperand(0));
6238 // FIXME: This is messy. Parse the modifiers as if it was a normal VOP3
6239 // instruction, and then figure out where to actually put the modifiers
6241 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel);
6243 int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
6244 if (OpSelHiIdx != -1) {
6245 int DefaultVal = IsPacked ? -1 : 0;
6246 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi,
6247 DefaultVal);
6250 int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo);
6251 if (NegLoIdx != -1) {
6252 assert(IsPacked);
6253 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo);
6254 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi);
6257 const int Ops[] = { AMDGPU::OpName::src0,
6258 AMDGPU::OpName::src1,
6259 AMDGPU::OpName::src2 };
6260 const int ModOps[] = { AMDGPU::OpName::src0_modifiers,
6261 AMDGPU::OpName::src1_modifiers,
6262 AMDGPU::OpName::src2_modifiers };
6264 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
6266 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
6267 unsigned OpSelHi = 0;
6268 unsigned NegLo = 0;
6269 unsigned NegHi = 0;
6271 if (OpSelHiIdx != -1) {
6272 OpSelHi = Inst.getOperand(OpSelHiIdx).getImm();
6275 if (NegLoIdx != -1) {
6276 int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi);
6277 NegLo = Inst.getOperand(NegLoIdx).getImm();
6278 NegHi = Inst.getOperand(NegHiIdx).getImm();
6281 for (int J = 0; J < 3; ++J) {
6282 int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]);
6283 if (OpIdx == -1)
6284 break;
6286 uint32_t ModVal = 0;
6288 if ((OpSel & (1 << J)) != 0)
6289 ModVal |= SISrcMods::OP_SEL_0;
6291 if ((OpSelHi & (1 << J)) != 0)
6292 ModVal |= SISrcMods::OP_SEL_1;
6294 if ((NegLo & (1 << J)) != 0)
6295 ModVal |= SISrcMods::NEG;
6297 if ((NegHi & (1 << J)) != 0)
6298 ModVal |= SISrcMods::NEG_HI;
6300 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
6302 Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal);
6306 //===----------------------------------------------------------------------===//
6307 // dpp
6308 //===----------------------------------------------------------------------===//
6310 bool AMDGPUOperand::isDPP8() const {
6311 return isImmTy(ImmTyDPP8);
6314 bool AMDGPUOperand::isDPPCtrl() const {
6315 using namespace AMDGPU::DPP;
6317 bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm());
6318 if (result) {
6319 int64_t Imm = getImm();
6320 return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) ||
6321 (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) ||
6322 (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) ||
6323 (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) ||
6324 (Imm == DppCtrl::WAVE_SHL1) ||
6325 (Imm == DppCtrl::WAVE_ROL1) ||
6326 (Imm == DppCtrl::WAVE_SHR1) ||
6327 (Imm == DppCtrl::WAVE_ROR1) ||
6328 (Imm == DppCtrl::ROW_MIRROR) ||
6329 (Imm == DppCtrl::ROW_HALF_MIRROR) ||
6330 (Imm == DppCtrl::BCAST15) ||
6331 (Imm == DppCtrl::BCAST31) ||
6332 (Imm >= DppCtrl::ROW_SHARE_FIRST && Imm <= DppCtrl::ROW_SHARE_LAST) ||
6333 (Imm >= DppCtrl::ROW_XMASK_FIRST && Imm <= DppCtrl::ROW_XMASK_LAST);
6335 return false;
6338 //===----------------------------------------------------------------------===//
6339 // mAI
6340 //===----------------------------------------------------------------------===//
6342 bool AMDGPUOperand::isBLGP() const {
6343 return isImm() && getImmTy() == ImmTyBLGP && isUInt<3>(getImm());
6346 bool AMDGPUOperand::isCBSZ() const {
6347 return isImm() && getImmTy() == ImmTyCBSZ && isUInt<3>(getImm());
6350 bool AMDGPUOperand::isABID() const {
6351 return isImm() && getImmTy() == ImmTyABID && isUInt<4>(getImm());
6354 bool AMDGPUOperand::isS16Imm() const {
6355 return isImm() && (isInt<16>(getImm()) || isUInt<16>(getImm()));
6358 bool AMDGPUOperand::isU16Imm() const {
6359 return isImm() && isUInt<16>(getImm());
6362 OperandMatchResultTy AMDGPUAsmParser::parseDim(OperandVector &Operands) {
6363 if (!isGFX10())
6364 return MatchOperand_NoMatch;
6366 SMLoc S = Parser.getTok().getLoc();
6368 if (getLexer().isNot(AsmToken::Identifier))
6369 return MatchOperand_NoMatch;
6370 if (getLexer().getTok().getString() != "dim")
6371 return MatchOperand_NoMatch;
6373 Parser.Lex();
6374 if (getLexer().isNot(AsmToken::Colon))
6375 return MatchOperand_ParseFail;
6377 Parser.Lex();
6379 // We want to allow "dim:1D" etc., but the initial 1 is tokenized as an
6380 // integer.
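// For example, "dim:2D" lexes as the integer "2" followed by the identifier
// "D"; the two tokens are re-joined below before the suffix lookup.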
6381 std::string Token;
6382 if (getLexer().is(AsmToken::Integer)) {
6383 SMLoc Loc = getLexer().getTok().getEndLoc();
6384 Token = getLexer().getTok().getString();
6385 Parser.Lex();
6386 if (getLexer().getTok().getLoc() != Loc)
6387 return MatchOperand_ParseFail;
6389 if (getLexer().isNot(AsmToken::Identifier))
6390 return MatchOperand_ParseFail;
6391 Token += getLexer().getTok().getString();
6393 StringRef DimId = Token;
6394 if (DimId.startswith("SQ_RSRC_IMG_"))
6395 DimId = DimId.substr(12);
6397 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByAsmSuffix(DimId);
6398 if (!DimInfo)
6399 return MatchOperand_ParseFail;
6401 Parser.Lex();
6403 Operands.push_back(AMDGPUOperand::CreateImm(this, DimInfo->Encoding, S,
6404 AMDGPUOperand::ImmTyDim));
6405 return MatchOperand_Success;
6408 OperandMatchResultTy AMDGPUAsmParser::parseDPP8(OperandVector &Operands) {
6409 SMLoc S = Parser.getTok().getLoc();
6410 StringRef Prefix;
6412 if (getLexer().getKind() == AsmToken::Identifier) {
6413 Prefix = Parser.getTok().getString();
6414 } else {
6415 return MatchOperand_NoMatch;
6418 if (Prefix != "dpp8")
6419 return parseDPPCtrl(Operands);
6420 if (!isGFX10())
6421 return MatchOperand_NoMatch;
6423 // dpp8:[%d,%d,%d,%d,%d,%d,%d,%d]
6425 int64_t Sels[8];
6427 Parser.Lex();
6428 if (getLexer().isNot(AsmToken::Colon))
6429 return MatchOperand_ParseFail;
6431 Parser.Lex();
6432 if (getLexer().isNot(AsmToken::LBrac))
6433 return MatchOperand_ParseFail;
6435 Parser.Lex();
6436 if (getParser().parseAbsoluteExpression(Sels[0]))
6437 return MatchOperand_ParseFail;
6438 if (0 > Sels[0] || 7 < Sels[0])
6439 return MatchOperand_ParseFail;
6441 for (size_t i = 1; i < 8; ++i) {
6442 if (getLexer().isNot(AsmToken::Comma))
6443 return MatchOperand_ParseFail;
6445 Parser.Lex();
6446 if (getParser().parseAbsoluteExpression(Sels[i]))
6447 return MatchOperand_ParseFail;
6448 if (0 > Sels[i] || 7 < Sels[i])
6449 return MatchOperand_ParseFail;
6452 if (getLexer().isNot(AsmToken::RBrac))
6453 return MatchOperand_ParseFail;
6454 Parser.Lex();
6456 unsigned DPP8 = 0;
6457 for (size_t i = 0; i < 8; ++i)
6458 DPP8 |= (Sels[i] << (i * 3));
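// The eight 3-bit lane selects are packed LSB-first into a 24-bit immediate,
// e.g. (illustrative) dpp8:[1,0,0,0,0,0,0,0] encodes as 0x000001.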
6460 Operands.push_back(AMDGPUOperand::CreateImm(this, DPP8, S, AMDGPUOperand::ImmTyDPP8));
6461 return MatchOperand_Success;
6464 OperandMatchResultTy
6465 AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) {
6466 using namespace AMDGPU::DPP;
6468 SMLoc S = Parser.getTok().getLoc();
6469 StringRef Prefix;
6470 int64_t Int;
6472 if (getLexer().getKind() == AsmToken::Identifier) {
6473 Prefix = Parser.getTok().getString();
6474 } else {
6475 return MatchOperand_NoMatch;
6478 if (Prefix == "row_mirror") {
6479 Int = DppCtrl::ROW_MIRROR;
6480 Parser.Lex();
6481 } else if (Prefix == "row_half_mirror") {
6482 Int = DppCtrl::ROW_HALF_MIRROR;
6483 Parser.Lex();
6484 } else {
6485 // Check to prevent parseDPPCtrlOps from eating invalid tokens
6486 if (Prefix != "quad_perm"
6487 && Prefix != "row_shl"
6488 && Prefix != "row_shr"
6489 && Prefix != "row_ror"
6490 && Prefix != "wave_shl"
6491 && Prefix != "wave_rol"
6492 && Prefix != "wave_shr"
6493 && Prefix != "wave_ror"
6494 && Prefix != "row_bcast"
6495 && Prefix != "row_share"
6496 && Prefix != "row_xmask") {
6497 return MatchOperand_NoMatch;
6500 if (!isGFX10() && (Prefix == "row_share" || Prefix == "row_xmask"))
6501 return MatchOperand_NoMatch;
6503 if (!isVI() && !isGFX9() &&
6504 (Prefix == "wave_shl" || Prefix == "wave_shr" ||
6505 Prefix == "wave_rol" || Prefix == "wave_ror" ||
6506 Prefix == "row_bcast"))
6507 return MatchOperand_NoMatch;
6509 Parser.Lex();
6510 if (getLexer().isNot(AsmToken::Colon))
6511 return MatchOperand_ParseFail;
6513 if (Prefix == "quad_perm") {
6514 // quad_perm:[%d,%d,%d,%d]
6515 Parser.Lex();
6516 if (getLexer().isNot(AsmToken::LBrac))
6517 return MatchOperand_ParseFail;
6518 Parser.Lex();
6520 if (getParser().parseAbsoluteExpression(Int) || !(0 <= Int && Int <=3))
6521 return MatchOperand_ParseFail;
6523 for (int i = 0; i < 3; ++i) {
6524 if (getLexer().isNot(AsmToken::Comma))
6525 return MatchOperand_ParseFail;
6526 Parser.Lex();
6528 int64_t Temp;
6529 if (getParser().parseAbsoluteExpression(Temp) || !(0 <= Temp && Temp <=3))
6530 return MatchOperand_ParseFail;
6531 const int shift = i*2 + 2;
6532 Int += (Temp << shift);
6535 if (getLexer().isNot(AsmToken::RBrac))
6536 return MatchOperand_ParseFail;
6537 Parser.Lex();
6538 } else {
6539 // sel:%d
6540 Parser.Lex();
6541 if (getParser().parseAbsoluteExpression(Int))
6542 return MatchOperand_ParseFail;
6544 if (Prefix == "row_shl" && 1 <= Int && Int <= 15) {
6545 Int |= DppCtrl::ROW_SHL0;
6546 } else if (Prefix == "row_shr" && 1 <= Int && Int <= 15) {
6547 Int |= DppCtrl::ROW_SHR0;
6548 } else if (Prefix == "row_ror" && 1 <= Int && Int <= 15) {
6549 Int |= DppCtrl::ROW_ROR0;
6550 } else if (Prefix == "wave_shl" && 1 == Int) {
6551 Int = DppCtrl::WAVE_SHL1;
6552 } else if (Prefix == "wave_rol" && 1 == Int) {
6553 Int = DppCtrl::WAVE_ROL1;
6554 } else if (Prefix == "wave_shr" && 1 == Int) {
6555 Int = DppCtrl::WAVE_SHR1;
6556 } else if (Prefix == "wave_ror" && 1 == Int) {
6557 Int = DppCtrl::WAVE_ROR1;
6558 } else if (Prefix == "row_bcast") {
6559 if (Int == 15) {
6560 Int = DppCtrl::BCAST15;
6561 } else if (Int == 31) {
6562 Int = DppCtrl::BCAST31;
6563 } else {
6564 return MatchOperand_ParseFail;
6566 } else if (Prefix == "row_share" && 0 <= Int && Int <= 15) {
6567 Int |= DppCtrl::ROW_SHARE_FIRST;
6568 } else if (Prefix == "row_xmask" && 0 <= Int && Int <= 15) {
6569 Int |= DppCtrl::ROW_XMASK_FIRST;
6570 } else {
6571 return MatchOperand_ParseFail;
6576 Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTyDppCtrl));
6577 return MatchOperand_Success;
6580 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultRowMask() const {
6581 return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppRowMask);
6584 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultEndpgmImmOperands() const {
6585 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyEndpgm);
6588 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBankMask() const {
6589 return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppBankMask);
6592 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBoundCtrl() const {
6593 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppBoundCtrl);
6596 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFI() const {
6597 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppFi);
6600 void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8) {
6601 OptionalImmIndexMap OptionalIdx;
6603 unsigned I = 1;
6604 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
6605 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
6606 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
6609 int Fi = 0;
6610 for (unsigned E = Operands.size(); I != E; ++I) {
6611 auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(),
6612 MCOI::TIED_TO);
6613 if (TiedTo != -1) {
6614 assert((unsigned)TiedTo < Inst.getNumOperands());
6615 // handle tied old or src2 for MAC instructions
6616 Inst.addOperand(Inst.getOperand(TiedTo));
6618 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
6619 // Add the register arguments
6620 if (Op.isReg() && validateVccOperand(Op.getReg())) {
6621 // VOP2b (v_add_u32, v_sub_u32 ...) dpp use "vcc" token.
6622 // Skip it.
6623 continue;
6626 if (IsDPP8) {
6627 if (Op.isDPP8()) {
6628 Op.addImmOperands(Inst, 1);
6629 } else if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
6630 Op.addRegWithFPInputModsOperands(Inst, 2);
6631 } else if (Op.isFI()) {
6632 Fi = Op.getImm();
6633 } else if (Op.isReg()) {
6634 Op.addRegOperands(Inst, 1);
6635 } else {
6636 llvm_unreachable("Invalid operand type");
6638 } else {
6639 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
6640 Op.addRegWithFPInputModsOperands(Inst, 2);
6641 } else if (Op.isDPPCtrl()) {
6642 Op.addImmOperands(Inst, 1);
6643 } else if (Op.isImm()) {
6644 // Handle optional arguments
6645 OptionalIdx[Op.getImmTy()] = I;
6646 } else {
6647 llvm_unreachable("Invalid operand type");
6652 if (IsDPP8) {
6653 using namespace llvm::AMDGPU::DPP;
6654 Inst.addOperand(MCOperand::createImm(Fi? DPP8_FI_1 : DPP8_FI_0));
6655 } else {
6656 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf);
6657 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf);
6658 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl);
6659 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::fi) != -1) {
6660 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppFi);
6665 //===----------------------------------------------------------------------===//
6666 // sdwa
6667 //===----------------------------------------------------------------------===//
6669 OperandMatchResultTy
6670 AMDGPUAsmParser::parseSDWASel(OperandVector &Operands, StringRef Prefix,
6671 AMDGPUOperand::ImmTy Type) {
6672 using namespace llvm::AMDGPU::SDWA;
6674 SMLoc S = Parser.getTok().getLoc();
6675 StringRef Value;
6676 OperandMatchResultTy res;
6678 res = parseStringWithPrefix(Prefix, Value);
6679 if (res != MatchOperand_Success) {
6680 return res;
6683 int64_t Int;
6684 Int = StringSwitch<int64_t>(Value)
6685 .Case("BYTE_0", SdwaSel::BYTE_0)
6686 .Case("BYTE_1", SdwaSel::BYTE_1)
6687 .Case("BYTE_2", SdwaSel::BYTE_2)
6688 .Case("BYTE_3", SdwaSel::BYTE_3)
6689 .Case("WORD_0", SdwaSel::WORD_0)
6690 .Case("WORD_1", SdwaSel::WORD_1)
6691 .Case("DWORD", SdwaSel::DWORD)
6692 .Default(0xffffffff);
6693 Parser.Lex(); // eat last token
6695 if (Int == 0xffffffff) {
6696 return MatchOperand_ParseFail;
6699 Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, Type));
6700 return MatchOperand_Success;
6703 OperandMatchResultTy
6704 AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) {
6705 using namespace llvm::AMDGPU::SDWA;
6707 SMLoc S = Parser.getTok().getLoc();
6708 StringRef Value;
6709 OperandMatchResultTy res;
6711 res = parseStringWithPrefix("dst_unused", Value);
6712 if (res != MatchOperand_Success) {
6713 return res;
6716 int64_t Int;
6717 Int = StringSwitch<int64_t>(Value)
6718 .Case("UNUSED_PAD", DstUnused::UNUSED_PAD)
6719 .Case("UNUSED_SEXT", DstUnused::UNUSED_SEXT)
6720 .Case("UNUSED_PRESERVE", DstUnused::UNUSED_PRESERVE)
6721 .Default(0xffffffff);
6722 Parser.Lex(); // eat last token
6724 if (Int == 0xffffffff) {
6725 return MatchOperand_ParseFail;
6728 Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTySdwaDstUnused));
6729 return MatchOperand_Success;
6732 void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) {
6733 cvtSDWA(Inst, Operands, SIInstrFlags::VOP1);
6736 void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) {
6737 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2);
6740 void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) {
6741 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true);
6744 void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) {
6745 cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, isVI());
6748 void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands,
6749 uint64_t BasicInstType, bool skipVcc) {
6750 using namespace llvm::AMDGPU::SDWA;
6752 OptionalImmIndexMap OptionalIdx;
6753 bool skippedVcc = false;
6755 unsigned I = 1;
6756 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
6757 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
6758 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
6761 for (unsigned E = Operands.size(); I != E; ++I) {
6762 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
6763 if (skipVcc && !skippedVcc && Op.isReg() &&
6764 (Op.getReg() == AMDGPU::VCC || Op.getReg() == AMDGPU::VCC_LO)) {
6765 // VOP2b (v_add_u32, v_sub_u32 ...) sdwa use "vcc" token as dst.
6766 // Skip it if it's 2nd (e.g. v_add_i32_sdwa v1, vcc, v2, v3)
6767 // or 4th (v_addc_u32_sdwa v1, vcc, v2, v3, vcc) operand.
6768 // Skip VCC only if we didn't skip it on previous iteration.
6769 if (BasicInstType == SIInstrFlags::VOP2 &&
6770 (Inst.getNumOperands() == 1 || Inst.getNumOperands() == 5)) {
6771 skippedVcc = true;
6772 continue;
6773 } else if (BasicInstType == SIInstrFlags::VOPC &&
6774 Inst.getNumOperands() == 0) {
6775 skippedVcc = true;
6776 continue;
6779 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
6780 Op.addRegOrImmWithInputModsOperands(Inst, 2);
6781 } else if (Op.isImm()) {
6782 // Handle optional arguments
6783 OptionalIdx[Op.getImmTy()] = I;
6784 } else {
6785 llvm_unreachable("Invalid operand type");
6787 skippedVcc = false;
6790 if (Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx10 &&
6791 Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx9 &&
6792 Inst.getOpcode() != AMDGPU::V_NOP_sdwa_vi) {
6793 // V_NOP_sdwa_vi/gfx9/gfx10 have no optional sdwa arguments
6794 switch (BasicInstType) {
6795 case SIInstrFlags::VOP1:
6796 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
6797 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
6798 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
6800 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
6801 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
6802 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
6803 break;
6805 case SIInstrFlags::VOP2:
6806 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
6807 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
6808 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
6810 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
6811 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
6812 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
6813 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
6814 break;
6816 case SIInstrFlags::VOPC:
6817 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::clamp) != -1)
6818 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
6819 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
6820 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
6821 break;
6823 default:
6824 llvm_unreachable("Invalid instruction type. Only VOP1, VOP2 and VOPC allowed");
6828 // Special case v_mac_{f16, f32}:
6829 // it has a src2 register operand that is tied to the dst operand.
6830 if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
6831 Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
6832 auto it = Inst.begin();
6833 std::advance(
6834 it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2));
6835 Inst.insert(it, Inst.getOperand(0)); // src2 = dst
6839 //===----------------------------------------------------------------------===//
6840 // mAI
6841 //===----------------------------------------------------------------------===//
6843 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBLGP() const {
6844 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyBLGP);
6847 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCBSZ() const {
6848 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCBSZ);
6851 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultABID() const {
6852 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyABID);
6855 /// Force static initialization.
6856 extern "C" void LLVMInitializeAMDGPUAsmParser() {
6857 RegisterMCAsmParser<AMDGPUAsmParser> A(getTheAMDGPUTarget());
6858 RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget());
6861 #define GET_REGISTER_MATCHER
6862 #define GET_MATCHER_IMPLEMENTATION
6863 #define GET_MNEMONIC_SPELL_CHECKER
6864 #include "AMDGPUGenAsmMatcher.inc"
6866 // This function should be defined after the auto-generated include so that we
6867 // have the MatchClassKind enum defined
6868 unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op,
6869 unsigned Kind) {
6870 // Tokens like "glc" would be parsed as immediate operands in ParseOperand().
6871 // But MatchInstructionImpl() expects to see a token and fails to validate the
6872 // operand. This method checks if we were given an immediate operand where the
6873 // matcher expects the corresponding token.
6874 AMDGPUOperand &Operand = (AMDGPUOperand&)Op;
6875 switch (Kind) {
6876 case MCK_addr64:
6877 return Operand.isAddr64() ? Match_Success : Match_InvalidOperand;
6878 case MCK_gds:
6879 return Operand.isGDS() ? Match_Success : Match_InvalidOperand;
6880 case MCK_lds:
6881 return Operand.isLDS() ? Match_Success : Match_InvalidOperand;
6882 case MCK_glc:
6883 return Operand.isGLC() ? Match_Success : Match_InvalidOperand;
6884 case MCK_idxen:
6885 return Operand.isIdxen() ? Match_Success : Match_InvalidOperand;
6886 case MCK_offen:
6887 return Operand.isOffen() ? Match_Success : Match_InvalidOperand;
6888 case MCK_SSrcB32:
6889 // When operands have expression values, they will return true for isToken,
6890 // because it is not possible to distinguish between a token and an
6891 // expression at parse time. MatchInstructionImpl() will always try to
6892 // match an operand as a token, when isToken returns true, and when the
6893 // name of the expression is not a valid token, the match will fail,
6894 // so we need to handle it here.
6895 return Operand.isSSrcB32() ? Match_Success : Match_InvalidOperand;
6896 case MCK_SSrcF32:
6897 return Operand.isSSrcF32() ? Match_Success : Match_InvalidOperand;
6898 case MCK_SoppBrTarget:
6899 return Operand.isSoppBrTarget() ? Match_Success : Match_InvalidOperand;
6900 case MCK_VReg32OrOff:
6901 return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand;
6902 case MCK_InterpSlot:
6903 return Operand.isInterpSlot() ? Match_Success : Match_InvalidOperand;
6904 case MCK_Attr:
6905 return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand;
6906 case MCK_AttrChan:
6907 return Operand.isAttrChan() ? Match_Success : Match_InvalidOperand;
6908 default:
6909 return Match_InvalidOperand;
6913 //===----------------------------------------------------------------------===//
6914 // endpgm
6915 //===----------------------------------------------------------------------===//
6917 OperandMatchResultTy AMDGPUAsmParser::parseEndpgmOp(OperandVector &Operands) {
6918 SMLoc S = Parser.getTok().getLoc();
6919 int64_t Imm = 0;
6921 if (!parseExpr(Imm)) {
6922 // The operand is optional, if not present default to 0
6923 Imm = 0;
6926 if (!isUInt<16>(Imm)) {
6927 Error(S, "expected a 16-bit value");
6928 return MatchOperand_ParseFail;
6931 Operands.push_back(
6932 AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyEndpgm));
6933 return MatchOperand_Success;
6936 bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmTyEndpgm); }