lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
1 //===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
9 #include "AMDGPU.h"
10 #include "AMDKernelCodeT.h"
11 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
12 #include "MCTargetDesc/AMDGPUTargetStreamer.h"
13 #include "SIDefines.h"
14 #include "SIInstrInfo.h"
15 #include "TargetInfo/AMDGPUTargetInfo.h"
16 #include "Utils/AMDGPUAsmUtils.h"
17 #include "Utils/AMDGPUBaseInfo.h"
18 #include "Utils/AMDKernelCodeTUtils.h"
19 #include "llvm/ADT/APFloat.h"
20 #include "llvm/ADT/APInt.h"
21 #include "llvm/ADT/ArrayRef.h"
22 #include "llvm/ADT/STLExtras.h"
23 #include "llvm/ADT/SmallBitVector.h"
24 #include "llvm/ADT/SmallString.h"
25 #include "llvm/ADT/StringRef.h"
26 #include "llvm/ADT/StringSwitch.h"
27 #include "llvm/ADT/Twine.h"
28 #include "llvm/BinaryFormat/ELF.h"
29 #include "llvm/MC/MCAsmInfo.h"
30 #include "llvm/MC/MCContext.h"
31 #include "llvm/MC/MCExpr.h"
32 #include "llvm/MC/MCInst.h"
33 #include "llvm/MC/MCInstrDesc.h"
34 #include "llvm/MC/MCInstrInfo.h"
35 #include "llvm/MC/MCParser/MCAsmLexer.h"
36 #include "llvm/MC/MCParser/MCAsmParser.h"
37 #include "llvm/MC/MCParser/MCAsmParserExtension.h"
38 #include "llvm/MC/MCParser/MCParsedAsmOperand.h"
39 #include "llvm/MC/MCParser/MCTargetAsmParser.h"
40 #include "llvm/MC/MCRegisterInfo.h"
41 #include "llvm/MC/MCStreamer.h"
42 #include "llvm/MC/MCSubtargetInfo.h"
43 #include "llvm/MC/MCSymbol.h"
44 #include "llvm/Support/AMDGPUMetadata.h"
45 #include "llvm/Support/AMDHSAKernelDescriptor.h"
46 #include "llvm/Support/Casting.h"
47 #include "llvm/Support/Compiler.h"
48 #include "llvm/Support/ErrorHandling.h"
49 #include "llvm/Support/MachineValueType.h"
50 #include "llvm/Support/MathExtras.h"
51 #include "llvm/Support/SMLoc.h"
52 #include "llvm/Support/TargetParser.h"
53 #include "llvm/Support/TargetRegistry.h"
54 #include "llvm/Support/raw_ostream.h"
55 #include <algorithm>
56 #include <cassert>
57 #include <cstdint>
58 #include <cstring>
59 #include <iterator>
60 #include <map>
61 #include <memory>
62 #include <string>
64 using namespace llvm;
65 using namespace llvm::AMDGPU;
66 using namespace llvm::amdhsa;
68 namespace {
70 class AMDGPUAsmParser;
72 enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_AGPR, IS_TTMP, IS_SPECIAL };
74 //===----------------------------------------------------------------------===//
75 // Operand
76 //===----------------------------------------------------------------------===//
78 class AMDGPUOperand : public MCParsedAsmOperand {
79 enum KindTy {
80 Token,
81 Immediate,
82 Register,
83 Expression
84 } Kind;
86 SMLoc StartLoc, EndLoc;
87 const AMDGPUAsmParser *AsmParser;
89 public:
90 AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_)
91 : MCParsedAsmOperand(), Kind(Kind_), AsmParser(AsmParser_) {}
93 using Ptr = std::unique_ptr<AMDGPUOperand>;
95 struct Modifiers {
96 bool Abs = false;
97 bool Neg = false;
98 bool Sext = false;
100 bool hasFPModifiers() const { return Abs || Neg; }
101 bool hasIntModifiers() const { return Sext; }
102 bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); }
104 int64_t getFPModifiersOperand() const {
105 int64_t Operand = 0;
106 Operand |= Abs ? SISrcMods::ABS : 0u;
107 Operand |= Neg ? SISrcMods::NEG : 0u;
108 return Operand;
111 int64_t getIntModifiersOperand() const {
112 int64_t Operand = 0;
113 Operand |= Sext ? SISrcMods::SEXT : 0u;
114 return Operand;
117 int64_t getModifiersOperand() const {
118 assert(!(hasFPModifiers() && hasIntModifiers())
119 && "fp and int modifiers should not be used simultaneously");
120 if (hasFPModifiers()) {
121 return getFPModifiersOperand();
122 } else if (hasIntModifiers()) {
123 return getIntModifiersOperand();
124 } else {
125 return 0;
129 friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
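// Illustrative example (not from the original source): an operand written as
// "-|v0|" has both Abs and Neg set, so getFPModifiersOperand() yields
// SISrcMods::ABS | SISrcMods::NEG, while an operand written as "sext(v0)"
// sets only Sext and is encoded via getIntModifiersOperand().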
132 enum ImmTy {
133 ImmTyNone,
134 ImmTyGDS,
135 ImmTyLDS,
136 ImmTyOffen,
137 ImmTyIdxen,
138 ImmTyAddr64,
139 ImmTyOffset,
140 ImmTyInstOffset,
141 ImmTyOffset0,
142 ImmTyOffset1,
143 ImmTyDLC,
144 ImmTyGLC,
145 ImmTySLC,
146 ImmTyTFE,
147 ImmTyD16,
148 ImmTyClampSI,
149 ImmTyOModSI,
150 ImmTyDPP8,
151 ImmTyDppCtrl,
152 ImmTyDppRowMask,
153 ImmTyDppBankMask,
154 ImmTyDppBoundCtrl,
155 ImmTyDppFi,
156 ImmTySdwaDstSel,
157 ImmTySdwaSrc0Sel,
158 ImmTySdwaSrc1Sel,
159 ImmTySdwaDstUnused,
160 ImmTyDMask,
161 ImmTyDim,
162 ImmTyUNorm,
163 ImmTyDA,
164 ImmTyR128A16,
165 ImmTyLWE,
166 ImmTyExpTgt,
167 ImmTyExpCompr,
168 ImmTyExpVM,
169 ImmTyFORMAT,
170 ImmTyHwreg,
171 ImmTyOff,
172 ImmTySendMsg,
173 ImmTyInterpSlot,
174 ImmTyInterpAttr,
175 ImmTyAttrChan,
176 ImmTyOpSel,
177 ImmTyOpSelHi,
178 ImmTyNegLo,
179 ImmTyNegHi,
180 ImmTySwizzle,
181 ImmTyGprIdxMode,
182 ImmTyHigh,
183 ImmTyBLGP,
184 ImmTyCBSZ,
185 ImmTyABID,
186 ImmTyEndpgm,
189 private:
190 struct TokOp {
191 const char *Data;
192 unsigned Length;
195 struct ImmOp {
196 int64_t Val;
197 ImmTy Type;
198 bool IsFPImm;
199 Modifiers Mods;
202 struct RegOp {
203 unsigned RegNo;
204 Modifiers Mods;
207 union {
208 TokOp Tok;
209 ImmOp Imm;
210 RegOp Reg;
211 const MCExpr *Expr;
214 public:
215 bool isToken() const override {
216 if (Kind == Token)
217 return true;
219 // When parsing operands, we can't always tell if something was meant to be
220 // a token, like 'gds', or an expression that references a global variable.
221 // In this case, we assume the string is an expression, and if we need to
222 // interpret it as a token, then we treat the symbol name as the token.
223 return isSymbolRefExpr();
226 bool isSymbolRefExpr() const {
227 return isExpr() && Expr && isa<MCSymbolRefExpr>(Expr);
230 bool isImm() const override {
231 return Kind == Immediate;
234 bool isInlinableImm(MVT type) const;
235 bool isLiteralImm(MVT type) const;
237 bool isRegKind() const {
238 return Kind == Register;
241 bool isReg() const override {
242 return isRegKind() && !hasModifiers();
245 bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const {
246 return isRegClass(RCID) || isInlinableImm(type) || isLiteralImm(type);
249 bool isRegOrImmWithInt16InputMods() const {
250 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16);
253 bool isRegOrImmWithInt32InputMods() const {
254 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32);
257 bool isRegOrImmWithInt64InputMods() const {
258 return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64);
261 bool isRegOrImmWithFP16InputMods() const {
262 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16);
265 bool isRegOrImmWithFP32InputMods() const {
266 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32);
269 bool isRegOrImmWithFP64InputMods() const {
270 return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64);
273 bool isVReg() const {
274 return isRegClass(AMDGPU::VGPR_32RegClassID) ||
275 isRegClass(AMDGPU::VReg_64RegClassID) ||
276 isRegClass(AMDGPU::VReg_96RegClassID) ||
277 isRegClass(AMDGPU::VReg_128RegClassID) ||
278 isRegClass(AMDGPU::VReg_160RegClassID) ||
279 isRegClass(AMDGPU::VReg_256RegClassID) ||
280 isRegClass(AMDGPU::VReg_512RegClassID) ||
281 isRegClass(AMDGPU::VReg_1024RegClassID);
284 bool isVReg32() const {
285 return isRegClass(AMDGPU::VGPR_32RegClassID);
288 bool isVReg32OrOff() const {
289 return isOff() || isVReg32();
292 bool isSDWAOperand(MVT type) const;
293 bool isSDWAFP16Operand() const;
294 bool isSDWAFP32Operand() const;
295 bool isSDWAInt16Operand() const;
296 bool isSDWAInt32Operand() const;
298 bool isImmTy(ImmTy ImmT) const {
299 return isImm() && Imm.Type == ImmT;
302 bool isImmModifier() const {
303 return isImm() && Imm.Type != ImmTyNone;
306 bool isClampSI() const { return isImmTy(ImmTyClampSI); }
307 bool isOModSI() const { return isImmTy(ImmTyOModSI); }
308 bool isDMask() const { return isImmTy(ImmTyDMask); }
309 bool isDim() const { return isImmTy(ImmTyDim); }
310 bool isUNorm() const { return isImmTy(ImmTyUNorm); }
311 bool isDA() const { return isImmTy(ImmTyDA); }
312 bool isR128A16() const { return isImmTy(ImmTyR128A16); }
313 bool isLWE() const { return isImmTy(ImmTyLWE); }
314 bool isOff() const { return isImmTy(ImmTyOff); }
315 bool isExpTgt() const { return isImmTy(ImmTyExpTgt); }
316 bool isExpVM() const { return isImmTy(ImmTyExpVM); }
317 bool isExpCompr() const { return isImmTy(ImmTyExpCompr); }
318 bool isOffen() const { return isImmTy(ImmTyOffen); }
319 bool isIdxen() const { return isImmTy(ImmTyIdxen); }
320 bool isAddr64() const { return isImmTy(ImmTyAddr64); }
321 bool isOffset() const { return isImmTy(ImmTyOffset) && isUInt<16>(getImm()); }
322 bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<8>(getImm()); }
323 bool isOffset1() const { return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); }
325 bool isFlatOffset() const { return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); }
326 bool isGDS() const { return isImmTy(ImmTyGDS); }
327 bool isLDS() const { return isImmTy(ImmTyLDS); }
328 bool isDLC() const { return isImmTy(ImmTyDLC); }
329 bool isGLC() const { return isImmTy(ImmTyGLC); }
330 bool isSLC() const { return isImmTy(ImmTySLC); }
331 bool isTFE() const { return isImmTy(ImmTyTFE); }
332 bool isD16() const { return isImmTy(ImmTyD16); }
333 bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<8>(getImm()); }
334 bool isBankMask() const { return isImmTy(ImmTyDppBankMask); }
335 bool isRowMask() const { return isImmTy(ImmTyDppRowMask); }
336 bool isBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); }
337 bool isFI() const { return isImmTy(ImmTyDppFi); }
338 bool isSDWADstSel() const { return isImmTy(ImmTySdwaDstSel); }
339 bool isSDWASrc0Sel() const { return isImmTy(ImmTySdwaSrc0Sel); }
340 bool isSDWASrc1Sel() const { return isImmTy(ImmTySdwaSrc1Sel); }
341 bool isSDWADstUnused() const { return isImmTy(ImmTySdwaDstUnused); }
342 bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); }
343 bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); }
344 bool isAttrChan() const { return isImmTy(ImmTyAttrChan); }
345 bool isOpSel() const { return isImmTy(ImmTyOpSel); }
346 bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); }
347 bool isNegLo() const { return isImmTy(ImmTyNegLo); }
348 bool isNegHi() const { return isImmTy(ImmTyNegHi); }
349 bool isHigh() const { return isImmTy(ImmTyHigh); }
351 bool isMod() const {
352 return isClampSI() || isOModSI();
355 bool isRegOrImm() const {
356 return isReg() || isImm();
359 bool isRegClass(unsigned RCID) const;
361 bool isInlineValue() const;
363 bool isRegOrInlineNoMods(unsigned RCID, MVT type) const {
364 return (isRegClass(RCID) || isInlinableImm(type)) && !hasModifiers();
367 bool isSCSrcB16() const {
368 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16);
371 bool isSCSrcV2B16() const {
372 return isSCSrcB16();
375 bool isSCSrcB32() const {
376 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32);
379 bool isSCSrcB64() const {
380 return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64);
383 bool isBoolReg() const;
385 bool isSCSrcF16() const {
386 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16);
389 bool isSCSrcV2F16() const {
390 return isSCSrcF16();
393 bool isSCSrcF32() const {
394 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32);
397 bool isSCSrcF64() const {
398 return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64);
401 bool isSSrcB32() const {
402 return isSCSrcB32() || isLiteralImm(MVT::i32) || isExpr();
405 bool isSSrcB16() const {
406 return isSCSrcB16() || isLiteralImm(MVT::i16);
409 bool isSSrcV2B16() const {
410 llvm_unreachable("cannot happen");
411 return isSSrcB16();
414 bool isSSrcB64() const {
415 // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits.
416 // See isVSrc64().
417 return isSCSrcB64() || isLiteralImm(MVT::i64);
420 bool isSSrcF32() const {
421 return isSCSrcB32() || isLiteralImm(MVT::f32) || isExpr();
424 bool isSSrcF64() const {
425 return isSCSrcB64() || isLiteralImm(MVT::f64);
428 bool isSSrcF16() const {
429 return isSCSrcB16() || isLiteralImm(MVT::f16);
432 bool isSSrcV2F16() const {
433 llvm_unreachable("cannot happen");
434 return isSSrcF16();
437 bool isSSrcOrLdsB32() const {
438 return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) ||
439 isLiteralImm(MVT::i32) || isExpr();
442 bool isVCSrcB32() const {
443 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32);
446 bool isVCSrcB64() const {
447 return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64);
450 bool isVCSrcB16() const {
451 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16);
454 bool isVCSrcV2B16() const {
455 return isVCSrcB16();
458 bool isVCSrcF32() const {
459 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32);
462 bool isVCSrcF64() const {
463 return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64);
466 bool isVCSrcF16() const {
467 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16);
470 bool isVCSrcV2F16() const {
471 return isVCSrcF16();
474 bool isVSrcB32() const {
475 return isVCSrcF32() || isLiteralImm(MVT::i32) || isExpr();
478 bool isVSrcB64() const {
479 return isVCSrcF64() || isLiteralImm(MVT::i64);
482 bool isVSrcB16() const {
483 return isVCSrcF16() || isLiteralImm(MVT::i16);
486 bool isVSrcV2B16() const {
487 return isVSrcB16() || isLiteralImm(MVT::v2i16);
490 bool isVSrcF32() const {
491 return isVCSrcF32() || isLiteralImm(MVT::f32) || isExpr();
494 bool isVSrcF64() const {
495 return isVCSrcF64() || isLiteralImm(MVT::f64);
498 bool isVSrcF16() const {
499 return isVCSrcF16() || isLiteralImm(MVT::f16);
502 bool isVSrcV2F16() const {
503 return isVSrcF16() || isLiteralImm(MVT::v2f16);
506 bool isVISrcB32() const {
507 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i32);
510 bool isVISrcB16() const {
511 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i16);
514 bool isVISrcV2B16() const {
515 return isVISrcB16();
518 bool isVISrcF32() const {
519 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f32);
522 bool isVISrcF16() const {
523 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f16);
526 bool isVISrcV2F16() const {
527 return isVISrcF16() || isVISrcB32();
530 bool isAISrcB32() const {
531 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i32);
534 bool isAISrcB16() const {
535 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i16);
538 bool isAISrcV2B16() const {
539 return isAISrcB16();
542 bool isAISrcF32() const {
543 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f32);
546 bool isAISrcF16() const {
547 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f16);
550 bool isAISrcV2F16() const {
551 return isAISrcF16() || isAISrcB32();
554 bool isAISrc_128B32() const {
555 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i32);
558 bool isAISrc_128B16() const {
559 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i16);
562 bool isAISrc_128V2B16() const {
563 return isAISrc_128B16();
566 bool isAISrc_128F32() const {
567 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f32);
570 bool isAISrc_128F16() const {
571 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f16);
574 bool isAISrc_128V2F16() const {
575 return isAISrc_128F16() || isAISrc_128B32();
578 bool isAISrc_512B32() const {
579 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i32);
582 bool isAISrc_512B16() const {
583 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i16);
586 bool isAISrc_512V2B16() const {
587 return isAISrc_512B16();
590 bool isAISrc_512F32() const {
591 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f32);
594 bool isAISrc_512F16() const {
595 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f16);
598 bool isAISrc_512V2F16() const {
599 return isAISrc_512F16() || isAISrc_512B32();
602 bool isAISrc_1024B32() const {
603 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i32);
606 bool isAISrc_1024B16() const {
607 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i16);
610 bool isAISrc_1024V2B16() const {
611 return isAISrc_1024B16();
614 bool isAISrc_1024F32() const {
615 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f32);
618 bool isAISrc_1024F16() const {
619 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f16);
622 bool isAISrc_1024V2F16() const {
623 return isAISrc_1024F16() || isAISrc_1024B32();
626 bool isKImmFP32() const {
627 return isLiteralImm(MVT::f32);
630 bool isKImmFP16() const {
631 return isLiteralImm(MVT::f16);
634 bool isMem() const override {
635 return false;
638 bool isExpr() const {
639 return Kind == Expression;
642 bool isSoppBrTarget() const {
643 return isExpr() || isImm();
646 bool isSWaitCnt() const;
647 bool isHwreg() const;
648 bool isSendMsg() const;
649 bool isSwizzle() const;
650 bool isSMRDOffset8() const;
651 bool isSMRDOffset20() const;
652 bool isSMRDLiteralOffset() const;
653 bool isDPP8() const;
654 bool isDPPCtrl() const;
655 bool isBLGP() const;
656 bool isCBSZ() const;
657 bool isABID() const;
658 bool isGPRIdxMode() const;
659 bool isS16Imm() const;
660 bool isU16Imm() const;
661 bool isEndpgm() const;
663 StringRef getExpressionAsToken() const {
664 assert(isExpr());
665 const MCSymbolRefExpr *S = cast<MCSymbolRefExpr>(Expr);
666 return S->getSymbol().getName();
669 StringRef getToken() const {
670 assert(isToken());
672 if (Kind == Expression)
673 return getExpressionAsToken();
675 return StringRef(Tok.Data, Tok.Length);
678 int64_t getImm() const {
679 assert(isImm());
680 return Imm.Val;
683 ImmTy getImmTy() const {
684 assert(isImm());
685 return Imm.Type;
688 unsigned getReg() const override {
689 assert(isRegKind());
690 return Reg.RegNo;
693 SMLoc getStartLoc() const override {
694 return StartLoc;
697 SMLoc getEndLoc() const override {
698 return EndLoc;
701 SMRange getLocRange() const {
702 return SMRange(StartLoc, EndLoc);
705 Modifiers getModifiers() const {
706 assert(isRegKind() || isImmTy(ImmTyNone));
707 return isRegKind() ? Reg.Mods : Imm.Mods;
710 void setModifiers(Modifiers Mods) {
711 assert(isRegKind() || isImmTy(ImmTyNone));
712 if (isRegKind())
713 Reg.Mods = Mods;
714 else
715 Imm.Mods = Mods;
718 bool hasModifiers() const {
719 return getModifiers().hasModifiers();
722 bool hasFPModifiers() const {
723 return getModifiers().hasFPModifiers();
726 bool hasIntModifiers() const {
727 return getModifiers().hasIntModifiers();
730 uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const;
732 void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const;
734 void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const;
736 template <unsigned Bitwidth>
737 void addKImmFPOperands(MCInst &Inst, unsigned N) const;
739 void addKImmFP16Operands(MCInst &Inst, unsigned N) const {
740 addKImmFPOperands<16>(Inst, N);
743 void addKImmFP32Operands(MCInst &Inst, unsigned N) const {
744 addKImmFPOperands<32>(Inst, N);
747 void addRegOperands(MCInst &Inst, unsigned N) const;
749 void addBoolRegOperands(MCInst &Inst, unsigned N) const {
750 addRegOperands(Inst, N);
753 void addRegOrImmOperands(MCInst &Inst, unsigned N) const {
754 if (isRegKind())
755 addRegOperands(Inst, N);
756 else if (isExpr())
757 Inst.addOperand(MCOperand::createExpr(Expr));
758 else
759 addImmOperands(Inst, N);
762 void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const {
763 Modifiers Mods = getModifiers();
764 Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
765 if (isRegKind()) {
766 addRegOperands(Inst, N);
767 } else {
768 addImmOperands(Inst, N, false);
772 void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
773 assert(!hasIntModifiers());
774 addRegOrImmWithInputModsOperands(Inst, N);
777 void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
778 assert(!hasFPModifiers());
779 addRegOrImmWithInputModsOperands(Inst, N);
782 void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const {
783 Modifiers Mods = getModifiers();
784 Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
785 assert(isRegKind());
786 addRegOperands(Inst, N);
789 void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
790 assert(!hasIntModifiers());
791 addRegWithInputModsOperands(Inst, N);
794 void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
795 assert(!hasFPModifiers());
796 addRegWithInputModsOperands(Inst, N);
799 void addSoppBrTargetOperands(MCInst &Inst, unsigned N) const {
800 if (isImm())
801 addImmOperands(Inst, N);
802 else {
803 assert(isExpr());
804 Inst.addOperand(MCOperand::createExpr(Expr));
808 static void printImmTy(raw_ostream& OS, ImmTy Type) {
809 switch (Type) {
810 case ImmTyNone: OS << "None"; break;
811 case ImmTyGDS: OS << "GDS"; break;
812 case ImmTyLDS: OS << "LDS"; break;
813 case ImmTyOffen: OS << "Offen"; break;
814 case ImmTyIdxen: OS << "Idxen"; break;
815 case ImmTyAddr64: OS << "Addr64"; break;
816 case ImmTyOffset: OS << "Offset"; break;
817 case ImmTyInstOffset: OS << "InstOffset"; break;
818 case ImmTyOffset0: OS << "Offset0"; break;
819 case ImmTyOffset1: OS << "Offset1"; break;
820 case ImmTyDLC: OS << "DLC"; break;
821 case ImmTyGLC: OS << "GLC"; break;
822 case ImmTySLC: OS << "SLC"; break;
823 case ImmTyTFE: OS << "TFE"; break;
824 case ImmTyD16: OS << "D16"; break;
825 case ImmTyFORMAT: OS << "FORMAT"; break;
826 case ImmTyClampSI: OS << "ClampSI"; break;
827 case ImmTyOModSI: OS << "OModSI"; break;
828 case ImmTyDPP8: OS << "DPP8"; break;
829 case ImmTyDppCtrl: OS << "DppCtrl"; break;
830 case ImmTyDppRowMask: OS << "DppRowMask"; break;
831 case ImmTyDppBankMask: OS << "DppBankMask"; break;
832 case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break;
833 case ImmTyDppFi: OS << "FI"; break;
834 case ImmTySdwaDstSel: OS << "SdwaDstSel"; break;
835 case ImmTySdwaSrc0Sel: OS << "SdwaSrc0Sel"; break;
836 case ImmTySdwaSrc1Sel: OS << "SdwaSrc1Sel"; break;
837 case ImmTySdwaDstUnused: OS << "SdwaDstUnused"; break;
838 case ImmTyDMask: OS << "DMask"; break;
839 case ImmTyDim: OS << "Dim"; break;
840 case ImmTyUNorm: OS << "UNorm"; break;
841 case ImmTyDA: OS << "DA"; break;
842 case ImmTyR128A16: OS << "R128A16"; break;
843 case ImmTyLWE: OS << "LWE"; break;
844 case ImmTyOff: OS << "Off"; break;
845 case ImmTyExpTgt: OS << "ExpTgt"; break;
846 case ImmTyExpCompr: OS << "ExpCompr"; break;
847 case ImmTyExpVM: OS << "ExpVM"; break;
848 case ImmTyHwreg: OS << "Hwreg"; break;
849 case ImmTySendMsg: OS << "SendMsg"; break;
850 case ImmTyInterpSlot: OS << "InterpSlot"; break;
851 case ImmTyInterpAttr: OS << "InterpAttr"; break;
852 case ImmTyAttrChan: OS << "AttrChan"; break;
853 case ImmTyOpSel: OS << "OpSel"; break;
854 case ImmTyOpSelHi: OS << "OpSelHi"; break;
855 case ImmTyNegLo: OS << "NegLo"; break;
856 case ImmTyNegHi: OS << "NegHi"; break;
857 case ImmTySwizzle: OS << "Swizzle"; break;
858 case ImmTyGprIdxMode: OS << "GprIdxMode"; break;
859 case ImmTyHigh: OS << "High"; break;
860 case ImmTyBLGP: OS << "BLGP"; break;
861 case ImmTyCBSZ: OS << "CBSZ"; break;
862 case ImmTyABID: OS << "ABID"; break;
863 case ImmTyEndpgm: OS << "Endpgm"; break;
867 void print(raw_ostream &OS) const override {
868 switch (Kind) {
869 case Register:
870 OS << "<register " << getReg() << " mods: " << Reg.Mods << '>';
871 break;
872 case Immediate:
873 OS << '<' << getImm();
874 if (getImmTy() != ImmTyNone) {
875 OS << " type: "; printImmTy(OS, getImmTy());
877 OS << " mods: " << Imm.Mods << '>';
878 break;
879 case Token:
880 OS << '\'' << getToken() << '\'';
881 break;
882 case Expression:
883 OS << "<expr " << *Expr << '>';
884 break;
888 static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser,
889 int64_t Val, SMLoc Loc,
890 ImmTy Type = ImmTyNone,
891 bool IsFPImm = false) {
892 auto Op = llvm::make_unique<AMDGPUOperand>(Immediate, AsmParser);
893 Op->Imm.Val = Val;
894 Op->Imm.IsFPImm = IsFPImm;
895 Op->Imm.Type = Type;
896 Op->Imm.Mods = Modifiers();
897 Op->StartLoc = Loc;
898 Op->EndLoc = Loc;
899 return Op;
902 static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser,
903 StringRef Str, SMLoc Loc,
904 bool HasExplicitEncodingSize = true) {
905 auto Res = llvm::make_unique<AMDGPUOperand>(Token, AsmParser);
906 Res->Tok.Data = Str.data();
907 Res->Tok.Length = Str.size();
908 Res->StartLoc = Loc;
909 Res->EndLoc = Loc;
910 return Res;
913 static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser,
914 unsigned RegNo, SMLoc S,
915 SMLoc E) {
916 auto Op = llvm::make_unique<AMDGPUOperand>(Register, AsmParser);
917 Op->Reg.RegNo = RegNo;
918 Op->Reg.Mods = Modifiers();
919 Op->StartLoc = S;
920 Op->EndLoc = E;
921 return Op;
924 static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser,
925 const class MCExpr *Expr, SMLoc S) {
926 auto Op = llvm::make_unique<AMDGPUOperand>(Expression, AsmParser);
927 Op->Expr = Expr;
928 Op->StartLoc = S;
929 Op->EndLoc = S;
930 return Op;
934 raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) {
935 OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext;
936 return OS;
939 //===----------------------------------------------------------------------===//
940 // AsmParser
941 //===----------------------------------------------------------------------===//
943 // Holds info related to the current kernel, e.g. count of SGPRs used.
944 // Kernel scope begins at the .amdgpu_hsa_kernel directive and ends at the
945 // next .amdgpu_hsa_kernel directive or at EOF.
946 class KernelScopeInfo {
947 int SgprIndexUnusedMin = -1;
948 int VgprIndexUnusedMin = -1;
949 MCContext *Ctx = nullptr;
951 void usesSgprAt(int i) {
952 if (i >= SgprIndexUnusedMin) {
953 SgprIndexUnusedMin = ++i;
954 if (Ctx) {
955 MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count"));
956 Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx));
961 void usesVgprAt(int i) {
962 if (i >= VgprIndexUnusedMin) {
963 VgprIndexUnusedMin = ++i;
964 if (Ctx) {
965 MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
966 Sym->setVariableValue(MCConstantExpr::create(VgprIndexUnusedMin, *Ctx));
971 public:
972 KernelScopeInfo() = default;
974 void initialize(MCContext &Context) {
975 Ctx = &Context;
976 usesSgprAt(SgprIndexUnusedMin = -1);
977 usesVgprAt(VgprIndexUnusedMin = -1);
980 void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex, unsigned RegWidth) {
981 switch (RegKind) {
982 case IS_SGPR: usesSgprAt(DwordRegIndex + RegWidth - 1); break;
983 case IS_AGPR: // fall through
984 case IS_VGPR: usesVgprAt(DwordRegIndex + RegWidth - 1); break;
985 default: break;
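// Illustrative trace (assuming the register list s[4:7] has already been
// parsed): usesRegister(IS_SGPR, /*DwordRegIndex=*/4, /*RegWidth=*/4) calls
// usesSgprAt(7), which bumps SgprIndexUnusedMin to 8 and updates the
// .kernel.sgpr_count symbol accordingly.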
990 class AMDGPUAsmParser : public MCTargetAsmParser {
991 MCAsmParser &Parser;
993 // Number of extra operands parsed after the first optional operand.
994 // This may be necessary to skip hardcoded mandatory operands.
995 static const unsigned MAX_OPR_LOOKAHEAD = 8;
997 unsigned ForcedEncodingSize = 0;
998 bool ForcedDPP = false;
999 bool ForcedSDWA = false;
1000 KernelScopeInfo KernelScope;
1002 /// @name Auto-generated Match Functions
1003 /// {
1005 #define GET_ASSEMBLER_HEADER
1006 #include "AMDGPUGenAsmMatcher.inc"
1008 /// }
1010 private:
1011 bool ParseAsAbsoluteExpression(uint32_t &Ret);
1012 bool OutOfRangeError(SMRange Range);
1013 /// Calculate VGPR/SGPR blocks required for a given target, reserved
1014 /// registers, and user-specified NextFreeXGPR values.
1016 /// \param Features [in] Target features, used for bug corrections.
1017 /// \param VCCUsed [in] Whether VCC special SGPR is reserved.
1018 /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved.
1019 /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved.
1020 /// \param EnableWavefrontSize32 [in] Value of ENABLE_WAVEFRONT_SIZE32 kernel
1021 /// descriptor field, if valid.
1022 /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one.
1023 /// \param VGPRRange [in] Token range, used for VGPR diagnostics.
1024 /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one.
1025 /// \param SGPRRange [in] Token range, used for SGPR diagnostics.
1026 /// \param VGPRBlocks [out] Result VGPR block count.
1027 /// \param SGPRBlocks [out] Result SGPR block count.
1028 bool calculateGPRBlocks(const FeatureBitset &Features, bool VCCUsed,
1029 bool FlatScrUsed, bool XNACKUsed,
1030 Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR,
1031 SMRange VGPRRange, unsigned NextFreeSGPR,
1032 SMRange SGPRRange, unsigned &VGPRBlocks,
1033 unsigned &SGPRBlocks);
1034 bool ParseDirectiveAMDGCNTarget();
1035 bool ParseDirectiveAMDHSAKernel();
1036 bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor);
1037 bool ParseDirectiveHSACodeObjectVersion();
1038 bool ParseDirectiveHSACodeObjectISA();
1039 bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header);
1040 bool ParseDirectiveAMDKernelCodeT();
1041 bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo) const;
1042 bool ParseDirectiveAMDGPUHsaKernel();
1044 bool ParseDirectiveISAVersion();
1045 bool ParseDirectiveHSAMetadata();
1046 bool ParseDirectivePALMetadataBegin();
1047 bool ParseDirectivePALMetadata();
1048 bool ParseDirectiveAMDGPULDS();
1050 /// Common code to parse out a block of text (typically YAML) between start and
1051 /// end directives.
1052 bool ParseToEndDirective(const char *AssemblerDirectiveBegin,
1053 const char *AssemblerDirectiveEnd,
1054 std::string &CollectString);
1056 bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth,
1057 RegisterKind RegKind, unsigned Reg1,
1058 unsigned RegNum);
1059 bool ParseAMDGPURegister(RegisterKind& RegKind, unsigned& Reg,
1060 unsigned& RegNum, unsigned& RegWidth,
1061 unsigned *DwordRegIndex);
1062 bool isRegister();
1063 bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const;
1064 Optional<StringRef> getGprCountSymbolName(RegisterKind RegKind);
1065 void initializeGprCountSymbol(RegisterKind RegKind);
1066 bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex,
1067 unsigned RegWidth);
1068 void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands,
1069 bool IsAtomic, bool IsAtomicReturn, bool IsLds = false);
1070 void cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
1071 bool IsGdsHardcoded);
1073 public:
1074 enum AMDGPUMatchResultTy {
1075 Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY
1077 enum OperandMode {
1078 OperandMode_Default,
1079 OperandMode_NSA,
1082 using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;
1084 AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
1085 const MCInstrInfo &MII,
1086 const MCTargetOptions &Options)
1087 : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) {
1088 MCAsmParserExtension::Initialize(Parser);
1090 if (getFeatureBits().none()) {
1091 // Set default features.
1092 copySTI().ToggleFeature("southern-islands");
1095 setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));
1098 // TODO: make those pre-defined variables read-only.
1099 // Currently there is no suitable machinery in the core llvm-mc for this.
1100 // MCSymbol::isRedefinable is intended for another purpose, and
1101 // AsmParser::parseDirectiveSet() cannot be specialized for a specific target.
1102 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
1103 MCContext &Ctx = getContext();
1104 if (ISA.Major >= 6 && AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
1105 MCSymbol *Sym =
1106 Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_number"));
1107 Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
1108 Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_minor"));
1109 Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
1110 Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_stepping"));
1111 Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
1112 } else {
1113 MCSymbol *Sym =
1114 Ctx.getOrCreateSymbol(Twine(".option.machine_version_major"));
1115 Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
1116 Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_minor"));
1117 Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
1118 Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping"));
1119 Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
1121 if (ISA.Major >= 6 && AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
1122 initializeGprCountSymbol(IS_VGPR);
1123 initializeGprCountSymbol(IS_SGPR);
1124 } else
1125 KernelScope.initialize(getContext());
1129 bool hasXNACK() const {
1130 return AMDGPU::hasXNACK(getSTI());
1133 bool hasMIMG_R128() const {
1134 return AMDGPU::hasMIMG_R128(getSTI());
1137 bool hasPackedD16() const {
1138 return AMDGPU::hasPackedD16(getSTI());
1141 bool isSI() const {
1142 return AMDGPU::isSI(getSTI());
1145 bool isCI() const {
1146 return AMDGPU::isCI(getSTI());
1149 bool isVI() const {
1150 return AMDGPU::isVI(getSTI());
1153 bool isGFX9() const {
1154 return AMDGPU::isGFX9(getSTI());
1157 bool isGFX10() const {
1158 return AMDGPU::isGFX10(getSTI());
1161 bool hasInv2PiInlineImm() const {
1162 return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
1165 bool hasFlatOffsets() const {
1166 return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets];
1169 bool hasSGPR102_SGPR103() const {
1170 return !isVI() && !isGFX9();
1173 bool hasSGPR104_SGPR105() const {
1174 return isGFX10();
1177 bool hasIntClamp() const {
1178 return getFeatureBits()[AMDGPU::FeatureIntClamp];
1181 AMDGPUTargetStreamer &getTargetStreamer() {
1182 MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
1183 return static_cast<AMDGPUTargetStreamer &>(TS);
1186 const MCRegisterInfo *getMRI() const {
1187 // We need this const_cast because for some reason getContext() is not const
1188 // in MCAsmParser.
1189 return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo();
1192 const MCInstrInfo *getMII() const {
1193 return &MII;
1196 const FeatureBitset &getFeatureBits() const {
1197 return getSTI().getFeatureBits();
1200 void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; }
1201 void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; }
1202 void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; }
1204 unsigned getForcedEncodingSize() const { return ForcedEncodingSize; }
1205 bool isForcedVOP3() const { return ForcedEncodingSize == 64; }
1206 bool isForcedDPP() const { return ForcedDPP; }
1207 bool isForcedSDWA() const { return ForcedSDWA; }
1208 ArrayRef<unsigned> getMatchedVariants() const;
1210 std::unique_ptr<AMDGPUOperand> parseRegister();
1211 bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
1212 unsigned checkTargetMatchPredicate(MCInst &Inst) override;
1213 unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
1214 unsigned Kind) override;
1215 bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
1216 OperandVector &Operands, MCStreamer &Out,
1217 uint64_t &ErrorInfo,
1218 bool MatchingInlineAsm) override;
1219 bool ParseDirective(AsmToken DirectiveID) override;
1220 OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Mnemonic,
1221 OperandMode Mode = OperandMode_Default);
1222 StringRef parseMnemonicSuffix(StringRef Name);
1223 bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
1224 SMLoc NameLoc, OperandVector &Operands) override;
1225 //bool ProcessInstruction(MCInst &Inst);
1227 OperandMatchResultTy parseIntWithPrefix(const char *Prefix, int64_t &Int);
1229 OperandMatchResultTy
1230 parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
1231 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1232 bool (*ConvertResult)(int64_t &) = nullptr);
1234 OperandMatchResultTy
1235 parseOperandArrayWithPrefix(const char *Prefix,
1236 OperandVector &Operands,
1237 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1238 bool (*ConvertResult)(int64_t&) = nullptr);
1240 OperandMatchResultTy
1241 parseNamedBit(const char *Name, OperandVector &Operands,
1242 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone);
1243 OperandMatchResultTy parseStringWithPrefix(StringRef Prefix,
1244 StringRef &Value);
1246 bool isModifier();
1247 bool isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1248 bool isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1249 bool isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1250 bool isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const;
1251 bool parseSP3NegModifier();
1252 OperandMatchResultTy parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false);
1253 OperandMatchResultTy parseReg(OperandVector &Operands);
1254 OperandMatchResultTy parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false);
1255 OperandMatchResultTy parseRegOrImmWithFPInputMods(OperandVector &Operands, bool AllowImm = true);
1256 OperandMatchResultTy parseRegOrImmWithIntInputMods(OperandVector &Operands, bool AllowImm = true);
1257 OperandMatchResultTy parseRegWithFPInputMods(OperandVector &Operands);
1258 OperandMatchResultTy parseRegWithIntInputMods(OperandVector &Operands);
1259 OperandMatchResultTy parseVReg32OrOff(OperandVector &Operands);
1260 OperandMatchResultTy parseDfmtNfmt(OperandVector &Operands);
1262 void cvtDSOffset01(MCInst &Inst, const OperandVector &Operands);
1263 void cvtDS(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, false); }
1264 void cvtDSGds(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, true); }
1265 void cvtExp(MCInst &Inst, const OperandVector &Operands);
1267 bool parseCnt(int64_t &IntVal);
1268 OperandMatchResultTy parseSWaitCntOps(OperandVector &Operands);
1269 OperandMatchResultTy parseHwreg(OperandVector &Operands);
1271 private:
1272 struct OperandInfoTy {
1273 int64_t Id;
1274 bool IsSymbolic = false;
1275 bool IsDefined = false;
1277 OperandInfoTy(int64_t Id_) : Id(Id_) {}
1280 bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &Op, OperandInfoTy &Stream);
1281 bool validateSendMsg(const OperandInfoTy &Msg,
1282 const OperandInfoTy &Op,
1283 const OperandInfoTy &Stream,
1284 const SMLoc Loc);
1286 bool parseHwregBody(OperandInfoTy &HwReg, int64_t &Offset, int64_t &Width);
1287 bool validateHwreg(const OperandInfoTy &HwReg,
1288 const int64_t Offset,
1289 const int64_t Width,
1290 const SMLoc Loc);
1292 void errorExpTgt();
1293 OperandMatchResultTy parseExpTgtImpl(StringRef Str, uint8_t &Val);
1294 SMLoc getFlatOffsetLoc(const OperandVector &Operands) const;
1296 bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc, const OperandVector &Operands);
1297 bool validateFlatOffset(const MCInst &Inst, const OperandVector &Operands);
1298 bool validateSOPLiteral(const MCInst &Inst) const;
1299 bool validateConstantBusLimitations(const MCInst &Inst);
1300 bool validateEarlyClobberLimitations(const MCInst &Inst);
1301 bool validateIntClampSupported(const MCInst &Inst);
1302 bool validateMIMGAtomicDMask(const MCInst &Inst);
1303 bool validateMIMGGatherDMask(const MCInst &Inst);
1304 bool validateMIMGDataSize(const MCInst &Inst);
1305 bool validateMIMGAddrSize(const MCInst &Inst);
1306 bool validateMIMGD16(const MCInst &Inst);
1307 bool validateMIMGDim(const MCInst &Inst);
1308 bool validateLdsDirect(const MCInst &Inst);
1309 bool validateOpSel(const MCInst &Inst);
1310 bool validateVccOperand(unsigned Reg) const;
1311 bool validateVOP3Literal(const MCInst &Inst) const;
1312 bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
1313 bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
1314 unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const;
1316 bool isId(const StringRef Id) const;
1317 bool isId(const AsmToken &Token, const StringRef Id) const;
1318 bool isToken(const AsmToken::TokenKind Kind) const;
1319 bool trySkipId(const StringRef Id);
1320 bool trySkipId(const StringRef Id, const AsmToken::TokenKind Kind);
1321 bool trySkipToken(const AsmToken::TokenKind Kind);
1322 bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg);
1323 bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string");
1324 void peekTokens(MutableArrayRef<AsmToken> Tokens);
1325 AsmToken::TokenKind getTokenKind() const;
1326 bool parseExpr(int64_t &Imm);
1327 bool parseExpr(OperandVector &Operands);
1328 StringRef getTokenStr() const;
1329 AsmToken peekToken();
1330 AsmToken getToken() const;
1331 SMLoc getLoc() const;
1332 void lex();
1334 public:
1335 OperandMatchResultTy parseOptionalOperand(OperandVector &Operands);
1336 OperandMatchResultTy parseOptionalOpr(OperandVector &Operands);
1338 OperandMatchResultTy parseExpTgt(OperandVector &Operands);
1339 OperandMatchResultTy parseSendMsgOp(OperandVector &Operands);
1340 OperandMatchResultTy parseInterpSlot(OperandVector &Operands);
1341 OperandMatchResultTy parseInterpAttr(OperandVector &Operands);
1342 OperandMatchResultTy parseSOppBrTarget(OperandVector &Operands);
1343 OperandMatchResultTy parseBoolReg(OperandVector &Operands);
1345 bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
1346 const unsigned MinVal,
1347 const unsigned MaxVal,
1348 const StringRef ErrMsg);
1349 OperandMatchResultTy parseSwizzleOp(OperandVector &Operands);
1350 bool parseSwizzleOffset(int64_t &Imm);
1351 bool parseSwizzleMacro(int64_t &Imm);
1352 bool parseSwizzleQuadPerm(int64_t &Imm);
1353 bool parseSwizzleBitmaskPerm(int64_t &Imm);
1354 bool parseSwizzleBroadcast(int64_t &Imm);
1355 bool parseSwizzleSwap(int64_t &Imm);
1356 bool parseSwizzleReverse(int64_t &Imm);
1358 OperandMatchResultTy parseGPRIdxMode(OperandVector &Operands);
1359 int64_t parseGPRIdxMacro();
1361 void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false); }
1362 void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, false); }
1363 void cvtMubufAtomicReturn(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, true); }
1364 void cvtMubufLds(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false, true); }
1365 void cvtMtbuf(MCInst &Inst, const OperandVector &Operands);
1367 AMDGPUOperand::Ptr defaultDLC() const;
1368 AMDGPUOperand::Ptr defaultGLC() const;
1369 AMDGPUOperand::Ptr defaultSLC() const;
1371 AMDGPUOperand::Ptr defaultSMRDOffset8() const;
1372 AMDGPUOperand::Ptr defaultSMRDOffset20() const;
1373 AMDGPUOperand::Ptr defaultSMRDLiteralOffset() const;
1374 AMDGPUOperand::Ptr defaultFlatOffset() const;
1376 OperandMatchResultTy parseOModOperand(OperandVector &Operands);
1378 void cvtVOP3(MCInst &Inst, const OperandVector &Operands,
1379 OptionalImmIndexMap &OptionalIdx);
1380 void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands);
1381 void cvtVOP3(MCInst &Inst, const OperandVector &Operands);
1382 void cvtVOP3P(MCInst &Inst, const OperandVector &Operands);
1384 void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands);
1386 void cvtMIMG(MCInst &Inst, const OperandVector &Operands,
1387 bool IsAtomic = false);
1388 void cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands);
1390 OperandMatchResultTy parseDim(OperandVector &Operands);
1391 OperandMatchResultTy parseDPP8(OperandVector &Operands);
1392 OperandMatchResultTy parseDPPCtrl(OperandVector &Operands);
1393 AMDGPUOperand::Ptr defaultRowMask() const;
1394 AMDGPUOperand::Ptr defaultBankMask() const;
1395 AMDGPUOperand::Ptr defaultBoundCtrl() const;
1396 AMDGPUOperand::Ptr defaultFI() const;
1397 void cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8 = false);
1398 void cvtDPP8(MCInst &Inst, const OperandVector &Operands) { cvtDPP(Inst, Operands, true); }
1400 OperandMatchResultTy parseSDWASel(OperandVector &Operands, StringRef Prefix,
1401 AMDGPUOperand::ImmTy Type);
1402 OperandMatchResultTy parseSDWADstUnused(OperandVector &Operands);
1403 void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands);
1404 void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands);
1405 void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands);
1406 void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands);
1407 void cvtSDWA(MCInst &Inst, const OperandVector &Operands,
1408 uint64_t BasicInstType, bool skipVcc = false);
1410 AMDGPUOperand::Ptr defaultBLGP() const;
1411 AMDGPUOperand::Ptr defaultCBSZ() const;
1412 AMDGPUOperand::Ptr defaultABID() const;
1414 OperandMatchResultTy parseEndpgmOp(OperandVector &Operands);
1415 AMDGPUOperand::Ptr defaultEndpgmImmOperands() const;
1418 struct OptionalOperand {
1419 const char *Name;
1420 AMDGPUOperand::ImmTy Type;
1421 bool IsBit;
1422 bool (*ConvertResult)(int64_t&);
1425 } // end anonymous namespace
1427 // May be called with an integer type of equivalent bit width.
1428 static const fltSemantics *getFltSemantics(unsigned Size) {
1429 switch (Size) {
1430 case 4:
1431 return &APFloat::IEEEsingle();
1432 case 8:
1433 return &APFloat::IEEEdouble();
1434 case 2:
1435 return &APFloat::IEEEhalf();
1436 default:
1437 llvm_unreachable("unsupported fp type");
1441 static const fltSemantics *getFltSemantics(MVT VT) {
1442 return getFltSemantics(VT.getSizeInBits() / 8);
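// For example, getFltSemantics(MVT::f16) yields APFloat::IEEEhalf(), while an
// integer type of the same width, such as MVT::i32, maps to the FP semantics
// of the equivalent size (here APFloat::IEEEsingle()).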
1445 static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
1446 switch (OperandType) {
1447 case AMDGPU::OPERAND_REG_IMM_INT32:
1448 case AMDGPU::OPERAND_REG_IMM_FP32:
1449 case AMDGPU::OPERAND_REG_INLINE_C_INT32:
1450 case AMDGPU::OPERAND_REG_INLINE_C_FP32:
1451 case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
1452 case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
1453 return &APFloat::IEEEsingle();
1454 case AMDGPU::OPERAND_REG_IMM_INT64:
1455 case AMDGPU::OPERAND_REG_IMM_FP64:
1456 case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1457 case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1458 return &APFloat::IEEEdouble();
1459 case AMDGPU::OPERAND_REG_IMM_INT16:
1460 case AMDGPU::OPERAND_REG_IMM_FP16:
1461 case AMDGPU::OPERAND_REG_INLINE_C_INT16:
1462 case AMDGPU::OPERAND_REG_INLINE_C_FP16:
1463 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
1464 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
1465 case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
1466 case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
1467 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
1468 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
1469 case AMDGPU::OPERAND_REG_IMM_V2INT16:
1470 case AMDGPU::OPERAND_REG_IMM_V2FP16:
1471 return &APFloat::IEEEhalf();
1472 default:
1473 llvm_unreachable("unsupported fp type");
1477 //===----------------------------------------------------------------------===//
1478 // Operand
1479 //===----------------------------------------------------------------------===//
1481 static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) {
1482 bool Lost;
1484 // Convert the literal to the FP semantics of the requested type
1485 APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT),
1486 APFloat::rmNearestTiesToEven,
1487 &Lost);
1488 // We allow precision loss but not overflow or underflow
1489 if (Status != APFloat::opOK &&
1490 Lost &&
1491 ((Status & APFloat::opOverflow) != 0 ||
1492 (Status & APFloat::opUnderflow) != 0)) {
1493 return false;
1496 return true;
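// Illustrative behaviour: converting 0.1 to f16 merely loses precision and is
// accepted, whereas converting 1.0e10 to f16 overflows the target semantics
// and is rejected.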
1499 static bool isSafeTruncation(int64_t Val, unsigned Size) {
1500 return isUIntN(Size, Val) || isIntN(Size, Val);
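// For example, isSafeTruncation(-1, 16) and isSafeTruncation(0xFFFF, 16) are
// true (the value fits as a signed or unsigned 16-bit integer, respectively),
// while isSafeTruncation(0x10000, 16) is false.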
1503 bool AMDGPUOperand::isInlinableImm(MVT type) const {
1505 // This is a hack to enable named inline values like
1506 // shared_base with both 32-bit and 64-bit operands.
1507 // Note that these values are defined as
1508 // 32-bit operands only.
1509 if (isInlineValue()) {
1510 return true;
1513 if (!isImmTy(ImmTyNone)) {
1514 // Only plain immediates are inlinable (e.g. "clamp" attribute is not)
1515 return false;
1517 // TODO: We should avoid using host float here. It would be better to
1518 // check the float bit values which is what a few other places do.
1519 // We've had bot failures before due to weird NaN support on mips hosts.
1521 APInt Literal(64, Imm.Val);
1523 if (Imm.IsFPImm) { // We got fp literal token
1524 if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
1525 return AMDGPU::isInlinableLiteral64(Imm.Val,
1526 AsmParser->hasInv2PiInlineImm());
1529 APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
1530 if (!canLosslesslyConvertToFPType(FPLiteral, type))
1531 return false;
1533 if (type.getScalarSizeInBits() == 16) {
1534 return AMDGPU::isInlinableLiteral16(
1535 static_cast<int16_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
1536 AsmParser->hasInv2PiInlineImm());
1539 // Check if single precision literal is inlinable
1540 return AMDGPU::isInlinableLiteral32(
1541 static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
1542 AsmParser->hasInv2PiInlineImm());
1545 // We got int literal token.
1546 if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
1547 return AMDGPU::isInlinableLiteral64(Imm.Val,
1548 AsmParser->hasInv2PiInlineImm());
1551 if (!isSafeTruncation(Imm.Val, type.getScalarSizeInBits())) {
1552 return false;
1555 if (type.getScalarSizeInBits() == 16) {
1556 return AMDGPU::isInlinableLiteral16(
1557 static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()),
1558 AsmParser->hasInv2PiInlineImm());
1561 return AMDGPU::isInlinableLiteral32(
1562 static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()),
1563 AsmParser->hasInv2PiInlineImm());
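// Illustrative note: for a 32-bit operand, integer literals in the AMDGPU
// inline-constant range (-16..64) are reported as inlinable here, while a
// value such as 65 falls through to the literal-constant path (isLiteralImm).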
1566 bool AMDGPUOperand::isLiteralImm(MVT type) const {
1567 // Check that this immediate can be added as literal
1568 if (!isImmTy(ImmTyNone)) {
1569 return false;
1572 if (!Imm.IsFPImm) {
1573 // We got int literal token.
1575 if (type == MVT::f64 && hasFPModifiers()) {
1576 // FP modifiers cannot be applied to int literals while preserving the same
1577 // semantics for VOP1/2/C and VOP3, because of integer truncation. To avoid
1578 // ambiguity, disable these cases.
1579 return false;
1582 unsigned Size = type.getSizeInBits();
1583 if (Size == 64)
1584 Size = 32;
1586 // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP
1587 // types.
1588 return isSafeTruncation(Imm.Val, Size);
1591 // We got fp literal token
1592 if (type == MVT::f64) { // Expected 64-bit fp operand
1593 // We would set the low 32 bits of the literal to zero, but we accept such literals
1594 return true;
1597 if (type == MVT::i64) { // Expected 64-bit int operand
1598 // We don't allow fp literals in 64-bit integer instructions. It is
1599 // unclear how we should encode them.
1600 return false;
1603 // We allow fp literals with f16x2 operands assuming that the specified
1604 // literal goes into the lower half and the upper half is zero. We also
1605 // require that the literal may be losslessly converted to f16.
1606 MVT ExpectedType = (type == MVT::v2f16)? MVT::f16 :
1607 (type == MVT::v2i16)? MVT::i16 : type;
1609 APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
1610 return canLosslesslyConvertToFPType(FPLiteral, ExpectedType);
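// For instance, the FP literal 1.5 is acceptable for a v2f16 operand (it
// converts losslessly to f16 and is assumed to occupy the low half), whereas
// a value that overflows f16, such as 1.0e10, is rejected.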
1613 bool AMDGPUOperand::isRegClass(unsigned RCID) const {
1614 return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg());
1617 bool AMDGPUOperand::isSDWAOperand(MVT type) const {
1618 if (AsmParser->isVI())
1619 return isVReg32();
1620 else if (AsmParser->isGFX9() || AsmParser->isGFX10())
1621 return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type);
1622 else
1623 return false;
1626 bool AMDGPUOperand::isSDWAFP16Operand() const {
1627 return isSDWAOperand(MVT::f16);
1630 bool AMDGPUOperand::isSDWAFP32Operand() const {
1631 return isSDWAOperand(MVT::f32);
1634 bool AMDGPUOperand::isSDWAInt16Operand() const {
1635 return isSDWAOperand(MVT::i16);
1638 bool AMDGPUOperand::isSDWAInt32Operand() const {
1639 return isSDWAOperand(MVT::i32);
1642 bool AMDGPUOperand::isBoolReg() const {
1643 return (AsmParser->getFeatureBits()[AMDGPU::FeatureWavefrontSize64] && isSCSrcB64()) ||
1644 (AsmParser->getFeatureBits()[AMDGPU::FeatureWavefrontSize32] && isSCSrcB32());
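// In other words (illustrative summary of the checks above): a boolean source
// is a 64-bit SGPR pair such as vcc or s[0:1] on wave64 targets, and a single
// 32-bit SGPR such as vcc_lo on wave32 targets.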
1647 uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const
1649 assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
1650 assert(Size == 2 || Size == 4 || Size == 8);
1652 const uint64_t FpSignMask = (1ULL << (Size * 8 - 1));
1654 if (Imm.Mods.Abs) {
1655 Val &= ~FpSignMask;
1657 if (Imm.Mods.Neg) {
1658 Val ^= FpSignMask;
1661 return Val;
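// Example: with Size == 4, FpSignMask above is 0x80000000, so "abs" clears the
// IEEE sign bit of the literal and "neg" toggles it.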
1664 void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const {
1665 if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()),
1666 Inst.getNumOperands())) {
1667 addLiteralImmOperand(Inst, Imm.Val,
1668 ApplyModifiers &
1669 isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
1670 } else {
1671 assert(!isImmTy(ImmTyNone) || !hasModifiers());
1672 Inst.addOperand(MCOperand::createImm(Imm.Val));
1676 void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const {
1677 const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode());
1678 auto OpNum = Inst.getNumOperands();
1679 // Check that this operand accepts literals
1680 assert(AMDGPU::isSISrcOperand(InstDesc, OpNum));
1682 if (ApplyModifiers) {
1683 assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum));
1684 const unsigned Size = Imm.IsFPImm ? sizeof(double) : getOperandSize(InstDesc, OpNum);
1685 Val = applyInputFPModifiers(Val, Size);
1688 APInt Literal(64, Val);
1689 uint8_t OpTy = InstDesc.OpInfo[OpNum].OperandType;
1691 if (Imm.IsFPImm) { // We got fp literal token
1692 switch (OpTy) {
1693 case AMDGPU::OPERAND_REG_IMM_INT64:
1694 case AMDGPU::OPERAND_REG_IMM_FP64:
1695 case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1696 case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1697 if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(),
1698 AsmParser->hasInv2PiInlineImm())) {
1699 Inst.addOperand(MCOperand::createImm(Literal.getZExtValue()));
1700 return;
1703 // Non-inlineable
1704 if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) { // Expected 64-bit fp operand
1705 // For fp operands we check whether the low 32 bits are zero
1706 if (Literal.getLoBits(32) != 0) {
1707 const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(),
1708 "Can't encode literal as exact 64-bit floating-point operand. "
1709 "Low 32-bits will be set to zero");
1712 Inst.addOperand(MCOperand::createImm(Literal.lshr(32).getZExtValue()));
1713 return;
1716 // We don't allow fp literals in 64-bit integer instructions. It is
1717 // unclear how we should encode them. This case should be checked earlier
1718 // in predicate methods (isLiteralImm())
1719 llvm_unreachable("fp literal in 64-bit integer instruction.");
1721 case AMDGPU::OPERAND_REG_IMM_INT32:
1722 case AMDGPU::OPERAND_REG_IMM_FP32:
1723 case AMDGPU::OPERAND_REG_INLINE_C_INT32:
1724 case AMDGPU::OPERAND_REG_INLINE_C_FP32:
1725 case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
1726 case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
1727 case AMDGPU::OPERAND_REG_IMM_INT16:
1728 case AMDGPU::OPERAND_REG_IMM_FP16:
1729 case AMDGPU::OPERAND_REG_INLINE_C_INT16:
1730 case AMDGPU::OPERAND_REG_INLINE_C_FP16:
1731 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
1732 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
1733 case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
1734 case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
1735 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
1736 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
1737 case AMDGPU::OPERAND_REG_IMM_V2INT16:
1738 case AMDGPU::OPERAND_REG_IMM_V2FP16: {
1739 bool lost;
1740 APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
1741 // Convert the literal to the FP semantics of the operand type
1742 FPLiteral.convert(*getOpFltSemantics(OpTy),
1743 APFloat::rmNearestTiesToEven, &lost);
1744 // We allow precision loss but not overflow or underflow. This should be
1745 // checked earlier in isLiteralImm()
1747 uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
1748 Inst.addOperand(MCOperand::createImm(ImmVal));
1749 return;
1751 default:
1752 llvm_unreachable("invalid operand size");
1755 return;
1758 // We got int literal token.
1759 // Only sign extend inline immediates.
1760 switch (OpTy) {
1761 case AMDGPU::OPERAND_REG_IMM_INT32:
1762 case AMDGPU::OPERAND_REG_IMM_FP32:
1763 case AMDGPU::OPERAND_REG_INLINE_C_INT32:
1764 case AMDGPU::OPERAND_REG_INLINE_C_FP32:
1765 case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
1766 case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
1767 case AMDGPU::OPERAND_REG_IMM_V2INT16:
1768 case AMDGPU::OPERAND_REG_IMM_V2FP16:
1769 if (isSafeTruncation(Val, 32) &&
1770 AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val),
1771 AsmParser->hasInv2PiInlineImm())) {
1772 Inst.addOperand(MCOperand::createImm(Val));
1773 return;
1776 Inst.addOperand(MCOperand::createImm(Val & 0xffffffff));
1777 return;
1779 case AMDGPU::OPERAND_REG_IMM_INT64:
1780 case AMDGPU::OPERAND_REG_IMM_FP64:
1781 case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1782 case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1783 if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) {
1784 Inst.addOperand(MCOperand::createImm(Val));
1785 return;
1788 Inst.addOperand(MCOperand::createImm(Lo_32(Val)));
1789 return;
1791 case AMDGPU::OPERAND_REG_IMM_INT16:
1792 case AMDGPU::OPERAND_REG_IMM_FP16:
1793 case AMDGPU::OPERAND_REG_INLINE_C_INT16:
1794 case AMDGPU::OPERAND_REG_INLINE_C_FP16:
1795 case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
1796 case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
1797 if (isSafeTruncation(Val, 16) &&
1798 AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
1799 AsmParser->hasInv2PiInlineImm())) {
1800 Inst.addOperand(MCOperand::createImm(Val));
1801 return;
1804 Inst.addOperand(MCOperand::createImm(Val & 0xffff));
1805 return;
1807 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
1808 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
1809 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
1810 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: {
1811 assert(isSafeTruncation(Val, 16));
1812 assert(AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
1813 AsmParser->hasInv2PiInlineImm()));
1815 Inst.addOperand(MCOperand::createImm(Val));
1816 return;
1818 default:
1819 llvm_unreachable("invalid operand size");
1823 template <unsigned Bitwidth>
1824 void AMDGPUOperand::addKImmFPOperands(MCInst &Inst, unsigned N) const {
1825 APInt Literal(64, Imm.Val);
1827 if (!Imm.IsFPImm) {
1828 // We got int literal token.
1829 Inst.addOperand(MCOperand::createImm(Literal.getLoBits(Bitwidth).getZExtValue()));
1830 return;
1833 bool Lost;
1834 APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
1835 FPLiteral.convert(*getFltSemantics(Bitwidth / 8),
1836 APFloat::rmNearestTiesToEven, &Lost);
1837 Inst.addOperand(MCOperand::createImm(FPLiteral.bitcastToAPInt().getZExtValue()));
1840 void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const {
1841 Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI())));
1844 static bool isInlineValue(unsigned Reg) {
1845 switch (Reg) {
1846 case AMDGPU::SRC_SHARED_BASE:
1847 case AMDGPU::SRC_SHARED_LIMIT:
1848 case AMDGPU::SRC_PRIVATE_BASE:
1849 case AMDGPU::SRC_PRIVATE_LIMIT:
1850 case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
1851 return true;
1852 case AMDGPU::SRC_VCCZ:
1853 case AMDGPU::SRC_EXECZ:
1854 case AMDGPU::SRC_SCC:
1855 return true;
1856 default:
1857 return false;
1861 bool AMDGPUOperand::isInlineValue() const {
1862 return isRegKind() && ::isInlineValue(getReg());
1865 //===----------------------------------------------------------------------===//
1866 // AsmParser
1867 //===----------------------------------------------------------------------===//
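// Map a register kind and a width in 32-bit dwords to a register class ID;
// e.g. a two-dword VGPR tuple such as v[0:1] selects VReg_64.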
1869 static int getRegClass(RegisterKind Is, unsigned RegWidth) {
1870 if (Is == IS_VGPR) {
1871 switch (RegWidth) {
1872 default: return -1;
1873 case 1: return AMDGPU::VGPR_32RegClassID;
1874 case 2: return AMDGPU::VReg_64RegClassID;
1875 case 3: return AMDGPU::VReg_96RegClassID;
1876 case 4: return AMDGPU::VReg_128RegClassID;
1877 case 5: return AMDGPU::VReg_160RegClassID;
1878 case 8: return AMDGPU::VReg_256RegClassID;
1879 case 16: return AMDGPU::VReg_512RegClassID;
1880 case 32: return AMDGPU::VReg_1024RegClassID;
1882 } else if (Is == IS_TTMP) {
1883 switch (RegWidth) {
1884 default: return -1;
1885 case 1: return AMDGPU::TTMP_32RegClassID;
1886 case 2: return AMDGPU::TTMP_64RegClassID;
1887 case 4: return AMDGPU::TTMP_128RegClassID;
1888 case 8: return AMDGPU::TTMP_256RegClassID;
1889 case 16: return AMDGPU::TTMP_512RegClassID;
1891 } else if (Is == IS_SGPR) {
1892 switch (RegWidth) {
1893 default: return -1;
1894 case 1: return AMDGPU::SGPR_32RegClassID;
1895 case 2: return AMDGPU::SGPR_64RegClassID;
1896 case 4: return AMDGPU::SGPR_128RegClassID;
1897 case 8: return AMDGPU::SGPR_256RegClassID;
1898 case 16: return AMDGPU::SGPR_512RegClassID;
1900 } else if (Is == IS_AGPR) {
1901 switch (RegWidth) {
1902 default: return -1;
1903 case 1: return AMDGPU::AGPR_32RegClassID;
1904 case 2: return AMDGPU::AReg_64RegClassID;
1905 case 4: return AMDGPU::AReg_128RegClassID;
1906 case 16: return AMDGPU::AReg_512RegClassID;
1907 case 32: return AMDGPU::AReg_1024RegClassID;
1910 return -1;
1913 static unsigned getSpecialRegForName(StringRef RegName) {
1914 return StringSwitch<unsigned>(RegName)
1915 .Case("exec", AMDGPU::EXEC)
1916 .Case("vcc", AMDGPU::VCC)
1917 .Case("flat_scratch", AMDGPU::FLAT_SCR)
1918 .Case("xnack_mask", AMDGPU::XNACK_MASK)
1919 .Case("shared_base", AMDGPU::SRC_SHARED_BASE)
1920 .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE)
1921 .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT)
1922 .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT)
1923 .Case("private_base", AMDGPU::SRC_PRIVATE_BASE)
1924 .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE)
1925 .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
1926 .Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
1927 .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
1928 .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
1929 .Case("lds_direct", AMDGPU::LDS_DIRECT)
1930 .Case("src_lds_direct", AMDGPU::LDS_DIRECT)
1931 .Case("m0", AMDGPU::M0)
1932 .Case("vccz", AMDGPU::SRC_VCCZ)
1933 .Case("src_vccz", AMDGPU::SRC_VCCZ)
1934 .Case("execz", AMDGPU::SRC_EXECZ)
1935 .Case("src_execz", AMDGPU::SRC_EXECZ)
1936 .Case("scc", AMDGPU::SRC_SCC)
1937 .Case("src_scc", AMDGPU::SRC_SCC)
1938 .Case("tba", AMDGPU::TBA)
1939 .Case("tma", AMDGPU::TMA)
1940 .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO)
1941 .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI)
1942 .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO)
1943 .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI)
1944 .Case("vcc_lo", AMDGPU::VCC_LO)
1945 .Case("vcc_hi", AMDGPU::VCC_HI)
1946 .Case("exec_lo", AMDGPU::EXEC_LO)
1947 .Case("exec_hi", AMDGPU::EXEC_HI)
1948 .Case("tma_lo", AMDGPU::TMA_LO)
1949 .Case("tma_hi", AMDGPU::TMA_HI)
1950 .Case("tba_lo", AMDGPU::TBA_LO)
1951 .Case("tba_hi", AMDGPU::TBA_HI)
1952 .Case("null", AMDGPU::SGPR_NULL)
1953 .Default(0);
1956 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
1957 SMLoc &EndLoc) {
1958 auto R = parseRegister();
1959 if (!R) return true;
1960 assert(R->isReg());
1961 RegNo = R->getReg();
1962 StartLoc = R->getStartLoc();
1963 EndLoc = R->getEndLoc();
1964 return false;
1967 bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth,
1968 RegisterKind RegKind, unsigned Reg1,
1969 unsigned RegNum) {
1970 switch (RegKind) {
1971 case IS_SPECIAL:
1972 if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) {
1973 Reg = AMDGPU::EXEC;
1974 RegWidth = 2;
1975 return true;
1977 if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) {
1978 Reg = AMDGPU::FLAT_SCR;
1979 RegWidth = 2;
1980 return true;
1982 if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) {
1983 Reg = AMDGPU::XNACK_MASK;
1984 RegWidth = 2;
1985 return true;
1987 if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) {
1988 Reg = AMDGPU::VCC;
1989 RegWidth = 2;
1990 return true;
1992 if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) {
1993 Reg = AMDGPU::TBA;
1994 RegWidth = 2;
1995 return true;
1997 if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) {
1998 Reg = AMDGPU::TMA;
1999 RegWidth = 2;
2000 return true;
2002 return false;
2003 case IS_VGPR:
2004 case IS_SGPR:
2005 case IS_AGPR:
2006 case IS_TTMP:
2007 if (Reg1 != Reg + RegWidth) {
2008 return false;
2010 RegWidth++;
2011 return true;
2012 default:
2013 llvm_unreachable("unexpected register kind");
2017 static const StringRef Registers[] = {
2018 { "v" },
2019 { "s" },
2020 { "ttmp" },
2021 { "acc" },
2022 { "a" },
2025 bool
2026 AMDGPUAsmParser::isRegister(const AsmToken &Token,
2027 const AsmToken &NextToken) const {
2029 // A list of consecutive registers: [s0,s1,s2,s3]
2030 if (Token.is(AsmToken::LBrac))
2031 return true;
2033 if (!Token.is(AsmToken::Identifier))
2034 return false;
2036 // A single register like s0 or a range of registers like s[0:1]
2038 StringRef RegName = Token.getString();
2040 for (StringRef Reg : Registers) {
2041 if (RegName.startswith(Reg)) {
2042 if (Reg.size() < RegName.size()) {
2043 unsigned RegNum;
2044 // A single register with an index: rXX
2045 if (!RegName.substr(Reg.size()).getAsInteger(10, RegNum))
2046 return true;
2047 } else {
2048 // A range of registers: r[XX:YY].
2049 if (NextToken.is(AsmToken::LBrac))
2050 return true;
2055 return getSpecialRegForName(RegName);
2058 bool
2059 AMDGPUAsmParser::isRegister()
2061 return isRegister(getToken(), peekToken());
2064 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
2065 unsigned &RegNum, unsigned &RegWidth,
2066 unsigned *DwordRegIndex) {
2067 if (DwordRegIndex) { *DwordRegIndex = 0; }
2068 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2069 if (getLexer().is(AsmToken::Identifier)) {
2070 StringRef RegName = Parser.getTok().getString();
2071 if ((Reg = getSpecialRegForName(RegName))) {
2072 Parser.Lex();
2073 RegKind = IS_SPECIAL;
2074 } else {
2075 unsigned RegNumIndex = 0;
2076 if (RegName[0] == 'v') {
2077 RegNumIndex = 1;
2078 RegKind = IS_VGPR;
2079 } else if (RegName[0] == 's') {
2080 RegNumIndex = 1;
2081 RegKind = IS_SGPR;
2082 } else if (RegName[0] == 'a') {
2083 RegNumIndex = RegName.startswith("acc") ? 3 : 1;
2084 RegKind = IS_AGPR;
2085 } else if (RegName.startswith("ttmp")) {
2086 RegNumIndex = strlen("ttmp");
2087 RegKind = IS_TTMP;
2088 } else {
2089 return false;
2091 if (RegName.size() > RegNumIndex) {
2092 // Single 32-bit register: vXX.
2093 if (RegName.substr(RegNumIndex).getAsInteger(10, RegNum))
2094 return false;
2095 Parser.Lex();
2096 RegWidth = 1;
2097 } else {
2098 // Range of registers: v[XX:YY]. ":YY" is optional.
2099 Parser.Lex();
2100 int64_t RegLo, RegHi;
2101 if (getLexer().isNot(AsmToken::LBrac))
2102 return false;
2103 Parser.Lex();
2105 if (getParser().parseAbsoluteExpression(RegLo))
2106 return false;
2108 const bool isRBrace = getLexer().is(AsmToken::RBrac);
2109 if (!isRBrace && getLexer().isNot(AsmToken::Colon))
2110 return false;
2111 Parser.Lex();
2113 if (isRBrace) {
2114 RegHi = RegLo;
2115 } else {
2116 if (getParser().parseAbsoluteExpression(RegHi))
2117 return false;
2119 if (getLexer().isNot(AsmToken::RBrac))
2120 return false;
2121 Parser.Lex();
2123 RegNum = (unsigned) RegLo;
2124 RegWidth = (RegHi - RegLo) + 1;
2127 } else if (getLexer().is(AsmToken::LBrac)) {
2128 // List of consecutive registers: [s0,s1,s2,s3]
2129 Parser.Lex();
2130 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, nullptr))
2131 return false;
2132 if (RegWidth != 1)
2133 return false;
2134 RegisterKind RegKind1;
2135 unsigned Reg1, RegNum1, RegWidth1;
2136 do {
2137 if (getLexer().is(AsmToken::Comma)) {
2138 Parser.Lex();
2139 } else if (getLexer().is(AsmToken::RBrac)) {
2140 Parser.Lex();
2141 break;
2142 } else if (ParseAMDGPURegister(RegKind1, Reg1, RegNum1, RegWidth1, nullptr)) {
2143 if (RegWidth1 != 1) {
2144 return false;
2146 if (RegKind1 != RegKind) {
2147 return false;
2149 if (!AddNextRegisterToList(Reg, RegWidth, RegKind1, Reg1, RegNum1)) {
2150 return false;
2152 } else {
2153 return false;
2155 } while (true);
2156 } else {
2157 return false;
2159 switch (RegKind) {
2160 case IS_SPECIAL:
2161 RegNum = 0;
2162 RegWidth = 1;
2163 break;
2164 case IS_VGPR:
2165 case IS_SGPR:
2166 case IS_AGPR:
2167 case IS_TTMP:
2169 unsigned Size = 1;
2170 if (RegKind == IS_SGPR || RegKind == IS_TTMP) {
2171 // SGPR and TTMP registers must be aligned. Max required alignment is 4 dwords.
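// For example, a 64-bit pair must start at an even index: s[2:3] is
// accepted below while s[1:2] is rejected.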
2172 Size = std::min(RegWidth, 4u);
2174 if (RegNum % Size != 0)
2175 return false;
2176 if (DwordRegIndex) { *DwordRegIndex = RegNum; }
2177 RegNum = RegNum / Size;
2178 int RCID = getRegClass(RegKind, RegWidth);
2179 if (RCID == -1)
2180 return false;
2181 const MCRegisterClass RC = TRI->getRegClass(RCID);
2182 if (RegNum >= RC.getNumRegs())
2183 return false;
2184 Reg = RC.getRegister(RegNum);
2185 break;
2188 default:
2189 llvm_unreachable("unexpected register kind");
2192 if (!subtargetHasRegister(*TRI, Reg))
2193 return false;
2194 return true;
2197 Optional<StringRef>
2198 AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) {
2199 switch (RegKind) {
2200 case IS_VGPR:
2201 return StringRef(".amdgcn.next_free_vgpr");
2202 case IS_SGPR:
2203 return StringRef(".amdgcn.next_free_sgpr");
2204 default:
2205 return None;
2209 void AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) {
2210 auto SymbolName = getGprCountSymbolName(RegKind);
2211 assert(SymbolName && "initializing invalid register kind");
2212 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
2213 Sym->setVariableValue(MCConstantExpr::create(0, getContext()));
2216 bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind,
2217 unsigned DwordRegIndex,
2218 unsigned RegWidth) {
2219 // Symbols are only defined for GCN targets
2220 if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6)
2221 return true;
2223 auto SymbolName = getGprCountSymbolName(RegKind);
2224 if (!SymbolName)
2225 return true;
2226 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
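// Track the highest register index referenced so far; e.g. after parsing
// v[0:3] the .amdgcn.next_free_vgpr symbol is raised to at least 4.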
2228 int64_t NewMax = DwordRegIndex + RegWidth - 1;
2229 int64_t OldCount;
2231 if (!Sym->isVariable())
2232 return !Error(getParser().getTok().getLoc(),
2233 ".amdgcn.next_free_{v,s}gpr symbols must be variable");
2234 if (!Sym->getVariableValue(false)->evaluateAsAbsolute(OldCount))
2235 return !Error(
2236 getParser().getTok().getLoc(),
2237 ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions");
2239 if (OldCount <= NewMax)
2240 Sym->setVariableValue(MCConstantExpr::create(NewMax + 1, getContext()));
2242 return true;
2245 std::unique_ptr<AMDGPUOperand> AMDGPUAsmParser::parseRegister() {
2246 const auto &Tok = Parser.getTok();
2247 SMLoc StartLoc = Tok.getLoc();
2248 SMLoc EndLoc = Tok.getEndLoc();
2249 RegisterKind RegKind;
2250 unsigned Reg, RegNum, RegWidth, DwordRegIndex;
2252 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, &DwordRegIndex)) {
2253 //FIXME: improve error messages (bug 41303).
2254 Error(StartLoc, "not a valid operand.");
2255 return nullptr;
2257 if (AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
2258 if (!updateGprCountSymbols(RegKind, DwordRegIndex, RegWidth))
2259 return nullptr;
2260 } else
2261 KernelScope.usesRegister(RegKind, DwordRegIndex, RegWidth);
2262 return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc);
2265 OperandMatchResultTy
2266 AMDGPUAsmParser::parseImm(OperandVector &Operands, bool HasSP3AbsModifier) {
2267 // TODO: add syntactic sugar for 1/(2*PI)
2269 assert(!isRegister());
2270 assert(!isModifier());
2272 const auto& Tok = getToken();
2273 const auto& NextTok = peekToken();
2274 bool IsReal = Tok.is(AsmToken::Real);
2275 SMLoc S = getLoc();
2276 bool Negate = false;
2278 if (!IsReal && Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Real)) {
2279 lex();
2280 IsReal = true;
2281 Negate = true;
2284 if (IsReal) {
2285 // Floating-point expressions are not supported.
2286 // Only floating-point literals with an
2287 // optional sign are accepted here.
2289 StringRef Num = getTokenStr();
2290 lex();
2292 APFloat RealVal(APFloat::IEEEdouble());
2293 auto roundMode = APFloat::rmNearestTiesToEven;
2294 if (RealVal.convertFromString(Num, roundMode) == APFloat::opInvalidOp) {
2295 return MatchOperand_ParseFail;
2297 if (Negate)
2298 RealVal.changeSign();
2300 Operands.push_back(
2301 AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S,
2302 AMDGPUOperand::ImmTyNone, true));
2304 return MatchOperand_Success;
2306 } else {
2307 int64_t IntVal;
2308 const MCExpr *Expr;
2309 SMLoc S = getLoc();
2311 if (HasSP3AbsModifier) {
2312 // This is a workaround for handling expressions
2313 // as arguments of SP3 'abs' modifier, for example:
2314 // |1.0|
2315 // |-1|
2316 // |1+x|
2317 // This syntax is not compatible with syntax of standard
2318 // MC expressions (due to the trailing '|').
2319 SMLoc EndLoc;
2320 if (getParser().parsePrimaryExpr(Expr, EndLoc))
2321 return MatchOperand_ParseFail;
2322 } else {
2323 if (Parser.parseExpression(Expr))
2324 return MatchOperand_ParseFail;
2327 if (Expr->evaluateAsAbsolute(IntVal)) {
2328 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
2329 } else {
2330 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
2333 return MatchOperand_Success;
2336 return MatchOperand_NoMatch;
2339 OperandMatchResultTy
2340 AMDGPUAsmParser::parseReg(OperandVector &Operands) {
2341 if (!isRegister())
2342 return MatchOperand_NoMatch;
2344 if (auto R = parseRegister()) {
2345 assert(R->isReg());
2346 Operands.push_back(std::move(R));
2347 return MatchOperand_Success;
2349 return MatchOperand_ParseFail;
2352 OperandMatchResultTy
2353 AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod) {
2354 auto res = parseReg(Operands);
2355 if (res != MatchOperand_NoMatch) {
2356 return res;
2357 } else if (isModifier()) {
2358 return MatchOperand_NoMatch;
2359 } else {
2360 return parseImm(Operands, HasSP3AbsMod);
2364 bool
2365 AMDGPUAsmParser::isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2366 if (Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::LParen)) {
2367 const auto &str = Token.getString();
2368 return str == "abs" || str == "neg" || str == "sext";
2370 return false;
2373 bool
2374 AMDGPUAsmParser::isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const {
2375 return Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::Colon);
2378 bool
2379 AMDGPUAsmParser::isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2380 return isNamedOperandModifier(Token, NextToken) || Token.is(AsmToken::Pipe);
2383 bool
2384 AMDGPUAsmParser::isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2385 return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken);
2388 // Check if this is an operand modifier or an opcode modifier
2389 // which may look like an expression but is not. We should
2390 // avoid parsing these modifiers as expressions. Currently
2391 // recognized sequences are:
2392 // |...|
2393 // abs(...)
2394 // neg(...)
2395 // sext(...)
2396 // -reg
2397 // -|...|
2398 // -abs(...)
2399 // name:...
2400 // Note that simple opcode modifiers like 'gds' may be parsed as
2401 // expressions; this is a special case. See getExpressionAsToken.
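// For illustration only (not tied to a particular instruction), operands
// written as -v1, -|v2|, abs(v3), neg(v4), sext(v5) or offset:16 are
// recognized here rather than being parsed as expressions.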
2403 bool
2404 AMDGPUAsmParser::isModifier() {
2406 AsmToken Tok = getToken();
2407 AsmToken NextToken[2];
2408 peekTokens(NextToken);
2410 return isOperandModifier(Tok, NextToken[0]) ||
2411 (Tok.is(AsmToken::Minus) && isRegOrOperandModifier(NextToken[0], NextToken[1])) ||
2412 isOpcodeModifierWithVal(Tok, NextToken[0]);
2415 // Check if the current token is an SP3 'neg' modifier.
2416 // Currently this modifier is allowed in the following contexts:
2418 // 1. Before a register, e.g. "-v0", "-v[...]" or "-[v0,v1]".
2419 // 2. Before an 'abs' modifier: -abs(...)
2420 // 3. Before an SP3 'abs' modifier: -|...|
2422 // In all other cases "-" is handled as a part
2423 // of an expression that follows the sign.
2425 // Note: When "-" is followed by an integer literal,
2426 // this is interpreted as integer negation rather
2427 // than a floating-point NEG modifier applied to the literal.
2428 // Besides being counter-intuitive, such use of a floating-point
2429 // NEG modifier would have resulted in different meanings
2430 // of integer literals used with VOP1/2/C and VOP3,
2431 // for example:
2432 // v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF
2433 // v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001
2434 // Negative fp literals with a preceding "-" are
2435 // handled likewise for uniformity.
2437 bool
2438 AMDGPUAsmParser::parseSP3NegModifier() {
2440 AsmToken NextToken[2];
2441 peekTokens(NextToken);
2443 if (isToken(AsmToken::Minus) &&
2444 (isRegister(NextToken[0], NextToken[1]) ||
2445 NextToken[0].is(AsmToken::Pipe) ||
2446 isId(NextToken[0], "abs"))) {
2447 lex();
2448 return true;
2451 return false;
2454 OperandMatchResultTy
2455 AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands,
2456 bool AllowImm) {
2457 bool Neg, SP3Neg;
2458 bool Abs, SP3Abs;
2459 SMLoc Loc;
2461 // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead.
2462 if (isToken(AsmToken::Minus) && peekToken().is(AsmToken::Minus)) {
2463 Error(getLoc(), "invalid syntax, expected 'neg' modifier");
2464 return MatchOperand_ParseFail;
2467 SP3Neg = parseSP3NegModifier();
2469 Loc = getLoc();
2470 Neg = trySkipId("neg");
2471 if (Neg && SP3Neg) {
2472 Error(Loc, "expected register or immediate");
2473 return MatchOperand_ParseFail;
2475 if (Neg && !skipToken(AsmToken::LParen, "expected left paren after neg"))
2476 return MatchOperand_ParseFail;
2478 Abs = trySkipId("abs");
2479 if (Abs && !skipToken(AsmToken::LParen, "expected left paren after abs"))
2480 return MatchOperand_ParseFail;
2482 Loc = getLoc();
2483 SP3Abs = trySkipToken(AsmToken::Pipe);
2484 if (Abs && SP3Abs) {
2485 Error(Loc, "expected register or immediate");
2486 return MatchOperand_ParseFail;
2489 OperandMatchResultTy Res;
2490 if (AllowImm) {
2491 Res = parseRegOrImm(Operands, SP3Abs);
2492 } else {
2493 Res = parseReg(Operands);
2495 if (Res != MatchOperand_Success) {
2496 return (SP3Neg || Neg || SP3Abs || Abs)? MatchOperand_ParseFail : Res;
2499 if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar"))
2500 return MatchOperand_ParseFail;
2501 if (Abs && !skipToken(AsmToken::RParen, "expected closing parentheses"))
2502 return MatchOperand_ParseFail;
2503 if (Neg && !skipToken(AsmToken::RParen, "expected closing parentheses"))
2504 return MatchOperand_ParseFail;
2506 AMDGPUOperand::Modifiers Mods;
2507 Mods.Abs = Abs || SP3Abs;
2508 Mods.Neg = Neg || SP3Neg;
2510 if (Mods.hasFPModifiers()) {
2511 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
2512 if (Op.isExpr()) {
2513 Error(Op.getStartLoc(), "expected an absolute expression");
2514 return MatchOperand_ParseFail;
2516 Op.setModifiers(Mods);
2518 return MatchOperand_Success;
2521 OperandMatchResultTy
2522 AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands,
2523 bool AllowImm) {
2524 bool Sext = trySkipId("sext");
2525 if (Sext && !skipToken(AsmToken::LParen, "expected left paren after sext"))
2526 return MatchOperand_ParseFail;
2528 OperandMatchResultTy Res;
2529 if (AllowImm) {
2530 Res = parseRegOrImm(Operands);
2531 } else {
2532 Res = parseReg(Operands);
2534 if (Res != MatchOperand_Success) {
2535 return Sext? MatchOperand_ParseFail : Res;
2538 if (Sext && !skipToken(AsmToken::RParen, "expected closing parentheses"))
2539 return MatchOperand_ParseFail;
2541 AMDGPUOperand::Modifiers Mods;
2542 Mods.Sext = Sext;
2544 if (Mods.hasIntModifiers()) {
2545 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
2546 if (Op.isExpr()) {
2547 Error(Op.getStartLoc(), "expected an absolute expression");
2548 return MatchOperand_ParseFail;
2550 Op.setModifiers(Mods);
2553 return MatchOperand_Success;
2556 OperandMatchResultTy
2557 AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) {
2558 return parseRegOrImmWithFPInputMods(Operands, false);
2561 OperandMatchResultTy
2562 AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) {
2563 return parseRegOrImmWithIntInputMods(Operands, false);
2566 OperandMatchResultTy AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) {
2567 auto Loc = getLoc();
2568 if (trySkipId("off")) {
2569 Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc,
2570 AMDGPUOperand::ImmTyOff, false));
2571 return MatchOperand_Success;
2574 if (!isRegister())
2575 return MatchOperand_NoMatch;
2577 std::unique_ptr<AMDGPUOperand> Reg = parseRegister();
2578 if (Reg) {
2579 Operands.push_back(std::move(Reg));
2580 return MatchOperand_Success;
2583 return MatchOperand_ParseFail;
2587 unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
2588 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
2590 if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) ||
2591 (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) ||
2592 (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) ||
2593 (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) )
2594 return Match_InvalidOperand;
2596 if ((TSFlags & SIInstrFlags::VOP3) &&
2597 (TSFlags & SIInstrFlags::VOPAsmPrefer32Bit) &&
2598 getForcedEncodingSize() != 64)
2599 return Match_PreferE32;
2601 if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
2602 Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
2603 // v_mac_f32/16 allow only dst_sel == DWORD;
2604 auto OpNum =
2605 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel);
2606 const auto &Op = Inst.getOperand(OpNum);
2607 if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) {
2608 return Match_InvalidOperand;
2612 return Match_Success;
2615 // What asm variants we should check
2616 ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const {
2617 if (getForcedEncodingSize() == 32) {
2618 static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT};
2619 return makeArrayRef(Variants);
2622 if (isForcedVOP3()) {
2623 static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3};
2624 return makeArrayRef(Variants);
2627 if (isForcedSDWA()) {
2628 static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA,
2629 AMDGPUAsmVariants::SDWA9};
2630 return makeArrayRef(Variants);
2633 if (isForcedDPP()) {
2634 static const unsigned Variants[] = {AMDGPUAsmVariants::DPP};
2635 return makeArrayRef(Variants);
2638 static const unsigned Variants[] = {
2639 AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3,
2640 AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9, AMDGPUAsmVariants::DPP
2643 return makeArrayRef(Variants);
2646 unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const {
2647 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
2648 const unsigned Num = Desc.getNumImplicitUses();
2649 for (unsigned i = 0; i < Num; ++i) {
2650 unsigned Reg = Desc.ImplicitUses[i];
2651 switch (Reg) {
2652 case AMDGPU::FLAT_SCR:
2653 case AMDGPU::VCC:
2654 case AMDGPU::VCC_LO:
2655 case AMDGPU::VCC_HI:
2656 case AMDGPU::M0:
2657 case AMDGPU::SGPR_NULL:
2658 return Reg;
2659 default:
2660 break;
2663 return AMDGPU::NoRegister;
2666 // NB: This code is correct only when used to check constant
2667 // bus limitations because GFX7 supports no f16 inline constants.
2668 // Note that there are no cases when a GFX7 opcode violates
2669 // constant bus limitations due to the use of an f16 constant.
2670 bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst,
2671 unsigned OpIdx) const {
2672 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
2674 if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) {
2675 return false;
2678 const MCOperand &MO = Inst.getOperand(OpIdx);
2680 int64_t Val = MO.getImm();
2681 auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx);
2683 switch (OpSize) { // expected operand size
2684 case 8:
2685 return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm());
2686 case 4:
2687 return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm());
2688 case 2: {
2689 const unsigned OperandType = Desc.OpInfo[OpIdx].OperandType;
2690 if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 ||
2691 OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16 ||
2692 OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2INT16 ||
2693 OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2FP16 ||
2694 OperandType == AMDGPU::OPERAND_REG_IMM_V2INT16 ||
2695 OperandType == AMDGPU::OPERAND_REG_IMM_V2FP16) {
2696 return AMDGPU::isInlinableLiteralV216(Val, hasInv2PiInlineImm());
2697 } else {
2698 return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm());
2701 default:
2702 llvm_unreachable("invalid operand size");
2706 bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) {
2707 const MCOperand &MO = Inst.getOperand(OpIdx);
2708 if (MO.isImm()) {
2709 return !isInlineConstant(Inst, OpIdx);
2711 return !MO.isReg() ||
2712 isSGPR(mc2PseudoReg(MO.getReg()), getContext().getRegisterInfo());
2715 bool AMDGPUAsmParser::validateConstantBusLimitations(const MCInst &Inst) {
2716 const unsigned Opcode = Inst.getOpcode();
2717 const MCInstrDesc &Desc = MII.get(Opcode);
2718 unsigned ConstantBusUseCount = 0;
2719 unsigned NumLiterals = 0;
2720 unsigned LiteralSize;
2722 if (Desc.TSFlags &
2723 (SIInstrFlags::VOPC |
2724 SIInstrFlags::VOP1 | SIInstrFlags::VOP2 |
2725 SIInstrFlags::VOP3 | SIInstrFlags::VOP3P |
2726 SIInstrFlags::SDWA)) {
2727 // Check special imm operands (used by madmk, etc)
2728 if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1) {
2729 ++ConstantBusUseCount;
2732 SmallDenseSet<unsigned> SGPRsUsed;
2733 unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst);
2734 if (SGPRUsed != AMDGPU::NoRegister) {
2735 SGPRsUsed.insert(SGPRUsed);
2736 ++ConstantBusUseCount;
2739 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
2740 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
2741 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
2743 const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };
2745 for (int OpIdx : OpIndices) {
2746 if (OpIdx == -1) break;
2748 const MCOperand &MO = Inst.getOperand(OpIdx);
2749 if (usesConstantBus(Inst, OpIdx)) {
2750 if (MO.isReg()) {
2751 const unsigned Reg = mc2PseudoReg(MO.getReg());
2752 // Pairs of registers with a partial intersection like these
2753 // s0, s[0:1]
2754 // flat_scratch_lo, flat_scratch
2755 // flat_scratch_lo, flat_scratch_hi
2756 // are theoretically valid but they are disabled anyway.
2757 // Note that this code mimics SIInstrInfo::verifyInstruction
2758 if (!SGPRsUsed.count(Reg)) {
2759 SGPRsUsed.insert(Reg);
2760 ++ConstantBusUseCount;
2762 } else { // Expression or a literal
2764 if (Desc.OpInfo[OpIdx].OperandType == MCOI::OPERAND_IMMEDIATE)
2765 continue; // special operand like VINTERP attr_chan
2767 // An instruction may use only one literal.
2768 // This has been validated in a previous step.
2769 // See validateVOP3Literal.
2770 // This literal may be used as more than one operand.
2771 // If all these operands are of the same size,
2772 // this literal counts as one scalar value.
2773 // Otherwise it counts as 2 scalar values.
2774 // See "GFX10 Shader Programming", section 3.6.2.3.
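// For example, if the same literal feeds a 64-bit src0 and a 32-bit src1,
// it is counted as two scalar values; two 32-bit uses count as one.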
2776 unsigned Size = AMDGPU::getOperandSize(Desc, OpIdx);
2777 if (Size < 4) Size = 4;
2779 if (NumLiterals == 0) {
2780 NumLiterals = 1;
2781 LiteralSize = Size;
2782 } else if (LiteralSize != Size) {
2783 NumLiterals = 2;
2789 ConstantBusUseCount += NumLiterals;
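// GFX10 relaxes the limit and allows up to two scalar values on the
// constant bus; earlier targets allow only one.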
2791 if (isGFX10())
2792 return ConstantBusUseCount <= 2;
2794 return ConstantBusUseCount <= 1;
2797 bool AMDGPUAsmParser::validateEarlyClobberLimitations(const MCInst &Inst) {
2798 const unsigned Opcode = Inst.getOpcode();
2799 const MCInstrDesc &Desc = MII.get(Opcode);
2801 const int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst);
2802 if (DstIdx == -1 ||
2803 Desc.getOperandConstraint(DstIdx, MCOI::EARLY_CLOBBER) == -1) {
2804 return true;
2807 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2809 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
2810 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
2811 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
2813 assert(DstIdx != -1);
2814 const MCOperand &Dst = Inst.getOperand(DstIdx);
2815 assert(Dst.isReg());
2816 const unsigned DstReg = mc2PseudoReg(Dst.getReg());
2818 const int SrcIndices[] = { Src0Idx, Src1Idx, Src2Idx };
2820 for (int SrcIdx : SrcIndices) {
2821 if (SrcIdx == -1) break;
2822 const MCOperand &Src = Inst.getOperand(SrcIdx);
2823 if (Src.isReg()) {
2824 const unsigned SrcReg = mc2PseudoReg(Src.getReg());
2825 if (isRegIntersect(DstReg, SrcReg, TRI)) {
2826 return false;
2831 return true;
2834 bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) {
2836 const unsigned Opc = Inst.getOpcode();
2837 const MCInstrDesc &Desc = MII.get(Opc);
2839 if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) {
2840 int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp);
2841 assert(ClampIdx != -1);
2842 return Inst.getOperand(ClampIdx).getImm() == 0;
2845 return true;
2848 bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst) {
2850 const unsigned Opc = Inst.getOpcode();
2851 const MCInstrDesc &Desc = MII.get(Opc);
2853 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
2854 return true;
2856 int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata);
2857 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
2858 int TFEIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe);
2860 assert(VDataIdx != -1);
2861 assert(DMaskIdx != -1);
2862 assert(TFEIdx != -1);
2864 unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx);
2865 unsigned TFESize = Inst.getOperand(TFEIdx).getImm()? 1 : 0;
2866 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
2867 if (DMask == 0)
2868 DMask = 1;
2870 unsigned DataSize =
2871 (Desc.TSFlags & SIInstrFlags::Gather4) ? 4 : countPopulation(DMask);
2872 if (hasPackedD16()) {
2873 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
2874 if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm())
2875 DataSize = (DataSize + 1) / 2;
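// E.g. an illustrative dmask of 0x7 selects three components, so vdata must
// be a 3-dword tuple (4 with tfe), or 2 dwords when packed d16 is enabled.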
2878 return (VDataSize / 4) == DataSize + TFESize;
2881 bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst) {
2882 const unsigned Opc = Inst.getOpcode();
2883 const MCInstrDesc &Desc = MII.get(Opc);
2885 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0 || !isGFX10())
2886 return true;
2888 const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
2889 const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
2890 AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
2891 int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0);
2892 int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::srsrc);
2893 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
2895 assert(VAddr0Idx != -1);
2896 assert(SrsrcIdx != -1);
2897 assert(DimIdx != -1);
2898 assert(SrsrcIdx > VAddr0Idx);
2900 unsigned Dim = Inst.getOperand(DimIdx).getImm();
2901 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
2902 bool IsNSA = SrsrcIdx - VAddr0Idx > 1;
2903 unsigned VAddrSize =
2904 IsNSA ? SrsrcIdx - VAddr0Idx
2905 : AMDGPU::getRegOperandSize(getMRI(), Desc, VAddr0Idx) / 4;
2907 unsigned AddrSize = BaseOpcode->NumExtraArgs +
2908 (BaseOpcode->Gradients ? DimInfo->NumGradients : 0) +
2909 (BaseOpcode->Coordinates ? DimInfo->NumCoords : 0) +
2910 (BaseOpcode->LodOrClampOrMip ? 1 : 0);
2911 if (!IsNSA) {
2912 if (AddrSize > 8)
2913 AddrSize = 16;
2914 else if (AddrSize > 4)
2915 AddrSize = 8;
2918 return VAddrSize == AddrSize;
2921 bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) {
2923 const unsigned Opc = Inst.getOpcode();
2924 const MCInstrDesc &Desc = MII.get(Opc);
2926 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
2927 return true;
2928 if (!Desc.mayLoad() || !Desc.mayStore())
2929 return true; // Not atomic
2931 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
2932 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
2934 // This is an incomplete check because image_atomic_cmpswap
2935 // may only use 0x3 and 0xf while other atomic operations
2936 // may use 0x1 and 0x3. However these limitations are
2937 // verified when we check that dmask matches dst size.
2938 return DMask == 0x1 || DMask == 0x3 || DMask == 0xf;
2941 bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) {
2943 const unsigned Opc = Inst.getOpcode();
2944 const MCInstrDesc &Desc = MII.get(Opc);
2946 if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0)
2947 return true;
2949 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
2950 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
2952 // GATHER4 instructions use dmask in a different fashion compared to
2953 // other MIMG instructions. The only useful DMASK values are
2954 // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns
2955 // (red,red,red,red) etc.) The ISA document doesn't mention
2956 // this.
2957 return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8;
2960 bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) {
2962 const unsigned Opc = Inst.getOpcode();
2963 const MCInstrDesc &Desc = MII.get(Opc);
2965 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
2966 return true;
2968 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
2969 if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) {
2970 if (isCI() || isSI())
2971 return false;
2974 return true;
2977 bool AMDGPUAsmParser::validateMIMGDim(const MCInst &Inst) {
2978 const unsigned Opc = Inst.getOpcode();
2979 const MCInstrDesc &Desc = MII.get(Opc);
2981 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
2982 return true;
2984 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
2985 if (DimIdx < 0)
2986 return true;
2988 long Imm = Inst.getOperand(DimIdx).getImm();
2989 if (Imm < 0 || Imm >= 8)
2990 return false;
2992 return true;
2995 static bool IsRevOpcode(const unsigned Opcode)
2997 switch (Opcode) {
2998 case AMDGPU::V_SUBREV_F32_e32:
2999 case AMDGPU::V_SUBREV_F32_e64:
3000 case AMDGPU::V_SUBREV_F32_e32_gfx10:
3001 case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7:
3002 case AMDGPU::V_SUBREV_F32_e32_vi:
3003 case AMDGPU::V_SUBREV_F32_e64_gfx10:
3004 case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7:
3005 case AMDGPU::V_SUBREV_F32_e64_vi:
3007 case AMDGPU::V_SUBREV_I32_e32:
3008 case AMDGPU::V_SUBREV_I32_e64:
3009 case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7:
3010 case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7:
3012 case AMDGPU::V_SUBBREV_U32_e32:
3013 case AMDGPU::V_SUBBREV_U32_e64:
3014 case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7:
3015 case AMDGPU::V_SUBBREV_U32_e32_vi:
3016 case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7:
3017 case AMDGPU::V_SUBBREV_U32_e64_vi:
3019 case AMDGPU::V_SUBREV_U32_e32:
3020 case AMDGPU::V_SUBREV_U32_e64:
3021 case AMDGPU::V_SUBREV_U32_e32_gfx9:
3022 case AMDGPU::V_SUBREV_U32_e32_vi:
3023 case AMDGPU::V_SUBREV_U32_e64_gfx9:
3024 case AMDGPU::V_SUBREV_U32_e64_vi:
3026 case AMDGPU::V_SUBREV_F16_e32:
3027 case AMDGPU::V_SUBREV_F16_e64:
3028 case AMDGPU::V_SUBREV_F16_e32_gfx10:
3029 case AMDGPU::V_SUBREV_F16_e32_vi:
3030 case AMDGPU::V_SUBREV_F16_e64_gfx10:
3031 case AMDGPU::V_SUBREV_F16_e64_vi:
3033 case AMDGPU::V_SUBREV_U16_e32:
3034 case AMDGPU::V_SUBREV_U16_e64:
3035 case AMDGPU::V_SUBREV_U16_e32_vi:
3036 case AMDGPU::V_SUBREV_U16_e64_vi:
3038 case AMDGPU::V_SUBREV_CO_U32_e32_gfx9:
3039 case AMDGPU::V_SUBREV_CO_U32_e64_gfx10:
3040 case AMDGPU::V_SUBREV_CO_U32_e64_gfx9:
3042 case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9:
3043 case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9:
3045 case AMDGPU::V_SUBREV_NC_U32_e32_gfx10:
3046 case AMDGPU::V_SUBREV_NC_U32_e64_gfx10:
3048 case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10:
3049 case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10:
3051 case AMDGPU::V_LSHRREV_B32_e32:
3052 case AMDGPU::V_LSHRREV_B32_e64:
3053 case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7:
3054 case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7:
3055 case AMDGPU::V_LSHRREV_B32_e32_vi:
3056 case AMDGPU::V_LSHRREV_B32_e64_vi:
3057 case AMDGPU::V_LSHRREV_B32_e32_gfx10:
3058 case AMDGPU::V_LSHRREV_B32_e64_gfx10:
3060 case AMDGPU::V_ASHRREV_I32_e32:
3061 case AMDGPU::V_ASHRREV_I32_e64:
3062 case AMDGPU::V_ASHRREV_I32_e32_gfx10:
3063 case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7:
3064 case AMDGPU::V_ASHRREV_I32_e32_vi:
3065 case AMDGPU::V_ASHRREV_I32_e64_gfx10:
3066 case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7:
3067 case AMDGPU::V_ASHRREV_I32_e64_vi:
3069 case AMDGPU::V_LSHLREV_B32_e32:
3070 case AMDGPU::V_LSHLREV_B32_e64:
3071 case AMDGPU::V_LSHLREV_B32_e32_gfx10:
3072 case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7:
3073 case AMDGPU::V_LSHLREV_B32_e32_vi:
3074 case AMDGPU::V_LSHLREV_B32_e64_gfx10:
3075 case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7:
3076 case AMDGPU::V_LSHLREV_B32_e64_vi:
3078 case AMDGPU::V_LSHLREV_B16_e32:
3079 case AMDGPU::V_LSHLREV_B16_e64:
3080 case AMDGPU::V_LSHLREV_B16_e32_vi:
3081 case AMDGPU::V_LSHLREV_B16_e64_vi:
3082 case AMDGPU::V_LSHLREV_B16_gfx10:
3084 case AMDGPU::V_LSHRREV_B16_e32:
3085 case AMDGPU::V_LSHRREV_B16_e64:
3086 case AMDGPU::V_LSHRREV_B16_e32_vi:
3087 case AMDGPU::V_LSHRREV_B16_e64_vi:
3088 case AMDGPU::V_LSHRREV_B16_gfx10:
3090 case AMDGPU::V_ASHRREV_I16_e32:
3091 case AMDGPU::V_ASHRREV_I16_e64:
3092 case AMDGPU::V_ASHRREV_I16_e32_vi:
3093 case AMDGPU::V_ASHRREV_I16_e64_vi:
3094 case AMDGPU::V_ASHRREV_I16_gfx10:
3096 case AMDGPU::V_LSHLREV_B64:
3097 case AMDGPU::V_LSHLREV_B64_gfx10:
3098 case AMDGPU::V_LSHLREV_B64_vi:
3100 case AMDGPU::V_LSHRREV_B64:
3101 case AMDGPU::V_LSHRREV_B64_gfx10:
3102 case AMDGPU::V_LSHRREV_B64_vi:
3104 case AMDGPU::V_ASHRREV_I64:
3105 case AMDGPU::V_ASHRREV_I64_gfx10:
3106 case AMDGPU::V_ASHRREV_I64_vi:
3108 case AMDGPU::V_PK_LSHLREV_B16:
3109 case AMDGPU::V_PK_LSHLREV_B16_gfx10:
3110 case AMDGPU::V_PK_LSHLREV_B16_vi:
3112 case AMDGPU::V_PK_LSHRREV_B16:
3113 case AMDGPU::V_PK_LSHRREV_B16_gfx10:
3114 case AMDGPU::V_PK_LSHRREV_B16_vi:
3115 case AMDGPU::V_PK_ASHRREV_I16:
3116 case AMDGPU::V_PK_ASHRREV_I16_gfx10:
3117 case AMDGPU::V_PK_ASHRREV_I16_vi:
3118 return true;
3119 default:
3120 return false;
3124 bool AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) {
3126 using namespace SIInstrFlags;
3127 const unsigned Opcode = Inst.getOpcode();
3128 const MCInstrDesc &Desc = MII.get(Opcode);
3130 // lds_direct register is defined so that it can be used
3131 // with 9-bit operands only. Ignore encodings which do not accept these.
3132 if ((Desc.TSFlags & (VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA)) == 0)
3133 return true;
3135 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3136 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3137 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
3139 const int SrcIndices[] = { Src1Idx, Src2Idx };
3141 // lds_direct cannot be specified as either src1 or src2.
3142 for (int SrcIdx : SrcIndices) {
3143 if (SrcIdx == -1) break;
3144 const MCOperand &Src = Inst.getOperand(SrcIdx);
3145 if (Src.isReg() && Src.getReg() == LDS_DIRECT) {
3146 return false;
3150 if (Src0Idx == -1)
3151 return true;
3153 const MCOperand &Src = Inst.getOperand(Src0Idx);
3154 if (!Src.isReg() || Src.getReg() != LDS_DIRECT)
3155 return true;
3157 // lds_direct is specified as src0. Check additional limitations.
3158 return (Desc.TSFlags & SIInstrFlags::SDWA) == 0 && !IsRevOpcode(Opcode);
3161 SMLoc AMDGPUAsmParser::getFlatOffsetLoc(const OperandVector &Operands) const {
3162 for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
3163 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
3164 if (Op.isFlatOffset())
3165 return Op.getStartLoc();
3167 return getLoc();
3170 bool AMDGPUAsmParser::validateFlatOffset(const MCInst &Inst,
3171 const OperandVector &Operands) {
3172 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
3173 if ((TSFlags & SIInstrFlags::FLAT) == 0)
3174 return true;
3176 auto Opcode = Inst.getOpcode();
3177 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
3178 assert(OpNum != -1);
3180 const auto &Op = Inst.getOperand(OpNum);
3181 if (!hasFlatOffsets() && Op.getImm() != 0) {
3182 Error(getFlatOffsetLoc(Operands),
3183 "flat offset modifier is not supported on this GPU");
3184 return false;
3187 // Address offset is 12-bit signed for GFX10, 13-bit for GFX9.
3188 // For FLAT segment the offset must be positive;
3189 // MSB is ignored and forced to zero.
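// The resulting ranges, as enforced by the checks below:
//   GFX9:  13-bit signed (segment instructions), 12-bit unsigned (FLAT)
//   GFX10: 12-bit signed (segment instructions), 11-bit unsigned (FLAT)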
3190 unsigned OffsetSize = isGFX9() ? 13 : 12;
3191 if (TSFlags & SIInstrFlags::IsNonFlatSeg) {
3192 if (!isIntN(OffsetSize, Op.getImm())) {
3193 Error(getFlatOffsetLoc(Operands),
3194 isGFX9() ? "expected a 13-bit signed offset" :
3195 "expected a 12-bit signed offset");
3196 return false;
3198 } else {
3199 if (!isUIntN(OffsetSize - 1, Op.getImm())) {
3200 Error(getFlatOffsetLoc(Operands),
3201 isGFX9() ? "expected a 12-bit unsigned offset" :
3202 "expected an 11-bit unsigned offset");
3203 return false;
3207 return true;
3210 bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const {
3211 unsigned Opcode = Inst.getOpcode();
3212 const MCInstrDesc &Desc = MII.get(Opcode);
3213 if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC)))
3214 return true;
3216 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3217 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3219 const int OpIndices[] = { Src0Idx, Src1Idx };
3221 unsigned NumLiterals = 0;
3222 uint32_t LiteralValue;
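// Repeated uses of the same literal value count as one literal; e.g. an
// illustrative s_add_u32 s0, 0x12345678, 0x12345678 is accepted, while two
// different literal values are rejected.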
3224 for (int OpIdx : OpIndices) {
3225 if (OpIdx == -1) break;
3227 const MCOperand &MO = Inst.getOperand(OpIdx);
3228 if (MO.isImm() &&
3229 // Exclude special imm operands (like that used by s_set_gpr_idx_on)
3230 AMDGPU::isSISrcOperand(Desc, OpIdx) &&
3231 !isInlineConstant(Inst, OpIdx)) {
3232 uint32_t Value = static_cast<uint32_t>(MO.getImm());
3233 if (NumLiterals == 0 || LiteralValue != Value) {
3234 LiteralValue = Value;
3235 ++NumLiterals;
3240 return NumLiterals <= 1;
3243 bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) {
3244 const unsigned Opc = Inst.getOpcode();
3245 if (Opc == AMDGPU::V_PERMLANE16_B32_gfx10 ||
3246 Opc == AMDGPU::V_PERMLANEX16_B32_gfx10) {
3247 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
3248 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
3250 if (OpSel & ~3)
3251 return false;
3253 return true;
3256 // Check if VCC register matches wavefront size
3257 bool AMDGPUAsmParser::validateVccOperand(unsigned Reg) const {
3258 auto FB = getFeatureBits();
3259 return (FB[AMDGPU::FeatureWavefrontSize64] && Reg == AMDGPU::VCC) ||
3260 (FB[AMDGPU::FeatureWavefrontSize32] && Reg == AMDGPU::VCC_LO);
3263 // VOP3 literal is only allowed in GFX10+ and only one can be used
3264 bool AMDGPUAsmParser::validateVOP3Literal(const MCInst &Inst) const {
3265 unsigned Opcode = Inst.getOpcode();
3266 const MCInstrDesc &Desc = MII.get(Opcode);
3267 if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P)))
3268 return true;
3270 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3271 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3272 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
3274 const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };
3276 unsigned NumLiterals = 0;
3277 uint32_t LiteralValue;
3279 for (int OpIdx : OpIndices) {
3280 if (OpIdx == -1) break;
3282 const MCOperand &MO = Inst.getOperand(OpIdx);
3283 if (!MO.isImm() || !AMDGPU::isSISrcOperand(Desc, OpIdx))
3284 continue;
3286 if (!isInlineConstant(Inst, OpIdx)) {
3287 uint32_t Value = static_cast<uint32_t>(MO.getImm());
3288 if (NumLiterals == 0 || LiteralValue != Value) {
3289 LiteralValue = Value;
3290 ++NumLiterals;
3295 return !NumLiterals ||
3296 (NumLiterals == 1 && getFeatureBits()[AMDGPU::FeatureVOP3Literal]);
3299 bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst,
3300 const SMLoc &IDLoc,
3301 const OperandVector &Operands) {
3302 if (!validateLdsDirect(Inst)) {
3303 Error(IDLoc,
3304 "invalid use of lds_direct");
3305 return false;
3307 if (!validateSOPLiteral(Inst)) {
3308 Error(IDLoc,
3309 "only one literal operand is allowed");
3310 return false;
3312 if (!validateVOP3Literal(Inst)) {
3313 Error(IDLoc,
3314 "invalid literal operand");
3315 return false;
3317 if (!validateConstantBusLimitations(Inst)) {
3318 Error(IDLoc,
3319 "invalid operand (violates constant bus restrictions)");
3320 return false;
3322 if (!validateEarlyClobberLimitations(Inst)) {
3323 Error(IDLoc,
3324 "destination must be different than all sources");
3325 return false;
3327 if (!validateIntClampSupported(Inst)) {
3328 Error(IDLoc,
3329 "integer clamping is not supported on this GPU");
3330 return false;
3332 if (!validateOpSel(Inst)) {
3333 Error(IDLoc,
3334 "invalid op_sel operand");
3335 return false;
3337 // For MUBUF/MTBUF, d16 is part of the opcode, so there is nothing to validate.
3338 if (!validateMIMGD16(Inst)) {
3339 Error(IDLoc,
3340 "d16 modifier is not supported on this GPU");
3341 return false;
3343 if (!validateMIMGDim(Inst)) {
3344 Error(IDLoc, "dim modifier is required on this GPU");
3345 return false;
3347 if (!validateMIMGDataSize(Inst)) {
3348 Error(IDLoc,
3349 "image data size does not match dmask and tfe");
3350 return false;
3352 if (!validateMIMGAddrSize(Inst)) {
3353 Error(IDLoc,
3354 "image address size does not match dim and a16");
3355 return false;
3357 if (!validateMIMGAtomicDMask(Inst)) {
3358 Error(IDLoc,
3359 "invalid atomic image dmask");
3360 return false;
3362 if (!validateMIMGGatherDMask(Inst)) {
3363 Error(IDLoc,
3364 "invalid image_gather dmask: only one bit must be set");
3365 return false;
3367 if (!validateFlatOffset(Inst, Operands)) {
3368 return false;
3371 return true;
3374 static std::string AMDGPUMnemonicSpellCheck(StringRef S,
3375 const FeatureBitset &FBS,
3376 unsigned VariantID = 0);
3378 bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
3379 OperandVector &Operands,
3380 MCStreamer &Out,
3381 uint64_t &ErrorInfo,
3382 bool MatchingInlineAsm) {
3383 MCInst Inst;
3384 unsigned Result = Match_Success;
3385 for (auto Variant : getMatchedVariants()) {
3386 uint64_t EI;
3387 auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm,
3388 Variant);
3389 // We order match statuses from least to most specific. We use the most
3390 // specific status as the result:
3391 // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature < Match_PreferE32
3392 if ((R == Match_Success) ||
3393 (R == Match_PreferE32) ||
3394 (R == Match_MissingFeature && Result != Match_PreferE32) ||
3395 (R == Match_InvalidOperand && Result != Match_MissingFeature
3396 && Result != Match_PreferE32) ||
3397 (R == Match_MnemonicFail && Result != Match_InvalidOperand
3398 && Result != Match_MissingFeature
3399 && Result != Match_PreferE32)) {
3400 Result = R;
3401 ErrorInfo = EI;
3403 if (R == Match_Success)
3404 break;
3407 switch (Result) {
3408 default: break;
3409 case Match_Success:
3410 if (!validateInstruction(Inst, IDLoc, Operands)) {
3411 return true;
3413 Inst.setLoc(IDLoc);
3414 Out.EmitInstruction(Inst, getSTI());
3415 return false;
3417 case Match_MissingFeature:
3418 return Error(IDLoc, "instruction not supported on this GPU");
3420 case Match_MnemonicFail: {
3421 FeatureBitset FBS = ComputeAvailableFeatures(getSTI().getFeatureBits());
3422 std::string Suggestion = AMDGPUMnemonicSpellCheck(
3423 ((AMDGPUOperand &)*Operands[0]).getToken(), FBS);
3424 return Error(IDLoc, "invalid instruction" + Suggestion,
3425 ((AMDGPUOperand &)*Operands[0]).getLocRange());
3428 case Match_InvalidOperand: {
3429 SMLoc ErrorLoc = IDLoc;
3430 if (ErrorInfo != ~0ULL) {
3431 if (ErrorInfo >= Operands.size()) {
3432 return Error(IDLoc, "too few operands for instruction");
3434 ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc();
3435 if (ErrorLoc == SMLoc())
3436 ErrorLoc = IDLoc;
3438 return Error(ErrorLoc, "invalid operand for instruction");
3441 case Match_PreferE32:
3442 return Error(IDLoc, "internal error: instruction without _e64 suffix "
3443 "should be encoded as e32");
3445 llvm_unreachable("Implement any new match types added!");
3448 bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) {
3449 int64_t Tmp = -1;
3450 if (getLexer().isNot(AsmToken::Integer) && getLexer().isNot(AsmToken::Identifier)) {
3451 return true;
3453 if (getParser().parseAbsoluteExpression(Tmp)) {
3454 return true;
3456 Ret = static_cast<uint32_t>(Tmp);
3457 return false;
3460 bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major,
3461 uint32_t &Minor) {
3462 if (ParseAsAbsoluteExpression(Major))
3463 return TokError("invalid major version");
3465 if (getLexer().isNot(AsmToken::Comma))
3466 return TokError("minor version number required, comma expected");
3467 Lex();
3469 if (ParseAsAbsoluteExpression(Minor))
3470 return TokError("invalid minor version");
3472 return false;
3475 bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() {
3476 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
3477 return TokError("directive only supported for amdgcn architecture");
3479 std::string Target;
3481 SMLoc TargetStart = getTok().getLoc();
3482 if (getParser().parseEscapedString(Target))
3483 return true;
3484 SMRange TargetRange = SMRange(TargetStart, getTok().getLoc());
3486 std::string ExpectedTarget;
3487 raw_string_ostream ExpectedTargetOS(ExpectedTarget);
3488 IsaInfo::streamIsaVersion(&getSTI(), ExpectedTargetOS);
3490 if (Target != ExpectedTargetOS.str())
3491 return getParser().Error(TargetRange.Start, "target must match options",
3492 TargetRange);
3494 getTargetStreamer().EmitDirectiveAMDGCNTarget(Target);
3495 return false;
3498 bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) {
3499 return getParser().Error(Range.Start, "value out of range", Range);
3502 bool AMDGPUAsmParser::calculateGPRBlocks(
3503 const FeatureBitset &Features, bool VCCUsed, bool FlatScrUsed,
3504 bool XNACKUsed, Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR,
3505 SMRange VGPRRange, unsigned NextFreeSGPR, SMRange SGPRRange,
3506 unsigned &VGPRBlocks, unsigned &SGPRBlocks) {
3507 // TODO(scott.linder): These calculations are duplicated from
3508 // AMDGPUAsmPrinter::getSIProgramInfo and could be unified.
3509 IsaVersion Version = getIsaVersion(getSTI().getCPU());
3511 unsigned NumVGPRs = NextFreeVGPR;
3512 unsigned NumSGPRs = NextFreeSGPR;
3514 if (Version.Major >= 10)
3515 NumSGPRs = 0;
3516 else {
3517 unsigned MaxAddressableNumSGPRs =
3518 IsaInfo::getAddressableNumSGPRs(&getSTI());
3520 if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) &&
3521 NumSGPRs > MaxAddressableNumSGPRs)
3522 return OutOfRangeError(SGPRRange);
3524 NumSGPRs +=
3525 IsaInfo::getNumExtraSGPRs(&getSTI(), VCCUsed, FlatScrUsed, XNACKUsed);
3527 if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) &&
3528 NumSGPRs > MaxAddressableNumSGPRs)
3529 return OutOfRangeError(SGPRRange);
3531 if (Features.test(FeatureSGPRInitBug))
3532 NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG;
3535 VGPRBlocks =
3536 IsaInfo::getNumVGPRBlocks(&getSTI(), NumVGPRs, EnableWavefrontSize32);
3537 SGPRBlocks = IsaInfo::getNumSGPRBlocks(&getSTI(), NumSGPRs);
3539 return false;
3542 bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
3543 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
3544 return TokError("directive only supported for amdgcn architecture");
3546 if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA)
3547 return TokError("directive only supported for amdhsa OS");
3549 StringRef KernelName;
3550 if (getParser().parseIdentifier(KernelName))
3551 return true;
3553 kernel_descriptor_t KD = getDefaultAmdhsaKernelDescriptor(&getSTI());
3555 StringSet<> Seen;
3557 IsaVersion IVersion = getIsaVersion(getSTI().getCPU());
3559 SMRange VGPRRange;
3560 uint64_t NextFreeVGPR = 0;
3561 SMRange SGPRRange;
3562 uint64_t NextFreeSGPR = 0;
3563 unsigned UserSGPRCount = 0;
3564 bool ReserveVCC = true;
3565 bool ReserveFlatScr = true;
3566 bool ReserveXNACK = hasXNACK();
3567 Optional<bool> EnableWavefrontSize32;
3569 while (true) {
3570 while (getLexer().is(AsmToken::EndOfStatement))
3571 Lex();
3573 if (getLexer().isNot(AsmToken::Identifier))
3574 return TokError("expected .amdhsa_ directive or .end_amdhsa_kernel");
3576 StringRef ID = getTok().getIdentifier();
3577 SMRange IDRange = getTok().getLocRange();
3578 Lex();
3580 if (ID == ".end_amdhsa_kernel")
3581 break;
3583 if (Seen.find(ID) != Seen.end())
3584 return TokError(".amdhsa_ directives cannot be repeated");
3585 Seen.insert(ID);
3587 SMLoc ValStart = getTok().getLoc();
3588 int64_t IVal;
3589 if (getParser().parseAbsoluteExpression(IVal))
3590 return true;
3591 SMLoc ValEnd = getTok().getLoc();
3592 SMRange ValRange = SMRange(ValStart, ValEnd);
3594 if (IVal < 0)
3595 return OutOfRangeError(ValRange);
3597 uint64_t Val = IVal;
3599 #define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE) \
3600 if (!isUInt<ENTRY##_WIDTH>(VALUE)) \
3601 return OutOfRangeError(RANGE); \
3602 AMDHSA_BITS_SET(FIELD, ENTRY, VALUE);
3604 if (ID == ".amdhsa_group_segment_fixed_size") {
3605 if (!isUInt<sizeof(KD.group_segment_fixed_size) * CHAR_BIT>(Val))
3606 return OutOfRangeError(ValRange);
3607 KD.group_segment_fixed_size = Val;
3608 } else if (ID == ".amdhsa_private_segment_fixed_size") {
3609 if (!isUInt<sizeof(KD.private_segment_fixed_size) * CHAR_BIT>(Val))
3610 return OutOfRangeError(ValRange);
3611 KD.private_segment_fixed_size = Val;
3612 } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") {
3613 PARSE_BITS_ENTRY(KD.kernel_code_properties,
3614 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER,
3615 Val, ValRange);
3616 UserSGPRCount += 4;
3617 } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") {
3618 PARSE_BITS_ENTRY(KD.kernel_code_properties,
3619 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, Val,
3620 ValRange);
3621 UserSGPRCount += 2;
3622 } else if (ID == ".amdhsa_user_sgpr_queue_ptr") {
3623 PARSE_BITS_ENTRY(KD.kernel_code_properties,
3624 KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, Val,
3625 ValRange);
3626 UserSGPRCount += 2;
3627 } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") {
3628 PARSE_BITS_ENTRY(KD.kernel_code_properties,
3629 KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR,
3630 Val, ValRange);
3631 UserSGPRCount += 2;
3632 } else if (ID == ".amdhsa_user_sgpr_dispatch_id") {
3633 PARSE_BITS_ENTRY(KD.kernel_code_properties,
3634 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, Val,
3635 ValRange);
3636 UserSGPRCount += 2;
3637 } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") {
3638 PARSE_BITS_ENTRY(KD.kernel_code_properties,
3639 KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, Val,
3640 ValRange);
3641 UserSGPRCount += 2;
3642 } else if (ID == ".amdhsa_user_sgpr_private_segment_size") {
3643 PARSE_BITS_ENTRY(KD.kernel_code_properties,
3644 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE,
3645 Val, ValRange);
3646 UserSGPRCount += 1;
3647 } else if (ID == ".amdhsa_wavefront_size32") {
3648 if (IVersion.Major < 10)
3649 return getParser().Error(IDRange.Start, "directive requires gfx10+",
3650 IDRange);
3651 EnableWavefrontSize32 = Val;
3652 PARSE_BITS_ENTRY(KD.kernel_code_properties,
3653 KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32,
3654 Val, ValRange);
3655 } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") {
3656 PARSE_BITS_ENTRY(
3657 KD.compute_pgm_rsrc2,
3658 COMPUTE_PGM_RSRC2_ENABLE_SGPR_PRIVATE_SEGMENT_WAVEFRONT_OFFSET, Val,
3659 ValRange);
3660 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") {
3661 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3662 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, Val,
3663 ValRange);
3664 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") {
3665 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3666 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, Val,
3667 ValRange);
3668 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") {
3669 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3670 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, Val,
3671 ValRange);
3672 } else if (ID == ".amdhsa_system_sgpr_workgroup_info") {
3673 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3674 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, Val,
3675 ValRange);
3676 } else if (ID == ".amdhsa_system_vgpr_workitem_id") {
3677 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3678 COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, Val,
3679 ValRange);
3680 } else if (ID == ".amdhsa_next_free_vgpr") {
3681 VGPRRange = ValRange;
3682 NextFreeVGPR = Val;
3683 } else if (ID == ".amdhsa_next_free_sgpr") {
3684 SGPRRange = ValRange;
3685 NextFreeSGPR = Val;
3686 } else if (ID == ".amdhsa_reserve_vcc") {
3687 if (!isUInt<1>(Val))
3688 return OutOfRangeError(ValRange);
3689 ReserveVCC = Val;
3690 } else if (ID == ".amdhsa_reserve_flat_scratch") {
3691 if (IVersion.Major < 7)
3692 return getParser().Error(IDRange.Start, "directive requires gfx7+",
3693 IDRange);
3694 if (!isUInt<1>(Val))
3695 return OutOfRangeError(ValRange);
3696 ReserveFlatScr = Val;
3697 } else if (ID == ".amdhsa_reserve_xnack_mask") {
3698 if (IVersion.Major < 8)
3699 return getParser().Error(IDRange.Start, "directive requires gfx8+",
3700 IDRange);
3701 if (!isUInt<1>(Val))
3702 return OutOfRangeError(ValRange);
3703 ReserveXNACK = Val;
3704 } else if (ID == ".amdhsa_float_round_mode_32") {
3705 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
3706 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, Val, ValRange);
3707 } else if (ID == ".amdhsa_float_round_mode_16_64") {
3708 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
3709 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, Val, ValRange);
3710 } else if (ID == ".amdhsa_float_denorm_mode_32") {
3711 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
3712 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, Val, ValRange);
3713 } else if (ID == ".amdhsa_float_denorm_mode_16_64") {
3714 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
3715 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, Val,
3716 ValRange);
3717 } else if (ID == ".amdhsa_dx10_clamp") {
3718 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
3719 COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP, Val, ValRange);
3720 } else if (ID == ".amdhsa_ieee_mode") {
3721 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE,
3722 Val, ValRange);
3723 } else if (ID == ".amdhsa_fp16_overflow") {
3724 if (IVersion.Major < 9)
3725 return getParser().Error(IDRange.Start, "directive requires gfx9+",
3726 IDRange);
3727 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FP16_OVFL, Val,
3728 ValRange);
3729 } else if (ID == ".amdhsa_workgroup_processor_mode") {
3730 if (IVersion.Major < 10)
3731 return getParser().Error(IDRange.Start, "directive requires gfx10+",
3732 IDRange);
3733 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_WGP_MODE, Val,
3734 ValRange);
3735 } else if (ID == ".amdhsa_memory_ordered") {
3736 if (IVersion.Major < 10)
3737 return getParser().Error(IDRange.Start, "directive requires gfx10+",
3738 IDRange);
3739 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_MEM_ORDERED, Val,
3740 ValRange);
3741 } else if (ID == ".amdhsa_forward_progress") {
3742 if (IVersion.Major < 10)
3743 return getParser().Error(IDRange.Start, "directive requires gfx10+",
3744 IDRange);
3745 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FWD_PROGRESS, Val,
3746 ValRange);
3747 } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") {
3748 PARSE_BITS_ENTRY(
3749 KD.compute_pgm_rsrc2,
3750 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, Val,
3751 ValRange);
3752 } else if (ID == ".amdhsa_exception_fp_denorm_src") {
3753 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3754 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE,
3755 Val, ValRange);
3756 } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") {
3757 PARSE_BITS_ENTRY(
3758 KD.compute_pgm_rsrc2,
3759 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, Val,
3760 ValRange);
3761 } else if (ID == ".amdhsa_exception_fp_ieee_overflow") {
3762 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3763 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW,
3764 Val, ValRange);
3765 } else if (ID == ".amdhsa_exception_fp_ieee_underflow") {
3766 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3767 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW,
3768 Val, ValRange);
3769 } else if (ID == ".amdhsa_exception_fp_ieee_inexact") {
3770 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3771 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT,
3772 Val, ValRange);
3773 } else if (ID == ".amdhsa_exception_int_div_zero") {
3774 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3775 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO,
3776 Val, ValRange);
3777 } else {
3778 return getParser().Error(IDRange.Start,
3779 "unknown .amdhsa_kernel directive", IDRange);
3782 #undef PARSE_BITS_ENTRY
3785 if (Seen.find(".amdhsa_next_free_vgpr") == Seen.end())
3786 return TokError(".amdhsa_next_free_vgpr directive is required");
3788 if (Seen.find(".amdhsa_next_free_sgpr") == Seen.end())
3789 return TokError(".amdhsa_next_free_sgpr directive is required");
3791 unsigned VGPRBlocks;
3792 unsigned SGPRBlocks;
3793 if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr,
3794 ReserveXNACK, EnableWavefrontSize32, NextFreeVGPR,
3795 VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks,
3796 SGPRBlocks))
3797 return true;
3799 if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>(
3800 VGPRBlocks))
3801 return OutOfRangeError(VGPRRange);
3802 AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
3803 COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, VGPRBlocks);
3805 if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>(
3806 SGPRBlocks))
3807 return OutOfRangeError(SGPRRange);
3808 AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
3809 COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT,
3810 SGPRBlocks);
3812 if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount))
3813 return TokError("too many user SGPRs enabled");
3814 AMDHSA_BITS_SET(KD.compute_pgm_rsrc2, COMPUTE_PGM_RSRC2_USER_SGPR_COUNT,
3815 UserSGPRCount);
3817 getTargetStreamer().EmitAmdhsaKernelDescriptor(
3818 getSTI(), KernelName, KD, NextFreeVGPR, NextFreeSGPR, ReserveVCC,
3819 ReserveFlatScr, ReserveXNACK);
3820 return false;
3823 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectVersion() {
3824 uint32_t Major;
3825 uint32_t Minor;
3827 if (ParseDirectiveMajorMinor(Major, Minor))
3828 return true;
3830 getTargetStreamer().EmitDirectiveHSACodeObjectVersion(Major, Minor);
3831 return false;
3834 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() {
3835 uint32_t Major;
3836 uint32_t Minor;
3837 uint32_t Stepping;
3838 StringRef VendorName;
3839 StringRef ArchName;
3841 // If this directive has no arguments, then use the ISA version for the
3842 // targeted GPU.
3843 if (getLexer().is(AsmToken::EndOfStatement)) {
3844 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
3845 getTargetStreamer().EmitDirectiveHSACodeObjectISA(ISA.Major, ISA.Minor,
3846 ISA.Stepping,
3847 "AMD", "AMDGPU");
3848 return false;
3851 if (ParseDirectiveMajorMinor(Major, Minor))
3852 return true;
3854 if (getLexer().isNot(AsmToken::Comma))
3855 return TokError("stepping version number required, comma expected");
3856 Lex();
3858 if (ParseAsAbsoluteExpression(Stepping))
3859 return TokError("invalid stepping version");
3861 if (getLexer().isNot(AsmToken::Comma))
3862 return TokError("vendor name required, comma expected");
3863 Lex();
3865 if (getLexer().isNot(AsmToken::String))
3866 return TokError("invalid vendor name");
3868 VendorName = getLexer().getTok().getStringContents();
3869 Lex();
3871 if (getLexer().isNot(AsmToken::Comma))
3872 return TokError("arch name required, comma expected");
3873 Lex();
3875 if (getLexer().isNot(AsmToken::String))
3876 return TokError("invalid arch name");
3878 ArchName = getLexer().getTok().getStringContents();
3879 Lex();
3881 getTargetStreamer().EmitDirectiveHSACodeObjectISA(Major, Minor, Stepping,
3882 VendorName, ArchName);
3883 return false;
3886 bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID,
3887 amd_kernel_code_t &Header) {
3888 // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing
3889 // assembly for backwards compatibility.
3890 if (ID == "max_scratch_backing_memory_byte_size") {
3891 Parser.eatToEndOfStatement();
3892 return false;
3895 SmallString<40> ErrStr;
3896 raw_svector_ostream Err(ErrStr);
3897 if (!parseAmdKernelCodeField(ID, getParser(), Header, Err)) {
3898 return TokError(Err.str());
3900 Lex();
3902 if (ID == "enable_wavefront_size32") {
3903 if (Header.code_properties & AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) {
3904 if (!isGFX10())
3905 return TokError("enable_wavefront_size32=1 is only allowed on GFX10+");
3906 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
3907 return TokError("enable_wavefront_size32=1 requires +WavefrontSize32");
3908 } else {
3909 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
3910 return TokError("enable_wavefront_size32=0 requires +WavefrontSize64");
3914 if (ID == "wavefront_size") {
3915 if (Header.wavefront_size == 5) {
3916 if (!isGFX10())
3917 return TokError("wavefront_size=5 is only allowed on GFX10+");
3918 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
3919 return TokError("wavefront_size=5 requires +WavefrontSize32");
3920 } else if (Header.wavefront_size == 6) {
3921 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
3922 return TokError("wavefront_size=6 requires +WavefrontSize64");
3926 if (ID == "enable_wgp_mode") {
3927 if (G_00B848_WGP_MODE(Header.compute_pgm_resource_registers) && !isGFX10())
3928 return TokError("enable_wgp_mode=1 is only allowed on GFX10+");
3931 if (ID == "enable_mem_ordered") {
3932 if (G_00B848_MEM_ORDERED(Header.compute_pgm_resource_registers) && !isGFX10())
3933 return TokError("enable_mem_ordered=1 is only allowed on GFX10+");
3936 if (ID == "enable_fwd_progress") {
3937 if (G_00B848_FWD_PROGRESS(Header.compute_pgm_resource_registers) && !isGFX10())
3938 return TokError("enable_fwd_progress=1 is only allowed on GFX10+");
3941 return false;
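// Illustrative sketch of the legacy directive block parsed below; the field
// shown is one of those validated above and the value is made up:
//
//   .amd_kernel_code_t
//     wavefront_size = 6
//   .end_amd_kernel_code_t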
3944 bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() {
3945 amd_kernel_code_t Header;
3946 AMDGPU::initDefaultAMDKernelCodeT(Header, &getSTI());
3948 while (true) {
3949 // Lex EndOfStatement. This is in a while loop, because lexing a comment
3950 // will set the current token to EndOfStatement.
3951 while(getLexer().is(AsmToken::EndOfStatement))
3952 Lex();
3954 if (getLexer().isNot(AsmToken::Identifier))
3955 return TokError("expected value identifier or .end_amd_kernel_code_t");
3957 StringRef ID = getLexer().getTok().getIdentifier();
3958 Lex();
3960 if (ID == ".end_amd_kernel_code_t")
3961 break;
3963 if (ParseAMDKernelCodeTValue(ID, Header))
3964 return true;
3967 getTargetStreamer().EmitAMDKernelCodeT(Header);
3969 return false;
3972 bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() {
3973 if (getLexer().isNot(AsmToken::Identifier))
3974 return TokError("expected symbol name");
3976 StringRef KernelName = Parser.getTok().getString();
3978 getTargetStreamer().EmitAMDGPUSymbolType(KernelName,
3979 ELF::STT_AMDGPU_HSA_KERNEL);
3980 Lex();
3981 if (!AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI()))
3982 KernelScope.initialize(getContext());
3983 return false;
3986 bool AMDGPUAsmParser::ParseDirectiveISAVersion() {
3987 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) {
3988 return Error(getParser().getTok().getLoc(),
3989 ".amd_amdgpu_isa directive is not available on non-amdgcn "
3990 "architectures");
3993 auto ISAVersionStringFromASM = getLexer().getTok().getStringContents();
3995 std::string ISAVersionStringFromSTI;
3996 raw_string_ostream ISAVersionStreamFromSTI(ISAVersionStringFromSTI);
3997 IsaInfo::streamIsaVersion(&getSTI(), ISAVersionStreamFromSTI);
3999 if (ISAVersionStringFromASM != ISAVersionStreamFromSTI.str()) {
4000 return Error(getParser().getTok().getLoc(),
4001 ".amd_amdgpu_isa directive does not match triple and/or mcpu "
4002 "arguments specified through the command line");
4005 getTargetStreamer().EmitISAVersion(ISAVersionStreamFromSTI.str());
4006 Lex();
4008 return false;
4011 bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() {
4012 const char *AssemblerDirectiveBegin;
4013 const char *AssemblerDirectiveEnd;
4014 std::tie(AssemblerDirectiveBegin, AssemblerDirectiveEnd) =
4015 AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())
4016 ? std::make_tuple(HSAMD::V3::AssemblerDirectiveBegin,
4017 HSAMD::V3::AssemblerDirectiveEnd)
4018 : std::make_tuple(HSAMD::AssemblerDirectiveBegin,
4019 HSAMD::AssemblerDirectiveEnd);
4021 if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) {
4022 return Error(getParser().getTok().getLoc(),
4023 (Twine(AssemblerDirectiveBegin) + Twine(" directive is "
4024 "not available on non-amdhsa OSes")).str());
4027 std::string HSAMetadataString;
4028 if (ParseToEndDirective(AssemblerDirectiveBegin, AssemblerDirectiveEnd,
4029 HSAMetadataString))
4030 return true;
4032 if (IsaInfo::hasCodeObjectV3(&getSTI())) {
4033 if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString))
4034 return Error(getParser().getTok().getLoc(), "invalid HSA metadata");
4035 } else {
4036 if (!getTargetStreamer().EmitHSAMetadataV2(HSAMetadataString))
4037 return Error(getParser().getTok().getLoc(), "invalid HSA metadata");
4040 return false;
4043 /// Common code to parse out a block of text (typically YAML) between start and
4044 /// end directives.
4045 bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin,
4046 const char *AssemblerDirectiveEnd,
4047 std::string &CollectString) {
4049 raw_string_ostream CollectStream(CollectString);
4051 getLexer().setSkipSpace(false);
4053 bool FoundEnd = false;
4054 while (!getLexer().is(AsmToken::Eof)) {
4055 while (getLexer().is(AsmToken::Space)) {
4056 CollectStream << getLexer().getTok().getString();
4057 Lex();
4060 if (getLexer().is(AsmToken::Identifier)) {
4061 StringRef ID = getLexer().getTok().getIdentifier();
4062 if (ID == AssemblerDirectiveEnd) {
4063 Lex();
4064 FoundEnd = true;
4065 break;
4069 CollectStream << Parser.parseStringToEndOfStatement()
4070 << getContext().getAsmInfo()->getSeparatorString();
4072 Parser.eatToEndOfStatement();
4075 getLexer().setSkipSpace(true);
4077 if (getLexer().is(AsmToken::Eof) && !FoundEnd) {
4078 return TokError(Twine("expected directive ") +
4079 Twine(AssemblerDirectiveEnd) + Twine(" not found"));
4082 CollectStream.flush();
4083 return false;
4086 /// Parse the assembler directive for new MsgPack-format PAL metadata.
4087 bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() {
4088 std::string String;
4089 if (ParseToEndDirective(AMDGPU::PALMD::AssemblerDirectiveBegin,
4090 AMDGPU::PALMD::AssemblerDirectiveEnd, String))
4091 return true;
4093 auto PALMetadata = getTargetStreamer().getPALMetadata();
4094 if (!PALMetadata->setFromString(String))
4095 return Error(getParser().getTok().getLoc(), "invalid PAL metadata");
4096 return false;
4099 /// Parse the assembler directive for old linear-format PAL metadata.
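/// The directive body is a comma-separated list of register/value pairs,
/// e.g. (illustrative, made-up values):
///   <PALMD::AssemblerDirective> 0x2c0a, 0x0, 0x2e12, 0x80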
4100 bool AMDGPUAsmParser::ParseDirectivePALMetadata() {
4101 if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) {
4102 return Error(getParser().getTok().getLoc(),
4103 (Twine(PALMD::AssemblerDirective) + Twine(" directive is "
4104 "not available on non-amdpal OSes")).str());
4107 auto PALMetadata = getTargetStreamer().getPALMetadata();
4108 PALMetadata->setLegacy();
4109 for (;;) {
4110 uint32_t Key, Value;
4111 if (ParseAsAbsoluteExpression(Key)) {
4112 return TokError(Twine("invalid value in ") +
4113 Twine(PALMD::AssemblerDirective));
4115 if (getLexer().isNot(AsmToken::Comma)) {
4116 return TokError(Twine("expected an even number of values in ") +
4117 Twine(PALMD::AssemblerDirective));
4119 Lex();
4120 if (ParseAsAbsoluteExpression(Value)) {
4121 return TokError(Twine("invalid value in ") +
4122 Twine(PALMD::AssemblerDirective));
4124 PALMetadata->setRegister(Key, Value);
4125 if (getLexer().isNot(AsmToken::Comma))
4126 break;
4127 Lex();
4129 return false;
4132 /// ParseDirectiveAMDGPULDS
4133 /// ::= .amdgpu_lds identifier ',' size_expression [',' align_expression]
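/// e.g. (illustrative): .amdgpu_lds lds_block, 4096, 16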
4134 bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() {
4135 if (getParser().checkForValidSection())
4136 return true;
4138 StringRef Name;
4139 SMLoc NameLoc = getLexer().getLoc();
4140 if (getParser().parseIdentifier(Name))
4141 return TokError("expected identifier in directive");
4143 MCSymbol *Symbol = getContext().getOrCreateSymbol(Name);
4144 if (parseToken(AsmToken::Comma, "expected ','"))
4145 return true;
4147 unsigned LocalMemorySize = AMDGPU::IsaInfo::getLocalMemorySize(&getSTI());
4149 int64_t Size;
4150 SMLoc SizeLoc = getLexer().getLoc();
4151 if (getParser().parseAbsoluteExpression(Size))
4152 return true;
4153 if (Size < 0)
4154 return Error(SizeLoc, "size must be non-negative");
4155 if (Size > LocalMemorySize)
4156 return Error(SizeLoc, "size is too large");
4158 int64_t Align = 4;
4159 if (getLexer().is(AsmToken::Comma)) {
4160 Lex();
4161 SMLoc AlignLoc = getLexer().getLoc();
4162 if (getParser().parseAbsoluteExpression(Align))
4163 return true;
4164 if (Align < 0 || !isPowerOf2_64(Align))
4165 return Error(AlignLoc, "alignment must be a power of two");
4167 // Alignment larger than the size of LDS is possible in theory, as long
4168 // as the linker manages to place the symbol at address 0, but we do want
4169 // to make sure the alignment fits nicely into a 32-bit integer.
4170 if (Align >= 1u << 31)
4171 return Error(AlignLoc, "alignment is too large");
4174 if (parseToken(AsmToken::EndOfStatement,
4175 "unexpected token in '.amdgpu_lds' directive"))
4176 return true;
4178 Symbol->redefineIfPossible();
4179 if (!Symbol->isUndefined())
4180 return Error(NameLoc, "invalid symbol redefinition");
4182 getTargetStreamer().emitAMDGPULDS(Symbol, Size, Align);
4183 return false;
4186 bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
4187 StringRef IDVal = DirectiveID.getString();
4189 if (AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
4190 if (IDVal == ".amdgcn_target")
4191 return ParseDirectiveAMDGCNTarget();
4193 if (IDVal == ".amdhsa_kernel")
4194 return ParseDirectiveAMDHSAKernel();
4196 // TODO: Restructure/combine with PAL metadata directive.
4197 if (IDVal == AMDGPU::HSAMD::V3::AssemblerDirectiveBegin)
4198 return ParseDirectiveHSAMetadata();
4199 } else {
4200 if (IDVal == ".hsa_code_object_version")
4201 return ParseDirectiveHSACodeObjectVersion();
4203 if (IDVal == ".hsa_code_object_isa")
4204 return ParseDirectiveHSACodeObjectISA();
4206 if (IDVal == ".amd_kernel_code_t")
4207 return ParseDirectiveAMDKernelCodeT();
4209 if (IDVal == ".amdgpu_hsa_kernel")
4210 return ParseDirectiveAMDGPUHsaKernel();
4212 if (IDVal == ".amd_amdgpu_isa")
4213 return ParseDirectiveISAVersion();
4215 if (IDVal == AMDGPU::HSAMD::AssemblerDirectiveBegin)
4216 return ParseDirectiveHSAMetadata();
4219 if (IDVal == ".amdgpu_lds")
4220 return ParseDirectiveAMDGPULDS();
4222 if (IDVal == PALMD::AssemblerDirectiveBegin)
4223 return ParseDirectivePALMetadataBegin();
4225 if (IDVal == PALMD::AssemblerDirective)
4226 return ParseDirectivePALMetadata();
4228 return true;
4231 bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI,
4232 unsigned RegNo) const {
4234 for (MCRegAliasIterator R(AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15, &MRI, true);
4235 R.isValid(); ++R) {
4236 if (*R == RegNo)
4237 return isGFX9() || isGFX10();
4240 // GFX10 has 2 more SGPRs: 104 and 105.
4241 for (MCRegAliasIterator R(AMDGPU::SGPR104_SGPR105, &MRI, true);
4242 R.isValid(); ++R) {
4243 if (*R == RegNo)
4244 return hasSGPR104_SGPR105();
4247 switch (RegNo) {
4248 case AMDGPU::SRC_SHARED_BASE:
4249 case AMDGPU::SRC_SHARED_LIMIT:
4250 case AMDGPU::SRC_PRIVATE_BASE:
4251 case AMDGPU::SRC_PRIVATE_LIMIT:
4252 case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
4253 return !isCI() && !isSI() && !isVI();
4254 case AMDGPU::TBA:
4255 case AMDGPU::TBA_LO:
4256 case AMDGPU::TBA_HI:
4257 case AMDGPU::TMA:
4258 case AMDGPU::TMA_LO:
4259 case AMDGPU::TMA_HI:
4260 return !isGFX9() && !isGFX10();
4261 case AMDGPU::XNACK_MASK:
4262 case AMDGPU::XNACK_MASK_LO:
4263 case AMDGPU::XNACK_MASK_HI:
4264 return !isCI() && !isSI() && !isGFX10() && hasXNACK();
4265 case AMDGPU::SGPR_NULL:
4266 return isGFX10();
4267 default:
4268 break;
4271 if (isCI())
4272 return true;
4274 if (isSI() || isGFX10()) {
4275 // No flat_scr on SI.
4276 // On GFX10 flat scratch is not a valid register operand and can only be
4277 // accessed with s_setreg/s_getreg.
4278 switch (RegNo) {
4279 case AMDGPU::FLAT_SCR:
4280 case AMDGPU::FLAT_SCR_LO:
4281 case AMDGPU::FLAT_SCR_HI:
4282 return false;
4283 default:
4284 return true;
4288 // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that
4289 // SI/CI have.
4290 for (MCRegAliasIterator R(AMDGPU::SGPR102_SGPR103, &MRI, true);
4291 R.isValid(); ++R) {
4292 if (*R == RegNo)
4293 return hasSGPR102_SGPR103();
4296 return true;
4299 OperandMatchResultTy
4300 AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic,
4301 OperandMode Mode) {
4302 // Try to parse with a custom parser
4303 OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic);
4305 // If we successfully parsed the operand or if there was an error parsing,
4306 // we are done.
4308 // If we are parsing after we reach EndOfStatement then this means we
4309 // are appending default values to the Operands list. This is only done
4310 // by a custom parser, so we shouldn't continue on to the generic parsing.
4311 if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail ||
4312 getLexer().is(AsmToken::EndOfStatement))
4313 return ResTy;
4315 if (Mode == OperandMode_NSA && getLexer().is(AsmToken::LBrac)) {
4316 unsigned Prefix = Operands.size();
4317 SMLoc LBraceLoc = getTok().getLoc();
4318 Parser.Lex(); // eat the '['
4320 for (;;) {
4321 ResTy = parseReg(Operands);
4322 if (ResTy != MatchOperand_Success)
4323 return ResTy;
4325 if (getLexer().is(AsmToken::RBrac))
4326 break;
4328 if (getLexer().isNot(AsmToken::Comma))
4329 return MatchOperand_ParseFail;
4330 Parser.Lex();
4333 if (Operands.size() - Prefix > 1) {
4334 Operands.insert(Operands.begin() + Prefix,
4335 AMDGPUOperand::CreateToken(this, "[", LBraceLoc));
4336 Operands.push_back(AMDGPUOperand::CreateToken(this, "]",
4337 getTok().getLoc()));
4340 Parser.Lex(); // eat the ']'
4341 return MatchOperand_Success;
4344 return parseRegOrImm(Operands);
4347 StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) {
4348 // Clear any forced encodings from the previous instruction.
4349 setForcedEncodingSize(0);
4350 setForcedDPP(false);
4351 setForcedSDWA(false);
4353 if (Name.endswith("_e64")) {
4354 setForcedEncodingSize(64);
4355 return Name.substr(0, Name.size() - 4);
4356 } else if (Name.endswith("_e32")) {
4357 setForcedEncodingSize(32);
4358 return Name.substr(0, Name.size() - 4);
4359 } else if (Name.endswith("_dpp")) {
4360 setForcedDPP(true);
4361 return Name.substr(0, Name.size() - 4);
4362 } else if (Name.endswith("_sdwa")) {
4363 setForcedSDWA(true);
4364 return Name.substr(0, Name.size() - 5);
4366 return Name;
4369 bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info,
4370 StringRef Name,
4371 SMLoc NameLoc, OperandVector &Operands) {
4372 // Add the instruction mnemonic
4373 Name = parseMnemonicSuffix(Name);
4374 Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc));
4376 bool IsMIMG = Name.startswith("image_");
4378 while (!getLexer().is(AsmToken::EndOfStatement)) {
4379 OperandMode Mode = OperandMode_Default;
4380 if (IsMIMG && isGFX10() && Operands.size() == 2)
4381 Mode = OperandMode_NSA;
4382 OperandMatchResultTy Res = parseOperand(Operands, Name, Mode);
4384 // Eat the comma or space if there is one.
4385 if (getLexer().is(AsmToken::Comma))
4386 Parser.Lex();
4388 switch (Res) {
4389 case MatchOperand_Success: break;
4390 case MatchOperand_ParseFail:
4391 // FIXME: use real operand location rather than the current location.
4392 Error(getLexer().getLoc(), "failed parsing operand.");
4393 while (!getLexer().is(AsmToken::EndOfStatement)) {
4394 Parser.Lex();
4396 return true;
4397 case MatchOperand_NoMatch:
4398 // FIXME: use real operand location rather than the current location.
4399 Error(getLexer().getLoc(), "not a valid operand.");
4400 while (!getLexer().is(AsmToken::EndOfStatement)) {
4401 Parser.Lex();
4403 return true;
4407 return false;
4410 //===----------------------------------------------------------------------===//
4411 // Utility functions
4412 //===----------------------------------------------------------------------===//
4414 OperandMatchResultTy
4415 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, int64_t &IntVal) {
4417 if (!trySkipId(Prefix, AsmToken::Colon))
4418 return MatchOperand_NoMatch;
4420 return parseExpr(IntVal) ? MatchOperand_Success : MatchOperand_ParseFail;
4423 OperandMatchResultTy
4424 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
4425 AMDGPUOperand::ImmTy ImmTy,
4426 bool (*ConvertResult)(int64_t&)) {
4427 SMLoc S = getLoc();
4428 int64_t Value = 0;
4430 OperandMatchResultTy Res = parseIntWithPrefix(Prefix, Value);
4431 if (Res != MatchOperand_Success)
4432 return Res;
4434 if (ConvertResult && !ConvertResult(Value)) {
4435 Error(S, "invalid " + StringRef(Prefix) + " value.");
4438 Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy));
4439 return MatchOperand_Success;
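// Parses a bracketed list of 0/1 values with a prefix, as used by operands
// such as op_sel. For example (illustrative), "op_sel:[0,1,1]" is packed
// little-endian into the immediate 0b110.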
4442 OperandMatchResultTy
4443 AMDGPUAsmParser::parseOperandArrayWithPrefix(const char *Prefix,
4444 OperandVector &Operands,
4445 AMDGPUOperand::ImmTy ImmTy,
4446 bool (*ConvertResult)(int64_t&)) {
4447 SMLoc S = getLoc();
4448 if (!trySkipId(Prefix, AsmToken::Colon))
4449 return MatchOperand_NoMatch;
4451 if (!skipToken(AsmToken::LBrac, "expected a left square bracket"))
4452 return MatchOperand_ParseFail;
4454 unsigned Val = 0;
4455 const unsigned MaxSize = 4;
4457 // FIXME: How to verify the number of elements matches the number of src
4458 // operands?
4459 for (int I = 0; ; ++I) {
4460 int64_t Op;
4461 SMLoc Loc = getLoc();
4462 if (!parseExpr(Op))
4463 return MatchOperand_ParseFail;
4465 if (Op != 0 && Op != 1) {
4466 Error(Loc, "invalid " + StringRef(Prefix) + " value.");
4467 return MatchOperand_ParseFail;
4470 Val |= (Op << I);
4472 if (trySkipToken(AsmToken::RBrac))
4473 break;
4475 if (I + 1 == MaxSize) {
4476 Error(getLoc(), "expected a closing square bracket");
4477 return MatchOperand_ParseFail;
4480 if (!skipToken(AsmToken::Comma, "expected a comma"))
4481 return MatchOperand_ParseFail;
4484 Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy));
4485 return MatchOperand_Success;
4488 OperandMatchResultTy
4489 AMDGPUAsmParser::parseNamedBit(const char *Name, OperandVector &Operands,
4490 AMDGPUOperand::ImmTy ImmTy) {
4491 int64_t Bit = 0;
4492 SMLoc S = Parser.getTok().getLoc();
4494 // If we are already at the end of the statement, this is a default argument,
4495 // so keep the default value.
4496 if (getLexer().isNot(AsmToken::EndOfStatement)) {
4497 switch(getLexer().getKind()) {
4498 case AsmToken::Identifier: {
4499 StringRef Tok = Parser.getTok().getString();
4500 if (Tok == Name) {
4501 if (Tok == "r128" && isGFX9())
4502 Error(S, "r128 modifier is not supported on this GPU");
4503 if (Tok == "a16" && !isGFX9() && !isGFX10())
4504 Error(S, "a16 modifier is not supported on this GPU");
4505 Bit = 1;
4506 Parser.Lex();
4507 } else if (Tok.startswith("no") && Tok.endswith(Name)) {
4508 Bit = 0;
4509 Parser.Lex();
4510 } else {
4511 return MatchOperand_NoMatch;
4513 break;
4515 default:
4516 return MatchOperand_NoMatch;
4520 if (!isGFX10() && ImmTy == AMDGPUOperand::ImmTyDLC)
4521 return MatchOperand_ParseFail;
4523 Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy));
4524 return MatchOperand_Success;
4527 static void addOptionalImmOperand(
4528 MCInst& Inst, const OperandVector& Operands,
4529 AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx,
4530 AMDGPUOperand::ImmTy ImmT,
4531 int64_t Default = 0) {
4532 auto i = OptionalIdx.find(ImmT);
4533 if (i != OptionalIdx.end()) {
4534 unsigned Idx = i->second;
4535 ((AMDGPUOperand &)*Operands[Idx]).addImmOperands(Inst, 1);
4536 } else {
4537 Inst.addOperand(MCOperand::createImm(Default));
4541 OperandMatchResultTy
4542 AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix, StringRef &Value) {
4543 if (getLexer().isNot(AsmToken::Identifier)) {
4544 return MatchOperand_NoMatch;
4546 StringRef Tok = Parser.getTok().getString();
4547 if (Tok != Prefix) {
4548 return MatchOperand_NoMatch;
4551 Parser.Lex();
4552 if (getLexer().isNot(AsmToken::Colon)) {
4553 return MatchOperand_ParseFail;
4556 Parser.Lex();
4557 if (getLexer().isNot(AsmToken::Identifier)) {
4558 return MatchOperand_ParseFail;
4561 Value = Parser.getTok().getString();
4562 return MatchOperand_Success;
4565 // dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their
4566 // values to live in a joint format operand in the MCInst encoding.
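// For example (illustrative), "dfmt:13, nfmt:2" is packed as 13 | (2 << 4),
// i.e. 0x2d.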
4567 OperandMatchResultTy
4568 AMDGPUAsmParser::parseDfmtNfmt(OperandVector &Operands) {
4569 SMLoc S = Parser.getTok().getLoc();
4570 int64_t Dfmt = 0, Nfmt = 0;
4571 // dfmt and nfmt can appear in either order, and each is optional.
4572 bool GotDfmt = false, GotNfmt = false;
4573 while (!GotDfmt || !GotNfmt) {
4574 if (!GotDfmt) {
4575 auto Res = parseIntWithPrefix("dfmt", Dfmt);
4576 if (Res != MatchOperand_NoMatch) {
4577 if (Res != MatchOperand_Success)
4578 return Res;
4579 if (Dfmt >= 16) {
4580 Error(Parser.getTok().getLoc(), "out of range dfmt");
4581 return MatchOperand_ParseFail;
4583 GotDfmt = true;
4584 Parser.Lex();
4585 continue;
4588 if (!GotNfmt) {
4589 auto Res = parseIntWithPrefix("nfmt", Nfmt);
4590 if (Res != MatchOperand_NoMatch) {
4591 if (Res != MatchOperand_Success)
4592 return Res;
4593 if (Nfmt >= 8) {
4594 Error(Parser.getTok().getLoc(), "out of range nfmt");
4595 return MatchOperand_ParseFail;
4597 GotNfmt = true;
4598 Parser.Lex();
4599 continue;
4602 break;
4604 if (!GotDfmt && !GotNfmt)
4605 return MatchOperand_NoMatch;
4606 auto Format = Dfmt | Nfmt << 4;
4607 Operands.push_back(
4608 AMDGPUOperand::CreateImm(this, Format, S, AMDGPUOperand::ImmTyFORMAT));
4609 return MatchOperand_Success;
4612 //===----------------------------------------------------------------------===//
4613 // ds
4614 //===----------------------------------------------------------------------===//
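// Illustrative forms handled by the converters below (values made up):
//   ds_write2_b32 v1, v2, v3 offset0:4 offset1:8
//   ds_write_b32 v1, v2 offset:16 gds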
4616 void AMDGPUAsmParser::cvtDSOffset01(MCInst &Inst,
4617 const OperandVector &Operands) {
4618 OptionalImmIndexMap OptionalIdx;
4620 for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
4621 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4623 // Add the register arguments
4624 if (Op.isReg()) {
4625 Op.addRegOperands(Inst, 1);
4626 continue;
4629 // Handle optional arguments
4630 OptionalIdx[Op.getImmTy()] = i;
4633 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset0);
4634 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset1);
4635 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);
4637 Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
4640 void AMDGPUAsmParser::cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
4641 bool IsGdsHardcoded) {
4642 OptionalImmIndexMap OptionalIdx;
4644 for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
4645 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4647 // Add the register arguments
4648 if (Op.isReg()) {
4649 Op.addRegOperands(Inst, 1);
4650 continue;
4653 if (Op.isToken() && Op.getToken() == "gds") {
4654 IsGdsHardcoded = true;
4655 continue;
4658 // Handle optional arguments
4659 OptionalIdx[Op.getImmTy()] = i;
4662 AMDGPUOperand::ImmTy OffsetType =
4663 (Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx10 ||
4664 Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx6_gfx7 ||
4665 Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_vi) ? AMDGPUOperand::ImmTySwizzle :
4666 AMDGPUOperand::ImmTyOffset;
4668 addOptionalImmOperand(Inst, Operands, OptionalIdx, OffsetType);
4670 if (!IsGdsHardcoded) {
4671 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);
4673 Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
4676 void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) {
4677 OptionalImmIndexMap OptionalIdx;
4679 unsigned OperandIdx[4];
4680 unsigned EnMask = 0;
4681 int SrcIdx = 0;
4683 for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
4684 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4686 // Add the register arguments
4687 if (Op.isReg()) {
4688 assert(SrcIdx < 4);
4689 OperandIdx[SrcIdx] = Inst.size();
4690 Op.addRegOperands(Inst, 1);
4691 ++SrcIdx;
4692 continue;
4695 if (Op.isOff()) {
4696 assert(SrcIdx < 4);
4697 OperandIdx[SrcIdx] = Inst.size();
4698 Inst.addOperand(MCOperand::createReg(AMDGPU::NoRegister));
4699 ++SrcIdx;
4700 continue;
4703 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) {
4704 Op.addImmOperands(Inst, 1);
4705 continue;
4708 if (Op.isToken() && Op.getToken() == "done")
4709 continue;
4711 // Handle optional arguments
4712 OptionalIdx[Op.getImmTy()] = i;
4715 assert(SrcIdx == 4);
4717 bool Compr = false;
4718 if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) {
4719 Compr = true;
4720 Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]);
4721 Inst.getOperand(OperandIdx[2]).setReg(AMDGPU::NoRegister);
4722 Inst.getOperand(OperandIdx[3]).setReg(AMDGPU::NoRegister);
4725 for (auto i = 0; i < SrcIdx; ++i) {
4726 if (Inst.getOperand(OperandIdx[i]).getReg() != AMDGPU::NoRegister) {
4727 EnMask |= Compr? (0x3 << i * 2) : (0x1 << i);
4731 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM);
4732 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr);
4734 Inst.addOperand(MCOperand::createImm(EnMask));
4737 //===----------------------------------------------------------------------===//
4738 // s_waitcnt
4739 //===----------------------------------------------------------------------===//
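// The s_waitcnt operand is either a raw immediate or a list of named counters
// joined by '&' or ',', e.g. (illustrative):
//   s_waitcnt vmcnt(0) & lgkmcnt(0)
//   s_waitcnt expcnt(0)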
4741 static bool
4742 encodeCnt(
4743 const AMDGPU::IsaVersion ISA,
4744 int64_t &IntVal,
4745 int64_t CntVal,
4746 bool Saturate,
4747 unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned),
4748 unsigned (*decode)(const IsaVersion &Version, unsigned))
4750 bool Failed = false;
4752 IntVal = encode(ISA, IntVal, CntVal);
4753 if (CntVal != decode(ISA, IntVal)) {
4754 if (Saturate) {
4755 IntVal = encode(ISA, IntVal, -1);
4756 } else {
4757 Failed = true;
4760 return Failed;
4763 bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) {
4765 SMLoc CntLoc = getLoc();
4766 StringRef CntName = getTokenStr();
4768 if (!skipToken(AsmToken::Identifier, "expected a counter name") ||
4769 !skipToken(AsmToken::LParen, "expected a left parenthesis"))
4770 return false;
4772 int64_t CntVal;
4773 SMLoc ValLoc = getLoc();
4774 if (!parseExpr(CntVal))
4775 return false;
4777 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
4779 bool Failed = true;
4780 bool Sat = CntName.endswith("_sat");
4782 if (CntName == "vmcnt" || CntName == "vmcnt_sat") {
4783 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt);
4784 } else if (CntName == "expcnt" || CntName == "expcnt_sat") {
4785 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt);
4786 } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") {
4787 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt);
4788 } else {
4789 Error(CntLoc, "invalid counter name " + CntName);
4790 return false;
4793 if (Failed) {
4794 Error(ValLoc, "too large value for " + CntName);
4795 return false;
4798 if (!skipToken(AsmToken::RParen, "expected a closing parenthesis"))
4799 return false;
4801 if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) {
4802 if (isToken(AsmToken::EndOfStatement)) {
4803 Error(getLoc(), "expected a counter name");
4804 return false;
4808 return true;
4811 OperandMatchResultTy
4812 AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) {
4813 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
4814 int64_t Waitcnt = getWaitcntBitMask(ISA);
4815 SMLoc S = getLoc();
4817 // If parsing failed, do not return an error code
4818 // to avoid excessive error messages.
4819 if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
4820 while (parseCnt(Waitcnt) && !isToken(AsmToken::EndOfStatement));
4821 } else {
4822 parseExpr(Waitcnt);
4825 Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S));
4826 return MatchOperand_Success;
4829 bool
4830 AMDGPUOperand::isSWaitCnt() const {
4831 return isImm();
4834 //===----------------------------------------------------------------------===//
4835 // hwreg
4836 //===----------------------------------------------------------------------===//
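// A hwreg operand is either a raw 16-bit immediate or a macro of the form
// hwreg(<name or id>[, <bit offset>, <bit width>]), e.g. (illustrative):
//   s_getreg_b32 s2, hwreg(HW_REG_MODE, 0, 2)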
4838 bool
4839 AMDGPUAsmParser::parseHwregBody(OperandInfoTy &HwReg,
4840 int64_t &Offset,
4841 int64_t &Width) {
4842 using namespace llvm::AMDGPU::Hwreg;
4844 // The register may be specified by name or using a numeric code
4845 if (isToken(AsmToken::Identifier) &&
4846 (HwReg.Id = getHwregId(getTokenStr())) >= 0) {
4847 HwReg.IsSymbolic = true;
4848 lex(); // skip message name
4849 } else if (!parseExpr(HwReg.Id)) {
4850 return false;
4853 if (trySkipToken(AsmToken::RParen))
4854 return true;
4856 // parse optional params
4857 return
4858 skipToken(AsmToken::Comma, "expected a comma or a closing parenthesis") &&
4859 parseExpr(Offset) &&
4860 skipToken(AsmToken::Comma, "expected a comma") &&
4861 parseExpr(Width) &&
4862 skipToken(AsmToken::RParen, "expected a closing parenthesis");
4865 bool
4866 AMDGPUAsmParser::validateHwreg(const OperandInfoTy &HwReg,
4867 const int64_t Offset,
4868 const int64_t Width,
4869 const SMLoc Loc) {
4871 using namespace llvm::AMDGPU::Hwreg;
4873 if (HwReg.IsSymbolic && !isValidHwreg(HwReg.Id, getSTI())) {
4874 Error(Loc, "specified hardware register is not supported on this GPU");
4875 return false;
4876 } else if (!isValidHwreg(HwReg.Id)) {
4877 Error(Loc, "invalid code of hardware register: only 6-bit values are legal");
4878 return false;
4879 } else if (!isValidHwregOffset(Offset)) {
4880 Error(Loc, "invalid bit offset: only 5-bit values are legal");
4881 return false;
4882 } else if (!isValidHwregWidth(Width)) {
4883 Error(Loc, "invalid bitfield width: only values from 1 to 32 are legal");
4884 return false;
4886 return true;
4889 OperandMatchResultTy
4890 AMDGPUAsmParser::parseHwreg(OperandVector &Operands) {
4891 using namespace llvm::AMDGPU::Hwreg;
4893 int64_t ImmVal = 0;
4894 SMLoc Loc = getLoc();
4896 // If parsing failed, do not return an error code
4897 // to avoid excessive error messages.
4898 if (trySkipId("hwreg", AsmToken::LParen)) {
4899 OperandInfoTy HwReg(ID_UNKNOWN_);
4900 int64_t Offset = OFFSET_DEFAULT_;
4901 int64_t Width = WIDTH_DEFAULT_;
4902 if (parseHwregBody(HwReg, Offset, Width) &&
4903 validateHwreg(HwReg, Offset, Width, Loc)) {
4904 ImmVal = encodeHwreg(HwReg.Id, Offset, Width);
4906 } else if (parseExpr(ImmVal)) {
4907 if (ImmVal < 0 || !isUInt<16>(ImmVal))
4908 Error(Loc, "invalid immediate: only 16-bit values are legal");
4911 Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTyHwreg));
4912 return MatchOperand_Success;
4915 bool AMDGPUOperand::isHwreg() const {
4916 return isImmTy(ImmTyHwreg);
4919 //===----------------------------------------------------------------------===//
4920 // sendmsg
4921 //===----------------------------------------------------------------------===//
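// A sendmsg operand is either a raw 16-bit immediate or a macro of the form
// sendmsg(<msg>[, <operation>[, <stream>]]), e.g. (illustrative):
//   s_sendmsg sendmsg(MSG_GS, GS_OP_EMIT, 0)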
4923 bool
4924 AMDGPUAsmParser::parseSendMsgBody(OperandInfoTy &Msg,
4925 OperandInfoTy &Op,
4926 OperandInfoTy &Stream) {
4927 using namespace llvm::AMDGPU::SendMsg;
4929 if (isToken(AsmToken::Identifier) && (Msg.Id = getMsgId(getTokenStr())) >= 0) {
4930 Msg.IsSymbolic = true;
4931 lex(); // skip message name
4932 } else if (!parseExpr(Msg.Id)) {
4933 return false;
4936 if (trySkipToken(AsmToken::Comma)) {
4937 Op.IsDefined = true;
4938 if (isToken(AsmToken::Identifier) &&
4939 (Op.Id = getMsgOpId(Msg.Id, getTokenStr())) >= 0) {
4940 lex(); // skip operation name
4941 } else if (!parseExpr(Op.Id)) {
4942 return false;
4945 if (trySkipToken(AsmToken::Comma)) {
4946 Stream.IsDefined = true;
4947 if (!parseExpr(Stream.Id))
4948 return false;
4952 return skipToken(AsmToken::RParen, "expected a closing parenthesis");
4955 bool
4956 AMDGPUAsmParser::validateSendMsg(const OperandInfoTy &Msg,
4957 const OperandInfoTy &Op,
4958 const OperandInfoTy &Stream,
4959 const SMLoc S) {
4960 using namespace llvm::AMDGPU::SendMsg;
4962 // Validation strictness depends on whether the message is specified
4963 // in a symbolic or in a numeric form. In the latter case
4964 // only the encoding possibility is checked.
4965 bool Strict = Msg.IsSymbolic;
4967 if (!isValidMsgId(Msg.Id, getSTI(), Strict)) {
4968 Error(S, "invalid message id");
4969 return false;
4970 } else if (Strict && (msgRequiresOp(Msg.Id) != Op.IsDefined)) {
4971 Error(S, Op.IsDefined ?
4972 "message does not support operations" :
4973 "missing message operation");
4974 return false;
4975 } else if (!isValidMsgOp(Msg.Id, Op.Id, Strict)) {
4976 Error(S, "invalid operation id");
4977 return false;
4978 } else if (Strict && !msgSupportsStream(Msg.Id, Op.Id) && Stream.IsDefined) {
4979 Error(S, "message operation does not support streams");
4980 return false;
4981 } else if (!isValidMsgStream(Msg.Id, Op.Id, Stream.Id, Strict)) {
4982 Error(S, "invalid message stream id");
4983 return false;
4985 return true;
4988 OperandMatchResultTy
4989 AMDGPUAsmParser::parseSendMsgOp(OperandVector &Operands) {
4990 using namespace llvm::AMDGPU::SendMsg;
4992 int64_t ImmVal = 0;
4993 SMLoc Loc = getLoc();
4995 // If parsing failed, do not return an error code
4996 // to avoid excessive error messages.
4997 if (trySkipId("sendmsg", AsmToken::LParen)) {
4998 OperandInfoTy Msg(ID_UNKNOWN_);
4999 OperandInfoTy Op(OP_NONE_);
5000 OperandInfoTy Stream(STREAM_ID_NONE_);
5001 if (parseSendMsgBody(Msg, Op, Stream) &&
5002 validateSendMsg(Msg, Op, Stream, Loc)) {
5003 ImmVal = encodeMsg(Msg.Id, Op.Id, Stream.Id);
5005 } else if (parseExpr(ImmVal)) {
5006 if (ImmVal < 0 || !isUInt<16>(ImmVal))
5007 Error(Loc, "invalid immediate: only 16-bit values are legal");
5010 Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTySendMsg));
5011 return MatchOperand_Success;
5014 bool AMDGPUOperand::isSendMsg() const {
5015 return isImmTy(ImmTySendMsg);
5018 //===----------------------------------------------------------------------===//
5019 // v_interp
5020 //===----------------------------------------------------------------------===//
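// The interpolation slot is one of p10, p20 or p0, and the attribute operand
// has the form attr<N>.<chan>, e.g. (illustrative):
//   v_interp_p1_f32 v0, v1, attr0.x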
5022 OperandMatchResultTy AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) {
5023 if (getLexer().getKind() != AsmToken::Identifier)
5024 return MatchOperand_NoMatch;
5026 StringRef Str = Parser.getTok().getString();
5027 int Slot = StringSwitch<int>(Str)
5028 .Case("p10", 0)
5029 .Case("p20", 1)
5030 .Case("p0", 2)
5031 .Default(-1);
5033 SMLoc S = Parser.getTok().getLoc();
5034 if (Slot == -1)
5035 return MatchOperand_ParseFail;
5037 Parser.Lex();
5038 Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S,
5039 AMDGPUOperand::ImmTyInterpSlot));
5040 return MatchOperand_Success;
5043 OperandMatchResultTy AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) {
5044 if (getLexer().getKind() != AsmToken::Identifier)
5045 return MatchOperand_NoMatch;
5047 StringRef Str = Parser.getTok().getString();
5048 if (!Str.startswith("attr"))
5049 return MatchOperand_NoMatch;
5051 StringRef Chan = Str.take_back(2);
5052 int AttrChan = StringSwitch<int>(Chan)
5053 .Case(".x", 0)
5054 .Case(".y", 1)
5055 .Case(".z", 2)
5056 .Case(".w", 3)
5057 .Default(-1);
5058 if (AttrChan == -1)
5059 return MatchOperand_ParseFail;
5061 Str = Str.drop_back(2).drop_front(4);
5063 uint8_t Attr;
5064 if (Str.getAsInteger(10, Attr))
5065 return MatchOperand_ParseFail;
5067 SMLoc S = Parser.getTok().getLoc();
5068 Parser.Lex();
5069 if (Attr > 63) {
5070 Error(S, "out of bounds attr");
5071 return MatchOperand_Success;
5074 SMLoc SChan = SMLoc::getFromPointer(Chan.data());
5076 Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S,
5077 AMDGPUOperand::ImmTyInterpAttr));
5078 Operands.push_back(AMDGPUOperand::CreateImm(this, AttrChan, SChan,
5079 AMDGPUOperand::ImmTyAttrChan));
5080 return MatchOperand_Success;
5083 //===----------------------------------------------------------------------===//
5084 // exp
5085 //===----------------------------------------------------------------------===//
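// Recognized export targets include mrt0..mrt7, mrtz, null, pos0..pos3
// (pos4 and prim on GFX10) and param0..param31, e.g. (illustrative):
//   exp mrt0 v0, v1, v2, v3 done vm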
5087 void AMDGPUAsmParser::errorExpTgt() {
5088 Error(Parser.getTok().getLoc(), "invalid exp target");
5091 OperandMatchResultTy AMDGPUAsmParser::parseExpTgtImpl(StringRef Str,
5092 uint8_t &Val) {
5093 if (Str == "null") {
5094 Val = 9;
5095 return MatchOperand_Success;
5098 if (Str.startswith("mrt")) {
5099 Str = Str.drop_front(3);
5100 if (Str == "z") { // == mrtz
5101 Val = 8;
5102 return MatchOperand_Success;
5105 if (Str.getAsInteger(10, Val))
5106 return MatchOperand_ParseFail;
5108 if (Val > 7)
5109 errorExpTgt();
5111 return MatchOperand_Success;
5114 if (Str.startswith("pos")) {
5115 Str = Str.drop_front(3);
5116 if (Str.getAsInteger(10, Val))
5117 return MatchOperand_ParseFail;
5119 if (Val > 4 || (Val == 4 && !isGFX10()))
5120 errorExpTgt();
5122 Val += 12;
5123 return MatchOperand_Success;
5126 if (isGFX10() && Str == "prim") {
5127 Val = 20;
5128 return MatchOperand_Success;
5131 if (Str.startswith("param")) {
5132 Str = Str.drop_front(5);
5133 if (Str.getAsInteger(10, Val))
5134 return MatchOperand_ParseFail;
5136 if (Val >= 32)
5137 errorExpTgt();
5139 Val += 32;
5140 return MatchOperand_Success;
5143 if (Str.startswith("invalid_target_")) {
5144 Str = Str.drop_front(15);
5145 if (Str.getAsInteger(10, Val))
5146 return MatchOperand_ParseFail;
5148 errorExpTgt();
5149 return MatchOperand_Success;
5152 return MatchOperand_NoMatch;
5155 OperandMatchResultTy AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) {
5156 uint8_t Val;
5157 StringRef Str = Parser.getTok().getString();
5159 auto Res = parseExpTgtImpl(Str, Val);
5160 if (Res != MatchOperand_Success)
5161 return Res;
5163 SMLoc S = Parser.getTok().getLoc();
5164 Parser.Lex();
5166 Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S,
5167 AMDGPUOperand::ImmTyExpTgt));
5168 return MatchOperand_Success;
5171 //===----------------------------------------------------------------------===//
5172 // parser helpers
5173 //===----------------------------------------------------------------------===//
5175 bool
5176 AMDGPUAsmParser::isId(const AsmToken &Token, const StringRef Id) const {
5177 return Token.is(AsmToken::Identifier) && Token.getString() == Id;
5180 bool
5181 AMDGPUAsmParser::isId(const StringRef Id) const {
5182 return isId(getToken(), Id);
5185 bool
5186 AMDGPUAsmParser::isToken(const AsmToken::TokenKind Kind) const {
5187 return getTokenKind() == Kind;
5190 bool
5191 AMDGPUAsmParser::trySkipId(const StringRef Id) {
5192 if (isId(Id)) {
5193 lex();
5194 return true;
5196 return false;
5199 bool
5200 AMDGPUAsmParser::trySkipId(const StringRef Id, const AsmToken::TokenKind Kind) {
5201 if (isId(Id) && peekToken().is(Kind)) {
5202 lex();
5203 lex();
5204 return true;
5206 return false;
5209 bool
5210 AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) {
5211 if (isToken(Kind)) {
5212 lex();
5213 return true;
5215 return false;
5218 bool
5219 AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind,
5220 const StringRef ErrMsg) {
5221 if (!trySkipToken(Kind)) {
5222 Error(getLoc(), ErrMsg);
5223 return false;
5225 return true;
5228 bool
5229 AMDGPUAsmParser::parseExpr(int64_t &Imm) {
5230 return !getParser().parseAbsoluteExpression(Imm);
5233 bool
5234 AMDGPUAsmParser::parseExpr(OperandVector &Operands) {
5235 SMLoc S = getLoc();
5237 const MCExpr *Expr;
5238 if (Parser.parseExpression(Expr))
5239 return false;
5241 int64_t IntVal;
5242 if (Expr->evaluateAsAbsolute(IntVal)) {
5243 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
5244 } else {
5245 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
5247 return true;
5250 bool
5251 AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) {
5252 if (isToken(AsmToken::String)) {
5253 Val = getToken().getStringContents();
5254 lex();
5255 return true;
5256 } else {
5257 Error(getLoc(), ErrMsg);
5258 return false;
5262 AsmToken
5263 AMDGPUAsmParser::getToken() const {
5264 return Parser.getTok();
5267 AsmToken
5268 AMDGPUAsmParser::peekToken() {
5269 return getLexer().peekTok();
5272 void
5273 AMDGPUAsmParser::peekTokens(MutableArrayRef<AsmToken> Tokens) {
5274 auto TokCount = getLexer().peekTokens(Tokens);
5276 for (auto Idx = TokCount; Idx < Tokens.size(); ++Idx)
5277 Tokens[Idx] = AsmToken(AsmToken::Error, "");
5280 AsmToken::TokenKind
5281 AMDGPUAsmParser::getTokenKind() const {
5282 return getLexer().getKind();
5285 SMLoc
5286 AMDGPUAsmParser::getLoc() const {
5287 return getToken().getLoc();
5290 StringRef
5291 AMDGPUAsmParser::getTokenStr() const {
5292 return getToken().getString();
5295 void
5296 AMDGPUAsmParser::lex() {
5297 Parser.Lex();
5300 //===----------------------------------------------------------------------===//
5301 // swizzle
5302 //===----------------------------------------------------------------------===//
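// The swizzle operand is either a raw 16-bit offset or one of the macro forms
// handled below, e.g. (illustrative):
//   ds_swizzle_b32 v0, v1 offset:swizzle(QUAD_PERM, 0, 1, 2, 3)
//   ds_swizzle_b32 v0, v1 offset:swizzle(BITMASK_PERM, "01pi0")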
5304 LLVM_READNONE
5305 static unsigned
5306 encodeBitmaskPerm(const unsigned AndMask,
5307 const unsigned OrMask,
5308 const unsigned XorMask) {
5309 using namespace llvm::AMDGPU::Swizzle;
5311 return BITMASK_PERM_ENC |
5312 (AndMask << BITMASK_AND_SHIFT) |
5313 (OrMask << BITMASK_OR_SHIFT) |
5314 (XorMask << BITMASK_XOR_SHIFT);
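// Sketch of the hardware semantics: with this encoding, lane i reads from lane
// ((i & AndMask) | OrMask) ^ XorMask; see the ISA documentation for the
// authoritative description.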
5317 bool
5318 AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
5319 const unsigned MinVal,
5320 const unsigned MaxVal,
5321 const StringRef ErrMsg) {
5322 for (unsigned i = 0; i < OpNum; ++i) {
5323 if (!skipToken(AsmToken::Comma, "expected a comma")){
5324 return false;
5326 SMLoc ExprLoc = Parser.getTok().getLoc();
5327 if (!parseExpr(Op[i])) {
5328 return false;
5330 if (Op[i] < MinVal || Op[i] > MaxVal) {
5331 Error(ExprLoc, ErrMsg);
5332 return false;
5336 return true;
5339 bool
5340 AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm) {
5341 using namespace llvm::AMDGPU::Swizzle;
5343 int64_t Lane[LANE_NUM];
5344 if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX,
5345 "expected a 2-bit lane id")) {
5346 Imm = QUAD_PERM_ENC;
5347 for (unsigned I = 0; I < LANE_NUM; ++I) {
5348 Imm |= Lane[I] << (LANE_SHIFT * I);
5350 return true;
5352 return false;
5355 bool
5356 AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) {
5357 using namespace llvm::AMDGPU::Swizzle;
5359 SMLoc S = Parser.getTok().getLoc();
5360 int64_t GroupSize;
5361 int64_t LaneIdx;
5363 if (!parseSwizzleOperands(1, &GroupSize,
5364 2, 32,
5365 "group size must be in the interval [2,32]")) {
5366 return false;
5368 if (!isPowerOf2_64(GroupSize)) {
5369 Error(S, "group size must be a power of two");
5370 return false;
5372 if (parseSwizzleOperands(1, &LaneIdx,
5373 0, GroupSize - 1,
5374 "lane id must be in the interval [0,group size - 1]")) {
5375 Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0);
5376 return true;
5378 return false;
5381 bool
5382 AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) {
5383 using namespace llvm::AMDGPU::Swizzle;
5385 SMLoc S = Parser.getTok().getLoc();
5386 int64_t GroupSize;
5388 if (!parseSwizzleOperands(1, &GroupSize,
5389 2, 32, "group size must be in the interval [2,32]")) {
5390 return false;
5392 if (!isPowerOf2_64(GroupSize)) {
5393 Error(S, "group size must be a power of two");
5394 return false;
5397 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize - 1);
5398 return true;
5401 bool
5402 AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) {
5403 using namespace llvm::AMDGPU::Swizzle;
5405 SMLoc S = Parser.getTok().getLoc();
5406 int64_t GroupSize;
5408 if (!parseSwizzleOperands(1, &GroupSize,
5409 1, 16, "group size must be in the interval [1,16]")) {
5410 return false;
5412 if (!isPowerOf2_64(GroupSize)) {
5413 Error(S, "group size must be a power of two");
5414 return false;
5417 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize);
5418 return true;
5421 bool
5422 AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) {
5423 using namespace llvm::AMDGPU::Swizzle;
5425 if (!skipToken(AsmToken::Comma, "expected a comma")) {
5426 return false;
5429 StringRef Ctl;
5430 SMLoc StrLoc = Parser.getTok().getLoc();
5431 if (!parseString(Ctl)) {
5432 return false;
5434 if (Ctl.size() != BITMASK_WIDTH) {
5435 Error(StrLoc, "expected a 5-character mask");
5436 return false;
5439 unsigned AndMask = 0;
5440 unsigned OrMask = 0;
5441 unsigned XorMask = 0;
5443 for (size_t i = 0; i < Ctl.size(); ++i) {
5444 unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i);
5445 switch(Ctl[i]) {
5446 default:
5447 Error(StrLoc, "invalid mask");
5448 return false;
5449 case '0':
5450 break;
5451 case '1':
5452 OrMask |= Mask;
5453 break;
5454 case 'p':
5455 AndMask |= Mask;
5456 break;
5457 case 'i':
5458 AndMask |= Mask;
5459 XorMask |= Mask;
5460 break;
5464 Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask);
5465 return true;
5468 bool
5469 AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) {
5471 SMLoc OffsetLoc = Parser.getTok().getLoc();
5473 if (!parseExpr(Imm)) {
5474 return false;
5476 if (!isUInt<16>(Imm)) {
5477 Error(OffsetLoc, "expected a 16-bit offset");
5478 return false;
5480 return true;
5483 bool
5484 AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) {
5485 using namespace llvm::AMDGPU::Swizzle;
5487 if (skipToken(AsmToken::LParen, "expected a left parenthesis")) {
5489 SMLoc ModeLoc = Parser.getTok().getLoc();
5490 bool Ok = false;
5492 if (trySkipId(IdSymbolic[ID_QUAD_PERM])) {
5493 Ok = parseSwizzleQuadPerm(Imm);
5494 } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) {
5495 Ok = parseSwizzleBitmaskPerm(Imm);
5496 } else if (trySkipId(IdSymbolic[ID_BROADCAST])) {
5497 Ok = parseSwizzleBroadcast(Imm);
5498 } else if (trySkipId(IdSymbolic[ID_SWAP])) {
5499 Ok = parseSwizzleSwap(Imm);
5500 } else if (trySkipId(IdSymbolic[ID_REVERSE])) {
5501 Ok = parseSwizzleReverse(Imm);
5502 } else {
5503 Error(ModeLoc, "expected a swizzle mode");
5506 return Ok && skipToken(AsmToken::RParen, "expected a closing parenthesis");
5509 return false;
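// The swizzle operand accepts either a raw 16-bit value, e.g. "offset:0x8000",
// or a symbolic macro such as "offset:swizzle(SWAP,2)" (illustrative examples;
// the symbolic mode names come from the Swizzle IdSymbolic table).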
5512 OperandMatchResultTy
5513 AMDGPUAsmParser::parseSwizzleOp(OperandVector &Operands) {
5514 SMLoc S = Parser.getTok().getLoc();
5515 int64_t Imm = 0;
5517 if (trySkipId("offset")) {
5519 bool Ok = false;
5520 if (skipToken(AsmToken::Colon, "expected a colon")) {
5521 if (trySkipId("swizzle")) {
5522 Ok = parseSwizzleMacro(Imm);
5523 } else {
5524 Ok = parseSwizzleOffset(Imm);
5528 Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle));
5530 return Ok? MatchOperand_Success : MatchOperand_ParseFail;
5531 } else {
5532 // Swizzle "offset" operand is optional.
5533 // If it is omitted, try parsing other optional operands.
5534 return parseOptionalOpr(Operands);
5538 bool
5539 AMDGPUOperand::isSwizzle() const {
5540 return isImmTy(ImmTySwizzle);
5543 //===----------------------------------------------------------------------===//
5544 // VGPR Index Mode
5545 //===----------------------------------------------------------------------===//
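// Parses the body of a "gpr_idx(...)" macro and returns the resulting mode
// bitmask, e.g. "gpr_idx(SRC0,DST)" (illustrative; the mode names come from
// the VGPRIndexMode IdSymbolic table).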
5547 int64_t AMDGPUAsmParser::parseGPRIdxMacro() {
5549 using namespace llvm::AMDGPU::VGPRIndexMode;
5551 if (trySkipToken(AsmToken::RParen)) {
5552 return OFF;
5555 int64_t Imm = 0;
5557 while (true) {
5558 unsigned Mode = 0;
5559 SMLoc S = Parser.getTok().getLoc();
5561 for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) {
5562 if (trySkipId(IdSymbolic[ModeId])) {
5563 Mode = 1 << ModeId;
5564 break;
5568 if (Mode == 0) {
5569 Error(S, (Imm == 0)?
5570 "expected a VGPR index mode or a closing parenthesis" :
5571 "expected a VGPR index mode");
5572 break;
5575 if (Imm & Mode) {
5576 Error(S, "duplicate VGPR index mode");
5577 break;
5579 Imm |= Mode;
5581 if (trySkipToken(AsmToken::RParen))
5582 break;
5583 if (!skipToken(AsmToken::Comma,
5584 "expected a comma or a closing parenthesis"))
5585 break;
5588 return Imm;
5591 OperandMatchResultTy
5592 AMDGPUAsmParser::parseGPRIdxMode(OperandVector &Operands) {
5594 int64_t Imm = 0;
5595 SMLoc S = Parser.getTok().getLoc();
5597 if (getLexer().getKind() == AsmToken::Identifier &&
5598 Parser.getTok().getString() == "gpr_idx" &&
5599 getLexer().peekTok().is(AsmToken::LParen)) {
5601 Parser.Lex();
5602 Parser.Lex();
5604 // If parse failed, trigger an error but do not return error code
5605 // to avoid excessive error messages.
5606 Imm = parseGPRIdxMacro();
5608 } else {
5609 if (getParser().parseAbsoluteExpression(Imm))
5610 return MatchOperand_NoMatch;
5611 if (Imm < 0 || !isUInt<4>(Imm)) {
5612 Error(S, "invalid immediate: only 4-bit values are legal");
5616 Operands.push_back(
5617 AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyGprIdxMode));
5618 return MatchOperand_Success;
5621 bool AMDGPUOperand::isGPRIdxMode() const {
5622 return isImmTy(ImmTyGprIdxMode);
5625 //===----------------------------------------------------------------------===//
5626 // sopp branch targets
5627 //===----------------------------------------------------------------------===//
5629 OperandMatchResultTy
5630 AMDGPUAsmParser::parseSOppBrTarget(OperandVector &Operands) {
5632 // Make sure we are not parsing something
5633 // that looks like a label or an expression but is not.
5634 // This will improve error messages.
5635 if (isRegister() || isModifier())
5636 return MatchOperand_NoMatch;
5638 if (parseExpr(Operands)) {
5640 AMDGPUOperand &Opr = ((AMDGPUOperand &)*Operands[Operands.size() - 1]);
5641 assert(Opr.isImm() || Opr.isExpr());
5642 SMLoc Loc = Opr.getStartLoc();
5644 // Currently we do not support arbitrary expressions as branch targets.
5645 // Only labels and absolute expressions are accepted.
5646 if (Opr.isExpr() && !Opr.isSymbolRefExpr()) {
5647 Error(Loc, "expected an absolute expression or a label");
5648 } else if (Opr.isImm() && !Opr.isS16Imm()) {
5649 Error(Loc, "expected a 16-bit signed jump offset");
5653 return MatchOperand_Success; // avoid excessive error messages
5656 //===----------------------------------------------------------------------===//
5657 // Boolean holding registers
5658 //===----------------------------------------------------------------------===//
5660 OperandMatchResultTy
5661 AMDGPUAsmParser::parseBoolReg(OperandVector &Operands) {
5662 return parseReg(Operands);
5665 //===----------------------------------------------------------------------===//
5666 // mubuf
5667 //===----------------------------------------------------------------------===//
5669 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultDLC() const {
5670 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDLC);
5673 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultGLC() const {
5674 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyGLC);
5677 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSLC() const {
5678 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTySLC);
5681 void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst,
5682 const OperandVector &Operands,
5683 bool IsAtomic,
5684 bool IsAtomicReturn,
5685 bool IsLds) {
5686 bool IsLdsOpcode = IsLds;
5687 bool HasLdsModifier = false;
5688 OptionalImmIndexMap OptionalIdx;
5689 assert(IsAtomicReturn ? IsAtomic : true);
5690 unsigned FirstOperandIdx = 1;
5692 for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) {
5693 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
5695 // Add the register arguments
5696 if (Op.isReg()) {
5697 Op.addRegOperands(Inst, 1);
5698 // Insert a tied src for atomic return dst.
5699 // This cannot be postponed as subsequent calls to
5700 // addImmOperands rely on correct number of MC operands.
5701 if (IsAtomicReturn && i == FirstOperandIdx)
5702 Op.addRegOperands(Inst, 1);
5703 continue;
5706 // Handle the case where soffset is an immediate
5707 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
5708 Op.addImmOperands(Inst, 1);
5709 continue;
5712 HasLdsModifier |= Op.isLDS();
5714 // Handle tokens like 'offen' which are sometimes hard-coded into the
5715 // asm string. There are no MCInst operands for these.
5716 if (Op.isToken()) {
5717 continue;
5719 assert(Op.isImm());
5721 // Handle optional arguments
5722 OptionalIdx[Op.getImmTy()] = i;
5725 // This is a workaround for an llvm quirk which may result in an
5726 // incorrect instruction selection. Lds and non-lds versions of
5727 // MUBUF instructions are identical except that lds versions
5728 // have a mandatory 'lds' modifier. However, this modifier follows
5729 // optional modifiers, and the llvm asm matcher regards it as an
5730 // optional one as well. As a result, an lds version of an opcode may
5731 // be selected even if the instruction has no 'lds' modifier.
5732 if (IsLdsOpcode && !HasLdsModifier) {
5733 int NoLdsOpcode = AMDGPU::getMUBUFNoLdsInst(Inst.getOpcode());
5734 if (NoLdsOpcode != -1) { // Got lds version - correct it.
5735 Inst.setOpcode(NoLdsOpcode);
5736 IsLdsOpcode = false;
5740 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset);
5741 if (!IsAtomic) { // glc is hard-coded.
5742 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
5744 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
5746 if (!IsLdsOpcode) { // tfe is not legal with lds opcodes
5747 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
5750 if (isGFX10())
5751 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC);
5754 void AMDGPUAsmParser::cvtMtbuf(MCInst &Inst, const OperandVector &Operands) {
5755 OptionalImmIndexMap OptionalIdx;
5757 for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
5758 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
5760 // Add the register arguments
5761 if (Op.isReg()) {
5762 Op.addRegOperands(Inst, 1);
5763 continue;
5766 // Handle the case where soffset is an immediate
5767 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
5768 Op.addImmOperands(Inst, 1);
5769 continue;
5772 // Handle tokens like 'offen' which are sometimes hard-coded into the
5773 // asm string. There are no MCInst operands for these.
5774 if (Op.isToken()) {
5775 continue;
5777 assert(Op.isImm());
5779 // Handle optional arguments
5780 OptionalIdx[Op.getImmTy()] = i;
5783 addOptionalImmOperand(Inst, Operands, OptionalIdx,
5784 AMDGPUOperand::ImmTyOffset);
5785 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyFORMAT);
5786 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
5787 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
5788 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
5790 if (isGFX10())
5791 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC);
5794 //===----------------------------------------------------------------------===//
5795 // mimg
5796 //===----------------------------------------------------------------------===//
5798 void AMDGPUAsmParser::cvtMIMG(MCInst &Inst, const OperandVector &Operands,
5799 bool IsAtomic) {
5800 unsigned I = 1;
5801 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
5802 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
5803 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
5806 if (IsAtomic) {
5807 // Add src, same as dst
5808 assert(Desc.getNumDefs() == 1);
5809 ((AMDGPUOperand &)*Operands[I - 1]).addRegOperands(Inst, 1);
5812 OptionalImmIndexMap OptionalIdx;
5814 for (unsigned E = Operands.size(); I != E; ++I) {
5815 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
5817 // Add the register arguments
5818 if (Op.isReg()) {
5819 Op.addRegOperands(Inst, 1);
5820 } else if (Op.isImmModifier()) {
5821 OptionalIdx[Op.getImmTy()] = I;
5822 } else if (!Op.isToken()) {
5823 llvm_unreachable("unexpected operand type");
5827 bool IsGFX10 = isGFX10();
5829 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDMask);
5830 if (IsGFX10)
5831 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDim, -1);
5832 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyUNorm);
5833 if (IsGFX10)
5834 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC);
5835 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
5836 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
5837 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyR128A16);
5838 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
5839 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyLWE);
5840 if (!IsGFX10)
5841 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDA);
5842 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyD16);
5845 void AMDGPUAsmParser::cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands) {
5846 cvtMIMG(Inst, Operands, true);
5849 //===----------------------------------------------------------------------===//
5850 // smrd
5851 //===----------------------------------------------------------------------===//
5853 bool AMDGPUOperand::isSMRDOffset8() const {
5854 return isImm() && isUInt<8>(getImm());
5857 bool AMDGPUOperand::isSMRDOffset20() const {
5858 return isImm() && isUInt<20>(getImm());
5861 bool AMDGPUOperand::isSMRDLiteralOffset() const {
5862 // 32-bit literals are only supported on CI and we only want to use them
5863 // when the offset is > 8-bits.
5864 return isImm() && !isUInt<8>(getImm()) && isUInt<32>(getImm());
5867 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset8() const {
5868 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
5871 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset20() const {
5872 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
5875 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDLiteralOffset() const {
5876 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
5879 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFlatOffset() const {
5880 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
5883 //===----------------------------------------------------------------------===//
5884 // vop3
5885 //===----------------------------------------------------------------------===//
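// omod is a 2-bit output-modifier field: 0 = none, 1 = *2, 2 = *4, 3 = /2.
// The converters below map the "mul:N" and "div:N" assembler forms onto it.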
5887 static bool ConvertOmodMul(int64_t &Mul) {
5888 if (Mul != 1 && Mul != 2 && Mul != 4)
5889 return false;
5891 Mul >>= 1;
5892 return true;
5895 static bool ConvertOmodDiv(int64_t &Div) {
5896 if (Div == 1) {
5897 Div = 0;
5898 return true;
5901 if (Div == 2) {
5902 Div = 3;
5903 return true;
5906 return false;
5909 static bool ConvertBoundCtrl(int64_t &BoundCtrl) {
5910 if (BoundCtrl == 0) {
5911 BoundCtrl = 1;
5912 return true;
5915 if (BoundCtrl == -1) {
5916 BoundCtrl = 0;
5917 return true;
5920 return false;
5923 // Note: the order in this table matches the order of operands in AsmString.
5924 static const OptionalOperand AMDGPUOptionalOperandTable[] = {
5925 {"offen", AMDGPUOperand::ImmTyOffen, true, nullptr},
5926 {"idxen", AMDGPUOperand::ImmTyIdxen, true, nullptr},
5927 {"addr64", AMDGPUOperand::ImmTyAddr64, true, nullptr},
5928 {"offset0", AMDGPUOperand::ImmTyOffset0, false, nullptr},
5929 {"offset1", AMDGPUOperand::ImmTyOffset1, false, nullptr},
5930 {"gds", AMDGPUOperand::ImmTyGDS, true, nullptr},
5931 {"lds", AMDGPUOperand::ImmTyLDS, true, nullptr},
5932 {"offset", AMDGPUOperand::ImmTyOffset, false, nullptr},
5933 {"inst_offset", AMDGPUOperand::ImmTyInstOffset, false, nullptr},
5934 {"dlc", AMDGPUOperand::ImmTyDLC, true, nullptr},
5935 {"format", AMDGPUOperand::ImmTyFORMAT, false, nullptr},
5936 {"glc", AMDGPUOperand::ImmTyGLC, true, nullptr},
5937 {"slc", AMDGPUOperand::ImmTySLC, true, nullptr},
5938 {"tfe", AMDGPUOperand::ImmTyTFE, true, nullptr},
5939 {"d16", AMDGPUOperand::ImmTyD16, true, nullptr},
5940 {"high", AMDGPUOperand::ImmTyHigh, true, nullptr},
5941 {"clamp", AMDGPUOperand::ImmTyClampSI, true, nullptr},
5942 {"omod", AMDGPUOperand::ImmTyOModSI, false, ConvertOmodMul},
5943 {"unorm", AMDGPUOperand::ImmTyUNorm, true, nullptr},
5944 {"da", AMDGPUOperand::ImmTyDA, true, nullptr},
5945 {"r128", AMDGPUOperand::ImmTyR128A16, true, nullptr},
5946 {"a16", AMDGPUOperand::ImmTyR128A16, true, nullptr},
5947 {"lwe", AMDGPUOperand::ImmTyLWE, true, nullptr},
5948 {"d16", AMDGPUOperand::ImmTyD16, true, nullptr},
5949 {"dmask", AMDGPUOperand::ImmTyDMask, false, nullptr},
5950 {"dim", AMDGPUOperand::ImmTyDim, false, nullptr},
5951 {"row_mask", AMDGPUOperand::ImmTyDppRowMask, false, nullptr},
5952 {"bank_mask", AMDGPUOperand::ImmTyDppBankMask, false, nullptr},
5953 {"bound_ctrl", AMDGPUOperand::ImmTyDppBoundCtrl, false, ConvertBoundCtrl},
5954 {"fi", AMDGPUOperand::ImmTyDppFi, false, nullptr},
5955 {"dst_sel", AMDGPUOperand::ImmTySdwaDstSel, false, nullptr},
5956 {"src0_sel", AMDGPUOperand::ImmTySdwaSrc0Sel, false, nullptr},
5957 {"src1_sel", AMDGPUOperand::ImmTySdwaSrc1Sel, false, nullptr},
5958 {"dst_unused", AMDGPUOperand::ImmTySdwaDstUnused, false, nullptr},
5959 {"compr", AMDGPUOperand::ImmTyExpCompr, true, nullptr },
5960 {"vm", AMDGPUOperand::ImmTyExpVM, true, nullptr},
5961 {"op_sel", AMDGPUOperand::ImmTyOpSel, false, nullptr},
5962 {"op_sel_hi", AMDGPUOperand::ImmTyOpSelHi, false, nullptr},
5963 {"neg_lo", AMDGPUOperand::ImmTyNegLo, false, nullptr},
5964 {"neg_hi", AMDGPUOperand::ImmTyNegHi, false, nullptr},
5965 {"blgp", AMDGPUOperand::ImmTyBLGP, false, nullptr},
5966 {"cbsz", AMDGPUOperand::ImmTyCBSZ, false, nullptr},
5967 {"abid", AMDGPUOperand::ImmTyABID, false, nullptr}
5970 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOperand(OperandVector &Operands) {
5971 unsigned size = Operands.size();
5972 assert(size > 0);
5974 OperandMatchResultTy res = parseOptionalOpr(Operands);
5976 // This is a hack to enable hardcoded mandatory operands which follow
5977 // optional operands.
5979 // The current design assumes that all operands after the first optional
5980 // operand are also optional. However, the implementation of some instructions
5981 // violates this rule (see e.g. flat/global atomics, which have hardcoded 'glc' operands).
5983 // To alleviate this problem, we have to (implicitly) parse extra operands
5984 // to make sure the autogenerated parser of custom operands never hits
5985 // hardcoded mandatory operands.
5987 if (size == 1 || ((AMDGPUOperand &)*Operands[size - 1]).isRegKind()) {
5989 // We have parsed the first optional operand.
5990 // Parse as many operands as necessary to skip all mandatory operands.
5992 for (unsigned i = 0; i < MAX_OPR_LOOKAHEAD; ++i) {
5993 if (res != MatchOperand_Success ||
5994 getLexer().is(AsmToken::EndOfStatement)) break;
5995 if (getLexer().is(AsmToken::Comma)) Parser.Lex();
5996 res = parseOptionalOpr(Operands);
6000 return res;
6003 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOpr(OperandVector &Operands) {
6004 OperandMatchResultTy res;
6005 for (const OptionalOperand &Op : AMDGPUOptionalOperandTable) {
6006 // try to parse any optional operand here
6007 if (Op.IsBit) {
6008 res = parseNamedBit(Op.Name, Operands, Op.Type);
6009 } else if (Op.Type == AMDGPUOperand::ImmTyOModSI) {
6010 res = parseOModOperand(Operands);
6011 } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstSel ||
6012 Op.Type == AMDGPUOperand::ImmTySdwaSrc0Sel ||
6013 Op.Type == AMDGPUOperand::ImmTySdwaSrc1Sel) {
6014 res = parseSDWASel(Operands, Op.Name, Op.Type);
6015 } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstUnused) {
6016 res = parseSDWADstUnused(Operands);
6017 } else if (Op.Type == AMDGPUOperand::ImmTyOpSel ||
6018 Op.Type == AMDGPUOperand::ImmTyOpSelHi ||
6019 Op.Type == AMDGPUOperand::ImmTyNegLo ||
6020 Op.Type == AMDGPUOperand::ImmTyNegHi) {
6021 res = parseOperandArrayWithPrefix(Op.Name, Operands, Op.Type,
6022 Op.ConvertResult);
6023 } else if (Op.Type == AMDGPUOperand::ImmTyDim) {
6024 res = parseDim(Operands);
6025 } else if (Op.Type == AMDGPUOperand::ImmTyFORMAT && !isGFX10()) {
6026 res = parseDfmtNfmt(Operands);
6027 } else {
6028 res = parseIntWithPrefix(Op.Name, Operands, Op.Type, Op.ConvertResult);
6030 if (res != MatchOperand_NoMatch) {
6031 return res;
6034 return MatchOperand_NoMatch;
6037 OperandMatchResultTy AMDGPUAsmParser::parseOModOperand(OperandVector &Operands) {
6038 StringRef Name = Parser.getTok().getString();
6039 if (Name == "mul") {
6040 return parseIntWithPrefix("mul", Operands,
6041 AMDGPUOperand::ImmTyOModSI, ConvertOmodMul);
6044 if (Name == "div") {
6045 return parseIntWithPrefix("div", Operands,
6046 AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv);
6049 return MatchOperand_NoMatch;
6052 void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands) {
6053 cvtVOP3P(Inst, Operands);
6055 int Opc = Inst.getOpcode();
6057 int SrcNum;
6058 const int Ops[] = { AMDGPU::OpName::src0,
6059 AMDGPU::OpName::src1,
6060 AMDGPU::OpName::src2 };
6061 for (SrcNum = 0;
6062 SrcNum < 3 && AMDGPU::getNamedOperandIdx(Opc, Ops[SrcNum]) != -1;
6063 ++SrcNum);
6064 assert(SrcNum > 0);
6066 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
6067 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
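// The op_sel bit past the source bits corresponds to the destination; if it
// is set, record it in src0_modifiers as DST_OP_SEL.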
6069 if ((OpSel & (1 << SrcNum)) != 0) {
6070 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers);
6071 uint32_t ModVal = Inst.getOperand(ModIdx).getImm();
6072 Inst.getOperand(ModIdx).setImm(ModVal | SISrcMods::DST_OP_SEL);
6076 static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) {
6077 // 1. This operand is an input-modifiers operand
6078 return Desc.OpInfo[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS
6079 // 2. This is not the last operand
6080 && Desc.NumOperands > (OpNum + 1)
6081 // 3. The next operand is a register-class operand
6082 && Desc.OpInfo[OpNum + 1].RegClass != -1
6083 // 4. The next register is not tied to any other operand
6084 && Desc.getOperandConstraint(OpNum + 1, MCOI::OperandConstraint::TIED_TO) == -1;
6087 void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands)
6089 OptionalImmIndexMap OptionalIdx;
6090 unsigned Opc = Inst.getOpcode();
6092 unsigned I = 1;
6093 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
6094 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
6095 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
6098 for (unsigned E = Operands.size(); I != E; ++I) {
6099 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
6100 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
6101 Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
6102 } else if (Op.isInterpSlot() ||
6103 Op.isInterpAttr() ||
6104 Op.isAttrChan()) {
6105 Inst.addOperand(MCOperand::createImm(Op.getImm()));
6106 } else if (Op.isImmModifier()) {
6107 OptionalIdx[Op.getImmTy()] = I;
6108 } else {
6109 llvm_unreachable("unhandled operand type");
6113 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::high) != -1) {
6114 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyHigh);
6117 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
6118 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
6121 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
6122 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
6126 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands,
6127 OptionalImmIndexMap &OptionalIdx) {
6128 unsigned Opc = Inst.getOpcode();
6130 unsigned I = 1;
6131 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
6132 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
6133 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
6136 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1) {
6137 // This instruction has src modifiers
6138 for (unsigned E = Operands.size(); I != E; ++I) {
6139 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
6140 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
6141 Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
6142 } else if (Op.isImmModifier()) {
6143 OptionalIdx[Op.getImmTy()] = I;
6144 } else if (Op.isRegOrImm()) {
6145 Op.addRegOrImmOperands(Inst, 1);
6146 } else {
6147 llvm_unreachable("unhandled operand type");
6150 } else {
6151 // No src modifiers
6152 for (unsigned E = Operands.size(); I != E; ++I) {
6153 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
6154 if (Op.isMod()) {
6155 OptionalIdx[Op.getImmTy()] = I;
6156 } else {
6157 Op.addRegOrImmOperands(Inst, 1);
6162 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
6163 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
6166 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
6167 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
6170 // Special case v_mac_{f16, f32} and v_fmac_{f16, f32} (gfx906/gfx10+):
6171 // these have a src2 register operand that is tied to the dst operand.
6172 // We don't allow modifiers for this operand in the assembler, so
6173 // src2_modifiers should be 0.
6174 if (Opc == AMDGPU::V_MAC_F32_e64_gfx6_gfx7 ||
6175 Opc == AMDGPU::V_MAC_F32_e64_gfx10 ||
6176 Opc == AMDGPU::V_MAC_F32_e64_vi ||
6177 Opc == AMDGPU::V_MAC_F16_e64_vi ||
6178 Opc == AMDGPU::V_FMAC_F32_e64_gfx10 ||
6179 Opc == AMDGPU::V_FMAC_F32_e64_vi ||
6180 Opc == AMDGPU::V_FMAC_F16_e64_gfx10) {
6181 auto it = Inst.begin();
6182 std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers));
6183 it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2
6184 ++it;
6185 Inst.insert(it, Inst.getOperand(0)); // src2 = dst
6189 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) {
6190 OptionalImmIndexMap OptionalIdx;
6191 cvtVOP3(Inst, Operands, OptionalIdx);
6194 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst,
6195 const OperandVector &Operands) {
6196 OptionalImmIndexMap OptIdx;
6197 const int Opc = Inst.getOpcode();
6198 const MCInstrDesc &Desc = MII.get(Opc);
6200 const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0;
6202 cvtVOP3(Inst, Operands, OptIdx);
6204 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in) != -1) {
6205 assert(!IsPacked);
6206 Inst.addOperand(Inst.getOperand(0));
6209 // FIXME: This is messy. Parse the modifiers as if it were a normal VOP3
6210 // instruction, and then figure out where to actually put the modifiers.
6212 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel);
6214 int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
6215 if (OpSelHiIdx != -1) {
6216 int DefaultVal = IsPacked ? -1 : 0;
6217 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi,
6218 DefaultVal);
6221 int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo);
6222 if (NegLoIdx != -1) {
6223 assert(IsPacked);
6224 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo);
6225 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi);
6228 const int Ops[] = { AMDGPU::OpName::src0,
6229 AMDGPU::OpName::src1,
6230 AMDGPU::OpName::src2 };
6231 const int ModOps[] = { AMDGPU::OpName::src0_modifiers,
6232 AMDGPU::OpName::src1_modifiers,
6233 AMDGPU::OpName::src2_modifiers };
6235 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
6237 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
6238 unsigned OpSelHi = 0;
6239 unsigned NegLo = 0;
6240 unsigned NegHi = 0;
6242 if (OpSelHiIdx != -1) {
6243 OpSelHi = Inst.getOperand(OpSelHiIdx).getImm();
6246 if (NegLoIdx != -1) {
6247 int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi);
6248 NegLo = Inst.getOperand(NegLoIdx).getImm();
6249 NegHi = Inst.getOperand(NegHiIdx).getImm();
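// Fold the per-source bits of op_sel, op_sel_hi, neg_lo and neg_hi into the
// corresponding srcN_modifiers operands, one bit per source.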
6252 for (int J = 0; J < 3; ++J) {
6253 int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]);
6254 if (OpIdx == -1)
6255 break;
6257 uint32_t ModVal = 0;
6259 if ((OpSel & (1 << J)) != 0)
6260 ModVal |= SISrcMods::OP_SEL_0;
6262 if ((OpSelHi & (1 << J)) != 0)
6263 ModVal |= SISrcMods::OP_SEL_1;
6265 if ((NegLo & (1 << J)) != 0)
6266 ModVal |= SISrcMods::NEG;
6268 if ((NegHi & (1 << J)) != 0)
6269 ModVal |= SISrcMods::NEG_HI;
6271 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
6273 Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal);
6277 //===----------------------------------------------------------------------===//
6278 // dpp
6279 //===----------------------------------------------------------------------===//
6281 bool AMDGPUOperand::isDPP8() const {
6282 return isImmTy(ImmTyDPP8);
6285 bool AMDGPUOperand::isDPPCtrl() const {
6286 using namespace AMDGPU::DPP;
6288 bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm());
6289 if (result) {
6290 int64_t Imm = getImm();
6291 return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) ||
6292 (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) ||
6293 (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) ||
6294 (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) ||
6295 (Imm == DppCtrl::WAVE_SHL1) ||
6296 (Imm == DppCtrl::WAVE_ROL1) ||
6297 (Imm == DppCtrl::WAVE_SHR1) ||
6298 (Imm == DppCtrl::WAVE_ROR1) ||
6299 (Imm == DppCtrl::ROW_MIRROR) ||
6300 (Imm == DppCtrl::ROW_HALF_MIRROR) ||
6301 (Imm == DppCtrl::BCAST15) ||
6302 (Imm == DppCtrl::BCAST31) ||
6303 (Imm >= DppCtrl::ROW_SHARE_FIRST && Imm <= DppCtrl::ROW_SHARE_LAST) ||
6304 (Imm >= DppCtrl::ROW_XMASK_FIRST && Imm <= DppCtrl::ROW_XMASK_LAST);
6306 return false;
6309 //===----------------------------------------------------------------------===//
6310 // mAI
6311 //===----------------------------------------------------------------------===//
6313 bool AMDGPUOperand::isBLGP() const {
6314 return isImm() && getImmTy() == ImmTyBLGP && isUInt<3>(getImm());
6317 bool AMDGPUOperand::isCBSZ() const {
6318 return isImm() && getImmTy() == ImmTyCBSZ && isUInt<3>(getImm());
6321 bool AMDGPUOperand::isABID() const {
6322 return isImm() && getImmTy() == ImmTyABID && isUInt<4>(getImm());
6325 bool AMDGPUOperand::isS16Imm() const {
6326 return isImm() && (isInt<16>(getImm()) || isUInt<16>(getImm()));
6329 bool AMDGPUOperand::isU16Imm() const {
6330 return isImm() && isUInt<16>(getImm());
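// Parses the MIMG "dim" operand, e.g. "dim:SQ_RSRC_IMG_2D" or the short form
// "dim:2D" (illustrative examples).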
6333 OperandMatchResultTy AMDGPUAsmParser::parseDim(OperandVector &Operands) {
6334 if (!isGFX10())
6335 return MatchOperand_NoMatch;
6337 SMLoc S = Parser.getTok().getLoc();
6339 if (getLexer().isNot(AsmToken::Identifier))
6340 return MatchOperand_NoMatch;
6341 if (getLexer().getTok().getString() != "dim")
6342 return MatchOperand_NoMatch;
6344 Parser.Lex();
6345 if (getLexer().isNot(AsmToken::Colon))
6346 return MatchOperand_ParseFail;
6348 Parser.Lex();
6350 // We want to allow "dim:1D" etc., but the initial 1 is tokenized as an
6351 // integer.
6352 std::string Token;
6353 if (getLexer().is(AsmToken::Integer)) {
6354 SMLoc Loc = getLexer().getTok().getEndLoc();
6355 Token = getLexer().getTok().getString();
6356 Parser.Lex();
6357 if (getLexer().getTok().getLoc() != Loc)
6358 return MatchOperand_ParseFail;
6360 if (getLexer().isNot(AsmToken::Identifier))
6361 return MatchOperand_ParseFail;
6362 Token += getLexer().getTok().getString();
6364 StringRef DimId = Token;
6365 if (DimId.startswith("SQ_RSRC_IMG_"))
6366 DimId = DimId.substr(12);
6368 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByAsmSuffix(DimId);
6369 if (!DimInfo)
6370 return MatchOperand_ParseFail;
6372 Parser.Lex();
6374 Operands.push_back(AMDGPUOperand::CreateImm(this, DimInfo->Encoding, S,
6375 AMDGPUOperand::ImmTyDim));
6376 return MatchOperand_Success;
6379 OperandMatchResultTy AMDGPUAsmParser::parseDPP8(OperandVector &Operands) {
6380 SMLoc S = Parser.getTok().getLoc();
6381 StringRef Prefix;
6383 if (getLexer().getKind() == AsmToken::Identifier) {
6384 Prefix = Parser.getTok().getString();
6385 } else {
6386 return MatchOperand_NoMatch;
6389 if (Prefix != "dpp8")
6390 return parseDPPCtrl(Operands);
6391 if (!isGFX10())
6392 return MatchOperand_NoMatch;
6394 // dpp8:[%d,%d,%d,%d,%d,%d,%d,%d]
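// Each selector is 3 bits wide; selector i occupies bits [3*i+2 : 3*i] of the
// encoded value and names the lane (within a group of 8) that lane i reads.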
6396 int64_t Sels[8];
6398 Parser.Lex();
6399 if (getLexer().isNot(AsmToken::Colon))
6400 return MatchOperand_ParseFail;
6402 Parser.Lex();
6403 if (getLexer().isNot(AsmToken::LBrac))
6404 return MatchOperand_ParseFail;
6406 Parser.Lex();
6407 if (getParser().parseAbsoluteExpression(Sels[0]))
6408 return MatchOperand_ParseFail;
6409 if (0 > Sels[0] || 7 < Sels[0])
6410 return MatchOperand_ParseFail;
6412 for (size_t i = 1; i < 8; ++i) {
6413 if (getLexer().isNot(AsmToken::Comma))
6414 return MatchOperand_ParseFail;
6416 Parser.Lex();
6417 if (getParser().parseAbsoluteExpression(Sels[i]))
6418 return MatchOperand_ParseFail;
6419 if (0 > Sels[i] || 7 < Sels[i])
6420 return MatchOperand_ParseFail;
6423 if (getLexer().isNot(AsmToken::RBrac))
6424 return MatchOperand_ParseFail;
6425 Parser.Lex();
6427 unsigned DPP8 = 0;
6428 for (size_t i = 0; i < 8; ++i)
6429 DPP8 |= (Sels[i] << (i * 3));
6431 Operands.push_back(AMDGPUOperand::CreateImm(this, DPP8, S, AMDGPUOperand::ImmTyDPP8));
6432 return MatchOperand_Success;
6435 OperandMatchResultTy
6436 AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) {
6437 using namespace AMDGPU::DPP;
6439 SMLoc S = Parser.getTok().getLoc();
6440 StringRef Prefix;
6441 int64_t Int;
6443 if (getLexer().getKind() == AsmToken::Identifier) {
6444 Prefix = Parser.getTok().getString();
6445 } else {
6446 return MatchOperand_NoMatch;
6449 if (Prefix == "row_mirror") {
6450 Int = DppCtrl::ROW_MIRROR;
6451 Parser.Lex();
6452 } else if (Prefix == "row_half_mirror") {
6453 Int = DppCtrl::ROW_HALF_MIRROR;
6454 Parser.Lex();
6455 } else {
6456 // Check to prevent parseDPPCtrlOps from eating invalid tokens
6457 if (Prefix != "quad_perm"
6458 && Prefix != "row_shl"
6459 && Prefix != "row_shr"
6460 && Prefix != "row_ror"
6461 && Prefix != "wave_shl"
6462 && Prefix != "wave_rol"
6463 && Prefix != "wave_shr"
6464 && Prefix != "wave_ror"
6465 && Prefix != "row_bcast"
6466 && Prefix != "row_share"
6467 && Prefix != "row_xmask") {
6468 return MatchOperand_NoMatch;
6471 if (!isGFX10() && (Prefix == "row_share" || Prefix == "row_xmask"))
6472 return MatchOperand_NoMatch;
6474 if (!isVI() && !isGFX9() &&
6475 (Prefix == "wave_shl" || Prefix == "wave_shr" ||
6476 Prefix == "wave_rol" || Prefix == "wave_ror" ||
6477 Prefix == "row_bcast"))
6478 return MatchOperand_NoMatch;
6480 Parser.Lex();
6481 if (getLexer().isNot(AsmToken::Colon))
6482 return MatchOperand_ParseFail;
6484 if (Prefix == "quad_perm") {
6485 // quad_perm:[%d,%d,%d,%d]
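// Each selector is 2 bits wide; quad_perm:[a,b,c,d] encodes as
// a | (b << 2) | (c << 4) | (d << 6).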
6486 Parser.Lex();
6487 if (getLexer().isNot(AsmToken::LBrac))
6488 return MatchOperand_ParseFail;
6489 Parser.Lex();
6491 if (getParser().parseAbsoluteExpression(Int) || !(0 <= Int && Int <=3))
6492 return MatchOperand_ParseFail;
6494 for (int i = 0; i < 3; ++i) {
6495 if (getLexer().isNot(AsmToken::Comma))
6496 return MatchOperand_ParseFail;
6497 Parser.Lex();
6499 int64_t Temp;
6500 if (getParser().parseAbsoluteExpression(Temp) || !(0 <= Temp && Temp <=3))
6501 return MatchOperand_ParseFail;
6502 const int shift = i*2 + 2;
6503 Int += (Temp << shift);
6506 if (getLexer().isNot(AsmToken::RBrac))
6507 return MatchOperand_ParseFail;
6508 Parser.Lex();
6509 } else {
6510 // sel:%d
6511 Parser.Lex();
6512 if (getParser().parseAbsoluteExpression(Int))
6513 return MatchOperand_ParseFail;
6515 if (Prefix == "row_shl" && 1 <= Int && Int <= 15) {
6516 Int |= DppCtrl::ROW_SHL0;
6517 } else if (Prefix == "row_shr" && 1 <= Int && Int <= 15) {
6518 Int |= DppCtrl::ROW_SHR0;
6519 } else if (Prefix == "row_ror" && 1 <= Int && Int <= 15) {
6520 Int |= DppCtrl::ROW_ROR0;
6521 } else if (Prefix == "wave_shl" && 1 == Int) {
6522 Int = DppCtrl::WAVE_SHL1;
6523 } else if (Prefix == "wave_rol" && 1 == Int) {
6524 Int = DppCtrl::WAVE_ROL1;
6525 } else if (Prefix == "wave_shr" && 1 == Int) {
6526 Int = DppCtrl::WAVE_SHR1;
6527 } else if (Prefix == "wave_ror" && 1 == Int) {
6528 Int = DppCtrl::WAVE_ROR1;
6529 } else if (Prefix == "row_bcast") {
6530 if (Int == 15) {
6531 Int = DppCtrl::BCAST15;
6532 } else if (Int == 31) {
6533 Int = DppCtrl::BCAST31;
6534 } else {
6535 return MatchOperand_ParseFail;
6537 } else if (Prefix == "row_share" && 0 <= Int && Int <= 15) {
6538 Int |= DppCtrl::ROW_SHARE_FIRST;
6539 } else if (Prefix == "row_xmask" && 0 <= Int && Int <= 15) {
6540 Int |= DppCtrl::ROW_XMASK_FIRST;
6541 } else {
6542 return MatchOperand_ParseFail;
6547 Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTyDppCtrl));
6548 return MatchOperand_Success;
6551 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultRowMask() const {
6552 return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppRowMask);
6555 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultEndpgmImmOperands() const {
6556 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyEndpgm);
6559 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBankMask() const {
6560 return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppBankMask);
6563 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBoundCtrl() const {
6564 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppBoundCtrl);
6567 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFI() const {
6568 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppFi);
6571 void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8) {
6572 OptionalImmIndexMap OptionalIdx;
6574 unsigned I = 1;
6575 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
6576 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
6577 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
6580 int Fi = 0;
6581 for (unsigned E = Operands.size(); I != E; ++I) {
6582 auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(),
6583 MCOI::TIED_TO);
6584 if (TiedTo != -1) {
6585 assert((unsigned)TiedTo < Inst.getNumOperands());
6586 // handle tied old or src2 for MAC instructions
6587 Inst.addOperand(Inst.getOperand(TiedTo));
6589 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
6590 // Add the register arguments
6591 if (Op.isReg() && validateVccOperand(Op.getReg())) {
6592 // VOP2b (v_add_u32, v_sub_u32 ...) dpp uses the "vcc" token.
6593 // Skip it.
6594 continue;
6597 if (IsDPP8) {
6598 if (Op.isDPP8()) {
6599 Op.addImmOperands(Inst, 1);
6600 } else if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
6601 Op.addRegWithFPInputModsOperands(Inst, 2);
6602 } else if (Op.isFI()) {
6603 Fi = Op.getImm();
6604 } else if (Op.isReg()) {
6605 Op.addRegOperands(Inst, 1);
6606 } else {
6607 llvm_unreachable("Invalid operand type");
6609 } else {
6610 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
6611 Op.addRegWithFPInputModsOperands(Inst, 2);
6612 } else if (Op.isDPPCtrl()) {
6613 Op.addImmOperands(Inst, 1);
6614 } else if (Op.isImm()) {
6615 // Handle optional arguments
6616 OptionalIdx[Op.getImmTy()] = I;
6617 } else {
6618 llvm_unreachable("Invalid operand type");
6623 if (IsDPP8) {
6624 using namespace llvm::AMDGPU::DPP;
6625 Inst.addOperand(MCOperand::createImm(Fi? DPP8_FI_1 : DPP8_FI_0));
6626 } else {
6627 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf);
6628 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf);
6629 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl);
6630 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::fi) != -1) {
6631 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppFi);
6636 //===----------------------------------------------------------------------===//
6637 // sdwa
6638 //===----------------------------------------------------------------------===//
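// Parses SDWA select operands such as "dst_sel:WORD_1" or "src0_sel:BYTE_0"
// (illustrative examples).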
6640 OperandMatchResultTy
6641 AMDGPUAsmParser::parseSDWASel(OperandVector &Operands, StringRef Prefix,
6642 AMDGPUOperand::ImmTy Type) {
6643 using namespace llvm::AMDGPU::SDWA;
6645 SMLoc S = Parser.getTok().getLoc();
6646 StringRef Value;
6647 OperandMatchResultTy res;
6649 res = parseStringWithPrefix(Prefix, Value);
6650 if (res != MatchOperand_Success) {
6651 return res;
6654 int64_t Int;
6655 Int = StringSwitch<int64_t>(Value)
6656 .Case("BYTE_0", SdwaSel::BYTE_0)
6657 .Case("BYTE_1", SdwaSel::BYTE_1)
6658 .Case("BYTE_2", SdwaSel::BYTE_2)
6659 .Case("BYTE_3", SdwaSel::BYTE_3)
6660 .Case("WORD_0", SdwaSel::WORD_0)
6661 .Case("WORD_1", SdwaSel::WORD_1)
6662 .Case("DWORD", SdwaSel::DWORD)
6663 .Default(0xffffffff);
6664 Parser.Lex(); // eat last token
6666 if (Int == 0xffffffff) {
6667 return MatchOperand_ParseFail;
6670 Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, Type));
6671 return MatchOperand_Success;
6674 OperandMatchResultTy
6675 AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) {
6676 using namespace llvm::AMDGPU::SDWA;
6678 SMLoc S = Parser.getTok().getLoc();
6679 StringRef Value;
6680 OperandMatchResultTy res;
6682 res = parseStringWithPrefix("dst_unused", Value);
6683 if (res != MatchOperand_Success) {
6684 return res;
6687 int64_t Int;
6688 Int = StringSwitch<int64_t>(Value)
6689 .Case("UNUSED_PAD", DstUnused::UNUSED_PAD)
6690 .Case("UNUSED_SEXT", DstUnused::UNUSED_SEXT)
6691 .Case("UNUSED_PRESERVE", DstUnused::UNUSED_PRESERVE)
6692 .Default(0xffffffff);
6693 Parser.Lex(); // eat last token
6695 if (Int == 0xffffffff) {
6696 return MatchOperand_ParseFail;
6699 Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTySdwaDstUnused));
6700 return MatchOperand_Success;
6703 void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) {
6704 cvtSDWA(Inst, Operands, SIInstrFlags::VOP1);
6707 void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) {
6708 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2);
6711 void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) {
6712 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true);
6715 void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) {
6716 cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, isVI());
6719 void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands,
6720 uint64_t BasicInstType, bool skipVcc) {
6721 using namespace llvm::AMDGPU::SDWA;
6723 OptionalImmIndexMap OptionalIdx;
6724 bool skippedVcc = false;
6726 unsigned I = 1;
6727 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
6728 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
6729 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
6732 for (unsigned E = Operands.size(); I != E; ++I) {
6733 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
6734 if (skipVcc && !skippedVcc && Op.isReg() &&
6735 (Op.getReg() == AMDGPU::VCC || Op.getReg() == AMDGPU::VCC_LO)) {
6736 // VOP2b (v_add_u32, v_sub_u32 ...) sdwa uses the "vcc" token as dst.
6737 // Skip it if it's 2nd (e.g. v_add_i32_sdwa v1, vcc, v2, v3)
6738 // or 4th (v_addc_u32_sdwa v1, vcc, v2, v3, vcc) operand.
6739 // Skip VCC only if we didn't skip it on previous iteration.
6740 if (BasicInstType == SIInstrFlags::VOP2 &&
6741 (Inst.getNumOperands() == 1 || Inst.getNumOperands() == 5)) {
6742 skippedVcc = true;
6743 continue;
6744 } else if (BasicInstType == SIInstrFlags::VOPC &&
6745 Inst.getNumOperands() == 0) {
6746 skippedVcc = true;
6747 continue;
6750 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
6751 Op.addRegOrImmWithInputModsOperands(Inst, 2);
6752 } else if (Op.isImm()) {
6753 // Handle optional arguments
6754 OptionalIdx[Op.getImmTy()] = I;
6755 } else {
6756 llvm_unreachable("Invalid operand type");
6758 skippedVcc = false;
6761 if (Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx10 &&
6762 Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx9 &&
6763 Inst.getOpcode() != AMDGPU::V_NOP_sdwa_vi) {
6764 // V_NOP_sdwa (vi/gfx9/gfx10) has no optional sdwa arguments
6765 switch (BasicInstType) {
6766 case SIInstrFlags::VOP1:
6767 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
6768 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
6769 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
6771 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
6772 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
6773 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
6774 break;
6776 case SIInstrFlags::VOP2:
6777 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
6778 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
6779 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
6781 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
6782 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
6783 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
6784 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
6785 break;
6787 case SIInstrFlags::VOPC:
6788 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::clamp) != -1)
6789 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
6790 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
6791 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
6792 break;
6794 default:
6795 llvm_unreachable("Invalid instruction type. Only VOP1, VOP2 and VOPC allowed");
6799 // Special case v_mac_{f16, f32}:
6800 // it has a src2 register operand that is tied to the dst operand.
6801 if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
6802 Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
6803 auto it = Inst.begin();
6804 std::advance(
6805 it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2));
6806 Inst.insert(it, Inst.getOperand(0)); // src2 = dst
6810 //===----------------------------------------------------------------------===//
6811 // mAI
6812 //===----------------------------------------------------------------------===//
6814 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBLGP() const {
6815 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyBLGP);
6818 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCBSZ() const {
6819 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCBSZ);
6822 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultABID() const {
6823 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyABID);
6826 /// Force static initialization.
6827 extern "C" void LLVMInitializeAMDGPUAsmParser() {
6828 RegisterMCAsmParser<AMDGPUAsmParser> A(getTheAMDGPUTarget());
6829 RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget());
6832 #define GET_REGISTER_MATCHER
6833 #define GET_MATCHER_IMPLEMENTATION
6834 #define GET_MNEMONIC_SPELL_CHECKER
6835 #include "AMDGPUGenAsmMatcher.inc"
6837 // This function should be defined after the auto-generated include so that we
6838 // have the MatchClassKind enum defined
6839 unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op,
6840 unsigned Kind) {
6841 // Tokens like "glc" would be parsed as immediate operands in ParseOperand().
6842 // But MatchInstructionImpl() expects to meet a token and fails to validate the
6843 // operand. This method checks if we were given an immediate operand but
6844 // expected to get the corresponding token.
6845 AMDGPUOperand &Operand = (AMDGPUOperand&)Op;
6846 switch (Kind) {
6847 case MCK_addr64:
6848 return Operand.isAddr64() ? Match_Success : Match_InvalidOperand;
6849 case MCK_gds:
6850 return Operand.isGDS() ? Match_Success : Match_InvalidOperand;
6851 case MCK_lds:
6852 return Operand.isLDS() ? Match_Success : Match_InvalidOperand;
6853 case MCK_glc:
6854 return Operand.isGLC() ? Match_Success : Match_InvalidOperand;
6855 case MCK_idxen:
6856 return Operand.isIdxen() ? Match_Success : Match_InvalidOperand;
6857 case MCK_offen:
6858 return Operand.isOffen() ? Match_Success : Match_InvalidOperand;
6859 case MCK_SSrcB32:
6860 // When operands have expression values, they will return true for isToken,
6861 // because it is not possible to distinguish between a token and an
6862 // expression at parse time. MatchInstructionImpl() will always try to
6863 // match an operand as a token when isToken returns true, and when the
6864 // name of the expression is not a valid token, the match will fail,
6865 // so we need to handle it here.
6866 return Operand.isSSrcB32() ? Match_Success : Match_InvalidOperand;
6867 case MCK_SSrcF32:
6868 return Operand.isSSrcF32() ? Match_Success : Match_InvalidOperand;
6869 case MCK_SoppBrTarget:
6870 return Operand.isSoppBrTarget() ? Match_Success : Match_InvalidOperand;
6871 case MCK_VReg32OrOff:
6872 return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand;
6873 case MCK_InterpSlot:
6874 return Operand.isInterpSlot() ? Match_Success : Match_InvalidOperand;
6875 case MCK_Attr:
6876 return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand;
6877 case MCK_AttrChan:
6878 return Operand.isAttrChan() ? Match_Success : Match_InvalidOperand;
6879 default:
6880 return Match_InvalidOperand;
6884 //===----------------------------------------------------------------------===//
6885 // endpgm
6886 //===----------------------------------------------------------------------===//
6888 OperandMatchResultTy AMDGPUAsmParser::parseEndpgmOp(OperandVector &Operands) {
6889 SMLoc S = Parser.getTok().getLoc();
6890 int64_t Imm = 0;
6892 if (!parseExpr(Imm)) {
6893 // The operand is optional, if not present default to 0
6894 Imm = 0;
6897 if (!isUInt<16>(Imm)) {
6898 Error(S, "expected a 16-bit value");
6899 return MatchOperand_ParseFail;
6902 Operands.push_back(
6903 AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyEndpgm));
6904 return MatchOperand_Success;
6907 bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmTyEndpgm); }