//===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "AMDKernelCodeT.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "MCTargetDesc/AMDGPUTargetStreamer.h"
#include "SIDefines.h"
#include "SIInstrInfo.h"
#include "SIRegisterInfo.h"
#include "TargetInfo/AMDGPUTargetInfo.h"
#include "Utils/AMDGPUAsmUtils.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "Utils/AMDKernelCodeTUtils.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/StringSet.h"
#include "llvm/ADT/Twine.h"
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/CodeGen/MachineValueType.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCParser/MCAsmLexer.h"
#include "llvm/MC/MCParser/MCAsmParser.h"
#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
#include "llvm/MC/MCParser/MCTargetAsmParser.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/AMDGPUMetadata.h"
#include "llvm/Support/AMDHSAKernelDescriptor.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/TargetParser/TargetParser.h"

using namespace llvm::AMDGPU;
using namespace llvm::amdhsa;

class AMDGPUAsmParser;

enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_AGPR, IS_TTMP, IS_SPECIAL };
//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//

class AMDGPUOperand : public MCParsedAsmOperand {
  SMLoc StartLoc, EndLoc;
  const AMDGPUAsmParser *AsmParser;

  AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_)
      : Kind(Kind_), AsmParser(AsmParser_) {}

  using Ptr = std::unique_ptr<AMDGPUOperand>;

  bool hasFPModifiers() const { return Abs || Neg; }
  bool hasIntModifiers() const { return Sext; }
  bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); }
  int64_t getFPModifiersOperand() const {
    int64_t Operand = 0;
    Operand |= Abs ? SISrcMods::ABS : 0u;
    Operand |= Neg ? SISrcMods::NEG : 0u;
    return Operand;
  }

  int64_t getIntModifiersOperand() const {
    int64_t Operand = 0;
    Operand |= Sext ? SISrcMods::SEXT : 0u;
    return Operand;
  }

  int64_t getModifiersOperand() const {
    assert(!(hasFPModifiers() && hasIntModifiers())
           && "fp and int modifiers should not be used simultaneously");
    if (hasFPModifiers()) {
      return getFPModifiersOperand();
    } else if (hasIntModifiers()) {
      return getIntModifiersOperand();
    }
    return 0;
  }
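  // Illustrative note (added by the editor, not in the original source): for a
  // VOP3 source written as "-|v0|", both Abs and Neg are set, so
  // getModifiersOperand() yields SISrcMods::ABS | SISrcMods::NEG; for
  // "sext(v0)" it yields SISrcMods::SEXT.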
  friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
  // Immediate operand kind.
  // It helps to identify the location of an offending operand after an error.
  // Note that regular literals and mandatory literals (KImm) must be handled
  // differently. When looking for an offending operand, we should usually
  // ignore mandatory literals because they are part of the instruction and
  // cannot be changed. Report location of mandatory operands only for VOPD,
  // when both OpX and OpY have a KImm and there are no other literals.
  enum ImmKindTy {
    ImmKindTyNone,
    ImmKindTyLiteral,
    ImmKindTyConst,
    ImmKindTyMandatoryLiteral,
  };

  mutable ImmKindTy Kind;
  bool isToken() const override { return Kind == Token; }

  bool isSymbolRefExpr() const {
    return isExpr() && Expr && isa<MCSymbolRefExpr>(Expr);
  }

  bool isImm() const override { return Kind == Immediate; }

  void setImmKindNone() const { Imm.Kind = ImmKindTyNone; }
  void setImmKindLiteral() const { Imm.Kind = ImmKindTyLiteral; }
  void setImmKindMandatoryLiteral() const { Imm.Kind = ImmKindTyMandatoryLiteral; }
  void setImmKindConst() const { Imm.Kind = ImmKindTyConst; }

  bool IsImmKindLiteral() const { return isImm() && Imm.Kind == ImmKindTyLiteral; }
  bool IsImmKindMandatoryLiteral() const { return isImm() && Imm.Kind == ImmKindTyMandatoryLiteral; }
  bool isImmKindConst() const { return isImm() && Imm.Kind == ImmKindTyConst; }
  bool isInlinableImm(MVT type) const;
  bool isLiteralImm(MVT type) const;

  bool isRegKind() const { return Kind == Register; }

  bool isReg() const override { return isRegKind() && !hasModifiers(); }

  bool isRegOrInline(unsigned RCID, MVT type) const {
    return isRegClass(RCID) || isInlinableImm(type);
  }

  bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const {
    return isRegOrInline(RCID, type) || isLiteralImm(type);
  }

  bool isRegOrImmWithInt16InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16);
  }

  bool isRegOrImmWithInt32InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32);
  }

  bool isRegOrInlineImmWithInt16InputMods() const {
    return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::i16);
  }

  bool isRegOrInlineImmWithInt32InputMods() const {
    return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::i32);
  }

  bool isRegOrImmWithInt64InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64);
  }

  bool isRegOrImmWithFP16InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16);
  }

  bool isRegOrImmWithFP32InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32);
  }

  bool isRegOrImmWithFP64InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64);
  }

  bool isRegOrInlineImmWithFP16InputMods() const {
    return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::f16);
  }

  bool isRegOrInlineImmWithFP32InputMods() const {
    return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::f32);
  }
  bool isVReg() const {
    return isRegClass(AMDGPU::VGPR_32RegClassID) ||
           isRegClass(AMDGPU::VReg_64RegClassID) ||
           isRegClass(AMDGPU::VReg_96RegClassID) ||
           isRegClass(AMDGPU::VReg_128RegClassID) ||
           isRegClass(AMDGPU::VReg_160RegClassID) ||
           isRegClass(AMDGPU::VReg_192RegClassID) ||
           isRegClass(AMDGPU::VReg_256RegClassID) ||
           isRegClass(AMDGPU::VReg_512RegClassID) ||
           isRegClass(AMDGPU::VReg_1024RegClassID);
  }

  bool isVReg32() const {
    return isRegClass(AMDGPU::VGPR_32RegClassID);
  }

  bool isVReg32OrOff() const {
    return isOff() || isVReg32();
  }

  bool isNull() const {
    return isRegKind() && getReg() == AMDGPU::SGPR_NULL;
  }

  bool isVRegWithInputMods() const;
  bool isT16VRegWithInputMods() const;

  bool isSDWAOperand(MVT type) const;
  bool isSDWAFP16Operand() const;
  bool isSDWAFP32Operand() const;
  bool isSDWAInt16Operand() const;
  bool isSDWAInt32Operand() const;

  bool isImmTy(ImmTy ImmT) const {
    return isImm() && Imm.Type == ImmT;
  }

  bool isImmLiteral() const { return isImmTy(ImmTyNone); }

  bool isImmModifier() const {
    return isImm() && Imm.Type != ImmTyNone;
  }
  bool isClampSI() const { return isImmTy(ImmTyClampSI); }
  bool isOModSI() const { return isImmTy(ImmTyOModSI); }
  bool isDMask() const { return isImmTy(ImmTyDMask); }
  bool isDim() const { return isImmTy(ImmTyDim); }
  bool isUNorm() const { return isImmTy(ImmTyUNorm); }
  bool isDA() const { return isImmTy(ImmTyDA); }
  bool isR128A16() const { return isImmTy(ImmTyR128A16); }
  bool isA16() const { return isImmTy(ImmTyA16); }
  bool isLWE() const { return isImmTy(ImmTyLWE); }
  bool isOff() const { return isImmTy(ImmTyOff); }
  bool isExpTgt() const { return isImmTy(ImmTyExpTgt); }
  bool isExpVM() const { return isImmTy(ImmTyExpVM); }
  bool isExpCompr() const { return isImmTy(ImmTyExpCompr); }
  bool isOffen() const { return isImmTy(ImmTyOffen); }
  bool isIdxen() const { return isImmTy(ImmTyIdxen); }
  bool isAddr64() const { return isImmTy(ImmTyAddr64); }
  bool isOffset() const { return isImmTy(ImmTyOffset) && isUInt<16>(getImm()); }
  bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<8>(getImm()); }
  bool isOffset1() const { return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); }
  bool isSMEMOffsetMod() const { return isImmTy(ImmTySMEMOffsetMod); }
  bool isFlatOffset() const { return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); }
  bool isGDS() const { return isImmTy(ImmTyGDS); }
  bool isLDS() const { return isImmTy(ImmTyLDS); }
  bool isCPol() const { return isImmTy(ImmTyCPol); }
  bool isTFE() const { return isImmTy(ImmTyTFE); }
  bool isD16() const { return isImmTy(ImmTyD16); }
  bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<7>(getImm()); }
  bool isDppBankMask() const { return isImmTy(ImmTyDppBankMask); }
  bool isDppRowMask() const { return isImmTy(ImmTyDppRowMask); }
  bool isDppBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); }
  bool isDppFI() const { return isImmTy(ImmTyDppFI); }
  bool isSDWADstSel() const { return isImmTy(ImmTySDWADstSel); }
  bool isSDWASrc0Sel() const { return isImmTy(ImmTySDWASrc0Sel); }
  bool isSDWASrc1Sel() const { return isImmTy(ImmTySDWASrc1Sel); }
  bool isSDWADstUnused() const { return isImmTy(ImmTySDWADstUnused); }
  bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); }
  bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); }
  bool isInterpAttrChan() const { return isImmTy(ImmTyInterpAttrChan); }
  bool isOpSel() const { return isImmTy(ImmTyOpSel); }
  bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); }
  bool isNegLo() const { return isImmTy(ImmTyNegLo); }
  bool isNegHi() const { return isImmTy(ImmTyNegHi); }
  bool isHigh() const { return isImmTy(ImmTyHigh); }
  bool isRegOrImm() const {
    return isReg() || isImm();
  }

  bool isRegClass(unsigned RCID) const;

  bool isInlineValue() const;

  bool isRegOrInlineNoMods(unsigned RCID, MVT type) const {
    return isRegOrInline(RCID, type) && !hasModifiers();
  }

  bool isSCSrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16);
  }

  bool isSCSrcV2B16() const {

  bool isSCSrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32);
  }

  bool isSCSrcB64() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64);
  }

  bool isBoolReg() const;

  bool isSCSrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16);
  }

  bool isSCSrcV2F16() const {

  bool isSCSrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32);
  }

  bool isSCSrcF64() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64);
  }

  bool isSSrcB32() const {
    return isSCSrcB32() || isLiteralImm(MVT::i32) || isExpr();
  }

  bool isSSrcB16() const {
    return isSCSrcB16() || isLiteralImm(MVT::i16);
  }

  bool isSSrcV2B16() const {
    llvm_unreachable("cannot happen");
  }

  bool isSSrcB64() const {
    // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits.
    return isSCSrcB64() || isLiteralImm(MVT::i64);
  }

  bool isSSrcF32() const {
    return isSCSrcB32() || isLiteralImm(MVT::f32) || isExpr();
  }

  bool isSSrcF64() const {
    return isSCSrcB64() || isLiteralImm(MVT::f64);
  }

  bool isSSrcF16() const {
    return isSCSrcB16() || isLiteralImm(MVT::f16);
  }

  bool isSSrcV2F16() const {
    llvm_unreachable("cannot happen");
  }

  bool isSSrcV2FP32() const {
    llvm_unreachable("cannot happen");
  }

  bool isSCSrcV2FP32() const {
    llvm_unreachable("cannot happen");
  }

  bool isSSrcV2INT32() const {
    llvm_unreachable("cannot happen");
  }

  bool isSCSrcV2INT32() const {
    llvm_unreachable("cannot happen");
  }

  bool isSSrcOrLdsB32() const {
    return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) ||
           isLiteralImm(MVT::i32) || isExpr();
  }
  bool isVCSrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32);
  }

  bool isVCSrcB64() const {
    return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64);
  }

  bool isVCSrcTB16_Lo128() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32_Lo128RegClassID, MVT::i16);
  }

  bool isVCSrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16);
  }

  bool isVCSrcV2B16() const {

  bool isVCSrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32);
  }

  bool isVCSrcF64() const {
    return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64);
  }

  bool isVCSrcTF16_Lo128() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32_Lo128RegClassID, MVT::f16);
  }

  bool isVCSrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16);
  }

  bool isVCSrcV2F16() const {

  bool isVSrcB32() const {
    return isVCSrcF32() || isLiteralImm(MVT::i32) || isExpr();
  }

  bool isVSrcB64() const {
    return isVCSrcF64() || isLiteralImm(MVT::i64);
  }

  bool isVSrcTB16_Lo128() const {
    return isVCSrcTB16_Lo128() || isLiteralImm(MVT::i16);
  }

  bool isVSrcB16() const {
    return isVCSrcB16() || isLiteralImm(MVT::i16);
  }

  bool isVSrcV2B16() const {
    return isVSrcB16() || isLiteralImm(MVT::v2i16);
  }

  bool isVCSrcV2FP32() const {

  bool isVSrcV2FP32() const {
    return isVSrcF64() || isLiteralImm(MVT::v2f32);
  }

  bool isVCSrcV2INT32() const {

  bool isVSrcV2INT32() const {
    return isVSrcB64() || isLiteralImm(MVT::v2i32);
  }

  bool isVSrcF32() const {
    return isVCSrcF32() || isLiteralImm(MVT::f32) || isExpr();
  }

  bool isVSrcF64() const {
    return isVCSrcF64() || isLiteralImm(MVT::f64);
  }

  bool isVSrcTF16_Lo128() const {
    return isVCSrcTF16_Lo128() || isLiteralImm(MVT::f16);
  }

  bool isVSrcF16() const {
    return isVCSrcF16() || isLiteralImm(MVT::f16);
  }

  bool isVSrcV2F16() const {
    return isVSrcF16() || isLiteralImm(MVT::v2f16);
  }
  bool isVISrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i32);
  }

  bool isVISrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i16);
  }

  bool isVISrcV2B16() const {

  bool isVISrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f32);
  }

  bool isVISrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f16);
  }

  bool isVISrcV2F16() const {
    return isVISrcF16() || isVISrcB32();
  }

  bool isVISrc_64B64() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i64);
  }

  bool isVISrc_64F64() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f64);
  }

  bool isVISrc_64V2FP32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f32);
  }

  bool isVISrc_64V2INT32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i32);
  }

  bool isVISrc_256B64() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i64);
  }

  bool isVISrc_256F64() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f64);
  }

  bool isVISrc_128B16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i16);
  }

  bool isVISrc_128V2B16() const {
    return isVISrc_128B16();
  }

  bool isVISrc_128B32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i32);
  }

  bool isVISrc_128F32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f32);
  }

  bool isVISrc_256V2FP32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f32);
  }

  bool isVISrc_256V2INT32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i32);
  }

  bool isVISrc_512B32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i32);
  }

  bool isVISrc_512B16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i16);
  }

  bool isVISrc_512V2B16() const {
    return isVISrc_512B16();
  }

  bool isVISrc_512F32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f32);
  }

  bool isVISrc_512F16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f16);
  }

  bool isVISrc_512V2F16() const {
    return isVISrc_512F16() || isVISrc_512B32();
  }

  bool isVISrc_1024B32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i32);
  }

  bool isVISrc_1024B16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i16);
  }

  bool isVISrc_1024V2B16() const {
    return isVISrc_1024B16();
  }

  bool isVISrc_1024F32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f32);
  }

  bool isVISrc_1024F16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f16);
  }

  bool isVISrc_1024V2F16() const {
    return isVISrc_1024F16() || isVISrc_1024B32();
  }
  bool isAISrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i32);
  }

  bool isAISrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i16);
  }

  bool isAISrcV2B16() const {

  bool isAISrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f32);
  }

  bool isAISrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f16);
  }

  bool isAISrcV2F16() const {
    return isAISrcF16() || isAISrcB32();
  }

  bool isAISrc_64B64() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::i64);
  }

  bool isAISrc_64F64() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::f64);
  }

  bool isAISrc_128B32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i32);
  }

  bool isAISrc_128B16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i16);
  }

  bool isAISrc_128V2B16() const {
    return isAISrc_128B16();
  }

  bool isAISrc_128F32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f32);
  }

  bool isAISrc_128F16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f16);
  }

  bool isAISrc_128V2F16() const {
    return isAISrc_128F16() || isAISrc_128B32();
  }

  bool isVISrc_128F16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f16);
  }

  bool isVISrc_128V2F16() const {
    return isVISrc_128F16() || isVISrc_128B32();
  }

  bool isAISrc_256B64() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::i64);
  }

  bool isAISrc_256F64() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::f64);
  }

  bool isAISrc_512B32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i32);
  }

  bool isAISrc_512B16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i16);
  }

  bool isAISrc_512V2B16() const {
    return isAISrc_512B16();
  }

  bool isAISrc_512F32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f32);
  }

  bool isAISrc_512F16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f16);
  }

  bool isAISrc_512V2F16() const {
    return isAISrc_512F16() || isAISrc_512B32();
  }

  bool isAISrc_1024B32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i32);
  }

  bool isAISrc_1024B16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i16);
  }

  bool isAISrc_1024V2B16() const {
    return isAISrc_1024B16();
  }

  bool isAISrc_1024F32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f32);
  }

  bool isAISrc_1024F16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f16);
  }

  bool isAISrc_1024V2F16() const {
    return isAISrc_1024F16() || isAISrc_1024B32();
  }

  bool isKImmFP32() const {
    return isLiteralImm(MVT::f32);
  }

  bool isKImmFP16() const {
    return isLiteralImm(MVT::f16);
  }
  bool isMem() const override {

  bool isExpr() const {
    return Kind == Expression;
  }

  bool isSOPPBrTarget() const { return isExpr() || isImm(); }

  bool isSWaitCnt() const;
  bool isDepCtr() const;
  bool isSDelayALU() const;
  bool isHwreg() const;
  bool isSendMsg() const;
  bool isSwizzle() const;
  bool isSMRDOffset8() const;
  bool isSMEMOffset() const;
  bool isSMRDLiteralOffset() const;
  bool isDPPCtrl() const;
  bool isGPRIdxMode() const;
  bool isS16Imm() const;
  bool isU16Imm() const;
  bool isEndpgm() const;
  bool isWaitVDST() const;
  bool isWaitEXP() const;

  StringRef getToken() const {
    return StringRef(Tok.Data, Tok.Length);
  }
  int64_t getImm() const {

  void setImm(int64_t Val) {

  ImmTy getImmTy() const {

  unsigned getReg() const override {

  SMLoc getStartLoc() const override {

  SMLoc getEndLoc() const override {

  SMRange getLocRange() const {
    return SMRange(StartLoc, EndLoc);
  }

  Modifiers getModifiers() const {
    assert(isRegKind() || isImmTy(ImmTyNone));
    return isRegKind() ? Reg.Mods : Imm.Mods;
  }

  void setModifiers(Modifiers Mods) {
    assert(isRegKind() || isImmTy(ImmTyNone));

  bool hasModifiers() const {
    return getModifiers().hasModifiers();
  }

  bool hasFPModifiers() const {
    return getModifiers().hasFPModifiers();
  }

  bool hasIntModifiers() const {
    return getModifiers().hasIntModifiers();
  }
  uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const;

  void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const;

  void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const;

  void addRegOperands(MCInst &Inst, unsigned N) const;

  void addRegOrImmOperands(MCInst &Inst, unsigned N) const {
    if (isRegKind())
      addRegOperands(Inst, N);
    else
      addImmOperands(Inst, N);
  }

  void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const {
    Modifiers Mods = getModifiers();
    Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
    if (isRegKind())
      addRegOperands(Inst, N);
    else
      addImmOperands(Inst, N, false);
  }

  void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasIntModifiers());
    addRegOrImmWithInputModsOperands(Inst, N);
  }

  void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasFPModifiers());
    addRegOrImmWithInputModsOperands(Inst, N);
  }

  void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const {
    Modifiers Mods = getModifiers();
    Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
    addRegOperands(Inst, N);
  }

  void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasIntModifiers());
    addRegWithInputModsOperands(Inst, N);
  }

  void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasFPModifiers());
    addRegWithInputModsOperands(Inst, N);
  }
  static void printImmTy(raw_ostream& OS, ImmTy Type) {
    switch (Type) {
    case ImmTyNone: OS << "None"; break;
    case ImmTyGDS: OS << "GDS"; break;
    case ImmTyLDS: OS << "LDS"; break;
    case ImmTyOffen: OS << "Offen"; break;
    case ImmTyIdxen: OS << "Idxen"; break;
    case ImmTyAddr64: OS << "Addr64"; break;
    case ImmTyOffset: OS << "Offset"; break;
    case ImmTyInstOffset: OS << "InstOffset"; break;
    case ImmTyOffset0: OS << "Offset0"; break;
    case ImmTyOffset1: OS << "Offset1"; break;
    case ImmTySMEMOffsetMod: OS << "SMEMOffsetMod"; break;
    case ImmTyCPol: OS << "CPol"; break;
    case ImmTyTFE: OS << "TFE"; break;
    case ImmTyD16: OS << "D16"; break;
    case ImmTyFORMAT: OS << "FORMAT"; break;
    case ImmTyClampSI: OS << "ClampSI"; break;
    case ImmTyOModSI: OS << "OModSI"; break;
    case ImmTyDPP8: OS << "DPP8"; break;
    case ImmTyDppCtrl: OS << "DppCtrl"; break;
    case ImmTyDppRowMask: OS << "DppRowMask"; break;
    case ImmTyDppBankMask: OS << "DppBankMask"; break;
    case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break;
    case ImmTyDppFI: OS << "DppFI"; break;
    case ImmTySDWADstSel: OS << "SDWADstSel"; break;
    case ImmTySDWASrc0Sel: OS << "SDWASrc0Sel"; break;
    case ImmTySDWASrc1Sel: OS << "SDWASrc1Sel"; break;
    case ImmTySDWADstUnused: OS << "SDWADstUnused"; break;
    case ImmTyDMask: OS << "DMask"; break;
    case ImmTyDim: OS << "Dim"; break;
    case ImmTyUNorm: OS << "UNorm"; break;
    case ImmTyDA: OS << "DA"; break;
    case ImmTyR128A16: OS << "R128A16"; break;
    case ImmTyA16: OS << "A16"; break;
    case ImmTyLWE: OS << "LWE"; break;
    case ImmTyOff: OS << "Off"; break;
    case ImmTyExpTgt: OS << "ExpTgt"; break;
    case ImmTyExpCompr: OS << "ExpCompr"; break;
    case ImmTyExpVM: OS << "ExpVM"; break;
    case ImmTyHwreg: OS << "Hwreg"; break;
    case ImmTySendMsg: OS << "SendMsg"; break;
    case ImmTyInterpSlot: OS << "InterpSlot"; break;
    case ImmTyInterpAttr: OS << "InterpAttr"; break;
    case ImmTyInterpAttrChan: OS << "InterpAttrChan"; break;
    case ImmTyOpSel: OS << "OpSel"; break;
    case ImmTyOpSelHi: OS << "OpSelHi"; break;
    case ImmTyNegLo: OS << "NegLo"; break;
    case ImmTyNegHi: OS << "NegHi"; break;
    case ImmTySwizzle: OS << "Swizzle"; break;
    case ImmTyGprIdxMode: OS << "GprIdxMode"; break;
    case ImmTyHigh: OS << "High"; break;
    case ImmTyBLGP: OS << "BLGP"; break;
    case ImmTyCBSZ: OS << "CBSZ"; break;
    case ImmTyABID: OS << "ABID"; break;
    case ImmTyEndpgm: OS << "Endpgm"; break;
    case ImmTyWaitVDST: OS << "WaitVDST"; break;
    case ImmTyWaitEXP: OS << "WaitEXP"; break;
    }
  }
  void print(raw_ostream &OS) const override {
      OS << "<register " << getReg() << " mods: " << Reg.Mods << '>';
      OS << '<' << getImm();
      if (getImmTy() != ImmTyNone) {
        OS << " type: "; printImmTy(OS, getImmTy());
      }
      OS << " mods: " << Imm.Mods << '>';
      OS << '\'' << getToken() << '\'';
      OS << "<expr " << *Expr << '>';
  static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser,
                                      int64_t Val, SMLoc Loc,
                                      ImmTy Type = ImmTyNone,
                                      bool IsFPImm = false) {
    auto Op = std::make_unique<AMDGPUOperand>(Immediate, AsmParser);
    Op->Imm.IsFPImm = IsFPImm;
    Op->Imm.Kind = ImmKindTyNone;
    Op->Imm.Type = Type;
    Op->Imm.Mods = Modifiers();

  static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser,
                                        StringRef Str, SMLoc Loc,
                                        bool HasExplicitEncodingSize = true) {
    auto Res = std::make_unique<AMDGPUOperand>(Token, AsmParser);
    Res->Tok.Data = Str.data();
    Res->Tok.Length = Str.size();
    Res->StartLoc = Loc;

  static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser,
                                      unsigned RegNo, SMLoc S,
    auto Op = std::make_unique<AMDGPUOperand>(Register, AsmParser);
    Op->Reg.RegNo = RegNo;
    Op->Reg.Mods = Modifiers();

  static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser,
                                       const class MCExpr *Expr, SMLoc S) {
    auto Op = std::make_unique<AMDGPUOperand>(Expression, AsmParser);
raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) {
  OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext;
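  // Illustrative note (added by the editor, not in the original source): with
  // Abs = 1, Neg = 0 and Sext = 0, the statement above prints
  // "abs:1 neg: 0 sext:0".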
//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//

// Holds info related to the current kernel, e.g. count of SGPRs used.
// Kernel scope begins at .amdgpu_hsa_kernel directive, ends at next
// .amdgpu_hsa_kernel or at EOF.
class KernelScopeInfo {
  int SgprIndexUnusedMin = -1;
  int VgprIndexUnusedMin = -1;
  int AgprIndexUnusedMin = -1;
  MCContext *Ctx = nullptr;
  MCSubtargetInfo const *MSTI = nullptr;

  void usesSgprAt(int i) {
    if (i >= SgprIndexUnusedMin) {
      SgprIndexUnusedMin = ++i;
      MCSymbol* const Sym =
          Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count"));
      Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx));
    }
  }

  void usesVgprAt(int i) {
    if (i >= VgprIndexUnusedMin) {
      VgprIndexUnusedMin = ++i;
      MCSymbol* const Sym =
          Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
      int totalVGPR = getTotalNumVGPRs(isGFX90A(*MSTI), AgprIndexUnusedMin,
                                       VgprIndexUnusedMin);
      Sym->setVariableValue(MCConstantExpr::create(totalVGPR, *Ctx));
    }
  }

  void usesAgprAt(int i) {
    // Instruction will error in AMDGPUAsmParser::MatchAndEmitInstruction
    if (!hasMAIInsts(*MSTI))
      return;

    if (i >= AgprIndexUnusedMin) {
      AgprIndexUnusedMin = ++i;
      MCSymbol* const Sym =
          Ctx->getOrCreateSymbol(Twine(".kernel.agpr_count"));
      Sym->setVariableValue(MCConstantExpr::create(AgprIndexUnusedMin, *Ctx));

      // Also update vgpr_count (dependent on agpr_count for gfx908/gfx90a)
      MCSymbol* const vSym =
          Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
      int totalVGPR = getTotalNumVGPRs(isGFX90A(*MSTI), AgprIndexUnusedMin,
                                       VgprIndexUnusedMin);
      vSym->setVariableValue(MCConstantExpr::create(totalVGPR, *Ctx));
    }
  }

  KernelScopeInfo() = default;

  void initialize(MCContext &Context) {
    MSTI = Ctx->getSubtargetInfo();

    usesSgprAt(SgprIndexUnusedMin = -1);
    usesVgprAt(VgprIndexUnusedMin = -1);
    if (hasMAIInsts(*MSTI)) {
      usesAgprAt(AgprIndexUnusedMin = -1);
    }
  }

  void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex,
                    unsigned RegWidth) {
      usesSgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
      usesAgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
      usesVgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
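// Illustrative note (added by the editor, not in the original source): if a
// kernel's highest references are v7 and s3, usesRegister() ends up calling
// usesVgprAt(7) and usesSgprAt(3), so .kernel.vgpr_count and
// .kernel.sgpr_count resolve to 8 and 4 (with the VGPR figure further
// adjusted for AGPR usage on gfx90a via getTotalNumVGPRs).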
class AMDGPUAsmParser : public MCTargetAsmParser {
  MCAsmParser &Parser;

  unsigned ForcedEncodingSize = 0;
  bool ForcedDPP = false;
  bool ForcedSDWA = false;
  KernelScopeInfo KernelScope;

  /// @name Auto-generated Match Functions
#define GET_ASSEMBLER_HEADER
#include "AMDGPUGenAsmMatcher.inc"

  bool ParseAsAbsoluteExpression(uint32_t &Ret);
  bool OutOfRangeError(SMRange Range);

  /// Calculate VGPR/SGPR blocks required for given target, reserved
  /// registers, and user-specified NextFreeXGPR values.
  ///
  /// \param Features [in] Target features, used for bug corrections.
  /// \param VCCUsed [in] Whether VCC special SGPR is reserved.
  /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved.
  /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved.
  /// \param EnableWavefrontSize32 [in] Value of ENABLE_WAVEFRONT_SIZE32 kernel
  /// descriptor field, if valid.
  /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one.
  /// \param VGPRRange [in] Token range, used for VGPR diagnostics.
  /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one.
  /// \param SGPRRange [in] Token range, used for SGPR diagnostics.
  /// \param VGPRBlocks [out] Result VGPR block count.
  /// \param SGPRBlocks [out] Result SGPR block count.
  bool calculateGPRBlocks(const FeatureBitset &Features, bool VCCUsed,
                          bool FlatScrUsed, bool XNACKUsed,
                          std::optional<bool> EnableWavefrontSize32,
                          unsigned NextFreeVGPR, SMRange VGPRRange,
                          unsigned NextFreeSGPR, SMRange SGPRRange,
                          unsigned &VGPRBlocks, unsigned &SGPRBlocks);
  bool ParseDirectiveAMDGCNTarget();
  bool ParseDirectiveAMDHSAKernel();
  bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor);
  bool ParseDirectiveHSACodeObjectVersion();
  bool ParseDirectiveHSACodeObjectISA();
  bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header);
  bool ParseDirectiveAMDKernelCodeT();
  // TODO: Possibly make subtargetHasRegister const.
  bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo);
  bool ParseDirectiveAMDGPUHsaKernel();

  bool ParseDirectiveISAVersion();
  bool ParseDirectiveHSAMetadata();
  bool ParseDirectivePALMetadataBegin();
  bool ParseDirectivePALMetadata();
  bool ParseDirectiveAMDGPULDS();

  /// Common code to parse out a block of text (typically YAML) between start
  /// and end directives.
  bool ParseToEndDirective(const char *AssemblerDirectiveBegin,
                           const char *AssemblerDirectiveEnd,
                           std::string &CollectString);

  bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth,
                             RegisterKind RegKind, unsigned Reg1, SMLoc Loc);
  bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
                           unsigned &RegNum, unsigned &RegWidth,
                           bool RestoreOnFailure = false);
  bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
                           unsigned &RegNum, unsigned &RegWidth,
                           SmallVectorImpl<AsmToken> &Tokens);
  unsigned ParseRegularReg(RegisterKind &RegKind, unsigned &RegNum,
                           SmallVectorImpl<AsmToken> &Tokens);
  unsigned ParseSpecialReg(RegisterKind &RegKind, unsigned &RegNum,
                           SmallVectorImpl<AsmToken> &Tokens);
  unsigned ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
                        unsigned &RegWidth, SmallVectorImpl<AsmToken> &Tokens);
  bool ParseRegRange(unsigned& Num, unsigned& Width);
  unsigned getRegularReg(RegisterKind RegKind,
  bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const;
  std::optional<StringRef> getGprCountSymbolName(RegisterKind RegKind);
  void initializeGprCountSymbol(RegisterKind RegKind);
  bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex,

  void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands,
  void cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
                 bool IsGdsHardcoded);

  enum AMDGPUMatchResultTy {
    Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY
  };
  enum OperandMode {
    OperandMode_Default,
  };

  using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;
  AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
                  const MCInstrInfo &MII,
                  const MCTargetOptions &Options)
      : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) {
    MCAsmParserExtension::Initialize(Parser);

    if (getFeatureBits().none()) {
      // Set default features.
      copySTI().ToggleFeature("southern-islands");
    }

    setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));

    // TODO: make those pre-defined variables read-only.
    // Currently there is no suitable machinery in the core llvm-mc for this.
    // MCSymbol::isRedefinable is intended for another purpose, and
    // AsmParser::parseDirectiveSet() cannot be specialized for a specific target.
    AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
    MCContext &Ctx = getContext();
    if (ISA.Major >= 6 && isHsaAbiVersion3AndAbove(&getSTI())) {
      MCSymbol *Sym =
          Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_number"));
      Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
      Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_minor"));
      Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
      Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_stepping"));
      Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
    } else {
      MCSymbol *Sym =
          Ctx.getOrCreateSymbol(Twine(".option.machine_version_major"));
      Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
      Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_minor"));
      Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
      Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping"));
      Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
    }
    if (ISA.Major >= 6 && isHsaAbiVersion3AndAbove(&getSTI())) {
      initializeGprCountSymbol(IS_VGPR);
      initializeGprCountSymbol(IS_SGPR);
    } else
      KernelScope.initialize(getContext());
  }
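  // Illustrative note (added by the editor, not in the original source): for
  // example, on a gfx90a target (ISA version 9.0.10) the symbols
  // .amdgcn.gfx_generation_number, .amdgcn.gfx_generation_minor and
  // .amdgcn.gfx_generation_stepping above would be set to 9, 0 and 10.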
  bool hasMIMG_R128() const {
    return AMDGPU::hasMIMG_R128(getSTI());
  }

  bool hasPackedD16() const {
    return AMDGPU::hasPackedD16(getSTI());
  }

  bool hasA16() const { return AMDGPU::hasA16(getSTI()); }

  bool hasG16() const { return AMDGPU::hasG16(getSTI()); }

  bool isSI() const { return AMDGPU::isSI(getSTI()); }

  bool isCI() const { return AMDGPU::isCI(getSTI()); }

  bool isVI() const { return AMDGPU::isVI(getSTI()); }

  bool isGFX9() const {
    return AMDGPU::isGFX9(getSTI());
  }

  // TODO: isGFX90A is also true for GFX940. We need to clean it.
  bool isGFX90A() const {
    return AMDGPU::isGFX90A(getSTI());
  }

  bool isGFX940() const {
    return AMDGPU::isGFX940(getSTI());
  }

  bool isGFX9Plus() const {
    return AMDGPU::isGFX9Plus(getSTI());
  }

  bool isGFX10() const {
    return AMDGPU::isGFX10(getSTI());
  }

  bool isGFX10Plus() const { return AMDGPU::isGFX10Plus(getSTI()); }

  bool isGFX11() const {
    return AMDGPU::isGFX11(getSTI());
  }

  bool isGFX11Plus() const {
    return AMDGPU::isGFX11Plus(getSTI());
  }

  bool isGFX10_AEncoding() const { return AMDGPU::isGFX10_AEncoding(getSTI()); }

  bool isGFX10_BEncoding() const {
    return AMDGPU::isGFX10_BEncoding(getSTI());
  }

  bool hasInv2PiInlineImm() const {
    return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
  }

  bool hasFlatOffsets() const {
    return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets];
  }

  bool hasArchitectedFlatScratch() const {
    return getFeatureBits()[AMDGPU::FeatureArchitectedFlatScratch];
  }

  bool hasSGPR102_SGPR103() const {
    return !isVI() && !isGFX9();
  }

  bool hasSGPR104_SGPR105() const { return isGFX10Plus(); }

  bool hasIntClamp() const {
    return getFeatureBits()[AMDGPU::FeatureIntClamp];
  }

  bool hasPartialNSAEncoding() const {
    return getFeatureBits()[AMDGPU::FeaturePartialNSAEncoding];
  }

  unsigned getNSAMaxSize() const {
    return AMDGPU::getNSAMaxSize(getSTI());
  }

  AMDGPUTargetStreamer &getTargetStreamer() {
    MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
    return static_cast<AMDGPUTargetStreamer &>(TS);
  }

  const MCRegisterInfo *getMRI() const {
    // We need this const_cast because for some reason getContext() is not const
    return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo();
  }

  const MCInstrInfo *getMII() const {

  const FeatureBitset &getFeatureBits() const {
    return getSTI().getFeatureBits();
  }

  void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; }
  void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; }
  void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; }

  unsigned getForcedEncodingSize() const { return ForcedEncodingSize; }
  bool isForcedVOP3() const { return ForcedEncodingSize == 64; }
  bool isForcedDPP() const { return ForcedDPP; }
  bool isForcedSDWA() const { return ForcedSDWA; }
  ArrayRef<unsigned> getMatchedVariants() const;
  StringRef getMatchedVariantName() const;
  std::unique_ptr<AMDGPUOperand> parseRegister(bool RestoreOnFailure = false);
  bool ParseRegister(MCRegister &RegNo, SMLoc &StartLoc, SMLoc &EndLoc,
                     bool RestoreOnFailure);
  bool parseRegister(MCRegister &RegNo, SMLoc &StartLoc,
                     SMLoc &EndLoc) override;
  OperandMatchResultTy tryParseRegister(MCRegister &RegNo, SMLoc &StartLoc,
                                        SMLoc &EndLoc) override;
  unsigned checkTargetMatchPredicate(MCInst &Inst) override;
  unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
                                      unsigned Kind) override;
  bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
                               OperandVector &Operands, MCStreamer &Out,
                               uint64_t &ErrorInfo,
                               bool MatchingInlineAsm) override;
  bool ParseDirective(AsmToken DirectiveID) override;
  ParseStatus parseOperand(OperandVector &Operands, StringRef Mnemonic,
                           OperandMode Mode = OperandMode_Default);
  StringRef parseMnemonicSuffix(StringRef Name);
  bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
                        SMLoc NameLoc, OperandVector &Operands) override;
  //bool ProcessInstruction(MCInst &Inst);

  ParseStatus parseTokenOp(StringRef Name, OperandVector &Operands);
  ParseStatus parseIntWithPrefix(const char *Prefix, int64_t &Int);

  ParseStatus
  parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
                     AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
                     std::function<bool(int64_t &)> ConvertResult = nullptr);

  ParseStatus parseOperandArrayWithPrefix(
      const char *Prefix, OperandVector &Operands,
      AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
      bool (*ConvertResult)(int64_t &) = nullptr);

  ParseStatus
  parseNamedBit(StringRef Name, OperandVector &Operands,
                AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone);
  unsigned getCPolKind(StringRef Id, StringRef Mnemo, bool &Disabling) const;
  ParseStatus parseCPol(OperandVector &Operands);
  ParseStatus parseStringWithPrefix(StringRef Prefix, StringRef &Value,

  bool isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
  bool isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
  bool isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
  bool isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const;
  bool parseSP3NegModifier();
  ParseStatus parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false);
  ParseStatus parseReg(OperandVector &Operands);
  ParseStatus parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false);
  ParseStatus parseRegOrImmWithFPInputMods(OperandVector &Operands,
                                           bool AllowImm = true);
  ParseStatus parseRegOrImmWithIntInputMods(OperandVector &Operands,
                                            bool AllowImm = true);
  ParseStatus parseRegWithFPInputMods(OperandVector &Operands);
  ParseStatus parseRegWithIntInputMods(OperandVector &Operands);
  ParseStatus parseVReg32OrOff(OperandVector &Operands);
  ParseStatus parseDfmtNfmt(int64_t &Format);
  ParseStatus parseUfmt(int64_t &Format);
  ParseStatus parseSymbolicSplitFormat(StringRef FormatStr, SMLoc Loc,
  ParseStatus parseSymbolicUnifiedFormat(StringRef FormatStr, SMLoc Loc,
  ParseStatus parseFORMAT(OperandVector &Operands);
  ParseStatus parseSymbolicOrNumericFormat(int64_t &Format);
  ParseStatus parseNumericFormat(int64_t &Format);
  ParseStatus parseFlatOffset(OperandVector &Operands);
  ParseStatus parseR128A16(OperandVector &Operands);
  ParseStatus parseBLGP(OperandVector &Operands);
  bool tryParseFmt(const char *Pref, int64_t MaxVal, int64_t &Val);
  bool matchDfmtNfmt(int64_t &Dfmt, int64_t &Nfmt, StringRef FormatStr, SMLoc Loc);
  void cvtDSOffset01(MCInst &Inst, const OperandVector &Operands);
  void cvtDS(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, false); }
  void cvtDSGds(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, true); }
  void cvtExp(MCInst &Inst, const OperandVector &Operands);

  bool parseCnt(int64_t &IntVal);
  ParseStatus parseSWaitCnt(OperandVector &Operands);

  bool parseDepCtr(int64_t &IntVal, unsigned &Mask);
  void depCtrError(SMLoc Loc, int ErrorId, StringRef DepCtrName);
  ParseStatus parseDepCtr(OperandVector &Operands);

  bool parseDelay(int64_t &Delay);
  ParseStatus parseSDelayALU(OperandVector &Operands);

  ParseStatus parseHwreg(OperandVector &Operands);
  struct OperandInfoTy {
    bool IsSymbolic = false;
    bool IsDefined = false;

    OperandInfoTy(int64_t Id_) : Id(Id_) {}

  bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &Op, OperandInfoTy &Stream);
  bool validateSendMsg(const OperandInfoTy &Msg,
                       const OperandInfoTy &Op,
                       const OperandInfoTy &Stream);

  bool parseHwregBody(OperandInfoTy &HwReg,
                      OperandInfoTy &Offset,
                      OperandInfoTy &Width);
  bool validateHwreg(const OperandInfoTy &HwReg,
                     const OperandInfoTy &Offset,
                     const OperandInfoTy &Width);

  SMLoc getFlatOffsetLoc(const OperandVector &Operands) const;
  SMLoc getSMEMOffsetLoc(const OperandVector &Operands) const;
  SMLoc getBLGPLoc(const OperandVector &Operands) const;

  SMLoc getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test,
                      const OperandVector &Operands) const;
  SMLoc getImmLoc(AMDGPUOperand::ImmTy Type, const OperandVector &Operands) const;
  SMLoc getRegLoc(unsigned Reg, const OperandVector &Operands) const;
  SMLoc getLitLoc(const OperandVector &Operands,
                  bool SearchMandatoryLiterals = false) const;
  SMLoc getMandatoryLitLoc(const OperandVector &Operands) const;
  SMLoc getConstLoc(const OperandVector &Operands) const;
  SMLoc getInstLoc(const OperandVector &Operands) const;
  bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc, const OperandVector &Operands);
  bool validateFlatOffset(const MCInst &Inst, const OperandVector &Operands);
  bool validateSMEMOffset(const MCInst &Inst, const OperandVector &Operands);
  bool validateSOPLiteral(const MCInst &Inst) const;
  bool validateConstantBusLimitations(const MCInst &Inst, const OperandVector &Operands);
  bool validateVOPDRegBankConstraints(const MCInst &Inst,
                                      const OperandVector &Operands);
  bool validateIntClampSupported(const MCInst &Inst);
  bool validateMIMGAtomicDMask(const MCInst &Inst);
  bool validateMIMGGatherDMask(const MCInst &Inst);
  bool validateMovrels(const MCInst &Inst, const OperandVector &Operands);
  bool validateMIMGDataSize(const MCInst &Inst, const SMLoc &IDLoc);
  bool validateMIMGAddrSize(const MCInst &Inst, const SMLoc &IDLoc);
  bool validateMIMGD16(const MCInst &Inst);
  bool validateMIMGMSAA(const MCInst &Inst);
  bool validateOpSel(const MCInst &Inst);
  bool validateDPP(const MCInst &Inst, const OperandVector &Operands);
  bool validateVccOperand(unsigned Reg) const;
  bool validateVOPLiteral(const MCInst &Inst, const OperandVector &Operands);
  bool validateMAIAccWrite(const MCInst &Inst, const OperandVector &Operands);
  bool validateMAISrc2(const MCInst &Inst, const OperandVector &Operands);
  bool validateMFMA(const MCInst &Inst, const OperandVector &Operands);
  bool validateAGPRLdSt(const MCInst &Inst) const;
  bool validateVGPRAlign(const MCInst &Inst) const;
  bool validateBLGP(const MCInst &Inst, const OperandVector &Operands);
  bool validateGWS(const MCInst &Inst, const OperandVector &Operands);
  bool validateDivScale(const MCInst &Inst);
  bool validateWaitCnt(const MCInst &Inst, const OperandVector &Operands);
  bool validateCoherencyBits(const MCInst &Inst, const OperandVector &Operands,
                             const SMLoc &IDLoc);
  bool validateExeczVcczOperands(const OperandVector &Operands);
  bool validateTFE(const MCInst &Inst, const OperandVector &Operands);
  std::optional<StringRef> validateLdsDirect(const MCInst &Inst);
  unsigned getConstantBusLimit(unsigned Opcode) const;
  bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
  bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
  unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const;
  bool isSupportedMnemo(StringRef Mnemo,
                        const FeatureBitset &FBS);
  bool isSupportedMnemo(StringRef Mnemo,
                        const FeatureBitset &FBS,
                        ArrayRef<unsigned> Variants);
  bool checkUnsupportedInstruction(StringRef Name, const SMLoc &IDLoc);

  bool isId(const StringRef Id) const;
  bool isId(const AsmToken &Token, const StringRef Id) const;
  bool isToken(const AsmToken::TokenKind Kind) const;
  StringRef getId() const;
  bool trySkipId(const StringRef Id);
  bool trySkipId(const StringRef Pref, const StringRef Id);
  bool trySkipId(const StringRef Id, const AsmToken::TokenKind Kind);
  bool trySkipToken(const AsmToken::TokenKind Kind);
  bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg);
  bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string");
  bool parseId(StringRef &Val, const StringRef ErrMsg = "");

  void peekTokens(MutableArrayRef<AsmToken> Tokens);
  AsmToken::TokenKind getTokenKind() const;
  bool parseExpr(int64_t &Imm, StringRef Expected = "");
  bool parseExpr(OperandVector &Operands);
  StringRef getTokenStr() const;
  AsmToken peekToken(bool ShouldSkipSpace = true);
  AsmToken getToken() const;
  SMLoc getLoc() const;

  void onBeginOfFile() override;

  ParseStatus parseCustomOperand(OperandVector &Operands, unsigned MCK);

  ParseStatus parseExpTgt(OperandVector &Operands);
  ParseStatus parseSendMsg(OperandVector &Operands);
  ParseStatus parseInterpSlot(OperandVector &Operands);
  ParseStatus parseInterpAttr(OperandVector &Operands);
  ParseStatus parseSOPPBrTarget(OperandVector &Operands);
  ParseStatus parseBoolReg(OperandVector &Operands);

  bool parseSwizzleOperand(int64_t &Op,
                           const unsigned MinVal,
                           const unsigned MaxVal,
                           const StringRef ErrMsg,
  bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
                            const unsigned MinVal,
                            const unsigned MaxVal,
                            const StringRef ErrMsg);
  ParseStatus parseSwizzle(OperandVector &Operands);
  bool parseSwizzleOffset(int64_t &Imm);
  bool parseSwizzleMacro(int64_t &Imm);
  bool parseSwizzleQuadPerm(int64_t &Imm);
  bool parseSwizzleBitmaskPerm(int64_t &Imm);
  bool parseSwizzleBroadcast(int64_t &Imm);
  bool parseSwizzleSwap(int64_t &Imm);
  bool parseSwizzleReverse(int64_t &Imm);

  ParseStatus parseGPRIdxMode(OperandVector &Operands);
  int64_t parseGPRIdxMacro();
  void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false); }
  void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true); }

  ParseStatus parseOModSI(OperandVector &Operands);

  void cvtVOP3(MCInst &Inst, const OperandVector &Operands,
               OptionalImmIndexMap &OptionalIdx);
  void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands);
  void cvtVOP3(MCInst &Inst, const OperandVector &Operands);
  void cvtVOP3P(MCInst &Inst, const OperandVector &Operands);
  void cvtVOPD(MCInst &Inst, const OperandVector &Operands);
  void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands,
                    OptionalImmIndexMap &OptionalIdx);
  void cvtVOP3P(MCInst &Inst, const OperandVector &Operands,
                OptionalImmIndexMap &OptionalIdx);

  void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands);
  void cvtVINTERP(MCInst &Inst, const OperandVector &Operands);
  void cvtSMEMAtomic(MCInst &Inst, const OperandVector &Operands);

  bool parseDimId(unsigned &Encoding);
  ParseStatus parseDim(OperandVector &Operands);
  bool convertDppBoundCtrl(int64_t &BoundCtrl);
  ParseStatus parseDPP8(OperandVector &Operands);
  ParseStatus parseDPPCtrl(OperandVector &Operands);
  bool isSupportedDPPCtrl(StringRef Ctrl, const OperandVector &Operands);
  int64_t parseDPPCtrlSel(StringRef Ctrl);
  int64_t parseDPPCtrlPerm();
  void cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8 = false);
  void cvtDPP8(MCInst &Inst, const OperandVector &Operands) {
    cvtDPP(Inst, Operands, true);
  }
  void cvtVOP3DPP(MCInst &Inst, const OperandVector &Operands,
                  bool IsDPP8 = false);
  void cvtVOP3DPP8(MCInst &Inst, const OperandVector &Operands) {
    cvtVOP3DPP(Inst, Operands, true);
  }

  ParseStatus parseSDWASel(OperandVector &Operands, StringRef Prefix,
                           AMDGPUOperand::ImmTy Type);
  ParseStatus parseSDWADstUnused(OperandVector &Operands);
  void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands);
  void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands);
  void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands);
  void cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands);
  void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands);
  void cvtSDWA(MCInst &Inst, const OperandVector &Operands,
               uint64_t BasicInstType,
               bool SkipDstVcc = false,
               bool SkipSrcVcc = false);

  ParseStatus parseEndpgm(OperandVector &Operands);

  ParseStatus parseVOPD(OperandVector &Operands);
} // end anonymous namespace

// May be called with integer type with equivalent bitwidth.
static const fltSemantics *getFltSemantics(unsigned Size) {
  switch (Size) {
  case 4:
    return &APFloat::IEEEsingle();
  case 8:
    return &APFloat::IEEEdouble();
  case 2:
    return &APFloat::IEEEhalf();
  default:
    llvm_unreachable("unsupported fp type");
  }
}

static const fltSemantics *getFltSemantics(MVT VT) {
  return getFltSemantics(VT.getSizeInBits() / 8);
}
static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
  switch (OperandType) {
  case AMDGPU::OPERAND_REG_IMM_INT32:
  case AMDGPU::OPERAND_REG_IMM_FP32:
  case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED:
  case AMDGPU::OPERAND_REG_INLINE_C_INT32:
  case AMDGPU::OPERAND_REG_INLINE_C_FP32:
  case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
  case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
  case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
  case AMDGPU::OPERAND_REG_IMM_V2FP32:
  case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
  case AMDGPU::OPERAND_REG_IMM_V2INT32:
  case AMDGPU::OPERAND_KIMM32:
    return &APFloat::IEEEsingle();
  case AMDGPU::OPERAND_REG_IMM_INT64:
  case AMDGPU::OPERAND_REG_IMM_FP64:
  case AMDGPU::OPERAND_REG_INLINE_C_INT64:
  case AMDGPU::OPERAND_REG_INLINE_C_FP64:
  case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
    return &APFloat::IEEEdouble();
  case AMDGPU::OPERAND_REG_IMM_INT16:
  case AMDGPU::OPERAND_REG_IMM_FP16:
  case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
  case AMDGPU::OPERAND_REG_INLINE_C_INT16:
  case AMDGPU::OPERAND_REG_INLINE_C_FP16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
  case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
  case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
  case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
  case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
  case AMDGPU::OPERAND_REG_IMM_V2INT16:
  case AMDGPU::OPERAND_REG_IMM_V2FP16:
  case AMDGPU::OPERAND_KIMM16:
    return &APFloat::IEEEhalf();
  default:
    llvm_unreachable("unsupported fp type");
  }
}
//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//

static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) {
  // Convert literal to single precision
  APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT),
                                               APFloat::rmNearestTiesToEven,
  // We allow precision loss but not overflow or underflow.
  if (Status != APFloat::opOK &&
      ((Status & APFloat::opOverflow) != 0 ||
       (Status & APFloat::opUnderflow) != 0)) {
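// Illustrative note (added by the editor, not in the original source): when
// converting to f16, a literal like 1.0 converts exactly, 0.1 merely loses
// precision and is still accepted, while 1.0e10 overflows the target format
// and is rejected by the check above.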
static bool isSafeTruncation(int64_t Val, unsigned Size) {
  return isUIntN(Size, Val) || isIntN(Size, Val);
}
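// Illustrative note (added by the editor, not in the original source): for
// Size == 16, both isSafeTruncation(0xFFFF, 16) and isSafeTruncation(-1, 16)
// hold (the value fits as unsigned or signed 16-bit), while
// isSafeTruncation(0x10000, 16) does not.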
static bool isInlineableLiteralOp16(int64_t Val, MVT VT, bool HasInv2Pi) {
  if (VT.getScalarType() == MVT::i16) {
    // FP immediate values are broken.
    return isInlinableIntLiteral(Val);
  }

  // f16/v2f16 operands work correctly for all values.
  return AMDGPU::isInlinableLiteral16(Val, HasInv2Pi);
}
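// Illustrative note (added by the editor, not in the original source): for an
// i16 operand only the integer inline constants are accepted here, so a
// floating-point literal such as 0.5 is not treated as inlinable, whereas for
// an f16 operand 0.5 maps to an inline constant encoding.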
bool AMDGPUOperand::isInlinableImm(MVT type) const {

  // This is a hack to enable named inline values like
  // shared_base with both 32-bit and 64-bit operands.
  // Note that these values are defined as
  // 32-bit operands only.
  if (isInlineValue()) {
    return true;
  }

  if (!isImmTy(ImmTyNone)) {
    // Only plain immediates are inlinable (e.g. "clamp" attribute is not)
    return false;
  }
  // TODO: We should avoid using host float here. It would be better to
  // check the float bit values which is what a few other places do.
  // We've had bot failures before due to weird NaN support on mips hosts.

  APInt Literal(64, Imm.Val);

  if (Imm.IsFPImm) { // We got fp literal token
    if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
      return AMDGPU::isInlinableLiteral64(Imm.Val,
                                          AsmParser->hasInv2PiInlineImm());
    }

    APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
    if (!canLosslesslyConvertToFPType(FPLiteral, type))
      return false;

    if (type.getScalarSizeInBits() == 16) {
      return isInlineableLiteralOp16(
        static_cast<int16_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
        type, AsmParser->hasInv2PiInlineImm());
    }

    // Check if single precision literal is inlinable
    return AMDGPU::isInlinableLiteral32(
      static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
      AsmParser->hasInv2PiInlineImm());
  }

  // We got int literal token.
  if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
    return AMDGPU::isInlinableLiteral64(Imm.Val,
                                        AsmParser->hasInv2PiInlineImm());
  }

  if (!isSafeTruncation(Imm.Val, type.getScalarSizeInBits())) {
    return false;
  }

  if (type.getScalarSizeInBits() == 16) {
    return isInlineableLiteralOp16(
      static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()),
      type, AsmParser->hasInv2PiInlineImm());
  }

  return AMDGPU::isInlinableLiteral32(
    static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()),
    AsmParser->hasInv2PiInlineImm());
}
1939 bool AMDGPUOperand::isLiteralImm(MVT type
) const {
1940 // Check that this immediate can be added as literal
1941 if (!isImmTy(ImmTyNone
)) {
1946 // We got int literal token.
1948 if (type
== MVT::f64
&& hasFPModifiers()) {
1949 // Cannot apply fp modifiers to int literals preserving the same semantics
1950 // for VOP1/2/C and VOP3 because of integer truncation. To avoid ambiguity,
1951 // disable these cases.
1955 unsigned Size
= type
.getSizeInBits();
1959 // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP
1961 return isSafeTruncation(Imm
.Val
, Size
);
1964 // We got fp literal token
1965 if (type
== MVT::f64
) { // Expected 64-bit fp operand
1966 // We would set low 64-bits of literal to zeroes but we accept this literals
1970 if (type
== MVT::i64
) { // Expected 64-bit int operand
1971 // We don't allow fp literals in 64-bit integer instructions. It is
1972 // unclear how we should encode them.
1976 // We allow fp literals with f16x2 operands assuming that the specified
1977 // literal goes into the lower half and the upper half is zero. We also
1978 // require that the literal may be losslessly converted to f16.
1979 MVT ExpectedType
= (type
== MVT::v2f16
)? MVT::f16
:
1980 (type
== MVT::v2i16
)? MVT::i16
:
1981 (type
== MVT::v2f32
)? MVT::f32
: type
;
1983 APFloat
FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm
.Val
));
1984 return canLosslesslyConvertToFPType(FPLiteral
, ExpectedType
);
1987 bool AMDGPUOperand::isRegClass(unsigned RCID
) const {
1988 return isRegKind() && AsmParser
->getMRI()->getRegClass(RCID
).contains(getReg());
1991 bool AMDGPUOperand::isVRegWithInputMods() const {
1992 return isRegClass(AMDGPU::VGPR_32RegClassID
) ||
1993 // GFX90A allows DPP on 64-bit operands.
1994 (isRegClass(AMDGPU::VReg_64RegClassID
) &&
1995 AsmParser
->getFeatureBits()[AMDGPU::Feature64BitDPP
]);
1998 bool AMDGPUOperand::isT16VRegWithInputMods() const {
1999 return isRegClass(AMDGPU::VGPR_32_Lo128RegClassID
);
2002 bool AMDGPUOperand::isSDWAOperand(MVT type
) const {
2003 if (AsmParser
->isVI())
2005 else if (AsmParser
->isGFX9Plus())
2006 return isRegClass(AMDGPU::VS_32RegClassID
) || isInlinableImm(type
);
2011 bool AMDGPUOperand::isSDWAFP16Operand() const {
2012 return isSDWAOperand(MVT::f16
);
2015 bool AMDGPUOperand::isSDWAFP32Operand() const {
2016 return isSDWAOperand(MVT::f32
);
2019 bool AMDGPUOperand::isSDWAInt16Operand() const {
2020 return isSDWAOperand(MVT::i16
);
2023 bool AMDGPUOperand::isSDWAInt32Operand() const {
2024 return isSDWAOperand(MVT::i32
);
2027 bool AMDGPUOperand::isBoolReg() const {
2028 auto FB
= AsmParser
->getFeatureBits();
2029 return isReg() && ((FB
[AMDGPU::FeatureWavefrontSize64
] && isSCSrcB64()) ||
2030 (FB
[AMDGPU::FeatureWavefrontSize32
] && isSCSrcB32()));
2033 uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val
, unsigned Size
) const
2035 assert(isImmTy(ImmTyNone
) && Imm
.Mods
.hasFPModifiers());
2036 assert(Size
== 2 || Size
== 4 || Size
== 8);
2038 const uint64_t FpSignMask
= (1ULL << (Size
* 8 - 1));
2050 void AMDGPUOperand::addImmOperands(MCInst
&Inst
, unsigned N
, bool ApplyModifiers
) const {
2052 Inst
.addOperand(MCOperand::createExpr(Expr
));
2056 if (AMDGPU::isSISrcOperand(AsmParser
->getMII()->get(Inst
.getOpcode()),
2057 Inst
.getNumOperands())) {
2058 addLiteralImmOperand(Inst
, Imm
.Val
,
2060 isImmTy(ImmTyNone
) && Imm
.Mods
.hasFPModifiers());
2062 assert(!isImmTy(ImmTyNone
) || !hasModifiers());
2063 Inst
.addOperand(MCOperand::createImm(Imm
.Val
));
2068 void AMDGPUOperand::addLiteralImmOperand(MCInst
&Inst
, int64_t Val
, bool ApplyModifiers
) const {
2069 const auto& InstDesc
= AsmParser
->getMII()->get(Inst
.getOpcode());
2070 auto OpNum
= Inst
.getNumOperands();
2071 // Check that this operand accepts literals
2072 assert(AMDGPU::isSISrcOperand(InstDesc
, OpNum
));
2074 if (ApplyModifiers
) {
2075 assert(AMDGPU::isSISrcFPOperand(InstDesc
, OpNum
));
2076 const unsigned Size
= Imm
.IsFPImm
? sizeof(double) : getOperandSize(InstDesc
, OpNum
);
2077 Val
= applyInputFPModifiers(Val
, Size
);
2080 APInt
Literal(64, Val
);
2081 uint8_t OpTy
= InstDesc
.operands()[OpNum
].OperandType
;
2083 if (Imm
.IsFPImm
) { // We got fp literal token
2085 case AMDGPU::OPERAND_REG_IMM_INT64
:
2086 case AMDGPU::OPERAND_REG_IMM_FP64
:
2087 case AMDGPU::OPERAND_REG_INLINE_C_INT64
:
2088 case AMDGPU::OPERAND_REG_INLINE_C_FP64
:
2089 case AMDGPU::OPERAND_REG_INLINE_AC_FP64
:
2090 if (AMDGPU::isInlinableLiteral64(Literal
.getZExtValue(),
2091 AsmParser
->hasInv2PiInlineImm())) {
2092 Inst
.addOperand(MCOperand::createImm(Literal
.getZExtValue()));
2098 if (AMDGPU::isSISrcFPOperand(InstDesc
, OpNum
)) { // Expected 64-bit fp operand
2099 // For fp operands we check if low 32 bits are zeros
2100 if (Literal
.getLoBits(32) != 0) {
2101 const_cast<AMDGPUAsmParser
*>(AsmParser
)->Warning(Inst
.getLoc(),
2102 "Can't encode literal as exact 64-bit floating-point operand. "
2103 "Low 32-bits will be set to zero");
2106 Inst
.addOperand(MCOperand::createImm(Literal
.lshr(32).getZExtValue()));
2107 setImmKindLiteral();
2111 // We don't allow fp literals in 64-bit integer instructions. It is
2112 // unclear how we should encode them. This case should be checked earlier
2113 // in predicate methods (isLiteralImm())
2114 llvm_unreachable("fp literal in 64-bit integer instruction.");
2116 case AMDGPU::OPERAND_REG_IMM_INT32
:
2117 case AMDGPU::OPERAND_REG_IMM_FP32
:
2118 case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED
:
2119 case AMDGPU::OPERAND_REG_INLINE_C_INT32
:
2120 case AMDGPU::OPERAND_REG_INLINE_C_FP32
:
2121 case AMDGPU::OPERAND_REG_INLINE_AC_INT32
:
2122 case AMDGPU::OPERAND_REG_INLINE_AC_FP32
:
2123 case AMDGPU::OPERAND_REG_IMM_INT16
:
2124 case AMDGPU::OPERAND_REG_IMM_FP16
:
2125 case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED
:
2126 case AMDGPU::OPERAND_REG_INLINE_C_INT16
:
2127 case AMDGPU::OPERAND_REG_INLINE_C_FP16
:
2128 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16
:
2129 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16
:
2130 case AMDGPU::OPERAND_REG_INLINE_AC_INT16
:
2131 case AMDGPU::OPERAND_REG_INLINE_AC_FP16
:
2132 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16
:
2133 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16
:
2134 case AMDGPU::OPERAND_REG_IMM_V2INT16
:
2135 case AMDGPU::OPERAND_REG_IMM_V2FP16
:
2136 case AMDGPU::OPERAND_REG_INLINE_C_V2FP32
:
2137 case AMDGPU::OPERAND_REG_IMM_V2FP32
:
2138 case AMDGPU::OPERAND_REG_INLINE_C_V2INT32
:
2139 case AMDGPU::OPERAND_REG_IMM_V2INT32
:
2140 case AMDGPU::OPERAND_KIMM32
:
2141 case AMDGPU::OPERAND_KIMM16
: {
2143 APFloat
FPLiteral(APFloat::IEEEdouble(), Literal
);
2144 // Convert literal to single precision
2145 FPLiteral
.convert(*getOpFltSemantics(OpTy
),
2146 APFloat::rmNearestTiesToEven
, &lost
);
2147 // We allow precision lost but not overflow or underflow. This should be
2148 // checked earlier in isLiteralImm()
2150 uint64_t ImmVal
= FPLiteral
.bitcastToAPInt().getZExtValue();
2151 Inst
.addOperand(MCOperand::createImm(ImmVal
));
2152 if (OpTy
== AMDGPU::OPERAND_KIMM32
|| OpTy
== AMDGPU::OPERAND_KIMM16
) {
2153 setImmKindMandatoryLiteral();
2155 setImmKindLiteral();
2160 llvm_unreachable("invalid operand size");
2166 // We got int literal token.
2167 // Only sign extend inline immediates.
2169 case AMDGPU::OPERAND_REG_IMM_INT32
:
2170 case AMDGPU::OPERAND_REG_IMM_FP32
:
2171 case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED
:
2172 case AMDGPU::OPERAND_REG_INLINE_C_INT32
:
2173 case AMDGPU::OPERAND_REG_INLINE_C_FP32
:
2174 case AMDGPU::OPERAND_REG_INLINE_AC_INT32
:
2175 case AMDGPU::OPERAND_REG_INLINE_AC_FP32
:
2176 case AMDGPU::OPERAND_REG_IMM_V2INT16
:
2177 case AMDGPU::OPERAND_REG_IMM_V2FP16
:
2178 case AMDGPU::OPERAND_REG_IMM_V2FP32
:
2179 case AMDGPU::OPERAND_REG_INLINE_C_V2FP32
:
2180 case AMDGPU::OPERAND_REG_IMM_V2INT32
:
2181 case AMDGPU::OPERAND_REG_INLINE_C_V2INT32
:
2182 if (isSafeTruncation(Val
, 32) &&
2183 AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val
),
2184 AsmParser
->hasInv2PiInlineImm())) {
2185 Inst
.addOperand(MCOperand::createImm(Val
));
2190 Inst
.addOperand(MCOperand::createImm(Val
& 0xffffffff));
2191 setImmKindLiteral();
2194 case AMDGPU::OPERAND_REG_IMM_INT64
:
2195 case AMDGPU::OPERAND_REG_IMM_FP64
:
2196 case AMDGPU::OPERAND_REG_INLINE_C_INT64
:
2197 case AMDGPU::OPERAND_REG_INLINE_C_FP64
:
2198 case AMDGPU::OPERAND_REG_INLINE_AC_FP64
:
2199 if (AMDGPU::isInlinableLiteral64(Val
, AsmParser
->hasInv2PiInlineImm())) {
2200 Inst
.addOperand(MCOperand::createImm(Val
));
2205 Inst
.addOperand(MCOperand::createImm(Lo_32(Val
)));
2206 setImmKindLiteral();
2209 case AMDGPU::OPERAND_REG_IMM_INT16
:
2210 case AMDGPU::OPERAND_REG_IMM_FP16
:
2211 case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED
:
2212 case AMDGPU::OPERAND_REG_INLINE_C_INT16
:
2213 case AMDGPU::OPERAND_REG_INLINE_C_FP16
:
2214 case AMDGPU::OPERAND_REG_INLINE_AC_INT16
:
2215 case AMDGPU::OPERAND_REG_INLINE_AC_FP16
:
2216 if (isSafeTruncation(Val
, 16) &&
2217 AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val
),
2218 AsmParser
->hasInv2PiInlineImm())) {
2219 Inst
.addOperand(MCOperand::createImm(Val
));
2224 Inst
.addOperand(MCOperand::createImm(Val
& 0xffff));
2225 setImmKindLiteral();
2228 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16
:
2229 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16
:
2230 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16
:
2231 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16
: {
2232 assert(isSafeTruncation(Val
, 16));
2233 assert(AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val
),
2234 AsmParser
->hasInv2PiInlineImm()));
2236 Inst
.addOperand(MCOperand::createImm(Val
));
2239 case AMDGPU::OPERAND_KIMM32
:
2240 Inst
.addOperand(MCOperand::createImm(Literal
.getLoBits(32).getZExtValue()));
2241 setImmKindMandatoryLiteral();
2243 case AMDGPU::OPERAND_KIMM16
:
2244 Inst
.addOperand(MCOperand::createImm(Literal
.getLoBits(16).getZExtValue()));
2245 setImmKindMandatoryLiteral();
2248 llvm_unreachable("invalid operand size");
2252 void AMDGPUOperand::addRegOperands(MCInst
&Inst
, unsigned N
) const {
2253 Inst
.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser
->getSTI())));
2256 bool AMDGPUOperand::isInlineValue() const {
2257 return isRegKind() && ::isInlineValue(getReg());
2260 //===----------------------------------------------------------------------===//
2262 //===----------------------------------------------------------------------===//
2264 static int getRegClass(RegisterKind Is
, unsigned RegWidth
) {
2265 if (Is
== IS_VGPR
) {
2269 return AMDGPU::VGPR_32RegClassID
;
2271 return AMDGPU::VReg_64RegClassID
;
2273 return AMDGPU::VReg_96RegClassID
;
2275 return AMDGPU::VReg_128RegClassID
;
2277 return AMDGPU::VReg_160RegClassID
;
2279 return AMDGPU::VReg_192RegClassID
;
2281 return AMDGPU::VReg_224RegClassID
;
2283 return AMDGPU::VReg_256RegClassID
;
2285 return AMDGPU::VReg_288RegClassID
;
2287 return AMDGPU::VReg_320RegClassID
;
2289 return AMDGPU::VReg_352RegClassID
;
2291 return AMDGPU::VReg_384RegClassID
;
2293 return AMDGPU::VReg_512RegClassID
;
2295 return AMDGPU::VReg_1024RegClassID
;
2297 } else if (Is
== IS_TTMP
) {
2301 return AMDGPU::TTMP_32RegClassID
;
2303 return AMDGPU::TTMP_64RegClassID
;
2305 return AMDGPU::TTMP_128RegClassID
;
2307 return AMDGPU::TTMP_256RegClassID
;
2309 return AMDGPU::TTMP_512RegClassID
;
2311 } else if (Is
== IS_SGPR
) {
2315 return AMDGPU::SGPR_32RegClassID
;
2317 return AMDGPU::SGPR_64RegClassID
;
2319 return AMDGPU::SGPR_96RegClassID
;
2321 return AMDGPU::SGPR_128RegClassID
;
2323 return AMDGPU::SGPR_160RegClassID
;
2325 return AMDGPU::SGPR_192RegClassID
;
2327 return AMDGPU::SGPR_224RegClassID
;
2329 return AMDGPU::SGPR_256RegClassID
;
2331 return AMDGPU::SGPR_288RegClassID
;
2333 return AMDGPU::SGPR_320RegClassID
;
2335 return AMDGPU::SGPR_352RegClassID
;
2337 return AMDGPU::SGPR_384RegClassID
;
2339 return AMDGPU::SGPR_512RegClassID
;
2341 } else if (Is
== IS_AGPR
) {
2345 return AMDGPU::AGPR_32RegClassID
;
2347 return AMDGPU::AReg_64RegClassID
;
2349 return AMDGPU::AReg_96RegClassID
;
2351 return AMDGPU::AReg_128RegClassID
;
2353 return AMDGPU::AReg_160RegClassID
;
2355 return AMDGPU::AReg_192RegClassID
;
2357 return AMDGPU::AReg_224RegClassID
;
2359 return AMDGPU::AReg_256RegClassID
;
2361 return AMDGPU::AReg_288RegClassID
;
2363 return AMDGPU::AReg_320RegClassID
;
2365 return AMDGPU::AReg_352RegClassID
;
2367 return AMDGPU::AReg_384RegClassID
;
2369 return AMDGPU::AReg_512RegClassID
;
2371 return AMDGPU::AReg_1024RegClassID
;
2377 static unsigned getSpecialRegForName(StringRef RegName
) {
2378 return StringSwitch
<unsigned>(RegName
)
2379 .Case("exec", AMDGPU::EXEC
)
2380 .Case("vcc", AMDGPU::VCC
)
2381 .Case("flat_scratch", AMDGPU::FLAT_SCR
)
2382 .Case("xnack_mask", AMDGPU::XNACK_MASK
)
2383 .Case("shared_base", AMDGPU::SRC_SHARED_BASE
)
2384 .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE
)
2385 .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT
)
2386 .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT
)
2387 .Case("private_base", AMDGPU::SRC_PRIVATE_BASE
)
2388 .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE
)
2389 .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT
)
2390 .Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT
)
2391 .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID
)
2392 .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID
)
2393 .Case("lds_direct", AMDGPU::LDS_DIRECT
)
2394 .Case("src_lds_direct", AMDGPU::LDS_DIRECT
)
2395 .Case("m0", AMDGPU::M0
)
2396 .Case("vccz", AMDGPU::SRC_VCCZ
)
2397 .Case("src_vccz", AMDGPU::SRC_VCCZ
)
2398 .Case("execz", AMDGPU::SRC_EXECZ
)
2399 .Case("src_execz", AMDGPU::SRC_EXECZ
)
2400 .Case("scc", AMDGPU::SRC_SCC
)
2401 .Case("src_scc", AMDGPU::SRC_SCC
)
2402 .Case("tba", AMDGPU::TBA
)
2403 .Case("tma", AMDGPU::TMA
)
2404 .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO
)
2405 .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI
)
2406 .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO
)
2407 .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI
)
2408 .Case("vcc_lo", AMDGPU::VCC_LO
)
2409 .Case("vcc_hi", AMDGPU::VCC_HI
)
2410 .Case("exec_lo", AMDGPU::EXEC_LO
)
2411 .Case("exec_hi", AMDGPU::EXEC_HI
)
2412 .Case("tma_lo", AMDGPU::TMA_LO
)
2413 .Case("tma_hi", AMDGPU::TMA_HI
)
2414 .Case("tba_lo", AMDGPU::TBA_LO
)
2415 .Case("tba_hi", AMDGPU::TBA_HI
)
2416 .Case("pc", AMDGPU::PC_REG
)
2417 .Case("null", AMDGPU::SGPR_NULL
)
2418 .Default(AMDGPU::NoRegister
);
2421 bool AMDGPUAsmParser::ParseRegister(MCRegister
&RegNo
, SMLoc
&StartLoc
,
2422 SMLoc
&EndLoc
, bool RestoreOnFailure
) {
2423 auto R
= parseRegister();
2424 if (!R
) return true;
2426 RegNo
= R
->getReg();
2427 StartLoc
= R
->getStartLoc();
2428 EndLoc
= R
->getEndLoc();
2432 bool AMDGPUAsmParser::parseRegister(MCRegister
&RegNo
, SMLoc
&StartLoc
,
2434 return ParseRegister(RegNo
, StartLoc
, EndLoc
, /*RestoreOnFailure=*/false);
2437 OperandMatchResultTy
AMDGPUAsmParser::tryParseRegister(MCRegister
&RegNo
,
2441 ParseRegister(RegNo
, StartLoc
, EndLoc
, /*RestoreOnFailure=*/true);
2442 bool PendingErrors
= getParser().hasPendingError();
2443 getParser().clearPendingErrors();
2445 return MatchOperand_ParseFail
;
2447 return MatchOperand_NoMatch
;
2448 return MatchOperand_Success
;
2451 bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg
, unsigned &RegWidth
,
2452 RegisterKind RegKind
, unsigned Reg1
,
2456 if (Reg
== AMDGPU::EXEC_LO
&& Reg1
== AMDGPU::EXEC_HI
) {
2461 if (Reg
== AMDGPU::FLAT_SCR_LO
&& Reg1
== AMDGPU::FLAT_SCR_HI
) {
2462 Reg
= AMDGPU::FLAT_SCR
;
2466 if (Reg
== AMDGPU::XNACK_MASK_LO
&& Reg1
== AMDGPU::XNACK_MASK_HI
) {
2467 Reg
= AMDGPU::XNACK_MASK
;
2471 if (Reg
== AMDGPU::VCC_LO
&& Reg1
== AMDGPU::VCC_HI
) {
2476 if (Reg
== AMDGPU::TBA_LO
&& Reg1
== AMDGPU::TBA_HI
) {
2481 if (Reg
== AMDGPU::TMA_LO
&& Reg1
== AMDGPU::TMA_HI
) {
2486 Error(Loc
, "register does not fit in the list");
2492 if (Reg1
!= Reg
+ RegWidth
/ 32) {
2493 Error(Loc
, "registers in a list must have consecutive indices");
2499 llvm_unreachable("unexpected register kind");
2508 static constexpr RegInfo RegularRegisters
[] = {
2511 {{"ttmp"}, IS_TTMP
},
2516 static bool isRegularReg(RegisterKind Kind
) {
2517 return Kind
== IS_VGPR
||
2523 static const RegInfo
* getRegularRegInfo(StringRef Str
) {
2524 for (const RegInfo
&Reg
: RegularRegisters
)
2525 if (Str
.startswith(Reg
.Name
))
2530 static bool getRegNum(StringRef Str
, unsigned& Num
) {
2531 return !Str
.getAsInteger(10, Num
);
2535 AMDGPUAsmParser::isRegister(const AsmToken
&Token
,
2536 const AsmToken
&NextToken
) const {
2538 // A list of consecutive registers: [s0,s1,s2,s3]
2539 if (Token
.is(AsmToken::LBrac
))
2542 if (!Token
.is(AsmToken::Identifier
))
2545 // A single register like s0 or a range of registers like s[0:1]
2547 StringRef Str
= Token
.getString();
2548 const RegInfo
*Reg
= getRegularRegInfo(Str
);
2550 StringRef RegName
= Reg
->Name
;
2551 StringRef RegSuffix
= Str
.substr(RegName
.size());
2552 if (!RegSuffix
.empty()) {
2554 // A single register with an index: rXX
2555 if (getRegNum(RegSuffix
, Num
))
2558 // A range of registers: r[XX:YY].
2559 if (NextToken
.is(AsmToken::LBrac
))
2564 return getSpecialRegForName(Str
) != AMDGPU::NoRegister
;
2568 AMDGPUAsmParser::isRegister()
2570 return isRegister(getToken(), peekToken());
2574 AMDGPUAsmParser::getRegularReg(RegisterKind RegKind
,
2579 assert(isRegularReg(RegKind
));
2581 unsigned AlignSize
= 1;
2582 if (RegKind
== IS_SGPR
|| RegKind
== IS_TTMP
) {
2583 // SGPR and TTMP registers must be aligned.
2584 // Max required alignment is 4 dwords.
2585 AlignSize
= std::min(RegWidth
/ 32, 4u);
2588 if (RegNum
% AlignSize
!= 0) {
2589 Error(Loc
, "invalid register alignment");
2590 return AMDGPU::NoRegister
;
2593 unsigned RegIdx
= RegNum
/ AlignSize
;
2594 int RCID
= getRegClass(RegKind
, RegWidth
);
2596 Error(Loc
, "invalid or unsupported register size");
2597 return AMDGPU::NoRegister
;
2600 const MCRegisterInfo
*TRI
= getContext().getRegisterInfo();
2601 const MCRegisterClass RC
= TRI
->getRegClass(RCID
);
2602 if (RegIdx
>= RC
.getNumRegs()) {
2603 Error(Loc
, "register index is out of range");
2604 return AMDGPU::NoRegister
;
2607 return RC
.getRegister(RegIdx
);
2610 bool AMDGPUAsmParser::ParseRegRange(unsigned &Num
, unsigned &RegWidth
) {
2611 int64_t RegLo
, RegHi
;
2612 if (!skipToken(AsmToken::LBrac
, "missing register index"))
2615 SMLoc FirstIdxLoc
= getLoc();
2618 if (!parseExpr(RegLo
))
2621 if (trySkipToken(AsmToken::Colon
)) {
2622 SecondIdxLoc
= getLoc();
2623 if (!parseExpr(RegHi
))
2629 if (!skipToken(AsmToken::RBrac
, "expected a closing square bracket"))
2632 if (!isUInt
<32>(RegLo
)) {
2633 Error(FirstIdxLoc
, "invalid register index");
2637 if (!isUInt
<32>(RegHi
)) {
2638 Error(SecondIdxLoc
, "invalid register index");
2642 if (RegLo
> RegHi
) {
2643 Error(FirstIdxLoc
, "first register index should not exceed second index");
2647 Num
= static_cast<unsigned>(RegLo
);
2648 RegWidth
= 32 * ((RegHi
- RegLo
) + 1);
2652 unsigned AMDGPUAsmParser::ParseSpecialReg(RegisterKind
&RegKind
,
2653 unsigned &RegNum
, unsigned &RegWidth
,
2654 SmallVectorImpl
<AsmToken
> &Tokens
) {
2655 assert(isToken(AsmToken::Identifier
));
2656 unsigned Reg
= getSpecialRegForName(getTokenStr());
2660 RegKind
= IS_SPECIAL
;
2661 Tokens
.push_back(getToken());
2662 lex(); // skip register name
2667 unsigned AMDGPUAsmParser::ParseRegularReg(RegisterKind
&RegKind
,
2668 unsigned &RegNum
, unsigned &RegWidth
,
2669 SmallVectorImpl
<AsmToken
> &Tokens
) {
2670 assert(isToken(AsmToken::Identifier
));
2671 StringRef RegName
= getTokenStr();
2672 auto Loc
= getLoc();
2674 const RegInfo
*RI
= getRegularRegInfo(RegName
);
2676 Error(Loc
, "invalid register name");
2677 return AMDGPU::NoRegister
;
2680 Tokens
.push_back(getToken());
2681 lex(); // skip register name
2684 StringRef RegSuffix
= RegName
.substr(RI
->Name
.size());
2685 if (!RegSuffix
.empty()) {
2686 // Single 32-bit register: vXX.
2687 if (!getRegNum(RegSuffix
, RegNum
)) {
2688 Error(Loc
, "invalid register index");
2689 return AMDGPU::NoRegister
;
2693 // Range of registers: v[XX:YY]. ":YY" is optional.
2694 if (!ParseRegRange(RegNum
, RegWidth
))
2695 return AMDGPU::NoRegister
;
2698 return getRegularReg(RegKind
, RegNum
, RegWidth
, Loc
);
2701 unsigned AMDGPUAsmParser::ParseRegList(RegisterKind
&RegKind
, unsigned &RegNum
,
2703 SmallVectorImpl
<AsmToken
> &Tokens
) {
2704 unsigned Reg
= AMDGPU::NoRegister
;
2705 auto ListLoc
= getLoc();
2707 if (!skipToken(AsmToken::LBrac
,
2708 "expected a register or a list of registers")) {
2709 return AMDGPU::NoRegister
;
2712 // List of consecutive registers, e.g.: [s0,s1,s2,s3]
2714 auto Loc
= getLoc();
2715 if (!ParseAMDGPURegister(RegKind
, Reg
, RegNum
, RegWidth
))
2716 return AMDGPU::NoRegister
;
2717 if (RegWidth
!= 32) {
2718 Error(Loc
, "expected a single 32-bit register");
2719 return AMDGPU::NoRegister
;
2722 for (; trySkipToken(AsmToken::Comma
); ) {
2723 RegisterKind NextRegKind
;
2724 unsigned NextReg
, NextRegNum
, NextRegWidth
;
2727 if (!ParseAMDGPURegister(NextRegKind
, NextReg
,
2728 NextRegNum
, NextRegWidth
,
2730 return AMDGPU::NoRegister
;
2732 if (NextRegWidth
!= 32) {
2733 Error(Loc
, "expected a single 32-bit register");
2734 return AMDGPU::NoRegister
;
2736 if (NextRegKind
!= RegKind
) {
2737 Error(Loc
, "registers in a list must be of the same kind");
2738 return AMDGPU::NoRegister
;
2740 if (!AddNextRegisterToList(Reg
, RegWidth
, RegKind
, NextReg
, Loc
))
2741 return AMDGPU::NoRegister
;
2744 if (!skipToken(AsmToken::RBrac
,
2745 "expected a comma or a closing square bracket")) {
2746 return AMDGPU::NoRegister
;
2749 if (isRegularReg(RegKind
))
2750 Reg
= getRegularReg(RegKind
, RegNum
, RegWidth
, ListLoc
);
2755 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind
&RegKind
, unsigned &Reg
,
2756 unsigned &RegNum
, unsigned &RegWidth
,
2757 SmallVectorImpl
<AsmToken
> &Tokens
) {
2758 auto Loc
= getLoc();
2759 Reg
= AMDGPU::NoRegister
;
2761 if (isToken(AsmToken::Identifier
)) {
2762 Reg
= ParseSpecialReg(RegKind
, RegNum
, RegWidth
, Tokens
);
2763 if (Reg
== AMDGPU::NoRegister
)
2764 Reg
= ParseRegularReg(RegKind
, RegNum
, RegWidth
, Tokens
);
2766 Reg
= ParseRegList(RegKind
, RegNum
, RegWidth
, Tokens
);
2769 const MCRegisterInfo
*TRI
= getContext().getRegisterInfo();
2770 if (Reg
== AMDGPU::NoRegister
) {
2771 assert(Parser
.hasPendingError());
2775 if (!subtargetHasRegister(*TRI
, Reg
)) {
2776 if (Reg
== AMDGPU::SGPR_NULL
) {
2777 Error(Loc
, "'null' operand is not supported on this GPU");
2779 Error(Loc
, "register not available on this GPU");
2787 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind
&RegKind
, unsigned &Reg
,
2788 unsigned &RegNum
, unsigned &RegWidth
,
2789 bool RestoreOnFailure
/*=false*/) {
2790 Reg
= AMDGPU::NoRegister
;
2792 SmallVector
<AsmToken
, 1> Tokens
;
2793 if (ParseAMDGPURegister(RegKind
, Reg
, RegNum
, RegWidth
, Tokens
)) {
2794 if (RestoreOnFailure
) {
2795 while (!Tokens
.empty()) {
2796 getLexer().UnLex(Tokens
.pop_back_val());
2804 std::optional
<StringRef
>
2805 AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind
) {
2808 return StringRef(".amdgcn.next_free_vgpr");
2810 return StringRef(".amdgcn.next_free_sgpr");
2812 return std::nullopt
;
2816 void AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind
) {
2817 auto SymbolName
= getGprCountSymbolName(RegKind
);
2818 assert(SymbolName
&& "initializing invalid register kind");
2819 MCSymbol
*Sym
= getContext().getOrCreateSymbol(*SymbolName
);
2820 Sym
->setVariableValue(MCConstantExpr::create(0, getContext()));
2823 bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind
,
2824 unsigned DwordRegIndex
,
2825 unsigned RegWidth
) {
2826 // Symbols are only defined for GCN targets
2827 if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major
< 6)
2830 auto SymbolName
= getGprCountSymbolName(RegKind
);
2833 MCSymbol
*Sym
= getContext().getOrCreateSymbol(*SymbolName
);
2835 int64_t NewMax
= DwordRegIndex
+ divideCeil(RegWidth
, 32) - 1;
2838 if (!Sym
->isVariable())
2839 return !Error(getLoc(),
2840 ".amdgcn.next_free_{v,s}gpr symbols must be variable");
2841 if (!Sym
->getVariableValue(false)->evaluateAsAbsolute(OldCount
))
2844 ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions");
2846 if (OldCount
<= NewMax
)
2847 Sym
->setVariableValue(MCConstantExpr::create(NewMax
+ 1, getContext()));
2852 std::unique_ptr
<AMDGPUOperand
>
2853 AMDGPUAsmParser::parseRegister(bool RestoreOnFailure
) {
2854 const auto &Tok
= getToken();
2855 SMLoc StartLoc
= Tok
.getLoc();
2856 SMLoc EndLoc
= Tok
.getEndLoc();
2857 RegisterKind RegKind
;
2858 unsigned Reg
, RegNum
, RegWidth
;
2860 if (!ParseAMDGPURegister(RegKind
, Reg
, RegNum
, RegWidth
)) {
2863 if (isHsaAbiVersion3AndAbove(&getSTI())) {
2864 if (!updateGprCountSymbols(RegKind
, RegNum
, RegWidth
))
2867 KernelScope
.usesRegister(RegKind
, RegNum
, RegWidth
);
2868 return AMDGPUOperand::CreateReg(this, Reg
, StartLoc
, EndLoc
);
2871 ParseStatus
AMDGPUAsmParser::parseImm(OperandVector
&Operands
,
2872 bool HasSP3AbsModifier
) {
2873 // TODO: add syntactic sugar for 1/(2*PI)
2876 return ParseStatus::NoMatch
;
2877 assert(!isModifier());
2879 const auto& Tok
= getToken();
2880 const auto& NextTok
= peekToken();
2881 bool IsReal
= Tok
.is(AsmToken::Real
);
2883 bool Negate
= false;
2885 if (!IsReal
&& Tok
.is(AsmToken::Minus
) && NextTok
.is(AsmToken::Real
)) {
2892 // Floating-point expressions are not supported.
2893 // Can only allow floating-point literals with an
2896 StringRef Num
= getTokenStr();
2899 APFloat
RealVal(APFloat::IEEEdouble());
2900 auto roundMode
= APFloat::rmNearestTiesToEven
;
2901 if (errorToBool(RealVal
.convertFromString(Num
, roundMode
).takeError()))
2902 return ParseStatus::Failure
;
2904 RealVal
.changeSign();
2907 AMDGPUOperand::CreateImm(this, RealVal
.bitcastToAPInt().getZExtValue(), S
,
2908 AMDGPUOperand::ImmTyNone
, true));
2910 return ParseStatus::Success
;
2917 if (HasSP3AbsModifier
) {
2918 // This is a workaround for handling expressions
2919 // as arguments of SP3 'abs' modifier, for example:
2923 // This syntax is not compatible with syntax of standard
2924 // MC expressions (due to the trailing '|').
2926 if (getParser().parsePrimaryExpr(Expr
, EndLoc
, nullptr))
2927 return ParseStatus::Failure
;
2929 if (Parser
.parseExpression(Expr
))
2930 return ParseStatus::Failure
;
2933 if (Expr
->evaluateAsAbsolute(IntVal
)) {
2934 Operands
.push_back(AMDGPUOperand::CreateImm(this, IntVal
, S
));
2936 Operands
.push_back(AMDGPUOperand::CreateExpr(this, Expr
, S
));
2939 return ParseStatus::Success
;
2942 return ParseStatus::NoMatch
;
2945 ParseStatus
AMDGPUAsmParser::parseReg(OperandVector
&Operands
) {
2947 return ParseStatus::NoMatch
;
2949 if (auto R
= parseRegister()) {
2951 Operands
.push_back(std::move(R
));
2952 return ParseStatus::Success
;
2954 return ParseStatus::Failure
;
2957 ParseStatus
AMDGPUAsmParser::parseRegOrImm(OperandVector
&Operands
,
2958 bool HasSP3AbsMod
) {
2959 ParseStatus Res
= parseReg(Operands
);
2960 if (!Res
.isNoMatch())
2963 return ParseStatus::NoMatch
;
2964 return parseImm(Operands
, HasSP3AbsMod
);
2968 AMDGPUAsmParser::isNamedOperandModifier(const AsmToken
&Token
, const AsmToken
&NextToken
) const {
2969 if (Token
.is(AsmToken::Identifier
) && NextToken
.is(AsmToken::LParen
)) {
2970 const auto &str
= Token
.getString();
2971 return str
== "abs" || str
== "neg" || str
== "sext";
2977 AMDGPUAsmParser::isOpcodeModifierWithVal(const AsmToken
&Token
, const AsmToken
&NextToken
) const {
2978 return Token
.is(AsmToken::Identifier
) && NextToken
.is(AsmToken::Colon
);
2982 AMDGPUAsmParser::isOperandModifier(const AsmToken
&Token
, const AsmToken
&NextToken
) const {
2983 return isNamedOperandModifier(Token
, NextToken
) || Token
.is(AsmToken::Pipe
);
2987 AMDGPUAsmParser::isRegOrOperandModifier(const AsmToken
&Token
, const AsmToken
&NextToken
) const {
2988 return isRegister(Token
, NextToken
) || isOperandModifier(Token
, NextToken
);
2991 // Check if this is an operand modifier or an opcode modifier
2992 // which may look like an expression but it is not. We should
2993 // avoid parsing these modifiers as expressions. Currently
2994 // recognized sequences are:
3005 AMDGPUAsmParser::isModifier() {
3007 AsmToken Tok
= getToken();
3008 AsmToken NextToken
[2];
3009 peekTokens(NextToken
);
3011 return isOperandModifier(Tok
, NextToken
[0]) ||
3012 (Tok
.is(AsmToken::Minus
) && isRegOrOperandModifier(NextToken
[0], NextToken
[1])) ||
3013 isOpcodeModifierWithVal(Tok
, NextToken
[0]);
3016 // Check if the current token is an SP3 'neg' modifier.
3017 // Currently this modifier is allowed in the following context:
3019 // 1. Before a register, e.g. "-v0", "-v[...]" or "-[v0,v1]".
3020 // 2. Before an 'abs' modifier: -abs(...)
3021 // 3. Before an SP3 'abs' modifier: -|...|
3023 // In all other cases "-" is handled as a part
3024 // of an expression that follows the sign.
3026 // Note: When "-" is followed by an integer literal,
3027 // this is interpreted as integer negation rather
3028 // than a floating-point NEG modifier applied to N.
3029 // Beside being contr-intuitive, such use of floating-point
3030 // NEG modifier would have resulted in different meaning
3031 // of integer literals used with VOP1/2/C and VOP3,
3033 // v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF
3034 // v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001
3035 // Negative fp literals with preceding "-" are
3036 // handled likewise for uniformity
3039 AMDGPUAsmParser::parseSP3NegModifier() {
3041 AsmToken NextToken
[2];
3042 peekTokens(NextToken
);
3044 if (isToken(AsmToken::Minus
) &&
3045 (isRegister(NextToken
[0], NextToken
[1]) ||
3046 NextToken
[0].is(AsmToken::Pipe
) ||
3047 isId(NextToken
[0], "abs"))) {
3056 AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector
&Operands
,
3062 // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead.
3063 if (isToken(AsmToken::Minus
) && peekToken().is(AsmToken::Minus
))
3064 return Error(getLoc(), "invalid syntax, expected 'neg' modifier");
3066 SP3Neg
= parseSP3NegModifier();
3069 Neg
= trySkipId("neg");
3071 return Error(Loc
, "expected register or immediate");
3072 if (Neg
&& !skipToken(AsmToken::LParen
, "expected left paren after neg"))
3073 return ParseStatus::Failure
;
3075 Abs
= trySkipId("abs");
3076 if (Abs
&& !skipToken(AsmToken::LParen
, "expected left paren after abs"))
3077 return ParseStatus::Failure
;
3080 SP3Abs
= trySkipToken(AsmToken::Pipe
);
3082 return Error(Loc
, "expected register or immediate");
3086 Res
= parseRegOrImm(Operands
, SP3Abs
);
3088 Res
= parseReg(Operands
);
3090 if (!Res
.isSuccess())
3091 return (SP3Neg
|| Neg
|| SP3Abs
|| Abs
) ? ParseStatus::Failure
: Res
;
3093 if (SP3Abs
&& !skipToken(AsmToken::Pipe
, "expected vertical bar"))
3094 return ParseStatus::Failure
;
3095 if (Abs
&& !skipToken(AsmToken::RParen
, "expected closing parentheses"))
3096 return ParseStatus::Failure
;
3097 if (Neg
&& !skipToken(AsmToken::RParen
, "expected closing parentheses"))
3098 return ParseStatus::Failure
;
3100 AMDGPUOperand::Modifiers Mods
;
3101 Mods
.Abs
= Abs
|| SP3Abs
;
3102 Mods
.Neg
= Neg
|| SP3Neg
;
3104 if (Mods
.hasFPModifiers()) {
3105 AMDGPUOperand
&Op
= static_cast<AMDGPUOperand
&>(*Operands
.back());
3107 return Error(Op
.getStartLoc(), "expected an absolute expression");
3108 Op
.setModifiers(Mods
);
3110 return ParseStatus::Success
;
3114 AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector
&Operands
,
3116 bool Sext
= trySkipId("sext");
3117 if (Sext
&& !skipToken(AsmToken::LParen
, "expected left paren after sext"))
3118 return ParseStatus::Failure
;
3122 Res
= parseRegOrImm(Operands
);
3124 Res
= parseReg(Operands
);
3126 if (!Res
.isSuccess())
3127 return Sext
? ParseStatus::Failure
: Res
;
3129 if (Sext
&& !skipToken(AsmToken::RParen
, "expected closing parentheses"))
3130 return ParseStatus::Failure
;
3132 AMDGPUOperand::Modifiers Mods
;
3135 if (Mods
.hasIntModifiers()) {
3136 AMDGPUOperand
&Op
= static_cast<AMDGPUOperand
&>(*Operands
.back());
3138 return Error(Op
.getStartLoc(), "expected an absolute expression");
3139 Op
.setModifiers(Mods
);
3142 return ParseStatus::Success
;
3145 ParseStatus
AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector
&Operands
) {
3146 return parseRegOrImmWithFPInputMods(Operands
, false);
3149 ParseStatus
AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector
&Operands
) {
3150 return parseRegOrImmWithIntInputMods(Operands
, false);
3153 ParseStatus
AMDGPUAsmParser::parseVReg32OrOff(OperandVector
&Operands
) {
3154 auto Loc
= getLoc();
3155 if (trySkipId("off")) {
3156 Operands
.push_back(AMDGPUOperand::CreateImm(this, 0, Loc
,
3157 AMDGPUOperand::ImmTyOff
, false));
3158 return ParseStatus::Success
;
3162 return ParseStatus::NoMatch
;
3164 std::unique_ptr
<AMDGPUOperand
> Reg
= parseRegister();
3166 Operands
.push_back(std::move(Reg
));
3167 return ParseStatus::Success
;
3170 return ParseStatus::Failure
;
3173 unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst
&Inst
) {
3174 uint64_t TSFlags
= MII
.get(Inst
.getOpcode()).TSFlags
;
3176 if ((getForcedEncodingSize() == 32 && (TSFlags
& SIInstrFlags::VOP3
)) ||
3177 (getForcedEncodingSize() == 64 && !(TSFlags
& SIInstrFlags::VOP3
)) ||
3178 (isForcedDPP() && !(TSFlags
& SIInstrFlags::DPP
)) ||
3179 (isForcedSDWA() && !(TSFlags
& SIInstrFlags::SDWA
)) )
3180 return Match_InvalidOperand
;
3182 if ((TSFlags
& SIInstrFlags::VOP3
) &&
3183 (TSFlags
& SIInstrFlags::VOPAsmPrefer32Bit
) &&
3184 getForcedEncodingSize() != 64)
3185 return Match_PreferE32
;
3187 if (Inst
.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi
||
3188 Inst
.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi
) {
3189 // v_mac_f32/16 allow only dst_sel == DWORD;
3191 AMDGPU::getNamedOperandIdx(Inst
.getOpcode(), AMDGPU::OpName::dst_sel
);
3192 const auto &Op
= Inst
.getOperand(OpNum
);
3193 if (!Op
.isImm() || Op
.getImm() != AMDGPU::SDWA::SdwaSel::DWORD
) {
3194 return Match_InvalidOperand
;
3198 return Match_Success
;
3201 static ArrayRef
<unsigned> getAllVariants() {
3202 static const unsigned Variants
[] = {
3203 AMDGPUAsmVariants::DEFAULT
, AMDGPUAsmVariants::VOP3
,
3204 AMDGPUAsmVariants::SDWA
, AMDGPUAsmVariants::SDWA9
,
3205 AMDGPUAsmVariants::DPP
, AMDGPUAsmVariants::VOP3_DPP
3208 return ArrayRef(Variants
);
3211 // What asm variants we should check
3212 ArrayRef
<unsigned> AMDGPUAsmParser::getMatchedVariants() const {
3213 if (isForcedDPP() && isForcedVOP3()) {
3214 static const unsigned Variants
[] = {AMDGPUAsmVariants::VOP3_DPP
};
3215 return ArrayRef(Variants
);
3217 if (getForcedEncodingSize() == 32) {
3218 static const unsigned Variants
[] = {AMDGPUAsmVariants::DEFAULT
};
3219 return ArrayRef(Variants
);
3222 if (isForcedVOP3()) {
3223 static const unsigned Variants
[] = {AMDGPUAsmVariants::VOP3
};
3224 return ArrayRef(Variants
);
3227 if (isForcedSDWA()) {
3228 static const unsigned Variants
[] = {AMDGPUAsmVariants::SDWA
,
3229 AMDGPUAsmVariants::SDWA9
};
3230 return ArrayRef(Variants
);
3233 if (isForcedDPP()) {
3234 static const unsigned Variants
[] = {AMDGPUAsmVariants::DPP
};
3235 return ArrayRef(Variants
);
3238 return getAllVariants();
3241 StringRef
AMDGPUAsmParser::getMatchedVariantName() const {
3242 if (isForcedDPP() && isForcedVOP3())
3245 if (getForcedEncodingSize() == 32)
3260 unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst
&Inst
) const {
3261 const MCInstrDesc
&Desc
= MII
.get(Inst
.getOpcode());
3262 for (MCPhysReg Reg
: Desc
.implicit_uses()) {
3264 case AMDGPU::FLAT_SCR
:
3266 case AMDGPU::VCC_LO
:
3267 case AMDGPU::VCC_HI
:
3274 return AMDGPU::NoRegister
;
3277 // NB: This code is correct only when used to check constant
3278 // bus limitations because GFX7 support no f16 inline constants.
3279 // Note that there are no cases when a GFX7 opcode violates
3280 // constant bus limitations due to the use of an f16 constant.
3281 bool AMDGPUAsmParser::isInlineConstant(const MCInst
&Inst
,
3282 unsigned OpIdx
) const {
3283 const MCInstrDesc
&Desc
= MII
.get(Inst
.getOpcode());
3285 if (!AMDGPU::isSISrcOperand(Desc
, OpIdx
) ||
3286 AMDGPU::isKImmOperand(Desc
, OpIdx
)) {
3290 const MCOperand
&MO
= Inst
.getOperand(OpIdx
);
3292 int64_t Val
= MO
.getImm();
3293 auto OpSize
= AMDGPU::getOperandSize(Desc
, OpIdx
);
3295 switch (OpSize
) { // expected operand size
3297 return AMDGPU::isInlinableLiteral64(Val
, hasInv2PiInlineImm());
3299 return AMDGPU::isInlinableLiteral32(Val
, hasInv2PiInlineImm());
3301 const unsigned OperandType
= Desc
.operands()[OpIdx
].OperandType
;
3302 if (OperandType
== AMDGPU::OPERAND_REG_IMM_INT16
||
3303 OperandType
== AMDGPU::OPERAND_REG_INLINE_C_INT16
||
3304 OperandType
== AMDGPU::OPERAND_REG_INLINE_AC_INT16
)
3305 return AMDGPU::isInlinableIntLiteral(Val
);
3307 if (OperandType
== AMDGPU::OPERAND_REG_INLINE_C_V2INT16
||
3308 OperandType
== AMDGPU::OPERAND_REG_INLINE_AC_V2INT16
||
3309 OperandType
== AMDGPU::OPERAND_REG_IMM_V2INT16
)
3310 return AMDGPU::isInlinableIntLiteralV216(Val
);
3312 if (OperandType
== AMDGPU::OPERAND_REG_INLINE_C_V2FP16
||
3313 OperandType
== AMDGPU::OPERAND_REG_INLINE_AC_V2FP16
||
3314 OperandType
== AMDGPU::OPERAND_REG_IMM_V2FP16
)
3315 return AMDGPU::isInlinableLiteralV216(Val
, hasInv2PiInlineImm());
3317 return AMDGPU::isInlinableLiteral16(Val
, hasInv2PiInlineImm());
3320 llvm_unreachable("invalid operand size");
3324 unsigned AMDGPUAsmParser::getConstantBusLimit(unsigned Opcode
) const {
3329 // 64-bit shift instructions can use only one scalar value input
3330 case AMDGPU::V_LSHLREV_B64_e64
:
3331 case AMDGPU::V_LSHLREV_B64_gfx10
:
3332 case AMDGPU::V_LSHLREV_B64_e64_gfx11
:
3333 case AMDGPU::V_LSHRREV_B64_e64
:
3334 case AMDGPU::V_LSHRREV_B64_gfx10
:
3335 case AMDGPU::V_LSHRREV_B64_e64_gfx11
:
3336 case AMDGPU::V_ASHRREV_I64_e64
:
3337 case AMDGPU::V_ASHRREV_I64_gfx10
:
3338 case AMDGPU::V_ASHRREV_I64_e64_gfx11
:
3339 case AMDGPU::V_LSHL_B64_e64
:
3340 case AMDGPU::V_LSHR_B64_e64
:
3341 case AMDGPU::V_ASHR_I64_e64
:
3348 constexpr unsigned MAX_SRC_OPERANDS_NUM
= 6;
3349 using OperandIndices
= SmallVector
<int16_t, MAX_SRC_OPERANDS_NUM
>;
3351 // Get regular operand indices in the same order as specified
3352 // in the instruction (but append mandatory literals to the end).
3353 static OperandIndices
getSrcOperandIndices(unsigned Opcode
,
3354 bool AddMandatoryLiterals
= false) {
3357 AddMandatoryLiterals
? getNamedOperandIdx(Opcode
, OpName::imm
) : -1;
3359 if (isVOPD(Opcode
)) {
3360 int16_t ImmDeferredIdx
=
3361 AddMandatoryLiterals
? getNamedOperandIdx(Opcode
, OpName::immDeferred
)
3364 return {getNamedOperandIdx(Opcode
, OpName::src0X
),
3365 getNamedOperandIdx(Opcode
, OpName::vsrc1X
),
3366 getNamedOperandIdx(Opcode
, OpName::src0Y
),
3367 getNamedOperandIdx(Opcode
, OpName::vsrc1Y
),
3372 return {getNamedOperandIdx(Opcode
, OpName::src0
),
3373 getNamedOperandIdx(Opcode
, OpName::src1
),
3374 getNamedOperandIdx(Opcode
, OpName::src2
), ImmIdx
};
3377 bool AMDGPUAsmParser::usesConstantBus(const MCInst
&Inst
, unsigned OpIdx
) {
3378 const MCOperand
&MO
= Inst
.getOperand(OpIdx
);
3380 return !isInlineConstant(Inst
, OpIdx
);
3381 } else if (MO
.isReg()) {
3382 auto Reg
= MO
.getReg();
3383 const MCRegisterInfo
*TRI
= getContext().getRegisterInfo();
3384 auto PReg
= mc2PseudoReg(Reg
);
3385 return isSGPR(PReg
, TRI
) && PReg
!= SGPR_NULL
;
3391 bool AMDGPUAsmParser::validateConstantBusLimitations(
3392 const MCInst
&Inst
, const OperandVector
&Operands
) {
3393 const unsigned Opcode
= Inst
.getOpcode();
3394 const MCInstrDesc
&Desc
= MII
.get(Opcode
);
3395 unsigned LastSGPR
= AMDGPU::NoRegister
;
3396 unsigned ConstantBusUseCount
= 0;
3397 unsigned NumLiterals
= 0;
3398 unsigned LiteralSize
;
3400 if (!(Desc
.TSFlags
&
3401 (SIInstrFlags::VOPC
| SIInstrFlags::VOP1
| SIInstrFlags::VOP2
|
3402 SIInstrFlags::VOP3
| SIInstrFlags::VOP3P
| SIInstrFlags::SDWA
)) &&
3406 // Check special imm operands (used by madmk, etc)
3407 if (AMDGPU::hasNamedOperand(Opcode
, AMDGPU::OpName::imm
)) {
3412 SmallDenseSet
<unsigned> SGPRsUsed
;
3413 unsigned SGPRUsed
= findImplicitSGPRReadInVOP(Inst
);
3414 if (SGPRUsed
!= AMDGPU::NoRegister
) {
3415 SGPRsUsed
.insert(SGPRUsed
);
3416 ++ConstantBusUseCount
;
3419 OperandIndices OpIndices
= getSrcOperandIndices(Opcode
);
3421 for (int OpIdx
: OpIndices
) {
3425 const MCOperand
&MO
= Inst
.getOperand(OpIdx
);
3426 if (usesConstantBus(Inst
, OpIdx
)) {
3428 LastSGPR
= mc2PseudoReg(MO
.getReg());
3429 // Pairs of registers with a partial intersections like these
3431 // flat_scratch_lo, flat_scratch
3432 // flat_scratch_lo, flat_scratch_hi
3433 // are theoretically valid but they are disabled anyway.
3434 // Note that this code mimics SIInstrInfo::verifyInstruction
3435 if (SGPRsUsed
.insert(LastSGPR
).second
) {
3436 ++ConstantBusUseCount
;
3438 } else { // Expression or a literal
3440 if (Desc
.operands()[OpIdx
].OperandType
== MCOI::OPERAND_IMMEDIATE
)
3441 continue; // special operand like VINTERP attr_chan
3443 // An instruction may use only one literal.
3444 // This has been validated on the previous step.
3445 // See validateVOPLiteral.
3446 // This literal may be used as more than one operand.
3447 // If all these operands are of the same size,
3448 // this literal counts as one scalar value.
3449 // Otherwise it counts as 2 scalar values.
3450 // See "GFX10 Shader Programming", section 3.6.2.3.
3452 unsigned Size
= AMDGPU::getOperandSize(Desc
, OpIdx
);
3456 if (NumLiterals
== 0) {
3459 } else if (LiteralSize
!= Size
) {
3465 ConstantBusUseCount
+= NumLiterals
;
3467 if (ConstantBusUseCount
<= getConstantBusLimit(Opcode
))
3470 SMLoc LitLoc
= getLitLoc(Operands
);
3471 SMLoc RegLoc
= getRegLoc(LastSGPR
, Operands
);
3472 SMLoc Loc
= (LitLoc
.getPointer() < RegLoc
.getPointer()) ? RegLoc
: LitLoc
;
3473 Error(Loc
, "invalid operand (violates constant bus restrictions)");
3477 bool AMDGPUAsmParser::validateVOPDRegBankConstraints(
3478 const MCInst
&Inst
, const OperandVector
&Operands
) {
3480 const unsigned Opcode
= Inst
.getOpcode();
3481 if (!isVOPD(Opcode
))
3484 const MCRegisterInfo
*TRI
= getContext().getRegisterInfo();
3486 auto getVRegIdx
= [&](unsigned, unsigned OperandIdx
) {
3487 const MCOperand
&Opr
= Inst
.getOperand(OperandIdx
);
3488 return (Opr
.isReg() && !isSGPR(mc2PseudoReg(Opr
.getReg()), TRI
))
3490 : MCRegister::NoRegister
;
3493 const auto &InstInfo
= getVOPDInstInfo(Opcode
, &MII
);
3494 auto InvalidCompOprIdx
= InstInfo
.getInvalidCompOperandIndex(getVRegIdx
);
3495 if (!InvalidCompOprIdx
)
3498 auto CompOprIdx
= *InvalidCompOprIdx
;
3500 std::max(InstInfo
[VOPD::X
].getIndexInParsedOperands(CompOprIdx
),
3501 InstInfo
[VOPD::Y
].getIndexInParsedOperands(CompOprIdx
));
3502 assert(ParsedIdx
> 0 && ParsedIdx
< Operands
.size());
3504 auto Loc
= ((AMDGPUOperand
&)*Operands
[ParsedIdx
]).getStartLoc();
3505 if (CompOprIdx
== VOPD::Component::DST
) {
3506 Error(Loc
, "one dst register must be even and the other odd");
3508 auto CompSrcIdx
= CompOprIdx
- VOPD::Component::DST_NUM
;
3509 Error(Loc
, Twine("src") + Twine(CompSrcIdx
) +
3510 " operands must use different VGPR banks");
3516 bool AMDGPUAsmParser::validateIntClampSupported(const MCInst
&Inst
) {
3518 const unsigned Opc
= Inst
.getOpcode();
3519 const MCInstrDesc
&Desc
= MII
.get(Opc
);
3521 if ((Desc
.TSFlags
& SIInstrFlags::IntClamp
) != 0 && !hasIntClamp()) {
3522 int ClampIdx
= AMDGPU::getNamedOperandIdx(Opc
, AMDGPU::OpName::clamp
);
3523 assert(ClampIdx
!= -1);
3524 return Inst
.getOperand(ClampIdx
).getImm() == 0;
3530 bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst
&Inst
,
3531 const SMLoc
&IDLoc
) {
3533 const unsigned Opc
= Inst
.getOpcode();
3534 const MCInstrDesc
&Desc
= MII
.get(Opc
);
3536 if ((Desc
.TSFlags
& SIInstrFlags::MIMG
) == 0)
3539 int VDataIdx
= AMDGPU::getNamedOperandIdx(Opc
, AMDGPU::OpName::vdata
);
3540 int DMaskIdx
= AMDGPU::getNamedOperandIdx(Opc
, AMDGPU::OpName::dmask
);
3541 int TFEIdx
= AMDGPU::getNamedOperandIdx(Opc
, AMDGPU::OpName::tfe
);
3543 assert(VDataIdx
!= -1);
3545 if ((DMaskIdx
== -1 || TFEIdx
== -1) && isGFX10_AEncoding()) // intersect_ray
3548 unsigned VDataSize
= AMDGPU::getRegOperandSize(getMRI(), Desc
, VDataIdx
);
3549 unsigned TFESize
= (TFEIdx
!= -1 && Inst
.getOperand(TFEIdx
).getImm()) ? 1 : 0;
3550 unsigned DMask
= Inst
.getOperand(DMaskIdx
).getImm() & 0xf;
3554 bool IsPackedD16
= false;
3556 (Desc
.TSFlags
& SIInstrFlags::Gather4
) ? 4 : llvm::popcount(DMask
);
3557 if (hasPackedD16()) {
3558 int D16Idx
= AMDGPU::getNamedOperandIdx(Opc
, AMDGPU::OpName::d16
);
3559 IsPackedD16
= D16Idx
>= 0;
3560 if (IsPackedD16
&& Inst
.getOperand(D16Idx
).getImm())
3561 DataSize
= (DataSize
+ 1) / 2;
3564 if ((VDataSize
/ 4) == DataSize
+ TFESize
)
3567 StringRef Modifiers
;
3569 Modifiers
= IsPackedD16
? "dmask and d16" : "dmask";
3571 Modifiers
= IsPackedD16
? "dmask, d16 and tfe" : "dmask and tfe";
3573 Error(IDLoc
, Twine("image data size does not match ") + Modifiers
);
3577 bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst
&Inst
,
3578 const SMLoc
&IDLoc
) {
3579 const unsigned Opc
= Inst
.getOpcode();
3580 const MCInstrDesc
&Desc
= MII
.get(Opc
);
3582 if ((Desc
.TSFlags
& SIInstrFlags::MIMG
) == 0 || !isGFX10Plus())
3585 const AMDGPU::MIMGInfo
*Info
= AMDGPU::getMIMGInfo(Opc
);
3587 const AMDGPU::MIMGBaseOpcodeInfo
*BaseOpcode
=
3588 AMDGPU::getMIMGBaseOpcodeInfo(Info
->BaseOpcode
);
3589 int VAddr0Idx
= AMDGPU::getNamedOperandIdx(Opc
, AMDGPU::OpName::vaddr0
);
3590 int SrsrcIdx
= AMDGPU::getNamedOperandIdx(Opc
, AMDGPU::OpName::srsrc
);
3591 int DimIdx
= AMDGPU::getNamedOperandIdx(Opc
, AMDGPU::OpName::dim
);
3592 int A16Idx
= AMDGPU::getNamedOperandIdx(Opc
, AMDGPU::OpName::a16
);
3594 assert(VAddr0Idx
!= -1);
3595 assert(SrsrcIdx
!= -1);
3596 assert(SrsrcIdx
> VAddr0Idx
);
3598 bool IsA16
= Inst
.getOperand(A16Idx
).getImm();
3599 if (BaseOpcode
->BVH
) {
3600 if (IsA16
== BaseOpcode
->A16
)
3602 Error(IDLoc
, "image address size does not match a16");
3606 unsigned Dim
= Inst
.getOperand(DimIdx
).getImm();
3607 const AMDGPU::MIMGDimInfo
*DimInfo
= AMDGPU::getMIMGDimInfoByEncoding(Dim
);
3608 bool IsNSA
= SrsrcIdx
- VAddr0Idx
> 1;
3609 unsigned ActualAddrSize
=
3610 IsNSA
? SrsrcIdx
- VAddr0Idx
3611 : AMDGPU::getRegOperandSize(getMRI(), Desc
, VAddr0Idx
) / 4;
3613 unsigned ExpectedAddrSize
=
3614 AMDGPU::getAddrSizeMIMGOp(BaseOpcode
, DimInfo
, IsA16
, hasG16());
3617 if (hasPartialNSAEncoding() && ExpectedAddrSize
> getNSAMaxSize()) {
3618 int VAddrLastIdx
= SrsrcIdx
- 1;
3619 unsigned VAddrLastSize
=
3620 AMDGPU::getRegOperandSize(getMRI(), Desc
, VAddrLastIdx
) / 4;
3622 ActualAddrSize
= VAddrLastIdx
- VAddr0Idx
+ VAddrLastSize
;
3625 if (ExpectedAddrSize
> 12)
3626 ExpectedAddrSize
= 16;
3628 // Allow oversized 8 VGPR vaddr when only 5/6/7 VGPRs are required.
3629 // This provides backward compatibility for assembly created
3630 // before 160b/192b/224b types were directly supported.
3631 if (ActualAddrSize
== 8 && (ExpectedAddrSize
>= 5 && ExpectedAddrSize
<= 7))
3635 if (ActualAddrSize
== ExpectedAddrSize
)
3638 Error(IDLoc
, "image address size does not match dim and a16");
3642 bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst
&Inst
) {
3644 const unsigned Opc
= Inst
.getOpcode();
3645 const MCInstrDesc
&Desc
= MII
.get(Opc
);
3647 if ((Desc
.TSFlags
& SIInstrFlags::MIMG
) == 0)
3649 if (!Desc
.mayLoad() || !Desc
.mayStore())
3650 return true; // Not atomic
3652 int DMaskIdx
= AMDGPU::getNamedOperandIdx(Opc
, AMDGPU::OpName::dmask
);
3653 unsigned DMask
= Inst
.getOperand(DMaskIdx
).getImm() & 0xf;
3655 // This is an incomplete check because image_atomic_cmpswap
3656 // may only use 0x3 and 0xf while other atomic operations
3657 // may use 0x1 and 0x3. However these limitations are
3658 // verified when we check that dmask matches dst size.
3659 return DMask
== 0x1 || DMask
== 0x3 || DMask
== 0xf;
3662 bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst
&Inst
) {
3664 const unsigned Opc
= Inst
.getOpcode();
3665 const MCInstrDesc
&Desc
= MII
.get(Opc
);
3667 if ((Desc
.TSFlags
& SIInstrFlags::Gather4
) == 0)
3670 int DMaskIdx
= AMDGPU::getNamedOperandIdx(Opc
, AMDGPU::OpName::dmask
);
3671 unsigned DMask
= Inst
.getOperand(DMaskIdx
).getImm() & 0xf;
3673 // GATHER4 instructions use dmask in a different fashion compared to
3674 // other MIMG instructions. The only useful DMASK values are
3675 // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns
3676 // (red,red,red,red) etc.) The ISA document doesn't mention
3678 return DMask
== 0x1 || DMask
== 0x2 || DMask
== 0x4 || DMask
== 0x8;
3681 bool AMDGPUAsmParser::validateMIMGMSAA(const MCInst
&Inst
) {
3682 const unsigned Opc
= Inst
.getOpcode();
3683 const MCInstrDesc
&Desc
= MII
.get(Opc
);
3685 if ((Desc
.TSFlags
& SIInstrFlags::MIMG
) == 0)
3688 const AMDGPU::MIMGInfo
*Info
= AMDGPU::getMIMGInfo(Opc
);
3689 const AMDGPU::MIMGBaseOpcodeInfo
*BaseOpcode
=
3690 AMDGPU::getMIMGBaseOpcodeInfo(Info
->BaseOpcode
);
3692 if (!BaseOpcode
->MSAA
)
3695 int DimIdx
= AMDGPU::getNamedOperandIdx(Opc
, AMDGPU::OpName::dim
);
3696 assert(DimIdx
!= -1);
3698 unsigned Dim
= Inst
.getOperand(DimIdx
).getImm();
3699 const AMDGPU::MIMGDimInfo
*DimInfo
= AMDGPU::getMIMGDimInfoByEncoding(Dim
);
3701 return DimInfo
->MSAA
;
3704 static bool IsMovrelsSDWAOpcode(const unsigned Opcode
)
3707 case AMDGPU::V_MOVRELS_B32_sdwa_gfx10
:
3708 case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10
:
3709 case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10
:
3716 // movrels* opcodes should only allow VGPRS as src0.
3717 // This is specified in .td description for vop1/vop3,
3718 // but sdwa is handled differently. See isSDWAOperand.
3719 bool AMDGPUAsmParser::validateMovrels(const MCInst
&Inst
,
3720 const OperandVector
&Operands
) {
3722 const unsigned Opc
= Inst
.getOpcode();
3723 const MCInstrDesc
&Desc
= MII
.get(Opc
);
3725 if ((Desc
.TSFlags
& SIInstrFlags::SDWA
) == 0 || !IsMovrelsSDWAOpcode(Opc
))
3728 const int Src0Idx
= AMDGPU::getNamedOperandIdx(Opc
, AMDGPU::OpName::src0
);
3729 assert(Src0Idx
!= -1);
3732 const MCOperand
&Src0
= Inst
.getOperand(Src0Idx
);
3734 auto Reg
= mc2PseudoReg(Src0
.getReg());
3735 const MCRegisterInfo
*TRI
= getContext().getRegisterInfo();
3736 if (!isSGPR(Reg
, TRI
))
3738 ErrLoc
= getRegLoc(Reg
, Operands
);
3740 ErrLoc
= getConstLoc(Operands
);
3743 Error(ErrLoc
, "source operand must be a VGPR");
3747 bool AMDGPUAsmParser::validateMAIAccWrite(const MCInst
&Inst
,
3748 const OperandVector
&Operands
) {
3750 const unsigned Opc
= Inst
.getOpcode();
3752 if (Opc
!= AMDGPU::V_ACCVGPR_WRITE_B32_vi
)
3755 const int Src0Idx
= AMDGPU::getNamedOperandIdx(Opc
, AMDGPU::OpName::src0
);
3756 assert(Src0Idx
!= -1);
3758 const MCOperand
&Src0
= Inst
.getOperand(Src0Idx
);
3762 auto Reg
= mc2PseudoReg(Src0
.getReg());
3763 const MCRegisterInfo
*TRI
= getContext().getRegisterInfo();
3764 if (!isGFX90A() && isSGPR(Reg
, TRI
)) {
3765 Error(getRegLoc(Reg
, Operands
),
3766 "source operand must be either a VGPR or an inline constant");
3773 bool AMDGPUAsmParser::validateMAISrc2(const MCInst
&Inst
,
3774 const OperandVector
&Operands
) {
3775 unsigned Opcode
= Inst
.getOpcode();
3776 const MCInstrDesc
&Desc
= MII
.get(Opcode
);
3778 if (!(Desc
.TSFlags
& SIInstrFlags::IsMAI
) ||
3779 !getFeatureBits()[FeatureMFMAInlineLiteralBug
])
3782 const int Src2Idx
= getNamedOperandIdx(Opcode
, OpName::src2
);
3786 if (Inst
.getOperand(Src2Idx
).isImm() && isInlineConstant(Inst
, Src2Idx
)) {
3787 Error(getConstLoc(Operands
),
3788 "inline constants are not allowed for this operand");
3795 bool AMDGPUAsmParser::validateMFMA(const MCInst
&Inst
,
3796 const OperandVector
&Operands
) {
3797 const unsigned Opc
= Inst
.getOpcode();
3798 const MCInstrDesc
&Desc
= MII
.get(Opc
);
3800 if ((Desc
.TSFlags
& SIInstrFlags::IsMAI
) == 0)
3803 const int Src2Idx
= AMDGPU::getNamedOperandIdx(Opc
, AMDGPU::OpName::src2
);
3807 const MCOperand
&Src2
= Inst
.getOperand(Src2Idx
);
3811 MCRegister Src2Reg
= Src2
.getReg();
3812 MCRegister DstReg
= Inst
.getOperand(0).getReg();
3813 if (Src2Reg
== DstReg
)
3816 const MCRegisterInfo
*TRI
= getContext().getRegisterInfo();
3817 if (TRI
->getRegClass(Desc
.operands()[0].RegClass
).getSizeInBits() <= 128)
3820 if (TRI
->regsOverlap(Src2Reg
, DstReg
)) {
3821 Error(getRegLoc(mc2PseudoReg(Src2Reg
), Operands
),
3822 "source 2 operand must not partially overlap with dst");
3829 bool AMDGPUAsmParser::validateDivScale(const MCInst
&Inst
) {
3830 switch (Inst
.getOpcode()) {
3833 case V_DIV_SCALE_F32_gfx6_gfx7
:
3834 case V_DIV_SCALE_F32_vi
:
3835 case V_DIV_SCALE_F32_gfx10
:
3836 case V_DIV_SCALE_F64_gfx6_gfx7
:
3837 case V_DIV_SCALE_F64_vi
:
3838 case V_DIV_SCALE_F64_gfx10
:
3842 // TODO: Check that src0 = src1 or src2.
3844 for (auto Name
: {AMDGPU::OpName::src0_modifiers
,
3845 AMDGPU::OpName::src2_modifiers
,
3846 AMDGPU::OpName::src2_modifiers
}) {
3847 if (Inst
.getOperand(AMDGPU::getNamedOperandIdx(Inst
.getOpcode(), Name
))
3857 bool AMDGPUAsmParser::validateMIMGD16(const MCInst
&Inst
) {
3859 const unsigned Opc
= Inst
.getOpcode();
3860 const MCInstrDesc
&Desc
= MII
.get(Opc
);
3862 if ((Desc
.TSFlags
& SIInstrFlags::MIMG
) == 0)
3865 int D16Idx
= AMDGPU::getNamedOperandIdx(Opc
, AMDGPU::OpName::d16
);
3866 if (D16Idx
>= 0 && Inst
.getOperand(D16Idx
).getImm()) {
3867 if (isCI() || isSI())
static bool IsRevOpcode(const unsigned Opcode)
{
  switch (Opcode) {
  case AMDGPU::V_SUBREV_F32_e32:
  case AMDGPU::V_SUBREV_F32_e64:
  case AMDGPU::V_SUBREV_F32_e32_gfx10:
  case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7:
  case AMDGPU::V_SUBREV_F32_e32_vi:
  case AMDGPU::V_SUBREV_F32_e64_gfx10:
  case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7:
  case AMDGPU::V_SUBREV_F32_e64_vi:

  case AMDGPU::V_SUBREV_CO_U32_e32:
  case AMDGPU::V_SUBREV_CO_U32_e64:
  case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7:
  case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7:

  case AMDGPU::V_SUBBREV_U32_e32:
  case AMDGPU::V_SUBBREV_U32_e64:
  case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7:
  case AMDGPU::V_SUBBREV_U32_e32_vi:
  case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7:
  case AMDGPU::V_SUBBREV_U32_e64_vi:

  case AMDGPU::V_SUBREV_U32_e32:
  case AMDGPU::V_SUBREV_U32_e64:
  case AMDGPU::V_SUBREV_U32_e32_gfx9:
  case AMDGPU::V_SUBREV_U32_e32_vi:
  case AMDGPU::V_SUBREV_U32_e64_gfx9:
  case AMDGPU::V_SUBREV_U32_e64_vi:

  case AMDGPU::V_SUBREV_F16_e32:
  case AMDGPU::V_SUBREV_F16_e64:
  case AMDGPU::V_SUBREV_F16_e32_gfx10:
  case AMDGPU::V_SUBREV_F16_e32_vi:
  case AMDGPU::V_SUBREV_F16_e64_gfx10:
  case AMDGPU::V_SUBREV_F16_e64_vi:

  case AMDGPU::V_SUBREV_U16_e32:
  case AMDGPU::V_SUBREV_U16_e64:
  case AMDGPU::V_SUBREV_U16_e32_vi:
  case AMDGPU::V_SUBREV_U16_e64_vi:

  case AMDGPU::V_SUBREV_CO_U32_e32_gfx9:
  case AMDGPU::V_SUBREV_CO_U32_e64_gfx10:
  case AMDGPU::V_SUBREV_CO_U32_e64_gfx9:

  case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9:
  case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9:

  case AMDGPU::V_SUBREV_NC_U32_e32_gfx10:
  case AMDGPU::V_SUBREV_NC_U32_e64_gfx10:

  case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10:
  case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10:

  case AMDGPU::V_LSHRREV_B32_e32:
  case AMDGPU::V_LSHRREV_B32_e64:
  case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7:
  case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7:
  case AMDGPU::V_LSHRREV_B32_e32_vi:
  case AMDGPU::V_LSHRREV_B32_e64_vi:
  case AMDGPU::V_LSHRREV_B32_e32_gfx10:
  case AMDGPU::V_LSHRREV_B32_e64_gfx10:

  case AMDGPU::V_ASHRREV_I32_e32:
  case AMDGPU::V_ASHRREV_I32_e64:
  case AMDGPU::V_ASHRREV_I32_e32_gfx10:
  case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7:
  case AMDGPU::V_ASHRREV_I32_e32_vi:
  case AMDGPU::V_ASHRREV_I32_e64_gfx10:
  case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7:
  case AMDGPU::V_ASHRREV_I32_e64_vi:

  case AMDGPU::V_LSHLREV_B32_e32:
  case AMDGPU::V_LSHLREV_B32_e64:
  case AMDGPU::V_LSHLREV_B32_e32_gfx10:
  case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7:
  case AMDGPU::V_LSHLREV_B32_e32_vi:
  case AMDGPU::V_LSHLREV_B32_e64_gfx10:
  case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7:
  case AMDGPU::V_LSHLREV_B32_e64_vi:

  case AMDGPU::V_LSHLREV_B16_e32:
  case AMDGPU::V_LSHLREV_B16_e64:
  case AMDGPU::V_LSHLREV_B16_e32_vi:
  case AMDGPU::V_LSHLREV_B16_e64_vi:
  case AMDGPU::V_LSHLREV_B16_gfx10:

  case AMDGPU::V_LSHRREV_B16_e32:
  case AMDGPU::V_LSHRREV_B16_e64:
  case AMDGPU::V_LSHRREV_B16_e32_vi:
  case AMDGPU::V_LSHRREV_B16_e64_vi:
  case AMDGPU::V_LSHRREV_B16_gfx10:

  case AMDGPU::V_ASHRREV_I16_e32:
  case AMDGPU::V_ASHRREV_I16_e64:
  case AMDGPU::V_ASHRREV_I16_e32_vi:
  case AMDGPU::V_ASHRREV_I16_e64_vi:
  case AMDGPU::V_ASHRREV_I16_gfx10:

  case AMDGPU::V_LSHLREV_B64_e64:
  case AMDGPU::V_LSHLREV_B64_gfx10:
  case AMDGPU::V_LSHLREV_B64_vi:

  case AMDGPU::V_LSHRREV_B64_e64:
  case AMDGPU::V_LSHRREV_B64_gfx10:
  case AMDGPU::V_LSHRREV_B64_vi:

  case AMDGPU::V_ASHRREV_I64_e64:
  case AMDGPU::V_ASHRREV_I64_gfx10:
  case AMDGPU::V_ASHRREV_I64_vi:

  case AMDGPU::V_PK_LSHLREV_B16:
  case AMDGPU::V_PK_LSHLREV_B16_gfx10:
  case AMDGPU::V_PK_LSHLREV_B16_vi:

  case AMDGPU::V_PK_LSHRREV_B16:
  case AMDGPU::V_PK_LSHRREV_B16_gfx10:
  case AMDGPU::V_PK_LSHRREV_B16_vi:
  case AMDGPU::V_PK_ASHRREV_I16:
  case AMDGPU::V_PK_ASHRREV_I16_gfx10:
  case AMDGPU::V_PK_ASHRREV_I16_vi:
    return true;
  default:
    return false;
  }
}
std::optional<StringRef>
AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) {

  using namespace SIInstrFlags;
  const unsigned Opcode = Inst.getOpcode();
  const MCInstrDesc &Desc = MII.get(Opcode);

  // lds_direct register is defined so that it can be used
  // with 9-bit operands only. Ignore encodings which do not accept these.
  const auto Enc = VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA;
  if ((Desc.TSFlags & Enc) == 0)
    return std::nullopt;

  for (auto SrcName : {OpName::src0, OpName::src1, OpName::src2}) {
    auto SrcIdx = getNamedOperandIdx(Opcode, SrcName);
    if (SrcIdx == -1)
      break;
    const auto &Src = Inst.getOperand(SrcIdx);
    if (Src.isReg() && Src.getReg() == LDS_DIRECT) {

      if (isGFX90A() || isGFX11Plus())
        return StringRef("lds_direct is not supported on this GPU");

      if (IsRevOpcode(Opcode) || (Desc.TSFlags & SIInstrFlags::SDWA))
        return StringRef("lds_direct cannot be used with this instruction");

      if (SrcName != OpName::src0)
        return StringRef("lds_direct may be used as src0 only");
    }
  }

  return std::nullopt;
}
SMLoc AMDGPUAsmParser::getFlatOffsetLoc(const OperandVector &Operands) const {
  for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
    if (Op.isFlatOffset())
      return Op.getStartLoc();
  }
  return getLoc();
}
bool AMDGPUAsmParser::validateFlatOffset(const MCInst &Inst,
                                         const OperandVector &Operands) {
  uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
  if ((TSFlags & SIInstrFlags::FLAT) == 0)
    return true;

  auto Opcode = Inst.getOpcode();
  auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
  assert(OpNum != -1);

  const auto &Op = Inst.getOperand(OpNum);
  if (!hasFlatOffsets() && Op.getImm() != 0) {
    Error(getFlatOffsetLoc(Operands),
          "flat offset modifier is not supported on this GPU");
    return false;
  }

  // For FLAT segment the offset must be positive;
  // MSB is ignored and forced to zero.
  unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI());
  bool AllowNegative =
      TSFlags & (SIInstrFlags::FlatGlobal | SIInstrFlags::FlatScratch);
  if (!isIntN(OffsetSize, Op.getImm()) || (!AllowNegative && Op.getImm() < 0)) {
    Error(getFlatOffsetLoc(Operands),
          Twine("expected a ") +
              (AllowNegative ? Twine(OffsetSize) + "-bit signed offset"
                             : Twine(OffsetSize - 1) + "-bit unsigned offset"));
    return false;
  }

  return true;
}
SMLoc AMDGPUAsmParser::getSMEMOffsetLoc(const OperandVector &Operands) const {
  // Start with second operand because SMEM Offset cannot be dst or src0.
  for (unsigned i = 2, e = Operands.size(); i != e; ++i) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
    if (Op.isSMEMOffset() || Op.isSMEMOffsetMod())
      return Op.getStartLoc();
  }
  return getLoc();
}
bool AMDGPUAsmParser::validateSMEMOffset(const MCInst &Inst,
                                         const OperandVector &Operands) {
  if (isCI() || isSI())
    return true;

  uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
  if ((TSFlags & SIInstrFlags::SMRD) == 0)
    return true;

  auto Opcode = Inst.getOpcode();
  auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
  if (OpNum == -1)
    return true;

  const auto &Op = Inst.getOperand(OpNum);
  if (!Op.isImm())
    return true;

  uint64_t Offset = Op.getImm();
  bool IsBuffer = AMDGPU::getSMEMIsBuffer(Opcode);
  if (AMDGPU::isLegalSMRDEncodedUnsignedOffset(getSTI(), Offset) ||
      AMDGPU::isLegalSMRDEncodedSignedOffset(getSTI(), Offset, IsBuffer))
    return true;

  Error(getSMEMOffsetLoc(Operands),
        (isVI() || IsBuffer) ? "expected a 20-bit unsigned offset" :
                               "expected a 21-bit signed offset");

  return false;
}
bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const {
  unsigned Opcode = Inst.getOpcode();
  const MCInstrDesc &Desc = MII.get(Opcode);
  if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC)))
    return true;

  const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
  const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);

  const int OpIndices[] = { Src0Idx, Src1Idx };

  unsigned NumExprs = 0;
  unsigned NumLiterals = 0;
  uint32_t LiteralValue;

  for (int OpIdx : OpIndices) {
    if (OpIdx == -1) break;

    const MCOperand &MO = Inst.getOperand(OpIdx);
    // Exclude special imm operands (like that used by s_set_gpr_idx_on)
    if (AMDGPU::isSISrcOperand(Desc, OpIdx)) {
      if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
        uint32_t Value = static_cast<uint32_t>(MO.getImm());
        if (NumLiterals == 0 || LiteralValue != Value) {
          LiteralValue = Value;
          ++NumLiterals;
        }
      } else if (MO.isExpr()) {
        ++NumExprs;
      }
    }
  }

  return NumLiterals + NumExprs <= 1;
}
bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) {
  const unsigned Opc = Inst.getOpcode();
  if (isPermlane16(Opc)) {
    int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
    unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();

    if (OpSel & ~3)
      return false;
  }

  uint64_t TSFlags = MII.get(Opc).TSFlags;

  if (isGFX940() && (TSFlags & SIInstrFlags::IsDOT)) {
    int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
    if (OpSelIdx != -1) {
      if (Inst.getOperand(OpSelIdx).getImm() != 0)
        return false;
    }
    int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
    if (OpSelHiIdx != -1) {
      if (Inst.getOperand(OpSelHiIdx).getImm() != -1)
        return false;
    }
  }

  // op_sel[0:1] must be 0 for v_dot2_bf16_bf16 and v_dot2_f16_f16 (VOP3 Dot).
  if (isGFX11Plus() && (TSFlags & SIInstrFlags::IsDOT) &&
      (TSFlags & SIInstrFlags::VOP3) && !(TSFlags & SIInstrFlags::VOP3P)) {
    int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
    unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
    if (OpSel & 3)
      return false;
  }

  return true;
}
bool AMDGPUAsmParser::validateDPP(const MCInst &Inst,
                                  const OperandVector &Operands) {
  const unsigned Opc = Inst.getOpcode();
  int DppCtrlIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dpp_ctrl);
  if (DppCtrlIdx < 0)
    return true;
  unsigned DppCtrl = Inst.getOperand(DppCtrlIdx).getImm();

  if (!AMDGPU::isLegal64BitDPPControl(DppCtrl)) {
    // DPP64 is supported for row_newbcast only.
    int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
    if (Src0Idx >= 0 &&
        getMRI()->getSubReg(Inst.getOperand(Src0Idx).getReg(), AMDGPU::sub1)) {
      SMLoc S = getImmLoc(AMDGPUOperand::ImmTyDppCtrl, Operands);
      Error(S, "64 bit dpp only supports row_newbcast");
      return false;
    }
  }

  return true;
}
// Check if VCC register matches wavefront size
bool AMDGPUAsmParser::validateVccOperand(unsigned Reg) const {
  auto FB = getFeatureBits();
  return (FB[AMDGPU::FeatureWavefrontSize64] && Reg == AMDGPU::VCC) ||
         (FB[AMDGPU::FeatureWavefrontSize32] && Reg == AMDGPU::VCC_LO);
}
// One unique literal can be used. VOP3 literal is only allowed in GFX10+
bool AMDGPUAsmParser::validateVOPLiteral(const MCInst &Inst,
                                         const OperandVector &Operands) {
  unsigned Opcode = Inst.getOpcode();
  const MCInstrDesc &Desc = MII.get(Opcode);
  bool HasMandatoryLiteral = getNamedOperandIdx(Opcode, OpName::imm) != -1;
  if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P)) &&
      !HasMandatoryLiteral && !isVOPD(Opcode))
    return true;

  OperandIndices OpIndices = getSrcOperandIndices(Opcode, HasMandatoryLiteral);

  unsigned NumExprs = 0;
  unsigned NumLiterals = 0;
  uint32_t LiteralValue;

  for (int OpIdx : OpIndices) {
    if (OpIdx == -1)
      continue;

    const MCOperand &MO = Inst.getOperand(OpIdx);
    if (!MO.isImm() && !MO.isExpr())
      continue;
    if (!isSISrcOperand(Desc, OpIdx))
      continue;

    if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
      uint32_t Value = static_cast<uint32_t>(MO.getImm());
      if (NumLiterals == 0 || LiteralValue != Value) {
        LiteralValue = Value;
        ++NumLiterals;
      }
    } else if (MO.isExpr()) {
      ++NumExprs;
    }
  }
  NumLiterals += NumExprs;

  if (!NumLiterals)
    return true;

  if (!HasMandatoryLiteral && !getFeatureBits()[FeatureVOP3Literal]) {
    Error(getLitLoc(Operands), "literal operands are not supported");
    return false;
  }

  if (NumLiterals > 1) {
    Error(getLitLoc(Operands, true), "only one unique literal operand is allowed");
    return false;
  }

  return true;
}
// Returns -1 if not a register, 0 if VGPR and 1 if AGPR.
static int IsAGPROperand(const MCInst &Inst, uint16_t NameIdx,
                         const MCRegisterInfo *MRI) {
  int OpIdx = AMDGPU::getNamedOperandIdx(Inst.getOpcode(), NameIdx);
  if (OpIdx < 0)
    return -1;

  const MCOperand &Op = Inst.getOperand(OpIdx);
  if (!Op.isReg())
    return -1;

  unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
  auto Reg = Sub ? Sub : Op.getReg();
  const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID);
  return AGPR32.contains(Reg) ? 1 : 0;
}
bool AMDGPUAsmParser::validateAGPRLdSt(const MCInst &Inst) const {
  uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
  if ((TSFlags & (SIInstrFlags::FLAT | SIInstrFlags::MUBUF |
                  SIInstrFlags::MTBUF | SIInstrFlags::MIMG |
                  SIInstrFlags::DS)) == 0)
    return true;

  uint16_t DataNameIdx = (TSFlags & SIInstrFlags::DS) ? AMDGPU::OpName::data0
                                                      : AMDGPU::OpName::vdata;

  const MCRegisterInfo *MRI = getMRI();
  int DstAreg = IsAGPROperand(Inst, AMDGPU::OpName::vdst, MRI);
  int DataAreg = IsAGPROperand(Inst, DataNameIdx, MRI);

  if ((TSFlags & SIInstrFlags::DS) && DataAreg >= 0) {
    int Data2Areg = IsAGPROperand(Inst, AMDGPU::OpName::data1, MRI);
    if (Data2Areg >= 0 && Data2Areg != DataAreg)
      return false;
  }

  auto FB = getFeatureBits();
  if (FB[AMDGPU::FeatureGFX90AInsts]) {
    if (DataAreg < 0 || DstAreg < 0)
      return true;
    return DstAreg == DataAreg;
  }

  return DstAreg < 1 && DataAreg < 1;
}
bool AMDGPUAsmParser::validateVGPRAlign(const MCInst &Inst) const {
  auto FB = getFeatureBits();
  if (!FB[AMDGPU::FeatureGFX90AInsts])
    return true;

  const MCRegisterInfo *MRI = getMRI();
  const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID);
  const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID);
  for (unsigned I = 0, E = Inst.getNumOperands(); I != E; ++I) {
    const MCOperand &Op = Inst.getOperand(I);
    if (!Op.isReg())
      continue;

    unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
    if (!Sub)
      continue;

    if (VGPR32.contains(Sub) && ((Sub - AMDGPU::VGPR0) & 1))
      return false;
    if (AGPR32.contains(Sub) && ((Sub - AMDGPU::AGPR0) & 1))
      return false;
  }

  return true;
}
SMLoc AMDGPUAsmParser::getBLGPLoc(const OperandVector &Operands) const {
  for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
    if (Op.isBLGP())
      return Op.getStartLoc();
  }
  return SMLoc();
}
bool AMDGPUAsmParser::validateBLGP(const MCInst &Inst,
                                   const OperandVector &Operands) {
  unsigned Opc = Inst.getOpcode();
  int BlgpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::blgp);
  if (BlgpIdx == -1)
    return true;
  SMLoc BLGPLoc = getBLGPLoc(Operands);
  if (!BLGPLoc.isValid())
    return true;
  bool IsNeg = StringRef(BLGPLoc.getPointer()).startswith("neg:");
  auto FB = getFeatureBits();
  bool UsesNeg = false;
  if (FB[AMDGPU::FeatureGFX940Insts]) {
    switch (Opc) {
    case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_acd:
    case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_vcd:
    case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_acd:
    case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_vcd:
      UsesNeg = true;
    }
  }

  if (IsNeg == UsesNeg)
    return true;

  Error(BLGPLoc,
        UsesNeg ? "invalid modifier: blgp is not supported"
                : "invalid modifier: neg is not supported");

  return false;
}
bool AMDGPUAsmParser::validateWaitCnt(const MCInst &Inst,
                                      const OperandVector &Operands) {
  if (!isGFX11Plus())
    return true;

  unsigned Opc = Inst.getOpcode();
  if (Opc != AMDGPU::S_WAITCNT_EXPCNT_gfx11 &&
      Opc != AMDGPU::S_WAITCNT_LGKMCNT_gfx11 &&
      Opc != AMDGPU::S_WAITCNT_VMCNT_gfx11 &&
      Opc != AMDGPU::S_WAITCNT_VSCNT_gfx11)
    return true;

  int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::sdst);
  assert(Src0Idx >= 0 && Inst.getOperand(Src0Idx).isReg());
  auto Reg = mc2PseudoReg(Inst.getOperand(Src0Idx).getReg());
  if (Reg == AMDGPU::SGPR_NULL)
    return true;

  SMLoc RegLoc = getRegLoc(Reg, Operands);
  Error(RegLoc, "src0 must be null");
  return false;
}
// gfx90a has an undocumented limitation:
// DS_GWS opcodes must use even aligned registers.
bool AMDGPUAsmParser::validateGWS(const MCInst &Inst,
                                  const OperandVector &Operands) {
  if (!getFeatureBits()[AMDGPU::FeatureGFX90AInsts])
    return true;

  int Opc = Inst.getOpcode();
  if (Opc != AMDGPU::DS_GWS_INIT_vi && Opc != AMDGPU::DS_GWS_BARRIER_vi &&
      Opc != AMDGPU::DS_GWS_SEMA_BR_vi)
    return true;

  const MCRegisterInfo *MRI = getMRI();
  const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID);
  int Data0Pos =
      AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::data0);
  assert(Data0Pos != -1);
  auto Reg = Inst.getOperand(Data0Pos).getReg();
  auto RegIdx = Reg - (VGPR32.contains(Reg) ? AMDGPU::VGPR0 : AMDGPU::AGPR0);
  if (RegIdx & 1) {
    SMLoc RegLoc = getRegLoc(Reg, Operands);
    Error(RegLoc, "vgpr must be even aligned");
    return false;
  }

  return true;
}
bool AMDGPUAsmParser::validateCoherencyBits(const MCInst &Inst,
                                            const OperandVector &Operands,
                                            const SMLoc &IDLoc) {
  int CPolPos = AMDGPU::getNamedOperandIdx(Inst.getOpcode(),
                                           AMDGPU::OpName::cpol);
  if (CPolPos == -1)
    return true;

  unsigned CPol = Inst.getOperand(CPolPos).getImm();

  uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
  if (TSFlags & SIInstrFlags::SMRD) {
    if (CPol && (isSI() || isCI())) {
      SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
      Error(S, "cache policy is not supported for SMRD instructions");
      return false;
    }
    if (CPol & ~(AMDGPU::CPol::GLC | AMDGPU::CPol::DLC)) {
      Error(IDLoc, "invalid cache policy for SMEM instruction");
      return false;
    }
  }

  if (isGFX90A() && !isGFX940() && (CPol & CPol::SCC)) {
    SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
    StringRef CStr(S.getPointer());
    S = SMLoc::getFromPointer(&CStr.data()[CStr.find("scc")]);
    Error(S, "scc is not supported on this GPU");
    return false;
  }

  if (!(TSFlags & (SIInstrFlags::IsAtomicNoRet | SIInstrFlags::IsAtomicRet)))
    return true;

  if (TSFlags & SIInstrFlags::IsAtomicRet) {
    if (!(TSFlags & SIInstrFlags::MIMG) && !(CPol & CPol::GLC)) {
      Error(IDLoc, isGFX940() ? "instruction must use sc0"
                              : "instruction must use glc");
      return false;
    }
  } else {
    if (CPol & CPol::GLC) {
      SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
      StringRef CStr(S.getPointer());
      S = SMLoc::getFromPointer(
          &CStr.data()[CStr.find(isGFX940() ? "sc0" : "glc")]);
      Error(S, isGFX940() ? "instruction must not use sc0"
                          : "instruction must not use glc");
      return false;
    }
  }

  return true;
}
bool AMDGPUAsmParser::validateExeczVcczOperands(const OperandVector &Operands) {
  if (!isGFX11Plus())
    return true;
  for (auto &Operand : Operands) {
    if (!Operand->isReg())
      continue;
    unsigned Reg = Operand->getReg();
    if (Reg == SRC_EXECZ || Reg == SRC_VCCZ) {
      Error(getRegLoc(Reg, Operands),
            "execz and vccz are not supported on this GPU");
      return false;
    }
  }
  return true;
}
bool AMDGPUAsmParser::validateTFE(const MCInst &Inst,
                                  const OperandVector &Operands) {
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
  if (Desc.mayStore() &&
      (Desc.TSFlags & (SIInstrFlags::MUBUF | SIInstrFlags::MTBUF))) {
    SMLoc Loc = getImmLoc(AMDGPUOperand::ImmTyTFE, Operands);
    if (Loc != getInstLoc(Operands)) {
      Error(Loc, "TFE modifier has no meaning for store instructions");
      return false;
    }
  }

  return true;
}
bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst,
                                          const SMLoc &IDLoc,
                                          const OperandVector &Operands) {
  if (auto ErrMsg = validateLdsDirect(Inst)) {
    Error(getRegLoc(LDS_DIRECT, Operands), *ErrMsg);
    return false;
  }
  if (!validateSOPLiteral(Inst)) {
    Error(getLitLoc(Operands),
          "only one unique literal operand is allowed");
    return false;
  }
  if (!validateVOPLiteral(Inst, Operands)) {
    return false;
  }
  if (!validateConstantBusLimitations(Inst, Operands)) {
    return false;
  }
  if (!validateVOPDRegBankConstraints(Inst, Operands)) {
    return false;
  }
  if (!validateIntClampSupported(Inst)) {
    Error(getImmLoc(AMDGPUOperand::ImmTyClampSI, Operands),
          "integer clamping is not supported on this GPU");
    return false;
  }
  if (!validateOpSel(Inst)) {
    Error(getImmLoc(AMDGPUOperand::ImmTyOpSel, Operands),
          "invalid op_sel operand");
    return false;
  }
  if (!validateDPP(Inst, Operands)) {
    return false;
  }
  // For MUBUF/MTBUF d16 is a part of opcode, so there is nothing to validate.
  if (!validateMIMGD16(Inst)) {
    Error(getImmLoc(AMDGPUOperand::ImmTyD16, Operands),
          "d16 modifier is not supported on this GPU");
    return false;
  }
  if (!validateMIMGMSAA(Inst)) {
    Error(getImmLoc(AMDGPUOperand::ImmTyDim, Operands),
          "invalid dim; must be MSAA type");
    return false;
  }
  if (!validateMIMGDataSize(Inst, IDLoc)) {
    return false;
  }
  if (!validateMIMGAddrSize(Inst, IDLoc))
    return false;
  if (!validateMIMGAtomicDMask(Inst)) {
    Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands),
          "invalid atomic image dmask");
    return false;
  }
  if (!validateMIMGGatherDMask(Inst)) {
    Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands),
          "invalid image_gather dmask: only one bit must be set");
    return false;
  }
  if (!validateMovrels(Inst, Operands)) {
    return false;
  }
  if (!validateFlatOffset(Inst, Operands)) {
    return false;
  }
  if (!validateSMEMOffset(Inst, Operands)) {
    return false;
  }
  if (!validateMAIAccWrite(Inst, Operands)) {
    return false;
  }
  if (!validateMAISrc2(Inst, Operands)) {
    return false;
  }
  if (!validateMFMA(Inst, Operands)) {
    return false;
  }
  if (!validateCoherencyBits(Inst, Operands, IDLoc)) {
    return false;
  }

  if (!validateAGPRLdSt(Inst)) {
    Error(IDLoc, getFeatureBits()[AMDGPU::FeatureGFX90AInsts]
    ? "invalid register class: data and dst should be all VGPR or AGPR"
    : "invalid register class: agpr loads and stores not supported on this GPU"
    );
    return false;
  }
  if (!validateVGPRAlign(Inst)) {
    Error(IDLoc,
      "invalid register class: vgpr tuples must be 64 bit aligned");
    return false;
  }
  if (!validateGWS(Inst, Operands)) {
    return false;
  }

  if (!validateBLGP(Inst, Operands)) {
    return false;
  }

  if (!validateDivScale(Inst)) {
    Error(IDLoc, "ABS not allowed in VOP3B instructions");
    return false;
  }
  if (!validateWaitCnt(Inst, Operands)) {
    return false;
  }
  if (!validateExeczVcczOperands(Operands)) {
    return false;
  }
  if (!validateTFE(Inst, Operands)) {
    return false;
  }

  return true;
}
static std::string AMDGPUMnemonicSpellCheck(StringRef S,
                                            const FeatureBitset &FBS,
                                            unsigned VariantID = 0);

static bool AMDGPUCheckMnemonic(StringRef Mnemonic,
                                const FeatureBitset &AvailableFeatures,
                                unsigned VariantID);
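
// Returns true if Mnemo names an instruction that is available with the given
// feature set in any of the queried encoding variants.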
bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
                                       const FeatureBitset &FBS) {
  return isSupportedMnemo(Mnemo, FBS, getAllVariants());
}

bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
                                       const FeatureBitset &FBS,
                                       ArrayRef<unsigned> Variants) {
  for (auto Variant : Variants) {
    if (AMDGPUCheckMnemonic(Mnemo, FBS, Variant))
      return true;
  }

  return false;
}
bool AMDGPUAsmParser::checkUnsupportedInstruction(StringRef Mnemo,
                                                  const SMLoc &IDLoc) {
  FeatureBitset FBS = ComputeAvailableFeatures(getFeatureBits());

  // Check if requested instruction variant is supported.
  if (isSupportedMnemo(Mnemo, FBS, getMatchedVariants()))
    return false;

  // This instruction is not supported.
  // Clear any other pending errors because they are no longer relevant.
  getParser().clearPendingErrors();

  // Requested instruction variant is not supported.
  // Check if any other variants are supported.
  StringRef VariantName = getMatchedVariantName();
  if (!VariantName.empty() && isSupportedMnemo(Mnemo, FBS)) {
    return Error(IDLoc,
                 (Twine(VariantName) +
                  " variant of this instruction is not supported"));
  }

  // Check if this instruction may be used with a different wavesize.
  if (isGFX10Plus() && getFeatureBits()[AMDGPU::FeatureWavefrontSize64] &&
      !getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) {

    FeatureBitset FeaturesWS32 = getFeatureBits();
    FeaturesWS32.flip(AMDGPU::FeatureWavefrontSize64)
        .flip(AMDGPU::FeatureWavefrontSize32);
    FeatureBitset AvailableFeaturesWS32 =
        ComputeAvailableFeatures(FeaturesWS32);

    if (isSupportedMnemo(Mnemo, AvailableFeaturesWS32, getMatchedVariants()))
      return Error(IDLoc, "instruction requires wavesize=32");
  }

  // Finally check if this instruction is supported on any other GPU.
  if (isSupportedMnemo(Mnemo, FeatureBitset().set())) {
    return Error(IDLoc, "instruction not supported on this GPU");
  }

  // Instruction not supported on any GPU. Probably a typo.
  std::string Suggestion = AMDGPUMnemonicSpellCheck(Mnemo, FBS);
  return Error(IDLoc, "invalid instruction" + Suggestion);
}
static bool isInvalidVOPDY(const OperandVector &Operands,
                           uint64_t InvalidOprIdx) {
  assert(InvalidOprIdx < Operands.size());
  const auto &Op = ((AMDGPUOperand &)*Operands[InvalidOprIdx]);
  if (Op.isToken() && InvalidOprIdx > 1) {
    const auto &PrevOp = ((AMDGPUOperand &)*Operands[InvalidOprIdx - 1]);
    return PrevOp.isToken() && PrevOp.getToken() == "::";
  }
  return false;
}
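
// Main entry point from the generic assembler: try each encoding variant,
// keep the most specific match status, then validate and emit the result.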
bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
                                              OperandVector &Operands,
                                              MCStreamer &Out,
                                              uint64_t &ErrorInfo,
                                              bool MatchingInlineAsm) {
  MCInst Inst;
  unsigned Result = Match_Success;
  for (auto Variant : getMatchedVariants()) {
    uint64_t EI;
    auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm,
                                  Variant);
    // We order match statuses from least to most specific and keep the most
    // specific status as the result:
    // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature < Match_PreferE32
    if ((R == Match_Success) ||
        (R == Match_PreferE32) ||
        (R == Match_MissingFeature && Result != Match_PreferE32) ||
        (R == Match_InvalidOperand && Result != Match_MissingFeature
                                   && Result != Match_PreferE32) ||
        (R == Match_MnemonicFail   && Result != Match_InvalidOperand
                                   && Result != Match_MissingFeature
                                   && Result != Match_PreferE32)) {
      Result = R;
      ErrorInfo = EI;
    }
    if (R == Match_Success)
      break;
  }

  if (Result == Match_Success) {
    if (!validateInstruction(Inst, IDLoc, Operands)) {
      return true;
    }
    Inst.setLoc(IDLoc);
    Out.emitInstruction(Inst, getSTI());
    return false;
  }

  StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken();
  if (checkUnsupportedInstruction(Mnemo, IDLoc)) {
    return true;
  }

  switch (Result) {
  default: break;
  case Match_MissingFeature:
    // It has been verified that the specified instruction
    // mnemonic is valid. A match was found but it requires
    // features which are not supported on this GPU.
    return Error(IDLoc, "operands are not valid for this GPU or mode");

  case Match_InvalidOperand: {
    SMLoc ErrorLoc = IDLoc;
    if (ErrorInfo != ~0ULL) {
      if (ErrorInfo >= Operands.size()) {
        return Error(IDLoc, "too few operands for instruction");
      }
      ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc();
      if (ErrorLoc == SMLoc())
        ErrorLoc = IDLoc;

      if (isInvalidVOPDY(Operands, ErrorInfo))
        return Error(ErrorLoc, "invalid VOPDY instruction");
    }
    return Error(ErrorLoc, "invalid operand for instruction");
  }

  case Match_PreferE32:
    return Error(IDLoc, "internal error: instruction without _e64 suffix "
                        "should be encoded as e32");
  case Match_MnemonicFail:
    llvm_unreachable("Invalid instructions should have been handled already");
  }
  llvm_unreachable("Implement any new match types added!");
}
bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) {
  int64_t Tmp = -1;
  if (!isToken(AsmToken::Integer) && !isToken(AsmToken::Identifier)) {
    return true;
  }
  if (getParser().parseAbsoluteExpression(Tmp)) {
    return true;
  }
  Ret = static_cast<uint32_t>(Tmp);
  return false;
}
bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major,
                                               uint32_t &Minor) {
  if (ParseAsAbsoluteExpression(Major))
    return TokError("invalid major version");

  if (!trySkipToken(AsmToken::Comma))
    return TokError("minor version number required, comma expected");

  if (ParseAsAbsoluteExpression(Minor))
    return TokError("invalid minor version");

  return false;
}
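
// An illustrative directive of the form parsed below (the target id string is
// an example, not taken from this file):
//   .amdgcn_target "amdgcn-amd-amdhsa--gfx90a"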
bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() {
  if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
    return TokError("directive only supported for amdgcn architecture");

  std::string TargetIDDirective;
  SMLoc TargetStart = getTok().getLoc();
  if (getParser().parseEscapedString(TargetIDDirective))
    return true;

  SMRange TargetRange = SMRange(TargetStart, getTok().getLoc());
  if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective)
    return getParser().Error(TargetRange.Start,
        (Twine(".amdgcn_target directive's target id ") +
         Twine(TargetIDDirective) +
         Twine(" does not match the specified target id ") +
         Twine(getTargetStreamer().getTargetID()->toString())).str());

  return false;
}
bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) {
  return Error(Range.Start, "value out of range", Range);
}
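
// Computes the granulated VGPR/SGPR block counts for the kernel descriptor.
// Returns true (after reporting an error) if a register count is out of range.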
bool AMDGPUAsmParser::calculateGPRBlocks(
    const FeatureBitset &Features, bool VCCUsed, bool FlatScrUsed,
    bool XNACKUsed, std::optional<bool> EnableWavefrontSize32,
    unsigned NextFreeVGPR, SMRange VGPRRange, unsigned NextFreeSGPR,
    SMRange SGPRRange, unsigned &VGPRBlocks, unsigned &SGPRBlocks) {
  // TODO(scott.linder): These calculations are duplicated from
  // AMDGPUAsmPrinter::getSIProgramInfo and could be unified.
  IsaVersion Version = getIsaVersion(getSTI().getCPU());

  unsigned NumVGPRs = NextFreeVGPR;
  unsigned NumSGPRs = NextFreeSGPR;

  if (Version.Major >= 10)
    NumSGPRs = 0;
  else {
    unsigned MaxAddressableNumSGPRs =
        IsaInfo::getAddressableNumSGPRs(&getSTI());

    if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) &&
        NumSGPRs > MaxAddressableNumSGPRs)
      return OutOfRangeError(SGPRRange);

    NumSGPRs +=
        IsaInfo::getNumExtraSGPRs(&getSTI(), VCCUsed, FlatScrUsed, XNACKUsed);

    if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) &&
        NumSGPRs > MaxAddressableNumSGPRs)
      return OutOfRangeError(SGPRRange);

    if (Features.test(FeatureSGPRInitBug))
      NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG;
  }

  VGPRBlocks =
      IsaInfo::getNumVGPRBlocks(&getSTI(), NumVGPRs, EnableWavefrontSize32);
  SGPRBlocks = IsaInfo::getNumSGPRBlocks(&getSTI(), NumSGPRs);

  return false;
}
bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
  if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
    return TokError("directive only supported for amdgcn architecture");

  if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA)
    return TokError("directive only supported for amdhsa OS");

  StringRef KernelName;
  if (getParser().parseIdentifier(KernelName))
    return true;

  kernel_descriptor_t KD = getDefaultAmdhsaKernelDescriptor(&getSTI());

  StringSet<> Seen;

  IsaVersion IVersion = getIsaVersion(getSTI().getCPU());

  SMRange VGPRRange;
  uint64_t NextFreeVGPR = 0;
  uint64_t AccumOffset = 0;
  uint64_t SharedVGPRCount = 0;
  SMRange SGPRRange;
  uint64_t NextFreeSGPR = 0;

  // Count the number of user SGPRs implied from the enabled feature bits.
  unsigned ImpliedUserSGPRCount = 0;

  // Track if the asm explicitly contains the directive for the user SGPR
  // count.
  std::optional<unsigned> ExplicitUserSGPRCount;
  bool ReserveVCC = true;
  bool ReserveFlatScr = true;
  std::optional<bool> EnableWavefrontSize32;

  while (true) {
    while (trySkipToken(AsmToken::EndOfStatement));

    StringRef ID;
    SMRange IDRange = getTok().getLocRange();
    if (!parseId(ID, "expected .amdhsa_ directive or .end_amdhsa_kernel"))
      return true;

    if (ID == ".end_amdhsa_kernel")
      break;

    if (!Seen.insert(ID).second)
      return TokError(".amdhsa_ directives cannot be repeated");

    SMLoc ValStart = getLoc();
    int64_t IVal;
    if (getParser().parseAbsoluteExpression(IVal))
      return true;
    SMLoc ValEnd = getLoc();
    SMRange ValRange = SMRange(ValStart, ValEnd);

    if (IVal < 0)
      return OutOfRangeError(ValRange);

    uint64_t Val = IVal;

#define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE)                           \
  if (!isUInt<ENTRY##_WIDTH>(VALUE))                                           \
    return OutOfRangeError(RANGE);                                             \
  AMDHSA_BITS_SET(FIELD, ENTRY, VALUE);
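
    // Each .amdhsa_* directive below range-checks its value and packs it into
    // the corresponding kernel descriptor field via PARSE_BITS_ENTRY.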
    if (ID == ".amdhsa_group_segment_fixed_size") {
      if (!isUInt<sizeof(KD.group_segment_fixed_size) * CHAR_BIT>(Val))
        return OutOfRangeError(ValRange);
      KD.group_segment_fixed_size = Val;
    } else if (ID == ".amdhsa_private_segment_fixed_size") {
      if (!isUInt<sizeof(KD.private_segment_fixed_size) * CHAR_BIT>(Val))
        return OutOfRangeError(ValRange);
      KD.private_segment_fixed_size = Val;
    } else if (ID == ".amdhsa_kernarg_size") {
      if (!isUInt<sizeof(KD.kernarg_size) * CHAR_BIT>(Val))
        return OutOfRangeError(ValRange);
      KD.kernarg_size = Val;
    } else if (ID == ".amdhsa_user_sgpr_count") {
      ExplicitUserSGPRCount = Val;
    } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") {
      if (hasArchitectedFlatScratch())
        return Error(IDRange.Start,
                     "directive is not supported with architected flat scratch",
                     IDRange);
      PARSE_BITS_ENTRY(KD.kernel_code_properties,
                       KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER,
                       Val, ValRange);
      if (Val)
        ImpliedUserSGPRCount += 4;
    } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") {
      PARSE_BITS_ENTRY(KD.kernel_code_properties,
                       KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, Val,
                       ValRange);
      if (Val)
        ImpliedUserSGPRCount += 2;
    } else if (ID == ".amdhsa_user_sgpr_queue_ptr") {
      PARSE_BITS_ENTRY(KD.kernel_code_properties,
                       KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, Val,
                       ValRange);
      if (Val)
        ImpliedUserSGPRCount += 2;
    } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") {
      PARSE_BITS_ENTRY(KD.kernel_code_properties,
                       KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR,
                       Val, ValRange);
      if (Val)
        ImpliedUserSGPRCount += 2;
    } else if (ID == ".amdhsa_user_sgpr_dispatch_id") {
      PARSE_BITS_ENTRY(KD.kernel_code_properties,
                       KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, Val,
                       ValRange);
      if (Val)
        ImpliedUserSGPRCount += 2;
    } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") {
      if (hasArchitectedFlatScratch())
        return Error(IDRange.Start,
                     "directive is not supported with architected flat scratch",
                     IDRange);
      PARSE_BITS_ENTRY(KD.kernel_code_properties,
                       KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, Val,
                       ValRange);
      if (Val)
        ImpliedUserSGPRCount += 2;
    } else if (ID == ".amdhsa_user_sgpr_private_segment_size") {
      PARSE_BITS_ENTRY(KD.kernel_code_properties,
                       KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE,
                       Val, ValRange);
      if (Val)
        ImpliedUserSGPRCount += 1;
    } else if (ID == ".amdhsa_wavefront_size32") {
      if (IVersion.Major < 10)
        return Error(IDRange.Start, "directive requires gfx10+", IDRange);
      EnableWavefrontSize32 = Val;
      PARSE_BITS_ENTRY(KD.kernel_code_properties,
                       KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32,
                       Val, ValRange);
    } else if (ID == ".amdhsa_uses_dynamic_stack") {
      PARSE_BITS_ENTRY(KD.kernel_code_properties,
                       KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK, Val, ValRange);
    } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") {
      if (hasArchitectedFlatScratch())
        return Error(IDRange.Start,
                     "directive is not supported with architected flat scratch",
                     IDRange);
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
                       COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, Val, ValRange);
    } else if (ID == ".amdhsa_enable_private_segment") {
      if (!hasArchitectedFlatScratch())
        return Error(
            IDRange.Start,
            "directive is not supported without architected flat scratch",
            IDRange);
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
                       COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, Val, ValRange);
    } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
                       COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, Val,
                       ValRange);
    } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
                       COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, Val,
                       ValRange);
    } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
                       COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, Val,
                       ValRange);
    } else if (ID == ".amdhsa_system_sgpr_workgroup_info") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
                       COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, Val,
                       ValRange);
    } else if (ID == ".amdhsa_system_vgpr_workitem_id") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
                       COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, Val,
                       ValRange);
    } else if (ID == ".amdhsa_next_free_vgpr") {
      VGPRRange = ValRange;
      NextFreeVGPR = Val;
    } else if (ID == ".amdhsa_next_free_sgpr") {
      SGPRRange = ValRange;
      NextFreeSGPR = Val;
    } else if (ID == ".amdhsa_accum_offset") {
      if (!isGFX90A())
        return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
      AccumOffset = Val;
    } else if (ID == ".amdhsa_reserve_vcc") {
      if (!isUInt<1>(Val))
        return OutOfRangeError(ValRange);
      ReserveVCC = Val;
    } else if (ID == ".amdhsa_reserve_flat_scratch") {
      if (IVersion.Major < 7)
        return Error(IDRange.Start, "directive requires gfx7+", IDRange);
      if (hasArchitectedFlatScratch())
        return Error(IDRange.Start,
                     "directive is not supported with architected flat scratch",
                     IDRange);
      if (!isUInt<1>(Val))
        return OutOfRangeError(ValRange);
      ReserveFlatScr = Val;
    } else if (ID == ".amdhsa_reserve_xnack_mask") {
      if (IVersion.Major < 8)
        return Error(IDRange.Start, "directive requires gfx8+", IDRange);
      if (!isUInt<1>(Val))
        return OutOfRangeError(ValRange);
      if (Val != getTargetStreamer().getTargetID()->isXnackOnOrAny())
        return getParser().Error(
            IDRange.Start, ".amdhsa_reserve_xnack_mask does not match target id",
            IDRange);
    } else if (ID == ".amdhsa_float_round_mode_32") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
                       COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, Val, ValRange);
    } else if (ID == ".amdhsa_float_round_mode_16_64") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
                       COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, Val, ValRange);
    } else if (ID == ".amdhsa_float_denorm_mode_32") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
                       COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, Val, ValRange);
    } else if (ID == ".amdhsa_float_denorm_mode_16_64") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
                       COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, Val,
                       ValRange);
    } else if (ID == ".amdhsa_dx10_clamp") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
                       COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP, Val, ValRange);
    } else if (ID == ".amdhsa_ieee_mode") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE,
                       Val, ValRange);
    } else if (ID == ".amdhsa_fp16_overflow") {
      if (IVersion.Major < 9)
        return Error(IDRange.Start, "directive requires gfx9+", IDRange);
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FP16_OVFL, Val,
                       ValRange);
    } else if (ID == ".amdhsa_tg_split") {
      if (!isGFX90A())
        return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT, Val,
                       ValRange);
    } else if (ID == ".amdhsa_workgroup_processor_mode") {
      if (IVersion.Major < 10)
        return Error(IDRange.Start, "directive requires gfx10+", IDRange);
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_WGP_MODE, Val,
                       ValRange);
    } else if (ID == ".amdhsa_memory_ordered") {
      if (IVersion.Major < 10)
        return Error(IDRange.Start, "directive requires gfx10+", IDRange);
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_MEM_ORDERED, Val,
                       ValRange);
    } else if (ID == ".amdhsa_forward_progress") {
      if (IVersion.Major < 10)
        return Error(IDRange.Start, "directive requires gfx10+", IDRange);
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FWD_PROGRESS, Val,
                       ValRange);
    } else if (ID == ".amdhsa_shared_vgpr_count") {
      if (IVersion.Major < 10)
        return Error(IDRange.Start, "directive requires gfx10+", IDRange);
      SharedVGPRCount = Val;
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3,
                       COMPUTE_PGM_RSRC3_GFX10_PLUS_SHARED_VGPR_COUNT, Val,
                       ValRange);
    } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") {
      PARSE_BITS_ENTRY(
          KD.compute_pgm_rsrc2,
          COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, Val,
          ValRange);
    } else if (ID == ".amdhsa_exception_fp_denorm_src") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
                       COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE,
                       Val, ValRange);
    } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") {
      PARSE_BITS_ENTRY(
          KD.compute_pgm_rsrc2,
          COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, Val,
          ValRange);
    } else if (ID == ".amdhsa_exception_fp_ieee_overflow") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
                       COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW,
                       Val, ValRange);
    } else if (ID == ".amdhsa_exception_fp_ieee_underflow") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
                       COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW,
                       Val, ValRange);
    } else if (ID == ".amdhsa_exception_fp_ieee_inexact") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
                       COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT,
                       Val, ValRange);
    } else if (ID == ".amdhsa_exception_int_div_zero") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
                       COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO,
                       Val, ValRange);
    } else {
      return Error(IDRange.Start, "unknown .amdhsa_kernel directive", IDRange);
    }
  }

#undef PARSE_BITS_ENTRY

  if (!Seen.contains(".amdhsa_next_free_vgpr"))
    return TokError(".amdhsa_next_free_vgpr directive is required");

  if (!Seen.contains(".amdhsa_next_free_sgpr"))
    return TokError(".amdhsa_next_free_sgpr directive is required");

  unsigned VGPRBlocks;
  unsigned SGPRBlocks;
  if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr,
                         getTargetStreamer().getTargetID()->isXnackOnOrAny(),
                         EnableWavefrontSize32, NextFreeVGPR,
                         VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks,
                         SGPRBlocks))
    return true;

  if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>(
          VGPRBlocks))
    return OutOfRangeError(VGPRRange);
  AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
                  COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, VGPRBlocks);

  if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>(
          SGPRBlocks))
    return OutOfRangeError(SGPRRange);
  AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
                  COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT,
                  SGPRBlocks);

  if (ExplicitUserSGPRCount && ImpliedUserSGPRCount > *ExplicitUserSGPRCount)
    return TokError("amdgpu_user_sgpr_count smaller than implied by "
                    "enabled user SGPRs");

  unsigned UserSGPRCount =
      ExplicitUserSGPRCount ? *ExplicitUserSGPRCount : ImpliedUserSGPRCount;

  if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount))
    return TokError("too many user SGPRs enabled");
  AMDHSA_BITS_SET(KD.compute_pgm_rsrc2, COMPUTE_PGM_RSRC2_USER_SGPR_COUNT,
                  UserSGPRCount);

  if (isGFX90A()) {
    if (!Seen.contains(".amdhsa_accum_offset"))
      return TokError(".amdhsa_accum_offset directive is required");
    if (AccumOffset < 4 || AccumOffset > 256 || (AccumOffset & 3))
      return TokError("accum_offset should be in range [4..256] in "
                      "increments of 4");
    if (AccumOffset > alignTo(std::max((uint64_t)1, NextFreeVGPR), 4))
      return TokError("accum_offset exceeds total VGPR allocation");
    AMDHSA_BITS_SET(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET,
                    (AccumOffset / 4 - 1));
  }

  if (IVersion.Major >= 10) {
    // SharedVGPRCount < 16 checked by PARSE_ENTRY_BITS
    if (SharedVGPRCount && EnableWavefrontSize32 && *EnableWavefrontSize32) {
      return TokError("shared_vgpr_count directive not valid on "
                      "wavefront size 32");
    }
    if (SharedVGPRCount * 2 + VGPRBlocks > 63) {
      return TokError("shared_vgpr_count*2 + "
                      "compute_pgm_rsrc1.GRANULATED_WORKITEM_VGPR_COUNT cannot "
                      "exceed 63");
    }
  }

  getTargetStreamer().EmitAmdhsaKernelDescriptor(
      getSTI(), KernelName, KD, NextFreeVGPR, NextFreeSGPR, ReserveVCC,
      ReserveFlatScr, AMDGPU::getAmdhsaCodeObjectVersion());
  return false;
}
bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectVersion() {
  uint32_t Major;
  uint32_t Minor;

  if (ParseDirectiveMajorMinor(Major, Minor))
    return true;

  getTargetStreamer().EmitDirectiveHSACodeObjectVersion(Major, Minor);
  return false;
}
bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() {
  uint32_t Major;
  uint32_t Minor;
  uint32_t Stepping;
  StringRef VendorName;
  StringRef ArchName;

  // If this directive has no arguments, then use the ISA version for the
  // targeted GPU.
  if (isToken(AsmToken::EndOfStatement)) {
    AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
    getTargetStreamer().EmitDirectiveHSACodeObjectISAV2(ISA.Major, ISA.Minor,
                                                        ISA.Stepping,
                                                        "AMD", "AMDGPU");
    return false;
  }

  if (ParseDirectiveMajorMinor(Major, Minor))
    return true;

  if (!trySkipToken(AsmToken::Comma))
    return TokError("stepping version number required, comma expected");

  if (ParseAsAbsoluteExpression(Stepping))
    return TokError("invalid stepping version");

  if (!trySkipToken(AsmToken::Comma))
    return TokError("vendor name required, comma expected");

  if (!parseString(VendorName, "invalid vendor name"))
    return true;

  if (!trySkipToken(AsmToken::Comma))
    return TokError("arch name required, comma expected");

  if (!parseString(ArchName, "invalid arch name"))
    return true;

  getTargetStreamer().EmitDirectiveHSACodeObjectISAV2(Major, Minor, Stepping,
                                                      VendorName, ArchName);
  return false;
}
bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID,
                                               amd_kernel_code_t &Header) {
  // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing
  // assembly for backwards compatibility.
  if (ID == "max_scratch_backing_memory_byte_size") {
    Parser.eatToEndOfStatement();
    return false;
  }

  SmallString<40> ErrStr;
  raw_svector_ostream Err(ErrStr);
  if (!parseAmdKernelCodeField(ID, getParser(), Header, Err)) {
    return TokError(Err.str());
  }

  if (ID == "enable_wavefront_size32") {
    if (Header.code_properties & AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) {
      if (!isGFX10Plus())
        return TokError("enable_wavefront_size32=1 is only allowed on GFX10+");
      if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
        return TokError("enable_wavefront_size32=1 requires +WavefrontSize32");
    } else {
      if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
        return TokError("enable_wavefront_size32=0 requires +WavefrontSize64");
    }
  }

  if (ID == "wavefront_size") {
    if (Header.wavefront_size == 5) {
      if (!isGFX10Plus())
        return TokError("wavefront_size=5 is only allowed on GFX10+");
      if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
        return TokError("wavefront_size=5 requires +WavefrontSize32");
    } else if (Header.wavefront_size == 6) {
      if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
        return TokError("wavefront_size=6 requires +WavefrontSize64");
    }
  }

  if (ID == "enable_wgp_mode") {
    if (G_00B848_WGP_MODE(Header.compute_pgm_resource_registers) &&
        !isGFX10Plus())
      return TokError("enable_wgp_mode=1 is only allowed on GFX10+");
  }

  if (ID == "enable_mem_ordered") {
    if (G_00B848_MEM_ORDERED(Header.compute_pgm_resource_registers) &&
        !isGFX10Plus())
      return TokError("enable_mem_ordered=1 is only allowed on GFX10+");
  }

  if (ID == "enable_fwd_progress") {
    if (G_00B848_FWD_PROGRESS(Header.compute_pgm_resource_registers) &&
        !isGFX10Plus())
      return TokError("enable_fwd_progress=1 is only allowed on GFX10+");
  }

  return false;
}
bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() {
  amd_kernel_code_t Header;
  AMDGPU::initDefaultAMDKernelCodeT(Header, &getSTI());

  while (true) {
    // Lex EndOfStatement. This is in a while loop, because lexing a comment
    // will set the current token to EndOfStatement.
    while(trySkipToken(AsmToken::EndOfStatement));

    StringRef ID;
    if (!parseId(ID, "expected value identifier or .end_amd_kernel_code_t"))
      return true;

    if (ID == ".end_amd_kernel_code_t")
      break;

    if (ParseAMDKernelCodeTValue(ID, Header))
      return true;
  }

  getTargetStreamer().EmitAMDKernelCodeT(Header);

  return false;
}
bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() {
  StringRef KernelName;
  if (!parseId(KernelName, "expected symbol name"))
    return true;

  getTargetStreamer().EmitAMDGPUSymbolType(KernelName,
                                           ELF::STT_AMDGPU_HSA_KERNEL);

  KernelScope.initialize(getContext());
  return false;
}
bool AMDGPUAsmParser::ParseDirectiveISAVersion() {
  if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) {
    return Error(getLoc(),
                 ".amd_amdgpu_isa directive is not available on non-amdgcn "
                 "architectures");
  }

  auto TargetIDDirective = getLexer().getTok().getStringContents();
  if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective)
    return Error(getParser().getTok().getLoc(), "target id must match options");

  getTargetStreamer().EmitISAVersion();

  return false;
}
bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() {
  const char *AssemblerDirectiveBegin;
  const char *AssemblerDirectiveEnd;
  std::tie(AssemblerDirectiveBegin, AssemblerDirectiveEnd) =
      isHsaAbiVersion3AndAbove(&getSTI())
          ? std::pair(HSAMD::V3::AssemblerDirectiveBegin,
                      HSAMD::V3::AssemblerDirectiveEnd)
          : std::pair(HSAMD::AssemblerDirectiveBegin,
                      HSAMD::AssemblerDirectiveEnd);

  if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) {
    return Error(getLoc(),
                 (Twine(AssemblerDirectiveBegin) + Twine(" directive is "
                 "not available on non-amdhsa OSes")).str());
  }

  std::string HSAMetadataString;
  if (ParseToEndDirective(AssemblerDirectiveBegin, AssemblerDirectiveEnd,
                          HSAMetadataString))
    return true;

  if (isHsaAbiVersion3AndAbove(&getSTI())) {
    if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString))
      return Error(getLoc(), "invalid HSA metadata");
  } else {
    if (!getTargetStreamer().EmitHSAMetadataV2(HSAMetadataString))
      return Error(getLoc(), "invalid HSA metadata");
  }

  return false;
}
/// Common code to parse out a block of text (typically YAML) between start and
/// end directives.
bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin,
                                          const char *AssemblerDirectiveEnd,
                                          std::string &CollectString) {

  raw_string_ostream CollectStream(CollectString);

  getLexer().setSkipSpace(false);

  bool FoundEnd = false;
  while (!isToken(AsmToken::Eof)) {
    while (isToken(AsmToken::Space)) {
      CollectStream << getTokenStr();
      Lex();
    }

    if (trySkipId(AssemblerDirectiveEnd)) {
      FoundEnd = true;
      break;
    }

    CollectStream << Parser.parseStringToEndOfStatement()
                  << getContext().getAsmInfo()->getSeparatorString();

    Parser.eatToEndOfStatement();
  }

  getLexer().setSkipSpace(true);

  if (isToken(AsmToken::Eof) && !FoundEnd) {
    return TokError(Twine("expected directive ") +
                    Twine(AssemblerDirectiveEnd) + Twine(" not found"));
  }

  CollectStream.flush();
  return false;
}
/// Parse the assembler directive for new MsgPack-format PAL metadata.
bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() {
  std::string String;
  if (ParseToEndDirective(AMDGPU::PALMD::AssemblerDirectiveBegin,
                          AMDGPU::PALMD::AssemblerDirectiveEnd, String))
    return true;

  auto PALMetadata = getTargetStreamer().getPALMetadata();
  if (!PALMetadata->setFromString(String))
    return Error(getLoc(), "invalid PAL metadata");
  return false;
}
/// Parse the assembler directive for old linear-format PAL metadata.
bool AMDGPUAsmParser::ParseDirectivePALMetadata() {
  if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) {
    return Error(getLoc(),
                 (Twine(PALMD::AssemblerDirective) + Twine(" directive is "
                 "not available on non-amdpal OSes")).str());
  }

  auto PALMetadata = getTargetStreamer().getPALMetadata();
  PALMetadata->setLegacy();
  for (;;) {
    uint32_t Key, Value;
    if (ParseAsAbsoluteExpression(Key)) {
      return TokError(Twine("invalid value in ") +
                      Twine(PALMD::AssemblerDirective));
    }
    if (!trySkipToken(AsmToken::Comma)) {
      return TokError(Twine("expected an even number of values in ") +
                      Twine(PALMD::AssemblerDirective));
    }
    if (ParseAsAbsoluteExpression(Value)) {
      return TokError(Twine("invalid value in ") +
                      Twine(PALMD::AssemblerDirective));
    }
    PALMetadata->setRegister(Key, Value);
    if (!trySkipToken(AsmToken::Comma))
      break;
  }
  return false;
}
/// ParseDirectiveAMDGPULDS
///  ::= .amdgpu_lds identifier ',' size_expression [',' align_expression]
bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() {
  if (getParser().checkForValidSection())
    return true;

  StringRef Name;
  SMLoc NameLoc = getLoc();
  if (getParser().parseIdentifier(Name))
    return TokError("expected identifier in directive");

  MCSymbol *Symbol = getContext().getOrCreateSymbol(Name);
  if (getParser().parseComma())
    return true;

  unsigned LocalMemorySize = AMDGPU::IsaInfo::getLocalMemorySize(&getSTI());

  int64_t Size;
  SMLoc SizeLoc = getLoc();
  if (getParser().parseAbsoluteExpression(Size))
    return true;
  if (Size < 0)
    return Error(SizeLoc, "size must be non-negative");
  if (Size > LocalMemorySize)
    return Error(SizeLoc, "size is too large");

  int64_t Alignment = 4;
  if (trySkipToken(AsmToken::Comma)) {
    SMLoc AlignLoc = getLoc();
    if (getParser().parseAbsoluteExpression(Alignment))
      return true;
    if (Alignment < 0 || !isPowerOf2_64(Alignment))
      return Error(AlignLoc, "alignment must be a power of two");

    // Alignment larger than the size of LDS is possible in theory, as long
    // as the linker manages to place the symbol at address 0, but we do want
    // to make sure the alignment fits nicely into a 32-bit integer.
    if (Alignment >= 1u << 31)
      return Error(AlignLoc, "alignment is too large");
  }

  Symbol->redefineIfPossible();
  if (!Symbol->isUndefined())
    return Error(NameLoc, "invalid symbol redefinition");

  getTargetStreamer().emitAMDGPULDS(Symbol, Size, Align(Alignment));
  return false;
}
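
// An illustrative use of the directive parsed above (symbol name is
// hypothetical): ".amdgpu_lds my_lds_sym, 512, 16" reserves 512 bytes of LDS
// for my_lds_sym with 16-byte alignment.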
bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
  StringRef IDVal = DirectiveID.getString();

  if (isHsaAbiVersion3AndAbove(&getSTI())) {
    if (IDVal == ".amdhsa_kernel")
      return ParseDirectiveAMDHSAKernel();

    // TODO: Restructure/combine with PAL metadata directive.
    if (IDVal == AMDGPU::HSAMD::V3::AssemblerDirectiveBegin)
      return ParseDirectiveHSAMetadata();
  } else {
    if (IDVal == ".hsa_code_object_version")
      return ParseDirectiveHSACodeObjectVersion();

    if (IDVal == ".hsa_code_object_isa")
      return ParseDirectiveHSACodeObjectISA();

    if (IDVal == ".amd_kernel_code_t")
      return ParseDirectiveAMDKernelCodeT();

    if (IDVal == ".amdgpu_hsa_kernel")
      return ParseDirectiveAMDGPUHsaKernel();

    if (IDVal == ".amd_amdgpu_isa")
      return ParseDirectiveISAVersion();

    if (IDVal == AMDGPU::HSAMD::AssemblerDirectiveBegin)
      return ParseDirectiveHSAMetadata();
  }

  if (IDVal == ".amdgcn_target")
    return ParseDirectiveAMDGCNTarget();

  if (IDVal == ".amdgpu_lds")
    return ParseDirectiveAMDGPULDS();

  if (IDVal == PALMD::AssemblerDirectiveBegin)
    return ParseDirectivePALMetadataBegin();

  if (IDVal == PALMD::AssemblerDirective)
    return ParseDirectivePALMetadata();

  return true;
}
bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI,
                                           unsigned RegNo) {

  if (MRI.regsOverlap(AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15, RegNo))
    return isGFX9Plus();

  // GFX10+ has 2 more SGPRs 104 and 105.
  if (MRI.regsOverlap(AMDGPU::SGPR104_SGPR105, RegNo))
    return hasSGPR104_SGPR105();

  switch (RegNo) {
  case AMDGPU::SRC_SHARED_BASE_LO:
  case AMDGPU::SRC_SHARED_BASE:
  case AMDGPU::SRC_SHARED_LIMIT_LO:
  case AMDGPU::SRC_SHARED_LIMIT:
  case AMDGPU::SRC_PRIVATE_BASE_LO:
  case AMDGPU::SRC_PRIVATE_BASE:
  case AMDGPU::SRC_PRIVATE_LIMIT_LO:
  case AMDGPU::SRC_PRIVATE_LIMIT:
    return isGFX9Plus();
  case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
    return isGFX9Plus() && !isGFX11Plus();
  case AMDGPU::TBA:
  case AMDGPU::TBA_LO:
  case AMDGPU::TBA_HI:
  case AMDGPU::TMA:
  case AMDGPU::TMA_LO:
  case AMDGPU::TMA_HI:
    return !isGFX9Plus();
  case AMDGPU::XNACK_MASK:
  case AMDGPU::XNACK_MASK_LO:
  case AMDGPU::XNACK_MASK_HI:
    return (isVI() || isGFX9()) &&
           getTargetStreamer().getTargetID()->isXnackSupported();
  case AMDGPU::SGPR_NULL:
    return isGFX10Plus();
  default:
    break;
  }

  if (isCI())
    return true;

  if (isSI() || isGFX10Plus()) {
    // No flat_scr on SI.
    // On GFX10Plus flat scratch is not a valid register operand and can only be
    // accessed with s_setreg/s_getreg.
    switch (RegNo) {
    case AMDGPU::FLAT_SCR:
    case AMDGPU::FLAT_SCR_LO:
    case AMDGPU::FLAT_SCR_HI:
      return false;
    default:
      return true;
    }
  }

  // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that
  // SI/CI have.
  if (MRI.regsOverlap(AMDGPU::SGPR102_SGPR103, RegNo))
    return hasSGPR102_SGPR103();

  return true;
}
ParseStatus AMDGPUAsmParser::parseOperand(OperandVector &Operands,
                                          StringRef Mnemonic,
                                          OperandMode Mode) {
  ParseStatus Res = parseVOPD(Operands);
  if (Res.isSuccess() || Res.isFailure() || isToken(AsmToken::EndOfStatement))
    return Res;

  // Try to parse with a custom parser
  Res = MatchOperandParserImpl(Operands, Mnemonic);

  // If we successfully parsed the operand or if there was an error parsing,
  // we are done.
  //
  // If we are parsing after we reach EndOfStatement then this means we
  // are appending default values to the Operands list. This is only done
  // by custom parser, so we shouldn't continue on to the generic parsing.
  if (Res.isSuccess() || Res.isFailure() || isToken(AsmToken::EndOfStatement))
    return Res;

  SMLoc RBraceLoc;
  SMLoc LBraceLoc = getLoc();
  if (Mode == OperandMode_NSA && trySkipToken(AsmToken::LBrac)) {
    unsigned Prefix = Operands.size();

    for (;;) {
      auto Loc = getLoc();
      Res = parseReg(Operands);
      if (Res.isNoMatch())
        Error(Loc, "expected a register");
      if (!Res.isSuccess())
        return ParseStatus::Failure;

      RBraceLoc = getLoc();
      if (trySkipToken(AsmToken::RBrac))
        break;

      if (!skipToken(AsmToken::Comma,
                     "expected a comma or a closing square bracket"))
        return ParseStatus::Failure;
    }

    if (Operands.size() - Prefix > 1) {
      Operands.insert(Operands.begin() + Prefix,
                      AMDGPUOperand::CreateToken(this, "[", LBraceLoc));
      Operands.push_back(AMDGPUOperand::CreateToken(this, "]", RBraceLoc));
    }

    return ParseStatus::Success;
  }

  return parseRegOrImm(Operands);
}
StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) {
  // Clear any forced encodings from the previous instruction.
  setForcedEncodingSize(0);
  setForcedDPP(false);
  setForcedSDWA(false);

  if (Name.endswith("_e64_dpp")) {
    setForcedDPP(true);
    setForcedEncodingSize(64);
    return Name.substr(0, Name.size() - 8);
  } else if (Name.endswith("_e64")) {
    setForcedEncodingSize(64);
    return Name.substr(0, Name.size() - 4);
  } else if (Name.endswith("_e32")) {
    setForcedEncodingSize(32);
    return Name.substr(0, Name.size() - 4);
  } else if (Name.endswith("_dpp")) {
    setForcedDPP(true);
    return Name.substr(0, Name.size() - 4);
  } else if (Name.endswith("_sdwa")) {
    setForcedSDWA(true);
    return Name.substr(0, Name.size() - 5);
  }
  return Name;
}
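
// applyMnemonicAliases is generated by TableGen from MnemonicAlias
// definitions; it rewrites Mnemonic in place before operand parsing.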
static void applyMnemonicAliases(StringRef &Mnemonic,
                                 const FeatureBitset &Features,
                                 unsigned VariantID);

bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info,
                                       StringRef Name,
                                       SMLoc NameLoc, OperandVector &Operands) {
  // Add the instruction mnemonic
  Name = parseMnemonicSuffix(Name);

  // If the target architecture uses MnemonicAlias, call it here to parse
  // operands correctly.
  applyMnemonicAliases(Name, getAvailableFeatures(), 0);

  Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc));

  bool IsMIMG = Name.startswith("image_");

  while (!trySkipToken(AsmToken::EndOfStatement)) {
    OperandMode Mode = OperandMode_Default;
    if (IsMIMG && isGFX10Plus() && Operands.size() == 2)
      Mode = OperandMode_NSA;
    ParseStatus Res = parseOperand(Operands, Name, Mode);

    if (!Res.isSuccess()) {
      checkUnsupportedInstruction(Name, NameLoc);
      if (!Parser.hasPendingError()) {
        // FIXME: use real operand location rather than the current location.
        StringRef Msg = Res.isFailure() ? "failed parsing operand."
                                        : "not a valid operand.";
        Error(getLoc(), Msg);
      }
      while (!trySkipToken(AsmToken::EndOfStatement)) {
        lex();
      }
      return true;
    }

    // Eat the comma or space if there is one.
    trySkipToken(AsmToken::Comma);
  }

  return false;
}
//===----------------------------------------------------------------------===//
// Utility functions
//===----------------------------------------------------------------------===//

ParseStatus AMDGPUAsmParser::parseTokenOp(StringRef Name,
                                          OperandVector &Operands) {
  SMLoc S = getLoc();
  if (!trySkipId(Name))
    return ParseStatus::NoMatch;

  Operands.push_back(AMDGPUOperand::CreateToken(this, Name, S));
  return ParseStatus::Success;
}
ParseStatus AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix,
                                                int64_t &IntVal) {
  if (!trySkipId(Prefix, AsmToken::Colon))
    return ParseStatus::NoMatch;

  return parseExpr(IntVal) ? ParseStatus::Success : ParseStatus::Failure;
}

ParseStatus AMDGPUAsmParser::parseIntWithPrefix(
    const char *Prefix, OperandVector &Operands, AMDGPUOperand::ImmTy ImmTy,
    std::function<bool(int64_t &)> ConvertResult) {
  SMLoc S = getLoc();
  int64_t Value = 0;

  ParseStatus Res = parseIntWithPrefix(Prefix, Value);
  if (!Res.isSuccess())
    return Res;

  if (ConvertResult && !ConvertResult(Value)) {
    Error(S, "invalid " + StringRef(Prefix) + " value.");
  }

  Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy));
  return ParseStatus::Success;
}
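// For example (illustrative): an operand written as "offset:16" is handled by
// parseIntWithPrefix("offset", Operands, AMDGPUOperand::ImmTyOffset); the
// prefix and ':' are consumed and the value 16 is recorded as an immediate
// operand, optionally post-processed by the ConvertResult callback.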
ParseStatus AMDGPUAsmParser::parseOperandArrayWithPrefix(
    const char *Prefix, OperandVector &Operands, AMDGPUOperand::ImmTy ImmTy,
    bool (*ConvertResult)(int64_t &)) {
  SMLoc S = getLoc();
  if (!trySkipId(Prefix, AsmToken::Colon))
    return ParseStatus::NoMatch;

  if (!skipToken(AsmToken::LBrac, "expected a left square bracket"))
    return ParseStatus::Failure;

  unsigned Val = 0;
  const unsigned MaxSize = 4;

  // FIXME: How to verify the number of elements matches the number of src
  // operands?
  for (int I = 0; ; ++I) {
    int64_t Op;
    SMLoc Loc = getLoc();
    if (!parseExpr(Op))
      return ParseStatus::Failure;

    if (Op != 0 && Op != 1)
      return Error(Loc, "invalid " + StringRef(Prefix) + " value.");

    Val |= (Op << I);

    if (trySkipToken(AsmToken::RBrac))
      break;

    if (I + 1 == MaxSize)
      return Error(getLoc(), "expected a closing square bracket");

    if (!skipToken(AsmToken::Comma, "expected a comma"))
      return ParseStatus::Failure;
  }

  Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy));
  return ParseStatus::Success;
}
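// For example (illustrative): modifiers written as a small bit array, such as
// "neg:[0,1,1]", are parsed here; each element must be 0 or 1 and at most
// four elements are accepted (MaxSize above), packed into a single immediate.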
ParseStatus AMDGPUAsmParser::parseNamedBit(StringRef Name,
                                           OperandVector &Operands,
                                           AMDGPUOperand::ImmTy ImmTy) {
  int64_t Bit;
  SMLoc S = getLoc();

  if (trySkipId(Name)) {
    Bit = 1;
  } else if (trySkipId("no", Name)) {
    Bit = 0;
  } else {
    return ParseStatus::NoMatch;
  }

  if (Name == "r128" && !hasMIMG_R128())
    return Error(S, "r128 modifier is not supported on this GPU");
  if (Name == "a16" && !hasA16())
    return Error(S, "a16 modifier is not supported on this GPU");

  if (isGFX9() && ImmTy == AMDGPUOperand::ImmTyA16)
    ImmTy = AMDGPUOperand::ImmTyR128A16;

  Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy));
  return ParseStatus::Success;
}
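// For example (illustrative): parseNamedBit("r128", ...) accepts either
// "r128" (bit = 1) or "nor128" (bit = 0); anything else is a NoMatch so other
// parsers can try the token.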
unsigned AMDGPUAsmParser::getCPolKind(StringRef Id, StringRef Mnemo,
                                      bool &Disabling) const {
  Disabling = Id.consume_front("no");

  if (isGFX940() && !Mnemo.startswith("s_")) {
    return StringSwitch<unsigned>(Id)
        .Case("nt", AMDGPU::CPol::NT)
        .Case("sc0", AMDGPU::CPol::SC0)
        .Case("sc1", AMDGPU::CPol::SC1)
        .Default(0);
  }

  return StringSwitch<unsigned>(Id)
      .Case("dlc", AMDGPU::CPol::DLC)
      .Case("glc", AMDGPU::CPol::GLC)
      .Case("scc", AMDGPU::CPol::SCC)
      .Case("slc", AMDGPU::CPol::SLC)
      .Default(0);
}

ParseStatus AMDGPUAsmParser::parseCPol(OperandVector &Operands) {
  StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken();
  SMLoc OpLoc = getLoc();
  unsigned Enabled = 0, Seen = 0;
  for (;;) {
    SMLoc S = getLoc();
    bool Disabling;
    unsigned CPol = getCPolKind(getId(), Mnemo, Disabling);
    if (!CPol)
      break;

    lex();

    if (!isGFX10Plus() && CPol == AMDGPU::CPol::DLC)
      return Error(S, "dlc modifier is not supported on this GPU");

    if (!isGFX90A() && CPol == AMDGPU::CPol::SCC)
      return Error(S, "scc modifier is not supported on this GPU");

    if (Seen & CPol)
      return Error(S, "duplicate cache policy modifier");

    if (!Disabling)
      Enabled |= CPol;

    Seen |= CPol;
  }

  if (!Seen)
    return ParseStatus::NoMatch;

  Operands.push_back(
      AMDGPUOperand::CreateImm(this, Enabled, OpLoc, AMDGPUOperand::ImmTyCPol));
  return ParseStatus::Success;
}
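// For example (illustrative): a memory instruction may carry modifiers such
// as "glc slc dlc"; each one sets the corresponding CPol bit, a "no" prefix
// (e.g. "noglc") names the bit without enabling it, and repeating a modifier
// is rejected as a duplicate. On gfx940 non-scalar instructions the accepted
// names are "sc0", "sc1" and "nt" instead.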
static void addOptionalImmOperand(
  MCInst& Inst, const OperandVector& Operands,
  AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx,
  AMDGPUOperand::ImmTy ImmT,
  int64_t Default = 0) {
  auto i = OptionalIdx.find(ImmT);
  if (i != OptionalIdx.end()) {
    unsigned Idx = i->second;
    ((AMDGPUOperand &)*Operands[Idx]).addImmOperands(Inst, 1);
  } else {
    Inst.addOperand(MCOperand::createImm(Default));
  }
}

ParseStatus AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix,
                                                   StringRef &Value,
                                                   SMLoc &StringLoc) {
  if (!trySkipId(Prefix, AsmToken::Colon))
    return ParseStatus::NoMatch;

  StringLoc = getLoc();
  return parseId(Value, "expected an identifier") ? ParseStatus::Success
                                                  : ParseStatus::Failure;
}
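// Usage note (illustrative): the converters below record the parsed position
// of each optional immediate in OptionalIdx and then call
//   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset);
// if the modifier was never written, the default value (0 unless overridden)
// is emitted so the MCInst always has a full operand list.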
//===----------------------------------------------------------------------===//
// mtbuf format
//===----------------------------------------------------------------------===//

bool AMDGPUAsmParser::tryParseFmt(const char *Pref,
                                  int64_t MaxVal,
                                  int64_t &Fmt) {
  int64_t Val;
  SMLoc Loc = getLoc();

  auto Res = parseIntWithPrefix(Pref, Val);
  if (Res.isFailure())
    return false;
  if (Res.isNoMatch())
    return true;

  if (Val < 0 || Val > MaxVal) {
    Error(Loc, Twine("out of range ", StringRef(Pref)));
    return false;
  }

  Fmt = Val;
  return true;
}

// dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their
// values to live in a joint format operand in the MCInst encoding.
ParseStatus AMDGPUAsmParser::parseDfmtNfmt(int64_t &Format) {
  using namespace llvm::AMDGPU::MTBUFFormat;

  int64_t Dfmt = DFMT_UNDEF;
  int64_t Nfmt = NFMT_UNDEF;

  // dfmt and nfmt can appear in either order, and each is optional.
  for (int I = 0; I < 2; ++I) {
    if (Dfmt == DFMT_UNDEF && !tryParseFmt("dfmt", DFMT_MAX, Dfmt))
      return ParseStatus::Failure;

    if (Nfmt == NFMT_UNDEF && !tryParseFmt("nfmt", NFMT_MAX, Nfmt))
      return ParseStatus::Failure;

    // Skip optional comma between dfmt/nfmt
    // but guard against 2 commas following each other.
    if ((Dfmt == DFMT_UNDEF) != (Nfmt == NFMT_UNDEF) &&
        !peekToken().is(AsmToken::Comma)) {
      trySkipToken(AsmToken::Comma);
    }
  }

  if (Dfmt == DFMT_UNDEF && Nfmt == NFMT_UNDEF)
    return ParseStatus::NoMatch;

  Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt;
  Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt;

  Format = encodeDfmtNfmt(Dfmt, Nfmt);
  return ParseStatus::Success;
}

ParseStatus AMDGPUAsmParser::parseUfmt(int64_t &Format) {
  using namespace llvm::AMDGPU::MTBUFFormat;

  int64_t Fmt = UFMT_UNDEF;

  if (!tryParseFmt("format", UFMT_MAX, Fmt))
    return ParseStatus::Failure;

  if (Fmt == UFMT_UNDEF)
    return ParseStatus::NoMatch;

  Format = Fmt;
  return ParseStatus::Success;
}
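// For example (illustrative): a tbuffer instruction may spell its format
// either in the legacy split form "dfmt:14, nfmt:2" (combined by
// encodeDfmtNfmt) or, on gfx10+, as a single unified "format:..." value
// parsed by parseUfmt above.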
bool AMDGPUAsmParser::matchDfmtNfmt(int64_t &Dfmt,
                                    int64_t &Nfmt,
                                    StringRef FormatStr,
                                    SMLoc Loc) {
  using namespace llvm::AMDGPU::MTBUFFormat;
  int64_t Format;

  Format = getDfmt(FormatStr);
  if (Format != DFMT_UNDEF) {
    Dfmt = Format;
    return true;
  }

  Format = getNfmt(FormatStr, getSTI());
  if (Format != NFMT_UNDEF) {
    Nfmt = Format;
    return true;
  }

  Error(Loc, "unsupported format");
  return false;
}

ParseStatus AMDGPUAsmParser::parseSymbolicSplitFormat(StringRef FormatStr,
                                                      SMLoc FormatLoc,
                                                      int64_t &Format) {
  using namespace llvm::AMDGPU::MTBUFFormat;

  int64_t Dfmt = DFMT_UNDEF;
  int64_t Nfmt = NFMT_UNDEF;
  if (!matchDfmtNfmt(Dfmt, Nfmt, FormatStr, FormatLoc))
    return ParseStatus::Failure;

  if (trySkipToken(AsmToken::Comma)) {
    StringRef Str;
    SMLoc Loc = getLoc();
    if (!parseId(Str, "expected a format string") ||
        !matchDfmtNfmt(Dfmt, Nfmt, Str, Loc))
      return ParseStatus::Failure;
    if (Dfmt == DFMT_UNDEF)
      return Error(Loc, "duplicate numeric format");
    if (Nfmt == NFMT_UNDEF)
      return Error(Loc, "duplicate data format");
  }

  Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt;
  Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt;

  if (isGFX10Plus()) {
    auto Ufmt = convertDfmtNfmt2Ufmt(Dfmt, Nfmt, getSTI());
    if (Ufmt == UFMT_UNDEF)
      return Error(FormatLoc, "unsupported format");
    Format = Ufmt;
  } else {
    Format = encodeDfmtNfmt(Dfmt, Nfmt);
  }

  return ParseStatus::Success;
}
ParseStatus AMDGPUAsmParser::parseSymbolicUnifiedFormat(StringRef FormatStr,
                                                        SMLoc Loc,
                                                        int64_t &Format) {
  using namespace llvm::AMDGPU::MTBUFFormat;

  auto Id = getUnifiedFormat(FormatStr, getSTI());
  if (Id == UFMT_UNDEF)
    return ParseStatus::NoMatch;

  if (!isGFX10Plus())
    return Error(Loc, "unified format is not supported on this GPU");

  Format = Id;
  return ParseStatus::Success;
}

ParseStatus AMDGPUAsmParser::parseNumericFormat(int64_t &Format) {
  using namespace llvm::AMDGPU::MTBUFFormat;
  SMLoc Loc = getLoc();

  if (!parseExpr(Format))
    return ParseStatus::Failure;
  if (!isValidFormatEncoding(Format, getSTI()))
    return Error(Loc, "out of range format");

  return ParseStatus::Success;
}

ParseStatus AMDGPUAsmParser::parseSymbolicOrNumericFormat(int64_t &Format) {
  using namespace llvm::AMDGPU::MTBUFFormat;

  if (!trySkipId("format", AsmToken::Colon))
    return ParseStatus::NoMatch;

  if (trySkipToken(AsmToken::LBrac)) {
    StringRef FormatStr;
    SMLoc Loc = getLoc();
    if (!parseId(FormatStr, "expected a format string"))
      return ParseStatus::Failure;

    auto Res = parseSymbolicUnifiedFormat(FormatStr, Loc, Format);
    if (Res.isNoMatch())
      Res = parseSymbolicSplitFormat(FormatStr, Loc, Format);
    if (!Res.isSuccess())
      return Res;

    if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
      return ParseStatus::Failure;

    return ParseStatus::Success;
  }

  return parseNumericFormat(Format);
}
ParseStatus AMDGPUAsmParser::parseFORMAT(OperandVector &Operands) {
  using namespace llvm::AMDGPU::MTBUFFormat;

  int64_t Format = getDefaultFormatEncoding(getSTI());
  ParseStatus Res;
  SMLoc Loc = getLoc();

  // Parse legacy format syntax.
  Res = isGFX10Plus() ? parseUfmt(Format) : parseDfmtNfmt(Format);
  if (Res.isFailure())
    return Res;

  bool FormatFound = Res.isSuccess();

  Operands.push_back(
      AMDGPUOperand::CreateImm(this, Format, Loc, AMDGPUOperand::ImmTyFORMAT));

  if (FormatFound)
    trySkipToken(AsmToken::Comma);

  if (isToken(AsmToken::EndOfStatement)) {
    // We are expecting an soffset operand,
    // but let matcher handle the error.
    return ParseStatus::Success;
  }

  Res = parseRegOrImm(Operands);
  if (!Res.isSuccess())
    return Res;

  trySkipToken(AsmToken::Comma);

  if (!FormatFound) {
    Res = parseSymbolicOrNumericFormat(Format);
    if (Res.isFailure())
      return Res;
    if (Res.isSuccess()) {
      auto Size = Operands.size();
      AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands[Size - 2]);
      assert(Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyFORMAT);
      Op.setImm(Format);
    }
    return ParseStatus::Success;
  }

  if (isId("format") && peekToken().is(AsmToken::Colon))
    return Error(getLoc(), "duplicate format");
  return ParseStatus::Success;
}

ParseStatus AMDGPUAsmParser::parseFlatOffset(OperandVector &Operands) {
  ParseStatus Res =
      parseIntWithPrefix("offset", Operands, AMDGPUOperand::ImmTyOffset);
  if (Res.isNoMatch()) {
    Res = parseIntWithPrefix("inst_offset", Operands,
                             AMDGPUOperand::ImmTyInstOffset);
  }
  return Res;
}

ParseStatus AMDGPUAsmParser::parseR128A16(OperandVector &Operands) {
  ParseStatus Res =
      parseNamedBit("r128", Operands, AMDGPUOperand::ImmTyR128A16);
  if (Res.isNoMatch())
    Res = parseNamedBit("a16", Operands, AMDGPUOperand::ImmTyA16);
  return Res;
}

ParseStatus AMDGPUAsmParser::parseBLGP(OperandVector &Operands) {
  ParseStatus Res =
      parseIntWithPrefix("blgp", Operands, AMDGPUOperand::ImmTyBLGP);
  if (Res.isNoMatch()) {
    Res =
        parseOperandArrayWithPrefix("neg", Operands, AMDGPUOperand::ImmTyBLGP);
  }
  return Res;
}
//===----------------------------------------------------------------------===//
// ds
//===----------------------------------------------------------------------===//

void AMDGPUAsmParser::cvtDSOffset01(MCInst &Inst,
                                    const OperandVector &Operands) {
  OptionalImmIndexMap OptionalIdx;

  for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);

    // Add the register arguments
    if (Op.isReg()) {
      Op.addRegOperands(Inst, 1);
      continue;
    }

    // Handle optional arguments
    OptionalIdx[Op.getImmTy()] = i;
  }

  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset0);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset1);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);

  Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
}

void AMDGPUAsmParser::cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
                                bool IsGdsHardcoded) {
  OptionalImmIndexMap OptionalIdx;
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
  AMDGPUOperand::ImmTy OffsetType = AMDGPUOperand::ImmTyOffset;

  for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);

    auto TiedTo =
        Desc.getOperandConstraint(Inst.getNumOperands(), MCOI::TIED_TO);

    if (TiedTo != -1) {
      assert((unsigned)TiedTo < Inst.getNumOperands());
      Inst.addOperand(Inst.getOperand(TiedTo));
    }

    // Add the register arguments
    if (Op.isReg()) {
      Op.addRegOperands(Inst, 1);
      continue;
    }

    if (Op.isToken() && Op.getToken() == "gds") {
      IsGdsHardcoded = true;
      continue;
    }

    // Handle optional arguments
    OptionalIdx[Op.getImmTy()] = i;

    if (Op.getImmTy() == AMDGPUOperand::ImmTySwizzle)
      OffsetType = AMDGPUOperand::ImmTySwizzle;
  }

  addOptionalImmOperand(Inst, Operands, OptionalIdx, OffsetType);

  if (!IsGdsHardcoded) {
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);
  }
  Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
}
void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) {
  OptionalImmIndexMap OptionalIdx;

  unsigned OperandIdx[4];
  unsigned EnMask = 0;
  int SrcIdx = 0;

  for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);

    // Add the register arguments
    if (Op.isReg()) {
      assert(SrcIdx < 4);
      OperandIdx[SrcIdx] = Inst.size();
      Op.addRegOperands(Inst, 1);
      ++SrcIdx;
      continue;
    }

    if (Op.isOff()) {
      assert(SrcIdx < 4);
      OperandIdx[SrcIdx] = Inst.size();
      Inst.addOperand(MCOperand::createReg(AMDGPU::NoRegister));
      ++SrcIdx;
      continue;
    }

    if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) {
      Op.addImmOperands(Inst, 1);
      continue;
    }

    if (Op.isToken() && (Op.getToken() == "done" || Op.getToken() == "row_en"))
      continue;

    // Handle optional arguments
    OptionalIdx[Op.getImmTy()] = i;
  }

  assert(SrcIdx == 4);

  bool Compr = false;
  if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) {
    Compr = true;
    Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]);
    Inst.getOperand(OperandIdx[2]).setReg(AMDGPU::NoRegister);
    Inst.getOperand(OperandIdx[3]).setReg(AMDGPU::NoRegister);
  }

  for (auto i = 0; i < SrcIdx; ++i) {
    if (Inst.getOperand(OperandIdx[i]).getReg() != AMDGPU::NoRegister) {
      EnMask |= Compr ? (0x3 << i * 2) : (0x1 << i);
    }
  }

  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr);

  Inst.addOperand(MCOperand::createImm(EnMask));
}
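// Note (illustrative): EnMask ends up with one bit per written channel; in
// the compressed case each surviving register covers two channels, which is
// why the loop above ORs in 0x3 << (i * 2) instead of a single bit.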
//===----------------------------------------------------------------------===//
// s_waitcnt
//===----------------------------------------------------------------------===//

static bool
encodeCnt(
  const AMDGPU::IsaVersion ISA,
  int64_t &IntVal,
  int64_t CntVal,
  bool Saturate,
  unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned),
  unsigned (*decode)(const IsaVersion &Version, unsigned))
{
  bool Failed = false;

  IntVal = encode(ISA, IntVal, CntVal);
  if (CntVal != decode(ISA, IntVal)) {
    if (Saturate) {
      IntVal = encode(ISA, IntVal, -1);
    } else {
      Failed = true;
    }
  }
  return Failed;
}

bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) {
  SMLoc CntLoc = getLoc();
  StringRef CntName = getTokenStr();

  if (!skipToken(AsmToken::Identifier, "expected a counter name") ||
      !skipToken(AsmToken::LParen, "expected a left parenthesis"))
    return false;

  int64_t CntVal;
  SMLoc ValLoc = getLoc();
  if (!parseExpr(CntVal))
    return false;

  AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());

  bool Failed = true;
  bool Sat = CntName.endswith("_sat");

  if (CntName == "vmcnt" || CntName == "vmcnt_sat") {
    Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt);
  } else if (CntName == "expcnt" || CntName == "expcnt_sat") {
    Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt);
  } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") {
    Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt);
  } else {
    Error(CntLoc, "invalid counter name " + CntName);
    return false;
  }

  if (Failed) {
    Error(ValLoc, "too large value for " + CntName);
    return false;
  }

  if (!skipToken(AsmToken::RParen, "expected a closing parenthesis"))
    return false;

  if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) {
    if (isToken(AsmToken::EndOfStatement)) {
      Error(getLoc(), "expected a counter name");
      return false;
    }
  }

  return true;
}

ParseStatus AMDGPUAsmParser::parseSWaitCnt(OperandVector &Operands) {
  AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
  int64_t Waitcnt = getWaitcntBitMask(ISA);
  SMLoc S = getLoc();

  if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
    while (!isToken(AsmToken::EndOfStatement)) {
      if (!parseCnt(Waitcnt))
        return ParseStatus::Failure;
    }
  } else {
    if (!parseExpr(Waitcnt))
      return ParseStatus::Failure;
  }

  Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S));
  return ParseStatus::Success;
}
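// For example (illustrative): "s_waitcnt vmcnt(0) lgkmcnt(0)" and the
// ampersand-separated form "s_waitcnt vmcnt(0) & lgkmcnt(0)" both start from
// the full mask returned by getWaitcntBitMask() and then encode the requested
// counts into the named fields; the "_sat" spellings clamp an out-of-range
// value instead of reporting "too large value".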
6507 bool AMDGPUAsmParser::parseDelay(int64_t &Delay
) {
6508 SMLoc FieldLoc
= getLoc();
6509 StringRef FieldName
= getTokenStr();
6510 if (!skipToken(AsmToken::Identifier
, "expected a field name") ||
6511 !skipToken(AsmToken::LParen
, "expected a left parenthesis"))
6514 SMLoc ValueLoc
= getLoc();
6515 StringRef ValueName
= getTokenStr();
6516 if (!skipToken(AsmToken::Identifier
, "expected a value name") ||
6517 !skipToken(AsmToken::RParen
, "expected a right parenthesis"))
6521 if (FieldName
== "instid0") {
6523 } else if (FieldName
== "instskip") {
6525 } else if (FieldName
== "instid1") {
6528 Error(FieldLoc
, "invalid field name " + FieldName
);
6534 // Parse values for instskip.
6535 Value
= StringSwitch
<int>(ValueName
)
6544 // Parse values for instid0 and instid1.
6545 Value
= StringSwitch
<int>(ValueName
)
6547 .Case("VALU_DEP_1", 1)
6548 .Case("VALU_DEP_2", 2)
6549 .Case("VALU_DEP_3", 3)
6550 .Case("VALU_DEP_4", 4)
6551 .Case("TRANS32_DEP_1", 5)
6552 .Case("TRANS32_DEP_2", 6)
6553 .Case("TRANS32_DEP_3", 7)
6554 .Case("FMA_ACCUM_CYCLE_1", 8)
6555 .Case("SALU_CYCLE_1", 9)
6556 .Case("SALU_CYCLE_2", 10)
6557 .Case("SALU_CYCLE_3", 11)
6561 Error(ValueLoc
, "invalid value name " + ValueName
);
6565 Delay
|= Value
<< Shift
;
6569 ParseStatus
AMDGPUAsmParser::parseSDelayALU(OperandVector
&Operands
) {
6573 if (isToken(AsmToken::Identifier
) && peekToken().is(AsmToken::LParen
)) {
6575 if (!parseDelay(Delay
))
6576 return ParseStatus::Failure
;
6577 } while (trySkipToken(AsmToken::Pipe
));
6579 if (!parseExpr(Delay
))
6580 return ParseStatus::Failure
;
6583 Operands
.push_back(AMDGPUOperand::CreateImm(this, Delay
, S
));
6584 return ParseStatus::Success
;
6588 AMDGPUOperand::isSWaitCnt() const {
6592 bool AMDGPUOperand::isSDelayALU() const { return isImm(); }
6594 //===----------------------------------------------------------------------===//
6596 //===----------------------------------------------------------------------===//
6598 void AMDGPUAsmParser::depCtrError(SMLoc Loc
, int ErrorId
,
6599 StringRef DepCtrName
) {
6601 case OPR_ID_UNKNOWN
:
6602 Error(Loc
, Twine("invalid counter name ", DepCtrName
));
6604 case OPR_ID_UNSUPPORTED
:
6605 Error(Loc
, Twine(DepCtrName
, " is not supported on this GPU"));
6607 case OPR_ID_DUPLICATE
:
6608 Error(Loc
, Twine("duplicate counter name ", DepCtrName
));
6610 case OPR_VAL_INVALID
:
6611 Error(Loc
, Twine("invalid value for ", DepCtrName
));
6618 bool AMDGPUAsmParser::parseDepCtr(int64_t &DepCtr
, unsigned &UsedOprMask
) {
6620 using namespace llvm::AMDGPU::DepCtr
;
6622 SMLoc DepCtrLoc
= getLoc();
6623 StringRef DepCtrName
= getTokenStr();
6625 if (!skipToken(AsmToken::Identifier
, "expected a counter name") ||
6626 !skipToken(AsmToken::LParen
, "expected a left parenthesis"))
6630 if (!parseExpr(ExprVal
))
6633 unsigned PrevOprMask
= UsedOprMask
;
6634 int CntVal
= encodeDepCtr(DepCtrName
, ExprVal
, UsedOprMask
, getSTI());
6637 depCtrError(DepCtrLoc
, CntVal
, DepCtrName
);
6641 if (!skipToken(AsmToken::RParen
, "expected a closing parenthesis"))
6644 if (trySkipToken(AsmToken::Amp
) || trySkipToken(AsmToken::Comma
)) {
6645 if (isToken(AsmToken::EndOfStatement
)) {
6646 Error(getLoc(), "expected a counter name");
6651 unsigned CntValMask
= PrevOprMask
^ UsedOprMask
;
6652 DepCtr
= (DepCtr
& ~CntValMask
) | CntVal
;
6656 ParseStatus
AMDGPUAsmParser::parseDepCtr(OperandVector
&Operands
) {
6657 using namespace llvm::AMDGPU::DepCtr
;
6659 int64_t DepCtr
= getDefaultDepCtrEncoding(getSTI());
6660 SMLoc Loc
= getLoc();
6662 if (isToken(AsmToken::Identifier
) && peekToken().is(AsmToken::LParen
)) {
6663 unsigned UsedOprMask
= 0;
6664 while (!isToken(AsmToken::EndOfStatement
)) {
6665 if (!parseDepCtr(DepCtr
, UsedOprMask
))
6666 return ParseStatus::Failure
;
6669 if (!parseExpr(DepCtr
))
6670 return ParseStatus::Failure
;
6673 Operands
.push_back(AMDGPUOperand::CreateImm(this, DepCtr
, Loc
));
6674 return ParseStatus::Success
;
6677 bool AMDGPUOperand::isDepCtr() const { return isS16Imm(); }
6679 //===----------------------------------------------------------------------===//
6681 //===----------------------------------------------------------------------===//
6684 AMDGPUAsmParser::parseHwregBody(OperandInfoTy
&HwReg
,
6685 OperandInfoTy
&Offset
,
6686 OperandInfoTy
&Width
) {
6687 using namespace llvm::AMDGPU::Hwreg
;
6689 // The register may be specified by name or using a numeric code
6690 HwReg
.Loc
= getLoc();
6691 if (isToken(AsmToken::Identifier
) &&
6692 (HwReg
.Id
= getHwregId(getTokenStr(), getSTI())) != OPR_ID_UNKNOWN
) {
6693 HwReg
.IsSymbolic
= true;
6694 lex(); // skip register name
6695 } else if (!parseExpr(HwReg
.Id
, "a register name")) {
6699 if (trySkipToken(AsmToken::RParen
))
6702 // parse optional params
6703 if (!skipToken(AsmToken::Comma
, "expected a comma or a closing parenthesis"))
6706 Offset
.Loc
= getLoc();
6707 if (!parseExpr(Offset
.Id
))
6710 if (!skipToken(AsmToken::Comma
, "expected a comma"))
6713 Width
.Loc
= getLoc();
6714 return parseExpr(Width
.Id
) &&
6715 skipToken(AsmToken::RParen
, "expected a closing parenthesis");
6719 AMDGPUAsmParser::validateHwreg(const OperandInfoTy
&HwReg
,
6720 const OperandInfoTy
&Offset
,
6721 const OperandInfoTy
&Width
) {
6723 using namespace llvm::AMDGPU::Hwreg
;
6725 if (HwReg
.IsSymbolic
) {
6726 if (HwReg
.Id
== OPR_ID_UNSUPPORTED
) {
6728 "specified hardware register is not supported on this GPU");
6732 if (!isValidHwreg(HwReg
.Id
)) {
6734 "invalid code of hardware register: only 6-bit values are legal");
6738 if (!isValidHwregOffset(Offset
.Id
)) {
6739 Error(Offset
.Loc
, "invalid bit offset: only 5-bit values are legal");
6742 if (!isValidHwregWidth(Width
.Id
)) {
6744 "invalid bitfield width: only values from 1 to 32 are legal");
6750 ParseStatus
AMDGPUAsmParser::parseHwreg(OperandVector
&Operands
) {
6751 using namespace llvm::AMDGPU::Hwreg
;
6754 SMLoc Loc
= getLoc();
6756 if (trySkipId("hwreg", AsmToken::LParen
)) {
6757 OperandInfoTy
HwReg(OPR_ID_UNKNOWN
);
6758 OperandInfoTy
Offset(OFFSET_DEFAULT_
);
6759 OperandInfoTy
Width(WIDTH_DEFAULT_
);
6760 if (parseHwregBody(HwReg
, Offset
, Width
) &&
6761 validateHwreg(HwReg
, Offset
, Width
)) {
6762 ImmVal
= encodeHwreg(HwReg
.Id
, Offset
.Id
, Width
.Id
);
6764 return ParseStatus::Failure
;
6766 } else if (parseExpr(ImmVal
, "a hwreg macro")) {
6767 if (ImmVal
< 0 || !isUInt
<16>(ImmVal
))
6768 return Error(Loc
, "invalid immediate: only 16-bit values are legal");
6770 return ParseStatus::Failure
;
6773 Operands
.push_back(AMDGPUOperand::CreateImm(this, ImmVal
, Loc
, AMDGPUOperand::ImmTyHwreg
));
6774 return ParseStatus::Success
;
6777 bool AMDGPUOperand::isHwreg() const {
6778 return isImmTy(ImmTyHwreg
);
6781 //===----------------------------------------------------------------------===//
6783 //===----------------------------------------------------------------------===//
6786 AMDGPUAsmParser::parseSendMsgBody(OperandInfoTy
&Msg
,
6788 OperandInfoTy
&Stream
) {
6789 using namespace llvm::AMDGPU::SendMsg
;
6792 if (isToken(AsmToken::Identifier
) &&
6793 (Msg
.Id
= getMsgId(getTokenStr(), getSTI())) != OPR_ID_UNKNOWN
) {
6794 Msg
.IsSymbolic
= true;
6795 lex(); // skip message name
6796 } else if (!parseExpr(Msg
.Id
, "a message name")) {
6800 if (trySkipToken(AsmToken::Comma
)) {
6801 Op
.IsDefined
= true;
6803 if (isToken(AsmToken::Identifier
) &&
6804 (Op
.Id
= getMsgOpId(Msg
.Id
, getTokenStr())) >= 0) {
6805 lex(); // skip operation name
6806 } else if (!parseExpr(Op
.Id
, "an operation name")) {
6810 if (trySkipToken(AsmToken::Comma
)) {
6811 Stream
.IsDefined
= true;
6812 Stream
.Loc
= getLoc();
6813 if (!parseExpr(Stream
.Id
))
6818 return skipToken(AsmToken::RParen
, "expected a closing parenthesis");
6822 AMDGPUAsmParser::validateSendMsg(const OperandInfoTy
&Msg
,
6823 const OperandInfoTy
&Op
,
6824 const OperandInfoTy
&Stream
) {
6825 using namespace llvm::AMDGPU::SendMsg
;
6827 // Validation strictness depends on whether message is specified
6828 // in a symbolic or in a numeric form. In the latter case
6829 // only encoding possibility is checked.
6830 bool Strict
= Msg
.IsSymbolic
;
6833 if (Msg
.Id
== OPR_ID_UNSUPPORTED
) {
6834 Error(Msg
.Loc
, "specified message id is not supported on this GPU");
6838 if (!isValidMsgId(Msg
.Id
, getSTI())) {
6839 Error(Msg
.Loc
, "invalid message id");
6843 if (Strict
&& (msgRequiresOp(Msg
.Id
, getSTI()) != Op
.IsDefined
)) {
6845 Error(Op
.Loc
, "message does not support operations");
6847 Error(Msg
.Loc
, "missing message operation");
6851 if (!isValidMsgOp(Msg
.Id
, Op
.Id
, getSTI(), Strict
)) {
6852 Error(Op
.Loc
, "invalid operation id");
6855 if (Strict
&& !msgSupportsStream(Msg
.Id
, Op
.Id
, getSTI()) &&
6857 Error(Stream
.Loc
, "message operation does not support streams");
6860 if (!isValidMsgStream(Msg
.Id
, Op
.Id
, Stream
.Id
, getSTI(), Strict
)) {
6861 Error(Stream
.Loc
, "invalid message stream id");
6867 ParseStatus
AMDGPUAsmParser::parseSendMsg(OperandVector
&Operands
) {
6868 using namespace llvm::AMDGPU::SendMsg
;
6871 SMLoc Loc
= getLoc();
6873 if (trySkipId("sendmsg", AsmToken::LParen
)) {
6874 OperandInfoTy
Msg(OPR_ID_UNKNOWN
);
6875 OperandInfoTy
Op(OP_NONE_
);
6876 OperandInfoTy
Stream(STREAM_ID_NONE_
);
6877 if (parseSendMsgBody(Msg
, Op
, Stream
) &&
6878 validateSendMsg(Msg
, Op
, Stream
)) {
6879 ImmVal
= encodeMsg(Msg
.Id
, Op
.Id
, Stream
.Id
);
6881 return ParseStatus::Failure
;
6883 } else if (parseExpr(ImmVal
, "a sendmsg macro")) {
6884 if (ImmVal
< 0 || !isUInt
<16>(ImmVal
))
6885 return Error(Loc
, "invalid immediate: only 16-bit values are legal");
6887 return ParseStatus::Failure
;
6890 Operands
.push_back(AMDGPUOperand::CreateImm(this, ImmVal
, Loc
, AMDGPUOperand::ImmTySendMsg
));
6891 return ParseStatus::Success
;
6894 bool AMDGPUOperand::isSendMsg() const {
6895 return isImmTy(ImmTySendMsg
);
6898 //===----------------------------------------------------------------------===//
6900 //===----------------------------------------------------------------------===//
6902 ParseStatus
AMDGPUAsmParser::parseInterpSlot(OperandVector
&Operands
) {
6907 return ParseStatus::NoMatch
;
6909 int Slot
= StringSwitch
<int>(Str
)
6916 return Error(S
, "invalid interpolation slot");
6918 Operands
.push_back(AMDGPUOperand::CreateImm(this, Slot
, S
,
6919 AMDGPUOperand::ImmTyInterpSlot
));
6920 return ParseStatus::Success
;
6923 ParseStatus
AMDGPUAsmParser::parseInterpAttr(OperandVector
&Operands
) {
6928 return ParseStatus::NoMatch
;
6930 if (!Str
.startswith("attr"))
6931 return Error(S
, "invalid interpolation attribute");
6933 StringRef Chan
= Str
.take_back(2);
6934 int AttrChan
= StringSwitch
<int>(Chan
)
6941 return Error(S
, "invalid or missing interpolation attribute channel");
6943 Str
= Str
.drop_back(2).drop_front(4);
6946 if (Str
.getAsInteger(10, Attr
))
6947 return Error(S
, "invalid or missing interpolation attribute number");
6950 return Error(S
, "out of bounds interpolation attribute number");
6952 SMLoc SChan
= SMLoc::getFromPointer(Chan
.data());
6954 Operands
.push_back(AMDGPUOperand::CreateImm(this, Attr
, S
,
6955 AMDGPUOperand::ImmTyInterpAttr
));
6956 Operands
.push_back(AMDGPUOperand::CreateImm(
6957 this, AttrChan
, SChan
, AMDGPUOperand::ImmTyInterpAttrChan
));
6958 return ParseStatus::Success
;
6961 //===----------------------------------------------------------------------===//
6963 //===----------------------------------------------------------------------===//
6965 ParseStatus
AMDGPUAsmParser::parseExpTgt(OperandVector
&Operands
) {
6966 using namespace llvm::AMDGPU::Exp
;
6972 return ParseStatus::NoMatch
;
6974 unsigned Id
= getTgtId(Str
);
6975 if (Id
== ET_INVALID
|| !isSupportedTgtId(Id
, getSTI()))
6976 return Error(S
, (Id
== ET_INVALID
)
6977 ? "invalid exp target"
6978 : "exp target is not supported on this GPU");
6980 Operands
.push_back(AMDGPUOperand::CreateImm(this, Id
, S
,
6981 AMDGPUOperand::ImmTyExpTgt
));
6982 return ParseStatus::Success
;
6985 //===----------------------------------------------------------------------===//
6987 //===----------------------------------------------------------------------===//
6990 AMDGPUAsmParser::isId(const AsmToken
&Token
, const StringRef Id
) const {
6991 return Token
.is(AsmToken::Identifier
) && Token
.getString() == Id
;
6995 AMDGPUAsmParser::isId(const StringRef Id
) const {
6996 return isId(getToken(), Id
);
7000 AMDGPUAsmParser::isToken(const AsmToken::TokenKind Kind
) const {
7001 return getTokenKind() == Kind
;
7004 StringRef
AMDGPUAsmParser::getId() const {
7005 return isToken(AsmToken::Identifier
) ? getTokenStr() : StringRef();
7009 AMDGPUAsmParser::trySkipId(const StringRef Id
) {
7018 AMDGPUAsmParser::trySkipId(const StringRef Pref
, const StringRef Id
) {
7019 if (isToken(AsmToken::Identifier
)) {
7020 StringRef Tok
= getTokenStr();
7021 if (Tok
.startswith(Pref
) && Tok
.drop_front(Pref
.size()) == Id
) {
7030 AMDGPUAsmParser::trySkipId(const StringRef Id
, const AsmToken::TokenKind Kind
) {
7031 if (isId(Id
) && peekToken().is(Kind
)) {
7040 AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind
) {
7041 if (isToken(Kind
)) {
7049 AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind
,
7050 const StringRef ErrMsg
) {
7051 if (!trySkipToken(Kind
)) {
7052 Error(getLoc(), ErrMsg
);
7059 AMDGPUAsmParser::parseExpr(int64_t &Imm
, StringRef Expected
) {
7063 if (Parser
.parseExpression(Expr
))
7066 if (Expr
->evaluateAsAbsolute(Imm
))
7069 if (Expected
.empty()) {
7070 Error(S
, "expected absolute expression");
7072 Error(S
, Twine("expected ", Expected
) +
7073 Twine(" or an absolute expression"));
7079 AMDGPUAsmParser::parseExpr(OperandVector
&Operands
) {
7083 if (Parser
.parseExpression(Expr
))
7087 if (Expr
->evaluateAsAbsolute(IntVal
)) {
7088 Operands
.push_back(AMDGPUOperand::CreateImm(this, IntVal
, S
));
7090 Operands
.push_back(AMDGPUOperand::CreateExpr(this, Expr
, S
));
7096 AMDGPUAsmParser::parseString(StringRef
&Val
, const StringRef ErrMsg
) {
7097 if (isToken(AsmToken::String
)) {
7098 Val
= getToken().getStringContents();
7102 Error(getLoc(), ErrMsg
);
7108 AMDGPUAsmParser::parseId(StringRef
&Val
, const StringRef ErrMsg
) {
7109 if (isToken(AsmToken::Identifier
)) {
7110 Val
= getTokenStr();
7114 if (!ErrMsg
.empty())
7115 Error(getLoc(), ErrMsg
);
7121 AMDGPUAsmParser::getToken() const {
7122 return Parser
.getTok();
7125 AsmToken
AMDGPUAsmParser::peekToken(bool ShouldSkipSpace
) {
7126 return isToken(AsmToken::EndOfStatement
)
7128 : getLexer().peekTok(ShouldSkipSpace
);
7132 AMDGPUAsmParser::peekTokens(MutableArrayRef
<AsmToken
> Tokens
) {
7133 auto TokCount
= getLexer().peekTokens(Tokens
);
7135 for (auto Idx
= TokCount
; Idx
< Tokens
.size(); ++Idx
)
7136 Tokens
[Idx
] = AsmToken(AsmToken::Error
, "");
7140 AMDGPUAsmParser::getTokenKind() const {
7141 return getLexer().getKind();
7145 AMDGPUAsmParser::getLoc() const {
7146 return getToken().getLoc();
7150 AMDGPUAsmParser::getTokenStr() const {
7151 return getToken().getString();
7155 AMDGPUAsmParser::lex() {
7159 SMLoc
AMDGPUAsmParser::getInstLoc(const OperandVector
&Operands
) const {
7160 return ((AMDGPUOperand
&)*Operands
[0]).getStartLoc();
7164 AMDGPUAsmParser::getOperandLoc(std::function
<bool(const AMDGPUOperand
&)> Test
,
7165 const OperandVector
&Operands
) const {
7166 for (unsigned i
= Operands
.size() - 1; i
> 0; --i
) {
7167 AMDGPUOperand
&Op
= ((AMDGPUOperand
&)*Operands
[i
]);
7169 return Op
.getStartLoc();
7171 return getInstLoc(Operands
);
7175 AMDGPUAsmParser::getImmLoc(AMDGPUOperand::ImmTy Type
,
7176 const OperandVector
&Operands
) const {
7177 auto Test
= [=](const AMDGPUOperand
& Op
) { return Op
.isImmTy(Type
); };
7178 return getOperandLoc(Test
, Operands
);
7182 AMDGPUAsmParser::getRegLoc(unsigned Reg
,
7183 const OperandVector
&Operands
) const {
7184 auto Test
= [=](const AMDGPUOperand
& Op
) {
7185 return Op
.isRegKind() && Op
.getReg() == Reg
;
7187 return getOperandLoc(Test
, Operands
);
7190 SMLoc
AMDGPUAsmParser::getLitLoc(const OperandVector
&Operands
,
7191 bool SearchMandatoryLiterals
) const {
7192 auto Test
= [](const AMDGPUOperand
& Op
) {
7193 return Op
.IsImmKindLiteral() || Op
.isExpr();
7195 SMLoc Loc
= getOperandLoc(Test
, Operands
);
7196 if (SearchMandatoryLiterals
&& Loc
== getInstLoc(Operands
))
7197 Loc
= getMandatoryLitLoc(Operands
);
7201 SMLoc
AMDGPUAsmParser::getMandatoryLitLoc(const OperandVector
&Operands
) const {
7202 auto Test
= [](const AMDGPUOperand
&Op
) {
7203 return Op
.IsImmKindMandatoryLiteral();
7205 return getOperandLoc(Test
, Operands
);
7209 AMDGPUAsmParser::getConstLoc(const OperandVector
&Operands
) const {
7210 auto Test
= [](const AMDGPUOperand
& Op
) {
7211 return Op
.isImmKindConst();
7213 return getOperandLoc(Test
, Operands
);
7216 //===----------------------------------------------------------------------===//
7218 //===----------------------------------------------------------------------===//
7222 encodeBitmaskPerm(const unsigned AndMask
,
7223 const unsigned OrMask
,
7224 const unsigned XorMask
) {
7225 using namespace llvm::AMDGPU::Swizzle
;
7227 return BITMASK_PERM_ENC
|
7228 (AndMask
<< BITMASK_AND_SHIFT
) |
7229 (OrMask
<< BITMASK_OR_SHIFT
) |
7230 (XorMask
<< BITMASK_XOR_SHIFT
);
7234 AMDGPUAsmParser::parseSwizzleOperand(int64_t &Op
,
7235 const unsigned MinVal
,
7236 const unsigned MaxVal
,
7237 const StringRef ErrMsg
,
7239 if (!skipToken(AsmToken::Comma
, "expected a comma")) {
7243 if (!parseExpr(Op
)) {
7246 if (Op
< MinVal
|| Op
> MaxVal
) {
7255 AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum
, int64_t* Op
,
7256 const unsigned MinVal
,
7257 const unsigned MaxVal
,
7258 const StringRef ErrMsg
) {
7260 for (unsigned i
= 0; i
< OpNum
; ++i
) {
7261 if (!parseSwizzleOperand(Op
[i
], MinVal
, MaxVal
, ErrMsg
, Loc
))
7269 AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm
) {
7270 using namespace llvm::AMDGPU::Swizzle
;
7272 int64_t Lane
[LANE_NUM
];
7273 if (parseSwizzleOperands(LANE_NUM
, Lane
, 0, LANE_MAX
,
7274 "expected a 2-bit lane id")) {
7275 Imm
= QUAD_PERM_ENC
;
7276 for (unsigned I
= 0; I
< LANE_NUM
; ++I
) {
7277 Imm
|= Lane
[I
] << (LANE_SHIFT
* I
);
7285 AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm
) {
7286 using namespace llvm::AMDGPU::Swizzle
;
7292 if (!parseSwizzleOperand(GroupSize
,
7294 "group size must be in the interval [2,32]",
7298 if (!isPowerOf2_64(GroupSize
)) {
7299 Error(Loc
, "group size must be a power of two");
7302 if (parseSwizzleOperand(LaneIdx
,
7304 "lane id must be in the interval [0,group size - 1]",
7306 Imm
= encodeBitmaskPerm(BITMASK_MAX
- GroupSize
+ 1, LaneIdx
, 0);
7313 AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm
) {
7314 using namespace llvm::AMDGPU::Swizzle
;
7319 if (!parseSwizzleOperand(GroupSize
,
7321 "group size must be in the interval [2,32]",
7325 if (!isPowerOf2_64(GroupSize
)) {
7326 Error(Loc
, "group size must be a power of two");
7330 Imm
= encodeBitmaskPerm(BITMASK_MAX
, 0, GroupSize
- 1);
7335 AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm
) {
7336 using namespace llvm::AMDGPU::Swizzle
;
7341 if (!parseSwizzleOperand(GroupSize
,
7343 "group size must be in the interval [1,16]",
7347 if (!isPowerOf2_64(GroupSize
)) {
7348 Error(Loc
, "group size must be a power of two");
7352 Imm
= encodeBitmaskPerm(BITMASK_MAX
, 0, GroupSize
);
7357 AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm
) {
7358 using namespace llvm::AMDGPU::Swizzle
;
7360 if (!skipToken(AsmToken::Comma
, "expected a comma")) {
7365 SMLoc StrLoc
= getLoc();
7366 if (!parseString(Ctl
)) {
7369 if (Ctl
.size() != BITMASK_WIDTH
) {
7370 Error(StrLoc
, "expected a 5-character mask");
7374 unsigned AndMask
= 0;
7375 unsigned OrMask
= 0;
7376 unsigned XorMask
= 0;
7378 for (size_t i
= 0; i
< Ctl
.size(); ++i
) {
7379 unsigned Mask
= 1 << (BITMASK_WIDTH
- 1 - i
);
7382 Error(StrLoc
, "invalid mask");
7399 Imm
= encodeBitmaskPerm(AndMask
, OrMask
, XorMask
);
7404 AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm
) {
7406 SMLoc OffsetLoc
= getLoc();
7408 if (!parseExpr(Imm
, "a swizzle macro")) {
7411 if (!isUInt
<16>(Imm
)) {
7412 Error(OffsetLoc
, "expected a 16-bit offset");
7419 AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm
) {
7420 using namespace llvm::AMDGPU::Swizzle
;
7422 if (skipToken(AsmToken::LParen
, "expected a left parentheses")) {
7424 SMLoc ModeLoc
= getLoc();
7427 if (trySkipId(IdSymbolic
[ID_QUAD_PERM
])) {
7428 Ok
= parseSwizzleQuadPerm(Imm
);
7429 } else if (trySkipId(IdSymbolic
[ID_BITMASK_PERM
])) {
7430 Ok
= parseSwizzleBitmaskPerm(Imm
);
7431 } else if (trySkipId(IdSymbolic
[ID_BROADCAST
])) {
7432 Ok
= parseSwizzleBroadcast(Imm
);
7433 } else if (trySkipId(IdSymbolic
[ID_SWAP
])) {
7434 Ok
= parseSwizzleSwap(Imm
);
7435 } else if (trySkipId(IdSymbolic
[ID_REVERSE
])) {
7436 Ok
= parseSwizzleReverse(Imm
);
7438 Error(ModeLoc
, "expected a swizzle mode");
7441 return Ok
&& skipToken(AsmToken::RParen
, "expected a closing parentheses");
7447 ParseStatus
AMDGPUAsmParser::parseSwizzle(OperandVector
&Operands
) {
7451 if (trySkipId("offset")) {
7454 if (skipToken(AsmToken::Colon
, "expected a colon")) {
7455 if (trySkipId("swizzle")) {
7456 Ok
= parseSwizzleMacro(Imm
);
7458 Ok
= parseSwizzleOffset(Imm
);
7462 Operands
.push_back(AMDGPUOperand::CreateImm(this, Imm
, S
, AMDGPUOperand::ImmTySwizzle
));
7464 return Ok
? ParseStatus::Success
: ParseStatus::Failure
;
7466 return ParseStatus::NoMatch
;
7470 AMDGPUOperand::isSwizzle() const {
7471 return isImmTy(ImmTySwizzle
);
7474 //===----------------------------------------------------------------------===//
7476 //===----------------------------------------------------------------------===//
7478 int64_t AMDGPUAsmParser::parseGPRIdxMacro() {
7480 using namespace llvm::AMDGPU::VGPRIndexMode
;
7482 if (trySkipToken(AsmToken::RParen
)) {
7492 for (unsigned ModeId
= ID_MIN
; ModeId
<= ID_MAX
; ++ModeId
) {
7493 if (trySkipId(IdSymbolic
[ModeId
])) {
7500 Error(S
, (Imm
== 0)?
7501 "expected a VGPR index mode or a closing parenthesis" :
7502 "expected a VGPR index mode");
7507 Error(S
, "duplicate VGPR index mode");
7512 if (trySkipToken(AsmToken::RParen
))
7514 if (!skipToken(AsmToken::Comma
,
7515 "expected a comma or a closing parenthesis"))
7522 ParseStatus
AMDGPUAsmParser::parseGPRIdxMode(OperandVector
&Operands
) {
7524 using namespace llvm::AMDGPU::VGPRIndexMode
;
7529 if (trySkipId("gpr_idx", AsmToken::LParen
)) {
7530 Imm
= parseGPRIdxMacro();
7532 return ParseStatus::Failure
;
7534 if (getParser().parseAbsoluteExpression(Imm
))
7535 return ParseStatus::Failure
;
7536 if (Imm
< 0 || !isUInt
<4>(Imm
))
7537 return Error(S
, "invalid immediate: only 4-bit values are legal");
7541 AMDGPUOperand::CreateImm(this, Imm
, S
, AMDGPUOperand::ImmTyGprIdxMode
));
7542 return ParseStatus::Success
;
7545 bool AMDGPUOperand::isGPRIdxMode() const {
7546 return isImmTy(ImmTyGprIdxMode
);
7549 //===----------------------------------------------------------------------===//
7550 // sopp branch targets
7551 //===----------------------------------------------------------------------===//
7553 ParseStatus
AMDGPUAsmParser::parseSOPPBrTarget(OperandVector
&Operands
) {
7555 // Make sure we are not parsing something
7556 // that looks like a label or an expression but is not.
7557 // This will improve error messages.
7558 if (isRegister() || isModifier())
7559 return ParseStatus::NoMatch
;
7561 if (!parseExpr(Operands
))
7562 return ParseStatus::Failure
;
7564 AMDGPUOperand
&Opr
= ((AMDGPUOperand
&)*Operands
[Operands
.size() - 1]);
7565 assert(Opr
.isImm() || Opr
.isExpr());
7566 SMLoc Loc
= Opr
.getStartLoc();
7568 // Currently we do not support arbitrary expressions as branch targets.
7569 // Only labels and absolute expressions are accepted.
7570 if (Opr
.isExpr() && !Opr
.isSymbolRefExpr()) {
7571 Error(Loc
, "expected an absolute expression or a label");
7572 } else if (Opr
.isImm() && !Opr
.isS16Imm()) {
7573 Error(Loc
, "expected a 16-bit signed jump offset");
7576 return ParseStatus::Success
;
7579 //===----------------------------------------------------------------------===//
7580 // Boolean holding registers
7581 //===----------------------------------------------------------------------===//
7583 ParseStatus
AMDGPUAsmParser::parseBoolReg(OperandVector
&Operands
) {
7584 return parseReg(Operands
);
7587 //===----------------------------------------------------------------------===//
7589 //===----------------------------------------------------------------------===//
7591 void AMDGPUAsmParser::cvtMubufImpl(MCInst
&Inst
,
7592 const OperandVector
&Operands
,
7594 OptionalImmIndexMap OptionalIdx
;
7595 unsigned FirstOperandIdx
= 1;
7596 bool IsAtomicReturn
= false;
7599 for (unsigned i
= FirstOperandIdx
, e
= Operands
.size(); i
!= e
; ++i
) {
7600 AMDGPUOperand
&Op
= ((AMDGPUOperand
&)*Operands
[i
]);
7603 IsAtomicReturn
= Op
.getImm() & AMDGPU::CPol::GLC
;
7607 if (!IsAtomicReturn
) {
7608 int NewOpc
= AMDGPU::getAtomicNoRetOp(Inst
.getOpcode());
7610 Inst
.setOpcode(NewOpc
);
7613 IsAtomicReturn
= MII
.get(Inst
.getOpcode()).TSFlags
&
7614 SIInstrFlags::IsAtomicRet
;
7617 for (unsigned i
= FirstOperandIdx
, e
= Operands
.size(); i
!= e
; ++i
) {
7618 AMDGPUOperand
&Op
= ((AMDGPUOperand
&)*Operands
[i
]);
7620 // Add the register arguments
7622 Op
.addRegOperands(Inst
, 1);
7623 // Insert a tied src for atomic return dst.
7624 // This cannot be postponed as subsequent calls to
7625 // addImmOperands rely on correct number of MC operands.
7626 if (IsAtomicReturn
&& i
== FirstOperandIdx
)
7627 Op
.addRegOperands(Inst
, 1);
7631 // Handle the case where soffset is an immediate
7632 if (Op
.isImm() && Op
.getImmTy() == AMDGPUOperand::ImmTyNone
) {
7633 Op
.addImmOperands(Inst
, 1);
7637 // Handle tokens like 'offen' which are sometimes hard-coded into the
7638 // asm string. There are no MCInst operands for these.
7644 // Handle optional arguments
7645 OptionalIdx
[Op
.getImmTy()] = i
;
7648 addOptionalImmOperand(Inst
, Operands
, OptionalIdx
, AMDGPUOperand::ImmTyOffset
);
7649 addOptionalImmOperand(Inst
, Operands
, OptionalIdx
, AMDGPUOperand::ImmTyCPol
, 0);
7652 //===----------------------------------------------------------------------===//
7654 //===----------------------------------------------------------------------===//
7656 void AMDGPUAsmParser::cvtSMEMAtomic(MCInst
&Inst
, const OperandVector
&Operands
) {
7657 OptionalImmIndexMap OptionalIdx
;
7658 bool IsAtomicReturn
= false;
7660 for (unsigned i
= 1, e
= Operands
.size(); i
!= e
; ++i
) {
7661 AMDGPUOperand
&Op
= ((AMDGPUOperand
&)*Operands
[i
]);
7664 IsAtomicReturn
= Op
.getImm() & AMDGPU::CPol::GLC
;
7668 if (!IsAtomicReturn
) {
7669 int NewOpc
= AMDGPU::getAtomicNoRetOp(Inst
.getOpcode());
7671 Inst
.setOpcode(NewOpc
);
7674 IsAtomicReturn
= MII
.get(Inst
.getOpcode()).TSFlags
&
7675 SIInstrFlags::IsAtomicRet
;
7677 for (unsigned i
= 1, e
= Operands
.size(); i
!= e
; ++i
) {
7678 AMDGPUOperand
&Op
= ((AMDGPUOperand
&)*Operands
[i
]);
7680 // Add the register arguments
7682 Op
.addRegOperands(Inst
, 1);
7683 if (IsAtomicReturn
&& i
== 1)
7684 Op
.addRegOperands(Inst
, 1);
7688 // Handle the case where soffset is an immediate
7689 if (Op
.isImm() && Op
.getImmTy() == AMDGPUOperand::ImmTyNone
) {
7690 Op
.addImmOperands(Inst
, 1);
7694 // Handle tokens like 'offen' which are sometimes hard-coded into the
7695 // asm string. There are no MCInst operands for these.
7701 // Handle optional arguments
7702 OptionalIdx
[Op
.getImmTy()] = i
;
7705 if ((int)Inst
.getNumOperands() <=
7706 AMDGPU::getNamedOperandIdx(Inst
.getOpcode(), AMDGPU::OpName::offset
))
7707 addOptionalImmOperand(Inst
, Operands
, OptionalIdx
,
7708 AMDGPUOperand::ImmTySMEMOffsetMod
);
7709 addOptionalImmOperand(Inst
, Operands
, OptionalIdx
, AMDGPUOperand::ImmTyCPol
, 0);
}

//===----------------------------------------------------------------------===//
// smrd
//===----------------------------------------------------------------------===//

bool AMDGPUOperand::isSMRDOffset8() const {
  return isImmLiteral() && isUInt<8>(getImm());
}

bool AMDGPUOperand::isSMEMOffset() const {
  // Offset range is checked later by validator.
  return isImmLiteral();
}

bool AMDGPUOperand::isSMRDLiteralOffset() const {
  // 32-bit literals are only supported on CI and we only want to use them
  // when the offset is > 8-bits.
  return isImmLiteral() && !isUInt<8>(getImm()) && isUInt<32>(getImm());
}
7731 //===----------------------------------------------------------------------===//
7733 //===----------------------------------------------------------------------===//
7735 static bool ConvertOmodMul(int64_t &Mul
) {
7736 if (Mul
!= 1 && Mul
!= 2 && Mul
!= 4)
7743 static bool ConvertOmodDiv(int64_t &Div
) {
7757 // For pre-gfx11 targets, both bound_ctrl:0 and bound_ctrl:1 are encoded as 1.
7758 // This is intentional and ensures compatibility with sp3.
7759 // See bug 35397 for details.
7760 bool AMDGPUAsmParser::convertDppBoundCtrl(int64_t &BoundCtrl
) {
7761 if (BoundCtrl
== 0 || BoundCtrl
== 1) {
7769 void AMDGPUAsmParser::onBeginOfFile() {
7770 if (!getParser().getStreamer().getTargetStreamer() ||
7771 getSTI().getTargetTriple().getArch() == Triple::r600
)
7774 if (!getTargetStreamer().getTargetID())
7775 getTargetStreamer().initializeTargetID(getSTI(), getSTI().getFeatureString(),
7776 // TODO: Should try to check code object version from directive???
7777 AMDGPU::getAmdhsaCodeObjectVersion());
7779 if (isHsaAbiVersion3AndAbove(&getSTI()))
7780 getTargetStreamer().EmitDirectiveAMDGCNTarget();
7783 ParseStatus
AMDGPUAsmParser::parseOModSI(OperandVector
&Operands
) {
7784 StringRef Name
= getTokenStr();
7785 if (Name
== "mul") {
7786 return parseIntWithPrefix("mul", Operands
,
7787 AMDGPUOperand::ImmTyOModSI
, ConvertOmodMul
);
7790 if (Name
== "div") {
7791 return parseIntWithPrefix("div", Operands
,
7792 AMDGPUOperand::ImmTyOModSI
, ConvertOmodDiv
);
7795 return ParseStatus::NoMatch
;
7798 // Determines which bit DST_OP_SEL occupies in the op_sel operand according to
7799 // the number of src operands present, then copies that bit into src0_modifiers.
7800 void cvtVOP3DstOpSelOnly(MCInst
&Inst
) {
7801 int Opc
= Inst
.getOpcode();
7802 int OpSelIdx
= AMDGPU::getNamedOperandIdx(Opc
, AMDGPU::OpName::op_sel
);
7807 const int Ops
[] = { AMDGPU::OpName::src0
,
7808 AMDGPU::OpName::src1
,
7809 AMDGPU::OpName::src2
};
7810 for (SrcNum
= 0; SrcNum
< 3 && AMDGPU::hasNamedOperand(Opc
, Ops
[SrcNum
]);
7815 unsigned OpSel
= Inst
.getOperand(OpSelIdx
).getImm();
7817 if ((OpSel
& (1 << SrcNum
)) != 0) {
7818 int ModIdx
= AMDGPU::getNamedOperandIdx(Opc
, AMDGPU::OpName::src0_modifiers
);
7819 uint32_t ModVal
= Inst
.getOperand(ModIdx
).getImm();
7820 Inst
.getOperand(ModIdx
).setImm(ModVal
| SISrcMods::DST_OP_SEL
);
7824 void AMDGPUAsmParser::cvtVOP3OpSel(MCInst
&Inst
,
7825 const OperandVector
&Operands
) {
7826 cvtVOP3P(Inst
, Operands
);
7827 cvtVOP3DstOpSelOnly(Inst
);
7830 void AMDGPUAsmParser::cvtVOP3OpSel(MCInst
&Inst
, const OperandVector
&Operands
,
7831 OptionalImmIndexMap
&OptionalIdx
) {
7832 cvtVOP3P(Inst
, Operands
, OptionalIdx
);
7833 cvtVOP3DstOpSelOnly(Inst
);
7836 static bool isRegOrImmWithInputMods(const MCInstrDesc
&Desc
, unsigned OpNum
) {
7838 // 1. This operand is input modifiers
7839 Desc
.operands()[OpNum
].OperandType
== AMDGPU::OPERAND_INPUT_MODS
7840 // 2. This is not last operand
7841 && Desc
.NumOperands
> (OpNum
+ 1)
7842 // 3. Next operand is register class
7843 && Desc
.operands()[OpNum
+ 1].RegClass
!= -1
7844 // 4. Next register is not tied to any other operand
7845 && Desc
.getOperandConstraint(OpNum
+ 1,
7846 MCOI::OperandConstraint::TIED_TO
) == -1;
7849 void AMDGPUAsmParser::cvtVOP3Interp(MCInst
&Inst
, const OperandVector
&Operands
)
7851 OptionalImmIndexMap OptionalIdx
;
7852 unsigned Opc
= Inst
.getOpcode();
7855 const MCInstrDesc
&Desc
= MII
.get(Inst
.getOpcode());
7856 for (unsigned J
= 0; J
< Desc
.getNumDefs(); ++J
) {
7857 ((AMDGPUOperand
&)*Operands
[I
++]).addRegOperands(Inst
, 1);
7860 for (unsigned E
= Operands
.size(); I
!= E
; ++I
) {
7861 AMDGPUOperand
&Op
= ((AMDGPUOperand
&)*Operands
[I
]);
7862 if (isRegOrImmWithInputMods(Desc
, Inst
.getNumOperands())) {
7863 Op
.addRegOrImmWithFPInputModsOperands(Inst
, 2);
7864 } else if (Op
.isInterpSlot() || Op
.isInterpAttr() ||
7865 Op
.isInterpAttrChan()) {
7866 Inst
.addOperand(MCOperand::createImm(Op
.getImm()));
7867 } else if (Op
.isImmModifier()) {
7868 OptionalIdx
[Op
.getImmTy()] = I
;
7870 llvm_unreachable("unhandled operand type");
7874 if (AMDGPU::hasNamedOperand(Opc
, AMDGPU::OpName::high
))
7875 addOptionalImmOperand(Inst
, Operands
, OptionalIdx
,
7876 AMDGPUOperand::ImmTyHigh
);
7878 if (AMDGPU::hasNamedOperand(Opc
, AMDGPU::OpName::clamp
))
7879 addOptionalImmOperand(Inst
, Operands
, OptionalIdx
,
7880 AMDGPUOperand::ImmTyClampSI
);
7882 if (AMDGPU::hasNamedOperand(Opc
, AMDGPU::OpName::omod
))
7883 addOptionalImmOperand(Inst
, Operands
, OptionalIdx
,
7884 AMDGPUOperand::ImmTyOModSI
);
7887 void AMDGPUAsmParser::cvtVINTERP(MCInst
&Inst
, const OperandVector
&Operands
)
7889 OptionalImmIndexMap OptionalIdx
;
7890 unsigned Opc
= Inst
.getOpcode();
7893 const MCInstrDesc
&Desc
= MII
.get(Inst
.getOpcode());
7894 for (unsigned J
= 0; J
< Desc
.getNumDefs(); ++J
) {
7895 ((AMDGPUOperand
&)*Operands
[I
++]).addRegOperands(Inst
, 1);
7898 for (unsigned E
= Operands
.size(); I
!= E
; ++I
) {
7899 AMDGPUOperand
&Op
= ((AMDGPUOperand
&)*Operands
[I
]);
7900 if (isRegOrImmWithInputMods(Desc
, Inst
.getNumOperands())) {
7901 Op
.addRegOrImmWithFPInputModsOperands(Inst
, 2);
7902 } else if (Op
.isImmModifier()) {
7903 OptionalIdx
[Op
.getImmTy()] = I
;
7905 llvm_unreachable("unhandled operand type");
7909 addOptionalImmOperand(Inst
, Operands
, OptionalIdx
, AMDGPUOperand::ImmTyClampSI
);
7911 int OpSelIdx
= AMDGPU::getNamedOperandIdx(Opc
, AMDGPU::OpName::op_sel
);
7913 addOptionalImmOperand(Inst
, Operands
, OptionalIdx
, AMDGPUOperand::ImmTyOpSel
);
7915 addOptionalImmOperand(Inst
, Operands
, OptionalIdx
, AMDGPUOperand::ImmTyWaitEXP
);
7920 const int Ops
[] = { AMDGPU::OpName::src0
,
7921 AMDGPU::OpName::src1
,
7922 AMDGPU::OpName::src2
};
7923 const int ModOps
[] = { AMDGPU::OpName::src0_modifiers
,
7924 AMDGPU::OpName::src1_modifiers
,
7925 AMDGPU::OpName::src2_modifiers
};
7927 unsigned OpSel
= Inst
.getOperand(OpSelIdx
).getImm();
7929 for (int J
= 0; J
< 3; ++J
) {
7930 int OpIdx
= AMDGPU::getNamedOperandIdx(Opc
, Ops
[J
]);
7934 int ModIdx
= AMDGPU::getNamedOperandIdx(Opc
, ModOps
[J
]);
7935 uint32_t ModVal
= Inst
.getOperand(ModIdx
).getImm();
7937 if ((OpSel
& (1 << J
)) != 0)
7938 ModVal
|= SISrcMods::OP_SEL_0
;
7939 if (ModOps
[J
] == AMDGPU::OpName::src0_modifiers
&&
7940 (OpSel
& (1 << 3)) != 0)
7941 ModVal
|= SISrcMods::DST_OP_SEL
;
7943 Inst
.getOperand(ModIdx
).setImm(ModVal
);
7947 void AMDGPUAsmParser::cvtVOP3(MCInst
&Inst
, const OperandVector
&Operands
,
7948 OptionalImmIndexMap
&OptionalIdx
) {
7949 unsigned Opc
= Inst
.getOpcode();
7952 const MCInstrDesc
&Desc
= MII
.get(Inst
.getOpcode());
7953 for (unsigned J
= 0; J
< Desc
.getNumDefs(); ++J
) {
7954 ((AMDGPUOperand
&)*Operands
[I
++]).addRegOperands(Inst
, 1);
7957 for (unsigned E
= Operands
.size(); I
!= E
; ++I
) {
7958 AMDGPUOperand
&Op
= ((AMDGPUOperand
&)*Operands
[I
]);
7959 if (isRegOrImmWithInputMods(Desc
, Inst
.getNumOperands())) {
7960 Op
.addRegOrImmWithFPInputModsOperands(Inst
, 2);
7961 } else if (Op
.isImmModifier()) {
7962 OptionalIdx
[Op
.getImmTy()] = I
;
7963 } else if (Op
.isRegOrImm()) {
7964 Op
.addRegOrImmOperands(Inst
, 1);
7966 llvm_unreachable("unhandled operand type");
7970 if (AMDGPU::hasNamedOperand(Opc
, AMDGPU::OpName::clamp
))
7971 addOptionalImmOperand(Inst
, Operands
, OptionalIdx
,
7972 AMDGPUOperand::ImmTyClampSI
);
7974 if (AMDGPU::hasNamedOperand(Opc
, AMDGPU::OpName::omod
))
7975 addOptionalImmOperand(Inst
, Operands
, OptionalIdx
,
7976 AMDGPUOperand::ImmTyOModSI
);
7978 // Special case v_mac_{f16, f32} and v_fmac_{f16, f32} (gfx906/gfx10+):
7979 // it has src2 register operand that is tied to dst operand
7980 // we don't allow modifiers for this operand in assembler so src2_modifiers
7983 auto it
= Inst
.begin();
7984 std::advance(it
, AMDGPU::getNamedOperandIdx(Opc
, AMDGPU::OpName::src2_modifiers
));
7985 it
= Inst
.insert(it
, MCOperand::createImm(0)); // no modifiers for src2
7987 // Copy the operand to ensure it's not invalidated when Inst grows.
7988 Inst
.insert(it
, MCOperand(Inst
.getOperand(0))); // src2 = dst
7992 void AMDGPUAsmParser::cvtVOP3(MCInst
&Inst
, const OperandVector
&Operands
) {
7993 OptionalImmIndexMap OptionalIdx
;
7994 cvtVOP3(Inst
, Operands
, OptionalIdx
);
7997 void AMDGPUAsmParser::cvtVOP3P(MCInst
&Inst
, const OperandVector
&Operands
,
7998 OptionalImmIndexMap
&OptIdx
) {
7999 const int Opc
= Inst
.getOpcode();
8000 const MCInstrDesc
&Desc
= MII
.get(Opc
);
8002 const bool IsPacked
= (Desc
.TSFlags
& SIInstrFlags::IsPacked
) != 0;
8004 if (Opc
== AMDGPU::V_CVT_SR_BF8_F32_vi
||
8005 Opc
== AMDGPU::V_CVT_SR_FP8_F32_vi
) {
8006 Inst
.addOperand(MCOperand::createImm(0)); // Placeholder for src2_mods
8007 Inst
.addOperand(Inst
.getOperand(0));
8010 if (AMDGPU::hasNamedOperand(Opc
, AMDGPU::OpName::vdst_in
)) {
8012 Inst
.addOperand(Inst
.getOperand(0));
8015 // FIXME: This is messy. Parse the modifiers as if it was a normal VOP3
8016 // instruction, and then figure out where to actually put the modifiers
8018 int OpSelIdx
= AMDGPU::getNamedOperandIdx(Opc
, AMDGPU::OpName::op_sel
);
8019 if (OpSelIdx
!= -1) {
8020 addOptionalImmOperand(Inst
, Operands
, OptIdx
, AMDGPUOperand::ImmTyOpSel
);
8023 int OpSelHiIdx
= AMDGPU::getNamedOperandIdx(Opc
, AMDGPU::OpName::op_sel_hi
);
8024 if (OpSelHiIdx
!= -1) {
8025 int DefaultVal
= IsPacked
? -1 : 0;
8026 addOptionalImmOperand(Inst
, Operands
, OptIdx
, AMDGPUOperand::ImmTyOpSelHi
,
8030 int NegLoIdx
= AMDGPU::getNamedOperandIdx(Opc
, AMDGPU::OpName::neg_lo
);
8031 if (NegLoIdx
!= -1) {
8032 addOptionalImmOperand(Inst
, Operands
, OptIdx
, AMDGPUOperand::ImmTyNegLo
);
8033 addOptionalImmOperand(Inst
, Operands
, OptIdx
, AMDGPUOperand::ImmTyNegHi
);
8036 const int Ops
[] = { AMDGPU::OpName::src0
,
8037 AMDGPU::OpName::src1
,
8038 AMDGPU::OpName::src2
};
8039 const int ModOps
[] = { AMDGPU::OpName::src0_modifiers
,
8040 AMDGPU::OpName::src1_modifiers
,
8041 AMDGPU::OpName::src2_modifiers
};
8044 unsigned OpSelHi
= 0;
8049 OpSel
= Inst
.getOperand(OpSelIdx
).getImm();
8051 if (OpSelHiIdx
!= -1)
8052 OpSelHi
= Inst
.getOperand(OpSelHiIdx
).getImm();
8054 if (NegLoIdx
!= -1) {
8055 int NegHiIdx
= AMDGPU::getNamedOperandIdx(Opc
, AMDGPU::OpName::neg_hi
);
8056 NegLo
= Inst
.getOperand(NegLoIdx
).getImm();
8057 NegHi
= Inst
.getOperand(NegHiIdx
).getImm();
8060 for (int J
= 0; J
< 3; ++J
) {
8061 int OpIdx
= AMDGPU::getNamedOperandIdx(Opc
, Ops
[J
]);
8065 int ModIdx
= AMDGPU::getNamedOperandIdx(Opc
, ModOps
[J
]);
8070 uint32_t ModVal
= 0;
8072 if ((OpSel
& (1 << J
)) != 0)
8073 ModVal
|= SISrcMods::OP_SEL_0
;
8075 if ((OpSelHi
& (1 << J
)) != 0)
8076 ModVal
|= SISrcMods::OP_SEL_1
;
8078 if ((NegLo
& (1 << J
)) != 0)
8079 ModVal
|= SISrcMods::NEG
;
8081 if ((NegHi
& (1 << J
)) != 0)
8082 ModVal
|= SISrcMods::NEG_HI
;
8084 Inst
.getOperand(ModIdx
).setImm(Inst
.getOperand(ModIdx
).getImm() | ModVal
);
void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands) {
  OptionalImmIndexMap OptIdx;
  cvtVOP3(Inst, Operands, OptIdx);
  cvtVOP3P(Inst, Operands, OptIdx);
}

//===----------------------------------------------------------------------===//
// VOPD
//===----------------------------------------------------------------------===//

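// Parse the "::" separator and the OpY mnemonic of a dual-issue (VOPD)
// instruction, e.g. "v_dual_mov_b32 ... :: v_dual_add_f32 ...".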
ParseStatus AMDGPUAsmParser::parseVOPD(OperandVector &Operands) {
  if (!hasVOPD(getSTI()))
    return ParseStatus::NoMatch;

  if (isToken(AsmToken::Colon) && peekToken(false).is(AsmToken::Colon)) {
    SMLoc S = getLoc();
    lex();
    lex();
    Operands.push_back(AMDGPUOperand::CreateToken(this, "::", S));
    SMLoc OpYLoc = getLoc();
    StringRef OpYName;
    if (isToken(AsmToken::Identifier) && !Parser.parseIdentifier(OpYName)) {
      Operands.push_back(AMDGPUOperand::CreateToken(this, OpYName, OpYLoc));
      return ParseStatus::Success;
    }
    return Error(OpYLoc, "expected a VOPDY instruction after ::");
  }
  return ParseStatus::NoMatch;
}

// Create VOPD MCInst operands using parsed assembler operands.
void AMDGPUAsmParser::cvtVOPD(MCInst &Inst, const OperandVector &Operands) {
  auto addOp = [&](uint16_t ParsedOprIdx) { // NOLINT:function pointer
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[ParsedOprIdx]);
    if (Op.isReg()) {
      Op.addRegOperands(Inst, 1);
      return;
    }
    if (Op.isImm()) {
      Op.addImmOperands(Inst, 1);
      return;
    }
    llvm_unreachable("Unhandled operand type in cvtVOPD");
  };

  const auto &InstInfo = getVOPDInstInfo(Inst.getOpcode(), &MII);

  // MCInst operands are ordered as follows:
  //   dstX, dstY, src0X [, other OpX operands], src0Y [, other OpY operands]

  for (auto CompIdx : VOPD::COMPONENTS) {
    addOp(InstInfo[CompIdx].getIndexOfDstInParsedOperands());
  }

  for (auto CompIdx : VOPD::COMPONENTS) {
    const auto &CInfo = InstInfo[CompIdx];
    auto CompSrcOperandsNum = InstInfo[CompIdx].getCompParsedSrcOperandsNum();
    for (unsigned CompSrcIdx = 0; CompSrcIdx < CompSrcOperandsNum; ++CompSrcIdx)
      addOp(CInfo.getIndexOfSrcInParsedOperands(CompSrcIdx));
    if (CInfo.hasSrc2Acc())
      addOp(CInfo.getIndexOfDstInParsedOperands());
  }
}

//===----------------------------------------------------------------------===//
// dpp
//===----------------------------------------------------------------------===//

bool AMDGPUOperand::isDPP8() const {
  return isImmTy(ImmTyDPP8);
}

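// Check that the immediate holds one of the defined dpp_ctrl encodings
// (quad_perm, row shifts/rotates, wave operations, mirrors, broadcasts,
// row_share and row_xmask).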
bool AMDGPUOperand::isDPPCtrl() const {
  using namespace AMDGPU::DPP;

  bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm());
  if (result) {
    int64_t Imm = getImm();
    return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) ||
           (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) ||
           (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) ||
           (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) ||
           (Imm == DppCtrl::WAVE_SHL1) ||
           (Imm == DppCtrl::WAVE_ROL1) ||
           (Imm == DppCtrl::WAVE_SHR1) ||
           (Imm == DppCtrl::WAVE_ROR1) ||
           (Imm == DppCtrl::ROW_MIRROR) ||
           (Imm == DppCtrl::ROW_HALF_MIRROR) ||
           (Imm == DppCtrl::BCAST15) ||
           (Imm == DppCtrl::BCAST31) ||
           (Imm >= DppCtrl::ROW_SHARE_FIRST && Imm <= DppCtrl::ROW_SHARE_LAST) ||
           (Imm >= DppCtrl::ROW_XMASK_FIRST && Imm <= DppCtrl::ROW_XMASK_LAST);
  }
  return false;
}

//===----------------------------------------------------------------------===//
// mAI
//===----------------------------------------------------------------------===//

bool AMDGPUOperand::isBLGP() const {
  return isImm() && getImmTy() == ImmTyBLGP && isUInt<3>(getImm());
}

bool AMDGPUOperand::isCBSZ() const {
  return isImm() && getImmTy() == ImmTyCBSZ && isUInt<3>(getImm());
}

bool AMDGPUOperand::isABID() const {
  return isImm() && getImmTy() == ImmTyABID && isUInt<4>(getImm());
}

bool AMDGPUOperand::isS16Imm() const {
  return isImmLiteral() && (isInt<16>(getImm()) || isUInt<16>(getImm()));
}

bool AMDGPUOperand::isU16Imm() const {
  return isImmLiteral() && isUInt<16>(getImm());
}

//===----------------------------------------------------------------------===//
// dim
//===----------------------------------------------------------------------===//

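// Parse the value of a dim: operand (e.g. dim:1D, with an optional
// SQ_RSRC_IMG_ prefix) into its MIMG dimension encoding.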
bool AMDGPUAsmParser::parseDimId(unsigned &Encoding) {
  // We want to allow "dim:1D" etc.,
  // but the initial 1 is tokenized as an integer.
  std::string Token;
  if (isToken(AsmToken::Integer)) {
    SMLoc Loc = getToken().getEndLoc();
    Token = std::string(getTokenStr());
    lex();
    if (getLoc() != Loc)
      return false;
  }

  StringRef Suffix;
  if (!parseId(Suffix))
    return false;
  Token += Suffix;

  StringRef DimId = Token;
  if (DimId.startswith("SQ_RSRC_IMG_"))
    DimId = DimId.drop_front(12);

  const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByAsmSuffix(DimId);
  if (!DimInfo)
    return false;

  Encoding = DimInfo->Encoding;
  return true;
}

ParseStatus AMDGPUAsmParser::parseDim(OperandVector &Operands) {
  if (!isGFX10Plus())
    return ParseStatus::NoMatch;

  SMLoc S = getLoc();

  if (!trySkipId("dim", AsmToken::Colon))
    return ParseStatus::NoMatch;

  unsigned Encoding;
  SMLoc Loc = getLoc();
  if (!parseDimId(Encoding))
    return Error(Loc, "invalid dim value");

  Operands.push_back(AMDGPUOperand::CreateImm(this, Encoding, S,
                                              AMDGPUOperand::ImmTyDim));
  return ParseStatus::Success;
}

//===----------------------------------------------------------------------===//
// dpp
//===----------------------------------------------------------------------===//

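// Parse a dpp8:[...] operand: eight 3-bit lane selects packed into a single
// immediate.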
ParseStatus AMDGPUAsmParser::parseDPP8(OperandVector &Operands) {
  SMLoc S = getLoc();

  if (!isGFX10Plus() || !trySkipId("dpp8", AsmToken::Colon))
    return ParseStatus::NoMatch;

  // dpp8:[%d,%d,%d,%d,%d,%d,%d,%d]

  int64_t Sels[8];

  if (!skipToken(AsmToken::LBrac, "expected an opening square bracket"))
    return ParseStatus::Failure;

  for (size_t i = 0; i < 8; ++i) {
    if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma"))
      return ParseStatus::Failure;

    SMLoc Loc = getLoc();
    if (getParser().parseAbsoluteExpression(Sels[i]))
      return ParseStatus::Failure;
    if (0 > Sels[i] || 7 < Sels[i])
      return Error(Loc, "expected a 3-bit value");
  }

  if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
    return ParseStatus::Failure;

  unsigned DPP8 = 0;
  for (size_t i = 0; i < 8; ++i)
    DPP8 |= (Sels[i] << (i * 3));

  Operands.push_back(AMDGPUOperand::CreateImm(this, DPP8, S, AMDGPUOperand::ImmTyDPP8));
  return ParseStatus::Success;
}

bool
AMDGPUAsmParser::isSupportedDPPCtrl(StringRef Ctrl,
                                    const OperandVector &Operands) {
  if (Ctrl == "row_newbcast")
    return isGFX90A();

  if (Ctrl == "row_share" ||
      Ctrl == "row_xmask")
    return isGFX10Plus();

  if (Ctrl == "wave_shl" ||
      Ctrl == "wave_shr" ||
      Ctrl == "wave_rol" ||
      Ctrl == "wave_ror" ||
      Ctrl == "row_bcast")
    return isVI() || isGFX9();

  return Ctrl == "row_mirror" ||
         Ctrl == "row_half_mirror" ||
         Ctrl == "quad_perm" ||
         Ctrl == "row_shl" ||
         Ctrl == "row_shr" ||
         Ctrl == "row_ror";
}

int64_t
AMDGPUAsmParser::parseDPPCtrlPerm() {
  // quad_perm:[%d,%d,%d,%d]

  if (!skipToken(AsmToken::LBrac, "expected an opening square bracket"))
    return -1;

  int64_t Val = 0;
  for (int i = 0; i < 4; ++i) {
    if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma"))
      return -1;

    int64_t Temp;
    SMLoc Loc = getLoc();
    if (getParser().parseAbsoluteExpression(Temp))
      return -1;
    if (Temp < 0 || Temp > 3) {
      Error(Loc, "expected a 2-bit value");
      return -1;
    }

    Val += (Temp << i * 2);
  }

  if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
    return -1;

  return Val;
}

int64_t
AMDGPUAsmParser::parseDPPCtrlSel(StringRef Ctrl) {
  using namespace AMDGPU::DPP;

  int64_t Val;
  SMLoc Loc = getLoc();

  if (getParser().parseAbsoluteExpression(Val))
    return -1;

  struct DppCtrlCheck {
    int64_t Ctrl;
    int Lo;
    int Hi;
  };

  DppCtrlCheck Check = StringSwitch<DppCtrlCheck>(Ctrl)
    .Case("wave_shl",  {DppCtrl::WAVE_SHL1,       1,  1})
    .Case("wave_rol",  {DppCtrl::WAVE_ROL1,       1,  1})
    .Case("wave_shr",  {DppCtrl::WAVE_SHR1,       1,  1})
    .Case("wave_ror",  {DppCtrl::WAVE_ROR1,       1,  1})
    .Case("row_shl",   {DppCtrl::ROW_SHL0,        1, 15})
    .Case("row_shr",   {DppCtrl::ROW_SHR0,        1, 15})
    .Case("row_ror",   {DppCtrl::ROW_ROR0,        1, 15})
    .Case("row_share", {DppCtrl::ROW_SHARE_FIRST, 0, 15})
    .Case("row_xmask", {DppCtrl::ROW_XMASK_FIRST, 0, 15})
    .Case("row_newbcast", {DppCtrl::ROW_NEWBCAST_FIRST, 0, 15})
    .Default({-1, 0, 0});

  bool Valid;
  if (Check.Ctrl == -1) {
    Valid = (Ctrl == "row_bcast" && (Val == 15 || Val == 31));
    Val = (Val == 15) ? DppCtrl::BCAST15 : DppCtrl::BCAST31;
  } else {
    Valid = Check.Lo <= Val && Val <= Check.Hi;
    Val = (Check.Lo == Check.Hi) ? Check.Ctrl : (Check.Ctrl | Val);
  }

  if (!Valid) {
    Error(Loc, Twine("invalid ", Ctrl) + Twine(" value"));
    return -1;
  }

  return Val;
}

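// Parse a dpp_ctrl operand (row_mirror, quad_perm:[...], row_shl:N, ...)
// into its immediate encoding.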
ParseStatus AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) {
  using namespace AMDGPU::DPP;

  if (!isToken(AsmToken::Identifier) ||
      !isSupportedDPPCtrl(getTokenStr(), Operands))
    return ParseStatus::NoMatch;

  SMLoc S = getLoc();
  int64_t Val = -1;
  StringRef Ctrl;

  parseId(Ctrl);

  if (Ctrl == "row_mirror") {
    Val = DppCtrl::ROW_MIRROR;
  } else if (Ctrl == "row_half_mirror") {
    Val = DppCtrl::ROW_HALF_MIRROR;
  } else {
    if (skipToken(AsmToken::Colon, "expected a colon")) {
      if (Ctrl == "quad_perm") {
        Val = parseDPPCtrlPerm();
      } else {
        Val = parseDPPCtrlSel(Ctrl);
      }
    }
  }

  if (Val == -1)
    return ParseStatus::Failure;

  Operands.push_back(
      AMDGPUOperand::CreateImm(this, Val, S, AMDGPUOperand::ImmTyDppCtrl));
  return ParseStatus::Success;
}

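// Convert parsed operands of a VOP3 instruction with DPP/DPP8 control into
// MCInst operands, handling tied MAC operands and optional modifiers.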
void AMDGPUAsmParser::cvtVOP3DPP(MCInst &Inst, const OperandVector &Operands,
                                 bool IsDPP8) {
  OptionalImmIndexMap OptionalIdx;
  unsigned Opc = Inst.getOpcode();
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());

  // MAC instructions are special because they have 'old'
  // operand which is not tied to dst (but assumed to be).
  // They also have dummy unused src2_modifiers.
  int OldIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::old);
  int Src2ModIdx =
      AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers);
  bool IsMAC = OldIdx != -1 && Src2ModIdx != -1 &&
               Desc.getOperandConstraint(OldIdx, MCOI::TIED_TO) == -1;

  unsigned I = 1;
  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
  }

  int Fi = 0;
  for (unsigned E = Operands.size(); I != E; ++I) {

    if (IsMAC) {
      int NumOperands = Inst.getNumOperands();
      if (OldIdx == NumOperands) {
        // Handle old operand
        constexpr int DST_IDX = 0;
        Inst.addOperand(Inst.getOperand(DST_IDX));
      } else if (Src2ModIdx == NumOperands) {
        // Add unused dummy src2_modifiers
        Inst.addOperand(MCOperand::createImm(0));
      }
    }

    auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(),
                                            MCOI::TIED_TO);
    if (TiedTo != -1) {
      assert((unsigned)TiedTo < Inst.getNumOperands());
      // handle tied old or src2 for MAC instructions
      Inst.addOperand(Inst.getOperand(TiedTo));
    }
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
    // Add the register arguments
    if (IsDPP8 && Op.isDppFI()) {
      Fi = Op.getImm();
    } else if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
      Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
    } else if (Op.isReg()) {
      Op.addRegOperands(Inst, 1);
    } else if (Op.isImm() &&
               Desc.operands()[Inst.getNumOperands()].RegClass != -1) {
      assert(!Op.IsImmKindLiteral() && "Cannot use literal with DPP");
      Op.addImmOperands(Inst, 1);
    } else if (Op.isImm()) {
      OptionalIdx[Op.getImmTy()] = I;
    } else {
      llvm_unreachable("unhandled operand type");
    }
  }

  if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp))
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);

  if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::omod))
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);

  if (Desc.TSFlags & SIInstrFlags::VOP3P)
    cvtVOP3P(Inst, Operands, OptionalIdx);
  else if (Desc.TSFlags & SIInstrFlags::VOP3)
    cvtVOP3OpSel(Inst, Operands, OptionalIdx);
  else if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::op_sel)) {
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOpSel);
  }

  if (IsDPP8) {
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDPP8);
    using namespace llvm::AMDGPU::DPP;
    Inst.addOperand(MCOperand::createImm(Fi ? DPP8_FI_1 : DPP8_FI_0));
  } else {
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppCtrl, 0xe4);
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf);
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf);
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl);

    if (AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::fi))
      addOptionalImmOperand(Inst, Operands, OptionalIdx,
                            AMDGPUOperand::ImmTyDppFI);
  }
}

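// Convert parsed operands of a DPP or DPP8 instruction into MCInst operands.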
void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8) {
  OptionalImmIndexMap OptionalIdx;

  unsigned I = 1;
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
  }

  int Fi = 0;
  for (unsigned E = Operands.size(); I != E; ++I) {
    auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(),
                                            MCOI::TIED_TO);
    if (TiedTo != -1) {
      assert((unsigned)TiedTo < Inst.getNumOperands());
      // handle tied old or src2 for MAC instructions
      Inst.addOperand(Inst.getOperand(TiedTo));
    }
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
    // Add the register arguments
    if (Op.isReg() && validateVccOperand(Op.getReg())) {
      // VOP2b (v_add_u32, v_sub_u32 ...) dpp use "vcc" token.
      // Skip it.
      continue;
    }

    if (IsDPP8) {
      if (Op.isDPP8()) {
        Op.addImmOperands(Inst, 1);
      } else if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
        Op.addRegWithFPInputModsOperands(Inst, 2);
      } else if (Op.isDppFI()) {
        Fi = Op.getImm();
      } else if (Op.isReg()) {
        Op.addRegOperands(Inst, 1);
      } else {
        llvm_unreachable("Invalid operand type");
      }
    } else {
      if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
        Op.addRegWithFPInputModsOperands(Inst, 2);
      } else if (Op.isReg()) {
        Op.addRegOperands(Inst, 1);
      } else if (Op.isDPPCtrl()) {
        Op.addImmOperands(Inst, 1);
      } else if (Op.isImm()) {
        // Handle optional arguments
        OptionalIdx[Op.getImmTy()] = I;
      } else {
        llvm_unreachable("Invalid operand type");
      }
    }
  }

  if (IsDPP8) {
    using namespace llvm::AMDGPU::DPP;
    Inst.addOperand(MCOperand::createImm(Fi ? DPP8_FI_1 : DPP8_FI_0));
  } else {
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf);
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf);
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl);
    if (AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::fi)) {
      addOptionalImmOperand(Inst, Operands, OptionalIdx,
                            AMDGPUOperand::ImmTyDppFI);
    }
  }
}

//===----------------------------------------------------------------------===//
// sdwa
//===----------------------------------------------------------------------===//

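// Parse an SDWA select operand such as dst_sel:WORD_1 or src0_sel:BYTE_2.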
ParseStatus AMDGPUAsmParser::parseSDWASel(OperandVector &Operands,
                                          StringRef Prefix,
                                          AMDGPUOperand::ImmTy Type) {
  using namespace llvm::AMDGPU::SDWA;

  SMLoc S = getLoc();
  StringRef Value;
  SMLoc StringLoc;

  ParseStatus Res = parseStringWithPrefix(Prefix, Value, StringLoc);
  if (!Res.isSuccess())
    return Res;

  int64_t Int;
  Int = StringSwitch<int64_t>(Value)
        .Case("BYTE_0", SdwaSel::BYTE_0)
        .Case("BYTE_1", SdwaSel::BYTE_1)
        .Case("BYTE_2", SdwaSel::BYTE_2)
        .Case("BYTE_3", SdwaSel::BYTE_3)
        .Case("WORD_0", SdwaSel::WORD_0)
        .Case("WORD_1", SdwaSel::WORD_1)
        .Case("DWORD", SdwaSel::DWORD)
        .Default(0xffffffff);

  if (Int == 0xffffffff)
    return Error(StringLoc, "invalid " + Twine(Prefix) + " value");

  Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, Type));
  return ParseStatus::Success;
}

ParseStatus AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) {
  using namespace llvm::AMDGPU::SDWA;

  SMLoc S = getLoc();
  StringRef Value;
  SMLoc StringLoc;

  ParseStatus Res = parseStringWithPrefix("dst_unused", Value, StringLoc);
  if (!Res.isSuccess())
    return Res;

  int64_t Int;
  Int = StringSwitch<int64_t>(Value)
        .Case("UNUSED_PAD", DstUnused::UNUSED_PAD)
        .Case("UNUSED_SEXT", DstUnused::UNUSED_SEXT)
        .Case("UNUSED_PRESERVE", DstUnused::UNUSED_PRESERVE)
        .Default(0xffffffff);

  if (Int == 0xffffffff)
    return Error(StringLoc, "invalid dst_unused value");

  Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTySDWADstUnused));
  return ParseStatus::Success;
}

void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOP1);
}

void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOP2);
}

void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true, true);
}

void AMDGPUAsmParser::cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, false, true);
}

void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, isVI());
}

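// Common conversion of parsed SDWA operands. SkipDstVcc/SkipSrcVcc cause an
// explicit "vcc" destination or source operand (VOP2b/VOP2e forms) to be
// dropped rather than added to the MCInst.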
void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands,
                              uint64_t BasicInstType,
                              bool SkipDstVcc,
                              bool SkipSrcVcc) {
  using namespace llvm::AMDGPU::SDWA;

  OptionalImmIndexMap OptionalIdx;
  bool SkipVcc = SkipDstVcc || SkipSrcVcc;
  bool SkippedVcc = false;

  unsigned I = 1;
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
  }

  for (unsigned E = Operands.size(); I != E; ++I) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
    if (SkipVcc && !SkippedVcc && Op.isReg() &&
        (Op.getReg() == AMDGPU::VCC || Op.getReg() == AMDGPU::VCC_LO)) {
      // VOP2b (v_add_u32, v_sub_u32 ...) sdwa use "vcc" token as dst.
      // Skip it if it's 2nd (e.g. v_add_i32_sdwa v1, vcc, v2, v3)
      // or 4th (v_addc_u32_sdwa v1, vcc, v2, v3, vcc) operand.
      // Skip VCC only if we didn't skip it on previous iteration.
      // Note that src0 and src1 occupy 2 slots each because of modifiers.
      if (BasicInstType == SIInstrFlags::VOP2 &&
          ((SkipDstVcc && Inst.getNumOperands() == 1) ||
           (SkipSrcVcc && Inst.getNumOperands() == 5))) {
        SkippedVcc = true;
        continue;
      } else if (BasicInstType == SIInstrFlags::VOPC &&
                 Inst.getNumOperands() == 0) {
        SkippedVcc = true;
        continue;
      }
    }
    if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
      Op.addRegOrImmWithInputModsOperands(Inst, 2);
    } else if (Op.isImm()) {
      // Handle optional arguments
      OptionalIdx[Op.getImmTy()] = I;
    } else {
      llvm_unreachable("Invalid operand type");
    }
    SkippedVcc = false;
  }

  const unsigned Opc = Inst.getOpcode();
  if (Opc != AMDGPU::V_NOP_sdwa_gfx10 && Opc != AMDGPU::V_NOP_sdwa_gfx9 &&
      Opc != AMDGPU::V_NOP_sdwa_vi) {
    // v_nop_sdwa_sdwa_vi/gfx9 has no optional sdwa arguments
    switch (BasicInstType) {
    case SIInstrFlags::VOP1:
      if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp))
        addOptionalImmOperand(Inst, Operands, OptionalIdx,
                              AMDGPUOperand::ImmTyClampSI, 0);

      if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::omod))
        addOptionalImmOperand(Inst, Operands, OptionalIdx,
                              AMDGPUOperand::ImmTyOModSI, 0);

      if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::dst_sel))
        addOptionalImmOperand(Inst, Operands, OptionalIdx,
                              AMDGPUOperand::ImmTySDWADstSel, SdwaSel::DWORD);

      if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::dst_unused))
        addOptionalImmOperand(Inst, Operands, OptionalIdx,
                              AMDGPUOperand::ImmTySDWADstUnused,
                              DstUnused::UNUSED_PRESERVE);

      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc0Sel, SdwaSel::DWORD);
      break;

    case SIInstrFlags::VOP2:
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);

      if (AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::omod))
        addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);

      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWADstSel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWADstUnused, DstUnused::UNUSED_PRESERVE);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc0Sel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc1Sel, SdwaSel::DWORD);
      break;

    case SIInstrFlags::VOPC:
      if (AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::clamp))
        addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc0Sel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc1Sel, SdwaSel::DWORD);
      break;

    default:
      llvm_unreachable("Invalid instruction type. Only VOP1, VOP2 and VOPC allowed");
    }
  }

  // special case v_mac_{f16, f32}:
  // it has src2 register operand that is tied to dst operand
  if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
      Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
    auto it = Inst.begin();
    std::advance(
        it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2));
    Inst.insert(it, Inst.getOperand(0)); // src2 = dst
  }
}

/// Force static initialization.
extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUAsmParser() {
  RegisterMCAsmParser<AMDGPUAsmParser> A(getTheR600Target());
  RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget());
}

#define GET_REGISTER_MATCHER
#define GET_MATCHER_IMPLEMENTATION
#define GET_MNEMONIC_SPELL_CHECKER
#define GET_MNEMONIC_CHECKER
#include "AMDGPUGenAsmMatcher.inc"

ParseStatus AMDGPUAsmParser::parseCustomOperand(OperandVector &Operands,
                                                unsigned MCK) {
  switch (MCK) {
  case MCK_addr64:
    return parseTokenOp("addr64", Operands);
  case MCK_done:
    return parseTokenOp("done", Operands);
  case MCK_idxen:
    return parseTokenOp("idxen", Operands);
  case MCK_lds:
    return parseTokenOp("lds", Operands);
  case MCK_offen:
    return parseTokenOp("offen", Operands);
  case MCK_off:
    return parseTokenOp("off", Operands);
  case MCK_row_95_en:
    return parseTokenOp("row_en", Operands);
  case MCK_gds:
    return parseNamedBit("gds", Operands, AMDGPUOperand::ImmTyGDS);
  case MCK_tfe:
    return parseNamedBit("tfe", Operands, AMDGPUOperand::ImmTyTFE);
  }
  return tryCustomParseOperand(Operands, MCK);
}

// This function should be defined after auto-generated include so that we have
// MatchClassKind enum defined
unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op,
                                                     unsigned Kind) {
  // Tokens like "glc" would be parsed as immediate operands in ParseOperand().
  // But MatchInstructionImpl() expects to meet token and fails to validate
  // operand. This method checks if we are given immediate operand but expect to
  // get corresponding token.
  AMDGPUOperand &Operand = (AMDGPUOperand &)Op;
  switch (Kind) {
  case MCK_addr64:
    return Operand.isAddr64() ? Match_Success : Match_InvalidOperand;
  case MCK_gds:
    return Operand.isGDS() ? Match_Success : Match_InvalidOperand;
  case MCK_lds:
    return Operand.isLDS() ? Match_Success : Match_InvalidOperand;
  case MCK_idxen:
    return Operand.isIdxen() ? Match_Success : Match_InvalidOperand;
  case MCK_offen:
    return Operand.isOffen() ? Match_Success : Match_InvalidOperand;
  case MCK_tfe:
    return Operand.isTFE() ? Match_Success : Match_InvalidOperand;
  case MCK_SSrcB32:
    // When operands have expression values, they will return true for isToken,
    // because it is not possible to distinguish between a token and an
    // expression at parse time. MatchInstructionImpl() will always try to
    // match an operand as a token, when isToken returns true, and when the
    // name of the expression is not a valid token, the match will fail,
    // so we need to handle it here.
    return Operand.isSSrcB32() ? Match_Success : Match_InvalidOperand;
  case MCK_SSrcF32:
    return Operand.isSSrcF32() ? Match_Success : Match_InvalidOperand;
  case MCK_SOPPBrTarget:
    return Operand.isSOPPBrTarget() ? Match_Success : Match_InvalidOperand;
  case MCK_VReg32OrOff:
    return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand;
  case MCK_InterpSlot:
    return Operand.isInterpSlot() ? Match_Success : Match_InvalidOperand;
  case MCK_InterpAttr:
    return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand;
  case MCK_InterpAttrChan:
    return Operand.isInterpAttrChan() ? Match_Success : Match_InvalidOperand;
  case MCK_SReg_64:
  case MCK_SReg_64_XEXEC:
    // Null is defined as a 32-bit register but
    // it should also be enabled with 64-bit operands.
    // The following code enables it for SReg_64 operands
    // used as source and destination. Remaining source
    // operands are handled in isInlinableImm.
    return Operand.isNull() ? Match_Success : Match_InvalidOperand;
  default:
    return Match_InvalidOperand;
  }
}

//===----------------------------------------------------------------------===//
// endpgm
//===----------------------------------------------------------------------===//

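// Parse the optional immediate operand of s_endpgm; defaults to 0 when absent.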
ParseStatus AMDGPUAsmParser::parseEndpgm(OperandVector &Operands) {
  SMLoc S = getLoc();
  int64_t Imm = 0;

  if (!parseExpr(Imm)) {
    // The operand is optional, if not present default to 0
    Imm = 0;
  }

  if (!isUInt<16>(Imm))
    return Error(S, "expected a 16-bit value");

  Operands.push_back(
      AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyEndpgm));
  return ParseStatus::Success;
}

bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmTyEndpgm); }

//===----------------------------------------------------------------------===//
// LDSDIR
//===----------------------------------------------------------------------===//

bool AMDGPUOperand::isWaitVDST() const {
  return isImmTy(ImmTyWaitVDST) && isUInt<4>(getImm());
}

//===----------------------------------------------------------------------===//
// VINTERP
//===----------------------------------------------------------------------===//

bool AMDGPUOperand::isWaitEXP() const {
  return isImmTy(ImmTyWaitEXP) && isUInt<3>(getImm());
}