//===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//===----------------------------------------------------------------------===//

#include "AMDKernelCodeT.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "MCTargetDesc/AMDGPUTargetStreamer.h"
#include "SIDefines.h"
#include "SIInstrInfo.h"
#include "TargetInfo/AMDGPUTargetInfo.h"
#include "Utils/AMDGPUAsmUtils.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "Utils/AMDKernelCodeTUtils.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/ADT/Twine.h"
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCParser/MCAsmLexer.h"
#include "llvm/MC/MCParser/MCAsmParser.h"
#include "llvm/MC/MCParser/MCAsmParserExtension.h"
#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
#include "llvm/MC/MCParser/MCTargetAsmParser.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/AMDGPUMetadata.h"
#include "llvm/Support/AMDHSAKernelDescriptor.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MachineValueType.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/SMLoc.h"
#include "llvm/Support/TargetParser.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm::AMDGPU;
using namespace llvm::amdhsa;

class AMDGPUAsmParser;

enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_AGPR, IS_TTMP, IS_SPECIAL };

//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//

class AMDGPUOperand : public MCParsedAsmOperand {
  SMLoc StartLoc, EndLoc;
  const AMDGPUAsmParser *AsmParser;

  AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_)
      : MCParsedAsmOperand(), Kind(Kind_), AsmParser(AsmParser_) {}

  using Ptr = std::unique_ptr<AMDGPUOperand>;

  bool hasFPModifiers() const { return Abs || Neg; }
  bool hasIntModifiers() const { return Sext; }
  bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); }
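  // Source-modifier syntax (illustrative): "-v0" sets Neg, "|v0|" sets Abs and
  // "sext(v0)" sets Sext; FP and integer modifiers are mutually exclusive on a
  // single operand, as asserted in getModifiersOperand() below.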
  int64_t getFPModifiersOperand() const {
    Operand |= Abs ? SISrcMods::ABS : 0u;
    Operand |= Neg ? SISrcMods::NEG : 0u;

  int64_t getIntModifiersOperand() const {
    Operand |= Sext ? SISrcMods::SEXT : 0u;

  int64_t getModifiersOperand() const {
    assert(!(hasFPModifiers() && hasIntModifiers())
           && "fp and int modifiers should not be used simultaneously");
    if (hasFPModifiers()) {
      return getFPModifiersOperand();
    } else if (hasIntModifiers()) {
      return getIntModifiersOperand();

  friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);

  bool isToken() const override {
    // When parsing operands, we can't always tell if something was meant to be
    // a token, like 'gds', or an expression that references a global variable.
    // In this case, we assume the string is an expression, and if we need to
    // interpret it as a token, then we treat the symbol name as the token.
    return isSymbolRefExpr();
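    // For example, in "ds_write_b32 v0, v1 gds" the trailing "gds" specifier
    // may first be parsed as a symbol reference and is only reinterpreted here
    // as a token (illustrative).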
  bool isSymbolRefExpr() const {
    return isExpr() && Expr && isa<MCSymbolRefExpr>(Expr);

  bool isImm() const override {
    return Kind == Immediate;

  bool isInlinableImm(MVT type) const;
  bool isLiteralImm(MVT type) const;

  bool isRegKind() const {
    return Kind == Register;

  bool isReg() const override {
    return isRegKind() && !hasModifiers();

  bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const {
    return isRegClass(RCID) || isInlinableImm(type) || isLiteralImm(type);

  bool isRegOrImmWithInt16InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16);

  bool isRegOrImmWithInt32InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32);

  bool isRegOrImmWithInt64InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64);

  bool isRegOrImmWithFP16InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16);

  bool isRegOrImmWithFP32InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32);

  bool isRegOrImmWithFP64InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64);

  bool isVReg() const {
    return isRegClass(AMDGPU::VGPR_32RegClassID) ||
           isRegClass(AMDGPU::VReg_64RegClassID) ||
           isRegClass(AMDGPU::VReg_96RegClassID) ||
           isRegClass(AMDGPU::VReg_128RegClassID) ||
           isRegClass(AMDGPU::VReg_160RegClassID) ||
           isRegClass(AMDGPU::VReg_256RegClassID) ||
           isRegClass(AMDGPU::VReg_512RegClassID) ||
           isRegClass(AMDGPU::VReg_1024RegClassID);

  bool isVReg32() const {
    return isRegClass(AMDGPU::VGPR_32RegClassID);

  bool isVReg32OrOff() const {
    return isOff() || isVReg32();

  bool isNull() const {
    return isRegKind() && getReg() == AMDGPU::SGPR_NULL;

  bool isSDWAOperand(MVT type) const;
  bool isSDWAFP16Operand() const;
  bool isSDWAFP32Operand() const;
  bool isSDWAInt16Operand() const;
  bool isSDWAInt32Operand() const;

  bool isImmTy(ImmTy ImmT) const {
    return isImm() && Imm.Type == ImmT;

  bool isImmModifier() const {
    return isImm() && Imm.Type != ImmTyNone;

  bool isClampSI() const { return isImmTy(ImmTyClampSI); }
  bool isOModSI() const { return isImmTy(ImmTyOModSI); }
  bool isDMask() const { return isImmTy(ImmTyDMask); }
  bool isDim() const { return isImmTy(ImmTyDim); }
  bool isUNorm() const { return isImmTy(ImmTyUNorm); }
  bool isDA() const { return isImmTy(ImmTyDA); }
  bool isR128A16() const { return isImmTy(ImmTyR128A16); }
  bool isLWE() const { return isImmTy(ImmTyLWE); }
  bool isOff() const { return isImmTy(ImmTyOff); }
  bool isExpTgt() const { return isImmTy(ImmTyExpTgt); }
  bool isExpVM() const { return isImmTy(ImmTyExpVM); }
  bool isExpCompr() const { return isImmTy(ImmTyExpCompr); }
  bool isOffen() const { return isImmTy(ImmTyOffen); }
  bool isIdxen() const { return isImmTy(ImmTyIdxen); }
  bool isAddr64() const { return isImmTy(ImmTyAddr64); }
  bool isOffset() const { return isImmTy(ImmTyOffset) && isUInt<16>(getImm()); }
  bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<8>(getImm()); }
  bool isOffset1() const { return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); }

  bool isFlatOffset() const { return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); }
  bool isGDS() const { return isImmTy(ImmTyGDS); }
  bool isLDS() const { return isImmTy(ImmTyLDS); }
  bool isDLC() const { return isImmTy(ImmTyDLC); }
  bool isGLC() const { return isImmTy(ImmTyGLC); }
  bool isSLC() const { return isImmTy(ImmTySLC); }
  bool isSWZ() const { return isImmTy(ImmTySWZ); }
  bool isTFE() const { return isImmTy(ImmTyTFE); }
  bool isD16() const { return isImmTy(ImmTyD16); }
  bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<8>(getImm()); }
  bool isBankMask() const { return isImmTy(ImmTyDppBankMask); }
  bool isRowMask() const { return isImmTy(ImmTyDppRowMask); }
  bool isBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); }
  bool isFI() const { return isImmTy(ImmTyDppFi); }
  bool isSDWADstSel() const { return isImmTy(ImmTySdwaDstSel); }
  bool isSDWASrc0Sel() const { return isImmTy(ImmTySdwaSrc0Sel); }
  bool isSDWASrc1Sel() const { return isImmTy(ImmTySdwaSrc1Sel); }
  bool isSDWADstUnused() const { return isImmTy(ImmTySdwaDstUnused); }
  bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); }
  bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); }
  bool isAttrChan() const { return isImmTy(ImmTyAttrChan); }
  bool isOpSel() const { return isImmTy(ImmTyOpSel); }
  bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); }
  bool isNegLo() const { return isImmTy(ImmTyNegLo); }
  bool isNegHi() const { return isImmTy(ImmTyNegHi); }
  bool isHigh() const { return isImmTy(ImmTyHigh); }
    return isClampSI() || isOModSI();

  bool isRegOrImm() const {
    return isReg() || isImm();

  bool isRegClass(unsigned RCID) const;

  bool isInlineValue() const;

  bool isRegOrInlineNoMods(unsigned RCID, MVT type) const {
    return (isRegClass(RCID) || isInlinableImm(type)) && !hasModifiers();

  bool isSCSrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16);

  bool isSCSrcV2B16() const {

  bool isSCSrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32);

  bool isSCSrcB64() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64);

  bool isBoolReg() const;

  bool isSCSrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16);

  bool isSCSrcV2F16() const {

  bool isSCSrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32);

  bool isSCSrcF64() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64);

  bool isSSrcB32() const {
    return isSCSrcB32() || isLiteralImm(MVT::i32) || isExpr();

  bool isSSrcB16() const {
    return isSCSrcB16() || isLiteralImm(MVT::i16);

  bool isSSrcV2B16() const {
    llvm_unreachable("cannot happen");

  bool isSSrcB64() const {
    // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits.
    return isSCSrcB64() || isLiteralImm(MVT::i64);

  bool isSSrcF32() const {
    return isSCSrcB32() || isLiteralImm(MVT::f32) || isExpr();

  bool isSSrcF64() const {
    return isSCSrcB64() || isLiteralImm(MVT::f64);

  bool isSSrcF16() const {
    return isSCSrcB16() || isLiteralImm(MVT::f16);

  bool isSSrcV2F16() const {
    llvm_unreachable("cannot happen");

  bool isSSrcOrLdsB32() const {
    return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) ||
           isLiteralImm(MVT::i32) || isExpr();

  bool isVCSrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32);

  bool isVCSrcB64() const {
    return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64);

  bool isVCSrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16);

  bool isVCSrcV2B16() const {

  bool isVCSrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32);

  bool isVCSrcF64() const {
    return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64);

  bool isVCSrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16);

  bool isVCSrcV2F16() const {

  bool isVSrcB32() const {
    return isVCSrcF32() || isLiteralImm(MVT::i32) || isExpr();

  bool isVSrcB64() const {
    return isVCSrcF64() || isLiteralImm(MVT::i64);

  bool isVSrcB16() const {
    return isVCSrcF16() || isLiteralImm(MVT::i16);

  bool isVSrcV2B16() const {
    return isVSrcB16() || isLiteralImm(MVT::v2i16);

  bool isVSrcF32() const {
    return isVCSrcF32() || isLiteralImm(MVT::f32) || isExpr();

  bool isVSrcF64() const {
    return isVCSrcF64() || isLiteralImm(MVT::f64);

  bool isVSrcF16() const {
    return isVCSrcF16() || isLiteralImm(MVT::f16);

  bool isVSrcV2F16() const {
    return isVSrcF16() || isLiteralImm(MVT::v2f16);

  bool isVISrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i32);

  bool isVISrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i16);

  bool isVISrcV2B16() const {

  bool isVISrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f32);

  bool isVISrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f16);

  bool isVISrcV2F16() const {
    return isVISrcF16() || isVISrcB32();

  bool isAISrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i32);

  bool isAISrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i16);

  bool isAISrcV2B16() const {

  bool isAISrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f32);

  bool isAISrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f16);

  bool isAISrcV2F16() const {
    return isAISrcF16() || isAISrcB32();

  bool isAISrc_128B32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i32);

  bool isAISrc_128B16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i16);

  bool isAISrc_128V2B16() const {
    return isAISrc_128B16();

  bool isAISrc_128F32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f32);

  bool isAISrc_128F16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f16);

  bool isAISrc_128V2F16() const {
    return isAISrc_128F16() || isAISrc_128B32();

  bool isAISrc_512B32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i32);

  bool isAISrc_512B16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i16);

  bool isAISrc_512V2B16() const {
    return isAISrc_512B16();

  bool isAISrc_512F32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f32);

  bool isAISrc_512F16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f16);

  bool isAISrc_512V2F16() const {
    return isAISrc_512F16() || isAISrc_512B32();

  bool isAISrc_1024B32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i32);

  bool isAISrc_1024B16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i16);

  bool isAISrc_1024V2B16() const {
    return isAISrc_1024B16();

  bool isAISrc_1024F32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f32);

  bool isAISrc_1024F16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f16);

  bool isAISrc_1024V2F16() const {
    return isAISrc_1024F16() || isAISrc_1024B32();

  bool isKImmFP32() const {
    return isLiteralImm(MVT::f32);

  bool isKImmFP16() const {
    return isLiteralImm(MVT::f16);

  bool isMem() const override {

  bool isExpr() const {
    return Kind == Expression;

  bool isSoppBrTarget() const {
    return isExpr() || isImm();

  bool isSWaitCnt() const;
  bool isHwreg() const;
  bool isSendMsg() const;
  bool isSwizzle() const;
  bool isSMRDOffset8() const;
  bool isSMRDOffset20() const;
  bool isSMRDLiteralOffset() const;
  bool isDPPCtrl() const;
  bool isGPRIdxMode() const;
  bool isS16Imm() const;
  bool isU16Imm() const;
  bool isEndpgm() const;
  StringRef getExpressionAsToken() const {
    const MCSymbolRefExpr *S = cast<MCSymbolRefExpr>(Expr);
    return S->getSymbol().getName();

  StringRef getToken() const {
    if (Kind == Expression)
      return getExpressionAsToken();
    return StringRef(Tok.Data, Tok.Length);

  int64_t getImm() const {

  ImmTy getImmTy() const {

  unsigned getReg() const override {

  SMLoc getStartLoc() const override {

  SMLoc getEndLoc() const override {

  SMRange getLocRange() const {
    return SMRange(StartLoc, EndLoc);

  Modifiers getModifiers() const {
    assert(isRegKind() || isImmTy(ImmTyNone));
    return isRegKind() ? Reg.Mods : Imm.Mods;

  void setModifiers(Modifiers Mods) {
    assert(isRegKind() || isImmTy(ImmTyNone));

  bool hasModifiers() const {
    return getModifiers().hasModifiers();

  bool hasFPModifiers() const {
    return getModifiers().hasFPModifiers();

  bool hasIntModifiers() const {
    return getModifiers().hasIntModifiers();

  uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const;

  void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const;

  void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const;

  template <unsigned Bitwidth>
  void addKImmFPOperands(MCInst &Inst, unsigned N) const;

  void addKImmFP16Operands(MCInst &Inst, unsigned N) const {
    addKImmFPOperands<16>(Inst, N);

  void addKImmFP32Operands(MCInst &Inst, unsigned N) const {
    addKImmFPOperands<32>(Inst, N);

  void addRegOperands(MCInst &Inst, unsigned N) const;

  void addBoolRegOperands(MCInst &Inst, unsigned N) const {
    addRegOperands(Inst, N);

  void addRegOrImmOperands(MCInst &Inst, unsigned N) const {
    addRegOperands(Inst, N);
    Inst.addOperand(MCOperand::createExpr(Expr));
    addImmOperands(Inst, N);

  void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const {
    Modifiers Mods = getModifiers();
    Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
    addRegOperands(Inst, N);
    addImmOperands(Inst, N, false);

  void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasIntModifiers());
    addRegOrImmWithInputModsOperands(Inst, N);

  void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasFPModifiers());
    addRegOrImmWithInputModsOperands(Inst, N);

  void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const {
    Modifiers Mods = getModifiers();
    Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
    addRegOperands(Inst, N);

  void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasIntModifiers());
    addRegWithInputModsOperands(Inst, N);

  void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasFPModifiers());
    addRegWithInputModsOperands(Inst, N);

  void addSoppBrTargetOperands(MCInst &Inst, unsigned N) const {
    addImmOperands(Inst, N);
    Inst.addOperand(MCOperand::createExpr(Expr));

  static void printImmTy(raw_ostream& OS, ImmTy Type) {
    case ImmTyNone: OS << "None"; break;
    case ImmTyGDS: OS << "GDS"; break;
    case ImmTyLDS: OS << "LDS"; break;
    case ImmTyOffen: OS << "Offen"; break;
    case ImmTyIdxen: OS << "Idxen"; break;
    case ImmTyAddr64: OS << "Addr64"; break;
    case ImmTyOffset: OS << "Offset"; break;
    case ImmTyInstOffset: OS << "InstOffset"; break;
    case ImmTyOffset0: OS << "Offset0"; break;
    case ImmTyOffset1: OS << "Offset1"; break;
    case ImmTyDLC: OS << "DLC"; break;
    case ImmTyGLC: OS << "GLC"; break;
    case ImmTySLC: OS << "SLC"; break;
    case ImmTySWZ: OS << "SWZ"; break;
    case ImmTyTFE: OS << "TFE"; break;
    case ImmTyD16: OS << "D16"; break;
    case ImmTyFORMAT: OS << "FORMAT"; break;
    case ImmTyClampSI: OS << "ClampSI"; break;
    case ImmTyOModSI: OS << "OModSI"; break;
    case ImmTyDPP8: OS << "DPP8"; break;
    case ImmTyDppCtrl: OS << "DppCtrl"; break;
    case ImmTyDppRowMask: OS << "DppRowMask"; break;
    case ImmTyDppBankMask: OS << "DppBankMask"; break;
    case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break;
    case ImmTyDppFi: OS << "FI"; break;
    case ImmTySdwaDstSel: OS << "SdwaDstSel"; break;
    case ImmTySdwaSrc0Sel: OS << "SdwaSrc0Sel"; break;
    case ImmTySdwaSrc1Sel: OS << "SdwaSrc1Sel"; break;
    case ImmTySdwaDstUnused: OS << "SdwaDstUnused"; break;
    case ImmTyDMask: OS << "DMask"; break;
    case ImmTyDim: OS << "Dim"; break;
    case ImmTyUNorm: OS << "UNorm"; break;
    case ImmTyDA: OS << "DA"; break;
    case ImmTyR128A16: OS << "R128A16"; break;
    case ImmTyLWE: OS << "LWE"; break;
    case ImmTyOff: OS << "Off"; break;
    case ImmTyExpTgt: OS << "ExpTgt"; break;
    case ImmTyExpCompr: OS << "ExpCompr"; break;
    case ImmTyExpVM: OS << "ExpVM"; break;
    case ImmTyHwreg: OS << "Hwreg"; break;
    case ImmTySendMsg: OS << "SendMsg"; break;
    case ImmTyInterpSlot: OS << "InterpSlot"; break;
    case ImmTyInterpAttr: OS << "InterpAttr"; break;
    case ImmTyAttrChan: OS << "AttrChan"; break;
    case ImmTyOpSel: OS << "OpSel"; break;
    case ImmTyOpSelHi: OS << "OpSelHi"; break;
    case ImmTyNegLo: OS << "NegLo"; break;
    case ImmTyNegHi: OS << "NegHi"; break;
    case ImmTySwizzle: OS << "Swizzle"; break;
    case ImmTyGprIdxMode: OS << "GprIdxMode"; break;
    case ImmTyHigh: OS << "High"; break;
    case ImmTyBLGP: OS << "BLGP"; break;
    case ImmTyCBSZ: OS << "CBSZ"; break;
    case ImmTyABID: OS << "ABID"; break;
    case ImmTyEndpgm: OS << "Endpgm"; break;
  void print(raw_ostream &OS) const override {
      OS << "<register " << getReg() << " mods: " << Reg.Mods << '>';
      OS << '<' << getImm();
      if (getImmTy() != ImmTyNone) {
        OS << " type: "; printImmTy(OS, getImmTy());
      OS << " mods: " << Imm.Mods << '>';
      OS << '\'' << getToken() << '\'';
      OS << "<expr " << *Expr << '>';

  static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser,
                                      int64_t Val, SMLoc Loc,
                                      ImmTy Type = ImmTyNone,
                                      bool IsFPImm = false) {
    auto Op = std::make_unique<AMDGPUOperand>(Immediate, AsmParser);
    Op->Imm.IsFPImm = IsFPImm;
    Op->Imm.Mods = Modifiers();

  static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser,
                                        StringRef Str, SMLoc Loc,
                                        bool HasExplicitEncodingSize = true) {
    auto Res = std::make_unique<AMDGPUOperand>(Token, AsmParser);
    Res->Tok.Data = Str.data();
    Res->Tok.Length = Str.size();

  static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser,
                                      unsigned RegNo, SMLoc S,
    auto Op = std::make_unique<AMDGPUOperand>(Register, AsmParser);
    Op->Reg.RegNo = RegNo;
    Op->Reg.Mods = Modifiers();

  static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser,
                                       const class MCExpr *Expr, SMLoc S) {
    auto Op = std::make_unique<AMDGPUOperand>(Expression, AsmParser);

raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) {
  OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext;
//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//

// Holds info related to the current kernel, e.g. count of SGPRs used.
// Kernel scope begins at .amdgpu_hsa_kernel directive, ends at next
// .amdgpu_hsa_kernel or at EOF.
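// Illustrative assembler input (kernel name is hypothetical):
//   .amdgpu_hsa_kernel my_kernel
//   my_kernel:
//     v_mov_b32 v7, 0        ; raises .kernel.vgpr_count to at least 8
//     s_endpgm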
class KernelScopeInfo {
  int SgprIndexUnusedMin = -1;
  int VgprIndexUnusedMin = -1;
  MCContext *Ctx = nullptr;

  void usesSgprAt(int i) {
    if (i >= SgprIndexUnusedMin) {
      SgprIndexUnusedMin = ++i;
        MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count"));
        Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx));

  void usesVgprAt(int i) {
    if (i >= VgprIndexUnusedMin) {
      VgprIndexUnusedMin = ++i;
        MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
        Sym->setVariableValue(MCConstantExpr::create(VgprIndexUnusedMin, *Ctx));

  KernelScopeInfo() = default;

  void initialize(MCContext &Context) {
    usesSgprAt(SgprIndexUnusedMin = -1);
    usesVgprAt(VgprIndexUnusedMin = -1);

  void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex, unsigned RegWidth) {
    case IS_SGPR: usesSgprAt(DwordRegIndex + RegWidth - 1); break;
    case IS_AGPR: // fall through
    case IS_VGPR: usesVgprAt(DwordRegIndex + RegWidth - 1); break;

class AMDGPUAsmParser : public MCTargetAsmParser {
  // Number of extra operands parsed after the first optional operand.
  // This may be necessary to skip hardcoded mandatory operands.
  static const unsigned MAX_OPR_LOOKAHEAD = 8;

  unsigned ForcedEncodingSize = 0;
  bool ForcedDPP = false;
  bool ForcedSDWA = false;
  KernelScopeInfo KernelScope;

  /// @name Auto-generated Match Functions
#define GET_ASSEMBLER_HEADER
#include "AMDGPUGenAsmMatcher.inc"

  bool ParseAsAbsoluteExpression(uint32_t &Ret);
  bool OutOfRangeError(SMRange Range);
  /// Calculate VGPR/SGPR blocks required for given target, reserved
  /// registers, and user-specified NextFreeXGPR values.
  /// \param Features [in] Target features, used for bug corrections.
  /// \param VCCUsed [in] Whether VCC special SGPR is reserved.
  /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved.
  /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved.
  /// \param EnableWavefrontSize32 [in] Value of ENABLE_WAVEFRONT_SIZE32 kernel
  /// descriptor field, if valid.
  /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one.
  /// \param VGPRRange [in] Token range, used for VGPR diagnostics.
  /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one.
  /// \param SGPRRange [in] Token range, used for SGPR diagnostics.
  /// \param VGPRBlocks [out] Result VGPR block count.
  /// \param SGPRBlocks [out] Result SGPR block count.
  bool calculateGPRBlocks(const FeatureBitset &Features, bool VCCUsed,
                          bool FlatScrUsed, bool XNACKUsed,
                          Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR,
                          SMRange VGPRRange, unsigned NextFreeSGPR,
                          SMRange SGPRRange, unsigned &VGPRBlocks,
                          unsigned &SGPRBlocks);
  bool ParseDirectiveAMDGCNTarget();
  bool ParseDirectiveAMDHSAKernel();
  bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor);
  bool ParseDirectiveHSACodeObjectVersion();
  bool ParseDirectiveHSACodeObjectISA();
  bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header);
  bool ParseDirectiveAMDKernelCodeT();
  bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo) const;
  bool ParseDirectiveAMDGPUHsaKernel();

  bool ParseDirectiveISAVersion();
  bool ParseDirectiveHSAMetadata();
  bool ParseDirectivePALMetadataBegin();
  bool ParseDirectivePALMetadata();
  bool ParseDirectiveAMDGPULDS();

  /// Common code to parse out a block of text (typically YAML) between start
  /// and end directives.
  bool ParseToEndDirective(const char *AssemblerDirectiveBegin,
                           const char *AssemblerDirectiveEnd,
                           std::string &CollectString);
  bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth,
                             RegisterKind RegKind, unsigned Reg1);
  bool ParseAMDGPURegister(RegisterKind& RegKind, unsigned& Reg,
                           unsigned& RegNum, unsigned& RegWidth);
  unsigned ParseRegularReg(RegisterKind &RegKind,
                           unsigned &RegWidth);
  unsigned ParseSpecialReg(RegisterKind &RegKind,
                           unsigned &RegWidth);
  unsigned ParseRegList(RegisterKind &RegKind,
                        unsigned &RegWidth);
  bool ParseRegRange(unsigned& Num, unsigned& Width);
  unsigned getRegularReg(RegisterKind RegKind,

  bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const;
  Optional<StringRef> getGprCountSymbolName(RegisterKind RegKind);
  void initializeGprCountSymbol(RegisterKind RegKind);
  bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex,
  void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands,
                    bool IsAtomic, bool IsAtomicReturn, bool IsLds = false);
  void cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
                 bool IsGdsHardcoded);

  enum AMDGPUMatchResultTy {
    Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY

    OperandMode_Default,

  using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;

  AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
                  const MCInstrInfo &MII,
                  const MCTargetOptions &Options)
      : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) {
    MCAsmParserExtension::Initialize(Parser);

    if (getFeatureBits().none()) {
      // Set default features.
      copySTI().ToggleFeature("southern-islands");

    setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));

    // TODO: make those pre-defined variables read-only.
    // Currently there is no suitable machinery in the core llvm-mc for this.
    // MCSymbol::isRedefinable is intended for another purpose, and
    // AsmParser::parseDirectiveSet() cannot be specialized for a specific target.
    AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
    MCContext &Ctx = getContext();
    if (ISA.Major >= 6 && AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
          Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_number"));
      Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
      Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_minor"));
      Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
      Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_stepping"));
      Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
          Ctx.getOrCreateSymbol(Twine(".option.machine_version_major"));
      Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
      Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_minor"));
      Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
      Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping"));
      Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
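      // For a CPU such as gfx900 these symbols would evaluate to 9, 0 and 0,
      // respectively (illustrative).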
    if (ISA.Major >= 6 && AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
      initializeGprCountSymbol(IS_VGPR);
      initializeGprCountSymbol(IS_SGPR);
      KernelScope.initialize(getContext());

  bool hasXNACK() const {
    return AMDGPU::hasXNACK(getSTI());

  bool hasMIMG_R128() const {
    return AMDGPU::hasMIMG_R128(getSTI());

  bool hasPackedD16() const {
    return AMDGPU::hasPackedD16(getSTI());

    return AMDGPU::isSI(getSTI());

    return AMDGPU::isCI(getSTI());

    return AMDGPU::isVI(getSTI());

  bool isGFX9() const {
    return AMDGPU::isGFX9(getSTI());

  bool isGFX10() const {
    return AMDGPU::isGFX10(getSTI());

  bool hasInv2PiInlineImm() const {
    return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];

  bool hasFlatOffsets() const {
    return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets];

  bool hasSGPR102_SGPR103() const {
    return !isVI() && !isGFX9();

  bool hasSGPR104_SGPR105() const {

  bool hasIntClamp() const {
    return getFeatureBits()[AMDGPU::FeatureIntClamp];

  AMDGPUTargetStreamer &getTargetStreamer() {
    MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
    return static_cast<AMDGPUTargetStreamer &>(TS);

  const MCRegisterInfo *getMRI() const {
    // We need this const_cast because for some reason getContext() is not const.
    return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo();

  const MCInstrInfo *getMII() const {

  const FeatureBitset &getFeatureBits() const {
    return getSTI().getFeatureBits();

  void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; }
  void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; }
  void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; }

  unsigned getForcedEncodingSize() const { return ForcedEncodingSize; }
  bool isForcedVOP3() const { return ForcedEncodingSize == 64; }
  bool isForcedDPP() const { return ForcedDPP; }
  bool isForcedSDWA() const { return ForcedSDWA; }
  ArrayRef<unsigned> getMatchedVariants() const;

  std::unique_ptr<AMDGPUOperand> parseRegister();
  bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
  unsigned checkTargetMatchPredicate(MCInst &Inst) override;
  unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
                                      unsigned Kind) override;
  bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
                               OperandVector &Operands, MCStreamer &Out,
                               uint64_t &ErrorInfo,
                               bool MatchingInlineAsm) override;
  bool ParseDirective(AsmToken DirectiveID) override;
  OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Mnemonic,
                                    OperandMode Mode = OperandMode_Default);
  StringRef parseMnemonicSuffix(StringRef Name);
  bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
                        SMLoc NameLoc, OperandVector &Operands) override;
  //bool ProcessInstruction(MCInst &Inst);
  OperandMatchResultTy parseIntWithPrefix(const char *Prefix, int64_t &Int);

  OperandMatchResultTy
  parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
                     AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
                     bool (*ConvertResult)(int64_t &) = nullptr);

  OperandMatchResultTy
  parseOperandArrayWithPrefix(const char *Prefix,
                              OperandVector &Operands,
                              AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
                              bool (*ConvertResult)(int64_t&) = nullptr);

  OperandMatchResultTy
  parseNamedBit(const char *Name, OperandVector &Operands,
                AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone);
  OperandMatchResultTy parseStringWithPrefix(StringRef Prefix,

  bool isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
  bool isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
  bool isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
  bool isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const;
  bool parseSP3NegModifier();
  OperandMatchResultTy parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false);
  OperandMatchResultTy parseReg(OperandVector &Operands);
  OperandMatchResultTy parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false);
  OperandMatchResultTy parseRegOrImmWithFPInputMods(OperandVector &Operands, bool AllowImm = true);
  OperandMatchResultTy parseRegOrImmWithIntInputMods(OperandVector &Operands, bool AllowImm = true);
  OperandMatchResultTy parseRegWithFPInputMods(OperandVector &Operands);
  OperandMatchResultTy parseRegWithIntInputMods(OperandVector &Operands);
  OperandMatchResultTy parseVReg32OrOff(OperandVector &Operands);
  OperandMatchResultTy parseDfmtNfmt(OperandVector &Operands);

  void cvtDSOffset01(MCInst &Inst, const OperandVector &Operands);
  void cvtDS(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, false); }
  void cvtDSGds(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, true); }
  void cvtExp(MCInst &Inst, const OperandVector &Operands);

  bool parseCnt(int64_t &IntVal);
  OperandMatchResultTy parseSWaitCntOps(OperandVector &Operands);
  OperandMatchResultTy parseHwreg(OperandVector &Operands);
  struct OperandInfoTy {
    bool IsSymbolic = false;
    bool IsDefined = false;

    OperandInfoTy(int64_t Id_) : Id(Id_) {}

  bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &Op, OperandInfoTy &Stream);
  bool validateSendMsg(const OperandInfoTy &Msg,
                       const OperandInfoTy &Op,
                       const OperandInfoTy &Stream,

  bool parseHwregBody(OperandInfoTy &HwReg, int64_t &Offset, int64_t &Width);
  bool validateHwreg(const OperandInfoTy &HwReg,
                     const int64_t Offset,
                     const int64_t Width,

  OperandMatchResultTy parseExpTgtImpl(StringRef Str, uint8_t &Val);
  SMLoc getFlatOffsetLoc(const OperandVector &Operands) const;

  bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc, const OperandVector &Operands);
  bool validateFlatOffset(const MCInst &Inst, const OperandVector &Operands);
  bool validateSOPLiteral(const MCInst &Inst) const;
  bool validateConstantBusLimitations(const MCInst &Inst);
  bool validateEarlyClobberLimitations(const MCInst &Inst);
  bool validateIntClampSupported(const MCInst &Inst);
  bool validateMIMGAtomicDMask(const MCInst &Inst);
  bool validateMIMGGatherDMask(const MCInst &Inst);
  bool validateMIMGDataSize(const MCInst &Inst);
  bool validateMIMGAddrSize(const MCInst &Inst);
  bool validateMIMGD16(const MCInst &Inst);
  bool validateMIMGDim(const MCInst &Inst);
  bool validateLdsDirect(const MCInst &Inst);
  bool validateOpSel(const MCInst &Inst);
  bool validateVccOperand(unsigned Reg) const;
  bool validateVOP3Literal(const MCInst &Inst) const;
  unsigned getConstantBusLimit(unsigned Opcode) const;
  bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
  bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
  unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const;

  bool isId(const StringRef Id) const;
  bool isId(const AsmToken &Token, const StringRef Id) const;
  bool isToken(const AsmToken::TokenKind Kind) const;
  bool trySkipId(const StringRef Id);
  bool trySkipId(const StringRef Id, const AsmToken::TokenKind Kind);
  bool trySkipToken(const AsmToken::TokenKind Kind);
  bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg);
  bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string");
  void peekTokens(MutableArrayRef<AsmToken> Tokens);
  AsmToken::TokenKind getTokenKind() const;
  bool parseExpr(int64_t &Imm);
  bool parseExpr(OperandVector &Operands);
  StringRef getTokenStr() const;
  AsmToken peekToken();
  AsmToken getToken() const;
  SMLoc getLoc() const;
  OperandMatchResultTy parseOptionalOperand(OperandVector &Operands);
  OperandMatchResultTy parseOptionalOpr(OperandVector &Operands);

  OperandMatchResultTy parseExpTgt(OperandVector &Operands);
  OperandMatchResultTy parseSendMsgOp(OperandVector &Operands);
  OperandMatchResultTy parseInterpSlot(OperandVector &Operands);
  OperandMatchResultTy parseInterpAttr(OperandVector &Operands);
  OperandMatchResultTy parseSOppBrTarget(OperandVector &Operands);
  OperandMatchResultTy parseBoolReg(OperandVector &Operands);

  bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
                            const unsigned MinVal,
                            const unsigned MaxVal,
                            const StringRef ErrMsg);
  OperandMatchResultTy parseSwizzleOp(OperandVector &Operands);
  bool parseSwizzleOffset(int64_t &Imm);
  bool parseSwizzleMacro(int64_t &Imm);
  bool parseSwizzleQuadPerm(int64_t &Imm);
  bool parseSwizzleBitmaskPerm(int64_t &Imm);
  bool parseSwizzleBroadcast(int64_t &Imm);
  bool parseSwizzleSwap(int64_t &Imm);
  bool parseSwizzleReverse(int64_t &Imm);

  OperandMatchResultTy parseGPRIdxMode(OperandVector &Operands);
  int64_t parseGPRIdxMacro();

  void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false); }
  void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, false); }
  void cvtMubufAtomicReturn(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, true); }
  void cvtMubufLds(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false, true); }
  void cvtMtbuf(MCInst &Inst, const OperandVector &Operands);

  AMDGPUOperand::Ptr defaultDLC() const;
  AMDGPUOperand::Ptr defaultGLC() const;
  AMDGPUOperand::Ptr defaultSLC() const;

  AMDGPUOperand::Ptr defaultSMRDOffset8() const;
  AMDGPUOperand::Ptr defaultSMRDOffset20() const;
  AMDGPUOperand::Ptr defaultSMRDLiteralOffset() const;
  AMDGPUOperand::Ptr defaultFlatOffset() const;

  OperandMatchResultTy parseOModOperand(OperandVector &Operands);

  void cvtVOP3(MCInst &Inst, const OperandVector &Operands,
               OptionalImmIndexMap &OptionalIdx);
  void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands);
  void cvtVOP3(MCInst &Inst, const OperandVector &Operands);
  void cvtVOP3P(MCInst &Inst, const OperandVector &Operands);

  void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands);

  void cvtMIMG(MCInst &Inst, const OperandVector &Operands,
               bool IsAtomic = false);
  void cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands);

  OperandMatchResultTy parseDim(OperandVector &Operands);
  OperandMatchResultTy parseDPP8(OperandVector &Operands);
  OperandMatchResultTy parseDPPCtrl(OperandVector &Operands);
  AMDGPUOperand::Ptr defaultRowMask() const;
  AMDGPUOperand::Ptr defaultBankMask() const;
  AMDGPUOperand::Ptr defaultBoundCtrl() const;
  AMDGPUOperand::Ptr defaultFI() const;
  void cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8 = false);
  void cvtDPP8(MCInst &Inst, const OperandVector &Operands) { cvtDPP(Inst, Operands, true); }

  OperandMatchResultTy parseSDWASel(OperandVector &Operands, StringRef Prefix,
                                    AMDGPUOperand::ImmTy Type);
  OperandMatchResultTy parseSDWADstUnused(OperandVector &Operands);
  void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands);
  void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands);
  void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands);
  void cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands);
  void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands);
  void cvtSDWA(MCInst &Inst, const OperandVector &Operands,
               uint64_t BasicInstType,
               bool SkipDstVcc = false,
               bool SkipSrcVcc = false);

  AMDGPUOperand::Ptr defaultBLGP() const;
  AMDGPUOperand::Ptr defaultCBSZ() const;
  AMDGPUOperand::Ptr defaultABID() const;

  OperandMatchResultTy parseEndpgmOp(OperandVector &Operands);
  AMDGPUOperand::Ptr defaultEndpgmImmOperands() const;

struct OptionalOperand {
  AMDGPUOperand::ImmTy Type;
  bool (*ConvertResult)(int64_t&);

} // end anonymous namespace
// May be called with integer type with equivalent bitwidth.
static const fltSemantics *getFltSemantics(unsigned Size) {
    return &APFloat::IEEEsingle();
    return &APFloat::IEEEdouble();
    return &APFloat::IEEEhalf();
    llvm_unreachable("unsupported fp type");

static const fltSemantics *getFltSemantics(MVT VT) {
  return getFltSemantics(VT.getSizeInBits() / 8);

static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
  switch (OperandType) {
  case AMDGPU::OPERAND_REG_IMM_INT32:
  case AMDGPU::OPERAND_REG_IMM_FP32:
  case AMDGPU::OPERAND_REG_INLINE_C_INT32:
  case AMDGPU::OPERAND_REG_INLINE_C_FP32:
  case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
  case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
    return &APFloat::IEEEsingle();
  case AMDGPU::OPERAND_REG_IMM_INT64:
  case AMDGPU::OPERAND_REG_IMM_FP64:
  case AMDGPU::OPERAND_REG_INLINE_C_INT64:
  case AMDGPU::OPERAND_REG_INLINE_C_FP64:
    return &APFloat::IEEEdouble();
  case AMDGPU::OPERAND_REG_IMM_INT16:
  case AMDGPU::OPERAND_REG_IMM_FP16:
  case AMDGPU::OPERAND_REG_INLINE_C_INT16:
  case AMDGPU::OPERAND_REG_INLINE_C_FP16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
  case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
  case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
  case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
  case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
  case AMDGPU::OPERAND_REG_IMM_V2INT16:
  case AMDGPU::OPERAND_REG_IMM_V2FP16:
    return &APFloat::IEEEhalf();
    llvm_unreachable("unsupported fp type");

//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//

static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) {
  // Convert literal to single precision
  APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT),
                                               APFloat::rmNearestTiesToEven,
  // We allow precision loss but not overflow or underflow
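  // (Illustrative: converting 0.1 to f16 merely loses precision and is
  // accepted, while converting 1e10 to f16 overflows and is rejected.)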
  if (Status != APFloat::opOK &&
      ((Status & APFloat::opOverflow) != 0 ||
       (Status & APFloat::opUnderflow) != 0)) {

static bool isSafeTruncation(int64_t Val, unsigned Size) {
  return isUIntN(Size, Val) || isIntN(Size, Val);

bool AMDGPUOperand::isInlinableImm(MVT type) const {
  // This is a hack to enable named inline values like
  // shared_base with both 32-bit and 64-bit operands.
  // Note that these values are defined as
  // 32-bit operands only.
  if (isInlineValue()) {

  if (!isImmTy(ImmTyNone)) {
    // Only plain immediates are inlinable (e.g. "clamp" attribute is not)

  // TODO: We should avoid using host float here. It would be better to
  // check the float bit values which is what a few other places do.
  // We've had bot failures before due to weird NaN support on mips hosts.

  APInt Literal(64, Imm.Val);

  if (Imm.IsFPImm) { // We got fp literal token
    if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
      return AMDGPU::isInlinableLiteral64(Imm.Val,
                                          AsmParser->hasInv2PiInlineImm());

    APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
    if (!canLosslesslyConvertToFPType(FPLiteral, type))

    if (type.getScalarSizeInBits() == 16) {
      return AMDGPU::isInlinableLiteral16(
        static_cast<int16_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
        AsmParser->hasInv2PiInlineImm());

    // Check if single precision literal is inlinable
    return AMDGPU::isInlinableLiteral32(
      static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
      AsmParser->hasInv2PiInlineImm());

  // We got int literal token.
  if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
    return AMDGPU::isInlinableLiteral64(Imm.Val,
                                        AsmParser->hasInv2PiInlineImm());

  if (!isSafeTruncation(Imm.Val, type.getScalarSizeInBits())) {

  if (type.getScalarSizeInBits() == 16) {
    return AMDGPU::isInlinableLiteral16(
      static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()),
      AsmParser->hasInv2PiInlineImm());

  return AMDGPU::isInlinableLiteral32(
    static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()),
    AsmParser->hasInv2PiInlineImm());
bool AMDGPUOperand::isLiteralImm(MVT type) const {
  // Check that this immediate can be added as literal
  if (!isImmTy(ImmTyNone)) {

  // We got int literal token.

  if (type == MVT::f64 && hasFPModifiers()) {
    // Cannot apply fp modifiers to int literals preserving the same semantics
    // for VOP1/2/C and VOP3 because of integer truncation. To avoid ambiguity,
    // disable these cases.

  unsigned Size = type.getSizeInBits();

  // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP
  return isSafeTruncation(Imm.Val, Size);

  // We got fp literal token
  if (type == MVT::f64) { // Expected 64-bit fp operand
    // We would set low 64-bits of literal to zeroes but we accept such literals

  if (type == MVT::i64) { // Expected 64-bit int operand
    // We don't allow fp literals in 64-bit integer instructions. It is
    // unclear how we should encode them.

  // We allow fp literals with f16x2 operands assuming that the specified
  // literal goes into the lower half and the upper half is zero. We also
  // require that the literal may be losslessly converted to f16.
  MVT ExpectedType = (type == MVT::v2f16)? MVT::f16 :
                     (type == MVT::v2i16)? MVT::i16 : type;

  APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
  return canLosslesslyConvertToFPType(FPLiteral, ExpectedType);
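  // Illustrative example: an fp16x2 operand written as "1.0" is encoded with
  // 0x3C00 (half-precision 1.0) in the low 16 bits and zero in the high 16 bits.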
bool AMDGPUOperand::isRegClass(unsigned RCID) const {
  return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg());

bool AMDGPUOperand::isSDWAOperand(MVT type) const {
  if (AsmParser->isVI())
  else if (AsmParser->isGFX9() || AsmParser->isGFX10())
    return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type);

bool AMDGPUOperand::isSDWAFP16Operand() const {
  return isSDWAOperand(MVT::f16);

bool AMDGPUOperand::isSDWAFP32Operand() const {
  return isSDWAOperand(MVT::f32);

bool AMDGPUOperand::isSDWAInt16Operand() const {
  return isSDWAOperand(MVT::i16);

bool AMDGPUOperand::isSDWAInt32Operand() const {
  return isSDWAOperand(MVT::i32);

bool AMDGPUOperand::isBoolReg() const {
  return (AsmParser->getFeatureBits()[AMDGPU::FeatureWavefrontSize64] && isSCSrcB64()) ||
         (AsmParser->getFeatureBits()[AMDGPU::FeatureWavefrontSize32] && isSCSrcB32());

uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const
  assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
  assert(Size == 2 || Size == 4 || Size == 8);

  const uint64_t FpSignMask = (1ULL << (Size * 8 - 1));

void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const {
  if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()),
                             Inst.getNumOperands())) {
    addLiteralImmOperand(Inst, Imm.Val,
                         isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
    assert(!isImmTy(ImmTyNone) || !hasModifiers());
    Inst.addOperand(MCOperand::createImm(Imm.Val));

void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const {
  const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode());
  auto OpNum = Inst.getNumOperands();
  // Check that this operand accepts literals
  assert(AMDGPU::isSISrcOperand(InstDesc, OpNum));

  if (ApplyModifiers) {
    assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum));
    const unsigned Size = Imm.IsFPImm ? sizeof(double) : getOperandSize(InstDesc, OpNum);
    Val = applyInputFPModifiers(Val, Size);

  APInt Literal(64, Val);
  uint8_t OpTy = InstDesc.OpInfo[OpNum].OperandType;

  if (Imm.IsFPImm) { // We got fp literal token
    case AMDGPU::OPERAND_REG_IMM_INT64:
    case AMDGPU::OPERAND_REG_IMM_FP64:
    case AMDGPU::OPERAND_REG_INLINE_C_INT64:
    case AMDGPU::OPERAND_REG_INLINE_C_FP64:
      if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(),
                                       AsmParser->hasInv2PiInlineImm())) {
        Inst.addOperand(MCOperand::createImm(Literal.getZExtValue()));

      if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) { // Expected 64-bit fp operand
        // For fp operands we check if low 32 bits are zeros
        if (Literal.getLoBits(32) != 0) {
          const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(),
            "Can't encode literal as exact 64-bit floating-point operand. "
            "Low 32-bits will be set to zero");

        Inst.addOperand(MCOperand::createImm(Literal.lshr(32).getZExtValue()));

      // We don't allow fp literals in 64-bit integer instructions. It is
      // unclear how we should encode them. This case should be checked earlier
      // in predicate methods (isLiteralImm())
      llvm_unreachable("fp literal in 64-bit integer instruction.");

    case AMDGPU::OPERAND_REG_IMM_INT32:
    case AMDGPU::OPERAND_REG_IMM_FP32:
    case AMDGPU::OPERAND_REG_INLINE_C_INT32:
    case AMDGPU::OPERAND_REG_INLINE_C_FP32:
    case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
    case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
    case AMDGPU::OPERAND_REG_IMM_INT16:
    case AMDGPU::OPERAND_REG_IMM_FP16:
    case AMDGPU::OPERAND_REG_INLINE_C_INT16:
    case AMDGPU::OPERAND_REG_INLINE_C_FP16:
    case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
    case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
    case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
    case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
    case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
    case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
    case AMDGPU::OPERAND_REG_IMM_V2INT16:
    case AMDGPU::OPERAND_REG_IMM_V2FP16: {
      APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
      // Convert literal to single precision
      FPLiteral.convert(*getOpFltSemantics(OpTy),
                        APFloat::rmNearestTiesToEven, &lost);
      // We allow precision loss but not overflow or underflow. This should be
      // checked earlier in isLiteralImm()

      uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
      Inst.addOperand(MCOperand::createImm(ImmVal));

      llvm_unreachable("invalid operand size");

  // We got int literal token.
  // Only sign extend inline immediates.
  case AMDGPU::OPERAND_REG_IMM_INT32:
  case AMDGPU::OPERAND_REG_IMM_FP32:
  case AMDGPU::OPERAND_REG_INLINE_C_INT32:
  case AMDGPU::OPERAND_REG_INLINE_C_FP32:
  case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
  case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
  case AMDGPU::OPERAND_REG_IMM_V2INT16:
  case AMDGPU::OPERAND_REG_IMM_V2FP16:
    if (isSafeTruncation(Val, 32) &&
        AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val),
                                     AsmParser->hasInv2PiInlineImm())) {
      Inst.addOperand(MCOperand::createImm(Val));

    Inst.addOperand(MCOperand::createImm(Val & 0xffffffff));

  case AMDGPU::OPERAND_REG_IMM_INT64:
  case AMDGPU::OPERAND_REG_IMM_FP64:
  case AMDGPU::OPERAND_REG_INLINE_C_INT64:
  case AMDGPU::OPERAND_REG_INLINE_C_FP64:
    if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) {
      Inst.addOperand(MCOperand::createImm(Val));

    Inst.addOperand(MCOperand::createImm(Lo_32(Val)));

  case AMDGPU::OPERAND_REG_IMM_INT16:
  case AMDGPU::OPERAND_REG_IMM_FP16:
  case AMDGPU::OPERAND_REG_INLINE_C_INT16:
  case AMDGPU::OPERAND_REG_INLINE_C_FP16:
  case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
  case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
    if (isSafeTruncation(Val, 16) &&
        AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
                                     AsmParser->hasInv2PiInlineImm())) {
      Inst.addOperand(MCOperand::createImm(Val));

    Inst.addOperand(MCOperand::createImm(Val & 0xffff));

  case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
  case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
  case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: {
    assert(isSafeTruncation(Val, 16));
    assert(AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
                                        AsmParser->hasInv2PiInlineImm()));

    Inst.addOperand(MCOperand::createImm(Val));

    llvm_unreachable("invalid operand size");
1846 template <unsigned Bitwidth
>
1847 void AMDGPUOperand::addKImmFPOperands(MCInst
&Inst
, unsigned N
) const {
1848 APInt
Literal(64, Imm
.Val
);
1851 // We got int literal token.
1852 Inst
.addOperand(MCOperand::createImm(Literal
.getLoBits(Bitwidth
).getZExtValue()));
1857 APFloat
FPLiteral(APFloat::IEEEdouble(), Literal
);
1858 FPLiteral
.convert(*getFltSemantics(Bitwidth
/ 8),
1859 APFloat::rmNearestTiesToEven
, &Lost
);
1860 Inst
.addOperand(MCOperand::createImm(FPLiteral
.bitcastToAPInt().getZExtValue()));
1863 void AMDGPUOperand::addRegOperands(MCInst
&Inst
, unsigned N
) const {
1864 Inst
.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser
->getSTI())));
1867 static bool isInlineValue(unsigned Reg
) {
1869 case AMDGPU::SRC_SHARED_BASE
:
1870 case AMDGPU::SRC_SHARED_LIMIT
:
1871 case AMDGPU::SRC_PRIVATE_BASE
:
1872 case AMDGPU::SRC_PRIVATE_LIMIT
:
1873 case AMDGPU::SRC_POPS_EXITING_WAVE_ID
:
1875 case AMDGPU::SRC_VCCZ
:
1876 case AMDGPU::SRC_EXECZ
:
1877 case AMDGPU::SRC_SCC
:
1879 case AMDGPU::SGPR_NULL
:
1886 bool AMDGPUOperand::isInlineValue() const {
1887 return isRegKind() && ::isInlineValue(getReg());
1890 //===----------------------------------------------------------------------===//
1892 //===----------------------------------------------------------------------===//
static int getRegClass(RegisterKind Is, unsigned RegWidth) {
  if (Is == IS_VGPR) {
    switch (RegWidth) {
      default: return -1;
      case 1: return AMDGPU::VGPR_32RegClassID;
      case 2: return AMDGPU::VReg_64RegClassID;
      case 3: return AMDGPU::VReg_96RegClassID;
      case 4: return AMDGPU::VReg_128RegClassID;
      case 5: return AMDGPU::VReg_160RegClassID;
      case 8: return AMDGPU::VReg_256RegClassID;
      case 16: return AMDGPU::VReg_512RegClassID;
      case 32: return AMDGPU::VReg_1024RegClassID;
    }
  } else if (Is == IS_TTMP) {
    switch (RegWidth) {
      default: return -1;
      case 1: return AMDGPU::TTMP_32RegClassID;
      case 2: return AMDGPU::TTMP_64RegClassID;
      case 4: return AMDGPU::TTMP_128RegClassID;
      case 8: return AMDGPU::TTMP_256RegClassID;
      case 16: return AMDGPU::TTMP_512RegClassID;
    }
  } else if (Is == IS_SGPR) {
    switch (RegWidth) {
      default: return -1;
      case 1: return AMDGPU::SGPR_32RegClassID;
      case 2: return AMDGPU::SGPR_64RegClassID;
      case 4: return AMDGPU::SGPR_128RegClassID;
      case 8: return AMDGPU::SGPR_256RegClassID;
      case 16: return AMDGPU::SGPR_512RegClassID;
    }
  } else if (Is == IS_AGPR) {
    switch (RegWidth) {
      default: return -1;
      case 1: return AMDGPU::AGPR_32RegClassID;
      case 2: return AMDGPU::AReg_64RegClassID;
      case 4: return AMDGPU::AReg_128RegClassID;
      case 16: return AMDGPU::AReg_512RegClassID;
      case 32: return AMDGPU::AReg_1024RegClassID;
    }
  }
  return -1;
}
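// Example (sketch): a range such as v[4:7] parses with RegWidth == 4 and so
// selects AMDGPU::VReg_128RegClassID above, while s[0:1] (RegWidth == 2) maps
// to AMDGPU::SGPR_64RegClassID. Widths with no matching case yield -1 and the
// register is rejected.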
static unsigned getSpecialRegForName(StringRef RegName) {
  return StringSwitch<unsigned>(RegName)
    .Case("exec", AMDGPU::EXEC)
    .Case("vcc", AMDGPU::VCC)
    .Case("flat_scratch", AMDGPU::FLAT_SCR)
    .Case("xnack_mask", AMDGPU::XNACK_MASK)
    .Case("shared_base", AMDGPU::SRC_SHARED_BASE)
    .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE)
    .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT)
    .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT)
    .Case("private_base", AMDGPU::SRC_PRIVATE_BASE)
    .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE)
    .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
    .Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
    .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
    .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
    .Case("lds_direct", AMDGPU::LDS_DIRECT)
    .Case("src_lds_direct", AMDGPU::LDS_DIRECT)
    .Case("m0", AMDGPU::M0)
    .Case("vccz", AMDGPU::SRC_VCCZ)
    .Case("src_vccz", AMDGPU::SRC_VCCZ)
    .Case("execz", AMDGPU::SRC_EXECZ)
    .Case("src_execz", AMDGPU::SRC_EXECZ)
    .Case("scc", AMDGPU::SRC_SCC)
    .Case("src_scc", AMDGPU::SRC_SCC)
    .Case("tba", AMDGPU::TBA)
    .Case("tma", AMDGPU::TMA)
    .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO)
    .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI)
    .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO)
    .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI)
    .Case("vcc_lo", AMDGPU::VCC_LO)
    .Case("vcc_hi", AMDGPU::VCC_HI)
    .Case("exec_lo", AMDGPU::EXEC_LO)
    .Case("exec_hi", AMDGPU::EXEC_HI)
    .Case("tma_lo", AMDGPU::TMA_LO)
    .Case("tma_hi", AMDGPU::TMA_HI)
    .Case("tba_lo", AMDGPU::TBA_LO)
    .Case("tba_hi", AMDGPU::TBA_HI)
    .Case("null", AMDGPU::SGPR_NULL)
    .Default(AMDGPU::NoRegister);
}
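// Usage sketch: getSpecialRegForName("vcc_lo") returns AMDGPU::VCC_LO, while
// an unrecognized name falls through to AMDGPU::NoRegister so that the caller
// can try the regular-register path instead.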
bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
                                    SMLoc &EndLoc) {
  auto R = parseRegister();
  if (!R) return true;
  RegNo = R->getReg();
  StartLoc = R->getStartLoc();
  EndLoc = R->getEndLoc();
  return false;
}
bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth,
                                            RegisterKind RegKind, unsigned Reg1) {
  switch (RegKind) {
  case IS_SPECIAL:
    if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) {
      Reg = AMDGPU::EXEC;
      RegWidth = 2;
      return true;
    }
    if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) {
      Reg = AMDGPU::FLAT_SCR;
      RegWidth = 2;
      return true;
    }
    if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) {
      Reg = AMDGPU::XNACK_MASK;
      RegWidth = 2;
      return true;
    }
    if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) {
      Reg = AMDGPU::VCC;
      RegWidth = 2;
      return true;
    }
    if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) {
      Reg = AMDGPU::TBA;
      RegWidth = 2;
      return true;
    }
    if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) {
      Reg = AMDGPU::TMA;
      RegWidth = 2;
      return true;
    }
    return false;
  case IS_VGPR:
  case IS_SGPR:
  case IS_AGPR:
  case IS_TTMP:
    if (Reg1 != Reg + RegWidth) {
      return false;
    }
    RegWidth++;
    return true;
  default:
    llvm_unreachable("unexpected register kind");
  }
}
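// Example (sketch): parsing the list [s0,s1,s2,s3] calls this once per extra
// element, growing RegWidth from 1 to 4; [exec_lo,exec_hi] instead collapses
// to the single special register EXEC with a width of 2.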
static constexpr RegInfo RegularRegisters[] = {
  {{"v"},    IS_VGPR},
  {{"s"},    IS_SGPR},
  {{"ttmp"}, IS_TTMP},
  {{"acc"},  IS_AGPR},
  {{"a"},    IS_AGPR},
};

static bool isRegularReg(RegisterKind Kind) {
  return Kind == IS_VGPR ||
         Kind == IS_SGPR ||
         Kind == IS_TTMP ||
         Kind == IS_AGPR;
}

static const RegInfo* getRegularRegInfo(StringRef Str) {
  for (const RegInfo &Reg : RegularRegisters)
    if (Str.startswith(Reg.Name))
      return &Reg;
  return nullptr;
}

static bool getRegNum(StringRef Str, unsigned& Num) {
  return !Str.getAsInteger(10, Num);
}
bool
AMDGPUAsmParser::isRegister(const AsmToken &Token,
                            const AsmToken &NextToken) const {

  // A list of consecutive registers: [s0,s1,s2,s3]
  if (Token.is(AsmToken::LBrac))
    return true;

  if (!Token.is(AsmToken::Identifier))
    return false;

  // A single register like s0 or a range of registers like s[0:1]

  StringRef Str = Token.getString();
  const RegInfo *Reg = getRegularRegInfo(Str);
  if (Reg) {
    StringRef RegName = Reg->Name;
    StringRef RegSuffix = Str.substr(RegName.size());
    if (!RegSuffix.empty()) {
      unsigned Num;
      // A single register with an index: rXX
      if (getRegNum(RegSuffix, Num))
        return true;

      // A range of registers: r[XX:YY].
      if (NextToken.is(AsmToken::LBrac))
        return true;
    }
  }

  return getSpecialRegForName(Str) != AMDGPU::NoRegister;
}

bool
AMDGPUAsmParser::isRegister()
{
  return isRegister(getToken(), peekToken());
}
unsigned
AMDGPUAsmParser::getRegularReg(RegisterKind RegKind,
                               unsigned RegNum,
                               unsigned RegWidth) {

  assert(isRegularReg(RegKind));

  unsigned AlignSize = 1;
  if (RegKind == IS_SGPR || RegKind == IS_TTMP) {
    // SGPR and TTMP registers must be aligned.
    // Max required alignment is 4 dwords.
    AlignSize = std::min(RegWidth, 4u);
  }

  if (RegNum % AlignSize != 0)
    return AMDGPU::NoRegister;

  unsigned RegIdx = RegNum / AlignSize;
  int RCID = getRegClass(RegKind, RegWidth);
  if (RCID == -1)
    return AMDGPU::NoRegister;

  const MCRegisterInfo *TRI = getContext().getRegisterInfo();
  const MCRegisterClass RC = TRI->getRegClass(RCID);
  if (RegIdx >= RC.getNumRegs())
    return AMDGPU::NoRegister;

  return RC.getRegister(RegIdx);
}
bool
AMDGPUAsmParser::ParseRegRange(unsigned& Num, unsigned& Width) {
  int64_t RegLo, RegHi;
  if (!trySkipToken(AsmToken::LBrac))
    return false;

  if (!parseExpr(RegLo))
    return false;

  if (trySkipToken(AsmToken::Colon)) {
    if (!parseExpr(RegHi))
      return false;
  } else {
    RegHi = RegLo;
  }

  if (!trySkipToken(AsmToken::RBrac))
    return false;

  if (!isUInt<32>(RegLo) || !isUInt<32>(RegHi) || RegLo > RegHi)
    return false;

  Num = static_cast<unsigned>(RegLo);
  Width = (RegHi - RegLo) + 1;
  return true;
}
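// Example (sketch, per the code above): "s[2:5]" yields Num == 2 and
// Width == 4; the ":5" part is optional, so "s[2]" yields Num == 2 and
// Width == 1.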
unsigned
AMDGPUAsmParser::ParseSpecialReg(RegisterKind &RegKind,
                                 unsigned &RegNum,
                                 unsigned &RegWidth) {
  assert(isToken(AsmToken::Identifier));
  unsigned Reg = getSpecialRegForName(getTokenStr());
  if (Reg) {
    RegNum = 0;
    RegWidth = 1;
    RegKind = IS_SPECIAL;
    lex(); // skip register name
  }
  return Reg;
}

unsigned
AMDGPUAsmParser::ParseRegularReg(RegisterKind &RegKind,
                                 unsigned &RegNum,
                                 unsigned &RegWidth) {
  assert(isToken(AsmToken::Identifier));
  StringRef RegName = getTokenStr();

  const RegInfo *RI = getRegularRegInfo(RegName);
  if (!RI)
    return AMDGPU::NoRegister;
  lex(); // skip register name

  RegKind = RI->Kind;
  StringRef RegSuffix = RegName.substr(RI->Name.size());
  if (!RegSuffix.empty()) {
    // Single 32-bit register: vXX.
    if (!getRegNum(RegSuffix, RegNum))
      return AMDGPU::NoRegister;
    RegWidth = 1;
  } else {
    // Range of registers: v[XX:YY]. ":YY" is optional.
    if (!ParseRegRange(RegNum, RegWidth))
      return AMDGPU::NoRegister;
  }

  return getRegularReg(RegKind, RegNum, RegWidth);
}
unsigned
AMDGPUAsmParser::ParseRegList(RegisterKind &RegKind,
                              unsigned &RegNum,
                              unsigned &RegWidth) {
  unsigned Reg = AMDGPU::NoRegister;

  if (!trySkipToken(AsmToken::LBrac))
    return AMDGPU::NoRegister;

  // List of consecutive registers, e.g.: [s0,s1,s2,s3]

  if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth))
    return AMDGPU::NoRegister;
  if (RegWidth != 1)
    return AMDGPU::NoRegister;

  for (; trySkipToken(AsmToken::Comma); ) {
    RegisterKind NextRegKind;
    unsigned NextReg, NextRegNum, NextRegWidth;

    if (!ParseAMDGPURegister(NextRegKind, NextReg, NextRegNum, NextRegWidth))
      return AMDGPU::NoRegister;
    if (NextRegWidth != 1)
      return AMDGPU::NoRegister;
    if (NextRegKind != RegKind)
      return AMDGPU::NoRegister;
    if (!AddNextRegisterToList(Reg, RegWidth, RegKind, NextReg))
      return AMDGPU::NoRegister;
  }

  if (!trySkipToken(AsmToken::RBrac))
    return AMDGPU::NoRegister;

  if (isRegularReg(RegKind))
    Reg = getRegularReg(RegKind, RegNum, RegWidth);

  return Reg;
}
bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind,
                                          unsigned &Reg,
                                          unsigned &RegNum,
                                          unsigned &RegWidth) {
  Reg = AMDGPU::NoRegister;

  if (isToken(AsmToken::Identifier)) {
    Reg = ParseSpecialReg(RegKind, RegNum, RegWidth);
    if (Reg == AMDGPU::NoRegister)
      Reg = ParseRegularReg(RegKind, RegNum, RegWidth);
  } else {
    Reg = ParseRegList(RegKind, RegNum, RegWidth);
  }

  const MCRegisterInfo *TRI = getContext().getRegisterInfo();
  return Reg != AMDGPU::NoRegister && subtargetHasRegister(*TRI, Reg);
}
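// The three accepted register spellings, for reference (sketch):
//   special names:  exec, vcc, flat_scratch, m0, ...    (ParseSpecialReg)
//   regular regs:   v0, s[0:3], ttmp[4:7], a2, ...      (ParseRegularReg)
//   register lists: [v0,v1,v2,v3]                       (ParseRegList)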
Optional<StringRef>
AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) {
  switch (RegKind) {
  case IS_VGPR:
    return StringRef(".amdgcn.next_free_vgpr");
  case IS_SGPR:
    return StringRef(".amdgcn.next_free_sgpr");
  default:
    return None;
  }
}

void AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) {
  auto SymbolName = getGprCountSymbolName(RegKind);
  assert(SymbolName && "initializing invalid register kind");
  MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
  Sym->setVariableValue(MCConstantExpr::create(0, getContext()));
}

bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind,
                                            unsigned DwordRegIndex,
                                            unsigned RegWidth) {
  // Symbols are only defined for GCN targets
  if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6)
    return true;

  auto SymbolName = getGprCountSymbolName(RegKind);
  if (!SymbolName)
    return true;
  MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);

  int64_t NewMax = DwordRegIndex + RegWidth - 1;
  int64_t OldCount;

  if (!Sym->isVariable())
    return !Error(getParser().getTok().getLoc(),
                  ".amdgcn.next_free_{v,s}gpr symbols must be variable");
  if (!Sym->getVariableValue(false)->evaluateAsAbsolute(OldCount))
    return !Error(
        getParser().getTok().getLoc(),
        ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions");

  if (OldCount <= NewMax)
    Sym->setVariableValue(MCConstantExpr::create(NewMax + 1, getContext()));

  return true;
}
std::unique_ptr<AMDGPUOperand> AMDGPUAsmParser::parseRegister() {
  const auto &Tok = Parser.getTok();
  SMLoc StartLoc = Tok.getLoc();
  SMLoc EndLoc = Tok.getEndLoc();
  RegisterKind RegKind;
  unsigned Reg, RegNum, RegWidth;

  if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) {
    //FIXME: improve error messages (bug 41303).
    Error(StartLoc, "not a valid operand.");
    return nullptr;
  }
  if (AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
    if (!updateGprCountSymbols(RegKind, RegNum, RegWidth))
      return nullptr;
  } else
    KernelScope.usesRegister(RegKind, RegNum, RegWidth);
  return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc);
}
OperandMatchResultTy
AMDGPUAsmParser::parseImm(OperandVector &Operands, bool HasSP3AbsModifier) {
  // TODO: add syntactic sugar for 1/(2*PI)

  assert(!isRegister());
  assert(!isModifier());

  const auto& Tok = getToken();
  const auto& NextTok = peekToken();
  bool IsReal = Tok.is(AsmToken::Real);
  SMLoc S = getLoc();
  bool Negate = false;

  if (!IsReal && Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Real)) {
    lex();
    IsReal = true;
    Negate = true;
  }

  if (IsReal) {
    // Floating-point expressions are not supported.
    // Can only allow floating-point literals with an
    // optional sign.

    StringRef Num = getTokenStr();
    lex();

    APFloat RealVal(APFloat::IEEEdouble());
    auto roundMode = APFloat::rmNearestTiesToEven;
    if (RealVal.convertFromString(Num, roundMode) == APFloat::opInvalidOp) {
      return MatchOperand_ParseFail;
    }
    if (Negate)
      RealVal.changeSign();

    Operands.push_back(
      AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S,
                               AMDGPUOperand::ImmTyNone, true));

    return MatchOperand_Success;

  } else {
    int64_t IntVal;
    const MCExpr *Expr;

    if (HasSP3AbsModifier) {
      // This is a workaround for handling expressions
      // as arguments of SP3 'abs' modifier, for example:
      //     |1.0|
      //     |-1|
      //     |1+x|
      // This syntax is not compatible with syntax of standard
      // MC expressions (due to the trailing '|').
      SMLoc EndLoc;
      if (getParser().parsePrimaryExpr(Expr, EndLoc))
        return MatchOperand_ParseFail;
    } else {
      if (Parser.parseExpression(Expr))
        return MatchOperand_ParseFail;
    }

    if (Expr->evaluateAsAbsolute(IntVal)) {
      Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
    } else {
      Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
    }

    return MatchOperand_Success;
  }

  return MatchOperand_NoMatch;
}
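// Examples (sketch): "-1.0" takes the Negate path above and is stored as a
// double bit pattern, while "-1" or "2+2" go through the MC expression parser
// and are added as plain integer immediates when they evaluate to a constant.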
OperandMatchResultTy
AMDGPUAsmParser::parseReg(OperandVector &Operands) {
  if (!isRegister())
    return MatchOperand_NoMatch;

  if (auto R = parseRegister()) {
    assert(R->isReg());
    Operands.push_back(std::move(R));
    return MatchOperand_Success;
  }
  return MatchOperand_ParseFail;
}

OperandMatchResultTy
AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod) {
  auto res = parseReg(Operands);
  if (res != MatchOperand_NoMatch) {
    return res;
  } else if (isModifier()) {
    return MatchOperand_NoMatch;
  } else {
    return parseImm(Operands, HasSP3AbsMod);
  }
}
bool
AMDGPUAsmParser::isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
  if (Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::LParen)) {
    const auto &str = Token.getString();
    return str == "abs" || str == "neg" || str == "sext";
  }
  return false;
}

bool
AMDGPUAsmParser::isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const {
  return Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::Colon);
}

bool
AMDGPUAsmParser::isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
  return isNamedOperandModifier(Token, NextToken) || Token.is(AsmToken::Pipe);
}

bool
AMDGPUAsmParser::isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
  return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken);
}
// Check if this is an operand modifier or an opcode modifier
// which may look like an expression but it is not. We should
// avoid parsing these modifiers as expressions. Currently
// recognized sequences are:
//   |...|
//   abs(...)
//   neg(...)
//   sext(...)
//   -reg
//   -|...|
//   -abs(...)
//   name:...
// Note that simple opcode modifiers like 'gds' may be parsed as
// expressions; this is a special case. See getExpressionAsToken.
//
bool
AMDGPUAsmParser::isModifier() {

  AsmToken Tok = getToken();
  AsmToken NextToken[2];
  peekTokens(NextToken);

  return isOperandModifier(Tok, NextToken[0]) ||
         (Tok.is(AsmToken::Minus) && isRegOrOperandModifier(NextToken[0], NextToken[1])) ||
         isOpcodeModifierWithVal(Tok, NextToken[0]);
}
// Check if the current token is an SP3 'neg' modifier.
// Currently this modifier is allowed in the following context:
//
// 1. Before a register, e.g. "-v0", "-v[...]" or "-[v0,v1]".
// 2. Before an 'abs' modifier: -abs(...)
// 3. Before an SP3 'abs' modifier: -|...|
//
// In all other cases "-" is handled as a part
// of an expression that follows the sign.
//
// Note: When "-" is followed by an integer literal,
// this is interpreted as integer negation rather
// than a floating-point NEG modifier applied to N.
// Besides being counter-intuitive, such use of a floating-point
// NEG modifier would have resulted in different meaning
// of integer literals used with VOP1/2/C and VOP3,
// for example:
//    v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF
//    v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001
// Negative fp literals with preceding "-" are
// handled likewise for uniformity.
//
bool
AMDGPUAsmParser::parseSP3NegModifier() {

  AsmToken NextToken[2];
  peekTokens(NextToken);

  if (isToken(AsmToken::Minus) &&
      (isRegister(NextToken[0], NextToken[1]) ||
       NextToken[0].is(AsmToken::Pipe) ||
       isId(NextToken[0], "abs"))) {
    lex();
    return true;
  }

  return false;
}
OperandMatchResultTy
AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands,
                                              bool AllowImm) {
  bool Neg, SP3Neg;
  bool Abs, SP3Abs;
  SMLoc Loc;

  // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead.
  if (isToken(AsmToken::Minus) && peekToken().is(AsmToken::Minus)) {
    Error(getLoc(), "invalid syntax, expected 'neg' modifier");
    return MatchOperand_ParseFail;
  }

  SP3Neg = parseSP3NegModifier();

  Loc = getLoc();
  Neg = trySkipId("neg");
  if (Neg && SP3Neg) {
    Error(Loc, "expected register or immediate");
    return MatchOperand_ParseFail;
  }
  if (Neg && !skipToken(AsmToken::LParen, "expected left paren after neg"))
    return MatchOperand_ParseFail;

  Abs = trySkipId("abs");
  if (Abs && !skipToken(AsmToken::LParen, "expected left paren after abs"))
    return MatchOperand_ParseFail;

  Loc = getLoc();
  SP3Abs = trySkipToken(AsmToken::Pipe);
  if (Abs && SP3Abs) {
    Error(Loc, "expected register or immediate");
    return MatchOperand_ParseFail;
  }

  OperandMatchResultTy Res;
  if (AllowImm) {
    Res = parseRegOrImm(Operands, SP3Abs);
  } else {
    Res = parseReg(Operands);
  }
  if (Res != MatchOperand_Success) {
    return (SP3Neg || Neg || SP3Abs || Abs)? MatchOperand_ParseFail : Res;
  }

  if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar"))
    return MatchOperand_ParseFail;
  if (Abs && !skipToken(AsmToken::RParen, "expected closing parentheses"))
    return MatchOperand_ParseFail;
  if (Neg && !skipToken(AsmToken::RParen, "expected closing parentheses"))
    return MatchOperand_ParseFail;

  AMDGPUOperand::Modifiers Mods;
  Mods.Abs = Abs || SP3Abs;
  Mods.Neg = Neg || SP3Neg;

  if (Mods.hasFPModifiers()) {
    AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
    if (Op.isExpr()) {
      Error(Op.getStartLoc(), "expected an absolute expression");
      return MatchOperand_ParseFail;
    }
    Op.setModifiers(Mods);
  }
  return MatchOperand_Success;
}
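// Accepted spellings for FP input modifiers (sketch): "abs(v1)", "|v1|",
// "neg(v2)", "-v2", and combinations such as "neg(|v3|)" or "-|v3|"; mixing
// the named and SP3 forms of the same modifier (e.g. "abs(|v1|)") is rejected
// above with "expected register or immediate".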
OperandMatchResultTy
AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands,
                                               bool AllowImm) {
  bool Sext = trySkipId("sext");
  if (Sext && !skipToken(AsmToken::LParen, "expected left paren after sext"))
    return MatchOperand_ParseFail;

  OperandMatchResultTy Res;
  if (AllowImm) {
    Res = parseRegOrImm(Operands);
  } else {
    Res = parseReg(Operands);
  }
  if (Res != MatchOperand_Success) {
    return Sext? MatchOperand_ParseFail : Res;
  }

  if (Sext && !skipToken(AsmToken::RParen, "expected closing parentheses"))
    return MatchOperand_ParseFail;

  AMDGPUOperand::Modifiers Mods;
  Mods.Sext = Sext;

  if (Mods.hasIntModifiers()) {
    AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
    if (Op.isExpr()) {
      Error(Op.getStartLoc(), "expected an absolute expression");
      return MatchOperand_ParseFail;
    }
    Op.setModifiers(Mods);
  }

  return MatchOperand_Success;
}
OperandMatchResultTy
AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) {
  return parseRegOrImmWithFPInputMods(Operands, false);
}

OperandMatchResultTy
AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) {
  return parseRegOrImmWithIntInputMods(Operands, false);
}

OperandMatchResultTy AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) {
  auto Loc = getLoc();
  if (trySkipId("off")) {
    Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc,
                                                AMDGPUOperand::ImmTyOff, false));
    return MatchOperand_Success;
  }

  if (!isRegister())
    return MatchOperand_NoMatch;

  std::unique_ptr<AMDGPUOperand> Reg = parseRegister();
  if (Reg) {
    Operands.push_back(std::move(Reg));
    return MatchOperand_Success;
  }

  return MatchOperand_ParseFail;
}
unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) {

  uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
  if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) ||
      (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) ||
      (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) ||
      (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) )
    return Match_InvalidOperand;

  if ((TSFlags & SIInstrFlags::VOP3) &&
      (TSFlags & SIInstrFlags::VOPAsmPrefer32Bit) &&
      getForcedEncodingSize() != 64)
    return Match_PreferE32;

  if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
      Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
    // v_mac_f32/16 allow only dst_sel == DWORD;
    auto OpNum =
        AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel);
    const auto &Op = Inst.getOperand(OpNum);
    if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) {
      return Match_InvalidOperand;
    }
  }

  return Match_Success;
}
// What asm variants we should check
ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const {
  if (getForcedEncodingSize() == 32) {
    static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT};
    return makeArrayRef(Variants);
  }

  if (isForcedVOP3()) {
    static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3};
    return makeArrayRef(Variants);
  }

  if (isForcedSDWA()) {
    static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA,
                                        AMDGPUAsmVariants::SDWA9};
    return makeArrayRef(Variants);
  }

  if (isForcedDPP()) {
    static const unsigned Variants[] = {AMDGPUAsmVariants::DPP};
    return makeArrayRef(Variants);
  }

  static const unsigned Variants[] = {
    AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3,
    AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9, AMDGPUAsmVariants::DPP
  };

  return makeArrayRef(Variants);
}
unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const {
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
  const unsigned Num = Desc.getNumImplicitUses();
  for (unsigned i = 0; i < Num; ++i) {
    unsigned Reg = Desc.ImplicitUses[i];
    switch (Reg) {
    case AMDGPU::FLAT_SCR:
    case AMDGPU::VCC:
    case AMDGPU::VCC_LO:
    case AMDGPU::VCC_HI:
    case AMDGPU::M0:
      return Reg;
    default:
      break;
    }
  }
  return AMDGPU::NoRegister;
}
// NB: This code is correct only when used to check constant
// bus limitations because GFX7 supports no f16 inline constants.
// Note that there are no cases when a GFX7 opcode violates
// constant bus limitations due to the use of an f16 constant.
bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst,
                                       unsigned OpIdx) const {
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());

  if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) {
    return false;
  }

  const MCOperand &MO = Inst.getOperand(OpIdx);

  int64_t Val = MO.getImm();
  auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx);

  switch (OpSize) { // expected operand size
  case 8:
    return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm());
  case 4:
    return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm());
  case 2: {
    const unsigned OperandType = Desc.OpInfo[OpIdx].OperandType;
    if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 ||
        OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16 ||
        OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2INT16 ||
        OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2FP16 ||
        OperandType == AMDGPU::OPERAND_REG_IMM_V2INT16 ||
        OperandType == AMDGPU::OPERAND_REG_IMM_V2FP16) {
      return AMDGPU::isInlinableLiteralV216(Val, hasInv2PiInlineImm());
    }

    return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm());
  }
  default:
    llvm_unreachable("invalid operand size");
  }
}
unsigned AMDGPUAsmParser::getConstantBusLimit(unsigned Opcode) const {
  if (!isGFX10())
    return 1;

  switch (Opcode) {
  // 64-bit shift instructions can use only one scalar value input
  case AMDGPU::V_LSHLREV_B64:
  case AMDGPU::V_LSHLREV_B64_gfx10:
  case AMDGPU::V_LSHL_B64:
  case AMDGPU::V_LSHRREV_B64:
  case AMDGPU::V_LSHRREV_B64_gfx10:
  case AMDGPU::V_LSHR_B64:
  case AMDGPU::V_ASHRREV_I64:
  case AMDGPU::V_ASHRREV_I64_gfx10:
  case AMDGPU::V_ASHR_I64:
    return 1;
  default:
    return 2;
  }
}

bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) {
  const MCOperand &MO = Inst.getOperand(OpIdx);
  if (MO.isImm()) {
    return !isInlineConstant(Inst, OpIdx);
  } else if (MO.isReg()) {
    auto Reg = MO.getReg();
    const MCRegisterInfo *TRI = getContext().getRegisterInfo();
    return isSGPR(mc2PseudoReg(Reg), TRI) && Reg != SGPR_NULL;
  } else {
    return true;
  }
}
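// Sketch of what counts against the constant bus: SGPRs (other than the null
// register), the implicitly read SGPRs found above (e.g. VCC for carry ops),
// and literal constants that are not inline constants. VGPRs and inline
// constants are free.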
bool AMDGPUAsmParser::validateConstantBusLimitations(const MCInst &Inst) {
  const unsigned Opcode = Inst.getOpcode();
  const MCInstrDesc &Desc = MII.get(Opcode);
  unsigned ConstantBusUseCount = 0;
  unsigned NumLiterals = 0;
  unsigned LiteralSize;

  if (Desc.TSFlags &
      (SIInstrFlags::VOPC |
       SIInstrFlags::VOP1 | SIInstrFlags::VOP2 |
       SIInstrFlags::VOP3 | SIInstrFlags::VOP3P |
       SIInstrFlags::SDWA)) {
    // Check special imm operands (used by madmk, etc)
    if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1) {
      ++ConstantBusUseCount;
    }

    SmallDenseSet<unsigned> SGPRsUsed;
    unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst);
    if (SGPRUsed != AMDGPU::NoRegister) {
      SGPRsUsed.insert(SGPRUsed);
      ++ConstantBusUseCount;
    }

    const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
    const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
    const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);

    const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };

    for (int OpIdx : OpIndices) {
      if (OpIdx == -1) break;

      const MCOperand &MO = Inst.getOperand(OpIdx);
      if (usesConstantBus(Inst, OpIdx)) {
        if (MO.isReg()) {
          const unsigned Reg = mc2PseudoReg(MO.getReg());
          // Pairs of registers with partial intersections like these
          //   s0, s[0:1]
          //   flat_scratch_lo, flat_scratch
          //   flat_scratch_lo, flat_scratch_hi
          // are theoretically valid but they are disabled anyway.
          // Note that this code mimics SIInstrInfo::verifyInstruction
          if (!SGPRsUsed.count(Reg)) {
            SGPRsUsed.insert(Reg);
            ++ConstantBusUseCount;
          }
        } else { // Expression or a literal

          if (Desc.OpInfo[OpIdx].OperandType == MCOI::OPERAND_IMMEDIATE)
            continue; // special operand like VINTERP attr_chan

          // An instruction may use only one literal.
          // This has been validated on the previous step.
          // See validateVOP3Literal.
          // This literal may be used as more than one operand.
          // If all these operands are of the same size,
          // this literal counts as one scalar value.
          // Otherwise it counts as 2 scalar values.
          // See "GFX10 Shader Programming", section 3.6.2.3.

          unsigned Size = AMDGPU::getOperandSize(Desc, OpIdx);
          if (Size < 4) Size = 4;

          if (NumLiterals == 0) {
            NumLiterals = 1;
            LiteralSize = Size;
          } else if (LiteralSize != Size) {
            NumLiterals = 2;
          }
        }
      }
    }
  }
  ConstantBusUseCount += NumLiterals;

  return ConstantBusUseCount <= getConstantBusLimit(Opcode);
}
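// Example (sketch): with a single constant-bus slot (pre-GFX10),
//   v_add_f32_e64 v0, s1, s2
// is rejected here because s1 and s2 are two distinct SGPR reads; on GFX10 the
// limit returned by getConstantBusLimit is 2 and the same instruction passes.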
bool AMDGPUAsmParser::validateEarlyClobberLimitations(const MCInst &Inst) {
  const unsigned Opcode = Inst.getOpcode();
  const MCInstrDesc &Desc = MII.get(Opcode);

  const int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst);
  if (DstIdx == -1 ||
      Desc.getOperandConstraint(DstIdx, MCOI::EARLY_CLOBBER) == -1) {
    return true;
  }

  const MCRegisterInfo *TRI = getContext().getRegisterInfo();

  const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
  const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
  const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);

  assert(DstIdx != -1);
  const MCOperand &Dst = Inst.getOperand(DstIdx);
  assert(Dst.isReg());
  const unsigned DstReg = mc2PseudoReg(Dst.getReg());

  const int SrcIndices[] = { Src0Idx, Src1Idx, Src2Idx };

  for (int SrcIdx : SrcIndices) {
    if (SrcIdx == -1) break;
    const MCOperand &Src = Inst.getOperand(SrcIdx);
    if (Src.isReg()) {
      const unsigned SrcReg = mc2PseudoReg(Src.getReg());
      if (isRegIntersect(DstReg, SrcReg, TRI)) {
        return false;
      }
    }
  }

  return true;
}
bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) {

  const unsigned Opc = Inst.getOpcode();
  const MCInstrDesc &Desc = MII.get(Opc);

  if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) {
    int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp);
    assert(ClampIdx != -1);
    return Inst.getOperand(ClampIdx).getImm() == 0;
  }

  return true;
}
bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst) {

  const unsigned Opc = Inst.getOpcode();
  const MCInstrDesc &Desc = MII.get(Opc);

  if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
    return true;

  int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata);
  int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
  int TFEIdx   = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe);

  assert(VDataIdx != -1);
  assert(DMaskIdx != -1);
  assert(TFEIdx != -1);

  unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx);
  unsigned TFESize = Inst.getOperand(TFEIdx).getImm()? 1 : 0;
  unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
  if (DMask == 0)
    DMask = 1;

  unsigned DataSize =
    (Desc.TSFlags & SIInstrFlags::Gather4) ? 4 : countPopulation(DMask);
  if (hasPackedD16()) {
    int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
    if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm())
      DataSize = (DataSize + 1) / 2;
  }

  return (VDataSize / 4) == DataSize + TFESize;
}
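// Example (sketch): "image_load v[0:3], v4, s[8:15] dmask:0xf" carries four
// dmask bits and a 4-dword vdata, so it passes; the same instruction with
// "v[0:1]" as vdata would be rejected because the sizes no longer match.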
bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst) {
  const unsigned Opc = Inst.getOpcode();
  const MCInstrDesc &Desc = MII.get(Opc);

  if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0 || !isGFX10())
    return true;

  const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
  const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
      AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
  int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0);
  int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::srsrc);
  int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);

  assert(VAddr0Idx != -1);
  assert(SrsrcIdx != -1);
  assert(DimIdx != -1);
  assert(SrsrcIdx > VAddr0Idx);

  unsigned Dim = Inst.getOperand(DimIdx).getImm();
  const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
  bool IsNSA = SrsrcIdx - VAddr0Idx > 1;
  unsigned VAddrSize =
      IsNSA ? SrsrcIdx - VAddr0Idx
            : AMDGPU::getRegOperandSize(getMRI(), Desc, VAddr0Idx) / 4;

  unsigned AddrSize = BaseOpcode->NumExtraArgs +
                      (BaseOpcode->Gradients ? DimInfo->NumGradients : 0) +
                      (BaseOpcode->Coordinates ? DimInfo->NumCoords : 0) +
                      (BaseOpcode->LodOrClampOrMip ? 1 : 0);

  if (!IsNSA) {
    if (AddrSize > 8)
      AddrSize = 16;
    else if (AddrSize > 4)
      AddrSize = 8;
  }

  return VAddrSize == AddrSize;
}
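// Example (sketch, GFX10 only since the check returns early otherwise): a 2D
// image_sample needs two coordinate dwords, so a non-NSA encoding that
// supplies only a single 32-bit vaddr register is flagged here as an
// address-size mismatch.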
bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) {

  const unsigned Opc = Inst.getOpcode();
  const MCInstrDesc &Desc = MII.get(Opc);

  if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
    return true;
  if (!Desc.mayLoad() || !Desc.mayStore())
    return true; // Not atomic

  int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
  unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;

  // This is an incomplete check because image_atomic_cmpswap
  // may only use 0x3 and 0xf while other atomic operations
  // may use 0x1 and 0x3. However these limitations are
  // verified when we check that dmask matches dst size.
  return DMask == 0x1 || DMask == 0x3 || DMask == 0xf;
}

bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) {

  const unsigned Opc = Inst.getOpcode();
  const MCInstrDesc &Desc = MII.get(Opc);

  if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0)
    return true;

  int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
  unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;

  // GATHER4 instructions use dmask in a different fashion compared to
  // other MIMG instructions. The only useful DMASK values are
  // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns
  // (red,red,red,red) etc.) The ISA document doesn't mention
  // this.
  return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8;
}
bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) {

  const unsigned Opc = Inst.getOpcode();
  const MCInstrDesc &Desc = MII.get(Opc);

  if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
    return true;

  int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
  if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) {
    if (isCI() || isSI())
      return false;
  }

  return true;
}

bool AMDGPUAsmParser::validateMIMGDim(const MCInst &Inst) {
  const unsigned Opc = Inst.getOpcode();
  const MCInstrDesc &Desc = MII.get(Opc);

  if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
    return true;

  int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
  if (DimIdx < 0)
    return true;

  long Imm = Inst.getOperand(DimIdx).getImm();
  if (Imm < 0 || Imm >= 8)
    return false;

  return true;
}
3087 static bool IsRevOpcode(const unsigned Opcode
)
3090 case AMDGPU::V_SUBREV_F32_e32
:
3091 case AMDGPU::V_SUBREV_F32_e64
:
3092 case AMDGPU::V_SUBREV_F32_e32_gfx10
:
3093 case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7
:
3094 case AMDGPU::V_SUBREV_F32_e32_vi
:
3095 case AMDGPU::V_SUBREV_F32_e64_gfx10
:
3096 case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7
:
3097 case AMDGPU::V_SUBREV_F32_e64_vi
:
3099 case AMDGPU::V_SUBREV_I32_e32
:
3100 case AMDGPU::V_SUBREV_I32_e64
:
3101 case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7
:
3102 case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7
:
3104 case AMDGPU::V_SUBBREV_U32_e32
:
3105 case AMDGPU::V_SUBBREV_U32_e64
:
3106 case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7
:
3107 case AMDGPU::V_SUBBREV_U32_e32_vi
:
3108 case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7
:
3109 case AMDGPU::V_SUBBREV_U32_e64_vi
:
3111 case AMDGPU::V_SUBREV_U32_e32
:
3112 case AMDGPU::V_SUBREV_U32_e64
:
3113 case AMDGPU::V_SUBREV_U32_e32_gfx9
:
3114 case AMDGPU::V_SUBREV_U32_e32_vi
:
3115 case AMDGPU::V_SUBREV_U32_e64_gfx9
:
3116 case AMDGPU::V_SUBREV_U32_e64_vi
:
3118 case AMDGPU::V_SUBREV_F16_e32
:
3119 case AMDGPU::V_SUBREV_F16_e64
:
3120 case AMDGPU::V_SUBREV_F16_e32_gfx10
:
3121 case AMDGPU::V_SUBREV_F16_e32_vi
:
3122 case AMDGPU::V_SUBREV_F16_e64_gfx10
:
3123 case AMDGPU::V_SUBREV_F16_e64_vi
:
3125 case AMDGPU::V_SUBREV_U16_e32
:
3126 case AMDGPU::V_SUBREV_U16_e64
:
3127 case AMDGPU::V_SUBREV_U16_e32_vi
:
3128 case AMDGPU::V_SUBREV_U16_e64_vi
:
3130 case AMDGPU::V_SUBREV_CO_U32_e32_gfx9
:
3131 case AMDGPU::V_SUBREV_CO_U32_e64_gfx10
:
3132 case AMDGPU::V_SUBREV_CO_U32_e64_gfx9
:
3134 case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9
:
3135 case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9
:
3137 case AMDGPU::V_SUBREV_NC_U32_e32_gfx10
:
3138 case AMDGPU::V_SUBREV_NC_U32_e64_gfx10
:
3140 case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10
:
3141 case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10
:
3143 case AMDGPU::V_LSHRREV_B32_e32
:
3144 case AMDGPU::V_LSHRREV_B32_e64
:
3145 case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7
:
3146 case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7
:
3147 case AMDGPU::V_LSHRREV_B32_e32_vi
:
3148 case AMDGPU::V_LSHRREV_B32_e64_vi
:
3149 case AMDGPU::V_LSHRREV_B32_e32_gfx10
:
3150 case AMDGPU::V_LSHRREV_B32_e64_gfx10
:
3152 case AMDGPU::V_ASHRREV_I32_e32
:
3153 case AMDGPU::V_ASHRREV_I32_e64
:
3154 case AMDGPU::V_ASHRREV_I32_e32_gfx10
:
3155 case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7
:
3156 case AMDGPU::V_ASHRREV_I32_e32_vi
:
3157 case AMDGPU::V_ASHRREV_I32_e64_gfx10
:
3158 case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7
:
3159 case AMDGPU::V_ASHRREV_I32_e64_vi
:
3161 case AMDGPU::V_LSHLREV_B32_e32
:
3162 case AMDGPU::V_LSHLREV_B32_e64
:
3163 case AMDGPU::V_LSHLREV_B32_e32_gfx10
:
3164 case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7
:
3165 case AMDGPU::V_LSHLREV_B32_e32_vi
:
3166 case AMDGPU::V_LSHLREV_B32_e64_gfx10
:
3167 case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7
:
3168 case AMDGPU::V_LSHLREV_B32_e64_vi
:
3170 case AMDGPU::V_LSHLREV_B16_e32
:
3171 case AMDGPU::V_LSHLREV_B16_e64
:
3172 case AMDGPU::V_LSHLREV_B16_e32_vi
:
3173 case AMDGPU::V_LSHLREV_B16_e64_vi
:
3174 case AMDGPU::V_LSHLREV_B16_gfx10
:
3176 case AMDGPU::V_LSHRREV_B16_e32
:
3177 case AMDGPU::V_LSHRREV_B16_e64
:
3178 case AMDGPU::V_LSHRREV_B16_e32_vi
:
3179 case AMDGPU::V_LSHRREV_B16_e64_vi
:
3180 case AMDGPU::V_LSHRREV_B16_gfx10
:
3182 case AMDGPU::V_ASHRREV_I16_e32
:
3183 case AMDGPU::V_ASHRREV_I16_e64
:
3184 case AMDGPU::V_ASHRREV_I16_e32_vi
:
3185 case AMDGPU::V_ASHRREV_I16_e64_vi
:
3186 case AMDGPU::V_ASHRREV_I16_gfx10
:
3188 case AMDGPU::V_LSHLREV_B64
:
3189 case AMDGPU::V_LSHLREV_B64_gfx10
:
3190 case AMDGPU::V_LSHLREV_B64_vi
:
3192 case AMDGPU::V_LSHRREV_B64
:
3193 case AMDGPU::V_LSHRREV_B64_gfx10
:
3194 case AMDGPU::V_LSHRREV_B64_vi
:
3196 case AMDGPU::V_ASHRREV_I64
:
3197 case AMDGPU::V_ASHRREV_I64_gfx10
:
3198 case AMDGPU::V_ASHRREV_I64_vi
:
3200 case AMDGPU::V_PK_LSHLREV_B16
:
3201 case AMDGPU::V_PK_LSHLREV_B16_gfx10
:
3202 case AMDGPU::V_PK_LSHLREV_B16_vi
:
3204 case AMDGPU::V_PK_LSHRREV_B16
:
3205 case AMDGPU::V_PK_LSHRREV_B16_gfx10
:
3206 case AMDGPU::V_PK_LSHRREV_B16_vi
:
3207 case AMDGPU::V_PK_ASHRREV_I16
:
3208 case AMDGPU::V_PK_ASHRREV_I16_gfx10
:
3209 case AMDGPU::V_PK_ASHRREV_I16_vi
:
3216 bool AMDGPUAsmParser::validateLdsDirect(const MCInst
&Inst
) {
3218 using namespace SIInstrFlags
;
3219 const unsigned Opcode
= Inst
.getOpcode();
3220 const MCInstrDesc
&Desc
= MII
.get(Opcode
);
3222 // lds_direct register is defined so that it can be used
3223 // with 9-bit operands only. Ignore encodings which do not accept these.
3224 if ((Desc
.TSFlags
& (VOP1
| VOP2
| VOP3
| VOPC
| VOP3P
| SIInstrFlags::SDWA
)) == 0)
3227 const int Src0Idx
= AMDGPU::getNamedOperandIdx(Opcode
, AMDGPU::OpName::src0
);
3228 const int Src1Idx
= AMDGPU::getNamedOperandIdx(Opcode
, AMDGPU::OpName::src1
);
3229 const int Src2Idx
= AMDGPU::getNamedOperandIdx(Opcode
, AMDGPU::OpName::src2
);
3231 const int SrcIndices
[] = { Src1Idx
, Src2Idx
};
3233 // lds_direct cannot be specified as either src1 or src2.
3234 for (int SrcIdx
: SrcIndices
) {
3235 if (SrcIdx
== -1) break;
3236 const MCOperand
&Src
= Inst
.getOperand(SrcIdx
);
3237 if (Src
.isReg() && Src
.getReg() == LDS_DIRECT
) {
3245 const MCOperand
&Src
= Inst
.getOperand(Src0Idx
);
3246 if (!Src
.isReg() || Src
.getReg() != LDS_DIRECT
)
3249 // lds_direct is specified as src0. Check additional limitations.
3250 return (Desc
.TSFlags
& SIInstrFlags::SDWA
) == 0 && !IsRevOpcode(Opcode
);
3253 SMLoc
AMDGPUAsmParser::getFlatOffsetLoc(const OperandVector
&Operands
) const {
3254 for (unsigned i
= 1, e
= Operands
.size(); i
!= e
; ++i
) {
3255 AMDGPUOperand
&Op
= ((AMDGPUOperand
&)*Operands
[i
]);
3256 if (Op
.isFlatOffset())
3257 return Op
.getStartLoc();
3262 bool AMDGPUAsmParser::validateFlatOffset(const MCInst
&Inst
,
3263 const OperandVector
&Operands
) {
3264 uint64_t TSFlags
= MII
.get(Inst
.getOpcode()).TSFlags
;
3265 if ((TSFlags
& SIInstrFlags::FLAT
) == 0)
3268 auto Opcode
= Inst
.getOpcode();
3269 auto OpNum
= AMDGPU::getNamedOperandIdx(Opcode
, AMDGPU::OpName::offset
);
3270 assert(OpNum
!= -1);
3272 const auto &Op
= Inst
.getOperand(OpNum
);
3273 if (!hasFlatOffsets() && Op
.getImm() != 0) {
3274 Error(getFlatOffsetLoc(Operands
),
3275 "flat offset modifier is not supported on this GPU");
3279 // Address offset is 12-bit signed for GFX10, 13-bit for GFX9.
3280 // For FLAT segment the offset must be positive;
3281 // MSB is ignored and forced to zero.
3282 unsigned OffsetSize
= isGFX9() ? 13 : 12;
3283 if (TSFlags
& SIInstrFlags::IsNonFlatSeg
) {
3284 if (!isIntN(OffsetSize
, Op
.getImm())) {
3285 Error(getFlatOffsetLoc(Operands
),
3286 isGFX9() ? "expected a 13-bit signed offset" :
3287 "expected a 12-bit signed offset");
3291 if (!isUIntN(OffsetSize
- 1, Op
.getImm())) {
3292 Error(getFlatOffsetLoc(Operands
),
3293 isGFX9() ? "expected a 12-bit unsigned offset" :
3294 "expected an 11-bit unsigned offset");
3302 bool AMDGPUAsmParser::validateSOPLiteral(const MCInst
&Inst
) const {
3303 unsigned Opcode
= Inst
.getOpcode();
3304 const MCInstrDesc
&Desc
= MII
.get(Opcode
);
3305 if (!(Desc
.TSFlags
& (SIInstrFlags::SOP2
| SIInstrFlags::SOPC
)))
3308 const int Src0Idx
= AMDGPU::getNamedOperandIdx(Opcode
, AMDGPU::OpName::src0
);
3309 const int Src1Idx
= AMDGPU::getNamedOperandIdx(Opcode
, AMDGPU::OpName::src1
);
3311 const int OpIndices
[] = { Src0Idx
, Src1Idx
};
3313 unsigned NumExprs
= 0;
3314 unsigned NumLiterals
= 0;
3315 uint32_t LiteralValue
;
3317 for (int OpIdx
: OpIndices
) {
3318 if (OpIdx
== -1) break;
3320 const MCOperand
&MO
= Inst
.getOperand(OpIdx
);
3321 // Exclude special imm operands (like that used by s_set_gpr_idx_on)
3322 if (AMDGPU::isSISrcOperand(Desc
, OpIdx
)) {
3323 if (MO
.isImm() && !isInlineConstant(Inst
, OpIdx
)) {
3324 uint32_t Value
= static_cast<uint32_t>(MO
.getImm());
3325 if (NumLiterals
== 0 || LiteralValue
!= Value
) {
3326 LiteralValue
= Value
;
3329 } else if (MO
.isExpr()) {
3335 return NumLiterals
+ NumExprs
<= 1;
3338 bool AMDGPUAsmParser::validateOpSel(const MCInst
&Inst
) {
3339 const unsigned Opc
= Inst
.getOpcode();
3340 if (Opc
== AMDGPU::V_PERMLANE16_B32_gfx10
||
3341 Opc
== AMDGPU::V_PERMLANEX16_B32_gfx10
) {
3342 int OpSelIdx
= AMDGPU::getNamedOperandIdx(Opc
, AMDGPU::OpName::op_sel
);
3343 unsigned OpSel
= Inst
.getOperand(OpSelIdx
).getImm();
3351 // Check if VCC register matches wavefront size
3352 bool AMDGPUAsmParser::validateVccOperand(unsigned Reg
) const {
3353 auto FB
= getFeatureBits();
3354 return (FB
[AMDGPU::FeatureWavefrontSize64
] && Reg
== AMDGPU::VCC
) ||
3355 (FB
[AMDGPU::FeatureWavefrontSize32
] && Reg
== AMDGPU::VCC_LO
);
3358 // VOP3 literal is only allowed in GFX10+ and only one can be used
3359 bool AMDGPUAsmParser::validateVOP3Literal(const MCInst
&Inst
) const {
3360 unsigned Opcode
= Inst
.getOpcode();
3361 const MCInstrDesc
&Desc
= MII
.get(Opcode
);
3362 if (!(Desc
.TSFlags
& (SIInstrFlags::VOP3
| SIInstrFlags::VOP3P
)))
3365 const int Src0Idx
= AMDGPU::getNamedOperandIdx(Opcode
, AMDGPU::OpName::src0
);
3366 const int Src1Idx
= AMDGPU::getNamedOperandIdx(Opcode
, AMDGPU::OpName::src1
);
3367 const int Src2Idx
= AMDGPU::getNamedOperandIdx(Opcode
, AMDGPU::OpName::src2
);
3369 const int OpIndices
[] = { Src0Idx
, Src1Idx
, Src2Idx
};
3371 unsigned NumExprs
= 0;
3372 unsigned NumLiterals
= 0;
3373 uint32_t LiteralValue
;
3375 for (int OpIdx
: OpIndices
) {
3376 if (OpIdx
== -1) break;
3378 const MCOperand
&MO
= Inst
.getOperand(OpIdx
);
3379 if (!MO
.isImm() && !MO
.isExpr())
3381 if (!AMDGPU::isSISrcOperand(Desc
, OpIdx
))
3384 if (OpIdx
== Src2Idx
&& (Desc
.TSFlags
& SIInstrFlags::IsMAI
) &&
3385 getFeatureBits()[AMDGPU::FeatureMFMAInlineLiteralBug
])
3388 if (MO
.isImm() && !isInlineConstant(Inst
, OpIdx
)) {
3389 uint32_t Value
= static_cast<uint32_t>(MO
.getImm());
3390 if (NumLiterals
== 0 || LiteralValue
!= Value
) {
3391 LiteralValue
= Value
;
3394 } else if (MO
.isExpr()) {
3398 NumLiterals
+= NumExprs
;
3400 return !NumLiterals
||
3401 (NumLiterals
== 1 && getFeatureBits()[AMDGPU::FeatureVOP3Literal
]);
3404 bool AMDGPUAsmParser::validateInstruction(const MCInst
&Inst
,
3406 const OperandVector
&Operands
) {
3407 if (!validateLdsDirect(Inst
)) {
3409 "invalid use of lds_direct");
3412 if (!validateSOPLiteral(Inst
)) {
3414 "only one literal operand is allowed");
3417 if (!validateVOP3Literal(Inst
)) {
3419 "invalid literal operand");
3422 if (!validateConstantBusLimitations(Inst
)) {
3424 "invalid operand (violates constant bus restrictions)");
3427 if (!validateEarlyClobberLimitations(Inst
)) {
3429 "destination must be different than all sources");
3432 if (!validateIntClampSupported(Inst
)) {
3434 "integer clamping is not supported on this GPU");
3437 if (!validateOpSel(Inst
)) {
3439 "invalid op_sel operand");
3442 // For MUBUF/MTBUF d16 is a part of opcode, so there is nothing to validate.
3443 if (!validateMIMGD16(Inst
)) {
3445 "d16 modifier is not supported on this GPU");
3448 if (!validateMIMGDim(Inst
)) {
3449 Error(IDLoc
, "dim modifier is required on this GPU");
3452 if (!validateMIMGDataSize(Inst
)) {
3454 "image data size does not match dmask and tfe");
3457 if (!validateMIMGAddrSize(Inst
)) {
3459 "image address size does not match dim and a16");
3462 if (!validateMIMGAtomicDMask(Inst
)) {
3464 "invalid atomic image dmask");
3467 if (!validateMIMGGatherDMask(Inst
)) {
3469 "invalid image_gather dmask: only one bit must be set");
3472 if (!validateFlatOffset(Inst
, Operands
)) {
3479 static std::string
AMDGPUMnemonicSpellCheck(StringRef S
,
3480 const FeatureBitset
&FBS
,
3481 unsigned VariantID
= 0);
3483 bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc
, unsigned &Opcode
,
3484 OperandVector
&Operands
,
3486 uint64_t &ErrorInfo
,
3487 bool MatchingInlineAsm
) {
3489 unsigned Result
= Match_Success
;
3490 for (auto Variant
: getMatchedVariants()) {
3492 auto R
= MatchInstructionImpl(Operands
, Inst
, EI
, MatchingInlineAsm
,
3494 // We order match statuses from least to most specific. We use most specific
3495 // status as resulting
3496 // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature < Match_PreferE32
3497 if ((R
== Match_Success
) ||
3498 (R
== Match_PreferE32
) ||
3499 (R
== Match_MissingFeature
&& Result
!= Match_PreferE32
) ||
3500 (R
== Match_InvalidOperand
&& Result
!= Match_MissingFeature
3501 && Result
!= Match_PreferE32
) ||
3502 (R
== Match_MnemonicFail
&& Result
!= Match_InvalidOperand
3503 && Result
!= Match_MissingFeature
3504 && Result
!= Match_PreferE32
)) {
3508 if (R
== Match_Success
)
3515 if (!validateInstruction(Inst
, IDLoc
, Operands
)) {
3519 Out
.EmitInstruction(Inst
, getSTI());
3522 case Match_MissingFeature
:
3523 return Error(IDLoc
, "instruction not supported on this GPU");
3525 case Match_MnemonicFail
: {
3526 FeatureBitset FBS
= ComputeAvailableFeatures(getSTI().getFeatureBits());
3527 std::string Suggestion
= AMDGPUMnemonicSpellCheck(
3528 ((AMDGPUOperand
&)*Operands
[0]).getToken(), FBS
);
3529 return Error(IDLoc
, "invalid instruction" + Suggestion
,
3530 ((AMDGPUOperand
&)*Operands
[0]).getLocRange());
3533 case Match_InvalidOperand
: {
3534 SMLoc ErrorLoc
= IDLoc
;
3535 if (ErrorInfo
!= ~0ULL) {
3536 if (ErrorInfo
>= Operands
.size()) {
3537 return Error(IDLoc
, "too few operands for instruction");
3539 ErrorLoc
= ((AMDGPUOperand
&)*Operands
[ErrorInfo
]).getStartLoc();
3540 if (ErrorLoc
== SMLoc())
3543 return Error(ErrorLoc
, "invalid operand for instruction");
3546 case Match_PreferE32
:
3547 return Error(IDLoc
, "internal error: instruction without _e64 suffix "
3548 "should be encoded as e32");
3550 llvm_unreachable("Implement any new match types added!");
3553 bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret
) {
3555 if (getLexer().isNot(AsmToken::Integer
) && getLexer().isNot(AsmToken::Identifier
)) {
3558 if (getParser().parseAbsoluteExpression(Tmp
)) {
3561 Ret
= static_cast<uint32_t>(Tmp
);
3565 bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major
,
3567 if (ParseAsAbsoluteExpression(Major
))
3568 return TokError("invalid major version");
3570 if (getLexer().isNot(AsmToken::Comma
))
3571 return TokError("minor version number required, comma expected");
3574 if (ParseAsAbsoluteExpression(Minor
))
3575 return TokError("invalid minor version");
3580 bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() {
3581 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn
)
3582 return TokError("directive only supported for amdgcn architecture");
3586 SMLoc TargetStart
= getTok().getLoc();
3587 if (getParser().parseEscapedString(Target
))
3589 SMRange TargetRange
= SMRange(TargetStart
, getTok().getLoc());
3591 std::string ExpectedTarget
;
3592 raw_string_ostream
ExpectedTargetOS(ExpectedTarget
);
3593 IsaInfo::streamIsaVersion(&getSTI(), ExpectedTargetOS
);
3595 if (Target
!= ExpectedTargetOS
.str())
3596 return getParser().Error(TargetRange
.Start
, "target must match options",
3599 getTargetStreamer().EmitDirectiveAMDGCNTarget(Target
);
3603 bool AMDGPUAsmParser::OutOfRangeError(SMRange Range
) {
3604 return getParser().Error(Range
.Start
, "value out of range", Range
);
3607 bool AMDGPUAsmParser::calculateGPRBlocks(
3608 const FeatureBitset
&Features
, bool VCCUsed
, bool FlatScrUsed
,
3609 bool XNACKUsed
, Optional
<bool> EnableWavefrontSize32
, unsigned NextFreeVGPR
,
3610 SMRange VGPRRange
, unsigned NextFreeSGPR
, SMRange SGPRRange
,
3611 unsigned &VGPRBlocks
, unsigned &SGPRBlocks
) {
3612 // TODO(scott.linder): These calculations are duplicated from
3613 // AMDGPUAsmPrinter::getSIProgramInfo and could be unified.
3614 IsaVersion Version
= getIsaVersion(getSTI().getCPU());
3616 unsigned NumVGPRs
= NextFreeVGPR
;
3617 unsigned NumSGPRs
= NextFreeSGPR
;
3619 if (Version
.Major
>= 10)
3622 unsigned MaxAddressableNumSGPRs
=
3623 IsaInfo::getAddressableNumSGPRs(&getSTI());
3625 if (Version
.Major
>= 8 && !Features
.test(FeatureSGPRInitBug
) &&
3626 NumSGPRs
> MaxAddressableNumSGPRs
)
3627 return OutOfRangeError(SGPRRange
);
3630 IsaInfo::getNumExtraSGPRs(&getSTI(), VCCUsed
, FlatScrUsed
, XNACKUsed
);
3632 if ((Version
.Major
<= 7 || Features
.test(FeatureSGPRInitBug
)) &&
3633 NumSGPRs
> MaxAddressableNumSGPRs
)
3634 return OutOfRangeError(SGPRRange
);
3636 if (Features
.test(FeatureSGPRInitBug
))
3637 NumSGPRs
= IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG
;
3641 IsaInfo::getNumVGPRBlocks(&getSTI(), NumVGPRs
, EnableWavefrontSize32
);
3642 SGPRBlocks
= IsaInfo::getNumSGPRBlocks(&getSTI(), NumSGPRs
);
3647 bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
3648 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn
)
3649 return TokError("directive only supported for amdgcn architecture");
3651 if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA
)
3652 return TokError("directive only supported for amdhsa OS");
3654 StringRef KernelName
;
3655 if (getParser().parseIdentifier(KernelName
))
3658 kernel_descriptor_t KD
= getDefaultAmdhsaKernelDescriptor(&getSTI());
3662 IsaVersion IVersion
= getIsaVersion(getSTI().getCPU());
3665 uint64_t NextFreeVGPR
= 0;
3667 uint64_t NextFreeSGPR
= 0;
3668 unsigned UserSGPRCount
= 0;
3669 bool ReserveVCC
= true;
3670 bool ReserveFlatScr
= true;
3671 bool ReserveXNACK
= hasXNACK();
3672 Optional
<bool> EnableWavefrontSize32
;
3675 while (getLexer().is(AsmToken::EndOfStatement
))
3678 if (getLexer().isNot(AsmToken::Identifier
))
3679 return TokError("expected .amdhsa_ directive or .end_amdhsa_kernel");
3681 StringRef ID
= getTok().getIdentifier();
3682 SMRange IDRange
= getTok().getLocRange();
3685 if (ID
== ".end_amdhsa_kernel")
3688 if (Seen
.find(ID
) != Seen
.end())
3689 return TokError(".amdhsa_ directives cannot be repeated");
3692 SMLoc ValStart
= getTok().getLoc();
3694 if (getParser().parseAbsoluteExpression(IVal
))
3696 SMLoc ValEnd
= getTok().getLoc();
3697 SMRange ValRange
= SMRange(ValStart
, ValEnd
);
3700 return OutOfRangeError(ValRange
);
3702 uint64_t Val
= IVal
;
3704 #define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE) \
3705 if (!isUInt<ENTRY##_WIDTH>(VALUE)) \
3706 return OutOfRangeError(RANGE); \
3707 AMDHSA_BITS_SET(FIELD, ENTRY, VALUE);
3709 if (ID
== ".amdhsa_group_segment_fixed_size") {
3710 if (!isUInt
<sizeof(KD
.group_segment_fixed_size
) * CHAR_BIT
>(Val
))
3711 return OutOfRangeError(ValRange
);
3712 KD
.group_segment_fixed_size
= Val
;
3713 } else if (ID
== ".amdhsa_private_segment_fixed_size") {
3714 if (!isUInt
<sizeof(KD
.private_segment_fixed_size
) * CHAR_BIT
>(Val
))
3715 return OutOfRangeError(ValRange
);
3716 KD
.private_segment_fixed_size
= Val
;
3717 } else if (ID
== ".amdhsa_user_sgpr_private_segment_buffer") {
3718 PARSE_BITS_ENTRY(KD
.kernel_code_properties
,
3719 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER
,
3723 } else if (ID
== ".amdhsa_user_sgpr_dispatch_ptr") {
3724 PARSE_BITS_ENTRY(KD
.kernel_code_properties
,
3725 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR
, Val
,
3729 } else if (ID
== ".amdhsa_user_sgpr_queue_ptr") {
3730 PARSE_BITS_ENTRY(KD
.kernel_code_properties
,
3731 KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR
, Val
,
3735 } else if (ID
== ".amdhsa_user_sgpr_kernarg_segment_ptr") {
3736 PARSE_BITS_ENTRY(KD
.kernel_code_properties
,
3737 KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR
,
3741 } else if (ID
== ".amdhsa_user_sgpr_dispatch_id") {
3742 PARSE_BITS_ENTRY(KD
.kernel_code_properties
,
3743 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID
, Val
,
3747 } else if (ID
== ".amdhsa_user_sgpr_flat_scratch_init") {
3748 PARSE_BITS_ENTRY(KD
.kernel_code_properties
,
3749 KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT
, Val
,
3753 } else if (ID
== ".amdhsa_user_sgpr_private_segment_size") {
3754 PARSE_BITS_ENTRY(KD
.kernel_code_properties
,
3755 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE
,
3759 } else if (ID
== ".amdhsa_wavefront_size32") {
3760 if (IVersion
.Major
< 10)
3761 return getParser().Error(IDRange
.Start
, "directive requires gfx10+",
3763 EnableWavefrontSize32
= Val
;
3764 PARSE_BITS_ENTRY(KD
.kernel_code_properties
,
3765 KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32
,
3767 } else if (ID
== ".amdhsa_system_sgpr_private_segment_wavefront_offset") {
3769 KD
.compute_pgm_rsrc2
,
3770 COMPUTE_PGM_RSRC2_ENABLE_SGPR_PRIVATE_SEGMENT_WAVEFRONT_OFFSET
, Val
,
3772 } else if (ID
== ".amdhsa_system_sgpr_workgroup_id_x") {
3773 PARSE_BITS_ENTRY(KD
.compute_pgm_rsrc2
,
3774 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X
, Val
,
3776 } else if (ID
== ".amdhsa_system_sgpr_workgroup_id_y") {
3777 PARSE_BITS_ENTRY(KD
.compute_pgm_rsrc2
,
3778 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y
, Val
,
3780 } else if (ID
== ".amdhsa_system_sgpr_workgroup_id_z") {
3781 PARSE_BITS_ENTRY(KD
.compute_pgm_rsrc2
,
3782 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z
, Val
,
3784 } else if (ID
== ".amdhsa_system_sgpr_workgroup_info") {
3785 PARSE_BITS_ENTRY(KD
.compute_pgm_rsrc2
,
                       COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, Val,
                       ValRange);
    } else if (ID == ".amdhsa_system_vgpr_workitem_id") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
                       COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, Val,
                       ValRange);
    } else if (ID == ".amdhsa_next_free_vgpr") {
      VGPRRange = ValRange;
      NextFreeVGPR = Val;
    } else if (ID == ".amdhsa_next_free_sgpr") {
      SGPRRange = ValRange;
      NextFreeSGPR = Val;
    } else if (ID == ".amdhsa_reserve_vcc") {
      if (!isUInt<1>(Val))
        return OutOfRangeError(ValRange);
      ReserveVCC = Val;
    } else if (ID == ".amdhsa_reserve_flat_scratch") {
      if (IVersion.Major < 7)
        return getParser().Error(IDRange.Start, "directive requires gfx7+",
                                 IDRange);
      if (!isUInt<1>(Val))
        return OutOfRangeError(ValRange);
      ReserveFlatScr = Val;
    } else if (ID == ".amdhsa_reserve_xnack_mask") {
      if (IVersion.Major < 8)
        return getParser().Error(IDRange.Start, "directive requires gfx8+",
                                 IDRange);
      if (!isUInt<1>(Val))
        return OutOfRangeError(ValRange);
      ReserveXNACK = Val;
    } else if (ID == ".amdhsa_float_round_mode_32") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
                       COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, Val, ValRange);
    } else if (ID == ".amdhsa_float_round_mode_16_64") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
                       COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, Val, ValRange);
    } else if (ID == ".amdhsa_float_denorm_mode_32") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
                       COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, Val, ValRange);
    } else if (ID == ".amdhsa_float_denorm_mode_16_64") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
                       COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, Val,
                       ValRange);
    } else if (ID == ".amdhsa_dx10_clamp") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
                       COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP, Val, ValRange);
    } else if (ID == ".amdhsa_ieee_mode") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE,
                       Val, ValRange);
    } else if (ID == ".amdhsa_fp16_overflow") {
      if (IVersion.Major < 9)
        return getParser().Error(IDRange.Start, "directive requires gfx9+",
                                 IDRange);
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FP16_OVFL, Val,
                       ValRange);
    } else if (ID == ".amdhsa_workgroup_processor_mode") {
      if (IVersion.Major < 10)
        return getParser().Error(IDRange.Start, "directive requires gfx10+",
                                 IDRange);
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_WGP_MODE, Val,
                       ValRange);
    } else if (ID == ".amdhsa_memory_ordered") {
      if (IVersion.Major < 10)
        return getParser().Error(IDRange.Start, "directive requires gfx10+",
                                 IDRange);
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_MEM_ORDERED, Val,
                       ValRange);
    } else if (ID == ".amdhsa_forward_progress") {
      if (IVersion.Major < 10)
        return getParser().Error(IDRange.Start, "directive requires gfx10+",
                                 IDRange);
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FWD_PROGRESS, Val,
                       ValRange);
    } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") {
      PARSE_BITS_ENTRY(
          KD.compute_pgm_rsrc2,
          COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, Val,
          ValRange);
    } else if (ID == ".amdhsa_exception_fp_denorm_src") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
                       COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE,
                       Val, ValRange);
    } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") {
      PARSE_BITS_ENTRY(
          KD.compute_pgm_rsrc2,
          COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, Val,
          ValRange);
    } else if (ID == ".amdhsa_exception_fp_ieee_overflow") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
                       COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW,
                       Val, ValRange);
    } else if (ID == ".amdhsa_exception_fp_ieee_underflow") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
                       COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW,
                       Val, ValRange);
    } else if (ID == ".amdhsa_exception_fp_ieee_inexact") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
                       COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT,
                       Val, ValRange);
    } else if (ID == ".amdhsa_exception_int_div_zero") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
                       COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO,
                       Val, ValRange);
    } else {
      return getParser().Error(IDRange.Start,
                               "unknown .amdhsa_kernel directive", IDRange);
    }

#undef PARSE_BITS_ENTRY
  }

  if (Seen.find(".amdhsa_next_free_vgpr") == Seen.end())
    return TokError(".amdhsa_next_free_vgpr directive is required");

  if (Seen.find(".amdhsa_next_free_sgpr") == Seen.end())
    return TokError(".amdhsa_next_free_sgpr directive is required");

  unsigned VGPRBlocks;
  unsigned SGPRBlocks;
  if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr,
                         ReserveXNACK, EnableWavefrontSize32, NextFreeVGPR,
                         VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks,
                         SGPRBlocks))
    return true;

  if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>(
          VGPRBlocks))
    return OutOfRangeError(VGPRRange);
  AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
                  COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, VGPRBlocks);

  if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>(
          SGPRBlocks))
    return OutOfRangeError(SGPRRange);
  AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
                  COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT,
                  SGPRBlocks);

  if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount))
    return TokError("too many user SGPRs enabled");
  AMDHSA_BITS_SET(KD.compute_pgm_rsrc2, COMPUTE_PGM_RSRC2_USER_SGPR_COUNT,
                  UserSGPRCount);

  getTargetStreamer().EmitAmdhsaKernelDescriptor(
      getSTI(), KernelName, KD, NextFreeVGPR, NextFreeSGPR, ReserveVCC,
      ReserveFlatScr, ReserveXNACK);
  return false;
}
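// Illustrative usage only (not part of the original source; the kernel name
// and register counts below are made up): a minimal .amdhsa_kernel block that
// exercises the directives handled above.
//
//   .amdhsa_kernel my_kernel
//     .amdhsa_next_free_vgpr 8
//     .amdhsa_next_free_sgpr 16
//     .amdhsa_reserve_vcc 1
//   .end_amdhsa_kernel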
bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectVersion() {
  uint32_t Major;
  uint32_t Minor;

  if (ParseDirectiveMajorMinor(Major, Minor))
    return true;

  getTargetStreamer().EmitDirectiveHSACodeObjectVersion(Major, Minor);
  return false;
}

bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() {
  uint32_t Major;
  uint32_t Minor;
  uint32_t Stepping;
  StringRef VendorName;
  StringRef ArchName;

  // If this directive has no arguments, then use the ISA version for the
  // targeted GPU.
  if (getLexer().is(AsmToken::EndOfStatement)) {
    AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
    getTargetStreamer().EmitDirectiveHSACodeObjectISA(ISA.Major, ISA.Minor,
                                                      ISA.Stepping,
                                                      "AMD", "AMDGPU");
    return false;
  }

  if (ParseDirectiveMajorMinor(Major, Minor))
    return true;

  if (getLexer().isNot(AsmToken::Comma))
    return TokError("stepping version number required, comma expected");
  Lex();

  if (ParseAsAbsoluteExpression(Stepping))
    return TokError("invalid stepping version");

  if (getLexer().isNot(AsmToken::Comma))
    return TokError("vendor name required, comma expected");
  Lex();

  if (getLexer().isNot(AsmToken::String))
    return TokError("invalid vendor name");

  VendorName = getLexer().getTok().getStringContents();
  Lex();

  if (getLexer().isNot(AsmToken::Comma))
    return TokError("arch name required, comma expected");
  Lex();

  if (getLexer().isNot(AsmToken::String))
    return TokError("invalid arch name");

  ArchName = getLexer().getTok().getStringContents();
  Lex();

  getTargetStreamer().EmitDirectiveHSACodeObjectISA(Major, Minor, Stepping,
                                                    VendorName, ArchName);
  return false;
}
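// Illustrative usage only (the version numbers are made up): the legacy
// code-object directives handled above accept, e.g.,
//   .hsa_code_object_version 2,1
//   .hsa_code_object_isa 8,0,3,"AMD","AMDGPU"
// or, with no arguments, the ISA version of the targeted GPU.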
bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID,
                                               amd_kernel_code_t &Header) {
  // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing
  // assembly for backwards compatibility.
  if (ID == "max_scratch_backing_memory_byte_size") {
    Parser.eatToEndOfStatement();
    return false;
  }

  SmallString<40> ErrStr;
  raw_svector_ostream Err(ErrStr);
  if (!parseAmdKernelCodeField(ID, getParser(), Header, Err)) {
    return TokError(Err.str());
  }
  Lex();

  if (ID == "enable_wavefront_size32") {
    if (Header.code_properties & AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) {
      if (!isGFX10())
        return TokError("enable_wavefront_size32=1 is only allowed on GFX10+");
      if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
        return TokError("enable_wavefront_size32=1 requires +WavefrontSize32");
    } else {
      if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
        return TokError("enable_wavefront_size32=0 requires +WavefrontSize64");
    }
  }

  if (ID == "wavefront_size") {
    if (Header.wavefront_size == 5) {
      if (!isGFX10())
        return TokError("wavefront_size=5 is only allowed on GFX10+");
      if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
        return TokError("wavefront_size=5 requires +WavefrontSize32");
    } else if (Header.wavefront_size == 6) {
      if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
        return TokError("wavefront_size=6 requires +WavefrontSize64");
    }
  }

  if (ID == "enable_wgp_mode") {
    if (G_00B848_WGP_MODE(Header.compute_pgm_resource_registers) && !isGFX10())
      return TokError("enable_wgp_mode=1 is only allowed on GFX10+");
  }

  if (ID == "enable_mem_ordered") {
    if (G_00B848_MEM_ORDERED(Header.compute_pgm_resource_registers) && !isGFX10())
      return TokError("enable_mem_ordered=1 is only allowed on GFX10+");
  }

  if (ID == "enable_fwd_progress") {
    if (G_00B848_FWD_PROGRESS(Header.compute_pgm_resource_registers) && !isGFX10())
      return TokError("enable_fwd_progress=1 is only allowed on GFX10+");
  }

  return false;
}

bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() {
  amd_kernel_code_t Header;
  AMDGPU::initDefaultAMDKernelCodeT(Header, &getSTI());

  while (true) {
    // Lex EndOfStatement.  This is in a while loop, because lexing a comment
    // will set the current token to EndOfStatement.
    while (getLexer().is(AsmToken::EndOfStatement))
      Lex();

    if (getLexer().isNot(AsmToken::Identifier))
      return TokError("expected value identifier or .end_amd_kernel_code_t");

    StringRef ID = getLexer().getTok().getIdentifier();
    Lex();

    if (ID == ".end_amd_kernel_code_t")
      break;

    if (ParseAMDKernelCodeTValue(ID, Header))
      return true;
  }

  getTargetStreamer().EmitAMDKernelCodeT(Header);

  return false;
}
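// Illustrative usage only (the field values are made up): the directive
// parsed above expects a block of amd_kernel_code_t field assignments, e.g.
//   .amd_kernel_code_t
//     wavefront_size = 6
//     enable_sgpr_kernarg_segment_ptr = 1
//   .end_amd_kernel_code_t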
bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() {
  if (getLexer().isNot(AsmToken::Identifier))
    return TokError("expected symbol name");

  StringRef KernelName = Parser.getTok().getString();

  getTargetStreamer().EmitAMDGPUSymbolType(KernelName,
                                           ELF::STT_AMDGPU_HSA_KERNEL);
  Lex();
  if (!AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI()))
    KernelScope.initialize(getContext());
  return false;
}

bool AMDGPUAsmParser::ParseDirectiveISAVersion() {
  if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) {
    return Error(getParser().getTok().getLoc(),
                 ".amd_amdgpu_isa directive is not available on non-amdgcn "
                 "architectures");
  }

  auto ISAVersionStringFromASM = getLexer().getTok().getStringContents();

  std::string ISAVersionStringFromSTI;
  raw_string_ostream ISAVersionStreamFromSTI(ISAVersionStringFromSTI);
  IsaInfo::streamIsaVersion(&getSTI(), ISAVersionStreamFromSTI);

  if (ISAVersionStringFromASM != ISAVersionStreamFromSTI.str()) {
    return Error(getParser().getTok().getLoc(),
                 ".amd_amdgpu_isa directive does not match triple and/or mcpu "
                 "arguments specified through the command line");
  }

  getTargetStreamer().EmitISAVersion(ISAVersionStreamFromSTI.str());
  Lex();

  return false;
}

bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() {
  const char *AssemblerDirectiveBegin;
  const char *AssemblerDirectiveEnd;
  std::tie(AssemblerDirectiveBegin, AssemblerDirectiveEnd) =
      AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())
          ? std::make_tuple(HSAMD::V3::AssemblerDirectiveBegin,
                            HSAMD::V3::AssemblerDirectiveEnd)
          : std::make_tuple(HSAMD::AssemblerDirectiveBegin,
                            HSAMD::AssemblerDirectiveEnd);

  if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) {
    return Error(getParser().getTok().getLoc(),
                 (Twine(AssemblerDirectiveBegin) + Twine(" directive is "
                 "not available on non-amdhsa OSes")).str());
  }

  std::string HSAMetadataString;
  if (ParseToEndDirective(AssemblerDirectiveBegin, AssemblerDirectiveEnd,
                          HSAMetadataString))
    return true;

  if (IsaInfo::hasCodeObjectV3(&getSTI())) {
    if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString))
      return Error(getParser().getTok().getLoc(), "invalid HSA metadata");
  } else {
    if (!getTargetStreamer().EmitHSAMetadataV2(HSAMetadataString))
      return Error(getParser().getTok().getLoc(), "invalid HSA metadata");
  }

  return false;
}
/// Common code to parse out a block of text (typically YAML) between start and
/// end directives.
bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin,
                                          const char *AssemblerDirectiveEnd,
                                          std::string &CollectString) {

  raw_string_ostream CollectStream(CollectString);

  getLexer().setSkipSpace(false);

  bool FoundEnd = false;
  while (!getLexer().is(AsmToken::Eof)) {
    while (getLexer().is(AsmToken::Space)) {
      CollectStream << getLexer().getTok().getString();
      Lex();
    }

    if (getLexer().is(AsmToken::Identifier)) {
      StringRef ID = getLexer().getTok().getIdentifier();
      if (ID == AssemblerDirectiveEnd) {
        Lex();
        FoundEnd = true;
        break;
      }
    }

    CollectStream << Parser.parseStringToEndOfStatement()
                  << getContext().getAsmInfo()->getSeparatorString();

    Parser.eatToEndOfStatement();
  }

  getLexer().setSkipSpace(true);

  if (getLexer().is(AsmToken::Eof) && !FoundEnd) {
    return TokError(Twine("expected directive ") +
                    Twine(AssemblerDirectiveEnd) + Twine(" not found"));
  }

  CollectStream.flush();
  return false;
}

/// Parse the assembler directive for new MsgPack-format PAL metadata.
bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() {
  std::string String;
  if (ParseToEndDirective(AMDGPU::PALMD::AssemblerDirectiveBegin,
                          AMDGPU::PALMD::AssemblerDirectiveEnd, String))
    return true;

  auto PALMetadata = getTargetStreamer().getPALMetadata();
  if (!PALMetadata->setFromString(String))
    return Error(getParser().getTok().getLoc(), "invalid PAL metadata");
  return false;
}

/// Parse the assembler directive for old linear-format PAL metadata.
bool AMDGPUAsmParser::ParseDirectivePALMetadata() {
  if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) {
    return Error(getParser().getTok().getLoc(),
                 (Twine(PALMD::AssemblerDirective) + Twine(" directive is "
                 "not available on non-amdpal OSes")).str());
  }

  auto PALMetadata = getTargetStreamer().getPALMetadata();
  PALMetadata->setLegacy();
  for (;;) {
    uint32_t Key, Value;
    if (ParseAsAbsoluteExpression(Key)) {
      return TokError(Twine("invalid value in ") +
                      Twine(PALMD::AssemblerDirective));
    }
    if (getLexer().isNot(AsmToken::Comma)) {
      return TokError(Twine("expected an even number of values in ") +
                      Twine(PALMD::AssemblerDirective));
    }
    Lex();
    if (ParseAsAbsoluteExpression(Value)) {
      return TokError(Twine("invalid value in ") +
                      Twine(PALMD::AssemblerDirective));
    }
    PALMetadata->setRegister(Key, Value);
    if (getLexer().isNot(AsmToken::Comma))
      break;
    Lex();
  }
  return false;
}
/// ParseDirectiveAMDGPULDS
///  ::= .amdgpu_lds identifier ',' size_expression [',' align_expression]
bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() {
  if (getParser().checkForValidSection())
    return true;

  StringRef Name;
  SMLoc NameLoc = getLexer().getLoc();
  if (getParser().parseIdentifier(Name))
    return TokError("expected identifier in directive");

  MCSymbol *Symbol = getContext().getOrCreateSymbol(Name);
  if (parseToken(AsmToken::Comma, "expected ','"))
    return true;

  unsigned LocalMemorySize = AMDGPU::IsaInfo::getLocalMemorySize(&getSTI());

  int64_t Size;
  SMLoc SizeLoc = getLexer().getLoc();
  if (getParser().parseAbsoluteExpression(Size))
    return true;
  if (Size < 0)
    return Error(SizeLoc, "size must be non-negative");
  if (Size > LocalMemorySize)
    return Error(SizeLoc, "size is too large");

  int64_t Align = 4;
  if (getLexer().is(AsmToken::Comma)) {
    Lex();
    SMLoc AlignLoc = getLexer().getLoc();
    if (getParser().parseAbsoluteExpression(Align))
      return true;
    if (Align < 0 || !isPowerOf2_64(Align))
      return Error(AlignLoc, "alignment must be a power of two");

    // Alignment larger than the size of LDS is possible in theory, as long
    // as the linker manages to place the symbol at address 0, but we do want
    // to make sure the alignment fits nicely into a 32-bit integer.
    if (Align >= 1u << 31)
      return Error(AlignLoc, "alignment is too large");
  }

  if (parseToken(AsmToken::EndOfStatement,
                 "unexpected token in '.amdgpu_lds' directive"))
    return true;

  Symbol->redefineIfPossible();
  if (!Symbol->isUndefined())
    return Error(NameLoc, "invalid symbol redefinition");

  getTargetStreamer().emitAMDGPULDS(Symbol, Size, Align);
  return false;
}
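// Illustrative usage only (symbol, size and alignment are made up), matching
// the grammar documented above:
//   .amdgpu_lds shared_buf, 4096, 16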
bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
  StringRef IDVal = DirectiveID.getString();

  if (AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
    if (IDVal == ".amdgcn_target")
      return ParseDirectiveAMDGCNTarget();

    if (IDVal == ".amdhsa_kernel")
      return ParseDirectiveAMDHSAKernel();

    // TODO: Restructure/combine with PAL metadata directive.
    if (IDVal == AMDGPU::HSAMD::V3::AssemblerDirectiveBegin)
      return ParseDirectiveHSAMetadata();
  } else {
    if (IDVal == ".hsa_code_object_version")
      return ParseDirectiveHSACodeObjectVersion();

    if (IDVal == ".hsa_code_object_isa")
      return ParseDirectiveHSACodeObjectISA();

    if (IDVal == ".amd_kernel_code_t")
      return ParseDirectiveAMDKernelCodeT();

    if (IDVal == ".amdgpu_hsa_kernel")
      return ParseDirectiveAMDGPUHsaKernel();

    if (IDVal == ".amd_amdgpu_isa")
      return ParseDirectiveISAVersion();

    if (IDVal == AMDGPU::HSAMD::AssemblerDirectiveBegin)
      return ParseDirectiveHSAMetadata();
  }

  if (IDVal == ".amdgpu_lds")
    return ParseDirectiveAMDGPULDS();

  if (IDVal == PALMD::AssemblerDirectiveBegin)
    return ParseDirectivePALMetadataBegin();

  if (IDVal == PALMD::AssemblerDirective)
    return ParseDirectivePALMetadata();

  return true;
}
bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI,
                                           unsigned RegNo) const {

  for (MCRegAliasIterator R(AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15, &MRI, true);
       R.isValid(); ++R) {
    if (*R == RegNo)
      return isGFX9() || isGFX10();
  }

  // GFX10 has 2 more SGPRs 104 and 105.
  for (MCRegAliasIterator R(AMDGPU::SGPR104_SGPR105, &MRI, true);
       R.isValid(); ++R) {
    if (*R == RegNo)
      return hasSGPR104_SGPR105();
  }

  switch (RegNo) {
  case AMDGPU::SRC_SHARED_BASE:
  case AMDGPU::SRC_SHARED_LIMIT:
  case AMDGPU::SRC_PRIVATE_BASE:
  case AMDGPU::SRC_PRIVATE_LIMIT:
  case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
    return !isCI() && !isSI() && !isVI();
  case AMDGPU::TBA:
  case AMDGPU::TBA_LO:
  case AMDGPU::TBA_HI:
  case AMDGPU::TMA:
  case AMDGPU::TMA_LO:
  case AMDGPU::TMA_HI:
    return !isGFX9() && !isGFX10();
  case AMDGPU::XNACK_MASK:
  case AMDGPU::XNACK_MASK_LO:
  case AMDGPU::XNACK_MASK_HI:
    return !isCI() && !isSI() && !isGFX10() && hasXNACK();
  case AMDGPU::SGPR_NULL:
    return isGFX10();
  default:
    break;
  }

  if (isCI())
    return true;

  if (isSI() || isGFX10()) {
    // No flat_scr on SI.
    // On GFX10 flat scratch is not a valid register operand and can only be
    // accessed with s_setreg/s_getreg.
    switch (RegNo) {
    case AMDGPU::FLAT_SCR:
    case AMDGPU::FLAT_SCR_LO:
    case AMDGPU::FLAT_SCR_HI:
      return false;
    default:
      return true;
    }
  }

  // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that
  // SI/CI have.
  for (MCRegAliasIterator R(AMDGPU::SGPR102_SGPR103, &MRI, true);
       R.isValid(); ++R) {
    if (*R == RegNo)
      return hasSGPR102_SGPR103();
  }

  return true;
}
OperandMatchResultTy
AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic,
                              OperandMode Mode) {
  // Try to parse with a custom parser
  OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic);

  // If we successfully parsed the operand or if there was an error parsing,
  // we are done.
  //
  // If we are parsing after we reach EndOfStatement then this means we
  // are appending default values to the Operands list.  This is only done
  // by custom parser, so we shouldn't continue on to the generic parsing.
  if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail ||
      getLexer().is(AsmToken::EndOfStatement))
    return ResTy;

  if (Mode == OperandMode_NSA && getLexer().is(AsmToken::LBrac)) {
    unsigned Prefix = Operands.size();
    SMLoc LBraceLoc = getTok().getLoc();
    Parser.Lex(); // eat the '['

    for (;;) {
      ResTy = parseReg(Operands);
      if (ResTy != MatchOperand_Success)
        return ResTy;

      if (getLexer().is(AsmToken::RBrac))
        break;

      if (getLexer().isNot(AsmToken::Comma))
        return MatchOperand_ParseFail;
      Parser.Lex();
    }

    if (Operands.size() - Prefix > 1) {
      Operands.insert(Operands.begin() + Prefix,
                      AMDGPUOperand::CreateToken(this, "[", LBraceLoc));
      Operands.push_back(AMDGPUOperand::CreateToken(this, "]",
                                                    getTok().getLoc()));
    }

    Parser.Lex(); // eat the ']'
    return MatchOperand_Success;
  }

  return parseRegOrImm(Operands);
}
StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) {
  // Clear any forced encodings from the previous instruction.
  setForcedEncodingSize(0);
  setForcedDPP(false);
  setForcedSDWA(false);

  if (Name.endswith("_e64")) {
    setForcedEncodingSize(64);
    return Name.substr(0, Name.size() - 4);
  } else if (Name.endswith("_e32")) {
    setForcedEncodingSize(32);
    return Name.substr(0, Name.size() - 4);
  } else if (Name.endswith("_dpp")) {
    setForcedDPP(true);
    return Name.substr(0, Name.size() - 4);
  } else if (Name.endswith("_sdwa")) {
    setForcedSDWA(true);
    return Name.substr(0, Name.size() - 5);
  }
  return Name;
}
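// For example (illustrative only), "v_add_f32_e64" is parsed as the mnemonic
// "v_add_f32" with a forced 64-bit (VOP3) encoding; the "_dpp" and "_sdwa"
// suffixes force the corresponding encodings in the same way.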
bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info,
                                       StringRef Name,
                                       SMLoc NameLoc, OperandVector &Operands) {
  // Add the instruction mnemonic
  Name = parseMnemonicSuffix(Name);
  Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc));

  bool IsMIMG = Name.startswith("image_");

  while (!getLexer().is(AsmToken::EndOfStatement)) {
    OperandMode Mode = OperandMode_Default;
    if (IsMIMG && isGFX10() && Operands.size() == 2)
      Mode = OperandMode_NSA;
    OperandMatchResultTy Res = parseOperand(Operands, Name, Mode);

    // Eat the comma or space if there is one.
    if (getLexer().is(AsmToken::Comma))
      Parser.Lex();

    switch (Res) {
    case MatchOperand_Success: break;
    case MatchOperand_ParseFail:
      // FIXME: use real operand location rather than the current location.
      Error(getLexer().getLoc(), "failed parsing operand.");
      while (!getLexer().is(AsmToken::EndOfStatement)) {
        Parser.Lex();
      }
      return true;
    case MatchOperand_NoMatch:
      // FIXME: use real operand location rather than the current location.
      Error(getLexer().getLoc(), "not a valid operand.");
      while (!getLexer().is(AsmToken::EndOfStatement)) {
        Parser.Lex();
      }
      return true;
    }
  }

  return false;
}
//===----------------------------------------------------------------------===//
// Utility functions
//===----------------------------------------------------------------------===//

OperandMatchResultTy
AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, int64_t &IntVal) {

  if (!trySkipId(Prefix, AsmToken::Colon))
    return MatchOperand_NoMatch;

  return parseExpr(IntVal) ? MatchOperand_Success : MatchOperand_ParseFail;
}

OperandMatchResultTy
AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
                                    AMDGPUOperand::ImmTy ImmTy,
                                    bool (*ConvertResult)(int64_t&)) {
  SMLoc S = getLoc();
  int64_t Value = 0;

  OperandMatchResultTy Res = parseIntWithPrefix(Prefix, Value);
  if (Res != MatchOperand_Success)
    return Res;

  if (ConvertResult && !ConvertResult(Value)) {
    Error(S, "invalid " + StringRef(Prefix) + " value.");
  }

  Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy));
  return MatchOperand_Success;
}
OperandMatchResultTy
AMDGPUAsmParser::parseOperandArrayWithPrefix(const char *Prefix,
                                             OperandVector &Operands,
                                             AMDGPUOperand::ImmTy ImmTy,
                                             bool (*ConvertResult)(int64_t&)) {
  SMLoc S = getLoc();
  if (!trySkipId(Prefix, AsmToken::Colon))
    return MatchOperand_NoMatch;

  if (!skipToken(AsmToken::LBrac, "expected a left square bracket"))
    return MatchOperand_ParseFail;

  unsigned Val = 0;
  const unsigned MaxSize = 4;

  // FIXME: How to verify the number of elements matches the number of src
  // operands?
  for (int I = 0; ; ++I) {
    int64_t Op;
    SMLoc Loc = getLoc();
    if (!parseExpr(Op))
      return MatchOperand_ParseFail;

    if (Op != 0 && Op != 1) {
      Error(Loc, "invalid " + StringRef(Prefix) + " value.");
      return MatchOperand_ParseFail;
    }

    Val |= (Op << I);

    if (trySkipToken(AsmToken::RBrac))
      break;

    if (I + 1 == MaxSize) {
      Error(getLoc(), "expected a closing square bracket");
      return MatchOperand_ParseFail;
    }

    if (!skipToken(AsmToken::Comma, "expected a comma"))
      return MatchOperand_ParseFail;
  }

  Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy));
  return MatchOperand_Success;
}
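// Illustrative usage only: this parses bracketed bit lists such as
// "neg_lo:[0,1]" or "op_sel:[1,0,0,1]"; every element must be 0 or 1 and at
// most four elements are accepted.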
OperandMatchResultTy
AMDGPUAsmParser::parseNamedBit(const char *Name, OperandVector &Operands,
                               AMDGPUOperand::ImmTy ImmTy) {
  int64_t Bit = 0;
  SMLoc S = Parser.getTok().getLoc();

  // We are at the end of the statement, and this is a default argument, so
  // use a default value.
  if (getLexer().isNot(AsmToken::EndOfStatement)) {
    switch(getLexer().getKind()) {
      case AsmToken::Identifier: {
        StringRef Tok = Parser.getTok().getString();
        if (Tok == Name) {
          if (Tok == "r128" && isGFX9())
            Error(S, "r128 modifier is not supported on this GPU");
          if (Tok == "a16" && !isGFX9() && !isGFX10())
            Error(S, "a16 modifier is not supported on this GPU");
          Bit = 1;
          Parser.Lex();
        } else if (Tok.startswith("no") && Tok.endswith(Name)) {
          Bit = 0;
          Parser.Lex();
        } else {
          return MatchOperand_NoMatch;
        }
        break;
      }
      default:
        return MatchOperand_NoMatch;
    }
  }

  if (!isGFX10() && ImmTy == AMDGPUOperand::ImmTyDLC)
    return MatchOperand_ParseFail;

  Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy));
  return MatchOperand_Success;
}

static void addOptionalImmOperand(
  MCInst& Inst, const OperandVector& Operands,
  AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx,
  AMDGPUOperand::ImmTy ImmT,
  int64_t Default = 0) {
  auto i = OptionalIdx.find(ImmT);
  if (i != OptionalIdx.end()) {
    unsigned Idx = i->second;
    ((AMDGPUOperand &)*Operands[Idx]).addImmOperands(Inst, 1);
  } else {
    Inst.addOperand(MCOperand::createImm(Default));
  }
}
OperandMatchResultTy
AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix, StringRef &Value) {
  if (getLexer().isNot(AsmToken::Identifier)) {
    return MatchOperand_NoMatch;
  }
  StringRef Tok = Parser.getTok().getString();
  if (Tok != Prefix) {
    return MatchOperand_NoMatch;
  }

  Parser.Lex();
  if (getLexer().isNot(AsmToken::Colon)) {
    return MatchOperand_ParseFail;
  }

  Parser.Lex();
  if (getLexer().isNot(AsmToken::Identifier)) {
    return MatchOperand_ParseFail;
  }

  Value = Parser.getTok().getString();
  return MatchOperand_Success;
}
// dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their
// values to live in a joint format operand in the MCInst encoding.
OperandMatchResultTy
AMDGPUAsmParser::parseDfmtNfmt(OperandVector &Operands) {
  SMLoc S = Parser.getTok().getLoc();
  int64_t Dfmt = 0, Nfmt = 0;
  // dfmt and nfmt can appear in either order, and each is optional.
  bool GotDfmt = false, GotNfmt = false;
  while (!GotDfmt || !GotNfmt) {
    if (!GotDfmt) {
      auto Res = parseIntWithPrefix("dfmt", Dfmt);
      if (Res != MatchOperand_NoMatch) {
        if (Res != MatchOperand_Success)
          return Res;
        if (Dfmt >= 16) {
          Error(Parser.getTok().getLoc(), "out of range dfmt");
          return MatchOperand_ParseFail;
        }
        GotDfmt = true;
        Parser.Lex();
        continue;
      }
    }
    if (!GotNfmt) {
      auto Res = parseIntWithPrefix("nfmt", Nfmt);
      if (Res != MatchOperand_NoMatch) {
        if (Res != MatchOperand_Success)
          return Res;
        if (Nfmt >= 8) {
          Error(Parser.getTok().getLoc(), "out of range nfmt");
          return MatchOperand_ParseFail;
        }
        GotNfmt = true;
        Parser.Lex();
        continue;
      }
    }
    break;
  }
  if (!GotDfmt && !GotNfmt)
    return MatchOperand_NoMatch;
  auto Format = Dfmt | Nfmt << 4;
  Operands.push_back(
      AMDGPUOperand::CreateImm(this, Format, S, AMDGPUOperand::ImmTyFORMAT));
  return MatchOperand_Success;
}
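// Illustrative usage only: on a tbuffer instruction the two prefixed values,
// e.g. "dfmt:14, nfmt:7" (in either order), are folded into the single
// FORMAT immediate built above.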
//===----------------------------------------------------------------------===//
// ds
//===----------------------------------------------------------------------===//

void AMDGPUAsmParser::cvtDSOffset01(MCInst &Inst,
                                    const OperandVector &Operands) {
  OptionalImmIndexMap OptionalIdx;

  for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);

    // Add the register arguments
    if (Op.isReg()) {
      Op.addRegOperands(Inst, 1);
      continue;
    }

    // Handle optional arguments
    OptionalIdx[Op.getImmTy()] = i;
  }

  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset0);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset1);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);

  Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
}

void AMDGPUAsmParser::cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
                                bool IsGdsHardcoded) {
  OptionalImmIndexMap OptionalIdx;

  for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);

    // Add the register arguments
    if (Op.isReg()) {
      Op.addRegOperands(Inst, 1);
      continue;
    }

    if (Op.isToken() && Op.getToken() == "gds") {
      IsGdsHardcoded = true;
      continue;
    }

    // Handle optional arguments
    OptionalIdx[Op.getImmTy()] = i;
  }

  AMDGPUOperand::ImmTy OffsetType =
    (Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx10 ||
     Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx6_gfx7 ||
     Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_vi) ? AMDGPUOperand::ImmTySwizzle :
                                                      AMDGPUOperand::ImmTyOffset;

  addOptionalImmOperand(Inst, Operands, OptionalIdx, OffsetType);

  if (!IsGdsHardcoded) {
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);
  }
  Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
}
void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) {
  OptionalImmIndexMap OptionalIdx;

  unsigned OperandIdx[4];
  unsigned EnMask = 0;
  int SrcIdx = 0;

  for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);

    // Add the register arguments
    if (Op.isReg()) {
      assert(SrcIdx < 4);
      OperandIdx[SrcIdx] = Inst.size();
      Op.addRegOperands(Inst, 1);
      ++SrcIdx;
      continue;
    }

    if (Op.isOff()) {
      assert(SrcIdx < 4);
      OperandIdx[SrcIdx] = Inst.size();
      Inst.addOperand(MCOperand::createReg(AMDGPU::NoRegister));
      ++SrcIdx;
      continue;
    }

    if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) {
      Op.addImmOperands(Inst, 1);
      continue;
    }

    if (Op.isToken() && Op.getToken() == "done")
      continue;

    // Handle optional arguments
    OptionalIdx[Op.getImmTy()] = i;
  }

  assert(SrcIdx == 4);

  bool Compr = false;
  if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) {
    Compr = true;
    Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]);
    Inst.getOperand(OperandIdx[2]).setReg(AMDGPU::NoRegister);
    Inst.getOperand(OperandIdx[3]).setReg(AMDGPU::NoRegister);
  }

  for (auto i = 0; i < SrcIdx; ++i) {
    if (Inst.getOperand(OperandIdx[i]).getReg() != AMDGPU::NoRegister) {
      EnMask |= Compr ? (0x3 << i * 2) : (0x1 << i);
    }
  }

  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr);

  Inst.addOperand(MCOperand::createImm(EnMask));
}
//===----------------------------------------------------------------------===//
// s_waitcnt
//===----------------------------------------------------------------------===//

static bool
encodeCnt(
  const AMDGPU::IsaVersion ISA,
  int64_t &IntVal,
  int64_t CntVal,
  bool Saturate,
  unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned),
  unsigned (*decode)(const IsaVersion &Version, unsigned))
{
  bool Failed = false;

  IntVal = encode(ISA, IntVal, CntVal);
  if (CntVal != decode(ISA, IntVal)) {
    if (Saturate) {
      IntVal = encode(ISA, IntVal, -1);
    } else {
      Failed = true;
    }
  }
  return Failed;
}
bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) {

  SMLoc CntLoc = getLoc();
  StringRef CntName = getTokenStr();

  if (!skipToken(AsmToken::Identifier, "expected a counter name") ||
      !skipToken(AsmToken::LParen, "expected a left parenthesis"))
    return false;

  int64_t CntVal;
  SMLoc ValLoc = getLoc();
  if (!parseExpr(CntVal))
    return false;

  AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());

  bool Failed = true;
  bool Sat = CntName.endswith("_sat");

  if (CntName == "vmcnt" || CntName == "vmcnt_sat") {
    Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt);
  } else if (CntName == "expcnt" || CntName == "expcnt_sat") {
    Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt);
  } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") {
    Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt);
  } else {
    Error(CntLoc, "invalid counter name " + CntName);
    return false;
  }

  if (Failed) {
    Error(ValLoc, "too large value for " + CntName);
    return false;
  }

  if (!skipToken(AsmToken::RParen, "expected a closing parenthesis"))
    return false;

  if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) {
    if (isToken(AsmToken::EndOfStatement)) {
      Error(getLoc(), "expected a counter name");
      return false;
    }
  }

  return true;
}
OperandMatchResultTy
AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) {
  AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
  int64_t Waitcnt = getWaitcntBitMask(ISA);
  SMLoc S = getLoc();

  // If parse failed, do not return error code
  // to avoid excessive error messages.
  if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
    while (parseCnt(Waitcnt) && !isToken(AsmToken::EndOfStatement));
  } else {
    parseExpr(Waitcnt);
  }

  Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S));
  return MatchOperand_Success;
}

bool
AMDGPUOperand::isSWaitCnt() const {
  return isImm();
}
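// Illustrative usage only: accepted forms include
//   s_waitcnt vmcnt(0) & lgkmcnt(0)
//   s_waitcnt 0
// where the whole operand may also be a plain absolute expression.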
//===----------------------------------------------------------------------===//
// hwreg
//===----------------------------------------------------------------------===//

bool
AMDGPUAsmParser::parseHwregBody(OperandInfoTy &HwReg,
                                int64_t &Offset,
                                int64_t &Width) {
  using namespace llvm::AMDGPU::Hwreg;

  // The register may be specified by name or using a numeric code
  if (isToken(AsmToken::Identifier) &&
      (HwReg.Id = getHwregId(getTokenStr())) >= 0) {
    HwReg.IsSymbolic = true;
    lex(); // skip message name
  } else if (!parseExpr(HwReg.Id)) {
    return false;
  }

  if (trySkipToken(AsmToken::RParen))
    return true;

  // parse optional params
  return
    skipToken(AsmToken::Comma, "expected a comma or a closing parenthesis") &&
    parseExpr(Offset) &&
    skipToken(AsmToken::Comma, "expected a comma") &&
    parseExpr(Width) &&
    skipToken(AsmToken::RParen, "expected a closing parenthesis");
}

bool
AMDGPUAsmParser::validateHwreg(const OperandInfoTy &HwReg,
                               const int64_t Offset,
                               const int64_t Width,
                               const SMLoc Loc) {

  using namespace llvm::AMDGPU::Hwreg;

  if (HwReg.IsSymbolic && !isValidHwreg(HwReg.Id, getSTI())) {
    Error(Loc, "specified hardware register is not supported on this GPU");
    return false;
  } else if (!isValidHwreg(HwReg.Id)) {
    Error(Loc, "invalid code of hardware register: only 6-bit values are legal");
    return false;
  } else if (!isValidHwregOffset(Offset)) {
    Error(Loc, "invalid bit offset: only 5-bit values are legal");
    return false;
  } else if (!isValidHwregWidth(Width)) {
    Error(Loc, "invalid bitfield width: only values from 1 to 32 are legal");
    return false;
  }
  return true;
}
5002 AMDGPUAsmParser::parseHwreg(OperandVector
&Operands
) {
5003 using namespace llvm::AMDGPU::Hwreg
;
5006 SMLoc Loc
= getLoc();
5008 // If parse failed, do not return error code
5009 // to avoid excessive error messages.
5010 if (trySkipId("hwreg", AsmToken::LParen
)) {
5011 OperandInfoTy
HwReg(ID_UNKNOWN_
);
5012 int64_t Offset
= OFFSET_DEFAULT_
;
5013 int64_t Width
= WIDTH_DEFAULT_
;
5014 if (parseHwregBody(HwReg
, Offset
, Width
) &&
5015 validateHwreg(HwReg
, Offset
, Width
, Loc
)) {
5016 ImmVal
= encodeHwreg(HwReg
.Id
, Offset
, Width
);
5018 } else if (parseExpr(ImmVal
)) {
5019 if (ImmVal
< 0 || !isUInt
<16>(ImmVal
))
5020 Error(Loc
, "invalid immediate: only 16-bit values are legal");
5023 Operands
.push_back(AMDGPUOperand::CreateImm(this, ImmVal
, Loc
, AMDGPUOperand::ImmTyHwreg
));
5024 return MatchOperand_Success
;
5027 bool AMDGPUOperand::isHwreg() const {
5028 return isImmTy(ImmTyHwreg
);
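// Illustrative usage only (the register name is one of the symbolic names
// known to getHwregId):
//   s_getreg_b32 s2, hwreg(HW_REG_TRAPSTS, 0, 32)
// A raw 16-bit immediate is accepted in place of the hwreg(...) form.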
//===----------------------------------------------------------------------===//
// sendmsg
//===----------------------------------------------------------------------===//

bool
AMDGPUAsmParser::parseSendMsgBody(OperandInfoTy &Msg,
                                  OperandInfoTy &Op,
                                  OperandInfoTy &Stream) {
  using namespace llvm::AMDGPU::SendMsg;

  if (isToken(AsmToken::Identifier) && (Msg.Id = getMsgId(getTokenStr())) >= 0) {
    Msg.IsSymbolic = true;
    lex(); // skip message name
  } else if (!parseExpr(Msg.Id)) {
    return false;
  }

  if (trySkipToken(AsmToken::Comma)) {
    Op.IsDefined = true;
    if (isToken(AsmToken::Identifier) &&
        (Op.Id = getMsgOpId(Msg.Id, getTokenStr())) >= 0) {
      lex(); // skip operation name
    } else if (!parseExpr(Op.Id)) {
      return false;
    }

    if (trySkipToken(AsmToken::Comma)) {
      Stream.IsDefined = true;
      if (!parseExpr(Stream.Id))
        return false;
    }
  }

  return skipToken(AsmToken::RParen, "expected a closing parenthesis");
}

bool
AMDGPUAsmParser::validateSendMsg(const OperandInfoTy &Msg,
                                 const OperandInfoTy &Op,
                                 const OperandInfoTy &Stream,
                                 const SMLoc S) {
  using namespace llvm::AMDGPU::SendMsg;

  // Validation strictness depends on whether message is specified
  // in a symbolic or in a numeric form. In the latter case
  // only encoding possibility is checked.
  bool Strict = Msg.IsSymbolic;

  if (!isValidMsgId(Msg.Id, getSTI(), Strict)) {
    Error(S, "invalid message id");
    return false;
  } else if (Strict && (msgRequiresOp(Msg.Id) != Op.IsDefined)) {
    Error(S, Op.IsDefined ?
             "message does not support operations" :
             "missing message operation");
    return false;
  } else if (!isValidMsgOp(Msg.Id, Op.Id, Strict)) {
    Error(S, "invalid operation id");
    return false;
  } else if (Strict && !msgSupportsStream(Msg.Id, Op.Id) && Stream.IsDefined) {
    Error(S, "message operation does not support streams");
    return false;
  } else if (!isValidMsgStream(Msg.Id, Op.Id, Stream.Id, Strict)) {
    Error(S, "invalid message stream id");
    return false;
  }
  return true;
}
OperandMatchResultTy
AMDGPUAsmParser::parseSendMsgOp(OperandVector &Operands) {
  using namespace llvm::AMDGPU::SendMsg;

  int64_t ImmVal = 0;
  SMLoc Loc = getLoc();

  // If parse failed, do not return error code
  // to avoid excessive error messages.
  if (trySkipId("sendmsg", AsmToken::LParen)) {
    OperandInfoTy Msg(ID_UNKNOWN_);
    OperandInfoTy Op(OP_NONE_);
    OperandInfoTy Stream(STREAM_ID_NONE_);
    if (parseSendMsgBody(Msg, Op, Stream) &&
        validateSendMsg(Msg, Op, Stream, Loc)) {
      ImmVal = encodeMsg(Msg.Id, Op.Id, Stream.Id);
    }
  } else if (parseExpr(ImmVal)) {
    if (ImmVal < 0 || !isUInt<16>(ImmVal))
      Error(Loc, "invalid immediate: only 16-bit values are legal");
  }

  Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTySendMsg));
  return MatchOperand_Success;
}

bool AMDGPUOperand::isSendMsg() const {
  return isImmTy(ImmTySendMsg);
}
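// Illustrative usage only:
//   s_sendmsg sendmsg(MSG_GS, GS_OP_EMIT, 0)
// A raw 16-bit immediate is accepted in place of the sendmsg(...) form.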
//===----------------------------------------------------------------------===//
// v_interp
//===----------------------------------------------------------------------===//

OperandMatchResultTy AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) {
  if (getLexer().getKind() != AsmToken::Identifier)
    return MatchOperand_NoMatch;

  StringRef Str = Parser.getTok().getString();
  int Slot = StringSwitch<int>(Str)
    .Case("p10", 0)
    .Case("p20", 1)
    .Case("p0", 2)
    .Default(-1);

  SMLoc S = Parser.getTok().getLoc();
  if (Slot == -1)
    return MatchOperand_ParseFail;

  Parser.Lex();
  Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S,
                                              AMDGPUOperand::ImmTyInterpSlot));
  return MatchOperand_Success;
}
OperandMatchResultTy AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) {
  if (getLexer().getKind() != AsmToken::Identifier)
    return MatchOperand_NoMatch;

  StringRef Str = Parser.getTok().getString();
  if (!Str.startswith("attr"))
    return MatchOperand_NoMatch;

  StringRef Chan = Str.take_back(2);
  int AttrChan = StringSwitch<int>(Chan)
    .Case(".x", 0)
    .Case(".y", 1)
    .Case(".z", 2)
    .Case(".w", 3)
    .Default(-1);
  if (AttrChan == -1)
    return MatchOperand_ParseFail;

  Str = Str.drop_back(2).drop_front(4);

  uint8_t Attr;
  if (Str.getAsInteger(10, Attr))
    return MatchOperand_ParseFail;

  SMLoc S = Parser.getTok().getLoc();
  Parser.Lex();
  if (Attr > 63) {
    Error(S, "out of bounds attr");
    return MatchOperand_Success;
  }

  SMLoc SChan = SMLoc::getFromPointer(Chan.data());

  Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S,
                                              AMDGPUOperand::ImmTyInterpAttr));
  Operands.push_back(AMDGPUOperand::CreateImm(this, AttrChan, SChan,
                                              AMDGPUOperand::ImmTyAttrChan));
  return MatchOperand_Success;
}
//===----------------------------------------------------------------------===//
// exp
//===----------------------------------------------------------------------===//

void AMDGPUAsmParser::errorExpTgt() {
  Error(Parser.getTok().getLoc(), "invalid exp target");
}

OperandMatchResultTy AMDGPUAsmParser::parseExpTgtImpl(StringRef Str,
                                                      uint8_t &Val) {
  if (Str == "null") {
    Val = 9;
    return MatchOperand_Success;
  }

  if (Str.startswith("mrt")) {
    Str = Str.drop_front(3);
    if (Str == "z") { // == mrtz
      Val = 8;
      return MatchOperand_Success;
    }

    if (Str.getAsInteger(10, Val))
      return MatchOperand_ParseFail;

    if (Val > 7)
      errorExpTgt();

    return MatchOperand_Success;
  }

  if (Str.startswith("pos")) {
    Str = Str.drop_front(3);
    if (Str.getAsInteger(10, Val))
      return MatchOperand_ParseFail;

    if (Val > 4 || (Val == 4 && !isGFX10()))
      errorExpTgt();

    Val += 12;
    return MatchOperand_Success;
  }

  if (isGFX10() && Str == "prim") {
    Val = 20;
    return MatchOperand_Success;
  }

  if (Str.startswith("param")) {
    Str = Str.drop_front(5);
    if (Str.getAsInteger(10, Val))
      return MatchOperand_ParseFail;

    if (Val >= 32)
      errorExpTgt();

    Val += 32;
    return MatchOperand_Success;
  }

  if (Str.startswith("invalid_target_")) {
    Str = Str.drop_front(15);
    if (Str.getAsInteger(10, Val))
      return MatchOperand_ParseFail;

    errorExpTgt();
    return MatchOperand_Success;
  }

  return MatchOperand_NoMatch;
}
OperandMatchResultTy AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) {
  uint8_t Val;
  StringRef Str = Parser.getTok().getString();

  auto Res = parseExpTgtImpl(Str, Val);
  if (Res != MatchOperand_Success)
    return Res;

  SMLoc S = Parser.getTok().getLoc();
  Parser.Lex();

  Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S,
                                              AMDGPUOperand::ImmTyExpTgt));
  return MatchOperand_Success;
}
//===----------------------------------------------------------------------===//
// parser helpers
//===----------------------------------------------------------------------===//

bool
AMDGPUAsmParser::isId(const AsmToken &Token, const StringRef Id) const {
  return Token.is(AsmToken::Identifier) && Token.getString() == Id;
}

bool
AMDGPUAsmParser::isId(const StringRef Id) const {
  return isId(getToken(), Id);
}

bool
AMDGPUAsmParser::isToken(const AsmToken::TokenKind Kind) const {
  return getTokenKind() == Kind;
}

bool
AMDGPUAsmParser::trySkipId(const StringRef Id) {
  if (isId(Id)) {
    lex();
    return true;
  }
  return false;
}

bool
AMDGPUAsmParser::trySkipId(const StringRef Id, const AsmToken::TokenKind Kind) {
  if (isId(Id) && peekToken().is(Kind)) {
    lex();
    lex();
    return true;
  }
  return false;
}

bool
AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) {
  if (isToken(Kind)) {
    lex();
    return true;
  }
  return false;
}

bool
AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind,
                           const StringRef ErrMsg) {
  if (!trySkipToken(Kind)) {
    Error(getLoc(), ErrMsg);
    return false;
  }
  return true;
}

bool
AMDGPUAsmParser::parseExpr(int64_t &Imm) {
  return !getParser().parseAbsoluteExpression(Imm);
}

bool
AMDGPUAsmParser::parseExpr(OperandVector &Operands) {
  SMLoc S = getLoc();

  const MCExpr *Expr;
  if (Parser.parseExpression(Expr))
    return false;

  int64_t IntVal;
  if (Expr->evaluateAsAbsolute(IntVal)) {
    Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
  } else {
    Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
  }
  return true;
}

bool
AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) {
  if (isToken(AsmToken::String)) {
    Val = getToken().getStringContents();
    lex();
    return true;
  } else {
    Error(getLoc(), ErrMsg);
    return false;
  }
}

AsmToken
AMDGPUAsmParser::getToken() const {
  return Parser.getTok();
}

AsmToken
AMDGPUAsmParser::peekToken() {
  return getLexer().peekTok();
}

void
AMDGPUAsmParser::peekTokens(MutableArrayRef<AsmToken> Tokens) {
  auto TokCount = getLexer().peekTokens(Tokens);

  for (auto Idx = TokCount; Idx < Tokens.size(); ++Idx)
    Tokens[Idx] = AsmToken(AsmToken::Error, "");
}

AsmToken::TokenKind
AMDGPUAsmParser::getTokenKind() const {
  return getLexer().getKind();
}

SMLoc
AMDGPUAsmParser::getLoc() const {
  return getToken().getLoc();
}

StringRef
AMDGPUAsmParser::getTokenStr() const {
  return getToken().getString();
}

void
AMDGPUAsmParser::lex() {
  Parser.Lex();
}
//===----------------------------------------------------------------------===//
// swizzle
//===----------------------------------------------------------------------===//

LLVM_READNONE
static unsigned
encodeBitmaskPerm(const unsigned AndMask,
                  const unsigned OrMask,
                  const unsigned XorMask) {
  using namespace llvm::AMDGPU::Swizzle;

  return BITMASK_PERM_ENC |
         (AndMask << BITMASK_AND_SHIFT) |
         (OrMask  << BITMASK_OR_SHIFT)  |
         (XorMask << BITMASK_XOR_SHIFT);
}
bool
AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
                                      const unsigned MinVal,
                                      const unsigned MaxVal,
                                      const StringRef ErrMsg) {
  for (unsigned i = 0; i < OpNum; ++i) {
    if (!skipToken(AsmToken::Comma, "expected a comma")){
      return false;
    }
    SMLoc ExprLoc = Parser.getTok().getLoc();
    if (!parseExpr(Op[i])) {
      return false;
    }
    if (Op[i] < MinVal || Op[i] > MaxVal) {
      Error(ExprLoc, ErrMsg);
      return false;
    }
  }

  return true;
}
bool
AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm) {
  using namespace llvm::AMDGPU::Swizzle;

  int64_t Lane[LANE_NUM];
  if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX,
                           "expected a 2-bit lane id")) {
    Imm = QUAD_PERM_ENC;
    for (unsigned I = 0; I < LANE_NUM; ++I) {
      Imm |= Lane[I] << (LANE_SHIFT * I);
    }
    return true;
  }
  return false;
}

bool
AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) {
  using namespace llvm::AMDGPU::Swizzle;

  SMLoc S = Parser.getTok().getLoc();
  int64_t GroupSize;
  int64_t LaneIdx;

  if (!parseSwizzleOperands(1, &GroupSize,
                            2, 32,
                            "group size must be in the interval [2,32]")) {
    return false;
  }
  if (!isPowerOf2_64(GroupSize)) {
    Error(S, "group size must be a power of two");
    return false;
  }
  if (parseSwizzleOperands(1, &LaneIdx,
                           0, GroupSize - 1,
                           "lane id must be in the interval [0,group size - 1]")) {
    Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0);
    return true;
  }
  return false;
}
bool
AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) {
  using namespace llvm::AMDGPU::Swizzle;

  SMLoc S = Parser.getTok().getLoc();
  int64_t GroupSize;

  if (!parseSwizzleOperands(1, &GroupSize,
      2, 32, "group size must be in the interval [2,32]")) {
    return false;
  }
  if (!isPowerOf2_64(GroupSize)) {
    Error(S, "group size must be a power of two");
    return false;
  }

  Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize - 1);
  return true;
}

bool
AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) {
  using namespace llvm::AMDGPU::Swizzle;

  SMLoc S = Parser.getTok().getLoc();
  int64_t GroupSize;

  if (!parseSwizzleOperands(1, &GroupSize,
      1, 16, "group size must be in the interval [1,16]")) {
    return false;
  }
  if (!isPowerOf2_64(GroupSize)) {
    Error(S, "group size must be a power of two");
    return false;
  }

  Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize);
  return true;
}
bool
AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) {
  using namespace llvm::AMDGPU::Swizzle;

  if (!skipToken(AsmToken::Comma, "expected a comma")) {
    return false;
  }

  StringRef Ctl;
  SMLoc StrLoc = Parser.getTok().getLoc();
  if (!parseString(Ctl)) {
    return false;
  }
  if (Ctl.size() != BITMASK_WIDTH) {
    Error(StrLoc, "expected a 5-character mask");
    return false;
  }

  unsigned AndMask = 0;
  unsigned OrMask = 0;
  unsigned XorMask = 0;

  for (size_t i = 0; i < Ctl.size(); ++i) {
    unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i);
    switch(Ctl[i]) {
    default:
      Error(StrLoc, "invalid mask");
      return false;
    case '0':
      break;
    case '1':
      OrMask |= Mask;
      break;
    case 'p':
      AndMask |= Mask;
      break;
    case 'i':
      AndMask |= Mask;
      XorMask |= Mask;
      break;
    }
  }

  Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask);
  return true;
}

bool
AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) {

  SMLoc OffsetLoc = Parser.getTok().getLoc();

  if (!parseExpr(Imm)) {
    return false;
  }
  if (!isUInt<16>(Imm)) {
    Error(OffsetLoc, "expected a 16-bit offset");
    return false;
  }
  return true;
}
bool
AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) {
  using namespace llvm::AMDGPU::Swizzle;

  if (skipToken(AsmToken::LParen, "expected a left parentheses")) {

    SMLoc ModeLoc = Parser.getTok().getLoc();
    bool Ok = false;

    if (trySkipId(IdSymbolic[ID_QUAD_PERM])) {
      Ok = parseSwizzleQuadPerm(Imm);
    } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) {
      Ok = parseSwizzleBitmaskPerm(Imm);
    } else if (trySkipId(IdSymbolic[ID_BROADCAST])) {
      Ok = parseSwizzleBroadcast(Imm);
    } else if (trySkipId(IdSymbolic[ID_SWAP])) {
      Ok = parseSwizzleSwap(Imm);
    } else if (trySkipId(IdSymbolic[ID_REVERSE])) {
      Ok = parseSwizzleReverse(Imm);
    } else {
      Error(ModeLoc, "expected a swizzle mode");
    }

    return Ok && skipToken(AsmToken::RParen, "expected a closing parentheses");
  }

  return false;
}
OperandMatchResultTy
AMDGPUAsmParser::parseSwizzleOp(OperandVector &Operands) {
  SMLoc S = Parser.getTok().getLoc();
  int64_t Imm = 0;

  if (trySkipId("offset")) {

    bool Ok = false;
    if (skipToken(AsmToken::Colon, "expected a colon")) {
      if (trySkipId("swizzle")) {
        Ok = parseSwizzleMacro(Imm);
      } else {
        Ok = parseSwizzleOffset(Imm);
      }
    }

    Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle));

    return Ok ? MatchOperand_Success : MatchOperand_ParseFail;
  } else {
    // Swizzle "offset" operand is optional.
    // If it is omitted, try parsing other optional operands.
    return parseOptionalOpr(Operands);
  }
}

bool
AMDGPUOperand::isSwizzle() const {
  return isImmTy(ImmTySwizzle);
}
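// Illustrative usage only, showing the macro and raw-offset forms handled
// above (registers, lane ids and the mask are made up):
//   ds_swizzle_b32 v8, v2 offset:swizzle(QUAD_PERM, 0, 1, 2, 3)
//   ds_swizzle_b32 v8, v2 offset:swizzle(BITMASK_PERM, "01pi0")
//   ds_swizzle_b32 v8, v2 offset:0xffff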
//===----------------------------------------------------------------------===//
// VGPR Index Mode
//===----------------------------------------------------------------------===//

int64_t AMDGPUAsmParser::parseGPRIdxMacro() {

  using namespace llvm::AMDGPU::VGPRIndexMode;

  if (trySkipToken(AsmToken::RParen)) {
    return 0; // all modes off
  }

  int64_t Imm = 0;

  while (true) {
    unsigned Mode = 0;
    SMLoc S = Parser.getTok().getLoc();

    for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) {
      if (trySkipId(IdSymbolic[ModeId])) {
        Mode = 1 << ModeId;
        break;
      }
    }

    if (Mode == 0) {
      Error(S, (Imm == 0)?
               "expected a VGPR index mode or a closing parenthesis" :
               "expected a VGPR index mode");
      break;
    }

    if (Imm & Mode) {
      Error(S, "duplicate VGPR index mode");
      break;
    }
    Imm |= Mode;

    if (trySkipToken(AsmToken::RParen))
      break;
    if (!skipToken(AsmToken::Comma,
                   "expected a comma or a closing parenthesis"))
      break;
  }

  return Imm;
}
5704 AMDGPUAsmParser::parseGPRIdxMode(OperandVector
&Operands
) {
5707 SMLoc S
= Parser
.getTok().getLoc();
5709 if (getLexer().getKind() == AsmToken::Identifier
&&
5710 Parser
.getTok().getString() == "gpr_idx" &&
5711 getLexer().peekTok().is(AsmToken::LParen
)) {
5716 // If parse failed, trigger an error but do not return error code
5717 // to avoid excessive error messages.
5718 Imm
= parseGPRIdxMacro();
5721 if (getParser().parseAbsoluteExpression(Imm
))
5722 return MatchOperand_NoMatch
;
5723 if (Imm
< 0 || !isUInt
<4>(Imm
)) {
5724 Error(S
, "invalid immediate: only 4-bit values are legal");
5729 AMDGPUOperand::CreateImm(this, Imm
, S
, AMDGPUOperand::ImmTyGprIdxMode
));
5730 return MatchOperand_Success
;
5733 bool AMDGPUOperand::isGPRIdxMode() const {
5734 return isImmTy(ImmTyGprIdxMode
);
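// Illustrative usage only (mode names come from the VGPRIndexMode symbolic
// table):
//   s_set_gpr_idx_on s0, gpr_idx(SRC0,DST)
// A plain 4-bit immediate mode mask is accepted as well.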
//===----------------------------------------------------------------------===//
// sopp branch targets
//===----------------------------------------------------------------------===//

OperandMatchResultTy
AMDGPUAsmParser::parseSOppBrTarget(OperandVector &Operands) {

  // Make sure we are not parsing something
  // that looks like a label or an expression but is not.
  // This will improve error messages.
  if (isRegister() || isModifier())
    return MatchOperand_NoMatch;

  if (parseExpr(Operands)) {

    AMDGPUOperand &Opr = ((AMDGPUOperand &)*Operands[Operands.size() - 1]);
    assert(Opr.isImm() || Opr.isExpr());
    SMLoc Loc = Opr.getStartLoc();

    // Currently we do not support arbitrary expressions as branch targets.
    // Only labels and absolute expressions are accepted.
    if (Opr.isExpr() && !Opr.isSymbolRefExpr()) {
      Error(Loc, "expected an absolute expression or a label");
    } else if (Opr.isImm() && !Opr.isS16Imm()) {
      Error(Loc, "expected a 16-bit signed jump offset");
    }
  }

  return MatchOperand_Success; // avoid excessive error messages
}
//===----------------------------------------------------------------------===//
// Boolean holding registers
//===----------------------------------------------------------------------===//

OperandMatchResultTy
AMDGPUAsmParser::parseBoolReg(OperandVector &Operands) {
  return parseReg(Operands);
}

//===----------------------------------------------------------------------===//
// mubuf
//===----------------------------------------------------------------------===//

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultDLC() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDLC);
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultGLC() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyGLC);
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSLC() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTySLC);
}
void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst,
                                   const OperandVector &Operands,
                                   bool IsAtomic,
                                   bool IsAtomicReturn,
                                   bool IsLds) {
  bool IsLdsOpcode = IsLds;
  bool HasLdsModifier = false;
  OptionalImmIndexMap OptionalIdx;
  assert(IsAtomicReturn ? IsAtomic : true);
  unsigned FirstOperandIdx = 1;

  for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);

    // Add the register arguments
    if (Op.isReg()) {
      Op.addRegOperands(Inst, 1);
      // Insert a tied src for atomic return dst.
      // This cannot be postponed as subsequent calls to
      // addImmOperands rely on correct number of MC operands.
      if (IsAtomicReturn && i == FirstOperandIdx)
        Op.addRegOperands(Inst, 1);
      continue;
    }

    // Handle the case where soffset is an immediate
    if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
      Op.addImmOperands(Inst, 1);
      continue;
    }

    HasLdsModifier |= Op.isLDS();

    // Handle tokens like 'offen' which are sometimes hard-coded into the
    // asm string.  There are no MCInst operands for these.
    if (Op.isToken()) {
      continue;
    }
    assert(Op.isImm());

    // Handle optional arguments
    OptionalIdx[Op.getImmTy()] = i;
  }

  // This is a workaround for an llvm quirk which may result in an
  // incorrect instruction selection. Lds and non-lds versions of
  // MUBUF instructions are identical except that lds versions
  // have mandatory 'lds' modifier. However this modifier follows
  // optional modifiers and llvm asm matcher regards this 'lds'
  // modifier as an optional one. As a result, an lds version
  // of opcode may be selected even if it has no 'lds' modifier.
  if (IsLdsOpcode && !HasLdsModifier) {
    int NoLdsOpcode = AMDGPU::getMUBUFNoLdsInst(Inst.getOpcode());
    if (NoLdsOpcode != -1) { // Got lds version - correct it.
      Inst.setOpcode(NoLdsOpcode);
      IsLdsOpcode = false;
    }
  }

  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset);
  if (!IsAtomic) { // glc is hard-coded.
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
  }
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);

  if (!IsLdsOpcode) { // tfe is not legal with lds opcodes
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
  }

  if (isGFX10())
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC);
}
void AMDGPUAsmParser::cvtMtbuf(MCInst &Inst, const OperandVector &Operands) {
  OptionalImmIndexMap OptionalIdx;

  for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);

    // Add the register arguments
    if (Op.isReg()) {
      Op.addRegOperands(Inst, 1);
      continue;
    }

    // Handle the case where soffset is an immediate
    if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
      Op.addImmOperands(Inst, 1);
      continue;
    }

    // Handle tokens like 'offen' which are sometimes hard-coded into the
    // asm string.  There are no MCInst operands for these.
    if (Op.isToken()) {
      continue;
    }
    assert(Op.isImm());

    // Handle optional arguments
    OptionalIdx[Op.getImmTy()] = i;
  }

  addOptionalImmOperand(Inst, Operands, OptionalIdx,
                        AMDGPUOperand::ImmTyOffset);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyFORMAT);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);

  if (isGFX10())
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC);
}
//===----------------------------------------------------------------------===//
// mimg
//===----------------------------------------------------------------------===//

void AMDGPUAsmParser::cvtMIMG(MCInst &Inst, const OperandVector &Operands,
                              bool IsAtomic) {
  unsigned I = 1;
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
  }

  if (IsAtomic) {
    // Add src, same as dst
    assert(Desc.getNumDefs() == 1);
    ((AMDGPUOperand &)*Operands[I - 1]).addRegOperands(Inst, 1);
  }

  OptionalImmIndexMap OptionalIdx;

  for (unsigned E = Operands.size(); I != E; ++I) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);

    // Add the register arguments
    if (Op.isReg()) {
      Op.addRegOperands(Inst, 1);
    } else if (Op.isImmModifier()) {
      OptionalIdx[Op.getImmTy()] = I;
    } else if (!Op.isToken()) {
      llvm_unreachable("unexpected operand type");
    }
  }

  bool IsGFX10 = isGFX10();

  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDMask);
  if (IsGFX10)
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDim, -1);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyUNorm);
  if (IsGFX10)
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyR128A16);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyLWE);
  if (!IsGFX10)
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDA);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyD16);
}

void AMDGPUAsmParser::cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands) {
  cvtMIMG(Inst, Operands, true);
}
//===----------------------------------------------------------------------===//
// smrd
//===----------------------------------------------------------------------===//

bool AMDGPUOperand::isSMRDOffset8() const {
  return isImm() && isUInt<8>(getImm());
}

bool AMDGPUOperand::isSMRDOffset20() const {
  return isImm() && isUInt<20>(getImm());
}

bool AMDGPUOperand::isSMRDLiteralOffset() const {
  // 32-bit literals are only supported on CI and we only want to use them
  // when the offset is > 8-bits.
  return isImm() && !isUInt<8>(getImm()) && isUInt<32>(getImm());
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset8() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset20() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDLiteralOffset() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFlatOffset() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
}
5995 //===----------------------------------------------------------------------===//
5997 //===----------------------------------------------------------------------===//
5999 static bool ConvertOmodMul(int64_t &Mul
) {
6000 if (Mul
!= 1 && Mul
!= 2 && Mul
!= 4)
6007 static bool ConvertOmodDiv(int64_t &Div
) {
6021 static bool ConvertBoundCtrl(int64_t &BoundCtrl
) {
6022 if (BoundCtrl
== 0) {
6027 if (BoundCtrl
== -1) {
// Note: the order in this table matches the order of operands in AsmString.
static const OptionalOperand AMDGPUOptionalOperandTable[] = {
  {"offen",   AMDGPUOperand::ImmTyOffen, true, nullptr},
  {"idxen",   AMDGPUOperand::ImmTyIdxen, true, nullptr},
  {"addr64",  AMDGPUOperand::ImmTyAddr64, true, nullptr},
  {"offset0", AMDGPUOperand::ImmTyOffset0, false, nullptr},
  {"offset1", AMDGPUOperand::ImmTyOffset1, false, nullptr},
  {"gds",     AMDGPUOperand::ImmTyGDS, true, nullptr},
  {"lds",     AMDGPUOperand::ImmTyLDS, true, nullptr},
  {"offset",  AMDGPUOperand::ImmTyOffset, false, nullptr},
  {"inst_offset", AMDGPUOperand::ImmTyInstOffset, false, nullptr},
  {"dlc",     AMDGPUOperand::ImmTyDLC, true, nullptr},
  {"format",  AMDGPUOperand::ImmTyFORMAT, false, nullptr},
  {"glc",     AMDGPUOperand::ImmTyGLC, true, nullptr},
  {"slc",     AMDGPUOperand::ImmTySLC, true, nullptr},
  {"swz",     AMDGPUOperand::ImmTySWZ, true, nullptr},
  {"tfe",     AMDGPUOperand::ImmTyTFE, true, nullptr},
  {"d16",     AMDGPUOperand::ImmTyD16, true, nullptr},
  {"high",    AMDGPUOperand::ImmTyHigh, true, nullptr},
  {"clamp",   AMDGPUOperand::ImmTyClampSI, true, nullptr},
  {"omod",    AMDGPUOperand::ImmTyOModSI, false, ConvertOmodMul},
  {"unorm",   AMDGPUOperand::ImmTyUNorm, true, nullptr},
  {"da",      AMDGPUOperand::ImmTyDA, true, nullptr},
  {"r128",    AMDGPUOperand::ImmTyR128A16, true, nullptr},
  {"a16",     AMDGPUOperand::ImmTyR128A16, true, nullptr},
  {"lwe",     AMDGPUOperand::ImmTyLWE, true, nullptr},
  {"d16",     AMDGPUOperand::ImmTyD16, true, nullptr},
  {"dmask",   AMDGPUOperand::ImmTyDMask, false, nullptr},
  {"dim",     AMDGPUOperand::ImmTyDim, false, nullptr},
  {"row_mask",   AMDGPUOperand::ImmTyDppRowMask, false, nullptr},
  {"bank_mask",  AMDGPUOperand::ImmTyDppBankMask, false, nullptr},
  {"bound_ctrl", AMDGPUOperand::ImmTyDppBoundCtrl, false, ConvertBoundCtrl},
  {"fi",         AMDGPUOperand::ImmTyDppFi, false, nullptr},
  {"dst_sel",    AMDGPUOperand::ImmTySdwaDstSel, false, nullptr},
  {"src0_sel",   AMDGPUOperand::ImmTySdwaSrc0Sel, false, nullptr},
  {"src1_sel",   AMDGPUOperand::ImmTySdwaSrc1Sel, false, nullptr},
  {"dst_unused", AMDGPUOperand::ImmTySdwaDstUnused, false, nullptr},
  {"compr",   AMDGPUOperand::ImmTyExpCompr, true, nullptr},
  {"vm",      AMDGPUOperand::ImmTyExpVM, true, nullptr},
  {"op_sel",  AMDGPUOperand::ImmTyOpSel, false, nullptr},
  {"op_sel_hi", AMDGPUOperand::ImmTyOpSelHi, false, nullptr},
  {"neg_lo",  AMDGPUOperand::ImmTyNegLo, false, nullptr},
  {"neg_hi",  AMDGPUOperand::ImmTyNegHi, false, nullptr},
  {"blgp",    AMDGPUOperand::ImmTyBLGP, false, nullptr},
  {"cbsz",    AMDGPUOperand::ImmTyCBSZ, false, nullptr},
  {"abid",    AMDGPUOperand::ImmTyABID, false, nullptr}
};
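// parseOptionalOperand (below) drives the table above. After the first
// optional operand matches, it keeps calling parseOptionalOpr with a bounded
// lookahead so that instructions whose AsmString places a hardcoded operand
// (e.g. the hardcoded 'glc' on some flat/global atomics) after optional ones
// can still be matched.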
OperandMatchResultTy AMDGPUAsmParser::parseOptionalOperand(OperandVector &Operands) {
  OperandMatchResultTy res = parseOptionalOpr(Operands);

  // This is a hack to enable hardcoded mandatory operands which follow
  // optional operands.
  //
  // Current design assumes that all operands after the first optional operand
  // are also optional. However implementation of some instructions violates
  // this rule (see e.g. flat/global atomic which have hardcoded 'glc' operands).
  //
  // To alleviate this problem, we have to (implicitly) parse extra operands
  // to make sure autogenerated parser of custom operands never hit hardcoded
  // mandatory operands.

  for (unsigned i = 0; i < MAX_OPR_LOOKAHEAD; ++i) {
    if (res != MatchOperand_Success ||
        isToken(AsmToken::EndOfStatement))
      break;

    trySkipToken(AsmToken::Comma);
    res = parseOptionalOpr(Operands);
  }

  return res;
}
OperandMatchResultTy AMDGPUAsmParser::parseOptionalOpr(OperandVector &Operands) {
  OperandMatchResultTy res;
  for (const OptionalOperand &Op : AMDGPUOptionalOperandTable) {
    // try to parse any optional operand here
    if (Op.IsBit) {
      res = parseNamedBit(Op.Name, Operands, Op.Type);
    } else if (Op.Type == AMDGPUOperand::ImmTyOModSI) {
      res = parseOModOperand(Operands);
    } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstSel ||
               Op.Type == AMDGPUOperand::ImmTySdwaSrc0Sel ||
               Op.Type == AMDGPUOperand::ImmTySdwaSrc1Sel) {
      res = parseSDWASel(Operands, Op.Name, Op.Type);
    } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstUnused) {
      res = parseSDWADstUnused(Operands);
    } else if (Op.Type == AMDGPUOperand::ImmTyOpSel ||
               Op.Type == AMDGPUOperand::ImmTyOpSelHi ||
               Op.Type == AMDGPUOperand::ImmTyNegLo ||
               Op.Type == AMDGPUOperand::ImmTyNegHi) {
      res = parseOperandArrayWithPrefix(Op.Name, Operands, Op.Type,
                                        Op.ConvertResult);
    } else if (Op.Type == AMDGPUOperand::ImmTyDim) {
      res = parseDim(Operands);
    } else if (Op.Type == AMDGPUOperand::ImmTyFORMAT && !isGFX10()) {
      res = parseDfmtNfmt(Operands);
    } else {
      res = parseIntWithPrefix(Op.Name, Operands, Op.Type, Op.ConvertResult);
    }
    if (res != MatchOperand_NoMatch) {
      return res;
    }
  }
  return MatchOperand_NoMatch;
}
OperandMatchResultTy AMDGPUAsmParser::parseOModOperand(OperandVector &Operands) {
  StringRef Name = Parser.getTok().getString();
  if (Name == "mul") {
    return parseIntWithPrefix("mul", Operands,
                              AMDGPUOperand::ImmTyOModSI, ConvertOmodMul);
  }

  if (Name == "div") {
    return parseIntWithPrefix("div", Operands,
                              AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv);
  }

  return MatchOperand_NoMatch;
}
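// cvtVOP3OpSel (below): after the generic VOP3P conversion, the op_sel bit at
// position SrcNum (one past the last present source operand) describes the
// destination, so it is folded into src0_modifiers as SISrcMods::DST_OP_SEL.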
void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands) {
  cvtVOP3P(Inst, Operands);

  int Opc = Inst.getOpcode();

  int SrcNum;
  const int Ops[] = { AMDGPU::OpName::src0,
                      AMDGPU::OpName::src1,
                      AMDGPU::OpName::src2 };
  for (SrcNum = 0;
       SrcNum < 3 && AMDGPU::getNamedOperandIdx(Opc, Ops[SrcNum]) != -1;
       ++SrcNum);
  assert(SrcNum > 0);

  int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
  unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();

  if ((OpSel & (1 << SrcNum)) != 0) {
    int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers);
    uint32_t ModVal = Inst.getOperand(ModIdx).getImm();
    Inst.getOperand(ModIdx).setImm(ModVal | SISrcMods::DST_OP_SEL);
  }
}
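// Helper used by the cvt* routines below: returns true when the descriptor
// slot the MCInst is about to fill is an input-modifiers operand that is
// immediately followed by an untied register-class operand.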
static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) {
  // 1. This operand is input modifiers
  return Desc.OpInfo[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS
      // 2. This is not last operand
      && Desc.NumOperands > (OpNum + 1)
      // 3. Next operand is register class
      && Desc.OpInfo[OpNum + 1].RegClass != -1
      // 4. Next register is not tied to any other operand
      && Desc.getOperandConstraint(OpNum + 1, MCOI::OperandConstraint::TIED_TO) == -1;
}
void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands)
{
  OptionalImmIndexMap OptionalIdx;
  unsigned Opc = Inst.getOpcode();

  unsigned I = 1;
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
  }

  for (unsigned E = Operands.size(); I != E; ++I) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
    if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
      Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
    } else if (Op.isInterpSlot() ||
               Op.isInterpAttr() ||
               Op.isAttrChan()) {
      Inst.addOperand(MCOperand::createImm(Op.getImm()));
    } else if (Op.isImmModifier()) {
      OptionalIdx[Op.getImmTy()] = I;
    } else {
      llvm_unreachable("unhandled operand type");
    }
  }

  if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::high) != -1) {
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyHigh);
  }

  if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
  }

  if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
  }
}
void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands,
                              OptionalImmIndexMap &OptionalIdx) {
  unsigned Opc = Inst.getOpcode();

  unsigned I = 1;
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
  }

  if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1) {
    // This instruction has src modifiers
    for (unsigned E = Operands.size(); I != E; ++I) {
      AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
      if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
        Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
      } else if (Op.isImmModifier()) {
        OptionalIdx[Op.getImmTy()] = I;
      } else if (Op.isRegOrImm()) {
        Op.addRegOrImmOperands(Inst, 1);
      } else {
        llvm_unreachable("unhandled operand type");
      }
    }
  } else {
    for (unsigned E = Operands.size(); I != E; ++I) {
      AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
      if (Op.isMod()) {
        OptionalIdx[Op.getImmTy()] = I;
      } else {
        Op.addRegOrImmOperands(Inst, 1);
      }
    }
  }

  if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
  }

  if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
  }

  // Special case v_mac_{f16, f32} and v_fmac_{f16, f32} (gfx906/gfx10+):
  // it has src2 register operand that is tied to dst operand
  // we don't allow modifiers for this operand in assembler so src2_modifiers
  // should be 0.
  if (Opc == AMDGPU::V_MAC_F32_e64_gfx6_gfx7 ||
      Opc == AMDGPU::V_MAC_F32_e64_gfx10 ||
      Opc == AMDGPU::V_MAC_F32_e64_vi ||
      Opc == AMDGPU::V_MAC_F16_e64_vi ||
      Opc == AMDGPU::V_FMAC_F32_e64_gfx10 ||
      Opc == AMDGPU::V_FMAC_F32_e64_vi ||
      Opc == AMDGPU::V_FMAC_F16_e64_gfx10) {
    auto it = Inst.begin();
    std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers));
    it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2
    ++it;
    Inst.insert(it, Inst.getOperand(0)); // src2 = dst
  }
}
void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) {
  OptionalImmIndexMap OptionalIdx;
  cvtVOP3(Inst, Operands, OptionalIdx);
}
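// cvtVOP3P (below) first converts the instruction like a regular VOP3 and
// then redistributes the parsed op_sel / op_sel_hi / neg_lo / neg_hi packed
// operands bit-by-bit into the per-source srcN_modifiers fields.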
void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst,
                               const OperandVector &Operands) {
  OptionalImmIndexMap OptIdx;
  const int Opc = Inst.getOpcode();
  const MCInstrDesc &Desc = MII.get(Opc);

  const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0;

  cvtVOP3(Inst, Operands, OptIdx);

  if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in) != -1) {
    assert(!IsPacked);
    Inst.addOperand(Inst.getOperand(0));
  }

  // FIXME: This is messy. Parse the modifiers as if it was a normal VOP3
  // instruction, and then figure out where to actually put the modifiers

  addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel);

  int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
  if (OpSelHiIdx != -1) {
    int DefaultVal = IsPacked ? -1 : 0;
    addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi,
                          DefaultVal);
  }

  int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo);
  if (NegLoIdx != -1) {
    assert(IsPacked);
    addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo);
    addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi);
  }

  const int Ops[] = { AMDGPU::OpName::src0,
                      AMDGPU::OpName::src1,
                      AMDGPU::OpName::src2 };
  const int ModOps[] = { AMDGPU::OpName::src0_modifiers,
                         AMDGPU::OpName::src1_modifiers,
                         AMDGPU::OpName::src2_modifiers };

  int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);

  unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
  unsigned OpSelHi = 0;
  unsigned NegLo = 0;
  unsigned NegHi = 0;

  if (OpSelHiIdx != -1) {
    OpSelHi = Inst.getOperand(OpSelHiIdx).getImm();
  }

  if (NegLoIdx != -1) {
    int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi);
    NegLo = Inst.getOperand(NegLoIdx).getImm();
    NegHi = Inst.getOperand(NegHiIdx).getImm();
  }

  for (int J = 0; J < 3; ++J) {
    int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]);
    if (OpIdx == -1)
      break;

    uint32_t ModVal = 0;

    if ((OpSel & (1 << J)) != 0)
      ModVal |= SISrcMods::OP_SEL_0;

    if ((OpSelHi & (1 << J)) != 0)
      ModVal |= SISrcMods::OP_SEL_1;

    if ((NegLo & (1 << J)) != 0)
      ModVal |= SISrcMods::NEG;

    if ((NegHi & (1 << J)) != 0)
      ModVal |= SISrcMods::NEG_HI;

    int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);

    Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal);
  }
}
//===----------------------------------------------------------------------===//
// dpp
//===----------------------------------------------------------------------===//
bool AMDGPUOperand::isDPP8() const {
  return isImmTy(ImmTyDPP8);
}

bool AMDGPUOperand::isDPPCtrl() const {
  using namespace AMDGPU::DPP;

  bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm());
  if (result) {
    int64_t Imm = getImm();
    return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) ||
           (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) ||
           (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) ||
           (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) ||
           (Imm == DppCtrl::WAVE_SHL1) ||
           (Imm == DppCtrl::WAVE_ROL1) ||
           (Imm == DppCtrl::WAVE_SHR1) ||
           (Imm == DppCtrl::WAVE_ROR1) ||
           (Imm == DppCtrl::ROW_MIRROR) ||
           (Imm == DppCtrl::ROW_HALF_MIRROR) ||
           (Imm == DppCtrl::BCAST15) ||
           (Imm == DppCtrl::BCAST31) ||
           (Imm >= DppCtrl::ROW_SHARE_FIRST && Imm <= DppCtrl::ROW_SHARE_LAST) ||
           (Imm >= DppCtrl::ROW_XMASK_FIRST && Imm <= DppCtrl::ROW_XMASK_LAST);
  }
  return false;
}
//===----------------------------------------------------------------------===//
// mAI
//===----------------------------------------------------------------------===//
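// blgp, cbsz and abid are MAI (MFMA) instruction modifiers; blgp and cbsz are
// 3-bit fields and abid is a 4-bit field, which is what the predicates below
// check.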
bool AMDGPUOperand::isBLGP() const {
  return isImm() && getImmTy() == ImmTyBLGP && isUInt<3>(getImm());
}

bool AMDGPUOperand::isCBSZ() const {
  return isImm() && getImmTy() == ImmTyCBSZ && isUInt<3>(getImm());
}

bool AMDGPUOperand::isABID() const {
  return isImm() && getImmTy() == ImmTyABID && isUInt<4>(getImm());
}

bool AMDGPUOperand::isS16Imm() const {
  return isImm() && (isInt<16>(getImm()) || isUInt<16>(getImm()));
}

bool AMDGPUOperand::isU16Imm() const {
  return isImm() && isUInt<16>(getImm());
}
OperandMatchResultTy AMDGPUAsmParser::parseDim(OperandVector &Operands) {
  if (!isGFX10())
    return MatchOperand_NoMatch;

  SMLoc S = Parser.getTok().getLoc();

  if (getLexer().isNot(AsmToken::Identifier))
    return MatchOperand_NoMatch;
  if (getLexer().getTok().getString() != "dim")
    return MatchOperand_NoMatch;

  Parser.Lex();
  if (getLexer().isNot(AsmToken::Colon))
    return MatchOperand_ParseFail;

  Parser.Lex();

  // We want to allow "dim:1D" etc., but the initial 1 is tokenized as an
  // integer.
  std::string Token;
  if (getLexer().is(AsmToken::Integer)) {
    SMLoc Loc = getLexer().getTok().getEndLoc();
    Token = getLexer().getTok().getString();
    Parser.Lex();
    if (getLexer().getTok().getLoc() != Loc)
      return MatchOperand_ParseFail;
  }
  if (getLexer().isNot(AsmToken::Identifier))
    return MatchOperand_ParseFail;
  Token += getLexer().getTok().getString();

  StringRef DimId = Token;
  if (DimId.startswith("SQ_RSRC_IMG_"))
    DimId = DimId.substr(12);

  const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByAsmSuffix(DimId);
  if (!DimInfo)
    return MatchOperand_ParseFail;

  Parser.Lex();

  Operands.push_back(AMDGPUOperand::CreateImm(this, DimInfo->Encoding, S,
                                              AMDGPUOperand::ImmTyDim));
  return MatchOperand_Success;
}
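// parseDPP8 (below) accepts dpp8:[a,b,c,d,e,f,g,h] with each lane select in
// the range 0..7; the eight 3-bit selects are packed into a single immediate
// as Sels[i] << (i * 3).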
OperandMatchResultTy AMDGPUAsmParser::parseDPP8(OperandVector &Operands) {
  SMLoc S = Parser.getTok().getLoc();
  StringRef Prefix;

  if (getLexer().getKind() == AsmToken::Identifier) {
    Prefix = Parser.getTok().getString();
  } else {
    return MatchOperand_NoMatch;
  }

  if (Prefix != "dpp8")
    return parseDPPCtrl(Operands);
  if (!isGFX10())
    return MatchOperand_NoMatch;

  // dpp8:[%d,%d,%d,%d,%d,%d,%d,%d]

  int64_t Sels[8];

  Parser.Lex();
  if (getLexer().isNot(AsmToken::Colon))
    return MatchOperand_ParseFail;

  Parser.Lex();
  if (getLexer().isNot(AsmToken::LBrac))
    return MatchOperand_ParseFail;

  Parser.Lex();
  if (getParser().parseAbsoluteExpression(Sels[0]))
    return MatchOperand_ParseFail;
  if (0 > Sels[0] || 7 < Sels[0])
    return MatchOperand_ParseFail;

  for (size_t i = 1; i < 8; ++i) {
    if (getLexer().isNot(AsmToken::Comma))
      return MatchOperand_ParseFail;

    Parser.Lex();
    if (getParser().parseAbsoluteExpression(Sels[i]))
      return MatchOperand_ParseFail;
    if (0 > Sels[i] || 7 < Sels[i])
      return MatchOperand_ParseFail;
  }

  if (getLexer().isNot(AsmToken::RBrac))
    return MatchOperand_ParseFail;
  Parser.Lex();

  unsigned DPP8 = 0;
  for (size_t i = 0; i < 8; ++i)
    DPP8 |= (Sels[i] << (i * 3));

  Operands.push_back(AMDGPUOperand::CreateImm(this, DPP8, S, AMDGPUOperand::ImmTyDPP8));
  return MatchOperand_Success;
}
OperandMatchResultTy
AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) {
  using namespace AMDGPU::DPP;

  SMLoc S = Parser.getTok().getLoc();
  StringRef Prefix;
  int64_t Int;

  if (getLexer().getKind() == AsmToken::Identifier) {
    Prefix = Parser.getTok().getString();
  } else {
    return MatchOperand_NoMatch;
  }

  if (Prefix == "row_mirror") {
    Int = DppCtrl::ROW_MIRROR;
    Parser.Lex();
  } else if (Prefix == "row_half_mirror") {
    Int = DppCtrl::ROW_HALF_MIRROR;
    Parser.Lex();
  } else {
    // Check to prevent parseDPPCtrlOps from eating invalid tokens
    if (Prefix != "quad_perm"
        && Prefix != "row_shl"
        && Prefix != "row_shr"
        && Prefix != "row_ror"
        && Prefix != "wave_shl"
        && Prefix != "wave_rol"
        && Prefix != "wave_shr"
        && Prefix != "wave_ror"
        && Prefix != "row_bcast"
        && Prefix != "row_share"
        && Prefix != "row_xmask") {
      return MatchOperand_NoMatch;
    }

    if (!isGFX10() && (Prefix == "row_share" || Prefix == "row_xmask"))
      return MatchOperand_NoMatch;

    if (!isVI() && !isGFX9() &&
        (Prefix == "wave_shl" || Prefix == "wave_shr" ||
         Prefix == "wave_rol" || Prefix == "wave_ror" ||
         Prefix == "row_bcast"))
      return MatchOperand_NoMatch;

    Parser.Lex();
    if (getLexer().isNot(AsmToken::Colon))
      return MatchOperand_ParseFail;

    if (Prefix == "quad_perm") {
      // quad_perm:[%d,%d,%d,%d]
      Parser.Lex();
      if (getLexer().isNot(AsmToken::LBrac))
        return MatchOperand_ParseFail;
      Parser.Lex();

      if (getParser().parseAbsoluteExpression(Int) || !(0 <= Int && Int <=3))
        return MatchOperand_ParseFail;

      for (int i = 0; i < 3; ++i) {
        if (getLexer().isNot(AsmToken::Comma))
          return MatchOperand_ParseFail;
        Parser.Lex();

        int64_t Temp;
        if (getParser().parseAbsoluteExpression(Temp) || !(0 <= Temp && Temp <=3))
          return MatchOperand_ParseFail;
        const int shift = i*2 + 2;
        Int += (Temp << shift);
      }

      if (getLexer().isNot(AsmToken::RBrac))
        return MatchOperand_ParseFail;
      Parser.Lex();
    } else {
      Parser.Lex();
      if (getParser().parseAbsoluteExpression(Int))
        return MatchOperand_ParseFail;

      if (Prefix == "row_shl" && 1 <= Int && Int <= 15) {
        Int |= DppCtrl::ROW_SHL0;
      } else if (Prefix == "row_shr" && 1 <= Int && Int <= 15) {
        Int |= DppCtrl::ROW_SHR0;
      } else if (Prefix == "row_ror" && 1 <= Int && Int <= 15) {
        Int |= DppCtrl::ROW_ROR0;
      } else if (Prefix == "wave_shl" && 1 == Int) {
        Int = DppCtrl::WAVE_SHL1;
      } else if (Prefix == "wave_rol" && 1 == Int) {
        Int = DppCtrl::WAVE_ROL1;
      } else if (Prefix == "wave_shr" && 1 == Int) {
        Int = DppCtrl::WAVE_SHR1;
      } else if (Prefix == "wave_ror" && 1 == Int) {
        Int = DppCtrl::WAVE_ROR1;
      } else if (Prefix == "row_bcast") {
        if (Int == 15) {
          Int = DppCtrl::BCAST15;
        } else if (Int == 31) {
          Int = DppCtrl::BCAST31;
        } else {
          return MatchOperand_ParseFail;
        }
      } else if (Prefix == "row_share" && 0 <= Int && Int <= 15) {
        Int |= DppCtrl::ROW_SHARE_FIRST;
      } else if (Prefix == "row_xmask" && 0 <= Int && Int <= 15) {
        Int |= DppCtrl::ROW_XMASK_FIRST;
      } else {
        return MatchOperand_ParseFail;
      }
    }
  }

  Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTyDppCtrl));
  return MatchOperand_Success;
}
AMDGPUOperand::Ptr AMDGPUAsmParser::defaultRowMask() const {
  return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppRowMask);
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultEndpgmImmOperands() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyEndpgm);
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBankMask() const {
  return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppBankMask);
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBoundCtrl() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppBoundCtrl);
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFI() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppFi);
}
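// In cvtDPP (below) the dpp8/fi operands are not routed through OptionalIdx:
// for DPP8 the fi value is captured while walking the operands and appended
// afterwards as DPP8_FI_1/DPP8_FI_0, while classic DPP appends row_mask,
// bank_mask, bound_ctrl and (when present) fi as optional immediates.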
void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8) {
  OptionalImmIndexMap OptionalIdx;

  unsigned I = 1;
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
  }

  int Fi = 0;
  for (unsigned E = Operands.size(); I != E; ++I) {
    auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(),
                                            MCOI::TIED_TO);
    if (TiedTo != -1) {
      assert((unsigned)TiedTo < Inst.getNumOperands());
      // handle tied old or src2 for MAC instructions
      Inst.addOperand(Inst.getOperand(TiedTo));
    }
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
    // Add the register arguments
    if (Op.isReg() && validateVccOperand(Op.getReg())) {
      // VOP2b (v_add_u32, v_sub_u32 ...) dpp use "vcc" token.
      // Skip it.
      continue;
    }

    if (IsDPP8) {
      if (Op.isDPP8()) {
        Op.addImmOperands(Inst, 1);
      } else if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
        Op.addRegWithFPInputModsOperands(Inst, 2);
      } else if (Op.isFI()) {
        Fi = Op.getImm();
      } else if (Op.isReg()) {
        Op.addRegOperands(Inst, 1);
      } else {
        llvm_unreachable("Invalid operand type");
      }
    } else {
      if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
        Op.addRegWithFPInputModsOperands(Inst, 2);
      } else if (Op.isDPPCtrl()) {
        Op.addImmOperands(Inst, 1);
      } else if (Op.isImm()) {
        // Handle optional arguments
        OptionalIdx[Op.getImmTy()] = I;
      } else {
        llvm_unreachable("Invalid operand type");
      }
    }
  }

  if (IsDPP8) {
    using namespace llvm::AMDGPU::DPP;
    Inst.addOperand(MCOperand::createImm(Fi ? DPP8_FI_1 : DPP8_FI_0));
  } else {
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf);
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf);
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl);
    if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::fi) != -1) {
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppFi);
    }
  }
}
//===----------------------------------------------------------------------===//
// sdwa
//===----------------------------------------------------------------------===//
OperandMatchResultTy
AMDGPUAsmParser::parseSDWASel(OperandVector &Operands, StringRef Prefix,
                              AMDGPUOperand::ImmTy Type) {
  using namespace llvm::AMDGPU::SDWA;

  SMLoc S = Parser.getTok().getLoc();
  StringRef Value;
  OperandMatchResultTy res;

  res = parseStringWithPrefix(Prefix, Value);
  if (res != MatchOperand_Success) {
    return res;
  }

  int64_t Int;
  Int = StringSwitch<int64_t>(Value)
        .Case("BYTE_0", SdwaSel::BYTE_0)
        .Case("BYTE_1", SdwaSel::BYTE_1)
        .Case("BYTE_2", SdwaSel::BYTE_2)
        .Case("BYTE_3", SdwaSel::BYTE_3)
        .Case("WORD_0", SdwaSel::WORD_0)
        .Case("WORD_1", SdwaSel::WORD_1)
        .Case("DWORD", SdwaSel::DWORD)
        .Default(0xffffffff);
  Parser.Lex(); // eat last token

  if (Int == 0xffffffff) {
    return MatchOperand_ParseFail;
  }

  Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, Type));
  return MatchOperand_Success;
}
OperandMatchResultTy
AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) {
  using namespace llvm::AMDGPU::SDWA;

  SMLoc S = Parser.getTok().getLoc();
  StringRef Value;
  OperandMatchResultTy res;

  res = parseStringWithPrefix("dst_unused", Value);
  if (res != MatchOperand_Success) {
    return res;
  }

  int64_t Int;
  Int = StringSwitch<int64_t>(Value)
        .Case("UNUSED_PAD", DstUnused::UNUSED_PAD)
        .Case("UNUSED_SEXT", DstUnused::UNUSED_SEXT)
        .Case("UNUSED_PRESERVE", DstUnused::UNUSED_PRESERVE)
        .Default(0xffffffff);
  Parser.Lex(); // eat last token

  if (Int == 0xffffffff) {
    return MatchOperand_ParseFail;
  }

  Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTySdwaDstUnused));
  return MatchOperand_Success;
}
void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOP1);
}

void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOP2);
}

void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true, true);
}

void AMDGPUAsmParser::cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, false, true);
}

void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, isVI());
}
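// cvtSDWA (below) handles all SDWA encodings. The SkipDstVcc/SkipSrcVcc flags
// (set by the cvtSdwaVOP2b/VOP2e/VOPC wrappers above) drop the explicit "vcc"
// tokens that appear in the assembly but are not encoded as ordinary
// operands, and the trailing sel/unused operands are filled with
// DWORD/UNUSED_PRESERVE defaults when they were not written.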
void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands,
                              uint64_t BasicInstType,
                              bool SkipDstVcc,
                              bool SkipSrcVcc) {
  using namespace llvm::AMDGPU::SDWA;

  OptionalImmIndexMap OptionalIdx;
  bool SkipVcc = SkipDstVcc || SkipSrcVcc;
  bool SkippedVcc = false;

  unsigned I = 1;
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
  }

  for (unsigned E = Operands.size(); I != E; ++I) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
    if (SkipVcc && !SkippedVcc && Op.isReg() &&
        (Op.getReg() == AMDGPU::VCC || Op.getReg() == AMDGPU::VCC_LO)) {
      // VOP2b (v_add_u32, v_sub_u32 ...) sdwa use "vcc" token as dst.
      // Skip it if it's 2nd (e.g. v_add_i32_sdwa v1, vcc, v2, v3)
      // or 4th (v_addc_u32_sdwa v1, vcc, v2, v3, vcc) operand.
      // Skip VCC only if we didn't skip it on previous iteration.
      // Note that src0 and src1 occupy 2 slots each because of modifiers.
      if (BasicInstType == SIInstrFlags::VOP2 &&
          ((SkipDstVcc && Inst.getNumOperands() == 1) ||
           (SkipSrcVcc && Inst.getNumOperands() == 5))) {
        SkippedVcc = true;
        continue;
      } else if (BasicInstType == SIInstrFlags::VOPC &&
                 Inst.getNumOperands() == 0) {
        SkippedVcc = true;
        continue;
      }
    }
    if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
      Op.addRegOrImmWithInputModsOperands(Inst, 2);
    } else if (Op.isImm()) {
      // Handle optional arguments
      OptionalIdx[Op.getImmTy()] = I;
    } else {
      llvm_unreachable("Invalid operand type");
    }
    SkippedVcc = false;
  }

  if (Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx10 &&
      Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx9 &&
      Inst.getOpcode() != AMDGPU::V_NOP_sdwa_vi) {
    // v_nop_sdwa_sdwa_vi/gfx9 has no optional sdwa arguments
    switch (BasicInstType) {
    case SIInstrFlags::VOP1:
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
      if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
        addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
      }
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
      break;

    case SIInstrFlags::VOP2:
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
      if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
        addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
      }
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
      break;

    case SIInstrFlags::VOPC:
      if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::clamp) != -1)
        addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
      break;

    default:
      llvm_unreachable("Invalid instruction type. Only VOP1, VOP2 and VOPC allowed");
    }
  }

  // special case v_mac_{f16, f32}:
  // it has src2 register operand that is tied to dst operand
  if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
      Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
    auto it = Inst.begin();
    std::advance(
      it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2));
    Inst.insert(it, Inst.getOperand(0)); // src2 = dst
  }
}
//===----------------------------------------------------------------------===//
// mAI
//===----------------------------------------------------------------------===//
AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBLGP() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyBLGP);
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCBSZ() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCBSZ);
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultABID() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyABID);
}
/// Force static initialization.
extern "C" void LLVMInitializeAMDGPUAsmParser() {
  RegisterMCAsmParser<AMDGPUAsmParser> A(getTheAMDGPUTarget());
  RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget());
}

#define GET_REGISTER_MATCHER
#define GET_MATCHER_IMPLEMENTATION
#define GET_MNEMONIC_SPELL_CHECKER
#include "AMDGPUGenAsmMatcher.inc"
// This function should be defined after auto-generated include so that we have
// MatchClassKind enum defined
unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op,
                                                     unsigned Kind) {
  // Tokens like "glc" would be parsed as immediate operands in ParseOperand().
  // But MatchInstructionImpl() expects to meet token and fails to validate
  // operand. This method checks if we are given immediate operand but expect to
  // get corresponding token.
  AMDGPUOperand &Operand = (AMDGPUOperand&)Op;
  switch (Kind) {
  case MCK_addr64:
    return Operand.isAddr64() ? Match_Success : Match_InvalidOperand;
  case MCK_gds:
    return Operand.isGDS() ? Match_Success : Match_InvalidOperand;
  case MCK_lds:
    return Operand.isLDS() ? Match_Success : Match_InvalidOperand;
  case MCK_glc:
    return Operand.isGLC() ? Match_Success : Match_InvalidOperand;
  case MCK_idxen:
    return Operand.isIdxen() ? Match_Success : Match_InvalidOperand;
  case MCK_offen:
    return Operand.isOffen() ? Match_Success : Match_InvalidOperand;
  case MCK_SSrcB32:
    // When operands have expression values, they will return true for isToken,
    // because it is not possible to distinguish between a token and an
    // expression at parse time. MatchInstructionImpl() will always try to
    // match an operand as a token, when isToken returns true, and when the
    // name of the expression is not a valid token, the match will fail,
    // so we need to handle it here.
    return Operand.isSSrcB32() ? Match_Success : Match_InvalidOperand;
  case MCK_SSrcF32:
    return Operand.isSSrcF32() ? Match_Success : Match_InvalidOperand;
  case MCK_SoppBrTarget:
    return Operand.isSoppBrTarget() ? Match_Success : Match_InvalidOperand;
  case MCK_VReg32OrOff:
    return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand;
  case MCK_InterpSlot:
    return Operand.isInterpSlot() ? Match_Success : Match_InvalidOperand;
  case MCK_Attr:
    return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand;
  case MCK_AttrChan:
    return Operand.isAttrChan() ? Match_Success : Match_InvalidOperand;
  case MCK_SReg_64:
  case MCK_SReg_64_XEXEC:
    // Null is defined as a 32-bit register but
    // it should also be enabled with 64-bit operands.
    // The following code enables it for SReg_64 operands
    // used as source and destination. Remaining source
    // operands are handled in isInlinableImm.
    return Operand.isNull() ? Match_Success : Match_InvalidOperand;
  default:
    return Match_InvalidOperand;
  }
}
//===----------------------------------------------------------------------===//
// endpgm
//===----------------------------------------------------------------------===//
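// s_endpgm takes an optional 16-bit immediate; parseEndpgmOp (below) defaults
// it to 0 when it is omitted and rejects values that do not fit in 16 bits.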
OperandMatchResultTy AMDGPUAsmParser::parseEndpgmOp(OperandVector &Operands) {
  SMLoc S = Parser.getTok().getLoc();
  int64_t Imm = 0;

  if (!parseExpr(Imm)) {
    // The operand is optional, if not present default to 0
    Imm = 0;
  }

  if (!isUInt<16>(Imm)) {
    Error(S, "expected a 16-bit value");
    return MatchOperand_ParseFail;
  }

  Operands.push_back(
      AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyEndpgm));
  return MatchOperand_Success;
}

bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmTyEndpgm); }