//===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "AMDKernelCodeT.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "MCTargetDesc/AMDGPUTargetStreamer.h"
#include "SIDefines.h"
#include "SIInstrInfo.h"
#include "TargetInfo/AMDGPUTargetInfo.h"
#include "Utils/AMDGPUAsmUtils.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "Utils/AMDKernelCodeTUtils.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/ADT/Twine.h"
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCParser/MCAsmLexer.h"
#include "llvm/MC/MCParser/MCAsmParser.h"
#include "llvm/MC/MCParser/MCAsmParserExtension.h"
#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
#include "llvm/MC/MCParser/MCTargetAsmParser.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/AMDGPUMetadata.h"
#include "llvm/Support/AMDHSAKernelDescriptor.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MachineValueType.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/SMLoc.h"
#include "llvm/Support/TargetParser.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm::AMDGPU;
using namespace llvm::amdhsa;

namespace {

class AMDGPUAsmParser;

enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_AGPR, IS_TTMP, IS_SPECIAL };

//===----------------------------------------------------------------------===//
// Operand
//===----------------------------------------------------------------------===//
class AMDGPUOperand : public MCParsedAsmOperand {
  enum KindTy {
    Token,
    Immediate,
    Register,
    Expression
  } Kind;

  SMLoc StartLoc, EndLoc;
  const AMDGPUAsmParser *AsmParser;

public:
  AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_)
    : MCParsedAsmOperand(), Kind(Kind_), AsmParser(AsmParser_) {}

  using Ptr = std::unique_ptr<AMDGPUOperand>;

  struct Modifiers {
    bool Abs = false;
    bool Neg = false;
    bool Sext = false;

    bool hasFPModifiers() const { return Abs || Neg; }
    bool hasIntModifiers() const { return Sext; }
    bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); }

    int64_t getFPModifiersOperand() const {
      int64_t Operand = 0;
      Operand |= Abs ? SISrcMods::ABS : 0u;
      Operand |= Neg ? SISrcMods::NEG : 0u;
      return Operand;
    }

    int64_t getIntModifiersOperand() const {
      int64_t Operand = 0;
      Operand |= Sext ? SISrcMods::SEXT : 0u;
      return Operand;
    }

    int64_t getModifiersOperand() const {
      assert(!(hasFPModifiers() && hasIntModifiers())
             && "fp and int modifiers should not be used simultaneously");
      if (hasFPModifiers()) {
        return getFPModifiersOperand();
      } else if (hasIntModifiers()) {
        return getIntModifiersOperand();
      } else {
        return 0;
      }
    }

    friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
  };
  bool isToken() const override {
    if (Kind == Token)
      return true;

    // When parsing operands, we can't always tell if something was meant to be
    // a token, like 'gds', or an expression that references a global variable.
    // In this case, we assume the string is an expression, and if we need to
    // interpret it as a token, then we treat the symbol name as the token.
    return isSymbolRefExpr();
  }

  bool isSymbolRefExpr() const {
    return isExpr() && Expr && isa<MCSymbolRefExpr>(Expr);
  }
  bool isImm() const override {
    return Kind == Immediate;
  }

  bool isInlinableImm(MVT type) const;
  bool isLiteralImm(MVT type) const;

  bool isRegKind() const {
    return Kind == Register;
  }

  bool isReg() const override {
    return isRegKind() && !hasModifiers();
  }

  bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const {
    return isRegClass(RCID) || isInlinableImm(type) || isLiteralImm(type);
  }

  bool isRegOrImmWithInt16InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16);
  }

  bool isRegOrImmWithInt32InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32);
  }

  bool isRegOrImmWithInt64InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64);
  }

  bool isRegOrImmWithFP16InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16);
  }

  bool isRegOrImmWithFP32InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32);
  }

  bool isRegOrImmWithFP64InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64);
  }

  bool isVReg() const {
    return isRegClass(AMDGPU::VGPR_32RegClassID) ||
           isRegClass(AMDGPU::VReg_64RegClassID) ||
           isRegClass(AMDGPU::VReg_96RegClassID) ||
           isRegClass(AMDGPU::VReg_128RegClassID) ||
           isRegClass(AMDGPU::VReg_160RegClassID) ||
           isRegClass(AMDGPU::VReg_256RegClassID) ||
           isRegClass(AMDGPU::VReg_512RegClassID) ||
           isRegClass(AMDGPU::VReg_1024RegClassID);
  }

  bool isVReg32() const {
    return isRegClass(AMDGPU::VGPR_32RegClassID);
  }

  bool isVReg32OrOff() const {
    return isOff() || isVReg32();
  }

  bool isSDWAOperand(MVT type) const;
  bool isSDWAFP16Operand() const;
  bool isSDWAFP32Operand() const;
  bool isSDWAInt16Operand() const;
  bool isSDWAInt32Operand() const;
  bool isImmTy(ImmTy ImmT) const {
    return isImm() && Imm.Type == ImmT;
  }

  bool isImmModifier() const {
    return isImm() && Imm.Type != ImmTyNone;
  }

  bool isClampSI() const { return isImmTy(ImmTyClampSI); }
  bool isOModSI() const { return isImmTy(ImmTyOModSI); }
  bool isDMask() const { return isImmTy(ImmTyDMask); }
  bool isDim() const { return isImmTy(ImmTyDim); }
  bool isUNorm() const { return isImmTy(ImmTyUNorm); }
  bool isDA() const { return isImmTy(ImmTyDA); }
  bool isR128A16() const { return isImmTy(ImmTyR128A16); }
  bool isLWE() const { return isImmTy(ImmTyLWE); }
  bool isOff() const { return isImmTy(ImmTyOff); }
  bool isExpTgt() const { return isImmTy(ImmTyExpTgt); }
  bool isExpVM() const { return isImmTy(ImmTyExpVM); }
  bool isExpCompr() const { return isImmTy(ImmTyExpCompr); }
  bool isOffen() const { return isImmTy(ImmTyOffen); }
  bool isIdxen() const { return isImmTy(ImmTyIdxen); }
  bool isAddr64() const { return isImmTy(ImmTyAddr64); }
  bool isOffset() const { return isImmTy(ImmTyOffset) && isUInt<16>(getImm()); }
  bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<8>(getImm()); }
  bool isOffset1() const { return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); }

  bool isFlatOffset() const { return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); }
  bool isGDS() const { return isImmTy(ImmTyGDS); }
  bool isLDS() const { return isImmTy(ImmTyLDS); }
  bool isDLC() const { return isImmTy(ImmTyDLC); }
  bool isGLC() const { return isImmTy(ImmTyGLC); }
  bool isSLC() const { return isImmTy(ImmTySLC); }
  bool isTFE() const { return isImmTy(ImmTyTFE); }
  bool isD16() const { return isImmTy(ImmTyD16); }
  bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<8>(getImm()); }
  bool isBankMask() const { return isImmTy(ImmTyDppBankMask); }
  bool isRowMask() const { return isImmTy(ImmTyDppRowMask); }
  bool isBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); }
  bool isFI() const { return isImmTy(ImmTyDppFi); }
  bool isSDWADstSel() const { return isImmTy(ImmTySdwaDstSel); }
  bool isSDWASrc0Sel() const { return isImmTy(ImmTySdwaSrc0Sel); }
  bool isSDWASrc1Sel() const { return isImmTy(ImmTySdwaSrc1Sel); }
  bool isSDWADstUnused() const { return isImmTy(ImmTySdwaDstUnused); }
  bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); }
  bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); }
  bool isAttrChan() const { return isImmTy(ImmTyAttrChan); }
  bool isOpSel() const { return isImmTy(ImmTyOpSel); }
  bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); }
  bool isNegLo() const { return isImmTy(ImmTyNegLo); }
  bool isNegHi() const { return isImmTy(ImmTyNegHi); }
  bool isHigh() const { return isImmTy(ImmTyHigh); }

  bool isMod() const {
    return isClampSI() || isOModSI();
  }
  bool isRegOrImm() const {
    return isReg() || isImm();
  }

  bool isRegClass(unsigned RCID) const;

  bool isInlineValue() const;

  bool isRegOrInlineNoMods(unsigned RCID, MVT type) const {
    return (isRegClass(RCID) || isInlinableImm(type)) && !hasModifiers();
  }

  bool isSCSrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16);
  }

  bool isSCSrcV2B16() const {
    return isSCSrcB16();
  }

  bool isSCSrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32);
  }

  bool isSCSrcB64() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64);
  }

  bool isBoolReg() const;

  bool isSCSrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16);
  }

  bool isSCSrcV2F16() const {
    return isSCSrcF16();
  }

  bool isSCSrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32);
  }

  bool isSCSrcF64() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64);
  }

  bool isSSrcB32() const {
    return isSCSrcB32() || isLiteralImm(MVT::i32) || isExpr();
  }

  bool isSSrcB16() const {
    return isSCSrcB16() || isLiteralImm(MVT::i16);
  }

  bool isSSrcV2B16() const {
    llvm_unreachable("cannot happen");
  }

  bool isSSrcB64() const {
    // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits.
    return isSCSrcB64() || isLiteralImm(MVT::i64);
  }

  bool isSSrcF32() const {
    return isSCSrcB32() || isLiteralImm(MVT::f32) || isExpr();
  }

  bool isSSrcF64() const {
    return isSCSrcB64() || isLiteralImm(MVT::f64);
  }

  bool isSSrcF16() const {
    return isSCSrcB16() || isLiteralImm(MVT::f16);
  }

  bool isSSrcV2F16() const {
    llvm_unreachable("cannot happen");
  }
  bool isSSrcOrLdsB32() const {
    return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) ||
           isLiteralImm(MVT::i32) || isExpr();
  }

  bool isVCSrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32);
  }

  bool isVCSrcB64() const {
    return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64);
  }

  bool isVCSrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16);
  }

  bool isVCSrcV2B16() const {
    return isVCSrcB16();
  }

  bool isVCSrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32);
  }

  bool isVCSrcF64() const {
    return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64);
  }

  bool isVCSrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16);
  }

  bool isVCSrcV2F16() const {
    return isVCSrcF16();
  }

  bool isVSrcB32() const {
    return isVCSrcF32() || isLiteralImm(MVT::i32) || isExpr();
  }

  bool isVSrcB64() const {
    return isVCSrcF64() || isLiteralImm(MVT::i64);
  }

  bool isVSrcB16() const {
    return isVCSrcF16() || isLiteralImm(MVT::i16);
  }

  bool isVSrcV2B16() const {
    return isVSrcB16() || isLiteralImm(MVT::v2i16);
  }

  bool isVSrcF32() const {
    return isVCSrcF32() || isLiteralImm(MVT::f32) || isExpr();
  }

  bool isVSrcF64() const {
    return isVCSrcF64() || isLiteralImm(MVT::f64);
  }

  bool isVSrcF16() const {
    return isVCSrcF16() || isLiteralImm(MVT::f16);
  }

  bool isVSrcV2F16() const {
    return isVSrcF16() || isLiteralImm(MVT::v2f16);
  }
  bool isVISrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i32);
  }

  bool isVISrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i16);
  }

  bool isVISrcV2B16() const {
    return isVISrcB16();
  }

  bool isVISrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f32);
  }

  bool isVISrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f16);
  }

  bool isVISrcV2F16() const {
    return isVISrcF16() || isVISrcB32();
  }

  bool isAISrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i32);
  }

  bool isAISrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i16);
  }

  bool isAISrcV2B16() const {
    return isAISrcB16();
  }

  bool isAISrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f32);
  }

  bool isAISrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f16);
  }

  bool isAISrcV2F16() const {
    return isAISrcF16() || isAISrcB32();
  }

  bool isAISrc_128B32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i32);
  }

  bool isAISrc_128B16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i16);
  }

  bool isAISrc_128V2B16() const {
    return isAISrc_128B16();
  }

  bool isAISrc_128F32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f32);
  }

  bool isAISrc_128F16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f16);
  }

  bool isAISrc_128V2F16() const {
    return isAISrc_128F16() || isAISrc_128B32();
  }

  bool isAISrc_512B32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i32);
  }

  bool isAISrc_512B16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i16);
  }

  bool isAISrc_512V2B16() const {
    return isAISrc_512B16();
  }

  bool isAISrc_512F32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f32);
  }

  bool isAISrc_512F16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f16);
  }

  bool isAISrc_512V2F16() const {
    return isAISrc_512F16() || isAISrc_512B32();
  }

  bool isAISrc_1024B32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i32);
  }

  bool isAISrc_1024B16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i16);
  }

  bool isAISrc_1024V2B16() const {
    return isAISrc_1024B16();
  }

  bool isAISrc_1024F32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f32);
  }

  bool isAISrc_1024F16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f16);
  }

  bool isAISrc_1024V2F16() const {
    return isAISrc_1024F16() || isAISrc_1024B32();
  }

  bool isKImmFP32() const {
    return isLiteralImm(MVT::f32);
  }

  bool isKImmFP16() const {
    return isLiteralImm(MVT::f16);
  }
  bool isMem() const override {
    return false;
  }

  bool isExpr() const {
    return Kind == Expression;
  }

  bool isSoppBrTarget() const {
    return isExpr() || isImm();
  }

  bool isSWaitCnt() const;
  bool isHwreg() const;
  bool isSendMsg() const;
  bool isSwizzle() const;
  bool isSMRDOffset8() const;
  bool isSMRDOffset20() const;
  bool isSMRDLiteralOffset() const;
  bool isDPPCtrl() const;
  bool isGPRIdxMode() const;
  bool isS16Imm() const;
  bool isU16Imm() const;
  bool isEndpgm() const;
  StringRef getExpressionAsToken() const {
    const MCSymbolRefExpr *S = cast<MCSymbolRefExpr>(Expr);
    return S->getSymbol().getName();
  }

  StringRef getToken() const {
    if (Kind == Expression)
      return getExpressionAsToken();

    return StringRef(Tok.Data, Tok.Length);
  }

  int64_t getImm() const {
    return Imm.Val;
  }

  ImmTy getImmTy() const {
    return Imm.Type;
  }

  unsigned getReg() const override {
    return Reg.RegNo;
  }

  SMLoc getStartLoc() const override {
    return StartLoc;
  }

  SMLoc getEndLoc() const override {
    return EndLoc;
  }

  SMRange getLocRange() const {
    return SMRange(StartLoc, EndLoc);
  }
  Modifiers getModifiers() const {
    assert(isRegKind() || isImmTy(ImmTyNone));
    return isRegKind() ? Reg.Mods : Imm.Mods;
  }

  void setModifiers(Modifiers Mods) {
    assert(isRegKind() || isImmTy(ImmTyNone));
    if (isRegKind())
      Reg.Mods = Mods;
    else
      Imm.Mods = Mods;
  }

  bool hasModifiers() const {
    return getModifiers().hasModifiers();
  }

  bool hasFPModifiers() const {
    return getModifiers().hasFPModifiers();
  }

  bool hasIntModifiers() const {
    return getModifiers().hasIntModifiers();
  }
  uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const;

  void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const;

  void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const;

  template <unsigned Bitwidth>
  void addKImmFPOperands(MCInst &Inst, unsigned N) const;

  void addKImmFP16Operands(MCInst &Inst, unsigned N) const {
    addKImmFPOperands<16>(Inst, N);
  }

  void addKImmFP32Operands(MCInst &Inst, unsigned N) const {
    addKImmFPOperands<32>(Inst, N);
  }

  void addRegOperands(MCInst &Inst, unsigned N) const;

  void addBoolRegOperands(MCInst &Inst, unsigned N) const {
    addRegOperands(Inst, N);
  }
  void addRegOrImmOperands(MCInst &Inst, unsigned N) const {
    if (isRegKind())
      addRegOperands(Inst, N);
    else if (isExpr())
      Inst.addOperand(MCOperand::createExpr(Expr));
    else
      addImmOperands(Inst, N);
  }

  void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const {
    Modifiers Mods = getModifiers();
    Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
    if (isRegKind())
      addRegOperands(Inst, N);
    else
      addImmOperands(Inst, N, false);
  }

  void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasIntModifiers());
    addRegOrImmWithInputModsOperands(Inst, N);
  }

  void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasFPModifiers());
    addRegOrImmWithInputModsOperands(Inst, N);
  }

  void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const {
    Modifiers Mods = getModifiers();
    Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
    addRegOperands(Inst, N);
  }

  void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasIntModifiers());
    addRegWithInputModsOperands(Inst, N);
  }

  void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasFPModifiers());
    addRegWithInputModsOperands(Inst, N);
  }

  void addSoppBrTargetOperands(MCInst &Inst, unsigned N) const {
    if (isImm())
      addImmOperands(Inst, N);
    else
      Inst.addOperand(MCOperand::createExpr(Expr));
  }
  static void printImmTy(raw_ostream& OS, ImmTy Type) {
    switch (Type) {
    case ImmTyNone: OS << "None"; break;
    case ImmTyGDS: OS << "GDS"; break;
    case ImmTyLDS: OS << "LDS"; break;
    case ImmTyOffen: OS << "Offen"; break;
    case ImmTyIdxen: OS << "Idxen"; break;
    case ImmTyAddr64: OS << "Addr64"; break;
    case ImmTyOffset: OS << "Offset"; break;
    case ImmTyInstOffset: OS << "InstOffset"; break;
    case ImmTyOffset0: OS << "Offset0"; break;
    case ImmTyOffset1: OS << "Offset1"; break;
    case ImmTyDLC: OS << "DLC"; break;
    case ImmTyGLC: OS << "GLC"; break;
    case ImmTySLC: OS << "SLC"; break;
    case ImmTyTFE: OS << "TFE"; break;
    case ImmTyD16: OS << "D16"; break;
    case ImmTyFORMAT: OS << "FORMAT"; break;
    case ImmTyClampSI: OS << "ClampSI"; break;
    case ImmTyOModSI: OS << "OModSI"; break;
    case ImmTyDPP8: OS << "DPP8"; break;
    case ImmTyDppCtrl: OS << "DppCtrl"; break;
    case ImmTyDppRowMask: OS << "DppRowMask"; break;
    case ImmTyDppBankMask: OS << "DppBankMask"; break;
    case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break;
    case ImmTyDppFi: OS << "FI"; break;
    case ImmTySdwaDstSel: OS << "SdwaDstSel"; break;
    case ImmTySdwaSrc0Sel: OS << "SdwaSrc0Sel"; break;
    case ImmTySdwaSrc1Sel: OS << "SdwaSrc1Sel"; break;
    case ImmTySdwaDstUnused: OS << "SdwaDstUnused"; break;
    case ImmTyDMask: OS << "DMask"; break;
    case ImmTyDim: OS << "Dim"; break;
    case ImmTyUNorm: OS << "UNorm"; break;
    case ImmTyDA: OS << "DA"; break;
    case ImmTyR128A16: OS << "R128A16"; break;
    case ImmTyLWE: OS << "LWE"; break;
    case ImmTyOff: OS << "Off"; break;
    case ImmTyExpTgt: OS << "ExpTgt"; break;
    case ImmTyExpCompr: OS << "ExpCompr"; break;
    case ImmTyExpVM: OS << "ExpVM"; break;
    case ImmTyHwreg: OS << "Hwreg"; break;
    case ImmTySendMsg: OS << "SendMsg"; break;
    case ImmTyInterpSlot: OS << "InterpSlot"; break;
    case ImmTyInterpAttr: OS << "InterpAttr"; break;
    case ImmTyAttrChan: OS << "AttrChan"; break;
    case ImmTyOpSel: OS << "OpSel"; break;
    case ImmTyOpSelHi: OS << "OpSelHi"; break;
    case ImmTyNegLo: OS << "NegLo"; break;
    case ImmTyNegHi: OS << "NegHi"; break;
    case ImmTySwizzle: OS << "Swizzle"; break;
    case ImmTyGprIdxMode: OS << "GprIdxMode"; break;
    case ImmTyHigh: OS << "High"; break;
    case ImmTyBLGP: OS << "BLGP"; break;
    case ImmTyCBSZ: OS << "CBSZ"; break;
    case ImmTyABID: OS << "ABID"; break;
    case ImmTyEndpgm: OS << "Endpgm"; break;
    }
  }
  void print(raw_ostream &OS) const override {
    switch (Kind) {
    case Register:
      OS << "<register " << getReg() << " mods: " << Reg.Mods << '>';
      break;
    case Immediate:
      OS << '<' << getImm();
      if (getImmTy() != ImmTyNone) {
        OS << " type: "; printImmTy(OS, getImmTy());
      }
      OS << " mods: " << Imm.Mods << '>';
      break;
    case Token:
      OS << '\'' << getToken() << '\'';
      break;
    case Expression:
      OS << "<expr " << *Expr << '>';
      break;
    }
  }
  static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser,
                                      int64_t Val, SMLoc Loc,
                                      ImmTy Type = ImmTyNone,
                                      bool IsFPImm = false) {
    auto Op = std::make_unique<AMDGPUOperand>(Immediate, AsmParser);
    Op->Imm.Val = Val;
    Op->Imm.IsFPImm = IsFPImm;
    Op->Imm.Type = Type;
    Op->Imm.Mods = Modifiers();
    Op->StartLoc = Loc;
    Op->EndLoc = Loc;
    return Op;
  }

  static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser,
                                        StringRef Str, SMLoc Loc,
                                        bool HasExplicitEncodingSize = true) {
    auto Res = std::make_unique<AMDGPUOperand>(Token, AsmParser);
    Res->Tok.Data = Str.data();
    Res->Tok.Length = Str.size();
    Res->StartLoc = Loc;
    Res->EndLoc = Loc;
    return Res;
  }

  static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser,
                                      unsigned RegNo, SMLoc S,
                                      SMLoc E) {
    auto Op = std::make_unique<AMDGPUOperand>(Register, AsmParser);
    Op->Reg.RegNo = RegNo;
    Op->Reg.Mods = Modifiers();
    Op->StartLoc = S;
    Op->EndLoc = E;
    return Op;
  }

  static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser,
                                       const class MCExpr *Expr, SMLoc S) {
    auto Op = std::make_unique<AMDGPUOperand>(Expression, AsmParser);
    Op->Expr = Expr;
    Op->StartLoc = S;
    Op->EndLoc = S;
    return Op;
  }
};
raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) {
  OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext;
  return OS;
}
//===----------------------------------------------------------------------===//
// AsmParser
//===----------------------------------------------------------------------===//

// Holds info related to the current kernel, e.g. count of SGPRs used.
// Kernel scope begins at .amdgpu_hsa_kernel directive, ends at next
// .amdgpu_hsa_kernel or at EOF.
class KernelScopeInfo {
  int SgprIndexUnusedMin = -1;
  int VgprIndexUnusedMin = -1;
  MCContext *Ctx = nullptr;
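
  // Record that SGPR/VGPR index 'i' is in use and publish the updated running
  // counts through the .kernel.sgpr_count and .kernel.vgpr_count symbols so
  // they can be referenced from the assembly source.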
  void usesSgprAt(int i) {
    if (i >= SgprIndexUnusedMin) {
      SgprIndexUnusedMin = ++i;
      if (Ctx) {
        MCSymbol* const Sym =
          Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count"));
        Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx));
      }
    }
  }

  void usesVgprAt(int i) {
    if (i >= VgprIndexUnusedMin) {
      VgprIndexUnusedMin = ++i;
      if (Ctx) {
        MCSymbol* const Sym =
          Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
        Sym->setVariableValue(MCConstantExpr::create(VgprIndexUnusedMin, *Ctx));
      }
    }
  }
public:
  KernelScopeInfo() = default;

  void initialize(MCContext &Context) {
    Ctx = &Context;
    usesSgprAt(SgprIndexUnusedMin = -1);
    usesVgprAt(VgprIndexUnusedMin = -1);
  }

  void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex, unsigned RegWidth) {
    switch (RegKind) {
      case IS_SGPR: usesSgprAt(DwordRegIndex + RegWidth - 1); break;
      case IS_AGPR: // fall through
      case IS_VGPR: usesVgprAt(DwordRegIndex + RegWidth - 1); break;
      default: break;
    }
  }
};
class AMDGPUAsmParser : public MCTargetAsmParser {
  MCAsmParser &Parser;

  // Number of extra operands parsed after the first optional operand.
  // This may be necessary to skip hardcoded mandatory operands.
  static const unsigned MAX_OPR_LOOKAHEAD = 8;

  unsigned ForcedEncodingSize = 0;
  bool ForcedDPP = false;
  bool ForcedSDWA = false;
  KernelScopeInfo KernelScope;

  /// @name Auto-generated Match Functions

#define GET_ASSEMBLER_HEADER
#include "AMDGPUGenAsmMatcher.inc"
  bool ParseAsAbsoluteExpression(uint32_t &Ret);
  bool OutOfRangeError(SMRange Range);

  /// Calculate VGPR/SGPR blocks required for given target, reserved
  /// registers, and user-specified NextFreeXGPR values.
  ///
  /// \param Features [in] Target features, used for bug corrections.
  /// \param VCCUsed [in] Whether VCC special SGPR is reserved.
  /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved.
  /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved.
  /// \param EnableWavefrontSize32 [in] Value of ENABLE_WAVEFRONT_SIZE32 kernel
  /// descriptor field, if valid.
  /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one.
  /// \param VGPRRange [in] Token range, used for VGPR diagnostics.
  /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one.
  /// \param SGPRRange [in] Token range, used for SGPR diagnostics.
  /// \param VGPRBlocks [out] Result VGPR block count.
  /// \param SGPRBlocks [out] Result SGPR block count.
  bool calculateGPRBlocks(const FeatureBitset &Features, bool VCCUsed,
                          bool FlatScrUsed, bool XNACKUsed,
                          Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR,
                          SMRange VGPRRange, unsigned NextFreeSGPR,
                          SMRange SGPRRange, unsigned &VGPRBlocks,
                          unsigned &SGPRBlocks);
  bool ParseDirectiveAMDGCNTarget();
  bool ParseDirectiveAMDHSAKernel();
  bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor);
  bool ParseDirectiveHSACodeObjectVersion();
  bool ParseDirectiveHSACodeObjectISA();
  bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header);
  bool ParseDirectiveAMDKernelCodeT();
  bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo) const;
  bool ParseDirectiveAMDGPUHsaKernel();

  bool ParseDirectiveISAVersion();
  bool ParseDirectiveHSAMetadata();
  bool ParseDirectivePALMetadataBegin();
  bool ParseDirectivePALMetadata();
  bool ParseDirectiveAMDGPULDS();

  /// Common code to parse out a block of text (typically YAML) between start
  /// and end directives.
  bool ParseToEndDirective(const char *AssemblerDirectiveBegin,
                           const char *AssemblerDirectiveEnd,
                           std::string &CollectString);
  bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth,
                             RegisterKind RegKind, unsigned Reg1,
                             unsigned RegNum);
  bool ParseAMDGPURegister(RegisterKind& RegKind, unsigned& Reg,
                           unsigned& RegNum, unsigned& RegWidth,
                           unsigned *DwordRegIndex);
  bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const;
  Optional<StringRef> getGprCountSymbolName(RegisterKind RegKind);
  void initializeGprCountSymbol(RegisterKind RegKind);
  bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex,
                             unsigned RegWidth);
  void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands,
                    bool IsAtomic, bool IsAtomicReturn, bool IsLds = false);
  void cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
                 bool IsGdsHardcoded);

public:
  enum AMDGPUMatchResultTy {
    Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY
  };

  enum OperandMode {
    OperandMode_Default,
  };

  using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;
  AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
                  const MCInstrInfo &MII,
                  const MCTargetOptions &Options)
      : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) {
    MCAsmParserExtension::Initialize(Parser);

    if (getFeatureBits().none()) {
      // Set default features.
      copySTI().ToggleFeature("southern-islands");
    }

    setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));

    // TODO: make those pre-defined variables read-only.
    // Currently there is no suitable machinery in the core llvm-mc for this.
    // MCSymbol::isRedefinable is intended for another purpose, and
    // AsmParser::parseDirectiveSet() cannot be specialized for specific target.
    AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
    MCContext &Ctx = getContext();
    if (ISA.Major >= 6 && AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
      MCSymbol *Sym =
          Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_number"));
      Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
      Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_minor"));
      Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
      Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_stepping"));
      Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
    } else {
      MCSymbol *Sym =
          Ctx.getOrCreateSymbol(Twine(".option.machine_version_major"));
      Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
      Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_minor"));
      Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
      Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping"));
      Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
    }
    if (ISA.Major >= 6 && AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
      initializeGprCountSymbol(IS_VGPR);
      initializeGprCountSymbol(IS_SGPR);
    } else
      KernelScope.initialize(getContext());
  }
  bool hasXNACK() const {
    return AMDGPU::hasXNACK(getSTI());
  }

  bool hasMIMG_R128() const {
    return AMDGPU::hasMIMG_R128(getSTI());
  }

  bool hasPackedD16() const {
    return AMDGPU::hasPackedD16(getSTI());
  }

  bool isSI() const {
    return AMDGPU::isSI(getSTI());
  }

  bool isCI() const {
    return AMDGPU::isCI(getSTI());
  }

  bool isVI() const {
    return AMDGPU::isVI(getSTI());
  }

  bool isGFX9() const {
    return AMDGPU::isGFX9(getSTI());
  }

  bool isGFX10() const {
    return AMDGPU::isGFX10(getSTI());
  }

  bool hasInv2PiInlineImm() const {
    return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
  }

  bool hasFlatOffsets() const {
    return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets];
  }

  bool hasSGPR102_SGPR103() const {
    return !isVI() && !isGFX9();
  }

  bool hasSGPR104_SGPR105() const {
    return isGFX10();
  }

  bool hasIntClamp() const {
    return getFeatureBits()[AMDGPU::FeatureIntClamp];
  }
  AMDGPUTargetStreamer &getTargetStreamer() {
    MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
    return static_cast<AMDGPUTargetStreamer &>(TS);
  }

  const MCRegisterInfo *getMRI() const {
    // We need this const_cast because for some reason getContext() is not
    // const in MCAsmParser.
    return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo();
  }

  const MCInstrInfo *getMII() const {
    return &MII;
  }

  const FeatureBitset &getFeatureBits() const {
    return getSTI().getFeatureBits();
  }

  void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; }
  void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; }
  void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; }

  unsigned getForcedEncodingSize() const { return ForcedEncodingSize; }
  bool isForcedVOP3() const { return ForcedEncodingSize == 64; }
  bool isForcedDPP() const { return ForcedDPP; }
  bool isForcedSDWA() const { return ForcedSDWA; }
  ArrayRef<unsigned> getMatchedVariants() const;
  std::unique_ptr<AMDGPUOperand> parseRegister();
  bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
  unsigned checkTargetMatchPredicate(MCInst &Inst) override;
  unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
                                      unsigned Kind) override;
  bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
                               OperandVector &Operands, MCStreamer &Out,
                               uint64_t &ErrorInfo,
                               bool MatchingInlineAsm) override;
  bool ParseDirective(AsmToken DirectiveID) override;
  OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Mnemonic,
                                    OperandMode Mode = OperandMode_Default);
  StringRef parseMnemonicSuffix(StringRef Name);
  bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
                        SMLoc NameLoc, OperandVector &Operands) override;
  //bool ProcessInstruction(MCInst &Inst);
  OperandMatchResultTy parseIntWithPrefix(const char *Prefix, int64_t &Int);

  OperandMatchResultTy
  parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
                     AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
                     bool (*ConvertResult)(int64_t &) = nullptr);

  OperandMatchResultTy
  parseOperandArrayWithPrefix(const char *Prefix,
                              OperandVector &Operands,
                              AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
                              bool (*ConvertResult)(int64_t&) = nullptr);

  OperandMatchResultTy
  parseNamedBit(const char *Name, OperandVector &Operands,
                AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone);
  OperandMatchResultTy parseStringWithPrefix(StringRef Prefix,
                                             StringRef &Value);

  bool isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
  bool isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
  bool isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
  bool isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const;
  bool parseSP3NegModifier();
  OperandMatchResultTy parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false);
  OperandMatchResultTy parseReg(OperandVector &Operands);
  OperandMatchResultTy parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false);
  OperandMatchResultTy parseRegOrImmWithFPInputMods(OperandVector &Operands, bool AllowImm = true);
  OperandMatchResultTy parseRegOrImmWithIntInputMods(OperandVector &Operands, bool AllowImm = true);
  OperandMatchResultTy parseRegWithFPInputMods(OperandVector &Operands);
  OperandMatchResultTy parseRegWithIntInputMods(OperandVector &Operands);
  OperandMatchResultTy parseVReg32OrOff(OperandVector &Operands);
  OperandMatchResultTy parseDfmtNfmt(OperandVector &Operands);

  void cvtDSOffset01(MCInst &Inst, const OperandVector &Operands);
  void cvtDS(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, false); }
  void cvtDSGds(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, true); }
  void cvtExp(MCInst &Inst, const OperandVector &Operands);
  bool parseCnt(int64_t &IntVal);
  OperandMatchResultTy parseSWaitCntOps(OperandVector &Operands);
  OperandMatchResultTy parseHwreg(OperandVector &Operands);

  struct OperandInfoTy {
    int64_t Id;
    bool IsSymbolic = false;
    bool IsDefined = false;

    OperandInfoTy(int64_t Id_) : Id(Id_) {}
  };

  bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &Op, OperandInfoTy &Stream);
  bool validateSendMsg(const OperandInfoTy &Msg,
                       const OperandInfoTy &Op,
                       const OperandInfoTy &Stream,
                       const SMLoc Loc);

  bool parseHwregBody(OperandInfoTy &HwReg, int64_t &Offset, int64_t &Width);
  bool validateHwreg(const OperandInfoTy &HwReg,
                     const int64_t Offset,
                     const int64_t Width,
                     const SMLoc Loc);
  OperandMatchResultTy parseExpTgtImpl(StringRef Str, uint8_t &Val);
  SMLoc getFlatOffsetLoc(const OperandVector &Operands) const;

  bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc, const OperandVector &Operands);
  bool validateFlatOffset(const MCInst &Inst, const OperandVector &Operands);
  bool validateSOPLiteral(const MCInst &Inst) const;
  bool validateConstantBusLimitations(const MCInst &Inst);
  bool validateEarlyClobberLimitations(const MCInst &Inst);
  bool validateIntClampSupported(const MCInst &Inst);
  bool validateMIMGAtomicDMask(const MCInst &Inst);
  bool validateMIMGGatherDMask(const MCInst &Inst);
  bool validateMIMGDataSize(const MCInst &Inst);
  bool validateMIMGAddrSize(const MCInst &Inst);
  bool validateMIMGD16(const MCInst &Inst);
  bool validateMIMGDim(const MCInst &Inst);
  bool validateLdsDirect(const MCInst &Inst);
  bool validateOpSel(const MCInst &Inst);
  bool validateVccOperand(unsigned Reg) const;
  bool validateVOP3Literal(const MCInst &Inst) const;
  unsigned getConstantBusLimit(unsigned Opcode) const;
  bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
  bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
  unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const;
  bool isId(const StringRef Id) const;
  bool isId(const AsmToken &Token, const StringRef Id) const;
  bool isToken(const AsmToken::TokenKind Kind) const;
  bool trySkipId(const StringRef Id);
  bool trySkipId(const StringRef Id, const AsmToken::TokenKind Kind);
  bool trySkipToken(const AsmToken::TokenKind Kind);
  bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg);
  bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string");
  void peekTokens(MutableArrayRef<AsmToken> Tokens);
  AsmToken::TokenKind getTokenKind() const;
  bool parseExpr(int64_t &Imm);
  bool parseExpr(OperandVector &Operands);
  StringRef getTokenStr() const;
  AsmToken peekToken();
  AsmToken getToken() const;
  SMLoc getLoc() const;

public:
  OperandMatchResultTy parseOptionalOperand(OperandVector &Operands);
  OperandMatchResultTy parseOptionalOpr(OperandVector &Operands);

  OperandMatchResultTy parseExpTgt(OperandVector &Operands);
  OperandMatchResultTy parseSendMsgOp(OperandVector &Operands);
  OperandMatchResultTy parseInterpSlot(OperandVector &Operands);
  OperandMatchResultTy parseInterpAttr(OperandVector &Operands);
  OperandMatchResultTy parseSOppBrTarget(OperandVector &Operands);
  OperandMatchResultTy parseBoolReg(OperandVector &Operands);
  bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
                            const unsigned MinVal,
                            const unsigned MaxVal,
                            const StringRef ErrMsg);
  OperandMatchResultTy parseSwizzleOp(OperandVector &Operands);
  bool parseSwizzleOffset(int64_t &Imm);
  bool parseSwizzleMacro(int64_t &Imm);
  bool parseSwizzleQuadPerm(int64_t &Imm);
  bool parseSwizzleBitmaskPerm(int64_t &Imm);
  bool parseSwizzleBroadcast(int64_t &Imm);
  bool parseSwizzleSwap(int64_t &Imm);
  bool parseSwizzleReverse(int64_t &Imm);

  OperandMatchResultTy parseGPRIdxMode(OperandVector &Operands);
  int64_t parseGPRIdxMacro();

  void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false); }
  void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, false); }
  void cvtMubufAtomicReturn(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, true); }
  void cvtMubufLds(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false, true); }
  void cvtMtbuf(MCInst &Inst, const OperandVector &Operands);
  AMDGPUOperand::Ptr defaultDLC() const;
  AMDGPUOperand::Ptr defaultGLC() const;
  AMDGPUOperand::Ptr defaultSLC() const;

  AMDGPUOperand::Ptr defaultSMRDOffset8() const;
  AMDGPUOperand::Ptr defaultSMRDOffset20() const;
  AMDGPUOperand::Ptr defaultSMRDLiteralOffset() const;
  AMDGPUOperand::Ptr defaultFlatOffset() const;

  OperandMatchResultTy parseOModOperand(OperandVector &Operands);

  void cvtVOP3(MCInst &Inst, const OperandVector &Operands,
               OptionalImmIndexMap &OptionalIdx);
  void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands);
  void cvtVOP3(MCInst &Inst, const OperandVector &Operands);
  void cvtVOP3P(MCInst &Inst, const OperandVector &Operands);

  void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands);

  void cvtMIMG(MCInst &Inst, const OperandVector &Operands,
               bool IsAtomic = false);
  void cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands);
  OperandMatchResultTy parseDim(OperandVector &Operands);
  OperandMatchResultTy parseDPP8(OperandVector &Operands);
  OperandMatchResultTy parseDPPCtrl(OperandVector &Operands);
  AMDGPUOperand::Ptr defaultRowMask() const;
  AMDGPUOperand::Ptr defaultBankMask() const;
  AMDGPUOperand::Ptr defaultBoundCtrl() const;
  AMDGPUOperand::Ptr defaultFI() const;
  void cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8 = false);
  void cvtDPP8(MCInst &Inst, const OperandVector &Operands) { cvtDPP(Inst, Operands, true); }

  OperandMatchResultTy parseSDWASel(OperandVector &Operands, StringRef Prefix,
                                    AMDGPUOperand::ImmTy Type);
  OperandMatchResultTy parseSDWADstUnused(OperandVector &Operands);
  void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands);
  void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands);
  void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands);
  void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands);
  void cvtSDWA(MCInst &Inst, const OperandVector &Operands,
               uint64_t BasicInstType, bool skipVcc = false);

  AMDGPUOperand::Ptr defaultBLGP() const;
  AMDGPUOperand::Ptr defaultCBSZ() const;
  AMDGPUOperand::Ptr defaultABID() const;

  OperandMatchResultTy parseEndpgmOp(OperandVector &Operands);
  AMDGPUOperand::Ptr defaultEndpgmImmOperands() const;
};
struct OptionalOperand {
  AMDGPUOperand::ImmTy Type;
  bool (*ConvertResult)(int64_t&);
};

} // end anonymous namespace
// May be called with integer type with equivalent bitwidth.
static const fltSemantics *getFltSemantics(unsigned Size) {
  switch (Size) {
  case 4:
    return &APFloat::IEEEsingle();
  case 8:
    return &APFloat::IEEEdouble();
  case 2:
    return &APFloat::IEEEhalf();
  default:
    llvm_unreachable("unsupported fp type");
  }
}
static const fltSemantics *getFltSemantics(MVT VT) {
  return getFltSemantics(VT.getSizeInBits() / 8);
}
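
// Select the IEEE semantics matching the width of an SI source operand type
// (32-bit, 64-bit, or 16-bit, including the packed 16-bit forms).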
static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
  switch (OperandType) {
  case AMDGPU::OPERAND_REG_IMM_INT32:
  case AMDGPU::OPERAND_REG_IMM_FP32:
  case AMDGPU::OPERAND_REG_INLINE_C_INT32:
  case AMDGPU::OPERAND_REG_INLINE_C_FP32:
  case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
  case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
    return &APFloat::IEEEsingle();
  case AMDGPU::OPERAND_REG_IMM_INT64:
  case AMDGPU::OPERAND_REG_IMM_FP64:
  case AMDGPU::OPERAND_REG_INLINE_C_INT64:
  case AMDGPU::OPERAND_REG_INLINE_C_FP64:
    return &APFloat::IEEEdouble();
  case AMDGPU::OPERAND_REG_IMM_INT16:
  case AMDGPU::OPERAND_REG_IMM_FP16:
  case AMDGPU::OPERAND_REG_INLINE_C_INT16:
  case AMDGPU::OPERAND_REG_INLINE_C_FP16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
  case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
  case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
  case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
  case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
  case AMDGPU::OPERAND_REG_IMM_V2INT16:
  case AMDGPU::OPERAND_REG_IMM_V2FP16:
    return &APFloat::IEEEhalf();
  default:
    llvm_unreachable("unsupported fp type");
  }
}
//===----------------------------------------------------------------------===//
// Operand
//===----------------------------------------------------------------------===//
static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) {
  bool Lost;

  // Convert the literal to the target type's semantics.
  APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT),
                                               APFloat::rmNearestTiesToEven,
                                               &Lost);
  // We allow precision loss but not overflow or underflow.
  if (Status != APFloat::opOK &&
      Lost &&
      ((Status & APFloat::opOverflow)  != 0 ||
       (Status & APFloat::opUnderflow) != 0)) {
    return false;
  }

  return true;
}
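
// A value can be truncated to Size bits without losing information if it is
// representable as either an unsigned or a signed Size-bit integer.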
static bool isSafeTruncation(int64_t Val, unsigned Size) {
  return isUIntN(Size, Val) || isIntN(Size, Val);
}
bool AMDGPUOperand::isInlinableImm(MVT type) const {

  // This is a hack to enable named inline values like
  // shared_base with both 32-bit and 64-bit operands.
  // Note that these values are defined as
  // 32-bit operands only.
  if (isInlineValue()) {
    return true;
  }

  if (!isImmTy(ImmTyNone)) {
    // Only plain immediates are inlinable (e.g. "clamp" attribute is not)
    return false;
  }
  // TODO: We should avoid using host float here. It would be better to
  // check the float bit values which is what a few other places do.
  // We've had bot failures before due to weird NaN support on mips hosts.

  APInt Literal(64, Imm.Val);

  if (Imm.IsFPImm) { // We got fp literal token
    if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
      return AMDGPU::isInlinableLiteral64(Imm.Val,
                                          AsmParser->hasInv2PiInlineImm());
    }

    APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
    if (!canLosslesslyConvertToFPType(FPLiteral, type))
      return false;

    if (type.getScalarSizeInBits() == 16) {
      return AMDGPU::isInlinableLiteral16(
        static_cast<int16_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
        AsmParser->hasInv2PiInlineImm());
    }

    // Check if single precision literal is inlinable
    return AMDGPU::isInlinableLiteral32(
      static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
      AsmParser->hasInv2PiInlineImm());
  }

  // We got int literal token.
  if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
    return AMDGPU::isInlinableLiteral64(Imm.Val,
                                        AsmParser->hasInv2PiInlineImm());
  }

  if (!isSafeTruncation(Imm.Val, type.getScalarSizeInBits())) {
    return false;
  }

  if (type.getScalarSizeInBits() == 16) {
    return AMDGPU::isInlinableLiteral16(
      static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()),
      AsmParser->hasInv2PiInlineImm());
  }

  return AMDGPU::isInlinableLiteral32(
    static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()),
    AsmParser->hasInv2PiInlineImm());
}
bool AMDGPUOperand::isLiteralImm(MVT type) const {
  // Check that this immediate can be added as literal
  if (!isImmTy(ImmTyNone)) {
    return false;
  }

  if (!Imm.IsFPImm) {
    // We got int literal token.

    if (type == MVT::f64 && hasFPModifiers()) {
      // Cannot apply fp modifiers to int literals preserving the same semantics
      // for VOP1/2/C and VOP3 because of integer truncation. To avoid ambiguity,
      // disable these cases.
      return false;
    }

    unsigned Size = type.getSizeInBits();
    if (Size == 64)
      Size = 32;

    // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP
    // types.
    return isSafeTruncation(Imm.Val, Size);
  }

  // We got fp literal token
  if (type == MVT::f64) { // Expected 64-bit fp operand
    // We would set low 64-bits of literal to zeroes but we accept such literals
    return true;
  }

  if (type == MVT::i64) { // Expected 64-bit int operand
    // We don't allow fp literals in 64-bit integer instructions. It is
    // unclear how we should encode them.
    return false;
  }

  // We allow fp literals with f16x2 operands assuming that the specified
  // literal goes into the lower half and the upper half is zero. We also
  // require that the literal may be losslessly converted to f16.
  MVT ExpectedType = (type == MVT::v2f16)? MVT::f16 :
                     (type == MVT::v2i16)? MVT::i16 : type;

  APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
  return canLosslesslyConvertToFPType(FPLiteral, ExpectedType);
}
bool AMDGPUOperand::isRegClass(unsigned RCID) const {
  return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg());
}
bool AMDGPUOperand::isSDWAOperand(MVT type) const {
  if (AsmParser->isVI())
    return isVReg32();
  else if (AsmParser->isGFX9() || AsmParser->isGFX10())
    return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type);
  else
    return false;
}

bool AMDGPUOperand::isSDWAFP16Operand() const {
  return isSDWAOperand(MVT::f16);
}

bool AMDGPUOperand::isSDWAFP32Operand() const {
  return isSDWAOperand(MVT::f32);
}

bool AMDGPUOperand::isSDWAInt16Operand() const {
  return isSDWAOperand(MVT::i16);
}

bool AMDGPUOperand::isSDWAInt32Operand() const {
  return isSDWAOperand(MVT::i32);
}

bool AMDGPUOperand::isBoolReg() const {
  return (AsmParser->getFeatureBits()[AMDGPU::FeatureWavefrontSize64] && isSCSrcB64()) ||
         (AsmParser->getFeatureBits()[AMDGPU::FeatureWavefrontSize32] && isSCSrcB32());
}
uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const
{
  assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
  assert(Size == 2 || Size == 4 || Size == 8);

  const uint64_t FpSignMask = (1ULL << (Size * 8 - 1));

  if (Imm.Mods.Abs) {
    Val &= ~FpSignMask;
  }
  if (Imm.Mods.Neg) {
    Val ^= FpSignMask;
  }

  return Val;
}
void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const {
  if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()),
                             Inst.getNumOperands())) {
    addLiteralImmOperand(Inst, Imm.Val,
                         ApplyModifiers &&
                         isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
  } else {
    assert(!isImmTy(ImmTyNone) || !hasModifiers());
    Inst.addOperand(MCOperand::createImm(Imm.Val));
  }
}
void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const {
  const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode());
  auto OpNum = Inst.getNumOperands();
  // Check that this operand accepts literals
  assert(AMDGPU::isSISrcOperand(InstDesc, OpNum));

  if (ApplyModifiers) {
    assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum));
    const unsigned Size = Imm.IsFPImm ? sizeof(double) : getOperandSize(InstDesc, OpNum);
    Val = applyInputFPModifiers(Val, Size);
  }

  APInt Literal(64, Val);
  uint8_t OpTy = InstDesc.OpInfo[OpNum].OperandType;
  if (Imm.IsFPImm) { // We got fp literal token
    switch (OpTy) {
    case AMDGPU::OPERAND_REG_IMM_INT64:
    case AMDGPU::OPERAND_REG_IMM_FP64:
    case AMDGPU::OPERAND_REG_INLINE_C_INT64:
    case AMDGPU::OPERAND_REG_INLINE_C_FP64:
      if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(),
                                       AsmParser->hasInv2PiInlineImm())) {
        Inst.addOperand(MCOperand::createImm(Literal.getZExtValue()));
        return;
      }

      if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) { // Expected 64-bit fp operand
        // For fp operands we check if low 32 bits are zeros
        if (Literal.getLoBits(32) != 0) {
          const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(),
          "Can't encode literal as exact 64-bit floating-point operand. "
          "Low 32-bits will be set to zero");
        }

        Inst.addOperand(MCOperand::createImm(Literal.lshr(32).getZExtValue()));
        return;
      }

      // We don't allow fp literals in 64-bit integer instructions. It is
      // unclear how we should encode them. This case should be checked earlier
      // in predicate methods (isLiteralImm())
      llvm_unreachable("fp literal in 64-bit integer instruction.");

    case AMDGPU::OPERAND_REG_IMM_INT32:
    case AMDGPU::OPERAND_REG_IMM_FP32:
    case AMDGPU::OPERAND_REG_INLINE_C_INT32:
    case AMDGPU::OPERAND_REG_INLINE_C_FP32:
    case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
    case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
    case AMDGPU::OPERAND_REG_IMM_INT16:
    case AMDGPU::OPERAND_REG_IMM_FP16:
    case AMDGPU::OPERAND_REG_INLINE_C_INT16:
    case AMDGPU::OPERAND_REG_INLINE_C_FP16:
    case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
    case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
    case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
    case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
    case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
    case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
    case AMDGPU::OPERAND_REG_IMM_V2INT16:
    case AMDGPU::OPERAND_REG_IMM_V2FP16: {
      bool lost;
      APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
      // Convert the literal to the operand's floating-point semantics.
      FPLiteral.convert(*getOpFltSemantics(OpTy),
                        APFloat::rmNearestTiesToEven, &lost);
      // We allow precision loss but not overflow or underflow. This should be
      // checked earlier in isLiteralImm()

      uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
      Inst.addOperand(MCOperand::createImm(ImmVal));
      return;
    }
    default:
      llvm_unreachable("invalid operand size");
    }
  }
1760 // Only sign extend inline immediates.
1762 case AMDGPU::OPERAND_REG_IMM_INT32
:
1763 case AMDGPU::OPERAND_REG_IMM_FP32
:
1764 case AMDGPU::OPERAND_REG_INLINE_C_INT32
:
1765 case AMDGPU::OPERAND_REG_INLINE_C_FP32
:
1766 case AMDGPU::OPERAND_REG_INLINE_AC_INT32
:
1767 case AMDGPU::OPERAND_REG_INLINE_AC_FP32
:
1768 case AMDGPU::OPERAND_REG_IMM_V2INT16
:
1769 case AMDGPU::OPERAND_REG_IMM_V2FP16
:
1770 if (isSafeTruncation(Val
, 32) &&
1771 AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val
),
1772 AsmParser
->hasInv2PiInlineImm())) {
1773 Inst
.addOperand(MCOperand::createImm(Val
));
1777 Inst
.addOperand(MCOperand::createImm(Val
& 0xffffffff));
1780 case AMDGPU::OPERAND_REG_IMM_INT64
:
1781 case AMDGPU::OPERAND_REG_IMM_FP64
:
1782 case AMDGPU::OPERAND_REG_INLINE_C_INT64
:
1783 case AMDGPU::OPERAND_REG_INLINE_C_FP64
:
1784 if (AMDGPU::isInlinableLiteral64(Val
, AsmParser
->hasInv2PiInlineImm())) {
1785 Inst
.addOperand(MCOperand::createImm(Val
));
1789 Inst
.addOperand(MCOperand::createImm(Lo_32(Val
)));
1792 case AMDGPU::OPERAND_REG_IMM_INT16
:
1793 case AMDGPU::OPERAND_REG_IMM_FP16
:
1794 case AMDGPU::OPERAND_REG_INLINE_C_INT16
:
1795 case AMDGPU::OPERAND_REG_INLINE_C_FP16
:
1796 case AMDGPU::OPERAND_REG_INLINE_AC_INT16
:
1797 case AMDGPU::OPERAND_REG_INLINE_AC_FP16
:
1798 if (isSafeTruncation(Val
, 16) &&
1799 AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val
),
1800 AsmParser
->hasInv2PiInlineImm())) {
1801 Inst
.addOperand(MCOperand::createImm(Val
));
1805 Inst
.addOperand(MCOperand::createImm(Val
& 0xffff));
1808 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16
:
1809 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16
:
1810 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16
:
1811 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16
: {
1812 assert(isSafeTruncation(Val
, 16));
1813 assert(AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val
),
1814 AsmParser
->hasInv2PiInlineImm()));
1816 Inst
.addOperand(MCOperand::createImm(Val
));
1820 llvm_unreachable("invalid operand size");
template <unsigned Bitwidth>
void AMDGPUOperand::addKImmFPOperands(MCInst &Inst, unsigned N) const {
  APInt Literal(64, Imm.Val);

  if (!Imm.IsFPImm) {
    // We got int literal token.
    Inst.addOperand(MCOperand::createImm(Literal.getLoBits(Bitwidth).getZExtValue()));
    return;
  }

  bool Lost;
  APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
  FPLiteral.convert(*getFltSemantics(Bitwidth / 8),
                    APFloat::rmNearestTiesToEven, &Lost);
  Inst.addOperand(MCOperand::createImm(FPLiteral.bitcastToAPInt().getZExtValue()));
}
void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const {
  Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI())));
}
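
// Check whether Reg is one of the named "inline value" operands
// (shared_base, pops_exiting_wave_id, execz, scc, null, and friends).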
static bool isInlineValue(unsigned Reg) {
  switch (Reg) {
  case AMDGPU::SRC_SHARED_BASE:
  case AMDGPU::SRC_SHARED_LIMIT:
  case AMDGPU::SRC_PRIVATE_BASE:
  case AMDGPU::SRC_PRIVATE_LIMIT:
  case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
    return true;
  case AMDGPU::SRC_VCCZ:
  case AMDGPU::SRC_EXECZ:
  case AMDGPU::SRC_SCC:
    return true;
  case AMDGPU::SGPR_NULL:
    return true;
  default:
    return false;
  }
}
bool AMDGPUOperand::isInlineValue() const {
  return isRegKind() && ::isInlineValue(getReg());
}

//===----------------------------------------------------------------------===//
// AsmParser
//===----------------------------------------------------------------------===//
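
// Map a register kind and a width in 32-bit registers to the corresponding
// register class ID, or -1 if no class of that width exists for the kind.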
static int getRegClass(RegisterKind Is, unsigned RegWidth) {
  if (Is == IS_VGPR) {
    switch (RegWidth) {
      default: return -1;
      case 1: return AMDGPU::VGPR_32RegClassID;
      case 2: return AMDGPU::VReg_64RegClassID;
      case 3: return AMDGPU::VReg_96RegClassID;
      case 4: return AMDGPU::VReg_128RegClassID;
      case 5: return AMDGPU::VReg_160RegClassID;
      case 8: return AMDGPU::VReg_256RegClassID;
      case 16: return AMDGPU::VReg_512RegClassID;
      case 32: return AMDGPU::VReg_1024RegClassID;
    }
  } else if (Is == IS_TTMP) {
    switch (RegWidth) {
      default: return -1;
      case 1: return AMDGPU::TTMP_32RegClassID;
      case 2: return AMDGPU::TTMP_64RegClassID;
      case 4: return AMDGPU::TTMP_128RegClassID;
      case 8: return AMDGPU::TTMP_256RegClassID;
      case 16: return AMDGPU::TTMP_512RegClassID;
    }
  } else if (Is == IS_SGPR) {
    switch (RegWidth) {
      default: return -1;
      case 1: return AMDGPU::SGPR_32RegClassID;
      case 2: return AMDGPU::SGPR_64RegClassID;
      case 4: return AMDGPU::SGPR_128RegClassID;
      case 8: return AMDGPU::SGPR_256RegClassID;
      case 16: return AMDGPU::SGPR_512RegClassID;
    }
  } else if (Is == IS_AGPR) {
    switch (RegWidth) {
      default: return -1;
      case 1: return AMDGPU::AGPR_32RegClassID;
      case 2: return AMDGPU::AReg_64RegClassID;
      case 4: return AMDGPU::AReg_128RegClassID;
      case 16: return AMDGPU::AReg_512RegClassID;
      case 32: return AMDGPU::AReg_1024RegClassID;
    }
  }
  return -1;
}
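// Illustrative mapping (not part of the original source): a 4-dword VGPR
// range such as v[4:7] is parsed with RegWidth == 4, so getRegClass(IS_VGPR, 4)
// above selects AMDGPU::VReg_128RegClassID; s[8:11] similarly maps to
// AMDGPU::SGPR_128RegClassID.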
static unsigned getSpecialRegForName(StringRef RegName) {
  return StringSwitch<unsigned>(RegName)
    .Case("exec", AMDGPU::EXEC)
    .Case("vcc", AMDGPU::VCC)
    .Case("flat_scratch", AMDGPU::FLAT_SCR)
    .Case("xnack_mask", AMDGPU::XNACK_MASK)
    .Case("shared_base", AMDGPU::SRC_SHARED_BASE)
    .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE)
    .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT)
    .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT)
    .Case("private_base", AMDGPU::SRC_PRIVATE_BASE)
    .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE)
    .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
    .Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
    .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
    .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
    .Case("lds_direct", AMDGPU::LDS_DIRECT)
    .Case("src_lds_direct", AMDGPU::LDS_DIRECT)
    .Case("m0", AMDGPU::M0)
    .Case("vccz", AMDGPU::SRC_VCCZ)
    .Case("src_vccz", AMDGPU::SRC_VCCZ)
    .Case("execz", AMDGPU::SRC_EXECZ)
    .Case("src_execz", AMDGPU::SRC_EXECZ)
    .Case("scc", AMDGPU::SRC_SCC)
    .Case("src_scc", AMDGPU::SRC_SCC)
    .Case("tba", AMDGPU::TBA)
    .Case("tma", AMDGPU::TMA)
    .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO)
    .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI)
    .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO)
    .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI)
    .Case("vcc_lo", AMDGPU::VCC_LO)
    .Case("vcc_hi", AMDGPU::VCC_HI)
    .Case("exec_lo", AMDGPU::EXEC_LO)
    .Case("exec_hi", AMDGPU::EXEC_HI)
    .Case("tma_lo", AMDGPU::TMA_LO)
    .Case("tma_hi", AMDGPU::TMA_HI)
    .Case("tba_lo", AMDGPU::TBA_LO)
    .Case("tba_hi", AMDGPU::TBA_HI)
    .Case("null", AMDGPU::SGPR_NULL)
    .Default(AMDGPU::NoRegister);
}
bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
                                    SMLoc &EndLoc) {
  auto R = parseRegister();
  if (!R) return true;
  assert(R->isReg());
  RegNo = R->getReg();
  StartLoc = R->getStartLoc();
  EndLoc = R->getEndLoc();
  return false;
}
bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth,
                                            RegisterKind RegKind, unsigned Reg1,
                                            unsigned RegNum) {
  switch (RegKind) {
  case IS_SPECIAL:
    if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) {
      Reg = AMDGPU::EXEC;
      RegWidth = 2;
      return true;
    }
    if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) {
      Reg = AMDGPU::FLAT_SCR;
      RegWidth = 2;
      return true;
    }
    if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) {
      Reg = AMDGPU::XNACK_MASK;
      RegWidth = 2;
      return true;
    }
    if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) {
      Reg = AMDGPU::VCC;
      RegWidth = 2;
      return true;
    }
    if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) {
      Reg = AMDGPU::TBA;
      RegWidth = 2;
      return true;
    }
    if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) {
      Reg = AMDGPU::TMA;
      RegWidth = 2;
      return true;
    }
    return false;
  case IS_VGPR:
  case IS_SGPR:
  case IS_AGPR:
  case IS_TTMP:
    if (Reg1 != Reg + RegWidth) {
      return false;
    }
    RegWidth++;
    return true;
  default:
    llvm_unreachable("unexpected register kind");
  }
}
static constexpr StringLiteral Registers[] = {"v", "s", "ttmp", "acc", "a"};

bool
AMDGPUAsmParser::isRegister(const AsmToken &Token,
                            const AsmToken &NextToken) const {

  // A list of consecutive registers: [s0,s1,s2,s3]
  if (Token.is(AsmToken::LBrac))
    return true;

  if (!Token.is(AsmToken::Identifier))
    return false;

  // A single register like s0 or a range of registers like s[0:1]

  StringRef RegName = Token.getString();

  for (StringRef Reg : Registers) {
    if (RegName.startswith(Reg)) {
      if (Reg.size() < RegName.size()) {
        unsigned RegNum;
        // A single register with an index: rXX
        if (!RegName.substr(Reg.size()).getAsInteger(10, RegNum))
          return true;
      } else {
        // A range of registers: r[XX:YY].
        if (NextToken.is(AsmToken::LBrac))
          return true;
      }
    }
  }

  return getSpecialRegForName(RegName);
}

bool
AMDGPUAsmParser::isRegister()
{
  return isRegister(getToken(), peekToken());
}
bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
                                          unsigned &RegNum, unsigned &RegWidth,
                                          unsigned *DwordRegIndex) {
  if (DwordRegIndex) { *DwordRegIndex = 0; }
  const MCRegisterInfo *TRI = getContext().getRegisterInfo();
  if (getLexer().is(AsmToken::Identifier)) {
    StringRef RegName = Parser.getTok().getString();
    if ((Reg = getSpecialRegForName(RegName))) {
      Parser.Lex();
      RegKind = IS_SPECIAL;
    } else {
      unsigned RegNumIndex = 0;
      if (RegName[0] == 'v') {
        RegNumIndex = 1;
        RegKind = IS_VGPR;
      } else if (RegName[0] == 's') {
        RegNumIndex = 1;
        RegKind = IS_SGPR;
      } else if (RegName[0] == 'a') {
        RegNumIndex = RegName.startswith("acc") ? 3 : 1;
        RegKind = IS_AGPR;
      } else if (RegName.startswith("ttmp")) {
        RegNumIndex = strlen("ttmp");
        RegKind = IS_TTMP;
      } else {
        return false;
      }
      if (RegName.size() > RegNumIndex) {
        // Single 32-bit register: vXX.
        if (RegName.substr(RegNumIndex).getAsInteger(10, RegNum))
          return false;
        Parser.Lex();
        RegWidth = 1;
      } else {
        // Range of registers: v[XX:YY]. ":YY" is optional.
        Parser.Lex();
        int64_t RegLo, RegHi;
        if (getLexer().isNot(AsmToken::LBrac))
          return false;
        Parser.Lex();

        if (getParser().parseAbsoluteExpression(RegLo))
          return false;

        const bool isRBrace = getLexer().is(AsmToken::RBrac);
        if (!isRBrace && getLexer().isNot(AsmToken::Colon))
          return false;
        Parser.Lex();

        if (isRBrace) {
          RegHi = RegLo;
        } else {
          if (getParser().parseAbsoluteExpression(RegHi))
            return false;

          if (getLexer().isNot(AsmToken::RBrac))
            return false;
          Parser.Lex();
        }
        RegNum = (unsigned) RegLo;
        RegWidth = (RegHi - RegLo) + 1;
      }
    }
  } else if (getLexer().is(AsmToken::LBrac)) {
    // List of consecutive registers: [s0,s1,s2,s3]
    Parser.Lex();
    if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, nullptr))
      return false;
    if (RegWidth != 1)
      return false;
    RegisterKind RegKind1;
    unsigned Reg1, RegNum1, RegWidth1;
    do {
      if (getLexer().is(AsmToken::Comma)) {
        Parser.Lex();
      } else if (getLexer().is(AsmToken::RBrac)) {
        Parser.Lex();
        break;
      } else if (ParseAMDGPURegister(RegKind1, Reg1, RegNum1, RegWidth1, nullptr)) {
        if (RegWidth1 != 1) {
          return false;
        }
        if (RegKind1 != RegKind) {
          return false;
        }
        if (!AddNextRegisterToList(Reg, RegWidth, RegKind1, Reg1, RegNum1)) {
          return false;
        }
      } else {
        return false;
      }
    } while (true);
  } else {
    return false;
  }
  switch (RegKind) {
  case IS_SPECIAL:
    RegNum = 0;
    RegWidth = 1;
    break;
  case IS_VGPR:
  case IS_SGPR:
  case IS_AGPR:
  case IS_TTMP:
  {
    unsigned Size = 1;
    if (RegKind == IS_SGPR || RegKind == IS_TTMP) {
      // SGPR and TTMP registers must be aligned. Max required alignment is 4 dwords.
      Size = std::min(RegWidth, 4u);
    }
    if (RegNum % Size != 0)
      return false;
    if (DwordRegIndex) { *DwordRegIndex = RegNum; }
    RegNum = RegNum / Size;
    int RCID = getRegClass(RegKind, RegWidth);
    if (RCID == -1)
      return false;
    const MCRegisterClass RC = TRI->getRegClass(RCID);
    if (RegNum >= RC.getNumRegs())
      return false;
    Reg = RC.getRegister(RegNum);
    break;
  }

  default:
    llvm_unreachable("unexpected register kind");
  }

  if (!subtargetHasRegister(*TRI, Reg))
    return false;
  return true;
}
Optional<StringRef>
AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) {
  switch (RegKind) {
  case IS_VGPR:
    return StringRef(".amdgcn.next_free_vgpr");
  case IS_SGPR:
    return StringRef(".amdgcn.next_free_sgpr");
  default:
    return None;
  }
}

void AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) {
  auto SymbolName = getGprCountSymbolName(RegKind);
  assert(SymbolName && "initializing invalid register kind");
  MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
  Sym->setVariableValue(MCConstantExpr::create(0, getContext()));
}
bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind,
                                            unsigned DwordRegIndex,
                                            unsigned RegWidth) {
  // Symbols are only defined for GCN targets
  if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6)
    return true;

  auto SymbolName = getGprCountSymbolName(RegKind);
  if (!SymbolName)
    return true;
  MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);

  int64_t NewMax = DwordRegIndex + RegWidth - 1;
  int64_t OldCount;

  if (!Sym->isVariable())
    return !Error(getParser().getTok().getLoc(),
                  ".amdgcn.next_free_{v,s}gpr symbols must be variable");
  if (!Sym->getVariableValue(false)->evaluateAsAbsolute(OldCount))
    return !Error(
        getParser().getTok().getLoc(),
        ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions");

  if (OldCount <= NewMax)
    Sym->setVariableValue(MCConstantExpr::create(NewMax + 1, getContext()));

  return true;
}
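// Illustrative note (not part of the original source): after a use of v[0:3]
// is parsed, the tracking symbol .amdgcn.next_free_vgpr is raised to at least 4.
// Code-object-v3 assembly can then refer to the tracked maximums, for example:
//   .amdhsa_next_free_vgpr .amdgcn.next_free_vgpr
//   .amdhsa_next_free_sgpr .amdgcn.next_free_sgpr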
std::unique_ptr<AMDGPUOperand> AMDGPUAsmParser::parseRegister() {
  const auto &Tok = Parser.getTok();
  SMLoc StartLoc = Tok.getLoc();
  SMLoc EndLoc = Tok.getEndLoc();
  RegisterKind RegKind;
  unsigned Reg, RegNum, RegWidth, DwordRegIndex;

  if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, &DwordRegIndex)) {
    //FIXME: improve error messages (bug 41303).
    Error(StartLoc, "not a valid operand.");
    return nullptr;
  }
  if (AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
    if (!updateGprCountSymbols(RegKind, DwordRegIndex, RegWidth))
      return nullptr;
  } else
    KernelScope.usesRegister(RegKind, DwordRegIndex, RegWidth);
  return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc);
}
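// Illustrative summary (not part of the original source): parseRegister()
// above accepts operand spellings such as
//   v0             a single 32-bit VGPR
//   s[4:7]         an aligned 4-dword SGPR tuple
//   [v0, v1, v2]   a list of consecutive registers
//   flat_scratch   a special register resolved by getSpecialRegForName()
// and wraps the resolved register in an AMDGPUOperand with its source range.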
OperandMatchResultTy
AMDGPUAsmParser::parseImm(OperandVector &Operands, bool HasSP3AbsModifier) {
  // TODO: add syntactic sugar for 1/(2*PI)

  assert(!isRegister());
  assert(!isModifier());

  const auto& Tok = getToken();
  const auto& NextTok = peekToken();
  bool IsReal = Tok.is(AsmToken::Real);
  SMLoc S = getLoc();
  bool Negate = false;

  if (!IsReal && Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Real)) {
    lex();
    IsReal = true;
    Negate = true;
  }

  if (IsReal) {
    // Floating-point expressions are not supported.
    // Can only allow floating-point literals with an
    // optional sign.

    StringRef Num = getTokenStr();
    lex();

    APFloat RealVal(APFloat::IEEEdouble());
    auto roundMode = APFloat::rmNearestTiesToEven;
    if (RealVal.convertFromString(Num, roundMode) == APFloat::opInvalidOp) {
      return MatchOperand_ParseFail;
    }
    if (Negate)
      RealVal.changeSign();

    Operands.push_back(
      AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S,
                               AMDGPUOperand::ImmTyNone, true));

    return MatchOperand_Success;

  } else {
    int64_t IntVal;
    const MCExpr *Expr;

    if (HasSP3AbsModifier) {
      // This is a workaround for handling expressions
      // as arguments of SP3 'abs' modifier, for example:
      //     |1.0|
      //     |-1|
      //     |1+x|
      // This syntax is not compatible with syntax of standard
      // MC expressions (due to the trailing '|').
      SMLoc EndLoc;
      if (getParser().parsePrimaryExpr(Expr, EndLoc))
        return MatchOperand_ParseFail;
    } else {
      if (Parser.parseExpression(Expr))
        return MatchOperand_ParseFail;
    }

    if (Expr->evaluateAsAbsolute(IntVal)) {
      Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
    } else {
      Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
    }

    return MatchOperand_Success;
  }

  return MatchOperand_NoMatch;
}
OperandMatchResultTy
AMDGPUAsmParser::parseReg(OperandVector &Operands) {
  if (!isRegister())
    return MatchOperand_NoMatch;

  if (auto R = parseRegister()) {
    assert(R->isReg());
    Operands.push_back(std::move(R));
    return MatchOperand_Success;
  }
  return MatchOperand_ParseFail;
}

OperandMatchResultTy
AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod) {
  auto res = parseReg(Operands);
  if (res != MatchOperand_NoMatch) {
    return res;
  } else if (isModifier()) {
    return MatchOperand_NoMatch;
  } else {
    return parseImm(Operands, HasSP3AbsMod);
  }
}
bool
AMDGPUAsmParser::isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
  if (Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::LParen)) {
    const auto &str = Token.getString();
    return str == "abs" || str == "neg" || str == "sext";
  }
  return false;
}

bool
AMDGPUAsmParser::isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const {
  return Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::Colon);
}

bool
AMDGPUAsmParser::isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
  return isNamedOperandModifier(Token, NextToken) || Token.is(AsmToken::Pipe);
}

bool
AMDGPUAsmParser::isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
  return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken);
}
// Check if this is an operand modifier or an opcode modifier
// which may look like an expression but is not. We should
// avoid parsing these modifiers as expressions. Currently
// recognized sequences are:
//   |...|
//   abs(...)
//   neg(...)
//   sext(...)
//   -reg
//   -|...|
//   -abs(...)
//   name:...
// Note that simple opcode modifiers like 'gds' may be parsed as
// expressions; this is a special case. See getExpressionAsToken.
//
bool
AMDGPUAsmParser::isModifier() {

  AsmToken Tok = getToken();
  AsmToken NextToken[2];
  peekTokens(NextToken);

  return isOperandModifier(Tok, NextToken[0]) ||
         (Tok.is(AsmToken::Minus) && isRegOrOperandModifier(NextToken[0], NextToken[1])) ||
         isOpcodeModifierWithVal(Tok, NextToken[0]);
}
// Check if the current token is an SP3 'neg' modifier.
// Currently this modifier is allowed in the following context:
//
// 1. Before a register, e.g. "-v0", "-v[...]" or "-[v0,v1]".
// 2. Before an 'abs' modifier: -abs(...)
// 3. Before an SP3 'abs' modifier: -|...|
//
// In all other cases "-" is handled as a part
// of an expression that follows the sign.
//
// Note: When "-" is followed by an integer literal,
// this is interpreted as integer negation rather
// than a floating-point NEG modifier applied to N.
// Besides being counter-intuitive, such use of a floating-point
// NEG modifier would have resulted in a different meaning
// of integer literals used with VOP1/2/C and VOP3,
// for example:
//    v_exp_f32_e32 v5, -1  // VOP1: src0 = 0xFFFFFFFF
//    v_exp_f32_e64 v5, -1  // VOP3: src0 = 0x80000001
// Negative fp literals with a preceding "-" are
// handled likewise for uniformity.
//
bool
AMDGPUAsmParser::parseSP3NegModifier() {

  AsmToken NextToken[2];
  peekTokens(NextToken);

  if (isToken(AsmToken::Minus) &&
      (isRegister(NextToken[0], NextToken[1]) ||
       NextToken[0].is(AsmToken::Pipe) ||
       isId(NextToken[0], "abs"))) {
    lex();
    return true;
  }

  return false;
}
OperandMatchResultTy
AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands,
                                              bool AllowImm) {
  bool Neg, SP3Neg;
  bool Abs, SP3Abs;
  SMLoc Loc;

  // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead.
  if (isToken(AsmToken::Minus) && peekToken().is(AsmToken::Minus)) {
    Error(getLoc(), "invalid syntax, expected 'neg' modifier");
    return MatchOperand_ParseFail;
  }

  SP3Neg = parseSP3NegModifier();

  Loc = getLoc();
  Neg = trySkipId("neg");
  if (Neg && SP3Neg) {
    Error(Loc, "expected register or immediate");
    return MatchOperand_ParseFail;
  }
  if (Neg && !skipToken(AsmToken::LParen, "expected left paren after neg"))
    return MatchOperand_ParseFail;

  Abs = trySkipId("abs");
  if (Abs && !skipToken(AsmToken::LParen, "expected left paren after abs"))
    return MatchOperand_ParseFail;

  Loc = getLoc();
  SP3Abs = trySkipToken(AsmToken::Pipe);
  if (Abs && SP3Abs) {
    Error(Loc, "expected register or immediate");
    return MatchOperand_ParseFail;
  }

  OperandMatchResultTy Res;
  if (AllowImm) {
    Res = parseRegOrImm(Operands, SP3Abs);
  } else {
    Res = parseReg(Operands);
  }
  if (Res != MatchOperand_Success) {
    return (SP3Neg || Neg || SP3Abs || Abs)? MatchOperand_ParseFail : Res;
  }

  if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar"))
    return MatchOperand_ParseFail;
  if (Abs && !skipToken(AsmToken::RParen, "expected closing parentheses"))
    return MatchOperand_ParseFail;
  if (Neg && !skipToken(AsmToken::RParen, "expected closing parentheses"))
    return MatchOperand_ParseFail;

  AMDGPUOperand::Modifiers Mods;
  Mods.Abs = Abs || SP3Abs;
  Mods.Neg = Neg || SP3Neg;

  if (Mods.hasFPModifiers()) {
    AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
    if (Op.isExpr()) {
      Error(Op.getStartLoc(), "expected an absolute expression");
      return MatchOperand_ParseFail;
    }
    Op.setModifiers(Mods);
  }
  return MatchOperand_Success;
}
OperandMatchResultTy
AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands,
                                               bool AllowImm) {
  bool Sext = trySkipId("sext");
  if (Sext && !skipToken(AsmToken::LParen, "expected left paren after sext"))
    return MatchOperand_ParseFail;

  OperandMatchResultTy Res;
  if (AllowImm) {
    Res = parseRegOrImm(Operands);
  } else {
    Res = parseReg(Operands);
  }
  if (Res != MatchOperand_Success) {
    return Sext? MatchOperand_ParseFail : Res;
  }

  if (Sext && !skipToken(AsmToken::RParen, "expected closing parentheses"))
    return MatchOperand_ParseFail;

  AMDGPUOperand::Modifiers Mods;
  Mods.Sext = Sext;

  if (Mods.hasIntModifiers()) {
    AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
    if (Op.isExpr()) {
      Error(Op.getStartLoc(), "expected an absolute expression");
      return MatchOperand_ParseFail;
    }
    Op.setModifiers(Mods);
  }

  return MatchOperand_Success;
}
OperandMatchResultTy
AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) {
  return parseRegOrImmWithFPInputMods(Operands, false);
}

OperandMatchResultTy
AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) {
  return parseRegOrImmWithIntInputMods(Operands, false);
}

OperandMatchResultTy AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) {
  auto Loc = getLoc();
  if (trySkipId("off")) {
    Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc,
                                                AMDGPUOperand::ImmTyOff, false));
    return MatchOperand_Success;
  }

  if (!isRegister())
    return MatchOperand_NoMatch;

  std::unique_ptr<AMDGPUOperand> Reg = parseRegister();
  if (Reg) {
    Operands.push_back(std::move(Reg));
    return MatchOperand_Success;
  }

  return MatchOperand_ParseFail;
}
unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) {

  uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;

  if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) ||
      (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) ||
      (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) ||
      (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) )
    return Match_InvalidOperand;

  if ((TSFlags & SIInstrFlags::VOP3) &&
      (TSFlags & SIInstrFlags::VOPAsmPrefer32Bit) &&
      getForcedEncodingSize() != 64)
    return Match_PreferE32;

  if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
      Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
    // v_mac_f32/16 allow only dst_sel == DWORD;
    auto OpNum =
        AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel);
    const auto &Op = Inst.getOperand(OpNum);
    if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) {
      return Match_InvalidOperand;
    }
  }

  return Match_Success;
}
// What asm variants we should check
ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const {
  if (getForcedEncodingSize() == 32) {
    static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT};
    return makeArrayRef(Variants);
  }

  if (isForcedVOP3()) {
    static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3};
    return makeArrayRef(Variants);
  }

  if (isForcedSDWA()) {
    static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA,
                                        AMDGPUAsmVariants::SDWA9};
    return makeArrayRef(Variants);
  }

  if (isForcedDPP()) {
    static const unsigned Variants[] = {AMDGPUAsmVariants::DPP};
    return makeArrayRef(Variants);
  }

  static const unsigned Variants[] = {
    AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3,
    AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9, AMDGPUAsmVariants::DPP
  };

  return makeArrayRef(Variants);
}
unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const {
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
  const unsigned Num = Desc.getNumImplicitUses();
  for (unsigned i = 0; i < Num; ++i) {
    unsigned Reg = Desc.ImplicitUses[i];
    switch (Reg) {
    case AMDGPU::FLAT_SCR:
    case AMDGPU::VCC:
    case AMDGPU::VCC_LO:
    case AMDGPU::VCC_HI:
    case AMDGPU::M0:
      return Reg;
    default:
      break;
    }
  }
  return AMDGPU::NoRegister;
}
// NB: This code is correct only when used to check constant
// bus limitations because GFX7 supports no f16 inline constants.
// Note that there are no cases when a GFX7 opcode violates
// constant bus limitations due to the use of an f16 constant.
bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst,
                                       unsigned OpIdx) const {
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());

  if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) {
    return false;
  }

  const MCOperand &MO = Inst.getOperand(OpIdx);

  int64_t Val = MO.getImm();
  auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx);

  switch (OpSize) { // expected operand size
  case 8:
    return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm());
  case 4:
    return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm());
  case 2: {
    const unsigned OperandType = Desc.OpInfo[OpIdx].OperandType;
    if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 ||
        OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16 ||
        OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2INT16 ||
        OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2FP16 ||
        OperandType == AMDGPU::OPERAND_REG_IMM_V2INT16 ||
        OperandType == AMDGPU::OPERAND_REG_IMM_V2FP16) {
      return AMDGPU::isInlinableLiteralV216(Val, hasInv2PiInlineImm());
    }

    return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm());
  }
  default:
    llvm_unreachable("invalid operand size");
  }
}
unsigned AMDGPUAsmParser::getConstantBusLimit(unsigned Opcode) const {
  if (!isGFX10())
    return 1;

  switch (Opcode) {
  // 64-bit shift instructions can use only one scalar value input
  case AMDGPU::V_LSHLREV_B64:
  case AMDGPU::V_LSHLREV_B64_gfx10:
  case AMDGPU::V_LSHL_B64:
  case AMDGPU::V_LSHRREV_B64:
  case AMDGPU::V_LSHRREV_B64_gfx10:
  case AMDGPU::V_LSHR_B64:
  case AMDGPU::V_ASHRREV_I64:
  case AMDGPU::V_ASHRREV_I64_gfx10:
  case AMDGPU::V_ASHR_I64:
    return 1;
  default:
    return 2;
  }
}
bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) {
  const MCOperand &MO = Inst.getOperand(OpIdx);
  if (MO.isImm()) {
    return !isInlineConstant(Inst, OpIdx);
  } else if (MO.isReg()) {
    auto Reg = MO.getReg();
    const MCRegisterInfo *TRI = getContext().getRegisterInfo();
    return isSGPR(mc2PseudoReg(Reg), TRI) && Reg != SGPR_NULL;
  } else {
    return true;
  }
}
bool AMDGPUAsmParser::validateConstantBusLimitations(const MCInst &Inst) {
  const unsigned Opcode = Inst.getOpcode();
  const MCInstrDesc &Desc = MII.get(Opcode);
  unsigned ConstantBusUseCount = 0;
  unsigned NumLiterals = 0;
  unsigned LiteralSize;

  if (Desc.TSFlags &
      (SIInstrFlags::VOPC |
       SIInstrFlags::VOP1 | SIInstrFlags::VOP2 |
       SIInstrFlags::VOP3 | SIInstrFlags::VOP3P |
       SIInstrFlags::SDWA)) {
    // Check special imm operands (used by madmk, etc)
    if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1) {
      ++ConstantBusUseCount;
    }

    SmallDenseSet<unsigned> SGPRsUsed;
    unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst);
    if (SGPRUsed != AMDGPU::NoRegister) {
      SGPRsUsed.insert(SGPRUsed);
      ++ConstantBusUseCount;
    }

    const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
    const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
    const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);

    const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };

    for (int OpIdx : OpIndices) {
      if (OpIdx == -1) break;

      const MCOperand &MO = Inst.getOperand(OpIdx);
      if (usesConstantBus(Inst, OpIdx)) {
        if (MO.isReg()) {
          const unsigned Reg = mc2PseudoReg(MO.getReg());
          // Pairs of registers with partial intersections like these
          //   s0, s[0:1]
          //   flat_scratch_lo, flat_scratch
          //   flat_scratch_lo, flat_scratch_hi
          // are theoretically valid but they are disabled anyway.
          // Note that this code mimics SIInstrInfo::verifyInstruction
          if (!SGPRsUsed.count(Reg)) {
            SGPRsUsed.insert(Reg);
            ++ConstantBusUseCount;
          }
        } else { // Expression or a literal

          if (Desc.OpInfo[OpIdx].OperandType == MCOI::OPERAND_IMMEDIATE)
            continue; // special operand like VINTERP attr_chan

          // An instruction may use only one literal.
          // This has been validated on the previous step.
          // See validateVOP3Literal.
          // This literal may be used as more than one operand.
          // If all these operands are of the same size,
          // this literal counts as one scalar value.
          // Otherwise it counts as 2 scalar values.
          // See "GFX10 Shader Programming", section 3.6.2.3.

          unsigned Size = AMDGPU::getOperandSize(Desc, OpIdx);
          if (Size < 4) Size = 4;

          if (NumLiterals == 0) {
            NumLiterals = 1;
            LiteralSize = Size;
          } else if (LiteralSize != Size) {
            NumLiterals = 2;
          }
        }
      }
    }
  }
  ConstantBusUseCount += NumLiterals;

  return ConstantBusUseCount <= getConstantBusLimit(Opcode);
}
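// Illustrative example (not part of the original source): in a GFX10 VOP3
// instruction such as
//   v_add3_u32 v0, s1, 0x12345678, 0x12345678
// the SGPR s1 occupies one constant-bus slot and the repeated 32-bit literal
// counts as one more, which still fits the limit of 2 returned by
// getConstantBusLimit(). Reusing one literal with different operand sizes
// would make it count as two scalar values instead.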
bool AMDGPUAsmParser::validateEarlyClobberLimitations(const MCInst &Inst) {
  const unsigned Opcode = Inst.getOpcode();
  const MCInstrDesc &Desc = MII.get(Opcode);

  const int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst);
  if (DstIdx == -1 ||
      Desc.getOperandConstraint(DstIdx, MCOI::EARLY_CLOBBER) == -1) {
    return true;
  }

  const MCRegisterInfo *TRI = getContext().getRegisterInfo();

  const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
  const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
  const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);

  assert(DstIdx != -1);
  const MCOperand &Dst = Inst.getOperand(DstIdx);
  assert(Dst.isReg());
  const unsigned DstReg = mc2PseudoReg(Dst.getReg());

  const int SrcIndices[] = { Src0Idx, Src1Idx, Src2Idx };

  for (int SrcIdx : SrcIndices) {
    if (SrcIdx == -1) break;
    const MCOperand &Src = Inst.getOperand(SrcIdx);
    if (Src.isReg()) {
      const unsigned SrcReg = mc2PseudoReg(Src.getReg());
      if (isRegIntersect(DstReg, SrcReg, TRI)) {
        return false;
      }
    }
  }

  return true;
}
bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) {

  const unsigned Opc = Inst.getOpcode();
  const MCInstrDesc &Desc = MII.get(Opc);

  if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) {
    int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp);
    assert(ClampIdx != -1);
    return Inst.getOperand(ClampIdx).getImm() == 0;
  }

  return true;
}
bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst) {

  const unsigned Opc = Inst.getOpcode();
  const MCInstrDesc &Desc = MII.get(Opc);

  if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
    return true;

  int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata);
  int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
  int TFEIdx   = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe);

  assert(VDataIdx != -1);
  assert(DMaskIdx != -1);
  assert(TFEIdx != -1);

  unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx);
  unsigned TFESize = Inst.getOperand(TFEIdx).getImm()? 1 : 0;
  unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
  if (DMask == 0)
    DMask = 1;

  unsigned DataSize =
    (Desc.TSFlags & SIInstrFlags::Gather4) ? 4 : countPopulation(DMask);
  if (hasPackedD16()) {
    int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
    if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm())
      DataSize = (DataSize + 1) / 2;
  }

  return (VDataSize / 4) == DataSize + TFESize;
}
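// Illustrative example (not part of the original source): for
//   image_load v[0:2], v4, s[0:7] dmask:0x7 unorm
// dmask has three bits set and tfe/d16 are absent, so the check above expects
// a 3-dword vdata register (VDataSize / 4 == 3). Adding tfe:1 would require
// v[0:3] instead.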
2899 bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst
&Inst
) {
2900 const unsigned Opc
= Inst
.getOpcode();
2901 const MCInstrDesc
&Desc
= MII
.get(Opc
);
2903 if ((Desc
.TSFlags
& SIInstrFlags::MIMG
) == 0 || !isGFX10())
2906 const AMDGPU::MIMGInfo
*Info
= AMDGPU::getMIMGInfo(Opc
);
2907 const AMDGPU::MIMGBaseOpcodeInfo
*BaseOpcode
=
2908 AMDGPU::getMIMGBaseOpcodeInfo(Info
->BaseOpcode
);
2909 int VAddr0Idx
= AMDGPU::getNamedOperandIdx(Opc
, AMDGPU::OpName::vaddr0
);
2910 int SrsrcIdx
= AMDGPU::getNamedOperandIdx(Opc
, AMDGPU::OpName::srsrc
);
2911 int DimIdx
= AMDGPU::getNamedOperandIdx(Opc
, AMDGPU::OpName::dim
);
2913 assert(VAddr0Idx
!= -1);
2914 assert(SrsrcIdx
!= -1);
2915 assert(DimIdx
!= -1);
2916 assert(SrsrcIdx
> VAddr0Idx
);
2918 unsigned Dim
= Inst
.getOperand(DimIdx
).getImm();
2919 const AMDGPU::MIMGDimInfo
*DimInfo
= AMDGPU::getMIMGDimInfoByEncoding(Dim
);
2920 bool IsNSA
= SrsrcIdx
- VAddr0Idx
> 1;
2921 unsigned VAddrSize
=
2922 IsNSA
? SrsrcIdx
- VAddr0Idx
2923 : AMDGPU::getRegOperandSize(getMRI(), Desc
, VAddr0Idx
) / 4;
2925 unsigned AddrSize
= BaseOpcode
->NumExtraArgs
+
2926 (BaseOpcode
->Gradients
? DimInfo
->NumGradients
: 0) +
2927 (BaseOpcode
->Coordinates
? DimInfo
->NumCoords
: 0) +
2928 (BaseOpcode
->LodOrClampOrMip
? 1 : 0);
2932 else if (AddrSize
> 4)
2936 return VAddrSize
== AddrSize
;
2939 bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst
&Inst
) {
2941 const unsigned Opc
= Inst
.getOpcode();
2942 const MCInstrDesc
&Desc
= MII
.get(Opc
);
2944 if ((Desc
.TSFlags
& SIInstrFlags::MIMG
) == 0)
2946 if (!Desc
.mayLoad() || !Desc
.mayStore())
2947 return true; // Not atomic
2949 int DMaskIdx
= AMDGPU::getNamedOperandIdx(Opc
, AMDGPU::OpName::dmask
);
2950 unsigned DMask
= Inst
.getOperand(DMaskIdx
).getImm() & 0xf;
2952 // This is an incomplete check because image_atomic_cmpswap
2953 // may only use 0x3 and 0xf while other atomic operations
2954 // may use 0x1 and 0x3. However these limitations are
2955 // verified when we check that dmask matches dst size.
2956 return DMask
== 0x1 || DMask
== 0x3 || DMask
== 0xf;
2959 bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst
&Inst
) {
2961 const unsigned Opc
= Inst
.getOpcode();
2962 const MCInstrDesc
&Desc
= MII
.get(Opc
);
2964 if ((Desc
.TSFlags
& SIInstrFlags::Gather4
) == 0)
2967 int DMaskIdx
= AMDGPU::getNamedOperandIdx(Opc
, AMDGPU::OpName::dmask
);
2968 unsigned DMask
= Inst
.getOperand(DMaskIdx
).getImm() & 0xf;
2970 // GATHER4 instructions use dmask in a different fashion compared to
2971 // other MIMG instructions. The only useful DMASK values are
2972 // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns
2973 // (red,red,red,red) etc.) The ISA document doesn't mention
2975 return DMask
== 0x1 || DMask
== 0x2 || DMask
== 0x4 || DMask
== 0x8;
2978 bool AMDGPUAsmParser::validateMIMGD16(const MCInst
&Inst
) {
2980 const unsigned Opc
= Inst
.getOpcode();
2981 const MCInstrDesc
&Desc
= MII
.get(Opc
);
2983 if ((Desc
.TSFlags
& SIInstrFlags::MIMG
) == 0)
2986 int D16Idx
= AMDGPU::getNamedOperandIdx(Opc
, AMDGPU::OpName::d16
);
2987 if (D16Idx
>= 0 && Inst
.getOperand(D16Idx
).getImm()) {
2988 if (isCI() || isSI())
2995 bool AMDGPUAsmParser::validateMIMGDim(const MCInst
&Inst
) {
2996 const unsigned Opc
= Inst
.getOpcode();
2997 const MCInstrDesc
&Desc
= MII
.get(Opc
);
2999 if ((Desc
.TSFlags
& SIInstrFlags::MIMG
) == 0)
3002 int DimIdx
= AMDGPU::getNamedOperandIdx(Opc
, AMDGPU::OpName::dim
);
3006 long Imm
= Inst
.getOperand(DimIdx
).getImm();
3007 if (Imm
< 0 || Imm
>= 8)
3013 static bool IsRevOpcode(const unsigned Opcode
)
3016 case AMDGPU::V_SUBREV_F32_e32
:
3017 case AMDGPU::V_SUBREV_F32_e64
:
3018 case AMDGPU::V_SUBREV_F32_e32_gfx10
:
3019 case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7
:
3020 case AMDGPU::V_SUBREV_F32_e32_vi
:
3021 case AMDGPU::V_SUBREV_F32_e64_gfx10
:
3022 case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7
:
3023 case AMDGPU::V_SUBREV_F32_e64_vi
:
3025 case AMDGPU::V_SUBREV_I32_e32
:
3026 case AMDGPU::V_SUBREV_I32_e64
:
3027 case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7
:
3028 case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7
:
3030 case AMDGPU::V_SUBBREV_U32_e32
:
3031 case AMDGPU::V_SUBBREV_U32_e64
:
3032 case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7
:
3033 case AMDGPU::V_SUBBREV_U32_e32_vi
:
3034 case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7
:
3035 case AMDGPU::V_SUBBREV_U32_e64_vi
:
3037 case AMDGPU::V_SUBREV_U32_e32
:
3038 case AMDGPU::V_SUBREV_U32_e64
:
3039 case AMDGPU::V_SUBREV_U32_e32_gfx9
:
3040 case AMDGPU::V_SUBREV_U32_e32_vi
:
3041 case AMDGPU::V_SUBREV_U32_e64_gfx9
:
3042 case AMDGPU::V_SUBREV_U32_e64_vi
:
3044 case AMDGPU::V_SUBREV_F16_e32
:
3045 case AMDGPU::V_SUBREV_F16_e64
:
3046 case AMDGPU::V_SUBREV_F16_e32_gfx10
:
3047 case AMDGPU::V_SUBREV_F16_e32_vi
:
3048 case AMDGPU::V_SUBREV_F16_e64_gfx10
:
3049 case AMDGPU::V_SUBREV_F16_e64_vi
:
3051 case AMDGPU::V_SUBREV_U16_e32
:
3052 case AMDGPU::V_SUBREV_U16_e64
:
3053 case AMDGPU::V_SUBREV_U16_e32_vi
:
3054 case AMDGPU::V_SUBREV_U16_e64_vi
:
3056 case AMDGPU::V_SUBREV_CO_U32_e32_gfx9
:
3057 case AMDGPU::V_SUBREV_CO_U32_e64_gfx10
:
3058 case AMDGPU::V_SUBREV_CO_U32_e64_gfx9
:
3060 case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9
:
3061 case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9
:
3063 case AMDGPU::V_SUBREV_NC_U32_e32_gfx10
:
3064 case AMDGPU::V_SUBREV_NC_U32_e64_gfx10
:
3066 case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10
:
3067 case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10
:
3069 case AMDGPU::V_LSHRREV_B32_e32
:
3070 case AMDGPU::V_LSHRREV_B32_e64
:
3071 case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7
:
3072 case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7
:
3073 case AMDGPU::V_LSHRREV_B32_e32_vi
:
3074 case AMDGPU::V_LSHRREV_B32_e64_vi
:
3075 case AMDGPU::V_LSHRREV_B32_e32_gfx10
:
3076 case AMDGPU::V_LSHRREV_B32_e64_gfx10
:
3078 case AMDGPU::V_ASHRREV_I32_e32
:
3079 case AMDGPU::V_ASHRREV_I32_e64
:
3080 case AMDGPU::V_ASHRREV_I32_e32_gfx10
:
3081 case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7
:
3082 case AMDGPU::V_ASHRREV_I32_e32_vi
:
3083 case AMDGPU::V_ASHRREV_I32_e64_gfx10
:
3084 case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7
:
3085 case AMDGPU::V_ASHRREV_I32_e64_vi
:
3087 case AMDGPU::V_LSHLREV_B32_e32
:
3088 case AMDGPU::V_LSHLREV_B32_e64
:
3089 case AMDGPU::V_LSHLREV_B32_e32_gfx10
:
3090 case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7
:
3091 case AMDGPU::V_LSHLREV_B32_e32_vi
:
3092 case AMDGPU::V_LSHLREV_B32_e64_gfx10
:
3093 case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7
:
3094 case AMDGPU::V_LSHLREV_B32_e64_vi
:
3096 case AMDGPU::V_LSHLREV_B16_e32
:
3097 case AMDGPU::V_LSHLREV_B16_e64
:
3098 case AMDGPU::V_LSHLREV_B16_e32_vi
:
3099 case AMDGPU::V_LSHLREV_B16_e64_vi
:
3100 case AMDGPU::V_LSHLREV_B16_gfx10
:
3102 case AMDGPU::V_LSHRREV_B16_e32
:
3103 case AMDGPU::V_LSHRREV_B16_e64
:
3104 case AMDGPU::V_LSHRREV_B16_e32_vi
:
3105 case AMDGPU::V_LSHRREV_B16_e64_vi
:
3106 case AMDGPU::V_LSHRREV_B16_gfx10
:
3108 case AMDGPU::V_ASHRREV_I16_e32
:
3109 case AMDGPU::V_ASHRREV_I16_e64
:
3110 case AMDGPU::V_ASHRREV_I16_e32_vi
:
3111 case AMDGPU::V_ASHRREV_I16_e64_vi
:
3112 case AMDGPU::V_ASHRREV_I16_gfx10
:
3114 case AMDGPU::V_LSHLREV_B64
:
3115 case AMDGPU::V_LSHLREV_B64_gfx10
:
3116 case AMDGPU::V_LSHLREV_B64_vi
:
3118 case AMDGPU::V_LSHRREV_B64
:
3119 case AMDGPU::V_LSHRREV_B64_gfx10
:
3120 case AMDGPU::V_LSHRREV_B64_vi
:
3122 case AMDGPU::V_ASHRREV_I64
:
3123 case AMDGPU::V_ASHRREV_I64_gfx10
:
3124 case AMDGPU::V_ASHRREV_I64_vi
:
3126 case AMDGPU::V_PK_LSHLREV_B16
:
3127 case AMDGPU::V_PK_LSHLREV_B16_gfx10
:
3128 case AMDGPU::V_PK_LSHLREV_B16_vi
:
3130 case AMDGPU::V_PK_LSHRREV_B16
:
3131 case AMDGPU::V_PK_LSHRREV_B16_gfx10
:
3132 case AMDGPU::V_PK_LSHRREV_B16_vi
:
3133 case AMDGPU::V_PK_ASHRREV_I16
:
3134 case AMDGPU::V_PK_ASHRREV_I16_gfx10
:
3135 case AMDGPU::V_PK_ASHRREV_I16_vi
:
3142 bool AMDGPUAsmParser::validateLdsDirect(const MCInst
&Inst
) {
3144 using namespace SIInstrFlags
;
3145 const unsigned Opcode
= Inst
.getOpcode();
3146 const MCInstrDesc
&Desc
= MII
.get(Opcode
);
3148 // lds_direct register is defined so that it can be used
3149 // with 9-bit operands only. Ignore encodings which do not accept these.
3150 if ((Desc
.TSFlags
& (VOP1
| VOP2
| VOP3
| VOPC
| VOP3P
| SIInstrFlags::SDWA
)) == 0)
3153 const int Src0Idx
= AMDGPU::getNamedOperandIdx(Opcode
, AMDGPU::OpName::src0
);
3154 const int Src1Idx
= AMDGPU::getNamedOperandIdx(Opcode
, AMDGPU::OpName::src1
);
3155 const int Src2Idx
= AMDGPU::getNamedOperandIdx(Opcode
, AMDGPU::OpName::src2
);
3157 const int SrcIndices
[] = { Src1Idx
, Src2Idx
};
3159 // lds_direct cannot be specified as either src1 or src2.
3160 for (int SrcIdx
: SrcIndices
) {
3161 if (SrcIdx
== -1) break;
3162 const MCOperand
&Src
= Inst
.getOperand(SrcIdx
);
3163 if (Src
.isReg() && Src
.getReg() == LDS_DIRECT
) {
3171 const MCOperand
&Src
= Inst
.getOperand(Src0Idx
);
3172 if (!Src
.isReg() || Src
.getReg() != LDS_DIRECT
)
3175 // lds_direct is specified as src0. Check additional limitations.
3176 return (Desc
.TSFlags
& SIInstrFlags::SDWA
) == 0 && !IsRevOpcode(Opcode
);
3179 SMLoc
AMDGPUAsmParser::getFlatOffsetLoc(const OperandVector
&Operands
) const {
3180 for (unsigned i
= 1, e
= Operands
.size(); i
!= e
; ++i
) {
3181 AMDGPUOperand
&Op
= ((AMDGPUOperand
&)*Operands
[i
]);
3182 if (Op
.isFlatOffset())
3183 return Op
.getStartLoc();
3188 bool AMDGPUAsmParser::validateFlatOffset(const MCInst
&Inst
,
3189 const OperandVector
&Operands
) {
3190 uint64_t TSFlags
= MII
.get(Inst
.getOpcode()).TSFlags
;
3191 if ((TSFlags
& SIInstrFlags::FLAT
) == 0)
3194 auto Opcode
= Inst
.getOpcode();
3195 auto OpNum
= AMDGPU::getNamedOperandIdx(Opcode
, AMDGPU::OpName::offset
);
3196 assert(OpNum
!= -1);
3198 const auto &Op
= Inst
.getOperand(OpNum
);
3199 if (!hasFlatOffsets() && Op
.getImm() != 0) {
3200 Error(getFlatOffsetLoc(Operands
),
3201 "flat offset modifier is not supported on this GPU");
3205 // Address offset is 12-bit signed for GFX10, 13-bit for GFX9.
3206 // For FLAT segment the offset must be positive;
3207 // MSB is ignored and forced to zero.
3208 unsigned OffsetSize
= isGFX9() ? 13 : 12;
3209 if (TSFlags
& SIInstrFlags::IsNonFlatSeg
) {
3210 if (!isIntN(OffsetSize
, Op
.getImm())) {
3211 Error(getFlatOffsetLoc(Operands
),
3212 isGFX9() ? "expected a 13-bit signed offset" :
3213 "expected a 12-bit signed offset");
3217 if (!isUIntN(OffsetSize
- 1, Op
.getImm())) {
3218 Error(getFlatOffsetLoc(Operands
),
3219 isGFX9() ? "expected a 12-bit unsigned offset" :
3220 "expected an 11-bit unsigned offset");
3228 bool AMDGPUAsmParser::validateSOPLiteral(const MCInst
&Inst
) const {
3229 unsigned Opcode
= Inst
.getOpcode();
3230 const MCInstrDesc
&Desc
= MII
.get(Opcode
);
3231 if (!(Desc
.TSFlags
& (SIInstrFlags::SOP2
| SIInstrFlags::SOPC
)))
3234 const int Src0Idx
= AMDGPU::getNamedOperandIdx(Opcode
, AMDGPU::OpName::src0
);
3235 const int Src1Idx
= AMDGPU::getNamedOperandIdx(Opcode
, AMDGPU::OpName::src1
);
3237 const int OpIndices
[] = { Src0Idx
, Src1Idx
};
3239 unsigned NumLiterals
= 0;
3240 uint32_t LiteralValue
;
3242 for (int OpIdx
: OpIndices
) {
3243 if (OpIdx
== -1) break;
3245 const MCOperand
&MO
= Inst
.getOperand(OpIdx
);
3247 // Exclude special imm operands (like that used by s_set_gpr_idx_on)
3248 AMDGPU::isSISrcOperand(Desc
, OpIdx
) &&
3249 !isInlineConstant(Inst
, OpIdx
)) {
3250 uint32_t Value
= static_cast<uint32_t>(MO
.getImm());
3251 if (NumLiterals
== 0 || LiteralValue
!= Value
) {
3252 LiteralValue
= Value
;
3258 return NumLiterals
<= 1;
3261 bool AMDGPUAsmParser::validateOpSel(const MCInst
&Inst
) {
3262 const unsigned Opc
= Inst
.getOpcode();
3263 if (Opc
== AMDGPU::V_PERMLANE16_B32_gfx10
||
3264 Opc
== AMDGPU::V_PERMLANEX16_B32_gfx10
) {
3265 int OpSelIdx
= AMDGPU::getNamedOperandIdx(Opc
, AMDGPU::OpName::op_sel
);
3266 unsigned OpSel
= Inst
.getOperand(OpSelIdx
).getImm();
3274 // Check if VCC register matches wavefront size
3275 bool AMDGPUAsmParser::validateVccOperand(unsigned Reg
) const {
3276 auto FB
= getFeatureBits();
3277 return (FB
[AMDGPU::FeatureWavefrontSize64
] && Reg
== AMDGPU::VCC
) ||
3278 (FB
[AMDGPU::FeatureWavefrontSize32
] && Reg
== AMDGPU::VCC_LO
);
3281 // VOP3 literal is only allowed in GFX10+ and only one can be used
3282 bool AMDGPUAsmParser::validateVOP3Literal(const MCInst
&Inst
) const {
3283 unsigned Opcode
= Inst
.getOpcode();
3284 const MCInstrDesc
&Desc
= MII
.get(Opcode
);
3285 if (!(Desc
.TSFlags
& (SIInstrFlags::VOP3
| SIInstrFlags::VOP3P
)))
3288 const int Src0Idx
= AMDGPU::getNamedOperandIdx(Opcode
, AMDGPU::OpName::src0
);
3289 const int Src1Idx
= AMDGPU::getNamedOperandIdx(Opcode
, AMDGPU::OpName::src1
);
3290 const int Src2Idx
= AMDGPU::getNamedOperandIdx(Opcode
, AMDGPU::OpName::src2
);
3292 const int OpIndices
[] = { Src0Idx
, Src1Idx
, Src2Idx
};
3294 unsigned NumLiterals
= 0;
3295 uint32_t LiteralValue
;
3297 for (int OpIdx
: OpIndices
) {
3298 if (OpIdx
== -1) break;
3300 const MCOperand
&MO
= Inst
.getOperand(OpIdx
);
3301 if (!MO
.isImm() || !AMDGPU::isSISrcOperand(Desc
, OpIdx
))
3304 if (OpIdx
== Src2Idx
&& (Desc
.TSFlags
& SIInstrFlags::IsMAI
) &&
3305 getFeatureBits()[AMDGPU::FeatureMFMAInlineLiteralBug
])
3308 if (!isInlineConstant(Inst
, OpIdx
)) {
3309 uint32_t Value
= static_cast<uint32_t>(MO
.getImm());
3310 if (NumLiterals
== 0 || LiteralValue
!= Value
) {
3311 LiteralValue
= Value
;
3317 return !NumLiterals
||
3318 (NumLiterals
== 1 && getFeatureBits()[AMDGPU::FeatureVOP3Literal
]);
3321 bool AMDGPUAsmParser::validateInstruction(const MCInst
&Inst
,
3323 const OperandVector
&Operands
) {
3324 if (!validateLdsDirect(Inst
)) {
3326 "invalid use of lds_direct");
3329 if (!validateSOPLiteral(Inst
)) {
3331 "only one literal operand is allowed");
3334 if (!validateVOP3Literal(Inst
)) {
3336 "invalid literal operand");
3339 if (!validateConstantBusLimitations(Inst
)) {
3341 "invalid operand (violates constant bus restrictions)");
3344 if (!validateEarlyClobberLimitations(Inst
)) {
3346 "destination must be different than all sources");
3349 if (!validateIntClampSupported(Inst
)) {
3351 "integer clamping is not supported on this GPU");
3354 if (!validateOpSel(Inst
)) {
3356 "invalid op_sel operand");
3359 // For MUBUF/MTBUF d16 is a part of opcode, so there is nothing to validate.
3360 if (!validateMIMGD16(Inst
)) {
3362 "d16 modifier is not supported on this GPU");
3365 if (!validateMIMGDim(Inst
)) {
3366 Error(IDLoc
, "dim modifier is required on this GPU");
3369 if (!validateMIMGDataSize(Inst
)) {
3371 "image data size does not match dmask and tfe");
3374 if (!validateMIMGAddrSize(Inst
)) {
3376 "image address size does not match dim and a16");
3379 if (!validateMIMGAtomicDMask(Inst
)) {
3381 "invalid atomic image dmask");
3384 if (!validateMIMGGatherDMask(Inst
)) {
3386 "invalid image_gather dmask: only one bit must be set");
3389 if (!validateFlatOffset(Inst
, Operands
)) {
3396 static std::string
AMDGPUMnemonicSpellCheck(StringRef S
,
3397 const FeatureBitset
&FBS
,
3398 unsigned VariantID
= 0);
3400 bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc
, unsigned &Opcode
,
3401 OperandVector
&Operands
,
3403 uint64_t &ErrorInfo
,
3404 bool MatchingInlineAsm
) {
3406 unsigned Result
= Match_Success
;
3407 for (auto Variant
: getMatchedVariants()) {
3409 auto R
= MatchInstructionImpl(Operands
, Inst
, EI
, MatchingInlineAsm
,
3411 // We order match statuses from least to most specific. We use most specific
3412 // status as resulting
3413 // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature < Match_PreferE32
3414 if ((R
== Match_Success
) ||
3415 (R
== Match_PreferE32
) ||
3416 (R
== Match_MissingFeature
&& Result
!= Match_PreferE32
) ||
3417 (R
== Match_InvalidOperand
&& Result
!= Match_MissingFeature
3418 && Result
!= Match_PreferE32
) ||
3419 (R
== Match_MnemonicFail
&& Result
!= Match_InvalidOperand
3420 && Result
!= Match_MissingFeature
3421 && Result
!= Match_PreferE32
)) {
3425 if (R
== Match_Success
)
3432 if (!validateInstruction(Inst
, IDLoc
, Operands
)) {
3436 Out
.EmitInstruction(Inst
, getSTI());
3439 case Match_MissingFeature
:
3440 return Error(IDLoc
, "instruction not supported on this GPU");
3442 case Match_MnemonicFail
: {
3443 FeatureBitset FBS
= ComputeAvailableFeatures(getSTI().getFeatureBits());
3444 std::string Suggestion
= AMDGPUMnemonicSpellCheck(
3445 ((AMDGPUOperand
&)*Operands
[0]).getToken(), FBS
);
3446 return Error(IDLoc
, "invalid instruction" + Suggestion
,
3447 ((AMDGPUOperand
&)*Operands
[0]).getLocRange());
3450 case Match_InvalidOperand
: {
3451 SMLoc ErrorLoc
= IDLoc
;
3452 if (ErrorInfo
!= ~0ULL) {
3453 if (ErrorInfo
>= Operands
.size()) {
3454 return Error(IDLoc
, "too few operands for instruction");
3456 ErrorLoc
= ((AMDGPUOperand
&)*Operands
[ErrorInfo
]).getStartLoc();
3457 if (ErrorLoc
== SMLoc())
3460 return Error(ErrorLoc
, "invalid operand for instruction");
3463 case Match_PreferE32
:
3464 return Error(IDLoc
, "internal error: instruction without _e64 suffix "
3465 "should be encoded as e32");
3467 llvm_unreachable("Implement any new match types added!");
3470 bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret
) {
3472 if (getLexer().isNot(AsmToken::Integer
) && getLexer().isNot(AsmToken::Identifier
)) {
3475 if (getParser().parseAbsoluteExpression(Tmp
)) {
3478 Ret
= static_cast<uint32_t>(Tmp
);
3482 bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major
,
3484 if (ParseAsAbsoluteExpression(Major
))
3485 return TokError("invalid major version");
3487 if (getLexer().isNot(AsmToken::Comma
))
3488 return TokError("minor version number required, comma expected");
3491 if (ParseAsAbsoluteExpression(Minor
))
3492 return TokError("invalid minor version");
3497 bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() {
3498 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn
)
3499 return TokError("directive only supported for amdgcn architecture");
3503 SMLoc TargetStart
= getTok().getLoc();
3504 if (getParser().parseEscapedString(Target
))
3506 SMRange TargetRange
= SMRange(TargetStart
, getTok().getLoc());
3508 std::string ExpectedTarget
;
3509 raw_string_ostream
ExpectedTargetOS(ExpectedTarget
);
3510 IsaInfo::streamIsaVersion(&getSTI(), ExpectedTargetOS
);
3512 if (Target
!= ExpectedTargetOS
.str())
3513 return getParser().Error(TargetRange
.Start
, "target must match options",
3516 getTargetStreamer().EmitDirectiveAMDGCNTarget(Target
);
3520 bool AMDGPUAsmParser::OutOfRangeError(SMRange Range
) {
3521 return getParser().Error(Range
.Start
, "value out of range", Range
);
3524 bool AMDGPUAsmParser::calculateGPRBlocks(
3525 const FeatureBitset
&Features
, bool VCCUsed
, bool FlatScrUsed
,
3526 bool XNACKUsed
, Optional
<bool> EnableWavefrontSize32
, unsigned NextFreeVGPR
,
3527 SMRange VGPRRange
, unsigned NextFreeSGPR
, SMRange SGPRRange
,
3528 unsigned &VGPRBlocks
, unsigned &SGPRBlocks
) {
3529 // TODO(scott.linder): These calculations are duplicated from
3530 // AMDGPUAsmPrinter::getSIProgramInfo and could be unified.
3531 IsaVersion Version
= getIsaVersion(getSTI().getCPU());
3533 unsigned NumVGPRs
= NextFreeVGPR
;
3534 unsigned NumSGPRs
= NextFreeSGPR
;
3536 if (Version
.Major
>= 10)
3539 unsigned MaxAddressableNumSGPRs
=
3540 IsaInfo::getAddressableNumSGPRs(&getSTI());
3542 if (Version
.Major
>= 8 && !Features
.test(FeatureSGPRInitBug
) &&
3543 NumSGPRs
> MaxAddressableNumSGPRs
)
3544 return OutOfRangeError(SGPRRange
);
3547 IsaInfo::getNumExtraSGPRs(&getSTI(), VCCUsed
, FlatScrUsed
, XNACKUsed
);
3549 if ((Version
.Major
<= 7 || Features
.test(FeatureSGPRInitBug
)) &&
3550 NumSGPRs
> MaxAddressableNumSGPRs
)
3551 return OutOfRangeError(SGPRRange
);
3553 if (Features
.test(FeatureSGPRInitBug
))
3554 NumSGPRs
= IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG
;
3558 IsaInfo::getNumVGPRBlocks(&getSTI(), NumVGPRs
, EnableWavefrontSize32
);
3559 SGPRBlocks
= IsaInfo::getNumSGPRBlocks(&getSTI(), NumSGPRs
);
3564 bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
3565 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn
)
3566 return TokError("directive only supported for amdgcn architecture");
3568 if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA
)
3569 return TokError("directive only supported for amdhsa OS");
3571 StringRef KernelName
;
3572 if (getParser().parseIdentifier(KernelName
))
3575 kernel_descriptor_t KD
= getDefaultAmdhsaKernelDescriptor(&getSTI());
3579 IsaVersion IVersion
= getIsaVersion(getSTI().getCPU());
3582 uint64_t NextFreeVGPR
= 0;
3584 uint64_t NextFreeSGPR
= 0;
3585 unsigned UserSGPRCount
= 0;
3586 bool ReserveVCC
= true;
3587 bool ReserveFlatScr
= true;
3588 bool ReserveXNACK
= hasXNACK();
3589 Optional
<bool> EnableWavefrontSize32
;
3592 while (getLexer().is(AsmToken::EndOfStatement
))
3595 if (getLexer().isNot(AsmToken::Identifier
))
3596 return TokError("expected .amdhsa_ directive or .end_amdhsa_kernel");
3598 StringRef ID
= getTok().getIdentifier();
3599 SMRange IDRange
= getTok().getLocRange();
3602 if (ID
== ".end_amdhsa_kernel")
3605 if (Seen
.find(ID
) != Seen
.end())
3606 return TokError(".amdhsa_ directives cannot be repeated");
3609 SMLoc ValStart
= getTok().getLoc();
3611 if (getParser().parseAbsoluteExpression(IVal
))
3613 SMLoc ValEnd
= getTok().getLoc();
3614 SMRange ValRange
= SMRange(ValStart
, ValEnd
);
3617 return OutOfRangeError(ValRange
);
3619 uint64_t Val
= IVal
;
3621 #define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE) \
3622 if (!isUInt<ENTRY##_WIDTH>(VALUE)) \
3623 return OutOfRangeError(RANGE); \
3624 AMDHSA_BITS_SET(FIELD, ENTRY, VALUE);
3626 if (ID
== ".amdhsa_group_segment_fixed_size") {
3627 if (!isUInt
<sizeof(KD
.group_segment_fixed_size
) * CHAR_BIT
>(Val
))
3628 return OutOfRangeError(ValRange
);
3629 KD
.group_segment_fixed_size
= Val
;
3630 } else if (ID
== ".amdhsa_private_segment_fixed_size") {
3631 if (!isUInt
<sizeof(KD
.private_segment_fixed_size
) * CHAR_BIT
>(Val
))
3632 return OutOfRangeError(ValRange
);
3633 KD
.private_segment_fixed_size
= Val
;
3634 } else if (ID
== ".amdhsa_user_sgpr_private_segment_buffer") {
3635 PARSE_BITS_ENTRY(KD
.kernel_code_properties
,
3636 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER
,
3640 } else if (ID
== ".amdhsa_user_sgpr_dispatch_ptr") {
3641 PARSE_BITS_ENTRY(KD
.kernel_code_properties
,
3642 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR
, Val
,
3646 } else if (ID
== ".amdhsa_user_sgpr_queue_ptr") {
3647 PARSE_BITS_ENTRY(KD
.kernel_code_properties
,
3648 KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR
, Val
,
3652 } else if (ID
== ".amdhsa_user_sgpr_kernarg_segment_ptr") {
3653 PARSE_BITS_ENTRY(KD
.kernel_code_properties
,
3654 KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR
,
3658 } else if (ID
== ".amdhsa_user_sgpr_dispatch_id") {
3659 PARSE_BITS_ENTRY(KD
.kernel_code_properties
,
3660 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID
, Val
,
3664 } else if (ID
== ".amdhsa_user_sgpr_flat_scratch_init") {
3665 PARSE_BITS_ENTRY(KD
.kernel_code_properties
,
3666 KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT
, Val
,
3670 } else if (ID
== ".amdhsa_user_sgpr_private_segment_size") {
3671 PARSE_BITS_ENTRY(KD
.kernel_code_properties
,
3672 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE
,
3676 } else if (ID
== ".amdhsa_wavefront_size32") {
3677 if (IVersion
.Major
< 10)
3678 return getParser().Error(IDRange
.Start
, "directive requires gfx10+",
3680 EnableWavefrontSize32
= Val
;
3681 PARSE_BITS_ENTRY(KD
.kernel_code_properties
,
3682 KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32
,
3684 } else if (ID
== ".amdhsa_system_sgpr_private_segment_wavefront_offset") {
3686 KD
.compute_pgm_rsrc2
,
3687 COMPUTE_PGM_RSRC2_ENABLE_SGPR_PRIVATE_SEGMENT_WAVEFRONT_OFFSET
, Val
,
3689 } else if (ID
== ".amdhsa_system_sgpr_workgroup_id_x") {
3690 PARSE_BITS_ENTRY(KD
.compute_pgm_rsrc2
,
3691 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X
, Val
,
3693 } else if (ID
== ".amdhsa_system_sgpr_workgroup_id_y") {
3694 PARSE_BITS_ENTRY(KD
.compute_pgm_rsrc2
,
3695 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y
, Val
,
3697 } else if (ID
== ".amdhsa_system_sgpr_workgroup_id_z") {
3698 PARSE_BITS_ENTRY(KD
.compute_pgm_rsrc2
,
3699 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z
, Val
,
3701 } else if (ID
== ".amdhsa_system_sgpr_workgroup_info") {
3702 PARSE_BITS_ENTRY(KD
.compute_pgm_rsrc2
,
3703 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO
, Val
,
3705 } else if (ID
== ".amdhsa_system_vgpr_workitem_id") {
3706 PARSE_BITS_ENTRY(KD
.compute_pgm_rsrc2
,
3707 COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID
, Val
,
3709 } else if (ID
== ".amdhsa_next_free_vgpr") {
3710 VGPRRange
= ValRange
;
3712 } else if (ID
== ".amdhsa_next_free_sgpr") {
3713 SGPRRange
= ValRange
;
3715 } else if (ID
== ".amdhsa_reserve_vcc") {
3716 if (!isUInt
<1>(Val
))
3717 return OutOfRangeError(ValRange
);
3719 } else if (ID
== ".amdhsa_reserve_flat_scratch") {
3720 if (IVersion
.Major
< 7)
3721 return getParser().Error(IDRange
.Start
, "directive requires gfx7+",
3723 if (!isUInt
<1>(Val
))
3724 return OutOfRangeError(ValRange
);
3725 ReserveFlatScr
= Val
;
3726 } else if (ID
== ".amdhsa_reserve_xnack_mask") {
3727 if (IVersion
.Major
< 8)
3728 return getParser().Error(IDRange
.Start
, "directive requires gfx8+",
3730 if (!isUInt
<1>(Val
))
3731 return OutOfRangeError(ValRange
);
3733 } else if (ID
== ".amdhsa_float_round_mode_32") {
3734 PARSE_BITS_ENTRY(KD
.compute_pgm_rsrc1
,
3735 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32
, Val
, ValRange
);
3736 } else if (ID
== ".amdhsa_float_round_mode_16_64") {
3737 PARSE_BITS_ENTRY(KD
.compute_pgm_rsrc1
,
3738 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64
, Val
, ValRange
);
3739 } else if (ID
== ".amdhsa_float_denorm_mode_32") {
3740 PARSE_BITS_ENTRY(KD
.compute_pgm_rsrc1
,
3741 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32
, Val
, ValRange
);
3742 } else if (ID
== ".amdhsa_float_denorm_mode_16_64") {
3743 PARSE_BITS_ENTRY(KD
.compute_pgm_rsrc1
,
3744 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64
, Val
,
3746 } else if (ID
== ".amdhsa_dx10_clamp") {
3747 PARSE_BITS_ENTRY(KD
.compute_pgm_rsrc1
,
3748 COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP
, Val
, ValRange
);
3749 } else if (ID
== ".amdhsa_ieee_mode") {
3750 PARSE_BITS_ENTRY(KD
.compute_pgm_rsrc1
, COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE
,
3752 } else if (ID
== ".amdhsa_fp16_overflow") {
3753 if (IVersion
.Major
< 9)
3754 return getParser().Error(IDRange
.Start
, "directive requires gfx9+",
3756 PARSE_BITS_ENTRY(KD
.compute_pgm_rsrc1
, COMPUTE_PGM_RSRC1_FP16_OVFL
, Val
,
3758 } else if (ID
== ".amdhsa_workgroup_processor_mode") {
3759 if (IVersion
.Major
< 10)
3760 return getParser().Error(IDRange
.Start
, "directive requires gfx10+",
3762 PARSE_BITS_ENTRY(KD
.compute_pgm_rsrc1
, COMPUTE_PGM_RSRC1_WGP_MODE
, Val
,
3764 } else if (ID
== ".amdhsa_memory_ordered") {
3765 if (IVersion
.Major
< 10)
3766 return getParser().Error(IDRange
.Start
, "directive requires gfx10+",
3768 PARSE_BITS_ENTRY(KD
.compute_pgm_rsrc1
, COMPUTE_PGM_RSRC1_MEM_ORDERED
, Val
,
3770 } else if (ID
== ".amdhsa_forward_progress") {
3771 if (IVersion
.Major
< 10)
3772 return getParser().Error(IDRange
.Start
, "directive requires gfx10+",
3774 PARSE_BITS_ENTRY(KD
.compute_pgm_rsrc1
, COMPUTE_PGM_RSRC1_FWD_PROGRESS
, Val
,
3776 } else if (ID
== ".amdhsa_exception_fp_ieee_invalid_op") {
3778 KD
.compute_pgm_rsrc2
,
3779 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION
, Val
,
3781 } else if (ID
== ".amdhsa_exception_fp_denorm_src") {
3782 PARSE_BITS_ENTRY(KD
.compute_pgm_rsrc2
,
3783 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE
,
3785 } else if (ID
== ".amdhsa_exception_fp_ieee_div_zero") {
3787 KD
.compute_pgm_rsrc2
,
3788 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO
, Val
,
3790 } else if (ID
== ".amdhsa_exception_fp_ieee_overflow") {
3791 PARSE_BITS_ENTRY(KD
.compute_pgm_rsrc2
,
3792 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW
,
3794 } else if (ID
== ".amdhsa_exception_fp_ieee_underflow") {
3795 PARSE_BITS_ENTRY(KD
.compute_pgm_rsrc2
,
3796 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW
,
3798 } else if (ID
== ".amdhsa_exception_fp_ieee_inexact") {
3799 PARSE_BITS_ENTRY(KD
.compute_pgm_rsrc2
,
3800 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT
,
3802 } else if (ID
== ".amdhsa_exception_int_div_zero") {
3803 PARSE_BITS_ENTRY(KD
.compute_pgm_rsrc2
,
3804 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO
,
3807 return getParser().Error(IDRange
.Start
,
3808 "unknown .amdhsa_kernel directive", IDRange
);
3811 #undef PARSE_BITS_ENTRY
3814 if (Seen
.find(".amdhsa_next_free_vgpr") == Seen
.end())
3815 return TokError(".amdhsa_next_free_vgpr directive is required");
3817 if (Seen
.find(".amdhsa_next_free_sgpr") == Seen
.end())
3818 return TokError(".amdhsa_next_free_sgpr directive is required");
3820 unsigned VGPRBlocks
;
3821 unsigned SGPRBlocks
;
3822 if (calculateGPRBlocks(getFeatureBits(), ReserveVCC
, ReserveFlatScr
,
3823 ReserveXNACK
, EnableWavefrontSize32
, NextFreeVGPR
,
3824 VGPRRange
, NextFreeSGPR
, SGPRRange
, VGPRBlocks
,
3828 if (!isUInt
<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH
>(
3830 return OutOfRangeError(VGPRRange
);
3831 AMDHSA_BITS_SET(KD
.compute_pgm_rsrc1
,
3832 COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT
, VGPRBlocks
);
3834 if (!isUInt
<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH
>(
3836 return OutOfRangeError(SGPRRange
);
3837 AMDHSA_BITS_SET(KD
.compute_pgm_rsrc1
,
3838 COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT
,
3841 if (!isUInt
<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH
>(UserSGPRCount
))
3842 return TokError("too many user SGPRs enabled");
3843 AMDHSA_BITS_SET(KD
.compute_pgm_rsrc2
, COMPUTE_PGM_RSRC2_USER_SGPR_COUNT
,
3846 getTargetStreamer().EmitAmdhsaKernelDescriptor(
3847 getSTI(), KernelName
, KD
, NextFreeVGPR
, NextFreeSGPR
, ReserveVCC
,
3848 ReserveFlatScr
, ReserveXNACK
);
3852 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectVersion() {
3856 if (ParseDirectiveMajorMinor(Major
, Minor
))
3859 getTargetStreamer().EmitDirectiveHSACodeObjectVersion(Major
, Minor
);
bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() {
  uint32_t Major;
  uint32_t Minor;
  uint32_t Stepping;
  StringRef VendorName;
  StringRef ArchName;

  // If this directive has no arguments, then use the ISA version for the
  // targeted GPU.
  if (getLexer().is(AsmToken::EndOfStatement)) {
    AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
    getTargetStreamer().EmitDirectiveHSACodeObjectISA(ISA.Major, ISA.Minor,
                                                      ISA.Stepping,
                                                      "AMD", "AMDGPU");
    return false;
  }

  if (ParseDirectiveMajorMinor(Major, Minor))
    return true;

  if (getLexer().isNot(AsmToken::Comma))
    return TokError("stepping version number required, comma expected");
  Lex();

  if (ParseAsAbsoluteExpression(Stepping))
    return TokError("invalid stepping version");

  if (getLexer().isNot(AsmToken::Comma))
    return TokError("vendor name required, comma expected");
  Lex();

  if (getLexer().isNot(AsmToken::String))
    return TokError("invalid vendor name");

  VendorName = getLexer().getTok().getStringContents();
  Lex();

  if (getLexer().isNot(AsmToken::Comma))
    return TokError("arch name required, comma expected");
  Lex();

  if (getLexer().isNot(AsmToken::String))
    return TokError("invalid arch name");

  ArchName = getLexer().getTok().getStringContents();
  Lex();

  getTargetStreamer().EmitDirectiveHSACodeObjectISA(Major, Minor, Stepping,
                                                    VendorName, ArchName);
  return false;
}
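// Illustrative only: the two forms accepted above. With no arguments the ISA
// version is derived from the subtarget; otherwise all five fields are
// required, in the order parsed above:
//
//   .hsa_code_object_isa
//   .hsa_code_object_isa 9,0,0,"AMD","AMDGPU"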
bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID,
                                               amd_kernel_code_t &Header) {
  // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing
  // assembly for backwards compatibility.
  if (ID == "max_scratch_backing_memory_byte_size") {
    Parser.eatToEndOfStatement();
    return false;
  }

  SmallString<40> ErrStr;
  raw_svector_ostream Err(ErrStr);
  if (!parseAmdKernelCodeField(ID, getParser(), Header, Err)) {
    return TokError(Err.str());
  }
  Lex();

  if (ID == "enable_wavefront_size32") {
    if (Header.code_properties & AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) {
      if (!isGFX10())
        return TokError("enable_wavefront_size32=1 is only allowed on GFX10+");
      if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
        return TokError("enable_wavefront_size32=1 requires +WavefrontSize32");
    } else {
      if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
        return TokError("enable_wavefront_size32=0 requires +WavefrontSize64");
    }
  }

  if (ID == "wavefront_size") {
    if (Header.wavefront_size == 5) {
      if (!isGFX10())
        return TokError("wavefront_size=5 is only allowed on GFX10+");
      if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
        return TokError("wavefront_size=5 requires +WavefrontSize32");
    } else if (Header.wavefront_size == 6) {
      if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
        return TokError("wavefront_size=6 requires +WavefrontSize64");
    }
  }

  if (ID == "enable_wgp_mode") {
    if (G_00B848_WGP_MODE(Header.compute_pgm_resource_registers) && !isGFX10())
      return TokError("enable_wgp_mode=1 is only allowed on GFX10+");
  }

  if (ID == "enable_mem_ordered") {
    if (G_00B848_MEM_ORDERED(Header.compute_pgm_resource_registers) && !isGFX10())
      return TokError("enable_mem_ordered=1 is only allowed on GFX10+");
  }

  if (ID == "enable_fwd_progress") {
    if (G_00B848_FWD_PROGRESS(Header.compute_pgm_resource_registers) && !isGFX10())
      return TokError("enable_fwd_progress=1 is only allowed on GFX10+");
  }

  return false;
}
bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() {
  amd_kernel_code_t Header;
  AMDGPU::initDefaultAMDKernelCodeT(Header, &getSTI());

  while (true) {
    // Lex EndOfStatement. This is in a while loop, because lexing a comment
    // will set the current token to EndOfStatement.
    while (getLexer().is(AsmToken::EndOfStatement))
      Lex();

    if (getLexer().isNot(AsmToken::Identifier))
      return TokError("expected value identifier or .end_amd_kernel_code_t");

    StringRef ID = getLexer().getTok().getIdentifier();
    Lex();

    if (ID == ".end_amd_kernel_code_t")
      break;

    if (ParseAMDKernelCodeTValue(ID, Header))
      return true;
  }

  getTargetStreamer().EmitAMDKernelCodeT(Header);

  return false;
}
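// Illustrative only: this directive consumes "field = value" lines between
// the two markers, each handed to ParseAMDKernelCodeTValue above, e.g.
//
//   .amd_kernel_code_t
//     wavefront_size = 6
//     enable_sgpr_kernarg_segment_ptr = 1
//   .end_amd_kernel_code_t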
bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() {
  if (getLexer().isNot(AsmToken::Identifier))
    return TokError("expected symbol name");

  StringRef KernelName = Parser.getTok().getString();

  getTargetStreamer().EmitAMDGPUSymbolType(KernelName,
                                           ELF::STT_AMDGPU_HSA_KERNEL);
  Lex();
  if (!AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI()))
    KernelScope.initialize(getContext());
  return false;
}
bool AMDGPUAsmParser::ParseDirectiveISAVersion() {
  if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) {
    return Error(getParser().getTok().getLoc(),
                 ".amd_amdgpu_isa directive is not available on non-amdgcn "
                 "architectures");
  }

  auto ISAVersionStringFromASM = getLexer().getTok().getStringContents();

  std::string ISAVersionStringFromSTI;
  raw_string_ostream ISAVersionStreamFromSTI(ISAVersionStringFromSTI);
  IsaInfo::streamIsaVersion(&getSTI(), ISAVersionStreamFromSTI);

  if (ISAVersionStringFromASM != ISAVersionStreamFromSTI.str()) {
    return Error(getParser().getTok().getLoc(),
                 ".amd_amdgpu_isa directive does not match triple and/or mcpu "
                 "arguments specified through the command line");
  }

  getTargetStreamer().EmitISAVersion(ISAVersionStreamFromSTI.str());
  Lex();

  return false;
}
bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() {
  const char *AssemblerDirectiveBegin;
  const char *AssemblerDirectiveEnd;
  std::tie(AssemblerDirectiveBegin, AssemblerDirectiveEnd) =
      AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())
          ? std::make_tuple(HSAMD::V3::AssemblerDirectiveBegin,
                            HSAMD::V3::AssemblerDirectiveEnd)
          : std::make_tuple(HSAMD::AssemblerDirectiveBegin,
                            HSAMD::AssemblerDirectiveEnd);

  if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) {
    return Error(getParser().getTok().getLoc(),
                 (Twine(AssemblerDirectiveBegin) + Twine(" directive is "
                 "not available on non-amdhsa OSes")).str());
  }

  std::string HSAMetadataString;
  if (ParseToEndDirective(AssemblerDirectiveBegin, AssemblerDirectiveEnd,
                          HSAMetadataString))
    return true;

  if (IsaInfo::hasCodeObjectV3(&getSTI())) {
    if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString))
      return Error(getParser().getTok().getLoc(), "invalid HSA metadata");
  } else {
    if (!getTargetStreamer().EmitHSAMetadataV2(HSAMetadataString))
      return Error(getParser().getTok().getLoc(), "invalid HSA metadata");
  }

  return false;
}
/// Common code to parse out a block of text (typically YAML) between start and
/// end directives.
bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin,
                                          const char *AssemblerDirectiveEnd,
                                          std::string &CollectString) {

  raw_string_ostream CollectStream(CollectString);

  getLexer().setSkipSpace(false);

  bool FoundEnd = false;
  while (!getLexer().is(AsmToken::Eof)) {
    while (getLexer().is(AsmToken::Space)) {
      CollectStream << getLexer().getTok().getString();
      Lex();
    }

    if (getLexer().is(AsmToken::Identifier)) {
      StringRef ID = getLexer().getTok().getIdentifier();
      if (ID == AssemblerDirectiveEnd) {
        Lex();
        FoundEnd = true;
        break;
      }
    }

    CollectStream << Parser.parseStringToEndOfStatement()
                  << getContext().getAsmInfo()->getSeparatorString();

    Parser.eatToEndOfStatement();
  }

  getLexer().setSkipSpace(true);

  if (getLexer().is(AsmToken::Eof) && !FoundEnd) {
    return TokError(Twine("expected directive ") +
                    Twine(AssemblerDirectiveEnd) + Twine(" not found"));
  }

  CollectStream.flush();
  return false;
}
/// Parse the assembler directive for new MsgPack-format PAL metadata.
bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() {
  std::string String;
  if (ParseToEndDirective(AMDGPU::PALMD::AssemblerDirectiveBegin,
                          AMDGPU::PALMD::AssemblerDirectiveEnd, String))
    return true;

  auto PALMetadata = getTargetStreamer().getPALMetadata();
  if (!PALMetadata->setFromString(String))
    return Error(getParser().getTok().getLoc(), "invalid PAL metadata");
  return false;
}
/// Parse the assembler directive for old linear-format PAL metadata.
bool AMDGPUAsmParser::ParseDirectivePALMetadata() {
  if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) {
    return Error(getParser().getTok().getLoc(),
                 (Twine(PALMD::AssemblerDirective) + Twine(" directive is "
                 "not available on non-amdpal OSes")).str());
  }

  auto PALMetadata = getTargetStreamer().getPALMetadata();
  PALMetadata->setLegacy();
  for (;;) {
    uint32_t Key, Value;
    if (ParseAsAbsoluteExpression(Key)) {
      return TokError(Twine("invalid value in ") +
                      Twine(PALMD::AssemblerDirective));
    }
    if (getLexer().isNot(AsmToken::Comma)) {
      return TokError(Twine("expected an even number of values in ") +
                      Twine(PALMD::AssemblerDirective));
    }
    Lex();
    if (ParseAsAbsoluteExpression(Value)) {
      return TokError(Twine("invalid value in ") +
                      Twine(PALMD::AssemblerDirective));
    }
    PALMetadata->setRegister(Key, Value);
    if (getLexer().isNot(AsmToken::Comma))
      break;
    Lex();
  }
  return false;
}
/// ParseDirectiveAMDGPULDS
///  ::= .amdgpu_lds identifier ',' size_expression [',' align_expression]
bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() {
  if (getParser().checkForValidSection())
    return true;

  StringRef Name;
  SMLoc NameLoc = getLexer().getLoc();
  if (getParser().parseIdentifier(Name))
    return TokError("expected identifier in directive");

  MCSymbol *Symbol = getContext().getOrCreateSymbol(Name);
  if (parseToken(AsmToken::Comma, "expected ','"))
    return true;

  unsigned LocalMemorySize = AMDGPU::IsaInfo::getLocalMemorySize(&getSTI());

  int64_t Size;
  SMLoc SizeLoc = getLexer().getLoc();
  if (getParser().parseAbsoluteExpression(Size))
    return true;
  if (Size < 0)
    return Error(SizeLoc, "size must be non-negative");
  if (Size > LocalMemorySize)
    return Error(SizeLoc, "size is too large");

  int64_t Align = 4;
  if (getLexer().is(AsmToken::Comma)) {
    Lex();
    SMLoc AlignLoc = getLexer().getLoc();
    if (getParser().parseAbsoluteExpression(Align))
      return true;
    if (Align < 0 || !isPowerOf2_64(Align))
      return Error(AlignLoc, "alignment must be a power of two");

    // Alignment larger than the size of LDS is possible in theory, as long
    // as the linker manages to place to symbol at address 0, but we do want
    // to make sure the alignment fits nicely into a 32-bit integer.
    if (Align >= 1u << 31)
      return Error(AlignLoc, "alignment is too large");
  }

  if (parseToken(AsmToken::EndOfStatement,
                 "unexpected token in '.amdgpu_lds' directive"))
    return true;

  Symbol->redefineIfPossible();
  if (!Symbol->isUndefined())
    return Error(NameLoc, "invalid symbol redefinition");

  getTargetStreamer().emitAMDGPULDS(Symbol, Size, Align);
  return false;
}
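// Illustrative only: a well-formed use of the directive parsed above, where
// the size must fit into LDS and the optional alignment must be a power of
// two:
//
//   .amdgpu_lds my_lds_var, 4096, 16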
bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
  StringRef IDVal = DirectiveID.getString();

  if (AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
    if (IDVal == ".amdgcn_target")
      return ParseDirectiveAMDGCNTarget();

    if (IDVal == ".amdhsa_kernel")
      return ParseDirectiveAMDHSAKernel();

    // TODO: Restructure/combine with PAL metadata directive.
    if (IDVal == AMDGPU::HSAMD::V3::AssemblerDirectiveBegin)
      return ParseDirectiveHSAMetadata();
  } else {
    if (IDVal == ".hsa_code_object_version")
      return ParseDirectiveHSACodeObjectVersion();

    if (IDVal == ".hsa_code_object_isa")
      return ParseDirectiveHSACodeObjectISA();

    if (IDVal == ".amd_kernel_code_t")
      return ParseDirectiveAMDKernelCodeT();

    if (IDVal == ".amdgpu_hsa_kernel")
      return ParseDirectiveAMDGPUHsaKernel();

    if (IDVal == ".amd_amdgpu_isa")
      return ParseDirectiveISAVersion();

    if (IDVal == AMDGPU::HSAMD::AssemblerDirectiveBegin)
      return ParseDirectiveHSAMetadata();
  }

  if (IDVal == ".amdgpu_lds")
    return ParseDirectiveAMDGPULDS();

  if (IDVal == PALMD::AssemblerDirectiveBegin)
    return ParseDirectivePALMetadataBegin();

  if (IDVal == PALMD::AssemblerDirective)
    return ParseDirectivePALMetadata();

  return true;
}
bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI,
                                           unsigned RegNo) const {

  for (MCRegAliasIterator R(AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15, &MRI, true);
       R.isValid(); ++R) {
    if (*R == RegNo)
      return isGFX9() || isGFX10();
  }

  // GFX10 has 2 more SGPRs 104 and 105.
  for (MCRegAliasIterator R(AMDGPU::SGPR104_SGPR105, &MRI, true);
       R.isValid(); ++R) {
    if (*R == RegNo)
      return hasSGPR104_SGPR105();
  }

  switch (RegNo) {
  case AMDGPU::SRC_SHARED_BASE:
  case AMDGPU::SRC_SHARED_LIMIT:
  case AMDGPU::SRC_PRIVATE_BASE:
  case AMDGPU::SRC_PRIVATE_LIMIT:
  case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
    return !isCI() && !isSI() && !isVI();
  case AMDGPU::TBA:
  case AMDGPU::TBA_LO:
  case AMDGPU::TBA_HI:
  case AMDGPU::TMA:
  case AMDGPU::TMA_LO:
  case AMDGPU::TMA_HI:
    return !isGFX9() && !isGFX10();
  case AMDGPU::XNACK_MASK:
  case AMDGPU::XNACK_MASK_LO:
  case AMDGPU::XNACK_MASK_HI:
    return !isCI() && !isSI() && !isGFX10() && hasXNACK();
  case AMDGPU::SGPR_NULL:
    return isGFX10();
  default:
    break;
  }

  if (isCI())
    return true;

  if (isSI() || isGFX10()) {
    // No flat_scr on SI.
    // On GFX10 flat scratch is not a valid register operand and can only be
    // accessed with s_setreg/s_getreg.
    switch (RegNo) {
    case AMDGPU::FLAT_SCR:
    case AMDGPU::FLAT_SCR_LO:
    case AMDGPU::FLAT_SCR_HI:
      return false;
    default:
      return true;
    }
  }

  // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that
  // SI/CI have.
  for (MCRegAliasIterator R(AMDGPU::SGPR102_SGPR103, &MRI, true);
       R.isValid(); ++R) {
    if (*R == RegNo)
      return hasSGPR102_SGPR103();
  }

  return true;
}
OperandMatchResultTy
AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic,
                              OperandMode Mode) {
  // Try to parse with a custom parser
  OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic);

  // If we successfully parsed the operand or if there as an error parsing,
  // we are done.
  //
  // If we are parsing after we reach EndOfStatement then this means we
  // are appending default values to the Operands list. This is only done
  // by custom parser, so we shouldn't continue on to the generic parsing.
  if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail ||
      getLexer().is(AsmToken::EndOfStatement))
    return ResTy;

  if (Mode == OperandMode_NSA && getLexer().is(AsmToken::LBrac)) {
    unsigned Prefix = Operands.size();
    SMLoc LBraceLoc = getTok().getLoc();
    Parser.Lex(); // eat the '['

    for (;;) {
      ResTy = parseReg(Operands);
      if (ResTy != MatchOperand_Success)
        return ResTy;

      if (getLexer().is(AsmToken::RBrac))
        break;

      if (getLexer().isNot(AsmToken::Comma))
        return MatchOperand_ParseFail;
      Parser.Lex();
    }

    if (Operands.size() - Prefix > 1) {
      Operands.insert(Operands.begin() + Prefix,
                      AMDGPUOperand::CreateToken(this, "[", LBraceLoc));
      Operands.push_back(AMDGPUOperand::CreateToken(this, "]",
                                                    getTok().getLoc()));
    }

    Parser.Lex(); // eat the ']'
    return MatchOperand_Success;
  }

  return parseRegOrImm(Operands);
}
StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) {
  // Clear any forced encodings from the previous instruction.
  setForcedEncodingSize(0);
  setForcedDPP(false);
  setForcedSDWA(false);

  if (Name.endswith("_e64")) {
    setForcedEncodingSize(64);
    return Name.substr(0, Name.size() - 4);
  } else if (Name.endswith("_e32")) {
    setForcedEncodingSize(32);
    return Name.substr(0, Name.size() - 4);
  } else if (Name.endswith("_dpp")) {
    setForcedDPP(true);
    return Name.substr(0, Name.size() - 4);
  } else if (Name.endswith("_sdwa")) {
    setForcedSDWA(true);
    return Name.substr(0, Name.size() - 5);
  }
  return Name;
}
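// Illustrative only: "v_add_f32_e64 v0, v1, v2" is matched by stripping the
// "_e64" suffix above and forcing the 64-bit encoding, while something like
// "v_mov_b32_sdwa ..." strips "_sdwa" and forces the SDWA form.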
bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info,
                                       StringRef Name,
                                       SMLoc NameLoc, OperandVector &Operands) {
  // Add the instruction mnemonic
  Name = parseMnemonicSuffix(Name);
  Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc));

  bool IsMIMG = Name.startswith("image_");

  while (!getLexer().is(AsmToken::EndOfStatement)) {
    OperandMode Mode = OperandMode_Default;
    if (IsMIMG && isGFX10() && Operands.size() == 2)
      Mode = OperandMode_NSA;
    OperandMatchResultTy Res = parseOperand(Operands, Name, Mode);

    // Eat the comma or space if there is one.
    if (getLexer().is(AsmToken::Comma))
      Parser.Lex();

    switch (Res) {
      case MatchOperand_Success: break;
      case MatchOperand_ParseFail:
        // FIXME: use real operand location rather than the current location.
        Error(getLexer().getLoc(), "failed parsing operand.");
        while (!getLexer().is(AsmToken::EndOfStatement)) {
          Parser.Lex();
        }
        return true;
      case MatchOperand_NoMatch:
        // FIXME: use real operand location rather than the current location.
        Error(getLexer().getLoc(), "not a valid operand.");
        while (!getLexer().is(AsmToken::EndOfStatement)) {
          Parser.Lex();
        }
        return true;
    }
  }

  return false;
}
//===----------------------------------------------------------------------===//
// Utility functions
//===----------------------------------------------------------------------===//

OperandMatchResultTy
AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, int64_t &IntVal) {

  if (!trySkipId(Prefix, AsmToken::Colon))
    return MatchOperand_NoMatch;

  return parseExpr(IntVal) ? MatchOperand_Success : MatchOperand_ParseFail;
}

OperandMatchResultTy
AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
                                    AMDGPUOperand::ImmTy ImmTy,
                                    bool (*ConvertResult)(int64_t&)) {
  SMLoc S = getLoc();
  int64_t Value = 0;

  OperandMatchResultTy Res = parseIntWithPrefix(Prefix, Value);
  if (Res != MatchOperand_Success)
    return Res;

  if (ConvertResult && !ConvertResult(Value)) {
    Error(S, "invalid " + StringRef(Prefix) + " value.");
  }

  Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy));
  return MatchOperand_Success;
}
OperandMatchResultTy
AMDGPUAsmParser::parseOperandArrayWithPrefix(const char *Prefix,
                                             OperandVector &Operands,
                                             AMDGPUOperand::ImmTy ImmTy,
                                             bool (*ConvertResult)(int64_t&)) {
  SMLoc S = getLoc();
  if (!trySkipId(Prefix, AsmToken::Colon))
    return MatchOperand_NoMatch;

  if (!skipToken(AsmToken::LBrac, "expected a left square bracket"))
    return MatchOperand_ParseFail;

  unsigned Val = 0;
  const unsigned MaxSize = 4;

  // FIXME: How to verify the number of elements matches the number of src
  // operands?
  for (int I = 0; ; ++I) {
    int64_t Op;
    SMLoc Loc = getLoc();
    if (!parseExpr(Op))
      return MatchOperand_ParseFail;

    if (Op != 0 && Op != 1) {
      Error(Loc, "invalid " + StringRef(Prefix) + " value.");
      return MatchOperand_ParseFail;
    }

    Val |= (Op << I);

    if (trySkipToken(AsmToken::RBrac))
      break;

    if (I + 1 == MaxSize) {
      Error(getLoc(), "expected a closing square bracket");
      return MatchOperand_ParseFail;
    }

    if (!skipToken(AsmToken::Comma, "expected a comma"))
      return MatchOperand_ParseFail;
  }

  Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy));
  return MatchOperand_Success;
}
OperandMatchResultTy
AMDGPUAsmParser::parseNamedBit(const char *Name, OperandVector &Operands,
                               AMDGPUOperand::ImmTy ImmTy) {
  int64_t Bit = 0;
  SMLoc S = Parser.getTok().getLoc();

  // We are at the end of the statement, and this is a default argument, so
  // use a default value.
  if (getLexer().isNot(AsmToken::EndOfStatement)) {
    switch(getLexer().getKind()) {
      case AsmToken::Identifier: {
        StringRef Tok = Parser.getTok().getString();
        if (Tok == Name) {
          if (Tok == "r128" && isGFX9())
            Error(S, "r128 modifier is not supported on this GPU");
          if (Tok == "a16" && !isGFX9() && !isGFX10())
            Error(S, "a16 modifier is not supported on this GPU");
          Bit = 1;
          Parser.Lex();
        } else if (Tok.startswith("no") && Tok.endswith(Name)) {
          Bit = 0;
          Parser.Lex();
        } else {
          return MatchOperand_NoMatch;
        }
        break;
      }
      default:
        return MatchOperand_NoMatch;
    }
  }

  if (!isGFX10() && ImmTy == AMDGPUOperand::ImmTyDLC)
    return MatchOperand_ParseFail;

  Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy));
  return MatchOperand_Success;
}
static void addOptionalImmOperand(
  MCInst& Inst, const OperandVector& Operands,
  AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx,
  AMDGPUOperand::ImmTy ImmT,
  int64_t Default = 0) {
  auto i = OptionalIdx.find(ImmT);
  if (i != OptionalIdx.end()) {
    unsigned Idx = i->second;
    ((AMDGPUOperand &)*Operands[Idx]).addImmOperands(Inst, 1);
  } else {
    Inst.addOperand(MCOperand::createImm(Default));
  }
}
OperandMatchResultTy
AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix, StringRef &Value) {
  if (getLexer().isNot(AsmToken::Identifier)) {
    return MatchOperand_NoMatch;
  }

  StringRef Tok = Parser.getTok().getString();
  if (Tok != Prefix) {
    return MatchOperand_NoMatch;
  }

  Parser.Lex();
  if (getLexer().isNot(AsmToken::Colon)) {
    return MatchOperand_ParseFail;
  }

  Parser.Lex();
  if (getLexer().isNot(AsmToken::Identifier)) {
    return MatchOperand_ParseFail;
  }

  Value = Parser.getTok().getString();
  return MatchOperand_Success;
}
// dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their
// values to live in a joint format operand in the MCInst encoding.
OperandMatchResultTy
AMDGPUAsmParser::parseDfmtNfmt(OperandVector &Operands) {
  SMLoc S = Parser.getTok().getLoc();
  int64_t Dfmt = 0, Nfmt = 0;
  // dfmt and nfmt can appear in either order, and each is optional.
  bool GotDfmt = false, GotNfmt = false;
  while (!GotDfmt || !GotNfmt) {
    if (!GotDfmt) {
      auto Res = parseIntWithPrefix("dfmt", Dfmt);
      if (Res != MatchOperand_NoMatch) {
        if (Res != MatchOperand_Success)
          return Res;
        if (Dfmt >= 16) {
          Error(Parser.getTok().getLoc(), "out of range dfmt");
          return MatchOperand_ParseFail;
        }
        GotDfmt = true;
        Parser.Lex();
        continue;
      }
    }
    if (!GotNfmt) {
      auto Res = parseIntWithPrefix("nfmt", Nfmt);
      if (Res != MatchOperand_NoMatch) {
        if (Res != MatchOperand_Success)
          return Res;
        if (Nfmt >= 8) {
          Error(Parser.getTok().getLoc(), "out of range nfmt");
          return MatchOperand_ParseFail;
        }
        GotNfmt = true;
        Parser.Lex();
        continue;
      }
    }
    break;
  }
  if (!GotDfmt && !GotNfmt)
    return MatchOperand_NoMatch;
  auto Format = Dfmt | Nfmt << 4;
  Operands.push_back(
      AMDGPUOperand::CreateImm(this, Format, S, AMDGPUOperand::ImmTyFORMAT));
  return MatchOperand_Success;
}
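// Illustrative only: for "tbuffer_load_format_x v0, off, s[0:3], dfmt:1,
// nfmt:2, 0" the two keys parsed above are folded into a single format
// operand whose value is 1 | (2 << 4) = 33.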
//===----------------------------------------------------------------------===//
// ds
//===----------------------------------------------------------------------===//

void AMDGPUAsmParser::cvtDSOffset01(MCInst &Inst,
                                    const OperandVector &Operands) {
  OptionalImmIndexMap OptionalIdx;

  for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);

    // Add the register arguments
    if (Op.isReg()) {
      Op.addRegOperands(Inst, 1);
      continue;
    }

    // Handle optional arguments
    OptionalIdx[Op.getImmTy()] = i;
  }

  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset0);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset1);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);

  Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
}
void AMDGPUAsmParser::cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
                                bool IsGdsHardcoded) {
  OptionalImmIndexMap OptionalIdx;

  for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);

    // Add the register arguments
    if (Op.isReg()) {
      Op.addRegOperands(Inst, 1);
      continue;
    }

    if (Op.isToken() && Op.getToken() == "gds") {
      IsGdsHardcoded = true;
      continue;
    }

    // Handle optional arguments
    OptionalIdx[Op.getImmTy()] = i;
  }

  AMDGPUOperand::ImmTy OffsetType =
    (Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx10 ||
     Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx6_gfx7 ||
     Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_vi) ? AMDGPUOperand::ImmTySwizzle :
                                                      AMDGPUOperand::ImmTyOffset;

  addOptionalImmOperand(Inst, Operands, OptionalIdx, OffsetType);

  if (!IsGdsHardcoded) {
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);
  }
  Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
}
void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) {
  OptionalImmIndexMap OptionalIdx;

  unsigned OperandIdx[4];
  unsigned EnMask = 0;
  int SrcIdx = 0;

  for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);

    // Add the register arguments
    if (Op.isReg()) {
      assert(SrcIdx < 4);
      OperandIdx[SrcIdx] = Inst.size();
      Op.addRegOperands(Inst, 1);
      ++SrcIdx;
      continue;
    }

    if (Op.isOff()) {
      assert(SrcIdx < 4);
      OperandIdx[SrcIdx] = Inst.size();
      Inst.addOperand(MCOperand::createReg(AMDGPU::NoRegister));
      ++SrcIdx;
      continue;
    }

    if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) {
      Op.addImmOperands(Inst, 1);
      continue;
    }

    if (Op.isToken() && Op.getToken() == "done")
      continue;

    // Handle optional arguments
    OptionalIdx[Op.getImmTy()] = i;
  }

  assert(SrcIdx == 4);

  bool Compr = false;
  if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) {
    Compr = true;
    Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]);
    Inst.getOperand(OperandIdx[2]).setReg(AMDGPU::NoRegister);
    Inst.getOperand(OperandIdx[3]).setReg(AMDGPU::NoRegister);
  }

  for (auto i = 0; i < SrcIdx; ++i) {
    if (Inst.getOperand(OperandIdx[i]).getReg() != AMDGPU::NoRegister) {
      EnMask |= Compr ? (0x3 << i * 2) : (0x1 << i);
    }
  }

  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr);

  Inst.addOperand(MCOperand::createImm(EnMask));
}
//===----------------------------------------------------------------------===//
// s_waitcnt
//===----------------------------------------------------------------------===//

static bool
encodeCnt(
  const AMDGPU::IsaVersion ISA,
  int64_t &IntVal,
  int64_t CntVal,
  bool Saturate,
  unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned),
  unsigned (*decode)(const IsaVersion &Version, unsigned))
{
  bool Failed = false;

  IntVal = encode(ISA, IntVal, CntVal);
  if (CntVal != decode(ISA, IntVal)) {
    if (Saturate) {
      IntVal = encode(ISA, IntVal, -1);
    } else {
      Failed = true;
    }
  }
  return Failed;
}
bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) {

  SMLoc CntLoc = getLoc();
  StringRef CntName = getTokenStr();

  if (!skipToken(AsmToken::Identifier, "expected a counter name") ||
      !skipToken(AsmToken::LParen, "expected a left parenthesis"))
    return false;

  int64_t CntVal;
  SMLoc ValLoc = getLoc();
  if (!parseExpr(CntVal))
    return false;

  AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());

  bool Failed = true;
  bool Sat = CntName.endswith("_sat");

  if (CntName == "vmcnt" || CntName == "vmcnt_sat") {
    Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt);
  } else if (CntName == "expcnt" || CntName == "expcnt_sat") {
    Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt);
  } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") {
    Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt);
  } else {
    Error(CntLoc, "invalid counter name " + CntName);
    return false;
  }

  if (Failed) {
    Error(ValLoc, "too large value for " + CntName);
    return false;
  }

  if (!skipToken(AsmToken::RParen, "expected a closing parenthesis"))
    return false;

  if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) {
    if (isToken(AsmToken::EndOfStatement)) {
      Error(getLoc(), "expected a counter name");
      return false;
    }
  }

  return true;
}
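// Illustrative only: "s_waitcnt vmcnt(0) lgkmcnt(0)" is handled one counter
// at a time by parseCnt above; each call updates the corresponding field of
// the combined bitmask, which starts out as getWaitcntBitMask(ISA).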
OperandMatchResultTy
AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) {
  AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
  int64_t Waitcnt = getWaitcntBitMask(ISA);
  SMLoc S = getLoc();

  // If parse failed, do not return error code
  // to avoid excessive error messages.
  if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
    while (parseCnt(Waitcnt) && !isToken(AsmToken::EndOfStatement));
  } else {
    parseExpr(Waitcnt);
  }

  Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S));
  return MatchOperand_Success;
}

bool
AMDGPUOperand::isSWaitCnt() const {
  return isImm();
}
//===----------------------------------------------------------------------===//
// hwreg
//===----------------------------------------------------------------------===//

bool
AMDGPUAsmParser::parseHwregBody(OperandInfoTy &HwReg,
                                int64_t &Offset,
                                int64_t &Width) {
  using namespace llvm::AMDGPU::Hwreg;

  // The register may be specified by name or using a numeric code
  if (isToken(AsmToken::Identifier) &&
      (HwReg.Id = getHwregId(getTokenStr())) >= 0) {
    HwReg.IsSymbolic = true;
    lex(); // skip message name
  } else if (!parseExpr(HwReg.Id)) {
    return false;
  }

  if (trySkipToken(AsmToken::RParen))
    return true;

  // parse optional params
  return
    skipToken(AsmToken::Comma, "expected a comma or a closing parenthesis") &&
    parseExpr(Offset) &&
    skipToken(AsmToken::Comma, "expected a comma") &&
    parseExpr(Width) &&
    skipToken(AsmToken::RParen, "expected a closing parenthesis");
}

bool
AMDGPUAsmParser::validateHwreg(const OperandInfoTy &HwReg,
                               const int64_t Offset,
                               const int64_t Width,
                               const SMLoc Loc) {

  using namespace llvm::AMDGPU::Hwreg;

  if (HwReg.IsSymbolic && !isValidHwreg(HwReg.Id, getSTI())) {
    Error(Loc, "specified hardware register is not supported on this GPU");
    return false;
  } else if (!isValidHwreg(HwReg.Id)) {
    Error(Loc, "invalid code of hardware register: only 6-bit values are legal");
    return false;
  } else if (!isValidHwregOffset(Offset)) {
    Error(Loc, "invalid bit offset: only 5-bit values are legal");
    return false;
  } else if (!isValidHwregWidth(Width)) {
    Error(Loc, "invalid bitfield width: only values from 1 to 32 are legal");
    return false;
  }
  return true;
}

OperandMatchResultTy
AMDGPUAsmParser::parseHwreg(OperandVector &Operands) {
  using namespace llvm::AMDGPU::Hwreg;

  int64_t ImmVal = 0;
  SMLoc Loc = getLoc();

  // If parse failed, do not return error code
  // to avoid excessive error messages.
  if (trySkipId("hwreg", AsmToken::LParen)) {
    OperandInfoTy HwReg(ID_UNKNOWN_);
    int64_t Offset = OFFSET_DEFAULT_;
    int64_t Width = WIDTH_DEFAULT_;
    if (parseHwregBody(HwReg, Offset, Width) &&
        validateHwreg(HwReg, Offset, Width, Loc)) {
      ImmVal = encodeHwreg(HwReg.Id, Offset, Width);
    }
  } else if (parseExpr(ImmVal)) {
    if (ImmVal < 0 || !isUInt<16>(ImmVal))
      Error(Loc, "invalid immediate: only 16-bit values are legal");
  }

  Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTyHwreg));
  return MatchOperand_Success;
}

bool AMDGPUOperand::isHwreg() const {
  return isImmTy(ImmTyHwreg);
}
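// Illustrative only: both of these forms reach parseHwreg above; the first is
// validated symbolically, the second supplies the offset and width fields
// explicitly:
//
//   s_getreg_b32 s0, hwreg(HW_REG_HW_ID)
//   s_getreg_b32 s0, hwreg(HW_REG_MODE, 0, 32)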
//===----------------------------------------------------------------------===//
// sendmsg
//===----------------------------------------------------------------------===//

bool
AMDGPUAsmParser::parseSendMsgBody(OperandInfoTy &Msg,
                                  OperandInfoTy &Op,
                                  OperandInfoTy &Stream) {
  using namespace llvm::AMDGPU::SendMsg;

  if (isToken(AsmToken::Identifier) && (Msg.Id = getMsgId(getTokenStr())) >= 0) {
    Msg.IsSymbolic = true;
    lex(); // skip message name
  } else if (!parseExpr(Msg.Id)) {
    return false;
  }

  if (trySkipToken(AsmToken::Comma)) {
    Op.IsDefined = true;
    if (isToken(AsmToken::Identifier) &&
        (Op.Id = getMsgOpId(Msg.Id, getTokenStr())) >= 0) {
      lex(); // skip operation name
    } else if (!parseExpr(Op.Id)) {
      return false;
    }

    if (trySkipToken(AsmToken::Comma)) {
      Stream.IsDefined = true;
      if (!parseExpr(Stream.Id))
        return false;
    }
  }

  return skipToken(AsmToken::RParen, "expected a closing parenthesis");
}

bool
AMDGPUAsmParser::validateSendMsg(const OperandInfoTy &Msg,
                                 const OperandInfoTy &Op,
                                 const OperandInfoTy &Stream,
                                 const SMLoc S) {
  using namespace llvm::AMDGPU::SendMsg;

  // Validation strictness depends on whether message is specified
  // in a symbolc or in a numeric form. In the latter case
  // only encoding possibility is checked.
  bool Strict = Msg.IsSymbolic;

  if (!isValidMsgId(Msg.Id, getSTI(), Strict)) {
    Error(S, "invalid message id");
    return false;
  } else if (Strict && (msgRequiresOp(Msg.Id) != Op.IsDefined)) {
    Error(S, Op.IsDefined ?
             "message does not support operations" :
             "missing message operation");
    return false;
  } else if (!isValidMsgOp(Msg.Id, Op.Id, Strict)) {
    Error(S, "invalid operation id");
    return false;
  } else if (Strict && !msgSupportsStream(Msg.Id, Op.Id) && Stream.IsDefined) {
    Error(S, "message operation does not support streams");
    return false;
  } else if (!isValidMsgStream(Msg.Id, Op.Id, Stream.Id, Strict)) {
    Error(S, "invalid message stream id");
    return false;
  }
  return true;
}

OperandMatchResultTy
AMDGPUAsmParser::parseSendMsgOp(OperandVector &Operands) {
  using namespace llvm::AMDGPU::SendMsg;

  int64_t ImmVal = 0;
  SMLoc Loc = getLoc();

  // If parse failed, do not return error code
  // to avoid excessive error messages.
  if (trySkipId("sendmsg", AsmToken::LParen)) {
    OperandInfoTy Msg(ID_UNKNOWN_);
    OperandInfoTy Op(OP_NONE_);
    OperandInfoTy Stream(STREAM_ID_NONE_);
    if (parseSendMsgBody(Msg, Op, Stream) &&
        validateSendMsg(Msg, Op, Stream, Loc)) {
      ImmVal = encodeMsg(Msg.Id, Op.Id, Stream.Id);
    }
  } else if (parseExpr(ImmVal)) {
    if (ImmVal < 0 || !isUInt<16>(ImmVal))
      Error(Loc, "invalid immediate: only 16-bit values are legal");
  }

  Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTySendMsg));
  return MatchOperand_Success;
}

bool AMDGPUOperand::isSendMsg() const {
  return isImmTy(ImmTySendMsg);
}
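// Illustrative only: typical operands accepted by parseSendMsgOp above, with
// message, operation and stream id given either symbolically or numerically:
//
//   s_sendmsg sendmsg(MSG_GS, GS_OP_EMIT, 0)
//   s_sendmsg sendmsg(2, 1, 0)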
//===----------------------------------------------------------------------===//
// v_interp
//===----------------------------------------------------------------------===//

OperandMatchResultTy AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) {
  if (getLexer().getKind() != AsmToken::Identifier)
    return MatchOperand_NoMatch;

  StringRef Str = Parser.getTok().getString();
  int Slot = StringSwitch<int>(Str)
    .Case("p10", 0)
    .Case("p20", 1)
    .Case("p0", 2)
    .Default(-1);

  SMLoc S = Parser.getTok().getLoc();
  if (Slot == -1)
    return MatchOperand_ParseFail;

  Parser.Lex();
  Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S,
                                              AMDGPUOperand::ImmTyInterpSlot));
  return MatchOperand_Success;
}

OperandMatchResultTy AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) {
  if (getLexer().getKind() != AsmToken::Identifier)
    return MatchOperand_NoMatch;

  StringRef Str = Parser.getTok().getString();
  if (!Str.startswith("attr"))
    return MatchOperand_NoMatch;

  StringRef Chan = Str.take_back(2);
  int AttrChan = StringSwitch<int>(Chan)
    .Case(".x", 0)
    .Case(".y", 1)
    .Case(".z", 2)
    .Case(".w", 3)
    .Default(-1);
  if (AttrChan == -1)
    return MatchOperand_ParseFail;

  Str = Str.drop_back(2).drop_front(4);

  uint8_t Attr;
  if (Str.getAsInteger(10, Attr))
    return MatchOperand_ParseFail;

  SMLoc S = Parser.getTok().getLoc();
  Parser.Lex();
  if (Attr > 32) {
    Error(S, "out of bounds attr");
    return MatchOperand_Success;
  }

  SMLoc SChan = SMLoc::getFromPointer(Chan.data());

  Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S,
                                              AMDGPUOperand::ImmTyInterpAttr));
  Operands.push_back(AMDGPUOperand::CreateImm(this, AttrChan, SChan,
                                              AMDGPUOperand::ImmTyAttrChan));
  return MatchOperand_Success;
}
//===----------------------------------------------------------------------===//
// exp
//===----------------------------------------------------------------------===//

void AMDGPUAsmParser::errorExpTgt() {
  Error(Parser.getTok().getLoc(), "invalid exp target");
}

OperandMatchResultTy AMDGPUAsmParser::parseExpTgtImpl(StringRef Str,
                                                      uint8_t &Val) {
  if (Str == "null") {
    Val = 9;
    return MatchOperand_Success;
  }

  if (Str.startswith("mrt")) {
    Str = Str.drop_front(3);
    if (Str == "z") { // == mrtz
      Val = 8;
      return MatchOperand_Success;
    }

    if (Str.getAsInteger(10, Val))
      return MatchOperand_ParseFail;

    if (Val > 7)
      errorExpTgt();

    return MatchOperand_Success;
  }

  if (Str.startswith("pos")) {
    Str = Str.drop_front(3);
    if (Str.getAsInteger(10, Val))
      return MatchOperand_ParseFail;

    if (Val > 4 || (Val == 4 && !isGFX10()))
      errorExpTgt();

    Val += 12;
    return MatchOperand_Success;
  }

  if (isGFX10() && Str == "prim") {
    Val = 20;
    return MatchOperand_Success;
  }

  if (Str.startswith("param")) {
    Str = Str.drop_front(5);
    if (Str.getAsInteger(10, Val))
      return MatchOperand_ParseFail;

    if (Val >= 32)
      errorExpTgt();

    Val += 32;
    return MatchOperand_Success;
  }

  if (Str.startswith("invalid_target_")) {
    Str = Str.drop_front(15);
    if (Str.getAsInteger(10, Val))
      return MatchOperand_ParseFail;

    errorExpTgt();
    return MatchOperand_Success;
  }

  return MatchOperand_NoMatch;
}

OperandMatchResultTy AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) {
  uint8_t Val;
  StringRef Str = Parser.getTok().getString();

  auto Res = parseExpTgtImpl(Str, Val);
  if (Res != MatchOperand_Success)
    return Res;

  SMLoc S = Parser.getTok().getLoc();
  Parser.Lex();

  Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S,
                                              AMDGPUOperand::ImmTyExpTgt));
  return MatchOperand_Success;
}
//===----------------------------------------------------------------------===//
// parser helpers
//===----------------------------------------------------------------------===//

bool
AMDGPUAsmParser::isId(const AsmToken &Token, const StringRef Id) const {
  return Token.is(AsmToken::Identifier) && Token.getString() == Id;
}

bool
AMDGPUAsmParser::isId(const StringRef Id) const {
  return isId(getToken(), Id);
}

bool
AMDGPUAsmParser::isToken(const AsmToken::TokenKind Kind) const {
  return getTokenKind() == Kind;
}

bool
AMDGPUAsmParser::trySkipId(const StringRef Id) {
  if (isId(Id)) {
    lex();
    return true;
  }
  return false;
}

bool
AMDGPUAsmParser::trySkipId(const StringRef Id, const AsmToken::TokenKind Kind) {
  if (isId(Id) && peekToken().is(Kind)) {
    lex();
    lex();
    return true;
  }
  return false;
}

bool
AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) {
  if (isToken(Kind)) {
    lex();
    return true;
  }
  return false;
}

bool
AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind,
                           const StringRef ErrMsg) {
  if (!trySkipToken(Kind)) {
    Error(getLoc(), ErrMsg);
    return false;
  }
  return true;
}

bool
AMDGPUAsmParser::parseExpr(int64_t &Imm) {
  return !getParser().parseAbsoluteExpression(Imm);
}

bool
AMDGPUAsmParser::parseExpr(OperandVector &Operands) {
  SMLoc S = getLoc();

  const MCExpr *Expr;
  if (Parser.parseExpression(Expr))
    return false;

  int64_t IntVal;
  if (Expr->evaluateAsAbsolute(IntVal)) {
    Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
  } else {
    Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
  }
  return true;
}

bool
AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) {
  if (isToken(AsmToken::String)) {
    Val = getToken().getStringContents();
    lex();
    return true;
  } else {
    Error(getLoc(), ErrMsg);
    return false;
  }
}

AsmToken
AMDGPUAsmParser::getToken() const {
  return Parser.getTok();
}

AsmToken
AMDGPUAsmParser::peekToken() {
  return getLexer().peekTok();
}

void
AMDGPUAsmParser::peekTokens(MutableArrayRef<AsmToken> Tokens) {
  auto TokCount = getLexer().peekTokens(Tokens);

  for (auto Idx = TokCount; Idx < Tokens.size(); ++Idx)
    Tokens[Idx] = AsmToken(AsmToken::Error, "");
}

AsmToken::TokenKind
AMDGPUAsmParser::getTokenKind() const {
  return getLexer().getKind();
}

SMLoc
AMDGPUAsmParser::getLoc() const {
  return getToken().getLoc();
}

StringRef
AMDGPUAsmParser::getTokenStr() const {
  return getToken().getString();
}

void
AMDGPUAsmParser::lex() {
  Parser.Lex();
}
//===----------------------------------------------------------------------===//
// swizzle
//===----------------------------------------------------------------------===//

LLVM_READNONE
static unsigned
encodeBitmaskPerm(const unsigned AndMask,
                  const unsigned OrMask,
                  const unsigned XorMask) {
  using namespace llvm::AMDGPU::Swizzle;

  return BITMASK_PERM_ENC |
         (AndMask << BITMASK_AND_SHIFT) |
         (OrMask  << BITMASK_OR_SHIFT)  |
         (XorMask << BITMASK_XOR_SHIFT);
}
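// Illustrative only: the hardware's BITMASK_PERM mode remaps each lane id as
// ((lane & AndMask) | OrMask) ^ XorMask; the helper above does not apply that
// transform itself, it only packs the three masks into the swizzle immediate.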
bool
AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
                                      const unsigned MinVal,
                                      const unsigned MaxVal,
                                      const StringRef ErrMsg) {
  for (unsigned i = 0; i < OpNum; ++i) {
    if (!skipToken(AsmToken::Comma, "expected a comma")){
      return false;
    }
    SMLoc ExprLoc = Parser.getTok().getLoc();
    if (!parseExpr(Op[i])) {
      return false;
    }
    if (Op[i] < MinVal || Op[i] > MaxVal) {
      Error(ExprLoc, ErrMsg);
      return false;
    }
  }

  return true;
}
) {
5370 using namespace llvm::AMDGPU::Swizzle
;
5372 int64_t Lane
[LANE_NUM
];
5373 if (parseSwizzleOperands(LANE_NUM
, Lane
, 0, LANE_MAX
,
5374 "expected a 2-bit lane id")) {
5375 Imm
= QUAD_PERM_ENC
;
5376 for (unsigned I
= 0; I
< LANE_NUM
; ++I
) {
5377 Imm
|= Lane
[I
] << (LANE_SHIFT
* I
);
bool
AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) {
  using namespace llvm::AMDGPU::Swizzle;

  SMLoc S = Parser.getTok().getLoc();
  int64_t GroupSize;
  int64_t LaneIdx;

  if (!parseSwizzleOperands(1, &GroupSize,
                            2, 32,
                            "group size must be in the interval [2,32]")) {
    return false;
  }
  if (!isPowerOf2_64(GroupSize)) {
    Error(S, "group size must be a power of two");
    return false;
  }
  if (parseSwizzleOperands(1, &LaneIdx,
                           0, GroupSize - 1,
                           "lane id must be in the interval [0,group size - 1]")) {
    Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0);
    return true;
  }
  return false;
}
bool
AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) {
  using namespace llvm::AMDGPU::Swizzle;

  SMLoc S = Parser.getTok().getLoc();
  int64_t GroupSize;

  if (!parseSwizzleOperands(1, &GroupSize,
                            2, 32, "group size must be in the interval [2,32]")) {
    return false;
  }
  if (!isPowerOf2_64(GroupSize)) {
    Error(S, "group size must be a power of two");
    return false;
  }

  Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize - 1);
  return true;
}
bool
AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) {
  using namespace llvm::AMDGPU::Swizzle;

  SMLoc S = Parser.getTok().getLoc();
  int64_t GroupSize;

  if (!parseSwizzleOperands(1, &GroupSize,
                            1, 16, "group size must be in the interval [1,16]")) {
    return false;
  }
  if (!isPowerOf2_64(GroupSize)) {
    Error(S, "group size must be a power of two");
    return false;
  }

  Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize);
  return true;
}
bool
AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) {
  using namespace llvm::AMDGPU::Swizzle;

  if (!skipToken(AsmToken::Comma, "expected a comma")) {
    return false;
  }

  StringRef Ctl;
  SMLoc StrLoc = Parser.getTok().getLoc();
  if (!parseString(Ctl)) {
    return false;
  }
  if (Ctl.size() != BITMASK_WIDTH) {
    Error(StrLoc, "expected a 5-character mask");
    return false;
  }

  unsigned AndMask = 0;
  unsigned OrMask = 0;
  unsigned XorMask = 0;

  for (size_t i = 0; i < Ctl.size(); ++i) {
    unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i);
    switch(Ctl[i]) {
    default:
      Error(StrLoc, "invalid mask");
      return false;
    case '0':
      break;
    case '1':
      OrMask |= Mask;
      break;
    case 'p':
      AndMask |= Mask;
      break;
    case 'i':
      AndMask |= Mask;
      XorMask |= Mask;
      break;
    }
  }

  Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask);
  return true;
}
bool
AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) {

  SMLoc OffsetLoc = Parser.getTok().getLoc();

  if (!parseExpr(Imm)) {
    return false;
  }
  if (!isUInt<16>(Imm)) {
    Error(OffsetLoc, "expected a 16-bit offset");
    return false;
  }
  return true;
}
bool
AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) {
  using namespace llvm::AMDGPU::Swizzle;

  if (skipToken(AsmToken::LParen, "expected a left parentheses")) {

    SMLoc ModeLoc = Parser.getTok().getLoc();
    bool Ok = false;

    if (trySkipId(IdSymbolic[ID_QUAD_PERM])) {
      Ok = parseSwizzleQuadPerm(Imm);
    } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) {
      Ok = parseSwizzleBitmaskPerm(Imm);
    } else if (trySkipId(IdSymbolic[ID_BROADCAST])) {
      Ok = parseSwizzleBroadcast(Imm);
    } else if (trySkipId(IdSymbolic[ID_SWAP])) {
      Ok = parseSwizzleSwap(Imm);
    } else if (trySkipId(IdSymbolic[ID_REVERSE])) {
      Ok = parseSwizzleReverse(Imm);
    } else {
      Error(ModeLoc, "expected a swizzle mode");
    }

    return Ok && skipToken(AsmToken::RParen, "expected a closing parentheses");
  }

  return false;
}
OperandMatchResultTy
AMDGPUAsmParser::parseSwizzleOp(OperandVector &Operands) {
  SMLoc S = Parser.getTok().getLoc();
  int64_t Imm = 0;

  if (trySkipId("offset")) {

    bool Ok = false;
    if (skipToken(AsmToken::Colon, "expected a colon")) {
      if (trySkipId("swizzle")) {
        Ok = parseSwizzleMacro(Imm);
      } else {
        Ok = parseSwizzleOffset(Imm);
      }
    }

    Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle));

    return Ok ? MatchOperand_Success : MatchOperand_ParseFail;
  } else {
    // Swizzle "offset" operand is optional.
    // If it is omitted, try parsing other optional operands.
    return parseOptionalOpr(Operands);
  }
}

bool
AMDGPUOperand::isSwizzle() const {
  return isImmTy(ImmTySwizzle);
}
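// Illustrative only: all of these forms are handled by parseSwizzleOp above,
// either as a raw 16-bit offset or via the swizzle(...) macros:
//
//   ds_swizzle_b32 v0, v1 offset:0x8001
//   ds_swizzle_b32 v0, v1 offset:swizzle(SWAP, 16)
//   ds_swizzle_b32 v0, v1 offset:swizzle(QUAD_PERM, 0, 1, 2, 3)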
//===----------------------------------------------------------------------===//
// VGPR Index Mode
//===----------------------------------------------------------------------===//

int64_t AMDGPUAsmParser::parseGPRIdxMacro() {

  using namespace llvm::AMDGPU::VGPRIndexMode;

  if (trySkipToken(AsmToken::RParen)) {
    return 0;
  }

  int64_t Imm = 0;

  while (true) {
    unsigned Mode = 0;
    SMLoc S = Parser.getTok().getLoc();

    for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) {
      if (trySkipId(IdSymbolic[ModeId])) {
        Mode = 1 << ModeId;
        break;
      }
    }

    if (Mode == 0) {
      Error(S, (Imm == 0)?
               "expected a VGPR index mode or a closing parenthesis" :
               "expected a VGPR index mode");
      break;
    }

    if (Imm & Mode) {
      Error(S, "duplicate VGPR index mode");
      break;
    }
    Imm |= Mode;

    if (trySkipToken(AsmToken::RParen))
      break;
    if (!skipToken(AsmToken::Comma,
                   "expected a comma or a closing parenthesis"))
      break;
  }

  return Imm;
}

OperandMatchResultTy
AMDGPUAsmParser::parseGPRIdxMode(OperandVector &Operands) {

  int64_t Imm = 0;
  SMLoc S = Parser.getTok().getLoc();

  if (getLexer().getKind() == AsmToken::Identifier &&
      Parser.getTok().getString() == "gpr_idx" &&
      getLexer().peekTok().is(AsmToken::LParen)) {

    Parser.Lex();
    Parser.Lex();

    // If parse failed, trigger an error but do not return error code
    // to avoid excessive error messages.
    Imm = parseGPRIdxMacro();

  } else {
    if (getParser().parseAbsoluteExpression(Imm))
      return MatchOperand_NoMatch;
    if (Imm < 0 || !isUInt<4>(Imm)) {
      Error(S, "invalid immediate: only 4-bit values are legal");
    }
  }

  Operands.push_back(
      AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyGprIdxMode));
  return MatchOperand_Success;
}

bool AMDGPUOperand::isGPRIdxMode() const {
  return isImmTy(ImmTyGprIdxMode);
}
//===----------------------------------------------------------------------===//
// sopp branch targets
//===----------------------------------------------------------------------===//

OperandMatchResultTy
AMDGPUAsmParser::parseSOppBrTarget(OperandVector &Operands) {

  // Make sure we are not parsing something
  // that looks like a label or an expression but is not.
  // This will improve error messages.
  if (isRegister() || isModifier())
    return MatchOperand_NoMatch;

  if (parseExpr(Operands)) {

    AMDGPUOperand &Opr = ((AMDGPUOperand &)*Operands[Operands.size() - 1]);
    assert(Opr.isImm() || Opr.isExpr());
    SMLoc Loc = Opr.getStartLoc();

    // Currently we do not support arbitrary expressions as branch targets.
    // Only labels and absolute expressions are accepted.
    if (Opr.isExpr() && !Opr.isSymbolRefExpr()) {
      Error(Loc, "expected an absolute expression or a label");
    } else if (Opr.isImm() && !Opr.isS16Imm()) {
      Error(Loc, "expected a 16-bit signed jump offset");
    }
  }

  return MatchOperand_Success; // avoid excessive error messages
}
//===----------------------------------------------------------------------===//
// Boolean holding registers
//===----------------------------------------------------------------------===//

OperandMatchResultTy
AMDGPUAsmParser::parseBoolReg(OperandVector &Operands) {
  return parseReg(Operands);
}

//===----------------------------------------------------------------------===//
// mubuf
//===----------------------------------------------------------------------===//

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultDLC() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDLC);
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultGLC() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyGLC);
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSLC() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTySLC);
}
void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst,
                                   const OperandVector &Operands,
                                   bool IsAtomic,
                                   bool IsAtomicReturn,
                                   bool IsLds) {
  bool IsLdsOpcode = IsLds;
  bool HasLdsModifier = false;
  OptionalImmIndexMap OptionalIdx;
  assert(IsAtomicReturn ? IsAtomic : true);
  unsigned FirstOperandIdx = 1;

  for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);

    // Add the register arguments
    if (Op.isReg()) {
      Op.addRegOperands(Inst, 1);
      // Insert a tied src for atomic return dst.
      // This cannot be postponed as subsequent calls to
      // addImmOperands rely on correct number of MC operands.
      if (IsAtomicReturn && i == FirstOperandIdx)
        Op.addRegOperands(Inst, 1);
      continue;
    }

    // Handle the case where soffset is an immediate
    if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
      Op.addImmOperands(Inst, 1);
      continue;
    }

    HasLdsModifier |= Op.isLDS();

    // Handle tokens like 'offen' which are sometimes hard-coded into the
    // asm string.  There are no MCInst operands for these.
    if (Op.isToken()) {
      continue;
    }
    assert(Op.isImm());

    // Handle optional arguments
    OptionalIdx[Op.getImmTy()] = i;
  }

  // This is a workaround for an llvm quirk which may result in an
  // incorrect instruction selection. Lds and non-lds versions of
  // MUBUF instructions are identical except that lds versions
  // have mandatory 'lds' modifier. However this modifier follows
  // optional modifiers and llvm asm matcher regards this 'lds'
  // modifier as an optional one. As a result, an lds version
  // of opcode may be selected even if it has no 'lds' modifier.
  if (IsLdsOpcode && !HasLdsModifier) {
    int NoLdsOpcode = AMDGPU::getMUBUFNoLdsInst(Inst.getOpcode());
    if (NoLdsOpcode != -1) { // Got lds version - correct it.
      Inst.setOpcode(NoLdsOpcode);
      IsLdsOpcode = false;
    }
  }

  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset);
  if (!IsAtomic) { // glc is hard-coded.
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
  }
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);

  if (!IsLdsOpcode) { // tfe is not legal with lds opcodes
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
  }

  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC);
}
void AMDGPUAsmParser::cvtMtbuf(MCInst &Inst, const OperandVector &Operands) {
  OptionalImmIndexMap OptionalIdx;

  for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);

    // Add the register arguments
    if (Op.isReg()) {
      Op.addRegOperands(Inst, 1);
      continue;
    }

    // Handle the case where soffset is an immediate
    if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
      Op.addImmOperands(Inst, 1);
      continue;
    }

    // Handle tokens like 'offen' which are sometimes hard-coded into the
    // asm string.  There are no MCInst operands for these.
    if (Op.isToken()) {
      continue;
    }
    assert(Op.isImm());

    // Handle optional arguments
    OptionalIdx[Op.getImmTy()] = i;
  }

  addOptionalImmOperand(Inst, Operands, OptionalIdx,
                        AMDGPUOperand::ImmTyOffset);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyFORMAT);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC);
}
//===----------------------------------------------------------------------===//
// mimg
//===----------------------------------------------------------------------===//

void AMDGPUAsmParser::cvtMIMG(MCInst &Inst, const OperandVector &Operands,
                              bool IsAtomic) {
  unsigned I = 1;
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
  }

  if (IsAtomic) {
    // Add src, same as dst
    assert(Desc.getNumDefs() == 1);
    ((AMDGPUOperand &)*Operands[I - 1]).addRegOperands(Inst, 1);
  }

  OptionalImmIndexMap OptionalIdx;

  for (unsigned E = Operands.size(); I != E; ++I) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);

    // Add the register arguments
    if (Op.isReg()) {
      Op.addRegOperands(Inst, 1);
    } else if (Op.isImmModifier()) {
      OptionalIdx[Op.getImmTy()] = I;
    } else if (!Op.isToken()) {
      llvm_unreachable("unexpected operand type");
    }
  }

  bool IsGFX10 = isGFX10();

  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDMask);
  if (IsGFX10)
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDim, -1);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyUNorm);
  if (IsGFX10)
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyR128A16);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyLWE);
  if (!IsGFX10)
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDA);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyD16);
}

void AMDGPUAsmParser::cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands) {
  cvtMIMG(Inst, Operands, true);
}
//===----------------------------------------------------------------------===//
// smrd
//===----------------------------------------------------------------------===//

bool AMDGPUOperand::isSMRDOffset8() const {
  return isImm() && isUInt<8>(getImm());
}

bool AMDGPUOperand::isSMRDOffset20() const {
  return isImm() && isUInt<20>(getImm());
}

bool AMDGPUOperand::isSMRDLiteralOffset() const {
  // 32-bit literals are only supported on CI and we only want to use them
  // when the offset is > 8-bits.
  return isImm() && !isUInt<8>(getImm()) && isUInt<32>(getImm());
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset8() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset20() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDLiteralOffset() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFlatOffset() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
}
//===----------------------------------------------------------------------===//
// vop3
//===----------------------------------------------------------------------===//

static bool ConvertOmodMul(int64_t &Mul) {
  if (Mul != 1 && Mul != 2 && Mul != 4)
    return false;
  Mul >>= 1;
  return true;
}

static bool ConvertOmodDiv(int64_t &Div) {
  if (Div == 1) {
    Div = 0;
    return true;
  }
  if (Div == 2) {
    Div = 3;
    return true;
  }
  return false;
}

static bool ConvertBoundCtrl(int64_t &BoundCtrl) {
  if (BoundCtrl == 0) {
    BoundCtrl = 1;
    return true;
  }
  if (BoundCtrl == -1) {
    BoundCtrl = 0;
    return true;
  }
  return false;
}
// Note: the order in this table matches the order of operands in AsmString.
static const OptionalOperand AMDGPUOptionalOperandTable[] = {
  {"offen",       AMDGPUOperand::ImmTyOffen, true, nullptr},
  {"idxen",       AMDGPUOperand::ImmTyIdxen, true, nullptr},
  {"addr64",      AMDGPUOperand::ImmTyAddr64, true, nullptr},
  {"offset0",     AMDGPUOperand::ImmTyOffset0, false, nullptr},
  {"offset1",     AMDGPUOperand::ImmTyOffset1, false, nullptr},
  {"gds",         AMDGPUOperand::ImmTyGDS, true, nullptr},
  {"lds",         AMDGPUOperand::ImmTyLDS, true, nullptr},
  {"offset",      AMDGPUOperand::ImmTyOffset, false, nullptr},
  {"inst_offset", AMDGPUOperand::ImmTyInstOffset, false, nullptr},
  {"dlc",         AMDGPUOperand::ImmTyDLC, true, nullptr},
  {"format",      AMDGPUOperand::ImmTyFORMAT, false, nullptr},
  {"glc",         AMDGPUOperand::ImmTyGLC, true, nullptr},
  {"slc",         AMDGPUOperand::ImmTySLC, true, nullptr},
  {"tfe",         AMDGPUOperand::ImmTyTFE, true, nullptr},
  {"d16",         AMDGPUOperand::ImmTyD16, true, nullptr},
  {"high",        AMDGPUOperand::ImmTyHigh, true, nullptr},
  {"clamp",       AMDGPUOperand::ImmTyClampSI, true, nullptr},
  {"omod",        AMDGPUOperand::ImmTyOModSI, false, ConvertOmodMul},
  {"unorm",       AMDGPUOperand::ImmTyUNorm, true, nullptr},
  {"da",          AMDGPUOperand::ImmTyDA, true, nullptr},
  {"r128",        AMDGPUOperand::ImmTyR128A16, true, nullptr},
  {"a16",         AMDGPUOperand::ImmTyR128A16, true, nullptr},
  {"lwe",         AMDGPUOperand::ImmTyLWE, true, nullptr},
  {"d16",         AMDGPUOperand::ImmTyD16, true, nullptr},
  {"dmask",       AMDGPUOperand::ImmTyDMask, false, nullptr},
  {"dim",         AMDGPUOperand::ImmTyDim, false, nullptr},
  {"row_mask",    AMDGPUOperand::ImmTyDppRowMask, false, nullptr},
  {"bank_mask",   AMDGPUOperand::ImmTyDppBankMask, false, nullptr},
  {"bound_ctrl",  AMDGPUOperand::ImmTyDppBoundCtrl, false, ConvertBoundCtrl},
  {"fi",          AMDGPUOperand::ImmTyDppFi, false, nullptr},
  {"dst_sel",     AMDGPUOperand::ImmTySdwaDstSel, false, nullptr},
  {"src0_sel",    AMDGPUOperand::ImmTySdwaSrc0Sel, false, nullptr},
  {"src1_sel",    AMDGPUOperand::ImmTySdwaSrc1Sel, false, nullptr},
  {"dst_unused",  AMDGPUOperand::ImmTySdwaDstUnused, false, nullptr},
  {"compr",       AMDGPUOperand::ImmTyExpCompr, true, nullptr},
  {"vm",          AMDGPUOperand::ImmTyExpVM, true, nullptr},
  {"op_sel",      AMDGPUOperand::ImmTyOpSel, false, nullptr},
  {"op_sel_hi",   AMDGPUOperand::ImmTyOpSelHi, false, nullptr},
  {"neg_lo",      AMDGPUOperand::ImmTyNegLo, false, nullptr},
  {"neg_hi",      AMDGPUOperand::ImmTyNegHi, false, nullptr},
  {"blgp",        AMDGPUOperand::ImmTyBLGP, false, nullptr},
  {"cbsz",        AMDGPUOperand::ImmTyCBSZ, false, nullptr},
  {"abid",        AMDGPUOperand::ImmTyABID, false, nullptr}
};
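
// Illustrative example (assumed SI-era MUBUF syntax, added for clarity): an
// instruction such as
//   buffer_load_dword v1, v[2:3], s[4:7], 0 addr64 glc slc
// carries the optional operands "addr64", "glc" and "slc" from this table.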
OperandMatchResultTy AMDGPUAsmParser::parseOptionalOperand(OperandVector &Operands) {
  unsigned size = Operands.size();
  assert(size > 0);

  OperandMatchResultTy res = parseOptionalOpr(Operands);

  // This is a hack to enable hardcoded mandatory operands which follow
  // optional operands.
  //
  // The current design assumes that all operands after the first optional
  // operand are also optional. However, the implementation of some
  // instructions violates this rule (see e.g. flat/global atomics, which have
  // hardcoded 'glc' operands).
  //
  // To alleviate this problem, we have to (implicitly) parse extra operands
  // to make sure the autogenerated parser of custom operands never hits
  // hardcoded mandatory operands.

  if (size == 1 || ((AMDGPUOperand &)*Operands[size - 1]).isRegKind()) {

    // We have parsed the first optional operand.
    // Parse as many operands as necessary to skip all mandatory operands.

    for (unsigned i = 0; i < MAX_OPR_LOOKAHEAD; ++i) {
      if (res != MatchOperand_Success ||
          getLexer().is(AsmToken::EndOfStatement)) break;
      if (getLexer().is(AsmToken::Comma)) Parser.Lex();
      res = parseOptionalOpr(Operands);
    }
  }

  return res;
}
OperandMatchResultTy AMDGPUAsmParser::parseOptionalOpr(OperandVector &Operands) {
  OperandMatchResultTy res;
  for (const OptionalOperand &Op : AMDGPUOptionalOperandTable) {
    // try to parse any optional operand here
    if (Op.IsBit) {
      res = parseNamedBit(Op.Name, Operands, Op.Type);
    } else if (Op.Type == AMDGPUOperand::ImmTyOModSI) {
      res = parseOModOperand(Operands);
    } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstSel ||
               Op.Type == AMDGPUOperand::ImmTySdwaSrc0Sel ||
               Op.Type == AMDGPUOperand::ImmTySdwaSrc1Sel) {
      res = parseSDWASel(Operands, Op.Name, Op.Type);
    } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstUnused) {
      res = parseSDWADstUnused(Operands);
    } else if (Op.Type == AMDGPUOperand::ImmTyOpSel ||
               Op.Type == AMDGPUOperand::ImmTyOpSelHi ||
               Op.Type == AMDGPUOperand::ImmTyNegLo ||
               Op.Type == AMDGPUOperand::ImmTyNegHi) {
      res = parseOperandArrayWithPrefix(Op.Name, Operands, Op.Type,
                                        Op.ConvertResult);
    } else if (Op.Type == AMDGPUOperand::ImmTyDim) {
      res = parseDim(Operands);
    } else if (Op.Type == AMDGPUOperand::ImmTyFORMAT && !isGFX10()) {
      res = parseDfmtNfmt(Operands);
    } else {
      res = parseIntWithPrefix(Op.Name, Operands, Op.Type, Op.ConvertResult);
    }
    if (res != MatchOperand_NoMatch) {
      return res;
    }
  }
  return MatchOperand_NoMatch;
}
OperandMatchResultTy AMDGPUAsmParser::parseOModOperand(OperandVector &Operands) {
  StringRef Name = Parser.getTok().getString();
  if (Name == "mul") {
    return parseIntWithPrefix("mul", Operands,
                              AMDGPUOperand::ImmTyOModSI, ConvertOmodMul);
  }

  if (Name == "div") {
    return parseIntWithPrefix("div", Operands,
                              AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv);
  }

  return MatchOperand_NoMatch;
}
void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands) {
  cvtVOP3P(Inst, Operands);

  int Opc = Inst.getOpcode();

  int SrcNum;
  const int Ops[] = { AMDGPU::OpName::src0,
                      AMDGPU::OpName::src1,
                      AMDGPU::OpName::src2 };
  for (SrcNum = 0;
       SrcNum < 3 && AMDGPU::getNamedOperandIdx(Opc, Ops[SrcNum]) != -1;
       ++SrcNum);
  assert(SrcNum > 0);

  int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
  unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();

  if ((OpSel & (1 << SrcNum)) != 0) {
    int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers);
    uint32_t ModVal = Inst.getOperand(ModIdx).getImm();
    Inst.getOperand(ModIdx).setImm(ModVal | SISrcMods::DST_OP_SEL);
  }
}
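
// Added note (illustrative, based on the conversion above): for these opsel
// VOP3 opcodes the op_sel bit just past the last source selects the high half
// of the destination; since there is no dedicated dst modifier operand, that
// bit is folded into src0_modifiers as DST_OP_SEL.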
static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) {
      // 1. This operand is input modifiers
  return Desc.OpInfo[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS
      // 2. This is not last operand
      && Desc.NumOperands > (OpNum + 1)
      // 3. Next operand is register class
      && Desc.OpInfo[OpNum + 1].RegClass != -1
      // 4. Next register is not tied to any other operand
      && Desc.getOperandConstraint(OpNum + 1, MCOI::OperandConstraint::TIED_TO) == -1;
}
void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands)
{
  OptionalImmIndexMap OptionalIdx;
  unsigned Opc = Inst.getOpcode();

  unsigned I = 1;
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
  }

  for (unsigned E = Operands.size(); I != E; ++I) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
    if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
      Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
    } else if (Op.isInterpSlot() ||
               Op.isInterpAttr() ||
               Op.isAttrChan()) {
      Inst.addOperand(MCOperand::createImm(Op.getImm()));
    } else if (Op.isImmModifier()) {
      OptionalIdx[Op.getImmTy()] = I;
    } else {
      llvm_unreachable("unhandled operand type");
    }
  }

  if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::high) != -1) {
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyHigh);
  }

  if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
  }

  if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
  }
}
void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands,
                              OptionalImmIndexMap &OptionalIdx) {
  unsigned Opc = Inst.getOpcode();

  unsigned I = 1;
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
  }

  if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1) {
    // This instruction has src modifiers
    for (unsigned E = Operands.size(); I != E; ++I) {
      AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
      if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
        Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
      } else if (Op.isImmModifier()) {
        OptionalIdx[Op.getImmTy()] = I;
      } else if (Op.isRegOrImm()) {
        Op.addRegOrImmOperands(Inst, 1);
      } else {
        llvm_unreachable("unhandled operand type");
      }
    }
  } else {
    // No src modifiers
    for (unsigned E = Operands.size(); I != E; ++I) {
      AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
      if (Op.isMod()) {
        OptionalIdx[Op.getImmTy()] = I;
      } else {
        Op.addRegOrImmOperands(Inst, 1);
      }
    }
  }

  if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
  }

  if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
  }

  // Special case v_mac_{f16, f32} and v_fmac_{f16, f32} (gfx906/gfx10+):
  // they have a src2 register operand that is tied to the dst operand.
  // We don't allow modifiers for this operand in the assembler, so
  // src2_modifiers should be 0.
  if (Opc == AMDGPU::V_MAC_F32_e64_gfx6_gfx7 ||
      Opc == AMDGPU::V_MAC_F32_e64_gfx10 ||
      Opc == AMDGPU::V_MAC_F32_e64_vi ||
      Opc == AMDGPU::V_MAC_F16_e64_vi ||
      Opc == AMDGPU::V_FMAC_F32_e64_gfx10 ||
      Opc == AMDGPU::V_FMAC_F32_e64_vi ||
      Opc == AMDGPU::V_FMAC_F16_e64_gfx10) {
    auto it = Inst.begin();
    std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers));
    it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2
    ++it;
    Inst.insert(it, Inst.getOperand(0)); // src2 = dst
  }
}
void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) {
  OptionalImmIndexMap OptionalIdx;
  cvtVOP3(Inst, Operands, OptionalIdx);
}
void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst,
                               const OperandVector &Operands) {
  OptionalImmIndexMap OptIdx;
  const int Opc = Inst.getOpcode();
  const MCInstrDesc &Desc = MII.get(Opc);

  const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0;

  cvtVOP3(Inst, Operands, OptIdx);

  if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in) != -1) {
    assert(!IsPacked);
    Inst.addOperand(Inst.getOperand(0));
  }

  // FIXME: This is messy. Parse the modifiers as if it was a normal VOP3
  // instruction, and then figure out where to actually put the modifiers.

  addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel);

  int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
  if (OpSelHiIdx != -1) {
    int DefaultVal = IsPacked ? -1 : 0;
    addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi,
                          DefaultVal);
  }

  int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo);
  if (NegLoIdx != -1) {
    addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo);
    addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi);
  }

  const int Ops[] = { AMDGPU::OpName::src0,
                      AMDGPU::OpName::src1,
                      AMDGPU::OpName::src2 };
  const int ModOps[] = { AMDGPU::OpName::src0_modifiers,
                         AMDGPU::OpName::src1_modifiers,
                         AMDGPU::OpName::src2_modifiers };

  int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);

  unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
  unsigned OpSelHi = 0;
  unsigned NegLo = 0;
  unsigned NegHi = 0;

  if (OpSelHiIdx != -1) {
    OpSelHi = Inst.getOperand(OpSelHiIdx).getImm();
  }

  if (NegLoIdx != -1) {
    int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi);
    NegLo = Inst.getOperand(NegLoIdx).getImm();
    NegHi = Inst.getOperand(NegHiIdx).getImm();
  }

  for (int J = 0; J < 3; ++J) {
    int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]);
    if (OpIdx == -1)
      break;

    uint32_t ModVal = 0;

    if ((OpSel & (1 << J)) != 0)
      ModVal |= SISrcMods::OP_SEL_0;

    if ((OpSelHi & (1 << J)) != 0)
      ModVal |= SISrcMods::OP_SEL_1;

    if ((NegLo & (1 << J)) != 0)
      ModVal |= SISrcMods::NEG;

    if ((NegHi & (1 << J)) != 0)
      ModVal |= SISrcMods::NEG_HI;

    int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);

    Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal);
  }
}
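
// Added note (illustrative): the loop above folds the parsed op_sel,
// op_sel_hi, neg_lo and neg_hi masks into the per-source *_modifiers operands,
// so bit J of each mask ends up as OP_SEL_0 / OP_SEL_1 / NEG / NEG_HI in
// srcJ_modifiers.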
//===----------------------------------------------------------------------===//
// dpp
//===----------------------------------------------------------------------===//

bool AMDGPUOperand::isDPP8() const {
  return isImmTy(ImmTyDPP8);
}
bool AMDGPUOperand::isDPPCtrl() const {
  using namespace AMDGPU::DPP;

  bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm());
  if (result) {
    int64_t Imm = getImm();
    return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) ||
           (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) ||
           (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) ||
           (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) ||
           (Imm == DppCtrl::WAVE_SHL1) ||
           (Imm == DppCtrl::WAVE_ROL1) ||
           (Imm == DppCtrl::WAVE_SHR1) ||
           (Imm == DppCtrl::WAVE_ROR1) ||
           (Imm == DppCtrl::ROW_MIRROR) ||
           (Imm == DppCtrl::ROW_HALF_MIRROR) ||
           (Imm == DppCtrl::BCAST15) ||
           (Imm == DppCtrl::BCAST31) ||
           (Imm >= DppCtrl::ROW_SHARE_FIRST && Imm <= DppCtrl::ROW_SHARE_LAST) ||
           (Imm >= DppCtrl::ROW_XMASK_FIRST && Imm <= DppCtrl::ROW_XMASK_LAST);
  }
  return false;
}
//===----------------------------------------------------------------------===//
// mAI
//===----------------------------------------------------------------------===//

bool AMDGPUOperand::isBLGP() const {
  return isImm() && getImmTy() == ImmTyBLGP && isUInt<3>(getImm());
}

bool AMDGPUOperand::isCBSZ() const {
  return isImm() && getImmTy() == ImmTyCBSZ && isUInt<3>(getImm());
}

bool AMDGPUOperand::isABID() const {
  return isImm() && getImmTy() == ImmTyABID && isUInt<4>(getImm());
}

bool AMDGPUOperand::isS16Imm() const {
  return isImm() && (isInt<16>(getImm()) || isUInt<16>(getImm()));
}

bool AMDGPUOperand::isU16Imm() const {
  return isImm() && isUInt<16>(getImm());
}
OperandMatchResultTy AMDGPUAsmParser::parseDim(OperandVector &Operands) {
  if (!isGFX10())
    return MatchOperand_NoMatch;

  SMLoc S = Parser.getTok().getLoc();

  if (getLexer().isNot(AsmToken::Identifier))
    return MatchOperand_NoMatch;
  if (getLexer().getTok().getString() != "dim")
    return MatchOperand_NoMatch;

  Parser.Lex();
  if (getLexer().isNot(AsmToken::Colon))
    return MatchOperand_ParseFail;

  Parser.Lex();

  // We want to allow "dim:1D" etc., but the initial 1 is tokenized as an
  // integer.
  std::string Token;
  if (getLexer().is(AsmToken::Integer)) {
    SMLoc Loc = getLexer().getTok().getEndLoc();
    Token = getLexer().getTok().getString();
    Parser.Lex();
    if (getLexer().getTok().getLoc() != Loc)
      return MatchOperand_ParseFail;
  }
  if (getLexer().isNot(AsmToken::Identifier))
    return MatchOperand_ParseFail;
  Token += getLexer().getTok().getString();

  StringRef DimId = Token;
  if (DimId.startswith("SQ_RSRC_IMG_"))
    DimId = DimId.substr(12);

  const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByAsmSuffix(DimId);
  if (!DimInfo)
    return MatchOperand_ParseFail;

  Parser.Lex();

  Operands.push_back(AMDGPUOperand::CreateImm(this, DimInfo->Encoding, S,
                                              AMDGPUOperand::ImmTyDim));
  return MatchOperand_Success;
}
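
// Added example (illustrative): both the long and the short spellings of the
// MIMG dimension operand are accepted here, e.g. "dim:SQ_RSRC_IMG_2D" and
// "dim:2D"; the leading digit of the short form is what the integer-token
// handling above is for.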
OperandMatchResultTy AMDGPUAsmParser::parseDPP8(OperandVector &Operands) {
  SMLoc S = Parser.getTok().getLoc();
  StringRef Prefix;

  if (getLexer().getKind() == AsmToken::Identifier) {
    Prefix = Parser.getTok().getString();
  } else {
    return MatchOperand_NoMatch;
  }

  if (Prefix != "dpp8")
    return parseDPPCtrl(Operands);
  if (!isGFX10())
    return MatchOperand_NoMatch;

  // dpp8:[%d,%d,%d,%d,%d,%d,%d,%d]

  int64_t Sels[8];

  Parser.Lex();
  if (getLexer().isNot(AsmToken::Colon))
    return MatchOperand_ParseFail;

  Parser.Lex();
  if (getLexer().isNot(AsmToken::LBrac))
    return MatchOperand_ParseFail;

  Parser.Lex();
  if (getParser().parseAbsoluteExpression(Sels[0]))
    return MatchOperand_ParseFail;
  if (0 > Sels[0] || 7 < Sels[0])
    return MatchOperand_ParseFail;

  for (size_t i = 1; i < 8; ++i) {
    if (getLexer().isNot(AsmToken::Comma))
      return MatchOperand_ParseFail;

    Parser.Lex();
    if (getParser().parseAbsoluteExpression(Sels[i]))
      return MatchOperand_ParseFail;
    if (0 > Sels[i] || 7 < Sels[i])
      return MatchOperand_ParseFail;
  }

  if (getLexer().isNot(AsmToken::RBrac))
    return MatchOperand_ParseFail;
  Parser.Lex();

  unsigned DPP8 = 0;
  for (size_t i = 0; i < 8; ++i)
    DPP8 |= (Sels[i] << (i * 3));

  Operands.push_back(AMDGPUOperand::CreateImm(this, DPP8, S, AMDGPUOperand::ImmTyDPP8));
  return MatchOperand_Success;
}
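
// Added note (illustrative): each of the eight lane selectors is 3 bits wide,
// so "dpp8:[s0,...,s7]" is packed with s0 in bits [2:0], s1 in bits [5:3], and
// so on, which is what the shift by (i * 3) above implements.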
OperandMatchResultTy
AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) {
  using namespace AMDGPU::DPP;

  SMLoc S = Parser.getTok().getLoc();
  StringRef Prefix;
  int64_t Int;

  if (getLexer().getKind() == AsmToken::Identifier) {
    Prefix = Parser.getTok().getString();
  } else {
    return MatchOperand_NoMatch;
  }

  if (Prefix == "row_mirror") {
    Int = DppCtrl::ROW_MIRROR;
    Parser.Lex();
  } else if (Prefix == "row_half_mirror") {
    Int = DppCtrl::ROW_HALF_MIRROR;
    Parser.Lex();
  } else {
    // Check to prevent parseDPPCtrlOps from eating invalid tokens
    if (Prefix != "quad_perm"
        && Prefix != "row_shl"
        && Prefix != "row_shr"
        && Prefix != "row_ror"
        && Prefix != "wave_shl"
        && Prefix != "wave_rol"
        && Prefix != "wave_shr"
        && Prefix != "wave_ror"
        && Prefix != "row_bcast"
        && Prefix != "row_share"
        && Prefix != "row_xmask") {
      return MatchOperand_NoMatch;
    }

    if (!isGFX10() && (Prefix == "row_share" || Prefix == "row_xmask"))
      return MatchOperand_NoMatch;

    if (!isVI() && !isGFX9() &&
        (Prefix == "wave_shl" || Prefix == "wave_shr" ||
         Prefix == "wave_rol" || Prefix == "wave_ror" ||
         Prefix == "row_bcast"))
      return MatchOperand_NoMatch;

    Parser.Lex();
    if (getLexer().isNot(AsmToken::Colon))
      return MatchOperand_ParseFail;

    if (Prefix == "quad_perm") {
      // quad_perm:[%d,%d,%d,%d]
      Parser.Lex();
      if (getLexer().isNot(AsmToken::LBrac))
        return MatchOperand_ParseFail;
      Parser.Lex();

      if (getParser().parseAbsoluteExpression(Int) || !(0 <= Int && Int <= 3))
        return MatchOperand_ParseFail;

      for (int i = 0; i < 3; ++i) {
        if (getLexer().isNot(AsmToken::Comma))
          return MatchOperand_ParseFail;
        Parser.Lex();

        int64_t Temp;
        if (getParser().parseAbsoluteExpression(Temp) || !(0 <= Temp && Temp <= 3))
          return MatchOperand_ParseFail;
        const int shift = i * 2 + 2;
        Int += (Temp << shift);
      }

      if (getLexer().isNot(AsmToken::RBrac))
        return MatchOperand_ParseFail;
      Parser.Lex();
    } else {
      // sel:%d
      Parser.Lex();
      if (getParser().parseAbsoluteExpression(Int))
        return MatchOperand_ParseFail;

      if (Prefix == "row_shl" && 1 <= Int && Int <= 15) {
        Int |= DppCtrl::ROW_SHL0;
      } else if (Prefix == "row_shr" && 1 <= Int && Int <= 15) {
        Int |= DppCtrl::ROW_SHR0;
      } else if (Prefix == "row_ror" && 1 <= Int && Int <= 15) {
        Int |= DppCtrl::ROW_ROR0;
      } else if (Prefix == "wave_shl" && 1 == Int) {
        Int = DppCtrl::WAVE_SHL1;
      } else if (Prefix == "wave_rol" && 1 == Int) {
        Int = DppCtrl::WAVE_ROL1;
      } else if (Prefix == "wave_shr" && 1 == Int) {
        Int = DppCtrl::WAVE_SHR1;
      } else if (Prefix == "wave_ror" && 1 == Int) {
        Int = DppCtrl::WAVE_ROR1;
      } else if (Prefix == "row_bcast") {
        if (Int == 15) {
          Int = DppCtrl::BCAST15;
        } else if (Int == 31) {
          Int = DppCtrl::BCAST31;
        } else {
          return MatchOperand_ParseFail;
        }
      } else if (Prefix == "row_share" && 0 <= Int && Int <= 15) {
        Int |= DppCtrl::ROW_SHARE_FIRST;
      } else if (Prefix == "row_xmask" && 0 <= Int && Int <= 15) {
        Int |= DppCtrl::ROW_XMASK_FIRST;
      } else {
        return MatchOperand_ParseFail;
      }
    }
  }

  Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTyDppCtrl));
  return MatchOperand_Success;
}
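
// Added examples (illustrative): spellings handled above include
// "quad_perm:[0,1,2,3]", "row_shl:1", "row_bcast:15" and "row_share:3"; each
// is translated to the corresponding DppCtrl encoding before being stored as
// an ImmTyDppCtrl operand.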
AMDGPUOperand::Ptr AMDGPUAsmParser::defaultRowMask() const {
  return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppRowMask);
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultEndpgmImmOperands() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyEndpgm);
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBankMask() const {
  return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppBankMask);
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBoundCtrl() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppBoundCtrl);
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFI() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppFi);
}
void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8) {
  OptionalImmIndexMap OptionalIdx;

  unsigned I = 1;
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
  }

  int Fi = 0;
  for (unsigned E = Operands.size(); I != E; ++I) {
    auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(),
                                            MCOI::TIED_TO);
    if (TiedTo != -1) {
      assert((unsigned)TiedTo < Inst.getNumOperands());
      // handle tied old or src2 for MAC instructions
      Inst.addOperand(Inst.getOperand(TiedTo));
    }
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
    // Add the register arguments
    if (Op.isReg() && validateVccOperand(Op.getReg())) {
      // VOP2b (v_add_u32, v_sub_u32 ...) dpp use "vcc" token.
      // Skip it.
      continue;
    }

    if (IsDPP8) {
      if (Op.isDPP8()) {
        Op.addImmOperands(Inst, 1);
      } else if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
        Op.addRegWithFPInputModsOperands(Inst, 2);
      } else if (Op.isFI()) {
        Fi = Op.getImm();
      } else if (Op.isReg()) {
        Op.addRegOperands(Inst, 1);
      } else {
        llvm_unreachable("Invalid operand type");
      }
    } else {
      if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
        Op.addRegWithFPInputModsOperands(Inst, 2);
      } else if (Op.isDPPCtrl()) {
        Op.addImmOperands(Inst, 1);
      } else if (Op.isImm()) {
        // Handle optional arguments
        OptionalIdx[Op.getImmTy()] = I;
      } else {
        llvm_unreachable("Invalid operand type");
      }
    }
  }

  if (IsDPP8) {
    using namespace llvm::AMDGPU::DPP;
    Inst.addOperand(MCOperand::createImm(Fi ? DPP8_FI_1 : DPP8_FI_0));
  } else {
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf);
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf);
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl);
    if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::fi) != -1) {
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppFi);
    }
  }
}
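
// Added example (illustrative): a complete DPP instruction such as
//   v_mov_b32_dpp v0, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf
// reaches this converter with dpp_ctrl produced by parseDPPCtrl and the masks
// supplied either explicitly or via the 0xf defaults used above.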
//===----------------------------------------------------------------------===//
// sdwa
//===----------------------------------------------------------------------===//

OperandMatchResultTy
AMDGPUAsmParser::parseSDWASel(OperandVector &Operands, StringRef Prefix,
                              AMDGPUOperand::ImmTy Type) {
  using namespace llvm::AMDGPU::SDWA;

  SMLoc S = Parser.getTok().getLoc();
  StringRef Value;
  OperandMatchResultTy res;

  res = parseStringWithPrefix(Prefix, Value);
  if (res != MatchOperand_Success) {
    return res;
  }

  int64_t Int;
  Int = StringSwitch<int64_t>(Value)
        .Case("BYTE_0", SdwaSel::BYTE_0)
        .Case("BYTE_1", SdwaSel::BYTE_1)
        .Case("BYTE_2", SdwaSel::BYTE_2)
        .Case("BYTE_3", SdwaSel::BYTE_3)
        .Case("WORD_0", SdwaSel::WORD_0)
        .Case("WORD_1", SdwaSel::WORD_1)
        .Case("DWORD", SdwaSel::DWORD)
        .Default(0xffffffff);
  Parser.Lex(); // eat last token

  if (Int == 0xffffffff) {
    return MatchOperand_ParseFail;
  }

  Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, Type));
  return MatchOperand_Success;
}
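
// Added examples (illustrative): the same routine parses the "dst_sel:",
// "src0_sel:" and "src1_sel:" operands, e.g. "dst_sel:BYTE_0" or
// "src0_sel:WORD_1"; the Prefix/Type pair supplied by the caller decides which
// operand the parsed selector becomes.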
OperandMatchResultTy
AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) {
  using namespace llvm::AMDGPU::SDWA;

  SMLoc S = Parser.getTok().getLoc();
  StringRef Value;
  OperandMatchResultTy res;

  res = parseStringWithPrefix("dst_unused", Value);
  if (res != MatchOperand_Success) {
    return res;
  }

  int64_t Int;
  Int = StringSwitch<int64_t>(Value)
        .Case("UNUSED_PAD", DstUnused::UNUSED_PAD)
        .Case("UNUSED_SEXT", DstUnused::UNUSED_SEXT)
        .Case("UNUSED_PRESERVE", DstUnused::UNUSED_PRESERVE)
        .Default(0xffffffff);
  Parser.Lex(); // eat last token

  if (Int == 0xffffffff) {
    return MatchOperand_ParseFail;
  }

  Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTySdwaDstUnused));
  return MatchOperand_Success;
}
void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOP1);
}

void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOP2);
}

void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true);
}

void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, isVI());
}
void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands,
                              uint64_t BasicInstType, bool skipVcc) {
  using namespace llvm::AMDGPU::SDWA;

  OptionalImmIndexMap OptionalIdx;
  bool skippedVcc = false;

  unsigned I = 1;
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
  }

  for (unsigned E = Operands.size(); I != E; ++I) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
    if (skipVcc && !skippedVcc && Op.isReg() &&
        (Op.getReg() == AMDGPU::VCC || Op.getReg() == AMDGPU::VCC_LO)) {
      // VOP2b (v_add_u32, v_sub_u32 ...) sdwa use "vcc" token as dst.
      // Skip it if it's 2nd (e.g. v_add_i32_sdwa v1, vcc, v2, v3)
      // or 4th (v_addc_u32_sdwa v1, vcc, v2, v3, vcc) operand.
      // Skip VCC only if we didn't skip it on previous iteration.
      if (BasicInstType == SIInstrFlags::VOP2 &&
          (Inst.getNumOperands() == 1 || Inst.getNumOperands() == 5)) {
        skippedVcc = true;
        continue;
      } else if (BasicInstType == SIInstrFlags::VOPC &&
                 Inst.getNumOperands() == 0) {
        skippedVcc = true;
        continue;
      }
    }
    if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
      Op.addRegOrImmWithInputModsOperands(Inst, 2);
    } else if (Op.isImm()) {
      // Handle optional arguments
      OptionalIdx[Op.getImmTy()] = I;
    } else {
      llvm_unreachable("Invalid operand type");
    }
    skippedVcc = false;
  }

  if (Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx10 &&
      Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx9 &&
      Inst.getOpcode() != AMDGPU::V_NOP_sdwa_vi) {
    // v_nop_sdwa (vi/gfx9/gfx10) has no optional sdwa arguments
    switch (BasicInstType) {
    case SIInstrFlags::VOP1:
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
      if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
        addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
      }
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
      break;

    case SIInstrFlags::VOP2:
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
      if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
        addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
      }
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
      break;

    case SIInstrFlags::VOPC:
      if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::clamp) != -1)
        addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
      break;

    default:
      llvm_unreachable("Invalid instruction type. Only VOP1, VOP2 and VOPC allowed");
    }
  }

  // special case v_mac_{f16, f32}:
  // it has src2 register operand that is tied to dst operand
  if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
      Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
    auto it = Inst.begin();
    std::advance(
        it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2));
    Inst.insert(it, Inst.getOperand(0)); // src2 = dst
  }
}
//===----------------------------------------------------------------------===//
// mAI
//===----------------------------------------------------------------------===//

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBLGP() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyBLGP);
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCBSZ() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCBSZ);
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultABID() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyABID);
}
/// Force static initialization.
extern "C" void LLVMInitializeAMDGPUAsmParser() {
  RegisterMCAsmParser<AMDGPUAsmParser> A(getTheAMDGPUTarget());
  RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget());
}

#define GET_REGISTER_MATCHER
#define GET_MATCHER_IMPLEMENTATION
#define GET_MNEMONIC_SPELL_CHECKER
#include "AMDGPUGenAsmMatcher.inc"
// This function should be defined after the auto-generated include so that we
// have the MatchClassKind enum defined.
unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op,
                                                     unsigned Kind) {
  // Tokens like "glc" would be parsed as immediate operands in ParseOperand().
  // But MatchInstructionImpl() expects a token and fails to validate the
  // operand. This method checks if we were given an immediate operand but
  // expected to get the corresponding token.
  AMDGPUOperand &Operand = (AMDGPUOperand &)Op;
  switch (Kind) {
  case MCK_addr64:
    return Operand.isAddr64() ? Match_Success : Match_InvalidOperand;
  case MCK_gds:
    return Operand.isGDS() ? Match_Success : Match_InvalidOperand;
  case MCK_lds:
    return Operand.isLDS() ? Match_Success : Match_InvalidOperand;
  case MCK_glc:
    return Operand.isGLC() ? Match_Success : Match_InvalidOperand;
  case MCK_idxen:
    return Operand.isIdxen() ? Match_Success : Match_InvalidOperand;
  case MCK_offen:
    return Operand.isOffen() ? Match_Success : Match_InvalidOperand;
  case MCK_SSrcB32:
    // When operands have expression values, they will return true for isToken,
    // because it is not possible to distinguish between a token and an
    // expression at parse time. MatchInstructionImpl() will always try to
    // match an operand as a token, when isToken returns true, and when the
    // name of the expression is not a valid token, the match will fail,
    // so we need to handle it here.
    return Operand.isSSrcB32() ? Match_Success : Match_InvalidOperand;
  case MCK_SSrcF32:
    return Operand.isSSrcF32() ? Match_Success : Match_InvalidOperand;
  case MCK_SoppBrTarget:
    return Operand.isSoppBrTarget() ? Match_Success : Match_InvalidOperand;
  case MCK_VReg32OrOff:
    return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand;
  case MCK_InterpSlot:
    return Operand.isInterpSlot() ? Match_Success : Match_InvalidOperand;
  case MCK_Attr:
    return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand;
  case MCK_AttrChan:
    return Operand.isAttrChan() ? Match_Success : Match_InvalidOperand;
  default:
    return Match_InvalidOperand;
  }
}
//===----------------------------------------------------------------------===//
// endpgm
//===----------------------------------------------------------------------===//

OperandMatchResultTy AMDGPUAsmParser::parseEndpgmOp(OperandVector &Operands) {
  SMLoc S = Parser.getTok().getLoc();
  int64_t Imm = 0;

  if (!parseExpr(Imm)) {
    // The operand is optional, if not present default to 0
    Imm = 0;
  }

  if (!isUInt<16>(Imm)) {
    Error(S, "expected a 16-bit value");
    return MatchOperand_ParseFail;
  }

  Operands.push_back(
      AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyEndpgm));
  return MatchOperand_Success;
}
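
// Added note (illustrative): on targets that accept it, s_endpgm may be
// written with an optional 16-bit immediate, e.g. "s_endpgm 1"; when no
// expression is present the default value of 0 is kept.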
bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmTyEndpgm); }