//===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "AMDKernelCodeT.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "MCTargetDesc/AMDGPUTargetStreamer.h"
#include "SIDefines.h"
#include "SIInstrInfo.h"
#include "SIRegisterInfo.h"
#include "TargetInfo/AMDGPUTargetInfo.h"
#include "Utils/AMDGPUAsmUtils.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "Utils/AMDKernelCodeTUtils.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/StringSet.h"
#include "llvm/ADT/Twine.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCParser/MCAsmParser.h"
#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
#include "llvm/MC/MCParser/MCTargetAsmParser.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/AMDGPUMetadata.h"
#include "llvm/Support/AMDHSAKernelDescriptor.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/MachineValueType.h"
#include "llvm/Support/TargetParser.h"
#include "llvm/Support/TargetRegistry.h"

using namespace llvm::AMDGPU;
using namespace llvm::amdhsa;

class AMDGPUAsmParser;

enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_AGPR, IS_TTMP, IS_SPECIAL };

//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//

class AMDGPUOperand : public MCParsedAsmOperand {
  SMLoc StartLoc, EndLoc;
  const AMDGPUAsmParser *AsmParser;

  AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_)
      : MCParsedAsmOperand(), Kind(Kind_), AsmParser(AsmParser_) {}

  using Ptr = std::unique_ptr<AMDGPUOperand>;

    bool hasFPModifiers() const { return Abs || Neg; }
    bool hasIntModifiers() const { return Sext; }
    bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); }

    int64_t getFPModifiersOperand() const {
      Operand |= Abs ? SISrcMods::ABS : 0u;
      Operand |= Neg ? SISrcMods::NEG : 0u;
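      // (Illustrative note, not part of the original source: for an input
      // written as "-|v0|", both Abs and Neg are set and the value assembled
      // here is SISrcMods::NEG | SISrcMods::ABS.)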
    int64_t getIntModifiersOperand() const {
      Operand |= Sext ? SISrcMods::SEXT : 0u;

    int64_t getModifiersOperand() const {
      assert(!(hasFPModifiers() && hasIntModifiers())
             && "fp and int modifiers should not be used simultaneously");
      if (hasFPModifiers()) {
        return getFPModifiersOperand();
      } else if (hasIntModifiers()) {
        return getIntModifiersOperand();

    friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);

    mutable ImmKindTy Kind;
  bool isToken() const override {
    // When parsing operands, we can't always tell if something was meant to be
    // a token, like 'gds', or an expression that references a global variable.
    // In this case, we assume the string is an expression, and if we need to
    // interpret it as a token, then we treat the symbol name as the token.
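    // (Illustrative example, not part of the original source: in
    //   ds_write_b32 v1, v2 gds
    // the trailing 'gds' is meant as a modifier token, but a bare identifier
    // in that position could just as well be a symbol reference.)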
    return isSymbolRefExpr();

  bool isSymbolRefExpr() const {
    return isExpr() && Expr && isa<MCSymbolRefExpr>(Expr);

  bool isImm() const override {
    return Kind == Immediate;

  void setImmKindNone() const {
    Imm.Kind = ImmKindTyNone;

  void setImmKindLiteral() const {
    Imm.Kind = ImmKindTyLiteral;

  void setImmKindConst() const {
    Imm.Kind = ImmKindTyConst;

  bool IsImmKindLiteral() const {
    return isImm() && Imm.Kind == ImmKindTyLiteral;

  bool isImmKindConst() const {
    return isImm() && Imm.Kind == ImmKindTyConst;

  bool isInlinableImm(MVT type) const;
  bool isLiteralImm(MVT type) const;

  bool isRegKind() const {
    return Kind == Register;

  bool isReg() const override {
    return isRegKind() && !hasModifiers();

  bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const {
    return isRegClass(RCID) || isInlinableImm(type) || isLiteralImm(type);

  bool isRegOrImmWithInt16InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16);

  bool isRegOrImmWithInt32InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32);

  bool isRegOrImmWithInt64InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64);

  bool isRegOrImmWithFP16InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16);

  bool isRegOrImmWithFP32InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32);

  bool isRegOrImmWithFP64InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64);

  bool isVReg() const {
    return isRegClass(AMDGPU::VGPR_32RegClassID) ||
           isRegClass(AMDGPU::VReg_64RegClassID) ||
           isRegClass(AMDGPU::VReg_96RegClassID) ||
           isRegClass(AMDGPU::VReg_128RegClassID) ||
           isRegClass(AMDGPU::VReg_160RegClassID) ||
           isRegClass(AMDGPU::VReg_192RegClassID) ||
           isRegClass(AMDGPU::VReg_256RegClassID) ||
           isRegClass(AMDGPU::VReg_512RegClassID) ||
           isRegClass(AMDGPU::VReg_1024RegClassID);

  bool isVReg32() const {
    return isRegClass(AMDGPU::VGPR_32RegClassID);

  bool isVReg32OrOff() const {
    return isOff() || isVReg32();

  bool isNull() const {
    return isRegKind() && getReg() == AMDGPU::SGPR_NULL;

  bool isVRegWithInputMods() const;

  bool isSDWAOperand(MVT type) const;
  bool isSDWAFP16Operand() const;
  bool isSDWAFP32Operand() const;
  bool isSDWAInt16Operand() const;
  bool isSDWAInt32Operand() const;

  bool isImmTy(ImmTy ImmT) const {
    return isImm() && Imm.Type == ImmT;

  bool isImmModifier() const {
    return isImm() && Imm.Type != ImmTyNone;

  bool isClampSI() const { return isImmTy(ImmTyClampSI); }
  bool isOModSI() const { return isImmTy(ImmTyOModSI); }
  bool isDMask() const { return isImmTy(ImmTyDMask); }
  bool isDim() const { return isImmTy(ImmTyDim); }
  bool isUNorm() const { return isImmTy(ImmTyUNorm); }
  bool isDA() const { return isImmTy(ImmTyDA); }
  bool isR128A16() const { return isImmTy(ImmTyR128A16); }
  bool isGFX10A16() const { return isImmTy(ImmTyA16); }
  bool isLWE() const { return isImmTy(ImmTyLWE); }
  bool isOff() const { return isImmTy(ImmTyOff); }
  bool isExpTgt() const { return isImmTy(ImmTyExpTgt); }
  bool isExpVM() const { return isImmTy(ImmTyExpVM); }
  bool isExpCompr() const { return isImmTy(ImmTyExpCompr); }
  bool isOffen() const { return isImmTy(ImmTyOffen); }
  bool isIdxen() const { return isImmTy(ImmTyIdxen); }
  bool isAddr64() const { return isImmTy(ImmTyAddr64); }
  bool isOffset() const { return isImmTy(ImmTyOffset) && isUInt<16>(getImm()); }
  bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<8>(getImm()); }
  bool isOffset1() const { return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); }

  bool isFlatOffset() const { return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); }
  bool isGDS() const { return isImmTy(ImmTyGDS); }
  bool isLDS() const { return isImmTy(ImmTyLDS); }
  bool isCPol() const { return isImmTy(ImmTyCPol); }
  bool isSWZ() const { return isImmTy(ImmTySWZ); }
  bool isTFE() const { return isImmTy(ImmTyTFE); }
  bool isD16() const { return isImmTy(ImmTyD16); }
  bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<7>(getImm()); }
  bool isBankMask() const { return isImmTy(ImmTyDppBankMask); }
  bool isRowMask() const { return isImmTy(ImmTyDppRowMask); }
  bool isBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); }
  bool isFI() const { return isImmTy(ImmTyDppFi); }
  bool isSDWADstSel() const { return isImmTy(ImmTySdwaDstSel); }
  bool isSDWASrc0Sel() const { return isImmTy(ImmTySdwaSrc0Sel); }
  bool isSDWASrc1Sel() const { return isImmTy(ImmTySdwaSrc1Sel); }
  bool isSDWADstUnused() const { return isImmTy(ImmTySdwaDstUnused); }
  bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); }
  bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); }
  bool isAttrChan() const { return isImmTy(ImmTyAttrChan); }
  bool isOpSel() const { return isImmTy(ImmTyOpSel); }
  bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); }
  bool isNegLo() const { return isImmTy(ImmTyNegLo); }
  bool isNegHi() const { return isImmTy(ImmTyNegHi); }
  bool isHigh() const { return isImmTy(ImmTyHigh); }

    return isClampSI() || isOModSI();

  bool isRegOrImm() const {
    return isReg() || isImm();

  bool isRegClass(unsigned RCID) const;

  bool isInlineValue() const;

  bool isRegOrInlineNoMods(unsigned RCID, MVT type) const {
    return (isRegClass(RCID) || isInlinableImm(type)) && !hasModifiers();

  bool isSCSrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16);

  bool isSCSrcV2B16() const {

  bool isSCSrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32);

  bool isSCSrcB64() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64);

  bool isBoolReg() const;

  bool isSCSrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16);

  bool isSCSrcV2F16() const {

  bool isSCSrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32);

  bool isSCSrcF64() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64);

  bool isSSrcB32() const {
    return isSCSrcB32() || isLiteralImm(MVT::i32) || isExpr();

  bool isSSrcB16() const {
    return isSCSrcB16() || isLiteralImm(MVT::i16);

  bool isSSrcV2B16() const {
    llvm_unreachable("cannot happen");

  bool isSSrcB64() const {
    // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits.
    return isSCSrcB64() || isLiteralImm(MVT::i64);

  bool isSSrcF32() const {
    return isSCSrcB32() || isLiteralImm(MVT::f32) || isExpr();

  bool isSSrcF64() const {
    return isSCSrcB64() || isLiteralImm(MVT::f64);

  bool isSSrcF16() const {
    return isSCSrcB16() || isLiteralImm(MVT::f16);

  bool isSSrcV2F16() const {
    llvm_unreachable("cannot happen");

  bool isSSrcV2FP32() const {
    llvm_unreachable("cannot happen");

  bool isSCSrcV2FP32() const {
    llvm_unreachable("cannot happen");

  bool isSSrcV2INT32() const {
    llvm_unreachable("cannot happen");

  bool isSCSrcV2INT32() const {
    llvm_unreachable("cannot happen");

  bool isSSrcOrLdsB32() const {
    return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) ||
           isLiteralImm(MVT::i32) || isExpr();
  bool isVCSrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32);

  bool isVCSrcB64() const {
    return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64);

  bool isVCSrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16);

  bool isVCSrcV2B16() const {

  bool isVCSrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32);

  bool isVCSrcF64() const {
    return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64);

  bool isVCSrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16);

  bool isVCSrcV2F16() const {

  bool isVSrcB32() const {
    return isVCSrcF32() || isLiteralImm(MVT::i32) || isExpr();

  bool isVSrcB64() const {
    return isVCSrcF64() || isLiteralImm(MVT::i64);

  bool isVSrcB16() const {
    return isVCSrcB16() || isLiteralImm(MVT::i16);

  bool isVSrcV2B16() const {
    return isVSrcB16() || isLiteralImm(MVT::v2i16);

  bool isVCSrcV2FP32() const {

  bool isVSrcV2FP32() const {
    return isVSrcF64() || isLiteralImm(MVT::v2f32);

  bool isVCSrcV2INT32() const {

  bool isVSrcV2INT32() const {
    return isVSrcB64() || isLiteralImm(MVT::v2i32);

  bool isVSrcF32() const {
    return isVCSrcF32() || isLiteralImm(MVT::f32) || isExpr();

  bool isVSrcF64() const {
    return isVCSrcF64() || isLiteralImm(MVT::f64);

  bool isVSrcF16() const {
    return isVCSrcF16() || isLiteralImm(MVT::f16);

  bool isVSrcV2F16() const {
    return isVSrcF16() || isLiteralImm(MVT::v2f16);

  bool isVISrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i32);

  bool isVISrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i16);

  bool isVISrcV2B16() const {

  bool isVISrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f32);

  bool isVISrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f16);

  bool isVISrcV2F16() const {
    return isVISrcF16() || isVISrcB32();

  bool isVISrc_64B64() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i64);

  bool isVISrc_64F64() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f64);

  bool isVISrc_64V2FP32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f32);

  bool isVISrc_64V2INT32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i32);

  bool isVISrc_256B64() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i64);

  bool isVISrc_256F64() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f64);

  bool isVISrc_128B16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i16);

  bool isVISrc_128V2B16() const {
    return isVISrc_128B16();

  bool isVISrc_128B32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i32);

  bool isVISrc_128F32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f32);

  bool isVISrc_256V2FP32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f32);

  bool isVISrc_256V2INT32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i32);

  bool isVISrc_512B32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i32);

  bool isVISrc_512B16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i16);

  bool isVISrc_512V2B16() const {
    return isVISrc_512B16();

  bool isVISrc_512F32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f32);

  bool isVISrc_512F16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f16);

  bool isVISrc_512V2F16() const {
    return isVISrc_512F16() || isVISrc_512B32();

  bool isVISrc_1024B32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i32);

  bool isVISrc_1024B16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i16);

  bool isVISrc_1024V2B16() const {
    return isVISrc_1024B16();

  bool isVISrc_1024F32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f32);

  bool isVISrc_1024F16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f16);

  bool isVISrc_1024V2F16() const {
    return isVISrc_1024F16() || isVISrc_1024B32();

  bool isAISrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i32);

  bool isAISrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i16);

  bool isAISrcV2B16() const {

  bool isAISrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f32);

  bool isAISrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f16);

  bool isAISrcV2F16() const {
    return isAISrcF16() || isAISrcB32();

  bool isAISrc_64B64() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::i64);

  bool isAISrc_64F64() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::f64);

  bool isAISrc_128B32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i32);

  bool isAISrc_128B16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i16);

  bool isAISrc_128V2B16() const {
    return isAISrc_128B16();

  bool isAISrc_128F32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f32);

  bool isAISrc_128F16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f16);

  bool isAISrc_128V2F16() const {
    return isAISrc_128F16() || isAISrc_128B32();

  bool isVISrc_128F16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f16);

  bool isVISrc_128V2F16() const {
    return isVISrc_128F16() || isVISrc_128B32();

  bool isAISrc_256B64() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::i64);

  bool isAISrc_256F64() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::f64);

  bool isAISrc_512B32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i32);

  bool isAISrc_512B16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i16);

  bool isAISrc_512V2B16() const {
    return isAISrc_512B16();

  bool isAISrc_512F32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f32);

  bool isAISrc_512F16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f16);

  bool isAISrc_512V2F16() const {
    return isAISrc_512F16() || isAISrc_512B32();

  bool isAISrc_1024B32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i32);

  bool isAISrc_1024B16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i16);

  bool isAISrc_1024V2B16() const {
    return isAISrc_1024B16();

  bool isAISrc_1024F32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f32);

  bool isAISrc_1024F16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f16);

  bool isAISrc_1024V2F16() const {
    return isAISrc_1024F16() || isAISrc_1024B32();
  bool isKImmFP32() const {
    return isLiteralImm(MVT::f32);

  bool isKImmFP16() const {
    return isLiteralImm(MVT::f16);

  bool isMem() const override {

  bool isExpr() const {
    return Kind == Expression;

  bool isSoppBrTarget() const {
    return isExpr() || isImm();

  bool isSWaitCnt() const;
  bool isHwreg() const;
  bool isSendMsg() const;
  bool isSwizzle() const;
  bool isSMRDOffset8() const;
  bool isSMEMOffset() const;
  bool isSMRDLiteralOffset() const;
  bool isDPPCtrl() const;
  bool isGPRIdxMode() const;
  bool isS16Imm() const;
  bool isU16Imm() const;
  bool isEndpgm() const;

  StringRef getExpressionAsToken() const {
    const MCSymbolRefExpr *S = cast<MCSymbolRefExpr>(Expr);
    return S->getSymbol().getName();

  StringRef getToken() const {
    if (Kind == Expression)
      return getExpressionAsToken();
    return StringRef(Tok.Data, Tok.Length);

  int64_t getImm() const {

  void setImm(int64_t Val) {

  ImmTy getImmTy() const {

  unsigned getReg() const override {

  SMLoc getStartLoc() const override {

  SMLoc getEndLoc() const override {

  SMRange getLocRange() const {
    return SMRange(StartLoc, EndLoc);

  Modifiers getModifiers() const {
    assert(isRegKind() || isImmTy(ImmTyNone));
    return isRegKind() ? Reg.Mods : Imm.Mods;

  void setModifiers(Modifiers Mods) {
    assert(isRegKind() || isImmTy(ImmTyNone));

  bool hasModifiers() const {
    return getModifiers().hasModifiers();

  bool hasFPModifiers() const {
    return getModifiers().hasFPModifiers();

  bool hasIntModifiers() const {
    return getModifiers().hasIntModifiers();

  uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const;

  void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const;

  void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const;

  template <unsigned Bitwidth>
  void addKImmFPOperands(MCInst &Inst, unsigned N) const;

  void addKImmFP16Operands(MCInst &Inst, unsigned N) const {
    addKImmFPOperands<16>(Inst, N);

  void addKImmFP32Operands(MCInst &Inst, unsigned N) const {
    addKImmFPOperands<32>(Inst, N);

  void addRegOperands(MCInst &Inst, unsigned N) const;

  void addBoolRegOperands(MCInst &Inst, unsigned N) const {
    addRegOperands(Inst, N);

  void addRegOrImmOperands(MCInst &Inst, unsigned N) const {
      addRegOperands(Inst, N);
      Inst.addOperand(MCOperand::createExpr(Expr));
      addImmOperands(Inst, N);

  void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const {
    Modifiers Mods = getModifiers();
    Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
      addRegOperands(Inst, N);
      addImmOperands(Inst, N, false);

  void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasIntModifiers());
    addRegOrImmWithInputModsOperands(Inst, N);

  void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasFPModifiers());
    addRegOrImmWithInputModsOperands(Inst, N);

  void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const {
    Modifiers Mods = getModifiers();
    Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
    addRegOperands(Inst, N);

  void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasIntModifiers());
    addRegWithInputModsOperands(Inst, N);

  void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasFPModifiers());
    addRegWithInputModsOperands(Inst, N);

  void addSoppBrTargetOperands(MCInst &Inst, unsigned N) const {
      addImmOperands(Inst, N);
      Inst.addOperand(MCOperand::createExpr(Expr));
  static void printImmTy(raw_ostream& OS, ImmTy Type) {
    case ImmTyNone: OS << "None"; break;
    case ImmTyGDS: OS << "GDS"; break;
    case ImmTyLDS: OS << "LDS"; break;
    case ImmTyOffen: OS << "Offen"; break;
    case ImmTyIdxen: OS << "Idxen"; break;
    case ImmTyAddr64: OS << "Addr64"; break;
    case ImmTyOffset: OS << "Offset"; break;
    case ImmTyInstOffset: OS << "InstOffset"; break;
    case ImmTyOffset0: OS << "Offset0"; break;
    case ImmTyOffset1: OS << "Offset1"; break;
    case ImmTyCPol: OS << "CPol"; break;
    case ImmTySWZ: OS << "SWZ"; break;
    case ImmTyTFE: OS << "TFE"; break;
    case ImmTyD16: OS << "D16"; break;
    case ImmTyFORMAT: OS << "FORMAT"; break;
    case ImmTyClampSI: OS << "ClampSI"; break;
    case ImmTyOModSI: OS << "OModSI"; break;
    case ImmTyDPP8: OS << "DPP8"; break;
    case ImmTyDppCtrl: OS << "DppCtrl"; break;
    case ImmTyDppRowMask: OS << "DppRowMask"; break;
    case ImmTyDppBankMask: OS << "DppBankMask"; break;
    case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break;
    case ImmTyDppFi: OS << "FI"; break;
    case ImmTySdwaDstSel: OS << "SdwaDstSel"; break;
    case ImmTySdwaSrc0Sel: OS << "SdwaSrc0Sel"; break;
    case ImmTySdwaSrc1Sel: OS << "SdwaSrc1Sel"; break;
    case ImmTySdwaDstUnused: OS << "SdwaDstUnused"; break;
    case ImmTyDMask: OS << "DMask"; break;
    case ImmTyDim: OS << "Dim"; break;
    case ImmTyUNorm: OS << "UNorm"; break;
    case ImmTyDA: OS << "DA"; break;
    case ImmTyR128A16: OS << "R128A16"; break;
    case ImmTyA16: OS << "A16"; break;
    case ImmTyLWE: OS << "LWE"; break;
    case ImmTyOff: OS << "Off"; break;
    case ImmTyExpTgt: OS << "ExpTgt"; break;
    case ImmTyExpCompr: OS << "ExpCompr"; break;
    case ImmTyExpVM: OS << "ExpVM"; break;
    case ImmTyHwreg: OS << "Hwreg"; break;
    case ImmTySendMsg: OS << "SendMsg"; break;
    case ImmTyInterpSlot: OS << "InterpSlot"; break;
    case ImmTyInterpAttr: OS << "InterpAttr"; break;
    case ImmTyAttrChan: OS << "AttrChan"; break;
    case ImmTyOpSel: OS << "OpSel"; break;
    case ImmTyOpSelHi: OS << "OpSelHi"; break;
    case ImmTyNegLo: OS << "NegLo"; break;
    case ImmTyNegHi: OS << "NegHi"; break;
    case ImmTySwizzle: OS << "Swizzle"; break;
    case ImmTyGprIdxMode: OS << "GprIdxMode"; break;
    case ImmTyHigh: OS << "High"; break;
    case ImmTyBLGP: OS << "BLGP"; break;
    case ImmTyCBSZ: OS << "CBSZ"; break;
    case ImmTyABID: OS << "ABID"; break;
    case ImmTyEndpgm: OS << "Endpgm"; break;

  void print(raw_ostream &OS) const override {
      OS << "<register " << getReg() << " mods: " << Reg.Mods << '>';
      OS << '<' << getImm();
      if (getImmTy() != ImmTyNone) {
        OS << " type: "; printImmTy(OS, getImmTy());
      OS << " mods: " << Imm.Mods << '>';
      OS << '\'' << getToken() << '\'';
      OS << "<expr " << *Expr << '>';
  static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser,
                                      int64_t Val, SMLoc Loc,
                                      ImmTy Type = ImmTyNone,
                                      bool IsFPImm = false) {
    auto Op = std::make_unique<AMDGPUOperand>(Immediate, AsmParser);
    Op->Imm.IsFPImm = IsFPImm;
    Op->Imm.Kind = ImmKindTyNone;
    Op->Imm.Type = Type;
    Op->Imm.Mods = Modifiers();

  static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser,
                                        StringRef Str, SMLoc Loc,
                                        bool HasExplicitEncodingSize = true) {
    auto Res = std::make_unique<AMDGPUOperand>(Token, AsmParser);
    Res->Tok.Data = Str.data();
    Res->Tok.Length = Str.size();
    Res->StartLoc = Loc;

  static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser,
                                      unsigned RegNo, SMLoc S,
    auto Op = std::make_unique<AMDGPUOperand>(Register, AsmParser);
    Op->Reg.RegNo = RegNo;
    Op->Reg.Mods = Modifiers();

  static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser,
                                       const class MCExpr *Expr, SMLoc S) {
    auto Op = std::make_unique<AMDGPUOperand>(Expression, AsmParser);

raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) {
  OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext;
//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//

// Holds info related to the current kernel, e.g. count of SGPRs used.
// Kernel scope begins at .amdgpu_hsa_kernel directive, ends at next
// .amdgpu_hsa_kernel or at EOF.
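// (Illustrative note, not part of the original source: the symbols maintained
// below, ".kernel.sgpr_count" and ".kernel.vgpr_count", hold the highest
// SGPR/VGPR index referenced so far plus one, so assembly inside the kernel
// scope could, for example, reference them in expressions to derive register
// budgets.)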
class KernelScopeInfo {
  int SgprIndexUnusedMin = -1;
  int VgprIndexUnusedMin = -1;
  MCContext *Ctx = nullptr;

  void usesSgprAt(int i) {
    if (i >= SgprIndexUnusedMin) {
      SgprIndexUnusedMin = ++i;
        MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count"));
        Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx));

  void usesVgprAt(int i) {
    if (i >= VgprIndexUnusedMin) {
      VgprIndexUnusedMin = ++i;
        MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
        Sym->setVariableValue(MCConstantExpr::create(VgprIndexUnusedMin, *Ctx));

  KernelScopeInfo() = default;

  void initialize(MCContext &Context) {
    usesSgprAt(SgprIndexUnusedMin = -1);
    usesVgprAt(VgprIndexUnusedMin = -1);

  void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex, unsigned RegWidth) {
      case IS_SGPR: usesSgprAt(DwordRegIndex + RegWidth - 1); break;
      case IS_AGPR: // fall through
      case IS_VGPR: usesVgprAt(DwordRegIndex + RegWidth - 1); break;

class AMDGPUAsmParser : public MCTargetAsmParser {
  MCAsmParser &Parser;

  // Number of extra operands parsed after the first optional operand.
  // This may be necessary to skip hardcoded mandatory operands.
  static const unsigned MAX_OPR_LOOKAHEAD = 8;

  unsigned ForcedEncodingSize = 0;
  bool ForcedDPP = false;
  bool ForcedSDWA = false;
  KernelScopeInfo KernelScope;

  /// @name Auto-generated Match Functions

#define GET_ASSEMBLER_HEADER
#include "AMDGPUGenAsmMatcher.inc"

  bool ParseAsAbsoluteExpression(uint32_t &Ret);
  bool OutOfRangeError(SMRange Range);

  /// Calculate VGPR/SGPR blocks required for given target, reserved
  /// registers, and user-specified NextFreeXGPR values.
  ///
  /// \param Features [in] Target features, used for bug corrections.
  /// \param VCCUsed [in] Whether VCC special SGPR is reserved.
  /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved.
  /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved.
  /// \param EnableWavefrontSize32 [in] Value of ENABLE_WAVEFRONT_SIZE32 kernel
  /// descriptor field, if valid.
  /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one.
  /// \param VGPRRange [in] Token range, used for VGPR diagnostics.
  /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one.
  /// \param SGPRRange [in] Token range, used for SGPR diagnostics.
  /// \param VGPRBlocks [out] Result VGPR block count.
  /// \param SGPRBlocks [out] Result SGPR block count.
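  /// (Illustrative note, not part of the original source: block counts are
  /// conventionally encoded as "granules minus one", e.g. with a VGPR
  /// allocation granule of 4, NextFreeVGPR = 10 rounds up to 12 and would
  /// yield VGPRBlocks = 2.)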
  bool calculateGPRBlocks(const FeatureBitset &Features, bool VCCUsed,
                          bool FlatScrUsed, bool XNACKUsed,
                          Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR,
                          SMRange VGPRRange, unsigned NextFreeSGPR,
                          SMRange SGPRRange, unsigned &VGPRBlocks,
                          unsigned &SGPRBlocks);
  bool ParseDirectiveAMDGCNTarget();
  bool ParseDirectiveAMDHSAKernel();
  bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor);
  bool ParseDirectiveHSACodeObjectVersion();
  bool ParseDirectiveHSACodeObjectISA();
  bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header);
  bool ParseDirectiveAMDKernelCodeT();
  // TODO: Possibly make subtargetHasRegister const.
  bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo);
  bool ParseDirectiveAMDGPUHsaKernel();

  bool ParseDirectiveISAVersion();
  bool ParseDirectiveHSAMetadata();
  bool ParseDirectivePALMetadataBegin();
  bool ParseDirectivePALMetadata();
  bool ParseDirectiveAMDGPULDS();
  /// Common code to parse out a block of text (typically YAML) between start
  /// and end directives.
  bool ParseToEndDirective(const char *AssemblerDirectiveBegin,
                           const char *AssemblerDirectiveEnd,
                           std::string &CollectString);

  bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth,
                             RegisterKind RegKind, unsigned Reg1, SMLoc Loc);
  bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
                           unsigned &RegNum, unsigned &RegWidth,
                           bool RestoreOnFailure = false);
  bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
                           unsigned &RegNum, unsigned &RegWidth,
                           SmallVectorImpl<AsmToken> &Tokens);
  unsigned ParseRegularReg(RegisterKind &RegKind, unsigned &RegNum,
                           SmallVectorImpl<AsmToken> &Tokens);
  unsigned ParseSpecialReg(RegisterKind &RegKind, unsigned &RegNum,
                           SmallVectorImpl<AsmToken> &Tokens);
  unsigned ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
                        unsigned &RegWidth, SmallVectorImpl<AsmToken> &Tokens);
  bool ParseRegRange(unsigned& Num, unsigned& Width);
  unsigned getRegularReg(RegisterKind RegKind,

  bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const;
  Optional<StringRef> getGprCountSymbolName(RegisterKind RegKind);
  void initializeGprCountSymbol(RegisterKind RegKind);
  bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex,

  void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands,
                    bool IsAtomic, bool IsLds = false);
  void cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
                 bool IsGdsHardcoded);

  enum AMDGPUMatchResultTy {
    Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY

    OperandMode_Default,

  using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;

  AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
                  const MCInstrInfo &MII,
                  const MCTargetOptions &Options)
      : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) {
    MCAsmParserExtension::Initialize(Parser);

    if (getFeatureBits().none()) {
      // Set default features.
      copySTI().ToggleFeature("southern-islands");

    setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));
    // TODO: make these pre-defined variables read-only.
    // Currently there is no suitable machinery in the core llvm-mc for this.
    // MCSymbol::isRedefinable is intended for another purpose, and
    // AsmParser::parseDirectiveSet() cannot be specialized for a specific target.
    AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
    MCContext &Ctx = getContext();
    if (ISA.Major >= 6 && isHsaAbiVersion3Or4(&getSTI())) {
          Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_number"));
      Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
      Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_minor"));
      Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
      Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_stepping"));
      Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));

          Ctx.getOrCreateSymbol(Twine(".option.machine_version_major"));
      Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
      Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_minor"));
      Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
      Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping"));
      Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
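      // (Illustrative note, not part of the original source: these variables
      // let assembly sources test the target at assembly time, e.g.
      //   .if .option.machine_version_major >= 9
      // to guard code that should only assemble for GFX9 and later.)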
    if (ISA.Major >= 6 && isHsaAbiVersion3Or4(&getSTI())) {
      initializeGprCountSymbol(IS_VGPR);
      initializeGprCountSymbol(IS_SGPR);
      KernelScope.initialize(getContext());

  bool hasMIMG_R128() const {
    return AMDGPU::hasMIMG_R128(getSTI());

  bool hasPackedD16() const {
    return AMDGPU::hasPackedD16(getSTI());

  bool hasGFX10A16() const {
    return AMDGPU::hasGFX10A16(getSTI());

  bool hasG16() const { return AMDGPU::hasG16(getSTI()); }

    return AMDGPU::isSI(getSTI());

    return AMDGPU::isCI(getSTI());

    return AMDGPU::isVI(getSTI());

  bool isGFX9() const {
    return AMDGPU::isGFX9(getSTI());

  bool isGFX90A() const {
    return AMDGPU::isGFX90A(getSTI());

  bool isGFX9Plus() const {
    return AMDGPU::isGFX9Plus(getSTI());

  bool isGFX10() const {
    return AMDGPU::isGFX10(getSTI());

  bool isGFX10Plus() const { return AMDGPU::isGFX10Plus(getSTI()); }

  bool isGFX10_BEncoding() const {
    return AMDGPU::isGFX10_BEncoding(getSTI());

  bool hasInv2PiInlineImm() const {
    return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];

  bool hasFlatOffsets() const {
    return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets];

  bool hasArchitectedFlatScratch() const {
    return getFeatureBits()[AMDGPU::FeatureArchitectedFlatScratch];

  bool hasSGPR102_SGPR103() const {
    return !isVI() && !isGFX9();

  bool hasSGPR104_SGPR105() const { return isGFX10Plus(); }

  bool hasIntClamp() const {
    return getFeatureBits()[AMDGPU::FeatureIntClamp];

  AMDGPUTargetStreamer &getTargetStreamer() {
    MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
    return static_cast<AMDGPUTargetStreamer &>(TS);

  const MCRegisterInfo *getMRI() const {
    // We need this const_cast because for some reason getContext() is not const
    return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo();

  const MCInstrInfo *getMII() const {

  const FeatureBitset &getFeatureBits() const {
    return getSTI().getFeatureBits();

  void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; }
  void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; }
  void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; }

  unsigned getForcedEncodingSize() const { return ForcedEncodingSize; }
  bool isForcedVOP3() const { return ForcedEncodingSize == 64; }
  bool isForcedDPP() const { return ForcedDPP; }
  bool isForcedSDWA() const { return ForcedSDWA; }
  ArrayRef<unsigned> getMatchedVariants() const;
  StringRef getMatchedVariantName() const;

  std::unique_ptr<AMDGPUOperand> parseRegister(bool RestoreOnFailure = false);
  bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc,
                     bool RestoreOnFailure);
  bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
  OperandMatchResultTy tryParseRegister(unsigned &RegNo, SMLoc &StartLoc,
                                        SMLoc &EndLoc) override;
  unsigned checkTargetMatchPredicate(MCInst &Inst) override;
  unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
                                      unsigned Kind) override;
  bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
                               OperandVector &Operands, MCStreamer &Out,
                               uint64_t &ErrorInfo,
                               bool MatchingInlineAsm) override;
  bool ParseDirective(AsmToken DirectiveID) override;
  OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Mnemonic,
                                    OperandMode Mode = OperandMode_Default);
  StringRef parseMnemonicSuffix(StringRef Name);
  bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
                        SMLoc NameLoc, OperandVector &Operands) override;
  //bool ProcessInstruction(MCInst &Inst);

  OperandMatchResultTy parseIntWithPrefix(const char *Prefix, int64_t &Int);

  OperandMatchResultTy
  parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
                     AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
                     bool (*ConvertResult)(int64_t &) = nullptr);

  OperandMatchResultTy
  parseOperandArrayWithPrefix(const char *Prefix,
                              OperandVector &Operands,
                              AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
                              bool (*ConvertResult)(int64_t&) = nullptr);

  OperandMatchResultTy
  parseNamedBit(StringRef Name, OperandVector &Operands,
                AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone);
  OperandMatchResultTy parseCPol(OperandVector &Operands);
  OperandMatchResultTy parseStringWithPrefix(StringRef Prefix,

  bool isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
  bool isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
  bool isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
  bool isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const;
  bool parseSP3NegModifier();
  OperandMatchResultTy parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false);
  OperandMatchResultTy parseReg(OperandVector &Operands);
  OperandMatchResultTy parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false);
  OperandMatchResultTy parseRegOrImmWithFPInputMods(OperandVector &Operands, bool AllowImm = true);
  OperandMatchResultTy parseRegOrImmWithIntInputMods(OperandVector &Operands, bool AllowImm = true);
  OperandMatchResultTy parseRegWithFPInputMods(OperandVector &Operands);
  OperandMatchResultTy parseRegWithIntInputMods(OperandVector &Operands);
  OperandMatchResultTy parseVReg32OrOff(OperandVector &Operands);
  OperandMatchResultTy parseDfmtNfmt(int64_t &Format);
  OperandMatchResultTy parseUfmt(int64_t &Format);
  OperandMatchResultTy parseSymbolicSplitFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format);
  OperandMatchResultTy parseSymbolicUnifiedFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format);
  OperandMatchResultTy parseFORMAT(OperandVector &Operands);
  OperandMatchResultTy parseSymbolicOrNumericFormat(int64_t &Format);
  OperandMatchResultTy parseNumericFormat(int64_t &Format);
  bool tryParseFmt(const char *Pref, int64_t MaxVal, int64_t &Val);
  bool matchDfmtNfmt(int64_t &Dfmt, int64_t &Nfmt, StringRef FormatStr, SMLoc Loc);

  void cvtDSOffset01(MCInst &Inst, const OperandVector &Operands);
  void cvtDS(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, false); }
  void cvtDSGds(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, true); }
  void cvtExp(MCInst &Inst, const OperandVector &Operands);

  bool parseCnt(int64_t &IntVal);
  OperandMatchResultTy parseSWaitCntOps(OperandVector &Operands);
  OperandMatchResultTy parseHwreg(OperandVector &Operands);
  struct OperandInfoTy {
    bool IsSymbolic = false;
    bool IsDefined = false;

    OperandInfoTy(int64_t Id_) : Id(Id_) {}

  bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &Op, OperandInfoTy &Stream);
  bool validateSendMsg(const OperandInfoTy &Msg,
                       const OperandInfoTy &Op,
                       const OperandInfoTy &Stream);

  bool parseHwregBody(OperandInfoTy &HwReg,
                      OperandInfoTy &Offset,
                      OperandInfoTy &Width);
  bool validateHwreg(const OperandInfoTy &HwReg,
                     const OperandInfoTy &Offset,
                     const OperandInfoTy &Width);

  SMLoc getFlatOffsetLoc(const OperandVector &Operands) const;
  SMLoc getSMEMOffsetLoc(const OperandVector &Operands) const;

  SMLoc getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test,
                      const OperandVector &Operands) const;
  SMLoc getImmLoc(AMDGPUOperand::ImmTy Type, const OperandVector &Operands) const;
  SMLoc getRegLoc(unsigned Reg, const OperandVector &Operands) const;
  SMLoc getLitLoc(const OperandVector &Operands) const;
  SMLoc getConstLoc(const OperandVector &Operands) const;

  bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc, const OperandVector &Operands);
  bool validateFlatOffset(const MCInst &Inst, const OperandVector &Operands);
  bool validateSMEMOffset(const MCInst &Inst, const OperandVector &Operands);
  bool validateSOPLiteral(const MCInst &Inst) const;
  bool validateConstantBusLimitations(const MCInst &Inst, const OperandVector &Operands);
  bool validateEarlyClobberLimitations(const MCInst &Inst, const OperandVector &Operands);
  bool validateIntClampSupported(const MCInst &Inst);
  bool validateMIMGAtomicDMask(const MCInst &Inst);
  bool validateMIMGGatherDMask(const MCInst &Inst);
  bool validateMovrels(const MCInst &Inst, const OperandVector &Operands);
  bool validateMIMGDataSize(const MCInst &Inst);
  bool validateMIMGAddrSize(const MCInst &Inst);
  bool validateMIMGD16(const MCInst &Inst);
  bool validateMIMGDim(const MCInst &Inst);
  bool validateMIMGMSAA(const MCInst &Inst);
  bool validateOpSel(const MCInst &Inst);
  bool validateDPP(const MCInst &Inst, const OperandVector &Operands);
  bool validateVccOperand(unsigned Reg) const;
  bool validateVOP3Literal(const MCInst &Inst, const OperandVector &Operands);
  bool validateMAIAccWrite(const MCInst &Inst, const OperandVector &Operands);
  bool validateAGPRLdSt(const MCInst &Inst) const;
  bool validateVGPRAlign(const MCInst &Inst) const;
  bool validateGWS(const MCInst &Inst, const OperandVector &Operands);
  bool validateDivScale(const MCInst &Inst);
  bool validateCoherencyBits(const MCInst &Inst, const OperandVector &Operands,
                             const SMLoc &IDLoc);
  Optional<StringRef> validateLdsDirect(const MCInst &Inst);
  unsigned getConstantBusLimit(unsigned Opcode) const;
  bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
  bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
  unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const;

  bool isSupportedMnemo(StringRef Mnemo,
                        const FeatureBitset &FBS);
  bool isSupportedMnemo(StringRef Mnemo,
                        const FeatureBitset &FBS,
                        ArrayRef<unsigned> Variants);
  bool checkUnsupportedInstruction(StringRef Name, const SMLoc &IDLoc);

  bool isId(const StringRef Id) const;
  bool isId(const AsmToken &Token, const StringRef Id) const;
  bool isToken(const AsmToken::TokenKind Kind) const;
  bool trySkipId(const StringRef Id);
  bool trySkipId(const StringRef Pref, const StringRef Id);
  bool trySkipId(const StringRef Id, const AsmToken::TokenKind Kind);
  bool trySkipToken(const AsmToken::TokenKind Kind);
  bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg);
  bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string");
  bool parseId(StringRef &Val, const StringRef ErrMsg = "");

  void peekTokens(MutableArrayRef<AsmToken> Tokens);
  AsmToken::TokenKind getTokenKind() const;
  bool parseExpr(int64_t &Imm, StringRef Expected = "");
  bool parseExpr(OperandVector &Operands);
  StringRef getTokenStr() const;
  AsmToken peekToken();
  AsmToken getToken() const;
  SMLoc getLoc() const;

  void onBeginOfFile() override;
  OperandMatchResultTy parseOptionalOperand(OperandVector &Operands);
  OperandMatchResultTy parseOptionalOpr(OperandVector &Operands);

  OperandMatchResultTy parseExpTgt(OperandVector &Operands);
  OperandMatchResultTy parseSendMsgOp(OperandVector &Operands);
  OperandMatchResultTy parseInterpSlot(OperandVector &Operands);
  OperandMatchResultTy parseInterpAttr(OperandVector &Operands);
  OperandMatchResultTy parseSOppBrTarget(OperandVector &Operands);
  OperandMatchResultTy parseBoolReg(OperandVector &Operands);

  bool parseSwizzleOperand(int64_t &Op,
                           const unsigned MinVal,
                           const unsigned MaxVal,
                           const StringRef ErrMsg,
  bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
                            const unsigned MinVal,
                            const unsigned MaxVal,
                            const StringRef ErrMsg);
  OperandMatchResultTy parseSwizzleOp(OperandVector &Operands);
  bool parseSwizzleOffset(int64_t &Imm);
  bool parseSwizzleMacro(int64_t &Imm);
  bool parseSwizzleQuadPerm(int64_t &Imm);
  bool parseSwizzleBitmaskPerm(int64_t &Imm);
  bool parseSwizzleBroadcast(int64_t &Imm);
  bool parseSwizzleSwap(int64_t &Imm);
  bool parseSwizzleReverse(int64_t &Imm);

  OperandMatchResultTy parseGPRIdxMode(OperandVector &Operands);
  int64_t parseGPRIdxMacro();

  void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false); }
  void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true); }
  void cvtMubufLds(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, true); }
  void cvtMtbuf(MCInst &Inst, const OperandVector &Operands);

  AMDGPUOperand::Ptr defaultCPol() const;

  AMDGPUOperand::Ptr defaultSMRDOffset8() const;
  AMDGPUOperand::Ptr defaultSMEMOffset() const;
  AMDGPUOperand::Ptr defaultSMRDLiteralOffset() const;
  AMDGPUOperand::Ptr defaultFlatOffset() const;

  OperandMatchResultTy parseOModOperand(OperandVector &Operands);

  void cvtVOP3(MCInst &Inst, const OperandVector &Operands,
               OptionalImmIndexMap &OptionalIdx);
  void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands);
  void cvtVOP3(MCInst &Inst, const OperandVector &Operands);
  void cvtVOP3P(MCInst &Inst, const OperandVector &Operands);
  void cvtVOP3P(MCInst &Inst, const OperandVector &Operands,
                OptionalImmIndexMap &OptionalIdx);

  void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands);

  void cvtMIMG(MCInst &Inst, const OperandVector &Operands,
               bool IsAtomic = false);
  void cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands);
  void cvtIntersectRay(MCInst &Inst, const OperandVector &Operands);

  void cvtSMEMAtomic(MCInst &Inst, const OperandVector &Operands);

  bool parseDimId(unsigned &Encoding);
  OperandMatchResultTy parseDim(OperandVector &Operands);
  OperandMatchResultTy parseDPP8(OperandVector &Operands);
  OperandMatchResultTy parseDPPCtrl(OperandVector &Operands);
  bool isSupportedDPPCtrl(StringRef Ctrl, const OperandVector &Operands);
  int64_t parseDPPCtrlSel(StringRef Ctrl);
  int64_t parseDPPCtrlPerm();
  AMDGPUOperand::Ptr defaultRowMask() const;
  AMDGPUOperand::Ptr defaultBankMask() const;
  AMDGPUOperand::Ptr defaultBoundCtrl() const;
  AMDGPUOperand::Ptr defaultFI() const;
  void cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8 = false);
  void cvtDPP8(MCInst &Inst, const OperandVector &Operands) { cvtDPP(Inst, Operands, true); }

  OperandMatchResultTy parseSDWASel(OperandVector &Operands, StringRef Prefix,
                                    AMDGPUOperand::ImmTy Type);
  OperandMatchResultTy parseSDWADstUnused(OperandVector &Operands);
  void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands);
  void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands);
  void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands);
  void cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands);
  void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands);
  void cvtSDWA(MCInst &Inst, const OperandVector &Operands,
               uint64_t BasicInstType,
               bool SkipDstVcc = false,
               bool SkipSrcVcc = false);

  AMDGPUOperand::Ptr defaultBLGP() const;
  AMDGPUOperand::Ptr defaultCBSZ() const;
  AMDGPUOperand::Ptr defaultABID() const;

  OperandMatchResultTy parseEndpgmOp(OperandVector &Operands);
  AMDGPUOperand::Ptr defaultEndpgmImmOperands() const;

struct OptionalOperand {
  AMDGPUOperand::ImmTy Type;

  bool (*ConvertResult)(int64_t&);

} // end anonymous namespace
// May be called with integer type with equivalent bitwidth.
static const fltSemantics *getFltSemantics(unsigned Size) {
    return &APFloat::IEEEsingle();
    return &APFloat::IEEEdouble();
    return &APFloat::IEEEhalf();
    llvm_unreachable("unsupported fp type");

static const fltSemantics *getFltSemantics(MVT VT) {
  return getFltSemantics(VT.getSizeInBits() / 8);

static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
  switch (OperandType) {
  case AMDGPU::OPERAND_REG_IMM_INT32:
  case AMDGPU::OPERAND_REG_IMM_FP32:
  case AMDGPU::OPERAND_REG_INLINE_C_INT32:
  case AMDGPU::OPERAND_REG_INLINE_C_FP32:
  case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
  case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
  case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
  case AMDGPU::OPERAND_REG_IMM_V2FP32:
  case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
  case AMDGPU::OPERAND_REG_IMM_V2INT32:
    return &APFloat::IEEEsingle();
  case AMDGPU::OPERAND_REG_IMM_INT64:
  case AMDGPU::OPERAND_REG_IMM_FP64:
  case AMDGPU::OPERAND_REG_INLINE_C_INT64:
  case AMDGPU::OPERAND_REG_INLINE_C_FP64:
  case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
    return &APFloat::IEEEdouble();
  case AMDGPU::OPERAND_REG_IMM_INT16:
  case AMDGPU::OPERAND_REG_IMM_FP16:
  case AMDGPU::OPERAND_REG_INLINE_C_INT16:
  case AMDGPU::OPERAND_REG_INLINE_C_FP16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
  case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
  case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
  case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
  case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
  case AMDGPU::OPERAND_REG_IMM_V2INT16:
  case AMDGPU::OPERAND_REG_IMM_V2FP16:
    return &APFloat::IEEEhalf();
  llvm_unreachable("unsupported fp type");

//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//

static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) {

  // Convert literal to single precision
  APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT),
                                               APFloat::rmNearestTiesToEven,
  // We allow precision loss but not overflow or underflow
  if (Status != APFloat::opOK &&
      ((Status & APFloat::opOverflow) != 0 ||
       (Status & APFloat::opUnderflow) != 0)) {

static bool isSafeTruncation(int64_t Val, unsigned Size) {
  return isUIntN(Size, Val) || isIntN(Size, Val);
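// (Illustrative note, not part of the original source: isSafeTruncation(-1, 16)
// and isSafeTruncation(0xFFFF, 16) are both true, while
// isSafeTruncation(0x1FFFF, 16) is false because the value fits neither as a
// 16-bit unsigned nor as a 16-bit signed integer.)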
static bool isInlineableLiteralOp16(int64_t Val, MVT VT, bool HasInv2Pi) {
  if (VT.getScalarType() == MVT::i16) {
    // FP immediate values are broken.
    return isInlinableIntLiteral(Val);

  // f16/v2f16 operands work correctly for all values.
  return AMDGPU::isInlinableLiteral16(Val, HasInv2Pi);

bool AMDGPUOperand::isInlinableImm(MVT type) const {

  // This is a hack to enable named inline values like
  // shared_base with both 32-bit and 64-bit operands.
  // Note that these values are defined as
  // 32-bit operands only.
  if (isInlineValue()) {

  if (!isImmTy(ImmTyNone)) {
    // Only plain immediates are inlinable (e.g. "clamp" attribute is not)

  // TODO: We should avoid using host float here. It would be better to
  // check the float bit values which is what a few other places do.
  // We've had bot failures before due to weird NaN support on mips hosts.

  APInt Literal(64, Imm.Val);

  if (Imm.IsFPImm) { // We got fp literal token
    if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
      return AMDGPU::isInlinableLiteral64(Imm.Val,
                                          AsmParser->hasInv2PiInlineImm());

    APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
    if (!canLosslesslyConvertToFPType(FPLiteral, type))

    if (type.getScalarSizeInBits() == 16) {
      return isInlineableLiteralOp16(
        static_cast<int16_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
        type, AsmParser->hasInv2PiInlineImm());

    // Check if single precision literal is inlinable
    return AMDGPU::isInlinableLiteral32(
      static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
      AsmParser->hasInv2PiInlineImm());

  // We got int literal token.
  if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
    return AMDGPU::isInlinableLiteral64(Imm.Val,
                                        AsmParser->hasInv2PiInlineImm());

  if (!isSafeTruncation(Imm.Val, type.getScalarSizeInBits())) {

  if (type.getScalarSizeInBits() == 16) {
    return isInlineableLiteralOp16(
      static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()),
      type, AsmParser->hasInv2PiInlineImm());

  return AMDGPU::isInlinableLiteral32(
    static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()),
    AsmParser->hasInv2PiInlineImm());

bool AMDGPUOperand::isLiteralImm(MVT type) const {
  // Check that this immediate can be added as literal
  if (!isImmTy(ImmTyNone)) {

    // We got int literal token.

    if (type == MVT::f64 && hasFPModifiers()) {
      // Cannot apply fp modifiers to int literals preserving the same semantics
      // for VOP1/2/C and VOP3 because of integer truncation. To avoid ambiguity,
      // disable these cases.

    unsigned Size = type.getSizeInBits();

    // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP

    return isSafeTruncation(Imm.Val, Size);

  // We got fp literal token

  if (type == MVT::f64) { // Expected 64-bit fp operand
    // We would set the low 64 bits of the literal to zeroes, but we accept
    // such literals.

  if (type == MVT::i64) { // Expected 64-bit int operand
    // We don't allow fp literals in 64-bit integer instructions. It is
    // unclear how we should encode them.

  // We allow fp literals with f16x2 operands assuming that the specified
  // literal goes into the lower half and the upper half is zero. We also
  // require that the literal may be losslessly converted to f16.
1890 MVT ExpectedType
= (type
== MVT::v2f16
)? MVT::f16
:
1891 (type
== MVT::v2i16
)? MVT::i16
:
1892 (type
== MVT::v2f32
)? MVT::f32
: type
;
1894 APFloat
FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm
.Val
));
1895 return canLosslesslyConvertToFPType(FPLiteral
, ExpectedType
);
1898 bool AMDGPUOperand::isRegClass(unsigned RCID
) const {
1899 return isRegKind() && AsmParser
->getMRI()->getRegClass(RCID
).contains(getReg());
1902 bool AMDGPUOperand::isVRegWithInputMods() const {
1903 return isRegClass(AMDGPU::VGPR_32RegClassID
) ||
1904 // GFX90A allows DPP on 64-bit operands.
1905 (isRegClass(AMDGPU::VReg_64RegClassID
) &&
1906 AsmParser
->getFeatureBits()[AMDGPU::Feature64BitDPP
]);
bool AMDGPUOperand::isSDWAOperand(MVT type) const {
  if (AsmParser->isVI())
    return isVReg32();
  else if (AsmParser->isGFX9Plus())
    return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type);
  else
    return false;
}

bool AMDGPUOperand::isSDWAFP16Operand() const {
  return isSDWAOperand(MVT::f16);
}

bool AMDGPUOperand::isSDWAFP32Operand() const {
  return isSDWAOperand(MVT::f32);
}

bool AMDGPUOperand::isSDWAInt16Operand() const {
  return isSDWAOperand(MVT::i16);
}

bool AMDGPUOperand::isSDWAInt32Operand() const {
  return isSDWAOperand(MVT::i32);
}
bool AMDGPUOperand::isBoolReg() const {
  auto FB = AsmParser->getFeatureBits();
  return isReg() && ((FB[AMDGPU::FeatureWavefrontSize64] && isSCSrcB64()) ||
                     (FB[AMDGPU::FeatureWavefrontSize32] && isSCSrcB32()));
}
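
// In wave32 mode a boolean operand is a 32-bit SGPR (e.g. vcc_lo or s0),
// while in wave64 mode it is a 64-bit SGPR pair (e.g. vcc or s[0:1]).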
uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const
{
  assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
  assert(Size == 2 || Size == 4 || Size == 8);

  const uint64_t FpSignMask = (1ULL << (Size * 8 - 1));

  if (Imm.Mods.Abs) {
    Val &= ~FpSignMask;
  }
  if (Imm.Mods.Neg) {
    Val ^= FpSignMask;
  }

  return Val;
}
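
// Minimal sketch of the effect (assuming an IEEE-style sign-in-MSB encoding):
// for a 32-bit value, 'abs' clears bit 31 and 'neg' flips it, e.g.
//   0xC0400000 (-3.0f) --abs--> 0x40400000 (3.0f) --neg--> 0xC0400000 (-3.0f)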
void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const {
  if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()),
                             Inst.getNumOperands())) {
    addLiteralImmOperand(Inst, Imm.Val,
                         ApplyModifiers &
                         isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
  } else {
    assert(!isImmTy(ImmTyNone) || !hasModifiers());
    Inst.addOperand(MCOperand::createImm(Imm.Val));
    setImmKindNone();
  }
}
void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const {
  const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode());
  auto OpNum = Inst.getNumOperands();
  // Check that this operand accepts literals
  assert(AMDGPU::isSISrcOperand(InstDesc, OpNum));

  if (ApplyModifiers) {
    assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum));
    const unsigned Size = Imm.IsFPImm ? sizeof(double) : getOperandSize(InstDesc, OpNum);
    Val = applyInputFPModifiers(Val, Size);
  }

  APInt Literal(64, Val);
  uint8_t OpTy = InstDesc.OpInfo[OpNum].OperandType;

  if (Imm.IsFPImm) { // We got fp literal token
    switch (OpTy) {
    case AMDGPU::OPERAND_REG_IMM_INT64:
    case AMDGPU::OPERAND_REG_IMM_FP64:
    case AMDGPU::OPERAND_REG_INLINE_C_INT64:
    case AMDGPU::OPERAND_REG_INLINE_C_FP64:
    case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
      if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(),
                                       AsmParser->hasInv2PiInlineImm())) {
        Inst.addOperand(MCOperand::createImm(Literal.getZExtValue()));
        setImmKindConst();
        return;
      }

      if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) { // Expected 64-bit fp operand
        // For fp operands we check if low 32 bits are zeros
        if (Literal.getLoBits(32) != 0) {
          const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(),
          "Can't encode literal as exact 64-bit floating-point operand. "
          "Low 32-bits will be set to zero");
        }

        Inst.addOperand(MCOperand::createImm(Literal.lshr(32).getZExtValue()));
        setImmKindLiteral();
        return;
      }

      // We don't allow fp literals in 64-bit integer instructions. It is
      // unclear how we should encode them. This case should be checked earlier
      // in predicate methods (isLiteralImm())
      llvm_unreachable("fp literal in 64-bit integer instruction.");
    case AMDGPU::OPERAND_REG_IMM_INT32:
    case AMDGPU::OPERAND_REG_IMM_FP32:
    case AMDGPU::OPERAND_REG_INLINE_C_INT32:
    case AMDGPU::OPERAND_REG_INLINE_C_FP32:
    case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
    case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
    case AMDGPU::OPERAND_REG_IMM_INT16:
    case AMDGPU::OPERAND_REG_IMM_FP16:
    case AMDGPU::OPERAND_REG_INLINE_C_INT16:
    case AMDGPU::OPERAND_REG_INLINE_C_FP16:
    case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
    case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
    case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
    case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
    case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
    case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
    case AMDGPU::OPERAND_REG_IMM_V2INT16:
    case AMDGPU::OPERAND_REG_IMM_V2FP16:
    case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
    case AMDGPU::OPERAND_REG_IMM_V2FP32:
    case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
    case AMDGPU::OPERAND_REG_IMM_V2INT32: {
      bool lost;
      APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
      // Convert literal to the floating-point semantics of this operand
      FPLiteral.convert(*getOpFltSemantics(OpTy),
                        APFloat::rmNearestTiesToEven, &lost);
      // We allow precision lost but not overflow or underflow. This should be
      // checked earlier in isLiteralImm()

      uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
      Inst.addOperand(MCOperand::createImm(ImmVal));
      setImmKindLiteral();
      return;
    }
    default:
      llvm_unreachable("invalid operand size");
    }

    return;
  }
  // We got int literal token.
  // Only sign extend inline immediates.
  switch (OpTy) {
  case AMDGPU::OPERAND_REG_IMM_INT32:
  case AMDGPU::OPERAND_REG_IMM_FP32:
  case AMDGPU::OPERAND_REG_INLINE_C_INT32:
  case AMDGPU::OPERAND_REG_INLINE_C_FP32:
  case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
  case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
  case AMDGPU::OPERAND_REG_IMM_V2INT16:
  case AMDGPU::OPERAND_REG_IMM_V2FP16:
  case AMDGPU::OPERAND_REG_IMM_V2FP32:
  case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
  case AMDGPU::OPERAND_REG_IMM_V2INT32:
  case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
    if (isSafeTruncation(Val, 32) &&
        AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val),
                                     AsmParser->hasInv2PiInlineImm())) {
      Inst.addOperand(MCOperand::createImm(Val));
      setImmKindConst();
      return;
    }

    Inst.addOperand(MCOperand::createImm(Val & 0xffffffff));
    setImmKindLiteral();
    return;

  case AMDGPU::OPERAND_REG_IMM_INT64:
  case AMDGPU::OPERAND_REG_IMM_FP64:
  case AMDGPU::OPERAND_REG_INLINE_C_INT64:
  case AMDGPU::OPERAND_REG_INLINE_C_FP64:
  case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
    if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) {
      Inst.addOperand(MCOperand::createImm(Val));
      setImmKindConst();
      return;
    }

    Inst.addOperand(MCOperand::createImm(Lo_32(Val)));
    setImmKindLiteral();
    return;

  case AMDGPU::OPERAND_REG_IMM_INT16:
  case AMDGPU::OPERAND_REG_IMM_FP16:
  case AMDGPU::OPERAND_REG_INLINE_C_INT16:
  case AMDGPU::OPERAND_REG_INLINE_C_FP16:
  case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
  case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
    if (isSafeTruncation(Val, 16) &&
        AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
                                     AsmParser->hasInv2PiInlineImm())) {
      Inst.addOperand(MCOperand::createImm(Val));
      setImmKindConst();
      return;
    }

    Inst.addOperand(MCOperand::createImm(Val & 0xffff));
    setImmKindLiteral();
    return;

  case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
  case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
  case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: {
    assert(isSafeTruncation(Val, 16));
    assert(AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
                                        AsmParser->hasInv2PiInlineImm()));

    Inst.addOperand(MCOperand::createImm(Val));
    return;
  }
  default:
    llvm_unreachable("invalid operand size");
  }
}
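
// Illustrative behaviour: an fp literal used with a 64-bit operand keeps only
// the high 32 bits of its IEEE double encoding (the warning above fires when
// the discarded low bits are non-zero), e.g. 2.5 can be encoded exactly while
// 1.1 cannot.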
template <unsigned Bitwidth>
void AMDGPUOperand::addKImmFPOperands(MCInst &Inst, unsigned N) const {
  APInt Literal(64, Imm.Val);
  setImmKindNone();

  if (!Imm.IsFPImm) {
    // We got int literal token.
    Inst.addOperand(MCOperand::createImm(Literal.getLoBits(Bitwidth).getZExtValue()));
    return;
  }

  bool Lost;
  APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
  FPLiteral.convert(*getFltSemantics(Bitwidth / 8),
                    APFloat::rmNearestTiesToEven, &Lost);
  Inst.addOperand(MCOperand::createImm(FPLiteral.bitcastToAPInt().getZExtValue()));
}
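
// KImm operands are the fixed 32-bit/16-bit constants of instructions such as
// v_madmk_f32 / v_madak_f32, e.g. (illustrative, not from the original tests)
//   v_madmk_f32 v0, v1, 0x41800000, v2   // K = 16.0f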
void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const {
  Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI())));
}
static bool isInlineValue(unsigned Reg) {
  switch (Reg) {
  case AMDGPU::SRC_SHARED_BASE:
  case AMDGPU::SRC_SHARED_LIMIT:
  case AMDGPU::SRC_PRIVATE_BASE:
  case AMDGPU::SRC_PRIVATE_LIMIT:
  case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
    return true;
  case AMDGPU::SRC_VCCZ:
  case AMDGPU::SRC_EXECZ:
  case AMDGPU::SRC_SCC:
    return true;
  case AMDGPU::SGPR_NULL:
    return true;
  default:
    return false;
  }
}

bool AMDGPUOperand::isInlineValue() const {
  return isRegKind() && ::isInlineValue(getReg());
}
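
// These registers read like inline constants on targets that define them,
// e.g. (illustrative) "v_mov_b32 v0, src_shared_base".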
//===----------------------------------------------------------------------===//
// AsmParser
//===----------------------------------------------------------------------===//
static int getRegClass(RegisterKind Is, unsigned RegWidth) {
  if (Is == IS_VGPR) {
    switch (RegWidth) {
      default: return -1;
      case 1: return AMDGPU::VGPR_32RegClassID;
      case 2: return AMDGPU::VReg_64RegClassID;
      case 3: return AMDGPU::VReg_96RegClassID;
      case 4: return AMDGPU::VReg_128RegClassID;
      case 5: return AMDGPU::VReg_160RegClassID;
      case 6: return AMDGPU::VReg_192RegClassID;
      case 7: return AMDGPU::VReg_224RegClassID;
      case 8: return AMDGPU::VReg_256RegClassID;
      case 16: return AMDGPU::VReg_512RegClassID;
      case 32: return AMDGPU::VReg_1024RegClassID;
    }
  } else if (Is == IS_TTMP) {
    switch (RegWidth) {
      default: return -1;
      case 1: return AMDGPU::TTMP_32RegClassID;
      case 2: return AMDGPU::TTMP_64RegClassID;
      case 4: return AMDGPU::TTMP_128RegClassID;
      case 8: return AMDGPU::TTMP_256RegClassID;
      case 16: return AMDGPU::TTMP_512RegClassID;
    }
  } else if (Is == IS_SGPR) {
    switch (RegWidth) {
      default: return -1;
      case 1: return AMDGPU::SGPR_32RegClassID;
      case 2: return AMDGPU::SGPR_64RegClassID;
      case 3: return AMDGPU::SGPR_96RegClassID;
      case 4: return AMDGPU::SGPR_128RegClassID;
      case 5: return AMDGPU::SGPR_160RegClassID;
      case 6: return AMDGPU::SGPR_192RegClassID;
      case 7: return AMDGPU::SGPR_224RegClassID;
      case 8: return AMDGPU::SGPR_256RegClassID;
      case 16: return AMDGPU::SGPR_512RegClassID;
    }
  } else if (Is == IS_AGPR) {
    switch (RegWidth) {
      default: return -1;
      case 1: return AMDGPU::AGPR_32RegClassID;
      case 2: return AMDGPU::AReg_64RegClassID;
      case 3: return AMDGPU::AReg_96RegClassID;
      case 4: return AMDGPU::AReg_128RegClassID;
      case 5: return AMDGPU::AReg_160RegClassID;
      case 6: return AMDGPU::AReg_192RegClassID;
      case 7: return AMDGPU::AReg_224RegClassID;
      case 8: return AMDGPU::AReg_256RegClassID;
      case 16: return AMDGPU::AReg_512RegClassID;
      case 32: return AMDGPU::AReg_1024RegClassID;
    }
  }
  return -1;
}
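
// RegWidth is measured in 32-bit registers, e.g. (illustrative)
//   v7      -> width 1 -> VGPR_32
//   v[2:3]  -> width 2 -> VReg_64
//   s[8:11] -> width 4 -> SGPR_128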
static unsigned getSpecialRegForName(StringRef RegName) {
  return StringSwitch<unsigned>(RegName)
    .Case("exec", AMDGPU::EXEC)
    .Case("vcc", AMDGPU::VCC)
    .Case("flat_scratch", AMDGPU::FLAT_SCR)
    .Case("xnack_mask", AMDGPU::XNACK_MASK)
    .Case("shared_base", AMDGPU::SRC_SHARED_BASE)
    .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE)
    .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT)
    .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT)
    .Case("private_base", AMDGPU::SRC_PRIVATE_BASE)
    .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE)
    .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
    .Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
    .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
    .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
    .Case("lds_direct", AMDGPU::LDS_DIRECT)
    .Case("src_lds_direct", AMDGPU::LDS_DIRECT)
    .Case("m0", AMDGPU::M0)
    .Case("vccz", AMDGPU::SRC_VCCZ)
    .Case("src_vccz", AMDGPU::SRC_VCCZ)
    .Case("execz", AMDGPU::SRC_EXECZ)
    .Case("src_execz", AMDGPU::SRC_EXECZ)
    .Case("scc", AMDGPU::SRC_SCC)
    .Case("src_scc", AMDGPU::SRC_SCC)
    .Case("tba", AMDGPU::TBA)
    .Case("tma", AMDGPU::TMA)
    .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO)
    .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI)
    .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO)
    .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI)
    .Case("vcc_lo", AMDGPU::VCC_LO)
    .Case("vcc_hi", AMDGPU::VCC_HI)
    .Case("exec_lo", AMDGPU::EXEC_LO)
    .Case("exec_hi", AMDGPU::EXEC_HI)
    .Case("tma_lo", AMDGPU::TMA_LO)
    .Case("tma_hi", AMDGPU::TMA_HI)
    .Case("tba_lo", AMDGPU::TBA_LO)
    .Case("tba_hi", AMDGPU::TBA_HI)
    .Case("pc", AMDGPU::PC_REG)
    .Case("null", AMDGPU::SGPR_NULL)
    .Default(AMDGPU::NoRegister);
}
bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
                                    SMLoc &EndLoc, bool RestoreOnFailure) {
  auto R = parseRegister();
  if (!R) return true;
  assert(R->isReg());
  RegNo = R->getReg();
  StartLoc = R->getStartLoc();
  EndLoc = R->getEndLoc();
  return false;
}

bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
                                    SMLoc &EndLoc) {
  return ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/false);
}

OperandMatchResultTy AMDGPUAsmParser::tryParseRegister(unsigned &RegNo,
                                                       SMLoc &StartLoc,
                                                       SMLoc &EndLoc) {
  bool Result =
      ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/true);
  bool PendingErrors = getParser().hasPendingError();
  getParser().clearPendingErrors();
  if (PendingErrors)
    return MatchOperand_ParseFail;
  if (Result)
    return MatchOperand_NoMatch;
  return MatchOperand_Success;
}
bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth,
                                            RegisterKind RegKind, unsigned Reg1,
                                            SMLoc Loc) {
  switch (RegKind) {
  case IS_SPECIAL:
    if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) {
      Reg = AMDGPU::EXEC;
      RegWidth = 2;
      return true;
    }
    if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) {
      Reg = AMDGPU::FLAT_SCR;
      RegWidth = 2;
      return true;
    }
    if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) {
      Reg = AMDGPU::XNACK_MASK;
      RegWidth = 2;
      return true;
    }
    if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) {
      Reg = AMDGPU::VCC;
      RegWidth = 2;
      return true;
    }
    if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) {
      Reg = AMDGPU::TBA;
      RegWidth = 2;
      return true;
    }
    if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) {
      Reg = AMDGPU::TMA;
      RegWidth = 2;
      return true;
    }
    Error(Loc, "register does not fit in the list");
    return false;
  case IS_VGPR:
  case IS_SGPR:
  case IS_AGPR:
  case IS_TTMP:
    if (Reg1 != Reg + RegWidth) {
      Error(Loc, "registers in a list must have consecutive indices");
      return false;
    }
    RegWidth++;
    return true;
  default:
    llvm_unreachable("unexpected register kind");
  }
}
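
// A bracketed list of consecutive registers is folded into a single tuple,
// e.g. (illustrative) [s0,s1,s2,s3] is equivalent to s[0:3], and
// [exec_lo,exec_hi] folds into exec.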
struct RegInfo {
  StringLiteral Name;
  RegisterKind Kind;
};

static constexpr RegInfo RegularRegisters[] = {
  {{"v"},    IS_VGPR},
  {{"s"},    IS_SGPR},
  {{"ttmp"}, IS_TTMP},
  {{"acc"},  IS_AGPR},
  {{"a"},    IS_AGPR},
};

static bool isRegularReg(RegisterKind Kind) {
  return Kind == IS_VGPR ||
         Kind == IS_SGPR ||
         Kind == IS_TTMP ||
         Kind == IS_AGPR;
}

static const RegInfo* getRegularRegInfo(StringRef Str) {
  for (const RegInfo &Reg : RegularRegisters)
    if (Str.startswith(Reg.Name))
      return &Reg;
  return nullptr;
}

static bool getRegNum(StringRef Str, unsigned& Num) {
  return !Str.getAsInteger(10, Num);
}
bool
AMDGPUAsmParser::isRegister(const AsmToken &Token,
                            const AsmToken &NextToken) const {

  // A list of consecutive registers: [s0,s1,s2,s3]
  if (Token.is(AsmToken::LBrac))
    return true;

  if (!Token.is(AsmToken::Identifier))
    return false;

  // A single register like s0 or a range of registers like s[0:1]

  StringRef Str = Token.getString();
  const RegInfo *Reg = getRegularRegInfo(Str);
  if (Reg) {
    StringRef RegName = Reg->Name;
    StringRef RegSuffix = Str.substr(RegName.size());
    if (!RegSuffix.empty()) {
      unsigned Num;
      // A single register with an index: rXX
      if (getRegNum(RegSuffix, Num))
        return true;
    } else {
      // A range of registers: r[XX:YY].
      if (NextToken.is(AsmToken::LBrac))
        return true;
    }
  }

  return getSpecialRegForName(Str) != AMDGPU::NoRegister;
}

bool
AMDGPUAsmParser::isRegister()
{
  return isRegister(getToken(), peekToken());
}
unsigned
AMDGPUAsmParser::getRegularReg(RegisterKind RegKind,
                               unsigned RegNum,
                               unsigned RegWidth,
                               SMLoc Loc) {

  assert(isRegularReg(RegKind));

  unsigned AlignSize = 1;
  if (RegKind == IS_SGPR || RegKind == IS_TTMP) {
    // SGPR and TTMP registers must be aligned.
    // Max required alignment is 4 dwords.
    AlignSize = std::min(RegWidth, 4u);
  }

  if (RegNum % AlignSize != 0) {
    Error(Loc, "invalid register alignment");
    return AMDGPU::NoRegister;
  }

  unsigned RegIdx = RegNum / AlignSize;
  int RCID = getRegClass(RegKind, RegWidth);
  if (RCID == -1) {
    Error(Loc, "invalid or unsupported register size");
    return AMDGPU::NoRegister;
  }

  const MCRegisterInfo *TRI = getContext().getRegisterInfo();
  const MCRegisterClass RC = TRI->getRegClass(RCID);
  if (RegIdx >= RC.getNumRegs()) {
    Error(Loc, "register index is out of range");
    return AMDGPU::NoRegister;
  }

  return RC.getRegister(RegIdx);
}
bool
AMDGPUAsmParser::ParseRegRange(unsigned& Num, unsigned& Width) {
  int64_t RegLo, RegHi;
  if (!skipToken(AsmToken::LBrac, "missing register index"))
    return false;

  SMLoc FirstIdxLoc = getLoc();
  SMLoc SecondIdxLoc;

  if (!parseExpr(RegLo))
    return false;

  if (trySkipToken(AsmToken::Colon)) {
    SecondIdxLoc = getLoc();
    if (!parseExpr(RegHi))
      return false;
  } else {
    RegHi = RegLo;
  }

  if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
    return false;

  if (!isUInt<32>(RegLo)) {
    Error(FirstIdxLoc, "invalid register index");
    return false;
  }

  if (!isUInt<32>(RegHi)) {
    Error(SecondIdxLoc, "invalid register index");
    return false;
  }

  if (RegLo > RegHi) {
    Error(FirstIdxLoc, "first register index should not exceed second index");
    return false;
  }

  Num = static_cast<unsigned>(RegLo);
  Width = (RegHi - RegLo) + 1;
  return true;
}
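
// For example (illustrative), "v[4:7]" yields Num = 4 and Width = 4, and the
// single-index form "v[5]" yields Num = 5 and Width = 1.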
unsigned AMDGPUAsmParser::ParseSpecialReg(RegisterKind &RegKind,
                                          unsigned &RegNum, unsigned &RegWidth,
                                          SmallVectorImpl<AsmToken> &Tokens) {
  assert(isToken(AsmToken::Identifier));
  unsigned Reg = getSpecialRegForName(getTokenStr());
  if (Reg) {
    RegNum = 0;
    RegWidth = 1;
    RegKind = IS_SPECIAL;
    Tokens.push_back(getToken());
    lex(); // skip register name
  }
  return Reg;
}
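
// Special registers are referred to by name, e.g. (illustrative)
//   s_mov_b64 s[0:1], exec
//   s_mov_b32 s2, m0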
unsigned AMDGPUAsmParser::ParseRegularReg(RegisterKind &RegKind,
                                          unsigned &RegNum, unsigned &RegWidth,
                                          SmallVectorImpl<AsmToken> &Tokens) {
  assert(isToken(AsmToken::Identifier));
  StringRef RegName = getTokenStr();
  auto Loc = getLoc();

  const RegInfo *RI = getRegularRegInfo(RegName);
  if (!RI) {
    Error(Loc, "invalid register name");
    return AMDGPU::NoRegister;
  }

  Tokens.push_back(getToken());
  lex(); // skip register name

  RegKind = RI->Kind;
  StringRef RegSuffix = RegName.substr(RI->Name.size());
  if (!RegSuffix.empty()) {
    // Single 32-bit register: vXX.
    if (!getRegNum(RegSuffix, RegNum)) {
      Error(Loc, "invalid register index");
      return AMDGPU::NoRegister;
    }
    RegWidth = 1;
  } else {
    // Range of registers: v[XX:YY]. ":YY" is optional.
    if (!ParseRegRange(RegNum, RegWidth))
      return AMDGPU::NoRegister;
  }

  return getRegularReg(RegKind, RegNum, RegWidth, Loc);
}
unsigned AMDGPUAsmParser::ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
                                       unsigned &RegWidth,
                                       SmallVectorImpl<AsmToken> &Tokens) {
  unsigned Reg = AMDGPU::NoRegister;
  auto ListLoc = getLoc();

  if (!skipToken(AsmToken::LBrac,
                 "expected a register or a list of registers")) {
    return AMDGPU::NoRegister;
  }

  // List of consecutive registers, e.g.: [s0,s1,s2,s3]

  auto Loc = getLoc();
  if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth))
    return AMDGPU::NoRegister;
  if (RegWidth != 1) {
    Error(Loc, "expected a single 32-bit register");
    return AMDGPU::NoRegister;
  }

  for (; trySkipToken(AsmToken::Comma); ) {
    RegisterKind NextRegKind;
    unsigned NextReg, NextRegNum, NextRegWidth;
    Loc = getLoc();

    if (!ParseAMDGPURegister(NextRegKind, NextReg,
                             NextRegNum, NextRegWidth,
                             Tokens)) {
      return AMDGPU::NoRegister;
    }
    if (NextRegWidth != 1) {
      Error(Loc, "expected a single 32-bit register");
      return AMDGPU::NoRegister;
    }
    if (NextRegKind != RegKind) {
      Error(Loc, "registers in a list must be of the same kind");
      return AMDGPU::NoRegister;
    }
    if (!AddNextRegisterToList(Reg, RegWidth, RegKind, NextReg, Loc))
      return AMDGPU::NoRegister;
  }

  if (!skipToken(AsmToken::RBrac,
                 "expected a comma or a closing square bracket")) {
    return AMDGPU::NoRegister;
  }

  if (isRegularReg(RegKind))
    Reg = getRegularReg(RegKind, RegNum, RegWidth, ListLoc);

  return Reg;
}
bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
                                          unsigned &RegNum, unsigned &RegWidth,
                                          SmallVectorImpl<AsmToken> &Tokens) {
  auto Loc = getLoc();
  Reg = AMDGPU::NoRegister;

  if (isToken(AsmToken::Identifier)) {
    Reg = ParseSpecialReg(RegKind, RegNum, RegWidth, Tokens);
    if (Reg == AMDGPU::NoRegister)
      Reg = ParseRegularReg(RegKind, RegNum, RegWidth, Tokens);
  } else {
    Reg = ParseRegList(RegKind, RegNum, RegWidth, Tokens);
  }

  const MCRegisterInfo *TRI = getContext().getRegisterInfo();
  if (Reg == AMDGPU::NoRegister) {
    assert(Parser.hasPendingError());
    return false;
  }

  if (!subtargetHasRegister(*TRI, Reg)) {
    if (Reg == AMDGPU::SGPR_NULL) {
      Error(Loc, "'null' operand is not supported on this GPU");
    } else {
      Error(Loc, "register not available on this GPU");
    }
    return false;
  }

  return true;
}
bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
                                          unsigned &RegNum, unsigned &RegWidth,
                                          bool RestoreOnFailure /*=false*/) {
  Reg = AMDGPU::NoRegister;

  SmallVector<AsmToken, 1> Tokens;
  if (ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, Tokens)) {
    return true;
  }
  if (RestoreOnFailure) {
    while (!Tokens.empty()) {
      getLexer().UnLex(Tokens.pop_back_val());
    }
  }
  return false;
}
Optional<StringRef>
AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) {
  switch (RegKind) {
  case IS_VGPR:
    return StringRef(".amdgcn.next_free_vgpr");
  case IS_SGPR:
    return StringRef(".amdgcn.next_free_sgpr");
  default:
    return None;
  }
}

void AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) {
  auto SymbolName = getGprCountSymbolName(RegKind);
  assert(SymbolName && "initializing invalid register kind");
  MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
  Sym->setVariableValue(MCConstantExpr::create(0, getContext()));
}
bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind,
                                            unsigned DwordRegIndex,
                                            unsigned RegWidth) {
  // Symbols are only defined for GCN targets
  if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6)
    return true;

  auto SymbolName = getGprCountSymbolName(RegKind);
  if (!SymbolName)
    return true;
  MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);

  int64_t NewMax = DwordRegIndex + RegWidth - 1;
  int64_t OldCount;

  if (!Sym->isVariable())
    return !Error(getLoc(),
                  ".amdgcn.next_free_{v,s}gpr symbols must be variable");
  if (!Sym->getVariableValue(false)->evaluateAsAbsolute(OldCount))
    return !Error(
        getLoc(),
        ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions");

  if (OldCount <= NewMax)
    Sym->setVariableValue(MCConstantExpr::create(NewMax + 1, getContext()));

  return true;
}
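
// For example (illustrative), after parsing "v7" the .amdgcn.next_free_vgpr
// symbol is raised to at least 8; after "s[12:15]" .amdgcn.next_free_sgpr is
// raised to at least 16.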
std::unique_ptr<AMDGPUOperand>
AMDGPUAsmParser::parseRegister(bool RestoreOnFailure) {
  const auto &Tok = getToken();
  SMLoc StartLoc = Tok.getLoc();
  SMLoc EndLoc = Tok.getEndLoc();
  RegisterKind RegKind;
  unsigned Reg, RegNum, RegWidth;

  if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) {
    return nullptr;
  }
  if (isHsaAbiVersion3Or4(&getSTI())) {
    if (!updateGprCountSymbols(RegKind, RegNum, RegWidth))
      return nullptr;
  } else
    KernelScope.usesRegister(RegKind, RegNum, RegWidth);
  return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc);
}
2735 OperandMatchResultTy
2736 AMDGPUAsmParser::parseImm(OperandVector
&Operands
, bool HasSP3AbsModifier
) {
2737 // TODO: add syntactic sugar for 1/(2*PI)
2739 assert(!isRegister());
2740 assert(!isModifier());
2742 const auto& Tok
= getToken();
2743 const auto& NextTok
= peekToken();
2744 bool IsReal
= Tok
.is(AsmToken::Real
);
2746 bool Negate
= false;
2748 if (!IsReal
&& Tok
.is(AsmToken::Minus
) && NextTok
.is(AsmToken::Real
)) {
2755 // Floating-point expressions are not supported.
2756 // Can only allow floating-point literals with an
2759 StringRef Num
= getTokenStr();
2762 APFloat
RealVal(APFloat::IEEEdouble());
2763 auto roundMode
= APFloat::rmNearestTiesToEven
;
2764 if (errorToBool(RealVal
.convertFromString(Num
, roundMode
).takeError())) {
2765 return MatchOperand_ParseFail
;
2768 RealVal
.changeSign();
2771 AMDGPUOperand::CreateImm(this, RealVal
.bitcastToAPInt().getZExtValue(), S
,
2772 AMDGPUOperand::ImmTyNone
, true));
2774 return MatchOperand_Success
;
2781 if (HasSP3AbsModifier
) {
2782 // This is a workaround for handling expressions
2783 // as arguments of SP3 'abs' modifier, for example:
2787 // This syntax is not compatible with syntax of standard
2788 // MC expressions (due to the trailing '|').
2790 if (getParser().parsePrimaryExpr(Expr
, EndLoc
, nullptr))
2791 return MatchOperand_ParseFail
;
2793 if (Parser
.parseExpression(Expr
))
2794 return MatchOperand_ParseFail
;
2797 if (Expr
->evaluateAsAbsolute(IntVal
)) {
2798 Operands
.push_back(AMDGPUOperand::CreateImm(this, IntVal
, S
));
2800 Operands
.push_back(AMDGPUOperand::CreateExpr(this, Expr
, S
));
2803 return MatchOperand_Success
;
2806 return MatchOperand_NoMatch
;
2809 OperandMatchResultTy
2810 AMDGPUAsmParser::parseReg(OperandVector
&Operands
) {
2812 return MatchOperand_NoMatch
;
2814 if (auto R
= parseRegister()) {
2816 Operands
.push_back(std::move(R
));
2817 return MatchOperand_Success
;
2819 return MatchOperand_ParseFail
;
2822 OperandMatchResultTy
2823 AMDGPUAsmParser::parseRegOrImm(OperandVector
&Operands
, bool HasSP3AbsMod
) {
2824 auto res
= parseReg(Operands
);
2825 if (res
!= MatchOperand_NoMatch
) {
2827 } else if (isModifier()) {
2828 return MatchOperand_NoMatch
;
2830 return parseImm(Operands
, HasSP3AbsMod
);
2835 AMDGPUAsmParser::isNamedOperandModifier(const AsmToken
&Token
, const AsmToken
&NextToken
) const {
2836 if (Token
.is(AsmToken::Identifier
) && NextToken
.is(AsmToken::LParen
)) {
2837 const auto &str
= Token
.getString();
2838 return str
== "abs" || str
== "neg" || str
== "sext";
2844 AMDGPUAsmParser::isOpcodeModifierWithVal(const AsmToken
&Token
, const AsmToken
&NextToken
) const {
2845 return Token
.is(AsmToken::Identifier
) && NextToken
.is(AsmToken::Colon
);
2849 AMDGPUAsmParser::isOperandModifier(const AsmToken
&Token
, const AsmToken
&NextToken
) const {
2850 return isNamedOperandModifier(Token
, NextToken
) || Token
.is(AsmToken::Pipe
);
2854 AMDGPUAsmParser::isRegOrOperandModifier(const AsmToken
&Token
, const AsmToken
&NextToken
) const {
2855 return isRegister(Token
, NextToken
) || isOperandModifier(Token
, NextToken
);
// Check if this is an operand modifier or an opcode modifier
// which may look like an expression but it is not. We should
// avoid parsing these modifiers as expressions. Currently
// recognized sequences are:
//   |...|
//   abs(...)
//   neg(...)
//   sext(...)
//   -reg
//   -|...|
//   -abs(...)
//   name:...
//
// Note that simple opcode modifiers like 'gds' may be parsed as
// expressions; this is a special case. See getExpressionAsToken.
//
bool
AMDGPUAsmParser::isModifier() {

  AsmToken Tok = getToken();
  AsmToken NextToken[2];
  peekTokens(NextToken);

  return isOperandModifier(Tok, NextToken[0]) ||
         (Tok.is(AsmToken::Minus) && isRegOrOperandModifier(NextToken[0], NextToken[1])) ||
         isOpcodeModifierWithVal(Tok, NextToken[0]);
}

// Check if the current token is an SP3 'neg' modifier.
// Currently this modifier is allowed in the following context:
//
// 1. Before a register, e.g. "-v0", "-v[...]" or "-[v0,v1]".
// 2. Before an 'abs' modifier: -abs(...)
// 3. Before an SP3 'abs' modifier: -|...|
//
// In all other cases "-" is handled as a part
// of an expression that follows the sign.
//
// Note: When "-" is followed by an integer literal,
// this is interpreted as integer negation rather
// than a floating-point NEG modifier applied to N.
// Besides being counter-intuitive, such use of a floating-point
// NEG modifier would have resulted in different meanings
// of integer literals used with VOP1/2/C and VOP3,
// for example:
//     v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF
//     v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001
// Negative fp literals with preceding "-" are
// handled likewise for uniformity.
2908 AMDGPUAsmParser::parseSP3NegModifier() {
2910 AsmToken NextToken
[2];
2911 peekTokens(NextToken
);
2913 if (isToken(AsmToken::Minus
) &&
2914 (isRegister(NextToken
[0], NextToken
[1]) ||
2915 NextToken
[0].is(AsmToken::Pipe
) ||
2916 isId(NextToken
[0], "abs"))) {
2924 OperandMatchResultTy
2925 AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector
&Operands
,
2931 // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead.
2932 if (isToken(AsmToken::Minus
) && peekToken().is(AsmToken::Minus
)) {
2933 Error(getLoc(), "invalid syntax, expected 'neg' modifier");
2934 return MatchOperand_ParseFail
;
2937 SP3Neg
= parseSP3NegModifier();
2940 Neg
= trySkipId("neg");
2941 if (Neg
&& SP3Neg
) {
2942 Error(Loc
, "expected register or immediate");
2943 return MatchOperand_ParseFail
;
2945 if (Neg
&& !skipToken(AsmToken::LParen
, "expected left paren after neg"))
2946 return MatchOperand_ParseFail
;
2948 Abs
= trySkipId("abs");
2949 if (Abs
&& !skipToken(AsmToken::LParen
, "expected left paren after abs"))
2950 return MatchOperand_ParseFail
;
2953 SP3Abs
= trySkipToken(AsmToken::Pipe
);
2954 if (Abs
&& SP3Abs
) {
2955 Error(Loc
, "expected register or immediate");
2956 return MatchOperand_ParseFail
;
2959 OperandMatchResultTy Res
;
2961 Res
= parseRegOrImm(Operands
, SP3Abs
);
2963 Res
= parseReg(Operands
);
2965 if (Res
!= MatchOperand_Success
) {
2966 return (SP3Neg
|| Neg
|| SP3Abs
|| Abs
)? MatchOperand_ParseFail
: Res
;
2969 if (SP3Abs
&& !skipToken(AsmToken::Pipe
, "expected vertical bar"))
2970 return MatchOperand_ParseFail
;
2971 if (Abs
&& !skipToken(AsmToken::RParen
, "expected closing parentheses"))
2972 return MatchOperand_ParseFail
;
2973 if (Neg
&& !skipToken(AsmToken::RParen
, "expected closing parentheses"))
2974 return MatchOperand_ParseFail
;
2976 AMDGPUOperand::Modifiers Mods
;
2977 Mods
.Abs
= Abs
|| SP3Abs
;
2978 Mods
.Neg
= Neg
|| SP3Neg
;
2980 if (Mods
.hasFPModifiers()) {
2981 AMDGPUOperand
&Op
= static_cast<AMDGPUOperand
&>(*Operands
.back());
2983 Error(Op
.getStartLoc(), "expected an absolute expression");
2984 return MatchOperand_ParseFail
;
2986 Op
.setModifiers(Mods
);
2988 return MatchOperand_Success
;
2991 OperandMatchResultTy
2992 AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector
&Operands
,
2994 bool Sext
= trySkipId("sext");
2995 if (Sext
&& !skipToken(AsmToken::LParen
, "expected left paren after sext"))
2996 return MatchOperand_ParseFail
;
2998 OperandMatchResultTy Res
;
3000 Res
= parseRegOrImm(Operands
);
3002 Res
= parseReg(Operands
);
3004 if (Res
!= MatchOperand_Success
) {
3005 return Sext
? MatchOperand_ParseFail
: Res
;
3008 if (Sext
&& !skipToken(AsmToken::RParen
, "expected closing parentheses"))
3009 return MatchOperand_ParseFail
;
3011 AMDGPUOperand::Modifiers Mods
;
3014 if (Mods
.hasIntModifiers()) {
3015 AMDGPUOperand
&Op
= static_cast<AMDGPUOperand
&>(*Operands
.back());
3017 Error(Op
.getStartLoc(), "expected an absolute expression");
3018 return MatchOperand_ParseFail
;
3020 Op
.setModifiers(Mods
);
3023 return MatchOperand_Success
;
3026 OperandMatchResultTy
3027 AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector
&Operands
) {
3028 return parseRegOrImmWithFPInputMods(Operands
, false);
3031 OperandMatchResultTy
3032 AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector
&Operands
) {
3033 return parseRegOrImmWithIntInputMods(Operands
, false);
3036 OperandMatchResultTy
AMDGPUAsmParser::parseVReg32OrOff(OperandVector
&Operands
) {
3037 auto Loc
= getLoc();
3038 if (trySkipId("off")) {
3039 Operands
.push_back(AMDGPUOperand::CreateImm(this, 0, Loc
,
3040 AMDGPUOperand::ImmTyOff
, false));
3041 return MatchOperand_Success
;
3045 return MatchOperand_NoMatch
;
3047 std::unique_ptr
<AMDGPUOperand
> Reg
= parseRegister();
3049 Operands
.push_back(std::move(Reg
));
3050 return MatchOperand_Success
;
3053 return MatchOperand_ParseFail
;
3057 unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst
&Inst
) {
3058 uint64_t TSFlags
= MII
.get(Inst
.getOpcode()).TSFlags
;
3060 if ((getForcedEncodingSize() == 32 && (TSFlags
& SIInstrFlags::VOP3
)) ||
3061 (getForcedEncodingSize() == 64 && !(TSFlags
& SIInstrFlags::VOP3
)) ||
3062 (isForcedDPP() && !(TSFlags
& SIInstrFlags::DPP
)) ||
3063 (isForcedSDWA() && !(TSFlags
& SIInstrFlags::SDWA
)) )
3064 return Match_InvalidOperand
;
3066 if ((TSFlags
& SIInstrFlags::VOP3
) &&
3067 (TSFlags
& SIInstrFlags::VOPAsmPrefer32Bit
) &&
3068 getForcedEncodingSize() != 64)
3069 return Match_PreferE32
;
3071 if (Inst
.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi
||
3072 Inst
.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi
) {
3073 // v_mac_f32/16 allow only dst_sel == DWORD;
3075 AMDGPU::getNamedOperandIdx(Inst
.getOpcode(), AMDGPU::OpName::dst_sel
);
3076 const auto &Op
= Inst
.getOperand(OpNum
);
3077 if (!Op
.isImm() || Op
.getImm() != AMDGPU::SDWA::SdwaSel::DWORD
) {
3078 return Match_InvalidOperand
;
3082 return Match_Success
;
3085 static ArrayRef
<unsigned> getAllVariants() {
3086 static const unsigned Variants
[] = {
3087 AMDGPUAsmVariants::DEFAULT
, AMDGPUAsmVariants::VOP3
,
3088 AMDGPUAsmVariants::SDWA
, AMDGPUAsmVariants::SDWA9
, AMDGPUAsmVariants::DPP
3091 return makeArrayRef(Variants
);
3094 // What asm variants we should check
3095 ArrayRef
<unsigned> AMDGPUAsmParser::getMatchedVariants() const {
3096 if (getForcedEncodingSize() == 32) {
3097 static const unsigned Variants
[] = {AMDGPUAsmVariants::DEFAULT
};
3098 return makeArrayRef(Variants
);
3101 if (isForcedVOP3()) {
3102 static const unsigned Variants
[] = {AMDGPUAsmVariants::VOP3
};
3103 return makeArrayRef(Variants
);
3106 if (isForcedSDWA()) {
3107 static const unsigned Variants
[] = {AMDGPUAsmVariants::SDWA
,
3108 AMDGPUAsmVariants::SDWA9
};
3109 return makeArrayRef(Variants
);
3112 if (isForcedDPP()) {
3113 static const unsigned Variants
[] = {AMDGPUAsmVariants::DPP
};
3114 return makeArrayRef(Variants
);
3117 return getAllVariants();
3120 StringRef
AMDGPUAsmParser::getMatchedVariantName() const {
3121 if (getForcedEncodingSize() == 32)
3136 unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst
&Inst
) const {
3137 const MCInstrDesc
&Desc
= MII
.get(Inst
.getOpcode());
3138 const unsigned Num
= Desc
.getNumImplicitUses();
3139 for (unsigned i
= 0; i
< Num
; ++i
) {
3140 unsigned Reg
= Desc
.ImplicitUses
[i
];
3142 case AMDGPU::FLAT_SCR
:
3144 case AMDGPU::VCC_LO
:
3145 case AMDGPU::VCC_HI
:
3152 return AMDGPU::NoRegister
;
// NB: This code is correct only when used to check constant
// bus limitations because GFX7 supports no f16 inline constants.
// Note that there are no cases when a GFX7 opcode violates
// constant bus limitations due to the use of an f16 constant.
3159 bool AMDGPUAsmParser::isInlineConstant(const MCInst
&Inst
,
3160 unsigned OpIdx
) const {
3161 const MCInstrDesc
&Desc
= MII
.get(Inst
.getOpcode());
3163 if (!AMDGPU::isSISrcOperand(Desc
, OpIdx
)) {
3167 const MCOperand
&MO
= Inst
.getOperand(OpIdx
);
3169 int64_t Val
= MO
.getImm();
3170 auto OpSize
= AMDGPU::getOperandSize(Desc
, OpIdx
);
3172 switch (OpSize
) { // expected operand size
3174 return AMDGPU::isInlinableLiteral64(Val
, hasInv2PiInlineImm());
3176 return AMDGPU::isInlinableLiteral32(Val
, hasInv2PiInlineImm());
3178 const unsigned OperandType
= Desc
.OpInfo
[OpIdx
].OperandType
;
3179 if (OperandType
== AMDGPU::OPERAND_REG_IMM_INT16
||
3180 OperandType
== AMDGPU::OPERAND_REG_INLINE_C_INT16
||
3181 OperandType
== AMDGPU::OPERAND_REG_INLINE_AC_INT16
)
3182 return AMDGPU::isInlinableIntLiteral(Val
);
3184 if (OperandType
== AMDGPU::OPERAND_REG_INLINE_C_V2INT16
||
3185 OperandType
== AMDGPU::OPERAND_REG_INLINE_AC_V2INT16
||
3186 OperandType
== AMDGPU::OPERAND_REG_IMM_V2INT16
)
3187 return AMDGPU::isInlinableIntLiteralV216(Val
);
3189 if (OperandType
== AMDGPU::OPERAND_REG_INLINE_C_V2FP16
||
3190 OperandType
== AMDGPU::OPERAND_REG_INLINE_AC_V2FP16
||
3191 OperandType
== AMDGPU::OPERAND_REG_IMM_V2FP16
)
3192 return AMDGPU::isInlinableLiteralV216(Val
, hasInv2PiInlineImm());
3194 return AMDGPU::isInlinableLiteral16(Val
, hasInv2PiInlineImm());
3197 llvm_unreachable("invalid operand size");
3201 unsigned AMDGPUAsmParser::getConstantBusLimit(unsigned Opcode
) const {
3206 // 64-bit shift instructions can use only one scalar value input
3207 case AMDGPU::V_LSHLREV_B64_e64
:
3208 case AMDGPU::V_LSHLREV_B64_gfx10
:
3209 case AMDGPU::V_LSHRREV_B64_e64
:
3210 case AMDGPU::V_LSHRREV_B64_gfx10
:
3211 case AMDGPU::V_ASHRREV_I64_e64
:
3212 case AMDGPU::V_ASHRREV_I64_gfx10
:
3213 case AMDGPU::V_LSHL_B64_e64
:
3214 case AMDGPU::V_LSHR_B64_e64
:
3215 case AMDGPU::V_ASHR_I64_e64
:
3222 bool AMDGPUAsmParser::usesConstantBus(const MCInst
&Inst
, unsigned OpIdx
) {
3223 const MCOperand
&MO
= Inst
.getOperand(OpIdx
);
3225 return !isInlineConstant(Inst
, OpIdx
);
3226 } else if (MO
.isReg()) {
3227 auto Reg
= MO
.getReg();
3228 const MCRegisterInfo
*TRI
= getContext().getRegisterInfo();
3229 auto PReg
= mc2PseudoReg(Reg
);
3230 return isSGPR(PReg
, TRI
) && PReg
!= SGPR_NULL
;
3237 AMDGPUAsmParser::validateConstantBusLimitations(const MCInst
&Inst
,
3238 const OperandVector
&Operands
) {
3239 const unsigned Opcode
= Inst
.getOpcode();
3240 const MCInstrDesc
&Desc
= MII
.get(Opcode
);
3241 unsigned LastSGPR
= AMDGPU::NoRegister
;
3242 unsigned ConstantBusUseCount
= 0;
3243 unsigned NumLiterals
= 0;
3244 unsigned LiteralSize
;
3247 (SIInstrFlags::VOPC
|
3248 SIInstrFlags::VOP1
| SIInstrFlags::VOP2
|
3249 SIInstrFlags::VOP3
| SIInstrFlags::VOP3P
|
3250 SIInstrFlags::SDWA
)) {
3251 // Check special imm operands (used by madmk, etc)
3252 if (AMDGPU::getNamedOperandIdx(Opcode
, AMDGPU::OpName::imm
) != -1) {
3253 ++ConstantBusUseCount
;
3256 SmallDenseSet
<unsigned> SGPRsUsed
;
3257 unsigned SGPRUsed
= findImplicitSGPRReadInVOP(Inst
);
3258 if (SGPRUsed
!= AMDGPU::NoRegister
) {
3259 SGPRsUsed
.insert(SGPRUsed
);
3260 ++ConstantBusUseCount
;
3263 const int Src0Idx
= AMDGPU::getNamedOperandIdx(Opcode
, AMDGPU::OpName::src0
);
3264 const int Src1Idx
= AMDGPU::getNamedOperandIdx(Opcode
, AMDGPU::OpName::src1
);
3265 const int Src2Idx
= AMDGPU::getNamedOperandIdx(Opcode
, AMDGPU::OpName::src2
);
3267 const int OpIndices
[] = { Src0Idx
, Src1Idx
, Src2Idx
};
3269 for (int OpIdx
: OpIndices
) {
3270 if (OpIdx
== -1) break;
3272 const MCOperand
&MO
= Inst
.getOperand(OpIdx
);
3273 if (usesConstantBus(Inst
, OpIdx
)) {
3275 LastSGPR
= mc2PseudoReg(MO
.getReg());
3276 // Pairs of registers with a partial intersections like these
3278 // flat_scratch_lo, flat_scratch
3279 // flat_scratch_lo, flat_scratch_hi
3280 // are theoretically valid but they are disabled anyway.
3281 // Note that this code mimics SIInstrInfo::verifyInstruction
3282 if (!SGPRsUsed
.count(LastSGPR
)) {
3283 SGPRsUsed
.insert(LastSGPR
);
3284 ++ConstantBusUseCount
;
3286 } else { // Expression or a literal
3288 if (Desc
.OpInfo
[OpIdx
].OperandType
== MCOI::OPERAND_IMMEDIATE
)
3289 continue; // special operand like VINTERP attr_chan
3291 // An instruction may use only one literal.
3292 // This has been validated on the previous step.
3293 // See validateVOP3Literal.
3294 // This literal may be used as more than one operand.
3295 // If all these operands are of the same size,
3296 // this literal counts as one scalar value.
3297 // Otherwise it counts as 2 scalar values.
3298 // See "GFX10 Shader Programming", section 3.6.2.3.
3300 unsigned Size
= AMDGPU::getOperandSize(Desc
, OpIdx
);
3301 if (Size
< 4) Size
= 4;
3303 if (NumLiterals
== 0) {
3306 } else if (LiteralSize
!= Size
) {
3313 ConstantBusUseCount
+= NumLiterals
;
3315 if (ConstantBusUseCount
<= getConstantBusLimit(Opcode
))
3318 SMLoc LitLoc
= getLitLoc(Operands
);
3319 SMLoc RegLoc
= getRegLoc(LastSGPR
, Operands
);
3320 SMLoc Loc
= (LitLoc
.getPointer() < RegLoc
.getPointer()) ? RegLoc
: LitLoc
;
3321 Error(Loc
, "invalid operand (violates constant bus restrictions)");
3326 AMDGPUAsmParser::validateEarlyClobberLimitations(const MCInst
&Inst
,
3327 const OperandVector
&Operands
) {
3328 const unsigned Opcode
= Inst
.getOpcode();
3329 const MCInstrDesc
&Desc
= MII
.get(Opcode
);
3331 const int DstIdx
= AMDGPU::getNamedOperandIdx(Opcode
, AMDGPU::OpName::vdst
);
3333 Desc
.getOperandConstraint(DstIdx
, MCOI::EARLY_CLOBBER
) == -1) {
3337 const MCRegisterInfo
*TRI
= getContext().getRegisterInfo();
3339 const int Src0Idx
= AMDGPU::getNamedOperandIdx(Opcode
, AMDGPU::OpName::src0
);
3340 const int Src1Idx
= AMDGPU::getNamedOperandIdx(Opcode
, AMDGPU::OpName::src1
);
3341 const int Src2Idx
= AMDGPU::getNamedOperandIdx(Opcode
, AMDGPU::OpName::src2
);
3343 assert(DstIdx
!= -1);
3344 const MCOperand
&Dst
= Inst
.getOperand(DstIdx
);
3345 assert(Dst
.isReg());
3346 const unsigned DstReg
= mc2PseudoReg(Dst
.getReg());
3348 const int SrcIndices
[] = { Src0Idx
, Src1Idx
, Src2Idx
};
3350 for (int SrcIdx
: SrcIndices
) {
3351 if (SrcIdx
== -1) break;
3352 const MCOperand
&Src
= Inst
.getOperand(SrcIdx
);
3354 const unsigned SrcReg
= mc2PseudoReg(Src
.getReg());
3355 if (isRegIntersect(DstReg
, SrcReg
, TRI
)) {
3356 Error(getRegLoc(SrcReg
, Operands
),
3357 "destination must be different than all sources");
3366 bool AMDGPUAsmParser::validateIntClampSupported(const MCInst
&Inst
) {
3368 const unsigned Opc
= Inst
.getOpcode();
3369 const MCInstrDesc
&Desc
= MII
.get(Opc
);
3371 if ((Desc
.TSFlags
& SIInstrFlags::IntClamp
) != 0 && !hasIntClamp()) {
3372 int ClampIdx
= AMDGPU::getNamedOperandIdx(Opc
, AMDGPU::OpName::clamp
);
3373 assert(ClampIdx
!= -1);
3374 return Inst
.getOperand(ClampIdx
).getImm() == 0;
3380 bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst
&Inst
) {
3382 const unsigned Opc
= Inst
.getOpcode();
3383 const MCInstrDesc
&Desc
= MII
.get(Opc
);
3385 if ((Desc
.TSFlags
& SIInstrFlags::MIMG
) == 0)
3388 int VDataIdx
= AMDGPU::getNamedOperandIdx(Opc
, AMDGPU::OpName::vdata
);
3389 int DMaskIdx
= AMDGPU::getNamedOperandIdx(Opc
, AMDGPU::OpName::dmask
);
3390 int TFEIdx
= AMDGPU::getNamedOperandIdx(Opc
, AMDGPU::OpName::tfe
);
3392 assert(VDataIdx
!= -1);
3394 if (DMaskIdx
== -1 || TFEIdx
== -1) // intersect_ray
3397 unsigned VDataSize
= AMDGPU::getRegOperandSize(getMRI(), Desc
, VDataIdx
);
3398 unsigned TFESize
= (TFEIdx
!= -1 && Inst
.getOperand(TFEIdx
).getImm()) ? 1 : 0;
3399 unsigned DMask
= Inst
.getOperand(DMaskIdx
).getImm() & 0xf;
3404 (Desc
.TSFlags
& SIInstrFlags::Gather4
) ? 4 : countPopulation(DMask
);
3405 if (hasPackedD16()) {
3406 int D16Idx
= AMDGPU::getNamedOperandIdx(Opc
, AMDGPU::OpName::d16
);
3407 if (D16Idx
>= 0 && Inst
.getOperand(D16Idx
).getImm())
3408 DataSize
= (DataSize
+ 1) / 2;
3411 return (VDataSize
/ 4) == DataSize
+ TFESize
;
3414 bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst
&Inst
) {
3415 const unsigned Opc
= Inst
.getOpcode();
3416 const MCInstrDesc
&Desc
= MII
.get(Opc
);
3418 if ((Desc
.TSFlags
& SIInstrFlags::MIMG
) == 0 || !isGFX10Plus())
3421 const AMDGPU::MIMGInfo
*Info
= AMDGPU::getMIMGInfo(Opc
);
3423 const AMDGPU::MIMGBaseOpcodeInfo
*BaseOpcode
=
3424 AMDGPU::getMIMGBaseOpcodeInfo(Info
->BaseOpcode
);
3425 int VAddr0Idx
= AMDGPU::getNamedOperandIdx(Opc
, AMDGPU::OpName::vaddr0
);
3426 int SrsrcIdx
= AMDGPU::getNamedOperandIdx(Opc
, AMDGPU::OpName::srsrc
);
3427 int DimIdx
= AMDGPU::getNamedOperandIdx(Opc
, AMDGPU::OpName::dim
);
3428 int A16Idx
= AMDGPU::getNamedOperandIdx(Opc
, AMDGPU::OpName::a16
);
3430 assert(VAddr0Idx
!= -1);
3431 assert(SrsrcIdx
!= -1);
3432 assert(SrsrcIdx
> VAddr0Idx
);
3435 return true; // intersect_ray
3437 unsigned Dim
= Inst
.getOperand(DimIdx
).getImm();
3438 const AMDGPU::MIMGDimInfo
*DimInfo
= AMDGPU::getMIMGDimInfoByEncoding(Dim
);
3439 bool IsNSA
= SrsrcIdx
- VAddr0Idx
> 1;
3440 unsigned ActualAddrSize
=
3441 IsNSA
? SrsrcIdx
- VAddr0Idx
3442 : AMDGPU::getRegOperandSize(getMRI(), Desc
, VAddr0Idx
) / 4;
3443 bool IsA16
= (A16Idx
!= -1 && Inst
.getOperand(A16Idx
).getImm());
3445 unsigned ExpectedAddrSize
=
3446 AMDGPU::getAddrSizeMIMGOp(BaseOpcode
, DimInfo
, IsA16
, hasG16());
3449 if (ExpectedAddrSize
> 8)
3450 ExpectedAddrSize
= 16;
3452 // Allow oversized 8 VGPR vaddr when only 5/6/7 VGPRs are required.
3453 // This provides backward compatibility for assembly created
3454 // before 160b/192b/224b types were directly supported.
3455 if (ActualAddrSize
== 8 && (ExpectedAddrSize
>= 5 && ExpectedAddrSize
<= 7))
3459 return ActualAddrSize
== ExpectedAddrSize
;
3462 bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst
&Inst
) {
3464 const unsigned Opc
= Inst
.getOpcode();
3465 const MCInstrDesc
&Desc
= MII
.get(Opc
);
3467 if ((Desc
.TSFlags
& SIInstrFlags::MIMG
) == 0)
3469 if (!Desc
.mayLoad() || !Desc
.mayStore())
3470 return true; // Not atomic
3472 int DMaskIdx
= AMDGPU::getNamedOperandIdx(Opc
, AMDGPU::OpName::dmask
);
3473 unsigned DMask
= Inst
.getOperand(DMaskIdx
).getImm() & 0xf;
3475 // This is an incomplete check because image_atomic_cmpswap
3476 // may only use 0x3 and 0xf while other atomic operations
3477 // may use 0x1 and 0x3. However these limitations are
3478 // verified when we check that dmask matches dst size.
3479 return DMask
== 0x1 || DMask
== 0x3 || DMask
== 0xf;
3482 bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst
&Inst
) {
3484 const unsigned Opc
= Inst
.getOpcode();
3485 const MCInstrDesc
&Desc
= MII
.get(Opc
);
3487 if ((Desc
.TSFlags
& SIInstrFlags::Gather4
) == 0)
3490 int DMaskIdx
= AMDGPU::getNamedOperandIdx(Opc
, AMDGPU::OpName::dmask
);
3491 unsigned DMask
= Inst
.getOperand(DMaskIdx
).getImm() & 0xf;
3493 // GATHER4 instructions use dmask in a different fashion compared to
3494 // other MIMG instructions. The only useful DMASK values are
3495 // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns
3496 // (red,red,red,red) etc.) The ISA document doesn't mention
3498 return DMask
== 0x1 || DMask
== 0x2 || DMask
== 0x4 || DMask
== 0x8;
3501 bool AMDGPUAsmParser::validateMIMGMSAA(const MCInst
&Inst
) {
3502 const unsigned Opc
= Inst
.getOpcode();
3503 const MCInstrDesc
&Desc
= MII
.get(Opc
);
3505 if ((Desc
.TSFlags
& SIInstrFlags::MIMG
) == 0)
3508 const AMDGPU::MIMGInfo
*Info
= AMDGPU::getMIMGInfo(Opc
);
3509 const AMDGPU::MIMGBaseOpcodeInfo
*BaseOpcode
=
3510 AMDGPU::getMIMGBaseOpcodeInfo(Info
->BaseOpcode
);
3512 if (!BaseOpcode
->MSAA
)
3515 int DimIdx
= AMDGPU::getNamedOperandIdx(Opc
, AMDGPU::OpName::dim
);
3516 assert(DimIdx
!= -1);
3518 unsigned Dim
= Inst
.getOperand(DimIdx
).getImm();
3519 const AMDGPU::MIMGDimInfo
*DimInfo
= AMDGPU::getMIMGDimInfoByEncoding(Dim
);
3521 return DimInfo
->MSAA
;
3524 static bool IsMovrelsSDWAOpcode(const unsigned Opcode
)
3527 case AMDGPU::V_MOVRELS_B32_sdwa_gfx10
:
3528 case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10
:
3529 case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10
:
3536 // movrels* opcodes should only allow VGPRS as src0.
3537 // This is specified in .td description for vop1/vop3,
3538 // but sdwa is handled differently. See isSDWAOperand.
3539 bool AMDGPUAsmParser::validateMovrels(const MCInst
&Inst
,
3540 const OperandVector
&Operands
) {
3542 const unsigned Opc
= Inst
.getOpcode();
3543 const MCInstrDesc
&Desc
= MII
.get(Opc
);
3545 if ((Desc
.TSFlags
& SIInstrFlags::SDWA
) == 0 || !IsMovrelsSDWAOpcode(Opc
))
3548 const int Src0Idx
= AMDGPU::getNamedOperandIdx(Opc
, AMDGPU::OpName::src0
);
3549 assert(Src0Idx
!= -1);
3552 const MCOperand
&Src0
= Inst
.getOperand(Src0Idx
);
3554 auto Reg
= mc2PseudoReg(Src0
.getReg());
3555 const MCRegisterInfo
*TRI
= getContext().getRegisterInfo();
3556 if (!isSGPR(Reg
, TRI
))
3558 ErrLoc
= getRegLoc(Reg
, Operands
);
3560 ErrLoc
= getConstLoc(Operands
);
3563 Error(ErrLoc
, "source operand must be a VGPR");
3567 bool AMDGPUAsmParser::validateMAIAccWrite(const MCInst
&Inst
,
3568 const OperandVector
&Operands
) {
3570 const unsigned Opc
= Inst
.getOpcode();
3572 if (Opc
!= AMDGPU::V_ACCVGPR_WRITE_B32_vi
)
3575 const int Src0Idx
= AMDGPU::getNamedOperandIdx(Opc
, AMDGPU::OpName::src0
);
3576 assert(Src0Idx
!= -1);
3578 const MCOperand
&Src0
= Inst
.getOperand(Src0Idx
);
3582 auto Reg
= mc2PseudoReg(Src0
.getReg());
3583 const MCRegisterInfo
*TRI
= getContext().getRegisterInfo();
3584 if (isSGPR(Reg
, TRI
)) {
3585 Error(getRegLoc(Reg
, Operands
),
3586 "source operand must be either a VGPR or an inline constant");
3593 bool AMDGPUAsmParser::validateDivScale(const MCInst
&Inst
) {
3594 switch (Inst
.getOpcode()) {
3597 case V_DIV_SCALE_F32_gfx6_gfx7
:
3598 case V_DIV_SCALE_F32_vi
:
3599 case V_DIV_SCALE_F32_gfx10
:
3600 case V_DIV_SCALE_F64_gfx6_gfx7
:
3601 case V_DIV_SCALE_F64_vi
:
3602 case V_DIV_SCALE_F64_gfx10
:
3606 // TODO: Check that src0 = src1 or src2.
  for (auto Name : {AMDGPU::OpName::src0_modifiers,
                    AMDGPU::OpName::src1_modifiers,
                    AMDGPU::OpName::src2_modifiers}) {
3611 if (Inst
.getOperand(AMDGPU::getNamedOperandIdx(Inst
.getOpcode(), Name
))
3621 bool AMDGPUAsmParser::validateMIMGD16(const MCInst
&Inst
) {
3623 const unsigned Opc
= Inst
.getOpcode();
3624 const MCInstrDesc
&Desc
= MII
.get(Opc
);
3626 if ((Desc
.TSFlags
& SIInstrFlags::MIMG
) == 0)
3629 int D16Idx
= AMDGPU::getNamedOperandIdx(Opc
, AMDGPU::OpName::d16
);
3630 if (D16Idx
>= 0 && Inst
.getOperand(D16Idx
).getImm()) {
3631 if (isCI() || isSI())
3638 bool AMDGPUAsmParser::validateMIMGDim(const MCInst
&Inst
) {
3639 const unsigned Opc
= Inst
.getOpcode();
3640 const MCInstrDesc
&Desc
= MII
.get(Opc
);
3642 if ((Desc
.TSFlags
& SIInstrFlags::MIMG
) == 0)
3645 int DimIdx
= AMDGPU::getNamedOperandIdx(Opc
, AMDGPU::OpName::dim
);
3649 long Imm
= Inst
.getOperand(DimIdx
).getImm();
3650 if (Imm
< 0 || Imm
>= 8)
3656 static bool IsRevOpcode(const unsigned Opcode
)
3659 case AMDGPU::V_SUBREV_F32_e32
:
3660 case AMDGPU::V_SUBREV_F32_e64
:
3661 case AMDGPU::V_SUBREV_F32_e32_gfx10
:
3662 case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7
:
3663 case AMDGPU::V_SUBREV_F32_e32_vi
:
3664 case AMDGPU::V_SUBREV_F32_e64_gfx10
:
3665 case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7
:
3666 case AMDGPU::V_SUBREV_F32_e64_vi
:
3668 case AMDGPU::V_SUBREV_CO_U32_e32
:
3669 case AMDGPU::V_SUBREV_CO_U32_e64
:
3670 case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7
:
3671 case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7
:
3673 case AMDGPU::V_SUBBREV_U32_e32
:
3674 case AMDGPU::V_SUBBREV_U32_e64
:
3675 case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7
:
3676 case AMDGPU::V_SUBBREV_U32_e32_vi
:
3677 case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7
:
3678 case AMDGPU::V_SUBBREV_U32_e64_vi
:
3680 case AMDGPU::V_SUBREV_U32_e32
:
3681 case AMDGPU::V_SUBREV_U32_e64
:
3682 case AMDGPU::V_SUBREV_U32_e32_gfx9
:
3683 case AMDGPU::V_SUBREV_U32_e32_vi
:
3684 case AMDGPU::V_SUBREV_U32_e64_gfx9
:
3685 case AMDGPU::V_SUBREV_U32_e64_vi
:
3687 case AMDGPU::V_SUBREV_F16_e32
:
3688 case AMDGPU::V_SUBREV_F16_e64
:
3689 case AMDGPU::V_SUBREV_F16_e32_gfx10
:
3690 case AMDGPU::V_SUBREV_F16_e32_vi
:
3691 case AMDGPU::V_SUBREV_F16_e64_gfx10
:
3692 case AMDGPU::V_SUBREV_F16_e64_vi
:
3694 case AMDGPU::V_SUBREV_U16_e32
:
3695 case AMDGPU::V_SUBREV_U16_e64
:
3696 case AMDGPU::V_SUBREV_U16_e32_vi
:
3697 case AMDGPU::V_SUBREV_U16_e64_vi
:
3699 case AMDGPU::V_SUBREV_CO_U32_e32_gfx9
:
3700 case AMDGPU::V_SUBREV_CO_U32_e64_gfx10
:
3701 case AMDGPU::V_SUBREV_CO_U32_e64_gfx9
:
3703 case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9
:
3704 case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9
:
3706 case AMDGPU::V_SUBREV_NC_U32_e32_gfx10
:
3707 case AMDGPU::V_SUBREV_NC_U32_e64_gfx10
:
3709 case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10
:
3710 case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10
:
3712 case AMDGPU::V_LSHRREV_B32_e32
:
3713 case AMDGPU::V_LSHRREV_B32_e64
:
3714 case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7
:
3715 case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7
:
3716 case AMDGPU::V_LSHRREV_B32_e32_vi
:
3717 case AMDGPU::V_LSHRREV_B32_e64_vi
:
3718 case AMDGPU::V_LSHRREV_B32_e32_gfx10
:
3719 case AMDGPU::V_LSHRREV_B32_e64_gfx10
:
3721 case AMDGPU::V_ASHRREV_I32_e32
:
3722 case AMDGPU::V_ASHRREV_I32_e64
:
3723 case AMDGPU::V_ASHRREV_I32_e32_gfx10
:
3724 case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7
:
3725 case AMDGPU::V_ASHRREV_I32_e32_vi
:
3726 case AMDGPU::V_ASHRREV_I32_e64_gfx10
:
3727 case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7
:
3728 case AMDGPU::V_ASHRREV_I32_e64_vi
:
3730 case AMDGPU::V_LSHLREV_B32_e32
:
3731 case AMDGPU::V_LSHLREV_B32_e64
:
3732 case AMDGPU::V_LSHLREV_B32_e32_gfx10
:
3733 case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7
:
3734 case AMDGPU::V_LSHLREV_B32_e32_vi
:
3735 case AMDGPU::V_LSHLREV_B32_e64_gfx10
:
3736 case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7
:
3737 case AMDGPU::V_LSHLREV_B32_e64_vi
:
3739 case AMDGPU::V_LSHLREV_B16_e32
:
3740 case AMDGPU::V_LSHLREV_B16_e64
:
3741 case AMDGPU::V_LSHLREV_B16_e32_vi
:
3742 case AMDGPU::V_LSHLREV_B16_e64_vi
:
3743 case AMDGPU::V_LSHLREV_B16_gfx10
:
3745 case AMDGPU::V_LSHRREV_B16_e32
:
3746 case AMDGPU::V_LSHRREV_B16_e64
:
3747 case AMDGPU::V_LSHRREV_B16_e32_vi
:
3748 case AMDGPU::V_LSHRREV_B16_e64_vi
:
3749 case AMDGPU::V_LSHRREV_B16_gfx10
:
3751 case AMDGPU::V_ASHRREV_I16_e32
:
3752 case AMDGPU::V_ASHRREV_I16_e64
:
3753 case AMDGPU::V_ASHRREV_I16_e32_vi
:
3754 case AMDGPU::V_ASHRREV_I16_e64_vi
:
3755 case AMDGPU::V_ASHRREV_I16_gfx10
:
3757 case AMDGPU::V_LSHLREV_B64_e64
:
3758 case AMDGPU::V_LSHLREV_B64_gfx10
:
3759 case AMDGPU::V_LSHLREV_B64_vi
:
3761 case AMDGPU::V_LSHRREV_B64_e64
:
3762 case AMDGPU::V_LSHRREV_B64_gfx10
:
3763 case AMDGPU::V_LSHRREV_B64_vi
:
3765 case AMDGPU::V_ASHRREV_I64_e64
:
3766 case AMDGPU::V_ASHRREV_I64_gfx10
:
3767 case AMDGPU::V_ASHRREV_I64_vi
:
3769 case AMDGPU::V_PK_LSHLREV_B16
:
3770 case AMDGPU::V_PK_LSHLREV_B16_gfx10
:
3771 case AMDGPU::V_PK_LSHLREV_B16_vi
:
3773 case AMDGPU::V_PK_LSHRREV_B16
:
3774 case AMDGPU::V_PK_LSHRREV_B16_gfx10
:
3775 case AMDGPU::V_PK_LSHRREV_B16_vi
:
3776 case AMDGPU::V_PK_ASHRREV_I16
:
3777 case AMDGPU::V_PK_ASHRREV_I16_gfx10
:
3778 case AMDGPU::V_PK_ASHRREV_I16_vi
:
3785 Optional
<StringRef
> AMDGPUAsmParser::validateLdsDirect(const MCInst
&Inst
) {
3787 using namespace SIInstrFlags
;
3788 const unsigned Opcode
= Inst
.getOpcode();
3789 const MCInstrDesc
&Desc
= MII
.get(Opcode
);
3791 // lds_direct register is defined so that it can be used
3792 // with 9-bit operands only. Ignore encodings which do not accept these.
3793 const auto Enc
= VOP1
| VOP2
| VOP3
| VOPC
| VOP3P
| SIInstrFlags::SDWA
;
3794 if ((Desc
.TSFlags
& Enc
) == 0)
3797 for (auto SrcName
: {OpName::src0
, OpName::src1
, OpName::src2
}) {
3798 auto SrcIdx
= getNamedOperandIdx(Opcode
, SrcName
);
3801 const auto &Src
= Inst
.getOperand(SrcIdx
);
3802 if (Src
.isReg() && Src
.getReg() == LDS_DIRECT
) {
3805 return StringRef("lds_direct is not supported on this GPU");
3807 if (IsRevOpcode(Opcode
) || (Desc
.TSFlags
& SIInstrFlags::SDWA
))
3808 return StringRef("lds_direct cannot be used with this instruction");
3810 if (SrcName
!= OpName::src0
)
3811 return StringRef("lds_direct may be used as src0 only");
3818 SMLoc
AMDGPUAsmParser::getFlatOffsetLoc(const OperandVector
&Operands
) const {
3819 for (unsigned i
= 1, e
= Operands
.size(); i
!= e
; ++i
) {
3820 AMDGPUOperand
&Op
= ((AMDGPUOperand
&)*Operands
[i
]);
3821 if (Op
.isFlatOffset())
3822 return Op
.getStartLoc();
3827 bool AMDGPUAsmParser::validateFlatOffset(const MCInst
&Inst
,
3828 const OperandVector
&Operands
) {
3829 uint64_t TSFlags
= MII
.get(Inst
.getOpcode()).TSFlags
;
3830 if ((TSFlags
& SIInstrFlags::FLAT
) == 0)
3833 auto Opcode
= Inst
.getOpcode();
3834 auto OpNum
= AMDGPU::getNamedOperandIdx(Opcode
, AMDGPU::OpName::offset
);
3835 assert(OpNum
!= -1);
3837 const auto &Op
= Inst
.getOperand(OpNum
);
3838 if (!hasFlatOffsets() && Op
.getImm() != 0) {
3839 Error(getFlatOffsetLoc(Operands
),
3840 "flat offset modifier is not supported on this GPU");
3844 // For FLAT segment the offset must be positive;
3845 // MSB is ignored and forced to zero.
3846 if (TSFlags
& (SIInstrFlags::FlatGlobal
| SIInstrFlags::FlatScratch
)) {
3847 unsigned OffsetSize
= AMDGPU::getNumFlatOffsetBits(getSTI(), true);
3848 if (!isIntN(OffsetSize
, Op
.getImm())) {
3849 Error(getFlatOffsetLoc(Operands
),
3850 Twine("expected a ") + Twine(OffsetSize
) + "-bit signed offset");
3854 unsigned OffsetSize
= AMDGPU::getNumFlatOffsetBits(getSTI(), false);
3855 if (!isUIntN(OffsetSize
, Op
.getImm())) {
3856 Error(getFlatOffsetLoc(Operands
),
3857 Twine("expected a ") + Twine(OffsetSize
) + "-bit unsigned offset");
3865 SMLoc
AMDGPUAsmParser::getSMEMOffsetLoc(const OperandVector
&Operands
) const {
3866 // Start with second operand because SMEM Offset cannot be dst or src0.
3867 for (unsigned i
= 2, e
= Operands
.size(); i
!= e
; ++i
) {
3868 AMDGPUOperand
&Op
= ((AMDGPUOperand
&)*Operands
[i
]);
3869 if (Op
.isSMEMOffset())
3870 return Op
.getStartLoc();
3875 bool AMDGPUAsmParser::validateSMEMOffset(const MCInst
&Inst
,
3876 const OperandVector
&Operands
) {
3877 if (isCI() || isSI())
3880 uint64_t TSFlags
= MII
.get(Inst
.getOpcode()).TSFlags
;
3881 if ((TSFlags
& SIInstrFlags::SMRD
) == 0)
3884 auto Opcode
= Inst
.getOpcode();
3885 auto OpNum
= AMDGPU::getNamedOperandIdx(Opcode
, AMDGPU::OpName::offset
);
3889 const auto &Op
= Inst
.getOperand(OpNum
);
3893 uint64_t Offset
= Op
.getImm();
3894 bool IsBuffer
= AMDGPU::getSMEMIsBuffer(Opcode
);
3895 if (AMDGPU::isLegalSMRDEncodedUnsignedOffset(getSTI(), Offset
) ||
3896 AMDGPU::isLegalSMRDEncodedSignedOffset(getSTI(), Offset
, IsBuffer
))
3899 Error(getSMEMOffsetLoc(Operands
),
3900 (isVI() || IsBuffer
) ? "expected a 20-bit unsigned offset" :
3901 "expected a 21-bit signed offset");
bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const {
  unsigned Opcode = Inst.getOpcode();
  const MCInstrDesc &Desc = MII.get(Opcode);
  if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC)))
    return true;

  const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
  const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);

  const int OpIndices[] = { Src0Idx, Src1Idx };

  unsigned NumExprs = 0;
  unsigned NumLiterals = 0;
  uint32_t LiteralValue;

  for (int OpIdx : OpIndices) {
    if (OpIdx == -1) break;

    const MCOperand &MO = Inst.getOperand(OpIdx);
    // Exclude special imm operands (like that used by s_set_gpr_idx_on)
    if (AMDGPU::isSISrcOperand(Desc, OpIdx)) {
      if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
        uint32_t Value = static_cast<uint32_t>(MO.getImm());
        if (NumLiterals == 0 || LiteralValue != Value) {
          LiteralValue = Value;
          ++NumLiterals;
        }
      } else if (MO.isExpr()) {
        ++NumExprs;
      }
    }
  }

  return NumLiterals + NumExprs <= 1;
}
bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) {
  const unsigned Opc = Inst.getOpcode();
  if (Opc == AMDGPU::V_PERMLANE16_B32_gfx10 ||
      Opc == AMDGPU::V_PERMLANEX16_B32_gfx10) {
    int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
    unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
    if (OpSel & ~3)
      return false;
  }
  return true;
}
bool AMDGPUAsmParser::validateDPP(const MCInst &Inst,
                                  const OperandVector &Operands) {
  const unsigned Opc = Inst.getOpcode();
  int DppCtrlIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dpp_ctrl);
  if (DppCtrlIdx < 0)
    return true;
  unsigned DppCtrl = Inst.getOperand(DppCtrlIdx).getImm();

  if (!AMDGPU::isLegal64BitDPPControl(DppCtrl)) {
    // DPP64 is supported for row_newbcast only.
    int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
    if (Src0Idx >= 0 &&
        getMRI()->getSubReg(Inst.getOperand(Src0Idx).getReg(), AMDGPU::sub1)) {
      SMLoc S = getImmLoc(AMDGPUOperand::ImmTyDppCtrl, Operands);
      Error(S, "64 bit dpp only supports row_newbcast");
      return false;
    }
  }

  return true;
}
// Check if VCC register matches wavefront size
bool AMDGPUAsmParser::validateVccOperand(unsigned Reg) const {
  auto FB = getFeatureBits();
  return (FB[AMDGPU::FeatureWavefrontSize64] && Reg == AMDGPU::VCC) ||
    (FB[AMDGPU::FeatureWavefrontSize32] && Reg == AMDGPU::VCC_LO);
}
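// Illustrative examples (hypothetical source lines): in wave64 mode the full
// VCC pair is expected, in wave32 mode only vcc_lo is legal:
//   v_cmp_eq_u32_e32 vcc,    v0, v1   ; wave64
//   v_cmp_eq_u32_e32 vcc_lo, v0, v1   ; wave32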
// VOP3 literal is only allowed in GFX10+ and only one can be used
bool AMDGPUAsmParser::validateVOP3Literal(const MCInst &Inst,
                                          const OperandVector &Operands) {
  unsigned Opcode = Inst.getOpcode();
  const MCInstrDesc &Desc = MII.get(Opcode);
  if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P)))
    return true;

  const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
  const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
  const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);

  const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };

  unsigned NumExprs = 0;
  unsigned NumLiterals = 0;
  uint32_t LiteralValue;

  for (int OpIdx : OpIndices) {
    if (OpIdx == -1) break;

    const MCOperand &MO = Inst.getOperand(OpIdx);
    if (!MO.isImm() && !MO.isExpr())
      continue;
    if (!AMDGPU::isSISrcOperand(Desc, OpIdx))
      continue;

    if (OpIdx == Src2Idx && (Desc.TSFlags & SIInstrFlags::IsMAI) &&
        getFeatureBits()[AMDGPU::FeatureMFMAInlineLiteralBug]) {
      Error(getConstLoc(Operands),
            "inline constants are not allowed for this operand");
      return false;
    }

    if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
      uint32_t Value = static_cast<uint32_t>(MO.getImm());
      if (NumLiterals == 0 || LiteralValue != Value) {
        LiteralValue = Value;
        ++NumLiterals;
      }
    } else if (MO.isExpr()) {
      ++NumExprs;
    }
  }
  NumLiterals += NumExprs;

  if (!NumLiterals)
    return true;

  if (!getFeatureBits()[AMDGPU::FeatureVOP3Literal]) {
    Error(getLitLoc(Operands), "literal operands are not supported");
    return false;
  }

  if (NumLiterals > 1) {
    Error(getLitLoc(Operands), "only one literal operand is allowed");
    return false;
  }

  return true;
}
// Returns -1 if not a register, 0 if VGPR and 1 if AGPR.
static int IsAGPROperand(const MCInst &Inst, uint16_t NameIdx,
                         const MCRegisterInfo *MRI) {
  int OpIdx = AMDGPU::getNamedOperandIdx(Inst.getOpcode(), NameIdx);
  if (OpIdx < 0)
    return -1;

  const MCOperand &Op = Inst.getOperand(OpIdx);
  if (!Op.isReg())
    return -1;

  unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
  auto Reg = Sub ? Sub : Op.getReg();
  const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID);
  return AGPR32.contains(Reg) ? 1 : 0;
}
bool AMDGPUAsmParser::validateAGPRLdSt(const MCInst &Inst) const {
  uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
  if ((TSFlags & (SIInstrFlags::FLAT | SIInstrFlags::MUBUF |
                  SIInstrFlags::MTBUF | SIInstrFlags::MIMG |
                  SIInstrFlags::DS)) == 0)
    return true;

  uint16_t DataNameIdx = (TSFlags & SIInstrFlags::DS) ? AMDGPU::OpName::data0
                                                      : AMDGPU::OpName::vdata;

  const MCRegisterInfo *MRI = getMRI();
  int DstAreg = IsAGPROperand(Inst, AMDGPU::OpName::vdst, MRI);
  int DataAreg = IsAGPROperand(Inst, DataNameIdx, MRI);

  if ((TSFlags & SIInstrFlags::DS) && DataAreg >= 0) {
    int Data2Areg = IsAGPROperand(Inst, AMDGPU::OpName::data1, MRI);
    if (Data2Areg >= 0 && Data2Areg != DataAreg)
      return false;
  }

  auto FB = getFeatureBits();
  if (FB[AMDGPU::FeatureGFX90AInsts]) {
    if (DataAreg < 0 || DstAreg < 0)
      return true;
    return DstAreg == DataAreg;
  }

  return DstAreg < 1 && DataAreg < 1;
}
bool AMDGPUAsmParser::validateVGPRAlign(const MCInst &Inst) const {
  auto FB = getFeatureBits();
  if (!FB[AMDGPU::FeatureGFX90AInsts])
    return true;

  const MCRegisterInfo *MRI = getMRI();
  const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID);
  const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID);
  for (unsigned I = 0, E = Inst.getNumOperands(); I != E; ++I) {
    const MCOperand &Op = Inst.getOperand(I);
    if (!Op.isReg())
      continue;

    unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
    if (!Sub)
      continue;

    if (VGPR32.contains(Sub) && ((Sub - AMDGPU::VGPR0) & 1))
      return false;
    if (AGPR32.contains(Sub) && ((Sub - AMDGPU::AGPR0) & 1))
      return false;
  }

  return true;
}
// gfx90a has an undocumented limitation:
// DS_GWS opcodes must use even aligned registers.
bool AMDGPUAsmParser::validateGWS(const MCInst &Inst,
                                  const OperandVector &Operands) {
  if (!getFeatureBits()[AMDGPU::FeatureGFX90AInsts])
    return true;

  int Opc = Inst.getOpcode();
  if (Opc != AMDGPU::DS_GWS_INIT_vi && Opc != AMDGPU::DS_GWS_BARRIER_vi &&
      Opc != AMDGPU::DS_GWS_SEMA_BR_vi)
    return true;

  const MCRegisterInfo *MRI = getMRI();
  const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID);
  int Data0Pos =
      AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::data0);
  assert(Data0Pos != -1);
  auto Reg = Inst.getOperand(Data0Pos).getReg();
  auto RegIdx = Reg - (VGPR32.contains(Reg) ? AMDGPU::VGPR0 : AMDGPU::AGPR0);
  if (RegIdx & 1) {
    SMLoc RegLoc = getRegLoc(Reg, Operands);
    Error(RegLoc, "vgpr must be even aligned");
    return false;
  }

  return true;
}
4147 bool AMDGPUAsmParser::validateCoherencyBits(const MCInst
&Inst
,
4148 const OperandVector
&Operands
,
4149 const SMLoc
&IDLoc
) {
4150 int CPolPos
= AMDGPU::getNamedOperandIdx(Inst
.getOpcode(),
4151 AMDGPU::OpName::cpol
);
4155 unsigned CPol
= Inst
.getOperand(CPolPos
).getImm();
4157 uint64_t TSFlags
= MII
.get(Inst
.getOpcode()).TSFlags
;
4158 if ((TSFlags
& (SIInstrFlags::SMRD
)) &&
4159 (CPol
& ~(AMDGPU::CPol::GLC
| AMDGPU::CPol::DLC
))) {
4160 Error(IDLoc
, "invalid cache policy for SMRD instruction");
4164 if (isGFX90A() && (CPol
& CPol::SCC
)) {
4165 SMLoc S
= getImmLoc(AMDGPUOperand::ImmTyCPol
, Operands
);
4166 StringRef
CStr(S
.getPointer());
4167 S
= SMLoc::getFromPointer(&CStr
.data()[CStr
.find("scc")]);
4168 Error(S
, "scc is not supported on this GPU");
4172 if (!(TSFlags
& (SIInstrFlags::IsAtomicNoRet
| SIInstrFlags::IsAtomicRet
)))
4175 if (TSFlags
& SIInstrFlags::IsAtomicRet
) {
4176 if (!(TSFlags
& SIInstrFlags::MIMG
) && !(CPol
& CPol::GLC
)) {
4177 Error(IDLoc
, "instruction must use glc");
4181 if (CPol
& CPol::GLC
) {
4182 SMLoc S
= getImmLoc(AMDGPUOperand::ImmTyCPol
, Operands
);
4183 StringRef
CStr(S
.getPointer());
4184 S
= SMLoc::getFromPointer(&CStr
.data()[CStr
.find("glc")]);
4185 Error(S
, "instruction must not use glc");
4193 bool AMDGPUAsmParser::validateInstruction(const MCInst
&Inst
,
4195 const OperandVector
&Operands
) {
4196 if (auto ErrMsg
= validateLdsDirect(Inst
)) {
4197 Error(getRegLoc(LDS_DIRECT
, Operands
), *ErrMsg
);
4200 if (!validateSOPLiteral(Inst
)) {
4201 Error(getLitLoc(Operands
),
4202 "only one literal operand is allowed");
4205 if (!validateVOP3Literal(Inst
, Operands
)) {
4208 if (!validateConstantBusLimitations(Inst
, Operands
)) {
4211 if (!validateEarlyClobberLimitations(Inst
, Operands
)) {
4214 if (!validateIntClampSupported(Inst
)) {
4215 Error(getImmLoc(AMDGPUOperand::ImmTyClampSI
, Operands
),
4216 "integer clamping is not supported on this GPU");
4219 if (!validateOpSel(Inst
)) {
4220 Error(getImmLoc(AMDGPUOperand::ImmTyOpSel
, Operands
),
4221 "invalid op_sel operand");
4224 if (!validateDPP(Inst
, Operands
)) {
4227 // For MUBUF/MTBUF d16 is a part of opcode, so there is nothing to validate.
4228 if (!validateMIMGD16(Inst
)) {
4229 Error(getImmLoc(AMDGPUOperand::ImmTyD16
, Operands
),
4230 "d16 modifier is not supported on this GPU");
4233 if (!validateMIMGDim(Inst
)) {
4234 Error(IDLoc
, "dim modifier is required on this GPU");
4237 if (!validateMIMGMSAA(Inst
)) {
4238 Error(getImmLoc(AMDGPUOperand::ImmTyDim
, Operands
),
4239 "invalid dim; must be MSAA type");
4242 if (!validateMIMGDataSize(Inst
)) {
4244 "image data size does not match dmask and tfe");
4247 if (!validateMIMGAddrSize(Inst
)) {
4249 "image address size does not match dim and a16");
4252 if (!validateMIMGAtomicDMask(Inst
)) {
4253 Error(getImmLoc(AMDGPUOperand::ImmTyDMask
, Operands
),
4254 "invalid atomic image dmask");
4257 if (!validateMIMGGatherDMask(Inst
)) {
4258 Error(getImmLoc(AMDGPUOperand::ImmTyDMask
, Operands
),
4259 "invalid image_gather dmask: only one bit must be set");
4262 if (!validateMovrels(Inst
, Operands
)) {
4265 if (!validateFlatOffset(Inst
, Operands
)) {
4268 if (!validateSMEMOffset(Inst
, Operands
)) {
4271 if (!validateMAIAccWrite(Inst
, Operands
)) {
4274 if (!validateCoherencyBits(Inst
, Operands
, IDLoc
)) {
4278 if (!validateAGPRLdSt(Inst
)) {
4279 Error(IDLoc
, getFeatureBits()[AMDGPU::FeatureGFX90AInsts
]
4280 ? "invalid register class: data and dst should be all VGPR or AGPR"
4281 : "invalid register class: agpr loads and stores not supported on this GPU"
4285 if (!validateVGPRAlign(Inst
)) {
4287 "invalid register class: vgpr tuples must be 64 bit aligned");
4290 if (!validateGWS(Inst
, Operands
)) {
4294 if (!validateDivScale(Inst
)) {
4295 Error(IDLoc
, "ABS not allowed in VOP3B instructions");
4298 if (!validateCoherencyBits(Inst
, Operands
, IDLoc
)) {
static std::string AMDGPUMnemonicSpellCheck(StringRef S,
                                            const FeatureBitset &FBS,
                                            unsigned VariantID = 0);

static bool AMDGPUCheckMnemonic(StringRef Mnemonic,
                                const FeatureBitset &AvailableFeatures,
                                unsigned VariantID);
bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
                                       const FeatureBitset &FBS) {
  return isSupportedMnemo(Mnemo, FBS, getAllVariants());
}

bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
                                       const FeatureBitset &FBS,
                                       ArrayRef<unsigned> Variants) {
  for (auto Variant : Variants) {
    if (AMDGPUCheckMnemonic(Mnemo, FBS, Variant))
      return true;
  }

  return false;
}
4329 bool AMDGPUAsmParser::checkUnsupportedInstruction(StringRef Mnemo
,
4330 const SMLoc
&IDLoc
) {
4331 FeatureBitset FBS
= ComputeAvailableFeatures(getSTI().getFeatureBits());
4333 // Check if requested instruction variant is supported.
4334 if (isSupportedMnemo(Mnemo
, FBS
, getMatchedVariants()))
4337 // This instruction is not supported.
4338 // Clear any other pending errors because they are no longer relevant.
4339 getParser().clearPendingErrors();
4341 // Requested instruction variant is not supported.
4342 // Check if any other variants are supported.
4343 StringRef VariantName
= getMatchedVariantName();
4344 if (!VariantName
.empty() && isSupportedMnemo(Mnemo
, FBS
)) {
4347 " variant of this instruction is not supported"));
4350 // Finally check if this instruction is supported on any other GPU.
4351 if (isSupportedMnemo(Mnemo
, FeatureBitset().set())) {
4352 return Error(IDLoc
, "instruction not supported on this GPU");
4355 // Instruction not supported on any GPU. Probably a typo.
4356 std::string Suggestion
= AMDGPUMnemonicSpellCheck(Mnemo
, FBS
);
4357 return Error(IDLoc
, "invalid instruction" + Suggestion
);
4360 bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc
, unsigned &Opcode
,
4361 OperandVector
&Operands
,
4363 uint64_t &ErrorInfo
,
4364 bool MatchingInlineAsm
) {
4366 unsigned Result
= Match_Success
;
4367 for (auto Variant
: getMatchedVariants()) {
4369 auto R
= MatchInstructionImpl(Operands
, Inst
, EI
, MatchingInlineAsm
,
4371 // We order match statuses from least to most specific. We use most specific
4372 // status as resulting
4373 // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature < Match_PreferE32
4374 if ((R
== Match_Success
) ||
4375 (R
== Match_PreferE32
) ||
4376 (R
== Match_MissingFeature
&& Result
!= Match_PreferE32
) ||
4377 (R
== Match_InvalidOperand
&& Result
!= Match_MissingFeature
4378 && Result
!= Match_PreferE32
) ||
4379 (R
== Match_MnemonicFail
&& Result
!= Match_InvalidOperand
4380 && Result
!= Match_MissingFeature
4381 && Result
!= Match_PreferE32
)) {
4385 if (R
== Match_Success
)
4389 if (Result
== Match_Success
) {
4390 if (!validateInstruction(Inst
, IDLoc
, Operands
)) {
4394 Out
.emitInstruction(Inst
, getSTI());
4398 StringRef Mnemo
= ((AMDGPUOperand
&)*Operands
[0]).getToken();
4399 if (checkUnsupportedInstruction(Mnemo
, IDLoc
)) {
4405 case Match_MissingFeature
:
4406 // It has been verified that the specified instruction
4407 // mnemonic is valid. A match was found but it requires
4408 // features which are not supported on this GPU.
4409 return Error(IDLoc
, "operands are not valid for this GPU or mode");
4411 case Match_InvalidOperand
: {
4412 SMLoc ErrorLoc
= IDLoc
;
4413 if (ErrorInfo
!= ~0ULL) {
4414 if (ErrorInfo
>= Operands
.size()) {
4415 return Error(IDLoc
, "too few operands for instruction");
4417 ErrorLoc
= ((AMDGPUOperand
&)*Operands
[ErrorInfo
]).getStartLoc();
4418 if (ErrorLoc
== SMLoc())
4421 return Error(ErrorLoc
, "invalid operand for instruction");
4424 case Match_PreferE32
:
4425 return Error(IDLoc
, "internal error: instruction without _e64 suffix "
4426 "should be encoded as e32");
4427 case Match_MnemonicFail
:
4428 llvm_unreachable("Invalid instructions should have been handled already");
4430 llvm_unreachable("Implement any new match types added!");
bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) {
  int64_t Tmp = -1;
  if (!isToken(AsmToken::Integer) && !isToken(AsmToken::Identifier)) {
    return true;
  }
  if (getParser().parseAbsoluteExpression(Tmp)) {
    return true;
  }
  Ret = static_cast<uint32_t>(Tmp);
  return false;
}

bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major,
                                               uint32_t &Minor) {
  if (ParseAsAbsoluteExpression(Major))
    return TokError("invalid major version");

  if (!trySkipToken(AsmToken::Comma))
    return TokError("minor version number required, comma expected");

  if (ParseAsAbsoluteExpression(Minor))
    return TokError("invalid minor version");

  return false;
}
bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() {
  if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
    return TokError("directive only supported for amdgcn architecture");

  std::string TargetIDDirective;
  SMLoc TargetStart = getTok().getLoc();
  if (getParser().parseEscapedString(TargetIDDirective))
    return true;

  SMRange TargetRange = SMRange(TargetStart, getTok().getLoc());
  if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective)
    return getParser().Error(TargetRange.Start,
        (Twine(".amdgcn_target directive's target id ") +
         Twine(TargetIDDirective) +
         Twine(" does not match the specified target id ") +
         Twine(getTargetStreamer().getTargetID()->toString())).str());

  return false;
}

bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) {
  return Error(Range.Start, "value out of range", Range);
}
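// Illustrative example of the directive handled above (the quoted string must
// match the target id derived from the -mcpu and feature options in use):
//   .amdgcn_target "amdgcn-amd-amdhsa--gfx90a"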
4483 bool AMDGPUAsmParser::calculateGPRBlocks(
4484 const FeatureBitset
&Features
, bool VCCUsed
, bool FlatScrUsed
,
4485 bool XNACKUsed
, Optional
<bool> EnableWavefrontSize32
, unsigned NextFreeVGPR
,
4486 SMRange VGPRRange
, unsigned NextFreeSGPR
, SMRange SGPRRange
,
4487 unsigned &VGPRBlocks
, unsigned &SGPRBlocks
) {
4488 // TODO(scott.linder): These calculations are duplicated from
4489 // AMDGPUAsmPrinter::getSIProgramInfo and could be unified.
4490 IsaVersion Version
= getIsaVersion(getSTI().getCPU());
4492 unsigned NumVGPRs
= NextFreeVGPR
;
4493 unsigned NumSGPRs
= NextFreeSGPR
;
4495 if (Version
.Major
>= 10)
4498 unsigned MaxAddressableNumSGPRs
=
4499 IsaInfo::getAddressableNumSGPRs(&getSTI());
4501 if (Version
.Major
>= 8 && !Features
.test(FeatureSGPRInitBug
) &&
4502 NumSGPRs
> MaxAddressableNumSGPRs
)
4503 return OutOfRangeError(SGPRRange
);
4506 IsaInfo::getNumExtraSGPRs(&getSTI(), VCCUsed
, FlatScrUsed
, XNACKUsed
);
4508 if ((Version
.Major
<= 7 || Features
.test(FeatureSGPRInitBug
)) &&
4509 NumSGPRs
> MaxAddressableNumSGPRs
)
4510 return OutOfRangeError(SGPRRange
);
4512 if (Features
.test(FeatureSGPRInitBug
))
4513 NumSGPRs
= IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG
;
4517 IsaInfo::getNumVGPRBlocks(&getSTI(), NumVGPRs
, EnableWavefrontSize32
);
4518 SGPRBlocks
= IsaInfo::getNumSGPRBlocks(&getSTI(), NumSGPRs
);
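// Illustrative example of the directive block parsed below; only
// .amdhsa_next_free_vgpr and .amdhsa_next_free_sgpr are mandatory
// (plus .amdhsa_accum_offset on gfx90a):
//   .amdhsa_kernel my_kernel
//     .amdhsa_next_free_vgpr 8
//     .amdhsa_next_free_sgpr 16
//   .end_amdhsa_kernel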
4523 bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
4524 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn
)
4525 return TokError("directive only supported for amdgcn architecture");
4527 if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA
)
4528 return TokError("directive only supported for amdhsa OS");
4530 StringRef KernelName
;
4531 if (getParser().parseIdentifier(KernelName
))
4534 kernel_descriptor_t KD
= getDefaultAmdhsaKernelDescriptor(&getSTI());
4538 IsaVersion IVersion
= getIsaVersion(getSTI().getCPU());
4541 uint64_t NextFreeVGPR
= 0;
4542 uint64_t AccumOffset
= 0;
4544 uint64_t NextFreeSGPR
= 0;
4545 unsigned UserSGPRCount
= 0;
4546 bool ReserveVCC
= true;
4547 bool ReserveFlatScr
= true;
4548 Optional
<bool> EnableWavefrontSize32
;
4551 while (trySkipToken(AsmToken::EndOfStatement
));
4554 SMRange IDRange
= getTok().getLocRange();
4555 if (!parseId(ID
, "expected .amdhsa_ directive or .end_amdhsa_kernel"))
4558 if (ID
== ".end_amdhsa_kernel")
4561 if (Seen
.find(ID
) != Seen
.end())
4562 return TokError(".amdhsa_ directives cannot be repeated");
4565 SMLoc ValStart
= getLoc();
4567 if (getParser().parseAbsoluteExpression(IVal
))
4569 SMLoc ValEnd
= getLoc();
4570 SMRange ValRange
= SMRange(ValStart
, ValEnd
);
4573 return OutOfRangeError(ValRange
);
4575 uint64_t Val
= IVal
;
4577 #define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE) \
4578 if (!isUInt<ENTRY##_WIDTH>(VALUE)) \
4579 return OutOfRangeError(RANGE); \
4580 AMDHSA_BITS_SET(FIELD, ENTRY, VALUE);
4582 if (ID
== ".amdhsa_group_segment_fixed_size") {
4583 if (!isUInt
<sizeof(KD
.group_segment_fixed_size
) * CHAR_BIT
>(Val
))
4584 return OutOfRangeError(ValRange
);
4585 KD
.group_segment_fixed_size
= Val
;
4586 } else if (ID
== ".amdhsa_private_segment_fixed_size") {
4587 if (!isUInt
<sizeof(KD
.private_segment_fixed_size
) * CHAR_BIT
>(Val
))
4588 return OutOfRangeError(ValRange
);
4589 KD
.private_segment_fixed_size
= Val
;
4590 } else if (ID
== ".amdhsa_kernarg_size") {
4591 if (!isUInt
<sizeof(KD
.kernarg_size
) * CHAR_BIT
>(Val
))
4592 return OutOfRangeError(ValRange
);
4593 KD
.kernarg_size
= Val
;
4594 } else if (ID
== ".amdhsa_user_sgpr_private_segment_buffer") {
4595 if (hasArchitectedFlatScratch())
4596 return Error(IDRange
.Start
,
4597 "directive is not supported with architected flat scratch",
4599 PARSE_BITS_ENTRY(KD
.kernel_code_properties
,
4600 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER
,
4604 } else if (ID
== ".amdhsa_user_sgpr_dispatch_ptr") {
4605 PARSE_BITS_ENTRY(KD
.kernel_code_properties
,
4606 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR
, Val
,
4610 } else if (ID
== ".amdhsa_user_sgpr_queue_ptr") {
4611 PARSE_BITS_ENTRY(KD
.kernel_code_properties
,
4612 KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR
, Val
,
4616 } else if (ID
== ".amdhsa_user_sgpr_kernarg_segment_ptr") {
4617 PARSE_BITS_ENTRY(KD
.kernel_code_properties
,
4618 KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR
,
4622 } else if (ID
== ".amdhsa_user_sgpr_dispatch_id") {
4623 PARSE_BITS_ENTRY(KD
.kernel_code_properties
,
4624 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID
, Val
,
4628 } else if (ID
== ".amdhsa_user_sgpr_flat_scratch_init") {
4629 if (hasArchitectedFlatScratch())
4630 return Error(IDRange
.Start
,
4631 "directive is not supported with architected flat scratch",
4633 PARSE_BITS_ENTRY(KD
.kernel_code_properties
,
4634 KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT
, Val
,
4638 } else if (ID
== ".amdhsa_user_sgpr_private_segment_size") {
4639 PARSE_BITS_ENTRY(KD
.kernel_code_properties
,
4640 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE
,
4644 } else if (ID
== ".amdhsa_wavefront_size32") {
4645 if (IVersion
.Major
< 10)
4646 return Error(IDRange
.Start
, "directive requires gfx10+", IDRange
);
4647 EnableWavefrontSize32
= Val
;
4648 PARSE_BITS_ENTRY(KD
.kernel_code_properties
,
4649 KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32
,
4651 } else if (ID
== ".amdhsa_system_sgpr_private_segment_wavefront_offset") {
4652 if (hasArchitectedFlatScratch())
4653 return Error(IDRange
.Start
,
4654 "directive is not supported with architected flat scratch",
4656 PARSE_BITS_ENTRY(KD
.compute_pgm_rsrc2
,
4657 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT
, Val
, ValRange
);
4658 } else if (ID
== ".amdhsa_enable_private_segment") {
4659 if (!hasArchitectedFlatScratch())
4662 "directive is not supported without architected flat scratch",
4664 PARSE_BITS_ENTRY(KD
.compute_pgm_rsrc2
,
4665 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT
, Val
, ValRange
);
4666 } else if (ID
== ".amdhsa_system_sgpr_workgroup_id_x") {
4667 PARSE_BITS_ENTRY(KD
.compute_pgm_rsrc2
,
4668 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X
, Val
,
4670 } else if (ID
== ".amdhsa_system_sgpr_workgroup_id_y") {
4671 PARSE_BITS_ENTRY(KD
.compute_pgm_rsrc2
,
4672 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y
, Val
,
4674 } else if (ID
== ".amdhsa_system_sgpr_workgroup_id_z") {
4675 PARSE_BITS_ENTRY(KD
.compute_pgm_rsrc2
,
4676 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z
, Val
,
4678 } else if (ID
== ".amdhsa_system_sgpr_workgroup_info") {
4679 PARSE_BITS_ENTRY(KD
.compute_pgm_rsrc2
,
4680 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO
, Val
,
4682 } else if (ID
== ".amdhsa_system_vgpr_workitem_id") {
4683 PARSE_BITS_ENTRY(KD
.compute_pgm_rsrc2
,
4684 COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID
, Val
,
4686 } else if (ID
== ".amdhsa_next_free_vgpr") {
4687 VGPRRange
= ValRange
;
4689 } else if (ID
== ".amdhsa_next_free_sgpr") {
4690 SGPRRange
= ValRange
;
4692 } else if (ID
== ".amdhsa_accum_offset") {
4694 return Error(IDRange
.Start
, "directive requires gfx90a+", IDRange
);
4696 } else if (ID
== ".amdhsa_reserve_vcc") {
4697 if (!isUInt
<1>(Val
))
4698 return OutOfRangeError(ValRange
);
4700 } else if (ID
== ".amdhsa_reserve_flat_scratch") {
4701 if (IVersion
.Major
< 7)
4702 return Error(IDRange
.Start
, "directive requires gfx7+", IDRange
);
4703 if (hasArchitectedFlatScratch())
4704 return Error(IDRange
.Start
,
4705 "directive is not supported with architected flat scratch",
4707 if (!isUInt
<1>(Val
))
4708 return OutOfRangeError(ValRange
);
4709 ReserveFlatScr
= Val
;
4710 } else if (ID
== ".amdhsa_reserve_xnack_mask") {
4711 if (IVersion
.Major
< 8)
4712 return Error(IDRange
.Start
, "directive requires gfx8+", IDRange
);
4713 if (!isUInt
<1>(Val
))
4714 return OutOfRangeError(ValRange
);
4715 if (Val
!= getTargetStreamer().getTargetID()->isXnackOnOrAny())
4716 return getParser().Error(IDRange
.Start
, ".amdhsa_reserve_xnack_mask does not match target id",
4718 } else if (ID
== ".amdhsa_float_round_mode_32") {
4719 PARSE_BITS_ENTRY(KD
.compute_pgm_rsrc1
,
4720 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32
, Val
, ValRange
);
4721 } else if (ID
== ".amdhsa_float_round_mode_16_64") {
4722 PARSE_BITS_ENTRY(KD
.compute_pgm_rsrc1
,
4723 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64
, Val
, ValRange
);
4724 } else if (ID
== ".amdhsa_float_denorm_mode_32") {
4725 PARSE_BITS_ENTRY(KD
.compute_pgm_rsrc1
,
4726 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32
, Val
, ValRange
);
4727 } else if (ID
== ".amdhsa_float_denorm_mode_16_64") {
4728 PARSE_BITS_ENTRY(KD
.compute_pgm_rsrc1
,
4729 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64
, Val
,
4731 } else if (ID
== ".amdhsa_dx10_clamp") {
4732 PARSE_BITS_ENTRY(KD
.compute_pgm_rsrc1
,
4733 COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP
, Val
, ValRange
);
4734 } else if (ID
== ".amdhsa_ieee_mode") {
4735 PARSE_BITS_ENTRY(KD
.compute_pgm_rsrc1
, COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE
,
4737 } else if (ID
== ".amdhsa_fp16_overflow") {
4738 if (IVersion
.Major
< 9)
4739 return Error(IDRange
.Start
, "directive requires gfx9+", IDRange
);
4740 PARSE_BITS_ENTRY(KD
.compute_pgm_rsrc1
, COMPUTE_PGM_RSRC1_FP16_OVFL
, Val
,
4742 } else if (ID
== ".amdhsa_tg_split") {
4744 return Error(IDRange
.Start
, "directive requires gfx90a+", IDRange
);
4745 PARSE_BITS_ENTRY(KD
.compute_pgm_rsrc3
, COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT
, Val
,
4747 } else if (ID
== ".amdhsa_workgroup_processor_mode") {
4748 if (IVersion
.Major
< 10)
4749 return Error(IDRange
.Start
, "directive requires gfx10+", IDRange
);
4750 PARSE_BITS_ENTRY(KD
.compute_pgm_rsrc1
, COMPUTE_PGM_RSRC1_WGP_MODE
, Val
,
4752 } else if (ID
== ".amdhsa_memory_ordered") {
4753 if (IVersion
.Major
< 10)
4754 return Error(IDRange
.Start
, "directive requires gfx10+", IDRange
);
4755 PARSE_BITS_ENTRY(KD
.compute_pgm_rsrc1
, COMPUTE_PGM_RSRC1_MEM_ORDERED
, Val
,
4757 } else if (ID
== ".amdhsa_forward_progress") {
4758 if (IVersion
.Major
< 10)
4759 return Error(IDRange
.Start
, "directive requires gfx10+", IDRange
);
4760 PARSE_BITS_ENTRY(KD
.compute_pgm_rsrc1
, COMPUTE_PGM_RSRC1_FWD_PROGRESS
, Val
,
4762 } else if (ID
== ".amdhsa_exception_fp_ieee_invalid_op") {
4764 KD
.compute_pgm_rsrc2
,
4765 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION
, Val
,
4767 } else if (ID
== ".amdhsa_exception_fp_denorm_src") {
4768 PARSE_BITS_ENTRY(KD
.compute_pgm_rsrc2
,
4769 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE
,
4771 } else if (ID
== ".amdhsa_exception_fp_ieee_div_zero") {
4773 KD
.compute_pgm_rsrc2
,
4774 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO
, Val
,
4776 } else if (ID
== ".amdhsa_exception_fp_ieee_overflow") {
4777 PARSE_BITS_ENTRY(KD
.compute_pgm_rsrc2
,
4778 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW
,
4780 } else if (ID
== ".amdhsa_exception_fp_ieee_underflow") {
4781 PARSE_BITS_ENTRY(KD
.compute_pgm_rsrc2
,
4782 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW
,
4784 } else if (ID
== ".amdhsa_exception_fp_ieee_inexact") {
4785 PARSE_BITS_ENTRY(KD
.compute_pgm_rsrc2
,
4786 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT
,
4788 } else if (ID
== ".amdhsa_exception_int_div_zero") {
4789 PARSE_BITS_ENTRY(KD
.compute_pgm_rsrc2
,
4790 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO
,
4793 return Error(IDRange
.Start
, "unknown .amdhsa_kernel directive", IDRange
);
4796 #undef PARSE_BITS_ENTRY
4799 if (Seen
.find(".amdhsa_next_free_vgpr") == Seen
.end())
4800 return TokError(".amdhsa_next_free_vgpr directive is required");
4802 if (Seen
.find(".amdhsa_next_free_sgpr") == Seen
.end())
4803 return TokError(".amdhsa_next_free_sgpr directive is required");
4805 unsigned VGPRBlocks
;
4806 unsigned SGPRBlocks
;
4807 if (calculateGPRBlocks(getFeatureBits(), ReserveVCC
, ReserveFlatScr
,
4808 getTargetStreamer().getTargetID()->isXnackOnOrAny(),
4809 EnableWavefrontSize32
, NextFreeVGPR
,
4810 VGPRRange
, NextFreeSGPR
, SGPRRange
, VGPRBlocks
,
4814 if (!isUInt
<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH
>(
4816 return OutOfRangeError(VGPRRange
);
4817 AMDHSA_BITS_SET(KD
.compute_pgm_rsrc1
,
4818 COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT
, VGPRBlocks
);
4820 if (!isUInt
<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH
>(
4822 return OutOfRangeError(SGPRRange
);
4823 AMDHSA_BITS_SET(KD
.compute_pgm_rsrc1
,
4824 COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT
,
4827 if (!isUInt
<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH
>(UserSGPRCount
))
4828 return TokError("too many user SGPRs enabled");
4829 AMDHSA_BITS_SET(KD
.compute_pgm_rsrc2
, COMPUTE_PGM_RSRC2_USER_SGPR_COUNT
,
4833 if (Seen
.find(".amdhsa_accum_offset") == Seen
.end())
4834 return TokError(".amdhsa_accum_offset directive is required");
4835 if (AccumOffset
< 4 || AccumOffset
> 256 || (AccumOffset
& 3))
4836 return TokError("accum_offset should be in range [4..256] in "
4838 if (AccumOffset
> alignTo(std::max((uint64_t)1, NextFreeVGPR
), 4))
4839 return TokError("accum_offset exceeds total VGPR allocation");
4840 AMDHSA_BITS_SET(KD
.compute_pgm_rsrc3
, COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET
,
4841 (AccumOffset
/ 4 - 1));
4844 getTargetStreamer().EmitAmdhsaKernelDescriptor(
4845 getSTI(), KernelName
, KD
, NextFreeVGPR
, NextFreeSGPR
, ReserveVCC
,
bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectVersion() {
  uint32_t Major;
  uint32_t Minor;

  if (ParseDirectiveMajorMinor(Major, Minor))
    return true;

  getTargetStreamer().EmitDirectiveHSACodeObjectVersion(Major, Minor);
  return false;
}
4861 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() {
4865 StringRef VendorName
;
4868 // If this directive has no arguments, then use the ISA version for the
4870 if (isToken(AsmToken::EndOfStatement
)) {
4871 AMDGPU::IsaVersion ISA
= AMDGPU::getIsaVersion(getSTI().getCPU());
4872 getTargetStreamer().EmitDirectiveHSACodeObjectISAV2(ISA
.Major
, ISA
.Minor
,
4878 if (ParseDirectiveMajorMinor(Major
, Minor
))
4881 if (!trySkipToken(AsmToken::Comma
))
4882 return TokError("stepping version number required, comma expected");
4884 if (ParseAsAbsoluteExpression(Stepping
))
4885 return TokError("invalid stepping version");
4887 if (!trySkipToken(AsmToken::Comma
))
4888 return TokError("vendor name required, comma expected");
4890 if (!parseString(VendorName
, "invalid vendor name"))
4893 if (!trySkipToken(AsmToken::Comma
))
4894 return TokError("arch name required, comma expected");
4896 if (!parseString(ArchName
, "invalid arch name"))
4899 getTargetStreamer().EmitDirectiveHSACodeObjectISAV2(Major
, Minor
, Stepping
,
4900 VendorName
, ArchName
);
4904 bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID
,
4905 amd_kernel_code_t
&Header
) {
4906 // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing
4907 // assembly for backwards compatibility.
4908 if (ID
== "max_scratch_backing_memory_byte_size") {
4909 Parser
.eatToEndOfStatement();
4913 SmallString
<40> ErrStr
;
4914 raw_svector_ostream
Err(ErrStr
);
4915 if (!parseAmdKernelCodeField(ID
, getParser(), Header
, Err
)) {
4916 return TokError(Err
.str());
4920 if (ID
== "enable_wavefront_size32") {
4921 if (Header
.code_properties
& AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32
) {
4923 return TokError("enable_wavefront_size32=1 is only allowed on GFX10+");
4924 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32
])
4925 return TokError("enable_wavefront_size32=1 requires +WavefrontSize32");
4927 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64
])
4928 return TokError("enable_wavefront_size32=0 requires +WavefrontSize64");
4932 if (ID
== "wavefront_size") {
4933 if (Header
.wavefront_size
== 5) {
4935 return TokError("wavefront_size=5 is only allowed on GFX10+");
4936 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32
])
4937 return TokError("wavefront_size=5 requires +WavefrontSize32");
4938 } else if (Header
.wavefront_size
== 6) {
4939 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64
])
4940 return TokError("wavefront_size=6 requires +WavefrontSize64");
4944 if (ID
== "enable_wgp_mode") {
4945 if (G_00B848_WGP_MODE(Header
.compute_pgm_resource_registers
) &&
4947 return TokError("enable_wgp_mode=1 is only allowed on GFX10+");
4950 if (ID
== "enable_mem_ordered") {
4951 if (G_00B848_MEM_ORDERED(Header
.compute_pgm_resource_registers
) &&
4953 return TokError("enable_mem_ordered=1 is only allowed on GFX10+");
4956 if (ID
== "enable_fwd_progress") {
4957 if (G_00B848_FWD_PROGRESS(Header
.compute_pgm_resource_registers
) &&
4959 return TokError("enable_fwd_progress=1 is only allowed on GFX10+");
4965 bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() {
4966 amd_kernel_code_t Header
;
4967 AMDGPU::initDefaultAMDKernelCodeT(Header
, &getSTI());
4970 // Lex EndOfStatement. This is in a while loop, because lexing a comment
4971 // will set the current token to EndOfStatement.
4972 while(trySkipToken(AsmToken::EndOfStatement
));
4975 if (!parseId(ID
, "expected value identifier or .end_amd_kernel_code_t"))
4978 if (ID
== ".end_amd_kernel_code_t")
4981 if (ParseAMDKernelCodeTValue(ID
, Header
))
4985 getTargetStreamer().EmitAMDKernelCodeT(Header
);
bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() {
  StringRef KernelName;
  if (!parseId(KernelName, "expected symbol name"))
    return true;

  getTargetStreamer().EmitAMDGPUSymbolType(KernelName,
                                           ELF::STT_AMDGPU_HSA_KERNEL);

  KernelScope.initialize(getContext());
  return false;
}

bool AMDGPUAsmParser::ParseDirectiveISAVersion() {
  if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) {
    return Error(getLoc(),
                 ".amd_amdgpu_isa directive is not available on non-amdgcn "
                 "architectures");
  }

  auto TargetIDDirective = getLexer().getTok().getStringContents();
  if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective)
    return Error(getParser().getTok().getLoc(), "target id must match options");

  getTargetStreamer().EmitISAVersion();
5019 bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() {
5020 const char *AssemblerDirectiveBegin
;
5021 const char *AssemblerDirectiveEnd
;
5022 std::tie(AssemblerDirectiveBegin
, AssemblerDirectiveEnd
) =
5023 isHsaAbiVersion3Or4(&getSTI())
5024 ? std::make_tuple(HSAMD::V3::AssemblerDirectiveBegin
,
5025 HSAMD::V3::AssemblerDirectiveEnd
)
5026 : std::make_tuple(HSAMD::AssemblerDirectiveBegin
,
5027 HSAMD::AssemblerDirectiveEnd
);
5029 if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA
) {
5030 return Error(getLoc(),
5031 (Twine(AssemblerDirectiveBegin
) + Twine(" directive is "
5032 "not available on non-amdhsa OSes")).str());
5035 std::string HSAMetadataString
;
5036 if (ParseToEndDirective(AssemblerDirectiveBegin
, AssemblerDirectiveEnd
,
5040 if (isHsaAbiVersion3Or4(&getSTI())) {
5041 if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString
))
5042 return Error(getLoc(), "invalid HSA metadata");
5044 if (!getTargetStreamer().EmitHSAMetadataV2(HSAMetadataString
))
5045 return Error(getLoc(), "invalid HSA metadata");
5051 /// Common code to parse out a block of text (typically YAML) between start and
5053 bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin
,
5054 const char *AssemblerDirectiveEnd
,
5055 std::string
&CollectString
) {
5057 raw_string_ostream
CollectStream(CollectString
);
5059 getLexer().setSkipSpace(false);
5061 bool FoundEnd
= false;
5062 while (!isToken(AsmToken::Eof
)) {
5063 while (isToken(AsmToken::Space
)) {
5064 CollectStream
<< getTokenStr();
5068 if (trySkipId(AssemblerDirectiveEnd
)) {
5073 CollectStream
<< Parser
.parseStringToEndOfStatement()
5074 << getContext().getAsmInfo()->getSeparatorString();
5076 Parser
.eatToEndOfStatement();
5079 getLexer().setSkipSpace(true);
5081 if (isToken(AsmToken::Eof
) && !FoundEnd
) {
5082 return TokError(Twine("expected directive ") +
5083 Twine(AssemblerDirectiveEnd
) + Twine(" not found"));
5086 CollectStream
.flush();
/// Parse the assembler directive for new MsgPack-format PAL metadata.
bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() {
  std::string String;
  if (ParseToEndDirective(AMDGPU::PALMD::AssemblerDirectiveBegin,
                          AMDGPU::PALMD::AssemblerDirectiveEnd, String))
    return true;

  auto PALMetadata = getTargetStreamer().getPALMetadata();
  if (!PALMetadata->setFromString(String))
    return Error(getLoc(), "invalid PAL metadata");
  return false;
}

/// Parse the assembler directive for old linear-format PAL metadata.
bool AMDGPUAsmParser::ParseDirectivePALMetadata() {
  if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) {
    return Error(getLoc(),
                 (Twine(PALMD::AssemblerDirective) + Twine(" directive is "
                 "not available on non-amdpal OSes")).str());
  }

  auto PALMetadata = getTargetStreamer().getPALMetadata();
  PALMetadata->setLegacy();
  for (;;) {
    uint32_t Key, Value;
    if (ParseAsAbsoluteExpression(Key)) {
      return TokError(Twine("invalid value in ") +
                      Twine(PALMD::AssemblerDirective));
    }
    if (!trySkipToken(AsmToken::Comma)) {
      return TokError(Twine("expected an even number of values in ") +
                      Twine(PALMD::AssemblerDirective));
    }
    if (ParseAsAbsoluteExpression(Value)) {
      return TokError(Twine("invalid value in ") +
                      Twine(PALMD::AssemblerDirective));
    }
    PALMetadata->setRegister(Key, Value);
    if (!trySkipToken(AsmToken::Comma))
      break;
  }
  return false;
}
/// ParseDirectiveAMDGPULDS
///  ::= .amdgpu_lds identifier ',' size_expression [',' align_expression]
bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() {
  if (getParser().checkForValidSection())
    return true;

  StringRef Name;
  SMLoc NameLoc = getLoc();
  if (getParser().parseIdentifier(Name))
    return TokError("expected identifier in directive");

  MCSymbol *Symbol = getContext().getOrCreateSymbol(Name);
  if (parseToken(AsmToken::Comma, "expected ','"))
    return true;

  unsigned LocalMemorySize = AMDGPU::IsaInfo::getLocalMemorySize(&getSTI());

  int64_t Size;
  SMLoc SizeLoc = getLoc();
  if (getParser().parseAbsoluteExpression(Size))
    return true;
  if (Size < 0)
    return Error(SizeLoc, "size must be non-negative");
  if (Size > LocalMemorySize)
    return Error(SizeLoc, "size is too large");

  int64_t Alignment = 4;
  if (trySkipToken(AsmToken::Comma)) {
    SMLoc AlignLoc = getLoc();
    if (getParser().parseAbsoluteExpression(Alignment))
      return true;
    if (Alignment < 0 || !isPowerOf2_64(Alignment))
      return Error(AlignLoc, "alignment must be a power of two");

    // Alignment larger than the size of LDS is possible in theory, as long
    // as the linker manages to place the symbol at address 0, but we do want
    // to make sure the alignment fits nicely into a 32-bit integer.
    if (Alignment >= 1u << 31)
      return Error(AlignLoc, "alignment is too large");
  }

  if (parseToken(AsmToken::EndOfStatement,
                 "unexpected token in '.amdgpu_lds' directive"))
    return true;

  Symbol->redefineIfPossible();
  if (!Symbol->isUndefined())
    return Error(NameLoc, "invalid symbol redefinition");

  getTargetStreamer().emitAMDGPULDS(Symbol, Size, Align(Alignment));
  return false;
}
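// Illustrative example of the grammar above (the symbol name is hypothetical;
// the alignment argument is optional and defaults to 4):
//   .amdgpu_lds my_lds_sym, 512, 16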
bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
  StringRef IDVal = DirectiveID.getString();

  if (isHsaAbiVersion3Or4(&getSTI())) {
    if (IDVal == ".amdhsa_kernel")
      return ParseDirectiveAMDHSAKernel();

    // TODO: Restructure/combine with PAL metadata directive.
    if (IDVal == AMDGPU::HSAMD::V3::AssemblerDirectiveBegin)
      return ParseDirectiveHSAMetadata();
  } else {
    if (IDVal == ".hsa_code_object_version")
      return ParseDirectiveHSACodeObjectVersion();

    if (IDVal == ".hsa_code_object_isa")
      return ParseDirectiveHSACodeObjectISA();

    if (IDVal == ".amd_kernel_code_t")
      return ParseDirectiveAMDKernelCodeT();

    if (IDVal == ".amdgpu_hsa_kernel")
      return ParseDirectiveAMDGPUHsaKernel();

    if (IDVal == ".amd_amdgpu_isa")
      return ParseDirectiveISAVersion();

    if (IDVal == AMDGPU::HSAMD::AssemblerDirectiveBegin)
      return ParseDirectiveHSAMetadata();
  }

  if (IDVal == ".amdgcn_target")
    return ParseDirectiveAMDGCNTarget();

  if (IDVal == ".amdgpu_lds")
    return ParseDirectiveAMDGPULDS();

  if (IDVal == PALMD::AssemblerDirectiveBegin)
    return ParseDirectivePALMetadataBegin();

  if (IDVal == PALMD::AssemblerDirective)
    return ParseDirectivePALMetadata();

  return true;
}
5232 bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo
&MRI
,
5235 for (MCRegAliasIterator
R(AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15
, &MRI
, true);
5238 return isGFX9Plus();
5241 // GFX10 has 2 more SGPRs 104 and 105.
5242 for (MCRegAliasIterator
R(AMDGPU::SGPR104_SGPR105
, &MRI
, true);
5245 return hasSGPR104_SGPR105();
5249 case AMDGPU::SRC_SHARED_BASE
:
5250 case AMDGPU::SRC_SHARED_LIMIT
:
5251 case AMDGPU::SRC_PRIVATE_BASE
:
5252 case AMDGPU::SRC_PRIVATE_LIMIT
:
5253 case AMDGPU::SRC_POPS_EXITING_WAVE_ID
:
5254 return isGFX9Plus();
5256 case AMDGPU::TBA_LO
:
5257 case AMDGPU::TBA_HI
:
5259 case AMDGPU::TMA_LO
:
5260 case AMDGPU::TMA_HI
:
5261 return !isGFX9Plus();
5262 case AMDGPU::XNACK_MASK
:
5263 case AMDGPU::XNACK_MASK_LO
:
5264 case AMDGPU::XNACK_MASK_HI
:
5265 return (isVI() || isGFX9()) && getTargetStreamer().getTargetID()->isXnackSupported();
5266 case AMDGPU::SGPR_NULL
:
5267 return isGFX10Plus();
5275 if (isSI() || isGFX10Plus()) {
5276 // No flat_scr on SI.
5277 // On GFX10 flat scratch is not a valid register operand and can only be
5278 // accessed with s_setreg/s_getreg.
5280 case AMDGPU::FLAT_SCR
:
5281 case AMDGPU::FLAT_SCR_LO
:
5282 case AMDGPU::FLAT_SCR_HI
:
5289 // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that
5291 for (MCRegAliasIterator
R(AMDGPU::SGPR102_SGPR103
, &MRI
, true);
5294 return hasSGPR102_SGPR103();
5300 OperandMatchResultTy
5301 AMDGPUAsmParser::parseOperand(OperandVector
&Operands
, StringRef Mnemonic
,
5303 // Try to parse with a custom parser
5304 OperandMatchResultTy ResTy
= MatchOperandParserImpl(Operands
, Mnemonic
);
5306 // If we successfully parsed the operand or if there as an error parsing,
5309 // If we are parsing after we reach EndOfStatement then this means we
5310 // are appending default values to the Operands list. This is only done
5311 // by custom parser, so we shouldn't continue on to the generic parsing.
5312 if (ResTy
== MatchOperand_Success
|| ResTy
== MatchOperand_ParseFail
||
5313 isToken(AsmToken::EndOfStatement
))
5317 SMLoc LBraceLoc
= getLoc();
5318 if (Mode
== OperandMode_NSA
&& trySkipToken(AsmToken::LBrac
)) {
5319 unsigned Prefix
= Operands
.size();
5322 auto Loc
= getLoc();
5323 ResTy
= parseReg(Operands
);
5324 if (ResTy
== MatchOperand_NoMatch
)
5325 Error(Loc
, "expected a register");
5326 if (ResTy
!= MatchOperand_Success
)
5327 return MatchOperand_ParseFail
;
5329 RBraceLoc
= getLoc();
5330 if (trySkipToken(AsmToken::RBrac
))
5333 if (!skipToken(AsmToken::Comma
,
5334 "expected a comma or a closing square bracket")) {
5335 return MatchOperand_ParseFail
;
5339 if (Operands
.size() - Prefix
> 1) {
5340 Operands
.insert(Operands
.begin() + Prefix
,
5341 AMDGPUOperand::CreateToken(this, "[", LBraceLoc
));
5342 Operands
.push_back(AMDGPUOperand::CreateToken(this, "]", RBraceLoc
));
5345 return MatchOperand_Success
;
5348 return parseRegOrImm(Operands
);
StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) {
  // Clear any forced encodings from the previous instruction.
  setForcedEncodingSize(0);
  setForcedDPP(false);
  setForcedSDWA(false);

  if (Name.endswith("_e64")) {
    setForcedEncodingSize(64);
    return Name.substr(0, Name.size() - 4);
  } else if (Name.endswith("_e32")) {
    setForcedEncodingSize(32);
    return Name.substr(0, Name.size() - 4);
  } else if (Name.endswith("_dpp")) {
    setForcedDPP(true);
    return Name.substr(0, Name.size() - 4);
  } else if (Name.endswith("_sdwa")) {
    setForcedSDWA(true);
    return Name.substr(0, Name.size() - 5);
  }
  return Name;
}
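// For example, "v_add_f32_e64" forces the 64-bit encoding and is then matched
// against the tables using the base mnemonic "v_add_f32" returned above.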
5373 bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo
&Info
,
5375 SMLoc NameLoc
, OperandVector
&Operands
) {
5376 // Add the instruction mnemonic
5377 Name
= parseMnemonicSuffix(Name
);
5378 Operands
.push_back(AMDGPUOperand::CreateToken(this, Name
, NameLoc
));
5380 bool IsMIMG
= Name
.startswith("image_");
5382 while (!trySkipToken(AsmToken::EndOfStatement
)) {
5383 OperandMode Mode
= OperandMode_Default
;
5384 if (IsMIMG
&& isGFX10Plus() && Operands
.size() == 2)
5385 Mode
= OperandMode_NSA
;
5387 OperandMatchResultTy Res
= parseOperand(Operands
, Name
, Mode
);
5389 if (Res
!= MatchOperand_Success
) {
5390 checkUnsupportedInstruction(Name
, NameLoc
);
5391 if (!Parser
.hasPendingError()) {
5392 // FIXME: use real operand location rather than the current location.
5394 (Res
== MatchOperand_ParseFail
) ? "failed parsing operand." :
5395 "not a valid operand.";
5396 Error(getLoc(), Msg
);
5398 while (!trySkipToken(AsmToken::EndOfStatement
)) {
5404 // Eat the comma or space if there is one.
5405 trySkipToken(AsmToken::Comma
);
5411 //===----------------------------------------------------------------------===//
5412 // Utility functions
5413 //===----------------------------------------------------------------------===//
5415 OperandMatchResultTy
5416 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix
, int64_t &IntVal
) {
5418 if (!trySkipId(Prefix
, AsmToken::Colon
))
5419 return MatchOperand_NoMatch
;
5421 return parseExpr(IntVal
) ? MatchOperand_Success
: MatchOperand_ParseFail
;
5424 OperandMatchResultTy
5425 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix
, OperandVector
&Operands
,
5426 AMDGPUOperand::ImmTy ImmTy
,
5427 bool (*ConvertResult
)(int64_t&)) {
5431 OperandMatchResultTy Res
= parseIntWithPrefix(Prefix
, Value
);
5432 if (Res
!= MatchOperand_Success
)
5435 if (ConvertResult
&& !ConvertResult(Value
)) {
5436 Error(S
, "invalid " + StringRef(Prefix
) + " value.");
5439 Operands
.push_back(AMDGPUOperand::CreateImm(this, Value
, S
, ImmTy
));
5440 return MatchOperand_Success
;
5443 OperandMatchResultTy
5444 AMDGPUAsmParser::parseOperandArrayWithPrefix(const char *Prefix
,
5445 OperandVector
&Operands
,
5446 AMDGPUOperand::ImmTy ImmTy
,
5447 bool (*ConvertResult
)(int64_t&)) {
5449 if (!trySkipId(Prefix
, AsmToken::Colon
))
5450 return MatchOperand_NoMatch
;
5452 if (!skipToken(AsmToken::LBrac
, "expected a left square bracket"))
5453 return MatchOperand_ParseFail
;
5456 const unsigned MaxSize
= 4;
5458 // FIXME: How to verify the number of elements matches the number of src
5460 for (int I
= 0; ; ++I
) {
5462 SMLoc Loc
= getLoc();
5464 return MatchOperand_ParseFail
;
5466 if (Op
!= 0 && Op
!= 1) {
5467 Error(Loc
, "invalid " + StringRef(Prefix
) + " value.");
5468 return MatchOperand_ParseFail
;
5473 if (trySkipToken(AsmToken::RBrac
))
5476 if (I
+ 1 == MaxSize
) {
5477 Error(getLoc(), "expected a closing square bracket");
5478 return MatchOperand_ParseFail
;
5481 if (!skipToken(AsmToken::Comma
, "expected a comma"))
5482 return MatchOperand_ParseFail
;
5485 Operands
.push_back(AMDGPUOperand::CreateImm(this, Val
, S
, ImmTy
));
5486 return MatchOperand_Success
;
5489 OperandMatchResultTy
5490 AMDGPUAsmParser::parseNamedBit(StringRef Name
, OperandVector
&Operands
,
5491 AMDGPUOperand::ImmTy ImmTy
) {
5495 if (trySkipId(Name
)) {
5497 } else if (trySkipId("no", Name
)) {
5500 return MatchOperand_NoMatch
;
5503 if (Name
== "r128" && !hasMIMG_R128()) {
5504 Error(S
, "r128 modifier is not supported on this GPU");
5505 return MatchOperand_ParseFail
;
5507 if (Name
== "a16" && !isGFX9() && !hasGFX10A16()) {
5508 Error(S
, "a16 modifier is not supported on this GPU");
5509 return MatchOperand_ParseFail
;
5512 if (isGFX9() && ImmTy
== AMDGPUOperand::ImmTyA16
)
5513 ImmTy
= AMDGPUOperand::ImmTyR128A16
;
5515 Operands
.push_back(AMDGPUOperand::CreateImm(this, Bit
, S
, ImmTy
));
5516 return MatchOperand_Success
;
5519 OperandMatchResultTy
5520 AMDGPUAsmParser::parseCPol(OperandVector
&Operands
) {
5521 unsigned CPolOn
= 0;
5522 unsigned CPolOff
= 0;
5525 if (trySkipId("glc"))
5526 CPolOn
= AMDGPU::CPol::GLC
;
5527 else if (trySkipId("noglc"))
5528 CPolOff
= AMDGPU::CPol::GLC
;
5529 else if (trySkipId("slc"))
5530 CPolOn
= AMDGPU::CPol::SLC
;
5531 else if (trySkipId("noslc"))
5532 CPolOff
= AMDGPU::CPol::SLC
;
5533 else if (trySkipId("dlc"))
5534 CPolOn
= AMDGPU::CPol::DLC
;
5535 else if (trySkipId("nodlc"))
5536 CPolOff
= AMDGPU::CPol::DLC
;
5537 else if (trySkipId("scc"))
5538 CPolOn
= AMDGPU::CPol::SCC
;
5539 else if (trySkipId("noscc"))
5540 CPolOff
= AMDGPU::CPol::SCC
;
5542 return MatchOperand_NoMatch
;
5544 if (!isGFX10Plus() && ((CPolOn
| CPolOff
) & AMDGPU::CPol::DLC
)) {
5545 Error(S
, "dlc modifier is not supported on this GPU");
5546 return MatchOperand_ParseFail
;
5549 if (!isGFX90A() && ((CPolOn
| CPolOff
) & AMDGPU::CPol::SCC
)) {
5550 Error(S
, "scc modifier is not supported on this GPU");
5551 return MatchOperand_ParseFail
;
5554 if (CPolSeen
& (CPolOn
| CPolOff
)) {
5555 Error(S
, "duplicate cache policy modifier");
5556 return MatchOperand_ParseFail
;
5559 CPolSeen
|= (CPolOn
| CPolOff
);
5561 for (unsigned I
= 1; I
!= Operands
.size(); ++I
) {
5562 AMDGPUOperand
&Op
= ((AMDGPUOperand
&)*Operands
[I
]);
5564 Op
.setImm((Op
.getImm() | CPolOn
) & ~CPolOff
);
5565 return MatchOperand_Success
;
5569 Operands
.push_back(AMDGPUOperand::CreateImm(this, CPolOn
, S
,
5570 AMDGPUOperand::ImmTyCPol
));
5572 return MatchOperand_Success
;
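// Illustrative example: cache policy modifiers accumulate into a single
// immediate operand, so several may follow one memory instruction:
//   buffer_load_dword v0, off, s[0:3], 0 glc slc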
static void addOptionalImmOperand(
  MCInst& Inst, const OperandVector& Operands,
  AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx,
  AMDGPUOperand::ImmTy ImmT,
  int64_t Default = 0) {
  auto i = OptionalIdx.find(ImmT);
  if (i != OptionalIdx.end()) {
    unsigned Idx = i->second;
    ((AMDGPUOperand &)*Operands[Idx]).addImmOperands(Inst, 1);
  } else {
    Inst.addOperand(MCOperand::createImm(Default));
  }
}
5589 OperandMatchResultTy
5590 AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix
,
5593 if (!trySkipId(Prefix
, AsmToken::Colon
))
5594 return MatchOperand_NoMatch
;
5596 StringLoc
= getLoc();
5597 return parseId(Value
, "expected an identifier") ? MatchOperand_Success
5598 : MatchOperand_ParseFail
;
5601 //===----------------------------------------------------------------------===//
5603 //===----------------------------------------------------------------------===//
5605 bool AMDGPUAsmParser::tryParseFmt(const char *Pref
,
5609 SMLoc Loc
= getLoc();
5611 auto Res
= parseIntWithPrefix(Pref
, Val
);
5612 if (Res
== MatchOperand_ParseFail
)
5614 if (Res
== MatchOperand_NoMatch
)
5617 if (Val
< 0 || Val
> MaxVal
) {
5618 Error(Loc
, Twine("out of range ", StringRef(Pref
)));
5626 // dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their
5627 // values to live in a joint format operand in the MCInst encoding.
5628 OperandMatchResultTy
5629 AMDGPUAsmParser::parseDfmtNfmt(int64_t &Format
) {
5630 using namespace llvm::AMDGPU::MTBUFFormat
;
5632 int64_t Dfmt
= DFMT_UNDEF
;
5633 int64_t Nfmt
= NFMT_UNDEF
;
5635 // dfmt and nfmt can appear in either order, and each is optional.
5636 for (int I
= 0; I
< 2; ++I
) {
5637 if (Dfmt
== DFMT_UNDEF
&& !tryParseFmt("dfmt", DFMT_MAX
, Dfmt
))
5638 return MatchOperand_ParseFail
;
5640 if (Nfmt
== NFMT_UNDEF
&& !tryParseFmt("nfmt", NFMT_MAX
, Nfmt
)) {
5641 return MatchOperand_ParseFail
;
5643 // Skip optional comma between dfmt/nfmt
5644 // but guard against 2 commas following each other.
5645 if ((Dfmt
== DFMT_UNDEF
) != (Nfmt
== NFMT_UNDEF
) &&
5646 !peekToken().is(AsmToken::Comma
)) {
5647 trySkipToken(AsmToken::Comma
);
5651 if (Dfmt
== DFMT_UNDEF
&& Nfmt
== NFMT_UNDEF
)
5652 return MatchOperand_NoMatch
;
5654 Dfmt
= (Dfmt
== DFMT_UNDEF
) ? DFMT_DEFAULT
: Dfmt
;
5655 Nfmt
= (Nfmt
== NFMT_UNDEF
) ? NFMT_DEFAULT
: Nfmt
;
5657 Format
= encodeDfmtNfmt(Dfmt
, Nfmt
);
5658 return MatchOperand_Success
;
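// Illustrative example of the legacy (pre-gfx10) syntax handled here; dfmt
// and nfmt may appear in either order and are merged into one format operand:
//   tbuffer_load_format_x v0, off, s[0:3], dfmt:4, nfmt:7, 0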
5661 OperandMatchResultTy
5662 AMDGPUAsmParser::parseUfmt(int64_t &Format
) {
5663 using namespace llvm::AMDGPU::MTBUFFormat
;
5665 int64_t Fmt
= UFMT_UNDEF
;
5667 if (!tryParseFmt("format", UFMT_MAX
, Fmt
))
5668 return MatchOperand_ParseFail
;
5670 if (Fmt
== UFMT_UNDEF
)
5671 return MatchOperand_NoMatch
;
5674 return MatchOperand_Success
;
5677 bool AMDGPUAsmParser::matchDfmtNfmt(int64_t &Dfmt
,
5679 StringRef FormatStr
,
5681 using namespace llvm::AMDGPU::MTBUFFormat
;
5684 Format
= getDfmt(FormatStr
);
5685 if (Format
!= DFMT_UNDEF
) {
5690 Format
= getNfmt(FormatStr
, getSTI());
5691 if (Format
!= NFMT_UNDEF
) {
5696 Error(Loc
, "unsupported format");
5700 OperandMatchResultTy
5701 AMDGPUAsmParser::parseSymbolicSplitFormat(StringRef FormatStr
,
5704 using namespace llvm::AMDGPU::MTBUFFormat
;
5706 int64_t Dfmt
= DFMT_UNDEF
;
5707 int64_t Nfmt
= NFMT_UNDEF
;
5708 if (!matchDfmtNfmt(Dfmt
, Nfmt
, FormatStr
, FormatLoc
))
5709 return MatchOperand_ParseFail
;
5711 if (trySkipToken(AsmToken::Comma
)) {
5713 SMLoc Loc
= getLoc();
5714 if (!parseId(Str
, "expected a format string") ||
5715 !matchDfmtNfmt(Dfmt
, Nfmt
, Str
, Loc
)) {
5716 return MatchOperand_ParseFail
;
5718 if (Dfmt
== DFMT_UNDEF
) {
5719 Error(Loc
, "duplicate numeric format");
5720 return MatchOperand_ParseFail
;
5721 } else if (Nfmt
== NFMT_UNDEF
) {
5722 Error(Loc
, "duplicate data format");
5723 return MatchOperand_ParseFail
;
5727 Dfmt
= (Dfmt
== DFMT_UNDEF
) ? DFMT_DEFAULT
: Dfmt
;
5728 Nfmt
= (Nfmt
== NFMT_UNDEF
) ? NFMT_DEFAULT
: Nfmt
;
5730 if (isGFX10Plus()) {
5731 auto Ufmt
= convertDfmtNfmt2Ufmt(Dfmt
, Nfmt
);
5732 if (Ufmt
== UFMT_UNDEF
) {
5733 Error(FormatLoc
, "unsupported format");
5734 return MatchOperand_ParseFail
;
5738 Format
= encodeDfmtNfmt(Dfmt
, Nfmt
);
5741 return MatchOperand_Success
;
5744 OperandMatchResultTy
5745 AMDGPUAsmParser::parseSymbolicUnifiedFormat(StringRef FormatStr
,
5748 using namespace llvm::AMDGPU::MTBUFFormat
;
5750 auto Id
= getUnifiedFormat(FormatStr
);
5751 if (Id
== UFMT_UNDEF
)
5752 return MatchOperand_NoMatch
;
5754 if (!isGFX10Plus()) {
5755 Error(Loc
, "unified format is not supported on this GPU");
5756 return MatchOperand_ParseFail
;
5760 return MatchOperand_Success
;
5763 OperandMatchResultTy
5764 AMDGPUAsmParser::parseNumericFormat(int64_t &Format
) {
5765 using namespace llvm::AMDGPU::MTBUFFormat
;
5766 SMLoc Loc
= getLoc();
5768 if (!parseExpr(Format
))
5769 return MatchOperand_ParseFail
;
5770 if (!isValidFormatEncoding(Format
, getSTI())) {
5771 Error(Loc
, "out of range format");
5772 return MatchOperand_ParseFail
;
5775 return MatchOperand_Success
;
5778 OperandMatchResultTy
5779 AMDGPUAsmParser::parseSymbolicOrNumericFormat(int64_t &Format
) {
5780 using namespace llvm::AMDGPU::MTBUFFormat
;
5782 if (!trySkipId("format", AsmToken::Colon
))
5783 return MatchOperand_NoMatch
;
5785 if (trySkipToken(AsmToken::LBrac
)) {
5786 StringRef FormatStr
;
5787 SMLoc Loc
= getLoc();
5788 if (!parseId(FormatStr
, "expected a format string"))
5789 return MatchOperand_ParseFail
;
5791 auto Res
= parseSymbolicUnifiedFormat(FormatStr
, Loc
, Format
);
5792 if (Res
== MatchOperand_NoMatch
)
5793 Res
= parseSymbolicSplitFormat(FormatStr
, Loc
, Format
);
5794 if (Res
!= MatchOperand_Success
)
5797 if (!skipToken(AsmToken::RBrac
, "expected a closing square bracket"))
5798 return MatchOperand_ParseFail
;
5800 return MatchOperand_Success
;
5803 return parseNumericFormat(Format
);
5806 OperandMatchResultTy
5807 AMDGPUAsmParser::parseFORMAT(OperandVector
&Operands
) {
5808 using namespace llvm::AMDGPU::MTBUFFormat
;
5810 int64_t Format
= getDefaultFormatEncoding(getSTI());
5811 OperandMatchResultTy Res
;
5812 SMLoc Loc
= getLoc();
5814 // Parse legacy format syntax.
5815 Res
= isGFX10Plus() ? parseUfmt(Format
) : parseDfmtNfmt(Format
);
5816 if (Res
== MatchOperand_ParseFail
)
5819 bool FormatFound
= (Res
== MatchOperand_Success
);
5822 AMDGPUOperand::CreateImm(this, Format
, Loc
, AMDGPUOperand::ImmTyFORMAT
));
5825 trySkipToken(AsmToken::Comma
);
5827 if (isToken(AsmToken::EndOfStatement
)) {
5828 // We are expecting an soffset operand,
5829 // but let matcher handle the error.
5830 return MatchOperand_Success
;
5834 Res
= parseRegOrImm(Operands
);
5835 if (Res
!= MatchOperand_Success
)
5838 trySkipToken(AsmToken::Comma
);
5841 Res
= parseSymbolicOrNumericFormat(Format
);
5842 if (Res
== MatchOperand_ParseFail
)
5844 if (Res
== MatchOperand_Success
) {
5845 auto Size
= Operands
.size();
5846 AMDGPUOperand
&Op
= static_cast<AMDGPUOperand
&>(*Operands
[Size
- 2]);
5847 assert(Op
.isImm() && Op
.getImmTy() == AMDGPUOperand::ImmTyFORMAT
);
5850 return MatchOperand_Success
;
5853 if (isId("format") && peekToken().is(AsmToken::Colon
)) {
5854 Error(getLoc(), "duplicate format");
5855 return MatchOperand_ParseFail
;
5857 return MatchOperand_Success
;
//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//

void AMDGPUAsmParser::cvtDSOffset01(MCInst &Inst,
                                    const OperandVector &Operands) {
  OptionalImmIndexMap OptionalIdx;

  for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);

    // Add the register arguments
    if (Op.isReg()) {
      Op.addRegOperands(Inst, 1);
      continue;
    }

    // Handle optional arguments
    OptionalIdx[Op.getImmTy()] = i;
  }

  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset0);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset1);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);

  Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
}
void AMDGPUAsmParser::cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
                                bool IsGdsHardcoded) {
  OptionalImmIndexMap OptionalIdx;

  for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);

    // Add the register arguments
    if (Op.isReg()) {
      Op.addRegOperands(Inst, 1);
      continue;
    }

    if (Op.isToken() && Op.getToken() == "gds") {
      IsGdsHardcoded = true;
      continue;
    }

    // Handle optional arguments
    OptionalIdx[Op.getImmTy()] = i;
  }

  AMDGPUOperand::ImmTy OffsetType =
    (Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx10 ||
     Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx6_gfx7 ||
     Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_vi) ? AMDGPUOperand::ImmTySwizzle :
                                                      AMDGPUOperand::ImmTyOffset;

  addOptionalImmOperand(Inst, Operands, OptionalIdx, OffsetType);

  if (!IsGdsHardcoded) {
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);
  }
  Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
}
void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) {
  OptionalImmIndexMap OptionalIdx;

  unsigned OperandIdx[4];
  unsigned EnMask = 0;
  int SrcIdx = 0;

  for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);

    // Add the register arguments
    if (Op.isReg()) {
      assert(SrcIdx < 4);
      OperandIdx[SrcIdx] = Inst.size();
      Op.addRegOperands(Inst, 1);
      ++SrcIdx;
      continue;
    }

    if (Op.isOff()) {
      assert(SrcIdx < 4);
      OperandIdx[SrcIdx] = Inst.size();
      Inst.addOperand(MCOperand::createReg(AMDGPU::NoRegister));
      ++SrcIdx;
      continue;
    }

    if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) {
      Op.addImmOperands(Inst, 1);
      continue;
    }

    if (Op.isToken() && Op.getToken() == "done")
      continue;

    // Handle optional arguments
    OptionalIdx[Op.getImmTy()] = i;
  }

  assert(SrcIdx == 4);

  bool Compr = false;
  if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) {
    Compr = true;
    Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]);
    Inst.getOperand(OperandIdx[2]).setReg(AMDGPU::NoRegister);
    Inst.getOperand(OperandIdx[3]).setReg(AMDGPU::NoRegister);
  }

  for (auto i = 0; i < SrcIdx; ++i) {
    if (Inst.getOperand(OperandIdx[i]).getReg() != AMDGPU::NoRegister) {
      EnMask |= Compr ? (0x3 << i * 2) : (0x1 << i);
    }
  }

  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr);

  Inst.addOperand(MCOperand::createImm(EnMask));
}
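
// Illustrative note (editorial addition): EnMask records which of the four
// export sources are real registers. For a non-compressed export with src0
// and src1 present and src2/src3 written as "off", the loop above sets bits
// 0 and 1, i.e. en = 0b0011; with the compr modifier each remaining source
// covers a pair of channels (0x3 << 0 and 0x3 << 2), giving en = 0b1111.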
//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//

static bool
encodeCnt(
  const AMDGPU::IsaVersion ISA,
  int64_t &IntVal,
  int64_t CntVal,
  bool Saturate,
  unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned),
  unsigned (*decode)(const IsaVersion &Version, unsigned))
{
  bool Failed = false;

  IntVal = encode(ISA, IntVal, CntVal);
  if (CntVal != decode(ISA, IntVal)) {
    if (Saturate) {
      IntVal = encode(ISA, IntVal, -1);
    } else {
      Failed = true;
    }
  }
  return Failed;
}
bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) {

  SMLoc CntLoc = getLoc();
  StringRef CntName = getTokenStr();

  if (!skipToken(AsmToken::Identifier, "expected a counter name") ||
      !skipToken(AsmToken::LParen, "expected a left parenthesis"))
    return false;

  int64_t CntVal;
  SMLoc ValLoc = getLoc();
  if (!parseExpr(CntVal))
    return false;

  AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());

  bool Failed = true;
  bool Sat = CntName.endswith("_sat");

  if (CntName == "vmcnt" || CntName == "vmcnt_sat") {
    Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt);
  } else if (CntName == "expcnt" || CntName == "expcnt_sat") {
    Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt);
  } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") {
    Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt);
  } else {
    Error(CntLoc, "invalid counter name " + CntName);
    return false;
  }

  if (Failed) {
    Error(ValLoc, "too large value for " + CntName);
    return false;
  }

  if (!skipToken(AsmToken::RParen, "expected a closing parenthesis"))
    return false;

  if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) {
    if (isToken(AsmToken::EndOfStatement)) {
      Error(getLoc(), "expected a counter name");
      return false;
    }
  }

  return true;
}
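
// Illustrative note (editorial addition): an operand such as
// "s_waitcnt vmcnt(0) lgkmcnt(0)" is folded into IntVal one counter at a
// time via the encodeCnt round-trip above; counters that are not named keep
// their "don't care" maximum from getWaitcntBitMask(). The *_sat spellings
// clamp an out-of-range value to the field maximum instead of reporting
// "too large value".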
OperandMatchResultTy
AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) {
  AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
  int64_t Waitcnt = getWaitcntBitMask(ISA);
  SMLoc S = getLoc();

  if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
    while (!isToken(AsmToken::EndOfStatement)) {
      if (!parseCnt(Waitcnt))
        return MatchOperand_ParseFail;
    }
  } else {
    if (!parseExpr(Waitcnt))
      return MatchOperand_ParseFail;
  }

  Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S));
  return MatchOperand_Success;
}
bool
AMDGPUOperand::isSWaitCnt() const {
  return isImm();
}

//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//
bool
AMDGPUAsmParser::parseHwregBody(OperandInfoTy &HwReg,
                                OperandInfoTy &Offset,
                                OperandInfoTy &Width) {
  using namespace llvm::AMDGPU::Hwreg;

  // The register may be specified by name or using a numeric code
  HwReg.Loc = getLoc();
  if (isToken(AsmToken::Identifier) &&
      (HwReg.Id = getHwregId(getTokenStr())) >= 0) {
    HwReg.IsSymbolic = true;
    lex(); // skip register name
  } else if (!parseExpr(HwReg.Id, "a register name")) {
    return false;
  }

  if (trySkipToken(AsmToken::RParen))
    return true;

  // parse optional params
  if (!skipToken(AsmToken::Comma, "expected a comma or a closing parenthesis"))
    return false;

  Offset.Loc = getLoc();
  if (!parseExpr(Offset.Id))
    return false;

  if (!skipToken(AsmToken::Comma, "expected a comma"))
    return false;

  Width.Loc = getLoc();
  return parseExpr(Width.Id) &&
         skipToken(AsmToken::RParen, "expected a closing parenthesis");
}
bool
AMDGPUAsmParser::validateHwreg(const OperandInfoTy &HwReg,
                               const OperandInfoTy &Offset,
                               const OperandInfoTy &Width) {

  using namespace llvm::AMDGPU::Hwreg;

  if (HwReg.IsSymbolic && !isValidHwreg(HwReg.Id, getSTI())) {
    Error(HwReg.Loc,
          "specified hardware register is not supported on this GPU");
    return false;
  }
  if (!isValidHwreg(HwReg.Id)) {
    Error(HwReg.Loc,
          "invalid code of hardware register: only 6-bit values are legal");
    return false;
  }
  if (!isValidHwregOffset(Offset.Id)) {
    Error(Offset.Loc, "invalid bit offset: only 5-bit values are legal");
    return false;
  }
  if (!isValidHwregWidth(Width.Id)) {
    Error(Width.Loc,
          "invalid bitfield width: only values from 1 to 32 are legal");
    return false;
  }
  return true;
}
OperandMatchResultTy
AMDGPUAsmParser::parseHwreg(OperandVector &Operands) {
  using namespace llvm::AMDGPU::Hwreg;

  int64_t ImmVal = 0;
  SMLoc Loc = getLoc();

  if (trySkipId("hwreg", AsmToken::LParen)) {
    OperandInfoTy HwReg(ID_UNKNOWN_);
    OperandInfoTy Offset(OFFSET_DEFAULT_);
    OperandInfoTy Width(WIDTH_DEFAULT_);
    if (parseHwregBody(HwReg, Offset, Width) &&
        validateHwreg(HwReg, Offset, Width)) {
      ImmVal = encodeHwreg(HwReg.Id, Offset.Id, Width.Id);
    } else {
      return MatchOperand_ParseFail;
    }
  } else if (parseExpr(ImmVal, "a hwreg macro")) {
    if (ImmVal < 0 || !isUInt<16>(ImmVal)) {
      Error(Loc, "invalid immediate: only 16-bit values are legal");
      return MatchOperand_ParseFail;
    }
  } else {
    return MatchOperand_ParseFail;
  }

  Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTyHwreg));
  return MatchOperand_Success;
}

bool AMDGPUOperand::isHwreg() const {
  return isImmTy(ImmTyHwreg);
}
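
// Illustrative note (editorial addition): both spellings produce the same
// immediate, e.g. "s_getreg_b32 s0, hwreg(HW_REG_TRAPSTS, 0, 32)" and the raw
// 16-bit value built by encodeHwreg(id, offset, width). HW_REG_TRAPSTS is an
// assumed example name; the legal names and exact bit layout come from the
// Hwreg utilities, not from this file.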
//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//

bool
AMDGPUAsmParser::parseSendMsgBody(OperandInfoTy &Msg,
                                  OperandInfoTy &Op,
                                  OperandInfoTy &Stream) {
  using namespace llvm::AMDGPU::SendMsg;

  Msg.Loc = getLoc();
  if (isToken(AsmToken::Identifier) && (Msg.Id = getMsgId(getTokenStr())) >= 0) {
    Msg.IsSymbolic = true;
    lex(); // skip message name
  } else if (!parseExpr(Msg.Id, "a message name")) {
    return false;
  }

  if (trySkipToken(AsmToken::Comma)) {
    Op.IsDefined = true;
    Op.Loc = getLoc();
    if (isToken(AsmToken::Identifier) &&
        (Op.Id = getMsgOpId(Msg.Id, getTokenStr())) >= 0) {
      lex(); // skip operation name
    } else if (!parseExpr(Op.Id, "an operation name")) {
      return false;
    }

    if (trySkipToken(AsmToken::Comma)) {
      Stream.IsDefined = true;
      Stream.Loc = getLoc();
      if (!parseExpr(Stream.Id))
        return false;
    }
  }

  return skipToken(AsmToken::RParen, "expected a closing parenthesis");
}
bool
AMDGPUAsmParser::validateSendMsg(const OperandInfoTy &Msg,
                                 const OperandInfoTy &Op,
                                 const OperandInfoTy &Stream) {
  using namespace llvm::AMDGPU::SendMsg;

  // Validation strictness depends on whether the message is specified
  // in a symbolic or in a numeric form. In the latter case
  // only encoding possibility is checked.
  bool Strict = Msg.IsSymbolic;

  if (!isValidMsgId(Msg.Id, getSTI(), Strict)) {
    Error(Msg.Loc, "invalid message id");
    return false;
  }
  if (Strict && (msgRequiresOp(Msg.Id) != Op.IsDefined)) {
    if (Op.IsDefined) {
      Error(Op.Loc, "message does not support operations");
    } else {
      Error(Msg.Loc, "missing message operation");
    }
    return false;
  }
  if (!isValidMsgOp(Msg.Id, Op.Id, getSTI(), Strict)) {
    Error(Op.Loc, "invalid operation id");
    return false;
  }
  if (Strict && !msgSupportsStream(Msg.Id, Op.Id) && Stream.IsDefined) {
    Error(Stream.Loc, "message operation does not support streams");
    return false;
  }
  if (!isValidMsgStream(Msg.Id, Op.Id, Stream.Id, getSTI(), Strict)) {
    Error(Stream.Loc, "invalid message stream id");
    return false;
  }
  return true;
}
OperandMatchResultTy
AMDGPUAsmParser::parseSendMsgOp(OperandVector &Operands) {
  using namespace llvm::AMDGPU::SendMsg;

  int64_t ImmVal = 0;
  SMLoc Loc = getLoc();

  if (trySkipId("sendmsg", AsmToken::LParen)) {
    OperandInfoTy Msg(ID_UNKNOWN_);
    OperandInfoTy Op(OP_NONE_);
    OperandInfoTy Stream(STREAM_ID_NONE_);
    if (parseSendMsgBody(Msg, Op, Stream) &&
        validateSendMsg(Msg, Op, Stream)) {
      ImmVal = encodeMsg(Msg.Id, Op.Id, Stream.Id);
    } else {
      return MatchOperand_ParseFail;
    }
  } else if (parseExpr(ImmVal, "a sendmsg macro")) {
    if (ImmVal < 0 || !isUInt<16>(ImmVal)) {
      Error(Loc, "invalid immediate: only 16-bit values are legal");
      return MatchOperand_ParseFail;
    }
  } else {
    return MatchOperand_ParseFail;
  }

  Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTySendMsg));
  return MatchOperand_Success;
}

bool AMDGPUOperand::isSendMsg() const {
  return isImmTy(ImmTySendMsg);
}
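
// Illustrative note (editorial addition): as with hwreg, a symbolic form such
// as "s_sendmsg sendmsg(MSG_GS, GS_OP_EMIT, 0)" and a bare 16-bit immediate
// are both accepted; only the symbolic form gets the strict validation above.
// MSG_GS / GS_OP_EMIT are assumed example names taken from the SendMsg tables.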
//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//

OperandMatchResultTy AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) {
  StringRef Str;
  SMLoc S = getLoc();

  if (!parseId(Str))
    return MatchOperand_NoMatch;

  int Slot = StringSwitch<int>(Str)
    .Case("p10", 0)
    .Case("p20", 1)
    .Case("p0", 2)
    .Default(-1);

  if (Slot == -1) {
    Error(S, "invalid interpolation slot");
    return MatchOperand_ParseFail;
  }

  Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S,
                                              AMDGPUOperand::ImmTyInterpSlot));
  return MatchOperand_Success;
}
OperandMatchResultTy AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) {
  StringRef Str;
  SMLoc S = getLoc();

  if (!parseId(Str))
    return MatchOperand_NoMatch;

  if (!Str.startswith("attr")) {
    Error(S, "invalid interpolation attribute");
    return MatchOperand_ParseFail;
  }

  StringRef Chan = Str.take_back(2);
  int AttrChan = StringSwitch<int>(Chan)
    .Case(".x", 0)
    .Case(".y", 1)
    .Case(".z", 2)
    .Case(".w", 3)
    .Default(-1);
  if (AttrChan == -1) {
    Error(S, "invalid or missing interpolation attribute channel");
    return MatchOperand_ParseFail;
  }

  Str = Str.drop_back(2).drop_front(4);

  uint8_t Attr;
  if (Str.getAsInteger(10, Attr)) {
    Error(S, "invalid or missing interpolation attribute number");
    return MatchOperand_ParseFail;
  }

  if (Attr > 63) {
    Error(S, "out of bounds interpolation attribute number");
    return MatchOperand_ParseFail;
  }

  SMLoc SChan = SMLoc::getFromPointer(Chan.data());

  Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S,
                                              AMDGPUOperand::ImmTyInterpAttr));
  Operands.push_back(AMDGPUOperand::CreateImm(this, AttrChan, SChan,
                                              AMDGPUOperand::ImmTyAttrChan));
  return MatchOperand_Success;
}
//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//

OperandMatchResultTy AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) {
  using namespace llvm::AMDGPU::Exp;

  StringRef Str;
  SMLoc S = getLoc();

  if (!parseId(Str))
    return MatchOperand_NoMatch;

  unsigned Id = getTgtId(Str);
  if (Id == ET_INVALID || !isSupportedTgtId(Id, getSTI())) {
    Error(S, (Id == ET_INVALID) ?
             "invalid exp target" :
             "exp target is not supported on this GPU");
    return MatchOperand_ParseFail;
  }

  Operands.push_back(AMDGPUOperand::CreateImm(this, Id, S,
                                              AMDGPUOperand::ImmTyExpTgt));
  return MatchOperand_Success;
}
//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//

bool
AMDGPUAsmParser::isId(const AsmToken &Token, const StringRef Id) const {
  return Token.is(AsmToken::Identifier) && Token.getString() == Id;
}

bool
AMDGPUAsmParser::isId(const StringRef Id) const {
  return isId(getToken(), Id);
}

bool
AMDGPUAsmParser::isToken(const AsmToken::TokenKind Kind) const {
  return getTokenKind() == Kind;
}

bool
AMDGPUAsmParser::trySkipId(const StringRef Id) {
  if (isId(Id)) {
    lex();
    return true;
  }
  return false;
}

bool
AMDGPUAsmParser::trySkipId(const StringRef Pref, const StringRef Id) {
  if (isToken(AsmToken::Identifier)) {
    StringRef Tok = getTokenStr();
    if (Tok.startswith(Pref) && Tok.drop_front(Pref.size()) == Id) {
      lex();
      return true;
    }
  }
  return false;
}

bool
AMDGPUAsmParser::trySkipId(const StringRef Id, const AsmToken::TokenKind Kind) {
  if (isId(Id) && peekToken().is(Kind)) {
    lex();
    lex();
    return true;
  }
  return false;
}

bool
AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) {
  if (isToken(Kind)) {
    lex();
    return true;
  }
  return false;
}

bool
AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind,
                           const StringRef ErrMsg) {
  if (!trySkipToken(Kind)) {
    Error(getLoc(), ErrMsg);
    return false;
  }
  return true;
}
bool
AMDGPUAsmParser::parseExpr(int64_t &Imm, StringRef Expected) {
  SMLoc S = getLoc();

  const MCExpr *Expr;
  if (Parser.parseExpression(Expr))
    return false;

  if (Expr->evaluateAsAbsolute(Imm))
    return true;

  if (Expected.empty()) {
    Error(S, "expected absolute expression");
  } else {
    Error(S, Twine("expected ", Expected) +
             Twine(" or an absolute expression"));
  }
  return false;
}

bool
AMDGPUAsmParser::parseExpr(OperandVector &Operands) {
  SMLoc S = getLoc();

  const MCExpr *Expr;
  if (Parser.parseExpression(Expr))
    return false;

  int64_t IntVal;
  if (Expr->evaluateAsAbsolute(IntVal)) {
    Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
  } else {
    Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
  }
  return true;
}
bool
AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) {
  if (isToken(AsmToken::String)) {
    Val = getToken().getStringContents();
    lex();
    return true;
  } else {
    Error(getLoc(), ErrMsg);
    return false;
  }
}

bool
AMDGPUAsmParser::parseId(StringRef &Val, const StringRef ErrMsg) {
  if (isToken(AsmToken::Identifier)) {
    Val = getTokenStr();
    lex();
    return true;
  } else {
    if (!ErrMsg.empty())
      Error(getLoc(), ErrMsg);
    return false;
  }
}

AsmToken
AMDGPUAsmParser::getToken() const {
  return Parser.getTok();
}

AsmToken
AMDGPUAsmParser::peekToken() {
  return isToken(AsmToken::EndOfStatement) ? getToken() : getLexer().peekTok();
}

void
AMDGPUAsmParser::peekTokens(MutableArrayRef<AsmToken> Tokens) {
  auto TokCount = getLexer().peekTokens(Tokens);

  for (auto Idx = TokCount; Idx < Tokens.size(); ++Idx)
    Tokens[Idx] = AsmToken(AsmToken::Error, "");
}

AsmToken::TokenKind
AMDGPUAsmParser::getTokenKind() const {
  return getLexer().getKind();
}

SMLoc
AMDGPUAsmParser::getLoc() const {
  return getToken().getLoc();
}

StringRef
AMDGPUAsmParser::getTokenStr() const {
  return getToken().getString();
}

void
AMDGPUAsmParser::lex() {
  Parser.Lex();
}
SMLoc
AMDGPUAsmParser::getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test,
                               const OperandVector &Operands) const {
  for (unsigned i = Operands.size() - 1; i > 0; --i) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
    if (Test(Op))
      return Op.getStartLoc();
  }
  return ((AMDGPUOperand &)*Operands[0]).getStartLoc();
}

SMLoc
AMDGPUAsmParser::getImmLoc(AMDGPUOperand::ImmTy Type,
                           const OperandVector &Operands) const {
  auto Test = [=](const AMDGPUOperand& Op) { return Op.isImmTy(Type); };
  return getOperandLoc(Test, Operands);
}

SMLoc
AMDGPUAsmParser::getRegLoc(unsigned Reg,
                           const OperandVector &Operands) const {
  auto Test = [=](const AMDGPUOperand& Op) {
    return Op.isRegKind() && Op.getReg() == Reg;
  };
  return getOperandLoc(Test, Operands);
}

SMLoc
AMDGPUAsmParser::getLitLoc(const OperandVector &Operands) const {
  auto Test = [](const AMDGPUOperand& Op) {
    return Op.IsImmKindLiteral() || Op.isExpr();
  };
  return getOperandLoc(Test, Operands);
}

SMLoc
AMDGPUAsmParser::getConstLoc(const OperandVector &Operands) const {
  auto Test = [](const AMDGPUOperand& Op) {
    return Op.isImmKindConst();
  };
  return getOperandLoc(Test, Operands);
}
//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//

static unsigned
encodeBitmaskPerm(const unsigned AndMask,
                  const unsigned OrMask,
                  const unsigned XorMask) {
  using namespace llvm::AMDGPU::Swizzle;

  return BITMASK_PERM_ENC |
         (AndMask << BITMASK_AND_SHIFT) |
         (OrMask << BITMASK_OR_SHIFT) |
         (XorMask << BITMASK_XOR_SHIFT);
}
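
// Illustrative note (editorial addition): per the expression above, the three
// 5-bit masks land in disjoint bit-fields of the swizzle immediate, so for
// example encodeBitmaskPerm(0x1C, 0x01, 0x00) is simply
// BITMASK_PERM_ENC | (0x1C << BITMASK_AND_SHIFT) | (0x01 << BITMASK_OR_SHIFT).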
bool
AMDGPUAsmParser::parseSwizzleOperand(int64_t &Op,
                                     const unsigned MinVal,
                                     const unsigned MaxVal,
                                     const StringRef ErrMsg,
                                     SMLoc &Loc) {
  if (!skipToken(AsmToken::Comma, "expected a comma")) {
    return false;
  }
  Loc = getLoc();
  if (!parseExpr(Op)) {
    return false;
  }
  if (Op < MinVal || Op > MaxVal) {
    Error(Loc, ErrMsg);
    return false;
  }

  return true;
}

bool
AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
                                      const unsigned MinVal,
                                      const unsigned MaxVal,
                                      const StringRef ErrMsg) {
  SMLoc Loc;
  for (unsigned i = 0; i < OpNum; ++i) {
    if (!parseSwizzleOperand(Op[i], MinVal, MaxVal, ErrMsg, Loc))
      return false;
  }

  return true;
}
bool
AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm) {
  using namespace llvm::AMDGPU::Swizzle;

  int64_t Lane[LANE_NUM];
  if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX,
                           "expected a 2-bit lane id")) {
    Imm = QUAD_PERM_ENC;
    for (unsigned I = 0; I < LANE_NUM; ++I) {
      Imm |= Lane[I] << (LANE_SHIFT * I);
    }
    return true;
  }
  return false;
}
bool
AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) {
  using namespace llvm::AMDGPU::Swizzle;

  SMLoc Loc;
  int64_t GroupSize;
  int64_t LaneIdx;

  if (!parseSwizzleOperand(GroupSize,
                           2, 32,
                           "group size must be in the interval [2,32]",
                           Loc)) {
    return false;
  }
  if (!isPowerOf2_64(GroupSize)) {
    Error(Loc, "group size must be a power of two");
    return false;
  }
  if (parseSwizzleOperand(LaneIdx,
                          0, GroupSize - 1,
                          "lane id must be in the interval [0,group size - 1]",
                          Loc)) {
    Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0);
    return true;
  }
  return false;
}
bool
AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) {
  using namespace llvm::AMDGPU::Swizzle;

  SMLoc Loc;
  int64_t GroupSize;

  if (!parseSwizzleOperand(GroupSize,
                           2, 32,
                           "group size must be in the interval [2,32]",
                           Loc)) {
    return false;
  }
  if (!isPowerOf2_64(GroupSize)) {
    Error(Loc, "group size must be a power of two");
    return false;
  }

  Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize - 1);
  return true;
}
bool
AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) {
  using namespace llvm::AMDGPU::Swizzle;

  SMLoc Loc;
  int64_t GroupSize;

  if (!parseSwizzleOperand(GroupSize,
                           1, 16,
                           "group size must be in the interval [1,16]",
                           Loc)) {
    return false;
  }
  if (!isPowerOf2_64(GroupSize)) {
    Error(Loc, "group size must be a power of two");
    return false;
  }

  Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize);
  return true;
}
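
// Illustrative note (editorial addition): the three macros above are all
// sugar over encodeBitmaskPerm. Assuming BITMASK_MAX is the all-ones 5-bit
// mask (0b11111), the arithmetic follows directly from the calls:
//   BROADCAST group 4, lane 1 -> and = 0b11100, or = 1,  xor = 0
//   REVERSE  group 8          -> and = 0b11111, or = 0,  xor = 7
//   SWAP     group 16         -> and = 0b11111, or = 0,  xor = 16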
bool
AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) {
  using namespace llvm::AMDGPU::Swizzle;

  if (!skipToken(AsmToken::Comma, "expected a comma")) {
    return false;
  }

  StringRef Ctl;
  SMLoc StrLoc = getLoc();
  if (!parseString(Ctl)) {
    return false;
  }
  if (Ctl.size() != BITMASK_WIDTH) {
    Error(StrLoc, "expected a 5-character mask");
    return false;
  }

  unsigned AndMask = 0;
  unsigned OrMask = 0;
  unsigned XorMask = 0;

  for (size_t i = 0; i < Ctl.size(); ++i) {
    unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i);
    switch(Ctl[i]) {
    default:
      Error(StrLoc, "invalid mask");
      return false;
    case '0':
      break;
    case '1':
      OrMask |= Mask;
      break;
    case 'p':
      AndMask |= Mask;
      break;
    case 'i':
      AndMask |= Mask;
      XorMask |= Mask;
      break;
    }
  }

  Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask);
  return true;
}
bool
AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) {

  SMLoc OffsetLoc = getLoc();

  if (!parseExpr(Imm, "a swizzle macro")) {
    return false;
  }
  if (!isUInt<16>(Imm)) {
    Error(OffsetLoc, "expected a 16-bit offset");
    return false;
  }
  return true;
}
bool
AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) {
  using namespace llvm::AMDGPU::Swizzle;

  if (skipToken(AsmToken::LParen, "expected a left parentheses")) {

    SMLoc ModeLoc = getLoc();
    bool Ok = false;

    if (trySkipId(IdSymbolic[ID_QUAD_PERM])) {
      Ok = parseSwizzleQuadPerm(Imm);
    } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) {
      Ok = parseSwizzleBitmaskPerm(Imm);
    } else if (trySkipId(IdSymbolic[ID_BROADCAST])) {
      Ok = parseSwizzleBroadcast(Imm);
    } else if (trySkipId(IdSymbolic[ID_SWAP])) {
      Ok = parseSwizzleSwap(Imm);
    } else if (trySkipId(IdSymbolic[ID_REVERSE])) {
      Ok = parseSwizzleReverse(Imm);
    } else {
      Error(ModeLoc, "expected a swizzle mode");
    }

    return Ok && skipToken(AsmToken::RParen, "expected a closing parentheses");
  }

  return false;
}
OperandMatchResultTy
AMDGPUAsmParser::parseSwizzleOp(OperandVector &Operands) {
  SMLoc S = getLoc();
  int64_t Imm = 0;

  if (trySkipId("offset")) {

    bool Ok = false;
    if (skipToken(AsmToken::Colon, "expected a colon")) {
      if (trySkipId("swizzle")) {
        Ok = parseSwizzleMacro(Imm);
      } else {
        Ok = parseSwizzleOffset(Imm);
      }
    }

    Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle));

    return Ok ? MatchOperand_Success : MatchOperand_ParseFail;
  } else {
    // Swizzle "offset" operand is optional.
    // If it is omitted, try parsing other optional operands.
    return parseOptionalOpr(Operands);
  }
}

bool
AMDGPUOperand::isSwizzle() const {
  return isImmTy(ImmTySwizzle);
}
//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//

int64_t AMDGPUAsmParser::parseGPRIdxMacro() {

  using namespace llvm::AMDGPU::VGPRIndexMode;

  if (trySkipToken(AsmToken::RParen)) {
    return OFF;
  }

  int64_t Imm = 0;

  while (true) {
    unsigned Mode = 0;
    SMLoc S = getLoc();

    for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) {
      if (trySkipId(IdSymbolic[ModeId])) {
        Mode = 1 << ModeId;
        break;
      }
    }

    if (Mode == 0) {
      Error(S, (Imm == 0)?
               "expected a VGPR index mode or a closing parenthesis" :
               "expected a VGPR index mode");
      return UNDEF;
    }

    if (Imm & Mode) {
      Error(S, "duplicate VGPR index mode");
      return UNDEF;
    }
    Imm |= Mode;

    if (trySkipToken(AsmToken::RParen))
      break;
    if (!skipToken(AsmToken::Comma,
                   "expected a comma or a closing parenthesis"))
      return UNDEF;
  }

  return Imm;
}
OperandMatchResultTy
AMDGPUAsmParser::parseGPRIdxMode(OperandVector &Operands) {

  using namespace llvm::AMDGPU::VGPRIndexMode;

  int64_t Imm = 0;
  SMLoc S = getLoc();

  if (trySkipId("gpr_idx", AsmToken::LParen)) {
    Imm = parseGPRIdxMacro();
    if (Imm == UNDEF)
      return MatchOperand_ParseFail;
  } else {
    if (getParser().parseAbsoluteExpression(Imm))
      return MatchOperand_ParseFail;
    if (Imm < 0 || !isUInt<4>(Imm)) {
      Error(S, "invalid immediate: only 4-bit values are legal");
      return MatchOperand_ParseFail;
    }
  }

  Operands.push_back(
      AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyGprIdxMode));
  return MatchOperand_Success;
}

bool AMDGPUOperand::isGPRIdxMode() const {
  return isImmTy(ImmTyGprIdxMode);
}
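
// Illustrative note (editorial addition): the macro form looks like
// "s_set_gpr_idx_on s0, gpr_idx(SRC0,DST)"; each named mode sets one bit of
// the 4-bit immediate, and an empty "gpr_idx()" list yields the all-off
// encoding. SRC0/DST are assumed example names from the VGPRIndexMode
// IdSymbolic table.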
//===----------------------------------------------------------------------===//
// sopp branch targets
//===----------------------------------------------------------------------===//

OperandMatchResultTy
AMDGPUAsmParser::parseSOppBrTarget(OperandVector &Operands) {

  // Make sure we are not parsing something
  // that looks like a label or an expression but is not.
  // This will improve error messages.
  if (isRegister() || isModifier())
    return MatchOperand_NoMatch;

  if (!parseExpr(Operands))
    return MatchOperand_ParseFail;

  AMDGPUOperand &Opr = ((AMDGPUOperand &)*Operands[Operands.size() - 1]);
  assert(Opr.isImm() || Opr.isExpr());
  SMLoc Loc = Opr.getStartLoc();

  // Currently we do not support arbitrary expressions as branch targets.
  // Only labels and absolute expressions are accepted.
  if (Opr.isExpr() && !Opr.isSymbolRefExpr()) {
    Error(Loc, "expected an absolute expression or a label");
  } else if (Opr.isImm() && !Opr.isS16Imm()) {
    Error(Loc, "expected a 16-bit signed jump offset");
  }

  return MatchOperand_Success;
}
//===----------------------------------------------------------------------===//
// Boolean holding registers
//===----------------------------------------------------------------------===//

OperandMatchResultTy
AMDGPUAsmParser::parseBoolReg(OperandVector &Operands) {
  return parseReg(Operands);
}

//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCPol() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCPol);
}
void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst,
                                   const OperandVector &Operands,
                                   bool IsAtomic,
                                   bool IsLds) {
  bool IsLdsOpcode = IsLds;
  bool HasLdsModifier = false;
  OptionalImmIndexMap OptionalIdx;
  unsigned FirstOperandIdx = 1;
  bool IsAtomicReturn = false;

  if (IsAtomic) {
    for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) {
      AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
      if (!Op.isCPol())
        continue;
      IsAtomicReturn = Op.getImm() & AMDGPU::CPol::GLC;
      break;
    }

    if (!IsAtomicReturn) {
      int NewOpc = AMDGPU::getAtomicNoRetOp(Inst.getOpcode());
      if (NewOpc != -1)
        Inst.setOpcode(NewOpc);
    }

    IsAtomicReturn = MII.get(Inst.getOpcode()).TSFlags &
                     SIInstrFlags::IsAtomicRet;
  }

  for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);

    // Add the register arguments
    if (Op.isReg()) {
      Op.addRegOperands(Inst, 1);
      // Insert a tied src for atomic return dst.
      // This cannot be postponed as subsequent calls to
      // addImmOperands rely on correct number of MC operands.
      if (IsAtomicReturn && i == FirstOperandIdx)
        Op.addRegOperands(Inst, 1);
      continue;
    }

    // Handle the case where soffset is an immediate
    if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
      Op.addImmOperands(Inst, 1);
      continue;
    }

    HasLdsModifier |= Op.isLDS();

    // Handle tokens like 'offen' which are sometimes hard-coded into the
    // asm string. There are no MCInst operands for these.
    if (Op.isToken()) {
      continue;
    }
    assert(Op.isImm());

    // Handle optional arguments
    OptionalIdx[Op.getImmTy()] = i;
  }

  // This is a workaround for an llvm quirk which may result in an
  // incorrect instruction selection. Lds and non-lds versions of
  // MUBUF instructions are identical except that lds versions
  // have mandatory 'lds' modifier. However this modifier follows
  // optional modifiers and llvm asm matcher regards this 'lds'
  // modifier as an optional one. As a result, an lds version
  // of opcode may be selected even if it has no 'lds' modifier.
  if (IsLdsOpcode && !HasLdsModifier) {
    int NoLdsOpcode = AMDGPU::getMUBUFNoLdsInst(Inst.getOpcode());
    if (NoLdsOpcode != -1) { // Got lds version - correct it.
      Inst.setOpcode(NoLdsOpcode);
      IsLdsOpcode = false;
    }
  }

  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0);

  if (!IsLdsOpcode) { // tfe is not legal with lds opcodes
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
  }
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySWZ);
}
void AMDGPUAsmParser::cvtMtbuf(MCInst &Inst, const OperandVector &Operands) {
  OptionalImmIndexMap OptionalIdx;

  for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);

    // Add the register arguments
    if (Op.isReg()) {
      Op.addRegOperands(Inst, 1);
      continue;
    }

    // Handle the case where soffset is an immediate
    if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
      Op.addImmOperands(Inst, 1);
      continue;
    }

    // Handle tokens like 'offen' which are sometimes hard-coded into the
    // asm string. There are no MCInst operands for these.
    if (Op.isToken()) {
      continue;
    }
    assert(Op.isImm());

    // Handle optional arguments
    OptionalIdx[Op.getImmTy()] = i;
  }

  addOptionalImmOperand(Inst, Operands, OptionalIdx,
                        AMDGPUOperand::ImmTyOffset);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyFORMAT);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySWZ);
}
//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//

void AMDGPUAsmParser::cvtMIMG(MCInst &Inst, const OperandVector &Operands,
                              bool IsAtomic) {
  unsigned I = 1;
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
  }

  if (IsAtomic) {
    // Add src, same as dst
    assert(Desc.getNumDefs() == 1);
    ((AMDGPUOperand &)*Operands[I - 1]).addRegOperands(Inst, 1);
  }

  OptionalImmIndexMap OptionalIdx;

  for (unsigned E = Operands.size(); I != E; ++I) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);

    // Add the register arguments
    if (Op.isReg()) {
      Op.addRegOperands(Inst, 1);
    } else if (Op.isImmModifier()) {
      OptionalIdx[Op.getImmTy()] = I;
    } else if (!Op.isToken()) {
      llvm_unreachable("unexpected operand type");
    }
  }

  bool IsGFX10Plus = isGFX10Plus();

  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDMask);
  if (IsGFX10Plus)
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDim, -1);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyUNorm);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyR128A16);
  if (IsGFX10Plus)
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyA16);
  if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::tfe) != -1)
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyLWE);
  if (!IsGFX10Plus)
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDA);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyD16);
}

void AMDGPUAsmParser::cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands) {
  cvtMIMG(Inst, Operands, true);
}
void AMDGPUAsmParser::cvtSMEMAtomic(MCInst &Inst, const OperandVector &Operands) {
  OptionalImmIndexMap OptionalIdx;
  bool IsAtomicReturn = false;

  for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
    if (!Op.isCPol())
      continue;
    IsAtomicReturn = Op.getImm() & AMDGPU::CPol::GLC;
    break;
  }

  if (!IsAtomicReturn) {
    int NewOpc = AMDGPU::getAtomicNoRetOp(Inst.getOpcode());
    if (NewOpc != -1)
      Inst.setOpcode(NewOpc);
  }

  IsAtomicReturn = MII.get(Inst.getOpcode()).TSFlags &
                   SIInstrFlags::IsAtomicRet;

  for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);

    // Add the register arguments
    if (Op.isReg()) {
      Op.addRegOperands(Inst, 1);
      if (IsAtomicReturn && i == 1)
        Op.addRegOperands(Inst, 1);
      continue;
    }

    // Handle the case where soffset is an immediate
    if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
      Op.addImmOperands(Inst, 1);
      continue;
    }

    // Handle tokens like 'offen' which are sometimes hard-coded into the
    // asm string. There are no MCInst operands for these.
    if (Op.isToken()) {
      continue;
    }
    assert(Op.isImm());

    // Handle optional arguments
    OptionalIdx[Op.getImmTy()] = i;
  }

  if ((int)Inst.getNumOperands() <=
      AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::offset))
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0);
}
void AMDGPUAsmParser::cvtIntersectRay(MCInst &Inst,
                                      const OperandVector &Operands) {
  for (unsigned I = 1; I < Operands.size(); ++I) {
    auto &Operand = (AMDGPUOperand &)*Operands[I];
    if (Operand.isReg())
      Operand.addRegOperands(Inst, 1);
  }

  Inst.addOperand(MCOperand::createImm(1)); // a16
}
7242 //===----------------------------------------------------------------------===//
7244 bool AMDGPUOperand::isSMRDOffset8() const {
7245 return isImm() && isUInt
<8>(getImm());
7248 bool AMDGPUOperand::isSMEMOffset() const {
7249 return isImm(); // Offset range is checked later by validator.
7252 bool AMDGPUOperand::isSMRDLiteralOffset() const {
7253 // 32-bit literals are only supported on CI and we only want to use them
7254 // when the offset is > 8-bits.
7255 return isImm() && !isUInt
<8>(getImm()) && isUInt
<32>(getImm());
7258 AMDGPUOperand::Ptr
AMDGPUAsmParser::defaultSMRDOffset8() const {
7259 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset
);
7262 AMDGPUOperand::Ptr
AMDGPUAsmParser::defaultSMEMOffset() const {
7263 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset
);
7266 AMDGPUOperand::Ptr
AMDGPUAsmParser::defaultSMRDLiteralOffset() const {
7267 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset
);
7270 AMDGPUOperand::Ptr
AMDGPUAsmParser::defaultFlatOffset() const {
7271 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset
);
//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//

static bool ConvertOmodMul(int64_t &Mul) {
  if (Mul != 1 && Mul != 2 && Mul != 4)
    return false;

  Mul >>= 1;
  return true;
}

static bool ConvertOmodDiv(int64_t &Div) {
  if (Div == 1) {
    Div = 0;
    return true;
  }

  if (Div == 2) {
    Div = 3;
    return true;
  }

  return false;
}

// Both bound_ctrl:0 and bound_ctrl:1 are encoded as 1.
// This is intentional and ensures compatibility with sp3.
// See bug 35397 for details.
static bool ConvertBoundCtrl(int64_t &BoundCtrl) {
  if (BoundCtrl == 0 || BoundCtrl == 1) {
    BoundCtrl = 1;
    return true;
  }
  return false;
}
// Note: the order in this table matches the order of operands in AsmString.
static const OptionalOperand AMDGPUOptionalOperandTable[] = {
  {"offen",   AMDGPUOperand::ImmTyOffen, true, nullptr},
  {"idxen",   AMDGPUOperand::ImmTyIdxen, true, nullptr},
  {"addr64",  AMDGPUOperand::ImmTyAddr64, true, nullptr},
  {"offset0", AMDGPUOperand::ImmTyOffset0, false, nullptr},
  {"offset1", AMDGPUOperand::ImmTyOffset1, false, nullptr},
  {"gds",     AMDGPUOperand::ImmTyGDS, true, nullptr},
  {"lds",     AMDGPUOperand::ImmTyLDS, true, nullptr},
  {"offset",  AMDGPUOperand::ImmTyOffset, false, nullptr},
  {"inst_offset", AMDGPUOperand::ImmTyInstOffset, false, nullptr},
  {"",        AMDGPUOperand::ImmTyCPol, false, nullptr},
  {"swz",     AMDGPUOperand::ImmTySWZ, true, nullptr},
  {"tfe",     AMDGPUOperand::ImmTyTFE, true, nullptr},
  {"d16",     AMDGPUOperand::ImmTyD16, true, nullptr},
  {"high",    AMDGPUOperand::ImmTyHigh, true, nullptr},
  {"clamp",   AMDGPUOperand::ImmTyClampSI, true, nullptr},
  {"omod",    AMDGPUOperand::ImmTyOModSI, false, ConvertOmodMul},
  {"unorm",   AMDGPUOperand::ImmTyUNorm, true, nullptr},
  {"da",      AMDGPUOperand::ImmTyDA, true, nullptr},
  {"r128",    AMDGPUOperand::ImmTyR128A16, true, nullptr},
  {"a16",     AMDGPUOperand::ImmTyA16, true, nullptr},
  {"lwe",     AMDGPUOperand::ImmTyLWE, true, nullptr},
  {"d16",     AMDGPUOperand::ImmTyD16, true, nullptr},
  {"dmask",   AMDGPUOperand::ImmTyDMask, false, nullptr},
  {"dim",     AMDGPUOperand::ImmTyDim, false, nullptr},
  {"row_mask",   AMDGPUOperand::ImmTyDppRowMask, false, nullptr},
  {"bank_mask",  AMDGPUOperand::ImmTyDppBankMask, false, nullptr},
  {"bound_ctrl", AMDGPUOperand::ImmTyDppBoundCtrl, false, ConvertBoundCtrl},
  {"fi",         AMDGPUOperand::ImmTyDppFi, false, nullptr},
  {"dst_sel",    AMDGPUOperand::ImmTySdwaDstSel, false, nullptr},
  {"src0_sel",   AMDGPUOperand::ImmTySdwaSrc0Sel, false, nullptr},
  {"src1_sel",   AMDGPUOperand::ImmTySdwaSrc1Sel, false, nullptr},
  {"dst_unused", AMDGPUOperand::ImmTySdwaDstUnused, false, nullptr},
  {"compr", AMDGPUOperand::ImmTyExpCompr, true, nullptr},
  {"vm", AMDGPUOperand::ImmTyExpVM, true, nullptr},
  {"op_sel", AMDGPUOperand::ImmTyOpSel, false, nullptr},
  {"op_sel_hi", AMDGPUOperand::ImmTyOpSelHi, false, nullptr},
  {"neg_lo", AMDGPUOperand::ImmTyNegLo, false, nullptr},
  {"neg_hi", AMDGPUOperand::ImmTyNegHi, false, nullptr},
  {"blgp", AMDGPUOperand::ImmTyBLGP, false, nullptr},
  {"cbsz", AMDGPUOperand::ImmTyCBSZ, false, nullptr},
  {"abid", AMDGPUOperand::ImmTyABID, false, nullptr}
};
void AMDGPUAsmParser::onBeginOfFile() {
  if (!getParser().getStreamer().getTargetStreamer() ||
      getSTI().getTargetTriple().getArch() == Triple::r600)
    return;

  if (!getTargetStreamer().getTargetID())
    getTargetStreamer().initializeTargetID(getSTI(), getSTI().getFeatureString());

  if (isHsaAbiVersion3Or4(&getSTI()))
    getTargetStreamer().EmitDirectiveAMDGCNTarget();
}
OperandMatchResultTy AMDGPUAsmParser::parseOptionalOperand(OperandVector &Operands) {

  OperandMatchResultTy res = parseOptionalOpr(Operands);

  // This is a hack to enable hardcoded mandatory operands which follow
  // optional operands.
  //
  // Current design assumes that all operands after the first optional operand
  // are also optional. However implementation of some instructions violates
  // this rule (see e.g. flat/global atomic which have hardcoded 'glc' operands).
  //
  // To alleviate this problem, we have to (implicitly) parse extra operands
  // to make sure autogenerated parser of custom operands never hit hardcoded
  // mandatory operands.

  for (unsigned i = 0; i < MAX_OPR_LOOKAHEAD; ++i) {
    if (res != MatchOperand_Success ||
        isToken(AsmToken::EndOfStatement))
      break;

    trySkipToken(AsmToken::Comma);
    res = parseOptionalOpr(Operands);
  }

  return res;
}
OperandMatchResultTy AMDGPUAsmParser::parseOptionalOpr(OperandVector &Operands) {
  OperandMatchResultTy res;
  for (const OptionalOperand &Op : AMDGPUOptionalOperandTable) {
    // try to parse any optional operand here
    if (Op.IsBit) {
      res = parseNamedBit(Op.Name, Operands, Op.Type);
    } else if (Op.Type == AMDGPUOperand::ImmTyOModSI) {
      res = parseOModOperand(Operands);
    } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstSel ||
               Op.Type == AMDGPUOperand::ImmTySdwaSrc0Sel ||
               Op.Type == AMDGPUOperand::ImmTySdwaSrc1Sel) {
      res = parseSDWASel(Operands, Op.Name, Op.Type);
    } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstUnused) {
      res = parseSDWADstUnused(Operands);
    } else if (Op.Type == AMDGPUOperand::ImmTyOpSel ||
               Op.Type == AMDGPUOperand::ImmTyOpSelHi ||
               Op.Type == AMDGPUOperand::ImmTyNegLo ||
               Op.Type == AMDGPUOperand::ImmTyNegHi) {
      res = parseOperandArrayWithPrefix(Op.Name, Operands, Op.Type,
                                        Op.ConvertResult);
    } else if (Op.Type == AMDGPUOperand::ImmTyDim) {
      res = parseDim(Operands);
    } else if (Op.Type == AMDGPUOperand::ImmTyCPol) {
      res = parseCPol(Operands);
    } else {
      res = parseIntWithPrefix(Op.Name, Operands, Op.Type, Op.ConvertResult);
    }
    if (res != MatchOperand_NoMatch) {
      return res;
    }
  }
  return MatchOperand_NoMatch;
}
OperandMatchResultTy AMDGPUAsmParser::parseOModOperand(OperandVector &Operands) {
  StringRef Name = getTokenStr();
  if (Name == "mul") {
    return parseIntWithPrefix("mul", Operands,
                              AMDGPUOperand::ImmTyOModSI, ConvertOmodMul);
  }

  if (Name == "div") {
    return parseIntWithPrefix("div", Operands,
                              AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv);
  }

  return MatchOperand_NoMatch;
}
void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands) {
  cvtVOP3P(Inst, Operands);

  int Opc = Inst.getOpcode();

  int SrcNum;
  const int Ops[] = { AMDGPU::OpName::src0,
                      AMDGPU::OpName::src1,
                      AMDGPU::OpName::src2 };
  for (SrcNum = 0;
       SrcNum < 3 && AMDGPU::getNamedOperandIdx(Opc, Ops[SrcNum]) != -1;
       ++SrcNum);
  assert(SrcNum > 0);

  int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
  unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();

  if ((OpSel & (1 << SrcNum)) != 0) {
    int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers);
    uint32_t ModVal = Inst.getOperand(ModIdx).getImm();
    Inst.getOperand(ModIdx).setImm(ModVal | SISrcMods::DST_OP_SEL);
  }
}
static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) {
      // 1. This operand is input modifiers
  return Desc.OpInfo[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS
      // 2. This is not last operand
      && Desc.NumOperands > (OpNum + 1)
      // 3. Next operand is register class
      && Desc.OpInfo[OpNum + 1].RegClass != -1
      // 4. Next register is not tied to any other operand
      && Desc.getOperandConstraint(OpNum + 1, MCOI::OperandConstraint::TIED_TO) == -1;
}
void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands)
{
  OptionalImmIndexMap OptionalIdx;
  unsigned Opc = Inst.getOpcode();

  unsigned I = 1;
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
  }

  for (unsigned E = Operands.size(); I != E; ++I) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
    if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
      Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
    } else if (Op.isInterpSlot() ||
               Op.isInterpAttr() ||
               Op.isAttrChan()) {
      Inst.addOperand(MCOperand::createImm(Op.getImm()));
    } else if (Op.isImmModifier()) {
      OptionalIdx[Op.getImmTy()] = I;
    } else {
      llvm_unreachable("unhandled operand type");
    }
  }

  if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::high) != -1) {
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyHigh);
  }

  if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
  }

  if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
  }
}
void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands,
                              OptionalImmIndexMap &OptionalIdx) {
  unsigned Opc = Inst.getOpcode();

  unsigned I = 1;
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
  }

  if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1) {
    // This instruction has src modifiers
    for (unsigned E = Operands.size(); I != E; ++I) {
      AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
      if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
        Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
      } else if (Op.isImmModifier()) {
        OptionalIdx[Op.getImmTy()] = I;
      } else if (Op.isRegOrImm()) {
        Op.addRegOrImmOperands(Inst, 1);
      } else {
        llvm_unreachable("unhandled operand type");
      }
    }
  } else {
    for (unsigned E = Operands.size(); I != E; ++I) {
      AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
      if (Op.isMod()) {
        OptionalIdx[Op.getImmTy()] = I;
      } else {
        Op.addRegOrImmOperands(Inst, 1);
      }
    }
  }

  if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
  }

  if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
  }

  // Special case v_mac_{f16, f32} and v_fmac_{f16, f32} (gfx906/gfx10+):
  // it has src2 register operand that is tied to dst operand
  // we don't allow modifiers for this operand in assembler so src2_modifiers
  // should be 0.
  if (Opc == AMDGPU::V_MAC_F32_e64_gfx6_gfx7 ||
      Opc == AMDGPU::V_MAC_F32_e64_gfx10 ||
      Opc == AMDGPU::V_MAC_F32_e64_vi ||
      Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx6_gfx7 ||
      Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx10 ||
      Opc == AMDGPU::V_MAC_F16_e64_vi ||
      Opc == AMDGPU::V_FMAC_F64_e64_gfx90a ||
      Opc == AMDGPU::V_FMAC_F32_e64_gfx10 ||
      Opc == AMDGPU::V_FMAC_F32_e64_vi ||
      Opc == AMDGPU::V_FMAC_LEGACY_F32_e64_gfx10 ||
      Opc == AMDGPU::V_FMAC_F16_e64_gfx10) {
    auto it = Inst.begin();
    std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers));
    it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2
    ++it;
    // Copy the operand to ensure it's not invalidated when Inst grows.
    Inst.insert(it, MCOperand(Inst.getOperand(0))); // src2 = dst
  }
}

void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) {
  OptionalImmIndexMap OptionalIdx;
  cvtVOP3(Inst, Operands, OptionalIdx);
}
void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands,
                               OptionalImmIndexMap &OptIdx) {
  const int Opc = Inst.getOpcode();
  const MCInstrDesc &Desc = MII.get(Opc);

  const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0;

  if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in) != -1) {
    assert(!IsPacked);
    Inst.addOperand(Inst.getOperand(0));
  }

  // FIXME: This is messy. Parse the modifiers as if it was a normal VOP3
  // instruction, and then figure out where to actually put the modifiers

  int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
  if (OpSelIdx != -1) {
    addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel);
  }

  int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
  if (OpSelHiIdx != -1) {
    int DefaultVal = IsPacked ? -1 : 0;
    addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi,
                          DefaultVal);
  }

  int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo);
  if (NegLoIdx != -1) {
    addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo);
    addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi);
  }

  const int Ops[] = { AMDGPU::OpName::src0,
                      AMDGPU::OpName::src1,
                      AMDGPU::OpName::src2 };
  const int ModOps[] = { AMDGPU::OpName::src0_modifiers,
                         AMDGPU::OpName::src1_modifiers,
                         AMDGPU::OpName::src2_modifiers };

  unsigned OpSel = 0;
  unsigned OpSelHi = 0;
  unsigned NegLo = 0;
  unsigned NegHi = 0;

  if (OpSelIdx != -1)
    OpSel = Inst.getOperand(OpSelIdx).getImm();

  if (OpSelHiIdx != -1)
    OpSelHi = Inst.getOperand(OpSelHiIdx).getImm();

  if (NegLoIdx != -1) {
    int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi);
    NegLo = Inst.getOperand(NegLoIdx).getImm();
    NegHi = Inst.getOperand(NegHiIdx).getImm();
  }

  for (int J = 0; J < 3; ++J) {
    int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]);
    if (OpIdx == -1)
      break;

    uint32_t ModVal = 0;

    if ((OpSel & (1 << J)) != 0)
      ModVal |= SISrcMods::OP_SEL_0;

    if ((OpSelHi & (1 << J)) != 0)
      ModVal |= SISrcMods::OP_SEL_1;

    if ((NegLo & (1 << J)) != 0)
      ModVal |= SISrcMods::NEG;

    if ((NegHi & (1 << J)) != 0)
      ModVal |= SISrcMods::NEG_HI;

    int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);

    Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal);
  }
}

void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands) {
  OptionalImmIndexMap OptIdx;
  cvtVOP3(Inst, Operands, OptIdx);
  cvtVOP3P(Inst, Operands, OptIdx);
}
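
// Illustrative note (editorial addition): the loop above folds the parsed
// op_sel/op_sel_hi/neg_lo/neg_hi vectors into the per-source *_modifiers
// operands. For example, op_sel:[1,0,0] with op_sel_hi:[0,1,1] sets OP_SEL_0
// in src0_modifiers and OP_SEL_1 in src1_modifiers and src2_modifiers, while
// the standalone op_sel/op_sel_hi MC operands keep their parsed values.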
//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//

bool AMDGPUOperand::isDPP8() const {
  return isImmTy(ImmTyDPP8);
}

bool AMDGPUOperand::isDPPCtrl() const {
  using namespace AMDGPU::DPP;

  bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm());
  if (result) {
    int64_t Imm = getImm();
    return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) ||
           (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) ||
           (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) ||
           (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) ||
           (Imm == DppCtrl::WAVE_SHL1) ||
           (Imm == DppCtrl::WAVE_ROL1) ||
           (Imm == DppCtrl::WAVE_SHR1) ||
           (Imm == DppCtrl::WAVE_ROR1) ||
           (Imm == DppCtrl::ROW_MIRROR) ||
           (Imm == DppCtrl::ROW_HALF_MIRROR) ||
           (Imm == DppCtrl::BCAST15) ||
           (Imm == DppCtrl::BCAST31) ||
           (Imm >= DppCtrl::ROW_SHARE_FIRST && Imm <= DppCtrl::ROW_SHARE_LAST) ||
           (Imm >= DppCtrl::ROW_XMASK_FIRST && Imm <= DppCtrl::ROW_XMASK_LAST);
  }
  return false;
}
//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//

bool AMDGPUOperand::isBLGP() const {
  return isImm() && getImmTy() == ImmTyBLGP && isUInt<3>(getImm());
}

bool AMDGPUOperand::isCBSZ() const {
  return isImm() && getImmTy() == ImmTyCBSZ && isUInt<3>(getImm());
}

bool AMDGPUOperand::isABID() const {
  return isImm() && getImmTy() == ImmTyABID && isUInt<4>(getImm());
}

bool AMDGPUOperand::isS16Imm() const {
  return isImm() && (isInt<16>(getImm()) || isUInt<16>(getImm()));
}

bool AMDGPUOperand::isU16Imm() const {
  return isImm() && isUInt<16>(getImm());
}
//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//

bool AMDGPUAsmParser::parseDimId(unsigned &Encoding) {
  // We want to allow "dim:1D" etc.,
  // but the initial 1 is tokenized as an integer.
  std::string Token;
  if (isToken(AsmToken::Integer)) {
    SMLoc Loc = getToken().getEndLoc();
    Token = std::string(getTokenStr());
    lex();
    if (getLoc() != Loc)
      return false;
  }

  StringRef Suffix;
  if (!parseId(Suffix))
    return false;
  Token += Suffix;

  StringRef DimId = Token;
  if (DimId.startswith("SQ_RSRC_IMG_"))
    DimId = DimId.drop_front(12);

  const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByAsmSuffix(DimId);
  if (!DimInfo)
    return false;

  Encoding = DimInfo->Encoding;
  return true;
}

OperandMatchResultTy AMDGPUAsmParser::parseDim(OperandVector &Operands) {
  if (!isGFX10Plus())
    return MatchOperand_NoMatch;

  SMLoc S = getLoc();

  if (!trySkipId("dim", AsmToken::Colon))
    return MatchOperand_NoMatch;

  unsigned Encoding;
  SMLoc Loc = getLoc();
  if (!parseDimId(Encoding)) {
    Error(Loc, "invalid dim value");
    return MatchOperand_ParseFail;
  }

  Operands.push_back(AMDGPUOperand::CreateImm(this, Encoding, S,
                                              AMDGPUOperand::ImmTyDim));
  return MatchOperand_Success;
}
//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//

OperandMatchResultTy AMDGPUAsmParser::parseDPP8(OperandVector &Operands) {
  SMLoc S = getLoc();

  if (!isGFX10Plus() || !trySkipId("dpp8", AsmToken::Colon))
    return MatchOperand_NoMatch;

  // dpp8:[%d,%d,%d,%d,%d,%d,%d,%d]

  int64_t Sels[8];

  if (!skipToken(AsmToken::LBrac, "expected an opening square bracket"))
    return MatchOperand_ParseFail;

  for (size_t i = 0; i < 8; ++i) {
    if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma"))
      return MatchOperand_ParseFail;

    SMLoc Loc = getLoc();
    if (getParser().parseAbsoluteExpression(Sels[i]))
      return MatchOperand_ParseFail;
    if (0 > Sels[i] || 7 < Sels[i]) {
      Error(Loc, "expected a 3-bit value");
      return MatchOperand_ParseFail;
    }
  }

  if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
    return MatchOperand_ParseFail;

  unsigned DPP8 = 0;
  for (size_t i = 0; i < 8; ++i)
    DPP8 |= (Sels[i] << (i * 3));

  Operands.push_back(AMDGPUOperand::CreateImm(this, DPP8, S, AMDGPUOperand::ImmTyDPP8));
  return MatchOperand_Success;
}
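
// Illustrative note (editorial addition): each selector occupies a 3-bit
// field with lane 0 in the lowest bits, so the identity permutation
// "dpp8:[0,1,2,3,4,5,6,7]" packs to 0 | (1 << 3) | (2 << 6) | ... | (7 << 21),
// i.e. octal 076543210 with selector 7 in the top octal digit.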
bool
AMDGPUAsmParser::isSupportedDPPCtrl(StringRef Ctrl,
                                    const OperandVector &Operands) {
  if (Ctrl == "row_newbcast")
    return isGFX90A();

  if (Ctrl == "row_share" ||
      Ctrl == "row_xmask")
    return isGFX10Plus();

  if (Ctrl == "wave_shl" ||
      Ctrl == "wave_shr" ||
      Ctrl == "wave_rol" ||
      Ctrl == "wave_ror" ||
      Ctrl == "row_bcast")
    return isVI() || isGFX9();

  return Ctrl == "row_mirror" ||
         Ctrl == "row_half_mirror" ||
         Ctrl == "quad_perm" ||
         Ctrl == "row_shl" ||
         Ctrl == "row_shr" ||
         Ctrl == "row_ror";
}
int64_t
AMDGPUAsmParser::parseDPPCtrlPerm() {
  // quad_perm:[%d,%d,%d,%d]

  if (!skipToken(AsmToken::LBrac, "expected an opening square bracket"))
    return -1;

  int64_t Val = 0;
  for (int i = 0; i < 4; ++i) {
    if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma"))
      return -1;

    int64_t Temp;
    SMLoc Loc = getLoc();
    if (getParser().parseAbsoluteExpression(Temp))
      return -1;
    if (Temp < 0 || Temp > 3) {
      Error(Loc, "expected a 2-bit value");
      return -1;
    }

    Val += (Temp << i * 2);
  }

  if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
    return -1;

  return Val;
}
) {
7887 using namespace AMDGPU::DPP
;
7892 SMLoc Loc
= getLoc();
7894 if (getParser().parseAbsoluteExpression(Val
))
7897 struct DppCtrlCheck
{
7903 DppCtrlCheck Check
= StringSwitch
<DppCtrlCheck
>(Ctrl
)
7904 .Case("wave_shl", {DppCtrl::WAVE_SHL1
, 1, 1})
7905 .Case("wave_rol", {DppCtrl::WAVE_ROL1
, 1, 1})
7906 .Case("wave_shr", {DppCtrl::WAVE_SHR1
, 1, 1})
7907 .Case("wave_ror", {DppCtrl::WAVE_ROR1
, 1, 1})
7908 .Case("row_shl", {DppCtrl::ROW_SHL0
, 1, 15})
7909 .Case("row_shr", {DppCtrl::ROW_SHR0
, 1, 15})
7910 .Case("row_ror", {DppCtrl::ROW_ROR0
, 1, 15})
7911 .Case("row_share", {DppCtrl::ROW_SHARE_FIRST
, 0, 15})
7912 .Case("row_xmask", {DppCtrl::ROW_XMASK_FIRST
, 0, 15})
7913 .Case("row_newbcast", {DppCtrl::ROW_NEWBCAST_FIRST
, 0, 15})
7914 .Default({-1, 0, 0});
7917 if (Check
.Ctrl
== -1) {
7918 Valid
= (Ctrl
== "row_bcast" && (Val
== 15 || Val
== 31));
7919 Val
= (Val
== 15)? DppCtrl::BCAST15
: DppCtrl::BCAST31
;
7921 Valid
= Check
.Lo
<= Val
&& Val
<= Check
.Hi
;
7922 Val
= (Check
.Lo
== Check
.Hi
) ? Check
.Ctrl
: (Check
.Ctrl
| Val
);
7926 Error(Loc
, Twine("invalid ", Ctrl
) + Twine(" value"));
7933 OperandMatchResultTy
7934 AMDGPUAsmParser::parseDPPCtrl(OperandVector
&Operands
) {
7935 using namespace AMDGPU::DPP
;
7937 if (!isToken(AsmToken::Identifier
) ||
7938 !isSupportedDPPCtrl(getTokenStr(), Operands
))
7939 return MatchOperand_NoMatch
;
7947 if (Ctrl
== "row_mirror") {
7948 Val
= DppCtrl::ROW_MIRROR
;
7949 } else if (Ctrl
== "row_half_mirror") {
7950 Val
= DppCtrl::ROW_HALF_MIRROR
;
7952 if (skipToken(AsmToken::Colon
, "expected a colon")) {
7953 if (Ctrl
== "quad_perm") {
7954 Val
= parseDPPCtrlPerm();
7956 Val
= parseDPPCtrlSel(Ctrl
);
7962 return MatchOperand_ParseFail
;
7965 AMDGPUOperand::CreateImm(this, Val
, S
, AMDGPUOperand::ImmTyDppCtrl
));
7966 return MatchOperand_Success
;
AMDGPUOperand::Ptr AMDGPUAsmParser::defaultRowMask() const {
  return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppRowMask);
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultEndpgmImmOperands() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyEndpgm);
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBankMask() const {
  return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppBankMask);
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBoundCtrl() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppBoundCtrl);
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFI() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppFi);
}
void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8) {
  OptionalImmIndexMap OptionalIdx;

  unsigned Opc = Inst.getOpcode();
  bool HasModifiers =
      AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1;

  unsigned I = 1;
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
  }

  int Fi = 0;
  for (unsigned E = Operands.size(); I != E; ++I) {
    auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(),
                                            MCOI::TIED_TO);
    if (TiedTo != -1) {
      assert((unsigned)TiedTo < Inst.getNumOperands());
      // handle tied old or src2 for MAC instructions
      Inst.addOperand(Inst.getOperand(TiedTo));
    }
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
    // Add the register arguments
    if (Op.isReg() && validateVccOperand(Op.getReg())) {
      // VOP2b (v_add_u32, v_sub_u32 ...) dpp use "vcc" token.
      // Skip it.
      continue;
    }

    if (IsDPP8) {
      if (Op.isDPP8()) {
        Op.addImmOperands(Inst, 1);
      } else if (HasModifiers &&
                 isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
        Op.addRegWithFPInputModsOperands(Inst, 2);
      } else if (Op.isFI()) {
        Fi = Op.getImm();
      } else if (Op.isReg()) {
        Op.addRegOperands(Inst, 1);
      } else {
        llvm_unreachable("Invalid operand type");
      }
    } else {
      if (HasModifiers &&
          isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
        Op.addRegWithFPInputModsOperands(Inst, 2);
      } else if (Op.isReg()) {
        Op.addRegOperands(Inst, 1);
      } else if (Op.isDPPCtrl()) {
        Op.addImmOperands(Inst, 1);
      } else if (Op.isImm()) {
        // Handle optional arguments
        OptionalIdx[Op.getImmTy()] = I;
      } else {
        llvm_unreachable("Invalid operand type");
      }
    }
  }

  if (IsDPP8) {
    using namespace llvm::AMDGPU::DPP;
    Inst.addOperand(MCOperand::createImm(Fi ? DPP8_FI_1 : DPP8_FI_0));
  } else {
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf);
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf);
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl);
    if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::fi) != -1) {
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppFi);
    }
  }
}
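
// When the optional DPP operands are omitted in the source, the defaults
// appended above are row_mask:0xf, bank_mask:0xf and bound_ctrl:0 (plus fi:0
// where the opcode has an fi operand); DPP8 variants instead carry a single
// packed dpp8 immediate followed by the fi flag.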
//===----------------------------------------------------------------------===//
// sdwa
//===----------------------------------------------------------------------===//
OperandMatchResultTy
AMDGPUAsmParser::parseSDWASel(OperandVector &Operands, StringRef Prefix,
                              AMDGPUOperand::ImmTy Type) {
  using namespace llvm::AMDGPU::SDWA;

  SMLoc S = getLoc();
  StringRef Value;
  OperandMatchResultTy res;

  SMLoc StringLoc;
  res = parseStringWithPrefix(Prefix, Value, StringLoc);
  if (res != MatchOperand_Success) {
    return res;
  }

  int64_t Int;
  Int = StringSwitch<int64_t>(Value)
        .Case("BYTE_0", SdwaSel::BYTE_0)
        .Case("BYTE_1", SdwaSel::BYTE_1)
        .Case("BYTE_2", SdwaSel::BYTE_2)
        .Case("BYTE_3", SdwaSel::BYTE_3)
        .Case("WORD_0", SdwaSel::WORD_0)
        .Case("WORD_1", SdwaSel::WORD_1)
        .Case("DWORD", SdwaSel::DWORD)
        .Default(0xffffffff);

  if (Int == 0xffffffff) {
    Error(StringLoc, "invalid " + Twine(Prefix) + " value");
    return MatchOperand_ParseFail;
  }

  Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, Type));
  return MatchOperand_Success;
}
OperandMatchResultTy
AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) {
  using namespace llvm::AMDGPU::SDWA;

  SMLoc S = getLoc();
  StringRef Value;
  OperandMatchResultTy res;

  SMLoc StringLoc;
  res = parseStringWithPrefix("dst_unused", Value, StringLoc);
  if (res != MatchOperand_Success) {
    return res;
  }

  int64_t Int;
  Int = StringSwitch<int64_t>(Value)
        .Case("UNUSED_PAD", DstUnused::UNUSED_PAD)
        .Case("UNUSED_SEXT", DstUnused::UNUSED_SEXT)
        .Case("UNUSED_PRESERVE", DstUnused::UNUSED_PRESERVE)
        .Default(0xffffffff);

  if (Int == 0xffffffff) {
    Error(StringLoc, "invalid dst_unused value");
    return MatchOperand_ParseFail;
  }

  Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTySdwaDstUnused));
  return MatchOperand_Success;
}
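
// SDWA selector operands therefore typically look like:
//   dst_sel:BYTE_0 dst_unused:UNUSED_PRESERVE src0_sel:WORD_1 src1_sel:DWORD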
void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOP1);
}

void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOP2);
}

void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true, true);
}

void AMDGPUAsmParser::cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, false, true);
}

void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, isVI());
}
void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands,
                              uint64_t BasicInstType,
                              bool SkipDstVcc,
                              bool SkipSrcVcc) {
  using namespace llvm::AMDGPU::SDWA;

  OptionalImmIndexMap OptionalIdx;
  bool SkipVcc = SkipDstVcc || SkipSrcVcc;
  bool SkippedVcc = false;

  unsigned I = 1;
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
  }

  for (unsigned E = Operands.size(); I != E; ++I) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
    if (SkipVcc && !SkippedVcc && Op.isReg() &&
        (Op.getReg() == AMDGPU::VCC || Op.getReg() == AMDGPU::VCC_LO)) {
      // VOP2b (v_add_u32, v_sub_u32 ...) sdwa use "vcc" token as dst.
      // Skip it if it's 2nd (e.g. v_add_i32_sdwa v1, vcc, v2, v3)
      // or 4th (v_addc_u32_sdwa v1, vcc, v2, v3, vcc) operand.
      // Skip VCC only if we didn't skip it on previous iteration.
      // Note that src0 and src1 occupy 2 slots each because of modifiers.
      if (BasicInstType == SIInstrFlags::VOP2 &&
          ((SkipDstVcc && Inst.getNumOperands() == 1) ||
           (SkipSrcVcc && Inst.getNumOperands() == 5))) {
        SkippedVcc = true;
        continue;
      } else if (BasicInstType == SIInstrFlags::VOPC &&
                 Inst.getNumOperands() == 0) {
        SkippedVcc = true;
        continue;
      }
    }
    if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
      Op.addRegOrImmWithInputModsOperands(Inst, 2);
    } else if (Op.isImm()) {
      // Handle optional arguments
      OptionalIdx[Op.getImmTy()] = I;
    } else {
      llvm_unreachable("Invalid operand type");
    }
    SkippedVcc = false;
  }

  if (Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx10 &&
      Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx9 &&
      Inst.getOpcode() != AMDGPU::V_NOP_sdwa_vi) {
    // v_nop_sdwa_vi/gfx9/gfx10 have no optional sdwa arguments
    switch (BasicInstType) {
    case SIInstrFlags::VOP1:
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
      if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
        addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
      }
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
      break;

    case SIInstrFlags::VOP2:
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
      if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
        addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
      }
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
      break;

    case SIInstrFlags::VOPC:
      if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::clamp) != -1)
        addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
      break;

    default:
      llvm_unreachable("Invalid instruction type. Only VOP1, VOP2 and VOPC allowed");
    }
  }

  // special case v_mac_{f16, f32}:
  // it has src2 register operand that is tied to dst operand
  if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
      Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
    auto it = Inst.begin();
    std::advance(
      it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2));
    Inst.insert(it, Inst.getOperand(0)); // src2 = dst
  }
}
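
// An SDWA source line handled by this converter might look like:
//   v_add_f32_sdwa v0, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
// Omitted selectors default to DWORD and dst_unused defaults to UNUSED_PRESERVE.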
//===----------------------------------------------------------------------===//
// mAI parser
//===----------------------------------------------------------------------===//
AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBLGP() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyBLGP);
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCBSZ() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCBSZ);
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultABID() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyABID);
}
/// Force static initialization.
extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUAsmParser() {
  RegisterMCAsmParser<AMDGPUAsmParser> A(getTheAMDGPUTarget());
  RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget());
}

#define GET_REGISTER_MATCHER
#define GET_MATCHER_IMPLEMENTATION
#define GET_MNEMONIC_SPELL_CHECKER
#define GET_MNEMONIC_CHECKER
#include "AMDGPUGenAsmMatcher.inc"
// This function should be defined after the auto-generated include so that we
// have the MatchClassKind enum defined
unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op,
                                                     unsigned Kind) {
  // Tokens like "glc" would be parsed as immediate operands in ParseOperand().
  // But MatchInstructionImpl() expects to meet token and fails to validate
  // operand. This method checks if we are given an immediate operand but expect
  // to get the corresponding token.
  AMDGPUOperand &Operand = (AMDGPUOperand&)Op;
  switch (Kind) {
  case MCK_addr64:
    return Operand.isAddr64() ? Match_Success : Match_InvalidOperand;
  case MCK_gds:
    return Operand.isGDS() ? Match_Success : Match_InvalidOperand;
  case MCK_lds:
    return Operand.isLDS() ? Match_Success : Match_InvalidOperand;
  case MCK_idxen:
    return Operand.isIdxen() ? Match_Success : Match_InvalidOperand;
  case MCK_offen:
    return Operand.isOffen() ? Match_Success : Match_InvalidOperand;
  case MCK_SSrcB32:
    // When operands have expression values, they will return true for isToken,
    // because it is not possible to distinguish between a token and an
    // expression at parse time. MatchInstructionImpl() will always try to
    // match an operand as a token, when isToken returns true, and when the
    // name of the expression is not a valid token, the match will fail,
    // so we need to handle it here.
    return Operand.isSSrcB32() ? Match_Success : Match_InvalidOperand;
  case MCK_SSrcF32:
    return Operand.isSSrcF32() ? Match_Success : Match_InvalidOperand;
  case MCK_SoppBrTarget:
    return Operand.isSoppBrTarget() ? Match_Success : Match_InvalidOperand;
  case MCK_VReg32OrOff:
    return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand;
  case MCK_InterpSlot:
    return Operand.isInterpSlot() ? Match_Success : Match_InvalidOperand;
  case MCK_Attr:
    return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand;
  case MCK_AttrChan:
    return Operand.isAttrChan() ? Match_Success : Match_InvalidOperand;
  case MCK_ImmSMEMOffset:
    return Operand.isSMEMOffset() ? Match_Success : Match_InvalidOperand;
  case MCK_SReg_64:
  case MCK_SReg_64_XEXEC:
    // Null is defined as a 32-bit register but
    // it should also be enabled with 64-bit operands.
    // The following code enables it for SReg_64 operands
    // used as source and destination. Remaining source
    // operands are handled in isInlinableImm.
    return Operand.isNull() ? Match_Success : Match_InvalidOperand;
  default:
    return Match_InvalidOperand;
  }
}
//===----------------------------------------------------------------------===//
// endpgm
//===----------------------------------------------------------------------===//
OperandMatchResultTy AMDGPUAsmParser::parseEndpgmOp(OperandVector &Operands) {
  SMLoc S = getLoc();
  int64_t Imm = 0;

  if (!parseExpr(Imm)) {
    // The operand is optional, if not present default to 0
    Imm = 0;
  }

  if (!isUInt<16>(Imm)) {
    Error(S, "expected a 16-bit value");
    return MatchOperand_ParseFail;
  }

  Operands.push_back(
      AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyEndpgm));
  return MatchOperand_Success;
}
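
// e.g. "s_endpgm 3" parses the trailing immediate; a bare "s_endpgm" gets the
// default value 0 from defaultEndpgmImmOperands().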
bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmTyEndpgm); }