//===-- SIMCCodeEmitter.cpp - SI Code Emitter -----------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// The SI code emitter produces machine code that can be executed
/// directly on the GPU device.
//
//===----------------------------------------------------------------------===//

#include "MCTargetDesc/AMDGPUFixupKinds.h"
#include "MCTargetDesc/AMDGPUMCCodeEmitter.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "SIDefines.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/Support/Casting.h"

using namespace llvm;

namespace {

class SIMCCodeEmitter : public AMDGPUMCCodeEmitter {
  const MCRegisterInfo &MRI;

  /// Encode an fp or int literal.
  uint32_t getLitEncoding(const MCOperand &MO, const MCOperandInfo &OpInfo,
                          const MCSubtargetInfo &STI) const;

public:
  SIMCCodeEmitter(const MCInstrInfo &mcii, const MCRegisterInfo &mri,
                  MCContext &ctx)
      : AMDGPUMCCodeEmitter(mcii), MRI(mri) {}
  SIMCCodeEmitter(const SIMCCodeEmitter &) = delete;
  SIMCCodeEmitter &operator=(const SIMCCodeEmitter &) = delete;

  /// Encode the instruction and write it to the OS.
  void encodeInstruction(const MCInst &MI, raw_ostream &OS,
                         SmallVectorImpl<MCFixup> &Fixups,
                         const MCSubtargetInfo &STI) const override;

  /// \returns the encoding for an MCOperand.
  uint64_t getMachineOpValue(const MCInst &MI, const MCOperand &MO,
                             SmallVectorImpl<MCFixup> &Fixups,
                             const MCSubtargetInfo &STI) const override;

  /// Use a fixup to encode the simm16 field for SOPP branch
  /// instructions.
  unsigned getSOPPBrEncoding(const MCInst &MI, unsigned OpNo,
                             SmallVectorImpl<MCFixup> &Fixups,
                             const MCSubtargetInfo &STI) const override;

  unsigned getSMEMOffsetEncoding(const MCInst &MI, unsigned OpNo,
                                 SmallVectorImpl<MCFixup> &Fixups,
                                 const MCSubtargetInfo &STI) const override;

  unsigned getSDWASrcEncoding(const MCInst &MI, unsigned OpNo,
                              SmallVectorImpl<MCFixup> &Fixups,
                              const MCSubtargetInfo &STI) const override;

  unsigned getSDWAVopcDstEncoding(const MCInst &MI, unsigned OpNo,
                                  SmallVectorImpl<MCFixup> &Fixups,
                                  const MCSubtargetInfo &STI) const override;

  unsigned getAVOperandEncoding(const MCInst &MI, unsigned OpNo,
                                SmallVectorImpl<MCFixup> &Fixups,
                                const MCSubtargetInfo &STI) const override;

private:
  uint64_t getImplicitOpSelHiEncoding(int Opcode) const;
};

} // end anonymous namespace

MCCodeEmitter *llvm::createSIMCCodeEmitter(const MCInstrInfo &MCII,
                                           const MCRegisterInfo &MRI,
                                           MCContext &Ctx) {
  return new SIMCCodeEmitter(MCII, MRI, Ctx);
}

// Returns the encoding value to use if the given integer is an integer inline
// immediate value, or 0 if it is not.
template <typename IntTy>
static uint32_t getIntInlineImmEncoding(IntTy Imm) {
  if (Imm >= 0 && Imm <= 64)
    return 128 + Imm;

  if (Imm >= -16 && Imm <= -1)
    return 192 + std::abs(Imm);

  return 0;
}
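
// An encoding of 255 selects the 32-bit literal constant that is emitted
// after the instruction; the helpers below fall back to it whenever a value
// has no inline-constant encoding.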
static uint32_t getLit16IntEncoding(uint16_t Val, const MCSubtargetInfo &STI) {
  uint16_t IntImm = getIntInlineImmEncoding(static_cast<int16_t>(Val));
  return IntImm == 0 ? 255 : IntImm;
}

static uint32_t getLit16Encoding(uint16_t Val, const MCSubtargetInfo &STI) {
  uint16_t IntImm = getIntInlineImmEncoding(static_cast<int16_t>(Val));
  if (IntImm != 0)
    return IntImm;

  if (Val == 0x3800) // 0.5
    return 240;

  if (Val == 0xB800) // -0.5
    return 241;

  if (Val == 0x3C00) // 1.0
    return 242;

  if (Val == 0xBC00) // -1.0
    return 243;

  if (Val == 0x4000) // 2.0
    return 244;

  if (Val == 0xC000) // -2.0
    return 245;

  if (Val == 0x4400) // 4.0
    return 246;

  if (Val == 0xC400) // -4.0
    return 247;

  if (Val == 0x3118 && // 1.0 / (2.0 * pi)
      STI.getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm])
    return 248;

  return 255;
}

static uint32_t getLit32Encoding(uint32_t Val, const MCSubtargetInfo &STI) {
  uint32_t IntImm = getIntInlineImmEncoding(static_cast<int32_t>(Val));
  if (IntImm != 0)
    return IntImm;

  if (Val == FloatToBits(0.5f))
    return 240;

  if (Val == FloatToBits(-0.5f))
    return 241;

  if (Val == FloatToBits(1.0f))
    return 242;

  if (Val == FloatToBits(-1.0f))
    return 243;

  if (Val == FloatToBits(2.0f))
    return 244;

  if (Val == FloatToBits(-2.0f))
    return 245;

  if (Val == FloatToBits(4.0f))
    return 246;

  if (Val == FloatToBits(-4.0f))
    return 247;

  if (Val == 0x3e22f983 && // 1.0 / (2.0 * pi)
      STI.getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm])
    return 248;

  return 255;
}

static uint32_t getLit64Encoding(uint64_t Val, const MCSubtargetInfo &STI) {
  uint32_t IntImm = getIntInlineImmEncoding(static_cast<int64_t>(Val));
  if (IntImm != 0)
    return IntImm;

  if (Val == DoubleToBits(0.5))
    return 240;

  if (Val == DoubleToBits(-0.5))
    return 241;

  if (Val == DoubleToBits(1.0))
    return 242;

  if (Val == DoubleToBits(-1.0))
    return 243;

  if (Val == DoubleToBits(2.0))
    return 244;

  if (Val == DoubleToBits(-2.0))
    return 245;

  if (Val == DoubleToBits(4.0))
    return 246;

  if (Val == DoubleToBits(-4.0))
    return 247;

  if (Val == 0x3fc45f306dc9c882 && // 1.0 / (2.0 * pi)
      STI.getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm])
    return 248;

  return 255;
}
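
// Maps an operand and its expected operand type to the src-field encoding: an
// inline-constant value, 255 to select the trailing 32-bit literal, or ~0 if
// the operand cannot be encoded as a constant at all.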
uint32_t SIMCCodeEmitter::getLitEncoding(const MCOperand &MO,
                                         const MCOperandInfo &OpInfo,
                                         const MCSubtargetInfo &STI) const {
  int64_t Imm;
  if (MO.isExpr()) {
    const auto *C = dyn_cast<MCConstantExpr>(MO.getExpr());
    if (!C)
      return 255;

    Imm = C->getValue();
  } else {

    assert(!MO.isDFPImm());

    if (!MO.isImm())
      return ~0;

    Imm = MO.getImm();
  }

  switch (OpInfo.OperandType) {
  case AMDGPU::OPERAND_REG_IMM_INT32:
  case AMDGPU::OPERAND_REG_IMM_FP32:
  case AMDGPU::OPERAND_REG_INLINE_C_INT32:
  case AMDGPU::OPERAND_REG_INLINE_C_FP32:
  case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
  case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
  case AMDGPU::OPERAND_REG_IMM_V2INT32:
  case AMDGPU::OPERAND_REG_IMM_V2FP32:
  case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
  case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
    return getLit32Encoding(static_cast<uint32_t>(Imm), STI);

  case AMDGPU::OPERAND_REG_IMM_INT64:
  case AMDGPU::OPERAND_REG_IMM_FP64:
  case AMDGPU::OPERAND_REG_INLINE_C_INT64:
  case AMDGPU::OPERAND_REG_INLINE_C_FP64:
  case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
    return getLit64Encoding(static_cast<uint64_t>(Imm), STI);

  case AMDGPU::OPERAND_REG_IMM_INT16:
  case AMDGPU::OPERAND_REG_INLINE_C_INT16:
  case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
    return getLit16IntEncoding(static_cast<uint16_t>(Imm), STI);
  case AMDGPU::OPERAND_REG_IMM_FP16:
  case AMDGPU::OPERAND_REG_INLINE_C_FP16:
  case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
    // FIXME: Is this correct? What do inline immediates do on SI for f16 src
    // which does not have f16 support?
    return getLit16Encoding(static_cast<uint16_t>(Imm), STI);
  case AMDGPU::OPERAND_REG_IMM_V2INT16:
  case AMDGPU::OPERAND_REG_IMM_V2FP16: {
    if (!isUInt<16>(Imm) && STI.getFeatureBits()[AMDGPU::FeatureVOP3Literal])
      return getLit32Encoding(static_cast<uint32_t>(Imm), STI);
    if (OpInfo.OperandType == AMDGPU::OPERAND_REG_IMM_V2FP16)
      return getLit16Encoding(static_cast<uint16_t>(Imm), STI);
    LLVM_FALLTHROUGH;
  }
  case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
  case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
    return getLit16IntEncoding(static_cast<uint16_t>(Imm), STI);
  case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
  case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: {
    uint16_t Lo16 = static_cast<uint16_t>(Imm);
    uint32_t Encoding = getLit16Encoding(Lo16, STI);
    return Encoding;
  }
  default:
    llvm_unreachable("invalid operand size");
  }
}
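
// Returns the op_sel_hi bits for the src operands this opcode does not have,
// so that encodeInstruction can default the unused bits to 1 as the hardware
// expects.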
uint64_t SIMCCodeEmitter::getImplicitOpSelHiEncoding(int Opcode) const {
  using namespace AMDGPU::VOP3PEncoding;
  using namespace AMDGPU::OpName;

  if (AMDGPU::getNamedOperandIdx(Opcode, op_sel_hi) != -1) {
    if (AMDGPU::getNamedOperandIdx(Opcode, src2) != -1)
      return 0;
    if (AMDGPU::getNamedOperandIdx(Opcode, src1) != -1)
      return OP_SEL_HI_2;
    if (AMDGPU::getNamedOperandIdx(Opcode, src0) != -1)
      return OP_SEL_HI_1 | OP_SEL_HI_2;
  }

  return OP_SEL_HI_0 | OP_SEL_HI_1 | OP_SEL_HI_2;
}

void SIMCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS,
                                        SmallVectorImpl<MCFixup> &Fixups,
                                        const MCSubtargetInfo &STI) const {
  verifyInstructionPredicates(MI,
                              computeAvailableFeatures(STI.getFeatureBits()));

  int Opcode = MI.getOpcode();
  uint64_t Encoding = getBinaryCodeForInstr(MI, Fixups, STI);
  const MCInstrDesc &Desc = MCII.get(Opcode);
  unsigned bytes = Desc.getSize();

  // Set unused op_sel_hi bits to 1 for VOP3P and MAI instructions.
  // Note that accvgpr_read/write are MAI, have src0, but do not use op_sel.
  if ((Desc.TSFlags & SIInstrFlags::VOP3P) ||
      Opcode == AMDGPU::V_ACCVGPR_READ_B32_vi ||
      Opcode == AMDGPU::V_ACCVGPR_WRITE_B32_vi) {
    Encoding |= getImplicitOpSelHiEncoding(Opcode);
  }

  for (unsigned i = 0; i < bytes; i++) {
    OS.write((uint8_t) ((Encoding >> (8 * i)) & 0xff));
  }
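
  // NSA encoding: on GFX10+, MIMG instructions may carry extra non-sequential
  // VGPR address operands; each one is appended as a single byte after the
  // base encoding, padded out to a dword boundary.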
  if (AMDGPU::isGFX10Plus(STI) && Desc.TSFlags & SIInstrFlags::MIMG) {
    int vaddr0 = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
                                            AMDGPU::OpName::vaddr0);
    int srsrc = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
                                           AMDGPU::OpName::srsrc);
    assert(vaddr0 >= 0 && srsrc > vaddr0);
    unsigned NumExtraAddrs = srsrc - vaddr0 - 1;
    unsigned NumPadding = (-NumExtraAddrs) & 3;

    for (unsigned i = 0; i < NumExtraAddrs; ++i)
      OS.write((uint8_t)getMachineOpValue(MI, MI.getOperand(vaddr0 + 1 + i),
                                          Fixups, STI));
    for (unsigned i = 0; i < NumPadding; ++i)
      OS.write(0);
  }

  if ((bytes > 8 && STI.getFeatureBits()[AMDGPU::FeatureVOP3Literal]) ||
      (bytes > 4 && !STI.getFeatureBits()[AMDGPU::FeatureVOP3Literal]))
    return;

  // Check for additional literals in SRC0/1/2 (Op 1/2/3)
  for (unsigned i = 0, e = Desc.getNumOperands(); i < e; ++i) {

    // Check if this operand should be encoded as [SV]Src
    if (!AMDGPU::isSISrcOperand(Desc, i))
      continue;

    // Is this operand a literal immediate?
    const MCOperand &Op = MI.getOperand(i);
    if (getLitEncoding(Op, Desc.OpInfo[i], STI) != 255)
      continue;

    // Yes! Encode it
    int64_t Imm = 0;

    if (Op.isImm())
      Imm = Op.getImm();
    else if (Op.isExpr()) {
      if (const auto *C = dyn_cast<MCConstantExpr>(Op.getExpr()))
        Imm = C->getValue();

    } else if (!Op.isExpr()) // Exprs will be replaced with a fixup value.
      llvm_unreachable("Must be immediate or expr");

    for (unsigned j = 0; j < 4; j++) {
      OS.write((uint8_t) ((Imm >> (8 * j)) & 0xff));
    }

    // Only one literal value allowed
    break;
  }
}

unsigned SIMCCodeEmitter::getSOPPBrEncoding(const MCInst &MI, unsigned OpNo,
                                            SmallVectorImpl<MCFixup> &Fixups,
                                            const MCSubtargetInfo &STI) const {
  const MCOperand &MO = MI.getOperand(OpNo);

  if (MO.isExpr()) {
    const MCExpr *Expr = MO.getExpr();
    MCFixupKind Kind = (MCFixupKind)AMDGPU::fixup_si_sopp_br;
    Fixups.push_back(MCFixup::create(0, Expr, Kind, MI.getLoc()));
    return 0;
  }

  return getMachineOpValue(MI, MO, Fixups, STI);
}

unsigned SIMCCodeEmitter::getSMEMOffsetEncoding(const MCInst &MI, unsigned OpNo,
                                                SmallVectorImpl<MCFixup> &Fixups,
                                                const MCSubtargetInfo &STI) const {
  auto Offset = MI.getOperand(OpNo).getImm();
  // VI only supports 20-bit unsigned offsets.
  assert(!AMDGPU::isVI(STI) || isUInt<20>(Offset));
  return Offset;
}
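
// For GFX9+ SDWA, a src operand is encoded as eight register-number bits plus
// a flag that marks SGPRs and inline constants, distinguishing them from
// VGPRs.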
unsigned
SIMCCodeEmitter::getSDWASrcEncoding(const MCInst &MI, unsigned OpNo,
                                    SmallVectorImpl<MCFixup> &Fixups,
                                    const MCSubtargetInfo &STI) const {
  using namespace AMDGPU::SDWA;

  uint64_t RegEnc = 0;

  const MCOperand &MO = MI.getOperand(OpNo);

  if (MO.isReg()) {
    unsigned Reg = MO.getReg();
    RegEnc |= MRI.getEncodingValue(Reg);
    RegEnc &= SDWA9EncValues::SRC_VGPR_MASK;
    if (AMDGPU::isSGPR(AMDGPU::mc2PseudoReg(Reg), &MRI)) {
      RegEnc |= SDWA9EncValues::SRC_SGPR_MASK;
    }
    return RegEnc;
  } else {
    const MCInstrDesc &Desc = MCII.get(MI.getOpcode());
    uint32_t Enc = getLitEncoding(MO, Desc.OpInfo[OpNo], STI);
    if (Enc != ~0U && Enc != 255) {
      return Enc | SDWA9EncValues::SRC_SGPR_MASK;
    }
  }

  llvm_unreachable("Unsupported operand kind");
  return 0;
}
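
// For GFX9+ SDWA, the VOPC destination is VCC by default; a non-VCC SGPR
// destination encodes its register number together with the
// VOPC_DST_VCC_MASK flag.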
unsigned
SIMCCodeEmitter::getSDWAVopcDstEncoding(const MCInst &MI, unsigned OpNo,
                                        SmallVectorImpl<MCFixup> &Fixups,
                                        const MCSubtargetInfo &STI) const {
  using namespace AMDGPU::SDWA;

  uint64_t RegEnc = 0;

  const MCOperand &MO = MI.getOperand(OpNo);

  unsigned Reg = MO.getReg();
  if (Reg != AMDGPU::VCC && Reg != AMDGPU::VCC_LO) {
    RegEnc |= MRI.getEncodingValue(Reg);
    RegEnc &= SDWA9EncValues::VOPC_DST_SGPR_MASK;
    RegEnc |= SDWA9EncValues::VOPC_DST_VCC_MASK;
  }
  return RegEnc;
}

unsigned
SIMCCodeEmitter::getAVOperandEncoding(const MCInst &MI, unsigned OpNo,
                                      SmallVectorImpl<MCFixup> &Fixups,
                                      const MCSubtargetInfo &STI) const {
  unsigned Reg = MI.getOperand(OpNo).getReg();
  uint64_t Enc = MRI.getEncodingValue(Reg);

  // VGPR and AGPR have the same encoding, but SrcA and SrcB operands of mfma
  // instructions use acc[0:1] modifier bits to distinguish. These bits are
  // encoded as a virtual 9th bit of the register for these operands.
  if (MRI.getRegClass(AMDGPU::AGPR_32RegClassID).contains(Reg) ||
      MRI.getRegClass(AMDGPU::AReg_64RegClassID).contains(Reg) ||
      MRI.getRegClass(AMDGPU::AReg_96RegClassID).contains(Reg) ||
      MRI.getRegClass(AMDGPU::AReg_128RegClassID).contains(Reg) ||
      MRI.getRegClass(AMDGPU::AReg_160RegClassID).contains(Reg) ||
      MRI.getRegClass(AMDGPU::AReg_192RegClassID).contains(Reg) ||
      MRI.getRegClass(AMDGPU::AReg_224RegClassID).contains(Reg) ||
      MRI.getRegClass(AMDGPU::AReg_256RegClassID).contains(Reg) ||
      MRI.getRegClass(AMDGPU::AGPR_LO16RegClassID).contains(Reg))
    Enc |= 512;

  return Enc;
}
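
// Conservatively classifies a general expression: AMDGPU abs32 symbol
// variants, constants, and differences of symbols are absolute; anything else
// is assumed to need a PC-relative fixup.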
static bool needsPCRel(const MCExpr *Expr) {
  switch (Expr->getKind()) {
  case MCExpr::SymbolRef: {
    auto *SE = cast<MCSymbolRefExpr>(Expr);
    MCSymbolRefExpr::VariantKind Kind = SE->getKind();
    return Kind != MCSymbolRefExpr::VK_AMDGPU_ABS32_LO &&
           Kind != MCSymbolRefExpr::VK_AMDGPU_ABS32_HI;
  }
  case MCExpr::Binary: {
    auto *BE = cast<MCBinaryExpr>(Expr);
    if (BE->getOpcode() == MCBinaryExpr::Sub)
      return false;
    return needsPCRel(BE->getLHS()) || needsPCRel(BE->getRHS());
  }
  case MCExpr::Unary:
    return needsPCRel(cast<MCUnaryExpr>(Expr)->getSubExpr());
  case MCExpr::Target:
  case MCExpr::Constant:
    return false;
  }
  llvm_unreachable("invalid kind");
}

uint64_t SIMCCodeEmitter::getMachineOpValue(const MCInst &MI,
                                            const MCOperand &MO,
                                            SmallVectorImpl<MCFixup> &Fixups,
                                            const MCSubtargetInfo &STI) const {
  if (MO.isReg())
    return MRI.getEncodingValue(MO.getReg());

  if (MO.isExpr() && MO.getExpr()->getKind() != MCExpr::Constant) {
    // FIXME: Whether this expression is PCRel or not should not depend on
    // what the expression looks like. Given that this is just a general
    // expression, it should probably be FK_Data_4 and whatever is producing
    //
    //    s_add_u32 s2, s2, (extern_const_addrspace+16
    //
    // and expecting a PCRel should instead produce
    //
    // .Ltmp1:
    //   s_add_u32 s2, s2, (extern_const_addrspace+16)-.Ltmp1
    MCFixupKind Kind;
    if (needsPCRel(MO.getExpr()))
      Kind = FK_PCRel_4;
    else
      Kind = FK_Data_4;

    const MCInstrDesc &Desc = MCII.get(MI.getOpcode());
    uint32_t Offset = Desc.getSize();
    assert(Offset == 4 || Offset == 8);

    Fixups.push_back(
        MCFixup::create(Offset, MO.getExpr(), Kind, MI.getLoc()));
  }

  // Figure out the operand number, needed for isSrcOperand check
  unsigned OpNo = 0;
  for (unsigned e = MI.getNumOperands(); OpNo < e; ++OpNo) {
    if (&MO == &MI.getOperand(OpNo))
      break;
  }

  const MCInstrDesc &Desc = MCII.get(MI.getOpcode());
  if (AMDGPU::isSISrcOperand(Desc, OpNo)) {
    uint32_t Enc = getLitEncoding(MO, Desc.OpInfo[OpNo], STI);
    if (Enc != ~0U &&
        (Enc != 255 || Desc.getSize() == 4 || Desc.getSize() == 8))
      return Enc;

  } else if (MO.isImm())
    return MO.getImm();

  llvm_unreachable("Encoding of this operand type is not supported yet.");
  return 0;
}

#define ENABLE_INSTR_PREDICATE_VERIFIER
#include "AMDGPUGenMCCodeEmitter.inc"