1 //===-- SIAddIMGInit.cpp - Add any required IMG inits ---------------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
10 /// Any MIMG instructions that use tfe or lwe require an initialization of the
11 /// result register that will be written in the case of a memory access failure
12 /// The required code is also added to tie this init code to the result of the
15 //===----------------------------------------------------------------------===//
19 #include "AMDGPUSubtarget.h"
20 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
21 #include "SIInstrInfo.h"
22 #include "llvm/CodeGen/MachineFunctionPass.h"
23 #include "llvm/CodeGen/MachineInstrBuilder.h"
24 #include "llvm/CodeGen/MachineRegisterInfo.h"
25 #include "llvm/IR/Function.h"
26 #include "llvm/Support/Debug.h"
27 #include "llvm/Target/TargetMachine.h"
29 #define DEBUG_TYPE "si-img-init"
/// Machine pass that adds the initialization of MIMG load results required
/// when the TFE or LWE bits are set (see the file header comment).
// NOTE(review): this extraction appears to have lost the access specifiers,
// the `static char ID;` member, and the closing braces of the constructor,
// getAnalysisUsage, and the class itself — restore from upstream before use.
class SIAddIMGInit : public MachineFunctionPass {
  // Register the pass with the global PassRegistry on construction.
  SIAddIMGInit() : MachineFunctionPass(ID) {
    initializeSIAddIMGInitPass(*PassRegistry::getPassRegistry());
  // Entry point: scans the machine function for MIMG instructions that
  // need their result register initialized (defined out of line below).
  bool runOnMachineFunction(MachineFunction &MF) override;
  void getAnalysisUsage(AnalysisUsage &AU) const override {
    // No extra analyses are required; defer to the base class.
    MachineFunctionPass::getAnalysisUsage(AU);
} // End anonymous namespace.
// Register the pass with the LLVM PassRegistry under the "si-img-init"
// command-line name (DEBUG_TYPE) and the human-readable "SI Add IMG Init".
INITIALIZE_PASS(SIAddIMGInit, DEBUG_TYPE, "SI Add IMG Init", false, false)
// Pass identification: LLVM uses the *address* of ID to identify the pass,
// so the value itself is irrelevant.
char SIAddIMGInit::ID = 0;

// Externally visible handle to this pass's ID, so other target code can
// reference the pass without seeing the (anonymous-namespace) class.
char &llvm::SIAddIMGInitID = SIAddIMGInit::ID;
60 FunctionPass
*llvm::createSIAddIMGInitPass() { return new SIAddIMGInit(); }
// Walk every basic block of MF looking for MIMG loads (non-store image
// instructions) whose tfe or lwe immediate is non-zero. For each such
// instruction, build an initialization of the destination register and tie
// it to the dst operand so the value is defined even when the memory access
// fails (per the file header comment).
//
// NOTE(review): this chunk is visibly truncated — the assignment targets for
// DstIdx, InitIdx, DstSize, PrevDst, NewDst (in-loop) and SubReg, the loop
// increment/closing braces, and the trailing .addImm/.addReg operand lines of
// several BuildMI chains are missing. Restore from upstream before building.
bool SIAddIMGInit::runOnMachineFunction(MachineFunction &MF) {
  MachineRegisterInfo &MRI = MF.getRegInfo();
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIInstrInfo *TII = ST.getInstrInfo();
  const SIRegisterInfo *RI = ST.getRegisterInfo();

  // NOTE(review): increment clause and ')' of this for appear lost.
  for (MachineFunction::iterator BI = MF.begin(), BE = MF.end(); BI != BE;
    MachineBasicBlock &MBB = *BI;
    MachineBasicBlock::iterator I, Next;
    // Next allows deleting/inserting around I without invalidating the walk.
    for (I = MBB.begin(); I != MBB.end(); I = Next) {
      MachineInstr &MI = *I;

      auto Opcode = MI.getOpcode();
      // Only image loads can fault and need result initialization; stores
      // have no result register to initialize.
      if (TII->isMIMG(Opcode) && !MI.mayStore()) {
        MachineOperand *TFE = TII->getNamedOperand(MI, AMDGPU::OpName::tfe);
        MachineOperand *LWE = TII->getNamedOperand(MI, AMDGPU::OpName::lwe);
        MachineOperand *D16 = TII->getNamedOperand(MI, AMDGPU::OpName::d16);

        // Check for instructions that don't have tfe or lwe fields
        // There shouldn't be any at this point.
        assert( (TFE && LWE) && "Expected tfe and lwe operands in instruction");

        unsigned TFEVal = TFE->getImm();
        unsigned LWEVal = LWE->getImm();
        // d16 is optional on some MIMG forms; treat "absent" as 0.
        unsigned D16Val = D16 ? D16->getImm() : 0;

        if (TFEVal || LWEVal) {
          // At least one of TFE or LWE are non-zero
          // We have to insert a suitable initialization of the result value and
          // tie this to the dest of the image instruction.

          const DebugLoc &DL = MI.getDebugLoc();

          // NOTE(review): assignment target (presumably `int DstIdx =`) lost.
              AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vdata);

          // Calculate which dword we have to initialize to 0.
          MachineOperand *MO_Dmask =
              TII->getNamedOperand(MI, AMDGPU::OpName::dmask);

          // check that dmask operand is found.
          assert(MO_Dmask && "Expected dmask operand in instruction");

          unsigned dmask = MO_Dmask->getImm();
          // Determine the number of active lanes taking into account the
          // Gather4 special case
          unsigned ActiveLanes =
              TII->isGather4(Opcode) ? 4 : countPopulation(dmask);

          // Subreg indices are counted from 1
          // When D16 then we want next whole VGPR after write data.
          static_assert(AMDGPU::sub0 == 1 && AMDGPU::sub4 == 5, "Subreg indices different from expected");

          bool Packed = !ST.hasUnpackedD16VMem();

          // NOTE(review): assignment target (presumably `unsigned InitIdx =`)
          // lost. Packed D16 halves the register count (two lanes per VGPR);
          // +1 accounts for the TFE/LWE error word.
              D16Val && Packed ? ((ActiveLanes + 1) >> 1) + 1 : ActiveLanes + 1;

          // Abandon attempt if the dst size isn't large enough
          // - this is in fact an error but this is picked up elsewhere and
          //   reported correctly.
          // NOTE(review): assignment target (presumably `unsigned DstSize =`)
          // lost; getRegSizeInBits / 32 converts bits to dword count.
              RI->getRegSizeInBits(*TII->getOpRegClass(MI, DstIdx)) / 32;
          if (DstSize < InitIdx)
          // NOTE(review): the guarded statement (presumably `continue;`) lost.

          // Create a register for the initialization value.
          // NOTE(review): assignment target (presumably `unsigned PrevDst =`)
          // lost.
              MRI.createVirtualRegister(TII->getOpRegClass(MI, DstIdx));
          unsigned NewDst = 0; // Final initialized value will be in here

          // If PRTStrictNull feature is enabled (the default) then initialize
          // all the result registers to 0, otherwise just the error indication
          // register (VGPRn+1)
          unsigned SizeLeft = ST.usePRTStrictNull() ? InitIdx : 1;
          unsigned CurrIdx = ST.usePRTStrictNull() ? 1 : InitIdx;

            // In this case we can just initialize the result directly
            // NOTE(review): trailing .addImm operand line appears lost.
            BuildMI(MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), PrevDst)
            BuildMI(MBB, MI, DL, TII->get(AMDGPU::IMPLICIT_DEF), PrevDst);
          // Initialize one dword per iteration, moving to the next subreg.
          for (; SizeLeft; SizeLeft--, CurrIdx++) {
            // NOTE(review): assignment targets of the next two statements
            // (presumably NewDst and a VGPR_32 SubReg) lost.
                MRI.createVirtualRegister(TII->getOpRegClass(MI, DstIdx));
                MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
            // NOTE(review): trailing .addImm operand line appears lost.
            BuildMI(MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), SubReg)
            // Insert into the super-reg
            // NOTE(review): trailing .addReg/.addImm operand lines appear lost.
            BuildMI(MBB, I, DL, TII->get(TargetOpcode::INSERT_SUBREG), NewDst)

          // Add as an implicit operand
          MachineInstrBuilder(MF, MI).addReg(NewDst, RegState::Implicit);

          // Tie the just added implicit operand to the dst
          MI.tieOperands(DstIdx, MI.getNumOperands() - 1);