[x86] fix assert with horizontal math + broadcast of vector (PR43402)
[llvm-core.git] / lib / Target / AMDGPU / SIAddIMGInit.cpp
blobee011286b8ff34528ae2c6549b3000fb12ac41c4
//===-- SIAddIMGInit.cpp - Add any required IMG inits ---------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// Any MIMG instructions that use tfe or lwe require an initialization of the
/// result register that will be written in the case of a memory access failure.
/// The required code is also added to tie this init code to the result of the
/// img instruction.
///
//===----------------------------------------------------------------------===//
18 #include "AMDGPU.h"
19 #include "AMDGPUSubtarget.h"
20 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
21 #include "SIInstrInfo.h"
22 #include "llvm/CodeGen/MachineFunctionPass.h"
23 #include "llvm/CodeGen/MachineInstrBuilder.h"
24 #include "llvm/CodeGen/MachineRegisterInfo.h"
25 #include "llvm/IR/Function.h"
26 #include "llvm/Support/Debug.h"
27 #include "llvm/Target/TargetMachine.h"
29 #define DEBUG_TYPE "si-img-init"
31 using namespace llvm;
33 namespace {
35 class SIAddIMGInit : public MachineFunctionPass {
36 public:
37 static char ID;
39 public:
40 SIAddIMGInit() : MachineFunctionPass(ID) {
41 initializeSIAddIMGInitPass(*PassRegistry::getPassRegistry());
44 bool runOnMachineFunction(MachineFunction &MF) override;
46 void getAnalysisUsage(AnalysisUsage &AU) const override {
47 AU.setPreservesCFG();
48 MachineFunctionPass::getAnalysisUsage(AU);
52 } // End anonymous namespace.
54 INITIALIZE_PASS(SIAddIMGInit, DEBUG_TYPE, "SI Add IMG Init", false, false)
56 char SIAddIMGInit::ID = 0;
58 char &llvm::SIAddIMGInitID = SIAddIMGInit::ID;
60 FunctionPass *llvm::createSIAddIMGInitPass() { return new SIAddIMGInit(); }
62 bool SIAddIMGInit::runOnMachineFunction(MachineFunction &MF) {
63 MachineRegisterInfo &MRI = MF.getRegInfo();
64 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
65 const SIInstrInfo *TII = ST.getInstrInfo();
66 const SIRegisterInfo *RI = ST.getRegisterInfo();
67 bool Changed = false;
69 for (MachineFunction::iterator BI = MF.begin(), BE = MF.end(); BI != BE;
70 ++BI) {
71 MachineBasicBlock &MBB = *BI;
72 MachineBasicBlock::iterator I, Next;
73 for (I = MBB.begin(); I != MBB.end(); I = Next) {
74 Next = std::next(I);
75 MachineInstr &MI = *I;
77 auto Opcode = MI.getOpcode();
78 if (TII->isMIMG(Opcode) && !MI.mayStore()) {
79 MachineOperand *TFE = TII->getNamedOperand(MI, AMDGPU::OpName::tfe);
80 MachineOperand *LWE = TII->getNamedOperand(MI, AMDGPU::OpName::lwe);
81 MachineOperand *D16 = TII->getNamedOperand(MI, AMDGPU::OpName::d16);
83 // Check for instructions that don't have tfe or lwe fields
84 // There shouldn't be any at this point.
85 assert( (TFE && LWE) && "Expected tfe and lwe operands in instruction");
87 unsigned TFEVal = TFE->getImm();
88 unsigned LWEVal = LWE->getImm();
89 unsigned D16Val = D16 ? D16->getImm() : 0;
91 if (TFEVal || LWEVal) {
92 // At least one of TFE or LWE are non-zero
93 // We have to insert a suitable initialization of the result value and
94 // tie this to the dest of the image instruction.
96 const DebugLoc &DL = MI.getDebugLoc();
98 int DstIdx =
99 AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vdata);
101 // Calculate which dword we have to initialize to 0.
102 MachineOperand *MO_Dmask =
103 TII->getNamedOperand(MI, AMDGPU::OpName::dmask);
105 // check that dmask operand is found.
106 assert(MO_Dmask && "Expected dmask operand in instruction");
108 unsigned dmask = MO_Dmask->getImm();
109 // Determine the number of active lanes taking into account the
110 // Gather4 special case
111 unsigned ActiveLanes =
112 TII->isGather4(Opcode) ? 4 : countPopulation(dmask);
114 // Subreg indices are counted from 1
115 // When D16 then we want next whole VGPR after write data.
116 static_assert(AMDGPU::sub0 == 1 && AMDGPU::sub4 == 5, "Subreg indices different from expected");
118 bool Packed = !ST.hasUnpackedD16VMem();
120 unsigned InitIdx =
121 D16Val && Packed ? ((ActiveLanes + 1) >> 1) + 1 : ActiveLanes + 1;
123 // Abandon attempt if the dst size isn't large enough
124 // - this is in fact an error but this is picked up elsewhere and
125 // reported correctly.
126 uint32_t DstSize =
127 RI->getRegSizeInBits(*TII->getOpRegClass(MI, DstIdx)) / 32;
128 if (DstSize < InitIdx)
129 continue;
131 // Create a register for the intialization value.
132 Register PrevDst =
133 MRI.createVirtualRegister(TII->getOpRegClass(MI, DstIdx));
134 unsigned NewDst = 0; // Final initialized value will be in here
136 // If PRTStrictNull feature is enabled (the default) then initialize
137 // all the result registers to 0, otherwise just the error indication
138 // register (VGPRn+1)
139 unsigned SizeLeft = ST.usePRTStrictNull() ? InitIdx : 1;
140 unsigned CurrIdx = ST.usePRTStrictNull() ? 1 : InitIdx;
142 if (DstSize == 1) {
143 // In this case we can just initialize the result directly
144 BuildMI(MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), PrevDst)
145 .addImm(0);
146 NewDst = PrevDst;
147 } else {
148 BuildMI(MBB, MI, DL, TII->get(AMDGPU::IMPLICIT_DEF), PrevDst);
149 for (; SizeLeft; SizeLeft--, CurrIdx++) {
150 NewDst =
151 MRI.createVirtualRegister(TII->getOpRegClass(MI, DstIdx));
152 // Initialize dword
153 Register SubReg =
154 MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
155 BuildMI(MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), SubReg)
156 .addImm(0);
157 // Insert into the super-reg
158 BuildMI(MBB, I, DL, TII->get(TargetOpcode::INSERT_SUBREG), NewDst)
159 .addReg(PrevDst)
160 .addReg(SubReg)
161 .addImm(CurrIdx);
163 PrevDst = NewDst;
167 // Add as an implicit operand
168 MachineInstrBuilder(MF, MI).addReg(NewDst, RegState::Implicit);
170 // Tie the just added implicit operand to the dst
171 MI.tieOperands(DstIdx, MI.getNumOperands() - 1);
173 Changed = true;
179 return Changed;