1 //===----- R600Packetizer.cpp - VLIW packetizer ---------------------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
10 /// This pass implements instruction packetization for R600. It unsets the isLast
11 /// bit of instructions inside a bundle and substitutes source registers with
12 /// PreviousVector when applicable.
14 //===----------------------------------------------------------------------===//
17 #include "AMDGPUSubtarget.h"
18 #include "R600InstrInfo.h"
19 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
20 #include "llvm/CodeGen/DFAPacketizer.h"
21 #include "llvm/CodeGen/MachineDominators.h"
22 #include "llvm/CodeGen/MachineFunctionPass.h"
23 #include "llvm/CodeGen/MachineLoopInfo.h"
24 #include "llvm/CodeGen/Passes.h"
25 #include "llvm/CodeGen/ScheduleDAG.h"
26 #include "llvm/Support/Debug.h"
27 #include "llvm/Support/raw_ostream.h"
31 #define DEBUG_TYPE "packets"
35 class R600Packetizer
: public MachineFunctionPass
{
/// Default constructor: forwards this pass's ID to the MachineFunctionPass
/// base class.
39 R600Packetizer() : MachineFunctionPass(ID
) {}
/// Declares the analyses this pass depends on. MachineDominatorTree and
/// MachineLoopInfo are each registered as required and as preserved, and the
/// request is then forwarded to MachineFunctionPass::getAnalysisUsage.
///
/// \param AU analysis-usage record to populate.
41 void getAnalysisUsage(AnalysisUsage
&AU
) const override
{
43 AU
.addRequired
<MachineDominatorTree
>();
44 AU
.addPreserved
<MachineDominatorTree
>();
45 AU
.addRequired
<MachineLoopInfo
>();
46 AU
.addPreserved
<MachineLoopInfo
>();
// Let the base class add its own requirements last.
47 MachineFunctionPass::getAnalysisUsage(AU
);
/// \returns the fixed display name of this pass, "R600 Packetizer".
50 StringRef
getPassName() const override
{ return "R600 Packetizer"; }
52 bool runOnMachineFunction(MachineFunction
&Fn
) override
;
55 class R600PacketizerList
: public VLIWPacketizerList
{
57 const R600InstrInfo
*TII
;
58 const R600RegisterInfo
&TRI
;
60 bool ConsideredInstUsesAlreadyWrittenVectorElement
;
/// \returns the hardware register channel, as reported by
/// TRI.getHWRegChan, of the register held in operand 0 of \p MI.
/// NOTE(review): operand 0 is presumably the destination register of the
/// instruction -- confirm against the instruction definitions.
62 unsigned getSlot(const MachineInstr
&MI
) const {
63 return TRI
.getHWRegChan(MI
.getOperand(0).getReg());
66 /// \returns the register to PV channel mapping for the bundle or single
67 /// instruction that immediately precedes I.
68 DenseMap
<unsigned, unsigned> getPreviousVector(MachineBasicBlock::iterator I
)
70 DenseMap
<unsigned, unsigned> Result
;
72 if (!TII
->isALUInstr(I
->getOpcode()) && !I
->isBundle())
74 MachineBasicBlock::instr_iterator BI
= I
.getInstrIterator();
80 int BISlot
= getSlot(*BI
);
81 if (LastDstChan
>= BISlot
)
84 if (TII
->isPredicated(*BI
))
86 int OperandIdx
= TII
->getOperandIdx(BI
->getOpcode(), R600::OpName::write
);
87 if (OperandIdx
> -1 && BI
->getOperand(OperandIdx
).getImm() == 0)
89 int DstIdx
= TII
->getOperandIdx(BI
->getOpcode(), R600::OpName::dst
);
93 unsigned Dst
= BI
->getOperand(DstIdx
).getReg();
94 if (isTrans
|| TII
->isTransOnly(*BI
)) {
95 Result
[Dst
] = R600::PS
;
98 if (BI
->getOpcode() == R600::DOT4_r600
||
99 BI
->getOpcode() == R600::DOT4_eg
) {
100 Result
[Dst
] = R600::PV_X
;
103 if (Dst
== R600::OQAP
) {
107 switch (TRI
.getHWRegChan(Dst
)) {
121 llvm_unreachable("Invalid Chan");
124 } while ((++BI
)->isBundledWithPred());
128 void substitutePV(MachineInstr
&MI
, const DenseMap
<unsigned, unsigned> &PVs
)
135 for (unsigned i
= 0; i
< 3; i
++) {
136 int OperandIdx
= TII
->getOperandIdx(MI
.getOpcode(), Ops
[i
]);
139 unsigned Src
= MI
.getOperand(OperandIdx
).getReg();
140 const DenseMap
<unsigned, unsigned>::const_iterator It
= PVs
.find(Src
);
142 MI
.getOperand(OperandIdx
).setReg(It
->second
);
/// Builds the packetizer list for \p MF.
///
/// Passes \p MF, \p MLI and a null third argument to the VLIWPacketizerList
/// base, caches the subtarget's instruction info in TII and that instruction
/// info's register info in TRI, and sets VLIW5 to true whenever \p ST does
/// not report the Cayman ISA.
///
/// \param MF  machine function to packetize.
/// \param ST  R600 subtarget supplying instruction info and ISA queries.
/// \param MLI loop info forwarded to the base class.
147 R600PacketizerList(MachineFunction
&MF
, const R600Subtarget
&ST
,
148 MachineLoopInfo
&MLI
)
149 : VLIWPacketizerList(MF
, MLI
, nullptr),
150 TII(ST
.getInstrInfo()),
151 TRI(TII
->getRegisterInfo()) {
// VLIW5 is simply the negation of the Cayman ISA query.
152 VLIW5
= !ST
.hasCaymanISA();
155 // initPacketizerState - initialize some internal flags.
// Resets ConsideredInstUsesAlreadyWrittenVectorElement to false; the flag is
// set again by isLegalToPacketizeTogether when two instructions share a slot.
156 void initPacketizerState() override
{
157 ConsideredInstUsesAlreadyWrittenVectorElement
= false;
160 // ignorePseudoInstruction - Ignore bundling of pseudo instructions.
161 bool ignorePseudoInstruction(const MachineInstr
&MI
,
162 const MachineBasicBlock
*MBB
) override
{
166 // isSoloInstruction - return true if instruction MI can not be packetized
167 // with any other instruction, which means that MI itself is a packet.
168 bool isSoloInstruction(const MachineInstr
&MI
) override
{
169 if (TII
->isVector(MI
))
171 if (!TII
->isALUInstr(MI
.getOpcode()))
173 if (MI
.getOpcode() == R600::GROUP_BARRIER
)
175 // XXX: This can be removed once the packetizer properly handles all the
176 // LDS instruction group restrictions.
177 return TII
->isLDSInstr(MI
.getOpcode());
180 // isLegalToPacketizeTogether - Is it legal to packetize SUI and SUJ
182 bool isLegalToPacketizeTogether(SUnit
*SUI
, SUnit
*SUJ
) override
{
183 MachineInstr
*MII
= SUI
->getInstr(), *MIJ
= SUJ
->getInstr();
184 if (getSlot(*MII
) == getSlot(*MIJ
))
185 ConsideredInstUsesAlreadyWrittenVectorElement
= true;
186 // Do MII and MIJ share the same pred_sel?
187 int OpI
= TII
->getOperandIdx(MII
->getOpcode(), R600::OpName::pred_sel
),
188 OpJ
= TII
->getOperandIdx(MIJ
->getOpcode(), R600::OpName::pred_sel
);
189 unsigned PredI
= (OpI
> -1)?MII
->getOperand(OpI
).getReg():0,
190 PredJ
= (OpJ
> -1)?MIJ
->getOperand(OpJ
).getReg():0;
193 if (SUJ
->isSucc(SUI
)) {
194 for (unsigned i
= 0, e
= SUJ
->Succs
.size(); i
< e
; ++i
) {
195 const SDep
&Dep
= SUJ
->Succs
[i
];
196 if (Dep
.getSUnit() != SUI
)
198 if (Dep
.getKind() == SDep::Anti
)
200 if (Dep
.getKind() == SDep::Output
)
201 if (MII
->getOperand(0).getReg() != MIJ
->getOperand(0).getReg())
208 TII
->definesAddressRegister(*MII
) || TII
->definesAddressRegister(*MIJ
);
210 TII
->usesAddressRegister(*MII
) || TII
->usesAddressRegister(*MIJ
);
212 return !ARDef
|| !ARUse
;
215 // isLegalToPruneDependencies - Is it legal to prune dependence between SUI
217 bool isLegalToPruneDependencies(SUnit
*SUI
, SUnit
*SUJ
) override
{
/// Stores \p Bit as the immediate value of \p MI's `last` operand.
///
/// The operand is located through TII->getOperandIdx with
/// R600::OpName::last.
/// NOTE(review): the lookup result is stored in an unsigned and used as an
/// operand index without a validity check, so this presumably expects every
/// instruction passed in to carry a `last` operand -- confirm against callers.
///
/// \param MI  instruction whose `last` operand is rewritten.
/// \param Bit new immediate value for that operand.
221 void setIsLastBit(MachineInstr
*MI
, unsigned Bit
) const {
222 unsigned LastOp
= TII
->getOperandIdx(MI
->getOpcode(), R600::OpName::last
);
223 MI
->getOperand(LastOp
).setImm(Bit
);
226 bool isBundlableWithCurrentPMI(MachineInstr
&MI
,
227 const DenseMap
<unsigned, unsigned> &PV
,
228 std::vector
<R600InstrInfo::BankSwizzle
> &BS
,
230 isTransSlot
= TII
->isTransOnly(MI
);
231 assert (!isTransSlot
|| VLIW5
);
233 // Is the dst reg sequence legal ?
234 if (!isTransSlot
&& !CurrentPacketMIs
.empty()) {
235 if (getSlot(MI
) <= getSlot(*CurrentPacketMIs
.back())) {
236 if (ConsideredInstUsesAlreadyWrittenVectorElement
&&
237 !TII
->isVectorOnly(MI
) && VLIW5
) {
240 dbgs() << "Considering as Trans Inst :";
249 // Are the Constants limitations met ?
250 CurrentPacketMIs
.push_back(&MI
);
251 if (!TII
->fitsConstReadLimitations(CurrentPacketMIs
)) {
253 dbgs() << "Couldn't pack :\n";
255 dbgs() << "with the following packets :\n";
256 for (unsigned i
= 0, e
= CurrentPacketMIs
.size() - 1; i
< e
; i
++) {
257 CurrentPacketMIs
[i
]->dump();
260 dbgs() << "because of Consts read limitations\n";
262 CurrentPacketMIs
.pop_back();
266 // Is there a BankSwizzle set that meets Read Port limitations?
267 if (!TII
->fitsReadPortLimitations(CurrentPacketMIs
,
268 PV
, BS
, isTransSlot
)) {
270 dbgs() << "Couldn't pack :\n";
272 dbgs() << "with the following packets :\n";
273 for (unsigned i
= 0, e
= CurrentPacketMIs
.size() - 1; i
< e
; i
++) {
274 CurrentPacketMIs
[i
]->dump();
277 dbgs() << "because of Read port limitations\n";
279 CurrentPacketMIs
.pop_back();
283 // We cannot read LDS source registers from the Trans slot.
284 if (isTransSlot
&& TII
->readsLDSSrcReg(MI
))
287 CurrentPacketMIs
.pop_back();
291 MachineBasicBlock::iterator
addToPacket(MachineInstr
&MI
) override
{
292 MachineBasicBlock::iterator FirstInBundle
=
293 CurrentPacketMIs
.empty() ? &MI
: CurrentPacketMIs
.front();
294 const DenseMap
<unsigned, unsigned> &PV
=
295 getPreviousVector(FirstInBundle
);
296 std::vector
<R600InstrInfo::BankSwizzle
> BS
;
299 if (isBundlableWithCurrentPMI(MI
, PV
, BS
, isTransSlot
)) {
300 for (unsigned i
= 0, e
= CurrentPacketMIs
.size(); i
< e
; i
++) {
301 MachineInstr
*MI
= CurrentPacketMIs
[i
];
302 unsigned Op
= TII
->getOperandIdx(MI
->getOpcode(),
303 R600::OpName::bank_swizzle
);
304 MI
->getOperand(Op
).setImm(BS
[i
]);
307 TII
->getOperandIdx(MI
.getOpcode(), R600::OpName::bank_swizzle
);
308 MI
.getOperand(Op
).setImm(BS
.back());
309 if (!CurrentPacketMIs
.empty())
310 setIsLastBit(CurrentPacketMIs
.back(), 0);
311 substitutePV(MI
, PV
);
312 MachineBasicBlock::iterator It
= VLIWPacketizerList::addToPacket(MI
);
314 endPacket(std::next(It
)->getParent(), std::next(It
));
318 endPacket(MI
.getParent(), MI
);
319 if (TII
->isTransOnly(MI
))
321 return VLIWPacketizerList::addToPacket(MI
);
325 bool R600Packetizer::runOnMachineFunction(MachineFunction
&Fn
) {
326 const R600Subtarget
&ST
= Fn
.getSubtarget
<R600Subtarget
>();
327 const R600InstrInfo
*TII
= ST
.getInstrInfo();
329 MachineLoopInfo
&MLI
= getAnalysis
<MachineLoopInfo
>();
331 // Instantiate the packetizer.
332 R600PacketizerList
Packetizer(Fn
, ST
, MLI
);
334 // DFA state table should not be empty.
335 assert(Packetizer
.getResourceTracker() && "Empty DFA table!");
336 assert(Packetizer
.getResourceTracker()->getInstrItins());
338 if (Packetizer
.getResourceTracker()->getInstrItins()->isEmpty())
342 // Loop over all basic blocks and remove KILL pseudo-instructions
343 // These instructions confuse the dependence analysis. Consider:
345 // R0 = KILL R0, D0 (Insn 1)
347 // Here, Insn 1 will result in the dependence graph not emitting an output
348 // dependence between Insn 0 and Insn 2. This can lead to incorrect
351 for (MachineFunction::iterator MBB
= Fn
.begin(), MBBe
= Fn
.end();
352 MBB
!= MBBe
; ++MBB
) {
353 MachineBasicBlock::iterator End
= MBB
->end();
354 MachineBasicBlock::iterator MI
= MBB
->begin();
356 if (MI
->isKill() || MI
->getOpcode() == R600::IMPLICIT_DEF
||
357 (MI
->getOpcode() == R600::CF_ALU
&& !MI
->getOperand(8).getImm())) {
358 MachineBasicBlock::iterator DeleteMI
= MI
;
360 MBB
->erase(DeleteMI
);
368 // Loop over all of the basic blocks.
369 for (MachineFunction::iterator MBB
= Fn
.begin(), MBBe
= Fn
.end();
370 MBB
!= MBBe
; ++MBB
) {
371 // Find scheduling regions and schedule / packetize each region.
372 unsigned RemainingCount
= MBB
->size();
373 for(MachineBasicBlock::iterator RegionEnd
= MBB
->end();
374 RegionEnd
!= MBB
->begin();) {
375 // The next region starts above the previous region. Look backward in the
376 // instruction stream until we find the nearest boundary.
377 MachineBasicBlock::iterator I
= RegionEnd
;
378 for(;I
!= MBB
->begin(); --I
, --RemainingCount
) {
379 if (TII
->isSchedulingBoundary(*std::prev(I
), &*MBB
, Fn
))
384 // Skip empty scheduling regions.
385 if (I
== RegionEnd
) {
386 RegionEnd
= std::prev(RegionEnd
);
390 // Skip regions with one instruction.
391 if (I
== std::prev(RegionEnd
)) {
392 RegionEnd
= std::prev(RegionEnd
);
396 Packetizer
.PacketizeMIs(&*MBB
, &*I
, RegionEnd
);
405 } // end anonymous namespace
407 INITIALIZE_PASS_BEGIN(R600Packetizer
, DEBUG_TYPE
,
408 "R600 Packetizer", false, false)
409 INITIALIZE_PASS_END(R600Packetizer
, DEBUG_TYPE
,
410 "R600 Packetizer", false, false)
412 char R600Packetizer::ID
= 0;
414 char &llvm::R600PacketizerID
= R600Packetizer::ID
;
/// Factory entry point: \returns a newly heap-allocated R600Packetizer pass
/// as a FunctionPass pointer.
416 llvm::FunctionPass
*llvm::createR600Packetizer() {
417 return new R600Packetizer();