1 //===----- R600Packetizer.cpp - VLIW packetizer ---------------------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
10 /// This pass implements instruction packetization for R600. It unsets the
11 /// isLast bit of instructions inside a bundle and substitutes source registers
12 /// with PreviousVector when applicable.
14 //===----------------------------------------------------------------------===//
17 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
18 #include "R600Subtarget.h"
19 #include "llvm/CodeGen/DFAPacketizer.h"
20 #include "llvm/CodeGen/MachineDominators.h"
21 #include "llvm/CodeGen/MachineLoopInfo.h"
22 #include "llvm/CodeGen/ScheduleDAG.h"
26 #define DEBUG_TYPE "packets"
// MachineFunctionPass wrapper for the R600 packetizer. It declares the
// analyses the pass depends on, reports the pass name, and declares
// runOnMachineFunction(), which is defined out of line.
30 class R600Packetizer
: public MachineFunctionPass
{
// Constructs the pass, forwarding ID to the MachineFunctionPass base.
34 R600Packetizer() : MachineFunctionPass(ID
) {}
// Registers MachineDominatorTree and MachineLoopInfo as both required and
// preserved, then lets the base class add its own analysis usage.
36 void getAnalysisUsage(AnalysisUsage
&AU
) const override
{
38 AU
.addRequired
<MachineDominatorTree
>();
39 AU
.addPreserved
<MachineDominatorTree
>();
40 AU
.addRequired
<MachineLoopInfo
>();
41 AU
.addPreserved
<MachineLoopInfo
>();
42 MachineFunctionPass::getAnalysisUsage(AU
);
// Returns the fixed display name of this pass.
45 StringRef
getPassName() const override
{ return "R600 Packetizer"; }
// Pass entry point for one machine function; only declared here.
47 bool runOnMachineFunction(MachineFunction
&Fn
) override
;
// R600-specific packetizer built on VLIWPacketizerList. The members visible
// here are the instruction info (TII), the register info (TRI) and a flag
// that tracks slot collisions between candidate instructions.
50 class R600PacketizerList
: public VLIWPacketizerList
{
// Instruction info; initialised from the subtarget in the constructor.
52 const R600InstrInfo
*TII
;
// Register info; initialised from TII->getRegisterInfo() in the constructor.
53 const R600RegisterInfo
&TRI
;
// Set to true by isLegalToPacketizeTogether() when both instructions map to
// the same slot; reset to false by initPacketizerState().
55 bool ConsideredInstUsesAlreadyWrittenVectorElement
;
// Returns the slot of MI, computed as the hardware register channel
// (TRI.getHWRegChan) of the register held in MI's operand 0.
57 unsigned getSlot(const MachineInstr
&MI
) const {
58 return TRI
.getHWRegChan(MI
.getOperand(0).getReg());
61 /// \returns register to PV chan mapping for bundle/single instructions that
62 /// immediately precede I.
63 DenseMap
<unsigned, unsigned> getPreviousVector(MachineBasicBlock::iterator I
)
// Mapping that is filled in below, keyed by destination register.
65 DenseMap
<unsigned, unsigned> Result
;
// Guard: I is neither an ALU instruction nor a bundle.
67 if (!TII
->isALUInstr(I
->getOpcode()) && !I
->isBundle())
// Walk at instruction granularity so bundled instructions are visited
// individually (see the isBundledWithPred() loop condition at the end).
69 MachineBasicBlock::instr_iterator BI
= I
.getInstrIterator();
// Slot of the current instruction, compared against LastDstChan.
75 int BISlot
= getSlot(*BI
);
76 if (LastDstChan
>= BISlot
)
// Predicated instructions are tested separately.
79 if (TII
->isPredicated(*BI
))
// Look up the 'write' operand; the test below matches instructions that have
// the operand and whose immediate is 0.
81 int OperandIdx
= TII
->getOperandIdx(BI
->getOpcode(), R600::OpName::write
);
82 if (OperandIdx
> -1 && BI
->getOperand(OperandIdx
).getImm() == 0)
// Fetch the destination register from the 'dst' operand.
84 int DstIdx
= TII
->getOperandIdx(BI
->getOpcode(), R600::OpName::dst
);
88 Register Dst
= BI
->getOperand(DstIdx
).getReg();
// Trans-slot results are mapped to R600::PS.
89 if (isTrans
|| TII
->isTransOnly(*BI
)) {
90 Result
[Dst
] = R600::PS
;
// Both DOT4 opcodes map their destination to R600::PV_X.
93 if (BI
->getOpcode() == R600::DOT4_r600
||
94 BI
->getOpcode() == R600::DOT4_eg
) {
95 Result
[Dst
] = R600::PV_X
;
// OQAP destinations are special-cased.
98 if (Dst
== R600::OQAP
) {
// Dispatch on the hardware channel of Dst; an unexpected channel is treated
// as unreachable.
102 switch (TRI
.getHWRegChan(Dst
)) {
116 llvm_unreachable("Invalid Chan");
// Continue while the next instruction is bundled with its predecessor.
119 } while ((++BI
)->isBundledWithPred());
// Rewrites source register operands of MI using the mapping in PVs.
// For each of the three operand names in Ops, the operand's register is
// looked up in PVs and the operand is set to the mapped register.
123 void substitutePV(MachineInstr
&MI
, const DenseMap
<unsigned, unsigned> &PVs
)
130 for (unsigned i
= 0; i
< 3; i
++) {
// Index of the i-th named operand in MI's opcode.
131 int OperandIdx
= TII
->getOperandIdx(MI
.getOpcode(), Ops
[i
]);
// Current source register and its entry in PVs.
134 Register Src
= MI
.getOperand(OperandIdx
).getReg();
135 const DenseMap
<unsigned, unsigned>::const_iterator It
= PVs
.find(Src
);
// Replace the operand with the mapped register.
137 MI
.getOperand(OperandIdx
).setReg(It
->second
);
// Constructor. Passes MF, MLI and a null third argument to VLIWPacketizerList,
// takes TII from the subtarget and TRI from TII, and sets VLIW5 to true
// exactly when the subtarget does not report the Cayman ISA.
142 R600PacketizerList(MachineFunction
&MF
, const R600Subtarget
&ST
,
143 MachineLoopInfo
&MLI
)
144 : VLIWPacketizerList(MF
, MLI
, nullptr),
145 TII(ST
.getInstrInfo()),
146 TRI(TII
->getRegisterInfo()) {
147 VLIW5
= !ST
.hasCaymanISA();
150 // initPacketizerState - initialize some internal flags.
// Clears ConsideredInstUsesAlreadyWrittenVectorElement.
151 void initPacketizerState() override
{
152 ConsideredInstUsesAlreadyWrittenVectorElement
= false;
155 // ignorePseudoInstruction - Ignore bundling of pseudo instructions.
// Override taking the candidate instruction MI and a basic block MBB.
// NOTE(review): the returned value is not visible in this view - confirm.
156 bool ignorePseudoInstruction(const MachineInstr
&MI
,
157 const MachineBasicBlock
*MBB
) override
{
161 // isSoloInstruction - return true if instruction MI can not be packetized
162 // with any other instruction, which means that MI itself is a packet.
// The checks run in order: vector instructions, non-ALU instructions and
// GROUP_BARRIER are tested first; otherwise the result is whether MI is an
// LDS instruction.
163 bool isSoloInstruction(const MachineInstr
&MI
) override
{
164 if (TII
->isVector(MI
))
166 if (!TII
->isALUInstr(MI
.getOpcode()))
168 if (MI
.getOpcode() == R600::GROUP_BARRIER
)
170 // XXX: This can be removed once the packetizer properly handles all the
171 // LDS instruction group restrictions.
172 return TII
->isLDSInstr(MI
.getOpcode());
175 // isLegalToPacketizeTogether - Is it legal to packetize SUI and SUJ
// Inspects slot usage, the pred_sel operands, the dependence edges from SUJ
// to SUI and address-register definitions/uses of the two instructions.
177 bool isLegalToPacketizeTogether(SUnit
*SUI
, SUnit
*SUJ
) override
{
178 MachineInstr
*MII
= SUI
->getInstr(), *MIJ
= SUJ
->getInstr();
// Record that both instructions map to the same slot.
179 if (getSlot(*MII
) == getSlot(*MIJ
))
180 ConsideredInstUsesAlreadyWrittenVectorElement
= true;
181 // Do MII and MIJ share the same pred_sel?
182 int OpI
= TII
->getOperandIdx(MII
->getOpcode(), R600::OpName::pred_sel
),
183 OpJ
= TII
->getOperandIdx(MIJ
->getOpcode(), R600::OpName::pred_sel
);
// An instruction without a pred_sel operand contributes a default Register().
184 Register PredI
= (OpI
> -1)?MII
->getOperand(OpI
).getReg() : Register(),
185 PredJ
= (OpJ
> -1)?MIJ
->getOperand(OpJ
).getReg() : Register();
// When SUI is a successor of SUJ, examine each edge from SUJ to SUI.
188 if (SUJ
->isSucc(SUI
)) {
189 for (unsigned i
= 0, e
= SUJ
->Succs
.size(); i
< e
; ++i
) {
190 const SDep
&Dep
= SUJ
->Succs
[i
];
// Edges that do not target SUI are tested first.
191 if (Dep
.getSUnit() != SUI
)
// Anti dependences are handled on their own.
193 if (Dep
.getKind() == SDep::Anti
)
// Output dependences are further qualified by comparing the registers in
// operand 0 of both instructions.
195 if (Dep
.getKind() == SDep::Output
)
196 if (MII
->getOperand(0).getReg() != MIJ
->getOperand(0).getReg())
// Does either instruction define the address register?
203 TII
->definesAddressRegister(*MII
) || TII
->definesAddressRegister(*MIJ
);
// Does either instruction use the address register?
205 TII
->usesAddressRegister(*MII
) || TII
->usesAddressRegister(*MIJ
);
// The pair is rejected here only when both ARDef and ARUse are true.
207 return !ARDef
|| !ARUse
;
210 // isLegalToPruneDependencies - Is it legal to prune dependence between SUI
// Override taking the two scheduling units SUI and SUJ.
// NOTE(review): the returned value is not visible in this view - confirm.
212 bool isLegalToPruneDependencies(SUnit
*SUI
, SUnit
*SUJ
) override
{
// Writes Bit into the immediate of MI's 'last' operand.
// The operand index comes from TII->getOperandIdx and is used without a
// validity check, so MI is presumably expected to have a 'last' operand.
216 void setIsLastBit(MachineInstr
*MI
, unsigned Bit
) const {
217 unsigned LastOp
= TII
->getOperandIdx(MI
->getOpcode(), R600::OpName::last
);
218 MI
->getOperand(LastOp
).setImm(Bit
);
// Tests whether MI can join the instructions already in CurrentPacketMIs.
// PV is the previous-vector register mapping and BS receives bank swizzles
// from fitsReadPortLimitations. MI is pushed onto CurrentPacketMIs for the
// checks; each path visible below pops it again.
221 bool isBundlableWithCurrentPMI(MachineInstr
&MI
,
222 const DenseMap
<unsigned, unsigned> &PV
,
223 std::vector
<R600InstrInfo::BankSwizzle
> &BS
,
// Trans-only instructions occupy the Trans slot, which requires VLIW5.
225 isTransSlot
= TII
->isTransOnly(MI
);
226 assert (!isTransSlot
|| VLIW5
);
228 // Is the dst reg sequence legal ?
229 if (!isTransSlot
&& !CurrentPacketMIs
.empty()) {
// MI's slot does not come after the slot of the last packet member.
230 if (getSlot(MI
) <= getSlot(*CurrentPacketMIs
.back())) {
// On VLIW5, an instruction that is not vector-only and had a slot collision
// is considered as a Trans instruction (see the debug message below).
231 if (ConsideredInstUsesAlreadyWrittenVectorElement
&&
232 !TII
->isVectorOnly(MI
) && VLIW5
) {
235 dbgs() << "Considering as Trans Inst :";
244 // Are the Constants limitations met ?
245 CurrentPacketMIs
.push_back(&MI
);
246 if (!TII
->fitsConstReadLimitations(CurrentPacketMIs
)) {
248 dbgs() << "Couldn't pack :\n";
250 dbgs() << "with the following packets :\n";
// Dump every packet member except the MI that was just pushed.
251 for (unsigned i
= 0, e
= CurrentPacketMIs
.size() - 1; i
< e
; i
++) {
252 CurrentPacketMIs
[i
]->dump();
255 dbgs() << "because of Consts read limitations\n";
257 CurrentPacketMIs
.pop_back();
261 // Is there a BankSwizzle set that meets Read Port limitations ?
262 if (!TII
->fitsReadPortLimitations(CurrentPacketMIs
,
263 PV
, BS
, isTransSlot
)) {
265 dbgs() << "Couldn't pack :\n";
267 dbgs() << "with the following packets :\n";
// Dump every packet member except the MI that was just pushed.
268 for (unsigned i
= 0, e
= CurrentPacketMIs
.size() - 1; i
< e
; i
++) {
269 CurrentPacketMIs
[i
]->dump();
272 dbgs() << "because of Read port limitations\n";
274 CurrentPacketMIs
.pop_back();
278 // We cannot read LDS source registers from the Trans slot.
279 if (isTransSlot
&& TII
->readsLDSSrcReg(MI
))
282 CurrentPacketMIs
.pop_back();
// Adds MI to the current packet when isBundlableWithCurrentPMI() accepts it;
// the code after that branch calls endPacket() before adding MI. Both paths
// finish through VLIWPacketizerList::addToPacket(MI).
286 MachineBasicBlock::iterator
addToPacket(MachineInstr
&MI
) override
{
// The first instruction of the bundle is MI itself when the packet is empty.
287 MachineBasicBlock::iterator FirstInBundle
=
288 CurrentPacketMIs
.empty() ? &MI
: CurrentPacketMIs
.front();
// Previous-vector mapping computed from the first instruction of the bundle.
289 const DenseMap
<unsigned, unsigned> &PV
=
290 getPreviousVector(FirstInBundle
);
// Filled in by isBundlableWithCurrentPMI().
291 std::vector
<R600InstrInfo::BankSwizzle
> BS
;
294 if (isBundlableWithCurrentPMI(MI
, PV
, BS
, isTransSlot
)) {
// Apply BS[i] to the bank_swizzle operand of each existing packet member.
// Note that the loop-local MI shadows the parameter MI.
295 for (unsigned i
= 0, e
= CurrentPacketMIs
.size(); i
< e
; i
++) {
296 MachineInstr
*MI
= CurrentPacketMIs
[i
];
297 unsigned Op
= TII
->getOperandIdx(MI
->getOpcode(),
298 R600::OpName::bank_swizzle
);
299 MI
->getOperand(Op
).setImm(BS
[i
]);
// The new instruction takes the last swizzle in BS.
302 TII
->getOperandIdx(MI
.getOpcode(), R600::OpName::bank_swizzle
);
303 MI
.getOperand(Op
).setImm(BS
.back());
// The previous tail of the packet is no longer last, so its bit is set to 0.
304 if (!CurrentPacketMIs
.empty())
305 setIsLastBit(CurrentPacketMIs
.back(), 0);
// Rewrite MI's sources through PV before adding it to the packet.
306 substitutePV(MI
, PV
);
307 MachineBasicBlock::iterator It
= VLIWPacketizerList::addToPacket(MI
);
// Ends the packet at the instruction following It.
309 endPacket(std::next(It
)->getParent(), std::next(It
));
// MI was not bundled: end the current packet at MI, then add MI.
313 endPacket(MI
.getParent(), MI
);
314 if (TII
->isTransOnly(MI
))
316 return VLIWPacketizerList::addToPacket(MI
);
// Pass entry point. Builds an R600PacketizerList for Fn, erases instructions
// that would confuse dependence analysis, then walks each basic block from
// the end, splitting it into scheduling regions and packetizing each region
// that holds more than one instruction.
320 bool R600Packetizer::runOnMachineFunction(MachineFunction
&Fn
) {
321 const R600Subtarget
&ST
= Fn
.getSubtarget
<R600Subtarget
>();
322 const R600InstrInfo
*TII
= ST
.getInstrInfo();
324 MachineLoopInfo
&MLI
= getAnalysis
<MachineLoopInfo
>();
326 // Instantiate the packetizer.
327 R600PacketizerList
Packetizer(Fn
, ST
, MLI
);
329 // DFA state table should not be empty.
330 assert(Packetizer
.getResourceTracker() && "Empty DFA table!");
331 assert(Packetizer
.getResourceTracker()->getInstrItins());
// Empty instruction itineraries are tested before any block is processed.
333 if (Packetizer
.getResourceTracker()->getInstrItins()->isEmpty())
337 // Loop over all basic blocks and remove KILL pseudo-instructions
338 // These instructions confuse the dependence analysis. Consider:
340 // R0 = KILL R0, D0 (Insn 1)
342 // Here, Insn 1 will result in the dependence graph not emitting an output
343 // dependence between Insn 0 and Insn 2. This can lead to incorrect
346 for (MachineFunction::iterator MBB
= Fn
.begin(), MBBe
= Fn
.end();
347 MBB
!= MBBe
; ++MBB
) {
348 MachineBasicBlock::iterator End
= MBB
->end();
349 MachineBasicBlock::iterator MI
= MBB
->begin();
// Erased: KILL, IMPLICIT_DEF, and CF_ALU whose operand 8 immediate is zero.
351 if (MI
->isKill() || MI
->getOpcode() == R600::IMPLICIT_DEF
||
352 (MI
->getOpcode() == R600::CF_ALU
&& !MI
->getOperand(8).getImm())) {
353 MachineBasicBlock::iterator DeleteMI
= MI
;
355 MBB
->erase(DeleteMI
);
363 // Loop over all of the basic blocks.
364 for (MachineFunction::iterator MBB
= Fn
.begin(), MBBe
= Fn
.end();
365 MBB
!= MBBe
; ++MBB
) {
366 // Find scheduling regions and schedule / packetize each region.
367 unsigned RemainingCount
= MBB
->size();
368 for(MachineBasicBlock::iterator RegionEnd
= MBB
->end();
369 RegionEnd
!= MBB
->begin();) {
370 // The next region starts above the previous region. Look backward in the
371 // instruction stream until we find the nearest boundary.
372 MachineBasicBlock::iterator I
= RegionEnd
;
// Each step backwards also decrements RemainingCount.
373 for(;I
!= MBB
->begin(); --I
, --RemainingCount
) {
374 if (TII
->isSchedulingBoundary(*std::prev(I
), &*MBB
, Fn
))
379 // Skip empty scheduling regions.
380 if (I
== RegionEnd
) {
381 RegionEnd
= std::prev(RegionEnd
);
385 // Skip regions with one instruction.
386 if (I
== std::prev(RegionEnd
)) {
387 RegionEnd
= std::prev(RegionEnd
);
// Packetize the instructions in [I, RegionEnd).
391 Packetizer
.PacketizeMIs(&*MBB
, &*I
, RegionEnd
);
400 } // end anonymous namespace
// Pass registration: the pass is registered under DEBUG_TYPE ("packets")
// with the display name "R600 Packetizer"; both trailing flags are false.
402 INITIALIZE_PASS_BEGIN(R600Packetizer
, DEBUG_TYPE
,
403 "R600 Packetizer", false, false)
404 INITIALIZE_PASS_END(R600Packetizer
, DEBUG_TYPE
,
405 "R600 Packetizer", false, false)
// Definition of the pass ID member.
407 char R600Packetizer::ID
= 0;
// Exposes the pass ID through the llvm namespace as a reference to the same
// object.
409 char &llvm::R600PacketizerID
= R600Packetizer::ID
;
// Factory: returns a newly allocated R600Packetizer as a FunctionPass pointer.
411 llvm::FunctionPass
*llvm::createR600Packetizer() {
412 return new R600Packetizer();