1 //===----- R600Packetizer.cpp - VLIW packetizer ---------------------------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
11 /// This pass implements instruction packetization for R600. It unsets the
12 /// isLast bit of instructions inside a bundle and substitutes the src register
13 /// with PreviousVector when applicable.
15 //===----------------------------------------------------------------------===//
18 #include "AMDGPUSubtarget.h"
19 #include "R600InstrInfo.h"
20 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
21 #include "llvm/CodeGen/DFAPacketizer.h"
22 #include "llvm/CodeGen/MachineDominators.h"
23 #include "llvm/CodeGen/MachineFunctionPass.h"
24 #include "llvm/CodeGen/MachineLoopInfo.h"
25 #include "llvm/CodeGen/Passes.h"
26 #include "llvm/CodeGen/ScheduleDAG.h"
27 #include "llvm/Support/Debug.h"
28 #include "llvm/Support/raw_ostream.h"
32 #define DEBUG_TYPE "packets"
36 class R600Packetizer
: public MachineFunctionPass
{
40 R600Packetizer() : MachineFunctionPass(ID
) {}
/// Declares the analyses this pass depends on: MachineDominatorTree and
/// MachineLoopInfo are each marked both required and preserved, after which
/// the request is forwarded to MachineFunctionPass::getAnalysisUsage.
42 void getAnalysisUsage(AnalysisUsage
&AU
) const override
{
44 AU
.addRequired
<MachineDominatorTree
>();
45 AU
.addPreserved
<MachineDominatorTree
>();
46 AU
.addRequired
<MachineLoopInfo
>();
47 AU
.addPreserved
<MachineLoopInfo
>();
48 MachineFunctionPass::getAnalysisUsage(AU
);
/// \returns the display name of this pass, "R600 Packetizer".
51 StringRef
getPassName() const override
{ return "R600 Packetizer"; }
53 bool runOnMachineFunction(MachineFunction
&Fn
) override
;
56 class R600PacketizerList
: public VLIWPacketizerList
{
58 const R600InstrInfo
*TII
;
59 const R600RegisterInfo
&TRI
;
61 bool ConsideredInstUsesAlreadyWrittenVectorElement
;
/// \returns the hardware register channel of the register held in operand 0
/// of \p MI, as reported by TRI.getHWRegChan.
63 unsigned getSlot(const MachineInstr
&MI
) const {
64 return TRI
.getHWRegChan(MI
.getOperand(0).getReg());
67 /// \returns register to PV chan mapping for bundle/single instructions that
68 /// immediately precedes I.
69 DenseMap
<unsigned, unsigned> getPreviousVector(MachineBasicBlock::iterator I
)
71 DenseMap
<unsigned, unsigned> Result
;
73 if (!TII
->isALUInstr(I
->getOpcode()) && !I
->isBundle())
75 MachineBasicBlock::instr_iterator BI
= I
.getInstrIterator();
81 int BISlot
= getSlot(*BI
);
82 if (LastDstChan
>= BISlot
)
85 if (TII
->isPredicated(*BI
))
87 int OperandIdx
= TII
->getOperandIdx(BI
->getOpcode(), R600::OpName::write
);
88 if (OperandIdx
> -1 && BI
->getOperand(OperandIdx
).getImm() == 0)
90 int DstIdx
= TII
->getOperandIdx(BI
->getOpcode(), R600::OpName::dst
);
94 unsigned Dst
= BI
->getOperand(DstIdx
).getReg();
95 if (isTrans
|| TII
->isTransOnly(*BI
)) {
96 Result
[Dst
] = R600::PS
;
99 if (BI
->getOpcode() == R600::DOT4_r600
||
100 BI
->getOpcode() == R600::DOT4_eg
) {
101 Result
[Dst
] = R600::PV_X
;
104 if (Dst
== R600::OQAP
) {
108 switch (TRI
.getHWRegChan(Dst
)) {
122 llvm_unreachable("Invalid Chan");
125 } while ((++BI
)->isBundledWithPred());
/// Rewrites source register operands of \p MI using the mapping in \p PVs.
/// For each of three operand names taken from Ops, the name is resolved to an
/// operand index for MI's opcode, the register in that operand is looked up in
/// \p PVs, and the operand is set to the mapped register.
/// NOTE(review): the definition of Ops and any guards on OperandIdx or on the
/// lookup result are not visible in this excerpt -- confirm against the full
/// source before relying on this description.
129 void substitutePV(MachineInstr
&MI
, const DenseMap
<unsigned, unsigned> &PVs
)
136 for (unsigned i
= 0; i
< 3; i
++) {
137 int OperandIdx
= TII
->getOperandIdx(MI
.getOpcode(), Ops
[i
]);
140 unsigned Src
= MI
.getOperand(OperandIdx
).getReg();
141 const DenseMap
<unsigned, unsigned>::const_iterator It
= PVs
.find(Src
);
143 MI
.getOperand(OperandIdx
).setReg(It
->second
);
/// Builds the packetizer list for \p MF. The base VLIWPacketizerList is
/// constructed from \p MF and \p MLI with a null third argument. TII is taken
/// from the subtarget \p ST and TRI from TII. VLIW5 is set to true when the
/// subtarget does not report the Cayman ISA.
148 R600PacketizerList(MachineFunction
&MF
, const R600Subtarget
&ST
,
149 MachineLoopInfo
&MLI
)
150 : VLIWPacketizerList(MF
, MLI
, nullptr),
151 TII(ST
.getInstrInfo()),
152 TRI(TII
->getRegisterInfo()) {
153 VLIW5
= !ST
.hasCaymanISA();
156 // initPacketizerState - initialize some internal flags.
// Resets ConsideredInstUsesAlreadyWrittenVectorElement to false; that flag is
// set by isLegalToPacketizeTogether when two instructions share a slot.
157 void initPacketizerState() override
{
158 ConsideredInstUsesAlreadyWrittenVectorElement
= false;
161 // ignorePseudoInstruction - Ignore bundling of pseudo instructions.
162 bool ignorePseudoInstruction(const MachineInstr
&MI
,
163 const MachineBasicBlock
*MBB
) override
{
167 // isSoloInstruction - return true if instruction MI can not be packetized
168 // with any other instruction, which means that MI itself is a packet.
169 bool isSoloInstruction(const MachineInstr
&MI
) override
{
170 if (TII
->isVector(MI
))
172 if (!TII
->isALUInstr(MI
.getOpcode()))
174 if (MI
.getOpcode() == R600::GROUP_BARRIER
)
176 // XXX: This can be removed once the packetizer properly handles all the
177 // LDS instruction group restrictions.
178 return TII
->isLDSInstr(MI
.getOpcode());
181 // isLegalToPacketizeTogether - Is it legal to packetize SUI and SUJ
183 bool isLegalToPacketizeTogether(SUnit
*SUI
, SUnit
*SUJ
) override
{
184 MachineInstr
*MII
= SUI
->getInstr(), *MIJ
= SUJ
->getInstr();
185 if (getSlot(*MII
) == getSlot(*MIJ
))
186 ConsideredInstUsesAlreadyWrittenVectorElement
= true;
187 // Do MII and MIJ share the same pred_sel ?
188 int OpI
= TII
->getOperandIdx(MII
->getOpcode(), R600::OpName::pred_sel
),
189 OpJ
= TII
->getOperandIdx(MIJ
->getOpcode(), R600::OpName::pred_sel
);
190 unsigned PredI
= (OpI
> -1)?MII
->getOperand(OpI
).getReg():0,
191 PredJ
= (OpJ
> -1)?MIJ
->getOperand(OpJ
).getReg():0;
194 if (SUJ
->isSucc(SUI
)) {
195 for (unsigned i
= 0, e
= SUJ
->Succs
.size(); i
< e
; ++i
) {
196 const SDep
&Dep
= SUJ
->Succs
[i
];
197 if (Dep
.getSUnit() != SUI
)
199 if (Dep
.getKind() == SDep::Anti
)
201 if (Dep
.getKind() == SDep::Output
)
202 if (MII
->getOperand(0).getReg() != MIJ
->getOperand(0).getReg())
209 TII
->definesAddressRegister(*MII
) || TII
->definesAddressRegister(*MIJ
);
211 TII
->usesAddressRegister(*MII
) || TII
->usesAddressRegister(*MIJ
);
213 return !ARDef
|| !ARUse
;
216 // isLegalToPruneDependencies - Is it legal to prune dependence between SUI
218 bool isLegalToPruneDependencies(SUnit
*SUI
, SUnit
*SUJ
) override
{
/// Writes \p Bit into the immediate of \p MI's `last` operand. The operand
/// index is looked up from MI's opcode via TII->getOperandIdx.
/// NOTE(review): the lookup result is stored in an unsigned and used without
/// a validity check, while other call sites in this file compare the result
/// against -1. This presumably relies on MI always having a `last` operand --
/// confirm against callers.
222 void setIsLastBit(MachineInstr
*MI
, unsigned Bit
) const {
223 unsigned LastOp
= TII
->getOperandIdx(MI
->getOpcode(), R600::OpName::last
);
224 MI
->getOperand(LastOp
).setImm(Bit
);
227 bool isBundlableWithCurrentPMI(MachineInstr
&MI
,
228 const DenseMap
<unsigned, unsigned> &PV
,
229 std::vector
<R600InstrInfo::BankSwizzle
> &BS
,
231 isTransSlot
= TII
->isTransOnly(MI
);
232 assert (!isTransSlot
|| VLIW5
);
234 // Is the dst reg sequence legal ?
235 if (!isTransSlot
&& !CurrentPacketMIs
.empty()) {
236 if (getSlot(MI
) <= getSlot(*CurrentPacketMIs
.back())) {
237 if (ConsideredInstUsesAlreadyWrittenVectorElement
&&
238 !TII
->isVectorOnly(MI
) && VLIW5
) {
241 dbgs() << "Considering as Trans Inst :";
250 // Are the Constants limitations met ?
251 CurrentPacketMIs
.push_back(&MI
);
252 if (!TII
->fitsConstReadLimitations(CurrentPacketMIs
)) {
254 dbgs() << "Couldn't pack :\n";
256 dbgs() << "with the following packets :\n";
257 for (unsigned i
= 0, e
= CurrentPacketMIs
.size() - 1; i
< e
; i
++) {
258 CurrentPacketMIs
[i
]->dump();
261 dbgs() << "because of Consts read limitations\n";
263 CurrentPacketMIs
.pop_back();
267 // Is there a BankSwizzle set that meets Read Port limitations ?
268 if (!TII
->fitsReadPortLimitations(CurrentPacketMIs
,
269 PV
, BS
, isTransSlot
)) {
271 dbgs() << "Couldn't pack :\n";
273 dbgs() << "with the following packets :\n";
274 for (unsigned i
= 0, e
= CurrentPacketMIs
.size() - 1; i
< e
; i
++) {
275 CurrentPacketMIs
[i
]->dump();
278 dbgs() << "because of Read port limitations\n";
280 CurrentPacketMIs
.pop_back();
284 // We cannot read LDS source registers from the Trans slot.
285 if (isTransSlot
&& TII
->readsLDSSrcReg(MI
))
288 CurrentPacketMIs
.pop_back();
292 MachineBasicBlock::iterator
addToPacket(MachineInstr
&MI
) override
{
293 MachineBasicBlock::iterator FirstInBundle
=
294 CurrentPacketMIs
.empty() ? &MI
: CurrentPacketMIs
.front();
295 const DenseMap
<unsigned, unsigned> &PV
=
296 getPreviousVector(FirstInBundle
);
297 std::vector
<R600InstrInfo::BankSwizzle
> BS
;
300 if (isBundlableWithCurrentPMI(MI
, PV
, BS
, isTransSlot
)) {
301 for (unsigned i
= 0, e
= CurrentPacketMIs
.size(); i
< e
; i
++) {
302 MachineInstr
*MI
= CurrentPacketMIs
[i
];
303 unsigned Op
= TII
->getOperandIdx(MI
->getOpcode(),
304 R600::OpName::bank_swizzle
);
305 MI
->getOperand(Op
).setImm(BS
[i
]);
308 TII
->getOperandIdx(MI
.getOpcode(), R600::OpName::bank_swizzle
);
309 MI
.getOperand(Op
).setImm(BS
.back());
310 if (!CurrentPacketMIs
.empty())
311 setIsLastBit(CurrentPacketMIs
.back(), 0);
312 substitutePV(MI
, PV
);
313 MachineBasicBlock::iterator It
= VLIWPacketizerList::addToPacket(MI
);
315 endPacket(std::next(It
)->getParent(), std::next(It
));
319 endPacket(MI
.getParent(), MI
);
320 if (TII
->isTransOnly(MI
))
322 return VLIWPacketizerList::addToPacket(MI
);
/// Entry point of the pass for one machine function. From the visible code:
/// it obtains the R600 subtarget, its instruction info and MachineLoopInfo,
/// instantiates an R600PacketizerList, erases KILL, IMPLICIT_DEF and
/// zero-flagged CF_ALU instructions from every block, then walks each block
/// from its end towards its beginning, splitting it at scheduling boundaries
/// and handing every region that is neither empty nor a single instruction to
/// PacketizeMIs.
/// NOTE(review): several lines of this function (early exits, loop
/// bookkeeping, the return value) are missing from this excerpt -- confirm
/// details against the full source.
326 bool R600Packetizer::runOnMachineFunction(MachineFunction
&Fn
) {
327 const R600Subtarget
&ST
= Fn
.getSubtarget
<R600Subtarget
>();
328 const R600InstrInfo
*TII
= ST
.getInstrInfo();
330 MachineLoopInfo
&MLI
= getAnalysis
<MachineLoopInfo
>();
332 // Instantiate the packetizer.
333 R600PacketizerList
Packetizer(Fn
, ST
, MLI
);
335 // DFA state table should not be empty.
336 assert(Packetizer
.getResourceTracker() && "Empty DFA table!");
337 assert(Packetizer
.getResourceTracker()->getInstrItins());
339 if (Packetizer
.getResourceTracker()->getInstrItins()->isEmpty())
343 // Loop over all basic blocks and remove KILL pseudo-instructions
344 // These instructions confuse the dependence analysis. Consider:
346 // R0 = KILL R0, D0 (Insn 1)
348 // Here, Insn 1 will result in the dependence graph not emitting an output
349 // dependence between Insn 0 and Insn 2. This can lead to incorrect
352 for (MachineFunction::iterator MBB
= Fn
.begin(), MBBe
= Fn
.end();
353 MBB
!= MBBe
; ++MBB
) {
354 MachineBasicBlock::iterator End
= MBB
->end();
355 MachineBasicBlock::iterator MI
= MBB
->begin();
// Besides KILLs, IMPLICIT_DEFs and CF_ALU instructions whose operand 8
// immediate is zero are selected for erasure as well.
357 if (MI
->isKill() || MI
->getOpcode() == R600::IMPLICIT_DEF
||
358 (MI
->getOpcode() == R600::CF_ALU
&& !MI
->getOperand(8).getImm())) {
359 MachineBasicBlock::iterator DeleteMI
= MI
;
361 MBB
->erase(DeleteMI
);
369 // Loop over all of the basic blocks.
370 for (MachineFunction::iterator MBB
= Fn
.begin(), MBBe
= Fn
.end();
371 MBB
!= MBBe
; ++MBB
) {
372 // Find scheduling regions and schedule / packetize each region.
373 unsigned RemainingCount
= MBB
->size();
374 for(MachineBasicBlock::iterator RegionEnd
= MBB
->end();
375 RegionEnd
!= MBB
->begin();) {
376 // The next region starts above the previous region. Look backward in the
377 // instruction stream until we find the nearest boundary.
378 MachineBasicBlock::iterator I
= RegionEnd
;
379 for(;I
!= MBB
->begin(); --I
, --RemainingCount
) {
380 if (TII
->isSchedulingBoundary(*std::prev(I
), &*MBB
, Fn
))
385 // Skip empty scheduling regions.
386 if (I
== RegionEnd
) {
387 RegionEnd
= std::prev(RegionEnd
);
391 // Skip regions with one instruction.
392 if (I
== std::prev(RegionEnd
)) {
393 RegionEnd
= std::prev(RegionEnd
);
397 Packetizer
.PacketizeMIs(&*MBB
, &*I
, RegionEnd
);
406 } // end anonymous namespace
408 INITIALIZE_PASS_BEGIN(R600Packetizer
, DEBUG_TYPE
,
409 "R600 Packetizer", false, false)
410 INITIALIZE_PASS_END(R600Packetizer
, DEBUG_TYPE
,
411 "R600 Packetizer", false, false)
413 char R600Packetizer::ID
= 0;
415 char &llvm::R600PacketizerID
= R600Packetizer::ID
;
417 llvm::FunctionPass
*llvm::createR600Packetizer() {
418 return new R600Packetizer();