[SimplifyCFG] FoldTwoEntryPHINode(): consider *total* speculation cost, not per-BB...
[llvm-complete.git] / lib / CodeGen / ExecutionDomainFix.cpp
bloba2dd5eee33b73d3e0cd16b4b3f7325b5e218af93
1 //===- ExecutionDomainFix.cpp - Fix execution domain issues ----*- C++ -*--===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
9 #include "llvm/CodeGen/ExecutionDomainFix.h"
10 #include "llvm/CodeGen/MachineRegisterInfo.h"
11 #include "llvm/CodeGen/TargetInstrInfo.h"
13 using namespace llvm;
15 #define DEBUG_TYPE "execution-deps-fix"
17 iterator_range<SmallVectorImpl<int>::const_iterator>
18 ExecutionDomainFix::regIndices(unsigned Reg) const {
19 assert(Reg < AliasMap.size() && "Invalid register");
20 const auto &Entry = AliasMap[Reg];
21 return make_range(Entry.begin(), Entry.end());
24 DomainValue *ExecutionDomainFix::alloc(int domain) {
25 DomainValue *dv = Avail.empty() ? new (Allocator.Allocate()) DomainValue
26 : Avail.pop_back_val();
27 if (domain >= 0)
28 dv->addDomain(domain);
29 assert(dv->Refs == 0 && "Reference count wasn't cleared");
30 assert(!dv->Next && "Chained DomainValue shouldn't have been recycled");
31 return dv;
34 void ExecutionDomainFix::release(DomainValue *DV) {
35 while (DV) {
36 assert(DV->Refs && "Bad DomainValue");
37 if (--DV->Refs)
38 return;
40 // There are no more DV references. Collapse any contained instructions.
41 if (DV->AvailableDomains && !DV->isCollapsed())
42 collapse(DV, DV->getFirstDomain());
44 DomainValue *Next = DV->Next;
45 DV->clear();
46 Avail.push_back(DV);
47 // Also release the next DomainValue in the chain.
48 DV = Next;
52 DomainValue *ExecutionDomainFix::resolve(DomainValue *&DVRef) {
53 DomainValue *DV = DVRef;
54 if (!DV || !DV->Next)
55 return DV;
57 // DV has a chain. Find the end.
59 DV = DV->Next;
60 while (DV->Next);
62 // Update DVRef to point to DV.
63 retain(DV);
64 release(DVRef);
65 DVRef = DV;
66 return DV;
69 void ExecutionDomainFix::setLiveReg(int rx, DomainValue *dv) {
70 assert(unsigned(rx) < NumRegs && "Invalid index");
71 assert(!LiveRegs.empty() && "Must enter basic block first.");
73 if (LiveRegs[rx] == dv)
74 return;
75 if (LiveRegs[rx])
76 release(LiveRegs[rx]);
77 LiveRegs[rx] = retain(dv);
80 void ExecutionDomainFix::kill(int rx) {
81 assert(unsigned(rx) < NumRegs && "Invalid index");
82 assert(!LiveRegs.empty() && "Must enter basic block first.");
83 if (!LiveRegs[rx])
84 return;
86 release(LiveRegs[rx]);
87 LiveRegs[rx] = nullptr;
90 void ExecutionDomainFix::force(int rx, unsigned domain) {
91 assert(unsigned(rx) < NumRegs && "Invalid index");
92 assert(!LiveRegs.empty() && "Must enter basic block first.");
93 if (DomainValue *dv = LiveRegs[rx]) {
94 if (dv->isCollapsed())
95 dv->addDomain(domain);
96 else if (dv->hasDomain(domain))
97 collapse(dv, domain);
98 else {
99 // This is an incompatible open DomainValue. Collapse it to whatever and
100 // force the new value into domain. This costs a domain crossing.
101 collapse(dv, dv->getFirstDomain());
102 assert(LiveRegs[rx] && "Not live after collapse?");
103 LiveRegs[rx]->addDomain(domain);
105 } else {
106 // Set up basic collapsed DomainValue.
107 setLiveReg(rx, alloc(domain));
111 void ExecutionDomainFix::collapse(DomainValue *dv, unsigned domain) {
112 assert(dv->hasDomain(domain) && "Cannot collapse");
114 // Collapse all the instructions.
115 while (!dv->Instrs.empty())
116 TII->setExecutionDomain(*dv->Instrs.pop_back_val(), domain);
117 dv->setSingleDomain(domain);
119 // If there are multiple users, give them new, unique DomainValues.
120 if (!LiveRegs.empty() && dv->Refs > 1)
121 for (unsigned rx = 0; rx != NumRegs; ++rx)
122 if (LiveRegs[rx] == dv)
123 setLiveReg(rx, alloc(domain));
126 bool ExecutionDomainFix::merge(DomainValue *A, DomainValue *B) {
127 assert(!A->isCollapsed() && "Cannot merge into collapsed");
128 assert(!B->isCollapsed() && "Cannot merge from collapsed");
129 if (A == B)
130 return true;
131 // Restrict to the domains that A and B have in common.
132 unsigned common = A->getCommonDomains(B->AvailableDomains);
133 if (!common)
134 return false;
135 A->AvailableDomains = common;
136 A->Instrs.append(B->Instrs.begin(), B->Instrs.end());
138 // Clear the old DomainValue so we won't try to swizzle instructions twice.
139 B->clear();
140 // All uses of B are referred to A.
141 B->Next = retain(A);
143 for (unsigned rx = 0; rx != NumRegs; ++rx) {
144 assert(!LiveRegs.empty() && "no space allocated for live registers");
145 if (LiveRegs[rx] == B)
146 setLiveReg(rx, A);
148 return true;
151 void ExecutionDomainFix::enterBasicBlock(
152 const LoopTraversal::TraversedMBBInfo &TraversedMBB) {
154 MachineBasicBlock *MBB = TraversedMBB.MBB;
156 // Set up LiveRegs to represent registers entering MBB.
157 // Set default domain values to 'no domain' (nullptr)
158 if (LiveRegs.empty())
159 LiveRegs.assign(NumRegs, nullptr);
161 // This is the entry block.
162 if (MBB->pred_empty()) {
163 LLVM_DEBUG(dbgs() << printMBBReference(*MBB) << ": entry\n");
164 return;
167 // Try to coalesce live-out registers from predecessors.
168 for (MachineBasicBlock *pred : MBB->predecessors()) {
169 assert(unsigned(pred->getNumber()) < MBBOutRegsInfos.size() &&
170 "Should have pre-allocated MBBInfos for all MBBs");
171 LiveRegsDVInfo &Incoming = MBBOutRegsInfos[pred->getNumber()];
172 // Incoming is null if this is a backedge from a BB
173 // we haven't processed yet
174 if (Incoming.empty())
175 continue;
177 for (unsigned rx = 0; rx != NumRegs; ++rx) {
178 DomainValue *pdv = resolve(Incoming[rx]);
179 if (!pdv)
180 continue;
181 if (!LiveRegs[rx]) {
182 setLiveReg(rx, pdv);
183 continue;
186 // We have a live DomainValue from more than one predecessor.
187 if (LiveRegs[rx]->isCollapsed()) {
188 // We are already collapsed, but predecessor is not. Force it.
189 unsigned Domain = LiveRegs[rx]->getFirstDomain();
190 if (!pdv->isCollapsed() && pdv->hasDomain(Domain))
191 collapse(pdv, Domain);
192 continue;
195 // Currently open, merge in predecessor.
196 if (!pdv->isCollapsed())
197 merge(LiveRegs[rx], pdv);
198 else
199 force(rx, pdv->getFirstDomain());
202 LLVM_DEBUG(dbgs() << printMBBReference(*MBB)
203 << (!TraversedMBB.IsDone ? ": incomplete\n"
204 : ": all preds known\n"));
207 void ExecutionDomainFix::leaveBasicBlock(
208 const LoopTraversal::TraversedMBBInfo &TraversedMBB) {
209 assert(!LiveRegs.empty() && "Must enter basic block first.");
210 unsigned MBBNumber = TraversedMBB.MBB->getNumber();
211 assert(MBBNumber < MBBOutRegsInfos.size() &&
212 "Unexpected basic block number.");
213 // Save register clearances at end of MBB - used by enterBasicBlock().
214 for (DomainValue *OldLiveReg : MBBOutRegsInfos[MBBNumber]) {
215 release(OldLiveReg);
217 MBBOutRegsInfos[MBBNumber] = LiveRegs;
218 LiveRegs.clear();
221 bool ExecutionDomainFix::visitInstr(MachineInstr *MI) {
222 // Update instructions with explicit execution domains.
223 std::pair<uint16_t, uint16_t> DomP = TII->getExecutionDomain(*MI);
224 if (DomP.first) {
225 if (DomP.second)
226 visitSoftInstr(MI, DomP.second);
227 else
228 visitHardInstr(MI, DomP.first);
231 return !DomP.first;
234 void ExecutionDomainFix::processDefs(MachineInstr *MI, bool Kill) {
235 assert(!MI->isDebugInstr() && "Won't process debug values");
236 const MCInstrDesc &MCID = MI->getDesc();
237 for (unsigned i = 0,
238 e = MI->isVariadic() ? MI->getNumOperands() : MCID.getNumDefs();
239 i != e; ++i) {
240 MachineOperand &MO = MI->getOperand(i);
241 if (!MO.isReg())
242 continue;
243 if (MO.isUse())
244 continue;
245 for (int rx : regIndices(MO.getReg())) {
246 // This instruction explicitly defines rx.
247 LLVM_DEBUG(dbgs() << printReg(RC->getRegister(rx), TRI) << ":\t" << *MI);
249 // Kill off domains redefined by generic instructions.
250 if (Kill)
251 kill(rx);
256 void ExecutionDomainFix::visitHardInstr(MachineInstr *mi, unsigned domain) {
257 // Collapse all uses.
258 for (unsigned i = mi->getDesc().getNumDefs(),
259 e = mi->getDesc().getNumOperands();
260 i != e; ++i) {
261 MachineOperand &mo = mi->getOperand(i);
262 if (!mo.isReg())
263 continue;
264 for (int rx : regIndices(mo.getReg())) {
265 force(rx, domain);
269 // Kill all defs and force them.
270 for (unsigned i = 0, e = mi->getDesc().getNumDefs(); i != e; ++i) {
271 MachineOperand &mo = mi->getOperand(i);
272 if (!mo.isReg())
273 continue;
274 for (int rx : regIndices(mo.getReg())) {
275 kill(rx);
276 force(rx, domain);
281 void ExecutionDomainFix::visitSoftInstr(MachineInstr *mi, unsigned mask) {
282 // Bitmask of available domains for this instruction after taking collapsed
283 // operands into account.
284 unsigned available = mask;
286 // Scan the explicit use operands for incoming domains.
287 SmallVector<int, 4> used;
288 if (!LiveRegs.empty())
289 for (unsigned i = mi->getDesc().getNumDefs(),
290 e = mi->getDesc().getNumOperands();
291 i != e; ++i) {
292 MachineOperand &mo = mi->getOperand(i);
293 if (!mo.isReg())
294 continue;
295 for (int rx : regIndices(mo.getReg())) {
296 DomainValue *dv = LiveRegs[rx];
297 if (dv == nullptr)
298 continue;
299 // Bitmask of domains that dv and available have in common.
300 unsigned common = dv->getCommonDomains(available);
301 // Is it possible to use this collapsed register for free?
302 if (dv->isCollapsed()) {
303 // Restrict available domains to the ones in common with the operand.
304 // If there are no common domains, we must pay the cross-domain
305 // penalty for this operand.
306 if (common)
307 available = common;
308 } else if (common)
309 // Open DomainValue is compatible, save it for merging.
310 used.push_back(rx);
311 else
312 // Open DomainValue is not compatible with instruction. It is useless
313 // now.
314 kill(rx);
318 // If the collapsed operands force a single domain, propagate the collapse.
319 if (isPowerOf2_32(available)) {
320 unsigned domain = countTrailingZeros(available);
321 TII->setExecutionDomain(*mi, domain);
322 visitHardInstr(mi, domain);
323 return;
326 // Kill off any remaining uses that don't match available, and build a list of
327 // incoming DomainValues that we want to merge.
328 SmallVector<int, 4> Regs;
329 for (int rx : used) {
330 assert(!LiveRegs.empty() && "no space allocated for live registers");
331 DomainValue *&LR = LiveRegs[rx];
332 // This useless DomainValue could have been missed above.
333 if (!LR->getCommonDomains(available)) {
334 kill(rx);
335 continue;
337 // Sorted insertion.
338 // Enables giving priority to the latest domains during merging.
339 const int Def = RDA->getReachingDef(mi, RC->getRegister(rx));
340 auto I = partition_point(Regs, [&](int I) {
341 return RDA->getReachingDef(mi, RC->getRegister(I)) <= Def;
343 Regs.insert(I, rx);
346 // doms are now sorted in order of appearance. Try to merge them all, giving
347 // priority to the latest ones.
348 DomainValue *dv = nullptr;
349 while (!Regs.empty()) {
350 if (!dv) {
351 dv = LiveRegs[Regs.pop_back_val()];
352 // Force the first dv to match the current instruction.
353 dv->AvailableDomains = dv->getCommonDomains(available);
354 assert(dv->AvailableDomains && "Domain should have been filtered");
355 continue;
358 DomainValue *Latest = LiveRegs[Regs.pop_back_val()];
359 // Skip already merged values.
360 if (Latest == dv || Latest->Next)
361 continue;
362 if (merge(dv, Latest))
363 continue;
365 // If latest didn't merge, it is useless now. Kill all registers using it.
366 for (int i : used) {
367 assert(!LiveRegs.empty() && "no space allocated for live registers");
368 if (LiveRegs[i] == Latest)
369 kill(i);
373 // dv is the DomainValue we are going to use for this instruction.
374 if (!dv) {
375 dv = alloc();
376 dv->AvailableDomains = available;
378 dv->Instrs.push_back(mi);
380 // Finally set all defs and non-collapsed uses to dv. We must iterate through
381 // all the operators, including imp-def ones.
382 for (MachineOperand &mo : mi->operands()) {
383 if (!mo.isReg())
384 continue;
385 for (int rx : regIndices(mo.getReg())) {
386 if (!LiveRegs[rx] || (mo.isDef() && LiveRegs[rx] != dv)) {
387 kill(rx);
388 setLiveReg(rx, dv);
394 void ExecutionDomainFix::processBasicBlock(
395 const LoopTraversal::TraversedMBBInfo &TraversedMBB) {
396 enterBasicBlock(TraversedMBB);
397 // If this block is not done, it makes little sense to make any decisions
398 // based on clearance information. We need to make a second pass anyway,
399 // and by then we'll have better information, so we can avoid doing the work
400 // to try and break dependencies now.
401 for (MachineInstr &MI : *TraversedMBB.MBB) {
402 if (!MI.isDebugInstr()) {
403 bool Kill = false;
404 if (TraversedMBB.PrimaryPass)
405 Kill = visitInstr(&MI);
406 processDefs(&MI, Kill);
409 leaveBasicBlock(TraversedMBB);
412 bool ExecutionDomainFix::runOnMachineFunction(MachineFunction &mf) {
413 if (skipFunction(mf.getFunction()))
414 return false;
415 MF = &mf;
416 TII = MF->getSubtarget().getInstrInfo();
417 TRI = MF->getSubtarget().getRegisterInfo();
418 LiveRegs.clear();
419 assert(NumRegs == RC->getNumRegs() && "Bad regclass");
421 LLVM_DEBUG(dbgs() << "********** FIX EXECUTION DOMAIN: "
422 << TRI->getRegClassName(RC) << " **********\n");
424 // If no relevant registers are used in the function, we can skip it
425 // completely.
426 bool anyregs = false;
427 const MachineRegisterInfo &MRI = mf.getRegInfo();
428 for (unsigned Reg : *RC) {
429 if (MRI.isPhysRegUsed(Reg)) {
430 anyregs = true;
431 break;
434 if (!anyregs)
435 return false;
437 RDA = &getAnalysis<ReachingDefAnalysis>();
439 // Initialize the AliasMap on the first use.
440 if (AliasMap.empty()) {
441 // Given a PhysReg, AliasMap[PhysReg] returns a list of indices into RC and
442 // therefore the LiveRegs array.
443 AliasMap.resize(TRI->getNumRegs());
444 for (unsigned i = 0, e = RC->getNumRegs(); i != e; ++i)
445 for (MCRegAliasIterator AI(RC->getRegister(i), TRI, true); AI.isValid();
446 ++AI)
447 AliasMap[*AI].push_back(i);
450 // Initialize the MBBOutRegsInfos
451 MBBOutRegsInfos.resize(mf.getNumBlockIDs());
453 // Traverse the basic blocks.
454 LoopTraversal Traversal;
455 LoopTraversal::TraversalOrder TraversedMBBOrder = Traversal.traverse(mf);
456 for (LoopTraversal::TraversedMBBInfo TraversedMBB : TraversedMBBOrder) {
457 processBasicBlock(TraversedMBB);
460 for (LiveRegsDVInfo OutLiveRegs : MBBOutRegsInfos) {
461 for (DomainValue *OutLiveReg : OutLiveRegs) {
462 if (OutLiveReg)
463 release(OutLiveReg);
466 MBBOutRegsInfos.clear();
467 Avail.clear();
468 Allocator.DestroyAll();
470 return false;