[x86] fix assert with horizontal math + broadcast of vector (PR43402)
[llvm-core.git] / lib / Target / AMDGPU / GCNMinRegStrategy.cpp
blobc469cf290e264215d9863de53f1c42895b4e65b2
1 //===- GCNMinRegStrategy.cpp ----------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
9 #include "llvm/ADT/ArrayRef.h"
10 #include "llvm/ADT/SmallPtrSet.h"
11 #include "llvm/ADT/SmallVector.h"
12 #include "llvm/ADT/ilist_node.h"
13 #include "llvm/ADT/simple_ilist.h"
14 #include "llvm/CodeGen/ScheduleDAG.h"
15 #include "llvm/Support/Allocator.h"
16 #include "llvm/Support/Debug.h"
17 #include "llvm/Support/raw_ostream.h"
18 #include <cassert>
19 #include <cstdint>
20 #include <limits>
21 #include <vector>
23 using namespace llvm;
25 #define DEBUG_TYPE "machine-scheduler"
27 namespace {
29 class GCNMinRegScheduler {
30 struct Candidate : ilist_node<Candidate> {
31 const SUnit *SU;
32 int Priority;
34 Candidate(const SUnit *SU_, int Priority_ = 0)
35 : SU(SU_), Priority(Priority_) {}
38 SpecificBumpPtrAllocator<Candidate> Alloc;
39 using Queue = simple_ilist<Candidate>;
40 Queue RQ; // Ready queue
42 std::vector<unsigned> NumPreds;
44 bool isScheduled(const SUnit *SU) const {
45 assert(!SU->isBoundaryNode());
46 return NumPreds[SU->NodeNum] == std::numeric_limits<unsigned>::max();
49 void setIsScheduled(const SUnit *SU) {
50 assert(!SU->isBoundaryNode());
51 NumPreds[SU->NodeNum] = std::numeric_limits<unsigned>::max();
54 unsigned getNumPreds(const SUnit *SU) const {
55 assert(!SU->isBoundaryNode());
56 assert(NumPreds[SU->NodeNum] != std::numeric_limits<unsigned>::max());
57 return NumPreds[SU->NodeNum];
60 unsigned decNumPreds(const SUnit *SU) {
61 assert(!SU->isBoundaryNode());
62 assert(NumPreds[SU->NodeNum] != std::numeric_limits<unsigned>::max());
63 return --NumPreds[SU->NodeNum];
66 void initNumPreds(const decltype(ScheduleDAG::SUnits) &SUnits);
68 int getReadySuccessors(const SUnit *SU) const;
69 int getNotReadySuccessors(const SUnit *SU) const;
71 template <typename Calc>
72 unsigned findMax(unsigned Num, Calc C);
74 Candidate* pickCandidate();
76 void bumpPredsPriority(const SUnit *SchedSU, int Priority);
77 void releaseSuccessors(const SUnit* SU, int Priority);
79 public:
80 std::vector<const SUnit*> schedule(ArrayRef<const SUnit*> TopRoots,
81 const ScheduleDAG &DAG);
84 } // end anonymous namespace
86 void GCNMinRegScheduler::initNumPreds(const decltype(ScheduleDAG::SUnits) &SUnits) {
87 NumPreds.resize(SUnits.size());
88 for (unsigned I = 0; I < SUnits.size(); ++I)
89 NumPreds[I] = SUnits[I].NumPredsLeft;
92 int GCNMinRegScheduler::getReadySuccessors(const SUnit *SU) const {
93 unsigned NumSchedSuccs = 0;
94 for (auto SDep : SU->Succs) {
95 bool wouldBeScheduled = true;
96 for (auto PDep : SDep.getSUnit()->Preds) {
97 auto PSU = PDep.getSUnit();
98 assert(!PSU->isBoundaryNode());
99 if (PSU != SU && !isScheduled(PSU)) {
100 wouldBeScheduled = false;
101 break;
104 NumSchedSuccs += wouldBeScheduled ? 1 : 0;
106 return NumSchedSuccs;
109 int GCNMinRegScheduler::getNotReadySuccessors(const SUnit *SU) const {
110 return SU->Succs.size() - getReadySuccessors(SU);
113 template <typename Calc>
114 unsigned GCNMinRegScheduler::findMax(unsigned Num, Calc C) {
115 assert(!RQ.empty() && Num <= RQ.size());
117 using T = decltype(C(*RQ.begin())) ;
119 T Max = std::numeric_limits<T>::min();
120 unsigned NumMax = 0;
121 for (auto I = RQ.begin(); Num; --Num) {
122 T Cur = C(*I);
123 if (Cur >= Max) {
124 if (Cur > Max) {
125 Max = Cur;
126 NumMax = 1;
127 } else
128 ++NumMax;
129 auto &Cand = *I++;
130 RQ.remove(Cand);
131 RQ.push_front(Cand);
132 continue;
134 ++I;
136 return NumMax;
139 GCNMinRegScheduler::Candidate* GCNMinRegScheduler::pickCandidate() {
140 do {
141 unsigned Num = RQ.size();
142 if (Num == 1) break;
144 LLVM_DEBUG(dbgs() << "\nSelecting max priority candidates among " << Num
145 << '\n');
146 Num = findMax(Num, [=](const Candidate &C) { return C.Priority; });
147 if (Num == 1) break;
149 LLVM_DEBUG(dbgs() << "\nSelecting min non-ready producing candidate among "
150 << Num << '\n');
151 Num = findMax(Num, [=](const Candidate &C) {
152 auto SU = C.SU;
153 int Res = getNotReadySuccessors(SU);
154 LLVM_DEBUG(dbgs() << "SU(" << SU->NodeNum << ") would left non-ready "
155 << Res << " successors, metric = " << -Res << '\n');
156 return -Res;
158 if (Num == 1) break;
160 LLVM_DEBUG(dbgs() << "\nSelecting most producing candidate among " << Num
161 << '\n');
162 Num = findMax(Num, [=](const Candidate &C) {
163 auto SU = C.SU;
164 auto Res = getReadySuccessors(SU);
165 LLVM_DEBUG(dbgs() << "SU(" << SU->NodeNum << ") would make ready " << Res
166 << " successors, metric = " << Res << '\n');
167 return Res;
169 if (Num == 1) break;
171 Num = Num ? Num : RQ.size();
172 LLVM_DEBUG(
173 dbgs()
174 << "\nCan't find best candidate, selecting in program order among "
175 << Num << '\n');
176 Num = findMax(Num, [=](const Candidate &C) { return -(int64_t)C.SU->NodeNum; });
177 assert(Num == 1);
178 } while (false);
180 return &RQ.front();
183 void GCNMinRegScheduler::bumpPredsPriority(const SUnit *SchedSU, int Priority) {
184 SmallPtrSet<const SUnit*, 32> Set;
185 for (const auto &S : SchedSU->Succs) {
186 if (S.getSUnit()->isBoundaryNode() || isScheduled(S.getSUnit()) ||
187 S.getKind() != SDep::Data)
188 continue;
189 for (const auto &P : S.getSUnit()->Preds) {
190 auto PSU = P.getSUnit();
191 assert(!PSU->isBoundaryNode());
192 if (PSU != SchedSU && !isScheduled(PSU)) {
193 Set.insert(PSU);
197 SmallVector<const SUnit*, 32> Worklist(Set.begin(), Set.end());
198 while (!Worklist.empty()) {
199 auto SU = Worklist.pop_back_val();
200 assert(!SU->isBoundaryNode());
201 for (const auto &P : SU->Preds) {
202 if (!P.getSUnit()->isBoundaryNode() && !isScheduled(P.getSUnit()) &&
203 Set.insert(P.getSUnit()).second)
204 Worklist.push_back(P.getSUnit());
207 LLVM_DEBUG(dbgs() << "Make the predecessors of SU(" << SchedSU->NodeNum
208 << ")'s non-ready successors of " << Priority
209 << " priority in ready queue: ");
210 const auto SetEnd = Set.end();
211 for (auto &C : RQ) {
212 if (Set.find(C.SU) != SetEnd) {
213 C.Priority = Priority;
214 LLVM_DEBUG(dbgs() << " SU(" << C.SU->NodeNum << ')');
217 LLVM_DEBUG(dbgs() << '\n');
220 void GCNMinRegScheduler::releaseSuccessors(const SUnit* SU, int Priority) {
221 for (const auto &S : SU->Succs) {
222 auto SuccSU = S.getSUnit();
223 if (S.isWeak())
224 continue;
225 assert(SuccSU->isBoundaryNode() || getNumPreds(SuccSU) > 0);
226 if (!SuccSU->isBoundaryNode() && decNumPreds(SuccSU) == 0)
227 RQ.push_front(*new (Alloc.Allocate()) Candidate(SuccSU, Priority));
231 std::vector<const SUnit*>
232 GCNMinRegScheduler::schedule(ArrayRef<const SUnit*> TopRoots,
233 const ScheduleDAG &DAG) {
234 const auto &SUnits = DAG.SUnits;
235 std::vector<const SUnit*> Schedule;
236 Schedule.reserve(SUnits.size());
238 initNumPreds(SUnits);
240 int StepNo = 0;
242 for (auto SU : TopRoots) {
243 RQ.push_back(*new (Alloc.Allocate()) Candidate(SU, StepNo));
245 releaseSuccessors(&DAG.EntrySU, StepNo);
247 while (!RQ.empty()) {
248 LLVM_DEBUG(dbgs() << "\n=== Picking candidate, Step = " << StepNo
249 << "\n"
250 "Ready queue:";
251 for (auto &C
252 : RQ) dbgs()
253 << ' ' << C.SU->NodeNum << "(P" << C.Priority << ')';
254 dbgs() << '\n';);
256 auto C = pickCandidate();
257 assert(C);
258 RQ.remove(*C);
259 auto SU = C->SU;
260 LLVM_DEBUG(dbgs() << "Selected "; DAG.dumpNode(*SU));
262 releaseSuccessors(SU, StepNo);
263 Schedule.push_back(SU);
264 setIsScheduled(SU);
266 if (getReadySuccessors(SU) == 0)
267 bumpPredsPriority(SU, StepNo);
269 ++StepNo;
271 assert(SUnits.size() == Schedule.size());
273 return Schedule;
276 namespace llvm {
278 std::vector<const SUnit*> makeMinRegSchedule(ArrayRef<const SUnit*> TopRoots,
279 const ScheduleDAG &DAG) {
280 GCNMinRegScheduler S;
281 return S.schedule(TopRoots, DAG);
284 } // end namespace llvm