[x86] fix assert with horizontal math + broadcast of vector (PR43402)
[llvm-core.git] / lib / Target / Hexagon / HexagonVectorLoopCarriedReuse.cpp
blobe5df1d456c1eefeb5970f81baa43352ada903d2a
1 //===- HexagonVectorLoopCarriedReuse.cpp ----------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This pass removes the computation of provably redundant expressions that have
10 // been computed earlier in a previous iteration. It relies on the use of PHIs
11 // to identify loop carried dependences. This is scalar replacement for vector
12 // types.
14 //-----------------------------------------------------------------------------
15 // Motivation: Consider the case where we have the following loop structure.
17 // Loop:
18 // t0 = a[i];
19 // t1 = f(t0);
20 // t2 = g(t1);
21 // ...
22 // t3 = a[i+1];
23 // t4 = f(t3);
24 // t5 = g(t4);
25 // t6 = op(t2, t5)
26 // cond_branch <Loop>
28 // This can be converted to
29 // t00 = a[0];
30 // t10 = f(t00);
31 // t20 = g(t10);
32 // Loop:
33 // t2 = t20;
34 // t3 = a[i+1];
35 // t4 = f(t3);
36 // t5 = g(t4);
37 // t6 = op(t2, t5)
38 // t20 = t5
39 // cond_branch <Loop>
41 // SROA does a good job of reusing a[i+1] as a[i] in the next iteration.
42 // Such a loop comes to this pass in the following form.
44 // LoopPreheader:
45 // X0 = a[0];
46 // Loop:
47 // X2 = PHI<(X0, LoopPreheader), (X1, Loop)>
48 // t1 = f(X2) <-- I1
49 // t2 = g(t1)
50 // ...
51 // X1 = a[i+1]
52 // t4 = f(X1) <-- I2
53 // t5 = g(t4)
54 // t6 = op(t2, t5)
55 // cond_branch <Loop>
57 // In this pass, we look for PHIs such as X2 whose incoming values come only
58 // from the Loop Preheader and over the backedge and additionally, both these
59 // values are the results of the same operation in terms of opcode. We call such
60 // a PHI node a dependence chain or DepChain. In this case, the dependence of X2
61 // over X1 is carried over only one iteration and so the DepChain is only one
62 // PHI node long.
64 // Then, we traverse the uses of the PHI (X2) and the uses of the value of the
65 // PHI coming over the backedge (X1). We stop at the first pair of such users
66 // I1 (of X2) and I2 (of X1) that meet the following conditions.
67 // 1. I1 and I2 are the same operation, but with different operands.
68 // 2. X2 and X1 are used at the same operand number in the two instructions.
69 // 3. All other operands Op1 of I1 and Op2 of I2 are also such that there is a
70 // DepChain from Op1 to Op2 of the same length as that between X2 and X1.
72 // We then make the following transformation
73 // LoopPreheader:
74 // X0 = a[0];
75 // Y0 = f(X0);
76 // Loop:
77 // X2 = PHI<(X0, LoopPreheader), (X1, Loop)>
78 // Y2 = PHI<(Y0, LoopPreheader), (t4, Loop)>
79 // t1 = f(X2) <-- Will be removed by DCE.
80 // t2 = g(Y2)
81 // ...
82 // X1 = a[i+1]
83 // t4 = f(X1)
84 // t5 = g(t4)
85 // t6 = op(t2, t5)
86 // cond_branch <Loop>
88 // We proceed until we cannot find any more such instructions I1 and I2.
90 // --- DepChains & Loop carried dependences ---
91 // Consider a single basic block loop such as
93 // LoopPreheader:
94 // X0 = ...
95 // Y0 = ...
96 // Loop:
97 // X2 = PHI<(X0, LoopPreheader), (X1, Loop)>
98 // Y2 = PHI<(Y0, LoopPreheader), (X2, Loop)>
99 // ...
100 // X1 = ...
101 // ...
102 // cond_branch <Loop>
104 // Then there is a dependence between X2 and X1 that goes back one iteration,
105 // i.e. X1 is used as X2 in the very next iteration. We represent this as a
106 // DepChain from X2 to X1 (X2->X1).
107 // Similarly, there is a dependence between Y2 and X1 that goes back two
108 // iterations. X1 is used as Y2 two iterations after it is computed. This is
109 // represented by a DepChain as (Y2->X2->X1).
111 // A DepChain has the following properties.
112 // 1. Number of Instructions in DepChain = Num of edges in DepChain + 1 =
113 // Number of iterations of carried dependence + 1.
114 // 2. All instructions in the DepChain except the last are PHIs.
116 //===----------------------------------------------------------------------===//
118 #include "llvm/ADT/SetVector.h"
119 #include "llvm/ADT/SmallVector.h"
120 #include "llvm/ADT/Statistic.h"
121 #include "llvm/Analysis/LoopInfo.h"
122 #include "llvm/Analysis/LoopPass.h"
123 #include "llvm/IR/BasicBlock.h"
124 #include "llvm/IR/DerivedTypes.h"
125 #include "llvm/IR/IRBuilder.h"
126 #include "llvm/IR/Instruction.h"
127 #include "llvm/IR/Instructions.h"
128 #include "llvm/IR/IntrinsicInst.h"
129 #include "llvm/IR/Intrinsics.h"
130 #include "llvm/IR/Use.h"
131 #include "llvm/IR/User.h"
132 #include "llvm/IR/Value.h"
133 #include "llvm/Pass.h"
134 #include "llvm/Support/Casting.h"
135 #include "llvm/Support/CommandLine.h"
136 #include "llvm/Support/Compiler.h"
137 #include "llvm/Support/Debug.h"
138 #include "llvm/Support/raw_ostream.h"
139 #include "llvm/Transforms/Scalar.h"
140 #include "llvm/Transforms/Utils.h"
141 #include <algorithm>
142 #include <cassert>
143 #include <cstddef>
144 #include <map>
145 #include <memory>
146 #include <set>
148 using namespace llvm;
150 #define DEBUG_TYPE "hexagon-vlcr"
152 STATISTIC(HexagonNumVectorLoopCarriedReuse,
153 "Number of values that were reused from a previous iteration.");
155 static cl::opt<int> HexagonVLCRIterationLim("hexagon-vlcr-iteration-lim",
156 cl::Hidden,
157 cl::desc("Maximum distance of loop carried dependences that are handled"),
158 cl::init(2), cl::ZeroOrMore);
160 namespace llvm {
162 void initializeHexagonVectorLoopCarriedReusePass(PassRegistry&);
163 Pass *createHexagonVectorLoopCarriedReusePass();
165 } // end namespace llvm
167 namespace {
169 // See info about DepChain in the comments at the top of this file.
170 using ChainOfDependences = SmallVector<Instruction *, 4>;
172 class DepChain {
173 ChainOfDependences Chain;
175 public:
176 bool isIdentical(DepChain &Other) const {
177 if (Other.size() != size())
178 return false;
179 ChainOfDependences &OtherChain = Other.getChain();
180 for (int i = 0; i < size(); ++i) {
181 if (Chain[i] != OtherChain[i])
182 return false;
184 return true;
187 ChainOfDependences &getChain() {
188 return Chain;
191 int size() const {
192 return Chain.size();
195 void clear() {
196 Chain.clear();
199 void push_back(Instruction *I) {
200 Chain.push_back(I);
203 int iterations() const {
204 return size() - 1;
207 Instruction *front() const {
208 return Chain.front();
211 Instruction *back() const {
212 return Chain.back();
215 Instruction *&operator[](const int index) {
216 return Chain[index];
219 friend raw_ostream &operator<< (raw_ostream &OS, const DepChain &D);
222 LLVM_ATTRIBUTE_UNUSED
223 raw_ostream &operator<<(raw_ostream &OS, const DepChain &D) {
224 const ChainOfDependences &CD = D.Chain;
225 int ChainSize = CD.size();
226 OS << "**DepChain Start::**\n";
227 for (int i = 0; i < ChainSize -1; ++i) {
228 OS << *(CD[i]) << " -->\n";
230 OS << *CD[ChainSize-1] << "\n";
231 return OS;
234 struct ReuseValue {
235 Instruction *Inst2Replace = nullptr;
237 // In the new PHI node that we'll construct this is the value that'll be
238 // used over the backedge. This is teh value that gets reused from a
239 // previous iteration.
240 Instruction *BackedgeInst = nullptr;
241 std::map<Instruction *, DepChain *> DepChains;
242 int Iterations = -1;
244 ReuseValue() = default;
246 void reset() {
247 Inst2Replace = nullptr;
248 BackedgeInst = nullptr;
249 DepChains.clear();
250 Iterations = -1;
252 bool isDefined() { return Inst2Replace != nullptr; }
255 LLVM_ATTRIBUTE_UNUSED
256 raw_ostream &operator<<(raw_ostream &OS, const ReuseValue &RU) {
257 OS << "** ReuseValue ***\n";
258 OS << "Instruction to Replace: " << *(RU.Inst2Replace) << "\n";
259 OS << "Backedge Instruction: " << *(RU.BackedgeInst) << "\n";
260 return OS;
263 class HexagonVectorLoopCarriedReuse : public LoopPass {
264 public:
265 static char ID;
267 explicit HexagonVectorLoopCarriedReuse() : LoopPass(ID) {
268 PassRegistry *PR = PassRegistry::getPassRegistry();
269 initializeHexagonVectorLoopCarriedReusePass(*PR);
272 StringRef getPassName() const override {
273 return "Hexagon-specific loop carried reuse for HVX vectors";
276 void getAnalysisUsage(AnalysisUsage &AU) const override {
277 AU.addRequired<LoopInfoWrapperPass>();
278 AU.addRequiredID(LoopSimplifyID);
279 AU.addRequiredID(LCSSAID);
280 AU.addPreservedID(LCSSAID);
281 AU.setPreservesCFG();
284 bool runOnLoop(Loop *L, LPPassManager &LPM) override;
286 private:
287 SetVector<DepChain *> Dependences;
288 std::set<Instruction *> ReplacedInsts;
289 Loop *CurLoop;
290 ReuseValue ReuseCandidate;
292 bool doVLCR();
293 void findLoopCarriedDeps();
294 void findValueToReuse();
295 void findDepChainFromPHI(Instruction *I, DepChain &D);
296 void reuseValue();
297 Value *findValueInBlock(Value *Op, BasicBlock *BB);
298 DepChain *getDepChainBtwn(Instruction *I1, Instruction *I2, int Iters);
299 bool isEquivalentOperation(Instruction *I1, Instruction *I2);
300 bool canReplace(Instruction *I);
301 bool isCallInstCommutative(CallInst *C);
304 } // end anonymous namespace
306 char HexagonVectorLoopCarriedReuse::ID = 0;
308 INITIALIZE_PASS_BEGIN(HexagonVectorLoopCarriedReuse, "hexagon-vlcr",
309 "Hexagon-specific predictive commoning for HVX vectors", false, false)
310 INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
311 INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
312 INITIALIZE_PASS_DEPENDENCY(LCSSAWrapperPass)
313 INITIALIZE_PASS_END(HexagonVectorLoopCarriedReuse, "hexagon-vlcr",
314 "Hexagon-specific predictive commoning for HVX vectors", false, false)
316 bool HexagonVectorLoopCarriedReuse::runOnLoop(Loop *L, LPPassManager &LPM) {
317 if (skipLoop(L))
318 return false;
320 if (!L->getLoopPreheader())
321 return false;
323 // Work only on innermost loops.
324 if (!L->getSubLoops().empty())
325 return false;
327 // Work only on single basic blocks loops.
328 if (L->getNumBlocks() != 1)
329 return false;
331 CurLoop = L;
333 return doVLCR();
336 bool HexagonVectorLoopCarriedReuse::isCallInstCommutative(CallInst *C) {
337 switch (C->getCalledFunction()->getIntrinsicID()) {
338 case Intrinsic::hexagon_V6_vaddb:
339 case Intrinsic::hexagon_V6_vaddb_128B:
340 case Intrinsic::hexagon_V6_vaddh:
341 case Intrinsic::hexagon_V6_vaddh_128B:
342 case Intrinsic::hexagon_V6_vaddw:
343 case Intrinsic::hexagon_V6_vaddw_128B:
344 case Intrinsic::hexagon_V6_vaddubh:
345 case Intrinsic::hexagon_V6_vaddubh_128B:
346 case Intrinsic::hexagon_V6_vadduhw:
347 case Intrinsic::hexagon_V6_vadduhw_128B:
348 case Intrinsic::hexagon_V6_vaddhw:
349 case Intrinsic::hexagon_V6_vaddhw_128B:
350 case Intrinsic::hexagon_V6_vmaxb:
351 case Intrinsic::hexagon_V6_vmaxb_128B:
352 case Intrinsic::hexagon_V6_vmaxh:
353 case Intrinsic::hexagon_V6_vmaxh_128B:
354 case Intrinsic::hexagon_V6_vmaxw:
355 case Intrinsic::hexagon_V6_vmaxw_128B:
356 case Intrinsic::hexagon_V6_vmaxub:
357 case Intrinsic::hexagon_V6_vmaxub_128B:
358 case Intrinsic::hexagon_V6_vmaxuh:
359 case Intrinsic::hexagon_V6_vmaxuh_128B:
360 case Intrinsic::hexagon_V6_vminub:
361 case Intrinsic::hexagon_V6_vminub_128B:
362 case Intrinsic::hexagon_V6_vminuh:
363 case Intrinsic::hexagon_V6_vminuh_128B:
364 case Intrinsic::hexagon_V6_vminb:
365 case Intrinsic::hexagon_V6_vminb_128B:
366 case Intrinsic::hexagon_V6_vminh:
367 case Intrinsic::hexagon_V6_vminh_128B:
368 case Intrinsic::hexagon_V6_vminw:
369 case Intrinsic::hexagon_V6_vminw_128B:
370 case Intrinsic::hexagon_V6_vmpyub:
371 case Intrinsic::hexagon_V6_vmpyub_128B:
372 case Intrinsic::hexagon_V6_vmpyuh:
373 case Intrinsic::hexagon_V6_vmpyuh_128B:
374 case Intrinsic::hexagon_V6_vavgub:
375 case Intrinsic::hexagon_V6_vavgub_128B:
376 case Intrinsic::hexagon_V6_vavgh:
377 case Intrinsic::hexagon_V6_vavgh_128B:
378 case Intrinsic::hexagon_V6_vavguh:
379 case Intrinsic::hexagon_V6_vavguh_128B:
380 case Intrinsic::hexagon_V6_vavgw:
381 case Intrinsic::hexagon_V6_vavgw_128B:
382 case Intrinsic::hexagon_V6_vavgb:
383 case Intrinsic::hexagon_V6_vavgb_128B:
384 case Intrinsic::hexagon_V6_vavguw:
385 case Intrinsic::hexagon_V6_vavguw_128B:
386 case Intrinsic::hexagon_V6_vabsdiffh:
387 case Intrinsic::hexagon_V6_vabsdiffh_128B:
388 case Intrinsic::hexagon_V6_vabsdiffub:
389 case Intrinsic::hexagon_V6_vabsdiffub_128B:
390 case Intrinsic::hexagon_V6_vabsdiffuh:
391 case Intrinsic::hexagon_V6_vabsdiffuh_128B:
392 case Intrinsic::hexagon_V6_vabsdiffw:
393 case Intrinsic::hexagon_V6_vabsdiffw_128B:
394 return true;
395 default:
396 return false;
400 bool HexagonVectorLoopCarriedReuse::isEquivalentOperation(Instruction *I1,
401 Instruction *I2) {
402 if (!I1->isSameOperationAs(I2))
403 return false;
404 // This check is in place specifically for intrinsics. isSameOperationAs will
405 // return two for any two hexagon intrinsics because they are essentially the
406 // same instruciton (CallInst). We need to scratch the surface to see if they
407 // are calls to the same function.
408 if (CallInst *C1 = dyn_cast<CallInst>(I1)) {
409 if (CallInst *C2 = dyn_cast<CallInst>(I2)) {
410 if (C1->getCalledFunction() != C2->getCalledFunction())
411 return false;
415 // If both the Instructions are of Vector Type and any of the element
416 // is integer constant, check their values too for equivalence.
417 if (I1->getType()->isVectorTy() && I2->getType()->isVectorTy()) {
418 unsigned NumOperands = I1->getNumOperands();
419 for (unsigned i = 0; i < NumOperands; ++i) {
420 ConstantInt *C1 = dyn_cast<ConstantInt>(I1->getOperand(i));
421 ConstantInt *C2 = dyn_cast<ConstantInt>(I2->getOperand(i));
422 if(!C1) continue;
423 assert(C2);
424 if (C1->getSExtValue() != C2->getSExtValue())
425 return false;
429 return true;
432 bool HexagonVectorLoopCarriedReuse::canReplace(Instruction *I) {
433 const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I);
434 if (!II)
435 return true;
437 switch (II->getIntrinsicID()) {
438 case Intrinsic::hexagon_V6_hi:
439 case Intrinsic::hexagon_V6_lo:
440 case Intrinsic::hexagon_V6_hi_128B:
441 case Intrinsic::hexagon_V6_lo_128B:
442 LLVM_DEBUG(dbgs() << "Not considering for reuse: " << *II << "\n");
443 return false;
444 default:
445 return true;
448 void HexagonVectorLoopCarriedReuse::findValueToReuse() {
449 for (auto *D : Dependences) {
450 LLVM_DEBUG(dbgs() << "Processing dependence " << *(D->front()) << "\n");
451 if (D->iterations() > HexagonVLCRIterationLim) {
452 LLVM_DEBUG(
453 dbgs()
454 << ".. Skipping because number of iterations > than the limit\n");
455 continue;
458 PHINode *PN = cast<PHINode>(D->front());
459 Instruction *BEInst = D->back();
460 int Iters = D->iterations();
461 BasicBlock *BB = PN->getParent();
462 LLVM_DEBUG(dbgs() << "Checking if any uses of " << *PN
463 << " can be reused\n");
465 SmallVector<Instruction *, 4> PNUsers;
466 for (auto UI = PN->use_begin(), E = PN->use_end(); UI != E; ++UI) {
467 Use &U = *UI;
468 Instruction *User = cast<Instruction>(U.getUser());
470 if (User->getParent() != BB)
471 continue;
472 if (ReplacedInsts.count(User)) {
473 LLVM_DEBUG(dbgs() << *User
474 << " has already been replaced. Skipping...\n");
475 continue;
477 if (isa<PHINode>(User))
478 continue;
479 if (User->mayHaveSideEffects())
480 continue;
481 if (!canReplace(User))
482 continue;
484 PNUsers.push_back(User);
486 LLVM_DEBUG(dbgs() << PNUsers.size() << " use(s) of the PHI in the block\n");
488 // For each interesting use I of PN, find an Instruction BEUser that
489 // performs the same operation as I on BEInst and whose other operands,
490 // if any, can also be rematerialized in OtherBB. We stop when we find the
491 // first such Instruction BEUser. This is because once BEUser is
492 // rematerialized in OtherBB, we may find more such "fixup" opportunities
493 // in this block. So, we'll start over again.
494 for (Instruction *I : PNUsers) {
495 for (auto UI = BEInst->use_begin(), E = BEInst->use_end(); UI != E;
496 ++UI) {
497 Use &U = *UI;
498 Instruction *BEUser = cast<Instruction>(U.getUser());
500 if (BEUser->getParent() != BB)
501 continue;
502 if (!isEquivalentOperation(I, BEUser))
503 continue;
505 int NumOperands = I->getNumOperands();
507 // Take operands of each PNUser one by one and try to find DepChain
508 // with every operand of the BEUser. If any of the operands of BEUser
509 // has DepChain with current operand of the PNUser, break the matcher
510 // loop. Keep doing this for Every PNUser operand. If PNUser operand
511 // does not have DepChain with any of the BEUser operand, break the
512 // outer matcher loop, mark the BEUser as null and reset the ReuseCandidate.
513 // This ensures that DepChain exist for all the PNUser operand with
514 // BEUser operand. This also ensures that DepChains are independent of
515 // the positions in PNUser and BEUser.
516 std::map<Instruction *, DepChain *> DepChains;
517 CallInst *C1 = dyn_cast<CallInst>(I);
518 if ((I && I->isCommutative()) || (C1 && isCallInstCommutative(C1))) {
519 bool Found = false;
520 for (int OpNo = 0; OpNo < NumOperands; ++OpNo) {
521 Value *Op = I->getOperand(OpNo);
522 Instruction *OpInst = dyn_cast<Instruction>(Op);
523 Found = false;
524 for (int T = 0; T < NumOperands; ++T) {
525 Value *BEOp = BEUser->getOperand(T);
526 Instruction *BEOpInst = dyn_cast<Instruction>(BEOp);
527 if (!OpInst && !BEOpInst) {
528 if (Op == BEOp) {
529 Found = true;
530 break;
534 if ((OpInst && !BEOpInst) || (!OpInst && BEOpInst))
535 continue;
537 DepChain *D = getDepChainBtwn(OpInst, BEOpInst, Iters);
539 if (D) {
540 Found = true;
541 DepChains[OpInst] = D;
542 break;
545 if (!Found) {
546 BEUser = nullptr;
547 break;
550 } else {
552 for (int OpNo = 0; OpNo < NumOperands; ++OpNo) {
553 Value *Op = I->getOperand(OpNo);
554 Value *BEOp = BEUser->getOperand(OpNo);
556 Instruction *OpInst = dyn_cast<Instruction>(Op);
557 if (!OpInst) {
558 if (Op == BEOp)
559 continue;
560 // Do not allow reuse to occur when the operands may be different
561 // values.
562 BEUser = nullptr;
563 break;
566 Instruction *BEOpInst = dyn_cast<Instruction>(BEOp);
567 DepChain *D = getDepChainBtwn(OpInst, BEOpInst, Iters);
569 if (D) {
570 DepChains[OpInst] = D;
571 } else {
572 BEUser = nullptr;
573 break;
577 if (BEUser) {
578 LLVM_DEBUG(dbgs() << "Found Value for reuse.\n");
579 ReuseCandidate.Inst2Replace = I;
580 ReuseCandidate.BackedgeInst = BEUser;
581 ReuseCandidate.DepChains = DepChains;
582 ReuseCandidate.Iterations = Iters;
583 return;
585 ReuseCandidate.reset();
589 ReuseCandidate.reset();
592 Value *HexagonVectorLoopCarriedReuse::findValueInBlock(Value *Op,
593 BasicBlock *BB) {
594 PHINode *PN = dyn_cast<PHINode>(Op);
595 assert(PN);
596 Value *ValueInBlock = PN->getIncomingValueForBlock(BB);
597 return ValueInBlock;
600 void HexagonVectorLoopCarriedReuse::reuseValue() {
601 LLVM_DEBUG(dbgs() << ReuseCandidate);
602 Instruction *Inst2Replace = ReuseCandidate.Inst2Replace;
603 Instruction *BEInst = ReuseCandidate.BackedgeInst;
604 int NumOperands = Inst2Replace->getNumOperands();
605 std::map<Instruction *, DepChain *> &DepChains = ReuseCandidate.DepChains;
606 int Iterations = ReuseCandidate.Iterations;
607 BasicBlock *LoopPH = CurLoop->getLoopPreheader();
608 assert(!DepChains.empty() && "No DepChains");
609 LLVM_DEBUG(dbgs() << "reuseValue is making the following changes\n");
611 SmallVector<Instruction *, 4> InstsInPreheader;
612 for (int i = 0; i < Iterations; ++i) {
613 Instruction *InstInPreheader = Inst2Replace->clone();
614 SmallVector<Value *, 4> Ops;
615 for (int j = 0; j < NumOperands; ++j) {
616 Instruction *I = dyn_cast<Instruction>(Inst2Replace->getOperand(j));
617 if (!I)
618 continue;
619 // Get the DepChain corresponding to this operand.
620 DepChain &D = *DepChains[I];
621 // Get the PHI for the iteration number and find
622 // the incoming value from the Loop Preheader for
623 // that PHI.
624 Value *ValInPreheader = findValueInBlock(D[i], LoopPH);
625 InstInPreheader->setOperand(j, ValInPreheader);
627 InstsInPreheader.push_back(InstInPreheader);
628 InstInPreheader->setName(Inst2Replace->getName() + ".hexagon.vlcr");
629 InstInPreheader->insertBefore(LoopPH->getTerminator());
630 LLVM_DEBUG(dbgs() << "Added " << *InstInPreheader << " to "
631 << LoopPH->getName() << "\n");
633 BasicBlock *BB = BEInst->getParent();
634 IRBuilder<> IRB(BB);
635 IRB.SetInsertPoint(BB->getFirstNonPHI());
636 Value *BEVal = BEInst;
637 PHINode *NewPhi;
638 for (int i = Iterations-1; i >=0 ; --i) {
639 Instruction *InstInPreheader = InstsInPreheader[i];
640 NewPhi = IRB.CreatePHI(InstInPreheader->getType(), 2);
641 NewPhi->addIncoming(InstInPreheader, LoopPH);
642 NewPhi->addIncoming(BEVal, BB);
643 LLVM_DEBUG(dbgs() << "Adding " << *NewPhi << " to " << BB->getName()
644 << "\n");
645 BEVal = NewPhi;
647 // We are in LCSSA form. So, a value defined inside the Loop is used only
648 // inside the loop. So, the following is safe.
649 Inst2Replace->replaceAllUsesWith(NewPhi);
650 ReplacedInsts.insert(Inst2Replace);
651 ++HexagonNumVectorLoopCarriedReuse;
654 bool HexagonVectorLoopCarriedReuse::doVLCR() {
655 assert(CurLoop->getSubLoops().empty() &&
656 "Can do VLCR on the innermost loop only");
657 assert((CurLoop->getNumBlocks() == 1) &&
658 "Can do VLCR only on single block loops");
660 bool Changed = false;
661 bool Continue;
663 LLVM_DEBUG(dbgs() << "Working on Loop: " << *CurLoop->getHeader() << "\n");
664 do {
665 // Reset datastructures.
666 Dependences.clear();
667 Continue = false;
669 findLoopCarriedDeps();
670 findValueToReuse();
671 if (ReuseCandidate.isDefined()) {
672 reuseValue();
673 Changed = true;
674 Continue = true;
676 llvm::for_each(Dependences, std::default_delete<DepChain>());
677 } while (Continue);
678 return Changed;
681 void HexagonVectorLoopCarriedReuse::findDepChainFromPHI(Instruction *I,
682 DepChain &D) {
683 PHINode *PN = dyn_cast<PHINode>(I);
684 if (!PN) {
685 D.push_back(I);
686 return;
687 } else {
688 auto NumIncomingValues = PN->getNumIncomingValues();
689 if (NumIncomingValues != 2) {
690 D.clear();
691 return;
694 BasicBlock *BB = PN->getParent();
695 if (BB != CurLoop->getHeader()) {
696 D.clear();
697 return;
700 Value *BEVal = PN->getIncomingValueForBlock(BB);
701 Instruction *BEInst = dyn_cast<Instruction>(BEVal);
702 // This is a single block loop with a preheader, so at least
703 // one value should come over the backedge.
704 assert(BEInst && "There should be a value over the backedge");
706 Value *PreHdrVal =
707 PN->getIncomingValueForBlock(CurLoop->getLoopPreheader());
708 if(!PreHdrVal || !isa<Instruction>(PreHdrVal)) {
709 D.clear();
710 return;
712 D.push_back(PN);
713 findDepChainFromPHI(BEInst, D);
717 DepChain *HexagonVectorLoopCarriedReuse::getDepChainBtwn(Instruction *I1,
718 Instruction *I2,
719 int Iters) {
720 for (auto *D : Dependences) {
721 if (D->front() == I1 && D->back() == I2 && D->iterations() == Iters)
722 return D;
724 return nullptr;
727 void HexagonVectorLoopCarriedReuse::findLoopCarriedDeps() {
728 BasicBlock *BB = CurLoop->getHeader();
729 for (auto I = BB->begin(), E = BB->end(); I != E && isa<PHINode>(I); ++I) {
730 auto *PN = cast<PHINode>(I);
731 if (!isa<VectorType>(PN->getType()))
732 continue;
734 DepChain *D = new DepChain();
735 findDepChainFromPHI(PN, *D);
736 if (D->size() != 0)
737 Dependences.insert(D);
738 else
739 delete D;
741 LLVM_DEBUG(dbgs() << "Found " << Dependences.size() << " dependences\n");
742 LLVM_DEBUG(for (size_t i = 0; i < Dependences.size();
743 ++i) { dbgs() << *Dependences[i] << "\n"; });
746 Pass *llvm::createHexagonVectorLoopCarriedReusePass() {
747 return new HexagonVectorLoopCarriedReuse();