1 //===- HexagonVectorLoopCarriedReuse.cpp ----------------------------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // This pass removes the computation of provably redundant expressions that have
10 // been computed earlier in a previous iteration. It relies on the use of PHIs
11 // to identify loop carried dependences. This is scalar replacement for vector
14 //-----------------------------------------------------------------------------
15 // Motivation: Consider the case where we have the following loop structure.
28 // This can be converted to
41 // SROA does a good job of reusing a[i+1] as a[i] in the next iteration.
42 // Such a loop comes to this pass in the following form.
47 // X2 = PHI<(X0, LoopPreheader), (X1, Loop)>
57 // In this pass, we look for PHIs such as X2 whose incoming values come only
58 // from the Loop Preheader and over the backedge and additionally, both these
59 // values are the results of the same operation in terms of opcode. We call such
60 // a PHI node a dependence chain or DepChain. In this case, the dependence of X2
61 // over X1 is carried over only one iteration and so the DepChain is only one
64 // Then, we traverse the uses of the PHI (X2) and the uses of the value of the
65 // PHI coming over the backedge (X1). We stop at the first pair of such users
66 // I1 (of X2) and I2 (of X1) that meet the following conditions.
67 // 1. I1 and I2 are the same operation, but with different operands.
68 // 2. X2 and X1 are used at the same operand number in the two instructions.
69 // 3. All other operands Op1 of I1 and Op2 of I2 are also such that there is a
70 // DepChain from Op1 to Op2 of the same length as that between X2 and X1.
72 // We then make the following transformation
77 // X2 = PHI<(X0, LoopPreheader), (X1, Loop)>
78 // Y2 = PHI<(Y0, LoopPreheader), (t4, Loop)>
79 // t1 = f(X2) <-- Will be removed by DCE.
88 // We proceed until we cannot find any more such instructions I1 and I2.
90 // --- DepChains & Loop carried dependences ---
91 // Consider a single basic block loop such as
97 // X2 = PHI<(X0, LoopPreheader), (X1, Loop)>
98 // Y2 = PHI<(Y0, LoopPreheader), (X2, Loop)>
102 // cond_branch <Loop>
104 // Then there is a dependence between X2 and X1 that goes back one iteration,
105 // i.e. X1 is used as X2 in the very next iteration. We represent this as a
106 // DepChain from X2 to X1 (X2->X1).
107 // Similarly, there is a dependence between Y2 and X1 that goes back two
108 // iterations. X1 is used as Y2 two iterations after it is computed. This is
109 // represented by a DepChain as (Y2->X2->X1).
111 // A DepChain has the following properties.
112 // 1. Num of edges in DepChain = Number of Instructions in DepChain = Number of
113 // iterations of carried dependence + 1.
114 // 2. All instructions in the DepChain except the last are PHIs.
116 //===----------------------------------------------------------------------===//
118 #include "llvm/ADT/SetVector.h"
119 #include "llvm/ADT/SmallVector.h"
120 #include "llvm/ADT/Statistic.h"
121 #include "llvm/Analysis/LoopInfo.h"
122 #include "llvm/Analysis/LoopPass.h"
123 #include "llvm/IR/BasicBlock.h"
124 #include "llvm/IR/DerivedTypes.h"
125 #include "llvm/IR/IRBuilder.h"
126 #include "llvm/IR/Instruction.h"
127 #include "llvm/IR/Instructions.h"
128 #include "llvm/IR/IntrinsicInst.h"
129 #include "llvm/IR/Intrinsics.h"
130 #include "llvm/IR/Use.h"
131 #include "llvm/IR/User.h"
132 #include "llvm/IR/Value.h"
133 #include "llvm/Pass.h"
134 #include "llvm/Support/Casting.h"
135 #include "llvm/Support/CommandLine.h"
136 #include "llvm/Support/Compiler.h"
137 #include "llvm/Support/Debug.h"
138 #include "llvm/Support/raw_ostream.h"
139 #include "llvm/Transforms/Scalar.h"
140 #include "llvm/Transforms/Utils.h"
148 using namespace llvm
;
150 #define DEBUG_TYPE "hexagon-vlcr"
152 STATISTIC(HexagonNumVectorLoopCarriedReuse
,
153 "Number of values that were reused from a previous iteration.");
155 static cl::opt
<int> HexagonVLCRIterationLim("hexagon-vlcr-iteration-lim",
157 cl::desc("Maximum distance of loop carried dependences that are handled"),
158 cl::init(2), cl::ZeroOrMore
);
162 void initializeHexagonVectorLoopCarriedReusePass(PassRegistry
&);
163 Pass
*createHexagonVectorLoopCarriedReusePass();
165 } // end namespace llvm
169 // See info about DepChain in the comments at the top of this file.
170 using ChainOfDependences
= SmallVector
<Instruction
*, 4>;
173 ChainOfDependences Chain
;
176 bool isIdentical(DepChain
&Other
) const {
177 if (Other
.size() != size())
179 ChainOfDependences
&OtherChain
= Other
.getChain();
180 for (int i
= 0; i
< size(); ++i
) {
181 if (Chain
[i
] != OtherChain
[i
])
187 ChainOfDependences
&getChain() {
199 void push_back(Instruction
*I
) {
203 int iterations() const {
207 Instruction
*front() const {
208 return Chain
.front();
211 Instruction
*back() const {
215 Instruction
*&operator[](const int index
) {
219 friend raw_ostream
&operator<< (raw_ostream
&OS
, const DepChain
&D
);
222 LLVM_ATTRIBUTE_UNUSED
223 raw_ostream
&operator<<(raw_ostream
&OS
, const DepChain
&D
) {
224 const ChainOfDependences
&CD
= D
.Chain
;
225 int ChainSize
= CD
.size();
226 OS
<< "**DepChain Start::**\n";
227 for (int i
= 0; i
< ChainSize
-1; ++i
) {
228 OS
<< *(CD
[i
]) << " -->\n";
230 OS
<< *CD
[ChainSize
-1] << "\n";
235 Instruction
*Inst2Replace
= nullptr;
237 // In the new PHI node that we'll construct this is the value that'll be
238 // used over the backedge. This is the value that gets reused from a
239 // previous iteration.
240 Instruction
*BackedgeInst
= nullptr;
242 ReuseValue() = default;
244 void reset() { Inst2Replace
= nullptr; BackedgeInst
= nullptr; }
245 bool isDefined() { return Inst2Replace
!= nullptr; }
248 LLVM_ATTRIBUTE_UNUSED
249 raw_ostream
&operator<<(raw_ostream
&OS
, const ReuseValue
&RU
) {
250 OS
<< "** ReuseValue ***\n";
251 OS
<< "Instruction to Replace: " << *(RU
.Inst2Replace
) << "\n";
252 OS
<< "Backedge Instruction: " << *(RU
.BackedgeInst
) << "\n";
256 class HexagonVectorLoopCarriedReuse
: public LoopPass
{
260 explicit HexagonVectorLoopCarriedReuse() : LoopPass(ID
) {
261 PassRegistry
*PR
= PassRegistry::getPassRegistry();
262 initializeHexagonVectorLoopCarriedReusePass(*PR
);
265 StringRef
getPassName() const override
{
266 return "Hexagon-specific loop carried reuse for HVX vectors";
269 void getAnalysisUsage(AnalysisUsage
&AU
) const override
{
270 AU
.addRequired
<LoopInfoWrapperPass
>();
271 AU
.addRequiredID(LoopSimplifyID
);
272 AU
.addRequiredID(LCSSAID
);
273 AU
.addPreservedID(LCSSAID
);
274 AU
.setPreservesCFG();
277 bool runOnLoop(Loop
*L
, LPPassManager
&LPM
) override
;
280 SetVector
<DepChain
*> Dependences
;
281 std::set
<Instruction
*> ReplacedInsts
;
283 ReuseValue ReuseCandidate
;
286 void findLoopCarriedDeps();
287 void findValueToReuse();
288 void findDepChainFromPHI(Instruction
*I
, DepChain
&D
);
290 Value
*findValueInBlock(Value
*Op
, BasicBlock
*BB
);
291 bool isDepChainBtwn(Instruction
*I1
, Instruction
*I2
, int Iters
);
292 DepChain
*getDepChainBtwn(Instruction
*I1
, Instruction
*I2
);
293 bool isEquivalentOperation(Instruction
*I1
, Instruction
*I2
);
294 bool canReplace(Instruction
*I
);
297 } // end anonymous namespace
299 char HexagonVectorLoopCarriedReuse::ID
= 0;
301 INITIALIZE_PASS_BEGIN(HexagonVectorLoopCarriedReuse
, "hexagon-vlcr",
302 "Hexagon-specific predictive commoning for HVX vectors", false, false)
303 INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass
)
304 INITIALIZE_PASS_DEPENDENCY(LoopSimplify
)
305 INITIALIZE_PASS_DEPENDENCY(LCSSAWrapperPass
)
306 INITIALIZE_PASS_END(HexagonVectorLoopCarriedReuse
, "hexagon-vlcr",
307 "Hexagon-specific predictive commoning for HVX vectors", false, false)
309 bool HexagonVectorLoopCarriedReuse::runOnLoop(Loop
*L
, LPPassManager
&LPM
) {
313 if (!L
->getLoopPreheader())
316 // Work only on innermost loops.
317 if (!L
->getSubLoops().empty())
320 // Work only on single basic block loops.
321 if (L
->getNumBlocks() != 1)
329 bool HexagonVectorLoopCarriedReuse::isEquivalentOperation(Instruction
*I1
,
331 if (!I1
->isSameOperationAs(I2
))
333 // This check is in place specifically for intrinsics. isSameOperationAs will
334 // return true for any two hexagon intrinsics because they are essentially the
335 // same instruction (CallInst). We need to scratch the surface to see if they
336 // are calls to the same function.
337 if (CallInst
*C1
= dyn_cast
<CallInst
>(I1
)) {
338 if (CallInst
*C2
= dyn_cast
<CallInst
>(I2
)) {
339 if (C1
->getCalledFunction() != C2
->getCalledFunction())
344 // If both the Instructions are of Vector Type and any of the element
345 // is integer constant, check their values too for equivalence.
346 if (I1
->getType()->isVectorTy() && I2
->getType()->isVectorTy()) {
347 unsigned NumOperands
= I1
->getNumOperands();
348 for (unsigned i
= 0; i
< NumOperands
; ++i
) {
349 ConstantInt
*C1
= dyn_cast
<ConstantInt
>(I1
->getOperand(i
));
350 ConstantInt
*C2
= dyn_cast
<ConstantInt
>(I2
->getOperand(i
));
353 if (C1
->getSExtValue() != C2
->getSExtValue())
361 bool HexagonVectorLoopCarriedReuse::canReplace(Instruction
*I
) {
362 const IntrinsicInst
*II
= dyn_cast
<IntrinsicInst
>(I
);
364 (II
->getIntrinsicID() == Intrinsic::hexagon_V6_hi
||
365 II
->getIntrinsicID() == Intrinsic::hexagon_V6_lo
)) {
366 LLVM_DEBUG(dbgs() << "Not considering for reuse: " << *II
<< "\n");
371 void HexagonVectorLoopCarriedReuse::findValueToReuse() {
372 for (auto *D
: Dependences
) {
373 LLVM_DEBUG(dbgs() << "Processing dependence " << *(D
->front()) << "\n");
374 if (D
->iterations() > HexagonVLCRIterationLim
) {
377 << ".. Skipping because number of iterations > than the limit\n");
381 PHINode
*PN
= cast
<PHINode
>(D
->front());
382 Instruction
*BEInst
= D
->back();
383 int Iters
= D
->iterations();
384 BasicBlock
*BB
= PN
->getParent();
385 LLVM_DEBUG(dbgs() << "Checking if any uses of " << *PN
386 << " can be reused\n");
388 SmallVector
<Instruction
*, 4> PNUsers
;
389 for (auto UI
= PN
->use_begin(), E
= PN
->use_end(); UI
!= E
; ++UI
) {
391 Instruction
*User
= cast
<Instruction
>(U
.getUser());
393 if (User
->getParent() != BB
)
395 if (ReplacedInsts
.count(User
)) {
396 LLVM_DEBUG(dbgs() << *User
397 << " has already been replaced. Skipping...\n");
400 if (isa
<PHINode
>(User
))
402 if (User
->mayHaveSideEffects())
404 if (!canReplace(User
))
407 PNUsers
.push_back(User
);
409 LLVM_DEBUG(dbgs() << PNUsers
.size() << " use(s) of the PHI in the block\n");
411 // For each interesting use I of PN, find an Instruction BEUser that
412 // performs the same operation as I on BEInst and whose other operands,
413 // if any, can also be rematerialized in OtherBB. We stop when we find the
414 // first such Instruction BEUser. This is because once BEUser is
415 // rematerialized in OtherBB, we may find more such "fixup" opportunities
416 // in this block. So, we'll start over again.
417 for (Instruction
*I
: PNUsers
) {
418 for (auto UI
= BEInst
->use_begin(), E
= BEInst
->use_end(); UI
!= E
;
421 Instruction
*BEUser
= cast
<Instruction
>(U
.getUser());
423 if (BEUser
->getParent() != BB
)
425 if (!isEquivalentOperation(I
, BEUser
))
428 int NumOperands
= I
->getNumOperands();
430 for (int OpNo
= 0; OpNo
< NumOperands
; ++OpNo
) {
431 Value
*Op
= I
->getOperand(OpNo
);
432 Instruction
*OpInst
= dyn_cast
<Instruction
>(Op
);
436 Value
*BEOp
= BEUser
->getOperand(OpNo
);
437 Instruction
*BEOpInst
= dyn_cast
<Instruction
>(BEOp
);
439 if (!isDepChainBtwn(OpInst
, BEOpInst
, Iters
)) {
445 LLVM_DEBUG(dbgs() << "Found Value for reuse.\n");
446 ReuseCandidate
.Inst2Replace
= I
;
447 ReuseCandidate
.BackedgeInst
= BEUser
;
450 ReuseCandidate
.reset();
454 ReuseCandidate
.reset();
457 Value
*HexagonVectorLoopCarriedReuse::findValueInBlock(Value
*Op
,
459 PHINode
*PN
= dyn_cast
<PHINode
>(Op
);
461 Value
*ValueInBlock
= PN
->getIncomingValueForBlock(BB
);
465 void HexagonVectorLoopCarriedReuse::reuseValue() {
466 LLVM_DEBUG(dbgs() << ReuseCandidate
);
467 Instruction
*Inst2Replace
= ReuseCandidate
.Inst2Replace
;
468 Instruction
*BEInst
= ReuseCandidate
.BackedgeInst
;
469 int NumOperands
= Inst2Replace
->getNumOperands();
470 std::map
<Instruction
*, DepChain
*> DepChains
;
472 BasicBlock
*LoopPH
= CurLoop
->getLoopPreheader();
474 for (int i
= 0; i
< NumOperands
; ++i
) {
475 Instruction
*I
= dyn_cast
<Instruction
>(Inst2Replace
->getOperand(i
));
479 Instruction
*J
= cast
<Instruction
>(BEInst
->getOperand(i
));
480 DepChain
*D
= getDepChainBtwn(I
, J
);
483 "No DepChain between corresponding operands in ReuseCandidate\n");
484 if (Iterations
== -1)
485 Iterations
= D
->iterations();
486 assert(Iterations
== D
->iterations() && "Iterations mismatch");
491 LLVM_DEBUG(dbgs() << "reuseValue is making the following changes\n");
493 SmallVector
<Instruction
*, 4> InstsInPreheader
;
494 for (int i
= 0; i
< Iterations
; ++i
) {
495 Instruction
*InstInPreheader
= Inst2Replace
->clone();
496 SmallVector
<Value
*, 4> Ops
;
497 for (int j
= 0; j
< NumOperands
; ++j
) {
498 Instruction
*I
= dyn_cast
<Instruction
>(Inst2Replace
->getOperand(j
));
501 // Get the DepChain corresponding to this operand.
502 DepChain
&D
= *DepChains
[I
];
503 // Get the PHI for the iteration number and find
504 // the incoming value from the Loop Preheader for
506 Value
*ValInPreheader
= findValueInBlock(D
[i
], LoopPH
);
507 InstInPreheader
->setOperand(j
, ValInPreheader
);
509 InstsInPreheader
.push_back(InstInPreheader
);
510 InstInPreheader
->setName(Inst2Replace
->getName() + ".hexagon.vlcr");
511 InstInPreheader
->insertBefore(LoopPH
->getTerminator());
512 LLVM_DEBUG(dbgs() << "Added " << *InstInPreheader
<< " to "
513 << LoopPH
->getName() << "\n");
515 BasicBlock
*BB
= BEInst
->getParent();
517 IRB
.SetInsertPoint(BB
->getFirstNonPHI());
518 Value
*BEVal
= BEInst
;
520 for (int i
= Iterations
-1; i
>=0 ; --i
) {
521 Instruction
*InstInPreheader
= InstsInPreheader
[i
];
522 NewPhi
= IRB
.CreatePHI(InstInPreheader
->getType(), 2);
523 NewPhi
->addIncoming(InstInPreheader
, LoopPH
);
524 NewPhi
->addIncoming(BEVal
, BB
);
525 LLVM_DEBUG(dbgs() << "Adding " << *NewPhi
<< " to " << BB
->getName()
529 // We are in LCSSA form. So, a value defined inside the Loop is used only
530 // inside the loop. So, the following is safe.
531 Inst2Replace
->replaceAllUsesWith(NewPhi
);
532 ReplacedInsts
.insert(Inst2Replace
);
533 ++HexagonNumVectorLoopCarriedReuse
;
536 bool HexagonVectorLoopCarriedReuse::doVLCR() {
537 assert(CurLoop
->getSubLoops().empty() &&
538 "Can do VLCR on the innermost loop only");
539 assert((CurLoop
->getNumBlocks() == 1) &&
540 "Can do VLCR only on single block loops");
542 bool Changed
= false;
545 LLVM_DEBUG(dbgs() << "Working on Loop: " << *CurLoop
->getHeader() << "\n");
547 // Reset datastructures.
551 findLoopCarriedDeps();
553 if (ReuseCandidate
.isDefined()) {
558 llvm::for_each(Dependences
, std::default_delete
<DepChain
>());
563 void HexagonVectorLoopCarriedReuse::findDepChainFromPHI(Instruction
*I
,
565 PHINode
*PN
= dyn_cast
<PHINode
>(I
);
570 auto NumIncomingValues
= PN
->getNumIncomingValues();
571 if (NumIncomingValues
!= 2) {
576 BasicBlock
*BB
= PN
->getParent();
577 if (BB
!= CurLoop
->getHeader()) {
582 Value
*BEVal
= PN
->getIncomingValueForBlock(BB
);
583 Instruction
*BEInst
= dyn_cast
<Instruction
>(BEVal
);
584 // This is a single block loop with a preheader, so at least
585 // one value should come over the backedge.
586 assert(BEInst
&& "There should be a value over the backedge");
589 PN
->getIncomingValueForBlock(CurLoop
->getLoopPreheader());
590 if(!PreHdrVal
|| !isa
<Instruction
>(PreHdrVal
)) {
595 findDepChainFromPHI(BEInst
, D
);
599 bool HexagonVectorLoopCarriedReuse::isDepChainBtwn(Instruction
*I1
,
602 for (auto *D
: Dependences
) {
603 if (D
->front() == I1
&& D
->back() == I2
&& D
->iterations() == Iters
)
609 DepChain
*HexagonVectorLoopCarriedReuse::getDepChainBtwn(Instruction
*I1
,
611 for (auto *D
: Dependences
) {
612 if (D
->front() == I1
&& D
->back() == I2
)
618 void HexagonVectorLoopCarriedReuse::findLoopCarriedDeps() {
619 BasicBlock
*BB
= CurLoop
->getHeader();
620 for (auto I
= BB
->begin(), E
= BB
->end(); I
!= E
&& isa
<PHINode
>(I
); ++I
) {
621 auto *PN
= cast
<PHINode
>(I
);
622 if (!isa
<VectorType
>(PN
->getType()))
625 DepChain
*D
= new DepChain();
626 findDepChainFromPHI(PN
, *D
);
628 Dependences
.insert(D
);
632 LLVM_DEBUG(dbgs() << "Found " << Dependences
.size() << " dependences\n");
633 LLVM_DEBUG(for (size_t i
= 0; i
< Dependences
.size();
634 ++i
) { dbgs() << *Dependences
[i
] << "\n"; });
637 Pass
*llvm::createHexagonVectorLoopCarriedReusePass() {
638 return new HexagonVectorLoopCarriedReuse();