1 //===- HexagonVectorLoopCarriedReuse.cpp ----------------------------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // This pass removes the computation of provably redundant expressions that have
10 // been computed earlier in a previous iteration. It relies on the use of PHIs
11 // to identify loop carried dependences. This is scalar replacement for vector
14 //===----------------------------------------------------------------------===//
16 #include "HexagonVectorLoopCarriedReuse.h"
17 #include "llvm/ADT/SetVector.h"
18 #include "llvm/ADT/SmallVector.h"
19 #include "llvm/ADT/Statistic.h"
20 #include "llvm/Analysis/LoopInfo.h"
21 #include "llvm/Analysis/LoopPass.h"
22 #include "llvm/IR/BasicBlock.h"
23 #include "llvm/IR/DerivedTypes.h"
24 #include "llvm/IR/IRBuilder.h"
25 #include "llvm/IR/Instruction.h"
26 #include "llvm/IR/Instructions.h"
27 #include "llvm/IR/IntrinsicInst.h"
28 #include "llvm/IR/Intrinsics.h"
29 #include "llvm/IR/IntrinsicsHexagon.h"
30 #include "llvm/IR/Use.h"
31 #include "llvm/IR/User.h"
32 #include "llvm/IR/Value.h"
33 #include "llvm/InitializePasses.h"
34 #include "llvm/Pass.h"
35 #include "llvm/Support/Casting.h"
36 #include "llvm/Support/CommandLine.h"
37 #include "llvm/Support/Compiler.h"
38 #include "llvm/Support/Debug.h"
39 #include "llvm/Support/raw_ostream.h"
40 #include "llvm/Transforms/Scalar.h"
41 #include "llvm/Transforms/Utils.h"
51 #define DEBUG_TYPE "hexagon-vlcr"
53 STATISTIC(HexagonNumVectorLoopCarriedReuse
,
54 "Number of values that were reused from a previous iteration.");
56 static cl::opt
<int> HexagonVLCRIterationLim("hexagon-vlcr-iteration-lim",
58 cl::desc("Maximum distance of loop carried dependences that are handled"),
59 cl::init(2), cl::ZeroOrMore
);
63 void initializeHexagonVectorLoopCarriedReuseLegacyPassPass(PassRegistry
&);
64 Pass
*createHexagonVectorLoopCarriedReuseLegacyPass();
66 } // end namespace llvm
70 // See info about DepChain in the comments at the top of this file.
71 using ChainOfDependences
= SmallVector
<Instruction
*, 4>;
74 ChainOfDependences Chain
;
77 bool isIdentical(DepChain
&Other
) const {
78 if (Other
.size() != size())
80 ChainOfDependences
&OtherChain
= Other
.getChain();
81 for (int i
= 0; i
< size(); ++i
) {
82 if (Chain
[i
] != OtherChain
[i
])
88 ChainOfDependences
&getChain() {
100 void push_back(Instruction
*I
) {
104 int iterations() const {
108 Instruction
*front() const {
109 return Chain
.front();
112 Instruction
*back() const {
116 Instruction
*&operator[](const int index
) {
120 friend raw_ostream
&operator<< (raw_ostream
&OS
, const DepChain
&D
);
123 LLVM_ATTRIBUTE_UNUSED
124 raw_ostream
&operator<<(raw_ostream
&OS
, const DepChain
&D
) {
125 const ChainOfDependences
&CD
= D
.Chain
;
126 int ChainSize
= CD
.size();
127 OS
<< "**DepChain Start::**\n";
128 for (int i
= 0; i
< ChainSize
-1; ++i
) {
129 OS
<< *(CD
[i
]) << " -->\n";
131 OS
<< *CD
[ChainSize
-1] << "\n";
136 Instruction
*Inst2Replace
= nullptr;
138 // In the new PHI node that we'll construct this is the value that'll be
139 // used over the backedge. This is the value that gets reused from a
140 // previous iteration.
141 Instruction
*BackedgeInst
= nullptr;
142 std::map
<Instruction
*, DepChain
*> DepChains
;
145 ReuseValue() = default;
148 Inst2Replace
= nullptr;
149 BackedgeInst
= nullptr;
153 bool isDefined() { return Inst2Replace
!= nullptr; }
156 LLVM_ATTRIBUTE_UNUSED
157 raw_ostream
&operator<<(raw_ostream
&OS
, const ReuseValue
&RU
) {
158 OS
<< "** ReuseValue ***\n";
159 OS
<< "Instruction to Replace: " << *(RU
.Inst2Replace
) << "\n";
160 OS
<< "Backedge Instruction: " << *(RU
.BackedgeInst
) << "\n";
164 class HexagonVectorLoopCarriedReuseLegacyPass
: public LoopPass
{
168 explicit HexagonVectorLoopCarriedReuseLegacyPass() : LoopPass(ID
) {
169 PassRegistry
*PR
= PassRegistry::getPassRegistry();
170 initializeHexagonVectorLoopCarriedReuseLegacyPassPass(*PR
);
173 StringRef
getPassName() const override
{
174 return "Hexagon-specific loop carried reuse for HVX vectors";
177 void getAnalysisUsage(AnalysisUsage
&AU
) const override
{
178 AU
.addRequiredID(LoopSimplifyID
);
179 AU
.addRequiredID(LCSSAID
);
180 AU
.addPreservedID(LCSSAID
);
181 AU
.setPreservesCFG();
184 bool runOnLoop(Loop
*L
, LPPassManager
&LPM
) override
;
187 class HexagonVectorLoopCarriedReuse
{
189 HexagonVectorLoopCarriedReuse(Loop
*L
) : CurLoop(L
){};
194 SetVector
<DepChain
*> Dependences
;
195 std::set
<Instruction
*> ReplacedInsts
;
197 ReuseValue ReuseCandidate
;
200 void findLoopCarriedDeps();
201 void findValueToReuse();
202 void findDepChainFromPHI(Instruction
*I
, DepChain
&D
);
204 Value
*findValueInBlock(Value
*Op
, BasicBlock
*BB
);
205 DepChain
*getDepChainBtwn(Instruction
*I1
, Instruction
*I2
, int Iters
);
206 bool isEquivalentOperation(Instruction
*I1
, Instruction
*I2
);
207 bool canReplace(Instruction
*I
);
208 bool isCallInstCommutative(CallInst
*C
);
211 } // end anonymous namespace
213 char HexagonVectorLoopCarriedReuseLegacyPass::ID
= 0;
215 INITIALIZE_PASS_BEGIN(HexagonVectorLoopCarriedReuseLegacyPass
, "hexagon-vlcr",
216 "Hexagon-specific predictive commoning for HVX vectors",
218 INITIALIZE_PASS_DEPENDENCY(LoopSimplify
)
219 INITIALIZE_PASS_DEPENDENCY(LCSSAWrapperPass
)
220 INITIALIZE_PASS_END(HexagonVectorLoopCarriedReuseLegacyPass
, "hexagon-vlcr",
221 "Hexagon-specific predictive commoning for HVX vectors",
225 HexagonVectorLoopCarriedReusePass::run(Loop
&L
, LoopAnalysisManager
&LAM
,
226 LoopStandardAnalysisResults
&AR
,
228 HexagonVectorLoopCarriedReuse
Vlcr(&L
);
230 return PreservedAnalyses::all();
231 PreservedAnalyses PA
;
232 PA
.preserveSet
<CFGAnalyses
>();
236 bool HexagonVectorLoopCarriedReuseLegacyPass::runOnLoop(Loop
*L
,
237 LPPassManager
&LPM
) {
240 HexagonVectorLoopCarriedReuse
Vlcr(L
);
244 bool HexagonVectorLoopCarriedReuse::run() {
245 if (!CurLoop
->getLoopPreheader())
248 // Work only on innermost loops.
249 if (!CurLoop
->getSubLoops().empty())
252 // Work only on single basic blocks loops.
253 if (CurLoop
->getNumBlocks() != 1)
259 bool HexagonVectorLoopCarriedReuse::isCallInstCommutative(CallInst
*C
) {
260 switch (C
->getCalledFunction()->getIntrinsicID()) {
261 case Intrinsic::hexagon_V6_vaddb
:
262 case Intrinsic::hexagon_V6_vaddb_128B
:
263 case Intrinsic::hexagon_V6_vaddh
:
264 case Intrinsic::hexagon_V6_vaddh_128B
:
265 case Intrinsic::hexagon_V6_vaddw
:
266 case Intrinsic::hexagon_V6_vaddw_128B
:
267 case Intrinsic::hexagon_V6_vaddubh
:
268 case Intrinsic::hexagon_V6_vaddubh_128B
:
269 case Intrinsic::hexagon_V6_vadduhw
:
270 case Intrinsic::hexagon_V6_vadduhw_128B
:
271 case Intrinsic::hexagon_V6_vaddhw
:
272 case Intrinsic::hexagon_V6_vaddhw_128B
:
273 case Intrinsic::hexagon_V6_vmaxb
:
274 case Intrinsic::hexagon_V6_vmaxb_128B
:
275 case Intrinsic::hexagon_V6_vmaxh
:
276 case Intrinsic::hexagon_V6_vmaxh_128B
:
277 case Intrinsic::hexagon_V6_vmaxw
:
278 case Intrinsic::hexagon_V6_vmaxw_128B
:
279 case Intrinsic::hexagon_V6_vmaxub
:
280 case Intrinsic::hexagon_V6_vmaxub_128B
:
281 case Intrinsic::hexagon_V6_vmaxuh
:
282 case Intrinsic::hexagon_V6_vmaxuh_128B
:
283 case Intrinsic::hexagon_V6_vminub
:
284 case Intrinsic::hexagon_V6_vminub_128B
:
285 case Intrinsic::hexagon_V6_vminuh
:
286 case Intrinsic::hexagon_V6_vminuh_128B
:
287 case Intrinsic::hexagon_V6_vminb
:
288 case Intrinsic::hexagon_V6_vminb_128B
:
289 case Intrinsic::hexagon_V6_vminh
:
290 case Intrinsic::hexagon_V6_vminh_128B
:
291 case Intrinsic::hexagon_V6_vminw
:
292 case Intrinsic::hexagon_V6_vminw_128B
:
293 case Intrinsic::hexagon_V6_vmpyub
:
294 case Intrinsic::hexagon_V6_vmpyub_128B
:
295 case Intrinsic::hexagon_V6_vmpyuh
:
296 case Intrinsic::hexagon_V6_vmpyuh_128B
:
297 case Intrinsic::hexagon_V6_vavgub
:
298 case Intrinsic::hexagon_V6_vavgub_128B
:
299 case Intrinsic::hexagon_V6_vavgh
:
300 case Intrinsic::hexagon_V6_vavgh_128B
:
301 case Intrinsic::hexagon_V6_vavguh
:
302 case Intrinsic::hexagon_V6_vavguh_128B
:
303 case Intrinsic::hexagon_V6_vavgw
:
304 case Intrinsic::hexagon_V6_vavgw_128B
:
305 case Intrinsic::hexagon_V6_vavgb
:
306 case Intrinsic::hexagon_V6_vavgb_128B
:
307 case Intrinsic::hexagon_V6_vavguw
:
308 case Intrinsic::hexagon_V6_vavguw_128B
:
309 case Intrinsic::hexagon_V6_vabsdiffh
:
310 case Intrinsic::hexagon_V6_vabsdiffh_128B
:
311 case Intrinsic::hexagon_V6_vabsdiffub
:
312 case Intrinsic::hexagon_V6_vabsdiffub_128B
:
313 case Intrinsic::hexagon_V6_vabsdiffuh
:
314 case Intrinsic::hexagon_V6_vabsdiffuh_128B
:
315 case Intrinsic::hexagon_V6_vabsdiffw
:
316 case Intrinsic::hexagon_V6_vabsdiffw_128B
:
323 bool HexagonVectorLoopCarriedReuse::isEquivalentOperation(Instruction
*I1
,
325 if (!I1
->isSameOperationAs(I2
))
327 // This check is in place specifically for intrinsics. isSameOperationAs will
328 // return two for any two hexagon intrinsics because they are essentially the
329 // same instruciton (CallInst). We need to scratch the surface to see if they
330 // are calls to the same function.
331 if (CallInst
*C1
= dyn_cast
<CallInst
>(I1
)) {
332 if (CallInst
*C2
= dyn_cast
<CallInst
>(I2
)) {
333 if (C1
->getCalledFunction() != C2
->getCalledFunction())
338 // If both the Instructions are of Vector Type and any of the element
339 // is integer constant, check their values too for equivalence.
340 if (I1
->getType()->isVectorTy() && I2
->getType()->isVectorTy()) {
341 unsigned NumOperands
= I1
->getNumOperands();
342 for (unsigned i
= 0; i
< NumOperands
; ++i
) {
343 ConstantInt
*C1
= dyn_cast
<ConstantInt
>(I1
->getOperand(i
));
344 ConstantInt
*C2
= dyn_cast
<ConstantInt
>(I2
->getOperand(i
));
347 if (C1
->getSExtValue() != C2
->getSExtValue())
355 bool HexagonVectorLoopCarriedReuse::canReplace(Instruction
*I
) {
356 const IntrinsicInst
*II
= dyn_cast
<IntrinsicInst
>(I
);
360 switch (II
->getIntrinsicID()) {
361 case Intrinsic::hexagon_V6_hi
:
362 case Intrinsic::hexagon_V6_lo
:
363 case Intrinsic::hexagon_V6_hi_128B
:
364 case Intrinsic::hexagon_V6_lo_128B
:
365 LLVM_DEBUG(dbgs() << "Not considering for reuse: " << *II
<< "\n");
371 void HexagonVectorLoopCarriedReuse::findValueToReuse() {
372 for (auto *D
: Dependences
) {
373 LLVM_DEBUG(dbgs() << "Processing dependence " << *(D
->front()) << "\n");
374 if (D
->iterations() > HexagonVLCRIterationLim
) {
377 << ".. Skipping because number of iterations > than the limit\n");
381 PHINode
*PN
= cast
<PHINode
>(D
->front());
382 Instruction
*BEInst
= D
->back();
383 int Iters
= D
->iterations();
384 BasicBlock
*BB
= PN
->getParent();
385 LLVM_DEBUG(dbgs() << "Checking if any uses of " << *PN
386 << " can be reused\n");
388 SmallVector
<Instruction
*, 4> PNUsers
;
389 for (Use
&U
: PN
->uses()) {
390 Instruction
*User
= cast
<Instruction
>(U
.getUser());
392 if (User
->getParent() != BB
)
394 if (ReplacedInsts
.count(User
)) {
395 LLVM_DEBUG(dbgs() << *User
396 << " has already been replaced. Skipping...\n");
399 if (isa
<PHINode
>(User
))
401 if (User
->mayHaveSideEffects())
403 if (!canReplace(User
))
406 PNUsers
.push_back(User
);
408 LLVM_DEBUG(dbgs() << PNUsers
.size() << " use(s) of the PHI in the block\n");
410 // For each interesting use I of PN, find an Instruction BEUser that
411 // performs the same operation as I on BEInst and whose other operands,
412 // if any, can also be rematerialized in OtherBB. We stop when we find the
413 // first such Instruction BEUser. This is because once BEUser is
414 // rematerialized in OtherBB, we may find more such "fixup" opportunities
415 // in this block. So, we'll start over again.
416 for (Instruction
*I
: PNUsers
) {
417 for (Use
&U
: BEInst
->uses()) {
418 Instruction
*BEUser
= cast
<Instruction
>(U
.getUser());
420 if (BEUser
->getParent() != BB
)
422 if (!isEquivalentOperation(I
, BEUser
))
425 int NumOperands
= I
->getNumOperands();
427 // Take operands of each PNUser one by one and try to find DepChain
428 // with every operand of the BEUser. If any of the operands of BEUser
429 // has DepChain with current operand of the PNUser, break the matcher
430 // loop. Keep doing this for Every PNUser operand. If PNUser operand
431 // does not have DepChain with any of the BEUser operand, break the
432 // outer matcher loop, mark the BEUser as null and reset the ReuseCandidate.
433 // This ensures that DepChain exist for all the PNUser operand with
434 // BEUser operand. This also ensures that DepChains are independent of
435 // the positions in PNUser and BEUser.
436 std::map
<Instruction
*, DepChain
*> DepChains
;
437 CallInst
*C1
= dyn_cast
<CallInst
>(I
);
438 if ((I
&& I
->isCommutative()) || (C1
&& isCallInstCommutative(C1
))) {
440 for (int OpNo
= 0; OpNo
< NumOperands
; ++OpNo
) {
441 Value
*Op
= I
->getOperand(OpNo
);
442 Instruction
*OpInst
= dyn_cast
<Instruction
>(Op
);
444 for (int T
= 0; T
< NumOperands
; ++T
) {
445 Value
*BEOp
= BEUser
->getOperand(T
);
446 Instruction
*BEOpInst
= dyn_cast
<Instruction
>(BEOp
);
447 if (!OpInst
&& !BEOpInst
) {
454 if ((OpInst
&& !BEOpInst
) || (!OpInst
&& BEOpInst
))
457 DepChain
*D
= getDepChainBtwn(OpInst
, BEOpInst
, Iters
);
461 DepChains
[OpInst
] = D
;
472 for (int OpNo
= 0; OpNo
< NumOperands
; ++OpNo
) {
473 Value
*Op
= I
->getOperand(OpNo
);
474 Value
*BEOp
= BEUser
->getOperand(OpNo
);
476 Instruction
*OpInst
= dyn_cast
<Instruction
>(Op
);
480 // Do not allow reuse to occur when the operands may be different
486 Instruction
*BEOpInst
= dyn_cast
<Instruction
>(BEOp
);
487 DepChain
*D
= getDepChainBtwn(OpInst
, BEOpInst
, Iters
);
490 DepChains
[OpInst
] = D
;
498 LLVM_DEBUG(dbgs() << "Found Value for reuse.\n");
499 ReuseCandidate
.Inst2Replace
= I
;
500 ReuseCandidate
.BackedgeInst
= BEUser
;
501 ReuseCandidate
.DepChains
= DepChains
;
502 ReuseCandidate
.Iterations
= Iters
;
505 ReuseCandidate
.reset();
509 ReuseCandidate
.reset();
512 Value
*HexagonVectorLoopCarriedReuse::findValueInBlock(Value
*Op
,
514 PHINode
*PN
= dyn_cast
<PHINode
>(Op
);
516 Value
*ValueInBlock
= PN
->getIncomingValueForBlock(BB
);
520 void HexagonVectorLoopCarriedReuse::reuseValue() {
521 LLVM_DEBUG(dbgs() << ReuseCandidate
);
522 Instruction
*Inst2Replace
= ReuseCandidate
.Inst2Replace
;
523 Instruction
*BEInst
= ReuseCandidate
.BackedgeInst
;
524 int NumOperands
= Inst2Replace
->getNumOperands();
525 std::map
<Instruction
*, DepChain
*> &DepChains
= ReuseCandidate
.DepChains
;
526 int Iterations
= ReuseCandidate
.Iterations
;
527 BasicBlock
*LoopPH
= CurLoop
->getLoopPreheader();
528 assert(!DepChains
.empty() && "No DepChains");
529 LLVM_DEBUG(dbgs() << "reuseValue is making the following changes\n");
531 SmallVector
<Instruction
*, 4> InstsInPreheader
;
532 for (int i
= 0; i
< Iterations
; ++i
) {
533 Instruction
*InstInPreheader
= Inst2Replace
->clone();
534 SmallVector
<Value
*, 4> Ops
;
535 for (int j
= 0; j
< NumOperands
; ++j
) {
536 Instruction
*I
= dyn_cast
<Instruction
>(Inst2Replace
->getOperand(j
));
539 // Get the DepChain corresponding to this operand.
540 DepChain
&D
= *DepChains
[I
];
541 // Get the PHI for the iteration number and find
542 // the incoming value from the Loop Preheader for
544 Value
*ValInPreheader
= findValueInBlock(D
[i
], LoopPH
);
545 InstInPreheader
->setOperand(j
, ValInPreheader
);
547 InstsInPreheader
.push_back(InstInPreheader
);
548 InstInPreheader
->setName(Inst2Replace
->getName() + ".hexagon.vlcr");
549 InstInPreheader
->insertBefore(LoopPH
->getTerminator());
550 LLVM_DEBUG(dbgs() << "Added " << *InstInPreheader
<< " to "
551 << LoopPH
->getName() << "\n");
553 BasicBlock
*BB
= BEInst
->getParent();
555 IRB
.SetInsertPoint(BB
->getFirstNonPHI());
556 Value
*BEVal
= BEInst
;
558 for (int i
= Iterations
-1; i
>=0 ; --i
) {
559 Instruction
*InstInPreheader
= InstsInPreheader
[i
];
560 NewPhi
= IRB
.CreatePHI(InstInPreheader
->getType(), 2);
561 NewPhi
->addIncoming(InstInPreheader
, LoopPH
);
562 NewPhi
->addIncoming(BEVal
, BB
);
563 LLVM_DEBUG(dbgs() << "Adding " << *NewPhi
<< " to " << BB
->getName()
567 // We are in LCSSA form. So, a value defined inside the Loop is used only
568 // inside the loop. So, the following is safe.
569 Inst2Replace
->replaceAllUsesWith(NewPhi
);
570 ReplacedInsts
.insert(Inst2Replace
);
571 ++HexagonNumVectorLoopCarriedReuse
;
574 bool HexagonVectorLoopCarriedReuse::doVLCR() {
575 assert(CurLoop
->getSubLoops().empty() &&
576 "Can do VLCR on the innermost loop only");
577 assert((CurLoop
->getNumBlocks() == 1) &&
578 "Can do VLCR only on single block loops");
580 bool Changed
= false;
583 LLVM_DEBUG(dbgs() << "Working on Loop: " << *CurLoop
->getHeader() << "\n");
585 // Reset datastructures.
589 findLoopCarriedDeps();
591 if (ReuseCandidate
.isDefined()) {
596 llvm::for_each(Dependences
, std::default_delete
<DepChain
>());
601 void HexagonVectorLoopCarriedReuse::findDepChainFromPHI(Instruction
*I
,
603 PHINode
*PN
= dyn_cast
<PHINode
>(I
);
608 auto NumIncomingValues
= PN
->getNumIncomingValues();
609 if (NumIncomingValues
!= 2) {
614 BasicBlock
*BB
= PN
->getParent();
615 if (BB
!= CurLoop
->getHeader()) {
620 Value
*BEVal
= PN
->getIncomingValueForBlock(BB
);
621 Instruction
*BEInst
= dyn_cast
<Instruction
>(BEVal
);
622 // This is a single block loop with a preheader, so at least
623 // one value should come over the backedge.
624 assert(BEInst
&& "There should be a value over the backedge");
627 PN
->getIncomingValueForBlock(CurLoop
->getLoopPreheader());
628 if(!PreHdrVal
|| !isa
<Instruction
>(PreHdrVal
)) {
633 findDepChainFromPHI(BEInst
, D
);
637 DepChain
*HexagonVectorLoopCarriedReuse::getDepChainBtwn(Instruction
*I1
,
640 for (auto *D
: Dependences
) {
641 if (D
->front() == I1
&& D
->back() == I2
&& D
->iterations() == Iters
)
647 void HexagonVectorLoopCarriedReuse::findLoopCarriedDeps() {
648 BasicBlock
*BB
= CurLoop
->getHeader();
649 for (auto I
= BB
->begin(), E
= BB
->end(); I
!= E
&& isa
<PHINode
>(I
); ++I
) {
650 auto *PN
= cast
<PHINode
>(I
);
651 if (!isa
<VectorType
>(PN
->getType()))
654 DepChain
*D
= new DepChain();
655 findDepChainFromPHI(PN
, *D
);
657 Dependences
.insert(D
);
661 LLVM_DEBUG(dbgs() << "Found " << Dependences
.size() << " dependences\n");
662 LLVM_DEBUG(for (const DepChain
*D
: Dependences
) dbgs() << *D
<< "\n";);
665 Pass
*llvm::createHexagonVectorLoopCarriedReuseLegacyPass() {
666 return new HexagonVectorLoopCarriedReuseLegacyPass();