1 //===- HexagonVectorLoopCarriedReuse.cpp ----------------------------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // This pass removes the computation of provably redundant expressions that have
10 // been computed earlier in a previous iteration. It relies on the use of PHIs
11 // to identify loop carried dependences. This is scalar replacement for vector
14 //===----------------------------------------------------------------------===//
16 #include "HexagonVectorLoopCarriedReuse.h"
17 #include "llvm/ADT/SetVector.h"
18 #include "llvm/ADT/SmallVector.h"
19 #include "llvm/ADT/Statistic.h"
20 #include "llvm/Analysis/LoopInfo.h"
21 #include "llvm/Analysis/LoopPass.h"
22 #include "llvm/IR/BasicBlock.h"
23 #include "llvm/IR/DerivedTypes.h"
24 #include "llvm/IR/IRBuilder.h"
25 #include "llvm/IR/Instruction.h"
26 #include "llvm/IR/Instructions.h"
27 #include "llvm/IR/IntrinsicInst.h"
28 #include "llvm/IR/Intrinsics.h"
29 #include "llvm/IR/IntrinsicsHexagon.h"
30 #include "llvm/IR/Use.h"
31 #include "llvm/IR/User.h"
32 #include "llvm/IR/Value.h"
33 #include "llvm/InitializePasses.h"
34 #include "llvm/Pass.h"
35 #include "llvm/Support/Casting.h"
36 #include "llvm/Support/CommandLine.h"
37 #include "llvm/Support/Compiler.h"
38 #include "llvm/Support/Debug.h"
39 #include "llvm/Support/raw_ostream.h"
40 #include "llvm/Transforms/Scalar.h"
41 #include "llvm/Transforms/Utils.h"
51 #define DEBUG_TYPE "hexagon-vlcr"
53 STATISTIC(HexagonNumVectorLoopCarriedReuse
,
54 "Number of values that were reused from a previous iteration.");
56 static cl::opt
<int> HexagonVLCRIterationLim("hexagon-vlcr-iteration-lim",
58 cl::desc("Maximum distance of loop carried dependences that are handled"),
59 cl::init(2), cl::ZeroOrMore
);
63 void initializeHexagonVectorLoopCarriedReuseLegacyPassPass(PassRegistry
&);
64 Pass
*createHexagonVectorLoopCarriedReuseLegacyPass();
66 } // end namespace llvm
70 // See info about DepChain in the comments at the top of this file.
71 using ChainOfDependences
= SmallVector
<Instruction
*, 4>;
74 ChainOfDependences Chain
;
77 bool isIdentical(DepChain
&Other
) const {
78 if (Other
.size() != size())
80 ChainOfDependences
&OtherChain
= Other
.getChain();
81 for (int i
= 0; i
< size(); ++i
) {
82 if (Chain
[i
] != OtherChain
[i
])
88 ChainOfDependences
&getChain() {
100 void push_back(Instruction
*I
) {
104 int iterations() const {
108 Instruction
*front() const {
109 return Chain
.front();
112 Instruction
*back() const {
116 Instruction
*&operator[](const int index
) {
120 friend raw_ostream
&operator<< (raw_ostream
&OS
, const DepChain
&D
);
123 LLVM_ATTRIBUTE_UNUSED
124 raw_ostream
&operator<<(raw_ostream
&OS
, const DepChain
&D
) {
125 const ChainOfDependences
&CD
= D
.Chain
;
126 int ChainSize
= CD
.size();
127 OS
<< "**DepChain Start::**\n";
128 for (int i
= 0; i
< ChainSize
-1; ++i
) {
129 OS
<< *(CD
[i
]) << " -->\n";
131 OS
<< *CD
[ChainSize
-1] << "\n";
136 Instruction
*Inst2Replace
= nullptr;
138 // In the new PHI node that we'll construct this is the value that'll be
139 // used over the backedge. This is the value that gets reused from a
140 // previous iteration.
141 Instruction
*BackedgeInst
= nullptr;
142 std::map
<Instruction
*, DepChain
*> DepChains
;
145 ReuseValue() = default;
148 Inst2Replace
= nullptr;
149 BackedgeInst
= nullptr;
153 bool isDefined() { return Inst2Replace
!= nullptr; }
156 LLVM_ATTRIBUTE_UNUSED
157 raw_ostream
&operator<<(raw_ostream
&OS
, const ReuseValue
&RU
) {
158 OS
<< "** ReuseValue ***\n";
159 OS
<< "Instruction to Replace: " << *(RU
.Inst2Replace
) << "\n";
160 OS
<< "Backedge Instruction: " << *(RU
.BackedgeInst
) << "\n";
164 class HexagonVectorLoopCarriedReuseLegacyPass
: public LoopPass
{
168 explicit HexagonVectorLoopCarriedReuseLegacyPass() : LoopPass(ID
) {
169 PassRegistry
*PR
= PassRegistry::getPassRegistry();
170 initializeHexagonVectorLoopCarriedReuseLegacyPassPass(*PR
);
173 StringRef
getPassName() const override
{
174 return "Hexagon-specific loop carried reuse for HVX vectors";
177 void getAnalysisUsage(AnalysisUsage
&AU
) const override
{
178 AU
.addRequiredID(LoopSimplifyID
);
179 AU
.addRequiredID(LCSSAID
);
180 AU
.addPreservedID(LCSSAID
);
181 AU
.setPreservesCFG();
184 bool runOnLoop(Loop
*L
, LPPassManager
&LPM
) override
;
187 class HexagonVectorLoopCarriedReuse
{
189 HexagonVectorLoopCarriedReuse(Loop
*L
) : CurLoop(L
){};
194 SetVector
<DepChain
*> Dependences
;
195 std::set
<Instruction
*> ReplacedInsts
;
197 ReuseValue ReuseCandidate
;
200 void findLoopCarriedDeps();
201 void findValueToReuse();
202 void findDepChainFromPHI(Instruction
*I
, DepChain
&D
);
204 Value
*findValueInBlock(Value
*Op
, BasicBlock
*BB
);
205 DepChain
*getDepChainBtwn(Instruction
*I1
, Instruction
*I2
, int Iters
);
206 bool isEquivalentOperation(Instruction
*I1
, Instruction
*I2
);
207 bool canReplace(Instruction
*I
);
208 bool isCallInstCommutative(CallInst
*C
);
211 } // end anonymous namespace
213 char HexagonVectorLoopCarriedReuseLegacyPass::ID
= 0;
215 INITIALIZE_PASS_BEGIN(HexagonVectorLoopCarriedReuseLegacyPass
, "hexagon-vlcr",
216 "Hexagon-specific predictive commoning for HVX vectors",
218 INITIALIZE_PASS_DEPENDENCY(LoopSimplify
)
219 INITIALIZE_PASS_DEPENDENCY(LCSSAWrapperPass
)
220 INITIALIZE_PASS_END(HexagonVectorLoopCarriedReuseLegacyPass
, "hexagon-vlcr",
221 "Hexagon-specific predictive commoning for HVX vectors",
225 HexagonVectorLoopCarriedReusePass::run(Loop
&L
, LoopAnalysisManager
&LAM
,
226 LoopStandardAnalysisResults
&AR
,
228 HexagonVectorLoopCarriedReuse
Vlcr(&L
);
230 return PreservedAnalyses::all();
231 PreservedAnalyses PA
;
232 PA
.preserveSet
<CFGAnalyses
>();
236 bool HexagonVectorLoopCarriedReuseLegacyPass::runOnLoop(Loop
*L
,
237 LPPassManager
&LPM
) {
240 HexagonVectorLoopCarriedReuse
Vlcr(L
);
244 bool HexagonVectorLoopCarriedReuse::run() {
245 if (!CurLoop
->getLoopPreheader())
248 // Work only on innermost loops.
249 if (!CurLoop
->getSubLoops().empty())
252 // Work only on single basic blocks loops.
253 if (CurLoop
->getNumBlocks() != 1)
259 bool HexagonVectorLoopCarriedReuse::isCallInstCommutative(CallInst
*C
) {
260 switch (C
->getCalledFunction()->getIntrinsicID()) {
261 case Intrinsic::hexagon_V6_vaddb
:
262 case Intrinsic::hexagon_V6_vaddb_128B
:
263 case Intrinsic::hexagon_V6_vaddh
:
264 case Intrinsic::hexagon_V6_vaddh_128B
:
265 case Intrinsic::hexagon_V6_vaddw
:
266 case Intrinsic::hexagon_V6_vaddw_128B
:
267 case Intrinsic::hexagon_V6_vaddubh
:
268 case Intrinsic::hexagon_V6_vaddubh_128B
:
269 case Intrinsic::hexagon_V6_vadduhw
:
270 case Intrinsic::hexagon_V6_vadduhw_128B
:
271 case Intrinsic::hexagon_V6_vaddhw
:
272 case Intrinsic::hexagon_V6_vaddhw_128B
:
273 case Intrinsic::hexagon_V6_vmaxb
:
274 case Intrinsic::hexagon_V6_vmaxb_128B
:
275 case Intrinsic::hexagon_V6_vmaxh
:
276 case Intrinsic::hexagon_V6_vmaxh_128B
:
277 case Intrinsic::hexagon_V6_vmaxw
:
278 case Intrinsic::hexagon_V6_vmaxw_128B
:
279 case Intrinsic::hexagon_V6_vmaxub
:
280 case Intrinsic::hexagon_V6_vmaxub_128B
:
281 case Intrinsic::hexagon_V6_vmaxuh
:
282 case Intrinsic::hexagon_V6_vmaxuh_128B
:
283 case Intrinsic::hexagon_V6_vminub
:
284 case Intrinsic::hexagon_V6_vminub_128B
:
285 case Intrinsic::hexagon_V6_vminuh
:
286 case Intrinsic::hexagon_V6_vminuh_128B
:
287 case Intrinsic::hexagon_V6_vminb
:
288 case Intrinsic::hexagon_V6_vminb_128B
:
289 case Intrinsic::hexagon_V6_vminh
:
290 case Intrinsic::hexagon_V6_vminh_128B
:
291 case Intrinsic::hexagon_V6_vminw
:
292 case Intrinsic::hexagon_V6_vminw_128B
:
293 case Intrinsic::hexagon_V6_vmpyub
:
294 case Intrinsic::hexagon_V6_vmpyub_128B
:
295 case Intrinsic::hexagon_V6_vmpyuh
:
296 case Intrinsic::hexagon_V6_vmpyuh_128B
:
297 case Intrinsic::hexagon_V6_vavgub
:
298 case Intrinsic::hexagon_V6_vavgub_128B
:
299 case Intrinsic::hexagon_V6_vavgh
:
300 case Intrinsic::hexagon_V6_vavgh_128B
:
301 case Intrinsic::hexagon_V6_vavguh
:
302 case Intrinsic::hexagon_V6_vavguh_128B
:
303 case Intrinsic::hexagon_V6_vavgw
:
304 case Intrinsic::hexagon_V6_vavgw_128B
:
305 case Intrinsic::hexagon_V6_vavgb
:
306 case Intrinsic::hexagon_V6_vavgb_128B
:
307 case Intrinsic::hexagon_V6_vavguw
:
308 case Intrinsic::hexagon_V6_vavguw_128B
:
309 case Intrinsic::hexagon_V6_vabsdiffh
:
310 case Intrinsic::hexagon_V6_vabsdiffh_128B
:
311 case Intrinsic::hexagon_V6_vabsdiffub
:
312 case Intrinsic::hexagon_V6_vabsdiffub_128B
:
313 case Intrinsic::hexagon_V6_vabsdiffuh
:
314 case Intrinsic::hexagon_V6_vabsdiffuh_128B
:
315 case Intrinsic::hexagon_V6_vabsdiffw
:
316 case Intrinsic::hexagon_V6_vabsdiffw_128B
:
323 bool HexagonVectorLoopCarriedReuse::isEquivalentOperation(Instruction
*I1
,
325 if (!I1
->isSameOperationAs(I2
))
327 // This check is in place specifically for intrinsics. isSameOperationAs will
328 // return two for any two hexagon intrinsics because they are essentially the
329 // same instruciton (CallInst). We need to scratch the surface to see if they
330 // are calls to the same function.
331 if (CallInst
*C1
= dyn_cast
<CallInst
>(I1
)) {
332 if (CallInst
*C2
= dyn_cast
<CallInst
>(I2
)) {
333 if (C1
->getCalledFunction() != C2
->getCalledFunction())
338 // If both the Instructions are of Vector Type and any of the element
339 // is integer constant, check their values too for equivalence.
340 if (I1
->getType()->isVectorTy() && I2
->getType()->isVectorTy()) {
341 unsigned NumOperands
= I1
->getNumOperands();
342 for (unsigned i
= 0; i
< NumOperands
; ++i
) {
343 ConstantInt
*C1
= dyn_cast
<ConstantInt
>(I1
->getOperand(i
));
344 ConstantInt
*C2
= dyn_cast
<ConstantInt
>(I2
->getOperand(i
));
347 if (C1
->getSExtValue() != C2
->getSExtValue())
355 bool HexagonVectorLoopCarriedReuse::canReplace(Instruction
*I
) {
356 const IntrinsicInst
*II
= dyn_cast
<IntrinsicInst
>(I
);
360 switch (II
->getIntrinsicID()) {
361 case Intrinsic::hexagon_V6_hi
:
362 case Intrinsic::hexagon_V6_lo
:
363 case Intrinsic::hexagon_V6_hi_128B
:
364 case Intrinsic::hexagon_V6_lo_128B
:
365 LLVM_DEBUG(dbgs() << "Not considering for reuse: " << *II
<< "\n");
371 void HexagonVectorLoopCarriedReuse::findValueToReuse() {
372 for (auto *D
: Dependences
) {
373 LLVM_DEBUG(dbgs() << "Processing dependence " << *(D
->front()) << "\n");
374 if (D
->iterations() > HexagonVLCRIterationLim
) {
377 << ".. Skipping because number of iterations > than the limit\n");
381 PHINode
*PN
= cast
<PHINode
>(D
->front());
382 Instruction
*BEInst
= D
->back();
383 int Iters
= D
->iterations();
384 BasicBlock
*BB
= PN
->getParent();
385 LLVM_DEBUG(dbgs() << "Checking if any uses of " << *PN
386 << " can be reused\n");
388 SmallVector
<Instruction
*, 4> PNUsers
;
389 for (auto UI
= PN
->use_begin(), E
= PN
->use_end(); UI
!= E
; ++UI
) {
391 Instruction
*User
= cast
<Instruction
>(U
.getUser());
393 if (User
->getParent() != BB
)
395 if (ReplacedInsts
.count(User
)) {
396 LLVM_DEBUG(dbgs() << *User
397 << " has already been replaced. Skipping...\n");
400 if (isa
<PHINode
>(User
))
402 if (User
->mayHaveSideEffects())
404 if (!canReplace(User
))
407 PNUsers
.push_back(User
);
409 LLVM_DEBUG(dbgs() << PNUsers
.size() << " use(s) of the PHI in the block\n");
411 // For each interesting use I of PN, find an Instruction BEUser that
412 // performs the same operation as I on BEInst and whose other operands,
413 // if any, can also be rematerialized in OtherBB. We stop when we find the
414 // first such Instruction BEUser. This is because once BEUser is
415 // rematerialized in OtherBB, we may find more such "fixup" opportunities
416 // in this block. So, we'll start over again.
417 for (Instruction
*I
: PNUsers
) {
418 for (auto UI
= BEInst
->use_begin(), E
= BEInst
->use_end(); UI
!= E
;
421 Instruction
*BEUser
= cast
<Instruction
>(U
.getUser());
423 if (BEUser
->getParent() != BB
)
425 if (!isEquivalentOperation(I
, BEUser
))
428 int NumOperands
= I
->getNumOperands();
430 // Take operands of each PNUser one by one and try to find DepChain
431 // with every operand of the BEUser. If any of the operands of BEUser
432 // has DepChain with current operand of the PNUser, break the matcher
433 // loop. Keep doing this for Every PNUser operand. If PNUser operand
434 // does not have DepChain with any of the BEUser operand, break the
435 // outer matcher loop, mark the BEUser as null and reset the ReuseCandidate.
436 // This ensures that DepChain exist for all the PNUser operand with
437 // BEUser operand. This also ensures that DepChains are independent of
438 // the positions in PNUser and BEUser.
439 std::map
<Instruction
*, DepChain
*> DepChains
;
440 CallInst
*C1
= dyn_cast
<CallInst
>(I
);
441 if ((I
&& I
->isCommutative()) || (C1
&& isCallInstCommutative(C1
))) {
443 for (int OpNo
= 0; OpNo
< NumOperands
; ++OpNo
) {
444 Value
*Op
= I
->getOperand(OpNo
);
445 Instruction
*OpInst
= dyn_cast
<Instruction
>(Op
);
447 for (int T
= 0; T
< NumOperands
; ++T
) {
448 Value
*BEOp
= BEUser
->getOperand(T
);
449 Instruction
*BEOpInst
= dyn_cast
<Instruction
>(BEOp
);
450 if (!OpInst
&& !BEOpInst
) {
457 if ((OpInst
&& !BEOpInst
) || (!OpInst
&& BEOpInst
))
460 DepChain
*D
= getDepChainBtwn(OpInst
, BEOpInst
, Iters
);
464 DepChains
[OpInst
] = D
;
475 for (int OpNo
= 0; OpNo
< NumOperands
; ++OpNo
) {
476 Value
*Op
= I
->getOperand(OpNo
);
477 Value
*BEOp
= BEUser
->getOperand(OpNo
);
479 Instruction
*OpInst
= dyn_cast
<Instruction
>(Op
);
483 // Do not allow reuse to occur when the operands may be different
489 Instruction
*BEOpInst
= dyn_cast
<Instruction
>(BEOp
);
490 DepChain
*D
= getDepChainBtwn(OpInst
, BEOpInst
, Iters
);
493 DepChains
[OpInst
] = D
;
501 LLVM_DEBUG(dbgs() << "Found Value for reuse.\n");
502 ReuseCandidate
.Inst2Replace
= I
;
503 ReuseCandidate
.BackedgeInst
= BEUser
;
504 ReuseCandidate
.DepChains
= DepChains
;
505 ReuseCandidate
.Iterations
= Iters
;
508 ReuseCandidate
.reset();
512 ReuseCandidate
.reset();
515 Value
*HexagonVectorLoopCarriedReuse::findValueInBlock(Value
*Op
,
517 PHINode
*PN
= dyn_cast
<PHINode
>(Op
);
519 Value
*ValueInBlock
= PN
->getIncomingValueForBlock(BB
);
523 void HexagonVectorLoopCarriedReuse::reuseValue() {
524 LLVM_DEBUG(dbgs() << ReuseCandidate
);
525 Instruction
*Inst2Replace
= ReuseCandidate
.Inst2Replace
;
526 Instruction
*BEInst
= ReuseCandidate
.BackedgeInst
;
527 int NumOperands
= Inst2Replace
->getNumOperands();
528 std::map
<Instruction
*, DepChain
*> &DepChains
= ReuseCandidate
.DepChains
;
529 int Iterations
= ReuseCandidate
.Iterations
;
530 BasicBlock
*LoopPH
= CurLoop
->getLoopPreheader();
531 assert(!DepChains
.empty() && "No DepChains");
532 LLVM_DEBUG(dbgs() << "reuseValue is making the following changes\n");
534 SmallVector
<Instruction
*, 4> InstsInPreheader
;
535 for (int i
= 0; i
< Iterations
; ++i
) {
536 Instruction
*InstInPreheader
= Inst2Replace
->clone();
537 SmallVector
<Value
*, 4> Ops
;
538 for (int j
= 0; j
< NumOperands
; ++j
) {
539 Instruction
*I
= dyn_cast
<Instruction
>(Inst2Replace
->getOperand(j
));
542 // Get the DepChain corresponding to this operand.
543 DepChain
&D
= *DepChains
[I
];
544 // Get the PHI for the iteration number and find
545 // the incoming value from the Loop Preheader for
547 Value
*ValInPreheader
= findValueInBlock(D
[i
], LoopPH
);
548 InstInPreheader
->setOperand(j
, ValInPreheader
);
550 InstsInPreheader
.push_back(InstInPreheader
);
551 InstInPreheader
->setName(Inst2Replace
->getName() + ".hexagon.vlcr");
552 InstInPreheader
->insertBefore(LoopPH
->getTerminator());
553 LLVM_DEBUG(dbgs() << "Added " << *InstInPreheader
<< " to "
554 << LoopPH
->getName() << "\n");
556 BasicBlock
*BB
= BEInst
->getParent();
558 IRB
.SetInsertPoint(BB
->getFirstNonPHI());
559 Value
*BEVal
= BEInst
;
561 for (int i
= Iterations
-1; i
>=0 ; --i
) {
562 Instruction
*InstInPreheader
= InstsInPreheader
[i
];
563 NewPhi
= IRB
.CreatePHI(InstInPreheader
->getType(), 2);
564 NewPhi
->addIncoming(InstInPreheader
, LoopPH
);
565 NewPhi
->addIncoming(BEVal
, BB
);
566 LLVM_DEBUG(dbgs() << "Adding " << *NewPhi
<< " to " << BB
->getName()
570 // We are in LCSSA form. So, a value defined inside the Loop is used only
571 // inside the loop. So, the following is safe.
572 Inst2Replace
->replaceAllUsesWith(NewPhi
);
573 ReplacedInsts
.insert(Inst2Replace
);
574 ++HexagonNumVectorLoopCarriedReuse
;
577 bool HexagonVectorLoopCarriedReuse::doVLCR() {
578 assert(CurLoop
->getSubLoops().empty() &&
579 "Can do VLCR on the innermost loop only");
580 assert((CurLoop
->getNumBlocks() == 1) &&
581 "Can do VLCR only on single block loops");
583 bool Changed
= false;
586 LLVM_DEBUG(dbgs() << "Working on Loop: " << *CurLoop
->getHeader() << "\n");
588 // Reset datastructures.
592 findLoopCarriedDeps();
594 if (ReuseCandidate
.isDefined()) {
599 llvm::for_each(Dependences
, std::default_delete
<DepChain
>());
604 void HexagonVectorLoopCarriedReuse::findDepChainFromPHI(Instruction
*I
,
606 PHINode
*PN
= dyn_cast
<PHINode
>(I
);
611 auto NumIncomingValues
= PN
->getNumIncomingValues();
612 if (NumIncomingValues
!= 2) {
617 BasicBlock
*BB
= PN
->getParent();
618 if (BB
!= CurLoop
->getHeader()) {
623 Value
*BEVal
= PN
->getIncomingValueForBlock(BB
);
624 Instruction
*BEInst
= dyn_cast
<Instruction
>(BEVal
);
625 // This is a single block loop with a preheader, so at least
626 // one value should come over the backedge.
627 assert(BEInst
&& "There should be a value over the backedge");
630 PN
->getIncomingValueForBlock(CurLoop
->getLoopPreheader());
631 if(!PreHdrVal
|| !isa
<Instruction
>(PreHdrVal
)) {
636 findDepChainFromPHI(BEInst
, D
);
640 DepChain
*HexagonVectorLoopCarriedReuse::getDepChainBtwn(Instruction
*I1
,
643 for (auto *D
: Dependences
) {
644 if (D
->front() == I1
&& D
->back() == I2
&& D
->iterations() == Iters
)
650 void HexagonVectorLoopCarriedReuse::findLoopCarriedDeps() {
651 BasicBlock
*BB
= CurLoop
->getHeader();
652 for (auto I
= BB
->begin(), E
= BB
->end(); I
!= E
&& isa
<PHINode
>(I
); ++I
) {
653 auto *PN
= cast
<PHINode
>(I
);
654 if (!isa
<VectorType
>(PN
->getType()))
657 DepChain
*D
= new DepChain();
658 findDepChainFromPHI(PN
, *D
);
660 Dependences
.insert(D
);
664 LLVM_DEBUG(dbgs() << "Found " << Dependences
.size() << " dependences\n");
665 LLVM_DEBUG(for (size_t i
= 0; i
< Dependences
.size();
666 ++i
) { dbgs() << *Dependences
[i
] << "\n"; });
669 Pass
*llvm::createHexagonVectorLoopCarriedReuseLegacyPass() {
670 return new HexagonVectorLoopCarriedReuseLegacyPass();