//===------ PPCLoopPreIncPrep.cpp - Loop Pre-Inc. AM Prep. Pass ----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements a pass to prepare loops for pre-increment addressing
// modes. Additional PHIs are created for loop induction variables used by
// load/store instructions so that the pre-increment forms can be used.
// Generically, this means transforming loops like this:
//   for (int i = 0; i < n; ++i)
//     array[i] = c;
// to look like this:
//   T *p = array[-1];
//   for (int i = 0; i < n; ++i)
//     *++p = c;
//
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "ppc-loop-preinc-prep"

#include "PPC.h"
#include "PPCSubtarget.h"
#include "PPCTargetMachine.h"
#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpander.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
#include <cassert>
#include <iterator>
#include <utility>

using namespace llvm;
// By default, we limit this to creating 16 PHIs (which is a little over half
// of the allocatable register set).
static cl::opt<unsigned> MaxVars("ppc-preinc-prep-max-vars",
                                 cl::Hidden, cl::init(16),
  cl::desc("Potential PHI threshold for PPC preinc loop prep"));
STATISTIC(PHINodeAlreadyExists, "PHI node already in pre-increment form");
namespace {

  class PPCLoopPreIncPrep : public FunctionPass {
  public:
    static char ID; // Pass ID, replacement for typeid

    PPCLoopPreIncPrep() : FunctionPass(ID) {
      initializePPCLoopPreIncPrepPass(*PassRegistry::getPassRegistry());
    }

    PPCLoopPreIncPrep(PPCTargetMachine &TM) : FunctionPass(ID), TM(&TM) {
      initializePPCLoopPreIncPrepPass(*PassRegistry::getPassRegistry());
    }

    void getAnalysisUsage(AnalysisUsage &AU) const override {
      AU.addPreserved<DominatorTreeWrapperPass>();
      AU.addRequired<LoopInfoWrapperPass>();
      AU.addPreserved<LoopInfoWrapperPass>();
      AU.addRequired<ScalarEvolutionWrapperPass>();
    }

    bool alreadyPrepared(Loop *L, Instruction *MemI,
                         const SCEV *BasePtrStartSCEV,
                         const SCEVConstant *BasePtrIncSCEV);
    bool runOnFunction(Function &F) override;
    bool runOnLoop(Loop *L);
    void simplifyLoopLatch(Loop *L);
    bool rotateLoop(Loop *L);

  private:
    PPCTargetMachine *TM = nullptr;
    DominatorTree *DT;
    LoopInfo *LI;
    ScalarEvolution *SE;
    bool PreserveLCSSA;
  };

} // end anonymous namespace
char PPCLoopPreIncPrep::ID = 0;
static const char *name = "Prepare loop for pre-inc. addressing modes";
INITIALIZE_PASS_BEGIN(PPCLoopPreIncPrep, DEBUG_TYPE, name, false, false)
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
INITIALIZE_PASS_END(PPCLoopPreIncPrep, DEBUG_TYPE, name, false, false)
FunctionPass *llvm::createPPCLoopPreIncPrepPass(PPCTargetMachine &TM) {
  return new PPCLoopPreIncPrep(TM);
}
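// The structures below describe "buckets": groups of memory accesses whose
// addresses differ from a common base SCEV only by constant offsets, so the
// whole group can share a single pointer PHI once the loop is prepared.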
namespace {

  struct BucketElement {
    BucketElement(const SCEVConstant *O, Instruction *I) : Offset(O), Instr(I) {}
    BucketElement(Instruction *I) : Offset(nullptr), Instr(I) {}

    const SCEVConstant *Offset;
    Instruction *Instr;
  };

  struct Bucket {
    Bucket(const SCEV *B, Instruction *I) : BaseSCEV(B),
                                            Elements(1, BucketElement(I)) {}

    const SCEV *BaseSCEV;
    SmallVector<BucketElement, 16> Elements;
  };

} // end anonymous namespace
static bool IsPtrInBounds(Value *BasePtr) {
  Value *StrippedBasePtr = BasePtr;
  while (BitCastInst *BC = dyn_cast<BitCastInst>(StrippedBasePtr))
    StrippedBasePtr = BC->getOperand(0);
  if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(StrippedBasePtr))
    return GEP->isInBounds();

  return false;
}
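// Return the address operand of a load, store, or prefetch intrinsic, or
// nullptr for any other instruction.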
static Value *GetPointerOperand(Value *MemI) {
  if (LoadInst *LMemI = dyn_cast<LoadInst>(MemI)) {
    return LMemI->getPointerOperand();
  } else if (StoreInst *SMemI = dyn_cast<StoreInst>(MemI)) {
    return SMemI->getPointerOperand();
  } else if (IntrinsicInst *IMemI = dyn_cast<IntrinsicInst>(MemI)) {
    if (IMemI->getIntrinsicID() == Intrinsic::prefetch)
      return IMemI->getArgOperand(0);
  }

  return nullptr;
}
bool PPCLoopPreIncPrep::runOnFunction(Function &F) {
  if (skipFunction(F))
    return false;

  LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
  SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
  auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>();
  DT = DTWP ? &DTWP->getDomTree() : nullptr;
  PreserveLCSSA = mustPreserveAnalysisID(LCSSAID);

  bool MadeChange = false;

  for (auto I = LI->begin(), IE = LI->end(); I != IE; ++I)
    for (auto L = df_begin(*I), LE = df_end(*I); L != LE; ++L)
      MadeChange |= runOnLoop(*L);

  return MadeChange;
}
// To prepare for the pre-increment form, a PHI is added.
// This function checks whether that PHI already exists and returns true if it
// finds an existing PHI with the same start and increment as the one we wanted
// to create.
bool PPCLoopPreIncPrep::alreadyPrepared(Loop *L, Instruction *MemI,
                                        const SCEV *BasePtrStartSCEV,
                                        const SCEVConstant *BasePtrIncSCEV) {
  BasicBlock *BB = MemI->getParent();
  if (!BB)
    return false;

  BasicBlock *PredBB = L->getLoopPredecessor();
  BasicBlock *LatchBB = L->getLoopLatch();

  if (!PredBB || !LatchBB)
    return false;

  // Run through the PHIs and see if any of them looks like a preparation.
  iterator_range<BasicBlock::phi_iterator> PHIIter = BB->phis();
  for (auto &CurrentPHI : PHIIter) {
    PHINode *CurrentPHINode = dyn_cast<PHINode>(&CurrentPHI);
    if (!CurrentPHINode)
      continue;

    if (!SE->isSCEVable(CurrentPHINode->getType()))
      continue;

    const SCEV *PHISCEV = SE->getSCEVAtScope(CurrentPHINode, L);

    const SCEVAddRecExpr *PHIBasePtrSCEV = dyn_cast<SCEVAddRecExpr>(PHISCEV);
    if (!PHIBasePtrSCEV)
      continue;

    const SCEVConstant *PHIBasePtrIncSCEV =
        dyn_cast<SCEVConstant>(PHIBasePtrSCEV->getStepRecurrence(*SE));
    if (!PHIBasePtrIncSCEV)
      continue;

    if (CurrentPHINode->getNumIncomingValues() == 2) {
      if ((CurrentPHINode->getIncomingBlock(0) == LatchBB &&
           CurrentPHINode->getIncomingBlock(1) == PredBB) ||
          (CurrentPHINode->getIncomingBlock(1) == LatchBB &&
           CurrentPHINode->getIncomingBlock(0) == PredBB)) {
        if (PHIBasePtrSCEV->getStart() == BasePtrStartSCEV &&
            PHIBasePtrIncSCEV == BasePtrIncSCEV) {
          // The existing PHI (CurrentPHINode) has the same start and increment
          // as the PHI that we wanted to create.
          ++PHINodeAlreadyExists;
          return true;
        }
      }
    }
  }

  return false;
}
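// runOnLoop collects the loop's loads, stores, and prefetches into buckets of
// addresses that differ only by a constant, then rewrites each bucket around a
// new pointer PHI so the accesses can use pre-increment (update-form)
// addressing.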
bool PPCLoopPreIncPrep::runOnLoop(Loop *L) {
  bool MadeChange = false;

  // Only prep. the inner-most loop.
  if (!L->empty())
    return MadeChange;

  LLVM_DEBUG(dbgs() << "PIP: Examining: " << *L << "\n");

  BasicBlock *Header = L->getHeader();

  const PPCSubtarget *ST =
      TM ? TM->getSubtargetImpl(*Header->getParent()) : nullptr;

  unsigned HeaderLoopPredCount = pred_size(Header);
  // Collect buckets of comparable addresses used by loads and stores.
  SmallVector<Bucket, 16> Buckets;
  for (Loop::block_iterator I = L->block_begin(), IE = L->block_end();
       I != IE; ++I) {
    for (BasicBlock::iterator J = (*I)->begin(), JE = (*I)->end();
         J != JE; ++J) {
      Value *PtrValue;
      Instruction *MemI;

      if (LoadInst *LMemI = dyn_cast<LoadInst>(J)) {
        MemI = LMemI;
        PtrValue = LMemI->getPointerOperand();
      } else if (StoreInst *SMemI = dyn_cast<StoreInst>(J)) {
        MemI = SMemI;
        PtrValue = SMemI->getPointerOperand();
      } else if (IntrinsicInst *IMemI = dyn_cast<IntrinsicInst>(J)) {
        if (IMemI->getIntrinsicID() == Intrinsic::prefetch) {
          MemI = IMemI;
          PtrValue = IMemI->getArgOperand(0);
        } else continue;
      } else continue;
      unsigned PtrAddrSpace = PtrValue->getType()->getPointerAddressSpace();
      if (PtrAddrSpace)
        continue;

      // There are no update forms for Altivec vector load/stores.
      if (ST && ST->hasAltivec() &&
          PtrValue->getType()->getPointerElementType()->isVectorTy())
        continue;

      if (L->isLoopInvariant(PtrValue))
        continue;

      const SCEV *LSCEV = SE->getSCEVAtScope(PtrValue, L);
      if (const SCEVAddRecExpr *LARSCEV = dyn_cast<SCEVAddRecExpr>(LSCEV)) {
        if (LARSCEV->getLoop() != L)
          continue;
        // See getPreIndexedAddressParts: the displacement for LDU/STDU has to
        // be a multiple of 4 (DS-form). For i64 loads/stores, when the
        // displacement fits in a 16-bit signed field but is not a multiple of
        // 4, pre-inc prep is useless and may even break an existing
        // well-formed addressing mode.
        if (PtrValue->getType()->getPointerElementType()->isIntegerTy(64)) {
          if (const SCEVConstant *StepConst =
                  dyn_cast<SCEVConstant>(LARSCEV->getStepRecurrence(*SE))) {
            const APInt &ConstInt = StepConst->getValue()->getValue();
            if (ConstInt.isSignedIntN(16) && ConstInt.srem(4) != 0)
              continue;
          }
        }
      } else {
        continue;
      }
      bool FoundBucket = false;
      for (auto &B : Buckets) {
        const SCEV *Diff = SE->getMinusSCEV(LSCEV, B.BaseSCEV);
        if (const auto *CDiff = dyn_cast<SCEVConstant>(Diff)) {
          B.Elements.push_back(BucketElement(CDiff, MemI));
          FoundBucket = true;
          break;
        }
      }

      if (!FoundBucket) {
        if (Buckets.size() == MaxVars)
          return MadeChange;
        Buckets.push_back(Bucket(LSCEV, MemI));
      }
    }
  }

  if (Buckets.empty())
    return MadeChange;
  BasicBlock *LoopPredecessor = L->getLoopPredecessor();
  // If there is no loop predecessor, or the loop predecessor's terminator
  // returns a value (which might contribute to determining the loop's
  // iteration space), insert a new preheader for the loop.
  if (!LoopPredecessor ||
      !LoopPredecessor->getTerminator()->getType()->isVoidTy()) {
    LoopPredecessor = InsertPreheaderForLoop(L, DT, LI, nullptr, PreserveLCSSA);
    if (LoopPredecessor)
      MadeChange = true;
  }

  if (!LoopPredecessor)
    return MadeChange;

  LLVM_DEBUG(dbgs() << "PIP: Found " << Buckets.size() << " buckets\n");
  SmallSet<BasicBlock *, 16> BBChanged;
  for (unsigned i = 0, e = Buckets.size(); i != e; ++i) {
    // The base address of each bucket is transformed into a PHI and the other
    // addresses are rewritten as offsets of that variable.

    // We have a choice now of which instruction's memory operand we use as the
    // base for the generated PHI. Always picking the first instruction in each
    // bucket does not work well, specifically because that instruction might
    // be a prefetch (and there are no pre-increment dcbt variants). Otherwise,
    // the choice is somewhat arbitrary, because the backend will happily
    // generate direct offsets from both the pre-incremented and
    // post-incremented pointer values. Thus, we'll pick the first non-prefetch
    // instruction in each bucket, and adjust the recurrence and other offsets
    // accordingly.
    for (int j = 0, je = Buckets[i].Elements.size(); j != je; ++j) {
      if (auto *II = dyn_cast<IntrinsicInst>(Buckets[i].Elements[j].Instr))
        if (II->getIntrinsicID() == Intrinsic::prefetch)
          continue;
      // If we'd otherwise pick the first element anyway, there's nothing to do.
      if (j == 0)
        break;

      // If our chosen element has no offset from the base pointer, there's
      // nothing to do.
      if (!Buckets[i].Elements[j].Offset ||
          Buckets[i].Elements[j].Offset->isZero())
        break;

      const SCEV *Offset = Buckets[i].Elements[j].Offset;
      Buckets[i].BaseSCEV = SE->getAddExpr(Buckets[i].BaseSCEV, Offset);
      for (auto &E : Buckets[i].Elements) {
        if (E.Offset)
          E.Offset = cast<SCEVConstant>(SE->getMinusSCEV(E.Offset, Offset));
        else
          E.Offset = cast<SCEVConstant>(SE->getNegativeSCEV(Offset));
      }

      std::swap(Buckets[i].Elements[j], Buckets[i].Elements[0]);
      break;
    }
    const SCEVAddRecExpr *BasePtrSCEV =
        cast<SCEVAddRecExpr>(Buckets[i].BaseSCEV);
    if (!BasePtrSCEV->isAffine())
      continue;

    LLVM_DEBUG(dbgs() << "PIP: Transforming: " << *BasePtrSCEV << "\n");
    assert(BasePtrSCEV->getLoop() == L &&
           "AddRec for the wrong loop?");

    // The instruction corresponding to the Bucket's BaseSCEV must be the first
    // in the vector of elements.
    Instruction *MemI = Buckets[i].Elements.begin()->Instr;
    Value *BasePtr = GetPointerOperand(MemI);
    assert(BasePtr && "No pointer operand");
    Type *I8Ty = Type::getInt8Ty(MemI->getParent()->getContext());
    Type *I8PtrTy = Type::getInt8PtrTy(MemI->getParent()->getContext(),
        BasePtr->getType()->getPointerAddressSpace());

    const SCEV *BasePtrStartSCEV = BasePtrSCEV->getStart();
    if (!SE->isLoopInvariant(BasePtrStartSCEV, L))
      continue;

    const SCEVConstant *BasePtrIncSCEV =
        dyn_cast<SCEVConstant>(BasePtrSCEV->getStepRecurrence(*SE));
    if (!BasePtrIncSCEV)
      continue;
    BasePtrStartSCEV = SE->getMinusSCEV(BasePtrStartSCEV, BasePtrIncSCEV);
    if (!isSafeToExpand(BasePtrStartSCEV, *SE))
      continue;

    LLVM_DEBUG(dbgs() << "PIP: New start is: " << *BasePtrStartSCEV << "\n");

    if (alreadyPrepared(L, MemI, BasePtrStartSCEV, BasePtrIncSCEV))
      continue;
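    // Create the new pointer PHI in the loop header. It is seeded with the
    // expanded start value from the loop predecessor and, below, with the
    // incremented pointer for every back edge.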
    PHINode *NewPHI = PHINode::Create(I8PtrTy, HeaderLoopPredCount,
        MemI->hasName() ? MemI->getName() + ".phi" : "",
        Header->getFirstNonPHI());

    SCEVExpander SCEVE(*SE, Header->getModule()->getDataLayout(), "pistart");
    Value *BasePtrStart = SCEVE.expandCodeFor(BasePtrStartSCEV, I8PtrTy,
        LoopPredecessor->getTerminator());

    // Note that LoopPredecessor might occur in the predecessor list multiple
    // times, and we need to add it the right number of times.
    for (pred_iterator PI = pred_begin(Header), PE = pred_end(Header);
         PI != PE; ++PI) {
      if (*PI != LoopPredecessor)
        continue;

      NewPHI->addIncoming(BasePtrStart, LoopPredecessor);
    }
    Instruction *InsPoint = &*Header->getFirstInsertionPt();
    GetElementPtrInst *PtrInc = GetElementPtrInst::Create(
        I8Ty, NewPHI, BasePtrIncSCEV->getValue(),
        MemI->hasName() ? MemI->getName() + ".inc" : "", InsPoint);
    PtrInc->setIsInBounds(IsPtrInBounds(BasePtr));
    for (pred_iterator PI = pred_begin(Header), PE = pred_end(Header);
         PI != PE; ++PI) {
      if (*PI == LoopPredecessor)
        continue;

      NewPHI->addIncoming(PtrInc, *PI);
    }
    Instruction *NewBasePtr;
    if (PtrInc->getType() != BasePtr->getType())
      NewBasePtr = new BitCastInst(PtrInc, BasePtr->getType(),
          PtrInc->hasName() ? PtrInc->getName() + ".cast" : "", InsPoint);
    else
      NewBasePtr = PtrInc;

    if (Instruction *IDel = dyn_cast<Instruction>(BasePtr))
      BBChanged.insert(IDel->getParent());
    BasePtr->replaceAllUsesWith(NewBasePtr);
    RecursivelyDeleteTriviallyDeadInstructions(BasePtr);
    // Keep track of the replacement pointer values we've inserted so that we
    // don't generate more pointer values than necessary.
    SmallPtrSet<Value *, 16> NewPtrs;
    NewPtrs.insert(NewBasePtr);
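    // Rewrite every other access in the bucket as a constant offset from the
    // incremented pointer (PtrInc), reusing pointers we have already created
    // where possible.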
    for (auto I = std::next(Buckets[i].Elements.begin()),
         IE = Buckets[i].Elements.end(); I != IE; ++I) {
      Value *Ptr = GetPointerOperand(I->Instr);
      assert(Ptr && "No pointer operand");
      if (NewPtrs.count(Ptr))
        continue;
      Instruction *RealNewPtr;
      if (!I->Offset || I->Offset->getValue()->isZero()) {
        RealNewPtr = NewBasePtr;
      } else {
        Instruction *PtrIP = dyn_cast<Instruction>(Ptr);
        if (PtrIP && isa<Instruction>(NewBasePtr) &&
            cast<Instruction>(NewBasePtr)->getParent() == PtrIP->getParent())
          PtrIP = nullptr;
        else if (PtrIP && isa<PHINode>(PtrIP))
          PtrIP = &*PtrIP->getParent()->getFirstInsertionPt();
        else if (!PtrIP)
          PtrIP = I->Instr;

        GetElementPtrInst *NewPtr = GetElementPtrInst::Create(
            I8Ty, PtrInc, I->Offset->getValue(),
            I->Instr->hasName() ? I->Instr->getName() + ".off" : "", PtrIP);
        if (!PtrIP)
          NewPtr->insertAfter(cast<Instruction>(PtrInc));
        NewPtr->setIsInBounds(IsPtrInBounds(Ptr));
        RealNewPtr = NewPtr;
      }
      if (Instruction *IDel = dyn_cast<Instruction>(Ptr))
        BBChanged.insert(IDel->getParent());

      Instruction *ReplNewPtr;
      if (Ptr->getType() != RealNewPtr->getType()) {
        ReplNewPtr = new BitCastInst(RealNewPtr, Ptr->getType(),
            Ptr->hasName() ? Ptr->getName() + ".cast" : "");
        ReplNewPtr->insertAfter(RealNewPtr);
      } else
        ReplNewPtr = RealNewPtr;

      Ptr->replaceAllUsesWith(ReplNewPtr);
      RecursivelyDeleteTriviallyDeadInstructions(Ptr);

      NewPtrs.insert(RealNewPtr);
    }

    MadeChange = true;
  }
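  // Blocks whose pointer computations were rewritten may now contain dead
  // PHIs; clean them up before returning.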
  for (Loop::block_iterator I = L->block_begin(), IE = L->block_end();
       I != IE; ++I) {
    if (BBChanged.count(*I))
      DeleteDeadPHIs(*I);
  }

  return MadeChange;
}