1 //===------ PPCLoopInstrFormPrep.cpp - Loop Instr Form Prep Pass ----------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // This file implements a pass to prepare loops for ppc preferred addressing
10 // modes, leveraging different instruction form. (eg: DS/DQ form, D/DS form with
12 // Additional PHIs are created for loop induction variables used by load/store
13 // instructions so that preferred addressing modes can be used.
15 // 1: DS/DQ form preparation, prepare the load/store instructions so that they
16 // can satisfy the DS/DQ form displacement requirements.
17 // Generically, this means transforming loops like this:
18 // for (int i = 0; i < n; ++i) {
19 // unsigned long x1 = *(unsigned long *)(p + i + 5);
20 // unsigned long x2 = *(unsigned long *)(p + i + 9);
25 // unsigned NewP = p + 5;
26 // for (int i = 0; i < n; ++i) {
27 // unsigned long x1 = *(unsigned long *)(i + NewP);
28 // unsigned long x2 = *(unsigned long *)(i + NewP + 4);
31 // 2: D/DS form with update preparation, prepare the load/store instructions so
32 // that we can use update form to do pre-increment.
33 // Generically, this means transforming loops like this:
34 // for (int i = 0; i < n; ++i)
40 // for (int i = 0; i < n; ++i)
42 //===----------------------------------------------------------------------===//
45 #include "PPCSubtarget.h"
46 #include "PPCTargetMachine.h"
47 #include "llvm/ADT/DepthFirstIterator.h"
48 #include "llvm/ADT/SmallPtrSet.h"
49 #include "llvm/ADT/SmallSet.h"
50 #include "llvm/ADT/SmallVector.h"
51 #include "llvm/ADT/Statistic.h"
52 #include "llvm/Analysis/LoopInfo.h"
53 #include "llvm/Analysis/ScalarEvolution.h"
54 #include "llvm/Analysis/ScalarEvolutionExpressions.h"
55 #include "llvm/IR/BasicBlock.h"
56 #include "llvm/IR/CFG.h"
57 #include "llvm/IR/Dominators.h"
58 #include "llvm/IR/Instruction.h"
59 #include "llvm/IR/Instructions.h"
60 #include "llvm/IR/IntrinsicInst.h"
61 #include "llvm/IR/IntrinsicsPowerPC.h"
62 #include "llvm/IR/Module.h"
63 #include "llvm/IR/Type.h"
64 #include "llvm/IR/Value.h"
65 #include "llvm/InitializePasses.h"
66 #include "llvm/Pass.h"
67 #include "llvm/Support/Casting.h"
68 #include "llvm/Support/CommandLine.h"
69 #include "llvm/Support/Debug.h"
70 #include "llvm/Transforms/Scalar.h"
71 #include "llvm/Transforms/Utils.h"
72 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
73 #include "llvm/Transforms/Utils/Local.h"
74 #include "llvm/Transforms/Utils/LoopUtils.h"
75 #include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"
80 #define DEBUG_TYPE "ppc-loop-instr-form-prep"
84 static cl::opt
<unsigned> MaxVarsPrep("ppc-formprep-max-vars",
85 cl::Hidden
, cl::init(24),
86 cl::desc("Potential common base number threshold per function for PPC loop "
89 static cl::opt
<bool> PreferUpdateForm("ppc-formprep-prefer-update",
90 cl::init(true), cl::Hidden
,
91 cl::desc("prefer update form when ds form is also a update form"));
93 // Sum of following 3 per loop thresholds for all loops can not be larger
95 // now the thresholds for each kind prep are exterimental values on Power9.
96 static cl::opt
<unsigned> MaxVarsUpdateForm("ppc-preinc-prep-max-vars",
97 cl::Hidden
, cl::init(3),
98 cl::desc("Potential PHI threshold per loop for PPC loop prep of update "
101 static cl::opt
<unsigned> MaxVarsDSForm("ppc-dsprep-max-vars",
102 cl::Hidden
, cl::init(3),
103 cl::desc("Potential PHI threshold per loop for PPC loop prep of DS form"));
105 static cl::opt
<unsigned> MaxVarsDQForm("ppc-dqprep-max-vars",
106 cl::Hidden
, cl::init(8),
107 cl::desc("Potential PHI threshold per loop for PPC loop prep of DQ form"));
110 // If would not be profitable if the common base has only one load/store, ISEL
111 // should already be able to choose best load/store form based on offset for
112 // single load/store. Set minimal profitable value default to 2 and make it as
114 static cl::opt
<unsigned> DispFormPrepMinThreshold("ppc-dispprep-min-threshold",
115 cl::Hidden
, cl::init(2),
116 cl::desc("Minimal common base load/store instructions triggering DS/DQ form "
119 STATISTIC(PHINodeAlreadyExistsUpdate
, "PHI node already in pre-increment form");
120 STATISTIC(PHINodeAlreadyExistsDS
, "PHI node already in DS form");
121 STATISTIC(PHINodeAlreadyExistsDQ
, "PHI node already in DQ form");
122 STATISTIC(DSFormChainRewritten
, "Num of DS form chain rewritten");
123 STATISTIC(DQFormChainRewritten
, "Num of DQ form chain rewritten");
124 STATISTIC(UpdFormChainRewritten
, "Num of update form chain rewritten");
127 struct BucketElement
{
128 BucketElement(const SCEVConstant
*O
, Instruction
*I
) : Offset(O
), Instr(I
) {}
129 BucketElement(Instruction
*I
) : Offset(nullptr), Instr(I
) {}
131 const SCEVConstant
*Offset
;
136 Bucket(const SCEV
*B
, Instruction
*I
) : BaseSCEV(B
),
137 Elements(1, BucketElement(I
)) {}
139 const SCEV
*BaseSCEV
;
140 SmallVector
<BucketElement
, 16> Elements
;
143 // "UpdateForm" is not a real PPC instruction form, it stands for dform
144 // load/store with update like ldu/stdu, or Prefetch intrinsic.
145 // For DS form instructions, their displacements must be multiple of 4.
146 // For DQ form instructions, their displacements must be multiple of 16.
147 enum InstrForm
{ UpdateForm
= 1, DSForm
= 4, DQForm
= 16 };
149 class PPCLoopInstrFormPrep
: public FunctionPass
{
151 static char ID
; // Pass ID, replacement for typeid
153 PPCLoopInstrFormPrep() : FunctionPass(ID
) {
154 initializePPCLoopInstrFormPrepPass(*PassRegistry::getPassRegistry());
157 PPCLoopInstrFormPrep(PPCTargetMachine
&TM
) : FunctionPass(ID
), TM(&TM
) {
158 initializePPCLoopInstrFormPrepPass(*PassRegistry::getPassRegistry());
161 void getAnalysisUsage(AnalysisUsage
&AU
) const override
{
162 AU
.addPreserved
<DominatorTreeWrapperPass
>();
163 AU
.addRequired
<LoopInfoWrapperPass
>();
164 AU
.addPreserved
<LoopInfoWrapperPass
>();
165 AU
.addRequired
<ScalarEvolutionWrapperPass
>();
168 bool runOnFunction(Function
&F
) override
;
171 PPCTargetMachine
*TM
= nullptr;
172 const PPCSubtarget
*ST
;
178 /// Successful preparation number for Update/DS/DQ form in all inner most
179 /// loops. One successful preparation will put one common base out of loop,
180 /// this may leads to register presure like LICM does.
181 /// Make sure total preparation number can be controlled by option.
182 unsigned SuccPrepCount
;
184 bool runOnLoop(Loop
*L
);
186 /// Check if required PHI node is already exist in Loop \p L.
187 bool alreadyPrepared(Loop
*L
, Instruction
* MemI
,
188 const SCEV
*BasePtrStartSCEV
,
189 const SCEVConstant
*BasePtrIncSCEV
,
192 /// Collect condition matched(\p isValidCandidate() returns true)
193 /// candidates in Loop \p L.
194 SmallVector
<Bucket
, 16> collectCandidates(
196 std::function
<bool(const Instruction
*, const Value
*, const Type
*)>
198 unsigned MaxCandidateNum
);
200 /// Add a candidate to candidates \p Buckets.
201 void addOneCandidate(Instruction
*MemI
, const SCEV
*LSCEV
,
202 SmallVector
<Bucket
, 16> &Buckets
,
203 unsigned MaxCandidateNum
);
205 /// Prepare all candidates in \p Buckets for update form.
206 bool updateFormPrep(Loop
*L
, SmallVector
<Bucket
, 16> &Buckets
);
208 /// Prepare all candidates in \p Buckets for displacement form, now for
210 bool dispFormPrep(Loop
*L
, SmallVector
<Bucket
, 16> &Buckets
,
213 /// Prepare for one chain \p BucketChain, find the best base element and
214 /// update all other elements in \p BucketChain accordingly.
215 /// \p Form is used to find the best base element.
216 /// If success, best base element must be stored as the first element of
218 /// Return false if no base element found, otherwise return true.
219 bool prepareBaseForDispFormChain(Bucket
&BucketChain
,
222 /// Prepare for one chain \p BucketChain, find the best base element and
223 /// update all other elements in \p BucketChain accordingly.
224 /// If success, best base element must be stored as the first element of
226 /// Return false if no base element found, otherwise return true.
227 bool prepareBaseForUpdateFormChain(Bucket
&BucketChain
);
229 /// Rewrite load/store instructions in \p BucketChain according to
231 bool rewriteLoadStores(Loop
*L
, Bucket
&BucketChain
,
232 SmallSet
<BasicBlock
*, 16> &BBChanged
,
236 } // end anonymous namespace
238 char PPCLoopInstrFormPrep::ID
= 0;
239 static const char *name
= "Prepare loop for ppc preferred instruction forms";
240 INITIALIZE_PASS_BEGIN(PPCLoopInstrFormPrep
, DEBUG_TYPE
, name
, false, false)
241 INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass
)
242 INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass
)
243 INITIALIZE_PASS_END(PPCLoopInstrFormPrep
, DEBUG_TYPE
, name
, false, false)
245 static constexpr StringRef PHINodeNameSuffix
= ".phi";
246 static constexpr StringRef CastNodeNameSuffix
= ".cast";
247 static constexpr StringRef GEPNodeIncNameSuffix
= ".inc";
248 static constexpr StringRef GEPNodeOffNameSuffix
= ".off";
250 FunctionPass
*llvm::createPPCLoopInstrFormPrepPass(PPCTargetMachine
&TM
) {
251 return new PPCLoopInstrFormPrep(TM
);
254 static bool IsPtrInBounds(Value
*BasePtr
) {
255 Value
*StrippedBasePtr
= BasePtr
;
256 while (BitCastInst
*BC
= dyn_cast
<BitCastInst
>(StrippedBasePtr
))
257 StrippedBasePtr
= BC
->getOperand(0);
258 if (GetElementPtrInst
*GEP
= dyn_cast
<GetElementPtrInst
>(StrippedBasePtr
))
259 return GEP
->isInBounds();
264 static std::string
getInstrName(const Value
*I
, StringRef Suffix
) {
265 assert(I
&& "Invalid paramater!");
267 return (I
->getName() + Suffix
).str();
272 static Value
*GetPointerOperand(Value
*MemI
) {
273 if (LoadInst
*LMemI
= dyn_cast
<LoadInst
>(MemI
)) {
274 return LMemI
->getPointerOperand();
275 } else if (StoreInst
*SMemI
= dyn_cast
<StoreInst
>(MemI
)) {
276 return SMemI
->getPointerOperand();
277 } else if (IntrinsicInst
*IMemI
= dyn_cast
<IntrinsicInst
>(MemI
)) {
278 if (IMemI
->getIntrinsicID() == Intrinsic::prefetch
||
279 IMemI
->getIntrinsicID() == Intrinsic::ppc_vsx_lxvp
)
280 return IMemI
->getArgOperand(0);
281 if (IMemI
->getIntrinsicID() == Intrinsic::ppc_vsx_stxvp
)
282 return IMemI
->getArgOperand(1);
288 bool PPCLoopInstrFormPrep::runOnFunction(Function
&F
) {
292 LI
= &getAnalysis
<LoopInfoWrapperPass
>().getLoopInfo();
293 SE
= &getAnalysis
<ScalarEvolutionWrapperPass
>().getSE();
294 auto *DTWP
= getAnalysisIfAvailable
<DominatorTreeWrapperPass
>();
295 DT
= DTWP
? &DTWP
->getDomTree() : nullptr;
296 PreserveLCSSA
= mustPreserveAnalysisID(LCSSAID
);
297 ST
= TM
? TM
->getSubtargetImpl(F
) : nullptr;
300 bool MadeChange
= false;
302 for (auto I
= LI
->begin(), IE
= LI
->end(); I
!= IE
; ++I
)
303 for (auto L
= df_begin(*I
), LE
= df_end(*I
); L
!= LE
; ++L
)
304 MadeChange
|= runOnLoop(*L
);
309 void PPCLoopInstrFormPrep::addOneCandidate(Instruction
*MemI
, const SCEV
*LSCEV
,
310 SmallVector
<Bucket
, 16> &Buckets
,
311 unsigned MaxCandidateNum
) {
312 assert((MemI
&& GetPointerOperand(MemI
)) &&
313 "Candidate should be a memory instruction.");
314 assert(LSCEV
&& "Invalid SCEV for Ptr value.");
315 bool FoundBucket
= false;
316 for (auto &B
: Buckets
) {
317 const SCEV
*Diff
= SE
->getMinusSCEV(LSCEV
, B
.BaseSCEV
);
318 if (const auto *CDiff
= dyn_cast
<SCEVConstant
>(Diff
)) {
319 B
.Elements
.push_back(BucketElement(CDiff
, MemI
));
326 if (Buckets
.size() == MaxCandidateNum
)
328 Buckets
.push_back(Bucket(LSCEV
, MemI
));
332 SmallVector
<Bucket
, 16> PPCLoopInstrFormPrep::collectCandidates(
334 std::function
<bool(const Instruction
*, const Value
*, const Type
*)>
336 unsigned MaxCandidateNum
) {
337 SmallVector
<Bucket
, 16> Buckets
;
338 for (const auto &BB
: L
->blocks())
339 for (auto &J
: *BB
) {
341 Type
*PointerElementType
;
343 if (LoadInst
*LMemI
= dyn_cast
<LoadInst
>(&J
)) {
344 PtrValue
= LMemI
->getPointerOperand();
345 PointerElementType
= LMemI
->getType();
346 } else if (StoreInst
*SMemI
= dyn_cast
<StoreInst
>(&J
)) {
347 PtrValue
= SMemI
->getPointerOperand();
348 PointerElementType
= SMemI
->getValueOperand()->getType();
349 } else if (IntrinsicInst
*IMemI
= dyn_cast
<IntrinsicInst
>(&J
)) {
350 PointerElementType
= Type::getInt8Ty(J
.getContext());
351 if (IMemI
->getIntrinsicID() == Intrinsic::prefetch
||
352 IMemI
->getIntrinsicID() == Intrinsic::ppc_vsx_lxvp
) {
353 PtrValue
= IMemI
->getArgOperand(0);
354 } else if (IMemI
->getIntrinsicID() == Intrinsic::ppc_vsx_stxvp
) {
355 PtrValue
= IMemI
->getArgOperand(1);
359 unsigned PtrAddrSpace
= PtrValue
->getType()->getPointerAddressSpace();
363 if (L
->isLoopInvariant(PtrValue
))
366 const SCEV
*LSCEV
= SE
->getSCEVAtScope(PtrValue
, L
);
367 const SCEVAddRecExpr
*LARSCEV
= dyn_cast
<SCEVAddRecExpr
>(LSCEV
);
368 if (!LARSCEV
|| LARSCEV
->getLoop() != L
)
371 if (isValidCandidate(&J
, PtrValue
, PointerElementType
))
372 addOneCandidate(&J
, LSCEV
, Buckets
, MaxCandidateNum
);
377 bool PPCLoopInstrFormPrep::prepareBaseForDispFormChain(Bucket
&BucketChain
,
379 // RemainderOffsetInfo details:
380 // key: value of (Offset urem DispConstraint). For DSForm, it can
382 // first of pair: the index of first BucketElement whose remainder is equal
383 // to key. For key 0, this value must be 0.
384 // second of pair: number of load/stores with the same remainder.
385 DenseMap
<unsigned, std::pair
<unsigned, unsigned>> RemainderOffsetInfo
;
387 for (unsigned j
= 0, je
= BucketChain
.Elements
.size(); j
!= je
; ++j
) {
388 if (!BucketChain
.Elements
[j
].Offset
)
389 RemainderOffsetInfo
[0] = std::make_pair(0, 1);
392 BucketChain
.Elements
[j
].Offset
->getAPInt().urem(Form
);
393 if (RemainderOffsetInfo
.find(Remainder
) == RemainderOffsetInfo
.end())
394 RemainderOffsetInfo
[Remainder
] = std::make_pair(j
, 1);
396 RemainderOffsetInfo
[Remainder
].second
++;
399 // Currently we choose the most profitable base as the one which has the max
400 // number of load/store with same remainder.
401 // FIXME: adjust the base selection strategy according to load/store offset
403 // For example, if we have one candidate chain for DS form preparation, which
404 // contains following load/stores with different remainders:
405 // 1: 10 load/store whose remainder is 1;
406 // 2: 9 load/store whose remainder is 2;
407 // 3: 1 for remainder 3 and 0 for remainder 0;
408 // Now we will choose the first load/store whose remainder is 1 as base and
409 // adjust all other load/stores according to new base, so we will get 10 DS
410 // form and 10 X form.
411 // But we should be more clever, for this case we could use two bases, one for
412 // remainder 1 and the other for remainder 2, thus we could get 19 DS form and 1
414 unsigned MaxCountRemainder
= 0;
415 for (unsigned j
= 0; j
< (unsigned)Form
; j
++)
416 if ((RemainderOffsetInfo
.find(j
) != RemainderOffsetInfo
.end()) &&
417 RemainderOffsetInfo
[j
].second
>
418 RemainderOffsetInfo
[MaxCountRemainder
].second
)
419 MaxCountRemainder
= j
;
421 // Abort when there are too few insts with common base.
422 if (RemainderOffsetInfo
[MaxCountRemainder
].second
< DispFormPrepMinThreshold
)
425 // If the first value is most profitable, no needed to adjust BucketChain
426 // elements as they are substracted the first value when collecting.
427 if (MaxCountRemainder
== 0)
430 // Adjust load/store to the new chosen base.
432 BucketChain
.Elements
[RemainderOffsetInfo
[MaxCountRemainder
].first
].Offset
;
433 BucketChain
.BaseSCEV
= SE
->getAddExpr(BucketChain
.BaseSCEV
, Offset
);
434 for (auto &E
: BucketChain
.Elements
) {
436 E
.Offset
= cast
<SCEVConstant
>(SE
->getMinusSCEV(E
.Offset
, Offset
));
438 E
.Offset
= cast
<SCEVConstant
>(SE
->getNegativeSCEV(Offset
));
441 std::swap(BucketChain
.Elements
[RemainderOffsetInfo
[MaxCountRemainder
].first
],
442 BucketChain
.Elements
[0]);
446 // FIXME: implement a more clever base choosing policy.
447 // Currently we always choose an exist load/store offset. This maybe lead to
448 // suboptimal code sequences. For example, for one DS chain with offsets
449 // {-32769, 2003, 2007, 2011}, we choose -32769 as base offset, and left disp
450 // for load/stores are {0, 34772, 34776, 34780}. Though each offset now is a
451 // multipler of 4, it cannot be represented by sint16.
452 bool PPCLoopInstrFormPrep::prepareBaseForUpdateFormChain(Bucket
&BucketChain
) {
453 // We have a choice now of which instruction's memory operand we use as the
454 // base for the generated PHI. Always picking the first instruction in each
455 // bucket does not work well, specifically because that instruction might
456 // be a prefetch (and there are no pre-increment dcbt variants). Otherwise,
457 // the choice is somewhat arbitrary, because the backend will happily
458 // generate direct offsets from both the pre-incremented and
459 // post-incremented pointer values. Thus, we'll pick the first non-prefetch
460 // instruction in each bucket, and adjust the recurrence and other offsets
462 for (int j
= 0, je
= BucketChain
.Elements
.size(); j
!= je
; ++j
) {
463 if (auto *II
= dyn_cast
<IntrinsicInst
>(BucketChain
.Elements
[j
].Instr
))
464 if (II
->getIntrinsicID() == Intrinsic::prefetch
)
467 // If we'd otherwise pick the first element anyway, there's nothing to do.
471 // If our chosen element has no offset from the base pointer, there's
473 if (!BucketChain
.Elements
[j
].Offset
||
474 BucketChain
.Elements
[j
].Offset
->isZero())
477 const SCEV
*Offset
= BucketChain
.Elements
[j
].Offset
;
478 BucketChain
.BaseSCEV
= SE
->getAddExpr(BucketChain
.BaseSCEV
, Offset
);
479 for (auto &E
: BucketChain
.Elements
) {
481 E
.Offset
= cast
<SCEVConstant
>(SE
->getMinusSCEV(E
.Offset
, Offset
));
483 E
.Offset
= cast
<SCEVConstant
>(SE
->getNegativeSCEV(Offset
));
486 std::swap(BucketChain
.Elements
[j
], BucketChain
.Elements
[0]);
492 bool PPCLoopInstrFormPrep::rewriteLoadStores(Loop
*L
, Bucket
&BucketChain
,
493 SmallSet
<BasicBlock
*, 16> &BBChanged
,
495 bool MadeChange
= false;
496 const SCEVAddRecExpr
*BasePtrSCEV
=
497 cast
<SCEVAddRecExpr
>(BucketChain
.BaseSCEV
);
498 if (!BasePtrSCEV
->isAffine())
501 LLVM_DEBUG(dbgs() << "PIP: Transforming: " << *BasePtrSCEV
<< "\n");
503 assert(BasePtrSCEV
->getLoop() == L
&& "AddRec for the wrong loop?");
505 // The instruction corresponding to the Bucket's BaseSCEV must be the first
506 // in the vector of elements.
507 Instruction
*MemI
= BucketChain
.Elements
.begin()->Instr
;
508 Value
*BasePtr
= GetPointerOperand(MemI
);
509 assert(BasePtr
&& "No pointer operand");
511 Type
*I8Ty
= Type::getInt8Ty(MemI
->getParent()->getContext());
512 Type
*I8PtrTy
= Type::getInt8PtrTy(MemI
->getParent()->getContext(),
513 BasePtr
->getType()->getPointerAddressSpace());
515 if (!SE
->isLoopInvariant(BasePtrSCEV
->getStart(), L
))
518 const SCEVConstant
*BasePtrIncSCEV
=
519 dyn_cast
<SCEVConstant
>(BasePtrSCEV
->getStepRecurrence(*SE
));
523 // For some DS form load/store instructions, it can also be an update form,
524 // if the stride is a multipler of 4. Use update form if prefer it.
525 bool CanPreInc
= (Form
== UpdateForm
||
526 ((Form
== DSForm
) && !BasePtrIncSCEV
->getAPInt().urem(4) &&
528 const SCEV
*BasePtrStartSCEV
= nullptr;
531 SE
->getMinusSCEV(BasePtrSCEV
->getStart(), BasePtrIncSCEV
);
533 BasePtrStartSCEV
= BasePtrSCEV
->getStart();
535 if (!isSafeToExpand(BasePtrStartSCEV
, *SE
))
538 if (alreadyPrepared(L
, MemI
, BasePtrStartSCEV
, BasePtrIncSCEV
, Form
))
541 LLVM_DEBUG(dbgs() << "PIP: New start is: " << *BasePtrStartSCEV
<< "\n");
543 BasicBlock
*Header
= L
->getHeader();
544 unsigned HeaderLoopPredCount
= pred_size(Header
);
545 BasicBlock
*LoopPredecessor
= L
->getLoopPredecessor();
548 PHINode::Create(I8PtrTy
, HeaderLoopPredCount
,
549 getInstrName(MemI
, PHINodeNameSuffix
),
550 Header
->getFirstNonPHI());
552 SCEVExpander
SCEVE(*SE
, Header
->getModule()->getDataLayout(), "pistart");
553 Value
*BasePtrStart
= SCEVE
.expandCodeFor(BasePtrStartSCEV
, I8PtrTy
,
554 LoopPredecessor
->getTerminator());
556 // Note that LoopPredecessor might occur in the predecessor list multiple
557 // times, and we need to add it the right number of times.
558 for (auto PI
: predecessors(Header
)) {
559 if (PI
!= LoopPredecessor
)
562 NewPHI
->addIncoming(BasePtrStart
, LoopPredecessor
);
565 Instruction
*PtrInc
= nullptr;
566 Instruction
*NewBasePtr
= nullptr;
568 Instruction
*InsPoint
= &*Header
->getFirstInsertionPt();
569 PtrInc
= GetElementPtrInst::Create(
570 I8Ty
, NewPHI
, BasePtrIncSCEV
->getValue(),
571 getInstrName(MemI
, GEPNodeIncNameSuffix
), InsPoint
);
572 cast
<GetElementPtrInst
>(PtrInc
)->setIsInBounds(IsPtrInBounds(BasePtr
));
573 for (auto PI
: predecessors(Header
)) {
574 if (PI
== LoopPredecessor
)
577 NewPHI
->addIncoming(PtrInc
, PI
);
579 if (PtrInc
->getType() != BasePtr
->getType())
580 NewBasePtr
= new BitCastInst(
581 PtrInc
, BasePtr
->getType(),
582 getInstrName(PtrInc
, CastNodeNameSuffix
), InsPoint
);
586 // Note that LoopPredecessor might occur in the predecessor list multiple
587 // times, and we need to make sure no more incoming value for them in PHI.
588 for (auto PI
: predecessors(Header
)) {
589 if (PI
== LoopPredecessor
)
592 // For the latch predecessor, we need to insert a GEP just before the
593 // terminator to increase the address.
595 Instruction
*InsPoint
= BB
->getTerminator();
596 PtrInc
= GetElementPtrInst::Create(
597 I8Ty
, NewPHI
, BasePtrIncSCEV
->getValue(),
598 getInstrName(MemI
, GEPNodeIncNameSuffix
), InsPoint
);
600 cast
<GetElementPtrInst
>(PtrInc
)->setIsInBounds(IsPtrInBounds(BasePtr
));
602 NewPHI
->addIncoming(PtrInc
, PI
);
605 if (NewPHI
->getType() != BasePtr
->getType())
607 new BitCastInst(NewPHI
, BasePtr
->getType(),
608 getInstrName(NewPHI
, CastNodeNameSuffix
),
609 &*Header
->getFirstInsertionPt());
614 // Clear the rewriter cache, because values that are in the rewriter's cache
615 // can be deleted below, causing the AssertingVH in the cache to trigger.
618 if (Instruction
*IDel
= dyn_cast
<Instruction
>(BasePtr
))
619 BBChanged
.insert(IDel
->getParent());
620 BasePtr
->replaceAllUsesWith(NewBasePtr
);
621 RecursivelyDeleteTriviallyDeadInstructions(BasePtr
);
623 // Keep track of the replacement pointer values we've inserted so that we
624 // don't generate more pointer values than necessary.
625 SmallPtrSet
<Value
*, 16> NewPtrs
;
626 NewPtrs
.insert(NewBasePtr
);
628 for (auto I
= std::next(BucketChain
.Elements
.begin()),
629 IE
= BucketChain
.Elements
.end(); I
!= IE
; ++I
) {
630 Value
*Ptr
= GetPointerOperand(I
->Instr
);
631 assert(Ptr
&& "No pointer operand");
632 if (NewPtrs
.count(Ptr
))
635 Instruction
*RealNewPtr
;
636 if (!I
->Offset
|| I
->Offset
->getValue()->isZero()) {
637 RealNewPtr
= NewBasePtr
;
639 Instruction
*PtrIP
= dyn_cast
<Instruction
>(Ptr
);
640 if (PtrIP
&& isa
<Instruction
>(NewBasePtr
) &&
641 cast
<Instruction
>(NewBasePtr
)->getParent() == PtrIP
->getParent())
643 else if (PtrIP
&& isa
<PHINode
>(PtrIP
))
644 PtrIP
= &*PtrIP
->getParent()->getFirstInsertionPt();
648 GetElementPtrInst
*NewPtr
= GetElementPtrInst::Create(
649 I8Ty
, PtrInc
, I
->Offset
->getValue(),
650 getInstrName(I
->Instr
, GEPNodeOffNameSuffix
), PtrIP
);
652 NewPtr
->insertAfter(cast
<Instruction
>(PtrInc
));
653 NewPtr
->setIsInBounds(IsPtrInBounds(Ptr
));
657 if (Instruction
*IDel
= dyn_cast
<Instruction
>(Ptr
))
658 BBChanged
.insert(IDel
->getParent());
660 Instruction
*ReplNewPtr
;
661 if (Ptr
->getType() != RealNewPtr
->getType()) {
662 ReplNewPtr
= new BitCastInst(RealNewPtr
, Ptr
->getType(),
663 getInstrName(Ptr
, CastNodeNameSuffix
));
664 ReplNewPtr
->insertAfter(RealNewPtr
);
666 ReplNewPtr
= RealNewPtr
;
668 Ptr
->replaceAllUsesWith(ReplNewPtr
);
669 RecursivelyDeleteTriviallyDeadInstructions(Ptr
);
671 NewPtrs
.insert(RealNewPtr
);
678 if (Form
== DSForm
&& !CanPreInc
)
679 DSFormChainRewritten
++;
680 else if (Form
== DQForm
)
681 DQFormChainRewritten
++;
682 else if (Form
== UpdateForm
|| (Form
== DSForm
&& CanPreInc
))
683 UpdFormChainRewritten
++;
688 bool PPCLoopInstrFormPrep::updateFormPrep(Loop
*L
,
689 SmallVector
<Bucket
, 16> &Buckets
) {
690 bool MadeChange
= false;
693 SmallSet
<BasicBlock
*, 16> BBChanged
;
694 for (auto &Bucket
: Buckets
)
695 // The base address of each bucket is transformed into a phi and the others
696 // are rewritten based on new base.
697 if (prepareBaseForUpdateFormChain(Bucket
))
698 MadeChange
|= rewriteLoadStores(L
, Bucket
, BBChanged
, UpdateForm
);
701 for (auto &BB
: L
->blocks())
702 if (BBChanged
.count(BB
))
707 bool PPCLoopInstrFormPrep::dispFormPrep(Loop
*L
, SmallVector
<Bucket
, 16> &Buckets
,
709 bool MadeChange
= false;
714 SmallSet
<BasicBlock
*, 16> BBChanged
;
715 for (auto &Bucket
: Buckets
) {
716 if (Bucket
.Elements
.size() < DispFormPrepMinThreshold
)
718 if (prepareBaseForDispFormChain(Bucket
, Form
))
719 MadeChange
|= rewriteLoadStores(L
, Bucket
, BBChanged
, Form
);
723 for (auto &BB
: L
->blocks())
724 if (BBChanged
.count(BB
))
729 // In order to prepare for the preferred instruction form, a PHI is added.
730 // This function will check to see if that PHI already exists and will return
731 // true if it found an existing PHI with the matched start and increment as the
732 // one we wanted to create.
733 bool PPCLoopInstrFormPrep::alreadyPrepared(Loop
*L
, Instruction
* MemI
,
734 const SCEV
*BasePtrStartSCEV
,
735 const SCEVConstant
*BasePtrIncSCEV
,
737 BasicBlock
*BB
= MemI
->getParent();
741 BasicBlock
*PredBB
= L
->getLoopPredecessor();
742 BasicBlock
*LatchBB
= L
->getLoopLatch();
744 if (!PredBB
|| !LatchBB
)
747 // Run through the PHIs and see if we have some that looks like a preparation
748 iterator_range
<BasicBlock::phi_iterator
> PHIIter
= BB
->phis();
749 for (auto & CurrentPHI
: PHIIter
) {
750 PHINode
*CurrentPHINode
= dyn_cast
<PHINode
>(&CurrentPHI
);
754 if (!SE
->isSCEVable(CurrentPHINode
->getType()))
757 const SCEV
*PHISCEV
= SE
->getSCEVAtScope(CurrentPHINode
, L
);
759 const SCEVAddRecExpr
*PHIBasePtrSCEV
= dyn_cast
<SCEVAddRecExpr
>(PHISCEV
);
763 const SCEVConstant
*PHIBasePtrIncSCEV
=
764 dyn_cast
<SCEVConstant
>(PHIBasePtrSCEV
->getStepRecurrence(*SE
));
765 if (!PHIBasePtrIncSCEV
)
768 if (CurrentPHINode
->getNumIncomingValues() == 2) {
769 if ((CurrentPHINode
->getIncomingBlock(0) == LatchBB
&&
770 CurrentPHINode
->getIncomingBlock(1) == PredBB
) ||
771 (CurrentPHINode
->getIncomingBlock(1) == LatchBB
&&
772 CurrentPHINode
->getIncomingBlock(0) == PredBB
)) {
773 if (PHIBasePtrIncSCEV
== BasePtrIncSCEV
) {
774 // The existing PHI (CurrentPHINode) has the same start and increment
775 // as the PHI that we wanted to create.
776 if (Form
== UpdateForm
&&
777 PHIBasePtrSCEV
->getStart() == BasePtrStartSCEV
) {
778 ++PHINodeAlreadyExistsUpdate
;
781 if (Form
== DSForm
|| Form
== DQForm
) {
782 const SCEVConstant
*Diff
= dyn_cast
<SCEVConstant
>(
783 SE
->getMinusSCEV(PHIBasePtrSCEV
->getStart(), BasePtrStartSCEV
));
784 if (Diff
&& !Diff
->getAPInt().urem(Form
)) {
786 ++PHINodeAlreadyExistsDS
;
788 ++PHINodeAlreadyExistsDQ
;
799 bool PPCLoopInstrFormPrep::runOnLoop(Loop
*L
) {
800 bool MadeChange
= false;
802 // Only prep. the inner-most loop
803 if (!L
->isInnermost())
806 // Return if already done enough preparation.
807 if (SuccPrepCount
>= MaxVarsPrep
)
810 LLVM_DEBUG(dbgs() << "PIP: Examining: " << *L
<< "\n");
812 BasicBlock
*LoopPredecessor
= L
->getLoopPredecessor();
813 // If there is no loop predecessor, or the loop predecessor's terminator
814 // returns a value (which might contribute to determining the loop's
815 // iteration space), insert a new preheader for the loop.
816 if (!LoopPredecessor
||
817 !LoopPredecessor
->getTerminator()->getType()->isVoidTy()) {
818 LoopPredecessor
= InsertPreheaderForLoop(L
, DT
, LI
, nullptr, PreserveLCSSA
);
822 if (!LoopPredecessor
) {
823 LLVM_DEBUG(dbgs() << "PIP fails since no predecessor for current loop.\n");
826 // Check if a load/store has update form. This lambda is used by function
827 // collectCandidates which can collect candidates for types defined by lambda.
828 auto isUpdateFormCandidate
= [&](const Instruction
*I
, const Value
*PtrValue
,
829 const Type
*PointerElementType
) {
830 assert((PtrValue
&& I
) && "Invalid parameter!");
831 // There are no update forms for Altivec vector load/stores.
832 if (ST
&& ST
->hasAltivec() && PointerElementType
->isVectorTy())
834 // There are no update forms for P10 lxvp/stxvp intrinsic.
835 auto *II
= dyn_cast
<IntrinsicInst
>(I
);
836 if (II
&& ((II
->getIntrinsicID() == Intrinsic::ppc_vsx_lxvp
) ||
837 II
->getIntrinsicID() == Intrinsic::ppc_vsx_stxvp
))
839 // See getPreIndexedAddressParts, the displacement for LDU/STDU has to
840 // be 4's multiple (DS-form). For i64 loads/stores when the displacement
841 // fits in a 16-bit signed field but isn't a multiple of 4, it will be
842 // useless and possible to break some original well-form addressing mode
843 // to make this pre-inc prep for it.
844 if (PointerElementType
->isIntegerTy(64)) {
845 const SCEV
*LSCEV
= SE
->getSCEVAtScope(const_cast<Value
*>(PtrValue
), L
);
846 const SCEVAddRecExpr
*LARSCEV
= dyn_cast
<SCEVAddRecExpr
>(LSCEV
);
847 if (!LARSCEV
|| LARSCEV
->getLoop() != L
)
849 if (const SCEVConstant
*StepConst
=
850 dyn_cast
<SCEVConstant
>(LARSCEV
->getStepRecurrence(*SE
))) {
851 const APInt
&ConstInt
= StepConst
->getValue()->getValue();
852 if (ConstInt
.isSignedIntN(16) && ConstInt
.srem(4) != 0)
859 // Check if a load/store has DS form.
860 auto isDSFormCandidate
= [](const Instruction
*I
, const Value
*PtrValue
,
861 const Type
*PointerElementType
) {
862 assert((PtrValue
&& I
) && "Invalid parameter!");
863 if (isa
<IntrinsicInst
>(I
))
865 return (PointerElementType
->isIntegerTy(64)) ||
866 (PointerElementType
->isFloatTy()) ||
867 (PointerElementType
->isDoubleTy()) ||
868 (PointerElementType
->isIntegerTy(32) &&
869 llvm::any_of(I
->users(),
870 [](const User
*U
) { return isa
<SExtInst
>(U
); }));
873 // Check if a load/store has DQ form.
874 auto isDQFormCandidate
= [&](const Instruction
*I
, const Value
*PtrValue
,
875 const Type
*PointerElementType
) {
876 assert((PtrValue
&& I
) && "Invalid parameter!");
877 // Check if it is a P10 lxvp/stxvp intrinsic.
878 auto *II
= dyn_cast
<IntrinsicInst
>(I
);
880 return II
->getIntrinsicID() == Intrinsic::ppc_vsx_lxvp
||
881 II
->getIntrinsicID() == Intrinsic::ppc_vsx_stxvp
;
882 // Check if it is a P9 vector load/store.
883 return ST
&& ST
->hasP9Vector() && (PointerElementType
->isVectorTy());
886 // intrinsic for update form.
887 SmallVector
<Bucket
, 16> UpdateFormBuckets
=
888 collectCandidates(L
, isUpdateFormCandidate
, MaxVarsUpdateForm
);
890 // Prepare for update form.
891 if (!UpdateFormBuckets
.empty())
892 MadeChange
|= updateFormPrep(L
, UpdateFormBuckets
);
894 // Collect buckets of comparable addresses used by loads and stores for DS
896 SmallVector
<Bucket
, 16> DSFormBuckets
=
897 collectCandidates(L
, isDSFormCandidate
, MaxVarsDSForm
);
899 // Prepare for DS form.
900 if (!DSFormBuckets
.empty())
901 MadeChange
|= dispFormPrep(L
, DSFormBuckets
, DSForm
);
903 // Collect buckets of comparable addresses used by loads and stores for DQ
905 SmallVector
<Bucket
, 16> DQFormBuckets
=
906 collectCandidates(L
, isDQFormCandidate
, MaxVarsDQForm
);
908 // Prepare for DQ form.
909 if (!DQFormBuckets
.empty())
910 MadeChange
|= dispFormPrep(L
, DQFormBuckets
, DQForm
);