//===-- LoopIdiomRecognize.cpp - Loop idiom recognition -------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This pass implements an idiom recognizer that transforms simple loops into a
// non-loop form.  In cases that this kicks in, it can be a significant
// performance win.
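//
// For example, one case this pass handles is
//   for (i) A[i] = 0;
// which becomes a single call to memset covering the whole array.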
//
//===----------------------------------------------------------------------===//
//
// Future loop memory idioms to recognize:
//   memcmp, memmove, strlen, etc.
// Future floating point idioms to recognize in -ffast-math mode:
//   fpowi
// Future integer operation idioms to recognize:
//   ctpop, ctlz, cttz
//
// Beware that isel's default lowering for ctpop is highly inefficient for
// i64 and larger types when i64 is legal and the value has few bits set.  It
// would be good to enhance isel to emit a loop for ctpop in this case.
//
// We should enhance the memset/memcpy recognition to handle multiple stores in
// the loop.  This would handle things like:
//   void foo(_Complex float *P)
//     for (i) { __real__(*P) = 0;  __imag__(*P) = 0; }
//
// We should enhance this to handle negative strides through memory.
// Alternatively (and perhaps better) we could rely on an earlier pass to force
// forward iteration through memory, which is generally better for cache
// behavior.  Negative strides *do* happen for memset/memcpy loops.
//
// This could recognize common matrix multiplies and dot product idioms and
// replace them with calls to BLAS (if linked in??).
//
//===----------------------------------------------------------------------===//

#define DEBUG_TYPE "loop-idiom"
#include "llvm/Transforms/Scalar.h"
#include "llvm/IntrinsicInst.h"
#include "llvm/Module.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/ScalarEvolutionExpander.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetLibraryInfo.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/IRBuilder.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/ADT/Statistic.h"
using namespace llvm;

STATISTIC(NumMemSet, "Number of memset's formed from loop stores");
STATISTIC(NumMemCpy, "Number of memcpy's formed from loop load+stores");

namespace {
  class LoopIdiomRecognize : public LoopPass {
    Loop *CurLoop;
    const TargetData *TD;
    DominatorTree *DT;
    ScalarEvolution *SE;
    TargetLibraryInfo *TLI;
  public:
    static char ID;
    explicit LoopIdiomRecognize() : LoopPass(ID) {
      initializeLoopIdiomRecognizePass(*PassRegistry::getPassRegistry());
    }

    bool runOnLoop(Loop *L, LPPassManager &LPM);
    bool runOnLoopBlock(BasicBlock *BB, const SCEV *BECount,
                        SmallVectorImpl<BasicBlock*> &ExitBlocks);

    bool processLoopStore(StoreInst *SI, const SCEV *BECount);
    bool processLoopMemSet(MemSetInst *MSI, const SCEV *BECount);

    bool processLoopStridedStore(Value *DestPtr, unsigned StoreSize,
                                 unsigned StoreAlignment,
                                 Value *SplatValue, Instruction *TheStore,
                                 const SCEVAddRecExpr *Ev,
                                 const SCEV *BECount);
    bool processLoopStoreOfLoopLoad(StoreInst *SI, unsigned StoreSize,
                                    const SCEVAddRecExpr *StoreEv,
                                    const SCEVAddRecExpr *LoadEv,
                                    const SCEV *BECount);

    /// This transformation requires natural loop information & requires that
    /// loop preheaders be inserted into the CFG.
    ///
    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
      AU.addRequired<LoopInfo>();
      AU.addPreserved<LoopInfo>();
      AU.addRequiredID(LoopSimplifyID);
      AU.addPreservedID(LoopSimplifyID);
      AU.addRequiredID(LCSSAID);
      AU.addPreservedID(LCSSAID);
      AU.addRequired<AliasAnalysis>();
      AU.addPreserved<AliasAnalysis>();
      AU.addRequired<ScalarEvolution>();
      AU.addPreserved<ScalarEvolution>();
      AU.addPreserved<DominatorTree>();
      AU.addRequired<DominatorTree>();
      AU.addRequired<TargetLibraryInfo>();
    }
  };
}

char LoopIdiomRecognize::ID = 0;
INITIALIZE_PASS_BEGIN(LoopIdiomRecognize, "loop-idiom", "Recognize loop idioms",
                      false, false)
INITIALIZE_PASS_DEPENDENCY(LoopInfo)
INITIALIZE_PASS_DEPENDENCY(DominatorTree)
INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
INITIALIZE_PASS_DEPENDENCY(LCSSA)
INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo)
INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
INITIALIZE_PASS_END(LoopIdiomRecognize, "loop-idiom", "Recognize loop idioms",
                    false, false)

Pass *llvm::createLoopIdiomPass() { return new LoopIdiomRecognize(); }

/// deleteDeadInstruction - Delete this instruction.  Before we do, go through
/// and zero out all the operands of this instruction.  If any of them become
/// dead, delete them and the computation tree that feeds them.
///
static void deleteDeadInstruction(Instruction *I, ScalarEvolution &SE) {
  SmallVector<Instruction*, 32> NowDeadInsts;

  NowDeadInsts.push_back(I);

  // Before we touch this instruction, remove it from SE!
  do {
    Instruction *DeadInst = NowDeadInsts.pop_back_val();

    // This instruction is dead, zap it, in stages.  Start by removing it from
    // SCEV.
    SE.forgetValue(DeadInst);

    for (unsigned op = 0, e = DeadInst->getNumOperands(); op != e; ++op) {
      Value *Op = DeadInst->getOperand(op);
      DeadInst->setOperand(op, 0);

      // If this operand just became dead, add it to the NowDeadInsts list.
      if (!Op->use_empty()) continue;

      if (Instruction *OpI = dyn_cast<Instruction>(Op))
        if (isInstructionTriviallyDead(OpI))
          NowDeadInsts.push_back(OpI);
    }

    DeadInst->eraseFromParent();

  } while (!NowDeadInsts.empty());
}

/// deleteIfDeadInstruction - If the specified value is a dead instruction,
/// delete it and any recursively used instructions.
static void deleteIfDeadInstruction(Value *V, ScalarEvolution &SE) {
  if (Instruction *I = dyn_cast<Instruction>(V))
    if (isInstructionTriviallyDead(I))
      deleteDeadInstruction(I, SE);
}

bool LoopIdiomRecognize::runOnLoop(Loop *L, LPPassManager &LPM) {
  CurLoop = L;

  // The trip count of the loop must be analyzable.
  SE = &getAnalysis<ScalarEvolution>();
  if (!SE->hasLoopInvariantBackedgeTakenCount(L))
    return false;
  const SCEV *BECount = SE->getBackedgeTakenCount(L);
  if (isa<SCEVCouldNotCompute>(BECount)) return false;

  // If this loop executes exactly one time, then it should be peeled, not
  // optimized by this pass.
  if (const SCEVConstant *BECst = dyn_cast<SCEVConstant>(BECount))
    if (BECst->getValue()->getValue() == 0)
      return false;

  // We require target data for now.
  TD = getAnalysisIfAvailable<TargetData>();
  if (TD == 0) return false;

  DT = &getAnalysis<DominatorTree>();
  LoopInfo &LI = getAnalysis<LoopInfo>();
  TLI = &getAnalysis<TargetLibraryInfo>();

  SmallVector<BasicBlock*, 8> ExitBlocks;
  CurLoop->getUniqueExitBlocks(ExitBlocks);

  DEBUG(dbgs() << "loop-idiom Scanning: F["
               << L->getHeader()->getParent()->getName()
               << "] Loop %" << L->getHeader()->getName() << "\n");

  bool MadeChange = false;
  // Scan all the blocks in the loop that are not in subloops.
  for (Loop::block_iterator BI = L->block_begin(), E = L->block_end(); BI != E;
       ++BI) {
    // Ignore blocks in subloops.
    if (LI.getLoopFor(*BI) != CurLoop)
      continue;

    MadeChange |= runOnLoopBlock(*BI, BECount, ExitBlocks);
  }
  return MadeChange;
}

/// runOnLoopBlock - Process the specified block, which lives in a counted loop
/// with the specified backedge count.  This block is known to be in the current
/// loop and not in any subloops.
bool LoopIdiomRecognize::runOnLoopBlock(BasicBlock *BB, const SCEV *BECount,
                                     SmallVectorImpl<BasicBlock*> &ExitBlocks) {
  // We can only promote stores in this block if they are unconditionally
  // executed in the loop.  For a block to be unconditionally executed, it has
  // to dominate all the exit blocks of the loop.  Verify this now.
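  // For example, a block guarded by "if (x)" inside the loop body does not
  // dominate the loop exits, so its stores execute only conditionally and
  // must not be widened into an unconditional memset.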
  for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i)
    if (!DT->dominates(BB, ExitBlocks[i]))
      return false;

  bool MadeChange = false;
  for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ) {
    Instruction *Inst = I++;
    // Look for store instructions, which may be optimized to memset/memcpy.
    if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
      WeakVH InstPtr(I);
      if (!processLoopStore(SI, BECount)) continue;
      MadeChange = true;

      // If processing the store invalidated our iterator, start over from the
      // top of the block.
      if (InstPtr == 0)
        I = BB->begin();
      continue;
    }

    // Look for memset instructions, which may be optimized to a larger memset.
    if (MemSetInst *MSI = dyn_cast<MemSetInst>(Inst)) {
      WeakVH InstPtr(I);
      if (!processLoopMemSet(MSI, BECount)) continue;
      MadeChange = true;

      // If processing the memset invalidated our iterator, start over from the
      // top of the block.
      if (InstPtr == 0)
        I = BB->begin();
      continue;
    }
  }

  return MadeChange;
}

/// processLoopStore - See if this store can be promoted to a memset or memcpy.
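///
/// For illustration, the two idioms this dispatches to are roughly:
///   for (i) A[i] = V;     // -> memset or memset_pattern16
///   for (i) A[i] = B[i];  // -> memcpy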
bool LoopIdiomRecognize::processLoopStore(StoreInst *SI, const SCEV *BECount) {
  if (SI->isVolatile()) return false;

  Value *StoredVal = SI->getValueOperand();
  Value *StorePtr = SI->getPointerOperand();

  // Reject stores that are so large that they overflow an unsigned.
  uint64_t SizeInBits = TD->getTypeSizeInBits(StoredVal->getType());
  if ((SizeInBits & 7) || (SizeInBits >> 32) != 0)
    return false;

  // See if the pointer expression is an AddRec like {base,+,1} on the current
  // loop, which indicates a strided store.  If we have something else, it's a
  // random store we can't handle.
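  // For example, the address of "A[i] = v" over a loop with a 4-byte element
  // type is modeled by SCEV as the affine AddRec {&A[0],+,4}.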
  const SCEVAddRecExpr *StoreEv =
    dyn_cast<SCEVAddRecExpr>(SE->getSCEV(StorePtr));
  if (StoreEv == 0 || StoreEv->getLoop() != CurLoop || !StoreEv->isAffine())
    return false;

  // Check to see if the stride matches the size of the store.  If so, then we
  // know that every byte is touched in the loop.
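  // For example, an i32 store with stride 4 touches all 4 bytes on every
  // iteration, but with stride 8 it would leave 4-byte holes that a memset
  // could not reproduce.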
  unsigned StoreSize = (unsigned)SizeInBits >> 3;
  const SCEVConstant *Stride = dyn_cast<SCEVConstant>(StoreEv->getOperand(1));

  if (Stride == 0 || StoreSize != Stride->getValue()->getValue()) {
    // TODO: Could also handle negative stride here someday, that will require
    // the validity check in mayLoopAccessLocation to be updated though.
    // Enable this to print exact negative strides.
    if (0 && Stride && StoreSize == -Stride->getValue()->getValue()) {
      dbgs() << "NEGATIVE STRIDE: " << *SI << "\n";
      dbgs() << "BB: " << *SI->getParent();
    }

    return false;
  }

  // See if we can optimize just this store in isolation.
  if (processLoopStridedStore(StorePtr, StoreSize, SI->getAlignment(),
                              StoredVal, SI, StoreEv, BECount))
    return true;

  // If the stored value is a strided load in the same loop with the same stride
  // then this may be transformable into a memcpy.  This kicks in for stuff like
  //   for (i) A[i] = B[i];
  if (LoadInst *LI = dyn_cast<LoadInst>(StoredVal)) {
    const SCEVAddRecExpr *LoadEv =
      dyn_cast<SCEVAddRecExpr>(SE->getSCEV(LI->getOperand(0)));
    if (LoadEv && LoadEv->getLoop() == CurLoop && LoadEv->isAffine() &&
        StoreEv->getOperand(1) == LoadEv->getOperand(1) && !LI->isVolatile())
      if (processLoopStoreOfLoopLoad(SI, StoreSize, StoreEv, LoadEv, BECount))
        return true;
  }
  //errs() << "UNHANDLED strided store: " << *StoreEv << " - " << *SI << "\n";

  return false;
}

/// processLoopMemSet - See if this memset can be promoted to a large memset.
bool LoopIdiomRecognize::
processLoopMemSet(MemSetInst *MSI, const SCEV *BECount) {
  // We can only handle non-volatile memsets with a constant size.
  if (MSI->isVolatile() || !isa<ConstantInt>(MSI->getLength())) return false;

  // If we're not allowed to hack on memset, we fail.
  if (!TLI->has(LibFunc::memset))
    return false;

  Value *Pointer = MSI->getDest();

  // See if the pointer expression is an AddRec like {base,+,1} on the current
  // loop, which indicates a strided store.  If we have something else, it's a
  // random store we can't handle.
  const SCEVAddRecExpr *Ev = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(Pointer));
  if (Ev == 0 || Ev->getLoop() != CurLoop || !Ev->isAffine())
    return false;

  // Reject memsets that are so large that they overflow an unsigned.
  uint64_t SizeInBytes = cast<ConstantInt>(MSI->getLength())->getZExtValue();
  if ((SizeInBytes >> 32) != 0)
    return false;

  // Check to see if the stride matches the size of the memset.  If so, then we
  // know that every byte is touched in the loop.
  const SCEVConstant *Stride = dyn_cast<SCEVConstant>(Ev->getOperand(1));

  // TODO: Could also handle negative stride here someday, that will require the
  // validity check in mayLoopAccessLocation to be updated though.
  if (Stride == 0 || MSI->getLength() != Stride->getValue())
    return false;

  return processLoopStridedStore(Pointer, (unsigned)SizeInBytes,
                                 MSI->getAlignment(), MSI->getValue(),
                                 MSI, Ev, BECount);
}

/// mayLoopAccessLocation - Return true if the specified loop might access the
/// specified pointer location, which is a loop-strided access.  The 'Access'
/// argument specifies what the verboten forms of access are (read or write).
static bool mayLoopAccessLocation(Value *Ptr,AliasAnalysis::ModRefResult Access,
                                  Loop *L, const SCEV *BECount,
                                  unsigned StoreSize, AliasAnalysis &AA,
                                  Instruction *IgnoredStore) {
  // Get the location that may be stored across the loop.  Since the access is
  // strided positively through memory, we say that the modified location starts
  // at the pointer and has infinite size.
  uint64_t AccessSize = AliasAnalysis::UnknownSize;

  // If the loop iterates a fixed number of times, we can refine the access size
  // to be exactly the size of the memset, which is (BECount+1)*StoreSize
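  // (worked example: BECount = 99 with a 4-byte store accesses exactly
  // (99+1)*4 = 400 bytes).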
  if (const SCEVConstant *BECst = dyn_cast<SCEVConstant>(BECount))
    AccessSize = (BECst->getValue()->getZExtValue()+1)*StoreSize;

  // TODO: For this to be really effective, we have to dive into the pointer
  // operand in the store.  Store to &A[i] of 100 will always return may alias
  // with store of &A[100]; we need StoreLoc to be "A" with size of 100,
  // which will then no-alias a store to &A[100].
  AliasAnalysis::Location StoreLoc(Ptr, AccessSize);

  for (Loop::block_iterator BI = L->block_begin(), E = L->block_end(); BI != E;
       ++BI)
    for (BasicBlock::iterator I = (*BI)->begin(), E = (*BI)->end(); I != E; ++I)
      if (&*I != IgnoredStore &&
          (AA.getModRefInfo(I, StoreLoc) & Access))
        return true;

  return false;
}

/// getMemSetPatternValue - If a strided store of the specified value is safe to
/// turn into a memset_pattern16, return a ConstantArray of 16 bytes that should
/// be passed in.  Otherwise, return null.
///
/// Note that we don't ever attempt to use memset_pattern8 or 4, because these
/// just replicate their input array and then pass on to memset_pattern16.
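///
/// For example, a strided store of i32 0x01020304 yields a 16-byte pattern
/// holding that value replicated four times.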
static Constant *getMemSetPatternValue(Value *V, const TargetData &TD) {
  // If the value isn't a constant, we can't promote it to being in a constant
  // array.  We could theoretically do a store to an alloca or something, but
  // that doesn't seem worthwhile.
  Constant *C = dyn_cast<Constant>(V);
  if (C == 0) return 0;

  // Only handle simple values that are a power of two bytes in size.
  uint64_t Size = TD.getTypeSizeInBits(V->getType());
  if (Size == 0 || (Size & 7) || (Size & (Size-1)))
    return 0;

  // Don't care enough about darwin/ppc to implement this.
  if (TD.isBigEndian())
    return 0;

  // Convert to size in bytes.
  Size /= 8;

  // TODO: If CI is larger than 16-bytes, we can try slicing it in half to see
  // if the top and bottom are the same (e.g. for vectors and large integers).
  if (Size > 16) return 0;

  // If the constant is exactly 16 bytes, just use it.
  if (Size == 16) return C;

  // Otherwise, we'll use an array of the constants.
  unsigned ArraySize = 16/Size;
  ArrayType *AT = ArrayType::get(V->getType(), ArraySize);
  return ConstantArray::get(AT, std::vector<Constant*>(ArraySize, C));
}

/// processLoopStridedStore - We see a strided store of some value.  If we can
/// transform this into a memset or memset_pattern in the loop preheader, do so.
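///
/// For example:
///   for (i) P[i] = 0;          // -> memset(P, 0, ...)
///   for (i) P[i] = 0x01020304; // -> memset_pattern16, where available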
bool LoopIdiomRecognize::
processLoopStridedStore(Value *DestPtr, unsigned StoreSize,
                        unsigned StoreAlignment, Value *StoredVal,
                        Instruction *TheStore, const SCEVAddRecExpr *Ev,
                        const SCEV *BECount) {

  // If the stored value is a byte-wise value (like i32 -1), then it may be
  // turned into a memset of i8 -1, assuming that all the consecutive bytes
  // are stored.  A store of i32 0x01020304 can never be turned into a memset,
  // but it can be turned into memset_pattern if the target supports it.
  Value *SplatValue = isBytewiseValue(StoredVal);
  Constant *PatternValue = 0;

  // If we're allowed to form a memset, and the stored value would be acceptable
  // for memset, use it.
  if (SplatValue && TLI->has(LibFunc::memset) &&
      // Verify that the stored value is loop invariant.  If not, we can't
      // promote the memset.
      CurLoop->isLoopInvariant(SplatValue)) {
    // Keep and use SplatValue.
    PatternValue = 0;
  } else if (TLI->has(LibFunc::memset_pattern16) &&
             (PatternValue = getMemSetPatternValue(StoredVal, *TD))) {
    // It looks like we can use PatternValue!
    SplatValue = 0;
  } else {
    // Otherwise, this isn't an idiom we can transform.  For example, we can't
    // do anything with a 3-byte store.
    return false;
  }

  // The trip count of the loop and the base pointer of the addrec SCEV is
  // guaranteed to be loop invariant, which means that it should dominate the
  // header.  This allows us to insert code for it in the preheader.
  BasicBlock *Preheader = CurLoop->getLoopPreheader();
  IRBuilder<> Builder(Preheader->getTerminator());
  SCEVExpander Expander(*SE, "loop-idiom");

  // Okay, we have a strided store "p[i]" of a splattable value.  We can turn
  // this into a memset in the loop preheader now if we want.  However, this
  // would be unsafe to do if there is anything else in the loop that may read
  // or write to the aliased location.  Check for any overlap by generating the
  // base pointer and checking the region.
  unsigned AddrSpace = cast<PointerType>(DestPtr->getType())->getAddressSpace();
  Value *BasePtr =
    Expander.expandCodeFor(Ev->getStart(), Builder.getInt8PtrTy(AddrSpace),
                           Preheader->getTerminator());

  if (mayLoopAccessLocation(BasePtr, AliasAnalysis::ModRef,
                            CurLoop, BECount,
                            StoreSize, getAnalysis<AliasAnalysis>(), TheStore)){
    Expander.clear();
    // If we generated new code for the base pointer, clean up.
    deleteIfDeadInstruction(BasePtr, *SE);
    return false;
  }

  // Okay, everything looks good, insert the memset.

  // The # stored bytes is (BECount+1)*Size.  Expand the trip count out to
  // pointer size if it isn't already.
  const Type *IntPtr = TD->getIntPtrType(DestPtr->getContext());
  BECount = SE->getTruncateOrZeroExtend(BECount, IntPtr);

  const SCEV *NumBytesS = SE->getAddExpr(BECount, SE->getConstant(IntPtr, 1),
                                         SCEV::FlagNUW);
  if (StoreSize != 1)
    NumBytesS = SE->getMulExpr(NumBytesS, SE->getConstant(IntPtr, StoreSize),
                               SCEV::FlagNUW);

  Value *NumBytes =
    Expander.expandCodeFor(NumBytesS, IntPtr, Preheader->getTerminator());

  CallInst *NewCall;
  if (SplatValue)
    NewCall = Builder.CreateMemSet(BasePtr, SplatValue,NumBytes,StoreAlignment);
  else {
    Module *M = TheStore->getParent()->getParent()->getParent();
    Value *MSP = M->getOrInsertFunction("memset_pattern16",
                                        Builder.getVoidTy(),
                                        Builder.getInt8PtrTy(),
                                        Builder.getInt8PtrTy(), IntPtr,
                                        (void*)0);

    // Otherwise we should form a memset_pattern16.  PatternValue is known to be
    // a constant array of 16 bytes.  Plop the value into a mergable global.
    GlobalVariable *GV = new GlobalVariable(*M, PatternValue->getType(), true,
                                            GlobalValue::InternalLinkage,
                                            PatternValue, ".memset_pattern");
    GV->setUnnamedAddr(true); // Ok to merge these.
    GV->setAlignment(16);
    Value *PatternPtr = ConstantExpr::getBitCast(GV, Builder.getInt8PtrTy());
    NewCall = Builder.CreateCall3(MSP, BasePtr, PatternPtr, NumBytes);
  }

  DEBUG(dbgs() << "  Formed memset: " << *NewCall << "\n"
               << "    from store to: " << *Ev << " at: " << *TheStore << "\n");
  NewCall->setDebugLoc(TheStore->getDebugLoc());

  // Okay, the memset has been formed.  Zap the original store and anything that
  // feeds into it.
  deleteDeadInstruction(TheStore, *SE);
  ++NumMemSet;
  return true;
}

/// processLoopStoreOfLoopLoad - We see a strided store whose value is a
/// same-strided load.
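///
/// For example: for (i) A[i] = B[i];  -->  memcpy(A, B, ...)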
bool LoopIdiomRecognize::
processLoopStoreOfLoopLoad(StoreInst *SI, unsigned StoreSize,
                           const SCEVAddRecExpr *StoreEv,
                           const SCEVAddRecExpr *LoadEv,
                           const SCEV *BECount) {
  // If we're not allowed to form memcpy, we fail.
  if (!TLI->has(LibFunc::memcpy))
    return false;

  LoadInst *LI = cast<LoadInst>(SI->getValueOperand());

  // The trip count of the loop and the base pointer of the addrec SCEV is
  // guaranteed to be loop invariant, which means that it should dominate the
  // header.  This allows us to insert code for it in the preheader.
  BasicBlock *Preheader = CurLoop->getLoopPreheader();
  IRBuilder<> Builder(Preheader->getTerminator());
  SCEVExpander Expander(*SE, "loop-idiom");

  // Okay, we have a strided store "p[i]" of a loaded value.  We can turn
  // this into a memcpy in the loop preheader now if we want.  However, this
  // would be unsafe to do if there is anything else in the loop that may read
  // or write the memory region we're storing to.  This includes the load that
  // feeds the stores.  Check for an alias by generating the base address and
  // checking everything.
  Value *StoreBasePtr =
    Expander.expandCodeFor(StoreEv->getStart(),
                           Builder.getInt8PtrTy(SI->getPointerAddressSpace()),
                           Preheader->getTerminator());

  if (mayLoopAccessLocation(StoreBasePtr, AliasAnalysis::ModRef,
                            CurLoop, BECount, StoreSize,
                            getAnalysis<AliasAnalysis>(), SI)) {
    Expander.clear();
    // If we generated new code for the base pointer, clean up.
    deleteIfDeadInstruction(StoreBasePtr, *SE);
    return false;
  }

  // For a memcpy, we have to make sure that the input array is not being
  // mutated by the loop.
  Value *LoadBasePtr =
    Expander.expandCodeFor(LoadEv->getStart(),
                           Builder.getInt8PtrTy(LI->getPointerAddressSpace()),
                           Preheader->getTerminator());

  if (mayLoopAccessLocation(LoadBasePtr, AliasAnalysis::Mod, CurLoop, BECount,
                            StoreSize, getAnalysis<AliasAnalysis>(), SI)) {
    Expander.clear();
    // If we generated new code for the base pointer, clean up.
    deleteIfDeadInstruction(LoadBasePtr, *SE);
    deleteIfDeadInstruction(StoreBasePtr, *SE);
    return false;
  }

  // Okay, everything is safe, we can transform this!

  // The # stored bytes is (BECount+1)*Size.  Expand the trip count out to
  // pointer size if it isn't already.
  const Type *IntPtr = TD->getIntPtrType(SI->getContext());
  BECount = SE->getTruncateOrZeroExtend(BECount, IntPtr);

  const SCEV *NumBytesS = SE->getAddExpr(BECount, SE->getConstant(IntPtr, 1),
                                         SCEV::FlagNUW);
  if (StoreSize != 1)
    NumBytesS = SE->getMulExpr(NumBytesS, SE->getConstant(IntPtr, StoreSize),
                               SCEV::FlagNUW);

  Value *NumBytes =
    Expander.expandCodeFor(NumBytesS, IntPtr, Preheader->getTerminator());

  Value *NewCall =
    Builder.CreateMemCpy(StoreBasePtr, LoadBasePtr, NumBytes,
                         std::min(SI->getAlignment(), LI->getAlignment()));
  NewCall->setDebugLoc(SI->getDebugLoc());

  DEBUG(dbgs() << "  Formed memcpy: " << *NewCall << "\n"
               << "    from load ptr=" << *LoadEv << " at: " << *LI << "\n"
               << "    from store ptr=" << *StoreEv << " at: " << *SI << "\n");

  // Okay, the memcpy has been formed.  Zap the original store and anything that
  // feeds into it.
  deleteDeadInstruction(SI, *SE);
  ++NumMemCpy;
  return true;
}