//===-- LoopIdiomRecognize.cpp - Loop idiom recognition -------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License.  See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This pass implements an idiom recognizer that transforms simple loops into a
// non-loop form.  In cases where this kicks in, it can be a significant
// performance win.
//
//===----------------------------------------------------------------------===//
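//
// As an illustrative example (names A, n are hypothetical), this pass
// transforms a loop like
//
//   for (i = 0; i != n; ++i)
//     A[i] = 0;
//
// into a single call in the loop preheader:
//
//   memset(A, 0, n * sizeof(A[0]));
//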
// TODO List:
//
// Future loop memory idioms to recognize:
//   memcmp, memmove, strlen, etc.
// Future floating point idioms to recognize in -ffast-math mode:
//   fpowi
// Future integer operation idioms to recognize:
//   ctpop, ctlz, cttz
//
// Beware that isel's default lowering for ctpop is highly inefficient for
// i64 and larger types when i64 is legal and the value has few bits set.  It
// would be good to enhance isel to emit a loop for ctpop in this case.
//
// We should enhance the memset/memcpy recognition to handle multiple stores in
// the loop.  This would handle things like:
//   void foo(_Complex float *P)
//     for (i) { __real__(*P) = 0;  __imag__(*P) = 0; }
//
// We should enhance this to handle negative strides through memory.
// Alternatively (and perhaps better) we could rely on an earlier pass to force
// forward iteration through memory, which is generally better for cache
// behavior.  Negative strides *do* happen for memset/memcpy loops.
//
// This could recognize common matrix multiplies and dot product idioms and
// replace them with calls to BLAS (if linked in??).
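//
// An illustrative negative-stride case that is currently rejected (see the
// disabled diagnostic in processLoopStore):
//   for (i = n; i != 0; --i)
//     A[i-1] = 0;   // stride is -sizeof(A[0]); still memset(A, 0, ...)
//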
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "loop-idiom"
#include "llvm/Transforms/Scalar.h"
#include "llvm/IntrinsicInst.h"
#include "llvm/Module.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/ScalarEvolutionExpander.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetLibraryInfo.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/IRBuilder.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/ADT/Statistic.h"
using namespace llvm;
STATISTIC(NumMemSet, "Number of memset's formed from loop stores");
STATISTIC(NumMemCpy, "Number of memcpy's formed from loop load+stores");
namespace {
  class LoopIdiomRecognize : public LoopPass {
    Loop *CurLoop;
    const TargetData *TD;
    DominatorTree *DT;
    ScalarEvolution *SE;
    TargetLibraryInfo *TLI;
  public:
    static char ID;
    explicit LoopIdiomRecognize() : LoopPass(ID) {
      initializeLoopIdiomRecognizePass(*PassRegistry::getPassRegistry());
    }
    bool runOnLoop(Loop *L, LPPassManager &LPM);
    bool runOnLoopBlock(BasicBlock *BB, const SCEV *BECount,
                        SmallVectorImpl<BasicBlock*> &ExitBlocks);

    bool processLoopStore(StoreInst *SI, const SCEV *BECount);
    bool processLoopMemSet(MemSetInst *MSI, const SCEV *BECount);

    bool processLoopStridedStore(Value *DestPtr, unsigned StoreSize,
                                 unsigned StoreAlignment,
                                 Value *SplatValue, Instruction *TheStore,
                                 const SCEVAddRecExpr *Ev,
                                 const SCEV *BECount);
    bool processLoopStoreOfLoopLoad(StoreInst *SI, unsigned StoreSize,
                                    const SCEVAddRecExpr *StoreEv,
                                    const SCEVAddRecExpr *LoadEv,
                                    const SCEV *BECount);
    /// This transformation requires natural loop information & requires that
    /// loop preheaders be inserted into the CFG.
    ///
    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
      AU.addRequired<LoopInfo>();
      AU.addPreserved<LoopInfo>();
      AU.addRequiredID(LoopSimplifyID);
      AU.addPreservedID(LoopSimplifyID);
      AU.addRequiredID(LCSSAID);
      AU.addPreservedID(LCSSAID);
      AU.addRequired<AliasAnalysis>();
      AU.addPreserved<AliasAnalysis>();
      AU.addRequired<ScalarEvolution>();
      AU.addPreserved<ScalarEvolution>();
      AU.addPreserved<DominatorTree>();
      AU.addRequired<DominatorTree>();
      AU.addRequired<TargetLibraryInfo>();
    }
  };
}
char LoopIdiomRecognize::ID = 0;
INITIALIZE_PASS_BEGIN(LoopIdiomRecognize, "loop-idiom", "Recognize loop idioms",
                      false, false)
INITIALIZE_PASS_DEPENDENCY(LoopInfo)
INITIALIZE_PASS_DEPENDENCY(DominatorTree)
INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
INITIALIZE_PASS_DEPENDENCY(LCSSA)
INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo)
INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
INITIALIZE_PASS_END(LoopIdiomRecognize, "loop-idiom", "Recognize loop idioms",
                    false, false)

Pass *llvm::createLoopIdiomPass() { return new LoopIdiomRecognize(); }
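
// Illustrative usage (an assumption about the surrounding toolchain, not
// stated in this file): since the pass is registered as "loop-idiom", it can
// be run in isolation with, e.g.:
//   opt -loop-idiom -S input.ll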
/// DeleteDeadInstruction - Delete this instruction.  Before we do, go through
/// and zero out all the operands of this instruction.  If any of them become
/// dead, delete them and the computation tree that feeds them.
///
static void DeleteDeadInstruction(Instruction *I, ScalarEvolution &SE) {
  SmallVector<Instruction*, 32> NowDeadInsts;

  NowDeadInsts.push_back(I);

  // Before we touch this instruction, remove it from SE!
  do {
    Instruction *DeadInst = NowDeadInsts.pop_back_val();

    // This instruction is dead, zap it, in stages.  Start by removing it from
    // SCEV.
    SE.forgetValue(DeadInst);

    for (unsigned op = 0, e = DeadInst->getNumOperands(); op != e; ++op) {
      Value *Op = DeadInst->getOperand(op);
      DeadInst->setOperand(op, 0);

      // If this operand just became dead, add it to the NowDeadInsts list.
      if (!Op->use_empty()) continue;

      if (Instruction *OpI = dyn_cast<Instruction>(Op))
        if (isInstructionTriviallyDead(OpI))
          NowDeadInsts.push_back(OpI);
    }

    DeadInst->eraseFromParent();

  } while (!NowDeadInsts.empty());
}
bool LoopIdiomRecognize::runOnLoop(Loop *L, LPPassManager &LPM) {
  CurLoop = L;

  // The trip count of the loop must be analyzable.
  SE = &getAnalysis<ScalarEvolution>();
  if (!SE->hasLoopInvariantBackedgeTakenCount(L))
    return false;
  const SCEV *BECount = SE->getBackedgeTakenCount(L);
  if (isa<SCEVCouldNotCompute>(BECount)) return false;

  // If this loop executes exactly one time, then it should be peeled, not
  // optimized by this pass.  A backedge-taken count of zero means the body
  // runs exactly once.
  if (const SCEVConstant *BECst = dyn_cast<SCEVConstant>(BECount))
    if (BECst->getValue()->getValue() == 0)
      return false;

  // We require target data for now.
  TD = getAnalysisIfAvailable<TargetData>();
  if (TD == 0) return false;

  DT = &getAnalysis<DominatorTree>();
  LoopInfo &LI = getAnalysis<LoopInfo>();
  TLI = &getAnalysis<TargetLibraryInfo>();

  SmallVector<BasicBlock*, 8> ExitBlocks;
  CurLoop->getUniqueExitBlocks(ExitBlocks);

  DEBUG(dbgs() << "loop-idiom Scanning: F["
               << L->getHeader()->getParent()->getName()
               << "] Loop %" << L->getHeader()->getName() << "\n");

  bool MadeChange = false;
  // Scan all the blocks in the loop that are not in subloops.
  for (Loop::block_iterator BI = L->block_begin(), E = L->block_end(); BI != E;
       ++BI) {
    // Ignore blocks in subloops.
    if (LI.getLoopFor(*BI) != CurLoop)
      continue;

    MadeChange |= runOnLoopBlock(*BI, BECount, ExitBlocks);
  }

  return MadeChange;
}
/// runOnLoopBlock - Process the specified block, which lives in a counted loop
/// with the specified backedge count.  This block is known to be in the current
/// loop and not in any subloops.
bool LoopIdiomRecognize::runOnLoopBlock(BasicBlock *BB, const SCEV *BECount,
                                     SmallVectorImpl<BasicBlock*> &ExitBlocks) {
  // We can only promote stores in this block if they are unconditionally
  // executed in the loop.  For a block to be unconditionally executed, it has
  // to dominate all the exit blocks of the loop.  Verify this now.
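  // For example (illustrative): in "for (i) { if (cond) A[i] = 0; }" the
  // conditional store's block does not dominate the loop exit, so it must be
  // skipped; promoting it to a memset could write bytes the original loop
  // never stored.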
  for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i)
    if (!DT->dominates(BB, ExitBlocks[i]))
      return false;

  bool MadeChange = false;
  for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ) {
    Instruction *Inst = I++;
    // Look for store instructions, which may be optimized to memset/memcpy.
    if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
      WeakVH InstPtr(I);
      if (!processLoopStore(SI, BECount)) continue;
      MadeChange = true;

      // If processing the store invalidated our iterator, start over from the
      // top of the block.
      if (InstPtr == 0)
        I = BB->begin();
      continue;
    }

    // Look for memset instructions, which may be optimized to a larger memset.
    if (MemSetInst *MSI = dyn_cast<MemSetInst>(Inst)) {
      WeakVH InstPtr(I);
      if (!processLoopMemSet(MSI, BECount)) continue;
      MadeChange = true;

      // If processing the memset invalidated our iterator, start over from the
      // top of the block.
      if (InstPtr == 0)
        I = BB->begin();
      continue;
    }
  }

  return MadeChange;
}
/// processLoopStore - See if this store can be promoted to a memset or memcpy.
bool LoopIdiomRecognize::processLoopStore(StoreInst *SI, const SCEV *BECount) {
  if (SI->isVolatile()) return false;

  Value *StoredVal = SI->getValueOperand();
  Value *StorePtr = SI->getPointerOperand();

  // Reject stores that are so large that they overflow an unsigned.
  uint64_t SizeInBits = TD->getTypeSizeInBits(StoredVal->getType());
  if ((SizeInBits & 7) || (SizeInBits >> 32) != 0)
    return false;

  // See if the pointer expression is an AddRec like {base,+,1} on the current
  // loop, which indicates a strided store.  If we have something else, it's a
  // random store we can't handle.
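  // For example (illustrative): in "for (i) A[i] = 0" with 4-byte elements,
  // ScalarEvolution describes the store pointer as the affine AddRec
  // {A,+,4}<%loop>: base address A, advancing by 4 bytes per iteration.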
  const SCEVAddRecExpr *StoreEv =
    dyn_cast<SCEVAddRecExpr>(SE->getSCEV(StorePtr));
  if (StoreEv == 0 || StoreEv->getLoop() != CurLoop || !StoreEv->isAffine())
    return false;

  // Check to see if the stride matches the size of the store.  If so, then we
  // know that every byte is touched in the loop.
  unsigned StoreSize = (unsigned)SizeInBits >> 3;
  const SCEVConstant *Stride = dyn_cast<SCEVConstant>(StoreEv->getOperand(1));

  if (Stride == 0 || StoreSize != Stride->getValue()->getValue()) {
    // TODO: Could also handle negative stride here someday, that will require
    // the validity check in mayLoopAccessLocation to be updated though.
    // Enable this to print exact negative strides.
    if (0 && Stride && StoreSize == -Stride->getValue()->getValue()) {
      dbgs() << "NEGATIVE STRIDE: " << *SI << "\n";
      dbgs() << "BB: " << *SI->getParent();
    }

    return false;
  }

  // See if we can optimize just this store in isolation.
  if (processLoopStridedStore(StorePtr, StoreSize, SI->getAlignment(),
                              StoredVal, SI, StoreEv, BECount))
    return true;

  // If the stored value is a strided load in the same loop with the same
  // stride, then this may be transformable into a memcpy.  This kicks in for
  // stuff like
  //   for (i) A[i] = B[i];
  if (LoadInst *LI = dyn_cast<LoadInst>(StoredVal)) {
    const SCEVAddRecExpr *LoadEv =
      dyn_cast<SCEVAddRecExpr>(SE->getSCEV(LI->getOperand(0)));
    if (LoadEv && LoadEv->getLoop() == CurLoop && LoadEv->isAffine() &&
        StoreEv->getOperand(1) == LoadEv->getOperand(1) && !LI->isVolatile())
      if (processLoopStoreOfLoopLoad(SI, StoreSize, StoreEv, LoadEv, BECount))
        return true;
  }

  //errs() << "UNHANDLED strided store: " << *StoreEv << " - " << *SI << "\n";
  return false;
}
/// processLoopMemSet - See if this memset can be promoted to a large memset.
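/// For example (illustrative): a loop like
///   for (i = 0; i != n; ++i)
///     memset(&A[i*16], 0, 16);
/// writes 16 bytes per iteration with a stride of 16 bytes, so it can be
/// widened into a single memset of 16*n bytes.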
bool LoopIdiomRecognize::
processLoopMemSet(MemSetInst *MSI, const SCEV *BECount) {
  // We can only handle non-volatile memsets with a constant size.
  if (MSI->isVolatile() || !isa<ConstantInt>(MSI->getLength())) return false;

  // If we're not allowed to hack on memset, we fail.
  if (!TLI->has(LibFunc::memset))
    return false;

  Value *Pointer = MSI->getDest();

  // See if the pointer expression is an AddRec like {base,+,1} on the current
  // loop, which indicates a strided store.  If we have something else, it's a
  // random store we can't handle.
  const SCEVAddRecExpr *Ev = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(Pointer));
  if (Ev == 0 || Ev->getLoop() != CurLoop || !Ev->isAffine())
    return false;

  // Reject memsets that are so large that they overflow an unsigned.
  uint64_t SizeInBytes = cast<ConstantInt>(MSI->getLength())->getZExtValue();
  if ((SizeInBytes >> 32) != 0)
    return false;

  // Check to see if the stride matches the size of the memset.  If so, then we
  // know that every byte is touched in the loop.
  const SCEVConstant *Stride = dyn_cast<SCEVConstant>(Ev->getOperand(1));

  // TODO: Could also handle negative stride here someday, that will require the
  // validity check in mayLoopAccessLocation to be updated though.
  if (Stride == 0 || MSI->getLength() != Stride->getValue())
    return false;

  return processLoopStridedStore(Pointer, (unsigned)SizeInBytes,
                                 MSI->getAlignment(), MSI->getValue(),
                                 MSI, Ev, BECount);
}
/// mayLoopAccessLocation - Return true if the specified loop might access the
/// specified pointer location, which is a loop-strided access.  The 'Access'
/// argument specifies what the verboten forms of access are (read or write).
static bool mayLoopAccessLocation(Value *Ptr,AliasAnalysis::ModRefResult Access,
                                  Loop *L, const SCEV *BECount,
                                  unsigned StoreSize, AliasAnalysis &AA,
                                  Instruction *IgnoredStore) {
  // Get the location that may be stored across the loop.  Since the access is
  // strided positively through memory, we say that the modified location starts
  // at the pointer and has infinite size.
  uint64_t AccessSize = AliasAnalysis::UnknownSize;

  // If the loop iterates a fixed number of times, we can refine the access size
  // to be exactly the size of the memset, which is (BECount+1)*StoreSize.
  if (const SCEVConstant *BECst = dyn_cast<SCEVConstant>(BECount))
    AccessSize = (BECst->getValue()->getZExtValue()+1)*StoreSize;

  // TODO: For this to be really effective, we have to dive into the pointer
  // operand in the store.  A store to &A[i] of 100 will always return may-alias
  // with a store to &A[100]; we need StoreLoc to be "A" with a size of 100,
  // which will then no-alias a store to &A[100].
  AliasAnalysis::Location StoreLoc(Ptr, AccessSize);

  for (Loop::block_iterator BI = L->block_begin(), E = L->block_end(); BI != E;
       ++BI)
    for (BasicBlock::iterator I = (*BI)->begin(), E = (*BI)->end(); I != E; ++I)
      if (&*I != IgnoredStore &&
          (AA.getModRefInfo(I, StoreLoc) & Access))
        return true;

  return false;
}
/// getMemSetPatternValue - If a strided store of the specified value is safe to
/// turn into a memset_pattern16, return a ConstantArray of 16 bytes that should
/// be passed in.  Otherwise, return null.
///
/// Note that we don't ever attempt to use memset_pattern8 or 4, because these
/// just replicate their input array and then pass on to memset_pattern16.
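///
/// For reference (stated here as an assumption about Darwin's libc, not
/// quoted from a system header), the target routine is declared as:
///   void memset_pattern16(void *b, const void *pattern16, size_t len);
/// It fills len bytes at b by repeating the 16-byte pattern at pattern16.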
static Constant *getMemSetPatternValue(Value *V, const TargetData &TD) {
  // If the value isn't a constant, we can't promote it to being in a constant
  // array.  We could theoretically do a store to an alloca or something, but
  // that doesn't seem worthwhile.
  Constant *C = dyn_cast<Constant>(V);
  if (C == 0) return 0;

  // Only handle simple values that are a power of two bytes in size.
  uint64_t Size = TD.getTypeSizeInBits(V->getType());
  if (Size == 0 || (Size & 7) || (Size & (Size-1)))
    return 0;

  // Don't care enough about darwin/ppc to implement this.
  if (TD.isBigEndian())
    return 0;

  // Convert to size in bytes.
  Size /= 8;

  // TODO: If CI is larger than 16-bytes, we can try slicing it in half to see
  // if the top and bottom are the same (e.g. for vectors and large integers).
  if (Size > 16) return 0;

  // If the constant is exactly 16 bytes, just use it.
  if (Size == 16) return C;

  // Otherwise, we'll use an array of the constants.
  unsigned ArraySize = 16/Size;
  ArrayType *AT = ArrayType::get(V->getType(), ArraySize);
  return ConstantArray::get(AT, std::vector<Constant*>(ArraySize, C));
}
/// processLoopStridedStore - We see a strided store of some value.  If we can
/// transform this into a memset or memset_pattern in the loop preheader, do so.
bool LoopIdiomRecognize::
processLoopStridedStore(Value *DestPtr, unsigned StoreSize,
                        unsigned StoreAlignment, Value *StoredVal,
                        Instruction *TheStore, const SCEVAddRecExpr *Ev,
                        const SCEV *BECount) {

  // If the stored value is a byte-wise value (like i32 -1), then it may be
  // turned into a memset of i8 -1, assuming that all the consecutive bytes
  // are stored.  A store of i32 0x01020304 can never be turned into a memset,
  // but it can be turned into memset_pattern if the target supports it.
  Value *SplatValue = isBytewiseValue(StoredVal);
  Constant *PatternValue = 0;

  // If we're allowed to form a memset, and the stored value would be acceptable
  // for memset, use it.
  if (SplatValue && TLI->has(LibFunc::memset) &&
      // Verify that the stored value is loop invariant.  If not, we can't
      // promote the memset.
      CurLoop->isLoopInvariant(SplatValue)) {
    // Keep and use SplatValue.
    PatternValue = 0;
  } else if (TLI->has(LibFunc::memset_pattern16) &&
             (PatternValue = getMemSetPatternValue(StoredVal, *TD))) {
    // It looks like we can use PatternValue!
    SplatValue = 0;
  } else {
    // Otherwise, this isn't an idiom we can transform.  For example, we can't
    // do anything with a 3-byte store.
    return false;
  }

  // Okay, we have a strided store "p[i]" of a splattable value.  We can turn
  // this into a memset in the loop preheader now if we want.  However, this
  // would be unsafe to do if there is anything else in the loop that may read
  // or write to the aliased location.  Check for an alias.
  if (mayLoopAccessLocation(DestPtr, AliasAnalysis::ModRef, CurLoop, BECount,
                            StoreSize, getAnalysis<AliasAnalysis>(), TheStore))
    return false;

  // Okay, everything looks good, insert the memset.
  BasicBlock *Preheader = CurLoop->getLoopPreheader();

  IRBuilder<> Builder(Preheader->getTerminator());

  // The trip count of the loop and the base pointer of the addrec SCEV are
  // guaranteed to be loop invariant, which means that they should dominate the
  // header.  Just insert code for them in the preheader.
  SCEVExpander Expander(*SE);

  unsigned AddrSpace = cast<PointerType>(DestPtr->getType())->getAddressSpace();
  Value *BasePtr =
    Expander.expandCodeFor(Ev->getStart(), Builder.getInt8PtrTy(AddrSpace),
                           Preheader->getTerminator());

  // The # stored bytes is (BECount+1)*Size.  Expand the trip count out to
  // pointer size if it isn't already.
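  // Worked example (illustrative): for "for (i = 0; i != 100; ++i) A[i] = 0"
  // with 4-byte elements, the backedge is taken 99 times, so the memset
  // length is (99 + 1) * 4 = 400 bytes.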
  const Type *IntPtr = TD->getIntPtrType(DestPtr->getContext());
  BECount = SE->getTruncateOrZeroExtend(BECount, IntPtr);

  const SCEV *NumBytesS = SE->getAddExpr(BECount, SE->getConstant(IntPtr, 1),
                                         SCEV::FlagNUW);
  if (StoreSize != 1)
    NumBytesS = SE->getMulExpr(NumBytesS, SE->getConstant(IntPtr, StoreSize),
                               SCEV::FlagNUW);

  Value *NumBytes =
    Expander.expandCodeFor(NumBytesS, IntPtr, Preheader->getTerminator());

  CallInst *NewCall;
  if (SplatValue)
    NewCall = Builder.CreateMemSet(BasePtr, SplatValue, NumBytes,
                                   StoreAlignment);
  else {
    Module *M = TheStore->getParent()->getParent()->getParent();
    Value *MSP = M->getOrInsertFunction("memset_pattern16",
                                        Builder.getVoidTy(),
                                        Builder.getInt8PtrTy(),
                                        Builder.getInt8PtrTy(), IntPtr,
                                        (void*)0);

    // Otherwise we should form a memset_pattern16.  PatternValue is known to be
    // a constant array of 16 bytes.  Plop the value into a mergable global.
    GlobalVariable *GV = new GlobalVariable(*M, PatternValue->getType(), true,
                                            GlobalValue::InternalLinkage,
                                            PatternValue, ".memset_pattern");
    GV->setUnnamedAddr(true); // Ok to merge these.
    GV->setAlignment(16);
    Value *PatternPtr = ConstantExpr::getBitCast(GV, Builder.getInt8PtrTy());
    NewCall = Builder.CreateCall3(MSP, BasePtr, PatternPtr, NumBytes);
  }

  DEBUG(dbgs() << "  Formed memset: " << *NewCall << "\n"
               << "    from store to: " << *Ev << " at: " << *TheStore << "\n");
  NewCall->setDebugLoc(TheStore->getDebugLoc());

  // Okay, the memset has been formed.  Zap the original store and anything that
  // feeds into it.
  DeleteDeadInstruction(TheStore, *SE);
  ++NumMemSet;
  return true;
}
/// processLoopStoreOfLoopLoad - We see a strided store whose value is a
/// same-strided load.
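/// For example (illustrative):
///   for (i) A[i] = B[i];
/// becomes a single call to memcpy(A, B, n*sizeof(A[0])) once the loop is
/// proven not to otherwise read or write either array.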
bool LoopIdiomRecognize::
processLoopStoreOfLoopLoad(StoreInst *SI, unsigned StoreSize,
                           const SCEVAddRecExpr *StoreEv,
                           const SCEVAddRecExpr *LoadEv,
                           const SCEV *BECount) {
  // If we're not allowed to form memcpy, we fail.
  if (!TLI->has(LibFunc::memcpy))
    return false;

  LoadInst *LI = cast<LoadInst>(SI->getValueOperand());

  // Okay, we have a strided store "p[i]" of a loaded value.  We can turn
  // this into a memcpy in the loop preheader now if we want.  However, this
  // would be unsafe to do if there is anything else in the loop that may read
  // or write to the stored location (including the load feeding the stores).
  // Check for an alias.
  if (mayLoopAccessLocation(SI->getPointerOperand(), AliasAnalysis::ModRef,
                            CurLoop, BECount, StoreSize,
                            getAnalysis<AliasAnalysis>(), SI))
    return false;

  // For a memcpy, we have to make sure that the input array is not being
  // mutated by the loop.
  if (mayLoopAccessLocation(LI->getPointerOperand(), AliasAnalysis::Mod,
                            CurLoop, BECount, StoreSize,
                            getAnalysis<AliasAnalysis>(), SI))
    return false;

  // Okay, everything looks good, insert the memcpy.
  BasicBlock *Preheader = CurLoop->getLoopPreheader();

  IRBuilder<> Builder(Preheader->getTerminator());

  // The trip count of the loop and the base pointers of the addrec SCEVs are
  // guaranteed to be loop invariant, which means that they should dominate the
  // header.  Just insert code for them in the preheader.
  SCEVExpander Expander(*SE);

  Value *LoadBasePtr =
    Expander.expandCodeFor(LoadEv->getStart(),
                           Builder.getInt8PtrTy(LI->getPointerAddressSpace()),
                           Preheader->getTerminator());
  Value *StoreBasePtr =
    Expander.expandCodeFor(StoreEv->getStart(),
                           Builder.getInt8PtrTy(SI->getPointerAddressSpace()),
                           Preheader->getTerminator());

  // The # stored bytes is (BECount+1)*Size.  Expand the trip count out to
  // pointer size if it isn't already.
  const Type *IntPtr = TD->getIntPtrType(SI->getContext());
  BECount = SE->getTruncateOrZeroExtend(BECount, IntPtr);

  const SCEV *NumBytesS = SE->getAddExpr(BECount, SE->getConstant(IntPtr, 1),
                                         SCEV::FlagNUW);
  if (StoreSize != 1)
    NumBytesS = SE->getMulExpr(NumBytesS, SE->getConstant(IntPtr, StoreSize),
                               SCEV::FlagNUW);

  Value *NumBytes =
    Expander.expandCodeFor(NumBytesS, IntPtr, Preheader->getTerminator());

  Value *NewCall =
    Builder.CreateMemCpy(StoreBasePtr, LoadBasePtr, NumBytes,
                         std::min(SI->getAlignment(), LI->getAlignment()));

  DEBUG(dbgs() << "  Formed memcpy: " << *NewCall << "\n"
               << "    from load ptr=" << *LoadEv << " at: " << *LI << "\n"
               << "    from store ptr=" << *StoreEv << " at: " << *SI << "\n");

  // Okay, the memcpy has been formed.  Zap the original store and anything that
  // feeds into it.
  DeleteDeadInstruction(SI, *SE);
  ++NumMemCpy;
  return true;
}
);