//===- ScalarReplAggregates.cpp - Scalar Replacement of Aggregates --------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file was developed by the LLVM research group and is distributed under
// the University of Illinois Open Source License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This transformation implements the well known scalar replacement of
// aggregates transformation.  This xform breaks up alloca instructions of
// aggregate type (structure or array) into individual alloca instructions for
// each member (if possible).  Then, if possible, it transforms the individual
// alloca instructions into nice clean scalar SSA form.
//
// This combines a simple SRoA algorithm with the Mem2Reg algorithm because
// they often interact, especially for C++ programs.  As such, iterating
// between SRoA and Mem2Reg until we run out of things to promote works well.
//
//===----------------------------------------------------------------------===//
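//
// As an illustrative sketch (not taken from this file's tests): given
//
//   %X = alloca { i32, i32 }
//   %F = getelementptr { i32, i32 }* %X, i32 0, i32 0
//   store i32 1, i32* %F
//
// SROA replaces %X with one alloca per member, after which mem2reg can
// usually promote each piece to an SSA register.
//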
#define DEBUG_TYPE "scalarrepl"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Constants.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Function.h"
#include "llvm/GlobalVariable.h"
#include "llvm/Instructions.h"
#include "llvm/IntrinsicInst.h"
#include "llvm/Pass.h"
#include "llvm/Analysis/Dominators.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Transforms/Utils/PromoteMemToReg.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/GetElementPtrTypeIterator.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/Compiler.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringExtras.h"
STATISTIC(NumReplaced,  "Number of allocas broken up");
STATISTIC(NumPromoted,  "Number of allocas promoted");
STATISTIC(NumConverted, "Number of aggregates converted to scalar");
STATISTIC(NumGlobals,   "Number of allocas copied from constant global");
  struct VISIBILITY_HIDDEN SROA : public FunctionPass {
    static char ID; // Pass identification, replacement for typeid
    explicit SROA(signed T = -1) : FunctionPass((intptr_t)&ID) {

    bool runOnFunction(Function &F);

    bool performScalarRepl(Function &F);
    bool performPromotion(Function &F);

    // getAnalysisUsage - This pass does not require any passes, but we know it
    // will not alter the CFG, so say so.
    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
      AU.addRequired<DominatorTree>();
      AU.addRequired<DominanceFrontier>();
      AU.addRequired<TargetData>();
    /// AllocaInfo - When analyzing uses of an alloca instruction, this captures
    /// information about the uses.  All these fields are initialized to false
    /// and set to true when something is learned.
    struct AllocaInfo {
      /// isUnsafe - This is set to true if the alloca cannot be SROA'd.
      bool isUnsafe;

      /// needsCanon - This is set to true if there is some use of the alloca
      /// that requires canonicalization.
      bool needsCanon;

      /// isMemCpySrc - This is true if this aggregate is memcpy'd from.
      bool isMemCpySrc;

      /// isMemCpyDst - This is true if this aggregate is memcpy'd into.
      bool isMemCpyDst;

      AllocaInfo()
        : isUnsafe(false), needsCanon(false),
          isMemCpySrc(false), isMemCpyDst(false) {}
    };

    void MarkUnsafe(AllocaInfo &I) { I.isUnsafe = true; }
    int isSafeAllocaToScalarRepl(AllocationInst *AI);

    void isSafeUseOfAllocation(Instruction *User, AllocationInst *AI,
                               AllocaInfo &Info);
    void isSafeElementUse(Value *Ptr, bool isFirstElt, AllocationInst *AI,
                          AllocaInfo &Info);
    void isSafeMemIntrinsicOnAllocation(MemIntrinsic *MI, AllocationInst *AI,
                                        unsigned OpNo, AllocaInfo &Info);
    void isSafeUseOfBitCastedAllocation(BitCastInst *User, AllocationInst *AI,
                                        AllocaInfo &Info);

    void DoScalarReplacement(AllocationInst *AI,
                             std::vector<AllocationInst*> &WorkList);
    void CanonicalizeAllocaUsers(AllocationInst *AI);
    AllocaInst *AddNewAlloca(Function &F, const Type *Ty, AllocationInst *Base);

    void RewriteBitCastUserOfAlloca(Instruction *BCInst, AllocationInst *AI,
                                    SmallVector<AllocaInst*, 32> &NewElts);

    const Type *CanConvertToScalar(Value *V, bool &IsNotTrivial);
    void ConvertToScalar(AllocationInst *AI, const Type *Ty);
    void ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI, unsigned Offset);
    static Instruction *isOnlyCopiedFromConstantGlobal(AllocationInst *AI);
  RegisterPass<SROA> X("scalarrepl", "Scalar Replacement of Aggregates");
// Public interface to the ScalarReplAggregates pass
FunctionPass *llvm::createScalarReplAggregatesPass(signed int Threshold) {
  return new SROA(Threshold);
}
bool SROA::runOnFunction(Function &F) {
  bool Changed = performPromotion(F);

  while (1) {
    bool LocalChange = performScalarRepl(F);
    if (!LocalChange) break;   // No need to repromote if no scalarrepl
    Changed = true;
    LocalChange = performPromotion(F);
    if (!LocalChange) break;   // No need to re-scalarrepl if no promotion
  }

  return Changed;
}
bool SROA::performPromotion(Function &F) {
  std::vector<AllocaInst*> Allocas;
  DominatorTree     &DT = getAnalysis<DominatorTree>();
  DominanceFrontier &DF = getAnalysis<DominanceFrontier>();

  BasicBlock &BB = F.getEntryBlock();  // Get the entry node for the function

  bool Changed = false;

  while (1) {
    Allocas.clear();

    // Find allocas that are safe to promote, by looking at all instructions in
    // the entry node of the function.
    for (BasicBlock::iterator I = BB.begin(), E = --BB.end(); I != E; ++I)
      if (AllocaInst *AI = dyn_cast<AllocaInst>(I))       // Is it an alloca?
        if (isAllocaPromotable(AI))
          Allocas.push_back(AI);

    if (Allocas.empty()) break;

    PromoteMemToReg(Allocas, DT, DF);
    NumPromoted += Allocas.size();
    Changed = true;
  }

  return Changed;
}
// performScalarRepl - This algorithm is a simple worklist driven algorithm,
// which runs on all of the malloc/alloca instructions in the function, removing
// them if they are only used by getelementptr instructions.
//
bool SROA::performScalarRepl(Function &F) {
  std::vector<AllocationInst*> WorkList;

  // Scan the entry basic block, adding any alloca's and mallocs to the worklist
  BasicBlock &BB = F.getEntryBlock();
  for (BasicBlock::iterator I = BB.begin(), E = BB.end(); I != E; ++I)
    if (AllocationInst *A = dyn_cast<AllocationInst>(I))
      WorkList.push_back(A);

  const TargetData &TD = getAnalysis<TargetData>();

  // Process the worklist
  bool Changed = false;
  while (!WorkList.empty()) {
    AllocationInst *AI = WorkList.back();
    WorkList.pop_back();

    // Handle dead allocas trivially.  These can be formed by SROA'ing arrays
    // with unused elements.
    if (AI->use_empty()) {
      AI->eraseFromParent();
      continue;
    }

    // If we can turn this aggregate value (potentially with casts) into a
    // simple scalar value that can be mem2reg'd into a register value.
    bool IsNotTrivial = false;
    if (const Type *ActualType = CanConvertToScalar(AI, IsNotTrivial))
      if (IsNotTrivial && ActualType != Type::VoidTy) {
        ConvertToScalar(AI, ActualType);
        Changed = true;
        continue;
      }

    // Check to see if we can perform the core SROA transformation.  We cannot
    // transform the allocation instruction if it is an array allocation
    // (allocations OF arrays are ok though), and an allocation of a scalar
    // value cannot be decomposed at all.
    if (!AI->isArrayAllocation() &&
        (isa<StructType>(AI->getAllocatedType()) ||
         isa<ArrayType>(AI->getAllocatedType())) &&
        AI->getAllocatedType()->isSized() &&
        TD.getABITypeSize(AI->getAllocatedType()) < SRThreshold) {
      // Check that all of the users of the allocation are capable of being
      // transformed.
      switch (isSafeAllocaToScalarRepl(AI)) {
      default: assert(0 && "Unexpected value!");
      case 0:  // Not safe to scalar replace.
        break;
      case 1:  // Safe, but requires cleanup/canonicalizations first
        CanonicalizeAllocaUsers(AI);
        // FALL THROUGH.
      case 3:  // Safe to scalar replace.
        DoScalarReplacement(AI, WorkList);
        Changed = true;
        continue;
      }
    }

    // Check to see if this allocation is only modified by a memcpy/memmove from
    // a constant global.  If this is the case, we can change all users to use
    // the constant global instead.  This is commonly produced by the CFE by
    // constructs like "void foo() { int A[] = {1,2,3,4,5,6,7,8,9...}; }" if 'A'
    // is only subsequently read.
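    //
    // An illustrative sketch (names are made up, not from a real test case):
    //
    //   %A = alloca [4 x i32]
    //   %A.i8 = bitcast [4 x i32]* %A to i8*
    //   call void @llvm.memcpy.i32(i8* %A.i8, i8* @A.init, i32 16, i32 4)
    //   ... only loads of %A follow ...
    //
    // Here every use of %A can be rewritten to use the constant global
    // @A.init directly and the alloca deleted.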
    if (Instruction *TheCopy = isOnlyCopiedFromConstantGlobal(AI)) {
      DOUT << "Found alloca equal to global: " << *AI;
      DOUT << "  memcpy = " << *TheCopy;
      Constant *TheSrc = cast<Constant>(TheCopy->getOperand(2));
      AI->replaceAllUsesWith(ConstantExpr::getBitCast(TheSrc, AI->getType()));
      TheCopy->eraseFromParent();  // Don't mutate the global.
      AI->eraseFromParent();
      ++NumGlobals;
      Changed = true;
      continue;
    }

    // Otherwise, couldn't process this.
  }

  return Changed;
}
/// DoScalarReplacement - This alloca satisfied the isSafeAllocaToScalarRepl
/// predicate, do SROA now.
void SROA::DoScalarReplacement(AllocationInst *AI,
                               std::vector<AllocationInst*> &WorkList) {
  DOUT << "Found inst to SROA: " << *AI;
  SmallVector<AllocaInst*, 32> ElementAllocas;
  if (const StructType *ST = dyn_cast<StructType>(AI->getAllocatedType())) {
    ElementAllocas.reserve(ST->getNumContainedTypes());
    for (unsigned i = 0, e = ST->getNumContainedTypes(); i != e; ++i) {
      AllocaInst *NA = new AllocaInst(ST->getContainedType(i), 0,
                                      AI->getName() + "." + utostr(i), AI);
      ElementAllocas.push_back(NA);
      WorkList.push_back(NA);  // Add to worklist for recursive processing
    }
  } else {
    const ArrayType *AT = cast<ArrayType>(AI->getAllocatedType());
    ElementAllocas.reserve(AT->getNumElements());
    const Type *ElTy = AT->getElementType();
    for (unsigned i = 0, e = AT->getNumElements(); i != e; ++i) {
      AllocaInst *NA = new AllocaInst(ElTy, 0, AI->getAlignment(),
                                      AI->getName() + "." + utostr(i), AI);
      ElementAllocas.push_back(NA);
      WorkList.push_back(NA);  // Add to worklist for recursive processing
    }
  }

  // Now that we have created the alloca instructions that we want to use,
  // expand the getelementptr instructions to use them.
  while (!AI->use_empty()) {
    Instruction *User = cast<Instruction>(AI->use_back());
    if (BitCastInst *BCInst = dyn_cast<BitCastInst>(User)) {
      RewriteBitCastUserOfAlloca(BCInst, AI, ElementAllocas);
      BCInst->eraseFromParent();
      continue;
    }

    GetElementPtrInst *GEPI = cast<GetElementPtrInst>(User);
    // We now know that the GEP is of the form:  GEP <ptr>, 0, <cst>
    unsigned Idx =
      (unsigned)cast<ConstantInt>(GEPI->getOperand(2))->getZExtValue();

    assert(Idx < ElementAllocas.size() && "Index out of range?");
    AllocaInst *AllocaToUse = ElementAllocas[Idx];

    Value *RepValue;
    if (GEPI->getNumOperands() == 3) {
      // Do not insert a new getelementptr instruction with zero indices, only
      // to have it optimized out later.
      RepValue = AllocaToUse;
    } else {
      // We are indexing deeply into the structure, so we still need a
      // getelementptr instruction to finish the indexing.  This may be
      // expanded itself once the worklist is rerun.
      SmallVector<Value*, 8> NewArgs;
      NewArgs.push_back(Constant::getNullValue(Type::Int32Ty));
      NewArgs.append(GEPI->op_begin()+3, GEPI->op_end());
      RepValue = new GetElementPtrInst(AllocaToUse, NewArgs.begin(),
                                       NewArgs.end(), "", GEPI);
      RepValue->takeName(GEPI);
    }

    // If this GEP is to the start of the aggregate, check for memcpys.
    if (Idx == 0) {
      bool IsStartOfAggregateGEP = true;
      for (unsigned i = 3, e = GEPI->getNumOperands(); i != e; ++i) {
        if (!isa<ConstantInt>(GEPI->getOperand(i))) {
          IsStartOfAggregateGEP = false;
          break;
        }
        if (!cast<ConstantInt>(GEPI->getOperand(i))->isZero()) {
          IsStartOfAggregateGEP = false;
          break;
        }
      }

      if (IsStartOfAggregateGEP)
        RewriteBitCastUserOfAlloca(GEPI, AI, ElementAllocas);
    }

    // Move all of the users over to the new GEP.
    GEPI->replaceAllUsesWith(RepValue);
    // Delete the old GEP
    GEPI->eraseFromParent();
  }

  // Finally, delete the Alloca instruction
  AI->eraseFromParent();
  NumReplaced++;
}
/// isSafeElementUse - Check to see if this use is an allowed use for a
/// getelementptr instruction of an array aggregate allocation.  isFirstElt
/// indicates whether Ptr is known to point to the start of the aggregate.
void SROA::isSafeElementUse(Value *Ptr, bool isFirstElt, AllocationInst *AI,
                            AllocaInfo &Info) {
  for (Value::use_iterator I = Ptr->use_begin(), E = Ptr->use_end();
       I != E; ++I) {
    Instruction *User = cast<Instruction>(*I);
    switch (User->getOpcode()) {
    case Instruction::Load:  break;
    case Instruction::Store:
      // Store is ok if storing INTO the pointer, not storing the pointer
      if (User->getOperand(0) == Ptr) return MarkUnsafe(Info);
      break;
    case Instruction::GetElementPtr: {
      GetElementPtrInst *GEP = cast<GetElementPtrInst>(User);
      bool AreAllZeroIndices = isFirstElt;
      if (GEP->getNumOperands() > 1) {
        if (!isa<ConstantInt>(GEP->getOperand(1)) ||
            !cast<ConstantInt>(GEP->getOperand(1))->isZero())
          // Using pointer arithmetic to navigate the array.
          return MarkUnsafe(Info);

        if (AreAllZeroIndices) {
          for (unsigned i = 2, e = GEP->getNumOperands(); i != e; ++i) {
            if (!isa<ConstantInt>(GEP->getOperand(i)) ||
                !cast<ConstantInt>(GEP->getOperand(i))->isZero()) {
              AreAllZeroIndices = false;
              break;
            }
          }
        }
      }
      isSafeElementUse(GEP, AreAllZeroIndices, AI, Info);
      if (Info.isUnsafe) return;
      break;
    }
    case Instruction::BitCast:
      if (isFirstElt) {
        isSafeUseOfBitCastedAllocation(cast<BitCastInst>(User), AI, Info);
        if (Info.isUnsafe) return;
        break;
      }
      DOUT << "  Transformation preventing inst: " << *User;
      return MarkUnsafe(Info);
    case Instruction::Call:
      if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(User)) {
        if (isFirstElt) {
          isSafeMemIntrinsicOnAllocation(MI, AI, I.getOperandNo(), Info);
          if (Info.isUnsafe) return;
          break;
        }
      }
      DOUT << "  Transformation preventing inst: " << *User;
      return MarkUnsafe(Info);
    default:
      DOUT << "  Transformation preventing inst: " << *User;
      return MarkUnsafe(Info);
    }
  }
  return;  // All users look ok :)
}
/// AllUsersAreLoads - Return true if all users of this value are loads.
static bool AllUsersAreLoads(Value *Ptr) {
  for (Value::use_iterator I = Ptr->use_begin(), E = Ptr->use_end();
       I != E; ++I)
    if (cast<Instruction>(*I)->getOpcode() != Instruction::Load)
      return false;
  return true;
}
/// isSafeUseOfAllocation - Check to see if this user is an allowed use for an
/// aggregate allocation.
void SROA::isSafeUseOfAllocation(Instruction *User, AllocationInst *AI,
                                 AllocaInfo &Info) {
  if (BitCastInst *C = dyn_cast<BitCastInst>(User))
    return isSafeUseOfBitCastedAllocation(C, AI, Info);

  GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(User);
  if (GEPI == 0)
    return MarkUnsafe(Info);

  gep_type_iterator I = gep_type_begin(GEPI), E = gep_type_end(GEPI);

  // The GEP is not safe to transform if not of the form "GEP <ptr>, 0, <cst>".
  if (I == E ||
      I.getOperand() != Constant::getNullValue(I.getOperand()->getType())) {
    return MarkUnsafe(Info);
  }

  ++I;
  if (I == E) return MarkUnsafe(Info);  // ran out of GEP indices??

  bool IsAllZeroIndices = true;

  // If this is a use of an array allocation, do a bit more checking for sanity.
  if (const ArrayType *AT = dyn_cast<ArrayType>(*I)) {
    uint64_t NumElements = AT->getNumElements();

    if (ConstantInt *Idx = dyn_cast<ConstantInt>(I.getOperand())) {
      IsAllZeroIndices &= Idx->isZero();

      // Check to make sure that index falls within the array.  If not,
      // something funny is going on, so we won't do the optimization.
      if (Idx->getZExtValue() >= NumElements)
        return MarkUnsafe(Info);

      // We cannot scalar repl this level of the array unless any array
      // sub-indices are in-range constants.  In particular, consider:
      // A[0][i].  We cannot know that the user isn't doing invalid things like
      // allowing i to index an out-of-range subscript that accesses A[1].
      //
      // Scalar replacing *just* the outer index of the array is probably not
      // going to be a win anyway, so just give up.
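      //
      // Sketch of the hazard (illustrative IR, not from a test case): for
      // "int A[2][2]", a GEP like
      //
      //   %p = getelementptr [2 x [2 x i32]]* %A, i32 0, i32 0, i32 %i
      //
      // can reach A[1] whenever %i is out of range, so splitting the outer
      // array would not be safe.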
      for (++I; I != E && (isa<ArrayType>(*I) || isa<VectorType>(*I)); ++I) {
        uint64_t NumElements;
        if (const ArrayType *SubArrayTy = dyn_cast<ArrayType>(*I))
          NumElements = SubArrayTy->getNumElements();
        else
          NumElements = cast<VectorType>(*I)->getNumElements();

        ConstantInt *IdxVal = dyn_cast<ConstantInt>(I.getOperand());
        if (!IdxVal) return MarkUnsafe(Info);
        if (IdxVal->getZExtValue() >= NumElements)
          return MarkUnsafe(Info);
        IsAllZeroIndices &= IdxVal->isZero();
      }
    } else {
      IsAllZeroIndices = 0;

      // If this is an array index and the index is not constant, we cannot
      // promote... that is unless the array has exactly one or two elements in
      // it, in which case we CAN promote it, but we have to canonicalize this
      // out if this is the only problem.
      if ((NumElements == 1 || NumElements == 2) &&
          AllUsersAreLoads(GEPI)) {
        Info.needsCanon = true;
        return;  // Canonicalization required!
      }
      return MarkUnsafe(Info);
    }
  }

  // If there are any non-simple uses of this getelementptr, make sure to reject
  // them.
  return isSafeElementUse(GEPI, IsAllZeroIndices, AI, Info);
}
/// isSafeMemIntrinsicOnAllocation - Return true if the specified memory
/// intrinsic can be promoted by SROA.  At this point, we know that the operand
/// of the memintrinsic is a pointer to the beginning of the allocation.
void SROA::isSafeMemIntrinsicOnAllocation(MemIntrinsic *MI, AllocationInst *AI,
                                          unsigned OpNo, AllocaInfo &Info) {
  // If not constant length, give up.
  ConstantInt *Length = dyn_cast<ConstantInt>(MI->getLength());
  if (!Length) return MarkUnsafe(Info);

  // If not the whole aggregate, give up.
  const TargetData &TD = getAnalysis<TargetData>();
  if (Length->getZExtValue() !=
      TD.getABITypeSize(AI->getType()->getElementType()))
    return MarkUnsafe(Info);

  // We only know about memcpy/memset/memmove.
  if (!isa<MemCpyInst>(MI) && !isa<MemSetInst>(MI) && !isa<MemMoveInst>(MI))
    return MarkUnsafe(Info);

  // Otherwise, we can transform it.  Determine whether this is a memcpy/set
  // into or out of the aggregate.
  if (OpNo == 1)    // Dest pointer.
    Info.isMemCpyDst = true;
  else
    Info.isMemCpySrc = true;
}
/// isSafeUseOfBitCastedAllocation - Return true if all users of this bitcast
/// are safe to transform.
void SROA::isSafeUseOfBitCastedAllocation(BitCastInst *BC, AllocationInst *AI,
                                          AllocaInfo &Info) {
  for (Value::use_iterator UI = BC->use_begin(), E = BC->use_end();
       UI != E; ++UI) {
    if (BitCastInst *BCU = dyn_cast<BitCastInst>(UI)) {
      isSafeUseOfBitCastedAllocation(BCU, AI, Info);
    } else if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(UI)) {
      isSafeMemIntrinsicOnAllocation(MI, AI, UI.getOperandNo(), Info);
    } else {
      return MarkUnsafe(Info);
    }
    if (Info.isUnsafe) return;
  }
}
/// RewriteBitCastUserOfAlloca - BCInst (transitively) bitcasts AI, or indexes
/// to its first element.  Transform users of the cast to use the new values
/// instead.
void SROA::RewriteBitCastUserOfAlloca(Instruction *BCInst, AllocationInst *AI,
                                      SmallVector<AllocaInst*, 32> &NewElts) {
  Constant *Zero = Constant::getNullValue(Type::Int32Ty);
  const TargetData &TD = getAnalysis<TargetData>();

  Value::use_iterator UI = BCInst->use_begin(), UE = BCInst->use_end();

  if (BitCastInst *BCU = dyn_cast<BitCastInst>(*UI)) {
    RewriteBitCastUserOfAlloca(BCU, AI, NewElts);
    BCU->eraseFromParent();
  }

  // Otherwise, must be memcpy/memmove/memset of the entire aggregate.  Split
  // into one per element.
  MemIntrinsic *MI = dyn_cast<MemIntrinsic>(*UI);

  // If it's not a mem intrinsic, it must be some other user of a gep of the
  // first pointer.  Just leave these alone.
  if (!MI)
    return;

  // If this is a memcpy/memmove, construct the other pointer as the
  // appropriate type.
  Value *OtherPtr = 0;
  if (MemCpyInst *MCI = dyn_cast<MemCpyInst>(MI)) {
    if (BCInst == MCI->getRawDest())
      OtherPtr = MCI->getRawSource();
    else {
      assert(BCInst == MCI->getRawSource());
      OtherPtr = MCI->getRawDest();
    }
  } else if (MemMoveInst *MMI = dyn_cast<MemMoveInst>(MI)) {
    if (BCInst == MMI->getRawDest())
      OtherPtr = MMI->getRawSource();
    else {
      assert(BCInst == MMI->getRawSource());
      OtherPtr = MMI->getRawDest();
    }
  }

  // If there is an other pointer, we want to convert it to the same pointer
  // type as AI has, so we can GEP through it.
  if (OtherPtr) {
    // It is likely that OtherPtr is a bitcast, if so, remove it.
    if (BitCastInst *BC = dyn_cast<BitCastInst>(OtherPtr))
      OtherPtr = BC->getOperand(0);
    if (ConstantExpr *BCE = dyn_cast<ConstantExpr>(OtherPtr))
      if (BCE->getOpcode() == Instruction::BitCast)
        OtherPtr = BCE->getOperand(0);

    // If the pointer is not the right type, insert a bitcast to the right
    // type.
    if (OtherPtr->getType() != AI->getType())
      OtherPtr = new BitCastInst(OtherPtr, AI->getType(), OtherPtr->getName(),
                                 MI);
  }
  // Process each element of the aggregate.
  Value *TheFn = MI->getOperand(0);
  const Type *BytePtrTy = MI->getRawDest()->getType();
  bool SROADest = MI->getRawDest() == BCInst;

  for (unsigned i = 0, e = NewElts.size(); i != e; ++i) {
    // If this is a memcpy/memmove, emit a GEP of the other element address.
    Value *OtherElt = 0;
    if (OtherPtr) {
      Value *Idx[2];
      Idx[0] = Zero;
      Idx[1] = ConstantInt::get(Type::Int32Ty, i);
      OtherElt = new GetElementPtrInst(OtherPtr, Idx, Idx + 2,
                                       OtherPtr->getNameStr()+"."+utostr(i),
                                       MI);
    }

    Value *EltPtr = NewElts[i];
    const Type *EltTy = cast<PointerType>(EltPtr->getType())->getElementType();

    // If we got down to a scalar, insert a load or store as appropriate.
    if (EltTy->isFirstClassType()) {
      if (isa<MemCpyInst>(MI) || isa<MemMoveInst>(MI)) {
        Value *Elt = new LoadInst(SROADest ? OtherElt : EltPtr, "tmp",
                                  MI);
        new StoreInst(Elt, SROADest ? EltPtr : OtherElt, MI);
        continue;
      }
      assert(isa<MemSetInst>(MI));

      // If the stored element is zero (common case), just store a null
      // constant.
      Constant *StoreVal;
      if (ConstantInt *CI = dyn_cast<ConstantInt>(MI->getOperand(2))) {
        if (CI->isZero()) {
          StoreVal = Constant::getNullValue(EltTy);  // 0.0, null, 0, <0,0>
        } else {
          // If EltTy is a vector type, get the element type.
          const Type *ValTy = EltTy;
          if (const VectorType *VTy = dyn_cast<VectorType>(ValTy))
            ValTy = VTy->getElementType();

          // Construct an integer with the right value.
          unsigned EltSize = TD.getTypeSizeInBits(ValTy);
          APInt OneVal(EltSize, CI->getZExtValue());
          APInt TotalVal(OneVal);
          for (unsigned i = 0; 8*i < EltSize; ++i) {
            TotalVal = TotalVal.shl(8);
            TotalVal |= OneVal;
          }

          // Convert the integer value to the appropriate type.
          StoreVal = ConstantInt::get(TotalVal);
          if (isa<PointerType>(ValTy))
            StoreVal = ConstantExpr::getIntToPtr(StoreVal, ValTy);
          else if (ValTy->isFloatingPoint())
            StoreVal = ConstantExpr::getBitCast(StoreVal, ValTy);
          assert(StoreVal->getType() == ValTy && "Type mismatch!");

          // If the requested value was a vector constant, create it.
          if (EltTy != ValTy) {
            unsigned NumElts = cast<VectorType>(EltTy)->getNumElements();
            SmallVector<Constant*, 16> Elts(NumElts, StoreVal);
            StoreVal = ConstantVector::get(&Elts[0], NumElts);
          }
        }
        new StoreInst(StoreVal, EltPtr, MI);
        continue;
      }
      // Otherwise, if we're storing a byte variable, use a memset call for
      // this element.
    }

    // Cast the element pointer to BytePtrTy.
    if (EltPtr->getType() != BytePtrTy)
      EltPtr = new BitCastInst(EltPtr, BytePtrTy, EltPtr->getNameStr(), MI);

    // Cast the other pointer (if we have one) to BytePtrTy.
    if (OtherElt && OtherElt->getType() != BytePtrTy)
      OtherElt = new BitCastInst(OtherElt, BytePtrTy, OtherElt->getNameStr(),
                                 MI);

    unsigned EltSize = TD.getABITypeSize(EltTy);

    // Finally, insert the meminst for this element.
    if (isa<MemCpyInst>(MI) || isa<MemMoveInst>(MI)) {
      Value *Ops[] = {
        SROADest ? EltPtr : OtherElt,  // Dest ptr
        SROADest ? OtherElt : EltPtr,  // Src ptr
        ConstantInt::get(MI->getOperand(3)->getType(), EltSize),  // Size
        Zero  // Align
      };
      new CallInst(TheFn, Ops, Ops + 4, "", MI);
    } else {
      assert(isa<MemSetInst>(MI));
      Value *Ops[] = {
        EltPtr, MI->getOperand(2),  // Dest, Value,
        ConstantInt::get(MI->getOperand(3)->getType(), EltSize),  // Size
        Zero  // Align
      };
      new CallInst(TheFn, Ops, Ops + 4, "", MI);
    }
  }

  // Finally, MI is now dead, as we've modified its actions to occur on all of
  // the elements of the aggregate.
  MI->eraseFromParent();
}
/// HasPadding - Return true if the specified type has any structure or
/// alignment padding, false otherwise.
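///
/// For example (an illustrative, target-dependent sketch): with a typical
/// 32-bit TargetData, { i8, i32 } occupies 8 bytes of which only 5 are field
/// data, so it has padding; { i32, i32 } does not.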
static bool HasPadding(const Type *Ty, const TargetData &TD,
                       bool inPacked = false) {
  if (const StructType *STy = dyn_cast<StructType>(Ty)) {
    const StructLayout *SL = TD.getStructLayout(STy);
    unsigned PrevFieldBitOffset = 0;
    for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
      unsigned FieldBitOffset = SL->getElementOffsetInBits(i);

      // Padding in sub-elements?
      if (HasPadding(STy->getElementType(i), TD, STy->isPacked()))
        return true;

      // Check to see if there is any padding between this element and the
      // previous one.
      if (i) {
        unsigned PrevFieldEnd =
          PrevFieldBitOffset+TD.getTypeSizeInBits(STy->getElementType(i-1));
        if (PrevFieldEnd < FieldBitOffset)
          return true;
      }

      PrevFieldBitOffset = FieldBitOffset;
    }

    // Check for tail padding.
    if (unsigned EltCount = STy->getNumElements()) {
      unsigned PrevFieldEnd = PrevFieldBitOffset +
        TD.getTypeSizeInBits(STy->getElementType(EltCount-1));
      if (PrevFieldEnd < SL->getSizeInBits())
        return true;
    }
  } else if (const ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
    return HasPadding(ATy->getElementType(), TD, false);
  } else if (const VectorType *VTy = dyn_cast<VectorType>(Ty)) {
    return HasPadding(VTy->getElementType(), TD, false);
  }
  return inPacked ?
    false : TD.getTypeSizeInBits(Ty) != TD.getABITypeSizeInBits(Ty);
}
/// isSafeAllocaToScalarRepl - Check to see if the specified allocation of
/// an aggregate can be broken down into elements.  Return 0 if not, 3 if safe,
/// or 1 if safe after canonicalization has been performed.
///
int SROA::isSafeAllocaToScalarRepl(AllocationInst *AI) {
  // Loop over the use list of the alloca.  We can only transform it if all of
  // the users are safe to transform.
  AllocaInfo Info;

  for (Value::use_iterator I = AI->use_begin(), E = AI->use_end();
       I != E; ++I) {
    isSafeUseOfAllocation(cast<Instruction>(*I), AI, Info);
    if (Info.isUnsafe) {
      DOUT << "Cannot transform: " << *AI << "  due to user: " << **I;
      return 0;
    }
  }

  // Okay, we know all the users are promotable.  If the aggregate is a memcpy
  // source and destination, we have to be careful.  In particular, the memcpy
  // could be moving around elements that live in structure padding of the LLVM
  // types, but may actually be used.  In these cases, we refuse to promote the
  // struct.
  if (Info.isMemCpySrc && Info.isMemCpyDst &&
      HasPadding(AI->getType()->getElementType(), getAnalysis<TargetData>()))
    return 0;

  // If we require cleanup, return 1, otherwise return 3.
  return Info.needsCanon ? 1 : 3;
}
/// CanonicalizeAllocaUsers - If SROA reported that it can promote the specified
/// allocation, but only if cleaned up, perform the cleanups required.
void SROA::CanonicalizeAllocaUsers(AllocationInst *AI) {
  // At this point, we know that the end result will be SROA'd and promoted, so
  // we can insert ugly code if required so long as sroa+mem2reg will clean it
  // up.
  for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end();
       UI != E; ) {
    GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(*UI++);
    if (!GEPI) continue;
    gep_type_iterator I = gep_type_begin(GEPI);
    ++I;

    if (const ArrayType *AT = dyn_cast<ArrayType>(*I)) {
      uint64_t NumElements = AT->getNumElements();

      if (!isa<ConstantInt>(I.getOperand())) {
        if (NumElements == 1) {
          GEPI->setOperand(2, Constant::getNullValue(Type::Int32Ty));
        } else {
          assert(NumElements == 2 && "Unhandled case!");
          // All users of the GEP must be loads.  At each use of the GEP, insert
          // two loads of the appropriate indexed GEP and select between them.
          Value *IsOne = new ICmpInst(ICmpInst::ICMP_NE, I.getOperand(),
                              Constant::getNullValue(I.getOperand()->getType()),
                              "isone", GEPI);
          // Insert the new GEP instructions, which are properly indexed.
          SmallVector<Value*, 8> Indices(GEPI->op_begin()+1, GEPI->op_end());
          Indices[1] = Constant::getNullValue(Type::Int32Ty);
          Value *ZeroIdx = new GetElementPtrInst(GEPI->getOperand(0),
                                                 Indices.begin(), Indices.end(),
                                                 GEPI->getName()+".0", GEPI);
          Indices[1] = ConstantInt::get(Type::Int32Ty, 1);
          Value *OneIdx = new GetElementPtrInst(GEPI->getOperand(0),
                                                Indices.begin(), Indices.end(),
                                                GEPI->getName()+".1", GEPI);
          // Replace all loads of the variable index GEP with loads from both
          // indexes and a select.
          while (!GEPI->use_empty()) {
            LoadInst *LI = cast<LoadInst>(GEPI->use_back());
            Value *Zero = new LoadInst(ZeroIdx, LI->getName()+".0", LI);
            Value *One  = new LoadInst(OneIdx , LI->getName()+".1", LI);
            Value *R = new SelectInst(IsOne, One, Zero, LI->getName(), LI);
            LI->replaceAllUsesWith(R);
            LI->eraseFromParent();
          }
          GEPI->eraseFromParent();
        }
      }
    }
  }
}
/// MergeInType - Add the 'In' type to the accumulated type so far.  If the
/// types are incompatible, return true, otherwise update Accum and return
/// false.
///
/// There are three cases we handle here:
///   1) An effectively-integer union, where the pieces are stored into as
///      smaller integers (common with byte swap and other idioms).
///   2) A union of vector types of the same size and potentially its elements.
///      Here we turn element accesses into insert/extract element operations.
///   3) A union of scalar types, such as int/float or int/pointer.  Here we
///      merge together into integers, allowing the xform to work with #1 as
///      well.
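///
/// A small illustration (informal, not from a test case): if an alloca is
/// loaded both as i16 and as i32, case #1 merges the accumulated type to i32;
/// if it is stored as float and loaded as i32, case #3 first turns the float
/// into i32 and then applies case #1.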
static bool MergeInType(const Type *In, const Type *&Accum,
                        const TargetData &TD) {
  // If this is our first type, just use it.
  const VectorType *PTy;
  if (Accum == Type::VoidTy || In == Accum) {
    Accum = In;
  } else if (In == Type::VoidTy) {
    // Noop.
  } else if (In->isInteger() && Accum->isInteger()) {   // integer union.
    // Otherwise pick whichever type is larger.
    if (cast<IntegerType>(In)->getBitWidth() >
        cast<IntegerType>(Accum)->getBitWidth())
      Accum = In;
  } else if (isa<PointerType>(In) && isa<PointerType>(Accum)) {
    // Pointer unions just stay as one of the pointers.
  } else if (isa<VectorType>(In) || isa<VectorType>(Accum)) {
    if ((PTy = dyn_cast<VectorType>(Accum)) &&
        PTy->getElementType() == In) {
      // Accum is a vector, and we are accessing an element: ok.
    } else if ((PTy = dyn_cast<VectorType>(In)) &&
               PTy->getElementType() == Accum) {
      // In is a vector, and accum is an element: ok, remember In.
      Accum = In;
    } else if ((PTy = dyn_cast<VectorType>(In)) && isa<VectorType>(Accum) &&
               PTy->getBitWidth() == cast<VectorType>(Accum)->getBitWidth()) {
      // Two vectors of the same size: keep Accum.
    } else {
      // Cannot insert a short into a <4 x int> or handle
      // <2 x int> -> <4 x int>
      return true;
    }
  } else {
    // Pointer/FP/Integer unions merge together as integers.
    switch (Accum->getTypeID()) {
    case Type::PointerTyID:   Accum = TD.getIntPtrType(); break;
    case Type::FloatTyID:     Accum = Type::Int32Ty; break;
    case Type::DoubleTyID:    Accum = Type::Int64Ty; break;
    case Type::X86_FP80TyID:  return true;
    case Type::FP128TyID:     return true;
    case Type::PPC_FP128TyID: return true;
    default:
      assert(Accum->isInteger() && "Unknown FP type!");
      break;
    }

    switch (In->getTypeID()) {
    case Type::PointerTyID:   In = TD.getIntPtrType(); break;
    case Type::FloatTyID:     In = Type::Int32Ty; break;
    case Type::DoubleTyID:    In = Type::Int64Ty; break;
    case Type::X86_FP80TyID:  return true;
    case Type::FP128TyID:     return true;
    case Type::PPC_FP128TyID: return true;
    default:
      assert(In->isInteger() && "Unknown FP type!");
      break;
    }
    return MergeInType(In, Accum, TD);
  }
  return false;
}
/// getUIntAtLeastAsBigAs - Return an unsigned integer type that is at least
/// as big as the specified type.  If there is no suitable type, this returns
/// null.
const Type *getUIntAtLeastAsBigAs(unsigned NumBits) {
  if (NumBits > 64) return 0;
  if (NumBits > 32) return Type::Int64Ty;
  if (NumBits > 16) return Type::Int32Ty;
  if (NumBits > 8) return Type::Int16Ty;
  return Type::Int8Ty;
}
/// CanConvertToScalar - V is a pointer.  If we can convert the pointee to a
/// single scalar integer type, return that type.  Further, if the use is not
/// a completely trivial use that mem2reg could promote, set IsNotTrivial.  If
/// there are no uses of this pointer, return Type::VoidTy to differentiate from
/// 0.
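///
/// Rough intuition (an informal sketch): for an alloca used as an "integer
/// union" -- say one i32 store and two i16 loads -- this returns i32; if any
/// use needs bit-shifting or insert/extractelement rewriting rather than a
/// plain load/store, IsNotTrivial is set so the conversion is known to be
/// worthwhile.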
const Type *SROA::CanConvertToScalar(Value *V, bool &IsNotTrivial) {
  const Type *UsedType = Type::VoidTy; // No uses, no forced type.
  const TargetData &TD = getAnalysis<TargetData>();
  const PointerType *PTy = cast<PointerType>(V->getType());

  for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI!=E; ++UI) {
    Instruction *User = cast<Instruction>(*UI);

    if (LoadInst *LI = dyn_cast<LoadInst>(User)) {
      if (MergeInType(LI->getType(), UsedType, TD))
        return 0;

    } else if (StoreInst *SI = dyn_cast<StoreInst>(User)) {
      // Storing the pointer, not into the value?
      if (SI->getOperand(0) == V) return 0;

      // NOTE: We could handle storing of FP imms into integers here!

      if (MergeInType(SI->getOperand(0)->getType(), UsedType, TD))
        return 0;
    } else if (BitCastInst *CI = dyn_cast<BitCastInst>(User)) {
      IsNotTrivial = true;
      const Type *SubTy = CanConvertToScalar(CI, IsNotTrivial);
      if (!SubTy || MergeInType(SubTy, UsedType, TD)) return 0;
    } else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(User)) {
      // Check to see if this is stepping over an element: GEP Ptr, int C
      if (GEP->getNumOperands() == 2 && isa<ConstantInt>(GEP->getOperand(1))) {
        unsigned Idx = cast<ConstantInt>(GEP->getOperand(1))->getZExtValue();
        unsigned ElSize = TD.getABITypeSize(PTy->getElementType());
        unsigned BitOffset = Idx*ElSize*8;
        if (BitOffset > 64 || !isPowerOf2_32(ElSize)) return 0;

        const Type *SubElt = CanConvertToScalar(GEP, IsNotTrivial);
        if (SubElt == 0) return 0;
        if (SubElt != Type::VoidTy && SubElt->isInteger()) {
          const Type *NewTy =
            getUIntAtLeastAsBigAs(TD.getABITypeSizeInBits(SubElt)+BitOffset);
          if (NewTy == 0 || MergeInType(NewTy, UsedType, TD)) return 0;
          continue;
        }
      } else if (GEP->getNumOperands() == 3 &&
                 isa<ConstantInt>(GEP->getOperand(1)) &&
                 isa<ConstantInt>(GEP->getOperand(2)) &&
                 cast<ConstantInt>(GEP->getOperand(1))->isZero()) {
        // We are stepping into an element, e.g. a structure or an array:
        // GEP Ptr, int 0, uint C
        const Type *AggTy = PTy->getElementType();
        unsigned Idx = cast<ConstantInt>(GEP->getOperand(2))->getZExtValue();

        if (const ArrayType *ATy = dyn_cast<ArrayType>(AggTy)) {
          if (Idx >= ATy->getNumElements()) return 0;  // Out of range.
        } else if (const VectorType *VectorTy = dyn_cast<VectorType>(AggTy)) {
          // Getting an element of the vector.
          if (Idx >= VectorTy->getNumElements()) return 0;  // Out of range.

          // Merge in the vector type.
          if (MergeInType(VectorTy, UsedType, TD)) return 0;

          const Type *SubTy = CanConvertToScalar(GEP, IsNotTrivial);
          if (SubTy == 0) return 0;

          if (SubTy != Type::VoidTy && MergeInType(SubTy, UsedType, TD))
            return 0;

          // We'll need to change this to an insert/extract element operation.
          IsNotTrivial = true;
          continue;    // Everything looks ok

        } else if (isa<StructType>(AggTy)) {
          // Structs are always ok.
        } else {
          return 0;
        }
        const Type *NTy = getUIntAtLeastAsBigAs(TD.getABITypeSizeInBits(AggTy));
        if (NTy == 0 || MergeInType(NTy, UsedType, TD)) return 0;
        const Type *SubTy = CanConvertToScalar(GEP, IsNotTrivial);
        if (SubTy == 0) return 0;
        if (SubTy != Type::VoidTy && MergeInType(SubTy, UsedType, TD))
          return 0;
        continue;    // Everything looks ok
      }
      return 0;
    } else {
      // Cannot handle this!
      return 0;
    }
  }

  return UsedType;
}
/// ConvertToScalar - The specified alloca passes the CanConvertToScalar
/// predicate and is non-trivial.  Convert it to something that can be trivially
/// promoted into a register by mem2reg.
void SROA::ConvertToScalar(AllocationInst *AI, const Type *ActualTy) {
  DOUT << "CONVERT TO SCALAR: " << *AI << "  TYPE = "
       << *ActualTy << "\n";
  ++NumConverted;

  BasicBlock *EntryBlock = AI->getParent();
  assert(EntryBlock == &EntryBlock->getParent()->getEntryBlock() &&
         "Not in the entry block!");
  EntryBlock->getInstList().remove(AI);  // Take the alloca out of the program.

  // Create and insert the alloca.
  AllocaInst *NewAI = new AllocaInst(ActualTy, 0, AI->getName(),
                                     EntryBlock->begin());
  ConvertUsesToScalar(AI, NewAI, 0);
  delete AI;
}
/// ConvertUsesToScalar - Convert all of the users of Ptr to use the new alloca
/// directly.  This happens when we are converting an "integer union" to a
/// single integer scalar, or when we are converting a "vector union" to a
/// vector with insert/extractelement instructions.
///
/// Offset is an offset from the original alloca, in bits that need to be
/// shifted to the right.  By the end of this, there should be no uses of Ptr.
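///
/// For intuition (an informal sketch): on a little-endian target, a load of
/// an i16 field at byte offset 2 of an i32-backed alloca becomes "load the
/// i32, lshr by 16, trunc to i16"; the Offset parameter carries that 16-bit
/// shift amount down the use tree.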
void SROA::ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI, unsigned Offset) {
  const TargetData &TD = getAnalysis<TargetData>();
  while (!Ptr->use_empty()) {
    Instruction *User = cast<Instruction>(Ptr->use_back());

    if (LoadInst *LI = dyn_cast<LoadInst>(User)) {
      // The load is a bit extract from NewAI shifted right by Offset bits.
      Value *NV = new LoadInst(NewAI, LI->getName(), LI);
      if (NV->getType() == LI->getType()) {
        // We win, no conversion needed.
      } else if (const VectorType *PTy = dyn_cast<VectorType>(NV->getType())) {
        // If the result alloca is a vector type, this is either an element
        // access or a bitcast to another vector type.
        if (isa<VectorType>(LI->getType())) {
          NV = new BitCastInst(NV, LI->getType(), LI->getName(), LI);
        } else {
          // Must be an element access.
          unsigned Elt = Offset/TD.getABITypeSizeInBits(PTy->getElementType());
          NV = new ExtractElementInst(
                         NV, ConstantInt::get(Type::Int32Ty, Elt), "tmp", LI);
        }
      } else if (isa<PointerType>(NV->getType())) {
        assert(isa<PointerType>(LI->getType()));
        // Must be ptr->ptr cast.  Anything else would result in NV being
        // an integer.
        NV = new BitCastInst(NV, LI->getType(), LI->getName(), LI);
      } else {
        const IntegerType *NTy = cast<IntegerType>(NV->getType());

        // If this is a big-endian system and the load is narrower than the
        // full alloca type, we need to do a shift to get the right bits.
        int ShAmt = 0;
        if (TD.isBigEndian()) {
          // On big-endian machines, the lowest bit is stored at the bit offset
          // from the pointer given by getTypeStoreSizeInBits.  This matters for
          // integers with a bitwidth that is not a multiple of 8.
          ShAmt = TD.getTypeStoreSizeInBits(NTy) -
                  TD.getTypeStoreSizeInBits(LI->getType()) - Offset;
        } else {
          ShAmt = Offset;
        }

        // Note: we support negative bitwidths (with shl) which are not defined.
        // We do this to support (f.e.) loads off the end of a structure where
        // only some bits are used.
        if (ShAmt > 0 && (unsigned)ShAmt < NTy->getBitWidth())
          NV = BinaryOperator::createLShr(NV,
                                          ConstantInt::get(NV->getType(),ShAmt),
                                          LI->getName(), LI);
        else if (ShAmt < 0 && (unsigned)-ShAmt < NTy->getBitWidth())
          NV = BinaryOperator::createShl(NV,
                                         ConstantInt::get(NV->getType(),-ShAmt),
                                         LI->getName(), LI);

        // Finally, unconditionally truncate the integer to the right width.
        unsigned LIBitWidth = TD.getTypeSizeInBits(LI->getType());
        if (LIBitWidth < NTy->getBitWidth())
          NV = new TruncInst(NV, IntegerType::get(LIBitWidth),
                             LI->getName(), LI);

        // If the result is an integer, this is a trunc or bitcast.
        if (isa<IntegerType>(LI->getType())) {
          assert(NV->getType() == LI->getType() && "Truncate wasn't enough?");
        } else if (LI->getType()->isFloatingPoint()) {
          // Just do a bitcast, we know the sizes match up.
          NV = new BitCastInst(NV, LI->getType(), LI->getName(), LI);
        } else {
          // Otherwise must be a pointer.
          NV = new IntToPtrInst(NV, LI->getType(), LI->getName(), LI);
        }
      }
      LI->replaceAllUsesWith(NV);
      LI->eraseFromParent();
    } else if (StoreInst *SI = dyn_cast<StoreInst>(User)) {
      assert(SI->getOperand(0) != Ptr && "Consistency error!");

      // Convert the stored type to the actual type, shift it left to insert
      // then 'or' into place.
      Value *SV = SI->getOperand(0);
      const Type *AllocaType = NewAI->getType()->getElementType();
      if (SV->getType() == AllocaType) {
      } else if (const VectorType *PTy = dyn_cast<VectorType>(AllocaType)) {
        Value *Old = new LoadInst(NewAI, NewAI->getName()+".in", SI);

        // If the result alloca is a vector type, this is either an element
        // access or a bitcast to another vector type.
        if (isa<VectorType>(SV->getType())) {
          SV = new BitCastInst(SV, AllocaType, SV->getName(), SI);
        } else {
          // Must be an element insertion.
          unsigned Elt = Offset/TD.getABITypeSizeInBits(PTy->getElementType());
          SV = new InsertElementInst(Old, SV,
                                     ConstantInt::get(Type::Int32Ty, Elt),
                                     "tmp", SI);
        }
      } else if (isa<PointerType>(AllocaType)) {
        // If the alloca type is a pointer, then all the elements must be
        // pointers.
        if (SV->getType() != AllocaType)
          SV = new BitCastInst(SV, AllocaType, SV->getName(), SI);
      } else {
        Value *Old = new LoadInst(NewAI, NewAI->getName()+".in", SI);

        // If SV is a float, convert it to the appropriate integer type.
        // If it is a pointer, do the same, and also handle ptr->ptr casts
        // here.
        unsigned SrcWidth = TD.getTypeSizeInBits(SV->getType());
        unsigned DestWidth = TD.getTypeSizeInBits(AllocaType);
        unsigned SrcStoreWidth = TD.getTypeStoreSizeInBits(SV->getType());
        unsigned DestStoreWidth = TD.getTypeStoreSizeInBits(AllocaType);
        if (SV->getType()->isFloatingPoint())
          SV = new BitCastInst(SV, IntegerType::get(SrcWidth),
                               SV->getName(), SI);
        else if (isa<PointerType>(SV->getType()))
          SV = new PtrToIntInst(SV, TD.getIntPtrType(), SV->getName(), SI);

        // Always zero extend the value if needed.
        if (SV->getType() != AllocaType)
          SV = new ZExtInst(SV, AllocaType, SV->getName(), SI);

        // If this is a big-endian system and the store is narrower than the
        // full alloca type, we need to do a shift to get the right bits.
        int ShAmt = 0;
        if (TD.isBigEndian()) {
          // On big-endian machines, the lowest bit is stored at the bit offset
          // from the pointer given by getTypeStoreSizeInBits.  This matters for
          // integers with a bitwidth that is not a multiple of 8.
          ShAmt = DestStoreWidth - SrcStoreWidth - Offset;
        } else {
          ShAmt = Offset;
        }

        // Note: we support negative bitwidths (with shr) which are not defined.
        // We do this to support (f.e.) stores off the end of a structure where
        // only some bits in the structure are set.
        APInt Mask(APInt::getLowBitsSet(DestWidth, SrcWidth));
        if (ShAmt > 0 && (unsigned)ShAmt < DestWidth) {
          SV = BinaryOperator::createShl(SV,
                                         ConstantInt::get(SV->getType(), ShAmt),
                                         SV->getName(), SI);
          Mask <<= ShAmt;
        } else if (ShAmt < 0 && (unsigned)-ShAmt < DestWidth) {
          SV = BinaryOperator::createLShr(SV,
                                          ConstantInt::get(SV->getType(),-ShAmt),
                                          SV->getName(), SI);
          Mask = Mask.lshr(ShAmt);
        }

        // Mask out the bits we are about to insert from the old value, and or
        // in the new bits.
        if (SrcWidth != DestWidth) {
          assert(DestWidth > SrcWidth);
          Old = BinaryOperator::createAnd(Old, ConstantInt::get(~Mask),
                                          Old->getName()+".mask", SI);
          SV = BinaryOperator::createOr(Old, SV, SV->getName()+".ins", SI);
        }
      }
      new StoreInst(SV, NewAI, SI);
      SI->eraseFromParent();

    } else if (BitCastInst *CI = dyn_cast<BitCastInst>(User)) {
      ConvertUsesToScalar(CI, NewAI, Offset);
      CI->eraseFromParent();
    } else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(User)) {
      const PointerType *AggPtrTy =
        cast<PointerType>(GEP->getOperand(0)->getType());
      const TargetData &TD = getAnalysis<TargetData>();
      unsigned AggSizeInBits =
        TD.getABITypeSizeInBits(AggPtrTy->getElementType());

      // Check to see if this is stepping over an element: GEP Ptr, int C
      unsigned NewOffset = Offset;
      if (GEP->getNumOperands() == 2) {
        unsigned Idx = cast<ConstantInt>(GEP->getOperand(1))->getZExtValue();
        unsigned BitOffset = Idx*AggSizeInBits;

        NewOffset += BitOffset;
      } else if (GEP->getNumOperands() == 3) {
        // We know that operand #1 is zero.
        unsigned Idx = cast<ConstantInt>(GEP->getOperand(2))->getZExtValue();
        const Type *AggTy = AggPtrTy->getElementType();
        if (const SequentialType *SeqTy = dyn_cast<SequentialType>(AggTy)) {
          unsigned ElSizeBits =
            TD.getABITypeSizeInBits(SeqTy->getElementType());

          NewOffset += ElSizeBits*Idx;
        } else if (const StructType *STy = dyn_cast<StructType>(AggTy)) {
          unsigned EltBitOffset =
            TD.getStructLayout(STy)->getElementOffsetInBits(Idx);

          NewOffset += EltBitOffset;
        } else {
          assert(0 && "Unsupported operation!");
        }
      } else {
        assert(0 && "Unsupported operation!");
      }
      ConvertUsesToScalar(GEP, NewAI, NewOffset);
      GEP->eraseFromParent();
    } else {
      assert(0 && "Unsupported operation!");
    }
  }
}
/// PointsToConstantGlobal - Return true if V (possibly indirectly) points to
/// some part of a constant global variable.  This intentionally only accepts
/// constant expressions because we can't rewrite arbitrary instructions.
static bool PointsToConstantGlobal(Value *V) {
  if (GlobalVariable *GV = dyn_cast<GlobalVariable>(V))
    return GV->isConstant();
  if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V))
    if (CE->getOpcode() == Instruction::BitCast ||
        CE->getOpcode() == Instruction::GetElementPtr)
      return PointsToConstantGlobal(CE->getOperand(0));
  return false;
}
/// isOnlyCopiedFromConstantGlobal - Recursively walk the uses of a (derived)
/// pointer to an alloca.  Ignore any reads of the pointer, return false if we
/// see any stores or other unknown uses.  If we see pointer arithmetic, keep
/// track of whether it moves the pointer (with isOffset) but otherwise traverse
/// the uses.  If we see a memcpy/memmove that targets an unoffseted pointer to
/// the alloca, and if the source pointer is a pointer to a constant global, we
/// can optimize this.
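///
/// For example (illustrative only): an alloca whose uses are one llvm.memcpy
/// from a constant global into it plus any number of loads qualifies; a
/// second store, or a copy through an offset GEP, does not.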
static bool isOnlyCopiedFromConstantGlobal(Value *V, Instruction *&TheCopy,
                                           bool isOffset) {
  for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI!=E; ++UI) {
    if (isa<LoadInst>(*UI)) {
      // Ignore loads, they are always ok.
      continue;
    }
    if (BitCastInst *BCI = dyn_cast<BitCastInst>(*UI)) {
      // If uses of the bitcast are ok, we are ok.
      if (!isOnlyCopiedFromConstantGlobal(BCI, TheCopy, isOffset))
        return false;
      continue;
    }
    if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(*UI)) {
      // If the GEP has all zero indices, it doesn't offset the pointer.  If it
      // doesn't, it does.
      if (!isOnlyCopiedFromConstantGlobal(GEP, TheCopy,
                                          isOffset || !GEP->hasAllZeroIndices()))
        return false;
      continue;
    }

    // If this isn't our memcpy/memmove, reject it as something we can't
    // handle.
    if (!isa<MemCpyInst>(*UI) && !isa<MemMoveInst>(*UI))
      return false;

    // If we already have seen a copy, reject the second one.
    if (TheCopy) return false;

    // If the pointer has been offset from the start of the alloca, we can't
    // safely handle this.
    if (isOffset) return false;

    // If the memintrinsic isn't using the alloca as the dest, reject it.
    if (UI.getOperandNo() != 1) return false;

    MemIntrinsic *MI = cast<MemIntrinsic>(*UI);

    // If the source of the memcpy/move is not a constant global, reject it.
    if (!PointsToConstantGlobal(MI->getOperand(2)))
      return false;

    // Otherwise, the transform is safe.  Remember the copy instruction.
    TheCopy = MI;
  }
  return true;
}
/// isOnlyCopiedFromConstantGlobal - Return the copy instruction if the
/// specified alloca is only modified by a copy from a constant global.  If we
/// can prove this, we can replace any uses of the alloca with uses of the
/// global directly.
Instruction *SROA::isOnlyCopiedFromConstantGlobal(AllocationInst *AI) {
  Instruction *TheCopy = 0;
  if (::isOnlyCopiedFromConstantGlobal(AI, TheCopy, false))
    return TheCopy;
  return 0;
}