//===- ScalarReplAggregates.cpp - Scalar Replacement of Aggregates --------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This transformation implements the well-known scalar replacement of
// aggregates transformation.  This xform breaks up alloca instructions of
// aggregate type (structure or array) into individual alloca instructions for
// each member (if possible).  Then, if possible, it transforms the individual
// alloca instructions into nice clean scalar SSA form.
//
// This combines a simple SRoA algorithm with the Mem2Reg algorithm because
// they often interact, especially for C++ programs.  As such, iterating
// between SRoA and Mem2Reg until we run out of things to promote works well.
//
//===----------------------------------------------------------------------===//
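//
// As an illustrative (hypothetical) example of the combined effect, a local
// aggregate such as
//
//   %p  = alloca { i32, i32 }
//   %f0 = getelementptr { i32, i32 }* %p, i32 0, i32 0
//   store i32 1, i32* %f0
//   %v  = load i32* %f0
//
// is first split by SRoA into one alloca per member, after which Mem2Reg can
// promote each scalar alloca, leaving %v as the constant 1 in SSA form.
//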
#define DEBUG_TYPE "scalarrepl"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Constants.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Function.h"
#include "llvm/GlobalVariable.h"
#include "llvm/Instructions.h"
#include "llvm/IntrinsicInst.h"
#include "llvm/LLVMContext.h"
#include "llvm/Pass.h"
#include "llvm/Analysis/Dominators.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Transforms/Utils/PromoteMemToReg.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/GetElementPtrTypeIterator.h"
#include "llvm/Support/IRBuilder.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/Compiler.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
STATISTIC(NumReplaced,  "Number of allocas broken up");
STATISTIC(NumPromoted,  "Number of allocas promoted");
STATISTIC(NumConverted, "Number of aggregates converted to scalar");
STATISTIC(NumGlobals,   "Number of allocas copied from constant global");
struct VISIBILITY_HIDDEN SROA : public FunctionPass {
  static char ID; // Pass identification, replacement for typeid
  explicit SROA(signed T = -1) : FunctionPass(&ID) {
    if (T == -1)
      SRThreshold = 128;
    else
      SRThreshold = T;
  }

  bool runOnFunction(Function &F);

  bool performScalarRepl(Function &F);
  bool performPromotion(Function &F);

  // getAnalysisUsage - This pass does not require any passes, but we know it
  // will not alter the CFG, so say so.
  virtual void getAnalysisUsage(AnalysisUsage &AU) const {
    AU.addRequired<DominatorTree>();
    AU.addRequired<DominanceFrontier>();
    AU.addRequired<TargetData>();
  }
  /// AllocaInfo - When analyzing uses of an alloca instruction, this captures
  /// information about the uses.  All these fields are initialized to false
  /// and set to true when something is learned.
  struct AllocaInfo {
    /// isUnsafe - This is set to true if the alloca cannot be SROA'd.
    bool isUnsafe : 1;

    /// needsCleanup - This is set to true if there is some use of the alloca
    /// that requires cleanup.
    bool needsCleanup : 1;

    /// isMemCpySrc - This is true if this aggregate is memcpy'd from.
    bool isMemCpySrc : 1;

    /// isMemCpyDst - This is true if this aggregate is memcpy'd into.
    bool isMemCpyDst : 1;

    AllocaInfo()
      : isUnsafe(false), needsCleanup(false),
        isMemCpySrc(false), isMemCpyDst(false) {}
  };

  TargetData *TD;
  unsigned SRThreshold;

  void MarkUnsafe(AllocaInfo &I) { I.isUnsafe = true; }
  int isSafeAllocaToScalarRepl(AllocationInst *AI);

  void isSafeUseOfAllocation(Instruction *User, AllocationInst *AI,
                             AllocaInfo &Info);
  void isSafeElementUse(Value *Ptr, bool isFirstElt, AllocationInst *AI,
                        AllocaInfo &Info);
  void isSafeMemIntrinsicOnAllocation(MemIntrinsic *MI, AllocationInst *AI,
                                      unsigned OpNo, AllocaInfo &Info);
  void isSafeUseOfBitCastedAllocation(BitCastInst *User, AllocationInst *AI,
                                      AllocaInfo &Info);

  void DoScalarReplacement(AllocationInst *AI,
                           std::vector<AllocationInst*> &WorkList);
  void CleanupGEP(GetElementPtrInst *GEP);
  void CleanupAllocaUsers(AllocationInst *AI);
  AllocaInst *AddNewAlloca(Function &F, const Type *Ty, AllocationInst *Base);

  void RewriteBitCastUserOfAlloca(Instruction *BCInst, AllocationInst *AI,
                                  SmallVector<AllocaInst*, 32> &NewElts);
  void RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *BCInst,
                                    AllocationInst *AI,
                                    SmallVector<AllocaInst*, 32> &NewElts);
  void RewriteStoreUserOfWholeAlloca(StoreInst *SI, AllocationInst *AI,
                                     SmallVector<AllocaInst*, 32> &NewElts);
  void RewriteLoadUserOfWholeAlloca(LoadInst *LI, AllocationInst *AI,
                                    SmallVector<AllocaInst*, 32> &NewElts);

  bool CanConvertToScalar(Value *V, bool &IsNotTrivial, const Type *&VecTy,
                          bool &SawVec, uint64_t Offset, unsigned AllocaSize);
  void ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI, uint64_t Offset);
  Value *ConvertScalar_ExtractValue(Value *NV, const Type *ToType,
                                    uint64_t Offset, IRBuilder<> &Builder);
  Value *ConvertScalar_InsertValue(Value *StoredVal, Value *ExistingVal,
                                   uint64_t Offset, IRBuilder<> &Builder);
  static Instruction *isOnlyCopiedFromConstantGlobal(AllocationInst *AI);
};

static RegisterPass<SROA> X("scalarrepl", "Scalar Replacement of Aggregates");
// Public interface to the ScalarReplAggregates pass
FunctionPass *llvm::createScalarReplAggregatesPass(signed int Threshold) {
  return new SROA(Threshold);
}
bool SROA::runOnFunction(Function &F) {
  TD = &getAnalysis<TargetData>();

  bool Changed = performPromotion(F);
  while (1) {
    bool LocalChange = performScalarRepl(F);
    if (!LocalChange) break;   // No need to repromote if no scalarrepl
    Changed = true;
    LocalChange = performPromotion(F);
    if (!LocalChange) break;   // No need to re-scalarrepl if no promotion
  }

  return Changed;
}
bool SROA::performPromotion(Function &F) {
  std::vector<AllocaInst*> Allocas;
  DominatorTree     &DT = getAnalysis<DominatorTree>();
  DominanceFrontier &DF = getAnalysis<DominanceFrontier>();

  BasicBlock &BB = F.getEntryBlock();  // Get the entry node for the function

  bool Changed = false;

  while (1) {
    Allocas.clear();

    // Find allocas that are safe to promote, by looking at all instructions in
    // the entry node.
    for (BasicBlock::iterator I = BB.begin(), E = --BB.end(); I != E; ++I)
      if (AllocaInst *AI = dyn_cast<AllocaInst>(I))       // Is it an alloca?
        if (isAllocaPromotable(AI))
          Allocas.push_back(AI);

    if (Allocas.empty()) break;

    PromoteMemToReg(Allocas, DT, DF, F.getContext());
    NumPromoted += Allocas.size();
    Changed = true;
  }

  return Changed;
}
/// getNumSAElements - Return the number of elements in the specific struct or
/// array.
static uint64_t getNumSAElements(const Type *T) {
  if (const StructType *ST = dyn_cast<StructType>(T))
    return ST->getNumElements();
  return cast<ArrayType>(T)->getNumElements();
}
// performScalarRepl - This algorithm is a simple worklist driven algorithm,
// which runs on all of the malloc/alloca instructions in the function, removing
// them if they are only used by getelementptr instructions.
//
bool SROA::performScalarRepl(Function &F) {
  std::vector<AllocationInst*> WorkList;

  // Scan the entry basic block, adding any alloca's and mallocs to the worklist
  BasicBlock &BB = F.getEntryBlock();
  for (BasicBlock::iterator I = BB.begin(), E = BB.end(); I != E; ++I)
    if (AllocationInst *A = dyn_cast<AllocationInst>(I))
      WorkList.push_back(A);

  // Process the worklist
  bool Changed = false;
  while (!WorkList.empty()) {
    AllocationInst *AI = WorkList.back();
    WorkList.pop_back();

    // Handle dead allocas trivially.  These can be formed by SROA'ing arrays
    // with unused elements.
    if (AI->use_empty()) {
      AI->eraseFromParent();
      continue;
    }

    // If this alloca is impossible for us to promote, reject it early.
    if (AI->isArrayAllocation() || !AI->getAllocatedType()->isSized())
      continue;

    // Check to see if this allocation is only modified by a memcpy/memmove from
    // a constant global.  If this is the case, we can change all users to use
    // the constant global instead.  This is commonly produced by the CFE by
    // constructs like "void foo() { int A[] = {1,2,3,4,5,6,7,8,9...}; }" if 'A'
    // is only subsequently read.
    if (Instruction *TheCopy = isOnlyCopiedFromConstantGlobal(AI)) {
      DOUT << "Found alloca equal to global: " << *AI;
      DOUT << "  memcpy = " << *TheCopy;
      Constant *TheSrc = cast<Constant>(TheCopy->getOperand(2));
      AI->replaceAllUsesWith(ConstantExpr::getBitCast(TheSrc, AI->getType()));
      TheCopy->eraseFromParent();  // Don't mutate the global.
      AI->eraseFromParent();
      ++NumGlobals;
      Changed = true;
      continue;
    }

    // Check to see if we can perform the core SROA transformation.  We cannot
    // transform the allocation instruction if it is an array allocation
    // (allocations OF arrays are ok though), and an allocation of a scalar
    // value cannot be decomposed at all.
    uint64_t AllocaSize = TD->getTypeAllocSize(AI->getAllocatedType());

    // Do not promote any struct whose size is too big.
    if (AllocaSize > SRThreshold) continue;

    if ((isa<StructType>(AI->getAllocatedType()) ||
         isa<ArrayType>(AI->getAllocatedType())) &&
        // Do not promote any struct into more than "32" separate vars.
        getNumSAElements(AI->getAllocatedType()) <= SRThreshold/4) {
      // Check that all of the users of the allocation are capable of being
      // transformed.
      switch (isSafeAllocaToScalarRepl(AI)) {
      default: llvm_unreachable("Unexpected value!");
      case 0:  // Not safe to scalar replace.
        break;
      case 1:  // Safe, but requires cleanup/canonicalizations first
        CleanupAllocaUsers(AI);
        // FALL THROUGH.
      case 3:  // Safe to scalar replace.
        DoScalarReplacement(AI, WorkList);
        Changed = true;
        continue;
      }
    }

    // If we can turn this aggregate value (potentially with casts) into a
    // simple scalar value that can be mem2reg'd into a register value.
    // IsNotTrivial tracks whether this is something that mem2reg could have
    // promoted itself.  If so, we don't want to transform it needlessly.  Note
    // that we can't just check based on the type: the alloca may be of an i32
    // but that has pointer arithmetic to set byte 3 of it or something.
    bool IsNotTrivial = false;
    const Type *VectorTy = 0;
    bool HadAVector = false;
    if (CanConvertToScalar(AI, IsNotTrivial, VectorTy, HadAVector,
                           0, unsigned(AllocaSize)) && IsNotTrivial) {
      AllocaInst *NewAI;

      // If we were able to find a vector type that can handle this with
      // insert/extract elements, and if there was at least one use that had
      // a vector type, promote this to a vector.  We don't want to promote
      // random stuff that doesn't use vectors (e.g. <9 x double>) because then
      // we just get a lot of insert/extracts.  If at least one vector is
      // involved, then we probably really do have a union of vector/array.
      if (VectorTy && isa<VectorType>(VectorTy) && HadAVector) {
        DOUT << "CONVERT TO VECTOR: " << *AI << "  TYPE = " << *VectorTy <<"\n";

        // Create and insert the vector alloca.
        NewAI = new AllocaInst(VectorTy, 0, "", AI->getParent()->begin());
        ConvertUsesToScalar(AI, NewAI, 0);
      } else {
        DOUT << "CONVERT TO SCALAR INTEGER: " << *AI << "\n";

        // Create and insert the integer alloca.
        const Type *NewTy = IntegerType::get(AllocaSize*8);
        NewAI = new AllocaInst(NewTy, 0, "", AI->getParent()->begin());
        ConvertUsesToScalar(AI, NewAI, 0);
      }

      AI->eraseFromParent();
      ++NumConverted;
      Changed = true;
      continue;
    }

    // Otherwise, couldn't process this alloca.
  }

  return Changed;
}
/// DoScalarReplacement - This alloca satisfied the isSafeAllocaToScalarRepl
/// predicate, do SROA now.
void SROA::DoScalarReplacement(AllocationInst *AI,
                               std::vector<AllocationInst*> &WorkList) {
  DOUT << "Found inst to SROA: " << *AI;
  SmallVector<AllocaInst*, 32> ElementAllocas;
  if (const StructType *ST = dyn_cast<StructType>(AI->getAllocatedType())) {
    ElementAllocas.reserve(ST->getNumContainedTypes());
    for (unsigned i = 0, e = ST->getNumContainedTypes(); i != e; ++i) {
      AllocaInst *NA = new AllocaInst(ST->getContainedType(i), 0,
                                      AI->getName() + "." + Twine(i), AI);
      ElementAllocas.push_back(NA);
      WorkList.push_back(NA);  // Add to worklist for recursive processing
    }
  } else {
    const ArrayType *AT = cast<ArrayType>(AI->getAllocatedType());
    ElementAllocas.reserve(AT->getNumElements());
    const Type *ElTy = AT->getElementType();
    for (unsigned i = 0, e = AT->getNumElements(); i != e; ++i) {
      AllocaInst *NA = new AllocaInst(ElTy, 0, AI->getAlignment(),
                                      AI->getName() + "." + Twine(i), AI);
      ElementAllocas.push_back(NA);
      WorkList.push_back(NA);  // Add to worklist for recursive processing
    }
  }

  // Now that we have created the alloca instructions that we want to use,
  // expand the getelementptr instructions to use them.
  while (!AI->use_empty()) {
    Instruction *User = cast<Instruction>(AI->use_back());
    if (BitCastInst *BCInst = dyn_cast<BitCastInst>(User)) {
      RewriteBitCastUserOfAlloca(BCInst, AI, ElementAllocas);
      BCInst->eraseFromParent();
      continue;
    }

    // Replace:
    //   %res = load { i32, i32 }* %alloc
    // with:
    //   %load.0 = load i32* %alloc.0
    //   %insert.0 = insertvalue { i32, i32 } zeroinitializer, i32 %load.0, 0
    //   %load.1 = load i32* %alloc.1
    //   %insert = insertvalue { i32, i32 } %insert.0, i32 %load.1, 1
    // (Also works for arrays instead of structs)
    if (LoadInst *LI = dyn_cast<LoadInst>(User)) {
      Value *Insert = UndefValue::get(LI->getType());
      for (unsigned i = 0, e = ElementAllocas.size(); i != e; ++i) {
        Value *Load = new LoadInst(ElementAllocas[i], "load", LI);
        Insert = InsertValueInst::Create(Insert, Load, i, "insert", LI);
      }
      LI->replaceAllUsesWith(Insert);
      LI->eraseFromParent();
      continue;
    }

    // Replace:
    //   store { i32, i32 } %val, { i32, i32 }* %alloc
    // with:
    //   %val.0 = extractvalue { i32, i32 } %val, 0
    //   store i32 %val.0, i32* %alloc.0
    //   %val.1 = extractvalue { i32, i32 } %val, 1
    //   store i32 %val.1, i32* %alloc.1
    // (Also works for arrays instead of structs)
    if (StoreInst *SI = dyn_cast<StoreInst>(User)) {
      Value *Val = SI->getOperand(0);
      for (unsigned i = 0, e = ElementAllocas.size(); i != e; ++i) {
        Value *Extract = ExtractValueInst::Create(Val, i, Val->getName(), SI);
        new StoreInst(Extract, ElementAllocas[i], SI);
      }
      SI->eraseFromParent();
      continue;
    }

    GetElementPtrInst *GEPI = cast<GetElementPtrInst>(User);
    // We now know that the GEP is of the form: GEP <ptr>, 0, <cst>
    unsigned Idx =
      (unsigned)cast<ConstantInt>(GEPI->getOperand(2))->getZExtValue();

    assert(Idx < ElementAllocas.size() && "Index out of range?");
    AllocaInst *AllocaToUse = ElementAllocas[Idx];

    Value *RepValue;
    if (GEPI->getNumOperands() == 3) {
      // Do not insert a new getelementptr instruction with zero indices, only
      // to have it optimized out later.
      RepValue = AllocaToUse;
    } else {
      // We are indexing deeply into the structure, so we still need a
      // getelement ptr instruction to finish the indexing.  This may be
      // expanded itself once the worklist is rerun.
      SmallVector<Value*, 8> NewArgs;
      NewArgs.push_back(Constant::getNullValue(Type::Int32Ty));
      NewArgs.append(GEPI->op_begin()+3, GEPI->op_end());
      RepValue = GetElementPtrInst::Create(AllocaToUse, NewArgs.begin(),
                                           NewArgs.end(), "", GEPI);
      RepValue->takeName(GEPI);
    }

    // If this GEP is to the start of the aggregate, check for memcpys.
    if (Idx == 0 && GEPI->hasAllZeroIndices())
      RewriteBitCastUserOfAlloca(GEPI, AI, ElementAllocas);

    // Move all of the users over to the new GEP.
    GEPI->replaceAllUsesWith(RepValue);
    // Delete the old GEP
    GEPI->eraseFromParent();
  }

  // Finally, delete the Alloca instruction
  AI->eraseFromParent();
  NumReplaced++;
}
/// isSafeElementUse - Check to see if this use is an allowed use for a
/// getelementptr instruction of an array aggregate allocation.  isFirstElt
/// indicates whether Ptr is known to be the start of the aggregate.
void SROA::isSafeElementUse(Value *Ptr, bool isFirstElt, AllocationInst *AI,
                            AllocaInfo &Info) {
  for (Value::use_iterator I = Ptr->use_begin(), E = Ptr->use_end();
       I != E; ++I) {
    Instruction *User = cast<Instruction>(*I);
    switch (User->getOpcode()) {
    case Instruction::Load:  break;
    case Instruction::Store:
      // Store is ok if storing INTO the pointer, not storing the pointer
      if (User->getOperand(0) == Ptr) return MarkUnsafe(Info);
      break;
    case Instruction::GetElementPtr: {
      GetElementPtrInst *GEP = cast<GetElementPtrInst>(User);
      bool AreAllZeroIndices = isFirstElt;
      if (GEP->getNumOperands() > 1) {
        if (!isa<ConstantInt>(GEP->getOperand(1)) ||
            !cast<ConstantInt>(GEP->getOperand(1))->isZero())
          // Using pointer arithmetic to navigate the array.
          return MarkUnsafe(Info);

        if (AreAllZeroIndices)
          AreAllZeroIndices = GEP->hasAllZeroIndices();
      }
      isSafeElementUse(GEP, AreAllZeroIndices, AI, Info);
      if (Info.isUnsafe) return;
      break;
    }
    case Instruction::BitCast:
      if (isFirstElt) {
        isSafeUseOfBitCastedAllocation(cast<BitCastInst>(User), AI, Info);
        if (Info.isUnsafe) return;
        break;
      }
      DOUT << "  Transformation preventing inst: " << *User;
      return MarkUnsafe(Info);
    case Instruction::Call:
      if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(User)) {
        if (isFirstElt) {
          isSafeMemIntrinsicOnAllocation(MI, AI, I.getOperandNo(), Info);
          if (Info.isUnsafe) return;
          break;
        }
      }
      DOUT << "  Transformation preventing inst: " << *User;
      return MarkUnsafe(Info);
    default:
      DOUT << "  Transformation preventing inst: " << *User;
      return MarkUnsafe(Info);
    }
  }
  return;  // All users look ok :)
}
/// AllUsersAreLoads - Return true if all users of this value are loads.
static bool AllUsersAreLoads(Value *Ptr) {
  for (Value::use_iterator I = Ptr->use_begin(), E = Ptr->use_end();
       I != E; ++I)
    if (cast<Instruction>(*I)->getOpcode() != Instruction::Load)
      return false;
  return true;
}
/// isSafeUseOfAllocation - Check to see if this user is an allowed use for an
/// aggregate allocation.
void SROA::isSafeUseOfAllocation(Instruction *User, AllocationInst *AI,
                                 AllocaInfo &Info) {
  if (BitCastInst *C = dyn_cast<BitCastInst>(User))
    return isSafeUseOfBitCastedAllocation(C, AI, Info);

  if (LoadInst *LI = dyn_cast<LoadInst>(User))
    if (!LI->isVolatile())
      return;// Loads (returning a first class aggregate) are always rewritable

  if (StoreInst *SI = dyn_cast<StoreInst>(User))
    if (!SI->isVolatile() && SI->getOperand(0) != AI)
      return;// Store is ok if storing INTO the pointer, not storing the pointer

  GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(User);
  if (GEPI == 0)
    return MarkUnsafe(Info);

  gep_type_iterator I = gep_type_begin(GEPI), E = gep_type_end(GEPI);

  // The GEP is not safe to transform if not of the form "GEP <ptr>, 0, <cst>".
  if (I == E ||
      I.getOperand() != Constant::getNullValue(I.getOperand()->getType())) {
    return MarkUnsafe(Info);
  }

  ++I;
  if (I == E) return MarkUnsafe(Info);  // ran out of GEP indices??

  bool IsAllZeroIndices = true;

  // If the first index is a non-constant index into an array, see if we can
  // handle it as a special case.
  if (const ArrayType *AT = dyn_cast<ArrayType>(*I)) {
    if (!isa<ConstantInt>(I.getOperand())) {
      IsAllZeroIndices = 0;
      uint64_t NumElements = AT->getNumElements();

      // If this is an array index and the index is not constant, we cannot
      // promote... that is unless the array has exactly one or two elements in
      // it, in which case we CAN promote it, but we have to canonicalize this
      // out if this is the only problem.
      if ((NumElements == 1 || NumElements == 2) &&
          AllUsersAreLoads(GEPI)) {
        Info.needsCleanup = true;
        return;  // Canonicalization required!
      }
      return MarkUnsafe(Info);
    }
  }

  // Walk through the GEP type indices, checking the types that this indexes
  // into.
  for (; I != E; ++I) {
    // Ignore struct elements, no extra checking needed for these.
    if (isa<StructType>(*I))
      continue;

    ConstantInt *IdxVal = dyn_cast<ConstantInt>(I.getOperand());
    if (!IdxVal) return MarkUnsafe(Info);

    // Are all indices still zero?
    IsAllZeroIndices &= IdxVal->isZero();

    if (const ArrayType *AT = dyn_cast<ArrayType>(*I)) {
      // This GEP indexes an array.  Verify that this is an in-range constant
      // integer. Specifically, consider A[0][i]. We cannot know that the user
      // isn't doing invalid things like allowing i to index an out-of-range
      // subscript that accesses A[1].  Because of this, we have to reject SROA
      // of any accesses into structs where any of the components are variables.
      if (IdxVal->getZExtValue() >= AT->getNumElements())
        return MarkUnsafe(Info);
    } else if (const VectorType *VT = dyn_cast<VectorType>(*I)) {
      if (IdxVal->getZExtValue() >= VT->getNumElements())
        return MarkUnsafe(Info);
    }
  }

  // If there are any non-simple uses of this getelementptr, make sure to reject
  // them.
  return isSafeElementUse(GEPI, IsAllZeroIndices, AI, Info);
}
/// isSafeMemIntrinsicOnAllocation - Return true if the specified memory
/// intrinsic can be promoted by SROA.  At this point, we know that the operand
/// of the memintrinsic is a pointer to the beginning of the allocation.
void SROA::isSafeMemIntrinsicOnAllocation(MemIntrinsic *MI, AllocationInst *AI,
                                          unsigned OpNo, AllocaInfo &Info) {
  // If not constant length, give up.
  ConstantInt *Length = dyn_cast<ConstantInt>(MI->getLength());
  if (!Length) return MarkUnsafe(Info);

  // If not the whole aggregate, give up.
  if (Length->getZExtValue() !=
      TD->getTypeAllocSize(AI->getType()->getElementType()))
    return MarkUnsafe(Info);

  // We only know about memcpy/memset/memmove.
  if (!isa<MemIntrinsic>(MI))
    return MarkUnsafe(Info);

  // Otherwise, we can transform it.  Determine whether this is a memcpy/set
  // into or out of the aggregate.
  if (OpNo == 1)
    Info.isMemCpyDst = true;
  else {
    assert(OpNo == 2);
    Info.isMemCpySrc = true;
  }
}
/// isSafeUseOfBitCastedAllocation - Return true if all users of this bitcast
/// are safe to transform.
void SROA::isSafeUseOfBitCastedAllocation(BitCastInst *BC, AllocationInst *AI,
                                          AllocaInfo &Info) {
  for (Value::use_iterator UI = BC->use_begin(), E = BC->use_end();
       UI != E; ++UI) {
    if (BitCastInst *BCU = dyn_cast<BitCastInst>(UI)) {
      isSafeUseOfBitCastedAllocation(BCU, AI, Info);
    } else if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(UI)) {
      isSafeMemIntrinsicOnAllocation(MI, AI, UI.getOperandNo(), Info);
    } else if (StoreInst *SI = dyn_cast<StoreInst>(UI)) {
      if (SI->isVolatile())
        return MarkUnsafe(Info);

      // If storing the entire alloca in one chunk through a bitcasted pointer
      // to integer, we can transform it.  This happens (for example) when you
      // cast a {i32,i32}* to i64* and store through it.  This is similar to the
      // memcpy case and occurs in various "byval" cases and emulated memcpys.
      if (isa<IntegerType>(SI->getOperand(0)->getType()) &&
          TD->getTypeAllocSize(SI->getOperand(0)->getType()) ==
          TD->getTypeAllocSize(AI->getType()->getElementType())) {
        Info.isMemCpyDst = true;
        continue;
      }
      return MarkUnsafe(Info);
    } else if (LoadInst *LI = dyn_cast<LoadInst>(UI)) {
      if (LI->isVolatile())
        return MarkUnsafe(Info);

      // If loading the entire alloca in one chunk through a bitcasted pointer
      // to integer, we can transform it.  This happens (for example) when you
      // cast a {i32,i32}* to i64* and load through it.  This is similar to the
      // memcpy case and occurs in various "byval" cases and emulated memcpys.
      if (isa<IntegerType>(LI->getType()) &&
          TD->getTypeAllocSize(LI->getType()) ==
          TD->getTypeAllocSize(AI->getType()->getElementType())) {
        Info.isMemCpySrc = true;
        continue;
      }
      return MarkUnsafe(Info);
    } else if (isa<DbgInfoIntrinsic>(UI)) {
      // If one user is DbgInfoIntrinsic then check if all users are
      // DbgInfoIntrinsics.
      if (OnlyUsedByDbgInfoIntrinsics(BC)) {
        Info.needsCleanup = true;
        return;
      }
      return MarkUnsafe(Info);
    } else {
      return MarkUnsafe(Info);
    }
    if (Info.isUnsafe) return;
  }
}
/// RewriteBitCastUserOfAlloca - BCInst (transitively) bitcasts AI, or indexes
/// to its first element.  Transform users of the cast to use the new values
/// instead.
void SROA::RewriteBitCastUserOfAlloca(Instruction *BCInst, AllocationInst *AI,
                                      SmallVector<AllocaInst*, 32> &NewElts) {
  Value::use_iterator UI = BCInst->use_begin(), UE = BCInst->use_end();
  while (UI != UE) {
    Instruction *User = cast<Instruction>(*UI++);
    if (BitCastInst *BCU = dyn_cast<BitCastInst>(User)) {
      RewriteBitCastUserOfAlloca(BCU, AI, NewElts);
      if (BCU->use_empty()) BCU->eraseFromParent();
      continue;
    }

    if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(User)) {
      // This must be memcpy/memmove/memset of the entire aggregate.
      // Split into one per element.
      RewriteMemIntrinUserOfAlloca(MI, BCInst, AI, NewElts);
      continue;
    }

    if (StoreInst *SI = dyn_cast<StoreInst>(User)) {
      // If this is a store of the entire alloca from an integer, rewrite it.
      RewriteStoreUserOfWholeAlloca(SI, AI, NewElts);
      continue;
    }

    if (LoadInst *LI = dyn_cast<LoadInst>(User)) {
      // If this is a load of the entire alloca to an integer, rewrite it.
      RewriteLoadUserOfWholeAlloca(LI, AI, NewElts);
      continue;
    }

    // Otherwise it must be some other user of a gep of the first pointer.  Just
    // leave these alone.
  }
}
/// RewriteMemIntrinUserOfAlloca - MI is a memcpy/memset/memmove from or to AI.
/// Rewrite it to copy or set the elements of the scalarized memory.
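///
/// For illustration (hypothetical IR, not taken from this function's tests), a
/// call such as
///   call void @llvm.memset.i32(i8* %cast, i8 0, i32 8, i32 4)
/// over an alloca that was split into two i32 elements is rewritten below into
/// one "store i32 0" per element, or into one smaller memset per element when
/// the element itself is still an aggregate.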
void SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *BCInst,
                                        AllocationInst *AI,
                                        SmallVector<AllocaInst*, 32> &NewElts) {
  // If this is a memcpy/memmove, construct the other pointer as the
  // appropriate type.  The "Other" pointer is the pointer that goes to memory
  // that doesn't have anything to do with the alloca that we are promoting. For
  // memset, this Value* stays null.
  Value *OtherPtr = 0;
  LLVMContext &Context = MI->getContext();
  unsigned MemAlignment = MI->getAlignment();
  if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(MI)) { // memmove/memcpy
    if (BCInst == MTI->getRawDest())
      OtherPtr = MTI->getRawSource();
    else {
      assert(BCInst == MTI->getRawSource());
      OtherPtr = MTI->getRawDest();
    }
  }

  // If there is an other pointer, we want to convert it to the same pointer
  // type as AI has, so we can GEP through it safely.
  if (OtherPtr) {
    // It is likely that OtherPtr is a bitcast, if so, remove it.
    if (BitCastInst *BC = dyn_cast<BitCastInst>(OtherPtr))
      OtherPtr = BC->getOperand(0);
    // All zero GEPs are effectively bitcasts.
    if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(OtherPtr))
      if (GEP->hasAllZeroIndices())
        OtherPtr = GEP->getOperand(0);

    if (ConstantExpr *BCE = dyn_cast<ConstantExpr>(OtherPtr))
      if (BCE->getOpcode() == Instruction::BitCast)
        OtherPtr = BCE->getOperand(0);

    // If the pointer is not the right type, insert a bitcast to the right
    // type.
    if (OtherPtr->getType() != AI->getType())
      OtherPtr = new BitCastInst(OtherPtr, AI->getType(), OtherPtr->getName(),
                                 MI);
  }

  // Process each element of the aggregate.
  Value *TheFn = MI->getOperand(0);
  const Type *BytePtrTy = MI->getRawDest()->getType();
  bool SROADest = MI->getRawDest() == BCInst;

  Constant *Zero = Constant::getNullValue(Type::Int32Ty);

  for (unsigned i = 0, e = NewElts.size(); i != e; ++i) {
    // If this is a memcpy/memmove, emit a GEP of the other element address.
    Value *OtherElt = 0;
    unsigned OtherEltAlign = MemAlignment;

    if (OtherPtr) {
      Value *Idx[2] = { Zero, ConstantInt::get(Type::Int32Ty, i) };
      OtherElt = GetElementPtrInst::Create(OtherPtr, Idx, Idx + 2,
                                           OtherPtr->getNameStr()+"."+Twine(i),
                                           MI);
      uint64_t EltOffset;
      const PointerType *OtherPtrTy = cast<PointerType>(OtherPtr->getType());
      if (const StructType *ST =
            dyn_cast<StructType>(OtherPtrTy->getElementType())) {
        EltOffset = TD->getStructLayout(ST)->getElementOffset(i);
      } else {
        const Type *EltTy =
          cast<SequentialType>(OtherPtr->getType())->getElementType();
        EltOffset = TD->getTypeAllocSize(EltTy)*i;
      }

      // The alignment of the other pointer is the guaranteed alignment of the
      // element, which is affected by both the known alignment of the whole
      // mem intrinsic and the alignment of the element.  If the alignment of
      // the memcpy (f.e.) is 32 but the element is at a 4-byte offset, then the
      // known alignment is just 4 bytes.
      OtherEltAlign = (unsigned)MinAlign(OtherEltAlign, EltOffset);
    }

    Value *EltPtr = NewElts[i];
    const Type *EltTy = cast<PointerType>(EltPtr->getType())->getElementType();

    // If we got down to a scalar, insert a load or store as appropriate.
    if (EltTy->isSingleValueType()) {
      if (isa<MemTransferInst>(MI)) {
        if (SROADest) {
          // From Other to Alloca.
          Value *Elt = new LoadInst(OtherElt, "tmp", false, OtherEltAlign, MI);
          new StoreInst(Elt, EltPtr, MI);
        } else {
          // From Alloca to Other.
          Value *Elt = new LoadInst(EltPtr, "tmp", MI);
          new StoreInst(Elt, OtherElt, false, OtherEltAlign, MI);
        }
        continue;
      }
      assert(isa<MemSetInst>(MI));

      // If the stored element is zero (common case), just store a null
      // constant.
      Constant *StoreVal = 0;
      if (ConstantInt *CI = dyn_cast<ConstantInt>(MI->getOperand(2))) {
        if (CI->isZero()) {
          StoreVal = Constant::getNullValue(EltTy);  // 0.0, null, 0, <0,0>
        } else {
          // If EltTy is a vector type, get the element type.
          const Type *ValTy = EltTy->getScalarType();

          // Construct an integer with the right value.
          unsigned EltSize = TD->getTypeSizeInBits(ValTy);
          APInt OneVal(EltSize, CI->getZExtValue());
          APInt TotalVal(OneVal);
          // Set each byte.
          for (unsigned i = 0; 8*i < EltSize; ++i) {
            TotalVal = TotalVal.shl(8);
            TotalVal |= OneVal;
          }

          // Convert the integer value to the appropriate type.
          StoreVal = ConstantInt::get(Context, TotalVal);
          if (isa<PointerType>(ValTy))
            StoreVal = ConstantExpr::getIntToPtr(StoreVal, ValTy);
          else if (ValTy->isFloatingPoint())
            StoreVal = ConstantExpr::getBitCast(StoreVal, ValTy);
          assert(StoreVal->getType() == ValTy && "Type mismatch!");

          // If the requested value was a vector constant, create it.
          if (EltTy != ValTy) {
            unsigned NumElts = cast<VectorType>(EltTy)->getNumElements();
            SmallVector<Constant*, 16> Elts(NumElts, StoreVal);
            StoreVal = ConstantVector::get(&Elts[0], NumElts);
          }
        }
        new StoreInst(StoreVal, EltPtr, MI);
        continue;
      }
      // Otherwise, if we're storing a byte variable, use a memset call for
      // this element.
    }

    // Cast the element pointer to BytePtrTy.
    if (EltPtr->getType() != BytePtrTy)
      EltPtr = new BitCastInst(EltPtr, BytePtrTy, EltPtr->getNameStr(), MI);

    // Cast the other pointer (if we have one) to BytePtrTy.
    if (OtherElt && OtherElt->getType() != BytePtrTy)
      OtherElt = new BitCastInst(OtherElt, BytePtrTy, OtherElt->getNameStr(),
                                 MI);

    unsigned EltSize = TD->getTypeAllocSize(EltTy);

    // Finally, insert the meminst for this element.
    if (isa<MemTransferInst>(MI)) {
      Value *Ops[] = {
        SROADest ? EltPtr : OtherElt,  // Dest ptr
        SROADest ? OtherElt : EltPtr,  // Src ptr
        ConstantInt::get(MI->getOperand(3)->getType(), EltSize), // Size
        ConstantInt::get(Type::Int32Ty, OtherEltAlign)  // Align
      };
      CallInst::Create(TheFn, Ops, Ops + 4, "", MI);
    } else {
      assert(isa<MemSetInst>(MI));
      Value *Ops[] = {
        EltPtr, MI->getOperand(2),  // Dest, Value,
        ConstantInt::get(MI->getOperand(3)->getType(), EltSize), // Size
        Zero  // Align
      };
      CallInst::Create(TheFn, Ops, Ops + 4, "", MI);
    }
  }
  MI->eraseFromParent();
}
/// RewriteStoreUserOfWholeAlloca - We found a store of an integer that
/// overwrites the entire allocation.  Extract out the pieces of the stored
/// integer and store them individually.
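///
/// For example (illustrative IR only), given an alloca split into two i32
/// elements %A.0 and %A.1, a store of an i64 %V through a bitcasted pointer
/// becomes roughly:
///   %t0 = trunc i64 %V to i32
///   store i32 %t0, i32* %A.0
///   %s  = lshr i64 %V, 32
///   %t1 = trunc i64 %s to i32
///   store i32 %t1, i32* %A.1
/// (shift amounts shown for a little-endian target).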
void SROA::RewriteStoreUserOfWholeAlloca(StoreInst *SI, AllocationInst *AI,
                                         SmallVector<AllocaInst*, 32> &NewElts){
  // Extract each element out of the integer according to its structure offset
  // and store the element value to the individual alloca.
  Value *SrcVal = SI->getOperand(0);
  const Type *AllocaEltTy = AI->getType()->getElementType();
  uint64_t AllocaSizeBits = TD->getTypeAllocSizeInBits(AllocaEltTy);

  // If this isn't a store of an integer to the whole alloca, it may be a store
  // to the first element.  Just ignore the store in this case and normal SROA
  // will handle it.
  if (!isa<IntegerType>(SrcVal->getType()) ||
      TD->getTypeAllocSizeInBits(SrcVal->getType()) != AllocaSizeBits)
    return;
  // Handle tail padding by extending the operand
  if (TD->getTypeSizeInBits(SrcVal->getType()) != AllocaSizeBits)
    SrcVal = new ZExtInst(SrcVal,
                          IntegerType::get(AllocaSizeBits), "", SI);

  DOUT << "PROMOTING STORE TO WHOLE ALLOCA: " << *AI << *SI;

  // There are two forms here: AI could be an array or struct.  Both cases
  // have different ways to compute the element offset.
  if (const StructType *EltSTy = dyn_cast<StructType>(AllocaEltTy)) {
    const StructLayout *Layout = TD->getStructLayout(EltSTy);

    for (unsigned i = 0, e = NewElts.size(); i != e; ++i) {
      // Get the number of bits to shift SrcVal to get the value.
      const Type *FieldTy = EltSTy->getElementType(i);
      uint64_t Shift = Layout->getElementOffsetInBits(i);

      if (TD->isBigEndian())
        Shift = AllocaSizeBits-Shift-TD->getTypeAllocSizeInBits(FieldTy);

      Value *EltVal = SrcVal;
      if (Shift) {
        Value *ShiftVal = ConstantInt::get(EltVal->getType(), Shift);
        EltVal = BinaryOperator::CreateLShr(EltVal, ShiftVal,
                                            "sroa.store.elt", SI);
      }

      // Truncate down to an integer of the right size.
      uint64_t FieldSizeBits = TD->getTypeSizeInBits(FieldTy);

      // Ignore zero sized fields like {}, they obviously contain no data.
      if (FieldSizeBits == 0) continue;

      if (FieldSizeBits != AllocaSizeBits)
        EltVal = new TruncInst(EltVal,
                               IntegerType::get(FieldSizeBits), "", SI);
      Value *DestField = NewElts[i];
      if (EltVal->getType() == FieldTy) {
        // Storing to an integer field of this size, just do it.
      } else if (FieldTy->isFloatingPoint() || isa<VectorType>(FieldTy)) {
        // Bitcast to the right element type (for fp/vector values).
        EltVal = new BitCastInst(EltVal, FieldTy, "", SI);
      } else {
        // Otherwise, bitcast the dest pointer (for aggregates).
        DestField = new BitCastInst(DestField,
                                    PointerType::getUnqual(EltVal->getType()),
                                    "", SI);
      }
      new StoreInst(EltVal, DestField, SI);
    }

  } else {
    const ArrayType *ATy = cast<ArrayType>(AllocaEltTy);
    const Type *ArrayEltTy = ATy->getElementType();
    uint64_t ElementOffset = TD->getTypeAllocSizeInBits(ArrayEltTy);
    uint64_t ElementSizeBits = TD->getTypeSizeInBits(ArrayEltTy);

    uint64_t Shift;
    if (TD->isBigEndian())
      Shift = AllocaSizeBits-ElementOffset;
    else
      Shift = 0;

    for (unsigned i = 0, e = NewElts.size(); i != e; ++i) {
      // Ignore zero sized fields like {}, they obviously contain no data.
      if (ElementSizeBits == 0) continue;

      Value *EltVal = SrcVal;
      if (Shift) {
        Value *ShiftVal = ConstantInt::get(EltVal->getType(), Shift);
        EltVal = BinaryOperator::CreateLShr(EltVal, ShiftVal,
                                            "sroa.store.elt", SI);
      }

      // Truncate down to an integer of the right size.
      if (ElementSizeBits != AllocaSizeBits)
        EltVal = new TruncInst(EltVal,
                               IntegerType::get(ElementSizeBits),"",SI);
      Value *DestField = NewElts[i];
      if (EltVal->getType() == ArrayEltTy) {
        // Storing to an integer field of this size, just do it.
      } else if (ArrayEltTy->isFloatingPoint() || isa<VectorType>(ArrayEltTy)) {
        // Bitcast to the right element type (for fp/vector values).
        EltVal = new BitCastInst(EltVal, ArrayEltTy, "", SI);
      } else {
        // Otherwise, bitcast the dest pointer (for aggregates).
        DestField = new BitCastInst(DestField,
                                    PointerType::getUnqual(EltVal->getType()),
                                    "", SI);
      }
      new StoreInst(EltVal, DestField, SI);

      if (TD->isBigEndian())
        Shift -= ElementOffset;
      else
        Shift += ElementOffset;
    }
  }

  SI->eraseFromParent();
}
/// RewriteLoadUserOfWholeAlloca - We found a load of the entire allocation to
/// an integer.  Load the individual pieces to form the aggregate value.
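///
/// For example (illustrative IR only), a load of i64 from an alloca that was
/// split into two i32 elements %A.0 and %A.1 becomes roughly:
///   %v0  = load i32* %A.0
///   %e0  = zext i32 %v0 to i64
///   %v1  = load i32* %A.1
///   %e1  = zext i32 %v1 to i64
///   %s1  = shl i64 %e1, 32
///   %res = or i64 %s1, %e0
/// (shift amounts shown for a little-endian target).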
void SROA::RewriteLoadUserOfWholeAlloca(LoadInst *LI, AllocationInst *AI,
                                        SmallVector<AllocaInst*, 32> &NewElts) {
  // Extract each element out of the NewElts according to its structure offset
  // and form the result value.
  const Type *AllocaEltTy = AI->getType()->getElementType();
  uint64_t AllocaSizeBits = TD->getTypeAllocSizeInBits(AllocaEltTy);

  // If this isn't a load of the whole alloca to an integer, it may be a load
  // of the first element.  Just ignore the load in this case and normal SROA
  // will handle it.
  if (!isa<IntegerType>(LI->getType()) ||
      TD->getTypeAllocSizeInBits(LI->getType()) != AllocaSizeBits)
    return;

  DOUT << "PROMOTING LOAD OF WHOLE ALLOCA: " << *AI << *LI;

  // There are two forms here: AI could be an array or struct.  Both cases
  // have different ways to compute the element offset.
  const StructLayout *Layout = 0;
  uint64_t ArrayEltBitOffset = 0;
  if (const StructType *EltSTy = dyn_cast<StructType>(AllocaEltTy)) {
    Layout = TD->getStructLayout(EltSTy);
  } else {
    const Type *ArrayEltTy = cast<ArrayType>(AllocaEltTy)->getElementType();
    ArrayEltBitOffset = TD->getTypeAllocSizeInBits(ArrayEltTy);
  }

  Value *ResultVal =
    Constant::getNullValue(IntegerType::get(AllocaSizeBits));

  for (unsigned i = 0, e = NewElts.size(); i != e; ++i) {
    // Load the value from the alloca.  If the NewElt is an aggregate, cast
    // the pointer to an integer of the same size before doing the load.
    Value *SrcField = NewElts[i];
    const Type *FieldTy =
      cast<PointerType>(SrcField->getType())->getElementType();
    uint64_t FieldSizeBits = TD->getTypeSizeInBits(FieldTy);

    // Ignore zero sized fields like {}, they obviously contain no data.
    if (FieldSizeBits == 0) continue;

    const IntegerType *FieldIntTy = IntegerType::get(FieldSizeBits);
    if (!isa<IntegerType>(FieldTy) && !FieldTy->isFloatingPoint() &&
        !isa<VectorType>(FieldTy))
      SrcField = new BitCastInst(SrcField,
                                 PointerType::getUnqual(FieldIntTy),
                                 "", LI);
    SrcField = new LoadInst(SrcField, "sroa.load.elt", LI);

    // If SrcField is a fp or vector of the right size but that isn't an
    // integer type, bitcast to an integer so we can shift it.
    if (SrcField->getType() != FieldIntTy)
      SrcField = new BitCastInst(SrcField, FieldIntTy, "", LI);

    // Zero extend the field to be the same size as the final alloca so that
    // we can shift and insert it.
    if (SrcField->getType() != ResultVal->getType())
      SrcField = new ZExtInst(SrcField, ResultVal->getType(), "", LI);

    // Determine the number of bits to shift SrcField.
    uint64_t Shift;
    if (Layout) // Struct case.
      Shift = Layout->getElementOffsetInBits(i);
    else // Array case.
      Shift = i*ArrayEltBitOffset;

    if (TD->isBigEndian())
      Shift = AllocaSizeBits-Shift-FieldIntTy->getBitWidth();

    if (Shift) {
      Value *ShiftVal = ConstantInt::get(SrcField->getType(), Shift);
      SrcField = BinaryOperator::CreateShl(SrcField, ShiftVal, "", LI);
    }

    ResultVal = BinaryOperator::CreateOr(SrcField, ResultVal, "", LI);
  }

  // Handle tail padding by truncating the result
  if (TD->getTypeSizeInBits(LI->getType()) != AllocaSizeBits)
    ResultVal = new TruncInst(ResultVal, LI->getType(), "", LI);

  LI->replaceAllUsesWith(ResultVal);
  LI->eraseFromParent();
}
/// HasPadding - Return true if the specified type has any structure or
/// alignment padding, false otherwise.
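///
/// For example (assuming typical layout rules), a struct such as {i8, i32}
/// has three bytes of alignment padding after the i8 field and would be
/// flagged here, while {i32, i32} has no padding at all.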
static bool HasPadding(const Type *Ty, const TargetData &TD) {
  if (const StructType *STy = dyn_cast<StructType>(Ty)) {
    const StructLayout *SL = TD.getStructLayout(STy);
    unsigned PrevFieldBitOffset = 0;
    for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
      unsigned FieldBitOffset = SL->getElementOffsetInBits(i);

      // Padding in sub-elements?
      if (HasPadding(STy->getElementType(i), TD))
        return true;

      // Check to see if there is any padding between this element and the
      // previous one.
      if (i) {
        unsigned PrevFieldEnd =
          PrevFieldBitOffset+TD.getTypeSizeInBits(STy->getElementType(i-1));
        if (PrevFieldEnd < FieldBitOffset)
          return true;
      }

      PrevFieldBitOffset = FieldBitOffset;
    }

    // Check for tail padding.
    if (unsigned EltCount = STy->getNumElements()) {
      unsigned PrevFieldEnd = PrevFieldBitOffset +
        TD.getTypeSizeInBits(STy->getElementType(EltCount-1));
      if (PrevFieldEnd < SL->getSizeInBits())
        return true;
    }

  } else if (const ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
    return HasPadding(ATy->getElementType(), TD);
  } else if (const VectorType *VTy = dyn_cast<VectorType>(Ty)) {
    return HasPadding(VTy->getElementType(), TD);
  }
  return TD.getTypeSizeInBits(Ty) != TD.getTypeAllocSizeInBits(Ty);
}
/// isSafeAllocaToScalarRepl - Check to see if the specified allocation of
/// an aggregate can be broken down into elements.  Return 0 if not, 3 if safe,
/// or 1 if safe after canonicalization has been performed.
///
int SROA::isSafeAllocaToScalarRepl(AllocationInst *AI) {
  // Loop over the use list of the alloca.  We can only transform it if all of
  // the users are safe to transform.
  AllocaInfo Info;

  for (Value::use_iterator I = AI->use_begin(), E = AI->use_end();
       I != E; ++I) {
    isSafeUseOfAllocation(cast<Instruction>(*I), AI, Info);
    if (Info.isUnsafe) {
      DOUT << "Cannot transform: " << *AI << "  due to user: " << **I;
      return 0;
    }
  }

  // Okay, we know all the users are promotable.  If the aggregate is a memcpy
  // source and destination, we have to be careful.  In particular, the memcpy
  // could be moving around elements that live in structure padding of the LLVM
  // types, but may actually be used.  In these cases, we refuse to promote the
  // alloca.
  if (Info.isMemCpySrc && Info.isMemCpyDst &&
      HasPadding(AI->getType()->getElementType(), *TD))
    return 0;

  // If we require cleanup, return 1, otherwise return 3.
  return Info.needsCleanup ? 1 : 3;
}
/// CleanupGEP - GEP is used by an Alloca, which can be promoted after the GEP
/// is canonicalized here.
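///
/// For example (illustrative IR only), a variable index into a two-element
/// array:
///   %p = getelementptr [2 x i32]* %A, i32 0, i32 %i
///   %v = load i32* %p
/// is rewritten below into loads of both elements and a select on %i != 0,
/// which removes the variable index and lets SROA split the array.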
void SROA::CleanupGEP(GetElementPtrInst *GEPI) {
  gep_type_iterator I = gep_type_begin(GEPI);
  ++I;

  const ArrayType *AT = dyn_cast<ArrayType>(*I);
  if (!AT)
    return;

  uint64_t NumElements = AT->getNumElements();

  if (isa<ConstantInt>(I.getOperand()))
    return;

  if (NumElements == 1) {
    GEPI->setOperand(2, Constant::getNullValue(Type::Int32Ty));
    return;
  }

  assert(NumElements == 2 && "Unhandled case!");
  // All users of the GEP must be loads.  At each use of the GEP, insert
  // two loads of the appropriate indexed GEP and select between them.
  Value *IsOne = new ICmpInst(GEPI, ICmpInst::ICMP_NE, I.getOperand(),
                              Constant::getNullValue(I.getOperand()->getType()),
                              "isone");
  // Insert the new GEP instructions, which are properly indexed.
  SmallVector<Value*, 8> Indices(GEPI->op_begin()+1, GEPI->op_end());
  Indices[1] = Constant::getNullValue(Type::Int32Ty);
  Value *ZeroIdx = GetElementPtrInst::Create(GEPI->getOperand(0),
                                             Indices.begin(), Indices.end(),
                                             GEPI->getName()+".0", GEPI);
  Indices[1] = ConstantInt::get(Type::Int32Ty, 1);
  Value *OneIdx = GetElementPtrInst::Create(GEPI->getOperand(0),
                                            Indices.begin(), Indices.end(),
                                            GEPI->getName()+".1", GEPI);
  // Replace all loads of the variable index GEP with loads from both
  // indexes and a select.
  while (!GEPI->use_empty()) {
    LoadInst *LI = cast<LoadInst>(GEPI->use_back());
    Value *Zero = new LoadInst(ZeroIdx, LI->getName()+".0", LI);
    Value *One  = new LoadInst(OneIdx , LI->getName()+".1", LI);
    Value *R = SelectInst::Create(IsOne, One, Zero, LI->getName(), LI);
    LI->replaceAllUsesWith(R);
    LI->eraseFromParent();
  }
  GEPI->eraseFromParent();
}
/// CleanupAllocaUsers - If SROA reported that it can promote the specified
/// allocation, but only if cleaned up, perform the cleanups required.
void SROA::CleanupAllocaUsers(AllocationInst *AI) {
  // At this point, we know that the end result will be SROA'd and promoted, so
  // we can insert ugly code if required so long as sroa+mem2reg will clean it
  // up.
  for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end();
       UI != E; ) {
    User *U = *UI++;
    if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(U))
      CleanupGEP(GEPI);
    else {
      Instruction *I = cast<Instruction>(U);
      SmallVector<DbgInfoIntrinsic *, 2> DbgInUses;
      if (!isa<StoreInst>(I) && OnlyUsedByDbgInfoIntrinsics(I, &DbgInUses)) {
        // Safe to remove debug info uses.
        while (!DbgInUses.empty()) {
          DbgInfoIntrinsic *DI = DbgInUses.back(); DbgInUses.pop_back();
          DI->eraseFromParent();
        }
        I->eraseFromParent();
      }
    }
  }
}
/// MergeInType - Add the 'In' type to the accumulated type (Accum) so far at
/// the offset specified by Offset (which is specified in bytes).
///
/// There are two cases we handle here:
///   1) A union of vector types of the same size and potentially its elements.
///      Here we turn element accesses into insert/extract element operations.
///      This promotes a <4 x float> with a store of float to the third element
///      into a <4 x float> that uses insert element.
///   2) A fully general blob of memory, which we turn into some (potentially
///      large) integer type with extract and insert operations where the loads
///      and stores would mutate the memory.
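///
/// As a sketch of case 1 (hypothetical IR), a union of <4 x float> and float
/// accessed as
///   store float %f, float* %elt2   ; byte offset 8 into the alloca
/// can be folded into the vector form
///   %v1 = insertelement <4 x float> %v0, float %f, i32 2
/// once the whole alloca is rewritten as a single <4 x float> value.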
static void MergeInType(const Type *In, uint64_t Offset, const Type *&VecTy,
                        unsigned AllocaSize, const TargetData &TD,
                        LLVMContext &Context) {
  // If this could be contributing to a vector, analyze it.
  if (VecTy != Type::VoidTy) { // either null or a vector type.

    // If the In type is a vector that is the same size as the alloca, see if it
    // matches the existing VecTy.
    if (const VectorType *VInTy = dyn_cast<VectorType>(In)) {
      if (VInTy->getBitWidth()/8 == AllocaSize && Offset == 0) {
        // If we're storing/loading a vector of the right size, allow it as a
        // vector.  If this is the first vector we see, remember the type so
        // that we know the element size.
        if (VecTy == 0)
          VecTy = VInTy;
        return;
      }
    } else if (In == Type::FloatTy || In == Type::DoubleTy ||
               (isa<IntegerType>(In) && In->getPrimitiveSizeInBits() >= 8 &&
                isPowerOf2_32(In->getPrimitiveSizeInBits()))) {
      // If we're accessing something that could be an element of a vector, see
      // if the implied vector agrees with what we already have and if Offset is
      // compatible with it.
      unsigned EltSize = In->getPrimitiveSizeInBits()/8;
      if (Offset % EltSize == 0 &&
          AllocaSize % EltSize == 0 &&
          (VecTy == 0 ||
           cast<VectorType>(VecTy)->getElementType()
                 ->getPrimitiveSizeInBits()/8 == EltSize)) {
        if (VecTy == 0)
          VecTy = VectorType::get(In, AllocaSize/EltSize);
        return;
      }
    }
  }

  // Otherwise, we have a case that we can't handle with an optimized vector
  // form.  We can still turn this into a large integer.
  VecTy = Type::VoidTy;
}
/// CanConvertToScalar - V is a pointer.  If we can convert the pointee and all
/// of its accesses to use a single vector type, return true, and set VecTy to
/// the new type.  If we could convert the alloca into a single promotable
/// integer, return true but set VecTy to VoidTy.  Further, if the use is not a
/// completely trivial use that mem2reg could promote, set IsNotTrivial.  Offset
/// is the current offset from the base of the alloca being analyzed.
///
/// If we see at least one access to the value that is as a vector type, set the
/// SawVec flag.
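///
/// For example (illustrative IR only), an alloca of {i32, i32} that is only
/// ever accessed through "bitcast {i32,i32}* %A to i64*" loads and stores can
/// be converted to a single i64 (VecTy is set to VoidTy and IsNotTrivial is
/// set), which ConvertUsesToScalar then rewrites.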
bool SROA::CanConvertToScalar(Value *V, bool &IsNotTrivial, const Type *&VecTy,
                              bool &SawVec, uint64_t Offset,
                              unsigned AllocaSize) {
  for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI!=E; ++UI) {
    Instruction *User = cast<Instruction>(*UI);

    if (LoadInst *LI = dyn_cast<LoadInst>(User)) {
      // Don't break volatile loads.
      if (LI->isVolatile())
        return false;
      MergeInType(LI->getType(), Offset, VecTy,
                  AllocaSize, *TD, V->getContext());
      SawVec |= isa<VectorType>(LI->getType());
      continue;
    }

    if (StoreInst *SI = dyn_cast<StoreInst>(User)) {
      // Storing the pointer, not into the value?
      if (SI->getOperand(0) == V || SI->isVolatile()) return 0;
      MergeInType(SI->getOperand(0)->getType(), Offset,
                  VecTy, AllocaSize, *TD, V->getContext());
      SawVec |= isa<VectorType>(SI->getOperand(0)->getType());
      continue;
    }

    if (BitCastInst *BCI = dyn_cast<BitCastInst>(User)) {
      if (!CanConvertToScalar(BCI, IsNotTrivial, VecTy, SawVec, Offset,
                              AllocaSize))
        return false;
      IsNotTrivial = true;
      continue;
    }

    if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(User)) {
      // If this is a GEP with variable indices, we can't handle it.
      if (!GEP->hasAllConstantIndices())
        return false;

      // Compute the offset that this GEP adds to the pointer.
      SmallVector<Value*, 8> Indices(GEP->op_begin()+1, GEP->op_end());
      uint64_t GEPOffset = TD->getIndexedOffset(GEP->getOperand(0)->getType(),
                                                &Indices[0], Indices.size());
      // See if all uses can be converted.
      if (!CanConvertToScalar(GEP, IsNotTrivial, VecTy, SawVec, Offset+GEPOffset,
                              AllocaSize))
        return false;
      IsNotTrivial = true;
      continue;
    }

    // If this is a constant sized memset of a constant value (e.g. 0) we can
    // handle it.
    if (MemSetInst *MSI = dyn_cast<MemSetInst>(User)) {
      // Store of constant value and constant size.
      if (isa<ConstantInt>(MSI->getValue()) &&
          isa<ConstantInt>(MSI->getLength())) {
        IsNotTrivial = true;
        continue;
      }
    }

    // If this is a memcpy or memmove into or out of the whole allocation, we
    // can handle it like a load or store of the scalar type.
    if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(User)) {
      if (ConstantInt *Len = dyn_cast<ConstantInt>(MTI->getLength()))
        if (Len->getZExtValue() == AllocaSize && Offset == 0) {
          IsNotTrivial = true;
          continue;
        }
    }

    // Ignore dbg intrinsic.
    if (isa<DbgInfoIntrinsic>(User))
      continue;

    // Otherwise, we cannot handle this!
    return false;
  }

  return true;
}
/// ConvertUsesToScalar - Convert all of the users of Ptr to use the new alloca
/// directly.  This happens when we are converting an "integer union" to a
/// single integer scalar, or when we are converting a "vector union" to a
/// vector with insert/extractelement instructions.
///
/// Offset is an offset from the original alloca, in bits that need to be
/// shifted to the right.  By the end of this, there should be no uses of Ptr.
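///
/// For example (illustrative IR only), with NewAI of type i64 and Offset 32,
/// a use such as "store i32 %x, i32* %ptr" becomes a load of the whole i64,
/// an insertion of %x into bits 32..63 (via ConvertScalar_InsertValue), and a
/// store of the merged i64 back to NewAI.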
void SROA::ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI, uint64_t Offset) {
  while (!Ptr->use_empty()) {
    Instruction *User = cast<Instruction>(Ptr->use_back());

    if (BitCastInst *CI = dyn_cast<BitCastInst>(User)) {
      ConvertUsesToScalar(CI, NewAI, Offset);
      CI->eraseFromParent();
      continue;
    }

    if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(User)) {
      // Compute the offset that this GEP adds to the pointer.
      SmallVector<Value*, 8> Indices(GEP->op_begin()+1, GEP->op_end());
      uint64_t GEPOffset = TD->getIndexedOffset(GEP->getOperand(0)->getType(),
                                                &Indices[0], Indices.size());
      ConvertUsesToScalar(GEP, NewAI, Offset+GEPOffset*8);
      GEP->eraseFromParent();
      continue;
    }

    IRBuilder<> Builder(User->getParent(), User);

    if (LoadInst *LI = dyn_cast<LoadInst>(User)) {
      // The load is a bit extract from NewAI shifted right by Offset bits.
      Value *LoadedVal = Builder.CreateLoad(NewAI, "tmp");
      Value *NewLoadVal
        = ConvertScalar_ExtractValue(LoadedVal, LI->getType(), Offset, Builder);
      LI->replaceAllUsesWith(NewLoadVal);
      LI->eraseFromParent();
      continue;
    }

    if (StoreInst *SI = dyn_cast<StoreInst>(User)) {
      assert(SI->getOperand(0) != Ptr && "Consistency error!");
      // FIXME: Remove once builder has Twine API.
      Value *Old = Builder.CreateLoad(NewAI, (NewAI->getName()+".in").str().c_str());
      Value *New = ConvertScalar_InsertValue(SI->getOperand(0), Old, Offset,
                                             Builder);
      Builder.CreateStore(New, NewAI);
      SI->eraseFromParent();
      continue;
    }

    // If this is a constant sized memset of a constant value (e.g. 0) we can
    // transform it into a store of the expanded constant value.
    if (MemSetInst *MSI = dyn_cast<MemSetInst>(User)) {
      assert(MSI->getRawDest() == Ptr && "Consistency error!");
      unsigned NumBytes = cast<ConstantInt>(MSI->getLength())->getZExtValue();
      if (NumBytes != 0) {
        unsigned Val = cast<ConstantInt>(MSI->getValue())->getZExtValue();

        // Compute the value replicated the right number of times.
        APInt APVal(NumBytes*8, Val);

        // Splat the value if non-zero.
        if (Val)
          for (unsigned i = 1; i != NumBytes; ++i)
            APVal |= APVal << 8;

        // FIXME: Remove once builder has Twine API.
        Value *Old = Builder.CreateLoad(NewAI, (NewAI->getName()+".in").str().c_str());
        Value *New = ConvertScalar_InsertValue(
                                    ConstantInt::get(User->getContext(), APVal),
                                               Old, Offset, Builder);
        Builder.CreateStore(New, NewAI);
      }
      MSI->eraseFromParent();
      continue;
    }

    // If this is a memcpy or memmove into or out of the whole allocation, we
    // can handle it like a load or store of the scalar type.
    if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(User)) {
      assert(Offset == 0 && "must be store to start of alloca");

      // If the source and destination are both to the same alloca, then this is
      // a noop copy-to-self, just delete it.  Otherwise, emit a load and store
      // as appropriate.
      AllocaInst *OrigAI = cast<AllocaInst>(Ptr->getUnderlyingObject());

      if (MTI->getSource()->getUnderlyingObject() != OrigAI) {
        // Dest must be OrigAI, change this to be a load from the original
        // pointer (bitcasted), then a store to our new alloca.
        assert(MTI->getRawDest() == Ptr && "Neither use is of pointer?");
        Value *SrcPtr = MTI->getSource();
        SrcPtr = Builder.CreateBitCast(SrcPtr, NewAI->getType());

        LoadInst *SrcVal = Builder.CreateLoad(SrcPtr, "srcval");
        SrcVal->setAlignment(MTI->getAlignment());
        Builder.CreateStore(SrcVal, NewAI);
      } else if (MTI->getDest()->getUnderlyingObject() != OrigAI) {
        // Src must be OrigAI, change this to be a load from NewAI then a store
        // through the original dest pointer (bitcasted).
        assert(MTI->getRawSource() == Ptr && "Neither use is of pointer?");
        LoadInst *SrcVal = Builder.CreateLoad(NewAI, "srcval");

        Value *DstPtr = Builder.CreateBitCast(MTI->getDest(), NewAI->getType());
        StoreInst *NewStore = Builder.CreateStore(SrcVal, DstPtr);
        NewStore->setAlignment(MTI->getAlignment());
      } else {
        // Noop transfer. Src == Dst
      }

      MTI->eraseFromParent();
      continue;
    }

    // If user is a dbg info intrinsic then it is safe to remove it.
    if (isa<DbgInfoIntrinsic>(User)) {
      User->eraseFromParent();
      continue;
    }

    llvm_unreachable("Unsupported operation!");
  }
}
/// ConvertScalar_ExtractValue - Extract a value of type ToType from an integer
/// or vector value FromVal, extracting the bits from the offset specified by
/// Offset.  This returns the value, which is of type ToType.
///
/// This happens when we are converting an "integer union" to a single
/// integer scalar, or when we are converting a "vector union" to a vector with
/// insert/extractelement instructions.
///
/// Offset is an offset from the original alloca, in bits that need to be
/// shifted to the right.
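///
/// For example (illustrative IR only, little-endian), extracting an i16 at
/// bit offset 16 from an i64 %val produces:
///   %shr = lshr i64 %val, 16
///   %res = trunc i64 %shr to i16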
1531 Value
*SROA::ConvertScalar_ExtractValue(Value
*FromVal
, const Type
*ToType
,
1532 uint64_t Offset
, IRBuilder
<> &Builder
) {
1533 // If the load is of the whole new alloca, no conversion is needed.
1534 if (FromVal
->getType() == ToType
&& Offset
== 0)
1537 // If the result alloca is a vector type, this is either an element
1538 // access or a bitcast to another vector type of the same size.
1539 if (const VectorType
*VTy
= dyn_cast
<VectorType
>(FromVal
->getType())) {
1540 if (isa
<VectorType
>(ToType
))
1541 return Builder
.CreateBitCast(FromVal
, ToType
, "tmp");
1543 // Otherwise it must be an element access.
1546 unsigned EltSize
= TD
->getTypeAllocSizeInBits(VTy
->getElementType());
1547 Elt
= Offset
/EltSize
;
1548 assert(EltSize
*Elt
== Offset
&& "Invalid modulus in validity checking");
1550 // Return the element extracted out of it.
1551 Value
*V
= Builder
.CreateExtractElement(FromVal
,
1552 ConstantInt::get(Type::Int32Ty
,Elt
),
1554 if (V
->getType() != ToType
)
1555 V
= Builder
.CreateBitCast(V
, ToType
, "tmp");
  // If ToType is a first class aggregate, extract out each of the pieces and
  // use insertvalue's to form the FCA.
  if (const StructType *ST = dyn_cast<StructType>(ToType)) {
    const StructLayout &Layout = *TD->getStructLayout(ST);
    Value *Res = UndefValue::get(ST);
    for (unsigned i = 0, e = ST->getNumElements(); i != e; ++i) {
      Value *Elt = ConvertScalar_ExtractValue(FromVal, ST->getElementType(i),
                                        Offset+Layout.getElementOffsetInBits(i),
                                              Builder);
      Res = Builder.CreateInsertValue(Res, Elt, i, "tmp");
    }
    return Res;
  }

  if (const ArrayType *AT = dyn_cast<ArrayType>(ToType)) {
    uint64_t EltSize = TD->getTypeAllocSizeInBits(AT->getElementType());
    Value *Res = UndefValue::get(AT);
    for (unsigned i = 0, e = AT->getNumElements(); i != e; ++i) {
      Value *Elt = ConvertScalar_ExtractValue(FromVal, AT->getElementType(),
                                              Offset+i*EltSize, Builder);
      Res = Builder.CreateInsertValue(Res, Elt, i, "tmp");
    }
    return Res;
  }
  // Otherwise, this must be a union that was converted to an integer value.
  const IntegerType *NTy = cast<IntegerType>(FromVal->getType());

  // If this is a big-endian system and the load is narrower than the
  // full alloca type, we need to do a shift to get the right bits.
  int ShAmt = 0;
  if (TD->isBigEndian()) {
    // On big-endian machines, the lowest bit is stored at the bit offset
    // from the pointer given by getTypeStoreSizeInBits.  This matters for
    // integers with a bitwidth that is not a multiple of 8.
    ShAmt = TD->getTypeStoreSizeInBits(NTy) -
            TD->getTypeStoreSizeInBits(ToType) - Offset;
  } else {
    ShAmt = Offset;
  }
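
  // (Worked example with hypothetical sizes: loading an i8 at bit offset 8 of
  // an alloca converted to i32 on a big-endian target gives
  //   ShAmt = 32 - 8 - 8 = 16,
  // i.e. the byte sits 16 bits above the low end of the i32; on a
  // little-endian target ShAmt is simply the 8-bit Offset.)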
  // Note: we support negative shift amounts (by shifting the other way, with
  // shl) even though a negative shift is not defined.  We do this to support
  // (f.e.) loads off the end of a structure where only some bits are used.
  if (ShAmt > 0 && (unsigned)ShAmt < NTy->getBitWidth())
    FromVal = Builder.CreateLShr(FromVal,
                                 ConstantInt::get(FromVal->getType(),
                                                  ShAmt), "tmp");
  else if (ShAmt < 0 && (unsigned)-ShAmt < NTy->getBitWidth())
    FromVal = Builder.CreateShl(FromVal,
                                ConstantInt::get(FromVal->getType(),
                                                 -ShAmt), "tmp");

  // Finally, unconditionally truncate the integer to the right width.
  unsigned LIBitWidth = TD->getTypeSizeInBits(ToType);
  if (LIBitWidth < NTy->getBitWidth())
    FromVal =
      Builder.CreateTrunc(FromVal, IntegerType::get(LIBitWidth), "tmp");
  else if (LIBitWidth > NTy->getBitWidth())
    FromVal =
      Builder.CreateZExt(FromVal, IntegerType::get(LIBitWidth), "tmp");

  // If the result is an integer, this is a trunc or bitcast.
  if (isa<IntegerType>(ToType)) {
    // The width already matches; nothing more to do.
  } else if (ToType->isFloatingPoint() || isa<VectorType>(ToType)) {
    // Just do a bitcast, we know the sizes match up.
    FromVal = Builder.CreateBitCast(FromVal, ToType, "tmp");
  } else {
    // Otherwise must be a pointer.
    FromVal = Builder.CreateIntToPtr(FromVal, ToType, "tmp");
  }
  assert(FromVal->getType() == ToType && "Didn't convert right?");
  return FromVal;
}
/// ConvertScalar_InsertValue - Insert the value "SV" into the existing integer
/// or vector value "Old" at the offset specified by Offset.
///
/// This happens when we are converting an "integer union" to a
/// single integer scalar, or when we are converting a "vector union" to a
/// vector with insert/extractelement instructions.
///
/// Offset is an offset from the original alloca, in bits that need to be
/// shifted to the right.
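///
/// For illustration only (hypothetical example, not from this file): storing
/// an i8 into bits 8..15 of an alloca converted to i32 becomes, on a
/// little-endian target, roughly
///   %ext = zext i8 %SV to i32
///   %shl = shl i32 %ext, 8
///   %old = and i32 %Old, -65281        ; clear bits 8..15 (mask 0xFFFF00FF)
///   %ins = or i32 %old, %shl
/// (big-endian targets use a different shift amount).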
Value *SROA::ConvertScalar_InsertValue(Value *SV, Value *Old,
                                       uint64_t Offset, IRBuilder<> &Builder) {
  // Convert the stored type to the actual type, shift it left to insert,
  // then 'or' into place.
  const Type *AllocaType = Old->getType();
  LLVMContext &Context = Old->getContext();
  if (const VectorType *VTy = dyn_cast<VectorType>(AllocaType)) {
    uint64_t VecSize = TD->getTypeAllocSizeInBits(VTy);
    uint64_t ValSize = TD->getTypeAllocSizeInBits(SV->getType());

    // Changing the whole vector with memset or with an access of a different
    // vector type.
    if (ValSize == VecSize)
      return Builder.CreateBitCast(SV, AllocaType, "tmp");

    uint64_t EltSize = TD->getTypeAllocSizeInBits(VTy->getElementType());

    // Must be an element insertion.
    unsigned Elt = Offset/EltSize;

    if (SV->getType() != VTy->getElementType())
      SV = Builder.CreateBitCast(SV, VTy->getElementType(), "tmp");

    SV = Builder.CreateInsertElement(Old, SV,
                                     ConstantInt::get(Type::Int32Ty, Elt),
                                     "tmp");
    return SV;
  }
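
  // (Sketch, hypothetical types: storing a float into the third slot of an
  // alloca converted to <4 x float> becomes
  //   %v = insertelement <4 x float> %Old, float %SV, i32 2
  // instead of a store through a GEP.)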
  // If SV is a first-class aggregate value, insert each value recursively.
  if (const StructType *ST = dyn_cast<StructType>(SV->getType())) {
    const StructLayout &Layout = *TD->getStructLayout(ST);
    for (unsigned i = 0, e = ST->getNumElements(); i != e; ++i) {
      Value *Elt = Builder.CreateExtractValue(SV, i, "tmp");
      Old = ConvertScalar_InsertValue(Elt, Old,
                                      Offset+Layout.getElementOffsetInBits(i),
                                      Builder);
    }
    return Old;
  }

  if (const ArrayType *AT = dyn_cast<ArrayType>(SV->getType())) {
    uint64_t EltSize = TD->getTypeAllocSizeInBits(AT->getElementType());
    for (unsigned i = 0, e = AT->getNumElements(); i != e; ++i) {
      Value *Elt = Builder.CreateExtractValue(SV, i, "tmp");
      Old = ConvertScalar_InsertValue(Elt, Old, Offset+i*EltSize, Builder);
    }
    return Old;
  }
  // If SV is a float or vector, bitcast it to the appropriate integer type.
  // If it is a pointer, convert it with ptrtoint.
  unsigned SrcWidth = TD->getTypeSizeInBits(SV->getType());
  unsigned DestWidth = TD->getTypeSizeInBits(AllocaType);
  unsigned SrcStoreWidth = TD->getTypeStoreSizeInBits(SV->getType());
  unsigned DestStoreWidth = TD->getTypeStoreSizeInBits(AllocaType);
  if (SV->getType()->isFloatingPoint() || isa<VectorType>(SV->getType()))
    SV = Builder.CreateBitCast(SV, IntegerType::get(SrcWidth), "tmp");
  else if (isa<PointerType>(SV->getType()))
    SV = Builder.CreatePtrToInt(SV, TD->getIntPtrType(), "tmp");

  // Zero extend or truncate the value if needed.
  if (SV->getType() != AllocaType) {
    if (SV->getType()->getPrimitiveSizeInBits() <
        AllocaType->getPrimitiveSizeInBits())
      SV = Builder.CreateZExt(SV, AllocaType, "tmp");
    else {
      // Truncation may be needed if storing more than the alloca can hold
      // (undefined behavior).
      SV = Builder.CreateTrunc(SV, AllocaType, "tmp");
      SrcWidth = DestWidth;
      SrcStoreWidth = DestStoreWidth;
    }
  }
  // If this is a big-endian system and the store is narrower than the
  // full alloca type, we need to do a shift to get the right bits.
  int ShAmt = 0;
  if (TD->isBigEndian()) {
    // On big-endian machines, the lowest bit is stored at the bit offset
    // from the pointer given by getTypeStoreSizeInBits.  This matters for
    // integers with a bitwidth that is not a multiple of 8.
    ShAmt = DestStoreWidth - SrcStoreWidth - Offset;
  } else {
    ShAmt = Offset;
  }
  // Note: we support negative shift amounts (by shifting the other way, with
  // shr) even though a negative shift is not defined.  We do this to support
  // (f.e.) stores off the end of a structure where only some bits in the
  // structure are set.
  APInt Mask(APInt::getLowBitsSet(DestWidth, SrcWidth));
  if (ShAmt > 0 && (unsigned)ShAmt < DestWidth) {
    SV = Builder.CreateShl(SV, ConstantInt::get(SV->getType(),
                           ShAmt), "tmp");
    Mask = Mask.shl(ShAmt);
  } else if (ShAmt < 0 && (unsigned)-ShAmt < DestWidth) {
    SV = Builder.CreateLShr(SV, ConstantInt::get(SV->getType(),
                            -ShAmt), "tmp");
    Mask = Mask.lshr(-ShAmt);
  }
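
  // (Worked example with hypothetical widths: inserting an i8 at ShAmt = 8
  // into an i32 gives Mask = 0x0000FF00 after the shift, so the 'and' with
  // ~Mask below keeps every bit of Old except bits 8..15 before the 'or'
  // merges in SV.)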
  // Mask out the bits we are about to insert from the old value, and 'or'
  // in the new bits.
  if (SrcWidth != DestWidth) {
    assert(DestWidth > SrcWidth);
    Old = Builder.CreateAnd(Old, ConstantInt::get(Context, ~Mask), "mask");
    SV = Builder.CreateOr(Old, SV, "ins");
  }
  return SV;
}
/// PointsToConstantGlobal - Return true if V (possibly indirectly) points to
/// some part of a constant global variable.  This intentionally only accepts
/// constant expressions because we can't rewrite arbitrary instructions.
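///
/// (Hypothetical example: a constant expression such as
///   getelementptr ([4 x i32]* @ConstTable, i32 0, i32 2)
/// ultimately resolves to @ConstTable, so it is accepted as long as
/// @ConstTable is marked 'constant'.)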
static bool PointsToConstantGlobal(Value *V) {
  if (GlobalVariable *GV = dyn_cast<GlobalVariable>(V))
    return GV->isConstant();
  if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V))
    if (CE->getOpcode() == Instruction::BitCast ||
        CE->getOpcode() == Instruction::GetElementPtr)
      return PointsToConstantGlobal(CE->getOperand(0));
  return false;
}
/// isOnlyCopiedFromConstantGlobal - Recursively walk the uses of a (derived)
/// pointer to an alloca.  Ignore any reads of the pointer, return false if we
/// see any stores or other unknown uses.  If we see pointer arithmetic, keep
/// track of whether it moves the pointer (with isOffset) but otherwise traverse
/// the uses.  If we see a memcpy/memmove that targets an unoffseted pointer to
/// the alloca, and if the source pointer is a pointer to a constant global, we
/// can optimize this.
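///
/// The shape being matched is roughly (hypothetical IR, not from this file):
///   %A = alloca %T
///   call void @llvm.memcpy.i32(i8* %A.i8, i8* bitcast (%T* @G to i8*), ...)
///   ... only loads of %A afterwards ...
/// where @G is marked 'constant'; loads of %A can then read @G directly.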
static bool isOnlyCopiedFromConstantGlobal(Value *V, Instruction *&TheCopy,
                                           bool isOffset) {
  for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E; ++UI) {
    if (LoadInst *LI = dyn_cast<LoadInst>(*UI))
      // Ignore non-volatile loads, they are always ok.
      if (!LI->isVolatile())
        continue;

    if (BitCastInst *BCI = dyn_cast<BitCastInst>(*UI)) {
      // If uses of the bitcast are ok, we are ok.
      if (!isOnlyCopiedFromConstantGlobal(BCI, TheCopy, isOffset))
        return false;
      continue;
    }
    if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(*UI)) {
      // If the GEP has all zero indices, it doesn't offset the pointer.  If it
      // doesn't, it does.
      if (!isOnlyCopiedFromConstantGlobal(GEP, TheCopy,
                                          isOffset || !GEP->hasAllZeroIndices()))
        return false;
      continue;
    }

    // If this isn't our memcpy/memmove, reject it as something we can't
    // handle.
    if (!isa<MemTransferInst>(*UI))
      return false;

    // If we already have seen a copy, reject the second one.
    if (TheCopy) return false;

    // If the pointer has been offset from the start of the alloca, we can't
    // safely handle this.
    if (isOffset) return false;

    // If the memintrinsic isn't using the alloca as the dest, reject it.
    if (UI.getOperandNo() != 1) return false;

    MemIntrinsic *MI = cast<MemIntrinsic>(*UI);

    // If the source of the memcpy/move is not a constant global, reject it.
    if (!PointsToConstantGlobal(MI->getOperand(2)))
      return false;

    // Otherwise, the transform is safe.  Remember the copy instruction.
    TheCopy = MI;
  }
  return true;
}
/// isOnlyCopiedFromConstantGlobal - Return the copy instruction if the
/// specified alloca is only modified by a copy from a constant global, or null
/// otherwise.  If we can prove this, we can replace any uses of the alloca
/// with uses of the global directly.
Instruction *SROA::isOnlyCopiedFromConstantGlobal(AllocationInst *AI) {
  Instruction *TheCopy = 0;
  if (::isOnlyCopiedFromConstantGlobal(AI, TheCopy, false))