//===- GlobalOpt.cpp - Optimize Global Variables --------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This pass transforms simple global variables that never have their address
// taken.  If obviously true, it marks read/write globals as constant, deletes
// variables only stored to, etc.
//
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "globalopt"
#include "llvm/Transforms/IPO.h"
#include "llvm/CallingConv.h"
#include "llvm/Constants.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Instructions.h"
#include "llvm/IntrinsicInst.h"
#include "llvm/LLVMContext.h"
#include "llvm/Module.h"
#include "llvm/Pass.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Support/CallSite.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/GetElementPtrTypeIterator.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/STLExtras.h"
using namespace llvm;
STATISTIC(NumMarked    , "Number of globals marked constant");
STATISTIC(NumSRA       , "Number of aggregate globals broken into scalars");
STATISTIC(NumHeapSRA   , "Number of heap objects SRA'd");
STATISTIC(NumSubstitute,"Number of globals with initializers stored into them");
STATISTIC(NumDeleted   , "Number of globals deleted");
STATISTIC(NumFnDeleted , "Number of functions deleted");
STATISTIC(NumGlobUses  , "Number of global uses devirtualized");
STATISTIC(NumLocalized , "Number of globals localized");
STATISTIC(NumShrunkToBool  , "Number of global vars shrunk to booleans");
STATISTIC(NumFastCallFns   , "Number of functions converted to fastcc");
STATISTIC(NumCtorsEvaluated, "Number of static ctors evaluated");
STATISTIC(NumNestRemoved   , "Number of nest attributes removed");
STATISTIC(NumAliasesResolved, "Number of global aliases resolved");
STATISTIC(NumAliasesRemoved , "Number of global aliases eliminated");

namespace {
  struct VISIBILITY_HIDDEN GlobalOpt : public ModulePass {
    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
    }
    static char ID; // Pass identification, replacement for typeid
    GlobalOpt() : ModulePass(&ID) {}

    bool runOnModule(Module &M);

  private:
    GlobalVariable *FindGlobalCtors(Module &M);
    bool OptimizeFunctions(Module &M);
    bool OptimizeGlobalVars(Module &M);
    bool OptimizeGlobalAliases(Module &M);
    bool OptimizeGlobalCtorsList(GlobalVariable *&GCL);
    bool ProcessInternalGlobal(GlobalVariable *GV, Module::global_iterator &GVI);
  };
}

char GlobalOpt::ID = 0;
static RegisterPass<GlobalOpt> X("globalopt", "Global Variable Optimizer");

ModulePass *llvm::createGlobalOptimizerPass() { return new GlobalOpt(); }

namespace {

/// GlobalStatus - As we analyze each global, keep track of some information
/// about it.  If we find out that the address of the global is taken, none of
/// this info will be accurate.
struct VISIBILITY_HIDDEN GlobalStatus {
  /// isLoaded - True if the global is ever loaded.  If the global isn't ever
  /// loaded it can be deleted.
  bool isLoaded;

  /// StoredType - Keep track of what stores to the global look like.
  ///
  enum StoredType {
    /// NotStored - There is no store to this global.  It can thus be marked
    /// constant.
    NotStored,

    /// isInitializerStored - This global is stored to, but the only thing
    /// stored is the constant it was initialized with.  This is only tracked
    /// for scalar globals.
    isInitializerStored,

    /// isStoredOnce - This global is stored to, but only its initializer and
    /// one other value is ever stored to it.  If this global isStoredOnce, we
    /// track the value stored to it in StoredOnceValue below.  This is only
    /// tracked for scalar globals.
    isStoredOnce,

    /// isStored - This global is stored to by multiple values or something else
    /// that we cannot track.
    isStored
  } StoredType;

  /// StoredOnceValue - If only one value (besides the initializer constant) is
  /// ever stored to this global, keep track of what value it is.
  Value *StoredOnceValue;

  /// AccessingFunction/HasMultipleAccessingFunctions - These start out
  /// null/false.  When the first accessing function is noticed, it is recorded.
  /// When a second different accessing function is noticed,
  /// HasMultipleAccessingFunctions is set to true.
  Function *AccessingFunction;
  bool HasMultipleAccessingFunctions;

  /// HasNonInstructionUser - Set to true if this global has a user that is not
  /// an instruction (e.g. a constant expr or GV initializer).
  bool HasNonInstructionUser;

  /// HasPHIUser - Set to true if this global has a user that is a PHI node.
  bool HasPHIUser;

  GlobalStatus() : isLoaded(false), StoredType(NotStored), StoredOnceValue(0),
                   AccessingFunction(0), HasMultipleAccessingFunctions(false),
                   HasNonInstructionUser(false), HasPHIUser(false) {}
};

}
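
// Illustrative sketch (hypothetical IR, not from the original source): for a
// global such as
//
//   @G = internal global i32 0
//   ...
//   store i32 42, i32* @G        ; the only non-initializer store
//
// the analysis below would record StoredType == isStoredOnce, with
// StoredOnceValue pointing at the stored i32 42.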

// SafeToDestroyConstant - It is safe to destroy a constant iff it is only used
// by constants itself.  Note that constants cannot be cyclic, so this test is
// pretty easy to implement recursively.
//
static bool SafeToDestroyConstant(Constant *C) {
  if (isa<GlobalValue>(C)) return false;

  for (Value::use_iterator UI = C->use_begin(), E = C->use_end(); UI != E; ++UI)
    if (Constant *CU = dyn_cast<Constant>(*UI)) {
      if (!SafeToDestroyConstant(CU)) return false;
    } else
      return false;
  return true;
}

/// AnalyzeGlobal - Look at all uses of the global and fill in the GlobalStatus
/// structure.  If the global has its address taken, return true to indicate we
/// can't do anything with it.
///
static bool AnalyzeGlobal(Value *V, GlobalStatus &GS,
                          SmallPtrSet<PHINode*, 16> &PHIUsers) {
  for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E; ++UI)
    if (ConstantExpr *CE = dyn_cast<ConstantExpr>(*UI)) {
      GS.HasNonInstructionUser = true;

      if (AnalyzeGlobal(CE, GS, PHIUsers)) return true;

    } else if (Instruction *I = dyn_cast<Instruction>(*UI)) {
      if (!GS.HasMultipleAccessingFunctions) {
        Function *F = I->getParent()->getParent();
        if (GS.AccessingFunction == 0)
          GS.AccessingFunction = F;
        else if (GS.AccessingFunction != F)
          GS.HasMultipleAccessingFunctions = true;
      }
      if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
        GS.isLoaded = true;
        if (LI->isVolatile()) return true;  // Don't hack on volatile loads.
      } else if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
        // Don't allow a store OF the address, only stores TO the address.
        if (SI->getOperand(0) == V) return true;

        if (SI->isVolatile()) return true;  // Don't hack on volatile stores.

        // If this is a direct store to the global (i.e., the global is a scalar
        // value, not an aggregate), keep more specific information about
        // stores.
        if (GS.StoredType != GlobalStatus::isStored) {
          if (GlobalVariable *GV = dyn_cast<GlobalVariable>(SI->getOperand(1))){
            Value *StoredVal = SI->getOperand(0);
            if (StoredVal == GV->getInitializer()) {
              if (GS.StoredType < GlobalStatus::isInitializerStored)
                GS.StoredType = GlobalStatus::isInitializerStored;
            } else if (isa<LoadInst>(StoredVal) &&
                       cast<LoadInst>(StoredVal)->getOperand(0) == GV) {
              // Storing the global's own loaded value back into it (G = G).
              if (GS.StoredType < GlobalStatus::isInitializerStored)
                GS.StoredType = GlobalStatus::isInitializerStored;
            } else if (GS.StoredType < GlobalStatus::isStoredOnce) {
              GS.StoredType = GlobalStatus::isStoredOnce;
              GS.StoredOnceValue = StoredVal;
            } else if (GS.StoredType == GlobalStatus::isStoredOnce &&
                       GS.StoredOnceValue == StoredVal) {
              // Storing the same value again: still isStoredOnce.
            } else {
              GS.StoredType = GlobalStatus::isStored;
            }
          } else {
            GS.StoredType = GlobalStatus::isStored;
          }
        }
      } else if (isa<GetElementPtrInst>(I)) {
        if (AnalyzeGlobal(I, GS, PHIUsers)) return true;
      } else if (isa<SelectInst>(I)) {
        if (AnalyzeGlobal(I, GS, PHIUsers)) return true;
      } else if (PHINode *PN = dyn_cast<PHINode>(I)) {
        // PHI nodes we can check just like select or GEP instructions, but we
        // have to be careful about infinite recursion.
        if (PHIUsers.insert(PN))  // Not already visited.
          if (AnalyzeGlobal(I, GS, PHIUsers)) return true;
        GS.HasPHIUser = true;
      } else if (isa<CmpInst>(I)) {
        // Comparisons don't take the address; nothing to record.
      } else if (isa<MemTransferInst>(I)) {
        if (I->getOperand(1) == V)
          GS.StoredType = GlobalStatus::isStored;
        if (I->getOperand(2) == V)
          GS.isLoaded = true;
      } else if (isa<MemSetInst>(I)) {
        assert(I->getOperand(1) == V && "Memset only takes one pointer!");
        GS.StoredType = GlobalStatus::isStored;
      } else {
        return true;  // Any other non-load instruction might take address!
      }
    } else if (Constant *C = dyn_cast<Constant>(*UI)) {
      GS.HasNonInstructionUser = true;
      // We might have a dead and dangling constant hanging off of here.
      if (!SafeToDestroyConstant(C))
        return true;
    } else {
      GS.HasNonInstructionUser = true;
      // Otherwise must be some other user.
      return true;
    }

  return false;
}

static Constant *getAggregateConstantElement(Constant *Agg, Constant *Idx,
                                             LLVMContext &Context) {
  ConstantInt *CI = dyn_cast<ConstantInt>(Idx);
  if (CI == 0) return 0;  // A non-constant-int index can't select an element.
  unsigned IdxV = CI->getZExtValue();

  if (ConstantStruct *CS = dyn_cast<ConstantStruct>(Agg)) {
    if (IdxV < CS->getNumOperands()) return CS->getOperand(IdxV);
  } else if (ConstantArray *CA = dyn_cast<ConstantArray>(Agg)) {
    if (IdxV < CA->getNumOperands()) return CA->getOperand(IdxV);
  } else if (ConstantVector *CP = dyn_cast<ConstantVector>(Agg)) {
    if (IdxV < CP->getNumOperands()) return CP->getOperand(IdxV);
  } else if (isa<ConstantAggregateZero>(Agg)) {
    if (const StructType *STy = dyn_cast<StructType>(Agg->getType())) {
      if (IdxV < STy->getNumElements())
        return Constant::getNullValue(STy->getElementType(IdxV));
    } else if (const SequentialType *STy =
               dyn_cast<SequentialType>(Agg->getType())) {
      return Constant::getNullValue(STy->getElementType());
    }
  } else if (isa<UndefValue>(Agg)) {
    if (const StructType *STy = dyn_cast<StructType>(Agg->getType())) {
      if (IdxV < STy->getNumElements())
        return UndefValue::get(STy->getElementType(IdxV));
    } else if (const SequentialType *STy =
               dyn_cast<SequentialType>(Agg->getType())) {
      return UndefValue::get(STy->getElementType());
    }
  }
  return 0;
}

/// CleanupConstantGlobalUsers - We just marked GV constant.  Loop over all
/// users of the global, cleaning up the obvious ones.  This is largely just a
/// quick scan over the use list to clean up the easy and obvious cruft.  This
/// returns true if it made a change.
static bool CleanupConstantGlobalUsers(Value *V, Constant *Init,
                                       LLVMContext &Context) {
  bool Changed = false;
  for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E;) {
    User *U = *UI++;

    if (LoadInst *LI = dyn_cast<LoadInst>(U)) {
      if (Init) {
        // Replace the load with the initializer.
        LI->replaceAllUsesWith(Init);
        LI->eraseFromParent();
        Changed = true;
      }
    } else if (StoreInst *SI = dyn_cast<StoreInst>(U)) {
      // Store must be unreachable or storing Init into the global.
      SI->eraseFromParent();
      Changed = true;
    } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(U)) {
      if (CE->getOpcode() == Instruction::GetElementPtr) {
        Constant *SubInit = 0;
        if (Init)
          SubInit = ConstantFoldLoadThroughGEPConstantExpr(Init, CE, Context);
        Changed |= CleanupConstantGlobalUsers(CE, SubInit, Context);
      } else if (CE->getOpcode() == Instruction::BitCast &&
                 isa<PointerType>(CE->getType())) {
        // Pointer cast, delete any stores and memsets to the global.
        Changed |= CleanupConstantGlobalUsers(CE, 0, Context);
      }

      if (CE->use_empty()) {
        CE->destroyConstant();
        Changed = true;
      }
    } else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(U)) {
      // Do not transform "gepinst (gep constexpr (GV))" here, because forming
      // "gepconstexpr (gep constexpr (GV))" will cause the two gep's to fold
      // and will invalidate our notion of what Init is.
      Constant *SubInit = 0;
      if (!isa<ConstantExpr>(GEP->getOperand(0))) {
        ConstantExpr *CE =
          dyn_cast_or_null<ConstantExpr>(ConstantFoldInstruction(GEP, Context));
        if (Init && CE && CE->getOpcode() == Instruction::GetElementPtr)
          SubInit = ConstantFoldLoadThroughGEPConstantExpr(Init, CE, Context);
      }
      Changed |= CleanupConstantGlobalUsers(GEP, SubInit, Context);

      if (GEP->use_empty()) {
        GEP->eraseFromParent();
        Changed = true;
      }
    } else if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(U)) { // memset/cpy/mv
      if (MI->getRawDest() == V) {
        MI->eraseFromParent();
        Changed = true;
      }
    } else if (Constant *C = dyn_cast<Constant>(U)) {
      // If we have a chain of dead constantexprs or other things dangling from
      // us, and if they are all dead, nuke them without remorse.
      if (SafeToDestroyConstant(C)) {
        C->destroyConstant();
        // This could have invalidated UI, start over from scratch.
        CleanupConstantGlobalUsers(V, Init, Context);
        return true;
      }
    }
  }
  return Changed;
}

/// isSafeSROAElementUse - Return true if the specified instruction is a safe
/// user of a derived expression from a global that we want to SROA.
static bool isSafeSROAElementUse(Value *V) {
  // We might have a dead and dangling constant hanging off of here.
  if (Constant *C = dyn_cast<Constant>(V))
    return SafeToDestroyConstant(C);

  Instruction *I = dyn_cast<Instruction>(V);
  if (!I) return false;

  // Loads are ok.
  if (isa<LoadInst>(I)) return true;

  // Stores *to* the pointer are ok.
  if (StoreInst *SI = dyn_cast<StoreInst>(I))
    return SI->getOperand(0) != V;

  // Otherwise, it must be a GEP.
  GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(I);
  if (GEPI == 0) return false;

  // The GEP must have at least three operands and a constant zero first index.
  if (GEPI->getNumOperands() < 3 || !isa<Constant>(GEPI->getOperand(1)) ||
      !cast<Constant>(GEPI->getOperand(1))->isNullValue())
    return false;

  // All of the GEP's uses must be safe as well.
  for (Value::use_iterator I = GEPI->use_begin(), E = GEPI->use_end();
       I != E; ++I)
    if (!isSafeSROAElementUse(*I))
      return false;
  return true;
}

/// IsUserOfGlobalSafeForSRA - U is a direct user of the specified global value.
/// Look at it and its uses and decide whether it is safe to SROA this global.
///
static bool IsUserOfGlobalSafeForSRA(User *U, GlobalValue *GV) {
  // The user of the global must be a GEP Inst or a ConstantExpr GEP.
  if (!isa<GetElementPtrInst>(U) &&
      (!isa<ConstantExpr>(U) ||
       cast<ConstantExpr>(U)->getOpcode() != Instruction::GetElementPtr))
    return false;

  // Check to see if this ConstantExpr GEP is SRA'able.  In particular, we
  // don't like < 3 operand CE's, and we don't like non-constant integer
  // indices.  This enforces that all uses are 'gep GV, 0, C, ...' for some
  // constant value of C.
  if (U->getNumOperands() < 3 || !isa<Constant>(U->getOperand(1)) ||
      !cast<Constant>(U->getOperand(1))->isNullValue() ||
      !isa<ConstantInt>(U->getOperand(2)))
    return false;

  gep_type_iterator GEPI = gep_type_begin(U), E = gep_type_end(U);
  ++GEPI;  // Skip over the pointer index.

  // If this is a use of an array allocation, do a bit more checking for sanity.
  if (const ArrayType *AT = dyn_cast<ArrayType>(*GEPI)) {
    uint64_t NumElements = AT->getNumElements();
    ConstantInt *Idx = cast<ConstantInt>(U->getOperand(2));

    // Check to make sure that index falls within the array.  If not,
    // something funny is going on, so we won't do the optimization.
    //
    if (Idx->getZExtValue() >= NumElements)
      return false;

    // We cannot scalar repl this level of the array unless any array
    // sub-indices are in-range constants.  In particular, consider:
    // A[0][i].  We cannot know that the user isn't doing invalid things like
    // allowing i to index an out-of-range subscript that accesses A[1].
    //
    // Scalar replacing *just* the outer index of the array is probably not
    // going to be a win anyway, so just give up.
    for (++GEPI; // Skip array index.
         GEPI != E;
         ++GEPI) {
      uint64_t NumElements;
      if (const ArrayType *SubArrayTy = dyn_cast<ArrayType>(*GEPI))
        NumElements = SubArrayTy->getNumElements();
      else if (const VectorType *SubVectorTy = dyn_cast<VectorType>(*GEPI))
        NumElements = SubVectorTy->getNumElements();
      else {
        assert(isa<StructType>(*GEPI) &&
               "Indexed GEP type is not array, vector, or struct!");
        continue;
      }

      ConstantInt *IdxVal = dyn_cast<ConstantInt>(GEPI.getOperand());
      if (!IdxVal || IdxVal->getZExtValue() >= NumElements)
        return false;
    }
  }

  for (Value::use_iterator I = U->use_begin(), E = U->use_end(); I != E; ++I)
    if (!isSafeSROAElementUse(*I))
      return false;
  return true;
}

/// GlobalUsersSafeToSRA - Look at all uses of the global and decide whether it
/// is safe for us to perform this transformation.
///
static bool GlobalUsersSafeToSRA(GlobalValue *GV) {
  for (Value::use_iterator UI = GV->use_begin(), E = GV->use_end();
       UI != E; ++UI)
    if (!IsUserOfGlobalSafeForSRA(*UI, GV))
      return false;
  return true;
}

/// SRAGlobal - Perform scalar replacement of aggregates on the specified global
/// variable.  This opens the door for other optimizations by exposing the
/// behavior of the program in a more fine-grained way.  We have determined that
/// this transformation is safe already.  We return the first global variable we
/// insert so that the caller can reprocess it.
static GlobalVariable *SRAGlobal(GlobalVariable *GV, const TargetData &TD,
                                 LLVMContext &Context) {
  // Make sure this global only has simple uses that we can SRA.
  if (!GlobalUsersSafeToSRA(GV))
    return 0;

  assert(GV->hasLocalLinkage() && !GV->isConstant());
  Constant *Init = GV->getInitializer();
  const Type *Ty = Init->getType();

  std::vector<GlobalVariable*> NewGlobals;
  Module::GlobalListType &Globals = GV->getParent()->getGlobalList();

  // Get the alignment of the global, either explicit or target-specific.
  unsigned StartAlignment = GV->getAlignment();
  if (StartAlignment == 0)
    StartAlignment = TD.getABITypeAlignment(GV->getType());

  if (const StructType *STy = dyn_cast<StructType>(Ty)) {
    NewGlobals.reserve(STy->getNumElements());
    const StructLayout &Layout = *TD.getStructLayout(STy);
    for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
      Constant *In = getAggregateConstantElement(Init,
                              ConstantInt::get(Type::getInt32Ty(Context), i),
                              Context);
      assert(In && "Couldn't get element of initializer?");
      GlobalVariable *NGV = new GlobalVariable(Context,
                                               STy->getElementType(i), false,
                                               GlobalVariable::InternalLinkage,
                                               In, GV->getName()+"."+Twine(i),
                                               GV->isThreadLocal(),
                                              GV->getType()->getAddressSpace());
      Globals.insert(GV, NGV);
      NewGlobals.push_back(NGV);

      // Calculate the known alignment of the field.  If the original aggregate
      // had 256 byte alignment for example, something might depend on that:
      // propagate info to each field.
      uint64_t FieldOffset = Layout.getElementOffset(i);
      unsigned NewAlign = (unsigned)MinAlign(StartAlignment, FieldOffset);
      if (NewAlign > TD.getABITypeAlignment(STy->getElementType(i)))
        NGV->setAlignment(NewAlign);
    }
  } else if (const SequentialType *STy = dyn_cast<SequentialType>(Ty)) {
    unsigned NumElements = 0;
    if (const ArrayType *ATy = dyn_cast<ArrayType>(STy))
      NumElements = ATy->getNumElements();
    else
      NumElements = cast<VectorType>(STy)->getNumElements();

    if (NumElements > 16 && GV->hasNUsesOrMore(16))
      return 0; // It's not worth it.
    NewGlobals.reserve(NumElements);

    uint64_t EltSize = TD.getTypeAllocSize(STy->getElementType());
    unsigned EltAlign = TD.getABITypeAlignment(STy->getElementType());
    for (unsigned i = 0, e = NumElements; i != e; ++i) {
      Constant *In = getAggregateConstantElement(Init,
                              ConstantInt::get(Type::getInt32Ty(Context), i),
                              Context);
      assert(In && "Couldn't get element of initializer?");

      GlobalVariable *NGV = new GlobalVariable(Context,
                                               STy->getElementType(), false,
                                               GlobalVariable::InternalLinkage,
                                               In, GV->getName()+"."+Twine(i),
                                               GV->isThreadLocal(),
                                              GV->getType()->getAddressSpace());
      Globals.insert(GV, NGV);
      NewGlobals.push_back(NGV);

      // Calculate the known alignment of the field.  If the original aggregate
      // had 256 byte alignment for example, something might depend on that:
      // propagate info to each field.
      unsigned NewAlign = (unsigned)MinAlign(StartAlignment, EltSize*i);
      if (NewAlign > EltAlign)
        NGV->setAlignment(NewAlign);
    }
  }

  if (NewGlobals.empty())
    return 0;

  DEBUG(errs() << "PERFORMING GLOBAL SRA ON: " << *GV);

  Constant *NullInt = Constant::getNullValue(Type::getInt32Ty(Context));

  // Loop over all of the uses of the global, replacing the constantexpr geps,
  // with smaller constantexpr geps or direct references.
  while (!GV->use_empty()) {
    User *GEP = GV->use_back();
    assert(((isa<ConstantExpr>(GEP) &&
             cast<ConstantExpr>(GEP)->getOpcode()==Instruction::GetElementPtr)||
            isa<GetElementPtrInst>(GEP)) && "NonGEP CE's are not SRAable!");

    // Ignore the first index, which has to be zero or else the program is quite
    // broken (undefined).  Get the 2nd operand, which is the structure or array
    // index.
    unsigned Val = cast<ConstantInt>(GEP->getOperand(2))->getZExtValue();
    if (Val >= NewGlobals.size()) Val = 0; // Out of bound array access.

    Value *NewPtr = NewGlobals[Val];

    // Form a shorter GEP if needed.
    if (GEP->getNumOperands() > 3) {
      if (ConstantExpr *CE = dyn_cast<ConstantExpr>(GEP)) {
        SmallVector<Constant*, 8> Idxs;
        Idxs.push_back(NullInt);
        for (unsigned i = 3, e = CE->getNumOperands(); i != e; ++i)
          Idxs.push_back(CE->getOperand(i));
        NewPtr = ConstantExpr::getGetElementPtr(cast<Constant>(NewPtr),
                                                &Idxs[0], Idxs.size());
      } else {
        GetElementPtrInst *GEPI = cast<GetElementPtrInst>(GEP);
        SmallVector<Value*, 8> Idxs;
        Idxs.push_back(NullInt);
        for (unsigned i = 3, e = GEPI->getNumOperands(); i != e; ++i)
          Idxs.push_back(GEPI->getOperand(i));
        NewPtr = GetElementPtrInst::Create(NewPtr, Idxs.begin(), Idxs.end(),
                                           GEPI->getName()+"."+Twine(Val),GEPI);
      }
    }
    GEP->replaceAllUsesWith(NewPtr);

    if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(GEP))
      GEPI->eraseFromParent();
    else
      cast<ConstantExpr>(GEP)->destroyConstant();
  }

  // Delete the old global, now that it is dead.
  Globals.erase(GV);
  ++NumSRA;

  // Loop over the new globals array deleting any globals that are obviously
  // dead.  This can arise due to scalarization of a structure or an array that
  // has elements that are dead.
  unsigned FirstGlobal = 0;
  for (unsigned i = 0, e = NewGlobals.size(); i != e; ++i)
    if (NewGlobals[i]->use_empty()) {
      Globals.erase(NewGlobals[i]);
      if (FirstGlobal == i) ++FirstGlobal;
    }

  return FirstGlobal != NewGlobals.size() ? NewGlobals[FirstGlobal] : 0;
}

/// AllUsesOfValueWillTrapIfNull - Return true if all users of the specified
/// value will trap if the value is dynamically null.  PHIs keeps track of any
/// phi nodes we've seen to avoid reprocessing them.
static bool AllUsesOfValueWillTrapIfNull(Value *V,
                                         SmallPtrSet<PHINode*, 8> &PHIs) {
  for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E; ++UI)
    if (isa<LoadInst>(*UI)) {
      // Will trap.
    } else if (StoreInst *SI = dyn_cast<StoreInst>(*UI)) {
      if (SI->getOperand(0) == V) {
        //cerr << "NONTRAPPING USE: " << **UI;
        return false;  // Storing the value.
      }
    } else if (CallInst *CI = dyn_cast<CallInst>(*UI)) {
      if (CI->getOperand(0) != V) {
        //cerr << "NONTRAPPING USE: " << **UI;
        return false;  // Not calling the ptr
      }
    } else if (InvokeInst *II = dyn_cast<InvokeInst>(*UI)) {
      if (II->getOperand(0) != V) {
        //cerr << "NONTRAPPING USE: " << **UI;
        return false;  // Not calling the ptr
      }
    } else if (BitCastInst *CI = dyn_cast<BitCastInst>(*UI)) {
      if (!AllUsesOfValueWillTrapIfNull(CI, PHIs)) return false;
    } else if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(*UI)) {
      if (!AllUsesOfValueWillTrapIfNull(GEPI, PHIs)) return false;
    } else if (PHINode *PN = dyn_cast<PHINode>(*UI)) {
      // If we've already seen this phi node, ignore it, it has already been
      // checked.
      if (PHIs.insert(PN))
        return AllUsesOfValueWillTrapIfNull(PN, PHIs);
    } else if (isa<ICmpInst>(*UI) &&
               isa<ConstantPointerNull>(UI->getOperand(1))) {
      // Ignore icmp X, null
    } else {
      //cerr << "NONTRAPPING USE: " << **UI;
      return false;
    }

  return true;
}

/// AllUsesOfLoadedValueWillTrapIfNull - Return true if all uses of any loads
/// from GV will trap if the loaded value is null.  Note that this also permits
/// comparisons of the loaded value against null, as a special case.
static bool AllUsesOfLoadedValueWillTrapIfNull(GlobalVariable *GV) {
  for (Value::use_iterator UI = GV->use_begin(), E = GV->use_end(); UI!=E; ++UI)
    if (LoadInst *LI = dyn_cast<LoadInst>(*UI)) {
      SmallPtrSet<PHINode*, 8> PHIs;
      if (!AllUsesOfValueWillTrapIfNull(LI, PHIs))
        return false;
    } else if (isa<StoreInst>(*UI)) {
      // Ignore stores to the global.
    } else {
      // We don't know or understand this user, bail out.
      //cerr << "UNKNOWN USER OF GLOBAL!: " << **UI;
      return false;
    }

  return true;
}
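
// Illustrative sketch (hypothetical IR): a loaded pointer whose only uses are
//
//   %p = load i8** @G
//   %v = load i8* %p             ; traps if %p is dynamically null
//   %c = icmp eq i8* %p, null    ; permitted as the special case
//
// satisfies this predicate, so any path reaching the dereference must occur
// after the one non-null store into @G.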

static bool OptimizeAwayTrappingUsesOfValue(Value *V, Constant *NewV,
                                            LLVMContext &Context) {
  bool Changed = false;
  for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E; ) {
    Instruction *I = cast<Instruction>(*UI++);
    if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
      LI->setOperand(0, NewV);
      Changed = true;
    } else if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
      if (SI->getOperand(1) == V) {
        SI->setOperand(1, NewV);
        Changed = true;
      }
    } else if (isa<CallInst>(I) || isa<InvokeInst>(I)) {
      if (I->getOperand(0) == V) {
        // Calling through the pointer!  Turn into a direct call, but be careful
        // that the pointer is not also being passed as an argument.
        I->setOperand(0, NewV);
        Changed = true;
        bool PassedAsArg = false;
        for (unsigned i = 1, e = I->getNumOperands(); i != e; ++i)
          if (I->getOperand(i) == V) {
            PassedAsArg = true;
            I->setOperand(i, NewV);
          }

        if (PassedAsArg) {
          // Being passed as an argument also.  Be careful to not invalidate UI!
          UI = V->use_begin();
        }
      }
    } else if (CastInst *CI = dyn_cast<CastInst>(I)) {
      Changed |= OptimizeAwayTrappingUsesOfValue(CI,
                                ConstantExpr::getCast(CI->getOpcode(),
                                                NewV, CI->getType()), Context);
      if (CI->use_empty()) {
        Changed = true;
        CI->eraseFromParent();
      }
    } else if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(I)) {
      // Should handle GEP here.
      SmallVector<Constant*, 8> Idxs;
      Idxs.reserve(GEPI->getNumOperands()-1);
      for (User::op_iterator i = GEPI->op_begin() + 1, e = GEPI->op_end();
           i != e; ++i)
        if (Constant *C = dyn_cast<Constant>(*i))
          Idxs.push_back(C);
        else
          break;
      if (Idxs.size() == GEPI->getNumOperands()-1)
        Changed |= OptimizeAwayTrappingUsesOfValue(GEPI,
                          ConstantExpr::getGetElementPtr(NewV, &Idxs[0],
                                                        Idxs.size()), Context);
      if (GEPI->use_empty()) {
        Changed = true;
        GEPI->eraseFromParent();
      }
    }
  }

  return Changed;
}

/// OptimizeAwayTrappingUsesOfLoads - The specified global has only one non-null
/// value stored into it.  If there are uses of the loaded value that would trap
/// if the loaded value is dynamically null, then we know that they cannot be
/// reachable with a null value, so we can optimize away the load.
static bool OptimizeAwayTrappingUsesOfLoads(GlobalVariable *GV, Constant *LV,
                                            LLVMContext &Context) {
  bool Changed = false;

  // Keep track of whether we are able to remove all the uses of the global
  // other than the store that defines it.
  bool AllNonStoreUsesGone = true;

  // Replace all uses of loads with uses of uses of the stored value.
  for (Value::use_iterator GUI = GV->use_begin(), E = GV->use_end(); GUI != E;){
    User *GlobalUser = *GUI++;
    if (LoadInst *LI = dyn_cast<LoadInst>(GlobalUser)) {
      Changed |= OptimizeAwayTrappingUsesOfValue(LI, LV, Context);
      // If we were able to delete all uses of the loads
      if (LI->use_empty()) {
        LI->eraseFromParent();
        Changed = true;
      } else {
        AllNonStoreUsesGone = false;
      }
    } else if (isa<StoreInst>(GlobalUser)) {
      // Ignore the store that stores "LV" to the global.
      assert(GlobalUser->getOperand(1) == GV &&
             "Must be storing *to* the global");
    } else {
      AllNonStoreUsesGone = false;

      // If we get here we could have other crazy uses that are transitively
      // loaded.
      assert((isa<PHINode>(GlobalUser) || isa<SelectInst>(GlobalUser) ||
              isa<ConstantExpr>(GlobalUser)) && "Only expect load and stores!");
    }
  }

  if (Changed) {
    DEBUG(errs() << "OPTIMIZED LOADS FROM STORED ONCE POINTER: " << *GV);
    ++NumGlobUses;
  }

  // If we nuked all of the loads, then none of the stores are needed either,
  // nor is the global.
  if (AllNonStoreUsesGone) {
    DEBUG(errs() << "  *** GLOBAL NOW DEAD!\n");
    CleanupConstantGlobalUsers(GV, 0, Context);
    if (GV->use_empty()) {
      GV->eraseFromParent();
      ++NumDeleted;
    }
    Changed = true;
  }
  return Changed;
}

/// ConstantPropUsersOf - Walk the use list of V, constant folding all of the
/// instructions that are foldable.
static void ConstantPropUsersOf(Value *V, LLVMContext &Context) {
  for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E; )
    if (Instruction *I = dyn_cast<Instruction>(*UI++))
      if (Constant *NewC = ConstantFoldInstruction(I, Context)) {
        I->replaceAllUsesWith(NewC);

        // Advance UI to the next non-I use to avoid invalidating it!
        // Instructions could multiply use V.
        while (UI != E && *UI == I)
          ++UI;
        I->eraseFromParent();
      }
}

/// OptimizeGlobalAddressOfMalloc - This function takes the specified global
/// variable, and transforms the program as if it always contained the result of
/// the specified malloc.  Because it is always the result of the specified
/// malloc, there is no reason to actually DO the malloc.  Instead, turn the
/// malloc into a global, and any loads of GV as uses of the new global.
static GlobalVariable *OptimizeGlobalAddressOfMalloc(GlobalVariable *GV,
                                                     MallocInst *MI,
                                                     LLVMContext &Context) {
  DEBUG(errs() << "PROMOTING MALLOC GLOBAL: " << *GV << " MALLOC = " << *MI);
  ConstantInt *NElements = cast<ConstantInt>(MI->getArraySize());

  if (NElements->getZExtValue() != 1) {
    // If we have an array allocation, transform it to a single element
    // allocation to make the code below simpler.
    Type *NewTy = ArrayType::get(MI->getAllocatedType(),
                                 NElements->getZExtValue());
    MallocInst *NewMI =
      new MallocInst(NewTy, Constant::getNullValue(Type::getInt32Ty(Context)),
                     MI->getAlignment(), MI->getName(), MI);
    Value* Indices[2];
    Indices[0] = Indices[1] = Constant::getNullValue(Type::getInt32Ty(Context));
    Value *NewGEP = GetElementPtrInst::Create(NewMI, Indices, Indices + 2,
                                              NewMI->getName()+".el0", MI);
    MI->replaceAllUsesWith(NewGEP);
    MI->eraseFromParent();
    MI = NewMI;
  }

  // Create the new global variable.  The contents of the malloc'd memory is
  // undefined, so initialize with an undef value.
  // FIXME: This new global should have the alignment returned by malloc.  Code
  // could depend on malloc returning large alignment (on the mac, 16 bytes) but
  // this would only guarantee some lower alignment.
  Constant *Init = UndefValue::get(MI->getAllocatedType());
  GlobalVariable *NewGV = new GlobalVariable(*GV->getParent(),
                                             MI->getAllocatedType(), false,
                                             GlobalValue::InternalLinkage, Init,
                                             GV->getName()+".body",
                                             GV,
                                             GV->isThreadLocal());

  // Anything that used the malloc now uses the global directly.
  MI->replaceAllUsesWith(NewGV);

  Constant *RepValue = NewGV;
  if (NewGV->getType() != GV->getType()->getElementType())
    RepValue = ConstantExpr::getBitCast(RepValue,
                                        GV->getType()->getElementType());

  // If there is a comparison against null, we will insert a global bool to
  // keep track of whether the global was initialized yet or not.
  GlobalVariable *InitBool =
    new GlobalVariable(Context, Type::getInt1Ty(Context), false,
                       GlobalValue::InternalLinkage,
                       ConstantInt::getFalse(Context), GV->getName()+".init",
                       GV->isThreadLocal());
  bool InitBoolUsed = false;

  // Loop over all uses of GV, processing them in turn.
  std::vector<StoreInst*> Stores;
  while (!GV->use_empty())
    if (LoadInst *LI = dyn_cast<LoadInst>(GV->use_back())) {
      while (!LI->use_empty()) {
        Use &LoadUse = LI->use_begin().getUse();
        if (!isa<ICmpInst>(LoadUse.getUser()))
          LoadUse = RepValue;
        else {
          ICmpInst *CI = cast<ICmpInst>(LoadUse.getUser());
          // Replace the cmp X, 0 with a use of the bool value.
          Value *LV = new LoadInst(InitBool, InitBool->getName()+".val", CI);
          InitBoolUsed = true;
          switch (CI->getPredicate()) {
          default: llvm_unreachable("Unknown ICmp Predicate!");
          case ICmpInst::ICMP_ULT:
          case ICmpInst::ICMP_SLT:
            LV = ConstantInt::getFalse(Context);   // X < null -> always false
            break;
          case ICmpInst::ICMP_ULE:
          case ICmpInst::ICMP_SLE:
          case ICmpInst::ICMP_EQ:
            LV = BinaryOperator::CreateNot(LV, "notinit", CI);
            break;
          case ICmpInst::ICMP_NE:
          case ICmpInst::ICMP_UGE:
          case ICmpInst::ICMP_SGE:
          case ICmpInst::ICMP_UGT:
          case ICmpInst::ICMP_SGT:
            break;  // no change.
          }
          CI->replaceAllUsesWith(LV);
          CI->eraseFromParent();
        }
      }
      LI->eraseFromParent();
    } else {
      StoreInst *SI = cast<StoreInst>(GV->use_back());
      // The global is initialized when the store to it occurs.
      new StoreInst(ConstantInt::getTrue(Context), InitBool, SI);
      SI->eraseFromParent();
    }

  // If the initialization boolean was used, insert it, otherwise delete it.
  if (!InitBoolUsed) {
    while (!InitBool->use_empty())  // Delete initializations
      cast<Instruction>(InitBool->use_back())->eraseFromParent();
    delete InitBool;
  } else
    GV->getParent()->getGlobalList().insert(GV, InitBool);

  // Now the GV is dead, nuke it and the malloc.
  GV->eraseFromParent();
  MI->eraseFromParent();

  // To further other optimizations, loop over all users of NewGV and try to
  // constant prop them.  This will promote GEP instructions with constant
  // indices into GEP constant-exprs, which will allow global-opt to hack on it.
  ConstantPropUsersOf(NewGV, Context);
  if (RepValue != NewGV)
    ConstantPropUsersOf(RepValue, Context);

  ++NumSubstitute;
  return NewGV;
}

/// ValueIsOnlyUsedLocallyOrStoredToOneGlobal - Scan the use-list of V checking
/// to make sure that there are no complex uses of V.  We permit simple things
/// like dereferencing the pointer, but not storing through the address, unless
/// it is to the specified global.
static bool ValueIsOnlyUsedLocallyOrStoredToOneGlobal(Instruction *V,
                                                      GlobalVariable *GV,
                                              SmallPtrSet<PHINode*, 8> &PHIs) {
  for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E;++UI){
    Instruction *Inst = cast<Instruction>(*UI);

    if (isa<LoadInst>(Inst) || isa<CmpInst>(Inst)) {
      continue; // Fine, ignore.
    }

    if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
      if (SI->getOperand(0) == V && SI->getOperand(1) != GV)
        return false;  // Storing the pointer itself... bad.
      continue; // Otherwise, storing through it, or storing into GV... fine.
    }

    if (isa<GetElementPtrInst>(Inst)) {
      if (!ValueIsOnlyUsedLocallyOrStoredToOneGlobal(Inst, GV, PHIs))
        return false;
      continue;
    }

    if (PHINode *PN = dyn_cast<PHINode>(Inst)) {
      // PHIs are ok if all uses are ok.  Don't infinitely recurse through PHI
      // cycles.
      if (PHIs.insert(PN))
        if (!ValueIsOnlyUsedLocallyOrStoredToOneGlobal(PN, GV, PHIs))
          return false;
      continue;
    }

    if (BitCastInst *BCI = dyn_cast<BitCastInst>(Inst)) {
      if (!ValueIsOnlyUsedLocallyOrStoredToOneGlobal(BCI, GV, PHIs))
        return false;
      continue;
    }

    return false;
  }
  return true;
}

/// ReplaceUsesOfMallocWithGlobal - The Alloc pointer is stored into GV
/// somewhere.  Transform all uses of the allocation into loads from the
/// global and uses of the resultant pointer.  Further, delete the store into
/// GV.  This assumes that these values pass the
/// 'ValueIsOnlyUsedLocallyOrStoredToOneGlobal' predicate.
static void ReplaceUsesOfMallocWithGlobal(Instruction *Alloc,
                                          GlobalVariable *GV) {
  while (!Alloc->use_empty()) {
    Instruction *U = cast<Instruction>(*Alloc->use_begin());
    Instruction *InsertPt = U;
    if (StoreInst *SI = dyn_cast<StoreInst>(U)) {
      // If this is the store of the allocation into the global, remove it.
      if (SI->getOperand(1) == GV) {
        SI->eraseFromParent();
        continue;
      }
    } else if (PHINode *PN = dyn_cast<PHINode>(U)) {
      // Insert the load in the corresponding predecessor, not right before the
      // PHI.
      InsertPt = PN->getIncomingBlock(Alloc->use_begin())->getTerminator();
    } else if (isa<BitCastInst>(U)) {
      // Must be bitcast between the malloc and store to initialize the global.
      ReplaceUsesOfMallocWithGlobal(U, GV);
      U->eraseFromParent();
      continue;
    } else if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(U)) {
      // If this is a "GEP bitcast" and the user is a store to the global, then
      // just process it as a bitcast.
      if (GEPI->hasAllZeroIndices() && GEPI->hasOneUse())
        if (StoreInst *SI = dyn_cast<StoreInst>(GEPI->use_back()))
          if (SI->getOperand(1) == GV) {
            // Must be bitcast GEP between the malloc and store to initialize
            // the global.
            ReplaceUsesOfMallocWithGlobal(GEPI, GV);
            GEPI->eraseFromParent();
            continue;
          }
    }

    // Insert a load from the global, and use it instead of the malloc.
    Value *NL = new LoadInst(GV, GV->getName()+".val", InsertPt);
    U->replaceUsesOfWith(Alloc, NL);
  }
}

/// LoadUsesSimpleEnoughForHeapSRA - Verify that all uses of V (a load, or a phi
/// of a load) are simple enough to perform heap SRA on.  This permits GEP's
/// that index through the array and struct field, icmps of null, and PHIs.
static bool LoadUsesSimpleEnoughForHeapSRA(Value *V,
                              SmallPtrSet<PHINode*, 32> &LoadUsingPHIs,
                              SmallPtrSet<PHINode*, 32> &LoadUsingPHIsPerLoad) {
  // We permit two users of the load: setcc comparing against the null
  // pointer, and a getelementptr of a specific form.
  for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E;++UI){
    Instruction *User = cast<Instruction>(*UI);

    // Comparison against null is ok.
    if (ICmpInst *ICI = dyn_cast<ICmpInst>(User)) {
      if (!isa<ConstantPointerNull>(ICI->getOperand(1)))
        return false;
      continue;
    }

    // getelementptr is also ok, but only a simple form.
    if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(User)) {
      // Must index into the array and into the struct.
      if (GEPI->getNumOperands() < 3)
        return false;

      // Otherwise the GEP is ok.
      continue;
    }

    if (PHINode *PN = dyn_cast<PHINode>(User)) {
      if (!LoadUsingPHIsPerLoad.insert(PN))
        // This means some phi nodes are dependent on each other.
        // Avoid infinite looping!
        return false;
      if (!LoadUsingPHIs.insert(PN))
        // If we have already analyzed this PHI, then it is safe.
        continue;

      // Make sure all uses of the PHI are simple enough to transform.
      if (!LoadUsesSimpleEnoughForHeapSRA(PN,
                                          LoadUsingPHIs, LoadUsingPHIsPerLoad))
        return false;

      continue;
    }

    // Otherwise we don't know what this is, not ok.
    return false;
  }

  return true;
}

/// AllGlobalLoadUsesSimpleEnoughForHeapSRA - If all users of values loaded from
/// GV are simple enough to perform HeapSRA, return true.
static bool AllGlobalLoadUsesSimpleEnoughForHeapSRA(GlobalVariable *GV,
                                                    MallocInst *MI) {
  SmallPtrSet<PHINode*, 32> LoadUsingPHIs;
  SmallPtrSet<PHINode*, 32> LoadUsingPHIsPerLoad;
  for (Value::use_iterator UI = GV->use_begin(), E = GV->use_end(); UI != E;
       ++UI)
    if (LoadInst *LI = dyn_cast<LoadInst>(*UI)) {
      if (!LoadUsesSimpleEnoughForHeapSRA(LI, LoadUsingPHIs,
                                          LoadUsingPHIsPerLoad))
        return false;
      LoadUsingPHIsPerLoad.clear();
    }

  // If we reach here, we know that all uses of the loads and transitive uses
  // (through PHI nodes) are simple enough to transform.  However, we don't know
  // that all inputs to the PHI nodes are in the same equivalence sets.
  // Check to verify that all operands of the PHIs are either PHIS that can be
  // transformed, loads from GV, or MI itself.
  for (SmallPtrSet<PHINode*, 32>::iterator I = LoadUsingPHIs.begin(),
       E = LoadUsingPHIs.end(); I != E; ++I) {
    PHINode *PN = *I;
    for (unsigned op = 0, e = PN->getNumIncomingValues(); op != e; ++op) {
      Value *InVal = PN->getIncomingValue(op);

      // PHI of the stored value itself is ok.
      if (InVal == MI) continue;

      if (PHINode *InPN = dyn_cast<PHINode>(InVal)) {
        // One of the PHIs in our set is (optimistically) ok.
        if (LoadUsingPHIs.count(InPN))
          continue;
        return false;
      }

      // Load from GV is ok.
      if (LoadInst *LI = dyn_cast<LoadInst>(InVal))
        if (LI->getOperand(0) == GV)
          continue;

      // Anything else is rejected.
      return false;
    }
  }

  return true;
}
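
// Illustrative sketch (hypothetical IR) of a pattern the two predicates above
// accept: loads of @G that only feed field GEPs, null compares, and PHIs whose
// other inputs are also loads of @G or the malloc itself, e.g.
//
//   %a = load %struct** @G
//   %p = phi %struct* [ %a, %bb1 ], [ %b, %bb2 ]  ; %b is another load of @G
//   %f = getelementptr %struct* %p, i32 0, i32 1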

static Value *GetHeapSROAValue(Value *V, unsigned FieldNo,
              DenseMap<Value*, std::vector<Value*> > &InsertedScalarizedValues,
                   std::vector<std::pair<PHINode*, unsigned> > &PHIsToRewrite,
                   LLVMContext &Context) {
  std::vector<Value*> &FieldVals = InsertedScalarizedValues[V];

  if (FieldNo >= FieldVals.size())
    FieldVals.resize(FieldNo+1);

  // If we already have this value, just reuse the previously scalarized
  // version.
  if (Value *FieldVal = FieldVals[FieldNo])
    return FieldVal;

  // Depending on what instruction this is, we have several cases.
  Value *Result;
  if (LoadInst *LI = dyn_cast<LoadInst>(V)) {
    // This is a scalarized version of the load from the global.  Just create
    // a new Load of the scalarized global.
    Result = new LoadInst(GetHeapSROAValue(LI->getOperand(0), FieldNo,
                                           InsertedScalarizedValues,
                                           PHIsToRewrite, Context),
                          LI->getName()+".f"+Twine(FieldNo), LI);
  } else if (PHINode *PN = dyn_cast<PHINode>(V)) {
    // PN's type is pointer to struct.  Make a new PHI of pointer to struct
    // field.
    const StructType *ST =
      cast<StructType>(cast<PointerType>(PN->getType())->getElementType());

    Result =
     PHINode::Create(PointerType::getUnqual(ST->getElementType(FieldNo)),
                     PN->getName()+".f"+Twine(FieldNo), PN);
    PHIsToRewrite.push_back(std::make_pair(PN, FieldNo));
  } else {
    llvm_unreachable("Unknown usable value");
    Result = 0;
  }

  return FieldVals[FieldNo] = Result;
}

/// RewriteHeapSROALoadUser - Given a load instruction and a value derived from
/// the load, rewrite the derived value to use the HeapSRoA'd load.
static void RewriteHeapSROALoadUser(Instruction *LoadUser,
              DenseMap<Value*, std::vector<Value*> > &InsertedScalarizedValues,
                   std::vector<std::pair<PHINode*, unsigned> > &PHIsToRewrite,
                   LLVMContext &Context) {
  // If this is a comparison against null, handle it.
  if (ICmpInst *SCI = dyn_cast<ICmpInst>(LoadUser)) {
    assert(isa<ConstantPointerNull>(SCI->getOperand(1)));
    // If we have a setcc of the loaded pointer, we can use a setcc of any
    // field.
    Value *NPtr = GetHeapSROAValue(SCI->getOperand(0), 0,
                                   InsertedScalarizedValues, PHIsToRewrite,
                                   Context);

    Value *New = new ICmpInst(SCI, SCI->getPredicate(), NPtr,
                              Constant::getNullValue(NPtr->getType()),
                              SCI->getName());
    SCI->replaceAllUsesWith(New);
    SCI->eraseFromParent();
    return;
  }

  // Handle 'getelementptr Ptr, Idx, i32 FieldNo ...'
  if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(LoadUser)) {
    assert(GEPI->getNumOperands() >= 3 && isa<ConstantInt>(GEPI->getOperand(2))
           && "Unexpected GEPI!");

    // Load the pointer for this field.
    unsigned FieldNo = cast<ConstantInt>(GEPI->getOperand(2))->getZExtValue();
    Value *NewPtr = GetHeapSROAValue(GEPI->getOperand(0), FieldNo,
                                     InsertedScalarizedValues, PHIsToRewrite,
                                     Context);

    // Create the new GEP idx vector.
    SmallVector<Value*, 8> GEPIdx;
    GEPIdx.push_back(GEPI->getOperand(1));
    GEPIdx.append(GEPI->op_begin()+3, GEPI->op_end());

    Value *NGEPI = GetElementPtrInst::Create(NewPtr,
                                             GEPIdx.begin(), GEPIdx.end(),
                                             GEPI->getName(), GEPI);
    GEPI->replaceAllUsesWith(NGEPI);
    GEPI->eraseFromParent();
    return;
  }

  // Recursively transform the users of PHI nodes.  This will lazily create the
  // PHIs that are needed for individual elements.  Keep track of what PHIs we
  // see in InsertedScalarizedValues so that we don't get infinite loops (very
  // antisocial).  If the PHI is already in InsertedScalarizedValues, it has
  // already been seen first by another load, so its uses have already been
  // processed.
  PHINode *PN = cast<PHINode>(LoadUser);
  bool Inserted;
  DenseMap<Value*, std::vector<Value*> >::iterator InsertPos;
  tie(InsertPos, Inserted) =
    InsertedScalarizedValues.insert(std::make_pair(PN, std::vector<Value*>()));
  if (!Inserted) return;

  // If this is the first time we've seen this PHI, recursively process all
  // users.
  for (Value::use_iterator UI = PN->use_begin(), E = PN->use_end(); UI != E; ) {
    Instruction *User = cast<Instruction>(*UI++);
    RewriteHeapSROALoadUser(User, InsertedScalarizedValues, PHIsToRewrite,
                            Context);
  }
}

/// RewriteUsesOfLoadForHeapSRoA - We are performing Heap SRoA on a global.  Ptr
/// is a value loaded from the global.  Eliminate all uses of Ptr, making them
/// use FieldGlobals instead.  All uses of loaded values satisfy
/// AllGlobalLoadUsesSimpleEnoughForHeapSRA.
static void RewriteUsesOfLoadForHeapSRoA(LoadInst *Load,
              DenseMap<Value*, std::vector<Value*> > &InsertedScalarizedValues,
                   std::vector<std::pair<PHINode*, unsigned> > &PHIsToRewrite,
                   LLVMContext &Context) {
  for (Value::use_iterator UI = Load->use_begin(), E = Load->use_end();
       UI != E; ) {
    Instruction *User = cast<Instruction>(*UI++);
    RewriteHeapSROALoadUser(User, InsertedScalarizedValues, PHIsToRewrite,
                            Context);
  }

  if (Load->use_empty()) {
    Load->eraseFromParent();
    InsertedScalarizedValues.erase(Load);
  }
}

/// PerformHeapAllocSRoA - MI is an allocation of an array of structures.  Break
/// it up into multiple allocations of arrays of the fields.
static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, MallocInst *MI,
                                            LLVMContext &Context){
  DEBUG(errs() << "SROA HEAP ALLOC: " << *GV << " MALLOC = " << *MI);
  const StructType *STy = cast<StructType>(MI->getAllocatedType());

  // There is guaranteed to be at least one use of the malloc (storing
  // it into GV).  If there are other uses, change them to be uses of
  // the global to simplify later code.  This also deletes the store
  // into GV.
  ReplaceUsesOfMallocWithGlobal(MI, GV);

  // Okay, at this point, there are no users of the malloc.  Insert N
  // new mallocs at the same place as MI, and N globals.
  std::vector<Value*> FieldGlobals;
  std::vector<MallocInst*> FieldMallocs;

  for (unsigned FieldNo = 0, e = STy->getNumElements(); FieldNo != e;++FieldNo){
    const Type *FieldTy = STy->getElementType(FieldNo);
    const Type *PFieldTy = PointerType::getUnqual(FieldTy);

    GlobalVariable *NGV =
      new GlobalVariable(*GV->getParent(),
                         PFieldTy, false, GlobalValue::InternalLinkage,
                         Constant::getNullValue(PFieldTy),
                         GV->getName() + ".f" + Twine(FieldNo), GV,
                         GV->isThreadLocal());
    FieldGlobals.push_back(NGV);

    MallocInst *NMI = new MallocInst(FieldTy, MI->getArraySize(),
                                     MI->getName() + ".f" + Twine(FieldNo), MI);
    FieldMallocs.push_back(NMI);
    new StoreInst(NMI, NGV, MI);
  }

  // The tricky aspect of this transformation is handling the case when malloc
  // fails.  In the original code, malloc failing would set the result pointer
  // of malloc to null.  In this case, some mallocs could succeed and others
  // could fail.  As such, we emit code that looks like this:
  //    F0 = malloc(field0)
  //    F1 = malloc(field1)
  //    F2 = malloc(field2)
  //    if (F0 == 0 || F1 == 0 || F2 == 0) {
  //      if (F0) { free(F0); F0 = 0; }
  //      if (F1) { free(F1); F1 = 0; }
  //      if (F2) { free(F2); F2 = 0; }
  //    }
  Value *RunningOr = 0;
  for (unsigned i = 0, e = FieldMallocs.size(); i != e; ++i) {
    Value *Cond = new ICmpInst(MI, ICmpInst::ICMP_EQ, FieldMallocs[i],
                             Constant::getNullValue(FieldMallocs[i]->getType()),
                               "isnull");
    if (RunningOr == 0)
      RunningOr = Cond;   // First seteq
    else
      RunningOr = BinaryOperator::CreateOr(RunningOr, Cond, "tmp", MI);
  }

  // Split the basic block at the old malloc.
  BasicBlock *OrigBB = MI->getParent();
  BasicBlock *ContBB = OrigBB->splitBasicBlock(MI, "malloc_cont");

  // Create the block to check the first condition.  Put all these blocks at the
  // end of the function as they are unlikely to be executed.
  BasicBlock *NullPtrBlock = BasicBlock::Create(Context, "malloc_ret_null",
                                                OrigBB->getParent());

  // Remove the uncond branch from OrigBB to ContBB, turning it into a cond
  // branch on RunningOr.
  OrigBB->getTerminator()->eraseFromParent();
  BranchInst::Create(NullPtrBlock, ContBB, RunningOr, OrigBB);

  // Within the NullPtrBlock, we need to emit a comparison and branch for each
  // pointer, because some may be null while others are not.
  for (unsigned i = 0, e = FieldGlobals.size(); i != e; ++i) {
    Value *GVVal = new LoadInst(FieldGlobals[i], "tmp", NullPtrBlock);
    Value *Cmp = new ICmpInst(*NullPtrBlock, ICmpInst::ICMP_NE, GVVal,
                              Constant::getNullValue(GVVal->getType()),
                              "tmp");
    BasicBlock *FreeBlock = BasicBlock::Create(Context, "free_it",
                                               OrigBB->getParent());
    BasicBlock *NextBlock = BasicBlock::Create(Context, "next",
                                               OrigBB->getParent());
    BranchInst::Create(FreeBlock, NextBlock, Cmp, NullPtrBlock);

    // Fill in FreeBlock.
    new FreeInst(GVVal, FreeBlock);
    new StoreInst(Constant::getNullValue(GVVal->getType()), FieldGlobals[i],
                  FreeBlock);
    BranchInst::Create(NextBlock, FreeBlock);

    NullPtrBlock = NextBlock;
  }

  BranchInst::Create(ContBB, NullPtrBlock);

  // MI is no longer needed, remove it.
  MI->eraseFromParent();

  /// InsertedScalarizedLoads - As we process loads, if we can't immediately
  /// update all uses of the load, keep track of what scalarized loads are
  /// inserted for a given load.
  DenseMap<Value*, std::vector<Value*> > InsertedScalarizedValues;
  InsertedScalarizedValues[GV] = FieldGlobals;

  std::vector<std::pair<PHINode*, unsigned> > PHIsToRewrite;

  // Okay, the malloc site is completely handled.  All of the uses of GV are now
  // loads, and all uses of those loads are simple.  Rewrite them to use loads
  // of the per-field globals instead.
  for (Value::use_iterator UI = GV->use_begin(), E = GV->use_end(); UI != E;) {
    Instruction *User = cast<Instruction>(*UI++);

    if (LoadInst *LI = dyn_cast<LoadInst>(User)) {
      RewriteUsesOfLoadForHeapSRoA(LI, InsertedScalarizedValues, PHIsToRewrite,
                                   Context);
      continue;
    }

    // Must be a store of null.
    StoreInst *SI = cast<StoreInst>(User);
    assert(isa<ConstantPointerNull>(SI->getOperand(0)) &&
           "Unexpected heap-sra user!");

    // Insert a store of null into each global.
    for (unsigned i = 0, e = FieldGlobals.size(); i != e; ++i) {
      const PointerType *PT = cast<PointerType>(FieldGlobals[i]->getType());
      Constant *Null = Constant::getNullValue(PT->getElementType());
      new StoreInst(Null, FieldGlobals[i], SI);
    }
    // Erase the original store.
    SI->eraseFromParent();
  }

  // While we have PHIs that are interesting to rewrite, do it.
  while (!PHIsToRewrite.empty()) {
    PHINode *PN = PHIsToRewrite.back().first;
    unsigned FieldNo = PHIsToRewrite.back().second;
    PHIsToRewrite.pop_back();
    PHINode *FieldPN = cast<PHINode>(InsertedScalarizedValues[PN][FieldNo]);
    assert(FieldPN->getNumIncomingValues() == 0 &&"Already processed this phi");

    // Add all the incoming values.  This can materialize more phis.
    for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
      Value *InVal = PN->getIncomingValue(i);
      InVal = GetHeapSROAValue(InVal, FieldNo, InsertedScalarizedValues,
                               PHIsToRewrite, Context);
      FieldPN->addIncoming(InVal, PN->getIncomingBlock(i));
    }
  }

  // Drop all inter-phi links and any loads that made it this far.
  for (DenseMap<Value*, std::vector<Value*> >::iterator
       I = InsertedScalarizedValues.begin(), E = InsertedScalarizedValues.end();
       I != E; ++I) {
    if (PHINode *PN = dyn_cast<PHINode>(I->first))
      PN->dropAllReferences();
    else if (LoadInst *LI = dyn_cast<LoadInst>(I->first))
      LI->dropAllReferences();
  }

  // Delete all the phis and loads now that inter-references are dead.
  for (DenseMap<Value*, std::vector<Value*> >::iterator
       I = InsertedScalarizedValues.begin(), E = InsertedScalarizedValues.end();
       I != E; ++I) {
    if (PHINode *PN = dyn_cast<PHINode>(I->first))
      PN->eraseFromParent();
    else if (LoadInst *LI = dyn_cast<LoadInst>(I->first))
      LI->eraseFromParent();
  }

  // The old global is now dead, remove it.
  GV->eraseFromParent();

  ++NumHeapSRA;
  return cast<GlobalVariable>(FieldGlobals[0]);
}

/// TryToOptimizeStoreOfMallocToGlobal - This function is called when we see a
/// pointer global variable with a single value stored into it that is a malloc
/// or cast of malloc.
static bool TryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV,
                                               MallocInst *MI,
                                               Module::global_iterator &GVI,
                                               TargetData *TD,
                                               LLVMContext &Context) {
  // If this is a malloc of an abstract type, don't touch it.
  if (!MI->getAllocatedType()->isSized())
    return false;

  // We can't optimize this global unless all uses of it are *known* to be
  // of the malloc value, not of the null initializer value (consider a use
  // that compares the global's value against zero to see if the malloc has
  // been reached).  To do this, we check to see if all uses of the global
  // would trap if the global were null: this proves that they must all
  // happen after the malloc.
  if (!AllUsesOfLoadedValueWillTrapIfNull(GV))
    return false;

  // We can't optimize this if the malloc itself is used in a complex way,
  // for example, being stored into multiple globals.  This allows the
  // malloc to be stored into the specified global, loaded setcc'd, and
  // GEP'd.  These are all things we could transform to using the global
  // for.
  {
    SmallPtrSet<PHINode*, 8> PHIs;
    if (!ValueIsOnlyUsedLocallyOrStoredToOneGlobal(MI, GV, PHIs))
      return false;
  }

  // If we have a global that is only initialized with a fixed size malloc,
  // transform the program to use global memory instead of malloc'd memory.
  // This eliminates dynamic allocation, avoids an indirection accessing the
  // data, and exposes the resultant global to further GlobalOpt.
  if (ConstantInt *NElements = dyn_cast<ConstantInt>(MI->getArraySize())) {
    // Restrict this transformation to only working on small allocations
    // (2048 bytes currently), as we don't want to introduce a 16M global or
    // something.
    if (TD &&
        NElements->getZExtValue()*
        TD->getTypeAllocSize(MI->getAllocatedType()) < 2048) {
      GVI = OptimizeGlobalAddressOfMalloc(GV, MI, Context);
      return true;
    }
  }

  // If the allocation is an array of structures, consider transforming this
  // into multiple malloc'd arrays, one for each field.  This is basically
  // SRoA for malloc'd memory.
  const Type *AllocTy = MI->getAllocatedType();

  // If this is an allocation of a fixed size array of structs, analyze as a
  // variable size array.  malloc [100 x struct],1 -> malloc struct, 100
  if (!MI->isArrayAllocation())
    if (const ArrayType *AT = dyn_cast<ArrayType>(AllocTy))
      AllocTy = AT->getElementType();

  if (const StructType *AllocSTy = dyn_cast<StructType>(AllocTy)) {
    // If the structure has an unreasonable number of fields, leave it
    // alone.
    if (AllocSTy->getNumElements() <= 16 && AllocSTy->getNumElements() != 0 &&
        AllGlobalLoadUsesSimpleEnoughForHeapSRA(GV, MI)) {

      // If this is a fixed size array, transform the Malloc to be an alloc of
      // structs.  malloc [100 x struct],1 -> malloc struct, 100
      if (const ArrayType *AT = dyn_cast<ArrayType>(MI->getAllocatedType())) {
        MallocInst *NewMI =
          new MallocInst(AllocSTy,
                         ConstantInt::get(Type::getInt32Ty(Context),
                                          AT->getNumElements()),
                         "", MI);
        NewMI->takeName(MI);
        Value *Cast = new BitCastInst(NewMI, MI->getType(), "tmp", MI);
        MI->replaceAllUsesWith(Cast);
        MI->eraseFromParent();
        MI = NewMI;
      }

      GVI = PerformHeapAllocSRoA(GV, MI, Context);
      return true;
    }
  }

  return false;
}
// OptimizeOnceStoredGlobal - Try to optimize globals based on the knowledge
// that only one value (besides its initializer) is ever stored to the global.
static bool OptimizeOnceStoredGlobal(GlobalVariable *GV, Value *StoredOnceVal,
                                     Module::global_iterator &GVI,
                                     TargetData *TD, LLVMContext &Context) {
  // Ignore no-op GEPs and bitcasts.
  StoredOnceVal = StoredOnceVal->stripPointerCasts();

  // If we are dealing with a pointer global that is initialized to null and
  // only has one (non-null) value stored into it, then we can optimize any
  // users of the loaded value (often calls and loads) that would trap if the
  // value was null.
  if (isa<PointerType>(GV->getInitializer()->getType()) &&
      GV->getInitializer()->isNullValue()) {
    if (Constant *SOVC = dyn_cast<Constant>(StoredOnceVal)) {
      if (GV->getInitializer()->getType() != SOVC->getType())
        SOVC =
          ConstantExpr::getBitCast(SOVC, GV->getInitializer()->getType());

      // Optimize away any trapping uses of the loaded value.
      if (OptimizeAwayTrappingUsesOfLoads(GV, SOVC, Context))
        return true;
    } else if (MallocInst *MI = dyn_cast<MallocInst>(StoredOnceVal)) {
      if (TryToOptimizeStoreOfMallocToGlobal(GV, MI, GVI, TD, Context))
        return true;
    }
  }

  return false;
}
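
// A hedged example of the null-initialized pointer case handled above
// (hypothetical IR):
//
//   @FnPtr = internal global void ()* null
//   store void ()* @Impl, void ()** @FnPtr    ; the single store
//   ...
//   %F = load void ()** @FnPtr
//   call void %F()                            ; would trap if %F were null
//
// Because the call would trap on null, it must execute after the store, so
// OptimizeAwayTrappingUsesOfLoads can fold the load to @Impl.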
/// TryToShrinkGlobalToBoolean - At this point, we have learned that the only
/// two values ever stored into GV are its initializer and OtherVal.  See if we
/// can shrink the global into a boolean and select between the two values
/// whenever it is used.  This exposes the values to other scalar optimizations.
static bool TryToShrinkGlobalToBoolean(GlobalVariable *GV, Constant *OtherVal,
                                       LLVMContext &Context) {
  const Type *GVElType = GV->getType()->getElementType();

  // If GVElType is already i1, it is already shrunk.  If the type of the GV is
  // an FP value, pointer or vector, don't do this optimization because a select
  // between them is very expensive and unlikely to lead to later
  // simplification.  In these cases, we typically end up with "cond ? v1 : v2"
  // where v1 and v2 both require constant pool loads, a big loss.
  if (GVElType == Type::getInt1Ty(Context) || GVElType->isFloatingPoint() ||
      isa<PointerType>(GVElType) || isa<VectorType>(GVElType))
    return false;

  // Walk the use list of the global seeing if all the uses are load or store.
  // If there is anything else, bail out.
  for (Value::use_iterator I = GV->use_begin(), E = GV->use_end(); I != E; ++I)
    if (!isa<LoadInst>(I) && !isa<StoreInst>(I))
      return false;

  DEBUG(errs() << "   *** SHRINKING TO BOOL: " << *GV);

  // Create the new global, initializing it to false.
  GlobalVariable *NewGV = new GlobalVariable(Context,
                                             Type::getInt1Ty(Context), false,
                                             GlobalValue::InternalLinkage,
                                             ConstantInt::getFalse(Context),
                                             GV->getName()+".b",
                                             GV->isThreadLocal());
  GV->getParent()->getGlobalList().insert(GV, NewGV);

  Constant *InitVal = GV->getInitializer();
  assert(InitVal->getType() != Type::getInt1Ty(Context) &&
         "No reason to shrink to bool!");

  // If initialized to zero and storing one into the global, we can use a cast
  // instead of a select to synthesize the desired value.
  bool IsOneZero = false;
  if (ConstantInt *CI = dyn_cast<ConstantInt>(OtherVal))
    IsOneZero = InitVal->isNullValue() && CI->isOne();

  while (!GV->use_empty()) {
    Instruction *UI = cast<Instruction>(GV->use_back());
    if (StoreInst *SI = dyn_cast<StoreInst>(UI)) {
      // Change the store into a boolean store.
      bool StoringOther = SI->getOperand(0) == OtherVal;
      // Only do this if we weren't storing a loaded value.
      Value *StoreVal;
      if (StoringOther || SI->getOperand(0) == InitVal)
        StoreVal = ConstantInt::get(Type::getInt1Ty(Context), StoringOther);
      else {
        // Otherwise, we are storing a previously loaded copy.  To do this,
        // change the copy from copying the original value to just copying the
        // bool.
        Instruction *StoredVal = cast<Instruction>(SI->getOperand(0));

        // If we've already replaced the input, StoredVal will be a cast or
        // select instruction.  If not, it will be a load of the original
        // global.
        if (LoadInst *LI = dyn_cast<LoadInst>(StoredVal)) {
          assert(LI->getOperand(0) == GV && "Not a copy!");
          // Insert a new load, to preserve the saved value.
          StoreVal = new LoadInst(NewGV, LI->getName()+".b", LI);
        } else {
          assert((isa<CastInst>(StoredVal) || isa<SelectInst>(StoredVal)) &&
                 "This is not a form that we understand!");
          StoreVal = StoredVal->getOperand(0);
          assert(isa<LoadInst>(StoreVal) && "Not a load of NewGV!");
        }
      }
      new StoreInst(StoreVal, NewGV, SI);
    } else {
      // Change the load into a load of bool then a select.
      LoadInst *LI = cast<LoadInst>(UI);
      LoadInst *NLI = new LoadInst(NewGV, LI->getName()+".b", LI);
      Value *NSI;
      if (IsOneZero)
        NSI = new ZExtInst(NLI, LI->getType(), "", LI);
      else
        NSI = SelectInst::Create(NLI, OtherVal, InitVal, "", LI);
      NSI->takeName(LI);
      LI->replaceAllUsesWith(NSI);
    }
    UI->eraseFromParent();
  }

  GV->eraseFromParent();
  return true;
}
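
// A hedged sketch of the shrink-to-bool rewrite (hypothetical IR; @G.b is
// the ".b" global created above):
//
//   @G = internal global i32 0      ; only 0 and 42 are ever stored
//   store i32 42, i32* @G           ; -> store i1 true, i1* @G.b
//   %v = load i32* @G               ; -> %b = load i1* @G.b
//                                   ;    %v = select i1 %b, i32 42, i32 0
//
// In the one/zero case (IsOneZero), a zext of the loaded bool replaces the
// select.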
/// ProcessInternalGlobal - Analyze the specified global variable and optimize
/// it if possible.  If we make a change, return true.
bool GlobalOpt::ProcessInternalGlobal(GlobalVariable *GV,
                                      Module::global_iterator &GVI) {
  SmallPtrSet<PHINode*, 16> PHIUsers;
  GlobalStatus GS;
  GV->removeDeadConstantUsers();

  if (GV->use_empty()) {
    DEBUG(errs() << "GLOBAL DEAD: " << *GV);
    GV->eraseFromParent();
    ++NumDeleted;
    return true;
  }

  if (!AnalyzeGlobal(GV, GS, PHIUsers)) {
#if 0
    cerr << "Global: " << *GV;
    cerr << "  isLoaded = " << GS.isLoaded << "\n";
    cerr << "  StoredType = ";
    switch (GS.StoredType) {
    case GlobalStatus::NotStored: cerr << "NEVER STORED\n"; break;
    case GlobalStatus::isInitializerStored: cerr << "INIT STORED\n"; break;
    case GlobalStatus::isStoredOnce: cerr << "STORED ONCE\n"; break;
    case GlobalStatus::isStored: cerr << "stored\n"; break;
    }
    if (GS.StoredType == GlobalStatus::isStoredOnce && GS.StoredOnceValue)
      cerr << "  StoredOnceValue = " << *GS.StoredOnceValue << "\n";
    if (GS.AccessingFunction && !GS.HasMultipleAccessingFunctions)
      cerr << "  AccessingFunction = " << GS.AccessingFunction->getName()
           << "\n";
    cerr << "  HasMultipleAccessingFunctions = "
         << GS.HasMultipleAccessingFunctions << "\n";
    cerr << "  HasNonInstructionUser = " << GS.HasNonInstructionUser << "\n";
    cerr << "\n";
#endif

    // If this is a first class global and has only one accessing function
    // and this function is main (which we know is not recursive), we can
    // replace the global with a local alloca in that function.
    //
    // NOTE: It doesn't make sense to promote non single-value types since we
    // are just replacing static memory with stack memory.
    //
    // If the global is in a different address space, don't bring it to stack.
    if (!GS.HasMultipleAccessingFunctions &&
        GS.AccessingFunction && !GS.HasNonInstructionUser &&
        GV->getType()->getElementType()->isSingleValueType() &&
        GS.AccessingFunction->getName() == "main" &&
        GS.AccessingFunction->hasExternalLinkage() &&
        GV->getType()->getAddressSpace() == 0) {
      DEBUG(errs() << "LOCALIZING GLOBAL: " << *GV);
      Instruction* FirstI = GS.AccessingFunction->getEntryBlock().begin();
      const Type* ElemTy = GV->getType()->getElementType();
      // FIXME: Pass Global's alignment when globals have alignment
      AllocaInst* Alloca = new AllocaInst(ElemTy, NULL, GV->getName(), FirstI);
      if (!isa<UndefValue>(GV->getInitializer()))
        new StoreInst(GV->getInitializer(), Alloca, FirstI);

      GV->replaceAllUsesWith(Alloca);
      GV->eraseFromParent();
      ++NumLocalized;
      return true;
    }

    // If the global is never loaded (but may be stored to), it is dead.
    // Delete it now.
    if (!GS.isLoaded) {
      DEBUG(errs() << "GLOBAL NEVER LOADED: " << *GV);

      // Delete any stores we can find to the global.  We may not be able to
      // make it completely dead though.
      bool Changed = CleanupConstantGlobalUsers(GV, GV->getInitializer(),
                                                GV->getContext());

      // If the global is dead now, delete it.
      if (GV->use_empty()) {
        GV->eraseFromParent();
        ++NumDeleted;
        Changed = true;
      }
      return Changed;

    } else if (GS.StoredType <= GlobalStatus::isInitializerStored) {
      DEBUG(errs() << "MARKING CONSTANT: " << *GV);
      GV->setConstant(true);

      // Clean up any obviously simplifiable users now.
      CleanupConstantGlobalUsers(GV, GV->getInitializer(), GV->getContext());

      // If the global is dead now, just nuke it.
      if (GV->use_empty()) {
        DEBUG(errs() << "   *** Marking constant allowed us to simplify "
                     << "all users and delete global!\n");
        GV->eraseFromParent();
        ++NumDeleted;
      }

      ++NumMarked;
      return true;
    } else if (!GV->getInitializer()->getType()->isSingleValueType()) {
      if (TargetData *TD = getAnalysisIfAvailable<TargetData>())
        if (GlobalVariable *FirstNewGV = SRAGlobal(GV, *TD,
                                                   GV->getContext())) {
          GVI = FirstNewGV;  // Don't skip the newly produced globals!
          return true;
        }
    } else if (GS.StoredType == GlobalStatus::isStoredOnce) {
      // If the initial value for the global was an undef value, and if only
      // one other value was stored into it, we can just change the
      // initializer to be the stored value, then delete all stores to the
      // global.  This allows us to mark it constant.
      if (Constant *SOVConstant = dyn_cast<Constant>(GS.StoredOnceValue))
        if (isa<UndefValue>(GV->getInitializer())) {
          // Change the initial value here.
          GV->setInitializer(SOVConstant);

          // Clean up any obviously simplifiable users now.
          CleanupConstantGlobalUsers(GV, GV->getInitializer(),
                                     GV->getContext());

          if (GV->use_empty()) {
            DEBUG(errs() << "   *** Substituting initializer allowed us to "
                         << "simplify all users and delete global!\n");
            GV->eraseFromParent();
            ++NumDeleted;
          }
          ++NumSubstitute;
          return true;
        }

      // Try to optimize globals based on the knowledge that only one value
      // (besides its initializer) is ever stored to the global.
      if (OptimizeOnceStoredGlobal(GV, GS.StoredOnceValue, GVI,
                                   getAnalysisIfAvailable<TargetData>(),
                                   GV->getContext()))
        return true;

      // Otherwise, if the global was not a boolean, we can shrink it to be a
      // boolean.
      if (Constant *SOVConstant = dyn_cast<Constant>(GS.StoredOnceValue))
        if (TryToShrinkGlobalToBoolean(GV, SOVConstant, GV->getContext())) {
          ++NumShrunkToBool;
          return true;
        }
    }
  }

  return false;
}
/// ChangeCalleesToFastCall - Walk all of the direct calls of the specified
/// function, changing them to FastCC.
static void ChangeCalleesToFastCall(Function *F) {
  for (Value::use_iterator UI = F->use_begin(), E = F->use_end(); UI != E;++UI){
    CallSite User(cast<Instruction>(*UI));
    User.setCallingConv(CallingConv::Fast);
  }
}
static AttrListPtr StripNest(const AttrListPtr &Attrs) {
  for (unsigned i = 0, e = Attrs.getNumSlots(); i != e; ++i) {
    if ((Attrs.getSlot(i).Attrs & Attribute::Nest) == 0)
      continue;

    // There can be only one.
    return Attrs.removeAttr(Attrs.getSlot(i).Index, Attribute::Nest);
  }

  return Attrs;
}
static void RemoveNestAttribute(Function *F) {
  F->setAttributes(StripNest(F->getAttributes()));
  for (Value::use_iterator UI = F->use_begin(), E = F->use_end(); UI != E;++UI){
    CallSite User(cast<Instruction>(*UI));
    User.setAttributes(StripNest(User.getAttributes()));
  }
}
bool GlobalOpt::OptimizeFunctions(Module &M) {
  bool Changed = false;
  // Optimize functions.
  for (Module::iterator FI = M.begin(), E = M.end(); FI != E; ) {
    Function *F = FI++;
    // Functions without names cannot be referenced outside this module.
    if (!F->hasName() && !F->isDeclaration())
      F->setLinkage(GlobalValue::InternalLinkage);
    F->removeDeadConstantUsers();
    if (F->use_empty() && (F->hasLocalLinkage() ||
                           F->hasLinkOnceLinkage())) {
      M.getFunctionList().erase(F);
      Changed = true;
      ++NumFnDeleted;
    } else if (F->hasLocalLinkage()) {
      if (F->getCallingConv() == CallingConv::C && !F->isVarArg() &&
          !F->hasAddressTaken()) {
        // If this function has C calling conventions, is not a varargs
        // function, and is only called directly, promote it to use the Fast
        // calling convention.
        F->setCallingConv(CallingConv::Fast);
        ChangeCalleesToFastCall(F);
        ++NumFastCallFns;
        Changed = true;
      }

      if (F->getAttributes().hasAttrSomewhere(Attribute::Nest) &&
          !F->hasAddressTaken()) {
        // The function is not used by a trampoline intrinsic, so it is safe
        // to remove the 'nest' attribute.
        RemoveNestAttribute(F);
        ++NumNestRemoved;
        Changed = true;
      }
    }
  }
  return Changed;
}
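
// A hedged sketch of the fastcc promotion (hypothetical IR):
//
//   define internal void @f(i32 %x) { ... }   ; address never taken
//   call void @f(i32 1)
//
// becomes:
//
//   define internal fastcc void @f(i32 %x) { ... }
//   call fastcc void @f(i32 1)
//
// The function and every direct call site must agree on the convention,
// which is why ChangeCalleesToFastCall visits all users.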
bool GlobalOpt::OptimizeGlobalVars(Module &M) {
  bool Changed = false;
  for (Module::global_iterator GVI = M.global_begin(), E = M.global_end();
       GVI != E; ) {
    GlobalVariable *GV = GVI++;
    // Global variables without names cannot be referenced outside this module.
    if (!GV->hasName() && !GV->isDeclaration())
      GV->setLinkage(GlobalValue::InternalLinkage);
    if (!GV->isConstant() && GV->hasLocalLinkage() &&
        GV->hasInitializer())
      Changed |= ProcessInternalGlobal(GV, GVI);
  }
  return Changed;
}
/// FindGlobalCtors - Find the llvm.global_ctors list, verifying that all
/// initializers have an init priority of 65535.
GlobalVariable *GlobalOpt::FindGlobalCtors(Module &M) {
  for (Module::global_iterator I = M.global_begin(), E = M.global_end();
       I != E; ++I)
    if (I->getName() == "llvm.global_ctors") {
      // Found it, verify it's an array of { int, void()* }.
      const ArrayType *ATy =
        dyn_cast<ArrayType>(I->getType()->getElementType());
      if (!ATy) return 0;
      const StructType *STy = dyn_cast<StructType>(ATy->getElementType());
      if (!STy || STy->getNumElements() != 2 ||
          STy->getElementType(0) != Type::getInt32Ty(M.getContext())) return 0;
      const PointerType *PFTy = dyn_cast<PointerType>(STy->getElementType(1));
      if (!PFTy) return 0;
      const FunctionType *FTy = dyn_cast<FunctionType>(PFTy->getElementType());
      if (!FTy || FTy->getReturnType() != Type::getVoidTy(M.getContext()) ||
          FTy->isVarArg() || FTy->getNumParams() != 0)
        return 0;

      // Verify that the initializer is simple enough for us to handle.
      if (!I->hasDefinitiveInitializer()) return 0;
      ConstantArray *CA = dyn_cast<ConstantArray>(I->getInitializer());
      if (!CA) return 0;
      for (User::op_iterator i = CA->op_begin(), e = CA->op_end(); i != e; ++i)
        if (ConstantStruct *CS = dyn_cast<ConstantStruct>(*i)) {
          if (isa<ConstantPointerNull>(CS->getOperand(1)))
            continue;

          // Must have a function or null ptr.
          if (!isa<Function>(CS->getOperand(1)))
            return 0;

          // Init priority must be standard.
          ConstantInt *CI = dyn_cast<ConstantInt>(CS->getOperand(0));
          if (!CI || CI->getZExtValue() != 65535)
            return 0;
        } else {
          return 0;
        }

      return I;
    }

  return 0;
}
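
// For reference, a list this function accepts looks like (hypothetical IR):
//
//   @llvm.global_ctors = appending global [2 x { i32, void ()* }] [
//     { i32, void ()* } { i32 65535, void ()* @ctor1 },
//     { i32, void ()* } { i32 65535, void ()* @ctor2 } ]
//
// Any entry with a non-65535 priority, or whose second field is neither a
// Function nor a null pointer, causes the whole list to be rejected.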
/// ParseGlobalCtors - Given a llvm.global_ctors list that we can understand,
/// return a list of the functions and null terminator as a vector.
static std::vector<Function*> ParseGlobalCtors(GlobalVariable *GV) {
  ConstantArray *CA = cast<ConstantArray>(GV->getInitializer());
  std::vector<Function*> Result;
  Result.reserve(CA->getNumOperands());
  for (User::op_iterator i = CA->op_begin(), e = CA->op_end(); i != e; ++i) {
    ConstantStruct *CS = cast<ConstantStruct>(*i);
    Result.push_back(dyn_cast<Function>(CS->getOperand(1)));
  }
  return Result;
}
/// InstallGlobalCtors - Given a specified llvm.global_ctors list, install the
/// specified array, returning the new global to use.
static GlobalVariable *InstallGlobalCtors(GlobalVariable *GCL,
                                          const std::vector<Function*> &Ctors,
                                          LLVMContext &Context) {
  // If we made a change, reassemble the initializer list.
  std::vector<Constant*> CSVals;
  CSVals.push_back(ConstantInt::get(Type::getInt32Ty(Context), 65535));
  CSVals.push_back(0);

  // Create the new init list.
  std::vector<Constant*> CAList;
  for (unsigned i = 0, e = Ctors.size(); i != e; ++i) {
    if (Ctors[i]) {
      CSVals[1] = Ctors[i];
    } else {
      const Type *FTy = FunctionType::get(Type::getVoidTy(Context), false);
      const PointerType *PFTy = PointerType::getUnqual(FTy);
      CSVals[1] = Constant::getNullValue(PFTy);
      CSVals[0] = ConstantInt::get(Type::getInt32Ty(Context), 2147483647);
    }
    CAList.push_back(ConstantStruct::get(Context, CSVals));
  }

  // Create the array initializer.
  const Type *StructTy =
    cast<ArrayType>(GCL->getType()->getElementType())->getElementType();
  Constant *CA = ConstantArray::get(ArrayType::get(StructTy,
                                                   CAList.size()), CAList);

  // If we didn't change the number of elements, don't create a new GV.
  if (CA->getType() == GCL->getInitializer()->getType()) {
    GCL->setInitializer(CA);
    return GCL;
  }

  // Create the new global and insert it next to the existing list.
  GlobalVariable *NGV = new GlobalVariable(Context, CA->getType(),
                                           GCL->isConstant(),
                                           GCL->getLinkage(), CA, "",
                                           GCL->isThreadLocal());
  GCL->getParent()->getGlobalList().insert(GCL, NGV);
  NGV->takeName(GCL);

  // Nuke the old list, replacing any uses with the new one.
  if (!GCL->use_empty()) {
    Constant *V = NGV;
    if (V->getType() != GCL->getType())
      V = ConstantExpr::getBitCast(V, GCL->getType());
    GCL->replaceAllUsesWith(V);
  }
  GCL->eraseFromParent();

  if (Ctors.size())
    return NGV;
  else
    return 0;
}
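
// A hedged sketch of the rewrite: if @ctor1 was evaluated away from the
// two-entry list shown earlier, the remaining entries no longer fit the old
// array type, so a new, smaller global is created (hypothetical IR):
//
//   @llvm.global_ctors = appending global [1 x { i32, void ()* }]
//     [{ i32, void ()* } { i32 65535, void ()* @ctor2 }]
//
// A null entry kept in the list is re-emitted with priority 2147483647
// (INT32_MAX) so it sorts to the end.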
static Constant *getVal(DenseMap<Value*, Constant*> &ComputedValues,
                        Value *V) {
  if (Constant *CV = dyn_cast<Constant>(V)) return CV;
  Constant *R = ComputedValues[V];
  assert(R && "Reference to an uncomputed value!");
  return R;
}
/// isSimpleEnoughPointerToCommit - Return true if this constant is simple
/// enough for us to understand.  In particular, if it is a cast of something,
/// we punt.  We basically just support direct accesses to globals and GEP's of
/// globals.  This should be kept up to date with CommitValueTo.
static bool isSimpleEnoughPointerToCommit(Constant *C, LLVMContext &Context) {
  // Conservatively, avoid aggregate types. This is because we don't
  // want to worry about them partially overlapping other stores.
  if (!cast<PointerType>(C->getType())->getElementType()->isSingleValueType())
    return false;

  if (GlobalVariable *GV = dyn_cast<GlobalVariable>(C))
    // Do not allow weak/linkonce/dllimport/dllexport linkage or
    // external globals.
    return GV->hasDefinitiveInitializer();

  if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C))
    // Handle a constantexpr gep.
    if (CE->getOpcode() == Instruction::GetElementPtr &&
        isa<GlobalVariable>(CE->getOperand(0)) &&
        cast<GEPOperator>(CE)->isInBounds()) {
      GlobalVariable *GV = cast<GlobalVariable>(CE->getOperand(0));
      // Do not allow weak/linkonce/dllimport/dllexport linkage or
      // external globals.
      if (!GV->hasDefinitiveInitializer())
        return false;

      // The first index must be zero.
      ConstantInt *CI = dyn_cast<ConstantInt>(*next(CE->op_begin()));
      if (!CI || !CI->isZero()) return false;

      // The remaining indices must be compile-time known integers within the
      // notional bounds of the corresponding static array types.
      if (!CE->isGEPWithNoNotionalOverIndexing())
        return false;

      return ConstantFoldLoadThroughGEPConstantExpr(GV->getInitializer(), CE,
                                                    Context);
    }

  return false;
}
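
// Hedged examples of what this predicate accepts and rejects
// (hypothetical IR constants):
//
//   @G                                                    ; accept: a global
//   getelementptr inbounds ([4 x i32]* @A, i32 0, i32 2)  ; accept
//   getelementptr ([4 x i32]* @A, i32 1, i32 0)  ; reject: not inbounds/zero
//   bitcast (i32* @G to i8*)                     ; reject: we punt on casts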
/// EvaluateStoreInto - Evaluate a piece of a constantexpr store into a global
/// initializer.  This returns 'Init' modified to reflect 'Val' stored into it.
/// At this point, the GEP operands of Addr [0, OpNo) have been stepped into.
static Constant *EvaluateStoreInto(Constant *Init, Constant *Val,
                                   ConstantExpr *Addr, unsigned OpNo,
                                   LLVMContext &Context) {
  // Base case of the recursion.
  if (OpNo == Addr->getNumOperands()) {
    assert(Val->getType() == Init->getType() && "Type mismatch!");
    return Val;
  }

  if (const StructType *STy = dyn_cast<StructType>(Init->getType())) {
    std::vector<Constant*> Elts;

    // Break up the constant into its elements.
    if (ConstantStruct *CS = dyn_cast<ConstantStruct>(Init)) {
      for (User::op_iterator i = CS->op_begin(), e = CS->op_end(); i != e; ++i)
        Elts.push_back(cast<Constant>(*i));
    } else if (isa<ConstantAggregateZero>(Init)) {
      for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i)
        Elts.push_back(Constant::getNullValue(STy->getElementType(i)));
    } else if (isa<UndefValue>(Init)) {
      for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i)
        Elts.push_back(UndefValue::get(STy->getElementType(i)));
    } else {
      llvm_unreachable("This code is out of sync with "
                       "ConstantFoldLoadThroughGEPConstantExpr");
    }

    // Replace the element that we are supposed to.
    ConstantInt *CU = cast<ConstantInt>(Addr->getOperand(OpNo));
    unsigned Idx = CU->getZExtValue();
    assert(Idx < STy->getNumElements() && "Struct index out of range!");
    Elts[Idx] = EvaluateStoreInto(Elts[Idx], Val, Addr, OpNo+1, Context);

    // Return the modified struct.
    return ConstantStruct::get(Context, &Elts[0], Elts.size(), STy->isPacked());
  }

  ConstantInt *CI = cast<ConstantInt>(Addr->getOperand(OpNo));
  const ArrayType *ATy = cast<ArrayType>(Init->getType());

  // Break up the array into elements.
  std::vector<Constant*> Elts;
  if (ConstantArray *CA = dyn_cast<ConstantArray>(Init)) {
    for (User::op_iterator i = CA->op_begin(), e = CA->op_end(); i != e; ++i)
      Elts.push_back(cast<Constant>(*i));
  } else if (isa<ConstantAggregateZero>(Init)) {
    Constant *Elt = Constant::getNullValue(ATy->getElementType());
    Elts.assign(ATy->getNumElements(), Elt);
  } else if (isa<UndefValue>(Init)) {
    Constant *Elt = UndefValue::get(ATy->getElementType());
    Elts.assign(ATy->getNumElements(), Elt);
  } else {
    llvm_unreachable("This code is out of sync with "
                     "ConstantFoldLoadThroughGEPConstantExpr");
  }

  assert(CI->getZExtValue() < ATy->getNumElements());
  Elts[CI->getZExtValue()] =
    EvaluateStoreInto(Elts[CI->getZExtValue()], Val, Addr, OpNo+1, Context);
  return ConstantArray::get(ATy, Elts);
}
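
// A hedged, worked example of the recursion: committing i32 42 through
// getelementptr (@G, i32 0, i32 1), where @G holds a { i32, i32 }
// zeroinitializer, expands the aggregate and replaces field 1:
//
//   { i32 0, i32 0 }  -->  { i32 0, i32 42 }
//
// Each recursive call peels off one GEP index and rebuilds the enclosing
// struct or array constant on the way back out.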
/// CommitValueTo - We have decided that Addr (which satisfies the predicate
/// isSimpleEnoughPointerToCommit) should get Val as its value.  Make it happen.
static void CommitValueTo(Constant *Val, Constant *Addr,
                          LLVMContext &Context) {
  if (GlobalVariable *GV = dyn_cast<GlobalVariable>(Addr)) {
    assert(GV->hasInitializer());
    GV->setInitializer(Val);
    return;
  }

  ConstantExpr *CE = cast<ConstantExpr>(Addr);
  GlobalVariable *GV = cast<GlobalVariable>(CE->getOperand(0));

  Constant *Init = GV->getInitializer();
  Init = EvaluateStoreInto(Init, Val, CE, 2, Context);
  GV->setInitializer(Init);
}
/// ComputeLoadResult - Return the value that would be computed by a load from
/// P after the stores reflected by 'memory' have been performed.  If we can't
/// decide, return null.
static Constant *ComputeLoadResult(Constant *P,
                                const DenseMap<Constant*, Constant*> &Memory,
                                LLVMContext &Context) {
  // If this memory location has been recently stored, use the stored value: it
  // is the most up-to-date.
  DenseMap<Constant*, Constant*>::const_iterator I = Memory.find(P);
  if (I != Memory.end()) return I->second;

  // Access it.
  if (GlobalVariable *GV = dyn_cast<GlobalVariable>(P)) {
    if (GV->hasDefinitiveInitializer())
      return GV->getInitializer();
    return 0;
  }

  // Handle a constantexpr getelementptr.
  if (ConstantExpr *CE = dyn_cast<ConstantExpr>(P))
    if (CE->getOpcode() == Instruction::GetElementPtr &&
        isa<GlobalVariable>(CE->getOperand(0))) {
      GlobalVariable *GV = cast<GlobalVariable>(CE->getOperand(0));
      if (GV->hasDefinitiveInitializer())
        return ConstantFoldLoadThroughGEPConstantExpr(GV->getInitializer(), CE,
                                                      Context);
    }

  return 0;  // don't know how to evaluate.
}
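
// For example (hedged): with Memory = { @X -> i32 7 }, a load of @X yields
// i32 7; a load of an untouched @Y with a definitive initializer yields that
// initializer; anything else returns null, which aborts the evaluation of
// the enclosing constructor.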
/// EvaluateFunction - Evaluate a call to function F, returning true if
/// successful, false if we can't evaluate it.  ActualArgs contains the formal
/// arguments for the function.
static bool EvaluateFunction(Function *F, Constant *&RetVal,
                             const SmallVectorImpl<Constant*> &ActualArgs,
                             std::vector<Function*> &CallStack,
                             DenseMap<Constant*, Constant*> &MutatedMemory,
                             std::vector<GlobalVariable*> &AllocaTmps) {
  // Check to see if this function is already executing (recursion).  If so,
  // bail out.  TODO: we might want to accept limited recursion.
  if (std::find(CallStack.begin(), CallStack.end(), F) != CallStack.end())
    return false;

  LLVMContext &Context = F->getContext();

  CallStack.push_back(F);

  /// Values - As we compute SSA register values, we store their contents here.
  DenseMap<Value*, Constant*> Values;

  // Initialize arguments to the incoming values specified.
  unsigned ArgNo = 0;
  for (Function::arg_iterator AI = F->arg_begin(), E = F->arg_end(); AI != E;
       ++AI, ++ArgNo)
    Values[AI] = ActualArgs[ArgNo];

  /// ExecutedBlocks - We only handle non-looping, non-recursive code.  As such,
  /// we can only evaluate any one basic block at most once.  This set keeps
  /// track of what we have executed so we can detect recursive cases etc.
  SmallPtrSet<BasicBlock*, 32> ExecutedBlocks;

  // CurInst - The current instruction we're evaluating.
  BasicBlock::iterator CurInst = F->begin()->begin();

  // This is the main evaluation loop.
  while (1) {
    Constant *InstResult = 0;

    if (StoreInst *SI = dyn_cast<StoreInst>(CurInst)) {
      if (SI->isVolatile()) return false;  // no volatile accesses.
      Constant *Ptr = getVal(Values, SI->getOperand(1));
      if (!isSimpleEnoughPointerToCommit(Ptr, Context))
        // If this is too complex for us to commit, reject it.
        return false;
      Constant *Val = getVal(Values, SI->getOperand(0));
      MutatedMemory[Ptr] = Val;
    } else if (BinaryOperator *BO = dyn_cast<BinaryOperator>(CurInst)) {
      InstResult = ConstantExpr::get(BO->getOpcode(),
                                     getVal(Values, BO->getOperand(0)),
                                     getVal(Values, BO->getOperand(1)));
    } else if (CmpInst *CI = dyn_cast<CmpInst>(CurInst)) {
      InstResult = ConstantExpr::getCompare(CI->getPredicate(),
                                            getVal(Values, CI->getOperand(0)),
                                            getVal(Values, CI->getOperand(1)));
    } else if (CastInst *CI = dyn_cast<CastInst>(CurInst)) {
      InstResult = ConstantExpr::getCast(CI->getOpcode(),
                                         getVal(Values, CI->getOperand(0)),
                                         CI->getType());
    } else if (SelectInst *SI = dyn_cast<SelectInst>(CurInst)) {
      InstResult =
        ConstantExpr::getSelect(getVal(Values, SI->getOperand(0)),
                                getVal(Values, SI->getOperand(1)),
                                getVal(Values, SI->getOperand(2)));
    } else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(CurInst)) {
      Constant *P = getVal(Values, GEP->getOperand(0));
      SmallVector<Constant*, 8> GEPOps;
      for (User::op_iterator i = GEP->op_begin() + 1, e = GEP->op_end();
           i != e; ++i)
        GEPOps.push_back(getVal(Values, *i));
      InstResult = cast<GEPOperator>(GEP)->isInBounds() ?
        ConstantExpr::getInBoundsGetElementPtr(P, &GEPOps[0], GEPOps.size()) :
        ConstantExpr::getGetElementPtr(P, &GEPOps[0], GEPOps.size());
    } else if (LoadInst *LI = dyn_cast<LoadInst>(CurInst)) {
      if (LI->isVolatile()) return false;  // no volatile accesses.
      InstResult = ComputeLoadResult(getVal(Values, LI->getOperand(0)),
                                     MutatedMemory, Context);
      if (InstResult == 0) return false; // Could not evaluate load.
    } else if (AllocaInst *AI = dyn_cast<AllocaInst>(CurInst)) {
      if (AI->isArrayAllocation()) return false;  // Cannot handle array allocs.
      const Type *Ty = AI->getType()->getElementType();
      AllocaTmps.push_back(new GlobalVariable(Context, Ty, false,
                                              GlobalValue::InternalLinkage,
                                              UndefValue::get(Ty),
                                              AI->getName()));
      InstResult = AllocaTmps.back();
    } else if (CallInst *CI = dyn_cast<CallInst>(CurInst)) {

      // Debug info can safely be ignored here.
      if (isa<DbgInfoIntrinsic>(CI)) {
        ++CurInst;
        continue;
      }

      // Cannot handle inline asm.
      if (isa<InlineAsm>(CI->getOperand(0))) return false;

      // Resolve function pointers.
      Function *Callee = dyn_cast<Function>(getVal(Values, CI->getOperand(0)));
      if (!Callee) return false;  // Cannot resolve.

      SmallVector<Constant*, 8> Formals;
      for (User::op_iterator i = CI->op_begin() + 1, e = CI->op_end();
           i != e; ++i)
        Formals.push_back(getVal(Values, *i));

      if (Callee->isDeclaration()) {
        // If this is a function we can constant fold, do it.
        if (Constant *C = ConstantFoldCall(Callee, Formals.data(),
                                           Formals.size())) {
          InstResult = C;
        } else {
          return false;
        }
      } else {
        if (Callee->getFunctionType()->isVarArg())
          return false;

        Constant *RetVal;
        // Execute the call, if successful, use the return value.
        if (!EvaluateFunction(Callee, RetVal, Formals, CallStack,
                              MutatedMemory, AllocaTmps))
          return false;
        InstResult = RetVal;
      }
    } else if (isa<TerminatorInst>(CurInst)) {
      BasicBlock *NewBB = 0;
      if (BranchInst *BI = dyn_cast<BranchInst>(CurInst)) {
        if (BI->isUnconditional()) {
          NewBB = BI->getSuccessor(0);
        } else {
          ConstantInt *Cond =
            dyn_cast<ConstantInt>(getVal(Values, BI->getCondition()));
          if (!Cond) return false;  // Cannot determine.

          NewBB = BI->getSuccessor(!Cond->getZExtValue());
        }
      } else if (SwitchInst *SI = dyn_cast<SwitchInst>(CurInst)) {
        ConstantInt *Val =
          dyn_cast<ConstantInt>(getVal(Values, SI->getCondition()));
        if (!Val) return false;  // Cannot determine.
        NewBB = SI->getSuccessor(SI->findCaseValue(Val));
      } else if (ReturnInst *RI = dyn_cast<ReturnInst>(CurInst)) {
        if (RI->getNumOperands())
          RetVal = getVal(Values, RI->getOperand(0));

        CallStack.pop_back();  // return from fn.
        return true;  // We succeeded at evaluating this ctor!
      } else {
        // invoke, unwind, unreachable.
        return false;  // Cannot handle this terminator.
      }

      // Okay, we succeeded in evaluating this control flow.  See if we have
      // executed the new block before.  If so, we have a looping function,
      // which we cannot evaluate in reasonable time.
      if (!ExecutedBlocks.insert(NewBB))
        return false;  // looped!

      // Okay, we have never been in this block before.  Check to see if there
      // are any PHI nodes.  If so, evaluate them with information about where
      // we came from.
      BasicBlock *OldBB = CurInst->getParent();
      CurInst = NewBB->begin();
      PHINode *PN;
      for (; (PN = dyn_cast<PHINode>(CurInst)); ++CurInst)
        Values[PN] = getVal(Values, PN->getIncomingValueForBlock(OldBB));

      // Do NOT increment CurInst.  We know that the terminator had no value.
      continue;
    } else {
      // Did not know how to evaluate this!
      return false;
    }

    if (!CurInst->use_empty())
      Values[CurInst] = InstResult;

    // Advance program counter.
    ++CurInst;
  }
}
/// EvaluateStaticConstructor - Evaluate static constructors in the function,
/// if we can.  Return true if we can, false otherwise.
static bool EvaluateStaticConstructor(Function *F) {
  /// MutatedMemory - For each store we execute, we update this map.  Loads
  /// check this to get the most up-to-date value.  If evaluation is successful,
  /// this state is committed to the process.
  DenseMap<Constant*, Constant*> MutatedMemory;

  /// AllocaTmps - To 'execute' an alloca, we create a temporary global variable
  /// to represent its body.  This vector is needed so we can delete the
  /// temporary globals when we are done.
  std::vector<GlobalVariable*> AllocaTmps;

  /// CallStack - This is used to detect recursion.  In pathological situations
  /// we could hit exponential behavior, but at least there is nothing
  /// unbounded.
  std::vector<Function*> CallStack;

  // Call the function.
  Constant *RetValDummy;
  bool EvalSuccess = EvaluateFunction(F, RetValDummy,
                                      SmallVector<Constant*, 0>(), CallStack,
                                      MutatedMemory, AllocaTmps);
  if (EvalSuccess) {
    // We succeeded at evaluation: commit the result.
    DEBUG(errs() << "FULLY EVALUATED GLOBAL CTOR FUNCTION '"
                 << F->getName() << "' to " << MutatedMemory.size()
                 << " stores.\n");
    for (DenseMap<Constant*, Constant*>::iterator I = MutatedMemory.begin(),
         E = MutatedMemory.end(); I != E; ++I)
      CommitValueTo(I->second, I->first, F->getContext());
  }

  // At this point, we are done interpreting.  If we created any 'alloca'
  // temporaries, release them now.
  while (!AllocaTmps.empty()) {
    GlobalVariable *Tmp = AllocaTmps.back();
    AllocaTmps.pop_back();

    // If there are still users of the alloca, the program is doing something
    // silly, e.g. storing the address of the alloca somewhere and using it
    // later.  Since this is undefined, we'll just make it be null.
    if (!Tmp->use_empty())
      Tmp->replaceAllUsesWith(Constant::getNullValue(Tmp->getType()));
    delete Tmp;
  }

  return EvalSuccess;
}
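
// A hedged end-to-end example (hypothetical source):
//
//   static int X;
//   static void init() { X = 5; }   // registered in llvm.global_ctors
//
// EvaluateFunction records MutatedMemory[@X] = i32 5 and succeeds, so
// CommitValueTo rewrites @X's initializer to 5 and the caller can drop the
// ctor from the list entirely.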
/// OptimizeGlobalCtorsList - Simplify and evaluate global ctors if possible.
/// Return true if anything changed.
bool GlobalOpt::OptimizeGlobalCtorsList(GlobalVariable *&GCL) {
  std::vector<Function*> Ctors = ParseGlobalCtors(GCL);
  bool MadeChange = false;
  if (Ctors.empty()) return false;

  // Loop over global ctors, optimizing them when we can.
  for (unsigned i = 0; i != Ctors.size(); ++i) {
    Function *F = Ctors[i];
    // Found a null terminator in the middle of the list, prune off the rest of
    // the list.
    if (F == 0) {
      if (i != Ctors.size()-1) {
        Ctors.resize(i+1);
        MadeChange = true;
      }
      break;
    }

    // We cannot simplify external ctor functions.
    if (F->empty()) continue;

    // If we can evaluate the ctor at compile time, do.
    if (EvaluateStaticConstructor(F)) {
      Ctors.erase(Ctors.begin()+i);
      MadeChange = true;
      --i;
      ++NumCtorsEvaluated;
      continue;
    }
  }

  if (!MadeChange) return false;

  GCL = InstallGlobalCtors(GCL, Ctors, GCL->getContext());
  return true;
}
bool GlobalOpt::OptimizeGlobalAliases(Module &M) {
  bool Changed = false;

  for (Module::alias_iterator I = M.alias_begin(), E = M.alias_end();
       I != E;) {
    Module::alias_iterator J = I++;
    // Aliases without names cannot be referenced outside this module.
    if (!J->hasName() && !J->isDeclaration())
      J->setLinkage(GlobalValue::InternalLinkage);
    // If the aliasee may change at link time, nothing can be done - bail out.
    if (J->mayBeOverridden())
      continue;

    Constant *Aliasee = J->getAliasee();
    GlobalValue *Target = cast<GlobalValue>(Aliasee->stripPointerCasts());
    Target->removeDeadConstantUsers();
    bool hasOneUse = Target->hasOneUse() && Aliasee->hasOneUse();

    // Make all users of the alias use the aliasee instead.
    if (!J->use_empty()) {
      J->replaceAllUsesWith(Aliasee);
      ++NumAliasesResolved;
      Changed = true;
    }

    // If the aliasee has internal linkage, give it the name and linkage
    // of the alias, and delete the alias.  This turns:
    //   define internal ... @f(...)
    //   @a = alias ... @f
    // into:
    //   define ... @a(...)
    if (!Target->hasLocalLinkage())
      continue;

    // The transform is only useful if the alias does not have internal linkage.
    if (J->hasLocalLinkage())
      continue;

    // Do not perform the transform if multiple aliases potentially target the
    // aliasee.  This check also ensures that it is safe to replace the section
    // and other attributes of the aliasee with those of the alias.
    if (!hasOneUse)
      continue;

    // Give the aliasee the name, linkage and other attributes of the alias.
    Target->takeName(J);
    Target->setLinkage(J->getLinkage());
    Target->GlobalValue::copyAttributesFrom(J);

    // Delete the alias.
    M.getAliasList().erase(J);
    ++NumAliasesRemoved;
    Changed = true;
  }

  return Changed;
}
bool GlobalOpt::runOnModule(Module &M) {
  bool Changed = false;

  // Try to find the llvm.global_ctors list.
  GlobalVariable *GlobalCtors = FindGlobalCtors(M);

  bool LocalChange = true;
  while (LocalChange) {
    LocalChange = false;

    // Delete functions that are trivially dead, ccc -> fastcc.
    LocalChange |= OptimizeFunctions(M);

    // Optimize global_ctors list.
    if (GlobalCtors)
      LocalChange |= OptimizeGlobalCtorsList(GlobalCtors);

    // Optimize non-address-taken globals.
    LocalChange |= OptimizeGlobalVars(M);

    // Resolve aliases, when possible.
    LocalChange |= OptimizeGlobalAliases(M);
    Changed |= LocalChange;
  }

  // TODO: Move all global ctors functions to the end of the module for code
  // layout.

  return Changed;
}