//===- GlobalOpt.cpp - Optimize Global Variables --------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This pass transforms simple global variables that never have their address
// taken.  If obviously true, it marks read/write globals as constant, deletes
// variables only stored to, etc.
//
//===----------------------------------------------------------------------===//

#include "llvm/Transforms/IPO/GlobalOpt.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/Twine.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/Analysis/BlockFrequencyInfo.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/BinaryFormat/Dwarf.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalAlias.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Use.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/Support/AtomicOrdering.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/IPO.h"
#include "llvm/Transforms/Utils/CtorUtils.h"
#include "llvm/Transforms/Utils/Evaluator.h"
#include "llvm/Transforms/Utils/GlobalStatus.h"
#include "llvm/Transforms/Utils/Local.h"

using namespace llvm;

#define DEBUG_TYPE "globalopt"

STATISTIC(NumMarked, "Number of globals marked constant");
STATISTIC(NumUnnamed, "Number of globals marked unnamed_addr");
STATISTIC(NumSRA, "Number of aggregate globals broken into scalars");
STATISTIC(NumSubstitute, "Number of globals with initializers stored into them");
STATISTIC(NumDeleted, "Number of globals deleted");
STATISTIC(NumGlobUses, "Number of global uses devirtualized");
STATISTIC(NumLocalized, "Number of globals localized");
STATISTIC(NumShrunkToBool, "Number of global vars shrunk to booleans");
STATISTIC(NumFastCallFns, "Number of functions converted to fastcc");
STATISTIC(NumCtorsEvaluated, "Number of static ctors evaluated");
STATISTIC(NumNestRemoved, "Number of nest attributes removed");
STATISTIC(NumAliasesResolved, "Number of global aliases resolved");
STATISTIC(NumAliasesRemoved, "Number of global aliases eliminated");
STATISTIC(NumCXXDtorsRemoved, "Number of global C++ destructors removed");
STATISTIC(NumInternalFunc, "Number of internal functions");
STATISTIC(NumColdCC, "Number of functions marked coldcc");

static cl::opt<bool>
    EnableColdCCStressTest("enable-coldcc-stress-test",
                           cl::desc("Enable stress test of coldcc by adding "
                                    "calling conv to all internal functions."),
                           cl::init(false), cl::Hidden);

static cl::opt<int> ColdCCRelFreq(
    "coldcc-rel-freq", cl::Hidden, cl::init(2),
    cl::desc(
        "Maximum block frequency, expressed as a percentage of caller's "
        "entry frequency, for a call site to be considered cold for enabling "
        "coldcc"));

/// Is this global variable possibly used by a leak checker as a root?  If so,
/// we might not really want to eliminate the stores to it.
static bool isLeakCheckerRoot(GlobalVariable *GV) {
  // A global variable is a root if it is a pointer, or could plausibly contain
  // a pointer.  There are two challenges; one is that we could have a struct
  // that has an inner member which is a pointer.  We recurse through the type
  // to detect these (up to a point).  The other is that we may actually be a
  // union of a pointer and another type, and so our LLVM type is an integer
  // which gets converted into a pointer, or our type is an [i8 x #] with a
  // pointer potentially contained here.

  if (GV->hasPrivateLinkage())
    return false;

  SmallVector<Type *, 4> Types;
  Types.push_back(GV->getValueType());

  unsigned Limit = 20;
  do {
    Type *Ty = Types.pop_back_val();
    switch (Ty->getTypeID()) {
    default:
      break;
    case Type::PointerTyID:
      return true;
    case Type::FixedVectorTyID:
    case Type::ScalableVectorTyID:
      if (cast<VectorType>(Ty)->getElementType()->isPointerTy())
        return true;
      break;
    case Type::ArrayTyID:
      Types.push_back(cast<ArrayType>(Ty)->getElementType());
      break;
    case Type::StructTyID: {
      StructType *STy = cast<StructType>(Ty);
      if (STy->isOpaque()) return true;
      for (Type *InnerTy : STy->elements()) {
        if (isa<PointerType>(InnerTy)) return true;
        if (isa<StructType>(InnerTy) || isa<ArrayType>(InnerTy) ||
            isa<VectorType>(InnerTy))
          Types.push_back(InnerTy);
      }
      break;
    }
    }
    if (--Limit == 0) return true;
  } while (!Types.empty());

  return false;
}
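
// Illustrative example (not taken from this file): with the check above, a
// global like
//   struct Node { int Key; Node *Next; };
//   static Node Head;     // contains a pointer, so treated as a root
// is conservatively kept as a leak-checker root, while
//   static int Counter;   // no pointers anywhere in the type
// is not.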

/// Given a value that is stored to a global but never read, determine whether
/// it's safe to remove the store and the chain of computation that feeds the
/// store.
static bool IsSafeComputationToRemove(
    Value *V, function_ref<TargetLibraryInfo &(Function &)> GetTLI) {
  do {
    if (isa<Constant>(V))
      return true;
    if (!V->hasOneUse())
      return false;
    if (isa<LoadInst>(V) || isa<InvokeInst>(V) || isa<Argument>(V) ||
        isa<GlobalValue>(V))
      return false;
    if (isAllocationFn(V, GetTLI))
      return true;

    Instruction *I = cast<Instruction>(V);
    if (I->mayHaveSideEffects())
      return false;
    if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(I)) {
      if (!GEP->hasAllConstantIndices())
        return false;
    } else if (I->getNumOperands() != 1) {
      return false;
    }

    V = I->getOperand(0);
  } while (true);
}

/// This GV is a pointer root.  Loop over all users of the global and clean up
/// any that obviously don't assign the global a value that isn't dynamically
/// allocated.
static bool
CleanupPointerRootUsers(GlobalVariable *GV,
                        function_ref<TargetLibraryInfo &(Function &)> GetTLI) {
  // A brief explanation of leak checkers.  The goal is to find bugs where
  // pointers are forgotten, causing an accumulating growth in memory
  // usage over time.  The common strategy for leak checkers is to explicitly
  // allow the memory pointed to by globals at exit.  This is popular because it
  // also solves another problem where the main thread of a C++ program may shut
  // down before other threads that are still expecting to use those globals.  To
  // handle that case, we expect the program may create a singleton and never
  // destroy it.

  bool Changed = false;

  // If Dead[n].first is the only use of a malloc result, we can delete its
  // chain of computation and the store to the global in Dead[n].second.
  SmallVector<std::pair<Instruction *, Instruction *>, 32> Dead;

  SmallVector<User *> Worklist(GV->users());
  // Constants can't be pointers to dynamically allocated memory.
  while (!Worklist.empty()) {
    User *U = Worklist.pop_back_val();
    if (StoreInst *SI = dyn_cast<StoreInst>(U)) {
      Value *V = SI->getValueOperand();
      if (isa<Constant>(V)) {
        Changed = true;
        SI->eraseFromParent();
      } else if (Instruction *I = dyn_cast<Instruction>(V)) {
        if (I->hasOneUse())
          Dead.push_back(std::make_pair(I, SI));
      }
    } else if (MemSetInst *MSI = dyn_cast<MemSetInst>(U)) {
      if (isa<Constant>(MSI->getValue())) {
        Changed = true;
        MSI->eraseFromParent();
      } else if (Instruction *I = dyn_cast<Instruction>(MSI->getValue())) {
        if (I->hasOneUse())
          Dead.push_back(std::make_pair(I, MSI));
      }
    } else if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(U)) {
      GlobalVariable *MemSrc = dyn_cast<GlobalVariable>(MTI->getSource());
      if (MemSrc && MemSrc->isConstant()) {
        Changed = true;
        MTI->eraseFromParent();
      } else if (Instruction *I = dyn_cast<Instruction>(MTI->getSource())) {
        if (I->hasOneUse())
          Dead.push_back(std::make_pair(I, MTI));
      }
    } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(U)) {
      if (isa<GEPOperator>(CE))
        append_range(Worklist, CE->users());
    }
  }

  for (int i = 0, e = Dead.size(); i != e; ++i) {
    if (IsSafeComputationToRemove(Dead[i].first, GetTLI)) {
      Dead[i].second->eraseFromParent();
      Instruction *I = Dead[i].first;
      do {
        if (isAllocationFn(I, GetTLI))
          break;
        Instruction *J = dyn_cast<Instruction>(I->getOperand(0));
        if (!J)
          break;
        I->eraseFromParent();
        I = J;
      } while (true);
      I->eraseFromParent();
      Changed = true;
    }
  }

  GV->removeDeadConstantUsers();
  return Changed;
}
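
// Rough illustration (hypothetical IR, not from a test in this file): for a
// global that is only ever written, e.g.
//   @cache = internal global ptr null
//   %p = call ptr @malloc(i64 32)
//   store ptr %p, ptr @cache
// both the store and the otherwise-unused malloc chain can be deleted, unless
// @cache looks like a leak-checker root (see isLeakCheckerRoot above).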

/// We just marked GV constant.  Loop over all users of the global, cleaning up
/// the obvious ones.  This is largely just a quick scan over the use list to
/// clean up the easy and obvious cruft.  This returns true if it made a change.
static bool CleanupConstantGlobalUsers(GlobalVariable *GV,
                                       const DataLayout &DL) {
  Constant *Init = GV->getInitializer();
  SmallVector<User *, 8> WorkList(GV->users());
  SmallPtrSet<User *, 8> Visited;
  bool Changed = false;

  SmallVector<WeakTrackingVH> MaybeDeadInsts;
  auto EraseFromParent = [&](Instruction *I) {
    for (Value *Op : I->operands())
      if (auto *OpI = dyn_cast<Instruction>(Op))
        MaybeDeadInsts.push_back(OpI);
    I->eraseFromParent();
    Changed = true;
  };
  while (!WorkList.empty()) {
    User *U = WorkList.pop_back_val();
    if (!Visited.insert(U).second)
      continue;

    if (auto *BO = dyn_cast<BitCastOperator>(U))
      append_range(WorkList, BO->users());
    if (auto *ASC = dyn_cast<AddrSpaceCastOperator>(U))
      append_range(WorkList, ASC->users());
    else if (auto *GEP = dyn_cast<GEPOperator>(U))
      append_range(WorkList, GEP->users());
    else if (auto *LI = dyn_cast<LoadInst>(U)) {
      // A load from a uniform value is always the same, regardless of any
      // applied offset.
      Type *Ty = LI->getType();
      if (Constant *Res = ConstantFoldLoadFromUniformValue(Init, Ty)) {
        LI->replaceAllUsesWith(Res);
        EraseFromParent(LI);
        continue;
      }

      Value *PtrOp = LI->getPointerOperand();
      APInt Offset(DL.getIndexTypeSizeInBits(PtrOp->getType()), 0);
      PtrOp = PtrOp->stripAndAccumulateConstantOffsets(
          DL, Offset, /* AllowNonInbounds */ true);
      if (auto *Value = ConstantFoldLoadFromConst(Init, Ty, Offset, DL)) {
        LI->replaceAllUsesWith(Value);
        EraseFromParent(LI);
      }
    } else if (StoreInst *SI = dyn_cast<StoreInst>(U)) {
      // Store must be unreachable or storing Init into the global.
      EraseFromParent(SI);
    } else if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(U)) { // memset/cpy/mv
      if (getUnderlyingObject(MI->getRawDest()) == GV)
        EraseFromParent(MI);
    }
  }

  Changed |=
      RecursivelyDeleteTriviallyDeadInstructionsPermissive(MaybeDeadInsts);
  GV->removeDeadConstantUsers();
  return Changed;
}

/// Part of the global at a specific offset, which is only accessed through
/// loads and stores with the given type.
struct GlobalPart {
  Type *Ty;
  Constant *Initializer = nullptr;
  bool IsLoaded = false;
  bool IsStored = false;
};

/// Look at all uses of the global and determine which (offset, type) pairs it
/// can be split into.
static bool collectSRATypes(DenseMap<uint64_t, GlobalPart> &Parts,
                            GlobalVariable *GV, const DataLayout &DL) {
  SmallVector<Use *, 16> Worklist;
  SmallPtrSet<Use *, 16> Visited;
  auto AppendUses = [&](Value *V) {
    for (Use &U : V->uses())
      if (Visited.insert(&U).second)
        Worklist.push_back(&U);
  };
  AppendUses(GV);
  while (!Worklist.empty()) {
    Use *U = Worklist.pop_back_val();
    User *V = U->getUser();

    auto *GEP = dyn_cast<GEPOperator>(V);
    if (isa<BitCastOperator>(V) || isa<AddrSpaceCastOperator>(V) ||
        (GEP && GEP->hasAllConstantIndices())) {
      AppendUses(V);
      continue;
    }

    if (Value *Ptr = getLoadStorePointerOperand(V)) {
      // This is storing the global address into somewhere, not storing into
      // the global.
      if (isa<StoreInst>(V) && U->getOperandNo() == 0)
        return false;

      APInt Offset(DL.getIndexTypeSizeInBits(Ptr->getType()), 0);
      Ptr = Ptr->stripAndAccumulateConstantOffsets(DL, Offset,
                                                   /* AllowNonInbounds */ true);
      if (Ptr != GV || Offset.getActiveBits() >= 64)
        return false;

      // TODO: We currently require that all accesses at a given offset must
      // use the same type. This could be relaxed.
      Type *Ty = getLoadStoreType(V);
      const auto &[It, Inserted] =
          Parts.try_emplace(Offset.getZExtValue(), GlobalPart{Ty});
      if (Ty != It->second.Ty)
        return false;

      if (Inserted) {
        It->second.Initializer =
            ConstantFoldLoadFromConst(GV->getInitializer(), Ty, Offset, DL);
        if (!It->second.Initializer) {
          LLVM_DEBUG(dbgs() << "Global SRA: Failed to evaluate initializer of "
                            << *GV << " with type " << *Ty << " at offset "
                            << Offset.getZExtValue());
          return false;
        }
      }

      // Scalable types not currently supported.
      if (Ty->isScalableTy())
        return false;

      auto IsStored = [](Value *V, Constant *Initializer) {
        auto *SI = dyn_cast<StoreInst>(V);
        if (!SI)
          return false;

        Constant *StoredConst = dyn_cast<Constant>(SI->getOperand(0));
        if (!StoredConst)
          return true;

        // Don't consider stores that only write the initializer value.
        return Initializer != StoredConst;
      };

      It->second.IsLoaded |= isa<LoadInst>(V);
      It->second.IsStored |= IsStored(V, It->second.Initializer);
      continue;
    }

    // Ignore dead constant users.
    if (auto *C = dyn_cast<Constant>(V)) {
      if (!isSafeToDestroyConstant(C))
        return false;
      continue;
    }

    // Unknown user.
    return false;
  }

  return true;
}
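
// Sketch of the result (hypothetical example, not from a test): for
//   @g = internal global { i32, i32 } zeroinitializer
// that is only accessed as "load i32, ptr @g" and
//   "store i32 %v, ptr getelementptr inbounds (i8, ptr @g, i64 4)",
// Parts ends up mapping offset 0 -> i32 and offset 4 -> i32.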

/// Copy over the debug info for a variable to its SRA replacements.
static void transferSRADebugInfo(GlobalVariable *GV, GlobalVariable *NGV,
                                 uint64_t FragmentOffsetInBits,
                                 uint64_t FragmentSizeInBits,
                                 uint64_t VarSize) {
  SmallVector<DIGlobalVariableExpression *, 1> GVs;
  GV->getDebugInfo(GVs);
  for (auto *GVE : GVs) {
    DIVariable *Var = GVE->getVariable();
    DIExpression *Expr = GVE->getExpression();
    int64_t CurVarOffsetInBytes = 0;
    uint64_t CurVarOffsetInBits = 0;
    uint64_t FragmentEndInBits = FragmentOffsetInBits + FragmentSizeInBits;

    // Calculate the offset (Bytes), Continue if unknown.
    if (!Expr->extractIfOffset(CurVarOffsetInBytes))
      continue;

    // Ignore negative offset.
    if (CurVarOffsetInBytes < 0)
      continue;

    // Convert offset to bits.
    CurVarOffsetInBits = CHAR_BIT * (uint64_t)CurVarOffsetInBytes;

    // Current var starts after the fragment, ignore.
    if (CurVarOffsetInBits >= FragmentEndInBits)
      continue;

    uint64_t CurVarSize = Var->getType()->getSizeInBits();
    uint64_t CurVarEndInBits = CurVarOffsetInBits + CurVarSize;
    // Current variable ends before start of fragment, ignore.
    if (CurVarSize != 0 && /* CurVarSize is known */
        CurVarEndInBits <= FragmentOffsetInBits)
      continue;

    // Current variable fits in (not greater than) the fragment,
    // does not need fragment expression.
    if (CurVarSize != 0 && /* CurVarSize is known */
        CurVarOffsetInBits >= FragmentOffsetInBits &&
        CurVarEndInBits <= FragmentEndInBits) {
      uint64_t CurVarOffsetInFragment =
          (CurVarOffsetInBits - FragmentOffsetInBits) / 8;
      if (CurVarOffsetInFragment != 0)
        Expr = DIExpression::get(Expr->getContext(), {dwarf::DW_OP_plus_uconst,
                                                      CurVarOffsetInFragment});
      else
        Expr = DIExpression::get(Expr->getContext(), {});
      auto *NGVE =
          DIGlobalVariableExpression::get(GVE->getContext(), Var, Expr);
      NGV->addDebugInfo(NGVE);
      continue;
    }

    // Current variable does not fit in single fragment,
    // emit a fragment expression.
    if (FragmentSizeInBits < VarSize) {
      if (CurVarOffsetInBits > FragmentOffsetInBits)
        continue;
      uint64_t CurVarFragmentOffsetInBits =
          FragmentOffsetInBits - CurVarOffsetInBits;
      uint64_t CurVarFragmentSizeInBits = FragmentSizeInBits;
      if (CurVarSize != 0 && CurVarEndInBits < FragmentEndInBits)
        CurVarFragmentSizeInBits -= (FragmentEndInBits - CurVarEndInBits);
      if (CurVarOffsetInBits)
        Expr = DIExpression::get(Expr->getContext(), {});
      if (auto E = DIExpression::createFragmentExpression(
              Expr, CurVarFragmentOffsetInBits, CurVarFragmentSizeInBits))
        Expr = *E;
      else
        continue;
    }
    auto *NGVE = DIGlobalVariableExpression::get(GVE->getContext(), Var, Expr);
    NGV->addDebugInfo(NGVE);
  }
}

/// Perform scalar replacement of aggregates on the specified global variable.
/// This opens the door for other optimizations by exposing the behavior of the
/// program in a more fine-grained way.  We have determined that this
/// transformation is safe already.  We return the first global variable we
/// insert so that the caller can reprocess it.
static GlobalVariable *SRAGlobal(GlobalVariable *GV, const DataLayout &DL) {
  assert(GV->hasLocalLinkage());

  // Collect types to split into.
  DenseMap<uint64_t, GlobalPart> Parts;
  if (!collectSRATypes(Parts, GV, DL) || Parts.empty())
    return nullptr;

  // Make sure we don't SRA back to the same type.
  if (Parts.size() == 1 && Parts.begin()->second.Ty == GV->getValueType())
    return nullptr;

  // Don't perform SRA if we would have to split into many globals. Ignore
  // parts that are either only loaded or only stored, because we expect them
  // to be optimized away.
  unsigned NumParts = count_if(Parts, [](const auto &Pair) {
    return Pair.second.IsLoaded && Pair.second.IsStored;
  });
  if (NumParts > 16)
    return nullptr;

  // Sort parts by offset.
  SmallVector<std::tuple<uint64_t, Type *, Constant *>, 16> TypesVector;
  for (const auto &Pair : Parts) {
    TypesVector.push_back(
        {Pair.first, Pair.second.Ty, Pair.second.Initializer});
  }
  sort(TypesVector, llvm::less_first());

  // Check that the types are non-overlapping.
  uint64_t Offset = 0;
  for (const auto &[OffsetForTy, Ty, _] : TypesVector) {
    // Overlaps with previous type.
    if (OffsetForTy < Offset)
      return nullptr;

    Offset = OffsetForTy + DL.getTypeAllocSize(Ty);
  }

  // Some accesses go beyond the end of the global, don't bother.
  if (Offset > DL.getTypeAllocSize(GV->getValueType()))
    return nullptr;

  LLVM_DEBUG(dbgs() << "PERFORMING GLOBAL SRA ON: " << *GV << "\n");

  // Get the alignment of the global, either explicit or target-specific.
  Align StartAlignment =
      DL.getValueOrABITypeAlignment(GV->getAlign(), GV->getValueType());
  uint64_t VarSize = DL.getTypeSizeInBits(GV->getValueType());

  // Create replacement globals.
  DenseMap<uint64_t, GlobalVariable *> NewGlobals;
  unsigned NameSuffix = 0;
  for (auto &[OffsetForTy, Ty, Initializer] : TypesVector) {
    GlobalVariable *NGV = new GlobalVariable(
        *GV->getParent(), Ty, false, GlobalVariable::InternalLinkage,
        Initializer, GV->getName() + "." + Twine(NameSuffix++), GV,
        GV->getThreadLocalMode(), GV->getAddressSpace());
    NGV->copyAttributesFrom(GV);
    NewGlobals.insert({OffsetForTy, NGV});

    // Calculate the known alignment of the field.  If the original aggregate
    // had 256 byte alignment for example, something might depend on that:
    // propagate info to each field.
    Align NewAlign = commonAlignment(StartAlignment, OffsetForTy);
    if (NewAlign > DL.getABITypeAlign(Ty))
      NGV->setAlignment(NewAlign);

    // Copy over the debug info for the variable.
    transferSRADebugInfo(GV, NGV, OffsetForTy * 8,
                         DL.getTypeAllocSizeInBits(Ty), VarSize);
  }

  // Replace uses of the original global with uses of the new global.
  SmallVector<Value *, 16> Worklist;
  SmallPtrSet<Value *, 16> Visited;
  SmallVector<WeakTrackingVH, 16> DeadInsts;
  auto AppendUsers = [&](Value *V) {
    for (User *U : V->users())
      if (Visited.insert(U).second)
        Worklist.push_back(U);
  };
  AppendUsers(GV);
  while (!Worklist.empty()) {
    Value *V = Worklist.pop_back_val();
    if (isa<BitCastOperator>(V) || isa<AddrSpaceCastOperator>(V) ||
        isa<GEPOperator>(V)) {
      AppendUsers(V);
      if (isa<Instruction>(V))
        DeadInsts.push_back(V);
      continue;
    }

    if (Value *Ptr = getLoadStorePointerOperand(V)) {
      APInt Offset(DL.getIndexTypeSizeInBits(Ptr->getType()), 0);
      Ptr = Ptr->stripAndAccumulateConstantOffsets(DL, Offset,
                                                   /* AllowNonInbounds */ true);
      assert(Ptr == GV && "Load/store must be from/to global");
      GlobalVariable *NGV = NewGlobals[Offset.getZExtValue()];
      assert(NGV && "Must have replacement global for this offset");

      // Update the pointer operand and recalculate alignment.
      Align PrefAlign = DL.getPrefTypeAlign(getLoadStoreType(V));
      Align NewAlign =
          getOrEnforceKnownAlignment(NGV, PrefAlign, DL, cast<Instruction>(V));

      if (auto *LI = dyn_cast<LoadInst>(V)) {
        LI->setOperand(0, NGV);
        LI->setAlignment(NewAlign);
      } else {
        auto *SI = cast<StoreInst>(V);
        SI->setOperand(1, NGV);
        SI->setAlignment(NewAlign);
      }
      continue;
    }

    assert(isa<Constant>(V) && isSafeToDestroyConstant(cast<Constant>(V)) &&
           "Other users can only be dead constants");
  }

  // Delete old instructions and global.
  RecursivelyDeleteTriviallyDeadInstructions(DeadInsts);
  GV->removeDeadConstantUsers();
  GV->eraseFromParent();

  assert(NewGlobals.size() > 0);
  return NewGlobals.begin()->second;
}
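
// Rough before/after sketch (hypothetical IR): SRA of
//   @pair = internal global { i32, i32 } zeroinitializer
// whose two fields are accessed independently yields two scalar globals,
//   @pair.0 = internal global i32 0
//   @pair.1 = internal global i32 0
// and each load/store is rewritten to use whichever replacement matches its
// constant offset.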

/// Return true if all users of the specified value will trap if the value is
/// dynamically null.  PHIs keeps track of any phi nodes we've seen to avoid
/// reprocessing them.
static bool AllUsesOfValueWillTrapIfNull(const Value *V,
                                         SmallPtrSetImpl<const PHINode *> &PHIs) {
  for (const User *U : V->users()) {
    if (const Instruction *I = dyn_cast<Instruction>(U)) {
      // If null pointer is considered valid, then all uses are non-trapping.
      // Non address-space 0 globals have already been pruned by the caller.
      if (NullPointerIsDefined(I->getFunction()))
        return false;
    }
    if (isa<LoadInst>(U)) {
      // Will trap.
    } else if (const StoreInst *SI = dyn_cast<StoreInst>(U)) {
      if (SI->getOperand(0) == V) {
        return false;  // Storing the value.
      }
    } else if (const CallInst *CI = dyn_cast<CallInst>(U)) {
      if (CI->getCalledOperand() != V) {
        return false;  // Not calling the ptr
      }
    } else if (const InvokeInst *II = dyn_cast<InvokeInst>(U)) {
      if (II->getCalledOperand() != V) {
        return false;  // Not calling the ptr
      }
    } else if (const AddrSpaceCastInst *CI = dyn_cast<AddrSpaceCastInst>(U)) {
      if (!AllUsesOfValueWillTrapIfNull(CI, PHIs))
        return false;
    } else if (const GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(U)) {
      if (!AllUsesOfValueWillTrapIfNull(GEPI, PHIs)) return false;
    } else if (const PHINode *PN = dyn_cast<PHINode>(U)) {
      // If we've already seen this phi node, ignore it, it has already been
      // checked.
      if (PHIs.insert(PN).second && !AllUsesOfValueWillTrapIfNull(PN, PHIs))
        return false;
    } else if (isa<ICmpInst>(U) &&
               !ICmpInst::isSigned(cast<ICmpInst>(U)->getPredicate()) &&
               isa<LoadInst>(U->getOperand(0)) &&
               isa<ConstantPointerNull>(U->getOperand(1))) {
      assert(isa<GlobalValue>(cast<LoadInst>(U->getOperand(0))
                                  ->getPointerOperand()
                                  ->stripPointerCasts()) &&
             "Should be GlobalVariable");
      // This and only this kind of non-signed ICmpInst is to be replaced with
      // the comparing of the value of the created global init bool later in
      // optimizeGlobalAddressOfAllocation for the global variable.
    } else {
      return false;
    }
  }
  return true;
}

/// Return true if all uses of any loads from GV will trap if the loaded value
/// is null.  Note that this also permits comparisons of the loaded value
/// against null, as a special case.
static bool allUsesOfLoadedValueWillTrapIfNull(const GlobalVariable *GV) {
  SmallVector<const Value *, 4> Worklist;
  Worklist.push_back(GV);
  while (!Worklist.empty()) {
    const Value *P = Worklist.pop_back_val();
    for (const auto *U : P->users()) {
      if (auto *LI = dyn_cast<LoadInst>(U)) {
        SmallPtrSet<const PHINode *, 8> PHIs;
        if (!AllUsesOfValueWillTrapIfNull(LI, PHIs))
          return false;
      } else if (auto *SI = dyn_cast<StoreInst>(U)) {
        // Ignore stores to the global.
        if (SI->getPointerOperand() != P)
          return false;
      } else if (auto *CE = dyn_cast<ConstantExpr>(U)) {
        if (CE->stripPointerCasts() != GV)
          return false;
        // Check further the ConstantExpr.
        Worklist.push_back(CE);
      } else {
        // We don't know or understand this user, bail out.
        return false;
      }
    }
  }

  return true;
}
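
// Hypothetical illustration: for
//   @p = internal global ptr null
//   %v = load ptr, ptr @p
//   %x = load i32, ptr %v        ; would trap if %v were null
// every use of the loaded pointer dereferences (or calls through) it, so these
// helpers return true; a plain "store ptr %v, ptr @other" would not qualify.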

/// Get all the loads/store uses for global variable \p GV.
static void allUsesOfLoadAndStores(GlobalVariable *GV,
                                   SmallVector<Value *, 4> &Uses) {
  SmallVector<Value *, 4> Worklist;
  Worklist.push_back(GV);
  while (!Worklist.empty()) {
    auto *P = Worklist.pop_back_val();
    for (auto *U : P->users()) {
      if (auto *CE = dyn_cast<ConstantExpr>(U)) {
        Worklist.push_back(CE);
        continue;
      }

      assert((isa<LoadInst>(U) || isa<StoreInst>(U)) &&
             "Expect only load or store instructions");
      Uses.push_back(U);
    }
  }
}

static bool OptimizeAwayTrappingUsesOfValue(Value *V, Constant *NewV) {
  bool Changed = false;

  for (auto UI = V->user_begin(), E = V->user_end(); UI != E; ) {
    Instruction *I = cast<Instruction>(*UI++);
    // Uses are non-trapping if null pointer is considered valid.
    // Non address-space 0 globals are already pruned by the caller.
    if (NullPointerIsDefined(I->getFunction()))
      return false;
    if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
      LI->setOperand(0, NewV);
      Changed = true;
    } else if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
      if (SI->getOperand(1) == V) {
        SI->setOperand(1, NewV);
        Changed = true;
      }
    } else if (isa<CallInst>(I) || isa<InvokeInst>(I)) {
      CallBase *CB = cast<CallBase>(I);
      if (CB->getCalledOperand() == V) {
        // Calling through the pointer!  Turn into a direct call, but be careful
        // that the pointer is not also being passed as an argument.
        CB->setCalledOperand(NewV);
        Changed = true;
        bool PassedAsArg = false;
        for (unsigned i = 0, e = CB->arg_size(); i != e; ++i)
          if (CB->getArgOperand(i) == V) {
            PassedAsArg = true;
            CB->setArgOperand(i, NewV);
          }

        if (PassedAsArg) {
          // Being passed as an argument also.  Be careful to not invalidate UI!
          UI = V->user_begin();
        }
      }
    } else if (AddrSpaceCastInst *CI = dyn_cast<AddrSpaceCastInst>(I)) {
      Changed |= OptimizeAwayTrappingUsesOfValue(
          CI, ConstantExpr::getAddrSpaceCast(NewV, CI->getType()));
      if (CI->use_empty()) {
        Changed = true;
        CI->eraseFromParent();
      }
    } else if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(I)) {
      // Should handle GEP here.
      SmallVector<Constant *, 8> Idxs;
      Idxs.reserve(GEPI->getNumOperands()-1);
      for (User::op_iterator i = GEPI->op_begin() + 1, e = GEPI->op_end();
           i != e; ++i)
        if (Constant *C = dyn_cast<Constant>(*i))
          Idxs.push_back(C);
        else
          break;
      if (Idxs.size() == GEPI->getNumOperands()-1)
        Changed |= OptimizeAwayTrappingUsesOfValue(
            GEPI, ConstantExpr::getGetElementPtr(GEPI->getSourceElementType(),
                                                 NewV, Idxs));
      if (GEPI->use_empty()) {
        Changed = true;
        GEPI->eraseFromParent();
      }
    }
  }

  return Changed;
}

/// The specified global has only one non-null value stored into it.  If there
/// are uses of the loaded value that would trap if the loaded value is
/// dynamically null, then we know that they cannot be reachable with a null
/// value, so optimize away the load.
static bool OptimizeAwayTrappingUsesOfLoads(
    GlobalVariable *GV, Constant *LV, const DataLayout &DL,
    function_ref<TargetLibraryInfo &(Function &)> GetTLI) {
  bool Changed = false;

  // Keep track of whether we are able to remove all the uses of the global
  // other than the store that defines it.
  bool AllNonStoreUsesGone = true;

  // Replace all uses of loads with uses of uses of the stored value.
  for (User *GlobalUser : llvm::make_early_inc_range(GV->users())) {
    if (LoadInst *LI = dyn_cast<LoadInst>(GlobalUser)) {
      Changed |= OptimizeAwayTrappingUsesOfValue(LI, LV);
      // If we were able to delete all uses of the loads
      if (LI->use_empty()) {
        LI->eraseFromParent();
        Changed = true;
      } else {
        AllNonStoreUsesGone = false;
      }
    } else if (isa<StoreInst>(GlobalUser)) {
      // Ignore the store that stores "LV" to the global.
      assert(GlobalUser->getOperand(1) == GV &&
             "Must be storing *to* the global");
    } else {
      AllNonStoreUsesGone = false;

      // If we get here we could have other crazy uses that are transitively
      // loaded.
      assert((isa<PHINode>(GlobalUser) || isa<SelectInst>(GlobalUser) ||
              isa<ConstantExpr>(GlobalUser) || isa<CmpInst>(GlobalUser) ||
              isa<BitCastInst>(GlobalUser) ||
              isa<GetElementPtrInst>(GlobalUser) ||
              isa<AddrSpaceCastInst>(GlobalUser)) &&
             "Only expect load and stores!");
    }
  }

  if (Changed) {
    LLVM_DEBUG(dbgs() << "OPTIMIZED LOADS FROM STORED ONCE POINTER: " << *GV
                      << "\n");
    ++NumGlobUses;
  }

  // If we nuked all of the loads, then none of the stores are needed either,
  // nor is the global.
  if (AllNonStoreUsesGone) {
    if (isLeakCheckerRoot(GV)) {
      Changed |= CleanupPointerRootUsers(GV, GetTLI);
    } else {
      Changed = true;
      CleanupConstantGlobalUsers(GV, DL);
    }
    if (GV->use_empty()) {
      LLVM_DEBUG(dbgs() << "  *** GLOBAL NOW DEAD!\n");
      Changed = true;
      GV->eraseFromParent();
      ++NumDeleted;
    }
  }
  return Changed;
}

/// Walk the use list of V, constant folding all of the instructions that are
/// foldable.
static void ConstantPropUsersOf(Value *V, const DataLayout &DL,
                                TargetLibraryInfo *TLI) {
  for (Value::user_iterator UI = V->user_begin(), E = V->user_end(); UI != E; )
    if (Instruction *I = dyn_cast<Instruction>(*UI++))
      if (Constant *NewC = ConstantFoldInstruction(I, DL, TLI)) {
        I->replaceAllUsesWith(NewC);

        // Advance UI to the next non-I use to avoid invalidating it!
        // Instructions could multiply use V.
        while (UI != E && *UI == I)
          ++UI;
        if (isInstructionTriviallyDead(I, TLI))
          I->eraseFromParent();
      }
}

/// This function takes the specified global variable, and transforms the
/// program as if it always contained the result of the specified malloc.
/// Because it is always the result of the specified malloc, there is no reason
/// to actually DO the malloc.  Instead, turn the malloc into a global, and any
/// loads of GV as uses of the new global.
static GlobalVariable *
OptimizeGlobalAddressOfAllocation(GlobalVariable *GV, CallInst *CI,
                                  uint64_t AllocSize, Constant *InitVal,
                                  const DataLayout &DL,
                                  TargetLibraryInfo *TLI) {
  LLVM_DEBUG(errs() << "PROMOTING GLOBAL: " << *GV << "  CALL = " << *CI
                    << '\n');

  // Create global of type [AllocSize x i8].
  Type *GlobalType = ArrayType::get(Type::getInt8Ty(GV->getContext()),
                                    AllocSize);

  // Create the new global variable.  The contents of the allocated memory is
  // undefined initially, so initialize with an undef value.
  GlobalVariable *NewGV = new GlobalVariable(
      *GV->getParent(), GlobalType, false, GlobalValue::InternalLinkage,
      UndefValue::get(GlobalType), GV->getName() + ".body", nullptr,
      GV->getThreadLocalMode());

  // Initialize the global at the point of the original call.  Note that this
  // is a different point from the initialization referred to below for the
  // nullability handling.  Sublety: We have not proven the original global was
  // only initialized once.  As such, we can not fold this into the initializer
  // of the new global as may need to re-init the storage multiple times.
  if (!isa<UndefValue>(InitVal)) {
    IRBuilder<> Builder(CI->getNextNode());
    // TODO: Use alignment above if align!=1
    Builder.CreateMemSet(NewGV, InitVal, AllocSize, std::nullopt);
  }

  // Update users of the allocation to use the new global instead.
  CI->replaceAllUsesWith(NewGV);

  // If there is a comparison against null, we will insert a global bool to
  // keep track of whether the global was initialized yet or not.
  GlobalVariable *InitBool =
      new GlobalVariable(Type::getInt1Ty(GV->getContext()), false,
                         GlobalValue::InternalLinkage,
                         ConstantInt::getFalse(GV->getContext()),
                         GV->getName()+".init", GV->getThreadLocalMode());
  bool InitBoolUsed = false;

  // Loop over all instruction uses of GV, processing them in turn.
  SmallVector<Value *, 4> Guses;
  allUsesOfLoadAndStores(GV, Guses);
  for (auto *U : Guses) {
    if (StoreInst *SI = dyn_cast<StoreInst>(U)) {
      // The global is initialized when the store to it occurs. If the stored
      // value is null value, the global bool is set to false, otherwise true.
      new StoreInst(ConstantInt::getBool(
                        GV->getContext(),
                        !isa<ConstantPointerNull>(SI->getValueOperand())),
                    InitBool, false, Align(1), SI->getOrdering(),
                    SI->getSyncScopeID(), SI);
      SI->eraseFromParent();
      continue;
    }

    LoadInst *LI = cast<LoadInst>(U);
    while (!LI->use_empty()) {
      Use &LoadUse = *LI->use_begin();
      ICmpInst *ICI = dyn_cast<ICmpInst>(LoadUse.getUser());
      if (!ICI) {
        LoadUse.set(NewGV);
        continue;
      }

      // Replace the cmp X, 0 with a use of the bool value.
      Value *LV = new LoadInst(InitBool->getValueType(), InitBool,
                               InitBool->getName() + ".val", false, Align(1),
                               LI->getOrdering(), LI->getSyncScopeID(), LI);
      InitBoolUsed = true;
      switch (ICI->getPredicate()) {
      default: llvm_unreachable("Unknown ICmp Predicate!");
      case ICmpInst::ICMP_ULT: // X < null -> always false
        LV = ConstantInt::getFalse(GV->getContext());
        break;
      case ICmpInst::ICMP_UGE: // X >= null -> always true
        LV = ConstantInt::getTrue(GV->getContext());
        break;
      case ICmpInst::ICMP_ULE:
      case ICmpInst::ICMP_EQ:
        LV = BinaryOperator::CreateNot(LV, "notinit", ICI);
        break;
      case ICmpInst::ICMP_NE:
      case ICmpInst::ICMP_UGT:
        break;  // no change.
      }
      ICI->replaceAllUsesWith(LV);
      ICI->eraseFromParent();
    }
    LI->eraseFromParent();
  }

  // If the initialization boolean was used, insert it, otherwise delete it.
  if (!InitBoolUsed) {
    while (!InitBool->use_empty()) // Delete initializations
      cast<StoreInst>(InitBool->user_back())->eraseFromParent();
    delete InitBool;
  } else
    GV->getParent()->insertGlobalVariable(GV->getIterator(), InitBool);

  // Now the GV is dead, nuke it and the allocation..
  GV->eraseFromParent();
  CI->eraseFromParent();

  // To further other optimizations, loop over all users of NewGV and try to
  // constant prop them.  This will promote GEP instructions with constant
  // indices into GEP constant-exprs, which will allow global-opt to hack on it.
  ConstantPropUsersOf(NewGV, DL, TLI);

  return NewGV;
}
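
// Rough before/after sketch (hypothetical IR):
//   @g = internal global ptr null
//   %m = call ptr @malloc(i64 64)
//   store ptr %m, ptr @g
//   ... %p = load ptr, ptr @g ... icmp eq ptr %p, null ...
// becomes
//   @g.body = internal global [64 x i8] undef
//   @g.init = internal global i1 false
// with loads of @g rewritten to use @g.body and the null comparison replaced
// by a load of @g.init.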

/// Scan the use-list of GV checking to make sure that there are no complex
/// uses of GV.  We permit simple things like dereferencing the pointer, but
/// not storing through the address, unless it is to the specified global.
static bool
valueIsOnlyUsedLocallyOrStoredToOneGlobal(const CallInst *CI,
                                          const GlobalVariable *GV) {
  SmallPtrSet<const Value *, 4> Visited;
  SmallVector<const Value *, 4> Worklist;
  Worklist.push_back(CI);

  while (!Worklist.empty()) {
    const Value *V = Worklist.pop_back_val();
    if (!Visited.insert(V).second)
      continue;

    for (const Use &VUse : V->uses()) {
      const User *U = VUse.getUser();
      if (isa<LoadInst>(U) || isa<CmpInst>(U))
        continue; // Fine, ignore.

      if (auto *SI = dyn_cast<StoreInst>(U)) {
        if (SI->getValueOperand() == V &&
            SI->getPointerOperand()->stripPointerCasts() != GV)
          return false; // Storing the pointer not into GV... bad.
        continue; // Otherwise, storing through it, or storing into GV... fine.
      }

      if (auto *BCI = dyn_cast<BitCastInst>(U)) {
        Worklist.push_back(BCI);
        continue;
      }

      if (auto *GEPI = dyn_cast<GetElementPtrInst>(U)) {
        Worklist.push_back(GEPI);
        continue;
      }

      return false;
    }
  }

  return true;
}

/// If we have a global that is only initialized with a fixed size allocation
/// try to transform the program to use global memory instead of heap
/// allocated memory. This eliminates dynamic allocation, avoids an indirection
/// accessing the data, and exposes the resultant global to further GlobalOpt.
static bool tryToOptimizeStoreOfAllocationToGlobal(GlobalVariable *GV,
                                                   CallInst *CI,
                                                   const DataLayout &DL,
                                                   TargetLibraryInfo *TLI) {
  if (!isRemovableAlloc(CI, TLI))
    // Must be able to remove the call when we get done..
    return false;

  Type *Int8Ty = Type::getInt8Ty(CI->getFunction()->getContext());
  Constant *InitVal = getInitialValueOfAllocation(CI, TLI, Int8Ty);
  if (!InitVal)
    // Must be able to emit a memset for initialization
    return false;

  uint64_t AllocSize;
  if (!getObjectSize(CI, AllocSize, DL, TLI, ObjectSizeOpts()))
    return false;

  // Restrict this transformation to only working on small allocations
  // (2048 bytes currently), as we don't want to introduce a 16M global or
  // something.
  if (AllocSize >= 2048)
    return false;

  // We can't optimize this global unless all uses of it are *known* to be
  // of the malloc value, not of the null initializer value (consider a use
  // that compares the global's value against zero to see if the malloc has
  // been reached).  To do this, we check to see if all uses of the global
  // would trap if the global were null: this proves that they must all
  // happen after the malloc.
  if (!allUsesOfLoadedValueWillTrapIfNull(GV))
    return false;

  // We can't optimize this if the malloc itself is used in a complex way,
  // for example, being stored into multiple globals.  This allows the
  // malloc to be stored into the specified global, loaded, gep, icmp'd.
  // These are all things we could transform to using the global for.
  if (!valueIsOnlyUsedLocallyOrStoredToOneGlobal(CI, GV))
    return false;

  OptimizeGlobalAddressOfAllocation(GV, CI, AllocSize, InitVal, DL, TLI);
  return true;
}

// Try to optimize globals based on the knowledge that only one value (besides
// its initializer) is ever stored to the global.
static bool
optimizeOnceStoredGlobal(GlobalVariable *GV, Value *StoredOnceVal,
                         const DataLayout &DL,
                         function_ref<TargetLibraryInfo &(Function &)> GetTLI) {
  // Ignore no-op GEPs and bitcasts.
  StoredOnceVal = StoredOnceVal->stripPointerCasts();

  // If we are dealing with a pointer global that is initialized to null and
  // only has one (non-null) value stored into it, then we can optimize any
  // users of the loaded value (often calls and loads) that would trap if the
  // loaded value is dynamically null.
  if (GV->getInitializer()->getType()->isPointerTy() &&
      GV->getInitializer()->isNullValue() &&
      StoredOnceVal->getType()->isPointerTy() &&
      !NullPointerIsDefined(
          nullptr /* F */,
          GV->getInitializer()->getType()->getPointerAddressSpace())) {
    if (Constant *SOVC = dyn_cast<Constant>(StoredOnceVal)) {
      // Optimize away any trapping uses of the loaded value.
      if (OptimizeAwayTrappingUsesOfLoads(GV, SOVC, DL, GetTLI))
        return true;
    } else if (isAllocationFn(StoredOnceVal, GetTLI)) {
      if (auto *CI = dyn_cast<CallInst>(StoredOnceVal)) {
        auto *TLI = &GetTLI(*CI->getFunction());
        if (tryToOptimizeStoreOfAllocationToGlobal(GV, CI, DL, TLI))
          return true;
      }
    }
  }

  return false;
}

/// At this point, we have learned that the only two values ever stored into GV
/// are its initializer and OtherVal.  See if we can shrink the global into a
/// boolean and select between the two values whenever it is used.  This exposes
/// the values to other scalar optimizations.
static bool TryToShrinkGlobalToBoolean(GlobalVariable *GV, Constant *OtherVal) {
  Type *GVElType = GV->getValueType();

  // If GVElType is already i1, it is already shrunk.  If the type of the GV is
  // an FP value, pointer or vector, don't do this optimization because a select
  // between them is very expensive and unlikely to lead to later
  // simplification.  In these cases, we typically end up with "cond ? v1 : v2"
  // where v1 and v2 both require constant pool loads, a big loss.
  if (GVElType == Type::getInt1Ty(GV->getContext()) ||
      GVElType->isFloatingPointTy() ||
      GVElType->isPointerTy() || GVElType->isVectorTy())
    return false;

  // Walk the use list of the global seeing if all the uses are load or store.
  // If there is anything else, bail out.
  for (User *U : GV->users()) {
    if (!isa<LoadInst>(U) && !isa<StoreInst>(U))
      return false;
    if (getLoadStoreType(U) != GVElType)
      return false;
  }

  LLVM_DEBUG(dbgs() << "   *** SHRINKING TO BOOL: " << *GV << "\n");

  // Create the new global, initializing it to false.
  GlobalVariable *NewGV = new GlobalVariable(Type::getInt1Ty(GV->getContext()),
                                             false,
                                             GlobalValue::InternalLinkage,
                                             ConstantInt::getFalse(GV->getContext()),
                                             GV->getName() + ".b",
                                             GV->getThreadLocalMode(),
                                             GV->getType()->getAddressSpace());
  NewGV->copyAttributesFrom(GV);
  GV->getParent()->insertGlobalVariable(GV->getIterator(), NewGV);

  Constant *InitVal = GV->getInitializer();
  assert(InitVal->getType() != Type::getInt1Ty(GV->getContext()) &&
         "No reason to shrink to bool!");

  SmallVector<DIGlobalVariableExpression *, 1> GVs;
  GV->getDebugInfo(GVs);

  // If initialized to zero and storing one into the global, we can use a cast
  // instead of a select to synthesize the desired value.
  bool IsOneZero = false;
  bool EmitOneOrZero = true;
  auto *CI = dyn_cast<ConstantInt>(OtherVal);
  if (CI && CI->getValue().getActiveBits() <= 64) {
    IsOneZero = InitVal->isNullValue() && CI->isOne();

    auto *CIInit = dyn_cast<ConstantInt>(GV->getInitializer());
    if (CIInit && CIInit->getValue().getActiveBits() <= 64) {
      uint64_t ValInit = CIInit->getZExtValue();
      uint64_t ValOther = CI->getZExtValue();
      uint64_t ValMinus = ValOther - ValInit;

      for (auto *GVe : GVs) {
        DIGlobalVariable *DGV = GVe->getVariable();
        DIExpression *E = GVe->getExpression();
        const DataLayout &DL = GV->getParent()->getDataLayout();
        unsigned SizeInOctets =
            DL.getTypeAllocSizeInBits(NewGV->getValueType()) / 8;

        // It is expected that the address of global optimized variable is on
        // top of the stack. After optimization, value of that variable will
        // be ether 0 for initial value or 1 for other value. The following
        // expression should return constant integer value depending on the
        // value at global object address:
        // val * (ValOther - ValInit) + ValInit:
        // DW_OP_deref DW_OP_constu <ValMinus>
        // DW_OP_mul DW_OP_constu <ValInit> DW_OP_plus DW_OP_stack_value
        SmallVector<uint64_t, 12> Ops = {
            dwarf::DW_OP_deref_size, SizeInOctets,
            dwarf::DW_OP_constu, ValMinus,
            dwarf::DW_OP_mul, dwarf::DW_OP_constu, ValInit,
            dwarf::DW_OP_plus};
        bool WithStackValue = true;
        E = DIExpression::prependOpcodes(E, Ops, WithStackValue);
        DIGlobalVariableExpression *DGVE =
            DIGlobalVariableExpression::get(NewGV->getContext(), DGV, E);
        NewGV->addDebugInfo(DGVE);
      }
      EmitOneOrZero = false;
    }
  }

  if (EmitOneOrZero) {
    // FIXME: This will only emit address for debugger on which will
    // be written only 0 or 1.
    for (auto *GV : GVs)
      NewGV->addDebugInfo(GV);
  }

  while (!GV->use_empty()) {
    Instruction *UI = cast<Instruction>(GV->user_back());
    if (StoreInst *SI = dyn_cast<StoreInst>(UI)) {
      // Change the store into a boolean store.
      bool StoringOther = SI->getOperand(0) == OtherVal;
      // Only do this if we weren't storing a loaded value.
      Value *StoreVal;
      if (StoringOther || SI->getOperand(0) == InitVal) {
        StoreVal = ConstantInt::get(Type::getInt1Ty(GV->getContext()),
                                    StoringOther);
      } else {
        // Otherwise, we are storing a previously loaded copy.  To do this,
        // change the copy from copying the original value to just copying the
        // bool.
        Instruction *StoredVal = cast<Instruction>(SI->getOperand(0));

        // If we've already replaced the input, StoredVal will be a cast or
        // select instruction.  If not, it will be a load of the original
        // global.
        if (LoadInst *LI = dyn_cast<LoadInst>(StoredVal)) {
          assert(LI->getOperand(0) == GV && "Not a copy!");
          // Insert a new load, to preserve the saved value.
          StoreVal = new LoadInst(NewGV->getValueType(), NewGV,
                                  LI->getName() + ".b", false, Align(1),
                                  LI->getOrdering(), LI->getSyncScopeID(), LI);
        } else {
          assert((isa<CastInst>(StoredVal) || isa<SelectInst>(StoredVal)) &&
                 "This is not a form that we understand!");
          StoreVal = StoredVal->getOperand(0);
          assert(isa<LoadInst>(StoreVal) && "Not a load of NewGV!");
        }
      }
      StoreInst *NSI =
          new StoreInst(StoreVal, NewGV, false, Align(1), SI->getOrdering(),
                        SI->getSyncScopeID(), SI);
      NSI->setDebugLoc(SI->getDebugLoc());
    } else {
      // Change the load into a load of bool then a select.
      LoadInst *LI = cast<LoadInst>(UI);
      LoadInst *NLI = new LoadInst(NewGV->getValueType(), NewGV,
                                   LI->getName() + ".b", false, Align(1),
                                   LI->getOrdering(), LI->getSyncScopeID(), LI);
      Instruction *NSI;
      if (IsOneZero)
        NSI = new ZExtInst(NLI, LI->getType(), "", LI);
      else
        NSI = SelectInst::Create(NLI, OtherVal, InitVal, "", LI);
      // Since LI is split into two instructions, NLI and NSI both inherit the
      // same DebugLoc
      NLI->setDebugLoc(LI->getDebugLoc());
      NSI->setDebugLoc(LI->getDebugLoc());
      LI->replaceAllUsesWith(NSI);
    }
    UI->eraseFromParent();
  }

  // Retain the name of the old global variable. People who are debugging their
  // programs may expect these variables to be named the same.
  NewGV->takeName(GV);
  GV->eraseFromParent();
  return true;
}
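
// Rough before/after sketch (hypothetical IR): with a zero initializer and 42
// as the only other stored value,
//   @state = internal global i32 0
//   store i32 42, ptr @state
//   %v = load i32, ptr @state
// is rewritten to a boolean global plus a select:
//   @state = internal global i1 false
//   store i1 true, ptr @state
//   %b = load i1, ptr @state
//   %v = select i1 %b, i32 42, i32 0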

static bool
deleteIfDead(GlobalValue &GV,
             SmallPtrSetImpl<const Comdat *> &NotDiscardableComdats,
             function_ref<void(Function &)> DeleteFnCallback = nullptr) {
  GV.removeDeadConstantUsers();

  if (!GV.isDiscardableIfUnused() && !GV.isDeclaration())
    return false;

  if (const Comdat *C = GV.getComdat())
    if (!GV.hasLocalLinkage() && NotDiscardableComdats.count(C))
      return false;

  bool Dead;
  if (auto *F = dyn_cast<Function>(&GV))
    Dead = (F->isDeclaration() && F->use_empty()) || F->isDefTriviallyDead();
  else
    Dead = GV.use_empty();
  if (!Dead)
    return false;

  LLVM_DEBUG(dbgs() << "GLOBAL DEAD: " << GV << "\n");
  if (auto *F = dyn_cast<Function>(&GV)) {
    if (DeleteFnCallback)
      DeleteFnCallback(*F);
  }
  GV.eraseFromParent();
  ++NumDeleted;
  return true;
}

static bool isPointerValueDeadOnEntryToFunction(
    const Function *F, GlobalValue *GV,
    function_ref<DominatorTree &(Function &)> LookupDomTree) {
  // Find all uses of GV. We expect them all to be in F, and if we can't
  // identify any of the uses we bail out.
  //
  // On each of these uses, identify if the memory that GV points to is
  // used/required/live at the start of the function. If it is not, for example
  // if the first thing the function does is store to the GV, the GV can
  // possibly be demoted.
  //
  // We don't do an exhaustive search for memory operations - simply look
  // through bitcasts as they're quite common and benign.
  const DataLayout &DL = GV->getParent()->getDataLayout();
  SmallVector<LoadInst *, 4> Loads;
  SmallVector<StoreInst *, 4> Stores;
  for (auto *U : GV->users()) {
    Instruction *I = dyn_cast<Instruction>(U);
    if (!I)
      return false;
    assert(I->getParent()->getParent() == F);

    if (auto *LI = dyn_cast<LoadInst>(I))
      Loads.push_back(LI);
    else if (auto *SI = dyn_cast<StoreInst>(I))
      Stores.push_back(SI);
    else
      return false;
  }

  // We have identified all uses of GV into loads and stores. Now check if all
  // of them are known not to depend on the value of the global at the function
  // entry point. We do this by ensuring that every load is dominated by at
  // least one store.
  auto &DT = LookupDomTree(*const_cast<Function *>(F));

  // The below check is quadratic. Check we're not going to do too many tests.
  // FIXME: Even though this will always have worst-case quadratic time, we
  // could put effort into minimizing the average time by putting stores that
  // have been shown to dominate at least one load at the beginning of the
  // Stores array, making subsequent dominance checks more likely to succeed
  // early.
  //
  // The threshold here is fairly large because global->local demotion is a
  // very powerful optimization should it fire.
  const unsigned Threshold = 100;
  if (Loads.size() * Stores.size() > Threshold)
    return false;

  for (auto *L : Loads) {
    auto *LTy = L->getType();
    if (none_of(Stores, [&](const StoreInst *S) {
          auto *STy = S->getValueOperand()->getType();
          // The load is only dominated by the store if DomTree says so
          // and the number of bits loaded in L is less than or equal to
          // the number of bits stored in S.
          return DT.dominates(S, L) &&
                 DL.getTypeStoreSize(LTy).getFixedValue() <=
                     DL.getTypeStoreSize(STy).getFixedValue();
        }))
      return false;
  }

  // All loads have known dependences inside F, so the global can be localized.
  return true;
}
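
// Hypothetical illustration of the demotion this enables: if a single
// non-recursive function always writes @scratch before reading it,
//   @scratch = internal global i32 0
// can be replaced by an alloca in that function's entry block (see
// processInternalGlobal below), since the value on entry is never observed.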

// For a global variable with one store, if the store dominates any loads,
// those loads will always load the stored value (as opposed to the
// initializer), even in the presence of recursion.
static bool forwardStoredOnceStore(
    GlobalVariable *GV, const StoreInst *StoredOnceStore,
    function_ref<DominatorTree &(Function &)> LookupDomTree) {
  const Value *StoredOnceValue = StoredOnceStore->getValueOperand();
  // We can do this optimization for non-constants in nosync + norecurse
  // functions, but globals used in exactly one norecurse functions are already
  // promoted to an alloca.
  if (!isa<Constant>(StoredOnceValue))
    return false;
  const Function *F = StoredOnceStore->getFunction();
  SmallVector<LoadInst *> Loads;
  for (User *U : GV->users()) {
    if (auto *LI = dyn_cast<LoadInst>(U)) {
      if (LI->getFunction() == F &&
          LI->getType() == StoredOnceValue->getType() && LI->isSimple())
        Loads.push_back(LI);
    }
  }
  // Only compute DT if we have any loads to examine.
  bool MadeChange = false;
  if (!Loads.empty()) {
    auto &DT = LookupDomTree(*const_cast<Function *>(F));
    for (auto *LI : Loads) {
      if (DT.dominates(StoredOnceStore, LI)) {
        LI->replaceAllUsesWith(const_cast<Value *>(StoredOnceValue));
        LI->eraseFromParent();
        MadeChange = true;
      }
    }
  }
  return MadeChange;
}

/// Analyze the specified global variable and optimize
/// it if possible.  If we make a change, return true.
static bool
processInternalGlobal(GlobalVariable *GV, const GlobalStatus &GS,
                      function_ref<TargetTransformInfo &(Function &)> GetTTI,
                      function_ref<TargetLibraryInfo &(Function &)> GetTLI,
                      function_ref<DominatorTree &(Function &)> LookupDomTree) {
  auto &DL = GV->getParent()->getDataLayout();
  // If this is a first class global and has only one accessing function and
  // this function is non-recursive, we replace the global with a local alloca
  // in this function.
  //
  // NOTE: It doesn't make sense to promote non-single-value types since we
  // are just replacing static memory to stack memory.
  //
  // If the global is in different address space, don't bring it to stack.
  if (!GS.HasMultipleAccessingFunctions &&
      GS.AccessingFunction &&
      GV->getValueType()->isSingleValueType() &&
      GV->getType()->getAddressSpace() == DL.getAllocaAddrSpace() &&
      !GV->isExternallyInitialized() &&
      GS.AccessingFunction->doesNotRecurse() &&
      isPointerValueDeadOnEntryToFunction(GS.AccessingFunction, GV,
                                          LookupDomTree)) {
    const DataLayout &DL = GV->getParent()->getDataLayout();

    LLVM_DEBUG(dbgs() << "LOCALIZING GLOBAL: " << *GV << "\n");
    Instruction &FirstI = const_cast<Instruction &>(*GS.AccessingFunction
                                                         ->getEntryBlock().begin());
    Type *ElemTy = GV->getValueType();
    // FIXME: Pass Global's alignment when globals have alignment
    AllocaInst *Alloca = new AllocaInst(ElemTy, DL.getAllocaAddrSpace(), nullptr,
                                        GV->getName(), &FirstI);
    if (!isa<UndefValue>(GV->getInitializer()))
      new StoreInst(GV->getInitializer(), Alloca, &FirstI);

    GV->replaceAllUsesWith(Alloca);
    GV->eraseFromParent();
    ++NumLocalized;
    return true;
  }

  bool Changed = false;

  // If the global is never loaded (but may be stored to), it is dead.
  // Delete it now.
  if (!GS.IsLoaded) {
    LLVM_DEBUG(dbgs() << "GLOBAL NEVER LOADED: " << *GV << "\n");

    if (isLeakCheckerRoot(GV)) {
      // Delete any constant stores to the global.
      Changed = CleanupPointerRootUsers(GV, GetTLI);
    } else {
      // Delete any stores we can find to the global.  We may not be able to
      // make it completely dead though.
      Changed = CleanupConstantGlobalUsers(GV, DL);
    }

    // If the global is dead now, delete it.
    if (GV->use_empty()) {
      GV->eraseFromParent();
      ++NumDeleted;
      Changed = true;
    }
    return Changed;
  }

  if (GS.StoredType <= GlobalStatus::InitializerStored) {
    LLVM_DEBUG(dbgs() << "MARKING CONSTANT: " << *GV << "\n");

    // Don't actually mark a global constant if it's atomic because atomic loads
    // are implemented by a trivial cmpxchg in some edge-cases and that usually
    // requires write access to the variable even if it's not actually changed.
    if (GS.Ordering == AtomicOrdering::NotAtomic) {
      assert(!GV->isConstant() && "Expected a non-constant global");
      GV->setConstant(true);
      Changed = true;
    }

    // Clean up any obviously simplifiable users now.
    Changed |= CleanupConstantGlobalUsers(GV, DL);

    // If the global is dead now, just nuke it.
    if (GV->use_empty()) {
      LLVM_DEBUG(dbgs() << "   *** Marking constant allowed us to simplify "
                        << "all users and delete global!\n");
      GV->eraseFromParent();
      ++NumDeleted;
      return true;
    }

    // Fall through to the next check; see if we can optimize further.
    ++NumMarked;
  }
  if (!GV->getInitializer()->getType()->isSingleValueType()) {
    const DataLayout &DL = GV->getParent()->getDataLayout();
    if (SRAGlobal(GV, DL))
      return true;
  }
  Value *StoredOnceValue = GS.getStoredOnceValue();
  if (GS.StoredType == GlobalStatus::StoredOnce && StoredOnceValue) {
    Function &StoreFn =
        const_cast<Function &>(*GS.StoredOnceStore->getFunction());
    bool CanHaveNonUndefGlobalInitializer =
        GetTTI(StoreFn).canHaveNonUndefGlobalInitializerInAddressSpace(
            GV->getType()->getAddressSpace());
    // If the initial value for the global was an undef value, and if only
    // one other value was stored into it, we can just change the
    // initializer to be the stored value, then delete all stores to the
    // global.  This allows us to mark it constant.
    // This is restricted to address spaces that allow globals to have
    // initializers. NVPTX, for example, does not support initializers for
    // shared memory (AS 3).
    auto *SOVConstant = dyn_cast<Constant>(StoredOnceValue);
    if (SOVConstant && isa<UndefValue>(GV->getInitializer()) &&
        DL.getTypeAllocSize(SOVConstant->getType()) ==
            DL.getTypeAllocSize(GV->getValueType()) &&
        CanHaveNonUndefGlobalInitializer) {
      if (SOVConstant->getType() == GV->getValueType()) {
        // Change the initializer in place.
        GV->setInitializer(SOVConstant);
      } else {
        // Create a new global with adjusted type.
        auto *NGV = new GlobalVariable(
            *GV->getParent(), SOVConstant->getType(), GV->isConstant(),
            GV->getLinkage(), SOVConstant, "", GV, GV->getThreadLocalMode(),
            GV->getAddressSpace());
        NGV->takeName(GV);
        NGV->copyAttributesFrom(GV);
        GV->replaceAllUsesWith(NGV);
        GV->eraseFromParent();
        GV = NGV;
      }

      // Clean up any obviously simplifiable users now.
      CleanupConstantGlobalUsers(GV, DL);

      if (GV->use_empty()) {
        LLVM_DEBUG(dbgs() << "   *** Substituting initializer allowed us to "
                          << "simplify all users and delete global!\n");
        GV->eraseFromParent();
        ++NumDeleted;
      }
      ++NumSubstitute;
      return true;
    }

    // Try to optimize globals based on the knowledge that only one value
    // (besides its initializer) is ever stored to the global.
    if (optimizeOnceStoredGlobal(GV, StoredOnceValue, DL, GetTLI))
      return true;

    // Try to forward the store to any loads. If we have more than one store, we
    // may have a store of the initializer between StoredOnceStore and a load.
    if (GS.NumStores == 1)
      if (forwardStoredOnceStore(GV, GS.StoredOnceStore, LookupDomTree))
        return true;

    // Otherwise, if the global was not a boolean, we can shrink it to be a
    // boolean. Skip this optimization for AS that doesn't allow an initializer.
    if (SOVConstant && GS.Ordering == AtomicOrdering::NotAtomic &&
        (!isa<UndefValue>(GV->getInitializer()) ||
         CanHaveNonUndefGlobalInitializer)) {
      if (TryToShrinkGlobalToBoolean(GV, SOVConstant)) {
        ++NumShrunkToBool;
        return true;
      }
    }
  }

  return Changed;
}

/// Analyze the specified global variable and optimize it if possible.  If we
/// make a change, return true.
static bool
processGlobal(GlobalValue &GV,
              function_ref<TargetTransformInfo &(Function &)> GetTTI,
              function_ref<TargetLibraryInfo &(Function &)> GetTLI,
              function_ref<DominatorTree &(Function &)> LookupDomTree) {
  if (GV.getName().starts_with("llvm."))
    return false;

  GlobalStatus GS;

  if (GlobalStatus::analyzeGlobal(&GV, GS))
    return false;

  bool Changed = false;
  if (!GS.IsCompared && !GV.hasGlobalUnnamedAddr()) {
    auto NewUnnamedAddr = GV.hasLocalLinkage() ? GlobalValue::UnnamedAddr::Global
                                               : GlobalValue::UnnamedAddr::Local;
    if (NewUnnamedAddr != GV.getUnnamedAddr()) {
      GV.setUnnamedAddr(NewUnnamedAddr);
      NumUnnamed++;
      Changed = true;
    }
  }

  // Do more involved optimizations if the global is internal.
  if (!GV.hasLocalLinkage())
    return Changed;

  auto *GVar = dyn_cast<GlobalVariable>(&GV);
  if (!GVar)
    return Changed;

  if (GVar->isConstant() || !GVar->hasInitializer())
    return Changed;

  return processInternalGlobal(GVar, GS, GetTTI, GetTLI, LookupDomTree) ||
         Changed;
}
/// Walk all of the direct calls of the specified function, changing them to
/// the fast calling convention.
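/// Illustrative (hypothetical) IR: given "define internal void @f()", a direct
/// call "call void @f()" becomes "call fastcc void @f()" once @f itself has
/// been switched to fastcc by the caller of this helper.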
static void ChangeCalleesToFastCall(Function *F) {
  for (User *U : F->users()) {
    if (isa<BlockAddress>(U))
      continue;
    cast<CallBase>(U)->setCallingConv(CallingConv::Fast);
  }
}
static AttributeList StripAttr(LLVMContext &C, AttributeList Attrs,
                               Attribute::AttrKind A) {
  unsigned AttrIndex;
  if (Attrs.hasAttrSomewhere(A, &AttrIndex))
    return Attrs.removeAttributeAtIndex(C, AttrIndex, A);
  return Attrs;
}
static void RemoveAttribute(Function *F, Attribute::AttrKind A) {
  F->setAttributes(StripAttr(F->getContext(), F->getAttributes(), A));
  for (User *U : F->users()) {
    if (isa<BlockAddress>(U))
      continue;
    CallBase *CB = cast<CallBase>(U);
    CB->setAttributes(StripAttr(F->getContext(), CB->getAttributes(), A));
  }
}
/// Return true if this is a calling convention that we'd like to change. The
/// idea here is that we don't want to mess with the convention if the user
/// explicitly requested something with performance implications like coldcc,
/// GHC, or anyregcc.
static bool hasChangeableCCImpl(Function *F) {
  CallingConv::ID CC = F->getCallingConv();

  // FIXME: Is it worth transforming x86_stdcallcc and x86_fastcallcc?
  if (CC != CallingConv::C && CC != CallingConv::X86_ThisCall)
    return false;

  if (F->isVarArg())
    return false;

  // FIXME: Change CC for the whole chain of musttail calls when possible.
  //
  // Can't change CC of the function that either has musttail calls, or is a
  // musttail callee itself.
  for (User *U : F->users()) {
    if (isa<BlockAddress>(U))
      continue;
    CallInst *CI = dyn_cast<CallInst>(U);
    if (!CI)
      continue;

    if (CI->isMustTailCall())
      return false;
  }

  for (BasicBlock &BB : *F)
    if (BB.getTerminatingMustTailCall())
      return false;

  return !F->hasAddressTaken();
}
using ChangeableCCCacheTy = SmallDenseMap<Function *, bool, 8>;
static bool hasChangeableCC(Function *F,
                            ChangeableCCCacheTy &ChangeableCCCache) {
  auto Res = ChangeableCCCache.try_emplace(F, false);
  if (Res.second)
    Res.first->second = hasChangeableCCImpl(F);
  return Res.first->second;
}
/// Return true if the block containing the call site has a BlockFrequency of
/// less than ColdCCRelFreq% of the entry block.
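/// For illustration only: if ColdCCRelFreq were 2, a call site would be
/// considered cold when its block executes fewer than 2 times for every 100
/// executions of the caller's entry block.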
static bool isColdCallSite(CallBase &CB, BlockFrequencyInfo &CallerBFI) {
  const BranchProbability ColdProb(ColdCCRelFreq, 100);
  auto *CallSiteBB = CB.getParent();
  auto CallSiteFreq = CallerBFI.getBlockFreq(CallSiteBB);
  auto CallerEntryFreq =
      CallerBFI.getBlockFreq(&(CB.getCaller()->getEntryBlock()));
  return CallSiteFreq < CallerEntryFreq * ColdProb;
}
// This function checks if the input function F is cold at all call sites. It
// also looks at each call site's containing function, returning false if the
// caller function contains other non cold calls. The input vector AllCallsCold
// contains a list of functions that only have call sites in cold blocks.
static bool
isValidCandidateForColdCC(Function &F,
                          function_ref<BlockFrequencyInfo &(Function &)> GetBFI,
                          const std::vector<Function *> &AllCallsCold) {

  if (F.user_empty())
    return false;

  for (User *U : F.users()) {
    if (isa<BlockAddress>(U))
      continue;

    CallBase &CB = cast<CallBase>(*U);
    Function *CallerFunc = CB.getParent()->getParent();
    BlockFrequencyInfo &CallerBFI = GetBFI(*CallerFunc);
    if (!isColdCallSite(CB, CallerBFI))
      return false;
    if (!llvm::is_contained(AllCallsCold, CallerFunc))
      return false;
  }
  return true;
}
static void changeCallSitesToColdCC(Function *F) {
  for (User *U : F->users()) {
    if (isa<BlockAddress>(U))
      continue;
    cast<CallBase>(U)->setCallingConv(CallingConv::Cold);
  }
}
// This function iterates over all the call instructions in the input Function
// and checks that all call sites are in cold blocks and are allowed to use the
// coldcc calling convention.
static bool
hasOnlyColdCalls(Function &F,
                 function_ref<BlockFrequencyInfo &(Function &)> GetBFI,
                 ChangeableCCCacheTy &ChangeableCCCache) {
  for (BasicBlock &BB : F) {
    for (Instruction &I : BB) {
      if (CallInst *CI = dyn_cast<CallInst>(&I)) {
        // Skip over inline asm instructions since they aren't function calls.
        if (CI->isInlineAsm())
          continue;
        Function *CalledFn = CI->getCalledFunction();
        if (!CalledFn)
          return false;
        // Skip over intrinsics since they won't remain as function calls.
        // Important to do this check before the linkage check below so we
        // won't bail out on debug intrinsics, possibly making the generated
        // code dependent on the presence of debug info.
        if (CalledFn->getIntrinsicID() != Intrinsic::not_intrinsic)
          continue;
        if (!CalledFn->hasLocalLinkage())
          return false;
        // Check if it's valid to use coldcc calling convention.
        if (!hasChangeableCC(CalledFn, ChangeableCCCache))
          return false;
        BlockFrequencyInfo &CallerBFI = GetBFI(F);
        if (!isColdCallSite(*CI, CallerBFI))
          return false;
      }
    }
  }
  return true;
}
static bool hasMustTailCallers(Function *F) {
  for (User *U : F->users()) {
    CallBase *CB = dyn_cast<CallBase>(U);
    if (!CB) {
      assert(isa<BlockAddress>(U) &&
             "Expected either CallBase or BlockAddress");
      continue;
    }
    if (CB->isMustTailCall())
      return true;
  }
  return false;
}
static bool hasInvokeCallers(Function *F) {
  for (User *U : F->users())
    if (isa<InvokeInst>(U))
      return true;
  return false;
}
static void RemovePreallocated(Function *F) {
  RemoveAttribute(F, Attribute::Preallocated);

  auto *M = F->getParent();

  IRBuilder<> Builder(M->getContext());

  // Cannot modify users() while iterating over it, so make a copy.
  SmallVector<User *, 4> PreallocatedCalls(F->users());
  for (User *U : PreallocatedCalls) {
    CallBase *CB = dyn_cast<CallBase>(U);
    if (!CB)
      continue;

    assert(
        !CB->isMustTailCall() &&
        "Shouldn't call RemovePreallocated() on a musttail preallocated call");
    // Create copy of call without "preallocated" operand bundle.
    SmallVector<OperandBundleDef, 1> OpBundles;
    CB->getOperandBundlesAsDefs(OpBundles);
    CallBase *PreallocatedSetup = nullptr;
    for (auto *It = OpBundles.begin(); It != OpBundles.end(); ++It) {
      if (It->getTag() == "preallocated") {
        PreallocatedSetup = cast<CallBase>(*It->input_begin());
        OpBundles.erase(It);
        break;
      }
    }
    assert(PreallocatedSetup && "Did not find preallocated bundle");
    uint64_t ArgCount =
        cast<ConstantInt>(PreallocatedSetup->getArgOperand(0))->getZExtValue();

    assert((isa<CallInst>(CB) || isa<InvokeInst>(CB)) &&
           "Unknown indirect call type");
    CallBase *NewCB = CallBase::Create(CB, OpBundles, CB);
    CB->replaceAllUsesWith(NewCB);
    NewCB->takeName(CB);
    CB->eraseFromParent();

    Builder.SetInsertPoint(PreallocatedSetup);
    auto *StackSave = Builder.CreateStackSave();
    Builder.SetInsertPoint(NewCB->getNextNonDebugInstruction());
    Builder.CreateStackRestore(StackSave);
    // Replace @llvm.call.preallocated.arg() with alloca.
    // Cannot modify users() while iterating over it, so make a copy.
    // @llvm.call.preallocated.arg() can be called with the same index multiple
    // times. So for each @llvm.call.preallocated.arg(), we see if we have
    // already created a Value* for the index, and if not, create an alloca and
    // bitcast right after the @llvm.call.preallocated.setup() so that it
    // dominates all uses.
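    // Illustrative (hypothetical) IR, before:
    //   %t = call token @llvm.call.preallocated.setup(i32 1)
    //   %p = call ptr @llvm.call.preallocated.arg(token %t, i32 0) preallocated(i32)
    //   call void @f(ptr preallocated(i32) %p) ["preallocated"(token %t)]
    // and after:
    //   %ss = call ptr @llvm.stacksave()
    //   %p = alloca i32
    //   call void @f(ptr %p)
    //   call void @llvm.stackrestore(ptr %ss)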
    SmallVector<Value *, 2> ArgAllocas(ArgCount);
    SmallVector<User *, 2> PreallocatedArgs(PreallocatedSetup->users());
    for (auto *User : PreallocatedArgs) {
      auto *UseCall = cast<CallBase>(User);
      assert(UseCall->getCalledFunction()->getIntrinsicID() ==
                 Intrinsic::call_preallocated_arg &&
             "preallocated token use was not a llvm.call.preallocated.arg");
      uint64_t AllocArgIndex =
          cast<ConstantInt>(UseCall->getArgOperand(1))->getZExtValue();
      Value *AllocaReplacement = ArgAllocas[AllocArgIndex];
      if (!AllocaReplacement) {
        auto AddressSpace = UseCall->getType()->getPointerAddressSpace();
        auto *ArgType =
            UseCall->getFnAttr(Attribute::Preallocated).getValueAsType();
        auto *InsertBefore = PreallocatedSetup->getNextNonDebugInstruction();
        Builder.SetInsertPoint(InsertBefore);
        auto *Alloca =
            Builder.CreateAlloca(ArgType, AddressSpace, nullptr, "paarg");
        ArgAllocas[AllocArgIndex] = Alloca;
        AllocaReplacement = Alloca;
      }

      UseCall->replaceAllUsesWith(AllocaReplacement);
      UseCall->eraseFromParent();
    }
    // Remove @llvm.call.preallocated.setup().
    cast<Instruction>(PreallocatedSetup)->eraseFromParent();
  }
}
static bool
OptimizeFunctions(Module &M,
                  function_ref<TargetLibraryInfo &(Function &)> GetTLI,
                  function_ref<TargetTransformInfo &(Function &)> GetTTI,
                  function_ref<BlockFrequencyInfo &(Function &)> GetBFI,
                  function_ref<DominatorTree &(Function &)> LookupDomTree,
                  SmallPtrSetImpl<const Comdat *> &NotDiscardableComdats,
                  function_ref<void(Function &F)> ChangedCFGCallback,
                  function_ref<void(Function &F)> DeleteFnCallback) {

  bool Changed = false;

  ChangeableCCCacheTy ChangeableCCCache;
  std::vector<Function *> AllCallsCold;
  for (Function &F : llvm::make_early_inc_range(M))
    if (hasOnlyColdCalls(F, GetBFI, ChangeableCCCache))
      AllCallsCold.push_back(&F);
  // Optimize functions.
  for (Function &F : llvm::make_early_inc_range(M)) {
    // Don't perform global opt pass on naked functions; we don't want fast
    // calling conventions for naked functions.
    if (F.hasFnAttribute(Attribute::Naked))
      continue;

    // Functions without names cannot be referenced outside this module.
    if (!F.hasName() && !F.isDeclaration() && !F.hasLocalLinkage())
      F.setLinkage(GlobalValue::InternalLinkage);

    if (deleteIfDead(F, NotDiscardableComdats, DeleteFnCallback)) {
      Changed = true;
      continue;
    }

    // LLVM's definition of dominance allows instructions that are cyclic
    // in unreachable blocks, e.g.:
    //   %pat = select i1 %condition, @global, i16* %pat
    // because any instruction dominates an instruction in a block that's
    // not reachable from entry.
    // So, remove unreachable blocks from the function, because a) there's
    // no point in analyzing them and b) GlobalOpt should otherwise grow
    // some more complicated logic to break these cycles.
    // Notify the analysis manager that we've modified the function's CFG.
    if (!F.isDeclaration()) {
      if (removeUnreachableBlocks(F)) {
        Changed = true;
        ChangedCFGCallback(F);
      }
    }

    Changed |= processGlobal(F, GetTTI, GetTLI, LookupDomTree);
    if (!F.hasLocalLinkage())
      continue;

    // If we have an inalloca parameter that we can safely remove the
    // inalloca attribute from, do so. This unlocks optimizations that
    // wouldn't be safe in the presence of inalloca.
    // FIXME: We should also hoist alloca affected by this to the entry
    // block if possible.
    if (F.getAttributes().hasAttrSomewhere(Attribute::InAlloca) &&
        !F.hasAddressTaken() && !hasMustTailCallers(&F) && !F.isVarArg()) {
      RemoveAttribute(&F, Attribute::InAlloca);
      Changed = true;
    }

    // FIXME: handle invokes
    // FIXME: handle musttail
    if (F.getAttributes().hasAttrSomewhere(Attribute::Preallocated)) {
      if (!F.hasAddressTaken() && !hasMustTailCallers(&F) &&
          !hasInvokeCallers(&F)) {
        RemovePreallocated(&F);
        Changed = true;
      }
      continue;
    }

    if (hasChangeableCC(&F, ChangeableCCCache)) {
      TargetTransformInfo &TTI = GetTTI(F);
      // Change the calling convention to coldcc if either stress testing is
      // enabled or the target would like to use coldcc on functions which are
      // cold at all call sites and the callers contain no other non coldcc
      // calls.
      if (EnableColdCCStressTest ||
          (TTI.useColdCCForColdCall(F) &&
           isValidCandidateForColdCC(F, GetBFI, AllCallsCold))) {
        ChangeableCCCache.erase(&F);
        F.setCallingConv(CallingConv::Cold);
        changeCallSitesToColdCC(&F);
        Changed = true;
      }
    }

    if (hasChangeableCC(&F, ChangeableCCCache)) {
      // If this function has a calling convention worth changing, is not a
      // varargs function, and is only called directly, promote it to use the
      // Fast calling convention.
      F.setCallingConv(CallingConv::Fast);
      ChangeCalleesToFastCall(&F);
      ++NumFastCallFns;
      Changed = true;
    }

    if (F.getAttributes().hasAttrSomewhere(Attribute::Nest) &&
        !F.hasAddressTaken()) {
      // The function is not used by a trampoline intrinsic, so it is safe
      // to remove the 'nest' attribute.
      RemoveAttribute(&F, Attribute::Nest);
      ++NumNestRemoved;
      Changed = true;
    }
  }
  return Changed;
}
static bool
OptimizeGlobalVars(Module &M,
                   function_ref<TargetTransformInfo &(Function &)> GetTTI,
                   function_ref<TargetLibraryInfo &(Function &)> GetTLI,
                   function_ref<DominatorTree &(Function &)> LookupDomTree,
                   SmallPtrSetImpl<const Comdat *> &NotDiscardableComdats) {
  bool Changed = false;

  for (GlobalVariable &GV : llvm::make_early_inc_range(M.globals())) {
    // Global variables without names cannot be referenced outside this module.
    if (!GV.hasName() && !GV.isDeclaration() && !GV.hasLocalLinkage())
      GV.setLinkage(GlobalValue::InternalLinkage);
    // Simplify the initializer.
    if (GV.hasInitializer())
      if (auto *C = dyn_cast<Constant>(GV.getInitializer())) {
        auto &DL = M.getDataLayout();
        // TLI is not used in the case of a Constant, so use default nullptr
        // for that optional parameter, since we don't have a Function to
        // provide GetTLI anyway.
        Constant *New = ConstantFoldConstant(C, DL, /*TLI*/ nullptr);
        if (New != C)
          GV.setInitializer(New);
      }

    if (deleteIfDead(GV, NotDiscardableComdats)) {
      Changed = true;
      continue;
    }

    Changed |= processGlobal(GV, GetTTI, GetTLI, LookupDomTree);
  }
  return Changed;
}
/// Evaluate static constructors in the function, if we can. Return true if we
/// can, false otherwise.
static bool EvaluateStaticConstructor(Function *F, const DataLayout &DL,
                                      TargetLibraryInfo *TLI) {
  // Skip external functions.
  if (F->isDeclaration())
    return false;

  // Call the function.
  Evaluator Eval(DL, TLI);
  Constant *RetValDummy;
  bool EvalSuccess = Eval.EvaluateFunction(F, RetValDummy,
                                           SmallVector<Constant *, 0>());

  if (EvalSuccess) {
    ++NumCtorsEvaluated;

    // We succeeded at evaluation: commit the result.
    auto NewInitializers = Eval.getMutatedInitializers();
    LLVM_DEBUG(dbgs() << "FULLY EVALUATED GLOBAL CTOR FUNCTION '"
                      << F->getName() << "' to " << NewInitializers.size()
                      << " stores.\n");
    for (const auto &Pair : NewInitializers)
      Pair.first->setInitializer(Pair.second);
    for (GlobalVariable *GV : Eval.getInvariants())
      GV->setConstant(true);
  }

  return EvalSuccess;
}
static int compareNames(Constant *const *A, Constant *const *B) {
  Value *AStripped = (*A)->stripPointerCasts();
  Value *BStripped = (*B)->stripPointerCasts();
  return AStripped->getName().compare(BStripped->getName());
}
static void setUsedInitializer(GlobalVariable &V,
                               const SmallPtrSetImpl<GlobalValue *> &Init) {
  if (Init.empty()) {
    V.eraseFromParent();
    return;
  }

  // Get address space of pointers in the array of pointers.
  const Type *UsedArrayType = V.getValueType();
  const auto *VAT = cast<ArrayType>(UsedArrayType);
  const auto *VEPT = cast<PointerType>(VAT->getArrayElementType());

  // Type of pointer to the array of pointers.
  PointerType *PtrTy =
      PointerType::get(V.getContext(), VEPT->getAddressSpace());

  SmallVector<Constant *, 8> UsedArray;
  for (GlobalValue *GV : Init) {
    Constant *Cast = ConstantExpr::getPointerBitCastOrAddrSpaceCast(GV, PtrTy);
    UsedArray.push_back(Cast);
  }

  // Sort to get deterministic order.
  array_pod_sort(UsedArray.begin(), UsedArray.end(), compareNames);
  ArrayType *ATy = ArrayType::get(PtrTy, UsedArray.size());

  Module *M = V.getParent();
  V.removeFromParent();
  GlobalVariable *NV =
      new GlobalVariable(*M, ATy, false, GlobalValue::AppendingLinkage,
                         ConstantArray::get(ATy, UsedArray), "");
  NV->takeName(&V);
  NV->setSection("llvm.metadata");
  delete &V;
}

namespace {

/// An easy to access representation of llvm.used and llvm.compiler.used.
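/// Illustrative (hypothetical) IR form of these arrays:
///   @llvm.used = appending global [1 x ptr] [ptr @f], section "llvm.metadata"
///   @llvm.compiler.used = appending global [1 x ptr] [ptr @g], section "llvm.metadata"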
class LLVMUsed {
  SmallPtrSet<GlobalValue *, 4> Used;
  SmallPtrSet<GlobalValue *, 4> CompilerUsed;
  GlobalVariable *UsedV;
  GlobalVariable *CompilerUsedV;

public:
  LLVMUsed(Module &M) {
    SmallVector<GlobalValue *, 4> Vec;
    UsedV = collectUsedGlobalVariables(M, Vec, false);
    Used = {Vec.begin(), Vec.end()};
    Vec.clear();
    CompilerUsedV = collectUsedGlobalVariables(M, Vec, true);
    CompilerUsed = {Vec.begin(), Vec.end()};
  }

  using iterator = SmallPtrSet<GlobalValue *, 4>::iterator;
  using used_iterator_range = iterator_range<iterator>;

  iterator usedBegin() { return Used.begin(); }
  iterator usedEnd() { return Used.end(); }

  used_iterator_range used() {
    return used_iterator_range(usedBegin(), usedEnd());
  }

  iterator compilerUsedBegin() { return CompilerUsed.begin(); }
  iterator compilerUsedEnd() { return CompilerUsed.end(); }

  used_iterator_range compilerUsed() {
    return used_iterator_range(compilerUsedBegin(), compilerUsedEnd());
  }

  bool usedCount(GlobalValue *GV) const { return Used.count(GV); }

  bool compilerUsedCount(GlobalValue *GV) const {
    return CompilerUsed.count(GV);
  }

  bool usedErase(GlobalValue *GV) { return Used.erase(GV); }
  bool compilerUsedErase(GlobalValue *GV) { return CompilerUsed.erase(GV); }
  bool usedInsert(GlobalValue *GV) { return Used.insert(GV).second; }

  bool compilerUsedInsert(GlobalValue *GV) {
    return CompilerUsed.insert(GV).second;
  }

  void syncVariablesAndSets() {
    if (UsedV)
      setUsedInitializer(*UsedV, Used);
    if (CompilerUsedV)
      setUsedInitializer(*CompilerUsedV, CompilerUsed);
  }
};

} // end anonymous namespace
static bool hasUseOtherThanLLVMUsed(GlobalAlias &GA, const LLVMUsed &U) {
  if (GA.use_empty()) // No use at all.
    return false;

  assert((!U.usedCount(&GA) || !U.compilerUsedCount(&GA)) &&
         "We should have removed the duplicated "
         "element from llvm.compiler.used");
  if (!GA.hasOneUse())
    // Strictly more than one use. So at least one is not in llvm.used and
    // llvm.compiler.used.
    return true;

  // Exactly one use. Check if it is in llvm.used or llvm.compiler.used.
  return !U.usedCount(&GA) && !U.compilerUsedCount(&GA);
}
static bool mayHaveOtherReferences(GlobalValue &GV, const LLVMUsed &U) {
  if (!GV.hasLocalLinkage())
    return true;

  return U.usedCount(&GV) || U.compilerUsedCount(&GV);
}
static bool hasUsesToReplace(GlobalAlias &GA, const LLVMUsed &U,
                             bool &RenameTarget) {
  RenameTarget = false;
  bool Ret = false;
  if (hasUseOtherThanLLVMUsed(GA, U))
    Ret = true;

  // If the alias is externally visible, we may still be able to simplify it.
  if (!mayHaveOtherReferences(GA, U))
    return Ret;

  // If the aliasee has internal linkage and no other references (e.g.,
  // @llvm.used, @llvm.compiler.used), give it the name and linkage of the
  // alias, and delete the alias. This turns:
  //   define internal ... @f(...)
  //   @a = alias ... @f
  // into:
  //   define ... @a(...)
  Constant *Aliasee = GA.getAliasee();
  GlobalValue *Target = cast<GlobalValue>(Aliasee->stripPointerCasts());
  if (mayHaveOtherReferences(*Target, U))
    return Ret;

  RenameTarget = true;
  return true;
}
static bool
OptimizeGlobalAliases(Module &M,
                      SmallPtrSetImpl<const Comdat *> &NotDiscardableComdats) {
  bool Changed = false;
  LLVMUsed Used(M);

  for (GlobalValue *GV : Used.used())
    Used.compilerUsedErase(GV);

  // Return whether GV is explicitly or implicitly dso_local and not replaceable
  // by another definition in the current linkage unit.
  auto IsModuleLocal = [](GlobalValue &GV) {
    return !GlobalValue::isInterposableLinkage(GV.getLinkage()) &&
           (GV.isDSOLocal() || GV.isImplicitDSOLocal());
  };

  for (GlobalAlias &J : llvm::make_early_inc_range(M.aliases())) {
    // Aliases without names cannot be referenced outside this module.
    if (!J.hasName() && !J.isDeclaration() && !J.hasLocalLinkage())
      J.setLinkage(GlobalValue::InternalLinkage);

    if (deleteIfDead(J, NotDiscardableComdats)) {
      Changed = true;
      continue;
    }

    // If the alias can change at link time, nothing can be done - bail out.
    if (!IsModuleLocal(J))
      continue;

    Constant *Aliasee = J.getAliasee();
    GlobalValue *Target = dyn_cast<GlobalValue>(Aliasee->stripPointerCasts());
    // We can't trivially replace the alias with the aliasee if the aliasee is
    // non-trivial in some way. We also can't replace the alias with the aliasee
    // if the aliasee may be preemptible at runtime. On ELF, a non-preemptible
    // alias can be used to access the definition as if preemption did not
    // happen.
    // TODO: Try to handle non-zero GEPs of local aliasees.
    if (!Target || !IsModuleLocal(*Target))
      continue;

    Target->removeDeadConstantUsers();

    // Make all users of the alias use the aliasee instead.
    bool RenameTarget;
    if (!hasUsesToReplace(J, Used, RenameTarget))
      continue;

    J.replaceAllUsesWith(Aliasee);
    ++NumAliasesResolved;
    Changed = true;

    if (RenameTarget) {
      // Give the aliasee the name, linkage and other attributes of the alias.
      Target->takeName(&J);
      Target->setLinkage(J.getLinkage());
      Target->setDSOLocal(J.isDSOLocal());
      Target->setVisibility(J.getVisibility());
      Target->setDLLStorageClass(J.getDLLStorageClass());

      if (Used.usedErase(&J))
        Used.usedInsert(Target);

      if (Used.compilerUsedErase(&J))
        Used.compilerUsedInsert(Target);
    } else if (mayHaveOtherReferences(J, Used))
      continue;

    // Delete the alias.
    J.eraseFromParent();
    ++NumAliasesRemoved;
    Changed = true;
  }

  Used.syncVariablesAndSets();

  return Changed;
}
static Function *
FindCXAAtExit(Module &M, function_ref<TargetLibraryInfo &(Function &)> GetTLI) {
  // Hack to get a default TLI before we have actual Function.
  auto FuncIter = M.begin();
  if (FuncIter == M.end())
    return nullptr;
  auto *TLI = &GetTLI(*FuncIter);

  LibFunc F = LibFunc_cxa_atexit;
  if (!TLI->has(F))
    return nullptr;

  Function *Fn = M.getFunction(TLI->getName(F));
  if (!Fn)
    return nullptr;

  // Now get the actual TLI for Fn.
  TLI = &GetTLI(*Fn);

  // Make sure that the function has the correct prototype.
  if (!TLI->getLibFunc(*Fn, F) || F != LibFunc_cxa_atexit)
    return nullptr;

  return Fn;
}
/// Returns whether the given function is an empty C++ destructor and can
/// therefore be eliminated.
/// Note that we assume that other optimization passes have already simplified
/// the code so we simply check for 'ret'.
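/// Illustrative (hypothetical) IR for a destructor considered "empty" here:
///   define linkonce_odr void @_ZN3FooD2Ev(ptr %this) {
///   entry:
///     ret void
///   }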
static bool cxxDtorIsEmpty(const Function &Fn) {
  // FIXME: We could eliminate C++ destructors if they're readonly/readnone and
  // nounwind, but that doesn't seem worth doing.
  if (Fn.isDeclaration())
    return false;

  for (const auto &I : Fn.getEntryBlock()) {
    if (I.isDebugOrPseudoInst())
      continue;
    if (isa<ReturnInst>(I))
      return true;
    break;
  }
  return false;
}
static bool OptimizeEmptyGlobalCXXDtors(Function *CXAAtExitFn) {
  /// Itanium C++ ABI p3.3.5:
  ///
  ///   After constructing a global (or local static) object, that will require
  ///   destruction on exit, a termination function is registered as follows:
  ///
  ///   extern "C" int __cxa_atexit ( void (*f)(void *), void *p, void *d );
  ///
  ///   This registration, e.g. __cxa_atexit(f,p,d), is intended to cause the
  ///   call f(p) when DSO d is unloaded, before all such termination calls
  ///   registered before this one. It returns zero if registration is
  ///   successful, nonzero on failure.
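  /// Illustrative (hypothetical) IR for such a registration, which this pass
  /// can drop when the registered destructor (@_ZN3FooD2Ev) is empty:
  ///   %r = call i32 @__cxa_atexit(ptr @_ZN3FooD2Ev, ptr @foo, ptr @__dso_handle)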
  // This pass will look for calls to __cxa_atexit where the function is trivial
  // and remove them.
  bool Changed = false;

  for (User *U : llvm::make_early_inc_range(CXAAtExitFn->users())) {
    // We're only interested in calls. Theoretically, we could handle invoke
    // instructions as well, but neither llvm-gcc nor clang generate invokes
    // to __cxa_atexit.
    CallInst *CI = dyn_cast<CallInst>(U);
    if (!CI)
      continue;

    Function *DtorFn =
        dyn_cast<Function>(CI->getArgOperand(0)->stripPointerCasts());
    if (!DtorFn || !cxxDtorIsEmpty(*DtorFn))
      continue;

    // Just remove the call.
    CI->replaceAllUsesWith(Constant::getNullValue(CI->getType()));
    CI->eraseFromParent();

    ++NumCXXDtorsRemoved;

    Changed |= true;
  }

  return Changed;
}
static bool
optimizeGlobalsInModule(Module &M, const DataLayout &DL,
                        function_ref<TargetLibraryInfo &(Function &)> GetTLI,
                        function_ref<TargetTransformInfo &(Function &)> GetTTI,
                        function_ref<BlockFrequencyInfo &(Function &)> GetBFI,
                        function_ref<DominatorTree &(Function &)> LookupDomTree,
                        function_ref<void(Function &F)> ChangedCFGCallback,
                        function_ref<void(Function &F)> DeleteFnCallback) {
  SmallPtrSet<const Comdat *, 8> NotDiscardableComdats;
  bool Changed = false;
  bool LocalChange = true;
  std::optional<uint32_t> FirstNotFullyEvaluatedPriority;
  while (LocalChange) {
    LocalChange = false;

    NotDiscardableComdats.clear();
    for (const GlobalVariable &GV : M.globals())
      if (const Comdat *C = GV.getComdat())
        if (!GV.isDiscardableIfUnused() || !GV.use_empty())
          NotDiscardableComdats.insert(C);
    for (Function &F : M)
      if (const Comdat *C = F.getComdat())
        if (!F.isDefTriviallyDead())
          NotDiscardableComdats.insert(C);
    for (GlobalAlias &GA : M.aliases())
      if (const Comdat *C = GA.getComdat())
        if (!GA.isDiscardableIfUnused() || !GA.use_empty())
          NotDiscardableComdats.insert(C);

    // Delete functions that are trivially dead, ccc -> fastcc
    LocalChange |= OptimizeFunctions(M, GetTLI, GetTTI, GetBFI, LookupDomTree,
                                     NotDiscardableComdats, ChangedCFGCallback,
                                     DeleteFnCallback);

    // Optimize global_ctors list.
    LocalChange |=
        optimizeGlobalCtorsList(M, [&](uint32_t Priority, Function *F) {
          if (FirstNotFullyEvaluatedPriority &&
              *FirstNotFullyEvaluatedPriority != Priority)
            return false;
          bool Evaluated = EvaluateStaticConstructor(F, DL, &GetTLI(*F));
          if (!Evaluated)
            FirstNotFullyEvaluatedPriority = Priority;
          return Evaluated;
        });

    // Optimize non-address-taken globals.
    LocalChange |= OptimizeGlobalVars(M, GetTTI, GetTLI, LookupDomTree,
                                      NotDiscardableComdats);

    // Resolve aliases, when possible.
    LocalChange |= OptimizeGlobalAliases(M, NotDiscardableComdats);

    // Try to remove trivial global destructors if they are not removed
    // already.
    Function *CXAAtExitFn = FindCXAAtExit(M, GetTLI);
    if (CXAAtExitFn)
      LocalChange |= OptimizeEmptyGlobalCXXDtors(CXAAtExitFn);

    Changed |= LocalChange;
  }
  // TODO: Move all global ctors functions to the end of the module for code
  // size.

  return Changed;
}
PreservedAnalyses GlobalOptPass::run(Module &M, ModuleAnalysisManager &AM) {
  auto &DL = M.getDataLayout();
  auto &FAM =
      AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
  auto LookupDomTree = [&FAM](Function &F) -> DominatorTree & {
    return FAM.getResult<DominatorTreeAnalysis>(F);
  };
  auto GetTLI = [&FAM](Function &F) -> TargetLibraryInfo & {
    return FAM.getResult<TargetLibraryAnalysis>(F);
  };
  auto GetTTI = [&FAM](Function &F) -> TargetTransformInfo & {
    return FAM.getResult<TargetIRAnalysis>(F);
  };

  auto GetBFI = [&FAM](Function &F) -> BlockFrequencyInfo & {
    return FAM.getResult<BlockFrequencyAnalysis>(F);
  };

  auto ChangedCFGCallback = [&FAM](Function &F) {
    FAM.invalidate(F, PreservedAnalyses::none());
  };
  auto DeleteFnCallback = [&FAM](Function &F) { FAM.clear(F, F.getName()); };

  if (!optimizeGlobalsInModule(M, DL, GetTLI, GetTTI, GetBFI, LookupDomTree,
                               ChangedCFGCallback, DeleteFnCallback))
    return PreservedAnalyses::all();

  PreservedAnalyses PA = PreservedAnalyses::none();
  // We made sure to clear analyses for deleted functions.
  PA.preserve<FunctionAnalysisManagerModuleProxy>();
  // The only place we modify the CFG is when calling
  // removeUnreachableBlocks(), but there we make sure to invalidate analyses
  // for modified functions.
  PA.preserveSet<CFGAnalyses>();
  return PA;
}