1 //===- ObjCARCOpts.cpp - ObjC ARC Optimization ----------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file
10 /// This file defines ObjC ARC optimizations. ARC stands for Automatic
11 /// Reference Counting and is a system for managing reference counts for objects
12 /// in Objective C.
13 ///
14 /// The optimizations performed include elimination of redundant, partially
15 /// redundant, and inconsequential reference count operations, elimination of
16 /// redundant weak pointer operations, and numerous minor simplifications.
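///
/// For example (an illustrative sketch in the style of the TODO comments
/// below, not a test case from this repository), given a sequence such as:
///
///    objc_retain(%x)
///    stuff_that_cannot_release()
///    objc_release(%x)
///
/// the retain/release pair is removed once the pass proves that nothing in
/// between can decrement the reference count of %x.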
17 ///
18 /// WARNING: This file knows about certain library functions. It recognizes them
19 /// by name, and hardwires knowledge of their semantics.
20 ///
21 /// WARNING: This file knows about how certain Objective-C library functions are
22 /// used. Naive LLVM IR transformations which would otherwise be
23 /// behavior-preserving may break these assumptions.
25 //===----------------------------------------------------------------------===//
27 #include "ARCRuntimeEntryPoints.h"
28 #include "BlotMapVector.h"
29 #include "DependencyAnalysis.h"
30 #include "ObjCARC.h"
31 #include "ProvenanceAnalysis.h"
32 #include "PtrState.h"
33 #include "llvm/ADT/DenseMap.h"
34 #include "llvm/ADT/None.h"
35 #include "llvm/ADT/STLExtras.h"
36 #include "llvm/ADT/SmallPtrSet.h"
37 #include "llvm/ADT/SmallVector.h"
38 #include "llvm/ADT/Statistic.h"
39 #include "llvm/Analysis/AliasAnalysis.h"
40 #include "llvm/Analysis/EHPersonalities.h"
41 #include "llvm/Analysis/ObjCARCAliasAnalysis.h"
42 #include "llvm/Analysis/ObjCARCAnalysisUtils.h"
43 #include "llvm/Analysis/ObjCARCInstKind.h"
44 #include "llvm/IR/BasicBlock.h"
45 #include "llvm/IR/CFG.h"
46 #include "llvm/IR/CallSite.h"
47 #include "llvm/IR/Constant.h"
48 #include "llvm/IR/Constants.h"
49 #include "llvm/IR/DerivedTypes.h"
50 #include "llvm/IR/Function.h"
51 #include "llvm/IR/GlobalVariable.h"
52 #include "llvm/IR/InstIterator.h"
53 #include "llvm/IR/InstrTypes.h"
54 #include "llvm/IR/Instruction.h"
55 #include "llvm/IR/Instructions.h"
56 #include "llvm/IR/LLVMContext.h"
57 #include "llvm/IR/Metadata.h"
58 #include "llvm/IR/Type.h"
59 #include "llvm/IR/User.h"
60 #include "llvm/IR/Value.h"
61 #include "llvm/Pass.h"
62 #include "llvm/Support/Casting.h"
63 #include "llvm/Support/Compiler.h"
64 #include "llvm/Support/Debug.h"
65 #include "llvm/Support/ErrorHandling.h"
66 #include "llvm/Support/raw_ostream.h"
67 #include <cassert>
68 #include <iterator>
69 #include <utility>
71 using namespace llvm;
72 using namespace llvm::objcarc;
74 #define DEBUG_TYPE "objc-arc-opts"
76 static cl::opt<unsigned> MaxPtrStates("arc-opt-max-ptr-states",
77 cl::Hidden,
78 cl::desc("Maximum number of ptr states the optimizer keeps track of"),
79 cl::init(4095));
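// (Illustrative usage, as a sketch only: when running the legacy pass through
// opt, the limit can be adjusted on the command line, e.g.
// "opt -objc-arc -arc-opt-max-ptr-states=1000"; cl::opt flags are parsed by
// the standard LLVM command-line machinery.)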
81 /// \defgroup ARCUtilities Utility declarations/definitions specific to ARC.
82 /// @{
84 /// This is similar to GetRCIdentityRoot but it stops as soon
85 /// as it finds a value with multiple uses.
86 static const Value *FindSingleUseIdentifiedObject(const Value *Arg) {
87 // ConstantData (like ConstantPointerNull and UndefValue) is used across
88 // modules. It's never a single-use value.
89 if (isa<ConstantData>(Arg))
90 return nullptr;
92 if (Arg->hasOneUse()) {
93 if (const BitCastInst *BC = dyn_cast<BitCastInst>(Arg))
94 return FindSingleUseIdentifiedObject(BC->getOperand(0));
95 if (const GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Arg))
96 if (GEP->hasAllZeroIndices())
97 return FindSingleUseIdentifiedObject(GEP->getPointerOperand());
98 if (IsForwarding(GetBasicARCInstKind(Arg)))
99 return FindSingleUseIdentifiedObject(
100 cast<CallInst>(Arg)->getArgOperand(0));
101 if (!IsObjCIdentifiedObject(Arg))
102 return nullptr;
103 return Arg;
106 // If we found an identifiable object that has multiple uses, but they are
107 // all trivial uses, we can still consider this to be a single-use value.
108 if (IsObjCIdentifiedObject(Arg)) {
109 for (const User *U : Arg->users())
110 if (!U->use_empty() || GetRCIdentityRoot(U) != Arg)
111 return nullptr;
113 return Arg;
116 return nullptr;
119 /// @}
121 /// \defgroup ARCOpt ARC Optimization.
122 /// @{
124 // TODO: On code like this:
126 // objc_retain(%x)
127 // stuff_that_cannot_release()
128 // objc_autorelease(%x)
129 // stuff_that_cannot_release()
130 // objc_retain(%x)
131 // stuff_that_cannot_release()
132 // objc_autorelease(%x)
134 // The second retain and autorelease can be deleted.
136 // TODO: It should be possible to delete
137 // objc_autoreleasePoolPush and objc_autoreleasePoolPop
138 // pairs if nothing is actually autoreleased between them. Also, autorelease
139 // calls followed by objc_autoreleasePoolPop calls (perhaps in ObjC++ code
140 // after inlining) can be turned into plain release calls.
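// For illustration (a hypothetical sketch mirroring the comment above):
//
//    %pool = objc_autoreleasePoolPush()
//    stuff_that_does_not_autorelease()
//    objc_autoreleasePoolPop(%pool)
//
// Such a push/pop pair could be deleted outright once that analysis exists.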
142 // TODO: Critical-edge splitting. If the optimal insertion point is
143 // a critical edge, the current algorithm has to fail, because it doesn't
144 // know how to split edges. It should be possible to make the optimizer
145 // think in terms of edges, rather than blocks, and then split critical
146 // edges on demand.
148 // TODO: OptimizeSequences could be generalized to be interprocedural.
150 // TODO: Recognize that a bunch of other objc runtime calls have
151 // non-escaping arguments and non-releasing arguments, and may be
152 // non-autoreleasing.
154 // TODO: Sink autorelease calls as far as possible. Unfortunately we
155 // usually can't sink them past other calls, which would be the main
156 // case where it would be useful.
158 // TODO: The pointer returned from objc_loadWeakRetained is retained.
160 // TODO: Delete release+retain pairs (rare).
162 STATISTIC(NumNoops, "Number of no-op objc calls eliminated");
163 STATISTIC(NumPartialNoops, "Number of partially no-op objc calls eliminated");
164 STATISTIC(NumAutoreleases, "Number of autoreleases converted to releases");
165 STATISTIC(NumRets, "Number of return value forwarding "
166 "retain+autoreleases eliminated");
167 STATISTIC(NumRRs, "Number of retain+release paths eliminated");
168 STATISTIC(NumPeeps, "Number of calls peephole-optimized");
169 #ifndef NDEBUG
170 STATISTIC(NumRetainsBeforeOpt,
171 "Number of retains before optimization");
172 STATISTIC(NumReleasesBeforeOpt,
173 "Number of releases before optimization");
174 STATISTIC(NumRetainsAfterOpt,
175 "Number of retains after optimization");
176 STATISTIC(NumReleasesAfterOpt,
177 "Number of releases after optimization");
178 #endif
180 namespace {
182 /// Per-BasicBlock state.
183 class BBState {
184 /// The number of unique control paths from the entry which can reach this
185 /// block.
186 unsigned TopDownPathCount = 0;
188 /// The number of unique control paths to exits from this block.
189 unsigned BottomUpPathCount = 0;
191 /// The top-down traversal uses this to record information known about a
192 /// pointer at the bottom of each block.
193 BlotMapVector<const Value *, TopDownPtrState> PerPtrTopDown;
195 /// The bottom-up traversal uses this to record information known about a
196 /// pointer at the top of each block.
197 BlotMapVector<const Value *, BottomUpPtrState> PerPtrBottomUp;
199 /// Effective predecessors of the current block ignoring ignorable edges and
200 /// ignored backedges.
201 SmallVector<BasicBlock *, 2> Preds;
203 /// Effective successors of the current block ignoring ignorable edges and
204 /// ignored backedges.
205 SmallVector<BasicBlock *, 2> Succs;
207 public:
208 static const unsigned OverflowOccurredValue;
210 BBState() = default;
212 using top_down_ptr_iterator = decltype(PerPtrTopDown)::iterator;
213 using const_top_down_ptr_iterator = decltype(PerPtrTopDown)::const_iterator;
215 top_down_ptr_iterator top_down_ptr_begin() { return PerPtrTopDown.begin(); }
216 top_down_ptr_iterator top_down_ptr_end() { return PerPtrTopDown.end(); }
217 const_top_down_ptr_iterator top_down_ptr_begin() const {
218 return PerPtrTopDown.begin();
220 const_top_down_ptr_iterator top_down_ptr_end() const {
221 return PerPtrTopDown.end();
223 bool hasTopDownPtrs() const {
224 return !PerPtrTopDown.empty();
227 unsigned top_down_ptr_list_size() const {
228 return std::distance(top_down_ptr_begin(), top_down_ptr_end());
231 using bottom_up_ptr_iterator = decltype(PerPtrBottomUp)::iterator;
232 using const_bottom_up_ptr_iterator =
233 decltype(PerPtrBottomUp)::const_iterator;
235 bottom_up_ptr_iterator bottom_up_ptr_begin() {
236 return PerPtrBottomUp.begin();
238 bottom_up_ptr_iterator bottom_up_ptr_end() { return PerPtrBottomUp.end(); }
239 const_bottom_up_ptr_iterator bottom_up_ptr_begin() const {
240 return PerPtrBottomUp.begin();
242 const_bottom_up_ptr_iterator bottom_up_ptr_end() const {
243 return PerPtrBottomUp.end();
245 bool hasBottomUpPtrs() const {
246 return !PerPtrBottomUp.empty();
249 unsigned bottom_up_ptr_list_size() const {
250 return std::distance(bottom_up_ptr_begin(), bottom_up_ptr_end());
253 /// Mark this block as being an entry block, which has one path from the
254 /// entry by definition.
255 void SetAsEntry() { TopDownPathCount = 1; }
257 /// Mark this block as being an exit block, which has one path to an exit by
258 /// definition.
259 void SetAsExit() { BottomUpPathCount = 1; }
261 /// Attempt to find the PtrState object describing the top down state for
262 /// pointer Arg. Return a new initialized PtrState describing the top down
263 /// state for Arg if we do not find one.
264 TopDownPtrState &getPtrTopDownState(const Value *Arg) {
265 return PerPtrTopDown[Arg];
268 /// Attempt to find the PtrState object describing the bottom up state for
269 /// pointer Arg. Return a new initialized PtrState describing the bottom up
270 /// state for Arg if we do not find one.
271 BottomUpPtrState &getPtrBottomUpState(const Value *Arg) {
272 return PerPtrBottomUp[Arg];
275 /// Attempt to find the PtrState object describing the bottom up state for
276 /// pointer Arg.
277 bottom_up_ptr_iterator findPtrBottomUpState(const Value *Arg) {
278 return PerPtrBottomUp.find(Arg);
281 void clearBottomUpPointers() {
282 PerPtrBottomUp.clear();
285 void clearTopDownPointers() {
286 PerPtrTopDown.clear();
289 void InitFromPred(const BBState &Other);
290 void InitFromSucc(const BBState &Other);
291 void MergePred(const BBState &Other);
292 void MergeSucc(const BBState &Other);
294 /// Compute the number of possible unique paths from an entry to an exit
295 /// which pass through this block. This is only valid after both the
296 /// top-down and bottom-up traversals are complete.
298 /// Returns true if overflow occurred. Returns false if overflow did not
299 /// occur.
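/// For example (an illustrative sketch): if two distinct paths reach this
/// block from the entry (TopDownPathCount == 2) and two distinct paths lead
/// from it to exits (BottomUpPathCount == 2), then 2 * 2 = 4 unique
/// entry-to-exit paths pass through it.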
300 bool GetAllPathCountWithOverflow(unsigned &PathCount) const {
301 if (TopDownPathCount == OverflowOccurredValue ||
302 BottomUpPathCount == OverflowOccurredValue)
303 return true;
304 unsigned long long Product =
305 (unsigned long long)TopDownPathCount*BottomUpPathCount;
306 // Overflow occurred if any of the upper bits of Product are set or if the
307 // lower bits of Product are all set.
308 return (Product >> 32) ||
309 ((PathCount = Product) == OverflowOccurredValue);
312 // Specialized CFG utilities.
313 using edge_iterator = SmallVectorImpl<BasicBlock *>::const_iterator;
315 edge_iterator pred_begin() const { return Preds.begin(); }
316 edge_iterator pred_end() const { return Preds.end(); }
317 edge_iterator succ_begin() const { return Succs.begin(); }
318 edge_iterator succ_end() const { return Succs.end(); }
320 void addSucc(BasicBlock *Succ) { Succs.push_back(Succ); }
321 void addPred(BasicBlock *Pred) { Preds.push_back(Pred); }
323 bool isExit() const { return Succs.empty(); }
326 } // end anonymous namespace
328 const unsigned BBState::OverflowOccurredValue = 0xffffffff;
330 namespace llvm {
332 raw_ostream &operator<<(raw_ostream &OS,
333 BBState &BBState) LLVM_ATTRIBUTE_UNUSED;
335 } // end namespace llvm
337 void BBState::InitFromPred(const BBState &Other) {
338 PerPtrTopDown = Other.PerPtrTopDown;
339 TopDownPathCount = Other.TopDownPathCount;
342 void BBState::InitFromSucc(const BBState &Other) {
343 PerPtrBottomUp = Other.PerPtrBottomUp;
344 BottomUpPathCount = Other.BottomUpPathCount;
347 /// The top-down traversal uses this to merge information about predecessors to
348 /// form the initial state for a new block.
349 void BBState::MergePred(const BBState &Other) {
350 if (TopDownPathCount == OverflowOccurredValue)
351 return;
353 // Other.TopDownPathCount can be 0, in which case it is either dead or a
354 // loop backedge. Loop backedges are special.
355 TopDownPathCount += Other.TopDownPathCount;
357 // In order to be consistent, we clear the top down pointers when the
358 // addition makes TopDownPathCount equal to OverflowOccurredValue, even though
359 // "true" overflow has not occurred.
360 if (TopDownPathCount == OverflowOccurredValue) {
361 clearTopDownPointers();
362 return;
365 // Check for overflow. If we have overflow, fall back to conservative
366 // behavior.
367 if (TopDownPathCount < Other.TopDownPathCount) {
368 TopDownPathCount = OverflowOccurredValue;
369 clearTopDownPointers();
370 return;
373 // For each entry in the other set, if our set has an entry with the same key,
374 // merge the entries. Otherwise, copy the entry and merge it with an empty
375 // entry.
376 for (auto MI = Other.top_down_ptr_begin(), ME = Other.top_down_ptr_end();
377 MI != ME; ++MI) {
378 auto Pair = PerPtrTopDown.insert(*MI);
379 Pair.first->second.Merge(Pair.second ? TopDownPtrState() : MI->second,
380 /*TopDown=*/true);
383 // For each entry in our set, if the other set doesn't have an entry with the
384 // same key, force it to merge with an empty entry.
385 for (auto MI = top_down_ptr_begin(), ME = top_down_ptr_end(); MI != ME; ++MI)
386 if (Other.PerPtrTopDown.find(MI->first) == Other.PerPtrTopDown.end())
387 MI->second.Merge(TopDownPtrState(), /*TopDown=*/true);
390 /// The bottom-up traversal uses this to merge information about successors to
391 /// form the initial state for a new block.
392 void BBState::MergeSucc(const BBState &Other) {
393 if (BottomUpPathCount == OverflowOccurredValue)
394 return;
396 // Other.BottomUpPathCount can be 0, in which case it is either dead or a
397 // loop backedge. Loop backedges are special.
398 BottomUpPathCount += Other.BottomUpPathCount;
400 // In order to be consistent, we clear the bottom up pointers when the
401 // addition makes BottomUpPathCount equal to OverflowOccurredValue, even though
402 // "true" overflow has not occurred.
403 if (BottomUpPathCount == OverflowOccurredValue) {
404 clearBottomUpPointers();
405 return;
408 // Check for overflow. If we have overflow, fall back to conservative
409 // behavior.
410 if (BottomUpPathCount < Other.BottomUpPathCount) {
411 BottomUpPathCount = OverflowOccurredValue;
412 clearBottomUpPointers();
413 return;
416 // For each entry in the other set, if our set has an entry with the
417 // same key, merge the entries. Otherwise, copy the entry and merge
418 // it with an empty entry.
419 for (auto MI = Other.bottom_up_ptr_begin(), ME = Other.bottom_up_ptr_end();
420 MI != ME; ++MI) {
421 auto Pair = PerPtrBottomUp.insert(*MI);
422 Pair.first->second.Merge(Pair.second ? BottomUpPtrState() : MI->second,
423 /*TopDown=*/false);
426 // For each entry in our set, if the other set doesn't have an entry
427 // with the same key, force it to merge with an empty entry.
428 for (auto MI = bottom_up_ptr_begin(), ME = bottom_up_ptr_end(); MI != ME;
429 ++MI)
430 if (Other.PerPtrBottomUp.find(MI->first) == Other.PerPtrBottomUp.end())
431 MI->second.Merge(BottomUpPtrState(), /*TopDown=*/false);
434 raw_ostream &llvm::operator<<(raw_ostream &OS, BBState &BBInfo) {
435 // Dump the pointers we are tracking.
436 OS << " TopDown State:\n";
437 if (!BBInfo.hasTopDownPtrs()) {
438 LLVM_DEBUG(dbgs() << " NONE!\n");
439 } else {
440 for (auto I = BBInfo.top_down_ptr_begin(), E = BBInfo.top_down_ptr_end();
441 I != E; ++I) {
442 const PtrState &P = I->second;
443 OS << " Ptr: " << *I->first
444 << "\n KnownSafe: " << (P.IsKnownSafe()?"true":"false")
445 << "\n ImpreciseRelease: "
446 << (P.IsTrackingImpreciseReleases()?"true":"false") << "\n"
447 << " HasCFGHazards: "
448 << (P.IsCFGHazardAfflicted()?"true":"false") << "\n"
449 << " KnownPositive: "
450 << (P.HasKnownPositiveRefCount()?"true":"false") << "\n"
451 << " Seq: "
452 << P.GetSeq() << "\n";
456 OS << " BottomUp State:\n";
457 if (!BBInfo.hasBottomUpPtrs()) {
458 LLVM_DEBUG(dbgs() << " NONE!\n");
459 } else {
460 for (auto I = BBInfo.bottom_up_ptr_begin(), E = BBInfo.bottom_up_ptr_end();
461 I != E; ++I) {
462 const PtrState &P = I->second;
463 OS << " Ptr: " << *I->first
464 << "\n KnownSafe: " << (P.IsKnownSafe()?"true":"false")
465 << "\n ImpreciseRelease: "
466 << (P.IsTrackingImpreciseReleases()?"true":"false") << "\n"
467 << " HasCFGHazards: "
468 << (P.IsCFGHazardAfflicted()?"true":"false") << "\n"
469 << " KnownPositive: "
470 << (P.HasKnownPositiveRefCount()?"true":"false") << "\n"
471 << " Seq: "
472 << P.GetSeq() << "\n";
476 return OS;
479 namespace {
481 /// The main ARC optimization pass.
482 class ObjCARCOpt : public FunctionPass {
483 bool Changed;
484 ProvenanceAnalysis PA;
486 /// A cache of references to runtime entry point constants.
487 ARCRuntimeEntryPoints EP;
489 /// A cache of MDKinds that can be passed into other functions to propagate
490 /// MDKind identifiers.
491 ARCMDKindCache MDKindCache;
493 /// A flag indicating whether this optimization pass should run.
494 bool Run;
496 /// A flag indicating whether the optimization that removes or moves
497 /// retain/release pairs should be performed.
498 bool DisableRetainReleasePairing = false;
500 /// Flags which determine whether each of the interesting runtime functions
501 /// is in fact used in the current function.
502 unsigned UsedInThisFunction;
504 bool OptimizeRetainRVCall(Function &F, Instruction *RetainRV);
505 void OptimizeAutoreleaseRVCall(Function &F, Instruction *AutoreleaseRV,
506 ARCInstKind &Class);
507 void OptimizeIndividualCalls(Function &F);
509 void CheckForCFGHazards(const BasicBlock *BB,
510 DenseMap<const BasicBlock *, BBState> &BBStates,
511 BBState &MyStates) const;
512 bool VisitInstructionBottomUp(Instruction *Inst, BasicBlock *BB,
513 BlotMapVector<Value *, RRInfo> &Retains,
514 BBState &MyStates);
515 bool VisitBottomUp(BasicBlock *BB,
516 DenseMap<const BasicBlock *, BBState> &BBStates,
517 BlotMapVector<Value *, RRInfo> &Retains);
518 bool VisitInstructionTopDown(Instruction *Inst,
519 DenseMap<Value *, RRInfo> &Releases,
520 BBState &MyStates);
521 bool VisitTopDown(BasicBlock *BB,
522 DenseMap<const BasicBlock *, BBState> &BBStates,
523 DenseMap<Value *, RRInfo> &Releases);
524 bool Visit(Function &F, DenseMap<const BasicBlock *, BBState> &BBStates,
525 BlotMapVector<Value *, RRInfo> &Retains,
526 DenseMap<Value *, RRInfo> &Releases);
528 void MoveCalls(Value *Arg, RRInfo &RetainsToMove, RRInfo &ReleasesToMove,
529 BlotMapVector<Value *, RRInfo> &Retains,
530 DenseMap<Value *, RRInfo> &Releases,
531 SmallVectorImpl<Instruction *> &DeadInsts, Module *M);
533 bool
534 PairUpRetainsAndReleases(DenseMap<const BasicBlock *, BBState> &BBStates,
535 BlotMapVector<Value *, RRInfo> &Retains,
536 DenseMap<Value *, RRInfo> &Releases, Module *M,
537 Instruction * Retain,
538 SmallVectorImpl<Instruction *> &DeadInsts,
539 RRInfo &RetainsToMove, RRInfo &ReleasesToMove,
540 Value *Arg, bool KnownSafe,
541 bool &AnyPairsCompletelyEliminated);
543 bool PerformCodePlacement(DenseMap<const BasicBlock *, BBState> &BBStates,
544 BlotMapVector<Value *, RRInfo> &Retains,
545 DenseMap<Value *, RRInfo> &Releases, Module *M);
547 void OptimizeWeakCalls(Function &F);
549 bool OptimizeSequences(Function &F);
551 void OptimizeReturns(Function &F);
553 #ifndef NDEBUG
554 void GatherStatistics(Function &F, bool AfterOptimization = false);
555 #endif
557 void getAnalysisUsage(AnalysisUsage &AU) const override;
558 bool doInitialization(Module &M) override;
559 bool runOnFunction(Function &F) override;
560 void releaseMemory() override;
562 public:
563 static char ID;
565 ObjCARCOpt() : FunctionPass(ID) {
566 initializeObjCARCOptPass(*PassRegistry::getPassRegistry());
570 } // end anonymous namespace
572 char ObjCARCOpt::ID = 0;
574 INITIALIZE_PASS_BEGIN(ObjCARCOpt,
575 "objc-arc", "ObjC ARC optimization", false, false)
576 INITIALIZE_PASS_DEPENDENCY(ObjCARCAAWrapperPass)
577 INITIALIZE_PASS_END(ObjCARCOpt,
578 "objc-arc", "ObjC ARC optimization", false, false)
580 Pass *llvm::createObjCARCOptPass() {
581 return new ObjCARCOpt();
584 void ObjCARCOpt::getAnalysisUsage(AnalysisUsage &AU) const {
585 AU.addRequired<ObjCARCAAWrapperPass>();
586 AU.addRequired<AAResultsWrapperPass>();
587 // ARC optimization doesn't currently split critical edges.
588 AU.setPreservesCFG();
591 /// Turn objc_retainAutoreleasedReturnValue into objc_retain if the operand is
592 /// not a return value. Or, if it can be paired with an
593 /// objc_autoreleaseReturnValue, delete the pair and return true.
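///
/// For example (a hypothetical sketch in the style of the comments above),
/// after inlining, the caller may contain:
///
///    objc_autoreleaseReturnValue(%x)
///    objc_retainAutoreleasedReturnValue(%x)
///
/// with only no-op instructions in between; the two calls cancel out and both
/// are erased.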
594 bool
595 ObjCARCOpt::OptimizeRetainRVCall(Function &F, Instruction *RetainRV) {
596 // Check for the argument being from an immediately preceding call or invoke.
597 const Value *Arg = GetArgRCIdentityRoot(RetainRV);
598 ImmutableCallSite CS(Arg);
599 if (const Instruction *Call = CS.getInstruction()) {
600 if (Call->getParent() == RetainRV->getParent()) {
601 BasicBlock::const_iterator I(Call);
602 ++I;
603 while (IsNoopInstruction(&*I))
604 ++I;
605 if (&*I == RetainRV)
606 return false;
607 } else if (const InvokeInst *II = dyn_cast<InvokeInst>(Call)) {
608 BasicBlock *RetainRVParent = RetainRV->getParent();
609 if (II->getNormalDest() == RetainRVParent) {
610 BasicBlock::const_iterator I = RetainRVParent->begin();
611 while (IsNoopInstruction(&*I))
612 ++I;
613 if (&*I == RetainRV)
614 return false;
619 // Track PHIs which are equivalent to our Arg.
620 SmallDenseSet<const Value*, 2> EquivalentArgs;
621 EquivalentArgs.insert(Arg);
623 // Add PHIs that are equivalent to Arg to ArgUsers.
624 if (const PHINode *PN = dyn_cast<PHINode>(Arg)) {
625 SmallVector<const Value *, 2> ArgUsers;
626 getEquivalentPHIs(*PN, ArgUsers);
627 EquivalentArgs.insert(ArgUsers.begin(), ArgUsers.end());
630 // Check for being preceded by an objc_autoreleaseReturnValue on the same
631 // pointer. In this case, we can delete the pair.
632 BasicBlock::iterator I = RetainRV->getIterator(),
633 Begin = RetainRV->getParent()->begin();
634 if (I != Begin) {
635 do
636 --I;
637 while (I != Begin && IsNoopInstruction(&*I));
638 if (GetBasicARCInstKind(&*I) == ARCInstKind::AutoreleaseRV &&
639 EquivalentArgs.count(GetArgRCIdentityRoot(&*I))) {
640 Changed = true;
641 ++NumPeeps;
643 LLVM_DEBUG(dbgs() << "Erasing autoreleaseRV,retainRV pair: " << *I << "\n"
644 << "Erasing " << *RetainRV << "\n");
646 EraseInstruction(&*I);
647 EraseInstruction(RetainRV);
648 return true;
652 // Turn it to a plain objc_retain.
653 Changed = true;
654 ++NumPeeps;
656 LLVM_DEBUG(dbgs() << "Transforming objc_retainAutoreleasedReturnValue => "
657 "objc_retain since the operand is not a return value.\n"
658 "Old = "
659 << *RetainRV << "\n");
661 Function *NewDecl = EP.get(ARCRuntimeEntryPointKind::Retain);
662 cast<CallInst>(RetainRV)->setCalledFunction(NewDecl);
664 LLVM_DEBUG(dbgs() << "New = " << *RetainRV << "\n");
666 return false;
669 /// Turn objc_autoreleaseReturnValue into objc_autorelease if the result is not
670 /// used as a return value.
671 void ObjCARCOpt::OptimizeAutoreleaseRVCall(Function &F,
672 Instruction *AutoreleaseRV,
673 ARCInstKind &Class) {
674 // Check for a return of the pointer value.
675 const Value *Ptr = GetArgRCIdentityRoot(AutoreleaseRV);
677 // If the argument is ConstantPointerNull or UndefValue, its other users
678 // aren't actually interesting to look at.
679 if (isa<ConstantData>(Ptr))
680 return;
682 SmallVector<const Value *, 2> Users;
683 Users.push_back(Ptr);
685 // Add PHIs that are equivalent to Ptr to Users.
686 if (const PHINode *PN = dyn_cast<PHINode>(Ptr))
687 getEquivalentPHIs(*PN, Users);
689 do {
690 Ptr = Users.pop_back_val();
691 for (const User *U : Ptr->users()) {
692 if (isa<ReturnInst>(U) || GetBasicARCInstKind(U) == ARCInstKind::RetainRV)
693 return;
694 if (isa<BitCastInst>(U))
695 Users.push_back(U);
697 } while (!Users.empty());
699 Changed = true;
700 ++NumPeeps;
702 LLVM_DEBUG(
703 dbgs() << "Transforming objc_autoreleaseReturnValue => "
704 "objc_autorelease since its operand is not used as a return "
705 "value.\n"
706 "Old = "
707 << *AutoreleaseRV << "\n");
709 CallInst *AutoreleaseRVCI = cast<CallInst>(AutoreleaseRV);
710 Function *NewDecl = EP.get(ARCRuntimeEntryPointKind::Autorelease);
711 AutoreleaseRVCI->setCalledFunction(NewDecl);
712 AutoreleaseRVCI->setTailCall(false); // Never tail call objc_autorelease.
713 Class = ARCInstKind::Autorelease;
715 LLVM_DEBUG(dbgs() << "New: " << *AutoreleaseRV << "\n");
718 namespace {
719 Instruction *
720 CloneCallInstForBB(CallInst &CI, BasicBlock &BB,
721 const DenseMap<BasicBlock *, ColorVector> &BlockColors) {
722 SmallVector<OperandBundleDef, 1> OpBundles;
723 for (unsigned I = 0, E = CI.getNumOperandBundles(); I != E; ++I) {
724 auto Bundle = CI.getOperandBundleAt(I);
725 // Funclets will be reassociated in the future.
726 if (Bundle.getTagID() == LLVMContext::OB_funclet)
727 continue;
728 OpBundles.emplace_back(Bundle);
731 if (!BlockColors.empty()) {
732 const ColorVector &CV = BlockColors.find(&BB)->second;
733 assert(CV.size() == 1 && "non-unique color for block!");
734 Instruction *EHPad = CV.front()->getFirstNonPHI();
735 if (EHPad->isEHPad())
736 OpBundles.emplace_back("funclet", EHPad);
739 return CallInst::Create(&CI, OpBundles);
743 /// Visit each call, one at a time, and make simplifications without doing any
744 /// additional analysis.
745 void ObjCARCOpt::OptimizeIndividualCalls(Function &F) {
746 LLVM_DEBUG(dbgs() << "\n== ObjCARCOpt::OptimizeIndividualCalls ==\n");
747 // Reset all the flags in preparation for recomputing them.
748 UsedInThisFunction = 0;
750 DenseMap<BasicBlock *, ColorVector> BlockColors;
751 if (F.hasPersonalityFn() &&
752 isScopedEHPersonality(classifyEHPersonality(F.getPersonalityFn())))
753 BlockColors = colorEHFunclets(F);
755 // Visit all objc_* calls in F.
756 for (inst_iterator I = inst_begin(&F), E = inst_end(&F); I != E; ) {
757 Instruction *Inst = &*I++;
759 ARCInstKind Class = GetBasicARCInstKind(Inst);
761 LLVM_DEBUG(dbgs() << "Visiting: Class: " << Class << "; " << *Inst << "\n");
763 // Some of the ARC calls can be deleted if their arguments are global
764 // variables that are inert in ARC.
765 if (IsNoopOnGlobal(Class)) {
766 Value *Opnd = Inst->getOperand(0);
767 if (auto *GV = dyn_cast<GlobalVariable>(Opnd->stripPointerCasts()))
768 if (GV->hasAttribute("objc_arc_inert")) {
769 if (!Inst->getType()->isVoidTy())
770 Inst->replaceAllUsesWith(Opnd);
771 Inst->eraseFromParent();
772 continue;
776 switch (Class) {
777 default: break;
779 // Delete no-op casts. These function calls have special semantics, but
780 // the semantics are entirely implemented via lowering in the front-end,
781 // so by the time they reach the optimizer, they are just no-op calls
782 // which return their argument.
784 // There are gray areas here, as the ability to cast reference-counted
785 // pointers to raw void* and back allows code to break ARC assumptions,
786 // however these are currently considered to be unimportant.
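// For example (hypothetical): a call such as
//   %y = objc_retainedObject(%x)
// simply returns its argument, so the call is erased and any uses of %y are
// rewritten to use %x.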
787 case ARCInstKind::NoopCast:
788 Changed = true;
789 ++NumNoops;
790 LLVM_DEBUG(dbgs() << "Erasing no-op cast: " << *Inst << "\n");
791 EraseInstruction(Inst);
792 continue;
794 // If the pointer-to-weak-pointer is null, it's undefined behavior.
795 case ARCInstKind::StoreWeak:
796 case ARCInstKind::LoadWeak:
797 case ARCInstKind::LoadWeakRetained:
798 case ARCInstKind::InitWeak:
799 case ARCInstKind::DestroyWeak: {
800 CallInst *CI = cast<CallInst>(Inst);
801 if (IsNullOrUndef(CI->getArgOperand(0))) {
802 Changed = true;
803 Type *Ty = CI->getArgOperand(0)->getType();
804 new StoreInst(UndefValue::get(cast<PointerType>(Ty)->getElementType()),
805 Constant::getNullValue(Ty),
806 CI);
807 Value *NewValue = UndefValue::get(CI->getType());
808 LLVM_DEBUG(
809 dbgs() << "A null pointer-to-weak-pointer is undefined behavior."
810 "\nOld = "
811 << *CI << "\nNew = " << *NewValue << "\n");
812 CI->replaceAllUsesWith(NewValue);
813 CI->eraseFromParent();
814 continue;
816 break;
818 case ARCInstKind::CopyWeak:
819 case ARCInstKind::MoveWeak: {
820 CallInst *CI = cast<CallInst>(Inst);
821 if (IsNullOrUndef(CI->getArgOperand(0)) ||
822 IsNullOrUndef(CI->getArgOperand(1))) {
823 Changed = true;
824 Type *Ty = CI->getArgOperand(0)->getType();
825 new StoreInst(UndefValue::get(cast<PointerType>(Ty)->getElementType()),
826 Constant::getNullValue(Ty),
827 CI);
829 Value *NewValue = UndefValue::get(CI->getType());
830 LLVM_DEBUG(
831 dbgs() << "A null pointer-to-weak-pointer is undefined behavior."
832 "\nOld = "
833 << *CI << "\nNew = " << *NewValue << "\n");
835 CI->replaceAllUsesWith(NewValue);
836 CI->eraseFromParent();
837 continue;
839 break;
841 case ARCInstKind::RetainRV:
842 if (OptimizeRetainRVCall(F, Inst))
843 continue;
844 break;
845 case ARCInstKind::AutoreleaseRV:
846 OptimizeAutoreleaseRVCall(F, Inst, Class);
847 break;
850 // objc_autorelease(x) -> objc_release(x) if x is otherwise unused.
851 if (IsAutorelease(Class) && Inst->use_empty()) {
852 CallInst *Call = cast<CallInst>(Inst);
853 const Value *Arg = Call->getArgOperand(0);
854 Arg = FindSingleUseIdentifiedObject(Arg);
855 if (Arg) {
856 Changed = true;
857 ++NumAutoreleases;
859 // Create the declaration lazily.
860 LLVMContext &C = Inst->getContext();
862 Function *Decl = EP.get(ARCRuntimeEntryPointKind::Release);
863 CallInst *NewCall = CallInst::Create(Decl, Call->getArgOperand(0), "",
864 Call);
865 NewCall->setMetadata(MDKindCache.get(ARCMDKindID::ImpreciseRelease),
866 MDNode::get(C, None));
868 LLVM_DEBUG(
869 dbgs() << "Replacing autorelease{,RV}(x) with objc_release(x) "
870 "since x is otherwise unused.\nOld: "
871 << *Call << "\nNew: " << *NewCall << "\n");
873 EraseInstruction(Call);
874 Inst = NewCall;
875 Class = ARCInstKind::Release;
879 // For functions which can never be passed stack arguments, add
880 // a tail keyword.
881 if (IsAlwaysTail(Class) && !cast<CallInst>(Inst)->isNoTailCall()) {
882 Changed = true;
883 LLVM_DEBUG(
884 dbgs() << "Adding tail keyword to function since it can never be "
885 "passed stack args: "
886 << *Inst << "\n");
887 cast<CallInst>(Inst)->setTailCall();
890 // Ensure that functions that can never have a "tail" keyword due to the
891 // semantics of ARC truly do not do so.
892 if (IsNeverTail(Class)) {
893 Changed = true;
894 LLVM_DEBUG(dbgs() << "Removing tail keyword from function: " << *Inst
895 << "\n");
896 cast<CallInst>(Inst)->setTailCall(false);
899 // Set nounwind as needed.
900 if (IsNoThrow(Class)) {
901 Changed = true;
902 LLVM_DEBUG(dbgs() << "Found no throw class. Setting nounwind on: "
903 << *Inst << "\n");
904 cast<CallInst>(Inst)->setDoesNotThrow();
907 if (!IsNoopOnNull(Class)) {
908 UsedInThisFunction |= 1 << unsigned(Class);
909 continue;
912 const Value *Arg = GetArgRCIdentityRoot(Inst);
914 // ARC calls with null are no-ops. Delete them.
915 if (IsNullOrUndef(Arg)) {
916 Changed = true;
917 ++NumNoops;
918 LLVM_DEBUG(dbgs() << "ARC calls with null are no-ops. Erasing: " << *Inst
919 << "\n");
920 EraseInstruction(Inst);
921 continue;
924 // Keep track of which of retain, release, autorelease, and retain_block
925 // are actually present in this function.
926 UsedInThisFunction |= 1 << unsigned(Class);
928 // If Arg is a PHI, and one or more incoming values to the
929 // PHI are null, and the call is control-equivalent to the PHI, and there
930 // are no relevant side effects between the PHI and the call, and the call
931 // is not a release that doesn't have the clang.imprecise_release tag, the
932 // call could be pushed up to just those paths with non-null incoming
933 // values. For now, don't bother splitting critical edges for this.
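// For illustration (a hypothetical sketch): given
//    %p = phi i8* [ null, %bb1 ], [ %obj, %bb2 ]
//    objc_release(%p)    ; carrying clang.imprecise_release
// the release is a no-op along the %bb1 path, so it can be cloned into the
// %bb2 predecessor (where the incoming value is non-null) and the original
// call erased, which is what the worklist loop below does when its safety
// checks pass.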
934 if (Class == ARCInstKind::Release &&
935 !Inst->getMetadata(MDKindCache.get(ARCMDKindID::ImpreciseRelease)))
936 continue;
938 SmallVector<std::pair<Instruction *, const Value *>, 4> Worklist;
939 Worklist.push_back(std::make_pair(Inst, Arg));
940 do {
941 std::pair<Instruction *, const Value *> Pair = Worklist.pop_back_val();
942 Inst = Pair.first;
943 Arg = Pair.second;
945 const PHINode *PN = dyn_cast<PHINode>(Arg);
946 if (!PN) continue;
948 // Determine if the PHI has any null operands, or any incoming
949 // critical edges.
950 bool HasNull = false;
951 bool HasCriticalEdges = false;
952 for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
953 Value *Incoming =
954 GetRCIdentityRoot(PN->getIncomingValue(i));
955 if (IsNullOrUndef(Incoming))
956 HasNull = true;
957 else if (PN->getIncomingBlock(i)->getTerminator()->getNumSuccessors() !=
958 1) {
959 HasCriticalEdges = true;
960 break;
963 // If we have null operands and no critical edges, optimize.
964 if (!HasCriticalEdges && HasNull) {
965 SmallPtrSet<Instruction *, 4> DependingInstructions;
966 SmallPtrSet<const BasicBlock *, 4> Visited;
968 // Check that there is nothing that cares about the reference
969 // count between the call and the phi.
970 switch (Class) {
971 case ARCInstKind::Retain:
972 case ARCInstKind::RetainBlock:
973 // These can always be moved up.
974 break;
975 case ARCInstKind::Release:
976 // These can't be moved across things that care about the retain
977 // count.
978 FindDependencies(NeedsPositiveRetainCount, Arg,
979 Inst->getParent(), Inst,
980 DependingInstructions, Visited, PA);
981 break;
982 case ARCInstKind::Autorelease:
983 // These can't be moved across autorelease pool scope boundaries.
984 FindDependencies(AutoreleasePoolBoundary, Arg,
985 Inst->getParent(), Inst,
986 DependingInstructions, Visited, PA);
987 break;
988 case ARCInstKind::ClaimRV:
989 case ARCInstKind::RetainRV:
990 case ARCInstKind::AutoreleaseRV:
991 // Don't move these; the RV optimization depends on the autoreleaseRV
992 // being tail called, and the retainRV being immediately after a call
993 // (which might still happen if we get lucky with codegen layout, but
994 // it's not worth taking the chance).
995 continue;
996 default:
997 llvm_unreachable("Invalid dependence flavor");
1000 if (DependingInstructions.size() == 1 &&
1001 *DependingInstructions.begin() == PN) {
1002 Changed = true;
1003 ++NumPartialNoops;
1004 // Clone the call into each predecessor that has a non-null value.
1005 CallInst *CInst = cast<CallInst>(Inst);
1006 Type *ParamTy = CInst->getArgOperand(0)->getType();
1007 for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
1008 Value *Incoming =
1009 GetRCIdentityRoot(PN->getIncomingValue(i));
1010 if (!IsNullOrUndef(Incoming)) {
1011 Value *Op = PN->getIncomingValue(i);
1012 Instruction *InsertPos = &PN->getIncomingBlock(i)->back();
1013 CallInst *Clone = cast<CallInst>(CloneCallInstForBB(
1014 *CInst, *InsertPos->getParent(), BlockColors));
1015 if (Op->getType() != ParamTy)
1016 Op = new BitCastInst(Op, ParamTy, "", InsertPos);
1017 Clone->setArgOperand(0, Op);
1018 Clone->insertBefore(InsertPos);
1020 LLVM_DEBUG(dbgs() << "Cloning " << *CInst
1021 << "\n"
1022 "And inserting clone at "
1023 << *InsertPos << "\n");
1024 Worklist.push_back(std::make_pair(Clone, Incoming));
1027 // Erase the original call.
1028 LLVM_DEBUG(dbgs() << "Erasing: " << *CInst << "\n");
1029 EraseInstruction(CInst);
1030 continue;
1033 } while (!Worklist.empty());
1037 /// If we have a top down pointer in the S_Use state, make sure that there are
1038 /// no CFG hazards by checking the states of various bottom up pointers.
1039 static void CheckForUseCFGHazard(const Sequence SuccSSeq,
1040 const bool SuccSRRIKnownSafe,
1041 TopDownPtrState &S,
1042 bool &SomeSuccHasSame,
1043 bool &AllSuccsHaveSame,
1044 bool &NotAllSeqEqualButKnownSafe,
1045 bool &ShouldContinue) {
1046 switch (SuccSSeq) {
1047 case S_CanRelease: {
1048 if (!S.IsKnownSafe() && !SuccSRRIKnownSafe) {
1049 S.ClearSequenceProgress();
1050 break;
1052 S.SetCFGHazardAfflicted(true);
1053 ShouldContinue = true;
1054 break;
1056 case S_Use:
1057 SomeSuccHasSame = true;
1058 break;
1059 case S_Stop:
1060 case S_Release:
1061 case S_MovableRelease:
1062 if (!S.IsKnownSafe() && !SuccSRRIKnownSafe)
1063 AllSuccsHaveSame = false;
1064 else
1065 NotAllSeqEqualButKnownSafe = true;
1066 break;
1067 case S_Retain:
1068 llvm_unreachable("bottom-up pointer in retain state!");
1069 case S_None:
1070 llvm_unreachable("This should have been handled earlier.");
1074 /// If we have a Top Down pointer in the S_CanRelease state, make sure that
1075 /// there are no CFG hazards by checking the states of various bottom up
1076 /// pointers.
1077 static void CheckForCanReleaseCFGHazard(const Sequence SuccSSeq,
1078 const bool SuccSRRIKnownSafe,
1079 TopDownPtrState &S,
1080 bool &SomeSuccHasSame,
1081 bool &AllSuccsHaveSame,
1082 bool &NotAllSeqEqualButKnownSafe) {
1083 switch (SuccSSeq) {
1084 case S_CanRelease:
1085 SomeSuccHasSame = true;
1086 break;
1087 case S_Stop:
1088 case S_Release:
1089 case S_MovableRelease:
1090 case S_Use:
1091 if (!S.IsKnownSafe() && !SuccSRRIKnownSafe)
1092 AllSuccsHaveSame = false;
1093 else
1094 NotAllSeqEqualButKnownSafe = true;
1095 break;
1096 case S_Retain:
1097 llvm_unreachable("bottom-up pointer in retain state!");
1098 case S_None:
1099 llvm_unreachable("This should have been handled earlier.");
1103 /// Check for critical edges, loop boundaries, irreducible control flow, or
1104 /// other CFG structures where moving code across the edge would result in it
1105 /// being executed more.
1106 void
1107 ObjCARCOpt::CheckForCFGHazards(const BasicBlock *BB,
1108 DenseMap<const BasicBlock *, BBState> &BBStates,
1109 BBState &MyStates) const {
1110 // If any top-down local-use or possible-dec has a succ which is earlier in
1111 // the sequence, forget it.
1112 for (auto I = MyStates.top_down_ptr_begin(), E = MyStates.top_down_ptr_end();
1113 I != E; ++I) {
1114 TopDownPtrState &S = I->second;
1115 const Sequence Seq = I->second.GetSeq();
1117 // We only care about S_Retain, S_CanRelease, and S_Use.
1118 if (Seq == S_None)
1119 continue;
1121 // Make sure that if extra top down states are added in the future that this
1122 // code is updated to handle it.
1123 assert((Seq == S_Retain || Seq == S_CanRelease || Seq == S_Use) &&
1124 "Unknown top down sequence state.");
1126 const Value *Arg = I->first;
1127 bool SomeSuccHasSame = false;
1128 bool AllSuccsHaveSame = true;
1129 bool NotAllSeqEqualButKnownSafe = false;
1131 for (const BasicBlock *Succ : successors(BB)) {
1132 // If VisitBottomUp has pointer information for this successor, take
1133 // what we know about it.
1134 const DenseMap<const BasicBlock *, BBState>::iterator BBI =
1135 BBStates.find(Succ);
1136 assert(BBI != BBStates.end());
1137 const BottomUpPtrState &SuccS = BBI->second.getPtrBottomUpState(Arg);
1138 const Sequence SuccSSeq = SuccS.GetSeq();
1140 // If the bottom-up pointer is in an S_None state, clear the sequence
1141 // progress, since the sequence in the bottom-up state finished,
1142 // suggesting a mismatch between retains/releases. This is true for
1143 // all three cases that we are handling here: S_Retain, S_Use, and
1144 // S_CanRelease.
1145 if (SuccSSeq == S_None) {
1146 S.ClearSequenceProgress();
1147 continue;
1150 // If we have S_Use or S_CanRelease, perform our CFG hazard checks.
1152 const bool SuccSRRIKnownSafe = SuccS.IsKnownSafe();
1154 // *NOTE* We do not use Seq from above here since we are allowing for
1155 // S.GetSeq() to change while we are visiting basic blocks.
1156 switch(S.GetSeq()) {
1157 case S_Use: {
1158 bool ShouldContinue = false;
1159 CheckForUseCFGHazard(SuccSSeq, SuccSRRIKnownSafe, S, SomeSuccHasSame,
1160 AllSuccsHaveSame, NotAllSeqEqualButKnownSafe,
1161 ShouldContinue);
1162 if (ShouldContinue)
1163 continue;
1164 break;
1166 case S_CanRelease:
1167 CheckForCanReleaseCFGHazard(SuccSSeq, SuccSRRIKnownSafe, S,
1168 SomeSuccHasSame, AllSuccsHaveSame,
1169 NotAllSeqEqualButKnownSafe);
1170 break;
1171 case S_Retain:
1172 case S_None:
1173 case S_Stop:
1174 case S_Release:
1175 case S_MovableRelease:
1176 break;
1180 // If the state at the other end of any of the successor edges
1181 // matches the current state, require all edges to match. This
1182 // guards against loops in the middle of a sequence.
1183 if (SomeSuccHasSame && !AllSuccsHaveSame) {
1184 S.ClearSequenceProgress();
1185 } else if (NotAllSeqEqualButKnownSafe) {
1186 // If we would have cleared the state but for the fact that we are known
1187 // safe, stop code motion.
1188 // remove RR pairs via KnownSafe is an orthogonal concept to whether we
1189 // are allowed to perform code motion.
1190 S.SetCFGHazardAfflicted(true);
1195 bool ObjCARCOpt::VisitInstructionBottomUp(
1196 Instruction *Inst, BasicBlock *BB, BlotMapVector<Value *, RRInfo> &Retains,
1197 BBState &MyStates) {
1198 bool NestingDetected = false;
1199 ARCInstKind Class = GetARCInstKind(Inst);
1200 const Value *Arg = nullptr;
1202 LLVM_DEBUG(dbgs() << " Class: " << Class << "\n");
1204 switch (Class) {
1205 case ARCInstKind::Release: {
1206 Arg = GetArgRCIdentityRoot(Inst);
1208 BottomUpPtrState &S = MyStates.getPtrBottomUpState(Arg);
1209 NestingDetected |= S.InitBottomUp(MDKindCache, Inst);
1210 break;
1212 case ARCInstKind::RetainBlock:
1213 // In OptimizeIndividualCalls, we have strength reduced all optimizable
1214 // objc_retainBlocks to objc_retains. Thus at this point any
1215 // objc_retainBlocks that we see are not optimizable.
1216 break;
1217 case ARCInstKind::Retain:
1218 case ARCInstKind::RetainRV: {
1219 Arg = GetArgRCIdentityRoot(Inst);
1220 BottomUpPtrState &S = MyStates.getPtrBottomUpState(Arg);
1221 if (S.MatchWithRetain()) {
1222 // Don't do retain+release tracking for ARCInstKind::RetainRV, because
1223 // it's better to let it remain as the first instruction after a call.
1224 if (Class != ARCInstKind::RetainRV) {
1225 LLVM_DEBUG(dbgs() << " Matching with: " << *Inst << "\n");
1226 Retains[Inst] = S.GetRRInfo();
1228 S.ClearSequenceProgress();
1230 // A retain moving bottom up can be a use.
1231 break;
1233 case ARCInstKind::AutoreleasepoolPop:
1234 // Conservatively, clear MyStates for all known pointers.
1235 MyStates.clearBottomUpPointers();
1236 return NestingDetected;
1237 case ARCInstKind::AutoreleasepoolPush:
1238 case ARCInstKind::None:
1239 // These are irrelevant.
1240 return NestingDetected;
1241 default:
1242 break;
1245 // Consider any other possible effects of this instruction on each
1246 // pointer being tracked.
1247 for (auto MI = MyStates.bottom_up_ptr_begin(),
1248 ME = MyStates.bottom_up_ptr_end();
1249 MI != ME; ++MI) {
1250 const Value *Ptr = MI->first;
1251 if (Ptr == Arg)
1252 continue; // Handled above.
1253 BottomUpPtrState &S = MI->second;
1255 if (S.HandlePotentialAlterRefCount(Inst, Ptr, PA, Class))
1256 continue;
1258 S.HandlePotentialUse(BB, Inst, Ptr, PA, Class);
1261 return NestingDetected;
1264 bool ObjCARCOpt::VisitBottomUp(BasicBlock *BB,
1265 DenseMap<const BasicBlock *, BBState> &BBStates,
1266 BlotMapVector<Value *, RRInfo> &Retains) {
1267 LLVM_DEBUG(dbgs() << "\n== ObjCARCOpt::VisitBottomUp ==\n");
1269 bool NestingDetected = false;
1270 BBState &MyStates = BBStates[BB];
1272 // Merge the states from each successor to compute the initial state
1273 // for the current block.
1274 BBState::edge_iterator SI(MyStates.succ_begin()),
1275 SE(MyStates.succ_end());
1276 if (SI != SE) {
1277 const BasicBlock *Succ = *SI;
1278 DenseMap<const BasicBlock *, BBState>::iterator I = BBStates.find(Succ);
1279 assert(I != BBStates.end());
1280 MyStates.InitFromSucc(I->second);
1281 ++SI;
1282 for (; SI != SE; ++SI) {
1283 Succ = *SI;
1284 I = BBStates.find(Succ);
1285 assert(I != BBStates.end());
1286 MyStates.MergeSucc(I->second);
1290 LLVM_DEBUG(dbgs() << "Before:\n"
1291 << BBStates[BB] << "\n"
1292 << "Performing Dataflow:\n");
1294 // Visit all the instructions, bottom-up.
1295 for (BasicBlock::iterator I = BB->end(), E = BB->begin(); I != E; --I) {
1296 Instruction *Inst = &*std::prev(I);
1298 // Invoke instructions are visited as part of their successors (below).
1299 if (isa<InvokeInst>(Inst))
1300 continue;
1302 LLVM_DEBUG(dbgs() << " Visiting " << *Inst << "\n");
1304 NestingDetected |= VisitInstructionBottomUp(Inst, BB, Retains, MyStates);
1306 // Bail out if the number of pointers being tracked becomes too large so
1307 // that this pass can complete in a reasonable amount of time.
1308 if (MyStates.bottom_up_ptr_list_size() > MaxPtrStates) {
1309 DisableRetainReleasePairing = true;
1310 return false;
1314 // If there's a predecessor with an invoke, visit the invoke as if it were
1315 // part of this block, since we can't insert code after an invoke in its own
1316 // block, and we don't want to split critical edges.
1317 for (BBState::edge_iterator PI(MyStates.pred_begin()),
1318 PE(MyStates.pred_end()); PI != PE; ++PI) {
1319 BasicBlock *Pred = *PI;
1320 if (InvokeInst *II = dyn_cast<InvokeInst>(&Pred->back()))
1321 NestingDetected |= VisitInstructionBottomUp(II, BB, Retains, MyStates);
1324 LLVM_DEBUG(dbgs() << "\nFinal State:\n" << BBStates[BB] << "\n");
1326 return NestingDetected;
1329 bool
1330 ObjCARCOpt::VisitInstructionTopDown(Instruction *Inst,
1331 DenseMap<Value *, RRInfo> &Releases,
1332 BBState &MyStates) {
1333 bool NestingDetected = false;
1334 ARCInstKind Class = GetARCInstKind(Inst);
1335 const Value *Arg = nullptr;
1337 LLVM_DEBUG(dbgs() << " Class: " << Class << "\n");
1339 switch (Class) {
1340 case ARCInstKind::RetainBlock:
1341 // In OptimizeIndividualCalls, we have strength reduced all optimizable
1342 // objc_retainBlocks to objc_retains. Thus at this point any
1343 // objc_retainBlocks that we see are not optimizable. We need to break since
1344 // a retain can be a potential use.
1345 break;
1346 case ARCInstKind::Retain:
1347 case ARCInstKind::RetainRV: {
1348 Arg = GetArgRCIdentityRoot(Inst);
1349 TopDownPtrState &S = MyStates.getPtrTopDownState(Arg);
1350 NestingDetected |= S.InitTopDown(Class, Inst);
1351 // A retain can be a potential use; proceed to the generic checking
1352 // code below.
1353 break;
1355 case ARCInstKind::Release: {
1356 Arg = GetArgRCIdentityRoot(Inst);
1357 TopDownPtrState &S = MyStates.getPtrTopDownState(Arg);
1358 // Try to form a tentative pair in between this release instruction and the
1359 // top down pointers that we are tracking.
1360 if (S.MatchWithRelease(MDKindCache, Inst)) {
1361 // If we succeed, copy S's RRInfo into the Release -> {Retain Set
1362 // Map}. Then we clear S.
1363 LLVM_DEBUG(dbgs() << " Matching with: " << *Inst << "\n");
1364 Releases[Inst] = S.GetRRInfo();
1365 S.ClearSequenceProgress();
1367 break;
1369 case ARCInstKind::AutoreleasepoolPop:
1370 // Conservatively, clear MyStates for all known pointers.
1371 MyStates.clearTopDownPointers();
1372 return false;
1373 case ARCInstKind::AutoreleasepoolPush:
1374 case ARCInstKind::None:
1375 // These cannot be uses of the pointers being tracked.
1376 return false;
1377 default:
1378 break;
1381 // Consider any other possible effects of this instruction on each
1382 // pointer being tracked.
1383 for (auto MI = MyStates.top_down_ptr_begin(),
1384 ME = MyStates.top_down_ptr_end();
1385 MI != ME; ++MI) {
1386 const Value *Ptr = MI->first;
1387 if (Ptr == Arg)
1388 continue; // Handled above.
1389 TopDownPtrState &S = MI->second;
1390 if (S.HandlePotentialAlterRefCount(Inst, Ptr, PA, Class))
1391 continue;
1393 S.HandlePotentialUse(Inst, Ptr, PA, Class);
1396 return NestingDetected;
1399 bool
1400 ObjCARCOpt::VisitTopDown(BasicBlock *BB,
1401 DenseMap<const BasicBlock *, BBState> &BBStates,
1402 DenseMap<Value *, RRInfo> &Releases) {
1403 LLVM_DEBUG(dbgs() << "\n== ObjCARCOpt::VisitTopDown ==\n");
1404 bool NestingDetected = false;
1405 BBState &MyStates = BBStates[BB];
1407 // Merge the states from each predecessor to compute the initial state
1408 // for the current block.
1409 BBState::edge_iterator PI(MyStates.pred_begin()),
1410 PE(MyStates.pred_end());
1411 if (PI != PE) {
1412 const BasicBlock *Pred = *PI;
1413 DenseMap<const BasicBlock *, BBState>::iterator I = BBStates.find(Pred);
1414 assert(I != BBStates.end());
1415 MyStates.InitFromPred(I->second);
1416 ++PI;
1417 for (; PI != PE; ++PI) {
1418 Pred = *PI;
1419 I = BBStates.find(Pred);
1420 assert(I != BBStates.end());
1421 MyStates.MergePred(I->second);
1425 LLVM_DEBUG(dbgs() << "Before:\n"
1426 << BBStates[BB] << "\n"
1427 << "Performing Dataflow:\n");
1429 // Visit all the instructions, top-down.
1430 for (Instruction &Inst : *BB) {
1431 LLVM_DEBUG(dbgs() << " Visiting " << Inst << "\n");
1433 NestingDetected |= VisitInstructionTopDown(&Inst, Releases, MyStates);
1435 // Bail out if the number of pointers being tracked becomes too large so
1436 // that this pass can complete in a reasonable amount of time.
1437 if (MyStates.top_down_ptr_list_size() > MaxPtrStates) {
1438 DisableRetainReleasePairing = true;
1439 return false;
1443 LLVM_DEBUG(dbgs() << "\nState Before Checking for CFG Hazards:\n"
1444 << BBStates[BB] << "\n\n");
1445 CheckForCFGHazards(BB, BBStates, MyStates);
1446 LLVM_DEBUG(dbgs() << "Final State:\n" << BBStates[BB] << "\n");
1447 return NestingDetected;
1450 static void
1451 ComputePostOrders(Function &F,
1452 SmallVectorImpl<BasicBlock *> &PostOrder,
1453 SmallVectorImpl<BasicBlock *> &ReverseCFGPostOrder,
1454 unsigned NoObjCARCExceptionsMDKind,
1455 DenseMap<const BasicBlock *, BBState> &BBStates) {
1456 /// The visited set, for doing DFS walks.
1457 SmallPtrSet<BasicBlock *, 16> Visited;
1459 // Do DFS, computing the PostOrder.
1460 SmallPtrSet<BasicBlock *, 16> OnStack;
1461 SmallVector<std::pair<BasicBlock *, succ_iterator>, 16> SuccStack;
1463 // Functions always have exactly one entry block, and we don't have
1464 // any other block that we treat like an entry block.
1465 BasicBlock *EntryBB = &F.getEntryBlock();
1466 BBState &MyStates = BBStates[EntryBB];
1467 MyStates.SetAsEntry();
1468 Instruction *EntryTI = EntryBB->getTerminator();
1469 SuccStack.push_back(std::make_pair(EntryBB, succ_iterator(EntryTI)));
1470 Visited.insert(EntryBB);
1471 OnStack.insert(EntryBB);
1472 do {
1473 dfs_next_succ:
1474 BasicBlock *CurrBB = SuccStack.back().first;
1475 succ_iterator SE(CurrBB->getTerminator(), false);
1477 while (SuccStack.back().second != SE) {
1478 BasicBlock *SuccBB = *SuccStack.back().second++;
1479 if (Visited.insert(SuccBB).second) {
1480 SuccStack.push_back(
1481 std::make_pair(SuccBB, succ_iterator(SuccBB->getTerminator())));
1482 BBStates[CurrBB].addSucc(SuccBB);
1483 BBState &SuccStates = BBStates[SuccBB];
1484 SuccStates.addPred(CurrBB);
1485 OnStack.insert(SuccBB);
1486 goto dfs_next_succ;
1489 if (!OnStack.count(SuccBB)) {
1490 BBStates[CurrBB].addSucc(SuccBB);
1491 BBStates[SuccBB].addPred(CurrBB);
1494 OnStack.erase(CurrBB);
1495 PostOrder.push_back(CurrBB);
1496 SuccStack.pop_back();
1497 } while (!SuccStack.empty());
1499 Visited.clear();
1501 // Do reverse-CFG DFS, computing the reverse-CFG PostOrder.
1502 // Functions may have many exits, and there are also blocks which we treat
1503 // as exits due to ignored edges.
1504 SmallVector<std::pair<BasicBlock *, BBState::edge_iterator>, 16> PredStack;
1505 for (BasicBlock &ExitBB : F) {
1506 BBState &MyStates = BBStates[&ExitBB];
1507 if (!MyStates.isExit())
1508 continue;
1510 MyStates.SetAsExit();
1512 PredStack.push_back(std::make_pair(&ExitBB, MyStates.pred_begin()));
1513 Visited.insert(&ExitBB);
1514 while (!PredStack.empty()) {
1515 reverse_dfs_next_succ:
1516 BBState::edge_iterator PE = BBStates[PredStack.back().first].pred_end();
1517 while (PredStack.back().second != PE) {
1518 BasicBlock *BB = *PredStack.back().second++;
1519 if (Visited.insert(BB).second) {
1520 PredStack.push_back(std::make_pair(BB, BBStates[BB].pred_begin()));
1521 goto reverse_dfs_next_succ;
1524 ReverseCFGPostOrder.push_back(PredStack.pop_back_val().first);
1529 // Visit the function both top-down and bottom-up.
1530 bool ObjCARCOpt::Visit(Function &F,
1531 DenseMap<const BasicBlock *, BBState> &BBStates,
1532 BlotMapVector<Value *, RRInfo> &Retains,
1533 DenseMap<Value *, RRInfo> &Releases) {
1534 // Use reverse-postorder traversals, because we magically know that loops
1535 // will be well behaved, i.e. they won't repeatedly call retain on a single
1536 // pointer without doing a release. We can't use the ReversePostOrderTraversal
1537 // class here because we want the reverse-CFG postorder to consider each
1538 // function exit point, and we want to ignore selected cycle edges.
1539 SmallVector<BasicBlock *, 16> PostOrder;
1540 SmallVector<BasicBlock *, 16> ReverseCFGPostOrder;
1541 ComputePostOrders(F, PostOrder, ReverseCFGPostOrder,
1542 MDKindCache.get(ARCMDKindID::NoObjCARCExceptions),
1543 BBStates);
1545 // Use reverse-postorder on the reverse CFG for bottom-up.
1546 bool BottomUpNestingDetected = false;
1547 for (BasicBlock *BB : llvm::reverse(ReverseCFGPostOrder)) {
1548 BottomUpNestingDetected |= VisitBottomUp(BB, BBStates, Retains);
1549 if (DisableRetainReleasePairing)
1550 return false;
1553 // Use reverse-postorder for top-down.
1554 bool TopDownNestingDetected = false;
1555 for (BasicBlock *BB : llvm::reverse(PostOrder)) {
1556 TopDownNestingDetected |= VisitTopDown(BB, BBStates, Releases);
1557 if (DisableRetainReleasePairing)
1558 return false;
1561 return TopDownNestingDetected && BottomUpNestingDetected;
1564 /// Move the calls in RetainsToMove and ReleasesToMove.
1565 void ObjCARCOpt::MoveCalls(Value *Arg, RRInfo &RetainsToMove,
1566 RRInfo &ReleasesToMove,
1567 BlotMapVector<Value *, RRInfo> &Retains,
1568 DenseMap<Value *, RRInfo> &Releases,
1569 SmallVectorImpl<Instruction *> &DeadInsts,
1570 Module *M) {
1571 Type *ArgTy = Arg->getType();
1572 Type *ParamTy = PointerType::getUnqual(Type::getInt8Ty(ArgTy->getContext()));
1574 LLVM_DEBUG(dbgs() << "== ObjCARCOpt::MoveCalls ==\n");
1576 // Insert the new retain and release calls.
1577 for (Instruction *InsertPt : ReleasesToMove.ReverseInsertPts) {
1578 Value *MyArg = ArgTy == ParamTy ? Arg :
1579 new BitCastInst(Arg, ParamTy, "", InsertPt);
1580 Function *Decl = EP.get(ARCRuntimeEntryPointKind::Retain);
1581 CallInst *Call = CallInst::Create(Decl, MyArg, "", InsertPt);
1582 Call->setDoesNotThrow();
1583 Call->setTailCall();
1585 LLVM_DEBUG(dbgs() << "Inserting new Retain: " << *Call
1586 << "\n"
1587 "At insertion point: "
1588 << *InsertPt << "\n");
1590 for (Instruction *InsertPt : RetainsToMove.ReverseInsertPts) {
1591 Value *MyArg = ArgTy == ParamTy ? Arg :
1592 new BitCastInst(Arg, ParamTy, "", InsertPt);
1593 Function *Decl = EP.get(ARCRuntimeEntryPointKind::Release);
1594 CallInst *Call = CallInst::Create(Decl, MyArg, "", InsertPt);
1595 // Attach a clang.imprecise_release metadata tag, if appropriate.
1596 if (MDNode *M = ReleasesToMove.ReleaseMetadata)
1597 Call->setMetadata(MDKindCache.get(ARCMDKindID::ImpreciseRelease), M);
1598 Call->setDoesNotThrow();
1599 if (ReleasesToMove.IsTailCallRelease)
1600 Call->setTailCall();
1602 LLVM_DEBUG(dbgs() << "Inserting new Release: " << *Call
1603 << "\n"
1604 "At insertion point: "
1605 << *InsertPt << "\n");
1608 // Delete the original retain and release calls.
1609 for (Instruction *OrigRetain : RetainsToMove.Calls) {
1610 Retains.blot(OrigRetain);
1611 DeadInsts.push_back(OrigRetain);
1612 LLVM_DEBUG(dbgs() << "Deleting retain: " << *OrigRetain << "\n");
1614 for (Instruction *OrigRelease : ReleasesToMove.Calls) {
1615 Releases.erase(OrigRelease);
1616 DeadInsts.push_back(OrigRelease);
1617 LLVM_DEBUG(dbgs() << "Deleting release: " << *OrigRelease << "\n");
1621 bool ObjCARCOpt::PairUpRetainsAndReleases(
1622 DenseMap<const BasicBlock *, BBState> &BBStates,
1623 BlotMapVector<Value *, RRInfo> &Retains,
1624 DenseMap<Value *, RRInfo> &Releases, Module *M,
1625 Instruction *Retain,
1626 SmallVectorImpl<Instruction *> &DeadInsts, RRInfo &RetainsToMove,
1627 RRInfo &ReleasesToMove, Value *Arg, bool KnownSafe,
1628 bool &AnyPairsCompletelyEliminated) {
1629 // If a pair happens in a region where it is known that the reference count
1630 // is already incremented, we can similarly ignore possible decrements unless
1631 // we are dealing with a retainable object with multiple provenance sources.
1632 bool KnownSafeTD = true, KnownSafeBU = true;
1633 bool CFGHazardAfflicted = false;
1635 // Connect the dots between the top-down-collected RetainsToMove and
1636 // bottom-up-collected ReleasesToMove to form sets of related calls.
1637 // This is an iterative process so that we connect multiple releases
1638 // to multiple retains if needed.
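// Illustrative bookkeeping example (hypothetical numbers): a retain in a
// block reachable along 2 CFG paths contributes +2 to OldDelta, while two
// matching releases in blocks on 1 path each contribute -1 apiece, leaving
// OldDelta at 0, i.e. the pairing is balanced across all paths.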
1639 unsigned OldDelta = 0;
1640 unsigned NewDelta = 0;
1641 unsigned OldCount = 0;
1642 unsigned NewCount = 0;
1643 bool FirstRelease = true;
1644 for (SmallVector<Instruction *, 4> NewRetains{Retain};;) {
1645 SmallVector<Instruction *, 4> NewReleases;
1646 for (Instruction *NewRetain : NewRetains) {
1647 auto It = Retains.find(NewRetain);
1648 assert(It != Retains.end());
1649 const RRInfo &NewRetainRRI = It->second;
1650 KnownSafeTD &= NewRetainRRI.KnownSafe;
1651 CFGHazardAfflicted |= NewRetainRRI.CFGHazardAfflicted;
1652 for (Instruction *NewRetainRelease : NewRetainRRI.Calls) {
1653 auto Jt = Releases.find(NewRetainRelease);
1654 if (Jt == Releases.end())
1655 return false;
1656 const RRInfo &NewRetainReleaseRRI = Jt->second;
1658 // If the release does not have a reference to the retain as well,
1659 // something happened which is unaccounted for. Do not do anything.
1661 // This can happen if we catch an additive overflow during path count
1662 // merging.
1663 if (!NewRetainReleaseRRI.Calls.count(NewRetain))
1664 return false;
1666 if (ReleasesToMove.Calls.insert(NewRetainRelease).second) {
1667 // If we overflow when we compute the path count, don't remove/move
1668 // anything.
1669 const BBState &NRRBBState = BBStates[NewRetainRelease->getParent()];
1670 unsigned PathCount = BBState::OverflowOccurredValue;
1671 if (NRRBBState.GetAllPathCountWithOverflow(PathCount))
1672 return false;
1673 assert(PathCount != BBState::OverflowOccurredValue &&
1674 "PathCount at this point can not be "
1675 "OverflowOccurredValue.");
1676 OldDelta -= PathCount;
1678 // Merge the ReleaseMetadata and IsTailCallRelease values.
1679 if (FirstRelease) {
1680 ReleasesToMove.ReleaseMetadata =
1681 NewRetainReleaseRRI.ReleaseMetadata;
1682 ReleasesToMove.IsTailCallRelease =
1683 NewRetainReleaseRRI.IsTailCallRelease;
1684 FirstRelease = false;
1685 } else {
1686 if (ReleasesToMove.ReleaseMetadata !=
1687 NewRetainReleaseRRI.ReleaseMetadata)
1688 ReleasesToMove.ReleaseMetadata = nullptr;
1689 if (ReleasesToMove.IsTailCallRelease !=
1690 NewRetainReleaseRRI.IsTailCallRelease)
1691 ReleasesToMove.IsTailCallRelease = false;
1692 }
1694 // Collect the optimal insertion points.
1695 if (!KnownSafe)
1696 for (Instruction *RIP : NewRetainReleaseRRI.ReverseInsertPts) {
1697 if (ReleasesToMove.ReverseInsertPts.insert(RIP).second) {
1698 // If we overflow when we compute the path count, don't
1699 // remove/move anything.
1700 const BBState &RIPBBState = BBStates[RIP->getParent()];
1701 PathCount = BBState::OverflowOccurredValue;
1702 if (RIPBBState.GetAllPathCountWithOverflow(PathCount))
1703 return false;
1704 assert(PathCount != BBState::OverflowOccurredValue &&
1705 "PathCount at this point can not be "
1706 "OverflowOccurredValue.");
1707 NewDelta -= PathCount;
1708 }
1709 }
1710 NewReleases.push_back(NewRetainRelease);
1711 }
1712 }
1713 }
1714 NewRetains.clear();
1715 if (NewReleases.empty()) break;
1717 // Back the other way.
1718 for (Instruction *NewRelease : NewReleases) {
1719 auto It = Releases.find(NewRelease);
1720 assert(It != Releases.end());
1721 const RRInfo &NewReleaseRRI = It->second;
1722 KnownSafeBU &= NewReleaseRRI.KnownSafe;
1723 CFGHazardAfflicted |= NewReleaseRRI.CFGHazardAfflicted;
1724 for (Instruction *NewReleaseRetain : NewReleaseRRI.Calls) {
1725 auto Jt = Retains.find(NewReleaseRetain);
1726 if (Jt == Retains.end())
1727 return false;
1728 const RRInfo &NewReleaseRetainRRI = Jt->second;
1730 // If the retain does not have a reference to the release as well,
1731 // something happened which is unaccounted for. Do not do anything.
1733 // This can happen if we catch an additive overflow during path count
1734 // merging.
1735 if (!NewReleaseRetainRRI.Calls.count(NewRelease))
1736 return false;
1738 if (RetainsToMove.Calls.insert(NewReleaseRetain).second) {
1739 // If we overflow when we compute the path count, don't remove/move
1740 // anything.
1741 const BBState &NRRBBState = BBStates[NewReleaseRetain->getParent()];
1742 unsigned PathCount = BBState::OverflowOccurredValue;
1743 if (NRRBBState.GetAllPathCountWithOverflow(PathCount))
1744 return false;
1745 assert(PathCount != BBState::OverflowOccurredValue &&
1746 "PathCount at this point can not be "
1747 "OverflowOccurredValue.");
1748 OldDelta += PathCount;
1749 OldCount += PathCount;
1751 // Collect the optimal insertion points.
1752 if (!KnownSafe)
1753 for (Instruction *RIP : NewReleaseRetainRRI.ReverseInsertPts) {
1754 if (RetainsToMove.ReverseInsertPts.insert(RIP).second) {
1755 // If we overflow when we compute the path count, don't
1756 // remove/move anything.
1757 const BBState &RIPBBState = BBStates[RIP->getParent()];
1759 PathCount = BBState::OverflowOccurredValue;
1760 if (RIPBBState.GetAllPathCountWithOverflow(PathCount))
1761 return false;
1762 assert(PathCount != BBState::OverflowOccurredValue &&
1763 "PathCount at this point can not be "
1764 "OverflowOccurredValue.");
1765 NewDelta += PathCount;
1766 NewCount += PathCount;
1767 }
1768 }
1769 NewRetains.push_back(NewReleaseRetain);
1770 }
1771 }
1772 }
1773 if (NewRetains.empty()) break;
1774 }
1776 // We can only remove pointers if we are known safe in both directions.
1777 bool UnconditionallySafe = KnownSafeTD && KnownSafeBU;
1778 if (UnconditionallySafe) {
1779 RetainsToMove.ReverseInsertPts.clear();
1780 ReleasesToMove.ReverseInsertPts.clear();
1781 NewCount = 0;
1782 } else {
1783 // Determine whether the new insertion points we computed preserve the
1784 // balance of retain and release calls through the program.
1785 // TODO: If the fully aggressive solution isn't valid, try to find a
1786 // less aggressive solution which is.
1787 if (NewDelta != 0)
1788 return false;
1790 // At this point, we are not going to remove any RR pairs, but we are still
1791 // able to move RR pairs. If one of our pointers is afflicted with
1792 // CFGHazards, we cannot perform such code motion, so exit early.
1793 const bool WillPerformCodeMotion =
1794 !RetainsToMove.ReverseInsertPts.empty() ||
1795 !ReleasesToMove.ReverseInsertPts.empty();
1796 if (CFGHazardAfflicted && WillPerformCodeMotion)
1797 return false;
1798 }
1800 // Determine whether the original call points are balanced in the retain and
1801 // release calls through the program. If not, conservatively don't touch
1802 // them.
1803 // TODO: It's theoretically possible to do code motion in this case, as
1804 // long as the existing imbalances are maintained.
1805 if (OldDelta != 0)
1806 return false;
1808 Changed = true;
1809 assert(OldCount != 0 && "Unreachable code?");
1810 NumRRs += OldCount - NewCount;
1811 // Set to true if we completely removed any RR pairs.
1812 AnyPairsCompletelyEliminated = NewCount == 0;
1814 // We can move calls!
1815 return true;
1816 }
1818 /// Identify pairings between the retains and releases, and delete and/or move
1819 /// them.
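/// For example (illustrative IR, not from a test case), a balanced pair such
/// as
/// \code
///   %1 = call i8* @objc_retain(i8* %x)
///   ; ... nothing here can decrement %x's reference count ...
///   call void @objc_release(i8* %x)
/// \endcode
/// is deleted outright, while pairs that are balanced only after code motion
/// get new calls inserted by MoveCalls before the originals are erased.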
1820 bool ObjCARCOpt::PerformCodePlacement(
1821 DenseMap<const BasicBlock *, BBState> &BBStates,
1822 BlotMapVector<Value *, RRInfo> &Retains,
1823 DenseMap<Value *, RRInfo> &Releases, Module *M) {
1824 LLVM_DEBUG(dbgs() << "\n== ObjCARCOpt::PerformCodePlacement ==\n");
1826 bool AnyPairsCompletelyEliminated = false;
1827 SmallVector<Instruction *, 8> DeadInsts;
1829 // Visit each retain.
1830 for (BlotMapVector<Value *, RRInfo>::const_iterator I = Retains.begin(),
1831 E = Retains.end();
1832 I != E; ++I) {
1833 Value *V = I->first;
1834 if (!V) continue; // blotted
1836 Instruction *Retain = cast<Instruction>(V);
1838 LLVM_DEBUG(dbgs() << "Visiting: " << *Retain << "\n");
1840 Value *Arg = GetArgRCIdentityRoot(Retain);
1842 // If the object being released is in static or stack storage, we know it's
1843 // not being managed by ObjC reference counting, so we can delete pairs
1844 // regardless of what possible decrements or uses lie between them.
1845 bool KnownSafe = isa<Constant>(Arg) || isa<AllocaInst>(Arg);
1847 // A constant pointer can't be pointing to an object on the heap. It may
1848 // be reference-counted, but it won't be deleted.
1849 if (const LoadInst *LI = dyn_cast<LoadInst>(Arg))
1850 if (const GlobalVariable *GV =
1851 dyn_cast<GlobalVariable>(
1852 GetRCIdentityRoot(LI->getPointerOperand())))
1853 if (GV->isConstant())
1854 KnownSafe = true;
1856 // Connect the dots between the top-down-collected RetainsToMove and
1857 // bottom-up-collected ReleasesToMove to form sets of related calls.
1858 RRInfo RetainsToMove, ReleasesToMove;
1860 bool PerformMoveCalls = PairUpRetainsAndReleases(
1861 BBStates, Retains, Releases, M, Retain, DeadInsts,
1862 RetainsToMove, ReleasesToMove, Arg, KnownSafe,
1863 AnyPairsCompletelyEliminated);
1865 if (PerformMoveCalls) {
1866 // Ok, everything checks out and we're all set. Let's move/delete some
1867 // code!
1868 MoveCalls(Arg, RetainsToMove, ReleasesToMove,
1869 Retains, Releases, DeadInsts, M);
1870 }
1871 }
1873 // Now that we're done moving everything, we can delete the newly dead
1874 // instructions, as we no longer need them as insert points.
1875 while (!DeadInsts.empty())
1876 EraseInstruction(DeadInsts.pop_back_val());
1878 return AnyPairsCompletelyEliminated;
1879 }
1881 /// Weak pointer optimizations.
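/// A minimal sketch of the redundant-load case handled below, assuming the
/// usual runtime signature i8* @objc_loadWeak(i8**) (illustrative IR only):
/// \code
///   %a = call i8* @objc_loadWeak(i8** %p)
///   %b = call i8* @objc_loadWeak(i8** %p)   ; no intervening weak writes
/// \endcode
/// Uses of %b can be rewritten to %a and the second call erased.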
1882 void ObjCARCOpt::OptimizeWeakCalls(Function &F) {
1883 LLVM_DEBUG(dbgs() << "\n== ObjCARCOpt::OptimizeWeakCalls ==\n");
1885 // First, do memdep-style RLE and S2L optimizations. We can't use memdep
1886 // itself because it uses AliasAnalysis and we need to do provenance
1887 // queries instead.
1888 for (inst_iterator I = inst_begin(&F), E = inst_end(&F); I != E; ) {
1889 Instruction *Inst = &*I++;
1891 LLVM_DEBUG(dbgs() << "Visiting: " << *Inst << "\n");
1893 ARCInstKind Class = GetBasicARCInstKind(Inst);
1894 if (Class != ARCInstKind::LoadWeak &&
1895 Class != ARCInstKind::LoadWeakRetained)
1896 continue;
1898 // Delete objc_loadWeak calls with no users.
1899 if (Class == ARCInstKind::LoadWeak && Inst->use_empty()) {
1900 Inst->eraseFromParent();
1901 continue;
1902 }
1904 // TODO: For now, just look for an earlier available version of this value
1905 // within the same block. Theoretically, we could do memdep-style non-local
1906 // analysis too, but that would want caching. A better approach would be to
1907 // use the technique that EarlyCSE uses.
1908 inst_iterator Current = std::prev(I);
1909 BasicBlock *CurrentBB = &*Current.getBasicBlockIterator();
1910 for (BasicBlock::iterator B = CurrentBB->begin(),
1911 J = Current.getInstructionIterator();
1912 J != B; --J) {
1913 Instruction *EarlierInst = &*std::prev(J);
1914 ARCInstKind EarlierClass = GetARCInstKind(EarlierInst);
1915 switch (EarlierClass) {
1916 case ARCInstKind::LoadWeak:
1917 case ARCInstKind::LoadWeakRetained: {
1918 // If this is loading from the same pointer, replace this load's value
1919 // with that one.
1920 CallInst *Call = cast<CallInst>(Inst);
1921 CallInst *EarlierCall = cast<CallInst>(EarlierInst);
1922 Value *Arg = Call->getArgOperand(0);
1923 Value *EarlierArg = EarlierCall->getArgOperand(0);
1924 switch (PA.getAA()->alias(Arg, EarlierArg)) {
1925 case MustAlias:
1926 Changed = true;
1927 // If the load has a builtin retain, insert a plain retain for it.
1928 if (Class == ARCInstKind::LoadWeakRetained) {
1929 Function *Decl = EP.get(ARCRuntimeEntryPointKind::Retain);
1930 CallInst *CI = CallInst::Create(Decl, EarlierCall, "", Call);
1931 CI->setTailCall();
1932 }
1933 // Zap the fully redundant load.
1934 Call->replaceAllUsesWith(EarlierCall);
1935 Call->eraseFromParent();
1936 goto clobbered;
1937 case MayAlias:
1938 case PartialAlias:
1939 goto clobbered;
1940 case NoAlias:
1941 break;
1942 }
1943 break;
1944 }
1945 case ARCInstKind::StoreWeak:
1946 case ARCInstKind::InitWeak: {
1947 // If this is storing to the same pointer and has the same size etc.,
1948 // replace this load's value with the stored value.
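// Illustrative shape (hypothetical IR): after
//   call i8* @objc_storeWeak(i8** %p, i8* %v)
// a later
//   %x = call i8* @objc_loadWeak(i8** %p)
// with no clobber in between can be replaced by %v, i.e. operand 1 of the
// earlier call.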
1949 CallInst *Call = cast<CallInst>(Inst);
1950 CallInst *EarlierCall = cast<CallInst>(EarlierInst);
1951 Value *Arg = Call->getArgOperand(0);
1952 Value *EarlierArg = EarlierCall->getArgOperand(0);
1953 switch (PA.getAA()->alias(Arg, EarlierArg)) {
1954 case MustAlias:
1955 Changed = true;
1956 // If the load has a builtin retain, insert a plain retain for it.
1957 if (Class == ARCInstKind::LoadWeakRetained) {
1958 Function *Decl = EP.get(ARCRuntimeEntryPointKind::Retain);
1959 CallInst *CI = CallInst::Create(Decl, EarlierCall, "", Call);
1960 CI->setTailCall();
1961 }
1962 // Zap the fully redundant load.
1963 Call->replaceAllUsesWith(EarlierCall->getArgOperand(1));
1964 Call->eraseFromParent();
1965 goto clobbered;
1966 case MayAlias:
1967 case PartialAlias:
1968 goto clobbered;
1969 case NoAlias:
1970 break;
1971 }
1972 break;
1973 }
1974 case ARCInstKind::MoveWeak:
1975 case ARCInstKind::CopyWeak:
1976 // TODO: Grab the copied value.
1977 goto clobbered;
1978 case ARCInstKind::AutoreleasepoolPush:
1979 case ARCInstKind::None:
1980 case ARCInstKind::IntrinsicUser:
1981 case ARCInstKind::User:
1982 // Weak pointers are only modified through the weak entry points
1983 // (and arbitrary calls, which could call the weak entry points).
1984 break;
1985 default:
1986 // Anything else could modify the weak pointer.
1987 goto clobbered;
1988 }
1989 }
1990 clobbered:;
1991 }
1993 // Then, for each destroyWeak with an alloca operand, check to see if
1994 // the alloca and all its users can be zapped.
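// Illustrative pattern (hypothetical IR) that this loop deletes entirely:
//   %w = alloca i8*
//   call i8* @objc_initWeak(i8** %w, i8* %obj)
//   call void @objc_destroyWeak(i8** %w)
// If every user of the alloca is an initWeak/storeWeak/destroyWeak, the weak
// slot is never read, so the calls and the alloca itself are all dead.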
1995 for (inst_iterator I = inst_begin(&F), E = inst_end(&F); I != E; ) {
1996 Instruction *Inst = &*I++;
1997 ARCInstKind Class = GetBasicARCInstKind(Inst);
1998 if (Class != ARCInstKind::DestroyWeak)
1999 continue;
2001 CallInst *Call = cast<CallInst>(Inst);
2002 Value *Arg = Call->getArgOperand(0);
2003 if (AllocaInst *Alloca = dyn_cast<AllocaInst>(Arg)) {
2004 for (User *U : Alloca->users()) {
2005 const Instruction *UserInst = cast<Instruction>(U);
2006 switch (GetBasicARCInstKind(UserInst)) {
2007 case ARCInstKind::InitWeak:
2008 case ARCInstKind::StoreWeak:
2009 case ARCInstKind::DestroyWeak:
2010 continue;
2011 default:
2012 goto done;
2013 }
2014 }
2015 Changed = true;
2016 for (auto UI = Alloca->user_begin(), UE = Alloca->user_end(); UI != UE;) {
2017 CallInst *UserInst = cast<CallInst>(*UI++);
2018 switch (GetBasicARCInstKind(UserInst)) {
2019 case ARCInstKind::InitWeak:
2020 case ARCInstKind::StoreWeak:
2021 // These functions return their second argument.
2022 UserInst->replaceAllUsesWith(UserInst->getArgOperand(1));
2023 break;
2024 case ARCInstKind::DestroyWeak:
2025 // No return value.
2026 break;
2027 default:
2028 llvm_unreachable("alloca really is used!");
2029 }
2030 UserInst->eraseFromParent();
2031 }
2032 Alloca->eraseFromParent();
2033 done:;
2034 }
2035 }
2036 }
2038 /// Identify program paths which execute sequences of retains and releases which
2039 /// can be eliminated.
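/// The caller re-runs this until it returns false; the result is true only
/// when some pair was fully eliminated *and* nesting was detected, e.g.
/// (shorthand, not real IR) retain(x); retain(x); ... release(x); release(x),
/// where removing the inner pair may expose the outer one on the next run.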
2040 bool ObjCARCOpt::OptimizeSequences(Function &F) {
2041 // Releases, Retains - These are used to store the results of the main flow
2042 // analysis. These use Value* as the key instead of Instruction* so that the
2043 // map stays valid when we get around to rewriting code and calls get
2044 // replaced by arguments.
2045 DenseMap<Value *, RRInfo> Releases;
2046 BlotMapVector<Value *, RRInfo> Retains;
2048 // This is used during the traversal of the function to track the
2049 // states for each identified object at each block.
2050 DenseMap<const BasicBlock *, BBState> BBStates;
2052 // Analyze the CFG of the function, and all instructions.
2053 bool NestingDetected = Visit(F, BBStates, Retains, Releases);
2055 if (DisableRetainReleasePairing)
2056 return false;
2058 // Transform.
2059 bool AnyPairsCompletelyEliminated = PerformCodePlacement(BBStates, Retains,
2060 Releases,
2061 F.getParent());
2063 return AnyPairsCompletelyEliminated && NestingDetected;
2064 }
2066 /// Check if there is a dependent call earlier that does not have anything in
2067 /// between the Retain and the call that can affect the reference count of their
2068 /// shared pointer argument. Note that Retain need not be in BB.
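/// Illustrative shape (not from a test case): the retained pointer is the
/// return value of an ordinary call, and nothing in between can touch its
/// reference count:
/// \code
///   %call = call i8* @something(...)
///   %0 = call i8* @objc_retain(i8* %call)
/// \endcode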
2069 static bool
2070 HasSafePathToPredecessorCall(const Value *Arg, Instruction *Retain,
2071 SmallPtrSetImpl<Instruction *> &DepInsts,
2072 SmallPtrSetImpl<const BasicBlock *> &Visited,
2073 ProvenanceAnalysis &PA) {
2074 FindDependencies(CanChangeRetainCount, Arg, Retain->getParent(), Retain,
2075 DepInsts, Visited, PA);
2076 if (DepInsts.size() != 1)
2077 return false;
2079 auto *Call = dyn_cast_or_null<CallInst>(*DepInsts.begin());
2081 // Check that the pointer is the return value of the call.
2082 if (!Call || Arg != Call)
2083 return false;
2085 // Check that the call is a regular call.
2086 ARCInstKind Class = GetBasicARCInstKind(Call);
2087 return Class == ARCInstKind::CallOrUser || Class == ARCInstKind::Call;
2088 }
2090 /// Find a dependent retain that precedes the given autorelease for which there
2091 /// is nothing in between the two instructions that can affect the ref count of
2092 /// Arg.
2093 static CallInst *
2094 FindPredecessorRetainWithSafePath(const Value *Arg, BasicBlock *BB,
2095 Instruction *Autorelease,
2096 SmallPtrSetImpl<Instruction *> &DepInsts,
2097 SmallPtrSetImpl<const BasicBlock *> &Visited,
2098 ProvenanceAnalysis &PA) {
2099 FindDependencies(CanChangeRetainCount, Arg,
2100 BB, Autorelease, DepInsts, Visited, PA);
2101 if (DepInsts.size() != 1)
2102 return nullptr;
2104 auto *Retain = dyn_cast_or_null<CallInst>(*DepInsts.begin());
2106 // Check that we found a retain with the same argument.
2107 if (!Retain || !IsRetain(GetBasicARCInstKind(Retain)) ||
2108 GetArgRCIdentityRoot(Retain) != Arg) {
2109 return nullptr;
2110 }
2112 return Retain;
2113 }
2115 /// Look for an ``autorelease'' instruction dependent on Arg such that there are
2116 /// no instructions dependent on Arg that need a positive ref count in between
2117 /// the autorelease and the ret.
2118 static CallInst *
2119 FindPredecessorAutoreleaseWithSafePath(const Value *Arg, BasicBlock *BB,
2120 ReturnInst *Ret,
2121 SmallPtrSetImpl<Instruction *> &DepInsts,
2122 SmallPtrSetImpl<const BasicBlock *> &V,
2123 ProvenanceAnalysis &PA) {
2124 FindDependencies(NeedsPositiveRetainCount, Arg,
2125 BB, Ret, DepInsts, V, PA);
2126 if (DepInsts.size() != 1)
2127 return nullptr;
2129 auto *Autorelease = dyn_cast_or_null<CallInst>(*DepInsts.begin());
2130 if (!Autorelease)
2131 return nullptr;
2132 ARCInstKind AutoreleaseClass = GetBasicARCInstKind(Autorelease);
2133 if (!IsAutorelease(AutoreleaseClass))
2134 return nullptr;
2135 if (GetArgRCIdentityRoot(Autorelease) != Arg)
2136 return nullptr;
2138 return Autorelease;
2139 }
2141 /// Look for this pattern:
2142 /// \code
2143 /// %call = call i8* @something(...)
2144 /// %2 = call i8* @objc_retain(i8* %call)
2145 /// %3 = call i8* @objc_autorelease(i8* %2)
2146 /// ret i8* %3
2147 /// \endcode
2148 /// And delete the retain and autorelease.
2149 void ObjCARCOpt::OptimizeReturns(Function &F) {
2150 if (!F.getReturnType()->isPointerTy())
2151 return;
2153 LLVM_DEBUG(dbgs() << "\n== ObjCARCOpt::OptimizeReturns ==\n");
2155 SmallPtrSet<Instruction *, 4> DependingInstructions;
2156 SmallPtrSet<const BasicBlock *, 4> Visited;
2157 for (BasicBlock &BB: F) {
2158 ReturnInst *Ret = dyn_cast<ReturnInst>(&BB.back());
2159 if (!Ret)
2160 continue;
2162 LLVM_DEBUG(dbgs() << "Visiting: " << *Ret << "\n");
2164 const Value *Arg = GetRCIdentityRoot(Ret->getOperand(0));
2166 // Look for an ``autorelease'' instruction that is a predecessor of Ret and
2167 // dependent on Arg such that there are no instructions dependent on Arg
2168 // that need a positive ref count in between the autorelease and Ret.
2169 CallInst *Autorelease = FindPredecessorAutoreleaseWithSafePath(
2170 Arg, &BB, Ret, DependingInstructions, Visited, PA);
2171 DependingInstructions.clear();
2172 Visited.clear();
2174 if (!Autorelease)
2175 continue;
2177 CallInst *Retain = FindPredecessorRetainWithSafePath(
2178 Arg, Autorelease->getParent(), Autorelease, DependingInstructions,
2179 Visited, PA);
2180 DependingInstructions.clear();
2181 Visited.clear();
2183 if (!Retain)
2184 continue;
2186 // Check that there is nothing that can affect the reference count
2187 // between the retain and the call. Note that Retain need not be in BB.
2188 bool HasSafePathToCall = HasSafePathToPredecessorCall(Arg, Retain,
2189 DependingInstructions,
2190 Visited, PA);
2191 DependingInstructions.clear();
2192 Visited.clear();
2194 if (!HasSafePathToCall)
2195 continue;
2197 // If so, we can zap the retain and autorelease.
2198 Changed = true;
2199 ++NumRets;
2200 LLVM_DEBUG(dbgs() << "Erasing: " << *Retain << "\nErasing: " << *Autorelease
2201 << "\n");
2202 EraseInstruction(Retain);
2203 EraseInstruction(Autorelease);
2204 }
2205 }
2207 #ifndef NDEBUG
2208 void
2209 ObjCARCOpt::GatherStatistics(Function &F, bool AfterOptimization) {
2210 Statistic &NumRetains =
2211 AfterOptimization ? NumRetainsAfterOpt : NumRetainsBeforeOpt;
2212 Statistic &NumReleases =
2213 AfterOptimization ? NumReleasesAfterOpt : NumReleasesBeforeOpt;
2215 for (inst_iterator I = inst_begin(&F), E = inst_end(&F); I != E; ) {
2216 Instruction *Inst = &*I++;
2217 switch (GetBasicARCInstKind(Inst)) {
2218 default:
2219 break;
2220 case ARCInstKind::Retain:
2221 ++NumRetains;
2222 break;
2223 case ARCInstKind::Release:
2224 ++NumReleases;
2225 break;
2226 }
2227 }
2228 }
2229 #endif
2231 bool ObjCARCOpt::doInitialization(Module &M) {
2232 if (!EnableARCOpts)
2233 return false;
2235 // If nothing in the Module uses ARC, don't do anything.
2236 Run = ModuleHasARC(M);
2237 if (!Run)
2238 return false;
2240 // Intuitively, objc_retain and others are nocapture; however, in practice
2241 // they are not, because they return their argument value. And objc_release
2242 // calls finalizers, which can have arbitrary side effects.
2243 MDKindCache.init(&M);
2245 // Initialize our runtime entry point cache.
2246 EP.init(&M);
2248 return false;
2249 }
2251 bool ObjCARCOpt::runOnFunction(Function &F) {
2252 if (!EnableARCOpts)
2253 return false;
2255 // If nothing in the Module uses ARC, don't do anything.
2256 if (!Run)
2257 return false;
2259 Changed = false;
2261 LLVM_DEBUG(dbgs() << "<<< ObjCARCOpt: Visiting Function: " << F.getName()
2262 << " >>>"
2263 "\n");
2265 PA.setAA(&getAnalysis<AAResultsWrapperPass>().getAAResults());
2267 #ifndef NDEBUG
2268 if (AreStatisticsEnabled()) {
2269 GatherStatistics(F, false);
2270 }
2271 #endif
2273 // This pass performs several distinct transformations. As a compile-time aid
2274 // when compiling code that isn't ObjC, skip these if the relevant ObjC
2275 // library functions aren't declared.
2277 // Preliminary optimizations. This also computes UsedInThisFunction.
2278 OptimizeIndividualCalls(F);
2280 // Optimizations for weak pointers.
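// UsedInThisFunction is a bitmask indexed by ARCInstKind (computed by
// OptimizeIndividualCalls above), so each gate below is a simple bit test;
// e.g. the StoreWeak bit is set only if an objc_storeWeak call was seen.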
2281 if (UsedInThisFunction & ((1 << unsigned(ARCInstKind::LoadWeak)) |
2282 (1 << unsigned(ARCInstKind::LoadWeakRetained)) |
2283 (1 << unsigned(ARCInstKind::StoreWeak)) |
2284 (1 << unsigned(ARCInstKind::InitWeak)) |
2285 (1 << unsigned(ARCInstKind::CopyWeak)) |
2286 (1 << unsigned(ARCInstKind::MoveWeak)) |
2287 (1 << unsigned(ARCInstKind::DestroyWeak))))
2288 OptimizeWeakCalls(F);
2290 // Optimizations for retain+release pairs.
2291 if (UsedInThisFunction & ((1 << unsigned(ARCInstKind::Retain)) |
2292 (1 << unsigned(ARCInstKind::RetainRV)) |
2293 (1 << unsigned(ARCInstKind::RetainBlock))))
2294 if (UsedInThisFunction & (1 << unsigned(ARCInstKind::Release)))
2295 // Run OptimizeSequences until it either stops making changes or
2296 // no retain+release pair nesting is detected.
2297 while (OptimizeSequences(F)) {}
2299 // Optimizations if objc_autorelease is used.
2300 if (UsedInThisFunction & ((1 << unsigned(ARCInstKind::Autorelease)) |
2301 (1 << unsigned(ARCInstKind::AutoreleaseRV))))
2302 OptimizeReturns(F);
2304 // Gather statistics after optimization.
2305 #ifndef NDEBUG
2306 if (AreStatisticsEnabled()) {
2307 GatherStatistics(F, true);
2308 }
2309 #endif
2311 LLVM_DEBUG(dbgs() << "\n");
2313 return Changed;
2314 }
2316 void ObjCARCOpt::releaseMemory() {
2317 PA.clear();
2318 }
2320 /// @}