1 //===- FunctionAttrs.cpp - Pass which marks functions attributes ----------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
10 /// This file implements interprocedural passes which walk the
11 /// call-graph deducing and/or propagating function attributes.
13 //===----------------------------------------------------------------------===//
15 #include "llvm/Transforms/IPO/FunctionAttrs.h"
16 #include "llvm/ADT/ArrayRef.h"
17 #include "llvm/ADT/DenseMap.h"
18 #include "llvm/ADT/SCCIterator.h"
19 #include "llvm/ADT/STLExtras.h"
20 #include "llvm/ADT/SetVector.h"
21 #include "llvm/ADT/SmallPtrSet.h"
22 #include "llvm/ADT/SmallSet.h"
23 #include "llvm/ADT/SmallVector.h"
24 #include "llvm/ADT/Statistic.h"
25 #include "llvm/Analysis/AssumptionCache.h"
26 #include "llvm/Analysis/BasicAliasAnalysis.h"
27 #include "llvm/Analysis/CFG.h"
28 #include "llvm/Analysis/CGSCCPassManager.h"
29 #include "llvm/Analysis/CallGraph.h"
30 #include "llvm/Analysis/CallGraphSCCPass.h"
31 #include "llvm/Analysis/CaptureTracking.h"
32 #include "llvm/Analysis/LazyCallGraph.h"
33 #include "llvm/Analysis/MemoryLocation.h"
34 #include "llvm/Analysis/ValueTracking.h"
35 #include "llvm/IR/Argument.h"
36 #include "llvm/IR/Attributes.h"
37 #include "llvm/IR/BasicBlock.h"
38 #include "llvm/IR/Constant.h"
39 #include "llvm/IR/Constants.h"
40 #include "llvm/IR/Function.h"
41 #include "llvm/IR/InstIterator.h"
42 #include "llvm/IR/InstrTypes.h"
43 #include "llvm/IR/Instruction.h"
44 #include "llvm/IR/Instructions.h"
45 #include "llvm/IR/IntrinsicInst.h"
46 #include "llvm/IR/Metadata.h"
47 #include "llvm/IR/ModuleSummaryIndex.h"
48 #include "llvm/IR/PassManager.h"
49 #include "llvm/IR/Type.h"
50 #include "llvm/IR/Use.h"
51 #include "llvm/IR/User.h"
52 #include "llvm/IR/Value.h"
53 #include "llvm/Support/Casting.h"
54 #include "llvm/Support/CommandLine.h"
55 #include "llvm/Support/Compiler.h"
56 #include "llvm/Support/Debug.h"
57 #include "llvm/Support/ErrorHandling.h"
58 #include "llvm/Support/raw_ostream.h"
59 #include "llvm/Transforms/IPO.h"
60 #include "llvm/Transforms/Utils/Local.h"
69 #define DEBUG_TYPE "function-attrs"
71 STATISTIC(NumMemoryAttr
, "Number of functions with improved memory attribute");
72 STATISTIC(NumNoCapture
, "Number of arguments marked nocapture");
73 STATISTIC(NumReturned
, "Number of arguments marked returned");
74 STATISTIC(NumReadNoneArg
, "Number of arguments marked readnone");
75 STATISTIC(NumReadOnlyArg
, "Number of arguments marked readonly");
76 STATISTIC(NumWriteOnlyArg
, "Number of arguments marked writeonly");
77 STATISTIC(NumNoAlias
, "Number of function returns marked noalias");
78 STATISTIC(NumNonNullReturn
, "Number of function returns marked nonnull");
79 STATISTIC(NumNoRecurse
, "Number of functions marked as norecurse");
80 STATISTIC(NumNoUnwind
, "Number of functions marked as nounwind");
81 STATISTIC(NumNoFree
, "Number of functions marked as nofree");
82 STATISTIC(NumWillReturn
, "Number of functions marked as willreturn");
83 STATISTIC(NumNoSync
, "Number of functions marked as nosync");
85 STATISTIC(NumThinLinkNoRecurse
,
86 "Number of functions marked as norecurse during thinlink");
87 STATISTIC(NumThinLinkNoUnwind
,
88 "Number of functions marked as nounwind during thinlink");
90 static cl::opt
<bool> EnableNonnullArgPropagation(
91 "enable-nonnull-arg-prop", cl::init(true), cl::Hidden
,
92 cl::desc("Try to propagate nonnull argument attributes from callsites to "
93 "caller functions."));
95 static cl::opt
<bool> DisableNoUnwindInference(
96 "disable-nounwind-inference", cl::Hidden
,
97 cl::desc("Stop inferring nounwind attribute during function-attrs pass"));
99 static cl::opt
<bool> DisableNoFreeInference(
100 "disable-nofree-inference", cl::Hidden
,
101 cl::desc("Stop inferring nofree attribute during function-attrs pass"));
103 static cl::opt
<bool> DisableThinLTOPropagation(
104 "disable-thinlto-funcattrs", cl::init(true), cl::Hidden
,
105 cl::desc("Don't propagate function-attrs in thinLTO"));
109 using SCCNodeSet
= SmallSetVector
<Function
*, 8>;
111 } // end anonymous namespace
113 static void addLocAccess(MemoryEffects
&ME
, const MemoryLocation
&Loc
,
114 ModRefInfo MR
, AAResults
&AAR
) {
115 // Ignore accesses to known-invariant or local memory.
116 MR
&= AAR
.getModRefInfoMask(Loc
, /*IgnoreLocal=*/true);
120 const Value
*UO
= getUnderlyingObject(Loc
.Ptr
);
121 assert(!isa
<AllocaInst
>(UO
) &&
122 "Should have been handled by getModRefInfoMask()");
123 if (isa
<Argument
>(UO
)) {
124 ME
|= MemoryEffects::argMemOnly(MR
);
128 // If it's not an identified object, it might be an argument.
129 if (!isIdentifiedObject(UO
))
130 ME
|= MemoryEffects::argMemOnly(MR
);
131 ME
|= MemoryEffects(IRMemLocation::Other
, MR
);
134 static void addArgLocs(MemoryEffects
&ME
, const CallBase
*Call
,
135 ModRefInfo ArgMR
, AAResults
&AAR
) {
136 for (const Value
*Arg
: Call
->args()) {
137 if (!Arg
->getType()->isPtrOrPtrVectorTy())
141 MemoryLocation::getBeforeOrAfter(Arg
, Call
->getAAMetadata()),
146 /// Returns the memory access attribute for function F using AAR for AA results,
147 /// where SCCNodes is the current SCC.
149 /// If ThisBody is true, this function may examine the function body and will
150 /// return a result pertaining to this copy of the function. If it is false, the
151 /// result will be based only on AA results for the function declaration; it
152 /// will be assumed that some other (perhaps less optimized) version of the
153 /// function may be selected at link time.
155 /// The return value is split into two parts: Memory effects that always apply,
156 /// and additional memory effects that apply if any of the functions in the SCC
157 /// can access argmem.
158 static std::pair
<MemoryEffects
, MemoryEffects
>
159 checkFunctionMemoryAccess(Function
&F
, bool ThisBody
, AAResults
&AAR
,
160 const SCCNodeSet
&SCCNodes
) {
161 MemoryEffects OrigME
= AAR
.getMemoryEffects(&F
);
162 if (OrigME
.doesNotAccessMemory())
164 return {OrigME
, MemoryEffects::none()};
167 return {OrigME
, MemoryEffects::none()};
169 MemoryEffects ME
= MemoryEffects::none();
170 // Additional locations accessed if the SCC accesses argmem.
171 MemoryEffects RecursiveArgME
= MemoryEffects::none();
173 // Inalloca and preallocated arguments are always clobbered by the call.
174 if (F
.getAttributes().hasAttrSomewhere(Attribute::InAlloca
) ||
175 F
.getAttributes().hasAttrSomewhere(Attribute::Preallocated
))
176 ME
|= MemoryEffects::argMemOnly(ModRefInfo::ModRef
);
178 // Scan the function body for instructions that may read or write memory.
179 for (Instruction
&I
: instructions(F
)) {
180 // Some instructions can be ignored even if they read or write memory.
181 // Detect these now, skipping to the next instruction if one is found.
182 if (auto *Call
= dyn_cast
<CallBase
>(&I
)) {
183 // We can optimistically ignore calls to functions in the same SCC, with
185 // * Calls with operand bundles may have additional effects.
186 // * Argument memory accesses may imply additional effects depending on
187 // what the argument location is.
188 if (!Call
->hasOperandBundles() && Call
->getCalledFunction() &&
189 SCCNodes
.count(Call
->getCalledFunction())) {
190 // Keep track of which additional locations are accessed if the SCC
191 // turns out to access argmem.
192 addArgLocs(RecursiveArgME
, Call
, ModRefInfo::ModRef
, AAR
);
196 MemoryEffects CallME
= AAR
.getMemoryEffects(Call
);
198 // If the call doesn't access memory, we're done.
199 if (CallME
.doesNotAccessMemory())
202 // A pseudo probe call shouldn't change any function attribute since it
203 // doesn't translate to a real instruction. It comes with a memory access
204 // tag to prevent itself being removed by optimizations and not block
205 // other instructions being optimized.
206 if (isa
<PseudoProbeInst
>(I
))
209 ME
|= CallME
.getWithoutLoc(IRMemLocation::ArgMem
);
211 // If the call accesses captured memory (currently part of "other") and
212 // an argument is captured (currently not tracked), then it may also
213 // access argument memory.
214 ModRefInfo OtherMR
= CallME
.getModRef(IRMemLocation::Other
);
215 ME
|= MemoryEffects::argMemOnly(OtherMR
);
217 // Check whether all pointer arguments point to local memory, and
218 // ignore calls that only access local memory.
219 ModRefInfo ArgMR
= CallME
.getModRef(IRMemLocation::ArgMem
);
220 if (ArgMR
!= ModRefInfo::NoModRef
)
221 addArgLocs(ME
, Call
, ArgMR
, AAR
);
225 ModRefInfo MR
= ModRefInfo::NoModRef
;
226 if (I
.mayWriteToMemory())
227 MR
|= ModRefInfo::Mod
;
228 if (I
.mayReadFromMemory())
229 MR
|= ModRefInfo::Ref
;
230 if (MR
== ModRefInfo::NoModRef
)
233 std::optional
<MemoryLocation
> Loc
= MemoryLocation::getOrNone(&I
);
235 // If no location is known, conservatively assume anything can be
237 ME
|= MemoryEffects(MR
);
241 // Volatile operations may access inaccessible memory.
243 ME
|= MemoryEffects::inaccessibleMemOnly(MR
);
245 addLocAccess(ME
, *Loc
, MR
, AAR
);
248 return {OrigME
& ME
, RecursiveArgME
};
251 MemoryEffects
llvm::computeFunctionBodyMemoryAccess(Function
&F
,
253 return checkFunctionMemoryAccess(F
, /*ThisBody=*/true, AAR
, {}).first
;
256 /// Deduce readonly/readnone/writeonly attributes for the SCC.
257 template <typename AARGetterT
>
258 static void addMemoryAttrs(const SCCNodeSet
&SCCNodes
, AARGetterT
&&AARGetter
,
259 SmallSet
<Function
*, 8> &Changed
) {
260 MemoryEffects ME
= MemoryEffects::none();
261 MemoryEffects RecursiveArgME
= MemoryEffects::none();
262 for (Function
*F
: SCCNodes
) {
263 // Call the callable parameter to look up AA results for this function.
264 AAResults
&AAR
= AARGetter(*F
);
265 // Non-exact function definitions may not be selected at link time, and an
266 // alternative version that writes to memory may be selected. See the
267 // comment on GlobalValue::isDefinitionExact for more details.
268 auto [FnME
, FnRecursiveArgME
] =
269 checkFunctionMemoryAccess(*F
, F
->hasExactDefinition(), AAR
, SCCNodes
);
271 RecursiveArgME
|= FnRecursiveArgME
;
272 // Reached bottom of the lattice, we will not be able to improve the result.
273 if (ME
== MemoryEffects::unknown())
277 // If the SCC accesses argmem, add recursive accesses resulting from that.
278 ModRefInfo ArgMR
= ME
.getModRef(IRMemLocation::ArgMem
);
279 if (ArgMR
!= ModRefInfo::NoModRef
)
280 ME
|= RecursiveArgME
& MemoryEffects(ArgMR
);
282 for (Function
*F
: SCCNodes
) {
283 MemoryEffects OldME
= F
->getMemoryEffects();
284 MemoryEffects NewME
= ME
& OldME
;
285 if (NewME
!= OldME
) {
287 F
->setMemoryEffects(NewME
);
293 // Compute definitive function attributes for a function taking into account
294 // prevailing definitions and linkage types
295 static FunctionSummary
*calculatePrevailingSummary(
297 DenseMap
<ValueInfo
, FunctionSummary
*> &CachedPrevailingSummary
,
298 function_ref
<bool(GlobalValue::GUID
, const GlobalValueSummary
*)>
301 if (CachedPrevailingSummary
.count(VI
))
302 return CachedPrevailingSummary
[VI
];
304 /// At this point, prevailing symbols have been resolved. The following leads
305 /// to returning a conservative result:
306 /// - Multiple instances with local linkage. Normally local linkage would be
307 /// unique per module
308 /// as the GUID includes the module path. We could have a guid alias if
309 /// there wasn't any distinguishing path when each file was compiled, but
310 /// that should be rare so we'll punt on those.
312 /// These next 2 cases should not happen and will assert:
313 /// - Multiple instances with external linkage. This should be caught in
314 /// symbol resolution
315 /// - Non-existent FunctionSummary for Aliasee. This presents a hole in our
316 /// knowledge meaning we have to go conservative.
318 /// Otherwise, we calculate attributes for a function as:
319 /// 1. If we have a local linkage, take its attributes. If there's somehow
320 /// multiple, bail and go conservative.
321 /// 2. If we have an external/WeakODR/LinkOnceODR linkage check that it is
322 /// prevailing, take its attributes.
323 /// 3. If we have a Weak/LinkOnce linkage the copies can have semantic
324 /// differences. However, if the prevailing copy is known it will be used
325 /// so take its attributes. If the prevailing copy is in a native file
326 /// all IR copies will be dead and propagation will go conservative.
327 /// 4. AvailableExternally summaries without a prevailing copy are known to
328 /// occur in a couple of circumstances:
329 /// a. An internal function gets imported due to its caller getting
330 /// imported, it becomes AvailableExternally but no prevailing
331 /// definition exists. Because it has to get imported along with its
332 /// caller the attributes will be captured by propagating on its
334 /// b. C++11 [temp.explicit]p10 can generate AvailableExternally
335 /// definitions of explicitly instanced template declarations
336 /// for inlining which are ultimately dropped from the TU. Since this
337 /// is localized to the TU the attributes will have already made it to
339 /// These are edge cases and already captured by their callers so we
340 /// ignore these for now. If they become relevant to optimize in the
341 /// future this can be revisited.
342 /// 5. Otherwise, go conservative.
344 CachedPrevailingSummary
[VI
] = nullptr;
345 FunctionSummary
*Local
= nullptr;
346 FunctionSummary
*Prevailing
= nullptr;
348 for (const auto &GVS
: VI
.getSummaryList()) {
352 FunctionSummary
*FS
= dyn_cast
<FunctionSummary
>(GVS
->getBaseObject());
353 // Virtual and Unknown (e.g. indirect) calls require going conservative
354 if (!FS
|| FS
->fflags().HasUnknownCall
)
357 const auto &Linkage
= GVS
->linkage();
358 if (GlobalValue::isLocalLinkage(Linkage
)) {
362 << "ThinLTO FunctionAttrs: Multiple Local Linkage, bailing on "
364 << VI
.name() << " from " << FS
->modulePath() << ". Previous module "
365 << Local
->modulePath() << "\n");
369 } else if (GlobalValue::isExternalLinkage(Linkage
)) {
370 assert(IsPrevailing(VI
.getGUID(), GVS
.get()));
373 } else if (GlobalValue::isWeakODRLinkage(Linkage
) ||
374 GlobalValue::isLinkOnceODRLinkage(Linkage
) ||
375 GlobalValue::isWeakAnyLinkage(Linkage
) ||
376 GlobalValue::isLinkOnceAnyLinkage(Linkage
)) {
377 if (IsPrevailing(VI
.getGUID(), GVS
.get())) {
381 } else if (GlobalValue::isAvailableExternallyLinkage(Linkage
)) {
382 // TODO: Handle these cases if they become meaningful
389 CachedPrevailingSummary
[VI
] = Local
;
390 } else if (Prevailing
) {
392 CachedPrevailingSummary
[VI
] = Prevailing
;
395 return CachedPrevailingSummary
[VI
];
398 bool llvm::thinLTOPropagateFunctionAttrs(
399 ModuleSummaryIndex
&Index
,
400 function_ref
<bool(GlobalValue::GUID
, const GlobalValueSummary
*)>
402 // TODO: implement addNoAliasAttrs once
403 // there's more information about the return type in the summary
404 if (DisableThinLTOPropagation
)
407 DenseMap
<ValueInfo
, FunctionSummary
*> CachedPrevailingSummary
;
408 bool Changed
= false;
410 auto PropagateAttributes
= [&](std::vector
<ValueInfo
> &SCCNodes
) {
411 // Assume we can propagate unless we discover otherwise
412 FunctionSummary::FFlags InferredFlags
;
413 InferredFlags
.NoRecurse
= (SCCNodes
.size() == 1);
414 InferredFlags
.NoUnwind
= true;
416 for (auto &V
: SCCNodes
) {
417 FunctionSummary
*CallerSummary
=
418 calculatePrevailingSummary(V
, CachedPrevailingSummary
, IsPrevailing
);
420 // Function summaries can fail to contain information such as declarations
424 if (CallerSummary
->fflags().MayThrow
)
425 InferredFlags
.NoUnwind
= false;
427 for (const auto &Callee
: CallerSummary
->calls()) {
428 FunctionSummary
*CalleeSummary
= calculatePrevailingSummary(
429 Callee
.first
, CachedPrevailingSummary
, IsPrevailing
);
434 if (!CalleeSummary
->fflags().NoRecurse
)
435 InferredFlags
.NoRecurse
= false;
437 if (!CalleeSummary
->fflags().NoUnwind
)
438 InferredFlags
.NoUnwind
= false;
440 if (!InferredFlags
.NoUnwind
&& !InferredFlags
.NoRecurse
)
445 if (InferredFlags
.NoUnwind
|| InferredFlags
.NoRecurse
) {
447 for (auto &V
: SCCNodes
) {
448 if (InferredFlags
.NoRecurse
) {
449 LLVM_DEBUG(dbgs() << "ThinLTO FunctionAttrs: Propagated NoRecurse to "
450 << V
.name() << "\n");
451 ++NumThinLinkNoRecurse
;
454 if (InferredFlags
.NoUnwind
) {
455 LLVM_DEBUG(dbgs() << "ThinLTO FunctionAttrs: Propagated NoUnwind to "
456 << V
.name() << "\n");
457 ++NumThinLinkNoUnwind
;
460 for (const auto &S
: V
.getSummaryList()) {
461 if (auto *FS
= dyn_cast
<FunctionSummary
>(S
.get())) {
462 if (InferredFlags
.NoRecurse
)
465 if (InferredFlags
.NoUnwind
)
473 // Call propagation functions on each SCC in the Index
474 for (scc_iterator
<ModuleSummaryIndex
*> I
= scc_begin(&Index
); !I
.isAtEnd();
476 std::vector
<ValueInfo
> Nodes(*I
);
477 PropagateAttributes(Nodes
);
484 /// For a given pointer Argument, this retains a list of Arguments of functions
485 /// in the same SCC that the pointer data flows into. We use this to build an
486 /// SCC of the arguments.
487 struct ArgumentGraphNode
{
488 Argument
*Definition
;
489 SmallVector
<ArgumentGraphNode
*, 4> Uses
;
492 class ArgumentGraph
{
493 // We store pointers to ArgumentGraphNode objects, so it's important that
494 // that they not move around upon insert.
495 using ArgumentMapTy
= std::map
<Argument
*, ArgumentGraphNode
>;
497 ArgumentMapTy ArgumentMap
;
499 // There is no root node for the argument graph, in fact:
500 // void f(int *x, int *y) { if (...) f(x, y); }
501 // is an example where the graph is disconnected. The SCCIterator requires a
502 // single entry point, so we maintain a fake ("synthetic") root node that
503 // uses every node. Because the graph is directed and nothing points into
504 // the root, it will not participate in any SCCs (except for its own).
505 ArgumentGraphNode SyntheticRoot
;
508 ArgumentGraph() { SyntheticRoot
.Definition
= nullptr; }
510 using iterator
= SmallVectorImpl
<ArgumentGraphNode
*>::iterator
;
512 iterator
begin() { return SyntheticRoot
.Uses
.begin(); }
513 iterator
end() { return SyntheticRoot
.Uses
.end(); }
514 ArgumentGraphNode
*getEntryNode() { return &SyntheticRoot
; }
516 ArgumentGraphNode
*operator[](Argument
*A
) {
517 ArgumentGraphNode
&Node
= ArgumentMap
[A
];
519 SyntheticRoot
.Uses
.push_back(&Node
);
524 /// This tracker checks whether callees are in the SCC, and if so it does not
525 /// consider that a capture, instead adding it to the "Uses" list and
526 /// continuing with the analysis.
527 struct ArgumentUsesTracker
: public CaptureTracker
{
528 ArgumentUsesTracker(const SCCNodeSet
&SCCNodes
) : SCCNodes(SCCNodes
) {}
530 void tooManyUses() override
{ Captured
= true; }
532 bool captured(const Use
*U
) override
{
533 CallBase
*CB
= dyn_cast
<CallBase
>(U
->getUser());
539 Function
*F
= CB
->getCalledFunction();
540 if (!F
|| !F
->hasExactDefinition() || !SCCNodes
.count(F
)) {
545 assert(!CB
->isCallee(U
) && "callee operand reported captured?");
546 const unsigned UseIndex
= CB
->getDataOperandNo(U
);
547 if (UseIndex
>= CB
->arg_size()) {
548 // Data operand, but not a argument operand -- must be a bundle operand
549 assert(CB
->hasOperandBundles() && "Must be!");
551 // CaptureTracking told us that we're being captured by an operand bundle
552 // use. In this case it does not matter if the callee is within our SCC
553 // or not -- we've been captured in some unknown way, and we have to be
559 if (UseIndex
>= F
->arg_size()) {
560 assert(F
->isVarArg() && "More params than args in non-varargs call");
565 Uses
.push_back(&*std::next(F
->arg_begin(), UseIndex
));
569 // True only if certainly captured (used outside our SCC).
570 bool Captured
= false;
572 // Uses within our SCC.
573 SmallVector
<Argument
*, 4> Uses
;
575 const SCCNodeSet
&SCCNodes
;
578 } // end anonymous namespace
582 template <> struct GraphTraits
<ArgumentGraphNode
*> {
583 using NodeRef
= ArgumentGraphNode
*;
584 using ChildIteratorType
= SmallVectorImpl
<ArgumentGraphNode
*>::iterator
;
586 static NodeRef
getEntryNode(NodeRef A
) { return A
; }
587 static ChildIteratorType
child_begin(NodeRef N
) { return N
->Uses
.begin(); }
588 static ChildIteratorType
child_end(NodeRef N
) { return N
->Uses
.end(); }
592 struct GraphTraits
<ArgumentGraph
*> : public GraphTraits
<ArgumentGraphNode
*> {
593 static NodeRef
getEntryNode(ArgumentGraph
*AG
) { return AG
->getEntryNode(); }
595 static ChildIteratorType
nodes_begin(ArgumentGraph
*AG
) {
599 static ChildIteratorType
nodes_end(ArgumentGraph
*AG
) { return AG
->end(); }
602 } // end namespace llvm
604 /// Returns Attribute::None, Attribute::ReadOnly or Attribute::ReadNone.
605 static Attribute::AttrKind
606 determinePointerAccessAttrs(Argument
*A
,
607 const SmallPtrSet
<Argument
*, 8> &SCCNodes
) {
608 SmallVector
<Use
*, 32> Worklist
;
609 SmallPtrSet
<Use
*, 32> Visited
;
611 // inalloca arguments are always clobbered by the call.
612 if (A
->hasInAllocaAttr() || A
->hasPreallocatedAttr())
613 return Attribute::None
;
616 bool IsWrite
= false;
618 for (Use
&U
: A
->uses()) {
620 Worklist
.push_back(&U
);
623 while (!Worklist
.empty()) {
624 if (IsWrite
&& IsRead
)
625 // No point in searching further..
626 return Attribute::None
;
628 Use
*U
= Worklist
.pop_back_val();
629 Instruction
*I
= cast
<Instruction
>(U
->getUser());
631 switch (I
->getOpcode()) {
632 case Instruction::BitCast
:
633 case Instruction::GetElementPtr
:
634 case Instruction::PHI
:
635 case Instruction::Select
:
636 case Instruction::AddrSpaceCast
:
637 // The original value is not read/written via this if the new value isn't.
638 for (Use
&UU
: I
->uses())
639 if (Visited
.insert(&UU
).second
)
640 Worklist
.push_back(&UU
);
643 case Instruction::Call
:
644 case Instruction::Invoke
: {
645 CallBase
&CB
= cast
<CallBase
>(*I
);
646 if (CB
.isCallee(U
)) {
648 // Note that indirect calls do not capture, see comment in
649 // CaptureTracking for context
653 // Given we've explictily handled the callee operand above, what's left
654 // must be a data operand (e.g. argument or operand bundle)
655 const unsigned UseIndex
= CB
.getDataOperandNo(U
);
657 if (!CB
.doesNotCapture(UseIndex
)) {
658 if (!CB
.onlyReadsMemory())
659 // If the callee can save a copy into other memory, then simply
660 // scanning uses of the call is insufficient. We have no way
661 // of tracking copies of the pointer through memory to see
662 // if a reloaded copy is written to, thus we must give up.
663 return Attribute::None
;
664 // Push users for processing once we finish this one
665 if (!I
->getType()->isVoidTy())
666 for (Use
&UU
: I
->uses())
667 if (Visited
.insert(&UU
).second
)
668 Worklist
.push_back(&UU
);
671 if (CB
.doesNotAccessMemory())
674 if (Function
*F
= CB
.getCalledFunction())
675 if (CB
.isArgOperand(U
) && UseIndex
< F
->arg_size() &&
676 SCCNodes
.count(F
->getArg(UseIndex
)))
677 // This is an argument which is part of the speculative SCC. Note
678 // that only operands corresponding to formal arguments of the callee
679 // can participate in the speculation.
682 // The accessors used on call site here do the right thing for calls and
683 // invokes with operand bundles.
684 if (CB
.doesNotAccessMemory(UseIndex
)) {
686 } else if (CB
.onlyReadsMemory() || CB
.onlyReadsMemory(UseIndex
)) {
688 } else if (CB
.hasFnAttr(Attribute::WriteOnly
) ||
689 CB
.dataOperandHasImpliedAttr(UseIndex
, Attribute::WriteOnly
)) {
692 return Attribute::None
;
697 case Instruction::Load
:
698 // A volatile load has side effects beyond what readonly can be relied
700 if (cast
<LoadInst
>(I
)->isVolatile())
701 return Attribute::None
;
706 case Instruction::Store
:
707 if (cast
<StoreInst
>(I
)->getValueOperand() == *U
)
708 // untrackable capture
709 return Attribute::None
;
711 // A volatile store has side effects beyond what writeonly can be relied
713 if (cast
<StoreInst
>(I
)->isVolatile())
714 return Attribute::None
;
719 case Instruction::ICmp
:
720 case Instruction::Ret
:
724 return Attribute::None
;
728 if (IsWrite
&& IsRead
)
729 return Attribute::None
;
731 return Attribute::ReadOnly
;
733 return Attribute::WriteOnly
;
735 return Attribute::ReadNone
;
738 /// Deduce returned attributes for the SCC.
739 static void addArgumentReturnedAttrs(const SCCNodeSet
&SCCNodes
,
740 SmallSet
<Function
*, 8> &Changed
) {
741 // Check each function in turn, determining if an argument is always returned.
742 for (Function
*F
: SCCNodes
) {
743 // We can infer and propagate function attributes only when we know that the
744 // definition we'll get at link time is *exactly* the definition we see now.
745 // For more details, see GlobalValue::mayBeDerefined.
746 if (!F
->hasExactDefinition())
749 if (F
->getReturnType()->isVoidTy())
752 // There is nothing to do if an argument is already marked as 'returned'.
753 if (F
->getAttributes().hasAttrSomewhere(Attribute::Returned
))
756 auto FindRetArg
= [&]() -> Argument
* {
757 Argument
*RetArg
= nullptr;
758 for (BasicBlock
&BB
: *F
)
759 if (auto *Ret
= dyn_cast
<ReturnInst
>(BB
.getTerminator())) {
760 // Note that stripPointerCasts should look through functions with
761 // returned arguments.
763 dyn_cast
<Argument
>(Ret
->getReturnValue()->stripPointerCasts());
764 if (!RetVal
|| RetVal
->getType() != F
->getReturnType())
769 else if (RetArg
!= RetVal
)
776 if (Argument
*RetArg
= FindRetArg()) {
777 RetArg
->addAttr(Attribute::Returned
);
784 /// If a callsite has arguments that are also arguments to the parent function,
785 /// try to propagate attributes from the callsite's arguments to the parent's
786 /// arguments. This may be important because inlining can cause information loss
787 /// when attribute knowledge disappears with the inlined call.
788 static bool addArgumentAttrsFromCallsites(Function
&F
) {
789 if (!EnableNonnullArgPropagation
)
792 bool Changed
= false;
794 // For an argument attribute to transfer from a callsite to the parent, the
795 // call must be guaranteed to execute every time the parent is called.
796 // Conservatively, just check for calls in the entry block that are guaranteed
798 // TODO: This could be enhanced by testing if the callsite post-dominates the
799 // entry block or by doing simple forward walks or backward walks to the
801 BasicBlock
&Entry
= F
.getEntryBlock();
802 for (Instruction
&I
: Entry
) {
803 if (auto *CB
= dyn_cast
<CallBase
>(&I
)) {
804 if (auto *CalledFunc
= CB
->getCalledFunction()) {
805 for (auto &CSArg
: CalledFunc
->args()) {
806 if (!CSArg
.hasNonNullAttr(/* AllowUndefOrPoison */ false))
809 // If the non-null callsite argument operand is an argument to 'F'
810 // (the caller) and the call is guaranteed to execute, then the value
811 // must be non-null throughout 'F'.
812 auto *FArg
= dyn_cast
<Argument
>(CB
->getArgOperand(CSArg
.getArgNo()));
813 if (FArg
&& !FArg
->hasNonNullAttr()) {
814 FArg
->addAttr(Attribute::NonNull
);
820 if (!isGuaranteedToTransferExecutionToSuccessor(&I
))
827 static bool addAccessAttr(Argument
*A
, Attribute::AttrKind R
) {
828 assert((R
== Attribute::ReadOnly
|| R
== Attribute::ReadNone
||
829 R
== Attribute::WriteOnly
)
830 && "Must be an access attribute.");
831 assert(A
&& "Argument must not be null.");
833 // If the argument already has the attribute, nothing needs to be done.
834 if (A
->hasAttribute(R
))
837 // Otherwise, remove potentially conflicting attribute, add the new one,
838 // and update statistics.
839 A
->removeAttr(Attribute::WriteOnly
);
840 A
->removeAttr(Attribute::ReadOnly
);
841 A
->removeAttr(Attribute::ReadNone
);
843 if (R
== Attribute::ReadOnly
)
845 else if (R
== Attribute::WriteOnly
)
852 /// Deduce nocapture attributes for the SCC.
853 static void addArgumentAttrs(const SCCNodeSet
&SCCNodes
,
854 SmallSet
<Function
*, 8> &Changed
) {
857 // Check each function in turn, determining which pointer arguments are not
859 for (Function
*F
: SCCNodes
) {
860 // We can infer and propagate function attributes only when we know that the
861 // definition we'll get at link time is *exactly* the definition we see now.
862 // For more details, see GlobalValue::mayBeDerefined.
863 if (!F
->hasExactDefinition())
866 if (addArgumentAttrsFromCallsites(*F
))
869 // Functions that are readonly (or readnone) and nounwind and don't return
870 // a value can't capture arguments. Don't analyze them.
871 if (F
->onlyReadsMemory() && F
->doesNotThrow() &&
872 F
->getReturnType()->isVoidTy()) {
873 for (Argument
&A
: F
->args()) {
874 if (A
.getType()->isPointerTy() && !A
.hasNoCaptureAttr()) {
875 A
.addAttr(Attribute::NoCapture
);
883 for (Argument
&A
: F
->args()) {
884 if (!A
.getType()->isPointerTy())
886 bool HasNonLocalUses
= false;
887 if (!A
.hasNoCaptureAttr()) {
888 ArgumentUsesTracker
Tracker(SCCNodes
);
889 PointerMayBeCaptured(&A
, &Tracker
);
890 if (!Tracker
.Captured
) {
891 if (Tracker
.Uses
.empty()) {
892 // If it's trivially not captured, mark it nocapture now.
893 A
.addAttr(Attribute::NoCapture
);
897 // If it's not trivially captured and not trivially not captured,
898 // then it must be calling into another function in our SCC. Save
899 // its particulars for Argument-SCC analysis later.
900 ArgumentGraphNode
*Node
= AG
[&A
];
901 for (Argument
*Use
: Tracker
.Uses
) {
902 Node
->Uses
.push_back(AG
[Use
]);
904 HasNonLocalUses
= true;
908 // Otherwise, it's captured. Don't bother doing SCC analysis on it.
910 if (!HasNonLocalUses
&& !A
.onlyReadsMemory()) {
911 // Can we determine that it's readonly/readnone/writeonly without doing
912 // an SCC? Note that we don't allow any calls at all here, or else our
913 // result will be dependent on the iteration order through the
914 // functions in the SCC.
915 SmallPtrSet
<Argument
*, 8> Self
;
917 Attribute::AttrKind R
= determinePointerAccessAttrs(&A
, Self
);
918 if (R
!= Attribute::None
)
919 if (addAccessAttr(&A
, R
))
925 // The graph we've collected is partial because we stopped scanning for
926 // argument uses once we solved the argument trivially. These partial nodes
927 // show up as ArgumentGraphNode objects with an empty Uses list, and for
928 // these nodes the final decision about whether they capture has already been
929 // made. If the definition doesn't have a 'nocapture' attribute by now, it
932 for (scc_iterator
<ArgumentGraph
*> I
= scc_begin(&AG
); !I
.isAtEnd(); ++I
) {
933 const std::vector
<ArgumentGraphNode
*> &ArgumentSCC
= *I
;
934 if (ArgumentSCC
.size() == 1) {
935 if (!ArgumentSCC
[0]->Definition
)
936 continue; // synthetic root node
938 // eg. "void f(int* x) { if (...) f(x); }"
939 if (ArgumentSCC
[0]->Uses
.size() == 1 &&
940 ArgumentSCC
[0]->Uses
[0] == ArgumentSCC
[0]) {
941 Argument
*A
= ArgumentSCC
[0]->Definition
;
942 A
->addAttr(Attribute::NoCapture
);
944 Changed
.insert(A
->getParent());
946 // Infer the access attributes given the new nocapture one
947 SmallPtrSet
<Argument
*, 8> Self
;
949 Attribute::AttrKind R
= determinePointerAccessAttrs(&*A
, Self
);
950 if (R
!= Attribute::None
)
956 bool SCCCaptured
= false;
957 for (ArgumentGraphNode
*Node
: ArgumentSCC
) {
958 if (Node
->Uses
.empty() && !Node
->Definition
->hasNoCaptureAttr()) {
966 SmallPtrSet
<Argument
*, 8> ArgumentSCCNodes
;
967 // Fill ArgumentSCCNodes with the elements of the ArgumentSCC. Used for
968 // quickly looking up whether a given Argument is in this ArgumentSCC.
969 for (ArgumentGraphNode
*I
: ArgumentSCC
) {
970 ArgumentSCCNodes
.insert(I
->Definition
);
973 for (ArgumentGraphNode
*N
: ArgumentSCC
) {
974 for (ArgumentGraphNode
*Use
: N
->Uses
) {
975 Argument
*A
= Use
->Definition
;
976 if (A
->hasNoCaptureAttr() || ArgumentSCCNodes
.count(A
))
987 for (ArgumentGraphNode
*N
: ArgumentSCC
) {
988 Argument
*A
= N
->Definition
;
989 A
->addAttr(Attribute::NoCapture
);
991 Changed
.insert(A
->getParent());
994 // We also want to compute readonly/readnone/writeonly. With a small number
995 // of false negatives, we can assume that any pointer which is captured
996 // isn't going to be provably readonly or readnone, since by definition
997 // we can't analyze all uses of a captured pointer.
999 // The false negatives happen when the pointer is captured by a function
1000 // that promises readonly/readnone behaviour on the pointer, then the
1001 // pointer's lifetime ends before anything that writes to arbitrary memory.
1002 // Also, a readonly/readnone pointer may be returned, but returning a
1003 // pointer is capturing it.
1005 auto meetAccessAttr
= [](Attribute::AttrKind A
, Attribute::AttrKind B
) {
1008 if (A
== Attribute::ReadNone
)
1010 if (B
== Attribute::ReadNone
)
1012 return Attribute::None
;
1015 Attribute::AttrKind AccessAttr
= Attribute::ReadNone
;
1016 for (ArgumentGraphNode
*N
: ArgumentSCC
) {
1017 Argument
*A
= N
->Definition
;
1018 Attribute::AttrKind K
= determinePointerAccessAttrs(A
, ArgumentSCCNodes
);
1019 AccessAttr
= meetAccessAttr(AccessAttr
, K
);
1020 if (AccessAttr
== Attribute::None
)
1024 if (AccessAttr
!= Attribute::None
) {
1025 for (ArgumentGraphNode
*N
: ArgumentSCC
) {
1026 Argument
*A
= N
->Definition
;
1027 if (addAccessAttr(A
, AccessAttr
))
1028 Changed
.insert(A
->getParent());
1034 /// Tests whether a function is "malloc-like".
1036 /// A function is "malloc-like" if it returns either null or a pointer that
1037 /// doesn't alias any other pointer visible to the caller.
1038 static bool isFunctionMallocLike(Function
*F
, const SCCNodeSet
&SCCNodes
) {
1039 SmallSetVector
<Value
*, 8> FlowsToReturn
;
1040 for (BasicBlock
&BB
: *F
)
1041 if (ReturnInst
*Ret
= dyn_cast
<ReturnInst
>(BB
.getTerminator()))
1042 FlowsToReturn
.insert(Ret
->getReturnValue());
1044 for (unsigned i
= 0; i
!= FlowsToReturn
.size(); ++i
) {
1045 Value
*RetVal
= FlowsToReturn
[i
];
1047 if (Constant
*C
= dyn_cast
<Constant
>(RetVal
)) {
1048 if (!C
->isNullValue() && !isa
<UndefValue
>(C
))
1054 if (isa
<Argument
>(RetVal
))
1057 if (Instruction
*RVI
= dyn_cast
<Instruction
>(RetVal
))
1058 switch (RVI
->getOpcode()) {
1059 // Extend the analysis by looking upwards.
1060 case Instruction::BitCast
:
1061 case Instruction::GetElementPtr
:
1062 case Instruction::AddrSpaceCast
:
1063 FlowsToReturn
.insert(RVI
->getOperand(0));
1065 case Instruction::Select
: {
1066 SelectInst
*SI
= cast
<SelectInst
>(RVI
);
1067 FlowsToReturn
.insert(SI
->getTrueValue());
1068 FlowsToReturn
.insert(SI
->getFalseValue());
1071 case Instruction::PHI
: {
1072 PHINode
*PN
= cast
<PHINode
>(RVI
);
1073 for (Value
*IncValue
: PN
->incoming_values())
1074 FlowsToReturn
.insert(IncValue
);
1078 // Check whether the pointer came from an allocation.
1079 case Instruction::Alloca
:
1081 case Instruction::Call
:
1082 case Instruction::Invoke
: {
1083 CallBase
&CB
= cast
<CallBase
>(*RVI
);
1084 if (CB
.hasRetAttr(Attribute::NoAlias
))
1086 if (CB
.getCalledFunction() && SCCNodes
.count(CB
.getCalledFunction()))
1091 return false; // Did not come from an allocation.
1094 if (PointerMayBeCaptured(RetVal
, false, /*StoreCaptures=*/false))
1101 /// Deduce noalias attributes for the SCC.
1102 static void addNoAliasAttrs(const SCCNodeSet
&SCCNodes
,
1103 SmallSet
<Function
*, 8> &Changed
) {
1104 // Check each function in turn, determining which functions return noalias
1106 for (Function
*F
: SCCNodes
) {
1108 if (F
->returnDoesNotAlias())
1111 // We can infer and propagate function attributes only when we know that the
1112 // definition we'll get at link time is *exactly* the definition we see now.
1113 // For more details, see GlobalValue::mayBeDerefined.
1114 if (!F
->hasExactDefinition())
1117 // We annotate noalias return values, which are only applicable to
1119 if (!F
->getReturnType()->isPointerTy())
1122 if (!isFunctionMallocLike(F
, SCCNodes
))
1126 for (Function
*F
: SCCNodes
) {
1127 if (F
->returnDoesNotAlias() ||
1128 !F
->getReturnType()->isPointerTy())
1131 F
->setReturnDoesNotAlias();
1137 /// Tests whether this function is known to not return null.
1139 /// Requires that the function returns a pointer.
1141 /// Returns true if it believes the function will not return a null, and sets
1142 /// \p Speculative based on whether the returned conclusion is a speculative
1143 /// conclusion due to SCC calls.
1144 static bool isReturnNonNull(Function
*F
, const SCCNodeSet
&SCCNodes
,
1145 bool &Speculative
) {
1146 assert(F
->getReturnType()->isPointerTy() &&
1147 "nonnull only meaningful on pointer types");
1148 Speculative
= false;
1150 SmallSetVector
<Value
*, 8> FlowsToReturn
;
1151 for (BasicBlock
&BB
: *F
)
1152 if (auto *Ret
= dyn_cast
<ReturnInst
>(BB
.getTerminator()))
1153 FlowsToReturn
.insert(Ret
->getReturnValue());
1155 auto &DL
= F
->getParent()->getDataLayout();
1157 for (unsigned i
= 0; i
!= FlowsToReturn
.size(); ++i
) {
1158 Value
*RetVal
= FlowsToReturn
[i
];
1160 // If this value is locally known to be non-null, we're good
1161 if (isKnownNonZero(RetVal
, DL
))
1164 // Otherwise, we need to look upwards since we can't make any local
1166 Instruction
*RVI
= dyn_cast
<Instruction
>(RetVal
);
1169 switch (RVI
->getOpcode()) {
1170 // Extend the analysis by looking upwards.
1171 case Instruction::BitCast
:
1172 case Instruction::GetElementPtr
:
1173 case Instruction::AddrSpaceCast
:
1174 FlowsToReturn
.insert(RVI
->getOperand(0));
1176 case Instruction::Select
: {
1177 SelectInst
*SI
= cast
<SelectInst
>(RVI
);
1178 FlowsToReturn
.insert(SI
->getTrueValue());
1179 FlowsToReturn
.insert(SI
->getFalseValue());
1182 case Instruction::PHI
: {
1183 PHINode
*PN
= cast
<PHINode
>(RVI
);
1184 for (int i
= 0, e
= PN
->getNumIncomingValues(); i
!= e
; ++i
)
1185 FlowsToReturn
.insert(PN
->getIncomingValue(i
));
1188 case Instruction::Call
:
1189 case Instruction::Invoke
: {
1190 CallBase
&CB
= cast
<CallBase
>(*RVI
);
1191 Function
*Callee
= CB
.getCalledFunction();
1192 // A call to a node within the SCC is assumed to return null until
1194 if (Callee
&& SCCNodes
.count(Callee
)) {
1201 return false; // Unknown source, may be null
1203 llvm_unreachable("should have either continued or returned");
1209 /// Deduce nonnull attributes for the SCC.
1210 static void addNonNullAttrs(const SCCNodeSet
&SCCNodes
,
1211 SmallSet
<Function
*, 8> &Changed
) {
1212 // Speculative that all functions in the SCC return only nonnull
1213 // pointers. We may refute this as we analyze functions.
1214 bool SCCReturnsNonNull
= true;
1216 // Check each function in turn, determining which functions return nonnull
1218 for (Function
*F
: SCCNodes
) {
1220 if (F
->getAttributes().hasRetAttr(Attribute::NonNull
))
1223 // We can infer and propagate function attributes only when we know that the
1224 // definition we'll get at link time is *exactly* the definition we see now.
1225 // For more details, see GlobalValue::mayBeDerefined.
1226 if (!F
->hasExactDefinition())
1229 // We annotate nonnull return values, which are only applicable to
1231 if (!F
->getReturnType()->isPointerTy())
1234 bool Speculative
= false;
1235 if (isReturnNonNull(F
, SCCNodes
, Speculative
)) {
1237 // Mark the function eagerly since we may discover a function
1238 // which prevents us from speculating about the entire SCC
1239 LLVM_DEBUG(dbgs() << "Eagerly marking " << F
->getName()
1240 << " as nonnull\n");
1241 F
->addRetAttr(Attribute::NonNull
);
1247 // At least one function returns something which could be null, can't
1248 // speculate any more.
1249 SCCReturnsNonNull
= false;
1252 if (SCCReturnsNonNull
) {
1253 for (Function
*F
: SCCNodes
) {
1254 if (F
->getAttributes().hasRetAttr(Attribute::NonNull
) ||
1255 !F
->getReturnType()->isPointerTy())
1258 LLVM_DEBUG(dbgs() << "SCC marking " << F
->getName() << " as nonnull\n");
1259 F
->addRetAttr(Attribute::NonNull
);
1268 /// Collects a set of attribute inference requests and performs them all in one
1269 /// go on a single SCC Node. Inference involves scanning function bodies
1270 /// looking for instructions that violate attribute assumptions.
1271 /// As soon as all the bodies are fine we are free to set the attribute.
1272 /// Customization of inference for individual attributes is performed by
1273 /// providing a handful of predicates for each attribute.
1274 class AttributeInferer
{
1276 /// Describes a request for inference of a single attribute.
1277 struct InferenceDescriptor
{
1279 /// Returns true if this function does not have to be handled.
1280 /// General intent for this predicate is to provide an optimization
1281 /// for functions that do not need this attribute inference at all
1282 /// (say, for functions that already have the attribute).
1283 std::function
<bool(const Function
&)> SkipFunction
;
1285 /// Returns true if this instruction violates attribute assumptions.
1286 std::function
<bool(Instruction
&)> InstrBreaksAttribute
;
1288 /// Sets the inferred attribute for this function.
1289 std::function
<void(Function
&)> SetAttribute
;
1291 /// Attribute we derive.
1292 Attribute::AttrKind AKind
;
1294 /// If true, only "exact" definitions can be used to infer this attribute.
1295 /// See GlobalValue::isDefinitionExact.
1296 bool RequiresExactDefinition
;
1298 InferenceDescriptor(Attribute::AttrKind AK
,
1299 std::function
<bool(const Function
&)> SkipFunc
,
1300 std::function
<bool(Instruction
&)> InstrScan
,
1301 std::function
<void(Function
&)> SetAttr
,
1303 : SkipFunction(SkipFunc
), InstrBreaksAttribute(InstrScan
),
1304 SetAttribute(SetAttr
), AKind(AK
),
1305 RequiresExactDefinition(ReqExactDef
) {}
1309 SmallVector
<InferenceDescriptor
, 4> InferenceDescriptors
;
1312 void registerAttrInference(InferenceDescriptor AttrInference
) {
1313 InferenceDescriptors
.push_back(AttrInference
);
1316 void run(const SCCNodeSet
&SCCNodes
, SmallSet
<Function
*, 8> &Changed
);
1319 /// Perform all the requested attribute inference actions according to the
1320 /// attribute predicates stored before.
1321 void AttributeInferer::run(const SCCNodeSet
&SCCNodes
,
1322 SmallSet
<Function
*, 8> &Changed
) {
1323 SmallVector
<InferenceDescriptor
, 4> InferInSCC
= InferenceDescriptors
;
1324 // Go through all the functions in SCC and check corresponding attribute
1325 // assumptions for each of them. Attributes that are invalid for this SCC
1326 // will be removed from InferInSCC.
1327 for (Function
*F
: SCCNodes
) {
1329 // No attributes whose assumptions are still valid - done.
1330 if (InferInSCC
.empty())
1333 // Check if our attributes ever need scanning/can be scanned.
1334 llvm::erase_if(InferInSCC
, [F
](const InferenceDescriptor
&ID
) {
1335 if (ID
.SkipFunction(*F
))
1338 // Remove from further inference (invalidate) when visiting a function
1339 // that has no instructions to scan/has an unsuitable definition.
1340 return F
->isDeclaration() ||
1341 (ID
.RequiresExactDefinition
&& !F
->hasExactDefinition());
1344 // For each attribute still in InferInSCC that doesn't explicitly skip F,
1345 // set up the F instructions scan to verify assumptions of the attribute.
1346 SmallVector
<InferenceDescriptor
, 4> InferInThisFunc
;
1348 InferInSCC
, std::back_inserter(InferInThisFunc
),
1349 [F
](const InferenceDescriptor
&ID
) { return !ID
.SkipFunction(*F
); });
1351 if (InferInThisFunc
.empty())
1354 // Start instruction scan.
1355 for (Instruction
&I
: instructions(*F
)) {
1356 llvm::erase_if(InferInThisFunc
, [&](const InferenceDescriptor
&ID
) {
1357 if (!ID
.InstrBreaksAttribute(I
))
1359 // Remove attribute from further inference on any other functions
1360 // because attribute assumptions have just been violated.
1361 llvm::erase_if(InferInSCC
, [&ID
](const InferenceDescriptor
&D
) {
1362 return D
.AKind
== ID
.AKind
;
1364 // Remove attribute from the rest of current instruction scan.
1368 if (InferInThisFunc
.empty())
1373 if (InferInSCC
.empty())
1376 for (Function
*F
: SCCNodes
)
1377 // At this point InferInSCC contains only functions that were either:
1378 // - explicitly skipped from scan/inference, or
1379 // - verified to have no instructions that break attribute assumptions.
1380 // Hence we just go and force the attribute for all non-skipped functions.
1381 for (auto &ID
: InferInSCC
) {
1382 if (ID
.SkipFunction(*F
))
1385 ID
.SetAttribute(*F
);
1389 struct SCCNodesResult
{
1390 SCCNodeSet SCCNodes
;
1391 bool HasUnknownCall
;
1394 } // end anonymous namespace
1396 /// Helper for non-Convergent inference predicate InstrBreaksAttribute.
1397 static bool InstrBreaksNonConvergent(Instruction
&I
,
1398 const SCCNodeSet
&SCCNodes
) {
1399 const CallBase
*CB
= dyn_cast
<CallBase
>(&I
);
1400 // Breaks non-convergent assumption if CS is a convergent call to a function
1402 return CB
&& CB
->isConvergent() &&
1403 !SCCNodes
.contains(CB
->getCalledFunction());
1406 /// Helper for NoUnwind inference predicate InstrBreaksAttribute.
1407 static bool InstrBreaksNonThrowing(Instruction
&I
, const SCCNodeSet
&SCCNodes
) {
1408 if (!I
.mayThrow(/* IncludePhaseOneUnwind */ true))
1410 if (const auto *CI
= dyn_cast
<CallInst
>(&I
)) {
1411 if (Function
*Callee
= CI
->getCalledFunction()) {
1412 // I is a may-throw call to a function inside our SCC. This doesn't
1413 // invalidate our current working assumption that the SCC is no-throw; we
1414 // just have to scan that other function.
1415 if (SCCNodes
.contains(Callee
))
1422 /// Helper for NoFree inference predicate InstrBreaksAttribute.
1423 static bool InstrBreaksNoFree(Instruction
&I
, const SCCNodeSet
&SCCNodes
) {
1424 CallBase
*CB
= dyn_cast
<CallBase
>(&I
);
1428 if (CB
->hasFnAttr(Attribute::NoFree
))
1431 // Speculatively assume in SCC.
1432 if (Function
*Callee
= CB
->getCalledFunction())
1433 if (SCCNodes
.contains(Callee
))
1439 // Return true if this is an atomic which has an ordering stronger than
1440 // unordered. Note that this is different than the predicate we use in
1441 // Attributor. Here we chose to be conservative and consider monotonic
1442 // operations potentially synchronizing. We generally don't do much with
1443 // monotonic operations, so this is simply risk reduction.
1444 static bool isOrderedAtomic(Instruction
*I
) {
1448 if (auto *FI
= dyn_cast
<FenceInst
>(I
))
1449 // All legal orderings for fence are stronger than monotonic.
1450 return FI
->getSyncScopeID() != SyncScope::SingleThread
;
1451 else if (isa
<AtomicCmpXchgInst
>(I
) || isa
<AtomicRMWInst
>(I
))
1453 else if (auto *SI
= dyn_cast
<StoreInst
>(I
))
1454 return !SI
->isUnordered();
1455 else if (auto *LI
= dyn_cast
<LoadInst
>(I
))
1456 return !LI
->isUnordered();
1458 llvm_unreachable("unknown atomic instruction?");
1462 static bool InstrBreaksNoSync(Instruction
&I
, const SCCNodeSet
&SCCNodes
) {
1463 // Volatile may synchronize
1467 // An ordered atomic may synchronize. (See comment about on monotonic.)
1468 if (isOrderedAtomic(&I
))
1471 auto *CB
= dyn_cast
<CallBase
>(&I
);
1473 // Non call site cases covered by the two checks above
1476 if (CB
->hasFnAttr(Attribute::NoSync
))
1479 // Non volatile memset/memcpy/memmoves are nosync
1480 // NOTE: Only intrinsics with volatile flags should be handled here. All
1481 // others should be marked in Intrinsics.td.
1482 if (auto *MI
= dyn_cast
<MemIntrinsic
>(&I
))
1483 if (!MI
->isVolatile())
1486 // Speculatively assume in SCC.
1487 if (Function
*Callee
= CB
->getCalledFunction())
1488 if (SCCNodes
.contains(Callee
))
1494 /// Attempt to remove convergent function attribute when possible.
1496 /// Returns true if any changes to function attributes were made.
1497 static void inferConvergent(const SCCNodeSet
&SCCNodes
,
1498 SmallSet
<Function
*, 8> &Changed
) {
1499 AttributeInferer AI
;
1501 // Request to remove the convergent attribute from all functions in the SCC
1502 // if every callsite within the SCC is not convergent (except for calls
1503 // to functions within the SCC).
1504 // Note: Removal of the attr from the callsites will happen in
1505 // InstCombineCalls separately.
1506 AI
.registerAttrInference(AttributeInferer::InferenceDescriptor
{
1507 Attribute::Convergent
,
1508 // Skip non-convergent functions.
1509 [](const Function
&F
) { return !F
.isConvergent(); },
1510 // Instructions that break non-convergent assumption.
1511 [SCCNodes
](Instruction
&I
) {
1512 return InstrBreaksNonConvergent(I
, SCCNodes
);
1515 LLVM_DEBUG(dbgs() << "Removing convergent attr from fn " << F
.getName()
1517 F
.setNotConvergent();
1519 /* RequiresExactDefinition= */ false});
1520 // Perform all the requested attribute inference actions.
1521 AI
.run(SCCNodes
, Changed
);
1524 /// Infer attributes from all functions in the SCC by scanning every
1525 /// instruction for compliance to the attribute assumptions.
1527 /// Returns true if any changes to function attributes were made.
1528 static void inferAttrsFromFunctionBodies(const SCCNodeSet
&SCCNodes
,
1529 SmallSet
<Function
*, 8> &Changed
) {
1530 AttributeInferer AI
;
1532 if (!DisableNoUnwindInference
)
1533 // Request to infer nounwind attribute for all the functions in the SCC if
1534 // every callsite within the SCC is not throwing (except for calls to
1535 // functions within the SCC). Note that nounwind attribute suffers from
1536 // derefinement - results may change depending on how functions are
1537 // optimized. Thus it can be inferred only from exact definitions.
1538 AI
.registerAttrInference(AttributeInferer::InferenceDescriptor
{
1539 Attribute::NoUnwind
,
1540 // Skip non-throwing functions.
1541 [](const Function
&F
) { return F
.doesNotThrow(); },
1542 // Instructions that break non-throwing assumption.
1543 [&SCCNodes
](Instruction
&I
) {
1544 return InstrBreaksNonThrowing(I
, SCCNodes
);
1548 << "Adding nounwind attr to fn " << F
.getName() << "\n");
1549 F
.setDoesNotThrow();
1552 /* RequiresExactDefinition= */ true});
1554 if (!DisableNoFreeInference
)
1555 // Request to infer nofree attribute for all the functions in the SCC if
1556 // every callsite within the SCC does not directly or indirectly free
1557 // memory (except for calls to functions within the SCC). Note that nofree
1558 // attribute suffers from derefinement - results may change depending on
1559 // how functions are optimized. Thus it can be inferred only from exact
1561 AI
.registerAttrInference(AttributeInferer::InferenceDescriptor
{
1563 // Skip functions known not to free memory.
1564 [](const Function
&F
) { return F
.doesNotFreeMemory(); },
1565 // Instructions that break non-deallocating assumption.
1566 [&SCCNodes
](Instruction
&I
) {
1567 return InstrBreaksNoFree(I
, SCCNodes
);
1571 << "Adding nofree attr to fn " << F
.getName() << "\n");
1572 F
.setDoesNotFreeMemory();
1575 /* RequiresExactDefinition= */ true});
1577 AI
.registerAttrInference(AttributeInferer::InferenceDescriptor
{
1579 // Skip already marked functions.
1580 [](const Function
&F
) { return F
.hasNoSync(); },
1581 // Instructions that break nosync assumption.
1582 [&SCCNodes
](Instruction
&I
) {
1583 return InstrBreaksNoSync(I
, SCCNodes
);
1587 << "Adding nosync attr to fn " << F
.getName() << "\n");
1591 /* RequiresExactDefinition= */ true});
1593 // Perform all the requested attribute inference actions.
1594 AI
.run(SCCNodes
, Changed
);
1597 static void addNoRecurseAttrs(const SCCNodeSet
&SCCNodes
,
1598 SmallSet
<Function
*, 8> &Changed
) {
1599 // Try and identify functions that do not recurse.
1601 // If the SCC contains multiple nodes we know for sure there is recursion.
1602 if (SCCNodes
.size() != 1)
1605 Function
*F
= *SCCNodes
.begin();
1606 if (!F
|| !F
->hasExactDefinition() || F
->doesNotRecurse())
1609 // If all of the calls in F are identifiable and are to norecurse functions, F
1610 // is norecurse. This check also detects self-recursion as F is not currently
1611 // marked norecurse, so any called from F to F will not be marked norecurse.
1613 for (auto &I
: BB
.instructionsWithoutDebug())
1614 if (auto *CB
= dyn_cast
<CallBase
>(&I
)) {
1615 Function
*Callee
= CB
->getCalledFunction();
1616 if (!Callee
|| Callee
== F
|| !Callee
->doesNotRecurse())
1617 // Function calls a potentially recursive function.
1621 // Every call was to a non-recursive function other than this function, and
1622 // we have no indirect recursion as the SCC size is one. This function cannot
1624 F
->setDoesNotRecurse();
1629 static bool instructionDoesNotReturn(Instruction
&I
) {
1630 if (auto *CB
= dyn_cast
<CallBase
>(&I
))
1631 return CB
->hasFnAttr(Attribute::NoReturn
);
1635 // A basic block can only return if it terminates with a ReturnInst and does not
1636 // contain calls to noreturn functions.
1637 static bool basicBlockCanReturn(BasicBlock
&BB
) {
1638 if (!isa
<ReturnInst
>(BB
.getTerminator()))
1640 return none_of(BB
, instructionDoesNotReturn
);
1643 // FIXME: this doesn't handle recursion.
1644 static bool canReturn(Function
&F
) {
1645 SmallVector
<BasicBlock
*, 16> Worklist
;
1646 SmallPtrSet
<BasicBlock
*, 16> Visited
;
1648 Visited
.insert(&F
.front());
1649 Worklist
.push_back(&F
.front());
1652 BasicBlock
*BB
= Worklist
.pop_back_val();
1653 if (basicBlockCanReturn(*BB
))
1655 for (BasicBlock
*Succ
: successors(BB
))
1656 if (Visited
.insert(Succ
).second
)
1657 Worklist
.push_back(Succ
);
1658 } while (!Worklist
.empty());
1663 // Set the noreturn function attribute if possible.
1664 static void addNoReturnAttrs(const SCCNodeSet
&SCCNodes
,
1665 SmallSet
<Function
*, 8> &Changed
) {
1666 for (Function
*F
: SCCNodes
) {
1667 if (!F
|| !F
->hasExactDefinition() || F
->hasFnAttribute(Attribute::Naked
) ||
1671 if (!canReturn(*F
)) {
1672 F
->setDoesNotReturn();
1678 static bool functionWillReturn(const Function
&F
) {
1679 // We can infer and propagate function attributes only when we know that the
1680 // definition we'll get at link time is *exactly* the definition we see now.
1681 // For more details, see GlobalValue::mayBeDerefined.
1682 if (!F
.hasExactDefinition())
1685 // Must-progress function without side-effects must return.
1686 if (F
.mustProgress() && F
.onlyReadsMemory())
1689 // Can only analyze functions with a definition.
1690 if (F
.isDeclaration())
1693 // Functions with loops require more sophisticated analysis, as the loop
1694 // may be infinite. For now, don't try to handle them.
1695 SmallVector
<std::pair
<const BasicBlock
*, const BasicBlock
*>> Backedges
;
1696 FindFunctionBackedges(F
, Backedges
);
1697 if (!Backedges
.empty())
1700 // If there are no loops, then the function is willreturn if all calls in
1701 // it are willreturn.
1702 return all_of(instructions(F
), [](const Instruction
&I
) {
1703 return I
.willReturn();
1707 // Set the willreturn function attribute if possible.
1708 static void addWillReturn(const SCCNodeSet
&SCCNodes
,
1709 SmallSet
<Function
*, 8> &Changed
) {
1710 for (Function
*F
: SCCNodes
) {
1711 if (!F
|| F
->willReturn() || !functionWillReturn(*F
))
1720 static SCCNodesResult
createSCCNodeSet(ArrayRef
<Function
*> Functions
) {
1722 Res
.HasUnknownCall
= false;
1723 for (Function
*F
: Functions
) {
1724 if (!F
|| F
->hasOptNone() || F
->hasFnAttribute(Attribute::Naked
) ||
1725 F
->isPresplitCoroutine()) {
1726 // Treat any function we're trying not to optimize as if it were an
1727 // indirect call and omit it from the node set used below.
1728 Res
.HasUnknownCall
= true;
1731 // Track whether any functions in this SCC have an unknown call edge.
1732 // Note: if this is ever a performance hit, we can common it with
1733 // subsequent routines which also do scans over the instructions of the
1735 if (!Res
.HasUnknownCall
) {
1736 for (Instruction
&I
: instructions(*F
)) {
1737 if (auto *CB
= dyn_cast
<CallBase
>(&I
)) {
1738 if (!CB
->getCalledFunction()) {
1739 Res
.HasUnknownCall
= true;
1745 Res
.SCCNodes
.insert(F
);
1750 template <typename AARGetterT
>
1751 static SmallSet
<Function
*, 8>
1752 deriveAttrsInPostOrder(ArrayRef
<Function
*> Functions
, AARGetterT
&&AARGetter
,
1753 bool ArgAttrsOnly
) {
1754 SCCNodesResult Nodes
= createSCCNodeSet(Functions
);
1756 // Bail if the SCC only contains optnone functions.
1757 if (Nodes
.SCCNodes
.empty())
1760 SmallSet
<Function
*, 8> Changed
;
1762 addArgumentAttrs(Nodes
.SCCNodes
, Changed
);
1766 addArgumentReturnedAttrs(Nodes
.SCCNodes
, Changed
);
1767 addMemoryAttrs(Nodes
.SCCNodes
, AARGetter
, Changed
);
1768 addArgumentAttrs(Nodes
.SCCNodes
, Changed
);
1769 inferConvergent(Nodes
.SCCNodes
, Changed
);
1770 addNoReturnAttrs(Nodes
.SCCNodes
, Changed
);
1771 addWillReturn(Nodes
.SCCNodes
, Changed
);
1773 // If we have no external nodes participating in the SCC, we can deduce some
1774 // more precise attributes as well.
1775 if (!Nodes
.HasUnknownCall
) {
1776 addNoAliasAttrs(Nodes
.SCCNodes
, Changed
);
1777 addNonNullAttrs(Nodes
.SCCNodes
, Changed
);
1778 inferAttrsFromFunctionBodies(Nodes
.SCCNodes
, Changed
);
1779 addNoRecurseAttrs(Nodes
.SCCNodes
, Changed
);
1782 // Finally, infer the maximal set of attributes from the ones we've inferred
1783 // above. This is handling the cases where one attribute on a signature
1784 // implies another, but for implementation reasons the inference rule for
1785 // the later is missing (or simply less sophisticated).
1786 for (Function
*F
: Nodes
.SCCNodes
)
1788 if (inferAttributesFromOthers(*F
))
1794 PreservedAnalyses
PostOrderFunctionAttrsPass::run(LazyCallGraph::SCC
&C
,
1795 CGSCCAnalysisManager
&AM
,
1797 CGSCCUpdateResult
&) {
1798 // Skip non-recursive functions if requested.
1799 // Only infer argument attributes for non-recursive functions, because
1800 // it can affect optimization behavior in conjunction with noalias.
1801 bool ArgAttrsOnly
= false;
1802 if (C
.size() == 1 && SkipNonRecursive
) {
1803 LazyCallGraph::Node
&N
= *C
.begin();
1805 ArgAttrsOnly
= true;
1808 FunctionAnalysisManager
&FAM
=
1809 AM
.getResult
<FunctionAnalysisManagerCGSCCProxy
>(C
, CG
).getManager();
1811 // We pass a lambda into functions to wire them up to the analysis manager
1812 // for getting function analyses.
1813 auto AARGetter
= [&](Function
&F
) -> AAResults
& {
1814 return FAM
.getResult
<AAManager
>(F
);
1817 SmallVector
<Function
*, 8> Functions
;
1818 for (LazyCallGraph::Node
&N
: C
) {
1819 Functions
.push_back(&N
.getFunction());
1822 auto ChangedFunctions
=
1823 deriveAttrsInPostOrder(Functions
, AARGetter
, ArgAttrsOnly
);
1824 if (ChangedFunctions
.empty())
1825 return PreservedAnalyses::all();
1827 // Invalidate analyses for modified functions so that we don't have to
1828 // invalidate all analyses for all functions in this SCC.
1829 PreservedAnalyses FuncPA
;
1830 // We haven't changed the CFG for modified functions.
1831 FuncPA
.preserveSet
<CFGAnalyses
>();
1832 for (Function
*Changed
: ChangedFunctions
) {
1833 FAM
.invalidate(*Changed
, FuncPA
);
1834 // Also invalidate any direct callers of changed functions since analyses
1835 // may care about attributes of direct callees. For example, MemorySSA cares
1836 // about whether or not a call's callee modifies memory and queries that
1837 // through function attributes.
1838 for (auto *U
: Changed
->users()) {
1839 if (auto *Call
= dyn_cast
<CallBase
>(U
)) {
1840 if (Call
->getCalledFunction() == Changed
)
1841 FAM
.invalidate(*Call
->getFunction(), FuncPA
);
1846 PreservedAnalyses PA
;
1847 // We have not added or removed functions.
1848 PA
.preserve
<FunctionAnalysisManagerCGSCCProxy
>();
1849 // We already invalidated all relevant function analyses above.
1850 PA
.preserveSet
<AllAnalysesOn
<Function
>>();
1854 void PostOrderFunctionAttrsPass::printPipeline(
1855 raw_ostream
&OS
, function_ref
<StringRef(StringRef
)> MapClassName2PassName
) {
1856 static_cast<PassInfoMixin
<PostOrderFunctionAttrsPass
> *>(this)->printPipeline(
1857 OS
, MapClassName2PassName
);
1858 if (SkipNonRecursive
)
1859 OS
<< "<skip-non-recursive-function-attrs>";
1862 template <typename AARGetterT
>
1863 static bool runImpl(CallGraphSCC
&SCC
, AARGetterT AARGetter
) {
1864 SmallVector
<Function
*, 8> Functions
;
1865 for (CallGraphNode
*I
: SCC
) {
1866 Functions
.push_back(I
->getFunction());
1869 return !deriveAttrsInPostOrder(Functions
, AARGetter
).empty();
1872 static bool addNoRecurseAttrsTopDown(Function
&F
) {
1873 // We check the preconditions for the function prior to calling this to avoid
1874 // the cost of building up a reversible post-order list. We assert them here
1875 // to make sure none of the invariants this relies on were violated.
1876 assert(!F
.isDeclaration() && "Cannot deduce norecurse without a definition!");
1877 assert(!F
.doesNotRecurse() &&
1878 "This function has already been deduced as norecurs!");
1879 assert(F
.hasInternalLinkage() &&
1880 "Can only do top-down deduction for internal linkage functions!");
1882 // If F is internal and all of its uses are calls from a non-recursive
1883 // functions, then none of its calls could in fact recurse without going
1884 // through a function marked norecurse, and so we can mark this function too
1885 // as norecurse. Note that the uses must actually be calls -- otherwise
1886 // a pointer to this function could be returned from a norecurse function but
1887 // this function could be recursively (indirectly) called. Note that this
1888 // also detects if F is directly recursive as F is not yet marked as
1889 // a norecurse function.
1890 for (auto &U
: F
.uses()) {
1891 auto *I
= dyn_cast
<Instruction
>(U
.getUser());
1894 CallBase
*CB
= dyn_cast
<CallBase
>(I
);
1895 if (!CB
|| !CB
->isCallee(&U
) ||
1896 !CB
->getParent()->getParent()->doesNotRecurse())
1899 F
.setDoesNotRecurse();
1904 static bool deduceFunctionAttributeInRPO(Module
&M
, LazyCallGraph
&CG
) {
1905 // We only have a post-order SCC traversal (because SCCs are inherently
1906 // discovered in post-order), so we accumulate them in a vector and then walk
1907 // it in reverse. This is simpler than using the RPO iterator infrastructure
1908 // because we need to combine SCC detection and the PO walk of the call
1909 // graph. We can also cheat egregiously because we're primarily interested in
1910 // synthesizing norecurse and so we can only save the singular SCCs as SCCs
1911 // with multiple functions in them will clearly be recursive.
1913 SmallVector
<Function
*, 16> Worklist
;
1915 for (LazyCallGraph::RefSCC
&RC
: CG
.postorder_ref_sccs()) {
1916 for (LazyCallGraph::SCC
&SCC
: RC
) {
1917 if (SCC
.size() != 1)
1919 Function
&F
= SCC
.begin()->getFunction();
1920 if (!F
.isDeclaration() && !F
.doesNotRecurse() && F
.hasInternalLinkage())
1921 Worklist
.push_back(&F
);
1924 bool Changed
= false;
1925 for (auto *F
: llvm::reverse(Worklist
))
1926 Changed
|= addNoRecurseAttrsTopDown(*F
);
1932 ReversePostOrderFunctionAttrsPass::run(Module
&M
, ModuleAnalysisManager
&AM
) {
1933 auto &CG
= AM
.getResult
<LazyCallGraphAnalysis
>(M
);
1935 if (!deduceFunctionAttributeInRPO(M
, CG
))
1936 return PreservedAnalyses::all();
1938 PreservedAnalyses PA
;
1939 PA
.preserve
<LazyCallGraphAnalysis
>();