llvm/tools/llvm-profgen/CSPreInliner.cpp
//===-- CSPreInliner.cpp - Profile guided preinliner -------------- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "CSPreInliner.h"
#include "ProfiledBinary.h"
#include "llvm/ADT/SCCIterator.h"
#include "llvm/ADT/Statistic.h"
#include <cstdint>
#include <queue>

#define DEBUG_TYPE "cs-preinliner"

using namespace llvm;
using namespace sampleprof;

STATISTIC(PreInlNumCSInlined,
          "Number of functions inlined with context sensitive profile");
STATISTIC(PreInlNumCSNotInlined,
          "Number of functions not inlined with context sensitive profile");
STATISTIC(PreInlNumCSInlinedHitMinLimit,
          "Number of functions with FDO inline stopped due to min size limit");
STATISTIC(PreInlNumCSInlinedHitMaxLimit,
          "Number of functions with FDO inline stopped due to max size limit");
STATISTIC(
    PreInlNumCSInlinedHitGrowthLimit,
    "Number of functions with FDO inline stopped due to growth size limit");
// The switches specify inline thresholds used in SampleProfileLoader inlining.
// TODO: the actual thresholds need to be tuned here because the size here is
// based on machine code, not LLVM IR.
extern cl::opt<int> SampleHotCallSiteThreshold;
extern cl::opt<int> SampleColdCallSiteThreshold;
extern cl::opt<int> ProfileInlineGrowthLimit;
extern cl::opt<int> ProfileInlineLimitMin;
extern cl::opt<int> ProfileInlineLimitMax;
extern cl::opt<bool> SortProfiledSCC;

cl::opt<bool> EnableCSPreInliner(
    "csspgo-preinliner", cl::Hidden, cl::init(true),
    cl::desc("Run a global pre-inliner to merge context profile based on "
             "estimated global top-down inline decisions"));

cl::opt<bool> UseContextCostForPreInliner(
    "use-context-cost-for-preinliner", cl::Hidden, cl::init(true),
    cl::desc("Use context-sensitive byte size cost for preinliner decisions"));

static cl::opt<bool> SamplePreInlineReplay(
    "csspgo-replay-preinline", cl::Hidden, cl::init(false),
    cl::desc(
        "Replay previous inlining and adjust context profile accordingly"));
CSPreInliner::CSPreInliner(SampleProfileMap &Profiles, ProfiledBinary &Binary,
                           uint64_t HotThreshold, uint64_t ColdThreshold)
    : UseContextCost(UseContextCostForPreInliner),
      // TODO: Pass in a guid-to-name map in order for
      // ContextTracker.getFuncNameFor to work, if `Profiles` can have md5
      // codes as their profile context.
      ContextTracker(Profiles, nullptr), ProfileMap(Profiles), Binary(Binary),
      HotCountThreshold(HotThreshold), ColdCountThreshold(ColdThreshold) {
  // Set default preinliner hot/cold call site thresholds tuned with CSSPGO
  // for good performance with reasonable profile size.
  if (!SampleHotCallSiteThreshold.getNumOccurrences())
    SampleHotCallSiteThreshold = 1500;
  if (!SampleColdCallSiteThreshold.getNumOccurrences())
    SampleColdCallSiteThreshold = 0;
}
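
// Build the function processing order for the pre-inliner: compute SCCs over
// the profiled call graph, collect them in the order scc_iterator yields them
// (callees before callers), then reverse the list so callers are processed
// before their callees (top-down). The graph's entry node is skipped.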
std::vector<StringRef> CSPreInliner::buildTopDownOrder() {
  std::vector<StringRef> Order;
  ProfiledCallGraph ProfiledCG(ContextTracker);

  // Now that we have a profiled call graph, construct the top-down order by
  // building up SCCs and reversing the SCC order.
  scc_iterator<ProfiledCallGraph *> I = scc_begin(&ProfiledCG);
  while (!I.isAtEnd()) {
    auto Range = *I;
    if (SortProfiledSCC) {
      // Sort nodes in one SCC based on callsite hotness.
      scc_member_iterator<ProfiledCallGraph *> SI(*I);
      Range = *SI;
    }
    for (auto *Node : Range) {
      if (Node != ProfiledCG.getEntryNode())
        Order.push_back(Node->Name);
    }
    ++I;
  }
  std::reverse(Order.begin(), Order.end());

  return Order;
}
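
// Collect direct inline candidates for a caller's context: walk the caller's
// children in the context trie, and for each child that carries profile
// samples, queue a candidate weighted by the larger of its call site count
// and entry count, together with its size estimate from getFuncSize().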
bool CSPreInliner::getInlineCandidates(ProfiledCandidateQueue &CQueue,
                                       const FunctionSamples *CallerSamples) {
  assert(CallerSamples && "Expect non-null caller samples");

  // Ideally we want to consider everything a function calls, but as far as
  // the context profile is concerned, only those frames that are children of
  // the current one in the trie are relevant. So we walk the trie instead of
  // the call targets from the function profile.
  ContextTrieNode *CallerNode =
      ContextTracker.getContextFor(CallerSamples->getContext());

  bool HasNewCandidate = false;
  for (auto &Child : CallerNode->getAllChildContext()) {
    ContextTrieNode *CalleeNode = &Child.second;
    FunctionSamples *CalleeSamples = CalleeNode->getFunctionSamples();
    if (!CalleeSamples)
      continue;

    // The call site count is more reliable, so we look up the corresponding
    // call target profile in the caller's context profile to retrieve the
    // call site count.
    uint64_t CalleeEntryCount = CalleeSamples->getEntrySamples();
    uint64_t CallsiteCount = 0;
    LineLocation Callsite = CalleeNode->getCallSiteLoc();
    if (auto CallTargets = CallerSamples->findCallTargetMapAt(Callsite)) {
      SampleRecord::CallTargetMap &TargetCounts = CallTargets.get();
      auto It = TargetCounts.find(CalleeSamples->getName());
      if (It != TargetCounts.end())
        CallsiteCount = It->second;
    }

    // TODO: the call site count and callee entry count should be mostly
    // consistent; add a check for that.
    HasNewCandidate = true;
    uint32_t CalleeSize = getFuncSize(*CalleeSamples);
    CQueue.emplace(CalleeSamples, std::max(CallsiteCount, CalleeEntryCount),
                   CalleeSize);
  }

  return HasNewCandidate;
}
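
// Size estimate for a function in a given context. When context cost is
// enabled, use the binary's byte size for that specific context; otherwise
// fall back to the number of body sample records in the profile as a rough
// proxy for size.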
uint32_t CSPreInliner::getFuncSize(const FunctionSamples &FSamples) {
  if (UseContextCost) {
    return Binary.getFuncSizeForContext(FSamples.getContext());
  }

  return FSamples.getBodySamples().size();
}
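
// Inline decision for a single candidate: in replay mode, follow whatever the
// profiled binary did; otherwise pick a size threshold based on call site
// hotness and accept the candidate if its size cost is under that threshold.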
bool CSPreInliner::shouldInline(ProfiledInlineCandidate &Candidate) {
  // If replay inline is requested, simply follow the inline decision of the
  // profiled binary.
  if (SamplePreInlineReplay)
    return Candidate.CalleeSamples->getContext().hasAttribute(
        ContextWasInlined);

  // Adjust the threshold based on call site hotness. We only do this for the
  // callsite-prioritized inliner because otherwise the cost-benefit check is
  // done earlier.
  unsigned int SampleThreshold = SampleColdCallSiteThreshold;
  if (Candidate.CallsiteCount > HotCountThreshold)
    SampleThreshold = SampleHotCallSiteThreshold;

  // TODO: for small cold functions, we may still inline them, and we need to
  // keep the context profile updated accordingly.
  if (Candidate.CallsiteCount < ColdCountThreshold)
    SampleThreshold = SampleColdCallSiteThreshold;

  return (Candidate.SizeCost < SampleThreshold);
}
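
// Run the pre-inliner for a single function's base context: compute a size
// budget from the growth limit and the min/max caps, seed a priority queue
// with the function's direct callees, then repeatedly take the best candidate
// and either mark its context as inlined (charging its size against the
// budget and adding its own callees as new candidates) or count it as not
// inlined, until the queue is empty or the size limit is reached.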
void CSPreInliner::processFunction(const StringRef Name) {
  FunctionSamples *FSamples = ContextTracker.getBaseSamplesFor(Name);
  if (!FSamples)
    return;

  unsigned FuncSize = getFuncSize(*FSamples);
  unsigned FuncFinalSize = FuncSize;
  unsigned SizeLimit = FuncSize * ProfileInlineGrowthLimit;
  SizeLimit = std::min(SizeLimit, (unsigned)ProfileInlineLimitMax);
  SizeLimit = std::max(SizeLimit, (unsigned)ProfileInlineLimitMin);

  LLVM_DEBUG(dbgs() << "Process " << Name
                    << " for context-sensitive pre-inlining (pre-inline size: "
                    << FuncSize << ", size limit: " << SizeLimit << ")\n");

  ProfiledCandidateQueue CQueue;
  getInlineCandidates(CQueue, FSamples);
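
  // Work the candidate queue until the size budget is used up. The queue
  // prioritizes hotter call sites (see ProfiledCandidateQueue in
  // CSPreInliner.h for the exact ordering); accepted candidates feed their
  // own callees back into the queue, so inlining proceeds transitively down
  // the context trie.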
  while (!CQueue.empty() && FuncFinalSize < SizeLimit) {
    ProfiledInlineCandidate Candidate = CQueue.top();
    CQueue.pop();
    bool ShouldInline = false;
    if ((ShouldInline = shouldInline(Candidate))) {
      // We mark the context as inlined so that the corresponding context
      // profile won't be merged into that function's base profile.
      ++PreInlNumCSInlined;
      ContextTracker.markContextSamplesInlined(Candidate.CalleeSamples);
      Candidate.CalleeSamples->getContext().setAttribute(
          ContextShouldBeInlined);
      FuncFinalSize += Candidate.SizeCost;
      getInlineCandidates(CQueue, Candidate.CalleeSamples);
    } else {
      ++PreInlNumCSNotInlined;
    }
    LLVM_DEBUG(dbgs() << (ShouldInline ? "  Inlined" : "  Outlined")
                      << " context profile for: "
                      << Candidate.CalleeSamples->getContext().toString()
                      << " (callee size: " << Candidate.SizeCost
                      << ", call count:" << Candidate.CallsiteCount << ")\n");
  }

  if (!CQueue.empty()) {
    if (SizeLimit == (unsigned)ProfileInlineLimitMax)
      ++PreInlNumCSInlinedHitMaxLimit;
    else if (SizeLimit == (unsigned)ProfileInlineLimitMin)
      ++PreInlNumCSInlinedHitMinLimit;
    else
      ++PreInlNumCSInlinedHitGrowthLimit;
  }

  LLVM_DEBUG({
    if (!CQueue.empty())
      dbgs() << "  Inline candidates ignored due to size limit (inliner "
                "original size: "
             << FuncSize << ", inliner final size: " << FuncFinalSize
             << ", size limit: " << SizeLimit << ")\n";

    while (!CQueue.empty()) {
      ProfiledInlineCandidate Candidate = CQueue.top();
      CQueue.pop();
      bool WasInlined =
          Candidate.CalleeSamples->getContext().hasAttribute(ContextWasInlined);
      dbgs() << "    " << Candidate.CalleeSamples->getContext().toString()
             << " (candidate size:" << Candidate.SizeCost
             << ", call count: " << Candidate.CallsiteCount << ", previously "
             << (WasInlined ? "inlined)\n" : "not inlined)\n");
    }
  });
}
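
// Driver for the pre-inliner: print the input profiles (debug builds only),
// process functions in top-down order, drop context profiles that were merged
// into their base profile rather than inlined, canonicalize the remaining
// profiles, and print the output profiles (debug builds only).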
void CSPreInliner::run() {
#ifndef NDEBUG
  auto printProfileNames = [](SampleProfileMap &Profiles, bool IsInput) {
    dbgs() << (IsInput ? "Input" : "Output") << " context-sensitive profiles ("
           << Profiles.size() << " total):\n";
    for (auto &It : Profiles) {
      const FunctionSamples &Samples = It.second;
      dbgs() << "  [" << Samples.getContext().toString() << "] "
             << Samples.getTotalSamples() << ":" << Samples.getHeadSamples()
             << "\n";
    }
  };
#endif

  LLVM_DEBUG(printProfileNames(ProfileMap, true));

  // Execute the global pre-inliner to estimate a global top-down inline
  // decision and merge profiles accordingly. This helps profile merging for
  // ThinLTO, where otherwise we would not be able to merge profiles back into
  // the base profile across module/thin-backend boundaries. It also helps
  // compress the context profile to control profile size, since we now only
  // need context profiles for functions that are going to be inlined.
  for (StringRef FuncName : buildTopDownOrder()) {
    processFunction(FuncName);
  }

  // Context profiles that were not inlined have been merged into their base
  // profiles, so we can trim such profiles from the output.
  std::vector<SampleContext> ProfilesToBeRemoved;
  for (auto &It : ProfileMap) {
    SampleContext &Context = It.second.getContext();
    if (!Context.isBaseContext() && !Context.hasState(InlinedContext)) {
      assert(Context.hasState(MergedContext) &&
             "Not inlined context profile should be merged already");
      ProfilesToBeRemoved.push_back(It.first);
    }
  }

  for (auto &ContextName : ProfilesToBeRemoved) {
    ProfileMap.erase(ContextName);
  }

  // Make sure ProfileMap's key is consistent with FunctionSamples' name.
  SampleContextTrimmer(ProfileMap).canonicalizeContextProfiles();

  LLVM_DEBUG(printProfileNames(ProfileMap, false));
}