[docs] Add LICENSE.txt to the root of the mono-repo
[llvm-project.git] / llvm / tools / llvm-xray / xray-account.cpp
blob111704665c0b7da497c567a9b2380aa4efe95c25
1 //===- xray-account.h - XRay Function Call Accounting ---------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements basic function call accounting from an XRay trace.
11 //===----------------------------------------------------------------------===//
13 #include <algorithm>
14 #include <cassert>
15 #include <numeric>
16 #include <system_error>
17 #include <utility>
19 #include "xray-account.h"
20 #include "xray-registry.h"
21 #include "llvm/Support/ErrorHandling.h"
22 #include "llvm/Support/FormatVariadic.h"
23 #include "llvm/XRay/InstrumentationMap.h"
24 #include "llvm/XRay/Trace.h"
26 using namespace llvm;
27 using namespace llvm::xray;
29 static cl::SubCommand Account("account", "Function call accounting");
30 static cl::opt<std::string> AccountInput(cl::Positional,
31 cl::desc("<xray log file>"),
32 cl::Required, cl::sub(Account));
33 static cl::opt<bool>
34 AccountKeepGoing("keep-going", cl::desc("Keep going on errors encountered"),
35 cl::sub(Account), cl::init(false));
36 static cl::alias AccountKeepGoing2("k", cl::aliasopt(AccountKeepGoing),
37 cl::desc("Alias for -keep_going"));
38 static cl::opt<bool> AccountRecursiveCallsOnly(
39 "recursive-calls-only", cl::desc("Only count the calls that are recursive"),
40 cl::sub(Account), cl::init(false));
41 static cl::opt<bool> AccountDeduceSiblingCalls(
42 "deduce-sibling-calls",
43 cl::desc("Deduce sibling calls when unrolling function call stacks"),
44 cl::sub(Account), cl::init(false));
45 static cl::alias
46 AccountDeduceSiblingCalls2("d", cl::aliasopt(AccountDeduceSiblingCalls),
47 cl::desc("Alias for -deduce_sibling_calls"));
48 static cl::opt<std::string>
49 AccountOutput("output", cl::value_desc("output file"), cl::init("-"),
50 cl::desc("output file; use '-' for stdout"),
51 cl::sub(Account));
52 static cl::alias AccountOutput2("o", cl::aliasopt(AccountOutput),
53 cl::desc("Alias for -output"));
54 enum class AccountOutputFormats { TEXT, CSV };
55 static cl::opt<AccountOutputFormats>
56 AccountOutputFormat("format", cl::desc("output format"),
57 cl::values(clEnumValN(AccountOutputFormats::TEXT,
58 "text", "report stats in text"),
59 clEnumValN(AccountOutputFormats::CSV, "csv",
60 "report stats in csv")),
61 cl::sub(Account));
62 static cl::alias AccountOutputFormat2("f", cl::desc("Alias of -format"),
63 cl::aliasopt(AccountOutputFormat));
65 enum class SortField {
66 FUNCID,
67 COUNT,
68 MIN,
69 MED,
70 PCT90,
71 PCT99,
72 MAX,
73 SUM,
74 FUNC,
77 static cl::opt<SortField> AccountSortOutput(
78 "sort", cl::desc("sort output by this field"), cl::value_desc("field"),
79 cl::sub(Account), cl::init(SortField::FUNCID),
80 cl::values(clEnumValN(SortField::FUNCID, "funcid", "function id"),
81 clEnumValN(SortField::COUNT, "count", "funciton call counts"),
82 clEnumValN(SortField::MIN, "min", "minimum function durations"),
83 clEnumValN(SortField::MED, "med", "median function durations"),
84 clEnumValN(SortField::PCT90, "90p", "90th percentile durations"),
85 clEnumValN(SortField::PCT99, "99p", "99th percentile durations"),
86 clEnumValN(SortField::MAX, "max", "maximum function durations"),
87 clEnumValN(SortField::SUM, "sum", "sum of call durations"),
88 clEnumValN(SortField::FUNC, "func", "function names")));
89 static cl::alias AccountSortOutput2("s", cl::aliasopt(AccountSortOutput),
90 cl::desc("Alias for -sort"));
92 enum class SortDirection {
93 ASCENDING,
94 DESCENDING,
96 static cl::opt<SortDirection> AccountSortOrder(
97 "sortorder", cl::desc("sort ordering"), cl::init(SortDirection::ASCENDING),
98 cl::values(clEnumValN(SortDirection::ASCENDING, "asc", "ascending"),
99 clEnumValN(SortDirection::DESCENDING, "dsc", "descending")),
100 cl::sub(Account));
101 static cl::alias AccountSortOrder2("r", cl::aliasopt(AccountSortOrder),
102 cl::desc("Alias for -sortorder"));
104 static cl::opt<int> AccountTop("top", cl::desc("only show the top N results"),
105 cl::value_desc("N"), cl::sub(Account),
106 cl::init(-1));
107 static cl::alias AccountTop2("p", cl::desc("Alias for -top"),
108 cl::aliasopt(AccountTop));
110 static cl::opt<std::string>
111 AccountInstrMap("instr_map",
112 cl::desc("binary with the instrumentation map, or "
113 "a separate instrumentation map"),
114 cl::value_desc("binary with xray_instr_map"),
115 cl::sub(Account), cl::init(""));
116 static cl::alias AccountInstrMap2("m", cl::aliasopt(AccountInstrMap),
117 cl::desc("Alias for -instr_map"));
119 namespace {
121 template <class T, class U> void setMinMax(std::pair<T, T> &MM, U &&V) {
122 if (MM.first == 0 || MM.second == 0)
123 MM = std::make_pair(std::forward<U>(V), std::forward<U>(V));
124 else
125 MM = std::make_pair(std::min(MM.first, V), std::max(MM.second, V));
128 template <class T> T diff(T L, T R) { return std::max(L, R) - std::min(L, R); }
130 } // namespace
132 using RecursionStatus = LatencyAccountant::FunctionStack::RecursionStatus;
133 RecursionStatus &RecursionStatus::operator++() {
134 auto Depth = Bitfield::get<RecursionStatus::Depth>(Storage);
135 assert(Depth >= 0 && Depth < std::numeric_limits<decltype(Depth)>::max());
136 ++Depth;
137 Bitfield::set<RecursionStatus::Depth>(Storage, Depth); // ++Storage
138 // Did this function just (maybe indirectly) call itself the first time?
139 if (!isRecursive() && Depth == 2) // Storage == 2 / Storage s> 1
140 Bitfield::set<RecursionStatus::IsRecursive>(Storage,
141 true); // Storage |= INT_MIN
142 return *this;
144 RecursionStatus &RecursionStatus::operator--() {
145 auto Depth = Bitfield::get<RecursionStatus::Depth>(Storage);
146 assert(Depth > 0);
147 --Depth;
148 Bitfield::set<RecursionStatus::Depth>(Storage, Depth); // --Storage
149 // Did we leave a function that previouly (maybe indirectly) called itself?
150 if (isRecursive() && Depth == 0) // Storage == INT_MIN
151 Bitfield::set<RecursionStatus::IsRecursive>(Storage, false); // Storage = 0
152 return *this;
154 bool RecursionStatus::isRecursive() const {
155 return Bitfield::get<RecursionStatus::IsRecursive>(Storage); // Storage s< 0
158 bool LatencyAccountant::accountRecord(const XRayRecord &Record) {
159 setMinMax(PerThreadMinMaxTSC[Record.TId], Record.TSC);
160 setMinMax(PerCPUMinMaxTSC[Record.CPU], Record.TSC);
162 if (CurrentMaxTSC == 0)
163 CurrentMaxTSC = Record.TSC;
165 if (Record.TSC < CurrentMaxTSC)
166 return false;
168 auto &ThreadStack = PerThreadFunctionStack[Record.TId];
169 if (RecursiveCallsOnly && !ThreadStack.RecursionDepth)
170 ThreadStack.RecursionDepth.emplace();
171 switch (Record.Type) {
172 case RecordTypes::CUSTOM_EVENT:
173 case RecordTypes::TYPED_EVENT:
174 // TODO: Support custom and typed event accounting in the future.
175 return true;
176 case RecordTypes::ENTER:
177 case RecordTypes::ENTER_ARG: {
178 ThreadStack.Stack.emplace_back(Record.FuncId, Record.TSC);
179 if (ThreadStack.RecursionDepth)
180 ++(*ThreadStack.RecursionDepth)[Record.FuncId];
181 break;
183 case RecordTypes::EXIT:
184 case RecordTypes::TAIL_EXIT: {
185 if (ThreadStack.Stack.empty())
186 return false;
188 if (ThreadStack.Stack.back().first == Record.FuncId) {
189 const auto &Top = ThreadStack.Stack.back();
190 if (!ThreadStack.RecursionDepth ||
191 (*ThreadStack.RecursionDepth)[Top.first].isRecursive())
192 recordLatency(Top.first, diff(Top.second, Record.TSC));
193 if (ThreadStack.RecursionDepth)
194 --(*ThreadStack.RecursionDepth)[Top.first];
195 ThreadStack.Stack.pop_back();
196 break;
199 if (!DeduceSiblingCalls)
200 return false;
202 // Look for the parent up the stack.
203 auto Parent =
204 std::find_if(ThreadStack.Stack.rbegin(), ThreadStack.Stack.rend(),
205 [&](const std::pair<const int32_t, uint64_t> &E) {
206 return E.first == Record.FuncId;
208 if (Parent == ThreadStack.Stack.rend())
209 return false;
211 // Account time for this apparently sibling call exit up the stack.
212 // Considering the following case:
214 // f()
215 // g()
216 // h()
218 // We might only ever see the following entries:
220 // -> f()
221 // -> g()
222 // -> h()
223 // <- h()
224 // <- f()
226 // Now we don't see the exit to g() because some older version of the XRay
227 // runtime wasn't instrumenting tail exits. If we don't deduce tail calls,
228 // we may potentially never account time for g() -- and this code would have
229 // already bailed out, because `<- f()` doesn't match the current "top" of
230 // stack where we're waiting for the exit to `g()` instead. This is not
231 // ideal and brittle -- so instead we provide a potentially inaccurate
232 // accounting of g() instead, computing it from the exit of f().
234 // While it might be better that we account the time between `-> g()` and
235 // `-> h()` as the proper accounting of time for g() here, this introduces
236 // complexity to do correctly (need to backtrack, etc.).
238 // FIXME: Potentially implement the more complex deduction algorithm?
239 auto R = make_range(std::next(Parent).base(), ThreadStack.Stack.end());
240 for (auto &E : R) {
241 if (!ThreadStack.RecursionDepth ||
242 (*ThreadStack.RecursionDepth)[E.first].isRecursive())
243 recordLatency(E.first, diff(E.second, Record.TSC));
245 for (auto &Top : reverse(R)) {
246 if (ThreadStack.RecursionDepth)
247 --(*ThreadStack.RecursionDepth)[Top.first];
248 ThreadStack.Stack.pop_back();
250 break;
254 return true;
257 namespace {
259 // We consolidate the data into a struct which we can output in various forms.
260 struct ResultRow {
261 uint64_t Count;
262 double Min;
263 double Median;
264 double Pct90;
265 double Pct99;
266 double Max;
267 double Sum;
268 std::string DebugInfo;
269 std::string Function;
272 ResultRow getStats(MutableArrayRef<uint64_t> Timings) {
273 assert(!Timings.empty());
274 ResultRow R;
275 R.Sum = std::accumulate(Timings.begin(), Timings.end(), 0.0);
276 auto MinMax = std::minmax_element(Timings.begin(), Timings.end());
277 R.Min = *MinMax.first;
278 R.Max = *MinMax.second;
279 R.Count = Timings.size();
281 auto MedianOff = Timings.size() / 2;
282 std::nth_element(Timings.begin(), Timings.begin() + MedianOff, Timings.end());
283 R.Median = Timings[MedianOff];
285 auto Pct90Off = std::floor(Timings.size() * 0.9);
286 std::nth_element(Timings.begin(), Timings.begin() + (uint64_t)Pct90Off,
287 Timings.end());
288 R.Pct90 = Timings[Pct90Off];
290 auto Pct99Off = std::floor(Timings.size() * 0.99);
291 std::nth_element(Timings.begin(), Timings.begin() + (uint64_t)Pct99Off,
292 Timings.end());
293 R.Pct99 = Timings[Pct99Off];
294 return R;
297 } // namespace
299 using TupleType = std::tuple<int32_t, uint64_t, ResultRow>;
301 template <typename F>
302 static void sortByKey(std::vector<TupleType> &Results, F Fn) {
303 bool ASC = AccountSortOrder == SortDirection::ASCENDING;
304 llvm::sort(Results, [=](const TupleType &L, const TupleType &R) {
305 return ASC ? Fn(L) < Fn(R) : Fn(L) > Fn(R);
309 template <class F>
310 void LatencyAccountant::exportStats(const XRayFileHeader &Header, F Fn) const {
311 std::vector<TupleType> Results;
312 Results.reserve(FunctionLatencies.size());
313 for (auto FT : FunctionLatencies) {
314 const auto &FuncId = FT.first;
315 auto &Timings = FT.second;
316 Results.emplace_back(FuncId, Timings.size(), getStats(Timings));
317 auto &Row = std::get<2>(Results.back());
318 if (Header.CycleFrequency) {
319 double CycleFrequency = Header.CycleFrequency;
320 Row.Min /= CycleFrequency;
321 Row.Median /= CycleFrequency;
322 Row.Pct90 /= CycleFrequency;
323 Row.Pct99 /= CycleFrequency;
324 Row.Max /= CycleFrequency;
325 Row.Sum /= CycleFrequency;
328 Row.Function = FuncIdHelper.SymbolOrNumber(FuncId);
329 Row.DebugInfo = FuncIdHelper.FileLineAndColumn(FuncId);
332 // Sort the data according to user-provided flags.
333 switch (AccountSortOutput) {
334 case SortField::FUNCID:
335 sortByKey(Results, [](const TupleType &X) { return std::get<0>(X); });
336 break;
337 case SortField::COUNT:
338 sortByKey(Results, [](const TupleType &X) { return std::get<1>(X); });
339 break;
340 case SortField::MIN:
341 sortByKey(Results, [](const TupleType &X) { return std::get<2>(X).Min; });
342 break;
343 case SortField::MED:
344 sortByKey(Results, [](const TupleType &X) { return std::get<2>(X).Median; });
345 break;
346 case SortField::PCT90:
347 sortByKey(Results, [](const TupleType &X) { return std::get<2>(X).Pct90; });
348 break;
349 case SortField::PCT99:
350 sortByKey(Results, [](const TupleType &X) { return std::get<2>(X).Pct99; });
351 break;
352 case SortField::MAX:
353 sortByKey(Results, [](const TupleType &X) { return std::get<2>(X).Max; });
354 break;
355 case SortField::SUM:
356 sortByKey(Results, [](const TupleType &X) { return std::get<2>(X).Sum; });
357 break;
358 case SortField::FUNC:
359 llvm_unreachable("Not implemented");
362 if (AccountTop > 0) {
363 auto MaxTop =
364 std::min(AccountTop.getValue(), static_cast<int>(Results.size()));
365 Results.erase(Results.begin() + MaxTop, Results.end());
368 for (const auto &R : Results)
369 Fn(std::get<0>(R), std::get<1>(R), std::get<2>(R));
372 void LatencyAccountant::exportStatsAsText(raw_ostream &OS,
373 const XRayFileHeader &Header) const {
374 OS << "Functions with latencies: " << FunctionLatencies.size() << "\n";
376 // We spend some effort to make the text output more readable, so we do the
377 // following formatting decisions for each of the fields:
379 // - funcid: 32-bit, but we can determine the largest number and be
380 // between
381 // a minimum of 5 characters, up to 9 characters, right aligned.
382 // - count: 64-bit, but we can determine the largest number and be
383 // between
384 // a minimum of 5 characters, up to 9 characters, right aligned.
385 // - min, median, 90pct, 99pct, max: double precision, but we want to keep
386 // the values in seconds, with microsecond precision (0.000'001), so we
387 // have at most 6 significant digits, with the whole number part to be
388 // at
389 // least 1 character. For readability we'll right-align, with full 9
390 // characters each.
391 // - debug info, function name: we format this as a concatenation of the
392 // debug info and the function name.
394 static constexpr char StatsHeaderFormat[] =
395 "{0,+9} {1,+10} [{2,+9}, {3,+9}, {4,+9}, {5,+9}, {6,+9}] {7,+9}";
396 static constexpr char StatsFormat[] =
397 R"({0,+9} {1,+10} [{2,+9:f6}, {3,+9:f6}, {4,+9:f6}, {5,+9:f6}, {6,+9:f6}] {7,+9:f6})";
398 OS << llvm::formatv(StatsHeaderFormat, "funcid", "count", "min", "med", "90p",
399 "99p", "max", "sum")
400 << llvm::formatv(" {0,-12}\n", "function");
401 exportStats(Header, [&](int32_t FuncId, size_t Count, const ResultRow &Row) {
402 OS << llvm::formatv(StatsFormat, FuncId, Count, Row.Min, Row.Median,
403 Row.Pct90, Row.Pct99, Row.Max, Row.Sum)
404 << " " << Row.DebugInfo << ": " << Row.Function << "\n";
408 void LatencyAccountant::exportStatsAsCSV(raw_ostream &OS,
409 const XRayFileHeader &Header) const {
410 OS << "funcid,count,min,median,90%ile,99%ile,max,sum,debug,function\n";
411 exportStats(Header, [&](int32_t FuncId, size_t Count, const ResultRow &Row) {
412 OS << FuncId << ',' << Count << ',' << Row.Min << ',' << Row.Median << ','
413 << Row.Pct90 << ',' << Row.Pct99 << ',' << Row.Max << "," << Row.Sum
414 << ",\"" << Row.DebugInfo << "\",\"" << Row.Function << "\"\n";
418 using namespace llvm::xray;
420 namespace llvm {
421 template <> struct format_provider<llvm::xray::RecordTypes> {
422 static void format(const llvm::xray::RecordTypes &T, raw_ostream &Stream,
423 StringRef Style) {
424 switch (T) {
425 case RecordTypes::ENTER:
426 Stream << "enter";
427 break;
428 case RecordTypes::ENTER_ARG:
429 Stream << "enter-arg";
430 break;
431 case RecordTypes::EXIT:
432 Stream << "exit";
433 break;
434 case RecordTypes::TAIL_EXIT:
435 Stream << "tail-exit";
436 break;
437 case RecordTypes::CUSTOM_EVENT:
438 Stream << "custom-event";
439 break;
440 case RecordTypes::TYPED_EVENT:
441 Stream << "typed-event";
442 break;
446 } // namespace llvm
448 static CommandRegistration Unused(&Account, []() -> Error {
449 InstrumentationMap Map;
450 if (!AccountInstrMap.empty()) {
451 auto InstrumentationMapOrError = loadInstrumentationMap(AccountInstrMap);
452 if (!InstrumentationMapOrError)
453 return joinErrors(make_error<StringError>(
454 Twine("Cannot open instrumentation map '") +
455 AccountInstrMap + "'",
456 std::make_error_code(std::errc::invalid_argument)),
457 InstrumentationMapOrError.takeError());
458 Map = std::move(*InstrumentationMapOrError);
461 std::error_code EC;
462 raw_fd_ostream OS(AccountOutput, EC, sys::fs::OpenFlags::OF_TextWithCRLF);
463 if (EC)
464 return make_error<StringError>(
465 Twine("Cannot open file '") + AccountOutput + "' for writing.", EC);
467 const auto &FunctionAddresses = Map.getFunctionAddresses();
468 symbolize::LLVMSymbolizer Symbolizer;
469 llvm::xray::FuncIdConversionHelper FuncIdHelper(AccountInstrMap, Symbolizer,
470 FunctionAddresses);
471 xray::LatencyAccountant FCA(FuncIdHelper, AccountRecursiveCallsOnly,
472 AccountDeduceSiblingCalls);
473 auto TraceOrErr = loadTraceFile(AccountInput);
474 if (!TraceOrErr)
475 return joinErrors(
476 make_error<StringError>(
477 Twine("Failed loading input file '") + AccountInput + "'",
478 std::make_error_code(std::errc::executable_format_error)),
479 TraceOrErr.takeError());
481 auto &T = *TraceOrErr;
482 for (const auto &Record : T) {
483 if (FCA.accountRecord(Record))
484 continue;
485 errs()
486 << "Error processing record: "
487 << llvm::formatv(
488 R"({{type: {0}; cpu: {1}; record-type: {2}; function-id: {3}; tsc: {4}; thread-id: {5}; process-id: {6}}})",
489 Record.RecordType, Record.CPU, Record.Type, Record.FuncId,
490 Record.TSC, Record.TId, Record.PId)
491 << '\n';
492 for (const auto &ThreadStack : FCA.getPerThreadFunctionStack()) {
493 errs() << "Thread ID: " << ThreadStack.first << "\n";
494 if (ThreadStack.second.Stack.empty()) {
495 errs() << " (empty stack)\n";
496 continue;
498 auto Level = ThreadStack.second.Stack.size();
499 for (const auto &Entry : llvm::reverse(ThreadStack.second.Stack))
500 errs() << " #" << Level-- << "\t"
501 << FuncIdHelper.SymbolOrNumber(Entry.first) << '\n';
503 if (!AccountKeepGoing)
504 return make_error<StringError>(
505 Twine("Failed accounting function calls in file '") + AccountInput +
506 "'.",
507 std::make_error_code(std::errc::executable_format_error));
509 switch (AccountOutputFormat) {
510 case AccountOutputFormats::TEXT:
511 FCA.exportStatsAsText(OS, T.getFileHeader());
512 break;
513 case AccountOutputFormats::CSV:
514 FCA.exportStatsAsCSV(OS, T.getFileHeader());
515 break;
518 return Error::success();