1 //===- Profile.cpp - XRay Profile Abstraction -----------------------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // Defines the XRay Profile class representing the latency profile generated by
10 // XRay's profiling mode.
12 //===----------------------------------------------------------------------===//
13 #include "llvm/XRay/Profile.h"
15 #include "llvm/Support/DataExtractor.h"
16 #include "llvm/Support/Error.h"
17 #include "llvm/Support/FileSystem.h"
18 #include "llvm/XRay/Trace.h"
25 Profile::Profile(const Profile
&O
) {
26 // We need to re-create all the tries from the original (O), into the current
27 // Profile being initialized, through the Block instances we see.
28 for (const auto &Block
: O
) {
29 Blocks
.push_back({Block
.Thread
, {}});
30 auto &B
= Blocks
.back();
31 for (const auto &PathData
: Block
.PathData
)
32 B
.PathData
.push_back({internPath(cantFail(O
.expandPath(PathData
.first
))),
37 Profile
&Profile::operator=(const Profile
&O
) {
51 static Expected
<BlockHeader
> readBlockHeader(DataExtractor
&Extractor
,
54 uint64_t CurrentOffset
= Offset
;
55 H
.Size
= Extractor
.getU32(&Offset
);
56 if (Offset
== CurrentOffset
)
57 return make_error
<StringError
>(
58 Twine("Error parsing block header size at offset '") +
59 Twine(CurrentOffset
) + "'",
60 std::make_error_code(std::errc::invalid_argument
));
61 CurrentOffset
= Offset
;
62 H
.Number
= Extractor
.getU32(&Offset
);
63 if (Offset
== CurrentOffset
)
64 return make_error
<StringError
>(
65 Twine("Error parsing block header number at offset '") +
66 Twine(CurrentOffset
) + "'",
67 std::make_error_code(std::errc::invalid_argument
));
68 CurrentOffset
= Offset
;
69 H
.Thread
= Extractor
.getU64(&Offset
);
70 if (Offset
== CurrentOffset
)
71 return make_error
<StringError
>(
72 Twine("Error parsing block header thread id at offset '") +
73 Twine(CurrentOffset
) + "'",
74 std::make_error_code(std::errc::invalid_argument
));
78 static Expected
<std::vector
<Profile::FuncID
>> readPath(DataExtractor
&Extractor
,
80 // We're reading a sequence of int32_t's until we find a 0.
81 std::vector
<Profile::FuncID
> Path
;
82 auto CurrentOffset
= Offset
;
85 FuncId
= Extractor
.getSigned(&Offset
, 4);
86 if (CurrentOffset
== Offset
)
87 return make_error
<StringError
>(
88 Twine("Error parsing path at offset '") + Twine(CurrentOffset
) + "'",
89 std::make_error_code(std::errc::invalid_argument
));
90 CurrentOffset
= Offset
;
91 Path
.push_back(FuncId
);
92 } while (FuncId
!= 0);
93 return std::move(Path
);
96 static Expected
<Profile::Data
> readData(DataExtractor
&Extractor
,
98 // We expect a certain number of elements for Data:
99 // - A 64-bit CallCount
100 // - A 64-bit CumulativeLocalTime counter
102 auto CurrentOffset
= Offset
;
103 D
.CallCount
= Extractor
.getU64(&Offset
);
104 if (CurrentOffset
== Offset
)
105 return make_error
<StringError
>(
106 Twine("Error parsing call counts at offset '") + Twine(CurrentOffset
) +
108 std::make_error_code(std::errc::invalid_argument
));
109 CurrentOffset
= Offset
;
110 D
.CumulativeLocalTime
= Extractor
.getU64(&Offset
);
111 if (CurrentOffset
== Offset
)
112 return make_error
<StringError
>(
113 Twine("Error parsing cumulative local time at offset '") +
114 Twine(CurrentOffset
) + "'",
115 std::make_error_code(std::errc::invalid_argument
));
121 Error
Profile::addBlock(Block
&&B
) {
122 if (B
.PathData
.empty())
123 return make_error
<StringError
>(
124 "Block may not have empty path data.",
125 std::make_error_code(std::errc::invalid_argument
));
127 Blocks
.emplace_back(std::move(B
));
128 return Error::success();
131 Expected
<std::vector
<Profile::FuncID
>> Profile::expandPath(PathID P
) const {
132 auto It
= PathIDMap
.find(P
);
133 if (It
== PathIDMap
.end())
134 return make_error
<StringError
>(
135 Twine("PathID not found: ") + Twine(P
),
136 std::make_error_code(std::errc::invalid_argument
));
137 std::vector
<Profile::FuncID
> Path
;
138 for (auto Node
= It
->second
; Node
; Node
= Node
->Caller
)
139 Path
.push_back(Node
->Func
);
140 return std::move(Path
);
143 Profile::PathID
Profile::internPath(ArrayRef
<FuncID
> P
) {
147 auto RootToLeafPath
= reverse(P
);
150 auto It
= RootToLeafPath
.begin();
151 auto PathRoot
= *It
++;
153 find_if(Roots
, [PathRoot
](TrieNode
*N
) { return N
->Func
== PathRoot
; });
155 // If we've not seen this root before, remember it.
156 TrieNode
*Node
= nullptr;
157 if (RootIt
== Roots
.end()) {
158 NodeStorage
.emplace_back();
159 Node
= &NodeStorage
.back();
160 Node
->Func
= PathRoot
;
161 Roots
.push_back(Node
);
166 // Now traverse the path, re-creating if necessary.
167 while (It
!= RootToLeafPath
.end()) {
168 auto NodeFuncID
= *It
++;
169 auto CalleeIt
= find_if(Node
->Callees
, [NodeFuncID
](TrieNode
*N
) {
170 return N
->Func
== NodeFuncID
;
172 if (CalleeIt
== Node
->Callees
.end()) {
173 NodeStorage
.emplace_back();
174 auto NewNode
= &NodeStorage
.back();
175 NewNode
->Func
= NodeFuncID
;
176 NewNode
->Caller
= Node
;
177 Node
->Callees
.push_back(NewNode
);
184 // At this point, Node *must* be pointing at the leaf.
185 assert(Node
->Func
== P
.front());
188 PathIDMap
.insert({Node
->ID
, Node
});
193 Profile
mergeProfilesByThread(const Profile
&L
, const Profile
&R
) {
195 using PathDataMap
= DenseMap
<Profile::PathID
, Profile::Data
>;
196 using PathDataMapPtr
= std::unique_ptr
<PathDataMap
>;
197 using PathDataVector
= decltype(Profile::Block::PathData
);
198 using ThreadProfileIndexMap
= DenseMap
<Profile::ThreadID
, PathDataMapPtr
>;
199 ThreadProfileIndexMap ThreadProfileIndex
;
201 for (const auto &P
: {std::ref(L
), std::ref(R
)})
202 for (const auto &Block
: P
.get()) {
203 ThreadProfileIndexMap::iterator It
;
204 std::tie(It
, std::ignore
) = ThreadProfileIndex
.insert(
205 {Block
.Thread
, PathDataMapPtr
{new PathDataMap()}});
206 for (const auto &PathAndData
: Block
.PathData
) {
207 auto &PathID
= PathAndData
.first
;
208 auto &Data
= PathAndData
.second
;
210 Merged
.internPath(cantFail(P
.get().expandPath(PathID
)));
211 PathDataMap::iterator PathDataIt
;
213 std::tie(PathDataIt
, Inserted
) = It
->second
->insert({NewPathID
, Data
});
215 auto &ExistingData
= PathDataIt
->second
;
216 ExistingData
.CallCount
+= Data
.CallCount
;
217 ExistingData
.CumulativeLocalTime
+= Data
.CumulativeLocalTime
;
222 for (const auto &IndexedThreadBlock
: ThreadProfileIndex
) {
223 PathDataVector PathAndData
;
224 PathAndData
.reserve(IndexedThreadBlock
.second
->size());
225 copy(*IndexedThreadBlock
.second
, std::back_inserter(PathAndData
));
227 Merged
.addBlock({IndexedThreadBlock
.first
, std::move(PathAndData
)}));
232 Profile
mergeProfilesByStack(const Profile
&L
, const Profile
&R
) {
234 using PathDataMap
= DenseMap
<Profile::PathID
, Profile::Data
>;
235 PathDataMap PathData
;
236 using PathDataVector
= decltype(Profile::Block::PathData
);
237 for (const auto &P
: {std::ref(L
), std::ref(R
)})
238 for (const auto &Block
: P
.get())
239 for (const auto &PathAndData
: Block
.PathData
) {
240 auto &PathId
= PathAndData
.first
;
241 auto &Data
= PathAndData
.second
;
243 Merged
.internPath(cantFail(P
.get().expandPath(PathId
)));
244 PathDataMap::iterator PathDataIt
;
246 std::tie(PathDataIt
, Inserted
) = PathData
.insert({NewPathID
, Data
});
248 auto &ExistingData
= PathDataIt
->second
;
249 ExistingData
.CallCount
+= Data
.CallCount
;
250 ExistingData
.CumulativeLocalTime
+= Data
.CumulativeLocalTime
;
254 // In the end there's a single Block, for thread 0.
255 PathDataVector Block
;
256 Block
.reserve(PathData
.size());
257 copy(PathData
, std::back_inserter(Block
));
258 cantFail(Merged
.addBlock({0, std::move(Block
)}));
262 Expected
<Profile
> loadProfile(StringRef Filename
) {
263 Expected
<sys::fs::file_t
> FdOrErr
= sys::fs::openNativeFileForRead(Filename
);
265 return FdOrErr
.takeError();
268 if (auto EC
= sys::fs::file_size(Filename
, FileSize
))
269 return make_error
<StringError
>(
270 Twine("Cannot get filesize of '") + Filename
+ "'", EC
);
273 sys::fs::mapped_file_region
MappedFile(
274 *FdOrErr
, sys::fs::mapped_file_region::mapmode::readonly
, FileSize
, 0,
276 sys::fs::closeFile(*FdOrErr
);
278 return make_error
<StringError
>(
279 Twine("Cannot mmap profile '") + Filename
+ "'", EC
);
280 StringRef
Data(MappedFile
.data(), MappedFile
.size());
284 DataExtractor
Extractor(Data
, true, 8);
286 // For each block we get from the file:
287 while (Offset
!= MappedFile
.size()) {
288 auto HeaderOrError
= readBlockHeader(Extractor
, Offset
);
290 return HeaderOrError
.takeError();
292 // TODO: Maybe store this header information for each block, even just for
294 const auto &Header
= HeaderOrError
.get();
296 // Read in the path data.
297 auto PathOrError
= readPath(Extractor
, Offset
);
299 return PathOrError
.takeError();
300 const auto &Path
= PathOrError
.get();
302 // For each path we encounter, we should intern it to get a PathID.
303 auto DataOrError
= readData(Extractor
, Offset
);
305 return DataOrError
.takeError();
306 auto &Data
= DataOrError
.get();
309 P
.addBlock(Profile::Block
{Profile::ThreadID
{Header
.Thread
},
310 {{P
.internPath(Path
), std::move(Data
)}}}))
321 Profile::FuncID FuncId
;
326 Expected
<Profile
> profileFromTrace(const Trace
&T
) {
329 // The implementation of the algorithm re-creates the execution of
330 // the functions based on the trace data. To do this, we set up a number of
331 // data structures to track the execution context of every thread in the
333 DenseMap
<Profile::ThreadID
, std::vector
<StackEntry
>> ThreadStacks
;
334 DenseMap
<Profile::ThreadID
, DenseMap
<Profile::PathID
, Profile::Data
>>
337 // We then do a pass through the Trace to account data on a per-thread-basis.
338 for (const auto &E
: T
) {
339 auto &TSD
= ThreadStacks
[E
.TId
];
341 case RecordTypes::ENTER
:
342 case RecordTypes::ENTER_ARG
:
344 // Push entries into the function call stack.
345 TSD
.push_back({E
.TSC
, E
.FuncId
});
348 case RecordTypes::EXIT
:
349 case RecordTypes::TAIL_EXIT
:
351 // Exits cause some accounting to happen, based on the state of the stack.
352 // For each function we pop off the stack, we take note of the path and
353 // record the cumulative state for this path. As we're doing this, we
354 // intern the path into the Profile.
355 while (!TSD
.empty()) {
356 auto Top
= TSD
.back();
357 auto FunctionLocalTime
= AbsoluteDifference(Top
.Timestamp
, E
.TSC
);
358 SmallVector
<Profile::FuncID
, 16> Path
;
359 transform(reverse(TSD
), std::back_inserter(Path
),
360 std::mem_fn(&StackEntry::FuncId
));
361 auto InternedPath
= P
.internPath(Path
);
362 auto &TPD
= ThreadPathData
[E
.TId
][InternedPath
];
364 TPD
.CumulativeLocalTime
+= FunctionLocalTime
;
367 // If we've matched the corresponding entry event for this function,
368 // then we exit the loop.
369 if (Top
.FuncId
== E
.FuncId
)
372 // FIXME: Consider the intermediate times and the cumulative tree time
378 case RecordTypes::CUSTOM_EVENT
:
379 case RecordTypes::TYPED_EVENT
:
380 // TODO: Support an extension point to allow handling of custom and typed
381 // events in profiles.
386 // Once we've gone through the Trace, we now create one Block per thread in
388 for (const auto &ThreadPaths
: ThreadPathData
) {
389 const auto &TID
= ThreadPaths
.first
;
390 const auto &PathsData
= ThreadPaths
.second
;
391 if (auto E
= P
.addBlock({
393 std::vector
<std::pair
<Profile::PathID
, Profile::Data
>>(
394 PathsData
.begin(), PathsData
.end()),