1 //===- Profile.cpp - XRay Profile Abstraction -----------------------------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // Defines the XRay Profile class representing the latency profile generated by
11 // XRay's profiling mode.
13 //===----------------------------------------------------------------------===//
14 #include "llvm/XRay/Profile.h"
16 #include "llvm/Support/DataExtractor.h"
17 #include "llvm/Support/Error.h"
18 #include "llvm/Support/FileSystem.h"
19 #include "llvm/XRay/Trace.h"
26 Profile::Profile(const Profile
&O
) {
27 // We need to re-create all the tries from the original (O), into the current
28 // Profile being initialized, through the Block instances we see.
29 for (const auto &Block
: O
) {
30 Blocks
.push_back({Block
.Thread
, {}});
31 auto &B
= Blocks
.back();
32 for (const auto &PathData
: Block
.PathData
)
33 B
.PathData
.push_back({internPath(cantFail(O
.expandPath(PathData
.first
))),
38 Profile
&Profile::operator=(const Profile
&O
) {
52 static Expected
<BlockHeader
> readBlockHeader(DataExtractor
&Extractor
,
55 uint32_t CurrentOffset
= Offset
;
56 H
.Size
= Extractor
.getU32(&Offset
);
57 if (Offset
== CurrentOffset
)
58 return make_error
<StringError
>(
59 Twine("Error parsing block header size at offset '") +
60 Twine(CurrentOffset
) + "'",
61 std::make_error_code(std::errc::invalid_argument
));
62 CurrentOffset
= Offset
;
63 H
.Number
= Extractor
.getU32(&Offset
);
64 if (Offset
== CurrentOffset
)
65 return make_error
<StringError
>(
66 Twine("Error parsing block header number at offset '") +
67 Twine(CurrentOffset
) + "'",
68 std::make_error_code(std::errc::invalid_argument
));
69 CurrentOffset
= Offset
;
70 H
.Thread
= Extractor
.getU64(&Offset
);
71 if (Offset
== CurrentOffset
)
72 return make_error
<StringError
>(
73 Twine("Error parsing block header thread id at offset '") +
74 Twine(CurrentOffset
) + "'",
75 std::make_error_code(std::errc::invalid_argument
));
79 static Expected
<std::vector
<Profile::FuncID
>> readPath(DataExtractor
&Extractor
,
81 // We're reading a sequence of int32_t's until we find a 0.
82 std::vector
<Profile::FuncID
> Path
;
83 auto CurrentOffset
= Offset
;
86 FuncId
= Extractor
.getSigned(&Offset
, 4);
87 if (CurrentOffset
== Offset
)
88 return make_error
<StringError
>(
89 Twine("Error parsing path at offset '") + Twine(CurrentOffset
) + "'",
90 std::make_error_code(std::errc::invalid_argument
));
91 CurrentOffset
= Offset
;
92 Path
.push_back(FuncId
);
93 } while (FuncId
!= 0);
94 return std::move(Path
);
97 static Expected
<Profile::Data
> readData(DataExtractor
&Extractor
,
99 // We expect a certain number of elements for Data:
100 // - A 64-bit CallCount
101 // - A 64-bit CumulativeLocalTime counter
103 auto CurrentOffset
= Offset
;
104 D
.CallCount
= Extractor
.getU64(&Offset
);
105 if (CurrentOffset
== Offset
)
106 return make_error
<StringError
>(
107 Twine("Error parsing call counts at offset '") + Twine(CurrentOffset
) +
109 std::make_error_code(std::errc::invalid_argument
));
110 CurrentOffset
= Offset
;
111 D
.CumulativeLocalTime
= Extractor
.getU64(&Offset
);
112 if (CurrentOffset
== Offset
)
113 return make_error
<StringError
>(
114 Twine("Error parsing cumulative local time at offset '") +
115 Twine(CurrentOffset
) + "'",
116 std::make_error_code(std::errc::invalid_argument
));
122 Error
Profile::addBlock(Block
&&B
) {
123 if (B
.PathData
.empty())
124 return make_error
<StringError
>(
125 "Block may not have empty path data.",
126 std::make_error_code(std::errc::invalid_argument
));
128 Blocks
.emplace_back(std::move(B
));
129 return Error::success();
132 Expected
<std::vector
<Profile::FuncID
>> Profile::expandPath(PathID P
) const {
133 auto It
= PathIDMap
.find(P
);
134 if (It
== PathIDMap
.end())
135 return make_error
<StringError
>(
136 Twine("PathID not found: ") + Twine(P
),
137 std::make_error_code(std::errc::invalid_argument
));
138 std::vector
<Profile::FuncID
> Path
;
139 for (auto Node
= It
->second
; Node
; Node
= Node
->Caller
)
140 Path
.push_back(Node
->Func
);
141 return std::move(Path
);
144 Profile::PathID
Profile::internPath(ArrayRef
<FuncID
> P
) {
148 auto RootToLeafPath
= reverse(P
);
151 auto It
= RootToLeafPath
.begin();
152 auto PathRoot
= *It
++;
154 find_if(Roots
, [PathRoot
](TrieNode
*N
) { return N
->Func
== PathRoot
; });
156 // If we've not seen this root before, remember it.
157 TrieNode
*Node
= nullptr;
158 if (RootIt
== Roots
.end()) {
159 NodeStorage
.emplace_back();
160 Node
= &NodeStorage
.back();
161 Node
->Func
= PathRoot
;
162 Roots
.push_back(Node
);
167 // Now traverse the path, re-creating if necessary.
168 while (It
!= RootToLeafPath
.end()) {
169 auto NodeFuncID
= *It
++;
170 auto CalleeIt
= find_if(Node
->Callees
, [NodeFuncID
](TrieNode
*N
) {
171 return N
->Func
== NodeFuncID
;
173 if (CalleeIt
== Node
->Callees
.end()) {
174 NodeStorage
.emplace_back();
175 auto NewNode
= &NodeStorage
.back();
176 NewNode
->Func
= NodeFuncID
;
177 NewNode
->Caller
= Node
;
178 Node
->Callees
.push_back(NewNode
);
185 // At this point, Node *must* be pointing at the leaf.
186 assert(Node
->Func
== P
.front());
189 PathIDMap
.insert({Node
->ID
, Node
});
194 Profile
mergeProfilesByThread(const Profile
&L
, const Profile
&R
) {
196 using PathDataMap
= DenseMap
<Profile::PathID
, Profile::Data
>;
197 using PathDataMapPtr
= std::unique_ptr
<PathDataMap
>;
198 using PathDataVector
= decltype(Profile::Block::PathData
);
199 using ThreadProfileIndexMap
= DenseMap
<Profile::ThreadID
, PathDataMapPtr
>;
200 ThreadProfileIndexMap ThreadProfileIndex
;
202 for (const auto &P
: {std::ref(L
), std::ref(R
)})
203 for (const auto &Block
: P
.get()) {
204 ThreadProfileIndexMap::iterator It
;
205 std::tie(It
, std::ignore
) = ThreadProfileIndex
.insert(
206 {Block
.Thread
, PathDataMapPtr
{new PathDataMap()}});
207 for (const auto &PathAndData
: Block
.PathData
) {
208 auto &PathID
= PathAndData
.first
;
209 auto &Data
= PathAndData
.second
;
211 Merged
.internPath(cantFail(P
.get().expandPath(PathID
)));
212 PathDataMap::iterator PathDataIt
;
214 std::tie(PathDataIt
, Inserted
) = It
->second
->insert({NewPathID
, Data
});
216 auto &ExistingData
= PathDataIt
->second
;
217 ExistingData
.CallCount
+= Data
.CallCount
;
218 ExistingData
.CumulativeLocalTime
+= Data
.CumulativeLocalTime
;
223 for (const auto &IndexedThreadBlock
: ThreadProfileIndex
) {
224 PathDataVector PathAndData
;
225 PathAndData
.reserve(IndexedThreadBlock
.second
->size());
226 copy(*IndexedThreadBlock
.second
, std::back_inserter(PathAndData
));
228 Merged
.addBlock({IndexedThreadBlock
.first
, std::move(PathAndData
)}));
233 Profile
mergeProfilesByStack(const Profile
&L
, const Profile
&R
) {
235 using PathDataMap
= DenseMap
<Profile::PathID
, Profile::Data
>;
236 PathDataMap PathData
;
237 using PathDataVector
= decltype(Profile::Block::PathData
);
238 for (const auto &P
: {std::ref(L
), std::ref(R
)})
239 for (const auto &Block
: P
.get())
240 for (const auto &PathAndData
: Block
.PathData
) {
241 auto &PathId
= PathAndData
.first
;
242 auto &Data
= PathAndData
.second
;
244 Merged
.internPath(cantFail(P
.get().expandPath(PathId
)));
245 PathDataMap::iterator PathDataIt
;
247 std::tie(PathDataIt
, Inserted
) = PathData
.insert({NewPathID
, Data
});
249 auto &ExistingData
= PathDataIt
->second
;
250 ExistingData
.CallCount
+= Data
.CallCount
;
251 ExistingData
.CumulativeLocalTime
+= Data
.CumulativeLocalTime
;
255 // In the end there's a single Block, for thread 0.
256 PathDataVector Block
;
257 Block
.reserve(PathData
.size());
258 copy(PathData
, std::back_inserter(Block
));
259 cantFail(Merged
.addBlock({0, std::move(Block
)}));
263 Expected
<Profile
> loadProfile(StringRef Filename
) {
265 if (auto EC
= sys::fs::openFileForRead(Filename
, Fd
))
266 return make_error
<StringError
>(
267 Twine("Cannot read profile from '") + Filename
+ "'", EC
);
270 if (auto EC
= sys::fs::file_size(Filename
, FileSize
))
271 return make_error
<StringError
>(
272 Twine("Cannot get filesize of '") + Filename
+ "'", EC
);
275 sys::fs::mapped_file_region
MappedFile(
276 Fd
, sys::fs::mapped_file_region::mapmode::readonly
, FileSize
, 0, EC
);
278 return make_error
<StringError
>(
279 Twine("Cannot mmap profile '") + Filename
+ "'", EC
);
280 StringRef
Data(MappedFile
.data(), MappedFile
.size());
284 DataExtractor
Extractor(Data
, true, 8);
286 // For each block we get from the file:
287 while (Offset
!= MappedFile
.size()) {
288 auto HeaderOrError
= readBlockHeader(Extractor
, Offset
);
290 return HeaderOrError
.takeError();
292 // TODO: Maybe store this header information for each block, even just for
294 const auto &Header
= HeaderOrError
.get();
296 // Read in the path data.
297 auto PathOrError
= readPath(Extractor
, Offset
);
299 return PathOrError
.takeError();
300 const auto &Path
= PathOrError
.get();
302 // For each path we encounter, we should intern it to get a PathID.
303 auto DataOrError
= readData(Extractor
, Offset
);
305 return DataOrError
.takeError();
306 auto &Data
= DataOrError
.get();
309 P
.addBlock(Profile::Block
{Profile::ThreadID
{Header
.Thread
},
310 {{P
.internPath(Path
), std::move(Data
)}}}))
321 Profile::FuncID FuncId
;
326 Expected
<Profile
> profileFromTrace(const Trace
&T
) {
329 // The implementation of the algorithm re-creates the execution of
330 // the functions based on the trace data. To do this, we set up a number of
331 // data structures to track the execution context of every thread in the
333 DenseMap
<Profile::ThreadID
, std::vector
<StackEntry
>> ThreadStacks
;
334 DenseMap
<Profile::ThreadID
, DenseMap
<Profile::PathID
, Profile::Data
>>
337 // We then do a pass through the Trace to account data on a per-thread-basis.
338 for (const auto &E
: T
) {
339 auto &TSD
= ThreadStacks
[E
.TId
];
341 case RecordTypes::ENTER
:
342 case RecordTypes::ENTER_ARG
:
344 // Push entries into the function call stack.
345 TSD
.push_back({E
.TSC
, E
.FuncId
});
348 case RecordTypes::EXIT
:
349 case RecordTypes::TAIL_EXIT
:
351 // Exits cause some accounting to happen, based on the state of the stack.
352 // For each function we pop off the stack, we take note of the path and
353 // record the cumulative state for this path. As we're doing this, we
354 // intern the path into the Profile.
355 while (!TSD
.empty()) {
356 auto Top
= TSD
.back();
357 auto FunctionLocalTime
= AbsoluteDifference(Top
.Timestamp
, E
.TSC
);
358 SmallVector
<Profile::FuncID
, 16> Path
;
359 transform(reverse(TSD
), std::back_inserter(Path
),
360 std::mem_fn(&StackEntry::FuncId
));
361 auto InternedPath
= P
.internPath(Path
);
362 auto &TPD
= ThreadPathData
[E
.TId
][InternedPath
];
364 TPD
.CumulativeLocalTime
+= FunctionLocalTime
;
367 // If we've matched the corresponding entry event for this function,
368 // then we exit the loop.
369 if (Top
.FuncId
== E
.FuncId
)
372 // FIXME: Consider the intermediate times and the cumulative tree time
380 // Once we've gone through the Trace, we now create one Block per thread in
382 for (const auto &ThreadPaths
: ThreadPathData
) {
383 const auto &TID
= ThreadPaths
.first
;
384 const auto &PathsData
= ThreadPaths
.second
;
385 if (auto E
= P
.addBlock({
387 std::vector
<std::pair
<Profile::PathID
, Profile::Data
>>(
388 PathsData
.begin(), PathsData
.end()),