[llvm-exegesis][NFC] Pass Instruction instead of bare Opcode
[llvm-core.git] / lib / XRay / Profile.cpp
blobfdd1953ab0f02e12487a5e205f0ac5c9d1b6ebe7
1 //===- Profile.cpp - XRay Profile Abstraction -----------------------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // Defines the XRay Profile class representing the latency profile generated by
11 // XRay's profiling mode.
13 //===----------------------------------------------------------------------===//
14 #include "llvm/XRay/Profile.h"
16 #include "llvm/Support/DataExtractor.h"
17 #include "llvm/Support/Error.h"
18 #include "llvm/Support/FileSystem.h"
19 #include "llvm/XRay/Trace.h"
20 #include <deque>
21 #include <memory>
23 namespace llvm {
24 namespace xray {
26 Profile::Profile(const Profile &O) {
27 // We need to re-create all the tries from the original (O), into the current
28 // Profile being initialized, through the Block instances we see.
29 for (const auto &Block : O) {
30 Blocks.push_back({Block.Thread, {}});
31 auto &B = Blocks.back();
32 for (const auto &PathData : Block.PathData)
33 B.PathData.push_back({internPath(cantFail(O.expandPath(PathData.first))),
34 PathData.second});
38 Profile &Profile::operator=(const Profile &O) {
39 Profile P = O;
40 *this = std::move(P);
41 return *this;
44 namespace {
46 struct BlockHeader {
47 uint32_t Size;
48 uint32_t Number;
49 uint64_t Thread;
52 static Expected<BlockHeader> readBlockHeader(DataExtractor &Extractor,
53 uint32_t &Offset) {
54 BlockHeader H;
55 uint32_t CurrentOffset = Offset;
56 H.Size = Extractor.getU32(&Offset);
57 if (Offset == CurrentOffset)
58 return make_error<StringError>(
59 Twine("Error parsing block header size at offset '") +
60 Twine(CurrentOffset) + "'",
61 std::make_error_code(std::errc::invalid_argument));
62 CurrentOffset = Offset;
63 H.Number = Extractor.getU32(&Offset);
64 if (Offset == CurrentOffset)
65 return make_error<StringError>(
66 Twine("Error parsing block header number at offset '") +
67 Twine(CurrentOffset) + "'",
68 std::make_error_code(std::errc::invalid_argument));
69 CurrentOffset = Offset;
70 H.Thread = Extractor.getU64(&Offset);
71 if (Offset == CurrentOffset)
72 return make_error<StringError>(
73 Twine("Error parsing block header thread id at offset '") +
74 Twine(CurrentOffset) + "'",
75 std::make_error_code(std::errc::invalid_argument));
76 return H;
79 static Expected<std::vector<Profile::FuncID>> readPath(DataExtractor &Extractor,
80 uint32_t &Offset) {
81 // We're reading a sequence of int32_t's until we find a 0.
82 std::vector<Profile::FuncID> Path;
83 auto CurrentOffset = Offset;
84 int32_t FuncId;
85 do {
86 FuncId = Extractor.getSigned(&Offset, 4);
87 if (CurrentOffset == Offset)
88 return make_error<StringError>(
89 Twine("Error parsing path at offset '") + Twine(CurrentOffset) + "'",
90 std::make_error_code(std::errc::invalid_argument));
91 CurrentOffset = Offset;
92 Path.push_back(FuncId);
93 } while (FuncId != 0);
94 return std::move(Path);
97 static Expected<Profile::Data> readData(DataExtractor &Extractor,
98 uint32_t &Offset) {
99 // We expect a certain number of elements for Data:
100 // - A 64-bit CallCount
101 // - A 64-bit CumulativeLocalTime counter
102 Profile::Data D;
103 auto CurrentOffset = Offset;
104 D.CallCount = Extractor.getU64(&Offset);
105 if (CurrentOffset == Offset)
106 return make_error<StringError>(
107 Twine("Error parsing call counts at offset '") + Twine(CurrentOffset) +
108 "'",
109 std::make_error_code(std::errc::invalid_argument));
110 CurrentOffset = Offset;
111 D.CumulativeLocalTime = Extractor.getU64(&Offset);
112 if (CurrentOffset == Offset)
113 return make_error<StringError>(
114 Twine("Error parsing cumulative local time at offset '") +
115 Twine(CurrentOffset) + "'",
116 std::make_error_code(std::errc::invalid_argument));
117 return D;
120 } // namespace
122 Error Profile::addBlock(Block &&B) {
123 if (B.PathData.empty())
124 return make_error<StringError>(
125 "Block may not have empty path data.",
126 std::make_error_code(std::errc::invalid_argument));
128 Blocks.emplace_back(std::move(B));
129 return Error::success();
132 Expected<std::vector<Profile::FuncID>> Profile::expandPath(PathID P) const {
133 auto It = PathIDMap.find(P);
134 if (It == PathIDMap.end())
135 return make_error<StringError>(
136 Twine("PathID not found: ") + Twine(P),
137 std::make_error_code(std::errc::invalid_argument));
138 std::vector<Profile::FuncID> Path;
139 for (auto Node = It->second; Node; Node = Node->Caller)
140 Path.push_back(Node->Func);
141 return std::move(Path);
144 Profile::PathID Profile::internPath(ArrayRef<FuncID> P) {
145 if (P.empty())
146 return 0;
148 auto RootToLeafPath = reverse(P);
150 // Find the root.
151 auto It = RootToLeafPath.begin();
152 auto PathRoot = *It++;
153 auto RootIt =
154 find_if(Roots, [PathRoot](TrieNode *N) { return N->Func == PathRoot; });
156 // If we've not seen this root before, remember it.
157 TrieNode *Node = nullptr;
158 if (RootIt == Roots.end()) {
159 NodeStorage.emplace_back();
160 Node = &NodeStorage.back();
161 Node->Func = PathRoot;
162 Roots.push_back(Node);
163 } else {
164 Node = *RootIt;
167 // Now traverse the path, re-creating if necessary.
168 while (It != RootToLeafPath.end()) {
169 auto NodeFuncID = *It++;
170 auto CalleeIt = find_if(Node->Callees, [NodeFuncID](TrieNode *N) {
171 return N->Func == NodeFuncID;
173 if (CalleeIt == Node->Callees.end()) {
174 NodeStorage.emplace_back();
175 auto NewNode = &NodeStorage.back();
176 NewNode->Func = NodeFuncID;
177 NewNode->Caller = Node;
178 Node->Callees.push_back(NewNode);
179 Node = NewNode;
180 } else {
181 Node = *CalleeIt;
185 // At this point, Node *must* be pointing at the leaf.
186 assert(Node->Func == P.front());
187 if (Node->ID == 0) {
188 Node->ID = NextID++;
189 PathIDMap.insert({Node->ID, Node});
191 return Node->ID;
194 Profile mergeProfilesByThread(const Profile &L, const Profile &R) {
195 Profile Merged;
196 using PathDataMap = DenseMap<Profile::PathID, Profile::Data>;
197 using PathDataMapPtr = std::unique_ptr<PathDataMap>;
198 using PathDataVector = decltype(Profile::Block::PathData);
199 using ThreadProfileIndexMap = DenseMap<Profile::ThreadID, PathDataMapPtr>;
200 ThreadProfileIndexMap ThreadProfileIndex;
202 for (const auto &P : {std::ref(L), std::ref(R)})
203 for (const auto &Block : P.get()) {
204 ThreadProfileIndexMap::iterator It;
205 std::tie(It, std::ignore) = ThreadProfileIndex.insert(
206 {Block.Thread, PathDataMapPtr{new PathDataMap()}});
207 for (const auto &PathAndData : Block.PathData) {
208 auto &PathID = PathAndData.first;
209 auto &Data = PathAndData.second;
210 auto NewPathID =
211 Merged.internPath(cantFail(P.get().expandPath(PathID)));
212 PathDataMap::iterator PathDataIt;
213 bool Inserted;
214 std::tie(PathDataIt, Inserted) = It->second->insert({NewPathID, Data});
215 if (!Inserted) {
216 auto &ExistingData = PathDataIt->second;
217 ExistingData.CallCount += Data.CallCount;
218 ExistingData.CumulativeLocalTime += Data.CumulativeLocalTime;
223 for (const auto &IndexedThreadBlock : ThreadProfileIndex) {
224 PathDataVector PathAndData;
225 PathAndData.reserve(IndexedThreadBlock.second->size());
226 copy(*IndexedThreadBlock.second, std::back_inserter(PathAndData));
227 cantFail(
228 Merged.addBlock({IndexedThreadBlock.first, std::move(PathAndData)}));
230 return Merged;
233 Profile mergeProfilesByStack(const Profile &L, const Profile &R) {
234 Profile Merged;
235 using PathDataMap = DenseMap<Profile::PathID, Profile::Data>;
236 PathDataMap PathData;
237 using PathDataVector = decltype(Profile::Block::PathData);
238 for (const auto &P : {std::ref(L), std::ref(R)})
239 for (const auto &Block : P.get())
240 for (const auto &PathAndData : Block.PathData) {
241 auto &PathId = PathAndData.first;
242 auto &Data = PathAndData.second;
243 auto NewPathID =
244 Merged.internPath(cantFail(P.get().expandPath(PathId)));
245 PathDataMap::iterator PathDataIt;
246 bool Inserted;
247 std::tie(PathDataIt, Inserted) = PathData.insert({NewPathID, Data});
248 if (!Inserted) {
249 auto &ExistingData = PathDataIt->second;
250 ExistingData.CallCount += Data.CallCount;
251 ExistingData.CumulativeLocalTime += Data.CumulativeLocalTime;
255 // In the end there's a single Block, for thread 0.
256 PathDataVector Block;
257 Block.reserve(PathData.size());
258 copy(PathData, std::back_inserter(Block));
259 cantFail(Merged.addBlock({0, std::move(Block)}));
260 return Merged;
263 Expected<Profile> loadProfile(StringRef Filename) {
264 int Fd;
265 if (auto EC = sys::fs::openFileForRead(Filename, Fd))
266 return make_error<StringError>(
267 Twine("Cannot read profile from '") + Filename + "'", EC);
269 uint64_t FileSize;
270 if (auto EC = sys::fs::file_size(Filename, FileSize))
271 return make_error<StringError>(
272 Twine("Cannot get filesize of '") + Filename + "'", EC);
274 std::error_code EC;
275 sys::fs::mapped_file_region MappedFile(
276 Fd, sys::fs::mapped_file_region::mapmode::readonly, FileSize, 0, EC);
277 if (EC)
278 return make_error<StringError>(
279 Twine("Cannot mmap profile '") + Filename + "'", EC);
280 StringRef Data(MappedFile.data(), MappedFile.size());
282 Profile P;
283 uint32_t Offset = 0;
284 DataExtractor Extractor(Data, true, 8);
286 // For each block we get from the file:
287 while (Offset != MappedFile.size()) {
288 auto HeaderOrError = readBlockHeader(Extractor, Offset);
289 if (!HeaderOrError)
290 return HeaderOrError.takeError();
292 // TODO: Maybe store this header information for each block, even just for
293 // debugging?
294 const auto &Header = HeaderOrError.get();
296 // Read in the path data.
297 auto PathOrError = readPath(Extractor, Offset);
298 if (!PathOrError)
299 return PathOrError.takeError();
300 const auto &Path = PathOrError.get();
302 // For each path we encounter, we should intern it to get a PathID.
303 auto DataOrError = readData(Extractor, Offset);
304 if (!DataOrError)
305 return DataOrError.takeError();
306 auto &Data = DataOrError.get();
308 if (auto E =
309 P.addBlock(Profile::Block{Profile::ThreadID{Header.Thread},
310 {{P.internPath(Path), std::move(Data)}}}))
311 return std::move(E);
314 return P;
317 namespace {
319 struct StackEntry {
320 uint64_t Timestamp;
321 Profile::FuncID FuncId;
324 } // namespace
326 Expected<Profile> profileFromTrace(const Trace &T) {
327 Profile P;
329 // The implementation of the algorithm re-creates the execution of
330 // the functions based on the trace data. To do this, we set up a number of
331 // data structures to track the execution context of every thread in the
332 // Trace.
333 DenseMap<Profile::ThreadID, std::vector<StackEntry>> ThreadStacks;
334 DenseMap<Profile::ThreadID, DenseMap<Profile::PathID, Profile::Data>>
335 ThreadPathData;
337 // We then do a pass through the Trace to account data on a per-thread-basis.
338 for (const auto &E : T) {
339 auto &TSD = ThreadStacks[E.TId];
340 switch (E.Type) {
341 case RecordTypes::ENTER:
342 case RecordTypes::ENTER_ARG:
344 // Push entries into the function call stack.
345 TSD.push_back({E.TSC, E.FuncId});
346 break;
348 case RecordTypes::EXIT:
349 case RecordTypes::TAIL_EXIT:
351 // Exits cause some accounting to happen, based on the state of the stack.
352 // For each function we pop off the stack, we take note of the path and
353 // record the cumulative state for this path. As we're doing this, we
354 // intern the path into the Profile.
355 while (!TSD.empty()) {
356 auto Top = TSD.back();
357 auto FunctionLocalTime = AbsoluteDifference(Top.Timestamp, E.TSC);
358 SmallVector<Profile::FuncID, 16> Path;
359 transform(reverse(TSD), std::back_inserter(Path),
360 std::mem_fn(&StackEntry::FuncId));
361 auto InternedPath = P.internPath(Path);
362 auto &TPD = ThreadPathData[E.TId][InternedPath];
363 ++TPD.CallCount;
364 TPD.CumulativeLocalTime += FunctionLocalTime;
365 TSD.pop_back();
367 // If we've matched the corresponding entry event for this function,
368 // then we exit the loop.
369 if (Top.FuncId == E.FuncId)
370 break;
372 // FIXME: Consider the intermediate times and the cumulative tree time
373 // as well.
376 break;
380 // Once we've gone through the Trace, we now create one Block per thread in
381 // the Profile.
382 for (const auto &ThreadPaths : ThreadPathData) {
383 const auto &TID = ThreadPaths.first;
384 const auto &PathsData = ThreadPaths.second;
385 if (auto E = P.addBlock({
386 TID,
387 std::vector<std::pair<Profile::PathID, Profile::Data>>(
388 PathsData.begin(), PathsData.end()),
390 return std::move(E);
393 return P;
396 } // namespace xray
397 } // namespace llvm