[llvm-objcopy] - Reimplement strip-dwo-groups.test to stop using the precompiled...
[llvm-complete.git] / lib / XRay / Trace.cpp
blobb9b67c561c6606272e050917e9a6237cff554dc2
1 //===- Trace.cpp - XRay Trace Loading implementation. ---------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // XRay log reader implementation.
11 //===----------------------------------------------------------------------===//
12 #include "llvm/XRay/Trace.h"
13 #include "llvm/ADT/STLExtras.h"
14 #include "llvm/Support/DataExtractor.h"
15 #include "llvm/Support/Error.h"
16 #include "llvm/Support/FileSystem.h"
17 #include "llvm/XRay/BlockIndexer.h"
18 #include "llvm/XRay/BlockVerifier.h"
19 #include "llvm/XRay/FDRRecordConsumer.h"
20 #include "llvm/XRay/FDRRecordProducer.h"
21 #include "llvm/XRay/FDRRecords.h"
22 #include "llvm/XRay/FDRTraceExpander.h"
23 #include "llvm/XRay/FileHeaderReader.h"
24 #include "llvm/XRay/YAMLXRayRecord.h"
25 #include <memory>
26 #include <vector>
28 using namespace llvm;
29 using namespace llvm::xray;
30 using llvm::yaml::Input;
32 namespace {
33 using XRayRecordStorage =
34 std::aligned_storage<sizeof(XRayRecord), alignof(XRayRecord)>::type;
36 Error loadNaiveFormatLog(StringRef Data, bool IsLittleEndian,
37 XRayFileHeader &FileHeader,
38 std::vector<XRayRecord> &Records) {
39 if (Data.size() < 32)
40 return make_error<StringError>(
41 "Not enough bytes for an XRay log.",
42 std::make_error_code(std::errc::invalid_argument));
44 if (Data.size() - 32 == 0 || Data.size() % 32 != 0)
45 return make_error<StringError>(
46 "Invalid-sized XRay data.",
47 std::make_error_code(std::errc::invalid_argument));
49 DataExtractor Reader(Data, IsLittleEndian, 8);
50 uint32_t OffsetPtr = 0;
51 auto FileHeaderOrError = readBinaryFormatHeader(Reader, OffsetPtr);
52 if (!FileHeaderOrError)
53 return FileHeaderOrError.takeError();
54 FileHeader = std::move(FileHeaderOrError.get());
56 // Each record after the header will be 32 bytes, in the following format:
58 // (2) uint16 : record type
59 // (1) uint8 : cpu id
60 // (1) uint8 : type
61 // (4) sint32 : function id
62 // (8) uint64 : tsc
63 // (4) uint32 : thread id
64 // (4) uint32 : process id
65 // (8) - : padding
66 while (Reader.isValidOffset(OffsetPtr)) {
67 if (!Reader.isValidOffsetForDataOfSize(OffsetPtr, 32))
68 return createStringError(
69 std::make_error_code(std::errc::executable_format_error),
70 "Not enough bytes to read a full record at offset %d.", OffsetPtr);
71 auto PreReadOffset = OffsetPtr;
72 auto RecordType = Reader.getU16(&OffsetPtr);
73 if (OffsetPtr == PreReadOffset)
74 return createStringError(
75 std::make_error_code(std::errc::executable_format_error),
76 "Failed reading record type at offset %d.", OffsetPtr);
78 switch (RecordType) {
79 case 0: { // Normal records.
80 Records.emplace_back();
81 auto &Record = Records.back();
82 Record.RecordType = RecordType;
84 PreReadOffset = OffsetPtr;
85 Record.CPU = Reader.getU8(&OffsetPtr);
86 if (OffsetPtr == PreReadOffset)
87 return createStringError(
88 std::make_error_code(std::errc::executable_format_error),
89 "Failed reading CPU field at offset %d.", OffsetPtr);
91 PreReadOffset = OffsetPtr;
92 auto Type = Reader.getU8(&OffsetPtr);
93 if (OffsetPtr == PreReadOffset)
94 return createStringError(
95 std::make_error_code(std::errc::executable_format_error),
96 "Failed reading record type field at offset %d.", OffsetPtr);
98 switch (Type) {
99 case 0:
100 Record.Type = RecordTypes::ENTER;
101 break;
102 case 1:
103 Record.Type = RecordTypes::EXIT;
104 break;
105 case 2:
106 Record.Type = RecordTypes::TAIL_EXIT;
107 break;
108 case 3:
109 Record.Type = RecordTypes::ENTER_ARG;
110 break;
111 default:
112 return createStringError(
113 std::make_error_code(std::errc::executable_format_error),
114 "Unknown record type '%d' at offset %d.", Type, OffsetPtr);
117 PreReadOffset = OffsetPtr;
118 Record.FuncId = Reader.getSigned(&OffsetPtr, sizeof(int32_t));
119 if (OffsetPtr == PreReadOffset)
120 return createStringError(
121 std::make_error_code(std::errc::executable_format_error),
122 "Failed reading function id field at offset %d.", OffsetPtr);
124 PreReadOffset = OffsetPtr;
125 Record.TSC = Reader.getU64(&OffsetPtr);
126 if (OffsetPtr == PreReadOffset)
127 return createStringError(
128 std::make_error_code(std::errc::executable_format_error),
129 "Failed reading TSC field at offset %d.", OffsetPtr);
131 PreReadOffset = OffsetPtr;
132 Record.TId = Reader.getU32(&OffsetPtr);
133 if (OffsetPtr == PreReadOffset)
134 return createStringError(
135 std::make_error_code(std::errc::executable_format_error),
136 "Failed reading thread id field at offset %d.", OffsetPtr);
138 PreReadOffset = OffsetPtr;
139 Record.PId = Reader.getU32(&OffsetPtr);
140 if (OffsetPtr == PreReadOffset)
141 return createStringError(
142 std::make_error_code(std::errc::executable_format_error),
143 "Failed reading process id at offset %d.", OffsetPtr);
145 break;
147 case 1: { // Arg payload record.
148 auto &Record = Records.back();
150 // We skip the next two bytes of the record, because we don't need the
151 // type and the CPU record for arg payloads.
152 OffsetPtr += 2;
153 PreReadOffset = OffsetPtr;
154 int32_t FuncId = Reader.getSigned(&OffsetPtr, sizeof(int32_t));
155 if (OffsetPtr == PreReadOffset)
156 return createStringError(
157 std::make_error_code(std::errc::executable_format_error),
158 "Failed reading function id field at offset %d.", OffsetPtr);
160 PreReadOffset = OffsetPtr;
161 auto TId = Reader.getU32(&OffsetPtr);
162 if (OffsetPtr == PreReadOffset)
163 return createStringError(
164 std::make_error_code(std::errc::executable_format_error),
165 "Failed reading thread id field at offset %d.", OffsetPtr);
167 PreReadOffset = OffsetPtr;
168 auto PId = Reader.getU32(&OffsetPtr);
169 if (OffsetPtr == PreReadOffset)
170 return createStringError(
171 std::make_error_code(std::errc::executable_format_error),
172 "Failed reading process id field at offset %d.", OffsetPtr);
174 // Make a check for versions above 3 for the Pid field
175 if (Record.FuncId != FuncId || Record.TId != TId ||
176 (FileHeader.Version >= 3 ? Record.PId != PId : false))
177 return createStringError(
178 std::make_error_code(std::errc::executable_format_error),
179 "Corrupted log, found arg payload following non-matching "
180 "function+thread record. Record for function %d != %d at offset "
181 "%d",
182 Record.FuncId, FuncId, OffsetPtr);
184 PreReadOffset = OffsetPtr;
185 auto Arg = Reader.getU64(&OffsetPtr);
186 if (OffsetPtr == PreReadOffset)
187 return createStringError(
188 std::make_error_code(std::errc::executable_format_error),
189 "Failed reading argument payload at offset %d.", OffsetPtr);
191 Record.CallArgs.push_back(Arg);
192 break;
194 default:
195 return createStringError(
196 std::make_error_code(std::errc::executable_format_error),
197 "Unknown record type '%d' at offset %d.", RecordType, OffsetPtr);
199 // Advance the offset pointer enough bytes to align to 32-byte records for
200 // basic mode logs.
201 OffsetPtr += 8;
203 return Error::success();
206 /// Reads a log in FDR mode for version 1 of this binary format. FDR mode is
207 /// defined as part of the compiler-rt project in xray_fdr_logging.h, and such
208 /// a log consists of the familiar 32 bit XRayHeader, followed by sequences of
209 /// of interspersed 16 byte Metadata Records and 8 byte Function Records.
211 /// The following is an attempt to document the grammar of the format, which is
212 /// parsed by this function for little-endian machines. Since the format makes
213 /// use of BitFields, when we support big-endian architectures, we will need to
214 /// adjust not only the endianness parameter to llvm's RecordExtractor, but also
215 /// the bit twiddling logic, which is consistent with the little-endian
216 /// convention that BitFields within a struct will first be packed into the
217 /// least significant bits the address they belong to.
219 /// We expect a format complying with the grammar in the following pseudo-EBNF
220 /// in Version 1 of the FDR log.
222 /// FDRLog: XRayFileHeader ThreadBuffer*
223 /// XRayFileHeader: 32 bytes to identify the log as FDR with machine metadata.
224 /// Includes BufferSize
225 /// ThreadBuffer: NewBuffer WallClockTime NewCPUId FunctionSequence EOB
226 /// BufSize: 8 byte unsigned integer indicating how large the buffer is.
227 /// NewBuffer: 16 byte metadata record with Thread Id.
228 /// WallClockTime: 16 byte metadata record with human readable time.
229 /// Pid: 16 byte metadata record with Pid
230 /// NewCPUId: 16 byte metadata record with CPUId and a 64 bit TSC reading.
231 /// EOB: 16 byte record in a thread buffer plus mem garbage to fill BufSize.
232 /// FunctionSequence: NewCPUId | TSCWrap | FunctionRecord
233 /// TSCWrap: 16 byte metadata record with a full 64 bit TSC reading.
234 /// FunctionRecord: 8 byte record with FunctionId, entry/exit, and TSC delta.
236 /// In Version 2, we make the following changes:
238 /// ThreadBuffer: BufferExtents NewBuffer WallClockTime NewCPUId
239 /// FunctionSequence
240 /// BufferExtents: 16 byte metdata record describing how many usable bytes are
241 /// in the buffer. This is measured from the start of the buffer
242 /// and must always be at least 48 (bytes).
244 /// In Version 3, we make the following changes:
246 /// ThreadBuffer: BufferExtents NewBuffer WallClockTime Pid NewCPUId
247 /// FunctionSequence
248 /// EOB: *deprecated*
250 /// In Version 4, we make the following changes:
252 /// CustomEventRecord now includes the CPU data.
254 /// In Version 5, we make the following changes:
256 /// CustomEventRecord and TypedEventRecord now use TSC delta encoding similar to
257 /// what FunctionRecord instances use, and we no longer need to include the CPU
258 /// id in the CustomEventRecord.
260 Error loadFDRLog(StringRef Data, bool IsLittleEndian,
261 XRayFileHeader &FileHeader, std::vector<XRayRecord> &Records) {
263 if (Data.size() < 32)
264 return createStringError(std::make_error_code(std::errc::invalid_argument),
265 "Not enough bytes for an XRay FDR log.");
266 DataExtractor DE(Data, IsLittleEndian, 8);
268 uint32_t OffsetPtr = 0;
269 auto FileHeaderOrError = readBinaryFormatHeader(DE, OffsetPtr);
270 if (!FileHeaderOrError)
271 return FileHeaderOrError.takeError();
272 FileHeader = std::move(FileHeaderOrError.get());
274 // First we load the records into memory.
275 std::vector<std::unique_ptr<Record>> FDRRecords;
278 FileBasedRecordProducer P(FileHeader, DE, OffsetPtr);
279 LogBuilderConsumer C(FDRRecords);
280 while (DE.isValidOffsetForDataOfSize(OffsetPtr, 1)) {
281 auto R = P.produce();
282 if (!R)
283 return R.takeError();
284 if (auto E = C.consume(std::move(R.get())))
285 return E;
289 // Next we index the records into blocks.
290 BlockIndexer::Index Index;
292 BlockIndexer Indexer(Index);
293 for (auto &R : FDRRecords)
294 if (auto E = R->apply(Indexer))
295 return E;
296 if (auto E = Indexer.flush())
297 return E;
300 // Then we verify the consistency of the blocks.
302 for (auto &PTB : Index) {
303 auto &Blocks = PTB.second;
304 for (auto &B : Blocks) {
305 BlockVerifier Verifier;
306 for (auto *R : B.Records)
307 if (auto E = R->apply(Verifier))
308 return E;
309 if (auto E = Verifier.verify())
310 return E;
315 // This is now the meat of the algorithm. Here we sort the blocks according to
316 // the Walltime record in each of the blocks for the same thread. This allows
317 // us to more consistently recreate the execution trace in temporal order.
318 // After the sort, we then reconstitute `Trace` records using a stateful
319 // visitor associated with a single process+thread pair.
321 for (auto &PTB : Index) {
322 auto &Blocks = PTB.second;
323 llvm::sort(Blocks, [](const BlockIndexer::Block &L,
324 const BlockIndexer::Block &R) {
325 return (L.WallclockTime->seconds() < R.WallclockTime->seconds() &&
326 L.WallclockTime->nanos() < R.WallclockTime->nanos());
328 auto Adder = [&](const XRayRecord &R) { Records.push_back(R); };
329 TraceExpander Expander(Adder, FileHeader.Version);
330 for (auto &B : Blocks) {
331 for (auto *R : B.Records)
332 if (auto E = R->apply(Expander))
333 return E;
335 if (auto E = Expander.flush())
336 return E;
340 return Error::success();
343 Error loadYAMLLog(StringRef Data, XRayFileHeader &FileHeader,
344 std::vector<XRayRecord> &Records) {
345 YAMLXRayTrace Trace;
346 Input In(Data);
347 In >> Trace;
348 if (In.error())
349 return make_error<StringError>("Failed loading YAML Data.", In.error());
351 FileHeader.Version = Trace.Header.Version;
352 FileHeader.Type = Trace.Header.Type;
353 FileHeader.ConstantTSC = Trace.Header.ConstantTSC;
354 FileHeader.NonstopTSC = Trace.Header.NonstopTSC;
355 FileHeader.CycleFrequency = Trace.Header.CycleFrequency;
357 if (FileHeader.Version != 1)
358 return make_error<StringError>(
359 Twine("Unsupported XRay file version: ") + Twine(FileHeader.Version),
360 std::make_error_code(std::errc::invalid_argument));
362 Records.clear();
363 std::transform(Trace.Records.begin(), Trace.Records.end(),
364 std::back_inserter(Records), [&](const YAMLXRayRecord &R) {
365 return XRayRecord{R.RecordType, R.CPU, R.Type,
366 R.FuncId, R.TSC, R.TId,
367 R.PId, R.CallArgs, R.Data};
369 return Error::success();
371 } // namespace
373 Expected<Trace> llvm::xray::loadTraceFile(StringRef Filename, bool Sort) {
374 Expected<sys::fs::file_t> FdOrErr = sys::fs::openNativeFileForRead(Filename);
375 if (!FdOrErr)
376 return FdOrErr.takeError();
378 uint64_t FileSize;
379 if (auto EC = sys::fs::file_size(Filename, FileSize)) {
380 return make_error<StringError>(
381 Twine("Cannot read log from '") + Filename + "'", EC);
383 if (FileSize < 4) {
384 return make_error<StringError>(
385 Twine("File '") + Filename + "' too small for XRay.",
386 std::make_error_code(std::errc::executable_format_error));
389 // Map the opened file into memory and use a StringRef to access it later.
390 std::error_code EC;
391 sys::fs::mapped_file_region MappedFile(
392 *FdOrErr, sys::fs::mapped_file_region::mapmode::readonly, FileSize, 0,
393 EC);
394 sys::fs::closeFile(*FdOrErr);
395 if (EC) {
396 return make_error<StringError>(
397 Twine("Cannot read log from '") + Filename + "'", EC);
399 auto Data = StringRef(MappedFile.data(), MappedFile.size());
401 // TODO: Lift the endianness and implementation selection here.
402 DataExtractor LittleEndianDE(Data, true, 8);
403 auto TraceOrError = loadTrace(LittleEndianDE, Sort);
404 if (!TraceOrError) {
405 DataExtractor BigEndianDE(Data, false, 8);
406 TraceOrError = loadTrace(BigEndianDE, Sort);
408 return TraceOrError;
411 Expected<Trace> llvm::xray::loadTrace(const DataExtractor &DE, bool Sort) {
412 // Attempt to detect the file type using file magic. We have a slight bias
413 // towards the binary format, and we do this by making sure that the first 4
414 // bytes of the binary file is some combination of the following byte
415 // patterns: (observe the code loading them assumes they're little endian)
417 // 0x01 0x00 0x00 0x00 - version 1, "naive" format
418 // 0x01 0x00 0x01 0x00 - version 1, "flight data recorder" format
419 // 0x02 0x00 0x01 0x00 - version 2, "flight data recorder" format
421 // YAML files don't typically have those first four bytes as valid text so we
422 // try loading assuming YAML if we don't find these bytes.
424 // Only if we can't load either the binary or the YAML format will we yield an
425 // error.
426 DataExtractor HeaderExtractor(DE.getData(), DE.isLittleEndian(), 8);
427 uint32_t OffsetPtr = 0;
428 uint16_t Version = HeaderExtractor.getU16(&OffsetPtr);
429 uint16_t Type = HeaderExtractor.getU16(&OffsetPtr);
431 enum BinaryFormatType { NAIVE_FORMAT = 0, FLIGHT_DATA_RECORDER_FORMAT = 1 };
433 Trace T;
434 switch (Type) {
435 case NAIVE_FORMAT:
436 if (Version == 1 || Version == 2 || Version == 3) {
437 if (auto E = loadNaiveFormatLog(DE.getData(), DE.isLittleEndian(),
438 T.FileHeader, T.Records))
439 return std::move(E);
440 } else {
441 return make_error<StringError>(
442 Twine("Unsupported version for Basic/Naive Mode logging: ") +
443 Twine(Version),
444 std::make_error_code(std::errc::executable_format_error));
446 break;
447 case FLIGHT_DATA_RECORDER_FORMAT:
448 if (Version >= 1 && Version <= 5) {
449 if (auto E = loadFDRLog(DE.getData(), DE.isLittleEndian(), T.FileHeader,
450 T.Records))
451 return std::move(E);
452 } else {
453 return make_error<StringError>(
454 Twine("Unsupported version for FDR Mode logging: ") + Twine(Version),
455 std::make_error_code(std::errc::executable_format_error));
457 break;
458 default:
459 if (auto E = loadYAMLLog(DE.getData(), T.FileHeader, T.Records))
460 return std::move(E);
463 if (Sort)
464 llvm::stable_sort(T.Records, [&](const XRayRecord &L, const XRayRecord &R) {
465 return L.TSC < R.TSC;
468 return std::move(T);