1 //===------- JITLoaderPerf.cpp - Register profiler objects ------*- C++ -*-===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // Register objects for access by profilers via the perf JIT interface.
11 //===----------------------------------------------------------------------===//
13 #include "llvm/ExecutionEngine/Orc/TargetProcess/JITLoaderPerf.h"
15 #include "llvm/ExecutionEngine/Orc/Shared/PerfSharedStructs.h"
17 #include "llvm/Support/FileSystem.h"
18 #include "llvm/Support/MemoryBuffer.h"
19 #include "llvm/Support/Path.h"
20 #include "llvm/Support/Process.h"
21 #include "llvm/Support/Threading.h"
28 #include <sys/mman.h> // mmap()
29 #include <time.h> // clock_gettime(), time(), localtime_r() */
31 #define DEBUG_TYPE "orc"
33 // language identifier (XXX: should we generate something better from debug
35 #define JIT_LANG "llvm-IR"
36 #define LLVM_PERF_JIT_MAGIC \
37 ((uint32_t)'J' << 24 | (uint32_t)'i' << 16 | (uint32_t)'T' << 8 | \
39 #define LLVM_PERF_JIT_VERSION 1
42 using namespace llvm::orc
;
48 // base directory for output data
51 // output data stream, closed via Dumpstream
55 std::unique_ptr
<raw_fd_ostream
> Dumpstream
;
58 void *MarkerAddr
= NULL
;
61 // prevent concurrent dumps from messing up the output file
62 static std::mutex Mutex
;
63 static std::optional
<PerfState
> State
;
95 uint64_t UnwindDataSize
;
96 uint64_t EhFrameHeaderSize
;
/// Collapse a timespec (seconds + nanoseconds) into one nanosecond count.
///
/// \param TS time value to convert; must not be null.
/// \returns TS->tv_sec * 1e9 + TS->tv_nsec as an unsigned 64-bit value.
static inline uint64_t timespec_to_ns(const struct timespec *TS) {
  constexpr uint64_t NanosPerSecond = 1000000000;
  const uint64_t WholeSeconds = static_cast<uint64_t>(TS->tv_sec);
  return WholeSeconds * NanosPerSecond + TS->tv_nsec;
}
105 static inline uint64_t perf_get_timestamp() {
107 if (clock_gettime(CLOCK_MONOTONIC
, &TS
))
110 return timespec_to_ns(&TS
);
113 static void writeDebugRecord(const PerfJITDebugInfoRecord
&DebugRecord
) {
114 assert(State
&& "PerfState not initialized");
115 LLVM_DEBUG(dbgs() << "Writing debug record with "
116 << DebugRecord
.Entries
.size() << " entries\n");
117 [[maybe_unused
]] size_t Written
= 0;
118 DIR Dir
{RecHeader
{static_cast<uint32_t>(DebugRecord
.Prefix
.Id
),
119 DebugRecord
.Prefix
.TotalSize
, perf_get_timestamp()},
120 DebugRecord
.CodeAddr
, DebugRecord
.Entries
.size()};
121 State
->Dumpstream
->write(reinterpret_cast<const char *>(&Dir
), sizeof(Dir
));
122 Written
+= sizeof(Dir
);
123 for (auto &Die
: DebugRecord
.Entries
) {
124 DIE d
{Die
.Addr
, Die
.Lineno
, Die
.Discrim
};
125 State
->Dumpstream
->write(reinterpret_cast<const char *>(&d
), sizeof(d
));
126 State
->Dumpstream
->write(Die
.Name
.data(), Die
.Name
.size() + 1);
127 Written
+= sizeof(d
) + Die
.Name
.size() + 1;
129 LLVM_DEBUG(dbgs() << "wrote " << Written
<< " bytes of debug info\n");
132 static void writeCodeRecord(const PerfJITCodeLoadRecord
&CodeRecord
) {
133 assert(State
&& "PerfState not initialized");
134 uint32_t Tid
= get_threadid();
135 LLVM_DEBUG(dbgs() << "Writing code record with code size "
136 << CodeRecord
.CodeSize
<< " and code index "
137 << CodeRecord
.CodeIndex
<< "\n");
138 CLR Clr
{RecHeader
{static_cast<uint32_t>(CodeRecord
.Prefix
.Id
),
139 CodeRecord
.Prefix
.TotalSize
, perf_get_timestamp()},
145 CodeRecord
.CodeIndex
};
146 LLVM_DEBUG(dbgs() << "wrote " << sizeof(Clr
) << " bytes of CLR, "
147 << CodeRecord
.Name
.size() + 1 << " bytes of name, "
148 << CodeRecord
.CodeSize
<< " bytes of code\n");
149 State
->Dumpstream
->write(reinterpret_cast<const char *>(&Clr
), sizeof(Clr
));
150 State
->Dumpstream
->write(CodeRecord
.Name
.data(), CodeRecord
.Name
.size() + 1);
151 State
->Dumpstream
->write((const char *)CodeRecord
.CodeAddr
,
152 CodeRecord
.CodeSize
);
156 writeUnwindRecord(const PerfJITCodeUnwindingInfoRecord
&UnwindRecord
) {
157 assert(State
&& "PerfState not initialized");
158 dbgs() << "Writing unwind record with unwind data size "
159 << UnwindRecord
.UnwindDataSize
<< " and EH frame header size "
160 << UnwindRecord
.EHFrameHdrSize
<< " and mapped size "
161 << UnwindRecord
.MappedSize
<< "\n";
162 UWR Uwr
{RecHeader
{static_cast<uint32_t>(UnwindRecord
.Prefix
.Id
),
163 UnwindRecord
.Prefix
.TotalSize
, perf_get_timestamp()},
164 UnwindRecord
.UnwindDataSize
, UnwindRecord
.EHFrameHdrSize
,
165 UnwindRecord
.MappedSize
};
166 LLVM_DEBUG(dbgs() << "wrote " << sizeof(Uwr
) << " bytes of UWR, "
167 << UnwindRecord
.EHFrameHdrSize
168 << " bytes of EH frame header, "
169 << UnwindRecord
.UnwindDataSize
- UnwindRecord
.EHFrameHdrSize
170 << " bytes of EH frame\n");
171 State
->Dumpstream
->write(reinterpret_cast<const char *>(&Uwr
), sizeof(Uwr
));
172 if (UnwindRecord
.EHFrameHdrAddr
)
173 State
->Dumpstream
->write((const char *)UnwindRecord
.EHFrameHdrAddr
,
174 UnwindRecord
.EHFrameHdrSize
);
176 State
->Dumpstream
->write(UnwindRecord
.EHFrameHdr
.data(),
177 UnwindRecord
.EHFrameHdrSize
);
178 State
->Dumpstream
->write((const char *)UnwindRecord
.EHFrameAddr
,
179 UnwindRecord
.UnwindDataSize
-
180 UnwindRecord
.EHFrameHdrSize
);
183 static Error
registerJITLoaderPerfImpl(const PerfJITRecordBatch
&Batch
) {
185 return make_error
<StringError
>("PerfState not initialized",
186 inconvertibleErrorCode());
188 // Serialize the batch
189 std::lock_guard
<std::mutex
> Lock(Mutex
);
190 if (Batch
.UnwindingRecord
.Prefix
.TotalSize
> 0)
191 writeUnwindRecord(Batch
.UnwindingRecord
);
193 for (const auto &DebugInfo
: Batch
.DebugInfoRecords
)
194 writeDebugRecord(DebugInfo
);
196 for (const auto &CodeLoad
: Batch
.CodeLoadRecords
)
197 writeCodeRecord(CodeLoad
);
199 State
->Dumpstream
->flush();
201 return Error::success();
205 uint32_t Magic
; // characters "JiTD"
206 uint32_t Version
; // header version
207 uint32_t TotalSize
; // total size of header
208 uint32_t ElfMach
; // elf mach target
209 uint32_t Pad1
; // reserved
211 uint64_t Timestamp
; // timestamp
212 uint64_t Flags
; // flags
215 static Error
OpenMarker(PerfState
&State
) {
216 // We mmap the jitdump to create an MMAP RECORD in perf.data file. The mmap
217 // is captured either live (perf record running when we mmap) or in deferred
218 // mode, via /proc/PID/maps. The MMAP record is used as a marker of a jitdump
219 // file for more meta data info about the jitted code. Perf report/annotate
220 // detect this special filename and process the jitdump file.
222 // Mapping must be PROT_EXEC to ensure it is captured by perf record
223 // even when not using -d option.
225 ::mmap(NULL
, sys::Process::getPageSizeEstimate(), PROT_READ
| PROT_EXEC
,
226 MAP_PRIVATE
, State
.DumpFd
, 0);
228 if (State
.MarkerAddr
== MAP_FAILED
)
229 return make_error
<llvm::StringError
>("could not mmap JIT marker",
230 inconvertibleErrorCode());
232 return Error::success();
235 void CloseMarker(PerfState
&State
) {
236 if (!State
.MarkerAddr
)
239 munmap(State
.MarkerAddr
, sys::Process::getPageSizeEstimate());
240 State
.MarkerAddr
= nullptr;
243 static Expected
<Header
> FillMachine(PerfState
&State
) {
245 Hdr
.Magic
= LLVM_PERF_JIT_MAGIC
;
246 Hdr
.Version
= LLVM_PERF_JIT_VERSION
;
247 Hdr
.TotalSize
= sizeof(Hdr
);
249 Hdr
.Timestamp
= perf_get_timestamp();
257 size_t RequiredMemory
= sizeof(Id
) + sizeof(Info
);
259 ErrorOr
<std::unique_ptr
<MemoryBuffer
>> MB
=
260 MemoryBuffer::getFileSlice("/proc/self/exe", RequiredMemory
, 0);
262 // This'll not guarantee that enough data was actually read from the
263 // underlying file. Instead the trailing part of the buffer would be
264 // zeroed. Given the ELF signature check below that seems ok though,
265 // it's unlikely that the file ends just after that, and the
266 // consequence would just be that perf wouldn't recognize the
269 return make_error
<llvm::StringError
>("could not open /proc/self/exe",
272 memcpy(&Id
, (*MB
)->getBufferStart(), sizeof(Id
));
273 memcpy(&Info
, (*MB
)->getBufferStart() + sizeof(Id
), sizeof(Info
));
275 // check ELF signature
276 if (Id
[0] != 0x7f || Id
[1] != 'E' || Id
[2] != 'L' || Id
[3] != 'F')
277 return make_error
<llvm::StringError
>("invalid ELF signature",
278 inconvertibleErrorCode());
280 Hdr
.ElfMach
= Info
.e_machine
;
285 static Error
InitDebuggingDir(PerfState
&State
) {
288 char TimeBuffer
[sizeof("YYYYMMDD")];
289 SmallString
<64> Path
;
291 // search for location to dump data to
292 if (const char *BaseDir
= getenv("JITDUMPDIR"))
293 Path
.append(BaseDir
);
294 else if (!sys::path::home_directory(Path
))
297 // create debug directory
298 Path
+= "/.debug/jit/";
299 if (auto EC
= sys::fs::create_directories(Path
)) {
301 raw_string_ostream
ErrStream(ErrStr
);
302 ErrStream
<< "could not create jit cache directory " << Path
<< ": "
303 << EC
.message() << "\n";
304 return make_error
<StringError
>(std::move(ErrStr
), inconvertibleErrorCode());
307 // create unique directory for dump data related to this process
309 localtime_r(&Time
, &LocalTime
);
310 strftime(TimeBuffer
, sizeof(TimeBuffer
), "%Y%m%d", &LocalTime
);
311 Path
+= JIT_LANG
"-jit-";
314 SmallString
<128> UniqueDebugDir
;
316 using sys::fs::createUniqueDirectory
;
317 if (auto EC
= createUniqueDirectory(Path
, UniqueDebugDir
)) {
319 raw_string_ostream
ErrStream(ErrStr
);
320 ErrStream
<< "could not create unique jit cache directory "
321 << UniqueDebugDir
<< ": " << EC
.message() << "\n";
322 return make_error
<StringError
>(std::move(ErrStr
), inconvertibleErrorCode());
325 State
.JitPath
= std::string(UniqueDebugDir
);
327 return Error::success();
330 static Error
registerJITLoaderPerfStartImpl() {
332 Tentative
.Pid
= sys::Process::getProcessId();
333 // check if clock-source is supported
334 if (!perf_get_timestamp())
335 return make_error
<StringError
>("kernel does not support CLOCK_MONOTONIC",
336 inconvertibleErrorCode());
338 if (auto Err
= InitDebuggingDir(Tentative
))
341 std::string Filename
;
342 raw_string_ostream
FilenameBuf(Filename
);
343 FilenameBuf
<< Tentative
.JitPath
<< "/jit-" << Tentative
.Pid
<< ".dump";
345 // Need to open ourselves, because we need to hand the FD to OpenMarker() and
346 // raw_fd_ostream doesn't expose the FD.
347 using sys::fs::openFileForWrite
;
348 if (auto EC
= openFileForReadWrite(Filename
, Tentative
.DumpFd
,
349 sys::fs::CD_CreateNew
, sys::fs::OF_None
)) {
351 raw_string_ostream
ErrStream(ErrStr
);
352 ErrStream
<< "could not open JIT dump file " << Filename
<< ": "
353 << EC
.message() << "\n";
354 return make_error
<StringError
>(std::move(ErrStr
), inconvertibleErrorCode());
357 Tentative
.Dumpstream
=
358 std::make_unique
<raw_fd_ostream
>(Tentative
.DumpFd
, true);
360 auto Header
= FillMachine(Tentative
);
362 return Header
.takeError();
364 // signal this process emits JIT information
365 if (auto Err
= OpenMarker(Tentative
))
368 Tentative
.Dumpstream
->write(reinterpret_cast<const char *>(&Header
.get()),
371 // Everything initialized, can do profiling now.
372 if (Tentative
.Dumpstream
->has_error())
373 return make_error
<StringError
>("could not write JIT dump header",
374 inconvertibleErrorCode());
376 State
= std::move(Tentative
);
377 return Error::success();
380 static Error
registerJITLoaderPerfEndImpl() {
382 return make_error
<StringError
>("PerfState not initialized",
383 inconvertibleErrorCode());
386 Close
.Id
= static_cast<uint32_t>(PerfJITRecordType::JIT_CODE_CLOSE
);
387 Close
.TotalSize
= sizeof(Close
);
388 Close
.Timestamp
= perf_get_timestamp();
389 State
->Dumpstream
->write(reinterpret_cast<const char *>(&Close
),
391 if (State
->MarkerAddr
)
395 return Error::success();
398 extern "C" llvm::orc::shared::CWrapperFunctionResult
399 llvm_orc_registerJITLoaderPerfImpl(const char *Data
, uint64_t Size
) {
400 using namespace orc::shared
;
401 return WrapperFunction
<SPSError(SPSPerfJITRecordBatch
)>::handle(
402 Data
, Size
, registerJITLoaderPerfImpl
)
406 extern "C" llvm::orc::shared::CWrapperFunctionResult
407 llvm_orc_registerJITLoaderPerfStart(const char *Data
, uint64_t Size
) {
408 using namespace orc::shared
;
409 return WrapperFunction
<SPSError()>::handle(Data
, Size
,
410 registerJITLoaderPerfStartImpl
)
414 extern "C" llvm::orc::shared::CWrapperFunctionResult
415 llvm_orc_registerJITLoaderPerfEnd(const char *Data
, uint64_t Size
) {
416 using namespace orc::shared
;
417 return WrapperFunction
<SPSError()>::handle(Data
, Size
,
418 registerJITLoaderPerfEndImpl
)
424 using namespace llvm
;
425 using namespace llvm::orc
;
427 static Error
badOS() {
428 using namespace llvm
;
429 return llvm::make_error
<StringError
>(
430 "unsupported OS (perf support is only available on linux!)",
431 inconvertibleErrorCode());
434 static Error
badOSBatch(PerfJITRecordBatch
&Batch
) { return badOS(); }
436 extern "C" llvm::orc::shared::CWrapperFunctionResult
437 llvm_orc_registerJITLoaderPerfImpl(const char *Data
, uint64_t Size
) {
438 using namespace shared
;
439 return WrapperFunction
<SPSError(SPSPerfJITRecordBatch
)>::handle(Data
, Size
,
444 extern "C" llvm::orc::shared::CWrapperFunctionResult
445 llvm_orc_registerJITLoaderPerfStart(const char *Data
, uint64_t Size
) {
446 using namespace shared
;
447 return WrapperFunction
<SPSError()>::handle(Data
, Size
, badOS
).release();
450 extern "C" llvm::orc::shared::CWrapperFunctionResult
451 llvm_orc_registerJITLoaderPerfEnd(const char *Data
, uint64_t Size
) {
452 using namespace shared
;
453 return WrapperFunction
<SPSError()>::handle(Data
, Size
, badOS
).release();