1 //===-- xray_basic_logging.cpp ----------------------------------*- C++ -*-===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // This file is a part of XRay, a dynamic runtime instrumentation system.
11 // Implementation of a simple in-memory log of XRay events. This defines a
12 // logging function that's compatible with the XRay handler interface, and
13 // routines for exporting data to files.
15 //===----------------------------------------------------------------------===//
21 #if SANITIZER_FREEBSD || SANITIZER_NETBSD || SANITIZER_APPLE
22 #include <sys/syscall.h>
24 #include <sys/types.h>
28 #include "sanitizer_common/sanitizer_allocator_internal.h"
29 #include "sanitizer_common/sanitizer_libc.h"
30 #include "xray/xray_records.h"
31 #include "xray_recursion_guard.h"
32 #include "xray_basic_flags.h"
33 #include "xray_basic_logging.h"
34 #include "xray_defs.h"
35 #include "xray_flags.h"
36 #include "xray_interface_internal.h"
38 #include "xray_utils.h"
42 static SpinMutex LogMutex
;
45 // We use elements of this type to record the entry TSC of every function ID we
46 // see as we're tracing a particular thread's execution.
47 struct alignas(16) StackEntry
{
55 static_assert(sizeof(StackEntry
) == 16, "Wrong size for StackEntry");
57 struct XRAY_TLS_ALIGNAS(64) ThreadLocalData
{
58 void *InMemoryBuffer
= nullptr;
59 size_t BufferSize
= 0;
60 size_t BufferOffset
= 0;
61 void *ShadowStack
= nullptr;
63 size_t StackEntries
= 0;
64 __xray::LogWriter
*LogWriter
= nullptr;
67 struct BasicLoggingOptions
{
68 int DurationFilterMicros
= 0;
69 size_t MaxStackDepth
= 0;
70 size_t ThreadBufferSize
= 0;
74 static pthread_key_t PThreadKey
;
76 static atomic_uint8_t BasicInitialized
{0};
78 struct BasicLoggingOptions GlobalOptions
;
80 thread_local atomic_uint8_t Guard
{0};
82 static atomic_uint8_t UseRealTSC
{0};
83 static atomic_uint64_t ThresholdTicks
{0};
84 static atomic_uint64_t TicksPerSec
{0};
85 static atomic_uint64_t CycleFrequency
{NanosecondsPerSecond
};
87 static LogWriter
*getLog() XRAY_NEVER_INSTRUMENT
{
88 LogWriter
* LW
= LogWriter::Open();
92 static pthread_once_t DetectOnce
= PTHREAD_ONCE_INIT
;
93 pthread_once(&DetectOnce
, +[] {
94 if (atomic_load(&UseRealTSC
, memory_order_acquire
))
95 atomic_store(&CycleFrequency
, getTSCFrequency(), memory_order_release
);
98 // Since we're here, we get to write the header. We set it up so that the
99 // header will only be written once, at the start, and let the threads
100 // logging do writes which just append.
101 XRayFileHeader Header
;
102 // Version 2 includes tail exit records.
103 // Version 3 includes pid inside records.
105 Header
.Type
= FileTypes::NAIVE_LOG
;
106 Header
.CycleFrequency
= atomic_load(&CycleFrequency
, memory_order_acquire
);
108 // FIXME: Actually check whether we have 'constant_tsc' and 'nonstop_tsc'
109 // before setting the values in the header.
110 Header
.ConstantTSC
= 1;
111 Header
.NonstopTSC
= 1;
112 LW
->WriteAll(reinterpret_cast<char *>(&Header
),
113 reinterpret_cast<char *>(&Header
) + sizeof(Header
));
117 static LogWriter
*getGlobalLog() XRAY_NEVER_INSTRUMENT
{
118 static pthread_once_t OnceInit
= PTHREAD_ONCE_INIT
;
119 static LogWriter
*LW
= nullptr;
120 pthread_once(&OnceInit
, +[] { LW
= getLog(); });
124 static ThreadLocalData
&getThreadLocalData() XRAY_NEVER_INSTRUMENT
{
125 thread_local ThreadLocalData TLD
;
126 thread_local
bool UNUSED TOnce
= [] {
127 if (GlobalOptions
.ThreadBufferSize
== 0) {
129 Report("Not initializing TLD since ThreadBufferSize == 0.\n");
132 pthread_setspecific(PThreadKey
, &TLD
);
133 TLD
.LogWriter
= getGlobalLog();
134 TLD
.InMemoryBuffer
= reinterpret_cast<XRayRecord
*>(
135 InternalAlloc(sizeof(XRayRecord
) * GlobalOptions
.ThreadBufferSize
,
136 nullptr, alignof(XRayRecord
)));
137 TLD
.BufferSize
= GlobalOptions
.ThreadBufferSize
;
138 TLD
.BufferOffset
= 0;
139 if (GlobalOptions
.MaxStackDepth
== 0) {
141 Report("Not initializing the ShadowStack since MaxStackDepth == 0.\n");
143 TLD
.StackEntries
= 0;
144 TLD
.ShadowStack
= nullptr;
147 TLD
.ShadowStack
= reinterpret_cast<StackEntry
*>(
148 InternalAlloc(sizeof(StackEntry
) * GlobalOptions
.MaxStackDepth
, nullptr,
149 alignof(StackEntry
)));
150 TLD
.StackSize
= GlobalOptions
.MaxStackDepth
;
151 TLD
.StackEntries
= 0;
157 template <class RDTSC
>
158 void InMemoryRawLog(int32_t FuncId
, XRayEntryType Type
,
159 RDTSC ReadTSC
) XRAY_NEVER_INSTRUMENT
{
160 auto &TLD
= getThreadLocalData();
161 LogWriter
*LW
= getGlobalLog();
165 // Use a simple recursion guard, to handle cases where we're already logging
166 // and for one reason or another, this function gets called again in the same
168 RecursionGuard
G(Guard
);
173 uint64_t TSC
= ReadTSC(CPU
);
176 case XRayEntryType::ENTRY
:
177 case XRayEntryType::LOG_ARGS_ENTRY
: {
178 // Short circuit if we've reached the maximum depth of the stack.
179 if (TLD
.StackEntries
++ >= TLD
.StackSize
)
182 // When we encounter an entry event, we keep track of the TSC and the CPU,
183 // and put it in the stack.
189 auto StackEntryPtr
= static_cast<char *>(TLD
.ShadowStack
) +
190 (sizeof(StackEntry
) * (TLD
.StackEntries
- 1));
191 internal_memcpy(StackEntryPtr
, &E
, sizeof(StackEntry
));
194 case XRayEntryType::EXIT
:
195 case XRayEntryType::TAIL
: {
196 if (TLD
.StackEntries
== 0)
199 if (--TLD
.StackEntries
>= TLD
.StackSize
)
202 // When we encounter an exit event, we check whether all the following are
205 // - The Function ID is the same as the most recent entry in the stack.
206 // - The CPU is the same as the most recent entry in the stack.
207 // - The Delta of the TSCs is less than the threshold amount of time we're
208 // looking to record.
210 // If all of these conditions are true, we pop the stack and don't write a
211 // record and move the record offset back.
213 auto StackEntryPtr
= static_cast<char *>(TLD
.ShadowStack
) +
214 (sizeof(StackEntry
) * TLD
.StackEntries
);
215 internal_memcpy(&StackTop
, StackEntryPtr
, sizeof(StackEntry
));
216 if (StackTop
.FuncId
== FuncId
&& StackTop
.CPU
== CPU
&&
217 StackTop
.TSC
< TSC
) {
218 auto Delta
= TSC
- StackTop
.TSC
;
219 if (Delta
< atomic_load(&ThresholdTicks
, memory_order_relaxed
)) {
220 DCHECK(TLD
.BufferOffset
> 0);
221 TLD
.BufferOffset
-= StackTop
.Type
== XRayEntryType::ENTRY
? 1 : 2;
228 // Should be unreachable.
229 DCHECK(false && "Unsupported XRayEntryType encountered.");
233 // First determine whether the delta between the function's enter record and
234 // the exit record is higher than the threshold.
236 R
.RecordType
= RecordTypes::NORMAL
;
240 R
.PId
= internal_getpid();
243 auto FirstEntry
= reinterpret_cast<XRayRecord
*>(TLD
.InMemoryBuffer
);
244 internal_memcpy(FirstEntry
+ TLD
.BufferOffset
, &R
, sizeof(R
));
245 if (++TLD
.BufferOffset
== TLD
.BufferSize
) {
246 SpinMutexLock
Lock(&LogMutex
);
247 LW
->WriteAll(reinterpret_cast<char *>(FirstEntry
),
248 reinterpret_cast<char *>(FirstEntry
+ TLD
.BufferOffset
));
249 TLD
.BufferOffset
= 0;
250 TLD
.StackEntries
= 0;
254 template <class RDTSC
>
255 void InMemoryRawLogWithArg(int32_t FuncId
, XRayEntryType Type
, uint64_t Arg1
,
256 RDTSC ReadTSC
) XRAY_NEVER_INSTRUMENT
{
257 auto &TLD
= getThreadLocalData();
259 reinterpret_cast<XRayArgPayload
*>(TLD
.InMemoryBuffer
);
260 const auto &BuffLen
= TLD
.BufferSize
;
261 LogWriter
*LW
= getGlobalLog();
265 // First we check whether there's enough space to write the data consecutively
266 // in the thread-local buffer. If not, we first flush the buffer before
267 // attempting to write the two records that must be consecutive.
268 if (TLD
.BufferOffset
+ 2 > BuffLen
) {
269 SpinMutexLock
Lock(&LogMutex
);
270 LW
->WriteAll(reinterpret_cast<char *>(FirstEntry
),
271 reinterpret_cast<char *>(FirstEntry
+ TLD
.BufferOffset
));
272 TLD
.BufferOffset
= 0;
273 TLD
.StackEntries
= 0;
276 // Then we write the "we have an argument" record.
277 InMemoryRawLog(FuncId
, Type
, ReadTSC
);
279 RecursionGuard
G(Guard
);
283 // And, from here on write the arg payload.
285 R
.RecordType
= RecordTypes::ARG_PAYLOAD
;
288 R
.PId
= internal_getpid();
290 internal_memcpy(FirstEntry
+ TLD
.BufferOffset
, &R
, sizeof(R
));
291 if (++TLD
.BufferOffset
== BuffLen
) {
292 SpinMutexLock
Lock(&LogMutex
);
293 LW
->WriteAll(reinterpret_cast<char *>(FirstEntry
),
294 reinterpret_cast<char *>(FirstEntry
+ TLD
.BufferOffset
));
295 TLD
.BufferOffset
= 0;
296 TLD
.StackEntries
= 0;
300 void basicLoggingHandleArg0RealTSC(int32_t FuncId
,
301 XRayEntryType Type
) XRAY_NEVER_INSTRUMENT
{
302 InMemoryRawLog(FuncId
, Type
, readTSC
);
305 void basicLoggingHandleArg0EmulateTSC(int32_t FuncId
, XRayEntryType Type
)
306 XRAY_NEVER_INSTRUMENT
{
307 InMemoryRawLog(FuncId
, Type
, [](uint8_t &CPU
) XRAY_NEVER_INSTRUMENT
{
309 int result
= clock_gettime(CLOCK_REALTIME
, &TS
);
311 Report("clock_gettimg(2) return %d, errno=%d.", result
, int(errno
));
315 return TS
.tv_sec
* NanosecondsPerSecond
+ TS
.tv_nsec
;
319 void basicLoggingHandleArg1RealTSC(int32_t FuncId
, XRayEntryType Type
,
320 uint64_t Arg1
) XRAY_NEVER_INSTRUMENT
{
321 InMemoryRawLogWithArg(FuncId
, Type
, Arg1
, readTSC
);
324 void basicLoggingHandleArg1EmulateTSC(int32_t FuncId
, XRayEntryType Type
,
325 uint64_t Arg1
) XRAY_NEVER_INSTRUMENT
{
326 InMemoryRawLogWithArg(
327 FuncId
, Type
, Arg1
, [](uint8_t &CPU
) XRAY_NEVER_INSTRUMENT
{
329 int result
= clock_gettime(CLOCK_REALTIME
, &TS
);
331 Report("clock_gettimg(2) return %d, errno=%d.", result
, int(errno
));
335 return TS
.tv_sec
* NanosecondsPerSecond
+ TS
.tv_nsec
;
339 static void TLDDestructor(void *P
) XRAY_NEVER_INSTRUMENT
{
340 ThreadLocalData
&TLD
= *reinterpret_cast<ThreadLocalData
*>(P
);
341 auto ExitGuard
= at_scope_exit([&TLD
] {
342 // Clean up dynamic resources.
343 if (TLD
.InMemoryBuffer
)
344 InternalFree(TLD
.InMemoryBuffer
);
346 InternalFree(TLD
.ShadowStack
);
348 Report("Cleaned up log for TID: %llu\n", GetTid());
351 if (TLD
.LogWriter
== nullptr || TLD
.BufferOffset
== 0) {
353 Report("Skipping buffer for TID: %llu; Offset = %zu\n", GetTid(),
359 SpinMutexLock
L(&LogMutex
);
360 TLD
.LogWriter
->WriteAll(reinterpret_cast<char *>(TLD
.InMemoryBuffer
),
361 reinterpret_cast<char *>(TLD
.InMemoryBuffer
) +
362 (sizeof(XRayRecord
) * TLD
.BufferOffset
));
365 // Because this thread's exit could be the last one trying to write to
366 // the file and that we're not able to close out the file properly, we
367 // sync instead and hope that the pending writes are flushed as the
369 TLD
.LogWriter
->Flush();
372 XRayLogInitStatus
basicLoggingInit(UNUSED
size_t BufferSize
,
373 UNUSED
size_t BufferMax
, void *Options
,
374 size_t OptionsSize
) XRAY_NEVER_INSTRUMENT
{
375 uint8_t Expected
= 0;
376 if (!atomic_compare_exchange_strong(&BasicInitialized
, &Expected
, 1,
377 memory_order_acq_rel
)) {
379 Report("Basic logging already initialized.\n");
380 return XRayLogInitStatus::XRAY_LOG_INITIALIZED
;
383 static pthread_once_t OnceInit
= PTHREAD_ONCE_INIT
;
384 pthread_once(&OnceInit
, +[] {
385 pthread_key_create(&PThreadKey
, TLDDestructor
);
386 atomic_store(&UseRealTSC
, probeRequiredCPUFeatures(), memory_order_release
);
387 // Initialize the global TicksPerSec value.
388 atomic_store(&TicksPerSec
,
389 probeRequiredCPUFeatures() ? getTSCFrequency()
390 : NanosecondsPerSecond
,
391 memory_order_release
);
392 if (!atomic_load(&UseRealTSC
, memory_order_relaxed
) && Verbosity())
393 Report("WARNING: Required CPU features missing for XRay instrumentation, "
394 "using emulation instead.\n");
400 registerXRayBasicFlags(&P
, &F
);
401 P
.ParseString(useCompilerDefinedBasicFlags());
402 auto *EnvOpts
= GetEnv("XRAY_BASIC_OPTIONS");
403 if (EnvOpts
== nullptr)
406 P
.ParseString(EnvOpts
);
408 // If XRAY_BASIC_OPTIONS was not defined, then we use the deprecated options
409 // set through XRAY_OPTIONS instead.
410 if (internal_strlen(EnvOpts
) == 0) {
411 F
.func_duration_threshold_us
=
412 flags()->xray_naive_log_func_duration_threshold_us
;
413 F
.max_stack_depth
= flags()->xray_naive_log_max_stack_depth
;
414 F
.thread_buffer_size
= flags()->xray_naive_log_thread_buffer_size
;
417 P
.ParseString(static_cast<const char *>(Options
));
418 GlobalOptions
.ThreadBufferSize
= F
.thread_buffer_size
;
419 GlobalOptions
.DurationFilterMicros
= F
.func_duration_threshold_us
;
420 GlobalOptions
.MaxStackDepth
= F
.max_stack_depth
;
423 atomic_store(&ThresholdTicks
,
424 atomic_load(&TicksPerSec
, memory_order_acquire
) *
425 GlobalOptions
.DurationFilterMicros
/ 1000000,
426 memory_order_release
);
427 __xray_set_handler_arg1(atomic_load(&UseRealTSC
, memory_order_acquire
)
428 ? basicLoggingHandleArg1RealTSC
429 : basicLoggingHandleArg1EmulateTSC
);
430 __xray_set_handler(atomic_load(&UseRealTSC
, memory_order_acquire
)
431 ? basicLoggingHandleArg0RealTSC
432 : basicLoggingHandleArg0EmulateTSC
);
434 // TODO: Implement custom event and typed event handling support in Basic
436 __xray_remove_customevent_handler();
437 __xray_remove_typedevent_handler();
439 return XRayLogInitStatus::XRAY_LOG_INITIALIZED
;
442 XRayLogInitStatus
basicLoggingFinalize() XRAY_NEVER_INSTRUMENT
{
443 uint8_t Expected
= 0;
444 if (!atomic_compare_exchange_strong(&BasicInitialized
, &Expected
, 0,
445 memory_order_acq_rel
) &&
447 Report("Basic logging already finalized.\n");
449 // Nothing really to do aside from marking state of the global to be
452 return XRayLogInitStatus::XRAY_LOG_FINALIZED
;
455 XRayLogFlushStatus
basicLoggingFlush() XRAY_NEVER_INSTRUMENT
{
456 // This really does nothing, since flushing the logs happen at the end of a
457 // thread's lifetime, or when the buffers are full.
458 return XRayLogFlushStatus::XRAY_LOG_FLUSHED
;
461 // This is a handler that, effectively, does nothing.
462 void basicLoggingHandleArg0Empty(int32_t, XRayEntryType
) XRAY_NEVER_INSTRUMENT
{
465 bool basicLogDynamicInitializer() XRAY_NEVER_INSTRUMENT
{
468 basicLoggingFinalize
,
469 basicLoggingHandleArg0Empty
,
472 auto RegistrationResult
= __xray_log_register_mode("xray-basic", Impl
);
473 if (RegistrationResult
!= XRayLogRegisterStatus::XRAY_REGISTRATION_OK
&&
475 Report("Cannot register XRay Basic Mode to 'xray-basic'; error = %d\n",
477 if (flags()->xray_naive_log
||
478 !internal_strcmp(flags()->xray_mode
, "xray-basic")) {
479 auto SelectResult
= __xray_log_select_mode("xray-basic");
480 if (SelectResult
!= XRayLogRegisterStatus::XRAY_REGISTRATION_OK
) {
482 Report("Failed selecting XRay Basic Mode; error = %d\n", SelectResult
);
486 // We initialize the implementation using the data we get from the
487 // XRAY_BASIC_OPTIONS environment variable, at this point of the
489 auto *Env
= GetEnv("XRAY_BASIC_OPTIONS");
491 __xray_log_init_mode("xray-basic", Env
== nullptr ? "" : Env
);
492 if (InitResult
!= XRayLogInitStatus::XRAY_LOG_INITIALIZED
) {
494 Report("Failed initializing XRay Basic Mode; error = %d\n", InitResult
);
498 // At this point we know that we've successfully initialized Basic mode
499 // tracing, and the only chance we're going to get for the current thread to
500 // clean-up may be at thread/program exit. To ensure that we're going to get
501 // the cleanup even without calling the finalization routines, we're
502 // registering a program exit function that will do the cleanup.
503 static pthread_once_t DynamicOnce
= PTHREAD_ONCE_INIT
;
504 pthread_once(&DynamicOnce
, +[] {
505 static void *FakeTLD
= nullptr;
506 FakeTLD
= &getThreadLocalData();
507 Atexit(+[] { TLDDestructor(FakeTLD
); });
513 } // namespace __xray
515 static auto UNUSED Unused
= __xray::basicLogDynamicInitializer();