1 //===-- xray_profiling.cpp --------------------------------------*- C++ -*-===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // This file is a part of XRay, a dynamic runtime instrumentation system.
11 // This is the implementation of a profiling handler.
13 //===----------------------------------------------------------------------===//
17 #include "sanitizer_common/sanitizer_atomic.h"
18 #include "sanitizer_common/sanitizer_flags.h"
19 #include "xray/xray_interface.h"
20 #include "xray/xray_log_interface.h"
21 #include "xray_buffer_queue.h"
22 #include "xray_flags.h"
23 #include "xray_profile_collector.h"
24 #include "xray_profiling_flags.h"
25 #include "xray_recursion_guard.h"
27 #include "xray_utils.h"
// Flush state of the profiling mode. Starts at XRAY_LOG_NOT_FLUSHING;
// profilingFlush() exchanges it to XRAY_LOG_FLUSHING and later stores
// XRAY_LOG_FLUSHED.
34 static atomic_sint32_t ProfilerLogFlushStatus
= {
35 XRayLogFlushStatus::XRAY_LOG_NOT_FLUSHING
};
// Lifecycle state of the profiling mode. Starts at XRAY_LOG_UNINITIALIZED and
// is advanced by profilingLoggingInit() (INITIALIZING, INITIALIZED) and
// profilingFinalize() (FINALIZING, FINALIZED); profilingFlush() stores
// XRAY_LOG_UNINITIALIZED again after flushing.
37 static atomic_sint32_t ProfilerLogStatus
= {
38 XRayLogInitStatus::XRAY_LOG_UNINITIALIZED
};
// Taken before the profiling flags are parsed in profilingLoggingInit() and
// in profilingDynamicInitializer().
40 static SpinMutex ProfilerOptionsMutex
;
42 struct ProfilingData
{
43 atomic_uintptr_t Allocators
;
// pthread key for this mode's per-thread data: getThreadLocalData() stores
// &TLD under it with pthread_setspecific().
47 static pthread_key_t ProfilingKey
;
49 // We use a global buffer queue, which gets initialized once at initialisation
50 // time, and gets reset when profiling is "done".
51 static std::aligned_storage
<sizeof(BufferQueue
), alignof(BufferQueue
)>::type
// Pointer to the global buffer queue. Null until profilingLoggingInit()
// points it at the BufferQueue constructed in BufferQueueStorage.
53 static BufferQueue
*BQ
= nullptr;
// Per-thread set of buffers (node, roots, shadow stack, node-id pair) that
// getThreadLocalData() obtains from BQ and postCurrentThreadFCT() moves into
// the profile collector.
55 thread_local
FunctionCallTrie::Allocators::Buffers ThreadBuffers
;
56 thread_local
std::aligned_storage
<sizeof(FunctionCallTrie::Allocators
),
57 alignof(FunctionCallTrie::Allocators
)>::type
// Raw, suitably sized and aligned per-thread storage; getThreadLocalData()
// placement-constructs this thread's FunctionCallTrie in it.
59 thread_local
std::aligned_storage
<sizeof(FunctionCallTrie
),
60 alignof(FunctionCallTrie
)>::type
61 FunctionCallTrieStorage
;
// Per-thread ProfilingData whose atomics start at 0; getThreadLocalData()
// later publishes the addresses of the per-thread storage objects in them.
62 thread_local ProfilingData TLD
{{0}, {0}};
// Per-thread flag handed to RecursionGuard on the handler, flush, finalize,
// init and thread-exit paths of this file.
63 thread_local atomic_uint8_t ReentranceGuard
{0};
65 // We use a separate guard to ensure that, if this thread is already cleaning
66 // up, signal handlers attempt neither cleanup nor initialisation.
68 thread_local atomic_uint8_t TLDInitGuard
{0};
70 // We also use a separate latch to signal that the thread is exiting, and
71 // non-essential work should be ignored (things like recording events, etc.).
72 thread_local atomic_uint8_t ThreadExitingLatch
{0};
74 static ProfilingData
*getThreadLocalData() XRAY_NEVER_INSTRUMENT
{
75 thread_local
auto ThreadOnce
= []() XRAY_NEVER_INSTRUMENT
{
76 pthread_setspecific(ProfilingKey
, &TLD
);
81 RecursionGuard
TLDInit(TLDInitGuard
);
85 if (atomic_load_relaxed(&ThreadExitingLatch
))
89 if (atomic_compare_exchange_strong(&TLD
.Allocators
, &Allocators
, 1,
90 memory_order_acq_rel
)) {
92 auto AllocatorsUndo
= at_scope_exit([&]() XRAY_NEVER_INSTRUMENT
{
94 atomic_store(&TLD
.Allocators
, 0, memory_order_release
);
97 // Acquire a set of buffers for this thread.
101 if (BQ
->getBuffer(ThreadBuffers
.NodeBuffer
) != BufferQueue::ErrorCode::Ok
)
103 auto NodeBufferUndo
= at_scope_exit([&]() XRAY_NEVER_INSTRUMENT
{
105 BQ
->releaseBuffer(ThreadBuffers
.NodeBuffer
);
108 if (BQ
->getBuffer(ThreadBuffers
.RootsBuffer
) != BufferQueue::ErrorCode::Ok
)
110 auto RootsBufferUndo
= at_scope_exit([&]() XRAY_NEVER_INSTRUMENT
{
112 BQ
->releaseBuffer(ThreadBuffers
.RootsBuffer
);
115 if (BQ
->getBuffer(ThreadBuffers
.ShadowStackBuffer
) !=
116 BufferQueue::ErrorCode::Ok
)
118 auto ShadowStackBufferUndo
= at_scope_exit([&]() XRAY_NEVER_INSTRUMENT
{
120 BQ
->releaseBuffer(ThreadBuffers
.ShadowStackBuffer
);
123 if (BQ
->getBuffer(ThreadBuffers
.NodeIdPairBuffer
) !=
124 BufferQueue::ErrorCode::Ok
)
128 new (&AllocatorsStorage
) FunctionCallTrie::Allocators(
129 FunctionCallTrie::InitAllocatorsFromBuffers(ThreadBuffers
));
130 Allocators
= reinterpret_cast<uptr
>(
131 reinterpret_cast<FunctionCallTrie::Allocators
*>(&AllocatorsStorage
));
132 atomic_store(&TLD
.Allocators
, Allocators
, memory_order_release
);
139 if (atomic_compare_exchange_strong(&TLD
.FCT
, &FCT
, 1, memory_order_acq_rel
)) {
140 new (&FunctionCallTrieStorage
)
141 FunctionCallTrie(*reinterpret_cast<FunctionCallTrie::Allocators
*>(
142 atomic_load_relaxed(&TLD
.Allocators
)));
143 FCT
= reinterpret_cast<uptr
>(
144 reinterpret_cast<FunctionCallTrie
*>(&FunctionCallTrieStorage
));
145 atomic_store(&TLD
.FCT
, FCT
, memory_order_release
);
154 static void cleanupTLD() XRAY_NEVER_INSTRUMENT
{
155 auto FCT
= atomic_exchange(&TLD
.FCT
, 0, memory_order_acq_rel
);
156 if (FCT
== reinterpret_cast<uptr
>(reinterpret_cast<FunctionCallTrie
*>(
157 &FunctionCallTrieStorage
)))
158 reinterpret_cast<FunctionCallTrie
*>(FCT
)->~FunctionCallTrie();
160 auto Allocators
= atomic_exchange(&TLD
.Allocators
, 0, memory_order_acq_rel
);
162 reinterpret_cast<uptr
>(
163 reinterpret_cast<FunctionCallTrie::Allocators
*>(&AllocatorsStorage
)))
164 reinterpret_cast<FunctionCallTrie::Allocators
*>(Allocators
)->~Allocators();
167 static void postCurrentThreadFCT(ProfilingData
&T
) XRAY_NEVER_INSTRUMENT
{
168 RecursionGuard
TLDInit(TLDInitGuard
);
172 uptr P
= atomic_exchange(&T
.FCT
, 0, memory_order_acq_rel
);
173 if (P
!= reinterpret_cast<uptr
>(
174 reinterpret_cast<FunctionCallTrie
*>(&FunctionCallTrieStorage
)))
177 auto FCT
= reinterpret_cast<FunctionCallTrie
*>(P
);
178 DCHECK_NE(FCT
, nullptr);
180 uptr A
= atomic_exchange(&T
.Allocators
, 0, memory_order_acq_rel
);
182 reinterpret_cast<uptr
>(
183 reinterpret_cast<FunctionCallTrie::Allocators
*>(&AllocatorsStorage
)))
186 auto Allocators
= reinterpret_cast<FunctionCallTrie::Allocators
*>(A
);
187 DCHECK_NE(Allocators
, nullptr);
189 // Always move the data into the profile collector.
190 profileCollectorService::post(BQ
, std::move(*FCT
), std::move(*Allocators
),
191 std::move(ThreadBuffers
), GetTid());
193 // Re-initialize the ThreadBuffers object to a known "default" state.
194 ThreadBuffers
= FunctionCallTrie::Allocators::Buffers
{};
199 const char *profilingCompilerDefinedFlags() XRAY_NEVER_INSTRUMENT
{
200 #ifdef XRAY_PROFILER_DEFAULT_OPTIONS
201 return SANITIZER_STRINGIFY(XRAY_PROFILER_DEFAULT_OPTIONS
);
207 XRayLogFlushStatus
profilingFlush() XRAY_NEVER_INSTRUMENT
{
208 if (atomic_load(&ProfilerLogStatus
, memory_order_acquire
) !=
209 XRayLogInitStatus::XRAY_LOG_FINALIZED
) {
211 Report("Not flushing profiles, profiling not been finalized.\n");
212 return XRayLogFlushStatus::XRAY_LOG_NOT_FLUSHING
;
215 RecursionGuard
SignalGuard(ReentranceGuard
);
218 Report("Cannot finalize properly inside a signal handler!\n");
219 atomic_store(&ProfilerLogFlushStatus
,
220 XRayLogFlushStatus::XRAY_LOG_NOT_FLUSHING
,
221 memory_order_release
);
222 return XRayLogFlushStatus::XRAY_LOG_NOT_FLUSHING
;
225 s32 Previous
= atomic_exchange(&ProfilerLogFlushStatus
,
226 XRayLogFlushStatus::XRAY_LOG_FLUSHING
,
227 memory_order_acq_rel
);
228 if (Previous
== XRayLogFlushStatus::XRAY_LOG_FLUSHING
) {
230 Report("Not flushing profiles, implementation still flushing.\n");
231 return XRayLogFlushStatus::XRAY_LOG_FLUSHING
;
234 // At this point, we'll create the file that will contain the profile, but
235 // only if the options say so.
236 if (!profilingFlags()->no_flush
) {
237 // First check whether we have data in the profile collector service
238 // before we try and write anything down.
239 XRayBuffer B
= profileCollectorService::nextBuffer({nullptr, 0});
240 if (B
.Data
== nullptr) {
242 Report("profiling: No data to flush.\n");
244 LogWriter
*LW
= LogWriter::Open();
247 Report("profiling: Failed to flush to file, dropping data.\n");
249 // Now for each of the buffers, write out the profile data as we would
250 // see it in memory, verbatim.
251 while (B
.Data
!= nullptr && B
.Size
!= 0) {
252 LW
->WriteAll(reinterpret_cast<const char *>(B
.Data
),
253 reinterpret_cast<const char *>(B
.Data
) + B
.Size
);
254 B
= profileCollectorService::nextBuffer(B
);
256 LogWriter::Close(LW
);
261 profileCollectorService::reset();
263 atomic_store(&ProfilerLogFlushStatus
, XRayLogFlushStatus::XRAY_LOG_FLUSHED
,
264 memory_order_release
);
265 atomic_store(&ProfilerLogStatus
, XRayLogInitStatus::XRAY_LOG_UNINITIALIZED
,
266 memory_order_release
);
268 return XRayLogFlushStatus::XRAY_LOG_FLUSHED
;
271 void profilingHandleArg0(int32_t FuncId
,
272 XRayEntryType Entry
) XRAY_NEVER_INSTRUMENT
{
274 auto TSC
= readTSC(CPU
);
275 RecursionGuard
G(ReentranceGuard
);
279 auto Status
= atomic_load(&ProfilerLogStatus
, memory_order_acquire
);
280 if (UNLIKELY(Status
== XRayLogInitStatus::XRAY_LOG_UNINITIALIZED
||
281 Status
== XRayLogInitStatus::XRAY_LOG_INITIALIZING
))
284 if (UNLIKELY(Status
== XRayLogInitStatus::XRAY_LOG_FINALIZED
||
285 Status
== XRayLogInitStatus::XRAY_LOG_FINALIZING
)) {
286 postCurrentThreadFCT(TLD
);
290 auto T
= getThreadLocalData();
294 auto FCT
= reinterpret_cast<FunctionCallTrie
*>(atomic_load_relaxed(&T
->FCT
));
296 case XRayEntryType::ENTRY
:
297 case XRayEntryType::LOG_ARGS_ENTRY
:
298 FCT
->enterFunction(FuncId
, TSC
, CPU
);
300 case XRayEntryType::EXIT
:
301 case XRayEntryType::TAIL
:
302 FCT
->exitFunction(FuncId
, TSC
, CPU
);
305 // FIXME: Handle bugs.
310 void profilingHandleArg1(int32_t FuncId
, XRayEntryType Entry
,
311 uint64_t) XRAY_NEVER_INSTRUMENT
{
312 return profilingHandleArg0(FuncId
, Entry
);
315 XRayLogInitStatus
profilingFinalize() XRAY_NEVER_INSTRUMENT
{
316 s32 CurrentStatus
= XRayLogInitStatus::XRAY_LOG_INITIALIZED
;
317 if (!atomic_compare_exchange_strong(&ProfilerLogStatus
, &CurrentStatus
,
318 XRayLogInitStatus::XRAY_LOG_FINALIZING
,
319 memory_order_release
)) {
321 Report("Cannot finalize profile, the profiling is not initialized.\n");
322 return static_cast<XRayLogInitStatus
>(CurrentStatus
);
325 // Mark then finalize the current generation of buffers. This allows us to let
326 // the threads currently holding onto new buffers still use them, but let the
327 // last reference do the memory cleanup.
328 DCHECK_NE(BQ
, nullptr);
331 // Wait a grace period to allow threads to see that we're finalizing.
332 SleepForMillis(profilingFlags()->grace_period_ms
);
334 // If we for some reason are entering this function from an instrumented
335 // handler, we bail out.
336 RecursionGuard
G(ReentranceGuard
);
338 return static_cast<XRayLogInitStatus
>(CurrentStatus
);
340 // Post the current thread's data if we have any.
341 postCurrentThreadFCT(TLD
);
343 // Then we force serialize the log data.
344 profileCollectorService::serialize();
346 atomic_store(&ProfilerLogStatus
, XRayLogInitStatus::XRAY_LOG_FINALIZED
,
347 memory_order_release
);
348 return XRayLogInitStatus::XRAY_LOG_FINALIZED
;
352 profilingLoggingInit(size_t, size_t, void *Options
,
353 size_t OptionsSize
) XRAY_NEVER_INSTRUMENT
{
354 RecursionGuard
G(ReentranceGuard
);
356 return XRayLogInitStatus::XRAY_LOG_UNINITIALIZED
;
358 s32 CurrentStatus
= XRayLogInitStatus::XRAY_LOG_UNINITIALIZED
;
359 if (!atomic_compare_exchange_strong(&ProfilerLogStatus
, &CurrentStatus
,
360 XRayLogInitStatus::XRAY_LOG_INITIALIZING
,
361 memory_order_acq_rel
)) {
363 Report("Cannot initialize already initialised profiling "
364 "implementation.\n");
365 return static_cast<XRayLogInitStatus
>(CurrentStatus
);
369 SpinMutexLock
Lock(&ProfilerOptionsMutex
);
370 FlagParser ConfigParser
;
373 registerProfilerFlags(&ConfigParser
, &Flags
);
374 ConfigParser
.ParseString(profilingCompilerDefinedFlags());
375 const char *Env
= GetEnv("XRAY_PROFILING_OPTIONS");
378 ConfigParser
.ParseString(Env
);
380 // Then parse the configuration string provided.
381 ConfigParser
.ParseString(static_cast<const char *>(Options
));
383 ReportUnrecognizedFlags();
384 *profilingFlags() = Flags
;
387 // We need to reset the profile data collection implementation now.
388 profileCollectorService::reset();
390 // Then also reset the buffer queue implementation.
392 bool Success
= false;
393 new (&BufferQueueStorage
)
394 BufferQueue(profilingFlags()->per_thread_allocator_max
,
395 profilingFlags()->buffers_max
, Success
);
398 Report("Failed to initialize preallocated memory buffers!");
399 atomic_store(&ProfilerLogStatus
,
400 XRayLogInitStatus::XRAY_LOG_UNINITIALIZED
,
401 memory_order_release
);
402 return XRayLogInitStatus::XRAY_LOG_UNINITIALIZED
;
405 // If we've succeeded, set the global pointer to the initialised storage.
406 BQ
= reinterpret_cast<BufferQueue
*>(&BufferQueueStorage
);
409 auto InitStatus
= BQ
->init(profilingFlags()->per_thread_allocator_max
,
410 profilingFlags()->buffers_max
);
412 if (InitStatus
!= BufferQueue::ErrorCode::Ok
) {
414 Report("Failed to initialize preallocated memory buffers; error: %s",
415 BufferQueue::getErrorString(InitStatus
));
416 atomic_store(&ProfilerLogStatus
,
417 XRayLogInitStatus::XRAY_LOG_UNINITIALIZED
,
418 memory_order_release
);
419 return XRayLogInitStatus::XRAY_LOG_UNINITIALIZED
;
422 DCHECK(!BQ
->finalizing());
425 // We need to set up the exit handlers.
426 static pthread_once_t Once
= PTHREAD_ONCE_INIT
;
430 &ProfilingKey
, +[](void *P
) XRAY_NEVER_INSTRUMENT
{
431 if (atomic_exchange(&ThreadExitingLatch
, 1, memory_order_acq_rel
))
437 auto T
= reinterpret_cast<ProfilingData
*>(P
);
438 if (atomic_load_relaxed(&T
->Allocators
) == 0)
442 // If we're somehow executing this while inside a
443 // non-reentrant-friendly context, we skip attempting to post
444 // the current thread's data.
445 RecursionGuard
G(ReentranceGuard
);
449 postCurrentThreadFCT(*T
);
453 // We also need to set up an exit handler, so that we can get the
454 // profile information at exit time. We use the C API to do this, to not
455 // rely on C++ ABI functions for registering exit handlers.
456 Atexit(+[]() XRAY_NEVER_INSTRUMENT
{
457 if (atomic_exchange(&ThreadExitingLatch
, 1, memory_order_acq_rel
))
461 at_scope_exit([]() XRAY_NEVER_INSTRUMENT
{ cleanupTLD(); });
463 // Finalize and flush.
464 if (profilingFinalize() != XRAY_LOG_FINALIZED
||
465 profilingFlush() != XRAY_LOG_FLUSHED
)
469 Report("XRay Profile flushed at exit.");
473 __xray_log_set_buffer_iterator(profileCollectorService::nextBuffer
);
474 __xray_set_handler(profilingHandleArg0
);
475 __xray_set_handler_arg1(profilingHandleArg1
);
477 atomic_store(&ProfilerLogStatus
, XRayLogInitStatus::XRAY_LOG_INITIALIZED
,
478 memory_order_release
);
480 Report("XRay Profiling init successful.\n");
482 return XRayLogInitStatus::XRAY_LOG_INITIALIZED
;
485 bool profilingDynamicInitializer() XRAY_NEVER_INSTRUMENT
{
486 // Set up the flag defaults from the static defaults and the
487 // compiler-provided defaults.
489 SpinMutexLock
Lock(&ProfilerOptionsMutex
);
490 auto *F
= profilingFlags();
492 FlagParser ProfilingParser
;
493 registerProfilerFlags(&ProfilingParser
, F
);
494 ProfilingParser
.ParseString(profilingCompilerDefinedFlags());
498 profilingLoggingInit
,
503 auto RegistrationResult
= __xray_log_register_mode("xray-profiling", Impl
);
504 if (RegistrationResult
!= XRayLogRegisterStatus::XRAY_REGISTRATION_OK
) {
506 Report("Cannot register XRay Profiling mode to 'xray-profiling'; error = "
512 if (!internal_strcmp(flags()->xray_mode
, "xray-profiling"))
513 __xray_log_select_mode("xray_profiling");
517 } // namespace __xray
// Calls profilingDynamicInitializer() while this translation unit's statics
// are initialized; the result is kept in a variable marked UNUSED.
519 static auto UNUSED Unused
= __xray::profilingDynamicInitializer();