//===-- xray_profile_collector.cpp -----------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file is a part of XRay, a dynamic runtime instrumentation system.
//
// This implements the interface for the profileCollectorService.
//
//===----------------------------------------------------------------------===//
#include "xray_profile_collector.h"
#include "sanitizer_common/sanitizer_common.h"
#include "xray_allocator.h"
#include "xray_defs.h"
#include "xray_profiling_flags.h"
#include "xray_segmented_array.h"
#include <cstddef>
#include <memory>
#include <pthread.h>
#include <utility>

namespace __xray {
namespace profileCollectorService {

SpinMutex GlobalMutex;

struct ThreadTrie {
  tid_t TId;
  alignas(FunctionCallTrie) std::byte TrieStorage[sizeof(FunctionCallTrie)];
};

struct ProfileBuffer {
  void *Data = nullptr;
  size_t Size = 0;
};

// Current version of the profile format.
constexpr u64 XRayProfilingVersion = 0x20180424;

// Identifier for XRay profiling files 'xrayprof' in hex.
constexpr u64 XRayMagicBytes = 0x7872617970726f66;
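// Spelled out byte-by-byte, the magic value reads as ASCII 'xrayprof':
//   0x78 'x', 0x72 'r', 0x61 'a', 0x79 'y', 0x70 'p', 0x72 'r', 0x6f 'o',
//   0x66 'f'.
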
struct XRayProfilingFileHeader {
  const u64 MagicBytes = XRayMagicBytes;
  const u64 Version = XRayProfilingVersion;
  u64 Timestamp = 0; // System time in nanoseconds.
  u64 PID = 0;       // Process ID.
};

struct BlockHeader {
  u32 BlockSize;
  u32 BlockNum;
  u64 ThreadId;
};

struct ThreadData {
  BufferQueue *BQ;
  FunctionCallTrie::Allocators::Buffers Buffers;
  FunctionCallTrie::Allocators Allocators;
  FunctionCallTrie FCT;
  tid_t TId;
};
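// A ThreadData bundle keeps everything reset() later needs to release a
// retired thread's resources: the queue its buffers came from, the buffer
// handles themselves, the allocators, and the trie built on top of them.
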
using ThreadDataArray = Array<ThreadData>;
using ThreadDataAllocator = ThreadDataArray::AllocatorType;

// We use a separate buffer queue for the backing store for the allocator used
// by the ThreadData array. This lets us host the buffers, allocators, and
// tries associated with a thread by moving the data into the array instead of
// attempting to copy the data to a separately backed set of tries.
alignas(BufferQueue) static std::byte BufferQueueStorage[sizeof(BufferQueue)];
static BufferQueue *BQ = nullptr;
static BufferQueue::Buffer Buffer;
alignas(ThreadDataAllocator) static std::byte
    ThreadDataAllocatorStorage[sizeof(ThreadDataAllocator)];
alignas(ThreadDataArray) static std::byte
    ThreadDataArrayStorage[sizeof(ThreadDataArray)];

static ThreadDataAllocator *TDAllocator = nullptr;
static ThreadDataArray *TDArray = nullptr;

using ProfileBufferArray = Array<ProfileBuffer>;
using ProfileBufferArrayAllocator = typename ProfileBufferArray::AllocatorType;

// These need to be global aligned storage to avoid dynamic initialization. We
// need these to be aligned to allow us to placement new objects into the
// storage, and have pointers to those objects be appropriately aligned.
alignas(ProfileBufferArray) static std::byte
    ProfileBuffersStorage[sizeof(ProfileBufferArray)];
alignas(ProfileBufferArrayAllocator) static std::byte
    ProfileBufferArrayAllocatorStorage[sizeof(ProfileBufferArrayAllocator)];

static ProfileBufferArrayAllocator *ProfileBuffersAllocator = nullptr;
static ProfileBufferArray *ProfileBuffers = nullptr;

// Use a global flag to determine whether the collector implementation has
// been initialized.
static atomic_uint8_t CollectorInitialized{0};
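// post() and serialize() read this flag with acquire loads, pairing with the
// release store at the end of reset(); a reader that observes 1 therefore
// also observes the placement-new'd allocators and arrays.
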
void post(BufferQueue *Q, FunctionCallTrie &&T,
          FunctionCallTrie::Allocators &&A,
          FunctionCallTrie::Allocators::Buffers &&B,
          tid_t TId) XRAY_NEVER_INSTRUMENT {
  DCHECK_NE(Q, nullptr);

  // Bail out early if the collector has not been initialized.
  if (!atomic_load(&CollectorInitialized, memory_order_acquire)) {
    T.~FunctionCallTrie();
    A.~Allocators();
    Q->releaseBuffer(B.NodeBuffer);
    Q->releaseBuffer(B.RootsBuffer);
    Q->releaseBuffer(B.ShadowStackBuffer);
    Q->releaseBuffer(B.NodeIdPairBuffer);
    B.~Buffers();
    return;
  }

  SpinMutexLock Lock(&GlobalMutex);
  DCHECK_NE(TDAllocator, nullptr);
  DCHECK_NE(TDArray, nullptr);

  if (TDArray->AppendEmplace(Q, std::move(B), std::move(A), std::move(T),
                             TId) == nullptr) {
    // If we fail to add the data to the array, we should destroy the objects
    // handed to us.
    T.~FunctionCallTrie();
    A.~Allocators();
    Q->releaseBuffer(B.NodeBuffer);
    Q->releaseBuffer(B.RootsBuffer);
    Q->releaseBuffer(B.ShadowStackBuffer);
    Q->releaseBuffer(B.NodeIdPairBuffer);
    B.~Buffers();
  }
}
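// Note: post() is the hand-off point for per-thread profiling state; it is
// expected to be invoked by the profiling-mode runtime when a thread's trie
// is retired (e.g. at thread exit or log finalization), transferring
// ownership of the trie, its allocators, and the backing buffers.
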
// A PathArray represents a stack trace as a list of function ids. In this
// context a path is almost always represented from the leaf function in a
// call stack to a root of the call trie.
using PathArray = Array<int32_t>;

struct ProfileRecord {
  using PathAllocator = typename PathArray::AllocatorType;

  // The Path in this record is the function ids from the leaf to the root of
  // the function call stack as represented from a FunctionCallTrie.
  PathArray Path;
  const FunctionCallTrie::Node *Node;
};

using ProfileRecordArray = Array<ProfileRecord>;

// Walk a depth-first traversal of each root of the FunctionCallTrie to
// generate the path(s) and the data associated with the path.
static void
populateRecords(ProfileRecordArray &PRs, ProfileRecord::PathAllocator &PA,
                const FunctionCallTrie &Trie) XRAY_NEVER_INSTRUMENT {
  using StackArray = Array<const FunctionCallTrie::Node *>;
  using StackAllocator = typename StackArray::AllocatorType;
  StackAllocator StackAlloc(profilingFlags()->stack_allocator_max);
  StackArray DFSStack(StackAlloc);
  for (const auto *R : Trie.getRoots()) {
    DFSStack.Append(R);
    while (!DFSStack.empty()) {
      auto *Node = DFSStack.back();
      DFSStack.trim(1);
      if (Node == nullptr)
        continue;
      auto Record = PRs.AppendEmplace(PathArray{PA}, Node);
      if (Record == nullptr)
        return;
      DCHECK_NE(Record, nullptr);

      // Traverse the Node's parents and as we're doing so, get the FIds in
      // the order they appear.
      for (auto N = Node; N != nullptr; N = N->Parent)
        Record->Path.Append(N->FId);
      DCHECK(!Record->Path.empty());

      for (const auto C : Node->Callees)
        DFSStack.Append(C.NodePtr);
    }
  }
}

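// On-disk layout of a single profile block, as emitted below and sized in
// serialize(). Path ids are written leaf-to-root, as produced by
// populateRecords():
//
//   BlockHeader: u32 BlockSize | u32 BlockNum | u64 ThreadId    (16 bytes)
//   Per record:  i32 FId, ...  | i32 0 (sentinel)
//                | u64 CallCount | u64 CumulativeLocalTime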
static void serializeRecords(ProfileBuffer *Buffer, const BlockHeader &Header,
                             const ProfileRecordArray &ProfileRecords)
    XRAY_NEVER_INSTRUMENT {
  auto NextPtr = static_cast<uint8_t *>(
                     internal_memcpy(Buffer->Data, &Header, sizeof(Header))) +
                 sizeof(Header);
  for (const auto &Record : ProfileRecords) {
    // List of IDs follow:
    for (const auto FId : Record.Path)
      NextPtr =
          static_cast<uint8_t *>(internal_memcpy(NextPtr, &FId, sizeof(FId))) +
          sizeof(FId);

    // Add the sentinel here.
    constexpr int32_t SentinelFId = 0;
    NextPtr = static_cast<uint8_t *>(
                  internal_memset(NextPtr, SentinelFId, sizeof(SentinelFId))) +
              sizeof(SentinelFId);

    // Add the node data here.
    NextPtr =
        static_cast<uint8_t *>(internal_memcpy(
            NextPtr, &Record.Node->CallCount, sizeof(Record.Node->CallCount))) +
        sizeof(Record.Node->CallCount);
    NextPtr = static_cast<uint8_t *>(
                  internal_memcpy(NextPtr, &Record.Node->CumulativeLocalTime,
                                  sizeof(Record.Node->CumulativeLocalTime))) +
              sizeof(Record.Node->CumulativeLocalTime);
  }

  DCHECK_EQ(NextPtr - static_cast<uint8_t *>(Buffer->Data), Buffer->Size);
}

void serialize() XRAY_NEVER_INSTRUMENT {
  if (!atomic_load(&CollectorInitialized, memory_order_acquire))
    return;

  SpinMutexLock Lock(&GlobalMutex);

  // Clear out the global ProfileBuffers, if it's not empty.
  for (auto &B : *ProfileBuffers)
    deallocateBuffer(reinterpret_cast<unsigned char *>(B.Data), B.Size);
  ProfileBuffers->trim(ProfileBuffers->size());

  DCHECK_NE(TDArray, nullptr);
  if (TDArray->empty())
    return;

  // Then repopulate the global ProfileBuffers.
  u32 I = 0;
  auto MaxSize = profilingFlags()->global_allocator_max;
  auto ProfileArena = allocateBuffer(MaxSize);
  if (ProfileArena == nullptr)
    return;

  auto ProfileArenaCleanup = at_scope_exit(
      [&]() XRAY_NEVER_INSTRUMENT { deallocateBuffer(ProfileArena, MaxSize); });

  auto PathArena = allocateBuffer(profilingFlags()->global_allocator_max);
  if (PathArena == nullptr)
    return;

  auto PathArenaCleanup = at_scope_exit(
      [&]() XRAY_NEVER_INSTRUMENT { deallocateBuffer(PathArena, MaxSize); });

  for (const auto &ThreadTrie : *TDArray) {
    using ProfileRecordAllocator = typename ProfileRecordArray::AllocatorType;
    ProfileRecordAllocator PRAlloc(ProfileArena,
                                   profilingFlags()->global_allocator_max);
    ProfileRecord::PathAllocator PathAlloc(
        PathArena, profilingFlags()->global_allocator_max);
    ProfileRecordArray ProfileRecords(PRAlloc);

    // First, we want to compute the amount of space we're going to need.
    // We'll use a local allocator and an __xray::Array<...> to store the
    // intermediary data, then compute the size as we go along. Then we'll
    // allocate the contiguous space to contain the thread buffer data.
    if (ThreadTrie.FCT.getRoots().empty())
      continue;

    populateRecords(ProfileRecords, PathAlloc, ThreadTrie.FCT);
    DCHECK(!ThreadTrie.FCT.getRoots().empty());
    DCHECK(!ProfileRecords.empty());

    // Go through each record, to compute the sizes.
    //
    // header size = block size (4 bytes)
    //   + block number (4 bytes)
    //   + thread id (8 bytes)
    // record size = path ids (4 bytes * number of ids + sentinel 4 bytes)
    //   + call count (8 bytes)
    //   + local time (8 bytes)
    u32 CumulativeSizes = 0;
    for (const auto &Record : ProfileRecords)
      CumulativeSizes += 20 + (4 * Record.Path.size());

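    // The constants mirror the layout above: 16 == sizeof(BlockHeader), and
    // each record's fixed 20-byte tail is the 4-byte sentinel plus the two
    // 8-byte counters.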
    BlockHeader Header{16 + CumulativeSizes, I++, ThreadTrie.TId};
    auto B = ProfileBuffers->Append({});
    B->Size = sizeof(Header) + CumulativeSizes;
    B->Data = allocateBuffer(B->Size);
    DCHECK_NE(B->Data, nullptr);
    serializeRecords(B, Header, ProfileRecords);
  }
}

void reset() XRAY_NEVER_INSTRUMENT {
  atomic_store(&CollectorInitialized, 0, memory_order_release);
  SpinMutexLock Lock(&GlobalMutex);

  if (ProfileBuffers != nullptr) {
    // Clear out the profile buffers that have been serialized.
    for (auto &B : *ProfileBuffers)
      deallocateBuffer(reinterpret_cast<uint8_t *>(B.Data), B.Size);
    ProfileBuffers->trim(ProfileBuffers->size());
    ProfileBuffers = nullptr;
  }

  if (TDArray != nullptr) {
    // Release the resources as required.
    for (auto &TD : *TDArray) {
      TD.BQ->releaseBuffer(TD.Buffers.NodeBuffer);
      TD.BQ->releaseBuffer(TD.Buffers.RootsBuffer);
      TD.BQ->releaseBuffer(TD.Buffers.ShadowStackBuffer);
      TD.BQ->releaseBuffer(TD.Buffers.NodeIdPairBuffer);
    }

    // We don't bother destroying the array here because we've already
    // potentially freed the backing store for the array. Instead we're going
    // to reset the pointer to nullptr, and re-use the storage later
    // (placement-new'ing into the storage as-is).
    TDArray = nullptr;
  }

  if (TDAllocator != nullptr) {
    TDAllocator->~Allocator();
    TDAllocator = nullptr;
  }

  if (Buffer.Data != nullptr) {
    BQ->releaseBuffer(Buffer);
  }

  if (BQ == nullptr) {
    bool Success = false;
    new (&BufferQueueStorage)
        BufferQueue(profilingFlags()->global_allocator_max, 1, Success);
    if (!Success)
      return;
    BQ = reinterpret_cast<BufferQueue *>(&BufferQueueStorage);
  } else {
    BQ->finalize();

    if (BQ->init(profilingFlags()->global_allocator_max, 1) !=
        BufferQueue::ErrorCode::Ok)
      return;
  }

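  // The first reset() constructs the queue in its static storage; later calls
  // recycle the same object by finalizing and re-initializing it rather than
  // running the constructor again.
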
  if (BQ->getBuffer(Buffer) != BufferQueue::ErrorCode::Ok)
    return;

  new (&ProfileBufferArrayAllocatorStorage)
      ProfileBufferArrayAllocator(profilingFlags()->global_allocator_max);
  ProfileBuffersAllocator = reinterpret_cast<ProfileBufferArrayAllocator *>(
      &ProfileBufferArrayAllocatorStorage);

  new (&ProfileBuffersStorage) ProfileBufferArray(*ProfileBuffersAllocator);
  ProfileBuffers =
      reinterpret_cast<ProfileBufferArray *>(&ProfileBuffersStorage);

  new (&ThreadDataAllocatorStorage)
      ThreadDataAllocator(Buffer.Data, Buffer.Size);
  TDAllocator =
      reinterpret_cast<ThreadDataAllocator *>(&ThreadDataAllocatorStorage);
  new (&ThreadDataArrayStorage) ThreadDataArray(*TDAllocator);
  TDArray = reinterpret_cast<ThreadDataArray *>(&ThreadDataArrayStorage);

  atomic_store(&CollectorInitialized, 1, memory_order_release);
}

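// nextBuffer implements a simple pull-based iteration protocol: a zeroed
// XRayBuffer yields the file header; passing the header back yields block 0;
// passing block N yields block N + 1; a {nullptr, 0} result signals the end.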
XRayBuffer nextBuffer(XRayBuffer B) XRAY_NEVER_INSTRUMENT {
  SpinMutexLock Lock(&GlobalMutex);

  if (ProfileBuffers == nullptr || ProfileBuffers->size() == 0)
    return {nullptr, 0};

  static pthread_once_t Once = PTHREAD_ONCE_INIT;
  alignas(XRayProfilingFileHeader) static std::byte
      FileHeaderStorage[sizeof(XRayProfilingFileHeader)];
  pthread_once(
      &Once, +[]() XRAY_NEVER_INSTRUMENT {
        new (&FileHeaderStorage) XRayProfilingFileHeader{};
      });

  if (UNLIKELY(B.Data == nullptr)) {
    // The first buffer should always contain the file header information.
    auto &FileHeader =
        *reinterpret_cast<XRayProfilingFileHeader *>(&FileHeaderStorage);
    FileHeader.Timestamp = NanoTime();
    FileHeader.PID = internal_getpid();
    return {&FileHeaderStorage, sizeof(XRayProfilingFileHeader)};
  }

  if (UNLIKELY(B.Data == &FileHeaderStorage))
    return {(*ProfileBuffers)[0].Data, (*ProfileBuffers)[0].Size};

  BlockHeader Header;
  internal_memcpy(&Header, B.Data, sizeof(BlockHeader));
  auto NextBlock = Header.BlockNum + 1;
  if (NextBlock < ProfileBuffers->size())
    return {(*ProfileBuffers)[NextBlock].Data,
            (*ProfileBuffers)[NextBlock].Size};
  return {nullptr, 0};
}
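
// A minimal consumption sketch, assuming a hypothetical writeToDisk() helper
// (the real consumer is the buffer-iterator machinery of the XRay runtime):
//
//   XRayBuffer B{nullptr, 0};
//   for (B = nextBuffer(B); B.Data != nullptr; B = nextBuffer(B))
//     writeToDisk(B.Data, B.Size); // writeToDisk is illustrative only.
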
} // namespace profileCollectorService
} // namespace __xray