//===-- xray_profile_collector.cpp -----------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file is a part of XRay, a dynamic runtime instrumentation system.
//
// This implements the interface for the profileCollectorService.
//
//===----------------------------------------------------------------------===//
#include "xray_profile_collector.h"
#include "sanitizer_common/sanitizer_common.h"
#include "xray_allocator.h"
#include "xray_defs.h"
#include "xray_profiling_flags.h"
#include "xray_segmented_array.h"
#include <pthread.h>
#include <type_traits>
#include <utility>

namespace __xray {
namespace profileCollectorService {

SpinMutex GlobalMutex;

typename std::aligned_storage<sizeof(FunctionCallTrie)>::type TrieStorage;

struct ProfileBuffer {
  void *Data = nullptr;
  u64 Size = 0;
};

// Current version of the profile format.
constexpr u64 XRayProfilingVersion = 0x20180424;

// Identifier for XRay profiling files 'xrayprof' in hex.
constexpr u64 XRayMagicBytes = 0x7872617970726f66;

struct XRayProfilingFileHeader {
  const u64 MagicBytes = XRayMagicBytes;
  const u64 Version = XRayProfilingVersion;
  u64 Timestamp = 0; // System time in nanoseconds.
  u64 PID = 0;       // Process ID.
};

struct BlockHeader {
  u32 BlockSize;
  u32 BlockNum;
  u64 ThreadId;
};
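
// A rough sketch of the byte stream this collector hands out, reconstructed
// from serializeRecords() and nextBuffer() below (illustrative, not
// normative):
//
//   XRayProfilingFileHeader
//   BlockHeader | record | record | ...   <- one block per thread
//   BlockHeader | record | record | ...
//
// where each record is laid out as:
//
//   int32_t FId ... (leaf to root), int32_t 0 (sentinel),
//   u64 CallCount, u64 CumulativeLocalTime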

struct ThreadData {
  BufferQueue *BQ;
  FunctionCallTrie::Allocators::Buffers Buffers;
  FunctionCallTrie::Allocators Allocators;
  FunctionCallTrie FCT;
  tid_t TId;
};

using ThreadDataArray = Array<ThreadData>;
using ThreadDataAllocator = ThreadDataArray::AllocatorType;

// We use a separate buffer queue for the backing store for the allocator used
// by the ThreadData array. This lets us host the buffers, allocators, and
// tries associated with a thread by moving the data into the array instead of
// attempting to copy the data to a separately backed set of tries.
static typename std::aligned_storage<sizeof(BufferQueue),
                                     alignof(BufferQueue)>::type
    BufferQueueStorage;
static BufferQueue *BQ = nullptr;
static BufferQueue::Buffer Buffer;
static typename std::aligned_storage<sizeof(ThreadDataAllocator),
                                     alignof(ThreadDataAllocator)>::type
    ThreadDataAllocatorStorage;
static typename std::aligned_storage<sizeof(ThreadDataArray),
                                     alignof(ThreadDataArray)>::type
    ThreadDataArrayStorage;

static ThreadDataAllocator *TDAllocator = nullptr;
static ThreadDataArray *TDArray = nullptr;

using ProfileBufferArray = Array<ProfileBuffer>;
using ProfileBufferArrayAllocator = typename ProfileBufferArray::AllocatorType;

// These need to be global aligned storage to avoid dynamic initialization. We
// need these to be aligned to allow us to placement new objects into the
// storage, and have pointers to those objects be appropriately aligned.
static typename std::aligned_storage<sizeof(ProfileBufferArray)>::type
    ProfileBuffersStorage;
static typename std::aligned_storage<sizeof(ProfileBufferArrayAllocator)>::type
    ProfileBufferArrayAllocatorStorage;

static ProfileBufferArrayAllocator *ProfileBuffersAllocator = nullptr;
static ProfileBufferArray *ProfileBuffers = nullptr;
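
// For illustration only: the pattern used with the aligned storage above is
// roughly
//
//   new (&SomeStorage) SomeType(Args...);
//   SomePtr = reinterpret_cast<SomeType *>(&SomeStorage);
//
// i.e. objects are placement-new'ed into static storage from reset() below,
// so no dynamic initializers need to run at program startup.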

// Use a global flag to determine whether the collector implementation has been
// initialized.
static atomic_uint8_t CollectorInitialized{0};
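
// post() takes ownership of a thread's FunctionCallTrie, together with the
// allocators and buffers backing it, by moving them into the global
// ThreadData array. If the collector is not initialized, or the move into the
// array fails, the objects are destroyed and their buffers released back to
// the owning BufferQueue.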
void post(BufferQueue *Q, FunctionCallTrie &&T,
          FunctionCallTrie::Allocators &&A,
          FunctionCallTrie::Allocators::Buffers &&B,
          tid_t TId) XRAY_NEVER_INSTRUMENT {
  DCHECK_NE(Q, nullptr);

  // Bail out early if the collector has not been initialized.
  if (!atomic_load(&CollectorInitialized, memory_order_acquire)) {
    T.~FunctionCallTrie();
    A.~Allocators();
    Q->releaseBuffer(B.NodeBuffer);
    Q->releaseBuffer(B.RootsBuffer);
    Q->releaseBuffer(B.ShadowStackBuffer);
    Q->releaseBuffer(B.NodeIdPairBuffer);
    B.~Buffers();
    return;
  }

  {
    SpinMutexLock Lock(&GlobalMutex);
    DCHECK_NE(TDAllocator, nullptr);
    DCHECK_NE(TDArray, nullptr);

    if (TDArray->AppendEmplace(Q, std::move(B), std::move(A), std::move(T),
                               TId) == nullptr) {
      // If we fail to add the data to the array, we should destroy the objects
      // handed to us.
      T.~FunctionCallTrie();
      A.~Allocators();
      Q->releaseBuffer(B.NodeBuffer);
      Q->releaseBuffer(B.RootsBuffer);
      Q->releaseBuffer(B.ShadowStackBuffer);
      Q->releaseBuffer(B.NodeIdPairBuffer);
      B.~Buffers();
    }
  }
}

// A PathArray represents the function id's representing a stack trace. In this
// context a path is almost always represented from the leaf function in a call
// stack to a root of the call trie.
using PathArray = Array<int32_t>;

struct ProfileRecord {
  using PathAllocator = typename PathArray::AllocatorType;

  // The Path in this record is the function id's from the leaf to the root of
  // the function call stack as represented from a FunctionCallTrie.
  PathArray Path;
  const FunctionCallTrie::Node *Node;
};
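
// For illustration: for a call chain main() -> f() -> g() with function ids
// 1, 2 and 3, the record produced for g's node carries the path [3, 2, 1],
// i.e. leaf first, root last (see populateRecords() below).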

using ProfileRecordArray = Array<ProfileRecord>;

// Walk a depth-first traversal of each root of the FunctionCallTrie to
// generate the path(s) and the data associated with the path.
static void
populateRecords(ProfileRecordArray &PRs, ProfileRecord::PathAllocator &PA,
                const FunctionCallTrie &Trie) XRAY_NEVER_INSTRUMENT {
  using StackArray = Array<const FunctionCallTrie::Node *>;
  using StackAllocator = typename StackArray::AllocatorType;
  StackAllocator StackAlloc(profilingFlags()->stack_allocator_max);
  StackArray DFSStack(StackAlloc);
  for (const auto *R : Trie.getRoots()) {
    DFSStack.Append(R);
    while (!DFSStack.empty()) {
      auto *Node = DFSStack.back();
      DFSStack.trim(1);
      if (Node == nullptr)
        continue;
      auto Record = PRs.AppendEmplace(PathArray{PA}, Node);
      if (Record == nullptr)
        return;
      DCHECK_NE(Record, nullptr);

      // Traverse the Node's parents and as we're doing so, get the FIds in
      // the order they appear.
      for (auto N = Node; N != nullptr; N = N->Parent)
        Record->Path.Append(N->FId);
      DCHECK(!Record->Path.empty());

      for (const auto C : Node->Callees)
        DFSStack.Append(C.NodePtr);
    }
  }
}
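
// For illustration: a trie whose root is function id 1 with callees 2 and 3
// produces three records with paths [1], [2, 1] and [3, 1], one record per
// node visited by the depth-first traversal above.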

static void serializeRecords(ProfileBuffer *Buffer, const BlockHeader &Header,
                             const ProfileRecordArray &ProfileRecords)
    XRAY_NEVER_INSTRUMENT {
  auto NextPtr = static_cast<uint8_t *>(
                     internal_memcpy(Buffer->Data, &Header, sizeof(Header))) +
                 sizeof(Header);
  for (const auto &Record : ProfileRecords) {
    // List of IDs follow:
    for (const auto FId : Record.Path)
      NextPtr =
          static_cast<uint8_t *>(internal_memcpy(NextPtr, &FId, sizeof(FId))) +
          sizeof(FId);

    // Add the sentinel here.
    constexpr int32_t SentinelFId = 0;
    NextPtr = static_cast<uint8_t *>(
                  internal_memset(NextPtr, SentinelFId, sizeof(SentinelFId))) +
              sizeof(SentinelFId);

    // Add the node data here.
    NextPtr = static_cast<uint8_t *>(
                  internal_memcpy(NextPtr, &Record.Node->CallCount,
                                  sizeof(Record.Node->CallCount))) +
              sizeof(Record.Node->CallCount);
    NextPtr = static_cast<uint8_t *>(
                  internal_memcpy(NextPtr, &Record.Node->CumulativeLocalTime,
                                  sizeof(Record.Node->CumulativeLocalTime))) +
              sizeof(Record.Node->CumulativeLocalTime);
  }

  DCHECK_EQ(NextPtr - static_cast<uint8_t *>(Buffer->Data), Buffer->Size);
}

void serialize() XRAY_NEVER_INSTRUMENT {
  if (!atomic_load(&CollectorInitialized, memory_order_acquire))
    return;

  SpinMutexLock Lock(&GlobalMutex);

  // Clear out the global ProfileBuffers, if it's not empty.
  for (auto &B : *ProfileBuffers)
    deallocateBuffer(reinterpret_cast<unsigned char *>(B.Data), B.Size);
  ProfileBuffers->trim(ProfileBuffers->size());

  DCHECK_NE(TDArray, nullptr);
  if (TDArray->empty())
    return;

  // Then repopulate the global ProfileBuffers.
  u32 I = 0;
  auto MaxSize = profilingFlags()->global_allocator_max;
  auto ProfileArena = allocateBuffer(MaxSize);
  if (ProfileArena == nullptr)
    return;

  auto ProfileArenaCleanup = at_scope_exit(
      [&]() XRAY_NEVER_INSTRUMENT { deallocateBuffer(ProfileArena, MaxSize); });

  auto PathArena = allocateBuffer(profilingFlags()->global_allocator_max);
  if (PathArena == nullptr)
    return;

  auto PathArenaCleanup = at_scope_exit(
      [&]() XRAY_NEVER_INSTRUMENT { deallocateBuffer(PathArena, MaxSize); });

  for (const auto &ThreadTrie : *TDArray) {
    using ProfileRecordAllocator = typename ProfileRecordArray::AllocatorType;
    ProfileRecordAllocator PRAlloc(ProfileArena,
                                   profilingFlags()->global_allocator_max);
    ProfileRecord::PathAllocator PathAlloc(
        PathArena, profilingFlags()->global_allocator_max);
    ProfileRecordArray ProfileRecords(PRAlloc);

    // First, we want to compute the amount of space we're going to need. We'll
    // use a local allocator and an __xray::Array<...> to store the intermediary
    // data, then compute the size as we're going along. Then we'll allocate the
    // contiguous space to contain the thread buffer data.
    if (ThreadTrie.FCT.getRoots().empty())
      continue;

    populateRecords(ProfileRecords, PathAlloc, ThreadTrie.FCT);
    DCHECK(!ThreadTrie.FCT.getRoots().empty());
    DCHECK(!ProfileRecords.empty());

    // Go through each record, to compute the sizes.
    //
    // header size = block size (4 bytes)
    //   + block number (4 bytes)
    //   + thread id (8 bytes)
    // record size = path ids (4 bytes * number of ids + sentinel 4 bytes)
    //   + call count (8 bytes)
    //   + local time (8 bytes)
    //   + end of record (8 bytes)
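    //
    // Worked example (illustrative): with the accounting below, a record
    // whose path has three function ids contributes 20 + (4 * 3) = 32 bytes,
    // and a block holding just that one record occupies 16 (block header) +
    // 32 = 48 bytes in total.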
    u32 CumulativeSizes = 0;
    for (const auto &Record : ProfileRecords)
      CumulativeSizes += 20 + (4 * Record.Path.size());

    BlockHeader Header{16 + CumulativeSizes, I++, ThreadTrie.TId};
    auto B = ProfileBuffers->Append({});
    B->Size = sizeof(Header) + CumulativeSizes;
    B->Data = allocateBuffer(B->Size);
    DCHECK_NE(B->Data, nullptr);
    serializeRecords(B, Header, ProfileRecords);
  }
}
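
// Note: serialize() numbers blocks consecutively from 0, in the order the
// per-thread tries are visited; nextBuffer() below relies on BlockNum to find
// the following block in ProfileBuffers.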

void reset() XRAY_NEVER_INSTRUMENT {
  atomic_store(&CollectorInitialized, 0, memory_order_release);
  SpinMutexLock Lock(&GlobalMutex);

  if (ProfileBuffers != nullptr) {
    // Clear out the profile buffers that have been serialized.
    for (auto &B : *ProfileBuffers)
      deallocateBuffer(reinterpret_cast<uint8_t *>(B.Data), B.Size);
    ProfileBuffers->trim(ProfileBuffers->size());
    ProfileBuffers = nullptr;
  }

  if (TDArray != nullptr) {
    // Release the resources as required.
    for (auto &TD : *TDArray) {
      TD.BQ->releaseBuffer(TD.Buffers.NodeBuffer);
      TD.BQ->releaseBuffer(TD.Buffers.RootsBuffer);
      TD.BQ->releaseBuffer(TD.Buffers.ShadowStackBuffer);
      TD.BQ->releaseBuffer(TD.Buffers.NodeIdPairBuffer);
    }
    // We don't bother destroying the array here because we've already
    // potentially freed the backing store for the array. Instead we're going
    // to reset the pointer to nullptr, and re-use the storage later instead
    // (placement-new'ing into the storage as-is).
    TDArray = nullptr;
  }

  if (TDAllocator != nullptr) {
    TDAllocator->~Allocator();
    TDAllocator = nullptr;
  }

  if (Buffer.Data != nullptr) {
    BQ->releaseBuffer(Buffer);
  }

  if (BQ == nullptr) {
    bool Success = false;
    new (&BufferQueueStorage)
        BufferQueue(profilingFlags()->global_allocator_max, 1, Success);
    if (!Success)
      return;
    BQ = reinterpret_cast<BufferQueue *>(&BufferQueueStorage);
  } else {
    BQ->finalize();

    if (BQ->init(profilingFlags()->global_allocator_max, 1) !=
        BufferQueue::ErrorCode::Ok)
      return;
  }

  if (BQ->getBuffer(Buffer) != BufferQueue::ErrorCode::Ok)
    return;

  new (&ProfileBufferArrayAllocatorStorage)
      ProfileBufferArrayAllocator(profilingFlags()->global_allocator_max);
  ProfileBuffersAllocator = reinterpret_cast<ProfileBufferArrayAllocator *>(
      &ProfileBufferArrayAllocatorStorage);

  new (&ProfileBuffersStorage) ProfileBufferArray(*ProfileBuffersAllocator);
  ProfileBuffers =
      reinterpret_cast<ProfileBufferArray *>(&ProfileBuffersStorage);

  new (&ThreadDataAllocatorStorage)
      ThreadDataAllocator(Buffer.Data, Buffer.Size);
  TDAllocator =
      reinterpret_cast<ThreadDataAllocator *>(&ThreadDataAllocatorStorage);
  new (&ThreadDataArrayStorage) ThreadDataArray(*TDAllocator);
  TDArray = reinterpret_cast<ThreadDataArray *>(&ThreadDataArrayStorage);

  atomic_store(&CollectorInitialized, 1, memory_order_release);
}
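
// nextBuffer() implements the iteration protocol used when flushing the
// profile: passing {nullptr, 0} yields the file header, passing the header
// back yields block 0, and passing any block yields the block that follows
// it, until {nullptr, 0} signals that all blocks have been consumed.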
XRayBuffer nextBuffer(XRayBuffer B) XRAY_NEVER_INSTRUMENT {
  SpinMutexLock Lock(&GlobalMutex);

  if (ProfileBuffers == nullptr || ProfileBuffers->size() == 0)
    return {nullptr, 0};

  static pthread_once_t Once = PTHREAD_ONCE_INIT;
  static typename std::aligned_storage<sizeof(XRayProfilingFileHeader)>::type
      FileHeaderStorage;
  pthread_once(
      &Once, +[]() XRAY_NEVER_INSTRUMENT {
        new (&FileHeaderStorage) XRayProfilingFileHeader{};
      });

  if (UNLIKELY(B.Data == nullptr)) {
    // The first buffer should always contain the file header information.
    auto &FileHeader =
        *reinterpret_cast<XRayProfilingFileHeader *>(&FileHeaderStorage);
    FileHeader.Timestamp = NanoTime();
    FileHeader.PID = internal_getpid();
    return {&FileHeaderStorage, sizeof(XRayProfilingFileHeader)};
  }

  if (UNLIKELY(B.Data == &FileHeaderStorage))
    return {(*ProfileBuffers)[0].Data, (*ProfileBuffers)[0].Size};

  BlockHeader Header;
  internal_memcpy(&Header, B.Data, sizeof(BlockHeader));
  auto NextBlock = Header.BlockNum + 1;
  if (NextBlock < ProfileBuffers->size())
    return {(*ProfileBuffers)[NextBlock].Data,
            (*ProfileBuffers)[NextBlock].Size};
  return {nullptr, 0};
}

} // namespace profileCollectorService
} // namespace __xray