tools/profiler/core/memory_hooks.cpp
/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* vim: set ts=8 sts=2 et sw=2 tw=80: */
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

7 #include "memory_hooks.h"
9 #include "nscore.h"
11 #include "mozilla/Assertions.h"
12 #include "mozilla/Atomics.h"
13 #include "mozilla/FastBernoulliTrial.h"
14 #include "mozilla/IntegerPrintfMacros.h"
15 #include "mozilla/JSONWriter.h"
16 #include "mozilla/MemoryReporting.h"
17 #include "mozilla/PlatformMutex.h"
18 #include "mozilla/ProfilerCounts.h"
19 #include "mozilla/ThreadLocal.h"
20 #include "mozilla/ThreadSafety.h"
22 #include "GeckoProfiler.h"
23 #include "prenv.h"
24 #include "replace_malloc.h"
26 #include <ctype.h>
27 #include <errno.h>
28 #include <limits.h>
29 #include <stdarg.h>
30 #include <stdio.h>
31 #include <stdlib.h>
32 #include <string.h>
34 #ifdef XP_WIN
35 # include <windows.h>
36 # include <process.h>
37 #else
38 # include <pthread.h>
39 # include <sys/types.h>
40 # include <unistd.h>
41 #endif
43 #ifdef ANDROID
44 # include <android/log.h>
45 #endif
// The gBernoulli value starts out as a nullptr, and only gets initialized
// once. It then lives for the entire lifetime of the process. It cannot be
// deleted without additional multi-threaded protections, since if we deleted
// it during profiler_stop then there could be a race between threads already
// in a memory hook that might try to access the value after or during
// deletion.
static mozilla::FastBernoulliTrial* gBernoulli;

namespace mozilla::profiler {

//---------------------------------------------------------------------------
// Utilities
//---------------------------------------------------------------------------

// Returns true or false depending on whether the marker was actually added
// or not.
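// Note: the marker itself is attributed to the main thread (see
// MarkerThreadId::MainThread() below), while the thread that actually made
// the allocation is recorded separately in the "threadId" payload property.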
static bool profiler_add_native_allocation_marker(int64_t aSize,
                                                  uintptr_t aMemoryAddress) {
  if (!profiler_thread_is_being_profiled_for_markers(
          profiler_main_thread_id())) {
    return false;
  }

  // Because native allocations may be intercepted anywhere, blocking while
  // locking the profiler mutex here could end up causing a deadlock if
  // another mutex is taken, which the profiler may indirectly need elsewhere.
  // See bug 1642726 for such a scenario.
  // So instead we bail out if the mutex is already locked. Native allocations
  // are statistically sampled anyway, so missing a few because of this is
  // acceptable.
  if (profiler_is_locked_on_current_thread()) {
    return false;
  }

  struct NativeAllocationMarker {
    static constexpr mozilla::Span<const char> MarkerTypeName() {
      return mozilla::MakeStringSpan("Native allocation");
    }
    static void StreamJSONMarkerData(
        mozilla::baseprofiler::SpliceableJSONWriter& aWriter, int64_t aSize,
        uintptr_t aMemoryAddress, ProfilerThreadId aThreadId) {
      aWriter.IntProperty("size", aSize);
      aWriter.IntProperty("memoryAddress",
                          static_cast<int64_t>(aMemoryAddress));
      // Tech note: If `ToNumber()` returns a uint64_t, the conversion to
      // int64_t is "implementation-defined" before C++20. This is acceptable
      // here, because this is a one-way conversion to a unique identifier
      // that's used to visually separate data by thread on the front-end.
      aWriter.IntProperty("threadId",
                          static_cast<int64_t>(aThreadId.ToNumber()));
    }
    static mozilla::MarkerSchema MarkerTypeDisplay() {
      return mozilla::MarkerSchema::SpecialFrontendLocation{};
    }
  };

  profiler_add_marker("Native allocation", geckoprofiler::category::OTHER,
                      {MarkerThreadId::MainThread(), MarkerStack::Capture()},
                      NativeAllocationMarker{}, aSize, aMemoryAddress,
                      profiler_current_thread_id());
  return true;
}

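// A snapshot of the unreplaced allocator entry points, captured in
// replace_init() below; the hooks forward to the real implementation through
// this table to avoid re-entering themselves.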
static malloc_table_t gMallocTable;

// This is only needed because of the |const void*| vs |void*| arg mismatch.
static size_t MallocSizeOf(const void* aPtr) {
  return gMallocTable.malloc_usable_size(const_cast<void*>(aPtr));
}

// The values for the Bernoulli trial are taken from DMD. According to DMD:
//
// In testing, a probability of 0.003 resulted in ~25% of heap blocks getting
// a stack trace and ~80% of heap bytes getting a stack trace. (This is
// possible because big heap blocks are more likely to get a stack trace.)
//
// The random number seeds are arbitrary and were obtained from random.org.
//
// However, that probability caused a lot of slowdown, since profiler stacks
// are fairly expensive to collect, so the value was lowered to 10% of the
// original, i.e. 0.0003.
static void EnsureBernoulliIsInstalled() {
  if (!gBernoulli) {
    // This is only installed once. See the gBernoulli definition for more
    // information.
    gBernoulli =
        new FastBernoulliTrial(0.0003, 0x8e26eeee166bc8ca, 0x56820f304a9c9ae0);
  }
}

// This class provides infallible allocations (they abort on OOM) like
// mozalloc's InfallibleAllocPolicy, except that memory hooks are bypassed.
// This policy is used by the HashSet.
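// Allocations made through this policy go straight to gMallocTable (the
// underlying allocator), so the tracker's own bookkeeping never re-enters the
// replace_* hooks below.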
class InfallibleAllocWithoutHooksPolicy {
  static void ExitOnFailure(const void* aP) {
    if (!aP) {
      MOZ_CRASH("Profiler memory hooks out of memory; aborting");
    }
  }

 public:
  template <typename T>
  static T* maybe_pod_malloc(size_t aNumElems) {
    if (aNumElems & mozilla::tl::MulOverflowMask<sizeof(T)>::value) {
      return nullptr;
    }
    return (T*)gMallocTable.malloc(aNumElems * sizeof(T));
  }

  template <typename T>
  static T* maybe_pod_calloc(size_t aNumElems) {
    return (T*)gMallocTable.calloc(aNumElems, sizeof(T));
  }

  template <typename T>
  static T* maybe_pod_realloc(T* aPtr, size_t aOldSize, size_t aNewSize) {
    if (aNewSize & mozilla::tl::MulOverflowMask<sizeof(T)>::value) {
      return nullptr;
    }
    return (T*)gMallocTable.realloc(aPtr, aNewSize * sizeof(T));
  }

  template <typename T>
  static T* pod_malloc(size_t aNumElems) {
    T* p = maybe_pod_malloc<T>(aNumElems);
    ExitOnFailure(p);
    return p;
  }

  template <typename T>
  static T* pod_calloc(size_t aNumElems) {
    T* p = maybe_pod_calloc<T>(aNumElems);
    ExitOnFailure(p);
    return p;
  }

  template <typename T>
  static T* pod_realloc(T* aPtr, size_t aOldSize, size_t aNewSize) {
    T* p = maybe_pod_realloc(aPtr, aOldSize, aNewSize);
    ExitOnFailure(p);
    return p;
  }

  template <typename T>
  static void free_(T* aPtr, size_t aSize = 0) {
    gMallocTable.free(aPtr);
  }

  static void reportAllocOverflow() { ExitOnFailure(nullptr); }
  bool checkSimulatedOOM() const { return true; }
};

// We can't use mozilla::Mutex because it causes re-entry into the memory
// hooks. Define a custom implementation here.
class MOZ_CAPABILITY("mutex") Mutex : private ::mozilla::detail::MutexImpl {
 public:
  Mutex() = default;

  void Lock() MOZ_CAPABILITY_ACQUIRE() { ::mozilla::detail::MutexImpl::lock(); }
  void Unlock() MOZ_CAPABILITY_RELEASE() {
    ::mozilla::detail::MutexImpl::unlock();
  }
};

class MOZ_SCOPED_CAPABILITY MutexAutoLock {
  MutexAutoLock(const MutexAutoLock&) = delete;
  void operator=(const MutexAutoLock&) = delete;

  Mutex& mMutex;

 public:
  explicit MutexAutoLock(Mutex& aMutex) MOZ_CAPABILITY_ACQUIRE(aMutex)
      : mMutex(aMutex) {
    mMutex.Lock();
  }
  ~MutexAutoLock() MOZ_CAPABILITY_RELEASE() { mMutex.Unlock(); }
};

//---------------------------------------------------------------------------
// Tracked allocations
//---------------------------------------------------------------------------

// The allocation tracker is shared between multiple threads, and is the
// coordinator for knowing when allocations have been tracked. The mutable
// internal state is protected by a mutex, and managed by the methods.
//
// The tracker knows about all the allocations that we have added to the
// profiler. This way, whenever any given piece of memory is freed, we can see
// if it was previously tracked, and we can track its deallocation.

class AllocationTracker {
  // This type tracks all of the allocations that we have captured. This way,
  // we can see if a deallocation is inside of this set. We want to provide a
  // balanced view into the allocations and deallocations.
  typedef mozilla::HashSet<const void*, mozilla::DefaultHasher<const void*>,
                           InfallibleAllocWithoutHooksPolicy>
      AllocationSet;

 public:
  AllocationTracker() = default;

  void AddMemoryAddress(const void* memoryAddress) {
    MutexAutoLock lock(mMutex);
    if (!mAllocations.put(memoryAddress)) {
      MOZ_CRASH("Out of memory while tracking native allocations.");
    }
  }

  void Reset() {
    MutexAutoLock lock(mMutex);
    mAllocations.clearAndCompact();
  }

  // Returns true when the memory address is found and removed; otherwise the
  // memory address is not being tracked and false is returned.
  bool RemoveMemoryAddressIfFound(const void* memoryAddress) {
    MutexAutoLock lock(mMutex);

    auto ptr = mAllocations.lookup(memoryAddress);
    if (ptr) {
      // The memory was present. It no longer needs to be tracked.
      mAllocations.remove(ptr);
      return true;
    }

    return false;
  }

 private:
  AllocationSet mAllocations;
  Mutex mMutex MOZ_UNANNOTATED;
};

static AllocationTracker* gAllocationTracker;

static void EnsureAllocationTrackerIsInstalled() {
  if (!gAllocationTracker) {
    // This is only installed once.
    gAllocationTracker = new AllocationTracker();
  }
}

//---------------------------------------------------------------------------
// Per-thread blocking of intercepts
//---------------------------------------------------------------------------

// On MacOS and Linux, the first __thread/thread_local access calls malloc,
// which leads to an infinite loop. So we use pthread-based TLS instead, which
// doesn't have this problem as long as the TLS key is registered early.
//
// This is a little different from the TLS storage used with mozjemalloc,
// which uses native TLS on Linux, possibly because it is not only initialised
// but **used** early.
#if !defined(XP_DARWIN) && !defined(XP_LINUX)
#  define PROFILER_THREAD_LOCAL(T) MOZ_THREAD_LOCAL(T)
#else
#  define PROFILER_THREAD_LOCAL(T) \
    ::mozilla::detail::ThreadLocal<T, ::mozilla::detail::ThreadLocalKeyStorage>
#endif

// This class is used to determine if allocations on this thread should be
// intercepted or not.
// Creating a ThreadIntercept object on the stack will implicitly block nested
// ones. There are other reasons to block: The feature is off, or we're inside
// a profiler function that is locking a mutex.
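// Typical use: AllocCallback and FreeCallback construct a ThreadIntercept on
// the stack and bail out early when IsBlocked() returns true.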
class MOZ_RAII ThreadIntercept {
  // When set to true, malloc does not intercept additional allocations. This
  // is needed because collecting stacks creates new allocations. When
  // blocked, these allocations are then ignored by the memory hook.
  static PROFILER_THREAD_LOCAL(bool) tlsIsBlocked;

  // This is a quick flag to check and see if the allocations feature is
  // enabled or disabled.
  static mozilla::Atomic<bool, mozilla::Relaxed> sAllocationsFeatureEnabled;

  // True if this ThreadIntercept has set tlsIsBlocked.
  bool mIsBlockingTLS;

  // True if interception is blocked for any reason.
  bool mIsBlocked;

 public:
  static void Init() {
    tlsIsBlocked.infallibleInit();
    // infallibleInit should zero-initialize, which corresponds to `false`.
    MOZ_ASSERT(!tlsIsBlocked.get());
  }

  ThreadIntercept() {
    // If the allocation interception feature is enabled, and the TLS is not
    // blocked yet, we will block the TLS now, and unblock on destruction.
    mIsBlockingTLS = sAllocationsFeatureEnabled && !tlsIsBlocked.get();
    if (mIsBlockingTLS) {
      MOZ_ASSERT(!tlsIsBlocked.get());
      tlsIsBlocked.set(true);
      // Since this is the top-level ThreadIntercept, interceptions are not
      // blocked unless the profiler itself holds a locked mutex, in which
      // case we don't want to intercept allocations that originate from such
      // a profiler call.
      mIsBlocked = profiler_is_locked_on_current_thread();
    } else {
      // The feature is off, or the TLS was already blocked, so we block this
      // interception.
      mIsBlocked = true;
    }
  }

  ~ThreadIntercept() {
    if (mIsBlockingTLS) {
      MOZ_ASSERT(tlsIsBlocked.get());
      tlsIsBlocked.set(false);
    }
  }

  // Is this ThreadIntercept effectively blocked? (Feature is off, or this
  // ThreadIntercept is nested, or we're inside a locked-Profiler function.)
  bool IsBlocked() const { return mIsBlocked; }

  static void EnableAllocationFeature() { sAllocationsFeatureEnabled = true; }

  static void DisableAllocationFeature() { sAllocationsFeatureEnabled = false; }
};

PROFILER_THREAD_LOCAL(bool) ThreadIntercept::tlsIsBlocked;

mozilla::Atomic<bool, mozilla::Relaxed>
    ThreadIntercept::sAllocationsFeatureEnabled(false);

//---------------------------------------------------------------------------
// malloc/free callbacks
//---------------------------------------------------------------------------

static void AllocCallback(void* aPtr, size_t aReqSize) {
  if (!aPtr) {
    return;
  }

  ThreadIntercept threadIntercept;
  if (threadIntercept.IsBlocked()) {
    // Either the native allocations feature is not turned on, or we may be
    // recursing into a memory hook; return. We'll still collect counter
    // information about this allocation, but no stack.
    return;
  }

  AUTO_PROFILER_LABEL("AllocCallback", PROFILER);

  size_t actualSize = gMallocTable.malloc_usable_size(aPtr);

  // Perform a Bernoulli trial, which will return true or false based on its
  // configured probability. It takes into account the byte size so that
  // larger allocations are weighted heavier than smaller allocations.
  MOZ_ASSERT(gBernoulli,
             "gBernoulli must be properly installed for the memory hooks.");
  if (
      // First perform the Bernoulli trial.
      gBernoulli->trial(actualSize) &&
      // Second, attempt to add a marker if the Bernoulli trial passed.
      profiler_add_native_allocation_marker(
          static_cast<int64_t>(actualSize),
          reinterpret_cast<uintptr_t>(aPtr))) {
    MOZ_ASSERT(gAllocationTracker,
               "gAllocationTracker must be properly installed for the memory "
               "hooks.");
    // Only track the memory if the allocation marker was actually added to
    // the profiler.
    gAllocationTracker->AddMemoryAddress(aPtr);
  }

  // We're ignoring aReqSize here; the marker records the allocator-reported
  // usable size instead.
}

static void FreeCallback(void* aPtr) {
  if (!aPtr) {
    return;
  }

  ThreadIntercept threadIntercept;
  if (threadIntercept.IsBlocked()) {
    // Either the native allocations feature is not turned on, or we may be
    // recursing into a memory hook; return. We'll still collect counter
    // information about this deallocation, but no stack.
    return;
  }

  AUTO_PROFILER_LABEL("FreeCallback", PROFILER);

  // Only deallocations whose matching allocation was sampled (and is
  // therefore being tracked) are reported, so that the profile shows a
  // balanced view of allocations and deallocations.
  MOZ_ASSERT(
      gAllocationTracker,
      "gAllocationTracker must be properly installed for the memory hooks.");
  if (gAllocationTracker->RemoveMemoryAddressIfFound(aPtr)) {
    size_t unsignedSize = MallocSizeOf(aPtr);
    int64_t signedSize = -(static_cast<int64_t>(unsignedSize));

    // The size here is negative, indicating a deallocation.
    profiler_add_native_allocation_marker(signedSize,
                                          reinterpret_cast<uintptr_t>(aPtr));
  }
}

}  // namespace mozilla::profiler

//---------------------------------------------------------------------------
// malloc/free interception
//---------------------------------------------------------------------------

using namespace mozilla::profiler;

static void* replace_malloc(size_t aSize) {
  // This must be a call to malloc from outside. Intercept it.
  void* ptr = gMallocTable.malloc(aSize);
  AllocCallback(ptr, aSize);
  return ptr;
}

static void* replace_calloc(size_t aCount, size_t aSize) {
  void* ptr = gMallocTable.calloc(aCount, aSize);
  AllocCallback(ptr, aCount * aSize);
  return ptr;
}

static void* replace_realloc(void* aOldPtr, size_t aSize) {
  // If |aOldPtr| is nullptr, the call is equivalent to |malloc(aSize)|.
  if (!aOldPtr) {
    return replace_malloc(aSize);
  }

  FreeCallback(aOldPtr);
  void* ptr = gMallocTable.realloc(aOldPtr, aSize);
  if (ptr) {
    AllocCallback(ptr, aSize);
  } else {
    // If realloc fails, we undo the prior operations by re-inserting the old
    // pointer into the live block table. We don't have to do anything with
    // the dead block list because the dead block hasn't yet been inserted.
    // The block will end up looking like it was allocated for the first time
    // here, which is untrue, and the slop bytes will be zero, which may be
    // untrue. But this case is rare and doing better isn't worth the effort.
    AllocCallback(aOldPtr, gMallocTable.malloc_usable_size(aOldPtr));
  }
  return ptr;
}

static void* replace_memalign(size_t aAlignment, size_t aSize) {
  void* ptr = gMallocTable.memalign(aAlignment, aSize);
  AllocCallback(ptr, aSize);
  return ptr;
}

static void replace_free(void* aPtr) {
  FreeCallback(aPtr);
  gMallocTable.free(aPtr);
}

static void* replace_moz_arena_malloc(arena_id_t aArena, size_t aSize) {
  void* ptr = gMallocTable.moz_arena_malloc(aArena, aSize);
  AllocCallback(ptr, aSize);
  return ptr;
}

static void* replace_moz_arena_calloc(arena_id_t aArena, size_t aCount,
                                      size_t aSize) {
  void* ptr = gMallocTable.moz_arena_calloc(aArena, aCount, aSize);
  AllocCallback(ptr, aCount * aSize);
  return ptr;
}

static void* replace_moz_arena_realloc(arena_id_t aArena, void* aPtr,
                                       size_t aSize) {
  void* ptr = gMallocTable.moz_arena_realloc(aArena, aPtr, aSize);
  AllocCallback(ptr, aSize);
  return ptr;
}

static void replace_moz_arena_free(arena_id_t aArena, void* aPtr) {
  FreeCallback(aPtr);
  gMallocTable.moz_arena_free(aArena, aPtr);
}

static void* replace_moz_arena_memalign(arena_id_t aArena, size_t aAlignment,
                                        size_t aSize) {
  void* ptr = gMallocTable.moz_arena_memalign(aArena, aAlignment, aSize);
  AllocCallback(ptr, aSize);
  return ptr;
}

// We have to replace these or jemalloc will assume we don't implement any
// of the arena replacements!
static arena_id_t replace_moz_create_arena_with_params(
    arena_params_t* aParams) {
  return gMallocTable.moz_create_arena_with_params(aParams);
}

static void replace_moz_dispose_arena(arena_id_t aArenaId) {
  return gMallocTable.moz_dispose_arena(aArenaId);
}

static void replace_moz_set_max_dirty_page_modifier(int32_t aModifier) {
  return gMallocTable.moz_set_max_dirty_page_modifier(aModifier);
}

static bool replace_moz_enable_deferred_purge(bool aEnable) {
  return gMallocTable.moz_enable_deferred_purge(aEnable);
}

static bool replace_moz_may_purge_one_now(bool aPeekOnly) {
  return gMallocTable.moz_may_purge_one_now(aPeekOnly);
}

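// replace_init() is the entry point registered with jemalloc_replace_dynamic()
// below: it snapshots the original allocator table into gMallocTable, then
// the MALLOC_DECL X-macro expansion from malloc_decls.h redirects each entry
// of the caller's table to the matching replace_* wrapper above.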
// Must come after all the replace_* funcs.
void replace_init(malloc_table_t* aMallocTable, ReplaceMallocBridge** aBridge) {
  gMallocTable = *aMallocTable;
#define MALLOC_FUNCS (MALLOC_FUNCS_MALLOC_BASE | MALLOC_FUNCS_ARENA)
#define MALLOC_DECL(name, ...) aMallocTable->name = replace_##name;
#include "malloc_decls.h"
}

void profiler_replace_remove() {}

namespace mozilla::profiler {
//---------------------------------------------------------------------------
// Initialization
//---------------------------------------------------------------------------

void remove_memory_hooks() { jemalloc_replace_dynamic(nullptr); }

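// enable_native_allocations() sets up the sampling state (Bernoulli trial,
// allocation tracker, per-thread feature flag) before installing the hooks
// via jemalloc_replace_dynamic(), so that AllocCallback/FreeCallback can
// assume those globals exist.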
void enable_native_allocations() {
  // The bloat log tracks allocations and deallocations. This can conflict
  // with the memory hook machinery, as the bloat log creates its own
  // allocations. This means we can re-enter inside the bloat log machinery.
  // At this time, the bloat log does not know about and cannot handle the
  // native allocation feature.
  //
  // At the time of this writing, we hit this assertion:
  // IsIdle(oldState) || IsRead(oldState) in Checker::StartReadOp()
  //
  //   #01: GetBloatEntry(char const*, unsigned int)
  //   #02: NS_LogCtor
  //   #03: profiler_get_backtrace()
  //   #04: profiler_add_native_allocation_marker(long long)
  //   #05: mozilla::profiler::AllocCallback(void*, unsigned long)
  //   #06: replace_calloc(unsigned long, unsigned long)
  //   #07: PLDHashTable::ChangeTable(int)
  //   #08: PLDHashTable::Add(void const*, std::nothrow_t const&)
  //   #09: nsBaseHashtable<nsDepCharHashKey, nsAutoPtr<BloatEntry>, ...
  //   #10: GetBloatEntry(char const*, unsigned int)
  //   #11: NS_LogCtor
  //   #12: profiler_get_backtrace()
  //   ...
  MOZ_ASSERT(!PR_GetEnv("XPCOM_MEM_BLOAT_LOG"),
             "The bloat log feature is not compatible with the native "
             "allocations instrumentation.");

  EnsureBernoulliIsInstalled();
  EnsureAllocationTrackerIsInstalled();
  ThreadIntercept::EnableAllocationFeature();

  jemalloc_replace_dynamic(replace_init);
}

// This is safe to call even if the native allocations feature hasn't been
// enabled.
void disable_native_allocations() {
  ThreadIntercept::DisableAllocationFeature();
  if (gAllocationTracker) {
    gAllocationTracker->Reset();
  }
}

void memory_hooks_tls_init() {
  // Initialise the TLS early so that it is allocated with a lower key and on
  // an earlier page in order to avoid allocation when setting the variable.
  ThreadIntercept::Init();
}

}  // namespace mozilla::profiler