1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* vim: set ts=8 sts=2 et sw=2 tw=80: */
3 /* This Source Code Form is subject to the terms of the Mozilla Public
4 * License, v. 2.0. If a copy of the MPL was not distributed with this
5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
7 // This header contains classes that hold data related to thread profiling:
8 // Data members are stored `protected` in `ThreadRegistrationData`.
// Non-virtual sub-classes of `ThreadRegistrationData` provide layers of
10 // public accessors to subsets of the data. Each level builds on the previous
11 // one and adds further access to more data, but always with the appropriate
12 // guards where necessary.
// These classes have protected constructors, so only some trusted classes
// (`ThreadRegistration` and `ThreadRegistry`) will be able to construct them,
// and then give limited access depending on who asks (the owning thread or
// another one), and how much data they actually need.
18 // The hierarchy is, from base to most derived:
19 // - ThreadRegistrationData
20 // - ThreadRegistrationUnlockedConstReader
21 // - ThreadRegistrationUnlockedConstReaderAndAtomicRW
22 // - ThreadRegistrationUnlockedRWForLockedProfiler
23 // - ThreadRegistrationUnlockedReaderAndAtomicRWOnThread
24 // - ThreadRegistrationLockedRWFromAnyThread
25 // - ThreadRegistrationLockedRWOnThread
26 // - ThreadRegistration::EmbeddedData (actual data member in ThreadRegistration)
28 // Tech detail: These classes need to be a single hierarchy so that
29 // `ThreadRegistration` can contain the most-derived class, and from there can
30 // publish references to base classes without relying on Undefined Behavior.
31 // (It's not allowed to have some object and give a reference to a sub-class,
32 // unless that object was *really* constructed as that sub-class at least, even
33 // if that sub-class only adds member functions!)
34 // And where appropriate, these references will come along with the required
37 #ifndef ProfilerThreadRegistrationData_h
38 #define ProfilerThreadRegistrationData_h
40 #include "js/ProfilingFrameIterator.h"
41 #include "js/ProfilingStack.h"
42 #include "mozilla/Atomics.h"
43 #include "mozilla/BaseProfilerDetail.h"
44 #include "mozilla/MemoryReporting.h"
45 #include "mozilla/ProfilerThreadPlatformData.h"
46 #include "mozilla/ProfilerThreadRegistrationInfo.h"
48 #include "nsIThread.h"
50 class ProfiledThreadData
;
54 // Enum listing which profiling features are active for a single thread.
55 enum class ThreadProfilingFeatures
: uint32_t {
56 // The thread is not being profiled at all (either the profiler is not
57 // running, or this thread is not examined during profiling.)
60 // Single features, binary exclusive. May be `Combine()`d.
61 CPUUtilization
= 1u << 0,
65 // All possible features. Usually used as a mask to see if any feature is
66 // active at a given time.
67 Any
= CPUUtilization
| Sampling
| Markers
70 // Binary OR of one of more ThreadProfilingFeatures, to mix all arguments.
71 template <typename
... Ts
>
72 [[nodiscard
]] constexpr ThreadProfilingFeatures
Combine(
73 ThreadProfilingFeatures a1
, Ts
... as
) {
74 static_assert((true && ... &&
75 std::is_same_v
<std::remove_cv_t
<std::remove_reference_t
<Ts
>>,
76 ThreadProfilingFeatures
>));
77 return static_cast<ThreadProfilingFeatures
>(
78 (static_cast<std::underlying_type_t
<ThreadProfilingFeatures
>>(a1
) | ... |
79 static_cast<std::underlying_type_t
<ThreadProfilingFeatures
>>(as
)));
82 // Binary AND of one of more ThreadProfilingFeatures, to find features common to
84 template <typename
... Ts
>
85 [[nodiscard
]] constexpr ThreadProfilingFeatures
Intersect(
86 ThreadProfilingFeatures a1
, Ts
... as
) {
87 static_assert((true && ... &&
88 std::is_same_v
<std::remove_cv_t
<std::remove_reference_t
<Ts
>>,
89 ThreadProfilingFeatures
>));
90 return static_cast<ThreadProfilingFeatures
>(
91 (static_cast<std::underlying_type_t
<ThreadProfilingFeatures
>>(a1
) & ... &
92 static_cast<std::underlying_type_t
<ThreadProfilingFeatures
>>(as
)));
95 // Are there features in common between the two given sets?
96 // Mostly useful to test if any of a set of features is present in another set.
97 template <typename
... Ts
>
98 [[nodiscard
]] constexpr bool DoFeaturesIntersect(ThreadProfilingFeatures a1
,
99 ThreadProfilingFeatures a2
) {
100 return Intersect(a1
, a2
) != ThreadProfilingFeatures::NotProfiled
;
104 class CycleCollectedJSContext
;
107 namespace mozilla::profiler
{
109 // All data members related to thread profiling are stored here.
110 // See derived classes below, which give limited unlocked/locked read/write
111 // access in different situations, and will be available through
112 // ThreadRegistration and ThreadRegistry.
113 class ThreadRegistrationData
{
115 // No public accessors here. See derived classes for accessors, and
116 // Get.../With... functions for who can use these accessors.
118 size_t SizeOfExcludingThis(MallocSizeOf aMallocSizeOf
) const {
119 // Not including data that is not fully owned here.
123 size_t SizeOfIncludingThis(MallocSizeOf aMallocSizeOf
) const {
124 return aMallocSizeOf(this) + SizeOfExcludingThis(aMallocSizeOf
);
127 static constexpr size_t MAX_JS_FRAMES
= 1024;
128 using JsFrame
= JS::ProfilingFrameIterator::Frame
;
129 using JsFrameBuffer
= JsFrame
[MAX_JS_FRAMES
];
131 // `protected` to allow derived classes to read all data members.
133 ThreadRegistrationData(const char* aName
, const void* aStackTop
);
136 // Destructor only used to check invariants.
137 ~ThreadRegistrationData() {
138 MOZ_ASSERT((mProfilingFeatures
!= ThreadProfilingFeatures::NotProfiled
) ==
139 !!mProfiledThreadData
);
140 MOZ_ASSERT(!mProfiledThreadData
,
141 "mProfiledThreadData pointer should have been reset before "
142 "~ThreadRegistrationData");
146 // Permanent thread information.
147 // Set at construction, read from anywhere, moved-from at destruction.
148 ThreadRegistrationInfo mInfo
;
150 // Contains profiler labels and JS frames.
151 // Deep-written on thread only, deep-read from thread and suspended thread.
152 ProfilingStack mProfilingStack
;
154 // In practice, only read from thread and suspended thread.
155 PlatformData mPlatformData
;
157 // Only read from thread and suspended thread.
158 const void* const mStackTop
;
160 // Written from thread, read from thread and suspended thread.
161 nsCOMPtr
<nsIThread
> mThread
;
163 // If this is a JS thread, this is its JSContext, which is required for any
165 // Written from thread, read from thread and suspended thread.
166 CycleCollectedJSContext
* mCCJSContext
= nullptr;
168 // If mCCJSContext is not null AND the thread is being profiled, this points
169 // at the start of a JsFrameBuffer to be used for on-thread synchronous
171 JsFrame
* mJsFrameBuffer
= nullptr;
173 // The profiler needs to start and stop JS sampling of JS threads at various
174 // times. However, the JS engine can only do the required actions on the
175 // JS thread itself ("on-thread"), not from another thread ("off-thread").
176 // Therefore, we have the following two-step process.
178 // - The profiler requests (on-thread or off-thread) that the JS sampling be
179 // started/stopped, by changing mJSSampling to the appropriate REQUESTED
182 // - The relevant JS thread polls (on-thread) for changes to mJSSampling.
183 // When it sees a REQUESTED state, it performs the appropriate actions to
184 // actually start/stop JS sampling, and changes mJSSampling out of the
187 // The state machine is as follows.
189 // INACTIVE --> ACTIVE_REQUESTED
195 // INACTIVE_REQUESTED <-- ACTIVE
197 // The polling is done in the following two ways.
199 // - Via the interrupt callback mechanism; the JS thread must call
200 // profiler_js_interrupt_callback() from its own interrupt callback.
201 // This is how sampling must be started/stopped for threads where the
202 // request was made off-thread.
204 // - When {Start,Stop}JSSampling() is called on-thread, we can immediately
205 // follow it with a PollJSSampling() call to avoid the delay between the
//   two steps. Likewise, SetCycleCollectedJSContext() calls PollJSSampling().
//
// One non-obvious thing about all this: these JS sampling requests are made
// on all threads, even non-JS threads. mCCJSContext needs to also be set (via
// SetCycleCollectedJSContext(), which can only happen for JS threads) for any
// JS sampling to actually happen.
215 ACTIVE_REQUESTED
= 1,
217 INACTIVE_REQUESTED
= 3,
218 } mJSSampling
= INACTIVE
;
220 uint32_t mJSFlags
= 0;
222 // Flags to conveniently track various JS instrumentations.
223 enum class JSInstrumentationFlags
{
228 [[nodiscard
]] bool JSAllocationsEnabled() const {
229 return mJSFlags
& uint32_t(JSInstrumentationFlags::Allocations
);
232 // The following members may be modified from another thread.
233 // They need to be atomic, because LockData() does not prevent reads from
234 // the owning thread.
236 // mSleep tracks whether the thread is sleeping, and if so, whether it has
237 // been previously observed. This is used for an optimization: in some
238 // cases, when a thread is asleep, we duplicate the previous sample, which
239 // is cheaper than taking a new sample.
241 // mSleep is atomic because it is accessed from multiple threads.
243 // - It is written only by this thread, via setSleeping() and setAwake().
245 // - It is read by SamplerThread::Run().
247 // There are two cases where racing between threads can cause an issue.
249 // - If CanDuplicateLastSampleDueToSleep() returns false but that result is
250 // invalidated before being acted upon, we will take a full sample
251 // unnecessarily. This is additional work but won't cause any correctness
252 // issues. (In actual fact, this case is impossible. In order to go from
253 // CanDuplicateLastSampleDueToSleep() returning false to it returning true
254 // requires an intermediate call to it in order for mSleep to go from
255 // SLEEPING_NOT_OBSERVED to SLEEPING_OBSERVED.)
257 // - If CanDuplicateLastSampleDueToSleep() returns true but that result is
258 // invalidated before being acted upon -- i.e. the thread wakes up before
259 // DuplicateLastSample() is called -- we will duplicate the previous
260 // sample. This is inaccurate, but only slightly... we will effectively
261 // treat the thread as having slept a tiny bit longer than it really did.
263 // This latter inaccuracy could be avoided by moving the
264 // CanDuplicateLastSampleDueToSleep() check within the thread-freezing code,
265 // e.g. the section where Tick() is called. But that would reduce the
266 // effectiveness of the optimization because more code would have to be run
267 // before we can tell that duplication is allowed.
269 static const int AWAKE
= 0;
270 static const int SLEEPING_NOT_OBSERVED
= 1;
271 static const int SLEEPING_OBSERVED
= 2;
272 // Read&written from thread and suspended thread.
273 Atomic
<int> mSleep
{AWAKE
};
274 Atomic
<uint64_t> mThreadCpuTimeInNsAtLastSleep
{0};
277 // The first wake is the thread creation.
278 Atomic
<uint64_t, MemoryOrdering::Relaxed
> mWakeCount
{1};
279 mutable baseprofiler::detail::BaseProfilerMutex mRecordWakeCountMutex
;
280 mutable uint64_t mAlreadyRecordedWakeCount
= 0;
281 mutable uint64_t mAlreadyRecordedCpuTimeInMs
= 0;
284 // Is this thread currently being profiled, and with which features?
285 // Written from profiler, read from any thread.
286 // Invariant: `!!mProfilingFeatures == !!mProfiledThreadData` (set together.)
287 Atomic
<ThreadProfilingFeatures
, MemoryOrdering::Relaxed
> mProfilingFeatures
{
288 ThreadProfilingFeatures::NotProfiled
};
290 // If the profiler is active and this thread is selected for profiling, this
291 // points at the relevant ProfiledThreadData.
292 // Fully controlled by the profiler.
293 // Invariant: `!!mProfilingFeatures == !!mProfiledThreadData` (set together).
294 ProfiledThreadData
* mProfiledThreadData
= nullptr;
297 // Accessing const data from any thread.
298 class ThreadRegistrationUnlockedConstReader
: public ThreadRegistrationData
{
300 [[nodiscard
]] const ThreadRegistrationInfo
& Info() const { return mInfo
; }
302 [[nodiscard
]] const PlatformData
& PlatformDataCRef() const {
303 return mPlatformData
;
306 [[nodiscard
]] const void* StackTop() const { return mStackTop
; }
309 ThreadRegistrationUnlockedConstReader(const char* aName
,
310 const void* aStackTop
)
311 : ThreadRegistrationData(aName
, aStackTop
) {}
314 // Accessing atomic data from any thread.
315 class ThreadRegistrationUnlockedConstReaderAndAtomicRW
316 : public ThreadRegistrationUnlockedConstReader
{
318 [[nodiscard
]] const ProfilingStack
& ProfilingStackCRef() const {
319 return mProfilingStack
;
321 [[nodiscard
]] ProfilingStack
& ProfilingStackRef() { return mProfilingStack
; }
323 // Similar to `profiler_is_active()`, this atomic flag may become out-of-date.
324 // It should only be used as an indication to know whether this thread is
325 // probably being profiled (with some specific features), to avoid doing
326 // expensive operations otherwise. Edge cases:
327 // - This thread could get `NotProfiled`, but the profiler has just started,
328 // so some very early data may be missing. No real impact on profiling.
329 // - This thread could see profiled features, but the profiled has just
330 // stopped, so some some work will be done and then discarded when finally
331 // attempting to write to the buffer. No impact on profiling.
332 // - This thread could see profiled features, but the profiler will quickly
333 // stop and restart, so this thread will write information relevant to the
334 // previous profiling session. Very rare, and little impact on profiling.
335 [[nodiscard
]] ThreadProfilingFeatures
ProfilingFeatures() const {
336 return mProfilingFeatures
;
339 // Call this whenever the current thread sleeps. Calling it twice in a row
340 // without an intervening setAwake() call is an error.
342 MOZ_ASSERT(mSleep
== AWAKE
);
343 mSleep
= SLEEPING_NOT_OBSERVED
;
346 // Call this whenever the current thread wakes. Calling it twice in a row
347 // without an intervening setSleeping() call is an error.
349 MOZ_ASSERT(mSleep
!= AWAKE
);
356 // Returns the CPU time used by the thread since the previous call to this
357 // method or since the thread was started if this is the first call.
358 uint64_t GetNewCpuTimeInNs() {
359 uint64_t newCpuTimeNs
;
360 if (!GetCpuTimeSinceThreadStartInNs(&newCpuTimeNs
, PlatformDataCRef())) {
363 uint64_t before
= mThreadCpuTimeInNsAtLastSleep
;
365 MOZ_LIKELY(newCpuTimeNs
> before
) ? newCpuTimeNs
- before
: 0;
366 mThreadCpuTimeInNsAtLastSleep
= newCpuTimeNs
;
371 void RecordWakeCount() const;
374 // This is called on every profiler restart. Put things that should happen
375 // at that time here.
376 void ReinitializeOnResume() {
377 // This is needed to cause an initial sample to be taken from sleeping
378 // threads that had been observed prior to the profiler stopping and
379 // restarting. Otherwise sleeping threads would not have any samples to
380 // copy forward while sleeping.
381 (void)mSleep
.compareExchange(SLEEPING_OBSERVED
, SLEEPING_NOT_OBSERVED
);
384 // This returns true for the second and subsequent calls in each sleep
385 // cycle, so that the sampler can skip its full sampling and reuse the first
386 // asleep sample instead.
387 [[nodiscard
]] bool CanDuplicateLastSampleDueToSleep() {
388 if (mSleep
== AWAKE
) {
391 if (mSleep
.compareExchange(SLEEPING_NOT_OBSERVED
, SLEEPING_OBSERVED
)) {
397 [[nodiscard
]] bool IsSleeping() const { return mSleep
!= AWAKE
; }
400 ThreadRegistrationUnlockedConstReaderAndAtomicRW(const char* aName
,
401 const void* aStackTop
)
402 : ThreadRegistrationUnlockedConstReader(aName
, aStackTop
) {}
405 // Like above, with special PSAutoLock-guarded accessors.
406 class ThreadRegistrationUnlockedRWForLockedProfiler
407 : public ThreadRegistrationUnlockedConstReaderAndAtomicRW
{
409 // IMPORTANT! IMPORTANT! IMPORTANT! IMPORTANT! IMPORTANT! IMPORTANT!
410 // Only add functions that take a `const PSAutoLock&` proof-of-lock.
411 // (Because there is no other lock.)
413 [[nodiscard
]] const ProfiledThreadData
* GetProfiledThreadData(
414 const PSAutoLock
&) const {
415 return mProfiledThreadData
;
418 [[nodiscard
]] ProfiledThreadData
* GetProfiledThreadData(const PSAutoLock
&) {
419 return mProfiledThreadData
;
423 ThreadRegistrationUnlockedRWForLockedProfiler(const char* aName
,
424 const void* aStackTop
)
425 : ThreadRegistrationUnlockedConstReaderAndAtomicRW(aName
, aStackTop
) {}
428 // Reading data, unlocked from the thread, or locked otherwise.
429 // This data MUST only be written from the thread with lock (i.e., in
430 // LockedRWOnThread through RWOnThreadWithLock.)
431 class ThreadRegistrationUnlockedReaderAndAtomicRWOnThread
432 : public ThreadRegistrationUnlockedRWForLockedProfiler
{
434 // IMPORTANT! IMPORTANT! IMPORTANT! IMPORTANT! IMPORTANT! IMPORTANT!
435 // Non-atomic members read here MUST be written from LockedRWOnThread (to
436 // guarantee that they are only modified on this thread.)
438 [[nodiscard
]] JSContext
* GetJSContext() const;
439 [[nodiscard
]] CycleCollectedJSContext
* GetCycleCollectedJSContext() const {
444 ThreadRegistrationUnlockedReaderAndAtomicRWOnThread(const char* aName
,
445 const void* aStackTop
)
446 : ThreadRegistrationUnlockedRWForLockedProfiler(aName
, aStackTop
) {}
449 // Accessing locked data from the thread, or from any thread through the locked
452 // Like above, and profiler can also read&write mutex-protected members.
453 class ThreadRegistrationLockedRWFromAnyThread
454 : public ThreadRegistrationUnlockedReaderAndAtomicRWOnThread
{
456 void SetProfilingFeaturesAndData(ThreadProfilingFeatures aProfilingFeatures
,
457 ProfiledThreadData
* aProfiledThreadData
,
459 void ClearProfilingFeaturesAndData(const PSAutoLock
&);
461 // Not null when JSContext is not null AND this thread is being profiled.
462 // Points at the start of JsFrameBuffer.
463 [[nodiscard
]] JsFrame
* GetJsFrameBuffer() const { return mJsFrameBuffer
; }
465 [[nodiscard
]] const nsCOMPtr
<nsIEventTarget
> GetEventTarget() const {
469 void ResetMainThread(nsIThread
* aThread
) { mThread
= aThread
; }
471 // aDelay is the time the event that is currently running on the thread was
472 // queued before starting to run (if a PrioritizedEventQueue
473 // (i.e. MainThread), this will be 0 for any event at a lower priority
475 // aRunning is the time the event has been running. If no event is running
476 // these will both be TimeDuration() (i.e. 0). Both are out params, and are
477 // always set. Their initial value is discarded.
478 void GetRunningEventDelay(const TimeStamp
& aNow
, TimeDuration
& aDelay
,
479 TimeDuration
& aRunning
) {
480 if (mThread
) { // can be null right at the start of a process
482 mThread
->GetRunningEventDelay(&aDelay
, &start
);
483 if (!start
.IsNull()) {
484 // Note: the timestamp used here will be from when we started to
485 // suspend and sample the thread; which is also the timestamp
486 // associated with the sample.
487 aRunning
= aNow
- start
;
491 aDelay
= TimeDuration();
492 aRunning
= TimeDuration();
495 // Request that this thread start JS sampling. JS sampling won't actually
496 // start until a subsequent PollJSSampling() call occurs *and* mContext has
498 void StartJSSampling(uint32_t aJSFlags
) {
499 // This function runs on-thread or off-thread.
501 MOZ_RELEASE_ASSERT(mJSSampling
== INACTIVE
||
502 mJSSampling
== INACTIVE_REQUESTED
);
503 mJSSampling
= ACTIVE_REQUESTED
;
507 // Request that this thread stop JS sampling. JS sampling won't actually
508 // stop until a subsequent PollJSSampling() call occurs.
509 void StopJSSampling() {
510 // This function runs on-thread or off-thread.
512 MOZ_RELEASE_ASSERT(mJSSampling
== ACTIVE
||
513 mJSSampling
== ACTIVE_REQUESTED
);
514 mJSSampling
= INACTIVE_REQUESTED
;
518 ThreadRegistrationLockedRWFromAnyThread(const char* aName
,
519 const void* aStackTop
)
520 : ThreadRegistrationUnlockedReaderAndAtomicRWOnThread(aName
, aStackTop
) {}
523 // Accessing data, locked, from the thread.
524 // If any non-atomic data is readable from UnlockedReaderAndAtomicRWOnThread,
525 // it must be written from here, and not in base classes: Since this data is
526 // only written on the thread, it can be read from the same thread without
527 // lock; but writing must be locked so that other threads can safely read it,
528 // typically from LockedRWFromAnyThread.
529 class ThreadRegistrationLockedRWOnThread
530 : public ThreadRegistrationLockedRWFromAnyThread
{
532 void SetCycleCollectedJSContext(CycleCollectedJSContext
* aCCJSContext
);
533 void ClearCycleCollectedJSContext();
535 // Poll to see if JS sampling should be started/stopped.
536 void PollJSSampling();
539 ThreadRegistrationLockedRWOnThread(const char* aName
, const void* aStackTop
)
540 : ThreadRegistrationLockedRWFromAnyThread(aName
, aStackTop
) {}
543 } // namespace mozilla::profiler
545 #endif // ProfilerThreadRegistrationData_h