1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* vim: set ts=8 sts=2 et sw=2 tw=80: */
3 // Copyright (c) 2006-2011 The Chromium Authors. All rights reserved.
5 // Redistribution and use in source and binary forms, with or without
6 // modification, are permitted provided that the following conditions
8 // * Redistributions of source code must retain the above copyright
9 // notice, this list of conditions and the following disclaimer.
10 // * Redistributions in binary form must reproduce the above copyright
11 // notice, this list of conditions and the following disclaimer in
12 // the documentation and/or other materials provided with the
14 // * Neither the name of Google, Inc. nor the names of its contributors
15 // may be used to endorse or promote products derived from this
16 // software without specific prior written permission.
18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
21 // FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
22 // COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
23 // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
24 // BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
25 // OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
26 // AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
27 // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
28 // OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
35 #include "mozilla/WindowsVersion.h"
37 #include <type_traits>
// Copies register values from a Windows thread CONTEXT into aRegs.
// Exactly one architecture branch is compiled in. Every branch stores the
// program counter (mPC), stack pointer (mSP) and frame pointer (mFP), plus a
// couple of architecture-specific registers.
// @param aRegs    destination; each stored value is cast to Address.
// @param aContext source context; dereferenced without a null check.
39 static void PopulateRegsFromContext(Registers
& aRegs
, CONTEXT
* aContext
) {
// 64-bit x86: Rip/Rsp/Rbp, plus R10 and R12.
40 #if defined(GP_ARCH_amd64)
41 aRegs
.mPC
= reinterpret_cast<Address
>(aContext
->Rip
);
42 aRegs
.mSP
= reinterpret_cast<Address
>(aContext
->Rsp
);
43 aRegs
.mFP
= reinterpret_cast<Address
>(aContext
->Rbp
);
44 aRegs
.mR10
= reinterpret_cast<Address
>(aContext
->R10
);
45 aRegs
.mR12
= reinterpret_cast<Address
>(aContext
->R12
);
// 32-bit x86: Eip/Esp/Ebp, plus Ecx and Edx.
46 #elif defined(GP_ARCH_x86)
47 aRegs
.mPC
= reinterpret_cast<Address
>(aContext
->Eip
);
48 aRegs
.mSP
= reinterpret_cast<Address
>(aContext
->Esp
);
49 aRegs
.mFP
= reinterpret_cast<Address
>(aContext
->Ebp
);
50 aRegs
.mEcx
= reinterpret_cast<Address
>(aContext
->Ecx
);
51 aRegs
.mEdx
= reinterpret_cast<Address
>(aContext
->Edx
);
// ARM64: Pc/Sp/Fp, plus the link register (Lr) and X11 (stored in mR11).
52 #elif defined(GP_ARCH_arm64)
53 aRegs
.mPC
= reinterpret_cast<Address
>(aContext
->Pc
);
54 aRegs
.mSP
= reinterpret_cast<Address
>(aContext
->Sp
);
55 aRegs
.mFP
= reinterpret_cast<Address
>(aContext
->Fp
);
56 aRegs
.mLR
= reinterpret_cast<Address
>(aContext
->Lr
);
57 aRegs
.mR11
= reinterpret_cast<Address
>(aContext
->X11
);
63 // Gets a real (i.e. not pseudo) handle for the current thread, with the
64 // permissions needed for profiling.
65 // @return a real HANDLE for the current thread.
66 static HANDLE
GetRealCurrentThreadHandleForProfiling() {
// Output slot that DuplicateHandle fills in.
67 HANDLE realCurrentThreadHandle
;
// Source and target process are both the current process, so this turns the
// value returned by GetCurrentThread() into a handle of our own. The access
// mask requests only the rights to read the context, suspend/resume, and
// query information.
68 if (!::DuplicateHandle(
69 ::GetCurrentProcess(), ::GetCurrentThread(), ::GetCurrentProcess(),
70 &realCurrentThreadHandle
,
71 THREAD_GET_CONTEXT
| THREAD_SUSPEND_RESUME
| THREAD_QUERY_INFORMATION
,
// Return the duplicated handle. PlatformData stores it in mProfiledThread
// and closes it in its destructor.
76 return realCurrentThreadHandle
;
// Compile-time type comparison: PlatformData::WindowsHandle must be exactly
// the Win32 HANDLE type.
// NOTE(review): presumably the condition of a static_assert; confirm.
80 std::is_same_v
<mozilla::profiler::PlatformData::WindowsHandle
, HANDLE
>);
// Builds the platform data for the calling thread by obtaining a real handle
// to it (see GetRealCurrentThreadHandleForProfiling).
// @param aThreadId id of the thread being described; only used by the
//                  assertion below.
82 mozilla::profiler::PlatformData::PlatformData(ProfilerThreadId aThreadId
)
83 : mProfiledThread(GetRealCurrentThreadHandleForProfiling()) {
// The handle above always refers to the calling thread, so the id passed in
// must be the calling thread id as well.
84 MOZ_ASSERT(aThreadId
== ProfilerThreadId::FromNumber(::GetCurrentThreadId()));
// Closes the thread handle held in mProfiledThread, if there is one, and
// resets the member to nullptr.
87 mozilla::profiler::PlatformData::~PlatformData() {
// A null handle means there is nothing to close.
88 if (mProfiledThread
) {
89 CloseHandle(mProfiledThread
);
90 mProfiledThread
= nullptr;
// Sentinel handle value (INVALID_HANDLE_VALUE). The SamplerThread destructor
// compares its thread handles against it before calling CloseHandle.
94 static const HANDLE kNoThread
= INVALID_HANDLE_VALUE
;
96 ////////////////////////////////////////////////////////////////////////
97 // BEGIN Sampler target specifics
// Constructor with an empty body: no sampler setup is done here on this
// platform.
// @param aLock not used by the empty body.
99 Sampler::Sampler(PSLockRef aLock
) {}
// No-op on this platform: the body is empty. SamplerThread::Stop calls it.
// @param aLock not used by the empty body.
101 void Sampler::Disable(PSLockRef aLock
) {}
// Writes the unit of the threadCPUDelta sample field to aWriter.
// The unit is ns when GetCycleTimeFrequencyMHz() is non-zero, and
// variable CPU cycles otherwise.
// @param aLock   PSLockRef passed by the caller.
// @param aWriter JSON writer that receives the string property.
103 static void StreamMetaPlatformSampleUnits(PSLockRef aLock
,
104 SpliceableJSONWriter
& aWriter
) {
// Function-local static: the unit is chosen once, on the first call.
105 static const Span
<const char> units
=
106 (GetCycleTimeFrequencyMHz() != 0) ? MakeStringSpan("ns")
107 : MakeStringSpan("variable CPU cycles");
108 aWriter
.StringProperty("threadCPUDelta", units
);
// Converts a raw CPU measurement into the value emitted in JSON.
// With a non-zero cycle-time frequency the raw cycle count is converted to
// nanoseconds, rounded to nearest. A zero frequency is handled separately by
// the first branch.
// @param aRawValue raw measurement (CPU cycles on the conversion path).
// @return (aRawValue * 1000 + 500) / frequencyMHz on the conversion path.
112 uint64_t RunningTimes::ConvertRawToJson(uint64_t aRawValue
) {
// Function-local static: the frequency is queried once and then reused.
113 static const uint64_t cycleTimeFrequencyMHz
= GetCycleTimeFrequencyMHz();
// Zero frequency: no cycles-to-time conversion is possible.
114 if (cycleTimeFrequencyMHz
== 0u) {
// NOTE(review): 1000 is the number of MHz in one GHz, so the constant name
// reads inverted. The arithmetic below matches the explanation that follows.
118 constexpr uint64_t GHZ_PER_MHZ
= 1'000u;
119 // To get ns, we need to divide cycles by a frequency in GHz, i.e.:
120 // cycles / (f_MHz / GHZ_PER_MHZ). To avoid losing the integer precision of
121 // f_MHz, this is computed as (cycles * GHZ_PER_MHZ) / f_MHz.
122 // Adding GHZ_PER_MHZ/2 to (cycles * GHZ_PER_MHZ) will round to nearest when
123 // the result of the division is truncated.
// NOTE(review): the multiplication is done in uint64_t and wraps once
// aRawValue exceeds roughly UINT64_MAX / 1000.
124 return (aRawValue
* GHZ_PER_MHZ
+ (GHZ_PER_MHZ
/ 2u)) / cycleTimeFrequencyMHz
;
// Converts a FILETIME quantity to nanoseconds.
// @param aFileTime value expressed in 100-nanosecond units, stored as a low
//                  and a high half.
// @return the combined 64-bit value multiplied by 100.
127 static inline uint64_t ToNanoSeconds(const FILETIME
& aFileTime
) {
128 // FILETIME values are 100-nanoseconds units, converting
// Reassemble the low and high halves into a single 64-bit QuadPart.
// NOTE(review): despite its name, usec holds 100 ns ticks, not microseconds.
129 ULARGE_INTEGER usec
= {{aFileTime
.dwLowDateTime
, aFileTime
.dwHighDateTime
}};
130 return usec
.QuadPart
* 100;
133 namespace mozilla::profiler
{
// Computes the CPU time used by the profiled thread, in nanoseconds.
// Two sources are used: the thread cycle counter when a cycle-time frequency
// is known, and the kernel + user times from GetThreadTimes.
// @param aResult       out-parameter written with the time in ns.
// @param aPlatformData supplies the handle of the thread to query.
// @return a bool status (presumably false when a query fails; confirm).
134 bool GetCpuTimeSinceThreadStartInNs(
135 uint64_t* aResult
, const mozilla::profiler::PlatformData
& aPlatformData
) {
136 const HANDLE profiledThread
= aPlatformData
.ProfiledThread();
// A non-zero frequency selects the cycle-counter path.
137 int frequencyInMHz
= GetCycleTimeFrequencyMHz();
138 if (frequencyInMHz
) {
139 uint64_t cpuCycleCount
;
// QueryThreadCycleTime returning zero is treated as an error case.
140 if (!QueryThreadCycleTime(profiledThread
, &cpuCycleCount
)) {
// cycles / MHz gives microseconds; the factor 1000 turns that into ns.
// NOTE(review): the constant is therefore ns per microsecond; its name reads
// inverted.
144 constexpr uint64_t USEC_PER_NSEC
= 1000L;
145 *aResult
= cpuCycleCount
* USEC_PER_NSEC
/ frequencyInMHz
;
// Thread-times path: only kernelTime and userTime are used for the result;
// createTime and exitTime are just required outputs of the call.
149 FILETIME createTime
, exitTime
, kernelTime
, userTime
;
150 if (!GetThreadTimes(profiledThread
, &createTime
, &exitTime
, &kernelTime
,
// Total CPU time is kernel-mode plus user-mode time, both converted to ns.
155 *aResult
= ToNanoSeconds(kernelTime
) + ToNanoSeconds(userTime
);
158 } // namespace mozilla::profiler
// Takes a new process-wide CPU measurement and computes how much it changed
// since the previous one.
// @param aLock PSLockRef passed by the caller.
// @param aPreviousRunningTimesToBeUpdated previous measurement; overwritten
//        with the new measurement before the function ends.
// @return a RunningTimes (presumably the computed diff; confirm).
160 static RunningTimes
GetProcessRunningTimesDiff(
161 PSLockRef aLock
, RunningTimes
& aPreviousRunningTimesToBeUpdated
) {
162 AUTO_PROFILER_STATS(GetProcessRunningTimes
);
// Function-local static: the process handle is fetched once and reused.
164 static const HANDLE processHandle
= GetCurrentProcess();
166 RunningTimes newRunningTimes
;
168 AUTO_PROFILER_STATS(GetProcessRunningTimes_QueryProcessCycleTime
);
// The cycle count is recorded only when QueryProcessCycleTime returns
// non-zero.
169 if (ULONG64 cycles
; QueryProcessCycleTime(processHandle
, &cycles
) != 0) {
170 newRunningTimes
.SetThreadCPUDelta(cycles
);
// Timestamp taken right after the measurement.
172 newRunningTimes
.SetPostMeasurementTimeStamp(TimeStamp::Now());
// Compute the delta first, then remember the new values for the next call.
175 const RunningTimes diff
= newRunningTimes
- aPreviousRunningTimesToBeUpdated
;
176 aPreviousRunningTimesToBeUpdated
= newRunningTimes
;
// Takes a new CPU measurement for one thread and computes how much it changed
// since the measurement stored in that thread's ProfiledThreadData.
// @param aThreadData registration data of the thread; supplies its platform
//                    handle and its ProfiledThreadData.
// @return a RunningTimes (presumably the computed diff; confirm).
180 static RunningTimes
GetThreadRunningTimesDiff(
182 ThreadRegistration::UnlockedRWForLockedProfiler
& aThreadData
) {
183 AUTO_PROFILER_STATS(GetThreadRunningTimes
);
185 const mozilla::profiler::PlatformData
& platformData
=
186 aThreadData
.PlatformDataCRef();
187 const HANDLE profiledThread
= platformData
.ProfiledThread();
// The lambda performs the actual measurement and captures the handle by
// value. GetRunningTimesWithTightTimestamp wraps it (presumably to pair the
// measurement with a close timestamp; confirm).
189 const RunningTimes newRunningTimes
= GetRunningTimesWithTightTimestamp(
190 [profiledThread
](RunningTimes
& aRunningTimes
) {
191 AUTO_PROFILER_STATS(GetThreadRunningTimes_QueryThreadCycleTime
);
// A non-zero result from QueryThreadCycleTime stores the cycle count.
193 QueryThreadCycleTime(profiledThread
, &cycles
) != 0) {
194 aRunningTimes
.ResetThreadCPUDelta(cycles
);
// Presumably the failure case: no CPU delta is kept.
196 aRunningTimes
.ClearThreadCPUDelta();
// The previous measurement lives in the ProfiledThreadData, which must exist.
200 ProfiledThreadData
* profiledThreadData
=
201 aThreadData
.GetProfiledThreadData(aLock
);
202 MOZ_ASSERT(profiledThreadData
);
203 RunningTimes
& previousRunningTimes
=
204 profiledThreadData
->PreviousThreadRunningTimesRef();
// Compute the delta first, then remember the new values for the next call.
205 const RunningTimes diff
= newRunningTimes
- previousRunningTimes
;
206 previousRunningTimes
= newRunningTimes
;
// Overwrites the stored previous CPU measurement of a thread with a fresh
// reading, so that CPU time spent since the last diff is not counted.
// @param aThreadData registration data of the thread; supplies its platform
//                    handle and its ProfiledThreadData.
210 static void DiscardSuspendedThreadRunningTimes(
212 ThreadRegistration::UnlockedRWForLockedProfiler
& aThreadData
) {
213 AUTO_PROFILER_STATS(DiscardSuspendedThreadRunningTimes
);
215 // On Windows, suspending a thread makes that thread work a little bit. So we
216 // want to discard any added running time since the call to
217 // GetThreadRunningTimesDiff, which is done by overwriting the thread's
218 // PreviousThreadRunningTimesRef() with the current running time now.
220 const mozilla::profiler::PlatformData
& platformData
=
221 aThreadData
.PlatformDataCRef();
222 const HANDLE profiledThread
= platformData
.ProfiledThread();
// The value to overwrite lives in the ProfiledThreadData, which must exist.
224 ProfiledThreadData
* profiledThreadData
=
225 aThreadData
.GetProfiledThreadData(aLock
);
226 MOZ_ASSERT(profiledThreadData
);
227 RunningTimes
& previousRunningTimes
=
228 profiledThreadData
->PreviousThreadRunningTimesRef();
// A non-zero result from QueryThreadCycleTime stores the current cycle count
// as the new baseline.
230 if (ULONG64 cycles
; QueryThreadCycleTime(profiledThread
, &cycles
) != 0) {
231 previousRunningTimes
.ResetThreadCPUDelta(cycles
);
// Presumably the failure case: the stored CPU delta is cleared instead.
233 previousRunningTimes
.ClearThreadCPUDelta();
// Suspends the target thread, reads its register context, hands the registers
// to aProcessRegs while the thread is still suspended, then resumes it.
// @tparam Func        callable invoked as aProcessRegs(regs, aNow).
// @param aThreadData  supplies the handle of the thread to sample.
// @param aNow         timestamp forwarded unchanged to aProcessRegs.
// @param aProcessRegs callback run between the suspend and the resume.
237 template <typename Func
>
238 void Sampler::SuspendAndSampleAndResumeThread(
240 const ThreadRegistration::UnlockedReaderAndAtomicRWOnThread
& aThreadData
,
241 const TimeStamp
& aNow
, const Func
& aProcessRegs
) {
242 HANDLE profiled_thread
= aThreadData
.PlatformDataCRef().ProfiledThread();
// Guard against a missing thread handle.
243 if (profiled_thread
== nullptr) {
247 // Context used for sampling the register state of the profiled thread.
// Zero the whole structure before the flags are set and the context is read.
249 memset(&context
, 0, sizeof(context
));
251 //----------------------------------------------------------------//
252 // Suspend the samplee thread and get its context.
// SuspendThread failure is detected by comparing against (DWORD)-1.
254 static const DWORD kSuspendFailed
= static_cast<DWORD
>(-1);
255 if (SuspendThread(profiled_thread
) == kSuspendFailed
) {
259 // SuspendThread is asynchronous, so the thread may still be running.
260 // Call GetThreadContext first to ensure the thread is really suspended.
261 // See https://blogs.msdn.microsoft.com/oldnewthing/20150205-00/?p=44743.
263 // Using only CONTEXT_CONTROL is faster but on 64-bit it causes crashes in
264 // RtlVirtualUnwind (see bug 1120126) so we set all the flags.
265 #if defined(GP_ARCH_amd64)
266 context
.ContextFlags
= CONTEXT_FULL
;
// Other architectures request only the control and integer registers.
268 context
.ContextFlags
= CONTEXT_CONTROL
| CONTEXT_INTEGER
;
// If the context cannot be read, the thread is resumed rather than being
// left suspended.
270 if (!GetThreadContext(profiled_thread
, &context
)) {
271 ResumeThread(profiled_thread
);
275 //----------------------------------------------------------------//
276 // Sample the target thread.
278 // WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING
280 // The profiler's "critical section" begins here. We must be very careful
281 // what we do here, or risk deadlock. See the corresponding comment in
282 // platform-linux-android.cpp for details.
// Convert the captured context to Registers and hand it to the callback.
285 PopulateRegsFromContext(regs
, &context
);
286 aProcessRegs(regs
, aNow
);
288 //----------------------------------------------------------------//
289 // Resume the target thread.
291 ResumeThread(profiled_thread
);
293 // The profiler's critical section ends here.
295 // WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING
298 // END Sampler target specifics
299 ////////////////////////////////////////////////////////////////////////
301 ////////////////////////////////////////////////////////////////////////
302 // BEGIN SamplerThread target specifics
// Thread start routine with the unsigned int __stdcall (void*) signature
// used with _beginthreadex.
// @param aArg opaque thread argument; interpreted as a SamplerThread pointer.
304 static unsigned int __stdcall
ThreadEntry(void* aArg
) {
305 auto thread
= static_cast<SamplerThread
*>(aArg
);
// Thread start routine for the unregistered-thread spy. The SamplerThread
// constructor passes it to _beginthreadex with the SamplerThread as argument.
// @param aArg opaque thread argument; interpreted as a SamplerThread pointer.
310 static unsigned int __stdcall
UnregisteredThreadSpyEntry(void* aArg
) {
311 auto thread
= static_cast<SamplerThread
*>(aArg
);
// All of the spy work happens inside this member function.
312 thread
->RunUnregisteredThreadSpy();
// Sets up sampling: stores the generation and interval, optionally raises the
// system timer resolution, optionally starts the unregistered-thread spy
// thread, and starts the sampler thread. Crashes if a thread cannot be
// created.
// @param aLock                 PSLockRef passed by the caller.
// @param aActivityGeneration   stored in mActivityGeneration.
// @param aIntervalMilliseconds sampling interval; converted to microseconds.
// @param aFeatures             feature bits; queried for the timer-resolution
//                              opt-out and for unregistered-thread spying.
316 SamplerThread::SamplerThread(PSLockRef aLock
, uint32_t aActivityGeneration
,
317 double aIntervalMilliseconds
, uint32_t aFeatures
)
319 mActivityGeneration(aActivityGeneration
),
// Interval in whole microseconds: rounded to nearest, and never below 1.
320 mIntervalMicroseconds(
321 std::max(1, int(floor(aIntervalMilliseconds
* 1000 + 0.5)))),
322 mNoTimerResolutionChange(
323 ProfilerFeature::HasNoTimerResolutionChange(aFeatures
)) {
// Only intervals below 10 ms trigger a timer-resolution change, and only
// when the feature flag has not opted out.
324 if ((!mNoTimerResolutionChange
) && (mIntervalMicroseconds
< 10 * 1000)) {
325 // By default the timer resolution (which tends to be 1/64Hz, around 16ms)
326 // is not changed. However, if the requested interval is sufficiently low,
327 // the resolution will be adjusted to match. Note that this affects all
328 // timers in Firefox, and could therefore hide issues while profiling. This
329 // change may be prevented with the "notimerresolutionchange" feature.
// NOTE(review): for intervals below 1000 microseconds the integer division
// yields 0; confirm that timeBeginPeriod(0) is intended.
330 ::timeBeginPeriod(mIntervalMicroseconds
/ 1000);
333 if (ProfilerFeature::HasUnregisteredThreads(aFeatures
)) {
334 // Sampler&spy threads are not running yet, so it's safe to modify
335 // mSpyingState without locking the monitor.
336 mSpyingState
= SpyingState::Spy_Initializing
;
// Start the spy thread; it receives this object as its argument.
337 mUnregisteredThreadSpyThread
= reinterpret_cast<HANDLE
>(
338 _beginthreadex(nullptr,
339 /* stack_size */ 0, UnregisteredThreadSpyEntry
, this,
340 /* initflag */ 0, nullptr));
// A zero handle is treated as a fatal thread-creation failure.
341 if (mUnregisteredThreadSpyThread
== 0) {
342 MOZ_CRASH("_beginthreadex failed");
346 // Create a new thread. It is important to use _beginthreadex() instead of
347 // the Win32 function CreateThread(), because the CreateThread() does not
348 // initialize thread-specific structures in the C runtime library.
349 mThread
= reinterpret_cast<HANDLE
>(_beginthreadex(nullptr,
352 /* initflag */ 0, nullptr));
// Failing to create the sampler thread is fatal as well.
354 MOZ_CRASH("_beginthreadex failed");
// Shuts the sampling machinery down. If a spy thread exists, it is asked to
// stop through the mSpyingState handshake and then waited for. The sampler
// thread is waited for next, both handles are closed, and finally any
// remaining post-sampling callbacks are invoked with JustStopped.
358 SamplerThread::~SamplerThread() {
359 if (mUnregisteredThreadSpyThread
) {
361 // Make sure the spying thread is not actively working, because the win32
362 // function it's using could deadlock with WaitForSingleObject below.
363 MonitorAutoLock spyingStateLock
{mSpyingStateMonitor
};
// Step 1: wait until the spy is idle (Spy_Waiting) or has only been asked
// to start (SamplerToSpy_Start).
364 while (mSpyingState
!= SpyingState::Spy_Waiting
&&
365 mSpyingState
!= SpyingState::SamplerToSpy_Start
) {
366 spyingStateLock
.Wait();
// Step 2: request shutdown and wake up whoever waits on the monitor.
369 mSpyingState
= SpyingState::MainToSpy_Shutdown
;
370 spyingStateLock
.NotifyAll();
// Step 3: wait until the spy acknowledges with SpyToMain_ShuttingDown.
373 spyingStateLock
.Wait();
374 } while (mSpyingState
!= SpyingState::SpyToMain_ShuttingDown
);
// Block until the spy thread has actually terminated.
377 WaitForSingleObject(mUnregisteredThreadSpyThread
, INFINITE
);
379 // Close our own handle for the thread.
380 if (mUnregisteredThreadSpyThread
!= kNoThread
) {
381 CloseHandle(mUnregisteredThreadSpyThread
);
// Block until the sampler thread has terminated.
385 WaitForSingleObject(mThread
, INFINITE
);
387 // Close our own handle for the thread.
388 if (mThread
!= kNoThread
) {
389 CloseHandle(mThread
);
392 // Just in the unlikely case some callbacks were added between the end of the
// The callback list is moved out of the member before being invoked.
394 InvokePostSamplingCallbacks(std::move(mPostSamplingCallbackList
),
395 SamplingState::JustStopped
);
// Body of the spy thread. It advertises Spy_Waiting, waits on the monitor
// until another thread changes mSpyingState, then either acknowledges a
// shutdown request or switches to Spy_Working and calls
// SpyOnUnregisteredThreads(). The asserted states imply this sequence repeats
// (presumably inside a loop; confirm).
398 void SamplerThread::RunUnregisteredThreadSpy() {
399 // TODO: Consider registering this thread.
400 // Pros: Remove from list of unregistered threads; Not useful to profiling
402 // Cons: Doesn't appear in the profile, so users may miss the expensive CPU
403 // cost of this work on Windows.
404 PR_SetCurrentThreadName("UnregisteredThreadSpy");
// All reads and writes of mSpyingState below happen under this monitor lock.
408 MonitorAutoLock spyingStateLock
{mSpyingStateMonitor
};
409 // Either this is the first loop, or we're looping after working.
410 MOZ_ASSERT(mSpyingState
== SpyingState::Spy_Initializing
||
411 mSpyingState
== SpyingState::Spy_Working
);
413 // Let everyone know we're waiting, and then wait.
414 mSpyingState
= SpyingState::Spy_Waiting
;
415 mSpyingStateMonitor
.NotifyAll();
// Keep waiting for as long as nobody has changed the state.
417 spyingStateLock
.Wait();
418 } while (mSpyingState
== SpyingState::Spy_Waiting
);
// Shutdown request: acknowledge it so the destructor can proceed.
420 if (mSpyingState
== SpyingState::MainToSpy_Shutdown
) {
421 mSpyingState
= SpyingState::SpyToMain_ShuttingDown
;
422 mSpyingStateMonitor
.NotifyAll();
// The only other expected wake-up reason is a start request.
426 MOZ_ASSERT(mSpyingState
== SpyingState::SamplerToSpy_Start
);
427 mSpyingState
= SpyingState::Spy_Working
;
430 // Do the work without lock, so other threads can read the current state.
431 SpyOnUnregisteredThreads();
// Sleeps for roughly aMicroseconds.
// With a profiler interval of at least 1000 microseconds it issues a single
// Sleep of max(1, aMicroseconds / 1000) milliseconds. The other path sleeps
// the whole milliseconds and then busy-waits until the computed end time.
// @param aMicroseconds requested sleep duration in microseconds.
435 void SamplerThread::SleepMicro(uint32_t aMicroseconds
) {
436 // For now, keep the old behaviour of minimum Sleep(1), even for
437 // smaller-than-usual sleeps after an overshoot, unless the user has
438 // explicitly opted into a sub-millisecond profiler interval.
439 if (mIntervalMicroseconds
>= 1000) {
// Millisecond granularity, never less than 1 ms.
440 ::Sleep(std::max(1u, aMicroseconds
/ 1000));
// Sub-millisecond interval (presumably the else branch): compute the
// deadline up front so the spin below ends at the right time.
442 TimeStamp start
= TimeStamp::Now();
443 TimeStamp end
= start
+ TimeDuration::FromMicroseconds(aMicroseconds
);
445 // First, sleep for as many whole milliseconds as possible.
446 if (aMicroseconds
>= 1000) {
447 ::Sleep(aMicroseconds
/ 1000);
450 // Then, spin until enough time has passed.
451 while (TimeStamp::Now() < end
) {
// Stops sampling: undoes the timer-resolution change (under the same
// condition the constructor used to apply it) and disables the sampler.
// @param aLock forwarded to mSampler.Disable().
457 void SamplerThread::Stop(PSLockRef aLock
) {
458 if ((!mNoTimerResolutionChange
) && (mIntervalMicroseconds
< 10 * 1000)) {
459 // Disable any timer resolution changes we've made. Do it now while
460 // gPSMutex is locked, i.e. before any other SamplerThread can be created
461 // and call ::timeBeginPeriod().
463 // It's safe to do this now even though this SamplerThread is still alive,
464 // because the next time the main loop of Run() iterates it won't get past
465 // the mActivityGeneration check, and so it won't make any more ::Sleep()
// Same period value as the one the constructor passed to timeBeginPeriod.
// NOTE(review): this is 0 for intervals below 1000 microseconds; confirm.
467 ::timeEndPeriod(mIntervalMicroseconds
/ 1000);
470 mSampler
.Disable(aLock
);
473 // END SamplerThread target specifics
474 ////////////////////////////////////////////////////////////////////////
// Platform initialization hook; the body is empty, so nothing is done here.
// @param aLock not used by the empty body.
476 static void PlatformInit(PSLockRef aLock
) {}
478 #if defined(HAVE_NATIVE_UNWIND)
479 # define REGISTERS_SYNC_POPULATE(regs) \
481 RtlCaptureContext(&context); \
482 PopulateRegsFromContext(regs, &context);