Backed out changeset b71c8c052463 (bug 1943846) for causing mass failures. CLOSED...
[gecko.git] / tools / profiler / core / platform-win32.cpp
blob0e5c1c9dbb7719f819f92f97874da80872c8dee1
1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* vim: set ts=8 sts=2 et sw=2 tw=80: */
3 // Copyright (c) 2006-2011 The Chromium Authors. All rights reserved.
4 //
5 // Redistribution and use in source and binary forms, with or without
6 // modification, are permitted provided that the following conditions
7 // are met:
8 // * Redistributions of source code must retain the above copyright
9 // notice, this list of conditions and the following disclaimer.
10 // * Redistributions in binary form must reproduce the above copyright
11 // notice, this list of conditions and the following disclaimer in
12 // the documentation and/or other materials provided with the
13 // distribution.
14 // * Neither the name of Google, Inc. nor the names of its contributors
15 // may be used to endorse or promote products derived from this
16 // software without specific prior written permission.
18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
21 // FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
22 // COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
23 // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
24 // BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
25 // OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
26 // AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
27 // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
28 // OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29 // SUCH DAMAGE.
31 #include <windows.h>
32 #include <mmsystem.h>
33 #include <process.h>
35 #include "mozilla/WindowsVersion.h"
37 #include <type_traits>
39 static void PopulateRegsFromContext(Registers& aRegs, CONTEXT* aContext) {
40 #if defined(GP_ARCH_amd64)
41 aRegs.mPC = reinterpret_cast<Address>(aContext->Rip);
42 aRegs.mSP = reinterpret_cast<Address>(aContext->Rsp);
43 aRegs.mFP = reinterpret_cast<Address>(aContext->Rbp);
44 aRegs.mR10 = reinterpret_cast<Address>(aContext->R10);
45 aRegs.mR12 = reinterpret_cast<Address>(aContext->R12);
46 #elif defined(GP_ARCH_x86)
47 aRegs.mPC = reinterpret_cast<Address>(aContext->Eip);
48 aRegs.mSP = reinterpret_cast<Address>(aContext->Esp);
49 aRegs.mFP = reinterpret_cast<Address>(aContext->Ebp);
50 aRegs.mEcx = reinterpret_cast<Address>(aContext->Ecx);
51 aRegs.mEdx = reinterpret_cast<Address>(aContext->Edx);
52 #elif defined(GP_ARCH_arm64)
53 aRegs.mPC = reinterpret_cast<Address>(aContext->Pc);
54 aRegs.mSP = reinterpret_cast<Address>(aContext->Sp);
55 aRegs.mFP = reinterpret_cast<Address>(aContext->Fp);
56 aRegs.mLR = reinterpret_cast<Address>(aContext->Lr);
57 aRegs.mR11 = reinterpret_cast<Address>(aContext->X11);
58 #else
59 # error "bad arch"
60 #endif
63 // Gets a real (i.e. not pseudo) handle for the current thread, with the
64 // permissions needed for profiling.
65 // @return a real HANDLE for the current thread.
66 static HANDLE GetRealCurrentThreadHandleForProfiling() {
67 HANDLE realCurrentThreadHandle;
68 if (!::DuplicateHandle(
69 ::GetCurrentProcess(), ::GetCurrentThread(), ::GetCurrentProcess(),
70 &realCurrentThreadHandle,
71 THREAD_GET_CONTEXT | THREAD_SUSPEND_RESUME | THREAD_QUERY_INFORMATION,
72 FALSE, 0)) {
73 return nullptr;
76 return realCurrentThreadHandle;
79 static_assert(
80 std::is_same_v<mozilla::profiler::PlatformData::WindowsHandle, HANDLE>);
82 mozilla::profiler::PlatformData::PlatformData(ProfilerThreadId aThreadId)
83 : mProfiledThread(GetRealCurrentThreadHandleForProfiling()) {
84 MOZ_ASSERT(aThreadId == ProfilerThreadId::FromNumber(::GetCurrentThreadId()));
87 mozilla::profiler::PlatformData::~PlatformData() {
88 if (mProfiledThread) {
89 CloseHandle(mProfiledThread);
90 mProfiledThread = nullptr;
94 static const HANDLE kNoThread = INVALID_HANDLE_VALUE;
96 ////////////////////////////////////////////////////////////////////////
97 // BEGIN Sampler target specifics
99 Sampler::Sampler(PSLockRef aLock) {}
101 void Sampler::Disable(PSLockRef aLock) {}
103 static void StreamMetaPlatformSampleUnits(PSLockRef aLock,
104 SpliceableJSONWriter& aWriter) {
105 static const Span<const char> units =
106 (GetCycleTimeFrequencyMHz() != 0) ? MakeStringSpan("ns")
107 : MakeStringSpan("variable CPU cycles");
108 aWriter.StringProperty("threadCPUDelta", units);
111 /* static */
112 uint64_t RunningTimes::ConvertRawToJson(uint64_t aRawValue) {
113 static const uint64_t cycleTimeFrequencyMHz = GetCycleTimeFrequencyMHz();
114 if (cycleTimeFrequencyMHz == 0u) {
115 return aRawValue;
118 constexpr uint64_t GHZ_PER_MHZ = 1'000u;
119 // To get ns, we need to divide cycles by a frequency in GHz, i.e.:
120 // cycles / (f_MHz / GHZ_PER_MHZ). To avoid losing the integer precision of
121 // f_MHz, this is computed as (cycles * GHZ_PER_MHZ) / f_MHz.
122 // Adding GHZ_PER_MHZ/2 to (cycles * GHZ_PER_MHZ) will round to nearest when
123 // the result of the division is truncated.
124 return (aRawValue * GHZ_PER_MHZ + (GHZ_PER_MHZ / 2u)) / cycleTimeFrequencyMHz;
127 static inline uint64_t ToNanoSeconds(const FILETIME& aFileTime) {
128 // FILETIME values are 100-nanoseconds units, converting
129 ULARGE_INTEGER usec = {{aFileTime.dwLowDateTime, aFileTime.dwHighDateTime}};
130 return usec.QuadPart * 100;
133 namespace mozilla::profiler {
134 bool GetCpuTimeSinceThreadStartInNs(
135 uint64_t* aResult, const mozilla::profiler::PlatformData& aPlatformData) {
136 const HANDLE profiledThread = aPlatformData.ProfiledThread();
137 int frequencyInMHz = GetCycleTimeFrequencyMHz();
138 if (frequencyInMHz) {
139 uint64_t cpuCycleCount;
140 if (!QueryThreadCycleTime(profiledThread, &cpuCycleCount)) {
141 return false;
144 constexpr uint64_t USEC_PER_NSEC = 1000L;
145 *aResult = cpuCycleCount * USEC_PER_NSEC / frequencyInMHz;
146 return true;
149 FILETIME createTime, exitTime, kernelTime, userTime;
150 if (!GetThreadTimes(profiledThread, &createTime, &exitTime, &kernelTime,
151 &userTime)) {
152 return false;
155 *aResult = ToNanoSeconds(kernelTime) + ToNanoSeconds(userTime);
156 return true;
158 } // namespace mozilla::profiler
160 static RunningTimes GetProcessRunningTimesDiff(
161 PSLockRef aLock, RunningTimes& aPreviousRunningTimesToBeUpdated) {
162 AUTO_PROFILER_STATS(GetProcessRunningTimes);
164 static const HANDLE processHandle = GetCurrentProcess();
166 RunningTimes newRunningTimes;
168 AUTO_PROFILER_STATS(GetProcessRunningTimes_QueryProcessCycleTime);
169 if (ULONG64 cycles; QueryProcessCycleTime(processHandle, &cycles) != 0) {
170 newRunningTimes.SetThreadCPUDelta(cycles);
172 newRunningTimes.SetPostMeasurementTimeStamp(TimeStamp::Now());
175 const RunningTimes diff = newRunningTimes - aPreviousRunningTimesToBeUpdated;
176 aPreviousRunningTimesToBeUpdated = newRunningTimes;
177 return diff;
180 static RunningTimes GetThreadRunningTimesDiff(
181 PSLockRef aLock,
182 ThreadRegistration::UnlockedRWForLockedProfiler& aThreadData) {
183 AUTO_PROFILER_STATS(GetThreadRunningTimes);
185 const mozilla::profiler::PlatformData& platformData =
186 aThreadData.PlatformDataCRef();
187 const HANDLE profiledThread = platformData.ProfiledThread();
189 const RunningTimes newRunningTimes = GetRunningTimesWithTightTimestamp(
190 [profiledThread](RunningTimes& aRunningTimes) {
191 AUTO_PROFILER_STATS(GetThreadRunningTimes_QueryThreadCycleTime);
192 if (ULONG64 cycles;
193 QueryThreadCycleTime(profiledThread, &cycles) != 0) {
194 aRunningTimes.ResetThreadCPUDelta(cycles);
195 } else {
196 aRunningTimes.ClearThreadCPUDelta();
200 ProfiledThreadData* profiledThreadData =
201 aThreadData.GetProfiledThreadData(aLock);
202 MOZ_ASSERT(profiledThreadData);
203 RunningTimes& previousRunningTimes =
204 profiledThreadData->PreviousThreadRunningTimesRef();
205 const RunningTimes diff = newRunningTimes - previousRunningTimes;
206 previousRunningTimes = newRunningTimes;
207 return diff;
210 static void DiscardSuspendedThreadRunningTimes(
211 PSLockRef aLock,
212 ThreadRegistration::UnlockedRWForLockedProfiler& aThreadData) {
213 AUTO_PROFILER_STATS(DiscardSuspendedThreadRunningTimes);
215 // On Windows, suspending a thread makes that thread work a little bit. So we
216 // want to discard any added running time since the call to
217 // GetThreadRunningTimesDiff, which is done by overwriting the thread's
218 // PreviousThreadRunningTimesRef() with the current running time now.
220 const mozilla::profiler::PlatformData& platformData =
221 aThreadData.PlatformDataCRef();
222 const HANDLE profiledThread = platformData.ProfiledThread();
224 ProfiledThreadData* profiledThreadData =
225 aThreadData.GetProfiledThreadData(aLock);
226 MOZ_ASSERT(profiledThreadData);
227 RunningTimes& previousRunningTimes =
228 profiledThreadData->PreviousThreadRunningTimesRef();
230 if (ULONG64 cycles; QueryThreadCycleTime(profiledThread, &cycles) != 0) {
231 previousRunningTimes.ResetThreadCPUDelta(cycles);
232 } else {
233 previousRunningTimes.ClearThreadCPUDelta();
237 template <typename Func>
238 void Sampler::SuspendAndSampleAndResumeThread(
239 PSLockRef aLock,
240 const ThreadRegistration::UnlockedReaderAndAtomicRWOnThread& aThreadData,
241 const TimeStamp& aNow, const Func& aProcessRegs) {
242 HANDLE profiled_thread = aThreadData.PlatformDataCRef().ProfiledThread();
243 if (profiled_thread == nullptr) {
244 return;
247 // Context used for sampling the register state of the profiled thread.
248 CONTEXT context;
249 memset(&context, 0, sizeof(context));
251 //----------------------------------------------------------------//
252 // Suspend the samplee thread and get its context.
254 static const DWORD kSuspendFailed = static_cast<DWORD>(-1);
255 if (SuspendThread(profiled_thread) == kSuspendFailed) {
256 return;
259 // SuspendThread is asynchronous, so the thread may still be running.
260 // Call GetThreadContext first to ensure the thread is really suspended.
261 // See https://blogs.msdn.microsoft.com/oldnewthing/20150205-00/?p=44743.
263 // Using only CONTEXT_CONTROL is faster but on 64-bit it causes crashes in
264 // RtlVirtualUnwind (see bug 1120126) so we set all the flags.
265 #if defined(GP_ARCH_amd64)
266 context.ContextFlags = CONTEXT_FULL;
267 #else
268 context.ContextFlags = CONTEXT_CONTROL | CONTEXT_INTEGER;
269 #endif
270 if (!GetThreadContext(profiled_thread, &context)) {
271 ResumeThread(profiled_thread);
272 return;
275 //----------------------------------------------------------------//
276 // Sample the target thread.
278 // WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING
280 // The profiler's "critical section" begins here. We must be very careful
281 // what we do here, or risk deadlock. See the corresponding comment in
282 // platform-linux-android.cpp for details.
284 Registers regs;
285 PopulateRegsFromContext(regs, &context);
286 aProcessRegs(regs, aNow);
288 //----------------------------------------------------------------//
289 // Resume the target thread.
291 ResumeThread(profiled_thread);
293 // The profiler's critical section ends here.
295 // WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING
298 // END Sampler target specifics
299 ////////////////////////////////////////////////////////////////////////
301 ////////////////////////////////////////////////////////////////////////
302 // BEGIN SamplerThread target specifics
304 static unsigned int __stdcall ThreadEntry(void* aArg) {
305 auto thread = static_cast<SamplerThread*>(aArg);
306 thread->Run();
307 return 0;
310 static unsigned int __stdcall UnregisteredThreadSpyEntry(void* aArg) {
311 auto thread = static_cast<SamplerThread*>(aArg);
312 thread->RunUnregisteredThreadSpy();
313 return 0;
316 SamplerThread::SamplerThread(PSLockRef aLock, uint32_t aActivityGeneration,
317 double aIntervalMilliseconds, uint32_t aFeatures)
318 : mSampler(aLock),
319 mActivityGeneration(aActivityGeneration),
320 mIntervalMicroseconds(
321 std::max(1, int(floor(aIntervalMilliseconds * 1000 + 0.5)))),
322 mNoTimerResolutionChange(
323 ProfilerFeature::HasNoTimerResolutionChange(aFeatures)) {
324 if ((!mNoTimerResolutionChange) && (mIntervalMicroseconds < 10 * 1000)) {
325 // By default the timer resolution (which tends to be 1/64Hz, around 16ms)
326 // is not changed. However, if the requested interval is sufficiently low,
327 // the resolution will be adjusted to match. Note that this affects all
328 // timers in Firefox, and could therefore hide issues while profiling. This
329 // change may be prevented with the "notimerresolutionchange" feature.
330 ::timeBeginPeriod(mIntervalMicroseconds / 1000);
333 if (ProfilerFeature::HasUnregisteredThreads(aFeatures)) {
334 // Sampler&spy threads are not running yet, so it's safe to modify
335 // mSpyingState without locking the monitor.
336 mSpyingState = SpyingState::Spy_Initializing;
337 mUnregisteredThreadSpyThread = reinterpret_cast<HANDLE>(
338 _beginthreadex(nullptr,
339 /* stack_size */ 0, UnregisteredThreadSpyEntry, this,
340 /* initflag */ 0, nullptr));
341 if (mUnregisteredThreadSpyThread == 0) {
342 MOZ_CRASH("_beginthreadex failed");
346 // Create a new thread. It is important to use _beginthreadex() instead of
347 // the Win32 function CreateThread(), because the CreateThread() does not
348 // initialize thread-specific structures in the C runtime library.
349 mThread = reinterpret_cast<HANDLE>(_beginthreadex(nullptr,
350 /* stack_size */ 0,
351 ThreadEntry, this,
352 /* initflag */ 0, nullptr));
353 if (mThread == 0) {
354 MOZ_CRASH("_beginthreadex failed");
358 SamplerThread::~SamplerThread() {
359 if (mUnregisteredThreadSpyThread) {
361 // Make sure the spying thread is not actively working, because the win32
362 // function it's using could deadlock with WaitForSingleObject below.
363 MonitorAutoLock spyingStateLock{mSpyingStateMonitor};
364 while (mSpyingState != SpyingState::Spy_Waiting &&
365 mSpyingState != SpyingState::SamplerToSpy_Start) {
366 spyingStateLock.Wait();
369 mSpyingState = SpyingState::MainToSpy_Shutdown;
370 spyingStateLock.NotifyAll();
372 do {
373 spyingStateLock.Wait();
374 } while (mSpyingState != SpyingState::SpyToMain_ShuttingDown);
377 WaitForSingleObject(mUnregisteredThreadSpyThread, INFINITE);
379 // Close our own handle for the thread.
380 if (mUnregisteredThreadSpyThread != kNoThread) {
381 CloseHandle(mUnregisteredThreadSpyThread);
385 WaitForSingleObject(mThread, INFINITE);
387 // Close our own handle for the thread.
388 if (mThread != kNoThread) {
389 CloseHandle(mThread);
392 // Just in the unlikely case some callbacks were added between the end of the
393 // thread and now.
394 InvokePostSamplingCallbacks(std::move(mPostSamplingCallbackList),
395 SamplingState::JustStopped);
398 void SamplerThread::RunUnregisteredThreadSpy() {
399 // TODO: Consider registering this thread.
400 // Pros: Remove from list of unregistered threads; Not useful to profiling
401 // Firefox itself.
402 // Cons: Doesn't appear in the profile, so users may miss the expensive CPU
403 // cost of this work on Windows.
404 PR_SetCurrentThreadName("UnregisteredThreadSpy");
406 while (true) {
408 MonitorAutoLock spyingStateLock{mSpyingStateMonitor};
409 // Either this is the first loop, or we're looping after working.
410 MOZ_ASSERT(mSpyingState == SpyingState::Spy_Initializing ||
411 mSpyingState == SpyingState::Spy_Working);
413 // Let everyone know we're waiting, and then wait.
414 mSpyingState = SpyingState::Spy_Waiting;
415 mSpyingStateMonitor.NotifyAll();
416 do {
417 spyingStateLock.Wait();
418 } while (mSpyingState == SpyingState::Spy_Waiting);
420 if (mSpyingState == SpyingState::MainToSpy_Shutdown) {
421 mSpyingState = SpyingState::SpyToMain_ShuttingDown;
422 mSpyingStateMonitor.NotifyAll();
423 break;
426 MOZ_ASSERT(mSpyingState == SpyingState::SamplerToSpy_Start);
427 mSpyingState = SpyingState::Spy_Working;
430 // Do the work without lock, so other threads can read the current state.
431 SpyOnUnregisteredThreads();
435 void SamplerThread::SleepMicro(uint32_t aMicroseconds) {
436 // For now, keep the old behaviour of minimum Sleep(1), even for
437 // smaller-than-usual sleeps after an overshoot, unless the user has
438 // explicitly opted into a sub-millisecond profiler interval.
439 if (mIntervalMicroseconds >= 1000) {
440 ::Sleep(std::max(1u, aMicroseconds / 1000));
441 } else {
442 TimeStamp start = TimeStamp::Now();
443 TimeStamp end = start + TimeDuration::FromMicroseconds(aMicroseconds);
445 // First, sleep for as many whole milliseconds as possible.
446 if (aMicroseconds >= 1000) {
447 ::Sleep(aMicroseconds / 1000);
450 // Then, spin until enough time has passed.
451 while (TimeStamp::Now() < end) {
452 YieldProcessor();
457 void SamplerThread::Stop(PSLockRef aLock) {
458 if ((!mNoTimerResolutionChange) && (mIntervalMicroseconds < 10 * 1000)) {
459 // Disable any timer resolution changes we've made. Do it now while
460 // gPSMutex is locked, i.e. before any other SamplerThread can be created
461 // and call ::timeBeginPeriod().
463 // It's safe to do this now even though this SamplerThread is still alive,
464 // because the next time the main loop of Run() iterates it won't get past
465 // the mActivityGeneration check, and so it won't make any more ::Sleep()
466 // calls.
467 ::timeEndPeriod(mIntervalMicroseconds / 1000);
470 mSampler.Disable(aLock);
473 // END SamplerThread target specifics
474 ////////////////////////////////////////////////////////////////////////
476 static void PlatformInit(PSLockRef aLock) {}
#if defined(HAVE_NATIVE_UNWIND)
// Fills `regs` with the register state of the calling code at the point of
// expansion. Expands to plain statements (not a wrapped block), so it declares
// a local named `context` in the enclosing scope.
#  define REGISTERS_SYNC_POPULATE(regs) \
    CONTEXT context;                    \
    RtlCaptureContext(&context);        \
    PopulateRegsFromContext(regs, &context);
#endif