Include all dupe types (even when value is zero) in scan stats.
[chromium-blink-merge.git] / content / gpu / gpu_watchdog_thread.cc
blob6052d41647dd380f4ebd4d19cff67129a1b6de55
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #if defined(OS_WIN)
6 #include <windows.h>
7 #endif
9 #include "content/gpu/gpu_watchdog_thread.h"
11 #include "base/bind.h"
12 #include "base/bind_helpers.h"
13 #include "base/command_line.h"
14 #include "base/compiler_specific.h"
15 #include "base/files/file_util.h"
16 #include "base/power_monitor/power_monitor.h"
17 #include "base/process/process.h"
18 #include "build/build_config.h"
19 #include "content/public/common/content_switches.h"
20 #include "content/public/common/result_codes.h"
22 namespace content {
// File-local constants used by the watchdog implementation below.
23 namespace {
24 #if defined(OS_CHROMEOS)
// File opened once in the GpuWatchdogThread constructor and re-read before
// terminating; its contents are parsed as "tty%d" so that termination can be
// skipped when the parsed number is not 1.
25 const base::FilePath::CharType
26 kTtyFilePath[] = FILE_PATH_LITERAL("/sys/class/tty/tty0/active");
27 #endif
28 #if defined(USE_X11)
// Value written to the watchdog's X window property by SetupXChangeProp().
// The property length passed to XChangeProperty is arraysize(text) - 1, so
// the declared array size (not the string length) determines how many bytes
// are sent.
29 const unsigned char text[20] = "check";
30 #endif
31 } // namespace
33 GpuWatchdogThread::GpuWatchdogThread(int timeout)
34 : base::Thread("Watchdog"),
35 watched_message_loop_(base::MessageLoop::current()),
36 timeout_(base::TimeDelta::FromMilliseconds(timeout)),
37 armed_(false),
38 #if defined(OS_WIN)
39 watched_thread_handle_(0),
40 arm_cpu_time_(),
41 #endif
42 task_observer_(this),
43 suspended_(false),
44 #if defined(USE_X11)
45 display_(NULL),
46 window_(0),
47 atom_(None),
48 #endif
49 weak_factory_(this) {
50 DCHECK(timeout >= 0);
52 #if defined(OS_WIN)
53 // GetCurrentThread returns a pseudo-handle that cannot be used by one thread
54 // to identify another. DuplicateHandle creates a "real" handle that can be
55 // used for this purpose.
56 BOOL result = DuplicateHandle(GetCurrentProcess(),
57 GetCurrentThread(),
58 GetCurrentProcess(),
59 &watched_thread_handle_,
60 THREAD_QUERY_INFORMATION,
61 FALSE,
62 0);
63 DCHECK(result);
64 #endif
66 #if defined(OS_CHROMEOS)
67 tty_file_ = base::OpenFile(base::FilePath(kTtyFilePath), "r");
68 #endif
69 #if defined(USE_X11)
70 SetupXServer();
71 #endif
72 watched_message_loop_->AddTaskObserver(&task_observer_);
75 void GpuWatchdogThread::PostAcknowledge() {
76 // Called on the monitored thread. Responds with OnAcknowledge. Cannot use
77 // the method factory. Rely on reference counting instead.
78 message_loop()->PostTask(
79 FROM_HERE,
80 base::Bind(&GpuWatchdogThread::OnAcknowledge, this));
83 void GpuWatchdogThread::CheckArmed() {
84 // Acknowledge the watchdog if it has armed itself. The watchdog will not
85 // change its armed state until it is acknowledged.
86 if (armed()) {
87 PostAcknowledge();
91 void GpuWatchdogThread::Init() {
92 // Schedule the first check.
93 OnCheck(false);
96 void GpuWatchdogThread::CleanUp() {
97 weak_factory_.InvalidateWeakPtrs();
100 GpuWatchdogThread::GpuWatchdogTaskObserver::GpuWatchdogTaskObserver(
101 GpuWatchdogThread* watchdog)
102 : watchdog_(watchdog) {
105 GpuWatchdogThread::GpuWatchdogTaskObserver::~GpuWatchdogTaskObserver() {
108 void GpuWatchdogThread::GpuWatchdogTaskObserver::WillProcessTask(
109 const base::PendingTask& pending_task) {
110 watchdog_->CheckArmed();
113 void GpuWatchdogThread::GpuWatchdogTaskObserver::DidProcessTask(
114 const base::PendingTask& pending_task) {
117 GpuWatchdogThread::~GpuWatchdogThread() {
118 // Verify that the thread was explicitly stopped. If the thread is stopped
119 // implicitly by the destructor, CleanUp() will not be called.
120 DCHECK(!weak_factory_.HasWeakPtrs());
122 #if defined(OS_WIN)
123 CloseHandle(watched_thread_handle_);
124 #endif
126 base::PowerMonitor* power_monitor = base::PowerMonitor::Get();
127 if (power_monitor)
128 power_monitor->RemoveObserver(this);
130 #if defined(OS_CHROMEOS)
131 if (tty_file_)
132 fclose(tty_file_);
133 #endif
135 #if defined(USE_X11)
136 XDestroyWindow(display_, window_);
137 XCloseDisplay(display_);
138 #endif
140 watched_message_loop_->RemoveTaskObserver(&task_observer_);
143 void GpuWatchdogThread::OnAcknowledge() {
144 CHECK(base::PlatformThread::CurrentId() == thread_id());
146 // The check has already been acknowledged and another has already been
147 // scheduled by a previous call to OnAcknowledge. It is normal for a
148 // watched thread to see armed_ being true multiple times before
149 // the OnAcknowledge task is run on the watchdog thread.
150 if (!armed_)
151 return;
153 // Revoke any pending hang termination.
154 weak_factory_.InvalidateWeakPtrs();
155 armed_ = false;
157 if (suspended_)
158 return;
160 // If it took a long time for the acknowledgement, assume the computer was
161 // recently suspended.
162 bool was_suspended = (base::Time::Now() > suspension_timeout_);
164 // The monitored thread has responded. Post a task to check it again.
165 message_loop()->PostDelayedTask(
166 FROM_HERE,
167 base::Bind(&GpuWatchdogThread::OnCheck, weak_factory_.GetWeakPtr(),
168 was_suspended),
169 0.5 * timeout_);
172 void GpuWatchdogThread::OnCheck(bool after_suspend) {
173 CHECK(base::PlatformThread::CurrentId() == thread_id());
175 // Do not create any new termination tasks if one has already been created
176 // or the system is suspended.
177 if (armed_ || suspended_)
178 return;
180 // Must set armed before posting the task. This task might be the only task
181 // that will activate the TaskObserver on the watched thread and it must not
182 // miss the false -> true transition.
183 armed_ = true;
185 #if defined(OS_WIN)
186 arm_cpu_time_ = GetWatchedThreadTime();
187 #endif
189 // Immediately after the computer is woken up from being suspended it might
190 // be pretty sluggish, so allow some extra time before the next timeout.
191 base::TimeDelta timeout = timeout_ * (after_suspend ? 3 : 1);
192 suspension_timeout_ = base::Time::Now() + timeout * 2;
194 // Post a task to the monitored thread that does nothing but wake up the
195 // TaskObserver. Any other tasks that are pending on the watched thread will
196 // also wake up the observer. This simply ensures there is at least one.
197 watched_message_loop_->PostTask(
198 FROM_HERE,
199 base::Bind(&base::DoNothing));
201 // Post a task to the watchdog thread to exit if the monitored thread does
202 // not respond in time.
203 message_loop()->PostDelayedTask(
204 FROM_HERE,
205 base::Bind(&GpuWatchdogThread::DeliberatelyTerminateToRecoverFromHang,
206 weak_factory_.GetWeakPtr()),
207 timeout);
210 // Use the --disable-gpu-watchdog command line switch to disable this.
211 void GpuWatchdogThread::DeliberatelyTerminateToRecoverFromHang() {
212 // Should not get here while the system is suspended.
213 DCHECK(!suspended_);
215 #if defined(OS_WIN)
216 // Defer termination until a certain amount of CPU time has elapsed on the
217 // watched thread.
218 base::TimeDelta time_since_arm = GetWatchedThreadTime() - arm_cpu_time_;
219 if (time_since_arm < timeout_) {
220 message_loop()->PostDelayedTask(
221 FROM_HERE,
222 base::Bind(
223 &GpuWatchdogThread::DeliberatelyTerminateToRecoverFromHang,
224 weak_factory_.GetWeakPtr()),
225 timeout_ - time_since_arm);
226 return;
228 #endif
230 // If the watchdog woke up significantly behind schedule, disarm and reset
231 // the watchdog check. This is to prevent the watchdog thread from terminating
232 // when a machine wakes up from sleep or hibernation, which would otherwise
233 // appear to be a hang.
234 if (base::Time::Now() > suspension_timeout_) {
235 armed_ = false;
236 OnCheck(true);
237 return;
240 #if defined(USE_X11)
241 XWindowAttributes attributes;
242 XGetWindowAttributes(display_, window_, &attributes);
244 XSelectInput(display_, window_, PropertyChangeMask);
245 SetupXChangeProp();
247 XFlush(display_);
249 // We wait for the property change event with a timeout. If it arrives we know
250 // that X is responsive and is not the cause of the watchdog trigger, so we
251 // should
252 // terminate. If it times out, it may be due to X taking a long time, but
253 // terminating won't help, so ignore the watchdog trigger.
254 XEvent event_return;
255 base::TimeTicks deadline = base::TimeTicks::Now() + timeout_;
256 while (true) {
257 base::TimeDelta delta = deadline - base::TimeTicks::Now();
258 if (delta < base::TimeDelta()) {
259 return;
260 } else {
261 while (XCheckWindowEvent(display_, window_, PropertyChangeMask,
262 &event_return)) {
263 if (MatchXEventAtom(&event_return))
264 break;
266 struct pollfd fds[1];
267 fds[0].fd = XConnectionNumber(display_);
268 fds[0].events = POLLIN;
269 int status = poll(fds, 1, delta.InMilliseconds());
270 if (status == -1) {
271 if (errno == EINTR) {
272 continue;
273 } else {
274 LOG(FATAL) << "Lost X connection, aborting.";
275 break;
277 } else if (status == 0) {
278 return;
279 } else {
280 continue;
284 #endif
286 // For minimal developer annoyance, don't keep terminating. You need to skip
287 // the call to base::Process::Terminate below in a debugger for this to be
288 // useful.
289 static bool terminated = false;
290 if (terminated)
291 return;
293 #if defined(OS_WIN)
294 if (IsDebuggerPresent())
295 return;
296 #endif
298 #if defined(OS_CHROMEOS)
299 // Don't crash if we're not on tty1. This avoids noise in the GPU process
300 // crashes caused by people who use VT2 but still enable crash reporting.
301 char tty_string[8] = {0};
302 if (tty_file_ &&
303 !fseek(tty_file_, 0, SEEK_SET) &&
304 fread(tty_string, 1, 7, tty_file_)) {
305 int tty_number = -1;
306 int num_res = sscanf(tty_string, "tty%d", &tty_number);
307 if (num_res == 1 && tty_number != 1)
308 return;
310 #endif
312 LOG(ERROR) << "The GPU process hung. Terminating after "
313 << timeout_.InMilliseconds() << " ms.";
315 // Deliberately crash the process to create a crash dump.
316 *((volatile int*)0) = 0x1337;
318 terminated = true;
321 #if defined(USE_X11)
322 void GpuWatchdogThread::SetupXServer() {
323 display_ = XOpenDisplay(NULL);
324 window_ = XCreateWindow(display_, DefaultRootWindow(display_), 0, 0, 1, 1, 0,
325 CopyFromParent, InputOutput, CopyFromParent, 0, NULL);
326 atom_ = XInternAtom(display_, "CHECK", False);
329 void GpuWatchdogThread::SetupXChangeProp() {
330 XChangeProperty(display_, window_, atom_, XA_STRING, 8, PropModeReplace, text,
331 (arraysize(text) - 1));
334 bool GpuWatchdogThread::MatchXEventAtom(XEvent* event) {
335 if (event->xproperty.window == window_ && event->type == PropertyNotify &&
336 event->xproperty.atom == atom_)
337 return true;
339 return false;
342 #endif
343 void GpuWatchdogThread::AddPowerObserver() {
344 message_loop()->PostTask(
345 FROM_HERE,
346 base::Bind(&GpuWatchdogThread::OnAddPowerObserver, this));
349 void GpuWatchdogThread::OnAddPowerObserver() {
350 base::PowerMonitor* power_monitor = base::PowerMonitor::Get();
351 DCHECK(power_monitor);
352 power_monitor->AddObserver(this);
355 void GpuWatchdogThread::OnSuspend() {
356 suspended_ = true;
358 // When suspending force an acknowledgement to cancel any pending termination
359 // tasks.
360 OnAcknowledge();
363 void GpuWatchdogThread::OnResume() {
364 suspended_ = false;
366 // After resuming jump-start the watchdog again.
367 armed_ = false;
368 OnCheck(true);
371 #if defined(OS_WIN)
372 base::TimeDelta GpuWatchdogThread::GetWatchedThreadTime() {
373 FILETIME creation_time;
374 FILETIME exit_time;
375 FILETIME user_time;
376 FILETIME kernel_time;
377 BOOL result = GetThreadTimes(watched_thread_handle_,
378 &creation_time,
379 &exit_time,
380 &kernel_time,
381 &user_time);
382 DCHECK(result);
384 ULARGE_INTEGER user_time64;
385 user_time64.HighPart = user_time.dwHighDateTime;
386 user_time64.LowPart = user_time.dwLowDateTime;
388 ULARGE_INTEGER kernel_time64;
389 kernel_time64.HighPart = kernel_time.dwHighDateTime;
390 kernel_time64.LowPart = kernel_time.dwLowDateTime;
392 // Time is reported in units of 100 nanoseconds. Kernel and user time are
393 // summed to deal with to kinds of hangs. One is where the GPU process is
394 // stuck in user level, never calling into the kernel and kernel time is
395 // not increasing. The other is where either the kernel hangs and never
396 // returns to user level or where user level code
397 // calls into kernel level repeatedly, giving up its quanta before it is
398 // tracked, for example a loop that repeatedly Sleeps.
399 return base::TimeDelta::FromMilliseconds(static_cast<int64>(
400 (user_time64.QuadPart + kernel_time64.QuadPart) / 10000));
402 #endif
404 } // namespace content