Convert sync_unit_tests to run exclusively on Swarming
[chromium-blink-merge.git] / content / gpu / gpu_watchdog_thread.cc
blobe6da1a4497d59f306f6fd30e03dede5604a05392
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #if defined(OS_WIN)
6 #include <windows.h>
7 #endif
9 #include "content/gpu/gpu_watchdog_thread.h"
11 #include "base/bind.h"
12 #include "base/bind_helpers.h"
13 #include "base/command_line.h"
14 #include "base/compiler_specific.h"
15 #include "base/files/file_util.h"
16 #include "base/location.h"
17 #include "base/power_monitor/power_monitor.h"
18 #include "base/process/process.h"
19 #include "base/single_thread_task_runner.h"
20 #include "build/build_config.h"
21 #include "content/public/common/content_switches.h"
22 #include "content/public/common/result_codes.h"
24 namespace content {
namespace {

#if defined(OS_CHROMEOS)
// Path opened by the watchdog constructor and re-read just before a
// deliberate crash to find out which virtual terminal is active.
const base::FilePath::CharType kTtyFilePath[] =
    FILE_PATH_LITERAL("/sys/class/tty/tty0/active");
#endif

#if defined(USE_X11)
// Property payload written to the watchdog's X window when probing whether
// the X server still responds.
const unsigned char text[20] = "check";
#endif

}  // namespace
35 GpuWatchdogThread::GpuWatchdogThread(int timeout)
36 : base::Thread("Watchdog"),
37 watched_message_loop_(base::MessageLoop::current()),
38 timeout_(base::TimeDelta::FromMilliseconds(timeout)),
39 armed_(false),
40 #if defined(OS_WIN)
41 watched_thread_handle_(0),
42 arm_cpu_time_(),
43 #endif
44 task_observer_(this),
45 suspended_(false),
46 #if defined(USE_X11)
47 display_(NULL),
48 window_(0),
49 atom_(None),
50 #endif
51 weak_factory_(this) {
52 DCHECK(timeout >= 0);
54 #if defined(OS_WIN)
55 // GetCurrentThread returns a pseudo-handle that cannot be used by one thread
56 // to identify another. DuplicateHandle creates a "real" handle that can be
57 // used for this purpose.
58 BOOL result = DuplicateHandle(GetCurrentProcess(),
59 GetCurrentThread(),
60 GetCurrentProcess(),
61 &watched_thread_handle_,
62 THREAD_QUERY_INFORMATION,
63 FALSE,
64 0);
65 DCHECK(result);
66 #endif
68 #if defined(OS_CHROMEOS)
69 tty_file_ = base::OpenFile(base::FilePath(kTtyFilePath), "r");
70 #endif
71 #if defined(USE_X11)
72 SetupXServer();
73 #endif
74 watched_message_loop_->AddTaskObserver(&task_observer_);
77 void GpuWatchdogThread::PostAcknowledge() {
78 // Called on the monitored thread. Responds with OnAcknowledge. Cannot use
79 // the method factory. Rely on reference counting instead.
80 task_runner()->PostTask(FROM_HERE,
81 base::Bind(&GpuWatchdogThread::OnAcknowledge, this));
84 void GpuWatchdogThread::CheckArmed() {
85 // Acknowledge the watchdog if it has armed itself. The watchdog will not
86 // change its armed state until it is acknowledged.
87 if (armed()) {
88 PostAcknowledge();
92 void GpuWatchdogThread::Init() {
93 // Schedule the first check.
94 OnCheck(false);
97 void GpuWatchdogThread::CleanUp() {
98 weak_factory_.InvalidateWeakPtrs();
101 GpuWatchdogThread::GpuWatchdogTaskObserver::GpuWatchdogTaskObserver(
102 GpuWatchdogThread* watchdog)
103 : watchdog_(watchdog) {
106 GpuWatchdogThread::GpuWatchdogTaskObserver::~GpuWatchdogTaskObserver() {
109 void GpuWatchdogThread::GpuWatchdogTaskObserver::WillProcessTask(
110 const base::PendingTask& pending_task) {
111 watchdog_->CheckArmed();
114 void GpuWatchdogThread::GpuWatchdogTaskObserver::DidProcessTask(
115 const base::PendingTask& pending_task) {
118 GpuWatchdogThread::~GpuWatchdogThread() {
119 // Verify that the thread was explicitly stopped. If the thread is stopped
120 // implicitly by the destructor, CleanUp() will not be called.
121 DCHECK(!weak_factory_.HasWeakPtrs());
123 #if defined(OS_WIN)
124 CloseHandle(watched_thread_handle_);
125 #endif
127 base::PowerMonitor* power_monitor = base::PowerMonitor::Get();
128 if (power_monitor)
129 power_monitor->RemoveObserver(this);
131 #if defined(OS_CHROMEOS)
132 if (tty_file_)
133 fclose(tty_file_);
134 #endif
136 #if defined(USE_X11)
137 XDestroyWindow(display_, window_);
138 XCloseDisplay(display_);
139 #endif
141 watched_message_loop_->RemoveTaskObserver(&task_observer_);
144 void GpuWatchdogThread::OnAcknowledge() {
145 CHECK(base::PlatformThread::CurrentId() == thread_id());
147 // The check has already been acknowledged and another has already been
148 // scheduled by a previous call to OnAcknowledge. It is normal for a
149 // watched thread to see armed_ being true multiple times before
150 // the OnAcknowledge task is run on the watchdog thread.
151 if (!armed_)
152 return;
154 // Revoke any pending hang termination.
155 weak_factory_.InvalidateWeakPtrs();
156 armed_ = false;
158 if (suspended_)
159 return;
161 // If it took a long time for the acknowledgement, assume the computer was
162 // recently suspended.
163 bool was_suspended = (base::Time::Now() > suspension_timeout_);
165 // The monitored thread has responded. Post a task to check it again.
166 task_runner()->PostDelayedTask(
167 FROM_HERE, base::Bind(&GpuWatchdogThread::OnCheck,
168 weak_factory_.GetWeakPtr(), was_suspended),
169 0.5 * timeout_);
172 void GpuWatchdogThread::OnCheck(bool after_suspend) {
173 CHECK(base::PlatformThread::CurrentId() == thread_id());
175 // Do not create any new termination tasks if one has already been created
176 // or the system is suspended.
177 if (armed_ || suspended_)
178 return;
180 // Must set armed before posting the task. This task might be the only task
181 // that will activate the TaskObserver on the watched thread and it must not
182 // miss the false -> true transition.
183 armed_ = true;
185 #if defined(OS_WIN)
186 arm_cpu_time_ = GetWatchedThreadTime();
187 #endif
189 // Immediately after the computer is woken up from being suspended it might
190 // be pretty sluggish, so allow some extra time before the next timeout.
191 base::TimeDelta timeout = timeout_ * (after_suspend ? 3 : 1);
192 suspension_timeout_ = base::Time::Now() + timeout * 2;
194 // Post a task to the monitored thread that does nothing but wake up the
195 // TaskObserver. Any other tasks that are pending on the watched thread will
196 // also wake up the observer. This simply ensures there is at least one.
197 watched_message_loop_->task_runner()->PostTask(FROM_HERE,
198 base::Bind(&base::DoNothing));
200 // Post a task to the watchdog thread to exit if the monitored thread does
201 // not respond in time.
202 task_runner()->PostDelayedTask(
203 FROM_HERE,
204 base::Bind(&GpuWatchdogThread::DeliberatelyTerminateToRecoverFromHang,
205 weak_factory_.GetWeakPtr()),
206 timeout);
209 // Use the --disable-gpu-watchdog command line switch to disable this.
210 void GpuWatchdogThread::DeliberatelyTerminateToRecoverFromHang() {
211 // Should not get here while the system is suspended.
212 DCHECK(!suspended_);
214 #if defined(OS_WIN)
215 // Defer termination until a certain amount of CPU time has elapsed on the
216 // watched thread.
217 base::TimeDelta time_since_arm = GetWatchedThreadTime() - arm_cpu_time_;
218 if (time_since_arm < timeout_) {
219 message_loop()->PostDelayedTask(
220 FROM_HERE,
221 base::Bind(
222 &GpuWatchdogThread::DeliberatelyTerminateToRecoverFromHang,
223 weak_factory_.GetWeakPtr()),
224 timeout_ - time_since_arm);
225 return;
227 #endif
229 // If the watchdog woke up significantly behind schedule, disarm and reset
230 // the watchdog check. This is to prevent the watchdog thread from terminating
231 // when a machine wakes up from sleep or hibernation, which would otherwise
232 // appear to be a hang.
233 if (base::Time::Now() > suspension_timeout_) {
234 armed_ = false;
235 OnCheck(true);
236 return;
239 #if defined(USE_X11)
240 XWindowAttributes attributes;
241 XGetWindowAttributes(display_, window_, &attributes);
243 XSelectInput(display_, window_, PropertyChangeMask);
244 SetupXChangeProp();
246 XFlush(display_);
248 // We wait for the property change event with a timeout. If it arrives we know
249 // that X is responsive and is not the cause of the watchdog trigger, so we
250 // should
251 // terminate. If it times out, it may be due to X taking a long time, but
252 // terminating won't help, so ignore the watchdog trigger.
253 XEvent event_return;
254 base::TimeTicks deadline = base::TimeTicks::Now() + timeout_;
255 while (true) {
256 base::TimeDelta delta = deadline - base::TimeTicks::Now();
257 if (delta < base::TimeDelta()) {
258 return;
259 } else {
260 while (XCheckWindowEvent(display_, window_, PropertyChangeMask,
261 &event_return)) {
262 if (MatchXEventAtom(&event_return))
263 break;
265 struct pollfd fds[1];
266 fds[0].fd = XConnectionNumber(display_);
267 fds[0].events = POLLIN;
268 int status = poll(fds, 1, delta.InMilliseconds());
269 if (status == -1) {
270 if (errno == EINTR) {
271 continue;
272 } else {
273 LOG(FATAL) << "Lost X connection, aborting.";
274 break;
276 } else if (status == 0) {
277 return;
278 } else {
279 continue;
283 #endif
285 // For minimal developer annoyance, don't keep terminating. You need to skip
286 // the call to base::Process::Terminate below in a debugger for this to be
287 // useful.
288 static bool terminated = false;
289 if (terminated)
290 return;
292 #if defined(OS_WIN)
293 if (IsDebuggerPresent())
294 return;
295 #endif
297 #if defined(OS_CHROMEOS)
298 // Don't crash if we're not on tty1. This avoids noise in the GPU process
299 // crashes caused by people who use VT2 but still enable crash reporting.
300 char tty_string[8] = {0};
301 if (tty_file_ &&
302 !fseek(tty_file_, 0, SEEK_SET) &&
303 fread(tty_string, 1, 7, tty_file_)) {
304 int tty_number = -1;
305 int num_res = sscanf(tty_string, "tty%d", &tty_number);
306 if (num_res == 1 && tty_number != 1)
307 return;
309 #endif
311 LOG(ERROR) << "The GPU process hung. Terminating after "
312 << timeout_.InMilliseconds() << " ms.";
314 // Deliberately crash the process to create a crash dump.
315 *((volatile int*)0) = 0x1337;
317 terminated = true;
320 #if defined(USE_X11)
321 void GpuWatchdogThread::SetupXServer() {
322 display_ = XOpenDisplay(NULL);
323 window_ = XCreateWindow(display_, DefaultRootWindow(display_), 0, 0, 1, 1, 0,
324 CopyFromParent, InputOutput, CopyFromParent, 0, NULL);
325 atom_ = XInternAtom(display_, "CHECK", False);
328 void GpuWatchdogThread::SetupXChangeProp() {
329 XChangeProperty(display_, window_, atom_, XA_STRING, 8, PropModeReplace, text,
330 (arraysize(text) - 1));
333 bool GpuWatchdogThread::MatchXEventAtom(XEvent* event) {
334 if (event->xproperty.window == window_ && event->type == PropertyNotify &&
335 event->xproperty.atom == atom_)
336 return true;
338 return false;
341 #endif
342 void GpuWatchdogThread::AddPowerObserver() {
343 task_runner()->PostTask(
344 FROM_HERE, base::Bind(&GpuWatchdogThread::OnAddPowerObserver, this));
347 void GpuWatchdogThread::OnAddPowerObserver() {
348 base::PowerMonitor* power_monitor = base::PowerMonitor::Get();
349 DCHECK(power_monitor);
350 power_monitor->AddObserver(this);
353 void GpuWatchdogThread::OnSuspend() {
354 suspended_ = true;
356 // When suspending force an acknowledgement to cancel any pending termination
357 // tasks.
358 OnAcknowledge();
361 void GpuWatchdogThread::OnResume() {
362 suspended_ = false;
364 // After resuming jump-start the watchdog again.
365 armed_ = false;
366 OnCheck(true);
369 #if defined(OS_WIN)
370 base::TimeDelta GpuWatchdogThread::GetWatchedThreadTime() {
371 FILETIME creation_time;
372 FILETIME exit_time;
373 FILETIME user_time;
374 FILETIME kernel_time;
375 BOOL result = GetThreadTimes(watched_thread_handle_,
376 &creation_time,
377 &exit_time,
378 &kernel_time,
379 &user_time);
380 DCHECK(result);
382 ULARGE_INTEGER user_time64;
383 user_time64.HighPart = user_time.dwHighDateTime;
384 user_time64.LowPart = user_time.dwLowDateTime;
386 ULARGE_INTEGER kernel_time64;
387 kernel_time64.HighPart = kernel_time.dwHighDateTime;
388 kernel_time64.LowPart = kernel_time.dwLowDateTime;
390 // Time is reported in units of 100 nanoseconds. Kernel and user time are
391 // summed to deal with to kinds of hangs. One is where the GPU process is
392 // stuck in user level, never calling into the kernel and kernel time is
393 // not increasing. The other is where either the kernel hangs and never
394 // returns to user level or where user level code
395 // calls into kernel level repeatedly, giving up its quanta before it is
396 // tracked, for example a loop that repeatedly Sleeps.
397 return base::TimeDelta::FromMilliseconds(static_cast<int64>(
398 (user_time64.QuadPart + kernel_time64.QuadPart) / 10000));
400 #endif
402 } // namespace content