Add a UMA stat to be able to see if the User pods are shown on the start screen,
[chromium-blink-merge.git] / content / gpu / gpu_watchdog_thread.cc
blob6a6f45924a015846847131306e9ddda9ddff6eb3
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #if defined(OS_WIN)
6 #include <windows.h>
7 #endif
9 #include "content/gpu/gpu_watchdog_thread.h"
11 #include "base/bind.h"
12 #include "base/bind_helpers.h"
13 #include "base/command_line.h"
14 #include "base/compiler_specific.h"
15 #include "base/files/file_util.h"
16 #include "base/power_monitor/power_monitor.h"
17 #include "base/process/process.h"
18 #include "build/build_config.h"
19 #include "content/public/common/content_switches.h"
20 #include "content/public/common/result_codes.h"
22 namespace content {
23 namespace {
24 const int64 kCheckPeriodMs = 2000;
25 #if defined(OS_CHROMEOS)
26 const base::FilePath::CharType
27 kTtyFilePath[] = FILE_PATH_LITERAL("/sys/class/tty/tty0/active");
28 #endif
29 #if defined(USE_X11)
30 const unsigned char text[20] = "check";
31 #endif
32 } // namespace
34 GpuWatchdogThread::GpuWatchdogThread(int timeout)
35 : base::Thread("Watchdog"),
36 watched_message_loop_(base::MessageLoop::current()),
37 timeout_(base::TimeDelta::FromMilliseconds(timeout)),
38 armed_(false),
39 #if defined(OS_WIN)
40 watched_thread_handle_(0),
41 arm_cpu_time_(),
42 #endif
43 task_observer_(this),
44 suspended_(false),
45 #if defined(USE_X11)
46 display_(NULL),
47 window_(0),
48 atom_(None),
49 #endif
50 weak_factory_(this) {
51 DCHECK(timeout >= 0);
53 #if defined(OS_WIN)
54 // GetCurrentThread returns a pseudo-handle that cannot be used by one thread
55 // to identify another. DuplicateHandle creates a "real" handle that can be
56 // used for this purpose.
57 BOOL result = DuplicateHandle(GetCurrentProcess(),
58 GetCurrentThread(),
59 GetCurrentProcess(),
60 &watched_thread_handle_,
61 THREAD_QUERY_INFORMATION,
62 FALSE,
63 0);
64 DCHECK(result);
65 #endif
67 #if defined(OS_CHROMEOS)
68 tty_file_ = base::OpenFile(base::FilePath(kTtyFilePath), "r");
69 #endif
70 #if defined(USE_X11)
71 SetupXServer();
72 #endif
73 watched_message_loop_->AddTaskObserver(&task_observer_);
76 void GpuWatchdogThread::PostAcknowledge() {
77 // Called on the monitored thread. Responds with OnAcknowledge. Cannot use
78 // the method factory. Rely on reference counting instead.
79 message_loop()->PostTask(
80 FROM_HERE,
81 base::Bind(&GpuWatchdogThread::OnAcknowledge, this));
84 void GpuWatchdogThread::CheckArmed() {
85 // Acknowledge the watchdog if it has armed itself. The watchdog will not
86 // change its armed state until it is acknowledged.
87 if (armed()) {
88 PostAcknowledge();
92 void GpuWatchdogThread::Init() {
93 // Schedule the first check.
94 OnCheck(false);
97 void GpuWatchdogThread::CleanUp() {
98 weak_factory_.InvalidateWeakPtrs();
101 GpuWatchdogThread::GpuWatchdogTaskObserver::GpuWatchdogTaskObserver(
102 GpuWatchdogThread* watchdog)
103 : watchdog_(watchdog) {
106 GpuWatchdogThread::GpuWatchdogTaskObserver::~GpuWatchdogTaskObserver() {
109 void GpuWatchdogThread::GpuWatchdogTaskObserver::WillProcessTask(
110 const base::PendingTask& pending_task) {
111 watchdog_->CheckArmed();
114 void GpuWatchdogThread::GpuWatchdogTaskObserver::DidProcessTask(
115 const base::PendingTask& pending_task) {
116 watchdog_->CheckArmed();
119 GpuWatchdogThread::~GpuWatchdogThread() {
120 // Verify that the thread was explicitly stopped. If the thread is stopped
121 // implicitly by the destructor, CleanUp() will not be called.
122 DCHECK(!weak_factory_.HasWeakPtrs());
124 #if defined(OS_WIN)
125 CloseHandle(watched_thread_handle_);
126 #endif
128 base::PowerMonitor* power_monitor = base::PowerMonitor::Get();
129 if (power_monitor)
130 power_monitor->RemoveObserver(this);
132 #if defined(OS_CHROMEOS)
133 if (tty_file_)
134 fclose(tty_file_);
135 #endif
137 #if defined(USE_X11)
138 XDestroyWindow(display_, window_);
139 XCloseDisplay(display_);
140 #endif
142 watched_message_loop_->RemoveTaskObserver(&task_observer_);
145 void GpuWatchdogThread::OnAcknowledge() {
146 CHECK(base::PlatformThread::CurrentId() == thread_id());
148 // The check has already been acknowledged and another has already been
149 // scheduled by a previous call to OnAcknowledge. It is normal for a
150 // watched thread to see armed_ being true multiple times before
151 // the OnAcknowledge task is run on the watchdog thread.
152 if (!armed_)
153 return;
155 // Revoke any pending hang termination.
156 weak_factory_.InvalidateWeakPtrs();
157 armed_ = false;
159 if (suspended_)
160 return;
162 // If it took a long time for the acknowledgement, assume the computer was
163 // recently suspended.
164 bool was_suspended = (base::Time::Now() > suspension_timeout_);
166 // The monitored thread has responded. Post a task to check it again.
167 message_loop()->PostDelayedTask(
168 FROM_HERE,
169 base::Bind(&GpuWatchdogThread::OnCheck, weak_factory_.GetWeakPtr(),
170 was_suspended),
171 base::TimeDelta::FromMilliseconds(kCheckPeriodMs));
174 void GpuWatchdogThread::OnCheck(bool after_suspend) {
175 CHECK(base::PlatformThread::CurrentId() == thread_id());
177 // Do not create any new termination tasks if one has already been created
178 // or the system is suspended.
179 if (armed_ || suspended_)
180 return;
182 // Must set armed before posting the task. This task might be the only task
183 // that will activate the TaskObserver on the watched thread and it must not
184 // miss the false -> true transition.
185 armed_ = true;
187 #if defined(OS_WIN)
188 arm_cpu_time_ = GetWatchedThreadTime();
189 #endif
191 // Immediately after the computer is woken up from being suspended it might
192 // be pretty sluggish, so allow some extra time before the next timeout.
193 base::TimeDelta timeout = timeout_ * (after_suspend ? 3 : 1);
194 suspension_timeout_ = base::Time::Now() + timeout * 2;
196 // Post a task to the monitored thread that does nothing but wake up the
197 // TaskObserver. Any other tasks that are pending on the watched thread will
198 // also wake up the observer. This simply ensures there is at least one.
199 watched_message_loop_->PostTask(
200 FROM_HERE,
201 base::Bind(&base::DoNothing));
203 // Post a task to the watchdog thread to exit if the monitored thread does
204 // not respond in time.
205 message_loop()->PostDelayedTask(
206 FROM_HERE,
207 base::Bind(&GpuWatchdogThread::DeliberatelyTerminateToRecoverFromHang,
208 weak_factory_.GetWeakPtr()),
209 timeout);
212 // Use the --disable-gpu-watchdog command line switch to disable this.
213 void GpuWatchdogThread::DeliberatelyTerminateToRecoverFromHang() {
214 // Should not get here while the system is suspended.
215 DCHECK(!suspended_);
217 #if defined(OS_WIN)
218 // Defer termination until a certain amount of CPU time has elapsed on the
219 // watched thread.
220 base::TimeDelta time_since_arm = GetWatchedThreadTime() - arm_cpu_time_;
221 if (time_since_arm < timeout_) {
222 message_loop()->PostDelayedTask(
223 FROM_HERE,
224 base::Bind(
225 &GpuWatchdogThread::DeliberatelyTerminateToRecoverFromHang,
226 weak_factory_.GetWeakPtr()),
227 timeout_ - time_since_arm);
228 return;
230 #endif
232 // If the watchdog woke up significantly behind schedule, disarm and reset
233 // the watchdog check. This is to prevent the watchdog thread from terminating
234 // when a machine wakes up from sleep or hibernation, which would otherwise
235 // appear to be a hang.
236 if (base::Time::Now() > suspension_timeout_) {
237 armed_ = false;
238 OnCheck(true);
239 return;
242 #if defined(USE_X11)
243 XWindowAttributes attributes;
244 XGetWindowAttributes(display_, window_, &attributes);
246 XSelectInput(display_, window_, PropertyChangeMask);
247 SetupXChangeProp();
249 XFlush(display_);
251 // We wait for the property change event with a timeout. If it arrives we know
252 // that X is responsive and is not the cause of the watchdog trigger, so we
253 // should
254 // terminate. If it times out, it may be due to X taking a long time, but
255 // terminating won't help, so ignore the watchdog trigger.
256 XEvent event_return;
257 base::TimeTicks deadline = base::TimeTicks::Now() + timeout_;
258 while (true) {
259 base::TimeDelta delta = deadline - base::TimeTicks::Now();
260 if (delta < base::TimeDelta()) {
261 return;
262 } else {
263 while (XCheckWindowEvent(display_, window_, PropertyChangeMask,
264 &event_return)) {
265 if (MatchXEventAtom(&event_return))
266 break;
268 struct pollfd fds[1];
269 fds[0].fd = XConnectionNumber(display_);
270 fds[0].events = POLLIN;
271 int status = poll(fds, 1, delta.InMilliseconds());
272 if (status == -1) {
273 if (errno == EINTR) {
274 continue;
275 } else {
276 LOG(FATAL) << "Lost X connection, aborting.";
277 break;
279 } else if (status == 0) {
280 return;
281 } else {
282 continue;
286 #endif
288 // For minimal developer annoyance, don't keep terminating. You need to skip
289 // the call to base::Process::Terminate below in a debugger for this to be
290 // useful.
291 static bool terminated = false;
292 if (terminated)
293 return;
295 #if defined(OS_WIN)
296 if (IsDebuggerPresent())
297 return;
298 #endif
300 #if defined(OS_CHROMEOS)
301 // Don't crash if we're not on tty1. This avoids noise in the GPU process
302 // crashes caused by people who use VT2 but still enable crash reporting.
303 char tty_string[8] = {0};
304 if (tty_file_ &&
305 !fseek(tty_file_, 0, SEEK_SET) &&
306 fread(tty_string, 1, 7, tty_file_)) {
307 int tty_number = -1;
308 int num_res = sscanf(tty_string, "tty%d", &tty_number);
309 if (num_res == 1 && tty_number != 1)
310 return;
312 #endif
314 LOG(ERROR) << "The GPU process hung. Terminating after "
315 << timeout_.InMilliseconds() << " ms.";
317 // Deliberately crash the process to create a crash dump.
318 *((volatile int*)0) = 0x1337;
320 terminated = true;
323 #if defined(USE_X11)
324 void GpuWatchdogThread::SetupXServer() {
325 display_ = XOpenDisplay(NULL);
326 window_ = XCreateWindow(display_, DefaultRootWindow(display_), 0, 0, 1, 1, 0,
327 CopyFromParent, InputOutput, CopyFromParent, 0, NULL);
328 atom_ = XInternAtom(display_, "CHECK", False);
331 void GpuWatchdogThread::SetupXChangeProp() {
332 XChangeProperty(display_, window_, atom_, XA_STRING, 8, PropModeReplace, text,
333 (arraysize(text) - 1));
336 bool GpuWatchdogThread::MatchXEventAtom(XEvent* event) {
337 if (event->xproperty.window == window_ && event->type == PropertyNotify &&
338 event->xproperty.atom == atom_)
339 return true;
341 return false;
344 #endif
345 void GpuWatchdogThread::AddPowerObserver() {
346 message_loop()->PostTask(
347 FROM_HERE,
348 base::Bind(&GpuWatchdogThread::OnAddPowerObserver, this));
351 void GpuWatchdogThread::OnAddPowerObserver() {
352 base::PowerMonitor* power_monitor = base::PowerMonitor::Get();
353 DCHECK(power_monitor);
354 power_monitor->AddObserver(this);
357 void GpuWatchdogThread::OnSuspend() {
358 suspended_ = true;
360 // When suspending force an acknowledgement to cancel any pending termination
361 // tasks.
362 OnAcknowledge();
365 void GpuWatchdogThread::OnResume() {
366 suspended_ = false;
368 // After resuming jump-start the watchdog again.
369 armed_ = false;
370 OnCheck(true);
373 #if defined(OS_WIN)
374 base::TimeDelta GpuWatchdogThread::GetWatchedThreadTime() {
375 FILETIME creation_time;
376 FILETIME exit_time;
377 FILETIME user_time;
378 FILETIME kernel_time;
379 BOOL result = GetThreadTimes(watched_thread_handle_,
380 &creation_time,
381 &exit_time,
382 &kernel_time,
383 &user_time);
384 DCHECK(result);
386 ULARGE_INTEGER user_time64;
387 user_time64.HighPart = user_time.dwHighDateTime;
388 user_time64.LowPart = user_time.dwLowDateTime;
390 ULARGE_INTEGER kernel_time64;
391 kernel_time64.HighPart = kernel_time.dwHighDateTime;
392 kernel_time64.LowPart = kernel_time.dwLowDateTime;
394 // Time is reported in units of 100 nanoseconds. Kernel and user time are
395 // summed to deal with to kinds of hangs. One is where the GPU process is
396 // stuck in user level, never calling into the kernel and kernel time is
397 // not increasing. The other is where either the kernel hangs and never
398 // returns to user level or where user level code
399 // calls into kernel level repeatedly, giving up its quanta before it is
400 // tracked, for example a loop that repeatedly Sleeps.
401 return base::TimeDelta::FromMilliseconds(static_cast<int64>(
402 (user_time64.QuadPart + kernel_time64.QuadPart) / 10000));
404 #endif
406 } // namespace content