1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
9 #include "content/gpu/gpu_watchdog_thread.h"
11 #include "base/bind.h"
12 #include "base/bind_helpers.h"
13 #include "base/command_line.h"
14 #include "base/compiler_specific.h"
15 #include "base/file_util.h"
16 #include "base/power_monitor/power_monitor.h"
17 #include "base/process/process.h"
18 #include "build/build_config.h"
19 #include "content/public/common/content_switches.h"
20 #include "content/public/common/result_codes.h"
// Delay, in milliseconds, that OnAcknowledge uses when it posts the next
// OnCheck task to the watchdog thread's message loop.
24 const int64 kCheckPeriodMs
= 2000;
// Chrome OS only: path of the file the constructor opens read-only into
// tty_file_. DeliberatelyTerminateToRecoverFromHang later parses a "tty%d"
// string out of it.
25 #if defined(OS_CHROMEOS)
26 const base::FilePath::CharType
27 kTtyFilePath
[] = FILE_PATH_LITERAL("/sys/class/tty/tty0/active");
// Creates the watchdog thread object (named "Watchdog").
//
// |timeout| is interpreted as a number of milliseconds. The message loop that
// is current when the constructor runs is recorded as the watched loop, and
// task_observer_ is attached to it so that CheckArmed() runs around each task
// on that loop.
31 GpuWatchdogThread::GpuWatchdogThread(int timeout
)
32 : base::Thread("Watchdog"),
33 watched_message_loop_(base::MessageLoop::current()),
34 timeout_(base::TimeDelta::FromMilliseconds(timeout
)),
37 watched_thread_handle_(0),
46 // GetCurrentThread returns a pseudo-handle that cannot be used by one thread
47 // to identify another. DuplicateHandle creates a "real" handle that can be
48 // used for this purpose.
// The duplicated handle is stored in watched_thread_handle_; it is later
// passed to GetThreadTimes and released with CloseHandle in the destructor.
49 BOOL result
= DuplicateHandle(GetCurrentProcess(),
52 &watched_thread_handle_
,
53 THREAD_QUERY_INFORMATION
,
59 #if defined(OS_CHROMEOS)
// Opened for reading only; consulted before deliberately crashing.
60 tty_file_
= base::OpenFile(base::FilePath(kTtyFilePath
), "r");
// Start observing tasks on the watched loop; the destructor removes the
// observer again.
62 watched_message_loop_
->AddTaskObserver(&task_observer_
);
// Posts OnAcknowledge to this thread's message_loop(), bound directly to
// |this| rather than to a weak pointer.
65 void GpuWatchdogThread::PostAcknowledge() {
66 // Called on the monitored thread. Responds with OnAcknowledge. Cannot use
67 // the method factory. Rely on reference counting instead.
68 message_loop()->PostTask(
70 base::Bind(&GpuWatchdogThread::OnAcknowledge
, this));
// Invoked by GpuWatchdogTaskObserver both before (WillProcessTask) and after
// (DidProcessTask) each task handled by the watched message loop.
73 void GpuWatchdogThread::CheckArmed() {
74 // Acknowledge the watchdog if it has armed itself. The watchdog will not
75 // change its armed state until it is acknowledged.
// NOTE(review): presumably the base::Thread start-up hook that runs on the
// watchdog thread -- confirm against the class declaration.
81 void GpuWatchdogThread::Init() {
82 // Schedule the first check.
// Invalidates every weak pointer handed out by weak_factory_. The destructor
// DCHECKs that no weak pointers remain, so this must have run (i.e. the thread
// must have been stopped explicitly) before the object is destroyed.
86 void GpuWatchdogThread::CleanUp() {
87 weak_factory_
.InvalidateWeakPtrs();
// Stores |watchdog| as a raw pointer; WillProcessTask and DidProcessTask call
// CheckArmed() on it.
90 GpuWatchdogThread::GpuWatchdogTaskObserver::GpuWatchdogTaskObserver(
91 GpuWatchdogThread
* watchdog
)
92 : watchdog_(watchdog
) {
// Destructor of the task observer attached to the watched message loop.
95 GpuWatchdogThread::GpuWatchdogTaskObserver::~GpuWatchdogTaskObserver() {
// Task-observer hook: forwards to the watchdog's CheckArmed(). |pending_task|
// itself is not inspected.
98 void GpuWatchdogThread::GpuWatchdogTaskObserver::WillProcessTask(
99 const base::PendingTask
& pending_task
) {
100 watchdog_
->CheckArmed();
// Task-observer hook: forwards to the watchdog's CheckArmed(), mirroring
// WillProcessTask. |pending_task| itself is not inspected.
103 void GpuWatchdogThread::GpuWatchdogTaskObserver::DidProcessTask(
104 const base::PendingTask
& pending_task
) {
105 watchdog_
->CheckArmed();
// Undoes the constructor's set-up: closes the duplicated thread handle,
// unregisters this object from the PowerMonitor, and detaches task_observer_
// from the watched message loop.
108 GpuWatchdogThread::~GpuWatchdogThread() {
109 // Verify that the thread was explicitly stopped. If the thread is stopped
110 // implicitly by the destructor, CleanUp() will not be called.
111 DCHECK(!weak_factory_
.HasWeakPtrs());
// Releases the handle obtained through DuplicateHandle in the constructor.
114 CloseHandle(watched_thread_handle_
);
117 base::PowerMonitor
* power_monitor
= base::PowerMonitor::Get();
119 power_monitor
->RemoveObserver(this);
121 #if defined(OS_CHROMEOS)
// Matches the AddTaskObserver call made in the constructor.
126 watched_message_loop_
->RemoveTaskObserver(&task_observer_
);
// Handles an acknowledgement from the watched thread. Must run on the watchdog
// thread (enforced by the CHECK below). Invalidates outstanding weak pointers
// and posts the next OnCheck after kCheckPeriodMs milliseconds.
129 void GpuWatchdogThread::OnAcknowledge() {
130 CHECK(base::PlatformThread::CurrentId() == thread_id());
132 // The check has already been acknowledged and another has already been
133 // scheduled by a previous call to OnAcknowledge. It is normal for a
134 // watched thread to see armed_ being true multiple times before
135 // the OnAcknowledge task is run on the watchdog thread.
139 // Revoke any pending hang termination.
140 weak_factory_
.InvalidateWeakPtrs();
146 // If it took a long time for the acknowledgement, assume the computer was
147 // recently suspended.
// "A long time" means the current time is past suspension_timeout_, which
// OnCheck sets when it arms the watchdog.
148 bool was_suspended
= (base::Time::Now() > suspension_timeout_
);
150 // The monitored thread has responded. Post a task to check it again.
// The task is bound to a fresh weak pointer, so a later InvalidateWeakPtrs()
// cancels it.
151 message_loop()->PostDelayedTask(
153 base::Bind(&GpuWatchdogThread::OnCheck
, weak_factory_
.GetWeakPtr(),
155 base::TimeDelta::FromMilliseconds(kCheckPeriodMs
));
// Arms the watchdog. Must run on the watchdog thread (enforced by the CHECK
// below). Records the watched thread's CPU time, computes the deadline used to
// detect a suspend, wakes the watched loop with a no-op task and posts the
// delayed termination task.
//
// |after_suspend| selects a timeout three times longer than timeout_.
158 void GpuWatchdogThread::OnCheck(bool after_suspend
) {
159 CHECK(base::PlatformThread::CurrentId() == thread_id());
161 // Do not create any new termination tasks if one has already been created
162 // or the system is suspended.
163 if (armed_
|| suspended_
)
166 // Must set armed before posting the task. This task might be the only task
167 // that will activate the TaskObserver on the watched thread and it must not
168 // miss the false -> true transition.
// Baseline CPU time; DeliberatelyTerminateToRecoverFromHang measures the CPU
// time consumed since this point.
172 arm_cpu_time_
= GetWatchedThreadTime();
175 // Immediately after the computer is woken up from being suspended it might
176 // be pretty sluggish, so allow some extra time before the next timeout.
177 base::TimeDelta timeout
= timeout_
* (after_suspend
? 3 : 1);
// Wall-clock deadline set to twice the chosen timeout from now; being past it
// is treated elsewhere as evidence of a suspend.
178 suspension_timeout_
= base::Time::Now() + timeout
* 2;
180 // Post a task to the monitored thread that does nothing but wake up the
181 // TaskObserver. Any other tasks that are pending on the watched thread will
182 // also wake up the observer. This simply ensures there is at least one.
183 watched_message_loop_
->PostTask(
185 base::Bind(&base::DoNothing
));
187 // Post a task to the watchdog thread to exit if the monitored thread does
188 // not respond in time.
189 message_loop()->PostDelayedTask(
192 &GpuWatchdogThread::DeliberatelyTerminateToRecoverFromHang
,
193 weak_factory_
.GetWeakPtr()),
// Delayed task posted by OnCheck. If the watched thread has used less than
// timeout_ of CPU time since the watchdog was armed, the task re-posts itself
// for the remaining time. Otherwise, after the checks below, it logs an error
// and crashes the process on purpose by writing through a null pointer.
197 // Use the --disable-gpu-watchdog command line switch to disable this.
198 void GpuWatchdogThread::DeliberatelyTerminateToRecoverFromHang() {
199 // Should not get here while the system is suspended.
203 // Defer termination until a certain amount of CPU time has elapsed on the watched thread.
205 base::TimeDelta time_since_arm
= GetWatchedThreadTime() - arm_cpu_time_
;
206 if (time_since_arm
< timeout_
) {
// Try again once the remaining CPU-time budget could have been used up.
207 message_loop()->PostDelayedTask(
210 &GpuWatchdogThread::DeliberatelyTerminateToRecoverFromHang
,
211 weak_factory_
.GetWeakPtr()),
212 timeout_
- time_since_arm
);
217 // If the watchdog woke up significantly behind schedule, disarm and reset
218 // the watchdog check. This is to prevent the watchdog thread from terminating
219 // when a machine wakes up from sleep or hibernation, which would otherwise
220 // appear to be a hang.
221 if (base::Time::Now() > suspension_timeout_
) {
227 // For minimal developer annoyance, don't keep terminating. You need to skip
228 // the call to base::Process::Terminate below in a debugger for this to be
230 static bool terminated
= false;
235 if (IsDebuggerPresent())
239 #if defined(OS_CHROMEOS)
240 // Don't crash if we're not on tty1. This avoids noise in the GPU process
241 // crashes caused by people who use VT2 but still enable crash reporting.
// The buffer is 8 zero-initialized bytes and at most 7 are read, so the string
// handed to sscanf is always NUL-terminated.
242 char tty_string
[8] = {0};
244 !fseek(tty_file_
, 0, SEEK_SET
) &&
245 fread(tty_string
, 1, 7, tty_file_
)) {
247 int num_res
= sscanf(tty_string
, "tty%d", &tty_number
);
248 if (num_res
== 1 && tty_number
!= 1)
253 LOG(ERROR
) << "The GPU process hung. Terminating after "
254 << timeout_
.InMilliseconds() << " ms.";
256 // Deliberately crash the process to create a crash dump.
257 *((volatile int*)0) = 0x1337;
// Posts OnAddPowerObserver to this thread's message_loop(), bound directly to
// |this|, so the registration itself happens in that posted task.
262 void GpuWatchdogThread::AddPowerObserver() {
263 message_loop()->PostTask(
265 base::Bind(&GpuWatchdogThread::OnAddPowerObserver
, this));
// Registers this object as an observer of the process-wide PowerMonitor. The
// monitor is expected to exist already (DCHECKed). The destructor performs the
// matching RemoveObserver call.
268 void GpuWatchdogThread::OnAddPowerObserver() {
269 base::PowerMonitor
* power_monitor
= base::PowerMonitor::Get();
270 DCHECK(power_monitor
);
271 power_monitor
->AddObserver(this);
// NOTE(review): presumably the PowerMonitor observer callback for a system
// suspend (this object registers itself in OnAddPowerObserver) -- confirm
// against the class declaration.
274 void GpuWatchdogThread::OnSuspend() {
277 // When suspending force an acknowledgement to cancel any pending termination
// NOTE(review): presumably the PowerMonitor observer callback for a system
// resume (this object registers itself in OnAddPowerObserver) -- confirm
// against the class declaration.
282 void GpuWatchdogThread::OnResume() {
285 // After resuming jump-start the watchdog again.
// Returns the CPU time consumed so far by the watched thread, i.e. the sum of
// its user-mode and kernel-mode time as reported by GetThreadTimes for
// watched_thread_handle_, expressed as a base::TimeDelta with millisecond
// resolution.
291 base::TimeDelta
GpuWatchdogThread::GetWatchedThreadTime() {
292 FILETIME creation_time
;
295 FILETIME kernel_time
;
296 BOOL result
= GetThreadTimes(watched_thread_handle_
,
// Reassemble each FILETIME's two 32-bit halves into one 64-bit value through
// ULARGE_INTEGER so the two times can be added.
303 ULARGE_INTEGER user_time64
;
304 user_time64
.HighPart
= user_time
.dwHighDateTime
;
305 user_time64
.LowPart
= user_time
.dwLowDateTime
;
307 ULARGE_INTEGER kernel_time64
;
308 kernel_time64
.HighPart
= kernel_time
.dwHighDateTime
;
309 kernel_time64
.LowPart
= kernel_time
.dwLowDateTime
;
311 // Time is reported in units of 100 nanoseconds. Kernel and user time are
312 // summed to deal with two kinds of hangs. One is where the GPU process is
313 // stuck in user level, never calling into the kernel and kernel time is
314 // not increasing. The other is where either the kernel hangs and never
315 // returns to user level or where user level code
316 // calls into kernel level repeatedly, giving up its quanta before it is
317 // tracked, for example a loop that repeatedly Sleeps.
// Dividing by 10000 converts 100-nanosecond units to milliseconds.
318 return base::TimeDelta::FromMilliseconds(static_cast<int64
>(
319 (user_time64
.QuadPart
+ kernel_time64
.QuadPart
) / 10000));
323 } // namespace content