Adding instrumentation to locate the source of jankiness
[chromium-blink-merge.git] / chrome / browser / metrics / thread_watcher.cc
blob 709ad2b000f2a2a87514483dbdc8f9617f63a95e
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "chrome/browser/metrics/thread_watcher.h"
7 #include <math.h> // ceil
9 #include "base/bind.h"
10 #include "base/compiler_specific.h"
11 #include "base/debug/alias.h"
12 #include "base/debug/debugger.h"
13 #include "base/debug/dump_without_crashing.h"
14 #include "base/lazy_instance.h"
15 #include "base/metrics/field_trial.h"
16 #include "base/strings/string_number_conversions.h"
17 #include "base/strings/string_split.h"
18 #include "base/strings/string_tokenizer.h"
19 #include "base/strings/stringprintf.h"
20 #include "base/threading/thread_restrictions.h"
21 #include "build/build_config.h"
22 #include "chrome/browser/chrome_notification_types.h"
23 #include "chrome/common/chrome_switches.h"
24 #include "chrome/common/chrome_version_info.h"
25 #include "chrome/common/logging_chrome.h"
26 #include "content/public/browser/notification_service.h"
28 #if defined(OS_WIN)
29 #include "base/win/windows_version.h"
30 #endif
32 using content::BrowserThread;
34 namespace {
36 // The following are unique function names for forcing the crash when a thread
37 // is unresponsive. This makes it possible to tell from the callstack alone what
38 // thread was unresponsive.
40 // We disable optimizations for this block of functions so the compiler doesn't
41 // merge them all together.
42 MSVC_DISABLE_OPTIMIZE()
43 MSVC_PUSH_DISABLE_WARNING(4748)
45 void ReportThreadHang() {
46 #if defined(NDEBUG)
47 base::debug::DumpWithoutCrashing();
48 #else
49 base::debug::BreakDebugger();
50 #endif
53 #if !defined(OS_ANDROID) || !defined(NDEBUG)
54 // TODO(rtenneti): Enabled crashing, after getting data.
55 NOINLINE void StartupHang() {
56 ReportThreadHang();
58 #endif // OS_ANDROID
60 NOINLINE void ShutdownHang() {
61 ReportThreadHang();
64 NOINLINE void ThreadUnresponsive_UI() {
65 ReportThreadHang();
68 NOINLINE void ThreadUnresponsive_DB() {
69 ReportThreadHang();
72 NOINLINE void ThreadUnresponsive_FILE() {
73 ReportThreadHang();
76 NOINLINE void ThreadUnresponsive_FILE_USER_BLOCKING() {
77 ReportThreadHang();
80 NOINLINE void ThreadUnresponsive_PROCESS_LAUNCHER() {
81 ReportThreadHang();
84 NOINLINE void ThreadUnresponsive_CACHE() {
85 ReportThreadHang();
88 NOINLINE void ThreadUnresponsive_IO() {
89 ReportThreadHang();
92 void CrashBecauseThreadWasUnresponsive(BrowserThread::ID thread_id) {
93 base::debug::Alias(&thread_id);
95 switch (thread_id) {
96 case BrowserThread::UI:
97 return ThreadUnresponsive_UI();
98 case BrowserThread::DB:
99 return ThreadUnresponsive_DB();
100 case BrowserThread::FILE:
101 return ThreadUnresponsive_FILE();
102 case BrowserThread::FILE_USER_BLOCKING:
103 return ThreadUnresponsive_FILE_USER_BLOCKING();
104 case BrowserThread::PROCESS_LAUNCHER:
105 return ThreadUnresponsive_PROCESS_LAUNCHER();
106 case BrowserThread::CACHE:
107 return ThreadUnresponsive_CACHE();
108 case BrowserThread::IO:
109 return ThreadUnresponsive_IO();
110 case BrowserThread::ID_COUNT:
111 CHECK(false); // This shouldn't actually be reached!
112 break;
114 // Omission of the default hander is intentional -- that way the compiler
115 // should warn if our switch becomes outdated.
118 CHECK(false) << "Unknown thread was unresponsive."; // Shouldn't be reached.
121 MSVC_POP_WARNING()
122 MSVC_ENABLE_OPTIMIZE();
124 } // namespace
126 // ThreadWatcher methods and members.
127 ThreadWatcher::ThreadWatcher(const WatchingParams& params)
128 : thread_id_(params.thread_id),
129 thread_name_(params.thread_name),
130 watched_loop_(
131 BrowserThread::GetMessageLoopProxyForThread(params.thread_id)),
132 sleep_time_(params.sleep_time),
133 unresponsive_time_(params.unresponsive_time),
134 ping_time_(base::TimeTicks::Now()),
135 pong_time_(ping_time_),
136 ping_sequence_number_(0),
137 active_(false),
138 ping_count_(params.unresponsive_threshold),
139 response_time_histogram_(NULL),
140 unresponsive_time_histogram_(NULL),
141 unresponsive_count_(0),
142 hung_processing_complete_(false),
143 unresponsive_threshold_(params.unresponsive_threshold),
144 crash_on_hang_(params.crash_on_hang),
145 live_threads_threshold_(params.live_threads_threshold),
146 weak_ptr_factory_(this) {
147 DCHECK(WatchDogThread::CurrentlyOnWatchDogThread());
148 Initialize();
151 ThreadWatcher::~ThreadWatcher() {}
153 // static
154 void ThreadWatcher::StartWatching(const WatchingParams& params) {
155 DCHECK_GE(params.sleep_time.InMilliseconds(), 0);
156 DCHECK_GE(params.unresponsive_time.InMilliseconds(),
157 params.sleep_time.InMilliseconds());
159 // If we are not on WatchDogThread, then post a task to call StartWatching on
160 // WatchDogThread.
161 if (!WatchDogThread::CurrentlyOnWatchDogThread()) {
162 WatchDogThread::PostTask(
163 FROM_HERE,
164 base::Bind(&ThreadWatcher::StartWatching, params));
165 return;
168 DCHECK(WatchDogThread::CurrentlyOnWatchDogThread());
170 // Create a new thread watcher object for the given thread and activate it.
171 ThreadWatcher* watcher = new ThreadWatcher(params);
173 DCHECK(watcher);
174 // If we couldn't register the thread watcher object, we are shutting down,
175 // then don't activate thread watching.
176 if (!ThreadWatcherList::IsRegistered(params.thread_id))
177 return;
178 watcher->ActivateThreadWatching();
181 void ThreadWatcher::ActivateThreadWatching() {
182 DCHECK(WatchDogThread::CurrentlyOnWatchDogThread());
183 if (active_) return;
184 active_ = true;
185 ping_count_ = unresponsive_threshold_;
186 ResetHangCounters();
187 base::MessageLoop::current()->PostTask(
188 FROM_HERE,
189 base::Bind(&ThreadWatcher::PostPingMessage,
190 weak_ptr_factory_.GetWeakPtr()));
193 void ThreadWatcher::DeActivateThreadWatching() {
194 DCHECK(WatchDogThread::CurrentlyOnWatchDogThread());
195 active_ = false;
196 ping_count_ = 0;
197 weak_ptr_factory_.InvalidateWeakPtrs();
200 void ThreadWatcher::WakeUp() {
201 DCHECK(WatchDogThread::CurrentlyOnWatchDogThread());
202 // There is some user activity, PostPingMessage task of thread watcher if
203 // needed.
204 if (!active_) return;
206 // Throw away the previous |unresponsive_count_| and start over again. Just
207 // before going to sleep, |unresponsive_count_| could be very close to
208 // |unresponsive_threshold_| and when user becomes active,
209 // |unresponsive_count_| can go over |unresponsive_threshold_| if there was no
210 // response for ping messages. Reset |unresponsive_count_| to start measuring
211 // the unresponsiveness of the threads when system becomes active.
212 unresponsive_count_ = 0;
214 if (ping_count_ <= 0) {
215 ping_count_ = unresponsive_threshold_;
216 ResetHangCounters();
217 PostPingMessage();
218 } else {
219 ping_count_ = unresponsive_threshold_;
223 void ThreadWatcher::PostPingMessage() {
224 DCHECK(WatchDogThread::CurrentlyOnWatchDogThread());
225 // If we have stopped watching or if the user is idle, then stop sending
226 // ping messages.
227 if (!active_ || ping_count_ <= 0)
228 return;
230 // Save the current time when we have sent ping message.
231 ping_time_ = base::TimeTicks::Now();
233 // Send a ping message to the watched thread. Callback will be called on
234 // the WatchDogThread.
235 base::Closure callback(
236 base::Bind(&ThreadWatcher::OnPongMessage, weak_ptr_factory_.GetWeakPtr(),
237 ping_sequence_number_));
238 if (watched_loop_->PostTask(
239 FROM_HERE,
240 base::Bind(&ThreadWatcher::OnPingMessage, thread_id_,
241 callback))) {
242 // Post a task to check the responsiveness of watched thread.
243 base::MessageLoop::current()->PostDelayedTask(
244 FROM_HERE,
245 base::Bind(&ThreadWatcher::OnCheckResponsiveness,
246 weak_ptr_factory_.GetWeakPtr(), ping_sequence_number_),
247 unresponsive_time_);
248 } else {
249 // Watched thread might have gone away, stop watching it.
250 DeActivateThreadWatching();
254 void ThreadWatcher::OnPongMessage(uint64 ping_sequence_number) {
255 DCHECK(WatchDogThread::CurrentlyOnWatchDogThread());
257 // Record watched thread's response time.
258 base::TimeTicks now = base::TimeTicks::Now();
259 base::TimeDelta response_time = now - ping_time_;
260 response_time_histogram_->AddTime(response_time);
262 // Save the current time when we have got pong message.
263 pong_time_ = now;
265 // Check if there are any extra pings in flight.
266 DCHECK_EQ(ping_sequence_number_, ping_sequence_number);
267 if (ping_sequence_number_ != ping_sequence_number)
268 return;
270 // Increment sequence number for the next ping message to indicate watched
271 // thread is responsive.
272 ++ping_sequence_number_;
274 // If we have stopped watching or if the user is idle, then stop sending
275 // ping messages.
276 if (!active_ || --ping_count_ <= 0)
277 return;
279 base::MessageLoop::current()->PostDelayedTask(
280 FROM_HERE,
281 base::Bind(&ThreadWatcher::PostPingMessage,
282 weak_ptr_factory_.GetWeakPtr()),
283 sleep_time_);
286 void ThreadWatcher::OnCheckResponsiveness(uint64 ping_sequence_number) {
287 DCHECK(WatchDogThread::CurrentlyOnWatchDogThread());
288 // If we have stopped watching then consider thread as responding.
289 if (!active_) {
290 responsive_ = true;
291 return;
293 // If the latest ping_sequence_number_ is not same as the ping_sequence_number
294 // that is passed in, then we can assume OnPongMessage was called.
295 // OnPongMessage increments ping_sequence_number_.
296 if (ping_sequence_number_ != ping_sequence_number) {
297 // Reset unresponsive_count_ to zero because we got a response from the
298 // watched thread.
299 ResetHangCounters();
301 responsive_ = true;
302 return;
304 // Record that we got no response from watched thread.
305 GotNoResponse();
307 // Post a task to check the responsiveness of watched thread.
308 base::MessageLoop::current()->PostDelayedTask(
309 FROM_HERE,
310 base::Bind(&ThreadWatcher::OnCheckResponsiveness,
311 weak_ptr_factory_.GetWeakPtr(), ping_sequence_number_),
312 unresponsive_time_);
313 responsive_ = false;
316 void ThreadWatcher::Initialize() {
317 DCHECK(WatchDogThread::CurrentlyOnWatchDogThread());
318 ThreadWatcherList::Register(this);
320 const std::string response_time_histogram_name =
321 "ThreadWatcher.ResponseTime." + thread_name_;
322 response_time_histogram_ = base::Histogram::FactoryTimeGet(
323 response_time_histogram_name,
324 base::TimeDelta::FromMilliseconds(1),
325 base::TimeDelta::FromSeconds(100), 50,
326 base::Histogram::kUmaTargetedHistogramFlag);
328 const std::string unresponsive_time_histogram_name =
329 "ThreadWatcher.Unresponsive." + thread_name_;
330 unresponsive_time_histogram_ = base::Histogram::FactoryTimeGet(
331 unresponsive_time_histogram_name,
332 base::TimeDelta::FromMilliseconds(1),
333 base::TimeDelta::FromSeconds(100), 50,
334 base::Histogram::kUmaTargetedHistogramFlag);
336 const std::string responsive_count_histogram_name =
337 "ThreadWatcher.ResponsiveThreads." + thread_name_;
338 responsive_count_histogram_ = base::LinearHistogram::FactoryGet(
339 responsive_count_histogram_name, 1, 10, 11,
340 base::Histogram::kUmaTargetedHistogramFlag);
342 const std::string unresponsive_count_histogram_name =
343 "ThreadWatcher.UnresponsiveThreads." + thread_name_;
344 unresponsive_count_histogram_ = base::LinearHistogram::FactoryGet(
345 unresponsive_count_histogram_name, 1, 10, 11,
346 base::Histogram::kUmaTargetedHistogramFlag);
349 // static
350 void ThreadWatcher::OnPingMessage(const BrowserThread::ID& thread_id,
351 const base::Closure& callback_task) {
352 // This method is called on watched thread.
353 DCHECK(BrowserThread::CurrentlyOn(thread_id));
354 WatchDogThread::PostTask(FROM_HERE, callback_task);
357 void ThreadWatcher::ResetHangCounters() {
358 DCHECK(WatchDogThread::CurrentlyOnWatchDogThread());
359 unresponsive_count_ = 0;
360 hung_processing_complete_ = false;
363 void ThreadWatcher::GotNoResponse() {
364 DCHECK(WatchDogThread::CurrentlyOnWatchDogThread());
366 ++unresponsive_count_;
367 if (!IsVeryUnresponsive())
368 return;
370 // Record total unresponsive_time since last pong message.
371 base::TimeDelta unresponse_time = base::TimeTicks::Now() - pong_time_;
372 unresponsive_time_histogram_->AddTime(unresponse_time);
374 // We have already collected stats for the non-responding watched thread.
375 if (hung_processing_complete_)
376 return;
378 // Record how other threads are responding.
379 uint32 responding_thread_count = 0;
380 uint32 unresponding_thread_count = 0;
381 ThreadWatcherList::GetStatusOfThreads(&responding_thread_count,
382 &unresponding_thread_count);
384 // Record how many watched threads are responding.
385 responsive_count_histogram_->Add(responding_thread_count);
387 // Record how many watched threads are not responding.
388 unresponsive_count_histogram_->Add(unresponding_thread_count);
390 // Crash the browser if the watched thread is to be crashed on hang and if the
391 // number of other threads responding is less than or equal to
392 // live_threads_threshold_ and at least one other thread is responding.
393 if (crash_on_hang_ &&
394 responding_thread_count > 0 &&
395 responding_thread_count <= live_threads_threshold_) {
396 static bool crashed_once = false;
397 if (!crashed_once) {
398 crashed_once = true;
399 CrashBecauseThreadWasUnresponsive(thread_id_);
403 hung_processing_complete_ = true;
406 bool ThreadWatcher::IsVeryUnresponsive() {
407 DCHECK(WatchDogThread::CurrentlyOnWatchDogThread());
408 return unresponsive_count_ >= unresponsive_threshold_;
411 // ThreadWatcherList methods and members.
413 // static
414 ThreadWatcherList* ThreadWatcherList::g_thread_watcher_list_ = NULL;
415 // static
416 bool ThreadWatcherList::g_stopped_ = false;
417 // static
418 const int ThreadWatcherList::kSleepSeconds = 1;
419 // static
420 const int ThreadWatcherList::kUnresponsiveSeconds = 2;
421 // static
422 const int ThreadWatcherList::kUnresponsiveCount = 9;
423 // static
424 const int ThreadWatcherList::kLiveThreadsThreshold = 2;
425 // static, non-const for tests.
426 int ThreadWatcherList::g_initialize_delay_seconds = 120;
428 ThreadWatcherList::CrashDataThresholds::CrashDataThresholds(
429 uint32 live_threads_threshold,
430 uint32 unresponsive_threshold)
431 : live_threads_threshold(live_threads_threshold),
432 unresponsive_threshold(unresponsive_threshold) {
435 ThreadWatcherList::CrashDataThresholds::CrashDataThresholds()
436 : live_threads_threshold(kLiveThreadsThreshold),
437 unresponsive_threshold(kUnresponsiveCount) {
440 // static
441 void ThreadWatcherList::StartWatchingAll(const CommandLine& command_line) {
442 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
443 uint32 unresponsive_threshold;
444 CrashOnHangThreadMap crash_on_hang_threads;
445 ParseCommandLine(command_line,
446 &unresponsive_threshold,
447 &crash_on_hang_threads);
449 ThreadWatcherObserver::SetupNotifications(
450 base::TimeDelta::FromSeconds(kSleepSeconds * unresponsive_threshold));
452 WatchDogThread::PostTask(
453 FROM_HERE,
454 base::Bind(&ThreadWatcherList::SetStopped, false));
456 if (!WatchDogThread::PostDelayedTask(
457 FROM_HERE,
458 base::Bind(&ThreadWatcherList::InitializeAndStartWatching,
459 unresponsive_threshold,
460 crash_on_hang_threads),
461 base::TimeDelta::FromSeconds(g_initialize_delay_seconds))) {
462 // Disarm() the startup timebomb, if we couldn't post the task to start the
463 // ThreadWatcher (becasue WatchDog thread is not running).
464 StartupTimeBomb::DisarmStartupTimeBomb();
468 // static
469 void ThreadWatcherList::StopWatchingAll() {
470 // TODO(rtenneti): Enable ThreadWatcher.
471 ThreadWatcherObserver::RemoveNotifications();
472 DeleteAll();
475 // static
476 void ThreadWatcherList::Register(ThreadWatcher* watcher) {
477 DCHECK(WatchDogThread::CurrentlyOnWatchDogThread());
478 if (!g_thread_watcher_list_)
479 return;
480 DCHECK(!g_thread_watcher_list_->Find(watcher->thread_id()));
481 g_thread_watcher_list_->registered_[watcher->thread_id()] = watcher;
484 // static
485 bool ThreadWatcherList::IsRegistered(const BrowserThread::ID thread_id) {
486 DCHECK(WatchDogThread::CurrentlyOnWatchDogThread());
487 return NULL != ThreadWatcherList::Find(thread_id);
490 // static
491 void ThreadWatcherList::GetStatusOfThreads(uint32* responding_thread_count,
492 uint32* unresponding_thread_count) {
493 DCHECK(WatchDogThread::CurrentlyOnWatchDogThread());
494 *responding_thread_count = 0;
495 *unresponding_thread_count = 0;
496 if (!g_thread_watcher_list_)
497 return;
499 for (RegistrationList::iterator it =
500 g_thread_watcher_list_->registered_.begin();
501 g_thread_watcher_list_->registered_.end() != it;
502 ++it) {
503 if (it->second->IsVeryUnresponsive())
504 ++(*unresponding_thread_count);
505 else
506 ++(*responding_thread_count);
510 // static
511 void ThreadWatcherList::WakeUpAll() {
512 DCHECK(WatchDogThread::CurrentlyOnWatchDogThread());
513 if (!g_thread_watcher_list_)
514 return;
516 for (RegistrationList::iterator it =
517 g_thread_watcher_list_->registered_.begin();
518 g_thread_watcher_list_->registered_.end() != it;
519 ++it)
520 it->second->WakeUp();
523 ThreadWatcherList::ThreadWatcherList() {
524 DCHECK(WatchDogThread::CurrentlyOnWatchDogThread());
525 CHECK(!g_thread_watcher_list_);
526 g_thread_watcher_list_ = this;
529 ThreadWatcherList::~ThreadWatcherList() {
530 DCHECK(WatchDogThread::CurrentlyOnWatchDogThread());
531 DCHECK(this == g_thread_watcher_list_);
532 g_thread_watcher_list_ = NULL;
535 // static
536 void ThreadWatcherList::ParseCommandLine(
537 const CommandLine& command_line,
538 uint32* unresponsive_threshold,
539 CrashOnHangThreadMap* crash_on_hang_threads) {
540 // Initialize |unresponsive_threshold| to a default value.
541 // TODO(rtenneti): Changed the default value to 4 times, until we can triage
542 // hangs automatically (and to reduce the crash dumps).
543 *unresponsive_threshold = kUnresponsiveCount * 4;
545 // Increase the unresponsive_threshold on the Stable and Beta channels to
546 // reduce the number of crashes due to ThreadWatcher.
547 chrome::VersionInfo::Channel channel = chrome::VersionInfo::GetChannel();
548 if (channel == chrome::VersionInfo::CHANNEL_STABLE) {
549 *unresponsive_threshold *= 4;
550 } else if (channel == chrome::VersionInfo::CHANNEL_BETA) {
551 *unresponsive_threshold *= 2;
554 #if defined(OS_WIN)
555 // For Windows XP (old systems), double the unresponsive_threshold to give
556 // the OS a chance to schedule UI/IO threads a time slice to respond with a
557 // pong message (to get around limitations with the OS).
558 if (base::win::GetVersion() <= base::win::VERSION_XP)
559 *unresponsive_threshold *= 2;
560 #endif
562 uint32 crash_seconds = *unresponsive_threshold * kUnresponsiveSeconds;
563 std::string crash_on_hang_thread_names;
564 bool has_command_line_overwrite = false;
565 if (command_line.HasSwitch(switches::kCrashOnHangThreads)) {
566 crash_on_hang_thread_names =
567 command_line.GetSwitchValueASCII(switches::kCrashOnHangThreads);
568 has_command_line_overwrite = true;
569 } else if (channel != chrome::VersionInfo::CHANNEL_STABLE) {
570 // Default to crashing the browser if UI or IO or FILE threads are not
571 // responsive except in stable channel.
572 crash_on_hang_thread_names = base::StringPrintf(
573 "UI:%d:%d,IO:%d:%d,FILE:%d:%d",
574 kLiveThreadsThreshold, crash_seconds,
575 kLiveThreadsThreshold, crash_seconds,
576 kLiveThreadsThreshold, crash_seconds * 5);
579 ParseCommandLineCrashOnHangThreads(crash_on_hang_thread_names,
580 kLiveThreadsThreshold,
581 crash_seconds,
582 crash_on_hang_threads);
584 if (channel != chrome::VersionInfo::CHANNEL_CANARY ||
585 has_command_line_overwrite) {
586 return;
589 const char* kFieldTrialName = "ThreadWatcher";
591 // Nothing else to be done if the trial has already been set (i.e., when
592 // StartWatchingAll() has been already called once).
593 if (base::FieldTrialList::TrialExists(kFieldTrialName))
594 return;
596 // Set up a field trial for 100% of the users to crash if either UI or IO
597 // thread is not responsive for 30 seconds (or 15 pings).
598 scoped_refptr<base::FieldTrial> field_trial(
599 base::FieldTrialList::FactoryGetFieldTrial(
600 kFieldTrialName, 100, "default_hung_threads",
601 2014, 10, 30, base::FieldTrial::SESSION_RANDOMIZED, NULL));
602 int hung_thread_group = field_trial->AppendGroup("hung_thread", 100);
603 if (field_trial->group() == hung_thread_group) {
604 for (CrashOnHangThreadMap::iterator it = crash_on_hang_threads->begin();
605 crash_on_hang_threads->end() != it;
606 ++it) {
607 if (it->first == "FILE")
608 continue;
609 it->second.live_threads_threshold = INT_MAX;
610 if (it->first == "UI") {
611 // TODO(rtenneti): set unresponsive threshold to 120 seconds to catch
612 // the worst UI hangs and for fewer crashes due to ThreadWatcher. Reduce
613 // it to a more reasonable time ala IO thread.
614 it->second.unresponsive_threshold = 60;
615 } else {
616 it->second.unresponsive_threshold = 15;
622 // static
623 void ThreadWatcherList::ParseCommandLineCrashOnHangThreads(
624 const std::string& crash_on_hang_thread_names,
625 uint32 default_live_threads_threshold,
626 uint32 default_crash_seconds,
627 CrashOnHangThreadMap* crash_on_hang_threads) {
628 base::StringTokenizer tokens(crash_on_hang_thread_names, ",");
629 std::vector<std::string> values;
630 while (tokens.GetNext()) {
631 const std::string& token = tokens.token();
632 base::SplitString(token, ':', &values);
633 std::string thread_name = values[0];
635 uint32 live_threads_threshold = default_live_threads_threshold;
636 uint32 crash_seconds = default_crash_seconds;
637 if (values.size() >= 2 &&
638 (!base::StringToUint(values[1], &live_threads_threshold))) {
639 continue;
641 if (values.size() >= 3 &&
642 (!base::StringToUint(values[2], &crash_seconds))) {
643 continue;
645 uint32 unresponsive_threshold = static_cast<uint32>(
646 ceil(static_cast<float>(crash_seconds) / kUnresponsiveSeconds));
648 CrashDataThresholds crash_data(live_threads_threshold,
649 unresponsive_threshold);
650 // Use the last specifier.
651 (*crash_on_hang_threads)[thread_name] = crash_data;
655 // static
656 void ThreadWatcherList::InitializeAndStartWatching(
657 uint32 unresponsive_threshold,
658 const CrashOnHangThreadMap& crash_on_hang_threads) {
659 DCHECK(WatchDogThread::CurrentlyOnWatchDogThread());
661 // Disarm the startup timebomb, even if stop has been called.
662 BrowserThread::PostTask(
663 BrowserThread::UI,
664 FROM_HERE,
665 base::Bind(&StartupTimeBomb::DisarmStartupTimeBomb));
667 // This method is deferred in relationship to its StopWatchingAll()
668 // counterpart. If a previous initialization has already happened, or if
669 // stop has been called, there's nothing left to do here.
670 if (g_thread_watcher_list_ || g_stopped_)
671 return;
673 ThreadWatcherList* thread_watcher_list = new ThreadWatcherList();
674 CHECK(thread_watcher_list);
676 const base::TimeDelta kSleepTime =
677 base::TimeDelta::FromSeconds(kSleepSeconds);
678 const base::TimeDelta kUnresponsiveTime =
679 base::TimeDelta::FromSeconds(kUnresponsiveSeconds);
681 StartWatching(BrowserThread::UI, "UI", kSleepTime, kUnresponsiveTime,
682 unresponsive_threshold, crash_on_hang_threads);
683 StartWatching(BrowserThread::IO, "IO", kSleepTime, kUnresponsiveTime,
684 unresponsive_threshold, crash_on_hang_threads);
685 StartWatching(BrowserThread::DB, "DB", kSleepTime, kUnresponsiveTime,
686 unresponsive_threshold, crash_on_hang_threads);
687 StartWatching(BrowserThread::FILE, "FILE", kSleepTime, kUnresponsiveTime,
688 unresponsive_threshold, crash_on_hang_threads);
689 StartWatching(BrowserThread::CACHE, "CACHE", kSleepTime, kUnresponsiveTime,
690 unresponsive_threshold, crash_on_hang_threads);
693 // static
694 void ThreadWatcherList::StartWatching(
695 const BrowserThread::ID& thread_id,
696 const std::string& thread_name,
697 const base::TimeDelta& sleep_time,
698 const base::TimeDelta& unresponsive_time,
699 uint32 unresponsive_threshold,
700 const CrashOnHangThreadMap& crash_on_hang_threads) {
701 DCHECK(WatchDogThread::CurrentlyOnWatchDogThread());
703 CrashOnHangThreadMap::const_iterator it =
704 crash_on_hang_threads.find(thread_name);
705 bool crash_on_hang = false;
706 uint32 live_threads_threshold = 0;
707 if (it != crash_on_hang_threads.end()) {
708 crash_on_hang = true;
709 live_threads_threshold = it->second.live_threads_threshold;
710 unresponsive_threshold = it->second.unresponsive_threshold;
713 ThreadWatcher::StartWatching(
714 ThreadWatcher::WatchingParams(thread_id,
715 thread_name,
716 sleep_time,
717 unresponsive_time,
718 unresponsive_threshold,
719 crash_on_hang,
720 live_threads_threshold));
723 // static
724 void ThreadWatcherList::DeleteAll() {
725 if (!WatchDogThread::CurrentlyOnWatchDogThread()) {
726 WatchDogThread::PostTask(
727 FROM_HERE,
728 base::Bind(&ThreadWatcherList::DeleteAll));
729 return;
732 DCHECK(WatchDogThread::CurrentlyOnWatchDogThread());
734 SetStopped(true);
736 if (!g_thread_watcher_list_)
737 return;
739 // Delete all thread watcher objects.
740 while (!g_thread_watcher_list_->registered_.empty()) {
741 RegistrationList::iterator it = g_thread_watcher_list_->registered_.begin();
742 delete it->second;
743 g_thread_watcher_list_->registered_.erase(it);
746 delete g_thread_watcher_list_;
749 // static
750 ThreadWatcher* ThreadWatcherList::Find(const BrowserThread::ID& thread_id) {
751 DCHECK(WatchDogThread::CurrentlyOnWatchDogThread());
752 if (!g_thread_watcher_list_)
753 return NULL;
754 RegistrationList::iterator it =
755 g_thread_watcher_list_->registered_.find(thread_id);
756 if (g_thread_watcher_list_->registered_.end() == it)
757 return NULL;
758 return it->second;
761 // static
762 void ThreadWatcherList::SetStopped(bool stopped) {
763 DCHECK(WatchDogThread::CurrentlyOnWatchDogThread());
764 g_stopped_ = stopped;
767 // ThreadWatcherObserver methods and members.
769 // static
770 ThreadWatcherObserver* ThreadWatcherObserver::g_thread_watcher_observer_ = NULL;
772 ThreadWatcherObserver::ThreadWatcherObserver(
773 const base::TimeDelta& wakeup_interval)
774 : last_wakeup_time_(base::TimeTicks::Now()),
775 wakeup_interval_(wakeup_interval) {
776 CHECK(!g_thread_watcher_observer_);
777 g_thread_watcher_observer_ = this;
780 ThreadWatcherObserver::~ThreadWatcherObserver() {
781 DCHECK(this == g_thread_watcher_observer_);
782 g_thread_watcher_observer_ = NULL;
785 // static
786 void ThreadWatcherObserver::SetupNotifications(
787 const base::TimeDelta& wakeup_interval) {
788 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
789 ThreadWatcherObserver* observer = new ThreadWatcherObserver(wakeup_interval);
790 observer->registrar_.Add(
791 observer,
792 chrome::NOTIFICATION_BROWSER_OPENED,
793 content::NotificationService::AllBrowserContextsAndSources());
794 observer->registrar_.Add(observer,
795 chrome::NOTIFICATION_BROWSER_CLOSED,
796 content::NotificationService::AllSources());
797 observer->registrar_.Add(observer,
798 chrome::NOTIFICATION_TAB_PARENTED,
799 content::NotificationService::AllSources());
800 observer->registrar_.Add(observer,
801 chrome::NOTIFICATION_TAB_CLOSING,
802 content::NotificationService::AllSources());
803 observer->registrar_.Add(observer,
804 content::NOTIFICATION_LOAD_START,
805 content::NotificationService::AllSources());
806 observer->registrar_.Add(observer,
807 content::NOTIFICATION_LOAD_STOP,
808 content::NotificationService::AllSources());
809 observer->registrar_.Add(observer,
810 content::NOTIFICATION_RENDERER_PROCESS_CLOSED,
811 content::NotificationService::AllSources());
812 observer->registrar_.Add(observer,
813 content::NOTIFICATION_RENDER_WIDGET_HOST_HANG,
814 content::NotificationService::AllSources());
815 observer->registrar_.Add(observer,
816 chrome::NOTIFICATION_OMNIBOX_OPENED_URL,
817 content::NotificationService::AllSources());
820 // static
821 void ThreadWatcherObserver::RemoveNotifications() {
822 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
823 if (!g_thread_watcher_observer_)
824 return;
825 g_thread_watcher_observer_->registrar_.RemoveAll();
826 delete g_thread_watcher_observer_;
829 void ThreadWatcherObserver::Observe(
830 int type,
831 const content::NotificationSource& source,
832 const content::NotificationDetails& details) {
833 // There is some user activity, see if thread watchers are to be awakened.
834 base::TimeTicks now = base::TimeTicks::Now();
835 if ((now - last_wakeup_time_) < wakeup_interval_)
836 return;
837 last_wakeup_time_ = now;
838 WatchDogThread::PostTask(
839 FROM_HERE,
840 base::Bind(&ThreadWatcherList::WakeUpAll));
843 // WatchDogThread methods and members.
845 // This lock protects g_watchdog_thread.
846 static base::LazyInstance<base::Lock>::Leaky
847 g_watchdog_lock = LAZY_INSTANCE_INITIALIZER;
849 // The singleton of this class.
850 static WatchDogThread* g_watchdog_thread = NULL;
852 WatchDogThread::WatchDogThread() : Thread("BrowserWatchdog") {
855 WatchDogThread::~WatchDogThread() {
856 Stop();
859 // static
860 bool WatchDogThread::CurrentlyOnWatchDogThread() {
861 base::AutoLock lock(g_watchdog_lock.Get());
862 return g_watchdog_thread &&
863 g_watchdog_thread->message_loop() == base::MessageLoop::current();
866 // static
867 bool WatchDogThread::PostTask(const tracked_objects::Location& from_here,
868 const base::Closure& task) {
869 return PostTaskHelper(from_here, task, base::TimeDelta());
872 // static
873 bool WatchDogThread::PostDelayedTask(const tracked_objects::Location& from_here,
874 const base::Closure& task,
875 base::TimeDelta delay) {
876 return PostTaskHelper(from_here, task, delay);
879 // static
880 bool WatchDogThread::PostTaskHelper(
881 const tracked_objects::Location& from_here,
882 const base::Closure& task,
883 base::TimeDelta delay) {
885 base::AutoLock lock(g_watchdog_lock.Get());
887 base::MessageLoop* message_loop = g_watchdog_thread ?
888 g_watchdog_thread->message_loop() : NULL;
889 if (message_loop) {
890 message_loop->PostDelayedTask(from_here, task, delay);
891 return true;
895 return false;
898 void WatchDogThread::Init() {
899 // This thread shouldn't be allowed to perform any blocking disk I/O.
900 base::ThreadRestrictions::SetIOAllowed(false);
902 base::AutoLock lock(g_watchdog_lock.Get());
903 CHECK(!g_watchdog_thread);
904 g_watchdog_thread = this;
907 void WatchDogThread::CleanUp() {
908 base::AutoLock lock(g_watchdog_lock.Get());
909 g_watchdog_thread = NULL;
912 namespace {
914 // StartupWatchDogThread methods and members.
916 // Class for detecting hangs during startup.
917 class StartupWatchDogThread : public base::Watchdog {
918 public:
919 // Constructor specifies how long the StartupWatchDogThread will wait before
920 // alarming.
921 explicit StartupWatchDogThread(const base::TimeDelta& duration)
922 : base::Watchdog(duration, "Startup watchdog thread", true) {
923 #if defined(OS_ANDROID)
924 // TODO(rtenneti): Delete this code, after getting data.
925 start_time_clock_= base::Time::Now();
926 start_time_monotonic_ = base::TimeTicks::Now();
927 start_time_thread_now_ = base::TimeTicks::IsThreadNowSupported()
928 ? base::TimeTicks::ThreadNow() : base::TimeTicks::Now();
929 #endif // OS_ANDROID
932 // Alarm is called if the time expires after an Arm() without someone calling
933 // Disarm(). When Alarm goes off, in release mode we get the crash dump
934 // without crashing and in debug mode we break into the debugger.
935 virtual void Alarm() override {
936 #if !defined(NDEBUG)
937 StartupHang();
938 return;
939 #elif !defined(OS_ANDROID)
940 WatchDogThread::PostTask(FROM_HERE, base::Bind(&StartupHang));
941 return;
942 #else // Android release: gather stats to figure out when to crash.
943 // TODO(rtenneti): Delete this code, after getting data.
944 UMA_HISTOGRAM_TIMES("StartupTimeBomb.Alarm.TimeDuration",
945 base::Time::Now() - start_time_clock_);
946 UMA_HISTOGRAM_TIMES("StartupTimeBomb.Alarm.TimeTicksDuration",
947 base::TimeTicks::Now() - start_time_monotonic_);
948 if (base::TimeTicks::IsThreadNowSupported()) {
949 UMA_HISTOGRAM_TIMES(
950 "StartupTimeBomb.Alarm.ThreadNowDuration",
951 base::TimeTicks::ThreadNow() - start_time_thread_now_);
953 return;
954 #endif // OS_ANDROID
957 private:
958 #if defined(OS_ANDROID)
959 // TODO(rtenneti): Delete this code, after getting data.
960 base::Time start_time_clock_;
961 base::TimeTicks start_time_monotonic_;
962 base::TimeTicks start_time_thread_now_;
963 #endif // OS_ANDROID
965 DISALLOW_COPY_AND_ASSIGN(StartupWatchDogThread);
968 // ShutdownWatchDogThread methods and members.
970 // Class for detecting hangs during shutdown.
971 class ShutdownWatchDogThread : public base::Watchdog {
972 public:
973 // Constructor specifies how long the ShutdownWatchDogThread will wait before
974 // alarming.
975 explicit ShutdownWatchDogThread(const base::TimeDelta& duration)
976 : base::Watchdog(duration, "Shutdown watchdog thread", true) {
979 // Alarm is called if the time expires after an Arm() without someone calling
980 // Disarm(). We crash the browser if this method is called.
981 virtual void Alarm() override {
982 ShutdownHang();
985 private:
986 DISALLOW_COPY_AND_ASSIGN(ShutdownWatchDogThread);
988 } // namespace
990 // StartupTimeBomb methods and members.
992 // static
993 StartupTimeBomb* StartupTimeBomb::g_startup_timebomb_ = NULL;
995 StartupTimeBomb::StartupTimeBomb()
996 : startup_watchdog_(NULL),
997 thread_id_(base::PlatformThread::CurrentId()) {
998 CHECK(!g_startup_timebomb_);
999 g_startup_timebomb_ = this;
1002 StartupTimeBomb::~StartupTimeBomb() {
1003 DCHECK(this == g_startup_timebomb_);
1004 DCHECK_EQ(thread_id_, base::PlatformThread::CurrentId());
1005 if (startup_watchdog_)
1006 Disarm();
1007 g_startup_timebomb_ = NULL;
1010 void StartupTimeBomb::Arm(const base::TimeDelta& duration) {
1011 DCHECK_EQ(thread_id_, base::PlatformThread::CurrentId());
1012 DCHECK(!startup_watchdog_);
1013 startup_watchdog_ = new StartupWatchDogThread(duration);
1014 startup_watchdog_->Arm();
1015 return;
1018 void StartupTimeBomb::Disarm() {
1019 DCHECK_EQ(thread_id_, base::PlatformThread::CurrentId());
1020 if (startup_watchdog_) {
1021 startup_watchdog_->Disarm();
1022 startup_watchdog_->Cleanup();
1023 DeleteStartupWatchdog();
1027 void StartupTimeBomb::DeleteStartupWatchdog() {
1028 DCHECK_EQ(thread_id_, base::PlatformThread::CurrentId());
1029 if (startup_watchdog_->IsJoinable()) {
1030 // Allow the watchdog thread to shutdown on UI. Watchdog thread shutdowns
1031 // very fast.
1032 base::ThreadRestrictions::SetIOAllowed(true);
1033 delete startup_watchdog_;
1034 startup_watchdog_ = NULL;
1035 return;
1037 base::MessageLoop::current()->PostDelayedTask(
1038 FROM_HERE,
1039 base::Bind(&StartupTimeBomb::DeleteStartupWatchdog,
1040 base::Unretained(this)),
1041 base::TimeDelta::FromSeconds(10));
1044 // static
1045 void StartupTimeBomb::DisarmStartupTimeBomb() {
1046 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
1047 if (g_startup_timebomb_)
1048 g_startup_timebomb_->Disarm();
1051 // ShutdownWatcherHelper methods and members.
1053 // ShutdownWatcherHelper is a wrapper class for detecting hangs during
1054 // shutdown.
1055 ShutdownWatcherHelper::ShutdownWatcherHelper()
1056 : shutdown_watchdog_(NULL),
1057 thread_id_(base::PlatformThread::CurrentId()) {
1060 ShutdownWatcherHelper::~ShutdownWatcherHelper() {
1061 DCHECK_EQ(thread_id_, base::PlatformThread::CurrentId());
1062 if (shutdown_watchdog_) {
1063 shutdown_watchdog_->Disarm();
1064 delete shutdown_watchdog_;
1065 shutdown_watchdog_ = NULL;
1069 void ShutdownWatcherHelper::Arm(const base::TimeDelta& duration) {
1070 DCHECK_EQ(thread_id_, base::PlatformThread::CurrentId());
1071 DCHECK(!shutdown_watchdog_);
1072 base::TimeDelta actual_duration = duration;
1074 chrome::VersionInfo::Channel channel = chrome::VersionInfo::GetChannel();
1075 if (channel == chrome::VersionInfo::CHANNEL_STABLE) {
1076 actual_duration *= 20;
1077 } else if (channel == chrome::VersionInfo::CHANNEL_BETA ||
1078 channel == chrome::VersionInfo::CHANNEL_DEV) {
1079 actual_duration *= 10;
1082 #if defined(OS_WIN)
1083 // On Windows XP, give twice the time for shutdown.
1084 if (base::win::GetVersion() <= base::win::VERSION_XP)
1085 actual_duration *= 2;
1086 #endif
1088 shutdown_watchdog_ = new ShutdownWatchDogThread(actual_duration);
1089 shutdown_watchdog_->Arm();