// Copyright (c) 2012 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "base/tracked_objects.h"

#include <limits.h>

#include "base/atomicops.h"
#include "base/base_switches.h"
#include "base/command_line.h"
#include "base/compiler_specific.h"
#include "base/debug/leak_annotations.h"
#include "base/logging.h"
#include "base/process/process_handle.h"
#include "base/profiler/alternate_timer.h"
#include "base/strings/stringprintf.h"
#include "base/third_party/valgrind/memcheck.h"
#include "base/tracking_info.h"

using base::TimeDelta;

namespace tracked_objects {

namespace {

// When ThreadData is first initialized, should we start in an ACTIVE state to
// record all of the startup-time tasks, or should we start up DEACTIVATED, so
// that we only record after parsing the command line flag --enable-tracking.
// Note that the flag may force either state, so this really controls only the
// period of time up until that flag is parsed. If there is no flag seen, then
// this state may prevail for much or all of the process lifetime.
const ThreadData::Status kInitialStartupState = ThreadData::PROFILING_ACTIVE;

// Control whether an alternate time source (Now() function) is supported by
// the ThreadData class. This compile time flag should be set to true if we
// want other modules (such as a memory allocator, or a thread-specific CPU time
// clock) to be able to provide a thread-specific Now() function. Without this
// compile-time flag, the code will only support the wall-clock time. This flag
// can be flipped to efficiently disable this path (if there is a performance
// problem with its presence).
static const bool kAllowAlternateTimeSourceHandling = true;

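// Illustrative sketch (hypothetical, not part of the original file): with this
// flag enabled, an embedder could install a per-thread clock that returns a
// millisecond count, roughly like
//   unsigned int MyThreadCpuNowMs() { /* thread-specific clock */ return 0; }
//   tracked_objects::ThreadData::SetAlternateTimeSource(&MyThreadCpuNowMs);
// In this file the hook is normally installed via
// OptionallyInitializeAlternateTimer(), which consults GetAlternateTimeSource().
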
// Possible states of the profiler timing enabledness.
enum {
  UNDEFINED_TIMING,
  ENABLED_TIMING,
  DISABLED_TIMING,
};

// State of the profiler timing enabledness.
base::subtle::Atomic32 g_profiler_timing_enabled = UNDEFINED_TIMING;

// Returns whether profiler timing is enabled. The default is true, but this
// may be overridden by a command-line flag. Some platforms may
// programmatically set this command-line flag to the "off" value if it's not
// specified.
// This in turn can be overridden by explicitly calling
// ThreadData::EnableProfilerTiming, say, based on a field trial.
inline bool IsProfilerTimingEnabled() {
  // Reading |g_profiler_timing_enabled| is done without barrier because
  // multiple initialization is not an issue while the barrier can be relatively
  // costly given that this method is sometimes called in a tight loop.
  base::subtle::Atomic32 current_timing_enabled =
      base::subtle::NoBarrier_Load(&g_profiler_timing_enabled);
  if (current_timing_enabled == UNDEFINED_TIMING) {
    if (!base::CommandLine::InitializedForCurrentProcess())
      return true;
    current_timing_enabled =
        (base::CommandLine::ForCurrentProcess()->GetSwitchValueASCII(
             switches::kProfilerTiming) ==
         switches::kProfilerTimingDisabledValue)
            ? DISABLED_TIMING
            : ENABLED_TIMING;
    base::subtle::NoBarrier_Store(&g_profiler_timing_enabled,
                                  current_timing_enabled);
  }
  return current_timing_enabled == ENABLED_TIMING;
}

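// Usage note (summarizing the logic above, not additional machinery): timing
// stays on unless the process was launched with the switches::kProfilerTiming
// switch set to switches::kProfilerTimingDisabledValue; the decision is cached
// in |g_profiler_timing_enabled|, and ThreadData::EnableProfilerTiming() can
// later force it back on, e.g. from a field trial.
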
}  // namespace

//------------------------------------------------------------------------------
// DeathData tallies durations when a death takes place.

DeathData::DeathData()
    : count_(0),
      sample_probability_count_(0),
      run_duration_sum_(0),
      queue_duration_sum_(0),
      run_duration_max_(0),
      queue_duration_max_(0),
      run_duration_sample_(0),
      queue_duration_sample_(0),
      last_phase_snapshot_(nullptr) {
}

DeathData::DeathData(const DeathData& other)
    : count_(other.count_),
      sample_probability_count_(other.sample_probability_count_),
      run_duration_sum_(other.run_duration_sum_),
      queue_duration_sum_(other.queue_duration_sum_),
      run_duration_max_(other.run_duration_max_),
      queue_duration_max_(other.queue_duration_max_),
      run_duration_sample_(other.run_duration_sample_),
      queue_duration_sample_(other.queue_duration_sample_),
      last_phase_snapshot_(nullptr) {
  // This constructor will be used by std::map when adding new DeathData values
  // to the map. At that point, last_phase_snapshot_ is still NULL, so we don't
  // need to worry about ownership transfer.
  DCHECK(other.last_phase_snapshot_ == nullptr);
}

DeathData::~DeathData() {
  while (last_phase_snapshot_) {
    const DeathDataPhaseSnapshot* snapshot = last_phase_snapshot_;
    last_phase_snapshot_ = snapshot->prev;
    delete snapshot;
  }
}

// TODO(jar): I need to see if this macro to optimize branching is worth using.
//
// This macro has no branching, so it is surely fast, and is equivalent to:
//             if (assign_it)
//               target = source;
// We use a macro rather than a template to force this to inline.
// Related code for calculating max is discussed on the web.
#define CONDITIONAL_ASSIGN(assign_it, target, source) \
  ((target) ^= ((target) ^ (source)) & -static_cast<int32>(assign_it))
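// Worked example of the bit trick (informational): when assign_it is 1,
// -static_cast<int32>(1) is an all-ones mask, so the expression reduces to
//   target ^= (target ^ source), i.e. target becomes source.
// When assign_it is 0, the mask is 0, the XOR contributes nothing, and target
// is left unchanged -- the same effect as the branching form shown above.
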
void DeathData::RecordDeath(const int32 queue_duration,
                            const int32 run_duration,
                            const uint32 random_number) {
  // We'll just clamp at INT_MAX, but we should note this in the UI as such.
  if (count_ < INT_MAX)
    ++count_;

  int sample_probability_count = sample_probability_count_;
  if (sample_probability_count < INT_MAX)
    ++sample_probability_count;
  sample_probability_count_ = sample_probability_count;

  queue_duration_sum_ += queue_duration;
  run_duration_sum_ += run_duration;

  if (queue_duration_max_ < queue_duration)
    queue_duration_max_ = queue_duration;
  if (run_duration_max_ < run_duration)
    run_duration_max_ = run_duration;

  // Take a uniformly distributed sample over all durations ever supplied during
  // the current profiling phase.
  // The probability that we (instead) use this new sample is
  // 1/sample_probability_count_. This results in a completely uniform selection
  // of the sample (at least when we don't clamp sample_probability_count_...
  // but that should be inconsequentially likely). We ignore the fact that we
  // correlated our selection of a sample to the run and queue times (i.e., we
  // used them to generate random_number).
  CHECK_GT(sample_probability_count, 0);
  if (0 == (random_number % sample_probability_count)) {
    queue_duration_sample_ = queue_duration;
    run_duration_sample_ = run_duration;
  }
}

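// Note on the sampling above (informational): this is reservoir sampling with
// a reservoir of size one. After the N-th death in a phase, the new durations
// replace the stored sample with probability 1/N, and a sample stored at death
// i survives with probability (1/i) * prod_{j=i+1..N} (1 - 1/j) = 1/N, so each
// recorded death in the phase is equally likely to be the one that is kept.
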
int DeathData::count() const { return count_; }

int32 DeathData::run_duration_sum() const { return run_duration_sum_; }

int32 DeathData::run_duration_max() const { return run_duration_max_; }

int32 DeathData::run_duration_sample() const {
  return run_duration_sample_;
}

int32 DeathData::queue_duration_sum() const {
  return queue_duration_sum_;
}

int32 DeathData::queue_duration_max() const {
  return queue_duration_max_;
}

int32 DeathData::queue_duration_sample() const {
  return queue_duration_sample_;
}

const DeathDataPhaseSnapshot* DeathData::last_phase_snapshot() const {
  return last_phase_snapshot_;
}

void DeathData::OnProfilingPhaseCompleted(int profiling_phase) {
  // Snapshotting and storing current state.
  last_phase_snapshot_ = new DeathDataPhaseSnapshot(
      profiling_phase, count_, run_duration_sum_, run_duration_max_,
      run_duration_sample_, queue_duration_sum_, queue_duration_max_,
      queue_duration_sample_, last_phase_snapshot_);

  // Not touching fields for which a delta can be computed by comparing with a
  // snapshot from the previous phase. Resetting other fields. Sample values
  // will be reset upon next death recording because sample_probability_count_
  // is set to 0.
  // We avoid resetting to 0 in favor of deltas whenever possible. The reason
  // is that for incrementable fields, resetting to 0 from the snapshot thread
  // potentially in parallel with incrementing in the death thread may result in
  // significant data corruption that has a potential to grow with time. Not
  // resetting incrementable fields and using deltas will cause any
  // off-by-little corruptions to be likely fixed at the next snapshot.
  // The max values are not incrementable, and cannot be deduced using deltas
  // for a given phase. Hence, we have to reset them to 0. But the potential
  // damage is limited to getting the previous phase's max to apply for the next
  // phase, and the error doesn't have a potential to keep growing with new
  // phases.
  // sample_probability_count_ is incrementable, but must be reset to 0 at the
  // phase end, so that we start a new uniformly randomized sample selection
  // after the reset. Corruptions due to race conditions are possible, but the
  // damage is limited to selecting a wrong sample, which is not something that
  // can cause accumulating or cascading effects.
  // If there were no corruptions caused by race conditions, we never send a
  // sample for the previous phase in the next phase's snapshot because
  // ThreadData::SnapshotExecutedTasks doesn't send deltas with 0 count.
  sample_probability_count_ = 0;
  run_duration_max_ = 0;
  queue_duration_max_ = 0;
}

//------------------------------------------------------------------------------
DeathDataSnapshot::DeathDataSnapshot()
    : count(-1),
      run_duration_sum(-1),
      run_duration_max(-1),
      run_duration_sample(-1),
      queue_duration_sum(-1),
      queue_duration_max(-1),
      queue_duration_sample(-1) {
}

DeathDataSnapshot::DeathDataSnapshot(int count,
                                     int32 run_duration_sum,
                                     int32 run_duration_max,
                                     int32 run_duration_sample,
                                     int32 queue_duration_sum,
                                     int32 queue_duration_max,
                                     int32 queue_duration_sample)
    : count(count),
      run_duration_sum(run_duration_sum),
      run_duration_max(run_duration_max),
      run_duration_sample(run_duration_sample),
      queue_duration_sum(queue_duration_sum),
      queue_duration_max(queue_duration_max),
      queue_duration_sample(queue_duration_sample) {
}

DeathDataSnapshot::~DeathDataSnapshot() {
}

DeathDataSnapshot DeathDataSnapshot::Delta(
    const DeathDataSnapshot& older) const {
  return DeathDataSnapshot(count - older.count,
                           run_duration_sum - older.run_duration_sum,
                           run_duration_max, run_duration_sample,
                           queue_duration_sum - older.queue_duration_sum,
                           queue_duration_max, queue_duration_sample);
}

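// Informational example of Delta(): cumulative fields (count and the duration
// sums) are differenced against the older snapshot, while the max and sample
// fields are already per-phase (they are reset or reselected each phase) and
// are passed through unchanged. E.g. if count was 10 at the end of the
// previous phase and is 15 now, the delta reports 5 deaths for this phase.
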
//------------------------------------------------------------------------------
BirthOnThread::BirthOnThread(const Location& location,
                             const ThreadData& current)
    : location_(location),
      birth_thread_(&current) {
}

//------------------------------------------------------------------------------
BirthOnThreadSnapshot::BirthOnThreadSnapshot() {
}

BirthOnThreadSnapshot::BirthOnThreadSnapshot(const BirthOnThread& birth)
    : location(birth.location()),
      thread_name(birth.birth_thread()->thread_name()) {
}

BirthOnThreadSnapshot::~BirthOnThreadSnapshot() {
}

//------------------------------------------------------------------------------
Births::Births(const Location& location, const ThreadData& current)
    : BirthOnThread(location, current),
      birth_count_(1) {
}

int Births::birth_count() const { return birth_count_; }

void Births::RecordBirth() { ++birth_count_; }

//------------------------------------------------------------------------------
// ThreadData maintains the central data for all births and deaths on a single
// thread.

// TODO(jar): We should pull all these static vars together, into a struct, and
// optimize layout so that we benefit from locality of reference during accesses
// to them.

NowFunction* ThreadData::now_function_ = NULL;

bool ThreadData::now_function_is_time_ = false;

// A TLS slot which points to the ThreadData instance for the current thread.
// We do a fake initialization here (zeroing out data), and then the real
// in-place construction happens when we call tls_index_.Initialize().
base::ThreadLocalStorage::StaticSlot ThreadData::tls_index_ = TLS_INITIALIZER;

int ThreadData::worker_thread_data_creation_count_ = 0;

int ThreadData::cleanup_count_ = 0;

int ThreadData::incarnation_counter_ = 0;

ThreadData* ThreadData::all_thread_data_list_head_ = NULL;

ThreadData* ThreadData::first_retired_worker_ = NULL;

base::LazyInstance<base::Lock>::Leaky
    ThreadData::list_lock_ = LAZY_INSTANCE_INITIALIZER;

ThreadData::Status ThreadData::status_ = ThreadData::UNINITIALIZED;

ThreadData::ThreadData(const std::string& suggested_name)
    : next_(NULL),
      next_retired_worker_(NULL),
      worker_thread_number_(0),
      incarnation_count_for_pool_(-1),
      current_stopwatch_(NULL) {
  DCHECK_GE(suggested_name.size(), 0u);
  thread_name_ = suggested_name;
  PushToHeadOfList();  // Which sets real incarnation_count_for_pool_.
}

ThreadData::ThreadData(int thread_number)
    : next_(NULL),
      next_retired_worker_(NULL),
      worker_thread_number_(thread_number),
      incarnation_count_for_pool_(-1),
      current_stopwatch_(NULL) {
  CHECK_GT(thread_number, 0);
  base::StringAppendF(&thread_name_, "WorkerThread-%d", thread_number);
  PushToHeadOfList();  // Which sets real incarnation_count_for_pool_.
}

ThreadData::~ThreadData() {
}

void ThreadData::PushToHeadOfList() {
  // Toss in a hint of randomness (atop the uninitialized value).
  (void)VALGRIND_MAKE_MEM_DEFINED_IF_ADDRESSABLE(&random_number_,
                                                 sizeof(random_number_));
  MSAN_UNPOISON(&random_number_, sizeof(random_number_));
  random_number_ += static_cast<uint32>(this - static_cast<ThreadData*>(0));
  random_number_ ^= (Now() - TrackedTime()).InMilliseconds();

  base::AutoLock lock(*list_lock_.Pointer());
  incarnation_count_for_pool_ = incarnation_counter_;
  next_ = all_thread_data_list_head_;
  all_thread_data_list_head_ = this;
}

ThreadData* ThreadData::first() {
  base::AutoLock lock(*list_lock_.Pointer());
  return all_thread_data_list_head_;
}

ThreadData* ThreadData::next() const { return next_; }

void ThreadData::InitializeThreadContext(const std::string& suggested_name) {
  if (!Initialize())  // Always initialize if needed.
    return;
  ThreadData* current_thread_data =
      reinterpret_cast<ThreadData*>(tls_index_.Get());
  if (current_thread_data)
    return;  // Browser tests instigate this.
  current_thread_data = new ThreadData(suggested_name);
  tls_index_.Set(current_thread_data);
}

ThreadData* ThreadData::Get() {
  if (!tls_index_.initialized())
    return NULL;  // For unittests only.
  ThreadData* registered = reinterpret_cast<ThreadData*>(tls_index_.Get());
  if (registered)
    return registered;

  // We must be a worker thread, since we didn't pre-register.
  ThreadData* worker_thread_data = NULL;
  int worker_thread_number = 0;
  {
    base::AutoLock lock(*list_lock_.Pointer());
    if (first_retired_worker_) {
      worker_thread_data = first_retired_worker_;
      first_retired_worker_ = first_retired_worker_->next_retired_worker_;
      worker_thread_data->next_retired_worker_ = NULL;
    } else {
      worker_thread_number = ++worker_thread_data_creation_count_;
    }
  }

  // If we can't find a previously used instance, then we have to create one.
  if (!worker_thread_data) {
    DCHECK_GT(worker_thread_number, 0);
    worker_thread_data = new ThreadData(worker_thread_number);
  }
  DCHECK_GT(worker_thread_data->worker_thread_number_, 0);

  tls_index_.Set(worker_thread_data);
  return worker_thread_data;
}

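// Informational note on the retired-worker list used above: when a worker
// thread exits, its ThreadData is parked on first_retired_worker_ (see
// OnThreadTerminationCleanup()) and handed out again here, so the number of
// worker ThreadData instances tracks the peak number of concurrently live
// worker threads instead of growing with every worker thread ever created.
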
void ThreadData::OnThreadTermination(void* thread_data) {
  DCHECK(thread_data);  // TLS should *never* call us with a NULL.
  // We must NOT do any allocations during this callback. There is a chance
  // that the allocator is no longer active on this thread.
  reinterpret_cast<ThreadData*>(thread_data)->OnThreadTerminationCleanup();
}

void ThreadData::OnThreadTerminationCleanup() {
  // The list_lock_ was created when we registered the callback, so it won't be
  // allocated here despite the lazy reference.
  base::AutoLock lock(*list_lock_.Pointer());
  if (incarnation_counter_ != incarnation_count_for_pool_)
    return;  // ThreadData was constructed in an earlier unit test.
  ++cleanup_count_;
  // Only worker threads need to be retired and reused.
  if (!worker_thread_number_) {
    return;
  }
  // We must NOT do any allocations during this callback.
  // Using the simple linked lists avoids all allocations.
  DCHECK_EQ(this->next_retired_worker_, reinterpret_cast<ThreadData*>(NULL));
  this->next_retired_worker_ = first_retired_worker_;
  first_retired_worker_ = this;
}

void ThreadData::Snapshot(int current_profiling_phase,
                          ProcessDataSnapshot* process_data_snapshot) {
  // Get an unchanging copy of a ThreadData list.
  ThreadData* my_list = ThreadData::first();

  // Gather data serially.
  // This hackish approach *can* get some slightly corrupt tallies, as we are
  // grabbing values without the protection of a lock, but it has the advantage
  // of working even with threads that don't have message loops. If a user
  // sees any strangeness, they can always just run their stats gathering a
  // second time.
  BirthCountMap birth_counts;
  for (ThreadData* thread_data = my_list; thread_data;
       thread_data = thread_data->next()) {
    thread_data->SnapshotExecutedTasks(current_profiling_phase,
                                       &process_data_snapshot->phased_snapshots,
                                       &birth_counts);
  }

  // Add births that are still active -- i.e. objects that have tallied a birth,
  // but have not yet tallied a matching death, and hence must be either
  // running, queued up, or being held in limbo for future posting.
  auto* current_phase_tasks =
      &process_data_snapshot->phased_snapshots[current_profiling_phase].tasks;
  for (const auto& birth_count : birth_counts) {
    if (birth_count.second > 0) {
      current_phase_tasks->push_back(
          TaskSnapshot(BirthOnThreadSnapshot(*birth_count.first),
                       DeathDataSnapshot(birth_count.second, 0, 0, 0, 0, 0, 0),
                       "Still_Alive"));
    }
  }
}

void ThreadData::OnProfilingPhaseCompleted(int profiling_phase) {
  // Get an unchanging copy of a ThreadData list.
  ThreadData* my_list = ThreadData::first();

  // Add snapshots for all instances of death data in all threads serially.
  // This hackish approach *can* get some slightly corrupt tallies, as we are
  // grabbing values without the protection of a lock, but it has the advantage
  // of working even with threads that don't have message loops. Any corruption
  // shouldn't cause "cascading damage" to anything else (in later phases).
  for (ThreadData* thread_data = my_list; thread_data;
       thread_data = thread_data->next()) {
    thread_data->OnProfilingPhaseCompletedOnThread(profiling_phase);
  }
}

Births* ThreadData::TallyABirth(const Location& location) {
  BirthMap::iterator it = birth_map_.find(location);
  Births* child;
  if (it != birth_map_.end()) {
    child = it->second;
    child->RecordBirth();
  } else {
    child = new Births(location, *this);  // Leak this.
    // Lock since the map may get relocated now, and other threads sometimes
    // snapshot it (but they lock before copying it).
    base::AutoLock lock(map_lock_);
    birth_map_[location] = child;
  }

  return child;
}

void ThreadData::TallyADeath(const Births& births,
                             int32 queue_duration,
                             const TaskStopwatch& stopwatch) {
  int32 run_duration = stopwatch.RunDurationMs();

  // Stir in some randomness, plus add constant in case durations are zero.
  const uint32 kSomePrimeNumber = 2147483647;
  random_number_ += queue_duration + run_duration + kSomePrimeNumber;
  // An address is going to have some randomness to it as well ;-).
  random_number_ ^= static_cast<uint32>(&births - reinterpret_cast<Births*>(0));

  // We don't have queue durations without OS timer. OS timer is automatically
  // used for task-post-timing, so the use of an alternate timer implies all
  // queue times are invalid, unless it was explicitly said that we can trust
  // the alternate timer.
  if (kAllowAlternateTimeSourceHandling &&
      now_function_ &&
      !now_function_is_time_) {
    queue_duration = 0;
  }

  DeathMap::iterator it = death_map_.find(&births);
  DeathData* death_data;
  if (it != death_map_.end()) {
    death_data = &it->second;
  } else {
    base::AutoLock lock(map_lock_);  // Lock as the map may get relocated now.
    death_data = &death_map_[&births];
  }  // Release lock ASAP.
  death_data->RecordDeath(queue_duration, run_duration, random_number_);
}

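// Informational aside: kSomePrimeNumber above is 2147483647 = 2^31 - 1, a
// Mersenne prime, added so that random_number_ keeps changing even when both
// durations are zero.
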
Births* ThreadData::TallyABirthIfActive(const Location& location) {
  if (!TrackingStatus())
    return NULL;
  ThreadData* current_thread_data = Get();
  if (!current_thread_data)
    return NULL;
  return current_thread_data->TallyABirth(location);
}

void ThreadData::TallyRunOnNamedThreadIfTracking(
    const base::TrackingInfo& completed_task,
    const TaskStopwatch& stopwatch) {
  // Even if we have been DEACTIVATED, we will process any pending births so
  // that our data structures (which counted the outstanding births) remain
  // consistent.
  const Births* births = completed_task.birth_tally;
  if (!births)
    return;

  ThreadData* current_thread_data = stopwatch.GetThreadData();
  if (!current_thread_data)
    return;

  // Watch out for a race where status_ is changing, and hence one or both
  // of start_of_run or end_of_run is zero. In that case, we didn't bother to
  // get a time value since we "weren't tracking" and we were trying to be
  // efficient by not calling for a genuine time value. For simplicity, we'll
  // use a default zero duration when we can't calculate a true value.
  TrackedTime start_of_run = stopwatch.StartTime();
  int32 queue_duration = 0;
  if (!start_of_run.is_null()) {
    queue_duration = (start_of_run - completed_task.EffectiveTimePosted())
        .InMilliseconds();
  }
  current_thread_data->TallyADeath(*births, queue_duration, stopwatch);
}

void ThreadData::TallyRunOnWorkerThreadIfTracking(
    const Births* births,
    const TrackedTime& time_posted,
    const TaskStopwatch& stopwatch) {
  // Even if we have been DEACTIVATED, we will process any pending births so
  // that our data structures (which counted the outstanding births) remain
  // consistent.
  if (!births)
    return;

  // TODO(jar): Support the option to coalesce all worker-thread activity under
  // one ThreadData instance that uses locks to protect *all* access. This will
  // reduce memory (making it provably bounded), but run incrementally slower
  // (since we'll use locks on TallyABirth and TallyADeath). The good news is
  // that the locks on TallyADeath will be *after* the worker thread has run,
  // and hence nothing will be waiting for the completion (... besides some
  // other thread that might like to run). Also, the worker threads tasks are
  // generally longer, and hence the cost of the lock may perchance be amortized
  // over the long task's lifetime.
  ThreadData* current_thread_data = stopwatch.GetThreadData();
  if (!current_thread_data)
    return;

  TrackedTime start_of_run = stopwatch.StartTime();
  int32 queue_duration = 0;
  if (!start_of_run.is_null()) {
    queue_duration = (start_of_run - time_posted).InMilliseconds();
  }
  current_thread_data->TallyADeath(*births, queue_duration, stopwatch);
}

void ThreadData::TallyRunInAScopedRegionIfTracking(
    const Births* births,
    const TaskStopwatch& stopwatch) {
  // Even if we have been DEACTIVATED, we will process any pending births so
  // that our data structures (which counted the outstanding births) remain
  // consistent.
  if (!births)
    return;

  ThreadData* current_thread_data = stopwatch.GetThreadData();
  if (!current_thread_data)
    return;

  int32 queue_duration = 0;
  current_thread_data->TallyADeath(*births, queue_duration, stopwatch);
}

void ThreadData::SnapshotExecutedTasks(
    int current_profiling_phase,
    PhasedProcessDataSnapshotMap* phased_snapshots,
    BirthCountMap* birth_counts) {
  // Get copy of data, so that the data will not change during the iterations
  // and processing.
  BirthMap birth_map;
  DeathsSnapshot deaths;
  SnapshotMaps(current_profiling_phase, &birth_map, &deaths);

  for (const auto& birth : birth_map) {
    (*birth_counts)[birth.second] += birth.second->birth_count();
  }

  for (const auto& death : deaths) {
    (*birth_counts)[death.first] -= death.first->birth_count();

    // For the current death data, walk through all its snapshots, starting from
    // the current one, then from the previous profiling phase etc., and for
    // each snapshot calculate the delta between the snapshot and the previous
    // phase, if any. Store the deltas in the result.
    for (const DeathDataPhaseSnapshot* phase = &death.second; phase;
         phase = phase->prev) {
      const DeathDataSnapshot& death_data =
          phase->prev ? phase->death_data.Delta(phase->prev->death_data)
                      : phase->death_data;

      if (death_data.count > 0) {
        (*phased_snapshots)[phase->profiling_phase].tasks.push_back(
            TaskSnapshot(BirthOnThreadSnapshot(*death.first), death_data,
                         thread_name()));
      }
    }
  }
}

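// Informational walk-through of the loop above: if a task has stored phase
// snapshots for phases 0 and 1 plus its live DeathData, the emitted rows are
// (live - phase 1) for the current phase, (phase 1 - phase 0) for phase 1, and
// phase 0 as-is, so each phase reports only the deaths that occurred in it.
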
// This may be called from another thread.
void ThreadData::SnapshotMaps(int profiling_phase,
                              BirthMap* birth_map,
                              DeathsSnapshot* deaths) {
  base::AutoLock lock(map_lock_);

  for (const auto& birth : birth_map_)
    (*birth_map)[birth.first] = birth.second;

  for (const auto& death : death_map_) {
    deaths->push_back(std::make_pair(
        death.first,
        DeathDataPhaseSnapshot(profiling_phase, death.second.count(),
                               death.second.run_duration_sum(),
                               death.second.run_duration_max(),
                               death.second.run_duration_sample(),
                               death.second.queue_duration_sum(),
                               death.second.queue_duration_max(),
                               death.second.queue_duration_sample(),
                               death.second.last_phase_snapshot())));
  }
}

void ThreadData::OnProfilingPhaseCompletedOnThread(int profiling_phase) {
  base::AutoLock lock(map_lock_);

  for (auto& death : death_map_) {
    death.second.OnProfilingPhaseCompleted(profiling_phase);
  }
}

static void OptionallyInitializeAlternateTimer() {
  NowFunction* alternate_time_source = GetAlternateTimeSource();
  if (alternate_time_source)
    ThreadData::SetAlternateTimeSource(alternate_time_source);
}

bool ThreadData::Initialize() {
  if (status_ >= DEACTIVATED)
    return true;  // Someone else did the initialization.
  // Due to racy lazy initialization in tests, we'll need to recheck status_
  // after we acquire the lock.

  // Ensure that we don't double initialize tls. We are called when single
  // threaded in the product, but some tests may be racy and lazy about our
  // initialization.
  base::AutoLock lock(*list_lock_.Pointer());
  if (status_ >= DEACTIVATED)
    return true;  // Someone raced in here and beat us.

  // Put an alternate timer in place if the environment calls for it, such as
  // for tracking TCMalloc allocations. This insertion is idempotent, so we
  // don't mind if there is a race, and we'd prefer not to be in a lock while
  // doing this work.
  if (kAllowAlternateTimeSourceHandling)
    OptionallyInitializeAlternateTimer();

  // Perform the "real" TLS initialization now, and leave it intact through
  // process termination.
  if (!tls_index_.initialized()) {  // Testing may have initialized this.
    DCHECK_EQ(status_, UNINITIALIZED);
    tls_index_.Initialize(&ThreadData::OnThreadTermination);
    if (!tls_index_.initialized())
      return false;
  } else {
    // TLS was initialized for us earlier.
    DCHECK_EQ(status_, DORMANT_DURING_TESTS);
  }

  // Incarnation counter is only significant to testing, as it otherwise will
  // never again change in this process.
  ++incarnation_counter_;

  // The lock is not critical for setting status_, but it doesn't hurt. It also
  // ensures that if we have a racy initialization, that we'll bail as soon as
  // we get the lock earlier in this method.
  status_ = kInitialStartupState;
  DCHECK(status_ != UNINITIALIZED);
  return true;
}

bool ThreadData::InitializeAndSetTrackingStatus(Status status) {
  DCHECK_GE(status, DEACTIVATED);
  DCHECK_LE(status, PROFILING_ACTIVE);

  if (!Initialize())  // No-op if already initialized.
    return false;  // Not compiled in.

  if (status > DEACTIVATED)
    status = PROFILING_ACTIVE;
  status_ = status;
  return true;
}

ThreadData::Status ThreadData::status() {
  return status_;
}

bool ThreadData::TrackingStatus() {
  return status_ > DEACTIVATED;
}

void ThreadData::SetAlternateTimeSource(NowFunction* now_function) {
  DCHECK(now_function);
  if (kAllowAlternateTimeSourceHandling)
    now_function_ = now_function;
}

void ThreadData::EnableProfilerTiming() {
  base::subtle::NoBarrier_Store(&g_profiler_timing_enabled, ENABLED_TIMING);
}

TrackedTime ThreadData::Now() {
  if (kAllowAlternateTimeSourceHandling && now_function_)
    return TrackedTime::FromMilliseconds((*now_function_)());
  if (IsProfilerTimingEnabled() && TrackingStatus())
    return TrackedTime::Now();
  return TrackedTime();  // Super fast when disabled, or not compiled.
}

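// Usage note (summarizing Now() above): an installed alternate time source
// takes precedence; otherwise a real wall-clock value is returned only while
// both profiler timing and tracking are enabled, and a null TrackedTime is
// returned in every other case so the disabled path stays cheap.
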
void ThreadData::EnsureCleanupWasCalled(int major_threads_shutdown_count) {
  base::AutoLock lock(*list_lock_.Pointer());
  if (worker_thread_data_creation_count_ == 0)
    return;  // We haven't really run much, and couldn't have leaked.

  // TODO(jar): until this is working on XP, don't run the real test.
#if 0
  // Verify that we've at least shutdown/cleanup the major named threads. The
  // caller should tell us how many thread shutdowns should have taken place by
  // now.
  CHECK_GT(cleanup_count_, major_threads_shutdown_count);
#endif
}

void ThreadData::ShutdownSingleThreadedCleanup(bool leak) {
  // This is only called from test code, where we need to cleanup so that
  // additional tests can be run.
  // We must be single threaded... but be careful anyway.
  if (!InitializeAndSetTrackingStatus(DEACTIVATED))
    return;
  ThreadData* thread_data_list;
  {
    base::AutoLock lock(*list_lock_.Pointer());
    thread_data_list = all_thread_data_list_head_;
    all_thread_data_list_head_ = NULL;
    ++incarnation_counter_;
    // To be clean, break apart the retired worker list (though we leak them).
    while (first_retired_worker_) {
      ThreadData* worker = first_retired_worker_;
      CHECK_GT(worker->worker_thread_number_, 0);
      first_retired_worker_ = worker->next_retired_worker_;
      worker->next_retired_worker_ = NULL;
    }
  }

  // Put most global static back in pristine shape.
  worker_thread_data_creation_count_ = 0;
  cleanup_count_ = 0;
  tls_index_.Set(NULL);
  status_ = DORMANT_DURING_TESTS;  // Almost UNINITIALIZED.

  // To avoid any chance of racing in unit tests, which is the only place we
  // call this function, we may sometimes leak all the data structures we
  // recovered, as they may still be in use on threads from prior tests!
  if (leak) {
    ThreadData* thread_data = thread_data_list;
    while (thread_data) {
      ANNOTATE_LEAKING_OBJECT_PTR(thread_data);
      thread_data = thread_data->next();
    }
    return;
  }

  // When we want to cleanup (on a single thread), here is what we do.

  // Do actual recursive delete in all ThreadData instances.
  while (thread_data_list) {
    ThreadData* next_thread_data = thread_data_list;
    thread_data_list = thread_data_list->next();

    for (BirthMap::iterator it = next_thread_data->birth_map_.begin();
         next_thread_data->birth_map_.end() != it; ++it)
      delete it->second;  // Delete the Birth Records.
    delete next_thread_data;  // Includes all Death Records.
  }
}

//------------------------------------------------------------------------------
TaskStopwatch::TaskStopwatch()
    : wallclock_duration_ms_(0),
      current_thread_data_(NULL),
      excluded_duration_ms_(0),
      parent_(NULL) {
#if DCHECK_IS_ON()
  state_ = CREATED;
  child_ = NULL;
#endif
}

TaskStopwatch::~TaskStopwatch() {
#if DCHECK_IS_ON()
  DCHECK(state_ != RUNNING);
  DCHECK(child_ == NULL);
#endif
}

void TaskStopwatch::Start() {
#if DCHECK_IS_ON()
  DCHECK(state_ == CREATED);
  state_ = RUNNING;
#endif

  start_time_ = ThreadData::Now();

  current_thread_data_ = ThreadData::Get();
  if (!current_thread_data_)
    return;

  parent_ = current_thread_data_->current_stopwatch_;
#if DCHECK_IS_ON()
  if (parent_) {
    DCHECK(parent_->state_ == RUNNING);
    DCHECK(parent_->child_ == NULL);
    parent_->child_ = this;
  }
#endif
  current_thread_data_->current_stopwatch_ = this;
}

void TaskStopwatch::Stop() {
  const TrackedTime end_time = ThreadData::Now();
#if DCHECK_IS_ON()
  DCHECK(state_ == RUNNING);
  state_ = STOPPED;
  DCHECK(child_ == NULL);
#endif

  if (!start_time_.is_null() && !end_time.is_null()) {
    wallclock_duration_ms_ = (end_time - start_time_).InMilliseconds();
  }

  if (!current_thread_data_)
    return;

  DCHECK(current_thread_data_->current_stopwatch_ == this);
  current_thread_data_->current_stopwatch_ = parent_;
  if (!parent_)
    return;

#if DCHECK_IS_ON()
  DCHECK(parent_->state_ == RUNNING);
  DCHECK(parent_->child_ == this);
  parent_->child_ = NULL;
#endif
  parent_->excluded_duration_ms_ += wallclock_duration_ms_;
  parent_ = NULL;
}

TrackedTime TaskStopwatch::StartTime() const {
#if DCHECK_IS_ON()
  DCHECK(state_ != CREATED);
#endif

  return start_time_;
}

int32 TaskStopwatch::RunDurationMs() const {
#if DCHECK_IS_ON()
  DCHECK(state_ == STOPPED);
#endif

  return wallclock_duration_ms_ - excluded_duration_ms_;
}

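// Informational example of the exclusion accounting: Stop() adds a nested
// stopwatch's wallclock time to its parent's excluded_duration_ms_, so if an
// outer task spans 10 ms of wallclock time and a nested stopwatch covers 4 ms
// of it, the outer RunDurationMs() reports 6 ms.
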
ThreadData* TaskStopwatch::GetThreadData() const {
#if DCHECK_IS_ON()
  DCHECK(state_ != CREATED);
#endif

  return current_thread_data_;
}

//------------------------------------------------------------------------------
// DeathDataPhaseSnapshot

DeathDataPhaseSnapshot::DeathDataPhaseSnapshot(
    int profiling_phase,
    int count,
    int32 run_duration_sum,
    int32 run_duration_max,
    int32 run_duration_sample,
    int32 queue_duration_sum,
    int32 queue_duration_max,
    int32 queue_duration_sample,
    const DeathDataPhaseSnapshot* prev)
    : profiling_phase(profiling_phase),
      death_data(count,
                 run_duration_sum,
                 run_duration_max,
                 run_duration_sample,
                 queue_duration_sum,
                 queue_duration_max,
                 queue_duration_sample),
      prev(prev) {
}

//------------------------------------------------------------------------------
// TaskSnapshot

TaskSnapshot::TaskSnapshot() {
}

TaskSnapshot::TaskSnapshot(const BirthOnThreadSnapshot& birth,
                           const DeathDataSnapshot& death_data,
                           const std::string& death_thread_name)
    : birth(birth),
      death_data(death_data),
      death_thread_name(death_thread_name) {
}

TaskSnapshot::~TaskSnapshot() {
}

//------------------------------------------------------------------------------
// ProcessDataPhaseSnapshot

ProcessDataPhaseSnapshot::ProcessDataPhaseSnapshot() {
}

ProcessDataPhaseSnapshot::~ProcessDataPhaseSnapshot() {
}

//------------------------------------------------------------------------------
// ProcessDataSnapshot

ProcessDataSnapshot::ProcessDataSnapshot()
#if !defined(OS_NACL)
    : process_id(base::GetCurrentProcId()) {
#else
    : process_id(base::kNullProcessId) {
#endif
}

ProcessDataSnapshot::~ProcessDataSnapshot() {
}

}  // namespace tracked_objects