base/tracked_objects.cc

   1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
   2 // Use of this source code is governed by a BSD-style license that can be
   3 // found in the LICENSE file.
   4
   5 #include "base/tracked_objects.h"
   6
   7 #include <limits.h>
   8 #include <stdlib.h>
   9
  10 #include "base/atomicops.h"
  11 #include "base/base_switches.h"
  12 #include "base/command_line.h"
  13 #include "base/compiler_specific.h"
  14 #include "base/debug/leak_annotations.h"
  15 #include "base/logging.h"
  16 #include "base/process/process_handle.h"
  17 #include "base/profiler/alternate_timer.h"
  18 #include "base/strings/stringprintf.h"
  19 #include "base/third_party/valgrind/memcheck.h"
  20 #include "base/tracking_info.h"
  21
  22 using base::TimeDelta;
  23
  24 namespace base {
  25 class TimeDelta;
  26 }
  27
  28 namespace tracked_objects {
  29
  30 namespace {
  31 // When ThreadData is first initialized, should we start in an ACTIVE state to
  32 // record all of the startup-time tasks, or should we start up DEACTIVATED, so
  33 // that we only record after parsing the command line flag --enable-tracking.
  34 // Note that the flag may force either state, so this really controls only the
  35 // period of time up until that flag is parsed.  If there is no flag seen, then
  36 // this state may prevail for much or all of the process lifetime.
  37 const ThreadData::Status kInitialStartupState = ThreadData::PROFILING_ACTIVE;
  38
  39 // Control whether an alternate time source (Now() function) is supported by
  40 // the ThreadData class.  This compile time flag should be set to true if we
  41 // want other modules (such as a memory allocator, or a thread-specific CPU time
  42 // clock) to be able to provide a thread-specific Now() function.  Without this
  43 // compile-time flag, the code will only support the wall-clock time.  This flag
  44 // can be flipped to efficiently disable this path (if there is a performance
  45 // problem with its presence).
  46 static const bool kAllowAlternateTimeSourceHandling = true;
  47
  48 // Possible states of the profiler timing enabledness.
  49 enum {
  50   UNDEFINED_TIMING,
  51   ENABLED_TIMING,
  52   DISABLED_TIMING,
  53 };
  54
  55 // State of the profiler timing enabledness.
  56 base::subtle::Atomic32 g_profiler_timing_enabled = UNDEFINED_TIMING;
  57
  58 // Returns whether profiler timing is enabled.  The default is true, but this
  59 // may be overridden by a command-line flag.  Some platforms may
  60 // programmatically set this command-line flag to the "off" value if it's not
  61 // specified.
  62 // This in turn can be overridden by explicitly calling
  63 // ThreadData::EnableProfilerTiming, say, based on a field trial.
  64 inline bool IsProfilerTimingEnabled() {
  65   // Reading |g_profiler_timing_enabled| is done without barrier because
  66   // multiple initialization is not an issue while the barrier can be relatively
  67   // costly given that this method is sometimes called in a tight loop.
  68   base::subtle::Atomic32 current_timing_enabled =
  69       base::subtle::NoBarrier_Load(&g_profiler_timing_enabled);
  70   if (current_timing_enabled == UNDEFINED_TIMING) {
  71     if (!base::CommandLine::InitializedForCurrentProcess())
  72       return true;
  73     current_timing_enabled =
  74         (base::CommandLine::ForCurrentProcess()->GetSwitchValueASCII(
  75              switches::kProfilerTiming) ==
  76          switches::kProfilerTimingDisabledValue)
  77             ? DISABLED_TIMING
  78             : ENABLED_TIMING;
  79     base::subtle::NoBarrier_Store(&g_profiler_timing_enabled,
  80                                   current_timing_enabled);
  81   }
  82   return current_timing_enabled == ENABLED_TIMING;
  83 }
  84
  85 }  // namespace
  86
  87 //------------------------------------------------------------------------------
  88 // DeathData tallies durations when a death takes place.
  89
  90 DeathData::DeathData()
  91     : count_(0),
  92       sample_probability_count_(0),
  93       run_duration_sum_(0),
  94       queue_duration_sum_(0),
  95       run_duration_max_(0),
  96       queue_duration_max_(0),
  97       run_duration_sample_(0),
  98       queue_duration_sample_(0),
  99       last_phase_snapshot_(nullptr) {
 100 }
 101
 102 DeathData::DeathData(const DeathData& other)
 103     : count_(other.count_),
 104       sample_probability_count_(other.sample_probability_count_),
 105       run_duration_sum_(other.run_duration_sum_),
 106       queue_duration_sum_(other.queue_duration_sum_),
 107       run_duration_max_(other.run_duration_max_),
 108       queue_duration_max_(other.queue_duration_max_),
 109       run_duration_sample_(other.run_duration_sample_),
 110       queue_duration_sample_(other.queue_duration_sample_),
 111       last_phase_snapshot_(nullptr) {
 112   // This constructor will be used by std::map when adding new DeathData values
 113   // to the map.  At that point, last_phase_snapshot_ is still NULL, so we don't
 114   // need to worry about ownership transfer.
 115   DCHECK(other.last_phase_snapshot_ == nullptr);
 116 }
 117
 118 DeathData::~DeathData() {
 119   while (last_phase_snapshot_) {
 120     const DeathDataPhaseSnapshot* snapshot = last_phase_snapshot_;
 121     last_phase_snapshot_ = snapshot->prev;
 122     delete snapshot;
 123   }
 124 }
 125
 126 // TODO(jar): I need to see if this macro to optimize branching is worth using.
 127 //
 128 // This macro has no branching, so it is surely fast, and is equivalent to:
 129 //             if (assign_it)
 130 //               target = source;
 131 // We use a macro rather than a template to force this to inline.
 132 // Related code for calculating max is discussed on the web.
 133 #define CONDITIONAL_ASSIGN(assign_it, target, source) \
 134     ((target) ^= ((target) ^ (source)) & -static_cast<int32>(assign_it))
 135
 136 void DeathData::RecordDeath(const int32 queue_duration,
 137                             const int32 run_duration,
 138                             const uint32 random_number) {
 139   // We'll just clamp at INT_MAX, but we should note this in the UI as such.
 140   if (count_ < INT_MAX)
 141     ++count_;
 142
 143   int sample_probability_count = sample_probability_count_;
 144   if (sample_probability_count < INT_MAX)
 145     ++sample_probability_count;
 146   sample_probability_count_ = sample_probability_count;
 147
 148   queue_duration_sum_ += queue_duration;
 149   run_duration_sum_ += run_duration;
 150
 151   if (queue_duration_max_ < queue_duration)
 152     queue_duration_max_ = queue_duration;
 153   if (run_duration_max_ < run_duration)
 154     run_duration_max_ = run_duration;
 155
 156   // Take a uniformly distributed sample over all durations ever supplied during
 157   // the current profiling phase.
 158   // The probability that we (instead) use this new sample is
 159   // 1/sample_probability_count_. This results in a completely uniform selection
 160   // of the sample (at least when we don't clamp sample_probability_count_...
 161   // but that should be inconsequentially likely).  We ignore the fact that we
 162   // correlated our selection of a sample to the run and queue times (i.e., we
 163   // used them to generate random_number).
 164   CHECK_GT(sample_probability_count, 0);
 165   if (0 == (random_number % sample_probability_count)) {
 166     queue_duration_sample_ = queue_duration;
 167     run_duration_sample_ = run_duration;
 168   }
 169 }
 170
 171 void DeathData::OnProfilingPhaseCompleted(int profiling_phase) {
 172   // Snapshotting and storing current state.
 173   last_phase_snapshot_ = new DeathDataPhaseSnapshot(
 174       profiling_phase, count_, run_duration_sum_, run_duration_max_,
 175       run_duration_sample_, queue_duration_sum_, queue_duration_max_,
 176       queue_duration_sample_, last_phase_snapshot_);
 177
 178   // Not touching fields for which a delta can be computed by comparing with a
 179   // snapshot from the previous phase. Resetting other fields.  Sample values
 180   // will be reset upon next death recording because sample_probability_count_
 181   // is set to 0.
 182   // We avoid resetting to 0 in favor of deltas whenever possible.  The reason
 183   // is that for incrementable fields, resetting to 0 from the snapshot thread
 184   // potentially in parallel with incrementing in the death thread may result in
 185   // significant data corruption that has a potential to grow with time.  Not
 186   // resetting incrementable fields and using deltas will cause any
 187   // off-by-little corruptions to be likely fixed at the next snapshot.
 188   // The max values are not incrementable, and cannot be deduced using deltas
 189   // for a given phase. Hence, we have to reset them to 0.  But the potential
 190   // damage is limited to getting the previous phase's max to apply for the next
 191   // phase, and the error doesn't have a potential to keep growing with new
 192   // resets.
 193   // sample_probability_count_ is incrementable, but must be reset to 0 at the
 194   // phase end, so that we start a new uniformly randomized sample selection
 195   // after the reset.  Corruptions due to race conditions are possible, but the
 196   // damage is limited to selecting a wrong sample, which is not something that
 197   // can cause accumulating or cascading effects.
 198   // If there were no corruptions caused by race conditions, we never send a
 199   // sample for the previous phase in the next phase's snapshot because
 200   // ThreadData::SnapshotExecutedTasks doesn't send deltas with 0 count.
 201   sample_probability_count_ = 0;
 202   run_duration_max_ = 0;
 203   queue_duration_max_ = 0;
 204 }
 205
 206 //------------------------------------------------------------------------------
 207 DeathDataSnapshot::DeathDataSnapshot()
 208     : count(-1),
 209       run_duration_sum(-1),
 210       run_duration_max(-1),
 211       run_duration_sample(-1),
 212       queue_duration_sum(-1),
 213       queue_duration_max(-1),
 214       queue_duration_sample(-1) {
 215 }
 216
 217 DeathDataSnapshot::DeathDataSnapshot(int count,
 218                                      int32 run_duration_sum,
 219                                      int32 run_duration_max,
 220                                      int32 run_duration_sample,
 221                                      int32 queue_duration_sum,
 222                                      int32 queue_duration_max,
 223                                      int32 queue_duration_sample)
 224     : count(count),
 225       run_duration_sum(run_duration_sum),
 226       run_duration_max(run_duration_max),
 227       run_duration_sample(run_duration_sample),
 228       queue_duration_sum(queue_duration_sum),
 229       queue_duration_max(queue_duration_max),
 230       queue_duration_sample(queue_duration_sample) {
 231 }
 232
 233 DeathDataSnapshot::~DeathDataSnapshot() {
 234 }
 235
 236 DeathDataSnapshot DeathDataSnapshot::Delta(
 237     const DeathDataSnapshot& older) const {
 238   return DeathDataSnapshot(count - older.count,
 239                            run_duration_sum - older.run_duration_sum,
 240                            run_duration_max, run_duration_sample,
 241                            queue_duration_sum - older.queue_duration_sum,
 242                            queue_duration_max, queue_duration_sample);
 243 }
 244
 245 //------------------------------------------------------------------------------
 246 BirthOnThread::BirthOnThread(const Location& location,
 247                              const ThreadData& current)
 248     : location_(location),
 249       birth_thread_(&current) {
 250 }
 251
 252 //------------------------------------------------------------------------------
 253 BirthOnThreadSnapshot::BirthOnThreadSnapshot() {
 254 }
 255
 256 BirthOnThreadSnapshot::BirthOnThreadSnapshot(const BirthOnThread& birth)
 257     : location(birth.location()),
 258       thread_name(birth.birth_thread()->thread_name()) {
 259 }
 260
 261 BirthOnThreadSnapshot::~BirthOnThreadSnapshot() {
 262 }
 263
 264 //------------------------------------------------------------------------------
 265 Births::Births(const Location& location, const ThreadData& current)
 266     : BirthOnThread(location, current),
 267       birth_count_(1) { }
 268
 269 int Births::birth_count() const { return birth_count_; }
 270
 271 void Births::RecordBirth() { ++birth_count_; }
 272
 273 //------------------------------------------------------------------------------
 274 // ThreadData maintains the central data for all births and deaths on a single
 275 // thread.
 276
 277 // TODO(jar): We should pull all these static vars together, into a struct, and
 278 // optimize layout so that we benefit from locality of reference during accesses
 279 // to them.
 280
 281 // static
 282 NowFunction* ThreadData::now_function_ = NULL;
 283
 284 // static
 285 bool ThreadData::now_function_is_time_ = false;
 286
 287 // A TLS slot which points to the ThreadData instance for the current thread.
 288 // We do a fake initialization here (zeroing out data), and then the real
 289 // in-place construction happens when we call tls_index_.Initialize().
 290 // static
 291 base::ThreadLocalStorage::StaticSlot ThreadData::tls_index_ = TLS_INITIALIZER;
 292
 293 // static
 294 int ThreadData::worker_thread_data_creation_count_ = 0;
 295
 296 // static
 297 int ThreadData::cleanup_count_ = 0;
 298
 299 // static
 300 int ThreadData::incarnation_counter_ = 0;
 301
 302 // static
 303 ThreadData* ThreadData::all_thread_data_list_head_ = NULL;
 304
 305 // static
 306 ThreadData* ThreadData::first_retired_worker_ = NULL;
 307
 308 // static
 309 base::LazyInstance<base::Lock>::Leaky
 310     ThreadData::list_lock_ = LAZY_INSTANCE_INITIALIZER;
 311
 312 // static
 313 base::subtle::Atomic32 ThreadData::status_ = ThreadData::UNINITIALIZED;
 314
 315 ThreadData::ThreadData(const std::string& suggested_name)
 316     : next_(NULL),
 317       next_retired_worker_(NULL),
 318       worker_thread_number_(0),
 319       incarnation_count_for_pool_(-1),
 320       current_stopwatch_(NULL) {
 321   DCHECK_GE(suggested_name.size(), 0u);
 322   thread_name_ = suggested_name;
 323   PushToHeadOfList();  // Which sets real incarnation_count_for_pool_.
 324 }
 325
 326 ThreadData::ThreadData(int thread_number)
 327     : next_(NULL),
 328       next_retired_worker_(NULL),
 329       worker_thread_number_(thread_number),
 330       incarnation_count_for_pool_(-1),
 331       current_stopwatch_(NULL) {
 332   CHECK_GT(thread_number, 0);
 333   base::StringAppendF(&thread_name_, "WorkerThread-%d", thread_number);
 334   PushToHeadOfList();  // Which sets real incarnation_count_for_pool_.
 335 }
 336
 337 ThreadData::~ThreadData() {
 338 }
 339
 340 void ThreadData::PushToHeadOfList() {
 341   // Toss in a hint of randomness (atop the uniniitalized value).
 342   (void)VALGRIND_MAKE_MEM_DEFINED_IF_ADDRESSABLE(&random_number_,
 343                                                  sizeof(random_number_));
 344   MSAN_UNPOISON(&random_number_, sizeof(random_number_));
 345   random_number_ += static_cast<uint32>(this - static_cast<ThreadData*>(0));
 346   random_number_ ^= (Now() - TrackedTime()).InMilliseconds();
 347
 348   DCHECK(!next_);
 349   base::AutoLock lock(*list_lock_.Pointer());
 350   incarnation_count_for_pool_ = incarnation_counter_;
 351   next_ = all_thread_data_list_head_;
 352   all_thread_data_list_head_ = this;
 353 }
 354
 355 // static
 356 ThreadData* ThreadData::first() {
 357   base::AutoLock lock(*list_lock_.Pointer());
 358   return all_thread_data_list_head_;
 359 }
 360
 361 ThreadData* ThreadData::next() const { return next_; }
 362
 363 // static
 364 void ThreadData::InitializeThreadContext(const std::string& suggested_name) {
 365   Initialize();
 366   ThreadData* current_thread_data =
 367       reinterpret_cast<ThreadData*>(tls_index_.Get());
 368   if (current_thread_data)
 369     return;  // Browser tests instigate this.
 370   current_thread_data = new ThreadData(suggested_name);
 371   tls_index_.Set(current_thread_data);
 372 }
 373
 374 // static
 375 ThreadData* ThreadData::Get() {
 376   if (!tls_index_.initialized())
 377     return NULL;  // For unittests only.
 378   ThreadData* registered = reinterpret_cast<ThreadData*>(tls_index_.Get());
 379   if (registered)
 380     return registered;
 381
 382   // We must be a worker thread, since we didn't pre-register.
 383   ThreadData* worker_thread_data = NULL;
 384   int worker_thread_number = 0;
 385   {
 386     base::AutoLock lock(*list_lock_.Pointer());
 387     if (first_retired_worker_) {
 388       worker_thread_data = first_retired_worker_;
 389       first_retired_worker_ = first_retired_worker_->next_retired_worker_;
 390       worker_thread_data->next_retired_worker_ = NULL;
 391     } else {
 392       worker_thread_number = ++worker_thread_data_creation_count_;
 393     }
 394   }
 395
 396   // If we can't find a previously used instance, then we have to create one.
 397   if (!worker_thread_data) {
 398     DCHECK_GT(worker_thread_number, 0);
 399     worker_thread_data = new ThreadData(worker_thread_number);
 400   }
 401   DCHECK_GT(worker_thread_data->worker_thread_number_, 0);
 402
 403   tls_index_.Set(worker_thread_data);
 404   return worker_thread_data;
 405 }
 406
 407 // static
 408 void ThreadData::OnThreadTermination(void* thread_data) {
 409   DCHECK(thread_data);  // TLS should *never* call us with a NULL.
 410   // We must NOT do any allocations during this callback.  There is a chance
 411   // that the allocator is no longer active on this thread.
 412   reinterpret_cast<ThreadData*>(thread_data)->OnThreadTerminationCleanup();
 413 }
 414
 415 void ThreadData::OnThreadTerminationCleanup() {
 416   // The list_lock_ was created when we registered the callback, so it won't be
 417   // allocated here despite the lazy reference.
 418   base::AutoLock lock(*list_lock_.Pointer());
 419   if (incarnation_counter_ != incarnation_count_for_pool_)
 420     return;  // ThreadData was constructed in an earlier unit test.
 421   ++cleanup_count_;
 422   // Only worker threads need to be retired and reused.
 423   if (!worker_thread_number_) {
 424     return;
 425   }
 426   // We must NOT do any allocations during this callback.
 427   // Using the simple linked lists avoids all allocations.
 428   DCHECK_EQ(this->next_retired_worker_, reinterpret_cast<ThreadData*>(NULL));
 429   this->next_retired_worker_ = first_retired_worker_;
 430   first_retired_worker_ = this;
 431 }
 432
 433 // static
 434 void ThreadData::Snapshot(int current_profiling_phase,
 435                           ProcessDataSnapshot* process_data_snapshot) {
 436   // Get an unchanging copy of a ThreadData list.
 437   ThreadData* my_list = ThreadData::first();
 438
 439   // Gather data serially.
 440   // This hackish approach *can* get some slightly corrupt tallies, as we are
 441   // grabbing values without the protection of a lock, but it has the advantage
 442   // of working even with threads that don't have message loops.  If a user
 443   // sees any strangeness, they can always just run their stats gathering a
 444   // second time.
 445   BirthCountMap birth_counts;
 446   for (ThreadData* thread_data = my_list; thread_data;
 447        thread_data = thread_data->next()) {
 448     thread_data->SnapshotExecutedTasks(current_profiling_phase,
 449                                        &process_data_snapshot->phased_snapshots,
 450                                        &birth_counts);
 451   }
 452
 453   // Add births that are still active -- i.e. objects that have tallied a birth,
 454   // but have not yet tallied a matching death, and hence must be either
 455   // running, queued up, or being held in limbo for future posting.
 456   auto* current_phase_tasks =
 457       &process_data_snapshot->phased_snapshots[current_profiling_phase].tasks;
 458   for (const auto& birth_count : birth_counts) {
 459     if (birth_count.second > 0) {
 460       current_phase_tasks->push_back(
 461           TaskSnapshot(BirthOnThreadSnapshot(*birth_count.first),
 462                        DeathDataSnapshot(birth_count.second, 0, 0, 0, 0, 0, 0),
 463                        "Still_Alive"));
 464     }
 465   }
 466 }
 467
 468 // static
 469 void ThreadData::OnProfilingPhaseCompleted(int profiling_phase) {
 470   // Get an unchanging copy of a ThreadData list.
 471   ThreadData* my_list = ThreadData::first();
 472
 473   // Add snapshots for all instances of death data in all threads serially.
 474   // This hackish approach *can* get some slightly corrupt tallies, as we are
 475   // grabbing values without the protection of a lock, but it has the advantage
 476   // of working even with threads that don't have message loops.  Any corruption
 477   // shouldn't cause "cascading damage" to anything else (in later phases).
 478   for (ThreadData* thread_data = my_list; thread_data;
 479        thread_data = thread_data->next()) {
 480     thread_data->OnProfilingPhaseCompletedOnThread(profiling_phase);
 481   }
 482 }
 483
 484 Births* ThreadData::TallyABirth(const Location& location) {
 485   BirthMap::iterator it = birth_map_.find(location);
 486   Births* child;
 487   if (it != birth_map_.end()) {
 488     child =  it->second;
 489     child->RecordBirth();
 490   } else {
 491     child = new Births(location, *this);  // Leak this.
 492     // Lock since the map may get relocated now, and other threads sometimes
 493     // snapshot it (but they lock before copying it).
 494     base::AutoLock lock(map_lock_);
 495     birth_map_[location] = child;
 496   }
 497
 498   return child;
 499 }
 500
 501 void ThreadData::TallyADeath(const Births& births,
 502                              int32 queue_duration,
 503                              const TaskStopwatch& stopwatch) {
 504   int32 run_duration = stopwatch.RunDurationMs();
 505
 506   // Stir in some randomness, plus add constant in case durations are zero.
 507   const uint32 kSomePrimeNumber = 2147483647;
 508   random_number_ += queue_duration + run_duration + kSomePrimeNumber;
 509   // An address is going to have some randomness to it as well ;-).
 510   random_number_ ^= static_cast<uint32>(&births - reinterpret_cast<Births*>(0));
 511
 512   // We don't have queue durations without OS timer.  OS timer is automatically
 513   // used for task-post-timing, so the use of an alternate timer implies all
 514   // queue times are invalid, unless it was explicitly said that we can trust
 515   // the alternate timer.
 516   if (kAllowAlternateTimeSourceHandling &&
 517       now_function_ &&
 518       !now_function_is_time_) {
 519     queue_duration = 0;
 520   }
 521
 522   DeathMap::iterator it = death_map_.find(&births);
 523   DeathData* death_data;
 524   if (it != death_map_.end()) {
 525     death_data = &it->second;
 526   } else {
 527     base::AutoLock lock(map_lock_);  // Lock as the map may get relocated now.
 528     death_data = &death_map_[&births];
 529   }  // Release lock ASAP.
 530   death_data->RecordDeath(queue_duration, run_duration, random_number_);
 531 }
 532
 533 // static
 534 Births* ThreadData::TallyABirthIfActive(const Location& location) {
 535   if (!TrackingStatus())
 536     return NULL;
 537   ThreadData* current_thread_data = Get();
 538   if (!current_thread_data)
 539     return NULL;
 540   return current_thread_data->TallyABirth(location);
 541 }
 542
 543 // static
 544 void ThreadData::TallyRunOnNamedThreadIfTracking(
 545     const base::TrackingInfo& completed_task,
 546     const TaskStopwatch& stopwatch) {
 547   // Even if we have been DEACTIVATED, we will process any pending births so
 548   // that our data structures (which counted the outstanding births) remain
 549   // consistent.
 550   const Births* births = completed_task.birth_tally;
 551   if (!births)
 552     return;
 553   ThreadData* current_thread_data = stopwatch.GetThreadData();
 554   if (!current_thread_data)
 555     return;
 556
 557   // Watch out for a race where status_ is changing, and hence one or both
 558   // of start_of_run or end_of_run is zero.  In that case, we didn't bother to
 559   // get a time value since we "weren't tracking" and we were trying to be
 560   // efficient by not calling for a genuine time value.  For simplicity, we'll
 561   // use a default zero duration when we can't calculate a true value.
 562   TrackedTime start_of_run = stopwatch.StartTime();
 563   int32 queue_duration = 0;
 564   if (!start_of_run.is_null()) {
 565     queue_duration = (start_of_run - completed_task.EffectiveTimePosted())
 566         .InMilliseconds();
 567   }
 568   current_thread_data->TallyADeath(*births, queue_duration, stopwatch);
 569 }
 570
 571 // static
 572 void ThreadData::TallyRunOnWorkerThreadIfTracking(
 573     const Births* births,
 574     const TrackedTime& time_posted,
 575     const TaskStopwatch& stopwatch) {
 576   // Even if we have been DEACTIVATED, we will process any pending births so
 577   // that our data structures (which counted the outstanding births) remain
 578   // consistent.
 579   if (!births)
 580     return;
 581
 582   // TODO(jar): Support the option to coalesce all worker-thread activity under
 583   // one ThreadData instance that uses locks to protect *all* access.  This will
 584   // reduce memory (making it provably bounded), but run incrementally slower
 585   // (since we'll use locks on TallyABirth and TallyADeath).  The good news is
 586   // that the locks on TallyADeath will be *after* the worker thread has run,
 587   // and hence nothing will be waiting for the completion (...  besides some
 588   // other thread that might like to run).  Also, the worker threads tasks are
 589   // generally longer, and hence the cost of the lock may perchance be amortized
 590   // over the long task's lifetime.
 591   ThreadData* current_thread_data = stopwatch.GetThreadData();
 592   if (!current_thread_data)
 593     return;
 594
 595   TrackedTime start_of_run = stopwatch.StartTime();
 596   int32 queue_duration = 0;
 597   if (!start_of_run.is_null()) {
 598     queue_duration = (start_of_run - time_posted).InMilliseconds();
 599   }
 600   current_thread_data->TallyADeath(*births, queue_duration, stopwatch);
 601 }
 602
 603 // static
 604 void ThreadData::TallyRunInAScopedRegionIfTracking(
 605     const Births* births,
 606     const TaskStopwatch& stopwatch) {
 607   // Even if we have been DEACTIVATED, we will process any pending births so
 608   // that our data structures (which counted the outstanding births) remain
 609   // consistent.
 610   if (!births)
 611     return;
 612
 613   ThreadData* current_thread_data = stopwatch.GetThreadData();
 614   if (!current_thread_data)
 615     return;
 616
 617   int32 queue_duration = 0;
 618   current_thread_data->TallyADeath(*births, queue_duration, stopwatch);
 619 }
 620
 621 void ThreadData::SnapshotExecutedTasks(
 622     int current_profiling_phase,
 623     PhasedProcessDataSnapshotMap* phased_snapshots,
 624     BirthCountMap* birth_counts) {
 625   // Get copy of data, so that the data will not change during the iterations
 626   // and processing.
 627   BirthMap birth_map;
 628   DeathsSnapshot deaths;
 629   SnapshotMaps(current_profiling_phase, &birth_map, &deaths);
 630
 631   for (const auto& birth : birth_map) {
 632     (*birth_counts)[birth.second] += birth.second->birth_count();
 633   }
 634
 635   for (const auto& death : deaths) {
 636     (*birth_counts)[death.first] -= death.first->birth_count();
 637
 638     // For the current death data, walk through all its snapshots, starting from
 639     // the current one, then from the previous profiling phase etc., and for
 640     // each snapshot calculate the delta between the snapshot and the previous
 641     // phase, if any.  Store the deltas in the result.
 642     for (const DeathDataPhaseSnapshot* phase = &death.second; phase;
 643          phase = phase->prev) {
 644       const DeathDataSnapshot& death_data =
 645           phase->prev ? phase->death_data.Delta(phase->prev->death_data)
 646                       : phase->death_data;
 647
 648       if (death_data.count > 0) {
 649         (*phased_snapshots)[phase->profiling_phase].tasks.push_back(
 650             TaskSnapshot(BirthOnThreadSnapshot(*death.first), death_data,
 651                          thread_name()));
 652       }
 653     }
 654   }
 655 }
 656
 657 // This may be called from another thread.
 658 void ThreadData::SnapshotMaps(int profiling_phase,
 659                               BirthMap* birth_map,
 660                               DeathsSnapshot* deaths) {
 661   base::AutoLock lock(map_lock_);
 662
 663   for (const auto& birth : birth_map_)
 664     (*birth_map)[birth.first] = birth.second;
 665
 666   for (const auto& death : death_map_) {
 667     deaths->push_back(std::make_pair(
 668         death.first,
 669         DeathDataPhaseSnapshot(profiling_phase, death.second.count(),
 670                                death.second.run_duration_sum(),
 671                                death.second.run_duration_max(),
 672                                death.second.run_duration_sample(),
 673                                death.second.queue_duration_sum(),
 674                                death.second.queue_duration_max(),
 675                                death.second.queue_duration_sample(),
 676                                death.second.last_phase_snapshot())));
 677   }
 678 }
 679
 680 void ThreadData::OnProfilingPhaseCompletedOnThread(int profiling_phase) {
 681   base::AutoLock lock(map_lock_);
 682
 683   for (auto& death : death_map_) {
 684     death.second.OnProfilingPhaseCompleted(profiling_phase);
 685   }
 686 }
 687
 688 static void OptionallyInitializeAlternateTimer() {
 689   NowFunction* alternate_time_source = GetAlternateTimeSource();
 690   if (alternate_time_source)
 691     ThreadData::SetAlternateTimeSource(alternate_time_source);
 692 }
 693
 694 void ThreadData::Initialize() {
 695   if (base::subtle::Acquire_Load(&status_) >= DEACTIVATED)
 696     return;  // Someone else did the initialization.
 697   // Due to racy lazy initialization in tests, we'll need to recheck status_
 698   // after we acquire the lock.
 699
 700   // Ensure that we don't double initialize tls.  We are called when single
 701   // threaded in the product, but some tests may be racy and lazy about our
 702   // initialization.
 703   base::AutoLock lock(*list_lock_.Pointer());
 704   if (base::subtle::Acquire_Load(&status_) >= DEACTIVATED)
 705     return;  // Someone raced in here and beat us.
 706
 707   // Put an alternate timer in place if the environment calls for it, such as
 708   // for tracking TCMalloc allocations.  This insertion is idempotent, so we
 709   // don't mind if there is a race, and we'd prefer not to be in a lock while
 710   // doing this work.
 711   if (kAllowAlternateTimeSourceHandling)
 712     OptionallyInitializeAlternateTimer();
 713
 714   // Perform the "real" TLS initialization now, and leave it intact through
 715   // process termination.
 716   if (!tls_index_.initialized()) {  // Testing may have initialized this.
 717     DCHECK_EQ(base::subtle::NoBarrier_Load(&status_), UNINITIALIZED);
 718     tls_index_.Initialize(&ThreadData::OnThreadTermination);
 719     DCHECK(tls_index_.initialized());
 720   } else {
 721     // TLS was initialzed for us earlier.
 722     DCHECK_EQ(base::subtle::NoBarrier_Load(&status_), DORMANT_DURING_TESTS);
 723   }
 724
 725   // Incarnation counter is only significant to testing, as it otherwise will
 726   // never again change in this process.
 727   ++incarnation_counter_;
 728
 729   // The lock is not critical for setting status_, but it doesn't hurt.  It also
 730   // ensures that if we have a racy initialization, that we'll bail as soon as
 731   // we get the lock earlier in this method.
 732   base::subtle::Release_Store(&status_, kInitialStartupState);
 733   DCHECK(base::subtle::NoBarrier_Load(&status_) != UNINITIALIZED);
 734 }
 735
 736 // static
 737 void ThreadData::InitializeAndSetTrackingStatus(Status status) {
 738   DCHECK_GE(status, DEACTIVATED);
 739   DCHECK_LE(status, PROFILING_ACTIVE);
 740
 741   Initialize();  // No-op if already initialized.
 742
 743   if (status > DEACTIVATED)
 744     status = PROFILING_ACTIVE;
 745   base::subtle::Release_Store(&status_, status);
 746 }
 747
 748 // static
 749 ThreadData::Status ThreadData::status() {
 750   return static_cast<ThreadData::Status>(base::subtle::Acquire_Load(&status_));
 751 }
 752
 753 // static
 754 bool ThreadData::TrackingStatus() {
 755   return base::subtle::Acquire_Load(&status_) > DEACTIVATED;
 756 }
 757
 758 // static
 759 void ThreadData::SetAlternateTimeSource(NowFunction* now_function) {
 760   DCHECK(now_function);
 761   if (kAllowAlternateTimeSourceHandling)
 762     now_function_ = now_function;
 763 }
 764
 765 // static
 766 void ThreadData::EnableProfilerTiming() {
 767   base::subtle::NoBarrier_Store(&g_profiler_timing_enabled, ENABLED_TIMING);
 768 }
 769
 770 // static
 771 TrackedTime ThreadData::Now() {
 772   if (kAllowAlternateTimeSourceHandling && now_function_)
 773     return TrackedTime::FromMilliseconds((*now_function_)());
 774   if (IsProfilerTimingEnabled() && TrackingStatus())
 775     return TrackedTime::Now();
 776   return TrackedTime();  // Super fast when disabled, or not compiled.
 777 }
 778
 779 // static
 780 void ThreadData::EnsureCleanupWasCalled(int major_threads_shutdown_count) {
 781   base::AutoLock lock(*list_lock_.Pointer());
 782   if (worker_thread_data_creation_count_ == 0)
 783     return;  // We haven't really run much, and couldn't have leaked.
 784
 785   // TODO(jar): until this is working on XP, don't run the real test.
 786 #if 0
 787   // Verify that we've at least shutdown/cleanup the major namesd threads.  The
 788   // caller should tell us how many thread shutdowns should have taken place by
 789   // now.
 790   CHECK_GT(cleanup_count_, major_threads_shutdown_count);
 791 #endif
 792 }
 793
 794 // static
 795 void ThreadData::ShutdownSingleThreadedCleanup(bool leak) {
 796   // This is only called from test code, where we need to cleanup so that
 797   // additional tests can be run.
 798   // We must be single threaded... but be careful anyway.
 799   InitializeAndSetTrackingStatus(DEACTIVATED);
 800
 801   ThreadData* thread_data_list;
 802   {
 803     base::AutoLock lock(*list_lock_.Pointer());
 804     thread_data_list = all_thread_data_list_head_;
 805     all_thread_data_list_head_ = NULL;
 806     ++incarnation_counter_;
 807     // To be clean, break apart the retired worker list (though we leak them).
 808     while (first_retired_worker_) {
 809       ThreadData* worker = first_retired_worker_;
 810       CHECK_GT(worker->worker_thread_number_, 0);
 811       first_retired_worker_ = worker->next_retired_worker_;
 812       worker->next_retired_worker_ = NULL;
 813     }
 814   }
 815
 816   // Put most global static back in pristine shape.
 817   worker_thread_data_creation_count_ = 0;
 818   cleanup_count_ = 0;
 819   tls_index_.Set(NULL);
 820   // Almost UNINITIALIZED.
 821   base::subtle::Release_Store(&status_, DORMANT_DURING_TESTS);
 822
 823   // To avoid any chance of racing in unit tests, which is the only place we
 824   // call this function, we may sometimes leak all the data structures we
 825   // recovered, as they may still be in use on threads from prior tests!
 826   if (leak) {
 827     ThreadData* thread_data = thread_data_list;
 828     while (thread_data) {
 829       ANNOTATE_LEAKING_OBJECT_PTR(thread_data);
 830       thread_data = thread_data->next();
 831     }
 832     return;
 833   }
 834
 835   // When we want to cleanup (on a single thread), here is what we do.
 836
 837   // Do actual recursive delete in all ThreadData instances.
 838   while (thread_data_list) {
 839     ThreadData* next_thread_data = thread_data_list;
 840     thread_data_list = thread_data_list->next();
 841
 842     for (BirthMap::iterator it = next_thread_data->birth_map_.begin();
 843          next_thread_data->birth_map_.end() != it; ++it)
 844       delete it->second;  // Delete the Birth Records.
 845     delete next_thread_data;  // Includes all Death Records.
 846   }
 847 }
 848
 849 //------------------------------------------------------------------------------
 850 TaskStopwatch::TaskStopwatch()
 851     : wallclock_duration_ms_(0),
 852       current_thread_data_(NULL),
 853       excluded_duration_ms_(0),
 854       parent_(NULL) {
 855 #if DCHECK_IS_ON()
 856   state_ = CREATED;
 857   child_ = NULL;
 858 #endif
 859 }
 860
 861 TaskStopwatch::~TaskStopwatch() {
 862 #if DCHECK_IS_ON()
 863   DCHECK(state_ != RUNNING);
 864   DCHECK(child_ == NULL);
 865 #endif
 866 }
 867
 868 void TaskStopwatch::Start() {
 869 #if DCHECK_IS_ON()
 870   DCHECK(state_ == CREATED);
 871   state_ = RUNNING;
 872 #endif
 873
 874   start_time_ = ThreadData::Now();
 875
 876   current_thread_data_ = ThreadData::Get();
 877   if (!current_thread_data_)
 878     return;
 879
 880   parent_ = current_thread_data_->current_stopwatch_;
 881 #if DCHECK_IS_ON()
 882   if (parent_) {
 883     DCHECK(parent_->state_ == RUNNING);
 884     DCHECK(parent_->child_ == NULL);
 885     parent_->child_ = this;
 886   }
 887 #endif
 888   current_thread_data_->current_stopwatch_ = this;
 889 }
 890
 891 void TaskStopwatch::Stop() {
 892   const TrackedTime end_time = ThreadData::Now();
 893 #if DCHECK_IS_ON()
 894   DCHECK(state_ == RUNNING);
 895   state_ = STOPPED;
 896   DCHECK(child_ == NULL);
 897 #endif
 898
 899   if (!start_time_.is_null() && !end_time.is_null()) {
 900     wallclock_duration_ms_ = (end_time - start_time_).InMilliseconds();
 901   }
 902
 903   if (!current_thread_data_)
 904     return;
 905
 906   DCHECK(current_thread_data_->current_stopwatch_ == this);
 907   current_thread_data_->current_stopwatch_ = parent_;
 908   if (!parent_)
 909     return;
 910
 911 #if DCHECK_IS_ON()
 912   DCHECK(parent_->state_ == RUNNING);
 913   DCHECK(parent_->child_ == this);
 914   parent_->child_ = NULL;
 915 #endif
 916   parent_->excluded_duration_ms_ += wallclock_duration_ms_;
 917   parent_ = NULL;
 918 }
 919
 920 TrackedTime TaskStopwatch::StartTime() const {
 921 #if DCHECK_IS_ON()
 922   DCHECK(state_ != CREATED);
 923 #endif
 924
 925   return start_time_;
 926 }
 927
 928 int32 TaskStopwatch::RunDurationMs() const {
 929 #if DCHECK_IS_ON()
 930   DCHECK(state_ == STOPPED);
 931 #endif
 932
 933   return wallclock_duration_ms_ - excluded_duration_ms_;
 934 }
 935
 936 ThreadData* TaskStopwatch::GetThreadData() const {
 937 #if DCHECK_IS_ON()
 938   DCHECK(state_ != CREATED);
 939 #endif
 940
 941   return current_thread_data_;
 942 }
 943
 944 //------------------------------------------------------------------------------
 945 // DeathDataPhaseSnapshot
 946
 947 DeathDataPhaseSnapshot::DeathDataPhaseSnapshot(
 948     int profiling_phase,
 949     int count,
 950     int32 run_duration_sum,
 951     int32 run_duration_max,
 952     int32 run_duration_sample,
 953     int32 queue_duration_sum,
 954     int32 queue_duration_max,
 955     int32 queue_duration_sample,
 956     const DeathDataPhaseSnapshot* prev)
 957     : profiling_phase(profiling_phase),
 958       death_data(count,
 959                  run_duration_sum,
 960                  run_duration_max,
 961                  run_duration_sample,
 962                  queue_duration_sum,
 963                  queue_duration_max,
 964                  queue_duration_sample),
 965       prev(prev) {
 966 }
 967
 968 //------------------------------------------------------------------------------
 969 // TaskSnapshot
 970
 971 TaskSnapshot::TaskSnapshot() {
 972 }
 973
 974 TaskSnapshot::TaskSnapshot(const BirthOnThreadSnapshot& birth,
 975                            const DeathDataSnapshot& death_data,
 976                            const std::string& death_thread_name)
 977     : birth(birth),
 978       death_data(death_data),
 979       death_thread_name(death_thread_name) {
 980 }
 981
 982 TaskSnapshot::~TaskSnapshot() {
 983 }
 984
 985 //------------------------------------------------------------------------------
 986 // ProcessDataPhaseSnapshot
 987
 988 ProcessDataPhaseSnapshot::ProcessDataPhaseSnapshot() {
 989 }
 990
 991 ProcessDataPhaseSnapshot::~ProcessDataPhaseSnapshot() {
 992 }
 993
 994 //------------------------------------------------------------------------------
 995 // ProcessDataPhaseSnapshot
 996
 997 ProcessDataSnapshot::ProcessDataSnapshot()
 998 #if !defined(OS_NACL)
 999     : process_id(base::GetCurrentProcId()) {
1000 #else
1001     : process_id(base::kNullProcessId) {
1002 #endif
1003 }
1004
1005 ProcessDataSnapshot::~ProcessDataSnapshot() {
1006 }
1007
1008 }  // namespace tracked_objects