// Copyright (c) 2012 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "base/tracked_objects.h"

#include <limits.h>

#include "base/atomicops.h"
#include "base/base_switches.h"
#include "base/command_line.h"
#include "base/compiler_specific.h"
#include "base/debug/leak_annotations.h"
#include "base/logging.h"
#include "base/process/process_handle.h"
#include "base/profiler/alternate_timer.h"
#include "base/strings/stringprintf.h"
#include "base/third_party/valgrind/memcheck.h"
#include "base/tracking_info.h"

using base::TimeDelta;

namespace tracked_objects {

// TODO(jar): Evaluate the perf impact of enabling this. If the perf impact is
// negligible, enable by default.
// Flag to compile out parent-child link recording.
const bool kTrackParentChildLinks = false;

// When ThreadData is first initialized, should we start in an ACTIVE state to
// record all of the startup-time tasks, or should we start up DEACTIVATED, so
// that we only record after parsing the command line flag --enable-tracking.
// Note that the flag may force either state, so this really controls only the
// period of time up until that flag is parsed. If there is no flag seen, then
// this state may prevail for much or all of the process lifetime.
const ThreadData::Status kInitialStartupState =
    ThreadData::PROFILING_CHILDREN_ACTIVE;

// Control whether an alternate time source (Now() function) is supported by
// the ThreadData class. This compile time flag should be set to true if we
// want other modules (such as a memory allocator, or a thread-specific CPU time
// clock) to be able to provide a thread-specific Now() function. Without this
// compile-time flag, the code will only support the wall-clock time. This flag
// can be flipped to efficiently disable this path (if there is a performance
// problem with its presence).
static const bool kAllowAlternateTimeSourceHandling = true;

// Possible states of the profiler timing enabledness.
enum {
  UNDEFINED_TIMING,
  ENABLED_TIMING,
  DISABLED_TIMING,
};

// State of the profiler timing enabledness.
base::subtle::Atomic32 g_profiler_timing_enabled = UNDEFINED_TIMING;

// Returns whether profiler timing is enabled. The default is true, but this
// may be overridden by a command-line flag. Some platforms may
// programmatically set this command-line flag to the "off" value if it's not
// specified.
// This in turn can be overridden by explicitly calling
// ThreadData::EnableProfilerTiming, say, based on a field trial.
inline bool IsProfilerTimingEnabled() {
  // Reading |g_profiler_timing_enabled| is done without barrier because
  // multiple initialization is not an issue while the barrier can be
  // relatively costly given that this method is sometimes called in a tight
  // loop.
  base::subtle::Atomic32 current_timing_enabled =
      base::subtle::NoBarrier_Load(&g_profiler_timing_enabled);
  if (current_timing_enabled == UNDEFINED_TIMING) {
    if (!base::CommandLine::InitializedForCurrentProcess())
      return true;
    current_timing_enabled =
        (base::CommandLine::ForCurrentProcess()->GetSwitchValueASCII(
             switches::kProfilerTiming) ==
         switches::kProfilerTimingDisabledValue)
            ? DISABLED_TIMING
            : ENABLED_TIMING;
    base::subtle::NoBarrier_Store(&g_profiler_timing_enabled,
                                  current_timing_enabled);
  }
  return current_timing_enabled == ENABLED_TIMING;
}

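// For example, when the command line sets switches::kProfilerTiming to
// switches::kProfilerTimingDisabledValue, the first call above latches
// DISABLED_TIMING into |g_profiler_timing_enabled|; a later call to
// ThreadData::EnableProfilerTiming() (defined below) can still overwrite the
// cached state with ENABLED_TIMING.
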
//------------------------------------------------------------------------------
// DeathData tallies durations when a death takes place.

DeathData::DeathData()
    : count_(0),
      sample_probability_count_(0),
      run_duration_sum_(0),
      queue_duration_sum_(0),
      run_duration_max_(0),
      queue_duration_max_(0),
      run_duration_sample_(0),
      queue_duration_sample_(0),
      last_phase_snapshot_(nullptr) {
}

DeathData::DeathData(const DeathData& other)
    : count_(other.count_),
      sample_probability_count_(other.sample_probability_count_),
      run_duration_sum_(other.run_duration_sum_),
      queue_duration_sum_(other.queue_duration_sum_),
      run_duration_max_(other.run_duration_max_),
      queue_duration_max_(other.queue_duration_max_),
      run_duration_sample_(other.run_duration_sample_),
      queue_duration_sample_(other.queue_duration_sample_),
      last_phase_snapshot_(nullptr) {
  // This constructor will be used by std::map when adding new DeathData values
  // to the map. At that point, last_phase_snapshot_ is still NULL, so we don't
  // need to worry about ownership transfer.
  DCHECK(other.last_phase_snapshot_ == nullptr);
}

DeathData::~DeathData() {
  while (last_phase_snapshot_) {
    const DeathDataPhaseSnapshot* snapshot = last_phase_snapshot_;
    last_phase_snapshot_ = snapshot->prev;
    delete snapshot;
  }
}

// TODO(jar): I need to see if this macro to optimize branching is worth using.
//
// This macro has no branching, so it is surely fast, and is equivalent to:
//             if (assign_it)
//               target = source;
// We use a macro rather than a template to force this to inline.
// Related code for calculating max is discussed on the web.
#define CONDITIONAL_ASSIGN(assign_it, target, source) \
  ((target) ^= ((target) ^ (source)) & -static_cast<int32>(assign_it))
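// Illustrative expansion, using example values:
//   int32 target = 5;
//   CONDITIONAL_ASSIGN(1, target, 9);  // all-ones mask: target becomes 9.
//   CONDITIONAL_ASSIGN(0, target, 7);  // zero mask: target stays 9.
// With assign_it == 1 the mask -static_cast<int32>(1) is all ones, so
// target ^= (target ^ source) yields source; with assign_it == 0 the mask is
// zero and target is left unchanged.
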
void DeathData::RecordDeath(const int32 queue_duration,
                            const int32 run_duration,
                            const uint32 random_number) {
  // We'll just clamp at INT_MAX, but we should note this in the UI as such.
  if (count_ < INT_MAX)
    ++count_;

  int sample_probability_count = sample_probability_count_;
  if (sample_probability_count < INT_MAX)
    ++sample_probability_count;
  sample_probability_count_ = sample_probability_count;

  queue_duration_sum_ += queue_duration;
  run_duration_sum_ += run_duration;

  if (queue_duration_max_ < queue_duration)
    queue_duration_max_ = queue_duration;
  if (run_duration_max_ < run_duration)
    run_duration_max_ = run_duration;

  // Take a uniformly distributed sample over all durations ever supplied
  // during the current profiling phase.
  // The probability that we (instead) use this new sample is
  // 1/sample_probability_count_. This results in a completely uniform
  // selection of the sample (at least when we don't clamp
  // sample_probability_count_... but that should be inconsequentially
  // likely). We ignore the fact that we correlated our selection of a sample
  // to the run and queue times (i.e., we used them to generate random_number).
  CHECK_GT(sample_probability_count, 0);
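  // Worked example of the uniform selection: for the n-th recorded death,
  // sample_probability_count == n, so the incoming values are kept with
  // probability 1/n; a previously kept sample survives with probability
  // (1 - 1/n) * 1/(n-1) == 1/n as well, so every death seen so far in the
  // phase is equally likely to be the retained sample.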
  if (0 == (random_number % sample_probability_count)) {
    queue_duration_sample_ = queue_duration;
    run_duration_sample_ = run_duration;
  }
}

int DeathData::count() const { return count_; }

int32 DeathData::run_duration_sum() const { return run_duration_sum_; }

int32 DeathData::run_duration_max() const { return run_duration_max_; }

int32 DeathData::run_duration_sample() const {
  return run_duration_sample_;
}

int32 DeathData::queue_duration_sum() const {
  return queue_duration_sum_;
}

int32 DeathData::queue_duration_max() const {
  return queue_duration_max_;
}

int32 DeathData::queue_duration_sample() const {
  return queue_duration_sample_;
}

const DeathDataPhaseSnapshot* DeathData::last_phase_snapshot() const {
  return last_phase_snapshot_;
}

void DeathData::OnProfilingPhaseCompleted(int profiling_phase) {
  // Snapshotting and storing current state.
  last_phase_snapshot_ = new DeathDataPhaseSnapshot(
      profiling_phase, count_, run_duration_sum_, run_duration_max_,
      run_duration_sample_, queue_duration_sum_, queue_duration_max_,
      queue_duration_sample_, last_phase_snapshot_);

  // Not touching fields for which a delta can be computed by comparing with a
  // snapshot from the previous phase. Resetting other fields. Sample values
  // will be reset upon next death recording because sample_probability_count_
  // is set to 0.
  // We avoid resetting to 0 in favor of deltas whenever possible. The reason
  // is that for incrementable fields, resetting to 0 from the snapshot thread
  // potentially in parallel with incrementing in the death thread may result in
  // significant data corruption that has a potential to grow with time. Not
  // resetting incrementable fields and using deltas will cause any
  // off-by-little corruptions to be likely fixed at the next snapshot.
  // The max values are not incrementable, and cannot be deduced using deltas
  // for a given phase. Hence, we have to reset them to 0. But the potential
  // damage is limited to getting the previous phase's max to apply for the next
  // phase, and the error doesn't have a potential to keep growing with new
  // phases.
  // sample_probability_count_ is incrementable, but must be reset to 0 at the
  // phase end, so that we start a new uniformly randomized sample selection
  // after the reset. Corruptions due to race conditions are possible, but the
  // damage is limited to selecting a wrong sample, which is not something that
  // can cause accumulating or cascading effects.
  // If there were no corruptions caused by race conditions, we never send a
  // sample for the previous phase in the next phase's snapshot because
  // ThreadData::SnapshotExecutedTasks doesn't send deltas with 0 count.
  sample_probability_count_ = 0;
  run_duration_max_ = 0;
  queue_duration_max_ = 0;
}

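// The snapshots form a singly linked list from the newest phase back to the
// oldest: after phases 0 and 1 complete, last_phase_snapshot_ points at the
// phase-1 snapshot, whose |prev| points at the phase-0 snapshot, whose |prev|
// is null.
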
//------------------------------------------------------------------------------
DeathDataSnapshot::DeathDataSnapshot()
    : count(-1),
      run_duration_sum(-1),
      run_duration_max(-1),
      run_duration_sample(-1),
      queue_duration_sum(-1),
      queue_duration_max(-1),
      queue_duration_sample(-1) {
}

DeathDataSnapshot::DeathDataSnapshot(int count,
                                     int32 run_duration_sum,
                                     int32 run_duration_max,
                                     int32 run_duration_sample,
                                     int32 queue_duration_sum,
                                     int32 queue_duration_max,
                                     int32 queue_duration_sample)
    : count(count),
      run_duration_sum(run_duration_sum),
      run_duration_max(run_duration_max),
      run_duration_sample(run_duration_sample),
      queue_duration_sum(queue_duration_sum),
      queue_duration_max(queue_duration_max),
      queue_duration_sample(queue_duration_sample) {
}

DeathDataSnapshot::~DeathDataSnapshot() {
}

DeathDataSnapshot DeathDataSnapshot::Delta(
    const DeathDataSnapshot& older) const {
  return DeathDataSnapshot(count - older.count,
                           run_duration_sum - older.run_duration_sum,
                           run_duration_max, run_duration_sample,
                           queue_duration_sum - older.queue_duration_sum,
                           queue_duration_max, queue_duration_sample);
}
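// For example, if this phase's cumulative count is 10 and |older| (the
// previous phase's snapshot) recorded 4, the delta reports 6 deaths for the
// phase. Sums are differenced the same way, while max and sample values are
// per-phase and pass through unchanged.
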
//------------------------------------------------------------------------------
BirthOnThread::BirthOnThread(const Location& location,
                             const ThreadData& current)
    : location_(location),
      birth_thread_(&current) {
}

//------------------------------------------------------------------------------
BirthOnThreadSnapshot::BirthOnThreadSnapshot() {
}

BirthOnThreadSnapshot::BirthOnThreadSnapshot(const BirthOnThread& birth)
    : location(birth.location()),
      thread_name(birth.birth_thread()->thread_name()) {
}

BirthOnThreadSnapshot::~BirthOnThreadSnapshot() {
}

//------------------------------------------------------------------------------
Births::Births(const Location& location, const ThreadData& current)
    : BirthOnThread(location, current),
      birth_count_(1) {
}

int Births::birth_count() const { return birth_count_; }

void Births::RecordBirth() { ++birth_count_; }

//------------------------------------------------------------------------------
// ThreadData maintains the central data for all births and deaths on a single
// thread.

// TODO(jar): We should pull all these static vars together, into a struct, and
// optimize layout so that we benefit from locality of reference during accesses
// to them.

// static
NowFunction* ThreadData::now_function_ = NULL;

// static
bool ThreadData::now_function_is_time_ = false;

// A TLS slot which points to the ThreadData instance for the current thread.
// We do a fake initialization here (zeroing out data), and then the real
// in-place construction happens when we call tls_index_.Initialize().
// static
base::ThreadLocalStorage::StaticSlot ThreadData::tls_index_ = TLS_INITIALIZER;

// static
int ThreadData::worker_thread_data_creation_count_ = 0;

// static
int ThreadData::cleanup_count_ = 0;

// static
int ThreadData::incarnation_counter_ = 0;

// static
ThreadData* ThreadData::all_thread_data_list_head_ = NULL;

// static
ThreadData* ThreadData::first_retired_worker_ = NULL;

// static
base::LazyInstance<base::Lock>::Leaky
    ThreadData::list_lock_ = LAZY_INSTANCE_INITIALIZER;

// static
ThreadData::Status ThreadData::status_ = ThreadData::UNINITIALIZED;

ThreadData::ThreadData(const std::string& suggested_name)
    : next_(NULL),
      next_retired_worker_(NULL),
      worker_thread_number_(0),
      incarnation_count_for_pool_(-1),
      current_stopwatch_(NULL) {
  DCHECK_GE(suggested_name.size(), 0u);
  thread_name_ = suggested_name;
  PushToHeadOfList();  // Which sets real incarnation_count_for_pool_.
}

ThreadData::ThreadData(int thread_number)
    : next_(NULL),
      next_retired_worker_(NULL),
      worker_thread_number_(thread_number),
      incarnation_count_for_pool_(-1),
      current_stopwatch_(NULL) {
  CHECK_GT(thread_number, 0);
  base::StringAppendF(&thread_name_, "WorkerThread-%d", thread_number);
  PushToHeadOfList();  // Which sets real incarnation_count_for_pool_.
}

ThreadData::~ThreadData() {
}

void ThreadData::PushToHeadOfList() {
  // Toss in a hint of randomness (atop the uninitialized value).
  (void)VALGRIND_MAKE_MEM_DEFINED_IF_ADDRESSABLE(&random_number_,
                                                 sizeof(random_number_));
  MSAN_UNPOISON(&random_number_, sizeof(random_number_));
  random_number_ += static_cast<uint32>(this - static_cast<ThreadData*>(0));
  random_number_ ^= (Now() - TrackedTime()).InMilliseconds();

  DCHECK(!next_);
  base::AutoLock lock(*list_lock_.Pointer());
  incarnation_count_for_pool_ = incarnation_counter_;
  next_ = all_thread_data_list_head_;
  all_thread_data_list_head_ = this;
}

// static
ThreadData* ThreadData::first() {
  base::AutoLock lock(*list_lock_.Pointer());
  return all_thread_data_list_head_;
}

ThreadData* ThreadData::next() const { return next_; }

// static
void ThreadData::InitializeThreadContext(const std::string& suggested_name) {
  if (!Initialize())  // Always initialize if needed.
    return;
  ThreadData* current_thread_data =
      reinterpret_cast<ThreadData*>(tls_index_.Get());
  if (current_thread_data)
    return;  // Browser tests instigate this.
  current_thread_data = new ThreadData(suggested_name);
  tls_index_.Set(current_thread_data);
}

// static
ThreadData* ThreadData::Get() {
  if (!tls_index_.initialized())
    return NULL;  // For unittests only.
  ThreadData* registered = reinterpret_cast<ThreadData*>(tls_index_.Get());
  if (registered)
    return registered;

  // We must be a worker thread, since we didn't pre-register.
  ThreadData* worker_thread_data = NULL;
  int worker_thread_number = 0;
  {
    base::AutoLock lock(*list_lock_.Pointer());
    if (first_retired_worker_) {
      worker_thread_data = first_retired_worker_;
      first_retired_worker_ = first_retired_worker_->next_retired_worker_;
      worker_thread_data->next_retired_worker_ = NULL;
    } else {
      worker_thread_number = ++worker_thread_data_creation_count_;
    }
  }

  // If we can't find a previously used instance, then we have to create one.
  if (!worker_thread_data) {
    DCHECK_GT(worker_thread_number, 0);
    worker_thread_data = new ThreadData(worker_thread_number);
  }
  DCHECK_GT(worker_thread_data->worker_thread_number_, 0);

  tls_index_.Set(worker_thread_data);
  return worker_thread_data;
}
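// Illustrative lifecycle of a worker thread's ThreadData: the first call to
// Get() on an unregistered thread either reclaims an instance from the
// retired-worker list or constructs a fresh "WorkerThread-%d" instance; when
// the thread exits, OnThreadTermination() (below) retires the instance so a
// later worker thread can reuse it.
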
// static
void ThreadData::OnThreadTermination(void* thread_data) {
  DCHECK(thread_data);  // TLS should *never* call us with a NULL.
  // We must NOT do any allocations during this callback. There is a chance
  // that the allocator is no longer active on this thread.
  reinterpret_cast<ThreadData*>(thread_data)->OnThreadTerminationCleanup();
}

void ThreadData::OnThreadTerminationCleanup() {
  // The list_lock_ was created when we registered the callback, so it won't be
  // allocated here despite the lazy reference.
  base::AutoLock lock(*list_lock_.Pointer());
  if (incarnation_counter_ != incarnation_count_for_pool_)
    return;  // ThreadData was constructed in an earlier unit test.
  ++cleanup_count_;
  // Only worker threads need to be retired and reused.
  if (!worker_thread_number_) {
    return;
  }
  // We must NOT do any allocations during this callback.
  // Using the simple linked lists avoids all allocations.
  DCHECK_EQ(this->next_retired_worker_, reinterpret_cast<ThreadData*>(NULL));
  this->next_retired_worker_ = first_retired_worker_;
  first_retired_worker_ = this;
}

// static
void ThreadData::Snapshot(int current_profiling_phase,
                          ProcessDataSnapshot* process_data_snapshot) {
  // Get an unchanging copy of a ThreadData list.
  ThreadData* my_list = ThreadData::first();

  // Gather data serially.
  // This hackish approach *can* get some slightly corrupt tallies, as we are
  // grabbing values without the protection of a lock, but it has the advantage
  // of working even with threads that don't have message loops. If a user
  // sees any strangeness, they can always just run their stats gathering a
  // second time.
  BirthCountMap birth_counts;
  for (ThreadData* thread_data = my_list; thread_data;
       thread_data = thread_data->next()) {
    thread_data->SnapshotExecutedTasks(current_profiling_phase,
                                       &process_data_snapshot->phased_snapshots,
                                       &birth_counts);
  }

  // Add births that are still active -- i.e. objects that have tallied a birth,
  // but have not yet tallied a matching death, and hence must be either
  // running, queued up, or being held in limbo for future posting.
  auto* current_phase_tasks =
      &process_data_snapshot->phased_snapshots[current_profiling_phase].tasks;
  for (const auto& birth_count : birth_counts) {
    if (birth_count.second > 0) {
      current_phase_tasks->push_back(
          TaskSnapshot(BirthOnThreadSnapshot(*birth_count.first),
                       DeathDataSnapshot(birth_count.second, 0, 0, 0, 0, 0, 0),
                       "Still_Alive"));
    }
  }
}

// static
void ThreadData::OnProfilingPhaseCompleted(int profiling_phase) {
  // Get an unchanging copy of a ThreadData list.
  ThreadData* my_list = ThreadData::first();

  // Add snapshots for all instances of death data in all threads serially.
  // This hackish approach *can* get some slightly corrupt tallies, as we are
  // grabbing values without the protection of a lock, but it has the advantage
  // of working even with threads that don't have message loops. Any corruption
  // shouldn't cause "cascading damage" to anything else (in later phases).
  for (ThreadData* thread_data = my_list; thread_data;
       thread_data = thread_data->next()) {
    thread_data->OnProfilingPhaseCompletedOnThread(profiling_phase);
  }
}

Births* ThreadData::TallyABirth(const Location& location) {
  BirthMap::iterator it = birth_map_.find(location);
  Births* child;
  if (it != birth_map_.end()) {
    child = it->second;
    child->RecordBirth();
  } else {
    child = new Births(location, *this);  // Leak this.
    // Lock since the map may get relocated now, and other threads sometimes
    // snapshot it (but they lock before copying it).
    base::AutoLock lock(map_lock_);
    birth_map_[location] = child;
  }

  if (kTrackParentChildLinks && status_ > PROFILING_ACTIVE &&
      !parent_stack_.empty()) {
    const Births* parent = parent_stack_.top();
    ParentChildPair pair(parent, child);
    if (parent_child_set_.find(pair) == parent_child_set_.end()) {
      // Lock since the map may get relocated now, and other threads sometimes
      // snapshot it (but they lock before copying it).
      base::AutoLock lock(map_lock_);
      parent_child_set_.insert(pair);
    }
  }

  return child;
}

void ThreadData::TallyADeath(const Births& births,
                             int32 queue_duration,
                             const TaskStopwatch& stopwatch) {
  int32 run_duration = stopwatch.RunDurationMs();

  // Stir in some randomness, plus add constant in case durations are zero.
  const uint32 kSomePrimeNumber = 2147483647;
  random_number_ += queue_duration + run_duration + kSomePrimeNumber;
  // An address is going to have some randomness to it as well ;-).
  random_number_ ^= static_cast<uint32>(&births - reinterpret_cast<Births*>(0));

  // We don't have queue durations without OS timer. OS timer is automatically
  // used for task-post-timing, so the use of an alternate timer implies all
  // queue times are invalid, unless it was explicitly said that we can trust
  // the alternate timer.
  if (kAllowAlternateTimeSourceHandling &&
      now_function_ &&
      !now_function_is_time_) {
    queue_duration = 0;
  }

  DeathMap::iterator it = death_map_.find(&births);
  DeathData* death_data;
  if (it != death_map_.end()) {
    death_data = &it->second;
  } else {
    base::AutoLock lock(map_lock_);  // Lock as the map may get relocated now.
    death_data = &death_map_[&births];
  }  // Release lock ASAP.
  death_data->RecordDeath(queue_duration, run_duration, random_number_);

  if (!kTrackParentChildLinks)
    return;
  if (!parent_stack_.empty()) {  // We might get turned off.
    DCHECK_EQ(parent_stack_.top(), &births);
    parent_stack_.pop();
  }
}

// static
Births* ThreadData::TallyABirthIfActive(const Location& location) {
  if (!TrackingStatus())
    return NULL;
  ThreadData* current_thread_data = Get();
  if (!current_thread_data)
    return NULL;
  return current_thread_data->TallyABirth(location);
}

// static
void ThreadData::TallyRunOnNamedThreadIfTracking(
    const base::TrackingInfo& completed_task,
    const TaskStopwatch& stopwatch) {
  // Even if we have been DEACTIVATED, we will process any pending births so
  // that our data structures (which counted the outstanding births) remain
  // looking up-to-date.
  const Births* births = completed_task.birth_tally;
  if (!births)
    return;

  ThreadData* current_thread_data = stopwatch.GetThreadData();
  if (!current_thread_data)
    return;

  // Watch out for a race where status_ is changing, and hence one or both
  // of start_of_run or end_of_run is zero. In that case, we didn't bother to
  // get a time value since we "weren't tracking" and we were trying to be
  // efficient by not calling for a genuine time value. For simplicity, we'll
  // use a default zero duration when we can't calculate a true value.
  TrackedTime start_of_run = stopwatch.StartTime();
  int32 queue_duration = 0;
  if (!start_of_run.is_null()) {
    queue_duration = (start_of_run - completed_task.EffectiveTimePosted())
                         .InMilliseconds();
  }
  current_thread_data->TallyADeath(*births, queue_duration, stopwatch);
}
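// Worked example: a task posted at T=100ms whose stopwatch starts at T=250ms
// and stops at T=260ms is tallied with queue_duration == 150 and
// run_duration == 10 (both in milliseconds, the latter minus any
// nested-stopwatch exclusions).
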
// static
void ThreadData::TallyRunOnWorkerThreadIfTracking(
    const Births* births,
    const TrackedTime& time_posted,
    const TaskStopwatch& stopwatch) {
  // Even if we have been DEACTIVATED, we will process any pending births so
  // that our data structures (which counted the outstanding births) remain
  // looking up-to-date.
  if (!births)
    return;

  // TODO(jar): Support the option to coalesce all worker-thread activity under
  // one ThreadData instance that uses locks to protect *all* access. This will
  // reduce memory (making it provably bounded), but run incrementally slower
  // (since we'll use locks on TallyABirth and TallyADeath). The good news is
  // that the locks on TallyADeath will be *after* the worker thread has run,
  // and hence nothing will be waiting for the completion (... besides some
  // other thread that might like to run). Also, the worker threads' tasks are
  // generally longer, and hence the cost of the lock may perchance be
  // amortized over the long task's lifetime.
  ThreadData* current_thread_data = stopwatch.GetThreadData();
  if (!current_thread_data)
    return;

  TrackedTime start_of_run = stopwatch.StartTime();
  int32 queue_duration = 0;
  if (!start_of_run.is_null()) {
    queue_duration = (start_of_run - time_posted).InMilliseconds();
  }
  current_thread_data->TallyADeath(*births, queue_duration, stopwatch);
}

// static
void ThreadData::TallyRunInAScopedRegionIfTracking(
    const Births* births,
    const TaskStopwatch& stopwatch) {
  // Even if we have been DEACTIVATED, we will process any pending births so
  // that our data structures (which counted the outstanding births) remain
  // looking up-to-date.
  if (!births)
    return;

  ThreadData* current_thread_data = stopwatch.GetThreadData();
  if (!current_thread_data)
    return;

  int32 queue_duration = 0;
  current_thread_data->TallyADeath(*births, queue_duration, stopwatch);
}

void ThreadData::SnapshotExecutedTasks(
    int current_profiling_phase,
    PhasedProcessDataSnapshotMap* phased_snapshots,
    BirthCountMap* birth_counts) {
  // Get copy of data, so that the data will not change during the iterations
  // and processing.
  BirthMap birth_map;
  DeathsSnapshot deaths;
  ParentChildSet parent_child_set;
  SnapshotMaps(current_profiling_phase, &birth_map, &deaths, &parent_child_set);

  for (const auto& birth : birth_map) {
    (*birth_counts)[birth.second] += birth.second->birth_count();
  }

  for (const auto& death : deaths) {
    (*birth_counts)[death.first] -= death.first->birth_count();

    // For the current death data, walk through all its snapshots, starting from
    // the current one, then from the previous profiling phase etc., and for
    // each snapshot calculate the delta between the snapshot and the previous
    // phase, if any. Store the deltas in the result.
    for (const DeathDataPhaseSnapshot* phase = &death.second; phase;
         phase = phase->prev) {
      const DeathDataSnapshot& death_data =
          phase->prev ? phase->death_data.Delta(phase->prev->death_data)
                      : phase->death_data;

      if (death_data.count > 0) {
        (*phased_snapshots)[phase->profiling_phase].tasks.push_back(
            TaskSnapshot(BirthOnThreadSnapshot(*death.first), death_data,
                         thread_name()));
      }
    }
  }
}

// This may be called from another thread.
void ThreadData::SnapshotMaps(int profiling_phase,
                              BirthMap* birth_map,
                              DeathsSnapshot* deaths,
                              ParentChildSet* parent_child_set) {
  base::AutoLock lock(map_lock_);

  for (const auto& birth : birth_map_)
    (*birth_map)[birth.first] = birth.second;

  for (const auto& death : death_map_) {
    deaths->push_back(std::make_pair(
        death.first,
        DeathDataPhaseSnapshot(profiling_phase, death.second.count(),
                               death.second.run_duration_sum(),
                               death.second.run_duration_max(),
                               death.second.run_duration_sample(),
                               death.second.queue_duration_sum(),
                               death.second.queue_duration_max(),
                               death.second.queue_duration_sample(),
                               death.second.last_phase_snapshot())));
  }

  if (!kTrackParentChildLinks)
    return;

  for (const auto& parent_child : parent_child_set_)
    parent_child_set->insert(parent_child);
}

void ThreadData::OnProfilingPhaseCompletedOnThread(int profiling_phase) {
  base::AutoLock lock(map_lock_);

  for (auto& death : death_map_) {
    death.second.OnProfilingPhaseCompleted(profiling_phase);
  }
}

static void OptionallyInitializeAlternateTimer() {
  NowFunction* alternate_time_source = GetAlternateTimeSource();
  if (alternate_time_source)
    ThreadData::SetAlternateTimeSource(alternate_time_source);
}

// static
bool ThreadData::Initialize() {
  if (status_ >= DEACTIVATED)
    return true;  // Someone else did the initialization.
  // Due to racy lazy initialization in tests, we'll need to recheck status_
  // after we acquire the lock.

  // Ensure that we don't double initialize tls. We are called when single
  // threaded in the product, but some tests may be racy and lazy about our
  // initialization.
  base::AutoLock lock(*list_lock_.Pointer());
  if (status_ >= DEACTIVATED)
    return true;  // Someone raced in here and beat us.

  // Put an alternate timer in place if the environment calls for it, such as
  // for tracking TCMalloc allocations. This insertion is idempotent, so we
  // don't mind if there is a race, and we'd prefer not to be in a lock while
  // doing this work.
  if (kAllowAlternateTimeSourceHandling)
    OptionallyInitializeAlternateTimer();

  // Perform the "real" TLS initialization now, and leave it intact through
  // process termination.
  if (!tls_index_.initialized()) {  // Testing may have initialized this.
    DCHECK_EQ(status_, UNINITIALIZED);
    tls_index_.Initialize(&ThreadData::OnThreadTermination);
    if (!tls_index_.initialized())
      return false;
  } else {
    // TLS was initialized for us earlier.
    DCHECK_EQ(status_, DORMANT_DURING_TESTS);
  }

  // Incarnation counter is only significant to testing, as it otherwise will
  // never again change in this process.
  ++incarnation_counter_;

  // The lock is not critical for setting status_, but it doesn't hurt. It also
  // ensures that if we have a racy initialization, that we'll bail as soon as
  // we get the lock earlier in this method.
  status_ = kInitialStartupState;
  if (!kTrackParentChildLinks &&
      kInitialStartupState == PROFILING_CHILDREN_ACTIVE)
    status_ = PROFILING_ACTIVE;
  DCHECK(status_ != UNINITIALIZED);
  return true;
}

// static
bool ThreadData::InitializeAndSetTrackingStatus(Status status) {
  DCHECK_GE(status, DEACTIVATED);
  DCHECK_LE(status, PROFILING_CHILDREN_ACTIVE);

  if (!Initialize())  // No-op if already initialized.
    return false;  // Not compiled in.

  if (!kTrackParentChildLinks && status > DEACTIVATED)
    status = PROFILING_ACTIVE;
  status_ = status;
  return true;
}

// static
ThreadData::Status ThreadData::status() {
  return status_;
}

// static
bool ThreadData::TrackingStatus() {
  return status_ > DEACTIVATED;
}

// static
bool ThreadData::TrackingParentChildStatus() {
  return status_ >= PROFILING_CHILDREN_ACTIVE;
}

// static
void ThreadData::PrepareForStartOfRun(const Births* parent) {
  if (kTrackParentChildLinks && parent && status_ > PROFILING_ACTIVE) {
    ThreadData* current_thread_data = Get();
    if (current_thread_data)
      current_thread_data->parent_stack_.push(parent);
  }
}

// static
void ThreadData::SetAlternateTimeSource(NowFunction* now_function) {
  DCHECK(now_function);
  if (kAllowAlternateTimeSourceHandling)
    now_function_ = now_function;
}

// static
void ThreadData::EnableProfilerTiming() {
  base::subtle::NoBarrier_Store(&g_profiler_timing_enabled, ENABLED_TIMING);
}

// static
TrackedTime ThreadData::Now() {
  if (kAllowAlternateTimeSourceHandling && now_function_)
    return TrackedTime::FromMilliseconds((*now_function_)());
  if (IsProfilerTimingEnabled() && TrackingStatus())
    return TrackedTime::Now();
  return TrackedTime();  // Super fast when disabled, or not compiled.
}

// static
void ThreadData::EnsureCleanupWasCalled(int major_threads_shutdown_count) {
  base::AutoLock lock(*list_lock_.Pointer());
  if (worker_thread_data_creation_count_ == 0)
    return;  // We haven't really run much, and couldn't have leaked.

  // TODO(jar): until this is working on XP, don't run the real test.
#if 0
  // Verify that we've at least shutdown/cleanup the major named threads. The
  // caller should tell us how many thread shutdowns should have taken place by
  // now.
  CHECK_GT(cleanup_count_, major_threads_shutdown_count);
#endif
}

// static
void ThreadData::ShutdownSingleThreadedCleanup(bool leak) {
  // This is only called from test code, where we need to cleanup so that
  // additional tests can be run.
  // We must be single threaded... but be careful anyway.
  if (!InitializeAndSetTrackingStatus(DEACTIVATED))
    return;
  ThreadData* thread_data_list;
  {
    base::AutoLock lock(*list_lock_.Pointer());
    thread_data_list = all_thread_data_list_head_;
    all_thread_data_list_head_ = NULL;
    ++incarnation_counter_;
    // To be clean, break apart the retired worker list (though we leak them).
    while (first_retired_worker_) {
      ThreadData* worker = first_retired_worker_;
      CHECK_GT(worker->worker_thread_number_, 0);
      first_retired_worker_ = worker->next_retired_worker_;
      worker->next_retired_worker_ = NULL;
    }
  }

  // Put most global static back in pristine shape.
  worker_thread_data_creation_count_ = 0;
  cleanup_count_ = 0;
  tls_index_.Set(NULL);
  status_ = DORMANT_DURING_TESTS;  // Almost UNINITIALIZED.

  // To avoid any chance of racing in unit tests, which is the only place we
  // call this function, we may sometimes leak all the data structures we
  // recovered, as they may still be in use on threads from prior tests!
  if (leak) {
    ThreadData* thread_data = thread_data_list;
    while (thread_data) {
      ANNOTATE_LEAKING_OBJECT_PTR(thread_data);
      thread_data = thread_data->next();
    }
    return;
  }

  // When we want to cleanup (on a single thread), here is what we do.

  // Do actual recursive delete in all ThreadData instances.
  while (thread_data_list) {
    ThreadData* next_thread_data = thread_data_list;
    thread_data_list = thread_data_list->next();

    for (BirthMap::iterator it = next_thread_data->birth_map_.begin();
         next_thread_data->birth_map_.end() != it; ++it)
      delete it->second;  // Delete the Birth Records.
    delete next_thread_data;  // Includes all Death Records.
  }
}

//------------------------------------------------------------------------------
TaskStopwatch::TaskStopwatch()
    : wallclock_duration_ms_(0),
      current_thread_data_(NULL),
      excluded_duration_ms_(0),
      parent_(NULL) {
#if DCHECK_IS_ON()
  state_ = CREATED;
  child_ = NULL;
#endif
}

TaskStopwatch::~TaskStopwatch() {
#if DCHECK_IS_ON()
  DCHECK(state_ != RUNNING);
  DCHECK(child_ == NULL);
#endif
}

void TaskStopwatch::Start() {
#if DCHECK_IS_ON()
  DCHECK(state_ == CREATED);
  state_ = RUNNING;
#endif

  start_time_ = ThreadData::Now();

  current_thread_data_ = ThreadData::Get();
  if (!current_thread_data_)
    return;

  parent_ = current_thread_data_->current_stopwatch_;
#if DCHECK_IS_ON()
  if (parent_) {
    DCHECK(parent_->state_ == RUNNING);
    DCHECK(parent_->child_ == NULL);
    parent_->child_ = this;
  }
#endif
  current_thread_data_->current_stopwatch_ = this;
}

void TaskStopwatch::Stop() {
  const TrackedTime end_time = ThreadData::Now();
#if DCHECK_IS_ON()
  DCHECK(state_ == RUNNING);
  state_ = STOPPED;
  DCHECK(child_ == NULL);
#endif

  if (!start_time_.is_null() && !end_time.is_null()) {
    wallclock_duration_ms_ = (end_time - start_time_).InMilliseconds();
  }

  if (!current_thread_data_)
    return;

  DCHECK(current_thread_data_->current_stopwatch_ == this);
  current_thread_data_->current_stopwatch_ = parent_;
  if (!parent_)
    return;

#if DCHECK_IS_ON()
  DCHECK(parent_->state_ == RUNNING);
  DCHECK(parent_->child_ == this);
  parent_->child_ = NULL;
#endif
  parent_->excluded_duration_ms_ += wallclock_duration_ms_;
}

TrackedTime TaskStopwatch::StartTime() const {
#if DCHECK_IS_ON()
  DCHECK(state_ != CREATED);
#endif

  return start_time_;
}

int32 TaskStopwatch::RunDurationMs() const {
#if DCHECK_IS_ON()
  DCHECK(state_ == STOPPED);
#endif

  return wallclock_duration_ms_ - excluded_duration_ms_;
}
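// Worked example: if this stopwatch spans 50ms of wall-clock time, and a
// nested TaskStopwatch run inside it accounted for 20ms, then
// excluded_duration_ms_ is 20 and RunDurationMs() reports 30.
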
ThreadData* TaskStopwatch::GetThreadData() const {
#if DCHECK_IS_ON()
  DCHECK(state_ != CREATED);
#endif

  return current_thread_data_;
}

//------------------------------------------------------------------------------
// DeathDataPhaseSnapshot

DeathDataPhaseSnapshot::DeathDataPhaseSnapshot(
    int profiling_phase,
    int count,
    int32 run_duration_sum,
    int32 run_duration_max,
    int32 run_duration_sample,
    int32 queue_duration_sum,
    int32 queue_duration_max,
    int32 queue_duration_sample,
    const DeathDataPhaseSnapshot* prev)
    : profiling_phase(profiling_phase),
      death_data(count,
                 run_duration_sum,
                 run_duration_max,
                 run_duration_sample,
                 queue_duration_sum,
                 queue_duration_max,
                 queue_duration_sample),
      prev(prev) {
}

//------------------------------------------------------------------------------
// TaskSnapshot

TaskSnapshot::TaskSnapshot() {
}

TaskSnapshot::TaskSnapshot(const BirthOnThreadSnapshot& birth,
                           const DeathDataSnapshot& death_data,
                           const std::string& death_thread_name)
    : birth(birth),
      death_data(death_data),
      death_thread_name(death_thread_name) {
}

TaskSnapshot::~TaskSnapshot() {
}

//------------------------------------------------------------------------------
// ParentChildPairSnapshot

ParentChildPairSnapshot::ParentChildPairSnapshot() {
}

ParentChildPairSnapshot::ParentChildPairSnapshot(
    const ThreadData::ParentChildPair& parent_child)
    : parent(*parent_child.first),
      child(*parent_child.second) {
}

ParentChildPairSnapshot::~ParentChildPairSnapshot() {
}

//------------------------------------------------------------------------------
// ProcessDataPhaseSnapshot

ProcessDataPhaseSnapshot::ProcessDataPhaseSnapshot() {
}

ProcessDataPhaseSnapshot::~ProcessDataPhaseSnapshot() {
}

//------------------------------------------------------------------------------
// ProcessDataSnapshot

ProcessDataSnapshot::ProcessDataSnapshot()
#if !defined(OS_NACL)
    : process_id(base::GetCurrentProcId()) {
#else
    : process_id(base::kNullProcessId) {
#endif
}

ProcessDataSnapshot::~ProcessDataSnapshot() {
}

}  // namespace tracked_objects