Roll src/third_party/skia d7c014f:95cc012
[chromium-blink-merge.git] / base / tracked_objects.cc
blob574745813ad20cffb09699ff1774ec24b3676e5a
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
#include "base/tracked_objects.h"

#include <limits.h>
#include <stdint.h>
#include <stdlib.h>

#include "base/atomicops.h"
#include "base/base_switches.h"
#include "base/command_line.h"
#include "base/compiler_specific.h"
#include "base/debug/leak_annotations.h"
#include "base/logging.h"
#include "base/process/process_handle.h"
#include "base/profiler/alternate_timer.h"
#include "base/strings/stringprintf.h"
#include "base/third_party/valgrind/memcheck.h"
#include "base/tracking_info.h"
22 using base::TimeDelta;
24 namespace base {
25 class TimeDelta;
28 namespace tracked_objects {
30 namespace {
31 // TODO(jar): Evaluate the perf impact of enabling this. If the perf impact is
32 // negligible, enable by default.
33 // Flag to compile out parent-child link recording.
34 const bool kTrackParentChildLinks = false;
36 // When ThreadData is first initialized, should we start in an ACTIVE state to
37 // record all of the startup-time tasks, or should we start up DEACTIVATED, so
38 // that we only record after parsing the command line flag --enable-tracking.
39 // Note that the flag may force either state, so this really controls only the
40 // period of time up until that flag is parsed. If there is no flag seen, then
41 // this state may prevail for much or all of the process lifetime.
42 const ThreadData::Status kInitialStartupState =
43 ThreadData::PROFILING_CHILDREN_ACTIVE;
45 // Control whether an alternate time source (Now() function) is supported by
46 // the ThreadData class. This compile time flag should be set to true if we
47 // want other modules (such as a memory allocator, or a thread-specific CPU time
48 // clock) to be able to provide a thread-specific Now() function. Without this
49 // compile-time flag, the code will only support the wall-clock time. This flag
50 // can be flipped to efficiently disable this path (if there is a performance
51 // problem with its presence).
52 static const bool kAllowAlternateTimeSourceHandling = true;
54 // Possible states of the profiler timing enabledness.
55 enum {
56 UNDEFINED_TIMING,
57 ENABLED_TIMING,
58 DISABLED_TIMING,
61 // State of the profiler timing enabledness.
62 base::subtle::Atomic32 g_profiler_timing_enabled = UNDEFINED_TIMING;
64 // Returns whether profiler timing is enabled. The default is true, but this
65 // may be overridden by a command-line flag. Some platforms may
66 // programmatically set this command-line flag to the "off" value if it's not
67 // specified.
68 // This in turn can be overridden by explicitly calling
69 // ThreadData::EnableProfilerTiming, say, based on a field trial.
70 inline bool IsProfilerTimingEnabled() {
71 // Reading |g_profiler_timing_enabled| is done without barrier because
72 // multiple initialization is not an issue while the barrier can be relatively
73 // costly given that this method is sometimes called in a tight loop.
74 base::subtle::Atomic32 current_timing_enabled =
75 base::subtle::NoBarrier_Load(&g_profiler_timing_enabled);
76 if (current_timing_enabled == UNDEFINED_TIMING) {
77 if (!base::CommandLine::InitializedForCurrentProcess())
78 return true;
79 current_timing_enabled =
80 (base::CommandLine::ForCurrentProcess()->GetSwitchValueASCII(
81 switches::kProfilerTiming) ==
82 switches::kProfilerTimingDisabledValue)
83 ? DISABLED_TIMING
84 : ENABLED_TIMING;
85 base::subtle::NoBarrier_Store(&g_profiler_timing_enabled,
86 current_timing_enabled);
88 return current_timing_enabled == ENABLED_TIMING;
91 } // namespace
93 //------------------------------------------------------------------------------
94 // DeathData tallies durations when a death takes place.
96 DeathData::DeathData()
97 : count_(0),
98 sample_probability_count_(0),
99 run_duration_sum_(0),
100 queue_duration_sum_(0),
101 run_duration_max_(0),
102 queue_duration_max_(0),
103 run_duration_sample_(0),
104 queue_duration_sample_(0),
105 last_phase_snapshot_(nullptr) {
108 DeathData::DeathData(const DeathData& other)
109 : count_(other.count_),
110 sample_probability_count_(other.sample_probability_count_),
111 run_duration_sum_(other.run_duration_sum_),
112 queue_duration_sum_(other.queue_duration_sum_),
113 run_duration_max_(other.run_duration_max_),
114 queue_duration_max_(other.queue_duration_max_),
115 run_duration_sample_(other.run_duration_sample_),
116 queue_duration_sample_(other.queue_duration_sample_),
117 last_phase_snapshot_(nullptr) {
118 // This constructor will be used by std::map when adding new DeathData values
119 // to the map. At that point, last_phase_snapshot_ is still NULL, so we don't
120 // need to worry about ownership transfer.
121 DCHECK(other.last_phase_snapshot_ == nullptr);
124 DeathData::~DeathData() {
125 while (last_phase_snapshot_) {
126 const DeathDataPhaseSnapshot* snapshot = last_phase_snapshot_;
127 last_phase_snapshot_ = snapshot->prev;
128 delete snapshot;
// TODO(jar): I need to see if this macro to optimize branching is worth using.
//
// Branch-free conditional assignment, equivalent to:
//   if (assign_it)
//     target = source;
// |assign_it| is turned into an all-ones or all-zeros mask which selects
// whether the XOR difference between |target| and |source| is applied.
// A macro is used rather than a template to force this to inline.
// Related code for calculating max is discussed on the web.
#define CONDITIONAL_ASSIGN(assign_it, target, source) \
  ((target) ^= (-static_cast<int32>(assign_it) & ((target) ^ (source))))
142 void DeathData::RecordDeath(const int32 queue_duration,
143 const int32 run_duration,
144 const uint32 random_number) {
145 // We'll just clamp at INT_MAX, but we should note this in the UI as such.
146 if (count_ < INT_MAX)
147 ++count_;
149 int sample_probability_count = sample_probability_count_;
150 if (sample_probability_count < INT_MAX)
151 ++sample_probability_count;
152 sample_probability_count_ = sample_probability_count;
154 queue_duration_sum_ += queue_duration;
155 run_duration_sum_ += run_duration;
157 if (queue_duration_max_ < queue_duration)
158 queue_duration_max_ = queue_duration;
159 if (run_duration_max_ < run_duration)
160 run_duration_max_ = run_duration;
162 // Take a uniformly distributed sample over all durations ever supplied during
163 // the current profiling phase.
164 // The probability that we (instead) use this new sample is
165 // 1/sample_probability_count_. This results in a completely uniform selection
166 // of the sample (at least when we don't clamp sample_probability_count_...
167 // but that should be inconsequentially likely). We ignore the fact that we
168 // correlated our selection of a sample to the run and queue times (i.e., we
169 // used them to generate random_number).
170 CHECK_GT(sample_probability_count, 0);
171 if (0 == (random_number % sample_probability_count)) {
172 queue_duration_sample_ = queue_duration;
173 run_duration_sample_ = run_duration;
177 int DeathData::count() const { return count_; }
179 int32 DeathData::run_duration_sum() const { return run_duration_sum_; }
181 int32 DeathData::run_duration_max() const { return run_duration_max_; }
183 int32 DeathData::run_duration_sample() const {
184 return run_duration_sample_;
187 int32 DeathData::queue_duration_sum() const {
188 return queue_duration_sum_;
191 int32 DeathData::queue_duration_max() const {
192 return queue_duration_max_;
195 int32 DeathData::queue_duration_sample() const {
196 return queue_duration_sample_;
199 const DeathDataPhaseSnapshot* DeathData::last_phase_snapshot() const {
200 return last_phase_snapshot_;
203 void DeathData::OnProfilingPhaseCompleted(int profiling_phase) {
204 // Snapshotting and storing current state.
205 last_phase_snapshot_ = new DeathDataPhaseSnapshot(
206 profiling_phase, count_, run_duration_sum_, run_duration_max_,
207 run_duration_sample_, queue_duration_sum_, queue_duration_max_,
208 queue_duration_sample_, last_phase_snapshot_);
210 // Not touching fields for which a delta can be computed by comparing with a
211 // snapshot from the previous phase. Resetting other fields. Sample values
212 // will be reset upon next death recording because sample_probability_count_
213 // is set to 0.
214 // We avoid resetting to 0 in favor of deltas whenever possible. The reason
215 // is that for incrementable fields, resetting to 0 from the snapshot thread
216 // potentially in parallel with incrementing in the death thread may result in
217 // significant data corruption that has a potential to grow with time. Not
218 // resetting incrementable fields and using deltas will cause any
219 // off-by-little corruptions to be likely fixed at the next snapshot.
220 // The max values are not incrementable, and cannot be deduced using deltas
221 // for a given phase. Hence, we have to reset them to 0. But the potential
222 // damage is limited to getting the previous phase's max to apply for the next
223 // phase, and the error doesn't have a potential to keep growing with new
224 // resets.
225 // sample_probability_count_ is incrementable, but must be reset to 0 at the
226 // phase end, so that we start a new uniformly randomized sample selection
227 // after the reset. Corruptions due to race conditions are possible, but the
228 // damage is limited to selecting a wrong sample, which is not something that
229 // can cause accumulating or cascading effects.
230 // If there were no corruptions caused by race conditions, we never send a
231 // sample for the previous phase in the next phase's snapshot because
232 // ThreadData::SnapshotExecutedTasks doesn't send deltas with 0 count.
233 sample_probability_count_ = 0;
234 run_duration_max_ = 0;
235 queue_duration_max_ = 0;
238 //------------------------------------------------------------------------------
239 DeathDataSnapshot::DeathDataSnapshot()
240 : count(-1),
241 run_duration_sum(-1),
242 run_duration_max(-1),
243 run_duration_sample(-1),
244 queue_duration_sum(-1),
245 queue_duration_max(-1),
246 queue_duration_sample(-1) {
249 DeathDataSnapshot::DeathDataSnapshot(int count,
250 int32 run_duration_sum,
251 int32 run_duration_max,
252 int32 run_duration_sample,
253 int32 queue_duration_sum,
254 int32 queue_duration_max,
255 int32 queue_duration_sample)
256 : count(count),
257 run_duration_sum(run_duration_sum),
258 run_duration_max(run_duration_max),
259 run_duration_sample(run_duration_sample),
260 queue_duration_sum(queue_duration_sum),
261 queue_duration_max(queue_duration_max),
262 queue_duration_sample(queue_duration_sample) {
265 DeathDataSnapshot::~DeathDataSnapshot() {
268 DeathDataSnapshot DeathDataSnapshot::Delta(
269 const DeathDataSnapshot& older) const {
270 return DeathDataSnapshot(count - older.count,
271 run_duration_sum - older.run_duration_sum,
272 run_duration_max, run_duration_sample,
273 queue_duration_sum - older.queue_duration_sum,
274 queue_duration_max, queue_duration_sample);
277 //------------------------------------------------------------------------------
278 BirthOnThread::BirthOnThread(const Location& location,
279 const ThreadData& current)
280 : location_(location),
281 birth_thread_(&current) {
284 //------------------------------------------------------------------------------
285 BirthOnThreadSnapshot::BirthOnThreadSnapshot() {
288 BirthOnThreadSnapshot::BirthOnThreadSnapshot(const BirthOnThread& birth)
289 : location(birth.location()),
290 thread_name(birth.birth_thread()->thread_name()) {
293 BirthOnThreadSnapshot::~BirthOnThreadSnapshot() {
296 //------------------------------------------------------------------------------
297 Births::Births(const Location& location, const ThreadData& current)
298 : BirthOnThread(location, current),
299 birth_count_(1) { }
301 int Births::birth_count() const { return birth_count_; }
303 void Births::RecordBirth() { ++birth_count_; }
305 //------------------------------------------------------------------------------
306 // ThreadData maintains the central data for all births and deaths on a single
307 // thread.
309 // TODO(jar): We should pull all these static vars together, into a struct, and
310 // optimize layout so that we benefit from locality of reference during accesses
311 // to them.
313 // static
314 NowFunction* ThreadData::now_function_ = NULL;
316 // static
317 bool ThreadData::now_function_is_time_ = false;
319 // A TLS slot which points to the ThreadData instance for the current thread.
320 // We do a fake initialization here (zeroing out data), and then the real
321 // in-place construction happens when we call tls_index_.Initialize().
322 // static
323 base::ThreadLocalStorage::StaticSlot ThreadData::tls_index_ = TLS_INITIALIZER;
325 // static
326 int ThreadData::worker_thread_data_creation_count_ = 0;
328 // static
329 int ThreadData::cleanup_count_ = 0;
331 // static
332 int ThreadData::incarnation_counter_ = 0;
334 // static
335 ThreadData* ThreadData::all_thread_data_list_head_ = NULL;
337 // static
338 ThreadData* ThreadData::first_retired_worker_ = NULL;
340 // static
341 base::LazyInstance<base::Lock>::Leaky
342 ThreadData::list_lock_ = LAZY_INSTANCE_INITIALIZER;
344 // static
345 ThreadData::Status ThreadData::status_ = ThreadData::UNINITIALIZED;
347 ThreadData::ThreadData(const std::string& suggested_name)
348 : next_(NULL),
349 next_retired_worker_(NULL),
350 worker_thread_number_(0),
351 incarnation_count_for_pool_(-1),
352 current_stopwatch_(NULL) {
353 DCHECK_GE(suggested_name.size(), 0u);
354 thread_name_ = suggested_name;
355 PushToHeadOfList(); // Which sets real incarnation_count_for_pool_.
358 ThreadData::ThreadData(int thread_number)
359 : next_(NULL),
360 next_retired_worker_(NULL),
361 worker_thread_number_(thread_number),
362 incarnation_count_for_pool_(-1),
363 current_stopwatch_(NULL) {
364 CHECK_GT(thread_number, 0);
365 base::StringAppendF(&thread_name_, "WorkerThread-%d", thread_number);
366 PushToHeadOfList(); // Which sets real incarnation_count_for_pool_.
369 ThreadData::~ThreadData() {
372 void ThreadData::PushToHeadOfList() {
373 // Toss in a hint of randomness (atop the uniniitalized value).
374 (void)VALGRIND_MAKE_MEM_DEFINED_IF_ADDRESSABLE(&random_number_,
375 sizeof(random_number_));
376 MSAN_UNPOISON(&random_number_, sizeof(random_number_));
377 random_number_ += static_cast<uint32>(this - static_cast<ThreadData*>(0));
378 random_number_ ^= (Now() - TrackedTime()).InMilliseconds();
380 DCHECK(!next_);
381 base::AutoLock lock(*list_lock_.Pointer());
382 incarnation_count_for_pool_ = incarnation_counter_;
383 next_ = all_thread_data_list_head_;
384 all_thread_data_list_head_ = this;
387 // static
388 ThreadData* ThreadData::first() {
389 base::AutoLock lock(*list_lock_.Pointer());
390 return all_thread_data_list_head_;
393 ThreadData* ThreadData::next() const { return next_; }
395 // static
396 void ThreadData::InitializeThreadContext(const std::string& suggested_name) {
397 if (!Initialize()) // Always initialize if needed.
398 return;
399 ThreadData* current_thread_data =
400 reinterpret_cast<ThreadData*>(tls_index_.Get());
401 if (current_thread_data)
402 return; // Browser tests instigate this.
403 current_thread_data = new ThreadData(suggested_name);
404 tls_index_.Set(current_thread_data);
407 // static
408 ThreadData* ThreadData::Get() {
409 if (!tls_index_.initialized())
410 return NULL; // For unittests only.
411 ThreadData* registered = reinterpret_cast<ThreadData*>(tls_index_.Get());
412 if (registered)
413 return registered;
415 // We must be a worker thread, since we didn't pre-register.
416 ThreadData* worker_thread_data = NULL;
417 int worker_thread_number = 0;
419 base::AutoLock lock(*list_lock_.Pointer());
420 if (first_retired_worker_) {
421 worker_thread_data = first_retired_worker_;
422 first_retired_worker_ = first_retired_worker_->next_retired_worker_;
423 worker_thread_data->next_retired_worker_ = NULL;
424 } else {
425 worker_thread_number = ++worker_thread_data_creation_count_;
429 // If we can't find a previously used instance, then we have to create one.
430 if (!worker_thread_data) {
431 DCHECK_GT(worker_thread_number, 0);
432 worker_thread_data = new ThreadData(worker_thread_number);
434 DCHECK_GT(worker_thread_data->worker_thread_number_, 0);
436 tls_index_.Set(worker_thread_data);
437 return worker_thread_data;
440 // static
441 void ThreadData::OnThreadTermination(void* thread_data) {
442 DCHECK(thread_data); // TLS should *never* call us with a NULL.
443 // We must NOT do any allocations during this callback. There is a chance
444 // that the allocator is no longer active on this thread.
445 reinterpret_cast<ThreadData*>(thread_data)->OnThreadTerminationCleanup();
448 void ThreadData::OnThreadTerminationCleanup() {
449 // The list_lock_ was created when we registered the callback, so it won't be
450 // allocated here despite the lazy reference.
451 base::AutoLock lock(*list_lock_.Pointer());
452 if (incarnation_counter_ != incarnation_count_for_pool_)
453 return; // ThreadData was constructed in an earlier unit test.
454 ++cleanup_count_;
455 // Only worker threads need to be retired and reused.
456 if (!worker_thread_number_) {
457 return;
459 // We must NOT do any allocations during this callback.
460 // Using the simple linked lists avoids all allocations.
461 DCHECK_EQ(this->next_retired_worker_, reinterpret_cast<ThreadData*>(NULL));
462 this->next_retired_worker_ = first_retired_worker_;
463 first_retired_worker_ = this;
466 // static
467 void ThreadData::Snapshot(int current_profiling_phase,
468 ProcessDataSnapshot* process_data_snapshot) {
469 // Get an unchanging copy of a ThreadData list.
470 ThreadData* my_list = ThreadData::first();
472 // Gather data serially.
473 // This hackish approach *can* get some slightly corrupt tallies, as we are
474 // grabbing values without the protection of a lock, but it has the advantage
475 // of working even with threads that don't have message loops. If a user
476 // sees any strangeness, they can always just run their stats gathering a
477 // second time.
478 BirthCountMap birth_counts;
479 for (ThreadData* thread_data = my_list; thread_data;
480 thread_data = thread_data->next()) {
481 thread_data->SnapshotExecutedTasks(current_profiling_phase,
482 &process_data_snapshot->phased_snapshots,
483 &birth_counts);
486 // Add births that are still active -- i.e. objects that have tallied a birth,
487 // but have not yet tallied a matching death, and hence must be either
488 // running, queued up, or being held in limbo for future posting.
489 auto* current_phase_tasks =
490 &process_data_snapshot->phased_snapshots[current_profiling_phase].tasks;
491 for (const auto& birth_count : birth_counts) {
492 if (birth_count.second > 0) {
493 current_phase_tasks->push_back(
494 TaskSnapshot(BirthOnThreadSnapshot(*birth_count.first),
495 DeathDataSnapshot(birth_count.second, 0, 0, 0, 0, 0, 0),
496 "Still_Alive"));
501 // static
502 void ThreadData::OnProfilingPhaseCompleted(int profiling_phase) {
503 // Get an unchanging copy of a ThreadData list.
504 ThreadData* my_list = ThreadData::first();
506 // Add snapshots for all instances of death data in all threads serially.
507 // This hackish approach *can* get some slightly corrupt tallies, as we are
508 // grabbing values without the protection of a lock, but it has the advantage
509 // of working even with threads that don't have message loops. Any corruption
510 // shouldn't cause "cascading damage" to anything else (in later phases).
511 for (ThreadData* thread_data = my_list; thread_data;
512 thread_data = thread_data->next()) {
513 thread_data->OnProfilingPhaseCompletedOnThread(profiling_phase);
517 Births* ThreadData::TallyABirth(const Location& location) {
518 BirthMap::iterator it = birth_map_.find(location);
519 Births* child;
520 if (it != birth_map_.end()) {
521 child = it->second;
522 child->RecordBirth();
523 } else {
524 child = new Births(location, *this); // Leak this.
525 // Lock since the map may get relocated now, and other threads sometimes
526 // snapshot it (but they lock before copying it).
527 base::AutoLock lock(map_lock_);
528 birth_map_[location] = child;
531 if (kTrackParentChildLinks && status_ > PROFILING_ACTIVE &&
532 !parent_stack_.empty()) {
533 const Births* parent = parent_stack_.top();
534 ParentChildPair pair(parent, child);
535 if (parent_child_set_.find(pair) == parent_child_set_.end()) {
536 // Lock since the map may get relocated now, and other threads sometimes
537 // snapshot it (but they lock before copying it).
538 base::AutoLock lock(map_lock_);
539 parent_child_set_.insert(pair);
543 return child;
546 void ThreadData::TallyADeath(const Births& births,
547 int32 queue_duration,
548 const TaskStopwatch& stopwatch) {
549 int32 run_duration = stopwatch.RunDurationMs();
551 // Stir in some randomness, plus add constant in case durations are zero.
552 const uint32 kSomePrimeNumber = 2147483647;
553 random_number_ += queue_duration + run_duration + kSomePrimeNumber;
554 // An address is going to have some randomness to it as well ;-).
555 random_number_ ^= static_cast<uint32>(&births - reinterpret_cast<Births*>(0));
557 // We don't have queue durations without OS timer. OS timer is automatically
558 // used for task-post-timing, so the use of an alternate timer implies all
559 // queue times are invalid, unless it was explicitly said that we can trust
560 // the alternate timer.
561 if (kAllowAlternateTimeSourceHandling &&
562 now_function_ &&
563 !now_function_is_time_) {
564 queue_duration = 0;
567 DeathMap::iterator it = death_map_.find(&births);
568 DeathData* death_data;
569 if (it != death_map_.end()) {
570 death_data = &it->second;
571 } else {
572 base::AutoLock lock(map_lock_); // Lock as the map may get relocated now.
573 death_data = &death_map_[&births];
574 } // Release lock ASAP.
575 death_data->RecordDeath(queue_duration, run_duration, random_number_);
577 if (!kTrackParentChildLinks)
578 return;
579 if (!parent_stack_.empty()) { // We might get turned off.
580 DCHECK_EQ(parent_stack_.top(), &births);
581 parent_stack_.pop();
585 // static
586 Births* ThreadData::TallyABirthIfActive(const Location& location) {
587 if (!TrackingStatus())
588 return NULL;
589 ThreadData* current_thread_data = Get();
590 if (!current_thread_data)
591 return NULL;
592 return current_thread_data->TallyABirth(location);
595 // static
596 void ThreadData::TallyRunOnNamedThreadIfTracking(
597 const base::TrackingInfo& completed_task,
598 const TaskStopwatch& stopwatch) {
599 // Even if we have been DEACTIVATED, we will process any pending births so
600 // that our data structures (which counted the outstanding births) remain
601 // consistent.
602 const Births* births = completed_task.birth_tally;
603 if (!births)
604 return;
605 ThreadData* current_thread_data = stopwatch.GetThreadData();
606 if (!current_thread_data)
607 return;
609 // Watch out for a race where status_ is changing, and hence one or both
610 // of start_of_run or end_of_run is zero. In that case, we didn't bother to
611 // get a time value since we "weren't tracking" and we were trying to be
612 // efficient by not calling for a genuine time value. For simplicity, we'll
613 // use a default zero duration when we can't calculate a true value.
614 TrackedTime start_of_run = stopwatch.StartTime();
615 int32 queue_duration = 0;
616 if (!start_of_run.is_null()) {
617 queue_duration = (start_of_run - completed_task.EffectiveTimePosted())
618 .InMilliseconds();
620 current_thread_data->TallyADeath(*births, queue_duration, stopwatch);
623 // static
624 void ThreadData::TallyRunOnWorkerThreadIfTracking(
625 const Births* births,
626 const TrackedTime& time_posted,
627 const TaskStopwatch& stopwatch) {
628 // Even if we have been DEACTIVATED, we will process any pending births so
629 // that our data structures (which counted the outstanding births) remain
630 // consistent.
631 if (!births)
632 return;
634 // TODO(jar): Support the option to coalesce all worker-thread activity under
635 // one ThreadData instance that uses locks to protect *all* access. This will
636 // reduce memory (making it provably bounded), but run incrementally slower
637 // (since we'll use locks on TallyABirth and TallyADeath). The good news is
638 // that the locks on TallyADeath will be *after* the worker thread has run,
639 // and hence nothing will be waiting for the completion (... besides some
640 // other thread that might like to run). Also, the worker threads tasks are
641 // generally longer, and hence the cost of the lock may perchance be amortized
642 // over the long task's lifetime.
643 ThreadData* current_thread_data = stopwatch.GetThreadData();
644 if (!current_thread_data)
645 return;
647 TrackedTime start_of_run = stopwatch.StartTime();
648 int32 queue_duration = 0;
649 if (!start_of_run.is_null()) {
650 queue_duration = (start_of_run - time_posted).InMilliseconds();
652 current_thread_data->TallyADeath(*births, queue_duration, stopwatch);
655 // static
656 void ThreadData::TallyRunInAScopedRegionIfTracking(
657 const Births* births,
658 const TaskStopwatch& stopwatch) {
659 // Even if we have been DEACTIVATED, we will process any pending births so
660 // that our data structures (which counted the outstanding births) remain
661 // consistent.
662 if (!births)
663 return;
665 ThreadData* current_thread_data = stopwatch.GetThreadData();
666 if (!current_thread_data)
667 return;
669 int32 queue_duration = 0;
670 current_thread_data->TallyADeath(*births, queue_duration, stopwatch);
673 void ThreadData::SnapshotExecutedTasks(
674 int current_profiling_phase,
675 PhasedProcessDataSnapshotMap* phased_snapshots,
676 BirthCountMap* birth_counts) {
677 // Get copy of data, so that the data will not change during the iterations
678 // and processing.
679 BirthMap birth_map;
680 DeathsSnapshot deaths;
681 ParentChildSet parent_child_set;
682 SnapshotMaps(current_profiling_phase, &birth_map, &deaths, &parent_child_set);
684 for (const auto& birth : birth_map) {
685 (*birth_counts)[birth.second] += birth.second->birth_count();
688 for (const auto& death : deaths) {
689 (*birth_counts)[death.first] -= death.first->birth_count();
691 // For the current death data, walk through all its snapshots, starting from
692 // the current one, then from the previous profiling phase etc., and for
693 // each snapshot calculate the delta between the snapshot and the previous
694 // phase, if any. Store the deltas in the result.
695 for (const DeathDataPhaseSnapshot* phase = &death.second; phase;
696 phase = phase->prev) {
697 const DeathDataSnapshot& death_data =
698 phase->prev ? phase->death_data.Delta(phase->prev->death_data)
699 : phase->death_data;
701 if (death_data.count > 0) {
702 (*phased_snapshots)[phase->profiling_phase].tasks.push_back(
703 TaskSnapshot(BirthOnThreadSnapshot(*death.first), death_data,
704 thread_name()));
710 // This may be called from another thread.
711 void ThreadData::SnapshotMaps(int profiling_phase,
712 BirthMap* birth_map,
713 DeathsSnapshot* deaths,
714 ParentChildSet* parent_child_set) {
715 base::AutoLock lock(map_lock_);
717 for (const auto& birth : birth_map_)
718 (*birth_map)[birth.first] = birth.second;
720 for (const auto& death : death_map_) {
721 deaths->push_back(std::make_pair(
722 death.first,
723 DeathDataPhaseSnapshot(profiling_phase, death.second.count(),
724 death.second.run_duration_sum(),
725 death.second.run_duration_max(),
726 death.second.run_duration_sample(),
727 death.second.queue_duration_sum(),
728 death.second.queue_duration_max(),
729 death.second.queue_duration_sample(),
730 death.second.last_phase_snapshot())));
733 if (!kTrackParentChildLinks)
734 return;
736 for (const auto& parent_child : parent_child_set_)
737 parent_child_set->insert(parent_child);
740 void ThreadData::OnProfilingPhaseCompletedOnThread(int profiling_phase) {
741 base::AutoLock lock(map_lock_);
743 for (auto& death : death_map_) {
744 death.second.OnProfilingPhaseCompleted(profiling_phase);
748 static void OptionallyInitializeAlternateTimer() {
749 NowFunction* alternate_time_source = GetAlternateTimeSource();
750 if (alternate_time_source)
751 ThreadData::SetAlternateTimeSource(alternate_time_source);
754 bool ThreadData::Initialize() {
755 if (status_ >= DEACTIVATED)
756 return true; // Someone else did the initialization.
757 // Due to racy lazy initialization in tests, we'll need to recheck status_
758 // after we acquire the lock.
760 // Ensure that we don't double initialize tls. We are called when single
761 // threaded in the product, but some tests may be racy and lazy about our
762 // initialization.
763 base::AutoLock lock(*list_lock_.Pointer());
764 if (status_ >= DEACTIVATED)
765 return true; // Someone raced in here and beat us.
767 // Put an alternate timer in place if the environment calls for it, such as
768 // for tracking TCMalloc allocations. This insertion is idempotent, so we
769 // don't mind if there is a race, and we'd prefer not to be in a lock while
770 // doing this work.
771 if (kAllowAlternateTimeSourceHandling)
772 OptionallyInitializeAlternateTimer();
774 // Perform the "real" TLS initialization now, and leave it intact through
775 // process termination.
776 if (!tls_index_.initialized()) { // Testing may have initialized this.
777 DCHECK_EQ(status_, UNINITIALIZED);
778 tls_index_.Initialize(&ThreadData::OnThreadTermination);
779 if (!tls_index_.initialized())
780 return false;
781 } else {
782 // TLS was initialzed for us earlier.
783 DCHECK_EQ(status_, DORMANT_DURING_TESTS);
786 // Incarnation counter is only significant to testing, as it otherwise will
787 // never again change in this process.
788 ++incarnation_counter_;
790 // The lock is not critical for setting status_, but it doesn't hurt. It also
791 // ensures that if we have a racy initialization, that we'll bail as soon as
792 // we get the lock earlier in this method.
793 status_ = kInitialStartupState;
794 if (!kTrackParentChildLinks &&
795 kInitialStartupState == PROFILING_CHILDREN_ACTIVE)
796 status_ = PROFILING_ACTIVE;
797 DCHECK(status_ != UNINITIALIZED);
798 return true;
801 // static
802 bool ThreadData::InitializeAndSetTrackingStatus(Status status) {
803 DCHECK_GE(status, DEACTIVATED);
804 DCHECK_LE(status, PROFILING_CHILDREN_ACTIVE);
806 if (!Initialize()) // No-op if already initialized.
807 return false; // Not compiled in.
809 if (!kTrackParentChildLinks && status > DEACTIVATED)
810 status = PROFILING_ACTIVE;
811 status_ = status;
812 return true;
815 // static
816 ThreadData::Status ThreadData::status() {
817 return status_;
820 // static
821 bool ThreadData::TrackingStatus() {
822 return status_ > DEACTIVATED;
825 // static
826 bool ThreadData::TrackingParentChildStatus() {
827 return status_ >= PROFILING_CHILDREN_ACTIVE;
830 // static
831 void ThreadData::PrepareForStartOfRun(const Births* parent) {
832 if (kTrackParentChildLinks && parent && status_ > PROFILING_ACTIVE) {
833 ThreadData* current_thread_data = Get();
834 if (current_thread_data)
835 current_thread_data->parent_stack_.push(parent);
839 // static
840 void ThreadData::SetAlternateTimeSource(NowFunction* now_function) {
841 DCHECK(now_function);
842 if (kAllowAlternateTimeSourceHandling)
843 now_function_ = now_function;
846 // static
847 void ThreadData::EnableProfilerTiming() {
848 base::subtle::NoBarrier_Store(&g_profiler_timing_enabled, ENABLED_TIMING);
851 // static
852 TrackedTime ThreadData::Now() {
853 if (kAllowAlternateTimeSourceHandling && now_function_)
854 return TrackedTime::FromMilliseconds((*now_function_)());
855 if (IsProfilerTimingEnabled() && TrackingStatus())
856 return TrackedTime::Now();
857 return TrackedTime(); // Super fast when disabled, or not compiled.
860 // static
861 void ThreadData::EnsureCleanupWasCalled(int major_threads_shutdown_count) {
862 base::AutoLock lock(*list_lock_.Pointer());
863 if (worker_thread_data_creation_count_ == 0)
864 return; // We haven't really run much, and couldn't have leaked.
866 // TODO(jar): until this is working on XP, don't run the real test.
867 #if 0
868 // Verify that we've at least shutdown/cleanup the major namesd threads. The
869 // caller should tell us how many thread shutdowns should have taken place by
870 // now.
871 CHECK_GT(cleanup_count_, major_threads_shutdown_count);
872 #endif
875 // static
876 void ThreadData::ShutdownSingleThreadedCleanup(bool leak) {
877 // This is only called from test code, where we need to cleanup so that
878 // additional tests can be run.
879 // We must be single threaded... but be careful anyway.
880 if (!InitializeAndSetTrackingStatus(DEACTIVATED))
881 return;
882 ThreadData* thread_data_list;
884 base::AutoLock lock(*list_lock_.Pointer());
885 thread_data_list = all_thread_data_list_head_;
886 all_thread_data_list_head_ = NULL;
887 ++incarnation_counter_;
888 // To be clean, break apart the retired worker list (though we leak them).
889 while (first_retired_worker_) {
890 ThreadData* worker = first_retired_worker_;
891 CHECK_GT(worker->worker_thread_number_, 0);
892 first_retired_worker_ = worker->next_retired_worker_;
893 worker->next_retired_worker_ = NULL;
897 // Put most global static back in pristine shape.
898 worker_thread_data_creation_count_ = 0;
899 cleanup_count_ = 0;
900 tls_index_.Set(NULL);
901 status_ = DORMANT_DURING_TESTS; // Almost UNINITIALIZED.
903 // To avoid any chance of racing in unit tests, which is the only place we
904 // call this function, we may sometimes leak all the data structures we
905 // recovered, as they may still be in use on threads from prior tests!
906 if (leak) {
907 ThreadData* thread_data = thread_data_list;
908 while (thread_data) {
909 ANNOTATE_LEAKING_OBJECT_PTR(thread_data);
910 thread_data = thread_data->next();
912 return;
915 // When we want to cleanup (on a single thread), here is what we do.
917 // Do actual recursive delete in all ThreadData instances.
918 while (thread_data_list) {
919 ThreadData* next_thread_data = thread_data_list;
920 thread_data_list = thread_data_list->next();
922 for (BirthMap::iterator it = next_thread_data->birth_map_.begin();
923 next_thread_data->birth_map_.end() != it; ++it)
924 delete it->second; // Delete the Birth Records.
925 delete next_thread_data; // Includes all Death Records.
929 //------------------------------------------------------------------------------
930 TaskStopwatch::TaskStopwatch()
931 : wallclock_duration_ms_(0),
932 current_thread_data_(NULL),
933 excluded_duration_ms_(0),
934 parent_(NULL) {
935 #if DCHECK_IS_ON()
936 state_ = CREATED;
937 child_ = NULL;
938 #endif
941 TaskStopwatch::~TaskStopwatch() {
942 #if DCHECK_IS_ON()
943 DCHECK(state_ != RUNNING);
944 DCHECK(child_ == NULL);
945 #endif
948 void TaskStopwatch::Start() {
949 #if DCHECK_IS_ON()
950 DCHECK(state_ == CREATED);
951 state_ = RUNNING;
952 #endif
954 start_time_ = ThreadData::Now();
956 current_thread_data_ = ThreadData::Get();
957 if (!current_thread_data_)
958 return;
960 parent_ = current_thread_data_->current_stopwatch_;
961 #if DCHECK_IS_ON()
962 if (parent_) {
963 DCHECK(parent_->state_ == RUNNING);
964 DCHECK(parent_->child_ == NULL);
965 parent_->child_ = this;
967 #endif
968 current_thread_data_->current_stopwatch_ = this;
971 void TaskStopwatch::Stop() {
972 const TrackedTime end_time = ThreadData::Now();
973 #if DCHECK_IS_ON()
974 DCHECK(state_ == RUNNING);
975 state_ = STOPPED;
976 DCHECK(child_ == NULL);
977 #endif
979 if (!start_time_.is_null() && !end_time.is_null()) {
980 wallclock_duration_ms_ = (end_time - start_time_).InMilliseconds();
983 if (!current_thread_data_)
984 return;
986 DCHECK(current_thread_data_->current_stopwatch_ == this);
987 current_thread_data_->current_stopwatch_ = parent_;
988 if (!parent_)
989 return;
991 #if DCHECK_IS_ON()
992 DCHECK(parent_->state_ == RUNNING);
993 DCHECK(parent_->child_ == this);
994 parent_->child_ = NULL;
995 #endif
996 parent_->excluded_duration_ms_ += wallclock_duration_ms_;
997 parent_ = NULL;
1000 TrackedTime TaskStopwatch::StartTime() const {
1001 #if DCHECK_IS_ON()
1002 DCHECK(state_ != CREATED);
1003 #endif
1005 return start_time_;
1008 int32 TaskStopwatch::RunDurationMs() const {
1009 #if DCHECK_IS_ON()
1010 DCHECK(state_ == STOPPED);
1011 #endif
1013 return wallclock_duration_ms_ - excluded_duration_ms_;
1016 ThreadData* TaskStopwatch::GetThreadData() const {
1017 #if DCHECK_IS_ON()
1018 DCHECK(state_ != CREATED);
1019 #endif
1021 return current_thread_data_;
1024 //------------------------------------------------------------------------------
1025 // DeathDataPhaseSnapshot
1027 DeathDataPhaseSnapshot::DeathDataPhaseSnapshot(
1028 int profiling_phase,
1029 int count,
1030 int32 run_duration_sum,
1031 int32 run_duration_max,
1032 int32 run_duration_sample,
1033 int32 queue_duration_sum,
1034 int32 queue_duration_max,
1035 int32 queue_duration_sample,
1036 const DeathDataPhaseSnapshot* prev)
1037 : profiling_phase(profiling_phase),
1038 death_data(count,
1039 run_duration_sum,
1040 run_duration_max,
1041 run_duration_sample,
1042 queue_duration_sum,
1043 queue_duration_max,
1044 queue_duration_sample),
1045 prev(prev) {
1048 //------------------------------------------------------------------------------
1049 // TaskSnapshot
1051 TaskSnapshot::TaskSnapshot() {
1054 TaskSnapshot::TaskSnapshot(const BirthOnThreadSnapshot& birth,
1055 const DeathDataSnapshot& death_data,
1056 const std::string& death_thread_name)
1057 : birth(birth),
1058 death_data(death_data),
1059 death_thread_name(death_thread_name) {
1062 TaskSnapshot::~TaskSnapshot() {
1065 //------------------------------------------------------------------------------
1066 // ParentChildPairSnapshot
1068 ParentChildPairSnapshot::ParentChildPairSnapshot() {
1071 ParentChildPairSnapshot::ParentChildPairSnapshot(
1072 const ThreadData::ParentChildPair& parent_child)
1073 : parent(*parent_child.first),
1074 child(*parent_child.second) {
1077 ParentChildPairSnapshot::~ParentChildPairSnapshot() {
1080 //------------------------------------------------------------------------------
1081 // ProcessDataPhaseSnapshot
1083 ProcessDataPhaseSnapshot::ProcessDataPhaseSnapshot() {
1086 ProcessDataPhaseSnapshot::~ProcessDataPhaseSnapshot() {
1089 //------------------------------------------------------------------------------
1090 // ProcessDataPhaseSnapshot
1092 ProcessDataSnapshot::ProcessDataSnapshot()
1093 #if !defined(OS_NACL)
1094 : process_id(base::GetCurrentProcId()) {
1095 #else
1096 : process_id(base::kNullProcessId) {
1097 #endif
1100 ProcessDataSnapshot::~ProcessDataSnapshot() {
1103 } // namespace tracked_objects