1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "base/tracked_objects.h"
10 #include "base/atomicops.h"
11 #include "base/base_switches.h"
12 #include "base/command_line.h"
13 #include "base/compiler_specific.h"
14 #include "base/debug/leak_annotations.h"
15 #include "base/logging.h"
16 #include "base/process/process_handle.h"
17 #include "base/profiler/alternate_timer.h"
18 #include "base/strings/stringprintf.h"
19 #include "base/third_party/valgrind/memcheck.h"
20 #include "base/tracking_info.h"
22 using base::TimeDelta
;
28 namespace tracked_objects
{
31 // When ThreadData is first initialized, should we start in an ACTIVE state to
32 // record all of the startup-time tasks, or should we start up DEACTIVATED, so
33 // that we only record after parsing the command line flag --enable-tracking.
34 // Note that the flag may force either state, so this really controls only the
35 // period of time up until that flag is parsed. If there is no flag seen, then
36 // this state may prevail for much or all of the process lifetime.
37 const ThreadData::Status kInitialStartupState
= ThreadData::PROFILING_ACTIVE
;
39 // Control whether an alternate time source (Now() function) is supported by
40 // the ThreadData class. This compile time flag should be set to true if we
41 // want other modules (such as a memory allocator, or a thread-specific CPU time
42 // clock) to be able to provide a thread-specific Now() function. Without this
43 // compile-time flag, the code will only support the wall-clock time. This flag
44 // can be flipped to efficiently disable this path (if there is a performance
45 // problem with its presence).
46 static const bool kAllowAlternateTimeSourceHandling
= true;
48 // Possible states of the profiler timing enabledness.
55 // State of the profiler timing enabledness.
56 base::subtle::Atomic32 g_profiler_timing_enabled
= UNDEFINED_TIMING
;
58 // Returns whether profiler timing is enabled. The default is true, but this
59 // may be overridden by a command-line flag. Some platforms may
60 // programmatically set this command-line flag to the "off" value if it's not
62 // This in turn can be overridden by explicitly calling
63 // ThreadData::EnableProfilerTiming, say, based on a field trial.
64 inline bool IsProfilerTimingEnabled() {
65 // Reading |g_profiler_timing_enabled| is done without barrier because
66 // multiple initialization is not an issue while the barrier can be relatively
67 // costly given that this method is sometimes called in a tight loop.
68 base::subtle::Atomic32 current_timing_enabled
=
69 base::subtle::NoBarrier_Load(&g_profiler_timing_enabled
);
70 if (current_timing_enabled
== UNDEFINED_TIMING
) {
71 if (!base::CommandLine::InitializedForCurrentProcess())
73 current_timing_enabled
=
74 (base::CommandLine::ForCurrentProcess()->GetSwitchValueASCII(
75 switches::kProfilerTiming
) ==
76 switches::kProfilerTimingDisabledValue
)
79 base::subtle::NoBarrier_Store(&g_profiler_timing_enabled
,
80 current_timing_enabled
);
82 return current_timing_enabled
== ENABLED_TIMING
;
87 //------------------------------------------------------------------------------
88 // DeathData tallies durations when a death takes place.
90 DeathData::DeathData()
92 sample_probability_count_(0),
94 queue_duration_sum_(0),
96 queue_duration_max_(0),
97 run_duration_sample_(0),
98 queue_duration_sample_(0),
99 last_phase_snapshot_(nullptr) {
102 DeathData::DeathData(const DeathData
& other
)
103 : count_(other
.count_
),
104 sample_probability_count_(other
.sample_probability_count_
),
105 run_duration_sum_(other
.run_duration_sum_
),
106 queue_duration_sum_(other
.queue_duration_sum_
),
107 run_duration_max_(other
.run_duration_max_
),
108 queue_duration_max_(other
.queue_duration_max_
),
109 run_duration_sample_(other
.run_duration_sample_
),
110 queue_duration_sample_(other
.queue_duration_sample_
),
111 last_phase_snapshot_(nullptr) {
112 // This constructor will be used by std::map when adding new DeathData values
113 // to the map. At that point, last_phase_snapshot_ is still NULL, so we don't
114 // need to worry about ownership transfer.
115 DCHECK(other
.last_phase_snapshot_
== nullptr);
118 DeathData::~DeathData() {
119 while (last_phase_snapshot_
) {
120 const DeathDataPhaseSnapshot
* snapshot
= last_phase_snapshot_
;
121 last_phase_snapshot_
= snapshot
->prev
;
126 // TODO(jar): I need to see if this macro to optimize branching is worth using.
128 // This macro has no branching, so it is surely fast, and is equivalent to:
131 // We use a macro rather than a template to force this to inline.
132 // Related code for calculating max is discussed on the web.
133 #define CONDITIONAL_ASSIGN(assign_it, target, source) \
134 ((target) ^= ((target) ^ (source)) & -static_cast<int32>(assign_it))
136 void DeathData::RecordDeath(const int32 queue_duration
,
137 const int32 run_duration
,
138 const uint32 random_number
) {
139 // We'll just clamp at INT_MAX, but we should note this in the UI as such.
140 if (count_
< INT_MAX
)
143 int sample_probability_count
= sample_probability_count_
;
144 if (sample_probability_count
< INT_MAX
)
145 ++sample_probability_count
;
146 sample_probability_count_
= sample_probability_count
;
148 queue_duration_sum_
+= queue_duration
;
149 run_duration_sum_
+= run_duration
;
151 if (queue_duration_max_
< queue_duration
)
152 queue_duration_max_
= queue_duration
;
153 if (run_duration_max_
< run_duration
)
154 run_duration_max_
= run_duration
;
156 // Take a uniformly distributed sample over all durations ever supplied during
157 // the current profiling phase.
158 // The probability that we (instead) use this new sample is
159 // 1/sample_probability_count_. This results in a completely uniform selection
160 // of the sample (at least when we don't clamp sample_probability_count_...
161 // but that should be inconsequentially likely). We ignore the fact that we
162 // correlated our selection of a sample to the run and queue times (i.e., we
163 // used them to generate random_number).
164 CHECK_GT(sample_probability_count
, 0);
165 if (0 == (random_number
% sample_probability_count
)) {
166 queue_duration_sample_
= queue_duration
;
167 run_duration_sample_
= run_duration
;
171 void DeathData::OnProfilingPhaseCompleted(int profiling_phase
) {
172 // Snapshotting and storing current state.
173 last_phase_snapshot_
= new DeathDataPhaseSnapshot(
174 profiling_phase
, count_
, run_duration_sum_
, run_duration_max_
,
175 run_duration_sample_
, queue_duration_sum_
, queue_duration_max_
,
176 queue_duration_sample_
, last_phase_snapshot_
);
178 // Not touching fields for which a delta can be computed by comparing with a
179 // snapshot from the previous phase. Resetting other fields. Sample values
180 // will be reset upon next death recording because sample_probability_count_
182 // We avoid resetting to 0 in favor of deltas whenever possible. The reason
183 // is that for incrementable fields, resetting to 0 from the snapshot thread
184 // potentially in parallel with incrementing in the death thread may result in
185 // significant data corruption that has a potential to grow with time. Not
186 // resetting incrementable fields and using deltas will cause any
187 // off-by-little corruptions to be likely fixed at the next snapshot.
188 // The max values are not incrementable, and cannot be deduced using deltas
189 // for a given phase. Hence, we have to reset them to 0. But the potential
190 // damage is limited to getting the previous phase's max to apply for the next
191 // phase, and the error doesn't have a potential to keep growing with new
193 // sample_probability_count_ is incrementable, but must be reset to 0 at the
194 // phase end, so that we start a new uniformly randomized sample selection
195 // after the reset. Corruptions due to race conditions are possible, but the
196 // damage is limited to selecting a wrong sample, which is not something that
197 // can cause accumulating or cascading effects.
198 // If there were no corruptions caused by race conditions, we never send a
199 // sample for the previous phase in the next phase's snapshot because
200 // ThreadData::SnapshotExecutedTasks doesn't send deltas with 0 count.
201 sample_probability_count_
= 0;
202 run_duration_max_
= 0;
203 queue_duration_max_
= 0;
206 //------------------------------------------------------------------------------
207 DeathDataSnapshot::DeathDataSnapshot()
209 run_duration_sum(-1),
210 run_duration_max(-1),
211 run_duration_sample(-1),
212 queue_duration_sum(-1),
213 queue_duration_max(-1),
214 queue_duration_sample(-1) {
217 DeathDataSnapshot::DeathDataSnapshot(int count
,
218 int32 run_duration_sum
,
219 int32 run_duration_max
,
220 int32 run_duration_sample
,
221 int32 queue_duration_sum
,
222 int32 queue_duration_max
,
223 int32 queue_duration_sample
)
225 run_duration_sum(run_duration_sum
),
226 run_duration_max(run_duration_max
),
227 run_duration_sample(run_duration_sample
),
228 queue_duration_sum(queue_duration_sum
),
229 queue_duration_max(queue_duration_max
),
230 queue_duration_sample(queue_duration_sample
) {
233 DeathDataSnapshot::~DeathDataSnapshot() {
236 DeathDataSnapshot
DeathDataSnapshot::Delta(
237 const DeathDataSnapshot
& older
) const {
238 return DeathDataSnapshot(count
- older
.count
,
239 run_duration_sum
- older
.run_duration_sum
,
240 run_duration_max
, run_duration_sample
,
241 queue_duration_sum
- older
.queue_duration_sum
,
242 queue_duration_max
, queue_duration_sample
);
245 //------------------------------------------------------------------------------
246 BirthOnThread::BirthOnThread(const Location
& location
,
247 const ThreadData
& current
)
248 : location_(location
),
249 birth_thread_(¤t
) {
252 //------------------------------------------------------------------------------
253 BirthOnThreadSnapshot::BirthOnThreadSnapshot() {
256 BirthOnThreadSnapshot::BirthOnThreadSnapshot(const BirthOnThread
& birth
)
257 : location(birth
.location()),
258 thread_name(birth
.birth_thread()->thread_name()) {
261 BirthOnThreadSnapshot::~BirthOnThreadSnapshot() {
264 //------------------------------------------------------------------------------
265 Births::Births(const Location
& location
, const ThreadData
& current
)
266 : BirthOnThread(location
, current
),
269 int Births::birth_count() const { return birth_count_
; }
271 void Births::RecordBirth() { ++birth_count_
; }
273 //------------------------------------------------------------------------------
274 // ThreadData maintains the central data for all births and deaths on a single
277 // TODO(jar): We should pull all these static vars together, into a struct, and
278 // optimize layout so that we benefit from locality of reference during accesses
282 NowFunction
* ThreadData::now_function_
= NULL
;
285 bool ThreadData::now_function_is_time_
= false;
287 // A TLS slot which points to the ThreadData instance for the current thread.
288 // We do a fake initialization here (zeroing out data), and then the real
289 // in-place construction happens when we call tls_index_.Initialize().
291 base::ThreadLocalStorage::StaticSlot
ThreadData::tls_index_
= TLS_INITIALIZER
;
294 int ThreadData::worker_thread_data_creation_count_
= 0;
297 int ThreadData::cleanup_count_
= 0;
300 int ThreadData::incarnation_counter_
= 0;
303 ThreadData
* ThreadData::all_thread_data_list_head_
= NULL
;
306 ThreadData
* ThreadData::first_retired_worker_
= NULL
;
309 base::LazyInstance
<base::Lock
>::Leaky
310 ThreadData::list_lock_
= LAZY_INSTANCE_INITIALIZER
;
313 base::subtle::Atomic32
ThreadData::status_
= ThreadData::UNINITIALIZED
;
315 ThreadData::ThreadData(const std::string
& suggested_name
)
317 next_retired_worker_(NULL
),
318 worker_thread_number_(0),
319 incarnation_count_for_pool_(-1),
320 current_stopwatch_(NULL
) {
321 DCHECK_GE(suggested_name
.size(), 0u);
322 thread_name_
= suggested_name
;
323 PushToHeadOfList(); // Which sets real incarnation_count_for_pool_.
326 ThreadData::ThreadData(int thread_number
)
328 next_retired_worker_(NULL
),
329 worker_thread_number_(thread_number
),
330 incarnation_count_for_pool_(-1),
331 current_stopwatch_(NULL
) {
332 CHECK_GT(thread_number
, 0);
333 base::StringAppendF(&thread_name_
, "WorkerThread-%d", thread_number
);
334 PushToHeadOfList(); // Which sets real incarnation_count_for_pool_.
337 ThreadData::~ThreadData() {
340 void ThreadData::PushToHeadOfList() {
341 // Toss in a hint of randomness (atop the uniniitalized value).
342 (void)VALGRIND_MAKE_MEM_DEFINED_IF_ADDRESSABLE(&random_number_
,
343 sizeof(random_number_
));
344 MSAN_UNPOISON(&random_number_
, sizeof(random_number_
));
345 random_number_
+= static_cast<uint32
>(this - static_cast<ThreadData
*>(0));
346 random_number_
^= (Now() - TrackedTime()).InMilliseconds();
349 base::AutoLock
lock(*list_lock_
.Pointer());
350 incarnation_count_for_pool_
= incarnation_counter_
;
351 next_
= all_thread_data_list_head_
;
352 all_thread_data_list_head_
= this;
356 ThreadData
* ThreadData::first() {
357 base::AutoLock
lock(*list_lock_
.Pointer());
358 return all_thread_data_list_head_
;
361 ThreadData
* ThreadData::next() const { return next_
; }
364 void ThreadData::InitializeThreadContext(const std::string
& suggested_name
) {
366 ThreadData
* current_thread_data
=
367 reinterpret_cast<ThreadData
*>(tls_index_
.Get());
368 if (current_thread_data
)
369 return; // Browser tests instigate this.
370 current_thread_data
= new ThreadData(suggested_name
);
371 tls_index_
.Set(current_thread_data
);
375 ThreadData
* ThreadData::Get() {
376 if (!tls_index_
.initialized())
377 return NULL
; // For unittests only.
378 ThreadData
* registered
= reinterpret_cast<ThreadData
*>(tls_index_
.Get());
382 // We must be a worker thread, since we didn't pre-register.
383 ThreadData
* worker_thread_data
= NULL
;
384 int worker_thread_number
= 0;
386 base::AutoLock
lock(*list_lock_
.Pointer());
387 if (first_retired_worker_
) {
388 worker_thread_data
= first_retired_worker_
;
389 first_retired_worker_
= first_retired_worker_
->next_retired_worker_
;
390 worker_thread_data
->next_retired_worker_
= NULL
;
392 worker_thread_number
= ++worker_thread_data_creation_count_
;
396 // If we can't find a previously used instance, then we have to create one.
397 if (!worker_thread_data
) {
398 DCHECK_GT(worker_thread_number
, 0);
399 worker_thread_data
= new ThreadData(worker_thread_number
);
401 DCHECK_GT(worker_thread_data
->worker_thread_number_
, 0);
403 tls_index_
.Set(worker_thread_data
);
404 return worker_thread_data
;
408 void ThreadData::OnThreadTermination(void* thread_data
) {
409 DCHECK(thread_data
); // TLS should *never* call us with a NULL.
410 // We must NOT do any allocations during this callback. There is a chance
411 // that the allocator is no longer active on this thread.
412 reinterpret_cast<ThreadData
*>(thread_data
)->OnThreadTerminationCleanup();
415 void ThreadData::OnThreadTerminationCleanup() {
416 // The list_lock_ was created when we registered the callback, so it won't be
417 // allocated here despite the lazy reference.
418 base::AutoLock
lock(*list_lock_
.Pointer());
419 if (incarnation_counter_
!= incarnation_count_for_pool_
)
420 return; // ThreadData was constructed in an earlier unit test.
422 // Only worker threads need to be retired and reused.
423 if (!worker_thread_number_
) {
426 // We must NOT do any allocations during this callback.
427 // Using the simple linked lists avoids all allocations.
428 DCHECK_EQ(this->next_retired_worker_
, reinterpret_cast<ThreadData
*>(NULL
));
429 this->next_retired_worker_
= first_retired_worker_
;
430 first_retired_worker_
= this;
434 void ThreadData::Snapshot(int current_profiling_phase
,
435 ProcessDataSnapshot
* process_data_snapshot
) {
436 // Get an unchanging copy of a ThreadData list.
437 ThreadData
* my_list
= ThreadData::first();
439 // Gather data serially.
440 // This hackish approach *can* get some slightly corrupt tallies, as we are
441 // grabbing values without the protection of a lock, but it has the advantage
442 // of working even with threads that don't have message loops. If a user
443 // sees any strangeness, they can always just run their stats gathering a
445 BirthCountMap birth_counts
;
446 for (ThreadData
* thread_data
= my_list
; thread_data
;
447 thread_data
= thread_data
->next()) {
448 thread_data
->SnapshotExecutedTasks(current_profiling_phase
,
449 &process_data_snapshot
->phased_snapshots
,
453 // Add births that are still active -- i.e. objects that have tallied a birth,
454 // but have not yet tallied a matching death, and hence must be either
455 // running, queued up, or being held in limbo for future posting.
456 auto* current_phase_tasks
=
457 &process_data_snapshot
->phased_snapshots
[current_profiling_phase
].tasks
;
458 for (const auto& birth_count
: birth_counts
) {
459 if (birth_count
.second
> 0) {
460 current_phase_tasks
->push_back(
461 TaskSnapshot(BirthOnThreadSnapshot(*birth_count
.first
),
462 DeathDataSnapshot(birth_count
.second
, 0, 0, 0, 0, 0, 0),
469 void ThreadData::OnProfilingPhaseCompleted(int profiling_phase
) {
470 // Get an unchanging copy of a ThreadData list.
471 ThreadData
* my_list
= ThreadData::first();
473 // Add snapshots for all instances of death data in all threads serially.
474 // This hackish approach *can* get some slightly corrupt tallies, as we are
475 // grabbing values without the protection of a lock, but it has the advantage
476 // of working even with threads that don't have message loops. Any corruption
477 // shouldn't cause "cascading damage" to anything else (in later phases).
478 for (ThreadData
* thread_data
= my_list
; thread_data
;
479 thread_data
= thread_data
->next()) {
480 thread_data
->OnProfilingPhaseCompletedOnThread(profiling_phase
);
484 Births
* ThreadData::TallyABirth(const Location
& location
) {
485 BirthMap::iterator it
= birth_map_
.find(location
);
487 if (it
!= birth_map_
.end()) {
489 child
->RecordBirth();
491 child
= new Births(location
, *this); // Leak this.
492 // Lock since the map may get relocated now, and other threads sometimes
493 // snapshot it (but they lock before copying it).
494 base::AutoLock
lock(map_lock_
);
495 birth_map_
[location
] = child
;
501 void ThreadData::TallyADeath(const Births
& births
,
502 int32 queue_duration
,
503 const TaskStopwatch
& stopwatch
) {
504 int32 run_duration
= stopwatch
.RunDurationMs();
506 // Stir in some randomness, plus add constant in case durations are zero.
507 const uint32 kSomePrimeNumber
= 2147483647;
508 random_number_
+= queue_duration
+ run_duration
+ kSomePrimeNumber
;
509 // An address is going to have some randomness to it as well ;-).
510 random_number_
^= static_cast<uint32
>(&births
- reinterpret_cast<Births
*>(0));
512 // We don't have queue durations without OS timer. OS timer is automatically
513 // used for task-post-timing, so the use of an alternate timer implies all
514 // queue times are invalid, unless it was explicitly said that we can trust
515 // the alternate timer.
516 if (kAllowAlternateTimeSourceHandling
&&
518 !now_function_is_time_
) {
522 DeathMap::iterator it
= death_map_
.find(&births
);
523 DeathData
* death_data
;
524 if (it
!= death_map_
.end()) {
525 death_data
= &it
->second
;
527 base::AutoLock
lock(map_lock_
); // Lock as the map may get relocated now.
528 death_data
= &death_map_
[&births
];
529 } // Release lock ASAP.
530 death_data
->RecordDeath(queue_duration
, run_duration
, random_number_
);
534 Births
* ThreadData::TallyABirthIfActive(const Location
& location
) {
535 if (!TrackingStatus())
537 ThreadData
* current_thread_data
= Get();
538 if (!current_thread_data
)
540 return current_thread_data
->TallyABirth(location
);
544 void ThreadData::TallyRunOnNamedThreadIfTracking(
545 const base::TrackingInfo
& completed_task
,
546 const TaskStopwatch
& stopwatch
) {
547 // Even if we have been DEACTIVATED, we will process any pending births so
548 // that our data structures (which counted the outstanding births) remain
550 const Births
* births
= completed_task
.birth_tally
;
553 ThreadData
* current_thread_data
= stopwatch
.GetThreadData();
554 if (!current_thread_data
)
557 // Watch out for a race where status_ is changing, and hence one or both
558 // of start_of_run or end_of_run is zero. In that case, we didn't bother to
559 // get a time value since we "weren't tracking" and we were trying to be
560 // efficient by not calling for a genuine time value. For simplicity, we'll
561 // use a default zero duration when we can't calculate a true value.
562 TrackedTime start_of_run
= stopwatch
.StartTime();
563 int32 queue_duration
= 0;
564 if (!start_of_run
.is_null()) {
565 queue_duration
= (start_of_run
- completed_task
.EffectiveTimePosted())
568 current_thread_data
->TallyADeath(*births
, queue_duration
, stopwatch
);
572 void ThreadData::TallyRunOnWorkerThreadIfTracking(
573 const Births
* births
,
574 const TrackedTime
& time_posted
,
575 const TaskStopwatch
& stopwatch
) {
576 // Even if we have been DEACTIVATED, we will process any pending births so
577 // that our data structures (which counted the outstanding births) remain
582 // TODO(jar): Support the option to coalesce all worker-thread activity under
583 // one ThreadData instance that uses locks to protect *all* access. This will
584 // reduce memory (making it provably bounded), but run incrementally slower
585 // (since we'll use locks on TallyABirth and TallyADeath). The good news is
586 // that the locks on TallyADeath will be *after* the worker thread has run,
587 // and hence nothing will be waiting for the completion (... besides some
588 // other thread that might like to run). Also, the worker threads tasks are
589 // generally longer, and hence the cost of the lock may perchance be amortized
590 // over the long task's lifetime.
591 ThreadData
* current_thread_data
= stopwatch
.GetThreadData();
592 if (!current_thread_data
)
595 TrackedTime start_of_run
= stopwatch
.StartTime();
596 int32 queue_duration
= 0;
597 if (!start_of_run
.is_null()) {
598 queue_duration
= (start_of_run
- time_posted
).InMilliseconds();
600 current_thread_data
->TallyADeath(*births
, queue_duration
, stopwatch
);
604 void ThreadData::TallyRunInAScopedRegionIfTracking(
605 const Births
* births
,
606 const TaskStopwatch
& stopwatch
) {
607 // Even if we have been DEACTIVATED, we will process any pending births so
608 // that our data structures (which counted the outstanding births) remain
613 ThreadData
* current_thread_data
= stopwatch
.GetThreadData();
614 if (!current_thread_data
)
617 int32 queue_duration
= 0;
618 current_thread_data
->TallyADeath(*births
, queue_duration
, stopwatch
);
621 void ThreadData::SnapshotExecutedTasks(
622 int current_profiling_phase
,
623 PhasedProcessDataSnapshotMap
* phased_snapshots
,
624 BirthCountMap
* birth_counts
) {
625 // Get copy of data, so that the data will not change during the iterations
628 DeathsSnapshot deaths
;
629 SnapshotMaps(current_profiling_phase
, &birth_map
, &deaths
);
631 for (const auto& birth
: birth_map
) {
632 (*birth_counts
)[birth
.second
] += birth
.second
->birth_count();
635 for (const auto& death
: deaths
) {
636 (*birth_counts
)[death
.first
] -= death
.first
->birth_count();
638 // For the current death data, walk through all its snapshots, starting from
639 // the current one, then from the previous profiling phase etc., and for
640 // each snapshot calculate the delta between the snapshot and the previous
641 // phase, if any. Store the deltas in the result.
642 for (const DeathDataPhaseSnapshot
* phase
= &death
.second
; phase
;
643 phase
= phase
->prev
) {
644 const DeathDataSnapshot
& death_data
=
645 phase
->prev
? phase
->death_data
.Delta(phase
->prev
->death_data
)
648 if (death_data
.count
> 0) {
649 (*phased_snapshots
)[phase
->profiling_phase
].tasks
.push_back(
650 TaskSnapshot(BirthOnThreadSnapshot(*death
.first
), death_data
,
657 // This may be called from another thread.
658 void ThreadData::SnapshotMaps(int profiling_phase
,
660 DeathsSnapshot
* deaths
) {
661 base::AutoLock
lock(map_lock_
);
663 for (const auto& birth
: birth_map_
)
664 (*birth_map
)[birth
.first
] = birth
.second
;
666 for (const auto& death
: death_map_
) {
667 deaths
->push_back(std::make_pair(
669 DeathDataPhaseSnapshot(profiling_phase
, death
.second
.count(),
670 death
.second
.run_duration_sum(),
671 death
.second
.run_duration_max(),
672 death
.second
.run_duration_sample(),
673 death
.second
.queue_duration_sum(),
674 death
.second
.queue_duration_max(),
675 death
.second
.queue_duration_sample(),
676 death
.second
.last_phase_snapshot())));
680 void ThreadData::OnProfilingPhaseCompletedOnThread(int profiling_phase
) {
681 base::AutoLock
lock(map_lock_
);
683 for (auto& death
: death_map_
) {
684 death
.second
.OnProfilingPhaseCompleted(profiling_phase
);
688 static void OptionallyInitializeAlternateTimer() {
689 NowFunction
* alternate_time_source
= GetAlternateTimeSource();
690 if (alternate_time_source
)
691 ThreadData::SetAlternateTimeSource(alternate_time_source
);
694 void ThreadData::Initialize() {
695 if (base::subtle::Acquire_Load(&status_
) >= DEACTIVATED
)
696 return; // Someone else did the initialization.
697 // Due to racy lazy initialization in tests, we'll need to recheck status_
698 // after we acquire the lock.
700 // Ensure that we don't double initialize tls. We are called when single
701 // threaded in the product, but some tests may be racy and lazy about our
703 base::AutoLock
lock(*list_lock_
.Pointer());
704 if (base::subtle::Acquire_Load(&status_
) >= DEACTIVATED
)
705 return; // Someone raced in here and beat us.
707 // Put an alternate timer in place if the environment calls for it, such as
708 // for tracking TCMalloc allocations. This insertion is idempotent, so we
709 // don't mind if there is a race, and we'd prefer not to be in a lock while
711 if (kAllowAlternateTimeSourceHandling
)
712 OptionallyInitializeAlternateTimer();
714 // Perform the "real" TLS initialization now, and leave it intact through
715 // process termination.
716 if (!tls_index_
.initialized()) { // Testing may have initialized this.
717 DCHECK_EQ(base::subtle::NoBarrier_Load(&status_
), UNINITIALIZED
);
718 tls_index_
.Initialize(&ThreadData::OnThreadTermination
);
719 DCHECK(tls_index_
.initialized());
721 // TLS was initialzed for us earlier.
722 DCHECK_EQ(base::subtle::NoBarrier_Load(&status_
), DORMANT_DURING_TESTS
);
725 // Incarnation counter is only significant to testing, as it otherwise will
726 // never again change in this process.
727 ++incarnation_counter_
;
729 // The lock is not critical for setting status_, but it doesn't hurt. It also
730 // ensures that if we have a racy initialization, that we'll bail as soon as
731 // we get the lock earlier in this method.
732 base::subtle::Release_Store(&status_
, kInitialStartupState
);
733 DCHECK(base::subtle::NoBarrier_Load(&status_
) != UNINITIALIZED
);
737 void ThreadData::InitializeAndSetTrackingStatus(Status status
) {
738 DCHECK_GE(status
, DEACTIVATED
);
739 DCHECK_LE(status
, PROFILING_ACTIVE
);
741 Initialize(); // No-op if already initialized.
743 if (status
> DEACTIVATED
)
744 status
= PROFILING_ACTIVE
;
745 base::subtle::Release_Store(&status_
, status
);
749 ThreadData::Status
ThreadData::status() {
750 return static_cast<ThreadData::Status
>(base::subtle::Acquire_Load(&status_
));
754 bool ThreadData::TrackingStatus() {
755 return base::subtle::Acquire_Load(&status_
) > DEACTIVATED
;
759 void ThreadData::SetAlternateTimeSource(NowFunction
* now_function
) {
760 DCHECK(now_function
);
761 if (kAllowAlternateTimeSourceHandling
)
762 now_function_
= now_function
;
766 void ThreadData::EnableProfilerTiming() {
767 base::subtle::NoBarrier_Store(&g_profiler_timing_enabled
, ENABLED_TIMING
);
771 TrackedTime
ThreadData::Now() {
772 if (kAllowAlternateTimeSourceHandling
&& now_function_
)
773 return TrackedTime::FromMilliseconds((*now_function_
)());
774 if (IsProfilerTimingEnabled() && TrackingStatus())
775 return TrackedTime::Now();
776 return TrackedTime(); // Super fast when disabled, or not compiled.
780 void ThreadData::EnsureCleanupWasCalled(int major_threads_shutdown_count
) {
781 base::AutoLock
lock(*list_lock_
.Pointer());
782 if (worker_thread_data_creation_count_
== 0)
783 return; // We haven't really run much, and couldn't have leaked.
785 // TODO(jar): until this is working on XP, don't run the real test.
787 // Verify that we've at least shutdown/cleanup the major namesd threads. The
788 // caller should tell us how many thread shutdowns should have taken place by
790 CHECK_GT(cleanup_count_
, major_threads_shutdown_count
);
795 void ThreadData::ShutdownSingleThreadedCleanup(bool leak
) {
796 // This is only called from test code, where we need to cleanup so that
797 // additional tests can be run.
798 // We must be single threaded... but be careful anyway.
799 InitializeAndSetTrackingStatus(DEACTIVATED
);
801 ThreadData
* thread_data_list
;
803 base::AutoLock
lock(*list_lock_
.Pointer());
804 thread_data_list
= all_thread_data_list_head_
;
805 all_thread_data_list_head_
= NULL
;
806 ++incarnation_counter_
;
807 // To be clean, break apart the retired worker list (though we leak them).
808 while (first_retired_worker_
) {
809 ThreadData
* worker
= first_retired_worker_
;
810 CHECK_GT(worker
->worker_thread_number_
, 0);
811 first_retired_worker_
= worker
->next_retired_worker_
;
812 worker
->next_retired_worker_
= NULL
;
816 // Put most global static back in pristine shape.
817 worker_thread_data_creation_count_
= 0;
819 tls_index_
.Set(NULL
);
820 // Almost UNINITIALIZED.
821 base::subtle::Release_Store(&status_
, DORMANT_DURING_TESTS
);
823 // To avoid any chance of racing in unit tests, which is the only place we
824 // call this function, we may sometimes leak all the data structures we
825 // recovered, as they may still be in use on threads from prior tests!
827 ThreadData
* thread_data
= thread_data_list
;
828 while (thread_data
) {
829 ANNOTATE_LEAKING_OBJECT_PTR(thread_data
);
830 thread_data
= thread_data
->next();
835 // When we want to cleanup (on a single thread), here is what we do.
837 // Do actual recursive delete in all ThreadData instances.
838 while (thread_data_list
) {
839 ThreadData
* next_thread_data
= thread_data_list
;
840 thread_data_list
= thread_data_list
->next();
842 for (BirthMap::iterator it
= next_thread_data
->birth_map_
.begin();
843 next_thread_data
->birth_map_
.end() != it
; ++it
)
844 delete it
->second
; // Delete the Birth Records.
845 delete next_thread_data
; // Includes all Death Records.
849 //------------------------------------------------------------------------------
850 TaskStopwatch::TaskStopwatch()
851 : wallclock_duration_ms_(0),
852 current_thread_data_(NULL
),
853 excluded_duration_ms_(0),
861 TaskStopwatch::~TaskStopwatch() {
863 DCHECK(state_
!= RUNNING
);
864 DCHECK(child_
== NULL
);
868 void TaskStopwatch::Start() {
870 DCHECK(state_
== CREATED
);
874 start_time_
= ThreadData::Now();
876 current_thread_data_
= ThreadData::Get();
877 if (!current_thread_data_
)
880 parent_
= current_thread_data_
->current_stopwatch_
;
883 DCHECK(parent_
->state_
== RUNNING
);
884 DCHECK(parent_
->child_
== NULL
);
885 parent_
->child_
= this;
888 current_thread_data_
->current_stopwatch_
= this;
891 void TaskStopwatch::Stop() {
892 const TrackedTime end_time
= ThreadData::Now();
894 DCHECK(state_
== RUNNING
);
896 DCHECK(child_
== NULL
);
899 if (!start_time_
.is_null() && !end_time
.is_null()) {
900 wallclock_duration_ms_
= (end_time
- start_time_
).InMilliseconds();
903 if (!current_thread_data_
)
906 DCHECK(current_thread_data_
->current_stopwatch_
== this);
907 current_thread_data_
->current_stopwatch_
= parent_
;
912 DCHECK(parent_
->state_
== RUNNING
);
913 DCHECK(parent_
->child_
== this);
914 parent_
->child_
= NULL
;
916 parent_
->excluded_duration_ms_
+= wallclock_duration_ms_
;
920 TrackedTime
TaskStopwatch::StartTime() const {
922 DCHECK(state_
!= CREATED
);
928 int32
TaskStopwatch::RunDurationMs() const {
930 DCHECK(state_
== STOPPED
);
933 return wallclock_duration_ms_
- excluded_duration_ms_
;
936 ThreadData
* TaskStopwatch::GetThreadData() const {
938 DCHECK(state_
!= CREATED
);
941 return current_thread_data_
;
944 //------------------------------------------------------------------------------
945 // DeathDataPhaseSnapshot
947 DeathDataPhaseSnapshot::DeathDataPhaseSnapshot(
950 int32 run_duration_sum
,
951 int32 run_duration_max
,
952 int32 run_duration_sample
,
953 int32 queue_duration_sum
,
954 int32 queue_duration_max
,
955 int32 queue_duration_sample
,
956 const DeathDataPhaseSnapshot
* prev
)
957 : profiling_phase(profiling_phase
),
964 queue_duration_sample
),
968 //------------------------------------------------------------------------------
971 TaskSnapshot::TaskSnapshot() {
974 TaskSnapshot::TaskSnapshot(const BirthOnThreadSnapshot
& birth
,
975 const DeathDataSnapshot
& death_data
,
976 const std::string
& death_thread_name
)
978 death_data(death_data
),
979 death_thread_name(death_thread_name
) {
982 TaskSnapshot::~TaskSnapshot() {
985 //------------------------------------------------------------------------------
986 // ProcessDataPhaseSnapshot
988 ProcessDataPhaseSnapshot::ProcessDataPhaseSnapshot() {
991 ProcessDataPhaseSnapshot::~ProcessDataPhaseSnapshot() {
994 //------------------------------------------------------------------------------
995 // ProcessDataPhaseSnapshot
997 ProcessDataSnapshot::ProcessDataSnapshot()
998 #if !defined(OS_NACL)
999 : process_id(base::GetCurrentProcId()) {
1001 : process_id(base::kNullProcessId
) {
1005 ProcessDataSnapshot::~ProcessDataSnapshot() {
1008 } // namespace tracked_objects