// Copyright (c) 2008, Google Inc.
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
//     * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// ---
// Author: Ken Ashcraft <opensource@google.com>
#include <config.h>
#include "thread_cache.h"
#include <errno.h>
#include <string.h>                     // for memcpy
#include <algorithm>                    // for max, min
#include "base/commandlineflags.h"      // for SpinLockHolder
#include "base/spinlock.h"              // for SpinLockHolder
#include "central_freelist.h"           // for CentralFreeListPadded
#include "maybe_threads.h"

using std::min;
using std::max;
DEFINE_int64(tcmalloc_max_total_thread_cache_bytes,
             EnvToInt64("TCMALLOC_MAX_TOTAL_THREAD_CACHE_BYTES",
                        kDefaultOverallThreadCacheSize),
             "Bound on the total number of bytes allocated to "
             "thread caches. This bound is not strict, so it is possible "
             "for the cache to go over this bound in certain circumstances. "
             "The maximum value of this flag is capped at 1 GB.");

namespace tcmalloc {

static bool phinited = false;

volatile size_t ThreadCache::per_thread_cache_size_ = kMaxThreadCacheSize;
size_t ThreadCache::overall_thread_cache_size_ = kDefaultOverallThreadCacheSize;
ssize_t ThreadCache::unclaimed_cache_space_ = kDefaultOverallThreadCacheSize;
PageHeapAllocator<ThreadCache> threadcache_allocator;
ThreadCache* ThreadCache::thread_heaps_ = NULL;
int ThreadCache::thread_heap_count_ = 0;
ThreadCache* ThreadCache::next_memory_steal_ = NULL;
#ifdef HAVE_TLS
__thread ThreadCache* ThreadCache::threadlocal_heap_
// See comments in thread_cache.h about this. Bug here:
// http://code.google.com/p/chromium/issues/detail?id=124489
#if defined(HAVE___ATTRIBUTE__) && !defined(PGO_GENERATE)
    __attribute__ ((tls_model ("initial-exec")))
# endif
    ;
#endif
bool ThreadCache::tsd_inited_ = false;
pthread_key_t ThreadCache::heap_key_;

#if defined(HAVE_TLS)
bool kernel_supports_tls = false;      // be conservative
# if defined(_WIN32)    // windows has supported TLS since winnt, I think.
void CheckIfKernelSupportsTLS() {
  kernel_supports_tls = true;
}
# elif !HAVE_DECL_UNAME    // if too old for uname, probably too old for TLS
void CheckIfKernelSupportsTLS() {
  kernel_supports_tls = false;
}
# else
#   include <sys/utsname.h>    // DECL_UNAME checked for <sys/utsname.h> too
void CheckIfKernelSupportsTLS() {
  struct utsname buf;
  if (uname(&buf) < 0) {   // should be impossible
    Log(kLog, __FILE__, __LINE__,
        "uname failed, assuming no TLS support (errno)", errno);
    kernel_supports_tls = false;
  } else if (strcasecmp(buf.sysname, "linux") == 0) {
    // The linux case: the first kernel to support TLS was 2.6.0
    if (buf.release[0] < '2' && buf.release[1] == '.')    // 0.x or 1.x
      kernel_supports_tls = false;
    else if (buf.release[0] == '2' && buf.release[1] == '.' &&
             buf.release[2] >= '0' && buf.release[2] < '6' &&
             buf.release[3] == '.')                       // 2.0 - 2.5
      kernel_supports_tls = false;
    else
      kernel_supports_tls = true;
  } else if (strcasecmp(buf.sysname, "CYGWIN_NT-6.1-WOW64") == 0) {
    // In my testing, this version of cygwin, at least, would hang
    // when using TLS.
    kernel_supports_tls = false;
  } else {        // some other kernel, we'll be optimistic
    kernel_supports_tls = true;
  }
  // TODO(csilvers): VLOG(1) the tls status once we support RAW_VLOG
}
# endif  // HAVE_DECL_UNAME
#endif  // HAVE_TLS

void ThreadCache::Init(pthread_t tid) {
  size_ = 0;

  total_bytes_allocated_ = 0;

  max_size_ = 0;
  IncreaseCacheLimitLocked();
  if (max_size_ == 0) {
    // There isn't enough memory to go around.  Just give the minimum to
    // this thread.
    max_size_ = kMinThreadCacheSize;

    // Take unclaimed_cache_space_ negative.
    unclaimed_cache_space_ -= kMinThreadCacheSize;
    ASSERT(unclaimed_cache_space_ < 0);
  }

  next_ = NULL;
  prev_ = NULL;
  tid_  = tid;
  in_setspecific_ = false;
  for (size_t cl = 0; cl < kNumClasses; ++cl) {
    list_[cl].Init();
  }

  uint32_t sampler_seed;
  memcpy(&sampler_seed, &tid, sizeof(sampler_seed));
  sampler_.Init(sampler_seed);
}
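
// Budget-accounting sketch (descriptive; behavior inferred from the code
// above and from IncreaseCacheLimitLocked() below): a new cache normally
// receives kStealAmount of budget out of unclaimed_cache_space_.  Only if
// that pool is exhausted and stealing from other caches also fails does it
// fall back to kMinThreadCacheSize, pushing unclaimed_cache_space_ negative;
// the deficit is repaid when caches are deleted or when
// RecomputePerThreadCacheSize() redistributes the overall budget.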

void ThreadCache::Cleanup() {
  // Put unused memory back into central cache
  for (int cl = 0; cl < kNumClasses; ++cl) {
    if (list_[cl].length() > 0) {
      ReleaseToCentralCache(&list_[cl], cl, list_[cl].length());
    }
  }
}

// Remove some objects of class "cl" from central cache and add to thread heap.
// On success, return the first object for immediate use; otherwise return NULL.
void* ThreadCache::FetchFromCentralCache(size_t cl, size_t byte_size) {
  FreeList* list = &list_[cl];
  ASSERT(list->empty());
  const int batch_size = Static::sizemap()->num_objects_to_move(cl);

  const int num_to_move = min<int>(list->max_length(), batch_size);
  void *start, *end;
  int fetch_count = Static::central_cache()[cl].RemoveRange(
      &start, &end, num_to_move);

  ASSERT((start == NULL) == (fetch_count == 0));
  if (--fetch_count >= 0) {
    size_ += byte_size * fetch_count;
    // Pop the top of the list and add the rest to the freelist.
    void *second = start;
    start = FL_Pop(&second);
    list->PushRange(fetch_count, second, end);
  }

  // Increase max length slowly up to batch_size.  After that,
  // increase by batch_size in one shot so that the length is a
  // multiple of batch_size.
  if (list->max_length() < batch_size) {
    list->set_max_length(list->max_length() + 1);
  } else {
    // Don't let the list get too long.  In 32 bit builds, the length
    // is represented by a 16 bit int, so we need to watch out for
    // integer overflow.
    int new_length = min<int>(list->max_length() + batch_size,
                              kMaxDynamicFreeListLength);
    // The list's max_length must always be a multiple of batch_size,
    // and kMaxDynamicFreeListLength is not necessarily a multiple
    // of batch_size.
    new_length -= new_length % batch_size;
    ASSERT(new_length % batch_size == 0);
    list->set_max_length(new_length);
  }
  return start;
}
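
// Worked example of the growth rule above (numbers assumed for
// illustration): with batch_size == 32, max_length grows one object at a
// time on each central-cache fetch (1, 2, ..., 32); once it reaches
// batch_size it grows a whole batch at a time (64, 96, 128, ...), and is
// finally capped at the largest multiple of 32 not exceeding
// kMaxDynamicFreeListLength.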

void ThreadCache::ListTooLong(FreeList* list, size_t cl) {
  const int batch_size = Static::sizemap()->num_objects_to_move(cl);
  ReleaseToCentralCache(list, cl, batch_size);

  // If the list is too long, we need to transfer some number of
  // objects to the central cache.  Ideally, we would transfer
  // num_objects_to_move, so the code below tries to make max_length
  // converge on num_objects_to_move.

  if (list->max_length() < batch_size) {
    // Slow start the max_length so we don't overreserve.
    list->set_max_length(list->max_length() + 1);
  } else if (list->max_length() > batch_size) {
    // If we consistently go over max_length, shrink max_length.  If we don't
    // shrink it, some amount of memory will always stay in this freelist.
    list->set_length_overages(list->length_overages() + 1);
    if (list->length_overages() > kMaxOverages) {
      ASSERT(list->max_length() > batch_size);
      list->set_max_length(list->max_length() - batch_size);
      list->set_length_overages(0);
    }
  }
}

// Remove some objects of class "cl" from thread heap and add to central cache
void ThreadCache::ReleaseToCentralCache(FreeList* src, size_t cl, int N) {
  ASSERT(src == &list_[cl]);
  if (N > src->length()) N = src->length();
  size_t delta_bytes = N * Static::sizemap()->ByteSizeForClass(cl);

  // We return prepackaged chains of the correct size to the central cache.
  // TODO: Use the same format internally in the thread caches?
  int batch_size = Static::sizemap()->num_objects_to_move(cl);
  while (N > batch_size) {
    void *tail, *head;
    src->PopRange(batch_size, &head, &tail);
    Static::central_cache()[cl].InsertRange(head, tail, batch_size);
    N -= batch_size;
  }
  void *tail, *head;
  src->PopRange(N, &head, &tail);
  Static::central_cache()[cl].InsertRange(head, tail, N);
  size_ -= delta_bytes;
}

// Release idle memory to the central cache
void ThreadCache::Scavenge() {
  // If the low-water mark for the free list is L, it means we would
  // not have had to allocate anything from the central cache even if
  // we had reduced the free list size by L.  We aim to get closer to
  // that situation by dropping L/2 nodes from the free list.  This
  // may not release much memory, but if so we will call scavenge again
  // pretty soon and the low-water marks will be high on that call.
  //int64 start = CycleClock::Now();
  for (int cl = 0; cl < kNumClasses; cl++) {
    FreeList* list = &list_[cl];
    const int lowmark = list->lowwatermark();
    if (lowmark > 0) {
      const int drop = (lowmark > 1) ? lowmark/2 : 1;
      ReleaseToCentralCache(list, cl, drop);

      // Shrink the max length if it isn't used.  Only shrink down to
      // batch_size -- if the thread was active enough to get the max_length
      // above batch_size, it will likely be that active again.  If
      // max_length shrinks below batch_size, the thread will have to
      // go through the slow-start behavior again.  The slow-start is useful
      // mainly for threads that stay relatively idle for their entire
      // lifetime.
      const int batch_size = Static::sizemap()->num_objects_to_move(cl);
      if (list->max_length() > batch_size) {
        list->set_max_length(
            max<int>(list->max_length() - batch_size, batch_size));
      }
    }
    list->clear_lowwatermark();
  }

  IncreaseCacheLimit();
}
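
// Illustrative Scavenge pass (numbers assumed): a free list whose low-water
// mark since the previous pass is 10 gives back 5 objects; a mark of 1
// still gives back 1.  Lists with a low-water mark of 0 were fully drained
// at some point, so they are left untouched apart from resetting the mark.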

void ThreadCache::IncreaseCacheLimit() {
  SpinLockHolder h(Static::pageheap_lock());
  IncreaseCacheLimitLocked();
}

void ThreadCache::IncreaseCacheLimitLocked() {
  if (unclaimed_cache_space_ > 0) {
    // Possibly make unclaimed_cache_space_ negative.
    unclaimed_cache_space_ -= kStealAmount;
    max_size_ += kStealAmount;
    return;
  }
  // Don't hold pageheap_lock too long.  Try to steal from 10 other
  // threads before giving up.  The i < 10 condition also prevents an
  // infinite loop in case none of the existing thread heaps are
  // suitable places to steal from.
  for (int i = 0; i < 10;
       ++i, next_memory_steal_ = next_memory_steal_->next_) {
    // Reached the end of the linked list.  Start at the beginning.
    if (next_memory_steal_ == NULL) {
      ASSERT(thread_heaps_ != NULL);
      next_memory_steal_ = thread_heaps_;
    }
    if (next_memory_steal_ == this ||
        next_memory_steal_->max_size_ <= kMinThreadCacheSize) {
      continue;
    }
    next_memory_steal_->max_size_ -= kStealAmount;
    max_size_ += kStealAmount;

    next_memory_steal_ = next_memory_steal_->next_;
    return;
  }
}
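
// Stealing example (descriptive sketch of the policy above): a thread that
// keeps hitting its limit first grows max_size_ in kStealAmount increments
// taken from unclaimed_cache_space_.  Once that pool is gone, each call
// walks at most 10 other caches in round-robin order and, on finding one
// that is not this cache and is above kMinThreadCacheSize, moves
// kStealAmount of budget from that cache to this one.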

int ThreadCache::GetSamplePeriod() {
  return sampler_.GetSamplePeriod();
}

// static
unsigned int ThreadCache::GetBytesAllocatedOnCurrentThread() {
  return ThreadCache::GetThreadHeap()->GetTotalBytesAllocated();
}

void ThreadCache::InitModule() {
  SpinLockHolder h(Static::pageheap_lock());
  if (!phinited) {
    Static::InitStaticVars();
    threadcache_allocator.Init();
    phinited = 1;
  }
}

void ThreadCache::InitTSD() {
  ASSERT(!tsd_inited_);
  perftools_pthread_key_create(&heap_key_, DestroyThreadCache);
  tsd_inited_ = true;

#ifdef PTHREADS_CRASHES_IF_RUN_TOO_EARLY
  // We may have used a fake pthread_t for the main thread.  Fix it.
  pthread_t zero;
  memset(&zero, 0, sizeof(zero));
  SpinLockHolder h(Static::pageheap_lock());
  for (ThreadCache* h = thread_heaps_; h != NULL; h = h->next_) {
    if (h->tid_ == zero) {
      h->tid_ = pthread_self();
    }
  }
#endif
}

ThreadCache* ThreadCache::CreateCacheIfNecessary() {
  // Initialize per-thread data if necessary
  ThreadCache* heap = NULL;
  {
    SpinLockHolder h(Static::pageheap_lock());
    // On some old glibc's, and on freebsd's libc (as of freebsd 8.1),
    // calling pthread routines (even pthread_self) too early could
    // cause a segfault.  Since we can call pthreads quite early, we
    // have to protect against that in such situations by making a
    // 'fake' pthread.  This is not ideal since it doesn't work well
    // when linking tcmalloc statically with apps that create threads
    // before main, so we only do it if we have to.
#ifdef PTHREADS_CRASHES_IF_RUN_TOO_EARLY
    pthread_t me;
    if (!tsd_inited_) {
      memset(&me, 0, sizeof(me));
    } else {
      me = pthread_self();
    }
#else
    const pthread_t me = pthread_self();
#endif

    // This may be a recursive malloc call from pthread_setspecific()
    // In that case, the heap for this thread has already been created
    // and added to the linked list.  So we search for that first.
    for (ThreadCache* h = thread_heaps_; h != NULL; h = h->next_) {
      if (h->tid_ == me) {
        heap = h;
        break;
      }
    }

    if (heap == NULL) heap = NewHeap(me);
  }

  // We call pthread_setspecific() outside the lock because it may
  // call malloc() recursively.  We check for the recursive call using
  // the "in_setspecific_" flag so that we can avoid calling
  // pthread_setspecific() if we are already inside pthread_setspecific().
  if (!heap->in_setspecific_ && tsd_inited_) {
    heap->in_setspecific_ = true;
    perftools_pthread_setspecific(heap_key_, heap);
#ifdef HAVE_TLS
    // Also keep a copy in __thread for faster retrieval
    threadlocal_heap_ = heap;
#endif
    heap->in_setspecific_ = false;
  }
  return heap;
}

ThreadCache* ThreadCache::NewHeap(pthread_t tid) {
  // Create the heap and add it to the linked list
  ThreadCache *heap = threadcache_allocator.New();
  heap->Init(tid);
  heap->next_ = thread_heaps_;
  heap->prev_ = NULL;
  if (thread_heaps_ != NULL) {
    thread_heaps_->prev_ = heap;
  } else {
    // This is the only thread heap at the moment.
    ASSERT(next_memory_steal_ == NULL);
    next_memory_steal_ = heap;
  }
  thread_heaps_ = heap;
  thread_heap_count_++;
  return heap;
}

void ThreadCache::BecomeIdle() {
  if (!tsd_inited_) return;              // No caches yet
  ThreadCache* heap = GetThreadHeap();
  if (heap == NULL) return;              // No thread cache to remove
  if (heap->in_setspecific_) return;     // Do not disturb the active caller

  heap->in_setspecific_ = true;
  perftools_pthread_setspecific(heap_key_, NULL);
#ifdef HAVE_TLS
  // Also update the copy in __thread
  threadlocal_heap_ = NULL;
#endif
  heap->in_setspecific_ = false;
  if (GetThreadHeap() == heap) {
    // Somehow heap got reinstated by a recursive call to malloc
    // from pthread_setspecific.  We give up in this case.
    return;
  }

  // We can now get rid of the heap
  DeleteCache(heap);
}

void ThreadCache::DestroyThreadCache(void* ptr) {
  // Note that "ptr" cannot be NULL since pthread promises not
  // to invoke the destructor on NULL values, but for safety,
  // we check anyway.
  if (ptr == NULL) return;
#ifdef HAVE_TLS
  // Prevent fast path of GetThreadHeap() from returning heap.
  threadlocal_heap_ = NULL;
#endif
  DeleteCache(reinterpret_cast<ThreadCache*>(ptr));
}

void ThreadCache::DeleteCache(ThreadCache* heap) {
  // Remove all memory from heap
  heap->Cleanup();

  // Remove from linked list
  SpinLockHolder h(Static::pageheap_lock());
  if (heap->next_ != NULL) heap->next_->prev_ = heap->prev_;
  if (heap->prev_ != NULL) heap->prev_->next_ = heap->next_;
  if (thread_heaps_ == heap) thread_heaps_ = heap->next_;
  thread_heap_count_--;

  if (next_memory_steal_ == heap) next_memory_steal_ = heap->next_;
  if (next_memory_steal_ == NULL) next_memory_steal_ = thread_heaps_;
  unclaimed_cache_space_ += heap->max_size_;

  threadcache_allocator.Delete(heap);
}

void ThreadCache::RecomputePerThreadCacheSize() {
  // Divide available space across threads
  int n = thread_heap_count_ > 0 ? thread_heap_count_ : 1;
  size_t space = overall_thread_cache_size_ / n;

  // Limit to allowed range
  if (space < kMinThreadCacheSize) space = kMinThreadCacheSize;
  if (space > kMaxThreadCacheSize) space = kMaxThreadCacheSize;

  double ratio = space / max<double>(1, per_thread_cache_size_);
  size_t claimed = 0;
  for (ThreadCache* h = thread_heaps_; h != NULL; h = h->next_) {
    // Increasing the total cache size should not circumvent the
    // slow-start growth of max_size_.
    if (ratio < 1.0) {
      h->max_size_ = static_cast<size_t>(h->max_size_ * ratio);
    }
    claimed += h->max_size_;
  }
  unclaimed_cache_space_ = overall_thread_cache_size_ - claimed;
  per_thread_cache_size_ = space;
}
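
// Illustrative arithmetic (values assumed): if overall_thread_cache_size_
// is 16 MB and there are 8 thread heaps, space is 2 MB per thread.  If the
// previous per_thread_cache_size_ was 4 MB, ratio is 0.5 and every existing
// max_size_ is halved; when the budget grows instead (ratio >= 1), existing
// caches keep their current max_size_ and the surplus flows into
// unclaimed_cache_space_ to be claimed later.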

void ThreadCache::GetThreadStats(uint64_t* total_bytes, uint64_t* class_count) {
  for (ThreadCache* h = thread_heaps_; h != NULL; h = h->next_) {
    *total_bytes += h->Size();
    if (class_count) {
      for (int cl = 0; cl < kNumClasses; ++cl) {
        class_count[cl] += h->freelist_length(cl);
      }
    }
  }
}

void ThreadCache::set_overall_thread_cache_size(size_t new_size) {
  // Clip the value to a reasonable range
  if (new_size < kMinThreadCacheSize) new_size = kMinThreadCacheSize;
  if (new_size > (1<<30)) new_size = (1<<30);     // Limit to 1GB
  overall_thread_cache_size_ = new_size;

  RecomputePerThreadCacheSize();
}

}  // namespace tcmalloc