chromium-blink-merge.git (Merge Chromium + Blink git repositories)
third_party/tcmalloc/vendor/src/thread_cache.cc
blob d6dead39c2f58e44d6f0bddac8ed373a102d4f6e
// Copyright (c) 2008, Google Inc.
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
//     * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

// ---
// Author: Ken Ashcraft <opensource@google.com>

#include <config.h>
#include "thread_cache.h"
#include <errno.h>
#include <string.h>                     // for memcpy
#include <algorithm>                    // for max, min
#include "base/commandlineflags.h"      // for DEFINE_int64, EnvToInt64
#include "base/spinlock.h"              // for SpinLockHolder
#include "central_freelist.h"           // for CentralFreeListPadded
#include "maybe_threads.h"

using std::min;
using std::max;

DEFINE_int64(tcmalloc_max_total_thread_cache_bytes,
             EnvToInt64("TCMALLOC_MAX_TOTAL_THREAD_CACHE_BYTES",
                        kDefaultOverallThreadCacheSize),
             "Bound on the total amount of bytes allocated to "
             "thread caches. This bound is not strict, so it is possible "
             "for the cache to go over this bound in certain circumstances. "
             "Maximum value of this flag is capped to 1 GB.");
namespace tcmalloc {

static bool phinited = false;

volatile size_t ThreadCache::per_thread_cache_size_ = kMaxThreadCacheSize;
size_t ThreadCache::overall_thread_cache_size_ = kDefaultOverallThreadCacheSize;
ssize_t ThreadCache::unclaimed_cache_space_ = kDefaultOverallThreadCacheSize;
PageHeapAllocator<ThreadCache> threadcache_allocator;
ThreadCache* ThreadCache::thread_heaps_ = NULL;
int ThreadCache::thread_heap_count_ = 0;
ThreadCache* ThreadCache::next_memory_steal_ = NULL;
#ifdef HAVE_TLS
__thread ThreadCache* ThreadCache::threadlocal_heap_
# ifdef HAVE___ATTRIBUTE__
   __attribute__ ((tls_model ("initial-exec")))
# endif
   ;
#endif
bool ThreadCache::tsd_inited_ = false;
pthread_key_t ThreadCache::heap_key_;

#if defined(HAVE_TLS)
bool kernel_supports_tls = false;      // be conservative
# if defined(_WIN32)    // windows has supported TLS since winnt, I think.
  void CheckIfKernelSupportsTLS() {
    kernel_supports_tls = true;
  }
# elif !HAVE_DECL_UNAME    // if too old for uname, probably too old for TLS
  void CheckIfKernelSupportsTLS() {
    kernel_supports_tls = false;
  }
# else
#   include <sys/utsname.h>    // DECL_UNAME checked for <sys/utsname.h> too
  void CheckIfKernelSupportsTLS() {
    struct utsname buf;
    if (uname(&buf) < 0) {   // should be impossible
      Log(kLog, __FILE__, __LINE__,
          "uname failed assuming no TLS support (errno)", errno);
      kernel_supports_tls = false;
    } else if (strcasecmp(buf.sysname, "linux") == 0) {
      // The linux case: the first kernel to support TLS was 2.6.0
      if (buf.release[0] < '2' && buf.release[1] == '.')    // 0.x or 1.x
        kernel_supports_tls = false;
      else if (buf.release[0] == '2' && buf.release[1] == '.' &&
               buf.release[2] >= '0' && buf.release[2] < '6' &&
               buf.release[3] == '.')                       // 2.0 - 2.5
        kernel_supports_tls = false;
      else
        kernel_supports_tls = true;
    } else if (strcasecmp(buf.sysname, "CYGWIN_NT-6.1-WOW64") == 0) {
      // In my testing, this version of cygwin, at least, would hang
      // when using TLS.
      kernel_supports_tls = false;
    } else {        // some other kernel, we'll be optimistic
      kernel_supports_tls = true;
    }
    // TODO(csilvers): VLOG(1) the tls status once we support RAW_VLOG
  }
# endif  // HAVE_DECL_UNAME
#endif   // HAVE_TLS
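
// Worked example of the uname() release-string check above (version strings
// are illustrative): "2.4.20" matches the 2.0 - 2.5 branch, so
// kernel_supports_tls stays false; "2.6.32" or "3.2.0" falls through to the
// final else branch and enables TLS.
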
void ThreadCache::Init(pthread_t tid) {
  size_ = 0;

  max_size_ = 0;
  IncreaseCacheLimitLocked();
  if (max_size_ == 0) {
    // There isn't enough memory to go around. Just give the minimum to
    // this thread.
    max_size_ = kMinThreadCacheSize;

    // Take unclaimed_cache_space_ negative.
    unclaimed_cache_space_ -= kMinThreadCacheSize;
    ASSERT(unclaimed_cache_space_ < 0);
  }

  next_ = NULL;
  prev_ = NULL;
  tid_  = tid;
  in_setspecific_ = false;
  for (size_t cl = 0; cl < kNumClasses; ++cl) {
    list_[cl].Init();
  }

  uint32_t sampler_seed;
  memcpy(&sampler_seed, &tid, sizeof(sampler_seed));
  sampler_.Init(sampler_seed);
}

void ThreadCache::Cleanup() {
  // Put unused memory back into central cache
  for (int cl = 0; cl < kNumClasses; ++cl) {
    if (list_[cl].length() > 0) {
      ReleaseToCentralCache(&list_[cl], cl, list_[cl].length());
    }
  }
}

// Remove some objects of class "cl" from central cache and add to thread heap.
// On success, return the first object for immediate use; otherwise return NULL.
void* ThreadCache::FetchFromCentralCache(size_t cl, size_t byte_size) {
  FreeList* list = &list_[cl];
  ASSERT(list->empty());
  const int batch_size = Static::sizemap()->num_objects_to_move(cl);

  const int num_to_move = min<int>(list->max_length(), batch_size);
  void *start, *end;
  int fetch_count = Static::central_cache()[cl].RemoveRange(
      &start, &end, num_to_move);

  ASSERT((start == NULL) == (fetch_count == 0));
  if (--fetch_count >= 0) {
    size_ += byte_size * fetch_count;
    list->PushRange(fetch_count, SLL_Next(start), end);
  }

  // Increase max length slowly up to batch_size. After that,
  // increase by batch_size in one shot so that the length is a
  // multiple of batch_size.
  if (list->max_length() < batch_size) {
    list->set_max_length(list->max_length() + 1);
  } else {
    // Don't let the list get too long. In 32 bit builds, the length
    // is represented by a 16 bit int, so we need to watch out for
    // integer overflow.
    int new_length = min<int>(list->max_length() + batch_size,
                              kMaxDynamicFreeListLength);
    // The list's max_length must always be a multiple of batch_size,
    // and kMaxDynamicFreeListLength is not necessarily a multiple
    // of batch_size.
    new_length -= new_length % batch_size;
    ASSERT(new_length % batch_size == 0);
    list->set_max_length(new_length);
  }
  return start;
}
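
// Illustrative trace of max_length for one size class, assuming
// num_objects_to_move(cl) == 32 (the real value comes from the size map):
// successive central-cache fetches grow it 1, 2, 3, ..., 32, and after that
// it jumps in whole batches (64, 96, 128, ...) until it reaches
// kMaxDynamicFreeListLength rounded down to a multiple of 32.
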
void ThreadCache::ListTooLong(FreeList* list, size_t cl) {
  const int batch_size = Static::sizemap()->num_objects_to_move(cl);
  ReleaseToCentralCache(list, cl, batch_size);

  // If the list is too long, we need to transfer some number of
  // objects to the central cache. Ideally, we would transfer
  // num_objects_to_move, so the code below tries to make max_length
  // converge on num_objects_to_move.

  if (list->max_length() < batch_size) {
    // Slow start the max_length so we don't overreserve.
    list->set_max_length(list->max_length() + 1);
  } else if (list->max_length() > batch_size) {
    // If we consistently go over max_length, shrink max_length. If we don't
    // shrink it, some amount of memory will always stay in this freelist.
    list->set_length_overages(list->length_overages() + 1);
    if (list->length_overages() > kMaxOverages) {
      ASSERT(list->max_length() > batch_size);
      list->set_max_length(list->max_length() - batch_size);
      list->set_length_overages(0);
    }
  }
}
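
// Example of the convergence above (batch_size == 32 and kMaxOverages == 3
// are assumed values): a list whose max_length grew to 96 but keeps
// overflowing loses one batch (96 -> 64) only after more than kMaxOverages
// overages, and shrinking by this path bottoms out at batch_size itself.
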
// Remove some objects of class "cl" from thread heap and add to central cache
void ThreadCache::ReleaseToCentralCache(FreeList* src, size_t cl, int N) {
  ASSERT(src == &list_[cl]);
  if (N > src->length()) N = src->length();
  size_t delta_bytes = N * Static::sizemap()->ByteSizeForClass(cl);

  // We return prepackaged chains of the correct size to the central cache.
  // TODO: Use the same format internally in the thread caches?
  int batch_size = Static::sizemap()->num_objects_to_move(cl);
  while (N > batch_size) {
    void *tail, *head;
    src->PopRange(batch_size, &head, &tail);
    Static::central_cache()[cl].InsertRange(head, tail, batch_size);
    N -= batch_size;
  }
  void *tail, *head;
  src->PopRange(N, &head, &tail);
  Static::central_cache()[cl].InsertRange(head, tail, N);
  size_ -= delta_bytes;
}
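
// Example of the batching above (batch_size == 32 assumed): releasing N == 70
// objects performs two InsertRange() calls of 32 inside the loop and one
// final call with the remaining 6, so the central free list only ever
// receives ranges of at most one batch at a time.
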
// Release idle memory to the central cache
void ThreadCache::Scavenge() {
  // If the low-water mark for the free list is L, it means we would
  // not have had to allocate anything from the central cache even if
  // we had reduced the free list size by L. We aim to get closer to
  // that situation by dropping L/2 nodes from the free list. This
  // may not release much memory, but if so we will call scavenge again
  // pretty soon and the low-water marks will be high on that call.
  //int64 start = CycleClock::Now();
  for (int cl = 0; cl < kNumClasses; cl++) {
    FreeList* list = &list_[cl];
    const int lowmark = list->lowwatermark();
    if (lowmark > 0) {
      const int drop = (lowmark > 1) ? lowmark/2 : 1;
      ReleaseToCentralCache(list, cl, drop);

      // Shrink the max length if it isn't used. Only shrink down to
      // batch_size -- if the thread was active enough to get the max_length
      // above batch_size, it will likely be that active again. If
      // max_length shrinks below batch_size, the thread will have to
      // go through the slow-start behavior again. The slow-start is useful
      // mainly for threads that stay relatively idle for their entire
      // lifetime.
      const int batch_size = Static::sizemap()->num_objects_to_move(cl);
      if (list->max_length() > batch_size) {
        list->set_max_length(
            max<int>(list->max_length() - batch_size, batch_size));
      }
    }
    list->clear_lowwatermark();
  }

  IncreaseCacheLimit();
}
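
// Example of the low-water-mark rule above: a size class whose free list
// never dipped below 40 objects since the last scavenge (lowwatermark == 40)
// gives 20 objects back to the central cache, while a class whose list was
// completely drained at some point (lowwatermark == 0) is left untouched.
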
void ThreadCache::IncreaseCacheLimit() {
  SpinLockHolder h(Static::pageheap_lock());
  IncreaseCacheLimitLocked();
}

void ThreadCache::IncreaseCacheLimitLocked() {
  if (unclaimed_cache_space_ > 0) {
    // Possibly make unclaimed_cache_space_ negative.
    unclaimed_cache_space_ -= kStealAmount;
    max_size_ += kStealAmount;
    return;
  }
  // Don't hold pageheap_lock too long. Try to steal from 10 other
  // threads before giving up. The i < 10 condition also prevents an
  // infinite loop in case none of the existing thread heaps are
  // suitable places to steal from.
  for (int i = 0; i < 10;
       ++i, next_memory_steal_ = next_memory_steal_->next_) {
    // Reached the end of the linked list. Start at the beginning.
    if (next_memory_steal_ == NULL) {
      ASSERT(thread_heaps_ != NULL);
      next_memory_steal_ = thread_heaps_;
    }
    if (next_memory_steal_ == this ||
        next_memory_steal_->max_size_ <= kMinThreadCacheSize) {
      continue;
    }
    next_memory_steal_->max_size_ -= kStealAmount;
    max_size_ += kStealAmount;

    next_memory_steal_ = next_memory_steal_->next_;
    return;
  }
}
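
// Sketch of the stealing behaviour above, with kStealAmount assumed to be
// the value defined in thread_cache.h: once unclaimed_cache_space_ is used
// up, each call inspects at most 10 heaps in the round-robin order kept by
// next_memory_steal_, skips this heap and any heap already at or below
// kMinThreadCacheSize, and moves kStealAmount bytes of budget from the first
// eligible victim's max_size_ to this thread's max_size_.
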
int ThreadCache::GetSamplePeriod() {
  return sampler_.GetSamplePeriod();
}

void ThreadCache::InitModule() {
  SpinLockHolder h(Static::pageheap_lock());
  if (!phinited) {
    Static::InitStaticVars();
    threadcache_allocator.Init();
    phinited = 1;
  }
}

void ThreadCache::InitTSD() {
  ASSERT(!tsd_inited_);
  perftools_pthread_key_create(&heap_key_, DestroyThreadCache);
  tsd_inited_ = true;

#ifdef PTHREADS_CRASHES_IF_RUN_TOO_EARLY
  // We may have used a fake pthread_t for the main thread. Fix it.
  pthread_t zero;
  memset(&zero, 0, sizeof(zero));
  SpinLockHolder h(Static::pageheap_lock());
  for (ThreadCache* h = thread_heaps_; h != NULL; h = h->next_) {
    if (h->tid_ == zero) {
      h->tid_ = pthread_self();
    }
  }
#endif
}

ThreadCache* ThreadCache::CreateCacheIfNecessary() {
  // Initialize per-thread data if necessary
  ThreadCache* heap = NULL;
  {
    SpinLockHolder h(Static::pageheap_lock());
    // On some old glibc's, and on freebsd's libc (as of freebsd 8.1),
    // calling pthread routines (even pthread_self) too early could
    // cause a segfault. Since we can call pthreads quite early, we
    // have to protect against that in such situations by making a
    // 'fake' pthread. This is not ideal since it doesn't work well
    // when linking tcmalloc statically with apps that create threads
    // before main, so we only do it if we have to.
#ifdef PTHREADS_CRASHES_IF_RUN_TOO_EARLY
    pthread_t me;
    if (!tsd_inited_) {
      memset(&me, 0, sizeof(me));
    } else {
      me = pthread_self();
    }
#else
    const pthread_t me = pthread_self();
#endif

    // This may be a recursive malloc call from pthread_setspecific()
    // In that case, the heap for this thread has already been created
    // and added to the linked list. So we search for that first.
    for (ThreadCache* h = thread_heaps_; h != NULL; h = h->next_) {
      if (h->tid_ == me) {
        heap = h;
        break;
      }
    }

    if (heap == NULL) heap = NewHeap(me);
  }

  // We call pthread_setspecific() outside the lock because it may
  // call malloc() recursively. We check for the recursive call using
  // the "in_setspecific_" flag so that we can avoid calling
  // pthread_setspecific() if we are already inside pthread_setspecific().
  if (!heap->in_setspecific_ && tsd_inited_) {
    heap->in_setspecific_ = true;
    perftools_pthread_setspecific(heap_key_, heap);
#ifdef HAVE_TLS
    // Also keep a copy in __thread for faster retrieval
    threadlocal_heap_ = heap;
#endif
    heap->in_setspecific_ = false;
  }
  return heap;
}

ThreadCache* ThreadCache::NewHeap(pthread_t tid) {
  // Create the heap and add it to the linked list
  ThreadCache *heap = threadcache_allocator.New();
  heap->Init(tid);
  heap->next_ = thread_heaps_;
  heap->prev_ = NULL;
  if (thread_heaps_ != NULL) {
    thread_heaps_->prev_ = heap;
  } else {
    // This is the only thread heap at the moment.
    ASSERT(next_memory_steal_ == NULL);
    next_memory_steal_ = heap;
  }
  thread_heaps_ = heap;
  thread_heap_count_++;
  return heap;
}

void ThreadCache::BecomeIdle() {
  if (!tsd_inited_) return;              // No caches yet
  ThreadCache* heap = GetThreadHeap();
  if (heap == NULL) return;              // No thread cache to remove
  if (heap->in_setspecific_) return;     // Do not disturb the active caller

  heap->in_setspecific_ = true;
  perftools_pthread_setspecific(heap_key_, NULL);
#ifdef HAVE_TLS
  // Also update the copy in __thread
  threadlocal_heap_ = NULL;
#endif
  heap->in_setspecific_ = false;
  if (GetThreadHeap() == heap) {
    // Somehow heap got reinstated by a recursive call to malloc
    // from pthread_setspecific. We give up in this case.
    return;
  }

  // We can now get rid of the heap
  DeleteCache(heap);
}

void ThreadCache::DestroyThreadCache(void* ptr) {
  // Note that "ptr" cannot be NULL since pthread promises not
  // to invoke the destructor on NULL values, but for safety,
  // we check anyway.
  if (ptr == NULL) return;
#ifdef HAVE_TLS
  // Prevent fast path of GetThreadHeap() from returning heap.
  threadlocal_heap_ = NULL;
#endif
  DeleteCache(reinterpret_cast<ThreadCache*>(ptr));
}

void ThreadCache::DeleteCache(ThreadCache* heap) {
  // Remove all memory from heap
  heap->Cleanup();

  // Remove from linked list
  SpinLockHolder h(Static::pageheap_lock());
  if (heap->next_ != NULL) heap->next_->prev_ = heap->prev_;
  if (heap->prev_ != NULL) heap->prev_->next_ = heap->next_;
  if (thread_heaps_ == heap) thread_heaps_ = heap->next_;
  thread_heap_count_--;

  if (next_memory_steal_ == heap) next_memory_steal_ = heap->next_;
  if (next_memory_steal_ == NULL) next_memory_steal_ = thread_heaps_;
  unclaimed_cache_space_ += heap->max_size_;

  threadcache_allocator.Delete(heap);
}

void ThreadCache::RecomputePerThreadCacheSize() {
  // Divide available space across threads
  int n = thread_heap_count_ > 0 ? thread_heap_count_ : 1;
  size_t space = overall_thread_cache_size_ / n;

  // Limit to allowed range
  if (space < kMinThreadCacheSize) space = kMinThreadCacheSize;
  if (space > kMaxThreadCacheSize) space = kMaxThreadCacheSize;

  double ratio = space / max<double>(1, per_thread_cache_size_);
  size_t claimed = 0;
  for (ThreadCache* h = thread_heaps_; h != NULL; h = h->next_) {
    // Increasing the total cache size should not circumvent the
    // slow-start growth of max_size_.
    if (ratio < 1.0) {
      h->max_size_ = static_cast<size_t>(h->max_size_ * ratio);
    }
    claimed += h->max_size_;
  }
  unclaimed_cache_space_ = overall_thread_cache_size_ - claimed;
  per_thread_cache_size_ = space;
}
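
// Worked example of the recomputation above (all numbers assumed): with
// overall_thread_cache_size_ == 32MB and 16 live threads, space == 2MB per
// thread. If per_thread_cache_size_ was previously 4MB, ratio == 0.5 and
// every heap's max_size_ is halved immediately; if the overall size grows
// instead (ratio > 1.0), max_size_ is left alone and heaps grow back through
// the usual IncreaseCacheLimit() slow start.
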
void ThreadCache::GetThreadStats(uint64_t* total_bytes, uint64_t* class_count) {
  for (ThreadCache* h = thread_heaps_; h != NULL; h = h->next_) {
    *total_bytes += h->Size();
    if (class_count) {
      for (int cl = 0; cl < kNumClasses; ++cl) {
        class_count[cl] += h->freelist_length(cl);
      }
    }
  }
}

void ThreadCache::set_overall_thread_cache_size(size_t new_size) {
  // Clip the value to a reasonable range
  if (new_size < kMinThreadCacheSize) new_size = kMinThreadCacheSize;
  if (new_size > (1<<30)) new_size = (1<<30);     // Limit to 1GB
  overall_thread_cache_size_ = new_size;

  RecomputePerThreadCacheSize();
}

}  // namespace tcmalloc