// Copyright (c) 2008, Google Inc.
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
//     * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

// ---
// Author: Sanjay Ghemawat <opensource@google.com>

#ifndef TCMALLOC_THREAD_CACHE_H_
#define TCMALLOC_THREAD_CACHE_H_

#include <config.h>
#ifdef HAVE_PTHREAD
#include <pthread.h>                    // for pthread_t, pthread_key_t
#endif
#include <stddef.h>                     // for size_t, NULL
#ifdef HAVE_STDINT_H
#include <stdint.h>                     // for uint32_t, uint64_t
#endif
#include <sys/types.h>                  // for ssize_t
#include "common.h"                     // for SizeMap, kMaxSize, etc
#include "internal_logging.h"           // for ASSERT, etc
#include "linked_list.h"                // for SLL_Pop, SLL_PopRange, etc
#include "maybe_threads.h"
#include "page_heap_allocator.h"        // for PageHeapAllocator
#include "sampler.h"                    // for Sampler
#include "static_vars.h"                // for Static

namespace tcmalloc {

// Even if we have support for thread-local storage in the compiler
// and linker, the OS may not support it.  We need to check that at
// runtime.  Right now, we have to keep a manual set of "bad" OSes.
#if defined(HAVE_TLS)
extern bool kernel_supports_tls;   // defined in thread_cache.cc
void CheckIfKernelSupportsTLS();
inline bool KernelSupportsTLS() {
  return kernel_supports_tls;
}
#endif    // HAVE_TLS
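
// A minimal usage sketch (illustrative only; GetThreadHeap() below is the
// canonical in-tree pattern): callers gate the __thread fast path on the
// runtime check and fall back to pthread TSD otherwise.
//
//   #ifdef HAVE_TLS
//     if (tcmalloc::KernelSupportsTLS()) {
//       // Fast path: the __thread variable is safe to use.
//     } else {
//       // Fall back to pthread_getspecific()-based lookup.
//     }
//   #endif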

//-------------------------------------------------------------------
// Data kept per thread
//-------------------------------------------------------------------

class ThreadCache {
 public:
  // All ThreadCache objects are kept in a linked list (for stats collection)
  ThreadCache* next_;
  ThreadCache* prev_;

  void Init(pthread_t tid);
  void Cleanup();

  // Accessors (mostly just for printing stats)
  int freelist_length(size_t cl) const { return list_[cl].length(); }

  // Total byte size in cache
  size_t Size() const { return size_; }

  // Allocate an object of the given size and class.  The size given
  // must be the same as the size of the class in the size map.
  void* Allocate(size_t size, size_t cl);
  void Deallocate(void* ptr, size_t size_class);

  void Scavenge();

  int GetSamplePeriod();

  // Record allocation of "k" bytes.  Return true iff allocation
  // should be sampled.
  bool SampleAllocation(size_t k);
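
  // Hypothetical caller-side sketch (not part of this header): a malloc
  // fast path would consult SampleAllocation() to decide whether to take
  // the sampled (slow) allocation route, e.g.:
  //
  //   tcmalloc::ThreadCache* heap = tcmalloc::ThreadCache::GetCache();
  //   if (heap->SampleAllocation(size)) {
  //     // Sampled path: record a stack trace for this allocation.
  //   } else {
  //     void* ptr = heap->Allocate(size, cl);  // 'cl' from Static::sizemap()
  //   }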

  static void InitModule();
  static void InitTSD();
  static ThreadCache* GetThreadHeap();
  static ThreadCache* GetCache();
  static ThreadCache* GetCacheIfPresent();
  static ThreadCache* CreateCacheIfNecessary();
  static void BecomeIdle();

  // Return the number of thread heaps in use.
  static inline int HeapsInUse();

  // Writes to total_bytes the total number of bytes used by all thread heaps.
  // If class_count is non-NULL, it must be an array of size kNumClasses; the
  // number of items on the freelist of each size class is added to the
  // corresponding entry.  The storage for both parameters must be
  // zero-initialized.
  // REQUIRES: Static::pageheap_lock is held.
  static void GetThreadStats(uint64_t* total_bytes, uint64_t* class_count);
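
  // Usage sketch (illustrative only): aggregate per-thread stats while
  // holding the page-heap lock, as the REQUIRES clause above demands.
  //
  //   uint64_t total_bytes = 0;
  //   uint64_t class_count[kNumClasses] = { 0 };  // zero-initialized
  //   {
  //     SpinLockHolder h(Static::pageheap_lock());
  //     ThreadCache::GetThreadStats(&total_bytes, class_count);
  //   }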

  // Sets the total thread cache size to new_size, recomputing the
  // individual thread cache sizes as necessary.
  // REQUIRES: Static::pageheap_lock is held.
  static void set_overall_thread_cache_size(size_t new_size);
  static size_t overall_thread_cache_size() {
    return overall_thread_cache_size_;
  }

 private:
  class FreeList {
   private:
    void* list_;       // Linked list of nodes

#ifdef _LP64
    // On 64-bit hardware, manipulating 16-bit values may be slightly slow.
    uint32_t length_;      // Current length.
    uint32_t lowater_;     // Low water mark for list length.
    uint32_t max_length_;  // Dynamic max list length based on usage.
    // Tracks the number of times a deallocation has caused
    // length_ > max_length_.  After the kMaxOverages'th time, max_length_
    // shrinks and length_overages_ is reset to zero.
    uint32_t length_overages_;
#else
    // If we aren't using 64-bit pointers then pack these into less space.
    uint16_t length_;
    uint16_t lowater_;
    uint16_t max_length_;
    uint16_t length_overages_;
#endif

   public:
    void Init() {
      list_ = NULL;
      length_ = 0;
      lowater_ = 0;
      max_length_ = 1;
      length_overages_ = 0;
    }

    // Return current length of list
    size_t length() const {
      return length_;
    }

    // Return the maximum length of the list.
    size_t max_length() const {
      return max_length_;
    }

    // Set the maximum length of the list.  If 'new_max' < length(), the
    // client is responsible for removing objects from the list.
    void set_max_length(size_t new_max) {
      max_length_ = new_max;
    }

    // Return the number of times that length() has gone over max_length().
    size_t length_overages() const {
      return length_overages_;
    }

    void set_length_overages(size_t new_count) {
      length_overages_ = new_count;
    }

    // Is list empty?
    bool empty() const {
      return list_ == NULL;
    }

    // Low-water mark management
    int lowwatermark() const { return lowater_; }
    void clear_lowwatermark() { lowater_ = length_; }

    void Push(void* ptr) {
      SLL_Push(&list_, ptr);
      length_++;
    }

    void* Pop() {
      ASSERT(list_ != NULL);
      length_--;
      if (length_ < lowater_) lowater_ = length_;
      return SLL_Pop(&list_);
    }

    void* Next() {
      return SLL_Next(&list_);
    }

    void PushRange(int N, void *start, void *end) {
      SLL_PushRange(&list_, start, end);
      length_ += N;
    }

    void PopRange(int N, void **start, void **end) {
      SLL_PopRange(&list_, N, start, end);
      ASSERT(length_ >= N);
      length_ -= N;
      if (length_ < lowater_) lowater_ = length_;
    }
  };
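
  // FreeList stores its links inside the free objects themselves (see
  // linked_list.h), so it needs no auxiliary memory.  A usage sketch
  // (illustrative only; FreeList is private to ThreadCache):
  //
  //   FreeList fl;
  //   fl.Init();
  //   fl.Push(obj1);       // each object must be at least pointer-sized
  //   fl.Push(obj2);
  //   void* p = fl.Pop();  // returns obj2: the list is LIFO
  //   ASSERT(fl.length() == 1);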

  // Gets and returns an object from the central cache, and, if possible,
  // also adds some objects of that size class to this thread cache.
  void* FetchFromCentralCache(size_t cl, size_t byte_size);

  // Releases some number of items from src.  Adjusts the list's max_length
  // to eventually converge on num_objects_to_move(cl).
  void ListTooLong(FreeList* src, size_t cl);

  // Releases N items from this thread cache.
  void ReleaseToCentralCache(FreeList* src, size_t cl, int N);

  // Increase max_size_ by reducing unclaimed_cache_space_ or by
  // reducing the max_size_ of some other thread.  In both cases,
  // the delta is kStealAmount.
  void IncreaseCacheLimit();
  // Same as above but requires Static::pageheap_lock is held.
  void IncreaseCacheLimitLocked();
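
  // Budget sketch (an illustrative restatement of the comment above, not
  // code from this file): cache space moves in fixed kStealAmount units,
  // so after a successful IncreaseCacheLimit() either
  //
  //   unclaimed_cache_space_ -= kStealAmount;  // claimed from the pool, or
  //   victim->max_size_      -= kStealAmount;  // stolen from another thread
  //
  // and in both cases this->max_size_ += kStealAmount.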

  // If TLS is available, we also store a copy of the per-thread object
  // in a __thread variable since __thread variables are faster to read
  // than pthread_getspecific().  We still need pthread_setspecific()
  // because __thread variables provide no way to run cleanup code when
  // a thread is destroyed.
  // We also give a hint to the compiler to use the "initial exec" TLS
  // model.  This is faster than the default TLS model, at the cost that
  // you cannot dlopen this library.  (To see the difference, look at
  // the CPU use of __tls_get_addr with and without this attribute.)
  // Since we don't really use dlopen in google code -- and using dlopen
  // on a malloc replacement is asking for trouble in any case -- that's
  // a good tradeoff for us.
#ifdef HAVE_TLS
  static __thread ThreadCache* threadlocal_heap_
# ifdef HAVE___ATTRIBUTE__
    __attribute__ ((tls_model ("initial-exec")))
# endif
    ;
#endif
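
  // (Illustrative note, added for clarity: with the initial-exec model the
  // compiler can load threadlocal_heap_ from a fixed offset off the thread
  // pointer -- a single "mov %fs:offset, %rax" on x86-64 Linux -- instead
  // of calling __tls_get_addr through the dynamic TLS machinery.)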

  // Thread-specific key.  Initialization here is somewhat tricky
  // because some Linux startup code invokes malloc() before it
  // is in a good enough state to handle pthread_key_create().
  // Therefore, we use TSD keys only after tsd_inited is set to true.
  // Until then, we use a slow path to get the heap object.
  static bool tsd_inited_;
  static pthread_key_t heap_key_;

  // Linked list of heap objects.  Protected by Static::pageheap_lock.
  static ThreadCache* thread_heaps_;
  static int thread_heap_count_;

  // A pointer to one of the objects in thread_heaps_.  Represents
  // the next ThreadCache from which a thread over its max_size_ should
  // steal cache space.  We round-robin through all of the objects in
  // thread_heaps_.  Protected by Static::pageheap_lock.
  static ThreadCache* next_memory_steal_;

  // Overall thread cache size.  Protected by Static::pageheap_lock.
  static size_t overall_thread_cache_size_;

  // Global per-thread cache size.  Writes are protected by
  // Static::pageheap_lock.  Reads are done without any locking, which should
  // be fine as long as size_t can be written atomically and we don't place
  // invariants between this variable and other pieces of state.
  static volatile size_t per_thread_cache_size_;

  // Represents overall_thread_cache_size_ minus the sum of max_size_
  // across all ThreadCaches.  Protected by Static::pageheap_lock.
  static ssize_t unclaimed_cache_space_;
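
  // Invariant sketch (an illustrative restatement of the comment above):
  //
  //   unclaimed_cache_space_ ==
  //       overall_thread_cache_size_ - sum of max_size_ over all heaps
  //
  // which presumably explains the signed type: the value can go negative
  // when threads collectively claim more than the configured budget.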

  // This class is laid out with the most frequently used fields
  // first so that hot elements are placed on the same cache line.

  size_t size_;                  // Combined size of data
  size_t max_size_;              // size_ > max_size_ --> Scavenge()

  // We sample allocations, biased by the size of the allocation
  Sampler sampler_;              // A sampler

  FreeList list_[kNumClasses];   // Array indexed by size-class

  pthread_t tid_;                // Which thread owns it
  bool in_setspecific_;          // In call to pthread_setspecific?

  // Allocate a new heap.  REQUIRES: Static::pageheap_lock is held.
  static ThreadCache* NewHeap(pthread_t tid);

  // Use only as pthread thread-specific destructor function.
  static void DestroyThreadCache(void* ptr);

  static void DeleteCache(ThreadCache* heap);
  static void RecomputePerThreadCacheSize();

  // Ensure that this class is cacheline-aligned.  This is critical for
  // performance, as false sharing would negate many of the benefits
  // of a per-thread cache.
} CACHELINE_ALIGNED;

// Allocator for thread heaps.
// This is logically part of the ThreadCache class, but MSVC, at
// least, does not like using ThreadCache as a template argument
// before the class is fully defined.  So we put it outside the class.
extern PageHeapAllocator<ThreadCache> threadcache_allocator;

inline int ThreadCache::HeapsInUse() {
  return threadcache_allocator.inuse();
}

inline bool ThreadCache::SampleAllocation(size_t k) {
  return sampler_.SampleAllocation(k);
}

inline void* ThreadCache::Allocate(size_t size, size_t cl) {
  ASSERT(size <= kMaxSize);
  ASSERT(size == Static::sizemap()->ByteSizeForClass(cl));

  FreeList* list = &list_[cl];
  if (list->empty()) {
    return FetchFromCentralCache(cl, size);
  }
  size_ -= size;
  return list->Pop();
}

inline void ThreadCache::Deallocate(void* ptr, size_t cl) {
  FreeList* list = &list_[cl];
  size_ += Static::sizemap()->ByteSizeForClass(cl);
  ssize_t size_headroom = max_size_ - size_ - 1;

  // This catches back-to-back frees of allocs in the same size
  // class.  A more comprehensive (and expensive) test would be to walk
  // the entire freelist.  But this might be enough to find some bugs.
  ASSERT(ptr != list->Next());

  list->Push(ptr);
  ssize_t list_headroom =
      static_cast<ssize_t>(list->max_length()) - list->length();

  // There are two relatively uncommon things that require further work.
  // In the common case we're done, and in that case we need a single branch
  // because of the bitwise-or trick that follows.
  if ((list_headroom | size_headroom) < 0) {
    if (list_headroom < 0) {
      ListTooLong(list, cl);
    }
    if (size_ >= max_size_) Scavenge();
  }
}
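
// (Worked note on the bitwise-or trick above, added for clarity: both
// headrooms are signed, so each is negative exactly when its sign bit is
// set.  OR-ing them sets the sign bit of the result if either sign bit is
// set, so "(a | b) < 0" tests "a < 0 || b < 0" with a single branch.  For
// example, list_headroom == -1 has all bits set, which forces the combined
// value negative regardless of size_headroom.)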

inline ThreadCache* ThreadCache::GetThreadHeap() {
#ifdef HAVE_TLS
  // __thread is faster, but only when the kernel supports it
  if (KernelSupportsTLS())
    return threadlocal_heap_;
#endif
  return reinterpret_cast<ThreadCache *>(
      perftools_pthread_getspecific(heap_key_));
}

inline ThreadCache* ThreadCache::GetCache() {
  ThreadCache* ptr = NULL;
  if (!tsd_inited_) {
    InitModule();
  } else {
    ptr = GetThreadHeap();
  }
  if (ptr == NULL) ptr = CreateCacheIfNecessary();
  return ptr;
}

// In deletion paths, we do not try to create a thread-cache.  This is
// because we may be in the thread destruction code and may have
// already cleaned up the cache for this thread.
inline ThreadCache* ThreadCache::GetCacheIfPresent() {
  if (!tsd_inited_) return NULL;
  return GetThreadHeap();
}

}  // namespace tcmalloc

#endif  // TCMALLOC_THREAD_CACHE_H_