// Copyright 2014 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

// This is an OS-independent* module whose purpose is to track allocations and
// their call sites (stack traces). It is able to deal with hole punching
// (read: munmap). Also, it has low overhead and its presence in the system is
// barely noticeable, even when tracing *all* the processes.
// This module does NOT know how to deal with stack unwinding. The caller must
// do that and pass the addresses of the unwound stack.
// * (Modulo three lines for mutexes.)
//
// void heap_profiler_init(HeapStats*);
// void heap_profiler_alloc(addr, size, stack_frames, depth, flags);
// void heap_profiler_free(addr, size, old_flags);
//   (size == 0 means free the entire region; a non-NULL old_flags receives the
//    OR of the flags of the freed allocs).
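//
// A minimal usage sketch (illustrative only): the caller is responsible for
// unwinding the stack; |unwind| below stands for whatever unwinder it uses and
// is NOT part of this module.
//
//   static HeapStats hstats;  // Must outlive the profiler.
//   heap_profiler_init(&hstats);
//
//   uintptr_t frames[HEAP_PROFILER_MAX_DEPTH];
//   const uint32_t depth = unwind(frames, HEAP_PROFILER_MAX_DEPTH);
//   heap_profiler_alloc(ptr, size, frames, depth, 0 /* flags */);
//   ...
//   heap_profiler_free(ptr, 0, NULL);     // Frees the entire region at |ptr|.
//   heap_profiler_free(ptr, 4096, NULL);  // Punches a 4 KiB hole into it.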
//
// The profiling information is tracked in two data structures:
// 1) An RB-Tree of non-overlapping VM regions (allocs) sorted by their start
//    addr. Each entry tracks the start-end addresses and points to the stack
//    trace which created that allocation (see below).
// 2) A (hash) table of stack traces. In general #allocations >> #call sites
//    which create those allocations. In order to avoid duplicating the latter,
//    they are stored only once in this hash table and referenced by the allocs.
//
//    / Process virtual address space \
//    +------+      +------+      +------+
//    |Alloc1|      |Alloc2|      |Alloc3|  <- Allocs (an RB-Tree underneath)
//    +------+      +------+      +------+
//    Len: 12       Len: 4        Len: 4
//       |             |             |       +-----------+--------------+
//       |             |             |       | Alloc tot | stack frames |
//       |             |             |       +-----------+--------------+
//       +-------------|-------------+-----> |    16     | 0x1234 ....  |
//                     |                     +-----------+--------------+
//                     +-------------------> |     4     | 0x5678 ....  |
//                                           +-----------+--------------+
//                                             (A hash-table underneath)
//
// Final note: the memory for both 1) and 2) entries is carved out of two
// static pools (i.e. stack_traces and allocs). The pools are essentially
// treated as a sbrk and are kept compact by reusing freed elements (hence the
// freelist kept for each of them).
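//
// For reference, a rough sketch of the entry types as used by this file (the
// authoritative definitions live in heap_profiler.h; field types here are
// indicative only):
//
//   typedef struct StacktraceEntry {
//     uintptr_t frames[HEAP_PROFILER_MAX_DEPTH];  // Unwound stack frames.
//     uintptr_t hash;                // Full hash, cached for lookups/removal.
//     size_t alloc_bytes;            // Bytes currently allocated by this site.
//     struct StacktraceEntry* next;  // Next entry in the same hash bucket.
//   } StacktraceEntry;
//
//   typedef struct Alloc {
//     RB_ENTRY(Alloc) rb_node;       // RB-tree links.
//     uintptr_t start;               // First byte of the region (inclusive).
//     uintptr_t end;                 // Last byte of the region (inclusive).
//     uint32_t flags;
//     StacktraceEntry* st;           // Call site which created the region.
//     struct Alloc* next_free;       // Freelist link, used when not in the tree.
//   } Alloc;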
//
// All the internal (static) functions here assume that the |lock| is held.

#include <assert.h>
#include <stdint.h>
#include <string.h>

// Platform-dependent mutex boilerplate.
#if defined(__linux__) || defined(__ANDROID__)
#include <pthread.h>
#define DEFINE_MUTEX(x) pthread_mutex_t x = PTHREAD_MUTEX_INITIALIZER
#define LOCK_MUTEX(x) pthread_mutex_lock(&x)
#define UNLOCK_MUTEX(x) pthread_mutex_unlock(&x)
#else
#error OS not supported.
#endif
#include "tools/android/heap_profiler/heap_profiler.h"

static DEFINE_MUTEX(lock);

// |stats| contains the global tracking metadata and is the entry point which
// is read by the heap_dump tool.
static HeapStats* stats;

// +---------------------------------------------------------------------------+
// + Stack traces hash-table                                                    +
// +---------------------------------------------------------------------------+
#define ST_ENTRIES_MAX (64 * 1024)
#define ST_HASHTABLE_BUCKETS (64 * 1024) /* Must be a power of 2. */
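// (Power of 2 so that, below, "hash & (ST_HASHTABLE_BUCKETS - 1)" is the same
//  as "hash % ST_HASHTABLE_BUCKETS"; e.g. 0x12345 & 0xFFFF == 0x2345.)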

static StacktraceEntry stack_traces[ST_ENTRIES_MAX];
static StacktraceEntry* stack_traces_freelist;
static StacktraceEntry* stack_traces_ht[ST_HASHTABLE_BUCKETS];

// Looks up a stack trace from the stack frames. Creates a new one if necessary.
static StacktraceEntry* record_stacktrace(uintptr_t* frames, uint32_t depth) {
  if (depth > HEAP_PROFILER_MAX_DEPTH)
    depth = HEAP_PROFILER_MAX_DEPTH;

  uintptr_t hash = 0;
  uint32_t i;
  for (i = 0; i < depth; ++i)
    hash = (hash << 1) ^ (frames[i]);
  const uint32_t slot = hash & (ST_HASHTABLE_BUCKETS - 1);
  StacktraceEntry* st = stack_traces_ht[slot];

  // Look for an existing entry in the hash-table.
  const size_t frames_length = depth * sizeof(uintptr_t);
  while (st != NULL &&
         (st->hash != hash || memcmp(frames, st->frames, frames_length) != 0)) {
    st = st->next;
  }

  // If not found, create a new one from the stack_traces array and add it to
  // the hash-table.
  if (st == NULL) {
    // Get a free element either from the freelist or from the pool.
    if (stack_traces_freelist != NULL) {
      st = stack_traces_freelist;
      stack_traces_freelist = stack_traces_freelist->next;
    } else if (stats->max_stack_traces < ST_ENTRIES_MAX) {
      st = &stack_traces[stats->max_stack_traces];
      ++stats->max_stack_traces;
    } else {
      return NULL;  // OOM: the stack_traces pool is exhausted.
    }

    memset(st, 0, sizeof(*st));
    memcpy(st->frames, frames, frames_length);
    st->hash = hash;
    st->next = stack_traces_ht[slot];
    stack_traces_ht[slot] = st;
    ++stats->num_stack_traces;
  }

  return st;
}

// Frees up a stack trace and appends it to the corresponding freelist.
static void free_stacktrace(StacktraceEntry* st) {
  assert(st->alloc_bytes == 0);
  const uint32_t slot = st->hash & (ST_HASHTABLE_BUCKETS - 1);

  // The expected load factor of the hash-table is very low. Frees should be
  // pretty rare. Hence don't bother with a doubly linked list; it would likely
  // cost more than the occasional bucket walk.
  StacktraceEntry** prev = &stack_traces_ht[slot];
  while (*prev != st)
    prev = &((*prev)->next);

  // Remove from the hash-table bucket.
  *prev = st->next;

  // Add to the freelist.
  st->next = stack_traces_freelist;
  stack_traces_freelist = st;
  --stats->num_stack_traces;
}

// +---------------------------------------------------------------------------+
// + Allocs RB-tree                                                             +
// +---------------------------------------------------------------------------+
#define ALLOCS_ENTRIES_MAX (256 * 1024)

static Alloc allocs[ALLOCS_ENTRIES_MAX];
static Alloc* allocs_freelist;
static RB_HEAD(HeapEntriesTree, Alloc) allocs_tree =
    RB_INITIALIZER(&allocs_tree);

// Comparator used by the RB-Tree (mind the overflow, avoid arith on addresses).
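// (E.g. a plain "return (int)(alloc_1->start - alloc_2->start);" could wrap
//  around and/or truncate for regions further apart than INT_MAX, hence the
//  explicit comparisons below.)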
static int allocs_tree_cmp(Alloc* alloc_1, Alloc* alloc_2) {
  if (alloc_1->start < alloc_2->start)
    return -1;
  if (alloc_1->start > alloc_2->start)
    return 1;
  return 0;
}

RB_PROTOTYPE(HeapEntriesTree, Alloc, rb_node, allocs_tree_cmp);
RB_GENERATE(HeapEntriesTree, Alloc, rb_node, allocs_tree_cmp);

// Allocates a new Alloc and inserts it in the tree.
static Alloc* insert_alloc(
    uintptr_t start, uintptr_t end, StacktraceEntry* st, uint32_t flags) {
  Alloc* alloc = NULL;

  // First of all, get a free element either from the freelist or from the pool.
  if (allocs_freelist != NULL) {
    alloc = allocs_freelist;
    allocs_freelist = alloc->next_free;
  } else if (stats->max_allocs < ALLOCS_ENTRIES_MAX) {
    alloc = &allocs[stats->max_allocs];
    ++stats->max_allocs;
  } else {
    return NULL;  // OOM: the allocs pool is exhausted.
  }

  alloc->start = start;
  alloc->end = end;
  alloc->st = st;
  alloc->flags = flags;
  alloc->next_free = NULL;
  RB_INSERT(HeapEntriesTree, &allocs_tree, alloc);
  return alloc;
}

// Deletes all the allocs in the range [addr, addr+size[ dealing with partial
// frees and hole punching. Returns the OR of the flags of the deleted allocs.
// Note that in the general case this function might need to deal with very
// unfortunate cases, as below:
//
// Alloc tree begin: [Alloc 1]----[Alloc 2]-------[Alloc 3][Alloc 4]---[Alloc 5]
// Deletion range:                       [xxxxxxxxxxxxxxxxxxxx]
// Alloc tree end:   [Alloc 1]----[Al.2]----------------------[Al.4]---[Alloc 5]
// Alloc 3 has to be deleted, and Allocs 2 and 4 shrunk.
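//
// A concrete example (illustrative numbers only): given an existing alloc
// covering [0x1000, 0x1FFF], heap_profiler_free((void*) 0x1400, 0x400, NULL)
// punches a hole and leaves two allocs, [0x1000, 0x13FF] and [0x1800, 0x1FFF],
// both pointing to the original StacktraceEntry, whose alloc_bytes drops by
// 0x400.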
static uint32_t delete_allocs_in_range(void* addr, size_t size) {
  uintptr_t del_start = (uintptr_t) addr;
  uintptr_t del_end = del_start + size - 1;
  uint32_t flags = 0;

  Alloc* alloc = NULL;
  Alloc* next_alloc = RB_ROOT(&allocs_tree);

  // Lookup the first (by address) relevant Alloc to initiate the deletion walk.
  // At the end of the loop next_alloc is either:
  // - the closest alloc starting before (or exactly at) the start of the
  //   deletion range (i.e. addr == del_start).
  // - the first alloc inside the deletion range.
  // - the first alloc after the deletion range, iff the range was already empty
  //   (in this case the next loop will just bail out doing nothing).
  // - NULL: iff the entire tree is empty (as above).
  while (next_alloc != NULL) {
    alloc = next_alloc;
    if (alloc->start > del_start) {
      next_alloc = RB_LEFT(alloc, rb_node);
    } else if (alloc->end < del_start) {
      next_alloc = RB_RIGHT(alloc, rb_node);
    } else {  // alloc->start <= del_start && alloc->end >= del_start.
      break;
    }
  }

  // Now scan the allocs linearly, deleting chunks (or eventually whole allocs)
  // until passing the end of the deletion region.
  next_alloc = alloc;
  while (next_alloc != NULL) {
    alloc = next_alloc;
    next_alloc = RB_NEXT(HeapEntriesTree, &allocs_tree, alloc);

    // In the general case we stop once we get past the end of the deletion
    // range.
    if (alloc->start > del_end)
      break;

    // This deals with the case of the first Alloc lying before the range.
    if (alloc->end < del_start)
      continue;

    // size == 0 is a special case. It means deleting only the alloc which
    // starts exactly at |del_start|, if any (for dealing with free(ptr)).
    if (size == 0) {
      if (alloc->start > del_start)
        break;
      if (alloc->start < del_start)
        continue;
      del_end = alloc->end;
    }

    // At this point the Alloc must overlap (partially or completely) with the
    // deletion range.
    assert(!(alloc->start > del_end || alloc->end < del_start));

    StacktraceEntry* st = alloc->st;
    flags |= alloc->flags;
    uintptr_t freed_bytes = 0;  // Bytes freed in this cycle.

    if (del_start <= alloc->start) {
      if (del_end >= alloc->end) {
        // Complete overlap. Delete the full Alloc. Note: the range might
        // still overlap with the next allocs.
        // Begin:      ------[alloc.start    alloc.end]-[next alloc]
        // Del range:     [xxxxxxxxxxxxxxxxxxxxxxxxxxxxxx]
        // Result:     -----------------------------------[next alloc]
        // [next alloc] will be shrunk on the next iteration.
        freed_bytes = alloc->end - alloc->start + 1;
        RB_REMOVE(HeapEntriesTree, &allocs_tree, alloc);

        // Clean-up, so heap_dump can tell this is a free entry and skip it.
        alloc->start = alloc->end = 0;

        // Put in the freelist.
        alloc->next_free = allocs_freelist;
        allocs_freelist = alloc;
      } else {
        // Partial overlap at the beginning. Cut the first part and shrink the
        // alloc.
        // Begin:      ------[alloc.start    alloc.end]-[next alloc]
        // Del range:     [xxxxxx]
        // Result:     ------------[start    alloc.end]-[next alloc]
        freed_bytes = del_end - alloc->start + 1;
        alloc->start = del_end + 1;
        // No need to update the tree even if we changed the key. The keys are
        // still monotonic (because the ranges are guaranteed to not overlap).
      }
    } else {
      if (del_end >= alloc->end) {
        // Partial overlap at the end. Cut the last part and shrink the alloc.
        // Begin:      ------[alloc.start    alloc.end]-[next alloc]
        // Del range:                      [xxxxxxxxxxx]
        // Result:     ------[alloc.start alloc.end]----[next alloc]
        // [next alloc] will be shrunk on the next iteration.
        freed_bytes = alloc->end - del_start + 1;
        alloc->end = del_start - 1;
      } else {
        // Hole punching. Requires creating an extra alloc.
        // Begin:      ------[alloc.start    alloc.end]-[next alloc]
        // Del range:              [xxxxxx]
        // Result:     ------[ alloc 1 ]----[ alloc 2 ]-[next alloc]
        freed_bytes = del_end - del_start + 1;
        const uintptr_t old_end = alloc->end;
        alloc->end = del_start - 1;

        // In case of OOM, don't count the 2nd alloc we failed to allocate.
        if (insert_alloc(del_end + 1, old_end, st, alloc->flags) == NULL)
          freed_bytes += (old_end - del_end);
      }
    }

    // Now update the StacktraceEntry the Alloc was pointing to, freeing it up
    // if it no longer tracks any allocated bytes.
    assert(st->alloc_bytes >= freed_bytes);
    st->alloc_bytes -= freed_bytes;
    if (st->alloc_bytes == 0)
      free_stacktrace(st);
    stats->total_alloc_bytes -= freed_bytes;
  }

  return flags;
}

// +---------------------------------------------------------------------------+
// + Library entry points (refer to heap_profiler.h for API doc).               +
// +---------------------------------------------------------------------------+
void heap_profiler_free(void* addr, size_t size, uint32_t* old_flags) {
  assert(size == 0 || ((uintptr_t) addr + (size - 1)) >= (uintptr_t) addr);

  LOCK_MUTEX(lock);
  uint32_t flags = delete_allocs_in_range(addr, size);
  UNLOCK_MUTEX(lock);

  if (old_flags != NULL)
    *old_flags = flags;
}

void heap_profiler_alloc(void* addr, size_t size, uintptr_t* frames,
                         uint32_t depth, uint32_t flags) {
  if (depth > HEAP_PROFILER_MAX_DEPTH)
    depth = HEAP_PROFILER_MAX_DEPTH;

  if (size == 0)  // Apps calling malloc(0), sometimes it happens.
    return;

  const uintptr_t start = (uintptr_t) addr;
  const uintptr_t end = start + (size - 1);
  assert(start <= end);

  LOCK_MUTEX(lock);
  delete_allocs_in_range(addr, size);

  StacktraceEntry* st = record_stacktrace(frames, depth);
  if (st != NULL) {
    Alloc* alloc = insert_alloc(start, end, st, flags);
    if (alloc != NULL) {
      st->alloc_bytes += size;
      stats->total_alloc_bytes += size;
    }
  }
  UNLOCK_MUTEX(lock);
}

void heap_profiler_init(HeapStats* heap_stats) {
  LOCK_MUTEX(lock);

  assert(stats == NULL);
  stats = heap_stats;
  memset(stats, 0, sizeof(HeapStats));
  stats->magic_start = HEAP_PROFILER_MAGIC_MARKER;
  stats->allocs = &allocs[0];
  stats->stack_traces = &stack_traces[0];

  UNLOCK_MUTEX(lock);
}

void heap_profiler_cleanup(void) {
  LOCK_MUTEX(lock);

  assert(stats != NULL);
  memset(stack_traces, 0, sizeof(StacktraceEntry) * stats->max_stack_traces);
  memset(stack_traces_ht, 0, sizeof(stack_traces_ht));
  stack_traces_freelist = NULL;

  memset(allocs, 0, sizeof(Alloc) * stats->max_allocs);
  allocs_freelist = NULL;
  RB_INIT(&allocs_tree);

  stats = NULL;
  UNLOCK_MUTEX(lock);
}