Bug 497723 - forgot to restore callgrind output cleanup
[valgrind.git] / dhat / dh_main.c
blob9e6ec1c066117ad5438cb0f1552b00963c7dfd6e
2 //--------------------------------------------------------------------//
3 //--- DHAT: a Dynamic Heap Analysis Tool dh_main.c ---//
4 //--------------------------------------------------------------------//
6 /*
7 This file is part of DHAT, a Valgrind tool for profiling the
8 heap usage of programs.
10 Copyright (C) 2010-2018 Mozilla Foundation
12 This program is free software; you can redistribute it and/or
13 modify it under the terms of the GNU General Public License as
14 published by the Free Software Foundation; either version 2 of the
15 License, or (at your option) any later version.
17 This program is distributed in the hope that it will be useful, but
18 WITHOUT ANY WARRANTY; without even the implied warranty of
19 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 General Public License for more details.
22 You should have received a copy of the GNU General Public License
23 along with this program; if not, see <http://www.gnu.org/licenses/>.
25 The GNU General Public License is contained in the file COPYING.
28 /* Contributed by Julian Seward <jseward@acm.org> */
30 #include "pub_tool_basics.h"
31 #include "pub_tool_clientstate.h"
32 #include "pub_tool_clreq.h"
33 #include "pub_tool_libcbase.h"
34 #include "pub_tool_libcassert.h"
35 #include "pub_tool_libcfile.h"
36 #include "pub_tool_libcprint.h"
37 #include "pub_tool_libcproc.h"
38 #include "pub_tool_machine.h" // VG_(fnptr_to_fnentry)
39 #include "pub_tool_mallocfree.h"
40 #include "pub_tool_options.h"
41 #include "pub_tool_replacemalloc.h"
42 #include "pub_tool_tooliface.h"
43 #include "pub_tool_wordfm.h"
45 #include "dhat.h"
/* Largest block size (in bytes) for which new_block() allocates a
   per-byte access histogram. */
#define HISTOGRAM_SIZE_LIMIT 1024
/* 25 times the default limit.  The expansion is parenthesised so the
   macro behaves as a single value inside any larger expression
   (e.g. 'x / USER_HISTOGRAM_SIZE_LIMIT').
   NOTE(review): the users of this macro are not visible in this part
   of the file -- presumably it bounds user-requested histograms. */
#define USER_HISTOGRAM_SIZE_LIMIT (25*HISTOGRAM_SIZE_LIMIT)
50 //------------------------------------------------------------//
51 //--- Globals ---//
52 //------------------------------------------------------------//
54 // Values for the entire run.
55 static ULong g_total_blocks = 0;
56 static ULong g_total_bytes = 0;
58 // Current values. g_curr_blocks and g_curr_bytes are only used with
59 // clo_mode=Heap.
60 static ULong g_curr_blocks = 0;
61 static ULong g_curr_bytes = 0;
62 static ULong g_curr_instrs = 0; // incremented from generated code
64 // Values at the global max, i.e. when g_curr_bytes peaks.
65 // Only used with clo_mode=Heap.
66 static ULong g_max_blocks = 0;
67 static ULong g_max_bytes = 0;
69 // Time of the global max.
70 static ULong g_tgmax_instrs = 0;
72 // Values for the entire run. Updated each time a block is retired.
73 // Only used with clo_mode=Heap.
74 static ULong g_reads_bytes = 0;
75 static ULong g_writes_bytes = 0;
77 //------------------------------------------------------------//
78 //--- Command line args ---//
79 //------------------------------------------------------------//
81 typedef enum { Heap=55, Copy, AdHoc } ProfileKind;
83 static ProfileKind clo_mode = Heap;
85 static const HChar* clo_dhat_out_file = "dhat.out.%p";
87 static Bool dh_process_cmd_line_option(const HChar* arg)
89 if VG_STR_CLO(arg, "--dhat-out-file", clo_dhat_out_file) {
91 } else if (VG_XACT_CLO(arg, "--mode=heap", clo_mode, Heap)) {
92 } else if (VG_XACT_CLO(arg, "--mode=copy", clo_mode, Copy)) {
93 } else if (VG_XACT_CLO(arg, "--mode=ad-hoc", clo_mode, AdHoc)) {
95 } else {
96 return VG_(replacement_malloc_process_cmd_line_option)(arg);
99 return True;
102 static void dh_print_usage(void)
104 VG_(printf)(
105 " --dhat-out-file=<file> output file name [dhat.out.%%p]\n"
106 " --mode=heap|copy|ad-hoc profiling mode\n"
110 static void dh_print_debug_usage(void)
112 VG_(printf)(
113 " (none)\n"
117 //------------------------------------------------------------//
118 //--- an Interval Tree of live blocks ---//
119 //------------------------------------------------------------//
121 /* Tracks information about live blocks. */
122 typedef
123 struct {
124 Addr payload;
125 SizeT req_szB;
126 ExeContext* ec; /* allocation ec */
127 ULong allocd_at; /* instruction number */
128 ULong reads_bytes;
129 ULong writes_bytes;
130 /* Approx histogram, one byte per payload byte. Counts latch up
131 therefore at 0xFFFF. Can be NULL if the block is resized or if
132 the block is larger than HISTOGRAM_SIZE_LIMIT. */
133 UShort* histoW; /* [0 .. req_szB-1] */
135 Block;
137 /* May not contain zero-sized blocks. May not contain
138 overlapping blocks. */
139 static WordFM* interval_tree = NULL; /* WordFM* Block* void */
141 /* Here's the comparison function. Since the tree is required
142 to contain non-zero sized, non-overlapping blocks, it's good
143 enough to consider any overlap as a match. */
144 static Word interval_tree_Cmp ( UWord k1, UWord k2 )
146 Block* b1 = (Block*)k1;
147 Block* b2 = (Block*)k2;
148 tl_assert(b1->req_szB > 0);
149 tl_assert(b2->req_szB > 0);
150 if (b1->payload + b1->req_szB <= b2->payload) return -1;
151 if (b2->payload + b2->req_szB <= b1->payload) return 1;
152 return 0;
155 // 3-entry cache for find_Block_containing
156 static Block* fbc_cache0 = NULL;
157 static Block* fbc_cache1 = NULL;
158 static Block* fbc_cache2 = NULL;
160 static UWord stats__n_fBc_cached0 = 0;
161 static UWord stats__n_fBc_cached1 = 0;
162 static UWord stats__n_fBc_cached2 = 0;
163 static UWord stats__n_fBc_uncached = 0;
164 static UWord stats__n_fBc_notfound = 0;
166 static Block* find_Block_containing ( Addr a )
168 tl_assert(clo_mode == Heap);
170 if (LIKELY(fbc_cache0
171 && fbc_cache0->payload <= a
172 && a < fbc_cache0->payload + fbc_cache0->req_szB)) {
173 // found at 0
174 stats__n_fBc_cached0++;
175 return fbc_cache0;
177 if (LIKELY(fbc_cache1
178 && fbc_cache1->payload <= a
179 && a < fbc_cache1->payload + fbc_cache1->req_szB)) {
180 // found at 1; swap 0 and 1
181 Block* tmp = fbc_cache1;
182 fbc_cache1 = fbc_cache0;
183 fbc_cache0 = tmp;
184 stats__n_fBc_cached1++;
185 return tmp;
187 if (LIKELY(fbc_cache2
188 && fbc_cache2->payload <= a
189 && a < fbc_cache2->payload + fbc_cache2->req_szB)) {
190 // found at 2; swap 1 and 2
191 Block* tmp = fbc_cache2;
192 fbc_cache2 = fbc_cache1;
193 fbc_cache1 = tmp;
194 stats__n_fBc_cached2++;
195 return tmp;
198 Block fake;
199 fake.payload = a;
200 fake.req_szB = 1;
201 UWord foundkey = 1;
202 UWord foundval = 1;
203 Bool found = VG_(lookupFM)( interval_tree,
204 &foundkey, &foundval, (UWord)&fake );
205 if (!found) {
206 stats__n_fBc_notfound++;
207 return NULL;
209 tl_assert(foundval == 0); // we don't store vals in the interval tree
210 tl_assert(foundkey != 1);
211 Block* res = (Block*)foundkey;
212 tl_assert(res != &fake);
213 // put at the top position
214 fbc_cache2 = fbc_cache1;
215 fbc_cache1 = fbc_cache0;
216 fbc_cache0 = res;
217 stats__n_fBc_uncached++;
218 return res;
221 // delete a block; asserts if not found. (viz, 'a' must be
222 // known to be present.)
223 static void delete_Block_starting_at ( Addr a )
225 tl_assert(clo_mode == Heap);
227 Block fake;
228 fake.payload = a;
229 fake.req_szB = 1;
230 Bool found = VG_(delFromFM)( interval_tree,
231 NULL, NULL, (Addr)&fake );
232 tl_assert(found);
233 fbc_cache0 = fbc_cache1 = fbc_cache2 = NULL;
236 //------------------------------------------------------------//
237 //--- a FM of allocation points (APs) ---//
238 //------------------------------------------------------------//
240 typedef
241 struct {
242 // The program point that we're summarising stats for.
243 ExeContext* ec;
245 // Total number of blocks and bytes allocated by this PP.
246 ULong total_blocks;
247 ULong total_bytes;
249 // The current number of blocks and bytes live for this PP.
250 // Only used with clo_mode=Heap.
251 ULong curr_blocks;
252 ULong curr_bytes;
254 // Values at the PP max, i.e. when this PP's curr_bytes peaks.
255 // Only used with clo_mode=Heap.
256 ULong max_blocks; // Blocks at the PP max.
257 ULong max_bytes; // The PP max, measured in bytes.
259 // Values at the global max.
260 // Only used with clo_mode=Heap.
261 ULong at_tgmax_blocks;
262 ULong at_tgmax_bytes;
264 // Total lifetimes of all blocks allocated by this PP. Includes blocks
265 // explicitly freed and blocks implicitly freed at termination.
266 // Only used with clo_mode=Heap.
267 ULong total_lifetimes_instrs;
269 // Number of blocks freed by this PP. (Only used in assertions.)
270 // Only used with clo_mode=Heap.
271 ULong freed_blocks;
273 // Total number of reads and writes in all blocks allocated
274 // by this PP. Only used with clo_mode=Heap.
275 ULong reads_bytes;
276 ULong writes_bytes;
278 /* Histogram information. We maintain a histogram aggregated for
279 all retiring Blocks allocated by this PP, but only if:
280 - this PP has only ever allocated objects of one size
281 - that size is <= HISTOGRAM_SIZE_LIMIT
282 What we need therefore is a mechanism to see if this PP
283 has only ever allocated blocks of one size.
285 3 states:
286 Unknown because no retirement yet
287 Exactly xsize all retiring blocks are of this size
288 Mixed multiple different sizes seen
290 Only used with clo_mode=Heap.
292 enum { Unknown=999, Exactly, Mixed } xsize_tag;
293 SizeT xsize;
294 UInt* histo; /* [0 .. xsize-1] */
296 PPInfo;
298 /* maps ExeContext*'s to PPInfo*'s. Note that the keys must match the
299 .ec field in the values. */
300 static WordFM* ppinfo = NULL; /* WordFM* ExeContext* PPInfo* */
302 // Are we at peak memory? If so, update at_tgmax_blocks and at_tgmax_bytes in
303 // all PPInfos. Note that this is moderately expensive so we avoid calling it
304 // on every allocation.
305 static void check_for_peak(void)
307 tl_assert(clo_mode == Heap);
309 if (g_curr_bytes == g_max_bytes) {
310 // It's a peak. (If there are multiple equal peaks we record the latest
311 // one.)
312 UWord keyW, valW;
313 VG_(initIterFM)(ppinfo);
314 while (VG_(nextIterFM)(ppinfo, &keyW, &valW)) {
315 PPInfo* ppi = (PPInfo*)valW;
316 tl_assert(ppi && ppi->ec == (ExeContext*)keyW);
317 ppi->at_tgmax_blocks = ppi->curr_blocks;
318 ppi->at_tgmax_bytes = ppi->curr_bytes;
320 VG_(doneIterFM)(ppinfo);
324 /* 'bk' is being introduced (has just been allocated). Find the
325 relevant PPInfo entry for it, or create one, based on the block's
326 allocation EC. Then, update the PPInfo to the extent that we
327 actually can, to reflect the allocation. */
328 static void intro_Block(Block* bk)
330 tl_assert(bk);
331 tl_assert(bk->ec);
333 PPInfo* ppi = NULL;
334 UWord keyW = 0;
335 UWord valW = 0;
336 Bool found = VG_(lookupFM)( ppinfo,
337 &keyW, &valW, (UWord)bk->ec );
338 if (found) {
339 ppi = (PPInfo*)valW;
340 tl_assert(keyW == (UWord)bk->ec);
341 } else {
342 ppi = VG_(malloc)( "dh.intro_Block.1", sizeof(PPInfo) );
343 VG_(memset)(ppi, 0, sizeof(*ppi));
344 ppi->ec = bk->ec;
345 Bool present = VG_(addToFM)( ppinfo,
346 (UWord)bk->ec, (UWord)ppi );
347 tl_assert(!present);
348 if (clo_mode == Heap) {
349 // histo stuff
350 tl_assert(ppi->freed_blocks == 0);
351 ppi->xsize_tag = Unknown;
352 ppi->xsize = 0;
353 if (0) VG_(printf)("ppi %p --> Unknown\n", ppi);
357 tl_assert(ppi->ec == bk->ec);
359 // Update global stats and PPInfo stats.
361 g_total_blocks++;
362 g_total_bytes += bk->req_szB;
364 ppi->total_blocks++;
365 ppi->total_bytes += bk->req_szB;
367 if (clo_mode == Heap) {
368 g_curr_blocks++;
369 g_curr_bytes += bk->req_szB;
371 ppi->curr_blocks++;
372 ppi->curr_bytes += bk->req_szB;
374 // The use of `>=` rather than `>` means that if there are multiple equal
375 // peaks we record the latest one, like `check_for_peak` does.
376 if (g_curr_bytes >= g_max_bytes) {
377 g_max_blocks = g_curr_blocks;
378 g_max_bytes = g_curr_bytes;
379 g_tgmax_instrs = g_curr_instrs;
381 ppi->max_blocks = ppi->curr_blocks;
382 ppi->max_bytes = ppi->curr_bytes;
387 /* 'bk' is retiring (being freed). Find the relevant PPInfo entry for
388 it, which must already exist. Then, fold info from 'bk' into that
389 entry. 'because_freed' is True if the block is retiring because
390 the client has freed it. If it is False then the block is retiring
391 because the program has finished, in which case we want to skip the
392 updates of the total blocks live etc for this PP, but still fold in
393 the access counts and histo data that have so far accumulated for
394 the block. */
395 static void retire_Block(Block* bk, Bool because_freed)
397 tl_assert(clo_mode == Heap);
398 tl_assert(bk);
399 tl_assert(bk->ec);
401 PPInfo* ppi = NULL;
402 UWord keyW = 0;
403 UWord valW = 0;
404 Bool found = VG_(lookupFM)( ppinfo,
405 &keyW, &valW, (UWord)bk->ec );
406 tl_assert(found);
407 ppi = (PPInfo*)valW;
408 tl_assert(ppi->ec == bk->ec);
410 // update stats following this free.
411 if (0)
412 VG_(printf)("ec %p ppi->c_by_l %llu bk->rszB %llu\n",
413 bk->ec, ppi->curr_bytes, (ULong)bk->req_szB);
415 if (because_freed) {
416 // Total bytes is coming down from a possible peak.
417 check_for_peak();
419 // Then update global stats.
420 tl_assert(g_curr_blocks >= 1);
421 tl_assert(g_curr_bytes >= bk->req_szB);
422 g_curr_blocks--;
423 g_curr_bytes -= bk->req_szB;
425 // Then update PPInfo stats.
426 tl_assert(ppi->curr_blocks >= 1);
427 tl_assert(ppi->curr_bytes >= bk->req_szB);
428 ppi->curr_blocks--;
429 ppi->curr_bytes -= bk->req_szB;
431 ppi->freed_blocks++;
434 tl_assert(bk->allocd_at <= g_curr_instrs);
435 ppi->total_lifetimes_instrs += (g_curr_instrs - bk->allocd_at);
437 // access counts
438 ppi->reads_bytes += bk->reads_bytes;
439 ppi->writes_bytes += bk->writes_bytes;
440 g_reads_bytes += bk->reads_bytes;
441 g_writes_bytes += bk->writes_bytes;
443 // histo stuff. First, do state transitions for xsize/xsize_tag.
444 switch (ppi->xsize_tag) {
446 case Unknown:
447 tl_assert(ppi->xsize == 0);
448 tl_assert(ppi->freed_blocks == 1 || ppi->freed_blocks == 0);
449 tl_assert(!ppi->histo);
450 ppi->xsize_tag = Exactly;
451 ppi->xsize = bk->req_szB;
452 if (0) VG_(printf)("ppi %p --> Exactly(%lu)\n", ppi, ppi->xsize);
453 // and allocate the histo
454 if (bk->histoW) {
455 ppi->histo = VG_(malloc)("dh.retire_Block.1",
456 ppi->xsize * sizeof(UInt));
457 VG_(memset)(ppi->histo, 0, ppi->xsize * sizeof(UInt));
459 break;
461 case Exactly:
462 //tl_assert(ppi->freed_blocks > 1);
463 if (bk->req_szB != ppi->xsize) {
464 if (0) VG_(printf)("ppi %p --> Mixed(%lu -> %lu)\n",
465 ppi, ppi->xsize, bk->req_szB);
466 ppi->xsize_tag = Mixed;
467 ppi->xsize = 0;
468 // deallocate the histo, if any
469 if (ppi->histo) {
470 VG_(free)(ppi->histo);
471 ppi->histo = NULL;
474 break;
476 case Mixed:
477 //tl_assert(ppi->freed_blocks > 1);
478 break;
480 default:
481 tl_assert(0);
484 // See if we can fold the histo data from this block into
485 // the data for the PP.
486 if (ppi->xsize_tag == Exactly && ppi->histo && bk->histoW) {
487 tl_assert(ppi->xsize == bk->req_szB);
488 UWord i;
489 for (i = 0; i < ppi->xsize; i++) {
490 // FIXME: do something better in case of overflow of ppi->histo[..]
491 // Right now, at least don't let it overflow/wrap around
492 if (ppi->histo[i] <= 0xFFFE0000)
493 ppi->histo[i] += (UInt)bk->histoW[i];
495 if (0) VG_(printf)("fold in, PP = %p\n", ppi);
498 #if 0
499 if (bk->histoB) {
500 VG_(printf)("block retiring, histo %lu: ", bk->req_szB);
501 UWord i;
502 for (i = 0; i < bk->req_szB; i++)
503 VG_(printf)("%u ", (UInt)bk->histoB[i]);
504 VG_(printf)("\n");
505 } else {
506 VG_(printf)("block retiring, no histo %lu\n", bk->req_szB);
508 #endif
511 /* This handles block resizing. When a block with PP 'ec' has a
512 size change of 'delta', call here to update the PPInfo. */
513 static void resize_Block(ExeContext* ec, SizeT old_req_szB, SizeT new_req_szB)
515 tl_assert(clo_mode == Heap);
517 Long delta = (Long)new_req_szB - (Long)old_req_szB;
518 PPInfo* ppi = NULL;
519 UWord keyW = 0;
520 UWord valW = 0;
521 Bool found = VG_(lookupFM)( ppinfo,
522 &keyW, &valW, (UWord)ec );
524 tl_assert(found);
525 ppi = (PPInfo*)valW;
526 tl_assert(ppi->ec == ec);
528 if (delta < 0) {
529 tl_assert(ppi->curr_bytes >= -delta);
530 tl_assert(g_curr_bytes >= -delta);
532 // Total bytes might be coming down from a possible peak.
533 check_for_peak();
536 // Note: we treat realloc() like malloc() + free() for total counts, i.e. we
537 // increment total_blocks by 1 and increment total_bytes by new_req_szB.
539 // A reasonable alternative would be to leave total_blocks unchanged and
540 // increment total_bytes by delta (but only if delta is positive). But then
541 // calls to realloc wouldn't be counted towards the total_blocks count,
542 // which is undesirable.
544 // Update global stats and PPInfo stats.
546 g_total_blocks++;
547 g_total_bytes += new_req_szB;
549 ppi->total_blocks++;
550 ppi->total_bytes += new_req_szB;
552 g_curr_blocks += 0; // unchanged
553 g_curr_bytes += delta;
555 ppi->curr_blocks += 0; // unchanged
556 ppi->curr_bytes += delta;
558 // The use of `>=` rather than `>` means that if there are multiple equal
559 // peaks we record the latest one, like `check_for_peak` does.
560 if (g_curr_bytes >= g_max_bytes) {
561 g_max_blocks = g_curr_blocks;
562 g_max_bytes = g_curr_bytes;
563 g_tgmax_instrs = g_curr_instrs;
565 ppi->max_blocks = ppi->curr_blocks;
566 ppi->max_bytes = ppi->curr_bytes;
570 //------------------------------------------------------------//
571 //--- update both Block and PPInfos after {m,re}alloc/free ---//
572 //------------------------------------------------------------//
574 static
575 void* new_block ( ThreadId tid, void* p, SizeT req_szB, SizeT req_alignB,
576 Bool is_zeroed )
578 tl_assert(p == NULL); // don't handle custom allocators right now
579 SizeT actual_szB;
581 if ((SSizeT)req_szB < 0) return NULL;
583 if (req_szB == 0) {
584 req_szB = 1; /* can't allow zero-sized blocks in the interval tree */
587 // Allocate and zero if necessary
588 if (!p) {
589 p = VG_(cli_malloc)( req_alignB, req_szB );
590 if (!p) {
591 return NULL;
593 if (is_zeroed) VG_(memset)(p, 0, req_szB);
594 actual_szB = VG_(cli_malloc_usable_size)(p);
595 tl_assert(actual_szB >= req_szB);
598 if (clo_mode != Heap) {
599 return p;
602 // Make new Block, add to interval_tree.
603 Block* bk = VG_(malloc)("dh.new_block.1", sizeof(Block));
604 bk->payload = (Addr)p;
605 bk->req_szB = req_szB;
606 bk->ec = VG_(record_ExeContext)(tid, 0/*first word delta*/);
607 bk->allocd_at = g_curr_instrs;
608 bk->reads_bytes = 0;
609 bk->writes_bytes = 0;
610 // Set up histogram array, if the block isn't too large.
611 bk->histoW = NULL;
612 if (req_szB <= HISTOGRAM_SIZE_LIMIT) {
613 bk->histoW = VG_(malloc)("dh.new_block.2", req_szB * sizeof(UShort));
614 VG_(memset)(bk->histoW, 0, req_szB * sizeof(UShort));
617 Bool present = VG_(addToFM)( interval_tree, (UWord)bk, (UWord)0/*no val*/);
618 tl_assert(!present);
619 fbc_cache0 = fbc_cache1 = fbc_cache2 = NULL;
621 intro_Block(bk);
623 return p;
626 static
627 void die_block ( void* p )
629 VG_(cli_free)(p);
631 if (clo_mode != Heap) {
632 return;
635 Block* bk = find_Block_containing( (Addr)p );
636 if (!bk) {
637 return; // bogus free
640 tl_assert(bk->req_szB > 0);
641 // assert the block finder is behaving sanely
642 tl_assert(bk->payload <= (Addr)p);
643 tl_assert( (Addr)p < bk->payload + bk->req_szB );
645 if (bk->payload != (Addr)p) {
646 return; // bogus free
649 retire_Block(bk, True/*because_freed*/);
651 delete_Block_starting_at( bk->payload );
652 if (bk->histoW) {
653 VG_(free)( bk->histoW );
654 bk->histoW = NULL;
656 VG_(free)( bk );
659 static
660 void* renew_block ( ThreadId tid, void* p_old, SizeT new_req_szB )
662 void* p_new = NULL;
664 tl_assert(new_req_szB > 0); // map 0 to 1
666 if (clo_mode != Heap) {
667 SizeT old_actual_szB = VG_(cli_malloc_usable_size)(p_old);
668 p_new = VG_(cli_malloc)(VG_(clo_alignment), new_req_szB);
669 if (!p_new) {
670 return NULL;
672 VG_(memmove)(p_new, p_old, VG_MIN(old_actual_szB, new_req_szB));
673 VG_(cli_free)(p_old);
674 return p_new;
677 // Find the old block.
678 Block* bk = find_Block_containing( (Addr)p_old );
679 if (!bk) {
680 return NULL; // bogus realloc
683 tl_assert(bk->req_szB > 0);
684 // Assert the block finder is behaving sanely.
685 tl_assert(bk->payload <= (Addr)p_old);
686 tl_assert( (Addr)p_old < bk->payload + bk->req_szB );
688 if (bk->payload != (Addr)p_old) {
689 return NULL; // bogus realloc
692 // Keeping the histogram alive in any meaningful way across
693 // block resizing is too darn complicated. Just throw it away.
694 if (bk->histoW) {
695 VG_(free)(bk->histoW);
696 bk->histoW = NULL;
699 // Actually do the allocation, if necessary.
700 if (new_req_szB <= bk->req_szB) {
701 // New size is smaller or same; block not moved.
702 resize_Block(bk->ec, bk->req_szB, new_req_szB);
703 bk->req_szB = new_req_szB;
705 // Update reads/writes for the implicit copy. Even though we didn't
706 // actually do a copy, we act like we did, to match up with the fact
707 // that we treat this as an additional allocation.
708 bk->reads_bytes += new_req_szB;
709 bk->writes_bytes += new_req_szB;
711 p_new = p_old;
713 } else {
714 // New size is bigger; make new block, copy shared contents, free old.
715 p_new = VG_(cli_malloc)(VG_(clo_alignment), new_req_szB);
716 if (!p_new) {
717 // Nb: if realloc fails, NULL is returned but the old block is not
718 // touched. What an awful function.
719 return NULL;
721 tl_assert(p_new != p_old);
723 VG_(memcpy)(p_new, p_old, bk->req_szB);
724 VG_(cli_free)(p_old);
726 // Since the block has moved, we need to re-insert it into the
727 // interval tree at the new place. Do this by removing
728 // and re-adding it.
729 delete_Block_starting_at( (Addr)p_old );
730 // Now 'bk' is no longer in the tree, but the Block itself
731 // is still alive.
733 // Update reads/writes for the copy.
734 bk->reads_bytes += bk->req_szB;
735 bk->writes_bytes += bk->req_szB;
737 // Update the metadata.
738 resize_Block(bk->ec, bk->req_szB, new_req_szB);
739 bk->payload = (Addr)p_new;
740 bk->req_szB = new_req_szB;
742 // And re-add it to the interval tree.
743 Bool present
744 = VG_(addToFM)( interval_tree, (UWord)bk, (UWord)0/*no val*/);
745 tl_assert(!present);
746 fbc_cache0 = fbc_cache1 = fbc_cache2 = NULL;
749 return p_new;
752 //------------------------------------------------------------//
753 //--- malloc() et al replacement wrappers ---//
754 //------------------------------------------------------------//
756 static void* dh_malloc ( ThreadId tid, SizeT szB )
758 return new_block( tid, NULL, szB, VG_(clo_alignment), /*is_zeroed*/False );
761 static void* dh___builtin_new ( ThreadId tid, SizeT szB )
763 return new_block( tid, NULL, szB, VG_(clo_alignment), /*is_zeroed*/False );
766 static void* dh___builtin_new_aligned ( ThreadId tid, SizeT szB, SizeT alignB, SizeT orig_alignB )
768 return new_block( tid, NULL, szB, alignB, /*is_zeroed*/False );
771 static void* dh___builtin_vec_new ( ThreadId tid, SizeT szB )
773 return new_block( tid, NULL, szB, VG_(clo_alignment), /*is_zeroed*/False );
776 static void* dh___builtin_vec_new_aligned ( ThreadId tid, SizeT szB, SizeT alignB, SizeT orig_alignB )
778 return new_block( tid, NULL, szB, alignB, /*is_zeroed*/False );
781 static void* dh_calloc ( ThreadId tid, SizeT m, SizeT szB )
783 return new_block( tid, NULL, m*szB, VG_(clo_alignment), /*is_zeroed*/True );
786 static void *dh_memalign ( ThreadId tid, SizeT alignB, SizeT orig_alignB, SizeT szB)
788 return new_block( tid, NULL, szB, alignB, False );
791 static void dh_free ( ThreadId tid __attribute__((unused)), void* p )
793 die_block(p);
796 static void dh___builtin_delete ( ThreadId tid, void* p )
798 die_block(p);
801 static void dh___builtin_delete_aligned ( ThreadId tid, void* p, SizeT align )
803 die_block(p);
806 static void dh___builtin_vec_delete ( ThreadId tid, void* p )
808 die_block(p);
811 static void dh___builtin_vec_delete_aligned ( ThreadId tid, void* p, SizeT align )
813 die_block(p);
816 static void* dh_realloc ( ThreadId tid, void* p_old, SizeT new_szB )
818 if (p_old == NULL) {
819 return dh_malloc(tid, new_szB);
821 if (new_szB == 0) {
822 if (VG_(clo_realloc_zero_bytes_frees) == True) {
823 dh_free(tid, p_old);
824 return NULL;
826 new_szB = 1;
828 return renew_block(tid, p_old, new_szB);
831 static SizeT dh_malloc_usable_size ( ThreadId tid, void* p )
833 if (clo_mode != Heap) {
834 return VG_(cli_malloc_usable_size)(p);
837 Block* bk = find_Block_containing( (Addr)p );
838 return bk ? bk->req_szB : 0;
841 //------------------------------------------------------------//
842 //--- memory references ---//
843 //------------------------------------------------------------//
845 static
846 void inc_histo_for_block ( Block* bk, Addr addr, UWord szB )
848 UWord i, offMin, offMax1;
849 offMin = addr - bk->payload;
850 tl_assert(offMin < bk->req_szB);
851 offMax1 = offMin + szB;
852 if (offMax1 > bk->req_szB)
853 offMax1 = bk->req_szB;
854 //VG_(printf)("%lu %lu (size of block %lu)\n", offMin, offMax1, bk->req_szB);
855 for (i = offMin; i < offMax1; i++) {
856 UShort n = bk->histoW[i];
857 if (n < 0xFFFF) n++;
858 bk->histoW[i] = n;
862 static VG_REGPARM(2)
863 void dh_handle_write ( Addr addr, UWord szB )
865 tl_assert(clo_mode == Heap);
867 Block* bk = find_Block_containing(addr);
868 if (bk) {
869 bk->writes_bytes += szB;
870 if (bk->histoW)
871 inc_histo_for_block(bk, addr, szB);
875 static VG_REGPARM(2)
876 void dh_handle_read ( Addr addr, UWord szB )
878 tl_assert(clo_mode == Heap);
880 Block* bk = find_Block_containing(addr);
881 if (bk) {
882 bk->reads_bytes += szB;
883 if (bk->histoW)
884 inc_histo_for_block(bk, addr, szB);
888 // Handle reads and writes by syscalls (read == kernel
889 // reads user space, write == kernel writes user space).
890 // Assumes no such read or write spans a heap block
891 // boundary and so we can treat it just as one giant
892 // read or write.
893 static
894 void dh_handle_noninsn_read ( CorePart part, ThreadId tid, const HChar* s,
895 Addr base, SizeT size )
897 tl_assert(clo_mode == Heap);
899 switch (part) {
900 case Vg_CoreSysCall:
901 dh_handle_read(base, size);
902 break;
903 case Vg_CoreSysCallArgInMem:
904 break;
905 case Vg_CoreTranslate:
906 break;
907 default:
908 tl_assert(0);
912 static
913 void dh_handle_noninsn_read_asciiz(CorePart part, ThreadId tid, const HChar* s,
914 Addr str)
916 tl_assert(clo_mode == Heap);
918 tl_assert(part == Vg_CoreSysCall);
919 dh_handle_noninsn_read(part, tid, s, str, VG_(strlen)((const HChar*)str+1));
922 static
923 void dh_handle_noninsn_write ( CorePart part, ThreadId tid,
924 Addr base, SizeT size )
926 tl_assert(clo_mode == Heap);
928 switch (part) {
929 case Vg_CoreSysCall:
930 case Vg_CoreClientReq:
931 dh_handle_write(base, size);
932 break;
933 case Vg_CoreSignal:
934 break;
935 default:
936 tl_assert(0);
940 //------------------------------------------------------------//
941 //--- Instrumentation ---//
942 //------------------------------------------------------------//
/* Shorthand for the IR constructors used by the instrumenter. */
#define binop(_op, _lhs, _rhs)  IRExpr_Binop((_op),(_lhs),(_rhs))
#define mkexpr(_t)              IRExpr_RdTmp((_t))
#define mkU32(_v)               IRExpr_Const(IRConst_U32(_v))
#define mkU64(_v)               IRExpr_Const(IRConst_U64(_v))
#define assign(_dst, _expr)     IRStmt_WrTmp((_dst), (_expr))
950 static
951 void add_counter_update(IRSB* sbOut, Int n)
953 #if defined(VG_BIGENDIAN)
954 # define END Iend_BE
955 #elif defined(VG_LITTLEENDIAN)
956 # define END Iend_LE
957 #else
958 # error "Unknown endianness"
959 #endif
960 // Add code to increment 'g_curr_instrs' by 'n', like this:
961 // WrTmp(t1, Load64(&g_curr_instrs))
962 // WrTmp(t2, Add64(RdTmp(t1), Const(n)))
963 // Store(&g_curr_instrs, t2)
964 IRTemp t1 = newIRTemp(sbOut->tyenv, Ity_I64);
965 IRTemp t2 = newIRTemp(sbOut->tyenv, Ity_I64);
966 IRExpr* counter_addr = mkIRExpr_HWord( (HWord)&g_curr_instrs );
968 IRStmt* st1 = assign(t1, IRExpr_Load(END, Ity_I64, counter_addr));
969 IRStmt* st2 = assign(t2, binop(Iop_Add64, mkexpr(t1), mkU64(n)));
970 IRStmt* st3 = IRStmt_Store(END, counter_addr, mkexpr(t2));
972 addStmtToIRSB( sbOut, st1 );
973 addStmtToIRSB( sbOut, st2 );
974 addStmtToIRSB( sbOut, st3 );
977 static
978 void addMemEvent(IRSB* sbOut, Bool isWrite, Int szB, IRExpr* addr,
979 Int goff_sp)
981 if (clo_mode != Heap) {
982 return;
985 IRType tyAddr = Ity_INVALID;
986 const HChar* hName= NULL;
987 void* hAddr = NULL;
988 IRExpr** argv = NULL;
989 IRDirty* di = NULL;
991 const Int THRESH = 4096 * 4; // somewhat arbitrary
992 const Int rz_szB = VG_STACK_REDZONE_SZB;
994 tyAddr = typeOfIRExpr( sbOut->tyenv, addr );
995 tl_assert(tyAddr == Ity_I32 || tyAddr == Ity_I64);
997 if (isWrite) {
998 hName = "dh_handle_write";
999 hAddr = &dh_handle_write;
1000 } else {
1001 hName = "dh_handle_read";
1002 hAddr = &dh_handle_read;
1005 argv = mkIRExprVec_2( addr, mkIRExpr_HWord(szB) );
1007 /* Add the helper. */
1008 tl_assert(hName);
1009 tl_assert(hAddr);
1010 tl_assert(argv);
1011 di = unsafeIRDirty_0_N( 2/*regparms*/,
1012 hName, VG_(fnptr_to_fnentry)( hAddr ),
1013 argv );
1015 /* Generate the guard condition: "(addr - (SP - RZ)) >u N", for
1016 some arbitrary N. If that fails then addr is in the range (SP -
1017 RZ .. SP + N - RZ). If N is smallish (a page?) then we can say
1018 addr is within a page of SP and so can't possibly be a heap
1019 access, and so can be skipped. */
1020 IRTemp sp = newIRTemp(sbOut->tyenv, tyAddr);
1021 addStmtToIRSB( sbOut, assign(sp, IRExpr_Get(goff_sp, tyAddr)));
1023 IRTemp sp_minus_rz = newIRTemp(sbOut->tyenv, tyAddr);
1024 addStmtToIRSB(
1025 sbOut,
1026 assign(sp_minus_rz,
1027 tyAddr == Ity_I32
1028 ? binop(Iop_Sub32, mkexpr(sp), mkU32(rz_szB))
1029 : binop(Iop_Sub64, mkexpr(sp), mkU64(rz_szB)))
1032 IRTemp diff = newIRTemp(sbOut->tyenv, tyAddr);
1033 addStmtToIRSB(
1034 sbOut,
1035 assign(diff,
1036 tyAddr == Ity_I32
1037 ? binop(Iop_Sub32, addr, mkexpr(sp_minus_rz))
1038 : binop(Iop_Sub64, addr, mkexpr(sp_minus_rz)))
1041 IRTemp guard = newIRTemp(sbOut->tyenv, Ity_I1);
1042 addStmtToIRSB(
1043 sbOut,
1044 assign(guard,
1045 tyAddr == Ity_I32
1046 ? binop(Iop_CmpLT32U, mkU32(THRESH), mkexpr(diff))
1047 : binop(Iop_CmpLT64U, mkU64(THRESH), mkexpr(diff)))
1049 di->guard = mkexpr(guard);
1051 addStmtToIRSB( sbOut, IRStmt_Dirty(di) );
1054 static
1055 IRSB* dh_instrument ( VgCallbackClosure* closure,
1056 IRSB* sbIn,
1057 const VexGuestLayout* layout,
1058 const VexGuestExtents* vge,
1059 const VexArchInfo* archinfo_host,
1060 IRType gWordTy, IRType hWordTy )
1062 Int i, n = 0;
1063 IRSB* sbOut;
1064 IRTypeEnv* tyenv = sbIn->tyenv;
1066 const Int goff_sp = layout->offset_SP;
1068 // We increment the instruction count in two places:
1069 // - just before any Ist_Exit statements;
1070 // - just before the IRSB's end.
1071 // In the former case, we zero 'n' and then continue instrumenting.
1073 sbOut = deepCopyIRSBExceptStmts(sbIn);
1075 // Copy verbatim any IR preamble preceding the first IMark
1076 i = 0;
1077 while (i < sbIn->stmts_used && sbIn->stmts[i]->tag != Ist_IMark) {
1078 addStmtToIRSB( sbOut, sbIn->stmts[i] );
1079 i++;
1082 for (/*use current i*/; i < sbIn->stmts_used; i++) {
1083 IRStmt* st = sbIn->stmts[i];
1085 if (!st || st->tag == Ist_NoOp) continue;
1087 switch (st->tag) {
1089 case Ist_IMark: {
1090 n++;
1091 break;
1094 case Ist_Exit: {
1095 if (n > 0) {
1096 // Add an increment before the Exit statement, then reset 'n'.
1097 add_counter_update(sbOut, n);
1098 n = 0;
1100 break;
1103 case Ist_WrTmp: {
1104 IRExpr* data = st->Ist.WrTmp.data;
1105 if (data->tag == Iex_Load) {
1106 IRExpr* aexpr = data->Iex.Load.addr;
1107 // Note also, endianness info is ignored. I guess
1108 // that's not interesting.
1109 addMemEvent( sbOut, False/*!isWrite*/,
1110 sizeofIRType(data->Iex.Load.ty),
1111 aexpr, goff_sp );
1113 break;
1116 case Ist_Store: {
1117 IRExpr* data = st->Ist.Store.data;
1118 IRExpr* aexpr = st->Ist.Store.addr;
1119 addMemEvent( sbOut, True/*isWrite*/,
1120 sizeofIRType(typeOfIRExpr(tyenv, data)),
1121 aexpr, goff_sp );
1122 break;
1125 case Ist_Dirty: {
1126 Int dataSize;
1127 IRDirty* d = st->Ist.Dirty.details;
1128 if (d->mFx != Ifx_None) {
1129 /* This dirty helper accesses memory. Collect the details. */
1130 tl_assert(d->mAddr != NULL);
1131 tl_assert(d->mSize != 0);
1132 dataSize = d->mSize;
1133 // Large (eg. 28B, 108B, 512B on x86) data-sized
1134 // instructions will be done inaccurately, but they're
1135 // very rare and this avoids errors from hitting more
1136 // than two cache lines in the simulation.
1137 if (d->mFx == Ifx_Read || d->mFx == Ifx_Modify)
1138 addMemEvent( sbOut, False/*!isWrite*/,
1139 dataSize, d->mAddr, goff_sp );
1140 if (d->mFx == Ifx_Write || d->mFx == Ifx_Modify)
1141 addMemEvent( sbOut, True/*isWrite*/,
1142 dataSize, d->mAddr, goff_sp );
1143 } else {
1144 tl_assert(d->mAddr == NULL);
1145 tl_assert(d->mSize == 0);
1147 break;
1150 case Ist_CAS: {
1151 /* We treat it as a read and a write of the location. I
1152 think that is the same behaviour as it was before IRCAS
1153 was introduced, since prior to that point, the Vex
1154 front ends would translate a lock-prefixed instruction
1155 into a (normal) read followed by a (normal) write. */
1156 Int dataSize;
1157 IRCAS* cas = st->Ist.CAS.details;
1158 tl_assert(cas->addr != NULL);
1159 tl_assert(cas->dataLo != NULL);
1160 dataSize = sizeofIRType(typeOfIRExpr(tyenv, cas->dataLo));
1161 if (cas->dataHi != NULL)
1162 dataSize *= 2; /* since it's a doubleword-CAS */
1163 addMemEvent( sbOut, False/*!isWrite*/,
1164 dataSize, cas->addr, goff_sp );
1165 addMemEvent( sbOut, True/*isWrite*/,
1166 dataSize, cas->addr, goff_sp );
1167 break;
1170 case Ist_LLSC: {
1171 IRType dataTy;
1172 if (st->Ist.LLSC.storedata == NULL) {
1173 /* LL */
1174 dataTy = typeOfIRTemp(tyenv, st->Ist.LLSC.result);
1175 addMemEvent( sbOut, False/*!isWrite*/,
1176 sizeofIRType(dataTy),
1177 st->Ist.LLSC.addr, goff_sp );
1178 } else {
1179 /* SC */
1180 dataTy = typeOfIRExpr(tyenv, st->Ist.LLSC.storedata);
1181 addMemEvent( sbOut, True/*isWrite*/,
1182 sizeofIRType(dataTy),
1183 st->Ist.LLSC.addr, goff_sp );
1185 break;
1188 default:
1189 break;
1192 addStmtToIRSB( sbOut, st );
1195 if (n > 0) {
1196 // Add an increment before the SB end.
1197 add_counter_update(sbOut, n);
1199 return sbOut;
// Retire the shorthand macros used by the instrumentation code above so
// that they cannot leak into the rest of the file.
#undef assign
#undef binop
#undef mkexpr
#undef mkU32
#undef mkU64
1208 //------------------------------------------------------------//
1209 //--- Client requests ---//
1210 //------------------------------------------------------------//
1212 static Bool dh_handle_client_request(ThreadId tid, UWord* arg, UWord* ret)
1214 if (!VG_IS_TOOL_USERREQ('D','H',arg[0]))
1215 return False;
1217 switch (arg[0]) {
1218 case VG_USERREQ__DHAT_AD_HOC_EVENT: {
1219 if (clo_mode != AdHoc) {
1220 return False;
1223 SizeT len = (SizeT)arg[1];
1225 // Only the ec and req_szB fields are used by intro_Block().
1226 Block bk;
1227 VG_(memset)(&bk, 0, sizeof(bk));
1228 bk.req_szB = len;
1229 bk.ec = VG_(record_ExeContext)(tid, 0/*first word delta*/);
1231 intro_Block(&bk);
1233 return True;
1236 case VG_USERREQ__DHAT_HISTOGRAM_MEMORY: {
1237 Addr address = (Addr)arg[1];
1239 Block* bk = find_Block_containing( address );
1240 // bogus address
1241 if (!bk) {
1242 VG_(message)(
1243 Vg_UserMsg,
1244 "Warning: address for user histogram request not found %llx\n", (ULong)address
1246 return False;
1249 // already histogrammed
1250 if (bk->req_szB <= HISTOGRAM_SIZE_LIMIT) {
1251 VG_(message)(
1252 Vg_UserMsg,
1253 "Warning: request for user histogram of size %lu is smaller than the normal histogram limit, request ignored\n",
1254 bk->req_szB
1256 return False;
1259 // too big
1260 if (bk->req_szB > USER_HISTOGRAM_SIZE_LIMIT) {
1261 VG_(message)(
1262 Vg_UserMsg,
1263 "Warning: request for user histogram of size %lu is larger than the maximum user request limit, request ignored\n",
1264 bk->req_szB
1266 return False;
1270 bk->histoW = VG_(malloc)("dh.new_block.3", bk->req_szB * sizeof(UShort));
1271 VG_(memset)(bk->histoW, 0, bk->req_szB * sizeof(UShort));
1273 return True;
1276 case _VG_USERREQ__DHAT_COPY: {
1277 SizeT len = (SizeT)arg[1];
1279 if (clo_mode != Copy) {
1280 return False;
1283 // Only the ec and req_szB fields are used by intro_Block().
1284 Block bk;
1285 VG_(memset)(&bk, 0, sizeof(bk));
1286 bk.req_szB = len;
1287 bk.ec = VG_(record_ExeContext)(tid, 0/*first word delta*/);
1289 intro_Block(&bk);
1291 return True;
1294 default:
1295 VG_(message)(Vg_UserMsg,
1296 "Warning: unknown DHAT client request code %llx\n",
1297 (ULong)arg[0]
1300 return False;
1304 //------------------------------------------------------------//
1305 //--- Finalisation ---//
1306 //------------------------------------------------------------//
1308 // File format notes.
1310 // - The files are JSON, because it's a widely-used format and saves us having
1311 // to write a parser in dh_view.js.
1313 // - We use a comma-first style for the generated JSON. Comma-first style
1314 // moves the special case for arrays/objects from the last item to the
1315 // first. This helps in cases where you can't easily tell in advance the
1316 // size of arrays/objects, such as iterating over a WordFM (because
1317 // VG_(sizeFM) is O(n) rather than O(1)), and iterating over stack frames
1318 // using VG_(apply_ExeContext) in combination with an InlIpCursor.
1320 // - We use short field names and minimal whitespace to minimize file sizes.
1322 // Sample output:
1324 // {
1325 // // Version number of the format. Incremented on each
1326 // // backwards-incompatible change. A mandatory integer.
1327 // "dhatFileVersion": 2,
1329 // // The invocation mode. A mandatory, free-form string.
1330 // "mode": "heap",
1332 // // The verb printed before each stack trace, i.e. "<verb> at {". A
1333 // // mandatory string.
1334 // "verb": "Allocated",
1336 // // Are block lifetimes recorded? Affects whether some other fields are
1337 // // present. A mandatory boolean.
1338 // "bklt": true,
1340 // // Are block accesses recorded? Affects whether some other fields are
1341 // // present. A mandatory boolean.
1342 // "bkacc": true,
1344 // // Byte/bytes/blocks-position units. Optional strings. "byte", "bytes",
1345 // // and "blocks" are the values used if these fields are omitted.
1346 // "bu": "byte", "bsu": "bytes", "bksu": "blocks",
1348 // // Time units (individual and 1,000,000x). Mandatory strings.
1349 // "tu": "instrs", "Mtu": "Minstr",
1351 // // The "short-lived" time threshold, measured in "tu"s.
1352 // // - bklt=true: a mandatory integer.
1353 // // - bklt=false: omitted.
1354 // "tuth": 500,
1356 // // The executed command. A mandatory string.
1357 // "cmd": "date",
1359 // // The process ID. A mandatory integer.
1360 // "pid": 61129,
1362 // // The time at the end of execution (t-end). A mandatory integer.
1363 // "te": 350682,
1365 // // The time of the global max (t-gmax).
1366 // // - bklt=true: a mandatory integer.
1367 // // - bklt=false: omitted.
1368 // "tg": 331312,
1370 // // The program points. A mandatory array.
1371 // "pps": [
1372 // {
1373 // // Total bytes and blocks. Mandatory integers.
1374 // "tb": 5, "tbk": 1,
1376 // // Total lifetimes of all blocks allocated at this PP.
1377 // // - bklt=true: a mandatory integer.
1378 // // - bklt=false: omitted.
1379 // "tl": 274,
1381 // // The maximum bytes and blocks for this PP.
1382 // // - bklt=true: mandatory integers.
1383 // // - bklt=false: omitted.
1384 // "mb": 5, "mbk": 1,
1386 // // The bytes and blocks at t-gmax for this PP.
1387 // // - bklt=true: mandatory integers.
1388 // // - bklt=false: omitted.
1389 // "gb": 0, "gbk": 0,
1391 // // The bytes and blocks at t-end for this PP.
1392 // // - bklt=true: mandatory integers.
1393 // // - bklt=false: omitted.
1394 // "eb": 0, "ebk": 0,
1396 // // The reads and writes of blocks for this PP.
1397 // // - bkacc=true: mandatory integers.
1398 // // - bkacc=false: omitted.
1399 // "rb": 41, "wb": 5,
1401 // // The exact accesses of blocks for this PP. Only used when all
1402 // // allocations are the same size and sufficiently small. A negative
1403 // // element indicates run-length encoding of the following integer.
1404 // // E.g. `-3, 4` means "three 4s in a row".
1405 // // - bkacc=true: an optional array of integers.
1406 // // - bkacc=false: omitted.
1407 // "acc": [5, -3, 4, 2],
1409 // // Frames. Each element is an index into the "ftbl" array below.
1410 // // - All modes: A mandatory array of integers.
1411 // "fs": [1, 2, 3]
1412 // }
1413 // ],
1415 // // Frame table. A mandatory array of strings.
1416 // "ftbl": [
1417 // "[root]",
1418 // "0x4AA1D9F: _nl_normalize_codeset (l10nflist.c:332)",
1419 // "0x4A9B414: _nl_load_locale_from_archive (loadarchive.c:173)",
1420 // "0x4A9A2BE: _nl_find_locale (findlocale.c:153)"
1421 // ]
1422 // }
1424 static VgFile* fp;
1426 #define FP(format, args...) ({ VG_(fprintf)(fp, format, ##args); })
1428 // The frame table holds unique frames.
1429 static WordFM* frame_tbl = NULL;
1430 static UWord next_frame_n = 0;
1432 static Word frame_cmp(UWord a, UWord b)
1434 return VG_(strcmp)((const HChar*)a, (const HChar*)b);
1437 static HChar hex_digit_to_ascii_char(UChar d)
1439 d = d & 0xf;
1440 return (d < 10) ? ('0' + d) : ('a' + (d - 10));
1443 // For JSON, we must escape double quote, backslash, and 0x00..0x1f.
1445 // Returns the original string if no escaping was required. Returns a pointer
1446 // to a static buffer if escaping was required. Therefore, the return value is
1447 // only valid until the next call to this function.
1448 static const HChar* json_escape(const HChar* s)
1450 static HChar* buf = NULL;
1451 static SizeT bufcap = 0;
1453 // Do we need any escaping?
1454 SizeT extra = 0;
1455 const HChar* p = s;
1456 while (*p) {
1457 UChar c = *p;
1458 if (c == '"' || c == '\\') {
1459 extra += 1;
1460 } else if (c <= 0x1f) {
1461 extra += 5;
1463 p++;
1465 SizeT len = p - s;
1467 if (extra == 0) {
1468 // No escaping needed.
1469 return s;
1472 // Escaping needed. (The +1 is for the NUL terminator.) Enlarge buf if
1473 // necessary.
1474 SizeT newcap = len + extra + 1;
1475 if (bufcap < newcap) {
1476 buf = VG_(realloc)("dh.json", buf, newcap);
1477 bufcap = newcap;
1480 p = s;
1481 HChar* q = buf;
1482 while (*p) {
1483 UChar c = *p;
1484 if (c == '"') {
1485 *q++ = '\\';
1486 *q++ = '"';
1487 } else if (c == '\\') {
1488 *q++ = '\\';
1489 *q++ = '\\';
1490 } else if (c <= 0x1f) {
1491 *q++ = '\\';
1492 *q++ = 'u';
1493 *q++ = '0';
1494 *q++ = '0';
1495 *q++ = hex_digit_to_ascii_char((c & 0x00f0) >> 4);
1496 *q++ = hex_digit_to_ascii_char(c & 0x000f);
1497 } else {
1498 *q++ = c;
1500 p++;
1502 *q = '\0';
1504 return buf;
1507 static void write_PPInfo_frame(UInt n, DiEpoch ep, Addr ip, void* opaque)
1509 Bool* is_first = (Bool*)opaque;
1510 InlIPCursor* iipc = VG_(new_IIPC)(ep, ip);
1512 do {
1513 const HChar* buf = VG_(describe_IP)(ep, ip, iipc);
1515 // Skip entries in vg_replace_malloc.c (e.g. `malloc`, `calloc`,
1516 // `realloc`, `operator new`) because they're boring and clog up the
1517 // output.
1518 if (VG_(strstr)(buf, "vg_replace_malloc.c")) {
1519 continue;
1522 // If this description has been seen before, get its number. Otherwise,
1523 // give it a new number and put it in the table.
1524 UWord keyW = 0, valW = 0;
1525 UWord frame_n = 0;
1526 Bool found = VG_(lookupFM)(frame_tbl, &keyW, &valW, (UWord)buf);
1527 if (found) {
1528 //const HChar* str = (const HChar*)keyW;
1529 //tl_assert(0 == VG_(strcmp)(buf, str));
1530 frame_n = valW;
1531 } else {
1532 // `buf` is a static buffer, we must copy it.
1533 const HChar* str = VG_(strdup)("dh.frame_tbl.3", buf);
1534 frame_n = next_frame_n++;
1535 Bool present = VG_(addToFM)(frame_tbl, (UWord)str, frame_n);
1536 tl_assert(!present);
1539 FP("%c%lu", *is_first ? '[' : ',', frame_n);
1540 *is_first = False;
1542 } while (VG_(next_IIPC)(iipc));
1544 VG_(delete_IIPC)(iipc);
1547 static void write_PPInfo(PPInfo* ppi, Bool is_first)
1549 FP(" %c{\"tb\":%llu,\"tbk\":%llu\n",
1550 is_first ? '[' : ',',
1551 ppi->total_bytes, ppi->total_blocks);
1553 if (clo_mode == Heap) {
1554 tl_assert(ppi->total_blocks >= ppi->max_blocks);
1555 tl_assert(ppi->total_bytes >= ppi->max_bytes);
1557 FP(" ,\"tl\":%llu\n",
1558 ppi->total_lifetimes_instrs);
1559 FP(" ,\"mb\":%llu,\"mbk\":%llu\n",
1560 ppi->max_bytes, ppi->max_blocks);
1561 FP(" ,\"gb\":%llu,\"gbk\":%llu\n",
1562 ppi->at_tgmax_bytes, ppi->at_tgmax_blocks);
1563 FP(" ,\"eb\":%llu,\"ebk\":%llu\n",
1564 ppi->curr_bytes, ppi->curr_blocks);
1565 FP(" ,\"rb\":%llu,\"wb\":%llu\n",
1566 ppi->reads_bytes, ppi->writes_bytes);
1568 if (ppi->histo && ppi->xsize_tag == Exactly) {
1569 FP(" ,\"acc\":[");
1571 // Simple run-length encoding: when N entries in a row have the same
1572 // value M, we print "-N,M". If there is just one in a row, we just
1573 // print "M". This reduces file size significantly.
1574 UShort repval = 0;
1575 Int reps = 0;
1576 for (UWord i = 0; i < ppi->xsize; i++) {
1577 UShort h = ppi->histo[i];
1578 if (repval == h) {
1579 // Continue current run.
1580 reps++;
1581 } else {
1582 // End of run; print it.
1583 if (reps == 1) {
1584 FP("%u,", repval);
1585 } else if (reps > 1) {
1586 FP("-%d,%u,", reps, repval);
1588 reps = 1;
1589 repval = h;
1592 // Print the final run.
1593 if (reps == 1) {
1594 FP("%u", repval);
1595 } else if (reps > 1) {
1596 FP("-%d,%u", reps, repval);
1599 FP("]\n");
1601 } else {
1602 tl_assert(ppi->curr_bytes == 0);
1603 tl_assert(ppi->curr_blocks == 0);
1604 tl_assert(ppi->max_bytes == 0);
1605 tl_assert(ppi->max_blocks == 0);
1606 tl_assert(ppi->at_tgmax_bytes == 0);
1607 tl_assert(ppi->at_tgmax_blocks == 0);
1608 tl_assert(ppi->total_lifetimes_instrs == 0);
1609 tl_assert(ppi->freed_blocks == 0);
1610 tl_assert(ppi->reads_bytes == 0);
1611 tl_assert(ppi->writes_bytes == 0);
1612 tl_assert(ppi->xsize_tag == 0);
1613 tl_assert(ppi->xsize == 0);
1614 tl_assert(ppi->histo == NULL);
1617 FP(" ,\"fs\":");
1618 Bool is_first_frame = True;
1619 VG_(apply_ExeContext)(write_PPInfo_frame, &is_first_frame, ppi->ec);
1620 FP("]\n");
1622 FP(" }\n");
1625 static void write_PPInfos(void)
1627 UWord keyW, valW;
1629 FP(",\"pps\":\n");
1631 VG_(initIterFM)(ppinfo);
1632 Bool is_first = True;
1633 while (VG_(nextIterFM)(ppinfo, &keyW, &valW)) {
1634 PPInfo* ppi = (PPInfo*)valW;
1635 tl_assert(ppi && ppi->ec == (ExeContext*)keyW);
1636 write_PPInfo(ppi, is_first);
1637 is_first = False;
1639 VG_(doneIterFM)(ppinfo);
1641 if (is_first) {
1642 // We didn't print any elements. This happens if ppinfo is empty.
1643 FP(" [\n");
1646 FP(" ]\n");
1649 static void dh_fini(Int exit_status)
1651 // This function does lots of allocations that it doesn't bother to free,
1652 // because execution is almost over anyway.
1654 UWord keyW, valW;
1656 // Total bytes might be at a possible peak.
1657 if (clo_mode == Heap) {
1658 check_for_peak();
1660 // Before printing statistics, we must harvest various stats (such as
1661 // lifetimes and accesses) for all the blocks that are still alive.
1662 VG_(initIterFM)( interval_tree );
1663 while (VG_(nextIterFM)( interval_tree, &keyW, &valW )) {
1664 Block* bk = (Block*)keyW;
1665 tl_assert(valW == 0);
1666 tl_assert(bk);
1667 retire_Block(bk, False/*!because_freed*/);
1669 VG_(doneIterFM)( interval_tree );
1671 // Stats.
1672 if (VG_(clo_stats)) {
1673 VG_(dmsg)(" dhat: find_Block_containing:\n");
1674 VG_(dmsg)(" dhat: found: %'lu\n",
1675 stats__n_fBc_cached0 + stats__n_fBc_cached1
1676 + stats__n_fBc_cached2
1677 + stats__n_fBc_uncached);
1678 VG_(dmsg)(" dhat: at cache0 %'14lu at cache1 %'14lu\n",
1679 stats__n_fBc_cached0,
1680 stats__n_fBc_cached1);
1681 VG_(dmsg)(" dhat: at cache2 %'14lu uncached %'14lu\n",
1682 stats__n_fBc_cached2,
1683 stats__n_fBc_uncached);
1684 VG_(dmsg)(" dhat: notfound: %'lu\n", stats__n_fBc_notfound);
1685 VG_(dmsg)("\n");
1689 // Create the frame table, and insert the special "[root]" node at index 0.
1690 frame_tbl = VG_(newFM)(VG_(malloc),
1691 "dh.frame_tbl.1",
1692 VG_(free),
1693 frame_cmp);
1694 const HChar* root = VG_(strdup)("dh.frame_tbl.2", "[root]");
1695 Bool present = VG_(addToFM)(frame_tbl, (UWord)root, 0);
1696 tl_assert(!present);
1697 next_frame_n = 1;
1699 // Setup output filename. Nb: it's important to do this now, i.e. as late
1700 // as possible. If we do it at start-up and the program forks and the
1701 // output file format string contains a %p (pid) specifier, both the parent
1702 // and child will incorrectly write to the same file; this happened in
1703 // 3.3.0.
1704 HChar* dhat_out_file =
1705 VG_(expand_file_name)("--dhat-out-file", clo_dhat_out_file);
1707 fp = VG_(fopen)(dhat_out_file, VKI_O_CREAT|VKI_O_TRUNC|VKI_O_WRONLY,
1708 VKI_S_IRUSR|VKI_S_IWUSR);
1709 if (!fp) {
1710 VG_(umsg)("error: can't open DHAT output file '%s'\n", dhat_out_file);
1711 VG_(free)(dhat_out_file);
1712 return;
1715 // Write to data file.
1716 FP("{\"dhatFileVersion\":2\n");
1718 // The output mode, block booleans, and byte/block units.
1719 if (clo_mode == Heap) {
1720 FP(",\"mode\":\"heap\",\"verb\":\"Allocated\"\n");
1721 FP(",\"bklt\":true,\"bkacc\":true\n");
1722 } else if (clo_mode == Copy) {
1723 FP(",\"mode\":\"copy\",\"verb\":\"Copied\"\n");
1724 FP(",\"bklt\":false,\"bkacc\":false\n");
1725 } else if (clo_mode == AdHoc) {
1726 FP(",\"mode\":\"ad-hoc\",\"verb\":\"Occurred\"\n");
1727 FP(",\"bklt\":false,\"bkacc\":false\n");
1728 FP(",\"bu\":\"unit\",\"bsu\":\"units\",\"bksu\":\"events\"\n");
1729 } else {
1730 tl_assert(False);
1733 // The time units.
1734 FP(",\"tu\":\"instrs\",\"Mtu\":\"Minstr\"\n");
1735 if (clo_mode == Heap) {
1736 FP(",\"tuth\":500\n");
1739 // The command.
1740 const HChar* exe = VG_(args_the_exename);
1741 FP(",\"cmd\":\"%s", json_escape(exe));
1742 for (Word i = 0; i < VG_(sizeXA)(VG_(args_for_client)); i++) {
1743 const HChar* arg = *(HChar**)VG_(indexXA)(VG_(args_for_client), i);
1744 FP(" %s", json_escape(arg));
1746 FP("\"\n");
1748 // The PID.
1749 FP(",\"pid\":%d\n", VG_(getpid)());
1751 // Times.
1752 FP(",\"te\":%llu\n", g_curr_instrs);
1753 if (clo_mode == Heap) {
1754 FP(",\"tg\":%llu\n", g_tgmax_instrs);
1755 } else {
1756 tl_assert(g_tgmax_instrs == 0);
1759 // APs.
1760 write_PPInfos();
1762 // Frame table.
1763 FP(",\"ftbl\":\n");
1765 // The frame table maps strings to numbers. We want to print it ordered by
1766 // numbers. So we create an array and fill it in from the frame table, then
1767 // print that.
1768 UWord n_frames = next_frame_n;
1769 const HChar** frames =
1770 VG_(malloc)("dh.frames", n_frames * sizeof(const HChar*));
1771 VG_(initIterFM)(frame_tbl);
1772 while (VG_(nextIterFM)(frame_tbl, &keyW, &valW)) {
1773 const HChar* str = (const HChar*)keyW;
1774 UWord n = valW;
1775 frames[n] = str;
1777 VG_(doneIterFM)(frame_tbl);
1779 for (UWord i = 0; i < n_frames; i++) {
1780 FP(" %c\"%s\"\n", i == 0 ? '[' : ',', json_escape(frames[i]));
1782 FP(" ]\n");
1783 VG_(free)(frames);
1785 FP("}\n");
1787 VG_(fclose)(fp);
1788 fp = NULL;
1790 if (VG_(clo_verbosity) == 0) {
1791 return;
1794 // Print brief global stats.
1795 VG_(umsg)("Total: %'llu %s in %'llu %s\n",
1796 g_total_bytes, clo_mode == AdHoc ? "units" : "bytes",
1797 g_total_blocks, clo_mode == AdHoc ? "events" : "blocks");
1798 if (clo_mode == Heap) {
1799 VG_(umsg)("At t-gmax: %'llu bytes in %'llu blocks\n",
1800 g_max_bytes, g_max_blocks);
1801 VG_(umsg)("At t-end: %'llu bytes in %'llu blocks\n",
1802 g_curr_bytes, g_curr_blocks);
1803 VG_(umsg)("Reads: %'llu bytes\n", g_reads_bytes);
1804 VG_(umsg)("Writes: %'llu bytes\n", g_writes_bytes);
1805 } else {
1806 tl_assert(g_max_bytes == 0);
1807 tl_assert(g_max_blocks == 0);
1808 tl_assert(g_curr_bytes == 0);
1809 tl_assert(g_curr_blocks == 0);
1810 tl_assert(g_reads_bytes == 0);
1811 tl_assert(g_writes_bytes == 0);
1814 // Print a how-to-view-the-profile hint.
1815 VG_(umsg)("\n");
1816 VG_(umsg)("To view the resulting profile, open\n");
1817 VG_(umsg)(" file://%s/%s\n", DHAT_VIEW_DIR, "dh_view.html");
1818 VG_(umsg)("in a web browser, click on \"Load...\", "
1819 "and then select the file\n");
1820 VG_(umsg)(" %s\n", dhat_out_file);
1821 VG_(umsg)("The text at the bottom explains the abbreviations used in the "
1822 "output.\n");
1824 VG_(free)(dhat_out_file);
1827 //------------------------------------------------------------//
1828 //--- Initialisation ---//
1829 //------------------------------------------------------------//
1831 static void dh_post_clo_init(void)
1833 if (clo_mode == Heap) {
1834 VG_(track_pre_mem_read) ( dh_handle_noninsn_read );
1835 VG_(track_pre_mem_read_asciiz) ( dh_handle_noninsn_read_asciiz );
1836 VG_(track_post_mem_write) ( dh_handle_noninsn_write );
1840 static void dh_pre_clo_init(void)
1842 VG_(details_name) ("DHAT");
1843 VG_(details_version) (NULL);
1844 VG_(details_description) ("a dynamic heap analysis tool");
1845 VG_(details_copyright_author)(
1846 "Copyright (C) 2010-2024, and GNU GPL'd, by Mozilla Foundation et al.");
1847 VG_(details_bug_reports_to) (VG_BUGS_TO);
1848 VG_(details_avg_translation_sizeB) ( 600 );
1850 // Basic functions.
1851 VG_(basic_tool_funcs) (dh_post_clo_init,
1852 dh_instrument,
1853 dh_fini);
1855 // Needs.
1856 VG_(needs_libc_freeres)();
1857 VG_(needs_cxx_freeres)();
1858 VG_(needs_command_line_options)(dh_process_cmd_line_option,
1859 dh_print_usage,
1860 dh_print_debug_usage);
1861 VG_(needs_client_requests) (dh_handle_client_request);
1862 // VG_(needs_sanity_checks) (dh_cheap_sanity_check,
1863 // dh_expensive_sanity_check);
1864 VG_(needs_malloc_replacement)(dh_malloc,
1865 dh___builtin_new,
1866 dh___builtin_new_aligned,
1867 dh___builtin_vec_new,
1868 dh___builtin_vec_new_aligned,
1869 dh_memalign,
1870 dh_calloc,
1871 dh_free,
1872 dh___builtin_delete,
1873 dh___builtin_delete_aligned,
1874 dh___builtin_vec_delete,
1875 dh___builtin_vec_delete_aligned,
1876 dh_realloc,
1877 dh_malloc_usable_size,
1878 0 );
1880 tl_assert(!interval_tree);
1881 tl_assert(!fbc_cache0);
1882 tl_assert(!fbc_cache1);
1883 tl_assert(!fbc_cache2);
1885 interval_tree = VG_(newFM)( VG_(malloc),
1886 "dh.interval_tree.1",
1887 VG_(free),
1888 interval_tree_Cmp );
1890 ppinfo = VG_(newFM)( VG_(malloc),
1891 "dh.ppinfo.1",
1892 VG_(free),
1893 NULL/*unboxedcmp*/ );
1896 VG_DETERMINE_INTERFACE_VERSION(dh_pre_clo_init)
1898 //--------------------------------------------------------------------//
1899 //--- end dh_main.c ---//
1900 //--------------------------------------------------------------------//