2 //--------------------------------------------------------------------//
3 //--- DHAT: a Dynamic Heap Analysis Tool dh_main.c ---//
4 //--------------------------------------------------------------------//
7 This file is part of DHAT, a Valgrind tool for profiling the
8 heap usage of programs.
10 Copyright (C) 2010-2018 Mozilla Foundation
12 This program is free software; you can redistribute it and/or
13 modify it under the terms of the GNU General Public License as
14 published by the Free Software Foundation; either version 2 of the
15 License, or (at your option) any later version.
17 This program is distributed in the hope that it will be useful, but
18 WITHOUT ANY WARRANTY; without even the implied warranty of
19 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 General Public License for more details.
22 You should have received a copy of the GNU General Public License
23 along with this program; if not, see <http://www.gnu.org/licenses/>.
25 The GNU General Public License is contained in the file COPYING.
28 /* Contributed by Julian Seward <jseward@acm.org> */
30 #include "pub_tool_basics.h"
31 #include "pub_tool_clientstate.h"
32 #include "pub_tool_clreq.h"
33 #include "pub_tool_libcbase.h"
34 #include "pub_tool_libcassert.h"
35 #include "pub_tool_libcfile.h"
36 #include "pub_tool_libcprint.h"
37 #include "pub_tool_libcproc.h"
38 #include "pub_tool_machine.h" // VG_(fnptr_to_fnentry)
39 #include "pub_tool_mallocfree.h"
40 #include "pub_tool_options.h"
41 #include "pub_tool_replacemalloc.h"
42 #include "pub_tool_tooliface.h"
43 #include "pub_tool_wordfm.h"
// Largest block (in bytes) that automatically gets a per-byte access
// histogram when it is allocated.
#define HISTOGRAM_SIZE_LIMIT 1024
// Largest block (in bytes) for which a user histogram request is honoured.
// The expansion is parenthesized so the macro behaves as a single value
// inside larger expressions (e.g. `x / USER_HISTOGRAM_SIZE_LIMIT`).
#define USER_HISTOGRAM_SIZE_LIMIT (25*HISTOGRAM_SIZE_LIMIT)
50 //------------------------------------------------------------//
52 //------------------------------------------------------------//
// Values for the entire run.  Both are bumped for every block that is
// introduced (and again for every resize, which is counted like a fresh
// allocation of the new size).
static ULong g_total_blocks = 0;
static ULong g_total_bytes = 0;

// Current values.  g_curr_blocks and g_curr_bytes are only used with
// clo_mode=Heap.
static ULong g_curr_blocks = 0;
static ULong g_curr_bytes = 0;
static ULong g_curr_instrs = 0; // incremented from generated code

// Values at the global max, i.e. when g_curr_bytes peaks.
// Only used with clo_mode=Heap.
static ULong g_max_blocks = 0;
static ULong g_max_bytes = 0;

// Time of the global max (value of g_curr_instrs when the peak was recorded).
static ULong g_tgmax_instrs = 0;

// Values for the entire run.  Updated each time a block is retired.
// Only used with clo_mode=Heap.
static ULong g_reads_bytes = 0;
static ULong g_writes_bytes = 0;
77 //------------------------------------------------------------//
78 //--- Command line args ---//
79 //------------------------------------------------------------//
81 typedef enum { Heap
=55, Copy
, AdHoc
} ProfileKind
;
83 static ProfileKind clo_mode
= Heap
;
85 static const HChar
* clo_dhat_out_file
= "dhat.out.%p";
87 static Bool
dh_process_cmd_line_option(const HChar
* arg
)
89 if VG_STR_CLO(arg
, "--dhat-out-file", clo_dhat_out_file
) {
91 } else if (VG_XACT_CLO(arg
, "--mode=heap", clo_mode
, Heap
)) {
92 } else if (VG_XACT_CLO(arg
, "--mode=copy", clo_mode
, Copy
)) {
93 } else if (VG_XACT_CLO(arg
, "--mode=ad-hoc", clo_mode
, AdHoc
)) {
96 return VG_(replacement_malloc_process_cmd_line_option
)(arg
);
102 static void dh_print_usage(void)
105 " --dhat-out-file=<file> output file name [dhat.out.%%p]\n"
106 " --mode=heap|copy|ad-hoc profiling mode\n"
110 static void dh_print_debug_usage(void)
117 //------------------------------------------------------------//
118 //--- an Interval Tree of live blocks ---//
119 //------------------------------------------------------------//
121 /* Tracks information about live blocks. */
126 ExeContext
* ec
; /* allocation ec */
127 ULong allocd_at
; /* instruction number */
/* Approx histogram, one 16-bit (UShort) counter per payload byte.
   Counts therefore latch at 0xFFFF.  Can be NULL if the block is
   resized or if the block is larger than HISTOGRAM_SIZE_LIMIT. */
133 UShort
* histoW
; /* [0 .. req_szB-1] */
137 /* May not contain zero-sized blocks. May not contain
138 overlapping blocks. */
139 static WordFM
* interval_tree
= NULL
; /* WordFM* Block* void */
141 /* Here's the comparison function. Since the tree is required
142 to contain non-zero sized, non-overlapping blocks, it's good
143 enough to consider any overlap as a match. */
144 static Word
interval_tree_Cmp ( UWord k1
, UWord k2
)
146 Block
* b1
= (Block
*)k1
;
147 Block
* b2
= (Block
*)k2
;
148 tl_assert(b1
->req_szB
> 0);
149 tl_assert(b2
->req_szB
> 0);
150 if (b1
->payload
+ b1
->req_szB
<= b2
->payload
) return -1;
151 if (b2
->payload
+ b2
->req_szB
<= b1
->payload
) return 1;
155 // 3-entry cache for find_Block_containing
156 static Block
* fbc_cache0
= NULL
;
157 static Block
* fbc_cache1
= NULL
;
158 static Block
* fbc_cache2
= NULL
;
160 static UWord stats__n_fBc_cached0
= 0;
161 static UWord stats__n_fBc_cached1
= 0;
162 static UWord stats__n_fBc_cached2
= 0;
163 static UWord stats__n_fBc_uncached
= 0;
164 static UWord stats__n_fBc_notfound
= 0;
166 static Block
* find_Block_containing ( Addr a
)
168 tl_assert(clo_mode
== Heap
);
170 if (LIKELY(fbc_cache0
171 && fbc_cache0
->payload
<= a
172 && a
< fbc_cache0
->payload
+ fbc_cache0
->req_szB
)) {
174 stats__n_fBc_cached0
++;
177 if (LIKELY(fbc_cache1
178 && fbc_cache1
->payload
<= a
179 && a
< fbc_cache1
->payload
+ fbc_cache1
->req_szB
)) {
180 // found at 1; swap 0 and 1
181 Block
* tmp
= fbc_cache1
;
182 fbc_cache1
= fbc_cache0
;
184 stats__n_fBc_cached1
++;
187 if (LIKELY(fbc_cache2
188 && fbc_cache2
->payload
<= a
189 && a
< fbc_cache2
->payload
+ fbc_cache2
->req_szB
)) {
190 // found at 2; swap 1 and 2
191 Block
* tmp
= fbc_cache2
;
192 fbc_cache2
= fbc_cache1
;
194 stats__n_fBc_cached2
++;
203 Bool found
= VG_(lookupFM
)( interval_tree
,
204 &foundkey
, &foundval
, (UWord
)&fake
);
206 stats__n_fBc_notfound
++;
209 tl_assert(foundval
== 0); // we don't store vals in the interval tree
210 tl_assert(foundkey
!= 1);
211 Block
* res
= (Block
*)foundkey
;
212 tl_assert(res
!= &fake
);
213 // put at the top position
214 fbc_cache2
= fbc_cache1
;
215 fbc_cache1
= fbc_cache0
;
217 stats__n_fBc_uncached
++;
221 // delete a block; asserts if not found. (viz, 'a' must be
222 // known to be present.)
223 static void delete_Block_starting_at ( Addr a
)
225 tl_assert(clo_mode
== Heap
);
230 Bool found
= VG_(delFromFM
)( interval_tree
,
231 NULL
, NULL
, (Addr
)&fake
);
233 fbc_cache0
= fbc_cache1
= fbc_cache2
= NULL
;
236 //------------------------------------------------------------//
237 //--- a FM of allocation points (APs) ---//
238 //------------------------------------------------------------//
242 // The program point that we're summarising stats for.
245 // Total number of blocks and bytes allocated by this PP.
249 // The current number of blocks and bytes live for this PP.
250 // Only used with clo_mode=Heap.
254 // Values at the PP max, i.e. when this PP's curr_bytes peaks.
255 // Only used with clo_mode=Heap.
256 ULong max_blocks
; // Blocks at the PP max.
257 ULong max_bytes
; // The PP max, measured in bytes.
259 // Values at the global max.
260 // Only used with clo_mode=Heap.
261 ULong at_tgmax_blocks
;
262 ULong at_tgmax_bytes
;
264 // Total lifetimes of all blocks allocated by this PP. Includes blocks
265 // explicitly freed and blocks implicitly freed at termination.
266 // Only used with clo_mode=Heap.
267 ULong total_lifetimes_instrs
;
269 // Number of blocks freed by this PP. (Only used in assertions.)
270 // Only used with clo_mode=Heap.
273 // Total number of reads and writes in all blocks allocated
274 // by this PP. Only used with clo_mode=Heap.
278 /* Histogram information. We maintain a histogram aggregated for
279 all retiring Blocks allocated by this PP, but only if:
280 - this PP has only ever allocated objects of one size
281 - that size is <= HISTOGRAM_SIZE_LIMIT
282 What we need therefore is a mechanism to see if this PP
283 has only ever allocated blocks of one size.
286 Unknown because no retirement yet
287 Exactly xsize all retiring blocks are of this size
288 Mixed multiple different sizes seen
290 Only used with clo_mode=Heap.
292 enum { Unknown
=999, Exactly
, Mixed
} xsize_tag
;
294 UInt
* histo
; /* [0 .. xsize-1] */
298 /* maps ExeContext*'s to PPInfo*'s. Note that the keys must match the
299 .ec field in the values. */
300 static WordFM
* ppinfo
= NULL
; /* WordFM* ExeContext* PPInfo* */
302 // Are we at peak memory? If so, update at_tgmax_blocks and at_tgmax_bytes in
303 // all PPInfos. Note that this is moderately expensive so we avoid calling it
304 // on every allocation.
305 static void check_for_peak(void)
307 tl_assert(clo_mode
== Heap
);
309 if (g_curr_bytes
== g_max_bytes
) {
// It's a peak.  (If there are multiple equal peaks we record the latest one.)
313 VG_(initIterFM
)(ppinfo
);
314 while (VG_(nextIterFM
)(ppinfo
, &keyW
, &valW
)) {
315 PPInfo
* ppi
= (PPInfo
*)valW
;
316 tl_assert(ppi
&& ppi
->ec
== (ExeContext
*)keyW
);
317 ppi
->at_tgmax_blocks
= ppi
->curr_blocks
;
318 ppi
->at_tgmax_bytes
= ppi
->curr_bytes
;
320 VG_(doneIterFM
)(ppinfo
);
324 /* 'bk' is being introduced (has just been allocated). Find the
325 relevant PPInfo entry for it, or create one, based on the block's
326 allocation EC. Then, update the PPInfo to the extent that we
327 actually can, to reflect the allocation. */
328 static void intro_Block(Block
* bk
)
336 Bool found
= VG_(lookupFM
)( ppinfo
,
337 &keyW
, &valW
, (UWord
)bk
->ec
);
340 tl_assert(keyW
== (UWord
)bk
->ec
);
342 ppi
= VG_(malloc
)( "dh.intro_Block.1", sizeof(PPInfo
) );
343 VG_(memset
)(ppi
, 0, sizeof(*ppi
));
345 Bool present
= VG_(addToFM
)( ppinfo
,
346 (UWord
)bk
->ec
, (UWord
)ppi
);
348 if (clo_mode
== Heap
) {
350 tl_assert(ppi
->freed_blocks
== 0);
351 ppi
->xsize_tag
= Unknown
;
353 if (0) VG_(printf
)("ppi %p --> Unknown\n", ppi
);
357 tl_assert(ppi
->ec
== bk
->ec
);
359 // Update global stats and PPInfo stats.
362 g_total_bytes
+= bk
->req_szB
;
365 ppi
->total_bytes
+= bk
->req_szB
;
367 if (clo_mode
== Heap
) {
369 g_curr_bytes
+= bk
->req_szB
;
372 ppi
->curr_bytes
+= bk
->req_szB
;
374 // The use of `>=` rather than `>` means that if there are multiple equal
375 // peaks we record the latest one, like `check_for_peak` does.
376 if (g_curr_bytes
>= g_max_bytes
) {
377 g_max_blocks
= g_curr_blocks
;
378 g_max_bytes
= g_curr_bytes
;
379 g_tgmax_instrs
= g_curr_instrs
;
381 ppi
->max_blocks
= ppi
->curr_blocks
;
382 ppi
->max_bytes
= ppi
->curr_bytes
;
387 /* 'bk' is retiring (being freed). Find the relevant PPInfo entry for
388 it, which must already exist. Then, fold info from 'bk' into that
389 entry. 'because_freed' is True if the block is retiring because
390 the client has freed it. If it is False then the block is retiring
391 because the program has finished, in which case we want to skip the
392 updates of the total blocks live etc for this PP, but still fold in
the access counts and histo data that have so far accumulated for it. */
395 static void retire_Block(Block
* bk
, Bool because_freed
)
397 tl_assert(clo_mode
== Heap
);
404 Bool found
= VG_(lookupFM
)( ppinfo
,
405 &keyW
, &valW
, (UWord
)bk
->ec
);
408 tl_assert(ppi
->ec
== bk
->ec
);
410 // update stats following this free.
412 VG_(printf
)("ec %p ppi->c_by_l %llu bk->rszB %llu\n",
413 bk
->ec
, ppi
->curr_bytes
, (ULong
)bk
->req_szB
);
416 // Total bytes is coming down from a possible peak.
419 // Then update global stats.
420 tl_assert(g_curr_blocks
>= 1);
421 tl_assert(g_curr_bytes
>= bk
->req_szB
);
423 g_curr_bytes
-= bk
->req_szB
;
425 // Then update PPInfo stats.
426 tl_assert(ppi
->curr_blocks
>= 1);
427 tl_assert(ppi
->curr_bytes
>= bk
->req_szB
);
429 ppi
->curr_bytes
-= bk
->req_szB
;
434 tl_assert(bk
->allocd_at
<= g_curr_instrs
);
435 ppi
->total_lifetimes_instrs
+= (g_curr_instrs
- bk
->allocd_at
);
438 ppi
->reads_bytes
+= bk
->reads_bytes
;
439 ppi
->writes_bytes
+= bk
->writes_bytes
;
440 g_reads_bytes
+= bk
->reads_bytes
;
441 g_writes_bytes
+= bk
->writes_bytes
;
443 // histo stuff. First, do state transitions for xsize/xsize_tag.
444 switch (ppi
->xsize_tag
) {
447 tl_assert(ppi
->xsize
== 0);
448 tl_assert(ppi
->freed_blocks
== 1 || ppi
->freed_blocks
== 0);
449 tl_assert(!ppi
->histo
);
450 ppi
->xsize_tag
= Exactly
;
451 ppi
->xsize
= bk
->req_szB
;
452 if (0) VG_(printf
)("ppi %p --> Exactly(%lu)\n", ppi
, ppi
->xsize
);
453 // and allocate the histo
455 ppi
->histo
= VG_(malloc
)("dh.retire_Block.1",
456 ppi
->xsize
* sizeof(UInt
));
457 VG_(memset
)(ppi
->histo
, 0, ppi
->xsize
* sizeof(UInt
));
462 //tl_assert(ppi->freed_blocks > 1);
463 if (bk
->req_szB
!= ppi
->xsize
) {
464 if (0) VG_(printf
)("ppi %p --> Mixed(%lu -> %lu)\n",
465 ppi
, ppi
->xsize
, bk
->req_szB
);
466 ppi
->xsize_tag
= Mixed
;
468 // deallocate the histo, if any
470 VG_(free
)(ppi
->histo
);
477 //tl_assert(ppi->freed_blocks > 1);
484 // See if we can fold the histo data from this block into
485 // the data for the PP.
486 if (ppi
->xsize_tag
== Exactly
&& ppi
->histo
&& bk
->histoW
) {
487 tl_assert(ppi
->xsize
== bk
->req_szB
);
489 for (i
= 0; i
< ppi
->xsize
; i
++) {
490 // FIXME: do something better in case of overflow of ppi->histo[..]
491 // Right now, at least don't let it overflow/wrap around
492 if (ppi
->histo
[i
] <= 0xFFFE0000)
493 ppi
->histo
[i
] += (UInt
)bk
->histoW
[i
];
495 if (0) VG_(printf
)("fold in, PP = %p\n", ppi
);
500 VG_(printf
)("block retiring, histo %lu: ", bk
->req_szB
);
502 for (i
= 0; i
< bk
->req_szB
; i
++)
503 VG_(printf
)("%u ", (UInt
)bk
->histoB
[i
]);
506 VG_(printf
)("block retiring, no histo %lu\n", bk
->req_szB
);
/* This handles block resizing.  When a block with PP 'ec' changes size
   from 'old_req_szB' to 'new_req_szB', call here to update the PPInfo. */
513 static void resize_Block(ExeContext
* ec
, SizeT old_req_szB
, SizeT new_req_szB
)
515 tl_assert(clo_mode
== Heap
);
517 Long delta
= (Long
)new_req_szB
- (Long
)old_req_szB
;
521 Bool found
= VG_(lookupFM
)( ppinfo
,
522 &keyW
, &valW
, (UWord
)ec
);
526 tl_assert(ppi
->ec
== ec
);
529 tl_assert(ppi
->curr_bytes
>= -delta
);
530 tl_assert(g_curr_bytes
>= -delta
);
532 // Total bytes might be coming down from a possible peak.
536 // Note: we treat realloc() like malloc() + free() for total counts, i.e. we
537 // increment total_blocks by 1 and increment total_bytes by new_req_szB.
539 // A reasonable alternative would be to leave total_blocks unchanged and
540 // increment total_bytes by delta (but only if delta is positive). But then
541 // calls to realloc wouldn't be counted towards the total_blocks count,
542 // which is undesirable.
544 // Update global stats and PPInfo stats.
547 g_total_bytes
+= new_req_szB
;
550 ppi
->total_bytes
+= new_req_szB
;
552 g_curr_blocks
+= 0; // unchanged
553 g_curr_bytes
+= delta
;
555 ppi
->curr_blocks
+= 0; // unchanged
556 ppi
->curr_bytes
+= delta
;
558 // The use of `>=` rather than `>` means that if there are multiple equal
559 // peaks we record the latest one, like `check_for_peak` does.
560 if (g_curr_bytes
>= g_max_bytes
) {
561 g_max_blocks
= g_curr_blocks
;
562 g_max_bytes
= g_curr_bytes
;
563 g_tgmax_instrs
= g_curr_instrs
;
565 ppi
->max_blocks
= ppi
->curr_blocks
;
566 ppi
->max_bytes
= ppi
->curr_bytes
;
570 //------------------------------------------------------------//
571 //--- update both Block and PPInfos after {m,re}alloc/free ---//
572 //------------------------------------------------------------//
575 void* new_block ( ThreadId tid
, void* p
, SizeT req_szB
, SizeT req_alignB
,
578 tl_assert(p
== NULL
); // don't handle custom allocators right now
581 if ((SSizeT
)req_szB
< 0) return NULL
;
584 req_szB
= 1; /* can't allow zero-sized blocks in the interval tree */
587 // Allocate and zero if necessary
589 p
= VG_(cli_malloc
)( req_alignB
, req_szB
);
593 if (is_zeroed
) VG_(memset
)(p
, 0, req_szB
);
594 actual_szB
= VG_(cli_malloc_usable_size
)(p
);
595 tl_assert(actual_szB
>= req_szB
);
598 if (clo_mode
!= Heap
) {
602 // Make new Block, add to interval_tree.
603 Block
* bk
= VG_(malloc
)("dh.new_block.1", sizeof(Block
));
604 bk
->payload
= (Addr
)p
;
605 bk
->req_szB
= req_szB
;
606 bk
->ec
= VG_(record_ExeContext
)(tid
, 0/*first word delta*/);
607 bk
->allocd_at
= g_curr_instrs
;
609 bk
->writes_bytes
= 0;
610 // Set up histogram array, if the block isn't too large.
612 if (req_szB
<= HISTOGRAM_SIZE_LIMIT
) {
613 bk
->histoW
= VG_(malloc
)("dh.new_block.2", req_szB
* sizeof(UShort
));
614 VG_(memset
)(bk
->histoW
, 0, req_szB
* sizeof(UShort
));
617 Bool present
= VG_(addToFM
)( interval_tree
, (UWord
)bk
, (UWord
)0/*no val*/);
619 fbc_cache0
= fbc_cache1
= fbc_cache2
= NULL
;
627 void die_block ( void* p
)
631 if (clo_mode
!= Heap
) {
635 Block
* bk
= find_Block_containing( (Addr
)p
);
637 return; // bogus free
640 tl_assert(bk
->req_szB
> 0);
641 // assert the block finder is behaving sanely
642 tl_assert(bk
->payload
<= (Addr
)p
);
643 tl_assert( (Addr
)p
< bk
->payload
+ bk
->req_szB
);
645 if (bk
->payload
!= (Addr
)p
) {
646 return; // bogus free
649 retire_Block(bk
, True
/*because_freed*/);
651 delete_Block_starting_at( bk
->payload
);
653 VG_(free
)( bk
->histoW
);
660 void* renew_block ( ThreadId tid
, void* p_old
, SizeT new_req_szB
)
664 tl_assert(new_req_szB
> 0); // map 0 to 1
666 if (clo_mode
!= Heap
) {
667 SizeT old_actual_szB
= VG_(cli_malloc_usable_size
)(p_old
);
668 p_new
= VG_(cli_malloc
)(VG_(clo_alignment
), new_req_szB
);
672 VG_(memmove
)(p_new
, p_old
, VG_MIN(old_actual_szB
, new_req_szB
));
673 VG_(cli_free
)(p_old
);
677 // Find the old block.
678 Block
* bk
= find_Block_containing( (Addr
)p_old
);
680 return NULL
; // bogus realloc
683 tl_assert(bk
->req_szB
> 0);
684 // Assert the block finder is behaving sanely.
685 tl_assert(bk
->payload
<= (Addr
)p_old
);
686 tl_assert( (Addr
)p_old
< bk
->payload
+ bk
->req_szB
);
688 if (bk
->payload
!= (Addr
)p_old
) {
689 return NULL
; // bogus realloc
692 // Keeping the histogram alive in any meaningful way across
693 // block resizing is too darn complicated. Just throw it away.
695 VG_(free
)(bk
->histoW
);
699 // Actually do the allocation, if necessary.
700 if (new_req_szB
<= bk
->req_szB
) {
701 // New size is smaller or same; block not moved.
702 resize_Block(bk
->ec
, bk
->req_szB
, new_req_szB
);
703 bk
->req_szB
= new_req_szB
;
705 // Update reads/writes for the implicit copy. Even though we didn't
706 // actually do a copy, we act like we did, to match up with the fact
707 // that we treat this as an additional allocation.
708 bk
->reads_bytes
+= new_req_szB
;
709 bk
->writes_bytes
+= new_req_szB
;
714 // New size is bigger; make new block, copy shared contents, free old.
715 p_new
= VG_(cli_malloc
)(VG_(clo_alignment
), new_req_szB
);
717 // Nb: if realloc fails, NULL is returned but the old block is not
718 // touched. What an awful function.
721 tl_assert(p_new
!= p_old
);
723 VG_(memcpy
)(p_new
, p_old
, bk
->req_szB
);
724 VG_(cli_free
)(p_old
);
726 // Since the block has moved, we need to re-insert it into the
727 // interval tree at the new place. Do this by removing
729 delete_Block_starting_at( (Addr
)p_old
);
730 // Now 'bk' is no longer in the tree, but the Block itself
733 // Update reads/writes for the copy.
734 bk
->reads_bytes
+= bk
->req_szB
;
735 bk
->writes_bytes
+= bk
->req_szB
;
737 // Update the metadata.
738 resize_Block(bk
->ec
, bk
->req_szB
, new_req_szB
);
739 bk
->payload
= (Addr
)p_new
;
740 bk
->req_szB
= new_req_szB
;
742 // And re-add it to the interval tree.
744 = VG_(addToFM
)( interval_tree
, (UWord
)bk
, (UWord
)0/*no val*/);
746 fbc_cache0
= fbc_cache1
= fbc_cache2
= NULL
;
752 //------------------------------------------------------------//
753 //--- malloc() et al replacement wrappers ---//
754 //------------------------------------------------------------//
756 static void* dh_malloc ( ThreadId tid
, SizeT szB
)
758 return new_block( tid
, NULL
, szB
, VG_(clo_alignment
), /*is_zeroed*/False
);
761 static void* dh___builtin_new ( ThreadId tid
, SizeT szB
)
763 return new_block( tid
, NULL
, szB
, VG_(clo_alignment
), /*is_zeroed*/False
);
766 static void* dh___builtin_new_aligned ( ThreadId tid
, SizeT szB
, SizeT alignB
, SizeT orig_alignB
)
768 return new_block( tid
, NULL
, szB
, alignB
, /*is_zeroed*/False
);
771 static void* dh___builtin_vec_new ( ThreadId tid
, SizeT szB
)
773 return new_block( tid
, NULL
, szB
, VG_(clo_alignment
), /*is_zeroed*/False
);
776 static void* dh___builtin_vec_new_aligned ( ThreadId tid
, SizeT szB
, SizeT alignB
, SizeT orig_alignB
)
778 return new_block( tid
, NULL
, szB
, alignB
, /*is_zeroed*/False
);
781 static void* dh_calloc ( ThreadId tid
, SizeT m
, SizeT szB
)
783 return new_block( tid
, NULL
, m
*szB
, VG_(clo_alignment
), /*is_zeroed*/True
);
786 static void *dh_memalign ( ThreadId tid
, SizeT alignB
, SizeT orig_alignB
, SizeT szB
)
788 return new_block( tid
, NULL
, szB
, alignB
, False
);
791 static void dh_free ( ThreadId tid
__attribute__((unused
)), void* p
)
796 static void dh___builtin_delete ( ThreadId tid
, void* p
)
801 static void dh___builtin_delete_aligned ( ThreadId tid
, void* p
, SizeT align
)
806 static void dh___builtin_vec_delete ( ThreadId tid
, void* p
)
811 static void dh___builtin_vec_delete_aligned ( ThreadId tid
, void* p
, SizeT align
)
816 static void* dh_realloc ( ThreadId tid
, void* p_old
, SizeT new_szB
)
819 return dh_malloc(tid
, new_szB
);
822 if (VG_(clo_realloc_zero_bytes_frees
) == True
) {
828 return renew_block(tid
, p_old
, new_szB
);
831 static SizeT
dh_malloc_usable_size ( ThreadId tid
, void* p
)
833 if (clo_mode
!= Heap
) {
834 return VG_(cli_malloc_usable_size
)(p
);
837 Block
* bk
= find_Block_containing( (Addr
)p
);
838 return bk
? bk
->req_szB
: 0;
841 //------------------------------------------------------------//
842 //--- memory references ---//
843 //------------------------------------------------------------//
846 void inc_histo_for_block ( Block
* bk
, Addr addr
, UWord szB
)
848 UWord i
, offMin
, offMax1
;
849 offMin
= addr
- bk
->payload
;
850 tl_assert(offMin
< bk
->req_szB
);
851 offMax1
= offMin
+ szB
;
852 if (offMax1
> bk
->req_szB
)
853 offMax1
= bk
->req_szB
;
854 //VG_(printf)("%lu %lu (size of block %lu)\n", offMin, offMax1, bk->req_szB);
855 for (i
= offMin
; i
< offMax1
; i
++) {
856 UShort n
= bk
->histoW
[i
];
863 void dh_handle_write ( Addr addr
, UWord szB
)
865 tl_assert(clo_mode
== Heap
);
867 Block
* bk
= find_Block_containing(addr
);
869 bk
->writes_bytes
+= szB
;
871 inc_histo_for_block(bk
, addr
, szB
);
876 void dh_handle_read ( Addr addr
, UWord szB
)
878 tl_assert(clo_mode
== Heap
);
880 Block
* bk
= find_Block_containing(addr
);
882 bk
->reads_bytes
+= szB
;
884 inc_histo_for_block(bk
, addr
, szB
);
888 // Handle reads and writes by syscalls (read == kernel
889 // reads user space, write == kernel writes user space).
890 // Assumes no such read or write spans a heap block
// boundary and so we can treat it just as one giant read or write.
894 void dh_handle_noninsn_read ( CorePart part
, ThreadId tid
, const HChar
* s
,
895 Addr base
, SizeT size
)
897 tl_assert(clo_mode
== Heap
);
901 dh_handle_read(base
, size
);
903 case Vg_CoreSysCallArgInMem
:
905 case Vg_CoreTranslate
:
913 void dh_handle_noninsn_read_asciiz(CorePart part
, ThreadId tid
, const HChar
* s
,
916 tl_assert(clo_mode
== Heap
);
918 tl_assert(part
== Vg_CoreSysCall
);
919 dh_handle_noninsn_read(part
, tid
, s
, str
, VG_(strlen
)((const HChar
*)str
+1));
923 void dh_handle_noninsn_write ( CorePart part
, ThreadId tid
,
924 Addr base
, SizeT size
)
926 tl_assert(clo_mode
== Heap
);
930 case Vg_CoreClientReq
:
931 dh_handle_write(base
, size
);
940 //------------------------------------------------------------//
941 //--- Instrumentation ---//
942 //------------------------------------------------------------//
// Shorthand wrappers around the IR constructors used by the
// instrumentation code below.
#define binop(_op, _arg1, _arg2)  IRExpr_Binop((_op), (_arg1), (_arg2))
#define mkexpr(_tmp)              IRExpr_RdTmp(_tmp)
#define mkU32(_n)                 IRExpr_Const(IRConst_U32(_n))
#define mkU64(_n)                 IRExpr_Const(IRConst_U64(_n))
#define assign(_t, _e)            IRStmt_WrTmp((_t), (_e))
951 void add_counter_update(IRSB
* sbOut
, Int n
)
953 #if defined(VG_BIGENDIAN)
955 #elif defined(VG_LITTLEENDIAN)
958 # error "Unknown endianness"
960 // Add code to increment 'g_curr_instrs' by 'n', like this:
961 // WrTmp(t1, Load64(&g_curr_instrs))
962 // WrTmp(t2, Add64(RdTmp(t1), Const(n)))
963 // Store(&g_curr_instrs, t2)
964 IRTemp t1
= newIRTemp(sbOut
->tyenv
, Ity_I64
);
965 IRTemp t2
= newIRTemp(sbOut
->tyenv
, Ity_I64
);
966 IRExpr
* counter_addr
= mkIRExpr_HWord( (HWord
)&g_curr_instrs
);
968 IRStmt
* st1
= assign(t1
, IRExpr_Load(END
, Ity_I64
, counter_addr
));
969 IRStmt
* st2
= assign(t2
, binop(Iop_Add64
, mkexpr(t1
), mkU64(n
)));
970 IRStmt
* st3
= IRStmt_Store(END
, counter_addr
, mkexpr(t2
));
972 addStmtToIRSB( sbOut
, st1
);
973 addStmtToIRSB( sbOut
, st2
);
974 addStmtToIRSB( sbOut
, st3
);
978 void addMemEvent(IRSB
* sbOut
, Bool isWrite
, Int szB
, IRExpr
* addr
,
981 if (clo_mode
!= Heap
) {
985 IRType tyAddr
= Ity_INVALID
;
986 const HChar
* hName
= NULL
;
988 IRExpr
** argv
= NULL
;
991 const Int THRESH
= 4096 * 4; // somewhat arbitrary
992 const Int rz_szB
= VG_STACK_REDZONE_SZB
;
994 tyAddr
= typeOfIRExpr( sbOut
->tyenv
, addr
);
995 tl_assert(tyAddr
== Ity_I32
|| tyAddr
== Ity_I64
);
998 hName
= "dh_handle_write";
999 hAddr
= &dh_handle_write
;
1001 hName
= "dh_handle_read";
1002 hAddr
= &dh_handle_read
;
1005 argv
= mkIRExprVec_2( addr
, mkIRExpr_HWord(szB
) );
1007 /* Add the helper. */
1011 di
= unsafeIRDirty_0_N( 2/*regparms*/,
1012 hName
, VG_(fnptr_to_fnentry
)( hAddr
),
1015 /* Generate the guard condition: "(addr - (SP - RZ)) >u N", for
1016 some arbitrary N. If that fails then addr is in the range (SP -
1017 RZ .. SP + N - RZ). If N is smallish (a page?) then we can say
1018 addr is within a page of SP and so can't possibly be a heap
1019 access, and so can be skipped. */
1020 IRTemp sp
= newIRTemp(sbOut
->tyenv
, tyAddr
);
1021 addStmtToIRSB( sbOut
, assign(sp
, IRExpr_Get(goff_sp
, tyAddr
)));
1023 IRTemp sp_minus_rz
= newIRTemp(sbOut
->tyenv
, tyAddr
);
1028 ? binop(Iop_Sub32
, mkexpr(sp
), mkU32(rz_szB
))
1029 : binop(Iop_Sub64
, mkexpr(sp
), mkU64(rz_szB
)))
1032 IRTemp diff
= newIRTemp(sbOut
->tyenv
, tyAddr
);
1037 ? binop(Iop_Sub32
, addr
, mkexpr(sp_minus_rz
))
1038 : binop(Iop_Sub64
, addr
, mkexpr(sp_minus_rz
)))
1041 IRTemp guard
= newIRTemp(sbOut
->tyenv
, Ity_I1
);
1046 ? binop(Iop_CmpLT32U
, mkU32(THRESH
), mkexpr(diff
))
1047 : binop(Iop_CmpLT64U
, mkU64(THRESH
), mkexpr(diff
)))
1049 di
->guard
= mkexpr(guard
);
1051 addStmtToIRSB( sbOut
, IRStmt_Dirty(di
) );
1055 IRSB
* dh_instrument ( VgCallbackClosure
* closure
,
1057 const VexGuestLayout
* layout
,
1058 const VexGuestExtents
* vge
,
1059 const VexArchInfo
* archinfo_host
,
1060 IRType gWordTy
, IRType hWordTy
)
1064 IRTypeEnv
* tyenv
= sbIn
->tyenv
;
1066 const Int goff_sp
= layout
->offset_SP
;
1068 // We increment the instruction count in two places:
1069 // - just before any Ist_Exit statements;
1070 // - just before the IRSB's end.
1071 // In the former case, we zero 'n' and then continue instrumenting.
1073 sbOut
= deepCopyIRSBExceptStmts(sbIn
);
1075 // Copy verbatim any IR preamble preceding the first IMark
1077 while (i
< sbIn
->stmts_used
&& sbIn
->stmts
[i
]->tag
!= Ist_IMark
) {
1078 addStmtToIRSB( sbOut
, sbIn
->stmts
[i
] );
1082 for (/*use current i*/; i
< sbIn
->stmts_used
; i
++) {
1083 IRStmt
* st
= sbIn
->stmts
[i
];
1085 if (!st
|| st
->tag
== Ist_NoOp
) continue;
1096 // Add an increment before the Exit statement, then reset 'n'.
1097 add_counter_update(sbOut
, n
);
1104 IRExpr
* data
= st
->Ist
.WrTmp
.data
;
1105 if (data
->tag
== Iex_Load
) {
1106 IRExpr
* aexpr
= data
->Iex
.Load
.addr
;
1107 // Note also, endianness info is ignored. I guess
1108 // that's not interesting.
1109 addMemEvent( sbOut
, False
/*!isWrite*/,
1110 sizeofIRType(data
->Iex
.Load
.ty
),
1117 IRExpr
* data
= st
->Ist
.Store
.data
;
1118 IRExpr
* aexpr
= st
->Ist
.Store
.addr
;
1119 addMemEvent( sbOut
, True
/*isWrite*/,
1120 sizeofIRType(typeOfIRExpr(tyenv
, data
)),
1127 IRDirty
* d
= st
->Ist
.Dirty
.details
;
1128 if (d
->mFx
!= Ifx_None
) {
1129 /* This dirty helper accesses memory. Collect the details. */
1130 tl_assert(d
->mAddr
!= NULL
);
1131 tl_assert(d
->mSize
!= 0);
1132 dataSize
= d
->mSize
;
1133 // Large (eg. 28B, 108B, 512B on x86) data-sized
1134 // instructions will be done inaccurately, but they're
1135 // very rare and this avoids errors from hitting more
1136 // than two cache lines in the simulation.
1137 if (d
->mFx
== Ifx_Read
|| d
->mFx
== Ifx_Modify
)
1138 addMemEvent( sbOut
, False
/*!isWrite*/,
1139 dataSize
, d
->mAddr
, goff_sp
);
1140 if (d
->mFx
== Ifx_Write
|| d
->mFx
== Ifx_Modify
)
1141 addMemEvent( sbOut
, True
/*isWrite*/,
1142 dataSize
, d
->mAddr
, goff_sp
);
1144 tl_assert(d
->mAddr
== NULL
);
1145 tl_assert(d
->mSize
== 0);
1151 /* We treat it as a read and a write of the location. I
1152 think that is the same behaviour as it was before IRCAS
1153 was introduced, since prior to that point, the Vex
1154 front ends would translate a lock-prefixed instruction
1155 into a (normal) read followed by a (normal) write. */
1157 IRCAS
* cas
= st
->Ist
.CAS
.details
;
1158 tl_assert(cas
->addr
!= NULL
);
1159 tl_assert(cas
->dataLo
!= NULL
);
1160 dataSize
= sizeofIRType(typeOfIRExpr(tyenv
, cas
->dataLo
));
1161 if (cas
->dataHi
!= NULL
)
1162 dataSize
*= 2; /* since it's a doubleword-CAS */
1163 addMemEvent( sbOut
, False
/*!isWrite*/,
1164 dataSize
, cas
->addr
, goff_sp
);
1165 addMemEvent( sbOut
, True
/*isWrite*/,
1166 dataSize
, cas
->addr
, goff_sp
);
1172 if (st
->Ist
.LLSC
.storedata
== NULL
) {
1174 dataTy
= typeOfIRTemp(tyenv
, st
->Ist
.LLSC
.result
);
1175 addMemEvent( sbOut
, False
/*!isWrite*/,
1176 sizeofIRType(dataTy
),
1177 st
->Ist
.LLSC
.addr
, goff_sp
);
1180 dataTy
= typeOfIRExpr(tyenv
, st
->Ist
.LLSC
.storedata
);
1181 addMemEvent( sbOut
, True
/*isWrite*/,
1182 sizeofIRType(dataTy
),
1183 st
->Ist
.LLSC
.addr
, goff_sp
);
1192 addStmtToIRSB( sbOut
, st
);
1196 // Add an increment before the SB end.
1197 add_counter_update(sbOut
, n
);
1208 //------------------------------------------------------------//
1209 //--- Client requests ---//
1210 //------------------------------------------------------------//
1212 static Bool
dh_handle_client_request(ThreadId tid
, UWord
* arg
, UWord
* ret
)
1214 if (!VG_IS_TOOL_USERREQ('D','H',arg
[0]))
1218 case VG_USERREQ__DHAT_AD_HOC_EVENT
: {
1219 if (clo_mode
!= AdHoc
) {
1223 SizeT len
= (SizeT
)arg
[1];
1225 // Only the ec and req_szB fields are used by intro_Block().
1227 VG_(memset
)(&bk
, 0, sizeof(bk
));
1229 bk
.ec
= VG_(record_ExeContext
)(tid
, 0/*first word delta*/);
1236 case VG_USERREQ__DHAT_HISTOGRAM_MEMORY
: {
1237 Addr address
= (Addr
)arg
[1];
1239 Block
* bk
= find_Block_containing( address
);
1244 "Warning: address for user histogram request not found %llx\n", (ULong
)address
1249 // already histogrammed
1250 if (bk
->req_szB
<= HISTOGRAM_SIZE_LIMIT
) {
1253 "Warning: request for user histogram of size %lu is smaller than the normal histogram limit, request ignored\n",
1260 if (bk
->req_szB
> USER_HISTOGRAM_SIZE_LIMIT
) {
1263 "Warning: request for user histogram of size %lu is larger than the maximum user request limit, request ignored\n",
1270 bk
->histoW
= VG_(malloc
)("dh.new_block.3", bk
->req_szB
* sizeof(UShort
));
1271 VG_(memset
)(bk
->histoW
, 0, bk
->req_szB
* sizeof(UShort
));
1276 case _VG_USERREQ__DHAT_COPY
: {
1277 SizeT len
= (SizeT
)arg
[1];
1279 if (clo_mode
!= Copy
) {
1283 // Only the ec and req_szB fields are used by intro_Block().
1285 VG_(memset
)(&bk
, 0, sizeof(bk
));
1287 bk
.ec
= VG_(record_ExeContext
)(tid
, 0/*first word delta*/);
1295 VG_(message
)(Vg_UserMsg
,
1296 "Warning: unknown DHAT client request code %llx\n",
1304 //------------------------------------------------------------//
1305 //--- Finalisation ---//
1306 //------------------------------------------------------------//
1308 // File format notes.
1310 // - The files are JSON, because it's a widely-used format and saves us having
1311 // to write a parser in dh_view.js.
1313 // - We use a comma-first style for the generated JSON. Comma-first style
1314 // moves the special case for arrays/objects from the last item to the
1315 // first. This helps in cases where you can't easily tell in advance the
1316 // size of arrays/objects, such as iterating over a WordFM (because
1317 // VG_(sizeFM) is O(n) rather than O(1)), and iterating over stack frames
1318 // using VG_(apply_ExeContext) in combination with an InlIpCursor.
1320 // - We use short field names and minimal whitespace to minimize file sizes.
1325 // // Version number of the format. Incremented on each
1326 // // backwards-incompatible change. A mandatory integer.
1327 // "dhatFileVersion": 2,
1329 // // The invocation mode. A mandatory, free-form string.
1332 // // The verb shown in the line above the stack frames, i.e. "<verb> at {". A
1333 // // mandatory string.
1334 // "verb": "Allocated",
1336 // // Are block lifetimes recorded? Affects whether some other fields are
1337 // // present. A mandatory boolean.
1340 // // Are block accesses recorded? Affects whether some other fields are
1341 // // present. A mandatory boolean.
1344 // // Byte/bytes/blocks-position units. Optional strings. "byte", "bytes",
1345 // // and "blocks" are the values used if these fields are omitted.
1346 // "bu": "byte", "bsu": "bytes", "bksu": "blocks",
1348 // // Time units (individual and 1,000,000x). Mandatory strings.
1349 // "tu": "instrs", "Mtu": "Minstr",
1351 // // The "short-lived" time threshold, measured in "tu"s.
1352 // // - bklt=true: a mandatory integer.
1353 // // - bklt=false: omitted.
1356 // // The executed command. A mandatory string.
1359 // // The process ID. A mandatory integer.
1362 // // The time at the end of execution (t-end). A mandatory integer.
1365 // // The time of the global max (t-gmax).
1366 // // - bklt=true: a mandatory integer.
1367 // // - bklt=false: omitted.
1370 // // The program points. A mandatory array.
1373 // // Total bytes and blocks. Mandatory integers.
1374 // "tb": 5, "tbk": 1,
1376 // // Total lifetimes of all blocks allocated at this PP.
1377 // // - bklt=true: a mandatory integer.
1378 // // - bklt=false: omitted.
1381 // // The maximum bytes and blocks for this PP.
1382 // // - bklt=true: mandatory integers.
1383 // // - bklt=false: omitted.
1384 // "mb": 5, "mbk": 1,
1386 // // The bytes and blocks at t-gmax for this PP.
1387 // // - bklt=true: mandatory integers.
1388 // // - bklt=false: omitted.
1389 // "gb": 0, "gbk": 0,
1391 // // The bytes and blocks at t-end for this PP.
1392 // // - bklt=true: mandatory integers.
1393 // // - bklt=false: omitted.
1394 // "eb": 0, "ebk": 0,
1396 // // The reads and writes of blocks for this PP.
1397 // // - bkacc=true: mandatory integers.
1398 // // - bkacc=false: omitted.
1399 // "rb": 41, "wb": 5,
1401 // // The exact accesses of blocks for this PP. Only used when all
1402 // // allocations are the same size and sufficiently small. A negative
1403 // // element indicates run-length encoding of the following integer.
1404 // // E.g. `-3, 4` means "three 4s in a row".
1405 // // - bkacc=true: an optional array of integers.
1406 // // - bkacc=false: omitted.
1407 // "acc": [5, -3, 4, 2],
1409 // // Frames. Each element is an index into the "ftbl" array below.
1410 // // - All modes: A mandatory array of integers.
1415 // // Frame table. A mandatory array of strings.
1418 // "0x4AA1D9F: _nl_normalize_codeset (l10nflist.c:332)",
1419 // "0x4A9B414: _nl_load_locale_from_archive (loadarchive.c:173)",
1420 // "0x4A9A2BE: _nl_find_locale (findlocale.c:153)"
// Shorthand for printf-style output to the DHAT output file. The expansion
// refers to a file handle named `fp`, which must be in scope wherever the
// macro is used. The first argument is always the format string.
#define FP(...) ({ VG_(fprintf)(fp, __VA_ARGS__); })
1428 // The frame table holds unique frames.
1429 static WordFM
* frame_tbl
= NULL
;
1430 static UWord next_frame_n
= 0;
1432 static Word
frame_cmp(UWord a
, UWord b
)
1434 return VG_(strcmp
)((const HChar
*)a
, (const HChar
*)b
);
1437 static HChar
hex_digit_to_ascii_char(UChar d
)
1440 return (d
< 10) ? ('0' + d
) : ('a' + (d
- 10));
1443 // For JSON, we must escape double quote, backslash, and 0x00..0x1f.
1445 // Returns the original string if no escaping was required. Returns a pointer
1446 // to a static buffer if escaping was required. Therefore, the return value is
1447 // only valid until the next call to this function.
1448 static const HChar
* json_escape(const HChar
* s
)
1450 static HChar
* buf
= NULL
;
1451 static SizeT bufcap
= 0;
1453 // Do we need any escaping?
1458 if (c
== '"' || c
== '\\') {
1460 } else if (c
<= 0x1f) {
1468 // No escaping needed.
1472 // Escaping needed. (The +1 is for the NUL terminator.) Enlarge buf if
1474 SizeT newcap
= len
+ extra
+ 1;
1475 if (bufcap
< newcap
) {
1476 buf
= VG_(realloc
)("dh.json", buf
, newcap
);
1487 } else if (c
== '\\') {
1490 } else if (c
<= 0x1f) {
1495 *q
++ = hex_digit_to_ascii_char((c
& 0x00f0) >> 4);
1496 *q
++ = hex_digit_to_ascii_char(c
& 0x000f);
1507 static void write_PPInfo_frame(UInt n
, DiEpoch ep
, Addr ip
, void* opaque
)
1509 Bool
* is_first
= (Bool
*)opaque
;
1510 InlIPCursor
* iipc
= VG_(new_IIPC
)(ep
, ip
);
1513 const HChar
* buf
= VG_(describe_IP
)(ep
, ip
, iipc
);
1515 // Skip entries in vg_replace_malloc.c (e.g. `malloc`, `calloc`,
1516 // `realloc`, `operator new`) because they're boring and clog up the
1518 if (VG_(strstr
)(buf
, "vg_replace_malloc.c")) {
1522 // If this description has been seen before, get its number. Otherwise,
1523 // give it a new number and put it in the table.
1524 UWord keyW
= 0, valW
= 0;
1526 Bool found
= VG_(lookupFM
)(frame_tbl
, &keyW
, &valW
, (UWord
)buf
);
1528 //const HChar* str = (const HChar*)keyW;
1529 //tl_assert(0 == VG_(strcmp)(buf, str));
1532 // `buf` is a static buffer, we must copy it.
1533 const HChar
* str
= VG_(strdup
)("dh.frame_tbl.3", buf
);
1534 frame_n
= next_frame_n
++;
1535 Bool present
= VG_(addToFM
)(frame_tbl
, (UWord
)str
, frame_n
);
1536 tl_assert(!present
);
1539 FP("%c%lu", *is_first
? '[' : ',', frame_n
);
1542 } while (VG_(next_IIPC
)(iipc
));
1544 VG_(delete_IIPC
)(iipc
);
1547 static void write_PPInfo(PPInfo
* ppi
, Bool is_first
)
1549 FP(" %c{\"tb\":%llu,\"tbk\":%llu\n",
1550 is_first
? '[' : ',',
1551 ppi
->total_bytes
, ppi
->total_blocks
);
1553 if (clo_mode
== Heap
) {
1554 tl_assert(ppi
->total_blocks
>= ppi
->max_blocks
);
1555 tl_assert(ppi
->total_bytes
>= ppi
->max_bytes
);
1557 FP(" ,\"tl\":%llu\n",
1558 ppi
->total_lifetimes_instrs
);
1559 FP(" ,\"mb\":%llu,\"mbk\":%llu\n",
1560 ppi
->max_bytes
, ppi
->max_blocks
);
1561 FP(" ,\"gb\":%llu,\"gbk\":%llu\n",
1562 ppi
->at_tgmax_bytes
, ppi
->at_tgmax_blocks
);
1563 FP(" ,\"eb\":%llu,\"ebk\":%llu\n",
1564 ppi
->curr_bytes
, ppi
->curr_blocks
);
1565 FP(" ,\"rb\":%llu,\"wb\":%llu\n",
1566 ppi
->reads_bytes
, ppi
->writes_bytes
);
1568 if (ppi
->histo
&& ppi
->xsize_tag
== Exactly
) {
1571 // Simple run-length encoding: when N entries in a row have the same
1572 // value M, we print "-N,M". If there is just one in a row, we just
1573 // print "M". This reduces file size significantly.
1576 for (UWord i
= 0; i
< ppi
->xsize
; i
++) {
1577 UShort h
= ppi
->histo
[i
];
1579 // Continue current run.
1582 // End of run; print it.
1585 } else if (reps
> 1) {
1586 FP("-%d,%u,", reps
, repval
);
1592 // Print the final run.
1595 } else if (reps
> 1) {
1596 FP("-%d,%u", reps
, repval
);
1602 tl_assert(ppi
->curr_bytes
== 0);
1603 tl_assert(ppi
->curr_blocks
== 0);
1604 tl_assert(ppi
->max_bytes
== 0);
1605 tl_assert(ppi
->max_blocks
== 0);
1606 tl_assert(ppi
->at_tgmax_bytes
== 0);
1607 tl_assert(ppi
->at_tgmax_blocks
== 0);
1608 tl_assert(ppi
->total_lifetimes_instrs
== 0);
1609 tl_assert(ppi
->freed_blocks
== 0);
1610 tl_assert(ppi
->reads_bytes
== 0);
1611 tl_assert(ppi
->writes_bytes
== 0);
1612 tl_assert(ppi
->xsize_tag
== 0);
1613 tl_assert(ppi
->xsize
== 0);
1614 tl_assert(ppi
->histo
== NULL
);
1618 Bool is_first_frame
= True
;
1619 VG_(apply_ExeContext
)(write_PPInfo_frame
, &is_first_frame
, ppi
->ec
);
1625 static void write_PPInfos(void)
1631 VG_(initIterFM
)(ppinfo
);
1632 Bool is_first
= True
;
1633 while (VG_(nextIterFM
)(ppinfo
, &keyW
, &valW
)) {
1634 PPInfo
* ppi
= (PPInfo
*)valW
;
1635 tl_assert(ppi
&& ppi
->ec
== (ExeContext
*)keyW
);
1636 write_PPInfo(ppi
, is_first
);
1639 VG_(doneIterFM
)(ppinfo
);
1642 // We didn't print any elements. This happens if ppinfo is empty.
1649 static void dh_fini(Int exit_status
)
1651 // This function does lots of allocations that it doesn't bother to free,
1652 // because execution is almost over anyway.
1656 // Total bytes might be at a possible peak.
1657 if (clo_mode
== Heap
) {
1660 // Before printing statistics, we must harvest various stats (such as
1661 // lifetimes and accesses) for all the blocks that are still alive.
1662 VG_(initIterFM
)( interval_tree
);
1663 while (VG_(nextIterFM
)( interval_tree
, &keyW
, &valW
)) {
1664 Block
* bk
= (Block
*)keyW
;
1665 tl_assert(valW
== 0);
1667 retire_Block(bk
, False
/*!because_freed*/);
1669 VG_(doneIterFM
)( interval_tree
);
1672 if (VG_(clo_stats
)) {
1673 VG_(dmsg
)(" dhat: find_Block_containing:\n");
1674 VG_(dmsg
)(" dhat: found: %'lu\n",
1675 stats__n_fBc_cached0
+ stats__n_fBc_cached1
1676 + stats__n_fBc_cached2
1677 + stats__n_fBc_uncached
);
1678 VG_(dmsg
)(" dhat: at cache0 %'14lu at cache1 %'14lu\n",
1679 stats__n_fBc_cached0
,
1680 stats__n_fBc_cached1
);
1681 VG_(dmsg
)(" dhat: at cache2 %'14lu uncached %'14lu\n",
1682 stats__n_fBc_cached2
,
1683 stats__n_fBc_uncached
);
1684 VG_(dmsg
)(" dhat: notfound: %'lu\n", stats__n_fBc_notfound
);
1689 // Create the frame table, and insert the special "[root]" node at index 0.
1690 frame_tbl
= VG_(newFM
)(VG_(malloc
),
1694 const HChar
* root
= VG_(strdup
)("dh.frame_tbl.2", "[root]");
1695 Bool present
= VG_(addToFM
)(frame_tbl
, (UWord
)root
, 0);
1696 tl_assert(!present
);
1699 // Setup output filename. Nb: it's important to do this now, i.e. as late
1700 // as possible. If we do it at start-up and the program forks and the
1701 // output file format string contains a %p (pid) specifier, both the parent
1702 // and child will incorrectly write to the same file; this happened in
1704 HChar
* dhat_out_file
=
1705 VG_(expand_file_name
)("--dhat-out-file", clo_dhat_out_file
);
1707 fp
= VG_(fopen
)(dhat_out_file
, VKI_O_CREAT
|VKI_O_TRUNC
|VKI_O_WRONLY
,
1708 VKI_S_IRUSR
|VKI_S_IWUSR
);
1710 VG_(umsg
)("error: can't open DHAT output file '%s'\n", dhat_out_file
);
1711 VG_(free
)(dhat_out_file
);
1715 // Write to data file.
1716 FP("{\"dhatFileVersion\":2\n");
1718 // The output mode, block booleans, and byte/block units.
1719 if (clo_mode
== Heap
) {
1720 FP(",\"mode\":\"heap\",\"verb\":\"Allocated\"\n");
1721 FP(",\"bklt\":true,\"bkacc\":true\n");
1722 } else if (clo_mode
== Copy
) {
1723 FP(",\"mode\":\"copy\",\"verb\":\"Copied\"\n");
1724 FP(",\"bklt\":false,\"bkacc\":false\n");
1725 } else if (clo_mode
== AdHoc
) {
1726 FP(",\"mode\":\"ad-hoc\",\"verb\":\"Occurred\"\n");
1727 FP(",\"bklt\":false,\"bkacc\":false\n");
1728 FP(",\"bu\":\"unit\",\"bsu\":\"units\",\"bksu\":\"events\"\n");
1734 FP(",\"tu\":\"instrs\",\"Mtu\":\"Minstr\"\n");
1735 if (clo_mode
== Heap
) {
1736 FP(",\"tuth\":500\n");
1740 const HChar
* exe
= VG_(args_the_exename
);
1741 FP(",\"cmd\":\"%s", json_escape(exe
));
1742 for (Word i
= 0; i
< VG_(sizeXA
)(VG_(args_for_client
)); i
++) {
1743 const HChar
* arg
= *(HChar
**)VG_(indexXA
)(VG_(args_for_client
), i
);
1744 FP(" %s", json_escape(arg
));
1749 FP(",\"pid\":%d\n", VG_(getpid
)());
1752 FP(",\"te\":%llu\n", g_curr_instrs
);
1753 if (clo_mode
== Heap
) {
1754 FP(",\"tg\":%llu\n", g_tgmax_instrs
);
1756 tl_assert(g_tgmax_instrs
== 0);
1765 // The frame table maps strings to numbers. We want to print it ordered by
1766 // numbers. So we create an array and fill it in from the frame table, then
1768 UWord n_frames
= next_frame_n
;
1769 const HChar
** frames
=
1770 VG_(malloc
)("dh.frames", n_frames
* sizeof(const HChar
*));
1771 VG_(initIterFM
)(frame_tbl
);
1772 while (VG_(nextIterFM
)(frame_tbl
, &keyW
, &valW
)) {
1773 const HChar
* str
= (const HChar
*)keyW
;
1777 VG_(doneIterFM
)(frame_tbl
);
1779 for (UWord i
= 0; i
< n_frames
; i
++) {
1780 FP(" %c\"%s\"\n", i
== 0 ? '[' : ',', json_escape(frames
[i
]));
1790 if (VG_(clo_verbosity
) == 0) {
1794 // Print brief global stats.
1795 VG_(umsg
)("Total: %'llu %s in %'llu %s\n",
1796 g_total_bytes
, clo_mode
== AdHoc
? "units" : "bytes",
1797 g_total_blocks
, clo_mode
== AdHoc
? "events" : "blocks");
1798 if (clo_mode
== Heap
) {
1799 VG_(umsg
)("At t-gmax: %'llu bytes in %'llu blocks\n",
1800 g_max_bytes
, g_max_blocks
);
1801 VG_(umsg
)("At t-end: %'llu bytes in %'llu blocks\n",
1802 g_curr_bytes
, g_curr_blocks
);
1803 VG_(umsg
)("Reads: %'llu bytes\n", g_reads_bytes
);
1804 VG_(umsg
)("Writes: %'llu bytes\n", g_writes_bytes
);
1806 tl_assert(g_max_bytes
== 0);
1807 tl_assert(g_max_blocks
== 0);
1808 tl_assert(g_curr_bytes
== 0);
1809 tl_assert(g_curr_blocks
== 0);
1810 tl_assert(g_reads_bytes
== 0);
1811 tl_assert(g_writes_bytes
== 0);
1814 // Print a how-to-view-the-profile hint.
1816 VG_(umsg
)("To view the resulting profile, open\n");
1817 VG_(umsg
)(" file://%s/%s\n", DHAT_VIEW_DIR
, "dh_view.html");
1818 VG_(umsg
)("in a web browser, click on \"Load...\", "
1819 "and then select the file\n");
1820 VG_(umsg
)(" %s\n", dhat_out_file
);
1821 VG_(umsg
)("The text at the bottom explains the abbreviations used in the "
1824 VG_(free
)(dhat_out_file
);
1827 //------------------------------------------------------------//
1828 //--- Initialisation ---//
1829 //------------------------------------------------------------//
1831 static void dh_post_clo_init(void)
1833 if (clo_mode
== Heap
) {
1834 VG_(track_pre_mem_read
) ( dh_handle_noninsn_read
);
1835 VG_(track_pre_mem_read_asciiz
) ( dh_handle_noninsn_read_asciiz
);
1836 VG_(track_post_mem_write
) ( dh_handle_noninsn_write
);
1840 static void dh_pre_clo_init(void)
1842 VG_(details_name
) ("DHAT");
1843 VG_(details_version
) (NULL
);
1844 VG_(details_description
) ("a dynamic heap analysis tool");
1845 VG_(details_copyright_author
)(
1846 "Copyright (C) 2010-2024, and GNU GPL'd, by Mozilla Foundation et al.");
1847 VG_(details_bug_reports_to
) (VG_BUGS_TO
);
1848 VG_(details_avg_translation_sizeB
) ( 600 );
1851 VG_(basic_tool_funcs
) (dh_post_clo_init
,
1856 VG_(needs_libc_freeres
)();
1857 VG_(needs_cxx_freeres
)();
1858 VG_(needs_command_line_options
)(dh_process_cmd_line_option
,
1860 dh_print_debug_usage
);
1861 VG_(needs_client_requests
) (dh_handle_client_request
);
1862 // VG_(needs_sanity_checks) (dh_cheap_sanity_check,
1863 // dh_expensive_sanity_check);
1864 VG_(needs_malloc_replacement
)(dh_malloc
,
1866 dh___builtin_new_aligned
,
1867 dh___builtin_vec_new
,
1868 dh___builtin_vec_new_aligned
,
1872 dh___builtin_delete
,
1873 dh___builtin_delete_aligned
,
1874 dh___builtin_vec_delete
,
1875 dh___builtin_vec_delete_aligned
,
1877 dh_malloc_usable_size
,
1880 tl_assert(!interval_tree
);
1881 tl_assert(!fbc_cache0
);
1882 tl_assert(!fbc_cache1
);
1883 tl_assert(!fbc_cache2
);
1885 interval_tree
= VG_(newFM
)( VG_(malloc
),
1886 "dh.interval_tree.1",
1888 interval_tree_Cmp
);
1890 ppinfo
= VG_(newFM
)( VG_(malloc
),
1893 NULL
/*unboxedcmp*/ );
1896 VG_DETERMINE_INTERFACE_VERSION(dh_pre_clo_init
)
1898 //--------------------------------------------------------------------//
1899 //--- end dh_main.c ---//
1900 //--------------------------------------------------------------------//