2 //--------------------------------------------------------------------*/
3 //--- DHAT: a Dynamic Heap Analysis Tool dh_main.c ---*/
4 //--------------------------------------------------------------------*/
7 This file is part of DHAT, a Valgrind tool for profiling the
8 heap usage of programs.
10 Copyright (C) 2010-2017 Mozilla Inc
12 This program is free software; you can redistribute it and/or
13 modify it under the terms of the GNU General Public License as
14 published by the Free Software Foundation; either version 2 of the
15 License, or (at your option) any later version.
17 This program is distributed in the hope that it will be useful, but
18 WITHOUT ANY WARRANTY; without even the implied warranty of
19 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 General Public License for more details.
22 You should have received a copy of the GNU General Public License
23 along with this program; if not, write to the Free Software
24 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
27 The GNU General Public License is contained in the file COPYING.
30 /* Contributed by Julian Seward <jseward@acm.org> */
33 #include "pub_tool_basics.h"
34 #include "pub_tool_libcbase.h"
35 #include "pub_tool_libcassert.h"
36 #include "pub_tool_libcprint.h"
37 #include "pub_tool_machine.h" // VG_(fnptr_to_fnentry)
38 #include "pub_tool_mallocfree.h"
39 #include "pub_tool_options.h"
40 #include "pub_tool_replacemalloc.h"
41 #include "pub_tool_tooliface.h"
42 #include "pub_tool_wordfm.h"
44 #define HISTOGRAM_SIZE_LIMIT 1024
// Whole-run summary statistics for DHAT. "live" counters track the
// current residency; the "max" pair is latched at the point of maximum
// byte-liveness (so max_blocks_live is the block count AT that point,
// not an independent maximum).
// NOTE(review): extraction has split the original source lines; the
// token stream below is unchanged.
47 //------------------------------------------------------------//
49 //------------------------------------------------------------//
51 // Number of guest instructions executed so far. This is
52 // incremented directly from the generated code.
53 static ULong g_guest_instrs_executed
= 0;
55 // Summary statistics for the entire run.
56 static ULong g_tot_blocks
= 0; // total blocks allocated
57 static ULong g_tot_bytes
= 0; // total bytes allocated
59 static ULong g_cur_blocks_live
= 0; // curr # blocks live
60 static ULong g_cur_bytes_live
= 0; // curr # bytes live
62 static ULong g_max_blocks_live
= 0; // bytes and blocks at
63 static ULong g_max_bytes_live
= 0; // the max residency point
66 //------------------------------------------------------------//
67 //--- an Interval Tree of live blocks ---//
68 //------------------------------------------------------------//
70 /* Tracks information about live blocks. */
75 ExeContext
* ap
; /* allocation ec */
76 ULong allocd_at
; /* instruction number */
79 /* Approx histogram, one byte per payload byte. Counts latch up
80 therefore at 0xFFFF. Can be NULL if the block is resized or if
81 the block is larger than HISTOGRAM_SIZE_LIMIT. */
82 UShort
* histoW
; /* [0 .. req_szB-1] */
86 /* May not contain zero-sized blocks. May not contain
87 overlapping blocks. */
88 static WordFM
* interval_tree
= NULL
; /* WordFM* Block* void */
// Ordering function for the interval tree of live Blocks. Keys are
// Block* cast to UWord. Blocks are non-empty and non-overlapping, so
// any overlap is treated as equality ("a match"); the function returns
// -1 / +1 when b1 lies wholly below / above b2.
// NOTE(review): the final "return 0;" path and the closing brace of the
// original function are not visible in this extraction (original lines
// 101-103 dropped); tokens below are unchanged.
90 /* Here's the comparison function. Since the tree is required
91 to contain non-zero sized, non-overlapping blocks, it's good
92 enough to consider any overlap as a match. */
93 static Word
interval_tree_Cmp ( UWord k1
, UWord k2
)
95 Block
* b1
= (Block
*)k1
;
96 Block
* b2
= (Block
*)k2
;
97 tl_assert(b1
->req_szB
> 0);
98 tl_assert(b2
->req_szB
> 0);
99 if (b1
->payload
+ b1
->req_szB
<= b2
->payload
) return -1;
100 if (b2
->payload
+ b2
->req_szB
<= b1
->payload
) return 1;
// Two-entry most-recently-used cache in front of the interval-tree
// lookup in find_Block_containing, plus hit/miss/not-found counters.
// Both cache slots must be invalidated (set to NULL) whenever the tree
// is mutated — see the deletion/insertion sites below.
104 // 2-entry cache for find_Block_containing
105 static Block
* fbc_cache0
= NULL
;
106 static Block
* fbc_cache1
= NULL
;
108 static UWord stats__n_fBc_cached
= 0;
109 static UWord stats__n_fBc_uncached
= 0;
110 static UWord stats__n_fBc_notfound
= 0;
// Find the live Block whose payload range contains address 'a', or
// (presumably) NULL if none does. Fast path: check the 2-entry cache
// (slot 0 first; a hit in slot 1 swaps it to the front). Slow path:
// probe the interval tree with a stack-allocated fake 1-byte Block —
// the overlap-is-match comparator makes any containing block compare
// equal. On success the result is promoted to cache slot 0.
// NOTE(review): several interior lines (returns, the 'fake' Block
// setup, closing braces — original lines 117-135, 138-141, 148-151)
// were dropped by the extraction; tokens below are unchanged.
112 static Block
* find_Block_containing ( Addr a
)
114 if (LIKELY(fbc_cache0
115 && fbc_cache0
->payload
<= a
116 && a
< fbc_cache0
->payload
+ fbc_cache0
->req_szB
)) {
118 stats__n_fBc_cached
++;
121 if (LIKELY(fbc_cache1
122 && fbc_cache1
->payload
<= a
123 && a
< fbc_cache1
->payload
+ fbc_cache1
->req_szB
)) {
124 // found at 1; swap 0 and 1
125 Block
* tmp
= fbc_cache0
;
126 fbc_cache0
= fbc_cache1
;
128 stats__n_fBc_cached
++;
136 Bool found
= VG_(lookupFM
)( interval_tree
,
137 &foundkey
, &foundval
, (UWord
)&fake
);
139 stats__n_fBc_notfound
++;
142 tl_assert(foundval
== 0); // we don't store vals in the interval tree
143 tl_assert(foundkey
!= 1);
144 Block
* res
= (Block
*)foundkey
;
145 tl_assert(res
!= &fake
);
146 // put at the top position
147 fbc_cache1
= fbc_cache0
;
149 stats__n_fBc_uncached
++;
// Remove from the interval tree the Block whose payload starts exactly
// at 'a'; the caller guarantees such a block exists (the original
// asserts on the lookup result). The find_Block_containing cache is
// invalidated afterwards, since the tree has changed.
// NOTE(review): the 'fake' Block construction and the assert on
// 'found' (original lines 156-159, 162) are not visible in this
// extraction; tokens below are unchanged.
153 // delete a block; asserts if not found. (viz, 'a' must be
154 // known to be present.)
155 static void delete_Block_starting_at ( Addr a
)
160 Bool found
= VG_(delFromFM
)( interval_tree
,
161 NULL
, NULL
, (Addr
)&fake
);
163 fbc_cache0
= fbc_cache1
= NULL
;
167 //------------------------------------------------------------//
168 //--- a FM of allocation points (APs) ---//
169 //------------------------------------------------------------//
173 // the allocation point that we're summarising stats for
175 // used when printing results
177 // The current number of blocks and bytes live for this AP
178 ULong cur_blocks_live
;
179 ULong cur_bytes_live
;
180 // The number of blocks and bytes live at the max-liveness
181 // point. Note this is a bit subtle. max_blocks_live is not
182 // the maximum number of live blocks, but rather the number of
183 // blocks live at the point of maximum byte liveness. These are
184 // not necessarily the same thing.
185 ULong max_blocks_live
;
186 ULong max_bytes_live
;
187 // Total number of blocks and bytes allocated by this AP.
190 // Sum of death ages for all blocks allocated by this AP,
191 // that have subsequently been freed.
192 ULong death_ages_sum
;
194 // Total number of reads and writes in all blocks allocated
198 /* Histogram information. We maintain a histogram aggregated for
199 all retiring Blocks allocated by this AP, but only if:
200 - this AP has only ever allocated objects of one size
201 - that size is <= HISTOGRAM_SIZE_LIMIT
202 What we need therefore is a mechanism to see if this AP
203 has only ever allocated blocks of one size.
206 Unknown because no retirement yet
207 Exactly xsize all retiring blocks are of this size
208 Mixed multiple different sizes seen
210 enum { Unknown
=999, Exactly
, Mixed
} xsize_tag
;
212 UInt
* histo
; /* [0 .. xsize-1] */
216 /* maps ExeContext*'s to APInfo*'s. Note that the keys must match the
217 .ap field in the values. */
218 static WordFM
* apinfo
= NULL
; /* WordFM* ExeContext* APInfo* */
221 /* 'bk' is being introduced (has just been allocated). Find the
222 relevant APInfo entry for it, or create one, based on the block's
223 allocation EC. Then, update the APInfo to the extent that we
224 actually can, to reflect the allocation. */
225 static void intro_Block ( Block
* bk
)
233 Bool found
= VG_(lookupFM
)( apinfo
,
234 &keyW
, &valW
, (UWord
)bk
->ap
);
237 tl_assert(keyW
== (UWord
)bk
->ap
);
239 api
= VG_(malloc
)( "dh.main.intro_Block.1", sizeof(APInfo
) );
240 VG_(memset
)(api
, 0, sizeof(*api
));
242 Bool present
= VG_(addToFM
)( apinfo
,
243 (UWord
)bk
->ap
, (UWord
)api
);
246 tl_assert(api
->deaths
== 0);
247 api
->xsize_tag
= Unknown
;
249 if (0) VG_(printf
)("api %p --> Unknown\n", api
);
252 tl_assert(api
->ap
== bk
->ap
);
254 /* So: update stats to reflect an allocation */
257 api
->cur_blocks_live
++;
260 api
->cur_bytes_live
+= bk
->req_szB
;
261 if (api
->cur_bytes_live
> api
->max_bytes_live
) {
262 api
->max_bytes_live
= api
->cur_bytes_live
;
263 api
->max_blocks_live
= api
->cur_blocks_live
;
266 // total blocks and bytes allocated here
268 api
->tot_bytes
+= bk
->req_szB
;
270 // update summary globals
272 g_tot_bytes
+= bk
->req_szB
;
275 g_cur_bytes_live
+= bk
->req_szB
;
276 if (g_cur_bytes_live
> g_max_bytes_live
) {
277 g_max_bytes_live
= g_cur_bytes_live
;
278 g_max_blocks_live
= g_cur_blocks_live
;
283 /* 'bk' is retiring (being freed). Find the relevant APInfo entry for
284 it, which must already exist. Then, fold info from 'bk' into that
285 entry. 'because_freed' is True if the block is retiring because
286 the client has freed it. If it is False then the block is retiring
287 because the program has finished, in which case we want to skip the
288 updates of the total blocks live etc for this AP, but still fold in
289 the access counts and histo data that have so far accumulated for
291 static void retire_Block ( Block
* bk
, Bool because_freed
)
299 Bool found
= VG_(lookupFM
)( apinfo
,
300 &keyW
, &valW
, (UWord
)bk
->ap
);
304 tl_assert(api
->ap
== bk
->ap
);
306 // update stats following this free.
308 VG_(printf
)("ec %p api->c_by_l %llu bk->rszB %llu\n",
309 bk
->ap
, api
->cur_bytes_live
, (ULong
)bk
->req_szB
);
311 // update total blocks live etc for this AP
313 tl_assert(api
->cur_blocks_live
>= 1);
314 tl_assert(api
->cur_bytes_live
>= bk
->req_szB
);
315 api
->cur_blocks_live
--;
316 api
->cur_bytes_live
-= bk
->req_szB
;
320 tl_assert(bk
->allocd_at
<= g_guest_instrs_executed
);
321 api
->death_ages_sum
+= (g_guest_instrs_executed
- bk
->allocd_at
);
323 // update global summary stats
324 tl_assert(g_cur_blocks_live
> 0);
326 tl_assert(g_cur_bytes_live
>= bk
->req_szB
);
327 g_cur_bytes_live
-= bk
->req_szB
;
331 api
->n_reads
+= bk
->n_reads
;
332 api
->n_writes
+= bk
->n_writes
;
334 // histo stuff. First, do state transitions for xsize/xsize_tag.
335 switch (api
->xsize_tag
) {
338 tl_assert(api
->xsize
== 0);
339 tl_assert(api
->deaths
== 1 || api
->deaths
== 0);
340 tl_assert(!api
->histo
);
341 api
->xsize_tag
= Exactly
;
342 api
->xsize
= bk
->req_szB
;
343 if (0) VG_(printf
)("api %p --> Exactly(%lu)\n", api
, api
->xsize
);
344 // and allocate the histo
346 api
->histo
= VG_(malloc
)("dh.main.retire_Block.1",
347 api
->xsize
* sizeof(UInt
));
348 VG_(memset
)(api
->histo
, 0, api
->xsize
* sizeof(UInt
));
353 //tl_assert(api->deaths > 1);
354 if (bk
->req_szB
!= api
->xsize
) {
355 if (0) VG_(printf
)("api %p --> Mixed(%lu -> %lu)\n",
356 api
, api
->xsize
, bk
->req_szB
);
357 api
->xsize_tag
= Mixed
;
359 // deallocate the histo, if any
361 VG_(free
)(api
->histo
);
368 //tl_assert(api->deaths > 1);
375 // See if we can fold the histo data from this block into
376 // the data for the AP
377 if (api
->xsize_tag
== Exactly
&& api
->histo
&& bk
->histoW
) {
378 tl_assert(api
->xsize
== bk
->req_szB
);
380 for (i
= 0; i
< api
->xsize
; i
++) {
381 // FIXME: do something better in case of overflow of api->histo[..]
382 // Right now, at least don't let it overflow/wrap around
383 if (api
->histo
[i
] <= 0xFFFE0000)
384 api
->histo
[i
] += (UInt
)bk
->histoW
[i
];
386 if (0) VG_(printf
)("fold in, AP = %p\n", api
);
393 VG_(printf
)("block retiring, histo %lu: ", bk
->req_szB
);
395 for (i
= 0; i
< bk
->req_szB
; i
++)
396 VG_(printf
)("%u ", (UInt
)bk
->histoB
[i
]);
399 VG_(printf
)("block retiring, no histo %lu\n", bk
->req_szB
);
404 /* This handles block resizing. When a block with AP 'ec' has a
405 size change of 'delta', call here to update the APInfo. */
406 static void apinfo_change_cur_bytes_live( ExeContext
* ec
, Long delta
)
411 Bool found
= VG_(lookupFM
)( apinfo
,
412 &keyW
, &valW
, (UWord
)ec
);
416 tl_assert(api
->ap
== ec
);
419 tl_assert(api
->cur_bytes_live
>= -delta
);
420 tl_assert(g_cur_bytes_live
>= -delta
);
423 // adjust current live size
424 api
->cur_bytes_live
+= delta
;
425 g_cur_bytes_live
+= delta
;
427 if (delta
> 0 && api
->cur_bytes_live
> api
->max_bytes_live
) {
428 api
->max_bytes_live
= api
->cur_bytes_live
;
429 api
->max_blocks_live
= api
->cur_blocks_live
;
432 // update global summary stats
433 if (delta
> 0 && g_cur_bytes_live
> g_max_bytes_live
) {
434 g_max_bytes_live
= g_cur_bytes_live
;
435 g_max_blocks_live
= g_cur_blocks_live
;
438 g_tot_bytes
+= delta
;
440 // adjust total allocation size
442 api
->tot_bytes
+= delta
;
446 //------------------------------------------------------------//
447 //--- update both Block and APInfos after {m,re}alloc/free ---//
448 //------------------------------------------------------------//
451 void* new_block ( ThreadId tid
, void* p
, SizeT req_szB
, SizeT req_alignB
,
454 tl_assert(p
== NULL
); // don't handle custom allocators right now
455 SizeT actual_szB
/*, slop_szB*/;
457 if ((SSizeT
)req_szB
< 0) return NULL
;
460 req_szB
= 1; /* can't allow zero-sized blocks in the interval tree */
462 // Allocate and zero if necessary
464 p
= VG_(cli_malloc
)( req_alignB
, req_szB
);
468 if (is_zeroed
) VG_(memset
)(p
, 0, req_szB
);
469 actual_szB
= VG_(cli_malloc_usable_size
)(p
);
470 tl_assert(actual_szB
>= req_szB
);
471 /* slop_szB = actual_szB - req_szB; */
476 // Make new HP_Chunk node, add to malloc_list
477 Block
* bk
= VG_(malloc
)("dh.new_block.1", sizeof(Block
));
478 bk
->payload
= (Addr
)p
;
479 bk
->req_szB
= req_szB
;
480 bk
->ap
= VG_(record_ExeContext
)(tid
, 0/*first word delta*/);
481 bk
->allocd_at
= g_guest_instrs_executed
;
484 // set up histogram array, if the block isn't too large
486 if (req_szB
<= HISTOGRAM_SIZE_LIMIT
) {
487 bk
->histoW
= VG_(malloc
)("dh.new_block.2", req_szB
* sizeof(UShort
));
488 VG_(memset
)(bk
->histoW
, 0, req_szB
* sizeof(UShort
));
491 Bool present
= VG_(addToFM
)( interval_tree
, (UWord
)bk
, (UWord
)0/*no val*/);
493 fbc_cache0
= fbc_cache1
= NULL
;
497 if (0) VG_(printf
)("ALLOC %lu -> %p\n", req_szB
, p
);
// Handle a client free() of 'p'. Looks the block up via
// find_Block_containing; a miss, or a pointer that is interior rather
// than the payload start, is treated as a bogus free and ignored.
// Otherwise the block is retired (stats folded into its APInfo with
// because_freed=True), the client memory is released, the block is
// removed from the interval tree, and the per-block histogram and the
// Block node itself are freed.
// NOTE(review): a few interior lines (NULL check, closing braces —
// original lines 508-512, 520-521, 531 etc.) were dropped by the
// extraction; tokens below are unchanged.
503 void die_block ( void* p
, Bool custom_free
)
505 tl_assert(!custom_free
); // at least for now
507 Block
* bk
= find_Block_containing( (Addr
)p
);
510 return; // bogus free
513 tl_assert(bk
->req_szB
> 0);
514 // assert the block finder is behaving sanely
515 tl_assert(bk
->payload
<= (Addr
)p
);
516 tl_assert( (Addr
)p
< bk
->payload
+ bk
->req_szB
);
518 if (bk
->payload
!= (Addr
)p
) {
519 return; // bogus free
522 if (0) VG_(printf
)(" FREE %p %llu\n",
523 p
, g_guest_instrs_executed
- bk
->allocd_at
);
525 retire_Block(bk
, True
/*because_freed*/);
527 VG_(cli_free
)( (void*)bk
->payload
);
528 delete_Block_starting_at( bk
->payload
);
530 VG_(free
)( bk
->histoW
);
538 void* renew_block ( ThreadId tid
, void* p_old
, SizeT new_req_szB
)
540 if (0) VG_(printf
)("REALL %p %lu\n", p_old
, new_req_szB
);
543 tl_assert(new_req_szB
> 0); // map 0 to 1
545 // Find the old block.
546 Block
* bk
= find_Block_containing( (Addr
)p_old
);
548 return NULL
; // bogus realloc
551 tl_assert(bk
->req_szB
> 0);
552 // assert the block finder is behaving sanely
553 tl_assert(bk
->payload
<= (Addr
)p_old
);
554 tl_assert( (Addr
)p_old
< bk
->payload
+ bk
->req_szB
);
556 if (bk
->payload
!= (Addr
)p_old
) {
557 return NULL
; // bogus realloc
560 // Keeping the histogram alive in any meaningful way across
561 // block resizing is too darn complicated. Just throw it away.
563 VG_(free
)(bk
->histoW
);
567 // Actually do the allocation, if necessary.
568 if (new_req_szB
<= bk
->req_szB
) {
570 // New size is smaller or same; block not moved.
571 apinfo_change_cur_bytes_live(bk
->ap
,
572 (Long
)new_req_szB
- (Long
)bk
->req_szB
);
573 bk
->req_szB
= new_req_szB
;
578 // New size is bigger; make new block, copy shared contents, free old.
579 p_new
= VG_(cli_malloc
)(VG_(clo_alignment
), new_req_szB
);
581 // Nb: if realloc fails, NULL is returned but the old block is not
582 // touched. What an awful function.
585 tl_assert(p_new
!= p_old
);
587 VG_(memcpy
)(p_new
, p_old
, bk
->req_szB
);
588 VG_(cli_free
)(p_old
);
590 // Since the block has moved, we need to re-insert it into the
591 // interval tree at the new place. Do this by removing
593 delete_Block_starting_at( (Addr
)p_old
);
594 // now 'bk' is no longer in the tree, but the Block itself
597 // Update the metadata.
598 apinfo_change_cur_bytes_live(bk
->ap
,
599 (Long
)new_req_szB
- (Long
)bk
->req_szB
);
600 bk
->payload
= (Addr
)p_new
;
601 bk
->req_szB
= new_req_szB
;
605 = VG_(addToFM
)( interval_tree
, (UWord
)bk
, (UWord
)0/*no val*/);
607 fbc_cache0
= fbc_cache1
= NULL
;
616 //------------------------------------------------------------//
617 //--- malloc() et al replacement wrappers ---//
618 //------------------------------------------------------------//
// malloc() replacement: delegate to new_block with the tool's default
// alignment and no zero-initialisation.
620 static void* dh_malloc ( ThreadId tid
, SizeT szB
)
622 return new_block( tid
, NULL
, szB
, VG_(clo_alignment
), /*is_zeroed*/False
);
// operator new replacement: identical to dh_malloc (default alignment,
// not zeroed).
625 static void* dh___builtin_new ( ThreadId tid
, SizeT szB
)
627 return new_block( tid
, NULL
, szB
, VG_(clo_alignment
), /*is_zeroed*/False
);
// operator new[] replacement: identical to dh_malloc (default
// alignment, not zeroed).
630 static void* dh___builtin_vec_new ( ThreadId tid
, SizeT szB
)
632 return new_block( tid
, NULL
, szB
, VG_(clo_alignment
), /*is_zeroed*/False
);
// calloc() replacement: m*szB zero-initialised bytes.
// NOTE(review): the m*szB multiplication is not overflow-checked here;
// presumably new_block's (SSizeT)req_szB < 0 guard catches some but not
// all overflows — worth confirming upstream.
635 static void* dh_calloc ( ThreadId tid
, SizeT m
, SizeT szB
)
637 return new_block( tid
, NULL
, m
*szB
, VG_(clo_alignment
), /*is_zeroed*/True
);
// memalign() replacement: caller-specified alignment, not zeroed.
640 static void *dh_memalign ( ThreadId tid
, SizeT alignB
, SizeT szB
)
642 return new_block( tid
, NULL
, szB
, alignB
, False
);
// free() replacement: tid is unused; all work happens in die_block.
645 static void dh_free ( ThreadId tid
__attribute__((unused
)), void* p
)
647 die_block( p
, /*custom_free*/False
);
// operator delete replacement: same path as dh_free.
650 static void dh___builtin_delete ( ThreadId tid
, void* p
)
652 die_block( p
, /*custom_free*/False
);
// operator delete[] replacement: same path as dh_free.
655 static void dh___builtin_vec_delete ( ThreadId tid
, void* p
)
657 die_block( p
, /*custom_free*/False
);
// realloc() replacement. One visible path delegates to dh_malloc
// (presumably the p_old == NULL case) and the other to renew_block for
// a genuine resize.
// NOTE(review): the guard conditions selecting between these two
// returns (original lines 661-668) were dropped by the extraction;
// tokens below are unchanged.
660 static void* dh_realloc ( ThreadId tid
, void* p_old
, SizeT new_szB
)
663 return dh_malloc(tid
, new_szB
);
669 return renew_block(tid
, p_old
, new_szB
);
// malloc_usable_size() replacement: report the *requested* size of the
// block containing p (not the actual usable size), or 0 when p is not
// inside any live block.
672 static SizeT
dh_malloc_usable_size ( ThreadId tid
, void* p
)
674 Block
* bk
= find_Block_containing( (Addr
)p
);
675 return bk
? bk
->req_szB
: 0;
679 //------------------------------------------------------------//
680 //--- memory references ---//
681 //------------------------------------------------------------//
684 void inc_histo_for_block ( Block
* bk
, Addr addr
, UWord szB
)
686 UWord i
, offMin
, offMax1
;
687 offMin
= addr
- bk
->payload
;
688 tl_assert(offMin
< bk
->req_szB
);
689 offMax1
= offMin
+ szB
;
690 if (offMax1
> bk
->req_szB
)
691 offMax1
= bk
->req_szB
;
692 //VG_(printf)("%lu %lu (size of block %lu)\n", offMin, offMax1, bk->req_szB);
693 for (i
= offMin
; i
< offMax1
; i
++) {
694 UShort n
= bk
->histoW
[i
];
701 void dh_handle_write ( Addr addr
, UWord szB
)
703 Block
* bk
= find_Block_containing(addr
);
707 inc_histo_for_block(bk
, addr
, szB
);
712 void dh_handle_read ( Addr addr
, UWord szB
)
714 Block
* bk
= find_Block_containing(addr
);
718 inc_histo_for_block(bk
, addr
, szB
);
723 // Handle reads and writes by syscalls (read == kernel
724 // reads user space, write == kernel writes user space).
725 // Assumes no such read or write spans a heap block
726 // boundary and so we can treat it just as one giant
729 void dh_handle_noninsn_read ( CorePart part
, ThreadId tid
, const HChar
* s
,
730 Addr base
, SizeT size
)
734 dh_handle_read(base
, size
);
736 case Vg_CoreSysCallArgInMem
:
738 case Vg_CoreTranslate
:
746 void dh_handle_noninsn_write ( CorePart part
, ThreadId tid
,
747 Addr base
, SizeT size
)
751 dh_handle_write(base
, size
);
761 //------------------------------------------------------------//
762 //--- Instrumentation ---//
763 //------------------------------------------------------------//
765 #define binop(_op, _arg1, _arg2) IRExpr_Binop((_op),(_arg1),(_arg2))
766 #define mkexpr(_tmp) IRExpr_RdTmp((_tmp))
767 #define mkU32(_n) IRExpr_Const(IRConst_U32(_n))
768 #define mkU64(_n) IRExpr_Const(IRConst_U64(_n))
769 #define assign(_t, _e) IRStmt_WrTmp((_t), (_e))
772 void add_counter_update(IRSB
* sbOut
, Int n
)
774 #if defined(VG_BIGENDIAN)
776 #elif defined(VG_LITTLEENDIAN)
779 # error "Unknown endianness"
781 // Add code to increment 'g_guest_instrs_executed' by 'n', like this:
782 // WrTmp(t1, Load64(&g_guest_instrs_executed))
783 // WrTmp(t2, Add64(RdTmp(t1), Const(n)))
784 // Store(&g_guest_instrs_executed, t2)
785 IRTemp t1
= newIRTemp(sbOut
->tyenv
, Ity_I64
);
786 IRTemp t2
= newIRTemp(sbOut
->tyenv
, Ity_I64
);
787 IRExpr
* counter_addr
= mkIRExpr_HWord( (HWord
)&g_guest_instrs_executed
);
789 IRStmt
* st1
= assign(t1
, IRExpr_Load(END
, Ity_I64
, counter_addr
));
790 IRStmt
* st2
= assign(t2
, binop(Iop_Add64
, mkexpr(t1
), mkU64(n
)));
791 IRStmt
* st3
= IRStmt_Store(END
, counter_addr
, mkexpr(t2
));
793 addStmtToIRSB( sbOut
, st1
);
794 addStmtToIRSB( sbOut
, st2
);
795 addStmtToIRSB( sbOut
, st3
);
799 void addMemEvent(IRSB
* sbOut
, Bool isWrite
, Int szB
, IRExpr
* addr
,
802 IRType tyAddr
= Ity_INVALID
;
803 const HChar
* hName
= NULL
;
805 IRExpr
** argv
= NULL
;
808 const Int THRESH
= 4096 * 4; // somewhat arbitrary
809 const Int rz_szB
= VG_STACK_REDZONE_SZB
;
811 tyAddr
= typeOfIRExpr( sbOut
->tyenv
, addr
);
812 tl_assert(tyAddr
== Ity_I32
|| tyAddr
== Ity_I64
);
815 hName
= "dh_handle_write";
816 hAddr
= &dh_handle_write
;
818 hName
= "dh_handle_read";
819 hAddr
= &dh_handle_read
;
822 argv
= mkIRExprVec_2( addr
, mkIRExpr_HWord(szB
) );
824 /* Add the helper. */
828 di
= unsafeIRDirty_0_N( 2/*regparms*/,
829 hName
, VG_(fnptr_to_fnentry
)( hAddr
),
832 /* Generate the guard condition: "(addr - (SP - RZ)) >u N", for
833 some arbitrary N. If that fails then addr is in the range (SP -
834 RZ .. SP + N - RZ). If N is smallish (a page?) then we can say
835 addr is within a page of SP and so can't possibly be a heap
836 access, and so can be skipped. */
837 IRTemp sp
= newIRTemp(sbOut
->tyenv
, tyAddr
);
838 addStmtToIRSB( sbOut
, assign(sp
, IRExpr_Get(goff_sp
, tyAddr
)));
840 IRTemp sp_minus_rz
= newIRTemp(sbOut
->tyenv
, tyAddr
);
845 ? binop(Iop_Sub32
, mkexpr(sp
), mkU32(rz_szB
))
846 : binop(Iop_Sub64
, mkexpr(sp
), mkU64(rz_szB
)))
849 IRTemp diff
= newIRTemp(sbOut
->tyenv
, tyAddr
);
854 ? binop(Iop_Sub32
, addr
, mkexpr(sp_minus_rz
))
855 : binop(Iop_Sub64
, addr
, mkexpr(sp_minus_rz
)))
858 IRTemp guard
= newIRTemp(sbOut
->tyenv
, Ity_I1
);
863 ? binop(Iop_CmpLT32U
, mkU32(THRESH
), mkexpr(diff
))
864 : binop(Iop_CmpLT64U
, mkU64(THRESH
), mkexpr(diff
)))
866 di
->guard
= mkexpr(guard
);
868 addStmtToIRSB( sbOut
, IRStmt_Dirty(di
) );
872 IRSB
* dh_instrument ( VgCallbackClosure
* closure
,
874 const VexGuestLayout
* layout
,
875 const VexGuestExtents
* vge
,
876 const VexArchInfo
* archinfo_host
,
877 IRType gWordTy
, IRType hWordTy
)
881 IRTypeEnv
* tyenv
= sbIn
->tyenv
;
883 const Int goff_sp
= layout
->offset_SP
;
885 // We increment the instruction count in two places:
886 // - just before any Ist_Exit statements;
887 // - just before the IRSB's end.
888 // In the former case, we zero 'n' and then continue instrumenting.
890 sbOut
= deepCopyIRSBExceptStmts(sbIn
);
892 // Copy verbatim any IR preamble preceding the first IMark
894 while (i
< sbIn
->stmts_used
&& sbIn
->stmts
[i
]->tag
!= Ist_IMark
) {
895 addStmtToIRSB( sbOut
, sbIn
->stmts
[i
] );
899 for (/*use current i*/; i
< sbIn
->stmts_used
; i
++) {
900 IRStmt
* st
= sbIn
->stmts
[i
];
902 if (!st
|| st
->tag
== Ist_NoOp
) continue;
913 // Add an increment before the Exit statement, then reset 'n'.
914 add_counter_update(sbOut
, n
);
921 IRExpr
* data
= st
->Ist
.WrTmp
.data
;
922 if (data
->tag
== Iex_Load
) {
923 IRExpr
* aexpr
= data
->Iex
.Load
.addr
;
924 // Note also, endianness info is ignored. I guess
925 // that's not interesting.
926 addMemEvent( sbOut
, False
/*!isWrite*/,
927 sizeofIRType(data
->Iex
.Load
.ty
),
934 IRExpr
* data
= st
->Ist
.Store
.data
;
935 IRExpr
* aexpr
= st
->Ist
.Store
.addr
;
936 addMemEvent( sbOut
, True
/*isWrite*/,
937 sizeofIRType(typeOfIRExpr(tyenv
, data
)),
944 IRDirty
* d
= st
->Ist
.Dirty
.details
;
945 if (d
->mFx
!= Ifx_None
) {
946 /* This dirty helper accesses memory. Collect the details. */
947 tl_assert(d
->mAddr
!= NULL
);
948 tl_assert(d
->mSize
!= 0);
950 // Large (eg. 28B, 108B, 512B on x86) data-sized
951 // instructions will be done inaccurately, but they're
952 // very rare and this avoids errors from hitting more
953 // than two cache lines in the simulation.
954 if (d
->mFx
== Ifx_Read
|| d
->mFx
== Ifx_Modify
)
955 addMemEvent( sbOut
, False
/*!isWrite*/,
956 dataSize
, d
->mAddr
, goff_sp
);
957 if (d
->mFx
== Ifx_Write
|| d
->mFx
== Ifx_Modify
)
958 addMemEvent( sbOut
, True
/*isWrite*/,
959 dataSize
, d
->mAddr
, goff_sp
);
961 tl_assert(d
->mAddr
== NULL
);
962 tl_assert(d
->mSize
== 0);
968 /* We treat it as a read and a write of the location. I
969 think that is the same behaviour as it was before IRCAS
970 was introduced, since prior to that point, the Vex
971 front ends would translate a lock-prefixed instruction
972 into a (normal) read followed by a (normal) write. */
974 IRCAS
* cas
= st
->Ist
.CAS
.details
;
975 tl_assert(cas
->addr
!= NULL
);
976 tl_assert(cas
->dataLo
!= NULL
);
977 dataSize
= sizeofIRType(typeOfIRExpr(tyenv
, cas
->dataLo
));
978 if (cas
->dataHi
!= NULL
)
979 dataSize
*= 2; /* since it's a doubleword-CAS */
980 addMemEvent( sbOut
, False
/*!isWrite*/,
981 dataSize
, cas
->addr
, goff_sp
);
982 addMemEvent( sbOut
, True
/*isWrite*/,
983 dataSize
, cas
->addr
, goff_sp
);
989 if (st
->Ist
.LLSC
.storedata
== NULL
) {
991 dataTy
= typeOfIRTemp(tyenv
, st
->Ist
.LLSC
.result
);
992 addMemEvent( sbOut
, False
/*!isWrite*/,
993 sizeofIRType(dataTy
),
994 st
->Ist
.LLSC
.addr
, goff_sp
);
997 dataTy
= typeOfIRExpr(tyenv
, st
->Ist
.LLSC
.storedata
);
998 addMemEvent( sbOut
, True
/*isWrite*/,
999 sizeofIRType(dataTy
),
1000 st
->Ist
.LLSC
.addr
, goff_sp
);
1009 addStmtToIRSB( sbOut
, st
);
1013 // Add an increment before the SB end.
1014 add_counter_update(sbOut
, n
);
1026 //------------------------------------------------------------//
1027 //--- Command line args ---//
1028 //------------------------------------------------------------//
1031 static Bool
identify_metric ( /*OUT*/ULong(**get_metricP
)(APInfo
*),
1032 /*OUT*/Bool
* increasingP
,
1033 const HChar
* metric_name
);
// Command-line options: how many allocation points to print (default
// 10) and which metric to sort them by (default "max-bytes-live";
// validated via identify_metric during option processing).
1035 static Int clo_show_top_n
= 10;
1036 static const HChar
*clo_sort_by
= "max-bytes-live";
1038 static Bool
dh_process_cmd_line_option(const HChar
* arg
)
1040 if VG_BINT_CLO(arg
, "--show-top-n", clo_show_top_n
, 1, 100000) {}
1042 else if VG_STR_CLO(arg
, "--sort-by", clo_sort_by
) {
1043 ULong (*dummyFn
)(APInfo
*);
1045 Bool ok
= identify_metric( &dummyFn
, &dummyB
, clo_sort_by
);
1048 // otherwise it's OK, in which case leave it alone.
1049 // show_top_n_apinfos will later convert the string by a
1050 // second call to identify_metric.
1054 return VG_(replacement_malloc_process_cmd_line_option
)(arg
);
1060 static void dh_print_usage(void)
1063 " --show-top-n=number show the top <number> alloc points [10]\n"
1064 " --sort-by=string\n"
1065 " sort the allocation points by the metric\n"
1066 " defined by <string>, thusly:\n"
1067 " max-bytes-live maximum live bytes [default]\n"
1068 " tot-bytes-allocd bytes allocated in total (turnover)\n"
1069 " max-blocks-live maximum live blocks\n"
1070 " tot-blocks-allocd blocks allocated in total (turnover)\n"
1074 static void dh_print_debug_usage(void)
1082 //------------------------------------------------------------//
1083 //--- Finalisation ---//
1084 //------------------------------------------------------------//
1086 static void show_N_div_100( /*OUT*/HChar
* buf
, ULong n
)
1090 VG_(sprintf
)(buf
, "%llu.%s%llu", nK
,
1095 static void show_APInfo ( APInfo
* api
)
1097 HChar bufA
[80]; // large enough
1098 VG_(memset
)(bufA
, 0, sizeof(bufA
));
1099 if (api
->tot_blocks
> 0) {
1100 show_N_div_100( bufA
, ((ULong
)api
->tot_bytes
* 100ULL)
1101 / (ULong
)api
->tot_blocks
);
1103 bufA
[0] = 'N'; bufA
[1] = 'a'; bufA
[2] = 'N';
1106 VG_(umsg
)("max-live: %'llu in %'llu blocks\n",
1107 api
->max_bytes_live
, api
->max_blocks_live
);
1108 VG_(umsg
)("tot-alloc: %'llu in %'llu blocks (avg size %s)\n",
1109 api
->tot_bytes
, api
->tot_blocks
, bufA
);
1111 tl_assert(api
->tot_blocks
>= api
->max_blocks_live
);
1112 tl_assert(api
->tot_bytes
>= api
->max_bytes_live
);
1114 if (api
->deaths
> 0) {
1115 // Average Age at Death
1116 ULong aad
= api
->deaths
== 0
1117 ? 0 : (api
->death_ages_sum
/ api
->deaths
);
1118 // AAD as a fraction of the total program lifetime (so far)
1119 // measured in ten-thousand-ths (aad_frac_10k == 10000 means the
1120 // complete lifetime of the program.
1122 = g_guest_instrs_executed
== 0
1123 ? 0 : (10000ULL * aad
) / g_guest_instrs_executed
;
1124 HChar buf
[80]; // large enough
1125 show_N_div_100(buf
, aad_frac_10k
);
1126 VG_(umsg
)("deaths: %'llu, at avg age %'llu "
1127 "(%s%% of prog lifetime)\n",
1128 api
->deaths
, aad
, buf
);
1130 VG_(umsg
)("deaths: none (none of these blocks were freed)\n");
1133 HChar bufR
[80], bufW
[80]; // large enough
1134 VG_(memset
)(bufR
, 0, sizeof(bufR
));
1135 VG_(memset
)(bufW
, 0, sizeof(bufW
));
1136 if (api
->tot_bytes
> 0) {
1137 show_N_div_100(bufR
, (100ULL * api
->n_reads
) / api
->tot_bytes
);
1138 show_N_div_100(bufW
, (100ULL * api
->n_writes
) / api
->tot_bytes
);
1140 VG_(strcat
)(bufR
, "Inf");
1141 VG_(strcat
)(bufW
, "Inf");
1144 VG_(umsg
)("acc-ratios: %s rd, %s wr "
1145 " (%'llu b-read, %'llu b-written)\n",
1147 api
->n_reads
, api
->n_writes
);
1149 VG_(pp_ExeContext
)(api
->ap
);
1151 if (api
->histo
&& api
->xsize_tag
== Exactly
) {
1152 VG_(umsg
)("\nAggregated access counts by offset:\n");
1157 for (i
= 0; i
< api
->xsize
; i
++) {
1158 if (i
> 0 && (i
% 16) == 0 && i
!= api
->xsize
-1) {
1160 VG_(umsg
)("[%4lu] ", i
);
1162 VG_(umsg
)("%u ", api
->histo
[i
]);
// Metric accessor used by the sort machinery: peak live bytes for this
// allocation point.
1169 /* Metric-access functions for APInfos. */
1170 static ULong
get_metric__max_bytes_live ( APInfo
* api
) {
1171 return api
->max_bytes_live
;
// Metric accessor: total bytes ever allocated by this allocation point
// (turnover).
1173 static ULong
get_metric__tot_bytes ( APInfo
* api
) {
1174 return api
->tot_bytes
;
// Metric accessor: block count at the point of maximum byte-liveness
// (not an independent maximum — see the APInfo field comments).
1176 static ULong
get_metric__max_blocks_live ( APInfo
* api
) {
1177 return api
->max_blocks_live
;
// Metric accessor: total blocks ever allocated by this allocation
// point.
1179 static ULong
get_metric__tot_blocks ( APInfo
* api
) {
1180 return api
->tot_blocks
;
// Map a --sort-by metric name to its accessor function and sort
// direction. All four recognised metrics sort decreasing
// (*increasingP = False). Called once during option parsing (to
// validate the string) and again from show_top_n_apinfos.
// NOTE(review): the "return True;" statements after each match and the
// final "return False;" (original lines 1195-1196, 1200-1201,
// 1205-1206, 1210-1213) were dropped by the extraction; tokens below
// are unchanged.
1183 /* Given a string, return the metric-access function and also a Bool
1184 indicating whether we want increasing or decreasing values of the
1185 metric. This is used twice, once in command line processing, and
1186 then again in show_top_n_apinfos. Returns False if the given
1187 string could not be identified.*/
1188 static Bool
identify_metric ( /*OUT*/ULong(**get_metricP
)(APInfo
*),
1189 /*OUT*/Bool
* increasingP
,
1190 const HChar
* metric_name
)
1192 if (0 == VG_(strcmp
)(metric_name
, "max-bytes-live")) {
1193 *get_metricP
= get_metric__max_bytes_live
;
1194 *increasingP
= False
;
1197 if (0 == VG_(strcmp
)(metric_name
, "tot-bytes-allocd")) {
1198 *get_metricP
= get_metric__tot_bytes
;
1199 *increasingP
= False
;
1202 if (0 == VG_(strcmp
)(metric_name
, "max-blocks-live")) {
1203 *get_metricP
= get_metric__max_blocks_live
;
1204 *increasingP
= False
;
1207 if (0 == VG_(strcmp
)(metric_name
, "tot-blocks-allocd")) {
1208 *get_metricP
= get_metric__tot_blocks
;
1209 *increasingP
= False
;
1216 static void show_top_n_apinfos ( void )
1220 ULong (*get_metric
)(APInfo
*);
1223 const HChar
* metric_name
= clo_sort_by
;
1224 tl_assert(metric_name
); // ensured by clo processing
1226 Bool ok
= identify_metric( &get_metric
, &increasing
, metric_name
);
1227 tl_assert(ok
); // ensured by clo processing
1230 VG_(umsg
)("======== ORDERED BY %s \"%s\": "
1231 "top %d allocators ========\n",
1232 increasing
? "increasing" : "decreasing",
1233 metric_name
, clo_show_top_n
);
1235 // Clear all .shown bits
1236 VG_(initIterFM
)( apinfo
);
1237 while (VG_(nextIterFM
)( apinfo
, &keyW
, &valW
)) {
1238 APInfo
* api
= (APInfo
*)valW
;
1239 tl_assert(api
&& api
->ap
== (ExeContext
*)keyW
);
1242 VG_(doneIterFM
)( apinfo
);
1244 // Now print the top N entries. Each one requires a
1245 // complete scan of the set. Duh.
1246 for (i
= 0; i
< clo_show_top_n
; i
++) {
1247 ULong best_metric
= increasing
? ~0ULL : 0ULL;
1248 APInfo
* best_api
= NULL
;
1250 VG_(initIterFM
)( apinfo
);
1251 while (VG_(nextIterFM
)( apinfo
, &keyW
, &valW
)) {
1252 APInfo
* api
= (APInfo
*)valW
;
1255 ULong metric
= get_metric(api
);
1256 if (increasing
? (metric
< best_metric
) : (metric
> best_metric
)) {
1257 best_metric
= metric
;
1261 VG_(doneIterFM
)( apinfo
);
1264 break; // all APIs have been shown. Stop.
1267 VG_(umsg
)("-------------------- %d of %d --------------------\n",
1268 i
+1, clo_show_top_n
);
1269 show_APInfo(best_api
);
1270 best_api
->shown
= True
;
1277 static void dh_fini(Int exit_status
)
1279 // Before printing statistics, we must harvest access counts for
1280 // all the blocks that are still alive. Not doing so gives
1281 // access ratios which are too low (zero, in the worst case)
1282 // for such blocks, since the accesses that do get made will
1283 // (if we skip this step) not get folded into the AP summaries.
1285 VG_(initIterFM
)( interval_tree
);
1286 while (VG_(nextIterFM
)( interval_tree
, &keyW
, &valW
)) {
1287 Block
* bk
= (Block
*)keyW
;
1288 tl_assert(valW
== 0);
1290 retire_Block(bk
, False
/*!because_freed*/);
1292 VG_(doneIterFM
)( interval_tree
);
1295 VG_(umsg
)("======== SUMMARY STATISTICS ========\n");
1297 VG_(umsg
)("guest_insns: %'llu\n", g_guest_instrs_executed
);
1299 VG_(umsg
)("max_live: %'llu in %'llu blocks\n",
1300 g_max_bytes_live
, g_max_blocks_live
);
1302 VG_(umsg
)("tot_alloc: %'llu in %'llu blocks\n",
1303 g_tot_bytes
, g_tot_blocks
);
1305 if (g_tot_bytes
> 0) {
1306 VG_(umsg
)("insns per allocated byte: %'llu\n",
1307 g_guest_instrs_executed
/ g_tot_bytes
);
1311 show_top_n_apinfos();
1315 VG_(umsg
)("==============================================================\n");
1317 VG_(umsg
)("Some hints: (see --help for command line option details):\n");
1319 VG_(umsg
)("* summary stats for whole program are at the top of this output\n");
1321 VG_(umsg
)("* --show-top-n= controls how many alloc points are shown.\n");
1322 VG_(umsg
)(" You probably want to set it much higher than\n");
1323 VG_(umsg
)(" the default value (10)\n");
1325 VG_(umsg
)("* --sort-by= specifies the sort key for output.\n");
1326 VG_(umsg
)(" See --help for details.\n");
1328 VG_(umsg
)("* Each allocation stack, by default 12 frames, counts as\n");
1329 VG_(umsg
)(" a separate alloc point. This causes the data to be spread out\n");
1330 VG_(umsg
)(" over far too many alloc points. I strongly suggest using\n");
1331 VG_(umsg
)(" --num-callers=4 or some such, to reduce the spreading.\n");
1334 if (VG_(clo_stats
)) {
1335 VG_(dmsg
)(" dhat: find_Block_containing:\n");
1336 VG_(dmsg
)(" found: %'lu (%'lu cached + %'lu uncached)\n",
1337 stats__n_fBc_cached
+ stats__n_fBc_uncached
,
1338 stats__n_fBc_cached
,
1339 stats__n_fBc_uncached
);
1340 VG_(dmsg
)(" notfound: %'lu\n", stats__n_fBc_notfound
);
//------------------------------------------------------------//
//--- Initialisation                                       ---//
//------------------------------------------------------------//
/* Post-command-line-option initialisation.  Nothing to do for DHAT;
   the tool interface requires the callback to exist regardless. */
static void dh_post_clo_init(void)
{
}
1354 static void dh_pre_clo_init(void)
1356 VG_(details_name
) ("DHAT");
1357 VG_(details_version
) (NULL
);
1358 VG_(details_description
) ("a dynamic heap analysis tool");
1359 VG_(details_copyright_author
)(
1360 "Copyright (C) 2010-2017, and GNU GPL'd, by Mozilla Inc");
1361 VG_(details_bug_reports_to
) (VG_BUGS_TO
);
1364 VG_(basic_tool_funcs
) (dh_post_clo_init
,
1369 VG_(needs_libc_freeres
)();
1370 VG_(needs_cxx_freeres
)();
1371 VG_(needs_command_line_options
)(dh_process_cmd_line_option
,
1373 dh_print_debug_usage
);
1374 //zz VG_(needs_client_requests) (dh_handle_client_request);
1375 //zz VG_(needs_sanity_checks) (dh_cheap_sanity_check,
1376 //zz dh_expensive_sanity_check);
1377 VG_(needs_malloc_replacement
) (dh_malloc
,
1379 dh___builtin_vec_new
,
1383 dh___builtin_delete
,
1384 dh___builtin_vec_delete
,
1386 dh_malloc_usable_size
,
1389 VG_(track_pre_mem_read
) ( dh_handle_noninsn_read
);
1390 //VG_(track_pre_mem_read_asciiz) ( check_mem_is_defined_asciiz );
1391 VG_(track_post_mem_write
) ( dh_handle_noninsn_write
);
1393 tl_assert(!interval_tree
);
1394 tl_assert(!fbc_cache0
);
1395 tl_assert(!fbc_cache1
);
1397 interval_tree
= VG_(newFM
)( VG_(malloc
),
1398 "dh.main.interval_tree.1",
1400 interval_tree_Cmp
);
1402 apinfo
= VG_(newFM
)( VG_(malloc
),
1405 NULL
/*unboxedcmp*/ );
1408 VG_DETERMINE_INTERFACE_VERSION(dh_pre_clo_init
)
//--------------------------------------------------------------------//
//--- end                                                dh_main.c ---//
//--------------------------------------------------------------------//