2 /*--------------------------------------------------------------------*/
3 /*--- An xtree, tree of stacktraces with data m_xtree.c ---*/
4 /*--------------------------------------------------------------------*/
7 This file is part of Valgrind, a dynamic binary instrumentation
10 Copyright (C) 2016-2017 Philippe Waroquiers
12 This code generalises the XTree idea that was implemented in
13 the massif tool in Valgrind versions <= 3.12, which is
14 Copyright (C) 2005-2017 Nicholas Nethercote
17 The XTree implementation in this file is however implemented completely
18 differently. Some code has been re-used for the production of the
19 massif file header (e.g. FP_cmd function).
21 This program is free software; you can redistribute it and/or
22 modify it under the terms of the GNU General Public License as
23 published by the Free Software Foundation; either version 2 of the
24 License, or (at your option) any later version.
26 This program is distributed in the hope that it will be useful, but
27 WITHOUT ANY WARRANTY; without even the implied warranty of
28 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
29 General Public License for more details.
31 You should have received a copy of the GNU General Public License
32 along with this program; if not, write to the Free Software
33 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
36 The GNU General Public License is contained in the file COPYING.
39 #include "pub_core_basics.h"
40 #include "pub_core_debuglog.h"
41 #include "pub_core_clientstate.h"
42 #include "pub_core_stacktrace.h"
43 #include "pub_core_execontext.h"
44 #include "pub_core_libcbase.h"
45 #include "pub_core_libcassert.h"
46 #include "pub_core_libcfile.h"
47 #include "pub_core_libcprint.h"
48 #include "pub_core_libcproc.h"
49 #include "pub_core_hashtable.h"
50 #include "pub_core_mallocfree.h"
51 #include "pub_core_options.h"
52 #include "pub_core_debuginfo.h"
53 #include "pub_core_deduppoolalloc.h"
54 #include "pub_core_xtree.h" /* self */
56 #define DMSG(level, ...) (level <= VG_(debugLog_getLevel)() ? \
57 VG_(dmsg)(__VA_ARGS__) \
60 /* Defines the relevant part of an ec. This is shared between an xt
61 and its snapshots (see XT_shared XArray of xec). */
65 UShort top
; // The first ips of ec to take into account.
66 UShort n_ips_sel
; // The nr of ips from top to take into account.
70 /* XT_shared maintains the information shared between an XT and all
74 UWord nrRef
; /* nr of XTrees referencing this shared memory. */
76 Alloc_Fn_t alloc_fn
; /* alloc fn (nofail) */
77 const HChar
* cc
; /* cost centre for alloc */
78 Free_Fn_t free_fn
; /* free fn */
80 /* The data associated to each ec is stored in 2 arrays:
81 an xec array, shared between an xt and all its snapshots.
82 a data array, private to each XTree.
83 For an ec with an ECU ecu, d4ecu2xecu[ecu/4] gives the offset in
84 xec and data arrays where the ec information is located (this
85 indirection is used to avoid huge xec and data arrays, in
86 case an XTree contains data only for a small number of ec.
87 The offset in the xec and data array is used as xtree ec unique
90 UInt d4ecu2xecu_sz
; /* size of d4ecu2xecu (in nr of elements). */
93 /* ec information common to an xt and its snapshots. */
94 XArray
* xec
; /* XArray of xec, indexed by xecu (== d4ecu2xecu[ecu/4]). */
96 /* XArray of xecu, sorted by StackTrace ips[top..top+n_ips_sel-1].
98 XArray
* ips_order_xecu
;
101 /* NO_OFFSET indicates in d4ecu2xecu there is no data (yet) for this ec
102 (with the index ecu/4). */
103 #define NO_OFFSET 0xffffffff
105 static XT_shared
* new_XT_shared (Alloc_Fn_t alloc_fn
,
107 void (*free_fn
)(void*))
114 shared
= alloc_fn(cc
, sizeof(*shared
));
116 shared
->alloc_fn
= alloc_fn
;
118 shared
->free_fn
= free_fn
;
120 shared
->d4ecu2xecu_sz
= 0;
121 shared
->d4ecu2xecu
= NULL
;
122 shared
->xec
= VG_(newXA
)(alloc_fn
, cc
, free_fn
, sizeof(xec
));
123 shared
->ips_order_xecu
= NULL
; // Allocated when needed.
128 static void delete_XT_shared (XT_shared
* shared
)
130 vg_assert(shared
->nrRef
== 0);
131 shared
->free_fn(shared
->d4ecu2xecu
);
132 VG_(deleteXA
)(shared
->xec
);
133 if (shared
->ips_order_xecu
!= NULL
)
134 VG_(deleteXA
)(shared
->ips_order_xecu
);
135 shared
->free_fn(shared
);
138 /* Compare 2 entries in ips_order_xecu by StackTrace elements.
139 In case stack traces are of different length, an 'absent' ips is
140 considered smaller than any other address. */
141 static XArray
* xec_data_for_sort
; // Needed to translate an xecu into an xec
142 static Int
ips_order_cmp(const void* vleft
, const void* vright
)
144 const Xecu left_xecu
= *(const Xecu
*)vleft
;
145 const Xecu right_xecu
= *(const Xecu
*)vright
;
146 const xec
* left
= VG_(indexXA
)(xec_data_for_sort
, left_xecu
);
147 const xec
* right
= VG_(indexXA
)(xec_data_for_sort
, right_xecu
);
148 const StackTrace left_ips
= VG_(get_ExeContext_StackTrace
)(left
->ec
)
150 const StackTrace right_ips
= VG_(get_ExeContext_StackTrace
)(right
->ec
)
154 const UInt c_n_ips_sel
= left
->n_ips_sel
< right
->n_ips_sel
155 ? left
->n_ips_sel
: right
->n_ips_sel
;
157 // First see if we have a difference on the common nr of ips selected
158 for (i
= 0; i
< c_n_ips_sel
; i
++) {
159 if (left_ips
[i
] == right_ips
[i
]) continue;
160 if (left_ips
[i
] < right_ips
[i
]) return -1;
163 // Common part is equal => compare lengths.
164 if (left
->n_ips_sel
< right
->n_ips_sel
) return -1;
165 if (left
->n_ips_sel
> right
->n_ips_sel
) return 1;
169 // If needed, build or refresh shared->ips_order_xecu
170 static void ensure_ips_order_xecu_valid(XT_shared
* shared
)
175 if (shared
->ips_order_xecu
== NULL
) {
176 shared
->ips_order_xecu
= VG_(newXA
)(shared
->alloc_fn
, shared
->cc
,
177 shared
->free_fn
, sizeof(Xecu
));
178 VG_(hintSizeXA
)(shared
->ips_order_xecu
, VG_(sizeXA
)(shared
->xec
));
179 VG_(setCmpFnXA
)(shared
->ips_order_xecu
, ips_order_cmp
);
182 if (VG_(sizeXA
)(shared
->xec
) == VG_(sizeXA
)(shared
->ips_order_xecu
))
185 n_xecu
= VG_(sizeXA
)(shared
->xec
);
186 for (i
= VG_(sizeXA
)(shared
->ips_order_xecu
); i
< n_xecu
; i
++)
187 VG_(addToXA
)(shared
->ips_order_xecu
, &i
);
189 xec_data_for_sort
= shared
->xec
;
190 VG_(sortXA
)(shared
->ips_order_xecu
);
193 static void addRef_XT_shared (XT_shared
* shared
)
198 static UWord
release_XT_shared(XT_shared
* shared
)
202 vg_assert(shared
->nrRef
> 0);
203 nrRef
= --shared
->nrRef
;
205 delete_XT_shared(shared
);
211 Alloc_Fn_t alloc_fn
; /* alloc fn (nofail) */
212 const HChar
* cc
; /* cost centre for alloc */
213 Free_Fn_t free_fn
; /* free fn */
214 Word dataSzB
; /* data size in bytes */
215 XT_init_data_t init_data_fn
;
216 XT_add_data_t add_data_fn
;
217 XT_sub_data_t sub_data_fn
;
218 XT_filter_IPs_t filter_IPs_fn
;
222 HChar
* tmp_data
; /* temporary buffer, to insert new elements. */
223 XArray
* data
; /* of elements of size dataSzB */
227 XTree
* VG_(XT_create
) ( Alloc_Fn_t alloc_fn
,
231 XT_init_data_t init_data_fn
,
232 XT_add_data_t add_data_fn
,
233 XT_sub_data_t sub_data_fn
,
234 XT_filter_IPs_t filter_IPs_fn
)
238 /* check user-supplied info .. */
241 vg_assert(dataSzB
>= 0);
242 vg_assert(init_data_fn
);
243 vg_assert(add_data_fn
);
244 vg_assert(sub_data_fn
);
246 xt
= alloc_fn(cc
, sizeof(struct _XTree
) );
247 xt
->alloc_fn
= alloc_fn
;
249 xt
->free_fn
= free_fn
;
250 xt
->dataSzB
= dataSzB
;
251 xt
->init_data_fn
= init_data_fn
;
252 xt
->add_data_fn
= add_data_fn
;
253 xt
->sub_data_fn
= sub_data_fn
;
254 xt
->filter_IPs_fn
= filter_IPs_fn
;
256 xt
->shared
= new_XT_shared(alloc_fn
, cc
, free_fn
);
257 addRef_XT_shared(xt
->shared
);
258 xt
->tmp_data
= alloc_fn(cc
, xt
->dataSzB
);
259 xt
->data
= VG_(newXA
)(alloc_fn
, cc
, free_fn
, dataSzB
);
264 XTree
* VG_(XT_snapshot
)(XTree
* xt
)
270 nxt
= xt
->alloc_fn(xt
->cc
, sizeof(struct _XTree
) );
273 addRef_XT_shared(nxt
->shared
);
274 nxt
->tmp_data
= nxt
->alloc_fn(nxt
->cc
, nxt
->dataSzB
);
275 nxt
->data
= VG_(cloneXA
)(nxt
->cc
, xt
->data
);
280 void VG_(XT_delete
) ( XTree
* xt
)
284 release_XT_shared(xt
->shared
);
285 xt
->free_fn(xt
->tmp_data
);
286 VG_(deleteXA
)(xt
->data
);
290 static Xecu
find_or_insert (XTree
* xt
, ExeContext
* ec
)
293 const UInt d4ecu
= VG_(get_ECU_from_ExeContext
)(ec
) / 4;
294 XT_shared
* shared
= xt
->shared
;
296 /* First grow the d4ecu2xecu array if needed. */
297 if (d4ecu
>= shared
->d4ecu2xecu_sz
) {
298 UInt old_sz
= shared
->d4ecu2xecu_sz
;
299 UInt new_sz
= (3 * d4ecu
) / 2;
303 shared
->d4ecu2xecu
= VG_(realloc
)(xt
->cc
, shared
->d4ecu2xecu
,
304 new_sz
* sizeof(UInt
));
305 shared
->d4ecu2xecu_sz
= new_sz
;
306 for (UInt i
= old_sz
; i
< new_sz
; i
++)
307 shared
->d4ecu2xecu
[i
] = NO_OFFSET
;
310 if (shared
->d4ecu2xecu
[d4ecu
] == NO_OFFSET
) {
314 if (xt
->filter_IPs_fn
== NULL
) {
316 xe
.n_ips_sel
= (UShort
)VG_(get_ExeContext_n_ips
)(xe
.ec
);
319 UInt n_ips_sel
= VG_(get_ExeContext_n_ips
)(xe
.ec
);
320 xt
->filter_IPs_fn(VG_(get_ExeContext_StackTrace
)(xe
.ec
), n_ips_sel
,
322 xe
.top
= (UShort
)top
;
323 xe
.n_ips_sel
= (UShort
)n_ips_sel
;
325 xt
->init_data_fn(xt
->tmp_data
);
326 VG_(addToXA
)(shared
->xec
, &xe
);
327 shared
->d4ecu2xecu
[d4ecu
] = (UInt
)VG_(addToXA
)(xt
->data
, xt
->tmp_data
);
330 return shared
->d4ecu2xecu
[d4ecu
];
333 Xecu
VG_(XT_add_to_ec
) (XTree
* xt
, ExeContext
* ec
, const void* value
)
335 Xecu xecu
= find_or_insert(xt
, ec
);
336 void* data
= VG_(indexXA
)(xt
->data
, xecu
);
338 xt
->add_data_fn(data
, value
);
342 Xecu
VG_(XT_sub_from_ec
) (XTree
* xt
, ExeContext
* ec
, const void* value
)
344 Xecu xecu
= find_or_insert(xt
, ec
);
345 void* data
= VG_(indexXA
)(xt
->data
, xecu
);
347 xt
->sub_data_fn(data
, value
);
351 void VG_(XT_add_to_xecu
) (XTree
* xt
, Xecu xecu
, const void* value
)
353 void* data
= VG_(indexXA
)(xt
->data
, xecu
);
354 xt
->add_data_fn(data
, value
);
357 void VG_(XT_sub_from_xecu
) (XTree
* xt
, Xecu xecu
, const void* value
)
359 void* data
= VG_(indexXA
)(xt
->data
, xecu
);
360 xt
->sub_data_fn(data
, value
);
363 UInt
VG_(XT_n_ips_sel
) (XTree
* xt
, Xecu xecu
)
365 xec
* xe
= (xec
*)VG_(indexXA
)(xt
->shared
->xec
, xecu
);
366 return (UInt
)xe
->n_ips_sel
;
369 ExeContext
* VG_(XT_get_ec_from_xecu
) (XTree
* xt
, Xecu xecu
)
371 xec
* xe
= (xec
*)VG_(indexXA
)(xt
->shared
->xec
, xecu
);
375 static VgFile
* xt_open (const HChar
* outfilename
)
379 fp
= VG_(fopen
)(outfilename
, VKI_O_CREAT
|VKI_O_WRONLY
|VKI_O_TRUNC
,
380 VKI_S_IRUSR
|VKI_S_IWUSR
|VKI_S_IRGRP
|VKI_S_IROTH
);
382 VG_(message
)(Vg_UserMsg
,
383 "Error: can not open xtree output file `%s'\n",
389 #define FP(format, args...) ({ VG_(fprintf)(fp, format, ##args); })
391 // Print "cmd:" line.
392 static void FP_cmd(VgFile
* fp
)
397 FP("%s", VG_(args_the_exename
));
398 for (i
= 0; i
< VG_(sizeXA
)(VG_(args_for_client
)); i
++) {
399 HChar
* arg
= * (HChar
**) VG_(indexXA
)(VG_(args_for_client
), i
);
405 /* ----------- Callgrind output ------------------------------------------- */
407 /* Output a callgrind format element in compressed format:
408 "name=(pos)" or "name=(pos) value" (if value_new)
409 or not compressed format: "name=value"
410 VG_(clo_xtree_compress_strings) indicates if the compressed format is used.
411 name is the format element (e.g. fl, fn, cfi, cfn, ...).
412 pos is the value dictionary position, used for compressed format.
413 value_new is True if this is the first usage of value. */
414 static void FP_pos_str(VgFile
* fp
, const HChar
* name
, UInt pos
,
415 const HChar
* value
, Bool value_new
)
417 if (!VG_(clo_xtree_compress_strings
))
418 FP("%s=%s\n", name
, value
);
420 FP("%s=(%d) %s\n", name
, pos
, value
);
422 FP("%s=(%d)\n", name
, pos
);
425 void VG_(XT_callgrind_print
)
427 const HChar
* outfilename
,
429 const HChar
* (*img_value
)(const void* value
))
432 XT_shared
* shared
= xt
->shared
;
433 VgFile
* fp
= xt_open(outfilename
);
434 DedupPoolAlloc
* fnname_ddpa
;
435 DedupPoolAlloc
* filename_ddpa
;
436 HChar
* filename_buf
= NULL
;
437 UInt filename_buf_size
= 0;
438 const HChar
* filename_dir
;
439 const HChar
* filename_name
;
444 fnname_ddpa
= VG_(newDedupPA
)(16000, 1, xt
->alloc_fn
,
445 "XT_callgrind_print.fn", xt
->free_fn
);
446 filename_ddpa
= VG_(newDedupPA
)(16000, 1, xt
->alloc_fn
,
447 "XT_callgrind_print.fl", xt
->free_fn
);
449 FP("# callgrind format\n");
451 FP("creator: xtree-1\n");
452 FP("pid: %d\n", VG_(getpid
)());
455 /* Currently, we only need/support line positions. */
456 FP("\npositions:%s\n", " line");
458 /* Produce one "event:" line for each event, and the "events:" line. */
460 HChar strtok_events
[VG_(strlen
)(events
)+1];
465 VG_(strcpy
)(strtok_events
, events
);
466 for (e
= VG_(strtok_r
)(strtok_events
, ",", &ssaveptr
);
468 e
= VG_(strtok_r
)(NULL
, ",", &ssaveptr
))
469 FP("event: %s\n", e
);
471 VG_(strcpy
)(strtok_events
, events
);
472 for (e
= VG_(strtok_r
)(strtok_events
, ",", &ssaveptr
);
474 e
= VG_(strtok_r
)(NULL
, ",", &ssaveptr
)) {
485 xt
->init_data_fn(xt
->tmp_data
); // to compute totals
487 n_xecu
= VG_(sizeXA
)(xt
->data
);
488 vg_assert (n_xecu
<= VG_(sizeXA
)(shared
->xec
));
489 for (Xecu xecu
= 0; xecu
< n_xecu
; xecu
++) {
490 xec
* xe
= (xec
*)VG_(indexXA
)(shared
->xec
, xecu
);
491 if (xe
->n_ips_sel
== 0)
494 const HChar
* img
= img_value(VG_(indexXA
)(xt
->data
, xecu
));
496 // CALLED_FLF gets the Dir+Filename/Line number/Function name for ips[n]
497 // in the variables called_filename/called_linenum/called_fnname.
498 // The booleans called_filename_new/called_fnname_new are set to True
499 // the first time the called_filename/called_fnname are encountered.
500 // The called_filename_nr/called_fnname_nr are numbers identifying
501 // the strings called_filename/called_fnname.
502 #define CALLED_FLF(n) \
504 || !VG_(get_filename_linenum)(ep, ips[(n)], \
507 &called_linenum)) { \
508 filename_name = "UnknownFile???"; \
509 called_linenum = 0; \
512 || !VG_(get_fnname)(ep, ips[(n)], &called_fnname)) { \
513 called_fnname = "UnknownFn???"; \
516 UInt needed_size = VG_(strlen)(filename_dir) + 1 \
517 + VG_(strlen)(filename_name) + 1; \
518 if (filename_buf_size < needed_size) { \
519 filename_buf_size = needed_size; \
520 filename_buf = VG_(realloc)(xt->cc, filename_buf, \
521 filename_buf_size); \
524 VG_(strcpy)(filename_buf, filename_dir); \
525 if (filename_buf[0] != '\0') { \
526 VG_(strcat)(filename_buf, "/"); \
528 VG_(strcat)(filename_buf, filename_name); \
529 called_filename_nr = VG_(allocStrDedupPA)(filename_ddpa, \
531 &called_filename_new); \
532 called_filename = filename_buf; \
533 called_fnname_nr = VG_(allocStrDedupPA)(fnname_ddpa, \
537 /* Instead of unknown fnname ???, CALLED_FLF could use instead:
538 VG_(sprintf)(unknown_fn, "%p", (void*)ips[(n)]);
539 but that creates a lot of (useless) nodes at least for
540 valgrind self-hosting. */
543 const HChar
* called_filename
;
544 UInt called_filename_nr
;
545 Bool called_filename_new
; // True the first time we see this filename.
546 const HChar
* called_fnname
;
547 UInt called_fnname_nr
;
548 Bool called_fnname_new
; // True the first time we see this fnname.
552 const Addr
* ips
= VG_(get_ExeContext_StackTrace
)(xe
->ec
) + xe
->top
;
553 const DiEpoch ep
= VG_(get_ExeContext_epoch
)(xe
->ec
);
555 Int ips_idx
= xe
->n_ips_sel
- 1;
558 VG_(printf
)("entry img %s\n", img
);
559 VG_(pp_ExeContext
)(xe
->ec
);
562 xt
->add_data_fn(xt
->tmp_data
, VG_(indexXA
)(xt
->data
, xecu
));
567 FP_pos_str(fp
, "fl", called_filename_nr
,
568 called_filename
, called_filename_new
);
569 FP_pos_str(fp
, "fn", called_fnname_nr
,
570 called_fnname
, called_fnname_new
);
572 FP("%d %s\n", called_linenum
, img
);
574 FP("%d\n", called_linenum
); //no self cost.
575 prev_linenum
= called_linenum
;
577 CALLED_FLF(ips_idx
-1);
578 FP_pos_str(fp
, "cfi", called_filename_nr
,
579 called_filename
, called_filename_new
);
580 FP_pos_str(fp
, "cfn", called_fnname_nr
,
581 called_fnname
, called_fnname_new
);
582 called_filename_new
= False
;
583 called_fnname_new
= False
;
584 /* Giving a call count of 0 allows kcachegrind to hide the calls
585 column. A call count of 1 means kcachegrind would show in the
586 calls column the nr of stacktrace containing this arc, which
587 is very confusing. So, the less bad is to give a 0 call
589 FP("calls=0 %d\n", called_linenum
);
590 FP("%d %s\n", prev_linenum
, img
);
596 /* callgrind format is not really fully supporting (yet?) execution trees:
597 in an execution tree, self and inclusive costs are identical, and
598 cannot be added together.
599 If no totals: line is given, callgrind_annotate calculates the addition
600 of all costs, and so gives a wrong totals.
601 Giving a totals: line solves this, but the user must give the option
602 --inclusive=yes (kind of hack) to have all the functions given
603 in the output file. */
604 FP("totals: %s\n", img_value(xt
->tmp_data
));
606 VG_(deleteDedupPA
)(fnname_ddpa
);
607 VG_(deleteDedupPA
)(filename_ddpa
);
608 VG_(free
)(filename_buf
);
612 /* ----------- Massif output ---------------------------------------------- */
614 /* For Massif output, some functions from the execontext are not output, a.o.
615 the allocation functions at the top of the stack and the functions below
616 main. So, the StackTrace of the execontexts in the xtree must be filtered.
617 Ms_Ec defines the subset of the stacktrace relevant for the report. */
620 StackTrace ips
; // ips and n_ips provides the subset of the xtree ec
621 UInt n_ips
; // to use for a massif report.
623 SizeT report_value
; // The value to report for this stack trace.
626 /* Ms_Group defines (at a certain depth) a group of ec context that
627 have the same IPs at the given depth, and have the same 'parent'.
628 total is the sum of the values of all group elements.
629 A Ms_Group can also represent a set of ec contexts that do not
630 have the same IP, but that have each a total which is below the
631 significant size. Such a group has a NULL ms_ec, a zero group_io.
632 n_ec is the nr of insignificant ec that have been collected inside this
633 insignificant group, and total is the sum of all non significant ec
634 at the given depth. */
637 Ms_Ec
* ms_ec
; // The group consists in ms_ec[0 .. n_ec-1]
643 /* Compare 2 groups by total, to have bigger total first. */
644 static Int
ms_group_revcmp_total(const void* vleft
, const void* vright
)
646 const Ms_Group
* left
= (const Ms_Group
*)vleft
;
647 const Ms_Group
* right
= (const Ms_Group
*)vright
;
649 // First reverse compare total
650 if (left
->total
> right
->total
) return -1;
651 if (left
->total
< right
->total
) return 1;
653 /* Equal size => compare IPs.
654 This (somewhat?) helps to have deterministic test results.
655 If this would change between platforms, then we should compare
656 function names/filename/linenr */
657 if (left
->group_ip
< right
->group_ip
) return -1;
658 if (left
->group_ip
> right
->group_ip
) return 1;
662 /* Scan the addresses in ms_ec at the given depth.
664 *groups points to an array of Ms_Group sorted by total.
665 *n_groups is the nr of groups
666 The caller is responsible to free the allocated group array. */
667 static void ms_make_groups (UInt depth
, Ms_Ec
* ms_ec
, UInt n_ec
, SizeT sig_sz
,
668 UInt
* n_groups
, Ms_Group
** groups
)
671 Addr cur_group_ip
= 0;
675 /* Handle special case somewhat more efficiently */
681 /* Compute how many groups we have. */
682 for (i
= 0; i
< n_ec
; i
++) {
683 if (ms_ec
[i
].n_ips
> depth
684 && (*n_groups
== 0 || cur_group_ip
!= ms_ec
[i
].ips
[depth
])) {
686 cur_group_ip
= ms_ec
[i
].ips
[depth
];
690 /* make the group array. */
691 *groups
= VG_(malloc
)("ms_make_groups", *n_groups
* sizeof(Ms_Group
));
693 for (g
= 0; g
< *n_groups
; g
++) {
694 while (ms_ec
[i
].n_ips
<= depth
)
696 cur_group_ip
= ms_ec
[i
].ips
[depth
];
697 (*groups
)[g
].group_ip
= cur_group_ip
;
698 (*groups
)[g
].ms_ec
= &ms_ec
[i
];
699 (*groups
)[g
].n_ec
= 1;
700 (*groups
)[g
].total
= ms_ec
[i
].report_value
;
703 && ms_ec
[i
].n_ips
> depth
704 && cur_group_ip
== ms_ec
[i
].ips
[depth
]) {
705 (*groups
)[g
].total
+= ms_ec
[i
].report_value
;
711 /* Search for insignificant groups, collect them all together
712 in the first insignificant group, and compact the group array. */
714 UInt insig1
; // Position of first insignificant group.
715 UInt n_insig
= 0; // Nr of insignificant groups found.
717 for (g
= 0; g
< *n_groups
; g
++) {
718 if ((*groups
)[g
].total
< sig_sz
) {
720 // First insig group => transform it into the special group
721 (*groups
)[g
].ms_ec
= NULL
;
722 (*groups
)[g
].group_ip
= 0;
723 (*groups
)[g
].n_ec
= 0;
724 // start the sum of insig total as total
727 // Add this insig group total into insig1 first group
728 (*groups
)[insig1
].total
+= (*groups
)[g
].total
;
733 (*groups
)[g
- n_insig
+ 1] = (*groups
)[g
];
737 (*groups
)[insig1
].n_ec
= n_insig
;
738 *n_groups
-= n_insig
- 1;
740 DMSG(1, "depth %u n_groups %u n_insig %u\n", depth
, *n_groups
, n_insig
);
743 /* Sort on total size, bigger size first. */
744 VG_(ssort
)(*groups
, *n_groups
, sizeof(Ms_Group
), ms_group_revcmp_total
);
747 /* Output the given group (located in an xtree at the given depth).
748 indent tells by how much to indent the information output for the group.
749 indent can be bigger than depth when outputting a group that is made
750 of one or more inlined calls: all inlined calls are output with the
751 same depth but with one more indent for each inlined call. */
752 static void ms_output_group (VgFile
* fp
, UInt depth
, UInt indent
,
753 Ms_Group
* group
, SizeT sig_sz
,
754 double sig_pct_threshold
)
760 // If this is an insignificant group, handle it specially
761 if (group
->ms_ec
== NULL
) {
762 const HChar
* s
= ( 1 == group
->n_ec
? "," : "s, all" );
763 vg_assert(group
->group_ip
== 0);
764 FP("%*sn0: %lu in %d place%s below massif's threshold (%.2f%%)\n",
765 indent
+1, "", group
->total
, group
->n_ec
, s
, sig_pct_threshold
);
769 // Normal group => output the group and its subgroups.
770 ms_make_groups(depth
+1, group
->ms_ec
, group
->n_ec
, sig_sz
,
773 // FIXME JRS EPOCH 28 July 2017: HACK! Is this correct?
774 const DiEpoch cur_ep
= VG_(current_DiEpoch
)();
775 // // FIXME PW EPOCH : No, the above is not correct.
776 // Xtree Massif output regroups execontext in the layout of a 'tree'.
777 // So, possibly, the same IP address value can be in 2 different ec, but
778 // the epoch to symbolise this address must be retrieved from the ec it
780 // So, to fix this, it is not enough to make a group based on identical
781 // IP addr value, one must also find the di used to symbolise this address,
782 // A group will then be defined as 'same IP and same di'.
783 // Fix not trivial to do, so for the moment, --keep-debuginfo=yes will
784 // have no impact on xtree massif output.
786 Addr cur_ip
= group
->ms_ec
->ips
[depth
];
788 InlIPCursor
*iipc
= VG_(new_IIPC
)(cur_ep
, cur_ip
);
791 const HChar
* buf
= VG_(describe_IP
)(cur_ep
, cur_ip
, iipc
);
792 Bool is_inlined
= VG_(next_IIPC
)(iipc
);
794 FP("%*s" "n%u: %ld %s\n",
796 is_inlined
? 1 : n_groups
, // Inlined frames always have one child.
807 VG_(delete_IIPC
)(iipc
);
809 /* Output sub groups of this group. */
810 for (i
= 0; i
< n_groups
; i
++)
811 ms_output_group(fp
, depth
+1, indent
+1, &groups
[i
], sig_sz
,
817 /* Allocate and build an array of Ms_Ec sorted by addresses in the
819 static void prepare_ms_ec (XTree
* xt
,
820 ULong (*report_value
)(const void* value
),
821 ULong
* top_total
, Ms_Ec
** vms_ec
, UInt
* vn_ec
)
823 XT_shared
* shared
= xt
->shared
;
824 const UInt n_xecu
= VG_(sizeXA
)(shared
->xec
);
825 const UInt n_data_xecu
= VG_(sizeXA
)(xt
->data
);
826 Ms_Ec
* ms_ec
= VG_(malloc
)("XT_massif_print.ms_ec", n_xecu
* sizeof(Ms_Ec
));
827 UInt n_xecu_sel
= 0; // Nr of xecu that are selected for output.
829 vg_assert(n_data_xecu
<= n_xecu
);
831 // Ensure we have in shared->ips_order_xecu our xecu sorted by StackTrace.
832 ensure_ips_order_xecu_valid(shared
);
835 DMSG(1, "iteration %u\n", n_xecu
);
836 for (UInt i
= 0; i
< n_xecu
; i
++) {
837 Xecu xecu
= *(Xecu
*)VG_(indexXA
)(shared
->ips_order_xecu
, i
);
838 xec
* xe
= (xec
*)VG_(indexXA
)(shared
->xec
, xecu
);
840 if (xecu
>= n_data_xecu
)
841 continue; // No data for this xecu in xt->data.
842 ms_ec
[n_xecu_sel
].n_ips
= xe
->n_ips_sel
;
843 if (ms_ec
[n_xecu_sel
].n_ips
== 0)
846 ms_ec
[n_xecu_sel
].ips
= VG_(get_ExeContext_StackTrace
)(xe
->ec
) + xe
->top
;
847 ms_ec
[n_xecu_sel
].report_value
848 = (*report_value
)(VG_(indexXA
)(xt
->data
, xecu
));
849 *top_total
+= ms_ec
[n_xecu_sel
].report_value
;
853 vg_assert(n_xecu_sel
<= n_xecu
);
859 MsFile
* VG_(XT_massif_open
)
860 (const HChar
* outfilename
,
862 const XArray
* desc_args
,
863 const HChar
* time_unit
)
866 VgFile
* fp
= xt_open(outfilename
);
869 return NULL
; // xt_open reported the error.
871 /* ------ file header ------------------------------- */
877 for (i
= 0; i
< VG_(sizeXA
)(desc_args
); i
++) {
878 HChar
* arg
= *(HChar
**)VG_(indexXA
)(desc_args
, i
);
882 if (0 == i
&& desc
== NULL
) FP(" (none)");
887 FP("time_unit: %s\n", time_unit
);
892 void VG_(XT_massif_close
)(MsFile
* fp
)
895 return; // Error should have been reported by VG_(XT_massif_open)
900 void VG_(XT_massif_print
)
903 const Massif_Header
* header
,
904 ULong (*report_value
)(const void* value
))
909 return; // Normally VG_(XT_massif_open) already reported an error.
911 /* Compute/prepare Snapshot totals/data/... */
914 /* Following variables only used for detailed snapshot. */
918 header
->detailed
? (header
->peak
? "peak" : "detailed") : "empty";
920 DMSG(1, "XT_massif_print %s\n", kind
);
921 if (header
->detailed
) {
922 /* Prepare the Ms_Ec sorted array of stacktraces and the groups
924 prepare_ms_ec(xt
, report_value
, &top_total
, &ms_ec
, &n_ec
);
925 DMSG(1, "XT_print_massif ms_ec n_ec %u\n", n_ec
);
926 } else if (xt
== NULL
) {
927 /* Non detailed, no xt => use the sz provided in the header. */
928 top_total
= header
->sz_B
;
930 /* For non detailed snapshot, compute total directly from the xec. */
931 const XT_shared
* shared
= xt
->shared
;
932 const UInt n_xecu
= VG_(sizeXA
)(xt
->data
);
935 for (UInt xecu
= 0; xecu
< n_xecu
; xecu
++) {
936 xec
* xe
= (xec
*)VG_(indexXA
)(shared
->xec
, xecu
);
937 if (xe
->n_ips_sel
== 0)
939 top_total
+= (*report_value
)(VG_(indexXA
)(xt
->data
, xecu
));
943 /* ------ snapshot header --------------------------- */
944 FP("#-----------\n");
945 FP("snapshot=%d\n", header
->snapshot_n
);
946 FP("#-----------\n");
947 FP("time=%lld\n", header
->time
);
949 FP("mem_heap_B=%llu\n", top_total
); // without extra_B and without stacks_B
950 FP("mem_heap_extra_B=%llu\n", header
->extra_B
);
951 FP("mem_stacks_B=%llu\n", header
->stacks_B
);
952 FP("heap_tree=%s\n", kind
);
954 /* ------ detailed snapshot data ----------------------------- */
955 if (header
->detailed
) {
960 // Work out how big a child must be to be significant. If the current
961 // top_total is zero, then we set it to 1, which means everything will be
962 // judged insignificant -- this is sensible, as there's no point showing
963 // any detail for this case. Unless they used threshold=0, in which
964 // case we show them everything because that's what they asked for.
966 // Nb: We do this once now, rather than once per child, because if we do
967 // that the cost of all the divisions adds up to something significant.
968 if (0 == top_total
&& 0 != header
->sig_threshold
)
971 sig_sz
= ((top_total
+ header
->extra_B
+ header
->stacks_B
)
972 * header
->sig_threshold
) / 100;
974 /* Produce the groups at depth 0 */
975 DMSG(1, "XT_massif_print producing depth 0 groups\n");
976 ms_make_groups(0, ms_ec
, n_ec
, sig_sz
, &n_groups
, &groups
);
978 /* Output the top node. */
979 FP("n%u: %llu %s\n", n_groups
, top_total
, header
->top_node_desc
);
981 /* Output depth 0 groups. */
982 DMSG(1, "XT_massif_print outputing %u depth 0 groups\n", n_groups
);
983 for (i
= 0; i
< n_groups
; i
++)
984 ms_output_group(fp
, 0, 0, &groups
[i
], sig_sz
, header
->sig_threshold
);
991 Int
VG_(XT_offset_main_or_below_main
)(DiEpoch ep
, Addr
* ips
, Int n_ips
)
993 /* Search for main or below main function.
994 To limit the nr of ips to examine, we maintain the deepest
995 offset where main was found, and we first search main
997 If no main is found, we will then do a search for main or
998 below main function till the top. */
999 static Int deepest_main
= 0;
1000 Vg_FnNameKind kind
= Vg_FnNameNormal
;
1001 Int mbm
= n_ips
- 1; // Position of deepest main or below main.
1002 Vg_FnNameKind mbmkind
= Vg_FnNameNormal
;
1005 for (i
= n_ips
- 1 - deepest_main
;
1008 mbmkind
= VG_(get_fnname_kind_from_IP
)(ep
, ips
[i
]);
1009 if (mbmkind
!= Vg_FnNameNormal
) {
1015 /* Search for main or below main function till top. */
1017 i
>= 0 && mbmkind
!= Vg_FnNameMain
;
1019 kind
= VG_(get_fnname_kind_from_IP
)(ep
, ips
[i
]);
1020 if (kind
!= Vg_FnNameNormal
) {
1025 if (Vg_FnNameMain
== mbmkind
|| Vg_FnNameBelowMain
== mbmkind
) {
1026 if (mbmkind
== Vg_FnNameMain
&& (n_ips
- 1 - mbm
) > deepest_main
)
1027 deepest_main
= n_ips
- 1 - mbm
;
1033 void VG_(XT_filter_1top_and_maybe_below_main
)
1034 (Addr
* ips
, Int n_ips
,
1035 UInt
* top
, UInt
* n_ips_sel
)
1045 /* Filter top function. */
1048 if (VG_(clo_show_below_main
))
1051 // FIXME PW EPOCH : use the real ips epoch
1052 const DiEpoch cur_ep
= VG_(current_DiEpoch
)();
1053 mbm
= VG_(XT_offset_main_or_below_main
)(cur_ep
, ips
, n_ips
);
1056 *n_ips_sel
= mbm
- *top
+ 1;
1059 void VG_(XT_filter_maybe_below_main
)
1060 (Addr
* ips
, Int n_ips
,
1061 UInt
* top
, UInt
* n_ips_sel
)
1070 if (VG_(clo_show_below_main
))
1073 // FIXME PW EPOCH : use the real ips epoch
1074 const DiEpoch cur_ep
= VG_(current_DiEpoch
)();
1075 mbm
= VG_(XT_offset_main_or_below_main
)(cur_ep
, ips
, n_ips
);
1077 *n_ips_sel
= mbm
- *top
+ 1;
1080 /*--------------------------------------------------------------------*/
1081 /*--- end m_xtree.c ---*/
1082 /*--------------------------------------------------------------------*/