2 /*--------------------------------------------------------------------*/
3 /*--- An xtree, tree of stacktraces with data m_xtree.c ---*/
4 /*--------------------------------------------------------------------*/
7 This file is part of Valgrind, a dynamic binary instrumentation
10 Copyright (C) 2016-2017 Philippe Waroquiers
12 This code generalises the XTree idea that was implemented in
13 the massif tool in Valgrind versions <= 3.12, which is
14 Copyright (C) 2005-2017 Nicholas Nethercote
17 The XTree implementation in this file is however implemented completely
18 differently. Some code has been re-used for the production of the
19 massif file header (e.g. FP_cmd function).
21 This program is free software; you can redistribute it and/or
22 modify it under the terms of the GNU General Public License as
23 published by the Free Software Foundation; either version 2 of the
24 License, or (at your option) any later version.
26 This program is distributed in the hope that it will be useful, but
27 WITHOUT ANY WARRANTY; without even the implied warranty of
28 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
29 General Public License for more details.
31 You should have received a copy of the GNU General Public License
32 along with this program; if not, see <http://www.gnu.org/licenses/>.
34 The GNU General Public License is contained in the file COPYING.
37 #include "pub_core_basics.h"
38 #include "pub_core_debuglog.h"
39 #include "pub_core_clientstate.h"
40 #include "pub_core_stacktrace.h"
41 #include "pub_core_execontext.h"
42 #include "pub_core_libcbase.h"
43 #include "pub_core_libcassert.h"
44 #include "pub_core_libcfile.h"
45 #include "pub_core_libcprint.h"
46 #include "pub_core_libcproc.h"
47 #include "pub_core_hashtable.h"
48 #include "pub_core_mallocfree.h"
49 #include "pub_core_options.h"
50 #include "pub_core_debuginfo.h"
51 #include "pub_core_deduppoolalloc.h"
52 #include "pub_core_xtree.h" /* self */
54 #define DMSG(level, ...) (level <= VG_(debugLog_getLevel)() ? \
55 VG_(dmsg)(__VA_ARGS__) \
58 /* Defines the relevant part of an ec. This is shared between an xt
59 and its snapshots (see XT_shared XArray of xec). */
63 UShort top
; // The first ips of ec to take into account.
64 UShort n_ips_sel
; // The nr of ips from top to take into account.
68 /* XT_shared maintains the information shared between an XT and all
72 UWord nrRef
; /* nr of XTrees referencing this shared memory. */
74 Alloc_Fn_t alloc_fn
; /* alloc fn (nofail) */
75 const HChar
* cc
; /* cost centre for alloc */
76 Free_Fn_t free_fn
; /* free fn */
78 /* The data associated to each ec is stored in 2 arrays:
79 an xec array, shared between an xt and all its snapshots.
80 a data array, private to each XTree.
81 For an ec with an ECU ecu, d4ecu2xecu[ecu/4] gives the offset in
82 xec and data arrays where the ec information is located (this
83 indirection is used to avoid huge xec and data arrays, in
84 case an XTree contains data only for a small number of ec.
85 The offset in the xec and data array is used as xtree ec unique
88 UInt d4ecu2xecu_sz
; /* size of d4ecu2xecu (in nr of elements). */
91 /* ec information common to an xt and its snapshots. */
92 XArray
* xec
; /* XArray of xec, indexed by xecu (== d4ecu2xecu[ecu/4]). */
94 /* XArray of xecu, sorted by StackTrace ips[top..top+n_ips_sel-1].
96 XArray
* ips_order_xecu
;
99 /* NO_OFFSET indicates in d4ecu2xecu there is no data (yet) for this ec
100 (with the index ecu/4). */
101 #define NO_OFFSET 0xffffffff
103 static XT_shared
* new_XT_shared (Alloc_Fn_t alloc_fn
,
105 void (*free_fn
)(void*))
112 shared
= alloc_fn(cc
, sizeof(*shared
));
114 shared
->alloc_fn
= alloc_fn
;
116 shared
->free_fn
= free_fn
;
118 shared
->d4ecu2xecu_sz
= 0;
119 shared
->d4ecu2xecu
= NULL
;
120 shared
->xec
= VG_(newXA
)(alloc_fn
, cc
, free_fn
, sizeof(xec
));
121 shared
->ips_order_xecu
= NULL
; // Allocated when needed.
126 static void delete_XT_shared (XT_shared
* shared
)
128 vg_assert(shared
->nrRef
== 0);
129 shared
->free_fn(shared
->d4ecu2xecu
);
130 VG_(deleteXA
)(shared
->xec
);
131 if (shared
->ips_order_xecu
!= NULL
)
132 VG_(deleteXA
)(shared
->ips_order_xecu
);
133 shared
->free_fn(shared
);
136 /* Compare 2 entries in ips_order_xecu by StackTrace elements.
137 In case stack traces are of different length, an 'absent' ips is
138 considered smaller than any other address. */
139 static XArray
* xec_data_for_sort
; // Needed to translate an xecu into an xec
140 static Int
ips_order_cmp(const void* vleft
, const void* vright
)
142 const Xecu left_xecu
= *(const Xecu
*)vleft
;
143 const Xecu right_xecu
= *(const Xecu
*)vright
;
144 const xec
* left
= VG_(indexXA
)(xec_data_for_sort
, left_xecu
);
145 const xec
* right
= VG_(indexXA
)(xec_data_for_sort
, right_xecu
);
146 const StackTrace left_ips
= VG_(get_ExeContext_StackTrace
)(left
->ec
)
148 const StackTrace right_ips
= VG_(get_ExeContext_StackTrace
)(right
->ec
)
152 const UInt c_n_ips_sel
= left
->n_ips_sel
< right
->n_ips_sel
153 ? left
->n_ips_sel
: right
->n_ips_sel
;
155 // First see if we have a difference on the common nr of ips selected
156 for (i
= 0; i
< c_n_ips_sel
; i
++) {
157 if (left_ips
[i
] == right_ips
[i
]) continue;
158 if (left_ips
[i
] < right_ips
[i
]) return -1;
161 // Common part is equal => compare lengths.
162 if (left
->n_ips_sel
< right
->n_ips_sel
) return -1;
163 if (left
->n_ips_sel
> right
->n_ips_sel
) return 1;
167 // If needed, build or refresh shared->ips_order_xecu
168 static void ensure_ips_order_xecu_valid(XT_shared
* shared
)
173 if (shared
->ips_order_xecu
== NULL
) {
174 shared
->ips_order_xecu
= VG_(newXA
)(shared
->alloc_fn
, shared
->cc
,
175 shared
->free_fn
, sizeof(Xecu
));
176 VG_(hintSizeXA
)(shared
->ips_order_xecu
, VG_(sizeXA
)(shared
->xec
));
177 VG_(setCmpFnXA
)(shared
->ips_order_xecu
, ips_order_cmp
);
180 if (VG_(sizeXA
)(shared
->xec
) == VG_(sizeXA
)(shared
->ips_order_xecu
))
183 n_xecu
= VG_(sizeXA
)(shared
->xec
);
184 for (i
= VG_(sizeXA
)(shared
->ips_order_xecu
); i
< n_xecu
; i
++)
185 VG_(addToXA
)(shared
->ips_order_xecu
, &i
);
187 xec_data_for_sort
= shared
->xec
;
188 VG_(sortXA
)(shared
->ips_order_xecu
);
191 static void addRef_XT_shared (XT_shared
* shared
)
196 static UWord
release_XT_shared(XT_shared
* shared
)
200 vg_assert(shared
->nrRef
> 0);
201 nrRef
= --shared
->nrRef
;
203 delete_XT_shared(shared
);
209 Alloc_Fn_t alloc_fn
; /* alloc fn (nofail) */
210 const HChar
* cc
; /* cost centre for alloc */
211 Free_Fn_t free_fn
; /* free fn */
212 Word dataSzB
; /* data size in bytes */
213 XT_init_data_t init_data_fn
;
214 XT_add_data_t add_data_fn
;
215 XT_sub_data_t sub_data_fn
;
216 XT_filter_IPs_t filter_IPs_fn
;
220 HChar
* tmp_data
; /* temporary buffer, to insert new elements. */
221 XArray
* data
; /* of elements of size dataSzB */
225 XTree
* VG_(XT_create
) ( Alloc_Fn_t alloc_fn
,
229 XT_init_data_t init_data_fn
,
230 XT_add_data_t add_data_fn
,
231 XT_sub_data_t sub_data_fn
,
232 XT_filter_IPs_t filter_IPs_fn
)
236 /* check user-supplied info .. */
239 vg_assert(dataSzB
>= 0);
240 vg_assert(init_data_fn
);
241 vg_assert(add_data_fn
);
242 vg_assert(sub_data_fn
);
244 xt
= alloc_fn(cc
, sizeof(struct _XTree
) );
245 xt
->alloc_fn
= alloc_fn
;
247 xt
->free_fn
= free_fn
;
248 xt
->dataSzB
= dataSzB
;
249 xt
->init_data_fn
= init_data_fn
;
250 xt
->add_data_fn
= add_data_fn
;
251 xt
->sub_data_fn
= sub_data_fn
;
252 xt
->filter_IPs_fn
= filter_IPs_fn
;
254 xt
->shared
= new_XT_shared(alloc_fn
, cc
, free_fn
);
255 addRef_XT_shared(xt
->shared
);
256 xt
->tmp_data
= alloc_fn(cc
, xt
->dataSzB
);
257 xt
->data
= VG_(newXA
)(alloc_fn
, cc
, free_fn
, dataSzB
);
262 XTree
* VG_(XT_snapshot
)(XTree
* xt
)
268 nxt
= xt
->alloc_fn(xt
->cc
, sizeof(struct _XTree
) );
271 addRef_XT_shared(nxt
->shared
);
272 nxt
->tmp_data
= nxt
->alloc_fn(nxt
->cc
, nxt
->dataSzB
);
273 nxt
->data
= VG_(cloneXA
)(nxt
->cc
, xt
->data
);
278 void VG_(XT_delete
) ( XTree
* xt
)
282 release_XT_shared(xt
->shared
);
283 xt
->free_fn(xt
->tmp_data
);
284 VG_(deleteXA
)(xt
->data
);
288 static Xecu
find_or_insert (XTree
* xt
, ExeContext
* ec
)
291 const UInt d4ecu
= VG_(get_ECU_from_ExeContext
)(ec
) / 4;
292 XT_shared
* shared
= xt
->shared
;
294 /* First grow the d4ecu2xecu array if needed. */
295 if (d4ecu
>= shared
->d4ecu2xecu_sz
) {
296 UInt old_sz
= shared
->d4ecu2xecu_sz
;
297 UInt new_sz
= (3 * d4ecu
) / 2;
301 shared
->d4ecu2xecu
= VG_(realloc
)(xt
->cc
, shared
->d4ecu2xecu
,
302 new_sz
* sizeof(UInt
));
303 shared
->d4ecu2xecu_sz
= new_sz
;
304 for (UInt i
= old_sz
; i
< new_sz
; i
++)
305 shared
->d4ecu2xecu
[i
] = NO_OFFSET
;
308 if (shared
->d4ecu2xecu
[d4ecu
] == NO_OFFSET
) {
312 if (xt
->filter_IPs_fn
== NULL
) {
314 xe
.n_ips_sel
= (UShort
)VG_(get_ExeContext_n_ips
)(xe
.ec
);
317 UInt n_ips_sel
= VG_(get_ExeContext_n_ips
)(xe
.ec
);
318 xt
->filter_IPs_fn(VG_(get_ExeContext_StackTrace
)(xe
.ec
), n_ips_sel
,
320 xe
.top
= (UShort
)top
;
321 xe
.n_ips_sel
= (UShort
)n_ips_sel
;
323 xt
->init_data_fn(xt
->tmp_data
);
324 VG_(addToXA
)(shared
->xec
, &xe
);
325 shared
->d4ecu2xecu
[d4ecu
] = (UInt
)VG_(addToXA
)(xt
->data
, xt
->tmp_data
);
328 return shared
->d4ecu2xecu
[d4ecu
];
331 Xecu
VG_(XT_add_to_ec
) (XTree
* xt
, ExeContext
* ec
, const void* value
)
333 Xecu xecu
= find_or_insert(xt
, ec
);
334 void* data
= VG_(indexXA
)(xt
->data
, xecu
);
336 xt
->add_data_fn(data
, value
);
340 Xecu
VG_(XT_sub_from_ec
) (XTree
* xt
, ExeContext
* ec
, const void* value
)
342 Xecu xecu
= find_or_insert(xt
, ec
);
343 void* data
= VG_(indexXA
)(xt
->data
, xecu
);
345 xt
->sub_data_fn(data
, value
);
349 void VG_(XT_add_to_xecu
) (XTree
* xt
, Xecu xecu
, const void* value
)
351 void* data
= VG_(indexXA
)(xt
->data
, xecu
);
352 xt
->add_data_fn(data
, value
);
355 void VG_(XT_sub_from_xecu
) (XTree
* xt
, Xecu xecu
, const void* value
)
357 void* data
= VG_(indexXA
)(xt
->data
, xecu
);
358 xt
->sub_data_fn(data
, value
);
361 UInt
VG_(XT_n_ips_sel
) (XTree
* xt
, Xecu xecu
)
363 xec
* xe
= (xec
*)VG_(indexXA
)(xt
->shared
->xec
, xecu
);
364 return (UInt
)xe
->n_ips_sel
;
367 ExeContext
* VG_(XT_get_ec_from_xecu
) (XTree
* xt
, Xecu xecu
)
369 xec
* xe
= (xec
*)VG_(indexXA
)(xt
->shared
->xec
, xecu
);
373 static VgFile
* xt_open (const HChar
* outfilename
)
377 fp
= VG_(fopen
)(outfilename
, VKI_O_CREAT
|VKI_O_WRONLY
|VKI_O_TRUNC
,
378 VKI_S_IRUSR
|VKI_S_IWUSR
|VKI_S_IRGRP
|VKI_S_IROTH
);
380 VG_(message
)(Vg_UserMsg
,
381 "Error: can not open xtree output file `%s'\n",
387 #define FP(format, args...) ({ VG_(fprintf)(fp, format, ##args); })
389 // Print "cmd:" line.
390 static void FP_cmd(VgFile
* fp
)
395 FP("%s", VG_(args_the_exename
));
396 for (i
= 0; i
< VG_(sizeXA
)(VG_(args_for_client
)); i
++) {
397 HChar
* arg
= * (HChar
**) VG_(indexXA
)(VG_(args_for_client
), i
);
403 /* ----------- Callgrind output ------------------------------------------- */
405 /* Output a callgrind format element in compressed format:
406 "name=(pos)" or "name=(pos) value" (if value_new)
407 or not compressed format: "name=value"
408 VG_(clo_xtree_compress_strings) indicates if the compressed format is used.
409 name is the format element (e.g. fl, fn, cfi, cfn, ...).
410 pos is the value dictionary position, used for compressed format.
411 value_new is True if this is the first usage of value. */
412 static void FP_pos_str(VgFile
* fp
, const HChar
* name
, UInt pos
,
413 const HChar
* value
, Bool value_new
)
415 if (!VG_(clo_xtree_compress_strings
))
416 FP("%s=%s\n", name
, value
);
418 FP("%s=(%u) %s\n", name
, pos
, value
);
420 FP("%s=(%u)\n", name
, pos
);
423 void VG_(XT_callgrind_print
)
425 const HChar
* outfilename
,
427 const HChar
* (*img_value
)(const void* value
))
430 XT_shared
* shared
= xt
->shared
;
431 VgFile
* fp
= xt_open(outfilename
);
432 DedupPoolAlloc
* fnname_ddpa
;
433 DedupPoolAlloc
* filename_ddpa
;
434 HChar
* filename_buf
= NULL
;
435 UInt filename_buf_size
= 0;
436 const HChar
* filename_dir
;
437 const HChar
* filename_name
;
442 fnname_ddpa
= VG_(newDedupPA
)(16000, 1, xt
->alloc_fn
,
443 "XT_callgrind_print.fn", xt
->free_fn
);
444 filename_ddpa
= VG_(newDedupPA
)(16000, 1, xt
->alloc_fn
,
445 "XT_callgrind_print.fl", xt
->free_fn
);
447 FP("# callgrind format\n");
449 FP("creator: xtree-1\n");
450 FP("pid: %d\n", VG_(getpid
)());
453 /* Currently, we only need/support line positions. */
454 FP("\npositions:%s\n", " line");
456 /* Produce one "event:" line for each event, and the "events:" line. */
458 HChar strtok_events
[VG_(strlen
)(events
)+1];
463 VG_(strcpy
)(strtok_events
, events
);
464 for (e
= VG_(strtok_r
)(strtok_events
, ",", &ssaveptr
);
466 e
= VG_(strtok_r
)(NULL
, ",", &ssaveptr
))
467 FP("event: %s\n", e
);
469 VG_(strcpy
)(strtok_events
, events
);
470 for (e
= VG_(strtok_r
)(strtok_events
, ",", &ssaveptr
);
472 e
= VG_(strtok_r
)(NULL
, ",", &ssaveptr
)) {
483 xt
->init_data_fn(xt
->tmp_data
); // to compute totals
485 n_xecu
= VG_(sizeXA
)(xt
->data
);
486 vg_assert (n_xecu
<= VG_(sizeXA
)(shared
->xec
));
487 for (Xecu xecu
= 0; xecu
< n_xecu
; xecu
++) {
488 xec
* xe
= (xec
*)VG_(indexXA
)(shared
->xec
, xecu
);
489 if (xe
->n_ips_sel
== 0)
492 const HChar
* img
= img_value(VG_(indexXA
)(xt
->data
, xecu
));
494 // CALLED_FLF gets the Dir+Filename/Line number/Function name for ips[n]
495 // in the variables called_filename/called_linenum/called_fnname.
496 // The booleans called_filename_new/called_fnname_new are set to True
497 // the first time the called_filename/called_fnname are encountered.
498 // The called_filename_nr/called_fnname_nr are numbers identifying
499 // the strings called_filename/called_fnname.
500 #define CALLED_FLF(n) \
502 || !VG_(get_filename_linenum)(ep, ips[(n)], \
505 &called_linenum)) { \
506 filename_name = "UnknownFile???"; \
507 called_linenum = 0; \
510 || !VG_(get_fnname)(ep, ips[(n)], &called_fnname)) { \
511 called_fnname = "UnknownFn???"; \
514 UInt needed_size = VG_(strlen)(filename_dir) + 1 \
515 + VG_(strlen)(filename_name) + 1; \
516 if (filename_buf_size < needed_size) { \
517 filename_buf_size = needed_size; \
518 filename_buf = VG_(realloc)(xt->cc, filename_buf, \
519 filename_buf_size); \
522 VG_(strcpy)(filename_buf, filename_dir); \
523 if (filename_buf[0] != '\0') { \
524 VG_(strcat)(filename_buf, "/"); \
526 VG_(strcat)(filename_buf, filename_name); \
527 called_filename_nr = VG_(allocStrDedupPA)(filename_ddpa, \
529 &called_filename_new); \
530 called_filename = filename_buf; \
531 called_fnname_nr = VG_(allocStrDedupPA)(fnname_ddpa, \
535 /* Instead of unknown fnname ???, CALLED_FLF could use instead:
536 VG_(sprintf)(unknown_fn, "%p", (void*)ips[(n)]);
537 but that creates a lot of (useless) nodes at least for
538 valgrind self-hosting. */
541 const HChar
* called_filename
;
542 UInt called_filename_nr
;
543 Bool called_filename_new
; // True the first time we see this filename.
544 const HChar
* called_fnname
;
545 UInt called_fnname_nr
;
546 Bool called_fnname_new
; // True the first time we see this fnname.
550 const Addr
* ips
= VG_(get_ExeContext_StackTrace
)(xe
->ec
) + xe
->top
;
551 const DiEpoch ep
= VG_(get_ExeContext_epoch
)(xe
->ec
);
553 Int ips_idx
= xe
->n_ips_sel
- 1;
556 VG_(printf
)("entry img %s\n", img
);
557 VG_(pp_ExeContext
)(xe
->ec
);
560 xt
->add_data_fn(xt
->tmp_data
, VG_(indexXA
)(xt
->data
, xecu
));
565 FP_pos_str(fp
, "fl", called_filename_nr
,
566 called_filename
, called_filename_new
);
567 FP_pos_str(fp
, "fn", called_fnname_nr
,
568 called_fnname
, called_fnname_new
);
570 FP("%u %s\n", called_linenum
, img
);
572 FP("%u\n", called_linenum
); //no self cost.
573 prev_linenum
= called_linenum
;
575 CALLED_FLF(ips_idx
-1);
576 FP_pos_str(fp
, "cfi", called_filename_nr
,
577 called_filename
, called_filename_new
);
578 FP_pos_str(fp
, "cfn", called_fnname_nr
,
579 called_fnname
, called_fnname_new
);
580 called_filename_new
= False
;
581 called_fnname_new
= False
;
582 /* Giving a call count of 0 allows kcachegrind to hide the calls
583 column. A call count of 1 means kcachegrind would show in the
584 calls column the nr of stacktrace containing this arc, which
585 is very confusing. So, the less bad is to give a 0 call
587 FP("calls=0 %u\n", called_linenum
);
588 FP("%u %s\n", prev_linenum
, img
);
594 /* callgrind format is not really fully supporting (yet?) execution trees:
595 in an execution tree, self and inclusive costs are identical, and
596 cannot be added together.
597 If no totals: line is given, callgrind_annotate calculates the addition
598 of all costs, and so gives a wrong totals.
599 Giving a totals: line solves this, but the user must give the option
600 --inclusive=yes (kind of hack) to have all the functions given
601 in the output file. */
602 FP("totals: %s\n", img_value(xt
->tmp_data
));
604 VG_(deleteDedupPA
)(fnname_ddpa
);
605 VG_(deleteDedupPA
)(filename_ddpa
);
606 VG_(free
)(filename_buf
);
610 /* ----------- Massif output ---------------------------------------------- */
612 /* For Massif output, some functions from the execontext are not output, a.o.
613 the allocation functions at the top of the stack and the functions below
614 main. So, the StackTrace of the execontexts in the xtree must be filtered.
615 Ms_Ec defines the subset of the stacktrace relevant for the report. */
618 StackTrace ips
; // ips and n_ips provides the subset of the xtree ec
619 UInt n_ips
; // to use for a massif report.
621 SizeT report_value
; // The value to report for this stack trace.
624 /* Ms_Group defines (at a certain depth) a group of ec context that
625 have the same IPs at the given depth, and have the same 'parent'.
626 total is the sum of the values of all group elements.
627 A Ms_Group can also represent a set of ec contexts that do not
628 have the same IP, but that have each a total which is below the
629 significant size. Such a group has a NULL ms_ec, a zero group_io.
630 n_ec is the nr of insignificant ec that have been collected inside this
631 insignificant group, and total is the sum of all non significant ec
632 at the given depth. */
635 Ms_Ec
* ms_ec
; // The group consists in ms_ec[0 .. n_ec-1]
641 /* Compare 2 groups by total, to have bigger total first. */
642 static Int
ms_group_revcmp_total(const void* vleft
, const void* vright
)
644 const Ms_Group
* left
= (const Ms_Group
*)vleft
;
645 const Ms_Group
* right
= (const Ms_Group
*)vright
;
647 // First reverse compare total
648 if (left
->total
> right
->total
) return -1;
649 if (left
->total
< right
->total
) return 1;
651 /* Equal size => compare IPs.
652 This (somewhat?) helps to have deterministic test results.
653 If this would change between platforms, then we should compare
654 function names/filename/linenr */
655 if (left
->group_ip
< right
->group_ip
) return -1;
656 if (left
->group_ip
> right
->group_ip
) return 1;
660 /* Scan the addresses in ms_ec at the given depth.
662 *groups points to an array of Ms_Group sorted by total.
663 *n_groups is the nr of groups
664 The caller is responsible to free the allocated group array. */
665 static void ms_make_groups (UInt depth
, Ms_Ec
* ms_ec
, UInt n_ec
, SizeT sig_sz
,
666 UInt
* n_groups
, Ms_Group
** groups
)
669 Addr cur_group_ip
= 0;
673 /* Handle special case somewhat more efficiently */
679 /* Compute how many groups we have. */
680 for (i
= 0; i
< n_ec
; i
++) {
681 if (ms_ec
[i
].n_ips
> depth
682 && (*n_groups
== 0 || cur_group_ip
!= ms_ec
[i
].ips
[depth
])) {
684 cur_group_ip
= ms_ec
[i
].ips
[depth
];
688 /* make the group array. */
689 *groups
= VG_(malloc
)("ms_make_groups", *n_groups
* sizeof(Ms_Group
));
691 for (g
= 0; g
< *n_groups
; g
++) {
692 while (ms_ec
[i
].n_ips
<= depth
)
694 cur_group_ip
= ms_ec
[i
].ips
[depth
];
695 (*groups
)[g
].group_ip
= cur_group_ip
;
696 (*groups
)[g
].ms_ec
= &ms_ec
[i
];
697 (*groups
)[g
].n_ec
= 1;
698 (*groups
)[g
].total
= ms_ec
[i
].report_value
;
701 && ms_ec
[i
].n_ips
> depth
702 && cur_group_ip
== ms_ec
[i
].ips
[depth
]) {
703 (*groups
)[g
].total
+= ms_ec
[i
].report_value
;
709 /* Search for insignificant groups, collect them all together
710 in the first insignificant group, and compact the group array. */
712 UInt insig1
; // Position of first insignificant group.
713 UInt n_insig
= 0; // Nr of insignificant groups found.
715 for (g
= 0; g
< *n_groups
; g
++) {
716 if ((*groups
)[g
].total
< sig_sz
) {
718 // First insig group => transform it into the special group
719 (*groups
)[g
].ms_ec
= NULL
;
720 (*groups
)[g
].group_ip
= 0;
721 (*groups
)[g
].n_ec
= 0;
722 // start the sum of insig total as total
725 // Add this insig group total into insig1 first group
726 (*groups
)[insig1
].total
+= (*groups
)[g
].total
;
731 (*groups
)[g
- n_insig
+ 1] = (*groups
)[g
];
735 (*groups
)[insig1
].n_ec
= n_insig
;
736 *n_groups
-= n_insig
- 1;
738 DMSG(1, "depth %u n_groups %u n_insig %u\n", depth
, *n_groups
, n_insig
);
741 /* Sort on total size, bigger size first. */
742 VG_(ssort
)(*groups
, *n_groups
, sizeof(Ms_Group
), ms_group_revcmp_total
);
745 /* Output the given group (located in an xtree at the given depth).
746 indent tells by how much to indent the information output for the group.
747 indent can be bigger than depth when outputting a group that is made
748 of one or more inlined calls: all inlined calls are output with the
749 same depth but with one more indent for each inlined call. */
750 static void ms_output_group (VgFile
* fp
, UInt depth
, UInt indent
,
751 Ms_Group
* group
, SizeT sig_sz
,
752 double sig_pct_threshold
)
758 // If this is an insignificant group, handle it specially
759 if (group
->ms_ec
== NULL
) {
760 const HChar
* s
= ( 1 == group
->n_ec
? "," : "s, all" );
761 vg_assert(group
->group_ip
== 0);
762 FP("%*sn0: %lu in %u place%s below massif's threshold (%.2f%%)\n",
763 (Int
)(indent
+1), "", group
->total
, group
->n_ec
, s
, sig_pct_threshold
);
767 // Normal group => output the group and its subgroups.
768 ms_make_groups(depth
+1, group
->ms_ec
, group
->n_ec
, sig_sz
,
771 // FIXME JRS EPOCH 28 July 2017: HACK! Is this correct?
772 const DiEpoch cur_ep
= VG_(current_DiEpoch
)();
773 // // FIXME PW EPOCH : No, the above is not correct.
774 // Xtree Massif output regroups execontext in the layout of a 'tree'.
775 // So, possibly, the same IP address value can be in 2 different ec, but
776 // the epoch to symbolise this address must be retrieved from the ec it
778 // So, to fix this, it is not enough to make a group based on identical
779 // IP addr value, one must also find the di used to symbolise this address,
780 // A group will then be defined as 'same IP and same di'.
781 // Fix not trivial to do, so for the moment, --keep-debuginfo=yes will
782 // have no impact on xtree massif output.
784 Addr cur_ip
= group
->ms_ec
->ips
[depth
];
786 InlIPCursor
*iipc
= VG_(new_IIPC
)(cur_ep
, cur_ip
);
789 const HChar
* buf
= VG_(describe_IP
)(cur_ep
, cur_ip
, iipc
);
790 Bool is_inlined
= VG_(next_IIPC
)(iipc
);
792 FP("%*s" "n%u: %lu %s\n",
793 (Int
)(indent
+ 1), "",
794 is_inlined
? 1 : n_groups
, // Inlined frames always have one child.
805 VG_(delete_IIPC
)(iipc
);
807 /* Output sub groups of this group. */
808 for (i
= 0; i
< n_groups
; i
++)
809 ms_output_group(fp
, depth
+1, indent
+1, &groups
[i
], sig_sz
,
815 /* Allocate and build an array of Ms_Ec sorted by addresses in the
817 static void prepare_ms_ec (XTree
* xt
,
818 ULong (*report_value
)(const void* value
),
819 ULong
* top_total
, Ms_Ec
** vms_ec
, UInt
* vn_ec
)
821 XT_shared
* shared
= xt
->shared
;
822 const UInt n_xecu
= VG_(sizeXA
)(shared
->xec
);
823 const UInt n_data_xecu
= VG_(sizeXA
)(xt
->data
);
824 Ms_Ec
* ms_ec
= VG_(malloc
)("XT_massif_print.ms_ec", n_xecu
* sizeof(Ms_Ec
));
825 UInt n_xecu_sel
= 0; // Nr of xecu that are selected for output.
827 vg_assert(n_data_xecu
<= n_xecu
);
829 // Ensure we have in shared->ips_order_xecu our xecu sorted by StackTrace.
830 ensure_ips_order_xecu_valid(shared
);
833 DMSG(1, "iteration %u\n", n_xecu
);
834 for (UInt i
= 0; i
< n_xecu
; i
++) {
835 Xecu xecu
= *(Xecu
*)VG_(indexXA
)(shared
->ips_order_xecu
, i
);
836 xec
* xe
= (xec
*)VG_(indexXA
)(shared
->xec
, xecu
);
838 if (xecu
>= n_data_xecu
)
839 continue; // No data for this xecu in xt->data.
840 ms_ec
[n_xecu_sel
].n_ips
= xe
->n_ips_sel
;
841 if (ms_ec
[n_xecu_sel
].n_ips
== 0)
844 ms_ec
[n_xecu_sel
].ips
= VG_(get_ExeContext_StackTrace
)(xe
->ec
) + xe
->top
;
845 ms_ec
[n_xecu_sel
].report_value
846 = (*report_value
)(VG_(indexXA
)(xt
->data
, xecu
));
847 *top_total
+= ms_ec
[n_xecu_sel
].report_value
;
851 vg_assert(n_xecu_sel
<= n_xecu
);
857 MsFile
* VG_(XT_massif_open
)
858 (const HChar
* outfilename
,
860 const XArray
* desc_args
,
861 const HChar
* time_unit
)
864 VgFile
* fp
= xt_open(outfilename
);
867 return NULL
; // xt_open reported the error.
869 /* ------ file header ------------------------------- */
875 for (i
= 0; i
< VG_(sizeXA
)(desc_args
); i
++) {
876 HChar
* arg
= *(HChar
**)VG_(indexXA
)(desc_args
, i
);
880 if (0 == i
&& desc
== NULL
) FP(" (none)");
885 FP("time_unit: %s\n", time_unit
);
890 void VG_(XT_massif_close
)(MsFile
* fp
)
893 return; // Error should have been reported by VG_(XT_massif_open)
898 void VG_(XT_massif_print
)
901 const Massif_Header
* header
,
902 ULong (*report_value
)(const void* value
))
907 return; // Normally VG_(XT_massif_open) already reported an error.
909 /* Compute/prepare Snapshot totals/data/... */
912 /* Following variables only used for detailed snapshot. */
916 header
->detailed
? (header
->peak
? "peak" : "detailed") : "empty";
918 DMSG(1, "XT_massif_print %s\n", kind
);
919 if (header
->detailed
) {
920 /* Prepare the Ms_Ec sorted array of stacktraces and the groups
922 prepare_ms_ec(xt
, report_value
, &top_total
, &ms_ec
, &n_ec
);
923 DMSG(1, "XT_print_massif ms_ec n_ec %u\n", n_ec
);
924 } else if (xt
== NULL
) {
925 /* Non detailed, no xt => use the sz provided in the header. */
926 top_total
= header
->sz_B
;
928 /* For non detailed snapshot, compute total directly from the xec. */
929 const XT_shared
* shared
= xt
->shared
;
930 const UInt n_xecu
= VG_(sizeXA
)(xt
->data
);
933 for (UInt xecu
= 0; xecu
< n_xecu
; xecu
++) {
934 xec
* xe
= (xec
*)VG_(indexXA
)(shared
->xec
, xecu
);
935 if (xe
->n_ips_sel
== 0)
937 top_total
+= (*report_value
)(VG_(indexXA
)(xt
->data
, xecu
));
941 /* ------ snapshot header --------------------------- */
942 FP("#-----------\n");
943 FP("snapshot=%d\n", header
->snapshot_n
);
944 FP("#-----------\n");
945 FP("time=%lld\n", header
->time
);
947 FP("mem_heap_B=%llu\n", top_total
); // without extra_B and without stacks_B
948 FP("mem_heap_extra_B=%llu\n", header
->extra_B
);
949 FP("mem_stacks_B=%llu\n", header
->stacks_B
);
950 FP("heap_tree=%s\n", kind
);
952 /* ------ detailed snapshot data ----------------------------- */
953 if (header
->detailed
) {
958 // Work out how big a child must be to be significant. If the current
959 // top_total is zero, then we set it to 1, which means everything will be
960 // judged insignificant -- this is sensible, as there's no point showing
961 // any detail for this case. Unless they used threshold=0, in which
962 // case we show them everything because that's what they asked for.
964 // Nb: We do this once now, rather than once per child, because if we do
965 // that the cost of all the divisions adds up to something significant.
966 if (0 == top_total
&& 0 != header
->sig_threshold
)
969 sig_sz
= ((top_total
+ header
->extra_B
+ header
->stacks_B
)
970 * header
->sig_threshold
) / 100;
972 /* Produce the groups at depth 0 */
973 DMSG(1, "XT_massif_print producing depth 0 groups\n");
974 ms_make_groups(0, ms_ec
, n_ec
, sig_sz
, &n_groups
, &groups
);
976 /* Output the top node. */
977 FP("n%u: %llu %s\n", n_groups
, top_total
, header
->top_node_desc
);
979 /* Output depth 0 groups. */
980 DMSG(1, "XT_massif_print outputting %u depth 0 groups\n", n_groups
);
981 for (i
= 0; i
< n_groups
; i
++)
982 ms_output_group(fp
, 0, 0, &groups
[i
], sig_sz
, header
->sig_threshold
);
989 Int
VG_(XT_offset_main_or_below_main
)(DiEpoch ep
, Addr
* ips
, Int n_ips
)
991 /* Search for main or below main function.
992 To limit the nr of ips to examine, we maintain the deepest
993 offset where main was found, and we first search main
995 If no main is found, we will then do a search for main or
996 below main function till the top. */
997 static Int deepest_main
= 0;
998 Vg_FnNameKind kind
= Vg_FnNameNormal
;
999 Int mbm
= n_ips
- 1; // Position of deepest main or below main.
1000 Vg_FnNameKind mbmkind
= Vg_FnNameNormal
;
1003 for (i
= n_ips
- 1 - deepest_main
;
1006 mbmkind
= VG_(get_fnname_kind_from_IP
)(ep
, ips
[i
]);
1007 if (mbmkind
!= Vg_FnNameNormal
) {
1013 /* Search for main or below main function till top. */
1015 i
>= 0 && mbmkind
!= Vg_FnNameMain
;
1017 kind
= VG_(get_fnname_kind_from_IP
)(ep
, ips
[i
]);
1018 if (kind
!= Vg_FnNameNormal
) {
1023 if (Vg_FnNameMain
== mbmkind
|| Vg_FnNameBelowMain
== mbmkind
) {
1024 if (mbmkind
== Vg_FnNameMain
&& (n_ips
- 1 - mbm
) > deepest_main
)
1025 deepest_main
= n_ips
- 1 - mbm
;
1031 void VG_(XT_filter_1top_and_maybe_below_main
)
1032 (Addr
* ips
, Int n_ips
,
1033 UInt
* top
, UInt
* n_ips_sel
)
1043 /* Filter top function. */
1046 if (VG_(clo_show_below_main
))
1049 // FIXME PW EPOCH : use the real ips epoch
1050 const DiEpoch cur_ep
= VG_(current_DiEpoch
)();
1051 mbm
= VG_(XT_offset_main_or_below_main
)(cur_ep
, ips
, n_ips
);
1054 *n_ips_sel
= mbm
- *top
+ 1;
1057 void VG_(XT_filter_maybe_below_main
)
1058 (Addr
* ips
, Int n_ips
,
1059 UInt
* top
, UInt
* n_ips_sel
)
1068 if (VG_(clo_show_below_main
))
1071 // FIXME PW EPOCH : use the real ips epoch
1072 const DiEpoch cur_ep
= VG_(current_DiEpoch
)();
1073 mbm
= VG_(XT_offset_main_or_below_main
)(cur_ep
, ips
, n_ips
);
1075 *n_ips_sel
= mbm
- *top
+ 1;
1078 /*--------------------------------------------------------------------*/
1079 /*--- end m_xtree.c ---*/
1080 /*--------------------------------------------------------------------*/