Add missing zstd.h to coregrind Makefile.am noinst_HEADERS
[valgrind.git] / coregrind / m_xtree.c
blob16635ebb62cf1bb063ac9adc700f82ab6b1611f6
2 /*--------------------------------------------------------------------*/
3 /*--- An xtree, tree of stacktraces with data m_xtree.c ---*/
4 /*--------------------------------------------------------------------*/
6 /*
7 This file is part of Valgrind, a dynamic binary instrumentation
8 framework.
10 Copyright (C) 2016-2017 Philippe Waroquiers
12 This code generalises the XTree idea that was implemented in
13 the massif tool in Valgrind versions <= 3.12, which is
14 Copyright (C) 2005-2017 Nicholas Nethercote
15 njn@valgrind.org
17 The XTree implementation in this file is however implemented completely
18 differently. Some code has been re-used for the production of the
19 massif file header (e.g. FP_cmd function).
21 This program is free software; you can redistribute it and/or
22 modify it under the terms of the GNU General Public License as
23 published by the Free Software Foundation; either version 2 of the
24 License, or (at your option) any later version.
26 This program is distributed in the hope that it will be useful, but
27 WITHOUT ANY WARRANTY; without even the implied warranty of
28 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
29 General Public License for more details.
31 You should have received a copy of the GNU General Public License
32 along with this program; if not, see <http://www.gnu.org/licenses/>.
34 The GNU General Public License is contained in the file COPYING.
37 #include "pub_core_basics.h"
38 #include "pub_core_debuglog.h"
39 #include "pub_core_clientstate.h"
40 #include "pub_core_stacktrace.h"
41 #include "pub_core_execontext.h"
42 #include "pub_core_libcbase.h"
43 #include "pub_core_libcassert.h"
44 #include "pub_core_libcfile.h"
45 #include "pub_core_libcprint.h"
46 #include "pub_core_libcproc.h"
47 #include "pub_core_hashtable.h"
48 #include "pub_core_mallocfree.h"
49 #include "pub_core_options.h"
50 #include "pub_core_debuginfo.h"
51 #include "pub_core_deduppoolalloc.h"
52 #include "pub_core_xtree.h" /* self */
54 #define DMSG(level, ...) (level <= VG_(debugLog_getLevel)() ? \
55 VG_(dmsg)(__VA_ARGS__) \
56 : 0)
58 /* Defines the relevant part of an ec. This is shared between an xt
59 and its snapshots (see XT_shared XArray of xec). */
60 typedef
61 struct _xec {
62 ExeContext* ec;
63 UShort top; // The first ips of ec to take into account.
64 UShort n_ips_sel; // The nr of ips from top to take into account.
66 xec;
68 /* XT_shared maintains the information shared between an XT and all
69 its snapshots. */
70 typedef
71 struct _XT_shared {
72 UWord nrRef; /* nr of XTrees referencing this shared memory. */
74 Alloc_Fn_t alloc_fn; /* alloc fn (nofail) */
75 const HChar* cc; /* cost centre for alloc */
76 Free_Fn_t free_fn; /* free fn */
78 /* The data associated to each ec is stored in 2 arrays:
79 an xec array, shared between an xt and all its snapshots.
80 a data array, private to each XTree.
81 For an ec with an ECU ecu, d4ecu2xecu[ecu/4] gives the offset in
82 xec and data arrays where the ec information is located (this
83 indirection is used to avoid huge xec and data arrays, in
84 case an XTree contains data only for a small number of ec.
85 The offset in the xec and data array is used as xtree ec unique
86 id i.e. an xecu. */
88 UInt d4ecu2xecu_sz; /* size of d4ecu2xecu (in nr of elements). */
89 UInt* d4ecu2xecu;
91 /* ec information common to an xt and its snapshots. */
92 XArray* xec; /* XArray of xec, indexed by xecu (== d4ecu2xecu[ecu/4]). */
94 /* XArray of xecu, sorted by StackTrace ips[top..top+n_ips_sel-1].
95 See ips_order_cmp. */
96 XArray* ips_order_xecu;
97 } XT_shared;
99 /* NO_OFFSET indicates in d4ecu2xecu there is no data (yet) for this ec
100 (with the index ecu/4). */
101 #define NO_OFFSET 0xffffffff
103 static XT_shared* new_XT_shared (Alloc_Fn_t alloc_fn,
104 const HChar* cc,
105 void (*free_fn)(void*))
107 XT_shared* shared;
109 vg_assert(alloc_fn);
110 vg_assert(cc);
111 vg_assert(free_fn);
112 shared = alloc_fn(cc, sizeof(*shared));
113 shared->nrRef = 0;
114 shared->alloc_fn = alloc_fn;
115 shared->cc = cc;
116 shared->free_fn = free_fn;
118 shared->d4ecu2xecu_sz = 0;
119 shared->d4ecu2xecu = NULL;
120 shared->xec = VG_(newXA)(alloc_fn, cc, free_fn, sizeof(xec));
121 shared->ips_order_xecu = NULL; // Allocated when needed.
123 return shared;
126 static void delete_XT_shared (XT_shared* shared)
128 vg_assert(shared->nrRef == 0);
129 shared->free_fn(shared->d4ecu2xecu);
130 VG_(deleteXA)(shared->xec);
131 if (shared->ips_order_xecu != NULL)
132 VG_(deleteXA)(shared->ips_order_xecu);
133 shared->free_fn(shared);
136 /* Compare 2 entries in ips_order_xecu by StackTrace elements.
137 In case stack traces are of different length, an 'absent' ips is
138 considered smaller than any other address. */
139 static XArray* xec_data_for_sort; // Needed to translate an xecu into an xec
140 static Int ips_order_cmp(const void* vleft, const void* vright)
142 const Xecu left_xecu = *(const Xecu*)vleft;
143 const Xecu right_xecu = *(const Xecu*)vright;
144 const xec* left = VG_(indexXA)(xec_data_for_sort, left_xecu);
145 const xec* right = VG_(indexXA)(xec_data_for_sort, right_xecu);
146 const StackTrace left_ips = VG_(get_ExeContext_StackTrace)(left->ec)
147 + left->top;
148 const StackTrace right_ips = VG_(get_ExeContext_StackTrace)(right->ec)
149 + right->top;
150 UInt i;
152 const UInt c_n_ips_sel = left->n_ips_sel < right->n_ips_sel
153 ? left->n_ips_sel : right->n_ips_sel;
155 // First see if we have a difference on the common nr of ips selected
156 for (i = 0; i < c_n_ips_sel; i++) {
157 if (left_ips[i] == right_ips[i]) continue;
158 if (left_ips[i] < right_ips[i]) return -1;
159 return 1;
161 // Common part is equal => compare lengths.
162 if (left->n_ips_sel < right->n_ips_sel) return -1;
163 if (left->n_ips_sel > right->n_ips_sel) return 1;
164 return 0;
167 // If needed, build or refresh shared->ips_order_xecu
168 static void ensure_ips_order_xecu_valid(XT_shared* shared)
170 UInt i;
171 UInt n_xecu;
173 if (shared->ips_order_xecu == NULL) {
174 shared->ips_order_xecu = VG_(newXA)(shared->alloc_fn, shared->cc,
175 shared->free_fn, sizeof(Xecu));
176 VG_(hintSizeXA)(shared->ips_order_xecu, VG_(sizeXA)(shared->xec));
177 VG_(setCmpFnXA)(shared->ips_order_xecu, ips_order_cmp);
180 if (VG_(sizeXA)(shared->xec) == VG_(sizeXA)(shared->ips_order_xecu))
181 return;
183 n_xecu = VG_(sizeXA)(shared->xec);
184 for (i = VG_(sizeXA)(shared->ips_order_xecu); i < n_xecu; i++)
185 VG_(addToXA)(shared->ips_order_xecu, &i);
187 xec_data_for_sort = shared->xec;
188 VG_(sortXA)(shared->ips_order_xecu);
191 static void addRef_XT_shared (XT_shared* shared)
193 shared->nrRef++;
196 static UWord release_XT_shared(XT_shared* shared)
198 UWord nrRef;
200 vg_assert(shared->nrRef > 0);
201 nrRef = --shared->nrRef;
202 if (nrRef == 0)
203 delete_XT_shared(shared);
204 return nrRef;
208 struct _XTree {
209 Alloc_Fn_t alloc_fn; /* alloc fn (nofail) */
210 const HChar* cc; /* cost centre for alloc */
211 Free_Fn_t free_fn; /* free fn */
212 Word dataSzB; /* data size in bytes */
213 XT_init_data_t init_data_fn;
214 XT_add_data_t add_data_fn;
215 XT_sub_data_t sub_data_fn;
216 XT_filter_IPs_t filter_IPs_fn;
218 XT_shared* shared;
220 HChar* tmp_data; /* temporary buffer, to insert new elements. */
221 XArray* data; /* of elements of size dataSzB */
225 XTree* VG_(XT_create) ( Alloc_Fn_t alloc_fn,
226 const HChar* cc,
227 Free_Fn_t free_fn,
228 Word dataSzB,
229 XT_init_data_t init_data_fn,
230 XT_add_data_t add_data_fn,
231 XT_sub_data_t sub_data_fn,
232 XT_filter_IPs_t filter_IPs_fn)
234 XTree* xt;
236 /* check user-supplied info .. */
237 vg_assert(alloc_fn);
238 vg_assert(free_fn);
239 vg_assert(dataSzB >= 0);
240 vg_assert(init_data_fn);
241 vg_assert(add_data_fn);
242 vg_assert(sub_data_fn);
244 xt = alloc_fn(cc, sizeof(struct _XTree) );
245 xt->alloc_fn = alloc_fn;
246 xt->cc = cc;
247 xt->free_fn = free_fn;
248 xt->dataSzB = dataSzB;
249 xt->init_data_fn = init_data_fn;
250 xt->add_data_fn = add_data_fn;
251 xt->sub_data_fn = sub_data_fn;
252 xt->filter_IPs_fn = filter_IPs_fn;
254 xt->shared = new_XT_shared(alloc_fn, cc, free_fn);
255 addRef_XT_shared(xt->shared);
256 xt->tmp_data = alloc_fn(cc, xt->dataSzB);
257 xt->data = VG_(newXA)(alloc_fn, cc, free_fn, dataSzB);
259 return xt;
262 XTree* VG_(XT_snapshot)(XTree* xt)
264 XTree* nxt;
266 vg_assert(xt);
268 nxt = xt->alloc_fn(xt->cc, sizeof(struct _XTree) );
270 *nxt = *xt;
271 addRef_XT_shared(nxt->shared);
272 nxt->tmp_data = nxt->alloc_fn(nxt->cc, nxt->dataSzB);
273 nxt->data = VG_(cloneXA)(nxt->cc, xt->data);
275 return nxt;
278 void VG_(XT_delete) ( XTree* xt )
280 vg_assert(xt);
282 release_XT_shared(xt->shared);
283 xt->free_fn(xt->tmp_data);
284 VG_(deleteXA)(xt->data);
285 xt->free_fn(xt);
288 static Xecu find_or_insert (XTree* xt, ExeContext* ec)
291 const UInt d4ecu = VG_(get_ECU_from_ExeContext)(ec) / 4;
292 XT_shared* shared = xt->shared;
294 /* First grow the d4ecu2xecu array if needed. */
295 if (d4ecu >= shared->d4ecu2xecu_sz) {
296 UInt old_sz = shared->d4ecu2xecu_sz;
297 UInt new_sz = (3 * d4ecu) / 2;
299 if (new_sz < 1000)
300 new_sz = 1000;
301 shared->d4ecu2xecu = VG_(realloc)(xt->cc, shared->d4ecu2xecu,
302 new_sz * sizeof(UInt));
303 shared->d4ecu2xecu_sz = new_sz;
304 for (UInt i = old_sz; i < new_sz; i++)
305 shared->d4ecu2xecu[i] = NO_OFFSET;
308 if (shared->d4ecu2xecu[d4ecu] == NO_OFFSET) {
309 xec xe;
311 xe.ec = ec;
312 if (xt->filter_IPs_fn == NULL) {
313 xe.top = 0;
314 xe.n_ips_sel = (UShort)VG_(get_ExeContext_n_ips)(xe.ec);
315 } else {
316 UInt top;
317 UInt n_ips_sel = VG_(get_ExeContext_n_ips)(xe.ec);
318 xt->filter_IPs_fn(VG_(get_ExeContext_StackTrace)(xe.ec), n_ips_sel,
319 &top, &n_ips_sel);
320 xe.top = (UShort)top;
321 xe.n_ips_sel = (UShort)n_ips_sel;
323 xt->init_data_fn(xt->tmp_data);
324 VG_(addToXA)(shared->xec, &xe);
325 shared->d4ecu2xecu[d4ecu] = (UInt)VG_(addToXA)(xt->data, xt->tmp_data);
328 return shared->d4ecu2xecu[d4ecu];
331 Xecu VG_(XT_add_to_ec) (XTree* xt, ExeContext* ec, const void* value)
333 Xecu xecu = find_or_insert(xt, ec);
334 void* data = VG_(indexXA)(xt->data, xecu);
336 xt->add_data_fn(data, value);
337 return xecu;
340 Xecu VG_(XT_sub_from_ec) (XTree* xt, ExeContext* ec, const void* value)
342 Xecu xecu = find_or_insert(xt, ec);
343 void* data = VG_(indexXA)(xt->data, xecu);
345 xt->sub_data_fn(data, value);
346 return xecu;
349 void VG_(XT_add_to_xecu) (XTree* xt, Xecu xecu, const void* value)
351 void* data = VG_(indexXA)(xt->data, xecu);
352 xt->add_data_fn(data, value);
355 void VG_(XT_sub_from_xecu) (XTree* xt, Xecu xecu, const void* value)
357 void* data = VG_(indexXA)(xt->data, xecu);
358 xt->sub_data_fn(data, value);
361 UInt VG_(XT_n_ips_sel) (XTree* xt, Xecu xecu)
363 xec* xe = (xec*)VG_(indexXA)(xt->shared->xec, xecu);
364 return (UInt)xe->n_ips_sel;
367 ExeContext* VG_(XT_get_ec_from_xecu) (XTree* xt, Xecu xecu)
369 xec* xe = (xec*)VG_(indexXA)(xt->shared->xec, xecu);
370 return xe->ec;
373 static VgFile* xt_open (const HChar* outfilename)
375 VgFile* fp;
377 fp = VG_(fopen)(outfilename, VKI_O_CREAT|VKI_O_WRONLY|VKI_O_TRUNC,
378 VKI_S_IRUSR|VKI_S_IWUSR|VKI_S_IRGRP|VKI_S_IROTH);
379 if (fp == NULL) {
380 VG_(message)(Vg_UserMsg,
381 "Error: can not open xtree output file `%s'\n",
382 outfilename);
384 return fp;
387 #define FP(format, args...) ({ VG_(fprintf)(fp, format, ##args); })
389 // Print "cmd:" line.
390 static void FP_cmd(VgFile* fp)
392 UInt i;
394 FP("cmd: ");
395 FP("%s", VG_(args_the_exename));
396 for (i = 0; i < VG_(sizeXA)(VG_(args_for_client)); i++) {
397 HChar* arg = * (HChar**) VG_(indexXA)(VG_(args_for_client), i);
398 FP(" %s", arg);
400 FP("\n");
403 /* ----------- Callgrind output ------------------------------------------- */
405 /* Output a callgrind format element in compressed format:
406 "name=(pos)" or "name=(pos) value" (if value_new)
407 or not compressed format: "name=value"
408 VG_(clo_xtree_compress_strings) indicates if the compressed format is used.
409 name is the format element (e.g. fl, fn, cfi, cfn, ...).
410 pos is the value dictionary position, used for compressed format.
411 value_new is True if this is the first usage of value. */
412 static void FP_pos_str(VgFile* fp, const HChar* name, UInt pos,
413 const HChar* value, Bool value_new)
415 if (!VG_(clo_xtree_compress_strings))
416 FP("%s=%s\n", name, value);
417 else if (value_new)
418 FP("%s=(%u) %s\n", name, pos, value);
419 else
420 FP("%s=(%u)\n", name, pos);
423 void VG_(XT_callgrind_print)
424 (XTree* xt,
425 const HChar* outfilename,
426 const HChar* events,
427 const HChar* (*img_value)(const void* value))
429 UInt n_xecu;
430 XT_shared* shared = xt->shared;
431 VgFile* fp = xt_open(outfilename);
432 DedupPoolAlloc* fnname_ddpa;
433 DedupPoolAlloc* filename_ddpa;
434 HChar* filename_buf = NULL;
435 UInt filename_buf_size = 0;
436 const HChar* filename_dir;
437 const HChar* filename_name;
439 if (fp == NULL)
440 return;
442 fnname_ddpa = VG_(newDedupPA)(16000, 1, xt->alloc_fn,
443 "XT_callgrind_print.fn", xt->free_fn);
444 filename_ddpa = VG_(newDedupPA)(16000, 1, xt->alloc_fn,
445 "XT_callgrind_print.fl", xt->free_fn);
447 FP("# callgrind format\n");
448 FP("version: 1\n");
449 FP("creator: xtree-1\n");
450 FP("pid: %d\n", VG_(getpid)());
451 FP_cmd(fp);
453 /* Currently, we only need/support line positions. */
454 FP("\npositions:%s\n", " line");
456 /* Produce one "event:" line for each event, and the "events:" line. */
458 HChar strtok_events[VG_(strlen)(events)+1];
459 HChar* e;
460 HChar* ssaveptr;
461 HChar* p;
463 VG_(strcpy)(strtok_events, events);
464 for (e = VG_(strtok_r)(strtok_events, ",", &ssaveptr);
465 e != NULL;
466 e = VG_(strtok_r)(NULL, ",", &ssaveptr))
467 FP("event: %s\n", e);
468 FP("events:");
469 VG_(strcpy)(strtok_events, events);
470 for (e = VG_(strtok_r)(strtok_events, ",", &ssaveptr);
471 e != NULL;
472 e = VG_(strtok_r)(NULL, ",", &ssaveptr)) {
473 p = e;
474 while (*p) {
475 if (*p == ':')
476 *p = 0;
477 p++;
479 FP(" %s", e);
481 FP("\n");
483 xt->init_data_fn(xt->tmp_data); // to compute totals
485 n_xecu = VG_(sizeXA)(xt->data);
486 vg_assert (n_xecu <= VG_(sizeXA)(shared->xec));
487 for (Xecu xecu = 0; xecu < n_xecu; xecu++) {
488 xec* xe = (xec*)VG_(indexXA)(shared->xec, xecu);
489 if (xe->n_ips_sel == 0)
490 continue;
492 const HChar* img = img_value(VG_(indexXA)(xt->data, xecu));
494 // CALLED_FLF gets the Dir+Filename/Line number/Function name for ips[n]
495 // in the variables called_filename/called_linenum/called_fnname.
496 // The booleans called_filename_new/called_fnname_new are set to True
497 // the first time the called_filename/called_fnname are encountered.
498 // The called_filename_nr/called_fnname_nr are numbers identifying
499 // the strings called_filename/called_fnname.
500 #define CALLED_FLF(n) \
501 if ((n) < 0 \
502 || !VG_(get_filename_linenum)(ep, ips[(n)], \
503 &filename_name, \
504 &filename_dir, \
505 &called_linenum)) { \
506 filename_name = "UnknownFile???"; \
507 called_linenum = 0; \
509 if ((n) < 0 \
510 || !VG_(get_fnname)(ep, ips[(n)], &called_fnname)) { \
511 called_fnname = "UnknownFn???"; \
514 UInt needed_size = VG_(strlen)(filename_dir) + 1 \
515 + VG_(strlen)(filename_name) + 1; \
516 if (filename_buf_size < needed_size) { \
517 filename_buf_size = needed_size; \
518 filename_buf = VG_(realloc)(xt->cc, filename_buf, \
519 filename_buf_size); \
522 VG_(strcpy)(filename_buf, filename_dir); \
523 if (filename_buf[0] != '\0') { \
524 VG_(strcat)(filename_buf, "/"); \
526 VG_(strcat)(filename_buf, filename_name); \
527 called_filename_nr = VG_(allocStrDedupPA)(filename_ddpa, \
528 filename_buf, \
529 &called_filename_new); \
530 called_filename = filename_buf; \
531 called_fnname_nr = VG_(allocStrDedupPA)(fnname_ddpa, \
532 called_fnname, \
533 &called_fnname_new);
535 /* Instead of unknown fnname ???, CALLED_FLF could use instead:
536 VG_(sprintf)(unknown_fn, "%p", (void*)ips[(n)]);
537 but that creates a lot of (useless) nodes at least for
538 valgrind self-hosting. */
540 if (img) {
541 const HChar* called_filename;
542 UInt called_filename_nr;
543 Bool called_filename_new; // True the first time we see this filename.
544 const HChar* called_fnname;
545 UInt called_fnname_nr;
546 Bool called_fnname_new; // True the first time we see this fnname.
547 UInt called_linenum;
548 UInt prev_linenum;
550 const Addr* ips = VG_(get_ExeContext_StackTrace)(xe->ec) + xe->top;
551 const DiEpoch ep = VG_(get_ExeContext_epoch)(xe->ec);
553 Int ips_idx = xe->n_ips_sel - 1;
555 if (0) {
556 VG_(printf)("entry img %s\n", img);
557 VG_(pp_ExeContext)(xe->ec);
558 VG_(printf)("\n");
560 xt->add_data_fn(xt->tmp_data, VG_(indexXA)(xt->data, xecu));
561 CALLED_FLF(ips_idx);
562 for (;
563 ips_idx >= 0;
564 ips_idx--) {
565 FP_pos_str(fp, "fl", called_filename_nr,
566 called_filename, called_filename_new);
567 FP_pos_str(fp, "fn", called_fnname_nr,
568 called_fnname, called_fnname_new);
569 if (ips_idx == 0)
570 FP("%u %s\n", called_linenum, img);
571 else
572 FP("%u\n", called_linenum); //no self cost.
573 prev_linenum = called_linenum;
574 if (ips_idx >= 1) {
575 CALLED_FLF(ips_idx-1);
576 FP_pos_str(fp, "cfi", called_filename_nr,
577 called_filename, called_filename_new);
578 FP_pos_str(fp, "cfn", called_fnname_nr,
579 called_fnname, called_fnname_new);
580 called_filename_new = False;
581 called_fnname_new = False;
582 /* Giving a call count of 0 allows kcachegrind to hide the calls
583 column. A call count of 1 means kcachegrind would show in the
584 calls column the nr of stacktrace containing this arc, which
585 is very confusing. So, the less bad is to give a 0 call
586 count. */
587 FP("calls=0 %u\n", called_linenum);
588 FP("%u %s\n", prev_linenum, img);
591 FP("\n");
594 /* callgrind format is not really fully supporting (yet?) execution trees:
595 in an execution tree, self and inclusive costs are identical, and
596 cannot be added together.
597 If no totals: line is given, callgrind_annotate calculates the addition
598 of all costs, and so gives a wrong totals.
599 Giving a totals: line solves this, but the user must give the option
600 --inclusive=yes (kind of hack) to have all the functions given
601 in the output file. */
602 FP("totals: %s\n", img_value(xt->tmp_data));
603 VG_(fclose)(fp);
604 VG_(deleteDedupPA)(fnname_ddpa);
605 VG_(deleteDedupPA)(filename_ddpa);
606 VG_(free)(filename_buf);
610 /* ----------- Massif output ---------------------------------------------- */
612 /* For Massif output, some functions from the execontext are not output, a.o.
613 the allocation functions at the top of the stack and the functions below
614 main. So, the StackTrace of the execontexts in the xtree must be filtered.
615 Ms_Ec defines the subset of the stacktrace relevant for the report. */
616 typedef
617 struct {
618 StackTrace ips; // ips and n_ips provides the subset of the xtree ec
619 UInt n_ips; // to use for a massif report.
621 SizeT report_value; // The value to report for this stack trace.
622 } Ms_Ec;
624 /* Ms_Group defines (at a certain depth) a group of ec context that
625 have the same IPs at the given depth, and have the same 'parent'.
626 total is the sum of the values of all group elements.
627 A Ms_Group can also represent a set of ec contexts that do not
628 have the same IP, but that have each a total which is below the
629 significant size. Such a group has a NULL ms_ec, a zero group_io.
630 n_ec is the nr of insignificant ec that have been collected inside this
631 insignificant group, and total is the sum of all non significant ec
632 at the given depth. */
633 typedef
634 struct {
635 Ms_Ec* ms_ec; // The group consists in ms_ec[0 .. n_ec-1]
636 Addr group_ip;
637 UInt n_ec;
638 SizeT total;
639 } Ms_Group;
641 /* Compare 2 groups by total, to have bigger total first. */
642 static Int ms_group_revcmp_total(const void* vleft, const void* vright)
644 const Ms_Group* left = (const Ms_Group*)vleft;
645 const Ms_Group* right = (const Ms_Group*)vright;
647 // First reverse compare total
648 if (left->total > right->total) return -1;
649 if (left->total < right->total) return 1;
651 /* Equal size => compare IPs.
652 This (somewhat?) helps to have deterministic test results.
653 If this would change between platforms, then we should compare
654 function names/filename/linenr */
655 if (left->group_ip < right->group_ip) return -1;
656 if (left->group_ip > right->group_ip) return 1;
657 return 0;
660 /* Scan the addresses in ms_ec at the given depth.
661 On return,
662 *groups points to an array of Ms_Group sorted by total.
663 *n_groups is the nr of groups
664 The caller is responsible to free the allocated group array. */
665 static void ms_make_groups (UInt depth, Ms_Ec* ms_ec, UInt n_ec, SizeT sig_sz,
666 UInt* n_groups, Ms_Group** groups)
668 UInt i, g;
669 Addr cur_group_ip = 0;
671 *n_groups = 0;
673 /* Handle special case somewhat more efficiently */
674 if (n_ec == 0) {
675 *groups = NULL;
676 return;
679 /* Compute how many groups we have. */
680 for (i = 0; i < n_ec; i++) {
681 if (ms_ec[i].n_ips > depth
682 && (*n_groups == 0 || cur_group_ip != ms_ec[i].ips[depth])) {
683 (*n_groups)++;
684 cur_group_ip = ms_ec[i].ips[depth];
688 /* make the group array. */
689 *groups = VG_(malloc)("ms_make_groups", *n_groups * sizeof(Ms_Group));
690 i = 0;
691 for (g = 0; g < *n_groups; g++) {
692 while (ms_ec[i].n_ips <= depth)
693 i++;
694 cur_group_ip = ms_ec[i].ips[depth];
695 (*groups)[g].group_ip = cur_group_ip;
696 (*groups)[g].ms_ec = &ms_ec[i];
697 (*groups)[g].n_ec = 1;
698 (*groups)[g].total = ms_ec[i].report_value;
699 i++;
700 while (i < n_ec
701 && ms_ec[i].n_ips > depth
702 && cur_group_ip == ms_ec[i].ips[depth]) {
703 (*groups)[g].total += ms_ec[i].report_value;
704 i++;
705 (*groups)[g].n_ec++;
709 /* Search for insignificant groups, collect them all together
710 in the first insignificant group, and compact the group array. */
712 UInt insig1; // Position of first insignificant group.
713 UInt n_insig = 0; // Nr of insignificant groups found.
715 for (g = 0; g < *n_groups; g++) {
716 if ((*groups)[g].total < sig_sz) {
717 if (n_insig == 0) {
718 // First insig group => transform it into the special group
719 (*groups)[g].ms_ec = NULL;
720 (*groups)[g].group_ip = 0;
721 (*groups)[g].n_ec = 0;
722 // start the sum of insig total as total
723 insig1 = g;
724 } else {
725 // Add this insig group total into insig1 first group
726 (*groups)[insig1].total += (*groups)[g].total;
728 n_insig++;
729 } else {
730 if (n_insig > 1)
731 (*groups)[g - n_insig + 1] = (*groups)[g];
734 if (n_insig > 0) {
735 (*groups)[insig1].n_ec = n_insig;
736 *n_groups -= n_insig - 1;
738 DMSG(1, "depth %u n_groups %u n_insig %u\n", depth, *n_groups, n_insig);
741 /* Sort on total size, bigger size first. */
742 VG_(ssort)(*groups, *n_groups, sizeof(Ms_Group), ms_group_revcmp_total);
745 /* Output the given group (located in an xtree at the given depth).
746 indent tells by how much to indent the information output for the group.
747 indent can be bigger than depth when outputting a group that is made
748 of one or more inlined calls: all inlined calls are output with the
749 same depth but with one more indent for each inlined call. */
750 static void ms_output_group (VgFile* fp, UInt depth, UInt indent,
751 Ms_Group* group, SizeT sig_sz,
752 double sig_pct_threshold)
754 UInt i;
755 Ms_Group* groups;
756 UInt n_groups;
758 // If this is an insignificant group, handle it specially
759 if (group->ms_ec == NULL) {
760 const HChar* s = ( 1 == group->n_ec? "," : "s, all" );
761 vg_assert(group->group_ip == 0);
762 FP("%*sn0: %lu in %u place%s below massif's threshold (%.2f%%)\n",
763 (Int)(indent+1), "", group->total, group->n_ec, s, sig_pct_threshold);
764 return;
767 // Normal group => output the group and its subgroups.
768 ms_make_groups(depth+1, group->ms_ec, group->n_ec, sig_sz,
769 &n_groups, &groups);
771 // FIXME JRS EPOCH 28 July 2017: HACK! Is this correct?
772 const DiEpoch cur_ep = VG_(current_DiEpoch)();
773 // // FIXME PW EPOCH : No, the above is not correct.
774 // Xtree Massif output regroups execontext in the layout of a 'tree'.
775 // So, possibly, the same IP address value can be in 2 different ec, but
776 // the epoch to symbolise this address must be retrieved from the ec it
777 // originates from.
778 // So, to fix this, it is not enough to make a group based on identical
779 // IP addr value, one must also find the di used to symbolise this address,
780 // A group will then be defined as 'same IP and same di'.
781 // Fix not trivial to do, so for the moment, --keep-debuginfo=yes will
782 // have no impact on xtree massif output.
784 Addr cur_ip = group->ms_ec->ips[depth];
786 InlIPCursor *iipc = VG_(new_IIPC)(cur_ep, cur_ip);
788 while (True) {
789 const HChar* buf = VG_(describe_IP)(cur_ep, cur_ip, iipc);
790 Bool is_inlined = VG_(next_IIPC)(iipc);
792 FP("%*s" "n%u: %lu %s\n",
793 (Int)(indent + 1), "",
794 is_inlined ? 1 : n_groups, // Inlined frames always have one child.
795 group->total,
796 buf);
798 if (!is_inlined) {
799 break;
802 indent++;
805 VG_(delete_IIPC)(iipc);
807 /* Output sub groups of this group. */
808 for (i = 0; i < n_groups; i++)
809 ms_output_group(fp, depth+1, indent+1, &groups[i], sig_sz,
810 sig_pct_threshold);
812 VG_(free)(groups);
815 /* Allocate and build an array of Ms_Ec sorted by addresses in the
816 Ms_Ec StackTrace. */
817 static void prepare_ms_ec (XTree* xt,
818 ULong (*report_value)(const void* value),
819 ULong* top_total, Ms_Ec** vms_ec, UInt* vn_ec)
821 XT_shared* shared = xt->shared;
822 const UInt n_xecu = VG_(sizeXA)(shared->xec);
823 const UInt n_data_xecu = VG_(sizeXA)(xt->data);
824 Ms_Ec* ms_ec = VG_(malloc)("XT_massif_print.ms_ec", n_xecu * sizeof(Ms_Ec));
825 UInt n_xecu_sel = 0; // Nr of xecu that are selected for output.
827 vg_assert(n_data_xecu <= n_xecu);
829 // Ensure we have in shared->ips_order_xecu our xecu sorted by StackTrace.
830 ensure_ips_order_xecu_valid(shared);
832 *top_total = 0;
833 DMSG(1, "iteration %u\n", n_xecu);
834 for (UInt i = 0; i < n_xecu; i++) {
835 Xecu xecu = *(Xecu*)VG_(indexXA)(shared->ips_order_xecu, i);
836 xec* xe = (xec*)VG_(indexXA)(shared->xec, xecu);
838 if (xecu >= n_data_xecu)
839 continue; // No data for this xecu in xt->data.
840 ms_ec[n_xecu_sel].n_ips = xe->n_ips_sel;
841 if (ms_ec[n_xecu_sel].n_ips == 0)
842 continue;
844 ms_ec[n_xecu_sel].ips = VG_(get_ExeContext_StackTrace)(xe->ec) + xe->top;
845 ms_ec[n_xecu_sel].report_value
846 = (*report_value)(VG_(indexXA)(xt->data, xecu));
847 *top_total += ms_ec[n_xecu_sel].report_value;
849 n_xecu_sel++;
851 vg_assert(n_xecu_sel <= n_xecu);
853 *vms_ec = ms_ec;
854 *vn_ec = n_xecu_sel;
857 MsFile* VG_(XT_massif_open)
858 (const HChar* outfilename,
859 const HChar* desc,
860 const XArray* desc_args,
861 const HChar* time_unit)
863 UInt i;
864 VgFile* fp = xt_open(outfilename);
866 if (fp == NULL)
867 return NULL; // xt_open reported the error.
869 /* ------ file header ------------------------------- */
870 FP("desc:");
871 if (desc)
872 FP(" %s", desc);
873 i = 0;
874 if (desc_args) {
875 for (i = 0; i < VG_(sizeXA)(desc_args); i++) {
876 HChar* arg = *(HChar**)VG_(indexXA)(desc_args, i);
877 FP(" %s", arg);
880 if (0 == i && desc == NULL) FP(" (none)");
881 FP("\n");
883 FP_cmd(fp);
885 FP("time_unit: %s\n", time_unit);
887 return fp;
890 void VG_(XT_massif_close)(MsFile* fp)
892 if (fp == NULL)
893 return; // Error should have been reported by VG_(XT_massif_open)
895 VG_(fclose)(fp);
898 void VG_(XT_massif_print)
899 (MsFile* fp,
900 XTree* xt,
901 const Massif_Header* header,
902 ULong (*report_value)(const void* value))
904 UInt i;
906 if (fp == NULL)
907 return; // Normally VG_(XT_massif_open) already reported an error.
909 /* Compute/prepare Snapshot totals/data/... */
910 ULong top_total;
912 /* Following variables only used for detailed snapshot. */
913 UInt n_ec = 0;
914 Ms_Ec* ms_ec = NULL;
915 const HChar* kind =
916 header->detailed ? (header->peak ? "peak" : "detailed") : "empty";
918 DMSG(1, "XT_massif_print %s\n", kind);
919 if (header->detailed) {
920 /* Prepare the Ms_Ec sorted array of stacktraces and the groups
921 at level 0. */
922 prepare_ms_ec(xt, report_value, &top_total, &ms_ec, &n_ec);
923 DMSG(1, "XT_print_massif ms_ec n_ec %u\n", n_ec);
924 } else if (xt == NULL) {
925 /* Non detailed, no xt => use the sz provided in the header. */
926 top_total = header->sz_B;
927 } else {
928 /* For non detailed snapshot, compute total directly from the xec. */
929 const XT_shared* shared = xt->shared;
930 const UInt n_xecu = VG_(sizeXA)(xt->data);
931 top_total = 0;
933 for (UInt xecu = 0; xecu < n_xecu; xecu++) {
934 xec* xe = (xec*)VG_(indexXA)(shared->xec, xecu);
935 if (xe->n_ips_sel == 0)
936 continue;
937 top_total += (*report_value)(VG_(indexXA)(xt->data, xecu));
941 /* ------ snapshot header --------------------------- */
942 FP("#-----------\n");
943 FP("snapshot=%d\n", header->snapshot_n);
944 FP("#-----------\n");
945 FP("time=%lld\n", header->time);
947 FP("mem_heap_B=%llu\n", top_total); // without extra_B and without stacks_B
948 FP("mem_heap_extra_B=%llu\n", header->extra_B);
949 FP("mem_stacks_B=%llu\n", header->stacks_B);
950 FP("heap_tree=%s\n", kind);
952 /* ------ detailed snapshot data ----------------------------- */
953 if (header->detailed) {
954 UInt n_groups;
955 Ms_Group* groups;
957 ULong sig_sz;
958 // Work out how big a child must be to be significant. If the current
959 // top_total is zero, then we set it to 1, which means everything will be
960 // judged insignificant -- this is sensible, as there's no point showing
961 // any detail for this case. Unless they used threshold=0, in which
962 // case we show them everything because that's what they asked for.
964 // Nb: We do this once now, rather than once per child, because if we do
965 // that the cost of all the divisions adds up to something significant.
966 if (0 == top_total && 0 != header->sig_threshold)
967 sig_sz = 1;
968 else
969 sig_sz = ((top_total + header->extra_B + header->stacks_B)
970 * header->sig_threshold) / 100;
972 /* Produce the groups at depth 0 */
973 DMSG(1, "XT_massif_print producing depth 0 groups\n");
974 ms_make_groups(0, ms_ec, n_ec, sig_sz, &n_groups, &groups);
976 /* Output the top node. */
977 FP("n%u: %llu %s\n", n_groups, top_total, header->top_node_desc);
979 /* Output depth 0 groups. */
980 DMSG(1, "XT_massif_print outputting %u depth 0 groups\n", n_groups);
981 for (i = 0; i < n_groups; i++)
982 ms_output_group(fp, 0, 0, &groups[i], sig_sz, header->sig_threshold);
984 VG_(free)(groups);
985 VG_(free)(ms_ec);
989 Int VG_(XT_offset_main_or_below_main)(DiEpoch ep, Addr* ips, Int n_ips)
991 /* Search for main or below main function.
992 To limit the nr of ips to examine, we maintain the deepest
993 offset where main was found, and we first search main
994 from there.
995 If no main is found, we will then do a search for main or
996 below main function till the top. */
997 static Int deepest_main = 0;
998 Vg_FnNameKind kind = Vg_FnNameNormal;
999 Int mbm = n_ips - 1; // Position of deepest main or below main.
1000 Vg_FnNameKind mbmkind = Vg_FnNameNormal;
1001 Int i;
1003 for (i = n_ips - 1 - deepest_main;
1004 i < n_ips;
1005 i++) {
1006 mbmkind = VG_(get_fnname_kind_from_IP)(ep, ips[i]);
1007 if (mbmkind != Vg_FnNameNormal) {
1008 mbm = i;
1009 break;
1013 /* Search for main or below main function till top. */
1014 for (i = mbm - 1;
1015 i >= 0 && mbmkind != Vg_FnNameMain;
1016 i--) {
1017 kind = VG_(get_fnname_kind_from_IP)(ep, ips[i]);
1018 if (kind != Vg_FnNameNormal) {
1019 mbm = i;
1020 mbmkind = kind;
1023 if (Vg_FnNameMain == mbmkind || Vg_FnNameBelowMain == mbmkind) {
1024 if (mbmkind == Vg_FnNameMain && (n_ips - 1 - mbm) > deepest_main)
1025 deepest_main = n_ips - 1 - mbm;
1026 return mbm;
1027 } else
1028 return n_ips-1;
1031 void VG_(XT_filter_1top_and_maybe_below_main)
1032 (Addr* ips, Int n_ips,
1033 UInt* top, UInt* n_ips_sel)
1035 Int mbm;
1037 *n_ips_sel = n_ips;
1038 if (n_ips == 0) {
1039 *top = 0;
1040 return;
1043 /* Filter top function. */
1044 *top = 1;
1046 if (VG_(clo_show_below_main))
1047 mbm = n_ips - 1;
1048 else {
1049 // FIXME PW EPOCH : use the real ips epoch
1050 const DiEpoch cur_ep = VG_(current_DiEpoch)();
1051 mbm = VG_(XT_offset_main_or_below_main)(cur_ep, ips, n_ips);
1054 *n_ips_sel = mbm - *top + 1;
1057 void VG_(XT_filter_maybe_below_main)
1058 (Addr* ips, Int n_ips,
1059 UInt* top, UInt* n_ips_sel)
1061 Int mbm;
1063 *n_ips_sel = n_ips;
1064 *top = 0;
1065 if (n_ips == 0)
1066 return;
1068 if (VG_(clo_show_below_main))
1069 mbm = n_ips - 1;
1070 else {
1071 // FIXME PW EPOCH : use the real ips epoch
1072 const DiEpoch cur_ep = VG_(current_DiEpoch)();
1073 mbm = VG_(XT_offset_main_or_below_main)(cur_ep, ips, n_ips);
1075 *n_ips_sel = mbm - *top + 1;
1078 /*--------------------------------------------------------------------*/
1079 /*--- end m_xtree.c ---*/
1080 /*--------------------------------------------------------------------*/