/* Imported from valgrind.git: coregrind/m_xtree.c
   (gitweb extraction artifacts removed). */
/*--------------------------------------------------------------------*/
/*--- An xtree, tree of stacktraces with data        m_xtree.c     ---*/
/*--------------------------------------------------------------------*/

/*
   This file is part of Valgrind, a dynamic binary instrumentation
   framework.

   Copyright (C) 2016-2017 Philippe Waroquiers

   This code generalises the XTree idea that was implemented in
   the massif tool in Valgrind versions <= 3.12, which is
   Copyright (C) 2005-2017 Nicholas Nethercote
    njn@valgrind.org

   The XTree implementation in this file is however implemented completely
   differently. Some code has been re-used for the production of the
   massif file header (e.g. FP_cmd function).

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
   02111-1307, USA.

   The GNU General Public License is contained in the file COPYING.
*/
#include "pub_core_basics.h"
#include "pub_core_debuglog.h"
#include "pub_core_clientstate.h"
#include "pub_core_stacktrace.h"
#include "pub_core_execontext.h"
#include "pub_core_libcbase.h"
#include "pub_core_libcassert.h"
#include "pub_core_libcfile.h"
#include "pub_core_libcprint.h"
#include "pub_core_libcproc.h"
#include "pub_core_hashtable.h"
#include "pub_core_mallocfree.h"
#include "pub_core_options.h"
#include "pub_core_debuginfo.h"
#include "pub_core_deduppoolalloc.h"
#include "pub_core_xtree.h"    /* self */
/* Emit a debug message via VG_(dmsg) when the current debuglog verbosity
   is at least 'level'; otherwise evaluates to 0 and prints nothing.
   Written as an expression (?:) so it can be used in statement position
   without a do/while wrapper. */
#define DMSG(level, ...) (level <= VG_(debugLog_getLevel)() ?   \
                          VG_(dmsg)(__VA_ARGS__)                \
                          : 0)
60 /* Defines the relevant part of an ec. This is shared between an xt
61 and its snapshots (see XT_shared XArray of xec). */
62 typedef
63 struct _xec {
64 ExeContext* ec;
65 UShort top; // The first ips of ec to take into account.
66 UShort n_ips_sel; // The nr of ips from top to take into account.
68 xec;
70 /* XT_shared maintains the information shared between an XT and all
71 its snapshots. */
72 typedef
73 struct _XT_shared {
74 UWord nrRef; /* nr of XTrees referencing this shared memory. */
76 Alloc_Fn_t alloc_fn; /* alloc fn (nofail) */
77 const HChar* cc; /* cost centre for alloc */
78 Free_Fn_t free_fn; /* free fn */
80 /* The data associated to each ec is stored in 2 arrays:
81 an xec array, shared between an xt and all its snapshots.
82 a data array, private to each XTree.
83 For an ec with an ECU ecu, d4ecu2xecu[ecu/4] gives the offset in
84 xec and data arrays where the ec information is located (this
85 indirection is used to avoid huge xec and data arrays, in
86 case an XTree contains data only for a small number of ec.
87 The offset in the xec and data array is used as xtree ec unique
88 id i.e. an xecu. */
90 UInt d4ecu2xecu_sz; /* size of d4ecu2xecu (in nr of elements). */
91 UInt* d4ecu2xecu;
93 /* ec information common to an xt and its snapshots. */
94 XArray* xec; /* XArray of xec, indexed by xecu (== d4ecu2xecu[ecu/4]). */
96 /* XArray of xecu, sorted by StackTrace ips[top..top+n_ips_sel-1].
97 See ips_order_cmp. */
98 XArray* ips_order_xecu;
99 } XT_shared;
/* NO_OFFSET indicates in d4ecu2xecu there is no data (yet) for this ec
   (with the index ecu/4). */
#define NO_OFFSET 0xffffffff
105 static XT_shared* new_XT_shared (Alloc_Fn_t alloc_fn,
106 const HChar* cc,
107 void (*free_fn)(void*))
109 XT_shared* shared;
111 vg_assert(alloc_fn);
112 vg_assert(cc);
113 vg_assert(free_fn);
114 shared = alloc_fn(cc, sizeof(*shared));
115 shared->nrRef = 0;
116 shared->alloc_fn = alloc_fn;
117 shared->cc = cc;
118 shared->free_fn = free_fn;
120 shared->d4ecu2xecu_sz = 0;
121 shared->d4ecu2xecu = NULL;
122 shared->xec = VG_(newXA)(alloc_fn, cc, free_fn, sizeof(xec));
123 shared->ips_order_xecu = NULL; // Allocated when needed.
125 return shared;
128 static void delete_XT_shared (XT_shared* shared)
130 vg_assert(shared->nrRef == 0);
131 shared->free_fn(shared->d4ecu2xecu);
132 VG_(deleteXA)(shared->xec);
133 if (shared->ips_order_xecu != NULL)
134 VG_(deleteXA)(shared->ips_order_xecu);
135 shared->free_fn(shared);
138 /* Compare 2 entries in ips_order_xecu by StackTrace elements.
139 In case stack traces are of different length, an 'absent' ips is
140 considered smaller than any other address. */
141 static XArray* xec_data_for_sort; // Needed to translate an xecu into an xec
142 static Int ips_order_cmp(const void* vleft, const void* vright)
144 const Xecu left_xecu = *(const Xecu*)vleft;
145 const Xecu right_xecu = *(const Xecu*)vright;
146 const xec* left = VG_(indexXA)(xec_data_for_sort, left_xecu);
147 const xec* right = VG_(indexXA)(xec_data_for_sort, right_xecu);
148 const StackTrace left_ips = VG_(get_ExeContext_StackTrace)(left->ec)
149 + left->top;
150 const StackTrace right_ips = VG_(get_ExeContext_StackTrace)(right->ec)
151 + right->top;
152 UInt i;
154 const UInt c_n_ips_sel = left->n_ips_sel < right->n_ips_sel
155 ? left->n_ips_sel : right->n_ips_sel;
157 // First see if we have a difference on the common nr of ips selected
158 for (i = 0; i < c_n_ips_sel; i++) {
159 if (left_ips[i] == right_ips[i]) continue;
160 if (left_ips[i] < right_ips[i]) return -1;
161 return 1;
163 // Common part is equal => compare lengths.
164 if (left->n_ips_sel < right->n_ips_sel) return -1;
165 if (left->n_ips_sel > right->n_ips_sel) return 1;
166 return 0;
169 // If needed, build or refresh shared->ips_order_xecu
170 static void ensure_ips_order_xecu_valid(XT_shared* shared)
172 UInt i;
173 UInt n_xecu;
175 if (shared->ips_order_xecu == NULL) {
176 shared->ips_order_xecu = VG_(newXA)(shared->alloc_fn, shared->cc,
177 shared->free_fn, sizeof(Xecu));
178 VG_(hintSizeXA)(shared->ips_order_xecu, VG_(sizeXA)(shared->xec));
179 VG_(setCmpFnXA)(shared->ips_order_xecu, ips_order_cmp);
182 if (VG_(sizeXA)(shared->xec) == VG_(sizeXA)(shared->ips_order_xecu))
183 return;
185 n_xecu = VG_(sizeXA)(shared->xec);
186 for (i = VG_(sizeXA)(shared->ips_order_xecu); i < n_xecu; i++)
187 VG_(addToXA)(shared->ips_order_xecu, &i);
189 xec_data_for_sort = shared->xec;
190 VG_(sortXA)(shared->ips_order_xecu);
193 static void addRef_XT_shared (XT_shared* shared)
195 shared->nrRef++;
198 static UWord release_XT_shared(XT_shared* shared)
200 UWord nrRef;
202 vg_assert(shared->nrRef > 0);
203 nrRef = --shared->nrRef;
204 if (nrRef == 0)
205 delete_XT_shared(shared);
206 return nrRef;
210 struct _XTree {
211 Alloc_Fn_t alloc_fn; /* alloc fn (nofail) */
212 const HChar* cc; /* cost centre for alloc */
213 Free_Fn_t free_fn; /* free fn */
214 Word dataSzB; /* data size in bytes */
215 XT_init_data_t init_data_fn;
216 XT_add_data_t add_data_fn;
217 XT_sub_data_t sub_data_fn;
218 XT_filter_IPs_t filter_IPs_fn;
220 XT_shared* shared;
222 HChar* tmp_data; /* temporary buffer, to insert new elements. */
223 XArray* data; /* of elements of size dataSzB */
227 XTree* VG_(XT_create) ( Alloc_Fn_t alloc_fn,
228 const HChar* cc,
229 Free_Fn_t free_fn,
230 Word dataSzB,
231 XT_init_data_t init_data_fn,
232 XT_add_data_t add_data_fn,
233 XT_sub_data_t sub_data_fn,
234 XT_filter_IPs_t filter_IPs_fn)
236 XTree* xt;
238 /* check user-supplied info .. */
239 vg_assert(alloc_fn);
240 vg_assert(free_fn);
241 vg_assert(dataSzB >= 0);
242 vg_assert(init_data_fn);
243 vg_assert(add_data_fn);
244 vg_assert(sub_data_fn);
246 xt = alloc_fn(cc, sizeof(struct _XTree) );
247 xt->alloc_fn = alloc_fn;
248 xt->cc = cc;
249 xt->free_fn = free_fn;
250 xt->dataSzB = dataSzB;
251 xt->init_data_fn = init_data_fn;
252 xt->add_data_fn = add_data_fn;
253 xt->sub_data_fn = sub_data_fn;
254 xt->filter_IPs_fn = filter_IPs_fn;
256 xt->shared = new_XT_shared(alloc_fn, cc, free_fn);
257 addRef_XT_shared(xt->shared);
258 xt->tmp_data = alloc_fn(cc, xt->dataSzB);
259 xt->data = VG_(newXA)(alloc_fn, cc, free_fn, dataSzB);
261 return xt;
264 XTree* VG_(XT_snapshot)(XTree* xt)
266 XTree* nxt;
268 vg_assert(xt);
270 nxt = xt->alloc_fn(xt->cc, sizeof(struct _XTree) );
272 *nxt = *xt;
273 addRef_XT_shared(nxt->shared);
274 nxt->tmp_data = nxt->alloc_fn(nxt->cc, nxt->dataSzB);
275 nxt->data = VG_(cloneXA)(nxt->cc, xt->data);
277 return nxt;
280 void VG_(XT_delete) ( XTree* xt )
282 vg_assert(xt);
284 release_XT_shared(xt->shared);
285 xt->free_fn(xt->tmp_data);
286 VG_(deleteXA)(xt->data);
287 xt->free_fn(xt);
290 static Xecu find_or_insert (XTree* xt, ExeContext* ec)
293 const UInt d4ecu = VG_(get_ECU_from_ExeContext)(ec) / 4;
294 XT_shared* shared = xt->shared;
296 /* First grow the d4ecu2xecu array if needed. */
297 if (d4ecu >= shared->d4ecu2xecu_sz) {
298 UInt old_sz = shared->d4ecu2xecu_sz;
299 UInt new_sz = (3 * d4ecu) / 2;
301 if (new_sz < 1000)
302 new_sz = 1000;
303 shared->d4ecu2xecu = VG_(realloc)(xt->cc, shared->d4ecu2xecu,
304 new_sz * sizeof(UInt));
305 shared->d4ecu2xecu_sz = new_sz;
306 for (UInt i = old_sz; i < new_sz; i++)
307 shared->d4ecu2xecu[i] = NO_OFFSET;
310 if (shared->d4ecu2xecu[d4ecu] == NO_OFFSET) {
311 xec xe;
313 xe.ec = ec;
314 if (xt->filter_IPs_fn == NULL) {
315 xe.top = 0;
316 xe.n_ips_sel = (UShort)VG_(get_ExeContext_n_ips)(xe.ec);
317 } else {
318 UInt top;
319 UInt n_ips_sel = VG_(get_ExeContext_n_ips)(xe.ec);
320 xt->filter_IPs_fn(VG_(get_ExeContext_StackTrace)(xe.ec), n_ips_sel,
321 &top, &n_ips_sel);
322 xe.top = (UShort)top;
323 xe.n_ips_sel = (UShort)n_ips_sel;
325 xt->init_data_fn(xt->tmp_data);
326 VG_(addToXA)(shared->xec, &xe);
327 shared->d4ecu2xecu[d4ecu] = (UInt)VG_(addToXA)(xt->data, xt->tmp_data);
330 return shared->d4ecu2xecu[d4ecu];
333 Xecu VG_(XT_add_to_ec) (XTree* xt, ExeContext* ec, const void* value)
335 Xecu xecu = find_or_insert(xt, ec);
336 void* data = VG_(indexXA)(xt->data, xecu);
338 xt->add_data_fn(data, value);
339 return xecu;
342 Xecu VG_(XT_sub_from_ec) (XTree* xt, ExeContext* ec, const void* value)
344 Xecu xecu = find_or_insert(xt, ec);
345 void* data = VG_(indexXA)(xt->data, xecu);
347 xt->sub_data_fn(data, value);
348 return xecu;
351 void VG_(XT_add_to_xecu) (XTree* xt, Xecu xecu, const void* value)
353 void* data = VG_(indexXA)(xt->data, xecu);
354 xt->add_data_fn(data, value);
357 void VG_(XT_sub_from_xecu) (XTree* xt, Xecu xecu, const void* value)
359 void* data = VG_(indexXA)(xt->data, xecu);
360 xt->sub_data_fn(data, value);
363 UInt VG_(XT_n_ips_sel) (XTree* xt, Xecu xecu)
365 xec* xe = (xec*)VG_(indexXA)(xt->shared->xec, xecu);
366 return (UInt)xe->n_ips_sel;
369 ExeContext* VG_(XT_get_ec_from_xecu) (XTree* xt, Xecu xecu)
371 xec* xe = (xec*)VG_(indexXA)(xt->shared->xec, xecu);
372 return xe->ec;
375 static VgFile* xt_open (const HChar* outfilename)
377 VgFile* fp;
379 fp = VG_(fopen)(outfilename, VKI_O_CREAT|VKI_O_WRONLY|VKI_O_TRUNC,
380 VKI_S_IRUSR|VKI_S_IWUSR|VKI_S_IRGRP|VKI_S_IROTH);
381 if (fp == NULL) {
382 VG_(message)(Vg_UserMsg,
383 "Error: can not open xtree output file `%s'\n",
384 outfilename);
386 return fp;
389 #define FP(format, args...) ({ VG_(fprintf)(fp, format, ##args); })
391 // Print "cmd:" line.
392 static void FP_cmd(VgFile* fp)
394 UInt i;
396 FP("cmd: ");
397 FP("%s", VG_(args_the_exename));
398 for (i = 0; i < VG_(sizeXA)(VG_(args_for_client)); i++) {
399 HChar* arg = * (HChar**) VG_(indexXA)(VG_(args_for_client), i);
400 FP(" %s", arg);
402 FP("\n");
405 /* ----------- Callgrind output ------------------------------------------- */
407 /* Output a callgrind format element in compressed format:
408 "name=(pos)" or "name=(pos) value" (if value_new)
409 or not compressed format: "name=value"
410 VG_(clo_xtree_compress_strings) indicates if the compressed format is used.
411 name is the format element (e.g. fl, fn, cfi, cfn, ...).
412 pos is the value dictionary position, used for compressed format.
413 value_new is True if this is the first usage of value. */
414 static void FP_pos_str(VgFile* fp, const HChar* name, UInt pos,
415 const HChar* value, Bool value_new)
417 if (!VG_(clo_xtree_compress_strings))
418 FP("%s=%s\n", name, value);
419 else if (value_new)
420 FP("%s=(%d) %s\n", name, pos, value);
421 else
422 FP("%s=(%d)\n", name, pos);
425 void VG_(XT_callgrind_print)
426 (XTree* xt,
427 const HChar* outfilename,
428 const HChar* events,
429 const HChar* (*img_value)(const void* value))
431 UInt n_xecu;
432 XT_shared* shared = xt->shared;
433 VgFile* fp = xt_open(outfilename);
434 DedupPoolAlloc* fnname_ddpa;
435 DedupPoolAlloc* filename_ddpa;
436 HChar* filename_buf = NULL;
437 UInt filename_buf_size = 0;
438 const HChar* filename_dir;
439 const HChar* filename_name;
441 if (fp == NULL)
442 return;
444 fnname_ddpa = VG_(newDedupPA)(16000, 1, xt->alloc_fn,
445 "XT_callgrind_print.fn", xt->free_fn);
446 filename_ddpa = VG_(newDedupPA)(16000, 1, xt->alloc_fn,
447 "XT_callgrind_print.fl", xt->free_fn);
449 FP("# callgrind format\n");
450 FP("version: 1\n");
451 FP("creator: xtree-1\n");
452 FP("pid: %d\n", VG_(getpid)());
453 FP_cmd(fp);
455 /* Currently, we only need/support line positions. */
456 FP("\npositions:%s\n", " line");
458 /* Produce one "event:" line for each event, and the "events:" line. */
460 HChar strtok_events[VG_(strlen)(events)+1];
461 HChar* e;
462 HChar* ssaveptr;
463 HChar* p;
465 VG_(strcpy)(strtok_events, events);
466 for (e = VG_(strtok_r)(strtok_events, ",", &ssaveptr);
467 e != NULL;
468 e = VG_(strtok_r)(NULL, ",", &ssaveptr))
469 FP("event: %s\n", e);
470 FP("events:");
471 VG_(strcpy)(strtok_events, events);
472 for (e = VG_(strtok_r)(strtok_events, ",", &ssaveptr);
473 e != NULL;
474 e = VG_(strtok_r)(NULL, ",", &ssaveptr)) {
475 p = e;
476 while (*p) {
477 if (*p == ':')
478 *p = 0;
479 p++;
481 FP(" %s", e);
483 FP("\n");
485 xt->init_data_fn(xt->tmp_data); // to compute totals
487 n_xecu = VG_(sizeXA)(xt->data);
488 vg_assert (n_xecu <= VG_(sizeXA)(shared->xec));
489 for (Xecu xecu = 0; xecu < n_xecu; xecu++) {
490 xec* xe = (xec*)VG_(indexXA)(shared->xec, xecu);
491 if (xe->n_ips_sel == 0)
492 continue;
494 const HChar* img = img_value(VG_(indexXA)(xt->data, xecu));
496 // CALLED_FLF gets the Dir+Filename/Line number/Function name for ips[n]
497 // in the variables called_filename/called_linenum/called_fnname.
498 // The booleans called_filename_new/called_fnname_new are set to True
499 // the first time the called_filename/called_fnname are encountered.
500 // The called_filename_nr/called_fnname_nr are numbers identifying
501 // the strings called_filename/called_fnname.
502 #define CALLED_FLF(n) \
503 if ((n) < 0 \
504 || !VG_(get_filename_linenum)(ep, ips[(n)], \
505 &filename_name, \
506 &filename_dir, \
507 &called_linenum)) { \
508 filename_name = "UnknownFile???"; \
509 called_linenum = 0; \
511 if ((n) < 0 \
512 || !VG_(get_fnname)(ep, ips[(n)], &called_fnname)) { \
513 called_fnname = "UnknownFn???"; \
516 UInt needed_size = VG_(strlen)(filename_dir) + 1 \
517 + VG_(strlen)(filename_name) + 1; \
518 if (filename_buf_size < needed_size) { \
519 filename_buf_size = needed_size; \
520 filename_buf = VG_(realloc)(xt->cc, filename_buf, \
521 filename_buf_size); \
524 VG_(strcpy)(filename_buf, filename_dir); \
525 if (filename_buf[0] != '\0') { \
526 VG_(strcat)(filename_buf, "/"); \
528 VG_(strcat)(filename_buf, filename_name); \
529 called_filename_nr = VG_(allocStrDedupPA)(filename_ddpa, \
530 filename_buf, \
531 &called_filename_new); \
532 called_filename = filename_buf; \
533 called_fnname_nr = VG_(allocStrDedupPA)(fnname_ddpa, \
534 called_fnname, \
535 &called_fnname_new);
537 /* Instead of unknown fnname ???, CALLED_FLF could use instead:
538 VG_(sprintf)(unknown_fn, "%p", (void*)ips[(n)]);
539 but that creates a lot of (useless) nodes at least for
540 valgrind self-hosting. */
542 if (img) {
543 const HChar* called_filename;
544 UInt called_filename_nr;
545 Bool called_filename_new; // True the first time we see this filename.
546 const HChar* called_fnname;
547 UInt called_fnname_nr;
548 Bool called_fnname_new; // True the first time we see this fnname.
549 UInt called_linenum;
550 UInt prev_linenum;
552 const Addr* ips = VG_(get_ExeContext_StackTrace)(xe->ec) + xe->top;
553 const DiEpoch ep = VG_(get_ExeContext_epoch)(xe->ec);
555 Int ips_idx = xe->n_ips_sel - 1;
557 if (0) {
558 VG_(printf)("entry img %s\n", img);
559 VG_(pp_ExeContext)(xe->ec);
560 VG_(printf)("\n");
562 xt->add_data_fn(xt->tmp_data, VG_(indexXA)(xt->data, xecu));
563 CALLED_FLF(ips_idx);
564 for (;
565 ips_idx >= 0;
566 ips_idx--) {
567 FP_pos_str(fp, "fl", called_filename_nr,
568 called_filename, called_filename_new);
569 FP_pos_str(fp, "fn", called_fnname_nr,
570 called_fnname, called_fnname_new);
571 if (ips_idx == 0)
572 FP("%d %s\n", called_linenum, img);
573 else
574 FP("%d\n", called_linenum); //no self cost.
575 prev_linenum = called_linenum;
576 if (ips_idx >= 1) {
577 CALLED_FLF(ips_idx-1);
578 FP_pos_str(fp, "cfi", called_filename_nr,
579 called_filename, called_filename_new);
580 FP_pos_str(fp, "cfn", called_fnname_nr,
581 called_fnname, called_fnname_new);
582 called_filename_new = False;
583 called_fnname_new = False;
584 /* Giving a call count of 0 allows kcachegrind to hide the calls
585 column. A call count of 1 means kcachegrind would show in the
586 calls column the nr of stacktrace containing this arc, which
587 is very confusing. So, the less bad is to give a 0 call
588 count. */
589 FP("calls=0 %d\n", called_linenum);
590 FP("%d %s\n", prev_linenum, img);
593 FP("\n");
596 /* callgrind format is not really fully supporting (yet?) execution trees:
597 in an execution tree, self and inclusive costs are identical, and
598 cannot be added together.
599 If no totals: line is given, callgrind_annotate calculates the addition
600 of all costs, and so gives a wrong totals.
601 Giving a totals: line solves this, but the user must give the option
602 --inclusive=yes (kind of hack) to have all the functions given
603 in the output file. */
604 FP("totals: %s\n", img_value(xt->tmp_data));
605 VG_(fclose)(fp);
606 VG_(deleteDedupPA)(fnname_ddpa);
607 VG_(deleteDedupPA)(filename_ddpa);
608 VG_(free)(filename_buf);
612 /* ----------- Massif output ---------------------------------------------- */
614 /* For Massif output, some functions from the execontext are not output, a.o.
615 the allocation functions at the top of the stack and the functions below
616 main. So, the StackTrace of the execontexts in the xtree must be filtered.
617 Ms_Ec defines the subset of the stacktrace relevant for the report. */
618 typedef
619 struct {
620 StackTrace ips; // ips and n_ips provides the subset of the xtree ec
621 UInt n_ips; // to use for a massif report.
623 SizeT report_value; // The value to report for this stack trace.
624 } Ms_Ec;
626 /* Ms_Group defines (at a certain depth) a group of ec context that
627 have the same IPs at the given depth, and have the same 'parent'.
628 total is the sum of the values of all group elements.
629 A Ms_Group can also represent a set of ec contexts that do not
630 have the same IP, but that have each a total which is below the
631 significant size. Such a group has a NULL ms_ec, a zero group_io.
632 n_ec is the nr of insignificant ec that have been collected inside this
633 insignificant group, and total is the sum of all non significant ec
634 at the given depth. */
635 typedef
636 struct {
637 Ms_Ec* ms_ec; // The group consists in ms_ec[0 .. n_ec-1]
638 Addr group_ip;
639 UInt n_ec;
640 SizeT total;
641 } Ms_Group;
643 /* Compare 2 groups by total, to have bigger total first. */
644 static Int ms_group_revcmp_total(const void* vleft, const void* vright)
646 const Ms_Group* left = (const Ms_Group*)vleft;
647 const Ms_Group* right = (const Ms_Group*)vright;
649 // First reverse compare total
650 if (left->total > right->total) return -1;
651 if (left->total < right->total) return 1;
653 /* Equal size => compare IPs.
654 This (somewhat?) helps to have deterministic test results.
655 If this would change between platforms, then we should compare
656 function names/filename/linenr */
657 if (left->group_ip < right->group_ip) return -1;
658 if (left->group_ip > right->group_ip) return 1;
659 return 0;
662 /* Scan the addresses in ms_ec at the given depth.
663 On return,
664 *groups points to an array of Ms_Group sorted by total.
665 *n_groups is the nr of groups
666 The caller is responsible to free the allocated group array. */
667 static void ms_make_groups (UInt depth, Ms_Ec* ms_ec, UInt n_ec, SizeT sig_sz,
668 UInt* n_groups, Ms_Group** groups)
670 UInt i, g;
671 Addr cur_group_ip = 0;
673 *n_groups = 0;
675 /* Handle special case somewhat more efficiently */
676 if (n_ec == 0) {
677 *groups = NULL;
678 return;
681 /* Compute how many groups we have. */
682 for (i = 0; i < n_ec; i++) {
683 if (ms_ec[i].n_ips > depth
684 && (*n_groups == 0 || cur_group_ip != ms_ec[i].ips[depth])) {
685 (*n_groups)++;
686 cur_group_ip = ms_ec[i].ips[depth];
690 /* make the group array. */
691 *groups = VG_(malloc)("ms_make_groups", *n_groups * sizeof(Ms_Group));
692 i = 0;
693 for (g = 0; g < *n_groups; g++) {
694 while (ms_ec[i].n_ips <= depth)
695 i++;
696 cur_group_ip = ms_ec[i].ips[depth];
697 (*groups)[g].group_ip = cur_group_ip;
698 (*groups)[g].ms_ec = &ms_ec[i];
699 (*groups)[g].n_ec = 1;
700 (*groups)[g].total = ms_ec[i].report_value;
701 i++;
702 while (i < n_ec
703 && ms_ec[i].n_ips > depth
704 && cur_group_ip == ms_ec[i].ips[depth]) {
705 (*groups)[g].total += ms_ec[i].report_value;
706 i++;
707 (*groups)[g].n_ec++;
711 /* Search for insignificant groups, collect them all together
712 in the first insignificant group, and compact the group array. */
714 UInt insig1; // Position of first insignificant group.
715 UInt n_insig = 0; // Nr of insignificant groups found.
717 for (g = 0; g < *n_groups; g++) {
718 if ((*groups)[g].total < sig_sz) {
719 if (n_insig == 0) {
720 // First insig group => transform it into the special group
721 (*groups)[g].ms_ec = NULL;
722 (*groups)[g].group_ip = 0;
723 (*groups)[g].n_ec = 0;
724 // start the sum of insig total as total
725 insig1 = g;
726 } else {
727 // Add this insig group total into insig1 first group
728 (*groups)[insig1].total += (*groups)[g].total;
730 n_insig++;
731 } else {
732 if (n_insig > 1)
733 (*groups)[g - n_insig + 1] = (*groups)[g];
736 if (n_insig > 0) {
737 (*groups)[insig1].n_ec = n_insig;
738 *n_groups -= n_insig - 1;
740 DMSG(1, "depth %u n_groups %u n_insig %u\n", depth, *n_groups, n_insig);
743 /* Sort on total size, bigger size first. */
744 VG_(ssort)(*groups, *n_groups, sizeof(Ms_Group), ms_group_revcmp_total);
747 /* Output the given group (located in an xtree at the given depth).
748 indent tells by how much to indent the information output for the group.
749 indent can be bigger than depth when outputting a group that is made
750 of one or more inlined calls: all inlined calls are output with the
751 same depth but with one more indent for each inlined call. */
752 static void ms_output_group (VgFile* fp, UInt depth, UInt indent,
753 Ms_Group* group, SizeT sig_sz,
754 double sig_pct_threshold)
756 UInt i;
757 Ms_Group* groups;
758 UInt n_groups;
760 // If this is an insignificant group, handle it specially
761 if (group->ms_ec == NULL) {
762 const HChar* s = ( 1 == group->n_ec? "," : "s, all" );
763 vg_assert(group->group_ip == 0);
764 FP("%*sn0: %lu in %d place%s below massif's threshold (%.2f%%)\n",
765 indent+1, "", group->total, group->n_ec, s, sig_pct_threshold);
766 return;
769 // Normal group => output the group and its subgroups.
770 ms_make_groups(depth+1, group->ms_ec, group->n_ec, sig_sz,
771 &n_groups, &groups);
773 // FIXME JRS EPOCH 28 July 2017: HACK! Is this correct?
774 const DiEpoch cur_ep = VG_(current_DiEpoch)();
775 // // FIXME PW EPOCH : No, the above is not correct.
776 // Xtree Massif output regroups execontext in the layout of a 'tree'.
777 // So, possibly, the same IP address value can be in 2 different ec, but
778 // the epoch to symbolise this address must be retrieved from the ec it
779 // originates from.
780 // So, to fix this, it is not enough to make a group based on identical
781 // IP addr value, one must also find the di used to symbolise this address,
782 // A group will then be defined as 'same IP and same di'.
783 // Fix not trivial to do, so for the moment, --keep-debuginfo=yes will
784 // have no impact on xtree massif output.
786 Addr cur_ip = group->ms_ec->ips[depth];
788 InlIPCursor *iipc = VG_(new_IIPC)(cur_ep, cur_ip);
790 while (True) {
791 const HChar* buf = VG_(describe_IP)(cur_ep, cur_ip, iipc);
792 Bool is_inlined = VG_(next_IIPC)(iipc);
794 FP("%*s" "n%u: %ld %s\n",
795 indent + 1, "",
796 is_inlined ? 1 : n_groups, // Inlined frames always have one child.
797 group->total,
798 buf);
800 if (!is_inlined) {
801 break;
804 indent++;
807 VG_(delete_IIPC)(iipc);
809 /* Output sub groups of this group. */
810 for (i = 0; i < n_groups; i++)
811 ms_output_group(fp, depth+1, indent+1, &groups[i], sig_sz,
812 sig_pct_threshold);
814 VG_(free)(groups);
817 /* Allocate and build an array of Ms_Ec sorted by addresses in the
818 Ms_Ec StackTrace. */
819 static void prepare_ms_ec (XTree* xt,
820 ULong (*report_value)(const void* value),
821 ULong* top_total, Ms_Ec** vms_ec, UInt* vn_ec)
823 XT_shared* shared = xt->shared;
824 const UInt n_xecu = VG_(sizeXA)(shared->xec);
825 const UInt n_data_xecu = VG_(sizeXA)(xt->data);
826 Ms_Ec* ms_ec = VG_(malloc)("XT_massif_print.ms_ec", n_xecu * sizeof(Ms_Ec));
827 UInt n_xecu_sel = 0; // Nr of xecu that are selected for output.
829 vg_assert(n_data_xecu <= n_xecu);
831 // Ensure we have in shared->ips_order_xecu our xecu sorted by StackTrace.
832 ensure_ips_order_xecu_valid(shared);
834 *top_total = 0;
835 DMSG(1, "iteration %u\n", n_xecu);
836 for (UInt i = 0; i < n_xecu; i++) {
837 Xecu xecu = *(Xecu*)VG_(indexXA)(shared->ips_order_xecu, i);
838 xec* xe = (xec*)VG_(indexXA)(shared->xec, xecu);
840 if (xecu >= n_data_xecu)
841 continue; // No data for this xecu in xt->data.
842 ms_ec[n_xecu_sel].n_ips = xe->n_ips_sel;
843 if (ms_ec[n_xecu_sel].n_ips == 0)
844 continue;
846 ms_ec[n_xecu_sel].ips = VG_(get_ExeContext_StackTrace)(xe->ec) + xe->top;
847 ms_ec[n_xecu_sel].report_value
848 = (*report_value)(VG_(indexXA)(xt->data, xecu));
849 *top_total += ms_ec[n_xecu_sel].report_value;
851 n_xecu_sel++;
853 vg_assert(n_xecu_sel <= n_xecu);
855 *vms_ec = ms_ec;
856 *vn_ec = n_xecu_sel;
859 MsFile* VG_(XT_massif_open)
860 (const HChar* outfilename,
861 const HChar* desc,
862 const XArray* desc_args,
863 const HChar* time_unit)
865 UInt i;
866 VgFile* fp = xt_open(outfilename);
868 if (fp == NULL)
869 return NULL; // xt_open reported the error.
871 /* ------ file header ------------------------------- */
872 FP("desc:");
873 if (desc)
874 FP(" %s", desc);
875 i = 0;
876 if (desc_args) {
877 for (i = 0; i < VG_(sizeXA)(desc_args); i++) {
878 HChar* arg = *(HChar**)VG_(indexXA)(desc_args, i);
879 FP(" %s", arg);
882 if (0 == i && desc == NULL) FP(" (none)");
883 FP("\n");
885 FP_cmd(fp);
887 FP("time_unit: %s\n", time_unit);
889 return fp;
892 void VG_(XT_massif_close)(MsFile* fp)
894 if (fp == NULL)
895 return; // Error should have been reported by VG_(XT_massif_open)
897 VG_(fclose)(fp);
900 void VG_(XT_massif_print)
901 (MsFile* fp,
902 XTree* xt,
903 const Massif_Header* header,
904 ULong (*report_value)(const void* value))
906 UInt i;
908 if (fp == NULL)
909 return; // Normally VG_(XT_massif_open) already reported an error.
911 /* Compute/prepare Snapshot totals/data/... */
912 ULong top_total;
914 /* Following variables only used for detailed snapshot. */
915 UInt n_ec = 0;
916 Ms_Ec* ms_ec = NULL;
917 const HChar* kind =
918 header->detailed ? (header->peak ? "peak" : "detailed") : "empty";
920 DMSG(1, "XT_massif_print %s\n", kind);
921 if (header->detailed) {
922 /* Prepare the Ms_Ec sorted array of stacktraces and the groups
923 at level 0. */
924 prepare_ms_ec(xt, report_value, &top_total, &ms_ec, &n_ec);
925 DMSG(1, "XT_print_massif ms_ec n_ec %u\n", n_ec);
926 } else if (xt == NULL) {
927 /* Non detailed, no xt => use the sz provided in the header. */
928 top_total = header->sz_B;
929 } else {
930 /* For non detailed snapshot, compute total directly from the xec. */
931 const XT_shared* shared = xt->shared;
932 const UInt n_xecu = VG_(sizeXA)(xt->data);
933 top_total = 0;
935 for (UInt xecu = 0; xecu < n_xecu; xecu++) {
936 xec* xe = (xec*)VG_(indexXA)(shared->xec, xecu);
937 if (xe->n_ips_sel == 0)
938 continue;
939 top_total += (*report_value)(VG_(indexXA)(xt->data, xecu));
943 /* ------ snapshot header --------------------------- */
944 FP("#-----------\n");
945 FP("snapshot=%d\n", header->snapshot_n);
946 FP("#-----------\n");
947 FP("time=%lld\n", header->time);
949 FP("mem_heap_B=%llu\n", top_total); // without extra_B and without stacks_B
950 FP("mem_heap_extra_B=%llu\n", header->extra_B);
951 FP("mem_stacks_B=%llu\n", header->stacks_B);
952 FP("heap_tree=%s\n", kind);
954 /* ------ detailed snapshot data ----------------------------- */
955 if (header->detailed) {
956 UInt n_groups;
957 Ms_Group* groups;
959 ULong sig_sz;
960 // Work out how big a child must be to be significant. If the current
961 // top_total is zero, then we set it to 1, which means everything will be
962 // judged insignificant -- this is sensible, as there's no point showing
963 // any detail for this case. Unless they used threshold=0, in which
964 // case we show them everything because that's what they asked for.
966 // Nb: We do this once now, rather than once per child, because if we do
967 // that the cost of all the divisions adds up to something significant.
968 if (0 == top_total && 0 != header->sig_threshold)
969 sig_sz = 1;
970 else
971 sig_sz = ((top_total + header->extra_B + header->stacks_B)
972 * header->sig_threshold) / 100;
974 /* Produce the groups at depth 0 */
975 DMSG(1, "XT_massif_print producing depth 0 groups\n");
976 ms_make_groups(0, ms_ec, n_ec, sig_sz, &n_groups, &groups);
978 /* Output the top node. */
979 FP("n%u: %llu %s\n", n_groups, top_total, header->top_node_desc);
981 /* Output depth 0 groups. */
982 DMSG(1, "XT_massif_print outputing %u depth 0 groups\n", n_groups);
983 for (i = 0; i < n_groups; i++)
984 ms_output_group(fp, 0, 0, &groups[i], sig_sz, header->sig_threshold);
986 VG_(free)(groups);
987 VG_(free)(ms_ec);
991 Int VG_(XT_offset_main_or_below_main)(DiEpoch ep, Addr* ips, Int n_ips)
993 /* Search for main or below main function.
994 To limit the nr of ips to examine, we maintain the deepest
995 offset where main was found, and we first search main
996 from there.
997 If no main is found, we will then do a search for main or
998 below main function till the top. */
999 static Int deepest_main = 0;
1000 Vg_FnNameKind kind = Vg_FnNameNormal;
1001 Int mbm = n_ips - 1; // Position of deepest main or below main.
1002 Vg_FnNameKind mbmkind = Vg_FnNameNormal;
1003 Int i;
1005 for (i = n_ips - 1 - deepest_main;
1006 i < n_ips;
1007 i++) {
1008 mbmkind = VG_(get_fnname_kind_from_IP)(ep, ips[i]);
1009 if (mbmkind != Vg_FnNameNormal) {
1010 mbm = i;
1011 break;
1015 /* Search for main or below main function till top. */
1016 for (i = mbm - 1;
1017 i >= 0 && mbmkind != Vg_FnNameMain;
1018 i--) {
1019 kind = VG_(get_fnname_kind_from_IP)(ep, ips[i]);
1020 if (kind != Vg_FnNameNormal) {
1021 mbm = i;
1022 mbmkind = kind;
1025 if (Vg_FnNameMain == mbmkind || Vg_FnNameBelowMain == mbmkind) {
1026 if (mbmkind == Vg_FnNameMain && (n_ips - 1 - mbm) > deepest_main)
1027 deepest_main = n_ips - 1 - mbm;
1028 return mbm;
1029 } else
1030 return n_ips-1;
1033 void VG_(XT_filter_1top_and_maybe_below_main)
1034 (Addr* ips, Int n_ips,
1035 UInt* top, UInt* n_ips_sel)
1037 Int mbm;
1039 *n_ips_sel = n_ips;
1040 if (n_ips == 0) {
1041 *top = 0;
1042 return;
1045 /* Filter top function. */
1046 *top = 1;
1048 if (VG_(clo_show_below_main))
1049 mbm = n_ips - 1;
1050 else {
1051 // FIXME PW EPOCH : use the real ips epoch
1052 const DiEpoch cur_ep = VG_(current_DiEpoch)();
1053 mbm = VG_(XT_offset_main_or_below_main)(cur_ep, ips, n_ips);
1056 *n_ips_sel = mbm - *top + 1;
1059 void VG_(XT_filter_maybe_below_main)
1060 (Addr* ips, Int n_ips,
1061 UInt* top, UInt* n_ips_sel)
1063 Int mbm;
1065 *n_ips_sel = n_ips;
1066 *top = 0;
1067 if (n_ips == 0)
1068 return;
1070 if (VG_(clo_show_below_main))
1071 mbm = n_ips - 1;
1072 else {
1073 // FIXME PW EPOCH : use the real ips epoch
1074 const DiEpoch cur_ep = VG_(current_DiEpoch)();
1075 mbm = VG_(XT_offset_main_or_below_main)(cur_ep, ips, n_ips);
1077 *n_ips_sel = mbm - *top + 1;
/*--------------------------------------------------------------------*/
/*--- end                                            m_xtree.c     ---*/
/*--------------------------------------------------------------------*/