Add support for the Linux membarrier() system call
[valgrind.git] / coregrind / m_xtree.c
blob217cad9da5cc5ceb7d6d2cf8f211cbffcd5d3aec
2 /*--------------------------------------------------------------------*/
3 /*--- An xtree, tree of stacktraces with data m_xtree.c ---*/
4 /*--------------------------------------------------------------------*/
6 /*
7 This file is part of Valgrind, a dynamic binary instrumentation
8 framework.
10 Copyright (C) 2016-2017 Philippe Waroquiers
12 This code generalises the XTree idea that was implemented in
13 the massif tool in Valgrind versions <= 3.12, which is
14 Copyright (C) 2005-2017 Nicholas Nethercote
15 njn@valgrind.org
17 The XTree implementation in this file is however implemented completely
18 differently. Some code has been re-used for the production of the
19 massif file header (e.g. FP_cmd function).
21 This program is free software; you can redistribute it and/or
22 modify it under the terms of the GNU General Public License as
23 published by the Free Software Foundation; either version 2 of the
24 License, or (at your option) any later version.
26 This program is distributed in the hope that it will be useful, but
27 WITHOUT ANY WARRANTY; without even the implied warranty of
28 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
29 General Public License for more details.
31 You should have received a copy of the GNU General Public License
32 along with this program; if not, write to the Free Software
33 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
34 02111-1307, USA.
36 The GNU General Public License is contained in the file COPYING.
39 #include "pub_core_basics.h"
40 #include "pub_core_debuglog.h"
41 #include "pub_core_clientstate.h"
42 #include "pub_core_stacktrace.h"
43 #include "pub_core_execontext.h"
44 #include "pub_core_libcbase.h"
45 #include "pub_core_libcassert.h"
46 #include "pub_core_libcfile.h"
47 #include "pub_core_libcprint.h"
48 #include "pub_core_libcproc.h"
49 #include "pub_core_hashtable.h"
50 #include "pub_core_mallocfree.h"
51 #include "pub_core_options.h"
52 #include "pub_core_debuginfo.h"
53 #include "pub_core_deduppoolalloc.h"
54 #include "pub_core_xtree.h" /* self */
56 #define DMSG(level, ...) (level <= VG_(debugLog_getLevel)() ? \
57 VG_(dmsg)(__VA_ARGS__) \
58 : 0)
60 /* Defines the relevant part of an ec. This is shared between an xt
61 and its snapshots (see XT_shared XArray of xec). */
62 typedef
63 struct _xec {
64 ExeContext* ec;
65 UShort top; // The first ips of ec to take into account.
66 UShort n_ips_sel; // The nr of ips from top to take into account.
68 xec;
70 /* XT_shared maintains the information shared between an XT and all
71 its snapshots. */
72 typedef
73 struct _XT_shared {
74 UWord nrRef; /* nr of XTrees referencing this shared memory. */
76 Alloc_Fn_t alloc_fn; /* alloc fn (nofail) */
77 const HChar* cc; /* cost centre for alloc */
78 Free_Fn_t free_fn; /* free fn */
80 /* The data associated to each ec is stored in 2 arrays:
81 an xec array, shared between an xt and all its snapshots.
82 a data array, private to each XTree.
83 For an ec with an ECU ecu, d4ecu2xecu[ecu/4] gives the offset in
84 xec and data arrays where the ec information is located (this
85 indirection is used to avoid huge xec and data arrays, in
86 case an XTree contains data only for a small number of ec.
87 The offset in the xec and data array is used as xtree ec unique
88 id i.e. an xecu. */
90 UInt d4ecu2xecu_sz; /* size of d4ecu2xecu (in nr of elements). */
91 UInt* d4ecu2xecu;
93 /* ec information common to an xt and its snapshots. */
94 XArray* xec; /* XArray of xec, indexed by xecu (== d4ecu2xecu[ecu/4]). */
96 /* XArray of xecu, sorted by StackTrace ips[top..top+n_ips_sel-1].
97 See ips_order_cmp. */
98 XArray* ips_order_xecu;
99 } XT_shared;
101 /* NO_OFFSET indicates in d4ecu2xecu there is no data (yet) for this ec
102 (with the index ecu/4). */
103 #define NO_OFFSET 0xffffffff
105 static XT_shared* new_XT_shared (Alloc_Fn_t alloc_fn,
106 const HChar* cc,
107 void (*free_fn)(void*))
109 XT_shared* shared;
111 vg_assert(alloc_fn);
112 vg_assert(cc);
113 vg_assert(free_fn);
114 shared = alloc_fn(cc, sizeof(*shared));
115 shared->nrRef = 0;
116 shared->alloc_fn = alloc_fn;
117 shared->cc = cc;
118 shared->free_fn = free_fn;
120 shared->d4ecu2xecu_sz = 0;
121 shared->d4ecu2xecu = NULL;
122 shared->xec = VG_(newXA)(alloc_fn, cc, free_fn, sizeof(xec));
123 shared->ips_order_xecu = NULL; // Allocated when needed.
125 return shared;
128 static void delete_XT_shared (XT_shared* shared)
130 vg_assert(shared->nrRef == 0);
131 shared->free_fn(shared->d4ecu2xecu);
132 VG_(deleteXA)(shared->xec);
133 if (shared->ips_order_xecu != NULL)
134 VG_(deleteXA)(shared->ips_order_xecu);
135 shared->free_fn(shared);
138 /* Compare 2 entries in ips_order_xecu by StackTrace elements.
139 In case stack traces are of different length, an 'absent' ips is
140 considered smaller than any other address. */
141 static XArray* xec_data_for_sort; // Needed to translate an xecu into an xec
142 static Int ips_order_cmp(const void* vleft, const void* vright)
144 const Xecu left_xecu = *(const Xecu*)vleft;
145 const Xecu right_xecu = *(const Xecu*)vright;
146 const xec* left = VG_(indexXA)(xec_data_for_sort, left_xecu);
147 const xec* right = VG_(indexXA)(xec_data_for_sort, right_xecu);
148 const StackTrace left_ips = VG_(get_ExeContext_StackTrace)(left->ec)
149 + left->top;
150 const StackTrace right_ips = VG_(get_ExeContext_StackTrace)(right->ec)
151 + right->top;
152 UInt i;
154 const UInt c_n_ips_sel = left->n_ips_sel < right->n_ips_sel
155 ? left->n_ips_sel : right->n_ips_sel;
157 // First see if we have a difference on the common nr of ips selected
158 for (i = 0; i < c_n_ips_sel; i++) {
159 if (left_ips[i] == right_ips[i]) continue;
160 if (left_ips[i] < right_ips[i]) return -1;
161 return 1;
163 // Common part is equal => compare lengths.
164 if (left->n_ips_sel < right->n_ips_sel) return -1;
165 if (left->n_ips_sel > right->n_ips_sel) return 1;
166 return 0;
169 // If needed, build or refresh shared->ips_order_xecu
170 static void ensure_ips_order_xecu_valid(XT_shared* shared)
172 UInt i;
173 UInt n_xecu;
175 if (shared->ips_order_xecu == NULL) {
176 shared->ips_order_xecu = VG_(newXA)(shared->alloc_fn, shared->cc,
177 shared->free_fn, sizeof(Xecu));
178 VG_(hintSizeXA)(shared->ips_order_xecu, VG_(sizeXA)(shared->xec));
179 VG_(setCmpFnXA)(shared->ips_order_xecu, ips_order_cmp);
182 if (VG_(sizeXA)(shared->xec) == VG_(sizeXA)(shared->ips_order_xecu))
183 return;
185 n_xecu = VG_(sizeXA)(shared->xec);
186 for (i = VG_(sizeXA)(shared->ips_order_xecu); i < n_xecu; i++)
187 VG_(addToXA)(shared->ips_order_xecu, &i);
189 xec_data_for_sort = shared->xec;
190 VG_(sortXA)(shared->ips_order_xecu);
193 static void addRef_XT_shared (XT_shared* shared)
195 shared->nrRef++;
198 static UWord release_XT_shared(XT_shared* shared)
200 UWord nrRef;
202 vg_assert(shared->nrRef > 0);
203 nrRef = --shared->nrRef;
204 if (nrRef == 0)
205 delete_XT_shared(shared);
206 return nrRef;
210 struct _XTree {
211 Alloc_Fn_t alloc_fn; /* alloc fn (nofail) */
212 const HChar* cc; /* cost centre for alloc */
213 Free_Fn_t free_fn; /* free fn */
214 Word dataSzB; /* data size in bytes */
215 XT_init_data_t init_data_fn;
216 XT_add_data_t add_data_fn;
217 XT_sub_data_t sub_data_fn;
218 XT_filter_IPs_t filter_IPs_fn;
220 XT_shared* shared;
222 HChar* tmp_data; /* temporary buffer, to insert new elements. */
223 XArray* data; /* of elements of size dataSzB */
227 XTree* VG_(XT_create) ( Alloc_Fn_t alloc_fn,
228 const HChar* cc,
229 Free_Fn_t free_fn,
230 Word dataSzB,
231 XT_init_data_t init_data_fn,
232 XT_add_data_t add_data_fn,
233 XT_sub_data_t sub_data_fn,
234 XT_filter_IPs_t filter_IPs_fn)
236 XTree* xt;
238 /* check user-supplied info .. */
239 vg_assert(alloc_fn);
240 vg_assert(free_fn);
241 vg_assert(dataSzB >= 0);
242 vg_assert(init_data_fn);
243 vg_assert(add_data_fn);
244 vg_assert(sub_data_fn);
246 xt = alloc_fn(cc, sizeof(struct _XTree) );
247 xt->alloc_fn = alloc_fn;
248 xt->cc = cc;
249 xt->free_fn = free_fn;
250 xt->dataSzB = dataSzB;
251 xt->init_data_fn = init_data_fn;
252 xt->add_data_fn = add_data_fn;
253 xt->sub_data_fn = sub_data_fn;
254 xt->filter_IPs_fn = filter_IPs_fn;
256 xt->shared = new_XT_shared(alloc_fn, cc, free_fn);
257 addRef_XT_shared(xt->shared);
258 xt->tmp_data = alloc_fn(cc, xt->dataSzB);
259 xt->data = VG_(newXA)(alloc_fn, cc, free_fn, dataSzB);
261 return xt;
264 XTree* VG_(XT_snapshot)(XTree* xt)
266 XTree* nxt;
268 vg_assert(xt);
270 nxt = xt->alloc_fn(xt->cc, sizeof(struct _XTree) );
272 *nxt = *xt;
273 addRef_XT_shared(nxt->shared);
274 nxt->tmp_data = nxt->alloc_fn(nxt->cc, nxt->dataSzB);
275 nxt->data = VG_(cloneXA)(nxt->cc, xt->data);
277 return nxt;
280 void VG_(XT_delete) ( XTree* xt )
282 vg_assert(xt);
284 release_XT_shared(xt->shared);
285 xt->free_fn(xt->tmp_data);
286 VG_(deleteXA)(xt->data);
287 xt->free_fn(xt);
290 static Xecu find_or_insert (XTree* xt, ExeContext* ec)
293 const UInt d4ecu = VG_(get_ECU_from_ExeContext)(ec) / 4;
294 XT_shared* shared = xt->shared;
296 /* First grow the d4ecu2xecu array if needed. */
297 if (d4ecu >= shared->d4ecu2xecu_sz) {
298 UInt old_sz = shared->d4ecu2xecu_sz;
299 UInt new_sz = (3 * d4ecu) / 2;
301 if (new_sz < 1000)
302 new_sz = 1000;
303 shared->d4ecu2xecu = VG_(realloc)(xt->cc, shared->d4ecu2xecu,
304 new_sz * sizeof(UInt));
305 shared->d4ecu2xecu_sz = new_sz;
306 for (UInt i = old_sz; i < new_sz; i++)
307 shared->d4ecu2xecu[i] = NO_OFFSET;
310 if (shared->d4ecu2xecu[d4ecu] == NO_OFFSET) {
311 xec xe;
313 xe.ec = ec;
314 if (xt->filter_IPs_fn == NULL) {
315 xe.top = 0;
316 xe.n_ips_sel = (UShort)VG_(get_ExeContext_n_ips)(xe.ec);
317 } else {
318 UInt top;
319 UInt n_ips_sel = VG_(get_ExeContext_n_ips)(xe.ec);
320 xt->filter_IPs_fn(VG_(get_ExeContext_StackTrace)(xe.ec), n_ips_sel,
321 &top, &n_ips_sel);
322 xe.top = (UShort)top;
323 xe.n_ips_sel = (UShort)n_ips_sel;
325 xt->init_data_fn(xt->tmp_data);
326 VG_(addToXA)(shared->xec, &xe);
327 shared->d4ecu2xecu[d4ecu] = (UInt)VG_(addToXA)(xt->data, xt->tmp_data);
330 return shared->d4ecu2xecu[d4ecu];
333 Xecu VG_(XT_add_to_ec) (XTree* xt, ExeContext* ec, const void* value)
335 Xecu xecu = find_or_insert(xt, ec);
336 void* data = VG_(indexXA)(xt->data, xecu);
338 xt->add_data_fn(data, value);
339 return xecu;
342 Xecu VG_(XT_sub_from_ec) (XTree* xt, ExeContext* ec, const void* value)
344 Xecu xecu = find_or_insert(xt, ec);
345 void* data = VG_(indexXA)(xt->data, xecu);
347 xt->sub_data_fn(data, value);
348 return xecu;
351 void VG_(XT_add_to_xecu) (XTree* xt, Xecu xecu, const void* value)
353 void* data = VG_(indexXA)(xt->data, xecu);
354 xt->add_data_fn(data, value);
357 void VG_(XT_sub_from_xecu) (XTree* xt, Xecu xecu, const void* value)
359 void* data = VG_(indexXA)(xt->data, xecu);
360 xt->sub_data_fn(data, value);
363 UInt VG_(XT_n_ips_sel) (XTree* xt, Xecu xecu)
365 xec* xe = (xec*)VG_(indexXA)(xt->shared->xec, xecu);
366 return (UInt)xe->n_ips_sel;
369 ExeContext* VG_(XT_get_ec_from_xecu) (XTree* xt, Xecu xecu)
371 xec* xe = (xec*)VG_(indexXA)(xt->shared->xec, xecu);
372 return xe->ec;
375 static VgFile* xt_open (const HChar* outfilename)
377 VgFile* fp;
379 fp = VG_(fopen)(outfilename, VKI_O_CREAT|VKI_O_WRONLY|VKI_O_TRUNC,
380 VKI_S_IRUSR|VKI_S_IWUSR|VKI_S_IRGRP|VKI_S_IROTH);
381 if (fp == NULL) {
382 VG_(message)(Vg_UserMsg,
383 "Error: can not open xtree output file `%s'\n",
384 outfilename);
386 return fp;
389 #define FP(format, args...) ({ VG_(fprintf)(fp, format, ##args); })
391 // Print "cmd:" line.
392 static void FP_cmd(VgFile* fp)
394 UInt i;
396 FP("cmd: ");
397 FP("%s", VG_(args_the_exename));
398 for (i = 0; i < VG_(sizeXA)(VG_(args_for_client)); i++) {
399 HChar* arg = * (HChar**) VG_(indexXA)(VG_(args_for_client), i);
400 FP(" %s", arg);
402 FP("\n");
405 /* ----------- Callgrind output ------------------------------------------- */
407 /* Output a callgrind format element in compressed format:
408 "name=(pos)" or "name=(pos) value" (if value_new)
409 or not compressed format: "name=value"
410 VG_(clo_xtree_compress_strings) indicates if the compressed format is used.
411 name is the format element (e.g. fl, fn, cfi, cfn, ...).
412 pos is the value dictionary position, used for compressed format.
413 value_new is True if this is the first usage of value. */
414 static void FP_pos_str(VgFile* fp, const HChar* name, UInt pos,
415 const HChar* value, Bool value_new)
417 if (!VG_(clo_xtree_compress_strings))
418 FP("%s=%s\n", name, value);
419 else if (value_new)
420 FP("%s=(%d) %s\n", name, pos, value);
421 else
422 FP("%s=(%d)\n", name, pos);
425 void VG_(XT_callgrind_print)
426 (XTree* xt,
427 const HChar* outfilename,
428 const HChar* events,
429 const HChar* (*img_value)(const void* value))
431 UInt n_xecu;
432 XT_shared* shared = xt->shared;
433 VgFile* fp = xt_open(outfilename);
434 DedupPoolAlloc* fnname_ddpa;
435 DedupPoolAlloc* filename_ddpa;
436 HChar* filename_buf = NULL;
437 UInt filename_buf_size = 0;
438 const HChar* filename_dir;
439 const HChar* filename_name;
441 if (fp == NULL)
442 return;
444 fnname_ddpa = VG_(newDedupPA)(16000, 1, xt->alloc_fn,
445 "XT_callgrind_print.fn", xt->free_fn);
446 filename_ddpa = VG_(newDedupPA)(16000, 1, xt->alloc_fn,
447 "XT_callgrind_print.fl", xt->free_fn);
449 FP("# callgrind format\n");
450 FP("version: 1\n");
451 FP("creator: xtree-1\n");
452 FP("pid: %d\n", VG_(getpid)());
453 FP_cmd(fp);
455 /* Currently, we only need/support line positions. */
456 FP("\npositions:%s\n", " line");
458 /* Produce one "event:" line for each event, and the "events:" line. */
460 HChar strtok_events[VG_(strlen)(events)+1];
461 HChar* e;
462 HChar* ssaveptr;
463 HChar* p;
465 VG_(strcpy)(strtok_events, events);
466 for (e = VG_(strtok_r)(strtok_events, ",", &ssaveptr);
467 e != NULL;
468 e = VG_(strtok_r)(NULL, ",", &ssaveptr))
469 FP("event: %s\n", e);
470 FP("events:");
471 VG_(strcpy)(strtok_events, events);
472 for (e = VG_(strtok_r)(strtok_events, ",", &ssaveptr);
473 e != NULL;
474 e = VG_(strtok_r)(NULL, ",", &ssaveptr)) {
475 p = e;
476 while (*p) {
477 if (*p == ':')
478 *p = 0;
479 p++;
481 FP(" %s", e);
483 FP("\n");
485 xt->init_data_fn(xt->tmp_data); // to compute totals
487 n_xecu = VG_(sizeXA)(xt->data);
488 vg_assert (n_xecu <= VG_(sizeXA)(shared->xec));
489 for (Xecu xecu = 0; xecu < n_xecu; xecu++) {
490 xec* xe = (xec*)VG_(indexXA)(shared->xec, xecu);
491 if (xe->n_ips_sel == 0)
492 continue;
494 const HChar* img = img_value(VG_(indexXA)(xt->data, xecu));
496 // CALLED_FLF gets the Dir+Filename/Line number/Function name for ips[n]
497 // in the variables called_filename/called_linenum/called_fnname.
498 // The booleans called_filename_new/called_fnname_new are set to True
499 // the first time the called_filename/called_fnname are encountered.
500 // The called_filename_nr/called_fnname_nr are numbers identifying
501 // the strings called_filename/called_fnname.
502 #define CALLED_FLF(n) \
503 if ((n) < 0 \
504 || !VG_(get_filename_linenum)(ep, ips[(n)], \
505 &filename_name, \
506 &filename_dir, \
507 &called_linenum)) { \
508 filename_name = "UnknownFile???"; \
509 called_linenum = 0; \
511 if ((n) < 0 \
512 || !VG_(get_fnname)(ep, ips[(n)], &called_fnname)) { \
513 called_fnname = "UnknownFn???"; \
516 UInt needed_size = VG_(strlen)(filename_dir) + 1 \
517 + VG_(strlen)(filename_name) + 1; \
518 if (filename_buf_size < needed_size) { \
519 filename_buf_size = needed_size; \
520 filename_buf = VG_(realloc)(xt->cc, filename_buf, \
521 filename_buf_size); \
524 VG_(strcpy)(filename_buf, filename_dir); \
525 if (filename_buf[0] != '\0') { \
526 VG_(strcat)(filename_buf, "/"); \
528 VG_(strcat)(filename_buf, filename_name); \
529 called_filename_nr = VG_(allocStrDedupPA)(filename_ddpa, \
530 filename_buf, \
531 &called_filename_new); \
532 called_filename = filename_buf; \
533 called_fnname_nr = VG_(allocStrDedupPA)(fnname_ddpa, \
534 called_fnname, \
535 &called_fnname_new);
537 /* Instead of unknown fnname ???, CALLED_FLF could use instead:
538 VG_(sprintf)(unknown_fn, "%p", (void*)ips[(n)]);
539 but that creates a lot of (useless) nodes at least for
540 valgrind self-hosting. */
542 if (img) {
543 const HChar* called_filename;
544 UInt called_filename_nr;
545 Bool called_filename_new; // True the first time we see this filename.
546 const HChar* called_fnname;
547 UInt called_fnname_nr;
548 Bool called_fnname_new; // True the first time we see this fnname.
549 UInt called_linenum;
550 UInt prev_linenum;
552 const Addr* ips = VG_(get_ExeContext_StackTrace)(xe->ec) + xe->top;
553 const DiEpoch ep = VG_(get_ExeContext_epoch)(xe->ec);
555 Int ips_idx = xe->n_ips_sel - 1;
557 if (0) {
558 VG_(printf)("entry img %s\n", img);
559 VG_(pp_ExeContext)(xe->ec);
560 VG_(printf)("\n");
562 xt->add_data_fn(xt->tmp_data, VG_(indexXA)(xt->data, xecu));
563 CALLED_FLF(ips_idx);
564 for (;
565 ips_idx >= 0;
566 ips_idx--) {
567 FP_pos_str(fp, "fl", called_filename_nr,
568 called_filename, called_filename_new);
569 FP_pos_str(fp, "fn", called_fnname_nr,
570 called_fnname, called_fnname_new);
571 if (ips_idx == 0)
572 FP("%d %s\n", called_linenum, img);
573 else
574 FP("%d\n", called_linenum); //no self cost.
575 prev_linenum = called_linenum;
576 if (ips_idx >= 1) {
577 CALLED_FLF(ips_idx-1);
578 FP_pos_str(fp, "cfi", called_filename_nr,
579 called_filename, called_filename_new);
580 FP_pos_str(fp, "cfn", called_fnname_nr,
581 called_fnname, called_fnname_new);
582 called_filename_new = False;
583 called_fnname_new = False;
584 /* Giving a call count of 0 allows kcachegrind to hide the calls
585 column. A call count of 1 means kcachegrind would show in the
586 calls column the nr of stacktrace containing this arc, which
587 is very confusing. So, the less bad is to give a 0 call
588 count. */
589 FP("calls=0 %d\n", called_linenum);
590 FP("%d %s\n", prev_linenum, img);
593 FP("\n");
596 /* callgrind format is not really fully supporting (yet?) execution trees:
597 in an execution tree, self and inclusive costs are identical, and
598 cannot be added together.
599 If no totals: line is given, callgrind_annotate calculates the addition
600 of all costs, and so gives a wrong totals.
601 Giving a totals: line solves this, but the user must give the option
602 --inclusive=yes (kind of hack) to have all the functions given
603 in the output file. */
604 FP("totals: %s\n", img_value(xt->tmp_data));
605 VG_(fclose)(fp);
606 VG_(deleteDedupPA)(fnname_ddpa);
607 VG_(deleteDedupPA)(filename_ddpa);
608 VG_(free)(filename_buf);
612 /* ----------- Massif output ---------------------------------------------- */
614 /* For Massif output, some functions from the execontext are not output, a.o.
615 the allocation functions at the top of the stack and the functions below
616 main. So, the StackTrace of the execontexts in the xtree must be filtered.
617 Ms_Ec defines the subset of the stacktrace relevant for the report. */
618 typedef
619 struct {
620 StackTrace ips; // ips and n_ips provides the subset of the xtree ec
621 UInt n_ips; // to use for a massif report.
623 SizeT report_value; // The value to report for this stack trace.
624 } Ms_Ec;
626 /* Ms_Group defines (at a certain depth) a group of ec context that
627 have the same IPs at the given depth, and have the same 'parent'.
628 total is the sum of the values of all group elements.
629 A Ms_Group can also represent a set of ec contexts that do not
630 have the same IP, but that have each a total which is below the
631 significant size. Such a group has a NULL ms_ec, a zero group_io.
632 n_ec is the nr of insignificant ec that have been collected inside this
633 insignificant group, and total is the sum of all non significant ec
634 at the given depth. */
635 typedef
636 struct {
637 Ms_Ec* ms_ec; // The group consists in ms_ec[0 .. n_ec-1]
638 Addr group_ip;
639 UInt n_ec;
640 SizeT total;
641 } Ms_Group;
643 /* Compare 2 groups by total, to have bigger total first. */
644 static Int ms_group_revcmp_total(const void* vleft, const void* vright)
646 const Ms_Group* left = (const Ms_Group*)vleft;
647 const Ms_Group* right = (const Ms_Group*)vright;
649 // First reverse compare total
650 if (left->total > right->total) return -1;
651 if (left->total < right->total) return 1;
653 /* Equal size => compare IPs.
654 This (somewhat?) helps to have deterministic test results.
655 If this would change between platforms, then we should compare
656 function names/filename/linenr */
657 if (left->group_ip < right->group_ip) return -1;
658 if (left->group_ip > right->group_ip) return 1;
659 return 0;
662 /* Scan the addresses in ms_ec at the given depth.
663 On return,
664 *groups points to an array of Ms_Group sorted by total.
665 *n_groups is the nr of groups
666 The caller is responsible to free the allocated group array. */
667 static void ms_make_groups (UInt depth, Ms_Ec* ms_ec, UInt n_ec, SizeT sig_sz,
668 UInt* n_groups, Ms_Group** groups)
670 UInt i, g;
671 Addr cur_group_ip = 0;
673 *n_groups = 0;
675 /* Handle special case somewhat more efficiently */
676 if (n_ec == 0) {
677 *groups = NULL;
678 return;
681 /* Compute how many groups we have. */
682 for (i = 0; i < n_ec; i++) {
683 if (ms_ec[i].n_ips > depth
684 && (*n_groups == 0 || cur_group_ip != ms_ec[i].ips[depth])) {
685 (*n_groups)++;
686 cur_group_ip = ms_ec[i].ips[depth];
690 /* make the group array. */
691 *groups = VG_(malloc)("ms_make_groups", *n_groups * sizeof(Ms_Group));
692 i = 0;
693 for (g = 0; g < *n_groups; g++) {
694 while (ms_ec[i].n_ips <= depth)
695 i++;
696 cur_group_ip = ms_ec[i].ips[depth];
697 (*groups)[g].group_ip = cur_group_ip;
698 (*groups)[g].ms_ec = &ms_ec[i];
699 (*groups)[g].n_ec = 1;
700 (*groups)[g].total = ms_ec[i].report_value;
701 i++;
702 while (i < n_ec
703 && ms_ec[i].n_ips > depth
704 && cur_group_ip == ms_ec[i].ips[depth]) {
705 (*groups)[g].total += ms_ec[i].report_value;
706 i++;
707 (*groups)[g].n_ec++;
711 /* Search for insignificant groups, collect them all together
712 in the first insignificant group, and compact the group array. */
714 UInt insig1; // Position of first insignificant group.
715 UInt n_insig = 0; // Nr of insignificant groups found.
717 for (g = 0; g < *n_groups; g++) {
718 if ((*groups)[g].total < sig_sz) {
719 if (n_insig == 0) {
720 // First insig group => transform it into the special group
721 (*groups)[g].ms_ec = NULL;
722 (*groups)[g].group_ip = 0;
723 (*groups)[g].n_ec = 0;
724 // start the sum of insig total as total
725 insig1 = g;
726 } else {
727 // Add this insig group total into insig1 first group
728 (*groups)[insig1].total += (*groups)[g].total;
730 n_insig++;
731 } else {
732 if (n_insig > 1)
733 (*groups)[g - n_insig + 1] = (*groups)[g];
736 if (n_insig > 0) {
737 (*groups)[insig1].n_ec = n_insig;
738 *n_groups -= n_insig - 1;
740 DMSG(1, "depth %u n_groups %u n_insig %u\n", depth, *n_groups, n_insig);
743 /* Sort on total size, bigger size first. */
744 VG_(ssort)(*groups, *n_groups, sizeof(Ms_Group), ms_group_revcmp_total);
747 static void ms_output_group (VgFile* fp, UInt depth, Ms_Group* group,
748 SizeT sig_sz, double sig_pct_threshold)
750 UInt i;
751 Ms_Group* groups;
752 UInt n_groups;
754 // If this is an insignificant group, handle it specially
755 if (group->ms_ec == NULL) {
756 const HChar* s = ( 1 == group->n_ec? "," : "s, all" );
757 vg_assert(group->group_ip == 0);
758 FP("%*sn0: %lu in %d place%s below massif's threshold (%.2f%%)\n",
759 depth+1, "", group->total, group->n_ec, s, sig_pct_threshold);
760 return;
763 // Normal group => output the group and its subgroups.
764 ms_make_groups(depth+1, group->ms_ec, group->n_ec, sig_sz,
765 &n_groups, &groups);
767 // FIXME JRS EPOCH 28 July 2017: HACK! Is this correct?
768 const DiEpoch cur_ep = VG_(current_DiEpoch)();
769 // // FIXME PW EPOCH : No, the above is not correct.
770 // Xtree Massif output regroups execontext in the layout of a 'tree'.
771 // So, possibly, the same IP address value can be in 2 different ec, but
772 // the epoch to symbolise this address must be retrieved from the ec it
773 // originates from.
774 // So, to fix this, it is not enough to make a group based on identical
775 // IP addr value, one must also find the di used to symbolise this address,
776 // A group will then be defined as 'same IP and same di'.
777 // Fix not trivial to do, so for the moment, --keep-debuginfo=yes will
778 // have no impact on xtree massif output.
780 FP("%*s" "n%u: %ld %s\n",
781 depth + 1, "",
782 n_groups,
783 group->total,
784 VG_(describe_IP)(cur_ep, group->ms_ec->ips[depth] - 1, NULL));
785 /* XTREE??? Massif original code removes 1 to get the IP description. I am
786 wondering if this is not something that predates revision r8818,
787 which introduced a -1 in the stack unwind (see m_stacktrace.c)
788 Kept for the moment to allow exact comparison with massif output, but
789 probably we should remove this, as we very probably end up 2 bytes before
790 the RA Return Address. */
792 /* Output sub groups of this group. */
793 for (i = 0; i < n_groups; i++)
794 ms_output_group(fp, depth+1, &groups[i], sig_sz, sig_pct_threshold);
796 VG_(free)(groups);
799 /* Allocate and build an array of Ms_Ec sorted by addresses in the
800 Ms_Ec StackTrace. */
801 static void prepare_ms_ec (XTree* xt,
802 ULong (*report_value)(const void* value),
803 ULong* top_total, Ms_Ec** vms_ec, UInt* vn_ec)
805 XT_shared* shared = xt->shared;
806 const UInt n_xecu = VG_(sizeXA)(shared->xec);
807 const UInt n_data_xecu = VG_(sizeXA)(xt->data);
808 Ms_Ec* ms_ec = VG_(malloc)("XT_massif_print.ms_ec", n_xecu * sizeof(Ms_Ec));
809 UInt n_xecu_sel = 0; // Nr of xecu that are selected for output.
811 vg_assert(n_data_xecu <= n_xecu);
813 // Ensure we have in shared->ips_order_xecu our xecu sorted by StackTrace.
814 ensure_ips_order_xecu_valid(shared);
816 *top_total = 0;
817 DMSG(1, "iteration %u\n", n_xecu);
818 for (UInt i = 0; i < n_xecu; i++) {
819 Xecu xecu = *(Xecu*)VG_(indexXA)(shared->ips_order_xecu, i);
820 xec* xe = (xec*)VG_(indexXA)(shared->xec, xecu);
822 if (xecu >= n_data_xecu)
823 continue; // No data for this xecu in xt->data.
824 ms_ec[n_xecu_sel].n_ips = xe->n_ips_sel;
825 if (ms_ec[n_xecu_sel].n_ips == 0)
826 continue;
828 ms_ec[n_xecu_sel].ips = VG_(get_ExeContext_StackTrace)(xe->ec) + xe->top;
829 ms_ec[n_xecu_sel].report_value
830 = (*report_value)(VG_(indexXA)(xt->data, xecu));
831 *top_total += ms_ec[n_xecu_sel].report_value;
833 n_xecu_sel++;
835 vg_assert(n_xecu_sel <= n_xecu);
837 *vms_ec = ms_ec;
838 *vn_ec = n_xecu_sel;
841 MsFile* VG_(XT_massif_open)
842 (const HChar* outfilename,
843 const HChar* desc,
844 const XArray* desc_args,
845 const HChar* time_unit)
847 UInt i;
848 VgFile* fp = xt_open(outfilename);
850 if (fp == NULL)
851 return NULL; // xt_open reported the error.
853 /* ------ file header ------------------------------- */
854 FP("desc:");
855 if (desc)
856 FP(" %s", desc);
857 i = 0;
858 if (desc_args) {
859 for (i = 0; i < VG_(sizeXA)(desc_args); i++) {
860 HChar* arg = *(HChar**)VG_(indexXA)(desc_args, i);
861 FP(" %s", arg);
864 if (0 == i && desc == NULL) FP(" (none)");
865 FP("\n");
867 FP_cmd(fp);
869 FP("time_unit: %s\n", time_unit);
871 return fp;
874 void VG_(XT_massif_close)(MsFile* fp)
876 if (fp == NULL)
877 return; // Error should have been reported by VG_(XT_massif_open)
879 VG_(fclose)(fp);
882 void VG_(XT_massif_print)
883 (MsFile* fp,
884 XTree* xt,
885 const Massif_Header* header,
886 ULong (*report_value)(const void* value))
888 UInt i;
890 if (fp == NULL)
891 return; // Normally VG_(XT_massif_open) already reported an error.
893 /* Compute/prepare Snapshot totals/data/... */
894 ULong top_total;
896 /* Following variables only used for detailed snapshot. */
897 UInt n_ec = 0;
898 Ms_Ec* ms_ec = NULL;
899 const HChar* kind =
900 header->detailed ? (header->peak ? "peak" : "detailed") : "empty";
902 DMSG(1, "XT_massif_print %s\n", kind);
903 if (header->detailed) {
904 /* Prepare the Ms_Ec sorted array of stacktraces and the groups
905 at level 0. */
906 prepare_ms_ec(xt, report_value, &top_total, &ms_ec, &n_ec);
907 DMSG(1, "XT_print_massif ms_ec n_ec %u\n", n_ec);
908 } else if (xt == NULL) {
909 /* Non detailed, no xt => use the sz provided in the header. */
910 top_total = header->sz_B;
911 } else {
912 /* For non detailed snapshot, compute total directly from the xec. */
913 const XT_shared* shared = xt->shared;
914 const UInt n_xecu = VG_(sizeXA)(xt->data);
915 top_total = 0;
917 for (UInt xecu = 0; xecu < n_xecu; xecu++) {
918 xec* xe = (xec*)VG_(indexXA)(shared->xec, xecu);
919 if (xe->n_ips_sel == 0)
920 continue;
921 top_total += (*report_value)(VG_(indexXA)(xt->data, xecu));
925 /* ------ snapshot header --------------------------- */
926 FP("#-----------\n");
927 FP("snapshot=%d\n", header->snapshot_n);
928 FP("#-----------\n");
929 FP("time=%lld\n", header->time);
931 FP("mem_heap_B=%llu\n", top_total); // without extra_B and without stacks_B
932 FP("mem_heap_extra_B=%llu\n", header->extra_B);
933 FP("mem_stacks_B=%llu\n", header->stacks_B);
934 FP("heap_tree=%s\n", kind);
936 /* ------ detailed snapshot data ----------------------------- */
937 if (header->detailed) {
938 UInt n_groups;
939 Ms_Group* groups;
941 ULong sig_sz;
942 // Work out how big a child must be to be significant. If the current
943 // top_total is zero, then we set it to 1, which means everything will be
944 // judged insignificant -- this is sensible, as there's no point showing
945 // any detail for this case. Unless they used threshold=0, in which
946 // case we show them everything because that's what they asked for.
948 // Nb: We do this once now, rather than once per child, because if we do
949 // that the cost of all the divisions adds up to something significant.
950 if (0 == top_total && 0 != header->sig_threshold)
951 sig_sz = 1;
952 else
953 sig_sz = ((top_total + header->extra_B + header->stacks_B)
954 * header->sig_threshold) / 100;
956 /* Produce the groups at depth 0 */
957 DMSG(1, "XT_massif_print producing depth 0 groups\n");
958 ms_make_groups(0, ms_ec, n_ec, sig_sz, &n_groups, &groups);
960 /* Output the top node. */
961 FP("n%u: %llu %s\n", n_groups, top_total, header->top_node_desc);
963 /* Output depth 0 groups. */
964 DMSG(1, "XT_massif_print outputing %u depth 0 groups\n", n_groups);
965 for (i = 0; i < n_groups; i++)
966 ms_output_group(fp, 0, &groups[i], sig_sz, header->sig_threshold);
968 VG_(free)(groups);
969 VG_(free)(ms_ec);
973 Int VG_(XT_offset_main_or_below_main)(DiEpoch ep, Addr* ips, Int n_ips)
975 /* Search for main or below main function.
976 To limit the nr of ips to examine, we maintain the deepest
977 offset where main was found, and we first search main
978 from there.
979 If no main is found, we will then do a search for main or
980 below main function till the top. */
981 static Int deepest_main = 0;
982 Vg_FnNameKind kind = Vg_FnNameNormal;
983 Int mbm = n_ips - 1; // Position of deepest main or below main.
984 Vg_FnNameKind mbmkind = Vg_FnNameNormal;
985 Int i;
987 for (i = n_ips - 1 - deepest_main;
988 i < n_ips;
989 i++) {
990 mbmkind = VG_(get_fnname_kind_from_IP)(ep, ips[i]);
991 if (mbmkind != Vg_FnNameNormal) {
992 mbm = i;
993 break;
997 /* Search for main or below main function till top. */
998 for (i = mbm - 1;
999 i >= 0 && mbmkind != Vg_FnNameMain;
1000 i--) {
1001 kind = VG_(get_fnname_kind_from_IP)(ep, ips[i]);
1002 if (kind != Vg_FnNameNormal) {
1003 mbm = i;
1004 mbmkind = kind;
1007 if (Vg_FnNameMain == mbmkind || Vg_FnNameBelowMain == mbmkind) {
1008 if (mbmkind == Vg_FnNameMain && (n_ips - 1 - mbm) > deepest_main)
1009 deepest_main = n_ips - 1 - mbm;
1010 return mbm;
1011 } else
1012 return n_ips-1;
1015 void VG_(XT_filter_1top_and_maybe_below_main)
1016 (Addr* ips, Int n_ips,
1017 UInt* top, UInt* n_ips_sel)
1019 Int mbm;
1021 *n_ips_sel = n_ips;
1022 if (n_ips == 0) {
1023 *top = 0;
1024 return;
1027 /* Filter top function. */
1028 *top = 1;
1030 if (VG_(clo_show_below_main))
1031 mbm = n_ips - 1;
1032 else {
1033 // FIXME PW EPOCH : use the real ips epoch
1034 const DiEpoch cur_ep = VG_(current_DiEpoch)();
1035 mbm = VG_(XT_offset_main_or_below_main)(cur_ep, ips, n_ips);
1038 *n_ips_sel = mbm - *top + 1;
1041 void VG_(XT_filter_maybe_below_main)
1042 (Addr* ips, Int n_ips,
1043 UInt* top, UInt* n_ips_sel)
1045 Int mbm;
1047 *n_ips_sel = n_ips;
1048 *top = 0;
1049 if (n_ips == 0)
1050 return;
1052 if (VG_(clo_show_below_main))
1053 mbm = n_ips - 1;
1054 else {
1055 // FIXME PW EPOCH : use the real ips epoch
1056 const DiEpoch cur_ep = VG_(current_DiEpoch)();
1057 mbm = VG_(XT_offset_main_or_below_main)(cur_ep, ips, n_ips);
1059 *n_ips_sel = mbm - *top + 1;
1062 /*--------------------------------------------------------------------*/
1063 /*--- end m_xtree.c ---*/
1064 /*--------------------------------------------------------------------*/