/* cachegrind/cg_main.c
   (extracted from valgrind.git web view, blob cd4de769c9d1ea3a1f473a374398906981685892;
   page title referenced commit "Add DRD suppression patterns for races triggered by std::ostream") */
2 /*--------------------------------------------------------------------*/
3 /*--- Cachegrind: everything but the simulation itself. ---*/
4 /*--- cg_main.c ---*/
5 /*--------------------------------------------------------------------*/
7 /*
8 This file is part of Cachegrind, a Valgrind tool for cache
9 profiling programs.
11 Copyright (C) 2002-2017 Nicholas Nethercote
12 njn@valgrind.org
14 This program is free software; you can redistribute it and/or
15 modify it under the terms of the GNU General Public License as
16 published by the Free Software Foundation; either version 2 of the
17 License, or (at your option) any later version.
19 This program is distributed in the hope that it will be useful, but
20 WITHOUT ANY WARRANTY; without even the implied warranty of
21 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
22 General Public License for more details.
24 You should have received a copy of the GNU General Public License
25 along with this program; if not, write to the Free Software
26 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
27 02111-1307, USA.
29 The GNU General Public License is contained in the file COPYING.
32 #include "pub_tool_basics.h"
33 #include "pub_tool_debuginfo.h"
34 #include "pub_tool_libcbase.h"
35 #include "pub_tool_libcassert.h"
36 #include "pub_tool_libcfile.h"
37 #include "pub_tool_libcprint.h"
38 #include "pub_tool_libcproc.h"
39 #include "pub_tool_mallocfree.h"
40 #include "pub_tool_options.h"
41 #include "pub_tool_oset.h"
42 #include "pub_tool_tooliface.h"
43 #include "pub_tool_xarray.h"
44 #include "pub_tool_clientstate.h"
45 #include "pub_tool_machine.h" // VG_(fnptr_to_fnentry)
47 #include "cg_arch.h"
48 #include "cg_sim.c"
49 #include "cg_branchpred.c"
51 /*------------------------------------------------------------*/
52 /*--- Constants ---*/
53 /*------------------------------------------------------------*/
55 /* Set to 1 for very verbose debugging */
56 #define DEBUG_CG 0
58 /*------------------------------------------------------------*/
59 /*--- Options ---*/
60 /*------------------------------------------------------------*/
62 static Bool clo_cache_sim = True; /* do cache simulation? */
63 static Bool clo_branch_sim = False; /* do branch simulation? */
64 static const HChar* clo_cachegrind_out_file = "cachegrind.out.%p";
66 /*------------------------------------------------------------*/
67 /*--- Cachesim configuration ---*/
68 /*------------------------------------------------------------*/
70 static Int min_line_size = 0; /* min of L1 and LL cache line sizes */
72 /*------------------------------------------------------------*/
73 /*--- Types and Data Structures ---*/
74 /*------------------------------------------------------------*/
// Per-source-line cache cost centre: counts for one kind of access
// (instruction read, data read, or data write).
typedef
   struct {
      ULong a;  /* total # memory accesses of this kind */
      ULong m1; /* misses in the first level cache */
      ULong mL; /* misses in the second level cache */
   }
   CacheCC;
// Per-source-line branch cost centre: counts for one kind of branch
// (conditional or indirect).
typedef
   struct {
      ULong b;  /* total # branches of this kind */
      ULong mp; /* number of branches mispredicted */
   }
   BranchCC;
91 //------------------------------------------------------------
92 // Primary data structure #1: CC table
93 // - Holds the per-source-line hit/miss stats, grouped by file/function/line.
94 // - an ordered set of CCs. CC indexing done by file/function/line (as
95 // determined from the instrAddr).
96 // - Traversed for dumping stats at end in file/func/line hierarchy.
// Key type for the CC table: identifies the source position a set of
// counts pertains to.  'file' and 'fn' point into the string table
// (see get_perm_string), so pointer comparison implies string equality.
typedef struct {
   HChar* file;
   const HChar* fn;
   Int line;
}
CodeLoc;
// One node of the CC table: all counters accumulated for a single
// source location (file/function/line).
typedef struct {
   CodeLoc  loc; /* Source location that these counts pertain to */
   CacheCC  Ir;  /* Insn read counts */
   CacheCC  Dr;  /* Data read counts */
   CacheCC  Dw;  /* Data write/modify counts */
   BranchCC Bc;  /* Conditional branch counts */
   BranchCC Bi;  /* Indirect branch counts */
} LineCC;
114 // First compare file, then fn, then line.
115 static Word cmp_CodeLoc_LineCC(const void *vloc, const void *vcc)
117 Word res;
118 const CodeLoc* a = (const CodeLoc*)vloc;
119 const CodeLoc* b = &(((const LineCC*)vcc)->loc);
121 res = VG_(strcmp)(a->file, b->file);
122 if (0 != res)
123 return res;
125 res = VG_(strcmp)(a->fn, b->fn);
126 if (0 != res)
127 return res;
129 return a->line - b->line;
132 static OSet* CC_table;
134 //------------------------------------------------------------
135 // Primary data structure #2: InstrInfo table
136 // - Holds the cached info about each instr that is used for simulation.
137 // - table(SB_start_addr, list(InstrInfo))
138 // - For each SB, each InstrInfo in the list holds info about the
139 // instruction (instrLen, instrAddr, etc), plus a pointer to its line
140 // CC. This node is what's passed to the simulation function.
141 // - When SBs are discarded the relevant list(instr_details) is freed.
// Cached info about one guest instruction, used by the simulation
// callbacks: its address, encoded length, and the line cost centre the
// counts should be charged to.
typedef struct _InstrInfo InstrInfo;
struct _InstrInfo {
   Addr    instr_addr;
   UChar   instr_len;
   LineCC* parent;         // parent line-CC
};
150 typedef struct _SB_info SB_info;
151 struct _SB_info {
152 Addr SB_addr; // key; MUST BE FIRST
153 Int n_instrs;
154 InstrInfo instrs[0];
157 static OSet* instrInfoTable;
159 //------------------------------------------------------------
160 // Secondary data structure: string table
161 // - holds strings, avoiding dups
162 // - used for filenames and function names, each of which will be
163 // pointed to by one or more CCs.
164 // - it also allows equality checks just by pointer comparison, which
165 // is good when printing the output file at the end.
167 static OSet* stringTable;
169 //------------------------------------------------------------
170 // Stats
171 static Int distinct_files = 0;
172 static Int distinct_fns = 0;
173 static Int distinct_lines = 0;
174 static Int distinct_instrsGen = 0;
175 static Int distinct_instrsNoX = 0;
177 static Int full_debugs = 0;
178 static Int file_line_debugs = 0;
179 static Int fn_debugs = 0;
180 static Int no_debugs = 0;
182 /*------------------------------------------------------------*/
183 /*--- String table operations ---*/
184 /*------------------------------------------------------------*/
186 static Word stringCmp( const void* key, const void* elem )
188 return VG_(strcmp)(*(const HChar *const *)key, *(const HChar *const *)elem);
191 // Get a permanent string; either pull it out of the string table if it's
192 // been encountered before, or dup it and put it into the string table.
193 static HChar* get_perm_string(const HChar* s)
195 HChar** s_ptr = VG_(OSetGen_Lookup)(stringTable, &s);
196 if (s_ptr) {
197 return *s_ptr;
198 } else {
199 HChar** s_node = VG_(OSetGen_AllocNode)(stringTable, sizeof(HChar*));
200 *s_node = VG_(strdup)("cg.main.gps.1", s);
201 VG_(OSetGen_Insert)(stringTable, s_node);
202 return *s_node;
206 /*------------------------------------------------------------*/
207 /*--- CC table operations ---*/
208 /*------------------------------------------------------------*/
210 static void get_debug_info(Addr instr_addr, const HChar **dir,
211 const HChar **file, const HChar **fn, UInt* line)
213 DiEpoch ep = VG_(current_DiEpoch)();
214 Bool found_file_line = VG_(get_filename_linenum)(
216 instr_addr,
217 file, dir,
218 line
220 Bool found_fn = VG_(get_fnname)(ep, instr_addr, fn);
222 if (!found_file_line) {
223 *file = "???";
224 *line = 0;
226 if (!found_fn) {
227 *fn = "???";
230 if (found_file_line) {
231 if (found_fn) full_debugs++;
232 else file_line_debugs++;
233 } else {
234 if (found_fn) fn_debugs++;
235 else no_debugs++;
239 // Do a three step traversal: by file, then fn, then line.
240 // Returns a pointer to the line CC, creates a new one if necessary.
// Do a three step traversal: by file, then fn, then line.
// Returns a pointer to the line CC, creates a new one if necessary.
static LineCC* get_lineCC(Addr origAddr)
{
   const HChar *fn, *file, *dir;
   UInt line;
   CodeLoc loc;
   LineCC* lineCC;

   get_debug_info(origAddr, &dir, &file, &fn, &line);

   // Form an absolute pathname if a directory is available.
   // NB: VLA sized exactly for "dir" + '/' + "file" + NUL; safe to
   // sprintf into.  'loc.file' points at this stack buffer, which is
   // fine because the lookup/insert below only reads it, and on insert
   // a permanent copy is made via get_perm_string.
   HChar absfile[VG_(strlen)(dir) + 1 + VG_(strlen)(file) + 1];

   if (dir[0]) {
      VG_(sprintf)(absfile, "%s/%s", dir, file);
   } else {
      VG_(sprintf)(absfile, "%s", file);
   }

   loc.file = absfile;
   loc.fn   = fn;
   loc.line = line;

   lineCC = VG_(OSetGen_Lookup)(CC_table, &loc);
   if (!lineCC) {
      // Allocate and zero a new node.  All counters start at zero;
      // file/fn are interned so later lookups can share the storage.
      lineCC = VG_(OSetGen_AllocNode)(CC_table, sizeof(LineCC));
      lineCC->loc.file = get_perm_string(loc.file);
      lineCC->loc.fn   = get_perm_string(loc.fn);
      lineCC->loc.line = loc.line;
      lineCC->Ir.a  = 0;
      lineCC->Ir.m1 = 0;
      lineCC->Ir.mL = 0;
      lineCC->Dr.a  = 0;
      lineCC->Dr.m1 = 0;
      lineCC->Dr.mL = 0;
      lineCC->Dw.a  = 0;
      lineCC->Dw.m1 = 0;
      lineCC->Dw.mL = 0;
      lineCC->Bc.b  = 0;
      lineCC->Bc.mp = 0;
      lineCC->Bi.b  = 0;
      lineCC->Bi.mp = 0;
      VG_(OSetGen_Insert)(CC_table, lineCC);
   }

   return lineCC;
}
289 /*------------------------------------------------------------*/
290 /*--- Cache simulation functions ---*/
291 /*------------------------------------------------------------*/
293 /* A common case for an instruction read event is that the
294 * bytes read belong to the same cache line in both L1I and LL
295 * (if cache line sizes of L1 and LL are the same).
296 * As this can be detected at instrumentation time, and results
297 * in faster simulation, special-casing is benefical.
299 * Abbreviations used in var/function names:
300 * IrNoX - instruction read does not cross cache lines
301 * IrGen - generic instruction read; not detected as IrNoX
302 * Ir - not known / not important whether it is an IrNoX
305 // Only used with --cache-sim=no.
306 static VG_REGPARM(1)
307 void log_1Ir(InstrInfo* n)
309 n->parent->Ir.a++;
312 // Only used with --cache-sim=no.
313 static VG_REGPARM(2)
314 void log_2Ir(InstrInfo* n, InstrInfo* n2)
316 n->parent->Ir.a++;
317 n2->parent->Ir.a++;
320 // Only used with --cache-sim=no.
321 static VG_REGPARM(3)
322 void log_3Ir(InstrInfo* n, InstrInfo* n2, InstrInfo* n3)
324 n->parent->Ir.a++;
325 n2->parent->Ir.a++;
326 n3->parent->Ir.a++;
329 // Generic case for instruction reads: may cross cache lines.
330 // All other Ir handlers expect IrNoX instruction reads.
331 static VG_REGPARM(1)
332 void log_1IrGen_0D_cache_access(InstrInfo* n)
334 //VG_(printf)("1IrGen_0D : CCaddr=0x%010lx, iaddr=0x%010lx, isize=%lu\n",
335 // n, n->instr_addr, n->instr_len);
336 cachesim_I1_doref_Gen(n->instr_addr, n->instr_len,
337 &n->parent->Ir.m1, &n->parent->Ir.mL);
338 n->parent->Ir.a++;
341 static VG_REGPARM(1)
342 void log_1IrNoX_0D_cache_access(InstrInfo* n)
344 //VG_(printf)("1IrNoX_0D : CCaddr=0x%010lx, iaddr=0x%010lx, isize=%lu\n",
345 // n, n->instr_addr, n->instr_len);
346 cachesim_I1_doref_NoX(n->instr_addr, n->instr_len,
347 &n->parent->Ir.m1, &n->parent->Ir.mL);
348 n->parent->Ir.a++;
351 static VG_REGPARM(2)
352 void log_2IrNoX_0D_cache_access(InstrInfo* n, InstrInfo* n2)
354 //VG_(printf)("2IrNoX_0D : CC1addr=0x%010lx, i1addr=0x%010lx, i1size=%lu\n"
355 // " CC2addr=0x%010lx, i2addr=0x%010lx, i2size=%lu\n",
356 // n, n->instr_addr, n->instr_len,
357 // n2, n2->instr_addr, n2->instr_len);
358 cachesim_I1_doref_NoX(n->instr_addr, n->instr_len,
359 &n->parent->Ir.m1, &n->parent->Ir.mL);
360 n->parent->Ir.a++;
361 cachesim_I1_doref_NoX(n2->instr_addr, n2->instr_len,
362 &n2->parent->Ir.m1, &n2->parent->Ir.mL);
363 n2->parent->Ir.a++;
366 static VG_REGPARM(3)
367 void log_3IrNoX_0D_cache_access(InstrInfo* n, InstrInfo* n2, InstrInfo* n3)
369 //VG_(printf)("3IrNoX_0D : CC1addr=0x%010lx, i1addr=0x%010lx, i1size=%lu\n"
370 // " CC2addr=0x%010lx, i2addr=0x%010lx, i2size=%lu\n"
371 // " CC3addr=0x%010lx, i3addr=0x%010lx, i3size=%lu\n",
372 // n, n->instr_addr, n->instr_len,
373 // n2, n2->instr_addr, n2->instr_len,
374 // n3, n3->instr_addr, n3->instr_len);
375 cachesim_I1_doref_NoX(n->instr_addr, n->instr_len,
376 &n->parent->Ir.m1, &n->parent->Ir.mL);
377 n->parent->Ir.a++;
378 cachesim_I1_doref_NoX(n2->instr_addr, n2->instr_len,
379 &n2->parent->Ir.m1, &n2->parent->Ir.mL);
380 n2->parent->Ir.a++;
381 cachesim_I1_doref_NoX(n3->instr_addr, n3->instr_len,
382 &n3->parent->Ir.m1, &n3->parent->Ir.mL);
383 n3->parent->Ir.a++;
386 static VG_REGPARM(3)
387 void log_1IrNoX_1Dr_cache_access(InstrInfo* n, Addr data_addr, Word data_size)
389 //VG_(printf)("1IrNoX_1Dr: CCaddr=0x%010lx, iaddr=0x%010lx, isize=%lu\n"
390 // " daddr=0x%010lx, dsize=%lu\n",
391 // n, n->instr_addr, n->instr_len, data_addr, data_size);
392 cachesim_I1_doref_NoX(n->instr_addr, n->instr_len,
393 &n->parent->Ir.m1, &n->parent->Ir.mL);
394 n->parent->Ir.a++;
396 cachesim_D1_doref(data_addr, data_size,
397 &n->parent->Dr.m1, &n->parent->Dr.mL);
398 n->parent->Dr.a++;
401 static VG_REGPARM(3)
402 void log_1IrNoX_1Dw_cache_access(InstrInfo* n, Addr data_addr, Word data_size)
404 //VG_(printf)("1IrNoX_1Dw: CCaddr=0x%010lx, iaddr=0x%010lx, isize=%lu\n"
405 // " daddr=0x%010lx, dsize=%lu\n",
406 // n, n->instr_addr, n->instr_len, data_addr, data_size);
407 cachesim_I1_doref_NoX(n->instr_addr, n->instr_len,
408 &n->parent->Ir.m1, &n->parent->Ir.mL);
409 n->parent->Ir.a++;
411 cachesim_D1_doref(data_addr, data_size,
412 &n->parent->Dw.m1, &n->parent->Dw.mL);
413 n->parent->Dw.a++;
416 /* Note that addEvent_D_guarded assumes that log_0Ir_1Dr_cache_access
417 and log_0Ir_1Dw_cache_access have exactly the same prototype. If
418 you change them, you must change addEvent_D_guarded too. */
419 static VG_REGPARM(3)
420 void log_0Ir_1Dr_cache_access(InstrInfo* n, Addr data_addr, Word data_size)
422 //VG_(printf)("0Ir_1Dr: CCaddr=0x%010lx, daddr=0x%010lx, dsize=%lu\n",
423 // n, data_addr, data_size);
424 cachesim_D1_doref(data_addr, data_size,
425 &n->parent->Dr.m1, &n->parent->Dr.mL);
426 n->parent->Dr.a++;
429 /* See comment on log_0Ir_1Dr_cache_access. */
430 static VG_REGPARM(3)
431 void log_0Ir_1Dw_cache_access(InstrInfo* n, Addr data_addr, Word data_size)
433 //VG_(printf)("0Ir_1Dw: CCaddr=0x%010lx, daddr=0x%010lx, dsize=%lu\n",
434 // n, data_addr, data_size);
435 cachesim_D1_doref(data_addr, data_size,
436 &n->parent->Dw.m1, &n->parent->Dw.mL);
437 n->parent->Dw.a++;
440 /* For branches, we consult two different predictors, one which
441 predicts taken/untaken for conditional branches, and the other
442 which predicts the branch target address for indirect branches
443 (jump-to-register style ones). */
445 static VG_REGPARM(2)
446 void log_cond_branch(InstrInfo* n, Word taken)
448 //VG_(printf)("cbrnch: CCaddr=0x%010lx, taken=0x%010lx\n",
449 // n, taken);
450 n->parent->Bc.b++;
451 n->parent->Bc.mp
452 += (1 & do_cond_branch_predict(n->instr_addr, taken));
455 static VG_REGPARM(2)
456 void log_ind_branch(InstrInfo* n, UWord actual_dst)
458 //VG_(printf)("ibrnch: CCaddr=0x%010lx, dst=0x%010lx\n",
459 // n, actual_dst);
460 n->parent->Bi.b++;
461 n->parent->Bi.mp
462 += (1 & do_ind_branch_predict(n->instr_addr, actual_dst));
466 /*------------------------------------------------------------*/
467 /*--- Instrumentation types and structures ---*/
468 /*------------------------------------------------------------*/
470 /* Maintain an ordered list of memory events which are outstanding, in
471 the sense that no IR has yet been generated to do the relevant
472 helper calls. The BB is scanned top to bottom and memory events
473 are added to the end of the list, merging with the most recent
474 notified event where possible (Dw immediately following Dr and
475 having the same size and EA can be merged).
477 This merging is done so that for architectures which have
478 load-op-store instructions (x86, amd64), the insn is treated as if
479 it makes just one memory reference (a modify), rather than two (a
480 read followed by a write at the same address).
482 At various points the list will need to be flushed, that is, IR
483 generated from it. That must happen before any possible exit from
484 the block (the end, or an IRStmt_Exit). Flushing also takes place
485 when there is no space to add a new event.
487 If we require the simulation statistics to be up to date with
488 respect to possible memory exceptions, then the list would have to
489 be flushed before each memory reference. That would however lose
490 performance by inhibiting event-merging during flushing.
492 Flushing the list consists of walking it start to end and emitting
493 instrumentation IR for each event, in the order in which they
494 appear. It may be possible to emit a single call for two adjacent
495 events in order to reduce the number of helper function calls made.
496 For example, it could well be profitable to handle two adjacent Ir
497 events with a single helper call. */
// Shorthand: an IRAtom is an IRExpr required to be "flat" (a constant
// or a temporary), per the naming used across valgrind tools.
typedef
   IRExpr
   IRAtom;

// Kinds of outstanding memory/branch events collected during
// instrumentation (see the big comment above on the event list).
typedef
   enum {
      Ev_IrNoX,  // Instruction read not crossing cache lines
      Ev_IrGen,  // Generic Ir, not being detected as IrNoX
      Ev_Dr,     // Data read
      Ev_Dw,     // Data write
      Ev_Dm,     // Data modify (read then write)
      Ev_Bc,     // branch conditional
      Ev_Bi      // branch indirect (to unknown destination)
   }
   EventTag;
// One outstanding event.  'tag' selects which member of the 'Ev' union
// is meaningful; 'inode' identifies the instruction the event belongs
// to.  Note Dr/Dw/Dm deliberately share the same {ea, szB} layout,
// which get_Event_dea/get_Event_dszB and the Dr->Dm merge rely on.
typedef
   struct {
      EventTag   tag;
      InstrInfo* inode;
      union {
         struct {
         } IrGen;
         struct {
         } IrNoX;
         struct {
            IRAtom* ea;
            Int     szB;
         } Dr;
         struct {
            IRAtom* ea;
            Int     szB;
         } Dw;
         struct {
            IRAtom* ea;
            Int     szB;
         } Dm;
         struct {
            IRAtom* taken; /* :: Ity_I1 */
         } Bc;
         struct {
            IRAtom* dst;
         } Bi;
      } Ev;
   }
   Event;
546 static void init_Event ( Event* ev ) {
547 VG_(memset)(ev, 0, sizeof(Event));
550 static IRAtom* get_Event_dea ( Event* ev ) {
551 switch (ev->tag) {
552 case Ev_Dr: return ev->Ev.Dr.ea;
553 case Ev_Dw: return ev->Ev.Dw.ea;
554 case Ev_Dm: return ev->Ev.Dm.ea;
555 default: tl_assert(0);
559 static Int get_Event_dszB ( Event* ev ) {
560 switch (ev->tag) {
561 case Ev_Dr: return ev->Ev.Dr.szB;
562 case Ev_Dw: return ev->Ev.Dw.szB;
563 case Ev_Dm: return ev->Ev.Dm.szB;
564 default: tl_assert(0);
/* Up to this many unnotified events are allowed.  Number is
   arbitrary.  Larger numbers allow more event merging to occur, but
   potentially induce more spilling due to extending live ranges of
   address temporaries. */
#define N_EVENTS 16


/* A struct which holds all the running state during instrumentation.
   Mostly to avoid passing loads of parameters everywhere. */
typedef
   struct {
      /* The current outstanding-memory-event list. */
      Event events[N_EVENTS];
      Int   events_used;

      /* The array of InstrInfo bins for the BB. */
      SB_info* sbInfo;

      /* Number InstrInfo bins 'used' so far. */
      Int sbInfo_i;

      /* The output SB being constructed. */
      IRSB* sbOut;
   }
   CgState;
596 /*------------------------------------------------------------*/
597 /*--- Instrumentation main ---*/
598 /*------------------------------------------------------------*/
// Allocate and register the SB_info node for a superblock about to be
// instrumented.
// Note that origAddr is the real origAddr, not the address of the first
// instruction in the block (they can be different due to redirection).
static
SB_info* get_SB_info(IRSB* sbIn, Addr origAddr)
{
   Int      i, n_instrs;
   IRStmt*  st;
   SB_info* sbInfo;

   // Count number of original instrs in SB (one IMark per instruction).
   n_instrs = 0;
   for (i = 0; i < sbIn->stmts_used; i++) {
      st = sbIn->stmts[i];
      if (Ist_IMark == st->tag) n_instrs++;
   }

   // Check that we don't have an entry for this BB in the instr-info table.
   // If this assertion fails, there has been some screwup: some
   // translations must have been discarded but Cachegrind hasn't discarded
   // the corresponding entries in the instr-info table.
   sbInfo = VG_(OSetGen_Lookup)(instrInfoTable, &origAddr);
   tl_assert(NULL == sbInfo);

   // BB never translated before (at this address, at least; could have
   // been unloaded and then reloaded elsewhere in memory).
   // Trailing InstrInfo array is allocated inline after the struct.
   sbInfo = VG_(OSetGen_AllocNode)(instrInfoTable,
                                   sizeof(SB_info) + n_instrs*sizeof(InstrInfo));
   sbInfo->SB_addr  = origAddr;
   sbInfo->n_instrs = n_instrs;
   VG_(OSetGen_Insert)( instrInfoTable, sbInfo );

   return sbInfo;
}
// Debug helper: print a one-line description of an Event (used when
// DEBUG_CG is enabled in flushEvents).
static void showEvent ( Event* ev )
{
   switch (ev->tag) {
      case Ev_IrGen:
         VG_(printf)("IrGen %p\n", ev->inode);
         break;
      case Ev_IrNoX:
         VG_(printf)("IrNoX %p\n", ev->inode);
         break;
      case Ev_Dr:
         VG_(printf)("Dr %p %d EA=", ev->inode, ev->Ev.Dr.szB);
         ppIRExpr(ev->Ev.Dr.ea);
         VG_(printf)("\n");
         break;
      case Ev_Dw:
         VG_(printf)("Dw %p %d EA=", ev->inode, ev->Ev.Dw.szB);
         ppIRExpr(ev->Ev.Dw.ea);
         VG_(printf)("\n");
         break;
      case Ev_Dm:
         VG_(printf)("Dm %p %d EA=", ev->inode, ev->Ev.Dm.szB);
         ppIRExpr(ev->Ev.Dm.ea);
         VG_(printf)("\n");
         break;
      case Ev_Bc:
         VG_(printf)("Bc %p   GA=", ev->inode);
         ppIRExpr(ev->Ev.Bc.taken);
         VG_(printf)("\n");
         break;
      case Ev_Bi:
         VG_(printf)("Bi %p  DST=", ev->inode);
         ppIRExpr(ev->Ev.Bi.dst);
         VG_(printf)("\n");
         break;
      default:
         tl_assert(0);
         break;
   }
}
675 // Reserve and initialise an InstrInfo for the first mention of a new insn.
676 static
677 InstrInfo* setup_InstrInfo ( CgState* cgs, Addr instr_addr, UInt instr_len )
679 InstrInfo* i_node;
680 tl_assert(cgs->sbInfo_i >= 0);
681 tl_assert(cgs->sbInfo_i < cgs->sbInfo->n_instrs);
682 i_node = &cgs->sbInfo->instrs[ cgs->sbInfo_i ];
683 i_node->instr_addr = instr_addr;
684 i_node->instr_len = instr_len;
685 i_node->parent = get_lineCC(instr_addr);
686 cgs->sbInfo_i++;
687 return i_node;
/* Generate code for all outstanding memory events, and mark the queue
   empty.  Code is generated into cgs->bbOut, and this activity
   'consumes' slots in cgs->sbInfo.

   Walks the event list front to back; at each step it tries to merge
   the current event with the one or two events that follow it, so that
   a single helper call covers several events.  The merge cases below
   mirror the log_* helpers defined above. */

static void flushEvents ( CgState* cgs )
{
   Int          i, regparms;
   const HChar* helperName;
   void*        helperAddr;
   IRExpr**     argv;
   IRExpr*      i_node_expr;
   IRDirty*     di;
   Event*       ev;
   Event*       ev2;
   Event*       ev3;

   i = 0;
   while (i < cgs->events_used) {

      helperName = NULL;
      helperAddr = NULL;
      argv       = NULL;
      regparms   = 0;

      /* generate IR to notify event i and possibly the ones
         immediately following it. */
      tl_assert(i >= 0 && i < cgs->events_used);

      // Lookahead of up to two events for merging.
      ev  = &cgs->events[i];
      ev2 = ( i < cgs->events_used-1 ? &cgs->events[i+1] : NULL );
      ev3 = ( i < cgs->events_used-2 ? &cgs->events[i+2] : NULL );

      if (DEBUG_CG) {
         VG_(printf)("   flush ");
         showEvent( ev );
      }

      i_node_expr = mkIRExpr_HWord( (HWord)ev->inode );

      /* Decide on helper fn to call and args to pass it, and advance
         i appropriately. */
      switch (ev->tag) {
         case Ev_IrNoX:
            /* Merge an IrNoX with a following Dr/Dm. */
            if (ev2 && (ev2->tag == Ev_Dr || ev2->tag == Ev_Dm)) {
               /* Why is this true?  It's because we're merging an Ir
                  with a following Dr or Dm.  The Ir derives from the
                  instruction's IMark and the Dr/Dm from data
                  references which follow it.  In short it holds
                  because each insn starts with an IMark, hence an
                  Ev_Ir, and so these Dr/Dm must pertain to the
                  immediately preceding Ir.  Same applies to analogous
                  assertions in the subsequent cases. */
               tl_assert(ev2->inode == ev->inode);
               helperName = "log_1IrNoX_1Dr_cache_access";
               helperAddr = &log_1IrNoX_1Dr_cache_access;
               argv = mkIRExprVec_3( i_node_expr,
                                     get_Event_dea(ev2),
                                     mkIRExpr_HWord( get_Event_dszB(ev2) ) );
               regparms = 3;
               i += 2;
            }
            /* Merge an IrNoX with a following Dw. */
            else
            if (ev2 && ev2->tag == Ev_Dw) {
               tl_assert(ev2->inode == ev->inode);
               helperName = "log_1IrNoX_1Dw_cache_access";
               helperAddr = &log_1IrNoX_1Dw_cache_access;
               argv = mkIRExprVec_3( i_node_expr,
                                     get_Event_dea(ev2),
                                     mkIRExpr_HWord( get_Event_dszB(ev2) ) );
               regparms = 3;
               i += 2;
            }
            /* Merge an IrNoX with two following IrNoX's. */
            else
            if (ev2 && ev3 && ev2->tag == Ev_IrNoX && ev3->tag == Ev_IrNoX)
            {
               if (clo_cache_sim) {
                  helperName = "log_3IrNoX_0D_cache_access";
                  helperAddr = &log_3IrNoX_0D_cache_access;
               } else {
                  helperName = "log_3Ir";
                  helperAddr = &log_3Ir;
               }
               argv = mkIRExprVec_3( i_node_expr,
                                     mkIRExpr_HWord( (HWord)ev2->inode ),
                                     mkIRExpr_HWord( (HWord)ev3->inode ) );
               regparms = 3;
               i += 3;
            }
            /* Merge an IrNoX with one following IrNoX. */
            else
            if (ev2 && ev2->tag == Ev_IrNoX) {
               if (clo_cache_sim) {
                  helperName = "log_2IrNoX_0D_cache_access";
                  helperAddr = &log_2IrNoX_0D_cache_access;
               } else {
                  helperName = "log_2Ir";
                  helperAddr = &log_2Ir;
               }
               argv = mkIRExprVec_2( i_node_expr,
                                     mkIRExpr_HWord( (HWord)ev2->inode ) );
               regparms = 2;
               i += 2;
            }
            /* No merging possible; emit as-is. */
            else {
               if (clo_cache_sim) {
                  helperName = "log_1IrNoX_0D_cache_access";
                  helperAddr = &log_1IrNoX_0D_cache_access;
               } else {
                  helperName = "log_1Ir";
                  helperAddr = &log_1Ir;
               }
               argv = mkIRExprVec_1( i_node_expr );
               regparms = 1;
               i++;
            }
            break;
         case Ev_IrGen:
            if (clo_cache_sim) {
               helperName = "log_1IrGen_0D_cache_access";
               helperAddr = &log_1IrGen_0D_cache_access;
            } else {
               helperName = "log_1Ir";
               helperAddr = &log_1Ir;
            }
            argv = mkIRExprVec_1( i_node_expr );
            regparms = 1;
            i++;
            break;
         case Ev_Dr:
         case Ev_Dm:
            /* Data read or modify */
            helperName = "log_0Ir_1Dr_cache_access";
            helperAddr = &log_0Ir_1Dr_cache_access;
            argv = mkIRExprVec_3( i_node_expr,
                                  get_Event_dea(ev),
                                  mkIRExpr_HWord( get_Event_dszB(ev) ) );
            regparms = 3;
            i++;
            break;
         case Ev_Dw:
            /* Data write */
            helperName = "log_0Ir_1Dw_cache_access";
            helperAddr = &log_0Ir_1Dw_cache_access;
            argv = mkIRExprVec_3( i_node_expr,
                                  get_Event_dea(ev),
                                  mkIRExpr_HWord( get_Event_dszB(ev) ) );
            regparms = 3;
            i++;
            break;
         case Ev_Bc:
            /* Conditional branch */
            helperName = "log_cond_branch";
            helperAddr = &log_cond_branch;
            argv = mkIRExprVec_2( i_node_expr, ev->Ev.Bc.taken );
            regparms = 2;
            i++;
            break;
         case Ev_Bi:
            /* Branch to an unknown destination */
            helperName = "log_ind_branch";
            helperAddr = &log_ind_branch;
            argv = mkIRExprVec_2( i_node_expr, ev->Ev.Bi.dst );
            regparms = 2;
            i++;
            break;
         default:
            tl_assert(0);
      }

      /* Add the helper. */
      tl_assert(helperName);
      tl_assert(helperAddr);
      tl_assert(argv);
      di = unsafeIRDirty_0_N( regparms,
                              helperName, VG_(fnptr_to_fnentry)( helperAddr ),
                              argv );
      addStmtToIRSB( cgs->sbOut, IRStmt_Dirty(di) );
   }

   cgs->events_used = 0;
}
877 static void addEvent_Ir ( CgState* cgs, InstrInfo* inode )
879 Event* evt;
880 if (cgs->events_used == N_EVENTS)
881 flushEvents(cgs);
882 tl_assert(cgs->events_used >= 0 && cgs->events_used < N_EVENTS);
883 evt = &cgs->events[cgs->events_used];
884 init_Event(evt);
885 evt->inode = inode;
886 if (cachesim_is_IrNoX(inode->instr_addr, inode->instr_len)) {
887 evt->tag = Ev_IrNoX;
888 distinct_instrsNoX++;
889 } else {
890 evt->tag = Ev_IrGen;
891 distinct_instrsGen++;
893 cgs->events_used++;
896 static
897 void addEvent_Dr ( CgState* cgs, InstrInfo* inode, Int datasize, IRAtom* ea )
899 Event* evt;
900 tl_assert(isIRAtom(ea));
901 tl_assert(datasize >= 1 && datasize <= min_line_size);
902 if (!clo_cache_sim)
903 return;
904 if (cgs->events_used == N_EVENTS)
905 flushEvents(cgs);
906 tl_assert(cgs->events_used >= 0 && cgs->events_used < N_EVENTS);
907 evt = &cgs->events[cgs->events_used];
908 init_Event(evt);
909 evt->tag = Ev_Dr;
910 evt->inode = inode;
911 evt->Ev.Dr.szB = datasize;
912 evt->Ev.Dr.ea = ea;
913 cgs->events_used++;
// Queue a data-write event, first trying to merge it with an
// immediately preceding read of the same size/address from the same
// instruction, so a load-op-store insn counts as one "modify" rather
// than a read plus a write.
static
void addEvent_Dw ( CgState* cgs, InstrInfo* inode, Int datasize, IRAtom* ea )
{
   Event* evt;

   tl_assert(isIRAtom(ea));
   tl_assert(datasize >= 1 && datasize <= min_line_size);

   if (!clo_cache_sim)
      return;

   /* Is it possible to merge this write with the preceding read? */
   if (cgs->events_used > 0) {
      Event* lastEvt = &cgs->events[cgs->events_used-1];
      if (   lastEvt->tag       == Ev_Dr
          && lastEvt->Ev.Dr.szB == datasize
          && lastEvt->inode     == inode
          && eqIRAtom(lastEvt->Ev.Dr.ea, ea))
      {
         // Only the tag changes: the Dr and Dm union members have the
         // same {ea, szB} layout, so the existing fields carry over.
         lastEvt->tag   = Ev_Dm;
         return;
      }
   }

   /* No.  Add as normal. */
   if (cgs->events_used == N_EVENTS)
      flushEvents(cgs);
   tl_assert(cgs->events_used >= 0 && cgs->events_used < N_EVENTS);
   evt = &cgs->events[cgs->events_used];
   init_Event(evt);
   evt->tag       = Ev_Dw;
   evt->inode     = inode;
   evt->Ev.Dw.szB = datasize;
   evt->Ev.Dw.ea  = ea;
   cgs->events_used++;
}
// Emit instrumentation for a guarded (conditional) load or store.
// Unlike the other addEvent_* functions this does not queue an event:
// it flushes the queue and emits the dirty helper call directly, with
// the guard attached, so the helper only runs when the guard is true.
static
void addEvent_D_guarded ( CgState* cgs, InstrInfo* inode,
                          Int datasize, IRAtom* ea, IRAtom* guard,
                          Bool isWrite )
{
   tl_assert(isIRAtom(ea));
   tl_assert(guard);
   tl_assert(isIRAtom(guard));
   tl_assert(datasize >= 1 && datasize <= min_line_size);

   if (!clo_cache_sim)
      return;

   /* Adding guarded memory actions and merging them with the existing
      queue is too complex.  Simply flush the queue and add this
      action immediately.  Since guarded loads and stores are pretty
      rare, this is not thought likely to cause any noticeable
      performance loss as a result of the loss of event-merging
      opportunities. */
   tl_assert(cgs->events_used >= 0);
   flushEvents(cgs);
   tl_assert(cgs->events_used == 0);
   /* Same as case Ev_Dw / case Ev_Dr in flushEvents, except with guard */
   IRExpr*      i_node_expr;
   const HChar* helperName;
   void*        helperAddr;
   IRExpr**     argv;
   Int          regparms;
   IRDirty*     di;
   i_node_expr = mkIRExpr_HWord( (HWord)inode );
   // Relies on log_0Ir_1Dw/_1Dr having identical prototypes (see the
   // comment at their definitions).
   helperName  = isWrite ? "log_0Ir_1Dw_cache_access"
                         : "log_0Ir_1Dr_cache_access";
   helperAddr  = isWrite ? &log_0Ir_1Dw_cache_access
                         : &log_0Ir_1Dr_cache_access;
   argv        = mkIRExprVec_3( i_node_expr,
                                ea, mkIRExpr_HWord( datasize ) );
   regparms    = 3;
   di          = unsafeIRDirty_0_N(
                    regparms,
                    helperName, VG_(fnptr_to_fnentry)( helperAddr ),
                    argv );
   di->guard = guard;
   addStmtToIRSB( cgs->sbOut, IRStmt_Dirty(di) );
}
999 static
1000 void addEvent_Bc ( CgState* cgs, InstrInfo* inode, IRAtom* guard )
1002 Event* evt;
1003 tl_assert(isIRAtom(guard));
1004 tl_assert(typeOfIRExpr(cgs->sbOut->tyenv, guard)
1005 == (sizeof(HWord)==4 ? Ity_I32 : Ity_I64));
1006 if (!clo_branch_sim)
1007 return;
1008 if (cgs->events_used == N_EVENTS)
1009 flushEvents(cgs);
1010 tl_assert(cgs->events_used >= 0 && cgs->events_used < N_EVENTS);
1011 evt = &cgs->events[cgs->events_used];
1012 init_Event(evt);
1013 evt->tag = Ev_Bc;
1014 evt->inode = inode;
1015 evt->Ev.Bc.taken = guard;
1016 cgs->events_used++;
1019 static
1020 void addEvent_Bi ( CgState* cgs, InstrInfo* inode, IRAtom* whereTo )
1022 Event* evt;
1023 tl_assert(isIRAtom(whereTo));
1024 tl_assert(typeOfIRExpr(cgs->sbOut->tyenv, whereTo)
1025 == (sizeof(HWord)==4 ? Ity_I32 : Ity_I64));
1026 if (!clo_branch_sim)
1027 return;
1028 if (cgs->events_used == N_EVENTS)
1029 flushEvents(cgs);
1030 tl_assert(cgs->events_used >= 0 && cgs->events_used < N_EVENTS);
1031 evt = &cgs->events[cgs->events_used];
1032 init_Event(evt);
1033 evt->tag = Ev_Bi;
1034 evt->inode = inode;
1035 evt->Ev.Bi.dst = whereTo;
1036 cgs->events_used++;
1039 ////////////////////////////////////////////////////////////
/* The main instrumentation pass.  Walks the incoming superblock 'sbIn' and,
   for each guest instruction (Ist_IMark) and each memory access / branch,
   queues an event via the addEvent_* helpers.  Queued events are turned
   into calls to the cache/branch simulator by flushEvents(), which is
   invoked whenever the queue fills up, at side exits, and at the end of
   the block.  Returns the instrumented superblock. */
static
IRSB* cg_instrument ( VgCallbackClosure* closure,
                      IRSB* sbIn,
                      const VexGuestLayout* layout,
                      const VexGuestExtents* vge,
                      const VexArchInfo* archinfo_host,
                      IRType gWordTy, IRType hWordTy )
{
   Int        i;
   UInt       isize;
   IRStmt*    st;
   Addr       cia; /* address of current insn */
   CgState    cgs;
   IRTypeEnv* tyenv = sbIn->tyenv;
   InstrInfo* curr_inode = NULL;

   if (gWordTy != hWordTy) {
      /* We don't currently support this case. */
      VG_(tool_panic)("host/guest word size mismatch");
   }

   // Set up new SB
   cgs.sbOut = deepCopyIRSBExceptStmts(sbIn);

   // Copy verbatim any IR preamble preceding the first IMark
   i = 0;
   while (i < sbIn->stmts_used && sbIn->stmts[i]->tag != Ist_IMark) {
      addStmtToIRSB( cgs.sbOut, sbIn->stmts[i] );
      i++;
   }

   // Get the first statement, and initial cia from it
   tl_assert(sbIn->stmts_used > 0);
   tl_assert(i < sbIn->stmts_used);
   st = sbIn->stmts[i];
   tl_assert(Ist_IMark == st->tag);

   cia = st->Ist.IMark.addr;
   isize = st->Ist.IMark.len;
   // If Vex fails to decode an instruction, the size will be zero.
   // Pretend otherwise.
   if (isize == 0) isize = VG_MIN_INSTR_SZB;

   // Set up running state and get block info
   tl_assert(closure->readdr == vge->base[0]);
   cgs.events_used = 0;
   cgs.sbInfo = get_SB_info(sbIn, (Addr)closure->readdr);
   cgs.sbInfo_i = 0;

   if (DEBUG_CG)
      VG_(printf)("\n\n---------- cg_instrument ----------\n");

   // Traverse the block, initialising inodes, adding events and flushing as
   // necessary.
   for (/*use current i*/; i < sbIn->stmts_used; i++) {

      st = sbIn->stmts[i];
      tl_assert(isFlatIRStmt(st));

      switch (st->tag) {
         case Ist_NoOp:
         case Ist_AbiHint:
         case Ist_Put:
         case Ist_PutI:
         case Ist_MBE:
            // No memory access and no branch: nothing to instrument.
            break;

         case Ist_IMark:
            cia = st->Ist.IMark.addr;
            isize = st->Ist.IMark.len;

            // If Vex fails to decode an instruction, the size will be zero.
            // Pretend otherwise.
            if (isize == 0) isize = VG_MIN_INSTR_SZB;

            // Sanity-check size.
            tl_assert( (VG_MIN_INSTR_SZB <= isize && isize <= VG_MAX_INSTR_SZB)
                     || VG_CLREQ_SZB == isize );

            // Get space for and init the inode, record it as the current one.
            // Subsequent Dr/Dw/Dm events from the same instruction will
            // also use it.
            curr_inode = setup_InstrInfo(&cgs, cia, isize);

            addEvent_Ir( &cgs, curr_inode );
            break;

         case Ist_WrTmp: {
            IRExpr* data = st->Ist.WrTmp.data;
            if (data->tag == Iex_Load) {
               IRExpr* aexpr = data->Iex.Load.addr;
               // Note also, endianness info is ignored.  I guess
               // that's not interesting.
               addEvent_Dr( &cgs, curr_inode, sizeofIRType(data->Iex.Load.ty),
                                  aexpr );
            }
            break;
         }

         case Ist_Store: {
            IRExpr* data = st->Ist.Store.data;
            IRExpr* aexpr = st->Ist.Store.addr;
            addEvent_Dw( &cgs, curr_inode,
                         sizeofIRType(typeOfIRExpr(tyenv, data)), aexpr );
            break;
         }

         case Ist_StoreG: {
            // Guarded store: modelled as a conditional data write.
            IRStoreG* sg = st->Ist.StoreG.details;
            IRExpr* data = sg->data;
            IRExpr* addr = sg->addr;
            IRType type = typeOfIRExpr(tyenv, data);
            tl_assert(type != Ity_INVALID);
            addEvent_D_guarded( &cgs, curr_inode,
                                sizeofIRType(type), addr, sg->guard,
                                True/*isWrite*/ );
            break;
         }

         case Ist_LoadG: {
            // Guarded load: modelled as a conditional data read, using
            // the pre-widening (loaded) type for the access size.
            IRLoadG* lg = st->Ist.LoadG.details;
            IRType type = Ity_INVALID; /* loaded type */
            IRType typeWide = Ity_INVALID; /* after implicit widening */
            IRExpr* addr = lg->addr;
            typeOfIRLoadGOp(lg->cvt, &typeWide, &type);
            tl_assert(type != Ity_INVALID);
            addEvent_D_guarded( &cgs, curr_inode,
                                sizeofIRType(type), addr, lg->guard,
                                False/*!isWrite*/ );
            break;
         }

         case Ist_Dirty: {
            Int dataSize;
            IRDirty* d = st->Ist.Dirty.details;
            if (d->mFx != Ifx_None) {
               /* This dirty helper accesses memory.  Collect the details. */
               tl_assert(d->mAddr != NULL);
               tl_assert(d->mSize != 0);
               dataSize = d->mSize;
               // Large (eg. 28B, 108B, 512B on x86) data-sized
               // instructions will be done inaccurately, but they're
               // very rare and this avoids errors from hitting more
               // than two cache lines in the simulation.
               if (dataSize > min_line_size)
                  dataSize = min_line_size;
               if (d->mFx == Ifx_Read || d->mFx == Ifx_Modify)
                  addEvent_Dr( &cgs, curr_inode, dataSize, d->mAddr );
               if (d->mFx == Ifx_Write || d->mFx == Ifx_Modify)
                  addEvent_Dw( &cgs, curr_inode, dataSize, d->mAddr );
            } else {
               tl_assert(d->mAddr == NULL);
               tl_assert(d->mSize == 0);
            }
            break;
         }

         case Ist_CAS: {
            /* We treat it as a read and a write of the location.  I
               think that is the same behaviour as it was before IRCAS
               was introduced, since prior to that point, the Vex
               front ends would translate a lock-prefixed instruction
               into a (normal) read followed by a (normal) write. */
            Int dataSize;
            IRCAS* cas = st->Ist.CAS.details;
            tl_assert(cas->addr != NULL);
            tl_assert(cas->dataLo != NULL);
            dataSize = sizeofIRType(typeOfIRExpr(tyenv, cas->dataLo));
            if (cas->dataHi != NULL)
               dataSize *= 2; /* since it's a doubleword-CAS */
            /* I don't think this can ever happen, but play safe. */
            if (dataSize > min_line_size)
               dataSize = min_line_size;
            addEvent_Dr( &cgs, curr_inode, dataSize, cas->addr );
            addEvent_Dw( &cgs, curr_inode, dataSize, cas->addr );
            break;
         }

         case Ist_LLSC: {
            IRType dataTy;
            if (st->Ist.LLSC.storedata == NULL) {
               /* LL */
               dataTy = typeOfIRTemp(tyenv, st->Ist.LLSC.result);
               addEvent_Dr( &cgs, curr_inode,
                            sizeofIRType(dataTy), st->Ist.LLSC.addr );
               /* flush events before LL, should help SC to succeed */
               flushEvents( &cgs );
            } else {
               /* SC */
               dataTy = typeOfIRExpr(tyenv, st->Ist.LLSC.storedata);
               addEvent_Dw( &cgs, curr_inode,
                            sizeofIRType(dataTy), st->Ist.LLSC.addr );
            }
            break;
         }

         case Ist_Exit: {
            // call branch predictor only if this is a branch in guest code
            if ( (st->Ist.Exit.jk == Ijk_Boring) ||
                 (st->Ist.Exit.jk == Ijk_Call) ||
                 (st->Ist.Exit.jk == Ijk_Ret) )
            {
               /* Stuff to widen the guard expression to a host word, so
                  we can pass it to the branch predictor simulation
                  functions easily. */
               Bool inverted;
               Addr nia, sea;
               IRConst* dst;
               IRType tyW = hWordTy;
               IROp widen = tyW==Ity_I32 ? Iop_1Uto32 : Iop_1Uto64;
               IROp opXOR = tyW==Ity_I32 ? Iop_Xor32 : Iop_Xor64;
               IRTemp guard1 = newIRTemp(cgs.sbOut->tyenv, Ity_I1);
               IRTemp guardW = newIRTemp(cgs.sbOut->tyenv, tyW);
               IRTemp guard = newIRTemp(cgs.sbOut->tyenv, tyW);
               IRExpr* one = tyW==Ity_I32 ? IRExpr_Const(IRConst_U32(1))
                                          : IRExpr_Const(IRConst_U64(1));

               /* First we need to figure out whether the side exit got
                  inverted by the ir optimiser.  To do that, figure out
                  the next (fallthrough) instruction's address and the
                  side exit address and see if they are the same. */
               nia = cia + isize;

               /* Side exit address */
               dst = st->Ist.Exit.dst;
               if (tyW == Ity_I32) {
                  tl_assert(dst->tag == Ico_U32);
                  sea = dst->Ico.U32;
               } else {
                  tl_assert(tyW == Ity_I64);
                  tl_assert(dst->tag == Ico_U64);
                  sea = dst->Ico.U64;
               }

               inverted = nia == sea;

               /* Widen the guard expression. */
               addStmtToIRSB( cgs.sbOut,
                              IRStmt_WrTmp( guard1, st->Ist.Exit.guard ));
               addStmtToIRSB( cgs.sbOut,
                              IRStmt_WrTmp( guardW,
                                            IRExpr_Unop(widen,
                                                        IRExpr_RdTmp(guard1))) );
               /* If the exit is inverted, invert the sense of the guard. */
               addStmtToIRSB(
                  cgs.sbOut,
                  IRStmt_WrTmp(
                     guard,
                     inverted ? IRExpr_Binop(opXOR, IRExpr_RdTmp(guardW), one)
                              : IRExpr_RdTmp(guardW)
                  ));
               /* And post the event. */
               addEvent_Bc( &cgs, curr_inode, IRExpr_RdTmp(guard) );
            }

            /* We may never reach the next statement, so need to flush
               all outstanding transactions now. */
            flushEvents( &cgs );
            break;
         }

         default:
            ppIRStmt(st);
            tl_assert(0);
            break;
      }

      /* Copy the original statement */
      addStmtToIRSB( cgs.sbOut, st );

      if (DEBUG_CG) {
         ppIRStmt(st);
         VG_(printf)("\n");
      }
   }

   /* Deal with branches to unknown destinations.  Except ignore ones
      which are function returns as we assume the return stack
      predictor never mispredicts. */
   if ((sbIn->jumpkind == Ijk_Boring) || (sbIn->jumpkind == Ijk_Call)) {
      if (0) { ppIRExpr( sbIn->next ); VG_(printf)("\n"); }
      switch (sbIn->next->tag) {
         case Iex_Const:
            break; /* boring - branch to known address */
         case Iex_RdTmp:
            /* looks like an indirect branch (branch to unknown) */
            addEvent_Bi( &cgs, curr_inode, sbIn->next );
            break;
         default:
            /* shouldn't happen - if the incoming IR is properly
               flattened, should only have tmp and const cases to
               consider. */
            tl_assert(0);
      }
   }

   /* At the end of the bb.  Flush outstandings. */
   flushEvents( &cgs );

   /* done.  stay sane ... */
   tl_assert(cgs.sbInfo_i == cgs.sbInfo->n_instrs);

   if (DEBUG_CG) {
      VG_(printf)( "goto {");
      ppIRJumpKind(sbIn->jumpkind);
      VG_(printf)( "} ");
      ppIRExpr( sbIn->next );
      VG_(printf)( "}\n");
   }

   return cgs.sbOut;
}
1355 /*------------------------------------------------------------*/
1356 /*--- Cache configuration ---*/
1357 /*------------------------------------------------------------*/
// User-specified cache geometries, filled in from the command line by
// VG_(str_clo_cache_opt) (see cg_process_cmd_line_option) and consumed by
// VG_(post_clo_init_configure_caches) in cg_post_clo_init.  UNDEFINED_CACHE
// presumably marks "no explicit option given" -- the core then picks a
// default/detected geometry.
static cache_t clo_I1_cache = UNDEFINED_CACHE;
static cache_t clo_D1_cache = UNDEFINED_CACHE;
static cache_t clo_LL_cache = UNDEFINED_CACHE;
1363 /*------------------------------------------------------------*/
1364 /*--- cg_fini() and related function ---*/
1365 /*------------------------------------------------------------*/
// Total reads/writes/misses.  Calculated during CC traversal at the end
// (fprint_CC_table_and_calc_totals) and printed by cg_fini.
// All auto-zeroed.
static CacheCC Ir_total;    // instruction-cache reads
static CacheCC Dr_total;    // data-cache reads
static CacheCC Dw_total;    // data-cache writes
static BranchCC Bc_total;   // conditional branches
static BranchCC Bi_total;   // indirect branches
/* Write the per-line cost-centre table to the cachegrind output file (the
   format read by cg_annotate), and accumulate the *_total summary
   variables as a side effect of the traversal.  On failure to open the
   output file, prints a warning and returns without writing anything. */
static void fprint_CC_table_and_calc_totals(void)
{
   Int i;
   VgFile *fp;
   HChar *currFile = NULL;
   const HChar *currFn = NULL;
   LineCC* lineCC;

   // Setup output filename.  Nb: it's important to do this now, ie. as late
   // as possible.  If we do it at start-up and the program forks and the
   // output file format string contains a %p (pid) specifier, both the
   // parent and child will incorrectly write to the same file; this
   // happened in 3.3.0.
   HChar* cachegrind_out_file =
      VG_(expand_file_name)("--cachegrind-out-file", clo_cachegrind_out_file);

   fp = VG_(fopen)(cachegrind_out_file, VKI_O_CREAT|VKI_O_TRUNC|VKI_O_WRONLY,
                                        VKI_S_IRUSR|VKI_S_IWUSR);
   if (fp == NULL) {
      // If the file can't be opened for whatever reason (conflict
      // between multiple cachegrinded processes?), give up now.
      VG_(umsg)("error: can't open cache simulation output file '%s'\n",
                cachegrind_out_file );
      VG_(umsg)("       ... so simulation results will be missing.\n");
      VG_(free)(cachegrind_out_file);
      return;
   } else {
      VG_(free)(cachegrind_out_file);
   }

   // "desc:" lines (giving I1/D1/LL cache configuration).  The spaces after
   // the 2nd colon makes cg_annotate's output look nicer.
   VG_(fprintf)(fp, "desc: I1 cache: %s\n"
                    "desc: D1 cache: %s\n"
                    "desc: LL cache: %s\n",
                    I1.desc_line, D1.desc_line, LL.desc_line);

   // "cmd:" line
   VG_(fprintf)(fp, "cmd: %s", VG_(args_the_exename));
   for (i = 0; i < VG_(sizeXA)( VG_(args_for_client) ); i++) {
      HChar* arg = * (HChar**) VG_(indexXA)( VG_(args_for_client), i );
      VG_(fprintf)(fp, " %s", arg);
   }

   // "events:" line -- which columns follow depends on which simulations
   // were enabled.
   if (clo_cache_sim && clo_branch_sim) {
      VG_(fprintf)(fp, "\nevents: Ir I1mr ILmr Dr D1mr DLmr Dw D1mw DLmw "
                       "Bc Bcm Bi Bim\n");
   }
   else if (clo_cache_sim && !clo_branch_sim) {
      VG_(fprintf)(fp, "\nevents: Ir I1mr ILmr Dr D1mr DLmr Dw D1mw DLmw "
                       "\n");
   }
   else if (!clo_cache_sim && clo_branch_sim) {
      VG_(fprintf)(fp, "\nevents: Ir Bc Bcm Bi Bim\n");
   }
   else {
      VG_(fprintf)(fp, "\nevents: Ir\n");
   }

   // Traverse every lineCC
   VG_(OSetGen_ResetIter)(CC_table);
   while ( (lineCC = VG_(OSetGen_Next)(CC_table)) ) {
      Bool just_hit_a_new_file = False;
      // If we've hit a new file, print a "fl=" line.  Note that because
      // each string is stored exactly once in the string table, we can use
      // pointer comparison rather than strcmp() to test for equality, which
      // is good because most of the time the comparisons are equal and so
      // the whole strings would have to be checked.
      if ( lineCC->loc.file != currFile ) {
         currFile = lineCC->loc.file;
         VG_(fprintf)(fp, "fl=%s\n", currFile);
         distinct_files++;
         just_hit_a_new_file = True;
      }
      // If we've hit a new function, print a "fn=" line.  We know to do
      // this when the function name changes, and also every time we hit a
      // new file (in which case the new function name might be the same as
      // in the old file, hence the just_hit_a_new_file test).
      if ( just_hit_a_new_file || lineCC->loc.fn != currFn ) {
         currFn = lineCC->loc.fn;
         VG_(fprintf)(fp, "fn=%s\n", currFn);
         distinct_fns++;
      }

      // Print the LineCC -- the column set must match the "events:" line
      // written above.
      if (clo_cache_sim && clo_branch_sim) {
         VG_(fprintf)(fp, "%d %llu %llu %llu"
                          " %llu %llu %llu"
                          " %llu %llu %llu"
                          " %llu %llu %llu %llu\n",
                          lineCC->loc.line,
                          lineCC->Ir.a, lineCC->Ir.m1, lineCC->Ir.mL,
                          lineCC->Dr.a, lineCC->Dr.m1, lineCC->Dr.mL,
                          lineCC->Dw.a, lineCC->Dw.m1, lineCC->Dw.mL,
                          lineCC->Bc.b, lineCC->Bc.mp,
                          lineCC->Bi.b, lineCC->Bi.mp);
      }
      else if (clo_cache_sim && !clo_branch_sim) {
         VG_(fprintf)(fp, "%d %llu %llu %llu"
                          " %llu %llu %llu"
                          " %llu %llu %llu\n",
                          lineCC->loc.line,
                          lineCC->Ir.a, lineCC->Ir.m1, lineCC->Ir.mL,
                          lineCC->Dr.a, lineCC->Dr.m1, lineCC->Dr.mL,
                          lineCC->Dw.a, lineCC->Dw.m1, lineCC->Dw.mL);
      }
      else if (!clo_cache_sim && clo_branch_sim) {
         VG_(fprintf)(fp, "%d %llu"
                          " %llu %llu %llu %llu\n",
                          lineCC->loc.line,
                          lineCC->Ir.a,
                          lineCC->Bc.b, lineCC->Bc.mp,
                          lineCC->Bi.b, lineCC->Bi.mp);
      }
      else {
         VG_(fprintf)(fp, "%d %llu\n",
                          lineCC->loc.line,
                          lineCC->Ir.a);
      }

      // Update summary stats
      Ir_total.a += lineCC->Ir.a;
      Ir_total.m1 += lineCC->Ir.m1;
      Ir_total.mL += lineCC->Ir.mL;
      Dr_total.a += lineCC->Dr.a;
      Dr_total.m1 += lineCC->Dr.m1;
      Dr_total.mL += lineCC->Dr.mL;
      Dw_total.a += lineCC->Dw.a;
      Dw_total.m1 += lineCC->Dw.m1;
      Dw_total.mL += lineCC->Dw.mL;
      Bc_total.b += lineCC->Bc.b;
      Bc_total.mp += lineCC->Bc.mp;
      Bi_total.b += lineCC->Bi.b;
      Bi_total.mp += lineCC->Bi.mp;

      distinct_lines++;
   }

   // Summary stats must come after rest of table, since we calculate them
   // during traversal.  */
   if (clo_cache_sim && clo_branch_sim) {
      VG_(fprintf)(fp, "summary:"
                       " %llu %llu %llu"
                       " %llu %llu %llu"
                       " %llu %llu %llu"
                       " %llu %llu %llu %llu\n",
                       Ir_total.a, Ir_total.m1, Ir_total.mL,
                       Dr_total.a, Dr_total.m1, Dr_total.mL,
                       Dw_total.a, Dw_total.m1, Dw_total.mL,
                       Bc_total.b, Bc_total.mp,
                       Bi_total.b, Bi_total.mp);
   }
   else if (clo_cache_sim && !clo_branch_sim) {
      VG_(fprintf)(fp, "summary:"
                       " %llu %llu %llu"
                       " %llu %llu %llu"
                       " %llu %llu %llu\n",
                       Ir_total.a, Ir_total.m1, Ir_total.mL,
                       Dr_total.a, Dr_total.m1, Dr_total.mL,
                       Dw_total.a, Dw_total.m1, Dw_total.mL);
   }
   else if (!clo_cache_sim && clo_branch_sim) {
      VG_(fprintf)(fp, "summary:"
                       " %llu"
                       " %llu %llu %llu %llu\n",
                       Ir_total.a,
                       Bc_total.b, Bc_total.mp,
                       Bi_total.b, Bi_total.mp);
   }
   else {
      VG_(fprintf)(fp, "summary:"
                       " %llu\n",
                       Ir_total.a);
   }

   VG_(fclose)(fp);
}
1553 static UInt ULong_width(ULong n)
1555 UInt w = 0;
1556 while (n > 0) {
1557 n = n / 10;
1558 w++;
1560 if (w == 0) w = 1;
1561 return w + (w-1)/3; // add space for commas
/* Tool finalisation: write the output file (via
   fprint_CC_table_and_calc_totals) and, unless -q, print the summary
   statistics to the user.  Column widths are derived from the largest
   counts so the numbers line up. */
static void cg_fini(Int exitcode)
{
   static HChar fmt[128];   // OK; large enough

   CacheCC D_total;
   BranchCC B_total;
   ULong LL_total_m, LL_total_mr, LL_total_mw,
         LL_total, LL_total_r, LL_total_w;
   Int l1, l2, l3;

   // Nb: this also calculates the *_total values used below.
   fprint_CC_table_and_calc_totals();

   if (VG_(clo_verbosity) == 0)
      return;

   // Nb: this isn't called "MAX" because that overshadows a global on Darwin.
   #define CG_MAX(a, b)  ((a) >= (b) ? (a) : (b))

   /* I cache results.  Use the I_refs value to determine the first column
    * width. */
   l1 = ULong_width(Ir_total.a);
   l2 = ULong_width(CG_MAX(Dr_total.a, Bc_total.b));
   l3 = ULong_width(CG_MAX(Dw_total.a, Bi_total.b));

   /* Make format string, getting width right for numbers */
   VG_(sprintf)(fmt, "%%s %%,%dllu\n", l1);

   /* Always print this */
   VG_(umsg)(fmt, "I refs:        ", Ir_total.a);

   /* If cache profiling is enabled, show D access numbers and all
      miss numbers */
   if (clo_cache_sim) {
      VG_(umsg)(fmt, "I1  misses:    ", Ir_total.m1);
      VG_(umsg)(fmt, "LLi misses:    ", Ir_total.mL);

      // Avoid a zero denominator in the rate calculations below.
      if (0 == Ir_total.a) Ir_total.a = 1;
      VG_(umsg)("I1  miss rate: %*.2f%%\n", l1,
                Ir_total.m1 * 100.0 / Ir_total.a);
      VG_(umsg)("LLi miss rate: %*.2f%%\n", l1,
                Ir_total.mL * 100.0 / Ir_total.a);
      VG_(umsg)("\n");

      /* D cache results.  Use the D_refs.rd and D_refs.wr values to
       * determine the width of columns 2 & 3. */
      D_total.a = Dr_total.a + Dw_total.a;
      D_total.m1 = Dr_total.m1 + Dw_total.m1;
      D_total.mL = Dr_total.mL + Dw_total.mL;

      /* Make format string, getting width right for numbers */
      VG_(sprintf)(fmt, "%%s %%,%dllu  (%%,%dllu rd   + %%,%dllu wr)\n",
                        l1, l2, l3);

      VG_(umsg)(fmt, "D refs:        ",
                D_total.a, Dr_total.a, Dw_total.a);
      VG_(umsg)(fmt, "D1  misses:    ",
                D_total.m1, Dr_total.m1, Dw_total.m1);
      VG_(umsg)(fmt, "LLd misses:    ",
                D_total.mL, Dr_total.mL, Dw_total.mL);

      // Again, guard against zero denominators.
      if (0 == D_total.a) D_total.a = 1;
      if (0 == Dr_total.a) Dr_total.a = 1;
      if (0 == Dw_total.a) Dw_total.a = 1;
      VG_(umsg)("D1  miss rate: %*.1f%% (%*.1f%%     + %*.1f%%  )\n",
                l1, D_total.m1 * 100.0 / D_total.a,
                l2, Dr_total.m1 * 100.0 / Dr_total.a,
                l3, Dw_total.m1 * 100.0 / Dw_total.a);
      VG_(umsg)("LLd miss rate: %*.1f%% (%*.1f%%     + %*.1f%%  )\n",
                l1, D_total.mL * 100.0 / D_total.a,
                l2, Dr_total.mL * 100.0 / Dr_total.a,
                l3, Dw_total.mL * 100.0 / Dw_total.a);
      VG_(umsg)("\n");

      /* LL overall results */

      // LL "refs" are the misses from the two L1 levels.
      LL_total = Dr_total.m1 + Dw_total.m1 + Ir_total.m1;
      LL_total_r = Dr_total.m1 + Ir_total.m1;
      LL_total_w = Dw_total.m1;
      VG_(umsg)(fmt, "LL refs:       ",
                LL_total, LL_total_r, LL_total_w);

      LL_total_m = Dr_total.mL + Dw_total.mL + Ir_total.mL;
      LL_total_mr = Dr_total.mL + Ir_total.mL;
      LL_total_mw = Dw_total.mL;
      VG_(umsg)(fmt, "LL misses:     ",
                LL_total_m, LL_total_mr, LL_total_mw);

      // LL miss rate is relative to total accesses, not to LL refs.
      VG_(umsg)("LL miss rate:  %*.1f%% (%*.1f%%     + %*.1f%%  )\n",
                l1, LL_total_m * 100.0 / (Ir_total.a + D_total.a),
                l2, LL_total_mr * 100.0 / (Ir_total.a + Dr_total.a),
                l3, LL_total_mw * 100.0 / Dw_total.a);
   }

   /* If branch profiling is enabled, show branch overall results. */
   if (clo_branch_sim) {
      /* Make format string, getting width right for numbers */
      VG_(sprintf)(fmt, "%%s %%,%dllu  (%%,%dllu cond + %%,%dllu ind)\n",
                        l1, l2, l3);

      if (0 == Bc_total.b) Bc_total.b = 1;
      if (0 == Bi_total.b) Bi_total.b = 1;
      B_total.b = Bc_total.b + Bi_total.b;
      B_total.mp = Bc_total.mp + Bi_total.mp;

      VG_(umsg)("\n");
      VG_(umsg)(fmt, "Branches:      ",
                B_total.b, Bc_total.b, Bi_total.b);

      VG_(umsg)(fmt, "Mispredicts:   ",
                B_total.mp, Bc_total.mp, Bi_total.mp);

      VG_(umsg)("Mispred rate:  %*.1f%% (%*.1f%%     + %*.1f%%   )\n",
                l1, B_total.mp * 100.0 / B_total.b,
                l2, Bc_total.mp * 100.0 / Bc_total.b,
                l3, Bi_total.mp * 100.0 / Bi_total.b);
   }

   // Various stats
   if (VG_(clo_stats)) {
      Int debug_lookups = full_debugs + fn_debugs +
                          file_line_debugs + no_debugs;

      VG_(dmsg)("\n");
      VG_(dmsg)("cachegrind: distinct files     : %d\n", distinct_files);
      VG_(dmsg)("cachegrind: distinct functions : %d\n", distinct_fns);
      VG_(dmsg)("cachegrind: distinct lines     : %d\n", distinct_lines);
      VG_(dmsg)("cachegrind: distinct instrs NoX: %d\n", distinct_instrsNoX);
      VG_(dmsg)("cachegrind: distinct instrs Gen: %d\n", distinct_instrsGen);
      VG_(dmsg)("cachegrind: debug lookups      : %d\n", debug_lookups);

      VG_(dmsg)("cachegrind: with full      info:%6.1f%% (%d)\n",
                full_debugs * 100.0 / debug_lookups, full_debugs);
      VG_(dmsg)("cachegrind: with file/line info:%6.1f%% (%d)\n",
                file_line_debugs * 100.0 / debug_lookups, file_line_debugs);
      VG_(dmsg)("cachegrind: with fn name   info:%6.1f%% (%d)\n",
                fn_debugs * 100.0 / debug_lookups, fn_debugs);
      VG_(dmsg)("cachegrind: with zero      info:%6.1f%% (%d)\n",
                no_debugs * 100.0 / debug_lookups, no_debugs);

      VG_(dmsg)("cachegrind: string table size: %u\n",
                VG_(OSetGen_Size)(stringTable));
      VG_(dmsg)("cachegrind: CC table size: %u\n",
                VG_(OSetGen_Size)(CC_table));
      VG_(dmsg)("cachegrind: InstrInfo table size: %u\n",
                VG_(OSetGen_Size)(instrInfoTable));
   }
}
1712 /*--------------------------------------------------------------------*/
1713 /*--- Discarding BB info ---*/
1714 /*--------------------------------------------------------------------*/
1716 // Called when a translation is removed from the translation cache for
1717 // any reason at all: to free up space, because the guest code was
1718 // unmapped or modified, or for any arbitrary reason.
1719 static
1720 void cg_discard_superblock_info ( Addr orig_addr64, VexGuestExtents vge )
1722 SB_info* sbInfo;
1723 Addr orig_addr = vge.base[0];
1725 tl_assert(vge.n_used > 0);
1727 if (DEBUG_CG)
1728 VG_(printf)( "discard_basic_block_info: %p, %p, %llu\n",
1729 (void*)orig_addr,
1730 (void*)vge.base[0], (ULong)vge.len[0]);
1732 // Get BB info, remove from table, free BB info. Simple! Note that we
1733 // use orig_addr, not the first instruction address in vge.
1734 sbInfo = VG_(OSetGen_Remove)(instrInfoTable, &orig_addr);
1735 tl_assert(NULL != sbInfo);
1736 VG_(OSetGen_FreeNode)(instrInfoTable, sbInfo);
1739 /*--------------------------------------------------------------------*/
1740 /*--- Command line processing ---*/
1741 /*--------------------------------------------------------------------*/
/* Handle one tool-specific command line option.  Returns True iff the
   option was recognised (the VG_*_CLO macros both match the option name
   and assign the value; the empty {} bodies are deliberate). */
static Bool cg_process_cmd_line_option(const HChar* arg)
{
   // --I1/--D1/--LL cache geometry options, parsed by the core helper.
   if (VG_(str_clo_cache_opt)(arg,
                              &clo_I1_cache,
                              &clo_D1_cache,
                              &clo_LL_cache)) {}

   else if VG_STR_CLO( arg, "--cachegrind-out-file", clo_cachegrind_out_file) {}
   else if VG_BOOL_CLO(arg, "--cache-sim", clo_cache_sim) {}
   else if VG_BOOL_CLO(arg, "--branch-sim", clo_branch_sim) {}
   else
      return False;

   return True;
}
/* Print the tool-specific options for --help. */
static void cg_print_usage(void)
{
   // Shared --I1/--D1/--LL cache option descriptions come from the core.
   VG_(print_cache_clo_opts)();
   VG_(printf)(
" --cache-sim=yes|no [yes] collect cache stats?\n"
" --branch-sim=yes|no [no] collect branch prediction stats?\n"
" --cachegrind-out-file=<file> output file name [cachegrind.out.%%p]\n"
   );
}
/* Print the tool-specific debugging options for --help-debug (none). */
static void cg_print_debug_usage(void)
{
   VG_(printf)(
"    (none)\n"
   );
}
1776 /*--------------------------------------------------------------------*/
1777 /*--- Setup ---*/
1778 /*--------------------------------------------------------------------*/
1780 static void cg_post_clo_init(void); /* just below */
/* Tool registration, called before command line processing: set tool
   details, register the instrument/fini callbacks and the optional
   "needs" (superblock discards, command line options). */
static void cg_pre_clo_init(void)
{
   VG_(details_name) ("Cachegrind");
   VG_(details_version) (NULL);
   VG_(details_description) ("a cache and branch-prediction profiler");
   VG_(details_copyright_author)(
      "Copyright (C) 2002-2017, and GNU GPL'd, by Nicholas Nethercote et al.");
   VG_(details_bug_reports_to) (VG_BUGS_TO);
   VG_(details_avg_translation_sizeB) ( 500 );

   VG_(clo_vex_control).iropt_register_updates_default
      = VG_(clo_px_file_backed)
      = VexRegUpdSpAtMemAccess; // overridable by the user.

   VG_(basic_tool_funcs) (cg_post_clo_init,
                          cg_instrument,
                          cg_fini);

   VG_(needs_superblock_discards)(cg_discard_superblock_info);
   VG_(needs_command_line_options)(cg_process_cmd_line_option,
                                   cg_print_usage,
                                   cg_print_debug_usage);
}
/* Post-command-line initialisation: create the CC/InstrInfo/string tables,
   work out the cache configuration (defaults possibly overridden by the
   --I1/--D1/--LL options), sanity-check the line sizes, and initialise the
   cache simulator. */
static void cg_post_clo_init(void)
{
   cache_t I1c, D1c, LLc;

   // Per-source-line cost centres, keyed on (file, fn, line).
   CC_table =
      VG_(OSetGen_Create)(offsetof(LineCC, loc),
                          cmp_CodeLoc_LineCC,
                          VG_(malloc), "cg.main.cpci.1",
                          VG_(free));
   // Per-superblock instruction info, keyed on the SB's address (fast key
   // at offset 0, so no comparison function is needed).
   instrInfoTable =
      VG_(OSetGen_Create)(/*keyOff*/0,
                          NULL,
                          VG_(malloc), "cg.main.cpci.2",
                          VG_(free));
   // Interned filename/function-name strings (stored once each).
   stringTable =
      VG_(OSetGen_Create)(/*keyOff*/0,
                          stringCmp,
                          VG_(malloc), "cg.main.cpci.3",
                          VG_(free));

   VG_(post_clo_init_configure_caches)(&I1c, &D1c, &LLc,
                                       &clo_I1_cache,
                                       &clo_D1_cache,
                                       &clo_LL_cache);

   // min_line_size is used to make sure that we never feed
   // accesses to the simulator straddling more than two
   // cache lines at any cache level
   min_line_size = (I1c.line_size < D1c.line_size) ? I1c.line_size : D1c.line_size;
   min_line_size = (LLc.line_size < min_line_size) ? LLc.line_size : min_line_size;

   Int largest_load_or_store_size
      = VG_(machine_get_size_of_largest_guest_register)();
   if (min_line_size < largest_load_or_store_size) {
      /* We can't continue, because the cache simulation might
         straddle more than 2 lines, and it will assert.  So let's
         just stop before we start. */
      VG_(umsg)("Cachegrind: cannot continue: the minimum line size (%d)\n",
                (Int)min_line_size);
      VG_(umsg)("  must be equal to or larger than the maximum register size (%d)\n",
                largest_load_or_store_size );
      VG_(umsg)("  but it is not.  Exiting now.\n");
      VG_(exit)(1);
   }

   cachesim_initcaches(I1c, D1c, LLc);
}
1854 VG_DETERMINE_INTERFACE_VERSION(cg_pre_clo_init)
1856 /*--------------------------------------------------------------------*/
1857 /*--- end ---*/
1858 /*--------------------------------------------------------------------*/