drd: Add a consistency check
[valgrind.git] / cachegrind / cg_main.c
blob20fafbc5f0bcb1c95bf13c2885d83b2dcc2618eb
2 /*--------------------------------------------------------------------*/
3 /*--- Cachegrind: everything but the simulation itself. ---*/
4 /*--- cg_main.c ---*/
5 /*--------------------------------------------------------------------*/
7 /*
8 This file is part of Cachegrind, a Valgrind tool for cache
9 profiling programs.
11 Copyright (C) 2002-2013 Nicholas Nethercote
12 njn@valgrind.org
14 This program is free software; you can redistribute it and/or
15 modify it under the terms of the GNU General Public License as
16 published by the Free Software Foundation; either version 2 of the
17 License, or (at your option) any later version.
19 This program is distributed in the hope that it will be useful, but
20 WITHOUT ANY WARRANTY; without even the implied warranty of
21 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
22 General Public License for more details.
24 You should have received a copy of the GNU General Public License
25 along with this program; if not, write to the Free Software
26 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
27 02111-1307, USA.
   The GNU General Public License is contained in the file COPYING.
*/
32 #include "pub_tool_basics.h"
33 #include "pub_tool_debuginfo.h"
34 #include "pub_tool_libcbase.h"
35 #include "pub_tool_libcassert.h"
36 #include "pub_tool_libcfile.h"
37 #include "pub_tool_libcprint.h"
38 #include "pub_tool_libcproc.h"
39 #include "pub_tool_mallocfree.h"
40 #include "pub_tool_options.h"
41 #include "pub_tool_oset.h"
42 #include "pub_tool_tooliface.h"
43 #include "pub_tool_xarray.h"
44 #include "pub_tool_clientstate.h"
45 #include "pub_tool_machine.h" // VG_(fnptr_to_fnentry)
47 #include "cg_arch.h"
48 #include "cg_sim.c"
49 #include "cg_branchpred.c"
51 /*------------------------------------------------------------*/
52 /*--- Constants ---*/
53 /*------------------------------------------------------------*/
55 /* Set to 1 for very verbose debugging */
56 #define DEBUG_CG 0
58 /*------------------------------------------------------------*/
59 /*--- Options ---*/
60 /*------------------------------------------------------------*/
/* Command-line-controllable options. */
static Bool  clo_cache_sim  = True;  /* do cache simulation? */
static Bool  clo_branch_sim = False; /* do branch simulation? */
/* Output file name template; "%p" presumably expands to the pid —
   TODO confirm against the option-processing code (not in view). */
static const HChar* clo_cachegrind_out_file = "cachegrind.out.%p";
66 /*------------------------------------------------------------*/
67 /*--- Cachesim configuration ---*/
68 /*------------------------------------------------------------*/
/* Upper bound for a single data access handled as one event; asserted
   in addEvent_Dr/Dw/D_guarded.  Initialised to 0 here; set elsewhere
   (not in view) once the cache configuration is known. */
static Int min_line_size = 0; /* min of L1 and LL cache line sizes */
72 /*------------------------------------------------------------*/
73 /*--- Types and Data Structures ---*/
74 /*------------------------------------------------------------*/
/* Access/miss counters for one kind of memory access (Ir, Dr or Dw). */
typedef
   struct {
      ULong a;  /* total # memory accesses of this kind */
      ULong m1; /* misses in the first level cache */
      ULong mL; /* misses in the second level cache */
   }
   CacheCC;
/* Counters for one kind of branch (conditional or indirect). */
typedef
   struct {
      ULong b;  /* total # branches of this kind */
      ULong mp; /* number of branches mispredicted */
   }
   BranchCC;
91 //------------------------------------------------------------
92 // Primary data structure #1: CC table
93 // - Holds the per-source-line hit/miss stats, grouped by file/function/line.
94 // - an ordered set of CCs. CC indexing done by file/function/line (as
95 // determined from the instrAddr).
96 // - Traversed for dumping stats at end in file/func/line hierarchy.
/* A source location; the lookup key for the CC table (see
   cmp_CodeLoc_LineCC).  file/fn point at interned strings from the
   string table once stored in a LineCC. */
typedef struct {
   HChar* file;        /* source file name */
   const HChar* fn;    /* function name */
   Int    line;        /* line number; 0 when unknown */
}
CodeLoc;
/* Per-source-line cost centre: all event counts accumulated for one
   file/function/line triple. */
typedef struct {
   CodeLoc  loc; /* Source location that these counts pertain to */
   CacheCC  Ir;  /* Insn read counts */
   CacheCC  Dr;  /* Data read counts */
   CacheCC  Dw;  /* Data write/modify counts */
   BranchCC Bc;  /* Conditional branch counts */
   BranchCC Bi;  /* Indirect branch counts */
} LineCC;
114 // First compare file, then fn, then line.
115 static Word cmp_CodeLoc_LineCC(const void *vloc, const void *vcc)
117 Word res;
118 const CodeLoc* a = (const CodeLoc*)vloc;
119 const CodeLoc* b = &(((const LineCC*)vcc)->loc);
121 res = VG_(strcmp)(a->file, b->file);
122 if (0 != res)
123 return res;
125 res = VG_(strcmp)(a->fn, b->fn);
126 if (0 != res)
127 return res;
129 return a->line - b->line;
/* The CC table: an OSet of LineCC, ordered by cmp_CodeLoc_LineCC
   (file, then fn, then line). */
static OSet* CC_table;
134 //------------------------------------------------------------
135 // Primary data structure #2: InstrInfo table
136 // - Holds the cached info about each instr that is used for simulation.
137 // - table(SB_start_addr, list(InstrInfo))
138 // - For each SB, each InstrInfo in the list holds info about the
139 // instruction (instrLen, instrAddr, etc), plus a pointer to its line
140 // CC. This node is what's passed to the simulation function.
141 // - When SBs are discarded the relevant list(instr_details) is freed.
/* Cached per-instruction info handed to the simulation helpers:
   address, encoded length, and the line CC its counts go to. */
typedef struct _InstrInfo InstrInfo;
struct _InstrInfo {
   Addr    instr_addr;
   UChar   instr_len;
   LineCC* parent;       // parent line-CC
};
/* Per-superblock record: keyed by SB address, followed by one
   InstrInfo per instruction (zero-length-array tail, sized at
   allocation time in get_SB_info). */
typedef struct _SB_info SB_info;
struct _SB_info {
   Addr      SB_addr;   // key;  MUST BE FIRST
   Int       n_instrs;
   InstrInfo instrs[0]; // variable-length tail
};
/* Table of SB_info records, keyed by SB_addr. */
static OSet* instrInfoTable;
159 //------------------------------------------------------------
160 // Secondary data structure: string table
161 // - holds strings, avoiding dups
162 // - used for filenames and function names, each of which will be
163 // pointed to by one or more CCs.
164 // - it also allows equality checks just by pointer comparison, which
165 // is good when printing the output file at the end.
/* String table: an OSet of HChar*, deduplicating file/function names
   (see get_perm_string). */
static OSet* stringTable;
//------------------------------------------------------------
// Stats
/* Statistics counters.  distinct_instrsNoX/Gen are bumped in
   addEvent_Ir; the *_debugs counters record how much debug info
   get_debug_info managed to find per instruction. */
static Int  distinct_files      = 0;
static Int  distinct_fns        = 0;
static Int  distinct_lines      = 0;
static Int  distinct_instrsGen  = 0;  // Ir events queued as IrGen
static Int  distinct_instrsNoX  = 0;  // Ir events queued as IrNoX

static Int  full_debugs         = 0;  // file+line and fn found
static Int  file_line_debugs    = 0;  // file+line found, fn missing
static Int  fn_debugs           = 0;  // fn found, file+line missing
static Int  no_debugs           = 0;  // nothing found
182 /*------------------------------------------------------------*/
183 /*--- String table operations ---*/
184 /*------------------------------------------------------------*/
186 static Word stringCmp( const void* key, const void* elem )
188 return VG_(strcmp)(*(const HChar *const *)key, *(const HChar *const *)elem);
191 // Get a permanent string; either pull it out of the string table if it's
192 // been encountered before, or dup it and put it into the string table.
193 static HChar* get_perm_string(const HChar* s)
195 HChar** s_ptr = VG_(OSetGen_Lookup)(stringTable, &s);
196 if (s_ptr) {
197 return *s_ptr;
198 } else {
199 HChar** s_node = VG_(OSetGen_AllocNode)(stringTable, sizeof(HChar*));
200 *s_node = VG_(strdup)("cg.main.gps.1", s);
201 VG_(OSetGen_Insert)(stringTable, s_node);
202 return *s_node;
206 /*------------------------------------------------------------*/
207 /*--- CC table operations ---*/
208 /*------------------------------------------------------------*/
210 static void get_debug_info(Addr instr_addr, const HChar **dir,
211 const HChar **file, const HChar **fn, UInt* line)
213 Bool found_dirname;
214 Bool found_file_line = VG_(get_filename_linenum)(
215 instr_addr,
216 file, dir, &found_dirname,
217 line
219 Bool found_fn = VG_(get_fnname)(instr_addr, fn);
221 if (!found_file_line) {
222 *file = "???";
223 *line = 0;
225 if (!found_fn) {
226 *fn = "???";
229 if (found_file_line) {
230 if (found_fn) full_debugs++;
231 else file_line_debugs++;
232 } else {
233 if (found_fn) fn_debugs++;
234 else no_debugs++;
// Do a three step traversal: by file, then fn, then line.
// Returns a pointer to the line CC, creates a new one if necessary.
static LineCC* get_lineCC(Addr origAddr)
{
   const HChar *fn, *file, *dir;
   UInt    line;
   CodeLoc loc;
   LineCC* lineCC;

   get_debug_info(origAddr, &dir, &file, &fn, &line);

   // Form an absolute pathname if a directory is available
   // (VLA sized exactly for "dir" + '/' + "file" + NUL).
   HChar absfile[VG_(strlen)(dir) + 1 + VG_(strlen)(file) + 1];

   if (dir[0]) {
      VG_(sprintf)(absfile, "%s/%s", dir, file);
   } else {
      VG_(sprintf)(absfile, "%s", file);
   }

   loc.file = absfile;
   loc.fn   = fn;
   loc.line = line;

   lineCC = VG_(OSetGen_Lookup)(CC_table, &loc);
   if (!lineCC) {
      // Allocate and zero a new node.  file/fn are interned via
      // get_perm_string so the node never points at the stack-local
      // 'absfile'.
      lineCC = VG_(OSetGen_AllocNode)(CC_table, sizeof(LineCC));
      lineCC->loc.file = get_perm_string(loc.file);
      lineCC->loc.fn   = get_perm_string(loc.fn);
      lineCC->loc.line = loc.line;
      lineCC->Ir.a  = 0;
      lineCC->Ir.m1 = 0;
      lineCC->Ir.mL = 0;
      lineCC->Dr.a  = 0;
      lineCC->Dr.m1 = 0;
      lineCC->Dr.mL = 0;
      lineCC->Dw.a  = 0;
      lineCC->Dw.m1 = 0;
      lineCC->Dw.mL = 0;
      lineCC->Bc.b  = 0;
      lineCC->Bc.mp = 0;
      lineCC->Bi.b  = 0;
      lineCC->Bi.mp = 0;
      VG_(OSetGen_Insert)(CC_table, lineCC);
   }

   return lineCC;
}
288 /*------------------------------------------------------------*/
289 /*--- Cache simulation functions ---*/
290 /*------------------------------------------------------------*/
292 /* A common case for an instruction read event is that the
293 * bytes read belong to the same cache line in both L1I and LL
294 * (if cache line sizes of L1 and LL are the same).
295 * As this can be detected at instrumentation time, and results
296 * in faster simulation, special-casing is benefical.
298 * Abbrevations used in var/function names:
299 * IrNoX - instruction read does not cross cache lines
300 * IrGen - generic instruction read; not detected as IrNoX
 * Ir    - not known / not important whether it is an IrNoX
 */
304 // Only used with --cache-sim=no.
305 static VG_REGPARM(1)
306 void log_1Ir(InstrInfo* n)
308 n->parent->Ir.a++;
311 // Only used with --cache-sim=no.
312 static VG_REGPARM(2)
313 void log_2Ir(InstrInfo* n, InstrInfo* n2)
315 n->parent->Ir.a++;
316 n2->parent->Ir.a++;
319 // Only used with --cache-sim=no.
320 static VG_REGPARM(3)
321 void log_3Ir(InstrInfo* n, InstrInfo* n2, InstrInfo* n3)
323 n->parent->Ir.a++;
324 n2->parent->Ir.a++;
325 n3->parent->Ir.a++;
328 // Generic case for instruction reads: may cross cache lines.
329 // All other Ir handlers expect IrNoX instruction reads.
330 static VG_REGPARM(1)
331 void log_1IrGen_0D_cache_access(InstrInfo* n)
333 //VG_(printf)("1IrGen_0D : CCaddr=0x%010lx, iaddr=0x%010lx, isize=%lu\n",
334 // n, n->instr_addr, n->instr_len);
335 cachesim_I1_doref_Gen(n->instr_addr, n->instr_len,
336 &n->parent->Ir.m1, &n->parent->Ir.mL);
337 n->parent->Ir.a++;
340 static VG_REGPARM(1)
341 void log_1IrNoX_0D_cache_access(InstrInfo* n)
343 //VG_(printf)("1IrNoX_0D : CCaddr=0x%010lx, iaddr=0x%010lx, isize=%lu\n",
344 // n, n->instr_addr, n->instr_len);
345 cachesim_I1_doref_NoX(n->instr_addr, n->instr_len,
346 &n->parent->Ir.m1, &n->parent->Ir.mL);
347 n->parent->Ir.a++;
350 static VG_REGPARM(2)
351 void log_2IrNoX_0D_cache_access(InstrInfo* n, InstrInfo* n2)
353 //VG_(printf)("2IrNoX_0D : CC1addr=0x%010lx, i1addr=0x%010lx, i1size=%lu\n"
354 // " CC2addr=0x%010lx, i2addr=0x%010lx, i2size=%lu\n",
355 // n, n->instr_addr, n->instr_len,
356 // n2, n2->instr_addr, n2->instr_len);
357 cachesim_I1_doref_NoX(n->instr_addr, n->instr_len,
358 &n->parent->Ir.m1, &n->parent->Ir.mL);
359 n->parent->Ir.a++;
360 cachesim_I1_doref_NoX(n2->instr_addr, n2->instr_len,
361 &n2->parent->Ir.m1, &n2->parent->Ir.mL);
362 n2->parent->Ir.a++;
365 static VG_REGPARM(3)
366 void log_3IrNoX_0D_cache_access(InstrInfo* n, InstrInfo* n2, InstrInfo* n3)
368 //VG_(printf)("3IrNoX_0D : CC1addr=0x%010lx, i1addr=0x%010lx, i1size=%lu\n"
369 // " CC2addr=0x%010lx, i2addr=0x%010lx, i2size=%lu\n"
370 // " CC3addr=0x%010lx, i3addr=0x%010lx, i3size=%lu\n",
371 // n, n->instr_addr, n->instr_len,
372 // n2, n2->instr_addr, n2->instr_len,
373 // n3, n3->instr_addr, n3->instr_len);
374 cachesim_I1_doref_NoX(n->instr_addr, n->instr_len,
375 &n->parent->Ir.m1, &n->parent->Ir.mL);
376 n->parent->Ir.a++;
377 cachesim_I1_doref_NoX(n2->instr_addr, n2->instr_len,
378 &n2->parent->Ir.m1, &n2->parent->Ir.mL);
379 n2->parent->Ir.a++;
380 cachesim_I1_doref_NoX(n3->instr_addr, n3->instr_len,
381 &n3->parent->Ir.m1, &n3->parent->Ir.mL);
382 n3->parent->Ir.a++;
385 static VG_REGPARM(3)
386 void log_1IrNoX_1Dr_cache_access(InstrInfo* n, Addr data_addr, Word data_size)
388 //VG_(printf)("1IrNoX_1Dr: CCaddr=0x%010lx, iaddr=0x%010lx, isize=%lu\n"
389 // " daddr=0x%010lx, dsize=%lu\n",
390 // n, n->instr_addr, n->instr_len, data_addr, data_size);
391 cachesim_I1_doref_NoX(n->instr_addr, n->instr_len,
392 &n->parent->Ir.m1, &n->parent->Ir.mL);
393 n->parent->Ir.a++;
395 cachesim_D1_doref(data_addr, data_size,
396 &n->parent->Dr.m1, &n->parent->Dr.mL);
397 n->parent->Dr.a++;
400 static VG_REGPARM(3)
401 void log_1IrNoX_1Dw_cache_access(InstrInfo* n, Addr data_addr, Word data_size)
403 //VG_(printf)("1IrNoX_1Dw: CCaddr=0x%010lx, iaddr=0x%010lx, isize=%lu\n"
404 // " daddr=0x%010lx, dsize=%lu\n",
405 // n, n->instr_addr, n->instr_len, data_addr, data_size);
406 cachesim_I1_doref_NoX(n->instr_addr, n->instr_len,
407 &n->parent->Ir.m1, &n->parent->Ir.mL);
408 n->parent->Ir.a++;
410 cachesim_D1_doref(data_addr, data_size,
411 &n->parent->Dw.m1, &n->parent->Dw.mL);
412 n->parent->Dw.a++;
415 /* Note that addEvent_D_guarded assumes that log_0Ir_1Dr_cache_access
416 and log_0Ir_1Dw_cache_access have exactly the same prototype. If
417 you change them, you must change addEvent_D_guarded too. */
418 static VG_REGPARM(3)
419 void log_0Ir_1Dr_cache_access(InstrInfo* n, Addr data_addr, Word data_size)
421 //VG_(printf)("0Ir_1Dr: CCaddr=0x%010lx, daddr=0x%010lx, dsize=%lu\n",
422 // n, data_addr, data_size);
423 cachesim_D1_doref(data_addr, data_size,
424 &n->parent->Dr.m1, &n->parent->Dr.mL);
425 n->parent->Dr.a++;
428 /* See comment on log_0Ir_1Dr_cache_access. */
429 static VG_REGPARM(3)
430 void log_0Ir_1Dw_cache_access(InstrInfo* n, Addr data_addr, Word data_size)
432 //VG_(printf)("0Ir_1Dw: CCaddr=0x%010lx, daddr=0x%010lx, dsize=%lu\n",
433 // n, data_addr, data_size);
434 cachesim_D1_doref(data_addr, data_size,
435 &n->parent->Dw.m1, &n->parent->Dw.mL);
436 n->parent->Dw.a++;
439 /* For branches, we consult two different predictors, one which
440 predicts taken/untaken for conditional branches, and the other
441 which predicts the branch target address for indirect branches
442 (jump-to-register style ones). */
444 static VG_REGPARM(2)
445 void log_cond_branch(InstrInfo* n, Word taken)
447 //VG_(printf)("cbrnch: CCaddr=0x%010lx, taken=0x%010lx\n",
448 // n, taken);
449 n->parent->Bc.b++;
450 n->parent->Bc.mp
451 += (1 & do_cond_branch_predict(n->instr_addr, taken));
454 static VG_REGPARM(2)
455 void log_ind_branch(InstrInfo* n, UWord actual_dst)
457 //VG_(printf)("ibrnch: CCaddr=0x%010lx, dst=0x%010lx\n",
458 // n, actual_dst);
459 n->parent->Bi.b++;
460 n->parent->Bi.mp
461 += (1 & do_ind_branch_predict(n->instr_addr, actual_dst));
465 /*------------------------------------------------------------*/
466 /*--- Instrumentation types and structures ---*/
467 /*------------------------------------------------------------*/
469 /* Maintain an ordered list of memory events which are outstanding, in
470 the sense that no IR has yet been generated to do the relevant
471 helper calls. The BB is scanned top to bottom and memory events
472 are added to the end of the list, merging with the most recent
473 notified event where possible (Dw immediately following Dr and
474 having the same size and EA can be merged).
476 This merging is done so that for architectures which have
477 load-op-store instructions (x86, amd64), the insn is treated as if
478 it makes just one memory reference (a modify), rather than two (a
479 read followed by a write at the same address).
481 At various points the list will need to be flushed, that is, IR
482 generated from it. That must happen before any possible exit from
483 the block (the end, or an IRStmt_Exit). Flushing also takes place
484 when there is no space to add a new event.
486 If we require the simulation statistics to be up to date with
487 respect to possible memory exceptions, then the list would have to
488 be flushed before each memory reference. That would however lose
489 performance by inhibiting event-merging during flushing.
491 Flushing the list consists of walking it start to end and emitting
492 instrumentation IR for each event, in the order in which they
493 appear. It may be possible to emit a single call for two adjacent
494 events in order to reduce the number of helper function calls made.
495 For example, it could well be profitable to handle two adjacent Ir
496 events with a single helper call. */
/* An "atomic" IR expression (a constant or a temporary).  NOTE(review):
   alias only — the type system does not enforce atomicity; isIRAtom()
   is asserted where it matters (addEvent_Dr/Dw/etc.). */
typedef
   IRExpr
   IRAtom;
/* Discriminator for the Event union below. */
typedef
   enum {
      Ev_IrNoX,  // Instruction read not crossing cache lines
      Ev_IrGen,  // Generic Ir, not being detected as IrNoX
      Ev_Dr,     // Data read
      Ev_Dw,     // Data write
      Ev_Dm,     // Data modify (read then write)
      Ev_Bc,     // branch conditional
      Ev_Bi      // branch indirect (to unknown destination)
   }
   EventTag;
/* One outstanding event, queued during instrumentation and turned
   into a helper call by flushEvents(). */
typedef
   struct {
      EventTag   tag;
      InstrInfo* inode;    /* instruction this event belongs to */
      union {
         struct {
         } IrGen;
         struct {
         } IrNoX;
         struct {
            IRAtom* ea;    /* effective address */
            Int     szB;   /* access size in bytes */
         } Dr;
         struct {
            IRAtom* ea;
            Int     szB;
         } Dw;
         struct {
            IRAtom* ea;
            Int     szB;
         } Dm;
         struct {
            /* :: Ity_I1 -- NOTE(review): addEvent_Bc actually asserts
               this is word-sized (Ity_I32/I64); confirm which is the
               intended invariant. */
            IRAtom* taken;
         } Bc;
         struct {
            IRAtom* dst;   /* branch destination */
         } Bi;
      } Ev;
   }
   Event;
545 static void init_Event ( Event* ev ) {
546 VG_(memset)(ev, 0, sizeof(Event));
549 static IRAtom* get_Event_dea ( Event* ev ) {
550 switch (ev->tag) {
551 case Ev_Dr: return ev->Ev.Dr.ea;
552 case Ev_Dw: return ev->Ev.Dw.ea;
553 case Ev_Dm: return ev->Ev.Dm.ea;
554 default: tl_assert(0);
558 static Int get_Event_dszB ( Event* ev ) {
559 switch (ev->tag) {
560 case Ev_Dr: return ev->Ev.Dr.szB;
561 case Ev_Dw: return ev->Ev.Dw.szB;
562 case Ev_Dm: return ev->Ev.Dm.szB;
563 default: tl_assert(0);
/* Up to this many unnotified events are allowed.  Number is
   arbitrary.  Larger numbers allow more event merging to occur, but
   potentially induce more spilling due to extending live ranges of
   address temporaries. */
#define N_EVENTS 16
/* A struct which holds all the running state during instrumentation.
   Mostly to avoid passing loads of parameters everywhere. */
typedef
   struct {
      /* The current outstanding-memory-event list. */
      Event events[N_EVENTS];
      Int   events_used;

      /* The array of InstrInfo bins for the BB. */
      SB_info* sbInfo;

      /* Number InstrInfo bins 'used' so far. */
      Int sbInfo_i;

      /* The output SB being constructed. */
      IRSB* sbOut;
   }
   CgState;
595 /*------------------------------------------------------------*/
596 /*--- Instrumentation main ---*/
597 /*------------------------------------------------------------*/
599 // Note that origAddr is the real origAddr, not the address of the first
600 // instruction in the block (they can be different due to redirection).
601 static
602 SB_info* get_SB_info(IRSB* sbIn, Addr origAddr)
604 Int i, n_instrs;
605 IRStmt* st;
606 SB_info* sbInfo;
608 // Count number of original instrs in SB
609 n_instrs = 0;
610 for (i = 0; i < sbIn->stmts_used; i++) {
611 st = sbIn->stmts[i];
612 if (Ist_IMark == st->tag) n_instrs++;
615 // Check that we don't have an entry for this BB in the instr-info table.
616 // If this assertion fails, there has been some screwup: some
617 // translations must have been discarded but Cachegrind hasn't discarded
618 // the corresponding entries in the instr-info table.
619 sbInfo = VG_(OSetGen_Lookup)(instrInfoTable, &origAddr);
620 tl_assert(NULL == sbInfo);
622 // BB never translated before (at this address, at least; could have
623 // been unloaded and then reloaded elsewhere in memory)
624 sbInfo = VG_(OSetGen_AllocNode)(instrInfoTable,
625 sizeof(SB_info) + n_instrs*sizeof(InstrInfo));
626 sbInfo->SB_addr = origAddr;
627 sbInfo->n_instrs = n_instrs;
628 VG_(OSetGen_Insert)( instrInfoTable, sbInfo );
630 return sbInfo;
634 static void showEvent ( Event* ev )
636 switch (ev->tag) {
637 case Ev_IrGen:
638 VG_(printf)("IrGen %p\n", ev->inode);
639 break;
640 case Ev_IrNoX:
641 VG_(printf)("IrNoX %p\n", ev->inode);
642 break;
643 case Ev_Dr:
644 VG_(printf)("Dr %p %d EA=", ev->inode, ev->Ev.Dr.szB);
645 ppIRExpr(ev->Ev.Dr.ea);
646 VG_(printf)("\n");
647 break;
648 case Ev_Dw:
649 VG_(printf)("Dw %p %d EA=", ev->inode, ev->Ev.Dw.szB);
650 ppIRExpr(ev->Ev.Dw.ea);
651 VG_(printf)("\n");
652 break;
653 case Ev_Dm:
654 VG_(printf)("Dm %p %d EA=", ev->inode, ev->Ev.Dm.szB);
655 ppIRExpr(ev->Ev.Dm.ea);
656 VG_(printf)("\n");
657 break;
658 case Ev_Bc:
659 VG_(printf)("Bc %p GA=", ev->inode);
660 ppIRExpr(ev->Ev.Bc.taken);
661 VG_(printf)("\n");
662 break;
663 case Ev_Bi:
664 VG_(printf)("Bi %p DST=", ev->inode);
665 ppIRExpr(ev->Ev.Bi.dst);
666 VG_(printf)("\n");
667 break;
668 default:
669 tl_assert(0);
670 break;
674 // Reserve and initialise an InstrInfo for the first mention of a new insn.
675 static
676 InstrInfo* setup_InstrInfo ( CgState* cgs, Addr instr_addr, UInt instr_len )
678 InstrInfo* i_node;
679 tl_assert(cgs->sbInfo_i >= 0);
680 tl_assert(cgs->sbInfo_i < cgs->sbInfo->n_instrs);
681 i_node = &cgs->sbInfo->instrs[ cgs->sbInfo_i ];
682 i_node->instr_addr = instr_addr;
683 i_node->instr_len = instr_len;
684 i_node->parent = get_lineCC(instr_addr);
685 cgs->sbInfo_i++;
686 return i_node;
690 /* Generate code for all outstanding memory events, and mark the queue
691 empty. Code is generated into cgs->bbOut, and this activity
692 'consumes' slots in cgs->sbInfo. */
/* Walk the queued events in order, emitting one dirty-helper call per
   event (or per merged pair/triple where a combined helper exists),
   then mark the queue empty. */
static void flushEvents ( CgState* cgs )
{
   Int          i, regparms;
   const HChar* helperName;
   void*        helperAddr;
   IRExpr**     argv;
   IRExpr*      i_node_expr;
   IRDirty*     di;
   Event*       ev;
   Event*       ev2;
   Event*       ev3;

   i = 0;
   while (i < cgs->events_used) {

      helperName = NULL;
      helperAddr = NULL;
      argv       = NULL;
      regparms   = 0;

      /* generate IR to notify event i and possibly the ones
         immediately following it. */
      tl_assert(i >= 0 && i < cgs->events_used);

      /* Lookahead pointers; NULL past the end of the queue. */
      ev  = &cgs->events[i];
      ev2 = ( i < cgs->events_used-1 ? &cgs->events[i+1] : NULL );
      ev3 = ( i < cgs->events_used-2 ? &cgs->events[i+2] : NULL );

      if (DEBUG_CG) {
         VG_(printf)("   flush ");
         showEvent( ev );
      }

      i_node_expr = mkIRExpr_HWord( (HWord)ev->inode );

      /* Decide on helper fn to call and args to pass it, and advance
         i appropriately. */
      switch (ev->tag) {
         case Ev_IrNoX:
            /* Merge an IrNoX with a following Dr/Dm. */
            if (ev2 && (ev2->tag == Ev_Dr || ev2->tag == Ev_Dm)) {
               /* Why is this true?  It's because we're merging an Ir
                  with a following Dr or Dm.  The Ir derives from the
                  instruction's IMark and the Dr/Dm from data
                  references which follow it.  In short it holds
                  because each insn starts with an IMark, hence an
                  Ev_Ir, and so these Dr/Dm must pertain to the
                  immediately preceding Ir.  Same applies to analogous
                  assertions in the subsequent cases. */
               tl_assert(ev2->inode == ev->inode);
               helperName = "log_1IrNoX_1Dr_cache_access";
               helperAddr = &log_1IrNoX_1Dr_cache_access;
               argv = mkIRExprVec_3( i_node_expr,
                                     get_Event_dea(ev2),
                                     mkIRExpr_HWord( get_Event_dszB(ev2) ) );
               regparms = 3;
               i += 2;
            }
            /* Merge an IrNoX with a following Dw. */
            else
            if (ev2 && ev2->tag == Ev_Dw) {
               tl_assert(ev2->inode == ev->inode);
               helperName = "log_1IrNoX_1Dw_cache_access";
               helperAddr = &log_1IrNoX_1Dw_cache_access;
               argv = mkIRExprVec_3( i_node_expr,
                                     get_Event_dea(ev2),
                                     mkIRExpr_HWord( get_Event_dszB(ev2) ) );
               regparms = 3;
               i += 2;
            }
            /* Merge an IrNoX with two following IrNoX's. */
            else
            if (ev2 && ev3 && ev2->tag == Ev_IrNoX && ev3->tag == Ev_IrNoX)
            {
               if (clo_cache_sim) {
                  helperName = "log_3IrNoX_0D_cache_access";
                  helperAddr = &log_3IrNoX_0D_cache_access;
               } else {
                  helperName = "log_3Ir";
                  helperAddr = &log_3Ir;
               }
               argv = mkIRExprVec_3( i_node_expr,
                                     mkIRExpr_HWord( (HWord)ev2->inode ),
                                     mkIRExpr_HWord( (HWord)ev3->inode ) );
               regparms = 3;
               i += 3;
            }
            /* Merge an IrNoX with one following IrNoX. */
            else
            if (ev2 && ev2->tag == Ev_IrNoX) {
               if (clo_cache_sim) {
                  helperName = "log_2IrNoX_0D_cache_access";
                  helperAddr = &log_2IrNoX_0D_cache_access;
               } else {
                  helperName = "log_2Ir";
                  helperAddr = &log_2Ir;
               }
               argv = mkIRExprVec_2( i_node_expr,
                                     mkIRExpr_HWord( (HWord)ev2->inode ) );
               regparms = 2;
               i += 2;
            }
            /* No merging possible; emit as-is. */
            else {
               if (clo_cache_sim) {
                  helperName = "log_1IrNoX_0D_cache_access";
                  helperAddr = &log_1IrNoX_0D_cache_access;
               } else {
                  helperName = "log_1Ir";
                  helperAddr = &log_1Ir;
               }
               argv = mkIRExprVec_1( i_node_expr );
               regparms = 1;
               i++;
            }
            break;
         case Ev_IrGen:
            if (clo_cache_sim) {
               helperName = "log_1IrGen_0D_cache_access";
               helperAddr = &log_1IrGen_0D_cache_access;
            } else {
               helperName = "log_1Ir";
               helperAddr = &log_1Ir;
            }
            argv = mkIRExprVec_1( i_node_expr );
            regparms = 1;
            i++;
            break;
         case Ev_Dr:
         case Ev_Dm:
            /* Data read or modify */
            helperName = "log_0Ir_1Dr_cache_access";
            helperAddr = &log_0Ir_1Dr_cache_access;
            argv = mkIRExprVec_3( i_node_expr,
                                  get_Event_dea(ev),
                                  mkIRExpr_HWord( get_Event_dszB(ev) ) );
            regparms = 3;
            i++;
            break;
         case Ev_Dw:
            /* Data write */
            helperName = "log_0Ir_1Dw_cache_access";
            helperAddr = &log_0Ir_1Dw_cache_access;
            argv = mkIRExprVec_3( i_node_expr,
                                  get_Event_dea(ev),
                                  mkIRExpr_HWord( get_Event_dszB(ev) ) );
            regparms = 3;
            i++;
            break;
         case Ev_Bc:
            /* Conditional branch */
            helperName = "log_cond_branch";
            helperAddr = &log_cond_branch;
            argv = mkIRExprVec_2( i_node_expr, ev->Ev.Bc.taken );
            regparms = 2;
            i++;
            break;
         case Ev_Bi:
            /* Branch to an unknown destination */
            helperName = "log_ind_branch";
            helperAddr = &log_ind_branch;
            argv = mkIRExprVec_2( i_node_expr, ev->Ev.Bi.dst );
            regparms = 2;
            i++;
            break;
         default:
            tl_assert(0);
      }

      /* Add the helper. */
      tl_assert(helperName);
      tl_assert(helperAddr);
      tl_assert(argv);
      di = unsafeIRDirty_0_N( regparms,
                              helperName, VG_(fnptr_to_fnentry)( helperAddr ),
                              argv );
      addStmtToIRSB( cgs->sbOut, IRStmt_Dirty(di) );
   }

   cgs->events_used = 0;
}
876 static void addEvent_Ir ( CgState* cgs, InstrInfo* inode )
878 Event* evt;
879 if (cgs->events_used == N_EVENTS)
880 flushEvents(cgs);
881 tl_assert(cgs->events_used >= 0 && cgs->events_used < N_EVENTS);
882 evt = &cgs->events[cgs->events_used];
883 init_Event(evt);
884 evt->inode = inode;
885 if (cachesim_is_IrNoX(inode->instr_addr, inode->instr_len)) {
886 evt->tag = Ev_IrNoX;
887 distinct_instrsNoX++;
888 } else {
889 evt->tag = Ev_IrGen;
890 distinct_instrsGen++;
892 cgs->events_used++;
895 static
896 void addEvent_Dr ( CgState* cgs, InstrInfo* inode, Int datasize, IRAtom* ea )
898 Event* evt;
899 tl_assert(isIRAtom(ea));
900 tl_assert(datasize >= 1 && datasize <= min_line_size);
901 if (!clo_cache_sim)
902 return;
903 if (cgs->events_used == N_EVENTS)
904 flushEvents(cgs);
905 tl_assert(cgs->events_used >= 0 && cgs->events_used < N_EVENTS);
906 evt = &cgs->events[cgs->events_used];
907 init_Event(evt);
908 evt->tag = Ev_Dr;
909 evt->inode = inode;
910 evt->Ev.Dr.szB = datasize;
911 evt->Ev.Dr.ea = ea;
912 cgs->events_used++;
915 static
916 void addEvent_Dw ( CgState* cgs, InstrInfo* inode, Int datasize, IRAtom* ea )
918 Event* lastEvt;
919 Event* evt;
921 tl_assert(isIRAtom(ea));
922 tl_assert(datasize >= 1 && datasize <= min_line_size);
924 if (!clo_cache_sim)
925 return;
927 /* Is it possible to merge this write with the preceding read? */
928 lastEvt = &cgs->events[cgs->events_used-1];
929 if (cgs->events_used > 0
930 && lastEvt->tag == Ev_Dr
931 && lastEvt->Ev.Dr.szB == datasize
932 && lastEvt->inode == inode
933 && eqIRAtom(lastEvt->Ev.Dr.ea, ea))
935 lastEvt->tag = Ev_Dm;
936 return;
939 /* No. Add as normal. */
940 if (cgs->events_used == N_EVENTS)
941 flushEvents(cgs);
942 tl_assert(cgs->events_used >= 0 && cgs->events_used < N_EVENTS);
943 evt = &cgs->events[cgs->events_used];
944 init_Event(evt);
945 evt->tag = Ev_Dw;
946 evt->inode = inode;
947 evt->Ev.Dw.szB = datasize;
948 evt->Ev.Dw.ea = ea;
949 cgs->events_used++;
/* Emit instrumentation for a guarded (conditional) load or store.
   Unlike the other addEvent_* functions, this does not queue an event:
   it flushes the queue and emits the dirty-helper call immediately,
   with di->guard set so the helper only runs when the access does. */
static
void addEvent_D_guarded ( CgState* cgs, InstrInfo* inode,
                          Int datasize, IRAtom* ea, IRAtom* guard,
                          Bool isWrite )
{
   tl_assert(isIRAtom(ea));
   tl_assert(guard);
   tl_assert(isIRAtom(guard));
   tl_assert(datasize >= 1 && datasize <= min_line_size);

   if (!clo_cache_sim)
      return;

   /* Adding guarded memory actions and merging them with the existing
      queue is too complex.  Simply flush the queue and add this
      action immediately.  Since guarded loads and stores are pretty
      rare, this is not thought likely to cause any noticeable
      performance loss as a result of the loss of event-merging
      opportunities. */
   tl_assert(cgs->events_used >= 0);
   flushEvents(cgs);
   tl_assert(cgs->events_used == 0);
   /* Same as case Ev_Dw / case Ev_Dr in flushEvents, except with guard */
   IRExpr*      i_node_expr;
   const HChar* helperName;
   void*        helperAddr;
   IRExpr**     argv;
   Int          regparms;
   IRDirty*     di;
   i_node_expr = mkIRExpr_HWord( (HWord)inode );
   /* The two helpers share a prototype (see note at their definitions),
      so selecting the address with ?: is safe. */
   helperName  = isWrite ? "log_0Ir_1Dw_cache_access"
                         : "log_0Ir_1Dr_cache_access";
   helperAddr  = isWrite ? &log_0Ir_1Dw_cache_access
                         : &log_0Ir_1Dr_cache_access;
   argv        = mkIRExprVec_3( i_node_expr,
                                ea, mkIRExpr_HWord( datasize ) );
   regparms    = 3;
   di = unsafeIRDirty_0_N(
           regparms,
           helperName, VG_(fnptr_to_fnentry)( helperAddr ),
           argv );
   di->guard = guard;
   addStmtToIRSB( cgs->sbOut, IRStmt_Dirty(di) );
}
998 static
999 void addEvent_Bc ( CgState* cgs, InstrInfo* inode, IRAtom* guard )
1001 Event* evt;
1002 tl_assert(isIRAtom(guard));
1003 tl_assert(typeOfIRExpr(cgs->sbOut->tyenv, guard)
1004 == (sizeof(HWord)==4 ? Ity_I32 : Ity_I64));
1005 if (!clo_branch_sim)
1006 return;
1007 if (cgs->events_used == N_EVENTS)
1008 flushEvents(cgs);
1009 tl_assert(cgs->events_used >= 0 && cgs->events_used < N_EVENTS);
1010 evt = &cgs->events[cgs->events_used];
1011 init_Event(evt);
1012 evt->tag = Ev_Bc;
1013 evt->inode = inode;
1014 evt->Ev.Bc.taken = guard;
1015 cgs->events_used++;
1018 static
1019 void addEvent_Bi ( CgState* cgs, InstrInfo* inode, IRAtom* whereTo )
1021 Event* evt;
1022 tl_assert(isIRAtom(whereTo));
1023 tl_assert(typeOfIRExpr(cgs->sbOut->tyenv, whereTo)
1024 == (sizeof(HWord)==4 ? Ity_I32 : Ity_I64));
1025 if (!clo_branch_sim)
1026 return;
1027 if (cgs->events_used == N_EVENTS)
1028 flushEvents(cgs);
1029 tl_assert(cgs->events_used >= 0 && cgs->events_used < N_EVENTS);
1030 evt = &cgs->events[cgs->events_used];
1031 init_Event(evt);
1032 evt->tag = Ev_Bi;
1033 evt->inode = inode;
1034 evt->Ev.Bi.dst = whereTo;
1035 cgs->events_used++;
1038 ////////////////////////////////////////////////////////////
/* Instrument one superblock.  For each guest instruction an Ir event is
   queued; memory accesses add Dr/Dw (or guarded D) events; if branch
   simulation is enabled, conditional side exits add Bc events and an
   indirect final jump adds a Bi event.  Queued events are turned into
   helper calls by flushEvents().  Returns the instrumented superblock. */
static
IRSB* cg_instrument ( VgCallbackClosure* closure,
                      IRSB* sbIn,
                      const VexGuestLayout* layout,
                      const VexGuestExtents* vge,
                      const VexArchInfo* archinfo_host,
                      IRType gWordTy, IRType hWordTy )
{
   Int        i, isize;
   IRStmt*    st;
   Addr64     cia; /* address of current insn */
   CgState    cgs;
   IRTypeEnv* tyenv = sbIn->tyenv;
   InstrInfo* curr_inode = NULL;

   if (gWordTy != hWordTy) {
      /* We don't currently support this case. */
      VG_(tool_panic)("host/guest word size mismatch");
   }

   // Set up new SB
   cgs.sbOut = deepCopyIRSBExceptStmts(sbIn);

   // Copy verbatim any IR preamble preceding the first IMark
   i = 0;
   while (i < sbIn->stmts_used && sbIn->stmts[i]->tag != Ist_IMark) {
      addStmtToIRSB( cgs.sbOut, sbIn->stmts[i] );
      i++;
   }

   // Get the first statement, and initial cia from it
   tl_assert(sbIn->stmts_used > 0);
   tl_assert(i < sbIn->stmts_used);
   st = sbIn->stmts[i];
   tl_assert(Ist_IMark == st->tag);

   cia   = st->Ist.IMark.addr;
   isize = st->Ist.IMark.len;
   // If Vex fails to decode an instruction, the size will be zero.
   // Pretend otherwise.
   if (isize == 0) isize = VG_MIN_INSTR_SZB;

   // Set up running state and get block info
   tl_assert(closure->readdr == vge->base[0]);
   cgs.events_used = 0;
   cgs.sbInfo      = get_SB_info(sbIn, (Addr)closure->readdr);
   cgs.sbInfo_i    = 0;

   if (DEBUG_CG)
      VG_(printf)("\n\n---------- cg_instrument ----------\n");

   // Traverse the block, initialising inodes, adding events and flushing as
   // necessary.
   for (/*use current i*/; i < sbIn->stmts_used; i++) {

      st = sbIn->stmts[i];
      tl_assert(isFlatIRStmt(st));

      switch (st->tag) {
         // Statements that neither fetch instructions nor touch data
         // memory: nothing to record.
         case Ist_NoOp:
         case Ist_AbiHint:
         case Ist_Put:
         case Ist_PutI:
         case Ist_MBE:
            break;

         case Ist_IMark:
            cia   = st->Ist.IMark.addr;
            isize = st->Ist.IMark.len;

            // If Vex fails to decode an instruction, the size will be zero.
            // Pretend otherwise.
            if (isize == 0) isize = VG_MIN_INSTR_SZB;

            // Sanity-check size.
            tl_assert( (VG_MIN_INSTR_SZB <= isize && isize <= VG_MAX_INSTR_SZB)
                     || VG_CLREQ_SZB == isize );

            // Get space for and init the inode, record it as the current one.
            // Subsequent Dr/Dw/Dm events from the same instruction will
            // also use it.
            curr_inode = setup_InstrInfo(&cgs, cia, isize);

            addEvent_Ir( &cgs, curr_inode );
            break;

         case Ist_WrTmp: {
            // Only loads on the RHS are of interest here.
            IRExpr* data = st->Ist.WrTmp.data;
            if (data->tag == Iex_Load) {
               IRExpr* aexpr = data->Iex.Load.addr;
               // Note also, endianness info is ignored.  I guess
               // that's not interesting.
               addEvent_Dr( &cgs, curr_inode, sizeofIRType(data->Iex.Load.ty),
                                  aexpr );
            }
            break;
         }

         case Ist_Store: {
            IRExpr* data  = st->Ist.Store.data;
            IRExpr* aexpr = st->Ist.Store.addr;
            addEvent_Dw( &cgs, curr_inode,
                         sizeofIRType(typeOfIRExpr(tyenv, data)), aexpr );
            break;
         }

         case Ist_StoreG: {
            // Guarded store: recorded as a guarded write.
            IRStoreG* sg   = st->Ist.StoreG.details;
            IRExpr*   data = sg->data;
            IRExpr*   addr = sg->addr;
            IRType    type = typeOfIRExpr(tyenv, data);
            tl_assert(type != Ity_INVALID);
            addEvent_D_guarded( &cgs, curr_inode,
                                sizeofIRType(type), addr, sg->guard,
                                True/*isWrite*/ );
            break;
         }

         case Ist_LoadG: {
            // Guarded load: recorded as a guarded read, using the loaded
            // (pre-widening) size.
            IRLoadG* lg       = st->Ist.LoadG.details;
            IRType   type     = Ity_INVALID; /* loaded type */
            IRType   typeWide = Ity_INVALID; /* after implicit widening */
            IRExpr*  addr     = lg->addr;
            typeOfIRLoadGOp(lg->cvt, &typeWide, &type);
            tl_assert(type != Ity_INVALID);
            addEvent_D_guarded( &cgs, curr_inode,
                                sizeofIRType(type), addr, lg->guard,
                                False/*!isWrite*/ );
            break;
         }

         case Ist_Dirty: {
            Int      dataSize;
            IRDirty* d = st->Ist.Dirty.details;
            if (d->mFx != Ifx_None) {
               /* This dirty helper accesses memory.  Collect the details. */
               tl_assert(d->mAddr != NULL);
               tl_assert(d->mSize != 0);
               dataSize = d->mSize;
               // Large (eg. 28B, 108B, 512B on x86) data-sized
               // instructions will be done inaccurately, but they're
               // very rare and this avoids errors from hitting more
               // than two cache lines in the simulation.
               if (dataSize > min_line_size)
                  dataSize = min_line_size;
               if (d->mFx == Ifx_Read || d->mFx == Ifx_Modify)
                  addEvent_Dr( &cgs, curr_inode, dataSize, d->mAddr );
               if (d->mFx == Ifx_Write || d->mFx == Ifx_Modify)
                  addEvent_Dw( &cgs, curr_inode, dataSize, d->mAddr );
            } else {
               tl_assert(d->mAddr == NULL);
               tl_assert(d->mSize == 0);
            }
            break;
         }

         case Ist_CAS: {
            /* We treat it as a read and a write of the location.  I
               think that is the same behaviour as it was before IRCAS
               was introduced, since prior to that point, the Vex
               front ends would translate a lock-prefixed instruction
               into a (normal) read followed by a (normal) write. */
            Int    dataSize;
            IRCAS* cas = st->Ist.CAS.details;
            tl_assert(cas->addr != NULL);
            tl_assert(cas->dataLo != NULL);
            dataSize = sizeofIRType(typeOfIRExpr(tyenv, cas->dataLo));
            if (cas->dataHi != NULL)
               dataSize *= 2; /* since it's a doubleword-CAS */
            /* I don't think this can ever happen, but play safe. */
            if (dataSize > min_line_size)
               dataSize = min_line_size;
            addEvent_Dr( &cgs, curr_inode, dataSize, cas->addr );
            addEvent_Dw( &cgs, curr_inode, dataSize, cas->addr );
            break;
         }

         case Ist_LLSC: {
            IRType dataTy;
            if (st->Ist.LLSC.storedata == NULL) {
               /* LL */
               dataTy = typeOfIRTemp(tyenv, st->Ist.LLSC.result);
               addEvent_Dr( &cgs, curr_inode,
                            sizeofIRType(dataTy), st->Ist.LLSC.addr );
               /* flush events before LL, should help SC to succeed */
               flushEvents( &cgs );
            } else {
               /* SC */
               dataTy = typeOfIRExpr(tyenv, st->Ist.LLSC.storedata);
               addEvent_Dw( &cgs, curr_inode,
                            sizeofIRType(dataTy), st->Ist.LLSC.addr );
            }
            break;
         }

         case Ist_Exit: {
            // call branch predictor only if this is a branch in guest code
            if ( (st->Ist.Exit.jk == Ijk_Boring) ||
                 (st->Ist.Exit.jk == Ijk_Call) ||
                 (st->Ist.Exit.jk == Ijk_Ret) )
            {
               /* Stuff to widen the guard expression to a host word, so
                  we can pass it to the branch predictor simulation
                  functions easily. */
               Bool     inverted;
               Addr64   nia, sea;
               IRConst* dst;
               IRType   tyW    = hWordTy;
               IROp     widen  = tyW==Ity_I32  ? Iop_1Uto32  : Iop_1Uto64;
               IROp     opXOR  = tyW==Ity_I32  ? Iop_Xor32   : Iop_Xor64;
               IRTemp   guard1 = newIRTemp(cgs.sbOut->tyenv, Ity_I1);
               IRTemp   guardW = newIRTemp(cgs.sbOut->tyenv, tyW);
               IRTemp   guard  = newIRTemp(cgs.sbOut->tyenv, tyW);
               IRExpr*  one    = tyW==Ity_I32 ? IRExpr_Const(IRConst_U32(1))
                                              : IRExpr_Const(IRConst_U64(1));

               /* First we need to figure out whether the side exit got
                  inverted by the ir optimiser.  To do that, figure out
                  the next (fallthrough) instruction's address and the
                  side exit address and see if they are the same. */
               nia = cia + (Addr64)isize;
               if (tyW == Ity_I32)
                  nia &= 0xFFFFFFFFULL;

               /* Side exit address */
               dst = st->Ist.Exit.dst;
               if (tyW == Ity_I32) {
                  tl_assert(dst->tag == Ico_U32);
                  sea = (Addr64)(UInt)dst->Ico.U32;
               } else {
                  tl_assert(tyW == Ity_I64);
                  tl_assert(dst->tag == Ico_U64);
                  sea = dst->Ico.U64;
               }

               inverted = nia == sea;

               /* Widen the guard expression. */
               addStmtToIRSB( cgs.sbOut,
                              IRStmt_WrTmp( guard1, st->Ist.Exit.guard ));
               addStmtToIRSB( cgs.sbOut,
                              IRStmt_WrTmp( guardW,
                                            IRExpr_Unop(widen,
                                                        IRExpr_RdTmp(guard1))) );
               /* If the exit is inverted, invert the sense of the guard. */
               addStmtToIRSB(
                  cgs.sbOut,
                  IRStmt_WrTmp(
                     guard,
                     inverted ? IRExpr_Binop(opXOR, IRExpr_RdTmp(guardW), one)
                              : IRExpr_RdTmp(guardW)
                  ));
               /* And post the event. */
               addEvent_Bc( &cgs, curr_inode, IRExpr_RdTmp(guard) );
            }

            /* We may never reach the next statement, so need to flush
               all outstanding transactions now. */
            flushEvents( &cgs );
            break;
         }

         default:
            ppIRStmt(st);
            tl_assert(0);
            break;
      }

      /* Copy the original statement */
      addStmtToIRSB( cgs.sbOut, st );

      if (DEBUG_CG) {
         ppIRStmt(st);
         VG_(printf)("\n");
      }
   }

   /* Deal with branches to unknown destinations.  Except ignore ones
      which are function returns as we assume the return stack
      predictor never mispredicts. */
   if ((sbIn->jumpkind == Ijk_Boring) || (sbIn->jumpkind == Ijk_Call)) {
      if (0) { ppIRExpr( sbIn->next ); VG_(printf)("\n"); }
      switch (sbIn->next->tag) {
         case Iex_Const:
            break; /* boring - branch to known address */
         case Iex_RdTmp:
            /* looks like an indirect branch (branch to unknown) */
            addEvent_Bi( &cgs, curr_inode, sbIn->next );
            break;
         default:
            /* shouldn't happen - if the incoming IR is properly
               flattened, should only have tmp and const cases to
               consider. */
            tl_assert(0);
      }
   }

   /* At the end of the bb.  Flush outstandings. */
   flushEvents( &cgs );

   /* done.  stay sane ... */
   tl_assert(cgs.sbInfo_i == cgs.sbInfo->n_instrs);

   if (DEBUG_CG) {
      VG_(printf)( "goto {");
      ppIRJumpKind(sbIn->jumpkind);
      VG_(printf)( "} ");
      ppIRExpr( sbIn->next );
      VG_(printf)( "}\n");
   }

   return cgs.sbOut;
}
1355 /*------------------------------------------------------------*/
1356 /*--- Cache configuration ---*/
1357 /*------------------------------------------------------------*/
// Cache configurations requested on the command line; parsed by
// VG_(str_clo_cache_opt) in cg_process_cmd_line_option and consumed by
// VG_(post_clo_init_configure_caches) in cg_post_clo_init.
// NOTE(review): UNDEFINED_CACHE presumably means "not specified; use the
// auto-detected/default configuration" -- its definition is not visible here.
static cache_t clo_I1_cache = UNDEFINED_CACHE;
static cache_t clo_D1_cache = UNDEFINED_CACHE;
static cache_t clo_LL_cache = UNDEFINED_CACHE;
1363 /*------------------------------------------------------------*/
1364 /*--- cg_fini() and related function ---*/
1365 /*------------------------------------------------------------*/
// Total reads/writes/misses.  Calculated during CC traversal at the end.
// All auto-zeroed.
static CacheCC  Ir_total;   // instruction fetches (accesses, L1/LL misses)
static CacheCC  Dr_total;   // data reads
static CacheCC  Dw_total;   // data writes
static BranchCC Bc_total;   // conditional branches (executed, mispredicted)
static BranchCC Bi_total;   // indirect branches (executed, mispredicted)
/* Write the whole per-line cost-centre table to the cachegrind output
   file (in the format cg_annotate expects), and, as a side effect,
   accumulate the Ir/Dr/Dw/Bc/Bi grand totals used later by cg_fini.
   Also updates the distinct_files/fns/lines statistics counters. */
static void fprint_CC_table_and_calc_totals(void)
{
   Int     i;
   VgFile  *fp;
   HChar   *currFile = NULL;
   const HChar *currFn = NULL;
   LineCC* lineCC;

   // Setup output filename.  Nb: it's important to do this now, ie. as late
   // as possible.  If we do it at start-up and the program forks and the
   // output file format string contains a %p (pid) specifier, both the
   // parent and child will incorrectly write to the same file; this
   // happened in 3.3.0.
   HChar* cachegrind_out_file =
      VG_(expand_file_name)("--cachegrind-out-file", clo_cachegrind_out_file);

   fp = VG_(fopen)(cachegrind_out_file, VKI_O_CREAT|VKI_O_TRUNC|VKI_O_WRONLY,
                                        VKI_S_IRUSR|VKI_S_IWUSR);
   if (fp == NULL) {
      // If the file can't be opened for whatever reason (conflict
      // between multiple cachegrinded processes?), give up now.
      VG_(umsg)("error: can't open cache simulation output file '%s'\n",
                cachegrind_out_file );
      VG_(umsg)(" ... so simulation results will be missing.\n");
      VG_(free)(cachegrind_out_file);
      return;
   } else {
      VG_(free)(cachegrind_out_file);
   }

   // "desc:" lines (giving I1/D1/LL cache configuration).  The spaces after
   // the 2nd colon makes cg_annotate's output look nicer.
   VG_(fprintf)(fp, "desc: I1 cache: %s\n"
                    "desc: D1 cache: %s\n"
                    "desc: LL cache: %s\n",
                    I1.desc_line, D1.desc_line, LL.desc_line);

   // "cmd:" line
   VG_(fprintf)(fp, "cmd: %s", VG_(args_the_exename));
   for (i = 0; i < VG_(sizeXA)( VG_(args_for_client) ); i++) {
      HChar* arg = * (HChar**) VG_(indexXA)( VG_(args_for_client), i );
      VG_(fprintf)(fp, " %s", arg);
   }
   // "events:" line -- which columns follow depends on which
   // simulations were enabled.
   if (clo_cache_sim && clo_branch_sim) {
      VG_(fprintf)(fp, "\nevents: Ir I1mr ILmr Dr D1mr DLmr Dw D1mw DLmw "
                       "Bc Bcm Bi Bim\n");
   }
   else if (clo_cache_sim && !clo_branch_sim) {
      VG_(fprintf)(fp, "\nevents: Ir I1mr ILmr Dr D1mr DLmr Dw D1mw DLmw "
                       "\n");
   }
   else if (!clo_cache_sim && clo_branch_sim) {
      VG_(fprintf)(fp, "\nevents: Ir Bc Bcm Bi Bim\n");
   }
   else {
      VG_(fprintf)(fp, "\nevents: Ir\n");
   }

   // Traverse every lineCC
   VG_(OSetGen_ResetIter)(CC_table);
   while ( (lineCC = VG_(OSetGen_Next)(CC_table)) ) {
      Bool just_hit_a_new_file = False;
      // If we've hit a new file, print a "fl=" line.  Note that because
      // each string is stored exactly once in the string table, we can use
      // pointer comparison rather than strcmp() to test for equality, which
      // is good because most of the time the comparisons are equal and so
      // the whole strings would have to be checked.
      if ( lineCC->loc.file != currFile ) {
         currFile = lineCC->loc.file;
         VG_(fprintf)(fp, "fl=%s\n", currFile);
         distinct_files++;
         just_hit_a_new_file = True;
      }
      // If we've hit a new function, print a "fn=" line.  We know to do
      // this when the function name changes, and also every time we hit a
      // new file (in which case the new function name might be the same as
      // in the old file, hence the just_hit_a_new_file test).
      if ( just_hit_a_new_file || lineCC->loc.fn != currFn ) {
         currFn = lineCC->loc.fn;
         VG_(fprintf)(fp, "fn=%s\n", currFn);
         distinct_fns++;
      }

      // Print the LineCC -- column set must match the "events:" line above.
      if (clo_cache_sim && clo_branch_sim) {
         VG_(fprintf)(fp,  "%u %llu %llu %llu"
                             " %llu %llu %llu"
                             " %llu %llu %llu"
                             " %llu %llu %llu %llu\n",
                            lineCC->loc.line,
                            lineCC->Ir.a, lineCC->Ir.m1, lineCC->Ir.mL,
                            lineCC->Dr.a, lineCC->Dr.m1, lineCC->Dr.mL,
                            lineCC->Dw.a, lineCC->Dw.m1, lineCC->Dw.mL,
                            lineCC->Bc.b, lineCC->Bc.mp,
                            lineCC->Bi.b, lineCC->Bi.mp);
      }
      else if (clo_cache_sim && !clo_branch_sim) {
         VG_(fprintf)(fp,  "%u %llu %llu %llu"
                             " %llu %llu %llu"
                             " %llu %llu %llu\n",
                            lineCC->loc.line,
                            lineCC->Ir.a, lineCC->Ir.m1, lineCC->Ir.mL,
                            lineCC->Dr.a, lineCC->Dr.m1, lineCC->Dr.mL,
                            lineCC->Dw.a, lineCC->Dw.m1, lineCC->Dw.mL);
      }
      else if (!clo_cache_sim && clo_branch_sim) {
         VG_(fprintf)(fp,  "%u %llu"
                             " %llu %llu %llu %llu\n",
                            lineCC->loc.line,
                            lineCC->Ir.a,
                            lineCC->Bc.b, lineCC->Bc.mp,
                            lineCC->Bi.b, lineCC->Bi.mp);
      }
      else {
         VG_(fprintf)(fp,  "%u %llu\n",
                            lineCC->loc.line,
                            lineCC->Ir.a);
      }

      // Update summary stats
      Ir_total.a  += lineCC->Ir.a;
      Ir_total.m1 += lineCC->Ir.m1;
      Ir_total.mL += lineCC->Ir.mL;
      Dr_total.a  += lineCC->Dr.a;
      Dr_total.m1 += lineCC->Dr.m1;
      Dr_total.mL += lineCC->Dr.mL;
      Dw_total.a  += lineCC->Dw.a;
      Dw_total.m1 += lineCC->Dw.m1;
      Dw_total.mL += lineCC->Dw.mL;
      Bc_total.b  += lineCC->Bc.b;
      Bc_total.mp += lineCC->Bc.mp;
      Bi_total.b  += lineCC->Bi.b;
      Bi_total.mp += lineCC->Bi.mp;

      distinct_lines++;
   }

   // Summary stats must come after rest of table, since we calculate them
   // during traversal.  */
   if (clo_cache_sim && clo_branch_sim) {
      VG_(fprintf)(fp,  "summary:"
                        " %llu %llu %llu"
                        " %llu %llu %llu"
                        " %llu %llu %llu"
                        " %llu %llu %llu %llu\n",
                        Ir_total.a, Ir_total.m1, Ir_total.mL,
                        Dr_total.a, Dr_total.m1, Dr_total.mL,
                        Dw_total.a, Dw_total.m1, Dw_total.mL,
                        Bc_total.b, Bc_total.mp,
                        Bi_total.b, Bi_total.mp);
   }
   else if (clo_cache_sim && !clo_branch_sim) {
      VG_(fprintf)(fp,  "summary:"
                        " %llu %llu %llu"
                        " %llu %llu %llu"
                        " %llu %llu %llu\n",
                        Ir_total.a, Ir_total.m1, Ir_total.mL,
                        Dr_total.a, Dr_total.m1, Dr_total.mL,
                        Dw_total.a, Dw_total.m1, Dw_total.mL);
   }
   else if (!clo_cache_sim && clo_branch_sim) {
      VG_(fprintf)(fp,  "summary:"
                        " %llu"
                        " %llu %llu %llu %llu\n",
                        Ir_total.a,
                        Bc_total.b, Bc_total.mp,
                        Bi_total.b, Bi_total.mp);
   }
   else {
      VG_(fprintf)(fp, "summary:"
                       " %llu\n",
                       Ir_total.a);
   }

   VG_(fclose)(fp);
}
1553 static UInt ULong_width(ULong n)
1555 UInt w = 0;
1556 while (n > 0) {
1557 n = n / 10;
1558 w++;
1560 if (w == 0) w = 1;
1561 return w + (w-1)/3; // add space for commas
/* Tool finalisation: write the output file (via
   fprint_CC_table_and_calc_totals, which also computes the grand
   totals) and then, unless verbosity is zero, print a summary of
   I/D/LL cache results, branch results and internal statistics to the
   console.  'exitcode' is unused here. */
static void cg_fini(Int exitcode)
{
   // NOTE(review): buf4 is 123 bytes while the others are 128 -- looks
   // accidental (hence the FIXME) but is harmless for the short
   // percentify strings written into it.
   static HChar buf1[128], buf2[128], buf3[128], buf4[123]; // FIXME
   static HChar fmt[128];    // OK; large enough

   CacheCC  D_total;
   BranchCC B_total;
   ULong LL_total_m, LL_total_mr, LL_total_mw,
         LL_total, LL_total_r, LL_total_w;
   Int l1, l2, l3;

   fprint_CC_table_and_calc_totals();

   if (VG_(clo_verbosity) == 0)
      return;

   // Nb: this isn't called "MAX" because that overshadows a global on Darwin.
   #define CG_MAX(a, b)  ((a) >= (b) ? (a) : (b))

   /* I cache results.  Use the I_refs value to determine the first column
    * width. */
   l1 = ULong_width(Ir_total.a);
   l2 = ULong_width(CG_MAX(Dr_total.a, Bc_total.b));
   l3 = ULong_width(CG_MAX(Dw_total.a, Bi_total.b));

   /* Make format string, getting width right for numbers */
   VG_(sprintf)(fmt, "%%s %%,%dllu\n", l1);

   /* Always print this */
   VG_(umsg)(fmt, "I refs: ", Ir_total.a);

   /* If cache profiling is enabled, show D access numbers and all
      miss numbers */
   if (clo_cache_sim) {
      VG_(umsg)(fmt, "I1 misses: ", Ir_total.m1);
      VG_(umsg)(fmt, "LLi misses: ", Ir_total.mL);

      // Guard against division by zero in the miss-rate calculations.
      if (0 == Ir_total.a) Ir_total.a = 1;
      VG_(percentify)(Ir_total.m1, Ir_total.a, 2, l1+1, buf1);
      VG_(umsg)("I1 miss rate: %s\n", buf1);

      VG_(percentify)(Ir_total.mL, Ir_total.a, 2, l1+1, buf1);
      VG_(umsg)("LLi miss rate: %s\n", buf1);
      VG_(umsg)("\n");

      /* D cache results.  Use the D_refs.rd and D_refs.wr values to
       * determine the width of columns 2 & 3. */
      D_total.a  = Dr_total.a  + Dw_total.a;
      D_total.m1 = Dr_total.m1 + Dw_total.m1;
      D_total.mL = Dr_total.mL + Dw_total.mL;

      /* Make format string, getting width right for numbers */
      VG_(sprintf)(fmt, "%%s %%,%dllu (%%,%dllu rd + %%,%dllu wr)\n",
                        l1, l2, l3);

      VG_(umsg)(fmt, "D refs: ",
                D_total.a, Dr_total.a, Dw_total.a);
      VG_(umsg)(fmt, "D1 misses: ",
                D_total.m1, Dr_total.m1, Dw_total.m1);
      VG_(umsg)(fmt, "LLd misses: ",
                D_total.mL, Dr_total.mL, Dw_total.mL);

      if (0 == D_total.a)  D_total.a = 1;
      if (0 == Dr_total.a) Dr_total.a = 1;
      if (0 == Dw_total.a) Dw_total.a = 1;
      VG_(percentify)( D_total.m1,  D_total.a, 1, l1+1, buf1);
      VG_(percentify)(Dr_total.m1, Dr_total.a, 1, l2+1, buf2);
      VG_(percentify)(Dw_total.m1, Dw_total.a, 1, l3+1, buf3);
      VG_(umsg)("D1 miss rate: %s (%s + %s )\n", buf1, buf2,buf3);

      VG_(percentify)( D_total.mL,  D_total.a, 1, l1+1, buf1);
      VG_(percentify)(Dr_total.mL, Dr_total.a, 1, l2+1, buf2);
      VG_(percentify)(Dw_total.mL, Dw_total.a, 1, l3+1, buf3);
      VG_(umsg)("LLd miss rate: %s (%s + %s )\n", buf1, buf2,buf3);
      VG_(umsg)("\n");

      /* LL overall results */

      // LL "refs" are the misses that reached LL, ie. the L1 misses.
      LL_total   = Dr_total.m1 + Dw_total.m1 + Ir_total.m1;
      LL_total_r = Dr_total.m1 + Ir_total.m1;
      LL_total_w = Dw_total.m1;
      VG_(umsg)(fmt, "LL refs: ",
                LL_total, LL_total_r, LL_total_w);

      LL_total_m  = Dr_total.mL + Dw_total.mL + Ir_total.mL;
      LL_total_mr = Dr_total.mL + Ir_total.mL;
      LL_total_mw = Dw_total.mL;
      VG_(umsg)(fmt, "LL misses: ",
                LL_total_m, LL_total_mr, LL_total_mw);

      VG_(percentify)(LL_total_m,  (Ir_total.a + D_total.a),  1, l1+1, buf1);
      VG_(percentify)(LL_total_mr, (Ir_total.a + Dr_total.a), 1, l2+1, buf2);
      VG_(percentify)(LL_total_mw, Dw_total.a,                1, l3+1, buf3);
      VG_(umsg)("LL miss rate: %s (%s + %s )\n", buf1, buf2,buf3);
   }

   /* If branch profiling is enabled, show branch overall results. */
   if (clo_branch_sim) {
      /* Make format string, getting width right for numbers */
      VG_(sprintf)(fmt, "%%s %%,%dllu (%%,%dllu cond + %%,%dllu ind)\n",
                        l1, l2, l3);

      if (0 == Bc_total.b)  Bc_total.b = 1;
      if (0 == Bi_total.b)  Bi_total.b = 1;
      B_total.b  = Bc_total.b  + Bi_total.b;
      B_total.mp = Bc_total.mp + Bi_total.mp;

      VG_(umsg)("\n");
      VG_(umsg)(fmt, "Branches: ",
                B_total.b, Bc_total.b, Bi_total.b);

      VG_(umsg)(fmt, "Mispredicts: ",
                B_total.mp, Bc_total.mp, Bi_total.mp);

      VG_(percentify)(B_total.mp,  B_total.b,  1, l1+1, buf1);
      VG_(percentify)(Bc_total.mp, Bc_total.b, 1, l2+1, buf2);
      VG_(percentify)(Bi_total.mp, Bi_total.b, 1, l3+1, buf3);

      VG_(umsg)("Mispred rate: %s (%s + %s )\n", buf1, buf2,buf3);
   }

   // Various stats
   if (VG_(clo_stats)) {
      Int debug_lookups = full_debugs      + fn_debugs +
                          file_line_debugs + no_debugs;

      VG_(dmsg)("\n");
      VG_(dmsg)("cachegrind: distinct files : %d\n", distinct_files);
      VG_(dmsg)("cachegrind: distinct functions : %d\n", distinct_fns);
      VG_(dmsg)("cachegrind: distinct lines : %d\n", distinct_lines);
      VG_(dmsg)("cachegrind: distinct instrs NoX: %d\n", distinct_instrsNoX);
      VG_(dmsg)("cachegrind: distinct instrs Gen: %d\n", distinct_instrsGen);
      VG_(dmsg)("cachegrind: debug lookups : %d\n", debug_lookups);

      VG_(percentify)(full_debugs,      debug_lookups, 1, 6, buf1);
      VG_(percentify)(file_line_debugs, debug_lookups, 1, 6, buf2);
      VG_(percentify)(fn_debugs,        debug_lookups, 1, 6, buf3);
      VG_(percentify)(no_debugs,        debug_lookups, 1, 6, buf4);
      VG_(dmsg)("cachegrind: with full info:%s (%d)\n",
                buf1, full_debugs);
      VG_(dmsg)("cachegrind: with file/line info:%s (%d)\n",
                buf2, file_line_debugs);
      VG_(dmsg)("cachegrind: with fn name info:%s (%d)\n",
                buf3, fn_debugs);
      VG_(dmsg)("cachegrind: with zero info:%s (%d)\n",
                buf4, no_debugs);

      VG_(dmsg)("cachegrind: string table size: %lu\n",
                VG_(OSetGen_Size)(stringTable));
      VG_(dmsg)("cachegrind: CC table size: %lu\n",
                VG_(OSetGen_Size)(CC_table));
      VG_(dmsg)("cachegrind: InstrInfo table size: %lu\n",
                VG_(OSetGen_Size)(instrInfoTable));
   }
}
1720 /*--------------------------------------------------------------------*/
1721 /*--- Discarding BB info ---*/
1722 /*--------------------------------------------------------------------*/
// Called when a translation is removed from the translation cache for
// any reason at all: to free up space, because the guest code was
// unmapped or modified, or for any arbitrary reason.
static
void cg_discard_superblock_info ( Addr64 orig_addr64, VexGuestExtents vge )
{
   SB_info* sbInfo;
   // NOTE(review): the lookup key is taken from vge.base[0], not from the
   // orig_addr64 parameter (which is unused here).  cg_instrument asserts
   // closure->readdr == vge->base[0], so presumably the two always agree,
   // but the comment below ("we use orig_addr, not the first instruction
   // address in vge") reads as stale -- confirm against get_SB_info.
   Addr     orig_addr = (Addr)vge.base[0];

   tl_assert(vge.n_used > 0);

   if (DEBUG_CG)
      VG_(printf)( "discard_basic_block_info: %p, %p, %llu\n",
                   (void*)(Addr)orig_addr,
                   (void*)(Addr)vge.base[0], (ULong)vge.len[0]);

   // Get BB info, remove from table, free BB info.  Simple!  Note that we
   // use orig_addr, not the first instruction address in vge.
   sbInfo = VG_(OSetGen_Remove)(instrInfoTable, &orig_addr);
   tl_assert(NULL != sbInfo);
   VG_(OSetGen_FreeNode)(instrInfoTable, sbInfo);
}
1747 /*--------------------------------------------------------------------*/
1748 /*--- Command line processing ---*/
1749 /*--------------------------------------------------------------------*/
/* Handle one cachegrind-specific command line option.  Returns True if
   'arg' was recognised (cache geometry options, output file name, or
   the cache/branch simulation toggles), False otherwise so the core
   can report it as unknown.  Nb: the VG_*_CLO macros expand to the
   condition of each 'if', hence the unusual brace-less style. */
static Bool cg_process_cmd_line_option(const HChar* arg)
{
   if (VG_(str_clo_cache_opt)(arg,
                              &clo_I1_cache,
                              &clo_D1_cache,
                              &clo_LL_cache)) {}

   else if VG_STR_CLO( arg, "--cachegrind-out-file", clo_cachegrind_out_file) {}
   else if VG_BOOL_CLO(arg, "--cache-sim",  clo_cache_sim)  {}
   else if VG_BOOL_CLO(arg, "--branch-sim", clo_branch_sim) {}
   else
      return False;

   return True;
}
/* Print the tool-specific part of --help: the shared cache-geometry
   options plus cachegrind's own switches. */
static void cg_print_usage(void)
{
   VG_(print_cache_clo_opts)();
   VG_(printf)(
"    --cache-sim=yes|no  [yes]        collect cache stats?\n"
"    --branch-sim=yes|no [no]         collect branch prediction stats?\n"
"    --cachegrind-out-file=<file>     output file name [cachegrind.out.%%p]\n"
   );
}
/* Print the tool-specific part of --help-debug (cachegrind has no
   debug-only options). */
static void cg_print_debug_usage(void)
{
   VG_(printf)(
"    (none)\n"
   );
}
1784 /*--------------------------------------------------------------------*/
1785 /*--- Setup ---*/
1786 /*--------------------------------------------------------------------*/
static void cg_post_clo_init(void); /* just below */

/* Pre-command-line-option initialisation: register the tool's details,
   its main callbacks (post-CLO init, instrumentation, finalisation)
   and the optional services it needs with the Valgrind core. */
static void cg_pre_clo_init(void)
{
   VG_(details_name)            ("Cachegrind");
   VG_(details_version)         (NULL);
   VG_(details_description)     ("a cache and branch-prediction profiler");
   VG_(details_copyright_author)(
      "Copyright (C) 2002-2013, and GNU GPL'd, by Nicholas Nethercote et al.");
   VG_(details_bug_reports_to)  (VG_BUGS_TO);
   VG_(details_avg_translation_sizeB) ( 500 );

   // Ask Vex to keep the guest SP up to date at memory accesses;
   // overridable by the user.
   VG_(clo_vex_control).iropt_register_updates
      = VexRegUpdSpAtMemAccess; // overridable by the user.
   VG_(basic_tool_funcs)          (cg_post_clo_init,
                                   cg_instrument,
                                   cg_fini);

   VG_(needs_superblock_discards)(cg_discard_superblock_info);
   VG_(needs_command_line_options)(cg_process_cmd_line_option,
                                   cg_print_usage,
                                   cg_print_debug_usage);
}
/* Post-command-line-option initialisation: create the CC, InstrInfo
   and string tables, resolve the final cache configurations (merging
   defaults with any --I1/--D1/--LL options), sanity-check line sizes
   against the largest guest register, and start the cache simulator. */
static void cg_post_clo_init(void)
{
   cache_t I1c, D1c, LLc;

   CC_table =
      VG_(OSetGen_Create)(offsetof(LineCC, loc),
                          cmp_CodeLoc_LineCC,
                          VG_(malloc), "cg.main.cpci.1",
                          VG_(free));
   instrInfoTable =
      VG_(OSetGen_Create)(/*keyOff*/0,
                          NULL,
                          VG_(malloc), "cg.main.cpci.2",
                          VG_(free));
   stringTable =
      VG_(OSetGen_Create)(/*keyOff*/0,
                          stringCmp,
                          VG_(malloc), "cg.main.cpci.3",
                          VG_(free));

   VG_(post_clo_init_configure_caches)(&I1c, &D1c, &LLc,
                                       &clo_I1_cache,
                                       &clo_D1_cache,
                                       &clo_LL_cache);

   // min_line_size is used to make sure that we never feed
   // accesses to the simulator straddling more than two
   // cache lines at any cache level
   min_line_size = (I1c.line_size < D1c.line_size) ? I1c.line_size : D1c.line_size;
   min_line_size = (LLc.line_size < min_line_size) ? LLc.line_size : min_line_size;

   Int largest_load_or_store_size
      = VG_(machine_get_size_of_largest_guest_register)();
   if (min_line_size < largest_load_or_store_size) {
      /* We can't continue, because the cache simulation might
         straddle more than 2 lines, and it will assert.  So let's
         just stop before we start. */
      VG_(umsg)("Cachegrind: cannot continue: the minimum line size (%d)\n",
                (Int)min_line_size);
      VG_(umsg)("  must be equal to or larger than the maximum register size (%d)\n",
                largest_load_or_store_size );
      VG_(umsg)("  but it is not.  Exiting now.\n");
      VG_(exit)(1);
   }

   cachesim_initcaches(I1c, D1c, LLc);
}
1860 VG_DETERMINE_INTERFACE_VERSION(cg_pre_clo_init)
1862 /*--------------------------------------------------------------------*/
1863 /*--- end ---*/
1864 /*--------------------------------------------------------------------*/