2 /*--------------------------------------------------------------------*/
3 /*--- Cachegrind: everything but the simulation itself. ---*/
5 /*--------------------------------------------------------------------*/
8 This file is part of Cachegrind, a Valgrind tool for cache
11 Copyright (C) 2002-2013 Nicholas Nethercote
14 This program is free software; you can redistribute it and/or
15 modify it under the terms of the GNU General Public License as
16 published by the Free Software Foundation; either version 2 of the
17 License, or (at your option) any later version.
19 This program is distributed in the hope that it will be useful, but
20 WITHOUT ANY WARRANTY; without even the implied warranty of
21 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
22 General Public License for more details.
24 You should have received a copy of the GNU General Public License
25 along with this program; if not, write to the Free Software
26 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
29 The GNU General Public License is contained in the file COPYING.
32 #include "pub_tool_basics.h"
33 #include "pub_tool_vki.h"
34 #include "pub_tool_debuginfo.h"
35 #include "pub_tool_libcbase.h"
36 #include "pub_tool_libcassert.h"
37 #include "pub_tool_libcfile.h"
38 #include "pub_tool_libcprint.h"
39 #include "pub_tool_libcproc.h"
40 #include "pub_tool_machine.h"
41 #include "pub_tool_mallocfree.h"
42 #include "pub_tool_options.h"
43 #include "pub_tool_oset.h"
44 #include "pub_tool_tooliface.h"
45 #include "pub_tool_xarray.h"
46 #include "pub_tool_clientstate.h"
47 #include "pub_tool_machine.h" // VG_(fnptr_to_fnentry)
51 #include "cg_branchpred.c"
53 /*------------------------------------------------------------*/
55 /*------------------------------------------------------------*/
57 /* Set to 1 for very verbose debugging */
60 #define MIN_LINE_SIZE 16
61 #define FILE_LEN VKI_PATH_MAX
64 /*------------------------------------------------------------*/
66 /*------------------------------------------------------------*/
68 static Bool clo_cache_sim
= True
; /* do cache simulation? */
69 static Bool clo_branch_sim
= False
; /* do branch simulation? */
70 static const HChar
* clo_cachegrind_out_file
= "cachegrind.out.%p";
72 /*------------------------------------------------------------*/
73 /*--- Cachesim configuration ---*/
74 /*------------------------------------------------------------*/
76 static Int min_line_size
= 0; /* min of L1 and LL cache line sizes */
78 /*------------------------------------------------------------*/
79 /*--- Types and Data Structures ---*/
80 /*------------------------------------------------------------*/
84 ULong a
; /* total # memory accesses of this kind */
85 ULong m1
; /* misses in the first level cache */
86 ULong mL
; /* misses in the second level cache */
92 ULong b
; /* total # branches of this kind */
93 ULong mp
; /* number of branches mispredicted */
97 //------------------------------------------------------------
98 // Primary data structure #1: CC table
99 // - Holds the per-source-line hit/miss stats, grouped by file/function/line.
100 // - an ordered set of CCs. CC indexing done by file/function/line (as
101 // determined from the instrAddr).
102 // - Traversed for dumping stats at end in file/func/line hierarchy.
112 CodeLoc loc
; /* Source location that these counts pertain to */
113 CacheCC Ir
; /* Insn read counts */
114 CacheCC Dr
; /* Data read counts */
115 CacheCC Dw
; /* Data write/modify counts */
116 BranchCC Bc
; /* Conditional branch counts */
117 BranchCC Bi
; /* Indirect branch counts */
120 // First compare file, then fn, then line.
121 static Word
cmp_CodeLoc_LineCC(const void *vloc
, const void *vcc
)
124 const CodeLoc
* a
= (const CodeLoc
*)vloc
;
125 const CodeLoc
* b
= &(((const LineCC
*)vcc
)->loc
);
127 res
= VG_(strcmp
)(a
->file
, b
->file
);
131 res
= VG_(strcmp
)(a
->fn
, b
->fn
);
135 return a
->line
- b
->line
;
138 static OSet
* CC_table
;
140 //------------------------------------------------------------
141 // Primary data structure #2: InstrInfo table
142 // - Holds the cached info about each instr that is used for simulation.
143 // - table(SB_start_addr, list(InstrInfo))
144 // - For each SB, each InstrInfo in the list holds info about the
145 // instruction (instrLen, instrAddr, etc), plus a pointer to its line
146 // CC. This node is what's passed to the simulation function.
147 // - When SBs are discarded the relevant list(instr_details) is freed.
149 typedef struct _InstrInfo InstrInfo
;
153 LineCC
* parent
; // parent line-CC
156 typedef struct _SB_info SB_info
;
158 Addr SB_addr
; // key; MUST BE FIRST
163 static OSet
* instrInfoTable
;
165 //------------------------------------------------------------
166 // Secondary data structure: string table
167 // - holds strings, avoiding dups
168 // - used for filenames and function names, each of which will be
169 // pointed to by one or more CCs.
170 // - it also allows equality checks just by pointer comparison, which
171 // is good when printing the output file at the end.
173 static OSet
* stringTable
;
175 //------------------------------------------------------------
177 static Int distinct_files
= 0;
178 static Int distinct_fns
= 0;
179 static Int distinct_lines
= 0;
180 static Int distinct_instrsGen
= 0;
181 static Int distinct_instrsNoX
= 0;
183 static Int full_debugs
= 0;
184 static Int file_line_debugs
= 0;
185 static Int fn_debugs
= 0;
186 static Int no_debugs
= 0;
188 /*------------------------------------------------------------*/
189 /*--- String table operations ---*/
190 /*------------------------------------------------------------*/
192 static Word
stringCmp( const void* key
, const void* elem
)
194 return VG_(strcmp
)(*(const HChar
*const *)key
, *(const HChar
*const *)elem
);
197 // Get a permanent string; either pull it out of the string table if it's
198 // been encountered before, or dup it and put it into the string table.
199 static HChar
* get_perm_string(HChar
* s
)
201 HChar
** s_ptr
= VG_(OSetGen_Lookup
)(stringTable
, &s
);
205 HChar
** s_node
= VG_(OSetGen_AllocNode
)(stringTable
, sizeof(HChar
*));
206 *s_node
= VG_(strdup
)("cg.main.gps.1", s
);
207 VG_(OSetGen_Insert
)(stringTable
, s_node
);
212 /*------------------------------------------------------------*/
213 /*--- CC table operations ---*/
214 /*------------------------------------------------------------*/
216 static void get_debug_info(Addr instr_addr
, HChar file
[FILE_LEN
],
217 HChar fn
[FN_LEN
], UInt
* line
)
221 Bool found_file_line
= VG_(get_filename_linenum
)(
224 dir
, FILE_LEN
, &found_dirname
,
227 Bool found_fn
= VG_(get_fnname
)(instr_addr
, fn
, FN_LEN
);
229 if (!found_file_line
) {
230 VG_(strcpy
)(file
, "???");
234 VG_(strcpy
)(fn
, "???");
239 tl_assert(VG_(strlen
)(dir
) + VG_(strlen
)(file
) + 1 < FILE_LEN
);
240 VG_(strcat
)(dir
, "/"); // Append '/'
241 VG_(strcat
)(dir
, file
); // Append file to dir
242 VG_(strcpy
)(file
, dir
); // Move dir+file to file
245 if (found_file_line
) {
246 if (found_fn
) full_debugs
++;
247 else file_line_debugs
++;
249 if (found_fn
) fn_debugs
++;
254 // Do a three step traversal: by file, then fn, then line.
255 // Returns a pointer to the line CC, creates a new one if necessary.
256 static LineCC
* get_lineCC(Addr origAddr
)
258 HChar file
[FILE_LEN
], fn
[FN_LEN
];
263 get_debug_info(origAddr
, file
, fn
, &line
);
269 lineCC
= VG_(OSetGen_Lookup
)(CC_table
, &loc
);
271 // Allocate and zero a new node.
272 lineCC
= VG_(OSetGen_AllocNode
)(CC_table
, sizeof(LineCC
));
273 lineCC
->loc
.file
= get_perm_string(loc
.file
);
274 lineCC
->loc
.fn
= get_perm_string(loc
.fn
);
275 lineCC
->loc
.line
= loc
.line
;
289 VG_(OSetGen_Insert
)(CC_table
, lineCC
);
295 /*------------------------------------------------------------*/
296 /*--- Cache simulation functions ---*/
297 /*------------------------------------------------------------*/
299 /* A common case for an instruction read event is that the
300 * bytes read belong to the same cache line in both L1I and LL
301 * (if cache line sizes of L1 and LL are the same).
302 * As this can be detected at instrumentation time, and results
303 * in faster simulation, special-casing is beneficial.
305 * Abbreviations used in var/function names:
306 * IrNoX - instruction read does not cross cache lines
307 * IrGen - generic instruction read; not detected as IrNoX
308 * Ir - not known / not important whether it is an IrNoX
311 // Only used with --cache-sim=no.
313 void log_1Ir(InstrInfo
* n
)
318 // Only used with --cache-sim=no.
320 void log_2Ir(InstrInfo
* n
, InstrInfo
* n2
)
326 // Only used with --cache-sim=no.
328 void log_3Ir(InstrInfo
* n
, InstrInfo
* n2
, InstrInfo
* n3
)
335 // Generic case for instruction reads: may cross cache lines.
336 // All other Ir handlers expect IrNoX instruction reads.
338 void log_1IrGen_0D_cache_access(InstrInfo
* n
)
340 //VG_(printf)("1IrGen_0D : CCaddr=0x%010lx, iaddr=0x%010lx, isize=%lu\n",
341 // n, n->instr_addr, n->instr_len);
342 cachesim_I1_doref_Gen(n
->instr_addr
, n
->instr_len
,
343 &n
->parent
->Ir
.m1
, &n
->parent
->Ir
.mL
);
348 void log_1IrNoX_0D_cache_access(InstrInfo
* n
)
350 //VG_(printf)("1IrNoX_0D : CCaddr=0x%010lx, iaddr=0x%010lx, isize=%lu\n",
351 // n, n->instr_addr, n->instr_len);
352 cachesim_I1_doref_NoX(n
->instr_addr
, n
->instr_len
,
353 &n
->parent
->Ir
.m1
, &n
->parent
->Ir
.mL
);
358 void log_2IrNoX_0D_cache_access(InstrInfo
* n
, InstrInfo
* n2
)
360 //VG_(printf)("2IrNoX_0D : CC1addr=0x%010lx, i1addr=0x%010lx, i1size=%lu\n"
361 // " CC2addr=0x%010lx, i2addr=0x%010lx, i2size=%lu\n",
362 // n, n->instr_addr, n->instr_len,
363 // n2, n2->instr_addr, n2->instr_len);
364 cachesim_I1_doref_NoX(n
->instr_addr
, n
->instr_len
,
365 &n
->parent
->Ir
.m1
, &n
->parent
->Ir
.mL
);
367 cachesim_I1_doref_NoX(n2
->instr_addr
, n2
->instr_len
,
368 &n2
->parent
->Ir
.m1
, &n2
->parent
->Ir
.mL
);
373 void log_3IrNoX_0D_cache_access(InstrInfo
* n
, InstrInfo
* n2
, InstrInfo
* n3
)
375 //VG_(printf)("3IrNoX_0D : CC1addr=0x%010lx, i1addr=0x%010lx, i1size=%lu\n"
376 // " CC2addr=0x%010lx, i2addr=0x%010lx, i2size=%lu\n"
377 // " CC3addr=0x%010lx, i3addr=0x%010lx, i3size=%lu\n",
378 // n, n->instr_addr, n->instr_len,
379 // n2, n2->instr_addr, n2->instr_len,
380 // n3, n3->instr_addr, n3->instr_len);
381 cachesim_I1_doref_NoX(n
->instr_addr
, n
->instr_len
,
382 &n
->parent
->Ir
.m1
, &n
->parent
->Ir
.mL
);
384 cachesim_I1_doref_NoX(n2
->instr_addr
, n2
->instr_len
,
385 &n2
->parent
->Ir
.m1
, &n2
->parent
->Ir
.mL
);
387 cachesim_I1_doref_NoX(n3
->instr_addr
, n3
->instr_len
,
388 &n3
->parent
->Ir
.m1
, &n3
->parent
->Ir
.mL
);
393 void log_1IrNoX_1Dr_cache_access(InstrInfo
* n
, Addr data_addr
, Word data_size
)
395 //VG_(printf)("1IrNoX_1Dr: CCaddr=0x%010lx, iaddr=0x%010lx, isize=%lu\n"
396 // " daddr=0x%010lx, dsize=%lu\n",
397 // n, n->instr_addr, n->instr_len, data_addr, data_size);
398 cachesim_I1_doref_NoX(n
->instr_addr
, n
->instr_len
,
399 &n
->parent
->Ir
.m1
, &n
->parent
->Ir
.mL
);
402 cachesim_D1_doref(data_addr
, data_size
,
403 &n
->parent
->Dr
.m1
, &n
->parent
->Dr
.mL
);
408 void log_1IrNoX_1Dw_cache_access(InstrInfo
* n
, Addr data_addr
, Word data_size
)
410 //VG_(printf)("1IrNoX_1Dw: CCaddr=0x%010lx, iaddr=0x%010lx, isize=%lu\n"
411 // " daddr=0x%010lx, dsize=%lu\n",
412 // n, n->instr_addr, n->instr_len, data_addr, data_size);
413 cachesim_I1_doref_NoX(n
->instr_addr
, n
->instr_len
,
414 &n
->parent
->Ir
.m1
, &n
->parent
->Ir
.mL
);
417 cachesim_D1_doref(data_addr
, data_size
,
418 &n
->parent
->Dw
.m1
, &n
->parent
->Dw
.mL
);
422 /* Note that addEvent_D_guarded assumes that log_0Ir_1Dr_cache_access
423 and log_0Ir_1Dw_cache_access have exactly the same prototype. If
424 you change them, you must change addEvent_D_guarded too. */
426 void log_0Ir_1Dr_cache_access(InstrInfo
* n
, Addr data_addr
, Word data_size
)
428 //VG_(printf)("0Ir_1Dr: CCaddr=0x%010lx, daddr=0x%010lx, dsize=%lu\n",
429 // n, data_addr, data_size);
430 cachesim_D1_doref(data_addr
, data_size
,
431 &n
->parent
->Dr
.m1
, &n
->parent
->Dr
.mL
);
435 /* See comment on log_0Ir_1Dr_cache_access. */
437 void log_0Ir_1Dw_cache_access(InstrInfo
* n
, Addr data_addr
, Word data_size
)
439 //VG_(printf)("0Ir_1Dw: CCaddr=0x%010lx, daddr=0x%010lx, dsize=%lu\n",
440 // n, data_addr, data_size);
441 cachesim_D1_doref(data_addr
, data_size
,
442 &n
->parent
->Dw
.m1
, &n
->parent
->Dw
.mL
);
446 /* For branches, we consult two different predictors, one which
447 predicts taken/untaken for conditional branches, and the other
448 which predicts the branch target address for indirect branches
449 (jump-to-register style ones). */
452 void log_cond_branch(InstrInfo
* n
, Word taken
)
454 //VG_(printf)("cbrnch: CCaddr=0x%010lx, taken=0x%010lx\n",
458 += (1 & do_cond_branch_predict(n
->instr_addr
, taken
));
462 void log_ind_branch(InstrInfo
* n
, UWord actual_dst
)
464 //VG_(printf)("ibrnch: CCaddr=0x%010lx, dst=0x%010lx\n",
468 += (1 & do_ind_branch_predict(n
->instr_addr
, actual_dst
));
472 /*------------------------------------------------------------*/
473 /*--- Instrumentation types and structures ---*/
474 /*------------------------------------------------------------*/
476 /* Maintain an ordered list of memory events which are outstanding, in
477 the sense that no IR has yet been generated to do the relevant
478 helper calls. The BB is scanned top to bottom and memory events
479 are added to the end of the list, merging with the most recent
480 notified event where possible (Dw immediately following Dr and
481 having the same size and EA can be merged).
483 This merging is done so that for architectures which have
484 load-op-store instructions (x86, amd64), the insn is treated as if
485 it makes just one memory reference (a modify), rather than two (a
486 read followed by a write at the same address).
488 At various points the list will need to be flushed, that is, IR
489 generated from it. That must happen before any possible exit from
490 the block (the end, or an IRStmt_Exit). Flushing also takes place
491 when there is no space to add a new event.
493 If we require the simulation statistics to be up to date with
494 respect to possible memory exceptions, then the list would have to
495 be flushed before each memory reference. That would however lose
496 performance by inhibiting event-merging during flushing.
498 Flushing the list consists of walking it start to end and emitting
499 instrumentation IR for each event, in the order in which they
500 appear. It may be possible to emit a single call for two adjacent
501 events in order to reduce the number of helper function calls made.
502 For example, it could well be profitable to handle two adjacent Ir
503 events with a single helper call. */
511 Ev_IrNoX
, // Instruction read not crossing cache lines
512 Ev_IrGen
, // Generic Ir, not being detected as IrNoX
515 Ev_Dm
, // Data modify (read then write)
516 Ev_Bc
, // branch conditional
517 Ev_Bi
// branch indirect (to unknown destination)
543 IRAtom
* taken
; /* :: Ity_I1 */
552 static void init_Event ( Event
* ev
) {
553 VG_(memset
)(ev
, 0, sizeof(Event
));
556 static IRAtom
* get_Event_dea ( Event
* ev
) {
558 case Ev_Dr
: return ev
->Ev
.Dr
.ea
;
559 case Ev_Dw
: return ev
->Ev
.Dw
.ea
;
560 case Ev_Dm
: return ev
->Ev
.Dm
.ea
;
561 default: tl_assert(0);
565 static Int
get_Event_dszB ( Event
* ev
) {
567 case Ev_Dr
: return ev
->Ev
.Dr
.szB
;
568 case Ev_Dw
: return ev
->Ev
.Dw
.szB
;
569 case Ev_Dm
: return ev
->Ev
.Dm
.szB
;
570 default: tl_assert(0);
575 /* Up to this many unnotified events are allowed. Number is
576 arbitrary. Larger numbers allow more event merging to occur, but
577 potentially induce more spilling due to extending live ranges of
578 address temporaries. */
582 /* A struct which holds all the running state during instrumentation.
583 Mostly to avoid passing loads of parameters everywhere. */
586 /* The current outstanding-memory-event list. */
587 Event events
[N_EVENTS
];
590 /* The array of InstrInfo bins for the BB. */
593 /* Number InstrInfo bins 'used' so far. */
596 /* The output SB being constructed. */
602 /*------------------------------------------------------------*/
603 /*--- Instrumentation main ---*/
604 /*------------------------------------------------------------*/
606 // Note that origAddr is the real origAddr, not the address of the first
607 // instruction in the block (they can be different due to redirection).
609 SB_info
* get_SB_info(IRSB
* sbIn
, Addr origAddr
)
615 // Count number of original instrs in SB
617 for (i
= 0; i
< sbIn
->stmts_used
; i
++) {
619 if (Ist_IMark
== st
->tag
) n_instrs
++;
622 // Check that we don't have an entry for this BB in the instr-info table.
623 // If this assertion fails, there has been some screwup: some
624 // translations must have been discarded but Cachegrind hasn't discarded
625 // the corresponding entries in the instr-info table.
626 sbInfo
= VG_(OSetGen_Lookup
)(instrInfoTable
, &origAddr
);
627 tl_assert(NULL
== sbInfo
);
629 // BB never translated before (at this address, at least; could have
630 // been unloaded and then reloaded elsewhere in memory)
631 sbInfo
= VG_(OSetGen_AllocNode
)(instrInfoTable
,
632 sizeof(SB_info
) + n_instrs
*sizeof(InstrInfo
));
633 sbInfo
->SB_addr
= origAddr
;
634 sbInfo
->n_instrs
= n_instrs
;
635 VG_(OSetGen_Insert
)( instrInfoTable
, sbInfo
);
641 static void showEvent ( Event
* ev
)
645 VG_(printf
)("IrGen %p\n", ev
->inode
);
648 VG_(printf
)("IrNoX %p\n", ev
->inode
);
651 VG_(printf
)("Dr %p %d EA=", ev
->inode
, ev
->Ev
.Dr
.szB
);
652 ppIRExpr(ev
->Ev
.Dr
.ea
);
656 VG_(printf
)("Dw %p %d EA=", ev
->inode
, ev
->Ev
.Dw
.szB
);
657 ppIRExpr(ev
->Ev
.Dw
.ea
);
661 VG_(printf
)("Dm %p %d EA=", ev
->inode
, ev
->Ev
.Dm
.szB
);
662 ppIRExpr(ev
->Ev
.Dm
.ea
);
666 VG_(printf
)("Bc %p GA=", ev
->inode
);
667 ppIRExpr(ev
->Ev
.Bc
.taken
);
671 VG_(printf
)("Bi %p DST=", ev
->inode
);
672 ppIRExpr(ev
->Ev
.Bi
.dst
);
681 // Reserve and initialise an InstrInfo for the first mention of a new insn.
683 InstrInfo
* setup_InstrInfo ( CgState
* cgs
, Addr instr_addr
, UInt instr_len
)
686 tl_assert(cgs
->sbInfo_i
>= 0);
687 tl_assert(cgs
->sbInfo_i
< cgs
->sbInfo
->n_instrs
);
688 i_node
= &cgs
->sbInfo
->instrs
[ cgs
->sbInfo_i
];
689 i_node
->instr_addr
= instr_addr
;
690 i_node
->instr_len
= instr_len
;
691 i_node
->parent
= get_lineCC(instr_addr
);
697 /* Generate code for all outstanding memory events, and mark the queue
698 empty. Code is generated into cgs->bbOut, and this activity
699 'consumes' slots in cgs->sbInfo. */
701 static void flushEvents ( CgState
* cgs
)
704 const HChar
* helperName
;
714 while (i
< cgs
->events_used
) {
721 /* generate IR to notify event i and possibly the ones
722 immediately following it. */
723 tl_assert(i
>= 0 && i
< cgs
->events_used
);
725 ev
= &cgs
->events
[i
];
726 ev2
= ( i
< cgs
->events_used
-1 ? &cgs
->events
[i
+1] : NULL
);
727 ev3
= ( i
< cgs
->events_used
-2 ? &cgs
->events
[i
+2] : NULL
);
730 VG_(printf
)(" flush ");
734 i_node_expr
= mkIRExpr_HWord( (HWord
)ev
->inode
);
736 /* Decide on helper fn to call and args to pass it, and advance
740 /* Merge an IrNoX with a following Dr/Dm. */
741 if (ev2
&& (ev2
->tag
== Ev_Dr
|| ev2
->tag
== Ev_Dm
)) {
742 /* Why is this true? It's because we're merging an Ir
743 with a following Dr or Dm. The Ir derives from the
744 instruction's IMark and the Dr/Dm from data
745 references which follow it. In short it holds
746 because each insn starts with an IMark, hence an
747 Ev_Ir, and so these Dr/Dm must pertain to the
748 immediately preceding Ir. Same applies to analogous
749 assertions in the subsequent cases. */
750 tl_assert(ev2
->inode
== ev
->inode
);
751 helperName
= "log_1IrNoX_1Dr_cache_access";
752 helperAddr
= &log_1IrNoX_1Dr_cache_access
;
753 argv
= mkIRExprVec_3( i_node_expr
,
755 mkIRExpr_HWord( get_Event_dszB(ev2
) ) );
759 /* Merge an IrNoX with a following Dw. */
761 if (ev2
&& ev2
->tag
== Ev_Dw
) {
762 tl_assert(ev2
->inode
== ev
->inode
);
763 helperName
= "log_1IrNoX_1Dw_cache_access";
764 helperAddr
= &log_1IrNoX_1Dw_cache_access
;
765 argv
= mkIRExprVec_3( i_node_expr
,
767 mkIRExpr_HWord( get_Event_dszB(ev2
) ) );
771 /* Merge an IrNoX with two following IrNoX's. */
773 if (ev2
&& ev3
&& ev2
->tag
== Ev_IrNoX
&& ev3
->tag
== Ev_IrNoX
)
776 helperName
= "log_3IrNoX_0D_cache_access";
777 helperAddr
= &log_3IrNoX_0D_cache_access
;
779 helperName
= "log_3Ir";
780 helperAddr
= &log_3Ir
;
782 argv
= mkIRExprVec_3( i_node_expr
,
783 mkIRExpr_HWord( (HWord
)ev2
->inode
),
784 mkIRExpr_HWord( (HWord
)ev3
->inode
) );
788 /* Merge an IrNoX with one following IrNoX. */
790 if (ev2
&& ev2
->tag
== Ev_IrNoX
) {
792 helperName
= "log_2IrNoX_0D_cache_access";
793 helperAddr
= &log_2IrNoX_0D_cache_access
;
795 helperName
= "log_2Ir";
796 helperAddr
= &log_2Ir
;
798 argv
= mkIRExprVec_2( i_node_expr
,
799 mkIRExpr_HWord( (HWord
)ev2
->inode
) );
803 /* No merging possible; emit as-is. */
806 helperName
= "log_1IrNoX_0D_cache_access";
807 helperAddr
= &log_1IrNoX_0D_cache_access
;
809 helperName
= "log_1Ir";
810 helperAddr
= &log_1Ir
;
812 argv
= mkIRExprVec_1( i_node_expr
);
819 helperName
= "log_1IrGen_0D_cache_access";
820 helperAddr
= &log_1IrGen_0D_cache_access
;
822 helperName
= "log_1Ir";
823 helperAddr
= &log_1Ir
;
825 argv
= mkIRExprVec_1( i_node_expr
);
831 /* Data read or modify */
832 helperName
= "log_0Ir_1Dr_cache_access";
833 helperAddr
= &log_0Ir_1Dr_cache_access
;
834 argv
= mkIRExprVec_3( i_node_expr
,
836 mkIRExpr_HWord( get_Event_dszB(ev
) ) );
842 helperName
= "log_0Ir_1Dw_cache_access";
843 helperAddr
= &log_0Ir_1Dw_cache_access
;
844 argv
= mkIRExprVec_3( i_node_expr
,
846 mkIRExpr_HWord( get_Event_dszB(ev
) ) );
851 /* Conditional branch */
852 helperName
= "log_cond_branch";
853 helperAddr
= &log_cond_branch
;
854 argv
= mkIRExprVec_2( i_node_expr
, ev
->Ev
.Bc
.taken
);
859 /* Branch to an unknown destination */
860 helperName
= "log_ind_branch";
861 helperAddr
= &log_ind_branch
;
862 argv
= mkIRExprVec_2( i_node_expr
, ev
->Ev
.Bi
.dst
);
870 /* Add the helper. */
871 tl_assert(helperName
);
872 tl_assert(helperAddr
);
874 di
= unsafeIRDirty_0_N( regparms
,
875 helperName
, VG_(fnptr_to_fnentry
)( helperAddr
),
877 addStmtToIRSB( cgs
->sbOut
, IRStmt_Dirty(di
) );
880 cgs
->events_used
= 0;
883 static void addEvent_Ir ( CgState
* cgs
, InstrInfo
* inode
)
886 if (cgs
->events_used
== N_EVENTS
)
888 tl_assert(cgs
->events_used
>= 0 && cgs
->events_used
< N_EVENTS
);
889 evt
= &cgs
->events
[cgs
->events_used
];
892 if (cachesim_is_IrNoX(inode
->instr_addr
, inode
->instr_len
)) {
894 distinct_instrsNoX
++;
897 distinct_instrsGen
++;
903 void addEvent_Dr ( CgState
* cgs
, InstrInfo
* inode
, Int datasize
, IRAtom
* ea
)
906 tl_assert(isIRAtom(ea
));
907 tl_assert(datasize
>= 1 && datasize
<= min_line_size
);
910 if (cgs
->events_used
== N_EVENTS
)
912 tl_assert(cgs
->events_used
>= 0 && cgs
->events_used
< N_EVENTS
);
913 evt
= &cgs
->events
[cgs
->events_used
];
917 evt
->Ev
.Dr
.szB
= datasize
;
923 void addEvent_Dw ( CgState
* cgs
, InstrInfo
* inode
, Int datasize
, IRAtom
* ea
)
928 tl_assert(isIRAtom(ea
));
929 tl_assert(datasize
>= 1 && datasize
<= min_line_size
);
934 /* Is it possible to merge this write with the preceding read? */
935 lastEvt
= &cgs
->events
[cgs
->events_used
-1];
936 if (cgs
->events_used
> 0
937 && lastEvt
->tag
== Ev_Dr
938 && lastEvt
->Ev
.Dr
.szB
== datasize
939 && lastEvt
->inode
== inode
940 && eqIRAtom(lastEvt
->Ev
.Dr
.ea
, ea
))
942 lastEvt
->tag
= Ev_Dm
;
946 /* No. Add as normal. */
947 if (cgs
->events_used
== N_EVENTS
)
949 tl_assert(cgs
->events_used
>= 0 && cgs
->events_used
< N_EVENTS
);
950 evt
= &cgs
->events
[cgs
->events_used
];
954 evt
->Ev
.Dw
.szB
= datasize
;
960 void addEvent_D_guarded ( CgState
* cgs
, InstrInfo
* inode
,
961 Int datasize
, IRAtom
* ea
, IRAtom
* guard
,
964 tl_assert(isIRAtom(ea
));
966 tl_assert(isIRAtom(guard
));
967 tl_assert(datasize
>= 1 && datasize
<= min_line_size
);
972 /* Adding guarded memory actions and merging them with the existing
973 queue is too complex. Simply flush the queue and add this
974 action immediately. Since guarded loads and stores are pretty
975 rare, this is not thought likely to cause any noticeable
976 performance loss as a result of the loss of event-merging
978 tl_assert(cgs
->events_used
>= 0);
980 tl_assert(cgs
->events_used
== 0);
981 /* Same as case Ev_Dw / case Ev_Dr in flushEvents, except with guard */
983 const HChar
* helperName
;
988 i_node_expr
= mkIRExpr_HWord( (HWord
)inode
);
989 helperName
= isWrite
? "log_0Ir_1Dw_cache_access"
990 : "log_0Ir_1Dr_cache_access";
991 helperAddr
= isWrite
? &log_0Ir_1Dw_cache_access
992 : &log_0Ir_1Dr_cache_access
;
993 argv
= mkIRExprVec_3( i_node_expr
,
994 ea
, mkIRExpr_HWord( datasize
) );
996 di
= unsafeIRDirty_0_N(
998 helperName
, VG_(fnptr_to_fnentry
)( helperAddr
),
1001 addStmtToIRSB( cgs
->sbOut
, IRStmt_Dirty(di
) );
1006 void addEvent_Bc ( CgState
* cgs
, InstrInfo
* inode
, IRAtom
* guard
)
1009 tl_assert(isIRAtom(guard
));
1010 tl_assert(typeOfIRExpr(cgs
->sbOut
->tyenv
, guard
)
1011 == (sizeof(HWord
)==4 ? Ity_I32
: Ity_I64
));
1012 if (!clo_branch_sim
)
1014 if (cgs
->events_used
== N_EVENTS
)
1016 tl_assert(cgs
->events_used
>= 0 && cgs
->events_used
< N_EVENTS
);
1017 evt
= &cgs
->events
[cgs
->events_used
];
1021 evt
->Ev
.Bc
.taken
= guard
;
1026 void addEvent_Bi ( CgState
* cgs
, InstrInfo
* inode
, IRAtom
* whereTo
)
1029 tl_assert(isIRAtom(whereTo
));
1030 tl_assert(typeOfIRExpr(cgs
->sbOut
->tyenv
, whereTo
)
1031 == (sizeof(HWord
)==4 ? Ity_I32
: Ity_I64
));
1032 if (!clo_branch_sim
)
1034 if (cgs
->events_used
== N_EVENTS
)
1036 tl_assert(cgs
->events_used
>= 0 && cgs
->events_used
< N_EVENTS
);
1037 evt
= &cgs
->events
[cgs
->events_used
];
1041 evt
->Ev
.Bi
.dst
= whereTo
;
1045 ////////////////////////////////////////////////////////////
1049 IRSB
* cg_instrument ( VgCallbackClosure
* closure
,
1051 VexGuestLayout
* layout
,
1052 VexGuestExtents
* vge
,
1053 VexArchInfo
* archinfo_host
,
1054 IRType gWordTy
, IRType hWordTy
)
1058 Addr64 cia
; /* address of current insn */
1060 IRTypeEnv
* tyenv
= sbIn
->tyenv
;
1061 InstrInfo
* curr_inode
= NULL
;
1063 if (gWordTy
!= hWordTy
) {
1064 /* We don't currently support this case. */
1065 VG_(tool_panic
)("host/guest word size mismatch");
1069 cgs
.sbOut
= deepCopyIRSBExceptStmts(sbIn
);
1071 // Copy verbatim any IR preamble preceding the first IMark
1073 while (i
< sbIn
->stmts_used
&& sbIn
->stmts
[i
]->tag
!= Ist_IMark
) {
1074 addStmtToIRSB( cgs
.sbOut
, sbIn
->stmts
[i
] );
1078 // Get the first statement, and initial cia from it
1079 tl_assert(sbIn
->stmts_used
> 0);
1080 tl_assert(i
< sbIn
->stmts_used
);
1081 st
= sbIn
->stmts
[i
];
1082 tl_assert(Ist_IMark
== st
->tag
);
1084 cia
= st
->Ist
.IMark
.addr
;
1085 isize
= st
->Ist
.IMark
.len
;
1086 // If Vex fails to decode an instruction, the size will be zero.
1087 // Pretend otherwise.
1088 if (isize
== 0) isize
= VG_MIN_INSTR_SZB
;
1090 // Set up running state and get block info
1091 tl_assert(closure
->readdr
== vge
->base
[0]);
1092 cgs
.events_used
= 0;
1093 cgs
.sbInfo
= get_SB_info(sbIn
, (Addr
)closure
->readdr
);
1097 VG_(printf
)("\n\n---------- cg_instrument ----------\n");
1099 // Traverse the block, initialising inodes, adding events and flushing as
1101 for (/*use current i*/; i
< sbIn
->stmts_used
; i
++) {
1103 st
= sbIn
->stmts
[i
];
1104 tl_assert(isFlatIRStmt(st
));
1115 cia
= st
->Ist
.IMark
.addr
;
1116 isize
= st
->Ist
.IMark
.len
;
1118 // If Vex fails to decode an instruction, the size will be zero.
1119 // Pretend otherwise.
1120 if (isize
== 0) isize
= VG_MIN_INSTR_SZB
;
1122 // Sanity-check size.
1123 tl_assert( (VG_MIN_INSTR_SZB
<= isize
&& isize
<= VG_MAX_INSTR_SZB
)
1124 || VG_CLREQ_SZB
== isize
);
1126 // Get space for and init the inode, record it as the current one.
1127 // Subsequent Dr/Dw/Dm events from the same instruction will
1129 curr_inode
= setup_InstrInfo(&cgs
, cia
, isize
);
1131 addEvent_Ir( &cgs
, curr_inode
);
1135 IRExpr
* data
= st
->Ist
.WrTmp
.data
;
1136 if (data
->tag
== Iex_Load
) {
1137 IRExpr
* aexpr
= data
->Iex
.Load
.addr
;
1138 // Note also, endianness info is ignored. I guess
1139 // that's not interesting.
1140 addEvent_Dr( &cgs
, curr_inode
, sizeofIRType(data
->Iex
.Load
.ty
),
1147 IRExpr
* data
= st
->Ist
.Store
.data
;
1148 IRExpr
* aexpr
= st
->Ist
.Store
.addr
;
1149 addEvent_Dw( &cgs
, curr_inode
,
1150 sizeofIRType(typeOfIRExpr(tyenv
, data
)), aexpr
);
1155 IRStoreG
* sg
= st
->Ist
.StoreG
.details
;
1156 IRExpr
* data
= sg
->data
;
1157 IRExpr
* addr
= sg
->addr
;
1158 IRType type
= typeOfIRExpr(tyenv
, data
);
1159 tl_assert(type
!= Ity_INVALID
);
1160 addEvent_D_guarded( &cgs
, curr_inode
,
1161 sizeofIRType(type
), addr
, sg
->guard
,
1167 IRLoadG
* lg
= st
->Ist
.LoadG
.details
;
1168 IRType type
= Ity_INVALID
; /* loaded type */
1169 IRType typeWide
= Ity_INVALID
; /* after implicit widening */
1170 IRExpr
* addr
= lg
->addr
;
1171 typeOfIRLoadGOp(lg
->cvt
, &typeWide
, &type
);
1172 tl_assert(type
!= Ity_INVALID
);
1173 addEvent_D_guarded( &cgs
, curr_inode
,
1174 sizeofIRType(type
), addr
, lg
->guard
,
1175 False
/*!isWrite*/ );
1181 IRDirty
* d
= st
->Ist
.Dirty
.details
;
1182 if (d
->mFx
!= Ifx_None
) {
1183 /* This dirty helper accesses memory. Collect the details. */
1184 tl_assert(d
->mAddr
!= NULL
);
1185 tl_assert(d
->mSize
!= 0);
1186 dataSize
= d
->mSize
;
1187 // Large (eg. 28B, 108B, 512B on x86) data-sized
1188 // instructions will be done inaccurately, but they're
1189 // very rare and this avoids errors from hitting more
1190 // than two cache lines in the simulation.
1191 if (dataSize
> min_line_size
)
1192 dataSize
= min_line_size
;
1193 if (d
->mFx
== Ifx_Read
|| d
->mFx
== Ifx_Modify
)
1194 addEvent_Dr( &cgs
, curr_inode
, dataSize
, d
->mAddr
);
1195 if (d
->mFx
== Ifx_Write
|| d
->mFx
== Ifx_Modify
)
1196 addEvent_Dw( &cgs
, curr_inode
, dataSize
, d
->mAddr
);
1198 tl_assert(d
->mAddr
== NULL
);
1199 tl_assert(d
->mSize
== 0);
1205 /* We treat it as a read and a write of the location. I
1206 think that is the same behaviour as it was before IRCAS
1207 was introduced, since prior to that point, the Vex
1208 front ends would translate a lock-prefixed instruction
1209 into a (normal) read followed by a (normal) write. */
1211 IRCAS
* cas
= st
->Ist
.CAS
.details
;
1212 tl_assert(cas
->addr
!= NULL
);
1213 tl_assert(cas
->dataLo
!= NULL
);
1214 dataSize
= sizeofIRType(typeOfIRExpr(tyenv
, cas
->dataLo
));
1215 if (cas
->dataHi
!= NULL
)
1216 dataSize
*= 2; /* since it's a doubleword-CAS */
1217 /* I don't think this can ever happen, but play safe. */
1218 if (dataSize
> min_line_size
)
1219 dataSize
= min_line_size
;
1220 addEvent_Dr( &cgs
, curr_inode
, dataSize
, cas
->addr
);
1221 addEvent_Dw( &cgs
, curr_inode
, dataSize
, cas
->addr
);
1227 if (st
->Ist
.LLSC
.storedata
== NULL
) {
1229 dataTy
= typeOfIRTemp(tyenv
, st
->Ist
.LLSC
.result
);
1230 addEvent_Dr( &cgs
, curr_inode
,
1231 sizeofIRType(dataTy
), st
->Ist
.LLSC
.addr
);
1232 /* flush events before LL, should help SC to succeed */
1233 flushEvents( &cgs
);
1236 dataTy
= typeOfIRExpr(tyenv
, st
->Ist
.LLSC
.storedata
);
1237 addEvent_Dw( &cgs
, curr_inode
,
1238 sizeofIRType(dataTy
), st
->Ist
.LLSC
.addr
);
1244 // call branch predictor only if this is a branch in guest code
1245 if ( (st
->Ist
.Exit
.jk
== Ijk_Boring
) ||
1246 (st
->Ist
.Exit
.jk
== Ijk_Call
) ||
1247 (st
->Ist
.Exit
.jk
== Ijk_Ret
) )
1249 /* Stuff to widen the guard expression to a host word, so
1250 we can pass it to the branch predictor simulation
1251 functions easily. */
1255 IRType tyW
= hWordTy
;
1256 IROp widen
= tyW
==Ity_I32
? Iop_1Uto32
: Iop_1Uto64
;
1257 IROp opXOR
= tyW
==Ity_I32
? Iop_Xor32
: Iop_Xor64
;
1258 IRTemp guard1
= newIRTemp(cgs
.sbOut
->tyenv
, Ity_I1
);
1259 IRTemp guardW
= newIRTemp(cgs
.sbOut
->tyenv
, tyW
);
1260 IRTemp guard
= newIRTemp(cgs
.sbOut
->tyenv
, tyW
);
1261 IRExpr
* one
= tyW
==Ity_I32
? IRExpr_Const(IRConst_U32(1))
1262 : IRExpr_Const(IRConst_U64(1));
1264 /* First we need to figure out whether the side exit got
1265 inverted by the ir optimiser. To do that, figure out
1266 the next (fallthrough) instruction's address and the
1267 side exit address and see if they are the same. */
1268 nia
= cia
+ (Addr64
)isize
;
1270 nia
&= 0xFFFFFFFFULL
;
1272 /* Side exit address */
1273 dst
= st
->Ist
.Exit
.dst
;
1274 if (tyW
== Ity_I32
) {
1275 tl_assert(dst
->tag
== Ico_U32
);
1276 sea
= (Addr64
)(UInt
)dst
->Ico
.U32
;
1278 tl_assert(tyW
== Ity_I64
);
1279 tl_assert(dst
->tag
== Ico_U64
);
1283 inverted
= nia
== sea
;
1285 /* Widen the guard expression. */
1286 addStmtToIRSB( cgs
.sbOut
,
1287 IRStmt_WrTmp( guard1
, st
->Ist
.Exit
.guard
));
1288 addStmtToIRSB( cgs
.sbOut
,
1289 IRStmt_WrTmp( guardW
,
1291 IRExpr_RdTmp(guard1
))) );
1292 /* If the exit is inverted, invert the sense of the guard. */
1297 inverted
? IRExpr_Binop(opXOR
, IRExpr_RdTmp(guardW
), one
)
1298 : IRExpr_RdTmp(guardW
)
1300 /* And post the event. */
1301 addEvent_Bc( &cgs
, curr_inode
, IRExpr_RdTmp(guard
) );
1304 /* We may never reach the next statement, so need to flush
1305 all outstanding transactions now. */
1306 flushEvents( &cgs
);
1316 /* Copy the original statement */
1317 addStmtToIRSB( cgs
.sbOut
, st
);
1325 /* Deal with branches to unknown destinations. Except ignore ones
1326 which are function returns as we assume the return stack
1327 predictor never mispredicts. */
1328 if ((sbIn
->jumpkind
== Ijk_Boring
) || (sbIn
->jumpkind
== Ijk_Call
)) {
1329 if (0) { ppIRExpr( sbIn
->next
); VG_(printf
)("\n"); }
1330 switch (sbIn
->next
->tag
) {
1332 break; /* boring - branch to known address */
1334 /* looks like an indirect branch (branch to unknown) */
1335 addEvent_Bi( &cgs
, curr_inode
, sbIn
->next
);
1338 /* shouldn't happen - if the incoming IR is properly
1339 flattened, should only have tmp and const cases to
1345 /* At the end of the bb. Flush outstandings. */
1346 flushEvents( &cgs
);
1348 /* done. stay sane ... */
1349 tl_assert(cgs
.sbInfo_i
== cgs
.sbInfo
->n_instrs
);
1352 VG_(printf
)( "goto {");
1353 ppIRJumpKind(sbIn
->jumpkind
);
1355 ppIRExpr( sbIn
->next
);
1356 VG_(printf
)( "}\n");
1362 /*------------------------------------------------------------*/
1363 /*--- Cache configuration ---*/
1364 /*------------------------------------------------------------*/
/* Cache configurations requested on the command line via --I1/--D1/--LL.
   UNDEFINED_CACHE means "not specified"; the real geometry is then chosen
   at post-clo-init time (auto-detected or defaulted). */
static cache_t clo_I1_cache = UNDEFINED_CACHE;   /* level-1 instruction cache */
static cache_t clo_D1_cache = UNDEFINED_CACHE;   /* level-1 data cache */
static cache_t clo_LL_cache = UNDEFINED_CACHE;   /* last-level (unified) cache */
1370 /*------------------------------------------------------------*/
1371 /*--- cg_fini() and related function ---*/
1372 /*------------------------------------------------------------*/
1374 // Total reads/writes/misses. Calculated during CC traversal at the end.
// Total reads/writes/misses.  Calculated during CC traversal at the end.
static CacheCC  Ir_total;   /* instruction reads: accesses / L1 misses / LL misses */
static CacheCC  Dr_total;   /* data reads */
static CacheCC  Dw_total;   /* data writes */
static BranchCC Bc_total;   /* conditional branches: executed / mispredicted */
static BranchCC Bi_total;   /* indirect branches: executed / mispredicted */
/* Write the per-line cost-centre table to the cachegrind output file, and
   accumulate the global Ir/Dr/Dw/Bc/Bi totals while traversing it.
   NOTE(review): this extract is missing several lines (the function braces,
   declarations of sres/fd/buf/i/lineCC, distinct_* counters, some closing
   braces and format-string continuations); the gaps are marked below and
   should be confirmed against the full file. */
static void fprint_CC_table_and_calc_totals(void)
   /* Current file/function names; "fl="/"fn=" lines are emitted only when
      these change. */
   HChar *currFile = NULL, *currFn = NULL;
   /* [elided in extract: locals — SysRes sres; Int i, fd; HChar buf[];
      LineCC* lineCC] */

   // Setup output filename.  Nb: it's important to do this now, ie. as late
   // as possible.  If we do it at start-up and the program forks and the
   // output file format string contains a %p (pid) specifier, both the
   // parent and child will incorrectly write to the same file; this
   // happened in 3.3.0.
   HChar* cachegrind_out_file =
      VG_(expand_file_name)("--cachegrind-out-file", clo_cachegrind_out_file);

   sres = VG_(open)(cachegrind_out_file, VKI_O_CREAT|VKI_O_TRUNC|VKI_O_WRONLY,
                    VKI_S_IRUSR|VKI_S_IWUSR);
   if (sr_isError(sres)) {
      // If the file can't be opened for whatever reason (conflict
      // between multiple cachegrinded processes?), give up now.
      VG_(umsg)("error: can't open cache simulation output file '%s'\n",
                cachegrind_out_file );
      VG_(umsg)(" ... so simulation results will be missing.\n");
      VG_(free)(cachegrind_out_file);
      /* [elided in extract: early return, and fd = sr_Res(sres) on the
         success path] */
   VG_(free)(cachegrind_out_file);

   // "desc:" lines (giving I1/D1/LL cache configuration).  The spaces after
   // the 2nd colon makes cg_annotate's output look nicer.
   VG_(sprintf)(buf, "desc: I1 cache: %s\n"
                "desc: D1 cache: %s\n"
                "desc: LL cache: %s\n",
                I1.desc_line, D1.desc_line, LL.desc_line);
   VG_(write)(fd, (void*)buf, VG_(strlen)(buf));

   /* "cmd:" line — the traced executable followed by its arguments. */
   VG_(strcpy)(buf, "cmd:");
   VG_(write)(fd, (void*)buf, VG_(strlen)(buf));
   VG_(write)(fd, " ", 1);
   VG_(write)(fd, VG_(args_the_exename),
              VG_(strlen)( VG_(args_the_exename) ));
   for (i = 0; i < VG_(sizeXA)( VG_(args_for_client) ); i++) {
      HChar* arg = * (HChar**) VG_(indexXA)( VG_(args_for_client), i );
      VG_(write)(fd, " ", 1);
      VG_(write)(fd, arg, VG_(strlen)( arg ));
   /* [elided in extract: loop closing brace] */

   /* "events:" header — the column set depends on which simulations are
      enabled, and must match the per-line output below. */
   if (clo_cache_sim && clo_branch_sim) {
      VG_(sprintf)(buf, "\nevents: Ir I1mr ILmr Dr D1mr DLmr Dw D1mw DLmw "
      /* [elided in extract: branch-event continuation string and brace] */
   else if (clo_cache_sim && !clo_branch_sim) {
      VG_(sprintf)(buf, "\nevents: Ir I1mr ILmr Dr D1mr DLmr Dw D1mw DLmw "
      /* [elided in extract: continuation and brace] */
   else if (!clo_cache_sim && clo_branch_sim) {
      VG_(sprintf)(buf, "\nevents: Ir "
      /* [elided in extract: continuation, brace, and the final else] */
      VG_(sprintf)(buf, "\nevents: Ir\n");
   VG_(write)(fd, (void*)buf, VG_(strlen)(buf));

   // Traverse every lineCC
   VG_(OSetGen_ResetIter)(CC_table);
   while ( (lineCC = VG_(OSetGen_Next)(CC_table)) ) {
      Bool just_hit_a_new_file = False;
      // If we've hit a new file, print a "fl=" line.  Note that because
      // each string is stored exactly once in the string table, we can use
      // pointer comparison rather than strcmp() to test for equality, which
      // is good because most of the time the comparisons are equal and so
      // the whole strings would have to be checked.
      if ( lineCC->loc.file != currFile ) {
         currFile = lineCC->loc.file;
         VG_(sprintf)(buf, "fl=%s\n", currFile);
         VG_(write)(fd, (void*)buf, VG_(strlen)(buf));
         just_hit_a_new_file = True;
      /* [elided in extract: distinct_files++ (presumably) and brace] */
      // If we've hit a new function, print a "fn=" line.  We know to do
      // this when the function name changes, and also every time we hit a
      // new file (in which case the new function name might be the same as
      // in the old file, hence the just_hit_a_new_file test).
      if ( just_hit_a_new_file || lineCC->loc.fn != currFn ) {
         currFn = lineCC->loc.fn;
         VG_(sprintf)(buf, "fn=%s\n", currFn);
         VG_(write)(fd, (void*)buf, VG_(strlen)(buf));
      /* [elided in extract: distinct_fns++ (presumably) and brace] */

      /* Per-line counts; column order mirrors the "events:" header above. */
      if (clo_cache_sim && clo_branch_sim) {
         VG_(sprintf)(buf, "%u %llu %llu %llu"
                      /* [elided in extract: middle format pieces] */
                      " %llu %llu %llu %llu\n",
                      lineCC->Ir.a, lineCC->Ir.m1, lineCC->Ir.mL,
                      lineCC->Dr.a, lineCC->Dr.m1, lineCC->Dr.mL,
                      lineCC->Dw.a, lineCC->Dw.m1, lineCC->Dw.mL,
                      lineCC->Bc.b, lineCC->Bc.mp,
                      lineCC->Bi.b, lineCC->Bi.mp);
      else if (clo_cache_sim && !clo_branch_sim) {
         VG_(sprintf)(buf, "%u %llu %llu %llu"
                      /* [elided in extract: middle format pieces] */
                      " %llu %llu %llu\n",
                      lineCC->Ir.a, lineCC->Ir.m1, lineCC->Ir.mL,
                      lineCC->Dr.a, lineCC->Dr.m1, lineCC->Dr.mL,
                      lineCC->Dw.a, lineCC->Dw.m1, lineCC->Dw.mL);
      else if (!clo_cache_sim && clo_branch_sim) {
         VG_(sprintf)(buf, "%u %llu"
                      " %llu %llu %llu %llu\n",
                      lineCC->Bc.b, lineCC->Bc.mp,
                      lineCC->Bi.b, lineCC->Bi.mp);
         /* [elided in extract: final else branch header and arguments] */
         VG_(sprintf)(buf, "%u %llu\n",
      VG_(write)(fd, (void*)buf, VG_(strlen)(buf));

      // Update summary stats
      Ir_total.a  += lineCC->Ir.a;
      Ir_total.m1 += lineCC->Ir.m1;
      Ir_total.mL += lineCC->Ir.mL;
      Dr_total.a  += lineCC->Dr.a;
      Dr_total.m1 += lineCC->Dr.m1;
      Dr_total.mL += lineCC->Dr.mL;
      Dw_total.a  += lineCC->Dw.a;
      Dw_total.m1 += lineCC->Dw.m1;
      Dw_total.mL += lineCC->Dw.mL;
      Bc_total.b  += lineCC->Bc.b;
      Bc_total.mp += lineCC->Bc.mp;
      Bi_total.b  += lineCC->Bi.b;
      Bi_total.mp += lineCC->Bi.mp;
   /* [elided in extract: end of while loop] */

   // Summary stats must come after rest of table, since we calculate them
   // during traversal.  */
   if (clo_cache_sim && clo_branch_sim) {
      VG_(sprintf)(buf, "summary:"
                   /* [elided in extract: middle format pieces] */
                   " %llu %llu %llu %llu\n",
                   Ir_total.a, Ir_total.m1, Ir_total.mL,
                   Dr_total.a, Dr_total.m1, Dr_total.mL,
                   Dw_total.a, Dw_total.m1, Dw_total.mL,
                   Bc_total.b, Bc_total.mp,
                   Bi_total.b, Bi_total.mp);
   else if (clo_cache_sim && !clo_branch_sim) {
      VG_(sprintf)(buf, "summary:"
                   /* [elided in extract: middle format pieces] */
                   " %llu %llu %llu\n",
                   Ir_total.a, Ir_total.m1, Ir_total.mL,
                   Dr_total.a, Dr_total.m1, Dr_total.mL,
                   Dw_total.a, Dw_total.m1, Dw_total.mL);
   else if (!clo_cache_sim && clo_branch_sim) {
      VG_(sprintf)(buf, "summary:"
                   " %llu %llu %llu %llu\n",
                   Bc_total.b, Bc_total.mp,
                   Bi_total.b, Bi_total.mp);
      /* [elided in extract: final else branch format and arguments] */
      VG_(sprintf)(buf, "summary:"
   VG_(write)(fd, (void*)buf, VG_(strlen)(buf));
/* [elided in extract: VG_(close)(fd) (presumably) and closing brace] */
/* Number of characters needed to print n in decimal, including the
   thousands-separator commas inserted by the "%," format used below.
   NOTE(review): the digit-counting loop that computes w is missing from
   this extract. */
static UInt ULong_width(ULong n)
   /* [elided in extract: loop computing the digit count w of n] */
   return w + (w-1)/3;   // add space for commas
1588 static void cg_fini(Int exitcode
)
1590 static HChar buf1
[128], buf2
[128], buf3
[128], buf4
[123];
1591 static HChar fmt
[128];
1595 ULong LL_total_m
, LL_total_mr
, LL_total_mw
,
1596 LL_total
, LL_total_r
, LL_total_w
;
1599 fprint_CC_table_and_calc_totals();
1601 if (VG_(clo_verbosity
) == 0)
1604 // Nb: this isn't called "MAX" because that overshadows a global on Darwin.
1605 #define CG_MAX(a, b) ((a) >= (b) ? (a) : (b))
1607 /* I cache results. Use the I_refs value to determine the first column
1609 l1
= ULong_width(Ir_total
.a
);
1610 l2
= ULong_width(CG_MAX(Dr_total
.a
, Bc_total
.b
));
1611 l3
= ULong_width(CG_MAX(Dw_total
.a
, Bi_total
.b
));
1613 /* Make format string, getting width right for numbers */
1614 VG_(sprintf
)(fmt
, "%%s %%,%dllu\n", l1
);
1616 /* Always print this */
1617 VG_(umsg
)(fmt
, "I refs: ", Ir_total
.a
);
1619 /* If cache profiling is enabled, show D access numbers and all
1621 if (clo_cache_sim
) {
1622 VG_(umsg
)(fmt
, "I1 misses: ", Ir_total
.m1
);
1623 VG_(umsg
)(fmt
, "LLi misses: ", Ir_total
.mL
);
1625 if (0 == Ir_total
.a
) Ir_total
.a
= 1;
1626 VG_(percentify
)(Ir_total
.m1
, Ir_total
.a
, 2, l1
+1, buf1
);
1627 VG_(umsg
)("I1 miss rate: %s\n", buf1
);
1629 VG_(percentify
)(Ir_total
.mL
, Ir_total
.a
, 2, l1
+1, buf1
);
1630 VG_(umsg
)("LLi miss rate: %s\n", buf1
);
1633 /* D cache results. Use the D_refs.rd and D_refs.wr values to
1634 * determine the width of columns 2 & 3. */
1635 D_total
.a
= Dr_total
.a
+ Dw_total
.a
;
1636 D_total
.m1
= Dr_total
.m1
+ Dw_total
.m1
;
1637 D_total
.mL
= Dr_total
.mL
+ Dw_total
.mL
;
1639 /* Make format string, getting width right for numbers */
1640 VG_(sprintf
)(fmt
, "%%s %%,%dllu (%%,%dllu rd + %%,%dllu wr)\n",
1643 VG_(umsg
)(fmt
, "D refs: ",
1644 D_total
.a
, Dr_total
.a
, Dw_total
.a
);
1645 VG_(umsg
)(fmt
, "D1 misses: ",
1646 D_total
.m1
, Dr_total
.m1
, Dw_total
.m1
);
1647 VG_(umsg
)(fmt
, "LLd misses: ",
1648 D_total
.mL
, Dr_total
.mL
, Dw_total
.mL
);
1650 if (0 == D_total
.a
) D_total
.a
= 1;
1651 if (0 == Dr_total
.a
) Dr_total
.a
= 1;
1652 if (0 == Dw_total
.a
) Dw_total
.a
= 1;
1653 VG_(percentify
)( D_total
.m1
, D_total
.a
, 1, l1
+1, buf1
);
1654 VG_(percentify
)(Dr_total
.m1
, Dr_total
.a
, 1, l2
+1, buf2
);
1655 VG_(percentify
)(Dw_total
.m1
, Dw_total
.a
, 1, l3
+1, buf3
);
1656 VG_(umsg
)("D1 miss rate: %s (%s + %s )\n", buf1
, buf2
,buf3
);
1658 VG_(percentify
)( D_total
.mL
, D_total
.a
, 1, l1
+1, buf1
);
1659 VG_(percentify
)(Dr_total
.mL
, Dr_total
.a
, 1, l2
+1, buf2
);
1660 VG_(percentify
)(Dw_total
.mL
, Dw_total
.a
, 1, l3
+1, buf3
);
1661 VG_(umsg
)("LLd miss rate: %s (%s + %s )\n", buf1
, buf2
,buf3
);
1664 /* LL overall results */
1666 LL_total
= Dr_total
.m1
+ Dw_total
.m1
+ Ir_total
.m1
;
1667 LL_total_r
= Dr_total
.m1
+ Ir_total
.m1
;
1668 LL_total_w
= Dw_total
.m1
;
1669 VG_(umsg
)(fmt
, "LL refs: ",
1670 LL_total
, LL_total_r
, LL_total_w
);
1672 LL_total_m
= Dr_total
.mL
+ Dw_total
.mL
+ Ir_total
.mL
;
1673 LL_total_mr
= Dr_total
.mL
+ Ir_total
.mL
;
1674 LL_total_mw
= Dw_total
.mL
;
1675 VG_(umsg
)(fmt
, "LL misses: ",
1676 LL_total_m
, LL_total_mr
, LL_total_mw
);
1678 VG_(percentify
)(LL_total_m
, (Ir_total
.a
+ D_total
.a
), 1, l1
+1, buf1
);
1679 VG_(percentify
)(LL_total_mr
, (Ir_total
.a
+ Dr_total
.a
), 1, l2
+1, buf2
);
1680 VG_(percentify
)(LL_total_mw
, Dw_total
.a
, 1, l3
+1, buf3
);
1681 VG_(umsg
)("LL miss rate: %s (%s + %s )\n", buf1
, buf2
,buf3
);
1684 /* If branch profiling is enabled, show branch overall results. */
1685 if (clo_branch_sim
) {
1686 /* Make format string, getting width right for numbers */
1687 VG_(sprintf
)(fmt
, "%%s %%,%dllu (%%,%dllu cond + %%,%dllu ind)\n",
1690 if (0 == Bc_total
.b
) Bc_total
.b
= 1;
1691 if (0 == Bi_total
.b
) Bi_total
.b
= 1;
1692 B_total
.b
= Bc_total
.b
+ Bi_total
.b
;
1693 B_total
.mp
= Bc_total
.mp
+ Bi_total
.mp
;
1696 VG_(umsg
)(fmt
, "Branches: ",
1697 B_total
.b
, Bc_total
.b
, Bi_total
.b
);
1699 VG_(umsg
)(fmt
, "Mispredicts: ",
1700 B_total
.mp
, Bc_total
.mp
, Bi_total
.mp
);
1702 VG_(percentify
)(B_total
.mp
, B_total
.b
, 1, l1
+1, buf1
);
1703 VG_(percentify
)(Bc_total
.mp
, Bc_total
.b
, 1, l2
+1, buf2
);
1704 VG_(percentify
)(Bi_total
.mp
, Bi_total
.b
, 1, l3
+1, buf3
);
1706 VG_(umsg
)("Mispred rate: %s (%s + %s )\n", buf1
, buf2
,buf3
);
1710 if (VG_(clo_stats
)) {
1711 Int debug_lookups
= full_debugs
+ fn_debugs
+
1712 file_line_debugs
+ no_debugs
;
1715 VG_(dmsg
)("cachegrind: distinct files : %d\n", distinct_files
);
1716 VG_(dmsg
)("cachegrind: distinct functions : %d\n", distinct_fns
);
1717 VG_(dmsg
)("cachegrind: distinct lines : %d\n", distinct_lines
);
1718 VG_(dmsg
)("cachegrind: distinct instrs NoX: %d\n", distinct_instrsNoX
);
1719 VG_(dmsg
)("cachegrind: distinct instrs Gen: %d\n", distinct_instrsGen
);
1720 VG_(dmsg
)("cachegrind: debug lookups : %d\n", debug_lookups
);
1722 VG_(percentify
)(full_debugs
, debug_lookups
, 1, 6, buf1
);
1723 VG_(percentify
)(file_line_debugs
, debug_lookups
, 1, 6, buf2
);
1724 VG_(percentify
)(fn_debugs
, debug_lookups
, 1, 6, buf3
);
1725 VG_(percentify
)(no_debugs
, debug_lookups
, 1, 6, buf4
);
1726 VG_(dmsg
)("cachegrind: with full info:%s (%d)\n",
1728 VG_(dmsg
)("cachegrind: with file/line info:%s (%d)\n",
1729 buf2
, file_line_debugs
);
1730 VG_(dmsg
)("cachegrind: with fn name info:%s (%d)\n",
1732 VG_(dmsg
)("cachegrind: with zero info:%s (%d)\n",
1735 VG_(dmsg
)("cachegrind: string table size: %lu\n",
1736 VG_(OSetGen_Size
)(stringTable
));
1737 VG_(dmsg
)("cachegrind: CC table size: %lu\n",
1738 VG_(OSetGen_Size
)(CC_table
));
1739 VG_(dmsg
)("cachegrind: InstrInfo table size: %lu\n",
1740 VG_(OSetGen_Size
)(instrInfoTable
));
1744 /*--------------------------------------------------------------------*/
1745 /*--- Discarding BB info ---*/
1746 /*--------------------------------------------------------------------*/
1748 // Called when a translation is removed from the translation cache for
1749 // any reason at all: to free up space, because the guest code was
1750 // unmapped or modified, or for any arbitrary reason.
/* Drop the per-superblock instrumentation info when the core discards a
   translation, so the InstrInfo table stays in sync with the translation
   cache.  NOTE(review): this extract is missing the function braces, the
   sbInfo local declaration, and (presumably) a debug guard around the
   VG_(printf) below — confirm against the full file. */
void cg_discard_superblock_info ( Addr64 orig_addr64, VexGuestExtents vge )
   /* The InstrInfo table is keyed on the first guest address of the block. */
   Addr orig_addr = (Addr)vge.base[0];

   tl_assert(vge.n_used > 0);

   VG_(printf)( "discard_basic_block_info: %p, %p, %llu\n",
                (void*)(Addr)orig_addr,
                (void*)(Addr)vge.base[0], (ULong)vge.len[0]);

   // Get BB info, remove from table, free BB info.  Simple!  Note that we
   // use orig_addr, not the first instruction address in vge.
   sbInfo = VG_(OSetGen_Remove)(instrInfoTable, &orig_addr);
   tl_assert(NULL != sbInfo);   /* must have been instrumented earlier */
   VG_(OSetGen_FreeNode)(instrInfoTable, sbInfo);
1771 /*--------------------------------------------------------------------*/
1772 /*--- Command line processing ---*/
1773 /*--------------------------------------------------------------------*/
/* Parse one Cachegrind command-line option; presumably returns True iff the
   option was recognised.  NOTE(review): the function braces, the remaining
   arguments of the str_clo_cache_opt call, and the final else/return are
   missing from this extract. */
static Bool cg_process_cmd_line_option(const HChar* arg)
   /* Cache-geometry options (--I1=/--D1=/--LL=) are handled by the shared
      core helper; the clo_*_cache structs are filled in there. */
   if (VG_(str_clo_cache_opt)(arg,
       /* [elided in extract: clo_I1_cache/clo_D1_cache/clo_LL_cache args] */
   else if VG_STR_CLO( arg, "--cachegrind-out-file", clo_cachegrind_out_file) {}
   else if VG_BOOL_CLO(arg, "--cache-sim",  clo_cache_sim) {}
   else if VG_BOOL_CLO(arg, "--branch-sim", clo_branch_sim) {}
/* Print usage text for Cachegrind-specific command-line options.
   NOTE(review): the VG_(printf) call that wraps the option strings below is
   missing from this extract. */
static void cg_print_usage(void)
   VG_(print_cache_clo_opts)();   /* shared --I1/--D1/--LL help text */
" --cache-sim=yes|no [yes] collect cache stats?\n"
" --branch-sim=yes|no [no] collect branch prediction stats?\n"
" --cachegrind-out-file=<file> output file name [cachegrind.out.%%p]\n"
/* Print usage text for Cachegrind debugging options.
   NOTE(review): the function body is missing from this extract. */
static void cg_print_debug_usage(void)
1808 /*--------------------------------------------------------------------*/
1810 /*--------------------------------------------------------------------*/
1812 static void cg_post_clo_init(void); /* just below */
/* Pre-command-line-option initialisation: register tool details and the
   core/tool interface callbacks with the Valgrind core.
   NOTE(review): the function braces and parts of two registration calls
   (cg_instrument/cg_fini, cg_print_usage) are missing from this extract. */
static void cg_pre_clo_init(void)
   VG_(details_name)            ("Cachegrind");
   VG_(details_version)         (NULL);
   VG_(details_description)     ("a cache and branch-prediction profiler");
   VG_(details_copyright_author)(
      "Copyright (C) 2002-2013, and GNU GPL'd, by Nicholas Nethercote et al.");
   VG_(details_bug_reports_to)  (VG_BUGS_TO);
   VG_(details_avg_translation_sizeB) ( 500 );

   /* Request SP-at-mem-access register updates so the cache simulation sees
      realistic guest state. */
   VG_(clo_vex_control).iropt_register_updates
      = VexRegUpdSpAtMemAccess; // overridable by the user.

   VG_(basic_tool_funcs)          (cg_post_clo_init,
      /* [elided in extract: cg_instrument and cg_fini arguments] */
   VG_(needs_superblock_discards)(cg_discard_superblock_info);
   VG_(needs_command_line_options)(cg_process_cmd_line_option,
      /* [elided in extract: cg_print_usage argument] */
                                   cg_print_debug_usage);
/* Post-command-line-option initialisation: create the CC / InstrInfo /
   string tables, configure the cache geometries, and start the simulator.
   NOTE(review): the function braces, the table assignment targets
   (presumably CC_table, instrInfoTable, stringTable), comparison-function
   arguments, and the VG_(exit) on the error path are missing from this
   extract. */
static void cg_post_clo_init(void)
   cache_t I1c, D1c, LLc;

   /* Per-source-line cost-centre table, keyed on the CodeLoc at 'loc'. */
   VG_(OSetGen_Create)(offsetof(LineCC, loc),
      /* [elided in extract: cmp fn] */
                       VG_(malloc), "cg.main.cpci.1",
      /* [elided in extract: free fn, assignment target] */
   VG_(OSetGen_Create)(/*keyOff*/0,
      /* [elided in extract: cmp fn] */
                       VG_(malloc), "cg.main.cpci.2",
      /* [elided in extract: free fn, assignment target] */
   VG_(OSetGen_Create)(/*keyOff*/0,
      /* [elided in extract: cmp fn] */
                       VG_(malloc), "cg.main.cpci.3",
      /* [elided in extract: free fn, assignment target] */

   /* Resolve the final cache geometries from the command line / host CPU. */
   VG_(post_clo_init_configure_caches)(&I1c, &D1c, &LLc,
      /* [elided in extract: &clo_I1_cache, &clo_D1_cache, &clo_LL_cache] */

   // min_line_size is used to make sure that we never feed
   // accesses to the simulator straddling more than two
   // cache lines at any cache level
   min_line_size = (I1c.line_size < D1c.line_size) ? I1c.line_size : D1c.line_size;
   min_line_size = (LLc.line_size < min_line_size) ? LLc.line_size : min_line_size;

   Int largest_load_or_store_size
      = VG_(machine_get_size_of_largest_guest_register)();
   if (min_line_size < largest_load_or_store_size) {
      /* We can't continue, because the cache simulation might
         straddle more than 2 lines, and it will assert.  So let's
         just stop before we start. */
      VG_(umsg)("Cachegrind: cannot continue: the minimum line size (%d)\n",
                (Int)min_line_size);
      VG_(umsg)(" must be equal to or larger than the maximum register size (%d)\n",
                largest_load_or_store_size );
      VG_(umsg)(" but it is not. Exiting now.\n");
      /* [elided in extract: VG_(exit)(1) and closing brace] */

   cachesim_initcaches(I1c, D1c, LLc);
/* Registers cg_pre_clo_init as this tool's entry point and pins the
   core/tool interface version. */
VG_DETERMINE_INTERFACE_VERSION(cg_pre_clo_init)
1886 /*--------------------------------------------------------------------*/
1888 /*--------------------------------------------------------------------*/