Bug 497723 - forgot to restore callgrind output cleanup
[valgrind.git] / cachegrind / cg_main.c
blobe8f534c98b29cb196f7e0e8bc2225cec4459c370
2 /*--------------------------------------------------------------------*/
3 /*--- Cachegrind: everything but the simulation itself. ---*/
4 /*--- cg_main.c ---*/
5 /*--------------------------------------------------------------------*/
7 /*
8 This file is part of Cachegrind, a high-precision tracing profiler
9 built with Valgrind.
11 Copyright (C) 2002-2023 Nicholas Nethercote
12 njn@valgrind.org
14 This program is free software; you can redistribute it and/or
15 modify it under the terms of the GNU General Public License as
16 published by the Free Software Foundation; either version 2 of the
17 License, or (at your option) any later version.
19 This program is distributed in the hope that it will be useful, but
20 WITHOUT ANY WARRANTY; without even the implied warranty of
21 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
22 General Public License for more details.
24 You should have received a copy of the GNU General Public License
25 along with this program; if not, see <http://www.gnu.org/licenses/>.
27 The GNU General Public License is contained in the file COPYING.
30 #include "pub_tool_basics.h"
31 #include "pub_tool_debuginfo.h"
32 #include "pub_tool_libcbase.h"
33 #include "pub_tool_libcassert.h"
34 #include "pub_tool_libcfile.h"
35 #include "pub_tool_libcprint.h"
36 #include "pub_tool_libcproc.h"
37 #include "pub_tool_mallocfree.h"
38 #include "pub_tool_options.h"
39 #include "pub_tool_oset.h"
40 #include "pub_tool_tooliface.h"
41 #include "pub_tool_transtab.h"
42 #include "pub_tool_xarray.h"
43 #include "pub_tool_clientstate.h"
44 #include "pub_tool_machine.h" // VG_(fnptr_to_fnentry)
46 #include "cachegrind.h"
47 #include "cg_arch.h"
48 #include "cg_sim.c"
49 #include "cg_branchpred.c"
51 /*------------------------------------------------------------*/
52 /*--- Constants ---*/
53 /*------------------------------------------------------------*/
/* Debugging switch: define as 1 for very verbose debugging output,
   0 (the default) to keep it quiet. */
#define DEBUG_CG 0
58 /*------------------------------------------------------------*/
59 /*--- Options ---*/
60 /*------------------------------------------------------------*/
62 static Bool clo_cache_sim = False; /* do cache simulation? */
63 static Bool clo_branch_sim = False; /* do branch simulation? */
64 static Bool clo_instr_at_start = True; /* instrument at startup? */
65 static const HChar* clo_cachegrind_out_file = "cachegrind.out.%p";
67 /*------------------------------------------------------------*/
68 /*--- Cachesim configuration ---*/
69 /*------------------------------------------------------------*/
71 static Int min_line_size = 0; /* min of L1 and LL cache line sizes */
73 /*------------------------------------------------------------*/
74 /*--- Types and Data Structures ---*/
75 /*------------------------------------------------------------*/
77 typedef
78 struct {
79 ULong a; /* total # memory accesses of this kind */
80 ULong m1; /* misses in the first level cache */
81 ULong mL; /* misses in the second level cache */
83 CacheCC;
85 typedef
86 struct {
87 ULong b; /* total # branches of this kind */
88 ULong mp; /* number of branches mispredicted */
90 BranchCC;
92 //------------------------------------------------------------
93 // Primary data structure #1: CC table
94 // - Holds the per-source-line hit/miss stats, grouped by file/function/line.
95 // - an ordered set of CCs. CC indexing done by file/function/line (as
96 // determined from the instrAddr).
97 // - Traversed for dumping stats at end in file/func/line hierarchy.
99 typedef struct {
100 HChar* file;
101 const HChar* fn;
102 Int line;
104 CodeLoc;
106 typedef struct {
107 CodeLoc loc; /* Source location that these counts pertain to */
108 CacheCC Ir; /* Insn read counts */
109 CacheCC Dr; /* Data read counts */
110 CacheCC Dw; /* Data write/modify counts */
111 BranchCC Bc; /* Conditional branch counts */
112 BranchCC Bi; /* Indirect branch counts */
113 } LineCC;
115 // First compare file, then fn, then line.
116 static Word cmp_CodeLoc_LineCC(const void *vloc, const void *vcc)
118 Word res;
119 const CodeLoc* a = (const CodeLoc*)vloc;
120 const CodeLoc* b = &(((const LineCC*)vcc)->loc);
122 res = VG_(strcmp)(a->file, b->file);
123 if (0 != res)
124 return res;
126 res = VG_(strcmp)(a->fn, b->fn);
127 if (0 != res)
128 return res;
130 return a->line - b->line;
133 static OSet* CC_table;
135 //------------------------------------------------------------
136 // Primary data structure #2: InstrInfo table
137 // - Holds the cached info about each instr that is used for simulation.
138 // - table(SB_start_addr, list(InstrInfo))
139 // - For each SB, each InstrInfo in the list holds info about the
140 // instruction (instrLen, instrAddr, etc), plus a pointer to its line
141 // CC. This node is what's passed to the simulation function.
142 // - When SBs are discarded the relevant list(instr_details) is freed.
144 typedef struct _InstrInfo InstrInfo;
145 struct _InstrInfo {
146 Addr instr_addr;
147 UChar instr_len;
148 LineCC* parent; // parent line-CC
151 typedef struct _SB_info SB_info;
152 struct _SB_info {
153 Addr SB_addr; // key; MUST BE FIRST
154 Int n_instrs;
155 InstrInfo instrs[0];
158 static OSet* instrInfoTable;
160 //------------------------------------------------------------
161 // Secondary data structure: string table
162 // - holds strings, avoiding dups
163 // - used for filenames and function names, each of which will be
164 // pointed to by one or more CCs.
165 // - it also allows equality checks just by pointer comparison, which
166 // is good when printing the output file at the end.
168 static OSet* stringTable;
170 //------------------------------------------------------------
171 // Stats
172 static Int distinct_files = 0;
173 static Int distinct_fns = 0;
174 static Int distinct_lines = 0;
175 static Int distinct_instrsGen = 0;
176 static Int distinct_instrsNoX = 0;
178 static Int full_debugs = 0;
179 static Int file_line_debugs = 0;
180 static Int fn_debugs = 0;
181 static Int no_debugs = 0;
183 //------------------------------------------------------------
184 // Instrumentation control
185 static Bool instr_enabled = True;
187 /*------------------------------------------------------------*/
188 /*--- String table operations ---*/
189 /*------------------------------------------------------------*/
191 static Word stringCmp( const void* key, const void* elem )
193 return VG_(strcmp)(*(const HChar *const *)key, *(const HChar *const *)elem);
196 // Get a permanent string; either pull it out of the string table if it's
197 // been encountered before, or dup it and put it into the string table.
198 static HChar* get_perm_string(const HChar* s)
200 HChar** s_ptr = VG_(OSetGen_Lookup)(stringTable, &s);
201 if (s_ptr) {
202 return *s_ptr;
203 } else {
204 HChar** s_node = VG_(OSetGen_AllocNode)(stringTable, sizeof(HChar*));
205 *s_node = VG_(strdup)("cg.main.gps.1", s);
206 VG_(OSetGen_Insert)(stringTable, s_node);
207 return *s_node;
211 /*------------------------------------------------------------*/
212 /*--- CC table operations ---*/
213 /*------------------------------------------------------------*/
215 static void get_debug_info(Addr instr_addr, const HChar **dir,
216 const HChar **file, const HChar **fn, UInt* line)
218 DiEpoch ep = VG_(current_DiEpoch)();
219 Bool found_file_line = VG_(get_filename_linenum)(
221 instr_addr,
222 file, dir,
223 line
225 Bool found_fn = VG_(get_fnname)(ep, instr_addr, fn);
227 if (!found_file_line) {
228 *file = "???";
229 *line = 0;
231 if (!found_fn) {
232 *fn = "???";
235 if (found_file_line) {
236 if (found_fn) full_debugs++;
237 else file_line_debugs++;
238 } else {
239 if (found_fn) fn_debugs++;
240 else no_debugs++;
244 // Do a three step traversal: by file, then fn, then line.
245 // Returns a pointer to the line CC, creates a new one if necessary.
246 static LineCC* get_lineCC(Addr origAddr)
248 const HChar *fn, *file, *dir;
249 UInt line;
250 CodeLoc loc;
251 LineCC* lineCC;
253 get_debug_info(origAddr, &dir, &file, &fn, &line);
255 // Form an absolute pathname if a directory is available
256 HChar absfile[VG_(strlen)(dir) + 1 + VG_(strlen)(file) + 1];
258 if (dir[0]) {
259 VG_(sprintf)(absfile, "%s/%s", dir, file);
260 } else {
261 VG_(sprintf)(absfile, "%s", file);
264 loc.file = absfile;
265 loc.fn = fn;
266 loc.line = line;
268 lineCC = VG_(OSetGen_Lookup)(CC_table, &loc);
269 if (!lineCC) {
270 // Allocate and zero a new node.
271 lineCC = VG_(OSetGen_AllocNode)(CC_table, sizeof(LineCC));
272 lineCC->loc.file = get_perm_string(loc.file);
273 lineCC->loc.fn = get_perm_string(loc.fn);
274 lineCC->loc.line = loc.line;
275 lineCC->Ir.a = 0;
276 lineCC->Ir.m1 = 0;
277 lineCC->Ir.mL = 0;
278 lineCC->Dr.a = 0;
279 lineCC->Dr.m1 = 0;
280 lineCC->Dr.mL = 0;
281 lineCC->Dw.a = 0;
282 lineCC->Dw.m1 = 0;
283 lineCC->Dw.mL = 0;
284 lineCC->Bc.b = 0;
285 lineCC->Bc.mp = 0;
286 lineCC->Bi.b = 0;
287 lineCC->Bi.mp = 0;
288 VG_(OSetGen_Insert)(CC_table, lineCC);
291 return lineCC;
294 /*------------------------------------------------------------*/
295 /*--- Cache simulation functions ---*/
296 /*------------------------------------------------------------*/
/* A common case for an instruction read event is that the
 * bytes read belong to the same cache line in both L1I and LL
 * (if cache line sizes of L1 and LL are the same).
 * As this can be detected at instrumentation time, and results
 * in faster simulation, special-casing is beneficial.
 *
 * Abbreviations used in var/function names:
 *  IrNoX - instruction read does not cross cache lines
 *  IrGen - generic instruction read; not detected as IrNoX
 *  Ir    - not known / not important whether it is an IrNoX
 */
310 // Only used with --cache-sim=no.
311 static VG_REGPARM(1)
312 void log_1Ir(InstrInfo* n)
314 n->parent->Ir.a++;
317 // Only used with --cache-sim=no.
318 static VG_REGPARM(2)
319 void log_2Ir(InstrInfo* n, InstrInfo* n2)
321 n->parent->Ir.a++;
322 n2->parent->Ir.a++;
325 // Only used with --cache-sim=no.
326 static VG_REGPARM(3)
327 void log_3Ir(InstrInfo* n, InstrInfo* n2, InstrInfo* n3)
329 n->parent->Ir.a++;
330 n2->parent->Ir.a++;
331 n3->parent->Ir.a++;
334 // Generic case for instruction reads: may cross cache lines.
335 // All other Ir handlers expect IrNoX instruction reads.
336 static VG_REGPARM(1)
337 void log_1IrGen_0D_cache_access(InstrInfo* n)
339 //VG_(printf)("1IrGen_0D : CCaddr=0x%010lx, iaddr=0x%010lx, isize=%lu\n",
340 // n, n->instr_addr, n->instr_len);
341 cachesim_I1_doref_Gen(n->instr_addr, n->instr_len,
342 &n->parent->Ir.m1, &n->parent->Ir.mL);
343 n->parent->Ir.a++;
346 static VG_REGPARM(1)
347 void log_1IrNoX_0D_cache_access(InstrInfo* n)
349 //VG_(printf)("1IrNoX_0D : CCaddr=0x%010lx, iaddr=0x%010lx, isize=%lu\n",
350 // n, n->instr_addr, n->instr_len);
351 cachesim_I1_doref_NoX(n->instr_addr, n->instr_len,
352 &n->parent->Ir.m1, &n->parent->Ir.mL);
353 n->parent->Ir.a++;
356 static VG_REGPARM(2)
357 void log_2IrNoX_0D_cache_access(InstrInfo* n, InstrInfo* n2)
359 //VG_(printf)("2IrNoX_0D : CC1addr=0x%010lx, i1addr=0x%010lx, i1size=%lu\n"
360 // " CC2addr=0x%010lx, i2addr=0x%010lx, i2size=%lu\n",
361 // n, n->instr_addr, n->instr_len,
362 // n2, n2->instr_addr, n2->instr_len);
363 cachesim_I1_doref_NoX(n->instr_addr, n->instr_len,
364 &n->parent->Ir.m1, &n->parent->Ir.mL);
365 n->parent->Ir.a++;
366 cachesim_I1_doref_NoX(n2->instr_addr, n2->instr_len,
367 &n2->parent->Ir.m1, &n2->parent->Ir.mL);
368 n2->parent->Ir.a++;
371 static VG_REGPARM(3)
372 void log_3IrNoX_0D_cache_access(InstrInfo* n, InstrInfo* n2, InstrInfo* n3)
374 //VG_(printf)("3IrNoX_0D : CC1addr=0x%010lx, i1addr=0x%010lx, i1size=%lu\n"
375 // " CC2addr=0x%010lx, i2addr=0x%010lx, i2size=%lu\n"
376 // " CC3addr=0x%010lx, i3addr=0x%010lx, i3size=%lu\n",
377 // n, n->instr_addr, n->instr_len,
378 // n2, n2->instr_addr, n2->instr_len,
379 // n3, n3->instr_addr, n3->instr_len);
380 cachesim_I1_doref_NoX(n->instr_addr, n->instr_len,
381 &n->parent->Ir.m1, &n->parent->Ir.mL);
382 n->parent->Ir.a++;
383 cachesim_I1_doref_NoX(n2->instr_addr, n2->instr_len,
384 &n2->parent->Ir.m1, &n2->parent->Ir.mL);
385 n2->parent->Ir.a++;
386 cachesim_I1_doref_NoX(n3->instr_addr, n3->instr_len,
387 &n3->parent->Ir.m1, &n3->parent->Ir.mL);
388 n3->parent->Ir.a++;
391 static VG_REGPARM(3)
392 void log_1IrNoX_1Dr_cache_access(InstrInfo* n, Addr data_addr, Word data_size)
394 //VG_(printf)("1IrNoX_1Dr: CCaddr=0x%010lx, iaddr=0x%010lx, isize=%lu\n"
395 // " daddr=0x%010lx, dsize=%lu\n",
396 // n, n->instr_addr, n->instr_len, data_addr, data_size);
397 cachesim_I1_doref_NoX(n->instr_addr, n->instr_len,
398 &n->parent->Ir.m1, &n->parent->Ir.mL);
399 n->parent->Ir.a++;
401 cachesim_D1_doref(data_addr, data_size,
402 &n->parent->Dr.m1, &n->parent->Dr.mL);
403 n->parent->Dr.a++;
406 static VG_REGPARM(3)
407 void log_1IrNoX_1Dw_cache_access(InstrInfo* n, Addr data_addr, Word data_size)
409 //VG_(printf)("1IrNoX_1Dw: CCaddr=0x%010lx, iaddr=0x%010lx, isize=%lu\n"
410 // " daddr=0x%010lx, dsize=%lu\n",
411 // n, n->instr_addr, n->instr_len, data_addr, data_size);
412 cachesim_I1_doref_NoX(n->instr_addr, n->instr_len,
413 &n->parent->Ir.m1, &n->parent->Ir.mL);
414 n->parent->Ir.a++;
416 cachesim_D1_doref(data_addr, data_size,
417 &n->parent->Dw.m1, &n->parent->Dw.mL);
418 n->parent->Dw.a++;
421 /* Note that addEvent_D_guarded assumes that log_0Ir_1Dr_cache_access
422 and log_0Ir_1Dw_cache_access have exactly the same prototype. If
423 you change them, you must change addEvent_D_guarded too. */
424 static VG_REGPARM(3)
425 void log_0Ir_1Dr_cache_access(InstrInfo* n, Addr data_addr, Word data_size)
427 //VG_(printf)("0Ir_1Dr: CCaddr=0x%010lx, daddr=0x%010lx, dsize=%lu\n",
428 // n, data_addr, data_size);
429 cachesim_D1_doref(data_addr, data_size,
430 &n->parent->Dr.m1, &n->parent->Dr.mL);
431 n->parent->Dr.a++;
434 /* See comment on log_0Ir_1Dr_cache_access. */
435 static VG_REGPARM(3)
436 void log_0Ir_1Dw_cache_access(InstrInfo* n, Addr data_addr, Word data_size)
438 //VG_(printf)("0Ir_1Dw: CCaddr=0x%010lx, daddr=0x%010lx, dsize=%lu\n",
439 // n, data_addr, data_size);
440 cachesim_D1_doref(data_addr, data_size,
441 &n->parent->Dw.m1, &n->parent->Dw.mL);
442 n->parent->Dw.a++;
445 /* For branches, we consult two different predictors, one which
446 predicts taken/untaken for conditional branches, and the other
447 which predicts the branch target address for indirect branches
448 (jump-to-register style ones). */
450 static VG_REGPARM(2)
451 void log_cond_branch(InstrInfo* n, Word taken)
453 //VG_(printf)("cbrnch: CCaddr=0x%010lx, taken=0x%010lx\n",
454 // n, taken);
455 n->parent->Bc.b++;
456 n->parent->Bc.mp
457 += (1 & do_cond_branch_predict(n->instr_addr, taken));
460 static VG_REGPARM(2)
461 void log_ind_branch(InstrInfo* n, UWord actual_dst)
463 //VG_(printf)("ibrnch: CCaddr=0x%010lx, dst=0x%010lx\n",
464 // n, actual_dst);
465 n->parent->Bi.b++;
466 n->parent->Bi.mp
467 += (1 & do_ind_branch_predict(n->instr_addr, actual_dst));
471 /*------------------------------------------------------------*/
472 /*--- Instrumentation types and structures ---*/
473 /*------------------------------------------------------------*/
475 /* Maintain an ordered list of memory events which are outstanding, in
476 the sense that no IR has yet been generated to do the relevant
477 helper calls. The BB is scanned top to bottom and memory events
478 are added to the end of the list, merging with the most recent
479 notified event where possible (Dw immediately following Dr and
480 having the same size and EA can be merged).
482 This merging is done so that for architectures which have
483 load-op-store instructions (x86, amd64), the insn is treated as if
484 it makes just one memory reference (a modify), rather than two (a
485 read followed by a write at the same address).
487 At various points the list will need to be flushed, that is, IR
488 generated from it. That must happen before any possible exit from
489 the block (the end, or an IRStmt_Exit). Flushing also takes place
490 when there is no space to add a new event.
492 If we require the simulation statistics to be up to date with
493 respect to possible memory exceptions, then the list would have to
494 be flushed before each memory reference. That would however lose
495 performance by inhibiting event-merging during flushing.
497 Flushing the list consists of walking it start to end and emitting
498 instrumentation IR for each event, in the order in which they
499 appear. It may be possible to emit a single call for two adjacent
500 events in order to reduce the number of helper function calls made.
501 For example, it could well be profitable to handle two adjacent Ir
502 events with a single helper call. */
504 typedef
505 IRExpr
506 IRAtom;
/* The kinds of event that can sit in the outstanding-event queue. */
typedef enum {
   Ev_IrNoX,  // Instruction read not crossing cache lines
   Ev_IrGen,  // Generic Ir, not being detected as IrNoX
   Ev_Dr,     // Data read
   Ev_Dw,     // Data write
   Ev_Dm,     // Data modify (read then write)
   Ev_Bc,     // branch conditional
   Ev_Bi      // branch indirect (to unknown destination)
} EventTag;
520 typedef
521 struct {
522 EventTag tag;
523 InstrInfo* inode;
524 union {
525 struct {
526 } IrGen;
527 struct {
528 } IrNoX;
529 struct {
530 IRAtom* ea;
531 Int szB;
532 } Dr;
533 struct {
534 IRAtom* ea;
535 Int szB;
536 } Dw;
537 struct {
538 IRAtom* ea;
539 Int szB;
540 } Dm;
541 struct {
542 IRAtom* taken; /* :: Ity_I1 */
543 } Bc;
544 struct {
545 IRAtom* dst;
546 } Bi;
547 } Ev;
549 Event;
551 static void init_Event ( Event* ev ) {
552 VG_(memset)(ev, 0, sizeof(Event));
555 static IRAtom* get_Event_dea ( Event* ev ) {
556 switch (ev->tag) {
557 case Ev_Dr: return ev->Ev.Dr.ea;
558 case Ev_Dw: return ev->Ev.Dw.ea;
559 case Ev_Dm: return ev->Ev.Dm.ea;
560 default: tl_assert(0);
564 static Int get_Event_dszB ( Event* ev ) {
565 switch (ev->tag) {
566 case Ev_Dr: return ev->Ev.Dr.szB;
567 case Ev_Dw: return ev->Ev.Dw.szB;
568 case Ev_Dm: return ev->Ev.Dm.szB;
569 default: tl_assert(0);
/* Capacity of the outstanding-event queue.  The value is arbitrary: a
   bigger queue allows more event merging, but potentially induces more
   spilling because the address temporaries stay live for longer. */
#define N_EVENTS 16
581 /* A struct which holds all the running state during instrumentation.
582 Mostly to avoid passing loads of parameters everywhere. */
583 typedef
584 struct {
585 /* The current outstanding-memory-event list. */
586 Event events[N_EVENTS];
587 Int events_used;
589 /* The array of InstrInfo bins for the BB. */
590 SB_info* sbInfo;
592 /* Number InstrInfo bins 'used' so far. */
593 Int sbInfo_i;
595 /* The output SB being constructed. */
596 IRSB* sbOut;
598 CgState;
601 /*------------------------------------------------------------*/
602 /*--- Instrumentation main ---*/
603 /*------------------------------------------------------------*/
605 // Note that origAddr is the real origAddr, not the address of the first
606 // instruction in the block (they can be different due to redirection).
607 static
608 SB_info* get_SB_info(IRSB* sbIn, Addr origAddr)
610 Int i, n_instrs;
611 IRStmt* st;
612 SB_info* sbInfo;
614 // Count number of original instrs in SB
615 n_instrs = 0;
616 for (i = 0; i < sbIn->stmts_used; i++) {
617 st = sbIn->stmts[i];
618 if (Ist_IMark == st->tag) n_instrs++;
621 // Check that we don't have an entry for this BB in the instr-info table.
622 // If this assertion fails, there has been some screwup: some
623 // translations must have been discarded but Cachegrind hasn't discarded
624 // the corresponding entries in the instr-info table.
625 sbInfo = VG_(OSetGen_Lookup)(instrInfoTable, &origAddr);
626 tl_assert(NULL == sbInfo);
628 // BB never translated before (at this address, at least; could have
629 // been unloaded and then reloaded elsewhere in memory)
630 sbInfo = VG_(OSetGen_AllocNode)(instrInfoTable,
631 sizeof(SB_info) + n_instrs*sizeof(InstrInfo));
632 sbInfo->SB_addr = origAddr;
633 sbInfo->n_instrs = n_instrs;
634 VG_(OSetGen_Insert)( instrInfoTable, sbInfo );
636 return sbInfo;
640 static void showEvent ( Event* ev )
642 switch (ev->tag) {
643 case Ev_IrGen:
644 VG_(printf)("IrGen %p\n", ev->inode);
645 break;
646 case Ev_IrNoX:
647 VG_(printf)("IrNoX %p\n", ev->inode);
648 break;
649 case Ev_Dr:
650 VG_(printf)("Dr %p %d EA=", ev->inode, ev->Ev.Dr.szB);
651 ppIRExpr(ev->Ev.Dr.ea);
652 VG_(printf)("\n");
653 break;
654 case Ev_Dw:
655 VG_(printf)("Dw %p %d EA=", ev->inode, ev->Ev.Dw.szB);
656 ppIRExpr(ev->Ev.Dw.ea);
657 VG_(printf)("\n");
658 break;
659 case Ev_Dm:
660 VG_(printf)("Dm %p %d EA=", ev->inode, ev->Ev.Dm.szB);
661 ppIRExpr(ev->Ev.Dm.ea);
662 VG_(printf)("\n");
663 break;
664 case Ev_Bc:
665 VG_(printf)("Bc %p GA=", ev->inode);
666 ppIRExpr(ev->Ev.Bc.taken);
667 VG_(printf)("\n");
668 break;
669 case Ev_Bi:
670 VG_(printf)("Bi %p DST=", ev->inode);
671 ppIRExpr(ev->Ev.Bi.dst);
672 VG_(printf)("\n");
673 break;
674 default:
675 tl_assert(0);
676 break;
680 // Reserve and initialise an InstrInfo for the first mention of a new insn.
681 static
682 InstrInfo* setup_InstrInfo ( CgState* cgs, Addr instr_addr, UInt instr_len )
684 InstrInfo* i_node;
685 tl_assert(cgs->sbInfo_i >= 0);
686 tl_assert(cgs->sbInfo_i < cgs->sbInfo->n_instrs);
687 i_node = &cgs->sbInfo->instrs[ cgs->sbInfo_i ];
688 i_node->instr_addr = instr_addr;
689 i_node->instr_len = instr_len;
690 i_node->parent = get_lineCC(instr_addr);
691 cgs->sbInfo_i++;
692 return i_node;
696 /* Generate code for all outstanding memory events, and mark the queue
697 empty. Code is generated into cgs->bbOut, and this activity
698 'consumes' slots in cgs->sbInfo. */
700 static void flushEvents ( CgState* cgs )
702 Int i, regparms;
703 const HChar* helperName;
704 void* helperAddr;
705 IRExpr** argv;
706 IRExpr* i_node_expr;
707 IRDirty* di;
708 Event* ev;
709 Event* ev2;
710 Event* ev3;
712 i = 0;
713 while (i < cgs->events_used) {
715 helperName = NULL;
716 helperAddr = NULL;
717 argv = NULL;
718 regparms = 0;
720 /* generate IR to notify event i and possibly the ones
721 immediately following it. */
722 tl_assert(i >= 0 && i < cgs->events_used);
724 ev = &cgs->events[i];
725 ev2 = ( i < cgs->events_used-1 ? &cgs->events[i+1] : NULL );
726 ev3 = ( i < cgs->events_used-2 ? &cgs->events[i+2] : NULL );
728 if (DEBUG_CG) {
729 VG_(printf)(" flush ");
730 showEvent( ev );
733 i_node_expr = mkIRExpr_HWord( (HWord)ev->inode );
735 /* Decide on helper fn to call and args to pass it, and advance
736 i appropriately. */
737 switch (ev->tag) {
738 case Ev_IrNoX:
739 /* Merge an IrNoX with a following Dr/Dm. */
740 if (ev2 && (ev2->tag == Ev_Dr || ev2->tag == Ev_Dm)) {
741 /* Why is this true? It's because we're merging an Ir
742 with a following Dr or Dm. The Ir derives from the
743 instruction's IMark and the Dr/Dm from data
744 references which follow it. In short it holds
745 because each insn starts with an IMark, hence an
746 Ev_Ir, and so these Dr/Dm must pertain to the
747 immediately preceding Ir. Same applies to analogous
748 assertions in the subsequent cases. */
749 tl_assert(ev2->inode == ev->inode);
750 helperName = "log_1IrNoX_1Dr_cache_access";
751 helperAddr = &log_1IrNoX_1Dr_cache_access;
752 argv = mkIRExprVec_3( i_node_expr,
753 get_Event_dea(ev2),
754 mkIRExpr_HWord( get_Event_dszB(ev2) ) );
755 regparms = 3;
756 i += 2;
758 /* Merge an IrNoX with a following Dw. */
759 else
760 if (ev2 && ev2->tag == Ev_Dw) {
761 tl_assert(ev2->inode == ev->inode);
762 helperName = "log_1IrNoX_1Dw_cache_access";
763 helperAddr = &log_1IrNoX_1Dw_cache_access;
764 argv = mkIRExprVec_3( i_node_expr,
765 get_Event_dea(ev2),
766 mkIRExpr_HWord( get_Event_dszB(ev2) ) );
767 regparms = 3;
768 i += 2;
770 /* Merge an IrNoX with two following IrNoX's. */
771 else
772 if (ev2 && ev3 && ev2->tag == Ev_IrNoX && ev3->tag == Ev_IrNoX)
774 if (clo_cache_sim) {
775 helperName = "log_3IrNoX_0D_cache_access";
776 helperAddr = &log_3IrNoX_0D_cache_access;
777 } else {
778 helperName = "log_3Ir";
779 helperAddr = &log_3Ir;
781 argv = mkIRExprVec_3( i_node_expr,
782 mkIRExpr_HWord( (HWord)ev2->inode ),
783 mkIRExpr_HWord( (HWord)ev3->inode ) );
784 regparms = 3;
785 i += 3;
787 /* Merge an IrNoX with one following IrNoX. */
788 else
789 if (ev2 && ev2->tag == Ev_IrNoX) {
790 if (clo_cache_sim) {
791 helperName = "log_2IrNoX_0D_cache_access";
792 helperAddr = &log_2IrNoX_0D_cache_access;
793 } else {
794 helperName = "log_2Ir";
795 helperAddr = &log_2Ir;
797 argv = mkIRExprVec_2( i_node_expr,
798 mkIRExpr_HWord( (HWord)ev2->inode ) );
799 regparms = 2;
800 i += 2;
802 /* No merging possible; emit as-is. */
803 else {
804 if (clo_cache_sim) {
805 helperName = "log_1IrNoX_0D_cache_access";
806 helperAddr = &log_1IrNoX_0D_cache_access;
807 } else {
808 helperName = "log_1Ir";
809 helperAddr = &log_1Ir;
811 argv = mkIRExprVec_1( i_node_expr );
812 regparms = 1;
813 i++;
815 break;
816 case Ev_IrGen:
817 if (clo_cache_sim) {
818 helperName = "log_1IrGen_0D_cache_access";
819 helperAddr = &log_1IrGen_0D_cache_access;
820 } else {
821 helperName = "log_1Ir";
822 helperAddr = &log_1Ir;
824 argv = mkIRExprVec_1( i_node_expr );
825 regparms = 1;
826 i++;
827 break;
828 case Ev_Dr:
829 case Ev_Dm:
830 /* Data read or modify */
831 helperName = "log_0Ir_1Dr_cache_access";
832 helperAddr = &log_0Ir_1Dr_cache_access;
833 argv = mkIRExprVec_3( i_node_expr,
834 get_Event_dea(ev),
835 mkIRExpr_HWord( get_Event_dszB(ev) ) );
836 regparms = 3;
837 i++;
838 break;
839 case Ev_Dw:
840 /* Data write */
841 helperName = "log_0Ir_1Dw_cache_access";
842 helperAddr = &log_0Ir_1Dw_cache_access;
843 argv = mkIRExprVec_3( i_node_expr,
844 get_Event_dea(ev),
845 mkIRExpr_HWord( get_Event_dszB(ev) ) );
846 regparms = 3;
847 i++;
848 break;
849 case Ev_Bc:
850 /* Conditional branch */
851 helperName = "log_cond_branch";
852 helperAddr = &log_cond_branch;
853 argv = mkIRExprVec_2( i_node_expr, ev->Ev.Bc.taken );
854 regparms = 2;
855 i++;
856 break;
857 case Ev_Bi:
858 /* Branch to an unknown destination */
859 helperName = "log_ind_branch";
860 helperAddr = &log_ind_branch;
861 argv = mkIRExprVec_2( i_node_expr, ev->Ev.Bi.dst );
862 regparms = 2;
863 i++;
864 break;
865 default:
866 tl_assert(0);
869 /* Add the helper. */
870 tl_assert(helperName);
871 tl_assert(helperAddr);
872 tl_assert(argv);
873 di = unsafeIRDirty_0_N( regparms,
874 helperName, VG_(fnptr_to_fnentry)( helperAddr ),
875 argv );
876 addStmtToIRSB( cgs->sbOut, IRStmt_Dirty(di) );
879 cgs->events_used = 0;
882 static void addEvent_Ir ( CgState* cgs, InstrInfo* inode )
884 Event* evt;
885 if (cgs->events_used == N_EVENTS)
886 flushEvents(cgs);
887 tl_assert(cgs->events_used >= 0 && cgs->events_used < N_EVENTS);
888 evt = &cgs->events[cgs->events_used];
889 init_Event(evt);
890 evt->inode = inode;
891 if (cachesim_is_IrNoX(inode->instr_addr, inode->instr_len)) {
892 evt->tag = Ev_IrNoX;
893 distinct_instrsNoX++;
894 } else {
895 evt->tag = Ev_IrGen;
896 distinct_instrsGen++;
898 cgs->events_used++;
901 static
902 void addEvent_Dr ( CgState* cgs, InstrInfo* inode, Int datasize, IRAtom* ea )
904 tl_assert(isIRAtom(ea));
906 if (!clo_cache_sim)
907 return;
909 tl_assert(datasize >= 1 && datasize <= min_line_size);
911 if (cgs->events_used == N_EVENTS) {
912 flushEvents(cgs);
914 tl_assert(cgs->events_used >= 0 && cgs->events_used < N_EVENTS);
915 Event* evt = &cgs->events[cgs->events_used];
916 init_Event(evt);
917 evt->tag = Ev_Dr;
918 evt->inode = inode;
919 evt->Ev.Dr.szB = datasize;
920 evt->Ev.Dr.ea = ea;
921 cgs->events_used++;
924 static
925 void addEvent_Dw ( CgState* cgs, InstrInfo* inode, Int datasize, IRAtom* ea )
927 tl_assert(isIRAtom(ea));
929 if (!clo_cache_sim)
930 return;
932 tl_assert(datasize >= 1 && datasize <= min_line_size);
934 /* Is it possible to merge this write with the preceding read? */
935 if (cgs->events_used > 0) {
936 Event* lastEvt = &cgs->events[cgs->events_used-1];
937 if ( lastEvt->tag == Ev_Dr
938 && lastEvt->Ev.Dr.szB == datasize
939 && lastEvt->inode == inode
940 && eqIRAtom(lastEvt->Ev.Dr.ea, ea))
942 lastEvt->tag = Ev_Dm;
943 return;
947 /* No. Add as normal. */
948 if (cgs->events_used == N_EVENTS)
949 flushEvents(cgs);
950 tl_assert(cgs->events_used >= 0 && cgs->events_used < N_EVENTS);
951 Event* evt = &cgs->events[cgs->events_used];
952 init_Event(evt);
953 evt->tag = Ev_Dw;
954 evt->inode = inode;
955 evt->Ev.Dw.szB = datasize;
956 evt->Ev.Dw.ea = ea;
957 cgs->events_used++;
960 static
961 void addEvent_D_guarded ( CgState* cgs, InstrInfo* inode,
962 Int datasize, IRAtom* ea, IRAtom* guard,
963 Bool isWrite )
965 tl_assert(isIRAtom(ea));
966 tl_assert(guard);
967 tl_assert(isIRAtom(guard));
969 if (!clo_cache_sim)
970 return;
972 tl_assert(datasize >= 1 && datasize <= min_line_size);
974 /* Adding guarded memory actions and merging them with the existing
975 queue is too complex. Simply flush the queue and add this
976 action immediately. Since guarded loads and stores are pretty
977 rare, this is not thought likely to cause any noticeable
978 performance loss as a result of the loss of event-merging
979 opportunities. */
980 tl_assert(cgs->events_used >= 0);
981 flushEvents(cgs);
982 tl_assert(cgs->events_used == 0);
983 /* Same as case Ev_Dw / case Ev_Dr in flushEvents, except with guard */
984 IRExpr* i_node_expr;
985 const HChar* helperName;
986 void* helperAddr;
987 IRExpr** argv;
988 Int regparms;
989 IRDirty* di;
990 i_node_expr = mkIRExpr_HWord( (HWord)inode );
991 helperName = isWrite ? "log_0Ir_1Dw_cache_access"
992 : "log_0Ir_1Dr_cache_access";
993 helperAddr = isWrite ? &log_0Ir_1Dw_cache_access
994 : &log_0Ir_1Dr_cache_access;
995 argv = mkIRExprVec_3( i_node_expr,
996 ea, mkIRExpr_HWord( datasize ) );
997 regparms = 3;
998 di = unsafeIRDirty_0_N(
999 regparms,
1000 helperName, VG_(fnptr_to_fnentry)( helperAddr ),
1001 argv );
1002 di->guard = guard;
1003 addStmtToIRSB( cgs->sbOut, IRStmt_Dirty(di) );
1007 static
1008 void addEvent_Bc ( CgState* cgs, InstrInfo* inode, IRAtom* guard )
1010 Event* evt;
1011 tl_assert(isIRAtom(guard));
1012 tl_assert(typeOfIRExpr(cgs->sbOut->tyenv, guard)
1013 == (sizeof(RegWord)==4 ? Ity_I32 : Ity_I64));
1014 if (!clo_branch_sim)
1015 return;
1016 if (cgs->events_used == N_EVENTS)
1017 flushEvents(cgs);
1018 tl_assert(cgs->events_used >= 0 && cgs->events_used < N_EVENTS);
1019 evt = &cgs->events[cgs->events_used];
1020 init_Event(evt);
1021 evt->tag = Ev_Bc;
1022 evt->inode = inode;
1023 evt->Ev.Bc.taken = guard;
1024 cgs->events_used++;
1027 static
1028 void addEvent_Bi ( CgState* cgs, InstrInfo* inode, IRAtom* whereTo )
1030 Event* evt;
1031 tl_assert(isIRAtom(whereTo));
1032 tl_assert(typeOfIRExpr(cgs->sbOut->tyenv, whereTo)
1033 == (sizeof(RegWord)==4 ? Ity_I32 : Ity_I64));
1034 if (!clo_branch_sim)
1035 return;
1036 if (cgs->events_used == N_EVENTS)
1037 flushEvents(cgs);
1038 tl_assert(cgs->events_used >= 0 && cgs->events_used < N_EVENTS);
1039 evt = &cgs->events[cgs->events_used];
1040 init_Event(evt);
1041 evt->tag = Ev_Bi;
1042 evt->inode = inode;
1043 evt->Ev.Bi.dst = whereTo;
1044 cgs->events_used++;
1047 ////////////////////////////////////////////////////////////
1050 static
1051 IRSB* cg_instrument ( VgCallbackClosure* closure,
1052 IRSB* sbIn,
1053 const VexGuestLayout* layout,
1054 const VexGuestExtents* vge,
1055 const VexArchInfo* archinfo_host,
1056 IRType gWordTy, IRType hWordTy )
1058 Int i;
1059 UInt isize;
1060 IRStmt* st;
1061 Addr cia; /* address of current insn */
1062 CgState cgs;
1063 IRTypeEnv* tyenv = sbIn->tyenv;
1064 InstrInfo* curr_inode = NULL;
1066 if (gWordTy != hWordTy) {
1067 /* We don't currently support this case. */
1068 VG_(tool_panic)("host/guest word size mismatch");
1071 if (!instr_enabled) {
1072 return sbIn;
1075 // Set up new SB
1076 cgs.sbOut = deepCopyIRSBExceptStmts(sbIn);
1078 // Copy verbatim any IR preamble preceding the first IMark
1079 i = 0;
1080 while (i < sbIn->stmts_used && sbIn->stmts[i]->tag != Ist_IMark) {
1081 addStmtToIRSB( cgs.sbOut, sbIn->stmts[i] );
1082 i++;
1085 // Get the first statement, and initial cia from it
1086 tl_assert(sbIn->stmts_used > 0);
1087 tl_assert(i < sbIn->stmts_used);
1088 st = sbIn->stmts[i];
1089 tl_assert(Ist_IMark == st->tag);
1091 cia = st->Ist.IMark.addr;
1092 isize = st->Ist.IMark.len;
1093 // If Vex fails to decode an instruction, the size will be zero.
1094 // Pretend otherwise.
1095 if (isize == 0) isize = VG_MIN_INSTR_SZB;
1097 // Set up running state and get block info
1098 tl_assert(closure->readdr == vge->base[0]);
1099 cgs.events_used = 0;
1100 cgs.sbInfo = get_SB_info(sbIn, (Addr)closure->readdr);
1101 cgs.sbInfo_i = 0;
1103 if (DEBUG_CG)
1104 VG_(printf)("\n\n---------- cg_instrument ----------\n");
1106 // Traverse the block, initialising inodes, adding events and flushing as
1107 // necessary.
1108 for (/*use current i*/; i < sbIn->stmts_used; i++) {
1110 st = sbIn->stmts[i];
1111 tl_assert(isFlatIRStmt(st));
1113 switch (st->tag) {
1114 case Ist_NoOp:
1115 case Ist_AbiHint:
1116 case Ist_Put:
1117 case Ist_PutI:
1118 case Ist_MBE:
1119 break;
1121 case Ist_IMark:
1122 cia = st->Ist.IMark.addr;
1123 isize = st->Ist.IMark.len;
1125 // If Vex fails to decode an instruction, the size will be zero.
1126 // Pretend otherwise.
1127 if (isize == 0) isize = VG_MIN_INSTR_SZB;
1129 // Sanity-check size.
1130 tl_assert( (VG_MIN_INSTR_SZB <= isize && isize <= VG_MAX_INSTR_SZB)
1131 || VG_CLREQ_SZB == isize );
1133 // Get space for and init the inode, record it as the current one.
1134 // Subsequent Dr/Dw/Dm events from the same instruction will
1135 // also use it.
1136 curr_inode = setup_InstrInfo(&cgs, cia, isize);
1138 addEvent_Ir( &cgs, curr_inode );
1139 break;
1141 case Ist_WrTmp: {
1142 IRExpr* data = st->Ist.WrTmp.data;
1143 if (data->tag == Iex_Load) {
1144 IRExpr* aexpr = data->Iex.Load.addr;
1145 // Note also, endianness info is ignored. I guess
1146 // that's not interesting.
1147 addEvent_Dr( &cgs, curr_inode, sizeofIRType(data->Iex.Load.ty),
1148 aexpr );
1150 break;
1153 case Ist_Store: {
1154 IRExpr* data = st->Ist.Store.data;
1155 IRExpr* aexpr = st->Ist.Store.addr;
1156 addEvent_Dw( &cgs, curr_inode,
1157 sizeofIRType(typeOfIRExpr(tyenv, data)), aexpr );
1158 break;
1161 case Ist_StoreG: {
1162 IRStoreG* sg = st->Ist.StoreG.details;
1163 IRExpr* data = sg->data;
1164 IRExpr* addr = sg->addr;
1165 IRType type = typeOfIRExpr(tyenv, data);
1166 tl_assert(type != Ity_INVALID);
1167 addEvent_D_guarded( &cgs, curr_inode,
1168 sizeofIRType(type), addr, sg->guard,
1169 True/*isWrite*/ );
1170 break;
1173 case Ist_LoadG: {
1174 IRLoadG* lg = st->Ist.LoadG.details;
1175 IRType type = Ity_INVALID; /* loaded type */
1176 IRType typeWide = Ity_INVALID; /* after implicit widening */
1177 IRExpr* addr = lg->addr;
1178 typeOfIRLoadGOp(lg->cvt, &typeWide, &type);
1179 tl_assert(type != Ity_INVALID);
1180 addEvent_D_guarded( &cgs, curr_inode,
1181 sizeofIRType(type), addr, lg->guard,
1182 False/*!isWrite*/ );
1183 break;
1186 case Ist_Dirty: {
1187 Int dataSize;
1188 IRDirty* d = st->Ist.Dirty.details;
1189 if (d->mFx != Ifx_None) {
1190 /* This dirty helper accesses memory. Collect the details. */
1191 tl_assert(d->mAddr != NULL);
1192 tl_assert(d->mSize != 0);
1193 dataSize = d->mSize;
1194 // Large (eg. 28B, 108B, 512B on x86) data-sized
1195 // instructions will be done inaccurately, but they're
1196 // very rare and this avoids errors from hitting more
1197 // than two cache lines in the simulation.
1198 if (dataSize > min_line_size)
1199 dataSize = min_line_size;
1200 if (d->mFx == Ifx_Read || d->mFx == Ifx_Modify)
1201 addEvent_Dr( &cgs, curr_inode, dataSize, d->mAddr );
1202 if (d->mFx == Ifx_Write || d->mFx == Ifx_Modify)
1203 addEvent_Dw( &cgs, curr_inode, dataSize, d->mAddr );
1204 } else {
1205 tl_assert(d->mAddr == NULL);
1206 tl_assert(d->mSize == 0);
1208 break;
1211 case Ist_CAS: {
1212 /* We treat it as a read and a write of the location. I
1213 think that is the same behaviour as it was before IRCAS
1214 was introduced, since prior to that point, the Vex
1215 front ends would translate a lock-prefixed instruction
1216 into a (normal) read followed by a (normal) write. */
1217 Int dataSize;
1218 IRCAS* cas = st->Ist.CAS.details;
1219 tl_assert(cas->addr != NULL);
1220 tl_assert(cas->dataLo != NULL);
1221 dataSize = sizeofIRType(typeOfIRExpr(tyenv, cas->dataLo));
1222 if (cas->dataHi != NULL)
1223 dataSize *= 2; /* since it's a doubleword-CAS */
1224 /* I don't think this can ever happen, but play safe. */
1225 if (dataSize > min_line_size)
1226 dataSize = min_line_size;
1227 addEvent_Dr( &cgs, curr_inode, dataSize, cas->addr );
1228 addEvent_Dw( &cgs, curr_inode, dataSize, cas->addr );
1229 break;
1232 case Ist_LLSC: {
1233 IRType dataTy;
1234 if (st->Ist.LLSC.storedata == NULL) {
1235 /* LL */
1236 dataTy = typeOfIRTemp(tyenv, st->Ist.LLSC.result);
1237 addEvent_Dr( &cgs, curr_inode,
1238 sizeofIRType(dataTy), st->Ist.LLSC.addr );
1239 /* flush events before LL, should help SC to succeed */
1240 flushEvents( &cgs );
1241 } else {
1242 /* SC */
1243 dataTy = typeOfIRExpr(tyenv, st->Ist.LLSC.storedata);
1244 addEvent_Dw( &cgs, curr_inode,
1245 sizeofIRType(dataTy), st->Ist.LLSC.addr );
1247 break;
1250 case Ist_Exit: {
1251 // call branch predictor only if this is a branch in guest code
1252 if ( (st->Ist.Exit.jk == Ijk_Boring) ||
1253 (st->Ist.Exit.jk == Ijk_Call) ||
1254 (st->Ist.Exit.jk == Ijk_Ret) )
1256 /* Stuff to widen the guard expression to a host word, so
1257 we can pass it to the branch predictor simulation
1258 functions easily. */
1259 Bool inverted;
1260 Addr nia, sea;
1261 IRConst* dst;
1262 IRType tyW = hWordTy;
1263 IROp widen = tyW==Ity_I32 ? Iop_1Uto32 : Iop_1Uto64;
1264 IROp opXOR = tyW==Ity_I32 ? Iop_Xor32 : Iop_Xor64;
1265 IRTemp guard1 = newIRTemp(cgs.sbOut->tyenv, Ity_I1);
1266 IRTemp guardW = newIRTemp(cgs.sbOut->tyenv, tyW);
1267 IRTemp guard = newIRTemp(cgs.sbOut->tyenv, tyW);
1268 IRExpr* one = tyW==Ity_I32 ? IRExpr_Const(IRConst_U32(1))
1269 : IRExpr_Const(IRConst_U64(1));
1271 /* First we need to figure out whether the side exit got
1272 inverted by the ir optimiser. To do that, figure out
1273 the next (fallthrough) instruction's address and the
1274 side exit address and see if they are the same. */
1275 nia = cia + isize;
1277 /* Side exit address */
1278 dst = st->Ist.Exit.dst;
1279 if (tyW == Ity_I32) {
1280 tl_assert(dst->tag == Ico_U32);
1281 sea = dst->Ico.U32;
1282 } else {
1283 tl_assert(tyW == Ity_I64);
1284 tl_assert(dst->tag == Ico_U64);
1285 sea = dst->Ico.U64;
1288 inverted = nia == sea;
1290 /* Widen the guard expression. */
1291 addStmtToIRSB( cgs.sbOut,
1292 IRStmt_WrTmp( guard1, st->Ist.Exit.guard ));
1293 addStmtToIRSB( cgs.sbOut,
1294 IRStmt_WrTmp( guardW,
1295 IRExpr_Unop(widen,
1296 IRExpr_RdTmp(guard1))) );
1297 /* If the exit is inverted, invert the sense of the guard. */
1298 addStmtToIRSB(
1299 cgs.sbOut,
1300 IRStmt_WrTmp(
1301 guard,
1302 inverted ? IRExpr_Binop(opXOR, IRExpr_RdTmp(guardW), one)
1303 : IRExpr_RdTmp(guardW)
1305 /* And post the event. */
1306 addEvent_Bc( &cgs, curr_inode, IRExpr_RdTmp(guard) );
1309 /* We may never reach the next statement, so need to flush
1310 all outstanding transactions now. */
1311 flushEvents( &cgs );
1312 break;
1315 default:
1316 ppIRStmt(st);
1317 tl_assert(0);
1318 break;
1321 /* Copy the original statement */
1322 addStmtToIRSB( cgs.sbOut, st );
1324 if (DEBUG_CG) {
1325 ppIRStmt(st);
1326 VG_(printf)("\n");
1330 /* Deal with branches to unknown destinations. Except ignore ones
1331 which are function returns as we assume the return stack
1332 predictor never mispredicts. */
1333 if ((sbIn->jumpkind == Ijk_Boring) || (sbIn->jumpkind == Ijk_Call)) {
1334 if (0) { ppIRExpr( sbIn->next ); VG_(printf)("\n"); }
1335 switch (sbIn->next->tag) {
1336 case Iex_Const:
1337 break; /* boring - branch to known address */
1338 case Iex_RdTmp:
1339 /* looks like an indirect branch (branch to unknown) */
1340 addEvent_Bi( &cgs, curr_inode, sbIn->next );
1341 break;
1342 default:
1343 /* shouldn't happen - if the incoming IR is properly
1344 flattened, should only have tmp and const cases to
1345 consider. */
1346 tl_assert(0);
1350 /* At the end of the bb. Flush outstandings. */
1351 flushEvents( &cgs );
1353 /* done. stay sane ... */
1354 tl_assert(cgs.sbInfo_i == cgs.sbInfo->n_instrs);
1356 if (DEBUG_CG) {
1357 VG_(printf)( "goto {");
1358 ppIRJumpKind(sbIn->jumpkind);
1359 VG_(printf)( "} ");
1360 ppIRExpr( sbIn->next );
1361 VG_(printf)( "}\n");
1364 return cgs.sbOut;
1367 /*------------------------------------------------------------*/
1368 /*--- Cache configuration ---*/
1369 /*------------------------------------------------------------*/
1371 static cache_t clo_I1_cache = UNDEFINED_CACHE;
1372 static cache_t clo_D1_cache = UNDEFINED_CACHE;
1373 static cache_t clo_LL_cache = UNDEFINED_CACHE;
1375 /*------------------------------------------------------------*/
1376 /*--- cg_fini() and related function ---*/
1377 /*------------------------------------------------------------*/
1379 // Total reads/writes/misses. Calculated during CC traversal at the end.
1380 // All auto-zeroed.
1381 static CacheCC Ir_total;
1382 static CacheCC Dr_total;
1383 static CacheCC Dw_total;
1384 static BranchCC Bc_total;
1385 static BranchCC Bi_total;
1387 static void fprint_CC_table_and_calc_totals(void)
1389 Int i;
1390 VgFile *fp;
1391 HChar *currFile = NULL;
1392 const HChar *currFn = NULL;
1393 LineCC* lineCC;
1395 // Setup output filename. Nb: it's important to do this now, ie. as late
1396 // as possible. If we do it at start-up and the program forks and the
1397 // output file format string contains a %p (pid) specifier, both the
1398 // parent and child will incorrectly write to the same file; this
1399 // happened in 3.3.0.
1400 HChar* cachegrind_out_file =
1401 VG_(expand_file_name)("--cachegrind-out-file", clo_cachegrind_out_file);
1403 fp = VG_(fopen)(cachegrind_out_file, VKI_O_CREAT|VKI_O_TRUNC|VKI_O_WRONLY,
1404 VKI_S_IRUSR|VKI_S_IWUSR);
1405 if (fp == NULL) {
1406 // If the file can't be opened for whatever reason (conflict
1407 // between multiple cachegrinded processes?), give up now.
1408 VG_(umsg)("error: can't open output data file '%s'\n",
1409 cachegrind_out_file );
1410 VG_(umsg)(" ... so detailed results will be missing.\n");
1411 VG_(free)(cachegrind_out_file);
1412 return;
1413 } else {
1414 VG_(free)(cachegrind_out_file);
1417 if (clo_cache_sim) {
1418 // "desc:" lines (giving I1/D1/LL cache configuration). The spaces after
1419 // the 2nd colon makes cg_annotate's output look nicer.
1420 VG_(fprintf)(fp, "desc: I1 cache: %s\n"
1421 "desc: D1 cache: %s\n"
1422 "desc: LL cache: %s\n",
1423 I1.desc_line, D1.desc_line, LL.desc_line);
1426 // "cmd:" line
1427 VG_(fprintf)(fp, "cmd: %s", VG_(args_the_exename));
1428 for (i = 0; i < VG_(sizeXA)( VG_(args_for_client) ); i++) {
1429 HChar* arg = * (HChar**) VG_(indexXA)( VG_(args_for_client), i );
1430 VG_(fprintf)(fp, " %s", arg);
1432 // "events:" line
1433 if (clo_cache_sim && clo_branch_sim) {
1434 VG_(fprintf)(fp, "\nevents: Ir I1mr ILmr Dr D1mr DLmr Dw D1mw DLmw "
1435 "Bc Bcm Bi Bim\n");
1437 else if (clo_cache_sim && !clo_branch_sim) {
1438 VG_(fprintf)(fp, "\nevents: Ir I1mr ILmr Dr D1mr DLmr Dw D1mw DLmw "
1439 "\n");
1441 else if (!clo_cache_sim && clo_branch_sim) {
1442 VG_(fprintf)(fp, "\nevents: Ir Bc Bcm Bi Bim\n");
1444 else {
1445 VG_(fprintf)(fp, "\nevents: Ir\n");
1448 // Traverse every lineCC
1449 VG_(OSetGen_ResetIter)(CC_table);
1450 while ( (lineCC = VG_(OSetGen_Next)(CC_table)) ) {
1451 Bool just_hit_a_new_file = False;
1452 // If we've hit a new file, print a "fl=" line. Note that because
1453 // each string is stored exactly once in the string table, we can use
1454 // pointer comparison rather than strcmp() to test for equality, which
1455 // is good because most of the time the comparisons are equal and so
1456 // the whole strings would have to be checked.
1457 if ( lineCC->loc.file != currFile ) {
1458 currFile = lineCC->loc.file;
1459 VG_(fprintf)(fp, "fl=%s\n", currFile);
1460 distinct_files++;
1461 just_hit_a_new_file = True;
1463 // If we've hit a new function, print a "fn=" line. We know to do
1464 // this when the function name changes, and also every time we hit a
1465 // new file (in which case the new function name might be the same as
1466 // in the old file, hence the just_hit_a_new_file test).
1467 if ( just_hit_a_new_file || lineCC->loc.fn != currFn ) {
1468 currFn = lineCC->loc.fn;
1469 VG_(fprintf)(fp, "fn=%s\n", currFn);
1470 distinct_fns++;
1473 // Print the LineCC
1474 if (clo_cache_sim && clo_branch_sim) {
1475 VG_(fprintf)(fp, "%d %llu %llu %llu"
1476 " %llu %llu %llu"
1477 " %llu %llu %llu"
1478 " %llu %llu %llu %llu\n",
1479 lineCC->loc.line,
1480 lineCC->Ir.a, lineCC->Ir.m1, lineCC->Ir.mL,
1481 lineCC->Dr.a, lineCC->Dr.m1, lineCC->Dr.mL,
1482 lineCC->Dw.a, lineCC->Dw.m1, lineCC->Dw.mL,
1483 lineCC->Bc.b, lineCC->Bc.mp,
1484 lineCC->Bi.b, lineCC->Bi.mp);
1486 else if (clo_cache_sim && !clo_branch_sim) {
1487 VG_(fprintf)(fp, "%d %llu %llu %llu"
1488 " %llu %llu %llu"
1489 " %llu %llu %llu\n",
1490 lineCC->loc.line,
1491 lineCC->Ir.a, lineCC->Ir.m1, lineCC->Ir.mL,
1492 lineCC->Dr.a, lineCC->Dr.m1, lineCC->Dr.mL,
1493 lineCC->Dw.a, lineCC->Dw.m1, lineCC->Dw.mL);
1495 else if (!clo_cache_sim && clo_branch_sim) {
1496 VG_(fprintf)(fp, "%d %llu"
1497 " %llu %llu %llu %llu\n",
1498 lineCC->loc.line,
1499 lineCC->Ir.a,
1500 lineCC->Bc.b, lineCC->Bc.mp,
1501 lineCC->Bi.b, lineCC->Bi.mp);
1503 else {
1504 VG_(fprintf)(fp, "%d %llu\n",
1505 lineCC->loc.line,
1506 lineCC->Ir.a);
1509 // Update summary stats
1510 Ir_total.a += lineCC->Ir.a;
1511 Ir_total.m1 += lineCC->Ir.m1;
1512 Ir_total.mL += lineCC->Ir.mL;
1513 Dr_total.a += lineCC->Dr.a;
1514 Dr_total.m1 += lineCC->Dr.m1;
1515 Dr_total.mL += lineCC->Dr.mL;
1516 Dw_total.a += lineCC->Dw.a;
1517 Dw_total.m1 += lineCC->Dw.m1;
1518 Dw_total.mL += lineCC->Dw.mL;
1519 Bc_total.b += lineCC->Bc.b;
1520 Bc_total.mp += lineCC->Bc.mp;
1521 Bi_total.b += lineCC->Bi.b;
1522 Bi_total.mp += lineCC->Bi.mp;
1524 distinct_lines++;
1527 // Summary stats must come after rest of table, since we calculate them
1528 // during traversal.
1529 if (clo_cache_sim && clo_branch_sim) {
1530 VG_(fprintf)(fp, "summary:"
1531 " %llu %llu %llu"
1532 " %llu %llu %llu"
1533 " %llu %llu %llu"
1534 " %llu %llu %llu %llu\n",
1535 Ir_total.a, Ir_total.m1, Ir_total.mL,
1536 Dr_total.a, Dr_total.m1, Dr_total.mL,
1537 Dw_total.a, Dw_total.m1, Dw_total.mL,
1538 Bc_total.b, Bc_total.mp,
1539 Bi_total.b, Bi_total.mp);
1541 else if (clo_cache_sim && !clo_branch_sim) {
1542 VG_(fprintf)(fp, "summary:"
1543 " %llu %llu %llu"
1544 " %llu %llu %llu"
1545 " %llu %llu %llu\n",
1546 Ir_total.a, Ir_total.m1, Ir_total.mL,
1547 Dr_total.a, Dr_total.m1, Dr_total.mL,
1548 Dw_total.a, Dw_total.m1, Dw_total.mL);
1550 else if (!clo_cache_sim && clo_branch_sim) {
1551 VG_(fprintf)(fp, "summary:"
1552 " %llu"
1553 " %llu %llu %llu %llu\n",
1554 Ir_total.a,
1555 Bc_total.b, Bc_total.mp,
1556 Bi_total.b, Bi_total.mp);
1558 else {
1559 VG_(fprintf)(fp, "summary:"
1560 " %llu\n",
1561 Ir_total.a);
1564 VG_(fclose)(fp);
1567 static UInt ULong_width(ULong n)
1569 UInt w = 0;
1570 while (n > 0) {
1571 n = n / 10;
1572 w++;
1574 if (w == 0) w = 1;
1575 return w + (w-1)/3; // add space for commas
1578 static void cg_fini(Int exitcode)
1580 static HChar fmt[128]; // OK; large enough
1582 CacheCC D_total;
1583 BranchCC B_total;
1584 ULong LL_total_m, LL_total_mr, LL_total_mw,
1585 LL_total, LL_total_r, LL_total_w;
1586 Int l1, l2, l3;
1588 fprint_CC_table_and_calc_totals();
1590 if (VG_(clo_verbosity) == 0)
1591 return;
1593 // Nb: this isn't called "MAX" because that overshadows a global on Darwin.
1594 #define CG_MAX(a, b) ((a) >= (b) ? (a) : (b))
1596 /* I cache results. Use the I_refs value to determine the first column
1597 * width. */
1598 l1 = ULong_width(Ir_total.a);
1599 l2 = ULong_width(CG_MAX(Dr_total.a, Bc_total.b));
1600 l3 = ULong_width(CG_MAX(Dw_total.a, Bi_total.b));
1602 /* Make format string, getting width right for numbers */
1603 VG_(sprintf)(fmt, "%%s %%,%dllu\n", l1);
1605 /* Always print this */
1606 VG_(umsg)(fmt, "I refs: ", Ir_total.a);
1608 /* If cache profiling is enabled, show D access numbers and all
1609 miss numbers */
1610 if (clo_cache_sim) {
1611 VG_(umsg)(fmt, "I1 misses: ", Ir_total.m1);
1612 VG_(umsg)(fmt, "LLi misses: ", Ir_total.mL);
1614 if (0 == Ir_total.a) Ir_total.a = 1;
1615 VG_(umsg)("I1 miss rate: %*.2f%%\n", l1,
1616 Ir_total.m1 * 100.0 / Ir_total.a);
1617 VG_(umsg)("LLi miss rate: %*.2f%%\n", l1,
1618 Ir_total.mL * 100.0 / Ir_total.a);
1619 VG_(umsg)("\n");
1621 /* D cache results. Use the D_refs.rd and D_refs.wr values to
1622 * determine the width of columns 2 & 3. */
1623 D_total.a = Dr_total.a + Dw_total.a;
1624 D_total.m1 = Dr_total.m1 + Dw_total.m1;
1625 D_total.mL = Dr_total.mL + Dw_total.mL;
1627 /* Make format string, getting width right for numbers */
1628 VG_(sprintf)(fmt, "%%s %%,%dllu (%%,%dllu rd + %%,%dllu wr)\n",
1629 l1, l2, l3);
1631 VG_(umsg)(fmt, "D refs: ",
1632 D_total.a, Dr_total.a, Dw_total.a);
1633 VG_(umsg)(fmt, "D1 misses: ",
1634 D_total.m1, Dr_total.m1, Dw_total.m1);
1635 VG_(umsg)(fmt, "LLd misses: ",
1636 D_total.mL, Dr_total.mL, Dw_total.mL);
1638 if (0 == D_total.a) D_total.a = 1;
1639 if (0 == Dr_total.a) Dr_total.a = 1;
1640 if (0 == Dw_total.a) Dw_total.a = 1;
1641 VG_(umsg)("D1 miss rate: %*.1f%% (%*.1f%% + %*.1f%% )\n",
1642 l1, D_total.m1 * 100.0 / D_total.a,
1643 l2, Dr_total.m1 * 100.0 / Dr_total.a,
1644 l3, Dw_total.m1 * 100.0 / Dw_total.a);
1645 VG_(umsg)("LLd miss rate: %*.1f%% (%*.1f%% + %*.1f%% )\n",
1646 l1, D_total.mL * 100.0 / D_total.a,
1647 l2, Dr_total.mL * 100.0 / Dr_total.a,
1648 l3, Dw_total.mL * 100.0 / Dw_total.a);
1649 VG_(umsg)("\n");
1651 /* LL overall results */
1653 LL_total = Dr_total.m1 + Dw_total.m1 + Ir_total.m1;
1654 LL_total_r = Dr_total.m1 + Ir_total.m1;
1655 LL_total_w = Dw_total.m1;
1656 VG_(umsg)(fmt, "LL refs: ",
1657 LL_total, LL_total_r, LL_total_w);
1659 LL_total_m = Dr_total.mL + Dw_total.mL + Ir_total.mL;
1660 LL_total_mr = Dr_total.mL + Ir_total.mL;
1661 LL_total_mw = Dw_total.mL;
1662 VG_(umsg)(fmt, "LL misses: ",
1663 LL_total_m, LL_total_mr, LL_total_mw);
1665 VG_(umsg)("LL miss rate: %*.1f%% (%*.1f%% + %*.1f%% )\n",
1666 l1, LL_total_m * 100.0 / (Ir_total.a + D_total.a),
1667 l2, LL_total_mr * 100.0 / (Ir_total.a + Dr_total.a),
1668 l3, LL_total_mw * 100.0 / Dw_total.a);
1671 /* If branch profiling is enabled, show branch overall results. */
1672 if (clo_branch_sim) {
1673 /* Make format string, getting width right for numbers */
1674 VG_(sprintf)(fmt, "%%s %%,%dllu (%%,%dllu cond + %%,%dllu ind)\n",
1675 l1, l2, l3);
1677 if (0 == Bc_total.b) Bc_total.b = 1;
1678 if (0 == Bi_total.b) Bi_total.b = 1;
1679 B_total.b = Bc_total.b + Bi_total.b;
1680 B_total.mp = Bc_total.mp + Bi_total.mp;
1682 VG_(umsg)("\n");
1683 VG_(umsg)(fmt, "Branches: ",
1684 B_total.b, Bc_total.b, Bi_total.b);
1686 VG_(umsg)(fmt, "Mispredicts: ",
1687 B_total.mp, Bc_total.mp, Bi_total.mp);
1689 VG_(umsg)("Mispred rate: %*.1f%% (%*.1f%% + %*.1f%% )\n",
1690 l1, B_total.mp * 100.0 / B_total.b,
1691 l2, Bc_total.mp * 100.0 / Bc_total.b,
1692 l3, Bi_total.mp * 100.0 / Bi_total.b);
1695 // Various stats
1696 if (VG_(clo_stats)) {
1697 Int debug_lookups = full_debugs + fn_debugs +
1698 file_line_debugs + no_debugs;
1700 VG_(dmsg)("\n");
1701 VG_(dmsg)("cachegrind: distinct files : %d\n", distinct_files);
1702 VG_(dmsg)("cachegrind: distinct functions : %d\n", distinct_fns);
1703 VG_(dmsg)("cachegrind: distinct lines : %d\n", distinct_lines);
1704 VG_(dmsg)("cachegrind: distinct instrs NoX: %d\n", distinct_instrsNoX);
1705 VG_(dmsg)("cachegrind: distinct instrs Gen: %d\n", distinct_instrsGen);
1706 VG_(dmsg)("cachegrind: debug lookups : %d\n", debug_lookups);
1708 VG_(dmsg)("cachegrind: with full info:%6.1f%% (%d)\n",
1709 full_debugs * 100.0 / debug_lookups, full_debugs);
1710 VG_(dmsg)("cachegrind: with file/line info:%6.1f%% (%d)\n",
1711 file_line_debugs * 100.0 / debug_lookups, file_line_debugs);
1712 VG_(dmsg)("cachegrind: with fn name info:%6.1f%% (%d)\n",
1713 fn_debugs * 100.0 / debug_lookups, fn_debugs);
1714 VG_(dmsg)("cachegrind: with zero info:%6.1f%% (%d)\n",
1715 no_debugs * 100.0 / debug_lookups, no_debugs);
1717 VG_(dmsg)("cachegrind: string table size: %u\n",
1718 VG_(OSetGen_Size)(stringTable));
1719 VG_(dmsg)("cachegrind: CC table size: %u\n",
1720 VG_(OSetGen_Size)(CC_table));
1721 VG_(dmsg)("cachegrind: InstrInfo table size: %u\n",
1722 VG_(OSetGen_Size)(instrInfoTable));
1726 /*--------------------------------------------------------------------*/
1727 /*--- Discarding BB info ---*/
1728 /*--------------------------------------------------------------------*/
1730 // Called when a translation is removed from the translation cache for
1731 // any reason at all: to free up space, because the guest code was
1732 // unmapped or modified, or for any arbitrary reason.
1733 static
1734 void cg_discard_superblock_info ( Addr orig_addr64, VexGuestExtents vge )
1736 Addr orig_addr = vge.base[0];
1738 tl_assert(vge.n_used > 0);
1740 if (DEBUG_CG)
1741 VG_(printf)( "discard_basic_block_info: %p, %p, %llu\n",
1742 (void*)orig_addr,
1743 (void*)vge.base[0], (ULong)vge.len[0]);
1745 // Get SB info, remove from table, free SB info. Simple! Unless
1746 // instrumentation is currently disabled, in which case we won't have an SB
1747 // info. Note that we use orig_addr, not the first instruction address in
1748 // `vge`.
1749 SB_info* sbInfo = VG_(OSetGen_Remove)(instrInfoTable, &orig_addr);
1750 if (sbInfo) {
1751 tl_assert(instr_enabled);
1752 VG_(OSetGen_FreeNode)(instrInfoTable, sbInfo);
1753 } else {
1754 tl_assert(!instr_enabled);
1758 /*--------------------------------------------------------------------*/
1759 /*--- Command line processing ---*/
1760 /*--------------------------------------------------------------------*/
1762 static Bool cg_process_cmd_line_option(const HChar* arg)
1764 if (VG_(str_clo_cache_opt)(arg,
1765 &clo_I1_cache,
1766 &clo_D1_cache,
1767 &clo_LL_cache)) {}
1769 else if VG_STR_CLO( arg, "--cachegrind-out-file", clo_cachegrind_out_file) {}
1770 else if VG_BOOL_CLO(arg, "--cache-sim", clo_cache_sim) {}
1771 else if VG_BOOL_CLO(arg, "--branch-sim", clo_branch_sim) {}
1772 else if VG_BOOL_CLO(arg, "--instr-at-start", clo_instr_at_start) {}
1773 else
1774 return False;
1776 return True;
1779 static void cg_print_usage(void)
1781 VG_(printf)(
1782 " --cachegrind-out-file=<file> output file name [cachegrind.out.%%p]\n"
1783 " --cache-sim=yes|no collect cache stats? [no]\n"
1784 " --branch-sim=yes|no collect branch prediction stats? [no]\n"
1785 " --instr-at-start=yes|no instrument at start? [yes]\n"
1787 VG_(print_cache_clo_opts)();
1790 static void cg_print_debug_usage(void)
1792 VG_(printf)(
1793 " (none)\n"
1797 /*--------------------------------------------------------------------*/
1798 /*--- Client requests ---*/
1799 /*--------------------------------------------------------------------*/
1801 static void set_instr_enabled(Bool enable)
1803 if (enable) {
1804 // Enable instrumentation.
1805 if (!instr_enabled) {
1806 // Discard first, then update `instr_enabled`;
1807 // `cg_discard_superblock_info` relies on that.
1808 VG_(discard_translations_safely)((Addr)0x1000, ~(SizeT)0xfff, "cachegrind");
1809 instr_enabled = True;
1810 } else {
1811 VG_(dmsg)("warning: CACHEGRIND_START_INSTRUMENTATION called,\n");
1812 VG_(dmsg)(" but instrumentation is already enabled\n");
1814 } else {
1815 // Disable instrumentation.
1816 if (instr_enabled) {
1817 // Discard first, then update `instr_enabled`;
1818 // `cg_discard_superblock_info` relies on that.
1819 VG_(discard_translations_safely)((Addr)0x1000, ~(SizeT)0xfff, "cachegrind");
1820 instr_enabled = False;
1821 } else {
1822 VG_(dmsg)("warning: CACHEGRIND_STOP_INSTRUMENTATION called,\n");
1823 VG_(dmsg)(" but instrumentation is already disabled\n");
1828 static Bool cg_handle_client_request(ThreadId tid, UWord *args, UWord *ret)
1830 if (!VG_IS_TOOL_USERREQ('C', 'G', args[0])
1831 && VG_USERREQ__GDB_MONITOR_COMMAND != args[0])
1832 return False;
1834 switch(args[0]) {
1835 case VG_USERREQ__CG_START_INSTRUMENTATION:
1836 set_instr_enabled(True);
1837 *ret = 0;
1838 return True;
1840 case VG_USERREQ__CG_STOP_INSTRUMENTATION:
1841 set_instr_enabled(False);
1842 *ret = 0;
1843 return True;
1845 default:
1846 VG_(message)(Vg_UserMsg,
1847 "Warning: unknown cachegrind client request code %llx\n",
1848 (ULong)args[0]);
1849 return False;
1853 /*--------------------------------------------------------------------*/
1854 /*--- Setup ---*/
1855 /*--------------------------------------------------------------------*/
1857 static void cg_post_clo_init(void); /* just below */
1859 static void cg_pre_clo_init(void)
1861 VG_(details_name) ("Cachegrind");
1862 VG_(details_version) (NULL);
1863 VG_(details_description) ("a high-precision tracing profiler");
1864 VG_(details_copyright_author)(
1865 "Copyright (C) 2002-2024, and GNU GPL'd, by Nicholas Nethercote et al.");
1866 VG_(details_bug_reports_to) (VG_BUGS_TO);
1867 VG_(details_avg_translation_sizeB) ( 500 );
1869 VG_(clo_vex_control).iropt_register_updates_default
1870 = VG_(clo_px_file_backed)
1871 = VexRegUpdSpAtMemAccess; // overridable by the user.
1873 VG_(basic_tool_funcs) (cg_post_clo_init,
1874 cg_instrument,
1875 cg_fini);
1877 VG_(needs_superblock_discards)(cg_discard_superblock_info);
1878 VG_(needs_command_line_options)(cg_process_cmd_line_option,
1879 cg_print_usage,
1880 cg_print_debug_usage);
1881 VG_(needs_client_requests)(cg_handle_client_request);
1884 static void cg_post_clo_init(void)
1886 cache_t I1c, D1c, LLc;
1888 CC_table =
1889 VG_(OSetGen_Create)(offsetof(LineCC, loc),
1890 cmp_CodeLoc_LineCC,
1891 VG_(malloc), "cg.main.cpci.1",
1892 VG_(free));
1893 instrInfoTable =
1894 VG_(OSetGen_Create)(/*keyOff*/0,
1895 NULL,
1896 VG_(malloc), "cg.main.cpci.2",
1897 VG_(free));
1898 stringTable =
1899 VG_(OSetGen_Create)(/*keyOff*/0,
1900 stringCmp,
1901 VG_(malloc), "cg.main.cpci.3",
1902 VG_(free));
1904 if (clo_cache_sim) {
1905 VG_(post_clo_init_configure_caches)(&I1c, &D1c, &LLc,
1906 &clo_I1_cache,
1907 &clo_D1_cache,
1908 &clo_LL_cache);
1910 // min_line_size is used to make sure that we never feed
1911 // accesses to the simulator straddling more than two
1912 // cache lines at any cache level
1913 min_line_size = (I1c.line_size < D1c.line_size) ? I1c.line_size : D1c.line_size;
1914 min_line_size = (LLc.line_size < min_line_size) ? LLc.line_size : min_line_size;
1916 Int largest_load_or_store_size
1917 = VG_(machine_get_size_of_largest_guest_register)();
1918 if (min_line_size < largest_load_or_store_size) {
1919 /* We can't continue, because the cache simulation might
1920 straddle more than 2 lines, and it will assert. So let's
1921 just stop before we start. */
1922 VG_(umsg)("Cachegrind: cannot continue: the minimum line size (%d)\n",
1923 (Int)min_line_size);
1924 VG_(umsg)(" must be equal to or larger than the maximum register size (%d)\n",
1925 largest_load_or_store_size );
1926 VG_(umsg)(" but it is not. Exiting now.\n");
1927 VG_(exit)(1);
1930 cachesim_initcaches(I1c, D1c, LLc);
1933 // When instrumentation client requests are enabled, we start with
1934 // instrumentation off.
1935 if (!clo_instr_at_start) {
1936 instr_enabled = False;
1940 VG_DETERMINE_INTERFACE_VERSION(cg_pre_clo_init)
1942 /*--------------------------------------------------------------------*/
1943 /*--- end ---*/
1944 /*--------------------------------------------------------------------*/