Bug 497723 - tweak awk filter in regtest
[valgrind.git] / callgrind / main.c
blob39aa230ed2716a41f1c49bf2f7cc44167bdd18ff
2 /*--------------------------------------------------------------------*/
3 /*--- Callgrind ---*/
4 /*--- main.c ---*/
5 /*--------------------------------------------------------------------*/
7 /*
8 This file is part of Callgrind, a Valgrind tool for call graph
9 profiling programs.
11 Copyright (C) 2002-2017, Josef Weidendorfer (Josef.Weidendorfer@gmx.de)
13 This tool is derived from and contains code from Cachegrind
14 Copyright (C) 2002-2017 Nicholas Nethercote (njn@valgrind.org)
16 This program is free software; you can redistribute it and/or
17 modify it under the terms of the GNU General Public License as
18 published by the Free Software Foundation; either version 2 of the
19 License, or (at your option) any later version.
21 This program is distributed in the hope that it will be useful, but
22 WITHOUT ANY WARRANTY; without even the implied warranty of
23 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
24 General Public License for more details.
26 You should have received a copy of the GNU General Public License
27 along with this program; if not, see <http://www.gnu.org/licenses/>.
29 The GNU General Public License is contained in the file COPYING.
32 #include "config.h"
33 #include "callgrind.h"
34 #include "global.h"
36 #include "pub_tool_threadstate.h"
37 #include "pub_tool_gdbserver.h"
38 #include "pub_tool_transtab.h" // VG_(discard_translations_safely)
40 #include "cg_branchpred.c"
42 /*------------------------------------------------------------*/
43 /*--- Global variables ---*/
44 /*------------------------------------------------------------*/
46 /* for all threads */
47 CommandLineOptions CLG_(clo);
48 Statistics CLG_(stat);
49 Bool CLG_(instrument_state) = True; /* Instrumentation on ? */
51 /* thread and signal handler specific */
52 exec_state CLG_(current_state);
54 /* min of L1 and LL cache line sizes. This only gets set to a
55 non-zero value if we are doing cache simulation. */
56 Int CLG_(min_line_size) = 0;
59 /*------------------------------------------------------------*/
60 /*--- Statistics ---*/
61 /*------------------------------------------------------------*/
63 static void CLG_(init_statistics)(Statistics* s)
65 s->call_counter = 0;
66 s->jcnd_counter = 0;
67 s->jump_counter = 0;
68 s->rec_call_counter = 0;
69 s->ret_counter = 0;
70 s->bb_executions = 0;
72 s->context_counter = 0;
73 s->bb_retranslations = 0;
75 s->distinct_objs = 0;
76 s->distinct_files = 0;
77 s->distinct_fns = 0;
78 s->distinct_contexts = 0;
79 s->distinct_bbs = 0;
80 s->distinct_bbccs = 0;
81 s->distinct_instrs = 0;
82 s->distinct_skips = 0;
84 s->bb_hash_resizes = 0;
85 s->bbcc_hash_resizes = 0;
86 s->jcc_hash_resizes = 0;
87 s->cxt_hash_resizes = 0;
88 s->fn_array_resizes = 0;
89 s->call_stack_resizes = 0;
90 s->fn_stack_resizes = 0;
92 s->full_debug_BBs = 0;
93 s->file_line_debug_BBs = 0;
94 s->fn_name_debug_BBs = 0;
95 s->no_debug_BBs = 0;
96 s->bbcc_lru_misses = 0;
97 s->jcc_lru_misses = 0;
98 s->cxt_lru_misses = 0;
99 s->bbcc_clones = 0;
103 /*------------------------------------------------------------*/
104 /*--- Simple callbacks (not cache simulator) ---*/
105 /*------------------------------------------------------------*/
107 VG_REGPARM(1)
108 static void log_global_event(InstrInfo* ii)
110 ULong* cost_Bus;
112 CLG_DEBUG(6, "log_global_event: Ir %#lx/%u\n",
113 CLG_(bb_base) + ii->instr_offset, ii->instr_size);
115 if (!CLG_(current_state).collect) return;
117 CLG_ASSERT( (ii->eventset->mask & (1u<<EG_BUS))>0 );
119 CLG_(current_state).cost[ fullOffset(EG_BUS) ]++;
121 if (CLG_(current_state).nonskipped)
122 cost_Bus = CLG_(current_state).nonskipped->skipped + fullOffset(EG_BUS);
123 else
124 cost_Bus = CLG_(cost_base) + ii->cost_offset + ii->eventset->offset[EG_BUS];
125 cost_Bus[0]++;
129 /* For branches, we consult two different predictors, one which
130 predicts taken/untaken for conditional branches, and the other
131 which predicts the branch target address for indirect branches
132 (jump-to-register style ones). */
134 static VG_REGPARM(2)
135 void log_cond_branch(InstrInfo* ii, Word taken)
137 Bool miss;
138 Int fullOffset_Bc;
139 ULong* cost_Bc;
141 CLG_DEBUG(6, "log_cond_branch: Ir %#lx, taken %ld\n",
142 CLG_(bb_base) + ii->instr_offset, taken);
144 miss = 1 & do_cond_branch_predict(CLG_(bb_base) + ii->instr_offset, taken);
146 if (!CLG_(current_state).collect) return;
148 CLG_ASSERT( (ii->eventset->mask & (1u<<EG_BC))>0 );
150 if (CLG_(current_state).nonskipped)
151 cost_Bc = CLG_(current_state).nonskipped->skipped + fullOffset(EG_BC);
152 else
153 cost_Bc = CLG_(cost_base) + ii->cost_offset + ii->eventset->offset[EG_BC];
155 fullOffset_Bc = fullOffset(EG_BC);
156 CLG_(current_state).cost[ fullOffset_Bc ]++;
157 cost_Bc[0]++;
158 if (miss) {
159 CLG_(current_state).cost[ fullOffset_Bc+1 ]++;
160 cost_Bc[1]++;
164 static VG_REGPARM(2)
165 void log_ind_branch(InstrInfo* ii, UWord actual_dst)
167 Bool miss;
168 Int fullOffset_Bi;
169 ULong* cost_Bi;
171 CLG_DEBUG(6, "log_ind_branch: Ir %#lx, dst %#lx\n",
172 CLG_(bb_base) + ii->instr_offset, actual_dst);
174 miss = 1 & do_ind_branch_predict(CLG_(bb_base) + ii->instr_offset, actual_dst);
176 if (!CLG_(current_state).collect) return;
178 CLG_ASSERT( (ii->eventset->mask & (1u<<EG_BI))>0 );
180 if (CLG_(current_state).nonskipped)
181 cost_Bi = CLG_(current_state).nonskipped->skipped + fullOffset(EG_BI);
182 else
183 cost_Bi = CLG_(cost_base) + ii->cost_offset + ii->eventset->offset[EG_BI];
185 fullOffset_Bi = fullOffset(EG_BI);
186 CLG_(current_state).cost[ fullOffset_Bi ]++;
187 cost_Bi[0]++;
188 if (miss) {
189 CLG_(current_state).cost[ fullOffset_Bi+1 ]++;
190 cost_Bi[1]++;
194 /*------------------------------------------------------------*/
195 /*--- Instrumentation structures and event queue handling ---*/
196 /*------------------------------------------------------------*/
198 /* Maintain an ordered list of memory events which are outstanding, in
199 the sense that no IR has yet been generated to do the relevant
200 helper calls. The BB is scanned top to bottom and memory events
201 are added to the end of the list, merging with the most recent
202 notified event where possible (Dw immediately following Dr and
203 having the same size and EA can be merged).
205 This merging is done so that for architectures which have
206 load-op-store instructions (x86, amd64), the insn is treated as if
207 it makes just one memory reference (a modify), rather than two (a
208 read followed by a write at the same address).
210 At various points the list will need to be flushed, that is, IR
211 generated from it. That must happen before any possible exit from
212 the block (the end, or an IRStmt_Exit). Flushing also takes place
213 when there is no space to add a new event.
215 If we require the simulation statistics to be up to date with
216 respect to possible memory exceptions, then the list would have to
217 be flushed before each memory reference. That would however lose
218 performance by inhibiting event-merging during flushing.
220 Flushing the list consists of walking it start to end and emitting
221 instrumentation IR for each event, in the order in which they
222 appear. It may be possible to emit a single call for two adjacent
223 events in order to reduce the number of helper function calls made.
224 For example, it could well be profitable to handle two adjacent Ir
225 events with a single helper call. */
227 typedef
228 IRExpr
229 IRAtom;
/* Kinds of events that can sit in the outstanding-event queue. */
typedef
   enum {
      Ev_Ir,  /* instruction read */
      Ev_Dr,  /* data read */
      Ev_Dw,  /* data write */
      Ev_Dm,  /* data modify (read then write) */
      Ev_Bc,  /* conditional branch */
      Ev_Bi,  /* indirect branch (to unknown destination) */
      Ev_G    /* global bus event */
   }
   EventTag;
243 typedef
244 struct {
245 EventTag tag;
246 InstrInfo* inode;
247 union {
248 struct {
249 } Ir;
250 struct {
251 IRAtom* ea;
252 Int szB;
253 } Dr;
254 struct {
255 IRAtom* ea;
256 Int szB;
257 } Dw;
258 struct {
259 IRAtom* ea;
260 Int szB;
261 } Dm;
262 struct {
263 IRAtom* taken; /* :: Ity_I1 */
264 } Bc;
265 struct {
266 IRAtom* dst;
267 } Bi;
268 struct {
269 } G;
270 } Ev;
272 Event;
274 static void init_Event ( Event* ev ) {
275 VG_(memset)(ev, 0, sizeof(Event));
278 static IRAtom* get_Event_dea ( Event* ev ) {
279 switch (ev->tag) {
280 case Ev_Dr: return ev->Ev.Dr.ea;
281 case Ev_Dw: return ev->Ev.Dw.ea;
282 case Ev_Dm: return ev->Ev.Dm.ea;
283 default: tl_assert(0);
287 static Int get_Event_dszB ( Event* ev ) {
288 switch (ev->tag) {
289 case Ev_Dr: return ev->Ev.Dr.szB;
290 case Ev_Dw: return ev->Ev.Dw.szB;
291 case Ev_Dm: return ev->Ev.Dm.szB;
292 default: tl_assert(0);
297 /* Up to this many unnotified events are allowed. Number is
298 arbitrary. Larger numbers allow more event merging to occur, but
299 potentially induce more spilling due to extending live ranges of
300 address temporaries. */
301 #define N_EVENTS 16
304 /* A struct which holds all the running state during instrumentation.
305 Mostly to avoid passing loads of parameters everywhere. */
306 typedef struct {
307 /* The current outstanding-memory-event list. */
308 Event events[N_EVENTS];
309 Int events_used;
311 /* The array of InstrInfo's is part of BB struct. */
312 BB* bb;
314 /* BB seen before (ie. re-instrumentation) */
315 Bool seen_before;
317 /* Number InstrInfo bins 'used' so far. */
318 UInt ii_index;
320 // current offset of guest instructions from BB start
321 UInt instr_offset;
323 /* The output SB being constructed. */
324 IRSB* sbOut;
325 } ClgState;
328 static void showEvent ( Event* ev )
330 switch (ev->tag) {
331 case Ev_Ir:
332 VG_(printf)("Ir (InstrInfo %p) at +%u\n",
333 ev->inode, ev->inode->instr_offset);
334 break;
335 case Ev_Dr:
336 VG_(printf)("Dr (InstrInfo %p) at +%u %d EA=",
337 ev->inode, ev->inode->instr_offset, ev->Ev.Dr.szB);
338 ppIRExpr(ev->Ev.Dr.ea);
339 VG_(printf)("\n");
340 break;
341 case Ev_Dw:
342 VG_(printf)("Dw (InstrInfo %p) at +%u %d EA=",
343 ev->inode, ev->inode->instr_offset, ev->Ev.Dw.szB);
344 ppIRExpr(ev->Ev.Dw.ea);
345 VG_(printf)("\n");
346 break;
347 case Ev_Dm:
348 VG_(printf)("Dm (InstrInfo %p) at +%u %d EA=",
349 ev->inode, ev->inode->instr_offset, ev->Ev.Dm.szB);
350 ppIRExpr(ev->Ev.Dm.ea);
351 VG_(printf)("\n");
352 break;
353 case Ev_Bc:
354 VG_(printf)("Bc %p GA=", ev->inode);
355 ppIRExpr(ev->Ev.Bc.taken);
356 VG_(printf)("\n");
357 break;
358 case Ev_Bi:
359 VG_(printf)("Bi %p DST=", ev->inode);
360 ppIRExpr(ev->Ev.Bi.dst);
361 VG_(printf)("\n");
362 break;
363 case Ev_G:
364 VG_(printf)("G %p\n", ev->inode);
365 break;
366 default:
367 tl_assert(0);
368 break;
372 /* Generate code for all outstanding memory events, and mark the queue
373 empty. Code is generated into cgs->sbOut, and this activity
374 'consumes' slots in cgs->bb. */
376 static void flushEvents ( ClgState* clgs )
378 Int i, regparms, inew;
379 const HChar* helperName;
380 void* helperAddr;
381 IRExpr** argv;
382 IRExpr* i_node_expr;
383 IRDirty* di;
384 Event* ev;
385 Event* ev2;
386 Event* ev3;
388 if (!clgs->seen_before) {
389 // extend event sets as needed
390 // available sets: D0 Dr
391 for(i=0; i<clgs->events_used; i++) {
392 ev = &clgs->events[i];
393 switch(ev->tag) {
394 case Ev_Ir:
395 // Ir event always is first for a guest instruction
396 CLG_ASSERT(ev->inode->eventset == 0);
397 ev->inode->eventset = CLG_(sets).base;
398 break;
399 case Ev_Dr:
400 // extend event set by Dr counters
401 ev->inode->eventset = CLG_(add_event_group)(ev->inode->eventset,
402 EG_DR);
403 break;
404 case Ev_Dw:
405 case Ev_Dm:
406 // extend event set by Dw counters
407 ev->inode->eventset = CLG_(add_event_group)(ev->inode->eventset,
408 EG_DW);
409 break;
410 case Ev_Bc:
411 // extend event set by Bc counters
412 ev->inode->eventset = CLG_(add_event_group)(ev->inode->eventset,
413 EG_BC);
414 break;
415 case Ev_Bi:
416 // extend event set by Bi counters
417 ev->inode->eventset = CLG_(add_event_group)(ev->inode->eventset,
418 EG_BI);
419 break;
420 case Ev_G:
421 // extend event set by Bus counter
422 ev->inode->eventset = CLG_(add_event_group)(ev->inode->eventset,
423 EG_BUS);
424 break;
425 default:
426 tl_assert(0);
431 for(i = 0; i < clgs->events_used; i = inew) {
433 helperName = NULL;
434 helperAddr = NULL;
435 argv = NULL;
436 regparms = 0;
438 /* generate IR to notify event i and possibly the ones
439 immediately following it. */
440 tl_assert(i >= 0 && i < clgs->events_used);
442 ev = &clgs->events[i];
443 ev2 = ( i < clgs->events_used-1 ? &clgs->events[i+1] : NULL );
444 ev3 = ( i < clgs->events_used-2 ? &clgs->events[i+2] : NULL );
446 CLG_DEBUGIF(5) {
447 VG_(printf)(" flush ");
448 showEvent( ev );
451 i_node_expr = mkIRExpr_HWord( (HWord)ev->inode );
453 /* Decide on helper fn to call and args to pass it, and advance
454 i appropriately.
455 Dm events have same effect as Dw events */
456 switch (ev->tag) {
457 case Ev_Ir:
458 /* Merge an Ir with a following Dr. */
459 if (ev2 && ev2->tag == Ev_Dr) {
460 /* Why is this true? It's because we're merging an Ir
461 with a following Dr. The Ir derives from the
462 instruction's IMark and the Dr from data
463 references which follow it. In short it holds
464 because each insn starts with an IMark, hence an
465 Ev_Ir, and so these Dr must pertain to the
466 immediately preceding Ir. Same applies to analogous
467 assertions in the subsequent cases. */
468 tl_assert(ev2->inode == ev->inode);
469 helperName = CLG_(cachesim).log_1I1Dr_name;
470 helperAddr = CLG_(cachesim).log_1I1Dr;
471 argv = mkIRExprVec_3( i_node_expr,
472 get_Event_dea(ev2),
473 mkIRExpr_HWord( get_Event_dszB(ev2) ) );
474 regparms = 3;
475 inew = i+2;
477 /* Merge an Ir with a following Dw/Dm. */
478 else
479 if (ev2 && (ev2->tag == Ev_Dw || ev2->tag == Ev_Dm)) {
480 tl_assert(ev2->inode == ev->inode);
481 helperName = CLG_(cachesim).log_1I1Dw_name;
482 helperAddr = CLG_(cachesim).log_1I1Dw;
483 argv = mkIRExprVec_3( i_node_expr,
484 get_Event_dea(ev2),
485 mkIRExpr_HWord( get_Event_dszB(ev2) ) );
486 regparms = 3;
487 inew = i+2;
489 /* Merge an Ir with two following Irs. */
490 else
491 if (ev2 && ev3 && ev2->tag == Ev_Ir && ev3->tag == Ev_Ir) {
492 helperName = CLG_(cachesim).log_3I0D_name;
493 helperAddr = CLG_(cachesim).log_3I0D;
494 argv = mkIRExprVec_3( i_node_expr,
495 mkIRExpr_HWord( (HWord)ev2->inode ),
496 mkIRExpr_HWord( (HWord)ev3->inode ) );
497 regparms = 3;
498 inew = i+3;
500 /* Merge an Ir with one following Ir. */
501 else
502 if (ev2 && ev2->tag == Ev_Ir) {
503 helperName = CLG_(cachesim).log_2I0D_name;
504 helperAddr = CLG_(cachesim).log_2I0D;
505 argv = mkIRExprVec_2( i_node_expr,
506 mkIRExpr_HWord( (HWord)ev2->inode ) );
507 regparms = 2;
508 inew = i+2;
510 /* No merging possible; emit as-is. */
511 else {
512 helperName = CLG_(cachesim).log_1I0D_name;
513 helperAddr = CLG_(cachesim).log_1I0D;
514 argv = mkIRExprVec_1( i_node_expr );
515 regparms = 1;
516 inew = i+1;
518 break;
519 case Ev_Dr:
520 /* Data read or modify */
521 helperName = CLG_(cachesim).log_0I1Dr_name;
522 helperAddr = CLG_(cachesim).log_0I1Dr;
523 argv = mkIRExprVec_3( i_node_expr,
524 get_Event_dea(ev),
525 mkIRExpr_HWord( get_Event_dszB(ev) ) );
526 regparms = 3;
527 inew = i+1;
528 break;
529 case Ev_Dw:
530 case Ev_Dm:
531 /* Data write */
532 helperName = CLG_(cachesim).log_0I1Dw_name;
533 helperAddr = CLG_(cachesim).log_0I1Dw;
534 argv = mkIRExprVec_3( i_node_expr,
535 get_Event_dea(ev),
536 mkIRExpr_HWord( get_Event_dszB(ev) ) );
537 regparms = 3;
538 inew = i+1;
539 break;
540 case Ev_Bc:
541 /* Conditional branch */
542 helperName = "log_cond_branch";
543 helperAddr = &log_cond_branch;
544 argv = mkIRExprVec_2( i_node_expr, ev->Ev.Bc.taken );
545 regparms = 2;
546 inew = i+1;
547 break;
548 case Ev_Bi:
549 /* Branch to an unknown destination */
550 helperName = "log_ind_branch";
551 helperAddr = &log_ind_branch;
552 argv = mkIRExprVec_2( i_node_expr, ev->Ev.Bi.dst );
553 regparms = 2;
554 inew = i+1;
555 break;
556 case Ev_G:
557 /* Global bus event (CAS, LOCK-prefix, LL-SC, etc) */
558 helperName = "log_global_event";
559 helperAddr = &log_global_event;
560 argv = mkIRExprVec_1( i_node_expr );
561 regparms = 1;
562 inew = i+1;
563 break;
564 default:
565 tl_assert(0);
568 CLG_DEBUGIF(5) {
569 if (inew > i+1) {
570 VG_(printf)(" merge ");
571 showEvent( ev2 );
573 if (inew > i+2) {
574 VG_(printf)(" merge ");
575 showEvent( ev3 );
577 if (helperAddr)
578 VG_(printf)(" call %s (%p)\n",
579 helperName, helperAddr);
582 /* helper could be unset depending on the simulator used */
583 if (helperAddr == 0) continue;
585 /* Add the helper. */
586 tl_assert(helperName);
587 tl_assert(helperAddr);
588 tl_assert(argv);
589 di = unsafeIRDirty_0_N( regparms,
590 helperName, VG_(fnptr_to_fnentry)( helperAddr ),
591 argv );
592 addStmtToIRSB( clgs->sbOut, IRStmt_Dirty(di) );
595 clgs->events_used = 0;
598 static void addEvent_Ir ( ClgState* clgs, InstrInfo* inode )
600 Event* evt;
601 tl_assert(clgs->seen_before || (inode->eventset == 0));
602 if (!CLG_(clo).simulate_cache) return;
604 if (clgs->events_used == N_EVENTS)
605 flushEvents(clgs);
606 tl_assert(clgs->events_used >= 0 && clgs->events_used < N_EVENTS);
607 evt = &clgs->events[clgs->events_used];
608 init_Event(evt);
609 evt->tag = Ev_Ir;
610 evt->inode = inode;
611 clgs->events_used++;
614 static
615 void addEvent_Dr ( ClgState* clgs, InstrInfo* inode, Int datasize, IRAtom* ea )
617 Event* evt;
618 tl_assert(isIRAtom(ea));
619 tl_assert(datasize >= 1);
620 if (!CLG_(clo).simulate_cache) return;
621 tl_assert(datasize <= CLG_(min_line_size));
623 if (clgs->events_used == N_EVENTS)
624 flushEvents(clgs);
625 tl_assert(clgs->events_used >= 0 && clgs->events_used < N_EVENTS);
626 evt = &clgs->events[clgs->events_used];
627 init_Event(evt);
628 evt->tag = Ev_Dr;
629 evt->inode = inode;
630 evt->Ev.Dr.szB = datasize;
631 evt->Ev.Dr.ea = ea;
632 clgs->events_used++;
635 static
636 void addEvent_Dw ( ClgState* clgs, InstrInfo* inode, Int datasize, IRAtom* ea )
638 Event* evt;
639 tl_assert(isIRAtom(ea));
640 tl_assert(datasize >= 1);
641 if (!CLG_(clo).simulate_cache) return;
642 tl_assert(datasize <= CLG_(min_line_size));
644 /* Is it possible to merge this write with the preceding read? */
645 if (clgs->events_used > 0) {
646 Event* lastEvt = &clgs->events[clgs->events_used-1];
647 if ( lastEvt->tag == Ev_Dr
648 && lastEvt->Ev.Dr.szB == datasize
649 && lastEvt->inode == inode
650 && eqIRAtom(lastEvt->Ev.Dr.ea, ea))
652 lastEvt->tag = Ev_Dm;
653 return;
657 /* No. Add as normal. */
658 if (clgs->events_used == N_EVENTS)
659 flushEvents(clgs);
660 tl_assert(clgs->events_used >= 0 && clgs->events_used < N_EVENTS);
661 evt = &clgs->events[clgs->events_used];
662 init_Event(evt);
663 evt->tag = Ev_Dw;
664 evt->inode = inode;
665 evt->Ev.Dw.szB = datasize;
666 evt->Ev.Dw.ea = ea;
667 clgs->events_used++;
670 static
671 void addEvent_D_guarded ( ClgState* clgs, InstrInfo* inode,
672 Int datasize, IRAtom* ea, IRAtom* guard,
673 Bool isWrite )
675 tl_assert(isIRAtom(ea));
676 tl_assert(guard);
677 tl_assert(isIRAtom(guard));
678 tl_assert(datasize >= 1);
679 if (!CLG_(clo).simulate_cache) return;
680 tl_assert(datasize <= CLG_(min_line_size));
682 /* Adding guarded memory actions and merging them with the existing
683 queue is too complex. Simply flush the queue and add this
684 action immediately. Since guarded loads and stores are pretty
685 rare, this is not thought likely to cause any noticeable
686 performance loss as a result of the loss of event-merging
687 opportunities. */
688 tl_assert(clgs->events_used >= 0);
689 flushEvents(clgs);
690 tl_assert(clgs->events_used == 0);
691 /* Same as case Ev_Dw / case Ev_Dr in flushEvents, except with guard */
692 IRExpr* i_node_expr;
693 const HChar* helperName;
694 void* helperAddr;
695 IRExpr** argv;
696 Int regparms;
697 IRDirty* di;
698 i_node_expr = mkIRExpr_HWord( (HWord)inode );
699 helperName = isWrite ? CLG_(cachesim).log_0I1Dw_name
700 : CLG_(cachesim).log_0I1Dr_name;
701 helperAddr = isWrite ? CLG_(cachesim).log_0I1Dw
702 : CLG_(cachesim).log_0I1Dr;
703 argv = mkIRExprVec_3( i_node_expr,
704 ea, mkIRExpr_HWord( datasize ) );
705 regparms = 3;
706 di = unsafeIRDirty_0_N(
707 regparms,
708 helperName, VG_(fnptr_to_fnentry)( helperAddr ),
709 argv );
710 di->guard = guard;
711 addStmtToIRSB( clgs->sbOut, IRStmt_Dirty(di) );
714 static
715 void addEvent_Bc ( ClgState* clgs, InstrInfo* inode, IRAtom* guard )
717 Event* evt;
718 tl_assert(isIRAtom(guard));
719 tl_assert(typeOfIRExpr(clgs->sbOut->tyenv, guard)
720 == (sizeof(RegWord)==4 ? Ity_I32 : Ity_I64));
721 if (!CLG_(clo).simulate_branch) return;
723 if (clgs->events_used == N_EVENTS)
724 flushEvents(clgs);
725 tl_assert(clgs->events_used >= 0 && clgs->events_used < N_EVENTS);
726 evt = &clgs->events[clgs->events_used];
727 init_Event(evt);
728 evt->tag = Ev_Bc;
729 evt->inode = inode;
730 evt->Ev.Bc.taken = guard;
731 clgs->events_used++;
734 static
735 void addEvent_Bi ( ClgState* clgs, InstrInfo* inode, IRAtom* whereTo )
737 Event* evt;
738 tl_assert(isIRAtom(whereTo));
739 tl_assert(typeOfIRExpr(clgs->sbOut->tyenv, whereTo)
740 == (sizeof(RegWord)==4 ? Ity_I32 : Ity_I64));
741 if (!CLG_(clo).simulate_branch) return;
743 if (clgs->events_used == N_EVENTS)
744 flushEvents(clgs);
745 tl_assert(clgs->events_used >= 0 && clgs->events_used < N_EVENTS);
746 evt = &clgs->events[clgs->events_used];
747 init_Event(evt);
748 evt->tag = Ev_Bi;
749 evt->inode = inode;
750 evt->Ev.Bi.dst = whereTo;
751 clgs->events_used++;
754 static
755 void addEvent_G ( ClgState* clgs, InstrInfo* inode )
757 Event* evt;
758 if (!CLG_(clo).collect_bus) return;
760 if (clgs->events_used == N_EVENTS)
761 flushEvents(clgs);
762 tl_assert(clgs->events_used >= 0 && clgs->events_used < N_EVENTS);
763 evt = &clgs->events[clgs->events_used];
764 init_Event(evt);
765 evt->tag = Ev_G;
766 evt->inode = inode;
767 clgs->events_used++;
770 /* Initialise or check (if already seen before) an InstrInfo for next insn.
771 We only can set instr_offset/instr_size here. The required event set and
772 resulting cost offset depend on events (Ir/Dr/Dw/Dm) in guest
773 instructions. The event set is extended as required on flush of the event
774 queue (when Dm events were determined), cost offsets are determined at
775 end of BB instrumentation. */
776 static
777 InstrInfo* next_InstrInfo ( ClgState* clgs, UInt instr_size )
779 InstrInfo* ii;
780 tl_assert(clgs->ii_index < clgs->bb->instr_count);
781 ii = &clgs->bb->instr[ clgs->ii_index ];
783 if (clgs->seen_before) {
784 CLG_ASSERT(ii->instr_offset == clgs->instr_offset);
785 CLG_ASSERT(ii->instr_size == instr_size);
787 else {
788 ii->instr_offset = clgs->instr_offset;
789 ii->instr_size = instr_size;
790 ii->cost_offset = 0;
791 ii->eventset = 0;
794 clgs->ii_index++;
795 clgs->instr_offset += instr_size;
796 CLG_(stat).distinct_instrs++;
798 return ii;
801 // return total number of cost values needed for this BB
802 static
803 UInt update_cost_offsets( ClgState* clgs )
805 Int i;
806 InstrInfo* ii;
807 UInt cost_offset = 0;
809 CLG_ASSERT(clgs->bb->instr_count == clgs->ii_index);
810 for(i=0; i<clgs->ii_index; i++) {
811 ii = &clgs->bb->instr[i];
812 if (clgs->seen_before) {
813 CLG_ASSERT(ii->cost_offset == cost_offset);
814 } else
815 ii->cost_offset = cost_offset;
816 cost_offset += ii->eventset ? ii->eventset->size : 0;
819 return cost_offset;
822 /*------------------------------------------------------------*/
823 /*--- Instrumentation ---*/
824 /*------------------------------------------------------------*/
826 #if defined(VG_BIGENDIAN)
827 # define CLGEndness Iend_BE
828 #elif defined(VG_LITTLEENDIAN)
829 # define CLGEndness Iend_LE
830 #else
831 # error "Unknown endianness"
832 #endif
834 static
835 Addr IRConst2Addr(IRConst* con)
837 Addr addr;
839 if (sizeof(RegWord) == 4) {
840 CLG_ASSERT( con->tag == Ico_U32 );
841 addr = con->Ico.U32;
843 else if (sizeof(RegWord) == 8) {
844 CLG_ASSERT( con->tag == Ico_U64 );
845 addr = con->Ico.U64;
847 else
848 VG_(tool_panic)("Callgrind: invalid Addr type");
850 return addr;
853 /* First pass over a BB to instrument, counting instructions and jumps
854 * This is needed for the size of the BB struct to allocate
856 * Called from CLG_(get_bb)
858 void CLG_(collectBlockInfo)(IRSB* sbIn,
859 /*INOUT*/ UInt* instrs,
860 /*INOUT*/ UInt* cjmps,
861 /*INOUT*/ Bool* cjmp_inverted)
863 Int i;
864 IRStmt* st;
865 Addr instrAddr =0, jumpDst;
866 UInt instrLen = 0;
867 Bool toNextInstr = False;
869 // Ist_Exit has to be ignored in preamble code, before first IMark:
870 // preamble code is added by VEX for self modifying code, and has
871 // nothing to do with client code
872 Bool inPreamble = True;
874 if (!sbIn) return;
876 for (i = 0; i < sbIn->stmts_used; i++) {
877 st = sbIn->stmts[i];
878 if (Ist_IMark == st->tag) {
879 inPreamble = False;
881 instrAddr = st->Ist.IMark.addr;
882 instrLen = st->Ist.IMark.len;
884 (*instrs)++;
885 toNextInstr = False;
887 if (inPreamble) continue;
888 if (Ist_Exit == st->tag) {
889 jumpDst = IRConst2Addr(st->Ist.Exit.dst);
890 toNextInstr = (jumpDst == instrAddr + instrLen);
892 (*cjmps)++;
896 /* if the last instructions of BB conditionally jumps to next instruction
897 * (= first instruction of next BB in memory), this is a inverted by VEX.
899 *cjmp_inverted = toNextInstr;
902 static
903 void addConstMemStoreStmt( IRSB* bbOut, UWord addr, UInt val, IRType hWordTy)
905 addStmtToIRSB( bbOut,
906 IRStmt_Store(CLGEndness,
907 IRExpr_Const(hWordTy == Ity_I32 ?
908 IRConst_U32( addr ) :
909 IRConst_U64( addr )),
910 IRExpr_Const(IRConst_U32(val)) ));
914 /* add helper call to setup_bbcc, with pointer to BB struct as argument
916 * precondition for setup_bbcc:
917 * - jmps_passed has number of cond.jumps passed in last executed BB
918 * - current_bbcc has a pointer to the BBCC of the last executed BB
919 * Thus, if bbcc_jmpkind is != -1 (JmpNone),
920 * current_bbcc->bb->jmp_addr
921 * gives the address of the jump source.
923 * the setup does 2 things:
924 * - trace call:
925 * * Unwind own call stack, i.e sync our ESP with real ESP
926 * This is for ESP manipulation (longjmps, C++ exec handling) and RET
927 * * For CALLs or JMPs crossing objects, record call arg +
928 * push are on own call stack
930 * - prepare for cache log functions:
931 * set current_bbcc to BBCC that gets the costs for this BB execution
932 * attached
934 static
935 void addBBSetupCall(ClgState* clgs)
937 IRDirty* di;
938 IRExpr *arg1, **argv;
940 arg1 = mkIRExpr_HWord( (HWord)clgs->bb );
941 argv = mkIRExprVec_1(arg1);
942 di = unsafeIRDirty_0_N( 1, "setup_bbcc",
943 VG_(fnptr_to_fnentry)( & CLG_(setup_bbcc) ),
944 argv);
945 addStmtToIRSB( clgs->sbOut, IRStmt_Dirty(di) );
949 static
950 IRSB* CLG_(instrument)( VgCallbackClosure* closure,
951 IRSB* sbIn,
952 const VexGuestLayout* layout,
953 const VexGuestExtents* vge,
954 const VexArchInfo* archinfo_host,
955 IRType gWordTy, IRType hWordTy )
957 Int i;
958 IRStmt* st;
959 Addr origAddr;
960 InstrInfo* curr_inode = NULL;
961 ClgState clgs;
962 UInt cJumps = 0;
963 IRTypeEnv* tyenv = sbIn->tyenv;
965 if (gWordTy != hWordTy) {
966 /* We don't currently support this case. */
967 VG_(tool_panic)("host/guest word size mismatch");
970 // No instrumentation if it is switched off
971 if (! CLG_(instrument_state)) {
972 CLG_DEBUG(5, "instrument(BB %#lx) [Instrumentation OFF]\n",
973 (Addr)closure->readdr);
974 return sbIn;
977 CLG_DEBUG(3, "+ instrument(BB %#lx)\n", (Addr)closure->readdr);
979 /* Set up SB for instrumented IR */
980 clgs.sbOut = deepCopyIRSBExceptStmts(sbIn);
982 // Copy verbatim any IR preamble preceding the first IMark
983 i = 0;
984 while (i < sbIn->stmts_used && sbIn->stmts[i]->tag != Ist_IMark) {
985 addStmtToIRSB( clgs.sbOut, sbIn->stmts[i] );
986 i++;
989 // Get the first statement, and origAddr from it
990 CLG_ASSERT(sbIn->stmts_used >0);
991 CLG_ASSERT(i < sbIn->stmts_used);
992 st = sbIn->stmts[i];
993 CLG_ASSERT(Ist_IMark == st->tag);
995 origAddr = st->Ist.IMark.addr + st->Ist.IMark.delta;
996 CLG_ASSERT(origAddr == st->Ist.IMark.addr
997 + st->Ist.IMark.delta); // XXX: check no overflow
999 /* Get BB struct (creating if necessary).
1000 * JS: The hash table is keyed with orig_addr_noredir -- important!
1001 * JW: Why? If it is because of different chasing of the redirection,
1002 * this is not needed, as chasing is switched off in callgrind
1004 clgs.bb = CLG_(get_bb)(origAddr, sbIn, &(clgs.seen_before));
1006 addBBSetupCall(&clgs);
1008 // Set up running state
1009 clgs.events_used = 0;
1010 clgs.ii_index = 0;
1011 clgs.instr_offset = 0;
1013 for (/*use current i*/; i < sbIn->stmts_used; i++) {
1015 st = sbIn->stmts[i];
1016 CLG_ASSERT(isFlatIRStmt(st));
1018 switch (st->tag) {
1019 case Ist_NoOp:
1020 case Ist_AbiHint:
1021 case Ist_Put:
1022 case Ist_PutI:
1023 case Ist_MBE:
1024 break;
1026 case Ist_IMark: {
1027 Addr cia = st->Ist.IMark.addr + st->Ist.IMark.delta;
1028 UInt isize = st->Ist.IMark.len;
1029 CLG_ASSERT(clgs.instr_offset == cia - origAddr);
1030 // If Vex fails to decode an instruction, the size will be zero.
1031 // Pretend otherwise.
1032 if (isize == 0) isize = VG_MIN_INSTR_SZB;
1034 // Sanity-check size.
1035 tl_assert( (VG_MIN_INSTR_SZB <= isize && isize <= VG_MAX_INSTR_SZB)
1036 || VG_CLREQ_SZB == isize );
1038 // Init the inode, record it as the current one.
1039 // Subsequent Dr/Dw/Dm events from the same instruction will
1040 // also use it.
1041 curr_inode = next_InstrInfo (&clgs, isize);
1043 addEvent_Ir( &clgs, curr_inode );
1044 break;
1047 case Ist_WrTmp: {
1048 IRExpr* data = st->Ist.WrTmp.data;
1049 if (data->tag == Iex_Load) {
1050 IRExpr* aexpr = data->Iex.Load.addr;
1051 // Note also, endianness info is ignored. I guess
1052 // that's not interesting.
1053 addEvent_Dr( &clgs, curr_inode,
1054 sizeofIRType(data->Iex.Load.ty), aexpr );
1056 break;
1059 case Ist_Store: {
1060 IRExpr* data = st->Ist.Store.data;
1061 IRExpr* aexpr = st->Ist.Store.addr;
1062 addEvent_Dw( &clgs, curr_inode,
1063 sizeofIRType(typeOfIRExpr(sbIn->tyenv, data)), aexpr );
1064 break;
1067 case Ist_StoreG: {
1068 IRStoreG* sg = st->Ist.StoreG.details;
1069 IRExpr* data = sg->data;
1070 IRExpr* addr = sg->addr;
1071 IRType type = typeOfIRExpr(tyenv, data);
1072 tl_assert(type != Ity_INVALID);
1073 addEvent_D_guarded( &clgs, curr_inode,
1074 sizeofIRType(type), addr, sg->guard,
1075 True/*isWrite*/ );
1076 break;
1079 case Ist_LoadG: {
1080 IRLoadG* lg = st->Ist.LoadG.details;
1081 IRType type = Ity_INVALID; /* loaded type */
1082 IRType typeWide = Ity_INVALID; /* after implicit widening */
1083 IRExpr* addr = lg->addr;
1084 typeOfIRLoadGOp(lg->cvt, &typeWide, &type);
1085 tl_assert(type != Ity_INVALID);
1086 addEvent_D_guarded( &clgs, curr_inode,
1087 sizeofIRType(type), addr, lg->guard,
1088 False/*!isWrite*/ );
1089 break;
1092 case Ist_Dirty: {
1093 Int dataSize;
1094 IRDirty* d = st->Ist.Dirty.details;
1095 if (d->mFx != Ifx_None) {
1096 /* This dirty helper accesses memory. Collect the details. */
1097 tl_assert(d->mAddr != NULL);
1098 tl_assert(d->mSize != 0);
1099 dataSize = d->mSize;
1100 // Large (eg. 28B, 108B, 512B on x86) data-sized
1101 // instructions will be done inaccurately, but they're
1102 // very rare and this avoids errors from hitting more
1103 // than two cache lines in the simulation.
1104 if (CLG_(clo).simulate_cache && dataSize > CLG_(min_line_size))
1105 dataSize = CLG_(min_line_size);
1106 if (d->mFx == Ifx_Read || d->mFx == Ifx_Modify)
1107 addEvent_Dr( &clgs, curr_inode, dataSize, d->mAddr );
1108 if (d->mFx == Ifx_Write || d->mFx == Ifx_Modify)
1109 addEvent_Dw( &clgs, curr_inode, dataSize, d->mAddr );
1110 } else {
1111 tl_assert(d->mAddr == NULL);
1112 tl_assert(d->mSize == 0);
1114 break;
1117 case Ist_CAS: {
1118 /* We treat it as a read and a write of the location. I
1119 think that is the same behaviour as it was before IRCAS
1120 was introduced, since prior to that point, the Vex
1121 front ends would translate a lock-prefixed instruction
1122 into a (normal) read followed by a (normal) write. */
1123 Int dataSize;
1124 IRCAS* cas = st->Ist.CAS.details;
1125 CLG_ASSERT(cas->addr && isIRAtom(cas->addr));
1126 CLG_ASSERT(cas->dataLo);
1127 dataSize = sizeofIRType(typeOfIRExpr(sbIn->tyenv, cas->dataLo));
1128 if (cas->dataHi != NULL)
1129 dataSize *= 2; /* since this is a doubleword-cas */
1130 addEvent_Dr( &clgs, curr_inode, dataSize, cas->addr );
1131 addEvent_Dw( &clgs, curr_inode, dataSize, cas->addr );
1132 addEvent_G( &clgs, curr_inode );
1133 break;
1136 case Ist_LLSC: {
1137 IRType dataTy;
1138 if (st->Ist.LLSC.storedata == NULL) {
1139 /* LL */
1140 dataTy = typeOfIRTemp(sbIn->tyenv, st->Ist.LLSC.result);
1141 addEvent_Dr( &clgs, curr_inode,
1142 sizeofIRType(dataTy), st->Ist.LLSC.addr );
1143 /* flush events before LL, should help SC to succeed */
1144 flushEvents( &clgs );
1145 } else {
1146 /* SC */
1147 dataTy = typeOfIRExpr(sbIn->tyenv, st->Ist.LLSC.storedata);
1148 addEvent_Dw( &clgs, curr_inode,
1149 sizeofIRType(dataTy), st->Ist.LLSC.addr );
1150 /* I don't know whether the global-bus-lock cost should
1151 be attributed to the LL or the SC, but it doesn't
1152 really matter since they always have to be used in
1153 pairs anyway. Hence put it (quite arbitrarily) on
1154 the SC. */
1155 addEvent_G( &clgs, curr_inode );
1157 break;
1160 case Ist_Exit: {
1161 Bool guest_exit, inverted;
1163 /* VEX code generation sometimes inverts conditional branches.
1164 * As Callgrind counts (conditional) jumps, it has to correct
1165 * inversions. The heuristic is the following:
1166 * (1) Callgrind switches off SB chasing and unrolling, and
1167 * therefore it assumes that a candidate for inversion only is
1168 * the last conditional branch in an SB.
1169 * (2) inversion is assumed if the branch jumps to the address of
1170 * the next guest instruction in memory.
1171 * This heuristic is precalculated in CLG_(collectBlockInfo)().
1173 * Branching behavior is also used for branch prediction. Note that
1174 * above heuristic is different from what Cachegrind does.
1175 * Cachegrind uses (2) for all branches.
1177 if (cJumps+1 == clgs.bb->cjmp_count)
1178 inverted = clgs.bb->cjmp_inverted;
1179 else
1180 inverted = False;
1182 // call branch predictor only if this is a branch in guest code
1183 guest_exit = (st->Ist.Exit.jk == Ijk_Boring) ||
1184 (st->Ist.Exit.jk == Ijk_Call) ||
1185 (st->Ist.Exit.jk == Ijk_Ret);
1187 if (guest_exit) {
1188 /* Stuff to widen the guard expression to a host word, so
1189 we can pass it to the branch predictor simulation
1190 functions easily. */
1191 IRType tyW = hWordTy;
1192 IROp widen = tyW==Ity_I32 ? Iop_1Uto32 : Iop_1Uto64;
1193 IROp opXOR = tyW==Ity_I32 ? Iop_Xor32 : Iop_Xor64;
1194 IRTemp guard1 = newIRTemp(clgs.sbOut->tyenv, Ity_I1);
1195 IRTemp guardW = newIRTemp(clgs.sbOut->tyenv, tyW);
1196 IRTemp guard = newIRTemp(clgs.sbOut->tyenv, tyW);
1197 IRExpr* one = tyW==Ity_I32 ? IRExpr_Const(IRConst_U32(1))
1198 : IRExpr_Const(IRConst_U64(1));
1200 /* Widen the guard expression. */
1201 addStmtToIRSB( clgs.sbOut,
1202 IRStmt_WrTmp( guard1, st->Ist.Exit.guard ));
1203 addStmtToIRSB( clgs.sbOut,
1204 IRStmt_WrTmp( guardW,
1205 IRExpr_Unop(widen,
1206 IRExpr_RdTmp(guard1))) );
1207 /* If the exit is inverted, invert the sense of the guard. */
1208 addStmtToIRSB(
1209 clgs.sbOut,
1210 IRStmt_WrTmp(
1211 guard,
1212 inverted ? IRExpr_Binop(opXOR, IRExpr_RdTmp(guardW), one)
1213 : IRExpr_RdTmp(guardW)
1215 /* And post the event. */
1216 addEvent_Bc( &clgs, curr_inode, IRExpr_RdTmp(guard) );
1219 /* We may never reach the next statement, so need to flush
1220 all outstanding transactions now. */
1221 flushEvents( &clgs );
1223 CLG_ASSERT(clgs.ii_index>0);
1224 if (!clgs.seen_before) {
1225 ClgJumpKind jk;
1227 if (st->Ist.Exit.jk == Ijk_Call) jk = jk_Call;
1228 else if (st->Ist.Exit.jk == Ijk_Ret) jk = jk_Return;
1229 else {
1230 if (IRConst2Addr(st->Ist.Exit.dst) ==
1231 origAddr + curr_inode->instr_offset + curr_inode->instr_size)
1232 jk = jk_None;
1233 else
1234 jk = jk_Jump;
1237 clgs.bb->jmp[cJumps].instr = clgs.ii_index-1;
1238 clgs.bb->jmp[cJumps].jmpkind = jk;
1241 /* Update global variable jmps_passed before the jump
1242 * A correction is needed if VEX inverted the last jump condition
1244 UInt val = inverted ? cJumps+1 : cJumps;
1245 addConstMemStoreStmt( clgs.sbOut,
1246 (UWord) &CLG_(current_state).jmps_passed,
1247 val, hWordTy);
1248 cJumps++;
1250 break;
1253 default:
1254 tl_assert(0);
1255 break;
1258 /* Copy the original statement */
1259 addStmtToIRSB( clgs.sbOut, st );
1261 CLG_DEBUGIF(5) {
1262 VG_(printf)(" pass ");
1263 ppIRStmt(st);
1264 VG_(printf)("\n");
1268 /* Deal with branches to unknown destinations. Except ignore ones
1269 which are function returns as we assume the return stack
1270 predictor never mispredicts. */
1271 if ((sbIn->jumpkind == Ijk_Boring) || (sbIn->jumpkind == Ijk_Call)) {
1272 if (0) { ppIRExpr( sbIn->next ); VG_(printf)("\n"); }
1273 switch (sbIn->next->tag) {
1274 case Iex_Const:
1275 break; /* boring - branch to known address */
1276 case Iex_RdTmp:
1277 /* looks like an indirect branch (branch to unknown) */
1278 addEvent_Bi( &clgs, curr_inode, sbIn->next );
1279 break;
1280 default:
1281 /* shouldn't happen - if the incoming IR is properly
1282 flattened, should only have tmp and const cases to
1283 consider. */
1284 tl_assert(0);
1288 /* At the end of the bb. Flush outstandings. */
1289 flushEvents( &clgs );
1291 /* Update global variable jmps_passed at end of SB.
1292 * As CLG_(current_state).jmps_passed is reset to 0 in setup_bbcc,
1293 * this can be omitted if there is no conditional jump in this SB.
1294 * A correction is needed if VEX inverted the last jump condition
1296 if (cJumps>0) {
1297 UInt jmps_passed = cJumps;
1298 if (clgs.bb->cjmp_inverted) jmps_passed--;
1299 addConstMemStoreStmt( clgs.sbOut,
1300 (UWord) &CLG_(current_state).jmps_passed,
1301 jmps_passed, hWordTy);
1303 CLG_ASSERT(clgs.bb->cjmp_count == cJumps);
1304 CLG_ASSERT(clgs.bb->instr_count == clgs.ii_index);
1306 /* Info for final exit from BB */
1308 ClgJumpKind jk;
1310 if (sbIn->jumpkind == Ijk_Call) jk = jk_Call;
1311 else if (sbIn->jumpkind == Ijk_Ret) jk = jk_Return;
1312 else {
1313 jk = jk_Jump;
1314 if ((sbIn->next->tag == Iex_Const) &&
1315 (IRConst2Addr(sbIn->next->Iex.Const.con) ==
1316 origAddr + clgs.instr_offset))
1317 jk = jk_None;
1319 clgs.bb->jmp[cJumps].jmpkind = jk;
1320 /* Instruction index of the call/ret at BB end
1321 * (it is wrong for fall-through, but does not matter) */
1322 clgs.bb->jmp[cJumps].instr = clgs.ii_index-1;
1325 /* swap information of last exit with final exit if inverted */
1326 if (clgs.bb->cjmp_inverted) {
1327 ClgJumpKind jk;
1328 UInt instr;
1330 jk = clgs.bb->jmp[cJumps].jmpkind;
1331 clgs.bb->jmp[cJumps].jmpkind = clgs.bb->jmp[cJumps-1].jmpkind;
1332 clgs.bb->jmp[cJumps-1].jmpkind = jk;
1333 instr = clgs.bb->jmp[cJumps].instr;
1334 clgs.bb->jmp[cJumps].instr = clgs.bb->jmp[cJumps-1].instr;
1335 clgs.bb->jmp[cJumps-1].instr = instr;
1338 if (clgs.seen_before) {
1339 CLG_ASSERT(clgs.bb->cost_count == update_cost_offsets(&clgs));
1340 CLG_ASSERT(clgs.bb->instr_len == clgs.instr_offset);
1342 else {
1343 clgs.bb->cost_count = update_cost_offsets(&clgs);
1344 clgs.bb->instr_len = clgs.instr_offset;
1347 CLG_DEBUG(3, "- instrument(BB %#lx): byteLen %u, CJumps %u, CostLen %u\n",
1348 origAddr, clgs.bb->instr_len,
1349 clgs.bb->cjmp_count, clgs.bb->cost_count);
1350 if (cJumps>0) {
1351 CLG_DEBUG(3, " [ ");
1352 for (i=0;i<cJumps;i++)
1353 CLG_DEBUG(3, "%u ", clgs.bb->jmp[i].instr);
1354 CLG_DEBUG(3, "], last inverted: %s \n",
1355 clgs.bb->cjmp_inverted ? "yes":"no");
1358 return clgs.sbOut;
1361 /*--------------------------------------------------------------------*/
1362 /*--- Discarding BB info ---*/
1363 /*--------------------------------------------------------------------*/
1365 // Called when a translation is removed from the translation cache for
1366 // any reason at all: to free up space, because the guest code was
1367 // unmapped or modified, or for any arbitrary reason.
1368 static
1369 void clg_discard_superblock_info ( Addr orig_addr, VexGuestExtents vge )
1371 tl_assert(vge.n_used > 0);
1373 if (0)
1374 VG_(printf)( "discard_superblock_info: %p, %p, %llu\n",
1375 (void*)orig_addr,
1376 (void*)vge.base[0], (ULong)vge.len[0]);
1378 // Get BB info, remove from table, free BB info. Simple!
1379 // When created, the BB is keyed by the first instruction address,
1380 // (not orig_addr, but eventually redirected address). Thus, we
1381 // use the first instruction address in vge.
1382 CLG_(delete_bb)(vge.base[0]);
1386 /*------------------------------------------------------------*/
1387 /*--- CLG_(fini)() and related function ---*/
1388 /*------------------------------------------------------------*/
1392 static void zero_thread_cost(thread_info* t)
1394 Int i;
1396 for(i = 0; i < CLG_(current_call_stack).sp; i++) {
1397 if (!CLG_(current_call_stack).entry[i].jcc) continue;
1399 /* reset call counters to current for active calls */
1400 CLG_(copy_cost)( CLG_(sets).full,
1401 CLG_(current_call_stack).entry[i].enter_cost,
1402 CLG_(current_state).cost );
1403 CLG_(current_call_stack).entry[i].jcc->call_counter = 0;
1406 CLG_(forall_bbccs)(CLG_(zero_bbcc));
1408 /* set counter for last dump */
1409 CLG_(copy_cost)( CLG_(sets).full,
1410 t->lastdump_cost, CLG_(current_state).cost );
1413 void CLG_(zero_all_cost)(Bool only_current_thread)
1415 if (VG_(clo_verbosity) > 1)
1416 VG_(message)(Vg_DebugMsg, " Zeroing costs...\n");
1418 if (only_current_thread)
1419 zero_thread_cost(CLG_(get_current_thread)());
1420 else
1421 CLG_(forall_threads)(zero_thread_cost);
1423 if (VG_(clo_verbosity) > 1)
1424 VG_(message)(Vg_DebugMsg, " ...done\n");
1427 static
1428 void unwind_thread(thread_info* t)
1430 /* unwind signal handlers */
1431 while(CLG_(current_state).sig !=0)
1432 CLG_(post_signal)(CLG_(current_tid),CLG_(current_state).sig);
1434 /* unwind regular call stack */
1435 while(CLG_(current_call_stack).sp>0)
1436 CLG_(pop_call_stack)();
1438 /* reset context and function stack for context generation */
1439 CLG_(init_exec_state)( &CLG_(current_state) );
1440 CLG_(current_fn_stack).top = CLG_(current_fn_stack).bottom;
1443 static
1444 void zero_state_cost(thread_info* t)
1446 CLG_(zero_cost)( CLG_(sets).full, CLG_(current_state).cost );
1449 void CLG_(set_instrument_state)(const HChar* reason, Bool state)
1451 if (CLG_(instrument_state) == state) {
1452 CLG_DEBUG(2, "%s: instrumentation already %s\n",
1453 reason, state ? "ON" : "OFF");
1454 return;
1456 CLG_(instrument_state) = state;
1457 CLG_DEBUG(2, "%s: Switching instrumentation %s ...\n",
1458 reason, state ? "ON" : "OFF");
1460 VG_(discard_translations_safely)( (Addr)0x1000, ~(SizeT)0xfff, "callgrind");
1462 /* reset internal state: call stacks, simulator */
1463 CLG_(forall_threads)(unwind_thread);
1464 CLG_(forall_threads)(zero_state_cost);
1465 (*CLG_(cachesim).clear)();
1467 if (VG_(clo_verbosity) > 1)
1468 VG_(message)(Vg_DebugMsg, "%s: instrumentation switched %s\n",
1469 reason, state ? "ON" : "OFF");
1472 /* helper for dump_state_togdb */
1473 static void dump_state_of_thread_togdb(thread_info* ti)
1475 static FullCost sum = 0, tmp = 0;
1476 Int t, i;
1477 BBCC *from, *to;
1478 call_entry* ce;
1479 HChar *mcost;
1481 t = CLG_(current_tid);
1482 CLG_(init_cost_lz)( CLG_(sets).full, &sum );
1483 CLG_(copy_cost_lz)( CLG_(sets).full, &tmp, ti->lastdump_cost );
1484 CLG_(add_diff_cost)( CLG_(sets).full, sum, ti->lastdump_cost,
1485 ti->states.entry[0]->cost);
1486 CLG_(copy_cost)( CLG_(sets).full, ti->lastdump_cost, tmp );
1487 mcost = CLG_(mappingcost_as_string)(CLG_(dumpmap), sum);
1488 VG_(gdb_printf)("events-%d: %s\n", t, mcost);
1489 VG_(free)(mcost);
1490 VG_(gdb_printf)("frames-%d: %d\n", t, CLG_(current_call_stack).sp);
1492 ce = 0;
1493 for(i = 0; i < CLG_(current_call_stack).sp; i++) {
1494 ce = CLG_(get_call_entry)(i);
1495 /* if this frame is skipped, we don't have counters */
1496 if (!ce->jcc) continue;
1498 from = ce->jcc->from;
1499 VG_(gdb_printf)("function-%d-%d: %s\n",t, i, from->cxt->fn[0]->name);
1500 VG_(gdb_printf)("calls-%d-%d: %llu\n",t, i, ce->jcc->call_counter);
1502 /* FIXME: EventSets! */
1503 CLG_(copy_cost)( CLG_(sets).full, sum, ce->jcc->cost );
1504 CLG_(copy_cost)( CLG_(sets).full, tmp, ce->enter_cost );
1505 CLG_(add_diff_cost)( CLG_(sets).full, sum,
1506 ce->enter_cost, CLG_(current_state).cost );
1507 CLG_(copy_cost)( CLG_(sets).full, ce->enter_cost, tmp );
1509 mcost = CLG_(mappingcost_as_string)(CLG_(dumpmap), sum);
1510 VG_(gdb_printf)("events-%d-%d: %s\n",t, i, mcost);
1511 VG_(free)(mcost);
1513 if (ce && ce->jcc) {
1514 to = ce->jcc->to;
1515 VG_(gdb_printf)("function-%d-%d: %s\n",t, i, to->cxt->fn[0]->name );
1519 /* Dump current state */
1520 static void dump_state_togdb(void)
1522 thread_info** th;
1523 int t;
1524 Int orig_tid = CLG_(current_tid);
1526 VG_(gdb_printf)("instrumentation: %s\n",
1527 CLG_(instrument_state) ? "on":"off");
1528 if (!CLG_(instrument_state)) return;
1530 VG_(gdb_printf)("executed-bbs: %llu\n", CLG_(stat).bb_executions);
1531 VG_(gdb_printf)("executed-calls: %llu\n", CLG_(stat).call_counter);
1532 VG_(gdb_printf)("distinct-bbs: %d\n", CLG_(stat).distinct_bbs);
1533 VG_(gdb_printf)("distinct-calls: %d\n", CLG_(stat).distinct_jccs);
1534 VG_(gdb_printf)("distinct-functions: %d\n", CLG_(stat).distinct_fns);
1535 VG_(gdb_printf)("distinct-contexts: %d\n", CLG_(stat).distinct_contexts);
1537 /* "events:" line. Given here because it will be dynamic in the future */
1538 HChar *evmap = CLG_(eventmapping_as_string)(CLG_(dumpmap));
1539 VG_(gdb_printf)("events: %s\n", evmap);
1540 VG_(free)(evmap);
1541 /* "part:" line (number of last part. Is 0 at start */
1542 VG_(gdb_printf)("part: %d\n", CLG_(get_dump_counter)());
1544 /* threads */
1545 th = CLG_(get_threads)();
1546 VG_(gdb_printf)("threads:");
1547 for(t=1;t<VG_N_THREADS;t++) {
1548 if (!th[t]) continue;
1549 VG_(gdb_printf)(" %d", t);
1551 VG_(gdb_printf)("\n");
1552 VG_(gdb_printf)("current-tid: %d\n", orig_tid);
1553 CLG_(forall_threads)(dump_state_of_thread_togdb);
1557 static void print_monitor_help ( void )
1559 VG_(gdb_printf) ("\n");
1560 VG_(gdb_printf) ("callgrind monitor commands:\n");
1561 VG_(gdb_printf) (" dump [<dump_hint>]\n");
1562 VG_(gdb_printf) (" dump counters\n");
1563 VG_(gdb_printf) (" zero\n");
1564 VG_(gdb_printf) (" zero counters\n");
1565 VG_(gdb_printf) (" status\n");
1566 VG_(gdb_printf) (" print status\n");
1567 VG_(gdb_printf) (" instrumentation [on|off]\n");
1568 VG_(gdb_printf) (" get/set (if on/off given) instrumentation state\n");
1569 VG_(gdb_printf) ("\n");
1572 /* return True if request recognised, False otherwise */
1573 static Bool handle_gdb_monitor_command (ThreadId tid, const HChar *req)
1575 HChar* wcmd;
1576 HChar s[VG_(strlen)(req) + 1]; /* copy for strtok_r */
1577 HChar *ssaveptr;
1579 VG_(strcpy) (s, req);
1581 wcmd = VG_(strtok_r) (s, " ", &ssaveptr);
1582 switch (VG_(keyword_id) ("help dump zero status instrumentation",
1583 wcmd, kwd_report_duplicated_matches)) {
1584 case -2: /* multiple matches */
1585 return True;
1586 case -1: /* not found */
1587 return False;
1588 case 0: /* help */
1589 print_monitor_help();
1590 return True;
1591 case 1: { /* dump */
1592 CLG_(dump_profile)(req, False);
1593 return True;
1595 case 2: { /* zero */
1596 CLG_(zero_all_cost)(False);
1597 return True;
1600 case 3: { /* status */
1601 HChar* arg = VG_(strtok_r) (0, " ", &ssaveptr);
1602 if (arg && (VG_(strcmp)(arg, "internal") == 0)) {
1603 /* internal interface to callgrind_control */
1604 dump_state_togdb();
1605 return True;
1608 if (!CLG_(instrument_state)) {
1609 VG_(gdb_printf)("No status available as instrumentation is switched off\n");
1610 } else {
1611 // Status information to be improved ...
1612 thread_info** th = CLG_(get_threads)();
1613 Int t, tcount = 0;
1614 for(t=1;t<VG_N_THREADS;t++)
1615 if (th[t]) tcount++;
1616 VG_(gdb_printf)("%d thread(s) running.\n", tcount);
1618 return True;
1621 case 4: { /* instrumentation */
1622 HChar* arg = VG_(strtok_r) (0, " ", &ssaveptr);
1623 if (!arg) {
1624 VG_(gdb_printf)("instrumentation: %s\n",
1625 CLG_(instrument_state) ? "on":"off");
1627 else
1628 CLG_(set_instrument_state)("Command", VG_(strcmp)(arg,"off")!=0);
1629 return True;
1632 default:
1633 tl_assert(0);
1634 return False;
1638 static
1639 Bool CLG_(handle_client_request)(ThreadId tid, UWord *args, UWord *ret)
1641 if (!VG_IS_TOOL_USERREQ('C','T',args[0])
1642 && VG_USERREQ__GDB_MONITOR_COMMAND != args[0])
1643 return False;
1645 switch(args[0]) {
1646 case VG_USERREQ__DUMP_STATS:
1647 CLG_(dump_profile)("Client Request", True);
1648 *ret = 0; /* meaningless */
1649 break;
1651 case VG_USERREQ__DUMP_STATS_AT:
1653 const HChar *arg = (HChar*)args[1];
1654 HChar buf[30 + VG_(strlen)(arg)]; // large enough
1655 VG_(sprintf)(buf,"Client Request: %s", arg);
1656 CLG_(dump_profile)(buf, True);
1657 *ret = 0; /* meaningless */
1659 break;
1661 case VG_USERREQ__ZERO_STATS:
1662 CLG_(zero_all_cost)(True);
1663 *ret = 0; /* meaningless */
1664 break;
1666 case VG_USERREQ__TOGGLE_COLLECT:
1667 CLG_(current_state).collect = !CLG_(current_state).collect;
1668 CLG_DEBUG(2, "Client Request: toggled collection state to %s\n",
1669 CLG_(current_state).collect ? "ON" : "OFF");
1670 *ret = 0; /* meaningless */
1671 break;
1673 case VG_USERREQ__START_INSTRUMENTATION:
1674 CLG_(set_instrument_state)("Client Request", True);
1675 *ret = 0; /* meaningless */
1676 break;
1678 case VG_USERREQ__STOP_INSTRUMENTATION:
1679 CLG_(set_instrument_state)("Client Request", False);
1680 *ret = 0; /* meaningless */
1681 break;
1683 case VG_USERREQ__GDB_MONITOR_COMMAND: {
1684 Bool handled = handle_gdb_monitor_command (tid, (HChar*)args[1]);
1685 if (handled)
1686 *ret = 1;
1687 else
1688 *ret = 0;
1689 return handled;
1691 default:
1692 VG_(message)(Vg_UserMsg,
1693 "Warning: unknown callgrind client request code %llx\n",
1694 (ULong)args[0]);
1695 return False;
1698 return True;
/* Syscall timing state, indexed by thread id.
   Whatever the time source used to measure a syscall, the value is
   stored converted to seconds and nanoseconds. */

/* syscalltime[tid]: time at which thread tid last started a syscall. */
struct vki_timespec *syscalltime;

/* syscallcputime[tid]: second time value recorded at syscall start.
   NOTE(review): presumably the thread CPU time, as the name suggests;
   confirm against the code that fills it. */
struct vki_timespec *syscallcputime;
1712 static
1713 void collect_time (struct vki_timespec *systime, struct vki_timespec *syscputime)
1715 switch (CLG_(clo).collect_systime) {
1716 default: tl_assert (0);
1717 case systime_msec: {
1718 UInt ms_timer = VG_(read_millisecond_timer)();
1719 systime->tv_sec = ms_timer / 1000;
1720 systime->tv_nsec = (ms_timer % 1000) * 1000000L;
1721 break;
1723 case systime_usec: {
1724 struct vki_timeval tv_now;
1725 VG_(gettimeofday)(&tv_now, NULL);
1726 systime->tv_sec = tv_now.tv_sec;
1727 systime->tv_nsec = tv_now.tv_usec * 1000;
1728 break;
1730 case systime_nsec:
1731 # if defined(VGO_linux) || defined(VGO_solaris) || defined(VGO_freebsd)
1732 VG_(clock_gettime)(systime, VKI_CLOCK_MONOTONIC);
1733 VG_(clock_gettime)(syscputime, VKI_CLOCK_THREAD_CPUTIME_ID);
1735 # elif defined(VGO_darwin)
1736 tl_assert(0);
1737 # else
1738 # error "Unknown OS"
1739 # endif
1740 break;
1744 static
1745 void CLG_(pre_syscalltime)(ThreadId tid, UInt syscallno,
1746 UWord* args, UInt nArgs)
1748 collect_time(&syscalltime[tid],
1749 CLG_(clo).collect_systime == systime_nsec ? &syscallcputime[tid] : NULL);
1752 /* Returns "after - before" in the unit as specified by --collect-systime.
1753 after is supposed to be >= before, and tv_nsec must be >= 0 and < One_Second_In_Nsec. */
1754 static
1755 ULong vki_timespec_diff (struct vki_timespec after, struct vki_timespec before)
1757 vki_time_t diff_sec = after.tv_sec - before.tv_sec;
1758 long diff_nsec = after.tv_nsec - before.tv_nsec;
1759 ULong nsec_factor; // factor to convert the desired unit into nsec.
1761 if (diff_nsec < 0) {
1762 diff_sec--;
1763 diff_nsec += 1000000000ULL;
1765 switch (CLG_(clo).collect_systime) {
1766 case systime_no: tl_assert (0);
1767 case systime_msec: nsec_factor = 1000000ULL; break;
1768 case systime_usec: nsec_factor = 1000ULL; break;
1769 case systime_nsec: nsec_factor = 1ULL; break;
1770 default: tl_assert(0);
1772 return ((ULong) diff_sec * 1000000000ULL + diff_nsec) / nsec_factor;
1775 static
1776 void CLG_(post_syscalltime)(ThreadId tid, UInt syscallno,
1777 UWord* args, UInt nArgs, SysRes res)
1779 if (CLG_(current_state).bbcc) {
1780 Int o;
1781 struct vki_timespec ts_now;
1782 struct vki_timespec ts_cpunow;
1783 ULong diff;
1785 collect_time(&ts_now,
1786 CLG_(clo).collect_systime == systime_nsec ? &ts_cpunow : NULL);
1788 diff = vki_timespec_diff (ts_now, syscalltime[tid]);
1790 /* offset o is for "SysCount", o+1 for "SysTime",
1791 o+2 is (optionally) "SysCpuTime". */
1792 o = fullOffset(EG_SYS);
1793 CLG_ASSERT(o>=0);
1794 CLG_DEBUG(0," Time (Off %d) for Syscall %u: %llu\n", o, syscallno,
1795 diff);
1797 if (!CLG_(current_state).bbcc->skipped)
1798 CLG_(init_cost_lz)(CLG_(sets).full,
1799 &(CLG_(current_state).bbcc->skipped));
1800 CLG_(current_state).cost[o] ++;
1801 CLG_(current_state).cost[o+1] += diff;
1802 CLG_(current_state).bbcc->skipped[o] ++;
1803 CLG_(current_state).bbcc->skipped[o+1] += diff;
1804 if (CLG_(clo).collect_systime == systime_nsec) {
1805 diff = vki_timespec_diff (ts_cpunow, syscallcputime[tid]);
1806 CLG_DEBUG(0," SysCpuTime (Off %d) for Syscall %u: %llu\n", o+2, syscallno,
1807 diff);
1808 CLG_(current_state).cost[o+2] += diff;
1809 CLG_(current_state).bbcc->skipped[o+2] += diff;
1814 static UInt ULong_width(ULong n)
1816 UInt w = 0;
1817 while (n > 0) {
1818 n = n / 10;
1819 w++;
1821 if (w == 0) w = 1;
1822 return w + (w-1)/3; // add space for commas
1825 static
1826 void branchsim_printstat(int l1, int l2, int l3)
1828 static HChar fmt[128]; // large enough
1829 FullCost total;
1830 ULong Bc_total_b, Bc_total_mp, Bi_total_b, Bi_total_mp;
1831 ULong B_total_b, B_total_mp;
1833 total = CLG_(total_cost);
1834 Bc_total_b = total[ fullOffset(EG_BC) ];
1835 Bc_total_mp = total[ fullOffset(EG_BC)+1 ];
1836 Bi_total_b = total[ fullOffset(EG_BI) ];
1837 Bi_total_mp = total[ fullOffset(EG_BI)+1 ];
1839 /* Make format string, getting width right for numbers */
1840 VG_(sprintf)(fmt, "%%s %%,%dllu (%%,%dllu cond + %%,%dllu ind)\n",
1841 l1, l2, l3);
1843 if (0 == Bc_total_b) Bc_total_b = 1;
1844 if (0 == Bi_total_b) Bi_total_b = 1;
1845 B_total_b = Bc_total_b + Bi_total_b;
1846 B_total_mp = Bc_total_mp + Bi_total_mp;
1848 VG_(umsg)("\n");
1849 VG_(umsg)(fmt, "Branches: ",
1850 B_total_b, Bc_total_b, Bi_total_b);
1852 VG_(umsg)(fmt, "Mispredicts: ",
1853 B_total_mp, Bc_total_mp, Bi_total_mp);
1855 VG_(umsg)("Mispred rate: %*.1f%% (%*.1f%% + %*.1f%% )\n",
1856 l1, B_total_mp * 100.0 / B_total_b,
1857 l2, Bc_total_mp * 100.0 / Bc_total_b,
1858 l3, Bi_total_mp * 100.0 / Bi_total_b);
1861 static
1862 void clg_print_stats(void)
1864 int BB_lookups =
1865 CLG_(stat).full_debug_BBs +
1866 CLG_(stat).fn_name_debug_BBs +
1867 CLG_(stat).file_line_debug_BBs +
1868 CLG_(stat).no_debug_BBs;
1870 /* Hash table stats */
1871 VG_(message)(Vg_DebugMsg, "Distinct objects: %d\n",
1872 CLG_(stat).distinct_objs);
1873 VG_(message)(Vg_DebugMsg, "Distinct files: %d\n",
1874 CLG_(stat).distinct_files);
1875 VG_(message)(Vg_DebugMsg, "Distinct fns: %d\n",
1876 CLG_(stat).distinct_fns);
1877 VG_(message)(Vg_DebugMsg, "Distinct contexts:%d\n",
1878 CLG_(stat).distinct_contexts);
1879 VG_(message)(Vg_DebugMsg, "Distinct BBs: %d\n",
1880 CLG_(stat).distinct_bbs);
1881 VG_(message)(Vg_DebugMsg, "Cost entries: %u (Chunks %u)\n",
1882 CLG_(costarray_entries), CLG_(costarray_chunks));
1883 VG_(message)(Vg_DebugMsg, "Distinct BBCCs: %d\n",
1884 CLG_(stat).distinct_bbccs);
1885 VG_(message)(Vg_DebugMsg, "Distinct JCCs: %d\n",
1886 CLG_(stat).distinct_jccs);
1887 VG_(message)(Vg_DebugMsg, "Distinct skips: %d\n",
1888 CLG_(stat).distinct_skips);
1889 VG_(message)(Vg_DebugMsg, "BB lookups: %d\n",
1890 BB_lookups);
1891 if (BB_lookups>0) {
1892 VG_(message)(Vg_DebugMsg, "With full debug info:%3d%% (%d)\n",
1893 CLG_(stat).full_debug_BBs * 100 / BB_lookups,
1894 CLG_(stat).full_debug_BBs);
1895 VG_(message)(Vg_DebugMsg, "With file/line debug info:%3d%% (%d)\n",
1896 CLG_(stat).file_line_debug_BBs * 100 / BB_lookups,
1897 CLG_(stat).file_line_debug_BBs);
1898 VG_(message)(Vg_DebugMsg, "With fn name debug info:%3d%% (%d)\n",
1899 CLG_(stat).fn_name_debug_BBs * 100 / BB_lookups,
1900 CLG_(stat).fn_name_debug_BBs);
1901 VG_(message)(Vg_DebugMsg, "With no debug info:%3d%% (%d)\n",
1902 CLG_(stat).no_debug_BBs * 100 / BB_lookups,
1903 CLG_(stat).no_debug_BBs);
1905 VG_(message)(Vg_DebugMsg, "BBCC Clones: %d\n",
1906 CLG_(stat).bbcc_clones);
1907 VG_(message)(Vg_DebugMsg, "BBs Retranslated: %d\n",
1908 CLG_(stat).bb_retranslations);
1909 VG_(message)(Vg_DebugMsg, "Distinct instrs: %d\n",
1910 CLG_(stat).distinct_instrs);
1912 VG_(message)(Vg_DebugMsg, "LRU Contxt Misses: %d\n",
1913 CLG_(stat).cxt_lru_misses);
1914 VG_(message)(Vg_DebugMsg, "LRU BBCC Misses: %d\n",
1915 CLG_(stat).bbcc_lru_misses);
1916 VG_(message)(Vg_DebugMsg, "LRU JCC Misses: %d\n",
1917 CLG_(stat).jcc_lru_misses);
1918 VG_(message)(Vg_DebugMsg, "BBs Executed: %llu\n",
1919 CLG_(stat).bb_executions);
1920 VG_(message)(Vg_DebugMsg, "Calls: %llu\n",
1921 CLG_(stat).call_counter);
1922 VG_(message)(Vg_DebugMsg, "CondJMP followed: %llu\n",
1923 CLG_(stat).jcnd_counter);
1924 VG_(message)(Vg_DebugMsg, "Boring JMPs: %llu\n",
1925 CLG_(stat).jump_counter);
1926 VG_(message)(Vg_DebugMsg, "Recursive calls: %llu\n",
1927 CLG_(stat).rec_call_counter);
1928 VG_(message)(Vg_DebugMsg, "Returns: %llu\n",
1929 CLG_(stat).ret_counter);
1933 static
1934 void finish(void)
1936 HChar fmt[128]; // large enough
1937 Int l1, l2, l3;
1938 FullCost total;
1940 CLG_DEBUG(0, "finish()\n");
1942 (*CLG_(cachesim).finish)();
1944 /* pop all remaining items from CallStack for correct sum
1946 CLG_(forall_threads)(unwind_thread);
1948 CLG_(dump_profile)(0, False);
1950 if (VG_(clo_verbosity) == 0) return;
1952 if (VG_(clo_stats)) {
1953 VG_(message)(Vg_DebugMsg, "\n");
1954 clg_print_stats();
1955 VG_(message)(Vg_DebugMsg, "\n");
1958 HChar *evmap = CLG_(eventmapping_as_string)(CLG_(dumpmap));
1959 VG_(message)(Vg_UserMsg, "Events : %s\n", evmap);
1960 VG_(free)(evmap);
1961 HChar *mcost = CLG_(mappingcost_as_string)(CLG_(dumpmap), CLG_(total_cost));
1962 VG_(message)(Vg_UserMsg, "Collected : %s\n", mcost);
1963 VG_(free)(mcost);
1964 VG_(message)(Vg_UserMsg, "\n");
1966 /* determine value widths for statistics */
1967 total = CLG_(total_cost);
1968 l1 = ULong_width( total[fullOffset(EG_IR)] );
1969 l2 = l3 = 0;
1970 if (CLG_(clo).simulate_cache) {
1971 l2 = ULong_width( total[fullOffset(EG_DR)] );
1972 l3 = ULong_width( total[fullOffset(EG_DW)] );
1974 if (CLG_(clo).simulate_branch) {
1975 int l2b = ULong_width( total[fullOffset(EG_BC)] );
1976 int l3b = ULong_width( total[fullOffset(EG_BI)] );
1977 if (l2b > l2) l2 = l2b;
1978 if (l3b > l3) l3 = l3b;
1981 /* Make format string, getting width right for numbers */
1982 VG_(sprintf)(fmt, "%%s %%,%dllu\n", l1);
1984 /* Always print this */
1985 VG_(umsg)(fmt, "I refs: ", total[fullOffset(EG_IR)] );
1987 if (CLG_(clo).simulate_cache)
1988 (*CLG_(cachesim).printstat)(l1, l2, l3);
1990 if (CLG_(clo).simulate_branch)
1991 branchsim_printstat(l1, l2, l3);
1996 void CLG_(fini)(Int exitcode)
1998 finish();
2002 /*--------------------------------------------------------------------*/
2003 /*--- Setup ---*/
2004 /*--------------------------------------------------------------------*/
2006 static void clg_start_client_code_callback ( ThreadId tid, ULong blocks_done )
2008 static ULong last_blocks_done = 0;
2010 if (0)
2011 VG_(printf)("%d R %llu\n", (Int)tid, blocks_done);
2013 /* throttle calls to CLG_(run_thread) by number of BBs executed */
2014 if (blocks_done - last_blocks_done < 5000) return;
2015 last_blocks_done = blocks_done;
2017 CLG_(run_thread)( tid );
2020 static
2021 void CLG_(post_clo_init)(void)
2023 if (VG_(clo_vex_control).iropt_register_updates_default
2024 != VexRegUpdSpAtMemAccess) {
2025 CLG_DEBUG(1, " Using user specified value for "
2026 "--vex-iropt-register-updates\n");
2027 } else {
2028 CLG_DEBUG(1,
2029 " Using default --vex-iropt-register-updates="
2030 "sp-at-mem-access\n");
2033 if (CLG_(clo).collect_systime != systime_no) {
2034 VG_(needs_syscall_wrapper)(CLG_(pre_syscalltime),
2035 CLG_(post_syscalltime));
2036 syscalltime = CLG_MALLOC("cl.main.pci.1",
2037 VG_N_THREADS * sizeof syscalltime[0]);
2038 for (UInt i = 0; i < VG_N_THREADS; ++i) {
2039 syscalltime[i].tv_sec = 0;
2040 syscalltime[i].tv_nsec = 0;
2042 if (CLG_(clo).collect_systime == systime_nsec) {
2043 syscallcputime = CLG_MALLOC("cl.main.pci.2",
2044 VG_N_THREADS * sizeof syscallcputime[0]);
2045 for (UInt i = 0; i < VG_N_THREADS; ++i) {
2046 syscallcputime[i].tv_sec = 0;
2047 syscallcputime[i].tv_nsec = 0;
2052 if (VG_(clo_px_file_backed) != VexRegUpdSpAtMemAccess) {
2053 CLG_DEBUG(1, " Using user specified value for "
2054 "--px-file-backed\n");
2055 } else {
2056 CLG_DEBUG(1,
2057 " Using default --px-file-backed="
2058 "sp-at-mem-access\n");
2061 if (VG_(clo_vex_control).iropt_unroll_thresh != 0) {
2062 VG_(message)(Vg_UserMsg,
2063 "callgrind only works with --vex-iropt-unroll-thresh=0\n"
2064 "=> resetting it back to 0\n");
2065 VG_(clo_vex_control).iropt_unroll_thresh = 0; // cannot be overridden.
2067 if (VG_(clo_vex_control).guest_chase) {
2068 VG_(message)(Vg_UserMsg,
2069 "callgrind only works with --vex-guest-chase=no\n"
2070 "=> resetting it back to 'no'\n");
2071 VG_(clo_vex_control).guest_chase = False; // cannot be overridden.
2074 CLG_DEBUG(1, " dump threads: %s\n", CLG_(clo).separate_threads ? "Yes":"No");
2075 CLG_DEBUG(1, " call sep. : %d\n", CLG_(clo).separate_callers);
2076 CLG_DEBUG(1, " rec. sep. : %d\n", CLG_(clo).separate_recursions);
2078 if (!CLG_(clo).dump_line && !CLG_(clo).dump_instr && !CLG_(clo).dump_bb) {
2079 VG_(message)(Vg_UserMsg, "Using source line as position.\n");
2080 CLG_(clo).dump_line = True;
2083 CLG_(init_dumps)();
2085 (*CLG_(cachesim).post_clo_init)();
2087 CLG_(init_eventsets)();
2088 CLG_(init_statistics)(& CLG_(stat));
2089 CLG_(init_cost_lz)( CLG_(sets).full, &CLG_(total_cost) );
2091 /* initialize hash tables */
2092 CLG_(init_obj_table)();
2093 CLG_(init_cxt_table)();
2094 CLG_(init_bb_hash)();
2096 CLG_(init_threads)();
2097 CLG_(run_thread)(1);
2099 CLG_(instrument_state) = CLG_(clo).instrument_atstart;
2101 if (VG_(clo_verbosity) > 0) {
2102 VG_(message)(Vg_UserMsg,
2103 "For interactive control, run 'callgrind_control%s%s -h'.\n",
2104 (VG_(arg_vgdb_prefix) ? " " : ""),
2105 (VG_(arg_vgdb_prefix) ? VG_(arg_vgdb_prefix) : ""));
2109 static
2110 void CLG_(pre_clo_init)(void)
2112 VG_(details_name) ("Callgrind");
2113 VG_(details_version) (NULL);
2114 VG_(details_description) ("a call-graph generating cache profiler");
2115 VG_(details_copyright_author)("Copyright (C) 2002-2017, and GNU GPL'd, "
2116 "by Josef Weidendorfer et al.");
2117 VG_(details_bug_reports_to) (VG_BUGS_TO);
2118 VG_(details_avg_translation_sizeB) ( 500 );
2120 VG_(clo_vex_control).iropt_register_updates_default
2121 = VG_(clo_px_file_backed)
2122 = VexRegUpdSpAtMemAccess; // overridable by the user.
2124 VG_(clo_vex_control).iropt_unroll_thresh = 0; // cannot be overridden.
2125 VG_(clo_vex_control).guest_chase = False; // cannot be overridden.
2127 VG_(basic_tool_funcs) (CLG_(post_clo_init),
2128 CLG_(instrument),
2129 CLG_(fini));
2131 VG_(needs_superblock_discards)(clg_discard_superblock_info);
2134 VG_(needs_command_line_options)(CLG_(process_cmd_line_option),
2135 CLG_(print_usage),
2136 CLG_(print_debug_usage));
2138 VG_(needs_client_requests)(CLG_(handle_client_request));
2139 VG_(needs_print_stats) (clg_print_stats);
2141 VG_(track_start_client_code) ( & clg_start_client_code_callback );
2142 VG_(track_pre_deliver_signal) ( & CLG_(pre_signal) );
2143 VG_(track_post_deliver_signal)( & CLG_(post_signal) );
2145 CLG_(set_clo_defaults)();
2149 VG_DETERMINE_INTERFACE_VERSION(CLG_(pre_clo_init))
2151 /*--------------------------------------------------------------------*/
2152 /*--- end main.c ---*/
2153 /*--------------------------------------------------------------------*/