2 /*--------------------------------------------------------------------*/
5 /*--------------------------------------------------------------------*/
8 This file is part of Callgrind, a Valgrind tool for call graph
11 Copyright (C) 2002-2017, Josef Weidendorfer (Josef.Weidendorfer@gmx.de)
13 This tool is derived from and contains code from Cachegrind
14 Copyright (C) 2002-2017 Nicholas Nethercote (njn@valgrind.org)
16 This program is free software; you can redistribute it and/or
17 modify it under the terms of the GNU General Public License as
18 published by the Free Software Foundation; either version 2 of the
19 License, or (at your option) any later version.
21 This program is distributed in the hope that it will be useful, but
22 WITHOUT ANY WARRANTY; without even the implied warranty of
23 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
24 General Public License for more details.
26 You should have received a copy of the GNU General Public License
27 along with this program; if not, see <http://www.gnu.org/licenses/>.
29 The GNU General Public License is contained in the file COPYING.
33 #include "callgrind.h"
36 #include "pub_tool_threadstate.h"
37 #include "pub_tool_gdbserver.h"
38 #include "pub_tool_transtab.h" // VG_(discard_translations_safely)
40 #include "cg_branchpred.c"
42 /*------------------------------------------------------------*/
43 /*--- Global variables ---*/
44 /*------------------------------------------------------------*/
/* NOTE(review): this chunk is a damaged extraction -- statements are
   split across physical lines and original line numbers are fused into
   the text.  Comments below describe only what the visible tokens show. */
/* Global command-line option settings (fields read all over this file,
   e.g. .simulate_cache, .simulate_branch, .collect_bus). */
47 CommandLineOptions
CLG_(clo
);
/* Global statistics counters; reset in CLG_(init_statistics). */
48 Statistics
CLG_(stat
);
/* True while instrumentation is switched on (checked in CLG_(instrument)). */
49 Bool
CLG_(instrument_state
) = True
; /* Instrumentation on ? */
51 /* thread and signal handler specific */
52 exec_state
CLG_(current_state
);
54 /* min of L1 and LL cache line sizes. This only gets set to a
55 non-zero value if we are doing cache simulation. */
56 Int
CLG_(min_line_size
) = 0;
59 /*------------------------------------------------------------*/
60 /*--- Statistics ---*/
61 /*------------------------------------------------------------*/
/* Reset the statistics counters in *s to zero.
   NOTE(review): extraction damage -- the opening brace and several
   counter resets (original lines 64-67, 69-71, ...) are missing from
   this view; only the assignments shown are preserved. */
63 static void CLG_(init_statistics
)(Statistics
* s
)
68 s
->rec_call_counter
= 0;
72 s
->context_counter
= 0;
73 s
->bb_retranslations
= 0;
76 s
->distinct_files
= 0;
78 s
->distinct_contexts
= 0;
80 s
->distinct_bbccs
= 0;
81 s
->distinct_instrs
= 0;
82 s
->distinct_skips
= 0;
84 s
->bb_hash_resizes
= 0;
85 s
->bbcc_hash_resizes
= 0;
86 s
->jcc_hash_resizes
= 0;
87 s
->cxt_hash_resizes
= 0;
88 s
->fn_array_resizes
= 0;
89 s
->call_stack_resizes
= 0;
90 s
->fn_stack_resizes
= 0;
92 s
->full_debug_BBs
= 0;
93 s
->file_line_debug_BBs
= 0;
94 s
->fn_name_debug_BBs
= 0;
96 s
->bbcc_lru_misses
= 0;
97 s
->jcc_lru_misses
= 0;
98 s
->cxt_lru_misses
= 0;
103 /*------------------------------------------------------------*/
104 /*--- Simple callbacks (not cache similator) ---*/
105 /*------------------------------------------------------------*/
/* Run-time helper, called from instrumented code on a global bus event
   (see flushEvents, Ev_G).  Bumps the EG_BUS counter of the current
   state, then selects the cost slot: the nonskipped BBCC's skipped
   costs if inside a skipped function, else the instruction's own slot.
   NOTE(review): extraction damage -- the declaration of cost_Bus, the
   'else' keyword and the final increment are missing from this view. */
108 static void log_global_event(InstrInfo
* ii
)
112 CLG_DEBUG(6, "log_global_event: Ir %#lx/%u\n",
113 CLG_(bb_base
) + ii
->instr_offset
, ii
->instr_size
);
115 if (!CLG_(current_state
).collect
) return;
117 CLG_ASSERT( (ii
->eventset
->mask
& (1u<<EG_BUS
))>0 );
119 CLG_(current_state
).cost
[ fullOffset(EG_BUS
) ]++;
121 if (CLG_(current_state
).nonskipped
)
122 cost_Bus
= CLG_(current_state
).nonskipped
->skipped
+ fullOffset(EG_BUS
);
124 cost_Bus
= CLG_(cost_base
) + ii
->cost_offset
+ ii
->eventset
->offset
[EG_BUS
];
129 /* For branches, we consult two different predictors, one which
130 predicts taken/untaken for conditional branches, and the other
131 which predicts the branch target address for indirect branches
132 (jump-to-register style ones). */
/* Run-time helper for conditional branches: consults the cond-branch
   predictor (do_cond_branch_predict, from cg_branchpred.c) and, if
   collection is on, counts one Bc event and -- at offset +1 -- the
   misprediction.  Cost slot selection mirrors log_global_event.
   NOTE(review): extraction damage -- local declarations (miss, cost_Bc,
   fullOffset_Bc), braces, 'else' and the miss-guard around the +1
   increment are missing from this view. */
135 void log_cond_branch(InstrInfo
* ii
, Word taken
)
141 CLG_DEBUG(6, "log_cond_branch: Ir %#lx, taken %ld\n",
142 CLG_(bb_base
) + ii
->instr_offset
, taken
);
144 miss
= 1 & do_cond_branch_predict(CLG_(bb_base
) + ii
->instr_offset
, taken
);
146 if (!CLG_(current_state
).collect
) return;
148 CLG_ASSERT( (ii
->eventset
->mask
& (1u<<EG_BC
))>0 );
150 if (CLG_(current_state
).nonskipped
)
151 cost_Bc
= CLG_(current_state
).nonskipped
->skipped
+ fullOffset(EG_BC
);
153 cost_Bc
= CLG_(cost_base
) + ii
->cost_offset
+ ii
->eventset
->offset
[EG_BC
];
155 fullOffset_Bc
= fullOffset(EG_BC
);
156 CLG_(current_state
).cost
[ fullOffset_Bc
]++;
159 CLG_(current_state
).cost
[ fullOffset_Bc
+1 ]++;
/* Run-time helper for indirect branches: consults the indirect-branch
   target predictor (do_ind_branch_predict) and, if collection is on,
   counts one Bi event and -- at offset +1 -- the misprediction.
   Structure parallels log_cond_branch.
   NOTE(review): extraction damage -- local declarations (miss, cost_Bi,
   fullOffset_Bi), braces, 'else' and the miss-guard around the +1
   increment are missing from this view. */
165 void log_ind_branch(InstrInfo
* ii
, UWord actual_dst
)
171 CLG_DEBUG(6, "log_ind_branch: Ir %#lx, dst %#lx\n",
172 CLG_(bb_base
) + ii
->instr_offset
, actual_dst
);
174 miss
= 1 & do_ind_branch_predict(CLG_(bb_base
) + ii
->instr_offset
, actual_dst
);
176 if (!CLG_(current_state
).collect
) return;
178 CLG_ASSERT( (ii
->eventset
->mask
& (1u<<EG_BI
))>0 );
180 if (CLG_(current_state
).nonskipped
)
181 cost_Bi
= CLG_(current_state
).nonskipped
->skipped
+ fullOffset(EG_BI
);
183 cost_Bi
= CLG_(cost_base
) + ii
->cost_offset
+ ii
->eventset
->offset
[EG_BI
];
185 fullOffset_Bi
= fullOffset(EG_BI
);
186 CLG_(current_state
).cost
[ fullOffset_Bi
]++;
189 CLG_(current_state
).cost
[ fullOffset_Bi
+1 ]++;
194 /*------------------------------------------------------------*/
195 /*--- Instrumentation structures and event queue handling ---*/
196 /*------------------------------------------------------------*/
198 /* Maintain an ordered list of memory events which are outstanding, in
199 the sense that no IR has yet been generated to do the relevant
200 helper calls. The BB is scanned top to bottom and memory events
201 are added to the end of the list, merging with the most recent
202 notified event where possible (Dw immediately following Dr and
203 having the same size and EA can be merged).
205 This merging is done so that for architectures which have
206 load-op-store instructions (x86, amd64), the insn is treated as if
207 it makes just one memory reference (a modify), rather than two (a
208 read followed by a write at the same address).
210 At various points the list will need to be flushed, that is, IR
211 generated from it. That must happen before any possible exit from
212 the block (the end, or an IRStmt_Exit). Flushing also takes place
213 when there is no space to add a new event.
215 If we require the simulation statistics to be up to date with
216 respect to possible memory exceptions, then the list would have to
217 be flushed before each memory reference. That would however lose
218 performance by inhibiting event-merging during flushing.
220 Flushing the list consists of walking it start to end and emitting
221 instrumentation IR for each event, in the order in which they
222 appear. It may be possible to emit a single call for two adjacent
223 events in order to reduce the number of helper function calls made.
224 For example, it could well be profitable to handle two adjacent Ir
225 events with a single helper call. */
233 Ev_Ir
, // Instruction read
236 Ev_Dm
, // Data modify (read then write)
237 Ev_Bc
, // branch conditional
238 Ev_Bi
, // branch indirect (to unknown destination)
239 Ev_G
// Global bus event
263 IRAtom
* taken
; /* :: Ity_I1 */
/* Zero-initialise an Event struct before its fields are filled in by
   the addEvent_* functions.  NOTE(review): closing brace missing from
   this extraction. */
274 static void init_Event ( Event
* ev
) {
275 VG_(memset
)(ev
, 0, sizeof(Event
));
/* Return the effective-address IR atom of a data event (Dr/Dw/Dm);
   asserts on any other tag.  NOTE(review): the 'switch (ev->tag) {'
   header and closing braces are missing from this extraction. */
278 static IRAtom
* get_Event_dea ( Event
* ev
) {
280 case Ev_Dr
: return ev
->Ev
.Dr
.ea
;
281 case Ev_Dw
: return ev
->Ev
.Dw
.ea
;
282 case Ev_Dm
: return ev
->Ev
.Dm
.ea
;
283 default: tl_assert(0);
/* Return the data size in bytes of a data event (Dr/Dw/Dm); asserts on
   any other tag.  NOTE(review): the 'switch (ev->tag) {' header and
   closing braces are missing from this extraction. */
287 static Int
get_Event_dszB ( Event
* ev
) {
289 case Ev_Dr
: return ev
->Ev
.Dr
.szB
;
290 case Ev_Dw
: return ev
->Ev
.Dw
.szB
;
291 case Ev_Dm
: return ev
->Ev
.Dm
.szB
;
292 default: tl_assert(0);
297 /* Up to this many unnotified events are allowed. Number is
298 arbitrary. Larger numbers allow more event merging to occur, but
299 potentially induce more spilling due to extending live ranges of
300 address temporaries. */
304 /* A struct which holds all the running state during instrumentation.
305 Mostly to avoid passing loads of parameters everywhere. */
307 /* The current outstanding-memory-event list. */
308 Event events
[N_EVENTS
];
311 /* The array of InstrInfo's is part of BB struct. */
314 /* BB seen before (ie. re-instrumentation) */
317 /* Number InstrInfo bins 'used' so far. */
320 // current offset of guest instructions from BB start
323 /* The output SB being constructed. */
/* Debug pretty-printer: dump one queued Event to the Valgrind log,
   one format per event tag (Ir/Dr/Dw/Dm/Bc/Bi/G).
   NOTE(review): the surrounding 'switch (ev->tag)' and its case labels
   are missing from this extraction; the printf bodies below are what
   remains of each case. */
328 static void showEvent ( Event
* ev
)
332 VG_(printf
)("Ir (InstrInfo %p) at +%u\n",
333 ev
->inode
, ev
->inode
->instr_offset
);
336 VG_(printf
)("Dr (InstrInfo %p) at +%u %d EA=",
337 ev
->inode
, ev
->inode
->instr_offset
, ev
->Ev
.Dr
.szB
);
338 ppIRExpr(ev
->Ev
.Dr
.ea
);
342 VG_(printf
)("Dw (InstrInfo %p) at +%u %d EA=",
343 ev
->inode
, ev
->inode
->instr_offset
, ev
->Ev
.Dw
.szB
);
344 ppIRExpr(ev
->Ev
.Dw
.ea
);
348 VG_(printf
)("Dm (InstrInfo %p) at +%u %d EA=",
349 ev
->inode
, ev
->inode
->instr_offset
, ev
->Ev
.Dm
.szB
);
350 ppIRExpr(ev
->Ev
.Dm
.ea
);
354 VG_(printf
)("Bc %p GA=", ev
->inode
);
355 ppIRExpr(ev
->Ev
.Bc
.taken
);
359 VG_(printf
)("Bi %p DST=", ev
->inode
);
360 ppIRExpr(ev
->Ev
.Bi
.dst
);
364 VG_(printf
)("G %p\n", ev
->inode
);
372 /* Generate code for all outstanding memory events, and mark the queue
373 empty. Code is generated into cgs->sbOut, and this activity
374 'consumes' slots in cgs->bb. */
/* Drain the outstanding event queue: on first instrumentation of a BB,
   extend each instruction's event set (base Ir counters, then Dr/Dw,
   Bc/Bi, Bus groups as present); then walk the queue emitting one dirty
   helper call per (possibly merged) event group -- Ir+Dr, Ir+Dw/Dm,
   Ir+Ir+Ir, Ir+Ir, lone Ir, lone Dr/Dm, lone Dw, Bc, Bi, G -- and
   finally reset events_used to 0.
   NOTE(review): heavy extraction damage -- the enclosing switch/case
   labels, several local declarations (ev/ev2/ev3, helperAddr, argv, di,
   i_node_expr), 'else' branches, the 'inew' advancement logic and many
   braces are missing from this view; only the surviving tokens are
   preserved below. */
376 static void flushEvents ( ClgState
* clgs
)
378 Int i
, regparms
, inew
;
379 const HChar
* helperName
;
388 if (!clgs
->seen_before
) {
389 // extend event sets as needed
390 // available sets: D0 Dr
391 for(i
=0; i
<clgs
->events_used
; i
++) {
392 ev
= &clgs
->events
[i
];
395 // Ir event always is first for a guest instruction
396 CLG_ASSERT(ev
->inode
->eventset
== 0);
397 ev
->inode
->eventset
= CLG_(sets
).base
;
400 // extend event set by Dr counters
401 ev
->inode
->eventset
= CLG_(add_event_group
)(ev
->inode
->eventset
,
406 // extend event set by Dw counters
407 ev
->inode
->eventset
= CLG_(add_event_group
)(ev
->inode
->eventset
,
411 // extend event set by Bc counters
412 ev
->inode
->eventset
= CLG_(add_event_group
)(ev
->inode
->eventset
,
416 // extend event set by Bi counters
417 ev
->inode
->eventset
= CLG_(add_event_group
)(ev
->inode
->eventset
,
421 // extend event set by Bus counter
422 ev
->inode
->eventset
= CLG_(add_event_group
)(ev
->inode
->eventset
,
431 for(i
= 0; i
< clgs
->events_used
; i
= inew
) {
438 /* generate IR to notify event i and possibly the ones
439 immediately following it. */
440 tl_assert(i
>= 0 && i
< clgs
->events_used
);
442 ev
= &clgs
->events
[i
];
443 ev2
= ( i
< clgs
->events_used
-1 ? &clgs
->events
[i
+1] : NULL
);
444 ev3
= ( i
< clgs
->events_used
-2 ? &clgs
->events
[i
+2] : NULL
);
447 VG_(printf
)(" flush ");
451 i_node_expr
= mkIRExpr_HWord( (HWord
)ev
->inode
);
453 /* Decide on helper fn to call and args to pass it, and advance
455 Dm events have same effect as Dw events */
458 /* Merge an Ir with a following Dr. */
459 if (ev2
&& ev2
->tag
== Ev_Dr
) {
460 /* Why is this true? It's because we're merging an Ir
461 with a following Dr. The Ir derives from the
462 instruction's IMark and the Dr from data
463 references which follow it. In short it holds
464 because each insn starts with an IMark, hence an
465 Ev_Ir, and so these Dr must pertain to the
466 immediately preceding Ir. Same applies to analogous
467 assertions in the subsequent cases. */
468 tl_assert(ev2
->inode
== ev
->inode
);
469 helperName
= CLG_(cachesim
).log_1I1Dr_name
;
470 helperAddr
= CLG_(cachesim
).log_1I1Dr
;
471 argv
= mkIRExprVec_3( i_node_expr
,
473 mkIRExpr_HWord( get_Event_dszB(ev2
) ) );
477 /* Merge an Ir with a following Dw/Dm. */
479 if (ev2
&& (ev2
->tag
== Ev_Dw
|| ev2
->tag
== Ev_Dm
)) {
480 tl_assert(ev2
->inode
== ev
->inode
);
481 helperName
= CLG_(cachesim
).log_1I1Dw_name
;
482 helperAddr
= CLG_(cachesim
).log_1I1Dw
;
483 argv
= mkIRExprVec_3( i_node_expr
,
485 mkIRExpr_HWord( get_Event_dszB(ev2
) ) );
489 /* Merge an Ir with two following Irs. */
491 if (ev2
&& ev3
&& ev2
->tag
== Ev_Ir
&& ev3
->tag
== Ev_Ir
) {
492 helperName
= CLG_(cachesim
).log_3I0D_name
;
493 helperAddr
= CLG_(cachesim
).log_3I0D
;
494 argv
= mkIRExprVec_3( i_node_expr
,
495 mkIRExpr_HWord( (HWord
)ev2
->inode
),
496 mkIRExpr_HWord( (HWord
)ev3
->inode
) );
500 /* Merge an Ir with one following Ir. */
502 if (ev2
&& ev2
->tag
== Ev_Ir
) {
503 helperName
= CLG_(cachesim
).log_2I0D_name
;
504 helperAddr
= CLG_(cachesim
).log_2I0D
;
505 argv
= mkIRExprVec_2( i_node_expr
,
506 mkIRExpr_HWord( (HWord
)ev2
->inode
) );
510 /* No merging possible; emit as-is. */
512 helperName
= CLG_(cachesim
).log_1I0D_name
;
513 helperAddr
= CLG_(cachesim
).log_1I0D
;
514 argv
= mkIRExprVec_1( i_node_expr
);
520 /* Data read or modify */
521 helperName
= CLG_(cachesim
).log_0I1Dr_name
;
522 helperAddr
= CLG_(cachesim
).log_0I1Dr
;
523 argv
= mkIRExprVec_3( i_node_expr
,
525 mkIRExpr_HWord( get_Event_dszB(ev
) ) );
532 helperName
= CLG_(cachesim
).log_0I1Dw_name
;
533 helperAddr
= CLG_(cachesim
).log_0I1Dw
;
534 argv
= mkIRExprVec_3( i_node_expr
,
536 mkIRExpr_HWord( get_Event_dszB(ev
) ) );
541 /* Conditional branch */
542 helperName
= "log_cond_branch";
543 helperAddr
= &log_cond_branch
;
544 argv
= mkIRExprVec_2( i_node_expr
, ev
->Ev
.Bc
.taken
);
549 /* Branch to an unknown destination */
550 helperName
= "log_ind_branch";
551 helperAddr
= &log_ind_branch
;
552 argv
= mkIRExprVec_2( i_node_expr
, ev
->Ev
.Bi
.dst
);
557 /* Global bus event (CAS, LOCK-prefix, LL-SC, etc) */
558 helperName
= "log_global_event";
559 helperAddr
= &log_global_event
;
560 argv
= mkIRExprVec_1( i_node_expr
);
570 VG_(printf
)(" merge ");
574 VG_(printf
)(" merge ");
578 VG_(printf
)(" call %s (%p)\n",
579 helperName
, helperAddr
);
582 /* helper could be unset depending on the simulator used */
583 if (helperAddr
== 0) continue;
585 /* Add the helper. */
586 tl_assert(helperName
);
587 tl_assert(helperAddr
);
589 di
= unsafeIRDirty_0_N( regparms
,
590 helperName
, VG_(fnptr_to_fnentry
)( helperAddr
),
592 addStmtToIRSB( clgs
->sbOut
, IRStmt_Dirty(di
) );
595 clgs
->events_used
= 0;
/* Queue an instruction-fetch (Ir) event for inode.  No-op unless cache
   simulation is enabled.  NOTE(review): extraction damage -- the
   flushEvents call on a full queue, the evt field assignments and the
   events_used increment are missing from this view. */
598 static void addEvent_Ir ( ClgState
* clgs
, InstrInfo
* inode
)
601 tl_assert(clgs
->seen_before
|| (inode
->eventset
== 0));
602 if (!CLG_(clo
).simulate_cache
) return;
604 if (clgs
->events_used
== N_EVENTS
)
606 tl_assert(clgs
->events_used
>= 0 && clgs
->events_used
< N_EVENTS
);
607 evt
= &clgs
->events
[clgs
->events_used
];
/* Queue a data-read (Dr) event of 'datasize' bytes at address atom 'ea'
   for inode.  No-op unless cache simulation is enabled; datasize is
   asserted to fit within CLG_(min_line_size).
   NOTE(review): extraction damage -- 'static', the flushEvents call on
   a full queue, init_Event, tag/inode/ea assignments and the
   events_used increment are missing from this view. */
615 void addEvent_Dr ( ClgState
* clgs
, InstrInfo
* inode
, Int datasize
, IRAtom
* ea
)
618 tl_assert(isIRAtom(ea
));
619 tl_assert(datasize
>= 1);
620 if (!CLG_(clo
).simulate_cache
) return;
621 tl_assert(datasize
<= CLG_(min_line_size
));
623 if (clgs
->events_used
== N_EVENTS
)
625 tl_assert(clgs
->events_used
>= 0 && clgs
->events_used
< N_EVENTS
);
626 evt
= &clgs
->events
[clgs
->events_used
];
630 evt
->Ev
.Dr
.szB
= datasize
;
/* Queue a data-write (Dw) event; if the immediately preceding queued
   event is a Dr of the same size, inode and address, upgrade it to a
   modify (Dm) instead of adding a new event -- this makes load-op-store
   instructions count as one memory reference.
   NOTE(review): extraction damage -- 'static', the early 'return' after
   the Dm upgrade, flushEvents on a full queue, init_Event, remaining
   field assignments and the events_used increment are missing here. */
636 void addEvent_Dw ( ClgState
* clgs
, InstrInfo
* inode
, Int datasize
, IRAtom
* ea
)
639 tl_assert(isIRAtom(ea
));
640 tl_assert(datasize
>= 1);
641 if (!CLG_(clo
).simulate_cache
) return;
642 tl_assert(datasize
<= CLG_(min_line_size
));
644 /* Is it possible to merge this write with the preceding read? */
645 if (clgs
->events_used
> 0) {
646 Event
* lastEvt
= &clgs
->events
[clgs
->events_used
-1];
647 if ( lastEvt
->tag
== Ev_Dr
648 && lastEvt
->Ev
.Dr
.szB
== datasize
649 && lastEvt
->inode
== inode
650 && eqIRAtom(lastEvt
->Ev
.Dr
.ea
, ea
))
652 lastEvt
->tag
= Ev_Dm
;
657 /* No. Add as normal. */
658 if (clgs
->events_used
== N_EVENTS
)
660 tl_assert(clgs
->events_used
>= 0 && clgs
->events_used
< N_EVENTS
);
661 evt
= &clgs
->events
[clgs
->events_used
];
665 evt
->Ev
.Dw
.szB
= datasize
;
/* Emit IR for a guarded load/store immediately (no queueing): the event
   queue is flushed first, then a single 0I1Dr/0I1Dw cachesim helper
   call is added, guarded by 'guard'.  Guarded accesses are rare, so the
   lost merging opportunity is acceptable (see comment in body).
   NOTE(review): extraction damage -- 'static', the 'Bool isWrite'
   parameter, flushEvents call, helperAddr/argv/di declarations, the
   helperAddr null check and the di->guard assignment are missing here. */
671 void addEvent_D_guarded ( ClgState
* clgs
, InstrInfo
* inode
,
672 Int datasize
, IRAtom
* ea
, IRAtom
* guard
,
675 tl_assert(isIRAtom(ea
));
677 tl_assert(isIRAtom(guard
));
678 tl_assert(datasize
>= 1);
679 if (!CLG_(clo
).simulate_cache
) return;
680 tl_assert(datasize
<= CLG_(min_line_size
));
682 /* Adding guarded memory actions and merging them with the existing
683 queue is too complex. Simply flush the queue and add this
684 action immediately. Since guarded loads and stores are pretty
685 rare, this is not thought likely to cause any noticeable
686 performance loss as a result of the loss of event-merging
688 tl_assert(clgs
->events_used
>= 0);
690 tl_assert(clgs
->events_used
== 0);
691 /* Same as case Ev_Dw / case Ev_Dr in flushEvents, except with guard */
693 const HChar
* helperName
;
698 i_node_expr
= mkIRExpr_HWord( (HWord
)inode
);
699 helperName
= isWrite
? CLG_(cachesim
).log_0I1Dw_name
700 : CLG_(cachesim
).log_0I1Dr_name
;
701 helperAddr
= isWrite
? CLG_(cachesim
).log_0I1Dw
702 : CLG_(cachesim
).log_0I1Dr
;
703 argv
= mkIRExprVec_3( i_node_expr
,
704 ea
, mkIRExpr_HWord( datasize
) );
706 di
= unsafeIRDirty_0_N(
708 helperName
, VG_(fnptr_to_fnentry
)( helperAddr
),
711 addStmtToIRSB( clgs
->sbOut
, IRStmt_Dirty(di
) );
/* Queue a conditional-branch (Bc) event; 'guard' is a word-sized
   (I32/I64, matching RegWord) taken/not-taken value.  No-op unless
   branch simulation is enabled.
   NOTE(review): extraction damage -- 'static', flushEvents on a full
   queue, init_Event, tag/inode assignments and the events_used
   increment are missing from this view. */
715 void addEvent_Bc ( ClgState
* clgs
, InstrInfo
* inode
, IRAtom
* guard
)
718 tl_assert(isIRAtom(guard
));
719 tl_assert(typeOfIRExpr(clgs
->sbOut
->tyenv
, guard
)
720 == (sizeof(RegWord
)==4 ? Ity_I32
: Ity_I64
));
721 if (!CLG_(clo
).simulate_branch
) return;
723 if (clgs
->events_used
== N_EVENTS
)
725 tl_assert(clgs
->events_used
>= 0 && clgs
->events_used
< N_EVENTS
);
726 evt
= &clgs
->events
[clgs
->events_used
];
730 evt
->Ev
.Bc
.taken
= guard
;
/* Queue an indirect-branch (Bi) event; 'whereTo' is the word-sized
   destination address atom.  Structure parallels addEvent_Bc.
   NOTE(review): extraction damage -- 'static', flushEvents on a full
   queue, init_Event, tag/inode assignments and the events_used
   increment are missing from this view. */
735 void addEvent_Bi ( ClgState
* clgs
, InstrInfo
* inode
, IRAtom
* whereTo
)
738 tl_assert(isIRAtom(whereTo
));
739 tl_assert(typeOfIRExpr(clgs
->sbOut
->tyenv
, whereTo
)
740 == (sizeof(RegWord
)==4 ? Ity_I32
: Ity_I64
));
741 if (!CLG_(clo
).simulate_branch
) return;
743 if (clgs
->events_used
== N_EVENTS
)
745 tl_assert(clgs
->events_used
>= 0 && clgs
->events_used
< N_EVENTS
);
746 evt
= &clgs
->events
[clgs
->events_used
];
750 evt
->Ev
.Bi
.dst
= whereTo
;
/* Queue a global bus (G) event for inode.  No-op unless --collect-bus
   is enabled.  NOTE(review): extraction damage -- 'static', flushEvents
   on a full queue, init_Event, tag/inode assignments and the
   events_used increment are missing from this view. */
755 void addEvent_G ( ClgState
* clgs
, InstrInfo
* inode
)
758 if (!CLG_(clo
).collect_bus
) return;
760 if (clgs
->events_used
== N_EVENTS
)
762 tl_assert(clgs
->events_used
>= 0 && clgs
->events_used
< N_EVENTS
);
763 evt
= &clgs
->events
[clgs
->events_used
];
770 /* Initialise or check (if already seen before) an InstrInfo for next insn.
771 We only can set instr_offset/instr_size here. The required event set and
772 resulting cost offset depend on events (Ir/Dr/Dw/Dm) in guest
773 instructions. The event set is extended as required on flush of the event
774 queue (when Dm events were determined), cost offsets are determined at
775 end of BB instrumentation. */
/* Get the InstrInfo bin for the next guest instruction of the BB being
   instrumented: on re-instrumentation, verify offset/size match what
   was recorded; on first pass, record them and count a distinct
   instruction.  Advances clgs->instr_offset by instr_size.
   NOTE(review): extraction damage -- 'static', the local 'ii'
   declaration, 'else', ii_index increment, the 'return ii;' and braces
   are missing from this view. */
777 InstrInfo
* next_InstrInfo ( ClgState
* clgs
, UInt instr_size
)
780 tl_assert(clgs
->ii_index
< clgs
->bb
->instr_count
);
781 ii
= &clgs
->bb
->instr
[ clgs
->ii_index
];
783 if (clgs
->seen_before
) {
784 CLG_ASSERT(ii
->instr_offset
== clgs
->instr_offset
);
785 CLG_ASSERT(ii
->instr_size
== instr_size
);
788 ii
->instr_offset
= clgs
->instr_offset
;
789 ii
->instr_size
= instr_size
;
795 clgs
->instr_offset
+= instr_size
;
796 CLG_(stat
).distinct_instrs
++;
801 // return total number of cost values needed for this BB
/* Assign (first pass) or verify (re-instrumentation) the running cost
   offset of every InstrInfo in the BB; each instruction contributes its
   eventset size.  Returns the total number of cost values for the BB
   (per the comment preceding this function).
   NOTE(review): extraction damage -- 'static', local i/ii declarations,
   'else', the final 'return cost_offset;' and braces are missing from
   this view. */
803 UInt
update_cost_offsets( ClgState
* clgs
)
807 UInt cost_offset
= 0;
809 CLG_ASSERT(clgs
->bb
->instr_count
== clgs
->ii_index
);
810 for(i
=0; i
<clgs
->ii_index
; i
++) {
811 ii
= &clgs
->bb
->instr
[i
];
812 if (clgs
->seen_before
) {
813 CLG_ASSERT(ii
->cost_offset
== cost_offset
);
815 ii
->cost_offset
= cost_offset
;
816 cost_offset
+= ii
->eventset
? ii
->eventset
->size
: 0;
822 /*------------------------------------------------------------*/
823 /*--- Instrumentation ---*/
824 /*------------------------------------------------------------*/
826 #if defined(VG_BIGENDIAN)
827 # define CLGEndness Iend_BE
828 #elif defined(VG_LITTLEENDIAN)
829 # define CLGEndness Iend_LE
831 # error "Unknown endianness"
/* Convert a VEX IRConst (Ico_U32 on 32-bit guests, Ico_U64 on 64-bit)
   to a guest Addr; panics on any other RegWord size.
   NOTE(review): extraction damage -- 'static', the return statements
   extracting con->Ico.U32/U64 and braces are missing from this view. */
835 Addr
IRConst2Addr(IRConst
* con
)
839 if (sizeof(RegWord
) == 4) {
840 CLG_ASSERT( con
->tag
== Ico_U32
);
843 else if (sizeof(RegWord
) == 8) {
844 CLG_ASSERT( con
->tag
== Ico_U64
);
848 VG_(tool_panic
)("Callgrind: invalid Addr type");
853 /* First pass over a BB to instrument, counting instructions and jumps
854 * This is needed for the size of the BB struct to allocate
856 * Called from CLG_(get_bb)
/* First pass over an incoming superblock (called from CLG_(get_bb)):
   count guest instructions (*instrs) and conditional jumps (*cjmps),
   and set *cjmp_inverted when the final conditional exit targets the
   next instruction in memory (VEX inverts such branches).  Ist_Exit
   statements before the first IMark (VEX self-modifying-code preamble)
   are ignored.
   NOTE(review): extraction damage -- local i/st/instrLen declarations,
   the (*instrs)++/(*cjmps)++ increments, the inPreamble reset and
   braces are missing from this view. */
858 void CLG_(collectBlockInfo
)(IRSB
* sbIn
,
859 /*INOUT*/ UInt
* instrs
,
860 /*INOUT*/ UInt
* cjmps
,
861 /*INOUT*/ Bool
* cjmp_inverted
)
865 Addr instrAddr
=0, jumpDst
;
867 Bool toNextInstr
= False
;
869 // Ist_Exit has to be ignored in preamble code, before first IMark:
870 // preamble code is added by VEX for self modifying code, and has
871 // nothing to do with client code
872 Bool inPreamble
= True
;
876 for (i
= 0; i
< sbIn
->stmts_used
; i
++) {
878 if (Ist_IMark
== st
->tag
) {
881 instrAddr
= st
->Ist
.IMark
.addr
;
882 instrLen
= st
->Ist
.IMark
.len
;
887 if (inPreamble
) continue;
888 if (Ist_Exit
== st
->tag
) {
889 jumpDst
= IRConst2Addr(st
->Ist
.Exit
.dst
);
890 toNextInstr
= (jumpDst
== instrAddr
+ instrLen
);
896 /* if the last instructions of BB conditionally jumps to next instruction
897 * (= first instruction of next BB in memory), this is a inverted by VEX.
899 *cjmp_inverted
= toNextInstr
;
/* Append an IR store of the 32-bit constant 'val' to the fixed address
   'addr' (address constant sized by hWordTy).  Used to update
   CLG_(current_state).jmps_passed from generated code.
   NOTE(review): 'static' and the closing brace are missing from this
   extraction. */
903 void addConstMemStoreStmt( IRSB
* bbOut
, UWord addr
, UInt val
, IRType hWordTy
)
905 addStmtToIRSB( bbOut
,
906 IRStmt_Store(CLGEndness
,
907 IRExpr_Const(hWordTy
== Ity_I32
?
908 IRConst_U32( addr
) :
909 IRConst_U64( addr
)),
910 IRExpr_Const(IRConst_U32(val
)) ));
914 /* add helper call to setup_bbcc, with pointer to BB struct as argument
916 * precondition for setup_bbcc:
917 * - jmps_passed has number of cond.jumps passed in last executed BB
918 * - current_bbcc has a pointer to the BBCC of the last executed BB
919 * Thus, if bbcc_jmpkind is != -1 (JmpNone),
920 * current_bbcc->bb->jmp_addr
921 * gives the address of the jump source.
923 * the setup does 2 things:
925 * * Unwind own call stack, i.e sync our ESP with real ESP
926 * This is for ESP manipulation (longjmps, C++ exec handling) and RET
927 * * For CALLs or JMPs crossing objects, record call arg +
928 * push are on own call stack
930 * - prepare for cache log functions:
931 * set current_bbcc to BBCC that gets the costs for this BB execution
/* Append a dirty helper call to CLG_(setup_bbcc) at the start of the
   output SB, passing the BB struct pointer as the single argument (see
   the precondition comment preceding this function).
   NOTE(review): 'static', the IRDirty* di declaration, the argv
   argument to unsafeIRDirty_0_N and braces are missing from this
   extraction. */
935 void addBBSetupCall(ClgState
* clgs
)
938 IRExpr
*arg1
, **argv
;
940 arg1
= mkIRExpr_HWord( (HWord
)clgs
->bb
);
941 argv
= mkIRExprVec_1(arg1
);
942 di
= unsafeIRDirty_0_N( 1, "setup_bbcc",
943 VG_(fnptr_to_fnentry
)( & CLG_(setup_bbcc
) ),
945 addStmtToIRSB( clgs
->sbOut
, IRStmt_Dirty(di
) );
950 IRSB
* CLG_(instrument
)( VgCallbackClosure
* closure
,
952 const VexGuestLayout
* layout
,
953 const VexGuestExtents
* vge
,
954 const VexArchInfo
* archinfo_host
,
955 IRType gWordTy
, IRType hWordTy
)
960 InstrInfo
* curr_inode
= NULL
;
963 IRTypeEnv
* tyenv
= sbIn
->tyenv
;
965 if (gWordTy
!= hWordTy
) {
966 /* We don't currently support this case. */
967 VG_(tool_panic
)("host/guest word size mismatch");
970 // No instrumentation if it is switched off
971 if (! CLG_(instrument_state
)) {
972 CLG_DEBUG(5, "instrument(BB %#lx) [Instrumentation OFF]\n",
973 (Addr
)closure
->readdr
);
977 CLG_DEBUG(3, "+ instrument(BB %#lx)\n", (Addr
)closure
->readdr
);
979 /* Set up SB for instrumented IR */
980 clgs
.sbOut
= deepCopyIRSBExceptStmts(sbIn
);
982 // Copy verbatim any IR preamble preceding the first IMark
984 while (i
< sbIn
->stmts_used
&& sbIn
->stmts
[i
]->tag
!= Ist_IMark
) {
985 addStmtToIRSB( clgs
.sbOut
, sbIn
->stmts
[i
] );
989 // Get the first statement, and origAddr from it
990 CLG_ASSERT(sbIn
->stmts_used
>0);
991 CLG_ASSERT(i
< sbIn
->stmts_used
);
993 CLG_ASSERT(Ist_IMark
== st
->tag
);
995 origAddr
= st
->Ist
.IMark
.addr
+ st
->Ist
.IMark
.delta
;
996 CLG_ASSERT(origAddr
== st
->Ist
.IMark
.addr
997 + st
->Ist
.IMark
.delta
); // XXX: check no overflow
999 /* Get BB struct (creating if necessary).
1000 * JS: The hash table is keyed with orig_addr_noredir -- important!
1001 * JW: Why? If it is because of different chasing of the redirection,
1002 * this is not needed, as chasing is switched off in callgrind
1004 clgs
.bb
= CLG_(get_bb
)(origAddr
, sbIn
, &(clgs
.seen_before
));
1006 addBBSetupCall(&clgs
);
1008 // Set up running state
1009 clgs
.events_used
= 0;
1011 clgs
.instr_offset
= 0;
1013 for (/*use current i*/; i
< sbIn
->stmts_used
; i
++) {
1015 st
= sbIn
->stmts
[i
];
1016 CLG_ASSERT(isFlatIRStmt(st
));
1027 Addr cia
= st
->Ist
.IMark
.addr
+ st
->Ist
.IMark
.delta
;
1028 UInt isize
= st
->Ist
.IMark
.len
;
1029 CLG_ASSERT(clgs
.instr_offset
== cia
- origAddr
);
1030 // If Vex fails to decode an instruction, the size will be zero.
1031 // Pretend otherwise.
1032 if (isize
== 0) isize
= VG_MIN_INSTR_SZB
;
1034 // Sanity-check size.
1035 tl_assert( (VG_MIN_INSTR_SZB
<= isize
&& isize
<= VG_MAX_INSTR_SZB
)
1036 || VG_CLREQ_SZB
== isize
);
1038 // Init the inode, record it as the current one.
1039 // Subsequent Dr/Dw/Dm events from the same instruction will
1041 curr_inode
= next_InstrInfo (&clgs
, isize
);
1043 addEvent_Ir( &clgs
, curr_inode
);
1048 IRExpr
* data
= st
->Ist
.WrTmp
.data
;
1049 if (data
->tag
== Iex_Load
) {
1050 IRExpr
* aexpr
= data
->Iex
.Load
.addr
;
1051 // Note also, endianness info is ignored. I guess
1052 // that's not interesting.
1053 addEvent_Dr( &clgs
, curr_inode
,
1054 sizeofIRType(data
->Iex
.Load
.ty
), aexpr
);
1060 IRExpr
* data
= st
->Ist
.Store
.data
;
1061 IRExpr
* aexpr
= st
->Ist
.Store
.addr
;
1062 addEvent_Dw( &clgs
, curr_inode
,
1063 sizeofIRType(typeOfIRExpr(sbIn
->tyenv
, data
)), aexpr
);
1068 IRStoreG
* sg
= st
->Ist
.StoreG
.details
;
1069 IRExpr
* data
= sg
->data
;
1070 IRExpr
* addr
= sg
->addr
;
1071 IRType type
= typeOfIRExpr(tyenv
, data
);
1072 tl_assert(type
!= Ity_INVALID
);
1073 addEvent_D_guarded( &clgs
, curr_inode
,
1074 sizeofIRType(type
), addr
, sg
->guard
,
1080 IRLoadG
* lg
= st
->Ist
.LoadG
.details
;
1081 IRType type
= Ity_INVALID
; /* loaded type */
1082 IRType typeWide
= Ity_INVALID
; /* after implicit widening */
1083 IRExpr
* addr
= lg
->addr
;
1084 typeOfIRLoadGOp(lg
->cvt
, &typeWide
, &type
);
1085 tl_assert(type
!= Ity_INVALID
);
1086 addEvent_D_guarded( &clgs
, curr_inode
,
1087 sizeofIRType(type
), addr
, lg
->guard
,
1088 False
/*!isWrite*/ );
1094 IRDirty
* d
= st
->Ist
.Dirty
.details
;
1095 if (d
->mFx
!= Ifx_None
) {
1096 /* This dirty helper accesses memory. Collect the details. */
1097 tl_assert(d
->mAddr
!= NULL
);
1098 tl_assert(d
->mSize
!= 0);
1099 dataSize
= d
->mSize
;
1100 // Large (eg. 28B, 108B, 512B on x86) data-sized
1101 // instructions will be done inaccurately, but they're
1102 // very rare and this avoids errors from hitting more
1103 // than two cache lines in the simulation.
1104 if (CLG_(clo
).simulate_cache
&& dataSize
> CLG_(min_line_size
))
1105 dataSize
= CLG_(min_line_size
);
1106 if (d
->mFx
== Ifx_Read
|| d
->mFx
== Ifx_Modify
)
1107 addEvent_Dr( &clgs
, curr_inode
, dataSize
, d
->mAddr
);
1108 if (d
->mFx
== Ifx_Write
|| d
->mFx
== Ifx_Modify
)
1109 addEvent_Dw( &clgs
, curr_inode
, dataSize
, d
->mAddr
);
1111 tl_assert(d
->mAddr
== NULL
);
1112 tl_assert(d
->mSize
== 0);
1118 /* We treat it as a read and a write of the location. I
1119 think that is the same behaviour as it was before IRCAS
1120 was introduced, since prior to that point, the Vex
1121 front ends would translate a lock-prefixed instruction
1122 into a (normal) read followed by a (normal) write. */
1124 IRCAS
* cas
= st
->Ist
.CAS
.details
;
1125 CLG_ASSERT(cas
->addr
&& isIRAtom(cas
->addr
));
1126 CLG_ASSERT(cas
->dataLo
);
1127 dataSize
= sizeofIRType(typeOfIRExpr(sbIn
->tyenv
, cas
->dataLo
));
1128 if (cas
->dataHi
!= NULL
)
1129 dataSize
*= 2; /* since this is a doubleword-cas */
1130 addEvent_Dr( &clgs
, curr_inode
, dataSize
, cas
->addr
);
1131 addEvent_Dw( &clgs
, curr_inode
, dataSize
, cas
->addr
);
1132 addEvent_G( &clgs
, curr_inode
);
1138 if (st
->Ist
.LLSC
.storedata
== NULL
) {
1140 dataTy
= typeOfIRTemp(sbIn
->tyenv
, st
->Ist
.LLSC
.result
);
1141 addEvent_Dr( &clgs
, curr_inode
,
1142 sizeofIRType(dataTy
), st
->Ist
.LLSC
.addr
);
1143 /* flush events before LL, should help SC to succeed */
1144 flushEvents( &clgs
);
1147 dataTy
= typeOfIRExpr(sbIn
->tyenv
, st
->Ist
.LLSC
.storedata
);
1148 addEvent_Dw( &clgs
, curr_inode
,
1149 sizeofIRType(dataTy
), st
->Ist
.LLSC
.addr
);
1150 /* I don't know whether the global-bus-lock cost should
1151 be attributed to the LL or the SC, but it doesn't
1152 really matter since they always have to be used in
1153 pairs anyway. Hence put it (quite arbitrarily) on
1155 addEvent_G( &clgs
, curr_inode
);
1161 Bool guest_exit
, inverted
;
1163 /* VEX code generation sometimes inverts conditional branches.
1164 * As Callgrind counts (conditional) jumps, it has to correct
1165 * inversions. The heuristic is the following:
1166 * (1) Callgrind switches off SB chasing and unrolling, and
1167 * therefore it assumes that a candidate for inversion only is
1168 * the last conditional branch in an SB.
1169 * (2) inversion is assumed if the branch jumps to the address of
1170 * the next guest instruction in memory.
1171 * This heuristic is precalculated in CLG_(collectBlockInfo)().
1173 * Branching behavior is also used for branch prediction. Note that
1174 * above heuristic is different from what Cachegrind does.
1175 * Cachegrind uses (2) for all branches.
1177 if (cJumps
+1 == clgs
.bb
->cjmp_count
)
1178 inverted
= clgs
.bb
->cjmp_inverted
;
1182 // call branch predictor only if this is a branch in guest code
1183 guest_exit
= (st
->Ist
.Exit
.jk
== Ijk_Boring
) ||
1184 (st
->Ist
.Exit
.jk
== Ijk_Call
) ||
1185 (st
->Ist
.Exit
.jk
== Ijk_Ret
);
1188 /* Stuff to widen the guard expression to a host word, so
1189 we can pass it to the branch predictor simulation
1190 functions easily. */
1191 IRType tyW
= hWordTy
;
1192 IROp widen
= tyW
==Ity_I32
? Iop_1Uto32
: Iop_1Uto64
;
1193 IROp opXOR
= tyW
==Ity_I32
? Iop_Xor32
: Iop_Xor64
;
1194 IRTemp guard1
= newIRTemp(clgs
.sbOut
->tyenv
, Ity_I1
);
1195 IRTemp guardW
= newIRTemp(clgs
.sbOut
->tyenv
, tyW
);
1196 IRTemp guard
= newIRTemp(clgs
.sbOut
->tyenv
, tyW
);
1197 IRExpr
* one
= tyW
==Ity_I32
? IRExpr_Const(IRConst_U32(1))
1198 : IRExpr_Const(IRConst_U64(1));
1200 /* Widen the guard expression. */
1201 addStmtToIRSB( clgs
.sbOut
,
1202 IRStmt_WrTmp( guard1
, st
->Ist
.Exit
.guard
));
1203 addStmtToIRSB( clgs
.sbOut
,
1204 IRStmt_WrTmp( guardW
,
1206 IRExpr_RdTmp(guard1
))) );
1207 /* If the exit is inverted, invert the sense of the guard. */
1212 inverted
? IRExpr_Binop(opXOR
, IRExpr_RdTmp(guardW
), one
)
1213 : IRExpr_RdTmp(guardW
)
1215 /* And post the event. */
1216 addEvent_Bc( &clgs
, curr_inode
, IRExpr_RdTmp(guard
) );
1219 /* We may never reach the next statement, so need to flush
1220 all outstanding transactions now. */
1221 flushEvents( &clgs
);
1223 CLG_ASSERT(clgs
.ii_index
>0);
1224 if (!clgs
.seen_before
) {
1227 if (st
->Ist
.Exit
.jk
== Ijk_Call
) jk
= jk_Call
;
1228 else if (st
->Ist
.Exit
.jk
== Ijk_Ret
) jk
= jk_Return
;
1230 if (IRConst2Addr(st
->Ist
.Exit
.dst
) ==
1231 origAddr
+ curr_inode
->instr_offset
+ curr_inode
->instr_size
)
1237 clgs
.bb
->jmp
[cJumps
].instr
= clgs
.ii_index
-1;
1238 clgs
.bb
->jmp
[cJumps
].jmpkind
= jk
;
1241 /* Update global variable jmps_passed before the jump
1242 * A correction is needed if VEX inverted the last jump condition
1244 UInt val
= inverted
? cJumps
+1 : cJumps
;
1245 addConstMemStoreStmt( clgs
.sbOut
,
1246 (UWord
) &CLG_(current_state
).jmps_passed
,
1258 /* Copy the original statement */
1259 addStmtToIRSB( clgs
.sbOut
, st
);
1262 VG_(printf
)(" pass ");
1268 /* Deal with branches to unknown destinations. Except ignore ones
1269 which are function returns as we assume the return stack
1270 predictor never mispredicts. */
1271 if ((sbIn
->jumpkind
== Ijk_Boring
) || (sbIn
->jumpkind
== Ijk_Call
)) {
1272 if (0) { ppIRExpr( sbIn
->next
); VG_(printf
)("\n"); }
1273 switch (sbIn
->next
->tag
) {
1275 break; /* boring - branch to known address */
1277 /* looks like an indirect branch (branch to unknown) */
1278 addEvent_Bi( &clgs
, curr_inode
, sbIn
->next
);
1281 /* shouldn't happen - if the incoming IR is properly
1282 flattened, should only have tmp and const cases to
1288 /* At the end of the bb. Flush outstandings. */
1289 flushEvents( &clgs
);
1291 /* Update global variable jmps_passed at end of SB.
1292 * As CLG_(current_state).jmps_passed is reset to 0 in setup_bbcc,
1293 * this can be omitted if there is no conditional jump in this SB.
1294 * A correction is needed if VEX inverted the last jump condition
1297 UInt jmps_passed
= cJumps
;
1298 if (clgs
.bb
->cjmp_inverted
) jmps_passed
--;
1299 addConstMemStoreStmt( clgs
.sbOut
,
1300 (UWord
) &CLG_(current_state
).jmps_passed
,
1301 jmps_passed
, hWordTy
);
1303 CLG_ASSERT(clgs
.bb
->cjmp_count
== cJumps
);
1304 CLG_ASSERT(clgs
.bb
->instr_count
== clgs
.ii_index
);
1306 /* Info for final exit from BB */
1310 if (sbIn
->jumpkind
== Ijk_Call
) jk
= jk_Call
;
1311 else if (sbIn
->jumpkind
== Ijk_Ret
) jk
= jk_Return
;
1314 if ((sbIn
->next
->tag
== Iex_Const
) &&
1315 (IRConst2Addr(sbIn
->next
->Iex
.Const
.con
) ==
1316 origAddr
+ clgs
.instr_offset
))
1319 clgs
.bb
->jmp
[cJumps
].jmpkind
= jk
;
1320 /* Instruction index of the call/ret at BB end
1321 * (it is wrong for fall-through, but does not matter) */
1322 clgs
.bb
->jmp
[cJumps
].instr
= clgs
.ii_index
-1;
1325 /* swap information of last exit with final exit if inverted */
1326 if (clgs
.bb
->cjmp_inverted
) {
1330 jk
= clgs
.bb
->jmp
[cJumps
].jmpkind
;
1331 clgs
.bb
->jmp
[cJumps
].jmpkind
= clgs
.bb
->jmp
[cJumps
-1].jmpkind
;
1332 clgs
.bb
->jmp
[cJumps
-1].jmpkind
= jk
;
1333 instr
= clgs
.bb
->jmp
[cJumps
].instr
;
1334 clgs
.bb
->jmp
[cJumps
].instr
= clgs
.bb
->jmp
[cJumps
-1].instr
;
1335 clgs
.bb
->jmp
[cJumps
-1].instr
= instr
;
1338 if (clgs
.seen_before
) {
1339 CLG_ASSERT(clgs
.bb
->cost_count
== update_cost_offsets(&clgs
));
1340 CLG_ASSERT(clgs
.bb
->instr_len
== clgs
.instr_offset
);
1343 clgs
.bb
->cost_count
= update_cost_offsets(&clgs
);
1344 clgs
.bb
->instr_len
= clgs
.instr_offset
;
1347 CLG_DEBUG(3, "- instrument(BB %#lx): byteLen %u, CJumps %u, CostLen %u\n",
1348 origAddr
, clgs
.bb
->instr_len
,
1349 clgs
.bb
->cjmp_count
, clgs
.bb
->cost_count
);
1351 CLG_DEBUG(3, " [ ");
1352 for (i
=0;i
<cJumps
;i
++)
1353 CLG_DEBUG(3, "%u ", clgs
.bb
->jmp
[i
].instr
);
1354 CLG_DEBUG(3, "], last inverted: %s \n",
1355 clgs
.bb
->cjmp_inverted
? "yes":"no");
1361 /*--------------------------------------------------------------------*/
1362 /*--- Discarding BB info ---*/
1363 /*--------------------------------------------------------------------*/
1365 // Called when a translation is removed from the translation cache for
1366 // any reason at all: to free up space, because the guest code was
1367 // unmapped or modified, or for any arbitrary reason.
1369 void clg_discard_superblock_info ( Addr orig_addr
, VexGuestExtents vge
)
1371 tl_assert(vge
.n_used
> 0);
1374 VG_(printf
)( "discard_superblock_info: %p, %p, %llu\n",
1376 (void*)vge
.base
[0], (ULong
)vge
.len
[0]);
1378 // Get BB info, remove from table, free BB info. Simple!
1379 // When created, the BB is keyed by the first instruction address,
1380 // (not orig_addr, but eventually redirected address). Thus, we
1381 // use the first instruction address in vge.
1382 CLG_(delete_bb
)(vge
.base
[0]);
1386 /*------------------------------------------------------------*/
1387 /*--- CLG_(fini)() and related function ---*/
1388 /*------------------------------------------------------------*/
1392 static void zero_thread_cost(thread_info
* t
)
1396 for(i
= 0; i
< CLG_(current_call_stack
).sp
; i
++) {
1397 if (!CLG_(current_call_stack
).entry
[i
].jcc
) continue;
1399 /* reset call counters to current for active calls */
1400 CLG_(copy_cost
)( CLG_(sets
).full
,
1401 CLG_(current_call_stack
).entry
[i
].enter_cost
,
1402 CLG_(current_state
).cost
);
1403 CLG_(current_call_stack
).entry
[i
].jcc
->call_counter
= 0;
1406 CLG_(forall_bbccs
)(CLG_(zero_bbcc
));
1408 /* set counter for last dump */
1409 CLG_(copy_cost
)( CLG_(sets
).full
,
1410 t
->lastdump_cost
, CLG_(current_state
).cost
);
1413 void CLG_(zero_all_cost
)(Bool only_current_thread
)
1415 if (VG_(clo_verbosity
) > 1)
1416 VG_(message
)(Vg_DebugMsg
, " Zeroing costs...\n");
1418 if (only_current_thread
)
1419 zero_thread_cost(CLG_(get_current_thread
)());
1421 CLG_(forall_threads
)(zero_thread_cost
);
1423 if (VG_(clo_verbosity
) > 1)
1424 VG_(message
)(Vg_DebugMsg
, " ...done\n");
1428 void unwind_thread(thread_info
* t
)
1430 /* unwind signal handlers */
1431 while(CLG_(current_state
).sig
!=0)
1432 CLG_(post_signal
)(CLG_(current_tid
),CLG_(current_state
).sig
);
1434 /* unwind regular call stack */
1435 while(CLG_(current_call_stack
).sp
>0)
1436 CLG_(pop_call_stack
)();
1438 /* reset context and function stack for context generation */
1439 CLG_(init_exec_state
)( &CLG_(current_state
) );
1440 CLG_(current_fn_stack
).top
= CLG_(current_fn_stack
).bottom
;
1444 void zero_state_cost(thread_info
* t
)
1446 CLG_(zero_cost
)( CLG_(sets
).full
, CLG_(current_state
).cost
);
1449 void CLG_(set_instrument_state
)(const HChar
* reason
, Bool state
)
1451 if (CLG_(instrument_state
) == state
) {
1452 CLG_DEBUG(2, "%s: instrumentation already %s\n",
1453 reason
, state
? "ON" : "OFF");
1456 CLG_(instrument_state
) = state
;
1457 CLG_DEBUG(2, "%s: Switching instrumentation %s ...\n",
1458 reason
, state
? "ON" : "OFF");
1460 VG_(discard_translations_safely
)( (Addr
)0x1000, ~(SizeT
)0xfff, "callgrind");
1462 /* reset internal state: call stacks, simulator */
1463 CLG_(forall_threads
)(unwind_thread
);
1464 CLG_(forall_threads
)(zero_state_cost
);
1465 (*CLG_(cachesim
).clear
)();
1467 if (VG_(clo_verbosity
) > 1)
1468 VG_(message
)(Vg_DebugMsg
, "%s: instrumentation switched %s\n",
1469 reason
, state
? "ON" : "OFF");
1472 /* helper for dump_state_togdb */
1473 static void dump_state_of_thread_togdb(thread_info
* ti
)
1475 static FullCost sum
= 0, tmp
= 0;
1481 t
= CLG_(current_tid
);
1482 CLG_(init_cost_lz
)( CLG_(sets
).full
, &sum
);
1483 CLG_(copy_cost_lz
)( CLG_(sets
).full
, &tmp
, ti
->lastdump_cost
);
1484 CLG_(add_diff_cost
)( CLG_(sets
).full
, sum
, ti
->lastdump_cost
,
1485 ti
->states
.entry
[0]->cost
);
1486 CLG_(copy_cost
)( CLG_(sets
).full
, ti
->lastdump_cost
, tmp
);
1487 mcost
= CLG_(mappingcost_as_string
)(CLG_(dumpmap
), sum
);
1488 VG_(gdb_printf
)("events-%d: %s\n", t
, mcost
);
1490 VG_(gdb_printf
)("frames-%d: %d\n", t
, CLG_(current_call_stack
).sp
);
1493 for(i
= 0; i
< CLG_(current_call_stack
).sp
; i
++) {
1494 ce
= CLG_(get_call_entry
)(i
);
1495 /* if this frame is skipped, we don't have counters */
1496 if (!ce
->jcc
) continue;
1498 from
= ce
->jcc
->from
;
1499 VG_(gdb_printf
)("function-%d-%d: %s\n",t
, i
, from
->cxt
->fn
[0]->name
);
1500 VG_(gdb_printf
)("calls-%d-%d: %llu\n",t
, i
, ce
->jcc
->call_counter
);
1502 /* FIXME: EventSets! */
1503 CLG_(copy_cost
)( CLG_(sets
).full
, sum
, ce
->jcc
->cost
);
1504 CLG_(copy_cost
)( CLG_(sets
).full
, tmp
, ce
->enter_cost
);
1505 CLG_(add_diff_cost
)( CLG_(sets
).full
, sum
,
1506 ce
->enter_cost
, CLG_(current_state
).cost
);
1507 CLG_(copy_cost
)( CLG_(sets
).full
, ce
->enter_cost
, tmp
);
1509 mcost
= CLG_(mappingcost_as_string
)(CLG_(dumpmap
), sum
);
1510 VG_(gdb_printf
)("events-%d-%d: %s\n",t
, i
, mcost
);
1513 if (ce
&& ce
->jcc
) {
1515 VG_(gdb_printf
)("function-%d-%d: %s\n",t
, i
, to
->cxt
->fn
[0]->name
);
1519 /* Dump current state */
1520 static void dump_state_togdb(void)
1524 Int orig_tid
= CLG_(current_tid
);
1526 VG_(gdb_printf
)("instrumentation: %s\n",
1527 CLG_(instrument_state
) ? "on":"off");
1528 if (!CLG_(instrument_state
)) return;
1530 VG_(gdb_printf
)("executed-bbs: %llu\n", CLG_(stat
).bb_executions
);
1531 VG_(gdb_printf
)("executed-calls: %llu\n", CLG_(stat
).call_counter
);
1532 VG_(gdb_printf
)("distinct-bbs: %d\n", CLG_(stat
).distinct_bbs
);
1533 VG_(gdb_printf
)("distinct-calls: %d\n", CLG_(stat
).distinct_jccs
);
1534 VG_(gdb_printf
)("distinct-functions: %d\n", CLG_(stat
).distinct_fns
);
1535 VG_(gdb_printf
)("distinct-contexts: %d\n", CLG_(stat
).distinct_contexts
);
1537 /* "events:" line. Given here because it will be dynamic in the future */
1538 HChar
*evmap
= CLG_(eventmapping_as_string
)(CLG_(dumpmap
));
1539 VG_(gdb_printf
)("events: %s\n", evmap
);
1541 /* "part:" line (number of last part. Is 0 at start */
1542 VG_(gdb_printf
)("part: %d\n", CLG_(get_dump_counter
)());
1545 th
= CLG_(get_threads
)();
1546 VG_(gdb_printf
)("threads:");
1547 for(t
=1;t
<VG_N_THREADS
;t
++) {
1548 if (!th
[t
]) continue;
1549 VG_(gdb_printf
)(" %d", t
);
1551 VG_(gdb_printf
)("\n");
1552 VG_(gdb_printf
)("current-tid: %d\n", orig_tid
);
1553 CLG_(forall_threads
)(dump_state_of_thread_togdb
);
1557 static void print_monitor_help ( void )
1559 VG_(gdb_printf
) ("\n");
1560 VG_(gdb_printf
) ("callgrind monitor commands:\n");
1561 VG_(gdb_printf
) (" dump [<dump_hint>]\n");
1562 VG_(gdb_printf
) (" dump counters\n");
1563 VG_(gdb_printf
) (" zero\n");
1564 VG_(gdb_printf
) (" zero counters\n");
1565 VG_(gdb_printf
) (" status\n");
1566 VG_(gdb_printf
) (" print status\n");
1567 VG_(gdb_printf
) (" instrumentation [on|off]\n");
1568 VG_(gdb_printf
) (" get/set (if on/off given) instrumentation state\n");
1569 VG_(gdb_printf
) ("\n");
1572 /* return True if request recognised, False otherwise */
1573 static Bool
handle_gdb_monitor_command (ThreadId tid
, const HChar
*req
)
1576 HChar s
[VG_(strlen
)(req
) + 1]; /* copy for strtok_r */
1579 VG_(strcpy
) (s
, req
);
1581 wcmd
= VG_(strtok_r
) (s
, " ", &ssaveptr
);
1582 switch (VG_(keyword_id
) ("help dump zero status instrumentation",
1583 wcmd
, kwd_report_duplicated_matches
)) {
1584 case -2: /* multiple matches */
1586 case -1: /* not found */
1589 print_monitor_help();
1591 case 1: { /* dump */
1592 CLG_(dump_profile
)(req
, False
);
1595 case 2: { /* zero */
1596 CLG_(zero_all_cost
)(False
);
1600 case 3: { /* status */
1601 HChar
* arg
= VG_(strtok_r
) (0, " ", &ssaveptr
);
1602 if (arg
&& (VG_(strcmp
)(arg
, "internal") == 0)) {
1603 /* internal interface to callgrind_control */
1608 if (!CLG_(instrument_state
)) {
1609 VG_(gdb_printf
)("No status available as instrumentation is switched off\n");
1611 // Status information to be improved ...
1612 thread_info
** th
= CLG_(get_threads
)();
1614 for(t
=1;t
<VG_N_THREADS
;t
++)
1615 if (th
[t
]) tcount
++;
1616 VG_(gdb_printf
)("%d thread(s) running.\n", tcount
);
1621 case 4: { /* instrumentation */
1622 HChar
* arg
= VG_(strtok_r
) (0, " ", &ssaveptr
);
1624 VG_(gdb_printf
)("instrumentation: %s\n",
1625 CLG_(instrument_state
) ? "on":"off");
1628 CLG_(set_instrument_state
)("Command", VG_(strcmp
)(arg
,"off")!=0);
1639 Bool
CLG_(handle_client_request
)(ThreadId tid
, UWord
*args
, UWord
*ret
)
1641 if (!VG_IS_TOOL_USERREQ('C','T',args
[0])
1642 && VG_USERREQ__GDB_MONITOR_COMMAND
!= args
[0])
1646 case VG_USERREQ__DUMP_STATS
:
1647 CLG_(dump_profile
)("Client Request", True
);
1648 *ret
= 0; /* meaningless */
1651 case VG_USERREQ__DUMP_STATS_AT
:
1653 const HChar
*arg
= (HChar
*)args
[1];
1654 HChar buf
[30 + VG_(strlen
)(arg
)]; // large enough
1655 VG_(sprintf
)(buf
,"Client Request: %s", arg
);
1656 CLG_(dump_profile
)(buf
, True
);
1657 *ret
= 0; /* meaningless */
1661 case VG_USERREQ__ZERO_STATS
:
1662 CLG_(zero_all_cost
)(True
);
1663 *ret
= 0; /* meaningless */
1666 case VG_USERREQ__TOGGLE_COLLECT
:
1667 CLG_(current_state
).collect
= !CLG_(current_state
).collect
;
1668 CLG_DEBUG(2, "Client Request: toggled collection state to %s\n",
1669 CLG_(current_state
).collect
? "ON" : "OFF");
1670 *ret
= 0; /* meaningless */
1673 case VG_USERREQ__START_INSTRUMENTATION
:
1674 CLG_(set_instrument_state
)("Client Request", True
);
1675 *ret
= 0; /* meaningless */
1678 case VG_USERREQ__STOP_INSTRUMENTATION
:
1679 CLG_(set_instrument_state
)("Client Request", False
);
1680 *ret
= 0; /* meaningless */
1683 case VG_USERREQ__GDB_MONITOR_COMMAND
: {
1684 Bool handled
= handle_gdb_monitor_command (tid
, (HChar
*)args
[1]);
1692 VG_(message
)(Vg_UserMsg
,
1693 "Warning: unknown callgrind client request code %llx\n",
1702 /* Syscall Timing. syscalltime[tid] is the time at which thread tid last
1703 started a syscall. */
1705 /* struct vki_timespec syscalltime[VG_N_THREADS];
1706 Whatever the syscall we use to measure the syscall time, we convert to
1707 seconds and nanoseconds. */
1708 struct vki_timespec
*syscalltime
;
1709 struct vki_timespec
*syscallcputime
;
1713 void collect_time (struct vki_timespec
*systime
, struct vki_timespec
*syscputime
)
1715 switch (CLG_(clo
).collect_systime
) {
1716 default: tl_assert (0);
1717 case systime_msec
: {
1718 UInt ms_timer
= VG_(read_millisecond_timer
)();
1719 systime
->tv_sec
= ms_timer
/ 1000;
1720 systime
->tv_nsec
= (ms_timer
% 1000) * 1000000L;
1723 case systime_usec
: {
1724 struct vki_timeval tv_now
;
1725 VG_(gettimeofday
)(&tv_now
, NULL
);
1726 systime
->tv_sec
= tv_now
.tv_sec
;
1727 systime
->tv_nsec
= tv_now
.tv_usec
* 1000;
1731 # if defined(VGO_linux) || defined(VGO_solaris) || defined(VGO_freebsd)
1732 VG_(clock_gettime
)(systime
, VKI_CLOCK_MONOTONIC
);
1733 VG_(clock_gettime
)(syscputime
, VKI_CLOCK_THREAD_CPUTIME_ID
);
1735 # elif defined(VGO_darwin)
1738 # error "Unknown OS"
1745 void CLG_(pre_syscalltime
)(ThreadId tid
, UInt syscallno
,
1746 UWord
* args
, UInt nArgs
)
1748 collect_time(&syscalltime
[tid
],
1749 CLG_(clo
).collect_systime
== systime_nsec
? &syscallcputime
[tid
] : NULL
);
1752 /* Returns "after - before" in the unit as specified by --collect-systime.
1753 after is supposed to be >= before, and tv_nsec must be >= 0 and < One_Second_In_Nsec. */
1755 ULong
vki_timespec_diff (struct vki_timespec after
, struct vki_timespec before
)
1757 vki_time_t diff_sec
= after
.tv_sec
- before
.tv_sec
;
1758 long diff_nsec
= after
.tv_nsec
- before
.tv_nsec
;
1759 ULong nsec_factor
; // factor to convert the desired unit into nsec.
1761 if (diff_nsec
< 0) {
1763 diff_nsec
+= 1000000000ULL;
1765 switch (CLG_(clo
).collect_systime
) {
1766 case systime_no
: tl_assert (0);
1767 case systime_msec
: nsec_factor
= 1000000ULL; break;
1768 case systime_usec
: nsec_factor
= 1000ULL; break;
1769 case systime_nsec
: nsec_factor
= 1ULL; break;
1770 default: tl_assert(0);
1772 return ((ULong
) diff_sec
* 1000000000ULL + diff_nsec
) / nsec_factor
;
1776 void CLG_(post_syscalltime
)(ThreadId tid
, UInt syscallno
,
1777 UWord
* args
, UInt nArgs
, SysRes res
)
1779 if (CLG_(current_state
).bbcc
) {
1781 struct vki_timespec ts_now
;
1782 struct vki_timespec ts_cpunow
;
1785 collect_time(&ts_now
,
1786 CLG_(clo
).collect_systime
== systime_nsec
? &ts_cpunow
: NULL
);
1788 diff
= vki_timespec_diff (ts_now
, syscalltime
[tid
]);
1790 /* offset o is for "SysCount", o+1 for "SysTime",
1791 o+2 is (optionally) "SysCpuTime". */
1792 o
= fullOffset(EG_SYS
);
1794 CLG_DEBUG(0," Time (Off %d) for Syscall %u: %llu\n", o
, syscallno
,
1797 if (!CLG_(current_state
).bbcc
->skipped
)
1798 CLG_(init_cost_lz
)(CLG_(sets
).full
,
1799 &(CLG_(current_state
).bbcc
->skipped
));
1800 CLG_(current_state
).cost
[o
] ++;
1801 CLG_(current_state
).cost
[o
+1] += diff
;
1802 CLG_(current_state
).bbcc
->skipped
[o
] ++;
1803 CLG_(current_state
).bbcc
->skipped
[o
+1] += diff
;
1804 if (CLG_(clo
).collect_systime
== systime_nsec
) {
1805 diff
= vki_timespec_diff (ts_cpunow
, syscallcputime
[tid
]);
1806 CLG_DEBUG(0," SysCpuTime (Off %d) for Syscall %u: %llu\n", o
+2, syscallno
,
1808 CLG_(current_state
).cost
[o
+2] += diff
;
1809 CLG_(current_state
).bbcc
->skipped
[o
+2] += diff
;
1814 static UInt
ULong_width(ULong n
)
1822 return w
+ (w
-1)/3; // add space for commas
1826 void branchsim_printstat(int l1
, int l2
, int l3
)
1828 static HChar fmt
[128]; // large enough
1830 ULong Bc_total_b
, Bc_total_mp
, Bi_total_b
, Bi_total_mp
;
1831 ULong B_total_b
, B_total_mp
;
1833 total
= CLG_(total_cost
);
1834 Bc_total_b
= total
[ fullOffset(EG_BC
) ];
1835 Bc_total_mp
= total
[ fullOffset(EG_BC
)+1 ];
1836 Bi_total_b
= total
[ fullOffset(EG_BI
) ];
1837 Bi_total_mp
= total
[ fullOffset(EG_BI
)+1 ];
1839 /* Make format string, getting width right for numbers */
1840 VG_(sprintf
)(fmt
, "%%s %%,%dllu (%%,%dllu cond + %%,%dllu ind)\n",
1843 if (0 == Bc_total_b
) Bc_total_b
= 1;
1844 if (0 == Bi_total_b
) Bi_total_b
= 1;
1845 B_total_b
= Bc_total_b
+ Bi_total_b
;
1846 B_total_mp
= Bc_total_mp
+ Bi_total_mp
;
1849 VG_(umsg
)(fmt
, "Branches: ",
1850 B_total_b
, Bc_total_b
, Bi_total_b
);
1852 VG_(umsg
)(fmt
, "Mispredicts: ",
1853 B_total_mp
, Bc_total_mp
, Bi_total_mp
);
1855 VG_(umsg
)("Mispred rate: %*.1f%% (%*.1f%% + %*.1f%% )\n",
1856 l1
, B_total_mp
* 100.0 / B_total_b
,
1857 l2
, Bc_total_mp
* 100.0 / Bc_total_b
,
1858 l3
, Bi_total_mp
* 100.0 / Bi_total_b
);
1862 void clg_print_stats(void)
1865 CLG_(stat
).full_debug_BBs
+
1866 CLG_(stat
).fn_name_debug_BBs
+
1867 CLG_(stat
).file_line_debug_BBs
+
1868 CLG_(stat
).no_debug_BBs
;
1870 /* Hash table stats */
1871 VG_(message
)(Vg_DebugMsg
, "Distinct objects: %d\n",
1872 CLG_(stat
).distinct_objs
);
1873 VG_(message
)(Vg_DebugMsg
, "Distinct files: %d\n",
1874 CLG_(stat
).distinct_files
);
1875 VG_(message
)(Vg_DebugMsg
, "Distinct fns: %d\n",
1876 CLG_(stat
).distinct_fns
);
1877 VG_(message
)(Vg_DebugMsg
, "Distinct contexts:%d\n",
1878 CLG_(stat
).distinct_contexts
);
1879 VG_(message
)(Vg_DebugMsg
, "Distinct BBs: %d\n",
1880 CLG_(stat
).distinct_bbs
);
1881 VG_(message
)(Vg_DebugMsg
, "Cost entries: %u (Chunks %u)\n",
1882 CLG_(costarray_entries
), CLG_(costarray_chunks
));
1883 VG_(message
)(Vg_DebugMsg
, "Distinct BBCCs: %d\n",
1884 CLG_(stat
).distinct_bbccs
);
1885 VG_(message
)(Vg_DebugMsg
, "Distinct JCCs: %d\n",
1886 CLG_(stat
).distinct_jccs
);
1887 VG_(message
)(Vg_DebugMsg
, "Distinct skips: %d\n",
1888 CLG_(stat
).distinct_skips
);
1889 VG_(message
)(Vg_DebugMsg
, "BB lookups: %d\n",
1892 VG_(message
)(Vg_DebugMsg
, "With full debug info:%3d%% (%d)\n",
1893 CLG_(stat
).full_debug_BBs
* 100 / BB_lookups
,
1894 CLG_(stat
).full_debug_BBs
);
1895 VG_(message
)(Vg_DebugMsg
, "With file/line debug info:%3d%% (%d)\n",
1896 CLG_(stat
).file_line_debug_BBs
* 100 / BB_lookups
,
1897 CLG_(stat
).file_line_debug_BBs
);
1898 VG_(message
)(Vg_DebugMsg
, "With fn name debug info:%3d%% (%d)\n",
1899 CLG_(stat
).fn_name_debug_BBs
* 100 / BB_lookups
,
1900 CLG_(stat
).fn_name_debug_BBs
);
1901 VG_(message
)(Vg_DebugMsg
, "With no debug info:%3d%% (%d)\n",
1902 CLG_(stat
).no_debug_BBs
* 100 / BB_lookups
,
1903 CLG_(stat
).no_debug_BBs
);
1905 VG_(message
)(Vg_DebugMsg
, "BBCC Clones: %d\n",
1906 CLG_(stat
).bbcc_clones
);
1907 VG_(message
)(Vg_DebugMsg
, "BBs Retranslated: %d\n",
1908 CLG_(stat
).bb_retranslations
);
1909 VG_(message
)(Vg_DebugMsg
, "Distinct instrs: %d\n",
1910 CLG_(stat
).distinct_instrs
);
1912 VG_(message
)(Vg_DebugMsg
, "LRU Contxt Misses: %d\n",
1913 CLG_(stat
).cxt_lru_misses
);
1914 VG_(message
)(Vg_DebugMsg
, "LRU BBCC Misses: %d\n",
1915 CLG_(stat
).bbcc_lru_misses
);
1916 VG_(message
)(Vg_DebugMsg
, "LRU JCC Misses: %d\n",
1917 CLG_(stat
).jcc_lru_misses
);
1918 VG_(message
)(Vg_DebugMsg
, "BBs Executed: %llu\n",
1919 CLG_(stat
).bb_executions
);
1920 VG_(message
)(Vg_DebugMsg
, "Calls: %llu\n",
1921 CLG_(stat
).call_counter
);
1922 VG_(message
)(Vg_DebugMsg
, "CondJMP followed: %llu\n",
1923 CLG_(stat
).jcnd_counter
);
1924 VG_(message
)(Vg_DebugMsg
, "Boring JMPs: %llu\n",
1925 CLG_(stat
).jump_counter
);
1926 VG_(message
)(Vg_DebugMsg
, "Recursive calls: %llu\n",
1927 CLG_(stat
).rec_call_counter
);
1928 VG_(message
)(Vg_DebugMsg
, "Returns: %llu\n",
1929 CLG_(stat
).ret_counter
);
1936 HChar fmt
[128]; // large enough
1940 CLG_DEBUG(0, "finish()\n");
1942 (*CLG_(cachesim
).finish
)();
1944 /* pop all remaining items from CallStack for correct sum
1946 CLG_(forall_threads
)(unwind_thread
);
1948 CLG_(dump_profile
)(0, False
);
1950 if (VG_(clo_verbosity
) == 0) return;
1952 if (VG_(clo_stats
)) {
1953 VG_(message
)(Vg_DebugMsg
, "\n");
1955 VG_(message
)(Vg_DebugMsg
, "\n");
1958 HChar
*evmap
= CLG_(eventmapping_as_string
)(CLG_(dumpmap
));
1959 VG_(message
)(Vg_UserMsg
, "Events : %s\n", evmap
);
1961 HChar
*mcost
= CLG_(mappingcost_as_string
)(CLG_(dumpmap
), CLG_(total_cost
));
1962 VG_(message
)(Vg_UserMsg
, "Collected : %s\n", mcost
);
1964 VG_(message
)(Vg_UserMsg
, "\n");
1966 /* determine value widths for statistics */
1967 total
= CLG_(total_cost
);
1968 l1
= ULong_width( total
[fullOffset(EG_IR
)] );
1970 if (CLG_(clo
).simulate_cache
) {
1971 l2
= ULong_width( total
[fullOffset(EG_DR
)] );
1972 l3
= ULong_width( total
[fullOffset(EG_DW
)] );
1974 if (CLG_(clo
).simulate_branch
) {
1975 int l2b
= ULong_width( total
[fullOffset(EG_BC
)] );
1976 int l3b
= ULong_width( total
[fullOffset(EG_BI
)] );
1977 if (l2b
> l2
) l2
= l2b
;
1978 if (l3b
> l3
) l3
= l3b
;
1981 /* Make format string, getting width right for numbers */
1982 VG_(sprintf
)(fmt
, "%%s %%,%dllu\n", l1
);
1984 /* Always print this */
1985 VG_(umsg
)(fmt
, "I refs: ", total
[fullOffset(EG_IR
)] );
1987 if (CLG_(clo
).simulate_cache
)
1988 (*CLG_(cachesim
).printstat
)(l1
, l2
, l3
);
1990 if (CLG_(clo
).simulate_branch
)
1991 branchsim_printstat(l1
, l2
, l3
);
1996 void CLG_(fini
)(Int exitcode
)
2002 /*--------------------------------------------------------------------*/
2004 /*--------------------------------------------------------------------*/
2006 static void clg_start_client_code_callback ( ThreadId tid
, ULong blocks_done
)
2008 static ULong last_blocks_done
= 0;
2011 VG_(printf
)("%d R %llu\n", (Int
)tid
, blocks_done
);
2013 /* throttle calls to CLG_(run_thread) by number of BBs executed */
2014 if (blocks_done
- last_blocks_done
< 5000) return;
2015 last_blocks_done
= blocks_done
;
2017 CLG_(run_thread
)( tid
);
2021 void CLG_(post_clo_init
)(void)
2023 if (VG_(clo_vex_control
).iropt_register_updates_default
2024 != VexRegUpdSpAtMemAccess
) {
2025 CLG_DEBUG(1, " Using user specified value for "
2026 "--vex-iropt-register-updates\n");
2029 " Using default --vex-iropt-register-updates="
2030 "sp-at-mem-access\n");
2033 if (CLG_(clo
).collect_systime
!= systime_no
) {
2034 VG_(needs_syscall_wrapper
)(CLG_(pre_syscalltime
),
2035 CLG_(post_syscalltime
));
2036 syscalltime
= CLG_MALLOC("cl.main.pci.1",
2037 VG_N_THREADS
* sizeof syscalltime
[0]);
2038 for (UInt i
= 0; i
< VG_N_THREADS
; ++i
) {
2039 syscalltime
[i
].tv_sec
= 0;
2040 syscalltime
[i
].tv_nsec
= 0;
2042 if (CLG_(clo
).collect_systime
== systime_nsec
) {
2043 syscallcputime
= CLG_MALLOC("cl.main.pci.2",
2044 VG_N_THREADS
* sizeof syscallcputime
[0]);
2045 for (UInt i
= 0; i
< VG_N_THREADS
; ++i
) {
2046 syscallcputime
[i
].tv_sec
= 0;
2047 syscallcputime
[i
].tv_nsec
= 0;
2052 if (VG_(clo_px_file_backed
) != VexRegUpdSpAtMemAccess
) {
2053 CLG_DEBUG(1, " Using user specified value for "
2054 "--px-file-backed\n");
2057 " Using default --px-file-backed="
2058 "sp-at-mem-access\n");
2061 if (VG_(clo_vex_control
).iropt_unroll_thresh
!= 0) {
2062 VG_(message
)(Vg_UserMsg
,
2063 "callgrind only works with --vex-iropt-unroll-thresh=0\n"
2064 "=> resetting it back to 0\n");
2065 VG_(clo_vex_control
).iropt_unroll_thresh
= 0; // cannot be overridden.
2067 if (VG_(clo_vex_control
).guest_chase
) {
2068 VG_(message
)(Vg_UserMsg
,
2069 "callgrind only works with --vex-guest-chase=no\n"
2070 "=> resetting it back to 'no'\n");
2071 VG_(clo_vex_control
).guest_chase
= False
; // cannot be overridden.
2074 CLG_DEBUG(1, " dump threads: %s\n", CLG_(clo
).separate_threads
? "Yes":"No");
2075 CLG_DEBUG(1, " call sep. : %d\n", CLG_(clo
).separate_callers
);
2076 CLG_DEBUG(1, " rec. sep. : %d\n", CLG_(clo
).separate_recursions
);
2078 if (!CLG_(clo
).dump_line
&& !CLG_(clo
).dump_instr
&& !CLG_(clo
).dump_bb
) {
2079 VG_(message
)(Vg_UserMsg
, "Using source line as position.\n");
2080 CLG_(clo
).dump_line
= True
;
2085 (*CLG_(cachesim
).post_clo_init
)();
2087 CLG_(init_eventsets
)();
2088 CLG_(init_statistics
)(& CLG_(stat
));
2089 CLG_(init_cost_lz
)( CLG_(sets
).full
, &CLG_(total_cost
) );
2091 /* initialize hash tables */
2092 CLG_(init_obj_table
)();
2093 CLG_(init_cxt_table
)();
2094 CLG_(init_bb_hash
)();
2096 CLG_(init_threads
)();
2097 CLG_(run_thread
)(1);
2099 CLG_(instrument_state
) = CLG_(clo
).instrument_atstart
;
2101 if (VG_(clo_verbosity
) > 0) {
2102 VG_(message
)(Vg_UserMsg
,
2103 "For interactive control, run 'callgrind_control%s%s -h'.\n",
2104 (VG_(arg_vgdb_prefix
) ? " " : ""),
2105 (VG_(arg_vgdb_prefix
) ? VG_(arg_vgdb_prefix
) : ""));
2110 void CLG_(pre_clo_init
)(void)
2112 VG_(details_name
) ("Callgrind");
2113 VG_(details_version
) (NULL
);
2114 VG_(details_description
) ("a call-graph generating cache profiler");
2115 VG_(details_copyright_author
)("Copyright (C) 2002-2017, and GNU GPL'd, "
2116 "by Josef Weidendorfer et al.");
2117 VG_(details_bug_reports_to
) (VG_BUGS_TO
);
2118 VG_(details_avg_translation_sizeB
) ( 500 );
2120 VG_(clo_vex_control
).iropt_register_updates_default
2121 = VG_(clo_px_file_backed
)
2122 = VexRegUpdSpAtMemAccess
; // overridable by the user.
2124 VG_(clo_vex_control
).iropt_unroll_thresh
= 0; // cannot be overridden.
2125 VG_(clo_vex_control
).guest_chase
= False
; // cannot be overridden.
2127 VG_(basic_tool_funcs
) (CLG_(post_clo_init
),
2131 VG_(needs_superblock_discards
)(clg_discard_superblock_info
);
2134 VG_(needs_command_line_options
)(CLG_(process_cmd_line_option
),
2136 CLG_(print_debug_usage
));
2138 VG_(needs_client_requests
)(CLG_(handle_client_request
));
2139 VG_(needs_print_stats
) (clg_print_stats
);
2141 VG_(track_start_client_code
) ( & clg_start_client_code_callback
);
2142 VG_(track_pre_deliver_signal
) ( & CLG_(pre_signal
) );
2143 VG_(track_post_deliver_signal
)( & CLG_(post_signal
) );
2145 CLG_(set_clo_defaults
)();
2149 VG_DETERMINE_INTERFACE_VERSION(CLG_(pre_clo_init
))
2151 /*--------------------------------------------------------------------*/
2152 /*--- end main.c ---*/
2153 /*--------------------------------------------------------------------*/